Merge branch develop into master

Matthew Barr 2017-09-22 15:20:28 +10:00
commit 3dcd51c272
250 changed files with 10602 additions and 5305 deletions

View File

@@ -2,6 +2,17 @@
This is a list of notable changes to Hyperscan, in reverse chronological order.
## [4.6.0] 2017-09-22
- New API feature: stream state compression. This allows the user to compress
and restore state for streams to reduce memory usage.
- Many improvements to literal matching performance, including more support
for Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX-512).
- Compile time improvements, mainly reducing compiler memory allocation.
Also results in reduced compile time for some pattern sets.
- Bugfix for issue #62: fix error building Hyperscan using older versions of
Boost.
- Small updates to fix warnings identified by Coverity.
## [4.5.2] 2017-07-26
- Bugfix for issue #57: Treat characters between `\Q.\E` as codepoints in
UTF8 mode.

View File

@@ -2,8 +2,8 @@ cmake_minimum_required (VERSION 2.8.11)
project (hyperscan C CXX)
set (HS_MAJOR_VERSION 4)
set (HS_MINOR_VERSION 5)
set (HS_PATCH_VERSION 2)
set (HS_MINOR_VERSION 6)
set (HS_PATCH_VERSION 0)
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
@@ -38,6 +38,7 @@ endif()
set(BINDIR "${PROJECT_BINARY_DIR}/bin")
set(LIBDIR "${PROJECT_BINARY_DIR}/lib")
set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR})
# First for the generic no-config case
@@ -57,6 +58,11 @@ if(CMAKE_GENERATOR STREQUAL Xcode)
set(XCODE TRUE)
endif()
# older versions of cmake don't know that these toolchains support isystem
if (XCODE OR CMAKE_CXX_COMPILER_ID MATCHES "Intel")
set(CMAKE_INCLUDE_SYSTEM_FLAG_CXX "-isystem")
endif ()
set(CMAKE_INCLUDE_CURRENT_DIR 1)
include_directories(${PROJECT_SOURCE_DIR}/src)
include_directories(${PROJECT_BINARY_DIR})
@@ -148,8 +154,9 @@ if(MSVC OR MSVC_IDE)
# todo: change these as required
set(ARCH_C_FLAGS "/arch:AVX2")
set(ARCH_CXX_FLAGS "/arch:AVX2")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 /wd4244 /wd4267")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /wd4244 /wd4267 /wd4800 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
set(MSVC_WARNS "/wd4101 /wd4146 /wd4172 /wd4200 /wd4244 /wd4267 /wd4307 /wd4334 /wd4805 -D_CRT_SECURE_NO_WARNINGS")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 ${MSVC_WARNS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 ${MSVC_WARNS} /wd4800 -DBOOST_DETAIL_NO_CONTAINER_FWD")
endif()
string(REPLACE "/RTC1" "" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
string(REPLACE "/RTC1" "" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}")
@@ -248,7 +255,13 @@ else()
endif()
if(CMAKE_COMPILER_IS_GNUCXX)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fabi-version=0 -Wno-unused-local-typedefs -Wno-maybe-uninitialized")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-maybe-uninitialized")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fabi-version=0")
endif ()
# don't complain about abi
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-abi")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-abi")
endif()
if (NOT(ARCH_IA32 AND RELEASE_BUILD))
@@ -256,11 +269,6 @@ else()
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer")
endif()
if (RELEASE_BUILD)
# we don't need the noise of ABI warnings in a release build
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-abi")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-abi")
endif ()
if (CMAKE_C_COMPILER_ID MATCHES "Intel")
set(SKYLAKE_FLAG "-xCORE-AVX512")
@@ -396,18 +404,14 @@ if (CXX_MISSING_DECLARATIONS)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wmissing-declarations")
endif()
CHECK_CXX_COMPILER_FLAG("-Wunused-local-typedefs" CXX_UNUSED_LOCAL_TYPEDEFS)
# gcc5 complains about this
CHECK_CXX_COMPILER_FLAG("-Wunused-variable" CXX_WUNUSED_VARIABLE)
endif()
if (NOT XCODE)
include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
else()
# cmake doesn't think Xcode supports isystem
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -isystem ${Boost_INCLUDE_DIRS}")
endif()
include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
set(LINUX TRUE)
@@ -419,10 +423,10 @@ endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
if(NOT WIN32)
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -diag-error 10006 -diag-disable 177 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable=remark")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -diag-error 10006 -diag-disable 68 -diag-disable 177 -diag-disable 186 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable=remark")
endif()
if(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-error 10006 -diag-disable 177 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable 1170 -diag-disable 3373 -diag-disable=remark")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-error 10006 -diag-disable 68 -diag-disable 177 -diag-disable 186 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable 1170 -diag-disable 3373 -diag-disable=remark")
endif()
endif()
@@ -513,6 +517,9 @@ set (hs_exec_SRCS
src/crc32.h
src/report.h
src/runtime.c
src/stream_compress.c
src/stream_compress.h
src/stream_compress_impl.h
src/fdr/fdr.c
src/fdr/fdr.h
src/fdr/fdr_internal.h
@@ -629,6 +636,7 @@ set (hs_exec_SRCS
src/util/masked_move.h
src/util/multibit.h
src/util/multibit.c
src/util/multibit_compress.h
src/util/multibit_internal.h
src/util/pack_bits.h
src/util/popcount.h
@@ -651,7 +659,7 @@ set (hs_exec_avx2_SRCS
)
SET (hs_SRCS
SET (hs_compile_SRCS
${hs_HEADERS}
src/crc32.h
src/database.h
@@ -659,7 +667,6 @@ SET (hs_SRCS
src/grey.h
src/hs.cpp
src/hs_internal.h
src/hs_version.c
src/hs_version.h
src/scratch.h
src/state.h
@@ -735,6 +742,7 @@ SET (hs_SRCS
src/nfa/nfa_build_util.h
src/nfa/nfa_internal.h
src/nfa/nfa_kind.h
src/nfa/rdfa.cpp
src/nfa/rdfa.h
src/nfa/rdfa_graph.cpp
src/nfa/rdfa_graph.h
@@ -960,6 +968,7 @@ SET (hs_SRCS
src/rose/rose_build_merge.cpp
src/rose/rose_build_merge.h
src/rose/rose_build_misc.cpp
src/rose/rose_build_misc.h
src/rose/rose_build_program.cpp
src/rose/rose_build_program.h
src/rose/rose_build_resources.h
@@ -996,9 +1005,13 @@ SET (hs_SRCS
src/util/dump_mask.h
src/util/fatbit_build.cpp
src/util/fatbit_build.h
src/util/flat_containers.h
src/util/graph.h
src/util/graph_range.h
src/util/graph_small_color_map.h
src/util/hash.h
src/util/hash_dynamic_bitset.h
src/util/insertion_ordered.h
src/util/math.h
src/util/multibit_build.cpp
src/util/multibit_build.h
@@ -1016,7 +1029,6 @@ SET (hs_SRCS
src/util/small_vector.h
src/util/target_info.cpp
src/util/target_info.h
src/util/ue2_containers.h
src/util/ue2_graph.h
src/util/ue2string.cpp
src/util/ue2string.h
@@ -1024,6 +1036,7 @@ SET (hs_SRCS
src/util/unicode_def.h
src/util/unicode_set.h
src/util/uniform_ops.h
src/util/unordered.h
src/util/verify_types.h
)
@@ -1076,7 +1089,7 @@ set(hs_dump_SRCS
)
if (DUMP_SUPPORT)
set(hs_SRCS ${hs_SRCS} ${hs_dump_SRCS})
set(hs_compile_SRCS ${hs_compile_SRCS} ${hs_dump_SRCS})
endif()
# we group things by sublibraries, specifying shared and static and then
@@ -1099,12 +1112,20 @@ if (NOT FAT_RUNTIME)
add_library(hs_runtime STATIC src/hs_version.c src/hs_valid_platform.c $<TARGET_OBJECTS:hs_exec>)
set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C)
add_library(hs STATIC ${hs_SRCS} src/hs_valid_platform.c $<TARGET_OBJECTS:hs_exec>)
add_library(hs_compile OBJECT ${hs_compile_SRCS})
add_library(hs STATIC
src/hs_version.c
src/hs_valid_platform.c
$<TARGET_OBJECTS:hs_exec>
$<TARGET_OBJECTS:hs_compile>)
endif (BUILD_STATIC_LIBS)
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
add_library(hs_exec_shared OBJECT ${hs_exec_SRCS})
set_target_properties(hs_exec_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
add_library(hs_compile_shared OBJECT ${hs_compile_SRCS})
set_target_properties(hs_compile_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
endif()
else (FAT_RUNTIME)
@@ -1158,10 +1179,11 @@ else (FAT_RUNTIME)
$<TARGET_OBJECTS:hs_exec_common>
${RUNTIME_LIBS})
set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C)
add_library(hs_compile OBJECT ${hs_compile_SRCS})
# we want the static lib for testing
add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c
${hs_SRCS}
$<TARGET_OBJECTS:hs_compile>
$<TARGET_OBJECTS:hs_exec_common>
${RUNTIME_LIBS})
@@ -1169,6 +1191,8 @@ else (FAT_RUNTIME)
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
# build shared libs
add_library(hs_compile_shared OBJECT ${hs_compile_SRCS})
set_target_properties(hs_compile_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS})
list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_shared_core2>)
set_target_properties(hs_exec_shared_core2 PROPERTIES
@@ -1249,10 +1273,10 @@ endif()
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
if (NOT FAT_RUNTIME)
add_library(hs_shared SHARED src/hs_version.c src/hs_valid_platform.c
${hs_SRCS} $<TARGET_OBJECTS:hs_exec_shared>)
$<TARGET_OBJECTS:hs_compile_shared> $<TARGET_OBJECTS:hs_exec_shared>)
else()
add_library(hs_shared SHARED src/hs_version.c src/hs_valid_platform.c
${hs_SRCS} $<TARGET_OBJECTS:hs_exec_common_shared>
$<TARGET_OBJECTS:hs_compile_shared> $<TARGET_OBJECTS:hs_exec_common_shared>
${RUNTIME_SHLIBS})
endif()

View File

@@ -64,7 +64,7 @@ libpcre are supported. The use of unsupported constructs will result in
compilation errors.
The version of PCRE used to validate Hyperscan's interpretation of this syntax
is 8.40.
is 8.41.
====================
Supported Constructs

View File

@@ -80,6 +80,42 @@ functions for the management of streams:
another, resetting the destination stream first. This call avoids the
allocation done by :c:func:`hs_copy_stream`.
==================
Stream Compression
==================
A stream object is allocated as a fixed size region of memory which has been
sized to ensure that no memory allocations are required during scan
operations. When the system is under memory pressure, it may be useful to reduce
the memory consumed by streams that are not expected to be used soon. The
Hyperscan API provides calls for translating a stream to and from a compressed
representation for this purpose. The compressed representation differs from the
full stream object as it does not reserve space for components which are not
required given the current stream state. The Hyperscan API functions for this
functionality are:
* :c:func:`hs_compress_stream`: fills the provided buffer with a compressed
representation of the stream and returns the number of bytes consumed by the
compressed representation. If the buffer is not large enough to hold the
compressed representation, :c:member:`HS_INSUFFICIENT_SPACE` is returned along
with the required size. This call does not modify the original stream in any
way: it may still be written to with :c:func:`hs_scan_stream`, used as part of
the various reset calls to reinitialise its state, or
:c:func:`hs_close_stream` may be called to free its resources.
* :c:func:`hs_expand_stream`: creates a new stream based on a buffer containing
a compressed representation.
* :c:func:`hs_reset_and_expand_stream`: constructs a stream based on a buffer
containing a compressed representation on top of an existing stream, resetting
the existing stream first. This call avoids the allocation done by
:c:func:`hs_expand_stream`.
Note: for performance reasons, it is not recommended to compress and expand a
stream around every scan call, as converting between the compressed
representation and a standard stream takes time.
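A minimal sketch of the compress/expand round trip follows (illustrative
only: it assumes a streaming-mode database ``db``, scratch space ``scratch``,
a match callback ``on_match`` and an open stream ``stream`` already exist,
and abbreviates error handling):

.. code-block:: c

    /* Compress a stream that is expected to be idle for a while. The
     * original stream is not modified by this call. */
    char buf[4096];
    size_t used = 0;
    hs_error_t err = hs_compress_stream(stream, buf, sizeof(buf), &used);
    if (err == HS_INSUFFICIENT_SPACE) {
        /* 'used' now holds the required size: retry with a larger buffer. */
    }

    /* Once the compressed form has been stored, the stream can be closed
     * to release its memory. */
    err = hs_close_stream(stream, scratch, on_match, NULL);

    /* Later: rebuild a live stream from the compressed representation and
     * resume scanning with it. */
    hs_stream_t *expanded = NULL;
    err = hs_expand_stream(db, &expanded, buf, used);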
**********
Block Mode
**********

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -165,6 +165,7 @@ static bool higher_is_better(Criterion c) {
}
static void print_criterion(Criterion c, double val) {
std::ios::fmtflags f(cout.flags());
switch (c) {
case CRITERION_THROUGHPUT:
cout << std::fixed << std::setprecision(3) << val << " Megabits/s";
@@ -179,6 +180,7 @@ static void print_criterion(Criterion c, double val) {
cout << static_cast<size_t>(val) << " bytes";
break;
}
cout.flags(f);
}
// Key for identifying a stream in our pcap input data, using data from its IP
@@ -596,11 +598,13 @@ double eval_set(Benchmark &bench, Sigdata &sigs, unsigned int mode,
size_t bytes = bench.bytes();
size_t matches = bench.matches();
if (diagnose) {
std::ios::fmtflags f(cout.flags());
cout << "Scan time " << std::fixed << std::setprecision(3) << scan_time
<< " sec, Scanned " << bytes * repeatCount << " bytes, Throughput "
<< std::fixed << std::setprecision(3)
<< (bytes * 8 * repeatCount) / (scan_time * 1000000)
<< " Mbps, Matches " << matches << endl;
cout.flags(f);
}
return (bytes * 8 * repeatCount) / (scan_time * 1000000);
}
@@ -755,10 +759,12 @@ int main(int argc, char **argv) {
for (unsigned i = count; i < 16; i++) {
cout << " ";
}
std::ios::fmtflags out_f(cout.flags());
cout << "Performance: ";
print_criterion(criterion, best);
cout << " (" << std::fixed << std::setprecision(3) << (best / score_base)
<< "x) after cutting:" << endl;
cout.flags(out_f);
// s now has factor_max signatures
for (const auto &found : s) {

View File

@@ -127,6 +127,16 @@ CREATE_DISPATCH(hs_error_t, hs_serialized_database_info, const char *bytes,
CREATE_DISPATCH(hs_error_t, hs_serialized_database_size, const char *bytes,
const size_t length, size_t *deserialized_size);
CREATE_DISPATCH(hs_error_t, hs_compress_stream, const hs_stream_t *stream,
char *buf, size_t buf_space, size_t *used_space);
CREATE_DISPATCH(hs_error_t, hs_expand_stream, const hs_database_t *db,
hs_stream_t **stream, const char *buf, size_t buf_size);
CREATE_DISPATCH(hs_error_t, hs_reset_and_expand_stream, hs_stream_t *to_stream,
const char *buf, size_t buf_size, hs_scratch_t *scratch,
match_event_handler onEvent, void *context);
/** INTERNALS **/
CREATE_DISPATCH(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen);

View File

@@ -32,6 +32,7 @@
#include "fdr_internal.h"
#include "fdr_loadval.h"
#include "flood_runtime.h"
#include "scratch.h"
#include "teddy.h"
#include "teddy_internal.h"
#include "util/arch.h"
@@ -358,7 +359,7 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control,
}
u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a) + 1);
confWithBit(fdrc, a, ptr_main - a->buf + byte, control,
last_match_id, confVal);
last_match_id, confVal, conf, bit);
} while (unlikely(!!*conf));
}
@@ -725,13 +726,17 @@ static never_inline
hwlm_error_t fdr_engine_exec(const struct FDR *fdr,
const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
assert(ISALIGNED_CL(fdr));
u32 floodBackoff = FLOOD_BACKOFF_START;
u32 last_match_id = INVALID_MATCH_ID;
u32 domain_mask_flipped = ~fdr->domainMask;
u8 stride = fdr->stride;
const u64a *ft =
(const u64a *)((const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR)));
const u32 *confBase = (const u32 *)((const u8 *)ft + fdr->tabSize);
(const u64a *)((const u8 *)fdr + ROUNDUP_CL(sizeof(struct FDR)));
assert(ISALIGNED_CL(ft));
const u32 *confBase = (const u32 *)((const u8 *)fdr + fdr->confOffset);
assert(ISALIGNED_CL(confBase));
struct zone zones[ZONE_MAX];
assert(fdr->domain > 8 && fdr->domain < 16);
@@ -798,14 +803,14 @@ static const FDRFUNCTYPE funcs[] = {
fdr_engine_exec,
NULL, /* old: fast teddy */
NULL, /* old: fast teddy */
ONLY_AVX2(fdr_exec_teddy_avx2_msks1_fat),
ONLY_AVX2(fdr_exec_teddy_avx2_msks1_pck_fat),
ONLY_AVX2(fdr_exec_teddy_avx2_msks2_fat),
ONLY_AVX2(fdr_exec_teddy_avx2_msks2_pck_fat),
ONLY_AVX2(fdr_exec_teddy_avx2_msks3_fat),
ONLY_AVX2(fdr_exec_teddy_avx2_msks3_pck_fat),
ONLY_AVX2(fdr_exec_teddy_avx2_msks4_fat),
ONLY_AVX2(fdr_exec_teddy_avx2_msks4_pck_fat),
ONLY_AVX2(fdr_exec_fat_teddy_msks1),
ONLY_AVX2(fdr_exec_fat_teddy_msks1_pck),
ONLY_AVX2(fdr_exec_fat_teddy_msks2),
ONLY_AVX2(fdr_exec_fat_teddy_msks2_pck),
ONLY_AVX2(fdr_exec_fat_teddy_msks3),
ONLY_AVX2(fdr_exec_fat_teddy_msks3_pck),
ONLY_AVX2(fdr_exec_fat_teddy_msks4),
ONLY_AVX2(fdr_exec_fat_teddy_msks4_pck),
fdr_exec_teddy_msks1,
fdr_exec_teddy_msks1_pck,
fdr_exec_teddy_msks2,
@@ -820,8 +825,8 @@ static const FDRFUNCTYPE funcs[] = {
static const u8 fake_history[FAKE_HISTORY_SIZE];
hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
size_t start, HWLMCallback cb, void *ctxt,
hwlm_group_t groups) {
size_t start, HWLMCallback cb,
struct hs_scratch *scratch, hwlm_group_t groups) {
// We guarantee (for safezone construction) that it is safe to read 16
// bytes before the end of the history buffer.
const u8 *hbuf = fake_history + FAKE_HISTORY_SIZE;
@@ -833,7 +838,7 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
0,
start,
cb,
ctxt,
scratch,
nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
0
};
@@ -847,7 +852,8 @@
hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
size_t hlen, const u8 *buf, size_t len,
size_t start, HWLMCallback cb, void *ctxt,
size_t start, HWLMCallback cb,
struct hs_scratch *scratch,
hwlm_group_t groups) {
struct FDR_Runtime_Args a = {
buf,
@@ -856,7 +862,7 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
hlen,
start,
cb,
ctxt,
scratch,
nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
/* we are guaranteed to always have 16 initialised bytes at the end of
* the history buffer (they may be garbage). */

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -42,6 +42,7 @@ extern "C" {
#endif
struct FDR;
struct hs_scratch;
/**
* \brief Block-mode scan.
@@ -49,13 +50,13 @@ struct FDR;
* \param fdr FDR matcher engine.
* \param buf Buffer to scan.
* \param len Length of buffer to scan.
* \param start First offset in buf at which a match may end.
* \param start First offset in buf at which a match may start.
* \param cb Callback to call when a match is found.
* \param ctxt Caller-provided context pointer supplied to callback on match.
* \param scratch Scratch supplied to callback on match.
* \param groups Initial groups mask.
*/
hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
size_t start, HWLMCallback cb, void *ctxt,
size_t start, HWLMCallback cb, struct hs_scratch *scratch,
hwlm_group_t groups);
/**
@@ -66,14 +67,15 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
* \param hlen Length of history buffer (hbuf).
* \param buf Buffer to scan.
* \param len Length of buffer to scan (buf).
* \param start First offset in buf at which a match may end.
* \param start First offset in buf at which a match may start.
* \param cb Callback to call when a match is found.
* \param ctxt Caller-provided context pointer supplied to callback on match.
* \param scratch Scratch supplied to callback on match.
* \param groups Initial groups mask.
*/
hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
size_t hlen, const u8 *buf, size_t len,
size_t start, HWLMCallback cb, void *ctxt,
size_t start, HWLMCallback cb,
struct hs_scratch *scratch,
hwlm_group_t groups);
#ifdef __cplusplus

View File

@@ -42,7 +42,9 @@
#include "ue2common.h"
#include "hwlm/hwlm_build.h"
#include "util/compare.h"
#include "util/container.h"
#include "util/dump_mask.h"
#include "util/make_unique.h"
#include "util/math.h"
#include "util/noncopyable.h"
#include "util/target_info.h"
@@ -50,6 +52,7 @@
#include "util/verify_types.h"
#include <algorithm>
#include <array>
#include <cassert>
#include <cctype>
#include <cstdio>
@@ -61,6 +64,8 @@
#include <numeric>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <boost/multi_array.hpp>
@@ -81,7 +86,6 @@ private:
bool make_small;
u8 *tabIndexToMask(u32 indexInTable);
void assignStringsToBuckets();
#ifdef DEBUG
void dumpMasks(const u8 *defaultMask);
#endif
@@ -90,10 +94,13 @@ private:
void createInitialState(FDR *fdr);
public:
FDRCompiler(vector<hwlmLiteral> lits_in, const FDREngineDescription &eng_in,
FDRCompiler(vector<hwlmLiteral> lits_in,
map<BucketIndex, std::vector<LiteralIndex>> bucketToLits_in,
const FDREngineDescription &eng_in,
bool make_small_in, const Grey &grey_in)
: eng(eng_in), grey(grey_in), tab(eng_in.getTabSizeBytes()),
lits(move(lits_in)), make_small(make_small_in) {}
lits(move(lits_in)), bucketToLits(move(bucketToLits_in)),
make_small(make_small_in) {}
bytecode_ptr<FDR> build();
};
@@ -144,61 +151,139 @@ void FDRCompiler::createInitialState(FDR *fdr) {
}
}
/**
* \brief Lay out FDR structures in bytecode.
*
* Note that each major structure (header, table, confirm, flood control) is
* cacheline-aligned.
*/
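// Illustrative sketch of the layout produced below (editorial aid, each
// block padded out to a 64-byte cacheline boundary):
//
//   [FDR header | pad][hash table | pad][confirm | pad][flood control]
//
// fdr->confOffset and fdr->floodOffset record the byte offsets of the
// confirm and flood control blocks from the start of the engine.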
bytecode_ptr<FDR> FDRCompiler::setupFDR() {
auto floodTable = setupFDRFloodControl(lits, eng, grey);
auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small);
size_t headerSize = sizeof(FDR);
size_t tabSize = eng.getTabSizeBytes();
auto floodControlTmp = setupFDRFloodControl(lits, eng, grey);
auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small);
assert(ISALIGNED_16(tabSize));
assert(ISALIGNED_16(confirmTmp.size()));
assert(ISALIGNED_16(floodControlTmp.size()));
size_t headerSize = ROUNDUP_16(sizeof(FDR));
size_t size = ROUNDUP_16(headerSize + tabSize + confirmTmp.size() +
floodControlTmp.size());
// Note: we place each major structure here on a cacheline boundary.
size_t size = ROUNDUP_CL(headerSize) + ROUNDUP_CL(tabSize) +
ROUNDUP_CL(confirmTable.size()) + floodTable.size();
DEBUG_PRINTF("sizes base=%zu tabSize=%zu confirm=%zu floodControl=%zu "
"total=%zu\n",
headerSize, tabSize, confirmTmp.size(), floodControlTmp.size(),
headerSize, tabSize, confirmTable.size(), floodTable.size(),
size);
auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64);
assert(fdr); // otherwise would have thrown std::bad_alloc
u8 *fdr_base = (u8 *)fdr.get();
// Write header.
fdr->size = size;
fdr->engineID = eng.getID();
fdr->maxStringLen = verify_u32(maxLen(lits));
createInitialState(fdr.get());
u8 *fdr_base = (u8 *)fdr.get();
u8 *ptr = fdr_base + ROUNDUP_16(sizeof(FDR));
copy(tab.begin(), tab.end(), ptr);
ptr += tabSize;
memcpy(ptr, confirmTmp.get(), confirmTmp.size());
ptr += confirmTmp.size();
fdr->floodOffset = verify_u32(ptr - fdr_base);
memcpy(ptr, floodControlTmp.get(), floodControlTmp.size());
ptr += floodControlTmp.size();
/* we are allowing domains 9 to 15 only */
assert(eng.bits > 8 && eng.bits < 16);
fdr->numStrings = verify_u32(lits.size());
assert(eng.bits > 8 && eng.bits < 16); // we allow domains 9 to 15 only
fdr->domain = eng.bits;
fdr->domainMask = (1 << eng.bits) - 1;
fdr->tabSize = (1 << eng.bits) * (eng.schemeWidth / 8);
fdr->tabSize = tabSize;
fdr->stride = eng.stride;
createInitialState(fdr.get());
// Write table.
u8 *ptr = fdr_base + ROUNDUP_CL(sizeof(FDR));
assert(ISALIGNED_CL(ptr));
copy(tab.begin(), tab.end(), ptr);
ptr += ROUNDUP_CL(tabSize);
// Write confirm structures.
assert(ISALIGNED_CL(ptr));
fdr->confOffset = verify_u32(ptr - fdr_base);
memcpy(ptr, confirmTable.get(), confirmTable.size());
ptr += ROUNDUP_CL(confirmTable.size());
// Write flood control structures.
assert(ISALIGNED_CL(ptr));
fdr->floodOffset = verify_u32(ptr - fdr_base);
memcpy(ptr, floodTable.get(), floodTable.size());
ptr += floodTable.size(); // last write, no need to round up
return fdr;
}
//#define DEBUG_ASSIGNMENT
static
double getScoreUtil(u32 len, u32 count) {
return len == 0 ? numeric_limits<double>::max()
: our_pow(count, 1.05) * our_pow(len, -3.0);
}
/**
* Utility class for computing:
*
* score(count, len) = pow(count, 1.05) * pow(len, -3)
*
* Calling pow() is expensive. This is mitigated by using pre-computed LUTs for
* small inputs and a cache for larger ones.
*/
class Scorer {
unordered_map<u32, double> count_factor_cache;
// LUT: pow(count, 1.05) for small values of count.
static const array<double, 100> count_lut;
double count_factor(u32 count) {
if (count < count_lut.size()) {
return count_lut[count];
}
auto it = count_factor_cache.find(count);
if (it != count_factor_cache.end()) {
return it->second;
}
double r = our_pow(count, 1.05);
count_factor_cache.emplace(count, r);
return r;
}
// LUT: pow(len, -3) for len in range [0,8].
static const array<double, 9> len_lut;
double len_factor(u32 len) {
assert(len <= len_lut.size());
return len_lut[len];
}
public:
double operator()(u32 len, u32 count) {
if (len == 0) {
return numeric_limits<double>::max();
}
return count_factor(count) * len_factor(len);
}
};
const array<double, 100> Scorer::count_lut{{
pow(0, 1.05), pow(1, 1.05), pow(2, 1.05), pow(3, 1.05), pow(4, 1.05),
pow(5, 1.05), pow(6, 1.05), pow(7, 1.05), pow(8, 1.05), pow(9, 1.05),
pow(10, 1.05), pow(11, 1.05), pow(12, 1.05), pow(13, 1.05), pow(14, 1.05),
pow(15, 1.05), pow(16, 1.05), pow(17, 1.05), pow(18, 1.05), pow(19, 1.05),
pow(20, 1.05), pow(21, 1.05), pow(22, 1.05), pow(23, 1.05), pow(24, 1.05),
pow(25, 1.05), pow(26, 1.05), pow(27, 1.05), pow(28, 1.05), pow(29, 1.05),
pow(30, 1.05), pow(31, 1.05), pow(32, 1.05), pow(33, 1.05), pow(34, 1.05),
pow(35, 1.05), pow(36, 1.05), pow(37, 1.05), pow(38, 1.05), pow(39, 1.05),
pow(40, 1.05), pow(41, 1.05), pow(42, 1.05), pow(43, 1.05), pow(44, 1.05),
pow(45, 1.05), pow(46, 1.05), pow(47, 1.05), pow(48, 1.05), pow(49, 1.05),
pow(50, 1.05), pow(51, 1.05), pow(52, 1.05), pow(53, 1.05), pow(54, 1.05),
pow(55, 1.05), pow(56, 1.05), pow(57, 1.05), pow(58, 1.05), pow(59, 1.05),
pow(60, 1.05), pow(61, 1.05), pow(62, 1.05), pow(63, 1.05), pow(64, 1.05),
pow(65, 1.05), pow(66, 1.05), pow(67, 1.05), pow(68, 1.05), pow(69, 1.05),
pow(70, 1.05), pow(71, 1.05), pow(72, 1.05), pow(73, 1.05), pow(74, 1.05),
pow(75, 1.05), pow(76, 1.05), pow(77, 1.05), pow(78, 1.05), pow(79, 1.05),
pow(80, 1.05), pow(81, 1.05), pow(82, 1.05), pow(83, 1.05), pow(84, 1.05),
pow(85, 1.05), pow(86, 1.05), pow(87, 1.05), pow(88, 1.05), pow(89, 1.05),
pow(90, 1.05), pow(91, 1.05), pow(92, 1.05), pow(93, 1.05), pow(94, 1.05),
pow(95, 1.05), pow(96, 1.05), pow(97, 1.05), pow(98, 1.05), pow(99, 1.05),
}};
const array<double, 9> Scorer::len_lut{{
pow(0, -3.0), pow(1, -3.0), pow(2, -3.0), pow(3, -3.0), pow(4, -3.0),
pow(5, -3.0), pow(6, -3.0), pow(7, -3.0), pow(8, -3.0)}};
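// Worked example for intuition: a chunk of 100 literals of length 8 scores
// pow(100, 1.05) * pow(8, -3) ~= 125.89 * (1.0 / 512) ~= 0.246, while the
// same 100 literals at length 4 would score ~= 1.97. The len^-3 term thus
// punishes short literals far harder than the count^1.05 term punishes
// crowding, and the assignment below prefers the lower score.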
/**
* Returns true if the two given literals should be placed in the same chunk as
@@ -297,7 +382,10 @@ next_literal:
return chunks;
}
void FDRCompiler::assignStringsToBuckets() {
static
map<BucketIndex, vector<LiteralIndex>> assignStringsToBuckets(
vector<hwlmLiteral> &lits,
const FDREngineDescription &eng) {
const double MAX_SCORE = numeric_limits<double>::max();
assert(!lits.empty()); // Shouldn't be called with no literals.
@@ -340,12 +428,14 @@ void FDRCompiler::assignStringsToBuckets() {
boost::multi_array<pair<double, u32>, 2> t(
boost::extents[numChunks][numBuckets]);
Scorer scorer;
for (u32 j = 0; j < numChunks; j++) {
u32 cnt = 0;
for (u32 k = j; k < numChunks; ++k) {
cnt += chunks[k].count;
}
t[j][0] = {getScoreUtil(chunks[j].length, cnt), 0};
t[j][0] = {scorer(chunks[j].length, cnt), 0};
}
for (u32 i = 1; i < numBuckets; i++) {
@@ -353,7 +443,7 @@ void FDRCompiler::assignStringsToBuckets() {
pair<double, u32> best = {MAX_SCORE, 0};
u32 cnt = chunks[j].count;
for (u32 k = j + 1; k < numChunks - 1; k++) {
auto score = getScoreUtil(chunks[j].length, cnt);
auto score = scorer(chunks[j].length, cnt);
if (score > best.first) {
break; // now worse locally than our best score, give up
}
@@ -381,6 +471,7 @@ void FDRCompiler::assignStringsToBuckets() {
// our best score is in t[0][N_BUCKETS-1] and we can follow the links
// to find where our buckets should start and what goes into them
vector<vector<LiteralIndex>> buckets;
for (u32 i = 0, n = numBuckets; n && (i != numChunks - 1); n--) {
u32 j = t[i][n - 1].second;
if (j == 0) {
@@ -391,21 +482,33 @@ void FDRCompiler::assignStringsToBuckets() {
u32 first_id = chunks[i].first_id;
u32 last_id = chunks[j].first_id;
assert(first_id < last_id);
u32 bucket = numBuckets - n;
UNUSED const auto &first_lit = lits[first_id];
UNUSED const auto &last_lit = lits[last_id - 1];
DEBUG_PRINTF("placing [%u-%u) in bucket %u (%u lits, len %zu-%zu, "
"score %0.4f)\n",
first_id, last_id, bucket, last_id - first_id,
first_lit.s.length(), last_lit.s.length(),
getScoreUtil(first_lit.s.length(), last_id - first_id));
DEBUG_PRINTF("placing [%u-%u) in one bucket (%u lits, len %zu-%zu, "
"score %0.4f)\n",
first_id, last_id, last_id - first_id,
first_lit.s.length(), last_lit.s.length(),
scorer(first_lit.s.length(), last_id - first_id));
auto &bucket_lits = bucketToLits[bucket];
for (u32 k = first_id; k < last_id; k++) {
bucket_lits.push_back(k);
vector<LiteralIndex> litIds;
u32 cnt = last_id - first_id;
// long literals first for included literals checking
for (u32 k = 0; k < cnt; k++) {
litIds.push_back(last_id - k - 1);
}
i = j;
buckets.push_back(litIds);
}
// reverse bucket id, longer literals come first
map<BucketIndex, vector<LiteralIndex>> bucketToLits;
size_t bucketCnt = buckets.size();
for (size_t i = 0; i < bucketCnt; i++) {
bucketToLits.emplace(bucketCnt - i - 1, move(buckets[i]));
}
return bucketToLits;
}
#ifdef DEBUG
@@ -426,7 +529,7 @@ bool getMultiEntriesAtPosition(const FDREngineDescription &eng,
const vector<LiteralIndex> &vl,
const vector<hwlmLiteral> &lits,
SuffixPositionInString pos,
std::map<u32, ue2::unordered_set<u32> > &m2) {
map<u32, unordered_set<u32>> &m2) {
assert(eng.bits < 32);
u32 distance = 0;
@@ -497,7 +600,7 @@ void FDRCompiler::setupTab() {
SuffixPositionInString pLimit = eng.getBucketWidth(b);
for (SuffixPositionInString pos = 0; pos < pLimit; pos++) {
u32 bit = eng.getSchemeBit(b, pos);
map<u32, ue2::unordered_set<u32>> m2;
map<u32, unordered_set<u32>> m2;
bool done = getMultiEntriesAtPosition(eng, vl, lits, pos, m2);
if (done) {
clearbit(&defaultMask[0], bit);
@@ -505,7 +608,7 @@ void FDRCompiler::setupTab() {
}
for (const auto &elem : m2) {
u32 dc = elem.first;
const ue2::unordered_set<u32> &mskSet = elem.second;
const unordered_set<u32> &mskSet = elem.second;
u32 v = ~dc;
do {
u32 b2 = v & dc;
@@ -529,24 +632,222 @@ void FDRCompiler::setupTab() {
}
bytecode_ptr<FDR> FDRCompiler::build() {
assignStringsToBuckets();
setupTab();
return setupFDR();
}
static
bool isSuffix(const hwlmLiteral &lit1, const hwlmLiteral &lit2) {
const auto &s1 = lit1.s;
const auto &s2 = lit2.s;
size_t len1 = s1.length();
size_t len2 = s2.length();
assert(len1 >= len2);
if (lit1.nocase || lit2.nocase) {
return equal(s2.begin(), s2.end(), s1.begin() + len1 - len2,
[](char a, char b) { return mytoupper(a) == mytoupper(b); });
} else {
return equal(s2.begin(), s2.end(), s1.begin() + len1 - len2);
}
}
/*
* if lit2 is a suffix of lit1 but the case sensitivity, groups or mask info
* of lit2 is a subset of lit1, then lit1 can't squash lit2 and lit2 can
* possibly match when lit1 matches. In this case, we can't do bucket
* squashing. e.g. AAA(no case) in bucket 0, AA(no case) and aa in bucket 1,
* we can't squash bucket 1 if we have input like "aaa" as aa can also match.
*/
static
bool includedCheck(const hwlmLiteral &lit1, const hwlmLiteral &lit2) {
/* lit1 is caseless and lit2 is case sensitive */
if ((lit1.nocase && !lit2.nocase)) {
return true;
}
/* lit2's group is a subset of lit1 */
if (lit1.groups != lit2.groups &&
(lit2.groups == (lit1.groups & lit2.groups))) {
return true;
}
/* TODO: narrow down cases for mask check */
if (lit1.cmp != lit2.cmp || lit1.msk != lit2.msk) {
return true;
}
return false;
}
/*
* if lit2 is an included literal of both lit0 and lit1, then lit0 and lit1
* shouldn't match at the same offset, otherwise we give up squashing for lit1.
* e.g. lit0:AAA(no case), lit1:aa, lit2:A(no case). We can have duplicate
* matches for input "aaa" if lit0 and lit1 both squash lit2.
*/
static
bool checkParentLit(
const vector<hwlmLiteral> &lits, u32 pos1,
const unordered_set<u32> &parent_map,
const unordered_map<u32, unordered_set<u32>> &exception_map) {
assert(pos1 < lits.size());
const auto &lit1 = lits[pos1];
for (const auto pos2 : parent_map) {
if (contains(exception_map, pos2)) {
const auto &exception_pos = exception_map.at(pos2);
if (contains(exception_pos, pos1)) {
return false;
}
}
/* if lit1 isn't an exception of lit2, then we have to do further
* exclusive check.
* TODO: More mask checks. Note if two literals are group exclusive,
* it is possible that they match at the same offset. */
assert(pos2 < lits.size());
const auto &lit2 = lits[pos2];
if (isSuffix(lit2, lit1)) {
return false;
}
}
return true;
}
static
void buildSquashMask(vector<hwlmLiteral> &lits, u32 id1, u32 bucket1,
size_t start, const vector<pair<u32, u32>> &group,
unordered_map<u32, unordered_set<u32>> &parent_map,
unordered_map<u32, unordered_set<u32>> &exception_map) {
auto &lit1 = lits[id1];
DEBUG_PRINTF("b:%u len:%zu\n", bucket1, lit1.s.length());
size_t cnt = group.size();
bool included = false;
bool exception = false;
u32 child_id = ~0U;
for (size_t i = start; i < cnt; i++) {
u32 bucket2 = group[i].first;
assert(bucket2 >= bucket1);
u32 id2 = group[i].second;
auto &lit2 = lits[id2];
// check if lit2 is a suffix of lit1
if (isSuffix(lit1, lit2)) {
/* if we have an included literal in the same bucket,
* quit and let the included literal do possible squashing */
if (bucket1 == bucket2) {
DEBUG_PRINTF("same bucket\n");
return;
}
/* if lit2 is a suffix but doesn't pass included checks for
* extra info, we give up squashing */
if (includedCheck(lit1, lit2)) {
DEBUG_PRINTF("find exceptional suffix %u\n", lit2.id);
exception_map[id1].insert(id2);
exception = true;
} else if (checkParentLit(lits, id1, parent_map[id2],
exception_map)) {
if (lit1.included_id == INVALID_LIT_ID) {
DEBUG_PRINTF("find suffix lit1 %u lit2 %u\n",
lit1.id, lit2.id);
lit1.included_id = lit2.id;
} else {
/* if we have multiple included literals in one bucket,
* give up squashing. */
DEBUG_PRINTF("multiple included literals\n");
lit1.included_id = INVALID_LIT_ID;
return;
}
child_id = id2;
included = true;
}
}
size_t next = i + 1;
u32 nextBucket = next < cnt ? group[next].first : ~0U;
if (bucket2 != nextBucket) {
if (included) {
if (exception) {
/* give up if we have exception literals
* in the same bucket as the included literal. */
lit1.included_id = INVALID_LIT_ID;
} else {
parent_map[child_id].insert(id1);
lit1.squash |= 1U << bucket2;
DEBUG_PRINTF("build squash mask %2x for %u\n",
lit1.squash, lit1.id);
}
return;
}
exception = false;
}
}
}
static constexpr u32 INCLUDED_LIMIT = 1000;
static
void findIncludedLits(vector<hwlmLiteral> &lits,
const vector<vector<pair<u32, u32>>> &lastCharMap) {
/* Map for finding the positions of literal which includes a literal
* in FDR hwlm literal vector. */
unordered_map<u32, unordered_set<u32>> parent_map;
/* Map for finding the positions of exception literals which could
* sometimes match if a literal matches in FDR hwlm literal vector. */
unordered_map<u32, unordered_set<u32>> exception_map;
for (const auto &group : lastCharMap) {
size_t cnt = group.size();
if (cnt > INCLUDED_LIMIT) {
continue;
}
for (size_t i = 0; i < cnt; i++) {
u32 bucket1 = group[i].first;
u32 id1 = group[i].second;
buildSquashMask(lits, id1, bucket1, i + 1, group, parent_map,
exception_map);
}
}
}
static
void addIncludedInfo(
vector<hwlmLiteral> &lits, u32 nBuckets,
map<BucketIndex, vector<LiteralIndex>> &bucketToLits) {
vector<vector<pair<u32, u32>>> lastCharMap(256);
for (BucketIndex b = 0; b < nBuckets; b++) {
if (!bucketToLits[b].empty()) {
for (const LiteralIndex &lit_idx : bucketToLits[b]) {
const auto &lit = lits[lit_idx];
u8 c = mytoupper(lit.s.back());
lastCharMap[c].emplace_back(b, lit_idx);
}
}
}
findIncludedLits(lits, lastCharMap);
}
} // namespace
static
bytecode_ptr<FDR> fdrBuildTableInternal(const vector<hwlmLiteral> &lits,
bool make_small, const target_t &target,
const Grey &grey, u32 hint) {
unique_ptr<HWLMProto> fdrBuildProtoInternal(u8 engType,
vector<hwlmLiteral> &lits,
bool make_small,
const target_t &target,
const Grey &grey, u32 hint) {
DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");
if (grey.fdrAllowTeddy) {
auto fdr = teddyBuildTableHinted(lits, make_small, hint, target, grey);
if (fdr) {
auto proto = teddyBuildProtoHinted(engType, lits, make_small, hint,
target);
if (proto) {
DEBUG_PRINTF("build with teddy succeeded\n");
return fdr;
return proto;
} else {
DEBUG_PRINTF("build with teddy failed, will try with FDR\n");
}
@@ -564,23 +865,47 @@ bytecode_ptr<FDR> fdrBuildTableInternal(const vector<hwlmLiteral> &lits,
des->stride = 1;
}
FDRCompiler fc(lits, *des, make_small, grey);
auto bucketToLits = assignStringsToBuckets(lits, *des);
addIncludedInfo(lits, des->getNumBuckets(), bucketToLits);
auto proto =
ue2::make_unique<HWLMProto>(engType, move(des), lits, bucketToLits,
make_small);
return proto;
}
unique_ptr<HWLMProto> fdrBuildProto(u8 engType, vector<hwlmLiteral> lits,
bool make_small, const target_t &target,
const Grey &grey) {
return fdrBuildProtoInternal(engType, lits, make_small, target, grey,
HINT_INVALID);
}
static
bytecode_ptr<FDR> fdrBuildTableInternal(const HWLMProto &proto,
const Grey &grey) {
if (proto.teddyEng) {
return teddyBuildTable(proto, grey);
}
FDRCompiler fc(proto.lits, proto.bucketToLits, *(proto.fdrEng),
proto.make_small, grey);
return fc.build();
}
bytecode_ptr<FDR> fdrBuildTable(const vector<hwlmLiteral> &lits,
bool make_small, const target_t &target,
const Grey &grey) {
return fdrBuildTableInternal(lits, make_small, target, grey, HINT_INVALID);
bytecode_ptr<FDR> fdrBuildTable(const HWLMProto &proto, const Grey &grey) {
return fdrBuildTableInternal(proto, grey);
}
#if !defined(RELEASE_BUILD)
bytecode_ptr<FDR> fdrBuildTableHinted(const vector<hwlmLiteral> &lits,
bool make_small, u32 hint,
const target_t &target,
const Grey &grey) {
return fdrBuildTableInternal(lits, make_small, target, grey, hint);
unique_ptr<HWLMProto> fdrBuildProtoHinted(u8 engType,
vector<hwlmLiteral> lits,
bool make_small, u32 hint,
const target_t &target,
const Grey &grey) {
return fdrBuildProtoInternal(engType, lits, make_small, target, grey,
hint);
}
#endif

View File

@@ -34,6 +34,7 @@
#define FDR_COMPILE_H
#include "ue2common.h"
#include "hwlm/hwlm_build.h"
#include "util/bytecode_ptr.h"
#include <vector>
@@ -46,18 +47,23 @@ struct hwlmLiteral;
struct Grey;
struct target_t;
bytecode_ptr<FDR> fdrBuildTable(const std::vector<hwlmLiteral> &lits,
bool make_small, const target_t &target,
const Grey &grey);
bytecode_ptr<FDR> fdrBuildTable(const HWLMProto &proto, const Grey &grey);
#if !defined(RELEASE_BUILD)
bytecode_ptr<FDR> fdrBuildTableHinted(const std::vector<hwlmLiteral> &lits,
bool make_small, u32 hint,
const target_t &target, const Grey &grey);
std::unique_ptr<HWLMProto> fdrBuildProtoHinted(
u8 engType,
std::vector<hwlmLiteral> lits,
bool make_small, u32 hint,
const target_t &target,
const Grey &grey);
#endif
std::unique_ptr<HWLMProto> fdrBuildProto(
u8 engType,
std::vector<hwlmLiteral> lits,
bool make_small, const target_t &target,
const Grey &grey);
/** \brief Returns size in bytes of the given FDR engine. */
size_t fdrSize(const struct FDR *fdr);

View File

@@ -57,10 +57,11 @@ class FDREngineDescription;
struct hwlmStreamingControl;
struct Grey;
bytecode_ptr<u8> setupFullConfs(const std::vector<hwlmLiteral> &lits,
const EngineDescription &eng,
std::map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits,
bool make_small);
bytecode_ptr<u8> setupFullConfs(
const std::vector<hwlmLiteral> &lits,
const EngineDescription &eng,
const std::map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits,
bool make_small);
// all suffixes include an implicit max_bucket_width suffix to ensure that
// we always read a full-scale flood "behind" us in terms of what's in our

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -42,12 +42,11 @@ u32 mul_hash_64(u64a lv, u64a andmsk, u64a mult, u32 nBits) {
#define CONF_TYPE u64a
#define CONF_HASH_CALL mul_hash_64
typedef enum LitInfoFlags {
NoFlags = 0,
Caseless = 1,
NoRepeat = 2,
ComplexConfirm = 4
} LitInfoFlags;
/**
* \brief Flag indicating this literal doesn't need to be delivered more than
* once, used in LitInfo::flags.
*/
#define FDR_LIT_FLAG_NOREPEAT 1
/**
* \brief Structure describing a literal, linked to by FDRConfirm.
@@ -61,12 +60,12 @@ struct LitInfo {
hwlm_group_t groups;
u32 id; // literal ID as passed in
u8 size;
u8 flags; /* LitInfoFlags */
u8 flags; //!< bitfield of flags from FDR_LIT_FLAG_* above.
u8 next;
u8 extended_size;
};
#define FDRC_FLAG_NO_CONFIRM 1
#define FDRC_FLAG_NOREPEAT 2
/**
* \brief FDR confirm header.
@@ -79,12 +78,8 @@ struct LitInfo {
struct FDRConfirm {
CONF_TYPE andmsk;
CONF_TYPE mult;
u32 nBitsOrSoleID; // if flags is NO_CONFIRM then this is soleID
u32 flags; // sole meaning is 'non-zero means no-confirm' (that is all)
u32 nBits;
hwlm_group_t groups;
u32 soleLitSize;
u32 soleLitCmp;
u32 soleLitMsk;
};
static really_inline

View File

@@ -35,6 +35,7 @@
#include "util/alloc.h"
#include "util/bitutils.h"
#include "util/compare.h"
#include "util/container.h"
#include "util/verify_types.h"
#include <algorithm>
@@ -47,19 +48,6 @@ namespace ue2 {
using BC2CONF = map<BucketIndex, bytecode_ptr<FDRConfirm>>;
// return the number of bytes beyond a length threshold in all strings in lits
static
size_t thresholdedSize(const vector<hwlmLiteral> &lits, size_t threshold) {
size_t tot = 0;
for (const auto &lit : lits) {
size_t sz = lit.s.size();
if (sz > threshold) {
tot += ROUNDUP_N(sz - threshold, 8);
}
}
return tot;
}
static
u64a make_u64a_mask(const vector<u8> &v) {
assert(v.size() <= sizeof(u64a));
@@ -92,19 +80,12 @@ void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
LitInfo &info = tmpLitInfo[i];
memset(&info, 0, sizeof(info));
info.id = lit.id;
u8 flags = NoFlags;
if (lit.nocase) {
flags |= Caseless;
}
u8 flags = 0;
if (lit.noruns) {
flags |= NoRepeat;
}
if (lit.msk.size() > lit.s.size()) {
flags |= ComplexConfirm;
info.extended_size = verify_u8(lit.msk.size());
flags |= FDR_LIT_FLAG_NOREPEAT;
}
info.flags = flags;
info.size = verify_u8(lit.s.size());
info.size = verify_u8(max(lit.msk.size(), lit.s.size()));
info.groups = lit.groups;
// these are built up assuming a LE machine
@@ -149,7 +130,12 @@ static
static
bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
bool make_small, bool make_confirm) {
bool make_small) {
// Every literal must fit within CONF_TYPE.
assert(all_of_in(lits, [](const hwlmLiteral &lit) {
return lit.s.size() <= sizeof(CONF_TYPE);
}));
vector<LitInfo> tmpLitInfo(lits.size());
CONF_TYPE andmsk;
fillLitInfo(lits, tmpLitInfo, andmsk);
@@ -167,40 +153,6 @@ bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
}
CONF_TYPE mult = (CONF_TYPE)0x0b4e0ef37bc32127ULL;
u32 flags = 0;
// we use next three variables for 'confirmless' case to speed-up
// confirmation process
u32 soleLitSize = 0;
u32 soleLitCmp = 0;
u32 soleLitMsk = 0;
if (!make_confirm) {
flags = FDRC_FLAG_NO_CONFIRM;
if (lits[0].noruns) {
flags |= NoRepeat; // messy - need to clean this up later as flags is sorta kinda obsoleted
}
mult = 0;
soleLitSize = lits[0].s.size() - 1;
// we can get to this point only in confirmless case;
// it means that we have only one literal per FDRConfirm (no packing),
// with no literal mask and size of literal is less or equal
// to the number of masks of Teddy engine;
// maximum number of masks for Teddy is 4, so the size of
// literal is definitely less or equal to size of u32
assert(lits[0].s.size() <= sizeof(u32));
for (u32 i = 0; i < lits[0].s.size(); i++) {
u32 shiftLoc = (sizeof(u32) - i - 1) * 8;
u8 c = lits[0].s[lits[0].s.size() - i - 1];
if (lits[0].nocase && ourisalpha(c)) {
soleLitCmp |= (u32)(c & CASE_CLEAR) << shiftLoc;
soleLitMsk |= (u32)CASE_CLEAR << shiftLoc;
}
else {
soleLitCmp |= (u32)c << shiftLoc;
soleLitMsk |= (u32)0xff << shiftLoc;
}
}
}
// we can walk the vector and assign elements from the vectors to a
// map by hash value
@@ -276,12 +228,11 @@ bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
#endif
const size_t bitsToLitIndexSize = (1U << nBits) * sizeof(u32);
const size_t totalLitSize = thresholdedSize(lits, sizeof(CONF_TYPE));
// this size can now be a worst-case as we can always be a bit smaller
size_t size = ROUNDUP_N(sizeof(FDRConfirm), alignof(u32)) +
ROUNDUP_N(bitsToLitIndexSize, alignof(LitInfo)) +
sizeof(LitInfo) * lits.size() + totalLitSize;
sizeof(LitInfo) * lits.size();
size = ROUNDUP_N(size, alignof(FDRConfirm));
auto fdrc = make_zeroed_bytecode_ptr<FDRConfirm>(size);
@@ -289,11 +240,7 @@ bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
fdrc->andmsk = andmsk;
fdrc->mult = mult;
fdrc->nBitsOrSoleID = (flags & FDRC_FLAG_NO_CONFIRM) ? lits[0].id : nBits;
fdrc->flags = flags;
fdrc->soleLitSize = soleLitSize;
fdrc->soleLitCmp = soleLitCmp;
fdrc->soleLitMsk = soleLitMsk;
fdrc->nBits = nBits;
fdrc->groups = gm;
@@ -345,40 +292,37 @@ bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
bytecode_ptr<u8>
setupFullConfs(const vector<hwlmLiteral> &lits,
const EngineDescription &eng,
map<BucketIndex, vector<LiteralIndex>> &bucketToLits,
const map<BucketIndex, vector<LiteralIndex>> &bucketToLits,
bool make_small) {
bool makeConfirm = true;
unique_ptr<TeddyEngineDescription> teddyDescr =
getTeddyDescription(eng.getID());
if (teddyDescr) {
makeConfirm = teddyDescr->needConfirm(lits);
}
BC2CONF bc2Conf;
u32 totalConfirmSize = 0;
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
if (!bucketToLits[b].empty()) {
if (contains(bucketToLits, b)) {
vector<hwlmLiteral> vl;
for (const LiteralIndex &lit_idx : bucketToLits[b]) {
for (const LiteralIndex &lit_idx : bucketToLits.at(b)) {
vl.push_back(lits[lit_idx]);
}
DEBUG_PRINTF("b %d sz %zu\n", b, vl.size());
auto fc = getFDRConfirm(vl, make_small, makeConfirm);
auto fc = getFDRConfirm(vl, make_small);
totalConfirmSize += fc.size();
bc2Conf.emplace(b, move(fc));
}
}
u32 nBuckets = eng.getNumBuckets();
u32 totalConfSwitchSize = nBuckets * sizeof(u32);
u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize);
u32 totalConfSwitchSize = ROUNDUP_CL(nBuckets * sizeof(u32));
u32 totalSize = totalConfSwitchSize + totalConfirmSize;
auto buf = make_zeroed_bytecode_ptr<u8>(totalSize, 16);
auto buf = make_zeroed_bytecode_ptr<u8>(totalSize, 64);
assert(buf); // otherwise would have thrown std::bad_alloc
u32 *confBase = (u32 *)buf.get();
u8 *ptr = buf.get() + totalConfSwitchSize;
assert(ISALIGNED_CL(ptr));
for (const auto &m : bc2Conf) {
const BucketIndex &idx = m.first;

View File

@@ -29,6 +29,7 @@
#ifndef FDR_CONFIRM_RUNTIME_H
#define FDR_CONFIRM_RUNTIME_H
#include "scratch.h"
#include "fdr_internal.h"
#include "fdr_loadval.h"
#include "hwlm/hwlm.h"
@@ -41,13 +42,14 @@
static really_inline
void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a,
size_t i, hwlmcb_rv_t *control, u32 *last_match,
u64a conf_key) {
u64a conf_key, u64a *conf, u8 bit) {
assert(i < a->len);
assert(i >= a->start_offset);
assert(ISALIGNED(fdrc));
const u8 * buf = a->buf;
u32 c = CONF_HASH_CALL(conf_key, fdrc->andmsk, fdrc->mult,
fdrc->nBitsOrSoleID);
fdrc->nBits);
u32 start = getConfirmLitIndex(fdrc)[c];
if (likely(!start)) {
return;
@@ -56,6 +58,10 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
const struct LitInfo *li
= (const struct LitInfo *)((const u8 *)fdrc + start);
struct hs_scratch *scratch = a->scratch;
assert(!scratch->fdr_conf);
scratch->fdr_conf = conf;
scratch->fdr_conf_offset = bit;
u8 oldNext; // initialized in loop
do {
assert(ISALIGNED(li));
@@ -64,7 +70,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
goto out;
}
if ((*last_match == li->id) && (li->flags & NoRepeat)) {
if ((*last_match == li->id) && (li->flags & FDR_LIT_FLAG_NOREPEAT)) {
goto out;
}
@@ -86,99 +92,13 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
goto out;
}
if (unlikely(li->flags & ComplexConfirm)) {
const u8 *loc2 = buf + i - li->extended_size + 1;
if (loc2 < buf) {
u32 full_overhang = buf - loc2;
size_t len_history = a->len_history;
if (full_overhang > len_history) {
goto out;
}
}
}
*last_match = li->id;
*control = a->cb(loc - buf, i, li->id, a->ctxt);
*control = a->cb(i, li->id, scratch);
out:
oldNext = li->next; // oldNext is either 0 or an 'adjust' value
li++;
} while (oldNext);
}
// 'light-weight' confirmation function which is used by 1-mask Teddy;
// in the 'confirmless' case it simply calls callback function,
// otherwise it calls 'confWithBit' function for the full confirmation procedure
static really_inline
void confWithBit1(const struct FDRConfirm *fdrc,
const struct FDR_Runtime_Args *a, size_t i,
hwlmcb_rv_t *control, u32 *last_match, u64a conf_key) {
assert(i < a->len);
assert(ISALIGNED(fdrc));
if (unlikely(fdrc->mult)) {
confWithBit(fdrc, a, i, control, last_match, conf_key);
return;
} else {
u32 id = fdrc->nBitsOrSoleID;
if ((*last_match == id) && (fdrc->flags & NoRepeat)) {
return;
}
*last_match = id;
*control = a->cb(i, i, id, a->ctxt);
}
}
// This is 'light-weight' confirmation function which is used by 2-3-4-mask Teddy
// In the 'confirmless' case it makes fast 32-bit comparison,
// otherwise it calls 'confWithBit' function for the full confirmation procedure
static really_inline
void confWithBitMany(const struct FDRConfirm *fdrc,
const struct FDR_Runtime_Args *a, size_t i, CautionReason r,
hwlmcb_rv_t *control, u32 *last_match, u64a conf_key) {
assert(i < a->len);
assert(ISALIGNED(fdrc));
if (i < a->start_offset) {
return;
}
if (unlikely(fdrc->mult)) {
confWithBit(fdrc, a, i, control, last_match, conf_key);
return;
} else {
const u32 id = fdrc->nBitsOrSoleID;
const u32 len = fdrc->soleLitSize;
if ((*last_match == id) && (fdrc->flags & NoRepeat)) {
return;
}
if (r == VECTORING && len > i - a->start_offset) {
if (len > i + a->len_history) {
return;
}
u32 cmp = (u32)a->buf[i] << 24;
if (len <= i) {
for (u32 j = 1; j <= len; j++) {
cmp |= (u32)a->buf[i - j] << (24 - (j * 8));
}
} else {
for (u32 j = 1; j <= i; j++) {
cmp |= (u32)a->buf[i - j] << (24 - (j * 8));
}
cmp |= (u32)(a->histBytes >> (40 + i * 8));
}
if ((fdrc->soleLitMsk & cmp) != fdrc->soleLitCmp) {
return;
}
}
*last_match = id;
*control = a->cb(i - len, i, id, a->ctxt);
}
scratch->fdr_conf = NULL;
}
#endif

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,10 +30,12 @@
#include "fdr_compile.h"
#include "fdr_compile_internal.h"
#include "fdr_confirm.h"
#include "fdr_dump.h"
#include "fdr_engine_description.h"
#include "fdr_internal.h"
#include "teddy_engine_description.h"
#include "teddy_internal.h"
#include "ue2common.h"
#include <cstdio>
@@ -43,7 +45,7 @@
#error No dump support!
#endif
using std::unique_ptr;
using namespace std;
namespace ue2 {
@@ -58,33 +60,127 @@ bool fdrIsTeddy(const FDR *fdr) {
return !getFdrDescription(engine);
}
void fdrPrintStats(const FDR *fdr, FILE *f) {
const bool isTeddy = fdrIsTeddy(fdr);
static
void dumpLitIndex(const FDRConfirm *fdrc, FILE *f) {
const u32 *lit_index = getConfirmLitIndex(fdrc);
u32 num_lits = 1U << fdrc->nBits;
u32 lits_used = count_if(lit_index, lit_index + num_lits,
[](u32 idx) { return idx != 0; });
if (isTeddy) {
fprintf(f, "TEDDY: %u\n", fdr->engineID);
} else {
fprintf(f, "FDR: %u\n", fdr->engineID);
fprintf(f, " load %u/%u (%0.2f%%)\n", lits_used, num_lits,
(double)lits_used / (double)(num_lits)*100);
}
static
void dumpConfirms(const void *fdr_base, u32 conf_offset, u32 num_confirms,
FILE *f) {
const u32 *conf = (const u32 *)((const char *)fdr_base + conf_offset);
for (u32 i = 0; i < num_confirms; i++) {
const auto *fdrc = (const FDRConfirm *)((const char *)conf + conf[i]);
fprintf(f, " confirm %u\n", i);
fprintf(f, " andmsk 0x%016llx\n", fdrc->andmsk);
fprintf(f, " mult 0x%016llx\n", fdrc->mult);
fprintf(f, " nbits %u\n", fdrc->nBits);
fprintf(f, " groups 0x%016llx\n", fdrc->groups);
dumpLitIndex(fdrc, f);
}
}
if (isTeddy) {
auto des = getTeddyDescription(fdr->engineID);
if (des) {
fprintf(f, " masks %u\n", des->numMasks);
fprintf(f, " buckets %u\n", des->getNumBuckets());
fprintf(f, " packed %s\n", des->packed ? "true" : "false");
} else {
fprintf(f, " <unknown engine>\n");
static
void dumpTeddyReinforced(const u8 *rmsk, const u32 num_tables, FILE *f) {
// dump reinforcement masks
for (u32 b = 0; b < num_tables; b++) {
fprintf(f, " reinforcement table for bucket %u..%u:\n",
b * 8, b * 8 + 7);
for (u32 i = 0; i <= N_CHARS; i++) {
fprintf(f, " 0x%02x: ", i);
for (u32 j = 0; j < 8; j++) {
u8 val = rmsk[b * ((N_CHARS + 1) * 8) + i * 8 + j];
for (u32 k = 0; k < 8; k++) {
fprintf(f, "%s", ((val >> k) & 0x1) ? "1" : "0");
}
fprintf(f, " ");
}
fprintf(f, "\n");
}
} else {
fprintf(f, " domain %u\n", fdr->domain);
fprintf(f, " stride %u\n", fdr->stride);
fprintf(f, "\n");
}
}
static
void dumpTeddyMasks(const u8 *baseMsk, u32 numMasks, u32 maskWidth, FILE *f) {
// dump nibble masks
fprintf(f, " nibble masks:\n");
for (u32 i = 0; i < numMasks * 2; i++) {
fprintf(f, " -%d%s: ", 1 + i / 2, (i % 2) ? "hi" : "lo");
for (u32 j = 0; j < 16 * maskWidth; j++) {
u8 val = baseMsk[i * 16 * maskWidth + j];
for (u32 k = 0; k < 8; k++) {
fprintf(f, "%s", ((val >> k) & 0x1) ? "1" : "0");
}
fprintf(f, " ");
}
fprintf(f, "\n");
}
fprintf(f, "\n");
}
static
void dumpTeddy(const Teddy *teddy, FILE *f) {
fprintf(f, "TEDDY: %u\n", teddy->engineID);
auto des = getTeddyDescription(teddy->engineID);
if (!des) {
fprintf(f, " <unknown engine>\n");
return;
}
fprintf(f, " strings ???\n");
fprintf(f, " masks %u\n", des->numMasks);
fprintf(f, " buckets %u\n", des->getNumBuckets());
fprintf(f, " packed %s\n", des->packed ? "true" : "false");
fprintf(f, " strings %u\n", teddy->numStrings);
fprintf(f, " size %zu bytes\n", fdrSize((const FDR *)teddy));
fprintf(f, " max length %u\n", teddy->maxStringLen);
fprintf(f, " floodoff %u (%x)\n", teddy->floodOffset,
teddy->floodOffset);
fprintf(f, "\n");
u32 maskWidth = des->getNumBuckets() / 8;
size_t headerSize = sizeof(Teddy);
size_t maskLen = des->numMasks * 16 * 2 * maskWidth;
const u8 *teddy_base = (const u8 *)teddy;
const u8 *baseMsk = teddy_base + ROUNDUP_CL(headerSize);
const u8 *rmsk = baseMsk + ROUNDUP_CL(maskLen);
dumpTeddyMasks(baseMsk, des->numMasks, maskWidth, f);
dumpTeddyReinforced(rmsk, maskWidth, f);
dumpConfirms(teddy, teddy->confOffset, des->getNumBuckets(), f);
}
static
void dumpFDR(const FDR *fdr, FILE *f) {
fprintf(f, "FDR: %u\n", fdr->engineID);
auto des = getFdrDescription(fdr->engineID);
if (!des) {
fprintf(f, " <unknown engine>\n");
return;
}
fprintf(f, " domain %u\n", fdr->domain);
fprintf(f, " stride %u\n", fdr->stride);
fprintf(f, " strings %u\n", fdr->numStrings);
fprintf(f, " size %zu bytes\n", fdrSize(fdr));
fprintf(f, " max length %u\n", fdr->maxStringLen);
fprintf(f, " floodoff %u (%x)\n", fdr->floodOffset, fdr->floodOffset);
fprintf(f, "\n");
dumpConfirms(fdr, fdr->confOffset, des->getNumBuckets(), f);
}
void fdrPrintStats(const FDR *fdr, FILE *f) {
if (fdrIsTeddy(fdr)) {
dumpTeddy((const Teddy *)fdr, f);
} else {
dumpFDR(fdr, f);
}
}
} // namespace ue2

View File

@@ -30,7 +30,6 @@
#define FDR_ENGINE_DESCRIPTION_H
#include "engine_description.h"
#include "util/ue2_containers.h"
#include <map>
#include <memory>

View File

@@ -36,6 +36,8 @@
#include "ue2common.h"
#include "hwlm/hwlm.h" // for hwlm_group_t, HWLMCallback
struct hs_scratch;
typedef enum {
NOT_CAUTIOUS, //!< not near a boundary (quantify?)
VECTORING //!< potentially vectoring
@@ -56,7 +58,6 @@ struct FDRFlood {
u32 ids[FDR_FLOOD_MAX_IDS]; //!< the ids
hwlm_group_t groups[FDR_FLOOD_MAX_IDS]; //!< group ids to go with string ids
u32 len[FDR_FLOOD_MAX_IDS]; //!< lengths to go with the string ids
};
/** \brief FDR structure.
@@ -69,19 +70,18 @@ struct FDR {
u32 engineID;
u32 size;
u32 maxStringLen;
u32 numStrings;
u32 confOffset;
u32 floodOffset;
u8 stride; /* stride - how frequeuntly the data is consulted by the first
u8 stride; /* stride - how frequently the data is consulted by the first
* stage matcher */
u8 domain; /* number of bits used to index into main FDR table. This value
* is used only for debugging/asserts. */
u16 domainMask; /* pre-computed domain mask */
u32 tabSize; /* pre-computed hashtable size in bytes */
u32 pad;
m128 start; /* initial start state to use at offset 0. The state has been set
* up based on the min length of buckets to reduce the need for
* pointless confirms. */
m128 start; /* initial start state to use at offset 0. The state has been
* set up based on the min length of buckets to reduce the need
* for pointless confirms. */
};
/** \brief FDR runtime arguments.
@ -97,7 +97,7 @@ struct FDR_Runtime_Args {
size_t len_history;
size_t start_offset;
HWLMCallback cb;
void *ctxt;
struct hs_scratch *scratch;
const u8 *firstFloodDetect;
const u64a histBytes;
};

View File

@ -82,11 +82,10 @@ void addFlood(vector<FDRFlood> &tmpFlood, u8 c, const hwlmLiteral &lit,
fl.ids[fl.idCount] = lit.id;
fl.allGroups |= lit.groups;
fl.groups[fl.idCount] = lit.groups;
fl.len[fl.idCount] = suffix;
// when idCount gets to max_ids this flood no longer happens
// only incremented one more time to avoid arithmetic overflow
DEBUG_PRINTF("Added Flood for char '%c' suffix=%u len[%hu]=%u\n",
c, fl.suffix, fl.idCount, suffix);
c, fl.suffix, fl.idCount, suffix);
fl.idCount++;
}
}
@ -182,8 +181,7 @@ bytecode_ptr<u8> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
printf("i is %02x fl->idCount is %hd fl->suffix is %d fl->allGroups is "
"%016llx\n", i, fl.idCount, fl.suffix, fl.allGroups);
for (u32 j = 0; j < fl.idCount; j++) {
printf("j is %d fl.groups[j] %016llx fl.len[j] %d \n", j,
fl.groups[j], fl.len[j]);
printf("j is %d fl.groups[j] %016llx\n", j, fl.groups[j]);
}
}
#endif

View File

@ -94,7 +94,7 @@ const u8 * floodDetect(const struct FDR * fdr,
const u8 * buf = a->buf;
const size_t len = a->len;
HWLMCallback cb = a->cb;
void * ctxt = a->ctxt;
struct hs_scratch *scratch = a->scratch;
const u8 * ptr = *ptrPtr;
// tryFloodDetect is never put in places where unconditional
@ -196,120 +196,110 @@ const u8 * floodDetect(const struct FDR * fdr,
for (u32 t = 0; t < floodSize && (*control & fl->allGroups);
t += 4) {
DEBUG_PRINTF("aaa %u %llx\n", t, fl->groups[0]);
u32 len0 = fl->len[0] - 1;
if (*control & fl->groups[0]) {
*control = cb(i + t + 0 - len0, i + t + 0, fl->ids[0], ctxt);
*control = cb(i + t + 0, fl->ids[0], scratch);
}
if (*control & fl->groups[0]) {
*control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
*control = cb(i + t + 1, fl->ids[0], scratch);
}
if (*control & fl->groups[0]) {
*control = cb(i + t + 2 - len0, i + t + 2, fl->ids[0], ctxt);
*control = cb(i + t + 2, fl->ids[0], scratch);
}
if (*control & fl->groups[0]) {
*control = cb(i + t + 3 - len0, i + t + 3, fl->ids[0], ctxt);
*control = cb(i + t + 3, fl->ids[0], scratch);
}
}
break;
case 2:
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 4) {
u32 len0 = fl->len[0] - 1;
u32 len1 = fl->len[1] - 1;
if (*control & fl->groups[0]) {
*control = cb(i + t - len0, i + t, fl->ids[0], ctxt);
*control = cb(i + t, fl->ids[0], scratch);
}
if (*control & fl->groups[1]) {
*control = cb(i + t - len1, i + t, fl->ids[1], ctxt);
*control = cb(i + t, fl->ids[1], scratch);
}
if (*control & fl->groups[0]) {
*control =
cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
cb(i + t + 1, fl->ids[0], scratch);
}
if (*control & fl->groups[1]) {
*control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt);
*control = cb(i + t + 1, fl->ids[1], scratch);
}
if (*control & fl->groups[0]) {
*control = cb(i + t + 2 - len0, i + t + 2, fl->ids[0], ctxt);
*control = cb(i + t + 2, fl->ids[0], scratch);
}
if (*control & fl->groups[1]) {
*control = cb(i + t + 2 - len1, i + t + 2, fl->ids[1], ctxt);
*control = cb(i + t + 2, fl->ids[1], scratch);
}
if (*control & fl->groups[0]) {
*control = cb(i + t + 3 - len0, i + t + 3, fl->ids[0], ctxt);
*control = cb(i + t + 3, fl->ids[0], scratch);
}
if (*control & fl->groups[1]) {
*control = cb(i + t + 3 - len1, i + t + 3, fl->ids[1], ctxt);
*control = cb(i + t + 3, fl->ids[1], scratch);
}
}
break;
case 3:
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) {
u32 len0 = fl->len[0] - 1;
u32 len1 = fl->len[1] - 1;
u32 len2 = fl->len[2] - 1;
if (*control & fl->groups[0]) {
*control = cb(i + t - len0, i + t, fl->ids[0], ctxt);
*control = cb(i + t, fl->ids[0], scratch);
}
if (*control & fl->groups[1]) {
*control = cb(i + t - len1, i + t, fl->ids[1], ctxt);
*control = cb(i + t, fl->ids[1], scratch);
}
if (*control & fl->groups[2]) {
*control = cb(i + t - len2, i + t, fl->ids[2], ctxt);
*control = cb(i + t, fl->ids[2], scratch);
}
if (*control & fl->groups[0]) {
*control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
*control = cb(i + t + 1, fl->ids[0], scratch);
}
if (*control & fl->groups[1]) {
*control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt);
*control = cb(i + t + 1, fl->ids[1], scratch);
}
if (*control & fl->groups[2]) {
*control = cb(i + t + 1 - len2, i + t + 1, fl->ids[2], ctxt);
*control = cb(i + t + 1, fl->ids[2], scratch);
}
}
break;
default:
// slow generalized loop
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) {
u32 len0 = fl->len[0] - 1;
u32 len1 = fl->len[1] - 1;
u32 len2 = fl->len[2] - 1;
u32 len3 = fl->len[3] - 1;
if (*control & fl->groups[0]) {
*control = cb(i + t - len0, i + t, fl->ids[0], ctxt);
*control = cb(i + t, fl->ids[0], scratch);
}
if (*control & fl->groups[1]) {
*control = cb(i + t - len1, i + t, fl->ids[1], ctxt);
*control = cb(i + t, fl->ids[1], scratch);
}
if (*control & fl->groups[2]) {
*control = cb(i + t - len2, i + t, fl->ids[2], ctxt);
*control = cb(i + t, fl->ids[2], scratch);
}
if (*control & fl->groups[3]) {
*control = cb(i + t - len3, i + t, fl->ids[3], ctxt);
*control = cb(i + t, fl->ids[3], scratch);
}
for (u32 t2 = 4; t2 < fl->idCount; t2++) {
if (*control & fl->groups[t2]) {
*control = cb(i + t - (fl->len[t2] - 1), i + t, fl->ids[t2], ctxt);
*control = cb(i + t, fl->ids[t2], scratch);
}
}
if (*control & fl->groups[0]) {
*control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
*control = cb(i + t + 1, fl->ids[0], scratch);
}
if (*control & fl->groups[1]) {
*control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt);
*control = cb(i + t + 1, fl->ids[1], scratch);
}
if (*control & fl->groups[2]) {
*control = cb(i + t + 1 - len2, i + t + 1, fl->ids[2], ctxt);
*control = cb(i + t + 1, fl->ids[2], scratch);
}
if (*control & fl->groups[3]) {
*control = cb(i + t + 1 - len3, i + t + 1, fl->ids[3], ctxt);
*control = cb(i + t + 1, fl->ids[3], scratch);
}
for (u32 t2 = 4; t2 < fl->idCount; t2++) {
if (*control & fl->groups[t2]) {
*control = cb(i + t + 1 - (fl->len[t2] - 1), i + t + 1, fl->ids[t2], ctxt);
*control = cb(i + t + 1, fl->ids[t2], scratch);
}
}
}
@ -320,7 +310,7 @@ const u8 * floodDetect(const struct FDR * fdr,
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t++) {
for (u32 t2 = 0; t2 < fl->idCount; t2++) {
if (*control & fl->groups[t2]) {
*control = cb(i + t - (fl->len[t2] - 1), i + t, fl->ids[t2], ctxt);
*control = cb(i + t, fl->ids[t2], scratch);
}
}
}

File diff suppressed because it is too large

View File

@ -73,37 +73,37 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
#if defined(HAVE_AVX2)
hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_fat_teddy_msks1(const struct FDR *fdr,
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_fat_teddy_msks1_pck(const struct FDR *fdr,
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_fat_teddy_msks2(const struct FDR *fdr,
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_fat_teddy_msks2_pck(const struct FDR *fdr,
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_fat_teddy_msks3(const struct FDR *fdr,
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_fat_teddy_msks3_pck(const struct FDR *fdr,
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_fat_teddy_msks4(const struct FDR *fdr,
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_fat_teddy_msks4_pck(const struct FDR *fdr,
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
#endif /* HAVE_AVX2 */

File diff suppressed because it is too large

View File

@ -42,10 +42,14 @@
#include "teddy_engine_description.h"
#include "grey.h"
#include "ue2common.h"
#include "hwlm/hwlm_build.h"
#include "util/alloc.h"
#include "util/compare.h"
#include "util/container.h"
#include "util/make_unique.h"
#include "util/noncopyable.h"
#include "util/popcount.h"
#include "util/small_vector.h"
#include "util/target_info.h"
#include "util/verify_types.h"
@ -69,38 +73,58 @@ namespace {
//#define TEDDY_DEBUG
/** \brief Max number of Teddy masks we use. */
static constexpr size_t MAX_NUM_MASKS = 4;
class TeddyCompiler : noncopyable {
const TeddyEngineDescription &eng;
const Grey &grey;
const vector<hwlmLiteral> &lits;
map<BucketIndex, std::vector<LiteralIndex>> bucketToLits;
bool make_small;
public:
TeddyCompiler(const vector<hwlmLiteral> &lits_in,
map<BucketIndex, std::vector<LiteralIndex>> bucketToLits_in,
const TeddyEngineDescription &eng_in, bool make_small_in,
const Grey &grey_in)
: eng(eng_in), grey(grey_in), lits(lits_in), make_small(make_small_in) {
}
: eng(eng_in), grey(grey_in), lits(lits_in),
bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {}
bytecode_ptr<FDR> build();
bool pack(map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits);
};
class TeddySet {
/**
* \brief Estimate of the max number of literals in a set, used to
* minimise allocations.
*/
static constexpr size_t LITS_PER_SET = 20;
/** \brief Number of masks. */
u32 len;
// nibbleSets is a series of bitfields over 16 predicates
// that represent the whether shufti nibble set
// so for num_masks = 4 we will represent our strings by
// 8 u16s in the vector that indicate what a shufti bucket
// would have to look like
vector<u16> nibbleSets;
set<u32> litIds;
/**
* \brief A series of bitfields over 16 predicates that represent the
* shufti nibble set.
*
* So for num_masks = 4 we will represent our strings by 8 u16s in the
* vector that indicate what a shufti bucket would have to look like.
*/
small_vector<u16, MAX_NUM_MASKS * 2> nibbleSets;
/**
* \brief Sorted, unique set of literals. We maintain our own set in a
* sorted vector to minimise allocations.
*/
small_vector<u32, LITS_PER_SET> litIds;
public:
explicit TeddySet(u32 len_in) : len(len_in), nibbleSets(len_in * 2, 0) {}
const set<u32> & getLits() const { return litIds; }
size_t litCount() const { return litIds.size(); }
const small_vector<u32, LITS_PER_SET> &getLits() const { return litIds; }
bool operator<(const TeddySet & s) const {
bool operator<(const TeddySet &s) const {
return litIds < s.litIds;
}
@ -116,11 +140,11 @@ public:
printf("%u ", id);
}
printf("\n");
printf("Flood prone : %s\n", isRunProne()?"yes":"no");
printf("Flood prone : %s\n", isRunProne() ? "yes" : "no");
}
#endif
bool identicalTail(const TeddySet & ts) const {
bool identicalTail(const TeddySet &ts) const {
return nibbleSets == ts.nibbleSets;
}
@ -131,24 +155,19 @@ public:
u8 c = s[s.size() - i - 1];
u8 c_hi = (c >> 4) & 0xf;
u8 c_lo = c & 0xf;
nibbleSets[i*2] = 1 << c_lo;
nibbleSets[i * 2] = 1 << c_lo;
if (lit.nocase && ourisalpha(c)) {
nibbleSets[i*2+1] = (1 << (c_hi&0xd)) | (1 << (c_hi|0x2));
nibbleSets[i * 2 + 1] =
(1 << (c_hi & 0xd)) | (1 << (c_hi | 0x2));
} else {
nibbleSets[i*2+1] = 1 << c_hi;
nibbleSets[i * 2 + 1] = 1 << c_hi;
}
} else {
nibbleSets[i*2] = nibbleSets[i*2+1] = 0xffff;
nibbleSets[i * 2] = nibbleSets[i * 2 + 1] = 0xffff;
}
}
litIds.insert(lit_id);
}
void merge(const TeddySet &ts) {
for (u32 i = 0; i < nibbleSets.size(); i++) {
nibbleSets[i] |= ts.nibbleSets[i];
}
litIds.insert(ts.litIds.begin(), ts.litIds.end());
litIds.push_back(lit_id);
sort_and_unique(litIds);
}
// return a value p from 0 .. MAXINT64 that gives p/MAXINT64
@ -167,15 +186,15 @@ public:
// a small fixed cost + the cost of traversing some sort of followup
// (assumption is that the followup is linear)
u64a heuristic() const {
return probability() * (2+litCount());
return probability() * (2 + litCount());
}
bool isRunProne() const {
u16 lo_and = 0xffff;
u16 hi_and = 0xffff;
for (u32 i = 0; i < len; i++) {
lo_and &= nibbleSets[i*2];
hi_and &= nibbleSets[i*2+1];
lo_and &= nibbleSets[i * 2];
hi_and &= nibbleSets[i * 2 + 1];
}
// we're not flood-prone if there's no way to get
// through with a flood
@ -184,10 +203,27 @@ public:
}
return true;
}
friend TeddySet merge(const TeddySet &a, const TeddySet &b) {
assert(a.nibbleSets.size() == b.nibbleSets.size());
TeddySet m(a);
for (size_t i = 0; i < m.nibbleSets.size(); i++) {
m.nibbleSets[i] |= b.nibbleSets[i];
}
m.litIds.insert(m.litIds.end(), b.litIds.begin(), b.litIds.end());
sort_and_unique(m.litIds);
return m;
}
};
bool TeddyCompiler::pack(map<BucketIndex,
std::vector<LiteralIndex> > &bucketToLits) {
static
bool pack(const vector<hwlmLiteral> &lits,
const TeddyEngineDescription &eng,
map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits) {
set<TeddySet> sts;
for (u32 i = 0; i < lits.size(); i++) {
@ -200,7 +236,8 @@ bool TeddyCompiler::pack(map<BucketIndex,
#ifdef TEDDY_DEBUG
printf("Size %zu\n", sts.size());
for (const TeddySet &ts : sts) {
printf("\n"); ts.dump();
printf("\n");
ts.dump();
}
printf("\n===============================================\n");
#endif
@ -220,9 +257,7 @@ bool TeddyCompiler::pack(map<BucketIndex,
continue;
}
TeddySet tmpSet(eng.numMasks);
tmpSet.merge(s1);
tmpSet.merge(s2);
TeddySet tmpSet = merge(s1, s2);
u64a newScore = tmpSet.heuristic();
u64a oldScore = s1.heuristic() + s2.heuristic();
if (newScore < oldScore) {
@ -250,9 +285,7 @@ bool TeddyCompiler::pack(map<BucketIndex,
}
// do the merge
TeddySet nts(eng.numMasks);
nts.merge(*m1);
nts.merge(*m2);
TeddySet nts = merge(*m1, *m2);
#ifdef TEDDY_DEBUG
printf("Merging\n");
printf("m1 = \n");
@ -282,66 +315,51 @@ bool TeddyCompiler::pack(map<BucketIndex,
return true;
}
bytecode_ptr<FDR> TeddyCompiler::build() {
if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
DEBUG_PRINTF("too many literals: %zu\n", lits.size());
return nullptr;
}
// this entry has all-zero mask to skip reinforcement
#define NO_REINFORCEMENT N_CHARS
#ifdef TEDDY_DEBUG
for (size_t i = 0; i < lits.size(); i++) {
printf("lit %zu (len = %zu, %s) is ", i, lits[i].s.size(),
lits[i].nocase ? "caseless" : "caseful");
for (size_t j = 0; j < lits[i].s.size(); j++) {
printf("%02x", ((u32)lits[i].s[j])&0xff);
}
printf("\n");
}
#endif
// this means every entry in reinforcement table
#define ALL_CHAR_SET N_CHARS
map<BucketIndex, std::vector<LiteralIndex> > bucketToLits;
if(eng.needConfirm(lits)) {
if (!pack(bucketToLits)) {
DEBUG_PRINTF("more lits (%zu) than buckets (%u), can't pack.\n",
lits.size(), eng.getNumBuckets());
return nullptr;
// each item's reinforcement mask has REINFORCED_MSK_LEN bytes
#define REINFORCED_MSK_LEN 8
// reinforcement table size for each set of 8 buckets
#define RTABLE_SIZE ((N_CHARS + 1) * REINFORCED_MSK_LEN)
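// e.g. with N_CHARS == 256 this is (256 + 1) * 8 == 2056 bytes per table:
// one 8-byte mask per character value plus the all-zero NO_REINFORCEMENT entry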
static
void initReinforcedTable(u8 *rmsk) {
u64a *mask = (u64a *)rmsk;
fill_n(mask, N_CHARS, 0x00ffffffffffffffULL);
}
static
void fillReinforcedMskZero(u8 *rmsk) {
u8 *mc = rmsk + NO_REINFORCEMENT * REINFORCED_MSK_LEN;
fill_n(mc, REINFORCED_MSK_LEN, 0x00);
}
static
void fillReinforcedMsk(u8 *rmsk, u16 c, u32 j, u8 bmsk) {
assert(j > 0);
if (c == ALL_CHAR_SET) {
for (size_t i = 0; i < N_CHARS; i++) {
u8 *mc = rmsk + i * REINFORCED_MSK_LEN;
mc[j - 1] &= ~bmsk;
}
} else {
for (u32 i = 0; i < lits.size(); i++) {
bucketToLits[i].push_back(i);
}
u8 *mc = rmsk + c * REINFORCED_MSK_LEN;
mc[j - 1] &= ~bmsk;
}
u32 maskWidth = eng.getNumBuckets() / 8;
}
size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;
auto floodControlTmp = setupFDRFloodControl(lits, eng, grey);
auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small);
size_t size = ROUNDUP_N(sizeof(Teddy) +
maskLen +
confirmTmp.size() +
floodControlTmp.size(),
16 * maskWidth);
auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64);
assert(fdr); // otherwise would have thrown std::bad_alloc
Teddy *teddy = (Teddy *)fdr.get(); // ugly
u8 *teddy_base = (u8 *)teddy;
teddy->size = size;
teddy->engineID = eng.getID();
teddy->maxStringLen = verify_u32(maxLen(lits));
u8 *ptr = teddy_base + sizeof(Teddy) + maskLen;
memcpy(ptr, confirmTmp.get(), confirmTmp.size());
ptr += confirmTmp.size();
teddy->floodOffset = verify_u32(ptr - teddy_base);
memcpy(ptr, floodControlTmp.get(), floodControlTmp.size());
ptr += floodControlTmp.size();
u8 *baseMsk = teddy_base + sizeof(Teddy);
static
void fillNibbleMasks(const map<BucketIndex,
vector<LiteralIndex>> &bucketToLits,
const vector<hwlmLiteral> &lits,
u32 numMasks, u32 maskWidth, size_t maskLen,
u8 *baseMsk) {
memset(baseMsk, 0xff, maskLen);
for (const auto &b2l : bucketToLits) {
const u32 &bucket_id = b2l.first;
@ -354,16 +372,18 @@ bytecode_ptr<FDR> TeddyCompiler::build() {
const u32 sz = verify_u32(l.s.size());
// fill in masks
for (u32 j = 0; j < eng.numMasks; j++) {
u32 msk_id_lo = j * 2 * maskWidth + (bucket_id / 8);
u32 msk_id_hi = (j * 2 + 1) * maskWidth + (bucket_id / 8);
for (u32 j = 0; j < numMasks; j++) {
const u32 msk_id_lo = j * 2 * maskWidth + (bucket_id / 8);
const u32 msk_id_hi = (j * 2 + 1) * maskWidth + (bucket_id / 8);
const u32 lo_base = msk_id_lo * 16;
const u32 hi_base = msk_id_hi * 16;
// if we don't have a char at this position, fill in i
// locations in these masks with '1'
if (j >= sz) {
for (u32 n = 0; n < 16; n++) {
baseMsk[msk_id_lo * 16 + n] |= bmsk;
baseMsk[msk_id_hi * 16 + n] |= bmsk;
baseMsk[lo_base + n] &= ~bmsk;
baseMsk[hi_base + n] &= ~bmsk;
}
} else {
u8 c = l.s[sz - 1 - j];
@ -382,51 +402,173 @@ bytecode_ptr<FDR> TeddyCompiler::build() {
for (u8 cm = 0; cm < 0x10; cm++) {
if ((cm & m_lo) == (cmp_lo & m_lo)) {
baseMsk[msk_id_lo * 16 + cm] |= bmsk;
baseMsk[lo_base + cm] &= ~bmsk;
}
if ((cm & m_hi) == (cmp_hi & m_hi)) {
baseMsk[msk_id_hi * 16 + cm] |= bmsk;
baseMsk[hi_base + cm] &= ~bmsk;
}
}
} else{
} else {
if (l.nocase && ourisalpha(c)) {
u32 cmHalfClear = (0xdf >> hiShift) & 0xf;
u32 cmHalfSet = (0x20 >> hiShift) & 0xf;
baseMsk[msk_id_hi * 16 + (n_hi & cmHalfClear)] |= bmsk;
baseMsk[msk_id_hi * 16 + (n_hi | cmHalfSet )] |= bmsk;
u32 cmHalfSet = (0x20 >> hiShift) & 0xf;
baseMsk[hi_base + (n_hi & cmHalfClear)] &= ~bmsk;
baseMsk[hi_base + (n_hi | cmHalfSet)] &= ~bmsk;
} else {
baseMsk[msk_id_hi * 16 + n_hi] |= bmsk;
baseMsk[hi_base + n_hi] &= ~bmsk;
}
baseMsk[msk_id_lo * 16 + n_lo] |= bmsk;
baseMsk[lo_base + n_lo] &= ~bmsk;
}
}
}
}
}
}
static
void fillReinforcedTable(const map<BucketIndex,
vector<LiteralIndex>> &bucketToLits,
const vector<hwlmLiteral> &lits,
u8 *rtable_base, const u32 num_tables) {
vector<u8 *> tables;
for (u32 i = 0; i < num_tables; i++) {
tables.push_back(rtable_base + i * RTABLE_SIZE);
}
for (auto t : tables) {
initReinforcedTable(t);
}
for (const auto &b2l : bucketToLits) {
const u32 &bucket_id = b2l.first;
const vector<LiteralIndex> &ids = b2l.second;
u8 *rmsk = tables[bucket_id / 8];
const u8 bmsk = 1U << (bucket_id % 8);
for (const LiteralIndex &lit_id : ids) {
const hwlmLiteral &l = lits[lit_id];
DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id);
const u32 sz = verify_u32(l.s.size());
// fill in reinforced masks
for (u32 j = 1; j < REINFORCED_MSK_LEN; j++) {
if (sz - 1 < j) {
fillReinforcedMsk(rmsk, ALL_CHAR_SET, j, bmsk);
} else {
u8 c = l.s[sz - 1 - j];
if (l.nocase && ourisalpha(c)) {
u8 c_up = c & 0xdf;
fillReinforcedMsk(rmsk, c_up, j, bmsk);
u8 c_lo = c | 0x20;
fillReinforcedMsk(rmsk, c_lo, j, bmsk);
} else {
fillReinforcedMsk(rmsk, c, j, bmsk);
}
}
}
}
}
for (auto t : tables) {
fillReinforcedMskZero(t);
}
}
bytecode_ptr<FDR> TeddyCompiler::build() {
u32 maskWidth = eng.getNumBuckets() / 8;
size_t headerSize = sizeof(Teddy);
size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;
size_t reinforcedMaskLen = RTABLE_SIZE * maskWidth;
auto floodTable = setupFDRFloodControl(lits, eng, grey);
auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small);
// Note: we place each major structure here on a cacheline boundary.
size_t size = ROUNDUP_CL(headerSize) + ROUNDUP_CL(maskLen) +
ROUNDUP_CL(reinforcedMaskLen) +
ROUNDUP_CL(confirmTable.size()) + floodTable.size();
auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64);
assert(fdr); // otherwise would have thrown std::bad_alloc
Teddy *teddy = (Teddy *)fdr.get(); // ugly
u8 *teddy_base = (u8 *)teddy;
// Write header.
teddy->size = size;
teddy->engineID = eng.getID();
teddy->maxStringLen = verify_u32(maxLen(lits));
teddy->numStrings = verify_u32(lits.size());
// Write confirm structures.
u8 *ptr = teddy_base + ROUNDUP_CL(headerSize) + ROUNDUP_CL(maskLen) +
ROUNDUP_CL(reinforcedMaskLen);
assert(ISALIGNED_CL(ptr));
teddy->confOffset = verify_u32(ptr - teddy_base);
memcpy(ptr, confirmTable.get(), confirmTable.size());
ptr += ROUNDUP_CL(confirmTable.size());
// Write flood control structures.
assert(ISALIGNED_CL(ptr));
teddy->floodOffset = verify_u32(ptr - teddy_base);
memcpy(ptr, floodTable.get(), floodTable.size());
ptr += floodTable.size();
// Write teddy masks.
u8 *baseMsk = teddy_base + ROUNDUP_CL(headerSize);
fillNibbleMasks(bucketToLits, lits, eng.numMasks, maskWidth, maskLen,
baseMsk);
// Write reinforcement masks.
u8 *reinforcedMsk = baseMsk + ROUNDUP_CL(maskLen);
fillReinforcedTable(bucketToLits, lits, reinforcedMsk, maskWidth);
return fdr;
}
static
bool assignStringsToBuckets(
const vector<hwlmLiteral> &lits,
TeddyEngineDescription &eng,
map<BucketIndex, vector<LiteralIndex>> &bucketToLits) {
assert(eng.numMasks <= MAX_NUM_MASKS);
if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
DEBUG_PRINTF("too many literals: %zu\n", lits.size());
return false;
}
#ifdef TEDDY_DEBUG
for (u32 i = 0; i < eng.numMasks * 2; i++) {
for (u32 j = 0; j < 16; j++) {
u8 val = baseMsk[i * 16 + j];
for (u32 k = 0; k < 8; k++) {
printf("%s", ((val >> k) & 0x1) ? "1" : "0");
}
printf(" ");
for (size_t i = 0; i < lits.size(); i++) {
printf("lit %zu (len = %zu, %s) is ", i, lits[i].s.size(),
lits[i].nocase ? "caseless" : "caseful");
for (size_t j = 0; j < lits[i].s.size(); j++) {
printf("%02x", ((u32)lits[i].s[j])&0xff);
}
printf("\n");
}
#endif
return fdr;
if (!pack(lits, eng, bucketToLits)) {
DEBUG_PRINTF("more lits (%zu) than buckets (%u), can't pack.\n",
lits.size(), eng.getNumBuckets());
return false;
}
return true;
}
} // namespace
bytecode_ptr<FDR> teddyBuildTableHinted(const vector<hwlmLiteral> &lits,
bool make_small, u32 hint,
const target_t &target,
const Grey &grey) {
bytecode_ptr<FDR> teddyBuildTable(const HWLMProto &proto, const Grey &grey) {
TeddyCompiler tc(proto.lits, proto.bucketToLits, *(proto.teddyEng),
proto.make_small, grey);
return tc.build();
}
unique_ptr<HWLMProto> teddyBuildProtoHinted(
u8 engType, const vector<hwlmLiteral> &lits,
bool make_small, u32 hint, const target_t &target) {
unique_ptr<TeddyEngineDescription> des;
if (hint == HINT_INVALID) {
des = chooseTeddyEngine(target, lits);
@ -436,8 +578,14 @@ bytecode_ptr<FDR> teddyBuildTableHinted(const vector<hwlmLiteral> &lits,
if (!des) {
return nullptr;
}
TeddyCompiler tc(lits, *des, make_small, grey);
return tc.build();
map<BucketIndex, std::vector<LiteralIndex>> bucketToLits;
if (!assignStringsToBuckets(lits, *des, bucketToLits)) {
return nullptr;
}
return ue2::make_unique<HWLMProto>(engType, move(des), lits,
bucketToLits, make_small);
}
} // namespace ue2

View File

@ -35,6 +35,7 @@
#define TEDDY_COMPILE_H
#include "ue2common.h"
#include "hwlm/hwlm_build.h"
#include "util/bytecode_ptr.h"
#include <vector>
@ -43,15 +44,16 @@ struct FDR;
namespace ue2 {
class TeddyEngineDescription;
struct Grey;
struct hwlmLiteral;
struct target_t;
bytecode_ptr<FDR> teddyBuildTableHinted(const std::vector<hwlmLiteral> &lits,
bool make_small, u32 hint,
const target_t &target,
const Grey &grey);
bytecode_ptr<FDR> teddyBuildTable(const HWLMProto &proto, const Grey &grey);
std::unique_ptr<HWLMProto> teddyBuildProtoHinted(
u8 engType, const std::vector<hwlmLiteral> &lits,
bool make_small, u32 hint, const target_t &target);
} // namespace ue2
#endif // TEDDY_COMPILE_H

View File

@ -51,18 +51,6 @@ u32 TeddyEngineDescription::getDefaultFloodSuffixLength() const {
return numMasks;
}
bool TeddyEngineDescription::needConfirm(const vector<hwlmLiteral> &lits) const {
if (packed || lits.size() > getNumBuckets()) {
return true;
}
for (const auto &lit : lits) {
if (lit.s.size() > numMasks || !lit.msk.empty()) {
return true;
}
}
return false;
}
void getTeddyDescriptions(vector<TeddyEngineDescription> *out) {
static const TeddyEngineDef defns[] = {
{ 3, 0 | HS_CPU_FEATURES_AVX2, 1, 16, false },

View File

@ -55,7 +55,6 @@ public:
explicit TeddyEngineDescription(const TeddyEngineDef &def);
u32 getDefaultFloodSuffixLength() const override;
bool needConfirm(const std::vector<hwlmLiteral> &lits) const;
};
std::unique_ptr<TeddyEngineDescription>

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -26,6 +26,28 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
/* Teddy bytecode layout:
* * |-----|
* * | | struct Teddy
* * |-----|
* * | | teddy masks
* * | |
* * |-----|
* * | | reinforcement mask table for bucket 0..7
* * | |
* * |-----|
* * | | reinforcement mask table for bucket 8..15 (FAT teddy)
* * | |
* * |-----|
* * | | confirm
* * | |
* * | |
* * |-----|
* * | | flood control
* * | |
* * |-----|
*/
#ifndef TEDDY_INTERNAL_H
#define TEDDY_INTERNAL_H
@ -36,11 +58,9 @@ struct Teddy {
u32 engineID;
u32 size;
u32 maxStringLen;
u32 numStrings;
u32 confOffset;
u32 floodOffset;
u32 link;
u32 pad1;
u32 pad2;
u32 pad3;
};
#endif
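As a rough sketch of how the layout above is walked at runtime (the real
accessors live in teddy_runtime_common.h; this assumes the ROUNDUP_CL
cacheline-rounding macro and the m128 type from the rest of the codebase, and
the helper names are hypothetical):
/* Hypothetical helpers mirroring the layout comment above. */
static const u8 *teddy_nibble_masks(const struct Teddy *t) {
    return (const u8 *)t + ROUNDUP_CL(sizeof(struct Teddy));
}
static const u8 *teddy_reinforced_tables(const struct Teddy *t, u32 numMasks) {
    /* nibble masks occupy numMasks * 2 m128s per 8-bucket group */
    return teddy_nibble_masks(t) + ROUNDUP_CL(2 * numMasks * sizeof(m128));
}
static const u8 *teddy_confirm(const struct Teddy *t) {
    return (const u8 *)t + t->confOffset; /* offset stored in the header */
}
static const u8 *teddy_flood_control(const struct Teddy *t) {
    return (const u8 *)t + t->floodOffset;
}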

View File

@ -38,8 +38,12 @@
#include "ue2common.h"
#include "util/bitutils.h"
#include "util/simd_utils.h"
#include "util/uniform_ops.h"
extern const u8 ALIGN_DIRECTIVE p_mask_arr[17][32];
#if defined(HAVE_AVX2)
extern const u8 ALIGN_AVX_DIRECTIVE p_mask_arr256[33][64];
#endif
#ifdef ARCH_64_BIT
#define TEDDY_CONF_TYPE u64a
@ -110,8 +114,27 @@ void copyRuntBlock128(u8 *dst, const u8 *src, size_t len) {
}
// Note: p_mask is an output param that initialises a poison mask.
// *p_mask = load128(p_mask_arr[n] + 16 - m) means:
// m bytes of 0xff at the beginning, followed by n bytes of 0x00,
// then 0xff for the rest of the bytes.
// ptr >= lo:
// no history.
// for end/short zone, ptr==lo and start_offset==0
// for start zone, see below
// lo ptr hi hi
// |----------|-------|----------------|............|
// -start 0 -start+offset MIN(avail,16)
// p_mask ffff..ff0000...........00ffff..........
// ptr < lo:
// only start zone.
// history
// ptr lo hi hi
// |----------|-------|----------------|............|
// 0 start start+offset end(<=16)
// p_mask ffff.....ffffff..ff0000...........00ffff..........
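// Worked example: with ptr == lo, start_offset == 0 and avail == 10, the
// load below is loadu128(p_mask_arr[10] + 16): no leading poison, ten
// valid (0x00) bytes, then six trailing poison (0xff) bytes.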
static really_inline
m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const size_t start_offset,
const u8 *lo, const u8 *hi,
const u8 *buf_history, size_t len_history,
const u32 nMasks) {
union {
@ -123,27 +146,34 @@ m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
uintptr_t copy_start;
uintptr_t copy_len;
if (ptr >= lo) {
if (ptr >= lo) { // short/end/start zone
uintptr_t start = (uintptr_t)(ptr - lo);
uintptr_t avail = (uintptr_t)(hi - ptr);
if (avail >= 16) {
*p_mask = load128(p_mask_arr[16] + 16);
assert(start_offset - start <= 16);
*p_mask = loadu128(p_mask_arr[16 - start_offset + start]
+ 16 - start_offset + start);
return loadu128(ptr);
}
*p_mask = load128(p_mask_arr[avail] + 16);
assert(start_offset - start <= avail);
*p_mask = loadu128(p_mask_arr[avail - start_offset + start]
+ 16 - start_offset + start);
copy_start = 0;
copy_len = avail;
} else {
} else { // start zone
uintptr_t need = MIN((uintptr_t)(lo - ptr),
MIN(len_history, nMasks - 1));
uintptr_t start = (uintptr_t)(lo - ptr);
uintptr_t i;
for (i = start - need; ptr + i < lo; i++) {
u.val8[i] = buf_history[len_history - (lo - (ptr + i))];
for (i = start - need; i < start; i++) {
u.val8[i] = buf_history[len_history - (start - i)];
}
uintptr_t end = MIN(16, (uintptr_t)(hi - ptr));
*p_mask = loadu128(p_mask_arr[end - start] + 16 - start);
copy_start = i;
copy_len = end - i;
assert(start + start_offset <= end);
*p_mask = loadu128(p_mask_arr[end - start - start_offset]
+ 16 - start - start_offset);
copy_start = start;
copy_len = end - start;
}
// Runt block from the buffer.
@ -152,6 +182,205 @@ m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
return u.val128;
}
#if defined(HAVE_AVX2)
/*
* \brief Copy a block of [0,31] bytes efficiently.
*
* This function is a workaround intended to stop some compilers from
* synthesizing a memcpy function call out of the copy of a small number of
* bytes that we do in vectoredLoad256.
*/
static really_inline
void copyRuntBlock256(u8 *dst, const u8 *src, size_t len) {
switch (len) {
case 0:
break;
case 1:
*dst = *src;
break;
case 2:
unaligned_store_u16(dst, unaligned_load_u16(src));
break;
case 3:
unaligned_store_u16(dst, unaligned_load_u16(src));
dst[2] = src[2];
break;
case 4:
unaligned_store_u32(dst, unaligned_load_u32(src));
break;
case 5:
case 6:
case 7:
/* Perform copy with two overlapping 4-byte chunks. */
unaligned_store_u32(dst + len - 4, unaligned_load_u32(src + len - 4));
unaligned_store_u32(dst, unaligned_load_u32(src));
break;
case 8:
unaligned_store_u64a(dst, unaligned_load_u64a(src));
break;
case 9:
case 10:
case 11:
case 12:
case 13:
case 14:
case 15:
/* Perform copy with two overlapping 8-byte chunks. */
unaligned_store_u64a(dst + len - 8, unaligned_load_u64a(src + len - 8));
unaligned_store_u64a(dst, unaligned_load_u64a(src));
break;
case 16:
storeu128(dst, loadu128(src));
break;
default:
/* Perform copy with two overlapping 16-byte chunks. */
assert(len < 32);
storeu128(dst + len - 16, loadu128(src + len - 16));
storeu128(dst, loadu128(src));
break;
}
}
// Note: p_mask is an output param that initialises a poison mask.
// *p_mask = load256(p_mask_arr256[n] + 32 - m) means:
// m bytes of 0xff at the beginning, followed by n bytes of 0x00,
// then 0xff for the rest of the bytes.
// ptr >= lo:
// no history.
// for end/short zone, ptr==lo and start_offset==0
// for start zone, see below
// lo ptr hi hi
// |----------|-------|----------------|............|
// -start 0 -start+offset MIN(avail,32)
// p_mask ffff..ff0000...........00ffff..........
// ptr < lo:
// only start zone.
// history
// ptr lo hi hi
// |----------|-------|----------------|............|
// 0 start start+offset end(<=32)
// p_mask ffff.....ffffff..ff0000...........00ffff..........
static really_inline
m256 vectoredLoad256(m256 *p_mask, const u8 *ptr, const size_t start_offset,
const u8 *lo, const u8 *hi,
const u8 *buf_history, size_t len_history,
const u32 nMasks) {
union {
u8 val8[32];
m256 val256;
} u;
u.val256 = zeroes256();
uintptr_t copy_start;
uintptr_t copy_len;
if (ptr >= lo) { // short/end/start zone
uintptr_t start = (uintptr_t)(ptr - lo);
uintptr_t avail = (uintptr_t)(hi - ptr);
if (avail >= 32) {
assert(start_offset - start <= 32);
*p_mask = loadu256(p_mask_arr256[32 - start_offset + start]
+ 32 - start_offset + start);
return loadu256(ptr);
}
assert(start_offset - start <= avail);
*p_mask = loadu256(p_mask_arr256[avail - start_offset + start]
+ 32 - start_offset + start);
copy_start = 0;
copy_len = avail;
} else { //start zone
uintptr_t need = MIN((uintptr_t)(lo - ptr),
MIN(len_history, nMasks - 1));
uintptr_t start = (uintptr_t)(lo - ptr);
uintptr_t i;
for (i = start - need; i < start; i++) {
u.val8[i] = buf_history[len_history - (start - i)];
}
uintptr_t end = MIN(32, (uintptr_t)(hi - ptr));
assert(start + start_offset <= end);
*p_mask = loadu256(p_mask_arr256[end - start - start_offset]
+ 32 - start - start_offset);
copy_start = start;
copy_len = end - start;
}
// Runt block from the buffer.
copyRuntBlock256(&u.val8[copy_start], &ptr[copy_start], copy_len);
return u.val256;
}
#endif // HAVE_AVX2
#if defined(HAVE_AVX512)
// Note: p_mask is an output param that initialises a poison mask.
// u64a k = ones_u64a << n' >> m'; // m' < n'
// *p_mask = set_mask_m512(~k);
// means p_mask consists of:
// (n' - m') poison bytes "0xff" at the beginning,
// followed by (64 - n') valid bytes "0x00",
// then the remaining m' poison bytes "0xff".
// ptr >= lo:
// no history.
// for end/short zone, ptr==lo and start_offset==0
// for start zone, see below
// lo ptr hi hi
// |----------|-------|----------------|............|
// -start 0 -start+offset MIN(avail,64)
// p_mask ffff..ff0000...........00ffff..........
// ptr < lo:
// only start zone.
// history
// ptr lo hi hi
// |----------|-------|----------------|............|
// 0 start start+offset end(<=64)
// p_mask ffff.....ffffff..ff0000...........00ffff..........
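// Worked example: in the short zone with start == start_offset == 0 and
// avail == 10, k = ones_u64a << 54 >> 54 sets the low ten bits, so
// set_mask_m512(~k) leaves bytes 0..9 valid and poisons bytes 10..63.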
static really_inline
m512 vectoredLoad512(m512 *p_mask, const u8 *ptr, const size_t start_offset,
const u8 *lo, const u8 *hi, const u8 *hbuf, size_t hlen,
const u32 nMasks) {
m512 val;
uintptr_t copy_start;
uintptr_t copy_len;
if (ptr >= lo) { // short/end/start zone
uintptr_t start = (uintptr_t)(ptr - lo);
uintptr_t avail = (uintptr_t)(hi - ptr);
if (avail >= 64) {
assert(start_offset - start <= 64);
u64a k = ones_u64a << (start_offset - start);
*p_mask = set_mask_m512(~k);
return loadu512(ptr);
}
assert(start_offset - start <= avail);
u64a k = ones_u64a << (64 - avail + start_offset - start)
>> (64 - avail);
*p_mask = set_mask_m512(~k);
copy_start = 0;
copy_len = avail;
} else { //start zone
uintptr_t need = MIN((uintptr_t)(lo - ptr),
MIN(hlen, nMasks - 1));
uintptr_t start = (uintptr_t)(lo - ptr);
u64a j = 0x7fffffffffffffffULL >> (63 - need) << (start - need);
val = loadu_maskz_m512(j, &hbuf[hlen - start]);
uintptr_t end = MIN(64, (uintptr_t)(hi - ptr));
assert(start + start_offset <= end);
u64a k = ones_u64a << (64 - end + start + start_offset) >> (64 - end);
*p_mask = set_mask_m512(~k);
copy_start = start;
copy_len = end - start;
}
assert(copy_len < 64);
assert(copy_len > 0);
u64a j = ones_u64a >> (64 - copy_len) << copy_start;
val = loadu_mask_m512(val, j, ptr);
return val;
}
#endif // HAVE_AVX512
static really_inline
u64a getConfVal(const struct FDR_Runtime_Args *a, const u8 *ptr, u32 byte,
CautionReason reason) {
@ -190,63 +419,27 @@ void do_confWithBit_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset,
if (!(fdrc->groups & *control)) {
continue;
}
u64a tmp = 0;
u64a confVal = getConfVal(a, ptr, byte, reason);
confWithBit(fdrc, a, ptr - a->buf + byte, control,
last_match, confVal);
last_match, confVal, &tmp, 0);
} while (unlikely(*conf));
}
static really_inline
void do_confWithBit1_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset,
const u32 *confBase, CautionReason reason,
const struct FDR_Runtime_Args *a, const u8 *ptr,
hwlmcb_rv_t *control, u32 *last_match) {
do {
u32 bit = TEDDY_FIND_AND_CLEAR_LSB(conf);
u32 byte = bit / bucket + offset;
u32 idx = bit % bucket;
u32 cf = confBase[idx];
const struct FDRConfirm *fdrc = (const struct FDRConfirm *)
((const u8 *)confBase + cf);
if (!(fdrc->groups & *control)) {
continue;
}
u64a confVal = getConfVal(a, ptr, byte, reason);
confWithBit1(fdrc, a, ptr - a->buf + byte, control, last_match,
confVal);
} while (unlikely(*conf));
const m128 *getMaskBase(const struct Teddy *teddy) {
return (const m128 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)));
}
static really_inline
void do_confWithBitMany_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset,
const u32 *confBase, CautionReason reason,
const struct FDR_Runtime_Args *a, const u8 *ptr,
hwlmcb_rv_t *control, u32 *last_match) {
do {
u32 bit = TEDDY_FIND_AND_CLEAR_LSB(conf);
u32 byte = bit / bucket + offset;
u32 idx = bit % bucket;
u32 cf = confBase[idx];
const struct FDRConfirm *fdrc = (const struct FDRConfirm *)
((const u8 *)confBase + cf);
if (!(fdrc->groups & *control)) {
continue;
}
u64a confVal = getConfVal(a, ptr, byte, reason);
confWithBitMany(fdrc, a, ptr - a->buf + byte, reason, control,
last_match, confVal);
} while (unlikely(*conf));
const u64a *getReinforcedMaskBase(const struct Teddy *teddy, u8 numMask) {
return (const u64a *)((const u8 *)getMaskBase(teddy)
+ ROUNDUP_CL(2 * numMask * sizeof(m128)));
}
static really_inline
const m128 * getMaskBase(const struct Teddy *teddy) {
return (const m128 *)((const u8 *)teddy + sizeof(struct Teddy));
}
static really_inline
const u32 * getConfBase(const struct Teddy *teddy, u8 numMask) {
return (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) +
(numMask*32));
const u32 *getConfBase(const struct Teddy *teddy) {
return (const u32 *)((const u8 *)teddy + teddy->confOffset);
}
#endif /* TEDDY_RUNTIME_COMMON_H_ */

View File

@ -139,6 +139,7 @@ Grey::Grey(void) :
limitSmallWriteOutfixSize(1048576), // 1 MB
smallWriteMaxPatterns(10000),
smallWriteMaxLiterals(10000),
smallWriteMergeBatchSize(20),
allowTamarama(true), // Tamarama engine
tamaChunkSize(100),
dumpFlags(0),
@ -302,6 +303,7 @@ void applyGreyOverrides(Grey *g, const string &s) {
G_UPDATE(limitSmallWriteOutfixSize);
G_UPDATE(smallWriteMaxPatterns);
G_UPDATE(smallWriteMaxLiterals);
G_UPDATE(smallWriteMergeBatchSize);
G_UPDATE(allowTamarama);
G_UPDATE(tamaChunkSize);
G_UPDATE(limitPatternCount);

View File

@ -157,6 +157,7 @@ struct Grey {
u32 limitSmallWriteOutfixSize; //!< max total size of outfix DFAs
u32 smallWriteMaxPatterns; // only try small writes if fewer patterns
u32 smallWriteMaxLiterals; // only try small writes if fewer literals
u32 smallWriteMergeBatchSize; // number of DFAs to merge in a batch
// Tamarama engine
bool allowTamarama;

View File

@ -227,10 +227,10 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
target_t target_info = platform ? target_t(*platform)
: get_current_target();
CompileContext cc(isStreaming, isVectored, target_info, g);
NG ng(cc, elements, somPrecision);
try {
CompileContext cc(isStreaming, isVectored, target_info, g);
NG ng(cc, elements, somPrecision);
for (unsigned int i = 0; i < elements; i++) {
// Add this expression to the compiler
try {
@ -262,7 +262,7 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
e.hasIndex ? (int)e.index : -1);
return HS_COMPILER_ERROR;
}
catch (std::bad_alloc) {
catch (const std::bad_alloc &) {
*db = nullptr;
*comp_error = const_cast<hs_compile_error_t *>(&hs_enomem);
return HS_COMPILER_ERROR;
@ -399,7 +399,7 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
*error = generateCompileError(e);
return HS_COMPILER_ERROR;
}
catch (std::bad_alloc) {
catch (std::bad_alloc &) {
*error = const_cast<hs_compile_error_t *>(&hs_enomem);
return HS_COMPILER_ERROR;
}

View File

@ -561,6 +561,18 @@ hs_error_t HS_CDECL hs_valid_platform(void);
*/
#define HS_ARCH_ERROR (-11)
/**
* Provided buffer was too small.
*
* This error indicates that there was insufficient space in the buffer. The
* call should be repeated with a larger provided buffer.
*
* Note: in this situation, it is normal for the amount of space required to be
* returned in the same manner as the used space would have been returned if the
* call was successful.
*/
#define HS_INSUFFICIENT_SPACE (-12)
/** @} */
#ifdef __cplusplus

View File

@ -321,6 +321,120 @@ hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id,
match_event_handler onEvent,
void *context);
/**
* Creates a compressed representation of the provided stream in the buffer
* provided. This compressed representation can be converted back into a stream
* state by using @ref hs_expand_stream() or @ref hs_reset_and_expand_stream().
* The size of the compressed representation will be placed into @a used_space.
*
* If there is not sufficient space in the buffer to hold the compressed
* representation, @ref HS_INSUFFICIENT_SPACE will be returned and @a used_space
* will be populated with the amount of space required.
*
* Note: this function does not close the provided stream, you may continue to
* use the stream or to free it with @ref hs_close_stream().
*
* @param stream
* The stream (as created by @ref hs_open_stream()) to be compressed.
*
* @param buf
* Buffer to write the compressed representation into. Note: if the call is
* just being used to determine the amount of space required, it is allowed
* to pass NULL here and @a buf_space as 0.
*
* @param buf_space
* The number of bytes in @a buf. If buf_space is too small, the call will
* fail with @ref HS_INSUFFICIENT_SPACE.
*
* @param used_space
* Pointer to where the amount of used space will be written to. The used
* buffer space is always less than or equal to @a buf_space. If the call
* fails with @ref HS_INSUFFICIENT_SPACE, this pointer will be used to
* write out the amount of buffer space required.
*
* @return
* @ref HS_SUCCESS on success, @ref HS_INSUFFICIENT_SPACE if the provided
* buffer is too small.
*/
hs_error_t HS_CDECL hs_compress_stream(const hs_stream_t *stream, char *buf,
size_t buf_space, size_t *used_space);
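For illustration, a caller can size the buffer with a probe call and retry on
@ref HS_INSUFFICIENT_SPACE; a minimal sketch (assuming a stream opened with
hs_open_stream() and ordinary malloc(); error handling elided):
    size_t used = 0;
    hs_error_t err = hs_compress_stream(stream, NULL, 0, &used);
    if (err == HS_INSUFFICIENT_SPACE) {
        char *buf = malloc(used); /* 'used' now holds the required size */
        err = hs_compress_stream(stream, buf, used, &used);
    }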
/**
* Decompresses a compressed representation created by @ref hs_compress_stream()
* into a new stream.
*
* Note: @a buf must correspond to a complete compressed representation created
* by @ref hs_compress_stream() of a stream that was opened against @a db. It is
* not always possible to detect misuse of this API and behaviour is undefined
* if these properties are not satisfied.
*
* @param db
* The compiled pattern database that the compressed stream was opened
* against.
*
* @param stream
* On success, a pointer to the expanded @ref hs_stream_t will be
* returned; NULL on failure.
*
* @param buf
* A compressed representation of a stream. These compressed forms are
* created by @ref hs_compress_stream().
*
* @param buf_size
* The size in bytes of the compressed representation.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t HS_CDECL hs_expand_stream(const hs_database_t *db,
hs_stream_t **stream, const char *buf,
size_t buf_size);
/**
* Decompresses a compressed representation created by @ref hs_compress_stream()
* on top of the 'to' stream. The 'to' stream will first be reset (reporting
* any EOD matches if a non-NULL @a onEvent callback handler is provided).
*
* Note: the 'to' stream must be opened against the same database as the
* compressed stream.
*
* Note: @a buf must correspond to a complete compressed representation created
* by @ref hs_compress_stream() of a stream that was opened against @a db. It is
* not always possible to detect misuse of this API and behaviour is undefined
* if these properties are not satisfied.
*
* @param to_stream
* A pointer to a valid stream (as created by @ref hs_open_stream()) on
* top of which the compressed representation will be expanded; the
* stream is reset first.
*
* @param buf
* A compressed representation of a stream. These compressed forms are
* created by @ref hs_compress_stream().
*
* @param buf_size
* The size in bytes of the compressed representation.
*
* @param scratch
* A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
* allowed to be NULL only if the @a onEvent callback is also NULL.
*
* @param onEvent
* Pointer to a match event callback function. If a NULL pointer is given,
* no matches will be returned.
*
* @param context
* The user defined pointer which will be passed to the callback function
* when a match occurs.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t HS_CDECL hs_reset_and_expand_stream(hs_stream_t *to_stream,
const char *buf, size_t buf_size,
hs_scratch_t *scratch,
match_event_handler onEvent,
void *context);
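A corresponding decompression sketch (assuming 'buf'/'used' from the
hs_compress_stream() example above; 'db', 'to', 'scratch', 'onEvent' and
'ctx' are hypothetical names for the caller's database, stream, scratch and
callback):
    hs_stream_t *expanded = NULL;
    hs_error_t err = hs_expand_stream(db, &expanded, buf, used);
    /* or, reusing an already-open stream 'to' (it is reset first): */
    err = hs_reset_and_expand_stream(to, buf, used, scratch, onEvent, ctx);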
/**
* The block (non-streaming) regular expression scanner.
*

View File

@ -170,7 +170,7 @@ void do_accel_streaming(const union AccelAux *aux, const u8 *hbuf, size_t hlen,
}
hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len,
size_t start, HWLMCallback cb, void *ctxt,
size_t start, HWLMCallback cb, struct hs_scratch *scratch,
hwlm_group_t groups) {
assert(t);
@ -184,25 +184,23 @@ hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len,
if (t->type == HWLM_ENGINE_NOOD) {
DEBUG_PRINTF("calling noodExec\n");
return noodExec(HWLM_C_DATA(t), buf + start, len - start, start, cb,
ctxt);
} else {
assert(t->type == HWLM_ENGINE_FDR);
const union AccelAux *aa = &t->accel0;
if ((groups & ~t->accel1_groups) == 0) {
DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type);
aa = &t->accel1;
}
do_accel_block(aa, buf, len, &start);
DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups,
start);
return fdrExec(HWLM_C_DATA(t), buf, len, start, cb, ctxt, groups);
return noodExec(HWLM_C_DATA(t), buf, len, start, cb, scratch);
}
assert(t->type == HWLM_ENGINE_FDR);
const union AccelAux *aa = &t->accel0;
if ((groups & ~t->accel1_groups) == 0) {
DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type);
aa = &t->accel1;
}
do_accel_block(aa, buf, len, &start);
DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, start);
return fdrExec(HWLM_C_DATA(t), buf, len, start, cb, scratch, groups);
}
hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch,
size_t len, size_t start, HWLMCallback cb,
void *ctxt, hwlm_group_t groups) {
hwlm_error_t hwlmExecStreaming(const struct HWLM *t, size_t len, size_t start,
HWLMCallback cb, struct hs_scratch *scratch,
hwlm_group_t groups) {
assert(t);
assert(scratch);
@ -224,24 +222,21 @@ hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch,
// If we've been handed a start offset, we can use a block mode scan at
// that offset.
if (start) {
return noodExec(HWLM_C_DATA(t), buf + start, len - start, start,
cb, ctxt);
return noodExec(HWLM_C_DATA(t), buf, len, start, cb, scratch);
} else {
return noodExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, cb,
ctxt, scratch->fdr_temp_buf,
FDR_TEMP_BUF_SIZE);
scratch);
}
} else {
// t->type == HWLM_ENGINE_FDR
const union AccelAux *aa = &t->accel0;
if ((groups & ~t->accel1_groups) == 0) {
DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type);
aa = &t->accel1;
}
do_accel_streaming(aa, hbuf, hlen, buf, len, &start);
DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups,
start);
return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len,
start, cb, ctxt, groups);
}
assert(t->type == HWLM_ENGINE_FDR);
const union AccelAux *aa = &t->accel0;
if ((groups & ~t->accel1_groups) == 0) {
DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type);
aa = &t->accel1;
}
do_accel_streaming(aa, hbuf, hlen, buf, len, &start);
DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, start);
return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, start, cb,
scratch, groups);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -71,14 +71,17 @@ typedef hwlm_group_t hwlmcb_rv_t;
* designed for a different architecture). */
#define HWLM_ERROR_UNKNOWN 2
/** \brief Max length of the literal passed to HWLM. */
#define HWLM_LITERAL_MAX_LEN 8
struct hs_scratch;
struct HWLM;
/** \brief The type for an HWLM callback.
*
* This callback receives a start-of-match offset, an end-of-match offset, the
* ID of the match and the context pointer that was passed into \ref
* hwlmExec or \ref hwlmExecStreaming.
* This callback receives an end-of-match offset, the ID of the match and
* the context pointer that was passed into \ref hwlmExec or
* \ref hwlmExecStreaming.
*
* A callback return of \ref HWLM_TERMINATE_MATCHING will stop matching.
*
@ -92,8 +95,8 @@ struct HWLM;
* belonging to the literal which was active when the end match location
* was first reached.
*/
typedef hwlmcb_rv_t (*HWLMCallback)(size_t start, size_t end, u32 id,
void *context);
typedef hwlmcb_rv_t (*HWLMCallback)(size_t end, u32 id,
struct hs_scratch *scratch);
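For example, a minimal callback under this signature might look like the
following sketch (the match handling is hypothetical; HWLM_CONTINUE_MATCHING
and HWLM_TERMINATE_MATCHING are the return conventions from this header):
    static hwlmcb_rv_t onHwlmMatch(size_t end, u32 id,
                                   struct hs_scratch *scratch) {
        /* record the match for literal 'id' ending at offset 'end' */
        return HWLM_CONTINUE_MATCHING; /* or HWLM_TERMINATE_MATCHING */
    }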
/** \brief Match strings in table.
*
@ -104,24 +107,26 @@ typedef hwlmcb_rv_t (*HWLMCallback)(size_t start, size_t end, u32 id,
* Returns \ref HWLM_TERMINATED if scanning is cancelled due to the callback
* returning \ref HWLM_TERMINATE_MATCHING.
*
* \p start is the first offset at which a match may start.
* \p start is the first offset at which a match may start. Note: match
* starts may include masks overhanging the main literal.
*
* The underlying engine may choose not to report any match which starts before
* the first possible match of a literal which is in the initial group mask.
*/
hwlm_error_t hwlmExec(const struct HWLM *tab, const u8 *buf, size_t len,
size_t start, HWLMCallback callback, void *context,
hwlm_group_t groups);
size_t start, HWLMCallback callback,
struct hs_scratch *scratch, hwlm_group_t groups);
/** \brief As for \ref hwlmExec, but a streaming case across two buffers.
*
* \p scratch is used to access fdr_temp_buf and to access the history buffer,
* history length and the main buffer.
*
* \p len is the length of the main buffer to be scanned.
*
* \p start is an advisory hint representing the first offset at which a match
* may start. Some underlying literal matches may not respect it.
* may start. Some underlying literal matches may not respect it. Note: match
* starts may include masks overhanging the main literal.
*
* \p scratch is used to access the history buffer, history length and
* the main buffer.
*
* Two buffers/lengths are provided. Matches that occur entirely within
* the history buffer will not be reported by this function. The offsets
@ -129,10 +134,9 @@ hwlm_error_t hwlmExec(const struct HWLM *tab, const u8 *buf, size_t len,
* match at byte 10 of the main buffer is reported as 10). Matches that start
* in the history buffer will have starts reported with 'negative' values.
*/
hwlm_error_t hwlmExecStreaming(const struct HWLM *tab,
struct hs_scratch *scratch, size_t len,
size_t start, HWLMCallback callback,
void *context, hwlm_group_t groups);
hwlm_error_t hwlmExecStreaming(const struct HWLM *tab, size_t len, size_t start,
HWLMCallback callback,
struct hs_scratch *scratch, hwlm_group_t groups);
#ifdef __cplusplus
} /* extern "C" */

View File

@ -41,8 +41,12 @@
#include "scratch.h"
#include "ue2common.h"
#include "fdr/fdr_compile.h"
#include "fdr/fdr_compile_internal.h"
#include "fdr/fdr_engine_description.h"
#include "fdr/teddy_engine_description.h"
#include "util/compile_context.h"
#include "util/compile_error.h"
#include "util/make_unique.h"
#include "util/ue2string.h"
#include <cassert>
@ -53,6 +57,28 @@ using namespace std;
namespace ue2 {
HWLMProto::HWLMProto(u8 engType_in, vector<hwlmLiteral> lits_in)
: engType(engType_in), lits(move(lits_in)) {}
HWLMProto::HWLMProto(u8 engType_in,
unique_ptr<FDREngineDescription> eng_in,
vector<hwlmLiteral> lits_in,
map<u32, vector<u32>> bucketToLits_in,
bool make_small_in)
: engType(engType_in), fdrEng(move(eng_in)), lits(move(lits_in)),
bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {}
HWLMProto::HWLMProto(u8 engType_in,
unique_ptr<TeddyEngineDescription> eng_in,
vector<hwlmLiteral> lits_in,
map<u32, vector<u32>> bucketToLits_in,
bool make_small_in)
: engType(engType_in), teddyEng(move(eng_in)),
lits(move(lits_in)),
bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {}
HWLMProto::~HWLMProto() {}
static
void dumpLits(UNUSED const vector<hwlmLiteral> &lits) {
#ifdef DEBUG
@ -89,17 +115,55 @@ bool isNoodleable(const vector<hwlmLiteral> &lits,
return false;
}
if (!lits.front().msk.empty()) {
DEBUG_PRINTF("noodle can't handle supplementary masks\n");
return false;
}
return true;
}
bytecode_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits, bool make_small,
const CompileContext &cc,
bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc,
UNUSED hwlm_group_t expected_groups) {
size_t engSize = 0;
shared_ptr<void> eng;
const auto &lits = proto.lits;
DEBUG_PRINTF("building table with %zu strings\n", lits.size());
if (proto.engType == HWLM_ENGINE_NOOD) {
DEBUG_PRINTF("build noodle table\n");
const hwlmLiteral &lit = lits.front();
auto noodle = noodBuildTable(lit);
if (noodle) {
engSize = noodle.size();
}
eng = move(noodle);
} else {
DEBUG_PRINTF("building a new deal\n");
auto fdr = fdrBuildTable(proto, cc.grey);
if (fdr) {
engSize = fdr.size();
}
eng = move(fdr);
}
if (!eng) {
return nullptr;
}
assert(engSize);
if (engSize > cc.grey.limitLiteralMatcherSize) {
throw ResourceLimitError();
}
const size_t hwlm_len = ROUNDUP_CL(sizeof(HWLM)) + engSize;
auto h = make_zeroed_bytecode_ptr<HWLM>(hwlm_len, 64);
h->type = proto.engType;
memcpy(HWLM_DATA(h.get()), eng.get(), engSize);
return h;
}
unique_ptr<HWLMProto>
hwlmBuildProto(vector<hwlmLiteral> &lits, bool make_small,
const CompileContext &cc) {
assert(!lits.empty());
dumpLits(lits);
@ -129,9 +193,7 @@ bytecode_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits, bool make_small,
}
}
u8 engType = 0;
size_t engSize = 0;
shared_ptr<void> eng;
unique_ptr<HWLMProto> proto;
DEBUG_PRINTF("building table with %zu strings\n", lits.size());
@ -139,39 +201,17 @@ bytecode_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits, bool make_small,
if (isNoodleable(lits, cc)) {
DEBUG_PRINTF("build noodle table\n");
engType = HWLM_ENGINE_NOOD;
const hwlmLiteral &lit = lits.front();
auto noodle = noodBuildTable(lit);
if (noodle) {
engSize = noodle.size();
}
eng = move(noodle);
proto = ue2::make_unique<HWLMProto>(HWLM_ENGINE_NOOD, lits);
} else {
DEBUG_PRINTF("building a new deal\n");
engType = HWLM_ENGINE_FDR;
auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey);
if (fdr) {
engSize = fdr.size();
proto = fdrBuildProto(HWLM_ENGINE_FDR, lits, make_small,
cc.target_info, cc.grey);
if (!proto) {
return nullptr;
}
eng = move(fdr);
}
if (!eng) {
return nullptr;
}
assert(engSize);
if (engSize > cc.grey.limitLiteralMatcherSize) {
throw ResourceLimitError();
}
const size_t hwlm_len = ROUNDUP_CL(sizeof(HWLM)) + engSize;
auto h = make_zeroed_bytecode_ptr<HWLM>(hwlm_len, 64);
h->type = engType;
memcpy(HWLM_DATA(h.get()), eng.get(), engSize);
return h;
return proto;
}
size_t hwlmSize(const HWLM *h) {

View File

@ -34,9 +34,11 @@
#define HWLM_BUILD_H
#include "hwlm.h"
#include "hwlm_literal.h"
#include "ue2common.h"
#include "util/bytecode_ptr.h"
#include <map>
#include <memory>
#include <vector>
@ -44,15 +46,62 @@ struct HWLM;
namespace ue2 {
class FDREngineDescription;
class TeddyEngineDescription;
struct CompileContext;
struct Grey;
struct hwlmLiteral;
/** \brief Class representing a literal matcher prototype. */
struct HWLMProto {
/**
* \brief Engine type to distinguish noodle from FDR and Teddy.
*/
u8 engType;
/**
* \brief FDR engine description.
*/
std::unique_ptr<FDREngineDescription> fdrEng;
/**
* \brief Teddy engine description.
*/
std::unique_ptr<TeddyEngineDescription> teddyEng;
/**
* \brief HWLM literals passed from Rose.
*/
std::vector<hwlmLiteral> lits;
/**
* \brief Bucket assignment info in FDR and Teddy
*/
std::map<u32, std::vector<u32>> bucketToLits;
/**
* \brief Flag to optimise matcher for small size from Rose.
*/
bool make_small = false;
HWLMProto(u8 engType_in, std::vector<hwlmLiteral> lits_in);
HWLMProto(u8 engType_in, std::unique_ptr<FDREngineDescription> eng_in,
std::vector<hwlmLiteral> lits_in,
std::map<u32, std::vector<u32>> bucketToLits_in,
bool make_small_in);
HWLMProto(u8 engType_in, std::unique_ptr<TeddyEngineDescription> eng_in,
std::vector<hwlmLiteral> lits_in,
std::map<u32, std::vector<u32>> bucketToLits_in,
bool make_small_in);
~HWLMProto();
};
/** \brief Build an \ref HWLM literal matcher runtime structure for a group of
* literals.
*
* \param lits The group of literals.
* \param make_small Optimise matcher for small size.
* \param proto Literal matcher prototype.
* \param cc Compile context.
* \param expected_groups FIXME: document me!
*
@ -60,10 +109,13 @@ struct hwlmLiteral;
* may result in a nullptr return value, or a std::bad_alloc exception being
* thrown.
*/
bytecode_ptr<HWLM> hwlmBuild(const std::vector<hwlmLiteral> &lits,
bool make_small, const CompileContext &cc,
bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc,
hwlm_group_t expected_groups = HWLM_ALL_GROUPS);
std::unique_ptr<HWLMProto>
hwlmBuildProto(std::vector<hwlmLiteral> &lits, bool make_small,
const CompileContext &cc);
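The split above implies a two-phase compile: hwlmBuildProto produces an engine-neutral prototype from the literal set, and hwlmBuild materialises the runtime bytecode from it. A minimal sketch of how a caller might chain the two — the wrapper function and its error handling are illustrative, not part of the API:

```cpp
// Sketch only: assumes this header's own includes and a literal set populated
// by the Rose build code elsewhere in the compiler.
bytecode_ptr<HWLM> buildMatcherSketch(std::vector<hwlmLiteral> &lits,
                                      const CompileContext &cc) {
    auto proto = hwlmBuildProto(lits, /*make_small=*/false, cc);
    if (!proto) {
        return nullptr; // no engine could be constructed
    }
    return hwlmBuild(*proto, cc, HWLM_ALL_GROUPS);
}
```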
/**
* Returns an estimate of the number of repeated characters on the end of a
* literal that will make a literal set of size \a numLiterals suffer

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -38,16 +38,19 @@
#include "ue2common.h"
#include "fdr/fdr_dump.h"
#include "nfa/accel_dump.h"
#include <cstdio>
#include "util/dump_util.h"
#ifndef DUMP_SUPPORT
#error No dump support!
#endif
using namespace std;
namespace ue2 {
void hwlmPrintStats(const HWLM *h, FILE *f) {
void hwlmGenerateDumpFiles(const HWLM *h, const string &base) {
StdioFile f(base + ".txt", "w");
switch (h->type) {
case HWLM_ENGINE_NOOD:
noodPrintStats((const noodTable *)HWLM_C_DATA(h), f);
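The StdioFile object introduced here (and reused throughout the dump code below) is an RAII replacement for paired fopen/fclose calls: the file is closed on scope exit, and the wrapper converts to `FILE *` so existing fprintf calls and `FILE *` parameters keep working. A hedged approximation of its shape, since the real class in util/dump_util.h may differ in detail:

```cpp
#include <cstdio>
#include <stdexcept>
#include <string>

// Approximate sketch of an RAII FILE* wrapper; not the actual StdioFile.
class StdioFileSketch {
public:
    StdioFileSketch(const std::string &name, const char *mode)
        : f(std::fopen(name.c_str(), mode)) {
        if (!f) {
            throw std::runtime_error("failed to open " + name);
        }
    }
    ~StdioFileSketch() { std::fclose(f); }
    StdioFileSketch(const StdioFileSketch &) = delete;
    StdioFileSketch &operator=(const StdioFileSketch &) = delete;
    operator FILE *() const { return f; } // pass straight to fprintf et al.
private:
    FILE *f;
};
```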

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -35,16 +35,16 @@
#ifdef DUMP_SUPPORT
#include <cstdio>
#include <string>
struct HWLM;
namespace ue2 {
/** \brief Dump some information about the given HWLM structure. */
void hwlmPrintStats(const HWLM *h, FILE *f);
void hwlmGenerateDumpFiles(const HWLM *h, const std::string &base);
} // namespace ue2
#endif
#endif
#endif // DUMP_SUPPORT
#endif // HWLM_DUMP_H

View File

@ -42,12 +42,11 @@
namespace ue2 {
/** \brief Max length of the literal passed to HWLM. */
#define HWLM_LITERAL_MAX_LEN 8
/** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */
#define HWLM_MASKLEN 8
#define INVALID_LIT_ID ~0U
/** \brief Class representing a literal, fed to \ref hwlmBuild. */
struct hwlmLiteral {
std::string s; //!< \brief The literal itself.
@ -67,6 +66,21 @@ struct hwlmLiteral {
* can be quashed by the literal matcher. */
bool noruns;
/** \brief Included literal id. */
u32 included_id = INVALID_LIT_ID;
/** \brief Squash mask for FDR's confirm mask for included literals.
*
* In FDR confirm, if a literal is included in another bucket, this mask
* can squash that bucket's bit in the FDR confirm mask, so that the
* included literal's programs are run directly and the confirm work is
* avoided.
*
* This value is calculated in the FDR compile code once bucket assignment
* is complete.
*/
u8 squash = 0;
/** \brief Set of groups that literal belongs to.
*
* Use \ref HWLM_ALL_GROUPS for a literal that could match regardless of

View File

@ -35,14 +35,33 @@
#include "hwlm_literal.h"
#include "noodle_internal.h"
#include "util/bitutils.h"
#include "util/compare.h"
#include "util/verify_types.h"
#include "ue2common.h"
#include <cstring> // for memcpy
#include <vector>
using std::vector;
namespace ue2 {
static
u64a make_u64a_mask(const vector<u8> &v) {
assert(v.size() <= sizeof(u64a));
if (v.size() > sizeof(u64a)) {
throw std::exception();
}
u64a mask = 0;
size_t len = v.size();
unsigned char *m = (unsigned char *)&mask;
DEBUG_PRINTF("making mask len %zu\n", len);
memcpy(m, &v[0], len);
return mask;
}
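A worked check of the helper above, assuming the little-endian layout Hyperscan targets: the first vector byte is copied to the lowest address, i.e. the least-significant byte of the result.

```cpp
// Hypothetical usage; little-endian assumption.
u64a m = make_u64a_mask({0x0f, 0xff});
assert(m == 0xff0fULL); // v[0] = 0x0f in the LSB, v[1] = 0xff above it
```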
static
size_t findNoodFragOffset(const hwlmLiteral &lit) {
const auto &s = lit.s;
@ -67,30 +86,59 @@ size_t findNoodFragOffset(const hwlmLiteral &lit) {
}
bytecode_ptr<noodTable> noodBuildTable(const hwlmLiteral &lit) {
if (!lit.msk.empty()) {
DEBUG_PRINTF("noodle can't handle supplementary masks\n");
return nullptr;
const auto &s = lit.s;
size_t mask_len = std::max(s.length(), lit.msk.size());
DEBUG_PRINTF("mask is %zu bytes\n", lit.msk.size());
assert(mask_len <= 8);
assert(lit.msk.size() == lit.cmp.size());
vector<u8> n_msk(mask_len);
vector<u8> n_cmp(mask_len);
for (unsigned i = mask_len - lit.msk.size(), j = 0; i < mask_len;
i++, j++) {
DEBUG_PRINTF("m[%u] %hhx c[%u] %hhx\n", i, lit.msk[j], i, lit.cmp[j]);
n_msk[i] = lit.msk[j];
n_cmp[i] = lit.cmp[j];
}
const auto &s = lit.s;
size_t noodle_len = sizeof(noodTable) + s.length();
auto n = make_zeroed_bytecode_ptr<noodTable>(noodle_len);
size_t s_off = mask_len - s.length();
for (unsigned i = s_off; i < mask_len; i++) {
u8 c = s[i - s_off];
u8 si_msk = lit.nocase && ourisalpha(c) ? (u8)CASE_CLEAR : (u8)0xff;
n_msk[i] |= si_msk;
n_cmp[i] |= c & si_msk;
assert((n_cmp[i] & si_msk) == c);
DEBUG_PRINTF("m[%u] %hhx c[%u] %hhx '%c'\n", i, n_msk[i], i, n_cmp[i],
ourisprint(c) ? (char)c : '.');
}
auto n = make_zeroed_bytecode_ptr<noodTable>(sizeof(noodTable));
assert(n);
DEBUG_PRINTF("size of nood %zu\n", sizeof(noodTable));
size_t key_offset = findNoodFragOffset(lit);
n->id = lit.id;
n->len = verify_u32(s.length());
n->key_offset = verify_u32(key_offset);
n->single = s.length() == 1 ? 1 : 0;
n->key_offset = verify_u8(s.length() - key_offset);
n->nocase = lit.nocase ? 1 : 0;
memcpy(n->str, s.c_str(), s.length());
n->key0 = s[key_offset];
if (n->single) {
n->key1 = 0;
} else {
n->key1 = s[key_offset + 1];
}
n->msk = make_u64a_mask(n_msk);
n->cmp = make_u64a_mask(n_cmp);
n->msk_len = mask_len;
return n;
}
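To make the mask construction concrete, consider a hypothetical caseless literal "Ab" with no supplementary mask: both bytes are alphabetic, so each gets a CASE_CLEAR (0xdf) mask byte and an uppercased compare byte, and any case mix of the literal then satisfies `(v & msk) == cmp`.

```cpp
// Hedged illustration for lit.s = "Ab", lit.nocase = true, lit.msk empty:
//   n_msk = {0xdf, 0xdf}              (CASE_CLEAR for both alpha bytes)
//   n_cmp = {'A' & 0xdf, 'b' & 0xdf}  = {0x41, 0x42}
// After make_u64a_mask (little-endian): msk = 0xdfdf, cmp = 0x4241.
u64a v = 0x42ULL << 8 | 0x61ULL;      // input bytes "aB"
assert((v & 0xdfdfULL) == 0x4241ULL); // matches regardless of case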
size_t noodSize(const noodTable *n) {
assert(n); // shouldn't call with null
return sizeof(*n) + n->len;
size_t noodSize(const noodTable *) {
return sizeof(noodTable);
}
} // namespace ue2
@ -102,13 +150,17 @@ namespace ue2 {
void noodPrintStats(const noodTable *n, FILE *f) {
fprintf(f, "Noodle table\n");
fprintf(f, "Len: %u Key Offset: %u\n", n->len, n->key_offset);
fprintf(f, "Key Offset: %u\n", n->key_offset);
fprintf(f, "Msk: %llx Cmp: %llx MskLen %u\n",
n->msk >> 8 * (8 - n->msk_len), n->cmp >> 8 * (8 - n->msk_len),
n->msk_len);
fprintf(f, "String: ");
for (u32 i = 0; i < n->len; i++) {
if (isgraph(n->str[i]) && n->str[i] != '\\') {
fprintf(f, "%c", n->str[i]);
for (u32 i = 0; i < n->msk_len; i++) {
const u8 *m = (const u8 *)&n->cmp;
if (isgraph(m[i]) && m[i] != '\\') {
fprintf(f, "%c", m[i]);
} else {
fprintf(f, "\\x%02hhx", n->str[i]);
fprintf(f, "\\x%02hhx", m[i]);
}
}
fprintf(f, "\n");

View File

@ -32,6 +32,7 @@
#include "hwlm.h"
#include "noodle_engine.h"
#include "noodle_internal.h"
#include "scratch.h"
#include "ue2common.h"
#include "util/arch.h"
#include "util/bitutils.h"
@ -39,6 +40,7 @@
#include "util/intrinsics.h"
#include "util/join.h"
#include "util/masked_move.h"
#include "util/partial_store.h"
#include "util/simd_utils.h"
#include <ctype.h>
@ -49,7 +51,7 @@
struct cb_info {
HWLMCallback cb; //!< callback function called on match
u32 id; //!< ID to pass to callback on match
void *ctx; //!< caller-supplied context to pass to callback
struct hs_scratch *scratch; //!< scratch to pass to callback
size_t offsetAdj; //!< used in streaming mode
};
@ -83,9 +85,8 @@ struct cb_info {
while (unlikely(z)) { \
Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \
size_t matchPos = d - buf + pos; \
DEBUG_PRINTF("match pos %zu\n", matchPos); \
hwlmcb_rv_t rv = final(buf, len, key, 1, 0, 0, noCase, cbi, \
matchPos); \
DEBUG_PRINTF("match pos %zu\n", matchPos); \
hwlmcb_rv_t rv = final(n, buf, len, 1, cbi, matchPos); \
RETURN_IF_TERMINATED(rv); \
} \
} while (0)
@ -95,9 +96,8 @@ struct cb_info {
while (unlikely(z)) { \
Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \
size_t matchPos = d - buf + pos - 1; \
DEBUG_PRINTF("match pos %zu\n", matchPos); \
hwlmcb_rv_t rv = final(buf, len, key, keyLen, keyOffset, 1, \
noCase, cbi, matchPos); \
DEBUG_PRINTF("match pos %zu\n", matchPos); \
hwlmcb_rv_t rv = final(n, buf, len, 0, cbi, matchPos); \
RETURN_IF_TERMINATED(rv); \
} \
} while (0)
@ -111,21 +111,26 @@ u8 caseClear8(u8 x, bool noCase) {
// is used only for single chars with case insensitivity used correctly,
// so it can go straight to the callback if we get this far.
static really_inline
hwlm_error_t final(const u8 *buf, size_t len, const u8 *key, size_t keyLen,
size_t keyOffset, bool is_double, bool noCase,
const struct cb_info *cbi, size_t pos) {
pos -= keyOffset;
if (is_double) {
if (pos + keyLen > len) {
return HWLM_SUCCESS;
}
if (cmpForward(buf + pos, key, keyLen, noCase)) { // ret 1 on mismatch
return HWLM_SUCCESS;
hwlm_error_t final(const struct noodTable *n, const u8 *buf, UNUSED size_t len,
char single, const struct cb_info *cbi, size_t pos) {
if (single) {
if (n->msk_len == 1) {
goto match;
}
}
pos += cbi->offsetAdj;
DEBUG_PRINTF("match @ %zu->%zu\n", pos, (pos + keyLen - 1));
hwlmcb_rv_t rv = cbi->cb(pos, (pos + keyLen - 1), cbi->id, cbi->ctx);
assert(len >= n->msk_len);
u64a v =
partial_load_u64a(buf + pos + n->key_offset - n->msk_len, n->msk_len);
DEBUG_PRINTF("v %016llx msk %016llx cmp %016llx\n", v, n->msk, n->cmp);
if ((v & n->msk) != n->cmp) {
/* mask didn't match */
return HWLM_SUCCESS;
}
match:
pos -= cbi->offsetAdj;
DEBUG_PRINTF("match @ %zu\n", pos + n->key_offset);
hwlmcb_rv_t rv = cbi->cb(pos + n->key_offset - 1, cbi->id, cbi->scratch);
if (rv == HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATED;
}
@ -147,38 +152,43 @@ hwlm_error_t final(const u8 *buf, size_t len, const u8 *key, size_t keyLen,
#endif
static really_inline
hwlm_error_t scanSingleMain(const u8 *buf, size_t len, const u8 *key,
bool noCase, const struct cb_info *cbi) {
hwlm_error_t scanSingleMain(const struct noodTable *n, const u8 *buf,
size_t len, size_t start, bool noCase,
const struct cb_info *cbi) {
const MASK_TYPE mask1 = getMask(key[0], noCase);
const MASK_TYPE mask1 = getMask(n->key0, noCase);
const MASK_TYPE caseMask = getCaseMask();
size_t offset = start + n->msk_len - 1;
size_t end = len;
assert(offset < end);
#if !defined(HAVE_AVX512)
hwlm_error_t rv;
size_t end = len;
if (len < CHUNKSIZE) {
rv = scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, 0, len);
if (end - offset < CHUNKSIZE) {
rv = scanSingleShort(n, buf, len, noCase, caseMask, mask1, cbi, offset,
end);
return rv;
}
if (len == CHUNKSIZE) {
rv = scanSingleUnaligned(buf, len, 0, key, noCase, caseMask, mask1, cbi,
0, len);
if (end - offset == CHUNKSIZE) {
rv = scanSingleUnaligned(n, buf, len, offset, noCase, caseMask, mask1,
cbi, offset, end);
return rv;
}
uintptr_t data = (uintptr_t)buf;
uintptr_t s2Start = ROUNDUP_N(data, CHUNKSIZE) - data;
uintptr_t s2Start = ROUNDUP_N(data + offset, CHUNKSIZE) - data;
uintptr_t last = data + end;
uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data;
uintptr_t s3Start = len - CHUNKSIZE;
uintptr_t s3Start = end - CHUNKSIZE;
if (s2Start) {
if (offset != s2Start) {
// first scan out to the fast scan starting point
DEBUG_PRINTF("stage 1: -> %zu\n", s2Start);
rv = scanSingleUnaligned(buf, len, 0, key, noCase, caseMask, mask1, cbi,
0, s2Start);
rv = scanSingleUnaligned(n, buf, len, offset, noCase, caseMask, mask1,
cbi, offset, s2Start);
RETURN_IF_TERMINATED(rv);
}
@ -186,68 +196,70 @@ hwlm_error_t scanSingleMain(const u8 *buf, size_t len, const u8 *key,
// scan as far as we can, bounded by the last point this key can
// possibly match
DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s2End);
rv = scanSingleFast(buf, len, key, noCase, caseMask, mask1, cbi,
s2Start, s2End);
rv = scanSingleFast(n, buf, len, noCase, caseMask, mask1, cbi, s2Start,
s2End);
RETURN_IF_TERMINATED(rv);
}
// if we are done bail out
if (s2End == end) {
if (s2End == len) {
return HWLM_SUCCESS;
}
DEBUG_PRINTF("stage 3: %zu -> %zu\n", s2End, end);
rv = scanSingleUnaligned(buf, len, s3Start, key, noCase, caseMask, mask1,
cbi, s2End, end);
DEBUG_PRINTF("stage 3: %zu -> %zu\n", s2End, len);
rv = scanSingleUnaligned(n, buf, len, s3Start, noCase, caseMask, mask1, cbi,
s2End, len);
return rv;
#else // HAVE_AVX512
return scanSingle512(buf, len, key, noCase, caseMask, mask1, cbi);
return scanSingle512(n, buf, len, noCase, caseMask, mask1, cbi, offset,
end);
#endif
}
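The staging above follows a common pattern: an unaligned head scan up to the first CHUNKSIZE boundary, an aligned fast loop over whole chunks, then an unaligned tail. An illustrative skeleton of the bounds arithmetic — names and the plain power-of-two rounding are mine; the real code uses ROUNDUP_N/ROUNDDOWN_N, SIMD loads, and handles short buffers separately as shown above:

```cpp
#include <cstddef>
#include <cstdint>

// Sketch of the three-stage scan bounds; chunk must be a power of two.
void stagedScanSketch(const uint8_t *buf, size_t offset, size_t end,
                      size_t chunk) {
    uintptr_t data = (uintptr_t)buf;
    size_t s2Start = ((data + offset + chunk - 1) & ~(chunk - 1)) - data;
    size_t s2End = ((data + end) & ~(chunk - 1)) - data;
    if (offset != s2Start) {
        /* stage 1: scan [offset, s2Start) with one unaligned load */
    }
    if (s2Start != s2End) {
        /* stage 2: scan [s2Start, s2End) with aligned full-chunk loads */
    }
    if (s2End != end) {
        /* stage 3: scan [s2End, end) with a final unaligned load */
    }
}
```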
static really_inline
hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key,
size_t keyLen, size_t keyOffset, bool noCase,
hwlm_error_t scanDoubleMain(const struct noodTable *n, const u8 *buf,
size_t len, size_t start, bool noCase,
const struct cb_info *cbi) {
// we stop scanning for the key-fragment when the rest of the key can't
// possibly fit in the remaining buffer
size_t end = len - keyLen + keyOffset + 2;
size_t end = len - n->key_offset + 2;
// the first place the key can match
size_t offset = start + n->msk_len - n->key_offset;
const MASK_TYPE caseMask = getCaseMask();
const MASK_TYPE mask1 = getMask(key[keyOffset + 0], noCase);
const MASK_TYPE mask2 = getMask(key[keyOffset + 1], noCase);
const MASK_TYPE mask1 = getMask(n->key0, noCase);
const MASK_TYPE mask2 = getMask(n->key1, noCase);
#if !defined(HAVE_AVX512)
hwlm_error_t rv;
if (end - keyOffset < CHUNKSIZE) {
rv = scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask,
mask1, mask2, cbi, keyOffset, end);
if (end - offset < CHUNKSIZE) {
rv = scanDoubleShort(n, buf, len, noCase, caseMask, mask1, mask2, cbi,
offset, end);
return rv;
}
if (end - keyOffset == CHUNKSIZE) {
rv = scanDoubleUnaligned(buf, len, keyOffset, key, keyLen, keyOffset,
noCase, caseMask, mask1, mask2, cbi, keyOffset,
end);
if (end - offset == CHUNKSIZE) {
rv = scanDoubleUnaligned(n, buf, len, offset, noCase, caseMask, mask1,
mask2, cbi, offset, end);
return rv;
}
uintptr_t data = (uintptr_t)buf;
uintptr_t s2Start = ROUNDUP_N(data + keyOffset, CHUNKSIZE) - data;
uintptr_t s2Start = ROUNDUP_N(data + offset, CHUNKSIZE) - data;
uintptr_t s1End = s2Start + 1;
uintptr_t last = data + end;
uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data;
uintptr_t s3Start = end - CHUNKSIZE;
uintptr_t off = keyOffset;
uintptr_t off = offset;
if (s2Start != keyOffset) {
if (s2Start != off) {
// first scan out to the fast scan starting point plus one char past to
// catch the key on the overlap
DEBUG_PRINTF("stage 1: -> %zu\n", s2Start);
rv = scanDoubleUnaligned(buf, len, keyOffset, key, keyLen, keyOffset,
noCase, caseMask, mask1, mask2, cbi, off,
s1End);
DEBUG_PRINTF("stage 1: %zu -> %zu\n", off, s2Start);
rv = scanDoubleUnaligned(n, buf, len, offset, noCase, caseMask, mask1,
mask2, cbi, off, s1End);
RETURN_IF_TERMINATED(rv);
}
off = s1End;
@ -261,8 +273,8 @@ hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key,
// scan as far as we can, bounded by the last point this key can
// possibly match
DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s3Start);
rv = scanDoubleFast(buf, len, key, keyLen, keyOffset, noCase, caseMask,
mask1, mask2, cbi, s2Start, s2End);
rv = scanDoubleFast(n, buf, len, noCase, caseMask, mask1, mask2, cbi,
s2Start, s2End);
RETURN_IF_TERMINATED(rv);
off = s2End;
}
@ -273,130 +285,158 @@ hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key,
}
DEBUG_PRINTF("stage 3: %zu -> %zu\n", s3Start, end);
rv = scanDoubleUnaligned(buf, len, s3Start, key, keyLen, keyOffset, noCase,
caseMask, mask1, mask2, cbi, off, end);
rv = scanDoubleUnaligned(n, buf, len, s3Start, noCase, caseMask, mask1,
mask2, cbi, off, end);
return rv;
#else // AVX512
return scanDouble512(buf, len, key, keyLen, keyOffset, noCase, caseMask,
mask1, mask2, cbi, keyOffset, end);
return scanDouble512(n, buf, len, noCase, caseMask, mask1, mask2, cbi,
offset, end);
#endif // AVX512
}
static really_inline
hwlm_error_t scanSingleNoCase(const u8 *buf, size_t len, const u8 *key,
hwlm_error_t scanSingleNoCase(const struct noodTable *n, const u8 *buf,
size_t len, size_t start,
const struct cb_info *cbi) {
return scanSingleMain(buf, len, key, 1, cbi);
return scanSingleMain(n, buf, len, start, 1, cbi);
}
static really_inline
hwlm_error_t scanSingleCase(const u8 *buf, size_t len, const u8 *key,
hwlm_error_t scanSingleCase(const struct noodTable *n, const u8 *buf,
size_t len, size_t start,
const struct cb_info *cbi) {
return scanSingleMain(buf, len, key, 0, cbi);
return scanSingleMain(n, buf, len, start, 0, cbi);
}
// Single-character specialisation, used when n->single is set (the literal
// itself is one byte)
static really_inline
hwlm_error_t scanSingle(const u8 *buf, size_t len, const u8 *key, bool noCase,
const struct cb_info *cbi) {
if (!ourisalpha(key[0])) {
hwlm_error_t scanSingle(const struct noodTable *n, const u8 *buf, size_t len,
size_t start, bool noCase, const struct cb_info *cbi) {
if (!ourisalpha(n->key0)) {
noCase = 0; // force noCase off if we don't have an alphabetic char
}
// kinda ugly, but this forces constant propagation
if (noCase) {
return scanSingleNoCase(buf, len, key, cbi);
return scanSingleNoCase(n, buf, len, start, cbi);
} else {
return scanSingleCase(buf, len, key, cbi);
return scanSingleCase(n, buf, len, start, cbi);
}
}
static really_inline
hwlm_error_t scanDoubleNoCase(const u8 *buf, size_t len, const u8 *key,
size_t keyLen, size_t keyOffset,
hwlm_error_t scanDoubleNoCase(const struct noodTable *n, const u8 *buf,
size_t len, size_t start,
const struct cb_info *cbi) {
return scanDoubleMain(buf, len, key, keyLen, keyOffset, 1, cbi);
return scanDoubleMain(n, buf, len, start, 1, cbi);
}
static really_inline
hwlm_error_t scanDoubleCase(const u8 *buf, size_t len, const u8 *key,
size_t keyLen, size_t keyOffset,
hwlm_error_t scanDoubleCase(const struct noodTable *n, const u8 *buf,
size_t len, size_t start,
const struct cb_info *cbi) {
return scanDoubleMain(buf, len, key, keyLen, keyOffset, 0, cbi);
return scanDoubleMain(n, buf, len, start, 0, cbi);
}
static really_inline
hwlm_error_t scanDouble(const u8 *buf, size_t len, const u8 *key, size_t keyLen,
size_t keyOffset, bool noCase,
const struct cb_info *cbi) {
hwlm_error_t scanDouble(const struct noodTable *n, const u8 *buf, size_t len,
size_t start, bool noCase, const struct cb_info *cbi) {
// kinda ugly, but this forces constant propagation
if (noCase) {
return scanDoubleNoCase(buf, len, key, keyLen, keyOffset, cbi);
return scanDoubleNoCase(n, buf, len, start, cbi);
} else {
return scanDoubleCase(buf, len, key, keyLen, keyOffset, cbi);
return scanDoubleCase(n, buf, len, start, cbi);
}
}
// main entry point for the scan code
static really_inline
hwlm_error_t scan(const u8 *buf, size_t len, const u8 *key, size_t keyLen,
size_t keyOffset, bool noCase, const struct cb_info *cbi) {
if (len < keyLen) {
hwlm_error_t scan(const struct noodTable *n, const u8 *buf, size_t len,
size_t start, char single, bool noCase,
const struct cb_info *cbi) {
if (len - start < n->msk_len) {
// can't find a string of msk_len bytes in a shorter buffer
return HWLM_SUCCESS;
}
if (keyLen == 1) {
assert(keyOffset == 0);
return scanSingle(buf, len, key, noCase, cbi);
if (single) {
return scanSingle(n, buf, len, start, noCase, cbi);
} else {
return scanDouble(buf, len, key, keyLen, keyOffset, noCase, cbi);
return scanDouble(n, buf, len, start, noCase, cbi);
}
}
/** \brief Block-mode scanner. */
hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len,
size_t offset_adj, HWLMCallback cb, void *ctxt) {
size_t start, HWLMCallback cb,
struct hs_scratch *scratch) {
assert(n && buf);
struct cb_info cbi = { cb, n->id, ctxt, offset_adj };
DEBUG_PRINTF("nood scan of %zu bytes for %*s\n", len, n->len, n->str);
return scan(buf, len, n->str, n->len, n->key_offset, n->nocase, &cbi);
struct cb_info cbi = {cb, n->id, scratch, 0};
DEBUG_PRINTF("nood scan of %zu bytes for %*s @ %p\n", len, n->msk_len,
(const char *)&n->cmp, buf);
return scan(n, buf, len, start, n->single, n->nocase, &cbi);
}
/** \brief Streaming-mode scanner. */
hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf,
size_t hlen, const u8 *buf, size_t len,
HWLMCallback cb, void *ctxt, u8 *temp_buf,
UNUSED size_t temp_buffer_size) {
HWLMCallback cb, struct hs_scratch *scratch) {
assert(n);
struct cb_info cbi = {cb, n->id, ctxt, 0};
hwlm_error_t rv;
if (len + hlen < n->msk_len) {
DEBUG_PRINTF("not enough bytes for a match\n");
return HWLM_SUCCESS;
}
if (hlen) {
struct cb_info cbi = {cb, n->id, scratch, 0};
DEBUG_PRINTF("nood scan of %zu bytes (%zu hlen) for %*s @ %p\n", len, hlen,
n->msk_len, (const char *)&n->cmp, buf);
if (hlen && n->msk_len > 1) {
/*
* we have history, so build up a buffer from enough of the history
* buffer plus what we've been given to scan. Since this is relatively
* short, just check against msk+cmp per byte offset for matches.
*/
assert(hbuf);
u8 ALIGN_DIRECTIVE temp_buf[HWLM_LITERAL_MAX_LEN * 2];
memset(temp_buf, 0, sizeof(temp_buf));
size_t tl1 = MIN(n->len - 1, hlen);
size_t tl2 = MIN(n->len - 1, len);
size_t temp_len = tl1 + tl2;
assert(temp_len < temp_buffer_size);
memcpy(temp_buf, hbuf + hlen - tl1, tl1);
memcpy(temp_buf + tl1, buf, tl2);
assert(n->msk_len);
size_t tl1 = MIN((size_t)n->msk_len - 1, hlen);
size_t tl2 = MIN((size_t)n->msk_len - 1, len);
cbi.offsetAdj = -tl1;
rv = scan(temp_buf, temp_len, n->str, n->len, n->key_offset, n->nocase,
&cbi);
if (rv == HWLM_TERMINATED) {
return HWLM_TERMINATED;
assert(tl1 + tl2 <= sizeof(temp_buf));
assert(tl1 + tl2 >= n->msk_len);
assert(tl1 <= sizeof(u64a));
assert(tl2 <= sizeof(u64a));
DEBUG_PRINTF("using %zu bytes of hist and %zu bytes of buf\n", tl1, tl2);
unaligned_store_u64a(temp_buf,
partial_load_u64a(hbuf + hlen - tl1, tl1));
unaligned_store_u64a(temp_buf + tl1, partial_load_u64a(buf, tl2));
for (size_t i = 0; i <= tl1 + tl2 - n->msk_len; i++) {
u64a v = unaligned_load_u64a(temp_buf + i);
if ((v & n->msk) == n->cmp) {
size_t m_end = -tl1 + i + n->msk_len - 1;
DEBUG_PRINTF("match @ %zu (i %zu)\n", m_end, i);
hwlmcb_rv_t rv = cb(m_end, n->id, scratch);
if (rv == HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATED;
}
}
}
}
assert(buf);
cbi.offsetAdj = 0;
return scan(buf, len, n->str, n->len, n->key_offset, n->nocase, &cbi);
return scan(n, buf, len, 0, n->single, n->nocase, &cbi);
}
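To make the stitch-buffer arithmetic concrete (hypothetical numbers): with msk_len = 4 the scanner copies at most 3 trailing history bytes and 3 leading new bytes, and a hit at temp-buffer offset i reports an end offset of i relative to the start of the new block.

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>

int main() {
    // Illustrative values only; mirrors the tl1/tl2/m_end arithmetic above.
    size_t msk_len = 4, hlen = 8, len = 16;
    size_t tl1 = std::min(msk_len - 1, hlen); // 3 trailing history bytes
    size_t tl2 = std::min(msk_len - 1, len);  // 3 leading new bytes
    for (size_t i = 0; i + msk_len <= tl1 + tl2; i++) {
        size_t m_end = i + msk_len - 1 - tl1; // 0, 1, 2: ends in new data
        assert(m_end < tl2);
    }
    return 0;
}
```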

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -41,16 +41,17 @@ extern "C"
#endif
struct noodTable;
struct hs_scratch;
/** \brief Block-mode scanner. */
hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len,
size_t offset_adj, HWLMCallback cb, void *ctxt);
size_t start, HWLMCallback cb,
struct hs_scratch *scratch);
/** \brief Streaming-mode scanner. */
hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf,
size_t hlen, const u8 *buf, size_t len,
HWLMCallback cb, void *ctxt, u8 *temp_buf,
size_t temp_buffer_size);
HWLMCallback cb, struct hs_scratch *scratch);
#ifdef __cplusplus
} /* extern "C" */

View File

@ -38,10 +38,11 @@ static really_inline m256 getCaseMask(void) {
}
static really_inline
hwlm_error_t scanSingleUnaligned(const u8 *buf, size_t len, size_t offset,
const u8 *key, bool noCase, m256 caseMask,
m256 mask1, const struct cb_info *cbi,
size_t start, size_t end) {
hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf,
size_t len, size_t offset, bool noCase,
m256 caseMask, m256 mask1,
const struct cb_info *cbi, size_t start,
size_t end) {
const u8 *d = buf + offset;
DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
const size_t l = end - start;
@ -66,11 +67,11 @@ hwlm_error_t scanSingleUnaligned(const u8 *buf, size_t len, size_t offset,
}
static really_inline
hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset,
const u8 *key, size_t keyLen, size_t keyOffset,
bool noCase, m256 caseMask, m256 mask1,
m256 mask2, const struct cb_info *cbi,
size_t start, size_t end) {
hwlm_error_t scanDoubleUnaligned(const struct noodTable *n, const u8 *buf,
size_t len, size_t offset, bool noCase,
m256 caseMask, m256 mask1, m256 mask2,
const struct cb_info *cbi, size_t start,
size_t end) {
const u8 *d = buf + offset;
DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
size_t l = end - start;
@ -100,8 +101,8 @@ hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset,
// alignment boundary if needed and to finish off data that the aligned scan
// function can't handle (due to small/unaligned chunk at end)
static really_inline
hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key,
bool noCase, m256 caseMask, m256 mask1,
hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf,
size_t len, bool noCase, m256 caseMask, m256 mask1,
const struct cb_info *cbi, size_t start,
size_t end) {
const u8 *d = buf + start;
@ -140,11 +141,10 @@ hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key,
}
static really_inline
hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
size_t keyLen, size_t keyOffset, bool noCase,
m256 caseMask, m256 mask1, m256 mask2,
const struct cb_info *cbi, size_t start,
size_t end) {
hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf,
size_t len, bool noCase, m256 caseMask, m256 mask1,
m256 mask2, const struct cb_info *cbi,
size_t start, size_t end) {
const u8 *d = buf + start;
size_t l = end - start;
if (!l) {
@ -182,8 +182,8 @@ hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
}
static really_inline
hwlm_error_t scanSingleFast(const u8 *buf, size_t len, const u8 *key,
bool noCase, m256 caseMask, m256 mask1,
hwlm_error_t scanSingleFast(const struct noodTable *n, const u8 *buf,
size_t len, bool noCase, m256 caseMask, m256 mask1,
const struct cb_info *cbi, size_t start,
size_t end) {
const u8 *d = buf + start, *e = buf + end;
@ -203,10 +203,9 @@ hwlm_error_t scanSingleFast(const u8 *buf, size_t len, const u8 *key,
}
static really_inline
hwlm_error_t scanDoubleFast(const u8 *buf, size_t len, const u8 *key,
size_t keyLen, size_t keyOffset, bool noCase,
m256 caseMask, m256 mask1, m256 mask2,
const struct cb_info *cbi, size_t start,
hwlm_error_t scanDoubleFast(const struct noodTable *n, const u8 *buf,
size_t len, bool noCase, m256 caseMask, m256 mask1,
m256 mask2, const struct cb_info *cbi, size_t start,
size_t end) {
const u8 *d = buf + start, *e = buf + end;
DEBUG_PRINTF("start %zu end %zu \n", start, end);

View File

@ -43,8 +43,8 @@ m512 getCaseMask(void) {
// alignment boundary if needed and to finish off data that the aligned scan
// function can't handle (due to small/unaligned chunk at end)
static really_inline
hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key,
bool noCase, m512 caseMask, m512 mask1,
hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf,
size_t len, bool noCase, m512 caseMask, m512 mask1,
const struct cb_info *cbi, size_t start,
size_t end) {
const u8 *d = buf + start;
@ -73,11 +73,12 @@ hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key,
}
static really_inline
hwlm_error_t scanSingle512(const u8 *buf, size_t len, const u8 *key,
hwlm_error_t scanSingle512(const struct noodTable *n, const u8 *buf, size_t len,
bool noCase, m512 caseMask, m512 mask1,
const struct cb_info *cbi) {
const u8 *d = buf;
const u8 *e = buf + len;
const struct cb_info *cbi, size_t start,
size_t end) {
const u8 *d = buf + start;
const u8 *e = buf + end;
DEBUG_PRINTF("start %p end %p \n", d, e);
assert(d < e);
if (d + 64 >= e) {
@ -86,8 +87,8 @@ hwlm_error_t scanSingle512(const u8 *buf, size_t len, const u8 *key,
// peel off first part to cacheline boundary
const u8 *d1 = ROUNDUP_PTR(d, 64);
if (scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, 0,
d1 - d) == HWLM_TERMINATED) {
if (scanSingleShort(n, buf, len, noCase, caseMask, mask1, cbi, start,
d1 - buf) == HWLM_TERMINATED) {
return HWLM_TERMINATED;
}
d = d1;
@ -106,16 +107,15 @@ tail:
DEBUG_PRINTF("d %p e %p \n", d, e);
// finish off tail
return scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, d - buf,
return scanSingleShort(n, buf, len, noCase, caseMask, mask1, cbi, d - buf,
e - buf);
}
static really_inline
hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
size_t keyLen, size_t keyOffset, bool noCase,
m512 caseMask, m512 mask1, m512 mask2,
const struct cb_info *cbi, u64a *lastz0,
size_t start, size_t end) {
hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf,
size_t len, bool noCase, m512 caseMask, m512 mask1,
m512 mask2, const struct cb_info *cbi,
u64a *lastz0, size_t start, size_t end) {
DEBUG_PRINTF("start %zu end %zu last 0x%016llx\n", start, end, *lastz0);
const u8 *d = buf + start;
ptrdiff_t scan_len = end - start;
@ -142,9 +142,8 @@ hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
}
static really_inline
hwlm_error_t scanDouble512(const u8 *buf, size_t len, const u8 *key,
size_t keyLen, size_t keyOffset, bool noCase,
m512 caseMask, m512 mask1, m512 mask2,
hwlm_error_t scanDouble512(const struct noodTable *n, const u8 *buf, size_t len,
bool noCase, m512 caseMask, m512 mask1, m512 mask2,
const struct cb_info *cbi, size_t start,
size_t end) {
const u8 *d = buf + start;
@ -158,9 +157,8 @@ hwlm_error_t scanDouble512(const u8 *buf, size_t len, const u8 *key,
// peel off first part to cacheline boundary
const u8 *d1 = ROUNDUP_PTR(d, 64);
if (scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask,
mask1, mask2, cbi, &lastz0, start,
d1 - buf) == HWLM_TERMINATED) {
if (scanDoubleShort(n, buf, len, noCase, caseMask, mask1, mask2, cbi,
&lastz0, start, d1 - buf) == HWLM_TERMINATED) {
return HWLM_TERMINATED;
}
d = d1;
@ -188,6 +186,6 @@ tail:
DEBUG_PRINTF("d %p e %p off %zu \n", d, e, d - buf);
// finish off tail
return scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask,
mask1, mask2, cbi, &lastz0, d - buf, end);
return scanDoubleShort(n, buf, len, noCase, caseMask, mask1, mask2, cbi,
&lastz0, d - buf, end);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -38,8 +38,8 @@ static really_inline m128 getCaseMask(void) {
}
static really_inline
hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key,
bool noCase, m128 caseMask, m128 mask1,
hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf,
size_t len, bool noCase, m128 caseMask, m128 mask1,
const struct cb_info *cbi, size_t start,
size_t end) {
const u8 *d = buf + start;
@ -67,10 +67,11 @@ hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key,
}
static really_inline
hwlm_error_t scanSingleUnaligned(const u8 *buf, size_t len, size_t offset,
const u8 *key, bool noCase, m128 caseMask,
m128 mask1, const struct cb_info *cbi,
size_t start, size_t end) {
hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf,
size_t len, size_t offset, bool noCase,
m128 caseMask, m128 mask1,
const struct cb_info *cbi, size_t start,
size_t end) {
const u8 *d = buf + offset;
DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
const size_t l = end - start;
@ -96,11 +97,10 @@ hwlm_error_t scanSingleUnaligned(const u8 *buf, size_t len, size_t offset,
}
static really_inline
hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
size_t keyLen, size_t keyOffset, bool noCase,
m128 caseMask, m128 mask1, m128 mask2,
const struct cb_info *cbi, size_t start,
size_t end) {
hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf,
size_t len, bool noCase, m128 caseMask, m128 mask1,
m128 mask2, const struct cb_info *cbi,
size_t start, size_t end) {
const u8 *d = buf + start;
size_t l = end - start;
if (!l) {
@ -128,11 +128,11 @@ hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
}
static really_inline
hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset,
const u8 *key, size_t keyLen, size_t keyOffset,
bool noCase, m128 caseMask, m128 mask1,
m128 mask2, const struct cb_info *cbi,
size_t start, size_t end) {
hwlm_error_t scanDoubleUnaligned(const struct noodTable *n, const u8 *buf,
size_t len, size_t offset, bool noCase,
m128 caseMask, m128 mask1, m128 mask2,
const struct cb_info *cbi, size_t start,
size_t end) {
const u8 *d = buf + offset;
DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
size_t l = end - start;
@ -158,8 +158,8 @@ hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset,
}
static really_inline
hwlm_error_t scanSingleFast(const u8 *buf, size_t len, const u8 *key,
bool noCase, m128 caseMask, m128 mask1,
hwlm_error_t scanSingleFast(const struct noodTable *n, const u8 *buf,
size_t len, bool noCase, m128 caseMask, m128 mask1,
const struct cb_info *cbi, size_t start,
size_t end) {
const u8 *d = buf + start, *e = buf + end;
@ -179,10 +179,9 @@ hwlm_error_t scanSingleFast(const u8 *buf, size_t len, const u8 *key,
}
static really_inline
hwlm_error_t scanDoubleFast(const u8 *buf, size_t len, const u8 *key,
size_t keyLen, size_t keyOffset, bool noCase,
m128 caseMask, m128 mask1, m128 mask2,
const struct cb_info *cbi, size_t start,
hwlm_error_t scanDoubleFast(const struct noodTable *n, const u8 *buf,
size_t len, bool noCase, m128 caseMask, m128 mask1,
m128 mask2, const struct cb_info *cbi, size_t start,
size_t end) {
const u8 *d = buf + start, *e = buf + end;
assert(d < e);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -30,18 +30,22 @@
* \brief Data structures for Noodle literal matcher engine.
*/
#ifndef NOODLE_INTERNAL_H_25D751C42E34A6
#define NOODLE_INTERNAL_H_25D751C42E34A6
#ifndef NOODLE_INTERNAL_H
#define NOODLE_INTERNAL_H
#include "ue2common.h"
struct noodTable {
u32 id;
u32 len;
u32 key_offset;
u8 nocase;
u8 str[];
u64a msk;
u64a cmp;
u8 msk_len;
u8 key_offset;
u8 nocase;
u8 single;
u8 key0;
u8 key1;
};
#endif /* NOODLE_INTERNAL_H_25D751C42E34A6 */
#endif /* NOODLE_INTERNAL_H */

View File

@ -41,6 +41,8 @@
#include "util/verify_types.h"
#include <sstream>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#define PATHS_LIMIT 500
@ -65,6 +67,17 @@ void dump_paths(const Container &paths) {
DEBUG_PRINTF("%zu paths\n", paths.size());
}
static
vector<CharReach> reverse_alpha_remapping(const raw_dfa &rdfa) {
vector<CharReach> rv(rdfa.alpha_size - 1); /* TOP not required */
for (u32 i = 0; i < N_CHARS; i++) {
rv.at(rdfa.alpha_remap[i]).set(i);
}
return rv;
}
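A standalone model of this inversion may help (illustrative names and types; the real helper uses CharReach and drops the TOP symbol): every remapped symbol collects the set of input bytes that the remap table sends to it.

```cpp
#include <array>
#include <bitset>
#include <vector>

// Sketch: invert a 256-entry byte -> symbol remap into per-symbol byte sets.
std::vector<std::bitset<256>>
reverseRemapSketch(const std::array<unsigned, 256> &remap,
                   unsigned alpha_size) {
    std::vector<std::bitset<256>> rv(alpha_size);
    for (unsigned c = 0; c < 256; c++) {
        rv.at(remap[c]).set(c); // e.g. 'a' and 'A' often share one symbol
    }
    return rv;
}
```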
static
bool is_useful_path(const vector<path> &good, const path &p) {
for (const auto &g : good) {
@ -98,9 +111,10 @@ path append(const path &orig, const CharReach &cr, u32 new_dest) {
}
static
void extend(const raw_dfa &rdfa, const path &p,
map<u32, vector<path>> &all, vector<path> &out) {
dstate s = rdfa.states[p.dest];
void extend(const raw_dfa &rdfa, const vector<CharReach> &rev_map,
const path &p, unordered_map<u32, vector<path>> &all,
vector<path> &out) {
const dstate &s = rdfa.states[p.dest];
if (!p.reach.empty() && p.reach.back().none()) {
out.push_back(p);
@ -125,9 +139,9 @@ void extend(const raw_dfa &rdfa, const path &p,
}
flat_map<u32, CharReach> dest;
for (unsigned i = 0; i < N_CHARS; i++) {
u32 succ = s.next[rdfa.alpha_remap[i]];
dest[succ].set(i);
for (u32 i = 0; i < rev_map.size(); i++) {
u32 succ = s.next[i];
dest[succ] |= rev_map[i];
}
for (const auto &e : dest) {
@ -148,13 +162,14 @@ void extend(const raw_dfa &rdfa, const path &p,
static
vector<vector<CharReach>> generate_paths(const raw_dfa &rdfa,
dstate_id_t base, u32 len) {
const vector<CharReach> rev_map = reverse_alpha_remapping(rdfa);
vector<path> paths{path(base)};
map<u32, vector<path>> all;
unordered_map<u32, vector<path>> all;
all[base].push_back(path(base));
for (u32 i = 0; i < len && paths.size() < PATHS_LIMIT; i++) {
vector<path> next_gen;
for (const auto &p : paths) {
extend(rdfa, p, all, next_gen);
extend(rdfa, rev_map, p, all, next_gen);
}
paths = move(next_gen);
@ -195,17 +210,6 @@ bool better(const AccelScheme &a, const AccelScheme &b) {
return a.cr.count() < b.cr.count();
}
static
vector<CharReach> reverse_alpha_remapping(const raw_dfa &rdfa) {
vector<CharReach> rv(rdfa.alpha_size - 1); /* TOP not required */
for (u32 i = 0; i < N_CHARS; i++) {
rv.at(rdfa.alpha_remap[i]).set(i);
}
return rv;
}
static
bool double_byte_ok(const AccelScheme &info) {
return !info.double_byte.empty() &&
@ -225,16 +229,16 @@ bool has_self_loop(dstate_id_t s, const raw_dfa &raw) {
}
static
vector<u16> find_nonexit_symbols(const raw_dfa &rdfa,
const CharReach &escape) {
set<u16> rv;
flat_set<u16> find_nonexit_symbols(const raw_dfa &rdfa,
const CharReach &escape) {
flat_set<u16> rv;
CharReach nonexit = ~escape;
for (auto i = nonexit.find_first(); i != CharReach::npos;
for (auto i = nonexit.find_first(); i != nonexit.npos;
i = nonexit.find_next(i)) {
rv.insert(rdfa.alpha_remap[i]);
}
return vector<u16>(rv.begin(), rv.end());
return rv;
}
static
@ -254,7 +258,7 @@ dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
u16 top_remap = raw.alpha_remap[TOP];
ue2::unordered_set<dstate_id_t> seen;
std::unordered_set<dstate_id_t> seen;
while (true) {
seen.insert(s);
DEBUG_PRINTF("basis %hu\n", s);
@ -288,7 +292,7 @@ dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
static
set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base,
const AccelScheme &ei) {
const AccelScheme &ei) {
DEBUG_PRINTF("looking for region around %hu\n", base);
set<dstate_id_t> region = {base};

View File

@ -44,6 +44,8 @@
#include "util/simd_types.h"
#include <cstdio>
#include <map>
#include <set>
#include <vector>
#ifndef DUMP_SUPPORT

View File

@ -31,7 +31,7 @@
#include "ue2common.h"
#include "util/charreach.h"
#include "util/ue2_containers.h"
#include "util/flat_containers.h"
union AccelAux;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -71,7 +71,7 @@ void dumpTextSubCastle(const SubCastle &sub, FILE *f) {
void nfaExecCastle_dump(const struct NFA *nfa, const string &base) {
const Castle *c = (const Castle *)getImplNfa(nfa);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
StdioFile f(base + ".txt", "w");
fprintf(f, "Castle multi-tenant repeat engine\n");
fprintf(f, "\n");
@ -117,7 +117,6 @@ void nfaExecCastle_dump(const struct NFA *nfa, const string &base) {
fprintf(f, "Sub %u:\n", i);
dumpTextSubCastle(sub[i], f);
}
fclose(f);
}
} // namespace ue2

View File

@ -48,11 +48,11 @@
#include "util/compile_context.h"
#include "util/container.h"
#include "util/dump_charclass.h"
#include "util/flat_containers.h"
#include "util/graph.h"
#include "util/make_unique.h"
#include "util/multibit_build.h"
#include "util/report_manager.h"
#include "util/ue2_containers.h"
#include "util/verify_types.h"
#include "grey.h"
@ -153,13 +153,11 @@ static
void getNeighborInfo(const CliqueGraph &g, vector<u32> &neighbor,
const CliqueVertex &cv, const set<u32> &group) {
u32 id = g[cv].stateId;
ue2::unordered_set<u32> neighborId;
// find neighbors for cv
for (const auto &v : adjacent_vertices_range(cv, g)) {
if (g[v].stateId != id && contains(group, g[v].stateId)){
if (g[v].stateId != id && contains(group, g[v].stateId)) {
neighbor.push_back(g[v].stateId);
neighborId.insert(g[v].stateId);
DEBUG_PRINTF("Neighbor:%u\n", g[v].stateId);
}
}
@ -772,7 +770,7 @@ bool mergeCastle(CastleProto &c1, const CastleProto &c2,
const u32 top = m.first;
const PureRepeat &pr = m.second;
DEBUG_PRINTF("top %u\n", top);
u32 new_top = c1.add(pr);
u32 new_top = c1.merge(pr);
top_map[top] = new_top;
DEBUG_PRINTF("adding repeat: map %u->%u\n", top, new_top);
}
@ -883,7 +881,7 @@ bool is_equal(const CastleProto &c1, const CastleProto &c2) {
}
bool requiresDedupe(const CastleProto &proto,
const ue2::flat_set<ReportID> &reports) {
const flat_set<ReportID> &reports) {
for (const auto &report : reports) {
auto it = proto.report_map.find(report);
if (it == end(proto.report_map)) {

View File

@ -39,11 +39,12 @@
#include "nfagraph/ng_repeat.h"
#include "util/bytecode_ptr.h"
#include "util/depth.h"
#include "util/ue2_containers.h"
#include "util/flat_containers.h"
#include <map>
#include <memory>
#include <set>
#include <unordered_map>
#include <vector>
struct NFA;
@ -89,7 +90,7 @@ struct CastleProto {
std::map<u32, PureRepeat> repeats;
/** \brief Mapping from report to associated tops. */
ue2::unordered_map<ReportID, flat_set<u32>> report_map;
std::unordered_map<ReportID, flat_set<u32>> report_map;
/**
* \brief Next top id to use. Repeats may be removed without top remapping,
@ -127,7 +128,9 @@ buildCastle(const CastleProto &proto,
const CompileContext &cc, const ReportManager &rm);
/**
* \brief Merge two CastleProto prototypes together, if possible.
* \brief Merge two CastleProto prototypes together, if possible. If a
* particular repeat from c2 is already in c1, then it will be reused rather
* than adding a duplicate repeat.
*
* Returns true if merge of all repeats in c2 into c1 succeeds, and fills
* mapping with the repeat indices.
@ -155,7 +158,7 @@ bool is_equal(const CastleProto &c1, const CastleProto &c2);
* of the reports in the given set.
*/
bool requiresDedupe(const CastleProto &proto,
const ue2::flat_set<ReportID> &reports);
const flat_set<ReportID> &reports);
/**
* \brief Build an NGHolder from a CastleProto.

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -30,11 +30,9 @@
namespace ue2 {
// prevent weak vtables for raw_report_info, dfa_build_strat and raw_dfa
// prevent weak vtables for raw_report_info, dfa_build_strat
raw_report_info::~raw_report_info() {}
dfa_build_strat::~dfa_build_strat() {}
raw_dfa::~raw_dfa() {}
} // namespace ue2

View File

@ -59,12 +59,13 @@
#include "dfa_min.h"
#include "grey.h"
#include "mcclellancompile_util.h"
#include "rdfa.h"
#include "ue2common.h"
#include "util/container.h"
#include "util/flat_containers.h"
#include "util/noncopyable.h"
#include "util/partitioned_set.h"
#include "util/ue2_containers.h"
#include <algorithm>
#include <functional>
@ -299,6 +300,10 @@ void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) {
return;
}
if (is_dead(rdfa)) {
DEBUG_PRINTF("dfa is empty\n");
}
UNUSED const size_t states_before = rdfa.states.size();
HopcroftInfo info(rdfa);

View File

@ -37,11 +37,11 @@
#include "nfa_internal.h"
#include "util/compile_context.h"
#include "util/container.h"
#include "util/flat_containers.h"
#include "util/graph_range.h"
#include "util/make_unique.h"
#include "util/order_check.h"
#include "util/report_manager.h"
#include "util/ue2_containers.h"
#include "util/verify_types.h"
#include "ue2common.h"

View File

@ -33,7 +33,7 @@
#include "nfa_kind.h"
#include "ue2common.h"
#include "util/bytecode_ptr.h"
#include "util/ue2_containers.h"
#include "util/flat_containers.h"
#include "util/order_check.h"
#include <map>

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -32,8 +32,10 @@
#include "goughcompile_internal.h"
#include "grey.h"
#include "util/container.h"
#include "util/dump_util.h"
#include "util/graph_range.h"
#include <sstream>
#include <string>
#ifndef DUMP_SUPPORT
@ -66,10 +68,7 @@ string dump_name(const gough_edge_id &e) {
static
void dump_graph(const GoughGraph &g, const string &base, const Grey &grey) {
stringstream ss;
ss << grey.dumpPath << "gough_" << base << ".dot";
FILE *f = fopen(ss.str().c_str(), "w");
StdioFile f(grey.dumpPath + "gough_" + base + ".dot", "w");
fprintf(f, "digraph NFA {\n");
fprintf(f, "rankdir=LR;\n");
@ -94,8 +93,6 @@ void dump_graph(const GoughGraph &g, const string &base, const Grey &grey) {
dump_name(g[s]).c_str(), dump_name(g[t]).c_str());
}
fprintf(f, "}\n");
fclose(f);
}
static
@ -133,9 +130,7 @@ set<const GoughSSAVar *> uses(const GoughEdgeProps &ep) {
static
void dump_var_mapping(const GoughGraph &g, const string &base,
const Grey &grey) {
stringstream ss;
ss << grey.dumpPath << "gough_" << base << "_vars.txt";
FILE *f = fopen(ss.str().c_str(), "w");
StdioFile f(grey.dumpPath + "gough_" + base + "_vars.txt", "w");
for (auto v : vertices_range(g)) {
set<const GoughSSAVar *> used = uses(g[v]);
if (g[v].vars.empty() && used.empty()) {
@ -180,7 +175,6 @@ void dump_var_mapping(const GoughGraph &g, const string &base,
fprintf(f, "\n");
}
}
fclose(f);
}
static
@ -220,12 +214,7 @@ void gather_vars(const GoughGraph &g, vector<const GoughSSAVar *> *vars,
static
void dump_vars(const GoughGraph &g, const string &base, const Grey &grey) {
FILE *f;
{
stringstream ss;
ss << grey.dumpPath << "gough_" << base << "_vars.dot";
f = fopen(ss.str().c_str(), "w");
}
StdioFile f(grey.dumpPath + "gough_" + base + "_vars.dot", "w");
fprintf(f, "digraph NFA {\n");
fprintf(f, "rankdir=LR;\n");
fprintf(f, "size=\"11.5,8\"\n");
@ -271,7 +260,6 @@ void dump_vars(const GoughGraph &g, const string &base, const Grey &grey) {
}
fprintf(f, "}\n");
fclose(f);
}
void dump(const GoughGraph &g, const string &base, const Grey &grey) {
@ -317,18 +305,11 @@ void dump_blocks(const map<gough_edge_id, vector<gough_ins>> &blocks,
return;
}
FILE *f;
{
stringstream ss;
ss << grey.dumpPath << "gough_" << base << "_programs.txt";
f = fopen(ss.str().c_str(), "w");
}
StdioFile f(grey.dumpPath + "gough_" + base + "_programs.txt", "w");
for (const auto &m : blocks) {
dump_block(f, m.first, m.second);
}
fclose(f);
}
} // namespace ue2

View File

@ -33,9 +33,9 @@
#include "mcclellancompile.h"
#include "ue2common.h"
#include "util/charreach.h"
#include "util/flat_containers.h"
#include "util/noncopyable.h"
#include "util/order_check.h"
#include "util/ue2_containers.h"
#include <map>
#include <memory>
@ -106,10 +106,10 @@ struct GoughSSAVarJoin;
struct GoughSSAVar : noncopyable {
GoughSSAVar(void) : seen(false), slot(INVALID_SLOT) {}
virtual ~GoughSSAVar();
const ue2::flat_set<GoughSSAVar *> &get_inputs() const {
const flat_set<GoughSSAVar *> &get_inputs() const {
return inputs;
}
const ue2::flat_set<GoughSSAVarWithInputs *> &get_outputs() const {
const flat_set<GoughSSAVarWithInputs *> &get_outputs() const {
return outputs;
}
virtual void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) = 0;
@ -127,8 +127,8 @@ struct GoughSSAVar : noncopyable {
clear_outputs();
}
protected:
ue2::flat_set<GoughSSAVar *> inputs;
ue2::flat_set<GoughSSAVarWithInputs *> outputs;
flat_set<GoughSSAVar *> inputs;
flat_set<GoughSSAVarWithInputs *> outputs;
friend struct GoughSSAVarWithInputs;
friend struct GoughSSAVarMin;
friend struct GoughSSAVarJoin;
@ -184,16 +184,14 @@ struct GoughSSAVarJoin : public GoughSSAVarWithInputs {
void add_input(GoughSSAVar *v, GoughEdge prev);
const ue2::flat_set<GoughEdge> &get_edges_for_input(GoughSSAVar *input)
const;
const std::map<GoughSSAVar *, ue2::flat_set<GoughEdge> > &get_input_map()
const;
const flat_set<GoughEdge> &get_edges_for_input(GoughSSAVar *input) const;
const std::map<GoughSSAVar *, flat_set<GoughEdge>> &get_input_map() const;
protected:
void remove_input_raw(GoughSSAVar *v) override;
private:
std::map<GoughSSAVar *, ue2::flat_set<GoughEdge>> input_map;
std::map<GoughSSAVar *, flat_set<GoughEdge>> input_map;
};
struct gough_accel_state_info {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -32,10 +32,10 @@
#include "gough_internal.h"
#include "grey.h"
#include "util/container.h"
#include "util/flat_containers.h"
#include "util/graph.h"
#include "util/graph_range.h"
#include "util/order_check.h"
#include "util/ue2_containers.h"
#include "ue2common.h"
@ -235,7 +235,7 @@ void handle_pending_vertices(GoughSSAVar *def, const GoughGraph &g,
if (contains(aux.containing_v, def)) {
def_v = aux.containing_v.at(def);
}
ue2::unordered_set<GoughVertex> done;
unordered_set<GoughVertex> done;
while (!pending_vertex.empty()) {
GoughVertex current = *pending_vertex.begin();
pending_vertex.erase(current);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -353,22 +353,14 @@ void nfaExecGough16_dumpText(const struct NFA *nfa, FILE *f) {
void nfaExecGough16_dump(const NFA *nfa, const string &base) {
assert(nfa->type == GOUGH_NFA_16);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
nfaExecGough16_dumpText(nfa, f);
fclose(f);
f = fopen_or_throw((base + ".dot").c_str(), "w");
nfaExecGough16_dumpDot(nfa, f);
fclose(f);
nfaExecGough16_dumpText(nfa, StdioFile(base + ".txt", "w"));
nfaExecGough16_dumpDot(nfa, StdioFile(base + ".dot", "w"));
}
void nfaExecGough8_dump(const NFA *nfa, const string &base) {
assert(nfa->type == GOUGH_NFA_8);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
nfaExecGough8_dumpText(nfa, f);
fclose(f);
f = fopen_or_throw((base + ".dot").c_str(), "w");
nfaExecGough8_dumpDot(nfa, f);
fclose(f);
nfaExecGough8_dumpText(nfa, StdioFile(base + ".txt", "w"));
nfaExecGough8_dumpDot(nfa, StdioFile(base + ".dot", "w"));
}
} // namespace ue2

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -71,47 +71,40 @@ void nfaExecLbrDot_dump(const NFA *nfa, const string &base) {
assert(nfa);
assert(nfa->type == LBR_NFA_DOT);
const lbr_dot *ld = (const lbr_dot *)getImplNfa(nfa);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
StdioFile f(base + ".txt", "w");
lbrDumpCommon(&ld->common, f);
fprintf(f, "DOT model\n");
fprintf(f, "\n");
dumpTextReverse(nfa, f);
fclose(f);
}
void nfaExecLbrVerm_dump(const NFA *nfa, const string &base) {
assert(nfa);
assert(nfa->type == LBR_NFA_VERM);
const lbr_verm *lv = (const lbr_verm *)getImplNfa(nfa);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
StdioFile f(base + ".txt", "w");
lbrDumpCommon(&lv->common, f);
fprintf(f, "VERM model, scanning for 0x%02x\n", lv->c);
fprintf(f, "\n");
dumpTextReverse(nfa, f);
fclose(f);
}
void nfaExecLbrNVerm_dump(const NFA *nfa, const string &base) {
assert(nfa);
assert(nfa->type == LBR_NFA_NVERM);
const lbr_verm *lv = (const lbr_verm *)getImplNfa(nfa);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
StdioFile f(base + ".txt", "w");
lbrDumpCommon(&lv->common, f);
fprintf(f, "NEGATED VERM model, scanning for 0x%02x\n", lv->c);
fprintf(f, "\n");
dumpTextReverse(nfa, f);
fclose(f);
}
void nfaExecLbrShuf_dump(const NFA *nfa, const string &base) {
assert(nfa);
assert(nfa->type == LBR_NFA_SHUF);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
StdioFile f(base + ".txt", "w");
const lbr_shuf *ls = (const lbr_shuf *)getImplNfa(nfa);
lbrDumpCommon(&ls->common, f);
@ -122,14 +115,13 @@ void nfaExecLbrShuf_dump(const NFA *nfa, const string &base) {
describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count());
fprintf(f, "\n");
dumpTextReverse(nfa, f);
fclose(f);
}
void nfaExecLbrTruf_dump(const NFA *nfa, const string &base) {
assert(nfa);
assert(nfa->type == LBR_NFA_TRUF);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
StdioFile f(base + ".txt", "w");
const lbr_truf *lt = (const lbr_truf *)getImplNfa(nfa);
lbrDumpCommon(&lt->common, f);
@ -140,7 +132,6 @@ void nfaExecLbrTruf_dump(const NFA *nfa, const string &base) {
describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count());
fprintf(f, "\n");
dumpTextReverse(nfa, f);
fclose(f);
}
} // namespace ue2

View File

@ -53,11 +53,13 @@
#include "util/charreach.h"
#include "util/compile_context.h"
#include "util/container.h"
#include "util/flat_containers.h"
#include "util/graph.h"
#include "util/graph_range.h"
#include "util/graph_small_color_map.h"
#include "util/order_check.h"
#include "util/unordered.h"
#include "util/verify_types.h"
#include "util/ue2_containers.h"
#include <algorithm>
#include <cassert>
@ -96,18 +98,20 @@ struct precalcAccel {
};
struct limex_accel_info {
ue2::unordered_set<NFAVertex> accelerable;
unordered_set<NFAVertex> accelerable;
map<NFAStateSet, precalcAccel> precalc;
ue2::unordered_map<NFAVertex, flat_set<NFAVertex>> friends;
ue2::unordered_map<NFAVertex, AccelScheme> accel_map;
unordered_map<NFAVertex, flat_set<NFAVertex>> friends;
unordered_map<NFAVertex, AccelScheme> accel_map;
};
static
map<NFAVertex, NFAStateSet>
reindexByStateId(const map<NFAVertex, NFAStateSet> &in, const NGHolder &g,
const ue2::unordered_map<NFAVertex, u32> &state_ids,
unordered_map<NFAVertex, NFAStateSet>
reindexByStateId(const unordered_map<NFAVertex, NFAStateSet> &in,
const NGHolder &g,
const unordered_map<NFAVertex, u32> &state_ids,
const u32 num_states) {
map<NFAVertex, NFAStateSet> out;
unordered_map<NFAVertex, NFAStateSet> out;
out.reserve(in.size());
vector<u32> indexToState(num_vertices(g), NO_STATE);
for (const auto &m : state_ids) {
@ -137,18 +141,20 @@ reindexByStateId(const map<NFAVertex, NFAStateSet> &in, const NGHolder &g,
struct build_info {
build_info(NGHolder &hi,
const ue2::unordered_map<NFAVertex, u32> &states_in,
const unordered_map<NFAVertex, u32> &states_in,
const vector<BoundedRepeatData> &ri,
const map<NFAVertex, NFAStateSet> &rsmi,
const map<NFAVertex, NFAStateSet> &smi,
const unordered_map<NFAVertex, NFAStateSet> &rsmi,
const unordered_map<NFAVertex, NFAStateSet> &smi,
const map<u32, set<NFAVertex>> &ti, const set<NFAVertex> &zi,
bool dai, bool sci, const CompileContext &cci,
u32 nsi)
: h(hi), state_ids(states_in), repeats(ri), tops(ti), zombies(zi),
do_accel(dai), stateCompression(sci), cc(cci),
bool dai, bool sci, const CompileContext &cci, u32 nsi)
: h(hi), state_ids(states_in), repeats(ri), tops(ti), tugs(nsi),
zombies(zi), do_accel(dai), stateCompression(sci), cc(cci),
num_states(nsi) {
for (const auto &br : repeats) {
insert(&tugs, br.tug_triggers);
for (auto v : br.tug_triggers) {
assert(state_ids.at(v) != NO_STATE);
tugs.set(state_ids.at(v));
}
br_cyclic[br.cyclic] =
BoundedRepeatSummary(br.repeatMin, br.repeatMax);
}
@ -160,15 +166,15 @@ struct build_info {
}
NGHolder &h;
const ue2::unordered_map<NFAVertex, u32> &state_ids;
const unordered_map<NFAVertex, u32> &state_ids;
const vector<BoundedRepeatData> &repeats;
// Squash maps; state sets are indexed by state_id.
map<NFAVertex, NFAStateSet> reportSquashMap;
map<NFAVertex, NFAStateSet> squashMap;
unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
unordered_map<NFAVertex, NFAStateSet> squashMap;
const map<u32, set<NFAVertex>> &tops;
ue2::unordered_set<NFAVertex> tugs;
NFAStateSet tugs;
map<NFAVertex, BoundedRepeatSummary> br_cyclic;
const set<NFAVertex> &zombies;
bool do_accel;
@ -238,7 +244,7 @@ bool isLimitedTransition(int from, int to, int maxshift) {
// Fill a bit mask
template<class Mask>
void maskFill(Mask &m, char c) {
void maskFill(Mask &m, u8 c) {
memset(&m, c, sizeof(m));
}
@ -478,7 +484,7 @@ bool allow_wide_accel(const vector<NFAVertex> &vv, const NGHolder &g,
static
void nfaFindAccelSchemes(const NGHolder &g,
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
ue2::unordered_map<NFAVertex, AccelScheme> *out) {
unordered_map<NFAVertex, AccelScheme> *out) {
vector<CharReach> refined_cr = reduced_cr(g, br_cyclic);
NFAVertex sds_or_proxy = get_sds_or_proxy(g);
@ -503,8 +509,8 @@ void nfaFindAccelSchemes(const NGHolder &g,
}
struct fas_visitor : public boost::default_bfs_visitor {
fas_visitor(const ue2::unordered_map<NFAVertex, AccelScheme> &am_in,
ue2::unordered_map<NFAVertex, AccelScheme> *out_in)
fas_visitor(const unordered_map<NFAVertex, AccelScheme> &am_in,
unordered_map<NFAVertex, AccelScheme> *out_in)
: accel_map(am_in), out(out_in) {}
void discover_vertex(NFAVertex v, const NGHolder &) {
@ -515,13 +521,13 @@ struct fas_visitor : public boost::default_bfs_visitor {
throw this; /* done */
}
}
const ue2::unordered_map<NFAVertex, AccelScheme> &accel_map;
ue2::unordered_map<NFAVertex, AccelScheme> *out;
const unordered_map<NFAVertex, AccelScheme> &accel_map;
unordered_map<NFAVertex, AccelScheme> *out;
};
static
void filterAccelStates(NGHolder &g, const map<u32, set<NFAVertex>> &tops,
ue2::unordered_map<NFAVertex, AccelScheme> *accel_map) {
unordered_map<NFAVertex, AccelScheme> *accel_map) {
/* We want the NFA_MAX_ACCEL_STATES best acceleration states, everything
* else should be ditched. We use a simple BFS to choose accel states near
* the start. */
@ -541,14 +547,12 @@ void filterAccelStates(NGHolder &g, const map<u32, set<NFAVertex>> &tops,
tempEdges.push_back(e); // Remove edge later.
}
ue2::unordered_map<NFAVertex, AccelScheme> out;
unordered_map<NFAVertex, AccelScheme> out;
try {
vector<boost::default_color_type> colour(num_vertices(g));
boost::breadth_first_search(g, g.start,
visitor(fas_visitor(*accel_map, &out))
.color_map(make_iterator_property_map(colour.begin(),
get(vertex_index, g))));
visitor(fas_visitor(*accel_map, &out))
.color_map(make_small_color_map(g)));
} catch (fas_visitor *) {
; /* found max accel_states */
}
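
The colour vector plus make_iterator_property_map pair gives way to make_small_color_map here; judging by its use, it is a property map that packs the BGL search colours into two bits per vertex rather than a byte-or-wider default_color_type. A standalone sketch of such packing, under that assumption (names hypothetical):

#include <cstddef>
#include <cstdint>
#include <vector>

enum class SmallColor : std::uint8_t { white = 0, gray = 1, black = 2 };

struct PackedColorMap {
    std::vector<std::uint8_t> data; // four 2-bit colours per byte
    explicit PackedColorMap(std::size_t n) : data((n + 3) / 4, 0) {}
    SmallColor get(std::size_t i) const {
        return SmallColor((data[i / 4] >> ((i % 4) * 2)) & 0x3);
    }
    void set(std::size_t i, SmallColor c) {
        const unsigned shift = unsigned(i % 4) * 2;
        data[i / 4] = std::uint8_t((data[i / 4] & ~(0x3u << shift)) |
                                   (std::uint8_t(c) << shift));
    }
};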
@ -983,16 +987,18 @@ u32 addSquashMask(const build_info &args, const NFAVertex &v,
return idx;
}
using ReportListCache = ue2_unordered_map<vector<ReportID>, u32>;
static
u32 addReports(const flat_set<ReportID> &r, vector<ReportID> &reports,
unordered_map<vector<ReportID>, u32> &reportListCache) {
ReportListCache &reports_cache) {
assert(!r.empty());
vector<ReportID> my_reports(begin(r), end(r));
my_reports.push_back(MO_INVALID_IDX); // sentinel
auto cache_it = reportListCache.find(my_reports);
if (cache_it != end(reportListCache)) {
auto cache_it = reports_cache.find(my_reports);
if (cache_it != end(reports_cache)) {
u32 offset = cache_it->second;
DEBUG_PRINTF("reusing cached report list at %u\n", offset);
return offset;
@ -1008,13 +1014,12 @@ u32 addReports(const flat_set<ReportID> &r, vector<ReportID> &reports,
u32 offset = verify_u32(reports.size());
insert(&reports, reports.end(), my_reports);
reportListCache.emplace(move(my_reports), offset);
reports_cache.emplace(move(my_reports), offset);
return offset;
}
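
addReports caches each report list (plus its MO_INVALID_IDX sentinel) keyed by contents, so identical lists written to the global vector are shared by offset. The caching shape, reduced to standard containers (names illustrative):

#include <cstdint>
#include <map>
#include <utility>
#include <vector>

using ReportID = std::uint32_t;
static const ReportID SENTINEL = 0xffffffffu; // stands in for MO_INVALID_IDX

std::uint32_t add_reports(const std::vector<ReportID> &r,
                          std::vector<ReportID> &global,
                          std::map<std::vector<ReportID>, std::uint32_t> &cache) {
    std::vector<ReportID> key(r);
    key.push_back(SENTINEL); // terminator is stored as part of the list
    auto it = cache.find(key);
    if (it != cache.end()) {
        return it->second; // identical list already written: reuse its offset
    }
    const std::uint32_t offset = static_cast<std::uint32_t>(global.size());
    global.insert(global.end(), key.begin(), key.end());
    cache.emplace(std::move(key), offset);
    return offset;
}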
static
void buildAcceptsList(const build_info &args,
unordered_map<vector<ReportID>, u32> &reports_cache,
void buildAcceptsList(const build_info &args, ReportListCache &reports_cache,
vector<NFAVertex> &verts, vector<NFAAccept> &accepts,
vector<ReportID> &reports, vector<NFAStateSet> &squash) {
if (verts.empty()) {
@ -1052,8 +1057,7 @@ void buildAcceptsList(const build_info &args,
}
static
void buildAccepts(const build_info &args,
unordered_map<vector<ReportID>, u32> &reports_cache,
void buildAccepts(const build_info &args, ReportListCache &reports_cache,
NFAStateSet &acceptMask, NFAStateSet &acceptEodMask,
vector<NFAAccept> &accepts, vector<NFAAccept> &acceptsEod,
vector<ReportID> &reports, vector<NFAStateSet> &squash) {
@ -1120,7 +1124,7 @@ u32 uncompressedStateSize(u32 num_states) {
static
u32 compressedStateSize(const NGHolder &h, const NFAStateSet &maskedStates,
const ue2::unordered_map<NFAVertex, u32> &state_ids) {
const unordered_map<NFAVertex, u32> &state_ids) {
// Shrink state requirement to enough to fit the compressed largest reach.
vector<u32> allreach(N_CHARS, 0);
@ -1191,7 +1195,7 @@ bool hasSquashableInitDs(const build_info &args) {
static
bool hasInitDsStates(const NGHolder &h,
const ue2::unordered_map<NFAVertex, u32> &state_ids) {
const unordered_map<NFAVertex, u32> &state_ids) {
if (state_ids.at(h.startDs) != NO_STATE) {
return true;
}
@ -1359,17 +1363,16 @@ struct ExceptionProto {
};
static
u32 buildExceptionMap(const build_info &args,
unordered_map<vector<ReportID>, u32> &reports_cache,
const ue2::unordered_set<NFAEdge> &exceptional,
u32 buildExceptionMap(const build_info &args, ReportListCache &reports_cache,
const unordered_set<NFAEdge> &exceptional,
map<ExceptionProto, vector<u32>> &exceptionMap,
vector<ReportID> &reportList) {
const NGHolder &h = args.h;
const u32 num_states = args.num_states;
u32 exceptionCount = 0;
ue2::unordered_map<NFAVertex, u32> pos_trigger;
ue2::unordered_map<NFAVertex, u32> tug_trigger;
unordered_map<NFAVertex, u32> pos_trigger;
unordered_map<NFAVertex, u32> tug_trigger;
for (u32 i = 0; i < args.repeats.size(); i++) {
const BoundedRepeatData &br = args.repeats[i];
@ -1518,18 +1521,14 @@ u32 depth_to_u32(const depth &d) {
}
static
bool isExceptionalTransition(const NGHolder &h, const NFAEdge &e,
const build_info &args, u32 maxShift) {
NFAVertex from = source(e, h);
NFAVertex to = target(e, h);
u32 f = args.state_ids.at(from);
u32 t = args.state_ids.at(to);
if (!isLimitedTransition(f, t, maxShift)) {
bool isExceptionalTransition(u32 from, u32 to, const build_info &args,
u32 maxShift) {
if (!isLimitedTransition(from, to, maxShift)) {
return true;
}
// All transitions out of a tug trigger are exceptional.
if (contains(args.tugs, from)) {
if (args.tugs.test(from)) {
return true;
}
return false;
@ -1545,7 +1544,7 @@ u32 findMaxVarShift(const build_info &args, u32 nShifts) {
if (from == NO_STATE || to == NO_STATE) {
continue;
}
if (!isExceptionalTransition(h, e, args, MAX_SHIFT_AMOUNT)) {
if (!isExceptionalTransition(from, to, args, MAX_SHIFT_AMOUNT)) {
shiftMask |= (1UL << (to - from));
}
}
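
In the LimEx model a non-exceptional edge moves a state bit left by (to - from), and this loop records which shift distances actually occur. A loose standalone sketch of the mask accumulation, with hypothetical types:

#include <cstdint>
#include <utility>
#include <vector>

// assumes max_shift < 32 so the mask fits in a u32
std::uint32_t
shift_mask(const std::vector<std::pair<std::uint32_t, std::uint32_t>> &edges,
           std::uint32_t max_shift) {
    std::uint32_t mask = 0;
    for (const auto &e : edges) {
        const std::uint32_t from = e.first, to = e.second;
        if (to >= from && to - from <= max_shift) {
            mask |= 1u << (to - from); // limited: a shift by (to - from)
        }
    }
    return mask;
}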
@ -1574,7 +1573,7 @@ int getLimexScore(const build_info &args, u32 nShifts) {
if (from == NO_STATE || to == NO_STATE) {
continue;
}
if (isExceptionalTransition(h, e, args, maxVarShift)) {
if (isExceptionalTransition(from, to, args, maxVarShift)) {
exceptionalStates.set(from);
}
}
@ -1615,9 +1614,7 @@ bool cannotDie(const build_info &args, const set<NFAVertex> &tops) {
// top, looking for a cyclic path consisting of vertices of dot reach. If
// one exists, then the NFA cannot die after this top is triggered.
vector<boost::default_color_type> colours(num_vertices(h));
auto colour_map = boost::make_iterator_property_map(colours.begin(),
get(vertex_index, h));
auto colour_map = make_small_color_map(h);
struct CycleFound {};
struct CannotDieVisitor : public boost::default_dfs_visitor {
@ -1848,10 +1845,9 @@ struct Factory {
maskSetBit(limex->repeatCyclicMask, cyclic);
}
/* also include tugs in repeat cyclic mask */
for (NFAVertex v : args.tugs) {
u32 v_state = args.state_ids.at(v);
assert(v_state != NO_STATE);
maskSetBit(limex->repeatCyclicMask, v_state);
for (size_t i = args.tugs.find_first(); i != args.tugs.npos;
i = args.tugs.find_next(i)) {
maskSetBit(limex->repeatCyclicMask, i);
}
}
@ -1872,7 +1868,7 @@ struct Factory {
// We check for exceptional transitions here, as we don't want tug
// trigger transitions emitted as limited transitions (even if they
// could be in this model).
if (!isExceptionalTransition(h, e, args, maxShift)) {
if (!isExceptionalTransition(from, to, args, maxShift)) {
u32 shift = to - from;
if ((shiftMask & (1UL << shift)) == 0UL) {
shiftMask |= (1UL << shift);
@ -1896,7 +1892,7 @@ struct Factory {
static
void findExceptionalTransitions(const build_info &args,
ue2::unordered_set<NFAEdge> &exceptional,
unordered_set<NFAEdge> &exceptional,
u32 maxShift) {
const NGHolder &h = args.h;
@ -1907,7 +1903,7 @@ struct Factory {
continue;
}
if (isExceptionalTransition(h, e, args, maxShift)) {
if (isExceptionalTransition(from, to, args, maxShift)) {
exceptional.insert(e);
}
}
@ -2171,9 +2167,9 @@ struct Factory {
// We track report lists that have already been written into the global
// list in case we can reuse them.
unordered_map<vector<ReportID>, u32> reports_cache;
ReportListCache reports_cache;
ue2::unordered_set<NFAEdge> exceptional;
unordered_set<NFAEdge> exceptional;
u32 shiftCount = findBestNumOfVarShifts(args);
assert(shiftCount);
u32 maxShift = findMaxVarShift(args, shiftCount);
@ -2377,10 +2373,10 @@ MAKE_LIMEX_TRAITS(512)
// Some sanity tests, called by an assertion in generate().
static UNUSED
bool isSane(const NGHolder &h, const map<u32, set<NFAVertex>> &tops,
const ue2::unordered_map<NFAVertex, u32> &state_ids,
const unordered_map<NFAVertex, u32> &state_ids,
u32 num_states) {
ue2::unordered_set<u32> seen;
ue2::unordered_set<NFAVertex> top_starts;
unordered_set<u32> seen;
unordered_set<NFAVertex> top_starts;
for (const auto &vv : tops | map_values) {
insert(&top_starts, vv);
}
@ -2427,7 +2423,7 @@ bool isSane(const NGHolder &h, const map<u32, set<NFAVertex>> &tops,
#endif // NDEBUG
static
u32 max_state(const ue2::unordered_map<NFAVertex, u32> &state_ids) {
u32 max_state(const unordered_map<NFAVertex, u32> &state_ids) {
u32 rv = 0;
for (const auto &m : state_ids) {
DEBUG_PRINTF("state %u\n", m.second);
@ -2440,14 +2436,14 @@ u32 max_state(const ue2::unordered_map<NFAVertex, u32> &state_ids) {
}
bytecode_ptr<NFA> generate(NGHolder &h,
const ue2::unordered_map<NFAVertex, u32> &states,
const vector<BoundedRepeatData> &repeats,
const map<NFAVertex, NFAStateSet> &reportSquashMap,
const map<NFAVertex, NFAStateSet> &squashMap,
const map<u32, set<NFAVertex>> &tops,
const set<NFAVertex> &zombies, bool do_accel,
bool stateCompression, u32 hint,
const CompileContext &cc) {
const unordered_map<NFAVertex, u32> &states,
const vector<BoundedRepeatData> &repeats,
const unordered_map<NFAVertex, NFAStateSet> &reportSquashMap,
const unordered_map<NFAVertex, NFAStateSet> &squashMap,
const map<u32, set<NFAVertex>> &tops,
const set<NFAVertex> &zombies, bool do_accel,
bool stateCompression, u32 hint,
const CompileContext &cc) {
const u32 num_states = max_state(states) + 1;
DEBUG_PRINTF("total states: %u\n", num_states);
@ -2510,13 +2506,13 @@ bytecode_ptr<NFA> generate(NGHolder &h,
}
u32 countAccelStates(NGHolder &h,
const ue2::unordered_map<NFAVertex, u32> &states,
const vector<BoundedRepeatData> &repeats,
const map<NFAVertex, NFAStateSet> &reportSquashMap,
const map<NFAVertex, NFAStateSet> &squashMap,
const map<u32, set<NFAVertex>> &tops,
const set<NFAVertex> &zombies,
const CompileContext &cc) {
const unordered_map<NFAVertex, u32> &states,
const vector<BoundedRepeatData> &repeats,
const unordered_map<NFAVertex, NFAStateSet> &reportSquashMap,
const unordered_map<NFAVertex, NFAStateSet> &squashMap,
const map<u32, set<NFAVertex>> &tops,
const set<NFAVertex> &zombies,
const CompileContext &cc) {
const u32 num_states = max_state(states) + 1;
DEBUG_PRINTF("total states: %u\n", num_states);

View File

@ -34,15 +34,16 @@
#ifndef LIMEX_COMPILE_H
#define LIMEX_COMPILE_H
#include <map>
#include <memory>
#include <vector>
#include "nfagraph/ng_holder.h"
#include "nfagraph/ng_squash.h" // for NFAStateSet
#include "ue2common.h"
#include "util/bytecode_ptr.h"
#include "util/ue2_containers.h"
#include <set>
#include <map>
#include <memory>
#include <unordered_map>
#include <vector>
struct NFA;
@ -69,16 +70,16 @@ struct CompileContext;
* graph.
*/
bytecode_ptr<NFA> generate(NGHolder &g,
const ue2::unordered_map<NFAVertex, u32> &states,
const std::vector<BoundedRepeatData> &repeats,
const std::map<NFAVertex, NFAStateSet> &reportSquashMap,
const std::map<NFAVertex, NFAStateSet> &squashMap,
const std::map<u32, std::set<NFAVertex>> &tops,
const std::set<NFAVertex> &zombies,
bool do_accel,
bool stateCompression,
u32 hint,
const CompileContext &cc);
const std::unordered_map<NFAVertex, u32> &states,
const std::vector<BoundedRepeatData> &repeats,
const std::unordered_map<NFAVertex, NFAStateSet> &reportSquashMap,
const std::unordered_map<NFAVertex, NFAStateSet> &squashMap,
const std::map<u32, std::set<NFAVertex>> &tops,
const std::set<NFAVertex> &zombies,
bool do_accel,
bool stateCompression,
u32 hint,
const CompileContext &cc);
/**
* \brief For a given graph, count the number of accelerable states it has.
@ -87,13 +88,13 @@ bytecode_ptr<NFA> generate(NGHolder &g,
* implementable.
*/
u32 countAccelStates(NGHolder &h,
const ue2::unordered_map<NFAVertex, u32> &states,
const std::vector<BoundedRepeatData> &repeats,
const std::map<NFAVertex, NFAStateSet> &reportSquashMap,
const std::map<NFAVertex, NFAStateSet> &squashMap,
const std::map<u32, std::set<NFAVertex>> &tops,
const std::set<NFAVertex> &zombies,
const CompileContext &cc);
const std::unordered_map<NFAVertex, u32> &states,
const std::vector<BoundedRepeatData> &repeats,
const std::unordered_map<NFAVertex, NFAStateSet> &reportSquashMap,
const std::unordered_map<NFAVertex, NFAStateSet> &squashMap,
const std::map<u32, std::set<NFAVertex>> &tops,
const std::set<NFAVertex> &zombies,
const CompileContext &cc);
} // namespace ue2

View File

@ -487,25 +487,24 @@ void dumpLimDotInfo(const limex_type *limex, u32 state, FILE *f) {
}
}
template<typename limex_type>
static
void dumpLimexDot(const NFA *nfa, const limex_type *limex, FILE *f) {
dumpDotPreamble(f);
u32 state_count = nfa->nPositions;
dumpVertexDotInfo(limex, state_count, f, limex_labeller<limex_type>(limex));
for (u32 i = 0; i < state_count; i++) {
dumpLimDotInfo(limex, i, f);
dumpExDotInfo(limex, i, f);
}
dumpDotTrailer(f);
}
#define LIMEX_DUMP_FN(size) \
void nfaExecLimEx##size##_dump(const NFA *nfa, const string &base) { \
auto limex = (const LimExNFA##size *)getImplNfa(nfa); \
\
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); \
dumpLimexText(limex, f); \
fclose(f); \
\
f = fopen_or_throw((base + ".dot").c_str(), "w"); \
dumpDotPreamble(f); \
u32 state_count = nfa->nPositions; \
dumpVertexDotInfo(limex, state_count, f, \
limex_labeller<LimExNFA##size>(limex)); \
for (u32 i = 0; i < state_count; i++) { \
dumpLimDotInfo(limex, i, f); \
dumpExDotInfo(limex, i, f); \
} \
dumpDotTrailer(f); \
fclose(f); \
dumpLimexText(limex, StdioFile(base + ".txt", "w")); \
dumpLimexDot(nfa, limex, StdioFile(base + ".dot", "w")); \
}
LIMEX_DUMP_FN(32)
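
The dump macro shrinks because the fopen_or_throw/fclose pairs are replaced by StdioFile temporaries passed straight to the dumpers. Presumably StdioFile (util/dump_util.h) is an RAII wrapper convertible to FILE*; a minimal sketch under that assumption:

#include <cstdio>
#include <stdexcept>
#include <string>

class StdioFileSketch {
    FILE *f;
public:
    StdioFileSketch(const std::string &name, const char *mode)
        : f(std::fopen(name.c_str(), mode)) {
        if (!f) {
            throw std::runtime_error("failed to open " + name);
        }
    }
    ~StdioFileSketch() { std::fclose(f); }
    StdioFileSketch(const StdioFileSketch &) = delete;
    StdioFileSketch &operator=(const StdioFileSketch &) = delete;
    operator FILE *() const { return f; } // usable wherever a FILE* is expected
};

With that shape, dumpLimexText(limex, StdioFile(base + ".txt", "w")) keeps the handle open for the duration of the call and closes it when the temporary is destroyed at the end of the statement.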

View File

@ -46,7 +46,7 @@
#include "util/make_unique.h"
#include "util/order_check.h"
#include "util/report_manager.h"
#include "util/ue2_containers.h"
#include "util/flat_containers.h"
#include "util/unaligned.h"
#include "util/verify_types.h"
@ -288,11 +288,12 @@ unique_ptr<raw_report_info> mcclellan_build_strat::gatherReports(
raw_report_list rrl(s.reports, rm, remap_reports);
DEBUG_PRINTF("non empty r\n");
if (rev.find(rrl) != rev.end()) {
reports.push_back(rev[rrl]);
auto it = rev.find(rrl);
if (it != rev.end()) {
reports.push_back(it->second);
} else {
DEBUG_PRINTF("adding to rl %zu\n", ri->size());
rev[rrl] = ri->size();
rev.emplace(rrl, ri->size());
reports.push_back(ri->size());
ri->rl.push_back(rrl);
}
@ -306,13 +307,14 @@ unique_ptr<raw_report_info> mcclellan_build_strat::gatherReports(
DEBUG_PRINTF("non empty r eod\n");
raw_report_list rrl(s.reports_eod, rm, remap_reports);
if (rev.find(rrl) != rev.end()) {
reports_eod.push_back(rev[rrl]);
auto it = rev.find(rrl);
if (it != rev.end()) {
reports_eod.push_back(it->second);
continue;
}
DEBUG_PRINTF("adding to rl eod %zu\n", s.reports_eod.size());
rev[rrl] = ri->size();
rev.emplace(rrl, ri->size());
reports_eod.push_back(ri->size());
ri->rl.push_back(rrl);
}
@ -325,10 +327,9 @@ unique_ptr<raw_report_info> mcclellan_build_strat::gatherReports(
*arbReport = 0;
}
/* if we have only a single report id generated from all accepts (not eod)
* we can take some short cuts */
set<ReportID> reps;
flat_set<ReportID> reps;
for (u32 rl_index : reports) {
if (rl_index == MO_INVALID_IDX) {
@ -897,7 +898,7 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit,
}
u32 self_loop_width = 0;
const dstate curr_raw = info.states[curr_id];
const dstate &curr_raw = info.states[curr_id];
for (unsigned i = 0; i < N_CHARS; i++) {
if (curr_raw.next[info.alpha_remap[i]] == curr_id) {
self_loop_width++;
@ -914,33 +915,6 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit,
info.extra[curr_id].shermanState = true;
}
/*
* Calls accessible outside this module.
*/
u16 raw_dfa::getImplAlphaSize() const {
return alpha_size - N_SPECIAL_SYMBOL;
}
void raw_dfa::stripExtraEodReports(void) {
/* if a state generates a given report as a normal accept - then it does
* not also need to generate an eod report for it */
for (dstate &ds : states) {
for (const ReportID &report : ds.reports) {
ds.reports_eod.erase(report);
}
}
}
bool raw_dfa::hasEodReports(void) const {
for (const dstate &ds : states) {
if (!ds.reports_eod.empty()) {
return true;
}
}
return false;
}
static
bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
symbol_t alphasize = raw.getImplAlphaSize();
@ -964,7 +938,8 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
const CompileContext &cc,
bool trust_daddy_states,
set<dstate_id_t> *accel_states) {
u16 total_daddy = 0;
assert(!is_dead(raw));
dfa_info info(strat);
bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256;
@ -974,21 +949,24 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
}
bool has_eod_reports = raw.hasEodReports();
bool any_cyclic_near_anchored_state = is_cyclic_near(raw,
raw.start_anchored);
for (u32 i = 0; i < info.size(); i++) {
find_better_daddy(info, i, using8bit, any_cyclic_near_anchored_state,
trust_daddy_states, cc.grey);
total_daddy += info.extra[i].daddytaken;
}
DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
info.size() * info.impl_alpha_size, info.size(),
info.impl_alpha_size);
bytecode_ptr<NFA> nfa;
if (!using8bit) {
u16 total_daddy = 0;
bool any_cyclic_near_anchored_state
= is_cyclic_near(raw, raw.start_anchored);
for (u32 i = 0; i < info.size(); i++) {
find_better_daddy(info, i, using8bit,
any_cyclic_near_anchored_state,
trust_daddy_states, cc.grey);
total_daddy += info.extra[i].daddytaken;
}
DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
info.size() * info.impl_alpha_size, info.size(),
info.impl_alpha_size);
nfa = mcclellanCompile16(info, cc, accel_states);
} else {
nfa = mcclellanCompile8(info, cc, accel_states);

View File

@ -33,7 +33,6 @@
#include "rdfa.h"
#include "ue2common.h"
#include "util/bytecode_ptr.h"
#include "util/ue2_containers.h"
#include <memory>
#include <vector>

View File

@ -30,12 +30,11 @@
#include "rdfa.h"
#include "util/container.h"
#include "util/ue2_containers.h"
#include "util/hash.h"
#include "ue2common.h"
#include <deque>
#include <boost/functional/hash/hash.hpp>
#include <map>
using namespace std;
@ -127,13 +126,11 @@ u32 remove_leading_dots(raw_dfa &raw) {
static never_inline
u32 calc_min_dist_from_bob(raw_dfa &raw, vector<u32> *dist_in) {
vector<u32> &dist = *dist_in;
dist.clear();
dist.resize(raw.states.size(), ~0U);
dist.assign(raw.states.size(), ~0U);
assert(raw.start_anchored != DEAD_STATE);
deque<dstate_id_t> to_visit;
to_visit.push_back(raw.start_anchored);
deque<dstate_id_t> to_visit = { raw.start_anchored };
dist[raw.start_anchored] = 0;
u32 last_d = 0;
@ -148,8 +145,7 @@ u32 calc_min_dist_from_bob(raw_dfa &raw, vector<u32> *dist_in) {
assert(d >= last_d);
assert(d != ~0U);
for (u32 j = 0; j < raw.alpha_size; j++) {
dstate_id_t t = raw.states[s].next[j];
for (dstate_id_t t : raw.states[s].next) {
if (t == DEAD_STATE) {
continue;
}
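
calc_min_dist_from_bob is a plain breadth-first walk: dist is reset in one step with assign(), the work queue is seeded with the anchored start at distance zero, and the new range-for visits each next[] entry directly. The same shape, standalone:

#include <cstdint>
#include <deque>
#include <vector>

std::vector<std::uint32_t>
bfs_dist(const std::vector<std::vector<std::uint32_t>> &next,
         std::uint32_t start) {
    std::vector<std::uint32_t> dist(next.size(), ~0u); // ~0u == unreachable
    std::deque<std::uint32_t> q = {start};
    dist[start] = 0;
    while (!q.empty()) {
        const std::uint32_t s = q.front();
        q.pop_front();
        for (std::uint32_t t : next[s]) { // mirrors `for (dstate_id_t t : ...)`
            if (dist[t] == ~0u) {
                dist[t] = dist[s] + 1;
                q.push_back(t);
            }
        }
    }
    return dist;
}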
@ -187,7 +183,21 @@ bool clear_deeper_reports(raw_dfa &raw, u32 max_offset) {
}
}
return changed;
if (!changed) {
return false;
}
// We may have cleared all reports from the DFA, in which case it should
// become empty.
if (all_of_in(raw.states, [](const dstate &ds) {
return ds.reports.empty() && ds.reports_eod.empty();
})) {
DEBUG_PRINTF("no reports left at all, dfa is dead\n");
raw.start_anchored = DEAD_STATE;
raw.start_floating = DEAD_STATE;
}
return true;
}
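
The new tail of clear_deeper_reports detects when pruning has removed every report; in that case the DFA can never match, so both start pointers are set to DEAD_STATE and the is_dead() predicate added below holds. The emptiness test is an all_of over states, roughly:

#include <algorithm>
#include <set>
#include <vector>

struct DState {
    std::set<unsigned> reports, reports_eod;
};

// mirrors the all_of_in() predicate used above
bool no_reports_left(const std::vector<DState> &states) {
    return std::all_of(states.begin(), states.end(), [](const DState &ds) {
        return ds.reports.empty() && ds.reports_eod.empty();
    });
}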
set<ReportID> all_reports(const raw_dfa &rdfa) {
@ -218,22 +228,18 @@ bool has_non_eod_accepts(const raw_dfa &rdfa) {
}
size_t hash_dfa_no_reports(const raw_dfa &rdfa) {
using boost::hash_combine;
using boost::hash_range;
size_t v = 0;
hash_combine(v, rdfa.alpha_size);
hash_combine(v, hash_range(begin(rdfa.alpha_remap), end(rdfa.alpha_remap)));
hash_combine(v, rdfa.alpha_remap);
for (const auto &ds : rdfa.states) {
hash_combine(v, hash_range(begin(ds.next), end(ds.next)));
hash_combine(v, ds.next);
}
return v;
}
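
hash_dfa_no_reports now hands whole containers to hash_combine, relying on util/hash.h overloads to iterate, where the old code spelled out hash_range by hand. For one container, the boost equivalent is:

#include <boost/functional/hash.hpp>
#include <cstddef>
#include <vector>

std::size_t hash_vec(std::size_t seed, const std::vector<int> &v) {
    boost::hash_combine(seed, boost::hash_range(v.begin(), v.end()));
    return seed;
}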
size_t hash_dfa(const raw_dfa &rdfa) {
using boost::hash_combine;
size_t v = 0;
hash_combine(v, hash_dfa_no_reports(rdfa));
hash_combine(v, all_reports(rdfa));
@ -272,4 +278,9 @@ bool can_die_early(const raw_dfa &raw, u32 age_limit) {
return can_die_early(raw, raw.start_anchored, visited, age_limit);
}
bool is_dead(const raw_dfa &rdfa) {
return rdfa.start_anchored == DEAD_STATE &&
rdfa.start_floating == DEAD_STATE;
}
} // namespace ue2

View File

@ -59,6 +59,13 @@ size_t hash_dfa(const raw_dfa &rdfa);
bool can_die_early(const raw_dfa &raw, u32 age_limit);
/**
* \brief Returns true if this DFA cannot match, i.e. its start state is
* DEAD_STATE.
*/
bool is_dead(const raw_dfa &rdfa);
} // namespace ue2
#endif

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -442,22 +442,14 @@ void nfaExecMcClellan8_dumpText(const NFA *nfa, FILE *f) {
void nfaExecMcClellan16_dump(const NFA *nfa, const string &base) {
assert(nfa->type == MCCLELLAN_NFA_16);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
nfaExecMcClellan16_dumpText(nfa, f);
fclose(f);
f = fopen_or_throw((base + ".dot").c_str(), "w");
nfaExecMcClellan16_dumpDot(nfa, f);
fclose(f);
nfaExecMcClellan16_dumpText(nfa, StdioFile(base + ".txt", "w"));
nfaExecMcClellan16_dumpDot(nfa, StdioFile(base + ".dot", "w"));
}
void nfaExecMcClellan8_dump(const NFA *nfa, const string &base) {
assert(nfa->type == MCCLELLAN_NFA_8);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
nfaExecMcClellan8_dumpText(nfa, f);
fclose(f);
f = fopen_or_throw((base + ".dot").c_str(), "w");
nfaExecMcClellan8_dumpDot(nfa, f);
fclose(f);
nfaExecMcClellan8_dumpText(nfa, StdioFile(base + ".txt", "w"));
nfaExecMcClellan8_dumpDot(nfa, StdioFile(base + ".dot", "w"));
}
} // namespace ue2

View File

@ -45,13 +45,14 @@
#include "util/compare.h"
#include "util/compile_context.h"
#include "util/container.h"
#include "util/flat_containers.h"
#include "util/graph.h"
#include "util/graph_range.h"
#include "util/make_unique.h"
#include "util/order_check.h"
#include "util/report_manager.h"
#include "util/ue2_containers.h"
#include "util/unaligned.h"
#include "util/unordered.h"
#include "util/verify_types.h"
#include <algorithm>
@ -383,6 +384,8 @@ CharReach get_edge_reach(dstate_id_t u, dstate_id_t v, const dfa_info &info) {
#define MAX_SHENG_STATES 16
#define MAX_SHENG_LEAKINESS 0.05
using LeakinessCache = ue2_unordered_map<pair<RdfaVertex, u32>, double>;
/**
* Returns the proportion of strings of length 'depth' which will leave the
* sheng region when starting at state 'u'.
@ -390,8 +393,7 @@ CharReach get_edge_reach(dstate_id_t u, dstate_id_t v, const dfa_info &info) {
static
double leakiness(const RdfaGraph &g, dfa_info &info,
const flat_set<RdfaVertex> &sheng_states, RdfaVertex u,
u32 depth,
unordered_map<pair<RdfaVertex, u32>, double> &cache) {
u32 depth, LeakinessCache &cache) {
double rv = 0;
if (contains(cache, make_pair(u, depth))) {
return cache[make_pair(u, depth)];
@ -426,7 +428,7 @@ double leakiness(const RdfaGraph &g, dfa_info &info,
static
double leakiness(const RdfaGraph &g, dfa_info &info,
const flat_set<RdfaVertex> &sheng_states, RdfaVertex u) {
unordered_map<pair<RdfaVertex, u32>, double> cache;
LeakinessCache cache;
double rv = leakiness(g, info, sheng_states, u, 8, cache);
return rv;
}
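
leakiness() is a depth-bounded recursion memoised on the (state, remaining depth) pair, which the LeakinessCache alias above now names explicitly. The caching skeleton with standard containers, recursion body elided:

#include <map>
#include <utility>

double leak(int u, unsigned depth,
            std::map<std::pair<int, unsigned>, double> &cache) {
    const auto key = std::make_pair(u, depth);
    auto it = cache.find(key);
    if (it != cache.end()) {
        return it->second; // already computed for this (state, depth)
    }
    double rv = 0.0;
    if (depth > 0) {
        // ... sum leak(v, depth - 1, cache) over successors v of u,
        //     weighted by how many symbols lead from u to v ...
    }
    cache.emplace(key, rv);
    return rv;
}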
@ -738,7 +740,7 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id,
assert(info.is_normal(currState.daddy));
u32 self_loop_width = 0;
const dstate curr_raw = info.states[curr_id];
const dstate &curr_raw = info.states[curr_id];
for (unsigned i = 0; i < N_CHARS; i++) {
if (curr_raw.next[info.alpha_remap[i]] == curr_id) {
self_loop_width++;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, Intel Corporation
* Copyright (c) 2016-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -394,22 +394,14 @@ void dump_text_8(const NFA *nfa, FILE *f) {
void nfaExecMcSheng16_dump(const NFA *nfa, const string &base) {
assert(nfa->type == MCSHENG_NFA_16);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
dump_text_16(nfa, f);
fclose(f);
f = fopen_or_throw((base + ".dot").c_str(), "w");
dump_dot_16(nfa, f);
fclose(f);
dump_text_16(nfa, StdioFile(base + ".txt", "w"));
dump_dot_16(nfa, StdioFile(base + ".dot", "w"));
}
void nfaExecMcSheng8_dump(const NFA *nfa, const string &base) {
assert(nfa->type == MCSHENG_NFA_8);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
dump_text_8(nfa, f);
fclose(f);
f = fopen_or_throw((base + ".dot").c_str(), "w");
dump_dot_8(nfa, f);
fclose(f);
dump_text_8(nfa, StdioFile(base + ".txt", "w"));
dump_dot_8(nfa, StdioFile(base + ".dot", "w"));
}
} // namespace ue2

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -132,7 +132,7 @@ void dumpCounter(FILE *f, const mpv_counter_info *c) {
void nfaExecMpv_dump(const NFA *nfa, const string &base) {
const mpv *m = (const mpv *)getImplNfa(nfa);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
StdioFile f(base + ".txt", "w");
fprintf(f, "Puff the Magic Engines\n");
fprintf(f, "\n");
@ -154,7 +154,6 @@ void nfaExecMpv_dump(const NFA *nfa, const string &base) {
}
dumpTextReverse(nfa, f);
fclose(f);
}
} // namespace ue2

src/nfa/rdfa.cpp Normal file
View File

@ -0,0 +1,55 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "rdfa.h"
namespace ue2 {
// prevent weak vtables
raw_dfa::~raw_dfa() {}
void raw_dfa::stripExtraEodReports(void) {
/* if a state generates a given report as a normal accept - then it does
* not also need to generate an eod report for it */
for (dstate &ds : states) {
for (const ReportID &report : ds.reports) {
ds.reports_eod.erase(report);
}
}
}
bool raw_dfa::hasEodReports(void) const {
for (const dstate &ds : states) {
if (!ds.reports_eod.empty()) {
return true;
}
}
return false;
}
} // namespace ue2

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -32,7 +32,7 @@
#include "nfa_kind.h"
#include "ue2common.h"
#include "util/ue2_containers.h"
#include "util/flat_containers.h"
#include <array>
#include <vector>
@ -81,7 +81,7 @@ struct raw_dfa {
explicit raw_dfa(nfa_kind k) : kind(k) {}
virtual ~raw_dfa();
u16 getImplAlphaSize() const;
u16 getImplAlphaSize() const { return alpha_size - N_SPECIAL_SYMBOL; }
virtual void stripExtraEodReports(void);
bool hasEodReports(void) const;
};

View File

@ -36,9 +36,10 @@
#include "nfagraph/ng_mcclellan_internal.h"
#include "util/container.h"
#include "util/determinise.h"
#include "util/flat_containers.h"
#include "util/make_unique.h"
#include "util/report_manager.h"
#include "util/ue2_containers.h"
#include "util/unordered.h"
#include <algorithm>
#include <queue>
@ -53,8 +54,8 @@ namespace {
class Automaton_Merge {
public:
typedef vector<u16> StateSet;
typedef ue2::unordered_map<StateSet, dstate_id_t> StateMap;
using StateSet = vector<u16>;
using StateMap = ue2_unordered_map<StateSet, dstate_id_t>;
Automaton_Merge(const raw_dfa *rdfa1, const raw_dfa *rdfa2,
const ReportManager *rm_in, const Grey &grey_in)
@ -289,7 +290,7 @@ unique_ptr<raw_dfa> mergeTwoDfas(const raw_dfa *d1, const raw_dfa *d2,
auto rdfa = ue2::make_unique<raw_dfa>(d1->kind);
Automaton_Merge autom(d1, d2, rm, grey);
if (!determinise(autom, rdfa->states, max_states)) {
if (determinise(autom, rdfa->states, max_states)) {
rdfa->start_anchored = autom.start_anchored;
rdfa->start_floating = autom.start_floating;
rdfa->alpha_size = autom.alphasize;
@ -374,7 +375,7 @@ unique_ptr<raw_dfa> mergeAllDfas(const vector<const raw_dfa *> &dfas,
DEBUG_PRINTF("merging dfa\n");
if (determinise(n, rdfa->states, max_states)) {
if (!determinise(n, rdfa->states, max_states)) {
DEBUG_PRINTF("state limit (%zu) exceeded\n", max_states);
return nullptr; /* over state limit */
}
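
Note both determinise() call sites in this file flip their sense; together with the ng_haig changes further down, this suggests determinise() now returns true on success (the automaton stayed within max_states) rather than a nonzero failure code, so the guards read:

    if (!determinise(n, rdfa->states, max_states)) {
        return nullptr; /* over state limit */
    }
    /* on success, rdfa->states has been populated */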

View File

@ -33,7 +33,10 @@
#include "rdfa.h"
#include "util/bytecode_ptr.h"
#include "util/charreach.h"
#include "util/ue2_containers.h"
#include "util/flat_containers.h"
#include <memory>
#include <set>
struct NFA;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, Intel Corporation
* Copyright (c) 2016-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -41,7 +41,6 @@
#include "util/dump_util.h"
#include "util/simd_types.h"
#ifndef DUMP_SUPPORT
#error No dump support!
#endif
@ -267,12 +266,8 @@ void nfaExecSheng_dumpDot(const NFA *nfa, FILE *f) {
void nfaExecSheng_dump(const NFA *nfa, const string &base) {
assert(nfa->type == SHENG_NFA);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
nfaExecSheng_dumpText(nfa, f);
fclose(f);
f = fopen_or_throw((base + ".dot").c_str(), "w");
nfaExecSheng_dumpDot(nfa, f);
fclose(f);
nfaExecSheng_dumpText(nfa, StdioFile(base + ".txt", "w"));
nfaExecSheng_dumpDot(nfa, StdioFile(base + ".dot", "w"));
}
} // namespace ue2

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -33,7 +33,7 @@
#include "ue2common.h"
#include "util/charreach.h"
#include "util/container.h"
#include "util/ue2_containers.h"
#include "util/flat_containers.h"
#include <array>
#include <cassert>

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -35,7 +35,7 @@
#include "ue2common.h"
#include "util/charreach.h"
#include "util/ue2_containers.h"
#include "util/flat_containers.h"
#include <utility>

View File

@ -27,7 +27,7 @@
*/
/** \file
* \brief Tamarama: container engine for exclusve engines, dump code.
* \brief Tamarama: container engine for exclusive engines, dump code.
*/
#include "config.h"
@ -54,7 +54,7 @@ namespace ue2 {
void nfaExecTamarama_dump(const struct NFA *nfa, const string &base) {
const Tamarama *t = (const Tamarama *)getImplNfa(nfa);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
StdioFile f(base + ".txt", "w");
fprintf(f, "Tamarama container engine\n");
fprintf(f, "\n");
@ -63,7 +63,6 @@ void nfaExecTamarama_dump(const struct NFA *nfa, const string &base) {
fprintf(f, "\n");
dumpTextReverse(nfa, f);
fprintf(f, "\n");
fclose(f);
const u32 *subOffset =
(const u32 *)((const char *)t + sizeof(struct Tamarama) +

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -32,12 +32,15 @@
* truffle is always able to represent an entire character class, providing a
* backstop to other acceleration engines.
*/
#include "trufflecompile.h"
#include "ue2common.h"
#include "util/charreach.h"
#include "util/dump_mask.h"
#include "util/simd_types.h"
#include "util/dump_mask.h"
#include <cstring>
using namespace std;

View File

@ -44,7 +44,6 @@
#include "util/graph.h"
#include "util/noncopyable.h"
#include "util/report_manager.h"
#include "util/ue2_containers.h"
#include <deque>
#include <map>

View File

@ -220,6 +220,52 @@ vector<NFAEdge> findShellEdges(const NGHolder &g,
return shell_edges;
}
template<typename GetAdjRange>
bool shellHasOnePath(const NGHolder &g, const flat_set<NFAVertex> &shell,
GetAdjRange adj_range_func) {
if (shell.empty()) {
DEBUG_PRINTF("no shell\n");
return false;
}
NFAVertex exit_vertex = NGHolder::null_vertex();
for (auto u : shell) {
for (auto v : adj_range_func(u, g)) {
if (contains(shell, v)) {
continue;
}
if (!exit_vertex) {
exit_vertex = v;
continue;
}
if (exit_vertex == v) {
continue;
}
return false;
}
}
return true;
}
/**
* True if all edges out of vertices in the head shell lead to at most a single
* outside vertex, or the inverse for the tail shell.
*/
static
bool shellHasOnePath(const NGHolder &g, const flat_set<NFAVertex> &head_shell,
const flat_set<NFAVertex> &tail_shell) {
if (shellHasOnePath(g, head_shell, adjacent_vertices_range<NGHolder>)) {
DEBUG_PRINTF("head shell has only one path through it\n");
return true;
}
if (shellHasOnePath(g, tail_shell, inv_adjacent_vertices_range<NGHolder>)) {
DEBUG_PRINTF("tail shell has only one path into it\n");
return true;
}
return false;
}
/**
* Common code called by calc- and recalc- below. Splits the given holder into
* one or more connected components, adding them to the comps deque.
@ -250,16 +296,25 @@ void splitIntoComponents(unique_ptr<NGHolder> g,
return;
}
// Find edges connecting the head and tail shells directly.
vector<NFAEdge> shell_edges = findShellEdges(*g, head_shell, tail_shell);
DEBUG_PRINTF("%zu vertices in head, %zu in tail, %zu shell edges\n",
head_shell.size(), tail_shell.size(), shell_edges.size());
ue2::unordered_map<NFAVertex, NFAUndirectedVertex> old2new;
// If there are no shell edges and only one path out of the head shell or
// into the tail shell, we aren't going to find more than one component.
if (shell_edges.empty() && shellHasOnePath(*g, head_shell, tail_shell)) {
DEBUG_PRINTF("single component\n");
comps.push_back(std::move(g));
return;
}
unordered_map<NFAVertex, NFAUndirectedVertex> old2new;
auto ug = createUnGraph(*g, true, true, old2new);
// Construct reverse mapping.
ue2::unordered_map<NFAUndirectedVertex, NFAVertex> new2old;
unordered_map<NFAUndirectedVertex, NFAVertex> new2old;
for (const auto &m : old2new) {
new2old.emplace(m.second, m.first);
}
@ -301,7 +356,7 @@ void splitIntoComponents(unique_ptr<NGHolder> g,
DEBUG_PRINTF("vertex %zu is in comp %u\n", (*g)[v].index, c);
}
ue2::unordered_map<NFAVertex, NFAVertex> v_map; // temp map for fillHolder
unordered_map<NFAVertex, NFAVertex> v_map; // temp map for fillHolder
for (auto &vv : verts) {
// Shells are in every component.
vv.insert(vv.end(), begin(head_shell), end(head_shell));

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -62,9 +62,11 @@
#include "ng_prune.h"
#include "ng_util.h"
#include "util/container.h"
#include "util/flat_containers.h"
#include "util/graph_range.h"
#include "util/ue2_containers.h"
#include "util/graph_small_color_map.h"
#include <algorithm>
#include <boost/graph/depth_first_search.hpp>
#include <boost/graph/reverse_graph.hpp>
@ -123,17 +125,17 @@ class SearchVisitor : public boost::default_dfs_visitor {
} // namespace
template<class Graph>
template<class Graph, class ColorMap>
static
bool searchForward(const Graph &g, const CharReach &reach,
ColorMap &colours,
const flat_set<typename Graph::vertex_descriptor> &s,
typename Graph::vertex_descriptor w) {
map<NFAVertex, boost::default_color_type> colours;
colours.fill(small_color::white);
try {
depth_first_visit(g, w, SearchVisitor(reach),
make_assoc_property_map(colours),
VertexInSet<typename Graph::vertex_descriptor, Graph>(s));
} catch (SearchFailed&) {
depth_first_visit(g, w, SearchVisitor(reach), colours,
VertexInSet<typename Graph::vertex_descriptor, Graph>(s));
} catch (SearchFailed &) {
return false;
}
@ -162,6 +164,9 @@ bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v,
typedef typename Graph::vertex_descriptor vertex_descriptor;
// Colour map used for depth_first_visit().
auto colours = make_small_color_map(g);
// precalc successors of v.
flat_set<vertex_descriptor> succ_v;
insert(&succ_v, adjacent_vertices(v, g));
@ -200,7 +205,7 @@ bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v,
DEBUG_PRINTF(" - checking w %zu\n", g[w].index);
if (!searchForward(g, reach, s, w)) {
if (!searchForward(g, reach, colours, s, w)) {
continue;
}
@ -234,6 +239,8 @@ bool cyclicPathRedundancyPass(Graph &g, NGHolder &raw) {
}
bool removeCyclicPathRedundancy(NGHolder &g) {
assert(hasCorrectlyNumberedVertices(g));
// Forward pass.
bool f_changed = cyclicPathRedundancyPass(g, g);
if (f_changed) {

View File

@ -34,17 +34,18 @@
#include "ng_util.h"
#include "ue2common.h"
#include "util/graph_range.h"
#include "util/graph_small_color_map.h"
#include <deque>
#include <vector>
#include <boost/graph/breadth_first_search.hpp>
#include <boost/graph/dag_shortest_paths.hpp>
#include <boost/graph/depth_first_search.hpp>
#include <boost/graph/breadth_first_search.hpp>
#include <boost/graph/filtered_graph.hpp>
#include <boost/graph/property_maps/constant_property_map.hpp>
#include <boost/graph/reverse_graph.hpp>
#include <boost/graph/topological_sort.hpp>
#include <boost/graph/property_maps/constant_property_map.hpp>
#include <boost/range/adaptor/reversed.hpp>
using namespace std;
@ -137,13 +138,15 @@ vector<bool> findLoopReachable(const Graph &g,
EdgeSet deadEdges;
BackEdges<EdgeSet> be(deadEdges);
depth_first_search(g, visitor(be).root_vertex(src));
auto colors = make_small_color_map(g);
depth_first_search(g, be, colors, src);
auto af = make_bad_edge_filter(&deadEdges);
auto acyclic_g = make_filtered_graph(g, af);
vector<Vertex> topoOrder; /* actually reverse topological order */
topoOrder.reserve(deadNodes.size());
topological_sort(acyclic_g, back_inserter(topoOrder));
topological_sort(acyclic_g, back_inserter(topoOrder), color_map(colors));
for (const auto &e : deadEdges) {
size_t srcIdx = g[source(e, g)].index;
@ -204,14 +207,16 @@ void calcDepthFromSource(const GraphT &g,
visitor(make_bfs_visitor(record_distances(
make_iterator_property_map(dMin.begin(),
min_index_map),
boost::on_tree_edge()))));
boost::on_tree_edge())))
.color_map(make_small_color_map(mindist_g)));
auto max_index_map = get(vertex_index, maxdist_g);
dag_shortest_paths(maxdist_g, srcVertex,
distance_map(make_iterator_property_map(dMax.begin(),
max_index_map))
.weight_map(make_constant_property<EdgeT>(-1)));
.weight_map(make_constant_property<EdgeT>(-1))
.color_map(make_small_color_map(maxdist_g)));
for (size_t i = 0; i < numVerts; i++) {
if (dMin[i] > DIST_UNREACHABLE) {

View File

@ -36,7 +36,6 @@
#include "ue2common.h"
#include "ng_holder.h"
#include "ng_util.h"
#include "util/ue2_containers.h"
#include <boost-patched/graph/dominator_tree.hpp> // locally patched version
#include <boost-patched/graph/reverse_graph.hpp>

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -36,15 +36,14 @@
#define NG_DOMINATORS_H
#include "ng_holder.h"
#include "util/ue2_containers.h"
#include <unordered_map>
namespace ue2 {
class NGHolder;
std::unordered_map<NFAVertex, NFAVertex> findDominators(const NGHolder &g);
ue2::unordered_map<NFAVertex, NFAVertex> findDominators(const NGHolder &g);
ue2::unordered_map<NFAVertex, NFAVertex> findPostDominators(const NGHolder &g);
std::unordered_map<NFAVertex, NFAVertex> findPostDominators(const NGHolder &g);
} // namespace ue2

View File

@ -51,6 +51,7 @@
#include "smallwrite/smallwrite_dump.h"
#include "util/bitutils.h"
#include "util/dump_charclass.h"
#include "util/dump_util.h"
#include "util/report.h"
#include "util/report_manager.h"
#include "util/ue2string.h"
@ -175,7 +176,7 @@ public:
: g(g_in), rm(&rm_in) {}
NFAWriter(const GraphT &g_in,
const ue2::unordered_map<NFAVertex, u32> &region_map_in)
const unordered_map<NFAVertex, u32> &region_map_in)
: g(g_in), region_map(&region_map_in) {}
void operator()(ostream& os, const VertexT& v) const {
@ -253,7 +254,7 @@ public:
private:
const GraphT &g;
const ReportManager *rm = nullptr;
const ue2::unordered_map<NFAVertex, u32> *region_map = nullptr;
const unordered_map<NFAVertex, u32> *region_map = nullptr;
};
}
@ -277,7 +278,7 @@ void dumpGraphImpl(const char *name, const GraphT &g, const ReportManager &rm) {
template <typename GraphT>
void dumpGraphImpl(const char *name, const GraphT &g,
const ue2::unordered_map<NFAVertex, u32> &region_map) {
const unordered_map<NFAVertex, u32> &region_map) {
typedef typename boost::graph_traits<GraphT>::vertex_descriptor VertexT;
typedef typename boost::graph_traits<GraphT>::edge_descriptor EdgeT;
ofstream os(name);
@ -331,7 +332,7 @@ void dumpHolderImpl(const NGHolder &h, unsigned int stageNumber,
}
void dumpHolderImpl(const NGHolder &h,
const ue2::unordered_map<NFAVertex, u32> &region_map,
const unordered_map<NFAVertex, u32> &region_map,
unsigned int stageNumber, const char *stageName,
const Grey &grey) {
if (grey.dumpFlags & Grey::DUMP_INT_GRAPH) {
@ -348,14 +349,7 @@ void dumpSmallWrite(const RoseEngine *rose, const Grey &grey) {
}
const struct SmallWriteEngine *smwr = getSmallWrite(rose);
stringstream ss;
ss << grey.dumpPath << "smallwrite.txt";
FILE *f = fopen(ss.str().c_str(), "w");
smwrDumpText(smwr, f);
fclose(f);
smwrDumpText(smwr, StdioFile(grey.dumpPath + "smallwrite.txt", "w"));
smwrDumpNFA(smwr, false, grey.dumpPath);
}
@ -420,9 +414,7 @@ void dumpReportManager(const ReportManager &rm, const Grey &grey) {
return;
}
stringstream ss;
ss << grey.dumpPath << "internal_reports.txt";
FILE *f = fopen(ss.str().c_str(), "w");
StdioFile f(grey.dumpPath + "internal_reports.txt", "w");
const vector<Report> &reports = rm.reports();
for (size_t i = 0; i < reports.size(); i++) {
const Report &report = reports[i];
@ -461,7 +453,6 @@ void dumpReportManager(const ReportManager &rm, const Grey &grey) {
}
fprintf(f, "\n");
}
fclose(f);
}
} // namespace ue2

View File

@ -36,7 +36,8 @@
#include "grey.h"
#include "ng_holder.h" // for graph types
#include "ue2common.h"
#include "util/ue2_containers.h"
#include <unordered_map>
#ifdef DUMP_SUPPORT
#include <fstream>
@ -75,7 +76,7 @@ void dumpHolderImpl(const NGHolder &h, unsigned int stageNumber,
// Variant that takes a region map as well.
void dumpHolderImpl(const NGHolder &h,
const ue2::unordered_map<NFAVertex, u32> &region_map,
const std::unordered_map<NFAVertex, u32> &region_map,
unsigned int stageNumber, const char *stageName,
const Grey &grey);
@ -123,7 +124,7 @@ void dumpHolder(UNUSED const NGHolder &h, UNUSED unsigned int stageNumber,
UNUSED static inline
void dumpHolder(UNUSED const NGHolder &h,
UNUSED const ue2::unordered_map<NFAVertex, u32> &region_map,
UNUSED const std::unordered_map<NFAVertex, u32> &region_map,
UNUSED unsigned int stageNumber, UNUSED const char *name,
UNUSED const Grey &grey) {
#ifdef DUMP_SUPPORT

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -38,8 +38,8 @@
#include "parser/position.h"
#include "util/compile_context.h"
#include "util/container.h"
#include "util/flat_containers.h"
#include "util/graph_range.h"
#include "util/ue2_containers.h"
#include <set>
#include <vector>
@ -181,6 +181,28 @@ bool removeEdgeRedundancyNearCyclesFwd(NGHolder &g, bool ignore_starts) {
return dead_count;
}
static
bool checkReportsRev(const NGHolder &g, NFAVertex v,
const set<NFAVertex> &happy) {
if (g[v].reports.empty()) {
return true;
}
assert(edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second);
/* an edge to accept takes priority over eod only accept */
NFAVertex accept = edge(v, g.accept, g).second ? g.accept : g.acceptEod;
flat_set<ReportID> happy_reports;
for (NFAVertex u : happy) {
if (edge(u, accept, g).second) {
insert(&happy_reports, g[u].reports);
}
}
return is_subset_of(g[v].reports, happy_reports);
}
/** \brief Redundant self-loop removal (reverse version).
*
* A self loop on a vertex v can be removed if:
@ -233,7 +255,8 @@ bool removeEdgeRedundancyNearCyclesRev(NGHolder &g) {
happy.insert(u);
}
if (!happy.empty() && checkVerticesRev(g, sad, happy)) {
if (!happy.empty() && checkVerticesRev(g, sad, happy)
&& checkReportsRev(g, v, happy)) {
dead_count++;
remove_edge(v, v, g);
}
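
checkReportsRev permits removing the self-loop on v only when every report raised by v is also raised by some happy predecessor on the same accept vertex; is_subset_of over sorted flat_sets is std::includes in spirit:

#include <algorithm>
#include <set>

// true iff every element of `small` also appears in `big` (both sorted)
bool is_subset(const std::set<unsigned> &small, const std::set<unsigned> &big) {
    return std::includes(big.begin(), big.end(), small.begin(), small.end());
}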

View File

@ -37,9 +37,10 @@
#include "ng_holder.h"
#include "ng_util.h"
#include "util/compile_context.h"
#include "util/flat_containers.h"
#include "util/graph_range.h"
#include "util/make_unique.h"
#include "util/ue2_containers.h"
#include "util/unordered.h"
#include <algorithm>
#include <memory>
@ -121,16 +122,9 @@ public:
vertex_flags == b.vertex_flags && rs == b.rs;
}
friend size_t hash_value(const ClassInfo &c) {
size_t val = 0;
boost::hash_combine(val, c.rs);
boost::hash_combine(val, c.vertex_flags);
boost::hash_combine(val, c.cr);
boost::hash_combine(val, c.adjacent_cr);
boost::hash_combine(val, c.node_type);
boost::hash_combine(val, c.depth.d1);
boost::hash_combine(val, c.depth.d2);
return val;
size_t hash() const {
return hash_all(rs, vertex_flags, cr, adjacent_cr, node_type, depth.d1,
depth.d2);
}
private:
@ -319,7 +313,7 @@ vector<VertexInfoSet> partitionGraph(vector<unique_ptr<VertexInfo>> &infos,
const size_t num_verts = infos.size();
vector<VertexInfoSet> classes;
unordered_map<ClassInfo, unsigned> classinfomap;
ue2_unordered_map<ClassInfo, unsigned> classinfomap;
// assume we will have lots of classes, so we don't waste time resizing
// these structures.
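
ClassInfo swaps its friend hash_value() chain of boost::hash_combine calls for a hash() member built on hash_all, and partitionGraph keys its map with ue2_unordered_map, presumably an unordered_map wired to that member hash. A variadic hash_all lookalike:

#include <boost/functional/hash.hpp>
#include <cstddef>

inline void hash_build(std::size_t &) {} // base case: nothing left to fold in

template <typename T, typename... Rest>
void hash_build(std::size_t &seed, const T &t, const Rest &... rest) {
    boost::hash_combine(seed, t);
    hash_build(seed, rest...);
}

template <typename... Args>
std::size_t hash_all(const Args &... args) {
    std::size_t seed = 0;
    hash_build(seed, args...);
    return seed;
}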

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -35,7 +35,7 @@
#define NG_EXECUTE_H
#include "ng_holder.h"
#include "util/ue2_containers.h"
#include "util/flat_containers.h"
#include <vector>

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -48,7 +48,7 @@ namespace ue2 {
static
bool findMask(const NGHolder &g, vector<CharReach> *mask, bool *anchored,
ue2::flat_set<ReportID> *reports) {
flat_set<ReportID> *reports) {
DEBUG_PRINTF("looking for a mask pattern\n");
set<NFAVertex> s_succ;
insert(&s_succ, adjacent_vertices(g.start, g));
@ -117,7 +117,7 @@ bool handleFixedWidth(RoseBuild &rose, const NGHolder &g, const Grey &grey) {
return false;
}
ue2::flat_set<ReportID> reports;
flat_set<ReportID> reports;
bool anchored = false;
vector<CharReach> mask;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -40,10 +40,12 @@
#include "util/bitfield.h"
#include "util/container.h"
#include "util/determinise.h"
#include "util/flat_containers.h"
#include "util/graph.h"
#include "util/graph_range.h"
#include "util/hash_dynamic_bitset.h"
#include "util/make_unique.h"
#include "util/ue2_containers.h"
#include "util/unordered.h"
#include <algorithm>
#include <functional>
@ -236,7 +238,7 @@ public:
struct Big_Traits {
using StateSet = dynamic_bitset<>;
using StateMap = map<StateSet, dstate_id_t>;
using StateMap = unordered_map<StateSet, dstate_id_t, hash_dynamic_bitset>;
static StateSet init_states(u32 num) {
return StateSet(num);
@ -257,7 +259,7 @@ public:
struct Graph_Traits {
using StateSet = bitfield<NFA_STATE_LIMIT>;
using StateMap = ue2::unordered_map<StateSet, dstate_id_t>;
using StateMap = unordered_map<StateSet, dstate_id_t>;
static StateSet init_states(UNUSED u32 num) {
assert(num <= NFA_STATE_LIMIT);
@ -284,8 +286,8 @@ public:
class Automaton_Haig_Merge {
public:
typedef vector<u16> StateSet;
typedef ue2::unordered_map<StateSet, dstate_id_t> StateMap;
using StateSet = vector<u16>;
using StateMap = ue2_unordered_map<StateSet, dstate_id_t>;
explicit Automaton_Haig_Merge(const vector<const raw_som_dfa *> &in)
: nfas(in.begin(), in.end()), dead(in.size()) {
@ -514,11 +516,11 @@ bool doHaig(const NGHolder &g, som_type som,
raw_som_dfa *rdfa) {
u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from
a fight */
typedef typename Auto::StateSet StateSet;
using StateSet = typename Auto::StateSet;
vector<StateSet> nfa_state_map;
Auto n(g, som, triggers, unordered_som);
try {
if (determinise(n, rdfa->states, state_limit, &nfa_state_map)) {
if (!determinise(n, rdfa->states, state_limit, &nfa_state_map)) {
DEBUG_PRINTF("state limit exceeded\n");
return false;
}
@ -720,15 +722,14 @@ unique_ptr<raw_som_dfa> attemptToMergeHaig(const vector<const raw_som_dfa *> &df
}
}
typedef Automaton_Haig_Merge::StateSet StateSet;
using StateSet = Automaton_Haig_Merge::StateSet;
vector<StateSet> nfa_state_map;
auto rdfa = ue2::make_unique<raw_som_dfa>(dfas[0]->kind, unordered_som,
NODE_START,
dfas[0]->stream_som_loc_width);
int rv = determinise(n, rdfa->states, limit, &nfa_state_map);
if (rv) {
DEBUG_PRINTF("%d:state limit (%u) exceeded\n", rv, limit);
if (!determinise(n, rdfa->states, limit, &nfa_state_map)) {
DEBUG_PRINTF("state limit (%u) exceeded\n", limit);
return nullptr; /* over state limit */
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -40,7 +40,7 @@
#include "ue2common.h"
#include "nfa/nfa_kind.h"
#include "util/charreach.h"
#include "util/ue2_containers.h"
#include "util/flat_containers.h"
#include "util/ue2_graph.h"
namespace ue2 {
@ -67,7 +67,7 @@ struct NFAGraphEdgeProps {
/** \brief For graphs that will be implemented as multi-top engines, this
* specifies the top events. Only used on edges from the start vertex. */
ue2::flat_set<u32> tops;
flat_set<u32> tops;
/** \brief Flags associated with assertions. */
u32 assert_flags = 0;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -39,13 +39,9 @@
#include "ng_util.h"
#include "ue2common.h"
#include "util/container.h"
#include "util/flat_containers.h"
#include "util/graph_range.h"
#include "util/make_unique.h"
#include "util/ue2_containers.h"
#include <set>
#include <boost/functional/hash/hash.hpp>
using namespace std;
@ -200,11 +196,11 @@ u64a hash_holder(const NGHolder &g) {
size_t rv = 0;
for (auto v : vertices_range(g)) {
boost::hash_combine(rv, g[v].index);
boost::hash_combine(rv, g[v].char_reach);
hash_combine(rv, g[v].index);
hash_combine(rv, g[v].char_reach);
for (auto w : adjacent_vertices_range(v, g)) {
boost::hash_combine(rv, g[w].index);
hash_combine(rv, g[w].index);
}
}

View File

@ -346,24 +346,4 @@ bytecode_ptr<NFA> constructLBR(const NGHolder &g,
return constructLBR(proto, triggers, cc, rm);
}
/** \brief True if graph \p g could be turned into an LBR engine. */
bool isLBR(const NGHolder &g, const Grey &grey) {
if (!grey.allowLbr) {
return false;
}
PureRepeat repeat;
if (!isPureRepeat(g, repeat)) {
DEBUG_PRINTF("not pure bounded repeat\n");
return false;
}
if (repeat.reports.size() != 1) {
DEBUG_PRINTF("too many reports\n");
return false;
}
return true;
}
} // namespace ue2

View File

@ -66,9 +66,6 @@ constructLBR(const CastleProto &proto,
const std::vector<std::vector<CharReach>> &triggers,
const CompileContext &cc, const ReportManager &rm);
/** \brief True if graph \p g could be turned into an LBR engine. */
bool isLBR(const NGHolder &g, const Grey &grey);
} // namespace ue2
#endif // NG_LBR_H

View File

@ -53,11 +53,13 @@
#include "util/container.h"
#include "util/graph_range.h"
#include "util/report_manager.h"
#include "util/ue2_containers.h"
#include "util/flat_containers.h"
#include "util/verify_types.h"
#include <algorithm>
#include <map>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <boost/range/adaptor/map.hpp>
@ -73,8 +75,8 @@ namespace ue2 {
// Only used in assertions.
static
bool sanityCheckGraph(const NGHolder &g,
const ue2::unordered_map<NFAVertex, u32> &state_ids) {
ue2::unordered_set<u32> seen_states;
const unordered_map<NFAVertex, u32> &state_ids) {
unordered_set<u32> seen_states;
for (auto v : vertices_range(g)) {
// Non-specials should have non-empty reachability.
@ -115,10 +117,9 @@ bool sanityCheckGraph(const NGHolder &g,
#endif
static
void findSquashStates(const NGHolder &g,
const vector<BoundedRepeatData> &repeats,
map<NFAVertex, NFAStateSet> &squashMap) {
squashMap = findSquashers(g);
unordered_map<NFAVertex, NFAStateSet> findSquashStates(const NGHolder &g,
const vector<BoundedRepeatData> &repeats) {
auto squashMap = findSquashers(g);
filterSquashers(g, squashMap);
/* We also filter out the cyclic states representing bounded repeats, as
@ -128,6 +129,8 @@ void findSquashStates(const NGHolder &g,
squashMap.erase(br.cyclic);
}
}
return squashMap;
}
/**
@ -468,7 +471,7 @@ void makeTopStates(NGHolder &g, map<u32, set<NFAVertex>> &tops_out,
static
set<NFAVertex> findZombies(const NGHolder &h,
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
const ue2::unordered_map<NFAVertex, u32> &state_ids,
const unordered_map<NFAVertex, u32> &state_ids,
const CompileContext &cc) {
set<NFAVertex> zombies;
if (!cc.grey.allowZombies) {
@ -516,7 +519,7 @@ set<NFAVertex> findZombies(const NGHolder &h,
}
static
void reverseStateOrdering(ue2::unordered_map<NFAVertex, u32> &state_ids) {
void reverseStateOrdering(unordered_map<NFAVertex, u32> &state_ids) {
vector<NFAVertex> ordering;
for (auto &e : state_ids) {
if (e.second == NO_STATE) {
@ -569,7 +572,7 @@ prepareGraph(const NGHolder &h_in, const ReportManager *rm,
const map<u32, u32> &fixed_depth_tops,
const map<u32, vector<vector<CharReach>>> &triggers,
bool impl_test_only, const CompileContext &cc,
ue2::unordered_map<NFAVertex, u32> &state_ids,
unordered_map<NFAVertex, u32> &state_ids,
vector<BoundedRepeatData> &repeats,
map<u32, set<NFAVertex>> &tops) {
assert(is_triggered(h_in) || fixed_depth_tops.empty());
@ -637,7 +640,7 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm,
assert(rm);
}
ue2::unordered_map<NFAVertex, u32> state_ids;
unordered_map<NFAVertex, u32> state_ids;
vector<BoundedRepeatData> repeats;
map<u32, set<NFAVertex>> tops;
unique_ptr<NGHolder> h
@ -657,12 +660,12 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm,
br_cyclic[br.cyclic] = BoundedRepeatSummary(br.repeatMin, br.repeatMax);
}
map<NFAVertex, NFAStateSet> reportSquashMap;
map<NFAVertex, NFAStateSet> squashMap;
unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
unordered_map<NFAVertex, NFAStateSet> squashMap;
// build map of squashed and squashers
if (cc.grey.squashNFA) {
findSquashStates(*h, repeats, squashMap);
squashMap = findSquashStates(*h, repeats);
if (rm && cc.grey.highlanderSquash) {
reportSquashMap = findHighlanderSquashers(*h, *rm);
@ -734,8 +737,8 @@ bytecode_ptr<NFA> constructReversedNFA_i(const NGHolder &h_in, u32 hint,
map<u32, set<NFAVertex>> tops; /* only the standards tops for nfas */
set<NFAVertex> zombies;
vector<BoundedRepeatData> repeats;
map<NFAVertex, NFAStateSet> reportSquashMap;
map<NFAVertex, NFAStateSet> squashMap;
unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
unordered_map<NFAVertex, NFAStateSet> squashMap;
return generate(h, state_ids, repeats, reportSquashMap, squashMap, tops,
zombies, false, false, hint, cc);
@ -785,7 +788,7 @@ u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm,
* resultant NGHolder has <= NFA_MAX_STATES. If it does, we know we can
* implement it as an NFA. */
ue2::unordered_map<NFAVertex, u32> state_ids;
unordered_map<NFAVertex, u32> state_ids;
vector<BoundedRepeatData> repeats;
map<u32, set<NFAVertex>> tops;
unique_ptr<NGHolder> h
@ -832,7 +835,7 @@ u32 countAccelStates(const NGHolder &g, const ReportManager *rm,
const map<u32, u32> fixed_depth_tops; // empty
const map<u32, vector<vector<CharReach>>> triggers; // empty
ue2::unordered_map<NFAVertex, u32> state_ids;
unordered_map<NFAVertex, u32> state_ids;
vector<BoundedRepeatData> repeats;
map<u32, set<NFAVertex>> tops;
unique_ptr<NGHolder> h
@ -848,8 +851,8 @@ u32 countAccelStates(const NGHolder &g, const ReportManager *rm,
// Should have no bearing on accel calculation, so we leave these empty.
const set<NFAVertex> zombies;
const map<NFAVertex, NFAStateSet> reportSquashMap;
const map<NFAVertex, NFAStateSet> squashMap;
unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
unordered_map<NFAVertex, NFAStateSet> squashMap;
return countAccelStates(*h, state_ids, repeats, reportSquashMap, squashMap,
tops, zombies, cc);

Some files were not shown because too many files have changed in this diff.