mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Merge branch develop into master
This commit is contained in:
commit
0e5c4cbd1d
36
CHANGELOG.md
Normal file
36
CHANGELOG.md
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
# Hyperscan Change Log
|
||||||
|
|
||||||
|
This is a list of notable changes to Hyperscan, in reverse chronological order.
|
||||||
|
|
||||||
|
## [4.1.0] 2015-12-18
|
||||||
|
- Update version of PCRE used by testing tools as a syntax and semantic
|
||||||
|
reference to PCRE 8.38.
|
||||||
|
- Small updates to fix warnings identified by Coverity.
|
||||||
|
- Clean up and unify exception handling behaviour across GPR and SIMD NFA
|
||||||
|
models.
|
||||||
|
- Fix bug in handling of bounded repeat triggers with large gaps between them
|
||||||
|
for sparse repeat model.
|
||||||
|
- Correctly reject POSIX collating elements (`[.ch.]`, `[=ch=]`) in the parser.
|
||||||
|
These are not supported by Hyperscan.
|
||||||
|
- Add support for quoted sequences (`\Q...\E`) inside character classes.
|
||||||
|
- Simplify FDR literal matcher runtime by removing some static specialization.
|
||||||
|
- Fix handling of the POSIX `[:graph:]`, `[:print:]` and `[:punct:]` character
|
||||||
|
classes to match the behaviour of PCRE 8.38 in both standard operation and
|
||||||
|
with the UCP flag set. (Note: some bugs were fixed in this area in PCRE
|
||||||
|
8.38.) Previously Hyperscan's behaviour was the same as versions of PCRE
|
||||||
|
before 8.34.
|
||||||
|
- Improve performance when compiling pattern sets that include a large number
|
||||||
|
of similar bounded repeat constructs. (github issue #9)
|
||||||
|
|
||||||
|
## [4.0.1] 2015-10-30
|
||||||
|
- Minor cleanups to test code.
|
||||||
|
- CMake and other build system improvements.
|
||||||
|
- API update: allow `hs_reset_stream()` and `hs_reset_and_copy_stream()` to be
|
||||||
|
supplied with a NULL scratch pointer if no matches are required. This is in
|
||||||
|
line with the behaviour of `hs_close_stream()`.
|
||||||
|
- Disallow bounded repeats with a very large minimum repeat but no maximum,
|
||||||
|
i.e. {N,} for very large N.
|
||||||
|
- Reduce compile memory usage in literal set expansion for some large cases.
|
||||||
|
|
||||||
|
## [4.0.0] 2015-10-20
|
||||||
|
- Original release of Hyperscan as open-source software.
|
@ -2,13 +2,13 @@ cmake_minimum_required (VERSION 2.8.11)
|
|||||||
project (Hyperscan C CXX)
|
project (Hyperscan C CXX)
|
||||||
|
|
||||||
set (HS_MAJOR_VERSION 4)
|
set (HS_MAJOR_VERSION 4)
|
||||||
set (HS_MINOR_VERSION 0)
|
set (HS_MINOR_VERSION 1)
|
||||||
set (HS_PATCH_VERSION 1)
|
set (HS_PATCH_VERSION 0)
|
||||||
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
|
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
|
||||||
|
|
||||||
string (TIMESTAMP BUILD_DATE "%Y-%m-%d")
|
string (TIMESTAMP BUILD_DATE "%Y-%m-%d")
|
||||||
|
|
||||||
set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)
|
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||||
include(CheckCCompilerFlag)
|
include(CheckCCompilerFlag)
|
||||||
include(CheckCXXCompilerFlag)
|
include(CheckCXXCompilerFlag)
|
||||||
INCLUDE (CheckFunctionExists)
|
INCLUDE (CheckFunctionExists)
|
||||||
@ -56,8 +56,9 @@ if(CMAKE_GENERATOR STREQUAL Xcode)
|
|||||||
set(XCODE TRUE)
|
set(XCODE TRUE)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
include_directories(src .)
|
set(CMAKE_INCLUDE_CURRENT_DIR 1)
|
||||||
include_directories(${CMAKE_BINARY_DIR})
|
include_directories(${PROJECT_SOURCE_DIR}/src)
|
||||||
|
include_directories(${PROJECT_BINARY_DIR})
|
||||||
include_directories(SYSTEM include)
|
include_directories(SYSTEM include)
|
||||||
|
|
||||||
set(BOOST_USE_STATIC_LIBS OFF)
|
set(BOOST_USE_STATIC_LIBS OFF)
|
||||||
@ -71,7 +72,7 @@ find_package(Boost ${BOOST_MINVERSION})
|
|||||||
if(NOT Boost_FOUND)
|
if(NOT Boost_FOUND)
|
||||||
# we might have boost in tree, so provide a hint and try again
|
# we might have boost in tree, so provide a hint and try again
|
||||||
message(STATUS "trying include dir for boost")
|
message(STATUS "trying include dir for boost")
|
||||||
set(BOOST_INCLUDEDIR "${CMAKE_SOURCE_DIR}/include")
|
set(BOOST_INCLUDEDIR "${PROJECT_SOURCE_DIR}/include")
|
||||||
find_package(Boost ${BOOST_MINVERSION})
|
find_package(Boost ${BOOST_MINVERSION})
|
||||||
if(NOT Boost_FOUND)
|
if(NOT Boost_FOUND)
|
||||||
message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. Either install system pacakges if available, extract Boost headers to ${CMAKE_SOURCE_DIR}/include, or set the CMake BOOST_ROOT variable.")
|
message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. Either install system pacakges if available, extract Boost headers to ${CMAKE_SOURCE_DIR}/include, or set the CMake BOOST_ROOT variable.")
|
||||||
@ -219,6 +220,15 @@ CHECK_FUNCTION_EXISTS(_aligned_malloc HAVE__ALIGNED_MALLOC)
|
|||||||
CHECK_C_COMPILER_FLAG(-fvisibility=hidden HAS_C_HIDDEN)
|
CHECK_C_COMPILER_FLAG(-fvisibility=hidden HAS_C_HIDDEN)
|
||||||
CHECK_CXX_COMPILER_FLAG(-fvisibility=hidden HAS_CXX_HIDDEN)
|
CHECK_CXX_COMPILER_FLAG(-fvisibility=hidden HAS_CXX_HIDDEN)
|
||||||
|
|
||||||
|
if (RELEASE_BUILD)
|
||||||
|
if (HAS_C_HIDDEN)
|
||||||
|
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fvisibility=hidden")
|
||||||
|
endif()
|
||||||
|
if (HAS_CXX_HIDDEN)
|
||||||
|
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fvisibility=hidden")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
# testing a builtin takes a little more work
|
# testing a builtin takes a little more work
|
||||||
CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED)
|
CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED)
|
||||||
CHECK_CXX_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED)
|
CHECK_CXX_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED)
|
||||||
@ -327,8 +337,8 @@ if (EXISTS ${CMAKE_SOURCE_DIR}/tools)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
# do substitutions
|
# do substitutions
|
||||||
configure_file(${CMAKE_MODULE_PATH}/config.h.in ${CMAKE_BINARY_DIR}/config.h)
|
configure_file(${CMAKE_MODULE_PATH}/config.h.in ${PROJECT_BINARY_DIR}/config.h)
|
||||||
configure_file(src/hs_version.h.in hs_version.h)
|
configure_file(src/hs_version.h.in ${PROJECT_BINARY_DIR}/hs_version.h)
|
||||||
|
|
||||||
if (PKG_CONFIG_FOUND)
|
if (PKG_CONFIG_FOUND)
|
||||||
# we really only need to do this if we have pkg-config
|
# we really only need to do this if we have pkg-config
|
||||||
@ -345,7 +355,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
|
|||||||
# include the autogen targets
|
# include the autogen targets
|
||||||
add_subdirectory(src/fdr)
|
add_subdirectory(src/fdr)
|
||||||
|
|
||||||
include_directories(${CMAKE_BINARY_DIR}/src/fdr)
|
include_directories(${PROJECT_BINARY_DIR}/src/fdr)
|
||||||
|
|
||||||
if(NOT WIN32)
|
if(NOT WIN32)
|
||||||
set(RAGEL_C_FLAGS "-Wno-unused")
|
set(RAGEL_C_FLAGS "-Wno-unused")
|
||||||
|
21
README.md
21
README.md
@ -20,3 +20,24 @@ the [Developer Reference Guide](http://01org.github.io/hyperscan/dev-reference/)
|
|||||||
Hyperscan is licensed under the BSD License. See the LICENSE file in the
|
Hyperscan is licensed under the BSD License. See the LICENSE file in the
|
||||||
project repository.
|
project repository.
|
||||||
|
|
||||||
|
# Versioning
|
||||||
|
|
||||||
|
The `master` branch on GitHub will always contain the most recent release of
|
||||||
|
Hyperscan. Each version released to `master` goes through QA and testing before
|
||||||
|
it is released; if you're a user, rather than a developer, this is the version
|
||||||
|
you should be using.
|
||||||
|
|
||||||
|
Further development towards the next release takes place on the `develop`
|
||||||
|
branch.
|
||||||
|
|
||||||
|
# Get Involved
|
||||||
|
|
||||||
|
The official homepage for Hyperscan is at [01.org/hyperscan](https://01.org/hyperscan).
|
||||||
|
|
||||||
|
If you have questions or comments, we encourage you to [join the mailing
|
||||||
|
list](https://lists.01.org/mailman/listinfo/hyperscan). Bugs can be filed by
|
||||||
|
sending email to the list, or by creating an issue on GitHub.
|
||||||
|
|
||||||
|
If you wish to contact the Hyperscan team at Intel directly, without posting
|
||||||
|
publicly to the mailing list, send email to
|
||||||
|
[hyperscan@intel.com](mailto:hyperscan@intel.com).
|
||||||
|
@ -63,6 +63,9 @@ described at <http://www.pcre.org/>. However, not all constructs available in
|
|||||||
libpcre are supported. The use of unsupported constructs will result in
|
libpcre are supported. The use of unsupported constructs will result in
|
||||||
compilation errors.
|
compilation errors.
|
||||||
|
|
||||||
|
The version of PCRE used to validate Hyperscan's interpretation of this syntax
|
||||||
|
is 8.38.
|
||||||
|
|
||||||
====================
|
====================
|
||||||
Supported Constructs
|
Supported Constructs
|
||||||
====================
|
====================
|
||||||
|
@ -109,7 +109,7 @@ static char *readInputData(const char *inputFN, unsigned int *length) {
|
|||||||
* limit the size of our buffer appropriately. */
|
* limit the size of our buffer appropriately. */
|
||||||
if ((unsigned long)dataLen > UINT_MAX) {
|
if ((unsigned long)dataLen > UINT_MAX) {
|
||||||
dataLen = UINT_MAX;
|
dataLen = UINT_MAX;
|
||||||
printf("WARNING: clipping data to %lu bytes\n", dataLen);
|
printf("WARNING: clipping data to %ld bytes\n", dataLen);
|
||||||
} else if (dataLen == 0) {
|
} else if (dataLen == 0) {
|
||||||
fprintf(stderr, "ERROR: input file \"%s\" is empty\n", inputFN);
|
fprintf(stderr, "ERROR: input file \"%s\" is empty\n", inputFN);
|
||||||
fclose(f);
|
fclose(f);
|
||||||
@ -118,7 +118,7 @@ static char *readInputData(const char *inputFN, unsigned int *length) {
|
|||||||
|
|
||||||
char *inputData = malloc(dataLen);
|
char *inputData = malloc(dataLen);
|
||||||
if (!inputData) {
|
if (!inputData) {
|
||||||
fprintf(stderr, "ERROR: unable to malloc %lu bytes\n", dataLen);
|
fprintf(stderr, "ERROR: unable to malloc %ld bytes\n", dataLen);
|
||||||
fclose(f);
|
fclose(f);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -27,10 +27,10 @@ fdr_autogen(teddy_runtime teddy_autogen.c)
|
|||||||
fdr_autogen(teddy_compiler teddy_autogen_compiler.cpp)
|
fdr_autogen(teddy_compiler teddy_autogen_compiler.cpp)
|
||||||
|
|
||||||
set(fdr_GENERATED_SRC
|
set(fdr_GENERATED_SRC
|
||||||
${CMAKE_BINARY_DIR}/src/fdr/fdr_autogen.c
|
${PROJECT_BINARY_DIR}/src/fdr/fdr_autogen.c
|
||||||
${CMAKE_BINARY_DIR}/src/fdr/fdr_autogen_compiler.cpp
|
${PROJECT_BINARY_DIR}/src/fdr/fdr_autogen_compiler.cpp
|
||||||
${CMAKE_BINARY_DIR}/src/fdr/teddy_autogen.c
|
${PROJECT_BINARY_DIR}/src/fdr/teddy_autogen.c
|
||||||
${CMAKE_BINARY_DIR}/src/fdr/teddy_autogen_compiler.cpp
|
${PROJECT_BINARY_DIR}/src/fdr/teddy_autogen_compiler.cpp
|
||||||
PARENT_SCOPE)
|
PARENT_SCOPE)
|
||||||
|
|
||||||
set_source_files_properties(${fdr_GENERATED_SRC} PROPERTIES GENERATED TRUE)
|
set_source_files_properties(${fdr_GENERATED_SRC} PROPERTIES GENERATED TRUE)
|
||||||
|
@ -54,16 +54,11 @@ def produce_fdr_compiles(l):
|
|||||||
|
|
||||||
def build_fdr_matchers():
|
def build_fdr_matchers():
|
||||||
all_matchers = [ ]
|
all_matchers = [ ]
|
||||||
domains = [8, 10, 11, 12, 13]
|
strides = [ 1, 2, 4 ]
|
||||||
big_domains = [ 14, 15 ]
|
|
||||||
|
|
||||||
common = { "state_width" : 128, "num_buckets" : 8, "extract_frequency" : 8, "arch" : arch_x86_64 }
|
common = { "state_width" : 128, "num_buckets" : 8, "extract_frequency" : 8, "arch" : arch_x86_64 }
|
||||||
for d in domains:
|
for s in strides:
|
||||||
all_matchers += [ M3(stride = 1, domain = d, **common) ]
|
all_matchers += [ M3(stride = s, **common) ]
|
||||||
all_matchers += [ M3(stride = 2, domain = d, **common) ]
|
|
||||||
all_matchers += [ M3(stride = 4, domain = d, **common) ]
|
|
||||||
for d in big_domains:
|
|
||||||
all_matchers += [ M3(stride = 1, domain = d, **common) ]
|
|
||||||
|
|
||||||
return all_matchers
|
return all_matchers
|
||||||
|
|
||||||
|
@ -40,27 +40,6 @@
|
|||||||
#include "fdr_confirm_runtime.h"
|
#include "fdr_confirm_runtime.h"
|
||||||
#include "fdr_streaming_runtime.h"
|
#include "fdr_streaming_runtime.h"
|
||||||
#include "fdr_loadval.h"
|
#include "fdr_loadval.h"
|
||||||
|
|
||||||
static really_inline UNUSED
|
|
||||||
u32 getPreStartVal(const struct FDR_Runtime_Args *a, u32 numBits) {
|
|
||||||
u32 r = 0;
|
|
||||||
if (a->start_offset == 0) {
|
|
||||||
if (numBits <= 8) {
|
|
||||||
r = a->buf_history[a->len_history - 1];
|
|
||||||
} else {
|
|
||||||
r = a->buf_history[a->len_history - 1];
|
|
||||||
r |= (a->buf[0] << 8);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (numBits <= 8) {
|
|
||||||
r = a->buf[a->start_offset - 1];
|
|
||||||
} else {
|
|
||||||
r = lv_u16(a->buf + a->start_offset - 1, a->buf, a->buf + a->len);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return r & ((1 << numBits) - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
#include "fdr_autogen.c"
|
#include "fdr_autogen.c"
|
||||||
|
|
||||||
#define FAKE_HISTORY_SIZE 16
|
#define FAKE_HISTORY_SIZE 16
|
||||||
|
@ -74,7 +74,7 @@ class ValueExtractStep(Step):
|
|||||||
dsb = m.datasize_bytes
|
dsb = m.datasize_bytes
|
||||||
modval = offset % dsb
|
modval = offset % dsb
|
||||||
|
|
||||||
if m.domain > 8 and modval == dsb - 1:
|
if modval == dsb - 1:
|
||||||
# Case 1: reading more than one byte over the end of the bulk load
|
# Case 1: reading more than one byte over the end of the bulk load
|
||||||
|
|
||||||
self.latency = 4
|
self.latency = 4
|
||||||
@ -101,7 +101,7 @@ class ValueExtractStep(Step):
|
|||||||
temp_string = "(%s >> %d)" % (lb_var.name, modval*8 - m.reach_shift_adjust)
|
temp_string = "(%s >> %d)" % (lb_var.name, modval*8 - m.reach_shift_adjust)
|
||||||
|
|
||||||
|
|
||||||
init_string = "(%s) & 0x%x" % (temp_string, m.reach_mask)
|
init_string = "(%s) & (domain_mask << %d)" % (temp_string, m.reach_shift_adjust)
|
||||||
v_var = self.nv(m.value_extract_type, "v%d" % offset)
|
v_var = self.nv(m.value_extract_type, "v%d" % offset)
|
||||||
self.val = v_var.gen_initializer_stmt(init_string)
|
self.val = v_var.gen_initializer_stmt(init_string)
|
||||||
|
|
||||||
@ -173,14 +173,10 @@ class ConfirmStep(Step):
|
|||||||
enable_confirmless = m.stride == 1, do_bailout = False)
|
enable_confirmless = m.stride == 1, do_bailout = False)
|
||||||
|
|
||||||
class M3(MatcherBase):
|
class M3(MatcherBase):
|
||||||
def get_hash_safety_parameters(self):
|
|
||||||
h_size = self.single_load_type.size_in_bytes()
|
|
||||||
return (0, h_size - 1)
|
|
||||||
|
|
||||||
def produce_compile_call(self):
|
def produce_compile_call(self):
|
||||||
print " { %d, %d, %d, %d, %d, %s, %d, %d }," % (
|
print " { %d, %d, %d, %d, %s, %d, %d }," % (
|
||||||
self.id, self.state_width, self.num_buckets,
|
self.id, self.state_width, self.num_buckets,
|
||||||
self.stride, self.domain,
|
self.stride,
|
||||||
self.arch.target, self.conf_pull_back, self.conf_top_level_split)
|
self.arch.target, self.conf_pull_back, self.conf_top_level_split)
|
||||||
|
|
||||||
def produce_main_loop(self, switch_variant = False):
|
def produce_main_loop(self, switch_variant = False):
|
||||||
@ -351,7 +347,14 @@ class M3(MatcherBase):
|
|||||||
s = Template("""
|
s = Template("""
|
||||||
$TYPENAME s;
|
$TYPENAME s;
|
||||||
if (a->len_history) {
|
if (a->len_history) {
|
||||||
u32 tmp = getPreStartVal(a, $DOMAIN);
|
u32 tmp = 0;
|
||||||
|
if (a->start_offset == 0) {
|
||||||
|
tmp = a->buf_history[a->len_history - 1];
|
||||||
|
tmp |= (a->buf[0] << 8);
|
||||||
|
} else {
|
||||||
|
tmp = lv_u16(a->buf + a->start_offset - 1, a->buf, a->buf + a->len);
|
||||||
|
}
|
||||||
|
tmp &= fdr->domainMask;
|
||||||
s = *((const $TYPENAME *)ft + tmp);
|
s = *((const $TYPENAME *)ft + tmp);
|
||||||
$SHIFT_EXPR;
|
$SHIFT_EXPR;
|
||||||
} else {
|
} else {
|
||||||
@ -359,15 +362,13 @@ class M3(MatcherBase):
|
|||||||
}
|
}
|
||||||
""").substitute(TYPENAME = s_type.get_name(),
|
""").substitute(TYPENAME = s_type.get_name(),
|
||||||
ZERO_EXPR = s_type.zero_expression(),
|
ZERO_EXPR = s_type.zero_expression(),
|
||||||
DOMAIN = self.domain,
|
|
||||||
SHIFT_EXPR = shift_expr)
|
SHIFT_EXPR = shift_expr)
|
||||||
return s
|
return s
|
||||||
|
|
||||||
def produce_code(self):
|
def produce_code(self):
|
||||||
|
|
||||||
(behind, ahead) = self.get_hash_safety_parameters()
|
loop_read_behind = 0
|
||||||
loop_read_behind = behind
|
loop_read_ahead = self.loop_bytes + 1
|
||||||
loop_read_ahead = self.loop_bytes + ahead
|
|
||||||
|
|
||||||
# we set up mask and shift stuff for extracting our masks from registers
|
# we set up mask and shift stuff for extracting our masks from registers
|
||||||
#
|
#
|
||||||
@ -380,7 +381,7 @@ class M3(MatcherBase):
|
|||||||
ssb = self.state_type.size / 8 # state size in bytes
|
ssb = self.state_type.size / 8 # state size in bytes
|
||||||
|
|
||||||
# Intel path
|
# Intel path
|
||||||
if ssb == 16 and self.domain == 16:
|
if ssb == 16:
|
||||||
# obscure corner - we don't have the room in the register to
|
# obscure corner - we don't have the room in the register to
|
||||||
# do this for all values so we don't. domain==16 is pretty
|
# do this for all values so we don't. domain==16 is pretty
|
||||||
# bad anyhow, of course
|
# bad anyhow, of course
|
||||||
@ -390,7 +391,6 @@ class M3(MatcherBase):
|
|||||||
|
|
||||||
shift_amts = { 1 : 0, 2 : 1, 4 : 2, 8 : 3, 16: 4 }
|
shift_amts = { 1 : 0, 2 : 1, 4 : 2, 8 : 3, 16: 4 }
|
||||||
self.reach_shift_adjust = shift_amts[ ssb/self.reach_mult ]
|
self.reach_shift_adjust = shift_amts[ ssb/self.reach_mult ]
|
||||||
self.reach_mask = ((1 << self.domain) - 1) << self.reach_shift_adjust
|
|
||||||
|
|
||||||
print self.produce_header(visible = False)
|
print self.produce_header(visible = False)
|
||||||
|
|
||||||
@ -398,21 +398,19 @@ class M3(MatcherBase):
|
|||||||
print " Arch: " + self.arch.name,
|
print " Arch: " + self.arch.name,
|
||||||
print " State type: " + self.state_type.get_name(),
|
print " State type: " + self.state_type.get_name(),
|
||||||
print " Num buckets: %d" % self.num_buckets,
|
print " Num buckets: %d" % self.num_buckets,
|
||||||
print " Domain: %d" % self.domain,
|
|
||||||
print " Stride: %d" % self.stride
|
print " Stride: %d" % self.stride
|
||||||
|
|
||||||
print self.produce_common_declarations()
|
print self.produce_common_declarations()
|
||||||
print
|
|
||||||
|
|
||||||
print "\tconst size_t tabSize = %d;" % self.table_size
|
print " assert(fdr->domain > 8 && fdr->domain < 16);"
|
||||||
print """
|
print
|
||||||
const u8 * ft = (const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR));
|
print " u64a domain_mask = fdr->domainMask;"
|
||||||
const u32 * confBase = (const u32 *)(ft + tabSize);
|
print " const u8 * ft = (const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR));"
|
||||||
"""
|
print " const u32 * confBase = (const u32 *)(ft + fdr->tabSize);"
|
||||||
print self.produce_init_state()
|
print self.produce_init_state()
|
||||||
print "\tconst size_t iterBytes = %d;" % self.loop_bytes
|
print " const size_t iterBytes = %d;" % self.loop_bytes
|
||||||
print "\tconst size_t START_MOD = %d;" % self.datasize_bytes
|
print " const size_t START_MOD = %d;" % self.datasize_bytes
|
||||||
print "\tconst size_t LOOP_READ_AHEAD = %d;" % loop_read_ahead
|
print " const size_t LOOP_READ_AHEAD = %d;" % loop_read_ahead
|
||||||
|
|
||||||
print """
|
print """
|
||||||
while (ptr < buf + len) {
|
while (ptr < buf + len) {
|
||||||
@ -451,9 +449,9 @@ class M3(MatcherBase):
|
|||||||
print self.produce_footer()
|
print self.produce_footer()
|
||||||
|
|
||||||
def get_name(self):
|
def get_name(self):
|
||||||
return "fdr_exec_%s_d%d_s%d_w%d" % (self.arch.name, self.domain, self.stride, self.state_width)
|
return "fdr_exec_%s_s%d_w%d" % (self.arch.name, self.stride, self.state_width)
|
||||||
|
|
||||||
def __init__(self, state_width, domain, stride,
|
def __init__(self, state_width, stride,
|
||||||
arch,
|
arch,
|
||||||
table_state_width = None,
|
table_state_width = None,
|
||||||
num_buckets = 8,
|
num_buckets = 8,
|
||||||
@ -474,17 +472,9 @@ class M3(MatcherBase):
|
|||||||
self.table_state_width = state_width
|
self.table_state_width = state_width
|
||||||
self.table_state_type = getRequiredType(self.table_state_width)
|
self.table_state_type = getRequiredType(self.table_state_width)
|
||||||
|
|
||||||
# domain is the number of bits that we draw from our input to
|
# this is the load type required for domain [9:15] if we want to
|
||||||
# index our 'reach' table
|
|
||||||
if not 8 <= domain <= 16:
|
|
||||||
fail_out("Unsupported domain: %d" % domain)
|
|
||||||
self.domain = domain
|
|
||||||
# this is the load type required for this domain if we want to
|
|
||||||
# load it one at a time
|
# load it one at a time
|
||||||
self.single_load_type = getRequiredType(self.domain)
|
self.single_load_type = IntegerType(16)
|
||||||
|
|
||||||
# table size
|
|
||||||
self.table_size = 2**domain * table_state_width // 8
|
|
||||||
|
|
||||||
# stride is the frequency with which we make data-driven
|
# stride is the frequency with which we make data-driven
|
||||||
# accesses to our reach table
|
# accesses to our reach table
|
||||||
|
@ -184,6 +184,13 @@ aligned_unique_ptr<FDR> FDRCompiler::setupFDR(pair<u8 *, size_t> link) {
|
|||||||
ptr += floodControlTmp.second;
|
ptr += floodControlTmp.second;
|
||||||
aligned_free(floodControlTmp.first);
|
aligned_free(floodControlTmp.first);
|
||||||
|
|
||||||
|
/* we are allowing domains 9 to 15 only */
|
||||||
|
assert(eng.bits > 8 && eng.bits < 16);
|
||||||
|
fdr->domain = eng.bits;
|
||||||
|
fdr->schemeWidthByte = eng.schemeWidth / 8;
|
||||||
|
fdr->domainMask = (1 << eng.bits) - 1;
|
||||||
|
fdr->tabSize = (1 << eng.bits) * fdr->schemeWidthByte;
|
||||||
|
|
||||||
if (link.first) {
|
if (link.first) {
|
||||||
fdr->link = verify_u32(ptr - fdr_base);
|
fdr->link = verify_u32(ptr - fdr_base);
|
||||||
memcpy(ptr, link.first, link.second);
|
memcpy(ptr, link.first, link.second);
|
||||||
@ -245,6 +252,8 @@ void FDRCompiler::assignStringsToBuckets() {
|
|||||||
typedef pair<SCORE, u32> SCORE_INDEX_PAIR;
|
typedef pair<SCORE, u32> SCORE_INDEX_PAIR;
|
||||||
|
|
||||||
u32 ls = verify_u32(lits.size());
|
u32 ls = verify_u32(lits.size());
|
||||||
|
assert(ls); // Shouldn't be called with no literals.
|
||||||
|
|
||||||
// make a vector that contains our literals as pointers or u32 LiteralIndex values
|
// make a vector that contains our literals as pointers or u32 LiteralIndex values
|
||||||
vector<LiteralIndex> vli;
|
vector<LiteralIndex> vli;
|
||||||
vli.resize(ls);
|
vli.resize(ls);
|
||||||
@ -292,6 +301,8 @@ void FDRCompiler::assignStringsToBuckets() {
|
|||||||
currentChunk++;
|
currentChunk++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
assert(currentChunk > 0);
|
||||||
count[currentChunk - 1] = ls - chunkStartID;
|
count[currentChunk - 1] = ls - chunkStartID;
|
||||||
// close off chunks with an empty row
|
// close off chunks with an empty row
|
||||||
firstIds[currentChunk] = ls;
|
firstIds[currentChunk] = ls;
|
||||||
@ -383,12 +394,14 @@ bool getMultiEntriesAtPosition(const FDREngineDescription &eng,
|
|||||||
const vector<hwlmLiteral> &lits,
|
const vector<hwlmLiteral> &lits,
|
||||||
SuffixPositionInString pos,
|
SuffixPositionInString pos,
|
||||||
std::map<u32, ue2::unordered_set<u32> > &m2) {
|
std::map<u32, ue2::unordered_set<u32> > &m2) {
|
||||||
|
assert(eng.bits < 32);
|
||||||
|
|
||||||
u32 distance = 0;
|
u32 distance = 0;
|
||||||
if (eng.bits <= 8) {
|
if (eng.bits <= 8) {
|
||||||
distance = 1;
|
distance = 1;
|
||||||
} else if (eng.bits <= 16) {
|
} else if (eng.bits <= 16) {
|
||||||
distance = 2;
|
distance = 2;
|
||||||
} else if (eng.bits <= 32) {
|
} else {
|
||||||
distance = 4;
|
distance = 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -528,6 +541,11 @@ fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// temporary hack for unit testing
|
||||||
|
if (hint != HINT_INVALID) {
|
||||||
|
des->bits = 9;
|
||||||
|
}
|
||||||
|
|
||||||
FDRCompiler fc(lits, *des, make_small);
|
FDRCompiler fc(lits, *des, make_small);
|
||||||
return fc.build(link);
|
return fc.build(link);
|
||||||
}
|
}
|
||||||
|
@ -81,6 +81,7 @@ void fdrPrintStats(const FDR *fdr, FILE *f) {
|
|||||||
unique_ptr<FDREngineDescription> des =
|
unique_ptr<FDREngineDescription> des =
|
||||||
getFdrDescription(fdr->engineID);
|
getFdrDescription(fdr->engineID);
|
||||||
if (des) {
|
if (des) {
|
||||||
|
fprintf(f, " domain %u\n", des->bits);
|
||||||
fprintf(f, " stride %u\n", des->stride);
|
fprintf(f, " stride %u\n", des->stride);
|
||||||
fprintf(f, " buckets %u\n", des->getNumBuckets());
|
fprintf(f, " buckets %u\n", des->getNumBuckets());
|
||||||
fprintf(f, " width %u\n", des->schemeWidth);
|
fprintf(f, " width %u\n", des->schemeWidth);
|
||||||
|
@ -48,7 +48,7 @@ FDREngineDescription::FDREngineDescription(const FDREngineDef &def)
|
|||||||
: EngineDescription(def.id, targetByArchFeatures(def.cpu_features),
|
: EngineDescription(def.id, targetByArchFeatures(def.cpu_features),
|
||||||
def.numBuckets, def.confirmPullBackDistance,
|
def.numBuckets, def.confirmPullBackDistance,
|
||||||
def.confirmTopLevelSplit),
|
def.confirmTopLevelSplit),
|
||||||
schemeWidth(def.schemeWidth), stride(def.stride), bits(def.bits) {}
|
schemeWidth(def.schemeWidth), stride(def.stride), bits(0) {}
|
||||||
|
|
||||||
u32 FDREngineDescription::getDefaultFloodSuffixLength() const {
|
u32 FDREngineDescription::getDefaultFloodSuffixLength() const {
|
||||||
// rounding up, so that scheme width 32 and 6 buckets is 6 not 5!
|
// rounding up, so that scheme width 32 and 6 buckets is 6 not 5!
|
||||||
@ -105,11 +105,16 @@ unique_ptr<FDREngineDescription> chooseEngine(const target_t &target,
|
|||||||
DEBUG_PRINTF("%zu lits, msl=%zu, desiredStride=%u\n", vl.size(), msl,
|
DEBUG_PRINTF("%zu lits, msl=%zu, desiredStride=%u\n", vl.size(), msl,
|
||||||
desiredStride);
|
desiredStride);
|
||||||
|
|
||||||
const FDREngineDescription *best = nullptr;
|
FDREngineDescription *best = nullptr;
|
||||||
u32 best_score = 0;
|
u32 best_score = 0;
|
||||||
|
|
||||||
|
for (u32 domain = 9; domain <= 15; domain++) {
|
||||||
for (size_t engineID = 0; engineID < allDescs.size(); engineID++) {
|
for (size_t engineID = 0; engineID < allDescs.size(); engineID++) {
|
||||||
const FDREngineDescription &eng = allDescs[engineID];
|
// to make sure that domains >=14 have stride 1 according to origin
|
||||||
|
if (domain > 13 && engineID > 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
FDREngineDescription &eng = allDescs[engineID];
|
||||||
if (!eng.isValidOnTarget(target)) {
|
if (!eng.isValidOnTarget(target)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -165,7 +170,7 @@ unique_ptr<FDREngineDescription> chooseEngine(const target_t &target,
|
|||||||
ideal -= 2;
|
ideal -= 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
score -= absdiff(ideal, eng.bits);
|
score -= absdiff(ideal, domain);
|
||||||
|
|
||||||
DEBUG_PRINTF("fdr %u: width=%u, bits=%u, buckets=%u, stride=%u "
|
DEBUG_PRINTF("fdr %u: width=%u, bits=%u, buckets=%u, stride=%u "
|
||||||
"-> score=%u\n",
|
"-> score=%u\n",
|
||||||
@ -173,10 +178,12 @@ unique_ptr<FDREngineDescription> chooseEngine(const target_t &target,
|
|||||||
eng.getNumBuckets(), eng.stride, score);
|
eng.getNumBuckets(), eng.stride, score);
|
||||||
|
|
||||||
if (!best || score > best_score) {
|
if (!best || score > best_score) {
|
||||||
|
eng.bits = domain;
|
||||||
best = &eng;
|
best = &eng;
|
||||||
best_score = score;
|
best_score = score;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!best) {
|
if (!best) {
|
||||||
DEBUG_PRINTF("failed to find engine\n");
|
DEBUG_PRINTF("failed to find engine\n");
|
||||||
|
@ -43,7 +43,6 @@ struct FDREngineDef {
|
|||||||
u32 schemeWidth;
|
u32 schemeWidth;
|
||||||
u32 numBuckets;
|
u32 numBuckets;
|
||||||
u32 stride;
|
u32 stride;
|
||||||
u32 bits;
|
|
||||||
u64a cpu_features;
|
u64a cpu_features;
|
||||||
u32 confirmPullBackDistance;
|
u32 confirmPullBackDistance;
|
||||||
u32 confirmTopLevelSplit;
|
u32 confirmTopLevelSplit;
|
||||||
|
@ -76,9 +76,11 @@ struct FDR {
|
|||||||
* structures (spillover strings and hash table) if we're a secondary
|
* structures (spillover strings and hash table) if we're a secondary
|
||||||
* structure. */
|
* structure. */
|
||||||
u32 link;
|
u32 link;
|
||||||
|
u8 domain; /* dynamic domain info */
|
||||||
|
u8 schemeWidthByte; /* scheme width in bytes */
|
||||||
|
u16 domainMask; /* pre-computed domain mask */
|
||||||
|
u32 tabSize; /* pre-computed hashtable size in bytes */
|
||||||
u32 pad1;
|
u32 pad1;
|
||||||
u32 pad2;
|
|
||||||
u32 pad3;
|
|
||||||
|
|
||||||
union {
|
union {
|
||||||
u32 s_u32;
|
u32 s_u32;
|
||||||
|
@ -58,11 +58,13 @@
|
|||||||
#include <boost/range/adaptor/map.hpp>
|
#include <boost/range/adaptor/map.hpp>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
using boost::adaptors::map_keys;
|
||||||
using boost::adaptors::map_values;
|
using boost::adaptors::map_values;
|
||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
#define CASTLE_MAX_TOPS 32
|
#define CASTLE_MAX_TOPS 32
|
||||||
|
#define CLIQUE_GRAPH_MAX_SIZE 1000
|
||||||
|
|
||||||
static
|
static
|
||||||
u32 depth_to_u32(const depth &d) {
|
u32 depth_to_u32(const depth &d) {
|
||||||
@ -106,51 +108,35 @@ void writeCastleScanEngine(const CharReach &cr, Castle *c) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
size_t literalOverlap(const vector<CharReach> &a, const vector<CharReach> &b) {
|
bool literalOverlap(const vector<CharReach> &a, const vector<CharReach> &b,
|
||||||
|
const size_t dist) {
|
||||||
for (size_t i = 0; i < b.size(); i++) {
|
for (size_t i = 0; i < b.size(); i++) {
|
||||||
|
if (i > dist) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
size_t overlap_len = b.size() - i;
|
size_t overlap_len = b.size() - i;
|
||||||
if (overlap_len <= a.size()) {
|
if (overlap_len <= a.size()) {
|
||||||
if (matches(a.end() - overlap_len, a.end(), b.begin(),
|
if (matches(a.end() - overlap_len, a.end(), b.begin(),
|
||||||
b.end() - i)) {
|
b.end() - i)) {
|
||||||
return i;
|
return false;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
assert(overlap_len > a.size());
|
assert(overlap_len > a.size());
|
||||||
if (matches(a.begin(), a.end(), b.end() - i - a.size(),
|
if (matches(a.begin(), a.end(), b.end() - i - a.size(),
|
||||||
b.end() - i)) {
|
b.end() - i)) {
|
||||||
return i;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return b.size();
|
return b.size() > dist;
|
||||||
}
|
}
|
||||||
|
|
||||||
// UE-2666 case 1: The problem of find largest exclusive subcastles group
|
|
||||||
// can be reformulated as finding the largest clique (subgraph where every
|
|
||||||
// vertex is connected to every other vertex) in the graph. We use an
|
|
||||||
// approximate algorithm here to find the maximum clique.
|
|
||||||
// References
|
|
||||||
// ----------
|
|
||||||
// [1] Boppana, R., & Halldórsson, M. M. (1992).
|
|
||||||
// Approximating maximum independent sets by excluding subgraphs.
|
|
||||||
// BIT Numerical Mathematics, 32(2), 180–196. Springer.
|
|
||||||
// doi:10.1007/BF01994876
|
|
||||||
// ----------
|
|
||||||
|
|
||||||
struct CliqueVertexProps {
|
struct CliqueVertexProps {
|
||||||
CliqueVertexProps() {}
|
CliqueVertexProps() {}
|
||||||
explicit CliqueVertexProps(u32 state_in) : stateId(state_in) {}
|
explicit CliqueVertexProps(u32 state_in) : stateId(state_in) {}
|
||||||
|
|
||||||
u32 stateId = ~0U;
|
u32 stateId = ~0U;
|
||||||
u32 parentId = ~0U;
|
|
||||||
bool leftChild = false; /* tells us if it is the left child of its parent */
|
|
||||||
bool rightChildVisited = false; /* tells us if its right child is visited */
|
|
||||||
|
|
||||||
vector<u32> clique1; /* clique for the left branch */
|
|
||||||
vector<u32> indepSet1; /* independent set for the left branch */
|
|
||||||
vector<u32> clique2; /* clique for the right branch */
|
|
||||||
vector<u32> indepSet2; /* independent set for the right branch */
|
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef boost::adjacency_list<boost::listS, boost::listS, boost::undirectedS,
|
typedef boost::adjacency_list<boost::listS, boost::listS, boost::undirectedS,
|
||||||
@ -158,181 +144,54 @@ typedef boost::adjacency_list<boost::listS, boost::listS, boost::undirectedS,
|
|||||||
typedef CliqueGraph::vertex_descriptor CliqueVertex;
|
typedef CliqueGraph::vertex_descriptor CliqueVertex;
|
||||||
|
|
||||||
static
|
static
|
||||||
unique_ptr<CliqueGraph> makeCG(const vector<vector<u32>> &exclusiveSet) {
|
void getNeighborInfo(const CliqueGraph &g, vector<u32> &neighbor,
|
||||||
u32 size = exclusiveSet.size();
|
const CliqueVertex &cv, const set<u32> &group) {
|
||||||
|
|
||||||
vector<CliqueVertex> vertices;
|
|
||||||
unique_ptr<CliqueGraph> cg = make_unique<CliqueGraph>();
|
|
||||||
for (u32 i = 0; i < size; ++i) {
|
|
||||||
CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg);
|
|
||||||
vertices.push_back(v);
|
|
||||||
}
|
|
||||||
|
|
||||||
// construct the complement graph, then its maximum independent sets
|
|
||||||
// are equal to the maximum clique of the original graph
|
|
||||||
for (u32 i = 0; i < size; ++i) {
|
|
||||||
CliqueVertex s = vertices[i];
|
|
||||||
vector<u32> complement(size, 0);
|
|
||||||
for (u32 j = 0; j < exclusiveSet[i].size(); ++j) {
|
|
||||||
u32 val = exclusiveSet[i][j];
|
|
||||||
complement[val] = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (u32 k = i + 1; k < size; ++k) {
|
|
||||||
if (!complement[k]) {
|
|
||||||
CliqueVertex d = vertices[k];
|
|
||||||
add_edge(s, d, *cg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return cg;
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
CliqueGraph createSubgraph(const CliqueGraph &cg,
|
|
||||||
const vector<CliqueVertex> &vertices) {
|
|
||||||
CliqueGraph g;
|
|
||||||
map<u32, CliqueVertex> vertexMap;
|
|
||||||
for (auto u : vertices) {
|
|
||||||
u32 id = cg[u].stateId;
|
|
||||||
CliqueVertex v = add_vertex(CliqueVertexProps(id), g);
|
|
||||||
vertexMap[id] = v;
|
|
||||||
}
|
|
||||||
|
|
||||||
set<u32> found;
|
|
||||||
for (auto u : vertices) {
|
|
||||||
u32 srcId = cg[u].stateId;
|
|
||||||
CliqueVertex src = vertexMap[srcId];
|
|
||||||
found.insert(srcId);
|
|
||||||
for (auto n : adjacent_vertices_range(u, cg)) {
|
|
||||||
u32 dstId = cg[n].stateId;
|
|
||||||
if (found.find(dstId) == found.end() &&
|
|
||||||
vertexMap.find(dstId) != vertexMap.end()) {
|
|
||||||
CliqueVertex dst = vertexMap[dstId];
|
|
||||||
add_edge(src, dst, g);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return g;
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
void getNeighborInfo(const CliqueGraph &g, vector<CliqueVertex> &neighbor,
|
|
||||||
vector<CliqueVertex> &nonneighbor,
|
|
||||||
const CliqueVertex &cv) {
|
|
||||||
u32 id = g[cv].stateId;
|
u32 id = g[cv].stateId;
|
||||||
ue2::unordered_set<u32> neighborId;
|
ue2::unordered_set<u32> neighborId;
|
||||||
|
|
||||||
// find neighbors for cv
|
// find neighbors for cv
|
||||||
for (auto v : adjacent_vertices_range(cv, g)) {
|
for (const auto &v : adjacent_vertices_range(cv, g)) {
|
||||||
neighbor.push_back(v);
|
if (g[v].stateId != id && contains(group, g[v].stateId)){
|
||||||
|
neighbor.push_back(g[v].stateId);
|
||||||
neighborId.insert(g[v].stateId);
|
neighborId.insert(g[v].stateId);
|
||||||
}
|
DEBUG_PRINTF("Neighbor:%u\n", g[v].stateId);
|
||||||
|
|
||||||
// find non-neighbors for cv
|
|
||||||
for (auto v : vertices_range(g)) {
|
|
||||||
if (g[v].stateId != id &&
|
|
||||||
neighborId.find(g[v].stateId) == neighborId.end()) {
|
|
||||||
nonneighbor.push_back(v);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void updateCliqueInfo(CliqueGraph &cg, const CliqueVertex &n,
|
void findCliqueGroup(CliqueGraph &cg, vector<u32> &clique) {
|
||||||
vector<u32> &clique, vector<u32> &indepSet) {
|
stack<vector<u32>> gStack;
|
||||||
u32 id = cg[n].stateId;
|
|
||||||
if (cg[n].clique1.size() + 1 > cg[n].clique2.size()) {
|
|
||||||
cg[n].clique1.push_back(id);
|
|
||||||
clique.swap(cg[n].clique1);
|
|
||||||
} else {
|
|
||||||
clique.swap(cg[n].clique2);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cg[n].indepSet2.size() + 1 > cg[n].indepSet1.size()) {
|
// Create mapping between vertex and id
|
||||||
cg[n].indepSet2.push_back(id);
|
|
||||||
indepSet.swap(cg[n].indepSet2);
|
|
||||||
} else {
|
|
||||||
indepSet.swap(cg[n].indepSet1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
void findCliqueGroup(CliqueGraph &cg, vector<u32> &clique,
|
|
||||||
vector<u32> &indepSet) {
|
|
||||||
stack<CliqueGraph> gStack;
|
|
||||||
gStack.push(cg);
|
|
||||||
|
|
||||||
// create mapping between vertex and id
|
|
||||||
map<u32, CliqueVertex> vertexMap;
|
map<u32, CliqueVertex> vertexMap;
|
||||||
for (auto v : vertices_range(cg)) {
|
vector<u32> init;
|
||||||
|
for (const auto &v : vertices_range(cg)) {
|
||||||
vertexMap[cg[v].stateId] = v;
|
vertexMap[cg[v].stateId] = v;
|
||||||
|
init.push_back(cg[v].stateId);
|
||||||
}
|
}
|
||||||
|
gStack.push(init);
|
||||||
|
|
||||||
// get the vertex to start from
|
// Get the vertex to start from
|
||||||
ue2::unordered_set<u32> foundVertexId;
|
|
||||||
CliqueGraph::vertex_iterator vi, ve;
|
CliqueGraph::vertex_iterator vi, ve;
|
||||||
tie(vi, ve) = vertices(cg);
|
tie(vi, ve) = vertices(cg);
|
||||||
CliqueVertex start = *vi;
|
|
||||||
u32 startId = cg[start].stateId;
|
|
||||||
|
|
||||||
bool leftChild = false;
|
|
||||||
u32 prevId = startId;
|
|
||||||
while (!gStack.empty()) {
|
while (!gStack.empty()) {
|
||||||
CliqueGraph g = gStack.top();
|
vector<u32> g = gStack.top();
|
||||||
gStack.pop();
|
gStack.pop();
|
||||||
|
|
||||||
// choose a vertex from the graph
|
// Choose a vertex from the graph
|
||||||
tie(vi, ve) = vertices(g);
|
u32 id = g[0];
|
||||||
CliqueVertex cv = *vi;
|
const CliqueVertex &n = vertexMap.at(id);
|
||||||
u32 id = g[cv].stateId;
|
clique.push_back(id);
|
||||||
|
// Corresponding vertex in the original graph
|
||||||
// corresponding vertex in the original graph
|
vector<u32> neighbor;
|
||||||
CliqueVertex n = vertexMap.at(id);
|
set<u32> subgraphId(g.begin(), g.end());
|
||||||
|
getNeighborInfo(cg, neighbor, n, subgraphId);
|
||||||
vector<CliqueVertex> neighbor;
|
// Get graph consisting of neighbors for left branch
|
||||||
vector<CliqueVertex> nonneighbor;
|
|
||||||
getNeighborInfo(g, neighbor, nonneighbor, cv);
|
|
||||||
|
|
||||||
if (foundVertexId.find(id) != foundVertexId.end()) {
|
|
||||||
prevId = id;
|
|
||||||
// get graph consisting of non-neighbors for right branch
|
|
||||||
if (!cg[n].rightChildVisited) {
|
|
||||||
gStack.push(g);
|
|
||||||
if (!nonneighbor.empty()) {
|
|
||||||
const CliqueGraph &nSub = createSubgraph(g, nonneighbor);
|
|
||||||
gStack.push(nSub);
|
|
||||||
leftChild = false;
|
|
||||||
}
|
|
||||||
cg[n].rightChildVisited = true;
|
|
||||||
} else if (id != startId) {
|
|
||||||
// both the left and right branches are visited,
|
|
||||||
// update its parent's clique and independent sets
|
|
||||||
u32 parentId = cg[n].parentId;
|
|
||||||
CliqueVertex parent = vertexMap.at(parentId);
|
|
||||||
if (cg[n].leftChild) {
|
|
||||||
updateCliqueInfo(cg, n, cg[parent].clique1,
|
|
||||||
cg[parent].indepSet1);
|
|
||||||
} else {
|
|
||||||
updateCliqueInfo(cg, n, cg[parent].clique2,
|
|
||||||
cg[parent].indepSet2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
foundVertexId.insert(id);
|
|
||||||
g[n].leftChild = leftChild;
|
|
||||||
g[n].parentId = prevId;
|
|
||||||
gStack.push(g);
|
|
||||||
// get graph consisting of neighbors for left branch
|
|
||||||
if (!neighbor.empty()) {
|
if (!neighbor.empty()) {
|
||||||
const CliqueGraph &sub = createSubgraph(g, neighbor);
|
gStack.push(neighbor);
|
||||||
gStack.push(sub);
|
|
||||||
leftChild = true;
|
|
||||||
}
|
|
||||||
prevId = id;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
updateCliqueInfo(cg, start, clique, indepSet);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Graph>
|
template<typename Graph>
|
||||||
@ -345,18 +204,17 @@ bool graph_empty(const Graph &g) {
|
|||||||
static
|
static
|
||||||
vector<u32> removeClique(CliqueGraph &cg) {
|
vector<u32> removeClique(CliqueGraph &cg) {
|
||||||
vector<vector<u32>> cliquesVec(1);
|
vector<vector<u32>> cliquesVec(1);
|
||||||
vector<vector<u32>> indepSetsVec(1);
|
|
||||||
DEBUG_PRINTF("graph size:%lu\n", num_vertices(cg));
|
DEBUG_PRINTF("graph size:%lu\n", num_vertices(cg));
|
||||||
findCliqueGroup(cg, cliquesVec[0], indepSetsVec[0]);
|
findCliqueGroup(cg, cliquesVec[0]);
|
||||||
while (!graph_empty(cg)) {
|
while (!graph_empty(cg)) {
|
||||||
const vector<u32> &c = cliquesVec.back();
|
const vector<u32> &c = cliquesVec.back();
|
||||||
vector<CliqueVertex> dead;
|
vector<CliqueVertex> dead;
|
||||||
for (auto v : vertices_range(cg)) {
|
for (const auto &v : vertices_range(cg)) {
|
||||||
if (find(c.begin(), c.end(), cg[v].stateId) != c.end()) {
|
if (find(c.begin(), c.end(), cg[v].stateId) != c.end()) {
|
||||||
dead.push_back(v);
|
dead.push_back(v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (auto v : dead) {
|
for (const auto &v : dead) {
|
||||||
clear_vertex(v, cg);
|
clear_vertex(v, cg);
|
||||||
remove_vertex(v, cg);
|
remove_vertex(v, cg);
|
||||||
}
|
}
|
||||||
@ -364,30 +222,22 @@ vector<u32> removeClique(CliqueGraph &cg) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
vector<u32> clique;
|
vector<u32> clique;
|
||||||
vector<u32> indepSet;
|
findCliqueGroup(cg, clique);
|
||||||
findCliqueGroup(cg, clique, indepSet);
|
|
||||||
cliquesVec.push_back(clique);
|
cliquesVec.push_back(clique);
|
||||||
indepSetsVec.push_back(indepSet);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// get the independent set with max size
|
// get the independent set with max size
|
||||||
size_t max = 0;
|
size_t max = 0;
|
||||||
size_t id = 0;
|
size_t id = 0;
|
||||||
for (size_t j = 0; j < indepSetsVec.size(); ++j) {
|
for (size_t j = 0; j < cliquesVec.size(); ++j) {
|
||||||
if (indepSetsVec[j].size() > max) {
|
if (cliquesVec[j].size() > max) {
|
||||||
max = indepSetsVec[j].size();
|
max = cliquesVec[j].size();
|
||||||
id = j;
|
id = j;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("clique size:%lu\n", indepSetsVec[id].size());
|
DEBUG_PRINTF("clique size:%lu\n", cliquesVec[id].size());
|
||||||
return indepSetsVec[id];
|
return cliquesVec[id];
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
vector<u32> findMaxClique(const vector<vector<u32>> &exclusiveSet) {
|
|
||||||
auto cg = makeCG(exclusiveSet);
|
|
||||||
return removeClique(*cg);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// if the location of any reset character in one literal are after
|
// if the location of any reset character in one literal are after
|
||||||
@ -401,10 +251,10 @@ bool findExclusivePair(const u32 id1, const u32 id2,
|
|||||||
const auto &triggers2 = triggers[id2];
|
const auto &triggers2 = triggers[id2];
|
||||||
for (u32 i = 0; i < triggers1.size(); ++i) {
|
for (u32 i = 0; i < triggers1.size(); ++i) {
|
||||||
for (u32 j = 0; j < triggers2.size(); ++j) {
|
for (u32 j = 0; j < triggers2.size(); ++j) {
|
||||||
size_t max_overlap1 = literalOverlap(triggers1[i], triggers2[j]);
|
if (!literalOverlap(triggers1[i], triggers2[j],
|
||||||
size_t max_overlap2 = literalOverlap(triggers2[j], triggers1[i]);
|
min_reset_dist[id2][j]) ||
|
||||||
if (max_overlap1 <= min_reset_dist[id2][j] ||
|
!literalOverlap(triggers2[j], triggers1[i],
|
||||||
max_overlap2 <= min_reset_dist[id1][i]) {
|
min_reset_dist[id1][i])) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -427,21 +277,26 @@ vector<u32> checkExclusion(const CharReach &cr,
|
|||||||
min_reset_dist.push_back(tmp_dist);
|
min_reset_dist.push_back(tmp_dist);
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<vector<u32>> exclusiveSet;
|
vector<CliqueVertex> vertices;
|
||||||
|
unique_ptr<CliqueGraph> cg = make_unique<CliqueGraph>();
|
||||||
|
for (u32 i = 0; i < triggers.size(); ++i) {
|
||||||
|
CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg);
|
||||||
|
vertices.push_back(v);
|
||||||
|
}
|
||||||
|
|
||||||
// find exclusive pair for each repeat
|
// find exclusive pair for each repeat
|
||||||
for (u32 i = 0; i < triggers.size(); ++i) {
|
for (u32 i = 0; i < triggers.size(); ++i) {
|
||||||
vector<u32> repeatIds;
|
CliqueVertex s = vertices[i];
|
||||||
for (u32 j = i + 1; j < triggers.size(); ++j) {
|
for (u32 j = i + 1; j < triggers.size(); ++j) {
|
||||||
if (findExclusivePair(i, j, min_reset_dist, triggers)) {
|
if (findExclusivePair(i, j, min_reset_dist, triggers)) {
|
||||||
repeatIds.push_back(j);
|
CliqueVertex d = vertices[j];
|
||||||
|
add_edge(s, d, *cg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
exclusiveSet.push_back(repeatIds);
|
|
||||||
DEBUG_PRINTF("Exclusive pair size:%lu\n", repeatIds.size());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// find the largest exclusive group
|
// find the largest exclusive group
|
||||||
return findMaxClique(exclusiveSet);
|
return removeClique(*cg);
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -599,7 +454,7 @@ buildCastle(const CastleProto &proto,
|
|||||||
|
|
||||||
repeatInfoPair.push_back(make_pair(min_period, is_reset));
|
repeatInfoPair.push_back(make_pair(min_period, is_reset));
|
||||||
|
|
||||||
if (is_reset) {
|
if (is_reset && candidateRepeats.size() < CLIQUE_GRAPH_MAX_SIZE) {
|
||||||
candidateTriggers.push_back(triggers.at(top));
|
candidateTriggers.push_back(triggers.at(top));
|
||||||
candidateRepeats.push_back(i);
|
candidateRepeats.push_back(i);
|
||||||
}
|
}
|
||||||
@ -608,7 +463,7 @@ buildCastle(const CastleProto &proto,
|
|||||||
// Case 1: exclusive repeats
|
// Case 1: exclusive repeats
|
||||||
bool exclusive = false;
|
bool exclusive = false;
|
||||||
bool pureExclusive = false;
|
bool pureExclusive = false;
|
||||||
u8 activeIdxSize = 0;
|
u32 activeIdxSize = 0;
|
||||||
set<u32> exclusiveGroup;
|
set<u32> exclusiveGroup;
|
||||||
if (cc.grey.castleExclusive) {
|
if (cc.grey.castleExclusive) {
|
||||||
vector<u32> tmpGroup = checkExclusion(cr, candidateTriggers);
|
vector<u32> tmpGroup = checkExclusion(cr, candidateTriggers);
|
||||||
@ -617,7 +472,7 @@ buildCastle(const CastleProto &proto,
|
|||||||
// Case 1: mutual exclusive repeats group found, initialize state
|
// Case 1: mutual exclusive repeats group found, initialize state
|
||||||
// sizes
|
// sizes
|
||||||
exclusive = true;
|
exclusive = true;
|
||||||
activeIdxSize = calcPackedBytes(exclusiveSize);
|
activeIdxSize = calcPackedBytes(numRepeats + 1);
|
||||||
if (exclusiveSize == numRepeats) {
|
if (exclusiveSize == numRepeats) {
|
||||||
pureExclusive = true;
|
pureExclusive = true;
|
||||||
streamStateSize = 0;
|
streamStateSize = 0;
|
||||||
@ -665,7 +520,7 @@ buildCastle(const CastleProto &proto,
|
|||||||
c->numRepeats = verify_u32(subs.size());
|
c->numRepeats = verify_u32(subs.size());
|
||||||
c->exclusive = exclusive;
|
c->exclusive = exclusive;
|
||||||
c->pureExclusive = pureExclusive;
|
c->pureExclusive = pureExclusive;
|
||||||
c->activeIdxSize = activeIdxSize;
|
c->activeIdxSize = verify_u8(activeIdxSize);
|
||||||
|
|
||||||
writeCastleScanEngine(cr, c);
|
writeCastleScanEngine(cr, c);
|
||||||
|
|
||||||
@ -710,8 +565,8 @@ buildCastle(const CastleProto &proto,
|
|||||||
|
|
||||||
set<ReportID> all_reports(const CastleProto &proto) {
|
set<ReportID> all_reports(const CastleProto &proto) {
|
||||||
set<ReportID> reports;
|
set<ReportID> reports;
|
||||||
for (const PureRepeat &pr : proto.repeats | map_values) {
|
for (const ReportID &report : proto.report_map | map_keys) {
|
||||||
reports.insert(pr.reports.begin(), pr.reports.end());
|
reports.insert(report);
|
||||||
}
|
}
|
||||||
return reports;
|
return reports;
|
||||||
}
|
}
|
||||||
@ -732,10 +587,30 @@ depth findMaxWidth(const CastleProto &proto) {
|
|||||||
return max_width;
|
return max_width;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
depth findMinWidth(const CastleProto &proto, u32 top) {
|
||||||
|
if (!contains(proto.repeats, top)) {
|
||||||
|
assert(0); // should not happen
|
||||||
|
return depth::infinity();
|
||||||
|
}
|
||||||
|
return proto.repeats.at(top).bounds.min;
|
||||||
|
}
|
||||||
|
|
||||||
|
depth findMaxWidth(const CastleProto &proto, u32 top) {
|
||||||
|
if (!contains(proto.repeats, top)) {
|
||||||
|
assert(0); // should not happen
|
||||||
|
return depth(0);
|
||||||
|
}
|
||||||
|
return proto.repeats.at(top).bounds.max;
|
||||||
|
}
|
||||||
|
|
||||||
CastleProto::CastleProto(const PureRepeat &pr) {
|
CastleProto::CastleProto(const PureRepeat &pr) {
|
||||||
assert(pr.reach.any());
|
assert(pr.reach.any());
|
||||||
assert(pr.reports.size() == 1);
|
assert(pr.reports.size() == 1);
|
||||||
repeats.insert(make_pair(0, pr));
|
u32 top = 0;
|
||||||
|
repeats.emplace(top, pr);
|
||||||
|
for (const auto &report : pr.reports) {
|
||||||
|
report_map[report].insert(top);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const CharReach &CastleProto::reach() const {
|
const CharReach &CastleProto::reach() const {
|
||||||
@ -743,25 +618,29 @@ const CharReach &CastleProto::reach() const {
|
|||||||
return repeats.begin()->second.reach;
|
return repeats.begin()->second.reach;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
|
||||||
u32 find_next_top(const map<u32, PureRepeat> &repeats) {
|
|
||||||
u32 top = 0;
|
|
||||||
for (; contains(repeats, top); top++) {
|
|
||||||
// pass
|
|
||||||
}
|
|
||||||
return top;
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 CastleProto::add(const PureRepeat &pr) {
|
u32 CastleProto::add(const PureRepeat &pr) {
|
||||||
assert(repeats.size() < max_occupancy);
|
assert(repeats.size() < max_occupancy);
|
||||||
assert(pr.reach == reach());
|
assert(pr.reach == reach());
|
||||||
assert(pr.reports.size() == 1);
|
assert(pr.reports.size() == 1);
|
||||||
u32 top = find_next_top(repeats);
|
u32 top = next_top++;
|
||||||
DEBUG_PRINTF("selected unused top %u\n", top);
|
DEBUG_PRINTF("selected unused top %u\n", top);
|
||||||
repeats.insert(make_pair(top, pr));
|
assert(!contains(repeats, top));
|
||||||
|
repeats.emplace(top, pr);
|
||||||
|
for (const auto &report : pr.reports) {
|
||||||
|
report_map[report].insert(top);
|
||||||
|
}
|
||||||
return top;
|
return top;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CastleProto::erase(u32 top) {
|
||||||
|
DEBUG_PRINTF("erase top %u\n", top);
|
||||||
|
assert(contains(repeats, top));
|
||||||
|
repeats.erase(top);
|
||||||
|
for (auto &m : report_map) {
|
||||||
|
m.second.erase(top);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
u32 CastleProto::merge(const PureRepeat &pr) {
|
u32 CastleProto::merge(const PureRepeat &pr) {
|
||||||
assert(repeats.size() <= max_occupancy);
|
assert(repeats.size() <= max_occupancy);
|
||||||
assert(pr.reach == reach());
|
assert(pr.reach == reach());
|
||||||
@ -806,8 +685,7 @@ bool mergeCastle(CastleProto &c1, const CastleProto &c2,
|
|||||||
const u32 top = m.first;
|
const u32 top = m.first;
|
||||||
const PureRepeat &pr = m.second;
|
const PureRepeat &pr = m.second;
|
||||||
DEBUG_PRINTF("top %u\n", top);
|
DEBUG_PRINTF("top %u\n", top);
|
||||||
u32 new_top = find_next_top(c1.repeats);
|
u32 new_top = c1.add(pr);
|
||||||
c1.repeats.insert(make_pair(new_top, pr));
|
|
||||||
top_map[top] = new_top;
|
top_map[top] = new_top;
|
||||||
DEBUG_PRINTF("adding repeat: map %u->%u\n", top, new_top);
|
DEBUG_PRINTF("adding repeat: map %u->%u\n", top, new_top);
|
||||||
}
|
}
|
||||||
@ -823,12 +701,23 @@ void remapCastleTops(CastleProto &proto, map<u32, u32> &top_map) {
|
|||||||
for (const auto &m : proto.repeats) {
|
for (const auto &m : proto.repeats) {
|
||||||
const u32 top = m.first;
|
const u32 top = m.first;
|
||||||
const PureRepeat &pr = m.second;
|
const PureRepeat &pr = m.second;
|
||||||
u32 new_top = find_next_top(out);
|
u32 new_top = out.size();
|
||||||
out.insert(make_pair(new_top, pr));
|
out.emplace(new_top, pr);
|
||||||
top_map[top] = new_top;
|
top_map[top] = new_top;
|
||||||
}
|
}
|
||||||
|
|
||||||
proto.repeats.swap(out);
|
proto.repeats.swap(out);
|
||||||
|
|
||||||
|
// Remap report map.
|
||||||
|
proto.report_map.clear();
|
||||||
|
for (const auto &m : proto.repeats) {
|
||||||
|
const u32 top = m.first;
|
||||||
|
const PureRepeat &pr = m.second;
|
||||||
|
for (const auto &report : pr.reports) {
|
||||||
|
proto.report_map[report].insert(top);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
assert(proto.repeats.size() <= proto.max_occupancy);
|
assert(proto.repeats.size() <= proto.max_occupancy);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -904,19 +793,18 @@ bool is_equal(const CastleProto &c1, const CastleProto &c2) {
|
|||||||
return c1.repeats == c2.repeats;
|
return c1.repeats == c2.repeats;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool requiresDedupe(const CastleProto &proto, const set<ReportID> &reports) {
|
bool requiresDedupe(const CastleProto &proto,
|
||||||
ue2::unordered_set<ReportID> seen;
|
const ue2::flat_set<ReportID> &reports) {
|
||||||
for (const PureRepeat &pr : proto.repeats | map_values) {
|
for (const auto &report : reports) {
|
||||||
for (const ReportID &report : pr.reports) {
|
auto it = proto.report_map.find(report);
|
||||||
if (contains(reports, report)) {
|
if (it == end(proto.report_map)) {
|
||||||
if (contains(seen, report)) {
|
continue;
|
||||||
|
}
|
||||||
|
if (it->second.size() > 1) {
|
||||||
DEBUG_PRINTF("castle proto %p has dupe report %u\n", &proto,
|
DEBUG_PRINTF("castle proto %p has dupe report %u\n", &proto,
|
||||||
report);
|
report);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
seen.insert(report);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -38,6 +38,7 @@
|
|||||||
#include "nfagraph/ng_repeat.h"
|
#include "nfagraph/ng_repeat.h"
|
||||||
#include "util/alloc.h"
|
#include "util/alloc.h"
|
||||||
#include "util/depth.h"
|
#include "util/depth.h"
|
||||||
|
#include "util/ue2_containers.h"
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
@ -67,8 +68,12 @@ struct CastleProto {
|
|||||||
explicit CastleProto(const PureRepeat &pr);
|
explicit CastleProto(const PureRepeat &pr);
|
||||||
const CharReach &reach() const;
|
const CharReach &reach() const;
|
||||||
|
|
||||||
|
/** \brief Add a new repeat. */
|
||||||
u32 add(const PureRepeat &pr);
|
u32 add(const PureRepeat &pr);
|
||||||
|
|
||||||
|
/** \brief Remove a repeat. */
|
||||||
|
void erase(u32 top);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Merge in the given repeat, returning the top used.
|
* \brief Merge in the given repeat, returning the top used.
|
||||||
*
|
*
|
||||||
@ -80,11 +85,22 @@ struct CastleProto {
|
|||||||
|
|
||||||
/** \brief Mapping from unique top id to repeat. */
|
/** \brief Mapping from unique top id to repeat. */
|
||||||
std::map<u32, PureRepeat> repeats;
|
std::map<u32, PureRepeat> repeats;
|
||||||
|
|
||||||
|
/** \brief Mapping from report to associated tops. */
|
||||||
|
ue2::unordered_map<ReportID, flat_set<u32>> report_map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Next top id to use. Repeats may be removed without top remapping,
|
||||||
|
* so we track this explicitly instead of using repeats.size().
|
||||||
|
*/
|
||||||
|
u32 next_top = 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::set<ReportID> all_reports(const CastleProto &proto);
|
std::set<ReportID> all_reports(const CastleProto &proto);
|
||||||
depth findMinWidth(const CastleProto &proto);
|
depth findMinWidth(const CastleProto &proto);
|
||||||
depth findMaxWidth(const CastleProto &proto);
|
depth findMaxWidth(const CastleProto &proto);
|
||||||
|
depth findMinWidth(const CastleProto &proto, u32 top);
|
||||||
|
depth findMaxWidth(const CastleProto &proto, u32 top);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Remap tops to be contiguous.
|
* \brief Remap tops to be contiguous.
|
||||||
@ -133,7 +149,8 @@ bool is_equal(const CastleProto &c1, const CastleProto &c2);
|
|||||||
* \brief True if the given castle contains more than a single instance of any
|
* \brief True if the given castle contains more than a single instance of any
|
||||||
* of the reports in the given set.
|
* of the reports in the given set.
|
||||||
*/
|
*/
|
||||||
bool requiresDedupe(const CastleProto &proto, const std::set<ReportID> &reports);
|
bool requiresDedupe(const CastleProto &proto,
|
||||||
|
const ue2::flat_set<ReportID> &reports);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Build an NGHolder from a CastleProto.
|
* \brief Build an NGHolder from a CastleProto.
|
||||||
|
@ -1136,16 +1136,11 @@ aligned_unique_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
|
|||||||
gough_dfa->length = gough_size;
|
gough_dfa->length = gough_size;
|
||||||
|
|
||||||
/* copy in blocks */
|
/* copy in blocks */
|
||||||
memcpy((u8 *)gough_dfa.get() + edge_prog_offset, &edge_blocks[0],
|
copy_bytes((u8 *)gough_dfa.get() + edge_prog_offset, edge_blocks);
|
||||||
byte_length(edge_blocks));
|
|
||||||
if (top_prog_offset) {
|
if (top_prog_offset) {
|
||||||
memcpy((u8 *)gough_dfa.get() + top_prog_offset, &top_blocks[0],
|
copy_bytes((u8 *)gough_dfa.get() + top_prog_offset, top_blocks);
|
||||||
byte_length(top_blocks));
|
|
||||||
}
|
|
||||||
if (!temp_blocks.empty()) {
|
|
||||||
memcpy((u8 *)gough_dfa.get() + prog_base_offset, &temp_blocks[0],
|
|
||||||
byte_length(temp_blocks));
|
|
||||||
}
|
}
|
||||||
|
copy_bytes((u8 *)gough_dfa.get() + prog_base_offset, temp_blocks);
|
||||||
|
|
||||||
return gough_dfa;
|
return gough_dfa;
|
||||||
}
|
}
|
||||||
|
@ -70,8 +70,11 @@ struct dstate_som {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct raw_som_dfa : public raw_dfa {
|
struct raw_som_dfa : public raw_dfa {
|
||||||
raw_som_dfa(nfa_kind k, bool unordered_som_triggers_in)
|
raw_som_dfa(nfa_kind k, bool unordered_som_triggers_in, u32 trigger,
|
||||||
: raw_dfa(k), unordered_som_triggers(unordered_som_triggers_in) {
|
u32 stream_som_loc_width_in)
|
||||||
|
: raw_dfa(k), stream_som_loc_width(stream_som_loc_width_in),
|
||||||
|
unordered_som_triggers(unordered_som_triggers_in),
|
||||||
|
trigger_nfa_state(trigger) {
|
||||||
assert(!unordered_som_triggers || is_triggered(kind));
|
assert(!unordered_som_triggers || is_triggered(kind));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1397,8 +1397,7 @@ struct Factory {
|
|||||||
repeat->horizon = rsi.horizon;
|
repeat->horizon = rsi.horizon;
|
||||||
repeat->packedCtrlSize = rsi.packedCtrlSize;
|
repeat->packedCtrlSize = rsi.packedCtrlSize;
|
||||||
repeat->stateSize = rsi.stateSize;
|
repeat->stateSize = rsi.stateSize;
|
||||||
memcpy(repeat->packedFieldSizes, rsi.packedFieldSizes.data(),
|
copy_bytes(repeat->packedFieldSizes, rsi.packedFieldSizes);
|
||||||
byte_length(rsi.packedFieldSizes));
|
|
||||||
repeat->patchCount = rsi.patchCount;
|
repeat->patchCount = rsi.patchCount;
|
||||||
repeat->patchSize = rsi.patchSize;
|
repeat->patchSize = rsi.patchSize;
|
||||||
repeat->encodingSize = rsi.encodingSize;
|
repeat->encodingSize = rsi.encodingSize;
|
||||||
@ -1413,8 +1412,7 @@ struct Factory {
|
|||||||
// Copy in the sparse lookup table.
|
// Copy in the sparse lookup table.
|
||||||
if (br.type == REPEAT_SPARSE_OPTIMAL_P) {
|
if (br.type == REPEAT_SPARSE_OPTIMAL_P) {
|
||||||
assert(!rsi.table.empty());
|
assert(!rsi.table.empty());
|
||||||
memcpy(info_ptr + tableOffset, rsi.table.data(),
|
copy_bytes(info_ptr + tableOffset, rsi.table);
|
||||||
byte_length(rsi.table));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fill the tug mask.
|
// Fill the tug mask.
|
||||||
@ -1702,6 +1700,7 @@ struct Factory {
|
|||||||
|
|
||||||
for (u32 i = 0; i < num_repeats; i++) {
|
for (u32 i = 0; i < num_repeats; i++) {
|
||||||
repeatOffsets[i] = offset;
|
repeatOffsets[i] = offset;
|
||||||
|
assert(repeats[i].first);
|
||||||
memcpy((char *)limex + offset, repeats[i].first.get(),
|
memcpy((char *)limex + offset, repeats[i].first.get(),
|
||||||
repeats[i].second);
|
repeats[i].second);
|
||||||
offset += repeats[i].second;
|
offset += repeats[i].second;
|
||||||
@ -1709,8 +1708,7 @@ struct Factory {
|
|||||||
|
|
||||||
// Write repeat offset lookup table.
|
// Write repeat offset lookup table.
|
||||||
assert(ISALIGNED_N((char *)limex + repeatOffsetsOffset, alignof(u32)));
|
assert(ISALIGNED_N((char *)limex + repeatOffsetsOffset, alignof(u32)));
|
||||||
memcpy((char *)limex + repeatOffsetsOffset, repeatOffsets.data(),
|
copy_bytes((char *)limex + repeatOffsetsOffset, repeatOffsets);
|
||||||
byte_length(repeatOffsets));
|
|
||||||
|
|
||||||
limex->repeatOffset = repeatOffsetsOffset;
|
limex->repeatOffset = repeatOffsetsOffset;
|
||||||
limex->repeatCount = num_repeats;
|
limex->repeatCount = num_repeats;
|
||||||
@ -1725,8 +1723,7 @@ struct Factory {
|
|||||||
limex->exReportOffset = exceptionReportsOffset;
|
limex->exReportOffset = exceptionReportsOffset;
|
||||||
assert(ISALIGNED_N((char *)limex + exceptionReportsOffset,
|
assert(ISALIGNED_N((char *)limex + exceptionReportsOffset,
|
||||||
alignof(ReportID)));
|
alignof(ReportID)));
|
||||||
memcpy((char *)limex + exceptionReportsOffset, reports.data(),
|
copy_bytes((char *)limex + exceptionReportsOffset, reports);
|
||||||
byte_length(reports));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
|
@ -317,7 +317,7 @@ template<typename limex_type>
|
|||||||
struct limex_labeller : public nfa_labeller {
|
struct limex_labeller : public nfa_labeller {
|
||||||
explicit limex_labeller(const limex_type *limex_in) : limex(limex_in) {}
|
explicit limex_labeller(const limex_type *limex_in) : limex(limex_in) {}
|
||||||
|
|
||||||
void label_state(FILE *f, u32 state) const {
|
void label_state(FILE *f, u32 state) const override {
|
||||||
const typename limex_traits<limex_type>::exception_type *exceptions
|
const typename limex_traits<limex_type>::exception_type *exceptions
|
||||||
= getExceptionTable(limex);
|
= getExceptionTable(limex);
|
||||||
if (!testbit((const u8 *)&limex->exceptionMask,
|
if (!testbit((const u8 *)&limex->exceptionMask,
|
||||||
|
@ -218,7 +218,8 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
|
|||||||
if (EQ_STATE(estate, LOAD_STATE(&ctx->cached_estate))) {
|
if (EQ_STATE(estate, LOAD_STATE(&ctx->cached_estate))) {
|
||||||
DEBUG_PRINTF("using cached succ from previous state\n");
|
DEBUG_PRINTF("using cached succ from previous state\n");
|
||||||
STORE_STATE(succ, OR_STATE(LOAD_STATE(succ), LOAD_STATE(&ctx->cached_esucc)));
|
STORE_STATE(succ, OR_STATE(LOAD_STATE(succ), LOAD_STATE(&ctx->cached_esucc)));
|
||||||
if (ctx->cached_reports) {
|
if (ctx->cached_reports && (flags & CALLBACK_OUTPUT)) {
|
||||||
|
DEBUG_PRINTF("firing cached reports from previous state\n");
|
||||||
if (unlikely(limexRunReports(ctx->cached_reports, ctx->callback,
|
if (unlikely(limexRunReports(ctx->cached_reports, ctx->callback,
|
||||||
ctx->context, offset)
|
ctx->context, offset)
|
||||||
== MO_HALT_MATCHING)) {
|
== MO_HALT_MATCHING)) {
|
||||||
|
@ -83,7 +83,8 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
|
|||||||
if (estate == ctx->cached_estate) {
|
if (estate == ctx->cached_estate) {
|
||||||
DEBUG_PRINTF("using cached succ from previous state\n");
|
DEBUG_PRINTF("using cached succ from previous state\n");
|
||||||
*succ |= ctx->cached_esucc;
|
*succ |= ctx->cached_esucc;
|
||||||
if (ctx->cached_reports) {
|
if (ctx->cached_reports && (flags & CALLBACK_OUTPUT)) {
|
||||||
|
DEBUG_PRINTF("firing cached reports from previous state\n");
|
||||||
if (unlikely(limexRunReports(ctx->cached_reports, ctx->callback,
|
if (unlikely(limexRunReports(ctx->cached_reports, ctx->callback,
|
||||||
ctx->context, offset)
|
ctx->context, offset)
|
||||||
== MO_HALT_MATCHING)) {
|
== MO_HALT_MATCHING)) {
|
||||||
@ -119,8 +120,10 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
|
|||||||
ctx->cached_reports = new_cache.reports;
|
ctx->cached_reports = new_cache.reports;
|
||||||
ctx->cached_br = new_cache.br;
|
ctx->cached_br = new_cache.br;
|
||||||
} else if (cacheable == DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES) {
|
} else if (cacheable == DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES) {
|
||||||
|
if (ctx->cached_br) {
|
||||||
ctx->cached_estate = 0U;
|
ctx->cached_estate = 0U;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -179,7 +179,6 @@ char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
|
|||||||
assert(ISALIGNED_CL(ctx));
|
assert(ISALIGNED_CL(ctx));
|
||||||
assert(ISALIGNED_CL(&ctx->s));
|
assert(ISALIGNED_CL(&ctx->s));
|
||||||
STATE_T s = LOAD_STATE(&ctx->s);
|
STATE_T s = LOAD_STATE(&ctx->s);
|
||||||
STORE_STATE(&ctx->cached_estate, ZERO_STATE); /* TODO: understand why this is required */
|
|
||||||
|
|
||||||
/* assert(ISALIGNED_16(exceptions)); */
|
/* assert(ISALIGNED_16(exceptions)); */
|
||||||
/* assert(ISALIGNED_16(reach)); */
|
/* assert(ISALIGNED_16(reach)); */
|
||||||
@ -305,7 +304,6 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
|
|||||||
const ReportID *exReports = getExReports(limex);
|
const ReportID *exReports = getExReports(limex);
|
||||||
const u32 *exceptionMap = limex->exceptionMap;
|
const u32 *exceptionMap = limex->exceptionMap;
|
||||||
STATE_T s = LOAD_STATE(&ctx->s);
|
STATE_T s = LOAD_STATE(&ctx->s);
|
||||||
STORE_STATE(&ctx->cached_estate, ZERO_STATE); /* TODO: understand why this is required */
|
|
||||||
|
|
||||||
/* assert(ISALIGNED_16(exceptions)); */
|
/* assert(ISALIGNED_16(exceptions)); */
|
||||||
/* assert(ISALIGNED_16(reach)); */
|
/* assert(ISALIGNED_16(reach)); */
|
||||||
@ -542,7 +540,6 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) {
|
|||||||
ctx->callback = q->cb;
|
ctx->callback = q->cb;
|
||||||
ctx->context = q->context;
|
ctx->context = q->context;
|
||||||
STORE_STATE(&ctx->cached_estate, ZERO_STATE);
|
STORE_STATE(&ctx->cached_estate, ZERO_STATE);
|
||||||
STORE_STATE(&ctx->cached_esucc, ZERO_STATE);
|
|
||||||
|
|
||||||
assert(q->items[q->cur].location >= 0);
|
assert(q->items[q->cur].location >= 0);
|
||||||
DEBUG_PRINTF("LOAD STATE\n");
|
DEBUG_PRINTF("LOAD STATE\n");
|
||||||
@ -638,7 +635,6 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
|
|||||||
ctx->callback = q->cb;
|
ctx->callback = q->cb;
|
||||||
ctx->context = q->context;
|
ctx->context = q->context;
|
||||||
STORE_STATE(&ctx->cached_estate, ZERO_STATE);
|
STORE_STATE(&ctx->cached_estate, ZERO_STATE);
|
||||||
STORE_STATE(&ctx->cached_esucc, ZERO_STATE);
|
|
||||||
|
|
||||||
DEBUG_PRINTF("LOAD STATE\n");
|
DEBUG_PRINTF("LOAD STATE\n");
|
||||||
STORE_STATE(&ctx->s, LOAD_STATE(q->state));
|
STORE_STATE(&ctx->s, LOAD_STATE(q->state));
|
||||||
@ -730,7 +726,6 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q,
|
|||||||
ctx->callback = NULL;
|
ctx->callback = NULL;
|
||||||
ctx->context = NULL;
|
ctx->context = NULL;
|
||||||
STORE_STATE(&ctx->cached_estate, ZERO_STATE);
|
STORE_STATE(&ctx->cached_estate, ZERO_STATE);
|
||||||
STORE_STATE(&ctx->cached_esucc, ZERO_STATE);
|
|
||||||
|
|
||||||
DEBUG_PRINTF("LOAD STATE\n");
|
DEBUG_PRINTF("LOAD STATE\n");
|
||||||
STORE_STATE(&ctx->s, LOAD_STATE(q->state));
|
STORE_STATE(&ctx->s, LOAD_STATE(q->state));
|
||||||
@ -833,7 +828,6 @@ char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset,
|
|||||||
ctx->callback = cb;
|
ctx->callback = cb;
|
||||||
ctx->context = context;
|
ctx->context = context;
|
||||||
STORE_STATE(&ctx->cached_estate, ZERO_STATE);
|
STORE_STATE(&ctx->cached_estate, ZERO_STATE);
|
||||||
STORE_STATE(&ctx->cached_esucc, ZERO_STATE);
|
|
||||||
|
|
||||||
const IMPL_NFA_T *limex = getImplNfa(n);
|
const IMPL_NFA_T *limex = getImplNfa(n);
|
||||||
STORE_STATE(&ctx->s, INITIAL_FN(limex, 0)); // always anchored
|
STORE_STATE(&ctx->s, INITIAL_FN(limex, 0)); // always anchored
|
||||||
|
@ -700,7 +700,10 @@ aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,
|
|||||||
ReportID arb;
|
ReportID arb;
|
||||||
u8 single;
|
u8 single;
|
||||||
u32 accelCount;
|
u32 accelCount;
|
||||||
|
|
||||||
u8 alphaShift = info.getAlphaShift();
|
u8 alphaShift = info.getAlphaShift();
|
||||||
|
assert(alphaShift <= 8);
|
||||||
|
|
||||||
u16 count_real_states;
|
u16 count_real_states;
|
||||||
if (allocateFSN16(info, &count_real_states)) {
|
if (allocateFSN16(info, &count_real_states)) {
|
||||||
DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
|
DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
|
||||||
@ -843,6 +846,7 @@ void fillInBasicState8(const dfa_info &info, mstate_aux *aux, u8 *succ_table,
|
|||||||
const vector<u32> &reports_eod, u32 i) {
|
const vector<u32> &reports_eod, u32 i) {
|
||||||
dstate_id_t j = info.implId(i);
|
dstate_id_t j = info.implId(i);
|
||||||
u8 alphaShift = info.getAlphaShift();
|
u8 alphaShift = info.getAlphaShift();
|
||||||
|
assert(alphaShift <= 8);
|
||||||
|
|
||||||
for (size_t s = 0; s < info.impl_alpha_size; s++) {
|
for (size_t s = 0; s < info.impl_alpha_size; s++) {
|
||||||
dstate_id_t raw_succ = info.states[i].next[s];
|
dstate_id_t raw_succ = info.states[i].next[s];
|
||||||
|
@ -70,9 +70,9 @@ void dumpKilo(FILE *f, const mpv *m, const mpv_kilopuff *k) {
|
|||||||
break;
|
break;
|
||||||
case MPV_VERM:
|
case MPV_VERM:
|
||||||
if (!ourisprint(k->u.verm.c)) {
|
if (!ourisprint(k->u.verm.c)) {
|
||||||
fprintf(f, "verm 0x%hhu\n", k->u.verm.c);
|
fprintf(f, "verm 0x%02x\n", k->u.verm.c);
|
||||||
} else {
|
} else {
|
||||||
fprintf(f, "verm 0x%hhu '%c'\n", k->u.verm.c, k->u.verm.c);
|
fprintf(f, "verm 0x%02x '%c'\n", k->u.verm.c, k->u.verm.c);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case MPV_SHUFTI:
|
case MPV_SHUFTI:
|
||||||
@ -87,9 +87,9 @@ void dumpKilo(FILE *f, const mpv *m, const mpv_kilopuff *k) {
|
|||||||
break;
|
break;
|
||||||
case MPV_NVERM:
|
case MPV_NVERM:
|
||||||
if (!ourisprint(k->u.verm.c)) {
|
if (!ourisprint(k->u.verm.c)) {
|
||||||
fprintf(f, "nverm 0x%hhu\n", k->u.verm.c);
|
fprintf(f, "nverm 0x%02x\n", k->u.verm.c);
|
||||||
} else {
|
} else {
|
||||||
fprintf(f, "nverm 0x%hhu '%c'\n", k->u.verm.c, k->u.verm.c);
|
fprintf(f, "nverm 0x%02x '%c'\n", k->u.verm.c, k->u.verm.c);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -196,6 +196,14 @@ static really_inline s64a q_cur_loc(const struct mq *q) {
|
|||||||
return q->items[q->cur].location;
|
return q->items[q->cur].location;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** \brief Returns the type of the last event in the queue. */
|
||||||
|
static really_inline u32 q_last_type(const struct mq *q) {
|
||||||
|
assert(q->cur < q->end);
|
||||||
|
assert(q->end > 0);
|
||||||
|
assert(q->end <= MAX_MQE_LEN);
|
||||||
|
return q->items[q->end - 1].type;
|
||||||
|
}
|
||||||
|
|
||||||
/** \brief Returns the location (relative to the beginning of the current data
|
/** \brief Returns the location (relative to the beginning of the current data
|
||||||
* buffer) of the last event in the queue. */
|
* buffer) of the last event in the queue. */
|
||||||
static really_inline s64a q_last_loc(const struct mq *q) {
|
static really_inline s64a q_last_loc(const struct mq *q) {
|
||||||
@ -269,7 +277,7 @@ void debugQueue(const struct mq *q) {
|
|||||||
type = "MQE_TOP_N";
|
type = "MQE_TOP_N";
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
DEBUG_PRINTF("\tq[%u] %lld %d:%s\n", cur, q->items[cur].location,
|
DEBUG_PRINTF("\tq[%u] %lld %u:%s\n", cur, q->items[cur].location,
|
||||||
q->items[cur].type, type);
|
q->items[cur].type, type);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
106
src/nfa/repeat.c
106
src/nfa/repeat.c
@ -39,6 +39,8 @@
|
|||||||
#include "util/pack_bits.h"
|
#include "util/pack_bits.h"
|
||||||
#include "util/partial_store.h"
|
#include "util/partial_store.h"
|
||||||
#include "util/unaligned.h"
|
#include "util/unaligned.h"
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
/** \brief Returns the total capacity of the ring.
|
/** \brief Returns the total capacity of the ring.
|
||||||
@ -709,12 +711,7 @@ enum RepeatMatch repeatHasMatchRing(const struct RepeatInfo *info,
|
|||||||
dumpRing(info, xs, ring);
|
dumpRing(info, xs, ring);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// We work in terms of the distance between the current offset and the base
|
if (offset - xs->offset < info->repeatMin) {
|
||||||
// offset in our history.
|
|
||||||
u64a delta = offset - xs->offset;
|
|
||||||
DEBUG_PRINTF("delta=%llu\n", delta);
|
|
||||||
|
|
||||||
if (delta < info->repeatMin) {
|
|
||||||
DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n");
|
DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n");
|
||||||
return REPEAT_NOMATCH;
|
return REPEAT_NOMATCH;
|
||||||
}
|
}
|
||||||
@ -724,17 +721,22 @@ enum RepeatMatch repeatHasMatchRing(const struct RepeatInfo *info,
|
|||||||
return REPEAT_STALE;
|
return REPEAT_STALE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we're not stale, delta fits in the range [repeatMin, lastTop +
|
||||||
|
// repeatMax], which fits in a u32.
|
||||||
|
assert(offset - xs->offset < UINT32_MAX);
|
||||||
|
u32 delta = (u32)(offset - xs->offset);
|
||||||
|
DEBUG_PRINTF("delta=%u\n", delta);
|
||||||
|
|
||||||
// Find the bounds on possible matches in the ring buffer.
|
// Find the bounds on possible matches in the ring buffer.
|
||||||
u64a lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
|
u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
|
||||||
u64a upper = delta - info->repeatMin + 1;
|
u32 upper = MIN(delta - info->repeatMin + 1, ringOccupancy(xs, ringSize));
|
||||||
upper = MIN(upper, ringOccupancy(xs, ringSize));
|
|
||||||
|
|
||||||
if (lower >= upper) {
|
if (lower >= upper) {
|
||||||
DEBUG_PRINTF("no matches to check\n");
|
DEBUG_PRINTF("no matches to check\n");
|
||||||
return REPEAT_NOMATCH;
|
return REPEAT_NOMATCH;
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("possible match indices=[%llu,%llu]\n", lower, upper);
|
DEBUG_PRINTF("possible match indices=[%u,%u]\n", lower, upper);
|
||||||
if (ringHasMatch(xs, ring, ringSize, lower, upper)) {
|
if (ringHasMatch(xs, ring, ringSize, lower, upper)) {
|
||||||
return REPEAT_MATCH;
|
return REPEAT_MATCH;
|
||||||
}
|
}
|
||||||
@ -1163,7 +1165,7 @@ static
|
|||||||
void storeInitialRingTopPatch(const struct RepeatInfo *info,
|
void storeInitialRingTopPatch(const struct RepeatInfo *info,
|
||||||
struct RepeatRingControl *xs,
|
struct RepeatRingControl *xs,
|
||||||
u8 *state, u64a offset) {
|
u8 *state, u64a offset) {
|
||||||
DEBUG_PRINTF("set the first patch\n");
|
DEBUG_PRINTF("set the first patch, offset=%llu\n", offset);
|
||||||
xs->offset = offset;
|
xs->offset = offset;
|
||||||
|
|
||||||
u8 *active = state;
|
u8 *active = state;
|
||||||
@ -1197,12 +1199,10 @@ u32 getSparseOptimalTargetValue(const struct RepeatInfo *info,
|
|||||||
return loc;
|
return loc;
|
||||||
}
|
}
|
||||||
|
|
||||||
u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info,
|
static
|
||||||
const union RepeatControl *ctrl,
|
u64a sparseLastTop(const struct RepeatInfo *info,
|
||||||
const void *state) {
|
const struct RepeatRingControl *xs, const u8 *state) {
|
||||||
DEBUG_PRINTF("looking for last top\n");
|
DEBUG_PRINTF("looking for last top\n");
|
||||||
const struct RepeatRingControl *xs = &ctrl->ring;
|
|
||||||
|
|
||||||
u32 patch_size = info->patchSize;
|
u32 patch_size = info->patchSize;
|
||||||
u32 patch_count = info->patchCount;
|
u32 patch_count = info->patchCount;
|
||||||
u32 encoding_size = info->encodingSize;
|
u32 encoding_size = info->encodingSize;
|
||||||
@ -1214,7 +1214,7 @@ u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info,
|
|||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("patch%u encoding_size%u occ%u\n", patch, encoding_size, occ);
|
DEBUG_PRINTF("patch%u encoding_size%u occ%u\n", patch, encoding_size, occ);
|
||||||
const u8 *ring = (const u8 *)state + info->patchesOffset;
|
const u8 *ring = state + info->patchesOffset;
|
||||||
u64a val = partial_load_u64a(ring + encoding_size * patch, encoding_size);
|
u64a val = partial_load_u64a(ring + encoding_size * patch, encoding_size);
|
||||||
|
|
||||||
DEBUG_PRINTF("val:%llu\n", val);
|
DEBUG_PRINTF("val:%llu\n", val);
|
||||||
@ -1231,6 +1231,12 @@ u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info,
|
||||||
|
const union RepeatControl *ctrl,
|
||||||
|
const void *state) {
|
||||||
|
return sparseLastTop(info, &ctrl->ring, state);
|
||||||
|
}
|
||||||
|
|
||||||
u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info,
|
u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info,
|
||||||
const union RepeatControl *ctrl,
|
const union RepeatControl *ctrl,
|
||||||
const void *state, u64a offset) {
|
const void *state, u64a offset) {
|
||||||
@ -1249,13 +1255,13 @@ u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info,
|
|||||||
if (nextOffset <= xs->offset + info->repeatMin) {
|
if (nextOffset <= xs->offset + info->repeatMin) {
|
||||||
patch = xs->first;
|
patch = xs->first;
|
||||||
tval = 0;
|
tval = 0;
|
||||||
} else if (nextOffset >
|
} else if (nextOffset > sparseLastTop(info, xs, state) + info->repeatMax) {
|
||||||
repeatLastTopSparseOptimalP(info, ctrl, state) +
|
DEBUG_PRINTF("ring is stale\n");
|
||||||
info->repeatMax) {
|
|
||||||
return 0;
|
return 0;
|
||||||
} else {
|
} else {
|
||||||
u64a delta = nextOffset - xs->offset;
|
assert(nextOffset - xs->offset < UINT32_MAX); // ring is not stale
|
||||||
u64a lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
|
u32 delta = (u32)(nextOffset - xs->offset);
|
||||||
|
u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
|
||||||
patch = lower / patch_size;
|
patch = lower / patch_size;
|
||||||
tval = lower - patch * patch_size;
|
tval = lower - patch * patch_size;
|
||||||
}
|
}
|
||||||
@ -1336,21 +1342,32 @@ void repeatStoreSparseOptimalP(const struct RepeatInfo *info,
|
|||||||
union RepeatControl *ctrl, void *state,
|
union RepeatControl *ctrl, void *state,
|
||||||
u64a offset, char is_alive) {
|
u64a offset, char is_alive) {
|
||||||
struct RepeatRingControl *xs = &ctrl->ring;
|
struct RepeatRingControl *xs = &ctrl->ring;
|
||||||
|
|
||||||
u64a delta = offset - xs->offset;
|
|
||||||
u32 patch_size = info->patchSize;
|
|
||||||
u32 patch_count = info->patchCount;
|
|
||||||
u32 encoding_size = info->encodingSize;
|
|
||||||
u32 patch = delta / patch_size;
|
|
||||||
DEBUG_PRINTF("offset: %llu encoding_size: %u\n", offset, encoding_size);
|
|
||||||
|
|
||||||
u8 *active = (u8 *)state;
|
u8 *active = (u8 *)state;
|
||||||
if (!is_alive) {
|
|
||||||
|
DEBUG_PRINTF("offset: %llu encoding_size: %u\n", offset,
|
||||||
|
info->encodingSize);
|
||||||
|
|
||||||
|
// If (a) this is the first top, or (b) the ring is stale, initialize the
|
||||||
|
// ring and write this offset in as the first top.
|
||||||
|
if (!is_alive ||
|
||||||
|
offset > sparseLastTop(info, xs, state) + info->repeatMax) {
|
||||||
storeInitialRingTopPatch(info, xs, active, offset);
|
storeInitialRingTopPatch(info, xs, active, offset);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(offset >= xs->offset);
|
// Tops should arrive in order, with no duplicates.
|
||||||
|
assert(offset > sparseLastTop(info, xs, state));
|
||||||
|
|
||||||
|
// As the ring is not stale, our delta should fit within a u32.
|
||||||
|
assert(offset - xs->offset <= UINT32_MAX);
|
||||||
|
u32 delta = (u32)(offset - xs->offset);
|
||||||
|
u32 patch_size = info->patchSize;
|
||||||
|
u32 patch_count = info->patchCount;
|
||||||
|
u32 encoding_size = info->encodingSize;
|
||||||
|
u32 patch = delta / patch_size;
|
||||||
|
|
||||||
|
DEBUG_PRINTF("delta=%u, patch_size=%u, patch=%u\n", delta, patch_size,
|
||||||
|
patch);
|
||||||
|
|
||||||
u8 *ring = active + info->patchesOffset;
|
u8 *ring = active + info->patchesOffset;
|
||||||
u32 occ = ringOccupancy(xs, patch_count);
|
u32 occ = ringOccupancy(xs, patch_count);
|
||||||
@ -1361,10 +1378,6 @@ void repeatStoreSparseOptimalP(const struct RepeatInfo *info,
|
|||||||
patch, patch_count, occ);
|
patch, patch_count, occ);
|
||||||
if (patch >= patch_count) {
|
if (patch >= patch_count) {
|
||||||
u32 patch_shift_count = patch - patch_count + 1;
|
u32 patch_shift_count = patch - patch_count + 1;
|
||||||
if (patch_shift_count >= patch_count) {
|
|
||||||
storeInitialRingTopPatch(info, xs, active, offset);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
assert(patch >= patch_shift_count);
|
assert(patch >= patch_shift_count);
|
||||||
DEBUG_PRINTF("shifting by %u\n", patch_shift_count);
|
DEBUG_PRINTF("shifting by %u\n", patch_shift_count);
|
||||||
xs->offset += patch_size * patch_shift_count;
|
xs->offset += patch_size * patch_shift_count;
|
||||||
@ -1401,7 +1414,8 @@ void repeatStoreSparseOptimalP(const struct RepeatInfo *info,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u64a diff = delta - patch * patch_size;
|
assert((u64a)patch * patch_size <= delta);
|
||||||
|
u32 diff = delta - patch * patch_size;
|
||||||
const u64a *repeatTable = getImplTable(info);
|
const u64a *repeatTable = getImplTable(info);
|
||||||
val += repeatTable[diff];
|
val += repeatTable[diff];
|
||||||
|
|
||||||
@ -1492,21 +1506,25 @@ enum RepeatMatch repeatHasMatchSparseOptimalP(const struct RepeatInfo *info,
|
|||||||
if (offset < xs->offset + info->repeatMin) {
|
if (offset < xs->offset + info->repeatMin) {
|
||||||
DEBUG_PRINTF("too soon\n");
|
DEBUG_PRINTF("too soon\n");
|
||||||
return REPEAT_NOMATCH;
|
return REPEAT_NOMATCH;
|
||||||
} else if (offset > repeatLastTopSparseOptimalP(info, ctrl, state) +
|
} else if (offset > sparseLastTop(info, xs, state) + info->repeatMax) {
|
||||||
info->repeatMax) {
|
|
||||||
DEBUG_PRINTF("stale\n");
|
DEBUG_PRINTF("stale\n");
|
||||||
return REPEAT_STALE;
|
return REPEAT_STALE;
|
||||||
}
|
}
|
||||||
|
|
||||||
u64a delta = offset - xs->offset;
|
// Our delta between the base offset of the ring and the current offset
|
||||||
u64a lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
|
// must fit within the range [repeatMin, lastPossibleTop + repeatMax]. This
|
||||||
u64a upper = delta - info->repeatMin;
|
// range fits comfortably within a u32.
|
||||||
|
assert(offset - xs->offset <= UINT32_MAX);
|
||||||
|
|
||||||
|
u32 delta = (u32)(offset - xs->offset);
|
||||||
u32 patch_size = info->patchSize;
|
u32 patch_size = info->patchSize;
|
||||||
u32 patch_count = info->patchCount;
|
u32 patch_count = info->patchCount;
|
||||||
u32 occ = ringOccupancy(xs, patch_count);
|
u32 occ = ringOccupancy(xs, patch_count);
|
||||||
upper = MIN(upper, occ * patch_size - 1);
|
|
||||||
|
|
||||||
DEBUG_PRINTF("lower=%llu, upper=%llu\n", lower, upper);
|
u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
|
||||||
|
u32 upper = MIN(delta - info->repeatMin, occ * patch_size - 1);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("lower=%u, upper=%u\n", lower, upper);
|
||||||
u32 patch_lower = lower / patch_size;
|
u32 patch_lower = lower / patch_size;
|
||||||
u32 patch_upper = upper / patch_size;
|
u32 patch_upper = upper / patch_size;
|
||||||
|
|
||||||
|
@ -75,7 +75,7 @@ u32 calcPackedBytes(u64a val) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
u64a repeatRecurTable(struct RepeatStateInfo *info, const depth &repeatMax,
|
u32 repeatRecurTable(struct RepeatStateInfo *info, const depth &repeatMax,
|
||||||
const u32 minPeriod) {
|
const u32 minPeriod) {
|
||||||
u32 repeatTmp = info->patchCount > 2 ? 64 : (u32)repeatMax;
|
u32 repeatTmp = info->patchCount > 2 ? 64 : (u32)repeatMax;
|
||||||
u32 repeat_index = repeatTmp < minPeriod ? repeatTmp : minPeriod;
|
u32 repeat_index = repeatTmp < minPeriod ? repeatTmp : minPeriod;
|
||||||
@ -93,7 +93,7 @@ u64a repeatRecurTable(struct RepeatStateInfo *info, const depth &repeatMax,
|
|||||||
|
|
||||||
static
|
static
|
||||||
u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax,
|
u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax,
|
||||||
const u32 minPeriod, u64a rv) {
|
const u32 minPeriod, u32 rv) {
|
||||||
u32 cnt = 0;
|
u32 cnt = 0;
|
||||||
u32 patch_bits = 0;
|
u32 patch_bits = 0;
|
||||||
u32 total_size = 0;
|
u32 total_size = 0;
|
||||||
@ -171,7 +171,7 @@ RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin,
|
|||||||
assert(minPeriod);
|
assert(minPeriod);
|
||||||
assert(repeatMax.is_finite());
|
assert(repeatMax.is_finite());
|
||||||
{
|
{
|
||||||
u64a rv = repeatRecurTable(this, repeatMax, minPeriod);
|
u32 rv = repeatRecurTable(this, repeatMax, minPeriod);
|
||||||
u32 repeatTmp = 0;
|
u32 repeatTmp = 0;
|
||||||
if ((u32)repeatMax < minPeriod) {
|
if ((u32)repeatMax < minPeriod) {
|
||||||
repeatTmp = repeatMax;
|
repeatTmp = repeatMax;
|
||||||
|
@ -64,7 +64,7 @@ public:
|
|||||||
bool prefilter, const som_type som, ReportID rid, u64a min_offset,
|
bool prefilter, const som_type som, ReportID rid, u64a min_offset,
|
||||||
u64a max_offset, u64a min_length);
|
u64a max_offset, u64a min_length);
|
||||||
|
|
||||||
~NGWrapper();
|
~NGWrapper() override;
|
||||||
|
|
||||||
/** index of the expression represented by this graph, used
|
/** index of the expression represented by this graph, used
|
||||||
* - down the track in error handling
|
* - down the track in error handling
|
||||||
|
@ -55,14 +55,14 @@ namespace ue2 {
|
|||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
/** Distance value used to indicate that the vertex can't be reached. */
|
/** Distance value used to indicate that the vertex can't be reached. */
|
||||||
static const int DIST_UNREACHABLE = INT_MAX;
|
static constexpr int DIST_UNREACHABLE = INT_MAX;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Distance value used to indicate that the distance to a vertex is infinite
|
* Distance value used to indicate that the distance to a vertex is infinite
|
||||||
* (for example, it's the max distance and there's a cycle in the path) or so
|
* (for example, it's the max distance and there's a cycle in the path) or so
|
||||||
* large that we should consider it effectively infinite.
|
* large that we should consider it effectively infinite.
|
||||||
*/
|
*/
|
||||||
static const int DIST_INFINITY = INT_MAX - 1;
|
static constexpr int DIST_INFINITY = INT_MAX - 1;
|
||||||
|
|
||||||
//
|
//
|
||||||
// Filters
|
// Filters
|
||||||
@ -71,10 +71,12 @@ static const int DIST_INFINITY = INT_MAX - 1;
|
|||||||
template <class GraphT>
|
template <class GraphT>
|
||||||
struct NodeFilter {
|
struct NodeFilter {
|
||||||
typedef typename GraphT::edge_descriptor EdgeT;
|
typedef typename GraphT::edge_descriptor EdgeT;
|
||||||
NodeFilter() { }
|
NodeFilter() {} // BGL filters must be default-constructible.
|
||||||
NodeFilter(const vector<bool> *bad_in, const GraphT *g_in)
|
NodeFilter(const vector<bool> *bad_in, const GraphT *g_in)
|
||||||
: bad(bad_in), g(g_in) { }
|
: bad(bad_in), g(g_in) { }
|
||||||
bool operator()(const EdgeT &e) const {
|
bool operator()(const EdgeT &e) const {
|
||||||
|
assert(g && bad);
|
||||||
|
|
||||||
u32 src_idx = (*g)[source(e, *g)].index;
|
u32 src_idx = (*g)[source(e, *g)].index;
|
||||||
u32 tar_idx = (*g)[target(e, *g)].index;
|
u32 tar_idx = (*g)[target(e, *g)].index;
|
||||||
|
|
||||||
@ -84,16 +86,20 @@ struct NodeFilter {
|
|||||||
|
|
||||||
return !(*bad)[src_idx] && !(*bad)[tar_idx];
|
return !(*bad)[src_idx] && !(*bad)[tar_idx];
|
||||||
}
|
}
|
||||||
const vector<bool> *bad;
|
|
||||||
const GraphT *g;
|
private:
|
||||||
|
const vector<bool> *bad = nullptr;
|
||||||
|
const GraphT *g = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class GraphT>
|
template <class GraphT>
|
||||||
struct StartFilter {
|
struct StartFilter {
|
||||||
typedef typename GraphT::edge_descriptor EdgeT;
|
typedef typename GraphT::edge_descriptor EdgeT;
|
||||||
StartFilter() { }
|
StartFilter() {} // BGL filters must be default-constructible.
|
||||||
explicit StartFilter(const GraphT *g_in) : g(g_in) { }
|
explicit StartFilter(const GraphT *g_in) : g(g_in) { }
|
||||||
bool operator()(const EdgeT &e) const {
|
bool operator()(const EdgeT &e) const {
|
||||||
|
assert(g);
|
||||||
|
|
||||||
u32 src_idx = (*g)[source(e, *g)].index;
|
u32 src_idx = (*g)[source(e, *g)].index;
|
||||||
u32 tar_idx = (*g)[target(e, *g)].index;
|
u32 tar_idx = (*g)[target(e, *g)].index;
|
||||||
|
|
||||||
@ -107,7 +113,9 @@ struct StartFilter {
|
|||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
const GraphT *g;
|
|
||||||
|
private:
|
||||||
|
const GraphT *g = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
@ -125,61 +125,62 @@ void execute_graph_i(const NGHolder &g, const vector<StateInfo> &info,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void fillStateBitset(const NGHolder &g, const set<NFAVertex> &in,
|
dynamic_bitset<> makeStateBitset(const NGHolder &g,
|
||||||
dynamic_bitset<> &out) {
|
const flat_set<NFAVertex> &in) {
|
||||||
out.reset();
|
dynamic_bitset<> work_states(num_vertices(g));
|
||||||
for (auto v : in) {
|
for (const auto &v : in) {
|
||||||
u32 idx = g[v].index;
|
u32 idx = g[v].index;
|
||||||
out.set(idx);
|
work_states.set(idx);
|
||||||
}
|
}
|
||||||
|
return work_states;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void fillVertexSet(const dynamic_bitset<> &in,
|
flat_set<NFAVertex> getVertices(const dynamic_bitset<> &in,
|
||||||
const vector<StateInfo> &info, set<NFAVertex> &out) {
|
const vector<StateInfo> &info) {
|
||||||
out.clear();
|
flat_set<NFAVertex> out;
|
||||||
for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
|
for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
|
||||||
out.insert(info[i].vertex);
|
out.insert(info[i].vertex);
|
||||||
}
|
}
|
||||||
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void fillInfoTable(const NGHolder &g, vector<StateInfo> &info) {
|
vector<StateInfo> makeInfoTable(const NGHolder &g) {
|
||||||
info.resize(num_vertices(g));
|
vector<StateInfo> info(num_vertices(g));
|
||||||
for (auto v : vertices_range(g)) {
|
for (auto v : vertices_range(g)) {
|
||||||
u32 idx = g[v].index;
|
u32 idx = g[v].index;
|
||||||
const CharReach &cr = g[v].char_reach;
|
const CharReach &cr = g[v].char_reach;
|
||||||
assert(idx < info.size());
|
assert(idx < info.size());
|
||||||
info[idx] = StateInfo(v, cr);
|
info[idx] = StateInfo(v, cr);
|
||||||
}
|
}
|
||||||
|
return info;
|
||||||
}
|
}
|
||||||
|
|
||||||
void execute_graph(const NGHolder &g, const ue2_literal &input,
|
flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input,
|
||||||
set<NFAVertex> *states, bool kill_sds) {
|
const flat_set<NFAVertex> &initial_states,
|
||||||
|
bool kill_sds) {
|
||||||
assert(hasCorrectlyNumberedVertices(g));
|
assert(hasCorrectlyNumberedVertices(g));
|
||||||
|
|
||||||
vector<StateInfo> info;
|
auto info = makeInfoTable(g);
|
||||||
fillInfoTable(g, info);
|
auto work_states = makeStateBitset(g, initial_states);
|
||||||
dynamic_bitset<> work_states(num_vertices(g));
|
|
||||||
fillStateBitset(g, *states, work_states);
|
|
||||||
|
|
||||||
execute_graph_i(g, info, input, &work_states, kill_sds);
|
execute_graph_i(g, info, input, &work_states, kill_sds);
|
||||||
|
|
||||||
fillVertexSet(work_states, info, *states);
|
return getVertices(work_states, info);
|
||||||
}
|
}
|
||||||
|
|
||||||
void execute_graph(const NGHolder &g, const vector<CharReach> &input,
|
flat_set<NFAVertex> execute_graph(const NGHolder &g,
|
||||||
set<NFAVertex> *states) {
|
const vector<CharReach> &input,
|
||||||
|
const flat_set<NFAVertex> &initial_states) {
|
||||||
assert(hasCorrectlyNumberedVertices(g));
|
assert(hasCorrectlyNumberedVertices(g));
|
||||||
|
|
||||||
vector<StateInfo> info;
|
auto info = makeInfoTable(g);
|
||||||
fillInfoTable(g, info);
|
auto work_states = makeStateBitset(g, initial_states);
|
||||||
dynamic_bitset<> work_states(num_vertices(g));
|
|
||||||
fillStateBitset(g, *states, work_states);
|
|
||||||
|
|
||||||
execute_graph_i(g, info, input, &work_states, false);
|
execute_graph_i(g, info, input, &work_states, false);
|
||||||
|
|
||||||
fillVertexSet(work_states, info, *states);
|
return getVertices(work_states, info);
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef boost::reverse_graph<const NFAGraph, const NFAGraph &> RevNFAGraph;
|
typedef boost::reverse_graph<const NFAGraph, const NFAGraph &> RevNFAGraph;
|
||||||
@ -276,9 +277,10 @@ private:
|
|||||||
};
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
void execute_graph(const NGHolder &running_g, const NGHolder &input_dag,
|
flat_set<NFAVertex> execute_graph(const NGHolder &running_g,
|
||||||
const set<NFAVertex> &input_start_states,
|
const NGHolder &input_dag,
|
||||||
set<NFAVertex> *states) {
|
const flat_set<NFAVertex> &input_start_states,
|
||||||
|
const flat_set<NFAVertex> &initial_states) {
|
||||||
DEBUG_PRINTF("g has %zu vertices, input_dag has %zu vertices\n",
|
DEBUG_PRINTF("g has %zu vertices, input_dag has %zu vertices\n",
|
||||||
num_vertices(running_g), num_vertices(input_dag));
|
num_vertices(running_g), num_vertices(input_dag));
|
||||||
assert(hasCorrectlyNumberedVertices(running_g));
|
assert(hasCorrectlyNumberedVertices(running_g));
|
||||||
@ -290,10 +292,8 @@ void execute_graph(const NGHolder &running_g, const NGHolder &input_dag,
|
|||||||
RevNFAGraph revg(input_dag.g);
|
RevNFAGraph revg(input_dag.g);
|
||||||
map<NFAVertex, dynamic_bitset<> > dfs_states;
|
map<NFAVertex, dynamic_bitset<> > dfs_states;
|
||||||
|
|
||||||
vector<StateInfo> info;
|
auto info = makeInfoTable(running_g);
|
||||||
fillInfoTable(running_g, info);
|
auto input_fs = makeStateBitset(running_g, initial_states);
|
||||||
dynamic_bitset<> input_fs(num_vertices(running_g));
|
|
||||||
fillStateBitset(running_g, *states, input_fs);
|
|
||||||
|
|
||||||
for (auto v : input_start_states) {
|
for (auto v : input_start_states) {
|
||||||
dfs_states[v] = input_fs;
|
dfs_states[v] = input_fs;
|
||||||
@ -303,21 +303,25 @@ void execute_graph(const NGHolder &running_g, const NGHolder &input_dag,
|
|||||||
eg_visitor(running_g, info, input_dag, dfs_states),
|
eg_visitor(running_g, info, input_dag, dfs_states),
|
||||||
make_assoc_property_map(colours));
|
make_assoc_property_map(colours));
|
||||||
|
|
||||||
fillVertexSet(dfs_states[input_dag.accept], info, *states);
|
auto states = getVertices(dfs_states[input_dag.accept], info);
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
DEBUG_PRINTF(" output rstates:");
|
DEBUG_PRINTF(" output rstates:");
|
||||||
for (auto v : *states) {
|
for (const auto &v : states) {
|
||||||
printf(" %u", running_g[v].index);
|
printf(" %u", running_g[v].index);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
return states;
|
||||||
}
|
}
|
||||||
|
|
||||||
void execute_graph(const NGHolder &running_g, const NGHolder &input_dag,
|
flat_set<NFAVertex> execute_graph(const NGHolder &running_g,
|
||||||
set<NFAVertex> *states) {
|
const NGHolder &input_dag,
|
||||||
set<NFAVertex> input_start_states = {input_dag.start, input_dag.startDs};
|
const flat_set<NFAVertex> &initial_states) {
|
||||||
execute_graph(running_g, input_dag, input_start_states, states);
|
auto input_start_states = {input_dag.start, input_dag.startDs};
|
||||||
|
return execute_graph(running_g, input_dag, input_start_states,
|
||||||
|
initial_states);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -35,8 +35,8 @@
|
|||||||
#define NG_EXECUTE_H
|
#define NG_EXECUTE_H
|
||||||
|
|
||||||
#include "ng_holder.h"
|
#include "ng_holder.h"
|
||||||
|
#include "util/ue2_containers.h"
|
||||||
|
|
||||||
#include <set>
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
@ -44,23 +44,25 @@ namespace ue2 {
|
|||||||
class CharReach;
|
class CharReach;
|
||||||
struct ue2_literal;
|
struct ue2_literal;
|
||||||
|
|
||||||
void execute_graph(const NGHolder &g, const ue2_literal &input,
|
flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input,
|
||||||
std::set<NFAVertex> *states, bool kill_sds = false);
|
const flat_set<NFAVertex> &initial,
|
||||||
|
bool kill_sds = false);
|
||||||
|
|
||||||
void execute_graph(const NGHolder &g, const std::vector<CharReach> &input,
|
flat_set<NFAVertex> execute_graph(const NGHolder &g,
|
||||||
std::set<NFAVertex> *states);
|
const std::vector<CharReach> &input,
|
||||||
|
const flat_set<NFAVertex> &initial);
|
||||||
|
|
||||||
/** on exit, states contains any state which may still be enabled after
|
/** on exit, states contains any state which may still be enabled after
|
||||||
* receiving an input which corresponds to some path through the input_dag from
|
* receiving an input which corresponds to some path through the input_dag from
|
||||||
* start or startDs to accept. input_dag MUST be acyclic aside from self-loops.
|
* start or startDs to accept. input_dag MUST be acyclic aside from self-loops.
|
||||||
*/
|
*/
|
||||||
void execute_graph(const NGHolder &g, const NGHolder &input_dag,
|
flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag,
|
||||||
std::set<NFAVertex> *states);
|
const flat_set<NFAVertex> &initial);
|
||||||
|
|
||||||
/* as above, but able to specify the source states for the input graph */
|
/* as above, but able to specify the source states for the input graph */
|
||||||
void execute_graph(const NGHolder &g, const NGHolder &input_dag,
|
flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag,
|
||||||
const std::set<NFAVertex> &input_start_states,
|
const flat_set<NFAVertex> &input_start_states,
|
||||||
std::set<NFAVertex> *states);
|
const flat_set<NFAVertex> &initial);
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
|
@ -114,7 +114,7 @@ void populateAccepts(const NGHolder &g, StateSet *accept, StateSet *acceptEod) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
class Automaton_Base {
|
class Automaton_Base {
|
||||||
public:
|
protected:
|
||||||
Automaton_Base(const NGHolder &graph_in,
|
Automaton_Base(const NGHolder &graph_in,
|
||||||
const ue2::unordered_map<NFAVertex, u32> &state_ids_in)
|
const ue2::unordered_map<NFAVertex, u32> &state_ids_in)
|
||||||
: graph(graph_in), state_ids(state_ids_in) {
|
: graph(graph_in), state_ids(state_ids_in) {
|
||||||
@ -122,6 +122,7 @@ public:
|
|||||||
assert(alphasize <= ALPHABET_SIZE);
|
assert(alphasize <= ALPHABET_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
static bool canPrune(const flat_set<ReportID> &) { return false; }
|
static bool canPrune(const flat_set<ReportID> &) { return false; }
|
||||||
|
|
||||||
const NGHolder &graph;
|
const NGHolder &graph;
|
||||||
@ -608,7 +609,6 @@ bool doHaig(const NGHolder &g,
|
|||||||
}
|
}
|
||||||
|
|
||||||
haig_note_starts(g, &rdfa->new_som_nfa_states);
|
haig_note_starts(g, &rdfa->new_som_nfa_states);
|
||||||
rdfa->trigger_nfa_state = NODE_START;
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -638,7 +638,8 @@ unique_ptr<raw_som_dfa> attemptToBuildHaig(NGHolder &g, som_type som,
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto rdfa = ue2::make_unique<raw_som_dfa>(g.kind, unordered_som);
|
auto rdfa = ue2::make_unique<raw_som_dfa>(g.kind, unordered_som, NODE_START,
|
||||||
|
somPrecision);
|
||||||
|
|
||||||
DEBUG_PRINTF("determinising nfa with %u vertices\n", numStates);
|
DEBUG_PRINTF("determinising nfa with %u vertices\n", numStates);
|
||||||
bool rv;
|
bool rv;
|
||||||
@ -658,7 +659,6 @@ unique_ptr<raw_som_dfa> attemptToBuildHaig(NGHolder &g, som_type som,
|
|||||||
|
|
||||||
DEBUG_PRINTF("determinised, building impl dfa (a,f) = (%hu,%hu)\n",
|
DEBUG_PRINTF("determinised, building impl dfa (a,f) = (%hu,%hu)\n",
|
||||||
rdfa->start_anchored, rdfa->start_floating);
|
rdfa->start_anchored, rdfa->start_floating);
|
||||||
rdfa->stream_som_loc_width = somPrecision;
|
|
||||||
|
|
||||||
assert(rdfa->kind == g.kind);
|
assert(rdfa->kind == g.kind);
|
||||||
return rdfa;
|
return rdfa;
|
||||||
@ -782,7 +782,9 @@ unique_ptr<raw_som_dfa> attemptToMergeHaig(const vector<const raw_som_dfa *> &df
|
|||||||
|
|
||||||
typedef Automaton_Haig_Merge::StateSet StateSet;
|
typedef Automaton_Haig_Merge::StateSet StateSet;
|
||||||
vector<StateSet> nfa_state_map;
|
vector<StateSet> nfa_state_map;
|
||||||
auto rdfa = ue2::make_unique<raw_som_dfa>(dfas[0]->kind, unordered_som);
|
auto rdfa = ue2::make_unique<raw_som_dfa>(dfas[0]->kind, unordered_som,
|
||||||
|
NODE_START,
|
||||||
|
dfas[0]->stream_som_loc_width);
|
||||||
|
|
||||||
int rv = determinise(n, rdfa->states, limit, &nfa_state_map);
|
int rv = determinise(n, rdfa->states, limit, &nfa_state_map);
|
||||||
if (rv) {
|
if (rv) {
|
||||||
@ -830,11 +832,9 @@ unique_ptr<raw_som_dfa> attemptToMergeHaig(const vector<const raw_som_dfa *> &df
|
|||||||
}
|
}
|
||||||
|
|
||||||
haig_merge_note_starts(dfas, per_dfa_adj, &rdfa->new_som_nfa_states);
|
haig_merge_note_starts(dfas, per_dfa_adj, &rdfa->new_som_nfa_states);
|
||||||
rdfa->trigger_nfa_state = NODE_START;
|
|
||||||
|
|
||||||
DEBUG_PRINTF("merged, building impl dfa (a,f) = (%hu,%hu)\n",
|
DEBUG_PRINTF("merged, building impl dfa (a,f) = (%hu,%hu)\n",
|
||||||
rdfa->start_anchored, rdfa->start_floating);
|
rdfa->start_anchored, rdfa->start_floating);
|
||||||
rdfa->stream_som_loc_width = dfas[0]->stream_som_loc_width;
|
|
||||||
|
|
||||||
return rdfa;
|
return rdfa;
|
||||||
}
|
}
|
||||||
|
@ -98,8 +98,7 @@ void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin,
|
|||||||
info->packedCtrlSize = rsi.packedCtrlSize;
|
info->packedCtrlSize = rsi.packedCtrlSize;
|
||||||
info->horizon = rsi.horizon;
|
info->horizon = rsi.horizon;
|
||||||
info->minPeriod = minPeriod;
|
info->minPeriod = minPeriod;
|
||||||
memcpy(&info->packedFieldSizes, rsi.packedFieldSizes.data(),
|
copy_bytes(&info->packedFieldSizes, rsi.packedFieldSizes);
|
||||||
byte_length(rsi.packedFieldSizes));
|
|
||||||
info->patchCount = rsi.patchCount;
|
info->patchCount = rsi.patchCount;
|
||||||
info->patchSize = rsi.patchSize;
|
info->patchSize = rsi.patchSize;
|
||||||
info->encodingSize = rsi.encodingSize;
|
info->encodingSize = rsi.encodingSize;
|
||||||
@ -122,7 +121,7 @@ void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin,
|
|||||||
nfa->length = verify_u32(len);
|
nfa->length = verify_u32(len);
|
||||||
info->length = verify_u32(sizeof(RepeatInfo)
|
info->length = verify_u32(sizeof(RepeatInfo)
|
||||||
+ sizeof(u64a) * (rsi.patchSize + 1));
|
+ sizeof(u64a) * (rsi.patchSize + 1));
|
||||||
memcpy(table, rsi.table.data(), byte_length(rsi.table));
|
copy_bytes(table, rsi.table);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -316,7 +316,7 @@ bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a,
|
|||||||
bool unbounded = false;
|
bool unbounded = false;
|
||||||
bool exhaustible = can_exhaust(g, rm);
|
bool exhaustible = can_exhaust(g, rm);
|
||||||
|
|
||||||
while (a) {
|
while (true) {
|
||||||
if (is_special(a, g)) {
|
if (is_special(a, g)) {
|
||||||
DEBUG_PRINTF("stopped puffing due to special vertex\n");
|
DEBUG_PRINTF("stopped puffing due to special vertex\n");
|
||||||
break;
|
break;
|
||||||
@ -350,9 +350,7 @@ bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a,
|
|||||||
|
|
||||||
a = getSoleSourceVertex(g, a);
|
a = getSoleSourceVertex(g, a);
|
||||||
|
|
||||||
if (!a) {
|
assert(a); /* already checked that old a had a proper in degree of 1 */
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Snark: we can't handle this case, because we can only handle a
|
// Snark: we can't handle this case, because we can only handle a
|
||||||
// single report ID on a vertex
|
// single report ID on a vertex
|
||||||
|
@ -266,7 +266,7 @@ bool validateEXSL(const NGHolder &g,
|
|||||||
const vector<CharReach> escapes_vec(1, escapes);
|
const vector<CharReach> escapes_vec(1, escapes);
|
||||||
const vector<CharReach> notescapes_vec(1, ~escapes);
|
const vector<CharReach> notescapes_vec(1, ~escapes);
|
||||||
|
|
||||||
set<NFAVertex> states;
|
ue2::flat_set<NFAVertex> states;
|
||||||
/* turn on all states past the prefix */
|
/* turn on all states past the prefix */
|
||||||
DEBUG_PRINTF("region %u is cutover\n", region);
|
DEBUG_PRINTF("region %u is cutover\n", region);
|
||||||
for (auto v : vertices_range(g)) {
|
for (auto v : vertices_range(g)) {
|
||||||
@ -276,20 +276,20 @@ bool validateEXSL(const NGHolder &g,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* process the escapes */
|
/* process the escapes */
|
||||||
execute_graph(g, escapes_vec, &states);
|
states = execute_graph(g, escapes_vec, states);
|
||||||
|
|
||||||
/* flood with any number of not escapes */
|
/* flood with any number of not escapes */
|
||||||
set<NFAVertex> prev_states;
|
ue2::flat_set<NFAVertex> prev_states;
|
||||||
while (prev_states != states) {
|
while (prev_states != states) {
|
||||||
prev_states = states;
|
prev_states = states;
|
||||||
execute_graph(g, notescapes_vec, &states);
|
states = execute_graph(g, notescapes_vec, states);
|
||||||
insert(&states, prev_states);
|
insert(&states, prev_states);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* find input starts to use for when we are running the prefix through as
|
/* find input starts to use for when we are running the prefix through as
|
||||||
* when the escape character arrives we may be in matching the prefix
|
* when the escape character arrives we may be in matching the prefix
|
||||||
* already */
|
* already */
|
||||||
set<NFAVertex> prefix_start_states;
|
ue2::flat_set<NFAVertex> prefix_start_states;
|
||||||
for (auto v : vertices_range(prefix)) {
|
for (auto v : vertices_range(prefix)) {
|
||||||
if (v != prefix.accept && v != prefix.acceptEod
|
if (v != prefix.accept && v != prefix.acceptEod
|
||||||
/* and as we have already made it past the prefix once */
|
/* and as we have already made it past the prefix once */
|
||||||
@ -298,11 +298,12 @@ bool validateEXSL(const NGHolder &g,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
execute_graph(prefix, escapes_vec, &prefix_start_states);
|
prefix_start_states =
|
||||||
|
execute_graph(prefix, escapes_vec, prefix_start_states);
|
||||||
|
|
||||||
assert(contains(prefix_start_states, prefix.startDs));
|
assert(contains(prefix_start_states, prefix.startDs));
|
||||||
/* see what happens after we feed it the prefix */
|
/* see what happens after we feed it the prefix */
|
||||||
execute_graph(g, prefix, prefix_start_states, &states);
|
states = execute_graph(g, prefix, prefix_start_states, states);
|
||||||
|
|
||||||
for (auto v : states) {
|
for (auto v : states) {
|
||||||
assert(v != g.accept && v != g.acceptEod); /* no cr -> should never be
|
assert(v != g.accept && v != g.acceptEod); /* no cr -> should never be
|
||||||
|
@ -136,7 +136,7 @@ bool firstMatchIsFirst(const NGHolder &p) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
set<NFAVertex> states;
|
ue2::flat_set<NFAVertex> states;
|
||||||
/* turn on all states (except starts - avoid suffix matches) */
|
/* turn on all states (except starts - avoid suffix matches) */
|
||||||
/* If we were doing (1) we would also except states leading to accepts -
|
/* If we were doing (1) we would also except states leading to accepts -
|
||||||
avoid prefix matches */
|
avoid prefix matches */
|
||||||
@ -149,7 +149,7 @@ bool firstMatchIsFirst(const NGHolder &p) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* run the prefix the main graph */
|
/* run the prefix the main graph */
|
||||||
execute_graph(p, p, &states);
|
states = execute_graph(p, p, states);
|
||||||
|
|
||||||
for (auto v : states) {
|
for (auto v : states) {
|
||||||
/* need to check if this vertex may represent an infix match - ie
|
/* need to check if this vertex may represent an infix match - ie
|
||||||
@ -313,7 +313,7 @@ bool sentClearsTail(const NGHolder &g,
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
u32 first_bad_region = ~0U;
|
u32 first_bad_region = ~0U;
|
||||||
set<NFAVertex> states;
|
ue2::flat_set<NFAVertex> states;
|
||||||
/* turn on all states */
|
/* turn on all states */
|
||||||
DEBUG_PRINTF("region %u is cutover\n", last_head_region);
|
DEBUG_PRINTF("region %u is cutover\n", last_head_region);
|
||||||
for (auto v : vertices_range(g)) {
|
for (auto v : vertices_range(g)) {
|
||||||
@ -327,7 +327,7 @@ bool sentClearsTail(const NGHolder &g,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* run the prefix the main graph */
|
/* run the prefix the main graph */
|
||||||
execute_graph(g, sent, &states);
|
states = execute_graph(g, sent, states);
|
||||||
|
|
||||||
/* .. and check if we are left with anything in the tail region */
|
/* .. and check if we are left with anything in the tail region */
|
||||||
for (auto v : states) {
|
for (auto v : states) {
|
||||||
|
@ -51,10 +51,16 @@ namespace ue2 {
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
/** Filter out edges from start-to-start or accept-to-accept. */
|
/**
|
||||||
|
* Filter out special edges, or in the top-specific variant, start edges that
|
||||||
|
* don't have the right top set.
|
||||||
|
*/
|
||||||
struct SpecialEdgeFilter {
|
struct SpecialEdgeFilter {
|
||||||
SpecialEdgeFilter() {}
|
SpecialEdgeFilter() {}
|
||||||
explicit SpecialEdgeFilter(const NGHolder *h_in) : h(h_in) {}
|
explicit SpecialEdgeFilter(const NGHolder &h_in) : h(&h_in) {}
|
||||||
|
explicit SpecialEdgeFilter(const NGHolder &h_in, u32 top_in)
|
||||||
|
: h(&h_in), single_top(true), top(top_in) {}
|
||||||
|
|
||||||
bool operator()(const NFAEdge &e) const {
|
bool operator()(const NFAEdge &e) const {
|
||||||
const NFAGraph &g = h->g;
|
const NFAGraph &g = h->g;
|
||||||
NFAVertex u = source(e, g), v = target(e, g);
|
NFAVertex u = source(e, g), v = target(e, g);
|
||||||
@ -62,23 +68,33 @@ struct SpecialEdgeFilter {
|
|||||||
(is_any_accept(u, g) && is_any_accept(v, g))) {
|
(is_any_accept(u, g) && is_any_accept(v, g))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (single_top) {
|
||||||
|
if (u == h->start && g[e].top != top) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (u == h->startDs) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
const NGHolder *h = nullptr;
|
const NGHolder *h = nullptr;
|
||||||
|
bool single_top = false;
|
||||||
|
u32 top = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
static
|
static
|
||||||
depth findMinWidth(const NGHolder &h, NFAVertex src) {
|
depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter,
|
||||||
|
NFAVertex src) {
|
||||||
if (isLeafNode(src, h)) {
|
if (isLeafNode(src, h)) {
|
||||||
return depth::unreachable();
|
return depth::unreachable();
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef boost::filtered_graph<NFAGraph, SpecialEdgeFilter> StartGraph;
|
boost::filtered_graph<NFAGraph, SpecialEdgeFilter> g(h.g, filter);
|
||||||
StartGraph g(h.g, SpecialEdgeFilter(&h));
|
|
||||||
|
|
||||||
assert(hasCorrectlyNumberedVertices(h));
|
assert(hasCorrectlyNumberedVertices(h));
|
||||||
const size_t num = num_vertices(h);
|
const size_t num = num_vertices(h);
|
||||||
@ -112,7 +128,8 @@ depth findMinWidth(const NGHolder &h, NFAVertex src) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
depth findMaxWidth(const NGHolder &h, NFAVertex src) {
|
depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter,
|
||||||
|
NFAVertex src) {
|
||||||
if (isLeafNode(src, h.g)) {
|
if (isLeafNode(src, h.g)) {
|
||||||
return depth::unreachable();
|
return depth::unreachable();
|
||||||
}
|
}
|
||||||
@ -122,8 +139,7 @@ depth findMaxWidth(const NGHolder &h, NFAVertex src) {
|
|||||||
return depth::infinity();
|
return depth::infinity();
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef boost::filtered_graph<NFAGraph, SpecialEdgeFilter> NodeFilteredGraph;
|
boost::filtered_graph<NFAGraph, SpecialEdgeFilter> g(h.g, filter);
|
||||||
NodeFilteredGraph g(h.g, SpecialEdgeFilter(&h));
|
|
||||||
|
|
||||||
assert(hasCorrectlyNumberedVertices(h));
|
assert(hasCorrectlyNumberedVertices(h));
|
||||||
const size_t num = num_vertices(h);
|
const size_t num = num_vertices(h);
|
||||||
@ -164,7 +180,7 @@ depth findMaxWidth(const NGHolder &h, NFAVertex src) {
|
|||||||
if (d.is_unreachable()) {
|
if (d.is_unreachable()) {
|
||||||
// If we're actually reachable, we'll have a min width, so we can
|
// If we're actually reachable, we'll have a min width, so we can
|
||||||
// return infinity in this case.
|
// return infinity in this case.
|
||||||
if (findMinWidth(h, src).is_reachable()) {
|
if (findMinWidth(h, filter, src).is_reachable()) {
|
||||||
return depth::infinity();
|
return depth::infinity();
|
||||||
}
|
}
|
||||||
return d;
|
return d;
|
||||||
@ -175,11 +191,10 @@ depth findMaxWidth(const NGHolder &h, NFAVertex src) {
|
|||||||
return d - depth(1);
|
return d - depth(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns the minimum width in bytes of an input that will match the given
|
static
|
||||||
* graph. */
|
depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter) {
|
||||||
depth findMinWidth(const NGHolder &h) {
|
depth startDepth = findMinWidth(h, filter, h.start);
|
||||||
depth startDepth = findMinWidth(h, h.start);
|
depth dotstarDepth = findMinWidth(h, filter, h.startDs);
|
||||||
depth dotstarDepth = findMinWidth(h, h.startDs);
|
|
||||||
DEBUG_PRINTF("startDepth=%s, dotstarDepth=%s\n", startDepth.str().c_str(),
|
DEBUG_PRINTF("startDepth=%s, dotstarDepth=%s\n", startDepth.str().c_str(),
|
||||||
dotstarDepth.str().c_str());
|
dotstarDepth.str().c_str());
|
||||||
if (startDepth.is_unreachable()) {
|
if (startDepth.is_unreachable()) {
|
||||||
@ -194,11 +209,18 @@ depth findMinWidth(const NGHolder &h) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns the maximum width in bytes of an input that will match the given
|
depth findMinWidth(const NGHolder &h) {
|
||||||
* graph. If there is no maximum width, returns infinity. */
|
return findMinWidth(h, SpecialEdgeFilter(h));
|
||||||
depth findMaxWidth(const NGHolder &h) {
|
}
|
||||||
depth startDepth = findMaxWidth(h, h.start);
|
|
||||||
depth dotstarDepth = findMaxWidth(h, h.startDs);
|
depth findMinWidth(const NGHolder &h, u32 top) {
|
||||||
|
return findMinWidth(h, SpecialEdgeFilter(h, top));
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter) {
|
||||||
|
depth startDepth = findMaxWidth(h, filter, h.start);
|
||||||
|
depth dotstarDepth = findMaxWidth(h, filter, h.startDs);
|
||||||
DEBUG_PRINTF("startDepth=%s, dotstarDepth=%s\n", startDepth.str().c_str(),
|
DEBUG_PRINTF("startDepth=%s, dotstarDepth=%s\n", startDepth.str().c_str(),
|
||||||
dotstarDepth.str().c_str());
|
dotstarDepth.str().c_str());
|
||||||
if (startDepth.is_unreachable()) {
|
if (startDepth.is_unreachable()) {
|
||||||
@ -210,4 +232,12 @@ depth findMaxWidth(const NGHolder &h) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
depth findMaxWidth(const NGHolder &h) {
|
||||||
|
return findMaxWidth(h, SpecialEdgeFilter(h));
|
||||||
|
}
|
||||||
|
|
||||||
|
depth findMaxWidth(const NGHolder &h, u32 top) {
|
||||||
|
return findMaxWidth(h, SpecialEdgeFilter(h, top));
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -41,14 +41,34 @@ namespace ue2 {
|
|||||||
|
|
||||||
class NGHolder;
|
class NGHolder;
|
||||||
|
|
||||||
/** Returns the minimum width in bytes of an input that will match the given
|
/**
|
||||||
* graph. */
|
* \brief Compute the minimum width in bytes of an input that will match the
|
||||||
|
* given graph.
|
||||||
|
*/
|
||||||
depth findMinWidth(const NGHolder &h);
|
depth findMinWidth(const NGHolder &h);
|
||||||
|
|
||||||
/** Returns the maximum width in bytes of an input that will match the given
|
/**
|
||||||
* graph. If there is no maximum width, returns infinity. */
|
* \brief Compute the minimum width in bytes of an input that will match the
|
||||||
|
* given graph, considering only paths activated by the given top.
|
||||||
|
*/
|
||||||
|
depth findMinWidth(const NGHolder &h, u32 top);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Compute the maximum width in bytes of an input that will match the
|
||||||
|
* given graph.
|
||||||
|
*
|
||||||
|
* If there is no bound on the maximum width, returns infinity.
|
||||||
|
*/
|
||||||
depth findMaxWidth(const NGHolder &h);
|
depth findMaxWidth(const NGHolder &h);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Compute the maximum width in bytes of an input that will match the
|
||||||
|
* given graph, considering only paths activated by the given top.
|
||||||
|
*
|
||||||
|
* If there is no bound on the maximum width, returns infinity.
|
||||||
|
*/
|
||||||
|
depth findMaxWidth(const NGHolder &h, u32 top);
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
#endif // NG_WIDTH_H
|
#endif // NG_WIDTH_H
|
||||||
|
@ -52,7 +52,8 @@ AsciiComponentClass *AsciiComponentClass::clone() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool AsciiComponentClass::class_empty(void) const {
|
bool AsciiComponentClass::class_empty(void) const {
|
||||||
return cr.none() && cr_ucp.none();
|
assert(finalized);
|
||||||
|
return cr.none();
|
||||||
}
|
}
|
||||||
|
|
||||||
void AsciiComponentClass::createRange(unichar to) {
|
void AsciiComponentClass::createRange(unichar to) {
|
||||||
@ -60,11 +61,15 @@ void AsciiComponentClass::createRange(unichar to) {
|
|||||||
unsigned char from = (u8)range_start;
|
unsigned char from = (u8)range_start;
|
||||||
if (from > to) {
|
if (from > to) {
|
||||||
throw LocatedParseError("Range out of order in character class");
|
throw LocatedParseError("Range out of order in character class");
|
||||||
} else {
|
|
||||||
in_cand_range = false;
|
|
||||||
cr.setRange(from, to);
|
|
||||||
range_start = INVALID_UNICODE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
in_cand_range = false;
|
||||||
|
CharReach ncr(from, to);
|
||||||
|
if (mode.caseless) {
|
||||||
|
make_caseless(&ncr);
|
||||||
|
}
|
||||||
|
cr |= ncr;
|
||||||
|
range_start = INVALID_UNICODE;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AsciiComponentClass::notePositions(GlushkovBuildState &bs) {
|
void AsciiComponentClass::notePositions(GlushkovBuildState &bs) {
|
||||||
@ -94,16 +99,13 @@ void AsciiComponentClass::add(PredefinedClass c, bool negative) {
|
|||||||
c = translateForUcpMode(c, mode);
|
c = translateForUcpMode(c, mode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Note: caselessness is handled by getPredefinedCharReach.
|
||||||
CharReach pcr = getPredefinedCharReach(c, mode);
|
CharReach pcr = getPredefinedCharReach(c, mode);
|
||||||
if (negative) {
|
if (negative) {
|
||||||
pcr.flip();
|
pcr.flip();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isUcp(c)) {
|
|
||||||
cr_ucp |= pcr;
|
|
||||||
} else {
|
|
||||||
cr |= pcr;
|
cr |= pcr;
|
||||||
}
|
|
||||||
range_start = INVALID_UNICODE;
|
range_start = INVALID_UNICODE;
|
||||||
in_cand_range = false;
|
in_cand_range = false;
|
||||||
}
|
}
|
||||||
@ -119,7 +121,12 @@ void AsciiComponentClass::add(unichar c) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
cr.set(c);
|
CharReach ncr(c, c);
|
||||||
|
if (mode.caseless) {
|
||||||
|
make_caseless(&ncr);
|
||||||
|
}
|
||||||
|
|
||||||
|
cr |= ncr;
|
||||||
range_start = c;
|
range_start = c;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -135,12 +142,6 @@ void AsciiComponentClass::finalize() {
|
|||||||
in_cand_range = false;
|
in_cand_range = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mode.caseless) {
|
|
||||||
make_caseless(&cr);
|
|
||||||
}
|
|
||||||
|
|
||||||
cr |= cr_ucp; /* characters from ucp props don't participate in caseless */
|
|
||||||
|
|
||||||
if (m_negate) {
|
if (m_negate) {
|
||||||
cr.flip();
|
cr.flip();
|
||||||
}
|
}
|
||||||
|
@ -78,12 +78,10 @@ protected:
|
|||||||
private:
|
private:
|
||||||
Position position;
|
Position position;
|
||||||
CharReach cr;
|
CharReach cr;
|
||||||
CharReach cr_ucp;
|
|
||||||
|
|
||||||
// Private copy ctor. Use clone instead.
|
// Private copy ctor. Use clone instead.
|
||||||
AsciiComponentClass(const AsciiComponentClass &other)
|
AsciiComponentClass(const AsciiComponentClass &other)
|
||||||
: ComponentClass(other), position(other.position), cr(other.cr),
|
: ComponentClass(other), position(other.position), cr(other.cr) {}
|
||||||
cr_ucp(other.cr_ucp) {}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -81,8 +81,9 @@ CharReach getPredefinedCharReach(PredefinedClass c, const ParseMode &mode) {
|
|||||||
case CLASS_DIGIT:
|
case CLASS_DIGIT:
|
||||||
return number;
|
return number;
|
||||||
case CLASS_GRAPH:
|
case CLASS_GRAPH:
|
||||||
case CLASS_XGRAPH:
|
|
||||||
return CharReach(0x21, 0x7e);
|
return CharReach(0x21, 0x7e);
|
||||||
|
case CLASS_XGRAPH:
|
||||||
|
return to_cr(getPredefinedCodePointSet(c, mode));
|
||||||
case CLASS_HORZ:
|
case CLASS_HORZ:
|
||||||
return CharReach("\x09\x20\xA0");
|
return CharReach("\x09\x20\xA0");
|
||||||
case CLASS_LOWER:
|
case CLASS_LOWER:
|
||||||
@ -93,11 +94,15 @@ CharReach getPredefinedCharReach(PredefinedClass c, const ParseMode &mode) {
|
|||||||
}
|
}
|
||||||
case CLASS_PRINT:
|
case CLASS_PRINT:
|
||||||
return CharReach(0x20, 0x7e);
|
return CharReach(0x20, 0x7e);
|
||||||
|
case CLASS_XPRINT:
|
||||||
|
return to_cr(getPredefinedCodePointSet(c, mode));
|
||||||
case CLASS_PUNCT:
|
case CLASS_PUNCT:
|
||||||
return CharReach(0x21, '0' - 1)
|
return CharReach(0x21, '0' - 1)
|
||||||
| CharReach('9' + 1, 'A' - 1)
|
| CharReach('9' + 1, 'A' - 1)
|
||||||
| CharReach('Z' + 1, 'a' - 1)
|
| CharReach('Z' + 1, 'a' - 1)
|
||||||
| CharReach('z' + 1, 126);
|
| CharReach('z' + 1, 126);
|
||||||
|
case CLASS_XPUNCT:
|
||||||
|
return to_cr(getPredefinedCodePointSet(c, mode));
|
||||||
case CLASS_SPACE:
|
case CLASS_SPACE:
|
||||||
return CharReach("\x09\x0a\x0c\x0b\x0d\x20");
|
return CharReach("\x09\x0a\x0c\x0b\x0d\x20");
|
||||||
case CLASS_UPPER:
|
case CLASS_UPPER:
|
||||||
@ -420,7 +425,7 @@ unique_ptr<ComponentClass> getLiteralComponentClass(unsigned char c,
|
|||||||
|
|
||||||
ComponentClass::ComponentClass(const ParseMode &mode_in)
|
ComponentClass::ComponentClass(const ParseMode &mode_in)
|
||||||
: m_negate(false), mode(mode_in), in_cand_range(false),
|
: m_negate(false), mode(mode_in), in_cand_range(false),
|
||||||
range_start(INVALID_UNICODE), finalized(false), firstChar('\0') {}
|
range_start(INVALID_UNICODE), finalized(false) {}
|
||||||
|
|
||||||
ComponentClass::~ComponentClass() { }
|
ComponentClass::~ComponentClass() { }
|
||||||
|
|
||||||
@ -441,7 +446,6 @@ void ComponentClass::addDash(void) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ComponentClass::negate() {
|
void ComponentClass::negate() {
|
||||||
assert(class_empty());
|
|
||||||
m_negate = true;
|
m_negate = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,7 +63,9 @@ enum PredefinedClass {
|
|||||||
CLASS_VERT,
|
CLASS_VERT,
|
||||||
CLASS_WORD,
|
CLASS_WORD,
|
||||||
CLASS_XDIGIT,
|
CLASS_XDIGIT,
|
||||||
CLASS_XGRAPH,
|
CLASS_XGRAPH, /* [:graph:] in UCP mode */
|
||||||
|
CLASS_XPRINT, /* [:print:] in UCP mode */
|
||||||
|
CLASS_XPUNCT, /* [:punct:] in UCP mode */
|
||||||
CLASS_UCP_C,
|
CLASS_UCP_C,
|
||||||
CLASS_UCP_CC,
|
CLASS_UCP_CC,
|
||||||
CLASS_UCP_CF,
|
CLASS_UCP_CF,
|
||||||
@ -232,8 +234,12 @@ public:
|
|||||||
Component *accept(ComponentVisitor &v) override = 0;
|
Component *accept(ComponentVisitor &v) override = 0;
|
||||||
void accept(ConstComponentVisitor &v) const override = 0;
|
void accept(ConstComponentVisitor &v) const override = 0;
|
||||||
|
|
||||||
/** True iff we have already started adding members to the class. This is
|
/** \brief True if the class contains no members (i.e. it will not match
|
||||||
* a different concept to Component::empty */
|
* against anything). This function can only be called on a finalized
|
||||||
|
* class.
|
||||||
|
*
|
||||||
|
* Note: This is a different concept to Component::empty.
|
||||||
|
*/
|
||||||
virtual bool class_empty(void) const = 0;
|
virtual bool class_empty(void) const = 0;
|
||||||
|
|
||||||
virtual void add(PredefinedClass c, bool negated) = 0;
|
virtual void add(PredefinedClass c, bool negated) = 0;
|
||||||
@ -245,9 +251,6 @@ public:
|
|||||||
|
|
||||||
bool isNegated() const { return m_negate; }
|
bool isNegated() const { return m_negate; }
|
||||||
|
|
||||||
void setFirstChar(char c) { firstChar = c; }
|
|
||||||
char getFirstChar() const { return firstChar; }
|
|
||||||
|
|
||||||
std::vector<PositionInfo> first() const override = 0;
|
std::vector<PositionInfo> first() const override = 0;
|
||||||
std::vector<PositionInfo> last() const override = 0;
|
std::vector<PositionInfo> last() const override = 0;
|
||||||
bool empty() const override { return false; } /* always 1 codepoint wide */
|
bool empty() const override { return false; } /* always 1 codepoint wide */
|
||||||
@ -263,19 +266,13 @@ protected:
|
|||||||
unichar range_start;
|
unichar range_start;
|
||||||
bool finalized;
|
bool finalized;
|
||||||
|
|
||||||
/** Literal character at the start of this character class, e.g. '.' for
|
|
||||||
* the class [.abc]. Used to identify (unsupported) POSIX collating
|
|
||||||
* elements. */
|
|
||||||
char firstChar;
|
|
||||||
|
|
||||||
virtual void createRange(unichar) = 0;
|
virtual void createRange(unichar) = 0;
|
||||||
|
|
||||||
// Protected copy ctor. Use clone instead.
|
// Protected copy ctor. Use clone instead.
|
||||||
ComponentClass(const ComponentClass &other)
|
ComponentClass(const ComponentClass &other)
|
||||||
: Component(other), m_negate(other.m_negate), mode(other.mode),
|
: Component(other), m_negate(other.m_negate), mode(other.mode),
|
||||||
in_cand_range(other.in_cand_range), range_start(other.range_start),
|
in_cand_range(other.in_cand_range), range_start(other.range_start),
|
||||||
finalized(other.finalized),
|
finalized(other.finalized) {}
|
||||||
firstChar(other.firstChar) {}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -424,6 +424,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) {
|
|||||||
assert(!inCharClass); // not reentrant
|
assert(!inCharClass); // not reentrant
|
||||||
currentCls = getComponentClass(mode);
|
currentCls = getComponentClass(mode);
|
||||||
inCharClass = true;
|
inCharClass = true;
|
||||||
|
inCharClassEarly = true;
|
||||||
currentClsBegin = ts;
|
currentClsBegin = ts;
|
||||||
fgoto readClass;
|
fgoto readClass;
|
||||||
}
|
}
|
||||||
@ -474,6 +475,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) {
|
|||||||
}
|
}
|
||||||
action is_utf8 { mode.utf8 }
|
action is_utf8 { mode.utf8 }
|
||||||
action is_ignore_space { mode.ignore_space }
|
action is_ignore_space { mode.ignore_space }
|
||||||
|
action is_early_charclass { inCharClassEarly }
|
||||||
|
|
||||||
action addNumberedBackRef {
|
action addNumberedBackRef {
|
||||||
if (accumulator == 0) {
|
if (accumulator == 0) {
|
||||||
@ -790,10 +792,12 @@ unichar readUtf8CodePoint4c(const u8 *ts) {
|
|||||||
any => { throw LocatedParseError("Unknown property"); };
|
any => { throw LocatedParseError("Unknown property"); };
|
||||||
*|;
|
*|;
|
||||||
charClassGuts := |*
|
charClassGuts := |*
|
||||||
# We don't like POSIX collating elements (neither does PCRE or Perl).
|
# We don't support POSIX collating elements (neither does PCRE
|
||||||
'\[\.' [^\]]* '\.\]' |
|
# or Perl). These look like [.ch.] or [=ch=].
|
||||||
'\[=' [^\]]* '=\]' => {
|
'\[\.' ( '\\]' | [^\]] )* '\.\]' |
|
||||||
throw LocatedParseError("Unsupported POSIX collating element");
|
'\[=' ( '\\]' | [^\]] )* '=\]' => {
|
||||||
|
throw LocatedParseError("Unsupported POSIX collating "
|
||||||
|
"element");
|
||||||
};
|
};
|
||||||
# Named sets
|
# Named sets
|
||||||
# Adding these may cause the charclass to close, hence the
|
# Adding these may cause the charclass to close, hence the
|
||||||
@ -889,11 +893,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) {
|
|||||||
throw LocatedParseError("Invalid POSIX named class");
|
throw LocatedParseError("Invalid POSIX named class");
|
||||||
};
|
};
|
||||||
'\\Q' => {
|
'\\Q' => {
|
||||||
// fcall readQuotedClass;
|
fcall readQuotedClass;
|
||||||
ostringstream str;
|
|
||||||
str << "\\Q..\\E sequences in character classes not supported at index "
|
|
||||||
<< ts - ptr << ".";
|
|
||||||
throw ParseError(str.str());
|
|
||||||
};
|
};
|
||||||
'\\E' => { /*noop*/};
|
'\\E' => { /*noop*/};
|
||||||
# Backspace (this is only valid for \b in char classes)
|
# Backspace (this is only valid for \b in char classes)
|
||||||
@ -1090,28 +1090,8 @@ unichar readUtf8CodePoint4c(const u8 *ts) {
|
|||||||
throwInvalidUtf8();
|
throwInvalidUtf8();
|
||||||
};
|
};
|
||||||
|
|
||||||
# dot or equals at the end of a character class could be the end
|
|
||||||
# of a collating element, like [.blah.] or [=blah=].
|
|
||||||
[.=] ']' => {
|
|
||||||
if (currentCls->getFirstChar() == *ts) {
|
|
||||||
assert(currentClsBegin);
|
|
||||||
ostringstream oss;
|
|
||||||
oss << "Unsupported POSIX collating element at index "
|
|
||||||
<< currentClsBegin - ptr << ".";
|
|
||||||
throw ParseError(oss.str());
|
|
||||||
}
|
|
||||||
currentCls->add(*ts);
|
|
||||||
currentCls->finalize();
|
|
||||||
currentSeq->addComponent(move(currentCls));
|
|
||||||
inCharClass = false;
|
|
||||||
fgoto main;
|
|
||||||
};
|
|
||||||
|
|
||||||
# Literal character
|
# Literal character
|
||||||
(any - ']') => {
|
(any - ']') => {
|
||||||
if (currentCls->class_empty()) {
|
|
||||||
currentCls->setFirstChar(*ts);
|
|
||||||
}
|
|
||||||
currentCls->add(*ts);
|
currentCls->add(*ts);
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1127,35 +1107,35 @@ unichar readUtf8CodePoint4c(const u8 *ts) {
|
|||||||
# Parser to read stuff from a character class
|
# Parser to read stuff from a character class
|
||||||
#############################################################
|
#############################################################
|
||||||
readClass := |*
|
readClass := |*
|
||||||
# the negate and right bracket out the front are special
|
# A caret at the beginning of the class means that the rest of the
|
||||||
'\^' => {
|
# class is negated.
|
||||||
|
'\^' when is_early_charclass => {
|
||||||
if (currentCls->isNegated()) {
|
if (currentCls->isNegated()) {
|
||||||
|
// Already seen a caret; the second one is not a meta-character.
|
||||||
|
inCharClassEarly = false;
|
||||||
fhold; fgoto charClassGuts;
|
fhold; fgoto charClassGuts;
|
||||||
} else {
|
} else {
|
||||||
currentCls->negate();
|
currentCls->negate();
|
||||||
|
// Note: we cannot switch off inCharClassEarly here, as /[^]]/
|
||||||
|
// needs to use the right square bracket path below.
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
']' => {
|
# A right square bracket before anything "real" is interpreted as a
|
||||||
// if this is the first thing in the class, add it and move along,
|
# literal right square bracket.
|
||||||
// otherwise jump into the char class machine to handle what might
|
']' when is_early_charclass => {
|
||||||
// end up as fail
|
|
||||||
if (currentCls->class_empty()) {
|
|
||||||
currentCls->add(']');
|
currentCls->add(']');
|
||||||
} else {
|
inCharClassEarly = false;
|
||||||
// leave it for the next machine
|
|
||||||
fhold;
|
|
||||||
}
|
|
||||||
fgoto charClassGuts;
|
|
||||||
};
|
};
|
||||||
# if we hit a quote before anything "real", handle it
|
# if we hit a quote before anything "real", handle it
|
||||||
#'\\Q' => { fcall readQuotedClass; };
|
'\\Q' => { fcall readQuotedClass; };
|
||||||
'\\Q' => {
|
|
||||||
throw LocatedParseError("\\Q..\\E sequences in character classes not supported");
|
|
||||||
};
|
|
||||||
'\\E' => { /*noop*/};
|
'\\E' => { /*noop*/};
|
||||||
|
|
||||||
# time for the real work to happen
|
# time for the real work to happen
|
||||||
any => { fhold; fgoto charClassGuts; };
|
any => {
|
||||||
|
inCharClassEarly = false;
|
||||||
|
fhold;
|
||||||
|
fgoto charClassGuts;
|
||||||
|
};
|
||||||
*|;
|
*|;
|
||||||
|
|
||||||
#############################################################
|
#############################################################
|
||||||
@ -1183,6 +1163,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) {
|
|||||||
# Literal character
|
# Literal character
|
||||||
any => {
|
any => {
|
||||||
currentCls->add(*ts);
|
currentCls->add(*ts);
|
||||||
|
inCharClassEarly = false;
|
||||||
};
|
};
|
||||||
*|;
|
*|;
|
||||||
|
|
||||||
@ -1232,6 +1213,13 @@ unichar readUtf8CodePoint4c(const u8 *ts) {
|
|||||||
throw LocatedParseError("POSIX named classes are only "
|
throw LocatedParseError("POSIX named classes are only "
|
||||||
"supported inside a class");
|
"supported inside a class");
|
||||||
};
|
};
|
||||||
|
# We don't support POSIX collating elements (neither does PCRE
|
||||||
|
# or Perl). These look like [.ch.] or [=ch=].
|
||||||
|
'\[\.' ( '\\]' | [^\]] )* '\.\]' |
|
||||||
|
'\[=' ( '\\]' | [^\]] )* '=\]' => {
|
||||||
|
throw LocatedParseError("Unsupported POSIX collating "
|
||||||
|
"element");
|
||||||
|
};
|
||||||
# Begin eating characters for class
|
# Begin eating characters for class
|
||||||
'\[' => eatClass;
|
'\[' => eatClass;
|
||||||
# Begin quoted literal
|
# Begin quoted literal
|
||||||
@ -1896,6 +1884,11 @@ unique_ptr<Component> parse(const char *const c_ptr, ParseMode &globalMode) {
|
|||||||
// brackets [..].
|
// brackets [..].
|
||||||
bool inCharClass = false;
|
bool inCharClass = false;
|
||||||
|
|
||||||
|
// True if the machine is inside a character class but it has not processed
|
||||||
|
// any "real" elements yet, i.e. it's still processing meta-characters like
|
||||||
|
// '^'.
|
||||||
|
bool inCharClassEarly = false;
|
||||||
|
|
||||||
// Location at which the current character class began.
|
// Location at which the current character class began.
|
||||||
const u8 *currentClsBegin = p;
|
const u8 *currentClsBegin = p;
|
||||||
|
|
||||||
|
@ -75,6 +75,10 @@ PredefinedClass translateForUcpMode(PredefinedClass in, const ParseMode &mode) {
|
|||||||
} else {
|
} else {
|
||||||
return CLASS_UCP_LL;
|
return CLASS_UCP_LL;
|
||||||
}
|
}
|
||||||
|
case CLASS_PRINT:
|
||||||
|
return CLASS_XPRINT;
|
||||||
|
case CLASS_PUNCT:
|
||||||
|
return CLASS_XPUNCT;
|
||||||
case CLASS_SPACE:
|
case CLASS_SPACE:
|
||||||
return CLASS_UCP_XPS;
|
return CLASS_UCP_XPS;
|
||||||
case CLASS_UPPER:
|
case CLASS_UPPER:
|
||||||
@ -90,7 +94,6 @@ PredefinedClass translateForUcpMode(PredefinedClass in, const ParseMode &mode) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
|
||||||
CodePointSet getPredefinedCodePointSet(PredefinedClass c,
|
CodePointSet getPredefinedCodePointSet(PredefinedClass c,
|
||||||
const ParseMode &mode) {
|
const ParseMode &mode) {
|
||||||
/* TODO: support properly PCRE_UCP mode and non PCRE_UCP mode */
|
/* TODO: support properly PCRE_UCP mode and non PCRE_UCP mode */
|
||||||
@ -117,6 +120,22 @@ CodePointSet getPredefinedCodePointSet(PredefinedClass c,
|
|||||||
rv |= cf;
|
rv |= cf;
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
case CLASS_XPRINT: {
|
||||||
|
// Same as graph, plus everything with the Zs property.
|
||||||
|
CodePointSet rv = getPredefinedCodePointSet(CLASS_XGRAPH, mode);
|
||||||
|
rv |= getUcpZs();
|
||||||
|
rv.set(0x180e); // Also included in this class by PCRE 8.38.
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
case CLASS_XPUNCT: {
|
||||||
|
// Everything with the P (punctuation) property, plus code points in S
|
||||||
|
// (symbols) that are < 128.
|
||||||
|
CodePointSet rv = getUcpP();
|
||||||
|
CodePointSet symbols = getUcpS();
|
||||||
|
symbols.unsetRange(128, MAX_UNICODE);
|
||||||
|
rv |= symbols;
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
case CLASS_HORZ: {
|
case CLASS_HORZ: {
|
||||||
CodePointSet rv;
|
CodePointSet rv;
|
||||||
rv.set(0x0009); /* Horizontal tab */
|
rv.set(0x0009); /* Horizontal tab */
|
||||||
@ -484,7 +503,8 @@ UTF8ComponentClass *UTF8ComponentClass::clone() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool UTF8ComponentClass::class_empty(void) const {
|
bool UTF8ComponentClass::class_empty(void) const {
|
||||||
return cps.none() && cps_ucp.none();
|
assert(finalized);
|
||||||
|
return cps.none();
|
||||||
}
|
}
|
||||||
|
|
||||||
void UTF8ComponentClass::createRange(unichar to) {
|
void UTF8ComponentClass::createRange(unichar to) {
|
||||||
@ -492,7 +512,8 @@ void UTF8ComponentClass::createRange(unichar to) {
|
|||||||
unichar from = range_start;
|
unichar from = range_start;
|
||||||
if (from > to) {
|
if (from > to) {
|
||||||
throw LocatedParseError("Range out of order in character class");
|
throw LocatedParseError("Range out of order in character class");
|
||||||
} else {
|
}
|
||||||
|
|
||||||
in_cand_range = false;
|
in_cand_range = false;
|
||||||
CodePointSet ncps;
|
CodePointSet ncps;
|
||||||
ncps.setRange(from, to);
|
ncps.setRange(from, to);
|
||||||
@ -502,7 +523,6 @@ void UTF8ComponentClass::createRange(unichar to) {
|
|||||||
cps |= ncps;
|
cps |= ncps;
|
||||||
range_start = INVALID_UNICODE;
|
range_start = INVALID_UNICODE;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
void UTF8ComponentClass::add(PredefinedClass c, bool negative) {
|
void UTF8ComponentClass::add(PredefinedClass c, bool negative) {
|
||||||
if (in_cand_range) { // can't form a range here
|
if (in_cand_range) { // can't form a range here
|
||||||
@ -520,11 +540,7 @@ void UTF8ComponentClass::add(PredefinedClass c, bool negative) {
|
|||||||
pcps.flip();
|
pcps.flip();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isUcp(c)) {
|
|
||||||
cps_ucp |= pcps;
|
|
||||||
} else {
|
|
||||||
cps |= pcps;
|
cps |= pcps;
|
||||||
}
|
|
||||||
|
|
||||||
range_start = INVALID_UNICODE;
|
range_start = INVALID_UNICODE;
|
||||||
in_cand_range = false;
|
in_cand_range = false;
|
||||||
@ -562,8 +578,6 @@ void UTF8ComponentClass::finalize() {
|
|||||||
in_cand_range = false;
|
in_cand_range = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
cps |= cps_ucp; /* characters from ucp props always case sensitive */
|
|
||||||
|
|
||||||
if (m_negate) {
|
if (m_negate) {
|
||||||
cps.flip();
|
cps.flip();
|
||||||
}
|
}
|
||||||
@ -571,31 +585,6 @@ void UTF8ComponentClass::finalize() {
|
|||||||
finalized = true;
|
finalized = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isUcp(PredefinedClass c) {
|
|
||||||
switch (c) {
|
|
||||||
case CLASS_ALNUM:
|
|
||||||
case CLASS_ALPHA:
|
|
||||||
case CLASS_ANY:
|
|
||||||
case CLASS_ASCII:
|
|
||||||
case CLASS_BLANK:
|
|
||||||
case CLASS_CNTRL:
|
|
||||||
case CLASS_DIGIT:
|
|
||||||
case CLASS_GRAPH:
|
|
||||||
case CLASS_HORZ:
|
|
||||||
case CLASS_LOWER:
|
|
||||||
case CLASS_PRINT:
|
|
||||||
case CLASS_PUNCT:
|
|
||||||
case CLASS_SPACE:
|
|
||||||
case CLASS_UPPER:
|
|
||||||
case CLASS_VERT:
|
|
||||||
case CLASS_WORD:
|
|
||||||
case CLASS_XDIGIT:
|
|
||||||
return false;
|
|
||||||
default:
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Position UTF8ComponentClass::getHead(NFABuilder &builder, u8 first_byte) {
|
Position UTF8ComponentClass::getHead(NFABuilder &builder, u8 first_byte) {
|
||||||
map<u8, Position>::const_iterator it = heads.find(first_byte);
|
map<u8, Position>::const_iterator it = heads.find(first_byte);
|
||||||
if (it != heads.end()) {
|
if (it != heads.end()) {
|
||||||
|
@ -93,7 +93,6 @@ private:
|
|||||||
void buildFourByte(GlushkovBuildState &bs);
|
void buildFourByte(GlushkovBuildState &bs);
|
||||||
|
|
||||||
CodePointSet cps;
|
CodePointSet cps;
|
||||||
CodePointSet cps_ucp;
|
|
||||||
|
|
||||||
std::map<u8, Position> heads;
|
std::map<u8, Position> heads;
|
||||||
Position single_pos;
|
Position single_pos;
|
||||||
@ -108,7 +107,9 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
PredefinedClass translateForUcpMode(PredefinedClass in, const ParseMode &mode);
|
PredefinedClass translateForUcpMode(PredefinedClass in, const ParseMode &mode);
|
||||||
bool isUcp(PredefinedClass c);
|
|
||||||
|
CodePointSet getPredefinedCodePointSet(PredefinedClass c,
|
||||||
|
const ParseMode &mode);
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
@ -57,7 +57,7 @@ public:
|
|||||||
ReferenceVisitor(size_t num_groups, const flat_set<string> &targets)
|
ReferenceVisitor(size_t num_groups, const flat_set<string> &targets)
|
||||||
: num_ids(num_groups), names(targets) {}
|
: num_ids(num_groups), names(targets) {}
|
||||||
|
|
||||||
~ReferenceVisitor();
|
~ReferenceVisitor() override;
|
||||||
|
|
||||||
void invalid_index(const char *component, unsigned id) {
|
void invalid_index(const char *component, unsigned id) {
|
||||||
assert(component);
|
assert(component);
|
||||||
|
@ -201,7 +201,7 @@ const ComponentSequence *findCapturingGroup(const Component *root,
|
|||||||
class PrefilterVisitor : public DefaultComponentVisitor {
|
class PrefilterVisitor : public DefaultComponentVisitor {
|
||||||
public:
|
public:
|
||||||
PrefilterVisitor(Component *c, const ParseMode &m) : root(c), mode(m) {}
|
PrefilterVisitor(Component *c, const ParseMode &m) : root(c), mode(m) {}
|
||||||
~PrefilterVisitor();
|
~PrefilterVisitor() override;
|
||||||
|
|
||||||
/** \brief Calls the visitor (recursively) on a new replacement component
|
/** \brief Calls the visitor (recursively) on a new replacement component
|
||||||
* we've just created. Takes care of freeing it if the sequence is itself
|
* we've just created. Takes care of freeing it if the sequence is itself
|
||||||
|
@ -64,7 +64,7 @@ namespace ue2 {
|
|||||||
*/
|
*/
|
||||||
class ConstructLiteralVisitor : public ConstComponentVisitor {
|
class ConstructLiteralVisitor : public ConstComponentVisitor {
|
||||||
public:
|
public:
|
||||||
~ConstructLiteralVisitor();
|
~ConstructLiteralVisitor() override;
|
||||||
|
|
||||||
/** \brief Thrown if this component does not represent a literal. */
|
/** \brief Thrown if this component does not represent a literal. */
|
||||||
struct NotLiteral {};
|
struct NotLiteral {};
|
||||||
|
@ -44,7 +44,7 @@ namespace ue2 {
|
|||||||
* an unsupported component. */
|
* an unsupported component. */
|
||||||
class UnsupportedVisitor : public DefaultConstComponentVisitor {
|
class UnsupportedVisitor : public DefaultConstComponentVisitor {
|
||||||
public:
|
public:
|
||||||
~UnsupportedVisitor();
|
~UnsupportedVisitor() override;
|
||||||
void pre(const ComponentAssertion &) override {
|
void pre(const ComponentAssertion &) override {
|
||||||
throw ParseError("Zero-width assertions are not supported.");
|
throw ParseError("Zero-width assertions are not supported.");
|
||||||
}
|
}
|
||||||
|
@ -379,7 +379,7 @@ void ensureEnd(struct mq *q, UNUSED u32 qi, s64a final_loc) {
|
|||||||
DEBUG_PRINTF("ensure MQE_END %lld for queue %u\n", final_loc, qi);
|
DEBUG_PRINTF("ensure MQE_END %lld for queue %u\n", final_loc, qi);
|
||||||
if (final_loc >= q_last_loc(q)) {
|
if (final_loc >= q_last_loc(q)) {
|
||||||
/* TODO: ensure situation does not arise */
|
/* TODO: ensure situation does not arise */
|
||||||
assert(q->items[q->end - 1].type != MQE_END);
|
assert(q_last_type(q) != MQE_END);
|
||||||
pushQueueNoMerge(q, MQE_END, final_loc);
|
pushQueueNoMerge(q, MQE_END, final_loc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -758,7 +758,7 @@ found_miracle:
|
|||||||
|
|
||||||
q_skip_forward_to(q, miracle_loc);
|
q_skip_forward_to(q, miracle_loc);
|
||||||
|
|
||||||
if (q->items[q->end - 1].type == MQE_START) {
|
if (q_last_type(q) == MQE_START) {
|
||||||
DEBUG_PRINTF("miracle caused infix to die\n");
|
DEBUG_PRINTF("miracle caused infix to die\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -853,7 +853,7 @@ char roseTestLeftfix(const struct RoseEngine *t, const struct RoseRole *tr,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (q_cur_loc(q) < loc || q->items[q->end - 1].type != MQE_START) {
|
if (q_cur_loc(q) < loc || q_last_type(q) != MQE_START) {
|
||||||
if (left->infix) {
|
if (left->infix) {
|
||||||
if (infixTooOld(q, loc)) {
|
if (infixTooOld(q, loc)) {
|
||||||
DEBUG_PRINTF("infix %u died of old age\n", ri);
|
DEBUG_PRINTF("infix %u died of old age\n", ri);
|
||||||
|
@ -42,6 +42,7 @@
|
|||||||
#include "rose_in_graph.h"
|
#include "rose_in_graph.h"
|
||||||
#include "util/alloc.h"
|
#include "util/alloc.h"
|
||||||
#include "util/charreach.h"
|
#include "util/charreach.h"
|
||||||
|
#include "util/ue2_containers.h"
|
||||||
#include "util/ue2string.h"
|
#include "util/ue2string.h"
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
@ -72,8 +73,8 @@ public:
|
|||||||
|
|
||||||
/** \brief True if we can not establish that at most a single callback will
|
/** \brief True if we can not establish that at most a single callback will
|
||||||
* be generated at a given offset from this set of reports. */
|
* be generated at a given offset from this set of reports. */
|
||||||
virtual bool requiresDedupeSupport(const std::set<ReportID> &reports) const
|
virtual bool requiresDedupeSupport(const ue2::flat_set<ReportID> &reports)
|
||||||
= 0;
|
const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
/** \brief Abstract interface intended for callers from elsewhere in the tree,
|
/** \brief Abstract interface intended for callers from elsewhere in the tree,
|
||||||
|
@ -271,16 +271,13 @@ public:
|
|||||||
typedef Holder_StateSet StateSet;
|
typedef Holder_StateSet StateSet;
|
||||||
typedef ue2::unordered_map<StateSet, dstate_id_t> StateMap;
|
typedef ue2::unordered_map<StateSet, dstate_id_t> StateMap;
|
||||||
|
|
||||||
explicit Automaton_Holder(const NGHolder &g_in) : g(g_in), bad(false) {
|
explicit Automaton_Holder(const NGHolder &g_in) : g(g_in) {
|
||||||
for (auto v : vertices_range(g)) {
|
for (auto v : vertices_range(g)) {
|
||||||
vertexToIndex[v] = indexToVertex.size();
|
vertexToIndex[v] = indexToVertex.size();
|
||||||
indexToVertex.push_back(v);
|
indexToVertex.push_back(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (indexToVertex.size() > ANCHORED_NFA_STATE_LIMIT) {
|
assert(indexToVertex.size() <= ANCHORED_NFA_STATE_LIMIT);
|
||||||
bad = true;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEBUG_PRINTF("%zu states\n", indexToVertex.size());
|
DEBUG_PRINTF("%zu states\n", indexToVertex.size());
|
||||||
init.wdelay = 0;
|
init.wdelay = 0;
|
||||||
@ -400,7 +397,6 @@ public:
|
|||||||
array<u16, ALPHABET_SIZE> alpha;
|
array<u16, ALPHABET_SIZE> alpha;
|
||||||
array<u16, ALPHABET_SIZE> unalpha;
|
array<u16, ALPHABET_SIZE> unalpha;
|
||||||
u16 alphasize;
|
u16 alphasize;
|
||||||
bool bad;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
@ -670,13 +666,13 @@ int finalise_out(RoseBuildImpl &tbi, const NGHolder &h,
|
|||||||
|
|
||||||
static
|
static
|
||||||
int addAutomaton(RoseBuildImpl &tbi, const NGHolder &h, ReportID *remap) {
|
int addAutomaton(RoseBuildImpl &tbi, const NGHolder &h, ReportID *remap) {
|
||||||
Automaton_Holder autom(h);
|
if (num_vertices(h) > ANCHORED_NFA_STATE_LIMIT) {
|
||||||
|
|
||||||
if (autom.bad) {
|
|
||||||
DEBUG_PRINTF("autom bad!\n");
|
DEBUG_PRINTF("autom bad!\n");
|
||||||
return ANCHORED_FAIL;
|
return ANCHORED_FAIL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Automaton_Holder autom(h);
|
||||||
|
|
||||||
unique_ptr<raw_dfa> out_dfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX);
|
unique_ptr<raw_dfa> out_dfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX);
|
||||||
if (!determinise(autom, out_dfa->states, MAX_DFA_STATES)) {
|
if (!determinise(autom, out_dfa->states, MAX_DFA_STATES)) {
|
||||||
return finalise_out(tbi, h, autom, move(out_dfa), remap);
|
return finalise_out(tbi, h, autom, move(out_dfa), remap);
|
||||||
@ -738,7 +734,6 @@ void buildSimpleDfas(const RoseBuildImpl &tbi,
|
|||||||
NGHolder h;
|
NGHolder h;
|
||||||
populate_holder(simple.first, exit_ids, &h);
|
populate_holder(simple.first, exit_ids, &h);
|
||||||
Automaton_Holder autom(h);
|
Automaton_Holder autom(h);
|
||||||
assert(!autom.bad);
|
|
||||||
unique_ptr<raw_dfa> rdfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX);
|
unique_ptr<raw_dfa> rdfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX);
|
||||||
UNUSED int rv = determinise(autom, rdfa->states, MAX_DFA_STATES);
|
UNUSED int rv = determinise(autom, rdfa->states, MAX_DFA_STATES);
|
||||||
assert(!rv);
|
assert(!rv);
|
||||||
|
@ -2687,12 +2687,6 @@ void fillInReportInfo(RoseEngine *engine, u32 reportOffset,
|
|||||||
sizeof(internal_report));
|
sizeof(internal_report));
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
|
||||||
void populateInvDkeyTable(char *ptr, const ReportManager &rm) {
|
|
||||||
vector<ReportID> table = rm.getDkeyToReportTable();
|
|
||||||
memcpy(ptr, table.data(), byte_length(table));
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
static
|
||||||
bool hasSimpleReports(const vector<Report> &reports) {
|
bool hasSimpleReports(const vector<Report> &reports) {
|
||||||
auto it = find_if(reports.begin(), reports.end(), isComplexReport);
|
auto it = find_if(reports.begin(), reports.end(), isComplexReport);
|
||||||
@ -4154,7 +4148,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
|||||||
engine->ekeyCount = rm.numEkeys();
|
engine->ekeyCount = rm.numEkeys();
|
||||||
engine->dkeyCount = rm.numDkeys();
|
engine->dkeyCount = rm.numDkeys();
|
||||||
engine->invDkeyOffset = dkeyOffset;
|
engine->invDkeyOffset = dkeyOffset;
|
||||||
populateInvDkeyTable(ptr + dkeyOffset, rm);
|
copy_bytes(ptr + dkeyOffset, rm.getDkeyToReportTable());
|
||||||
|
|
||||||
engine->somHorizon = ssm.somPrecision();
|
engine->somHorizon = ssm.somPrecision();
|
||||||
engine->somLocationCount = ssm.numSomSlots();
|
engine->somLocationCount = ssm.numSomSlots();
|
||||||
@ -4314,33 +4308,22 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
|||||||
buildLitBenefits(*this, engine.get(), base_lits_benefits_offset);
|
buildLitBenefits(*this, engine.get(), base_lits_benefits_offset);
|
||||||
|
|
||||||
// Copy in other tables
|
// Copy in other tables
|
||||||
memcpy(ptr + bc.engine_blob_base, bc.engine_blob.data(),
|
copy_bytes(ptr + bc.engine_blob_base, bc.engine_blob);
|
||||||
byte_length(bc.engine_blob));
|
copy_bytes(ptr + engine->literalOffset, literalTable);
|
||||||
|
copy_bytes(ptr + engine->roleOffset, bc.roleTable);
|
||||||
memcpy(ptr + engine->literalOffset, literalTable.data(),
|
copy_bytes(ptr + engine->leftOffset, leftInfoTable);
|
||||||
byte_length(literalTable));
|
|
||||||
memcpy(ptr + engine->roleOffset, bc.roleTable.data(),
|
|
||||||
byte_length(bc.roleTable));
|
|
||||||
copy(leftInfoTable.begin(), leftInfoTable.end(),
|
|
||||||
(LeftNfaInfo *)(ptr + engine->leftOffset));
|
|
||||||
|
|
||||||
fillLookaroundTables(ptr + lookaroundTableOffset,
|
fillLookaroundTables(ptr + lookaroundTableOffset,
|
||||||
ptr + lookaroundReachOffset, bc.lookaround);
|
ptr + lookaroundReachOffset, bc.lookaround);
|
||||||
|
|
||||||
fillInSomRevNfas(engine.get(), ssm, rev_nfa_table_offset, rev_nfa_offsets);
|
fillInSomRevNfas(engine.get(), ssm, rev_nfa_table_offset, rev_nfa_offsets);
|
||||||
memcpy(ptr + engine->predOffset, predTable.data(), byte_length(predTable));
|
copy_bytes(ptr + engine->predOffset, predTable);
|
||||||
memcpy(ptr + engine->rootRoleOffset, rootRoleTable.data(),
|
copy_bytes(ptr + engine->rootRoleOffset, rootRoleTable);
|
||||||
byte_length(rootRoleTable));
|
copy_bytes(ptr + engine->anchoredReportMapOffset, art);
|
||||||
memcpy(ptr + engine->anchoredReportMapOffset, art.data(), byte_length(art));
|
copy_bytes(ptr + engine->anchoredReportInverseMapOffset, arit);
|
||||||
memcpy(ptr + engine->anchoredReportInverseMapOffset, arit.data(),
|
copy_bytes(ptr + engine->multidirectOffset, mdr_reports);
|
||||||
byte_length(arit));
|
copy_bytes(ptr + engine->activeLeftIterOffset, activeLeftIter);
|
||||||
memcpy(ptr + engine->multidirectOffset, mdr_reports.data(),
|
copy_bytes(ptr + engine->sideOffset, sideTable);
|
||||||
byte_length(mdr_reports));
|
|
||||||
|
|
||||||
copy(activeLeftIter.begin(), activeLeftIter.end(),
|
|
||||||
(mmbit_sparse_iter *)(ptr + engine->activeLeftIterOffset));
|
|
||||||
|
|
||||||
memcpy(ptr + engine->sideOffset, sideTable.data(), byte_length(sideTable));
|
|
||||||
|
|
||||||
DEBUG_PRINTF("rose done %p\n", engine.get());
|
DEBUG_PRINTF("rose done %p\n", engine.get());
|
||||||
return engine;
|
return engine;
|
||||||
|
@ -1631,20 +1631,23 @@ bool triggerKillsRoseGraph(const RoseBuildImpl &tbi, const left_id &left,
|
|||||||
assert(left.graph());
|
assert(left.graph());
|
||||||
const NGHolder &h = *left.graph();
|
const NGHolder &h = *left.graph();
|
||||||
|
|
||||||
|
ue2::flat_set<NFAVertex> all_states;
|
||||||
|
insert(&all_states, vertices(h));
|
||||||
|
assert(out_degree(h.startDs, h) == 1); /* triggered don't use sds */
|
||||||
|
DEBUG_PRINTF("removing sds\n");
|
||||||
|
all_states.erase(h.startDs);
|
||||||
|
|
||||||
|
ue2::flat_set<NFAVertex> states;
|
||||||
|
|
||||||
/* check each pred literal to see if they all kill previous graph
|
/* check each pred literal to see if they all kill previous graph
|
||||||
* state */
|
* state */
|
||||||
for (u32 lit_id : tbi.g[source(e, tbi.g)].literals) {
|
for (u32 lit_id : tbi.g[source(e, tbi.g)].literals) {
|
||||||
const rose_literal_id &pred_lit = tbi.literals.right.at(lit_id);
|
const rose_literal_id &pred_lit = tbi.literals.right.at(lit_id);
|
||||||
const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s);
|
const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s);
|
||||||
|
|
||||||
set<NFAVertex> states;
|
|
||||||
insert(&states, vertices(h));
|
|
||||||
assert(out_degree(h.startDs, h) == 1); /* triggered don't use sds */
|
|
||||||
DEBUG_PRINTF("removing sds\n");
|
|
||||||
states.erase(h.startDs);
|
|
||||||
DEBUG_PRINTF("running graph %zu\n", states.size());
|
DEBUG_PRINTF("running graph %zu\n", states.size());
|
||||||
execute_graph(h, s, &states, true);
|
states = execute_graph(h, s, all_states, true);
|
||||||
DEBUG_PRINTF("ran\n");
|
DEBUG_PRINTF("ran, %zu states on\n", states.size());
|
||||||
|
|
||||||
if (!states.empty()) {
|
if (!states.empty()) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -130,6 +130,8 @@ private:
|
|||||||
|
|
||||||
friend depth findMinWidth(const suffix_id &s);
|
friend depth findMinWidth(const suffix_id &s);
|
||||||
friend depth findMaxWidth(const suffix_id &s);
|
friend depth findMaxWidth(const suffix_id &s);
|
||||||
|
friend depth findMinWidth(const suffix_id &s, u32 top);
|
||||||
|
friend depth findMaxWidth(const suffix_id &s, u32 top);
|
||||||
};
|
};
|
||||||
|
|
||||||
std::set<ReportID> all_reports(const suffix_id &s);
|
std::set<ReportID> all_reports(const suffix_id &s);
|
||||||
@ -138,6 +140,8 @@ bool has_eod_accepts(const suffix_id &s);
|
|||||||
bool has_non_eod_accepts(const suffix_id &s);
|
bool has_non_eod_accepts(const suffix_id &s);
|
||||||
depth findMinWidth(const suffix_id &s);
|
depth findMinWidth(const suffix_id &s);
|
||||||
depth findMaxWidth(const suffix_id &s);
|
depth findMaxWidth(const suffix_id &s);
|
||||||
|
depth findMinWidth(const suffix_id &s, u32 top);
|
||||||
|
depth findMaxWidth(const suffix_id &s, u32 top);
|
||||||
size_t hash_value(const suffix_id &s);
|
size_t hash_value(const suffix_id &s);
|
||||||
|
|
||||||
/** \brief represents an engine to the left of a rose role */
|
/** \brief represents an engine to the left of a rose role */
|
||||||
|
@ -77,6 +77,8 @@ RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, SomSlotManager &ssm_in,
|
|||||||
hasSom(false),
|
hasSom(false),
|
||||||
group_weak_end(0),
|
group_weak_end(0),
|
||||||
group_end(0),
|
group_end(0),
|
||||||
|
anchored_base_id(MO_INVALID_IDX),
|
||||||
|
nonbenefits_base_id(MO_INVALID_IDX),
|
||||||
ematcher_region_size(0),
|
ematcher_region_size(0),
|
||||||
floating_direct_report(false),
|
floating_direct_report(false),
|
||||||
eod_event_literal_id(MO_INVALID_IDX),
|
eod_event_literal_id(MO_INVALID_IDX),
|
||||||
@ -536,7 +538,7 @@ u32 RoseBuildImpl::getNewLiteralId() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
bool requiresDedupe(const NGHolder &h, const set<ReportID> &reports,
|
bool requiresDedupe(const NGHolder &h, const ue2::flat_set<ReportID> &reports,
|
||||||
const Grey &grey) {
|
const Grey &grey) {
|
||||||
/* TODO: tighten */
|
/* TODO: tighten */
|
||||||
NFAVertex seen_vert = NFAGraph::null_vertex();
|
NFAVertex seen_vert = NFAGraph::null_vertex();
|
||||||
@ -579,7 +581,8 @@ bool requiresDedupe(const NGHolder &h, const set<ReportID> &reports,
|
|||||||
class RoseDedupeAuxImpl : public RoseDedupeAux {
|
class RoseDedupeAuxImpl : public RoseDedupeAux {
|
||||||
public:
|
public:
|
||||||
explicit RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in);
|
explicit RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in);
|
||||||
bool requiresDedupeSupport(const set<ReportID> &reports) const override;
|
bool requiresDedupeSupport(
|
||||||
|
const ue2::flat_set<ReportID> &reports) const override;
|
||||||
|
|
||||||
const RoseBuildImpl &tbi;
|
const RoseBuildImpl &tbi;
|
||||||
map<ReportID, set<RoseVertex>> vert_map;
|
map<ReportID, set<RoseVertex>> vert_map;
|
||||||
@ -599,6 +602,8 @@ RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in)
|
|||||||
: tbi(tbi_in) {
|
: tbi(tbi_in) {
|
||||||
const RoseGraph &g = tbi.g;
|
const RoseGraph &g = tbi.g;
|
||||||
|
|
||||||
|
set<suffix_id> suffixes;
|
||||||
|
|
||||||
for (auto v : vertices_range(g)) {
|
for (auto v : vertices_range(g)) {
|
||||||
// Literals in the small block table don't count as dupes: although
|
// Literals in the small block table don't count as dupes: although
|
||||||
// they have copies in the anchored table, the two are never run in the
|
// they have copies in the anchored table, the two are never run in the
|
||||||
@ -609,11 +614,17 @@ RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Several vertices may share a suffix, so we collect the set of
|
||||||
|
// suffixes first to avoid repeating work.
|
||||||
if (g[v].suffix) {
|
if (g[v].suffix) {
|
||||||
for (const auto &report_id : all_reports(g[v].suffix)) {
|
suffixes.insert(g[v].suffix);
|
||||||
suffix_map[report_id].insert(g[v].suffix);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (const auto &suffix : suffixes) {
|
||||||
|
for (const auto &report_id : all_reports(suffix)) {
|
||||||
|
suffix_map[report_id].insert(suffix);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const auto &outfix : tbi.outfixes) {
|
for (const auto &outfix : tbi.outfixes) {
|
||||||
@ -634,8 +645,8 @@ RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RoseDedupeAuxImpl::requiresDedupeSupport(const set<ReportID> &reports)
|
bool RoseDedupeAuxImpl::requiresDedupeSupport(
|
||||||
const {
|
const ue2::flat_set<ReportID> &reports) const {
|
||||||
/* TODO: this could be expanded to check for offset or character
|
/* TODO: this could be expanded to check for offset or character
|
||||||
constraints */
|
constraints */
|
||||||
|
|
||||||
@ -897,6 +908,17 @@ depth findMinWidth(const suffix_id &s) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
depth findMinWidth(const suffix_id &s, u32 top) {
|
||||||
|
assert(s.graph() || s.castle() || s.haig() || s.dfa());
|
||||||
|
if (s.graph()) {
|
||||||
|
return findMinWidth(*s.graph(), top);
|
||||||
|
} else if (s.castle()) {
|
||||||
|
return findMinWidth(*s.castle(), top);
|
||||||
|
} else {
|
||||||
|
return s.dfa_min_width;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
depth findMaxWidth(const suffix_id &s) {
|
depth findMaxWidth(const suffix_id &s) {
|
||||||
assert(s.graph() || s.castle() || s.haig() || s.dfa());
|
assert(s.graph() || s.castle() || s.haig() || s.dfa());
|
||||||
if (s.graph()) {
|
if (s.graph()) {
|
||||||
@ -908,6 +930,17 @@ depth findMaxWidth(const suffix_id &s) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
depth findMaxWidth(const suffix_id &s, u32 top) {
|
||||||
|
assert(s.graph() || s.castle() || s.haig() || s.dfa());
|
||||||
|
if (s.graph()) {
|
||||||
|
return findMaxWidth(*s.graph(), top);
|
||||||
|
} else if (s.castle()) {
|
||||||
|
return findMaxWidth(*s.castle(), top);
|
||||||
|
} else {
|
||||||
|
return s.dfa_max_width;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool has_eod_accepts(const suffix_id &s) {
|
bool has_eod_accepts(const suffix_id &s) {
|
||||||
assert(s.graph() || s.castle() || s.haig() || s.dfa());
|
assert(s.graph() || s.castle() || s.haig() || s.dfa());
|
||||||
if (s.graph()) {
|
if (s.graph()) {
|
||||||
|
@ -439,12 +439,16 @@ size_t hashRightRoleProperties(RoseVertex v, const RoseGraph &g) {
|
|||||||
hash_combine(val, hash_range(begin(props.reports), end(props.reports)));
|
hash_combine(val, hash_range(begin(props.reports), end(props.reports)));
|
||||||
|
|
||||||
if (props.suffix) {
|
if (props.suffix) {
|
||||||
hash_combine(val, all_reports(props.suffix));
|
const auto &suffix = props.suffix;
|
||||||
if (props.suffix.graph) {
|
if (suffix.castle) {
|
||||||
hash_combine(val, num_vertices(*props.suffix.graph));
|
hash_combine(val, suffix.castle->reach());
|
||||||
|
hash_combine(val, suffix.castle->repeats.size());
|
||||||
}
|
}
|
||||||
if (props.suffix.haig) {
|
if (suffix.graph) {
|
||||||
hash_combine(val, hash_dfa(*props.suffix.haig));
|
hash_combine(val, num_vertices(*suffix.graph));
|
||||||
|
}
|
||||||
|
if (suffix.haig) {
|
||||||
|
hash_combine(val, hash_dfa(*suffix.haig));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -747,14 +751,17 @@ void pruneReportIfUnused(const RoseBuildImpl &tbi, shared_ptr<NGHolder> h,
|
|||||||
* Castle. */
|
* Castle. */
|
||||||
static
|
static
|
||||||
void pruneCastle(CastleProto &castle, ReportID report) {
|
void pruneCastle(CastleProto &castle, ReportID report) {
|
||||||
for (map<u32, PureRepeat>::iterator it = castle.repeats.begin();
|
unordered_set<u32> dead; // tops to remove.
|
||||||
it != castle.repeats.end(); /* incr inside */) {
|
for (const auto &m : castle.repeats) {
|
||||||
if (contains(it->second.reports, report)) {
|
if (!contains(m.second.reports, report)) {
|
||||||
++it;
|
dead.insert(m.first);
|
||||||
} else {
|
|
||||||
castle.repeats.erase(it++);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (const auto &top : dead) {
|
||||||
|
castle.erase(top);
|
||||||
|
}
|
||||||
|
|
||||||
assert(!castle.repeats.empty());
|
assert(!castle.repeats.empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -794,7 +801,7 @@ void pruneUnusedTops(CastleProto &castle, const RoseGraph &g,
|
|||||||
for (u32 top : assoc_keys(castle.repeats)) {
|
for (u32 top : assoc_keys(castle.repeats)) {
|
||||||
if (!contains(used_tops, top)) {
|
if (!contains(used_tops, top)) {
|
||||||
DEBUG_PRINTF("removing unused top %u\n", top);
|
DEBUG_PRINTF("removing unused top %u\n", top);
|
||||||
castle.repeats.erase(top);
|
castle.erase(top);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -94,10 +94,11 @@ u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (g[v].suffix) {
|
if (g[v].suffix) {
|
||||||
depth suffix_width = findMinWidth(g[v].suffix);
|
depth suffix_width = findMinWidth(g[v].suffix, g[v].suffix.top);
|
||||||
assert(suffix_width.is_reachable());
|
assert(suffix_width.is_reachable());
|
||||||
DEBUG_PRINTF("%zu has suffix (width %s), can fire report at %u\n",
|
DEBUG_PRINTF("%zu has suffix with top %u (width %s), can fire "
|
||||||
g[v].idx, suffix_width.str().c_str(),
|
"report at %u\n",
|
||||||
|
g[v].idx, g[v].suffix.top, suffix_width.str().c_str(),
|
||||||
w + suffix_width);
|
w + suffix_width);
|
||||||
minWidth = min(minWidth, w + suffix_width);
|
minWidth = min(minWidth, w + suffix_width);
|
||||||
}
|
}
|
||||||
@ -146,8 +147,9 @@ u32 findMaxBAWidth(const RoseBuildImpl &tbi) {
|
|||||||
if (has_non_eod_accepts(g[v].suffix)) {
|
if (has_non_eod_accepts(g[v].suffix)) {
|
||||||
return ROSE_BOUND_INF;
|
return ROSE_BOUND_INF;
|
||||||
}
|
}
|
||||||
depth suffix_width = findMaxWidth(g[v].suffix);
|
depth suffix_width = findMaxWidth(g[v].suffix, g[v].suffix.top);
|
||||||
DEBUG_PRINTF("suffix max width %s\n", suffix_width.str().c_str());
|
DEBUG_PRINTF("suffix max width for top %u is %s\n", g[v].suffix.top,
|
||||||
|
suffix_width.str().c_str());
|
||||||
assert(suffix_width.is_reachable());
|
assert(suffix_width.is_reachable());
|
||||||
if (!suffix_width.is_finite()) {
|
if (!suffix_width.is_finite()) {
|
||||||
DEBUG_PRINTF("suffix too wide\n");
|
DEBUG_PRINTF("suffix too wide\n");
|
||||||
|
@ -167,7 +167,7 @@ found_miracle:
|
|||||||
|
|
||||||
DEBUG_PRINTF("skip q forward, %lld to %lld\n", begin_loc, miracle_loc);
|
DEBUG_PRINTF("skip q forward, %lld to %lld\n", begin_loc, miracle_loc);
|
||||||
q_skip_forward_to(q, miracle_loc);
|
q_skip_forward_to(q, miracle_loc);
|
||||||
if (q->items[q->end - 1].type == MQE_START) {
|
if (q_last_type(q) == MQE_START) {
|
||||||
DEBUG_PRINTF("miracle caused infix to die\n");
|
DEBUG_PRINTF("miracle caused infix to die\n");
|
||||||
return MIRACLE_DEAD;
|
return MIRACLE_DEAD;
|
||||||
}
|
}
|
||||||
|
@ -98,18 +98,22 @@ u64a theirtoupper64(const u64a x) {
|
|||||||
static really_inline
|
static really_inline
|
||||||
int cmpNocaseNaive(const u8 *p1, const u8 *p2, size_t len) {
|
int cmpNocaseNaive(const u8 *p1, const u8 *p2, size_t len) {
|
||||||
const u8 *pEnd = (const u8 *)p1 + len;
|
const u8 *pEnd = (const u8 *)p1 + len;
|
||||||
for (; p1 < pEnd; p1++, p2++)
|
for (; p1 < pEnd; p1++, p2++) {
|
||||||
if (mytolower(*p1) != mytolower(*p2))
|
if (mytolower(*p1) != mytolower(*p2)) {
|
||||||
return 1;
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
int cmpCaseNaive(const u8 *p1, const u8 *p2, size_t len) {
|
int cmpCaseNaive(const u8 *p1, const u8 *p2, size_t len) {
|
||||||
const u8 *pEnd = (const u8 *)p1 + len;
|
const u8 *pEnd = (const u8 *)p1 + len;
|
||||||
for (; p1 < pEnd; p1++, p2++)
|
for (; p1 < pEnd; p1++, p2++) {
|
||||||
if (*p1 != *p2)
|
if (*p1 != *p2) {
|
||||||
return 1;
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -33,8 +33,13 @@
|
|||||||
#ifndef UTIL_CONTAINER_H
|
#ifndef UTIL_CONTAINER_H
|
||||||
#define UTIL_CONTAINER_H
|
#define UTIL_CONTAINER_H
|
||||||
|
|
||||||
|
#include "ue2common.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <cassert>
|
||||||
|
#include <cstring>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
#include <type_traits>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
@ -92,11 +97,35 @@ std::set<typename C::key_type> assoc_keys(const C &container) {
|
|||||||
return keys;
|
return keys;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Return the length in bytes of the given vector of (POD) objects.
|
||||||
|
*/
|
||||||
template<typename T>
|
template<typename T>
|
||||||
typename std::vector<T>::size_type byte_length(const std::vector<T> &vec) {
|
typename std::vector<T>::size_type byte_length(const std::vector<T> &vec) {
|
||||||
|
static_assert(std::is_pod<T>::value, "should be pod");
|
||||||
return vec.size() * sizeof(T);
|
return vec.size() * sizeof(T);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Copy the given vector of POD objects to the given location in memory.
|
||||||
|
* It is safe to give this function an empty vector.
|
||||||
|
*/
|
||||||
|
template<typename T>
|
||||||
|
void *copy_bytes(void *dest, const std::vector<T> &vec) {
|
||||||
|
static_assert(std::is_pod<T>::value, "should be pod");
|
||||||
|
assert(dest);
|
||||||
|
|
||||||
|
// Since we're generally using this function to write into the bytecode,
|
||||||
|
// dest should be appropriately aligned for T.
|
||||||
|
assert(ISALIGNED_N(dest, alignof(T)));
|
||||||
|
|
||||||
|
if (vec.empty()) {
|
||||||
|
return dest; // Protect memcpy against null pointers.
|
||||||
|
}
|
||||||
|
assert(vec.data() != nullptr);
|
||||||
|
return std::memcpy(dest, vec.data(), byte_length(vec));
|
||||||
|
}
|
||||||
|
|
||||||
template<typename OrderedContainer1, typename OrderedContainer2>
|
template<typename OrderedContainer1, typename OrderedContainer2>
|
||||||
bool is_subset_of(const OrderedContainer1 &small, const OrderedContainer2 &big) {
|
bool is_subset_of(const OrderedContainer1 &small, const OrderedContainer2 &big) {
|
||||||
static_assert(std::is_same<typename OrderedContainer1::value_type,
|
static_assert(std::is_same<typename OrderedContainer1::value_type,
|
||||||
|
@ -183,7 +183,7 @@ public:
|
|||||||
|
|
||||||
s64a rv = val + d;
|
s64a rv = val + d;
|
||||||
if (rv < 0 || (u64a)rv >= val_infinity) {
|
if (rv < 0 || (u64a)rv >= val_infinity) {
|
||||||
DEBUG_PRINTF("depth %llu too large to represent!\n", rv);
|
DEBUG_PRINTF("depth %lld too large to represent!\n", rv);
|
||||||
throw DepthOverflowError();
|
throw DepthOverflowError();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -142,23 +142,25 @@ const u32 mmbit_root_offset_from_level[7] = {
|
|||||||
u32 mmbit_size(u32 total_bits) {
|
u32 mmbit_size(u32 total_bits) {
|
||||||
MDEBUG_PRINTF("%u\n", total_bits);
|
MDEBUG_PRINTF("%u\n", total_bits);
|
||||||
|
|
||||||
// UE-2228: multibit has bugs in very, very large cases that we should be
|
|
||||||
// protected against at compile time by resource limits.
|
|
||||||
assert(total_bits <= 1U << 30);
|
|
||||||
|
|
||||||
// Flat model multibit structures are just stored as a bit vector.
|
// Flat model multibit structures are just stored as a bit vector.
|
||||||
if (total_bits <= MMB_FLAT_MAX_BITS) {
|
if (total_bits <= MMB_FLAT_MAX_BITS) {
|
||||||
return ROUNDUP_N(total_bits, 8) / 8;
|
return ROUNDUP_N(total_bits, 8) / 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 current_level = 1;
|
u64a current_level = 1; // Number of blocks on current level.
|
||||||
u32 total = 0;
|
u64a total = 0; // Total number of blocks.
|
||||||
while (current_level * MMB_KEY_BITS < total_bits) {
|
while (current_level * MMB_KEY_BITS < total_bits) {
|
||||||
total += current_level;
|
total += current_level;
|
||||||
current_level <<= MMB_KEY_SHIFT;
|
current_level <<= MMB_KEY_SHIFT;
|
||||||
}
|
}
|
||||||
total += (total_bits + MMB_KEY_BITS - 1)/MMB_KEY_BITS;
|
|
||||||
return sizeof(MMB_TYPE) * total;
|
// Last level is a one-for-one bit vector. It needs room for total_bits
|
||||||
|
// elements, rounded up to the nearest block.
|
||||||
|
u64a last_level = ((u64a)total_bits + MMB_KEY_BITS - 1) / MMB_KEY_BITS;
|
||||||
|
total += last_level;
|
||||||
|
|
||||||
|
assert(total * sizeof(MMB_TYPE) <= UINT32_MAX);
|
||||||
|
return (u32)(total * sizeof(MMB_TYPE));
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DUMP_SUPPORT
|
#ifdef DUMP_SUPPORT
|
||||||
|
@ -235,18 +235,18 @@ const u8 *mmbit_get_level_root_const(const u8 *bits, u32 level) {
|
|||||||
/** \brief get the block for this key on the current level as a u8 ptr */
|
/** \brief get the block for this key on the current level as a u8 ptr */
|
||||||
static really_inline
|
static really_inline
|
||||||
u8 *mmbit_get_block_ptr(u8 *bits, u32 max_level, u32 level, u32 key) {
|
u8 *mmbit_get_block_ptr(u8 *bits, u32 max_level, u32 level, u32 key) {
|
||||||
return mmbit_get_level_root(bits, level) +
|
u8 *level_root = mmbit_get_level_root(bits, level);
|
||||||
(key >> (mmbit_get_ks(max_level, level) + MMB_KEY_SHIFT)) *
|
u32 ks = mmbit_get_ks(max_level, level);
|
||||||
sizeof(MMB_TYPE);
|
return level_root + ((u64a)key >> (ks + MMB_KEY_SHIFT)) * sizeof(MMB_TYPE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \brief get the block for this key on the current level as a const u8 ptr */
|
/** \brief get the block for this key on the current level as a const u8 ptr */
|
||||||
static really_inline
|
static really_inline
|
||||||
const u8 *mmbit_get_block_ptr_const(const u8 *bits, u32 max_level, u32 level,
|
const u8 *mmbit_get_block_ptr_const(const u8 *bits, u32 max_level, u32 level,
|
||||||
u32 key) {
|
u32 key) {
|
||||||
return mmbit_get_level_root_const(bits, level) +
|
const u8 *level_root = mmbit_get_level_root_const(bits, level);
|
||||||
(key >> (mmbit_get_ks(max_level, level) + MMB_KEY_SHIFT)) *
|
u32 ks = mmbit_get_ks(max_level, level);
|
||||||
sizeof(MMB_TYPE);
|
return level_root + ((u64a)key >> (ks + MMB_KEY_SHIFT)) * sizeof(MMB_TYPE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \brief get the _byte_ for this key on the current level as a u8 ptr */
|
/** \brief get the _byte_ for this key on the current level as a u8 ptr */
|
||||||
@ -254,7 +254,7 @@ static really_inline
|
|||||||
u8 *mmbit_get_byte_ptr(u8 *bits, u32 max_level, u32 level, u32 key) {
|
u8 *mmbit_get_byte_ptr(u8 *bits, u32 max_level, u32 level, u32 key) {
|
||||||
u8 *level_root = mmbit_get_level_root(bits, level);
|
u8 *level_root = mmbit_get_level_root(bits, level);
|
||||||
u32 ks = mmbit_get_ks(max_level, level);
|
u32 ks = mmbit_get_ks(max_level, level);
|
||||||
return level_root + (key >> (ks + MMB_KEY_SHIFT - 3));
|
return level_root + ((u64a)key >> (ks + MMB_KEY_SHIFT - 3));
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \brief get our key value for the current level */
|
/** \brief get our key value for the current level */
|
||||||
@ -721,11 +721,11 @@ u32 mmbit_iterate_bounded_flat(const u8 *bits, u32 total_bits, u32 begin,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
MMB_TYPE get_lowhi_masks(u32 level, u32 max_level, u32 block_min, u32 block_max,
|
MMB_TYPE get_lowhi_masks(u32 level, u32 max_level, u64a block_min, u64a block_max,
|
||||||
u32 block_base) {
|
u64a block_base) {
|
||||||
const u32 level_shift = (max_level - level) * MMB_KEY_SHIFT;
|
const u32 level_shift = (max_level - level) * MMB_KEY_SHIFT;
|
||||||
u32 lshift = (block_min - block_base) >> level_shift;
|
u64a lshift = (block_min - block_base) >> level_shift;
|
||||||
u32 ushift = (block_max - block_base) >> level_shift;
|
u64a ushift = (block_max - block_base) >> level_shift;
|
||||||
MMB_TYPE lmask = lshift < 64 ? ~mmb_mask_zero_to_nocheck(lshift) : 0;
|
MMB_TYPE lmask = lshift < 64 ? ~mmb_mask_zero_to_nocheck(lshift) : 0;
|
||||||
MMB_TYPE umask =
|
MMB_TYPE umask =
|
||||||
ushift < 63 ? mmb_mask_zero_to_nocheck(ushift + 1) : MMB_ALL_ONES;
|
ushift < 63 ? mmb_mask_zero_to_nocheck(ushift + 1) : MMB_ALL_ONES;
|
||||||
@ -734,7 +734,7 @@ MMB_TYPE get_lowhi_masks(u32 level, u32 max_level, u32 block_min, u32 block_max,
|
|||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
u32 mmbit_iterate_bounded_big(const u8 *bits, u32 total_bits, u32 it_start, u32 it_end) {
|
u32 mmbit_iterate_bounded_big(const u8 *bits, u32 total_bits, u32 it_start, u32 it_end) {
|
||||||
u32 key = 0;
|
u64a key = 0;
|
||||||
u32 ks = mmbit_keyshift(total_bits);
|
u32 ks = mmbit_keyshift(total_bits);
|
||||||
const u32 max_level = mmbit_maxlevel_from_keyshift(ks);
|
const u32 max_level = mmbit_maxlevel_from_keyshift(ks);
|
||||||
u32 level = 0;
|
u32 level = 0;
|
||||||
@ -743,9 +743,9 @@ u32 mmbit_iterate_bounded_big(const u8 *bits, u32 total_bits, u32 it_start, u32
|
|||||||
assert(level <= max_level);
|
assert(level <= max_level);
|
||||||
|
|
||||||
u32 block_width = MMB_KEY_BITS << ks;
|
u32 block_width = MMB_KEY_BITS << ks;
|
||||||
u32 block_base = key*block_width;
|
u64a block_base = key * block_width;
|
||||||
u32 block_min = MAX(it_start, block_base);
|
u64a block_min = MAX(it_start, block_base);
|
||||||
u32 block_max = MIN(it_end, block_base + block_width - 1);
|
u64a block_max = MIN(it_end, block_base + block_width - 1);
|
||||||
const u8 *block_ptr =
|
const u8 *block_ptr =
|
||||||
mmbit_get_level_root_const(bits, level) + key * sizeof(MMB_TYPE);
|
mmbit_get_level_root_const(bits, level) + key * sizeof(MMB_TYPE);
|
||||||
MMB_TYPE block = mmb_load(block_ptr);
|
MMB_TYPE block = mmb_load(block_ptr);
|
||||||
@ -761,13 +761,14 @@ u32 mmbit_iterate_bounded_big(const u8 *bits, u32 total_bits, u32 it_start, u32
|
|||||||
// No bit found, go up a level
|
// No bit found, go up a level
|
||||||
// we know that this block didn't have any answers, so we can push
|
// we know that this block didn't have any answers, so we can push
|
||||||
// our start iterator forward.
|
// our start iterator forward.
|
||||||
it_start = block_base + block_width;
|
u64a next_start = block_base + block_width;
|
||||||
if (it_start > it_end) {
|
if (next_start > it_end) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (level-- == 0) {
|
if (level-- == 0) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
it_start = next_start;
|
||||||
key >>= MMB_KEY_SHIFT;
|
key >>= MMB_KEY_SHIFT;
|
||||||
ks += MMB_KEY_SHIFT;
|
ks += MMB_KEY_SHIFT;
|
||||||
}
|
}
|
||||||
|
@ -128,11 +128,9 @@ vector<ReportID> ReportManager::getDkeyToReportTable() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ReportManager::assignDkeys(const RoseBuild *rose) {
|
void ReportManager::assignDkeys(const RoseBuild *rose) {
|
||||||
unique_ptr<RoseDedupeAux> dedupe = rose->generateDedupeAux();
|
|
||||||
|
|
||||||
DEBUG_PRINTF("assigning...\n");
|
DEBUG_PRINTF("assigning...\n");
|
||||||
|
|
||||||
map<u32, set<ReportID>> ext_to_int;
|
map<u32, ue2::flat_set<ReportID>> ext_to_int;
|
||||||
|
|
||||||
for (u32 i = 0; i < reportIds.size(); i++) {
|
for (u32 i = 0; i < reportIds.size(); i++) {
|
||||||
const Report &ir = reportIds[i];
|
const Report &ir = reportIds[i];
|
||||||
@ -143,6 +141,8 @@ void ReportManager::assignDkeys(const RoseBuild *rose) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto dedupe = rose->generateDedupeAux();
|
||||||
|
|
||||||
for (const auto &m : ext_to_int) {
|
for (const auto &m : ext_to_int) {
|
||||||
u32 ext = m.first;
|
u32 ext = m.first;
|
||||||
|
|
||||||
|
@ -7,7 +7,8 @@ if(NOT XCODE)
|
|||||||
else()
|
else()
|
||||||
set(CMAKE_CXX_FLAGS "-isystem ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CXX_FLAGS}")
|
set(CMAKE_CXX_FLAGS "-isystem ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CXX_FLAGS}")
|
||||||
endif()
|
endif()
|
||||||
include_directories(${CMAKE_SOURCE_DIR}/util)
|
|
||||||
|
include_directories(${PROJECT_SOURCE_DIR})
|
||||||
|
|
||||||
# remove some warnings
|
# remove some warnings
|
||||||
# cmake's scope means these only apply here
|
# cmake's scope means these only apply here
|
||||||
@ -26,7 +27,7 @@ endif()
|
|||||||
|
|
||||||
add_library(gtest ${gtest_SOURCES})
|
add_library(gtest ${gtest_SOURCES})
|
||||||
|
|
||||||
add_definitions(-DGTEST_HAS_PTHREAD=0 -DSRCDIR=${CMAKE_SOURCE_DIR})
|
add_definitions(-DGTEST_HAS_PTHREAD=0 -DSRCDIR=${PROJECT_SOURCE_DIR})
|
||||||
|
|
||||||
if (NOT RELEASE_BUILD)
|
if (NOT RELEASE_BUILD)
|
||||||
set(unit_internal_SOURCES
|
set(unit_internal_SOURCES
|
||||||
|
@ -85,7 +85,6 @@
|
|||||||
84:/[=\]=]/ #Unsupported POSIX collating element at index 0.
|
84:/[=\]=]/ #Unsupported POSIX collating element at index 0.
|
||||||
85:/A(?!)+Z/ #Invalid repeat at index 5.
|
85:/A(?!)+Z/ #Invalid repeat at index 5.
|
||||||
86:/\X/ #\X unsupported at index 0.
|
86:/\X/ #\X unsupported at index 0.
|
||||||
87:/[a\Qz\E]/ #\Q..\E sequences in character classes not supported at index 2.
|
|
||||||
88:/[A-\d]/ #Invalid range in character class at index 3.
|
88:/[A-\d]/ #Invalid range in character class at index 3.
|
||||||
89:/[A-[:digit:]]/ #Invalid range in character class at index 3.
|
89:/[A-[:digit:]]/ #Invalid range in character class at index 3.
|
||||||
90:/B[--[:digit:]--]+/ #Invalid range in character class at index 4.
|
90:/B[--[:digit:]--]+/ #Invalid range in character class at index 4.
|
||||||
@ -128,3 +127,8 @@
|
|||||||
128:/(*UTF8)^fo?ob{ro|nax_off\Qt=10omnax+8Wnah/ñññññññññññññññññññññññññññ0}l.{1,60}Car*k|npanomnax+8Wnah/ #Expression is not valid UTF-8.
|
128:/(*UTF8)^fo?ob{ro|nax_off\Qt=10omnax+8Wnah/ñññññññññññññññññññññññññññ0}l.{1,60}Car*k|npanomnax+8Wnah/ #Expression is not valid UTF-8.
|
||||||
129:/bignum \1111111111111111111/ #Number is too big at index 7.
|
129:/bignum \1111111111111111111/ #Number is too big at index 7.
|
||||||
130:/foo|&{5555555,}/ #Bounded repeat is too large.
|
130:/foo|&{5555555,}/ #Bounded repeat is too large.
|
||||||
|
131:/[a[..]]/ #Unsupported POSIX collating element at index 2.
|
||||||
|
132:/[a[==]]/ #Unsupported POSIX collating element at index 2.
|
||||||
|
133:/[a[.\].]]/ #Unsupported POSIX collating element at index 2.
|
||||||
|
134:/[a[=\]=]]/ #Unsupported POSIX collating element at index 2.
|
||||||
|
135:/[^\D\d]/8W #Pattern can never match.
|
||||||
|
@ -363,7 +363,9 @@ TEST_P(MultiBitTest, BoundedIteratorSingle) {
|
|||||||
ASSERT_TRUE(ba != nullptr);
|
ASSERT_TRUE(ba != nullptr);
|
||||||
|
|
||||||
// Set one bit on and run some checks.
|
// Set one bit on and run some checks.
|
||||||
for (u32 i = 0; i < test_size; i += stride) {
|
for (u64a i = 0; i < test_size; i += stride) {
|
||||||
|
SCOPED_TRACE(i);
|
||||||
|
|
||||||
mmbit_clear(ba, test_size);
|
mmbit_clear(ba, test_size);
|
||||||
mmbit_set(ba, test_size, i);
|
mmbit_set(ba, test_size, i);
|
||||||
|
|
||||||
@ -381,7 +383,12 @@ TEST_P(MultiBitTest, BoundedIteratorSingle) {
|
|||||||
|
|
||||||
// Scanning from one past our bit to the end should find nothing.
|
// Scanning from one past our bit to the end should find nothing.
|
||||||
if (i != test_size - 1) {
|
if (i != test_size - 1) {
|
||||||
ASSERT_EQ(MMB_INVALID, mmbit_iterate_bounded(ba, test_size, i + 1, test_size));
|
// Ordinary iterator.
|
||||||
|
ASSERT_EQ(MMB_INVALID, mmbit_iterate(ba, test_size, i));
|
||||||
|
|
||||||
|
// Bounded iterator.
|
||||||
|
ASSERT_EQ(MMB_INVALID,
|
||||||
|
mmbit_iterate_bounded(ba, test_size, i + 1, test_size));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -393,7 +400,7 @@ TEST_P(MultiBitTest, BoundedIteratorAll) {
|
|||||||
// Switch everything on.
|
// Switch everything on.
|
||||||
fill_mmbit(ba, test_size);
|
fill_mmbit(ba, test_size);
|
||||||
|
|
||||||
for (u32 i = 0; i < test_size; i += stride) {
|
for (u64a i = 0; i < test_size; i += stride) {
|
||||||
if (i != 0) {
|
if (i != 0) {
|
||||||
ASSERT_EQ(0U, mmbit_iterate_bounded(ba, test_size, 0, i));
|
ASSERT_EQ(0U, mmbit_iterate_bounded(ba, test_size, 0, i));
|
||||||
}
|
}
|
||||||
@ -408,13 +415,13 @@ TEST_P(MultiBitTest, BoundedIteratorEven) {
|
|||||||
|
|
||||||
// Set every even-numbered bit and see what we can see.
|
// Set every even-numbered bit and see what we can see.
|
||||||
mmbit_clear(ba, test_size);
|
mmbit_clear(ba, test_size);
|
||||||
for (u32 i = 0; i < test_size; i += 2) {
|
for (u64a i = 0; i < test_size; i += 2) {
|
||||||
mmbit_set(ba, test_size, i);
|
mmbit_set(ba, test_size, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 even_stride = stride % 2 ? stride + 1 : stride;
|
u32 even_stride = stride % 2 ? stride + 1 : stride;
|
||||||
|
|
||||||
for (u32 i = 0; i < test_size; i += even_stride) {
|
for (u64a i = 0; i < test_size; i += even_stride) {
|
||||||
// Scanning from each even bit to the end should find itself.
|
// Scanning from each even bit to the end should find itself.
|
||||||
ASSERT_EQ(i, mmbit_iterate_bounded(ba, test_size, i, test_size));
|
ASSERT_EQ(i, mmbit_iterate_bounded(ba, test_size, i, test_size));
|
||||||
|
|
||||||
@ -439,13 +446,13 @@ TEST_P(MultiBitTest, BoundedIteratorOdd) {
|
|||||||
|
|
||||||
// Set every odd-numbered bit and see what we can see.
|
// Set every odd-numbered bit and see what we can see.
|
||||||
mmbit_clear(ba, test_size);
|
mmbit_clear(ba, test_size);
|
||||||
for (u32 i = 1; i < test_size; i += 2) {
|
for (u64a i = 1; i < test_size; i += 2) {
|
||||||
mmbit_set(ba, test_size, i);
|
mmbit_set(ba, test_size, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 even_stride = stride % 2 ? stride + 1 : stride;
|
u32 even_stride = stride % 2 ? stride + 1 : stride;
|
||||||
|
|
||||||
for (u32 i = 0; i < test_size; i += even_stride) {
|
for (u64a i = 0; i < test_size; i += even_stride) {
|
||||||
// Scanning from each even bit to the end should find i+1.
|
// Scanning from each even bit to the end should find i+1.
|
||||||
if (i+1 < test_size) {
|
if (i+1 < test_size) {
|
||||||
ASSERT_EQ(i+1, mmbit_iterate_bounded(ba, test_size, i, test_size));
|
ASSERT_EQ(i+1, mmbit_iterate_bounded(ba, test_size, i, test_size));
|
||||||
@ -473,7 +480,7 @@ TEST_P(MultiBitTest, Set) {
|
|||||||
mmbit_clear(ba, test_size);
|
mmbit_clear(ba, test_size);
|
||||||
ASSERT_FALSE(mmbit_any(ba, test_size));
|
ASSERT_FALSE(mmbit_any(ba, test_size));
|
||||||
|
|
||||||
for (u32 i = 0; i < test_size; i += stride) {
|
for (u64a i = 0; i < test_size; i += stride) {
|
||||||
SCOPED_TRACE(i);
|
SCOPED_TRACE(i);
|
||||||
|
|
||||||
// set a bit that wasn't set before
|
// set a bit that wasn't set before
|
||||||
@ -500,7 +507,7 @@ TEST_P(MultiBitTest, Iter) {
|
|||||||
mmbit_clear(ba, test_size);
|
mmbit_clear(ba, test_size);
|
||||||
ASSERT_EQ(MMB_INVALID, mmbit_iterate(ba, test_size, MMB_INVALID));
|
ASSERT_EQ(MMB_INVALID, mmbit_iterate(ba, test_size, MMB_INVALID));
|
||||||
|
|
||||||
for (u32 i = 0; i < test_size; i += stride) {
|
for (u64a i = 0; i < test_size; i += stride) {
|
||||||
SCOPED_TRACE(i);
|
SCOPED_TRACE(i);
|
||||||
mmbit_clear(ba, test_size);
|
mmbit_clear(ba, test_size);
|
||||||
mmbit_set(ba, test_size, i);
|
mmbit_set(ba, test_size, i);
|
||||||
@ -517,13 +524,13 @@ TEST_P(MultiBitTest, IterAll) {
|
|||||||
ASSERT_EQ(MMB_INVALID, mmbit_iterate(ba, test_size, MMB_INVALID));
|
ASSERT_EQ(MMB_INVALID, mmbit_iterate(ba, test_size, MMB_INVALID));
|
||||||
|
|
||||||
// Set all bits.
|
// Set all bits.
|
||||||
for (u32 i = 0; i < test_size; i += stride) {
|
for (u64a i = 0; i < test_size; i += stride) {
|
||||||
mmbit_set(ba, test_size, i);
|
mmbit_set(ba, test_size, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find all bits.
|
// Find all bits.
|
||||||
u32 it = MMB_INVALID;
|
u32 it = MMB_INVALID;
|
||||||
for (u32 i = 0; i < test_size; i += stride) {
|
for (u64a i = 0; i < test_size; i += stride) {
|
||||||
ASSERT_EQ(i, mmbit_iterate(ba, test_size, it));
|
ASSERT_EQ(i, mmbit_iterate(ba, test_size, it));
|
||||||
it = i;
|
it = i;
|
||||||
}
|
}
|
||||||
@ -536,7 +543,7 @@ TEST_P(MultiBitTest, AnyPrecise) {
|
|||||||
mmbit_clear(ba, test_size);
|
mmbit_clear(ba, test_size);
|
||||||
ASSERT_FALSE(mmbit_any_precise(ba, test_size));
|
ASSERT_FALSE(mmbit_any_precise(ba, test_size));
|
||||||
|
|
||||||
for (u32 i = 0; i < test_size; i += stride) {
|
for (u64a i = 0; i < test_size; i += stride) {
|
||||||
SCOPED_TRACE(i);
|
SCOPED_TRACE(i);
|
||||||
mmbit_clear(ba, test_size);
|
mmbit_clear(ba, test_size);
|
||||||
mmbit_set(ba, test_size, i);
|
mmbit_set(ba, test_size, i);
|
||||||
@ -551,7 +558,7 @@ TEST_P(MultiBitTest, Any) {
|
|||||||
mmbit_clear(ba, test_size);
|
mmbit_clear(ba, test_size);
|
||||||
ASSERT_FALSE(mmbit_any(ba, test_size));
|
ASSERT_FALSE(mmbit_any(ba, test_size));
|
||||||
|
|
||||||
for (u32 i = 0; i < test_size; i += stride) {
|
for (u64a i = 0; i < test_size; i += stride) {
|
||||||
SCOPED_TRACE(i);
|
SCOPED_TRACE(i);
|
||||||
mmbit_clear(ba, test_size);
|
mmbit_clear(ba, test_size);
|
||||||
mmbit_set(ba, test_size, i);
|
mmbit_set(ba, test_size, i);
|
||||||
@ -567,7 +574,7 @@ TEST_P(MultiBitTest, UnsetRange1) {
|
|||||||
fill_mmbit(ba, test_size);
|
fill_mmbit(ba, test_size);
|
||||||
|
|
||||||
// Use mmbit_unset_range to switch off any single bit.
|
// Use mmbit_unset_range to switch off any single bit.
|
||||||
for (u32 i = 0; i < test_size; i += stride) {
|
for (u64a i = 0; i < test_size; i += stride) {
|
||||||
SCOPED_TRACE(i);
|
SCOPED_TRACE(i);
|
||||||
ASSERT_TRUE(mmbit_isset(ba, test_size, i));
|
ASSERT_TRUE(mmbit_isset(ba, test_size, i));
|
||||||
mmbit_unset_range(ba, test_size, i, i + 1);
|
mmbit_unset_range(ba, test_size, i, i + 1);
|
||||||
@ -590,7 +597,7 @@ TEST_P(MultiBitTest, UnsetRange2) {
|
|||||||
// Use mmbit_unset_range to switch off all bits.
|
// Use mmbit_unset_range to switch off all bits.
|
||||||
mmbit_unset_range(ba, test_size, 0, test_size);
|
mmbit_unset_range(ba, test_size, 0, test_size);
|
||||||
|
|
||||||
for (u32 i = 0; i < test_size; i += stride) {
|
for (u64a i = 0; i < test_size; i += stride) {
|
||||||
SCOPED_TRACE(i);
|
SCOPED_TRACE(i);
|
||||||
ASSERT_FALSE(mmbit_isset(ba, test_size, i));
|
ASSERT_FALSE(mmbit_isset(ba, test_size, i));
|
||||||
}
|
}
|
||||||
@ -601,12 +608,12 @@ TEST_P(MultiBitTest, UnsetRange3) {
|
|||||||
ASSERT_TRUE(ba != nullptr);
|
ASSERT_TRUE(ba != nullptr);
|
||||||
|
|
||||||
// Use mmbit_unset_range to switch off bits in chunks of 3.
|
// Use mmbit_unset_range to switch off bits in chunks of 3.
|
||||||
for (u32 i = 0; i < test_size - 3; i += stride) {
|
for (u64a i = 0; i < test_size - 3; i += stride) {
|
||||||
// Switch on the bit before, the bits in question, and the bit after.
|
// Switch on the bit before, the bits in question, and the bit after.
|
||||||
if (i > 0) {
|
if (i > 0) {
|
||||||
mmbit_set(ba, test_size, i - 1);
|
mmbit_set(ba, test_size, i - 1);
|
||||||
}
|
}
|
||||||
for (u32 j = i; j < min(i + 4, test_size); j++) {
|
for (u64a j = i; j < min(i + 4, (u64a)test_size); j++) {
|
||||||
mmbit_set(ba, test_size, j);
|
mmbit_set(ba, test_size, j);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -635,7 +642,7 @@ TEST_P(MultiBitTest, InitRangeAll) {
|
|||||||
mmbit_init_range(ba, test_size, 0, test_size);
|
mmbit_init_range(ba, test_size, 0, test_size);
|
||||||
|
|
||||||
// Make sure they're all set.
|
// Make sure they're all set.
|
||||||
for (u32 i = 0; i < test_size; i += stride) {
|
for (u64a i = 0; i < test_size; i += stride) {
|
||||||
SCOPED_TRACE(i);
|
SCOPED_TRACE(i);
|
||||||
ASSERT_TRUE(mmbit_isset(ba, test_size, i));
|
ASSERT_TRUE(mmbit_isset(ba, test_size, i));
|
||||||
}
|
}
|
||||||
@ -656,7 +663,7 @@ TEST_P(MultiBitTest, InitRangeOne) {
|
|||||||
SCOPED_TRACE(test_size);
|
SCOPED_TRACE(test_size);
|
||||||
ASSERT_TRUE(ba != nullptr);
|
ASSERT_TRUE(ba != nullptr);
|
||||||
|
|
||||||
for (u32 i = 0; i < test_size; i += stride) {
|
for (u64a i = 0; i < test_size; i += stride) {
|
||||||
mmbit_init_range(ba, test_size, i, i + 1);
|
mmbit_init_range(ba, test_size, i, i + 1);
|
||||||
|
|
||||||
// Only bit 'i' should be on.
|
// Only bit 'i' should be on.
|
||||||
@ -685,7 +692,7 @@ TEST_P(MultiBitTest, InitRangeChunked) {
|
|||||||
ASSERT_EQ(chunk_begin, mmbit_iterate(ba, test_size, MMB_INVALID));
|
ASSERT_EQ(chunk_begin, mmbit_iterate(ba, test_size, MMB_INVALID));
|
||||||
|
|
||||||
// All bits in the chunk should be on.
|
// All bits in the chunk should be on.
|
||||||
for (u32 i = chunk_begin; i < chunk_end; i += stride) {
|
for (u64a i = chunk_begin; i < chunk_end; i += stride) {
|
||||||
SCOPED_TRACE(i);
|
SCOPED_TRACE(i);
|
||||||
ASSERT_TRUE(mmbit_isset(ba, test_size, i));
|
ASSERT_TRUE(mmbit_isset(ba, test_size, i));
|
||||||
}
|
}
|
||||||
@ -985,7 +992,7 @@ TEST_P(MultiBitTest, SparseIteratorBeginAll) {
|
|||||||
vector<mmbit_sparse_iter> it;
|
vector<mmbit_sparse_iter> it;
|
||||||
vector<u32> bits;
|
vector<u32> bits;
|
||||||
bits.reserve(test_size / stride);
|
bits.reserve(test_size / stride);
|
||||||
for (u32 i = 0; i < test_size; i += stride) {
|
for (u64a i = 0; i < test_size; i += stride) {
|
||||||
bits.push_back(i);
|
bits.push_back(i);
|
||||||
}
|
}
|
||||||
mmbBuildSparseIterator(it, bits, test_size);
|
mmbBuildSparseIterator(it, bits, test_size);
|
||||||
@ -1032,7 +1039,7 @@ TEST_P(MultiBitTest, SparseIteratorBeginThirds) {
|
|||||||
// Switch every third bits on in state
|
// Switch every third bits on in state
|
||||||
mmbit_clear(ba, test_size);
|
mmbit_clear(ba, test_size);
|
||||||
ASSERT_FALSE(mmbit_any(ba, test_size));
|
ASSERT_FALSE(mmbit_any(ba, test_size));
|
||||||
for (u32 i = 0; i < test_size; i += 3) {
|
for (u64a i = 0; i < test_size; i += 3) {
|
||||||
mmbit_set(ba, test_size, i);
|
mmbit_set(ba, test_size, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1044,7 +1051,7 @@ TEST_P(MultiBitTest, SparseIteratorBeginThirds) {
|
|||||||
ASSERT_EQ(0U, val);
|
ASSERT_EQ(0U, val);
|
||||||
ASSERT_EQ(0U, idx);
|
ASSERT_EQ(0U, idx);
|
||||||
|
|
||||||
for (u32 i = 0; i < test_size - 3; i += 3) {
|
for (u64a i = 0; i < test_size - 3; i += 3) {
|
||||||
mmbit_unset(ba, test_size, i);
|
mmbit_unset(ba, test_size, i);
|
||||||
val = mmbit_sparse_iter_begin(ba, test_size, &idx, &it[0], &state[0]);
|
val = mmbit_sparse_iter_begin(ba, test_size, &idx, &it[0], &state[0]);
|
||||||
ASSERT_EQ(i+3, val);
|
ASSERT_EQ(i+3, val);
|
||||||
@ -1060,7 +1067,7 @@ TEST_P(MultiBitTest, SparseIteratorNextAll) {
|
|||||||
vector<mmbit_sparse_iter> it;
|
vector<mmbit_sparse_iter> it;
|
||||||
vector<u32> bits;
|
vector<u32> bits;
|
||||||
bits.reserve(test_size / stride);
|
bits.reserve(test_size / stride);
|
||||||
for (u32 i = 0; i < test_size; i += stride) {
|
for (u64a i = 0; i < test_size; i += stride) {
|
||||||
bits.push_back(i);
|
bits.push_back(i);
|
||||||
}
|
}
|
||||||
mmbBuildSparseIterator(it, bits, test_size);
|
mmbBuildSparseIterator(it, bits, test_size);
|
||||||
@ -1103,7 +1110,7 @@ TEST_P(MultiBitTest, SparseIteratorNextExactStrided) {
|
|||||||
vector<mmbit_sparse_iter> it;
|
vector<mmbit_sparse_iter> it;
|
||||||
vector<u32> bits;
|
vector<u32> bits;
|
||||||
bits.reserve(test_size / stride);
|
bits.reserve(test_size / stride);
|
||||||
for (u32 i = 0; i < test_size; i += stride) {
|
for (u64a i = 0; i < test_size; i += stride) {
|
||||||
bits.push_back(i);
|
bits.push_back(i);
|
||||||
mmbit_set(ba, test_size, i);
|
mmbit_set(ba, test_size, i);
|
||||||
}
|
}
|
||||||
@ -1135,7 +1142,7 @@ TEST_P(MultiBitTest, SparseIteratorNextNone) {
|
|||||||
vector<mmbit_sparse_iter> it;
|
vector<mmbit_sparse_iter> it;
|
||||||
vector<u32> bits;
|
vector<u32> bits;
|
||||||
bits.reserve(test_size / stride);
|
bits.reserve(test_size / stride);
|
||||||
for (u32 i = 0; i < test_size; i += stride) {
|
for (u64a i = 0; i < test_size; i += stride) {
|
||||||
bits.push_back(i);
|
bits.push_back(i);
|
||||||
}
|
}
|
||||||
mmbBuildSparseIterator(it, bits, test_size);
|
mmbBuildSparseIterator(it, bits, test_size);
|
||||||
@ -1164,7 +1171,7 @@ TEST_P(MultiBitTest, SparseIteratorUnsetAll) {
|
|||||||
vector<mmbit_sparse_iter> it;
|
vector<mmbit_sparse_iter> it;
|
||||||
vector<u32> bits;
|
vector<u32> bits;
|
||||||
bits.reserve(test_size / stride);
|
bits.reserve(test_size / stride);
|
||||||
for (u32 i = 0; i < test_size; i += stride) {
|
for (u64a i = 0; i < test_size; i += stride) {
|
||||||
bits.push_back(i);
|
bits.push_back(i);
|
||||||
}
|
}
|
||||||
mmbBuildSparseIterator(it, bits, test_size);
|
mmbBuildSparseIterator(it, bits, test_size);
|
||||||
@ -1194,10 +1201,10 @@ TEST_P(MultiBitTest, SparseIteratorUnsetHalves) {
|
|||||||
|
|
||||||
// Two sparse iterators: one for even bits, one for odd ones
|
// Two sparse iterators: one for even bits, one for odd ones
|
||||||
vector<u32> even, odd;
|
vector<u32> even, odd;
|
||||||
for (u32 i = 0; i < test_size; i += 2) {
|
for (u64a i = 0; i < test_size; i += 2) {
|
||||||
even.push_back(i);
|
even.push_back(i);
|
||||||
}
|
}
|
||||||
for (u32 i = 1; i < test_size; i += 2) {
|
for (u64a i = 1; i < test_size; i += 2) {
|
||||||
odd.push_back(i);
|
odd.push_back(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1277,9 +1284,9 @@ static const MultiBitTestParam multibitTests[] = {
|
|||||||
{ 1U << 28, 15073 },
|
{ 1U << 28, 15073 },
|
||||||
{ 1U << 29, 24413 },
|
{ 1U << 29, 24413 },
|
||||||
{ 1U << 30, 50377 },
|
{ 1U << 30, 50377 },
|
||||||
|
{ 1U << 31, 104729 },
|
||||||
|
|
||||||
// XXX: cases this large segfault in mmbit_set, FIXME NOW
|
// { UINT32_MAX, 104729 }, // Very slow
|
||||||
//{ 1U << 31, 3701 },
|
|
||||||
};
|
};
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(MultiBit, MultiBitTest, ValuesIn(multibitTests));
|
INSTANTIATE_TEST_CASE_P(MultiBit, MultiBitTest, ValuesIn(multibitTests));
|
||||||
|
@ -36,9 +36,9 @@
|
|||||||
#include "nfagraph/ng_builder.h"
|
#include "nfagraph/ng_builder.h"
|
||||||
#include "nfagraph/ng.h"
|
#include "nfagraph/ng.h"
|
||||||
#include "nfagraph/ng_asserts.h"
|
#include "nfagraph/ng_asserts.h"
|
||||||
#include "util/target_info.h"
|
|
||||||
#include "hs_compile.h"
|
#include "hs_compile.h"
|
||||||
#include "ng_find_matches.h"
|
#include "util/ng_find_matches.h"
|
||||||
|
#include "util/target_info.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace testing;
|
using namespace testing;
|
||||||
|
@ -448,6 +448,25 @@ TEST_P(RepeatTest, Pack) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_P(RepeatTest, LargeGap) {
|
||||||
|
SCOPED_TRACE(testing::Message() << "Repeat: " << info);
|
||||||
|
|
||||||
|
if (info.repeatMax == REPEAT_INF) {
|
||||||
|
return; // Test not valid for FIRST-type repeats.
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 64; i++) {
|
||||||
|
u64a top1 = 1000;
|
||||||
|
repeatStore(&info, ctrl, state, top1, 0); // first top
|
||||||
|
ASSERT_EQ(top1, repeatLastTop(&info, ctrl, state));
|
||||||
|
|
||||||
|
// Add a second top after a gap of 2^i bytes.
|
||||||
|
u64a top2 = top1 + (1ULL << i);
|
||||||
|
repeatStore(&info, ctrl, state, top2, 1); // second top
|
||||||
|
ASSERT_EQ(top2, repeatLastTop(&info, ctrl, state));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
const u32 sparsePeriods[] = {
|
const u32 sparsePeriods[] = {
|
||||||
2,
|
2,
|
||||||
@ -505,6 +524,7 @@ const RepeatTestInfo sparseRepeats[] = {
|
|||||||
{ REPEAT_SPARSE_OPTIMAL_P, 4000, 4000 },
|
{ REPEAT_SPARSE_OPTIMAL_P, 4000, 4000 },
|
||||||
{ REPEAT_SPARSE_OPTIMAL_P, 4500, 4500 },
|
{ REPEAT_SPARSE_OPTIMAL_P, 4500, 4500 },
|
||||||
{ REPEAT_SPARSE_OPTIMAL_P, 5000, 5000 },
|
{ REPEAT_SPARSE_OPTIMAL_P, 5000, 5000 },
|
||||||
|
{ REPEAT_SPARSE_OPTIMAL_P, 65534, 65534 },
|
||||||
// {N, M} repeats
|
// {N, M} repeats
|
||||||
{ REPEAT_SPARSE_OPTIMAL_P, 10, 20 },
|
{ REPEAT_SPARSE_OPTIMAL_P, 10, 20 },
|
||||||
{ REPEAT_SPARSE_OPTIMAL_P, 20, 40 },
|
{ REPEAT_SPARSE_OPTIMAL_P, 20, 40 },
|
||||||
@ -528,7 +548,8 @@ const RepeatTestInfo sparseRepeats[] = {
|
|||||||
{ REPEAT_SPARSE_OPTIMAL_P, 3500, 4000 },
|
{ REPEAT_SPARSE_OPTIMAL_P, 3500, 4000 },
|
||||||
{ REPEAT_SPARSE_OPTIMAL_P, 4000, 8000 },
|
{ REPEAT_SPARSE_OPTIMAL_P, 4000, 8000 },
|
||||||
{ REPEAT_SPARSE_OPTIMAL_P, 4500, 8000 },
|
{ REPEAT_SPARSE_OPTIMAL_P, 4500, 8000 },
|
||||||
{ REPEAT_SPARSE_OPTIMAL_P, 5000, 5001 }
|
{ REPEAT_SPARSE_OPTIMAL_P, 5000, 5001 },
|
||||||
|
{ REPEAT_SPARSE_OPTIMAL_P, 60000, 65534 }
|
||||||
};
|
};
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -802,7 +823,7 @@ TEST_P(SparseOptimalTest, Simple1) {
|
|||||||
1000 + info->repeatMax * 2));
|
1000 + info->repeatMax * 2));
|
||||||
ASSERT_EQ(0U, repeatNextMatch(info, ctrl, state,
|
ASSERT_EQ(0U, repeatNextMatch(info, ctrl, state,
|
||||||
1000 + info->repeatMax * 2 + 1));
|
1000 + info->repeatMax * 2 + 1));
|
||||||
ASSERT_EQ(0U, repeatNextMatch(info, ctrl, state, 10000));
|
ASSERT_EQ(0U, repeatNextMatch(info, ctrl, state, 100000));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(SparseOptimalTest, TwoTopsNeg) {
|
TEST_P(SparseOptimalTest, TwoTopsNeg) {
|
||||||
@ -893,6 +914,24 @@ TEST_P(SparseOptimalTest, Simple3e) {
|
|||||||
test_sparse3entryExpire(info, ctrl, state, 2 * info->minPeriod - 1);
|
test_sparse3entryExpire(info, ctrl, state, 2 * info->minPeriod - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_P(SparseOptimalTest, LargeGap) {
|
||||||
|
SCOPED_TRACE(testing::Message() << "Repeat: " << *info);
|
||||||
|
|
||||||
|
for (int i = 0; i < 64; i++) {
|
||||||
|
u64a top1 = 1000;
|
||||||
|
repeatStore(info, ctrl, state, top1, 0); // first top
|
||||||
|
ASSERT_EQ(top1, repeatLastTop(info, ctrl, state));
|
||||||
|
|
||||||
|
// Add a second top after a gap of 2^i bytes.
|
||||||
|
u64a top2 = top1 + (1ULL << i);
|
||||||
|
if (top2 - top1 < info->minPeriod) {
|
||||||
|
continue; // not a valid top
|
||||||
|
}
|
||||||
|
repeatStore(info, ctrl, state, top2, 1); // second top
|
||||||
|
ASSERT_EQ(top2, repeatLastTop(info, ctrl, state));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
TEST_P(SparseOptimalTest, ThreeTops) {
|
TEST_P(SparseOptimalTest, ThreeTops) {
|
||||||
SCOPED_TRACE(testing::Message() << "Repeat: " << *info);
|
SCOPED_TRACE(testing::Message() << "Repeat: " << *info);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user