diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 00000000..c137017a
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,36 @@
+# Hyperscan Change Log
+
+This is a list of notable changes to Hyperscan, in reverse chronological order.
+
+## [4.1.0] 2015-12-18
+- Update version of PCRE used by testing tools as a syntax and semantic
+ reference to PCRE 8.38.
+- Small updates to fix warnings identified by Coverity.
+- Clean up and unify exception handling behaviour across GPR and SIMD NFA
+ models.
+- Fix bug in handling of bounded repeat triggers with large gaps between them
+ for sparse repeat model.
+- Correctly reject POSIX collating elements (`[.ch.]`, `[=ch=]`) in the parser.
+ These are not supported by Hyperscan.
+- Add support for quoted sequences (`\Q...\E`) inside character classes.
+- Simplify FDR literal matcher runtime by removing some static specialization.
+- Fix handling of the POSIX `[:graph:]`, `[:print:]` and `[:punct:]` character
+ classes to match the behaviour of PCRE 8.38 in both standard operation and
+ with the UCP flag set. (Note: some bugs were fixed in this area in PCRE
+ 8.38.) Previously Hyperscan's behaviour was the same as versions of PCRE
+ before 8.34.
+- Improve performance when compiling pattern sets that include a large number
+ of similar bounded repeat constructs. (github issue #9)
+
+## [4.0.1] 2015-10-30
+- Minor cleanups to test code.
+- CMake and other build system improvements.
+- API update: allow `hs_reset_stream()` and `hs_reset_and_copy_stream()` to be
+ supplied with a NULL scratch pointer if no matches are required. This is in
+ line with the behaviour of `hs_close_stream()`.
+- Disallow bounded repeats with a very large minimum repeat but no maximum,
+ i.e. {N,} for very large N.
+- Reduce compile memory usage in literal set expansion for some large cases.
+
+## [4.0.0] 2015-10-20
+- Original release of Hyperscan as open-source software.
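
A minimal usage sketch (editorial, not part of the patch) for the relaxed
`hs_reset_stream()` behaviour noted in the 4.0.1 entry above. It assumes the
documented streaming runtime API from `hs_runtime.h` and a stream already
opened with `hs_open_stream()`; the helper name `recycle_stream` and the
header path are illustrative only.

    #include <hs/hs.h>
    #include <stddef.h>

    /* Hypothetical helper: rewind an open stream for reuse without reporting
     * the matches that would otherwise fire at end-of-data. Since no match
     * callback is supplied, a NULL scratch pointer is accepted, mirroring
     * hs_close_stream(). */
    static hs_error_t recycle_stream(hs_stream_t *stream) {
        return hs_reset_stream(stream, 0 /* flags */, NULL /* scratch */,
                               NULL /* onEvent */, NULL /* context */);
    }

When end-of-data matches are wanted, a match callback and a valid scratch
region are still required, exactly as before.
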
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 67885c93..b4d81754 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,13 +2,13 @@ cmake_minimum_required (VERSION 2.8.11)
project (Hyperscan C CXX)
set (HS_MAJOR_VERSION 4)
-set (HS_MINOR_VERSION 0)
-set (HS_PATCH_VERSION 1)
+set (HS_MINOR_VERSION 1)
+set (HS_PATCH_VERSION 0)
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
string (TIMESTAMP BUILD_DATE "%Y-%m-%d")
-set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)
+set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
include(CheckCCompilerFlag)
include(CheckCXXCompilerFlag)
INCLUDE (CheckFunctionExists)
@@ -56,8 +56,9 @@ if(CMAKE_GENERATOR STREQUAL Xcode)
set(XCODE TRUE)
endif()
-include_directories(src .)
-include_directories(${CMAKE_BINARY_DIR})
+set(CMAKE_INCLUDE_CURRENT_DIR 1)
+include_directories(${PROJECT_SOURCE_DIR}/src)
+include_directories(${PROJECT_BINARY_DIR})
include_directories(SYSTEM include)
set(BOOST_USE_STATIC_LIBS OFF)
@@ -71,7 +72,7 @@ find_package(Boost ${BOOST_MINVERSION})
if(NOT Boost_FOUND)
# we might have boost in tree, so provide a hint and try again
message(STATUS "trying include dir for boost")
- set(BOOST_INCLUDEDIR "${CMAKE_SOURCE_DIR}/include")
+ set(BOOST_INCLUDEDIR "${PROJECT_SOURCE_DIR}/include")
find_package(Boost ${BOOST_MINVERSION})
if(NOT Boost_FOUND)
message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. Either install system pacakges if available, extract Boost headers to ${CMAKE_SOURCE_DIR}/include, or set the CMake BOOST_ROOT variable.")
@@ -219,6 +220,15 @@ CHECK_FUNCTION_EXISTS(_aligned_malloc HAVE__ALIGNED_MALLOC)
CHECK_C_COMPILER_FLAG(-fvisibility=hidden HAS_C_HIDDEN)
CHECK_CXX_COMPILER_FLAG(-fvisibility=hidden HAS_CXX_HIDDEN)
+if (RELEASE_BUILD)
+ if (HAS_C_HIDDEN)
+ set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fvisibility=hidden")
+ endif()
+ if (HAS_CXX_HIDDEN)
+ set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fvisibility=hidden")
+ endif()
+endif()
+
# testing a builtin takes a little more work
CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED)
CHECK_CXX_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED)
@@ -327,8 +337,8 @@ if (EXISTS ${CMAKE_SOURCE_DIR}/tools)
endif()
# do substitutions
-configure_file(${CMAKE_MODULE_PATH}/config.h.in ${CMAKE_BINARY_DIR}/config.h)
-configure_file(src/hs_version.h.in hs_version.h)
+configure_file(${CMAKE_MODULE_PATH}/config.h.in ${PROJECT_BINARY_DIR}/config.h)
+configure_file(src/hs_version.h.in ${PROJECT_BINARY_DIR}/hs_version.h)
if (PKG_CONFIG_FOUND)
# we really only need to do this if we have pkg-config
@@ -345,7 +355,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
# include the autogen targets
add_subdirectory(src/fdr)
-include_directories(${CMAKE_BINARY_DIR}/src/fdr)
+include_directories(${PROJECT_BINARY_DIR}/src/fdr)
if(NOT WIN32)
set(RAGEL_C_FLAGS "-Wno-unused")
diff --git a/README.md b/README.md
index 37fc38ce..1753ecbe 100644
--- a/README.md
+++ b/README.md
@@ -20,3 +20,24 @@ the [Developer Reference Guide](http://01org.github.io/hyperscan/dev-reference/)
Hyperscan is licensed under the BSD License. See the LICENSE file in the
project repository.
+# Versioning
+
+The `master` branch on Github will always contain the most recent release of
+Hyperscan. Each version released to `master` goes through QA and testing before
+it is released; if you're a user, rather than a developer, this is the version
+you should be using.
+
+Further development towards the next release takes place on the `develop`
+branch.
+
+# Get Involved
+
+The official homepage for Hyperscan is at [01.org/hyperscan](https://01.org/hyperscan).
+
+If you have questions or comments, we encourage you to [join the mailing
+list](https://lists.01.org/mailman/listinfo/hyperscan). Bugs can be filed by
+sending email to the list, or by creating an issue on Github.
+
+If you wish to contact the Hyperscan team at Intel directly, without posting
+publicly to the mailing list, send email to
+[hyperscan@intel.com](mailto:hyperscan@intel.com).
diff --git a/doc/dev-reference/compilation.rst b/doc/dev-reference/compilation.rst
index 6e195f6a..f3723dc9 100644
--- a/doc/dev-reference/compilation.rst
+++ b/doc/dev-reference/compilation.rst
@@ -63,6 +63,9 @@ described at . However, not all constructs available in
libpcre are supported. The use of unsupported constructs will result in
compilation errors.
+The version of PCRE used to validate Hyperscan's interpretation of this syntax
+is 8.38.
+
====================
Supported Constructs
====================
diff --git a/examples/simplegrep.c b/examples/simplegrep.c
index 2fe6e6f3..9e392a8f 100644
--- a/examples/simplegrep.c
+++ b/examples/simplegrep.c
@@ -109,7 +109,7 @@ static char *readInputData(const char *inputFN, unsigned int *length) {
* limit the size of our buffer appropriately. */
if ((unsigned long)dataLen > UINT_MAX) {
dataLen = UINT_MAX;
- printf("WARNING: clipping data to %lu bytes\n", dataLen);
+ printf("WARNING: clipping data to %ld bytes\n", dataLen);
} else if (dataLen == 0) {
fprintf(stderr, "ERROR: input file \"%s\" is empty\n", inputFN);
fclose(f);
@@ -118,7 +118,7 @@ static char *readInputData(const char *inputFN, unsigned int *length) {
char *inputData = malloc(dataLen);
if (!inputData) {
- fprintf(stderr, "ERROR: unable to malloc %lu bytes\n", dataLen);
+ fprintf(stderr, "ERROR: unable to malloc %ld bytes\n", dataLen);
fclose(f);
return NULL;
}
diff --git a/src/fdr/CMakeLists.txt b/src/fdr/CMakeLists.txt
index 25396689..1436c3fc 100644
--- a/src/fdr/CMakeLists.txt
+++ b/src/fdr/CMakeLists.txt
@@ -27,11 +27,11 @@ fdr_autogen(teddy_runtime teddy_autogen.c)
fdr_autogen(teddy_compiler teddy_autogen_compiler.cpp)
set(fdr_GENERATED_SRC
-${CMAKE_BINARY_DIR}/src/fdr/fdr_autogen.c
-${CMAKE_BINARY_DIR}/src/fdr/fdr_autogen_compiler.cpp
-${CMAKE_BINARY_DIR}/src/fdr/teddy_autogen.c
-${CMAKE_BINARY_DIR}/src/fdr/teddy_autogen_compiler.cpp
-PARENT_SCOPE)
+ ${PROJECT_BINARY_DIR}/src/fdr/fdr_autogen.c
+ ${PROJECT_BINARY_DIR}/src/fdr/fdr_autogen_compiler.cpp
+ ${PROJECT_BINARY_DIR}/src/fdr/teddy_autogen.c
+ ${PROJECT_BINARY_DIR}/src/fdr/teddy_autogen_compiler.cpp
+ PARENT_SCOPE)
set_source_files_properties(${fdr_GENERATED_SRC} PROPERTIES GENERATED TRUE)
include_directories(${CMAKE_CURRENT_BINARY_DIR})
diff --git a/src/fdr/autogen.py b/src/fdr/autogen.py
index 36e4c16c..e5b4f39e 100755
--- a/src/fdr/autogen.py
+++ b/src/fdr/autogen.py
@@ -54,16 +54,11 @@ def produce_fdr_compiles(l):
def build_fdr_matchers():
all_matchers = [ ]
- domains = [8, 10, 11, 12, 13]
- big_domains = [ 14, 15 ]
+ strides = [ 1, 2, 4 ]
common = { "state_width" : 128, "num_buckets" : 8, "extract_frequency" : 8, "arch" : arch_x86_64 }
- for d in domains:
- all_matchers += [ M3(stride = 1, domain = d, **common) ]
- all_matchers += [ M3(stride = 2, domain = d, **common) ]
- all_matchers += [ M3(stride = 4, domain = d, **common) ]
- for d in big_domains:
- all_matchers += [ M3(stride = 1, domain = d, **common) ]
+ for s in strides:
+ all_matchers += [ M3(stride = s, **common) ]
return all_matchers
diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c
index 082800f1..f83a4265 100644
--- a/src/fdr/fdr.c
+++ b/src/fdr/fdr.c
@@ -40,27 +40,6 @@
#include "fdr_confirm_runtime.h"
#include "fdr_streaming_runtime.h"
#include "fdr_loadval.h"
-
-static really_inline UNUSED
-u32 getPreStartVal(const struct FDR_Runtime_Args *a, u32 numBits) {
- u32 r = 0;
- if (a->start_offset == 0) {
- if (numBits <= 8) {
- r = a->buf_history[a->len_history - 1];
- } else {
- r = a->buf_history[a->len_history - 1];
- r |= (a->buf[0] << 8);
- }
- } else {
- if (numBits <= 8) {
- r = a->buf[a->start_offset - 1];
- } else {
- r = lv_u16(a->buf + a->start_offset - 1, a->buf, a->buf + a->len);
- }
- }
- return r & ((1 << numBits) - 1);
-}
-
#include "fdr_autogen.c"
#define FAKE_HISTORY_SIZE 16
diff --git a/src/fdr/fdr_autogen.py b/src/fdr/fdr_autogen.py
index 685cca3b..748d811f 100755
--- a/src/fdr/fdr_autogen.py
+++ b/src/fdr/fdr_autogen.py
@@ -74,12 +74,12 @@ class ValueExtractStep(Step):
dsb = m.datasize_bytes
modval = offset % dsb
- if m.domain > 8 and modval == dsb - 1:
+ if modval == dsb - 1:
# Case 1: reading more than one byte over the end of the bulk load
self.latency = 4
if sub_load_cautious:
- code_string = "cautious_forward"
+ code_string = "cautious_forward"
else:
code_string = "normal"
load_string = m.single_load_type.load_expr_data(self.offset, code_string)
@@ -101,7 +101,7 @@ class ValueExtractStep(Step):
temp_string = "(%s >> %d)" % (lb_var.name, modval*8 - m.reach_shift_adjust)
- init_string = "(%s) & 0x%x" % (temp_string, m.reach_mask)
+ init_string = "(%s) & (domain_mask << %d)" % (temp_string, m.reach_shift_adjust)
v_var = self.nv(m.value_extract_type, "v%d" % offset)
self.val = v_var.gen_initializer_stmt(init_string)
@@ -173,14 +173,10 @@ class ConfirmStep(Step):
enable_confirmless = m.stride == 1, do_bailout = False)
class M3(MatcherBase):
- def get_hash_safety_parameters(self):
- h_size = self.single_load_type.size_in_bytes()
- return (0, h_size - 1)
-
def produce_compile_call(self):
- print " { %d, %d, %d, %d, %d, %s, %d, %d }," % (
+ print " { %d, %d, %d, %d, %s, %d, %d }," % (
self.id, self.state_width, self.num_buckets,
- self.stride, self.domain,
+ self.stride,
self.arch.target, self.conf_pull_back, self.conf_top_level_split)
def produce_main_loop(self, switch_variant = False):
@@ -192,8 +188,8 @@ class M3(MatcherBase):
ctxt = CodeGenContext(self)
if switch_variant:
- print " ptr -= (iterBytes - dist);"
- print " { " # need an extra scope around switch variant to stop its globals escaping
+ print " ptr -= (iterBytes - dist);"
+ print " { " # need an extra scope around switch variant to stop its globals escaping
else:
print " if (doMainLoop) {"
print " for (; ptr + LOOP_READ_AHEAD < buf + len; ptr += iterBytes) {"
@@ -349,25 +345,30 @@ class M3(MatcherBase):
shift_expr = "%s = %s" % (state.name, state.type.shift_expr(state.name, shift_distance))
s = Template("""
- $TYPENAME s;
- if (a->len_history) {
- u32 tmp = getPreStartVal(a, $DOMAIN);
- s = *((const $TYPENAME *)ft + tmp);
- $SHIFT_EXPR;
- } else {
- s = *(const $TYPENAME *)&fdr->start;
- }
+ $TYPENAME s;
+ if (a->len_history) {
+ u32 tmp = 0;
+ if (a->start_offset == 0) {
+ tmp = a->buf_history[a->len_history - 1];
+ tmp |= (a->buf[0] << 8);
+ } else {
+ tmp = lv_u16(a->buf + a->start_offset - 1, a->buf, a->buf + a->len);
+ }
+ tmp &= fdr->domainMask;
+ s = *((const $TYPENAME *)ft + tmp);
+ $SHIFT_EXPR;
+ } else {
+ s = *(const $TYPENAME *)&fdr->start;
+ }
""").substitute(TYPENAME = s_type.get_name(),
ZERO_EXPR = s_type.zero_expression(),
- DOMAIN = self.domain,
SHIFT_EXPR = shift_expr)
return s
def produce_code(self):
- (behind, ahead) = self.get_hash_safety_parameters()
- loop_read_behind = behind
- loop_read_ahead = self.loop_bytes + ahead
+ loop_read_behind = 0
+ loop_read_ahead = self.loop_bytes + 1
# we set up mask and shift stuff for extracting our masks from registers
#
@@ -380,7 +381,7 @@ class M3(MatcherBase):
ssb = self.state_type.size / 8 # state size in bytes
# Intel path
- if ssb == 16 and self.domain == 16:
+ if ssb == 16:
# obscure corner - we don't have the room in the register to
# do this for all values so we don't. domain==16 is pretty
# bad anyhow, of course
@@ -390,7 +391,6 @@ class M3(MatcherBase):
shift_amts = { 1 : 0, 2 : 1, 4 : 2, 8 : 3, 16: 4 }
self.reach_shift_adjust = shift_amts[ ssb/self.reach_mult ]
- self.reach_mask = ((1 << self.domain) - 1) << self.reach_shift_adjust
print self.produce_header(visible = False)
@@ -398,21 +398,19 @@ class M3(MatcherBase):
print " Arch: " + self.arch.name,
print " State type: " + self.state_type.get_name(),
print " Num buckets: %d" % self.num_buckets,
- print " Domain: %d" % self.domain,
print " Stride: %d" % self.stride
print self.produce_common_declarations()
- print
- print "\tconst size_t tabSize = %d;" % self.table_size
- print """
- const u8 * ft = (const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR));
- const u32 * confBase = (const u32 *)(ft + tabSize);
-"""
+ print " assert(fdr->domain > 8 && fdr->domain < 16);"
+ print
+ print " u64a domain_mask = fdr->domainMask;"
+ print " const u8 * ft = (const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR));"
+ print " const u32 * confBase = (const u32 *)(ft + fdr->tabSize);"
print self.produce_init_state()
- print "\tconst size_t iterBytes = %d;" % self.loop_bytes
- print "\tconst size_t START_MOD = %d;" % self.datasize_bytes
- print "\tconst size_t LOOP_READ_AHEAD = %d;" % loop_read_ahead
+ print " const size_t iterBytes = %d;" % self.loop_bytes
+ print " const size_t START_MOD = %d;" % self.datasize_bytes
+ print " const size_t LOOP_READ_AHEAD = %d;" % loop_read_ahead
print """
while (ptr < buf + len) {
@@ -451,9 +449,9 @@ class M3(MatcherBase):
print self.produce_footer()
def get_name(self):
- return "fdr_exec_%s_d%d_s%d_w%d" % (self.arch.name, self.domain, self.stride, self.state_width)
+ return "fdr_exec_%s_s%d_w%d" % (self.arch.name, self.stride, self.state_width)
- def __init__(self, state_width, domain, stride,
+ def __init__(self, state_width, stride,
arch,
table_state_width = None,
num_buckets = 8,
@@ -474,17 +472,9 @@ class M3(MatcherBase):
self.table_state_width = state_width
self.table_state_type = getRequiredType(self.table_state_width)
- # domain is the number of bits that we draw from our input to
- # index our 'reach' table
- if not 8 <= domain <= 16:
- fail_out("Unsupported domain: %d" % domain)
- self.domain = domain
- # this is the load type required for this domain if we want to
+ # this is the load type required for domain [9:15] if we want to
# load it one at a time
- self.single_load_type = getRequiredType(self.domain)
-
- # table size
- self.table_size = 2**domain * table_state_width // 8
+ self.single_load_type = IntegerType(16)
# stride is the frequency with which we make data-driven
# accesses to our reach table
diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp
index 8d658ccd..ccf17626 100644
--- a/src/fdr/fdr_compile.cpp
+++ b/src/fdr/fdr_compile.cpp
@@ -184,6 +184,13 @@ aligned_unique_ptr FDRCompiler::setupFDR(pair link) {
ptr += floodControlTmp.second;
aligned_free(floodControlTmp.first);
+ /* we are allowing domains 9 to 15 only */
+ assert(eng.bits > 8 && eng.bits < 16);
+ fdr->domain = eng.bits;
+ fdr->schemeWidthByte = eng.schemeWidth / 8;
+ fdr->domainMask = (1 << eng.bits) - 1;
+ fdr->tabSize = (1 << eng.bits) * fdr->schemeWidthByte;
+
if (link.first) {
fdr->link = verify_u32(ptr - fdr_base);
memcpy(ptr, link.first, link.second);
@@ -245,6 +252,8 @@ void FDRCompiler::assignStringsToBuckets() {
typedef pair SCORE_INDEX_PAIR;
u32 ls = verify_u32(lits.size());
+ assert(ls); // Shouldn't be called with no literals.
+
// make a vector that contains our literals as pointers or u32 LiteralIndex values
vector vli;
vli.resize(ls);
@@ -292,6 +301,8 @@ void FDRCompiler::assignStringsToBuckets() {
currentChunk++;
}
}
+
+ assert(currentChunk > 0);
count[currentChunk - 1] = ls - chunkStartID;
// close off chunks with an empty row
firstIds[currentChunk] = ls;
@@ -383,12 +394,14 @@ bool getMultiEntriesAtPosition(const FDREngineDescription &eng,
const vector &lits,
SuffixPositionInString pos,
std::map > &m2) {
+ assert(eng.bits < 32);
+
u32 distance = 0;
if (eng.bits <= 8) {
distance = 1;
} else if (eng.bits <= 16) {
distance = 2;
- } else if (eng.bits <= 32) {
+ } else {
distance = 4;
}
@@ -528,6 +541,11 @@ fdrBuildTableInternal(const vector &lits, bool make_small,
return nullptr;
}
+ // temporary hack for unit testing
+ if (hint != HINT_INVALID) {
+ des->bits = 9;
+ }
+
FDRCompiler fc(lits, *des, make_small);
return fc.build(link);
}
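
For orientation (editorial, not part of the patch): a small standalone sketch
of how the fields written in `setupFDR()` above work out, assuming a
hypothetical engine with the smallest permitted domain (9 bits) and a 128-bit
scheme width, matching the stride-only matchers now generated by autogen.py.

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        /* Assumed inputs: eng.bits == 9, eng.schemeWidth == 128. */
        uint32_t bits = 9;
        uint32_t schemeWidth = 128;

        uint32_t schemeWidthByte = schemeWidth / 8;         /* 16 bytes per table row */
        uint32_t domainMask = (1u << bits) - 1;             /* 0x1ff */
        uint32_t tabSize = (1u << bits) * schemeWidthByte;  /* 8192-byte reach table */

        printf("domainMask=0x%x tabSize=%u\n", domainMask, tabSize);
        return 0;
    }

The runtime side consumes these fields directly: the generated matchers now
read `fdr->domainMask` and `fdr->tabSize` at run time instead of baking a
per-domain constant into each specialized variant.
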
diff --git a/src/fdr/fdr_dump.cpp b/src/fdr/fdr_dump.cpp
index ae246270..158170c2 100644
--- a/src/fdr/fdr_dump.cpp
+++ b/src/fdr/fdr_dump.cpp
@@ -81,6 +81,7 @@ void fdrPrintStats(const FDR *fdr, FILE *f) {
unique_ptr<FDREngineDescription> des =
getFdrDescription(fdr->engineID);
if (des) {
+ fprintf(f, " domain %u\n", des->bits);
fprintf(f, " stride %u\n", des->stride);
fprintf(f, " buckets %u\n", des->getNumBuckets());
fprintf(f, " width %u\n", des->schemeWidth);
diff --git a/src/fdr/fdr_engine_description.cpp b/src/fdr/fdr_engine_description.cpp
index 2a6fda79..5d470c7e 100644
--- a/src/fdr/fdr_engine_description.cpp
+++ b/src/fdr/fdr_engine_description.cpp
@@ -48,7 +48,7 @@ FDREngineDescription::FDREngineDescription(const FDREngineDef &def)
: EngineDescription(def.id, targetByArchFeatures(def.cpu_features),
def.numBuckets, def.confirmPullBackDistance,
def.confirmTopLevelSplit),
- schemeWidth(def.schemeWidth), stride(def.stride), bits(def.bits) {}
+ schemeWidth(def.schemeWidth), stride(def.stride), bits(0) {}
u32 FDREngineDescription::getDefaultFloodSuffixLength() const {
// rounding up, so that scheme width 32 and 6 buckets is 6 not 5!
@@ -105,76 +105,83 @@ unique_ptr chooseEngine(const target_t &target,
DEBUG_PRINTF("%zu lits, msl=%zu, desiredStride=%u\n", vl.size(), msl,
desiredStride);
- const FDREngineDescription *best = nullptr;
+ FDREngineDescription *best = nullptr;
u32 best_score = 0;
- for (size_t engineID = 0; engineID < allDescs.size(); engineID++) {
- const FDREngineDescription &eng = allDescs[engineID];
- if (!eng.isValidOnTarget(target)) {
- continue;
- }
- if (msl < eng.stride) {
- continue;
- }
-
- u32 score = 100;
-
- score -= absdiff(desiredStride, eng.stride);
-
- if (eng.stride <= desiredStride) {
- score += eng.stride;
- }
-
- u32 effLits = vl.size(); /* * desiredStride;*/
- u32 ideal;
- if (effLits < eng.getNumBuckets()) {
- if (eng.stride == 1) {
- ideal = 8;
- } else {
- ideal = 10;
+ for (u32 domain = 9; domain <= 15; domain++) {
+ for (size_t engineID = 0; engineID < allDescs.size(); engineID++) {
+ // to make sure that domains >=14 have stride 1 according to origin
+ if (domain > 13 && engineID > 0) {
+ continue;
+ }
+ FDREngineDescription &eng = allDescs[engineID];
+ if (!eng.isValidOnTarget(target)) {
+ continue;
+ }
+ if (msl < eng.stride) {
+ continue;
}
- } else if (effLits < 20) {
- ideal = 10;
- } else if (effLits < 100) {
- ideal = 11;
- } else if (effLits < 1000) {
- ideal = 12;
- } else if (effLits < 10000) {
- ideal = 13;
- } else {
- ideal = 15;
- }
- if (ideal != 8 && eng.schemeWidth == 32) {
- ideal += 1;
- }
+ u32 score = 100;
- if (make_small) {
- ideal -= 2;
- }
+ score -= absdiff(desiredStride, eng.stride);
- if (eng.stride > 1) {
- ideal++;
- }
+ if (eng.stride <= desiredStride) {
+ score += eng.stride;
+ }
- DEBUG_PRINTF("effLits %u\n", effLits);
+ u32 effLits = vl.size(); /* * desiredStride;*/
+ u32 ideal;
+ if (effLits < eng.getNumBuckets()) {
+ if (eng.stride == 1) {
+ ideal = 8;
+ } else {
+ ideal = 10;
+ }
+ } else if (effLits < 20) {
+ ideal = 10;
+ } else if (effLits < 100) {
+ ideal = 11;
+ } else if (effLits < 1000) {
+ ideal = 12;
+ } else if (effLits < 10000) {
+ ideal = 13;
+ } else {
+ ideal = 15;
+ }
- if (target.is_atom_class() && !make_small && effLits < 4000) {
- /* Unless it is a very heavy case, we want to build smaller tables
- * on lightweight machines due to their small caches. */
- ideal -= 2;
- }
+ if (ideal != 8 && eng.schemeWidth == 32) {
+ ideal += 1;
+ }
- score -= absdiff(ideal, eng.bits);
+ if (make_small) {
+ ideal -= 2;
+ }
- DEBUG_PRINTF("fdr %u: width=%u, bits=%u, buckets=%u, stride=%u "
- "-> score=%u\n",
- eng.getID(), eng.schemeWidth, eng.bits,
- eng.getNumBuckets(), eng.stride, score);
+ if (eng.stride > 1) {
+ ideal++;
+ }
- if (!best || score > best_score) {
- best = &eng;
- best_score = score;
+ DEBUG_PRINTF("effLits %u\n", effLits);
+
+ if (target.is_atom_class() && !make_small && effLits < 4000) {
+ /* Unless it is a very heavy case, we want to build smaller tables
+ * on lightweight machines due to their small caches. */
+ ideal -= 2;
+ }
+
+ score -= absdiff(ideal, domain);
+
+ DEBUG_PRINTF("fdr %u: width=%u, bits=%u, buckets=%u, stride=%u "
+ "-> score=%u\n",
+ eng.getID(), eng.schemeWidth, eng.bits,
+ eng.getNumBuckets(), eng.stride, score);
+
+ if (!best || score > best_score) {
+ eng.bits = domain;
+ best = &eng;
+ best_score = score;
+ }
}
}
diff --git a/src/fdr/fdr_engine_description.h b/src/fdr/fdr_engine_description.h
index d936095b..45f64ac0 100644
--- a/src/fdr/fdr_engine_description.h
+++ b/src/fdr/fdr_engine_description.h
@@ -43,7 +43,6 @@ struct FDREngineDef {
u32 schemeWidth;
u32 numBuckets;
u32 stride;
- u32 bits;
u64a cpu_features;
u32 confirmPullBackDistance;
u32 confirmTopLevelSplit;
diff --git a/src/fdr/fdr_internal.h b/src/fdr/fdr_internal.h
index 6c722777..607e039c 100644
--- a/src/fdr/fdr_internal.h
+++ b/src/fdr/fdr_internal.h
@@ -76,9 +76,11 @@ struct FDR {
* structures (spillover strings and hash table) if we're a secondary
* structure. */
u32 link;
+ u8 domain; /* dynamic domain info */
+ u8 schemeWidthByte; /* scheme width in bytes */
+ u16 domainMask; /* pre-computed domain mask */
+ u32 tabSize; /* pre-computed hashtable size in bytes */
u32 pad1;
- u32 pad2;
- u32 pad3;
union {
u32 s_u32;
diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp
index 5ea0e873..e5cc9267 100644
--- a/src/nfa/castlecompile.cpp
+++ b/src/nfa/castlecompile.cpp
@@ -58,11 +58,13 @@
#include
using namespace std;
+using boost::adaptors::map_keys;
using boost::adaptors::map_values;
namespace ue2 {
#define CASTLE_MAX_TOPS 32
+#define CLIQUE_GRAPH_MAX_SIZE 1000
static
u32 depth_to_u32(const depth &d) {
@@ -106,51 +108,35 @@ void writeCastleScanEngine(const CharReach &cr, Castle *c) {
}
static
-size_t literalOverlap(const vector<CharReach> &a, const vector<CharReach> &b) {
+bool literalOverlap(const vector<CharReach> &a, const vector<CharReach> &b,
+ const size_t dist) {
for (size_t i = 0; i < b.size(); i++) {
+ if (i > dist) {
+ return true;
+ }
size_t overlap_len = b.size() - i;
if (overlap_len <= a.size()) {
if (matches(a.end() - overlap_len, a.end(), b.begin(),
b.end() - i)) {
- return i;
+ return false;
}
} else {
assert(overlap_len > a.size());
if (matches(a.begin(), a.end(), b.end() - i - a.size(),
b.end() - i)) {
- return i;
+ return false;
}
}
}
- return b.size();
+ return b.size() > dist;
}
-// UE-2666 case 1: The problem of find largest exclusive subcastles group
-// can be reformulated as finding the largest clique (subgraph where every
-// vertex is connected to every other vertex) in the graph. We use an
-// approximate algorithm here to find the maximum clique.
-// References
-// ----------
-// [1] Boppana, R., & Halldórsson, M. M. (1992).
-// Approximating maximum independent sets by excluding subgraphs.
-// BIT Numerical Mathematics, 32(2), 180–196. Springer.
-// doi:10.1007/BF01994876
-// ----------
-
struct CliqueVertexProps {
CliqueVertexProps() {}
explicit CliqueVertexProps(u32 state_in) : stateId(state_in) {}
u32 stateId = ~0U;
- u32 parentId = ~0U;
- bool leftChild = false; /* tells us if it is the left child of its parent */
- bool rightChildVisited = false; /* tells us if its right child is visited */
-
- vector<u32> clique1; /* clique for the left branch */
- vector<u32> indepSet1; /* independent set for the left branch */
- vector<u32> clique2; /* clique for the right branch */
- vector<u32> indepSet2; /* independent set for the right branch */
};
typedef boost::adjacency_list makeCG(const vector> &exclusiveSet) {
- u32 size = exclusiveSet.size();
-
- vector<CliqueVertex> vertices;
- unique_ptr<CliqueGraph> cg = make_unique<CliqueGraph>();
- for (u32 i = 0; i < size; ++i) {
- CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg);
- vertices.push_back(v);
- }
-
- // construct the complement graph, then its maximum independent sets
- // are equal to the maximum clique of the original graph
- for (u32 i = 0; i < size; ++i) {
- CliqueVertex s = vertices[i];
- vector complement(size, 0);
- for (u32 j = 0; j < exclusiveSet[i].size(); ++j) {
- u32 val = exclusiveSet[i][j];
- complement[val] = 1;
- }
-
- for (u32 k = i + 1; k < size; ++k) {
- if (!complement[k]) {
- CliqueVertex d = vertices[k];
- add_edge(s, d, *cg);
- }
- }
- }
- return cg;
-}
-
-static
-CliqueGraph createSubgraph(const CliqueGraph &cg,
- const vector<CliqueVertex> &vertices) {
- CliqueGraph g;
- map<u32, CliqueVertex> vertexMap;
- for (auto u : vertices) {
- u32 id = cg[u].stateId;
- CliqueVertex v = add_vertex(CliqueVertexProps(id), g);
- vertexMap[id] = v;
- }
-
- set<u32> found;
- for (auto u : vertices) {
- u32 srcId = cg[u].stateId;
- CliqueVertex src = vertexMap[srcId];
- found.insert(srcId);
- for (auto n : adjacent_vertices_range(u, cg)) {
- u32 dstId = cg[n].stateId;
- if (found.find(dstId) == found.end() &&
- vertexMap.find(dstId) != vertexMap.end()) {
- CliqueVertex dst = vertexMap[dstId];
- add_edge(src, dst, g);
- }
- }
- }
- return g;
-}
-
-static
-void getNeighborInfo(const CliqueGraph &g, vector<CliqueVertex> &neighbor,
- vector<CliqueVertex> &nonneighbor,
- const CliqueVertex &cv) {
+void getNeighborInfo(const CliqueGraph &g, vector<u32> &neighbor,
+ const CliqueVertex &cv, const set<u32> &group) {
u32 id = g[cv].stateId;
ue2::unordered_set<u32> neighborId;
// find neighbors for cv
- for (auto v : adjacent_vertices_range(cv, g)) {
- neighbor.push_back(v);
- neighborId.insert(g[v].stateId);
- }
-
- // find non-neighbors for cv
- for (auto v : vertices_range(g)) {
- if (g[v].stateId != id &&
- neighborId.find(g[v].stateId) == neighborId.end()) {
- nonneighbor.push_back(v);
+ for (const auto &v : adjacent_vertices_range(cv, g)) {
+ if (g[v].stateId != id && contains(group, g[v].stateId)){
+ neighbor.push_back(g[v].stateId);
+ neighborId.insert(g[v].stateId);
+ DEBUG_PRINTF("Neighbor:%u\n", g[v].stateId);
}
}
}
static
-void updateCliqueInfo(CliqueGraph &cg, const CliqueVertex &n,
- vector<u32> &clique, vector<u32> &indepSet) {
- u32 id = cg[n].stateId;
- if (cg[n].clique1.size() + 1 > cg[n].clique2.size()) {
- cg[n].clique1.push_back(id);
- clique.swap(cg[n].clique1);
- } else {
- clique.swap(cg[n].clique2);
- }
+void findCliqueGroup(CliqueGraph &cg, vector<u32> &clique) {
+ stack<vector<u32>> gStack;
- if (cg[n].indepSet2.size() + 1 > cg[n].indepSet1.size()) {
- cg[n].indepSet2.push_back(id);
- indepSet.swap(cg[n].indepSet2);
- } else {
- indepSet.swap(cg[n].indepSet1);
- }
-}
-
-static
-void findCliqueGroup(CliqueGraph &cg, vector<u32> &clique,
- vector<u32> &indepSet) {
- stack<CliqueGraph> gStack;
- gStack.push(cg);
-
- // create mapping between vertex and id
+ // Create mapping between vertex and id
map<u32, CliqueVertex> vertexMap;
- for (auto v : vertices_range(cg)) {
+ vector<u32> init;
+ for (const auto &v : vertices_range(cg)) {
vertexMap[cg[v].stateId] = v;
+ init.push_back(cg[v].stateId);
}
+ gStack.push(init);
- // get the vertex to start from
- ue2::unordered_set<u32> foundVertexId;
+ // Get the vertex to start from
CliqueGraph::vertex_iterator vi, ve;
tie(vi, ve) = vertices(cg);
- CliqueVertex start = *vi;
- u32 startId = cg[start].stateId;
-
- bool leftChild = false;
- u32 prevId = startId;
while (!gStack.empty()) {
- CliqueGraph g = gStack.top();
+ vector<u32> g = gStack.top();
gStack.pop();
- // choose a vertex from the graph
- tie(vi, ve) = vertices(g);
- CliqueVertex cv = *vi;
- u32 id = g[cv].stateId;
-
- // corresponding vertex in the original graph
- CliqueVertex n = vertexMap.at(id);
-
- vector<CliqueVertex> neighbor;
- vector<CliqueVertex> nonneighbor;
- getNeighborInfo(g, neighbor, nonneighbor, cv);
-
- if (foundVertexId.find(id) != foundVertexId.end()) {
- prevId = id;
- // get graph consisting of non-neighbors for right branch
- if (!cg[n].rightChildVisited) {
- gStack.push(g);
- if (!nonneighbor.empty()) {
- const CliqueGraph &nSub = createSubgraph(g, nonneighbor);
- gStack.push(nSub);
- leftChild = false;
- }
- cg[n].rightChildVisited = true;
- } else if (id != startId) {
- // both the left and right branches are visited,
- // update its parent's clique and independent sets
- u32 parentId = cg[n].parentId;
- CliqueVertex parent = vertexMap.at(parentId);
- if (cg[n].leftChild) {
- updateCliqueInfo(cg, n, cg[parent].clique1,
- cg[parent].indepSet1);
- } else {
- updateCliqueInfo(cg, n, cg[parent].clique2,
- cg[parent].indepSet2);
- }
- }
- } else {
- foundVertexId.insert(id);
- g[n].leftChild = leftChild;
- g[n].parentId = prevId;
- gStack.push(g);
- // get graph consisting of neighbors for left branch
- if (!neighbor.empty()) {
- const CliqueGraph &sub = createSubgraph(g, neighbor);
- gStack.push(sub);
- leftChild = true;
- }
- prevId = id;
+ // Choose a vertex from the graph
+ u32 id = g[0];
+ const CliqueVertex &n = vertexMap.at(id);
+ clique.push_back(id);
+ // Corresponding vertex in the original graph
+ vector<u32> neighbor;
+ set<u32> subgraphId(g.begin(), g.end());
+ getNeighborInfo(cg, neighbor, n, subgraphId);
+ // Get graph consisting of neighbors for left branch
+ if (!neighbor.empty()) {
+ gStack.push(neighbor);
}
}
- updateCliqueInfo(cg, start, clique, indepSet);
}
template <typename Graph>
@@ -345,18 +204,17 @@ bool graph_empty(const Graph &g) {
static
vector<u32> removeClique(CliqueGraph &cg) {
vector<vector<u32>> cliquesVec(1);
- vector<vector<u32>> indepSetsVec(1);
DEBUG_PRINTF("graph size:%lu\n", num_vertices(cg));
- findCliqueGroup(cg, cliquesVec[0], indepSetsVec[0]);
+ findCliqueGroup(cg, cliquesVec[0]);
while (!graph_empty(cg)) {
const vector<u32> &c = cliquesVec.back();
vector<CliqueVertex> dead;
- for (auto v : vertices_range(cg)) {
+ for (const auto &v : vertices_range(cg)) {
if (find(c.begin(), c.end(), cg[v].stateId) != c.end()) {
dead.push_back(v);
}
}
- for (auto v : dead) {
+ for (const auto &v : dead) {
clear_vertex(v, cg);
remove_vertex(v, cg);
}
@@ -364,30 +222,22 @@ vector removeClique(CliqueGraph &cg) {
break;
}
vector<u32> clique;
- vector<u32> indepSet;
- findCliqueGroup(cg, clique, indepSet);
+ findCliqueGroup(cg, clique);
cliquesVec.push_back(clique);
- indepSetsVec.push_back(indepSet);
}
// get the independent set with max size
size_t max = 0;
size_t id = 0;
- for (size_t j = 0; j < indepSetsVec.size(); ++j) {
- if (indepSetsVec[j].size() > max) {
- max = indepSetsVec[j].size();
+ for (size_t j = 0; j < cliquesVec.size(); ++j) {
+ if (cliquesVec[j].size() > max) {
+ max = cliquesVec[j].size();
id = j;
}
}
- DEBUG_PRINTF("clique size:%lu\n", indepSetsVec[id].size());
- return indepSetsVec[id];
-}
-
-static
-vector<u32> findMaxClique(const vector<vector<u32>> &exclusiveSet) {
- auto cg = makeCG(exclusiveSet);
- return removeClique(*cg);
+ DEBUG_PRINTF("clique size:%lu\n", cliquesVec[id].size());
+ return cliquesVec[id];
}
// if the location of any reset character in one literal are after
@@ -401,10 +251,10 @@ bool findExclusivePair(const u32 id1, const u32 id2,
const auto &triggers2 = triggers[id2];
for (u32 i = 0; i < triggers1.size(); ++i) {
for (u32 j = 0; j < triggers2.size(); ++j) {
- size_t max_overlap1 = literalOverlap(triggers1[i], triggers2[j]);
- size_t max_overlap2 = literalOverlap(triggers2[j], triggers1[i]);
- if (max_overlap1 <= min_reset_dist[id2][j] ||
- max_overlap2 <= min_reset_dist[id1][i]) {
+ if (!literalOverlap(triggers1[i], triggers2[j],
+ min_reset_dist[id2][j]) ||
+ !literalOverlap(triggers2[j], triggers1[i],
+ min_reset_dist[id1][i])) {
return false;
}
}
@@ -420,28 +270,33 @@ vector checkExclusion(const CharReach &cr,
return group;
}
- vector<vector<size_t> > min_reset_dist;
+ vector<vector<size_t>> min_reset_dist;
// get min reset distance for each repeat
for (auto it = triggers.begin(); it != triggers.end(); it++) {
const vector<size_t> &tmp_dist = minResetDistToEnd(*it, cr);
min_reset_dist.push_back(tmp_dist);
}
- vector<vector<u32>> exclusiveSet;
+ vector<CliqueVertex> vertices;
+ unique_ptr<CliqueGraph> cg = make_unique<CliqueGraph>();
+ for (u32 i = 0; i < triggers.size(); ++i) {
+ CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg);
+ vertices.push_back(v);
+ }
+
// find exclusive pair for each repeat
for (u32 i = 0; i < triggers.size(); ++i) {
- vector<u32> repeatIds;
+ CliqueVertex s = vertices[i];
for (u32 j = i + 1; j < triggers.size(); ++j) {
if (findExclusivePair(i, j, min_reset_dist, triggers)) {
- repeatIds.push_back(j);
+ CliqueVertex d = vertices[j];
+ add_edge(s, d, *cg);
}
}
- exclusiveSet.push_back(repeatIds);
- DEBUG_PRINTF("Exclusive pair size:%lu\n", repeatIds.size());
}
// find the largest exclusive group
- return findMaxClique(exclusiveSet);
+ return removeClique(*cg);
}
static
@@ -599,7 +454,7 @@ buildCastle(const CastleProto &proto,
repeatInfoPair.push_back(make_pair(min_period, is_reset));
- if (is_reset) {
+ if (is_reset && candidateRepeats.size() < CLIQUE_GRAPH_MAX_SIZE) {
candidateTriggers.push_back(triggers.at(top));
candidateRepeats.push_back(i);
}
@@ -608,7 +463,7 @@ buildCastle(const CastleProto &proto,
// Case 1: exclusive repeats
bool exclusive = false;
bool pureExclusive = false;
- u8 activeIdxSize = 0;
+ u32 activeIdxSize = 0;
set<u32> exclusiveGroup;
if (cc.grey.castleExclusive) {
vector<u32> tmpGroup = checkExclusion(cr, candidateTriggers);
@@ -617,7 +472,7 @@ buildCastle(const CastleProto &proto,
// Case 1: mutual exclusive repeats group found, initialize state
// sizes
exclusive = true;
- activeIdxSize = calcPackedBytes(exclusiveSize);
+ activeIdxSize = calcPackedBytes(numRepeats + 1);
if (exclusiveSize == numRepeats) {
pureExclusive = true;
streamStateSize = 0;
@@ -665,7 +520,7 @@ buildCastle(const CastleProto &proto,
c->numRepeats = verify_u32(subs.size());
c->exclusive = exclusive;
c->pureExclusive = pureExclusive;
- c->activeIdxSize = activeIdxSize;
+ c->activeIdxSize = verify_u8(activeIdxSize);
writeCastleScanEngine(cr, c);
@@ -710,8 +565,8 @@ buildCastle(const CastleProto &proto,
set<ReportID> all_reports(const CastleProto &proto) {
set<ReportID> reports;
- for (const PureRepeat &pr : proto.repeats | map_values) {
- reports.insert(pr.reports.begin(), pr.reports.end());
+ for (const ReportID &report : proto.report_map | map_keys) {
+ reports.insert(report);
}
return reports;
}
@@ -732,10 +587,30 @@ depth findMaxWidth(const CastleProto &proto) {
return max_width;
}
+depth findMinWidth(const CastleProto &proto, u32 top) {
+ if (!contains(proto.repeats, top)) {
+ assert(0); // should not happen
+ return depth::infinity();
+ }
+ return proto.repeats.at(top).bounds.min;
+}
+
+depth findMaxWidth(const CastleProto &proto, u32 top) {
+ if (!contains(proto.repeats, top)) {
+ assert(0); // should not happen
+ return depth(0);
+ }
+ return proto.repeats.at(top).bounds.max;
+}
+
CastleProto::CastleProto(const PureRepeat &pr) {
assert(pr.reach.any());
assert(pr.reports.size() == 1);
- repeats.insert(make_pair(0, pr));
+ u32 top = 0;
+ repeats.emplace(top, pr);
+ for (const auto &report : pr.reports) {
+ report_map[report].insert(top);
+ }
}
const CharReach &CastleProto::reach() const {
@@ -743,25 +618,29 @@ const CharReach &CastleProto::reach() const {
return repeats.begin()->second.reach;
}
-static
-u32 find_next_top(const map<u32, PureRepeat> &repeats) {
- u32 top = 0;
- for (; contains(repeats, top); top++) {
- // pass
- }
- return top;
-}
-
u32 CastleProto::add(const PureRepeat &pr) {
assert(repeats.size() < max_occupancy);
assert(pr.reach == reach());
assert(pr.reports.size() == 1);
- u32 top = find_next_top(repeats);
+ u32 top = next_top++;
DEBUG_PRINTF("selected unused top %u\n", top);
- repeats.insert(make_pair(top, pr));
+ assert(!contains(repeats, top));
+ repeats.emplace(top, pr);
+ for (const auto &report : pr.reports) {
+ report_map[report].insert(top);
+ }
return top;
}
+void CastleProto::erase(u32 top) {
+ DEBUG_PRINTF("erase top %u\n", top);
+ assert(contains(repeats, top));
+ repeats.erase(top);
+ for (auto &m : report_map) {
+ m.second.erase(top);
+ }
+}
+
u32 CastleProto::merge(const PureRepeat &pr) {
assert(repeats.size() <= max_occupancy);
assert(pr.reach == reach());
@@ -806,8 +685,7 @@ bool mergeCastle(CastleProto &c1, const CastleProto &c2,
const u32 top = m.first;
const PureRepeat &pr = m.second;
DEBUG_PRINTF("top %u\n", top);
- u32 new_top = find_next_top(c1.repeats);
- c1.repeats.insert(make_pair(new_top, pr));
+ u32 new_top = c1.add(pr);
top_map[top] = new_top;
DEBUG_PRINTF("adding repeat: map %u->%u\n", top, new_top);
}
@@ -823,12 +701,23 @@ void remapCastleTops(CastleProto &proto, map &top_map) {
for (const auto &m : proto.repeats) {
const u32 top = m.first;
const PureRepeat &pr = m.second;
- u32 new_top = find_next_top(out);
- out.insert(make_pair(new_top, pr));
+ u32 new_top = out.size();
+ out.emplace(new_top, pr);
top_map[top] = new_top;
}
proto.repeats.swap(out);
+
+ // Remap report map.
+ proto.report_map.clear();
+ for (const auto &m : proto.repeats) {
+ const u32 top = m.first;
+ const PureRepeat &pr = m.second;
+ for (const auto &report : pr.reports) {
+ proto.report_map[report].insert(top);
+ }
+ }
+
assert(proto.repeats.size() <= proto.max_occupancy);
}
@@ -904,18 +793,17 @@ bool is_equal(const CastleProto &c1, const CastleProto &c2) {
return c1.repeats == c2.repeats;
}
-bool requiresDedupe(const CastleProto &proto, const set<ReportID> &reports) {
- ue2::unordered_set<ReportID> seen;
- for (const PureRepeat &pr : proto.repeats | map_values) {
- for (const ReportID &report : pr.reports) {
- if (contains(reports, report)) {
- if (contains(seen, report)) {
- DEBUG_PRINTF("castle proto %p has dupe report %u\n", &proto,
- report);
- return true;
- }
- seen.insert(report);
- }
+bool requiresDedupe(const CastleProto &proto,
+ const ue2::flat_set<ReportID> &reports) {
+ for (const auto &report : reports) {
+ auto it = proto.report_map.find(report);
+ if (it == end(proto.report_map)) {
+ continue;
+ }
+ if (it->second.size() > 1) {
+ DEBUG_PRINTF("castle proto %p has dupe report %u\n", &proto,
+ report);
+ return true;
}
}
return false;
diff --git a/src/nfa/castlecompile.h b/src/nfa/castlecompile.h
index fbafb606..fc4bb991 100644
--- a/src/nfa/castlecompile.h
+++ b/src/nfa/castlecompile.h
@@ -38,6 +38,7 @@
#include "nfagraph/ng_repeat.h"
#include "util/alloc.h"
#include "util/depth.h"
+#include "util/ue2_containers.h"
#include