diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..c137017a --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,36 @@ +# Hyperscan Change Log + +This is a list of notable changes to Hyperscan, in reverse chronological order. + +## [4.1.0] 2015-12-18 +- Update version of PCRE used by testing tools as a syntax and semantic + reference to PCRE 8.38. +- Small updates to fix warnings identified by Coverity. +- Clean up and unify exception handling behaviour across GPR and SIMD NFA + models. +- Fix bug in handling of bounded repeat triggers with large gaps between them + for sparse repeat model. +- Correctly reject POSIX collating elements (`[.ch.]`, `[=ch=]`) in the parser. + These are not supported by Hyperscan. +- Add support for quoted sequences (`\Q...\E`) inside character classes. +- Simplify FDR literal matcher runtime by removing some static specialization. +- Fix handling of the POSIX `[:graph:]`, `[:print:]` and `[:punct:]` character + classes to match the behaviour of PCRE 8.38 in both standard operation and + with the UCP flag set. (Note: some bugs were fixed in this area in PCRE + 8.38.) Previously Hyperscan's behaviour was the same as versions of PCRE + before 8.34. +- Improve performance when compiling pattern sets that include a large number + of similar bounded repeat constructs. (github issue #9) + +## [4.0.1] 2015-10-30 +- Minor cleanups to test code. +- CMake and other build system improvements. +- API update: allow `hs_reset_stream()` and `hs_reset_and_copy_stream()` to be + supplied with a NULL scratch pointer if no matches are required. This is in + line with the behaviour of `hs_close_stream()`. +- Disallow bounded repeats with a very large minimum repeat but no maximum, + i.e. {N,} for very large N. +- Reduce compile memory usage in literal set expansion for some large cases. + +## [4.0.0] 2015-10-20 +- Original release of Hyperscan as open-source software. 
diff --git a/CMakeLists.txt b/CMakeLists.txt index 67885c93..b4d81754 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,13 +2,13 @@ cmake_minimum_required (VERSION 2.8.11) project (Hyperscan C CXX) set (HS_MAJOR_VERSION 4) -set (HS_MINOR_VERSION 0) -set (HS_PATCH_VERSION 1) +set (HS_MINOR_VERSION 1) +set (HS_PATCH_VERSION 0) set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION}) string (TIMESTAMP BUILD_DATE "%Y-%m-%d") -set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake) +set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) INCLUDE (CheckFunctionExists) @@ -56,8 +56,9 @@ if(CMAKE_GENERATOR STREQUAL Xcode) set(XCODE TRUE) endif() -include_directories(src .) -include_directories(${CMAKE_BINARY_DIR}) +set(CMAKE_INCLUDE_CURRENT_DIR 1) +include_directories(${PROJECT_SOURCE_DIR}/src) +include_directories(${PROJECT_BINARY_DIR}) include_directories(SYSTEM include) set(BOOST_USE_STATIC_LIBS OFF) @@ -71,7 +72,7 @@ find_package(Boost ${BOOST_MINVERSION}) if(NOT Boost_FOUND) # we might have boost in tree, so provide a hint and try again message(STATUS "trying include dir for boost") - set(BOOST_INCLUDEDIR "${CMAKE_SOURCE_DIR}/include") + set(BOOST_INCLUDEDIR "${PROJECT_SOURCE_DIR}/include") find_package(Boost ${BOOST_MINVERSION}) if(NOT Boost_FOUND) message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. 
Either install system pacakges if available, extract Boost headers to ${CMAKE_SOURCE_DIR}/include, or set the CMake BOOST_ROOT variable.") @@ -219,6 +220,15 @@ CHECK_FUNCTION_EXISTS(_aligned_malloc HAVE__ALIGNED_MALLOC) CHECK_C_COMPILER_FLAG(-fvisibility=hidden HAS_C_HIDDEN) CHECK_CXX_COMPILER_FLAG(-fvisibility=hidden HAS_CXX_HIDDEN) +if (RELEASE_BUILD) + if (HAS_C_HIDDEN) + set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fvisibility=hidden") + endif() + if (HAS_CXX_HIDDEN) + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fvisibility=hidden") + endif() +endif() + # testing a builtin takes a little more work CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED) CHECK_CXX_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED) @@ -327,8 +337,8 @@ if (EXISTS ${CMAKE_SOURCE_DIR}/tools) endif() # do substitutions -configure_file(${CMAKE_MODULE_PATH}/config.h.in ${CMAKE_BINARY_DIR}/config.h) -configure_file(src/hs_version.h.in hs_version.h) +configure_file(${CMAKE_MODULE_PATH}/config.h.in ${PROJECT_BINARY_DIR}/config.h) +configure_file(src/hs_version.h.in ${PROJECT_BINARY_DIR}/hs_version.h) if (PKG_CONFIG_FOUND) # we really only need to do this if we have pkg-config @@ -345,7 +355,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") # include the autogen targets add_subdirectory(src/fdr) -include_directories(${CMAKE_BINARY_DIR}/src/fdr) +include_directories(${PROJECT_BINARY_DIR}/src/fdr) if(NOT WIN32) set(RAGEL_C_FLAGS "-Wno-unused") diff --git a/README.md b/README.md index 37fc38ce..1753ecbe 100644 --- a/README.md +++ b/README.md @@ -20,3 +20,24 @@ the [Developer Reference Guide](http://01org.github.io/hyperscan/dev-reference/) Hyperscan is licensed under the BSD License. See the LICENSE file in the project repository. 
+# Versioning + +The `master` branch on Github will always contain the most recent release of +Hyperscan. Each version released to `master` goes through QA and testing before +it is released; if you're a user, rather than a developer, this is the version +you should be using. + +Further development towards the next release takes place on the `develop` +branch. + +# Get Involved + +The official homepage for Hyperscan is at [01.org/hyperscan](https://01.org/hyperscan). + +If you have questions or comments, we encourage you to [join the mailing +list](https://lists.01.org/mailman/listinfo/hyperscan). Bugs can be filed by +sending email to the list, or by creating an issue on Github. + +If you wish to contact the Hyperscan team at Intel directly, without posting +publicly to the mailing list, send email to +[hyperscan@intel.com](mailto:hyperscan@intel.com). diff --git a/doc/dev-reference/compilation.rst b/doc/dev-reference/compilation.rst index 6e195f6a..f3723dc9 100644 --- a/doc/dev-reference/compilation.rst +++ b/doc/dev-reference/compilation.rst @@ -63,6 +63,9 @@ described at . However, not all constructs available in libpcre are supported. The use of unsupported constructs will result in compilation errors. +The version of PCRE used to validate Hyperscan's interpretation of this syntax +is 8.38. + ==================== Supported Constructs ==================== diff --git a/examples/simplegrep.c b/examples/simplegrep.c index 2fe6e6f3..9e392a8f 100644 --- a/examples/simplegrep.c +++ b/examples/simplegrep.c @@ -109,7 +109,7 @@ static char *readInputData(const char *inputFN, unsigned int *length) { * limit the size of our buffer appropriately. 
*/ if ((unsigned long)dataLen > UINT_MAX) { dataLen = UINT_MAX; - printf("WARNING: clipping data to %lu bytes\n", dataLen); + printf("WARNING: clipping data to %ld bytes\n", dataLen); } else if (dataLen == 0) { fprintf(stderr, "ERROR: input file \"%s\" is empty\n", inputFN); fclose(f); @@ -118,7 +118,7 @@ static char *readInputData(const char *inputFN, unsigned int *length) { char *inputData = malloc(dataLen); if (!inputData) { - fprintf(stderr, "ERROR: unable to malloc %lu bytes\n", dataLen); + fprintf(stderr, "ERROR: unable to malloc %ld bytes\n", dataLen); fclose(f); return NULL; } diff --git a/src/fdr/CMakeLists.txt b/src/fdr/CMakeLists.txt index 25396689..1436c3fc 100644 --- a/src/fdr/CMakeLists.txt +++ b/src/fdr/CMakeLists.txt @@ -27,11 +27,11 @@ fdr_autogen(teddy_runtime teddy_autogen.c) fdr_autogen(teddy_compiler teddy_autogen_compiler.cpp) set(fdr_GENERATED_SRC -${CMAKE_BINARY_DIR}/src/fdr/fdr_autogen.c -${CMAKE_BINARY_DIR}/src/fdr/fdr_autogen_compiler.cpp -${CMAKE_BINARY_DIR}/src/fdr/teddy_autogen.c -${CMAKE_BINARY_DIR}/src/fdr/teddy_autogen_compiler.cpp -PARENT_SCOPE) + ${PROJECT_BINARY_DIR}/src/fdr/fdr_autogen.c + ${PROJECT_BINARY_DIR}/src/fdr/fdr_autogen_compiler.cpp + ${PROJECT_BINARY_DIR}/src/fdr/teddy_autogen.c + ${PROJECT_BINARY_DIR}/src/fdr/teddy_autogen_compiler.cpp + PARENT_SCOPE) set_source_files_properties(${fdr_GENERATED_SRC} PROPERTIES GENERATED TRUE) include_directories(${CMAKE_CURRENT_BINARY_DIR}) diff --git a/src/fdr/autogen.py b/src/fdr/autogen.py index 36e4c16c..e5b4f39e 100755 --- a/src/fdr/autogen.py +++ b/src/fdr/autogen.py @@ -54,16 +54,11 @@ def produce_fdr_compiles(l): def build_fdr_matchers(): all_matchers = [ ] - domains = [8, 10, 11, 12, 13] - big_domains = [ 14, 15 ] + strides = [ 1, 2, 4 ] common = { "state_width" : 128, "num_buckets" : 8, "extract_frequency" : 8, "arch" : arch_x86_64 } - for d in domains: - all_matchers += [ M3(stride = 1, domain = d, **common) ] - all_matchers += [ M3(stride = 2, domain = d, **common) ] - 
all_matchers += [ M3(stride = 4, domain = d, **common) ] - for d in big_domains: - all_matchers += [ M3(stride = 1, domain = d, **common) ] + for s in strides: + all_matchers += [ M3(stride = s, **common) ] return all_matchers diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index 082800f1..f83a4265 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -40,27 +40,6 @@ #include "fdr_confirm_runtime.h" #include "fdr_streaming_runtime.h" #include "fdr_loadval.h" - -static really_inline UNUSED -u32 getPreStartVal(const struct FDR_Runtime_Args *a, u32 numBits) { - u32 r = 0; - if (a->start_offset == 0) { - if (numBits <= 8) { - r = a->buf_history[a->len_history - 1]; - } else { - r = a->buf_history[a->len_history - 1]; - r |= (a->buf[0] << 8); - } - } else { - if (numBits <= 8) { - r = a->buf[a->start_offset - 1]; - } else { - r = lv_u16(a->buf + a->start_offset - 1, a->buf, a->buf + a->len); - } - } - return r & ((1 << numBits) - 1); -} - #include "fdr_autogen.c" #define FAKE_HISTORY_SIZE 16 diff --git a/src/fdr/fdr_autogen.py b/src/fdr/fdr_autogen.py index 685cca3b..748d811f 100755 --- a/src/fdr/fdr_autogen.py +++ b/src/fdr/fdr_autogen.py @@ -74,12 +74,12 @@ class ValueExtractStep(Step): dsb = m.datasize_bytes modval = offset % dsb - if m.domain > 8 and modval == dsb - 1: + if modval == dsb - 1: # Case 1: reading more than one byte over the end of the bulk load self.latency = 4 if sub_load_cautious: - code_string = "cautious_forward" + code_string = "cautious_forward" else: code_string = "normal" load_string = m.single_load_type.load_expr_data(self.offset, code_string) @@ -101,7 +101,7 @@ class ValueExtractStep(Step): temp_string = "(%s >> %d)" % (lb_var.name, modval*8 - m.reach_shift_adjust) - init_string = "(%s) & 0x%x" % (temp_string, m.reach_mask) + init_string = "(%s) & (domain_mask << %d)" % (temp_string, m.reach_shift_adjust) v_var = self.nv(m.value_extract_type, "v%d" % offset) self.val = v_var.gen_initializer_stmt(init_string) @@ -173,14 +173,10 @@ class 
ConfirmStep(Step): enable_confirmless = m.stride == 1, do_bailout = False) class M3(MatcherBase): - def get_hash_safety_parameters(self): - h_size = self.single_load_type.size_in_bytes() - return (0, h_size - 1) - def produce_compile_call(self): - print " { %d, %d, %d, %d, %d, %s, %d, %d }," % ( + print " { %d, %d, %d, %d, %s, %d, %d }," % ( self.id, self.state_width, self.num_buckets, - self.stride, self.domain, + self.stride, self.arch.target, self.conf_pull_back, self.conf_top_level_split) def produce_main_loop(self, switch_variant = False): @@ -192,8 +188,8 @@ class M3(MatcherBase): ctxt = CodeGenContext(self) if switch_variant: - print " ptr -= (iterBytes - dist);" - print " { " # need an extra scope around switch variant to stop its globals escaping + print " ptr -= (iterBytes - dist);" + print " { " # need an extra scope around switch variant to stop its globals escaping else: print " if (doMainLoop) {" print " for (; ptr + LOOP_READ_AHEAD < buf + len; ptr += iterBytes) {" @@ -349,25 +345,30 @@ class M3(MatcherBase): shift_expr = "%s = %s" % (state.name, state.type.shift_expr(state.name, shift_distance)) s = Template(""" - $TYPENAME s; - if (a->len_history) { - u32 tmp = getPreStartVal(a, $DOMAIN); - s = *((const $TYPENAME *)ft + tmp); - $SHIFT_EXPR; - } else { - s = *(const $TYPENAME *)&fdr->start; - } + $TYPENAME s; + if (a->len_history) { + u32 tmp = 0; + if (a->start_offset == 0) { + tmp = a->buf_history[a->len_history - 1]; + tmp |= (a->buf[0] << 8); + } else { + tmp = lv_u16(a->buf + a->start_offset - 1, a->buf, a->buf + a->len); + } + tmp &= fdr->domainMask; + s = *((const $TYPENAME *)ft + tmp); + $SHIFT_EXPR; + } else { + s = *(const $TYPENAME *)&fdr->start; + } """).substitute(TYPENAME = s_type.get_name(), ZERO_EXPR = s_type.zero_expression(), - DOMAIN = self.domain, SHIFT_EXPR = shift_expr) return s def produce_code(self): - (behind, ahead) = self.get_hash_safety_parameters() - loop_read_behind = behind - loop_read_ahead = self.loop_bytes + ahead + 
loop_read_behind = 0 + loop_read_ahead = self.loop_bytes + 1 # we set up mask and shift stuff for extracting our masks from registers # @@ -380,7 +381,7 @@ class M3(MatcherBase): ssb = self.state_type.size / 8 # state size in bytes # Intel path - if ssb == 16 and self.domain == 16: + if ssb == 16: # obscure corner - we don't have the room in the register to # do this for all values so we don't. domain==16 is pretty # bad anyhow, of course @@ -390,7 +391,6 @@ class M3(MatcherBase): shift_amts = { 1 : 0, 2 : 1, 4 : 2, 8 : 3, 16: 4 } self.reach_shift_adjust = shift_amts[ ssb/self.reach_mult ] - self.reach_mask = ((1 << self.domain) - 1) << self.reach_shift_adjust print self.produce_header(visible = False) @@ -398,21 +398,19 @@ class M3(MatcherBase): print " Arch: " + self.arch.name, print " State type: " + self.state_type.get_name(), print " Num buckets: %d" % self.num_buckets, - print " Domain: %d" % self.domain, print " Stride: %d" % self.stride print self.produce_common_declarations() - print - print "\tconst size_t tabSize = %d;" % self.table_size - print """ - const u8 * ft = (const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR)); - const u32 * confBase = (const u32 *)(ft + tabSize); -""" + print " assert(fdr->domain > 8 && fdr->domain < 16);" + print + print " u64a domain_mask = fdr->domainMask;" + print " const u8 * ft = (const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR));" + print " const u32 * confBase = (const u32 *)(ft + fdr->tabSize);" print self.produce_init_state() - print "\tconst size_t iterBytes = %d;" % self.loop_bytes - print "\tconst size_t START_MOD = %d;" % self.datasize_bytes - print "\tconst size_t LOOP_READ_AHEAD = %d;" % loop_read_ahead + print " const size_t iterBytes = %d;" % self.loop_bytes + print " const size_t START_MOD = %d;" % self.datasize_bytes + print " const size_t LOOP_READ_AHEAD = %d;" % loop_read_ahead print """ while (ptr < buf + len) { @@ -451,9 +449,9 @@ class M3(MatcherBase): print self.produce_footer() def get_name(self): - return 
"fdr_exec_%s_d%d_s%d_w%d" % (self.arch.name, self.domain, self.stride, self.state_width) + return "fdr_exec_%s_s%d_w%d" % (self.arch.name, self.stride, self.state_width) - def __init__(self, state_width, domain, stride, + def __init__(self, state_width, stride, arch, table_state_width = None, num_buckets = 8, @@ -474,17 +472,9 @@ class M3(MatcherBase): self.table_state_width = state_width self.table_state_type = getRequiredType(self.table_state_width) - # domain is the number of bits that we draw from our input to - # index our 'reach' table - if not 8 <= domain <= 16: - fail_out("Unsupported domain: %d" % domain) - self.domain = domain - # this is the load type required for this domain if we want to + # this is the load type required for domain [9:15] if we want to # load it one at a time - self.single_load_type = getRequiredType(self.domain) - - # table size - self.table_size = 2**domain * table_state_width // 8 + self.single_load_type = IntegerType(16) # stride is the frequency with which we make data-driven # accesses to our reach table diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 8d658ccd..ccf17626 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -184,6 +184,13 @@ aligned_unique_ptr FDRCompiler::setupFDR(pair link) { ptr += floodControlTmp.second; aligned_free(floodControlTmp.first); + /* we are allowing domains 9 to 15 only */ + assert(eng.bits > 8 && eng.bits < 16); + fdr->domain = eng.bits; + fdr->schemeWidthByte = eng.schemeWidth / 8; + fdr->domainMask = (1 << eng.bits) - 1; + fdr->tabSize = (1 << eng.bits) * fdr->schemeWidthByte; + if (link.first) { fdr->link = verify_u32(ptr - fdr_base); memcpy(ptr, link.first, link.second); @@ -245,6 +252,8 @@ void FDRCompiler::assignStringsToBuckets() { typedef pair SCORE_INDEX_PAIR; u32 ls = verify_u32(lits.size()); + assert(ls); // Shouldn't be called with no literals. 
+ // make a vector that contains our literals as pointers or u32 LiteralIndex values vector vli; vli.resize(ls); @@ -292,6 +301,8 @@ void FDRCompiler::assignStringsToBuckets() { currentChunk++; } } + + assert(currentChunk > 0); count[currentChunk - 1] = ls - chunkStartID; // close off chunks with an empty row firstIds[currentChunk] = ls; @@ -383,12 +394,14 @@ bool getMultiEntriesAtPosition(const FDREngineDescription &eng, const vector &lits, SuffixPositionInString pos, std::map > &m2) { + assert(eng.bits < 32); + u32 distance = 0; if (eng.bits <= 8) { distance = 1; } else if (eng.bits <= 16) { distance = 2; - } else if (eng.bits <= 32) { + } else { distance = 4; } @@ -528,6 +541,11 @@ fdrBuildTableInternal(const vector &lits, bool make_small, return nullptr; } + // temporary hack for unit testing + if (hint != HINT_INVALID) { + des->bits = 9; + } + FDRCompiler fc(lits, *des, make_small); return fc.build(link); } diff --git a/src/fdr/fdr_dump.cpp b/src/fdr/fdr_dump.cpp index ae246270..158170c2 100644 --- a/src/fdr/fdr_dump.cpp +++ b/src/fdr/fdr_dump.cpp @@ -81,6 +81,7 @@ void fdrPrintStats(const FDR *fdr, FILE *f) { unique_ptr des = getFdrDescription(fdr->engineID); if (des) { + fprintf(f, " domain %u\n", des->bits); fprintf(f, " stride %u\n", des->stride); fprintf(f, " buckets %u\n", des->getNumBuckets()); fprintf(f, " width %u\n", des->schemeWidth); diff --git a/src/fdr/fdr_engine_description.cpp b/src/fdr/fdr_engine_description.cpp index 2a6fda79..5d470c7e 100644 --- a/src/fdr/fdr_engine_description.cpp +++ b/src/fdr/fdr_engine_description.cpp @@ -48,7 +48,7 @@ FDREngineDescription::FDREngineDescription(const FDREngineDef &def) : EngineDescription(def.id, targetByArchFeatures(def.cpu_features), def.numBuckets, def.confirmPullBackDistance, def.confirmTopLevelSplit), - schemeWidth(def.schemeWidth), stride(def.stride), bits(def.bits) {} + schemeWidth(def.schemeWidth), stride(def.stride), bits(0) {} u32 FDREngineDescription::getDefaultFloodSuffixLength() const { // 
rounding up, so that scheme width 32 and 6 buckets is 6 not 5! @@ -105,76 +105,83 @@ unique_ptr chooseEngine(const target_t &target, DEBUG_PRINTF("%zu lits, msl=%zu, desiredStride=%u\n", vl.size(), msl, desiredStride); - const FDREngineDescription *best = nullptr; + FDREngineDescription *best = nullptr; u32 best_score = 0; - for (size_t engineID = 0; engineID < allDescs.size(); engineID++) { - const FDREngineDescription &eng = allDescs[engineID]; - if (!eng.isValidOnTarget(target)) { - continue; - } - if (msl < eng.stride) { - continue; - } - - u32 score = 100; - - score -= absdiff(desiredStride, eng.stride); - - if (eng.stride <= desiredStride) { - score += eng.stride; - } - - u32 effLits = vl.size(); /* * desiredStride;*/ - u32 ideal; - if (effLits < eng.getNumBuckets()) { - if (eng.stride == 1) { - ideal = 8; - } else { - ideal = 10; + for (u32 domain = 9; domain <= 15; domain++) { + for (size_t engineID = 0; engineID < allDescs.size(); engineID++) { + // to make sure that domains >=14 have stride 1 according to origin + if (domain > 13 && engineID > 0) { + continue; + } + FDREngineDescription &eng = allDescs[engineID]; + if (!eng.isValidOnTarget(target)) { + continue; + } + if (msl < eng.stride) { + continue; } - } else if (effLits < 20) { - ideal = 10; - } else if (effLits < 100) { - ideal = 11; - } else if (effLits < 1000) { - ideal = 12; - } else if (effLits < 10000) { - ideal = 13; - } else { - ideal = 15; - } - if (ideal != 8 && eng.schemeWidth == 32) { - ideal += 1; - } + u32 score = 100; - if (make_small) { - ideal -= 2; - } + score -= absdiff(desiredStride, eng.stride); - if (eng.stride > 1) { - ideal++; - } + if (eng.stride <= desiredStride) { + score += eng.stride; + } - DEBUG_PRINTF("effLits %u\n", effLits); + u32 effLits = vl.size(); /* * desiredStride;*/ + u32 ideal; + if (effLits < eng.getNumBuckets()) { + if (eng.stride == 1) { + ideal = 8; + } else { + ideal = 10; + } + } else if (effLits < 20) { + ideal = 10; + } else if (effLits < 100) { + 
ideal = 11; + } else if (effLits < 1000) { + ideal = 12; + } else if (effLits < 10000) { + ideal = 13; + } else { + ideal = 15; + } - if (target.is_atom_class() && !make_small && effLits < 4000) { - /* Unless it is a very heavy case, we want to build smaller tables - * on lightweight machines due to their small caches. */ - ideal -= 2; - } + if (ideal != 8 && eng.schemeWidth == 32) { + ideal += 1; + } - score -= absdiff(ideal, eng.bits); + if (make_small) { + ideal -= 2; + } - DEBUG_PRINTF("fdr %u: width=%u, bits=%u, buckets=%u, stride=%u " - "-> score=%u\n", - eng.getID(), eng.schemeWidth, eng.bits, - eng.getNumBuckets(), eng.stride, score); + if (eng.stride > 1) { + ideal++; + } - if (!best || score > best_score) { - best = &eng; - best_score = score; + DEBUG_PRINTF("effLits %u\n", effLits); + + if (target.is_atom_class() && !make_small && effLits < 4000) { + /* Unless it is a very heavy case, we want to build smaller tables + * on lightweight machines due to their small caches. */ + ideal -= 2; + } + + score -= absdiff(ideal, domain); + + DEBUG_PRINTF("fdr %u: width=%u, bits=%u, buckets=%u, stride=%u " + "-> score=%u\n", + eng.getID(), eng.schemeWidth, eng.bits, + eng.getNumBuckets(), eng.stride, score); + + if (!best || score > best_score) { + eng.bits = domain; + best = &eng; + best_score = score; + } } } diff --git a/src/fdr/fdr_engine_description.h b/src/fdr/fdr_engine_description.h index d936095b..45f64ac0 100644 --- a/src/fdr/fdr_engine_description.h +++ b/src/fdr/fdr_engine_description.h @@ -43,7 +43,6 @@ struct FDREngineDef { u32 schemeWidth; u32 numBuckets; u32 stride; - u32 bits; u64a cpu_features; u32 confirmPullBackDistance; u32 confirmTopLevelSplit; diff --git a/src/fdr/fdr_internal.h b/src/fdr/fdr_internal.h index 6c722777..607e039c 100644 --- a/src/fdr/fdr_internal.h +++ b/src/fdr/fdr_internal.h @@ -76,9 +76,11 @@ struct FDR { * structures (spillover strings and hash table) if we're a secondary * structure. 
*/ u32 link; + u8 domain; /* dynamic domain info */ + u8 schemeWidthByte; /* scheme width in bytes */ + u16 domainMask; /* pre-computed domain mask */ + u32 tabSize; /* pre-computed hashtable size in bytes */ u32 pad1; - u32 pad2; - u32 pad3; union { u32 s_u32; diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 5ea0e873..e5cc9267 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -58,11 +58,13 @@ #include using namespace std; +using boost::adaptors::map_keys; using boost::adaptors::map_values; namespace ue2 { #define CASTLE_MAX_TOPS 32 +#define CLIQUE_GRAPH_MAX_SIZE 1000 static u32 depth_to_u32(const depth &d) { @@ -106,51 +108,35 @@ void writeCastleScanEngine(const CharReach &cr, Castle *c) { } static -size_t literalOverlap(const vector &a, const vector &b) { +bool literalOverlap(const vector &a, const vector &b, + const size_t dist) { for (size_t i = 0; i < b.size(); i++) { + if (i > dist) { + return true; + } size_t overlap_len = b.size() - i; if (overlap_len <= a.size()) { if (matches(a.end() - overlap_len, a.end(), b.begin(), b.end() - i)) { - return i; + return false; } } else { assert(overlap_len > a.size()); if (matches(a.begin(), a.end(), b.end() - i - a.size(), b.end() - i)) { - return i; + return false; } } } - return b.size(); + return b.size() > dist; } -// UE-2666 case 1: The problem of find largest exclusive subcastles group -// can be reformulated as finding the largest clique (subgraph where every -// vertex is connected to every other vertex) in the graph. We use an -// approximate algorithm here to find the maximum clique. -// References -// ---------- -// [1] Boppana, R., & Halldórsson, M. M. (1992). -// Approximating maximum independent sets by excluding subgraphs. -// BIT Numerical Mathematics, 32(2), 180–196. Springer. 
-// doi:10.1007/BF01994876 -// ---------- - struct CliqueVertexProps { CliqueVertexProps() {} explicit CliqueVertexProps(u32 state_in) : stateId(state_in) {} u32 stateId = ~0U; - u32 parentId = ~0U; - bool leftChild = false; /* tells us if it is the left child of its parent */ - bool rightChildVisited = false; /* tells us if its right child is visited */ - - vector clique1; /* clique for the left branch */ - vector indepSet1; /* independent set for the left branch */ - vector clique2; /* clique for the right branch */ - vector indepSet2; /* independent set for the right branch */ }; typedef boost::adjacency_list makeCG(const vector> &exclusiveSet) { - u32 size = exclusiveSet.size(); - - vector vertices; - unique_ptr cg = make_unique(); - for (u32 i = 0; i < size; ++i) { - CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg); - vertices.push_back(v); - } - - // construct the complement graph, then its maximum independent sets - // are equal to the maximum clique of the original graph - for (u32 i = 0; i < size; ++i) { - CliqueVertex s = vertices[i]; - vector complement(size, 0); - for (u32 j = 0; j < exclusiveSet[i].size(); ++j) { - u32 val = exclusiveSet[i][j]; - complement[val] = 1; - } - - for (u32 k = i + 1; k < size; ++k) { - if (!complement[k]) { - CliqueVertex d = vertices[k]; - add_edge(s, d, *cg); - } - } - } - return cg; -} - -static -CliqueGraph createSubgraph(const CliqueGraph &cg, - const vector &vertices) { - CliqueGraph g; - map vertexMap; - for (auto u : vertices) { - u32 id = cg[u].stateId; - CliqueVertex v = add_vertex(CliqueVertexProps(id), g); - vertexMap[id] = v; - } - - set found; - for (auto u : vertices) { - u32 srcId = cg[u].stateId; - CliqueVertex src = vertexMap[srcId]; - found.insert(srcId); - for (auto n : adjacent_vertices_range(u, cg)) { - u32 dstId = cg[n].stateId; - if (found.find(dstId) == found.end() && - vertexMap.find(dstId) != vertexMap.end()) { - CliqueVertex dst = vertexMap[dstId]; - add_edge(src, dst, g); - } - } - } - 
return g; -} - -static -void getNeighborInfo(const CliqueGraph &g, vector &neighbor, - vector &nonneighbor, - const CliqueVertex &cv) { +void getNeighborInfo(const CliqueGraph &g, vector &neighbor, + const CliqueVertex &cv, const set &group) { u32 id = g[cv].stateId; ue2::unordered_set neighborId; // find neighbors for cv - for (auto v : adjacent_vertices_range(cv, g)) { - neighbor.push_back(v); - neighborId.insert(g[v].stateId); - } - - // find non-neighbors for cv - for (auto v : vertices_range(g)) { - if (g[v].stateId != id && - neighborId.find(g[v].stateId) == neighborId.end()) { - nonneighbor.push_back(v); + for (const auto &v : adjacent_vertices_range(cv, g)) { + if (g[v].stateId != id && contains(group, g[v].stateId)){ + neighbor.push_back(g[v].stateId); + neighborId.insert(g[v].stateId); + DEBUG_PRINTF("Neighbor:%u\n", g[v].stateId); } } } static -void updateCliqueInfo(CliqueGraph &cg, const CliqueVertex &n, - vector &clique, vector &indepSet) { - u32 id = cg[n].stateId; - if (cg[n].clique1.size() + 1 > cg[n].clique2.size()) { - cg[n].clique1.push_back(id); - clique.swap(cg[n].clique1); - } else { - clique.swap(cg[n].clique2); - } +void findCliqueGroup(CliqueGraph &cg, vector &clique) { + stack> gStack; - if (cg[n].indepSet2.size() + 1 > cg[n].indepSet1.size()) { - cg[n].indepSet2.push_back(id); - indepSet.swap(cg[n].indepSet2); - } else { - indepSet.swap(cg[n].indepSet1); - } -} - -static -void findCliqueGroup(CliqueGraph &cg, vector &clique, - vector &indepSet) { - stack gStack; - gStack.push(cg); - - // create mapping between vertex and id + // Create mapping between vertex and id map vertexMap; - for (auto v : vertices_range(cg)) { + vector init; + for (const auto &v : vertices_range(cg)) { vertexMap[cg[v].stateId] = v; + init.push_back(cg[v].stateId); } + gStack.push(init); - // get the vertex to start from - ue2::unordered_set foundVertexId; + // Get the vertex to start from CliqueGraph::vertex_iterator vi, ve; tie(vi, ve) = vertices(cg); - 
CliqueVertex start = *vi; - u32 startId = cg[start].stateId; - - bool leftChild = false; - u32 prevId = startId; while (!gStack.empty()) { - CliqueGraph g = gStack.top(); + vector g = gStack.top(); gStack.pop(); - // choose a vertex from the graph - tie(vi, ve) = vertices(g); - CliqueVertex cv = *vi; - u32 id = g[cv].stateId; - - // corresponding vertex in the original graph - CliqueVertex n = vertexMap.at(id); - - vector neighbor; - vector nonneighbor; - getNeighborInfo(g, neighbor, nonneighbor, cv); - - if (foundVertexId.find(id) != foundVertexId.end()) { - prevId = id; - // get graph consisting of non-neighbors for right branch - if (!cg[n].rightChildVisited) { - gStack.push(g); - if (!nonneighbor.empty()) { - const CliqueGraph &nSub = createSubgraph(g, nonneighbor); - gStack.push(nSub); - leftChild = false; - } - cg[n].rightChildVisited = true; - } else if (id != startId) { - // both the left and right branches are visited, - // update its parent's clique and independent sets - u32 parentId = cg[n].parentId; - CliqueVertex parent = vertexMap.at(parentId); - if (cg[n].leftChild) { - updateCliqueInfo(cg, n, cg[parent].clique1, - cg[parent].indepSet1); - } else { - updateCliqueInfo(cg, n, cg[parent].clique2, - cg[parent].indepSet2); - } - } - } else { - foundVertexId.insert(id); - g[n].leftChild = leftChild; - g[n].parentId = prevId; - gStack.push(g); - // get graph consisting of neighbors for left branch - if (!neighbor.empty()) { - const CliqueGraph &sub = createSubgraph(g, neighbor); - gStack.push(sub); - leftChild = true; - } - prevId = id; + // Choose a vertex from the graph + u32 id = g[0]; + const CliqueVertex &n = vertexMap.at(id); + clique.push_back(id); + // Corresponding vertex in the original graph + vector neighbor; + set subgraphId(g.begin(), g.end()); + getNeighborInfo(cg, neighbor, n, subgraphId); + // Get graph consisting of neighbors for left branch + if (!neighbor.empty()) { + gStack.push(neighbor); } } - updateCliqueInfo(cg, start, clique, 
indepSet); } template @@ -345,18 +204,17 @@ bool graph_empty(const Graph &g) { static vector removeClique(CliqueGraph &cg) { vector> cliquesVec(1); - vector> indepSetsVec(1); DEBUG_PRINTF("graph size:%lu\n", num_vertices(cg)); - findCliqueGroup(cg, cliquesVec[0], indepSetsVec[0]); + findCliqueGroup(cg, cliquesVec[0]); while (!graph_empty(cg)) { const vector &c = cliquesVec.back(); vector dead; - for (auto v : vertices_range(cg)) { + for (const auto &v : vertices_range(cg)) { if (find(c.begin(), c.end(), cg[v].stateId) != c.end()) { dead.push_back(v); } } - for (auto v : dead) { + for (const auto &v : dead) { clear_vertex(v, cg); remove_vertex(v, cg); } @@ -364,30 +222,22 @@ vector removeClique(CliqueGraph &cg) { break; } vector clique; - vector indepSet; - findCliqueGroup(cg, clique, indepSet); + findCliqueGroup(cg, clique); cliquesVec.push_back(clique); - indepSetsVec.push_back(indepSet); } // get the independent set with max size size_t max = 0; size_t id = 0; - for (size_t j = 0; j < indepSetsVec.size(); ++j) { - if (indepSetsVec[j].size() > max) { - max = indepSetsVec[j].size(); + for (size_t j = 0; j < cliquesVec.size(); ++j) { + if (cliquesVec[j].size() > max) { + max = cliquesVec[j].size(); id = j; } } - DEBUG_PRINTF("clique size:%lu\n", indepSetsVec[id].size()); - return indepSetsVec[id]; -} - -static -vector findMaxClique(const vector> &exclusiveSet) { - auto cg = makeCG(exclusiveSet); - return removeClique(*cg); + DEBUG_PRINTF("clique size:%lu\n", cliquesVec[id].size()); + return cliquesVec[id]; } // if the location of any reset character in one literal are after @@ -401,10 +251,10 @@ bool findExclusivePair(const u32 id1, const u32 id2, const auto &triggers2 = triggers[id2]; for (u32 i = 0; i < triggers1.size(); ++i) { for (u32 j = 0; j < triggers2.size(); ++j) { - size_t max_overlap1 = literalOverlap(triggers1[i], triggers2[j]); - size_t max_overlap2 = literalOverlap(triggers2[j], triggers1[i]); - if (max_overlap1 <= min_reset_dist[id2][j] || - 
max_overlap2 <= min_reset_dist[id1][i]) { + if (!literalOverlap(triggers1[i], triggers2[j], + min_reset_dist[id2][j]) || + !literalOverlap(triggers2[j], triggers1[i], + min_reset_dist[id1][i])) { return false; } } @@ -420,28 +270,33 @@ vector checkExclusion(const CharReach &cr, return group; } - vector > min_reset_dist; + vector> min_reset_dist; // get min reset distance for each repeat for (auto it = triggers.begin(); it != triggers.end(); it++) { const vector &tmp_dist = minResetDistToEnd(*it, cr); min_reset_dist.push_back(tmp_dist); } - vector> exclusiveSet; + vector vertices; + unique_ptr cg = make_unique(); + for (u32 i = 0; i < triggers.size(); ++i) { + CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg); + vertices.push_back(v); + } + // find exclusive pair for each repeat for (u32 i = 0; i < triggers.size(); ++i) { - vector repeatIds; + CliqueVertex s = vertices[i]; for (u32 j = i + 1; j < triggers.size(); ++j) { if (findExclusivePair(i, j, min_reset_dist, triggers)) { - repeatIds.push_back(j); + CliqueVertex d = vertices[j]; + add_edge(s, d, *cg); } } - exclusiveSet.push_back(repeatIds); - DEBUG_PRINTF("Exclusive pair size:%lu\n", repeatIds.size()); } // find the largest exclusive group - return findMaxClique(exclusiveSet); + return removeClique(*cg); } static @@ -599,7 +454,7 @@ buildCastle(const CastleProto &proto, repeatInfoPair.push_back(make_pair(min_period, is_reset)); - if (is_reset) { + if (is_reset && candidateRepeats.size() < CLIQUE_GRAPH_MAX_SIZE) { candidateTriggers.push_back(triggers.at(top)); candidateRepeats.push_back(i); } @@ -608,7 +463,7 @@ buildCastle(const CastleProto &proto, // Case 1: exclusive repeats bool exclusive = false; bool pureExclusive = false; - u8 activeIdxSize = 0; + u32 activeIdxSize = 0; set exclusiveGroup; if (cc.grey.castleExclusive) { vector tmpGroup = checkExclusion(cr, candidateTriggers); @@ -617,7 +472,7 @@ buildCastle(const CastleProto &proto, // Case 1: mutual exclusive repeats group found, initialize state // 
sizes exclusive = true; - activeIdxSize = calcPackedBytes(exclusiveSize); + activeIdxSize = calcPackedBytes(numRepeats + 1); if (exclusiveSize == numRepeats) { pureExclusive = true; streamStateSize = 0; @@ -665,7 +520,7 @@ buildCastle(const CastleProto &proto, c->numRepeats = verify_u32(subs.size()); c->exclusive = exclusive; c->pureExclusive = pureExclusive; - c->activeIdxSize = activeIdxSize; + c->activeIdxSize = verify_u8(activeIdxSize); writeCastleScanEngine(cr, c); @@ -710,8 +565,8 @@ buildCastle(const CastleProto &proto, set all_reports(const CastleProto &proto) { set reports; - for (const PureRepeat &pr : proto.repeats | map_values) { - reports.insert(pr.reports.begin(), pr.reports.end()); + for (const ReportID &report : proto.report_map | map_keys) { + reports.insert(report); } return reports; } @@ -732,10 +587,30 @@ depth findMaxWidth(const CastleProto &proto) { return max_width; } +depth findMinWidth(const CastleProto &proto, u32 top) { + if (!contains(proto.repeats, top)) { + assert(0); // should not happen + return depth::infinity(); + } + return proto.repeats.at(top).bounds.min; +} + +depth findMaxWidth(const CastleProto &proto, u32 top) { + if (!contains(proto.repeats, top)) { + assert(0); // should not happen + return depth(0); + } + return proto.repeats.at(top).bounds.max; +} + CastleProto::CastleProto(const PureRepeat &pr) { assert(pr.reach.any()); assert(pr.reports.size() == 1); - repeats.insert(make_pair(0, pr)); + u32 top = 0; + repeats.emplace(top, pr); + for (const auto &report : pr.reports) { + report_map[report].insert(top); + } } const CharReach &CastleProto::reach() const { @@ -743,25 +618,29 @@ const CharReach &CastleProto::reach() const { return repeats.begin()->second.reach; } -static -u32 find_next_top(const map &repeats) { - u32 top = 0; - for (; contains(repeats, top); top++) { - // pass - } - return top; -} - u32 CastleProto::add(const PureRepeat &pr) { assert(repeats.size() < max_occupancy); assert(pr.reach == reach()); 
assert(pr.reports.size() == 1); - u32 top = find_next_top(repeats); + u32 top = next_top++; DEBUG_PRINTF("selected unused top %u\n", top); - repeats.insert(make_pair(top, pr)); + assert(!contains(repeats, top)); + repeats.emplace(top, pr); + for (const auto &report : pr.reports) { + report_map[report].insert(top); + } return top; } +void CastleProto::erase(u32 top) { + DEBUG_PRINTF("erase top %u\n", top); + assert(contains(repeats, top)); + repeats.erase(top); + for (auto &m : report_map) { + m.second.erase(top); + } +} + u32 CastleProto::merge(const PureRepeat &pr) { assert(repeats.size() <= max_occupancy); assert(pr.reach == reach()); @@ -806,8 +685,7 @@ bool mergeCastle(CastleProto &c1, const CastleProto &c2, const u32 top = m.first; const PureRepeat &pr = m.second; DEBUG_PRINTF("top %u\n", top); - u32 new_top = find_next_top(c1.repeats); - c1.repeats.insert(make_pair(new_top, pr)); + u32 new_top = c1.add(pr); top_map[top] = new_top; DEBUG_PRINTF("adding repeat: map %u->%u\n", top, new_top); } @@ -823,12 +701,23 @@ void remapCastleTops(CastleProto &proto, map &top_map) { for (const auto &m : proto.repeats) { const u32 top = m.first; const PureRepeat &pr = m.second; - u32 new_top = find_next_top(out); - out.insert(make_pair(new_top, pr)); + u32 new_top = out.size(); + out.emplace(new_top, pr); top_map[top] = new_top; } proto.repeats.swap(out); + + // Remap report map. 
+ proto.report_map.clear(); + for (const auto &m : proto.repeats) { + const u32 top = m.first; + const PureRepeat &pr = m.second; + for (const auto &report : pr.reports) { + proto.report_map[report].insert(top); + } + } + assert(proto.repeats.size() <= proto.max_occupancy); } @@ -904,18 +793,17 @@ bool is_equal(const CastleProto &c1, const CastleProto &c2) { return c1.repeats == c2.repeats; } -bool requiresDedupe(const CastleProto &proto, const set &reports) { - ue2::unordered_set seen; - for (const PureRepeat &pr : proto.repeats | map_values) { - for (const ReportID &report : pr.reports) { - if (contains(reports, report)) { - if (contains(seen, report)) { - DEBUG_PRINTF("castle proto %p has dupe report %u\n", &proto, - report); - return true; - } - seen.insert(report); - } +bool requiresDedupe(const CastleProto &proto, + const ue2::flat_set &reports) { + for (const auto &report : reports) { + auto it = proto.report_map.find(report); + if (it == end(proto.report_map)) { + continue; + } + if (it->second.size() > 1) { + DEBUG_PRINTF("castle proto %p has dupe report %u\n", &proto, + report); + return true; } } return false; diff --git a/src/nfa/castlecompile.h b/src/nfa/castlecompile.h index fbafb606..fc4bb991 100644 --- a/src/nfa/castlecompile.h +++ b/src/nfa/castlecompile.h @@ -38,6 +38,7 @@ #include "nfagraph/ng_repeat.h" #include "util/alloc.h" #include "util/depth.h" +#include "util/ue2_containers.h" #include #include @@ -67,8 +68,12 @@ struct CastleProto { explicit CastleProto(const PureRepeat &pr); const CharReach &reach() const; + /** \brief Add a new repeat. */ u32 add(const PureRepeat &pr); + /** \brief Remove a repeat. */ + void erase(u32 top); + /** * \brief Merge in the given repeat, returning the top used. * @@ -80,11 +85,22 @@ struct CastleProto { /** \brief Mapping from unique top id to repeat. */ std::map repeats; + + /** \brief Mapping from report to associated tops. */ + ue2::unordered_map> report_map; + + /** + * \brief Next top id to use. 
Repeats may be removed without top remapping, + * so we track this explicitly instead of using repeats.size(). + */ + u32 next_top = 1; }; std::set all_reports(const CastleProto &proto); depth findMinWidth(const CastleProto &proto); depth findMaxWidth(const CastleProto &proto); +depth findMinWidth(const CastleProto &proto, u32 top); +depth findMaxWidth(const CastleProto &proto, u32 top); /** * \brief Remap tops to be contiguous. @@ -133,7 +149,8 @@ bool is_equal(const CastleProto &c1, const CastleProto &c2); * \brief True if the given castle contains more than a single instance of any * of the reports in the given set. */ -bool requiresDedupe(const CastleProto &proto, const std::set &reports); +bool requiresDedupe(const CastleProto &proto, + const ue2::flat_set &reports); /** * \brief Build an NGHolder from a CastleProto. diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp index 92d09aa5..d735c80a 100644 --- a/src/nfa/goughcompile.cpp +++ b/src/nfa/goughcompile.cpp @@ -1109,7 +1109,7 @@ aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, u32 total_prog_size = byte_length(temp_blocks); curr_offset += total_prog_size; - gi.stream_som_loc_count = slot_count; + gi.stream_som_loc_count = slot_count; gi.stream_som_loc_width = somPrecision; u32 gough_size = ROUNDUP_N(curr_offset, 16); @@ -1136,16 +1136,11 @@ aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, gough_dfa->length = gough_size; /* copy in blocks */ - memcpy((u8 *)gough_dfa.get() + edge_prog_offset, &edge_blocks[0], - byte_length(edge_blocks)); + copy_bytes((u8 *)gough_dfa.get() + edge_prog_offset, edge_blocks); if (top_prog_offset) { - memcpy((u8 *)gough_dfa.get() + top_prog_offset, &top_blocks[0], - byte_length(top_blocks)); - } - if (!temp_blocks.empty()) { - memcpy((u8 *)gough_dfa.get() + prog_base_offset, &temp_blocks[0], - byte_length(temp_blocks)); + copy_bytes((u8 *)gough_dfa.get() + top_prog_offset, top_blocks); } + copy_bytes((u8 *)gough_dfa.get() + 
prog_base_offset, temp_blocks); return gough_dfa; } diff --git a/src/nfa/goughcompile.h b/src/nfa/goughcompile.h index b575c75b..9da983d4 100644 --- a/src/nfa/goughcompile.h +++ b/src/nfa/goughcompile.h @@ -70,8 +70,11 @@ struct dstate_som { }; struct raw_som_dfa : public raw_dfa { - raw_som_dfa(nfa_kind k, bool unordered_som_triggers_in) - : raw_dfa(k), unordered_som_triggers(unordered_som_triggers_in) { + raw_som_dfa(nfa_kind k, bool unordered_som_triggers_in, u32 trigger, + u32 stream_som_loc_width_in) + : raw_dfa(k), stream_som_loc_width(stream_som_loc_width_in), + unordered_som_triggers(unordered_som_triggers_in), + trigger_nfa_state(trigger) { assert(!unordered_som_triggers || is_triggered(kind)); } diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 5cf46334..a6c34cb6 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -1397,8 +1397,7 @@ struct Factory { repeat->horizon = rsi.horizon; repeat->packedCtrlSize = rsi.packedCtrlSize; repeat->stateSize = rsi.stateSize; - memcpy(repeat->packedFieldSizes, rsi.packedFieldSizes.data(), - byte_length(rsi.packedFieldSizes)); + copy_bytes(repeat->packedFieldSizes, rsi.packedFieldSizes); repeat->patchCount = rsi.patchCount; repeat->patchSize = rsi.patchSize; repeat->encodingSize = rsi.encodingSize; @@ -1413,8 +1412,7 @@ struct Factory { // Copy in the sparse lookup table. if (br.type == REPEAT_SPARSE_OPTIMAL_P) { assert(!rsi.table.empty()); - memcpy(info_ptr + tableOffset, rsi.table.data(), - byte_length(rsi.table)); + copy_bytes(info_ptr + tableOffset, rsi.table); } // Fill the tug mask. @@ -1702,6 +1700,7 @@ struct Factory { for (u32 i = 0; i < num_repeats; i++) { repeatOffsets[i] = offset; + assert(repeats[i].first); memcpy((char *)limex + offset, repeats[i].first.get(), repeats[i].second); offset += repeats[i].second; @@ -1709,8 +1708,7 @@ struct Factory { // Write repeat offset lookup table. 
assert(ISALIGNED_N((char *)limex + repeatOffsetsOffset, alignof(u32))); - memcpy((char *)limex + repeatOffsetsOffset, repeatOffsets.data(), - byte_length(repeatOffsets)); + copy_bytes((char *)limex + repeatOffsetsOffset, repeatOffsets); limex->repeatOffset = repeatOffsetsOffset; limex->repeatCount = num_repeats; @@ -1725,8 +1723,7 @@ struct Factory { limex->exReportOffset = exceptionReportsOffset; assert(ISALIGNED_N((char *)limex + exceptionReportsOffset, alignof(ReportID))); - memcpy((char *)limex + exceptionReportsOffset, reports.data(), - byte_length(reports)); + copy_bytes((char *)limex + exceptionReportsOffset, reports); } static diff --git a/src/nfa/limex_dump.cpp b/src/nfa/limex_dump.cpp index b836b63b..084f35dd 100644 --- a/src/nfa/limex_dump.cpp +++ b/src/nfa/limex_dump.cpp @@ -317,7 +317,7 @@ template struct limex_labeller : public nfa_labeller { explicit limex_labeller(const limex_type *limex_in) : limex(limex_in) {} - void label_state(FILE *f, u32 state) const { + void label_state(FILE *f, u32 state) const override { const typename limex_traits::exception_type *exceptions = getExceptionTable(limex); if (!testbit((const u8 *)&limex->exceptionMask, diff --git a/src/nfa/limex_exceptional.h b/src/nfa/limex_exceptional.h index dcc302d4..26c5e5a5 100644 --- a/src/nfa/limex_exceptional.h +++ b/src/nfa/limex_exceptional.h @@ -218,7 +218,8 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, if (EQ_STATE(estate, LOAD_STATE(&ctx->cached_estate))) { DEBUG_PRINTF("using cached succ from previous state\n"); STORE_STATE(succ, OR_STATE(LOAD_STATE(succ), LOAD_STATE(&ctx->cached_esucc))); - if (ctx->cached_reports) { + if (ctx->cached_reports && (flags & CALLBACK_OUTPUT)) { + DEBUG_PRINTF("firing cached reports from previous state\n"); if (unlikely(limexRunReports(ctx->cached_reports, ctx->callback, ctx->context, offset) == MO_HALT_MATCHING)) { diff --git a/src/nfa/limex_native.c b/src/nfa/limex_native.c index 675c18bd..471e4bf0 100644 --- 
a/src/nfa/limex_native.c +++ b/src/nfa/limex_native.c @@ -83,7 +83,8 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ, if (estate == ctx->cached_estate) { DEBUG_PRINTF("using cached succ from previous state\n"); *succ |= ctx->cached_esucc; - if (ctx->cached_reports) { + if (ctx->cached_reports && (flags & CALLBACK_OUTPUT)) { + DEBUG_PRINTF("firing cached reports from previous state\n"); if (unlikely(limexRunReports(ctx->cached_reports, ctx->callback, ctx->context, offset) == MO_HALT_MATCHING)) { @@ -119,7 +120,9 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ, ctx->cached_reports = new_cache.reports; ctx->cached_br = new_cache.br; } else if (cacheable == DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES) { - ctx->cached_estate = 0U; + if (ctx->cached_br) { + ctx->cached_estate = 0U; + } } return 0; diff --git a/src/nfa/limex_runtime_impl.h b/src/nfa/limex_runtime_impl.h index 611fe747..6ef3bae9 100644 --- a/src/nfa/limex_runtime_impl.h +++ b/src/nfa/limex_runtime_impl.h @@ -179,7 +179,6 @@ char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, assert(ISALIGNED_CL(ctx)); assert(ISALIGNED_CL(&ctx->s)); STATE_T s = LOAD_STATE(&ctx->s); - STORE_STATE(&ctx->cached_estate, ZERO_STATE); /* TODO: understand why this is required */ /* assert(ISALIGNED_16(exceptions)); */ /* assert(ISALIGNED_16(reach)); */ @@ -305,7 +304,6 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, const ReportID *exReports = getExReports(limex); const u32 *exceptionMap = limex->exceptionMap; STATE_T s = LOAD_STATE(&ctx->s); - STORE_STATE(&ctx->cached_estate, ZERO_STATE); /* TODO: understand why this is required */ /* assert(ISALIGNED_16(exceptions)); */ /* assert(ISALIGNED_16(reach)); */ @@ -542,7 +540,6 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { ctx->callback = q->cb; ctx->context = q->context; STORE_STATE(&ctx->cached_estate, ZERO_STATE); - 
STORE_STATE(&ctx->cached_esucc, ZERO_STATE); assert(q->items[q->cur].location >= 0); DEBUG_PRINTF("LOAD STATE\n"); @@ -638,7 +635,6 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { ctx->callback = q->cb; ctx->context = q->context; STORE_STATE(&ctx->cached_estate, ZERO_STATE); - STORE_STATE(&ctx->cached_esucc, ZERO_STATE); DEBUG_PRINTF("LOAD STATE\n"); STORE_STATE(&ctx->s, LOAD_STATE(q->state)); @@ -730,7 +726,6 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, ctx->callback = NULL; ctx->context = NULL; STORE_STATE(&ctx->cached_estate, ZERO_STATE); - STORE_STATE(&ctx->cached_esucc, ZERO_STATE); DEBUG_PRINTF("LOAD STATE\n"); STORE_STATE(&ctx->s, LOAD_STATE(q->state)); @@ -833,7 +828,6 @@ char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset, ctx->callback = cb; ctx->context = context; STORE_STATE(&ctx->cached_estate, ZERO_STATE); - STORE_STATE(&ctx->cached_esucc, ZERO_STATE); const IMPL_NFA_T *limex = getImplNfa(n); STORE_STATE(&ctx->s, INITIAL_FN(limex, 0)); // always anchored diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 39d29359..f75d08b5 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -700,7 +700,10 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, ReportID arb; u8 single; u32 accelCount; + u8 alphaShift = info.getAlphaShift(); + assert(alphaShift <= 8); + u16 count_real_states; if (allocateFSN16(info, &count_real_states)) { DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", @@ -843,6 +846,7 @@ void fillInBasicState8(const dfa_info &info, mstate_aux *aux, u8 *succ_table, const vector &reports_eod, u32 i) { dstate_id_t j = info.implId(i); u8 alphaShift = info.getAlphaShift(); + assert(alphaShift <= 8); for (size_t s = 0; s < info.impl_alpha_size; s++) { dstate_id_t raw_succ = info.states[i].next[s]; diff --git a/src/nfa/mpv_dump.cpp b/src/nfa/mpv_dump.cpp index b6e702ac..e731df87 100644 --- 
a/src/nfa/mpv_dump.cpp +++ b/src/nfa/mpv_dump.cpp @@ -70,9 +70,9 @@ void dumpKilo(FILE *f, const mpv *m, const mpv_kilopuff *k) { break; case MPV_VERM: if (!ourisprint(k->u.verm.c)) { - fprintf(f, "verm 0x%hhu\n", k->u.verm.c); + fprintf(f, "verm 0x%02x\n", k->u.verm.c); } else { - fprintf(f, "verm 0x%hhu '%c'\n", k->u.verm.c, k->u.verm.c); + fprintf(f, "verm 0x%02x '%c'\n", k->u.verm.c, k->u.verm.c); } break; case MPV_SHUFTI: @@ -87,9 +87,9 @@ void dumpKilo(FILE *f, const mpv *m, const mpv_kilopuff *k) { break; case MPV_NVERM: if (!ourisprint(k->u.verm.c)) { - fprintf(f, "nverm 0x%hhu\n", k->u.verm.c); + fprintf(f, "nverm 0x%02x\n", k->u.verm.c); } else { - fprintf(f, "nverm 0x%hhu '%c'\n", k->u.verm.c, k->u.verm.c); + fprintf(f, "nverm 0x%02x '%c'\n", k->u.verm.c, k->u.verm.c); } break; default: diff --git a/src/nfa/nfa_api_queue.h b/src/nfa/nfa_api_queue.h index d8079292..59c18fca 100644 --- a/src/nfa/nfa_api_queue.h +++ b/src/nfa/nfa_api_queue.h @@ -196,6 +196,14 @@ static really_inline s64a q_cur_loc(const struct mq *q) { return q->items[q->cur].location; } +/** \brief Returns the type of the last event in the queue. */ +static really_inline u32 q_last_type(const struct mq *q) { + assert(q->cur < q->end); + assert(q->end > 0); + assert(q->end <= MAX_MQE_LEN); + return q->items[q->end - 1].type; +} + /** \brief Returns the location (relative to the beginning of the current data * buffer) of the last event in the queue. 
*/ static really_inline s64a q_last_loc(const struct mq *q) { @@ -269,7 +277,7 @@ void debugQueue(const struct mq *q) { type = "MQE_TOP_N"; break; } - DEBUG_PRINTF("\tq[%u] %lld %d:%s\n", cur, q->items[cur].location, + DEBUG_PRINTF("\tq[%u] %lld %u:%s\n", cur, q->items[cur].location, q->items[cur].type, type); } } diff --git a/src/nfa/repeat.c b/src/nfa/repeat.c index f84eed71..c1ff5162 100644 --- a/src/nfa/repeat.c +++ b/src/nfa/repeat.c @@ -39,6 +39,8 @@ #include "util/pack_bits.h" #include "util/partial_store.h" #include "util/unaligned.h" + +#include #include /** \brief Returns the total capacity of the ring. @@ -709,12 +711,7 @@ enum RepeatMatch repeatHasMatchRing(const struct RepeatInfo *info, dumpRing(info, xs, ring); #endif - // We work in terms of the distance between the current offset and the base - // offset in our history. - u64a delta = offset - xs->offset; - DEBUG_PRINTF("delta=%llu\n", delta); - - if (delta < info->repeatMin) { + if (offset - xs->offset < info->repeatMin) { DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n"); return REPEAT_NOMATCH; } @@ -724,17 +721,22 @@ enum RepeatMatch repeatHasMatchRing(const struct RepeatInfo *info, return REPEAT_STALE; } + // If we're not stale, delta fits in the range [repeatMin, lastTop + + // repeatMax], which fits in a u32. + assert(offset - xs->offset < UINT32_MAX); + u32 delta = (u32)(offset - xs->offset); + DEBUG_PRINTF("delta=%u\n", delta); + // Find the bounds on possible matches in the ring buffer. - u64a lower = delta > info->repeatMax ? delta - info->repeatMax : 0; - u64a upper = delta - info->repeatMin + 1; - upper = MIN(upper, ringOccupancy(xs, ringSize)); + u32 lower = delta > info->repeatMax ? 
delta - info->repeatMax : 0; + u32 upper = MIN(delta - info->repeatMin + 1, ringOccupancy(xs, ringSize)); if (lower >= upper) { DEBUG_PRINTF("no matches to check\n"); return REPEAT_NOMATCH; } - DEBUG_PRINTF("possible match indices=[%llu,%llu]\n", lower, upper); + DEBUG_PRINTF("possible match indices=[%u,%u]\n", lower, upper); if (ringHasMatch(xs, ring, ringSize, lower, upper)) { return REPEAT_MATCH; } @@ -1163,7 +1165,7 @@ static void storeInitialRingTopPatch(const struct RepeatInfo *info, struct RepeatRingControl *xs, u8 *state, u64a offset) { - DEBUG_PRINTF("set the first patch\n"); + DEBUG_PRINTF("set the first patch, offset=%llu\n", offset); xs->offset = offset; u8 *active = state; @@ -1197,12 +1199,10 @@ u32 getSparseOptimalTargetValue(const struct RepeatInfo *info, return loc; } -u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - const void *state) { +static +u64a sparseLastTop(const struct RepeatInfo *info, + const struct RepeatRingControl *xs, const u8 *state) { DEBUG_PRINTF("looking for last top\n"); - const struct RepeatRingControl *xs = &ctrl->ring; - u32 patch_size = info->patchSize; u32 patch_count = info->patchCount; u32 encoding_size = info->encodingSize; @@ -1214,7 +1214,7 @@ u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info, } DEBUG_PRINTF("patch%u encoding_size%u occ%u\n", patch, encoding_size, occ); - const u8 *ring = (const u8 *)state + info->patchesOffset; + const u8 *ring = state + info->patchesOffset; u64a val = partial_load_u64a(ring + encoding_size * patch, encoding_size); DEBUG_PRINTF("val:%llu\n", val); @@ -1231,6 +1231,12 @@ u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info, return 0; } +u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + const void *state) { + return sparseLastTop(info, &ctrl->ring, state); +} + u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info, const union RepeatControl *ctrl, 
const void *state, u64a offset) { @@ -1249,20 +1255,20 @@ u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info, if (nextOffset <= xs->offset + info->repeatMin) { patch = xs->first; tval = 0; - } else if (nextOffset > - repeatLastTopSparseOptimalP(info, ctrl, state) + - info->repeatMax) { + } else if (nextOffset > sparseLastTop(info, xs, state) + info->repeatMax) { + DEBUG_PRINTF("ring is stale\n"); return 0; } else { - u64a delta = nextOffset - xs->offset; - u64a lower = delta > info->repeatMax ? delta - info->repeatMax : 0; + assert(nextOffset - xs->offset < UINT32_MAX); // ring is not stale + u32 delta = (u32)(nextOffset - xs->offset); + u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0; patch = lower / patch_size; tval = lower - patch * patch_size; } DEBUG_PRINTF("patch %u\n", patch); u32 patch_count = info->patchCount; - if (patch >= patch_count){ + if (patch >= patch_count) { return 0; } @@ -1336,21 +1342,32 @@ void repeatStoreSparseOptimalP(const struct RepeatInfo *info, union RepeatControl *ctrl, void *state, u64a offset, char is_alive) { struct RepeatRingControl *xs = &ctrl->ring; - - u64a delta = offset - xs->offset; - u32 patch_size = info->patchSize; - u32 patch_count = info->patchCount; - u32 encoding_size = info->encodingSize; - u32 patch = delta / patch_size; - DEBUG_PRINTF("offset: %llu encoding_size: %u\n", offset, encoding_size); - u8 *active = (u8 *)state; - if (!is_alive) { + + DEBUG_PRINTF("offset: %llu encoding_size: %u\n", offset, + info->encodingSize); + + // If (a) this is the first top, or (b) the ring is stale, initialize the + // ring and write this offset in as the first top. + if (!is_alive || + offset > sparseLastTop(info, xs, state) + info->repeatMax) { storeInitialRingTopPatch(info, xs, active, offset); return; } - assert(offset >= xs->offset); + // Tops should arrive in order, with no duplicates. 
+ assert(offset > sparseLastTop(info, xs, state)); + + // As the ring is not stale, our delta should fit within a u32. + assert(offset - xs->offset <= UINT32_MAX); + u32 delta = (u32)(offset - xs->offset); + u32 patch_size = info->patchSize; + u32 patch_count = info->patchCount; + u32 encoding_size = info->encodingSize; + u32 patch = delta / patch_size; + + DEBUG_PRINTF("delta=%u, patch_size=%u, patch=%u\n", delta, patch_size, + patch); u8 *ring = active + info->patchesOffset; u32 occ = ringOccupancy(xs, patch_count); @@ -1361,10 +1378,6 @@ void repeatStoreSparseOptimalP(const struct RepeatInfo *info, patch, patch_count, occ); if (patch >= patch_count) { u32 patch_shift_count = patch - patch_count + 1; - if (patch_shift_count >= patch_count) { - storeInitialRingTopPatch(info, xs, active, offset); - return; - } assert(patch >= patch_shift_count); DEBUG_PRINTF("shifting by %u\n", patch_shift_count); xs->offset += patch_size * patch_shift_count; @@ -1401,7 +1414,8 @@ void repeatStoreSparseOptimalP(const struct RepeatInfo *info, } } - u64a diff = delta - patch * patch_size; + assert((u64a)patch * patch_size <= delta); + u32 diff = delta - patch * patch_size; const u64a *repeatTable = getImplTable(info); val += repeatTable[diff]; @@ -1480,7 +1494,7 @@ char sparseHasMatch(const struct RepeatInfo *info, const u8 *state, enum RepeatMatch repeatHasMatchSparseOptimalP(const struct RepeatInfo *info, const union RepeatControl *ctrl, const void *state, u64a offset) { - DEBUG_PRINTF("check for match at %llu corresponding to trigger" + DEBUG_PRINTF("check for match at %llu corresponding to trigger " "at [%llu, %llu]\n", offset, offset - info->repeatMax, offset - info->repeatMin); @@ -1492,21 +1506,25 @@ enum RepeatMatch repeatHasMatchSparseOptimalP(const struct RepeatInfo *info, if (offset < xs->offset + info->repeatMin) { DEBUG_PRINTF("too soon\n"); return REPEAT_NOMATCH; - } else if (offset > repeatLastTopSparseOptimalP(info, ctrl, state) + - info->repeatMax) { + } else if 
(offset > sparseLastTop(info, xs, state) + info->repeatMax) { DEBUG_PRINTF("stale\n"); return REPEAT_STALE; } - u64a delta = offset - xs->offset; - u64a lower = delta > info->repeatMax ? delta - info->repeatMax : 0; - u64a upper = delta - info->repeatMin; + // Our delta between the base offset of the ring and the current offset + // must fit within the range [repeatMin, lastPossibleTop + repeatMax]. This + // range fits comfortably within a u32. + assert(offset - xs->offset <= UINT32_MAX); + + u32 delta = (u32)(offset - xs->offset); u32 patch_size = info->patchSize; u32 patch_count = info->patchCount; u32 occ = ringOccupancy(xs, patch_count); - upper = MIN(upper, occ * patch_size - 1); - DEBUG_PRINTF("lower=%llu, upper=%llu\n", lower, upper); + u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0; + u32 upper = MIN(delta - info->repeatMin, occ * patch_size - 1); + + DEBUG_PRINTF("lower=%u, upper=%u\n", lower, upper); u32 patch_lower = lower / patch_size; u32 patch_upper = upper / patch_size; diff --git a/src/nfa/repeatcompile.cpp b/src/nfa/repeatcompile.cpp index 25e57581..2f187503 100644 --- a/src/nfa/repeatcompile.cpp +++ b/src/nfa/repeatcompile.cpp @@ -75,8 +75,8 @@ u32 calcPackedBytes(u64a val) { } static -u64a repeatRecurTable(struct RepeatStateInfo *info, const depth &repeatMax, - const u32 minPeriod) { +u32 repeatRecurTable(struct RepeatStateInfo *info, const depth &repeatMax, + const u32 minPeriod) { u32 repeatTmp = info->patchCount > 2 ? 64 : (u32)repeatMax; u32 repeat_index = repeatTmp < minPeriod ? 
repeatTmp : minPeriod; for (u32 i = 0; i <= repeat_index; i++) { @@ -93,7 +93,7 @@ u64a repeatRecurTable(struct RepeatStateInfo *info, const depth &repeatMax, static u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax, - const u32 minPeriod, u64a rv) { + const u32 minPeriod, u32 rv) { u32 cnt = 0; u32 patch_bits = 0; u32 total_size = 0; @@ -171,7 +171,7 @@ RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin, assert(minPeriod); assert(repeatMax.is_finite()); { - u64a rv = repeatRecurTable(this, repeatMax, minPeriod); + u32 rv = repeatRecurTable(this, repeatMax, minPeriod); u32 repeatTmp = 0; if ((u32)repeatMax < minPeriod) { repeatTmp = repeatMax; diff --git a/src/nfagraph/ng.h b/src/nfagraph/ng.h index fe78254c..52353da9 100644 --- a/src/nfagraph/ng.h +++ b/src/nfagraph/ng.h @@ -64,7 +64,7 @@ public: bool prefilter, const som_type som, ReportID rid, u64a min_offset, u64a max_offset, u64a min_length); - ~NGWrapper(); + ~NGWrapper() override; /** index of the expression represented by this graph, used * - down the track in error handling diff --git a/src/nfagraph/ng_depth.cpp b/src/nfagraph/ng_depth.cpp index 719c7654..d7945be9 100644 --- a/src/nfagraph/ng_depth.cpp +++ b/src/nfagraph/ng_depth.cpp @@ -55,14 +55,14 @@ namespace ue2 { namespace { /** Distance value used to indicate that the vertex can't be reached. */ -static const int DIST_UNREACHABLE = INT_MAX; +static constexpr int DIST_UNREACHABLE = INT_MAX; /** * Distance value used to indicate that the distance to a vertex is infinite * (for example, it's the max distance and there's a cycle in the path) or so * large that we should consider it effectively infinite. 
*/ -static const int DIST_INFINITY = INT_MAX - 1; +static constexpr int DIST_INFINITY = INT_MAX - 1; // // Filters @@ -71,10 +71,12 @@ static const int DIST_INFINITY = INT_MAX - 1; template struct NodeFilter { typedef typename GraphT::edge_descriptor EdgeT; - NodeFilter() { } + NodeFilter() {} // BGL filters must be default-constructible. NodeFilter(const vector *bad_in, const GraphT *g_in) : bad(bad_in), g(g_in) { } bool operator()(const EdgeT &e) const { + assert(g && bad); + u32 src_idx = (*g)[source(e, *g)].index; u32 tar_idx = (*g)[target(e, *g)].index; @@ -84,16 +86,20 @@ struct NodeFilter { return !(*bad)[src_idx] && !(*bad)[tar_idx]; } - const vector *bad; - const GraphT *g; + +private: + const vector *bad = nullptr; + const GraphT *g = nullptr; }; template struct StartFilter { typedef typename GraphT::edge_descriptor EdgeT; - StartFilter() { } + StartFilter() {} // BGL filters must be default-constructible. explicit StartFilter(const GraphT *g_in) : g(g_in) { } bool operator()(const EdgeT &e) const { + assert(g); + u32 src_idx = (*g)[source(e, *g)].index; u32 tar_idx = (*g)[target(e, *g)].index; @@ -107,7 +113,9 @@ struct StartFilter { } return true; } - const GraphT *g; + +private: + const GraphT *g = nullptr; }; } // namespace diff --git a/src/nfagraph/ng_execute.cpp b/src/nfagraph/ng_execute.cpp index aebfa712..92bef737 100644 --- a/src/nfagraph/ng_execute.cpp +++ b/src/nfagraph/ng_execute.cpp @@ -125,61 +125,62 @@ void execute_graph_i(const NGHolder &g, const vector &info, } static -void fillStateBitset(const NGHolder &g, const set &in, - dynamic_bitset<> &out) { - out.reset(); - for (auto v : in) { +dynamic_bitset<> makeStateBitset(const NGHolder &g, + const flat_set &in) { + dynamic_bitset<> work_states(num_vertices(g)); + for (const auto &v : in) { u32 idx = g[v].index; - out.set(idx); + work_states.set(idx); } + return work_states; } static -void fillVertexSet(const dynamic_bitset<> &in, - const vector &info, set &out) { - out.clear(); +flat_set 
getVertices(const dynamic_bitset<> &in, + const vector &info) { + flat_set out; for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) { out.insert(info[i].vertex); } + return out; } static -void fillInfoTable(const NGHolder &g, vector &info) { - info.resize(num_vertices(g)); +vector makeInfoTable(const NGHolder &g) { + vector info(num_vertices(g)); for (auto v : vertices_range(g)) { u32 idx = g[v].index; const CharReach &cr = g[v].char_reach; assert(idx < info.size()); info[idx] = StateInfo(v, cr); } + return info; } -void execute_graph(const NGHolder &g, const ue2_literal &input, - set *states, bool kill_sds) { +flat_set execute_graph(const NGHolder &g, const ue2_literal &input, + const flat_set &initial_states, + bool kill_sds) { assert(hasCorrectlyNumberedVertices(g)); - vector info; - fillInfoTable(g, info); - dynamic_bitset<> work_states(num_vertices(g)); - fillStateBitset(g, *states, work_states); + auto info = makeInfoTable(g); + auto work_states = makeStateBitset(g, initial_states); execute_graph_i(g, info, input, &work_states, kill_sds); - fillVertexSet(work_states, info, *states); + return getVertices(work_states, info); } -void execute_graph(const NGHolder &g, const vector &input, - set *states) { +flat_set execute_graph(const NGHolder &g, + const vector &input, + const flat_set &initial_states) { assert(hasCorrectlyNumberedVertices(g)); - vector info; - fillInfoTable(g, info); - dynamic_bitset<> work_states(num_vertices(g)); - fillStateBitset(g, *states, work_states); + auto info = makeInfoTable(g); + auto work_states = makeStateBitset(g, initial_states); execute_graph_i(g, info, input, &work_states, false); - fillVertexSet(work_states, info, *states); + return getVertices(work_states, info); } typedef boost::reverse_graph RevNFAGraph; @@ -276,9 +277,10 @@ private: }; } // namespace -void execute_graph(const NGHolder &running_g, const NGHolder &input_dag, - const set &input_start_states, - set *states) { +flat_set execute_graph(const 
NGHolder &running_g, + const NGHolder &input_dag, + const flat_set &input_start_states, + const flat_set &initial_states) { DEBUG_PRINTF("g has %zu vertices, input_dag has %zu vertices\n", num_vertices(running_g), num_vertices(input_dag)); assert(hasCorrectlyNumberedVertices(running_g)); @@ -290,10 +292,8 @@ void execute_graph(const NGHolder &running_g, const NGHolder &input_dag, RevNFAGraph revg(input_dag.g); map > dfs_states; - vector info; - fillInfoTable(running_g, info); - dynamic_bitset<> input_fs(num_vertices(running_g)); - fillStateBitset(running_g, *states, input_fs); + auto info = makeInfoTable(running_g); + auto input_fs = makeStateBitset(running_g, initial_states); for (auto v : input_start_states) { dfs_states[v] = input_fs; @@ -303,21 +303,25 @@ void execute_graph(const NGHolder &running_g, const NGHolder &input_dag, eg_visitor(running_g, info, input_dag, dfs_states), make_assoc_property_map(colours)); - fillVertexSet(dfs_states[input_dag.accept], info, *states); + auto states = getVertices(dfs_states[input_dag.accept], info); #ifdef DEBUG - DEBUG_PRINTF(" output rstates:"); - for (auto v : *states) { - printf(" %u", running_g[v].index); - } - printf("\n"); + DEBUG_PRINTF(" output rstates:"); + for (const auto &v : states) { + printf(" %u", running_g[v].index); + } + printf("\n"); #endif + + return states; } -void execute_graph(const NGHolder &running_g, const NGHolder &input_dag, - set *states) { - set input_start_states = {input_dag.start, input_dag.startDs}; - execute_graph(running_g, input_dag, input_start_states, states); +flat_set execute_graph(const NGHolder &running_g, + const NGHolder &input_dag, + const flat_set &initial_states) { + auto input_start_states = {input_dag.start, input_dag.startDs}; + return execute_graph(running_g, input_dag, input_start_states, + initial_states); } } // namespace ue2 diff --git a/src/nfagraph/ng_execute.h b/src/nfagraph/ng_execute.h index 80fdcbd5..e2c7c72d 100644 --- a/src/nfagraph/ng_execute.h +++ 
b/src/nfagraph/ng_execute.h @@ -35,8 +35,8 @@ #define NG_EXECUTE_H #include "ng_holder.h" +#include "util/ue2_containers.h" -#include #include namespace ue2 { @@ -44,23 +44,25 @@ namespace ue2 { class CharReach; struct ue2_literal; -void execute_graph(const NGHolder &g, const ue2_literal &input, - std::set *states, bool kill_sds = false); +flat_set execute_graph(const NGHolder &g, const ue2_literal &input, + const flat_set &initial, + bool kill_sds = false); -void execute_graph(const NGHolder &g, const std::vector &input, - std::set *states); +flat_set execute_graph(const NGHolder &g, + const std::vector &input, + const flat_set &initial); /** on exit, states contains any state which may still be enabled after * receiving an input which corresponds to some path through the input_dag from * start or startDs to accept. input_dag MUST be acyclic aside from self-loops. */ -void execute_graph(const NGHolder &g, const NGHolder &input_dag, - std::set *states); +flat_set execute_graph(const NGHolder &g, const NGHolder &input_dag, + const flat_set &initial); /* as above, but able to specify the source states for the input graph */ -void execute_graph(const NGHolder &g, const NGHolder &input_dag, - const std::set &input_start_states, - std::set *states); +flat_set execute_graph(const NGHolder &g, const NGHolder &input_dag, + const flat_set &input_start_states, + const flat_set &initial); } // namespace ue2 diff --git a/src/nfagraph/ng_haig.cpp b/src/nfagraph/ng_haig.cpp index 7681fa14..d06083bd 100644 --- a/src/nfagraph/ng_haig.cpp +++ b/src/nfagraph/ng_haig.cpp @@ -114,7 +114,7 @@ void populateAccepts(const NGHolder &g, StateSet *accept, StateSet *acceptEod) { } class Automaton_Base { -public: +protected: Automaton_Base(const NGHolder &graph_in, const ue2::unordered_map &state_ids_in) : graph(graph_in), state_ids(state_ids_in) { @@ -122,6 +122,7 @@ public: assert(alphasize <= ALPHABET_SIZE); } +public: static bool canPrune(const flat_set &) { return false; } const NGHolder 
&graph; @@ -608,7 +609,6 @@ bool doHaig(const NGHolder &g, } haig_note_starts(g, &rdfa->new_som_nfa_states); - rdfa->trigger_nfa_state = NODE_START; return true; } @@ -638,7 +638,8 @@ unique_ptr attemptToBuildHaig(NGHolder &g, som_type som, return nullptr; } - auto rdfa = ue2::make_unique(g.kind, unordered_som); + auto rdfa = ue2::make_unique(g.kind, unordered_som, NODE_START, + somPrecision); DEBUG_PRINTF("determinising nfa with %u vertices\n", numStates); bool rv; @@ -658,7 +659,6 @@ unique_ptr attemptToBuildHaig(NGHolder &g, som_type som, DEBUG_PRINTF("determinised, building impl dfa (a,f) = (%hu,%hu)\n", rdfa->start_anchored, rdfa->start_floating); - rdfa->stream_som_loc_width = somPrecision; assert(rdfa->kind == g.kind); return rdfa; @@ -782,7 +782,9 @@ unique_ptr attemptToMergeHaig(const vector &df typedef Automaton_Haig_Merge::StateSet StateSet; vector nfa_state_map; - auto rdfa = ue2::make_unique(dfas[0]->kind, unordered_som); + auto rdfa = ue2::make_unique(dfas[0]->kind, unordered_som, + NODE_START, + dfas[0]->stream_som_loc_width); int rv = determinise(n, rdfa->states, limit, &nfa_state_map); if (rv) { @@ -830,11 +832,9 @@ unique_ptr attemptToMergeHaig(const vector &df } haig_merge_note_starts(dfas, per_dfa_adj, &rdfa->new_som_nfa_states); - rdfa->trigger_nfa_state = NODE_START; DEBUG_PRINTF("merged, building impl dfa (a,f) = (%hu,%hu)\n", rdfa->start_anchored, rdfa->start_floating); - rdfa->stream_som_loc_width = dfas[0]->stream_som_loc_width; return rdfa; } diff --git a/src/nfagraph/ng_lbr.cpp b/src/nfagraph/ng_lbr.cpp index 11eded69..b9cacaa7 100644 --- a/src/nfagraph/ng_lbr.cpp +++ b/src/nfagraph/ng_lbr.cpp @@ -98,8 +98,7 @@ void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin, info->packedCtrlSize = rsi.packedCtrlSize; info->horizon = rsi.horizon; info->minPeriod = minPeriod; - memcpy(&info->packedFieldSizes, rsi.packedFieldSizes.data(), - byte_length(rsi.packedFieldSizes)); + copy_bytes(&info->packedFieldSizes, 
rsi.packedFieldSizes); info->patchCount = rsi.patchCount; info->patchSize = rsi.patchSize; info->encodingSize = rsi.encodingSize; @@ -122,7 +121,7 @@ void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin, nfa->length = verify_u32(len); info->length = verify_u32(sizeof(RepeatInfo) + sizeof(u64a) * (rsi.patchSize + 1)); - memcpy(table, rsi.table.data(), byte_length(rsi.table)); + copy_bytes(table, rsi.table); } } diff --git a/src/nfagraph/ng_puff.cpp b/src/nfagraph/ng_puff.cpp index cd190171..501d8f7b 100644 --- a/src/nfagraph/ng_puff.cpp +++ b/src/nfagraph/ng_puff.cpp @@ -316,7 +316,7 @@ bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a, bool unbounded = false; bool exhaustible = can_exhaust(g, rm); - while (a) { + while (true) { if (is_special(a, g)) { DEBUG_PRINTF("stopped puffing due to special vertex\n"); break; @@ -350,9 +350,7 @@ bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a, a = getSoleSourceVertex(g, a); - if (!a) { - break; - } + assert(a); /* already checked that old a had a proper in degree of 1 */ // Snark: we can't handle this case, because we can only handle a // single report ID on a vertex diff --git a/src/nfagraph/ng_som.cpp b/src/nfagraph/ng_som.cpp index 90ebb5c3..f26b62aa 100644 --- a/src/nfagraph/ng_som.cpp +++ b/src/nfagraph/ng_som.cpp @@ -266,7 +266,7 @@ bool validateEXSL(const NGHolder &g, const vector escapes_vec(1, escapes); const vector notescapes_vec(1, ~escapes); - set states; + ue2::flat_set states; /* turn on all states past the prefix */ DEBUG_PRINTF("region %u is cutover\n", region); for (auto v : vertices_range(g)) { @@ -276,20 +276,20 @@ bool validateEXSL(const NGHolder &g, } /* process the escapes */ - execute_graph(g, escapes_vec, &states); + states = execute_graph(g, escapes_vec, states); /* flood with any number of not escapes */ - set prev_states; + ue2::flat_set prev_states; while (prev_states != states) { prev_states = states; - 
execute_graph(g, notescapes_vec, &states); + states = execute_graph(g, notescapes_vec, states); insert(&states, prev_states); } /* find input starts to use for when we are running the prefix through as * when the escape character arrives we may be in matching the prefix * already */ - set prefix_start_states; + ue2::flat_set prefix_start_states; for (auto v : vertices_range(prefix)) { if (v != prefix.accept && v != prefix.acceptEod /* and as we have already made it past the prefix once */ @@ -298,11 +298,12 @@ bool validateEXSL(const NGHolder &g, } } - execute_graph(prefix, escapes_vec, &prefix_start_states); + prefix_start_states = + execute_graph(prefix, escapes_vec, prefix_start_states); assert(contains(prefix_start_states, prefix.startDs)); /* see what happens after we feed it the prefix */ - execute_graph(g, prefix, prefix_start_states, &states); + states = execute_graph(g, prefix, prefix_start_states, states); for (auto v : states) { assert(v != g.accept && v != g.acceptEod); /* no cr -> should never be diff --git a/src/nfagraph/ng_som_util.cpp b/src/nfagraph/ng_som_util.cpp index 7f487f89..a0829451 100644 --- a/src/nfagraph/ng_som_util.cpp +++ b/src/nfagraph/ng_som_util.cpp @@ -136,7 +136,7 @@ bool firstMatchIsFirst(const NGHolder &p) { return false; } - set states; + ue2::flat_set states; /* turn on all states (except starts - avoid suffix matches) */ /* If we were doing (1) we would also except states leading to accepts - avoid prefix matches */ @@ -149,7 +149,7 @@ bool firstMatchIsFirst(const NGHolder &p) { } /* run the prefix the main graph */ - execute_graph(p, p, &states); + states = execute_graph(p, p, states); for (auto v : states) { /* need to check if this vertex may represent an infix match - ie @@ -313,7 +313,7 @@ bool sentClearsTail(const NGHolder &g, */ u32 first_bad_region = ~0U; - set states; + ue2::flat_set states; /* turn on all states */ DEBUG_PRINTF("region %u is cutover\n", last_head_region); for (auto v : vertices_range(g)) { @@ -327,7 
+327,7 @@ bool sentClearsTail(const NGHolder &g, } /* run the prefix the main graph */ - execute_graph(g, sent, &states); + states = execute_graph(g, sent, states); /* .. and check if we are left with anything in the tail region */ for (auto v : states) { diff --git a/src/nfagraph/ng_width.cpp b/src/nfagraph/ng_width.cpp index 3ecb391c..470f9343 100644 --- a/src/nfagraph/ng_width.cpp +++ b/src/nfagraph/ng_width.cpp @@ -51,10 +51,16 @@ namespace ue2 { namespace { -/** Filter out edges from start-to-start or accept-to-accept. */ +/** + * Filter out special edges, or in the top-specific variant, start edges that + * don't have the right top set. + */ struct SpecialEdgeFilter { SpecialEdgeFilter() {} - explicit SpecialEdgeFilter(const NGHolder *h_in) : h(h_in) {} + explicit SpecialEdgeFilter(const NGHolder &h_in) : h(&h_in) {} + explicit SpecialEdgeFilter(const NGHolder &h_in, u32 top_in) + : h(&h_in), single_top(true), top(top_in) {} + bool operator()(const NFAEdge &e) const { const NFAGraph &g = h->g; NFAVertex u = source(e, g), v = target(e, g); @@ -62,23 +68,33 @@ struct SpecialEdgeFilter { (is_any_accept(u, g) && is_any_accept(v, g))) { return false; } + if (single_top) { + if (u == h->start && g[e].top != top) { + return false; + } + if (u == h->startDs) { + return false; + } + } return true; } private: const NGHolder *h = nullptr; + bool single_top = false; + u32 top = 0; }; } // namespace static -depth findMinWidth(const NGHolder &h, NFAVertex src) { +depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter, + NFAVertex src) { if (isLeafNode(src, h)) { return depth::unreachable(); } - typedef boost::filtered_graph StartGraph; - StartGraph g(h.g, SpecialEdgeFilter(&h)); + boost::filtered_graph g(h.g, filter); assert(hasCorrectlyNumberedVertices(h)); const size_t num = num_vertices(h); @@ -112,7 +128,8 @@ depth findMinWidth(const NGHolder &h, NFAVertex src) { } static -depth findMaxWidth(const NGHolder &h, NFAVertex src) { +depth findMaxWidth(const 
NGHolder &h, const SpecialEdgeFilter &filter, + NFAVertex src) { if (isLeafNode(src, h.g)) { return depth::unreachable(); } @@ -122,8 +139,7 @@ depth findMaxWidth(const NGHolder &h, NFAVertex src) { return depth::infinity(); } - typedef boost::filtered_graph NodeFilteredGraph; - NodeFilteredGraph g(h.g, SpecialEdgeFilter(&h)); + boost::filtered_graph g(h.g, filter); assert(hasCorrectlyNumberedVertices(h)); const size_t num = num_vertices(h); @@ -164,7 +180,7 @@ depth findMaxWidth(const NGHolder &h, NFAVertex src) { if (d.is_unreachable()) { // If we're actually reachable, we'll have a min width, so we can // return infinity in this case. - if (findMinWidth(h, src).is_reachable()) { + if (findMinWidth(h, filter, src).is_reachable()) { return depth::infinity(); } return d; @@ -175,11 +191,10 @@ depth findMaxWidth(const NGHolder &h, NFAVertex src) { return d - depth(1); } -/** Returns the minimum width in bytes of an input that will match the given - * graph. */ -depth findMinWidth(const NGHolder &h) { - depth startDepth = findMinWidth(h, h.start); - depth dotstarDepth = findMinWidth(h, h.startDs); +static +depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter) { + depth startDepth = findMinWidth(h, filter, h.start); + depth dotstarDepth = findMinWidth(h, filter, h.startDs); DEBUG_PRINTF("startDepth=%s, dotstarDepth=%s\n", startDepth.str().c_str(), dotstarDepth.str().c_str()); if (startDepth.is_unreachable()) { @@ -194,11 +209,18 @@ depth findMinWidth(const NGHolder &h) { } } -/** Returns the maximum width in bytes of an input that will match the given - * graph. If there is no maximum width, returns infinity. 
*/ -depth findMaxWidth(const NGHolder &h) { - depth startDepth = findMaxWidth(h, h.start); - depth dotstarDepth = findMaxWidth(h, h.startDs); +depth findMinWidth(const NGHolder &h) { + return findMinWidth(h, SpecialEdgeFilter(h)); +} + +depth findMinWidth(const NGHolder &h, u32 top) { + return findMinWidth(h, SpecialEdgeFilter(h, top)); +} + +static +depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter) { + depth startDepth = findMaxWidth(h, filter, h.start); + depth dotstarDepth = findMaxWidth(h, filter, h.startDs); DEBUG_PRINTF("startDepth=%s, dotstarDepth=%s\n", startDepth.str().c_str(), dotstarDepth.str().c_str()); if (startDepth.is_unreachable()) { @@ -210,4 +232,12 @@ depth findMaxWidth(const NGHolder &h) { } } +depth findMaxWidth(const NGHolder &h) { + return findMaxWidth(h, SpecialEdgeFilter(h)); +} + +depth findMaxWidth(const NGHolder &h, u32 top) { + return findMaxWidth(h, SpecialEdgeFilter(h, top)); +} + } // namespace ue2 diff --git a/src/nfagraph/ng_width.h b/src/nfagraph/ng_width.h index 5c78409b..871e8a93 100644 --- a/src/nfagraph/ng_width.h +++ b/src/nfagraph/ng_width.h @@ -41,14 +41,34 @@ namespace ue2 { class NGHolder; -/** Returns the minimum width in bytes of an input that will match the given - * graph. */ +/** + * \brief Compute the minimum width in bytes of an input that will match the + * given graph. + */ depth findMinWidth(const NGHolder &h); -/** Returns the maximum width in bytes of an input that will match the given - * graph. If there is no maximum width, returns infinity. */ +/** + * \brief Compute the minimum width in bytes of an input that will match the + * given graph, considering only paths activated by the given top. + */ +depth findMinWidth(const NGHolder &h, u32 top); + +/** + * \brief Compute the maximum width in bytes of an input that will match the + * given graph. + * + * If there is no bound on the maximum width, returns infinity. 
+ */ depth findMaxWidth(const NGHolder &h); +/** + * \brief Compute the maximum width in bytes of an input that will match the + * given graph, considering only paths activated by the given top. + * + * If there is no bound on the maximum width, returns infinity. + */ +depth findMaxWidth(const NGHolder &h, u32 top); + } // namespace ue2 #endif // NG_WIDTH_H diff --git a/src/parser/AsciiComponentClass.cpp b/src/parser/AsciiComponentClass.cpp index de7a40d4..7cfa6e11 100644 --- a/src/parser/AsciiComponentClass.cpp +++ b/src/parser/AsciiComponentClass.cpp @@ -52,7 +52,8 @@ AsciiComponentClass *AsciiComponentClass::clone() const { } bool AsciiComponentClass::class_empty(void) const { - return cr.none() && cr_ucp.none(); + assert(finalized); + return cr.none(); } void AsciiComponentClass::createRange(unichar to) { @@ -60,11 +61,15 @@ void AsciiComponentClass::createRange(unichar to) { unsigned char from = (u8)range_start; if (from > to) { throw LocatedParseError("Range out of order in character class"); - } else { - in_cand_range = false; - cr.setRange(from, to); - range_start = INVALID_UNICODE; } + + in_cand_range = false; + CharReach ncr(from, to); + if (mode.caseless) { + make_caseless(&ncr); + } + cr |= ncr; + range_start = INVALID_UNICODE; } void AsciiComponentClass::notePositions(GlushkovBuildState &bs) { @@ -94,16 +99,13 @@ void AsciiComponentClass::add(PredefinedClass c, bool negative) { c = translateForUcpMode(c, mode); } + // Note: caselessness is handled by getPredefinedCharReach. 
CharReach pcr = getPredefinedCharReach(c, mode); if (negative) { pcr.flip(); } - if (isUcp(c)) { - cr_ucp |= pcr; - } else { - cr |= pcr; - } + cr |= pcr; range_start = INVALID_UNICODE; in_cand_range = false; } @@ -119,7 +121,12 @@ void AsciiComponentClass::add(unichar c) { return; } - cr.set(c); + CharReach ncr(c, c); + if (mode.caseless) { + make_caseless(&ncr); + } + + cr |= ncr; range_start = c; } @@ -135,12 +142,6 @@ void AsciiComponentClass::finalize() { in_cand_range = false; } - if (mode.caseless) { - make_caseless(&cr); - } - - cr |= cr_ucp; /* characters from ucp props don't participate in caseless */ - if (m_negate) { cr.flip(); } diff --git a/src/parser/AsciiComponentClass.h b/src/parser/AsciiComponentClass.h index 2d5ef843..925fa9bf 100644 --- a/src/parser/AsciiComponentClass.h +++ b/src/parser/AsciiComponentClass.h @@ -78,12 +78,10 @@ protected: private: Position position; CharReach cr; - CharReach cr_ucp; // Private copy ctor. Use clone instead. AsciiComponentClass(const AsciiComponentClass &other) - : ComponentClass(other), position(other.position), cr(other.cr), - cr_ucp(other.cr_ucp) {} + : ComponentClass(other), position(other.position), cr(other.cr) {} }; } // namespace ue2 diff --git a/src/parser/ComponentClass.cpp b/src/parser/ComponentClass.cpp index d90d7db2..a91ae979 100644 --- a/src/parser/ComponentClass.cpp +++ b/src/parser/ComponentClass.cpp @@ -81,8 +81,9 @@ CharReach getPredefinedCharReach(PredefinedClass c, const ParseMode &mode) { case CLASS_DIGIT: return number; case CLASS_GRAPH: - case CLASS_XGRAPH: return CharReach(0x21, 0x7e); + case CLASS_XGRAPH: + return to_cr(getPredefinedCodePointSet(c, mode)); case CLASS_HORZ: return CharReach("\x09\x20\xA0"); case CLASS_LOWER: @@ -93,11 +94,15 @@ CharReach getPredefinedCharReach(PredefinedClass c, const ParseMode &mode) { } case CLASS_PRINT: return CharReach(0x20, 0x7e); + case CLASS_XPRINT: + return to_cr(getPredefinedCodePointSet(c, mode)); case CLASS_PUNCT: return CharReach(0x21, '0' - 
1) | CharReach('9' + 1, 'A' - 1) | CharReach('Z' + 1, 'a' - 1) | CharReach('z' + 1, 126); + case CLASS_XPUNCT: + return to_cr(getPredefinedCodePointSet(c, mode)); case CLASS_SPACE: return CharReach("\x09\x0a\x0c\x0b\x0d\x20"); case CLASS_UPPER: @@ -420,7 +425,7 @@ unique_ptr getLiteralComponentClass(unsigned char c, ComponentClass::ComponentClass(const ParseMode &mode_in) : m_negate(false), mode(mode_in), in_cand_range(false), - range_start(INVALID_UNICODE), finalized(false), firstChar('\0') {} + range_start(INVALID_UNICODE), finalized(false) {} ComponentClass::~ComponentClass() { } @@ -441,7 +446,6 @@ void ComponentClass::addDash(void) { } void ComponentClass::negate() { - assert(class_empty()); m_negate = true; } diff --git a/src/parser/ComponentClass.h b/src/parser/ComponentClass.h index 21b51202..040e6d78 100644 --- a/src/parser/ComponentClass.h +++ b/src/parser/ComponentClass.h @@ -63,7 +63,9 @@ enum PredefinedClass { CLASS_VERT, CLASS_WORD, CLASS_XDIGIT, - CLASS_XGRAPH, + CLASS_XGRAPH, /* [:graph:] in UCP mode */ + CLASS_XPRINT, /* [:print:] in UCP mode */ + CLASS_XPUNCT, /* [:punct:] in UCP mode */ CLASS_UCP_C, CLASS_UCP_CC, CLASS_UCP_CF, @@ -232,8 +234,12 @@ public: Component *accept(ComponentVisitor &v) override = 0; void accept(ConstComponentVisitor &v) const override = 0; - /** True iff we have already started adding members to the class. This is - * a different concept to Component::empty */ + /** \brief True if the class contains no members (i.e. it will not match + * against anything). This function can only be called on a finalized + * class. + * + * Note: This is a different concept to Component::empty. 
+ */ virtual bool class_empty(void) const = 0; virtual void add(PredefinedClass c, bool negated) = 0; @@ -245,9 +251,6 @@ public: bool isNegated() const { return m_negate; } - void setFirstChar(char c) { firstChar = c; } - char getFirstChar() const { return firstChar; } - std::vector first() const override = 0; std::vector last() const override = 0; bool empty() const override { return false; } /* always 1 codepoint wide */ @@ -263,19 +266,13 @@ protected: unichar range_start; bool finalized; - /** Literal character at the start of this character class, e.g. '.' for - * the class [.abc]. Used to identify (unsupported) POSIX collating - * elements. */ - char firstChar; - virtual void createRange(unichar) = 0; // Protected copy ctor. Use clone instead. ComponentClass(const ComponentClass &other) : Component(other), m_negate(other.m_negate), mode(other.mode), in_cand_range(other.in_cand_range), range_start(other.range_start), - finalized(other.finalized), - firstChar(other.firstChar) {} + finalized(other.finalized) {} }; } // namespace ue2 diff --git a/src/parser/Parser.rl b/src/parser/Parser.rl index 1481b7d8..65cd7c1a 100644 --- a/src/parser/Parser.rl +++ b/src/parser/Parser.rl @@ -424,6 +424,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) { assert(!inCharClass); // not reentrant currentCls = getComponentClass(mode); inCharClass = true; + inCharClassEarly = true; currentClsBegin = ts; fgoto readClass; } @@ -474,6 +475,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) { } action is_utf8 { mode.utf8 } action is_ignore_space { mode.ignore_space } + action is_early_charclass { inCharClassEarly } action addNumberedBackRef { if (accumulator == 0) { @@ -790,10 +792,12 @@ unichar readUtf8CodePoint4c(const u8 *ts) { any => { throw LocatedParseError("Unknown property"); }; *|; charClassGuts := |* - # We don't like POSIX collating elements (neither does PCRE or Perl). - '\[\.' 
[^\]]* '\.\]' | - '\[=' [^\]]* '=\]' => { - throw LocatedParseError("Unsupported POSIX collating element"); + # We don't support POSIX collating elements (neither does PCRE + # or Perl). These look like [.ch.] or [=ch=]. + '\[\.' ( '\\]' | [^\]] )* '\.\]' | + '\[=' ( '\\]' | [^\]] )* '=\]' => { + throw LocatedParseError("Unsupported POSIX collating " + "element"); }; # Named sets # Adding these may cause the charclass to close, hence the @@ -889,11 +893,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) { throw LocatedParseError("Invalid POSIX named class"); }; '\\Q' => { - // fcall readQuotedClass; - ostringstream str; - str << "\\Q..\\E sequences in character classes not supported at index " - << ts - ptr << "."; - throw ParseError(str.str()); + fcall readQuotedClass; }; '\\E' => { /*noop*/}; # Backspace (this is only valid for \b in char classes) @@ -1090,28 +1090,8 @@ unichar readUtf8CodePoint4c(const u8 *ts) { throwInvalidUtf8(); }; - # dot or equals at the end of a character class could be the end - # of a collating element, like [.blah.] or [=blah=]. - [.=] ']' => { - if (currentCls->getFirstChar() == *ts) { - assert(currentClsBegin); - ostringstream oss; - oss << "Unsupported POSIX collating element at index " - << currentClsBegin - ptr << "."; - throw ParseError(oss.str()); - } - currentCls->add(*ts); - currentCls->finalize(); - currentSeq->addComponent(move(currentCls)); - inCharClass = false; - fgoto main; - }; - # Literal character (any - ']') => { - if (currentCls->class_empty()) { - currentCls->setFirstChar(*ts); - } currentCls->add(*ts); }; @@ -1127,35 +1107,35 @@ unichar readUtf8CodePoint4c(const u8 *ts) { # Parser to read stuff from a character class ############################################################# readClass := |* - # the negate and right bracket out the front are special - '\^' => { + # A caret at the beginning of the class means that the rest of the + # class is negated. 
+ '\^' when is_early_charclass => { if (currentCls->isNegated()) { + // Already seen a caret; the second one is not a meta-character. + inCharClassEarly = false; fhold; fgoto charClassGuts; } else { currentCls->negate(); + // Note: we cannot switch off inCharClassEarly here, as /[^]]/ + // needs to use the right square bracket path below. } }; - ']' => { - // if this is the first thing in the class, add it and move along, - // otherwise jump into the char class machine to handle what might - // end up as fail - if (currentCls->class_empty()) { - currentCls->add(']'); - } else { - // leave it for the next machine - fhold; - } - fgoto charClassGuts; + # A right square bracket before anything "real" is interpreted as a + # literal right square bracket. + ']' when is_early_charclass => { + currentCls->add(']'); + inCharClassEarly = false; }; # if we hit a quote before anything "real", handle it - #'\\Q' => { fcall readQuotedClass; }; - '\\Q' => { - throw LocatedParseError("\\Q..\\E sequences in character classes not supported"); - }; + '\\Q' => { fcall readQuotedClass; }; '\\E' => { /*noop*/}; # time for the real work to happen - any => { fhold; fgoto charClassGuts; }; + any => { + inCharClassEarly = false; + fhold; + fgoto charClassGuts; + }; *|; ############################################################# @@ -1183,6 +1163,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) { # Literal character any => { currentCls->add(*ts); + inCharClassEarly = false; }; *|; @@ -1232,6 +1213,13 @@ unichar readUtf8CodePoint4c(const u8 *ts) { throw LocatedParseError("POSIX named classes are only " "supported inside a class"); }; + # We don't support POSIX collating elements (neither does PCRE + # or Perl). These look like [.ch.] or [=ch=]. + '\[\.' 
( '\\]' | [^\]] )* '\.\]' | + '\[=' ( '\\]' | [^\]] )* '=\]' => { + throw LocatedParseError("Unsupported POSIX collating " + "element"); + }; # Begin eating characters for class '\[' => eatClass; # Begin quoted literal @@ -1896,6 +1884,11 @@ unique_ptr parse(const char *const c_ptr, ParseMode &globalMode) { // brackets [..]. bool inCharClass = false; + // True if the machine is inside a character class but it has not processed + // any "real" elements yet, i.e. it's still processing meta-characters like + // '^'. + bool inCharClassEarly = false; + // Location at which the current character class began. const u8 *currentClsBegin = p; diff --git a/src/parser/Utf8ComponentClass.cpp b/src/parser/Utf8ComponentClass.cpp index 72fc2146..cdfc974a 100644 --- a/src/parser/Utf8ComponentClass.cpp +++ b/src/parser/Utf8ComponentClass.cpp @@ -75,6 +75,10 @@ PredefinedClass translateForUcpMode(PredefinedClass in, const ParseMode &mode) { } else { return CLASS_UCP_LL; } + case CLASS_PRINT: + return CLASS_XPRINT; + case CLASS_PUNCT: + return CLASS_XPUNCT; case CLASS_SPACE: return CLASS_UCP_XPS; case CLASS_UPPER: @@ -90,7 +94,6 @@ PredefinedClass translateForUcpMode(PredefinedClass in, const ParseMode &mode) { } } -static CodePointSet getPredefinedCodePointSet(PredefinedClass c, const ParseMode &mode) { /* TODO: support properly PCRE_UCP mode and non PCRE_UCP mode */ @@ -117,6 +120,22 @@ CodePointSet getPredefinedCodePointSet(PredefinedClass c, rv |= cf; return rv; } + case CLASS_XPRINT: { + // Same as graph, plus everything with the Zs property. + CodePointSet rv = getPredefinedCodePointSet(CLASS_XGRAPH, mode); + rv |= getUcpZs(); + rv.set(0x180e); // Also included in this class by PCRE 8.38. + return rv; + } + case CLASS_XPUNCT: { + // Everything with the P (punctuation) property, plus code points in S + // (symbols) that are < 128. 
+ CodePointSet rv = getUcpP(); + CodePointSet symbols = getUcpS(); + symbols.unsetRange(128, MAX_UNICODE); + rv |= symbols; + return rv; + } case CLASS_HORZ: { CodePointSet rv; rv.set(0x0009); /* Horizontal tab */ @@ -484,7 +503,8 @@ UTF8ComponentClass *UTF8ComponentClass::clone() const { } bool UTF8ComponentClass::class_empty(void) const { - return cps.none() && cps_ucp.none(); + assert(finalized); + return cps.none(); } void UTF8ComponentClass::createRange(unichar to) { @@ -492,16 +512,16 @@ void UTF8ComponentClass::createRange(unichar to) { unichar from = range_start; if (from > to) { throw LocatedParseError("Range out of order in character class"); - } else { - in_cand_range = false; - CodePointSet ncps; - ncps.setRange(from, to); - if (mode.caseless) { - make_caseless(&ncps); - } - cps |= ncps; - range_start = INVALID_UNICODE; } + + in_cand_range = false; + CodePointSet ncps; + ncps.setRange(from, to); + if (mode.caseless) { + make_caseless(&ncps); + } + cps |= ncps; + range_start = INVALID_UNICODE; } void UTF8ComponentClass::add(PredefinedClass c, bool negative) { @@ -520,11 +540,7 @@ void UTF8ComponentClass::add(PredefinedClass c, bool negative) { pcps.flip(); } - if (isUcp(c)) { - cps_ucp |= pcps; - } else { - cps |= pcps; - } + cps |= pcps; range_start = INVALID_UNICODE; in_cand_range = false; @@ -562,8 +578,6 @@ void UTF8ComponentClass::finalize() { in_cand_range = false; } - cps |= cps_ucp; /* characters from ucp props always case sensitive */ - if (m_negate) { cps.flip(); } @@ -571,31 +585,6 @@ void UTF8ComponentClass::finalize() { finalized = true; } -bool isUcp(PredefinedClass c) { - switch (c) { - case CLASS_ALNUM: - case CLASS_ALPHA: - case CLASS_ANY: - case CLASS_ASCII: - case CLASS_BLANK: - case CLASS_CNTRL: - case CLASS_DIGIT: - case CLASS_GRAPH: - case CLASS_HORZ: - case CLASS_LOWER: - case CLASS_PRINT: - case CLASS_PUNCT: - case CLASS_SPACE: - case CLASS_UPPER: - case CLASS_VERT: - case CLASS_WORD: - case CLASS_XDIGIT: - return false; - 
default: - return true; - } -} - Position UTF8ComponentClass::getHead(NFABuilder &builder, u8 first_byte) { map::const_iterator it = heads.find(first_byte); if (it != heads.end()) { diff --git a/src/parser/Utf8ComponentClass.h b/src/parser/Utf8ComponentClass.h index b2c402f9..f4e7ea32 100644 --- a/src/parser/Utf8ComponentClass.h +++ b/src/parser/Utf8ComponentClass.h @@ -93,7 +93,6 @@ private: void buildFourByte(GlushkovBuildState &bs); CodePointSet cps; - CodePointSet cps_ucp; std::map heads; Position single_pos; @@ -108,7 +107,9 @@ private: }; PredefinedClass translateForUcpMode(PredefinedClass in, const ParseMode &mode); -bool isUcp(PredefinedClass c); + +CodePointSet getPredefinedCodePointSet(PredefinedClass c, + const ParseMode &mode); } // namespace diff --git a/src/parser/check_refs.cpp b/src/parser/check_refs.cpp index 6307a06a..ad81ae76 100644 --- a/src/parser/check_refs.cpp +++ b/src/parser/check_refs.cpp @@ -57,7 +57,7 @@ public: ReferenceVisitor(size_t num_groups, const flat_set &targets) : num_ids(num_groups), names(targets) {} - ~ReferenceVisitor(); + ~ReferenceVisitor() override; void invalid_index(const char *component, unsigned id) { assert(component); diff --git a/src/parser/prefilter.cpp b/src/parser/prefilter.cpp index de99d9f1..f5a0c66c 100644 --- a/src/parser/prefilter.cpp +++ b/src/parser/prefilter.cpp @@ -201,7 +201,7 @@ const ComponentSequence *findCapturingGroup(const Component *root, class PrefilterVisitor : public DefaultComponentVisitor { public: PrefilterVisitor(Component *c, const ParseMode &m) : root(c), mode(m) {} - ~PrefilterVisitor(); + ~PrefilterVisitor() override; /** \brief Calls the visitor (recursively) on a new replacement component * we've just created. 
Takes care of freeing it if the sequence is itself diff --git a/src/parser/shortcut_literal.cpp b/src/parser/shortcut_literal.cpp index d2b77dc5..f6f5d383 100644 --- a/src/parser/shortcut_literal.cpp +++ b/src/parser/shortcut_literal.cpp @@ -64,7 +64,7 @@ namespace ue2 { */ class ConstructLiteralVisitor : public ConstComponentVisitor { public: - ~ConstructLiteralVisitor(); + ~ConstructLiteralVisitor() override; /** \brief Thrown if this component does not represent a literal. */ struct NotLiteral {}; diff --git a/src/parser/unsupported.cpp b/src/parser/unsupported.cpp index b0b15d8d..c97a5750 100644 --- a/src/parser/unsupported.cpp +++ b/src/parser/unsupported.cpp @@ -44,7 +44,7 @@ namespace ue2 { * an unsupported component. */ class UnsupportedVisitor : public DefaultConstComponentVisitor { public: - ~UnsupportedVisitor(); + ~UnsupportedVisitor() override; void pre(const ComponentAssertion &) override { throw ParseError("Zero-width assertions are not supported."); } diff --git a/src/rose/catchup.c b/src/rose/catchup.c index b9fcd784..77b12b49 100644 --- a/src/rose/catchup.c +++ b/src/rose/catchup.c @@ -379,7 +379,7 @@ void ensureEnd(struct mq *q, UNUSED u32 qi, s64a final_loc) { DEBUG_PRINTF("ensure MQE_END %lld for queue %u\n", final_loc, qi); if (final_loc >= q_last_loc(q)) { /* TODO: ensure situation does not arise */ - assert(q->items[q->end - 1].type != MQE_END); + assert(q_last_type(q) != MQE_END); pushQueueNoMerge(q, MQE_END, final_loc); } } diff --git a/src/rose/match.c b/src/rose/match.c index 32224c24..be9bc35e 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -758,7 +758,7 @@ found_miracle: q_skip_forward_to(q, miracle_loc); - if (q->items[q->end - 1].type == MQE_START) { + if (q_last_type(q) == MQE_START) { DEBUG_PRINTF("miracle caused infix to die\n"); return 0; } @@ -853,7 +853,7 @@ char roseTestLeftfix(const struct RoseEngine *t, const struct RoseRole *tr, } } - if (q_cur_loc(q) < loc || q->items[q->end - 1].type != MQE_START) { + if 
(q_cur_loc(q) < loc || q_last_type(q) != MQE_START) { if (left->infix) { if (infixTooOld(q, loc)) { DEBUG_PRINTF("infix %u died of old age\n", ri); diff --git a/src/rose/rose_build.h b/src/rose/rose_build.h index 1a1fc223..bef2114f 100644 --- a/src/rose/rose_build.h +++ b/src/rose/rose_build.h @@ -42,6 +42,7 @@ #include "rose_in_graph.h" #include "util/alloc.h" #include "util/charreach.h" +#include "util/ue2_containers.h" #include "util/ue2string.h" #include @@ -72,8 +73,8 @@ public: /** \brief True if we can not establish that at most a single callback will * be generated at a given offset from this set of reports. */ - virtual bool requiresDedupeSupport(const std::set &reports) const - = 0; + virtual bool requiresDedupeSupport(const ue2::flat_set &reports) + const = 0; }; /** \brief Abstract interface intended for callers from elsewhere in the tree, diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index 0c556dfe..675f8c68 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -271,16 +271,13 @@ public: typedef Holder_StateSet StateSet; typedef ue2::unordered_map StateMap; - explicit Automaton_Holder(const NGHolder &g_in) : g(g_in), bad(false) { + explicit Automaton_Holder(const NGHolder &g_in) : g(g_in) { for (auto v : vertices_range(g)) { vertexToIndex[v] = indexToVertex.size(); indexToVertex.push_back(v); } - if (indexToVertex.size() > ANCHORED_NFA_STATE_LIMIT) { - bad = true; - return; - } + assert(indexToVertex.size() <= ANCHORED_NFA_STATE_LIMIT); DEBUG_PRINTF("%zu states\n", indexToVertex.size()); init.wdelay = 0; @@ -400,7 +397,6 @@ public: array alpha; array unalpha; u16 alphasize; - bool bad; }; } // namespace @@ -670,13 +666,13 @@ int finalise_out(RoseBuildImpl &tbi, const NGHolder &h, static int addAutomaton(RoseBuildImpl &tbi, const NGHolder &h, ReportID *remap) { - Automaton_Holder autom(h); - - if (autom.bad) { + if (num_vertices(h) > ANCHORED_NFA_STATE_LIMIT) { DEBUG_PRINTF("autom 
bad!\n"); return ANCHORED_FAIL; } + Automaton_Holder autom(h); + unique_ptr out_dfa = ue2::make_unique(NFA_OUTFIX); if (!determinise(autom, out_dfa->states, MAX_DFA_STATES)) { return finalise_out(tbi, h, autom, move(out_dfa), remap); @@ -738,7 +734,6 @@ void buildSimpleDfas(const RoseBuildImpl &tbi, NGHolder h; populate_holder(simple.first, exit_ids, &h); Automaton_Holder autom(h); - assert(!autom.bad); unique_ptr rdfa = ue2::make_unique(NFA_OUTFIX); UNUSED int rv = determinise(autom, rdfa->states, MAX_DFA_STATES); assert(!rv); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index bbc8644e..e17953aa 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2687,12 +2687,6 @@ void fillInReportInfo(RoseEngine *engine, u32 reportOffset, sizeof(internal_report)); } -static -void populateInvDkeyTable(char *ptr, const ReportManager &rm) { - vector table = rm.getDkeyToReportTable(); - memcpy(ptr, table.data(), byte_length(table)); -} - static bool hasSimpleReports(const vector &reports) { auto it = find_if(reports.begin(), reports.end(), isComplexReport); @@ -4154,7 +4148,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->ekeyCount = rm.numEkeys(); engine->dkeyCount = rm.numDkeys(); engine->invDkeyOffset = dkeyOffset; - populateInvDkeyTable(ptr + dkeyOffset, rm); + copy_bytes(ptr + dkeyOffset, rm.getDkeyToReportTable()); engine->somHorizon = ssm.somPrecision(); engine->somLocationCount = ssm.numSomSlots(); @@ -4314,33 +4308,22 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { buildLitBenefits(*this, engine.get(), base_lits_benefits_offset); // Copy in other tables - memcpy(ptr + bc.engine_blob_base, bc.engine_blob.data(), - byte_length(bc.engine_blob)); - - memcpy(ptr + engine->literalOffset, literalTable.data(), - byte_length(literalTable)); - memcpy(ptr + engine->roleOffset, bc.roleTable.data(), - byte_length(bc.roleTable)); - copy(leftInfoTable.begin(), 
leftInfoTable.end(), - (LeftNfaInfo *)(ptr + engine->leftOffset)); + copy_bytes(ptr + bc.engine_blob_base, bc.engine_blob); + copy_bytes(ptr + engine->literalOffset, literalTable); + copy_bytes(ptr + engine->roleOffset, bc.roleTable); + copy_bytes(ptr + engine->leftOffset, leftInfoTable); fillLookaroundTables(ptr + lookaroundTableOffset, ptr + lookaroundReachOffset, bc.lookaround); fillInSomRevNfas(engine.get(), ssm, rev_nfa_table_offset, rev_nfa_offsets); - memcpy(ptr + engine->predOffset, predTable.data(), byte_length(predTable)); - memcpy(ptr + engine->rootRoleOffset, rootRoleTable.data(), - byte_length(rootRoleTable)); - memcpy(ptr + engine->anchoredReportMapOffset, art.data(), byte_length(art)); - memcpy(ptr + engine->anchoredReportInverseMapOffset, arit.data(), - byte_length(arit)); - memcpy(ptr + engine->multidirectOffset, mdr_reports.data(), - byte_length(mdr_reports)); - - copy(activeLeftIter.begin(), activeLeftIter.end(), - (mmbit_sparse_iter *)(ptr + engine->activeLeftIterOffset)); - - memcpy(ptr + engine->sideOffset, sideTable.data(), byte_length(sideTable)); + copy_bytes(ptr + engine->predOffset, predTable); + copy_bytes(ptr + engine->rootRoleOffset, rootRoleTable); + copy_bytes(ptr + engine->anchoredReportMapOffset, art); + copy_bytes(ptr + engine->anchoredReportInverseMapOffset, arit); + copy_bytes(ptr + engine->multidirectOffset, mdr_reports); + copy_bytes(ptr + engine->activeLeftIterOffset, activeLeftIter); + copy_bytes(ptr + engine->sideOffset, sideTable); DEBUG_PRINTF("rose done %p\n", engine.get()); return engine; diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 34e76269..a2bd971e 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -1631,20 +1631,23 @@ bool triggerKillsRoseGraph(const RoseBuildImpl &tbi, const left_id &left, assert(left.graph()); const NGHolder &h = *left.graph(); + ue2::flat_set all_states; + insert(&all_states, vertices(h)); + assert(out_degree(h.startDs, h) 
== 1); /* triggered don't use sds */ + DEBUG_PRINTF("removing sds\n"); + all_states.erase(h.startDs); + + ue2::flat_set states; + /* check each pred literal to see if they all kill previous graph * state */ for (u32 lit_id : tbi.g[source(e, tbi.g)].literals) { const rose_literal_id &pred_lit = tbi.literals.right.at(lit_id); const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s); - set states; - insert(&states, vertices(h)); - assert(out_degree(h.startDs, h) == 1); /* triggered don't use sds */ - DEBUG_PRINTF("removing sds\n"); - states.erase(h.startDs); DEBUG_PRINTF("running graph %zu\n", states.size()); - execute_graph(h, s, &states, true); - DEBUG_PRINTF("ran\n"); + states = execute_graph(h, s, all_states, true); + DEBUG_PRINTF("ran, %zu states on\n", states.size()); if (!states.empty()) { return false; diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 26a7c606..3112d639 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -130,6 +130,8 @@ private: friend depth findMinWidth(const suffix_id &s); friend depth findMaxWidth(const suffix_id &s); + friend depth findMinWidth(const suffix_id &s, u32 top); + friend depth findMaxWidth(const suffix_id &s, u32 top); }; std::set all_reports(const suffix_id &s); @@ -138,6 +140,8 @@ bool has_eod_accepts(const suffix_id &s); bool has_non_eod_accepts(const suffix_id &s); depth findMinWidth(const suffix_id &s); depth findMaxWidth(const suffix_id &s); +depth findMinWidth(const suffix_id &s, u32 top); +depth findMaxWidth(const suffix_id &s, u32 top); size_t hash_value(const suffix_id &s); /** \brief represents an engine to the left of a rose role */ diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 9f309985..cc5bbc70 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -77,6 +77,8 @@ RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, SomSlotManager &ssm_in, hasSom(false), group_weak_end(0), group_end(0), + 
anchored_base_id(MO_INVALID_IDX), + nonbenefits_base_id(MO_INVALID_IDX), ematcher_region_size(0), floating_direct_report(false), eod_event_literal_id(MO_INVALID_IDX), @@ -536,7 +538,7 @@ u32 RoseBuildImpl::getNewLiteralId() { } static -bool requiresDedupe(const NGHolder &h, const set &reports, +bool requiresDedupe(const NGHolder &h, const ue2::flat_set &reports, const Grey &grey) { /* TODO: tighten */ NFAVertex seen_vert = NFAGraph::null_vertex(); @@ -579,13 +581,14 @@ bool requiresDedupe(const NGHolder &h, const set &reports, class RoseDedupeAuxImpl : public RoseDedupeAux { public: explicit RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in); - bool requiresDedupeSupport(const set &reports) const override; + bool requiresDedupeSupport( + const ue2::flat_set &reports) const override; const RoseBuildImpl &tbi; - map > vert_map; - map > suffix_map; - map > outfix_map; - map > puff_map; + map> vert_map; + map> suffix_map; + map> outfix_map; + map> puff_map; }; unique_ptr RoseBuildImpl::generateDedupeAux() const { @@ -599,6 +602,8 @@ RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in) : tbi(tbi_in) { const RoseGraph &g = tbi.g; + set suffixes; + for (auto v : vertices_range(g)) { // Literals in the small block table don't count as dupes: although // they have copies in the anchored table, the two are never run in the @@ -609,10 +614,16 @@ RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in) } } + // Several vertices may share a suffix, so we collect the set of + // suffixes first to avoid repeating work. 
if (g[v].suffix) { - for (const auto &report_id : all_reports(g[v].suffix)) { - suffix_map[report_id].insert(g[v].suffix); - } + suffixes.insert(g[v].suffix); + } + } + + for (const auto &suffix : suffixes) { + for (const auto &report_id : all_reports(suffix)) { + suffix_map[report_id].insert(suffix); } } @@ -634,8 +645,8 @@ RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in) } } -bool RoseDedupeAuxImpl::requiresDedupeSupport(const set &reports) - const { +bool RoseDedupeAuxImpl::requiresDedupeSupport( + const ue2::flat_set &reports) const { /* TODO: this could be expanded to check for offset or character constraints */ @@ -897,6 +908,17 @@ depth findMinWidth(const suffix_id &s) { } } +depth findMinWidth(const suffix_id &s, u32 top) { + assert(s.graph() || s.castle() || s.haig() || s.dfa()); + if (s.graph()) { + return findMinWidth(*s.graph(), top); + } else if (s.castle()) { + return findMinWidth(*s.castle(), top); + } else { + return s.dfa_min_width; + } +} + depth findMaxWidth(const suffix_id &s) { assert(s.graph() || s.castle() || s.haig() || s.dfa()); if (s.graph()) { @@ -908,6 +930,17 @@ depth findMaxWidth(const suffix_id &s) { } } +depth findMaxWidth(const suffix_id &s, u32 top) { + assert(s.graph() || s.castle() || s.haig() || s.dfa()); + if (s.graph()) { + return findMaxWidth(*s.graph(), top); + } else if (s.castle()) { + return findMaxWidth(*s.castle(), top); + } else { + return s.dfa_max_width; + } +} + bool has_eod_accepts(const suffix_id &s) { assert(s.graph() || s.castle() || s.haig() || s.dfa()); if (s.graph()) { diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp index 87b6936c..88deaa25 100644 --- a/src/rose/rose_build_role_aliasing.cpp +++ b/src/rose/rose_build_role_aliasing.cpp @@ -439,12 +439,16 @@ size_t hashRightRoleProperties(RoseVertex v, const RoseGraph &g) { hash_combine(val, hash_range(begin(props.reports), end(props.reports))); if (props.suffix) { - hash_combine(val, 
all_reports(props.suffix)); - if (props.suffix.graph) { - hash_combine(val, num_vertices(*props.suffix.graph)); + const auto &suffix = props.suffix; + if (suffix.castle) { + hash_combine(val, suffix.castle->reach()); + hash_combine(val, suffix.castle->repeats.size()); } - if (props.suffix.haig) { - hash_combine(val, hash_dfa(*props.suffix.haig)); + if (suffix.graph) { + hash_combine(val, num_vertices(*suffix.graph)); + } + if (suffix.haig) { + hash_combine(val, hash_dfa(*suffix.haig)); } } @@ -747,14 +751,17 @@ void pruneReportIfUnused(const RoseBuildImpl &tbi, shared_ptr h, * Castle. */ static void pruneCastle(CastleProto &castle, ReportID report) { - for (map::iterator it = castle.repeats.begin(); - it != castle.repeats.end(); /* incr inside */) { - if (contains(it->second.reports, report)) { - ++it; - } else { - castle.repeats.erase(it++); + unordered_set dead; // tops to remove. + for (const auto &m : castle.repeats) { + if (!contains(m.second.reports, report)) { + dead.insert(m.first); } } + + for (const auto &top : dead) { + castle.erase(top); + } + assert(!castle.repeats.empty()); } @@ -794,7 +801,7 @@ void pruneUnusedTops(CastleProto &castle, const RoseGraph &g, for (u32 top : assoc_keys(castle.repeats)) { if (!contains(used_tops, top)) { DEBUG_PRINTF("removing unused top %u\n", top); - castle.repeats.erase(top); + castle.erase(top); } } } diff --git a/src/rose/rose_build_width.cpp b/src/rose/rose_build_width.cpp index 2a7f2bd6..6bfcee48 100644 --- a/src/rose/rose_build_width.cpp +++ b/src/rose/rose_build_width.cpp @@ -94,10 +94,11 @@ u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) { } if (g[v].suffix) { - depth suffix_width = findMinWidth(g[v].suffix); + depth suffix_width = findMinWidth(g[v].suffix, g[v].suffix.top); assert(suffix_width.is_reachable()); - DEBUG_PRINTF("%zu has suffix (width %s), can fire report at %u\n", - g[v].idx, suffix_width.str().c_str(), + DEBUG_PRINTF("%zu has suffix with top %u (width %s), can fire " + 
"report at %u\n", + g[v].idx, g[v].suffix.top, suffix_width.str().c_str(), w + suffix_width); minWidth = min(minWidth, w + suffix_width); } @@ -146,8 +147,9 @@ u32 findMaxBAWidth(const RoseBuildImpl &tbi) { if (has_non_eod_accepts(g[v].suffix)) { return ROSE_BOUND_INF; } - depth suffix_width = findMaxWidth(g[v].suffix); - DEBUG_PRINTF("suffix max width %s\n", suffix_width.str().c_str()); + depth suffix_width = findMaxWidth(g[v].suffix, g[v].suffix.top); + DEBUG_PRINTF("suffix max width for top %u is %s\n", g[v].suffix.top, + suffix_width.str().c_str()); assert(suffix_width.is_reachable()); if (!suffix_width.is_finite()) { DEBUG_PRINTF("suffix too wide\n"); diff --git a/src/rose/stream.c b/src/rose/stream.c index aab79a29..b100eeae 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -167,7 +167,7 @@ found_miracle: DEBUG_PRINTF("skip q forward, %lld to %lld\n", begin_loc, miracle_loc); q_skip_forward_to(q, miracle_loc); - if (q->items[q->end - 1].type == MQE_START) { + if (q_last_type(q) == MQE_START) { DEBUG_PRINTF("miracle caused infix to die\n"); return MIRACLE_DEAD; } diff --git a/src/util/compare.h b/src/util/compare.h index b7237ec2..11c01f08 100644 --- a/src/util/compare.h +++ b/src/util/compare.h @@ -98,18 +98,22 @@ u64a theirtoupper64(const u64a x) { static really_inline int cmpNocaseNaive(const u8 *p1, const u8 *p2, size_t len) { const u8 *pEnd = (const u8 *)p1 + len; - for (; p1 < pEnd; p1++, p2++) - if (mytolower(*p1) != mytolower(*p2)) + for (; p1 < pEnd; p1++, p2++) { + if (mytolower(*p1) != mytolower(*p2)) { return 1; + } + } return 0; } static really_inline int cmpCaseNaive(const u8 *p1, const u8 *p2, size_t len) { const u8 *pEnd = (const u8 *)p1 + len; - for (; p1 < pEnd; p1++, p2++) - if (*p1 != *p2) + for (; p1 < pEnd; p1++, p2++) { + if (*p1 != *p2) { return 1; + } + } return 0; } diff --git a/src/util/container.h b/src/util/container.h index b4a10c89..62e841c1 100644 --- a/src/util/container.h +++ b/src/util/container.h @@ -33,8 +33,13 @@ 
#ifndef UTIL_CONTAINER_H #define UTIL_CONTAINER_H +#include "ue2common.h" + #include +#include +#include #include +#include #include namespace ue2 { @@ -92,11 +97,35 @@ std::set assoc_keys(const C &container) { return keys; } +/** + * \brief Return the length in bytes of the given vector of (POD) objects. + */ template typename std::vector::size_type byte_length(const std::vector &vec) { + static_assert(std::is_pod::value, "should be pod"); return vec.size() * sizeof(T); } +/** + * \brief Copy the given vector of POD objects to the given location in memory. + * It is safe to give this function an empty vector. + */ +template +void *copy_bytes(void *dest, const std::vector &vec) { + static_assert(std::is_pod::value, "should be pod"); + assert(dest); + + // Since we're generally using this function to write into the bytecode, + // dest should be appropriately aligned for T. + assert(ISALIGNED_N(dest, alignof(T))); + + if (vec.empty()) { + return dest; // Protect memcpy against null pointers. + } + assert(vec.data() != nullptr); + return std::memcpy(dest, vec.data(), byte_length(vec)); +} + template bool is_subset_of(const OrderedContainer1 &small, const OrderedContainer2 &big) { static_assert(std::is_same= val_infinity) { - DEBUG_PRINTF("depth %llu too large to represent!\n", rv); + DEBUG_PRINTF("depth %lld too large to represent!\n", rv); throw DepthOverflowError(); } diff --git a/src/util/multibit.c b/src/util/multibit.c index ca5e5656..c22b73ff 100644 --- a/src/util/multibit.c +++ b/src/util/multibit.c @@ -142,23 +142,25 @@ const u32 mmbit_root_offset_from_level[7] = { u32 mmbit_size(u32 total_bits) { MDEBUG_PRINTF("%u\n", total_bits); - // UE-2228: multibit has bugs in very, very large cases that we should be - // protected against at compile time by resource limits. - assert(total_bits <= 1U << 30); - // Flat model multibit structures are just stored as a bit vector. 
if (total_bits <= MMB_FLAT_MAX_BITS) { return ROUNDUP_N(total_bits, 8) / 8; } - u32 current_level = 1; - u32 total = 0; + u64a current_level = 1; // Number of blocks on current level. + u64a total = 0; // Total number of blocks. while (current_level * MMB_KEY_BITS < total_bits) { total += current_level; current_level <<= MMB_KEY_SHIFT; } - total += (total_bits + MMB_KEY_BITS - 1)/MMB_KEY_BITS; - return sizeof(MMB_TYPE) * total; + + // Last level is a one-for-one bit vector. It needs room for total_bits + // elements, rounded up to the nearest block. + u64a last_level = ((u64a)total_bits + MMB_KEY_BITS - 1) / MMB_KEY_BITS; + total += last_level; + + assert(total * sizeof(MMB_TYPE) <= UINT32_MAX); + return (u32)(total * sizeof(MMB_TYPE)); } #ifdef DUMP_SUPPORT diff --git a/src/util/multibit.h b/src/util/multibit.h index 251316b3..771c158d 100644 --- a/src/util/multibit.h +++ b/src/util/multibit.h @@ -235,18 +235,18 @@ const u8 *mmbit_get_level_root_const(const u8 *bits, u32 level) { /** \brief get the block for this key on the current level as a u8 ptr */ static really_inline u8 *mmbit_get_block_ptr(u8 *bits, u32 max_level, u32 level, u32 key) { - return mmbit_get_level_root(bits, level) + - (key >> (mmbit_get_ks(max_level, level) + MMB_KEY_SHIFT)) * - sizeof(MMB_TYPE); + u8 *level_root = mmbit_get_level_root(bits, level); + u32 ks = mmbit_get_ks(max_level, level); + return level_root + ((u64a)key >> (ks + MMB_KEY_SHIFT)) * sizeof(MMB_TYPE); } /** \brief get the block for this key on the current level as a const u8 ptr */ static really_inline const u8 *mmbit_get_block_ptr_const(const u8 *bits, u32 max_level, u32 level, u32 key) { - return mmbit_get_level_root_const(bits, level) + - (key >> (mmbit_get_ks(max_level, level) + MMB_KEY_SHIFT)) * - sizeof(MMB_TYPE); + const u8 *level_root = mmbit_get_level_root_const(bits, level); + u32 ks = mmbit_get_ks(max_level, level); + return level_root + ((u64a)key >> (ks + MMB_KEY_SHIFT)) * sizeof(MMB_TYPE); } /** \brief get the 
_byte_ for this key on the current level as a u8 ptr */ @@ -254,7 +254,7 @@ static really_inline u8 *mmbit_get_byte_ptr(u8 *bits, u32 max_level, u32 level, u32 key) { u8 *level_root = mmbit_get_level_root(bits, level); u32 ks = mmbit_get_ks(max_level, level); - return level_root + (key >> (ks + MMB_KEY_SHIFT - 3)); + return level_root + ((u64a)key >> (ks + MMB_KEY_SHIFT - 3)); } /** \brief get our key value for the current level */ @@ -721,11 +721,11 @@ u32 mmbit_iterate_bounded_flat(const u8 *bits, u32 total_bits, u32 begin, } static really_inline -MMB_TYPE get_lowhi_masks(u32 level, u32 max_level, u32 block_min, u32 block_max, - u32 block_base) { +MMB_TYPE get_lowhi_masks(u32 level, u32 max_level, u64a block_min, u64a block_max, + u64a block_base) { const u32 level_shift = (max_level - level) * MMB_KEY_SHIFT; - u32 lshift = (block_min - block_base) >> level_shift; - u32 ushift = (block_max - block_base) >> level_shift; + u64a lshift = (block_min - block_base) >> level_shift; + u64a ushift = (block_max - block_base) >> level_shift; MMB_TYPE lmask = lshift < 64 ? ~mmb_mask_zero_to_nocheck(lshift) : 0; MMB_TYPE umask = ushift < 63 ? 
mmb_mask_zero_to_nocheck(ushift + 1) : MMB_ALL_ONES; @@ -734,7 +734,7 @@ MMB_TYPE get_lowhi_masks(u32 level, u32 max_level, u32 block_min, u32 block_max, static really_inline u32 mmbit_iterate_bounded_big(const u8 *bits, u32 total_bits, u32 it_start, u32 it_end) { - u32 key = 0; + u64a key = 0; u32 ks = mmbit_keyshift(total_bits); const u32 max_level = mmbit_maxlevel_from_keyshift(ks); u32 level = 0; @@ -743,9 +743,9 @@ u32 mmbit_iterate_bounded_big(const u8 *bits, u32 total_bits, u32 it_start, u32 assert(level <= max_level); u32 block_width = MMB_KEY_BITS << ks; - u32 block_base = key*block_width; - u32 block_min = MAX(it_start, block_base); - u32 block_max = MIN(it_end, block_base + block_width - 1); + u64a block_base = key * block_width; + u64a block_min = MAX(it_start, block_base); + u64a block_max = MIN(it_end, block_base + block_width - 1); const u8 *block_ptr = mmbit_get_level_root_const(bits, level) + key * sizeof(MMB_TYPE); MMB_TYPE block = mmb_load(block_ptr); @@ -761,13 +761,14 @@ u32 mmbit_iterate_bounded_big(const u8 *bits, u32 total_bits, u32 it_start, u32 // No bit found, go up a level // we know that this block didn't have any answers, so we can push // our start iterator forward. 
- it_start = block_base + block_width; - if (it_start > it_end) { + u64a next_start = block_base + block_width; + if (next_start > it_end) { break; } if (level-- == 0) { break; } + it_start = next_start; key >>= MMB_KEY_SHIFT; ks += MMB_KEY_SHIFT; } diff --git a/src/util/report_manager.cpp b/src/util/report_manager.cpp index eacef005..425f166d 100644 --- a/src/util/report_manager.cpp +++ b/src/util/report_manager.cpp @@ -128,11 +128,9 @@ vector ReportManager::getDkeyToReportTable() const { } void ReportManager::assignDkeys(const RoseBuild *rose) { - unique_ptr dedupe = rose->generateDedupeAux(); - DEBUG_PRINTF("assigning...\n"); - map> ext_to_int; + map> ext_to_int; for (u32 i = 0; i < reportIds.size(); i++) { const Report &ir = reportIds[i]; @@ -143,6 +141,8 @@ void ReportManager::assignDkeys(const RoseBuild *rose) { } } + auto dedupe = rose->generateDedupeAux(); + for (const auto &m : ext_to_int) { u32 ext = m.first; diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index 9bc74e23..a8925a3c 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -7,7 +7,8 @@ if(NOT XCODE) else() set(CMAKE_CXX_FLAGS "-isystem ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CXX_FLAGS}") endif() -include_directories(${CMAKE_SOURCE_DIR}/util) + +include_directories(${PROJECT_SOURCE_DIR}) # remove some warnings # cmake's scope means these only apply here @@ -26,7 +27,7 @@ endif() add_library(gtest ${gtest_SOURCES}) -add_definitions(-DGTEST_HAS_PTHREAD=0 -DSRCDIR=${CMAKE_SOURCE_DIR}) +add_definitions(-DGTEST_HAS_PTHREAD=0 -DSRCDIR=${PROJECT_SOURCE_DIR}) if (NOT RELEASE_BUILD) set(unit_internal_SOURCES diff --git a/unit/hyperscan/bad_patterns.txt b/unit/hyperscan/bad_patterns.txt index 1ad445b3..fb2a2357 100644 --- a/unit/hyperscan/bad_patterns.txt +++ b/unit/hyperscan/bad_patterns.txt @@ -85,7 +85,6 @@ 84:/[=\]=]/ #Unsupported POSIX collating element at index 0. 85:/A(?!)+Z/ #Invalid repeat at index 5. 86:/\X/ #\X unsupported at index 0. 
-87:/[a\Qz\E]/ #\Q..\E sequences in character classes not supported at index 2. 88:/[A-\d]/ #Invalid range in character class at index 3. 89:/[A-[:digit:]]/ #Invalid range in character class at index 3. 90:/B[--[:digit:]--]+/ #Invalid range in character class at index 4. @@ -128,3 +127,8 @@ 128:/(*UTF8)^fo?ob{ro|nax_off\Qt=10omnax+8Wnah/ñññññññññññññññññññññññññññ0}l.{1,60}Car*k|npanomnax+8Wnah/ #Expression is not valid UTF-8. 129:/bignum \1111111111111111111/ #Number is too big at index 7. 130:/foo|&{5555555,}/ #Bounded repeat is too large. +131:/[a[..]]/ #Unsupported POSIX collating element at index 2. +132:/[a[==]]/ #Unsupported POSIX collating element at index 2. +133:/[a[.\].]]/ #Unsupported POSIX collating element at index 2. +134:/[a[=\]=]]/ #Unsupported POSIX collating element at index 2. +135:/[^\D\d]/8W #Pattern can never match. diff --git a/unit/internal/multi_bit.cpp b/unit/internal/multi_bit.cpp index 27725219..3f5c5908 100644 --- a/unit/internal/multi_bit.cpp +++ b/unit/internal/multi_bit.cpp @@ -363,7 +363,9 @@ TEST_P(MultiBitTest, BoundedIteratorSingle) { ASSERT_TRUE(ba != nullptr); // Set one bit on and run some checks. - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { + SCOPED_TRACE(i); + mmbit_clear(ba, test_size); mmbit_set(ba, test_size, i); @@ -381,7 +383,12 @@ TEST_P(MultiBitTest, BoundedIteratorSingle) { // Scanning from one past our bit to the end should find nothing. if (i != test_size - 1) { - ASSERT_EQ(MMB_INVALID, mmbit_iterate_bounded(ba, test_size, i + 1, test_size)); + // Ordinary iterator. + ASSERT_EQ(MMB_INVALID, mmbit_iterate(ba, test_size, i)); + + // Bounded iterator. + ASSERT_EQ(MMB_INVALID, + mmbit_iterate_bounded(ba, test_size, i + 1, test_size)); } } } @@ -393,7 +400,7 @@ TEST_P(MultiBitTest, BoundedIteratorAll) { // Switch everything on. 
fill_mmbit(ba, test_size); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { if (i != 0) { ASSERT_EQ(0U, mmbit_iterate_bounded(ba, test_size, 0, i)); } @@ -408,13 +415,13 @@ TEST_P(MultiBitTest, BoundedIteratorEven) { // Set every even-numbered bit and see what we can see. mmbit_clear(ba, test_size); - for (u32 i = 0; i < test_size; i += 2) { + for (u64a i = 0; i < test_size; i += 2) { mmbit_set(ba, test_size, i); } u32 even_stride = stride % 2 ? stride + 1 : stride; - for (u32 i = 0; i < test_size; i += even_stride) { + for (u64a i = 0; i < test_size; i += even_stride) { // Scanning from each even bit to the end should find itself. ASSERT_EQ(i, mmbit_iterate_bounded(ba, test_size, i, test_size)); @@ -439,13 +446,13 @@ TEST_P(MultiBitTest, BoundedIteratorOdd) { // Set every odd-numbered bit and see what we can see. mmbit_clear(ba, test_size); - for (u32 i = 1; i < test_size; i += 2) { + for (u64a i = 1; i < test_size; i += 2) { mmbit_set(ba, test_size, i); } u32 even_stride = stride % 2 ? stride + 1 : stride; - for (u32 i = 0; i < test_size; i += even_stride) { + for (u64a i = 0; i < test_size; i += even_stride) { // Scanning from each even bit to the end should find i+1. 
if (i+1 < test_size) { ASSERT_EQ(i+1, mmbit_iterate_bounded(ba, test_size, i, test_size)); @@ -473,7 +480,7 @@ TEST_P(MultiBitTest, Set) { mmbit_clear(ba, test_size); ASSERT_FALSE(mmbit_any(ba, test_size)); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { SCOPED_TRACE(i); // set a bit that wasn't set before @@ -500,7 +507,7 @@ TEST_P(MultiBitTest, Iter) { mmbit_clear(ba, test_size); ASSERT_EQ(MMB_INVALID, mmbit_iterate(ba, test_size, MMB_INVALID)); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { SCOPED_TRACE(i); mmbit_clear(ba, test_size); mmbit_set(ba, test_size, i); @@ -517,13 +524,13 @@ TEST_P(MultiBitTest, IterAll) { ASSERT_EQ(MMB_INVALID, mmbit_iterate(ba, test_size, MMB_INVALID)); // Set all bits. - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { mmbit_set(ba, test_size, i); } // Find all bits. u32 it = MMB_INVALID; - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { ASSERT_EQ(i, mmbit_iterate(ba, test_size, it)); it = i; } @@ -536,7 +543,7 @@ TEST_P(MultiBitTest, AnyPrecise) { mmbit_clear(ba, test_size); ASSERT_FALSE(mmbit_any_precise(ba, test_size)); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { SCOPED_TRACE(i); mmbit_clear(ba, test_size); mmbit_set(ba, test_size, i); @@ -551,7 +558,7 @@ TEST_P(MultiBitTest, Any) { mmbit_clear(ba, test_size); ASSERT_FALSE(mmbit_any(ba, test_size)); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { SCOPED_TRACE(i); mmbit_clear(ba, test_size); mmbit_set(ba, test_size, i); @@ -567,7 +574,7 @@ TEST_P(MultiBitTest, UnsetRange1) { fill_mmbit(ba, test_size); // Use mmbit_unset_range to switch off any single bit. 
- for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { SCOPED_TRACE(i); ASSERT_TRUE(mmbit_isset(ba, test_size, i)); mmbit_unset_range(ba, test_size, i, i + 1); @@ -590,7 +597,7 @@ TEST_P(MultiBitTest, UnsetRange2) { // Use mmbit_unset_range to switch off all bits. mmbit_unset_range(ba, test_size, 0, test_size); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { SCOPED_TRACE(i); ASSERT_FALSE(mmbit_isset(ba, test_size, i)); } @@ -601,12 +608,12 @@ TEST_P(MultiBitTest, UnsetRange3) { ASSERT_TRUE(ba != nullptr); // Use mmbit_unset_range to switch off bits in chunks of 3. - for (u32 i = 0; i < test_size - 3; i += stride) { + for (u64a i = 0; i < test_size - 3; i += stride) { // Switch on the bit before, the bits in question, and the bit after. if (i > 0) { mmbit_set(ba, test_size, i - 1); } - for (u32 j = i; j < min(i + 4, test_size); j++) { + for (u64a j = i; j < min(i + 4, (u64a)test_size); j++) { mmbit_set(ba, test_size, j); } @@ -635,7 +642,7 @@ TEST_P(MultiBitTest, InitRangeAll) { mmbit_init_range(ba, test_size, 0, test_size); // Make sure they're all set. - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { SCOPED_TRACE(i); ASSERT_TRUE(mmbit_isset(ba, test_size, i)); } @@ -656,7 +663,7 @@ TEST_P(MultiBitTest, InitRangeOne) { SCOPED_TRACE(test_size); ASSERT_TRUE(ba != nullptr); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { mmbit_init_range(ba, test_size, i, i + 1); // Only bit 'i' should be on. @@ -685,7 +692,7 @@ TEST_P(MultiBitTest, InitRangeChunked) { ASSERT_EQ(chunk_begin, mmbit_iterate(ba, test_size, MMB_INVALID)); // All bits in the chunk should be on. 
- for (u32 i = chunk_begin; i < chunk_end; i += stride) { + for (u64a i = chunk_begin; i < chunk_end; i += stride) { SCOPED_TRACE(i); ASSERT_TRUE(mmbit_isset(ba, test_size, i)); } @@ -985,7 +992,7 @@ TEST_P(MultiBitTest, SparseIteratorBeginAll) { vector it; vector bits; bits.reserve(test_size / stride); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { bits.push_back(i); } mmbBuildSparseIterator(it, bits, test_size); @@ -1032,7 +1039,7 @@ TEST_P(MultiBitTest, SparseIteratorBeginThirds) { // Switch every third bits on in state mmbit_clear(ba, test_size); ASSERT_FALSE(mmbit_any(ba, test_size)); - for (u32 i = 0; i < test_size; i += 3) { + for (u64a i = 0; i < test_size; i += 3) { mmbit_set(ba, test_size, i); } @@ -1044,7 +1051,7 @@ TEST_P(MultiBitTest, SparseIteratorBeginThirds) { ASSERT_EQ(0U, val); ASSERT_EQ(0U, idx); - for (u32 i = 0; i < test_size - 3; i += 3) { + for (u64a i = 0; i < test_size - 3; i += 3) { mmbit_unset(ba, test_size, i); val = mmbit_sparse_iter_begin(ba, test_size, &idx, &it[0], &state[0]); ASSERT_EQ(i+3, val); @@ -1060,7 +1067,7 @@ TEST_P(MultiBitTest, SparseIteratorNextAll) { vector it; vector bits; bits.reserve(test_size / stride); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { bits.push_back(i); } mmbBuildSparseIterator(it, bits, test_size); @@ -1103,7 +1110,7 @@ TEST_P(MultiBitTest, SparseIteratorNextExactStrided) { vector it; vector bits; bits.reserve(test_size / stride); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { bits.push_back(i); mmbit_set(ba, test_size, i); } @@ -1135,7 +1142,7 @@ TEST_P(MultiBitTest, SparseIteratorNextNone) { vector it; vector bits; bits.reserve(test_size / stride); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { bits.push_back(i); } mmbBuildSparseIterator(it, bits, test_size); @@ -1164,7 +1171,7 @@ TEST_P(MultiBitTest, 
SparseIteratorUnsetAll) { vector it; vector bits; bits.reserve(test_size / stride); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { bits.push_back(i); } mmbBuildSparseIterator(it, bits, test_size); @@ -1194,10 +1201,10 @@ TEST_P(MultiBitTest, SparseIteratorUnsetHalves) { // Two sparse iterators: one for even bits, one for odd ones vector even, odd; - for (u32 i = 0; i < test_size; i += 2) { + for (u64a i = 0; i < test_size; i += 2) { even.push_back(i); } - for (u32 i = 1; i < test_size; i += 2) { + for (u64a i = 1; i < test_size; i += 2) { odd.push_back(i); } @@ -1277,9 +1284,9 @@ static const MultiBitTestParam multibitTests[] = { { 1U << 28, 15073 }, { 1U << 29, 24413 }, { 1U << 30, 50377 }, + { 1U << 31, 104729 }, - // XXX: cases this large segfault in mmbit_set, FIXME NOW - //{ 1U << 31, 3701 }, + // { UINT32_MAX, 104729 }, // Very slow }; INSTANTIATE_TEST_CASE_P(MultiBit, MultiBitTest, ValuesIn(multibitTests)); diff --git a/unit/internal/nfagraph_find_matches.cpp b/unit/internal/nfagraph_find_matches.cpp index 41d28c52..553d6dc5 100644 --- a/unit/internal/nfagraph_find_matches.cpp +++ b/unit/internal/nfagraph_find_matches.cpp @@ -36,9 +36,9 @@ #include "nfagraph/ng_builder.h" #include "nfagraph/ng.h" #include "nfagraph/ng_asserts.h" -#include "util/target_info.h" #include "hs_compile.h" -#include "ng_find_matches.h" +#include "util/ng_find_matches.h" +#include "util/target_info.h" using namespace std; using namespace testing; diff --git a/unit/internal/repeat.cpp b/unit/internal/repeat.cpp index 95408f28..94f1bdc1 100644 --- a/unit/internal/repeat.cpp +++ b/unit/internal/repeat.cpp @@ -448,6 +448,25 @@ TEST_P(RepeatTest, Pack) { } } +TEST_P(RepeatTest, LargeGap) { + SCOPED_TRACE(testing::Message() << "Repeat: " << info); + + if (info.repeatMax == REPEAT_INF) { + return; // Test not valid for FIRST-type repeats. 
+ } + + for (int i = 0; i < 64; i++) { + u64a top1 = 1000; + repeatStore(&info, ctrl, state, top1, 0); // first top + ASSERT_EQ(top1, repeatLastTop(&info, ctrl, state)); + + // Add a second top after a gap of 2^i bytes. + u64a top2 = top1 + (1ULL << i); + repeatStore(&info, ctrl, state, top2, 1); // second top + ASSERT_EQ(top2, repeatLastTop(&info, ctrl, state)); + } +} + static const u32 sparsePeriods[] = { 2, @@ -505,6 +524,7 @@ const RepeatTestInfo sparseRepeats[] = { { REPEAT_SPARSE_OPTIMAL_P, 4000, 4000 }, { REPEAT_SPARSE_OPTIMAL_P, 4500, 4500 }, { REPEAT_SPARSE_OPTIMAL_P, 5000, 5000 }, + { REPEAT_SPARSE_OPTIMAL_P, 65534, 65534 }, // {N, M} repeats { REPEAT_SPARSE_OPTIMAL_P, 10, 20 }, { REPEAT_SPARSE_OPTIMAL_P, 20, 40 }, @@ -528,7 +548,8 @@ const RepeatTestInfo sparseRepeats[] = { { REPEAT_SPARSE_OPTIMAL_P, 3500, 4000 }, { REPEAT_SPARSE_OPTIMAL_P, 4000, 8000 }, { REPEAT_SPARSE_OPTIMAL_P, 4500, 8000 }, - { REPEAT_SPARSE_OPTIMAL_P, 5000, 5001 } + { REPEAT_SPARSE_OPTIMAL_P, 5000, 5001 }, + { REPEAT_SPARSE_OPTIMAL_P, 60000, 65534 } }; static @@ -802,7 +823,7 @@ TEST_P(SparseOptimalTest, Simple1) { 1000 + info->repeatMax * 2)); ASSERT_EQ(0U, repeatNextMatch(info, ctrl, state, 1000 + info->repeatMax * 2 + 1)); - ASSERT_EQ(0U, repeatNextMatch(info, ctrl, state, 10000)); + ASSERT_EQ(0U, repeatNextMatch(info, ctrl, state, 100000)); } TEST_P(SparseOptimalTest, TwoTopsNeg) { @@ -893,6 +914,24 @@ TEST_P(SparseOptimalTest, Simple3e) { test_sparse3entryExpire(info, ctrl, state, 2 * info->minPeriod - 1); } +TEST_P(SparseOptimalTest, LargeGap) { + SCOPED_TRACE(testing::Message() << "Repeat: " << *info); + + for (int i = 0; i < 64; i++) { + u64a top1 = 1000; + repeatStore(info, ctrl, state, top1, 0); // first top + ASSERT_EQ(top1, repeatLastTop(info, ctrl, state)); + + // Add a second top after a gap of 2^i bytes. 
+ u64a top2 = top1 + (1ULL << i); + if (top2 - top1 < info->minPeriod) { + continue; // not a valid top + } + repeatStore(info, ctrl, state, top2, 1); // second top + ASSERT_EQ(top2, repeatLastTop(info, ctrl, state)); + } +} + TEST_P(SparseOptimalTest, ThreeTops) { SCOPED_TRACE(testing::Message() << "Repeat: " << *info);