From b859e5cb8a0140198ca48c1bfef066940350a47d Mon Sep 17 00:00:00 2001
From: jason taylor
Date: Sun, 4 Sep 2016 19:25:59 -0400
Subject: [PATCH 001/103] Updated nfa source file permissions

Resolves github issue #37
---
 src/nfa/accel_dfa_build_strat.cpp | 0
 src/nfa/accel_dfa_build_strat.h   | 0
 src/nfa/dfa_build_strat.cpp       | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 mode change 100755 => 100644 src/nfa/accel_dfa_build_strat.cpp
 mode change 100755 => 100644 src/nfa/accel_dfa_build_strat.h
 mode change 100755 => 100644 src/nfa/dfa_build_strat.cpp

diff --git a/src/nfa/accel_dfa_build_strat.cpp b/src/nfa/accel_dfa_build_strat.cpp
old mode 100755
new mode 100644
diff --git a/src/nfa/accel_dfa_build_strat.h b/src/nfa/accel_dfa_build_strat.h
old mode 100755
new mode 100644
diff --git a/src/nfa/dfa_build_strat.cpp b/src/nfa/dfa_build_strat.cpp
old mode 100755
new mode 100644

From 8be6c8b2cac99dd06c0eeb1b82a19f3d11c688ae Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Fri, 5 Aug 2016 10:52:24 +1000
Subject: [PATCH 002/103] rose: don't merge large acyclic suffixes

Check earlier in mergeSuffixes that we're not proposing to merge suffixes
above our limit from the acyclic merge path.
---
 src/rose/rose_build_merge.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp
index 759e0dbe..dbd580ed 100644
--- a/src/rose/rose_build_merge.cpp
+++ b/src/rose/rose_build_merge.cpp
@@ -2189,6 +2189,11 @@ void mergeSuffixes(RoseBuildImpl &tbi, SuffixBouquet &suffixes,
         suffix_id s1 = *it;
         const deque<RoseVertex> &verts1 = suffixes.vertices(s1);
         assert(s1.graph() && s1.graph()->kind == NFA_SUFFIX);
+
+        // Caller should ensure that we don't propose merges of graphs that are
+        // already too big.
+        assert(num_vertices(*s1.graph()) < small_merge_max_vertices(tbi.cc));
+
         deque<suffix_id> merged;
         for (auto jt = next(it); jt != suffixes.end(); ++jt) {
             suffix_id s2 = *jt;
@@ -2306,6 +2311,10 @@ void mergeAcyclicSuffixes(RoseBuildImpl &tbi) {

         assert(!g[v].suffix.haig);

+        if (num_vertices(*h) >= small_merge_max_vertices(tbi.cc)) {
+            continue;
+        }
+
         if (!isAcyclic(*h)) {
             continue;
         }

From b96d5c23d1f788c43b770d7a241e33bd49da8fac Mon Sep 17 00:00:00 2001
From: "Xu, Chi"
Date: Fri, 22 Jul 2016 03:35:53 +0800
Subject: [PATCH 003/103] rose: add new instruction CHECK_MASK_32

This is a specialisation of the "lookaround" code.
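For reference, the semantics of the new check can be written as a scalar
sketch (illustrative only; this function does not exist in the tree, and the
real implementation is the 256-bit SIMD validateMask32() below): byte i
passes when (data[i] & and_mask[i]) == cmp_mask[i], bit i of neg_mask
inverts the sense of that test, and bytes outside valid_data_mask are
ignored.

    static int checkMask32Ref(const u8 *data, const u8 *and_mask,
                              const u8 *cmp_mask, u32 neg_mask,
                              u32 valid_data_mask) {
        for (u32 i = 0; i < 32; i++) {
            if (!(valid_data_mask & (1U << i))) {
                continue; // blinded lane: before history or past buffer end
            }
            u32 lane_matches = (data[i] & and_mask[i]) == cmp_mask[i];
            u32 expect_match = !(neg_mask & (1U << i));
            if (lane_matches != expect_match) {
                return 0; // this lane fails the check
            }
        }
        return 1; // all valid lanes passed
    }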
---
 CMakeLists.txt                   |   1 +
 src/rose/program_runtime.h       |  88 +++++++++++++
 src/rose/rose_build_bytecode.cpp |  64 ++++++++++
 src/rose/rose_dump.cpp           |  14 ++
 src/rose/rose_program.h          |  16 ++-
 src/rose/validate_mask.h         |  41 ++++++
 src/util/copybytes.h             |  86 +++++++++++++
 src/util/simd_utils.h            |  26 ++++
 unit/CMakeLists.txt              |   1 +
 unit/internal/rose_mask_32.cpp   | 211 +++++++++++++++++++++++++++++++
 10 files changed, 545 insertions(+), 3 deletions(-)
 create mode 100644 src/util/copybytes.h
 create mode 100644 unit/internal/rose_mask_32.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b0094d94..8f7e9bf0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -555,6 +555,7 @@ set (hs_exec_SRCS
     src/rose/rose_common.h
     src/rose/validate_mask.h
     src/util/bitutils.h
+    src/util/copybytes.h
     src/util/exhaust.h
     src/util/fatbit.h
     src/util/fatbit.c
diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h
index 8bf41715..f54b1347 100644
--- a/src/rose/program_runtime.h
+++ b/src/rose/program_runtime.h
@@ -50,6 +50,7 @@
 #include "ue2common.h"
 #include "hwlm/hwlm.h" // for hwlmcb_rv_t
 #include "util/compare.h"
+#include "util/copybytes.h"
 #include "util/fatbit.h"
 #include "util/multibit.h"

@@ -783,6 +784,82 @@ int roseCheckMask(const struct core_info *ci, u64a and_mask, u64a cmp_mask,
         return 0;
     }
 }
+
+static rose_inline
+int roseCheckMask32(const struct core_info *ci, const u8 *and_mask,
+                    const u8 *cmp_mask, const u32 neg_mask,
+                    s32 checkOffset, u64a end) {
+    const s64a base_offset = (s64a)end - ci->buf_offset;
+    s64a offset = base_offset + checkOffset;
+    DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
+    DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
+
+    if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
+        DEBUG_PRINTF("too early, fail\n");
+        return 0;
+    }
+
+    m256 data = zeroes256(); // consists of the following four parts.
+    s32 c_shift = 0; // blank bytes after current.
+    s32 h_shift = 0; // blank bytes before history.
+    s32 h_len = 32; // number of bytes from history buffer.
+    s32 c_len = 0; // number of bytes from current buffer.
+    /* h_shift + h_len + c_len + c_shift = 32 must hold. */
+
+    if (offset < 0) {
+        s32 h_offset = 0; // the start offset in history buffer.
+        if (offset < -(s64a)ci->hlen) {
+            if (offset + 32 <= -(s64a)ci->hlen) {
+                DEBUG_PRINTF("all before history\n");
+                return 1;
+            }
+            h_shift = -(offset + (s64a)ci->hlen);
+            h_len = 32 - h_shift;
+        } else {
+            h_offset = ci->hlen + offset;
+        }
+        if (offset + 32 > 0) {
+            // part in current buffer.
+            c_len = offset + 32;
+            h_len = -(offset + h_shift);
+            if (c_len > (s64a)ci->len) {
+                // out of current buffer.
+                c_shift = c_len - ci->len;
+                c_len = ci->len;
+            }
+            copy_upto_32_bytes((u8 *)&data - offset, ci->buf, c_len);
+        }
+        assert(h_shift + h_len + c_len + c_shift == 32);
+        copy_upto_32_bytes((u8 *)&data + h_shift, ci->hbuf + h_offset, h_len);
+    } else {
+        if (offset + 32 > (s64a)ci->len) {
+            if (offset >= (s64a)ci->len) {
+                DEBUG_PRINTF("all in the future.\n");
+                return 1;
+            }
+            c_len = ci->len - offset;
+            c_shift = 32 - c_len;
+            copy_upto_32_bytes((u8 *)&data, ci->buf + offset, c_len);
+        } else {
+            data = loadu256(ci->buf + offset);
+        }
+    }
+    DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift);
+    DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len);
+    // we use valid_data_mask to blind bytes before history/in the future.
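+    // (Worked example, for illustration: with h_shift = 2 and c_shift = 3,
+    // valid_data_mask = ~0u << 5 >> 3 = 0x1ffffffc, i.e. bits 2..28 are
+    // set, so the two low pre-history lanes and the three high post-current
+    // lanes are excluded from the comparison.)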
+ u32 valid_data_mask; + valid_data_mask = (~0u) << (h_shift + c_shift) >> (c_shift); + + m256 and_mask_m256 = loadu256(and_mask); + m256 cmp_mask_m256 = loadu256(cmp_mask); + if (validateMask32(data, valid_data_mask, and_mask_m256, + cmp_mask_m256, neg_mask)) { + DEBUG_PRINTF("Mask32 passed\n"); + return 1; + } + return 0; +} + /** * \brief Scan around a literal, checking that that "lookaround" reach masks * are satisfied. @@ -1213,6 +1290,17 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_MASK_32) { + struct core_info *ci = &scratch->core_info; + if (!roseCheckMask32(ci, ri->and_mask, ri->cmp_mask, + ri->neg_mask, ri->offset, end)) { + assert(ri->fail_jump); + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_BYTE) { const struct core_info *ci = &scratch->core_info; if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask, diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 56591de8..add3670b 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -201,6 +201,7 @@ public: case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled; case ROSE_INSTR_CHECK_LOOKAROUND: return &u.checkLookaround; case ROSE_INSTR_CHECK_MASK: return &u.checkMask; + case ROSE_INSTR_CHECK_MASK_32: return &u.checkMask32; case ROSE_INSTR_CHECK_BYTE: return &u.checkByte; case ROSE_INSTR_CHECK_INFIX: return &u.checkInfix; case ROSE_INSTR_CHECK_PREFIX: return &u.checkPrefix; @@ -253,6 +254,7 @@ public: case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled); case ROSE_INSTR_CHECK_LOOKAROUND: return sizeof(u.checkLookaround); case ROSE_INSTR_CHECK_MASK: return sizeof(u.checkMask); + case ROSE_INSTR_CHECK_MASK_32: return sizeof(u.checkMask32); case ROSE_INSTR_CHECK_BYTE: return sizeof(u.checkByte); case ROSE_INSTR_CHECK_INFIX: return sizeof(u.checkInfix); case ROSE_INSTR_CHECK_PREFIX: return sizeof(u.checkPrefix); @@ -304,6 +306,7 @@ public: ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled; ROSE_STRUCT_CHECK_LOOKAROUND checkLookaround; ROSE_STRUCT_CHECK_MASK checkMask; + ROSE_STRUCT_CHECK_MASK_32 checkMask32; ROSE_STRUCT_CHECK_BYTE checkByte; ROSE_STRUCT_CHECK_INFIX checkInfix; ROSE_STRUCT_CHECK_PREFIX checkPrefix; @@ -2847,6 +2850,9 @@ flattenProgram(const vector> &programs) { case ROSE_INSTR_CHECK_MASK: ri.u.checkMask.fail_jump = jump_val; break; + case ROSE_INSTR_CHECK_MASK_32: + ri.u.checkMask32.fail_jump = jump_val; + break; case ROSE_INSTR_CHECK_BYTE: ri.u.checkByte.fail_jump = jump_val; break; @@ -3292,6 +3298,60 @@ bool makeRoleMask(const vector &look, return false; } +static UNUSED +string convertMaskstoString(u8 *p, int byte_len) { + string s; + for (int i = 0; i < byte_len; i++) { + u8 hi = *p >> 4; + u8 lo = *p & 0xf; + s += (char)(hi + (hi < 10 ? 48 : 87)); + s += (char)(lo + (lo < 10 ? 
48 : 87)); + p++; + } + return s; +} + +static +bool makeRoleMask32(const vector &look, + vector &program) { + if (look.back().offset >= look.front().offset + 32) { + return false; + } + s32 base_offset = verify_s32(look.front().offset); + u8 and_mask[32], cmp_mask[32]; + memset(and_mask, 0, sizeof(and_mask)); + memset(cmp_mask, 0, sizeof(cmp_mask)); + u32 neg_mask = 0; + for (const auto &entry : look) { + u8 andmask_u8, cmpmask_u8, flip; + if (!checkReachWithFlip(entry.reach, andmask_u8, + cmpmask_u8, flip)) { + return false; + } + u32 shift = entry.offset - base_offset; + assert(shift < 32); + and_mask[shift] = andmask_u8; + cmp_mask[shift] = cmpmask_u8; + if (flip) { + neg_mask |= 1 << shift; + } + } + + DEBUG_PRINTF("and_mask %s\n", convertMaskstoString(and_mask, 32).c_str()); + DEBUG_PRINTF("cmp_mask %s\n", convertMaskstoString(cmp_mask, 32).c_str()); + DEBUG_PRINTF("neg_mask %08x\n", neg_mask); + DEBUG_PRINTF("base_offset %d\n", base_offset); + + auto ri = RoseInstruction(ROSE_INSTR_CHECK_MASK_32, + JumpTarget::NEXT_BLOCK); + memcpy(ri.u.checkMask32.and_mask, and_mask, sizeof(and_mask)); + memcpy(ri.u.checkMask32.cmp_mask, cmp_mask, sizeof(cmp_mask)); + ri.u.checkMask32.neg_mask = neg_mask; + ri.u.checkMask32.offset = base_offset; + program.push_back(ri); + return true; +} + static void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, vector &program) { @@ -3325,6 +3385,10 @@ void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, return; } + if (makeRoleMask32(look, program)) { + return; + } + DEBUG_PRINTF("role has lookaround\n"); u32 look_idx = addLookaround(bc, look); u32 look_count = verify_u32(look.size()); diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index a3d00943..d9af8d87 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -303,6 +303,20 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_MASK_32) { + os << " and_mask " + << dumpStrMask(ri->and_mask, sizeof(ri->and_mask)) + << endl; + os << " cmp_mask " + << dumpStrMask(ri->cmp_mask, sizeof(ri->cmp_mask)) + << endl; + os << " neg_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " offset " << ri->offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_BYTE) { os << " and_mask 0x" << std::hex << std::setw(2) << std::setfill('0') << u32{ri->and_mask} << std::dec diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 545e190f..8dfa47ec 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -51,6 +51,7 @@ enum RoseInstructionCode { ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled". ROSE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check. ROSE_INSTR_CHECK_MASK, //!< 8-bytes mask check. + ROSE_INSTR_CHECK_MASK_32, //!< 32-bytes and/cmp/neg mask check. ROSE_INSTR_CHECK_BYTE, //!< Single Byte check. ROSE_INSTR_CHECK_INFIX, //!< Infix engine must be in accept state. ROSE_INSTR_CHECK_PREFIX, //!< Prefix engine must be in accept state. @@ -170,9 +171,18 @@ struct ROSE_STRUCT_CHECK_LOOKAROUND { struct ROSE_STRUCT_CHECK_MASK { u8 code; //!< From enum roseInstructionCode. - u64a and_mask; //!< 64-bits and mask. - u64a cmp_mask; //!< 64-bits cmp mask. - u64a neg_mask; //!< 64-bits negation mask. + u64a and_mask; //!< 8-byte and mask. + u64a cmp_mask; //!< 8-byte cmp mask. + u64a neg_mask; //!< 8-byte negation mask. 
+ s32 offset; //!< Relative offset of the first byte. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_MASK_32 { + u8 code; //!< From enum RoseInstructionCode. + u8 and_mask[32]; //!< 32-byte and mask. + u8 cmp_mask[32]; //!< 32-byte cmp mask. + u32 neg_mask; //!< negation mask with 32 bits. s32 offset; //!< Relative offset of the first byte. u32 fail_jump; //!< Jump forward this many bytes on failure. }; diff --git a/src/rose/validate_mask.h b/src/rose/validate_mask.h index b2c2f5d6..ac8cc312 100644 --- a/src/rose/validate_mask.h +++ b/src/rose/validate_mask.h @@ -26,7 +26,22 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#ifndef VALIDATE_MASK_H +#define VALIDATE_MASK_H + #include "ue2common.h" +#include "util/simd_utils.h" + +#if defined(DEBUG) +static +void validateMask32Print(const u8 *mask) { + int i; + for (i = 0; i < 32; i++) { + printf("%02x", mask[i]); + } + printf("\n"); +} +#endif // check positive bytes in cmp_result. // return one if the check passed, zero otherwise. @@ -75,3 +90,29 @@ int validateMask(u64a data, u64a valid_data_mask, u64a and_mask, return 0; } } + +static really_inline +int validateMask32(const m256 data, const u32 valid_data_mask, + const m256 and_mask, const m256 cmp_mask, + const u32 neg_mask) { + m256 cmp_result_256 = eq256(and256(data, and_mask), cmp_mask); + u32 cmp_result = ~movemask256(cmp_result_256); +#ifdef DEBUG + DEBUG_PRINTF("data\n"); + validateMask32Print((const u8 *)&data); + DEBUG_PRINTF("cmp_result\n"); + validateMask32Print((const u8 *)&cmp_result_256); +#endif + DEBUG_PRINTF("cmp_result %08x neg_mask %08x\n", cmp_result, neg_mask); + DEBUG_PRINTF("valid_data_mask %08x\n", valid_data_mask); + + if ((cmp_result & valid_data_mask) == (neg_mask & valid_data_mask)) { + DEBUG_PRINTF("checkCompareResult32 passed\n"); + return 1; + } else { + DEBUG_PRINTF("checkCompareResult32 failed\n"); + return 0; + } +} + +#endif diff --git a/src/util/copybytes.h b/src/util/copybytes.h new file mode 100644 index 00000000..872b8d28 --- /dev/null +++ b/src/util/copybytes.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef COPY_BYTES_H +#define COPY_BYTES_H + +#include "unaligned.h" +#include "simd_utils.h" + +static really_inline +void copy_upto_32_bytes(u8 *dst, const u8 *src, unsigned int len) { + switch (len) { + case 0: + break; + case 1: + *dst = *src; + break; + case 2: + unaligned_store_u16(dst, unaligned_load_u16(src)); + break; + case 3: + unaligned_store_u16(dst, unaligned_load_u16(src)); + dst[2] = src[2]; + break; + case 4: + unaligned_store_u32(dst, unaligned_load_u32(src)); + break; + case 5: + case 6: + case 7: + unaligned_store_u32(dst + len - 4, unaligned_load_u32(src + len - 4)); + unaligned_store_u32(dst, unaligned_load_u32(src)); + break; + case 8: + unaligned_store_u64a(dst, unaligned_load_u64a(src)); + break; + case 9: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + unaligned_store_u64a(dst + len - 8, unaligned_load_u64a(src + len - 8)); + unaligned_store_u64a(dst, unaligned_load_u64a(src)); + break; + case 16: + storeu128(dst, loadu128(src)); + break; + case 32: + storeu256(dst, loadu256(src)); + break; + default: + assert(len < 32); + storeu128(dst + len - 16, loadu128(src + len - 16)); + storeu128(dst, loadu128(src)); + break; + } +} + +#endif diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index 3544629f..87de0940 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -71,6 +71,7 @@ #include "ue2common.h" #include "simd_types.h" +#include "unaligned.h" // Define a common assume_aligned using an appropriate compiler built-in, if // it's available. Note that we need to handle C or C++ compilation. 
@@ -354,6 +355,21 @@ m256 set32x8(u32 in) { return rv; } +static really_inline +m256 eq256(m256 a, m256 b) { + m256 rv; + rv.lo = eq128(a.lo, b.lo); + rv.hi = eq128(a.hi, b.hi); + return rv; +} + +static really_inline +u32 movemask256(m256 a) { + u32 lo_mask = movemask128(a.lo); + u32 hi_mask = movemask128(a.hi); + return lo_mask | (hi_mask << 16); +} + #endif static really_inline m256 zeroes256(void) { @@ -525,6 +541,16 @@ static really_inline m256 loadu256(const void *ptr) { #endif } +// unaligned store +static really_inline void storeu256(void *ptr, m256 a) { +#if defined(__AVX2__) + _mm256_storeu_si256((m256 *)ptr, a); +#else + storeu128(ptr, a.lo); + storeu128((char *)ptr + 16, a.hi); +#endif +} + // packed unaligned store of first N bytes static really_inline void storebytes256(void *ptr, m256 a, unsigned int n) { diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index 63f3a9ac..17818cac 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -71,6 +71,7 @@ set(unit_internal_SOURCES internal/repeat.cpp internal/rose_build_merge.cpp internal/rose_mask.cpp + internal/rose_mask_32.cpp internal/rvermicelli.cpp internal/simd_utils.cpp internal/shuffle.cpp diff --git a/unit/internal/rose_mask_32.cpp b/unit/internal/rose_mask_32.cpp new file mode 100644 index 00000000..732f51a0 --- /dev/null +++ b/unit/internal/rose_mask_32.cpp @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" + +#include "rose/validate_mask.h" +#include "gtest/gtest.h" + +#define ONES32 0xffffffffu + +union RoseLookaroundMask32 { + m256 a256; + u8 a8[32]; +}; + +struct ValidateMask32TestInfo { + RoseLookaroundMask32 data; + u32 valid_mask; + RoseLookaroundMask32 and_mask; + RoseLookaroundMask32 cmp_mask; + u32 neg_mask; +}; + +struct ValidateMask32InitInfo { + int idx; + u8 data; + u8 and_mask; + u8 cmp_mask; + u8 neg_mask; +}; + + +static const ValidateMask32InitInfo testBasicIdx[][33] = { + { + {1, 0x34, 0xf8, 0x30, 0}, + {2, 0x34, 0xf8, 0x30, 0}, + {8, 0x23, 0xff, 0x23, 0}, + {9, 0x34, 0xf8, 0x30, 0}, + {10, 0x41, 0xdf, 0x41, 0}, + {11, 0x63, 0xdd, 0x41, 0}, + {12, 0x61, 0xdd, 0x41, 0}, + {13, 0x41, 0xdf, 0x41, 0}, + {14, 0x61, 0xdf, 0x41, 0}, + {15, 0x41, 0xdf, 0x41, 0}, + {16, 0x43, 0xdd, 0x41, 0}, + {17, 0x61, 0xdd, 0x41, 0}, + {23, 0x63, 0xdd, 0x41, 0}, + {24, 0x4f, 0xfc, 0x4c, 0}, + {25, 0x4d, 0xfc, 0x4c, 0}, + {26, 0x4d, 0xfc, 0x4c, 0}, + {-1, 0, 0, 0, 0}, + }, + { + {11, 0, 0xff, 0x55, 1}, + {12, 0, 0xff, 0x36, 1}, + {13, 0, 0xfe, 0x34, 1}, + {14, 0x4d, 0xfe, 0x4c, 0}, + {15, 0x41, 0xbf, 0x01, 0}, + {16, 0x53, 0xdf, 0x73, 1}, + {17, 0x4b, 0, 0, 0}, + {18, 0, 0x2c, 0x2c, 1}, + {-1, 0, 0, 0, 0}, + }, + { + {15, 0x46, 0xdf, 0x46, 0}, + {16, 0x4f, 0xdf, 0x46, 1}, + {17, 0x6f, 0xff, 0x6f, 0}, + {18, 0x31, 0xfe, 0x30, 0}, + {19, 0x34, 0xf8, 0x30, 0}, + {20, 0x66, 0xc0, 0x40, 0}, + {21, 0x6f, 0xf0, 0x60, 0}, + {22, 0x6f, 0, 0, 0}, + {23, 0x46, 0xdf, 0x44, 1}, + {24, 0x4f, 0xdf, 0x46, 1}, + {25, 0x6f, 0xff, 0x4f, 1}, + {26, 0x31, 0xfe, 0x30, 0}, + {27, 0x34, 0xf8, 0x34, 1}, + {28, 0x66, 0xc0, 0x60, 1}, + {29, 0x6f, 0xf0, 0x6f, 1}, + {30, 0x6f, 0, 0x60, 1}, + {-1, 0, 0, 0, 0}, + }, + { + {31, 0x4a, 0x80, 0, 0}, + {-1, 0, 0, 0, 1}, + }, + { + {12, 0x2b, 0x3d, 0x2d, 1}, + {13, 0x2b, 0x3d, 0x4c, 1}, + {23, 0x4a, 0x88, 0x0a, 1}, + {-1, 0, 0, 0, 0}, + }, +}; + +static void initTestInfo(ValidateMask32TestInfo &t) { + t.data.a256 = zeroes256(); + t.valid_mask = 0xffffffff; + t.and_mask.a256 = zeroes256(); + t.cmp_mask.a256 = zeroes256(); + t.neg_mask = 0; +}; + + +static +int testBasicInit(ValidateMask32TestInfo *testB) { + int len = 0; + ValidateMask32TestInfo t; + for (size_t i = 0; i < ARRAY_LENGTH(testBasicIdx); i++) { + initTestInfo(t); + for (const auto &line: testBasicIdx[i]) { + if (line.idx < 0) { + break; + } + int index = line.idx; + t.data.a8[index] = line.data; + t.and_mask.a8[index] = line.and_mask; + t.cmp_mask.a8[index] = line.cmp_mask; + t.neg_mask |= line.neg_mask << index; + } + testB[i] = t; + len++; + } + return len; +} + +TEST(ValidateMask32, testMask32_1) { + ValidateMask32TestInfo testBasic[20]; + int test_len = testBasicInit(testBasic); + for (int i = 0; i < test_len; i++) { + const auto t = testBasic[i]; + EXPECT_EQ(1, validateMask32(t.data.a256, t.valid_mask, + t.and_mask.a256, t.cmp_mask.a256, + t.neg_mask)); + } +} + +TEST(ValidateMask32, testMask32_2) { + ValidateMask32TestInfo testBasic[20]; + int test_len = testBasicInit(testBasic); + for (int left = 0; left <= 32; left++) { + for (int right = 0; right + left < 32; right++) { + u32 valid_mask = ONES32 << (left + right) >> left; + for (int i = 0; i < test_len; i++) { + const auto &t = testBasic[i]; + int bool_result; + bool_result = !(valid_mask & t.neg_mask); + EXPECT_EQ(bool_result, validateMask32(t.data.a256, + valid_mask, + t.and_mask.a256, + t.cmp_mask.a256, + 0)); + bool_result = (valid_mask & t.neg_mask) == valid_mask; + EXPECT_EQ(bool_result, validateMask32(t.data.a256, + valid_mask, 
+ t.and_mask.a256, + t.cmp_mask.a256, + ONES32)); + } + } + } +} + +TEST(ValidateMask32, testMask32_3) { + ValidateMask32TestInfo testBasic[20]; + testing::internal::Random neg_mask_rand(451); + int test_len = testBasicInit(testBasic); + for (int left = 0; left <= 32; left++) { + for (int right = 0; right + left < 32; right++) { + u32 valid_mask = ONES32 << (left + right) >> left; + for (int i = 0; i < test_len; i++) { + const auto &t = testBasic[i]; + int bool_result; + for (int j = 0; j < 5000; j++) { + u32 neg_mask = neg_mask_rand.Generate(1u << 31); + bool_result = (neg_mask & valid_mask) == + (t.neg_mask & valid_mask); + EXPECT_EQ(bool_result, validateMask32(t.data.a256, + valid_mask, + t.and_mask.a256, + t.cmp_mask.a256, + neg_mask)); + } + } + } + } +} From 4ce306864e92e01b7d534451ac212cfd7ede1ce8 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 8 Aug 2016 15:39:43 +1000 Subject: [PATCH 004/103] rose: use lookarounds to implement benefits masks This replaces the CHECK_LIT_MASK instruction. --- src/rose/rose_build_bytecode.cpp | 78 ++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 30 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index add3670b..16eba3df 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -3352,6 +3352,37 @@ bool makeRoleMask32(const vector &look, return true; } +/** + * Builds a lookaround instruction, or an appropriate specialization if one is + * available. + */ +static +void makeLookaroundInstruction(build_context &bc, const vector &look, + vector &program) { + assert(!look.empty()); + + if (makeRoleByte(look, program)) { + return; + } + + if (makeRoleMask(look, program)) { + return; + } + + if (makeRoleMask32(look, program)) { + return; + } + + u32 look_idx = addLookaround(bc, look); + u32 look_count = verify_u32(look.size()); + + auto ri = RoseInstruction(ROSE_INSTR_CHECK_LOOKAROUND, + JumpTarget::NEXT_BLOCK); + ri.u.checkLookaround.index = look_idx; + ri.u.checkLookaround.count = look_count; + program.push_back(ri); +} + static void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, vector &program) { @@ -3377,27 +3408,7 @@ void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, return; } - if (makeRoleByte(look, program)) { - return; - } - - if (makeRoleMask(look, program)) { - return; - } - - if (makeRoleMask32(look, program)) { - return; - } - - DEBUG_PRINTF("role has lookaround\n"); - u32 look_idx = addLookaround(bc, look); - u32 look_count = verify_u32(look.size()); - - auto ri = RoseInstruction(ROSE_INSTR_CHECK_LOOKAROUND, - JumpTarget::NEXT_BLOCK); - ri.u.checkLookaround.index = look_idx; - ri.u.checkLookaround.count = look_count; - program.push_back(ri); + makeLookaroundInstruction(bc, look, program); } static @@ -4377,7 +4388,7 @@ void addPredBlocks(build_context &bc, vector &program) { const size_t num_preds = predProgramLists.size(); if (num_preds == 0) { - program = flattenProgram({program}); + program.emplace_back(ROSE_INSTR_END); return; } @@ -4420,6 +4431,10 @@ vector makeSparseIterProgram(build_context &bc, program.insert(end(program), begin(root_program), end(root_program)); } + assert(!program.empty()); + assert(program.back().code() == ROSE_INSTR_END); + program.pop_back(); + program = flattenProgram({program}); return program; } @@ -4473,7 +4488,8 @@ void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 final_id, } static -void makeCheckLitMaskInstruction(const RoseBuildImpl 
&build, u32 final_id, +void makeCheckLitMaskInstruction(const RoseBuildImpl &build, build_context &bc, + u32 final_id, vector &program) { assert(contains(build.final_id_to_literal, final_id)); const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); @@ -4483,7 +4499,7 @@ void makeCheckLitMaskInstruction(const RoseBuildImpl &build, u32 final_id, return; } - auto ri = RoseInstruction(ROSE_INSTR_CHECK_LIT_MASK); + vector look; assert(build.final_id_to_literal.at(final_id).size() == 1); u32 lit_id = *build.final_id_to_literal.at(final_id).begin(); @@ -4491,14 +4507,16 @@ void makeCheckLitMaskInstruction(const RoseBuildImpl &build, u32 final_id, DEBUG_PRINTF("building mask for lit %u (final id %u) %s\n", lit_id, final_id, dumpString(s).c_str()); assert(s.length() <= MAX_MASK2_WIDTH); - u32 i = 0; + s32 i = 0 - s.length(); for (const auto &e : s) { - ri.u.checkLitMask.and_mask.a8[i] = e.nocase ? 0 : CASE_BIT; - ri.u.checkLitMask.cmp_mask.a8[i] = e.nocase ? 0 : (CASE_BIT & e.c); + if (!e.nocase) { + look.emplace_back(verify_s8(i), e); + } i++; } - program.push_back(move(ri)); + assert(!look.empty()); + makeLookaroundInstruction(bc, look, program); } static @@ -4662,7 +4680,7 @@ vector buildLitInitialProgram(RoseBuildImpl &build, DEBUG_PRINTF("final_id %u\n", final_id); // Check lit mask. - makeCheckLitMaskInstruction(build, final_id, pre_program); + makeCheckLitMaskInstruction(build, bc, final_id, pre_program); // Check literal groups. This is an optimisation that we only perform for // delayed literals, as their groups may be switched off; ordinarily, we @@ -4771,7 +4789,7 @@ u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc, } vector program; - makeCheckLitMaskInstruction(build, final_id, program); + makeCheckLitMaskInstruction(build, bc, final_id, program); makePushDelayedInstructions(build, final_id, program); assert(!program.empty()); program = flattenProgram({program}); From c8868fb9c767e69dc467d7b5cfd08706a37341a9 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 8 Aug 2016 16:12:44 +1000 Subject: [PATCH 005/103] rose: remove CHECK_LIT_MASK instruction --- src/rose/program_runtime.h | 80 +------------------------------- src/rose/rose_build_bytecode.cpp | 8 ---- src/rose/rose_dump.cpp | 10 ---- src/rose/rose_program.h | 13 ------ 4 files changed, 1 insertion(+), 110 deletions(-) diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index f54b1347..b57aebe9 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -71,73 +71,6 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, /* Inline implementation follows. */ -static rose_inline -int roseCheckBenefits(const struct core_info *ci, u64a end, u32 mask_rewind, - const u8 *and_mask, const u8 *exp_mask) { - const u8 *data; - - // If the check works over part of the history and part of the buffer, we - // create a temporary copy of the data in here so it's contiguous. 
- u8 temp[MAX_MASK2_WIDTH]; - - s64a buffer_offset = (s64a)end - ci->buf_offset; - DEBUG_PRINTF("rel offset %lld\n", buffer_offset); - if (buffer_offset >= mask_rewind) { - data = ci->buf + buffer_offset - mask_rewind; - DEBUG_PRINTF("all in one case data=%p buf=%p rewind=%u\n", data, - ci->buf, mask_rewind); - } else if (buffer_offset <= 0) { - data = ci->hbuf + ci->hlen + buffer_offset - mask_rewind; - DEBUG_PRINTF("all in one case data=%p buf=%p rewind=%u\n", data, - ci->buf, mask_rewind); - } else { - u32 shortfall = mask_rewind - buffer_offset; - DEBUG_PRINTF("shortfall of %u, rewind %u hlen %zu\n", shortfall, - mask_rewind, ci->hlen); - data = temp; - memcpy(temp, ci->hbuf + ci->hlen - shortfall, shortfall); - memcpy(temp + shortfall, ci->buf, mask_rewind - shortfall); - } - -#ifdef DEBUG - DEBUG_PRINTF("DATA: "); - for (u32 i = 0; i < mask_rewind; i++) { - printf("%c", ourisprint(data[i]) ? data[i] : '?'); - } - printf(" (len=%u)\n", mask_rewind); -#endif - - u32 len = mask_rewind; - while (len >= sizeof(u64a)) { - u64a a = unaligned_load_u64a(data); - a &= *(const u64a *)and_mask; - if (a != *(const u64a *)exp_mask) { - DEBUG_PRINTF("argh %016llx %016llx\n", a, *(const u64a *)exp_mask); - return 0; - } - data += sizeof(u64a); - and_mask += sizeof(u64a); - exp_mask += sizeof(u64a); - len -= sizeof(u64a); - } - - while (len) { - u8 a = *data; - a &= *and_mask; - if (a != *exp_mask) { - DEBUG_PRINTF("argh d%02hhx =%02hhx am%02hhx em%02hhx\n", a, - *data, *and_mask, *exp_mask); - return 0; - } - data++; - and_mask++; - exp_mask++; - len--; - } - - return 1; -} - static rose_inline void rosePushDelayedMatch(const struct RoseEngine *t, struct hs_scratch *scratch, u32 delay, @@ -1157,7 +1090,7 @@ void updateSeqPoint(struct RoseContext *tctxt, u64a offset, static rose_inline hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, struct hs_scratch *scratch, u32 programOffset, - u64a som, u64a end, size_t match_len, + u64a som, u64a end, UNUSED size_t match_len, u8 prog_flags) { DEBUG_PRINTF("program=%u, offsets [%llu,%llu], flags=%u\n", programOffset, som, end, prog_flags); @@ -1205,17 +1138,6 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(CHECK_LIT_MASK) { - assert(match_len); - struct core_info *ci = &scratch->core_info; - if (!roseCheckBenefits(ci, end, match_len, ri->and_mask.a8, - ri->cmp_mask.a8)) { - DEBUG_PRINTF("halt: failed mask check\n"); - return HWLM_CONTINUE_MATCHING; - } - } - PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(CHECK_LIT_EARLY) { if (end < ri->min_offset) { DEBUG_PRINTF("halt: before min_offset=%u\n", diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 16eba3df..d4f2e069 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -193,7 +193,6 @@ public: const void *get() const { switch (code()) { - case ROSE_INSTR_CHECK_LIT_MASK: return &u.checkLitMask; case ROSE_INSTR_CHECK_LIT_EARLY: return &u.checkLitEarly; case ROSE_INSTR_CHECK_GROUPS: return &u.checkGroups; case ROSE_INSTR_CHECK_ONLY_EOD: return &u.checkOnlyEod; @@ -246,7 +245,6 @@ public: size_t length() const { switch (code()) { - case ROSE_INSTR_CHECK_LIT_MASK: return sizeof(u.checkLitMask); case ROSE_INSTR_CHECK_LIT_EARLY: return sizeof(u.checkLitEarly); case ROSE_INSTR_CHECK_GROUPS: return sizeof(u.checkGroups); case ROSE_INSTR_CHECK_ONLY_EOD: return sizeof(u.checkOnlyEod); @@ -298,7 +296,6 @@ public: } union { - ROSE_STRUCT_CHECK_LIT_MASK checkLitMask; ROSE_STRUCT_CHECK_LIT_EARLY 
checkLitEarly; ROSE_STRUCT_CHECK_GROUPS checkGroups; ROSE_STRUCT_CHECK_ONLY_EOD checkOnlyEod; @@ -377,7 +374,6 @@ struct RoseResources { bool has_states = false; bool checks_groups = false; bool has_lit_delay = false; - bool has_lit_mask = false; bool has_anchored = false; bool has_eod = false; }; @@ -667,7 +663,6 @@ u8 pickRuntimeImpl(const RoseBuildImpl &build, const build_context &bc, DEBUG_PRINTF("has_states=%d\n", bc.resources.has_states); DEBUG_PRINTF("checks_groups=%d\n", bc.resources.checks_groups); DEBUG_PRINTF("has_lit_delay=%d\n", bc.resources.has_lit_delay); - DEBUG_PRINTF("has_lit_mask=%d\n", bc.resources.has_lit_mask); DEBUG_PRINTF("has_anchored=%d\n", bc.resources.has_anchored); DEBUG_PRINTF("has_eod=%d\n", bc.resources.has_eod); @@ -2948,9 +2943,6 @@ void recordResources(RoseResources &resources, case ROSE_INSTR_PUSH_DELAYED: resources.has_lit_delay = true; break; - case ROSE_INSTR_CHECK_LIT_MASK: - resources.has_lit_mask = true; - break; default: break; } diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index d9af8d87..95f43d6c 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -244,16 +244,6 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(CHECK_LIT_MASK) { - os << " and_mask " - << dumpStrMask(ri->and_mask.a8, sizeof(ri->and_mask.a8)) - << endl; - os << " cmp_mask " - << dumpStrMask(ri->cmp_mask.a8, sizeof(ri->cmp_mask.a8)) - << endl; - } - PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(CHECK_LIT_EARLY) { os << " min_offset " << ri->min_offset << endl; } diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 8dfa47ec..007eb70d 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -43,7 +43,6 @@ /** \brief Role program instruction opcodes. */ enum RoseInstructionCode { ROSE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher. - ROSE_INSTR_CHECK_LIT_MASK, //!< Check and/cmp mask. ROSE_INSTR_CHECK_LIT_EARLY, //!< Skip matches before floating min offset. ROSE_INSTR_CHECK_GROUPS, //!< Check that literal groups are on. ROSE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD. @@ -120,18 +119,6 @@ struct ROSE_STRUCT_ANCHORED_DELAY { u32 done_jump; //!< Jump forward this many bytes if successful. }; -union RoseLiteralMask { - u64a a64[MAX_MASK2_WIDTH / sizeof(u64a)]; - u8 a8[MAX_MASK2_WIDTH]; -}; - -/** Note: check failure will halt program. */ -struct ROSE_STRUCT_CHECK_LIT_MASK { - u8 code; //!< From enum RoseInstructionCode. - union RoseLiteralMask and_mask; - union RoseLiteralMask cmp_mask; -}; - /** Note: check failure will halt program. */ struct ROSE_STRUCT_CHECK_LIT_EARLY { u8 code; //!< From enum RoseInstructionCode. From 0cc941dfd586faa1c5d40ea6d00b4c58a110e518 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 25 Aug 2016 13:25:01 +1000 Subject: [PATCH 006/103] limex_dump: silence gcc large alignment warning Passing mask by const ref silences the warning: "The ABI for passing parameters with 32-byte alignment has changed in GCC 4.6". 
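For instance (an illustrative reduction, not code from this tree; m256 stands
in for any 32-byte-aligned mask type):

    static u32 rank_by_value(m256 mask, u32 bit);       /* gcc 4.x notes the ABI change */
    static u32 rank_by_cref(const m256 &mask, u32 bit); /* no parameter copy, no note */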
--- src/nfa/limex_dump.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nfa/limex_dump.cpp b/src/nfa/limex_dump.cpp index c52adc46..8b6b7015 100644 --- a/src/nfa/limex_dump.cpp +++ b/src/nfa/limex_dump.cpp @@ -82,7 +82,7 @@ void dumpMask(FILE *f, const char *name, const u8 *mask, u32 mask_bits) { template static -u32 rank_in_mask(mask_t mask, u32 bit) { +u32 rank_in_mask(const mask_t &mask, u32 bit) { assert(bit < 8 * sizeof(mask)); u32 chunks[sizeof(mask)/sizeof(u32)]; From 3cf4199879c78627d7d7198952835a36e08240a6 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 25 Aug 2016 13:26:27 +1000 Subject: [PATCH 007/103] debug: always use %zu in format string for size_t --- src/nfa/tamaramacompile.cpp | 2 +- src/rose/rose_build_bytecode.cpp | 8 ++++---- src/rose/rose_build_exclusive.cpp | 6 +++--- src/util/clique.cpp | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/nfa/tamaramacompile.cpp b/src/nfa/tamaramacompile.cpp index 73d19595..521c9bb2 100644 --- a/src/nfa/tamaramacompile.cpp +++ b/src/nfa/tamaramacompile.cpp @@ -117,7 +117,7 @@ aligned_unique_ptr buildTamarama(const TamaInfo &tamaInfo, const u32 queue, remapTops(tamaInfo, top_base, out_top_remap); size_t subSize = tamaInfo.subengines.size(); - DEBUG_PRINTF("subSize:%lu\n", subSize); + DEBUG_PRINTF("subSize:%zu\n", subSize); size_t total_size = sizeof(NFA) + // initial NFA structure sizeof(Tamarama) + // Tamarama structure diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index d4f2e069..b8baa9c1 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -1541,7 +1541,7 @@ void buildInfixContainer(RoseGraph &g, build_context &bc, for (const auto &sub : subengines) { const auto &verts = sub.vertices; for (const auto &v : verts) { - DEBUG_PRINTF("vert id:%lu\n", g[v].idx); + DEBUG_PRINTF("vert id:%zu\n", g[v].idx); g[v].left.tamarama = tamaProto; } } @@ -1560,7 +1560,7 @@ void buildSuffixContainer(RoseGraph &g, build_context &bc, for (const auto &sub : subengines) { const auto &verts = sub.vertices; for (const auto &v : verts) { - DEBUG_PRINTF("vert id:%lu\n", g[v].idx); + DEBUG_PRINTF("vert id:%zu\n", g[v].idx); g[v].suffix.tamarama = tamaProto; } const auto &v = verts[0]; @@ -1741,7 +1741,7 @@ void findExclusiveInfixes(RoseBuildImpl &build, build_context &bc, } if (leftfixes.size() > 1) { - DEBUG_PRINTF("leftfix size:%lu\n", leftfixes.size()); + DEBUG_PRINTF("leftfix size:%zu\n", leftfixes.size()); vector> groups; exclusiveAnalysisInfix(build, vertex_map, roleInfoSet, groups); buildExclusiveInfixes(build, bc, qif, infixTriggers, vertex_map, @@ -2178,7 +2178,7 @@ void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc, } if (suffixes.size() > 1) { - DEBUG_PRINTF("suffix size:%lu\n", suffixes.size()); + DEBUG_PRINTF("suffix size:%zu\n", suffixes.size()); vector> groups; exclusiveAnalysisSuffix(tbi, vertex_map, roleInfoSet, groups); buildExclusiveSuffixes(tbi, bc, qif, suffixTriggers, vertex_map, diff --git a/src/rose/rose_build_exclusive.cpp b/src/rose/rose_build_exclusive.cpp index c9e8d215..e91cc297 100644 --- a/src/rose/rose_build_exclusive.cpp +++ b/src/rose/rose_build_exclusive.cpp @@ -306,12 +306,12 @@ void findCliques(const map> &exclusiveGroups, // Find clique groups const auto &clique = removeClique(*cg); for (const auto &i : clique) { - DEBUG_PRINTF("cliq:%lu\n", i.size()); + DEBUG_PRINTF("cliq:%zu\n", i.size()); if (i.size() > 1) { exclusive_roles.push_back(i); } } - DEBUG_PRINTF("Clique graph size:%lu\n", 
exclusive_roles.size()); + DEBUG_PRINTF("Clique graph size:%zu\n", exclusive_roles.size()); } static @@ -326,7 +326,7 @@ map> findExclusiveGroups(const RoseBuildImpl &build, set group; set q1(vertex_map.at(i).begin(), vertex_map.at(i).end()); - DEBUG_PRINTF("vertex set:%lu\n", q1.size()); + DEBUG_PRINTF("vertex set:%zu\n", q1.size()); for (const auto &val : s) { set q2(vertex_map.at(val).begin(), vertex_map.at(val).end()); diff --git a/src/util/clique.cpp b/src/util/clique.cpp index ea22779c..79f06932 100644 --- a/src/util/clique.cpp +++ b/src/util/clique.cpp @@ -103,7 +103,7 @@ bool graph_empty(const Graph &g) { } vector> removeClique(CliqueGraph &cg) { - DEBUG_PRINTF("graph size:%lu\n", num_vertices(cg)); + DEBUG_PRINTF("graph size:%zu\n", num_vertices(cg)); vector> cliquesVec = {findCliqueGroup(cg)}; while (!graph_empty(cg)) { const vector &c = cliquesVec.back(); From a08e1dd6906b81be2d1557c1fc8ca8f096c276aa Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Fri, 26 Aug 2016 14:17:41 +1000 Subject: [PATCH 008/103] Introduce a 64-bit LimEx model. On 64-bit platforms, the Limex 64 model is implemented in normal GPRs. On 32-bit platforms, however, 128-bit SSE registers are used for the runtime implementation. --- CMakeLists.txt | 1 + src/nfa/limex.h | 1 + src/nfa/limex_64.c | 73 ++++++++++++++ src/nfa/limex_accel.c | 16 ++++ src/nfa/limex_accel.h | 13 ++- src/nfa/limex_common_impl.h | 71 +++++++------- src/nfa/limex_compile.cpp | 13 +-- src/nfa/limex_context.h | 17 +++- src/nfa/limex_dump.cpp | 5 + src/nfa/limex_exceptional.h | 49 ++++------ src/nfa/limex_internal.h | 1 + src/nfa/limex_native.c | 14 +-- src/nfa/limex_runtime.h | 40 +------- src/nfa/limex_runtime_impl.h | 176 +++++++++++++++++++--------------- src/nfa/limex_simd128.c | 13 +-- src/nfa/limex_simd256.c | 13 +-- src/nfa/limex_simd384.c | 13 +-- src/nfa/limex_simd512.c | 13 +-- src/nfa/limex_state_impl.h | 59 ++++++------ src/nfa/nfa_api_dispatch.c | 1 + src/nfa/nfa_build_util.cpp | 22 ++--- src/nfa/nfa_dump_dispatch.cpp | 1 + src/nfa/nfa_internal.h | 2 + src/util/simd_utils.h | 58 ++++++----- src/util/uniform_ops.h | 56 ++++++----- unit/internal/limex_nfa.cpp | 13 +-- unit/internal/simd_utils.cpp | 16 ++-- unit/internal/uniform_ops.cpp | 22 ++--- 28 files changed, 441 insertions(+), 351 deletions(-) create mode 100644 src/nfa/limex_64.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f7e9bf0..bdb60b74 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -470,6 +470,7 @@ set (hs_exec_SRCS src/nfa/limex_exceptional.h src/nfa/limex_native.c src/nfa/limex_ring.h + src/nfa/limex_64.c src/nfa/limex_simd128.c src/nfa/limex_simd256.c src/nfa/limex_simd384.c diff --git a/src/nfa/limex.h b/src/nfa/limex.h index ad53503c..70bcdd1c 100644 --- a/src/nfa/limex.h +++ b/src/nfa/limex.h @@ -77,6 +77,7 @@ extern "C" GENERATE_NFA_DUMP_DECL(gf_name) GENERATE_NFA_DECL(nfaExecLimEx32) +GENERATE_NFA_DECL(nfaExecLimEx64) GENERATE_NFA_DECL(nfaExecLimEx128) GENERATE_NFA_DECL(nfaExecLimEx256) GENERATE_NFA_DECL(nfaExecLimEx384) diff --git a/src/nfa/limex_64.c b/src/nfa/limex_64.c new file mode 100644 index 00000000..e8f0880b --- /dev/null +++ b/src/nfa/limex_64.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2015-2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief LimEx NFA: 128-bit SIMD runtime implementations. + */ + +/* Limex64 is unusual on as on 32 bit platforms, at runtime it uses an m128 for + * state calculations. + */ + +//#define DEBUG_INPUT +//#define DEBUG_EXCEPTIONS + +#include "limex.h" + +#include "accel.h" +#include "limex_internal.h" +#include "nfa_internal.h" +#include "ue2common.h" +#include "util/bitutils.h" +#include "util/simd_utils.h" + +// Common code +#define STATE_ON_STACK +#define ESTATE_ON_STACK + +#include "limex_runtime.h" + +#define SIZE 64 +#define ENG_STATE_T u64a + +#ifdef ARCH_64_BIT +#define STATE_T u64a +#define LOAD_FROM_ENG load_u64a +#else +#define STATE_T m128 +#define LOAD_FROM_ENG load_m128_from_u64a +#endif + +#include "limex_exceptional.h" + +#include "limex_state_impl.h" + +#define INLINE_ATTR really_inline +#include "limex_common_impl.h" + +#include "limex_runtime_impl.h" diff --git a/src/nfa/limex_accel.c b/src/nfa/limex_accel.c index 28f37083..f883973e 100644 --- a/src/nfa/limex_accel.c +++ b/src/nfa/limex_accel.c @@ -82,6 +82,22 @@ size_t doAccel32(u32 s, u32 accel, const u8 *accelTable, return accelScanWrapper(accelTable, aux, input, idx, i, end); } +#ifdef ARCH_64_BIT +size_t doAccel64(u64a s, u64a accel, const u8 *accelTable, + const union AccelAux *aux, const u8 *input, size_t i, + size_t end) { + u32 idx = packedExtract64(s, accel); + return accelScanWrapper(accelTable, aux, input, idx, i, end); +} +#else +size_t doAccel64(m128 s, m128 accel, const u8 *accelTable, + const union AccelAux *aux, const u8 *input, size_t i, + size_t end) { + u32 idx = packedExtract64(movq(s), movq(accel)); + return accelScanWrapper(accelTable, aux, input, idx, i, end); +} +#endif + size_t doAccel128(const m128 *state, const struct LimExNFA128 *limex, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, size_t end) { diff --git a/src/nfa/limex_accel.h b/src/nfa/limex_accel.h index 173df759..e5c94e82 100644 --- a/src/nfa/limex_accel.h +++ b/src/nfa/limex_accel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,6 +40,7 @@ #include "util/simd_utils.h" // for m128 
etc union AccelAux; +struct LimExNFA64; struct LimExNFA128; struct LimExNFA256; struct LimExNFA384; @@ -49,6 +50,16 @@ size_t doAccel32(u32 s, u32 accel, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, size_t end); +#ifdef ARCH_64_BIT +size_t doAccel64(u64a s, u64a accel, const u8 *accelTable, + const union AccelAux *aux, const u8 *input, size_t i, + size_t end); +#else +size_t doAccel64(m128 s, m128 accel, const u8 *accelTable, + const union AccelAux *aux, const u8 *input, size_t i, + size_t end); +#endif + size_t doAccel128(const m128 *s, const struct LimExNFA128 *limex, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, size_t end); diff --git a/src/nfa/limex_common_impl.h b/src/nfa/limex_common_impl.h index 9523b073..187a661b 100644 --- a/src/nfa/limex_common_impl.h +++ b/src/nfa/limex_common_impl.h @@ -31,8 +31,9 @@ /* impl of limex functions which depend only on state size */ -#if !defined(SIZE) || !defined(STATE_T) || !defined(INLINE_ATTR) -# error Must define SIZE and STATE_T and INLINE_ATTR in includer. +#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) \ + || !defined(INLINE_ATTR) +# error Must define SIZE, STATE_T, LOAD_FROM_ENG and INLINE_ATTR in includer. #endif #define IMPL_NFA_T JOIN(struct LimExNFA, SIZE) @@ -50,8 +51,6 @@ #define PROCESS_ACCEPTS_NOSQUASH_FN JOIN(moProcessAcceptsNoSquash, SIZE) #define CONTEXT_T JOIN(NFAContext, SIZE) #define ONES_STATE JOIN(ones_, STATE_T) -#define LOAD_STATE JOIN(load_, STATE_T) -#define STORE_STATE JOIN(store_, STATE_T) #define AND_STATE JOIN(and_, STATE_T) #define OR_STATE JOIN(or_, STATE_T) #define ANDNOT_STATE JOIN(andnot_, STATE_T) @@ -83,7 +82,7 @@ void SQUASH_UNTUG_BR_FN(const IMPL_NFA_T *limex, const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); u32 cyclicState = info->cyclicState; - if (!TESTBIT_STATE(accstate, cyclicState)) { + if (!TESTBIT_STATE(*accstate, cyclicState)) { continue; } @@ -111,12 +110,12 @@ char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s, // We have squash masks we might have to apply after firing reports. 
STATE_T squash = ONES_STATE; - const STATE_T *squashMasks = (const STATE_T *) + const ENG_STATE_T *squashMasks = (const ENG_STATE_T *) ((const char *)limex + limex->squashOffset); for (u32 i = 0; i < acceptCount; i++) { const struct NFAAccept *a = &acceptTable[i]; - if (TESTBIT_STATE(s, a->state)) { + if (TESTBIT_STATE(*s, a->state)) { DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n", a->state, a->externalId, offset); int rv = callback(0, offset, a->externalId, context); @@ -125,14 +124,14 @@ char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s, } if (a->squash != MO_INVALID_IDX) { assert(a->squash < limex->squashCount); - const STATE_T *sq = &squashMasks[a->squash]; + const ENG_STATE_T *sq = &squashMasks[a->squash]; DEBUG_PRINTF("squash mask %u @ %p\n", a->squash, sq); - squash = AND_STATE(squash, LOAD_STATE(sq)); + squash = AND_STATE(squash, LOAD_FROM_ENG(sq)); } } } - STORE_STATE(s, AND_STATE(LOAD_STATE(s), squash)); + *s = AND_STATE(*s, squash); return 0; } @@ -147,7 +146,7 @@ char PROCESS_ACCEPTS_NOSQUASH_FN(const STATE_T *s, for (u32 i = 0; i < acceptCount; i++) { const struct NFAAccept *a = &acceptTable[i]; - if (TESTBIT_STATE(s, a->state)) { + if (TESTBIT_STATE(*s, a->state)) { DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n", a->state, a->externalId, offset); int rv = callback(0, offset, a->externalId, context); @@ -172,8 +171,8 @@ char TESTEOD_FN(const IMPL_NFA_T *limex, const STATE_T *s, return MO_CONTINUE_MATCHING; } - const STATE_T acceptEodMask = LOAD_STATE(&limex->acceptAtEOD); - STATE_T foundAccepts = AND_STATE(LOAD_STATE(s), acceptEodMask); + const STATE_T acceptEodMask = LOAD_FROM_ENG(&limex->acceptAtEOD); + STATE_T foundAccepts = AND_STATE(*s, acceptEodMask); if (do_br) { SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, @@ -204,8 +203,8 @@ char TESTEOD_REV_FN(const IMPL_NFA_T *limex, const STATE_T *s, u64a offset, return MO_CONTINUE_MATCHING; } - STATE_T acceptEodMask = LOAD_STATE(&limex->acceptAtEOD); - STATE_T foundAccepts = AND_STATE(LOAD_STATE(s), acceptEodMask); + STATE_T acceptEodMask = LOAD_FROM_ENG(&limex->acceptAtEOD); + STATE_T foundAccepts = AND_STATE(*s, acceptEodMask); assert(!limex->repeatCount); @@ -228,8 +227,8 @@ char REPORTCURRENT_FN(const IMPL_NFA_T *limex, const struct mq *q) { assert(q->state); assert(q_cur_type(q) == MQE_START); - STATE_T s = LOAD_STATE(q->state); - STATE_T acceptMask = LOAD_STATE(&limex->accept); + STATE_T s = *(STATE_T *)q->state; + STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); STATE_T foundAccepts = AND_STATE(s, acceptMask); if (unlikely(ISNONZERO_STATE(foundAccepts))) { @@ -250,7 +249,7 @@ char REPORTCURRENT_FN(const IMPL_NFA_T *limex, const struct mq *q) { static really_inline STATE_T INITIAL_FN(const IMPL_NFA_T *impl, char onlyDs) { - return LOAD_STATE(onlyDs ? &impl->initDS : &impl->init); + return LOAD_FROM_ENG(onlyDs ? 
&impl->initDS : &impl->init); } static really_inline @@ -261,9 +260,9 @@ STATE_T TOP_FN(const IMPL_NFA_T *impl, char onlyDs, STATE_T state) { static really_inline STATE_T TOPN_FN(const IMPL_NFA_T *limex, STATE_T state, u32 n) { assert(n < limex->topCount); - const STATE_T *topsptr = - (const STATE_T *)((const char *)limex + limex->topOffset); - STATE_T top = LOAD_STATE(&topsptr[n]); + const ENG_STATE_T *topsptr = + (const ENG_STATE_T *)((const char *)limex + limex->topOffset); + STATE_T top = LOAD_FROM_ENG(&topsptr[n]); return OR_STATE(top, state); } @@ -279,8 +278,8 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx, DEBUG_PRINTF("expire estate at offset %llu\n", offset); - const STATE_T cyclics = - AND_STATE(LOAD_STATE(&ctx->s), LOAD_STATE(&limex->repeatCyclicMask)); + const STATE_T cyclics + = AND_STATE(ctx->s, LOAD_FROM_ENG(&limex->repeatCyclicMask)); if (ISZERO_STATE(cyclics)) { DEBUG_PRINTF("no cyclic states are on\n"); return; @@ -290,7 +289,7 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx, const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); u32 cyclicState = info->cyclicState; - if (!TESTBIT_STATE(&cyclics, cyclicState)) { + if (!TESTBIT_STATE(cyclics, cyclicState)) { continue; } @@ -310,14 +309,14 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx, last_top, repeat->repeatMax); u64a adj = 0; /* if the cycle's tugs are active at repeat max, it is still alive */ - if (TESTBIT_STATE((const STATE_T *)&limex->accept, cyclicState) || - TESTBIT_STATE((const STATE_T *)&limex->acceptAtEOD, cyclicState)) { + if (TESTBIT_STATE(LOAD_FROM_ENG(&limex->accept), cyclicState) || + TESTBIT_STATE(LOAD_FROM_ENG(&limex->acceptAtEOD), cyclicState)) { DEBUG_PRINTF("lazy tug possible - may still be inspected\n"); adj = 1; } else { - const STATE_T *tug_mask = - (const STATE_T *)((const char *)info + info->tugMaskOffset); - if (ISNONZERO_STATE(AND_STATE(ctx->s, LOAD_STATE(tug_mask)))) { + const ENG_STATE_T *tug_mask = + (const ENG_STATE_T *)((const char *)info + info->tugMaskOffset); + if (ISNONZERO_STATE(AND_STATE(ctx->s, LOAD_FROM_ENG(tug_mask)))) { DEBUG_PRINTF("tug possible - may still be inspected\n"); adj = 1; } @@ -339,7 +338,7 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, u64a offset, ReportID report) { assert(limex); - const STATE_T acceptMask = LOAD_STATE(&limex->accept); + const STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); STATE_T accstate = AND_STATE(state, acceptMask); // Are we in an accept state? 
@@ -355,7 +354,7 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, #ifdef DEBUG DEBUG_PRINTF("accept states that are on: "); for (u32 i = 0; i < sizeof(STATE_T) * 8; i++) { - if (TESTBIT_STATE(&accstate, i)) printf("%u ", i); + if (TESTBIT_STATE(accstate, i)) printf("%u ", i); } printf("\n"); #endif @@ -366,7 +365,7 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, const struct NFAAccept *a = &acceptTable[i]; DEBUG_PRINTF("checking idx=%u, externalId=%u\n", a->state, a->externalId); - if (a->externalId == report && TESTBIT_STATE(&accstate, a->state)) { + if (a->externalId == report && TESTBIT_STATE(accstate, a->state)) { DEBUG_PRINTF("report is on!\n"); return 1; } @@ -381,7 +380,7 @@ char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, u64a offset) { assert(limex); - const STATE_T acceptMask = LOAD_STATE(&limex->accept); + const STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); STATE_T accstate = AND_STATE(state, acceptMask); // Are we in an accept state? @@ -407,8 +406,6 @@ char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, #undef CONTEXT_T #undef IMPL_NFA_T #undef ONES_STATE -#undef LOAD_STATE -#undef STORE_STATE #undef AND_STATE #undef OR_STATE #undef ANDNOT_STATE @@ -420,7 +417,3 @@ char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, #undef PROCESS_ACCEPTS_NOSQUASH_FN #undef SQUASH_UNTUG_BR_FN #undef GET_NFA_REPEAT_INFO_FN - -#undef SIZE -#undef STATE_T -#undef INLINE_ATTR diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 77754e0b..8d7343e5 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -1438,7 +1438,8 @@ struct Factory { sizeof(limex->init), stateSize, repeatscratchStateSize, repeatStreamState); - size_t scratchStateSize = sizeof(limex->init); + size_t scratchStateSize = NFATraits::scratch_state_size; + if (repeatscratchStateSize) { scratchStateSize = ROUNDUP_N(scratchStateSize, alignof(RepeatControl)); @@ -2021,13 +2022,6 @@ struct Factory { sz = 32; } - // Special case: with SIMD available, we definitely prefer using - // 128-bit NFAs over 64-bit ones given the paucity of registers - // available. - if (sz == 64) { - sz = 128; - } - if (args.cc.grey.nfaForceSize) { sz = args.cc.grey.nfaForceSize; } @@ -2067,9 +2061,12 @@ struct scoreNfa { typedef u_##mlt_size tableRow_t; \ typedef NFAException##mlt_size exception_t; \ static const size_t maxStates = mlt_size; \ + static const size_t scratch_state_size = mlt_size == 64 ? sizeof(m128) \ + : sizeof(tableRow_t); \ }; MAKE_LIMEX_TRAITS(32) +MAKE_LIMEX_TRAITS(64) MAKE_LIMEX_TRAITS(128) MAKE_LIMEX_TRAITS(256) MAKE_LIMEX_TRAITS(384) diff --git a/src/nfa/limex_context.h b/src/nfa/limex_context.h index 74f22c32..60d20879 100644 --- a/src/nfa/limex_context.h +++ b/src/nfa/limex_context.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,6 +39,16 @@ // Runtime context structures. +/* Note: The size of the context structures may vary from platform to platform + * (notably, for the Limex64 structure). As a result, information based on the + * size and other detail of these structures should not be written into the + * bytecode -- really, the details of the structure should not be accessed by + * the ue2 compile side at all. 
+ */ +#ifdef __cplusplus +#error ue2 runtime only file +#endif + /* cached_estate/esucc etc... * * If the exception state matches the cached_estate we will apply @@ -66,6 +76,11 @@ struct ALIGN_CL_DIRECTIVE NFAContext##nsize { \ }; GEN_CONTEXT_STRUCT(32, u32) +#ifdef ARCH_64_BIT +GEN_CONTEXT_STRUCT(64, u64a) +#else +GEN_CONTEXT_STRUCT(64, m128) +#endif GEN_CONTEXT_STRUCT(128, m128) GEN_CONTEXT_STRUCT(256, m256) GEN_CONTEXT_STRUCT(384, m384) diff --git a/src/nfa/limex_dump.cpp b/src/nfa/limex_dump.cpp index 8b6b7015..181951dc 100644 --- a/src/nfa/limex_dump.cpp +++ b/src/nfa/limex_dump.cpp @@ -70,6 +70,10 @@ template<> struct limex_traits { static const u32 size = 128; typedef NFAException128 exception_type; }; +template<> struct limex_traits { + static const u32 size = 64; + typedef NFAException64 exception_type; +}; template<> struct limex_traits { static const u32 size = 32; typedef NFAException32 exception_type; @@ -486,6 +490,7 @@ void dumpLimDotInfo(const limex_type *limex, u32 state, FILE *f) { DUMP_DOT_FN(size) LIMEX_DUMP_FNS(32) +LIMEX_DUMP_FNS(64) LIMEX_DUMP_FNS(128) LIMEX_DUMP_FNS(256) LIMEX_DUMP_FNS(384) diff --git a/src/nfa/limex_exceptional.h b/src/nfa/limex_exceptional.h index 175ca393..c8296f91 100644 --- a/src/nfa/limex_exceptional.h +++ b/src/nfa/limex_exceptional.h @@ -32,8 +32,8 @@ * X-macro generic impl, included into the various LimEx model implementations. */ -#if !defined(SIZE) || !defined(STATE_T) -# error Must define SIZE and STATE_T in includer. +#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) +# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer. #endif #include "config.h" @@ -44,8 +44,6 @@ #define PE_FN JOIN(processExceptional, SIZE) #define RUN_EXCEPTION_FN JOIN(runException, SIZE) #define ZERO_STATE JOIN(zero_, STATE_T) -#define LOAD_STATE JOIN(load_, STATE_T) -#define STORE_STATE JOIN(store_, STATE_T) #define AND_STATE JOIN(and_, STATE_T) #define EQ_STATE(a, b) (!JOIN(noteq_, STATE_T)((a), (b))) #define OR_STATE JOIN(or_, STATE_T) @@ -59,7 +57,7 @@ #define ESTATE_ARG STATE_T estate #else #define ESTATE_ARG const STATE_T *estatep -#define estate LOAD_STATE(estatep) +#define estate (*estatep) #endif #ifdef STATE_ON_STACK @@ -133,7 +131,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, char *repeat_state = ctx->repeat_state + info->stateOffset; if (e->trigger == LIMEX_TRIGGER_POS) { - char cyclic_on = TESTBIT_STATE(STATE_ARG_P, info->cyclicState); + char cyclic_on = TESTBIT_STATE(*STATE_ARG_P, info->cyclicState); processPosTrigger(repeat, repeat_ctrl, repeat_state, offset, cyclic_on); *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES; @@ -149,8 +147,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES; DEBUG_PRINTF("stale history, squashing cyclic state\n"); assert(e->hasSquash == LIMEX_SQUASH_TUG); - STORE_STATE(succ, AND_STATE(LOAD_STATE(succ), - LOAD_STATE(&e->squash))); + *succ = AND_STATE(*succ, LOAD_FROM_ENG(&e->squash)); return 1; // continue } else if (rv == TRIGGER_SUCCESS_CACHE) { new_cache->br = 1; @@ -188,18 +185,16 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, // Most exceptions have a set of successors to switch on. `local_succ' is // ORed into `succ' at the end of the caller's loop. 
#ifndef BIG_MODEL - *local_succ = OR_STATE(*local_succ, LOAD_STATE(&e->successors)); + *local_succ = OR_STATE(*local_succ, LOAD_FROM_ENG(&e->successors)); #else - STORE_STATE(&ctx->local_succ, OR_STATE(LOAD_STATE(&ctx->local_succ), - LOAD_STATE(&e->successors))); + ctx->local_succ = OR_STATE(ctx->local_succ, LOAD_FROM_ENG(&e->successors)); #endif // Some exceptions squash states behind them. Note that we squash states in // 'succ', not local_succ. - if (e->hasSquash == LIMEX_SQUASH_CYCLIC || - e->hasSquash == LIMEX_SQUASH_REPORT) { - STORE_STATE(succ, AND_STATE(LOAD_STATE(succ), - LOAD_STATE(&e->squash))); + if (e->hasSquash == LIMEX_SQUASH_CYCLIC + || e->hasSquash == LIMEX_SQUASH_REPORT) { + *succ = AND_STATE(*succ, LOAD_FROM_ENG(&e->squash)); if (*cacheable == CACHE_RESULT) { *cacheable = DO_NOT_CACHE_RESULT; } @@ -219,9 +214,9 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, char in_rev, char flags) { assert(diffmask > 0); // guaranteed by caller macro - if (EQ_STATE(estate, LOAD_STATE(&ctx->cached_estate))) { + if (EQ_STATE(estate, ctx->cached_estate)) { DEBUG_PRINTF("using cached succ from previous state\n"); - STORE_STATE(succ, OR_STATE(LOAD_STATE(succ), LOAD_STATE(&ctx->cached_esucc))); + *succ = OR_STATE(*succ, ctx->cached_esucc); if (ctx->cached_reports && (flags & CALLBACK_OUTPUT)) { DEBUG_PRINTF("firing cached reports from previous state\n"); if (unlikely(limexRunReports(ctx->cached_reports, ctx->callback, @@ -236,7 +231,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, #ifndef BIG_MODEL STATE_T local_succ = ZERO_STATE; #else - STORE_STATE(&ctx->local_succ, ZERO_STATE); + ctx->local_succ = ZERO_STATE; #endif // A copy of the estate as an array of GPR-sized chunks. @@ -254,7 +249,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)]; base_index[0] = 0; - for (u32 i = 0; i < ARRAY_LENGTH(base_index) - 1; i++) { + for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) { base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]); } @@ -284,23 +279,23 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, } while (diffmask); #ifndef BIG_MODEL - STORE_STATE(succ, OR_STATE(LOAD_STATE(succ), local_succ)); + *succ = OR_STATE(*succ, local_succ); #else - STORE_STATE(succ, OR_STATE(LOAD_STATE(succ), ctx->local_succ)); + *succ = OR_STATE(*succ, ctx->local_succ); #endif if (cacheable == CACHE_RESULT) { - STORE_STATE(&ctx->cached_estate, estate); + ctx->cached_estate = estate; #ifndef BIG_MODEL ctx->cached_esucc = local_succ; #else - STORE_STATE(&ctx->cached_esucc, LOAD_STATE(&ctx->local_succ)); + ctx->cached_esucc = ctx->local_succ; #endif ctx->cached_reports = new_cache.reports; ctx->cached_br = new_cache.br; } else if (cacheable == DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES) { if (ctx->cached_br) { - STORE_STATE(&ctx->cached_estate, ZERO_STATE); + ctx->cached_estate = ZERO_STATE; } } @@ -314,8 +309,6 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, #undef EQ_STATE #undef OR_STATE #undef TESTBIT_STATE -#undef LOAD_STATE -#undef STORE_STATE #undef PE_FN #undef RUN_EXCEPTION_FN #undef CONTEXT_T @@ -337,7 +330,3 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, #undef FIND_AND_CLEAR_FN #undef IMPL_NFA_T #undef GET_NFA_REPEAT_INFO_FN - -// Parameters. 
-#undef SIZE -#undef STATE_T diff --git a/src/nfa/limex_internal.h b/src/nfa/limex_internal.h index c37f5f40..03ebb384 100644 --- a/src/nfa/limex_internal.h +++ b/src/nfa/limex_internal.h @@ -160,6 +160,7 @@ struct LimExNFA##size { \ }; CREATE_NFA_LIMEX(32) +CREATE_NFA_LIMEX(64) CREATE_NFA_LIMEX(128) CREATE_NFA_LIMEX(256) CREATE_NFA_LIMEX(384) diff --git a/src/nfa/limex_native.c b/src/nfa/limex_native.c index 8a0a8acd..c9949836 100644 --- a/src/nfa/limex_native.c +++ b/src/nfa/limex_native.c @@ -49,12 +49,13 @@ #include "limex_runtime.h" // Other implementation code from X-Macro impl. -#define SIZE 32 -#define STATE_T u32 +#define SIZE 32 +#define STATE_T u32 +#define ENG_STATE_T u32 +#define LOAD_FROM_ENG load_u32 + #include "limex_state_impl.h" -#define SIZE 32 -#define STATE_T u32 #define INLINE_ATTR really_inline #include "limex_common_impl.h" @@ -64,8 +65,6 @@ // Process exceptional states -#define SIZE 32 -#define STATE_T u32 #define STATE_ON_STACK #define ESTATE_ON_STACK #define RUN_EXCEPTION_FN_ONLY @@ -128,7 +127,4 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ, } // 32-bit models. - -#define SIZE 32 -#define STATE_T u32 #include "limex_runtime_impl.h" diff --git a/src/nfa/limex_runtime.h b/src/nfa/limex_runtime.h index e0c182fc..75094ef6 100644 --- a/src/nfa/limex_runtime.h +++ b/src/nfa/limex_runtime.h @@ -30,8 +30,8 @@ \brief Limex Execution Engine Or: How I Learned To Stop Worrying And Love The Preprocessor - This file includes utility functions which do not depend on the state size or - shift masks directly. + This file includes utility functions which do not depend on the size of the + state or shift masks directly. */ #ifndef LIMEX_RUNTIME_H @@ -72,41 +72,6 @@ struct proto_cache { const ReportID *reports; }; -// Shift macros for Limited NFAs. Defined in terms of uniform ops. -// LimExNFAxxx ptr in 'limex' and the current state in 's' -#define NFA_EXEC_LIM_SHIFT(nels_type, nels_i) \ - (JOIN(lshift_, nels_type)( \ - JOIN(and_, nels_type)(s, \ - JOIN(load_, nels_type)(&limex->shift[nels_i])), \ - limex->shiftAmount[nels_i])) - -// Calculate the (limited model) successors for a number of variable shifts. -// Assumes current state in 's' and successors in 'succ'. 
- -#define NFA_EXEC_GET_LIM_SUCC(gls_type) \ - do { \ - succ = NFA_EXEC_LIM_SHIFT(gls_type, 0); \ - switch (limex->shiftCount) { \ - case 8: \ - succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 7)); \ - case 7: \ - succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 6)); \ - case 6: \ - succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 5)); \ - case 5: \ - succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 4)); \ - case 4: \ - succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 3)); \ - case 3: \ - succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 2)); \ - case 2: \ - succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 1)); \ - case 1: \ - case 0: \ - ; \ - } \ - } while (0) - #define PE_RV_HALT 1 #ifdef STATE_ON_STACK @@ -170,6 +135,7 @@ int limexRunReports(const ReportID *reports, NfaCallback callback, } MAKE_GET_NFA_REPEAT_INFO(32) +MAKE_GET_NFA_REPEAT_INFO(64) MAKE_GET_NFA_REPEAT_INFO(128) MAKE_GET_NFA_REPEAT_INFO(256) MAKE_GET_NFA_REPEAT_INFO(384) diff --git a/src/nfa/limex_runtime_impl.h b/src/nfa/limex_runtime_impl.h index 881e41fd..644ddd6a 100644 --- a/src/nfa/limex_runtime_impl.h +++ b/src/nfa/limex_runtime_impl.h @@ -29,7 +29,6 @@ #include "util/join.h" #include - /** \file * \brief Limex Execution Engine Or: * How I Learned To Stop Worrying And Love The Preprocessor @@ -37,8 +36,9 @@ * Version 2.0: now with X-Macros, so you get line numbers in your debugger. */ -#if !defined(SIZE) || !defined(STATE_T) -# error Must define SIZE and STATE_T in includer. + +#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) +# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer. #endif #define LIMEX_API_ROOT JOIN(nfaExecLimEx, SIZE) @@ -67,11 +67,10 @@ #define STREAMSILENT_FN JOIN(LIMEX_API_ROOT, _Stream_Silent) #define CONTEXT_T JOIN(NFAContext, SIZE) #define EXCEPTION_T JOIN(struct NFAException, SIZE) -#define LOAD_STATE JOIN(load_, STATE_T) -#define STORE_STATE JOIN(store_, STATE_T) #define AND_STATE JOIN(and_, STATE_T) #define ANDNOT_STATE JOIN(andnot_, STATE_T) #define OR_STATE JOIN(or_, STATE_T) +#define LSHIFT_STATE JOIN(lshift_, STATE_T) #define TESTBIT_STATE JOIN(testbit_, STATE_T) #define CLEARBIT_STATE JOIN(clearbit_, STATE_T) #define ZERO_STATE JOIN(zero_, STATE_T) @@ -96,9 +95,9 @@ #define ACCEL_AND_FRIENDS_MASK accel_and_friendsMask #define EXCEPTION_MASK exceptionMask #else -#define ACCEL_MASK LOAD_STATE(&limex->accel) -#define ACCEL_AND_FRIENDS_MASK LOAD_STATE(&limex->accel_and_friends) -#define EXCEPTION_MASK LOAD_STATE(&limex->exceptionMask) +#define ACCEL_MASK LOAD_FROM_ENG(&limex->accel) +#define ACCEL_AND_FRIENDS_MASK LOAD_FROM_ENG(&limex->accel_and_friends) +#define EXCEPTION_MASK LOAD_FROM_ENG(&limex->exceptionMask) #endif // Run exception processing, if necessary. Returns 0 if scanning should @@ -117,13 +116,13 @@ char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, } if (first_match && i) { - STATE_T acceptMask = LOAD_STATE(&limex->accept); + STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); STATE_T foundAccepts = AND_STATE(s, acceptMask); if (unlikely(ISNONZERO_STATE(foundAccepts))) { DEBUG_PRINTF("first match at %zu\n", i); DEBUG_PRINTF("for nfa %p\n", limex); assert(final_loc); - STORE_STATE(&ctx->s, s); + ctx->s = s; *final_loc = i; return 1; // Halt matching. } @@ -161,22 +160,56 @@ size_t RUN_ACCEL_FN(const STATE_T s, UNUSED const STATE_T accelMask, return j; } +// Shift macros for Limited NFAs. Defined in terms of uniform ops. 
+// LimExNFAxxx ptr in 'limex' and the current state in 's' +#define NFA_EXEC_LIM_SHIFT(limex_m, curr_m, shift_idx) \ + LSHIFT_STATE(AND_STATE(curr_m, LOAD_FROM_ENG(&limex_m->shift[shift_idx])), \ + limex_m->shiftAmount[shift_idx]) + +// Calculate the (limited model) successors for a number of variable shifts. +// Assumes current state in 'curr_m' and places the successors in 'succ_m'. +#define NFA_EXEC_GET_LIM_SUCC(limex_m, curr_m, succ_m) \ + do { \ + succ_m = NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 0); \ + switch (limex_m->shiftCount) { \ + case 8: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 7)); \ + case 7: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 6)); \ + case 6: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 5)); \ + case 5: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 4)); \ + case 4: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 3)); \ + case 3: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 2)); \ + case 2: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 1)); \ + case 1: \ + case 0: \ + ; \ + } \ + } while (0) + + static really_inline char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, struct CONTEXT_T *ctx, u64a offset, const char flags, u64a *final_loc, const char first_match) { - const STATE_T *reach = (const STATE_T *)((const char *)limex + sizeof(*limex)); + const ENG_STATE_T *reach = get_reach_table(limex); #if SIZE < 256 - const STATE_T accelMask = LOAD_STATE(&limex->accel); - const STATE_T accel_and_friendsMask = LOAD_STATE(&limex->accel_and_friends); - const STATE_T exceptionMask = LOAD_STATE(&limex->exceptionMask); + const STATE_T accelMask = LOAD_FROM_ENG(&limex->accel); + const STATE_T accel_and_friendsMask + = LOAD_FROM_ENG(&limex->accel_and_friends); + const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask); #endif const u8 *accelTable = (const u8 *)((const char *)limex + limex->accelTableOffset); const union AccelAux *accelAux = (const union AccelAux *)((const char *)limex + limex->accelAuxOffset); const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); const ReportID *exReports = getExReports(limex); - STATE_T s = LOAD_STATE(&ctx->s); + STATE_T s = ctx->s; /* assert(ISALIGNED_16(exceptions)); */ /* assert(ISALIGNED_16(reach)); */ @@ -195,13 +228,13 @@ without_accel: DUMP_INPUT(i); if (ISZERO_STATE(s)) { DEBUG_PRINTF("no states are switched on, early exit\n"); - STORE_STATE(&ctx->s, s); + ctx->s = s; return MO_CONTINUE_MATCHING; } u8 c = input[i]; STATE_T succ; - NFA_EXEC_GET_LIM_SUCC(STATE_T); + NFA_EXEC_GET_LIM_SUCC(limex, s, succ); if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK, i, offset, &succ, final_loc, ctx, flags, 0, @@ -209,7 +242,7 @@ without_accel: return MO_HALT_MATCHING; } - s = AND_STATE(succ, LOAD_STATE(&reach[limex->reachMap[c]])); + s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); } with_accel: @@ -252,7 +285,7 @@ with_accel: u8 c = input[i]; STATE_T succ; - NFA_EXEC_GET_LIM_SUCC(STATE_T); + NFA_EXEC_GET_LIM_SUCC(limex, s, succ); if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK, i, offset, &succ, final_loc, ctx, flags, 0, @@ -260,20 +293,20 @@ with_accel: return MO_HALT_MATCHING; } - s = AND_STATE(succ, LOAD_STATE(&reach[limex->reachMap[c]])); + s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); } - STORE_STATE(&ctx->s, s); + ctx->s = s; if ((first_match || (flags & CALLBACK_OUTPUT)) && 
limex->acceptCount) { - STATE_T acceptMask = LOAD_STATE(&limex->accept); + STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); const struct NFAAccept *acceptTable = getAcceptTable(limex); const u32 acceptCount = limex->acceptCount; STATE_T foundAccepts = AND_STATE(s, acceptMask); if (unlikely(ISNONZERO_STATE(foundAccepts))) { if (first_match) { - STORE_STATE(&ctx->s, s); + ctx->s = s; assert(final_loc); *final_loc = length; return MO_HALT_MATCHING; @@ -294,13 +327,13 @@ with_accel: static never_inline char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, struct CONTEXT_T *ctx, u64a offset) { - const STATE_T *reach = (const STATE_T *)((const char *)limex + sizeof(*limex)); + const ENG_STATE_T *reach = get_reach_table(limex); #if SIZE < 256 - const STATE_T exceptionMask = LOAD_STATE(&limex->exceptionMask); + const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask); #endif const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); const ReportID *exReports = getExReports(limex); - STATE_T s = LOAD_STATE(&ctx->s); + STATE_T s = ctx->s; /* assert(ISALIGNED_16(exceptions)); */ /* assert(ISALIGNED_16(reach)); */ @@ -311,13 +344,13 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, DUMP_INPUT(i-1); if (ISZERO_STATE(s)) { DEBUG_PRINTF("no states are switched on, early exit\n"); - STORE_STATE(&ctx->s, s); + ctx->s = s; return MO_CONTINUE_MATCHING; } u8 c = input[i-1]; STATE_T succ; - NFA_EXEC_GET_LIM_SUCC(STATE_T); + NFA_EXEC_GET_LIM_SUCC(limex, s, succ); if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK, i, offset, &succ, final_loc, ctx, @@ -325,12 +358,12 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, return MO_HALT_MATCHING; } - s = AND_STATE(succ, reach[limex->reachMap[c]]); + s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); } - STORE_STATE(&ctx->s, s); + ctx->s = s; - STATE_T acceptMask = LOAD_STATE(&limex->accept); + STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); const struct NFAAccept *acceptTable = getAcceptTable(limex); const u32 acceptCount = limex->acceptCount; assert(flags & CALLBACK_OUTPUT); @@ -354,9 +387,9 @@ void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src, return; } - STATE_T s = LOAD_STATE(src); + STATE_T s = *(STATE_T *)src; - if (ISZERO_STATE(AND_STATE(s, LOAD_STATE(&limex->repeatCyclicMask)))) { + if (ISZERO_STATE(AND_STATE(LOAD_FROM_ENG(&limex->repeatCyclicMask), s))) { DEBUG_PRINTF("no cyclics are on\n"); return; } @@ -369,7 +402,7 @@ void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src, DEBUG_PRINTF("repeat %u\n", i); const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); - if (!TESTBIT_STATE(&s, info->cyclicState)) { + if (!TESTBIT_STATE(s, info->cyclicState)) { DEBUG_PRINTF("is dead\n"); continue; } @@ -388,7 +421,7 @@ void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src, offset); } - STORE_STATE(src, s); + *(STATE_T *)src = s; } char JOIN(LIMEX_API_ROOT, _queueCompressState)(const struct NFA *n, @@ -411,7 +444,7 @@ void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src, // Note: state has already been expanded into 'dest'. 
const STATE_T cyclics = - AND_STATE(LOAD_STATE(dest), LOAD_STATE(&limex->repeatCyclicMask)); + AND_STATE(*(STATE_T *)dest, LOAD_FROM_ENG(&limex->repeatCyclicMask)); if (ISZERO_STATE(cyclics)) { DEBUG_PRINTF("no cyclics are on\n"); return; @@ -425,7 +458,7 @@ void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src, DEBUG_PRINTF("repeat %u\n", i); const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); - if (!TESTBIT_STATE(&cyclics, info->cyclicState)) { + if (!TESTBIT_STATE(cyclics, info->cyclicState)) { DEBUG_PRINTF("is dead\n"); continue; } @@ -447,9 +480,8 @@ char JOIN(LIMEX_API_ROOT, _expandState)(const struct NFA *n, void *dest, return 0; } -char JOIN(LIMEX_API_ROOT, _queueInitState)(const struct NFA *n, - struct mq *q) { - STORE_STATE(q->state, ZERO_STATE); +char JOIN(LIMEX_API_ROOT, _queueInitState)(const struct NFA *n, struct mq *q) { + *(STATE_T *)q->state = ZERO_STATE; // Zero every bounded repeat control block in state. const IMPL_NFA_T *limex = getImplNfa(n); @@ -529,7 +561,7 @@ void JOIN(LIMEX_API_ROOT, _HandleEvent)(const IMPL_NFA_T *limex, u32 e = q->items[q->cur].type; switch (e) { DEFINE_CASE(MQE_TOP) - STORE_STATE(&ctx->s, TOP_FN(limex, !!sp, LOAD_STATE(&ctx->s))); + ctx->s = TOP_FN(limex, !!sp, ctx->s); break; DEFINE_CASE(MQE_START) break; @@ -539,8 +571,7 @@ void JOIN(LIMEX_API_ROOT, _HandleEvent)(const IMPL_NFA_T *limex, assert(e >= MQE_TOP_FIRST); assert(e < MQE_INVALID); DEBUG_PRINTF("MQE_TOP + %d\n", ((int)e - MQE_TOP_FIRST)); - STORE_STATE(&ctx->s, - TOPN_FN(limex, LOAD_STATE(&ctx->s), e - MQE_TOP_FIRST)); + ctx->s = TOPN_FN(limex, ctx->s, e - MQE_TOP_FIRST); } #undef DEFINE_CASE } @@ -570,12 +601,12 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { ctx.repeat_state = q->streamState + limex->stateSize; ctx.callback = q->cb; ctx.context = q->context; - STORE_STATE(&ctx.cached_estate, ZERO_STATE); + ctx.cached_estate = ZERO_STATE; ctx.cached_br = 0; assert(q->items[q->cur].location >= 0); DEBUG_PRINTF("LOAD STATE\n"); - STORE_STATE(&ctx.s, LOAD_STATE(q->state)); + ctx.s = *(STATE_T *)q->state; assert(q->items[q->cur].type == MQE_START); u64a offset = q->offset; @@ -599,7 +630,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { assert(ep - offset <= q->length); if (STREAMCB_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp) == MO_HALT_MATCHING) { - STORE_STATE(q->state, ZERO_STATE); + *(STATE_T *)q->state = ZERO_STATE; return 0; } @@ -616,7 +647,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp - offset; DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end); - STORE_STATE(q->state, LOAD_STATE(&ctx.s)); + *(STATE_T *)q->state = ctx.s; return MO_ALIVE; } @@ -628,7 +659,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { EXPIRE_ESTATE_FN(limex, &ctx, sp); DEBUG_PRINTF("END\n"); - STORE_STATE(q->state, LOAD_STATE(&ctx.s)); + *(STATE_T *)q->state = ctx.s; if (q->cur != q->end) { q->cur--; @@ -637,7 +668,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { return MO_ALIVE; } - return ISNONZERO_STATE(LOAD_STATE(&ctx.s)); + return ISNONZERO_STATE(ctx.s); } /* used by suffix execution in Rose */ @@ -665,11 +696,11 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { ctx.repeat_state = q->streamState + limex->stateSize; ctx.callback = q->cb; ctx.context = q->context; - 
STORE_STATE(&ctx.cached_estate, ZERO_STATE); + ctx.cached_estate = ZERO_STATE; ctx.cached_br = 0; DEBUG_PRINTF("LOAD STATE\n"); - STORE_STATE(&ctx.s, LOAD_STATE(q->state)); + ctx.s = *(STATE_T *)q->state; assert(q->items[q->cur].type == MQE_START); u64a offset = q->offset; @@ -699,7 +730,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp + final_look - offset; - STORE_STATE(q->state, LOAD_STATE(&ctx.s)); + *(STATE_T *)q->state = ctx.s; return MO_MATCHES_PENDING; } @@ -721,7 +752,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp + final_look - offset; - STORE_STATE(q->state, LOAD_STATE(&ctx.s)); + *(STATE_T *)q->state = ctx.s; return MO_MATCHES_PENDING; } @@ -737,7 +768,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp - offset; DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end); - STORE_STATE(q->state, LOAD_STATE(&ctx.s)); + *(STATE_T *)q->state = ctx.s; return MO_ALIVE; } @@ -749,7 +780,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { EXPIRE_ESTATE_FN(limex, &ctx, sp); DEBUG_PRINTF("END\n"); - STORE_STATE(q->state, LOAD_STATE(&ctx.s)); + *(STATE_T *)q->state = ctx.s; if (q->cur != q->end) { q->cur--; @@ -758,7 +789,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { return MO_ALIVE; } - return ISNONZERO_STATE(LOAD_STATE(&ctx.s)); + return ISNONZERO_STATE(ctx.s); } // Used for execution Rose prefix/infixes. @@ -777,11 +808,11 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, ctx.repeat_state = q->streamState + limex->stateSize; ctx.callback = NULL; ctx.context = NULL; - STORE_STATE(&ctx.cached_estate, ZERO_STATE); + ctx.cached_estate = ZERO_STATE; ctx.cached_br = 0; DEBUG_PRINTF("LOAD STATE\n"); - STORE_STATE(&ctx.s, LOAD_STATE(q->state)); + ctx.s = *(STATE_T *)q->state; assert(q->items[q->cur].type == MQE_START); u64a offset = q->offset; @@ -793,7 +824,7 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, if (n->maxWidth) { if (ep - sp > n->maxWidth) { sp = ep - n->maxWidth; - STORE_STATE(&ctx.s, INITIAL_FN(limex, !!sp)); + ctx.s = INITIAL_FN(limex, !!sp); } } assert(ep >= sp); @@ -832,14 +863,14 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, DEBUG_PRINTF("END, nfa is %s\n", ISNONZERO_STATE(ctx.s) ? "still alive" : "dead"); - STORE_STATE(q->state, LOAD_STATE(&ctx.s)); + *(STATE_T *)q->state = ctx.s; - if (JOIN(limexInAccept, SIZE)(limex, LOAD_STATE(&ctx.s), ctx.repeat_ctrl, + if (JOIN(limexInAccept, SIZE)(limex, ctx.s, ctx.repeat_ctrl, ctx.repeat_state, sp + 1, report)) { return MO_MATCHES_PENDING; } - return ISNONZERO_STATE(LOAD_STATE(&ctx.s)); + return ISNONZERO_STATE(ctx.s); } char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state, @@ -875,11 +906,11 @@ char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset, ctx.repeat_state = NULL; ctx.callback = cb; ctx.context = context; - STORE_STATE(&ctx.cached_estate, ZERO_STATE); + ctx.cached_estate = ZERO_STATE; ctx.cached_br = 0; const IMPL_NFA_T *limex = getImplNfa(n); - STORE_STATE(&ctx.s, INITIAL_FN(limex, 0)); // always anchored + ctx.s = INITIAL_FN(limex, 0); // always anchored // 'buf' may be null, for example when we're scanning at EOD time. 
if (buflen) { @@ -896,7 +927,7 @@ char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset, REV_STREAM_FN(limex, hbuf, hlen, &ctx, offset); } - if (offset == 0 && ISNONZERO_STATE(LOAD_STATE(&ctx.s))) { + if (offset == 0 && ISNONZERO_STATE(ctx.s)) { TESTEOD_REV_FN(limex, &ctx.s, offset, cb, context); } @@ -913,7 +944,7 @@ char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa, union RepeatControl *repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); char *repeat_state = q->streamState + limex->stateSize; - STATE_T state = LOAD_STATE(q->state); + STATE_T state = *(STATE_T *)q->state; u64a offset = q->offset + q_last_loc(q) + 1; return JOIN(limexInAccept, SIZE)(limex, state, repeat_ctrl, repeat_state, @@ -928,7 +959,7 @@ char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) { union RepeatControl *repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); char *repeat_state = q->streamState + limex->stateSize; - STATE_T state = LOAD_STATE(q->state); + STATE_T state = *(STATE_T *)q->state; u64a offset = q->offset + q_last_loc(q) + 1; return JOIN(limexInAnyAccept, SIZE)(limex, state, repeat_ctrl, repeat_state, @@ -941,8 +972,8 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( s64a loc) { assert(nfa->flags & NFA_ZOMBIE); const IMPL_NFA_T *limex = getImplNfa(nfa); - STATE_T state = LOAD_STATE(q->state); - STATE_T zmask = LOAD_STATE(&limex->zombieMask); + STATE_T state = *(STATE_T *)q->state; + STATE_T zmask = LOAD_FROM_ENG(&limex->zombieMask); if (limex->repeatCount) { u64a offset = q->offset + loc + 1; @@ -981,11 +1012,10 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( #undef STREAMSILENT_FN #undef CONTEXT_T #undef EXCEPTION_T -#undef LOAD_STATE -#undef STORE_STATE #undef AND_STATE #undef ANDNOT_STATE #undef OR_STATE +#undef LSHIFT_STATE #undef TESTBIT_STATE #undef CLEARBIT_STATE #undef ZERO_STATE @@ -999,8 +1029,4 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( #undef ACCEL_MASK #undef ACCEL_AND_FRIENDS_MASK #undef EXCEPTION_MASK - -// Parameters. 
-#undef SIZE -#undef STATE_T #undef LIMEX_API_ROOT diff --git a/src/nfa/limex_simd128.c b/src/nfa/limex_simd128.c index f0fb1dd4..c5f2b33e 100644 --- a/src/nfa/limex_simd128.c +++ b/src/nfa/limex_simd128.c @@ -48,19 +48,16 @@ #include "limex_runtime.h" -#define SIZE 128 -#define STATE_T m128 +#define SIZE 128 +#define STATE_T m128 +#define ENG_STATE_T m128 +#define LOAD_FROM_ENG load_m128 + #include "limex_exceptional.h" -#define SIZE 128 -#define STATE_T m128 #include "limex_state_impl.h" -#define SIZE 128 -#define STATE_T m128 #define INLINE_ATTR really_inline #include "limex_common_impl.h" -#define SIZE 128 -#define STATE_T m128 #include "limex_runtime_impl.h" diff --git a/src/nfa/limex_simd256.c b/src/nfa/limex_simd256.c index 57648b69..cc232908 100644 --- a/src/nfa/limex_simd256.c +++ b/src/nfa/limex_simd256.c @@ -45,19 +45,16 @@ // Common code #include "limex_runtime.h" -#define SIZE 256 -#define STATE_T m256 +#define SIZE 256 +#define STATE_T m256 +#define ENG_STATE_T m256 +#define LOAD_FROM_ENG load_m256 + #include "limex_exceptional.h" -#define SIZE 256 -#define STATE_T m256 #include "limex_state_impl.h" -#define SIZE 256 -#define STATE_T m256 #define INLINE_ATTR really_inline #include "limex_common_impl.h" -#define SIZE 256 -#define STATE_T m256 #include "limex_runtime_impl.h" diff --git a/src/nfa/limex_simd384.c b/src/nfa/limex_simd384.c index 84061f61..7e596e48 100644 --- a/src/nfa/limex_simd384.c +++ b/src/nfa/limex_simd384.c @@ -45,19 +45,16 @@ // Common code #include "limex_runtime.h" -#define SIZE 384 -#define STATE_T m384 +#define SIZE 384 +#define STATE_T m384 +#define ENG_STATE_T m384 +#define LOAD_FROM_ENG load_m384 + #include "limex_exceptional.h" -#define SIZE 384 -#define STATE_T m384 #include "limex_state_impl.h" -#define SIZE 384 -#define STATE_T m384 #define INLINE_ATTR really_inline #include "limex_common_impl.h" -#define SIZE 384 -#define STATE_T m384 #include "limex_runtime_impl.h" diff --git a/src/nfa/limex_simd512.c b/src/nfa/limex_simd512.c index a6646d83..f779f335 100644 --- a/src/nfa/limex_simd512.c +++ b/src/nfa/limex_simd512.c @@ -45,19 +45,16 @@ // Common code #include "limex_runtime.h" -#define SIZE 512 -#define STATE_T m512 +#define SIZE 512 +#define STATE_T m512 +#define ENG_STATE_T m512 +#define LOAD_FROM_ENG load_m512 + #include "limex_exceptional.h" -#define SIZE 512 -#define STATE_T m512 #include "limex_state_impl.h" -#define SIZE 512 -#define STATE_T m512 #define INLINE_ATTR really_inline #include "limex_common_impl.h" -#define SIZE 512 -#define STATE_T m512 #include "limex_runtime_impl.h" diff --git a/src/nfa/limex_state_impl.h b/src/nfa/limex_state_impl.h index d6e89904..81153f71 100644 --- a/src/nfa/limex_state_impl.h +++ b/src/nfa/limex_state_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,8 +35,8 @@ #include "util/state_compress.h" #include -#if !defined(SIZE) || !defined(STATE_T) -# error Must define SIZE and STATE_T in includer. +#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) +# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer. 
#endif #define IMPL_NFA_T JOIN(struct LimExNFA, SIZE) @@ -44,29 +44,33 @@ #define REACHMASK_FN JOIN(moNfaReachMask, SIZE) #define COMPRESS_FN JOIN(moNfaCompressState, SIZE) #define EXPAND_FN JOIN(moNfaExpandState, SIZE) -#define COMPRESSED_STORE_FN JOIN(storecompressed, SIZE) -#define COMPRESSED_LOAD_FN JOIN(loadcompressed, SIZE) +#define COMPRESSED_STORE_FN JOIN(store_compressed_, STATE_T) +#define COMPRESSED_LOAD_FN JOIN(load_compressed_, STATE_T) #define PARTIAL_STORE_FN JOIN(partial_store_, STATE_T) #define PARTIAL_LOAD_FN JOIN(partial_load_, STATE_T) -#define LOAD_STATE JOIN(load_, STATE_T) -#define STORE_STATE JOIN(store_, STATE_T) #define OR_STATE JOIN(or_, STATE_T) #define AND_STATE JOIN(and_, STATE_T) #define ISZERO_STATE JOIN(isZero_, STATE_T) static really_inline -const STATE_T *REACHMASK_FN(const IMPL_NFA_T *limex, const u8 key) { - const STATE_T *reach - = (const STATE_T *)((const char *)limex + sizeof(*limex)); - assert(ISALIGNED_N(reach, alignof(STATE_T))); - return &reach[limex->reachMap[key]]; +const ENG_STATE_T *get_reach_table(const IMPL_NFA_T *limex) { + const ENG_STATE_T *reach + = (const ENG_STATE_T *)((const char *)limex + sizeof(*limex)); + assert(ISALIGNED_N(reach, alignof(ENG_STATE_T))); + return reach; +} + +static really_inline +STATE_T REACHMASK_FN(const IMPL_NFA_T *limex, const u8 key) { + const ENG_STATE_T *reach = get_reach_table(limex); + return LOAD_FROM_ENG(&reach[limex->reachMap[key]]); } static really_inline void COMPRESS_FN(const IMPL_NFA_T *limex, u8 *dest, const STATE_T *src, u8 key) { assert(ISALIGNED_N(src, alignof(STATE_T))); - STATE_T a_src = LOAD_STATE(src); + STATE_T a_src = *src; DEBUG_PRINTF("compress state: %p -> %p\n", src, dest); @@ -77,31 +81,30 @@ void COMPRESS_FN(const IMPL_NFA_T *limex, u8 *dest, const STATE_T *src, } else { DEBUG_PRINTF("compress state, key=%hhx\n", key); - const STATE_T *reachmask = REACHMASK_FN(limex, key); + STATE_T reachmask = REACHMASK_FN(limex, key); // Masked compression means that we mask off the initDs states and // provide a shortcut for the all-zeroes case. Note that these must be // switched on in the EXPAND call below. 
if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) { - STATE_T s = AND_STATE(LOAD_STATE(&limex->compressMask), a_src); + STATE_T s = AND_STATE(LOAD_FROM_ENG(&limex->compressMask), a_src); if (ISZERO_STATE(s)) { DEBUG_PRINTF("after compression mask, all states are zero\n"); memset(dest, 0, limex->stateSize); return; } - STATE_T mask = AND_STATE(LOAD_STATE(&limex->compressMask), - LOAD_STATE(reachmask)); + STATE_T mask = AND_STATE(LOAD_FROM_ENG(&limex->compressMask), + reachmask); COMPRESSED_STORE_FN(dest, &s, &mask, limex->stateSize); } else { - COMPRESSED_STORE_FN(dest, src, reachmask, limex->stateSize); + COMPRESSED_STORE_FN(dest, src, &reachmask, limex->stateSize); } } } static really_inline -void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src, - u8 key) { +void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src, u8 key) { assert(ISALIGNED_N(dest, alignof(STATE_T))); DEBUG_PRINTF("expand state: %p -> %p\n", src, dest); @@ -111,16 +114,15 @@ void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src, *dest = PARTIAL_LOAD_FN(src, limex->stateSize); } else { DEBUG_PRINTF("expand state, key=%hhx\n", key); - const STATE_T *reachmask = REACHMASK_FN(limex, key); + STATE_T reachmask = REACHMASK_FN(limex, key); if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) { - STATE_T mask = AND_STATE(LOAD_STATE(&limex->compressMask), - LOAD_STATE(reachmask)); + STATE_T mask = AND_STATE(LOAD_FROM_ENG(&limex->compressMask), + reachmask); COMPRESSED_LOAD_FN(dest, src, &mask, limex->stateSize); - STORE_STATE(dest, OR_STATE(LOAD_STATE(&limex->initDS), - LOAD_STATE(dest))); + *dest = OR_STATE(LOAD_FROM_ENG(&limex->initDS), *dest); } else { - COMPRESSED_LOAD_FN(dest, src, reachmask, limex->stateSize); + COMPRESSED_LOAD_FN(dest, src, &reachmask, limex->stateSize); } } } @@ -134,11 +136,6 @@ void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src, #undef COMPRESSED_LOAD_FN #undef PARTIAL_STORE_FN #undef PARTIAL_LOAD_FN -#undef LOAD_STATE -#undef STORE_STATE #undef OR_STATE #undef AND_STATE #undef ISZERO_STATE - -#undef SIZE -#undef STATE_T diff --git a/src/nfa/nfa_api_dispatch.c b/src/nfa/nfa_api_dispatch.c index c67103b3..2a213ed6 100644 --- a/src/nfa/nfa_api_dispatch.c +++ b/src/nfa/nfa_api_dispatch.c @@ -55,6 +55,7 @@ #define DISPATCH_BY_NFA_TYPE(dbnt_func) \ switch (nfa->type) { \ DISPATCH_CASE(LIMEX, LimEx, 32, dbnt_func); \ + DISPATCH_CASE(LIMEX, LimEx, 64, dbnt_func); \ DISPATCH_CASE(LIMEX, LimEx, 128, dbnt_func); \ DISPATCH_CASE(LIMEX, LimEx, 256, dbnt_func); \ DISPATCH_CASE(LIMEX, LimEx, 384, dbnt_func); \ diff --git a/src/nfa/nfa_build_util.cpp b/src/nfa/nfa_build_util.cpp index 93376b01..f3b5329d 100644 --- a/src/nfa/nfa_build_util.cpp +++ b/src/nfa/nfa_build_util.cpp @@ -170,17 +170,16 @@ enum NFACategory {NFA_LIMEX, NFA_OTHER}; #define DO_IF_DUMP_SUPPORT(a) #endif -#define MAKE_LIMEX_TRAITS(mlt_size) \ +#define MAKE_LIMEX_TRAITS(mlt_size, mlt_align) \ template<> struct NFATraits { \ static UNUSED const char *name; \ static const NFACategory category = NFA_LIMEX; \ typedef LimExNFA##mlt_size implNFA_t; \ - typedef u_##mlt_size tableRow_t; \ static const nfa_dispatch_fn has_accel; \ static const nfa_dispatch_fn has_repeats; \ static const nfa_dispatch_fn has_repeats_other_than_firsts; \ static const u32 stateAlign = \ - MAX(alignof(tableRow_t), alignof(RepeatControl)); \ + MAX(mlt_align, alignof(RepeatControl)); \ static const bool fast = mlt_size <= 64; \ }; \ const nfa_dispatch_fn NFATraits::has_accel \ @@ -194,16 +193,17 @@ enum NFACategory {NFA_LIMEX, 
NFA_OTHER}; const char *NFATraits::name \ = "LimEx "#mlt_size; \ template<> struct getDescription { \ - static string call(const void *ptr) { \ - return getDescriptionLimEx((const NFA *)ptr); \ - } \ + static string call(const void *p) { \ + return getDescriptionLimEx((const NFA *)p); \ + } \ };) -MAKE_LIMEX_TRAITS(32) -MAKE_LIMEX_TRAITS(128) -MAKE_LIMEX_TRAITS(256) -MAKE_LIMEX_TRAITS(384) -MAKE_LIMEX_TRAITS(512) +MAKE_LIMEX_TRAITS(32, alignof(u32)) +MAKE_LIMEX_TRAITS(64, alignof(m128)) /* special, 32bit arch uses m128 */ +MAKE_LIMEX_TRAITS(128, alignof(m128)) +MAKE_LIMEX_TRAITS(256, alignof(m256)) +MAKE_LIMEX_TRAITS(384, alignof(m384)) +MAKE_LIMEX_TRAITS(512, alignof(m512)) template<> struct NFATraits { UNUSED static const char *name; diff --git a/src/nfa/nfa_dump_dispatch.cpp b/src/nfa/nfa_dump_dispatch.cpp index 388ac003..f7a5e05d 100644 --- a/src/nfa/nfa_dump_dispatch.cpp +++ b/src/nfa/nfa_dump_dispatch.cpp @@ -60,6 +60,7 @@ namespace ue2 { DEBUG_PRINTF("dispatch for NFA type %u\n", nfa->type); \ switch (nfa->type) { \ DISPATCH_CASE(LIMEX, LimEx, 32, dbnt_func); \ + DISPATCH_CASE(LIMEX, LimEx, 64, dbnt_func); \ DISPATCH_CASE(LIMEX, LimEx, 128, dbnt_func); \ DISPATCH_CASE(LIMEX, LimEx, 256, dbnt_func); \ DISPATCH_CASE(LIMEX, LimEx, 384, dbnt_func); \ diff --git a/src/nfa/nfa_internal.h b/src/nfa/nfa_internal.h index 41fee73e..58c3da6c 100644 --- a/src/nfa/nfa_internal.h +++ b/src/nfa/nfa_internal.h @@ -52,6 +52,7 @@ extern "C" enum NFAEngineType { LIMEX_NFA_32, + LIMEX_NFA_64, LIMEX_NFA_128, LIMEX_NFA_256, LIMEX_NFA_384, @@ -164,6 +165,7 @@ static really_inline int isDfaType(u8 t) { static really_inline int isNfaType(u8 t) { switch (t) { case LIMEX_NFA_32: + case LIMEX_NFA_64: case LIMEX_NFA_128: case LIMEX_NFA_256: case LIMEX_NFA_384: diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index 87de0940..dc8922fd 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -173,6 +173,12 @@ static really_inline u64a movq(const m128 in) { #endif } +/* another form of movq */ +static really_inline +m128 load_m128_from_u64a(const u64a *p) { + return _mm_loadl_epi64((const m128 *)p); +} + #define rshiftbyte_m128(a, count_immed) _mm_srli_si128(a, count_immed) #define lshiftbyte_m128(a, count_immed) _mm_slli_si128(a, count_immed) @@ -270,12 +276,12 @@ void clearbit128(m128 *ptr, unsigned int n) { // tests bit N in the given vector. static really_inline -char testbit128(const m128 *ptr, unsigned int n) { +char testbit128(m128 val, unsigned int n) { const m128 mask = mask1bit128(n); #if defined(__SSE4_1__) - return !_mm_testz_si128(mask, *ptr); + return !_mm_testz_si128(mask, val); #else - return isnonzero128(and128(mask, *ptr)); + return isnonzero128(and128(mask, val)); #endif } @@ -606,13 +612,13 @@ void clearbit256(m256 *ptr, unsigned int n) { // tests bit N in the given vector. static really_inline -char testbit256(const m256 *ptr, unsigned int n) { - assert(n < sizeof(*ptr) * 8); - const m128 *sub; +char testbit256(m256 val, unsigned int n) { + assert(n < sizeof(val) * 8); + m128 sub; if (n < 128) { - sub = &ptr->lo; + sub = val.lo; } else { - sub = &ptr->hi; + sub = val.hi; n -= 128; } return testbit128(sub, n); @@ -633,9 +639,9 @@ void clearbit256(m256 *ptr, unsigned int n) { // tests bit N in the given vector. 
static really_inline -char testbit256(const m256 *ptr, unsigned int n) { +char testbit256(m256 val, unsigned int n) { const m256 mask = mask1bit256(n); - return !_mm256_testz_si256(mask, *ptr); + return !_mm256_testz_si256(mask, val); } static really_really_inline @@ -827,15 +833,15 @@ void clearbit384(m384 *ptr, unsigned int n) { // tests bit N in the given vector. static really_inline -char testbit384(const m384 *ptr, unsigned int n) { - assert(n < sizeof(*ptr) * 8); - const m128 *sub; +char testbit384(m384 val, unsigned int n) { + assert(n < sizeof(val) * 8); + m128 sub; if (n < 128) { - sub = &ptr->lo; + sub = val.lo; } else if (n < 256) { - sub = &ptr->mid; + sub = val.mid; } else { - sub = &ptr->hi; + sub = val.hi; } return testbit128(sub, n % 128); } @@ -1040,26 +1046,26 @@ void clearbit512(m512 *ptr, unsigned int n) { // tests bit N in the given vector. static really_inline -char testbit512(const m512 *ptr, unsigned int n) { - assert(n < sizeof(*ptr) * 8); +char testbit512(m512 val, unsigned int n) { + assert(n < sizeof(val) * 8); #if !defined(__AVX2__) - const m128 *sub; + m128 sub; if (n < 128) { - sub = &ptr->lo.lo; + sub = val.lo.lo; } else if (n < 256) { - sub = &ptr->lo.hi; + sub = val.lo.hi; } else if (n < 384) { - sub = &ptr->hi.lo; + sub = val.hi.lo; } else { - sub = &ptr->hi.hi; + sub = val.hi.hi; } return testbit128(sub, n % 128); #else - const m256 *sub; + m256 sub; if (n < 256) { - sub = &ptr->lo; + sub = val.lo; } else { - sub = &ptr->hi; + sub = val.hi; n -= 256; } return testbit256(sub, n); diff --git a/src/util/uniform_ops.h b/src/util/uniform_ops.h index 0619c7e4..3385e441 100644 --- a/src/util/uniform_ops.h +++ b/src/util/uniform_ops.h @@ -180,44 +180,52 @@ #define partial_load_m384(ptr, sz) loadbytes384(ptr, sz) #define partial_load_m512(ptr, sz) loadbytes512(ptr, sz) -#define store_compressed_u32(ptr, x, m) storecompressed32(ptr, x, m) -#define store_compressed_u64a(ptr, x, m) storecompressed64(ptr, x, m) -#define store_compressed_m128(ptr, x, m) storecompressed128(ptr, x, m) -#define store_compressed_m256(ptr, x, m) storecompressed256(ptr, x, m) -#define store_compressed_m384(ptr, x, m) storecompressed384(ptr, x, m) -#define store_compressed_m512(ptr, x, m) storecompressed512(ptr, x, m) +#define store_compressed_u32(ptr, x, m, len) storecompressed32(ptr, x, m, len) +#define store_compressed_u64a(ptr, x, m, len) storecompressed64(ptr, x, m, len) +#define store_compressed_m128(ptr, x, m, len) storecompressed128(ptr, x, m, len) +#define store_compressed_m256(ptr, x, m, len) storecompressed256(ptr, x, m, len) +#define store_compressed_m384(ptr, x, m, len) storecompressed384(ptr, x, m, len) +#define store_compressed_m512(ptr, x, m, len) storecompressed512(ptr, x, m, len) -#define load_compressed_u32(x, ptr, m) loadcompressed32(x, ptr, m) -#define load_compressed_u64a(x, ptr, m) loadcompressed64(x, ptr, m) -#define load_compressed_m128(x, ptr, m) loadcompressed128(x, ptr, m) -#define load_compressed_m256(x, ptr, m) loadcompressed256(x, ptr, m) -#define load_compressed_m384(x, ptr, m) loadcompressed384(x, ptr, m) -#define load_compressed_m512(x, ptr, m) loadcompressed512(x, ptr, m) +#define load_compressed_u32(x, ptr, m, len) loadcompressed32(x, ptr, m, len) +#define load_compressed_u64a(x, ptr, m, len) loadcompressed64(x, ptr, m, len) +#define load_compressed_m128(x, ptr, m, len) loadcompressed128(x, ptr, m, len) +#define load_compressed_m256(x, ptr, m, len) loadcompressed256(x, ptr, m, len) +#define load_compressed_m384(x, ptr, m, len) loadcompressed384(x, ptr, m, 
len) +#define load_compressed_m512(x, ptr, m, len) loadcompressed512(x, ptr, m, len) -static really_inline void clearbit_u32(u32 *p, u32 n) { +static really_inline +void clearbit_u32(u32 *p, u32 n) { assert(n < sizeof(*p) * 8); *p &= ~(1U << n); } -static really_inline void clearbit_u64a(u64a *p, u32 n) { + +static really_inline +void clearbit_u64a(u64a *p, u32 n) { assert(n < sizeof(*p) * 8); *p &= ~(1ULL << n); } + #define clearbit_m128(ptr, n) (clearbit128(ptr, n)) #define clearbit_m256(ptr, n) (clearbit256(ptr, n)) #define clearbit_m384(ptr, n) (clearbit384(ptr, n)) #define clearbit_m512(ptr, n) (clearbit512(ptr, n)) -static really_inline char testbit_u32(const u32 *p, u32 n) { - assert(n < sizeof(*p) * 8); - return !!(*p & (1U << n)); +static really_inline +char testbit_u32(u32 val, u32 n) { + assert(n < sizeof(val) * 8); + return !!(val & (1U << n)); } -static really_inline char testbit_u64a(const u64a *p, u32 n) { - assert(n < sizeof(*p) * 8); - return !!(*p & (1ULL << n)); + +static really_inline +char testbit_u64a(u64a val, u32 n) { + assert(n < sizeof(val) * 8); + return !!(val & (1ULL << n)); } -#define testbit_m128(ptr, n) (testbit128(ptr, n)) -#define testbit_m256(ptr, n) (testbit256(ptr, n)) -#define testbit_m384(ptr, n) (testbit384(ptr, n)) -#define testbit_m512(ptr, n) (testbit512(ptr, n)) + +#define testbit_m128(val, n) (testbit128(val, n)) +#define testbit_m256(val, n) (testbit256(val, n)) +#define testbit_m384(val, n) (testbit384(val, n)) +#define testbit_m512(val, n) (testbit512(val, n)) #endif diff --git a/unit/internal/limex_nfa.cpp b/unit/internal/limex_nfa.cpp index 6bb4fcb9..1c742793 100644 --- a/unit/internal/limex_nfa.cpp +++ b/unit/internal/limex_nfa.cpp @@ -31,7 +31,6 @@ #include "grey.h" #include "compiler/compiler.h" -#include "nfa/limex_context.h" #include "nfa/limex_internal.h" #include "nfa/nfa_api.h" #include "nfa/nfa_api_util.h" @@ -167,11 +166,10 @@ TEST_P(LimExModelTest, QueueExec) { TEST_P(LimExModelTest, CompressExpand) { ASSERT_TRUE(nfa != nullptr); - // 64-bit NFAs assume during compression that they have >= 5 bytes of - // compressed NFA state, which isn't true for our 8-state test pattern. We - // skip this test for just these models. 
- if (nfa->scratchStateSize == 8) { - return; + u32 real_state_size = nfa->scratchStateSize; + /* Only look at 8 bytes for limex 64 (rather than the padding) */ + if (nfa->type == LIMEX_NFA_64) { + real_state_size = sizeof(u64a); } initQueue(); @@ -195,8 +193,7 @@ TEST_P(LimExModelTest, CompressExpand) { memset(dest, 0xff, nfa->scratchStateSize); nfaExpandState(nfa.get(), dest, q.streamState, q.offset, queue_prev_byte(&q, end)); - ASSERT_TRUE(std::equal(dest, dest + nfa->scratchStateSize, - full_state.get())); + ASSERT_TRUE(std::equal(dest, dest + real_state_size, full_state.get())); } TEST_P(LimExModelTest, InitCompressedState0) { diff --git a/unit/internal/simd_utils.cpp b/unit/internal/simd_utils.cpp index 3c07b2b0..31d4b925 100644 --- a/unit/internal/simd_utils.cpp +++ b/unit/internal/simd_utils.cpp @@ -110,10 +110,10 @@ void simd_setbit(m128 *a, unsigned int i) { return setbit128(a, i); } void simd_setbit(m256 *a, unsigned int i) { return setbit256(a, i); } void simd_setbit(m384 *a, unsigned int i) { return setbit384(a, i); } void simd_setbit(m512 *a, unsigned int i) { return setbit512(a, i); } -bool simd_testbit(const m128 *a, unsigned int i) { return testbit128(a, i); } -bool simd_testbit(const m256 *a, unsigned int i) { return testbit256(a, i); } -bool simd_testbit(const m384 *a, unsigned int i) { return testbit384(a, i); } -bool simd_testbit(const m512 *a, unsigned int i) { return testbit512(a, i); } +bool simd_testbit(const m128 &a, unsigned int i) { return testbit128(a, i); } +bool simd_testbit(const m256 &a, unsigned int i) { return testbit256(a, i); } +bool simd_testbit(const m384 &a, unsigned int i) { return testbit384(a, i); } +bool simd_testbit(const m512 &a, unsigned int i) { return testbit512(a, i); } u32 simd_diffrich(const m128 &a, const m128 &b) { return diffrich128(a, b); } u32 simd_diffrich(const m256 &a, const m256 &b) { return diffrich256(a, b); } u32 simd_diffrich(const m384 &a, const m384 &b) { return diffrich384(a, b); } @@ -419,15 +419,15 @@ TYPED_TEST(SimdUtilsTest, testbit) { // First, all bits are on in 'ones'. for (unsigned int i = 0; i < total_bits; i++) { - ASSERT_EQ(1, simd_testbit(&ones, i)) << "bit " << i << " is on"; + ASSERT_EQ(1, simd_testbit(ones, i)) << "bit " << i << " is on"; } // Try individual bits; only 'i' should be on. for (unsigned int i = 0; i < total_bits; i++) { TypeParam a = setbit(i); for (unsigned int j = 0; j < total_bits; j++) { - ASSERT_EQ(i == j ? 1 : 0, simd_testbit(&a, j)) << "bit " << i - << " is wrong"; + ASSERT_EQ(i == j ? 
1 : 0, simd_testbit(a, j)) << "bit " << i
+                                                      << " is wrong";
         }
     }
 }
@@ -470,7 +470,7 @@ TYPED_TEST(SimdUtilsTest, diffrich) {
 
     // and nothing is on in zeroes
     for (unsigned int i = 0; i < total_bits; i++) {
-        ASSERT_EQ(0, simd_testbit(&zeroes, i)) << "bit " << i << " is off";
+        ASSERT_EQ(0, simd_testbit(zeroes, i)) << "bit " << i << " is off";
     }
 
     // All-zeroes and all-ones differ in all words
diff --git a/unit/internal/uniform_ops.cpp b/unit/internal/uniform_ops.cpp
index 33d7cd30..10defdbd 100644
--- a/unit/internal/uniform_ops.cpp
+++ b/unit/internal/uniform_ops.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -156,26 +156,26 @@ TEST(Uniform, loadstore_m512) {
 TEST(Uniform, testbit_u32) {
     for (u32 i = 0; i < 32; i++) {
         u32 v = 0;
-        EXPECT_EQ((char)0, testbit_u32(&v, i));
+        EXPECT_EQ((char)0, testbit_u32(v, i));
         v |= 1ULL << i;
-        EXPECT_EQ((char)1, testbit_u32(&v, i));
+        EXPECT_EQ((char)1, testbit_u32(v, i));
         v = ~v;
-        EXPECT_EQ((char)0, testbit_u32(&v, i));
+        EXPECT_EQ((char)0, testbit_u32(v, i));
         v |= 1ULL << i;
-        EXPECT_EQ((char)1, testbit_u32(&v, i));
+        EXPECT_EQ((char)1, testbit_u32(v, i));
     }
 }
 
 TEST(Uniform, testbit_u64a) {
     for (u32 i = 0; i < 64; i++) {
         u64a v = 0;
-        EXPECT_EQ((char)0, testbit_u64a(&v, i));
+        EXPECT_EQ((char)0, testbit_u64a(v, i));
         v |= 1ULL << i;
-        EXPECT_EQ((char)1, testbit_u64a(&v, i));
+        EXPECT_EQ((char)1, testbit_u64a(v, i));
         v = ~v;
-        EXPECT_EQ((char)0, testbit_u64a(&v, i));
+        EXPECT_EQ((char)0, testbit_u64a(v, i));
         v |= 1ULL << i;
-        EXPECT_EQ((char)1, testbit_u64a(&v, i));
+        EXPECT_EQ((char)1, testbit_u64a(v, i));
     }
 }
 
@@ -183,7 +183,7 @@ TEST(Uniform, clearbit_u32) {
     for (u32 i = 0; i < 32; i++) {
         u32 v = ~0U;
         clearbit_u32(&v, i);
-        EXPECT_EQ((char)0, testbit_u32(&v, i));
+        EXPECT_EQ((char)0, testbit_u32(v, i));
         v = ~v;
         clearbit_u32(&v, i);
         EXPECT_EQ(0U, v);
@@ -194,7 +194,7 @@ TEST(Uniform, clearbit_u64a) {
     for (u32 i = 0; i < 64; i++) {
         u64a v = ~0ULL;
         clearbit_u64a(&v, i);
-        EXPECT_EQ((char)0, testbit_u64a(&v, i));
+        EXPECT_EQ((char)0, testbit_u64a(v, i));
         v = ~v;
         clearbit_u64a(&v, i);
         EXPECT_EQ(0ULL, v);

From bcf40c5136af3ca664a8e3c361e8f7a25c5db588 Mon Sep 17 00:00:00 2001
From: Alex Coyte 
Date: Fri, 26 Aug 2016 15:19:10 +1000
Subject: [PATCH 009/103] Limex: don't build accel schemes for impossible state sets

---
 src/nfa/limex_compile.cpp | 209 ++++++++++++++++++++++++++++++++++----
 src/nfagraph/ng_limex.cpp |   6 +-
 2 files changed, 192 insertions(+), 23 deletions(-)

diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp
index 8d7343e5..07e1ed5f 100644
--- a/src/nfa/limex_compile.cpp
+++ b/src/nfa/limex_compile.cpp
@@ -37,6 +37,7 @@
 #include "limex_internal.h"
 #include "limex_limits.h"
 #include "nfa_build_util.h"
+#include "nfagraph/ng_dominators.h"
 #include "nfagraph/ng_holder.h"
 #include "nfagraph/ng_limex_accel.h"
 #include "nfagraph/ng_repeat.h"
@@ -64,9 +65,12 @@
 #include 
 #include 
 #include 
+
 #include 
+#include 
 
 using namespace std;
+using boost::adaptors::map_values;
 
 namespace ue2 {
 
@@ -704,6 +708,155 @@ void fillAccelInfo(build_info &bi) {
 
 typedef vector > AccelAuxVector;
 
+#define IMPOSSIBLE_ACCEL_MASK (~0U)
+
+static
+u32 getEffectiveAccelStates(const build_info &args,
+                            u32 active_accel_mask,
+                            const vector &accelStates) {
+    /* accelStates is indexed by the acceleration bit index and contains a
+     * reference to the original 
vertex & state_id */
+
+    /* Cases to consider:
+     *
+     * 1: Accel states a and b are on and b can squash a
+     *    --> we can ignore a. This will result in a no longer being accurately
+     *        modelled - we may miss escapes turning it off and we may also miss
+     *        its successors being activated.
+     *
+     * 2: Accel state b is on but accel state a is off and a is .* and must be
+     *    seen before b is reached (and would not be covered by (1))
+     *    --> if a is squashable (or may die unexpectedly) we should continue
+     *        as is
+     *    --> if a is not squashable we can treat this as a+b or as a no accel,
+     *        impossible case
+     *    --> this case could be extended to handle non-dot reaches by
+     *        effectively creating something similar to squash masks for the
+     *        reverse graph
+     *
+     *
+     * Other cases:
+     *
+     * 3: Accel states a and b are on but have incompatible reaches
+     *    --> we should treat this as an impossible case. Actually, this case
+     *        is unlikely to arise as we pick states with wide reaches to
+     *        accelerate so an empty intersection is unlikely.
+     *
+     * Note: we need to be careful when dealing with accel states corresponding
+     * to bounded repeat cyclics - they may 'turn off' based on a max bound and
+     * so we may still require earlier states to be accurately modelled.
+     */
+    const NGHolder &h = args.h;
+    auto dom_map = findDominators(h);
+
+    /* map from accel_id to mask of accel_ids that it is dominated by */
+    vector dominated_by(accelStates.size());
+
+    map accel_id_map;
+    for (u32 accel_id = 0; accel_id < accelStates.size(); accel_id++) {
+        NFAVertex v = accelStates[accel_id].v;
+        accel_id_map[v] = accel_id;
+    }
+
+    /* Note: we want a slightly less strict defn of dominate as skip edges
+     * prevent .* 'truly' dominating */
+    for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) {
+        u32 accel_id = findAndClearLSB_32(&local_accel_mask);
+        NFAVertex v = accelStates[accel_id].v;
+        while (contains(dom_map, v)) {
+            v = dom_map[v];
+            if (contains(accel_id_map, v)) {
+                dominated_by[accel_id] |= 1U << accel_id_map[v];
+            }
+            /* TODO: could also look at inv_adj vertices to handle fan-in */
+            for (NFAVertex a : adjacent_vertices_range(v, h)) {
+                if (a == v || !contains(accel_id_map, a)
+                    || a == accelStates[accel_id].v /* not likely */) {
+                    continue;
+                }
+                if (!is_subset_of(h[v].reports, h[a].reports)) {
+                    continue;
+                }
+                flat_set v_succ;
+                flat_set a_succ;
+                succ(h, v, &v_succ);
+                succ(h, a, &a_succ);
+                if (is_subset_of(v_succ, a_succ)) {
+                    dominated_by[accel_id] |= 1U << accel_id_map[a];
+                }
+            }
+        }
+    }
+
+    u32 may_turn_off = 0; /* BR with max bound, non-dots, squashed, etc */
+    for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) {
+        u32 accel_id = findAndClearLSB_32(&local_accel_mask);
+        NFAVertex v = accelStates[accel_id].v;
+        u32 state_id = accelStates[accel_id].state;
+        assert(contains(args.accel.accelerable, v));
+        if (!h[v].char_reach.all()) {
+            may_turn_off |= 1U << accel_id;
+            continue;
+        }
+        if (contains(args.br_cyclic, v)
+            && args.br_cyclic.at(v).repeatMax != depth::infinity()) {
+            may_turn_off |= 1U << accel_id;
+            continue;
+        }
+        for (const auto &s_mask : args.squashMap | map_values) {
+            if (!s_mask.test(state_id)) {
+                may_turn_off |= 1U << accel_id;
+                break;
+            }
+        }
+        for (const auto &s_mask : args.reportSquashMap | map_values) {
+            if (!s_mask.test(state_id)) {
+                may_turn_off |= 1U << accel_id;
+                break;
+            }
+        }
+    }
+
+    /* Case 1: */
+    u32 ignored = 0;
+    for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) {
+        u32 accel_id_b = 
findAndClearLSB_32(&local_accel_mask); + NFAVertex v = accelStates[accel_id_b].v; + if (!contains(args.squashMap, v)) { + continue; + } + assert(!contains(args.br_cyclic, v) + || args.br_cyclic.at(v).repeatMax == depth::infinity()); + NFAStateSet squashed = args.squashMap.at(v); + squashed.flip(); /* default sense for mask of survivors */ + + for (u32 local_accel_mask2 = active_accel_mask; local_accel_mask2; ) { + u32 accel_id_a = findAndClearLSB_32(&local_accel_mask2); + if (squashed.test(accelStates[accel_id_a].state)) { + ignored |= 1U << accel_id_a; + } + } + } + + /* Case 2: */ + for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) { + u32 accel_id = findAndClearLSB_32(&local_accel_mask); + + u32 stuck_dominators = dominated_by[accel_id] & ~may_turn_off; + if ((stuck_dominators & active_accel_mask) != stuck_dominators) { + DEBUG_PRINTF("only %08x on, but we require %08x\n", + active_accel_mask, stuck_dominators); + return IMPOSSIBLE_ACCEL_MASK; + } + } + + if (ignored) { + DEBUG_PRINTF("in %08x, ignoring %08x\n", active_accel_mask, ignored); + } + + return active_accel_mask & ~ignored; +} + static void buildAccel(const build_info &args, NFAStateSet &accelMask, NFAStateSet &accelFriendsMask, AccelAuxVector &auxvec, @@ -735,11 +888,22 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask, // Set up a unioned AccelBuild for every possible combination of the set // bits in accelStates. vector accelOuts(accelCount); + vector effective_accel_set; + effective_accel_set.push_back(0); /* empty is effectively empty */ + for (u32 i = 1; i < accelCount; i++) { - for (u32 j = 0, j_end = accelStates.size(); j < j_end; j++) { - if (i & (1U << j)) { - combineAccel(accelStates[j], accelOuts[i]); - } + u32 effective_i = getEffectiveAccelStates(args, i, accelStates); + effective_accel_set.push_back(effective_i); + + if (effective_i == IMPOSSIBLE_ACCEL_MASK) { + DEBUG_PRINTF("this combination of accel states is not possible\n"); + accelOuts[i].stop1 = CharReach::dot(); + continue; + } + + while (effective_i) { + u32 base_accel_state = findAndClearLSB_32(&effective_i); + combineAccel(accelStates[base_accel_state], accelOuts[i]); } minimiseAccel(accelOuts[i]); } @@ -759,29 +923,32 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask, for (u32 i = 1; i < accelCount; i++) { memset(&aux, 0, sizeof(aux)); - NFAStateSet states(args.num_states); - for (u32 j = 0; j < accelStates.size(); j++) { - if (i & (1U << j)) { - states.set(accelStates[j].state); - } - } + NFAStateSet effective_states(args.num_states); + u32 effective_i = effective_accel_set[i]; AccelInfo ainfo; ainfo.double_offset = accelOuts[i].offset; ainfo.double_stop1 = accelOuts[i].stop1; ainfo.double_stop2 = accelOuts[i].stop2; - if (contains(accel.precalc, states)) { - const precalcAccel &precalc = accel.precalc.at(states); - if (precalc.ma_info.type != MultibyteAccelInfo::MAT_NONE) { - ainfo.ma_len1 = precalc.ma_info.len1; - ainfo.ma_len2 = precalc.ma_info.len2; - ainfo.multiaccel_offset = precalc.ma_info.offset; - ainfo.multiaccel_stops = precalc.ma_info.cr; - ainfo.ma_type = precalc.ma_info.type; - } else { - ainfo.single_offset = precalc.single_offset; - ainfo.single_stops = precalc.single_cr; + if (effective_i != IMPOSSIBLE_ACCEL_MASK) { + while (effective_i) { + u32 base_accel_id = findAndClearLSB_32(&effective_i); + effective_states.set(accelStates[base_accel_id].state); + } + + if (contains(accel.precalc, effective_states)) { + const auto &precalc = accel.precalc.at(effective_states); + if 
(precalc.ma_info.type != MultibyteAccelInfo::MAT_NONE) { + ainfo.ma_len1 = precalc.ma_info.len1; + ainfo.ma_len2 = precalc.ma_info.len2; + ainfo.multiaccel_offset = precalc.ma_info.offset; + ainfo.multiaccel_stops = precalc.ma_info.cr; + ainfo.ma_type = precalc.ma_info.type; + } else { + ainfo.single_offset = precalc.single_offset; + ainfo.single_stops = precalc.single_cr; + } } } diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp index 72efa43a..06ea5de3 100644 --- a/src/nfagraph/ng_limex.cpp +++ b/src/nfagraph/ng_limex.cpp @@ -118,9 +118,11 @@ void findSquashStates(const NGHolder &g, filterSquashers(g, squashMap); /* We also filter out the cyclic states representing bounded repeats, as - * they are not really cyclic. */ + * they are not really cyclic -- they may turn off unexpectedly. */ for (const auto &br : repeats) { - squashMap.erase(br.cyclic); + if (br.repeatMax.is_finite()) { + squashMap.erase(br.cyclic); + } } } From e74b141e958816be48693e7258f2a5da1d8df04f Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 29 Aug 2016 15:07:22 +1000 Subject: [PATCH 010/103] rework load_m128_from_u64a() --- src/util/simd_utils.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index dc8922fd..b7cb1c0f 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -176,7 +176,15 @@ static really_inline u64a movq(const m128 in) { /* another form of movq */ static really_inline m128 load_m128_from_u64a(const u64a *p) { +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) + /* unfortunately _mm_loadl_epi64() is best avoided as it seems to cause + * trouble on some older compilers, possibly because it is misdefined to + * take an m128 as its parameter */ + return _mm_set_epi64((__m64)0ULL, (__m64)*p); +#else + /* ICC doesn't like casting to __m64 */ return _mm_loadl_epi64((const m128 *)p); +#endif } #define rshiftbyte_m128(a, count_immed) _mm_srli_si128(a, count_immed) From f4fa6cd4dd9c4de6af83cec502fb64ea46f0fa27 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 25 Aug 2016 15:12:28 +1000 Subject: [PATCH 011/103] rose: tighten up requirements for catch up We only need to catch up when there is an actual anchored table, not merely when there are successors of anchored_root in the Rose graph. --- src/rose/rose_build_bytecode.cpp | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index b8baa9c1..ed9b5bbb 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -554,33 +554,30 @@ u32 countRosePrefixes(const vector &roses) { * \brief True if this Rose engine needs to run a catch up whenever a report is * generated. * - * This is only the case if there are no anchored literals, suffixes, outfixes - * etc. + * Catch up is necessary if there are output-exposed engines (suffixes, + * outfixes) or an anchored table (anchored literals, acyclic DFAs). 
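+ *
+ * An illustrative example (patterns hypothetical, not part of this change):
+ * /^foo.*bar/ contributes an anchored literal and hence an anchored table,
+ * so catch up is required; a lone floating literal such as /abc/, with no
+ * suffix or outfix attached, needs none.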
 */
 static
-bool needsCatchup(const RoseBuildImpl &build) {
+bool needsCatchup(const RoseBuildImpl &build,
+                  const vector<raw_dfa> &anchored_dfas) {
     if (!build.outfixes.empty()) {
         DEBUG_PRINTF("has outfixes\n");
         return true;
     }
+    if (!anchored_dfas.empty()) {
+        DEBUG_PRINTF("has anchored dfas\n");
+        return true;
+    }
 
     const RoseGraph &g = build.g;
 
-    if (!isLeafNode(build.anchored_root, g)) {
-        DEBUG_PRINTF("has anchored vertices\n");
-        return true;
-    }
-
     for (auto v : vertices_range(g)) {
         if (build.root == v) {
             continue;
         }
-
         if (build.anchored_root == v) {
-            assert(isLeafNode(v, g));
             continue;
         }
-
         if (g[v].suffix) {
             DEBUG_PRINTF("vertex %zu has suffix\n", g[v].idx);
             return true;
@@ -5286,7 +5283,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     build_context bc;
     bc.floatingMinLiteralMatchOffset = findMinFloatingLiteralMatch(*this,
                                                                    anchored_dfas);
-    bc.needs_catchup = needsCatchup(*this);
+    bc.needs_catchup = needsCatchup(*this, anchored_dfas);
     recordResources(bc.resources, *this);
     if (!anchored_dfas.empty()) {
         bc.resources.has_anchored = true;

From 97483eee5bb997e85815e3b61e465012e09e37dd Mon Sep 17 00:00:00 2001
From: Alex Coyte
Date: Wed, 31 Aug 2016 10:50:46 +1000
Subject: [PATCH 012/103] UE-3019: limex_compile: correctly access the dominator map

---
 src/nfa/limex_compile.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp
index 07e1ed5f..78b9729f 100644
--- a/src/nfa/limex_compile.cpp
+++ b/src/nfa/limex_compile.cpp
@@ -762,8 +762,9 @@ u32 getEffectiveAccelStates(const build_info &args,
      * prevent .* 'truly' dominating */
     for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) {
         u32 accel_id = findAndClearLSB_32(&local_accel_mask);
+        assert(accel_id < accelStates.size());
         NFAVertex v = accelStates[accel_id].v;
-        while (contains(dom_map, v)) {
+        while (dom_map[v]) {
             v = dom_map[v];
             if (contains(accel_id_map, v)) {
                 dominated_by[accel_id] |= 1U << accel_id_map[v];

From 3bfef988fe9dd7f62ce3ea374a3219d130478cf0 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Mon, 22 Aug 2016 12:37:46 +1000
Subject: [PATCH 013/103] container.h: include <iterator>

---
 src/util/container.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/util/container.h b/src/util/container.h
index 63e27743..e2cfb485 100644
--- a/src/util/container.h
+++ b/src/util/container.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -41,6 +41,7 @@
 #include
 #include
 #include
+#include <iterator>
 
 namespace ue2 {
 
@@ -78,7 +79,9 @@ void insert(C *container, typename C::iterator pos, const D &donor) {
 }
 
 /**
- * \brief Constructs a vector from a range bounded by the given pair of iterators. */
+ * \brief Constructs a vector from a range bounded by the given pair of
+ * iterators.
+ */
 template<typename It>
 auto make_vector_from(const std::pair<It, It> &range)
     -> std::vector<typename std::iterator_traits<It>::value_type> {

From 13af3bfb744a8b014b55b1038db275020b62e902 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Thu, 4 Aug 2016 14:21:51 +1000
Subject: [PATCH 014/103] rose: decouple build-time program representation

This commit changes the build-time representation of the Rose interpreter
programs from a class containing a discriminated union of the bytecode
structures to a class hierarchy of build-time prototypes.
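As a rough sketch of the shape of this change (the class and member names
below are simplified stand-ins, not the actual types introduced by this
patch):

    // Before: one class wrapping a discriminated union of the bytecode
    // structs; behaviour is selected by switching on the stored opcode.
    struct OldInstruction {
        union {
            struct { u8 code; } end;
            struct { u8 code; u32 onmatch; s32 offset_adjust; } report;
            // ... one member per opcode ...
        } u;
    };

    // After: an abstract prototype with one concrete subclass per opcode,
    // each carrying typed fields for its own operands.
    class Instruction {
    public:
        virtual ~Instruction() {}
        virtual u8 code() const = 0;
    };

    class InstrReport : public Instruction {
    public:
        InstrReport(u32 onmatch_in, s32 adjust_in)
            : onmatch(onmatch_in), offset_adjust(adjust_in) {}
        u8 code() const override { return ROSE_INSTR_REPORT; }
        u32 onmatch;
        s32 offset_adjust;
    };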
This makes it easier to reason about and manipulate Rose programs during compilation. --- CMakeLists.txt | 3 + src/rose/program_runtime.h | 22 + src/rose/rose_build_bytecode.cpp | 1351 +++++++-------------- src/rose/rose_build_engine_blob.h | 132 +++ src/rose/rose_build_program.cpp | 491 ++++++++ src/rose/rose_build_program.h | 1802 +++++++++++++++++++++++++++++ src/rose/rose_dump.cpp | 9 +- src/rose/rose_program.h | 7 + 8 files changed, 2856 insertions(+), 961 deletions(-) create mode 100644 src/rose/rose_build_engine_blob.h create mode 100644 src/rose/rose_build_program.cpp create mode 100644 src/rose/rose_build_program.h diff --git a/CMakeLists.txt b/CMakeLists.txt index bdb60b74..0a236845 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -877,6 +877,7 @@ SET (hs_SRCS src/rose/rose_build_compile.cpp src/rose/rose_build_convert.cpp src/rose/rose_build_convert.h + src/rose/rose_build_engine_blob.h src/rose/rose_build_exclusive.cpp src/rose/rose_build_exclusive.h src/rose/rose_build_groups.cpp @@ -891,6 +892,8 @@ SET (hs_SRCS src/rose/rose_build_merge.cpp src/rose/rose_build_merge.h src/rose/rose_build_misc.cpp + src/rose/rose_build_program.cpp + src/rose/rose_build_program.h src/rose/rose_build_role_aliasing.cpp src/rose/rose_build_scatter.cpp src/rose/rose_build_scatter.h diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index b57aebe9..100d9140 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -1600,6 +1600,28 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SPARSE_ITER_ANY) { + DEBUG_PRINTF("iter_offset=%u\n", ri->iter_offset); + const struct mmbit_sparse_iter *it = + getByOffset(t, ri->iter_offset); + assert(ISALIGNED(it)); + + const u8 *roles = getRoleState(scratch->core_info.state); + + u32 idx = 0; + u32 i = mmbit_sparse_iter_begin(roles, t->rolesWithStateCount, + &idx, it, si_state); + if (i == MMB_INVALID) { + DEBUG_PRINTF("no states in sparse iter are on\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + DEBUG_PRINTF("state %u (idx=%u) is on\n", i, idx); + fatbit_clear(scratch->handled_roles); + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(ENGINES_EOD) { if (roseEnginesEod(t, scratch, end, ri->iter_offset) == HWLM_TERMINATE_MATCHING) { diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index ed9b5bbb..68812b44 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -33,11 +33,13 @@ #include "hs_compile.h" // for HS_MODE_* #include "rose_build_add_internal.h" #include "rose_build_anchored.h" +#include "rose_build_engine_blob.h" #include "rose_build_exclusive.h" #include "rose_build_groups.h" #include "rose_build_infix.h" #include "rose_build_lookaround.h" #include "rose_build_matchers.h" +#include "rose_build_program.h" #include "rose_build_scatter.h" #include "rose_build_util.h" #include "rose_build_width.h" @@ -147,218 +149,6 @@ struct left_build_info { vector lookaround; // alternative implementation to the NFA }; -/** - * \brief Possible jump targets for roles that perform checks. - * - * Fixed up into offsets before the program is written to bytecode. - */ -enum class JumpTarget { - NO_JUMP, //!< Instruction does not jump. - PROGRAM_END, //!< Jump to end of program. - NEXT_BLOCK, //!< Jump to start of next block (sparse iter check, etc). - FIXUP_DONE, //!< Target fixup already applied. -}; - -/** \brief Role instruction model used at compile time. 
*/ -class RoseInstruction { -public: - RoseInstruction(enum RoseInstructionCode c, JumpTarget j) : target(j) { - memset(&u, 0, sizeof(u)); - u.end.code = c; - } - - explicit RoseInstruction(enum RoseInstructionCode c) - : RoseInstruction(c, JumpTarget::NO_JUMP) {} - - bool operator<(const RoseInstruction &a) const { - if (code() != a.code()) { - return code() < a.code(); - } - if (target != a.target) { - return target < a.target; - } - return memcmp(&u, &a.u, sizeof(u)) < 0; - } - - bool operator==(const RoseInstruction &a) const { - return code() == a.code() && target == a.target && - memcmp(&u, &a.u, sizeof(u)) == 0; - } - - enum RoseInstructionCode code() const { - // Note that this sort of type-punning (relying on identical initial - // layout) is explicitly allowed by the C++11 standard. - return (enum RoseInstructionCode)u.end.code; - } - - const void *get() const { - switch (code()) { - case ROSE_INSTR_CHECK_LIT_EARLY: return &u.checkLitEarly; - case ROSE_INSTR_CHECK_GROUPS: return &u.checkGroups; - case ROSE_INSTR_CHECK_ONLY_EOD: return &u.checkOnlyEod; - case ROSE_INSTR_CHECK_BOUNDS: return &u.checkBounds; - case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled; - case ROSE_INSTR_CHECK_LOOKAROUND: return &u.checkLookaround; - case ROSE_INSTR_CHECK_MASK: return &u.checkMask; - case ROSE_INSTR_CHECK_MASK_32: return &u.checkMask32; - case ROSE_INSTR_CHECK_BYTE: return &u.checkByte; - case ROSE_INSTR_CHECK_INFIX: return &u.checkInfix; - case ROSE_INSTR_CHECK_PREFIX: return &u.checkPrefix; - case ROSE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay; - case ROSE_INSTR_PUSH_DELAYED: return &u.pushDelayed; - case ROSE_INSTR_RECORD_ANCHORED: return &u.recordAnchored; - case ROSE_INSTR_CATCH_UP: return &u.catchUp; - case ROSE_INSTR_CATCH_UP_MPV: return &u.catchUpMpv; - case ROSE_INSTR_SOM_ADJUST: return &u.somAdjust; - case ROSE_INSTR_SOM_LEFTFIX: return &u.somLeftfix; - case ROSE_INSTR_SOM_FROM_REPORT: return &u.somFromReport; - case ROSE_INSTR_SOM_ZERO: return &u.somZero; - case ROSE_INSTR_TRIGGER_INFIX: return &u.triggerInfix; - case ROSE_INSTR_TRIGGER_SUFFIX: return &u.triggerSuffix; - case ROSE_INSTR_DEDUPE: return &u.dedupe; - case ROSE_INSTR_DEDUPE_SOM: return &u.dedupeSom; - case ROSE_INSTR_REPORT_CHAIN: return &u.reportChain; - case ROSE_INSTR_REPORT_SOM_INT: return &u.reportSomInt; - case ROSE_INSTR_REPORT_SOM_AWARE: return &u.reportSomAware; - case ROSE_INSTR_REPORT: return &u.report; - case ROSE_INSTR_REPORT_EXHAUST: return &u.reportExhaust; - case ROSE_INSTR_REPORT_SOM: return &u.reportSom; - case ROSE_INSTR_REPORT_SOM_EXHAUST: return &u.reportSomExhaust; - case ROSE_INSTR_DEDUPE_AND_REPORT: return &u.dedupeAndReport; - case ROSE_INSTR_FINAL_REPORT: return &u.finalReport; - case ROSE_INSTR_CHECK_EXHAUSTED: return &u.checkExhausted; - case ROSE_INSTR_CHECK_MIN_LENGTH: return &u.checkMinLength; - case ROSE_INSTR_SET_STATE: return &u.setState; - case ROSE_INSTR_SET_GROUPS: return &u.setGroups; - case ROSE_INSTR_SQUASH_GROUPS: return &u.squashGroups; - case ROSE_INSTR_CHECK_STATE: return &u.checkState; - case ROSE_INSTR_SPARSE_ITER_BEGIN: return &u.sparseIterBegin; - case ROSE_INSTR_SPARSE_ITER_NEXT: return &u.sparseIterNext; - case ROSE_INSTR_ENGINES_EOD: return &u.enginesEod; - case ROSE_INSTR_SUFFIXES_EOD: return &u.suffixesEod; - case ROSE_INSTR_MATCHER_EOD: return &u.matcherEod; - case ROSE_INSTR_END: return &u.end; - } - assert(0); - return &u.end; - } - - size_t length() const { - switch (code()) { - case ROSE_INSTR_CHECK_LIT_EARLY: return sizeof(u.checkLitEarly); - 
case ROSE_INSTR_CHECK_GROUPS: return sizeof(u.checkGroups); - case ROSE_INSTR_CHECK_ONLY_EOD: return sizeof(u.checkOnlyEod); - case ROSE_INSTR_CHECK_BOUNDS: return sizeof(u.checkBounds); - case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled); - case ROSE_INSTR_CHECK_LOOKAROUND: return sizeof(u.checkLookaround); - case ROSE_INSTR_CHECK_MASK: return sizeof(u.checkMask); - case ROSE_INSTR_CHECK_MASK_32: return sizeof(u.checkMask32); - case ROSE_INSTR_CHECK_BYTE: return sizeof(u.checkByte); - case ROSE_INSTR_CHECK_INFIX: return sizeof(u.checkInfix); - case ROSE_INSTR_CHECK_PREFIX: return sizeof(u.checkPrefix); - case ROSE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay); - case ROSE_INSTR_PUSH_DELAYED: return sizeof(u.pushDelayed); - case ROSE_INSTR_RECORD_ANCHORED: return sizeof(u.recordAnchored); - case ROSE_INSTR_CATCH_UP: return sizeof(u.catchUp); - case ROSE_INSTR_CATCH_UP_MPV: return sizeof(u.catchUpMpv); - case ROSE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust); - case ROSE_INSTR_SOM_LEFTFIX: return sizeof(u.somLeftfix); - case ROSE_INSTR_SOM_FROM_REPORT: return sizeof(u.somFromReport); - case ROSE_INSTR_SOM_ZERO: return sizeof(u.somZero); - case ROSE_INSTR_TRIGGER_INFIX: return sizeof(u.triggerInfix); - case ROSE_INSTR_TRIGGER_SUFFIX: return sizeof(u.triggerSuffix); - case ROSE_INSTR_DEDUPE: return sizeof(u.dedupe); - case ROSE_INSTR_DEDUPE_SOM: return sizeof(u.dedupeSom); - case ROSE_INSTR_REPORT_CHAIN: return sizeof(u.reportChain); - case ROSE_INSTR_REPORT_SOM_INT: return sizeof(u.reportSomInt); - case ROSE_INSTR_REPORT_SOM_AWARE: return sizeof(u.reportSomAware); - case ROSE_INSTR_REPORT: return sizeof(u.report); - case ROSE_INSTR_REPORT_EXHAUST: return sizeof(u.reportExhaust); - case ROSE_INSTR_REPORT_SOM: return sizeof(u.reportSom); - case ROSE_INSTR_REPORT_SOM_EXHAUST: return sizeof(u.reportSomExhaust); - case ROSE_INSTR_DEDUPE_AND_REPORT: return sizeof(u.dedupeAndReport); - case ROSE_INSTR_FINAL_REPORT: return sizeof(u.finalReport); - case ROSE_INSTR_CHECK_EXHAUSTED: return sizeof(u.checkExhausted); - case ROSE_INSTR_CHECK_MIN_LENGTH: return sizeof(u.checkMinLength); - case ROSE_INSTR_SET_STATE: return sizeof(u.setState); - case ROSE_INSTR_SET_GROUPS: return sizeof(u.setGroups); - case ROSE_INSTR_SQUASH_GROUPS: return sizeof(u.squashGroups); - case ROSE_INSTR_CHECK_STATE: return sizeof(u.checkState); - case ROSE_INSTR_SPARSE_ITER_BEGIN: return sizeof(u.sparseIterBegin); - case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext); - case ROSE_INSTR_ENGINES_EOD: return sizeof(u.enginesEod); - case ROSE_INSTR_SUFFIXES_EOD: return sizeof(u.suffixesEod); - case ROSE_INSTR_MATCHER_EOD: return sizeof(u.matcherEod); - case ROSE_INSTR_END: return sizeof(u.end); - } - assert(0); - return 0; - } - - union { - ROSE_STRUCT_CHECK_LIT_EARLY checkLitEarly; - ROSE_STRUCT_CHECK_GROUPS checkGroups; - ROSE_STRUCT_CHECK_ONLY_EOD checkOnlyEod; - ROSE_STRUCT_CHECK_BOUNDS checkBounds; - ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled; - ROSE_STRUCT_CHECK_LOOKAROUND checkLookaround; - ROSE_STRUCT_CHECK_MASK checkMask; - ROSE_STRUCT_CHECK_MASK_32 checkMask32; - ROSE_STRUCT_CHECK_BYTE checkByte; - ROSE_STRUCT_CHECK_INFIX checkInfix; - ROSE_STRUCT_CHECK_PREFIX checkPrefix; - ROSE_STRUCT_ANCHORED_DELAY anchoredDelay; - ROSE_STRUCT_PUSH_DELAYED pushDelayed; - ROSE_STRUCT_RECORD_ANCHORED recordAnchored; - ROSE_STRUCT_CATCH_UP catchUp; - ROSE_STRUCT_CATCH_UP_MPV catchUpMpv; - ROSE_STRUCT_SOM_ADJUST somAdjust; - ROSE_STRUCT_SOM_LEFTFIX somLeftfix; - ROSE_STRUCT_SOM_FROM_REPORT 
somFromReport;
-        ROSE_STRUCT_SOM_ZERO somZero;
-        ROSE_STRUCT_TRIGGER_INFIX triggerInfix;
-        ROSE_STRUCT_TRIGGER_SUFFIX triggerSuffix;
-        ROSE_STRUCT_DEDUPE dedupe;
-        ROSE_STRUCT_DEDUPE_SOM dedupeSom;
-        ROSE_STRUCT_REPORT_CHAIN reportChain;
-        ROSE_STRUCT_REPORT_SOM_INT reportSomInt;
-        ROSE_STRUCT_REPORT_SOM_AWARE reportSomAware;
-        ROSE_STRUCT_REPORT report;
-        ROSE_STRUCT_REPORT_EXHAUST reportExhaust;
-        ROSE_STRUCT_REPORT_SOM reportSom;
-        ROSE_STRUCT_REPORT_SOM_EXHAUST reportSomExhaust;
-        ROSE_STRUCT_DEDUPE_AND_REPORT dedupeAndReport;
-        ROSE_STRUCT_FINAL_REPORT finalReport;
-        ROSE_STRUCT_CHECK_EXHAUSTED checkExhausted;
-        ROSE_STRUCT_CHECK_MIN_LENGTH checkMinLength;
-        ROSE_STRUCT_SET_STATE setState;
-        ROSE_STRUCT_SET_GROUPS setGroups;
-        ROSE_STRUCT_SQUASH_GROUPS squashGroups;
-        ROSE_STRUCT_CHECK_STATE checkState;
-        ROSE_STRUCT_SPARSE_ITER_BEGIN sparseIterBegin;
-        ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext;
-        ROSE_STRUCT_ENGINES_EOD enginesEod;
-        ROSE_STRUCT_SUFFIXES_EOD suffixesEod;
-        ROSE_STRUCT_MATCHER_EOD matcherEod;
-        ROSE_STRUCT_END end;
-    } u;
-
-    JumpTarget target;
-};
-
-static
-size_t hash_value(const RoseInstruction &ri) {
-    size_t val = 0;
-    boost::hash_combine(val, ri.code());
-    boost::hash_combine(val, ri.target);
-    const char *bytes = (const char *)ri.get();
-    const size_t len = ri.length();
-    for (size_t i = 0; i < len; i++) {
-        boost::hash_combine(val, bytes[i]);
-    }
-    return val;
-}
-
 /**
  * \brief Structure tracking which resources are used by this Rose instance at
  * runtime.
@@ -402,7 +192,8 @@ struct build_context : boost::noncopyable {
 
     /** \brief Simple cache of programs written to engine blob, used for
      * deduplication. */
-    ue2::unordered_map<vector<RoseInstruction>, u32> program_cache;
+    ue2::unordered_map<RoseProgram, u32, RoseProgramHash,
+                       RoseProgramEquivalence> program_cache;
 
     /** \brief LookEntry list cache, so that we don't have to go scanning
      * through the full list to find cases we've used already. */
@@ -427,7 +218,7 @@ struct build_context : boost::noncopyable {
 
     /** \brief Contents of the Rose bytecode immediately following the
      * RoseEngine. */
-    vector<char, AlignedAllocator<char, 64>> engine_blob;
+    RoseEngineBlob engine_blob;
 
     /** \brief True if reports need CATCH_UP instructions, to catch up anchored
      * matches, suffixes, outfixes etc. */
@@ -445,81 +236,17 @@ struct build_context : boost::noncopyable {
 
     /** \brief Global bitmap of groups that can be squashed. */
     rose_group squashable_groups = 0;
-
-    /** \brief Base offset of engine_blob in the Rose engine bytecode.
*/ - static constexpr u32 engine_blob_base = ROUNDUP_CL(sizeof(RoseEngine)); }; } -static -void pad_engine_blob(build_context &bc, size_t align) { - assert(ISALIGNED_N(bc.engine_blob_base, align)); - size_t s = bc.engine_blob.size(); - - if (ISALIGNED_N(s, align)) { - return; - } - - bc.engine_blob.resize(s + align - s % align); -} - -static -u32 add_to_engine_blob(build_context &bc, const void *a, const size_t len, - const size_t align) { - pad_engine_blob(bc, align); - - size_t rv = bc.engine_blob_base + bc.engine_blob.size(); - assert(rv >= bc.engine_blob_base); - DEBUG_PRINTF("write %zu bytes at offset %zu\n", len, rv); - - assert(ISALIGNED_N(bc.engine_blob.size(), align)); - - bc.engine_blob.resize(bc.engine_blob.size() + len); - memcpy(&bc.engine_blob.back() - len + 1, a, len); - - return verify_u32(rv); -} - -template -static -u32 add_to_engine_blob(build_context &bc, const T &a) { - static_assert(is_pod::value, "should be pod"); - return add_to_engine_blob(bc, &a, sizeof(a), alignof(T)); -} - -template -static -u32 add_to_engine_blob(build_context &bc, const T &a, const size_t len) { - static_assert(is_pod::value, "should be pod"); - return add_to_engine_blob(bc, &a, len, alignof(T)); -} - -template -static -u32 add_to_engine_blob(build_context &bc, Iter b, const Iter &e) { - using value_type = typename std::iterator_traits::value_type; - static_assert(is_pod::value, "should be pod"); - - if (b == e) { - return 0; - } - - u32 offset = add_to_engine_blob(bc, *b); - for (++b; b != e; ++b) { - add_to_engine_blob(bc, *b); - } - - return offset; -} - static const NFA *get_nfa_from_blob(const build_context &bc, u32 qi) { assert(contains(bc.engineOffsets, qi)); u32 nfa_offset = bc.engineOffsets.at(qi); - assert(nfa_offset >= bc.engine_blob_base); + assert(nfa_offset >= bc.engine_blob.base_offset); const NFA *n = (const NFA *)(bc.engine_blob.data() + nfa_offset - - bc.engine_blob_base); + bc.engine_blob.base_offset); assert(n->queueIndex == qi); return n; } @@ -527,7 +254,7 @@ const NFA *get_nfa_from_blob(const build_context &bc, u32 qi) { static const NFA *add_nfa_to_blob(build_context &bc, NFA &nfa) { u32 qi = nfa.queueIndex; - u32 nfa_offset = add_to_engine_blob(bc, nfa, nfa.length); + u32 nfa_offset = bc.engine_blob.add(nfa, nfa.length); DEBUG_PRINTF("added nfa qi=%u, type=%u, length=%u at offset=%u\n", qi, nfa.type, nfa.length, nfa_offset); @@ -2282,7 +2009,7 @@ void buildCountingMiracles(RoseBuildImpl &build, build_context &bc) { rcm.count = lbi.countingMiracleCount; - lbi.countingMiracleOffset = add_to_engine_blob(bc, rcm); + lbi.countingMiracleOffset = bc.engine_blob.add(rcm); pre_built[key] = lbi.countingMiracleOffset; DEBUG_PRINTF("built cm for count of %u @ %u\n", rcm.count, lbi.countingMiracleOffset); @@ -2462,7 +2189,7 @@ u32 addIteratorToTable(build_context &bc, return offset; } - u32 offset = add_to_engine_blob(bc, iter.begin(), iter.end()); + u32 offset = bc.engine_blob.add(iter.begin(), iter.end()); bc.iterCache.insert(make_pair(iter, offset)); @@ -2577,7 +2304,7 @@ void buildSuffixEkeyLists(const RoseBuildImpl &tbi, build_context &bc, for (auto &e : qi_to_ekeys) { assert(!e.second.empty()); e.second.push_back(INVALID_EKEY); /* terminator */ - (*out)[e.first] = add_to_engine_blob(bc, e.second.begin(), + (*out)[e.first] = bc.engine_blob.add(e.second.begin(), e.second.end()); } } @@ -2765,132 +2492,8 @@ getLiteralInfoByFinalId(const RoseBuildImpl &build, u32 final_id) { return out; } -/** - * \brief Flattens a list of role programs into one finalised program with its - * 
fail_jump/done_jump targets set correctly. - */ static -vector -flattenProgram(const vector> &programs) { - vector out; - - vector offsets; // offset of each instruction (bytes) - vector blocks; // track which block we're in - vector block_offsets; // start offsets for each block - - DEBUG_PRINTF("%zu program blocks\n", programs.size()); - - size_t curr_offset = 0; - for (const auto &program : programs) { - DEBUG_PRINTF("block with %zu instructions\n", program.size()); - block_offsets.push_back(curr_offset); - for (const auto &ri : program) { - assert(ri.code() != ROSE_INSTR_END); - out.push_back(ri); - offsets.push_back(curr_offset); - blocks.push_back(block_offsets.size() - 1); - curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); - } - } - - // Add a final END instruction, which is its own block. - out.emplace_back(ROSE_INSTR_END); - block_offsets.push_back(curr_offset); - offsets.push_back(curr_offset); - - assert(offsets.size() == out.size()); - - for (size_t i = 0; i < out.size(); i++) { - auto &ri = out[i]; - - u32 jump_target = 0; - switch (ri.target) { - case JumpTarget::NO_JUMP: - case JumpTarget::FIXUP_DONE: - continue; // Next instruction. - case JumpTarget::PROGRAM_END: - assert(i != out.size() - 1); - jump_target = offsets.back(); - break; - case JumpTarget::NEXT_BLOCK: - assert(blocks[i] + 1 < block_offsets.size()); - jump_target = block_offsets[blocks[i] + 1]; - break; - } - - // We currently always make progress and never jump backwards. - assert(jump_target > offsets[i]); - assert(jump_target <= offsets.back()); - u32 jump_val = jump_target - offsets[i]; - - switch (ri.code()) { - case ROSE_INSTR_ANCHORED_DELAY: - ri.u.anchoredDelay.done_jump = jump_val; - break; - case ROSE_INSTR_CHECK_ONLY_EOD: - ri.u.checkOnlyEod.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_BOUNDS: - ri.u.checkBounds.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_NOT_HANDLED: - ri.u.checkNotHandled.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_LOOKAROUND: - ri.u.checkLookaround.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_MASK: - ri.u.checkMask.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_MASK_32: - ri.u.checkMask32.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_BYTE: - ri.u.checkByte.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_INFIX: - ri.u.checkInfix.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_PREFIX: - ri.u.checkPrefix.fail_jump = jump_val; - break; - case ROSE_INSTR_DEDUPE: - ri.u.dedupe.fail_jump = jump_val; - break; - case ROSE_INSTR_DEDUPE_SOM: - ri.u.dedupeSom.fail_jump = jump_val; - break; - case ROSE_INSTR_DEDUPE_AND_REPORT: - ri.u.dedupeAndReport.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_EXHAUSTED: - ri.u.checkExhausted.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_MIN_LENGTH: - ri.u.checkMinLength.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_STATE: - ri.u.checkState.fail_jump = jump_val; - break; - case ROSE_INSTR_SPARSE_ITER_BEGIN: - ri.u.sparseIterBegin.fail_jump = jump_val; - break; - case ROSE_INSTR_SPARSE_ITER_NEXT: - ri.u.sparseIterNext.fail_jump = jump_val; - break; - default: - assert(0); // Unhandled opcode? 
- break; - } - - ri.target = JumpTarget::FIXUP_DONE; - } - - return out; -} - -static -void applyFinalSpecialisation(vector &program) { +void applyFinalSpecialisation(RoseProgram &program) { assert(!program.empty()); assert(program.back().code() == ROSE_INSTR_END); if (program.size() < 2) { @@ -2899,26 +2502,18 @@ void applyFinalSpecialisation(vector &program) { /* Replace the second-to-last instruction (before END) with a one-shot * specialisation if available. */ - auto &ri = *(next(program.rbegin())); - switch (ri.code()) { - case ROSE_INSTR_REPORT: { + auto it = next(program.rbegin()); + if (auto *ri = dynamic_cast(it->get())) { DEBUG_PRINTF("replacing REPORT with FINAL_REPORT\n"); - auto ri2 = RoseInstruction(ROSE_INSTR_FINAL_REPORT); - ri2.u.finalReport.onmatch = ri.u.report.onmatch; - ri2.u.finalReport.offset_adjust = ri.u.report.offset_adjust; - ri = ri2; - break; - } - default: - break; + program.replace(it, make_unique( + ri->onmatch, ri->offset_adjust)); } } static -void recordResources(RoseResources &resources, - const vector &program) { +void recordResources(RoseResources &resources, const RoseProgram &program) { for (const auto &ri : program) { - switch (ri.code()) { + switch (ri->code()) { case ROSE_INSTR_TRIGGER_SUFFIX: resources.has_suffixes = true; break; @@ -2973,22 +2568,12 @@ void recordResources(RoseResources &resources, } static -u32 writeProgram(build_context &bc, const vector &program) { +u32 writeProgram(build_context &bc, RoseProgram &&program) { if (program.empty()) { DEBUG_PRINTF("no program\n"); return 0; } - assert(program.back().code() == ROSE_INSTR_END); - assert(program.size() >= 1); - - // This program must have been flattened; i.e. all check instructions must - // have their jump offsets set. - assert(all_of(begin(program), end(program), [](const RoseInstruction &ri) { - return ri.target == JumpTarget::NO_JUMP || - ri.target == JumpTarget::FIXUP_DONE; - })); - auto it = bc.program_cache.find(program); if (it != end(bc.program_cache)) { DEBUG_PRINTF("reusing cached program at %u\n", it->second); @@ -2997,20 +2582,13 @@ u32 writeProgram(build_context &bc, const vector &program) { recordResources(bc.resources, program); - DEBUG_PRINTF("writing %zu instructions\n", program.size()); - u32 programOffset = 0; - for (const auto &ri : program) { - u32 offset = - add_to_engine_blob(bc, ri.get(), ri.length(), ROSE_INSTR_MIN_ALIGN); - DEBUG_PRINTF("code %u len %zu written at offset %u\n", ri.code(), - ri.length(), offset); - if (!programOffset) { - programOffset = offset; - } - } - DEBUG_PRINTF("program begins at offset %u\n", programOffset); - bc.program_cache.emplace(program, programOffset); - return programOffset; + u32 len = 0; + auto prog_bytecode = writeProgram(bc.engine_blob, program, &len); + u32 offset = bc.engine_blob.add(prog_bytecode.get(), len, + ROSE_INSTR_MIN_ALIGN); + DEBUG_PRINTF("prog len %u written at offset %u\n", len, offset); + bc.program_cache.emplace(move(program), offset); + return offset; } static @@ -3228,8 +2806,7 @@ bool checkReachWithFlip(const CharReach &cr, u8 &andmask, } static -bool makeRoleByte(const vector &look, - vector &program) { +bool makeRoleByte(const vector &look, RoseProgram &program) { if (look.size() == 1) { const auto &entry = look[0]; u8 andmask_u8, cmpmask_u8; @@ -3239,21 +2816,17 @@ bool makeRoleByte(const vector &look, } s32 checkbyte_offset = verify_s32(entry.offset); DEBUG_PRINTF("CHECK BYTE offset=%d\n", checkbyte_offset); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_BYTE, - JumpTarget::NEXT_BLOCK); - 
ri.u.checkByte.and_mask = andmask_u8; - ri.u.checkByte.cmp_mask = cmpmask_u8; - ri.u.checkByte.negation = flip; - ri.u.checkByte.offset = checkbyte_offset; - program.push_back(ri); + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(andmask_u8, cmpmask_u8, flip, + checkbyte_offset, end_inst); + program.add_before_end(move(ri)); return true; } return false; } static -bool makeRoleMask(const vector &look, - vector &program) { +bool makeRoleMask(const vector &look, RoseProgram &program) { if (look.back().offset < look.front().offset + 8) { s32 base_offset = verify_s32(look.front().offset); u64a and_mask = 0; @@ -3275,13 +2848,10 @@ bool makeRoleMask(const vector &look, } DEBUG_PRINTF("CHECK MASK and_mask=%llx cmp_mask=%llx\n", and_mask, cmp_mask); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_MASK, - JumpTarget::NEXT_BLOCK); - ri.u.checkMask.and_mask = and_mask; - ri.u.checkMask.cmp_mask = cmp_mask; - ri.u.checkMask.neg_mask = neg_mask; - ri.u.checkMask.offset = base_offset; - program.push_back(ri); + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(and_mask, cmp_mask, neg_mask, + base_offset, end_inst); + program.add_before_end(move(ri)); return true; } return false; @@ -3302,14 +2872,14 @@ string convertMaskstoString(u8 *p, int byte_len) { static bool makeRoleMask32(const vector &look, - vector &program) { + RoseProgram &program) { if (look.back().offset >= look.front().offset + 32) { return false; } s32 base_offset = verify_s32(look.front().offset); - u8 and_mask[32], cmp_mask[32]; - memset(and_mask, 0, sizeof(and_mask)); - memset(cmp_mask, 0, sizeof(cmp_mask)); + array and_mask, cmp_mask; + and_mask.fill(0); + cmp_mask.fill(0); u32 neg_mask = 0; for (const auto &entry : look) { u8 andmask_u8, cmpmask_u8, flip; @@ -3326,18 +2896,17 @@ bool makeRoleMask32(const vector &look, } } - DEBUG_PRINTF("and_mask %s\n", convertMaskstoString(and_mask, 32).c_str()); - DEBUG_PRINTF("cmp_mask %s\n", convertMaskstoString(cmp_mask, 32).c_str()); + DEBUG_PRINTF("and_mask %s\n", + convertMaskstoString(and_mask.data(), 32).c_str()); + DEBUG_PRINTF("cmp_mask %s\n", + convertMaskstoString(cmp_mask.data(), 32).c_str()); DEBUG_PRINTF("neg_mask %08x\n", neg_mask); DEBUG_PRINTF("base_offset %d\n", base_offset); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_MASK_32, - JumpTarget::NEXT_BLOCK); - memcpy(ri.u.checkMask32.and_mask, and_mask, sizeof(and_mask)); - memcpy(ri.u.checkMask32.cmp_mask, cmp_mask, sizeof(cmp_mask)); - ri.u.checkMask32.neg_mask = neg_mask; - ri.u.checkMask32.offset = base_offset; - program.push_back(ri); + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(and_mask, cmp_mask, neg_mask, + base_offset, end_inst); + program.add_before_end(move(ri)); return true; } @@ -3347,7 +2916,7 @@ bool makeRoleMask32(const vector &look, */ static void makeLookaroundInstruction(build_context &bc, const vector &look, - vector &program) { + RoseProgram &program) { assert(!look.empty()); if (makeRoleByte(look, program)) { @@ -3365,16 +2934,14 @@ void makeLookaroundInstruction(build_context &bc, const vector &look, u32 look_idx = addLookaround(bc, look); u32 look_count = verify_u32(look.size()); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_LOOKAROUND, - JumpTarget::NEXT_BLOCK); - ri.u.checkLookaround.index = look_idx; - ri.u.checkLookaround.count = look_count; - program.push_back(ri); + auto ri = make_unique(look_idx, look_count, + program.end_instruction()); + program.add_before_end(move(ri)); } static void makeRoleLookaround(RoseBuildImpl 
&build, build_context &bc, RoseVertex v, - vector &program) { + RoseProgram &program) { if (!build.cc.grey.roseLookaroundMasks) { return; } @@ -3402,7 +2969,7 @@ void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, static void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v, - vector &program) { + RoseProgram &program) { auto it = bc.leftfix_info.find(v); if (it == end(bc.leftfix_info)) { return; @@ -3416,26 +2983,24 @@ void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v, build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG); bool is_prefix = build.isRootSuccessor(v); + const auto *end_inst = program.end_instruction(); + + unique_ptr ri; if (is_prefix) { - auto ri = - RoseInstruction(ROSE_INSTR_CHECK_PREFIX, JumpTarget::NEXT_BLOCK); - ri.u.checkPrefix.queue = lni.queue; - ri.u.checkPrefix.lag = build.g[v].left.lag; - ri.u.checkPrefix.report = build.g[v].left.leftfix_report; - program.push_back(move(ri)); + ri = make_unique(lni.queue, build.g[v].left.lag, + build.g[v].left.leftfix_report, + end_inst); } else { - auto ri = - RoseInstruction(ROSE_INSTR_CHECK_INFIX, JumpTarget::NEXT_BLOCK); - ri.u.checkInfix.queue = lni.queue; - ri.u.checkInfix.lag = build.g[v].left.lag; - ri.u.checkInfix.report = build.g[v].left.leftfix_report; - program.push_back(move(ri)); + ri = make_unique(lni.queue, build.g[v].left.lag, + build.g[v].left.leftfix_report, + end_inst); } + program.add_before_end(move(ri)); } static void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc, - RoseVertex v, vector &program) { + RoseVertex v, RoseProgram &program) { // Only relevant for roles that can be triggered by the anchored table. if (!build.isAnchored(v)) { return; @@ -3447,36 +3012,34 @@ void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc, return; } - auto ri = RoseInstruction(ROSE_INSTR_ANCHORED_DELAY, - JumpTarget::NEXT_BLOCK); - ri.u.anchoredDelay.groups = build.g[v].groups; - program.push_back(ri); + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(build.g[v].groups, end_inst); + program.add_before_end(move(ri)); } static void makeDedupe(const RoseBuildImpl &build, const Report &report, - vector &report_block) { - auto ri = RoseInstruction(ROSE_INSTR_DEDUPE, JumpTarget::NEXT_BLOCK); - ri.u.dedupe.quash_som = report.quashSom; - ri.u.dedupe.dkey = build.rm.getDkey(report); - ri.u.dedupe.offset_adjust = report.offsetAdjust; - report_block.push_back(move(ri)); + RoseProgram &program) { + const auto *end_inst = program.end_instruction(); + auto ri = + make_unique(report.quashSom, build.rm.getDkey(report), + report.offsetAdjust, end_inst); + program.add_before_end(move(ri)); } static void makeDedupeSom(const RoseBuildImpl &build, const Report &report, - vector &report_block) { - auto ri = RoseInstruction(ROSE_INSTR_DEDUPE_SOM, JumpTarget::NEXT_BLOCK); - ri.u.dedupeSom.quash_som = report.quashSom; - ri.u.dedupeSom.dkey = build.rm.getDkey(report); - ri.u.dedupeSom.offset_adjust = report.offsetAdjust; - report_block.push_back(move(ri)); + RoseProgram &program) { + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(report.quashSom, + build.rm.getDkey(report), + report.offsetAdjust, end_inst); + program.add_before_end(move(ri)); } static void makeCatchup(RoseBuildImpl &build, build_context &bc, - const flat_set &reports, - vector &program) { + const flat_set &reports, RoseProgram &program) { if (!bc.needs_catchup) { return; } @@ -3494,12 +3057,12 @@ void makeCatchup(RoseBuildImpl 
&build, build_context &bc, return; } - program.emplace_back(ROSE_INSTR_CATCH_UP); + program.add_before_end(make_unique()); } static void makeCatchupMpv(RoseBuildImpl &build, build_context &bc, ReportID id, - vector &program) { + RoseProgram &program) { if (!bc.needs_mpv_catchup) { return; } @@ -3509,13 +3072,15 @@ void makeCatchupMpv(RoseBuildImpl &build, build_context &bc, ReportID id, return; } - program.emplace_back(ROSE_INSTR_CATCH_UP_MPV); + program.add_before_end(make_unique()); } static void writeSomOperation(const Report &report, som_operation *op) { assert(op); + memset(op, 0, sizeof(*op)); + switch (report.type) { case EXTERNAL_CALLBACK_SOM_REL: op->type = SOM_EXTERNAL_CALLBACK_REL; @@ -3585,51 +3150,46 @@ void writeSomOperation(const Report &report, som_operation *op) { static void makeReport(RoseBuildImpl &build, const ReportID id, - const bool has_som, vector &program) { + const bool has_som, RoseProgram &program) { assert(id < build.rm.numReports()); const Report &report = build.rm.getReport(id); - vector report_block; + RoseProgram report_block; + const RoseInstruction *end_inst = report_block.end_instruction(); // Handle min/max offset checks. if (report.minOffset > 0 || report.maxOffset < MAX_OFFSET) { - auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS, - JumpTarget::NEXT_BLOCK); - ri.u.checkBounds.min_bound = report.minOffset; - ri.u.checkBounds.max_bound = report.maxOffset; - report_block.push_back(move(ri)); + auto ri = make_unique(report.minOffset, + report.maxOffset, end_inst); + report_block.add_before_end(move(ri)); } // If this report has an exhaustion key, we can check it in the program // rather than waiting until we're in the callback adaptor. if (report.ekey != INVALID_EKEY) { - auto ri = RoseInstruction(ROSE_INSTR_CHECK_EXHAUSTED, - JumpTarget::NEXT_BLOCK); - ri.u.checkExhausted.ekey = report.ekey; - report_block.push_back(move(ri)); + auto ri = make_unique(report.ekey, end_inst); + report_block.add_before_end(move(ri)); } // External SOM reports that aren't passthrough need their SOM value // calculated. if (isExternalSomReport(report) && report.type != EXTERNAL_CALLBACK_SOM_PASS) { - auto ri = RoseInstruction(ROSE_INSTR_SOM_FROM_REPORT); - writeSomOperation(report, &ri.u.somFromReport.som); - report_block.push_back(move(ri)); + auto ri = make_unique(); + writeSomOperation(report, &ri->som); + report_block.add_before_end(move(ri)); } // Min length constraint. 
if (report.minLength > 0) { assert(build.hasSom); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_MIN_LENGTH, - JumpTarget::NEXT_BLOCK); - ri.u.checkMinLength.end_adj = report.offsetAdjust; - ri.u.checkMinLength.min_length = report.minLength; - report_block.push_back(move(ri)); + auto ri = make_unique( + report.offsetAdjust, report.minLength, end_inst); + report_block.add_before_end(move(ri)); } if (report.quashSom) { - report_block.emplace_back(ROSE_INSTR_SOM_ZERO); + report_block.add_before_end(make_unique()); } switch (report.type) { @@ -3640,42 +3200,30 @@ void makeReport(RoseBuildImpl &build, const ReportID id, bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom; if (report.ekey == INVALID_EKEY) { if (needs_dedupe) { - report_block.emplace_back(ROSE_INSTR_DEDUPE_AND_REPORT, - JumpTarget::NEXT_BLOCK); - auto &ri = report_block.back(); - ri.u.dedupeAndReport.quash_som = report.quashSom; - ri.u.dedupeAndReport.dkey = build.rm.getDkey(report); - ri.u.dedupeAndReport.onmatch = report.onmatch; - ri.u.dedupeAndReport.offset_adjust = report.offsetAdjust; + report_block.add_before_end( + make_unique( + report.quashSom, build.rm.getDkey(report), + report.onmatch, report.offsetAdjust, end_inst)); } else { - report_block.emplace_back(ROSE_INSTR_REPORT); - auto &ri = report_block.back(); - ri.u.report.onmatch = report.onmatch; - ri.u.report.offset_adjust = report.offsetAdjust; + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); } } else { if (needs_dedupe) { makeDedupe(build, report, report_block); } - report_block.emplace_back(ROSE_INSTR_REPORT_EXHAUST); - auto &ri = report_block.back(); - ri.u.reportExhaust.onmatch = report.onmatch; - ri.u.reportExhaust.offset_adjust = report.offsetAdjust; - ri.u.reportExhaust.ekey = report.ekey; + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); } } else { // has_som makeDedupeSom(build, report, report_block); if (report.ekey == INVALID_EKEY) { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM); - auto &ri = report_block.back(); - ri.u.reportSom.onmatch = report.onmatch; - ri.u.reportSom.offset_adjust = report.offsetAdjust; + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); } else { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM_EXHAUST); - auto &ri = report_block.back(); - ri.u.reportSomExhaust.onmatch = report.onmatch; - ri.u.reportSomExhaust.offset_adjust = report.offsetAdjust; - ri.u.reportSomExhaust.ekey = report.ekey; + report_block.add_before_end( + make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); } } break; @@ -3691,20 +3239,18 @@ void makeReport(RoseBuildImpl &build, const ReportID id, case INTERNAL_SOM_LOC_SET_FROM: case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: if (has_som) { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM_AWARE); - auto &ri = report_block.back(); - writeSomOperation(report, &ri.u.reportSomAware.som); + auto ri = make_unique(); + writeSomOperation(report, &ri->som); + report_block.add_before_end(move(ri)); } else { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM_INT); - auto &ri = report_block.back(); - writeSomOperation(report, &ri.u.reportSomInt.som); + auto ri = make_unique(); + writeSomOperation(report, &ri->som); + report_block.add_before_end(move(ri)); } break; case INTERNAL_ROSE_CHAIN: { - report_block.emplace_back(ROSE_INSTR_REPORT_CHAIN); - auto &ri = report_block.back(); - ri.u.reportChain.event = report.onmatch; - ri.u.reportChain.top_squash_distance = report.topSquashDistance; + 
report_block.add_before_end(make_unique( + report.onmatch, report.topSquashDistance)); break; } case EXTERNAL_CALLBACK_SOM_REL: @@ -3713,31 +3259,21 @@ void makeReport(RoseBuildImpl &build, const ReportID id, case EXTERNAL_CALLBACK_SOM_REV_NFA: makeDedupeSom(build, report, report_block); if (report.ekey == INVALID_EKEY) { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM); - auto &ri = report_block.back(); - ri.u.reportSom.onmatch = report.onmatch; - ri.u.reportSom.offset_adjust = report.offsetAdjust; + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); } else { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM_EXHAUST); - auto &ri = report_block.back(); - ri.u.reportSomExhaust.onmatch = report.onmatch; - ri.u.reportSomExhaust.offset_adjust = report.offsetAdjust; - ri.u.reportSomExhaust.ekey = report.ekey; + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); } break; case EXTERNAL_CALLBACK_SOM_PASS: makeDedupeSom(build, report, report_block); if (report.ekey == INVALID_EKEY) { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM); - auto &ri = report_block.back(); - ri.u.reportSom.onmatch = report.onmatch; - ri.u.reportSom.offset_adjust = report.offsetAdjust; + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); } else { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM_EXHAUST); - auto &ri = report_block.back(); - ri.u.reportSomExhaust.onmatch = report.onmatch; - ri.u.reportSomExhaust.offset_adjust = report.offsetAdjust; - ri.u.reportSomExhaust.ekey = report.ekey; + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); } break; @@ -3747,15 +3283,12 @@ void makeReport(RoseBuildImpl &build, const ReportID id, } assert(!report_block.empty()); - report_block = flattenProgram({report_block}); - assert(report_block.back().code() == ROSE_INSTR_END); - report_block.pop_back(); - insert(&program, program.end(), report_block); + program.add_block(move(report_block)); } static void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, - vector &program) { + RoseProgram &program) { const auto &g = build.g; /* we are a suffaig - need to update role to provide som to the @@ -3764,29 +3297,28 @@ void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, if (g[v].left.tracksSom()) { assert(contains(bc.leftfix_info, v)); const left_build_info &lni = bc.leftfix_info.at(v); - auto ri = RoseInstruction(ROSE_INSTR_SOM_LEFTFIX); - ri.u.somLeftfix.queue = lni.queue; - ri.u.somLeftfix.lag = g[v].left.lag; - program.push_back(ri); + program.add_before_end( + make_unique(lni.queue, g[v].left.lag)); has_som = true; } else if (g[v].som_adjust) { - auto ri = RoseInstruction(ROSE_INSTR_SOM_ADJUST); - ri.u.somAdjust.distance = g[v].som_adjust; - program.push_back(ri); + program.add_before_end( + make_unique(g[v].som_adjust)); has_som = true; } const auto &reports = g[v].reports; makeCatchup(build, bc, reports, program); + RoseProgram report_block; for (ReportID id : reports) { - makeReport(build, id, has_som, program); + makeReport(build, id, has_som, report_block); } + program.add_before_end(move(report_block)); } static void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v, - vector &program) { + RoseProgram &program) { const auto &g = build.g; if (!g[v].suffix) { return; @@ -3815,15 +3347,13 @@ void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v, assert(!g[v].suffix.graph || 
onlyOneTop(*g[v].suffix.graph)); suffixEvent = MQE_TOP; } - auto ri = RoseInstruction(ROSE_INSTR_TRIGGER_SUFFIX); - ri.u.triggerSuffix.queue = qi; - ri.u.triggerSuffix.event = suffixEvent; - program.push_back(ri); + program.add_before_end( + make_unique(qi, suffixEvent)); } static void makeRoleGroups(RoseBuildImpl &build, build_context &bc, RoseVertex v, - vector &program) { + RoseProgram &program) { const auto &g = build.g; rose_group groups = g[v].groups; if (!groups) { @@ -3854,17 +3384,15 @@ void makeRoleGroups(RoseBuildImpl &build, build_context &bc, RoseVertex v, return; } - auto ri = RoseInstruction(ROSE_INSTR_SET_GROUPS); - ri.u.setGroups.groups = groups; - program.push_back(ri); + program.add_before_end(make_unique(groups)); } static void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, - RoseVertex u, vector &program) { + RoseVertex u, RoseProgram &program) { const auto &g = build.g; - vector infix_program; + vector infix_program; for (const auto &e : out_edges_range(u, g)) { RoseVertex v = target(e, g); @@ -3896,11 +3424,7 @@ void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, assert(top < MQE_INVALID); } - auto ri = RoseInstruction(ROSE_INSTR_TRIGGER_INFIX); - ri.u.triggerInfix.queue = lbi.queue; - ri.u.triggerInfix.event = top; - ri.u.triggerInfix.cancel = g[e].rose_cancel_prev_top; - infix_program.push_back(ri); + infix_program.emplace_back(g[e].rose_cancel_prev_top, lbi.queue, top); } if (infix_program.empty()) { @@ -3908,30 +3432,33 @@ void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, } // Order, de-dupe and add instructions to the end of program. - sort(begin(infix_program), end(infix_program)); - unique_copy(begin(infix_program), end(infix_program), - back_inserter(program)); + sort(begin(infix_program), end(infix_program), + [](const RoseInstrTriggerInfix &a, const RoseInstrTriggerInfix &b) { + return tie(a.cancel, a.queue, a.event) < + tie(b.cancel, b.queue, b.event); + }); + infix_program.erase(unique(begin(infix_program), end(infix_program)), + end(infix_program)); + for (const auto &ri : infix_program) { + program.add_before_end(make_unique(ri)); + } } static void makeRoleSetState(const build_context &bc, RoseVertex v, - vector &program) { + RoseProgram &program) { // We only need this instruction if a state index has been assigned to this // vertex. auto it = bc.roleStateIndices.find(v); if (it == end(bc.roleStateIndices)) { return; } - - u32 idx = it->second; - auto ri = RoseInstruction(ROSE_INSTR_SET_STATE); - ri.u.setState.index = idx; - program.push_back(ri); + program.add_before_end(make_unique(it->second)); } static void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, - const RoseEdge &e, vector &program) { + const RoseEdge &e, RoseProgram &program) { const RoseGraph &g = build.g; const RoseVertex u = source(e, g); @@ -3972,19 +3499,14 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, // than just {length, inf}. 
assert(min_bound > lit_length || max_bound < MAX_OFFSET); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS, JumpTarget::NEXT_BLOCK); - ri.u.checkBounds.min_bound = min_bound; - ri.u.checkBounds.max_bound = max_bound; - - program.push_back(move(ri)); + const auto *end_inst = program.end_instruction(); + program.add_before_end( + make_unique(min_bound, max_bound, end_inst)); } static void makeRoleCheckNotHandled(build_context &bc, RoseVertex v, - vector &program) { - auto ri = RoseInstruction(ROSE_INSTR_CHECK_NOT_HANDLED, - JumpTarget::NEXT_BLOCK); - + RoseProgram &program) { u32 handled_key; if (contains(bc.handledKeys, v)) { handled_key = bc.handledKeys.at(v); @@ -3993,19 +3515,21 @@ void makeRoleCheckNotHandled(build_context &bc, RoseVertex v, bc.handledKeys.emplace(v, handled_key); } - ri.u.checkNotHandled.key = handled_key; - - program.push_back(move(ri)); + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(handled_key, end_inst); + program.add_before_end(move(ri)); } static void makeRoleEagerEodReports(RoseBuildImpl &build, build_context &bc, - RoseVertex v, vector &program) { - vector eod_program; + RoseVertex v, RoseProgram &program) { + RoseProgram eod_program; for (const auto &e : out_edges_range(v, build.g)) { if (canEagerlyReportAtEod(build, e)) { - makeRoleReports(build, bc, target(e, build.g), eod_program); + RoseProgram block; + makeRoleReports(build, bc, target(e, build.g), block); + eod_program.add_block(move(block)); } } @@ -4016,19 +3540,21 @@ void makeRoleEagerEodReports(RoseBuildImpl &build, build_context &bc, if (!onlyAtEod(build, v)) { // The rest of our program wasn't EOD anchored, so we need to guard // these reports with a check. - program.emplace_back(ROSE_INSTR_CHECK_ONLY_EOD, JumpTarget::NEXT_BLOCK); + const auto *end_inst = eod_program.end_instruction(); + eod_program.insert(begin(eod_program), + make_unique(end_inst)); } - program.insert(end(program), begin(eod_program), end(eod_program)); + program.add_before_end(move(eod_program)); } static -vector makeProgram(RoseBuildImpl &build, build_context &bc, - const RoseEdge &e) { +RoseProgram makeProgram(RoseBuildImpl &build, build_context &bc, + const RoseEdge &e) { const RoseGraph &g = build.g; auto v = target(e, g); - vector program; + RoseProgram program; // First, add program instructions that enforce preconditions without // effects. @@ -4037,8 +3563,8 @@ vector makeProgram(RoseBuildImpl &build, build_context &bc, if (onlyAtEod(build, v)) { DEBUG_PRINTF("only at eod\n"); - program.push_back(RoseInstruction(ROSE_INSTR_CHECK_ONLY_EOD, - JumpTarget::NEXT_BLOCK)); + const auto *end_inst = program.end_instruction(); + program.add_before_end(make_unique(end_inst)); } if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { @@ -4055,24 +3581,41 @@ vector makeProgram(RoseBuildImpl &build, build_context &bc, makeRoleLookaround(build, bc, v, program); makeRoleCheckLeftfix(build, bc, v, program); - // Next, we can add program instructions that have effects. + // Next, we can add program instructions that have effects. This must be + // done as a series of blocks, as some of them (like reports) are + // escapable. 
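+    //
+    // A sketch of the effect (offsets hypothetical): a failing check inside
+    // a report block jumps to that block's own END, so only the rest of that
+    // block is skipped, not the infix/suffix/state updates that follow it:
+    //
+    //   RoseProgram block;
+    //   block.add_before_end(make_unique<RoseInstrCheckBounds>(
+    //       min_offset, max_offset, block.end_instruction()));
+    //   effects_block.add_block(move(block));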
- makeRoleReports(build, bc, v, program); + RoseProgram effects_block; - makeRoleInfixTriggers(build, bc, v, program); + RoseProgram reports_block; + makeRoleReports(build, bc, v, reports_block); + effects_block.add_block(move(reports_block)); + + RoseProgram infix_block; + makeRoleInfixTriggers(build, bc, v, infix_block); + effects_block.add_block(move(infix_block)); // Note: SET_GROUPS instruction must be after infix triggers, as an infix // going dead may switch off groups. - makeRoleGroups(build, bc, v, program); + RoseProgram groups_block; + makeRoleGroups(build, bc, v, groups_block); + effects_block.add_block(move(groups_block)); - makeRoleSuffix(build, bc, v, program); + RoseProgram suffix_block; + makeRoleSuffix(build, bc, v, suffix_block); + effects_block.add_block(move(suffix_block)); - makeRoleSetState(bc, v, program); + RoseProgram state_block; + makeRoleSetState(bc, v, state_block); + effects_block.add_block(move(state_block)); // Note: EOD eager reports may generate a CHECK_ONLY_EOD instruction (if // the program doesn't have one already). - makeRoleEagerEodReports(build, bc, v, program); + RoseProgram eod_block; + makeRoleEagerEodReports(build, bc, v, eod_block); + effects_block.add_block(move(eod_block)); + program.add_before_end(move(effects_block)); return program; } @@ -4088,13 +3631,12 @@ u32 writeBoundaryProgram(RoseBuildImpl &build, build_context &bc, // scratch to support it). const bool has_som = false; - vector program; + RoseProgram program; for (const auto &id : reports) { makeReport(build, id, has_som, program); } - program = flattenProgram({program}); applyFinalSpecialisation(program); - return writeProgram(bc, program); + return writeProgram(bc, move(program)); } static @@ -4217,7 +3759,7 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, if (hasUsefulStops(lbi)) { assert(lbi.stopAlphabet.size() == N_CHARS); - left.stopTable = add_to_engine_blob(bc, lbi.stopAlphabet.begin(), + left.stopTable = bc.engine_blob.add(lbi.stopAlphabet.begin(), lbi.stopAlphabet.end()); } @@ -4258,178 +3800,125 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, } static -void addPredBlocksSingle( - map>> &predProgramLists, - vector &program) { - - vector> prog_blocks; - - for (const auto &m : predProgramLists) { - const u32 &pred_state = m.first; - assert(!m.second.empty()); - auto subprog = flattenProgram(m.second); - - // Check our pred state. - auto ri = RoseInstruction(ROSE_INSTR_CHECK_STATE, - JumpTarget::NEXT_BLOCK); - ri.u.checkState.index = pred_state; - subprog.insert(begin(subprog), ri); - assert(subprog.back().code() == ROSE_INSTR_END); - subprog.pop_back(); - prog_blocks.push_back(move(subprog)); - } - - auto prog = flattenProgram(prog_blocks); - program.insert(end(program), begin(prog), end(prog)); +void addPredBlockSingle(u32 pred_state, RoseProgram &pred_block, + RoseProgram &program) { + // Prepend an instruction to check the pred state is on. 
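+    //
+    // The finished block has this shape (a sketch, not literal bytecode):
+    //
+    //   CHECK_STATE pred_state   (fail_jump -> END)
+    //   ... original pred_block instructions ...
+    //   END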
+ const auto *end_inst = pred_block.end_instruction(); + pred_block.insert(begin(pred_block), + make_unique(pred_state, end_inst)); + program.add_block(move(pred_block)); } static -u32 programLength(const vector &program) { - u32 len = 0; - for (const auto &ri : program) { - len += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); - } - return len; -} +void addPredBlocksAny(build_context &bc, map &pred_blocks, + RoseProgram &program) { + RoseProgram sparse_program; -static -void addPredBlocksMulti(build_context &bc, - map>> &predProgramLists, - vector &program) { - assert(!predProgramLists.empty()); - - // First, add the iterator itself. vector keys; - for (const auto &elem : predProgramLists) { - keys.push_back(elem.first); - } - DEBUG_PRINTF("%zu keys: %s\n", keys.size(), as_string_list(keys).c_str()); - - vector iter; - mmbBuildSparseIterator(iter, keys, bc.numStates); - assert(!iter.empty()); - u32 iter_offset = addIteratorToTable(bc, iter); - - // Construct our program, starting with the SPARSE_ITER_BEGIN - // instruction, keeping track of the jump offset for each sub-program. - vector sparse_program; - vector jump_table; - - sparse_program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN, - JumpTarget::PROGRAM_END)); - u32 curr_offset = programLength(program) + programLength(sparse_program); - - for (const auto &e : predProgramLists) { - DEBUG_PRINTF("subprogram %zu has offset %u\n", jump_table.size(), - curr_offset); - jump_table.push_back(curr_offset); - assert(!e.second.empty()); - auto subprog = flattenProgram(e.second); - - if (e.first != keys.back()) { - // For all but the last subprogram, replace the END instruction - // with a SPARSE_ITER_NEXT. - assert(!subprog.empty()); - assert(subprog.back().code() == ROSE_INSTR_END); - subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT, - JumpTarget::PROGRAM_END); - } - - curr_offset += programLength(subprog); - insert(&sparse_program, end(sparse_program), subprog); + for (const u32 &key : pred_blocks | map_keys) { + keys.push_back(key); } - // Strip the END instruction from the last block. - assert(sparse_program.back().code() == ROSE_INSTR_END); - sparse_program.pop_back(); + const RoseInstruction *end_inst = sparse_program.end_instruction(); + auto ri = make_unique(bc.numStates, keys, end_inst); + sparse_program.add_before_end(move(ri)); - sparse_program = flattenProgram({sparse_program}); - - // Write the jump table into the bytecode. - const u32 jump_table_offset = - add_to_engine_blob(bc, begin(jump_table), end(jump_table)); - - // Write jump table and iterator offset into sparse iter instructions. 
-    auto keys_it = begin(keys);
-    for (auto &ri : sparse_program) {
-        switch (ri.code()) {
-        case ROSE_INSTR_SPARSE_ITER_BEGIN:
-            ri.u.sparseIterBegin.iter_offset = iter_offset;
-            ri.u.sparseIterBegin.jump_table = jump_table_offset;
-            break;
-        case ROSE_INSTR_SPARSE_ITER_NEXT:
-            ri.u.sparseIterNext.iter_offset = iter_offset;
-            ri.u.sparseIterNext.jump_table = jump_table_offset;
-            assert(keys_it != end(keys));
-            ri.u.sparseIterNext.state = *keys_it++;
-            break;
-        default:
-            break;
-        }
-    }
-
-    program.insert(end(program), begin(sparse_program), end(sparse_program));
+    RoseProgram &block = pred_blocks.begin()->second;
+    sparse_program.add_before_end(move(block));
+    program.add_block(move(sparse_program));
 }
 
 static
-void addPredBlocks(build_context &bc,
-                   map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
-                   vector<RoseInstruction> &program) {
-    const size_t num_preds = predProgramLists.size();
+void addPredBlocksMulti(build_context &bc, map<u32, RoseProgram> &pred_blocks,
+                        RoseProgram &program) {
+    assert(!pred_blocks.empty());
+
+    RoseProgram sparse_program;
+    const RoseInstruction *end_inst = sparse_program.end_instruction();
+    vector<pair<u32, const RoseInstruction *>> jump_table;
+
+    // BEGIN instruction.
+    auto ri_begin =
+        make_unique<RoseInstrSparseIterBegin>(bc.numStates, end_inst);
+    RoseInstrSparseIterBegin *begin_inst = ri_begin.get();
+    sparse_program.add_before_end(move(ri_begin));
+
+    // NEXT instructions, one per pred program.
+    u32 prev_key = pred_blocks.begin()->first;
+    for (auto it = next(begin(pred_blocks)); it != end(pred_blocks); ++it) {
+        auto ri = make_unique<RoseInstrSparseIterNext>(prev_key, begin_inst,
+                                                       end_inst);
+        sparse_program.add_before_end(move(ri));
+        prev_key = it->first;
+    }
+
+    // Splice in each pred program after its BEGIN/NEXT.
+    auto out_it = begin(sparse_program);
+    for (auto &m : pred_blocks) {
+        u32 key = m.first;
+        RoseProgram &flat_prog = m.second;
+        assert(!flat_prog.empty());
+        const size_t block_len = flat_prog.size() - 1; // without INSTR_END.
+
+        assert(dynamic_cast<const RoseInstrSparseIterBegin *>(out_it->get()) ||
+               dynamic_cast<const RoseInstrSparseIterNext *>(out_it->get()));
+        out_it = sparse_program.insert(++out_it, move(flat_prog));
+
+        // Jump table target for this key is the beginning of the block we just
+        // spliced in.
+        jump_table.emplace_back(key, out_it->get());
+
+        assert(distance(begin(sparse_program), out_it) + block_len <=
+               sparse_program.size());
+        advance(out_it, block_len);
+    }
+
+    // Write the jump table back into the SPARSE_ITER_BEGIN instruction.
+    begin_inst->jump_table = move(jump_table);
+
+    program.add_block(move(sparse_program));
+}
+
+static
+void addPredBlocks(build_context &bc, map<u32, RoseProgram> &pred_blocks,
+                   RoseProgram &program) {
+    // Trim empty blocks, if any exist.
+    for (auto it = pred_blocks.begin(); it != pred_blocks.end();) {
+        if (it->second.empty()) {
+            it = pred_blocks.erase(it);
+        } else {
+            ++it;
+        }
+    }
+
+    const size_t num_preds = pred_blocks.size();
     if (num_preds == 0) {
-        program.emplace_back(ROSE_INSTR_END);
         return;
     }
 
     if (num_preds == 1) {
-        addPredBlocksSingle(predProgramLists, program);
+        const auto head = pred_blocks.begin();
+        addPredBlockSingle(head->first, head->second, program);
         return;
     }
 
-    addPredBlocksMulti(bc, predProgramLists, program);
-}
-
-/**
- * Returns the pair (program offset, sparse iter offset).
- */
-static
-vector<RoseInstruction> makeSparseIterProgram(build_context &bc,
-                    map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
-                    const vector<RoseInstruction> &root_program,
-                    const vector<RoseInstruction> &pre_program) {
-    vector<RoseInstruction> program;
-    u32 curr_offset = 0;
-
-    // Add pre-program first.
-    for (const auto &ri : pre_program) {
-        program.push_back(ri);
-        curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
+    // First, see if all our blocks are equivalent, in which case we can
+    // collapse them down into one.
+    const auto &blocks = pred_blocks | map_values;
+    if (all_of(begin(blocks), end(blocks), [&](const RoseProgram &block) {
+            return RoseProgramEquivalence()(*begin(blocks), block);
+        })) {
+        DEBUG_PRINTF("all blocks equiv\n");
+        addPredBlocksAny(bc, pred_blocks, program);
+        return;
     }
 
-    // Add blocks to deal with non-root edges (triggered by sparse iterator or
-    // mmbit_isset checks). This operation will flatten the program up to this
-    // point.
-    addPredBlocks(bc, predProgramLists, program);
-
-    // If we have a root program, replace the END instruction with it. Note
-    // that the root program has already been flattened.
-    assert(!program.empty());
-    assert(program.back().code() == ROSE_INSTR_END);
-    if (!root_program.empty()) {
-        program.pop_back();
-        program.insert(end(program), begin(root_program), end(root_program));
-    }
-
-    assert(!program.empty());
-    assert(program.back().code() == ROSE_INSTR_END);
-    program.pop_back();
-    program = flattenProgram({program});
-    return program;
+    addPredBlocksMulti(bc, pred_blocks, program);
 }
 
 static
 void makePushDelayedInstructions(const RoseBuildImpl &build, u32 final_id,
-                                 vector<RoseInstruction> &program) {
+                                 RoseProgram &program) {
     const auto &lit_infos = getLiteralInfoByFinalId(build, final_id);
     const auto &arb_lit_info = **lit_infos.begin();
     if (arb_lit_info.delayed_ids.empty()) {
@@ -4444,10 +3933,9 @@ void makePushDelayedInstructions(const RoseBuildImpl &build, u32 final_id,
         DEBUG_PRINTF("final_id=%u delay=%u child_id=%u\n", final_id,
                      child_literal.delay, child_id);
 
-        auto ri = RoseInstruction(ROSE_INSTR_PUSH_DELAYED);
-        ri.u.pushDelayed.delay = verify_u8(child_literal.delay);
-        ri.u.pushDelayed.index = delay_index;
-        program.push_back(move(ri));
+        auto ri = make_unique<RoseInstrPushDelayed>(
+            verify_u8(child_literal.delay), delay_index);
+        program.add_before_end(move(ri));
     }
 }
 
@@ -4465,21 +3953,17 @@ rose_group getFinalIdGroupsUnion(const RoseBuildImpl &build, u32 final_id) {
 
 static
 void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 final_id,
-                               vector<RoseInstruction> &program) {
+                               RoseProgram &program) {
     rose_group groups = getFinalIdGroupsUnion(build, final_id);
     if (!groups) {
         return;
     }
-
-    auto ri = RoseInstruction(ROSE_INSTR_CHECK_GROUPS);
-    ri.u.checkGroups.groups = groups;
-    program.push_back(move(ri));
+    program.add_before_end(make_unique<RoseInstrCheckGroups>(groups));
 }
 
 static
 void makeCheckLitMaskInstruction(const RoseBuildImpl &build, build_context &bc,
-                                 u32 final_id,
-                                 vector<RoseInstruction> &program) {
+                                 u32 final_id, RoseProgram &program) {
     assert(contains(build.final_id_to_literal, final_id));
     const auto &lit_infos = getLiteralInfoByFinalId(build, final_id);
     assert(!lit_infos.empty());
@@ -4510,7 +3994,7 @@ void makeCheckLitMaskInstruction(const RoseBuildImpl &build, build_context &bc,
 
 static
 void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 final_id,
-                                vector<RoseInstruction> &program) {
+                                RoseProgram &program) {
     assert(contains(build.final_id_to_literal, final_id));
     const auto &lit_infos = getLiteralInfoByFinalId(build, final_id);
 
@@ -4524,10 +4008,8 @@ void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 final_id,
     }
 
     DEBUG_PRINTF("final_id %u squashes 0x%llx\n", final_id, groups);
-
-    auto ri = RoseInstruction(ROSE_INSTR_SQUASH_GROUPS);
-    ri.u.squashGroups.groups = ~groups; // Negated, so we can just AND it in.
-    program.push_back(move(ri));
+    program.add_before_end(
+        make_unique<RoseInstrSquashGroups>(~groups)); // Note negated.
 }
 
 static
@@ -4546,7 +4028,7 @@ u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) {
 
 static
 void makeRecordAnchoredInstruction(const RoseBuildImpl &build,
                                    build_context &bc, u32 final_id,
-                                   vector<RoseInstruction> &program) {
+                                   RoseProgram &program) {
     assert(contains(build.final_id_to_literal, final_id));
     const auto &lit_ids = build.final_id_to_literal.at(final_id);
 
@@ -4568,9 +4050,7 @@ void makeRecordAnchoredInstruction(const RoseBuildImpl &build,
         return;
     }
 
-    auto ri = RoseInstruction(ROSE_INSTR_RECORD_ANCHORED);
-    ri.u.recordAnchored.id = final_id;
-    program.push_back(move(ri));
+    program.add_before_end(make_unique<RoseInstrRecordAnchored>(final_id));
 }
 
 static
@@ -4590,7 +4070,7 @@ static
 void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc,
                                   u32 final_id,
                                   const vector<RoseEdge> &lit_edges,
-                                  vector<RoseInstruction> &program) {
+                                  RoseProgram &program) {
     if (lit_edges.empty()) {
         return;
     }
@@ -4636,9 +4116,7 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc,
     assert(min_offset < UINT32_MAX);
 
     DEBUG_PRINTF("adding lit early check, min_offset=%u\n", min_offset);
-    auto ri = RoseInstruction(ROSE_INSTR_CHECK_LIT_EARLY);
-    ri.u.checkLitEarly.min_offset = min_offset;
-    program.push_back(move(ri));
+    program.add_before_end(make_unique<RoseInstrCheckLitEarly>(min_offset));
 }
 
 static
@@ -4656,47 +4134,49 @@ bool hasDelayedLiteral(RoseBuildImpl &build,
 }
 
 static
-vector<RoseInstruction> buildLitInitialProgram(RoseBuildImpl &build,
-                                               build_context &bc, u32 final_id,
-                                               const vector<RoseEdge> &lit_edges) {
-    vector<RoseInstruction> pre_program;
+RoseProgram buildLitInitialProgram(RoseBuildImpl &build, build_context &bc,
+                                   u32 final_id,
+                                   const vector<RoseEdge> &lit_edges) {
+    RoseProgram program;
 
     // No initial program for EOD.
     if (final_id == MO_INVALID_IDX) {
-        return pre_program;
+        return program;
    }
 
     DEBUG_PRINTF("final_id %u\n", final_id);
 
     // Check lit mask.
-    makeCheckLitMaskInstruction(build, bc, final_id, pre_program);
+    makeCheckLitMaskInstruction(build, bc, final_id, program);
 
     // Check literal groups. This is an optimisation that we only perform for
     // delayed literals, as their groups may be switched off; ordinarily, we
     // can trust the HWLM matcher.
     if (hasDelayedLiteral(build, lit_edges)) {
-        makeGroupCheckInstruction(build, final_id, pre_program);
+        makeGroupCheckInstruction(build, final_id, program);
     }
 
     // Add instructions for pushing delayed matches, if there are any.
-    makePushDelayedInstructions(build, final_id, pre_program);
+    makePushDelayedInstructions(build, final_id, program);
 
     // Add pre-check for early literals in the floating table.
-    makeCheckLitEarlyInstruction(build, bc, final_id, lit_edges, pre_program);
+    makeCheckLitEarlyInstruction(build, bc, final_id, lit_edges, program);
 
-    return pre_program;
+    return program;
 }
 
 static
-vector<RoseInstruction> buildLiteralProgram(RoseBuildImpl &build,
-                                            build_context &bc, u32 final_id,
-                                            const vector<RoseEdge> &lit_edges) {
+RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc,
+                                u32 final_id,
+                                const vector<RoseEdge> &lit_edges) {
     const auto &g = build.g;
 
     DEBUG_PRINTF("final id %u, %zu lit edges\n", final_id, lit_edges.size());
 
-    // pred state id -> list of programs
-    map<u32, vector<vector<RoseInstruction>>> predProgramLists;
+    RoseProgram program;
+
+    // Predecessor state id -> program block.
+    map<u32, RoseProgram> pred_blocks;
 
     // Construct sparse iter sub-programs.
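+    // Each non-root edge contributes one block, keyed by the state index of
+    // its source (predecessor) role; addPredBlocks() below wraps the blocks
+    // in CHECK_STATE or sparse iterator instructions.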
     for (const auto &e : lit_edges) {
@@ -4708,64 +4188,51 @@ vector<RoseInstruction> buildLiteralProgram(RoseBuildImpl &build,
                      g[target(e, g)].idx);
         assert(contains(bc.roleStateIndices, u));
         u32 pred_state = bc.roleStateIndices.at(u);
-        auto program = makeProgram(build, bc, e);
-        if (program.empty()) {
-            continue;
-        }
-        predProgramLists[pred_state].push_back(program);
+        pred_blocks[pred_state].add_block(makeProgram(build, bc, e));
     }
 
-    // Construct sub-program for handling root roles.
-    vector<vector<RoseInstruction>> root_programs;
+    // Add blocks to deal with non-root edges (triggered by sparse iterator or
+    // mmbit_isset checks).
+    addPredBlocks(bc, pred_blocks, program);
+
+    // Add blocks to handle root roles.
     for (const auto &e : lit_edges) {
         const auto &u = source(e, g);
         if (!build.isAnyStart(u)) {
             continue;
         }
         DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].idx, g[target(e, g)].idx);
-        auto role_prog = makeProgram(build, bc, e);
-        if (role_prog.empty()) {
-            continue;
-        }
-        root_programs.push_back(role_prog);
+        program.add_block(makeProgram(build, bc, e));
     }
 
     if (final_id != MO_INVALID_IDX) {
-        vector<RoseInstruction> prog;
+        RoseProgram root_block;
 
         // Literal may squash groups.
-        makeGroupSquashInstruction(build, final_id, prog);
+        makeGroupSquashInstruction(build, final_id, root_block);
 
         // Literal may be anchored and need to be recorded.
-        makeRecordAnchoredInstruction(build, bc, final_id, prog);
+        makeRecordAnchoredInstruction(build, bc, final_id, root_block);
 
-        if (!prog.empty()) {
-            root_programs.push_back(move(prog));
-        }
+        program.add_block(move(root_block));
     }
 
-    vector<RoseInstruction> root_program;
-    if (!root_programs.empty()) {
-        root_program = flattenProgram(root_programs);
-    }
-
-    auto pre_program = buildLitInitialProgram(build, bc, final_id, lit_edges);
-
-    // Put it all together.
-    return makeSparseIterProgram(bc, predProgramLists, root_program,
-                                 pre_program);
+    // Construct initial program up front, as its early checks must be able to
+    // jump to end and terminate processing for this literal.
+    auto lit_program = buildLitInitialProgram(build, bc, final_id, lit_edges);
+    lit_program.add_before_end(move(program));
+    return lit_program;
 }
 
 static
 u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id,
                         const vector<RoseEdge> &lit_edges) {
-    auto program = buildLiteralProgram(build, bc, final_id, lit_edges);
+    RoseProgram program = buildLiteralProgram(build, bc, final_id, lit_edges);
     if (program.empty()) {
         return 0;
     }
-    // Note: already flattened.
     applyFinalSpecialisation(program);
-    return writeProgram(bc, program);
+    return writeProgram(bc, move(program));
 }
 
 static
@@ -4777,13 +4244,12 @@ u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc,
         return 0; // No delayed IDs, no work to do.
     }
 
-    vector<RoseInstruction> program;
+    RoseProgram program;
     makeCheckLitMaskInstruction(build, bc, final_id, program);
     makePushDelayedInstructions(build, final_id, program);
     assert(!program.empty());
-    program = flattenProgram({program});
     applyFinalSpecialisation(program);
-    return writeProgram(bc, program);
+    return writeProgram(bc, move(program));
 }
 
 static
@@ -4844,9 +4310,9 @@ pair<u32, u32> buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) {
     }
 
     u32 litProgramsOffset =
-        add_to_engine_blob(bc, begin(bc.litPrograms), end(bc.litPrograms));
-    u32 delayRebuildProgramsOffset = add_to_engine_blob(
-        bc, begin(delayRebuildPrograms), end(delayRebuildPrograms));
+        bc.engine_blob.add(begin(bc.litPrograms), end(bc.litPrograms));
+    u32 delayRebuildProgramsOffset = bc.engine_blob.add(
+        begin(delayRebuildPrograms), end(delayRebuildPrograms));
 
     return {litProgramsOffset, delayRebuildProgramsOffset};
 }
 
@@ -4884,35 +4350,31 @@ pair<u32, u32> buildReportPrograms(RoseBuildImpl &build, build_context &bc) {
     vector<u32> programs;
     programs.reserve(reports.size());
 
-    vector<RoseInstruction> program;
     for (ReportID id : reports) {
-        program.clear();
+        RoseProgram program;
         const bool has_som = false;
         makeCatchupMpv(build, bc, id, program);
         makeReport(build, id, has_som, program);
-        program = flattenProgram({program});
         applyFinalSpecialisation(program);
-        u32 offset = writeProgram(bc, program);
+        u32 offset = writeProgram(bc, move(program));
         programs.push_back(offset);
         build.rm.setProgramOffset(id, offset);
         DEBUG_PRINTF("program for report %u @ %u (%zu instructions)\n", id,
                      programs.back(), program.size());
     }
 
-    u32 offset = add_to_engine_blob(bc, begin(programs), end(programs));
+    u32 offset = bc.engine_blob.add(begin(programs), end(programs));
     u32 count = verify_u32(programs.size());
     return {offset, count};
 }
 
 static
-vector<RoseInstruction> makeEodAnchorProgram(RoseBuildImpl &build,
-                                             build_context &bc,
-                                             const RoseEdge &e,
-                                             const bool multiple_preds) {
+RoseProgram makeEodAnchorProgram(RoseBuildImpl &build, build_context &bc,
                                 const RoseEdge &e, const bool multiple_preds) {
     const RoseGraph &g = build.g;
     const RoseVertex v = target(e, g);
 
-    vector<RoseInstruction> program;
+    RoseProgram program;
 
     if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
         makeRoleCheckBounds(build, v, e, program);
@@ -4927,9 +4389,11 @@ vector<RoseInstruction> makeEodAnchorProgram(RoseBuildImpl &build,
     makeCatchup(build, bc, reports, program);
 
     const bool has_som = false;
+    RoseProgram report_block;
     for (const auto &id : reports) {
-        makeReport(build, id, has_som, program);
+        makeReport(build, id, has_som, report_block);
     }
+    program.add_before_end(move(report_block));
 
     return program;
 }
@@ -4961,11 +4425,11 @@ bool hasEodMatcher(const RoseBuildImpl &build) {
 
 static
 void addEodAnchorProgram(RoseBuildImpl &build, build_context &bc,
-                         bool in_etable, vector<RoseInstruction> &program) {
+                         bool in_etable, RoseProgram &program) {
     const RoseGraph &g = build.g;
 
-    // pred state id -> list of programs
-    map<u32, vector<vector<RoseInstruction>>> predProgramLists;
+    // Predecessor state id -> program block.
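+    // (As in buildLiteralProgram(), addPredBlocks() below wraps these blocks
+    // in the appropriate state checks.)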
+    map<u32, RoseProgram> pred_blocks;
 
     for (auto v : vertices_range(g)) {
         if (!g[v].eod_accept) {
@@ -4994,29 +4458,18 @@ void addEodAnchorProgram(RoseBuildImpl &build, build_context &bc,
         for (const auto &e : edge_list) {
             RoseVertex u = source(e, g);
             assert(contains(bc.roleStateIndices, u));
-            u32 predStateIdx = bc.roleStateIndices.at(u);
-
-            auto prog = makeEodAnchorProgram(build, bc, e, multiple_preds);
-            if (prog.empty()) {
-                continue;
-            }
-            predProgramLists[predStateIdx].push_back(prog);
+            u32 pred_state = bc.roleStateIndices.at(u);
+            pred_blocks[pred_state].add_block(
+                makeEodAnchorProgram(build, bc, e, multiple_preds));
         }
     }
 
-    if (predProgramLists.empty()) {
-        return;
-    }
-    if (!program.empty()) {
-        assert(program.back().code() == ROSE_INSTR_END);
-        program.pop_back();
-    }
-    addPredBlocks(bc, predProgramLists, program);
+    addPredBlocks(bc, pred_blocks, program);
 }
 
 static
 void addEodEventProgram(RoseBuildImpl &build, build_context &bc,
-                        vector<RoseInstruction> &program) {
+                        RoseProgram &program) {
     if (build.eod_event_literal_id == MO_INVALID_IDX) {
         return;
     }
@@ -5042,61 +4495,47 @@ void addEodEventProgram(RoseBuildImpl &build, build_context &bc,
                    tie(g[source(b, g)].idx, g[target(b, g)].idx);
          });
 
-    auto prog = buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list);
-    program.insert(end(program), begin(prog), end(prog));
+    program.add_block(
+        buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list));
 }
 
 static
-void addEnginesEodProgram(u32 eodNfaIterOffset,
-                          vector<RoseInstruction> &program) {
+void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program) {
     if (!eodNfaIterOffset) {
         return;
     }
 
-    auto ri = RoseInstruction(ROSE_INSTR_ENGINES_EOD);
-    ri.u.enginesEod.iter_offset = eodNfaIterOffset;
-    if (!program.empty()) {
-        assert(program.back().code() == ROSE_INSTR_END);
-        program.pop_back();
-    }
-    program.push_back(move(ri));
-    program.emplace_back(ROSE_INSTR_END);
+    RoseProgram block;
+    block.add_before_end(make_unique<RoseInstrEnginesEod>(eodNfaIterOffset));
+    program.add_block(move(block));
 }
 
 static
-void addSuffixesEodProgram(const RoseBuildImpl &build,
-                           vector<RoseInstruction> &program) {
+void addSuffixesEodProgram(const RoseBuildImpl &build, RoseProgram &program) {
     if (!hasEodAnchoredSuffix(build)) {
         return;
     }
 
-    if (!program.empty()) {
-        assert(program.back().code() == ROSE_INSTR_END);
-        program.pop_back();
-    }
-    program.emplace_back(ROSE_INSTR_SUFFIXES_EOD);
-    program.emplace_back(ROSE_INSTR_END);
+    RoseProgram block;
+    block.add_before_end(make_unique<RoseInstrSuffixesEod>());
+    program.add_block(move(block));
 }
 
 static
-void addMatcherEodProgram(const RoseBuildImpl &build,
-                          vector<RoseInstruction> &program) {
+void addMatcherEodProgram(const RoseBuildImpl &build, RoseProgram &program) {
     if (!hasEodMatcher(build)) {
         return;
     }
 
-    if (!program.empty()) {
-        assert(program.back().code() == ROSE_INSTR_END);
-        program.pop_back();
-    }
-    program.emplace_back(ROSE_INSTR_MATCHER_EOD);
-    program.emplace_back(ROSE_INSTR_END);
+    RoseProgram block;
+    block.add_before_end(make_unique<RoseInstrMatcherEod>());
+    program.add_block(move(block));
 }
 
 static
 u32 writeEodProgram(RoseBuildImpl &build, build_context &bc,
                     u32 eodNfaIterOffset) {
-    vector<RoseInstruction> program;
+    RoseProgram program;
 
     addEodEventProgram(build, bc, program);
     addEnginesEodProgram(eodNfaIterOffset, program);
@@ -5105,17 +4544,12 @@ u32 writeEodProgram(RoseBuildImpl &build, build_context &bc,
     addEodAnchorProgram(build, bc, true, program);
     addSuffixesEodProgram(build, program);
 
-    if (program.size() == 1) {
-        assert(program.back().code() == ROSE_INSTR_END);
-        return 0;
-    }
-
     if (program.empty()) {
         return 0;
     }
 
     applyFinalSpecialisation(program);
-    return writeProgram(bc, program);
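+    // Ownership of the program moves into writeProgram(), which lays the
+    // instructions out in the engine bytecode.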
+    return writeProgram(bc, move(program));
 }
 
 static
@@ -5358,13 +4792,12 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
 
     u32 currOffset;  /* relative to base of RoseEngine */
     if (!bc.engine_blob.empty()) {
-        currOffset = bc.engine_blob_base + byte_length(bc.engine_blob);
+        currOffset = bc.engine_blob.base_offset + bc.engine_blob.size();
     } else {
         currOffset = sizeof(RoseEngine);
     }
 
-    UNUSED const size_t engineBlobSize =
-        byte_length(bc.engine_blob); // test later
+    UNUSED const size_t engineBlobSize = bc.engine_blob.size(); // test later
 
     currOffset = ROUNDUP_CL(currOffset);
     DEBUG_PRINTF("currOffset %u\n", currOffset);
@@ -5616,7 +5049,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
                        &engine->tStateSize);
 
     // Copy in other tables
-    copy_bytes(ptr + bc.engine_blob_base, bc.engine_blob);
+    bc.engine_blob.write_bytes(engine.get());
     copy_bytes(ptr + engine->leftOffset, leftInfoTable);
 
     fillLookaroundTables(ptr + lookaroundTableOffset,
@@ -5627,7 +5060,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
 
     // Safety check: we shouldn't have written anything to the engine blob
     // after we copied it into the engine bytecode.
-    assert(byte_length(bc.engine_blob) == engineBlobSize);
+    assert(bc.engine_blob.size() == engineBlobSize);
 
     // Add a small write engine if appropriate.
     engine = addSmallWriteEngine(*this, move(engine));
diff --git a/src/rose/rose_build_engine_blob.h b/src/rose/rose_build_engine_blob.h
new file mode 100644
index 00000000..0914502e
--- /dev/null
+++ b/src/rose/rose_build_engine_blob.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_ENGINE_BLOB_H
+#define ROSE_BUILD_ENGINE_BLOB_H
+
+#include "rose_internal.h"
+
+#include "ue2common.h"
+#include "util/alloc.h"
+#include "util/container.h"
+#include "util/verify_types.h"
+
+#include <type_traits>
+#include <vector>
+
+#include <boost/core/noncopyable.hpp>
+
+namespace ue2 {
+
+class RoseEngineBlob : boost::noncopyable {
+public:
+    /** \brief Base offset of engine_blob in the Rose engine bytecode. */
+    static constexpr u32 base_offset = ROUNDUP_CL(sizeof(RoseEngine));
+
+    bool empty() const {
+        return blob.empty();
+    }
+
+    size_t size() const {
+        return blob.size();
+    }
+
+    const char *data() const {
+        return blob.data();
+    }
+
+    u32 add(const void *a, const size_t len, const size_t align) {
+        pad(align);
+
+        size_t rv = base_offset + blob.size();
+        assert(rv >= base_offset);
+        DEBUG_PRINTF("write %zu bytes at offset %zu\n", len, rv);
+
+        assert(ISALIGNED_N(blob.size(), align));
+
+        blob.resize(blob.size() + len);
+        memcpy(&blob.back() - len + 1, a, len);
+
+        return verify_u32(rv);
+    }
+
+    template <typename T>
+    u32 add(const T &a) {
+        static_assert(std::is_pod<T>::value, "should be pod");
+        return add(&a, sizeof(a), alignof(T));
+    }
+
+    template <typename T>
+    u32 add(const T &a, const size_t len) {
+        static_assert(std::is_pod<T>::value, "should be pod");
+        return add(&a, len, alignof(T));
+    }
+
+    template <typename Iter>
+    u32 add(Iter b, const Iter &e) {
+        using value_type = typename std::iterator_traits<Iter>::value_type;
+        static_assert(std::is_pod<value_type>::value, "should be pod");
+
+        if (b == e) {
+            return 0;
+        }
+
+        u32 offset = add(*b);
+        for (++b; b != e; ++b) {
+            add(*b);
+        }
+
+        return offset;
+    }
+
+    void write_bytes(RoseEngine *engine) {
+        copy_bytes((char *)engine + base_offset, blob);
+    }
+
+private:
+    void pad(size_t align) {
+        assert(ISALIGNED_N(base_offset, align));
+        size_t s = blob.size();
+
+        if (ISALIGNED_N(s, align)) {
+            return;
+        }
+
+        blob.resize(s + align - s % align);
+    }
+
+    /**
+     * \brief Contents of the Rose bytecode immediately following the
+     * RoseEngine.
+     */
+    std::vector<char, AlignedAllocator<char, 64>> blob;
+};
+
+} // namespace ue2
+
+#endif // ROSE_BUILD_ENGINE_BLOB_H
diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp
new file mode 100644
index 00000000..73740976
--- /dev/null
+++ b/src/rose/rose_build_program.cpp
@@ -0,0 +1,491 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_engine_blob.h"
+#include "rose_build_program.h"
+#include "util/container.h"
+#include "util/multibit_build.h"
+#include "util/verify_types.h"
+
+#include <algorithm>
+#include <cstring>
+
+using namespace std;
+
+namespace ue2 {
+
+/* Destructors to avoid weak vtables. */
+
+RoseInstruction::~RoseInstruction() = default;
+RoseInstrCatchUp::~RoseInstrCatchUp() = default;
+RoseInstrCatchUpMpv::~RoseInstrCatchUpMpv() = default;
+RoseInstrSomZero::~RoseInstrSomZero() = default;
+RoseInstrSuffixesEod::~RoseInstrSuffixesEod() = default;
+RoseInstrMatcherEod::~RoseInstrMatcherEod() = default;
+RoseInstrEnd::~RoseInstrEnd() = default;
+
+using OffsetMap = RoseInstruction::OffsetMap;
+
+static
+u32 calc_jump(const OffsetMap &offset_map, const RoseInstruction *from,
+              const RoseInstruction *to) {
+    DEBUG_PRINTF("computing relative jump from %p to %p\n", from, to);
+    assert(from && contains(offset_map, from));
+    assert(to && contains(offset_map, to));
+
+    u32 from_offset = offset_map.at(from);
+    u32 to_offset = offset_map.at(to);
+    DEBUG_PRINTF("offsets: %u -> %u\n", from_offset, to_offset);
+    assert(from_offset <= to_offset);
+
+    return to_offset - from_offset;
+}
+
+void RoseInstrAnchoredDelay::write(void *dest, RoseEngineBlob &blob,
+                                   const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->groups = groups;
+    inst->done_jump = calc_jump(offset_map, this, target);
+}
+
+void RoseInstrCheckLitEarly::write(void *dest, RoseEngineBlob &blob,
+                                   const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->min_offset = min_offset;
+}
+
+void RoseInstrCheckGroups::write(void *dest, RoseEngineBlob &blob,
+                                 const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->groups = groups;
+}
+
+void RoseInstrCheckOnlyEod::write(void *dest, RoseEngineBlob &blob,
+                                  const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
+void RoseInstrCheckBounds::write(void *dest, RoseEngineBlob &blob,
+                                 const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->min_bound = min_bound;
+    inst->max_bound = max_bound;
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
+void RoseInstrCheckNotHandled::write(void *dest, RoseEngineBlob &blob,
+                                     const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->key = key;
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
+void RoseInstrCheckLookaround::write(void *dest, RoseEngineBlob &blob,
+                                     const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->index = index;
+    inst->count = count;
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
+void RoseInstrCheckMask::write(void *dest, RoseEngineBlob &blob,
+                               const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->and_mask = and_mask;
+    inst->cmp_mask = cmp_mask;
+    inst->neg_mask = neg_mask;
+    inst->offset = offset;
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
+void RoseInstrCheckMask32::write(void *dest, RoseEngineBlob &blob,
+                                 const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    copy(begin(and_mask), end(and_mask), inst->and_mask);
+    copy(begin(cmp_mask), end(cmp_mask), inst->cmp_mask);
+    inst->neg_mask = neg_mask;
+    inst->offset = offset;
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
+void RoseInstrCheckByte::write(void *dest, RoseEngineBlob &blob,
+                               const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->and_mask = and_mask;
+    inst->cmp_mask = cmp_mask;
+    inst->negation = negation;
+    inst->offset = offset;
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
+void RoseInstrCheckInfix::write(void *dest, RoseEngineBlob &blob,
+                                const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->queue = queue;
+    inst->lag = lag;
+    inst->report = report;
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
+void RoseInstrCheckPrefix::write(void *dest, RoseEngineBlob &blob,
+                                 const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->queue = queue;
+    inst->lag = lag;
+    inst->report = report;
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
+void RoseInstrPushDelayed::write(void *dest, RoseEngineBlob &blob,
+                                 const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->delay = delay;
+    inst->index = index;
+}
+
+void RoseInstrRecordAnchored::write(void *dest, RoseEngineBlob &blob,
+                                    const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->id = id;
+}
+
+void RoseInstrSomAdjust::write(void *dest, RoseEngineBlob &blob,
+                               const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->distance = distance;
+}
+
+void RoseInstrSomLeftfix::write(void *dest, RoseEngineBlob &blob,
+                                const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->queue = queue;
+    inst->lag = lag;
+}
+
+void RoseInstrSomFromReport::write(void *dest, RoseEngineBlob &blob,
+                                   const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->som = som;
+}
+
+void RoseInstrTriggerInfix::write(void *dest, RoseEngineBlob &blob,
+                                  const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->cancel = cancel;
+    inst->queue = queue;
+    inst->event = event;
+}
+
+void RoseInstrTriggerSuffix::write(void *dest, RoseEngineBlob &blob,
+                                   const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->queue = queue;
+    inst->event = event;
+}
+
+void RoseInstrDedupe::write(void *dest, RoseEngineBlob &blob,
+                            const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->quash_som = quash_som;
+    inst->dkey = dkey;
+    inst->offset_adjust = offset_adjust;
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
+void RoseInstrDedupeSom::write(void *dest, RoseEngineBlob &blob,
+                               const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->quash_som = quash_som;
+    inst->dkey = dkey;
+    inst->offset_adjust = offset_adjust;
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
+void RoseInstrReportChain::write(void *dest, RoseEngineBlob &blob,
+                                 const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->event = event;
+    inst->top_squash_distance = top_squash_distance;
+}
+
+void RoseInstrReportSomInt::write(void *dest, RoseEngineBlob &blob,
+                                  const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->som = som;
+}
+
+void RoseInstrReportSomAware::write(void *dest, RoseEngineBlob &blob,
+                                    const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->som = som;
+}
+
+void RoseInstrReport::write(void *dest, RoseEngineBlob &blob,
+                            const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->onmatch = onmatch;
+    inst->offset_adjust = offset_adjust;
+}
+
+void RoseInstrReportExhaust::write(void *dest, RoseEngineBlob &blob,
+                                   const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->onmatch = onmatch;
+    inst->offset_adjust = offset_adjust;
+    inst->ekey = ekey;
+}
+
+void RoseInstrReportSom::write(void *dest, RoseEngineBlob &blob,
+                               const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->onmatch = onmatch;
+    inst->offset_adjust = offset_adjust;
+}
+
+void RoseInstrReportSomExhaust::write(void *dest, RoseEngineBlob &blob,
+                                      const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->onmatch = onmatch;
+    inst->offset_adjust = offset_adjust;
+    inst->ekey = ekey;
+}
+
+void RoseInstrDedupeAndReport::write(void *dest, RoseEngineBlob &blob,
+                                     const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->quash_som = quash_som;
+    inst->dkey = dkey;
+    inst->onmatch = onmatch;
+    inst->offset_adjust = offset_adjust;
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
+void RoseInstrFinalReport::write(void *dest, RoseEngineBlob &blob,
+                                 const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->onmatch = onmatch;
+    inst->offset_adjust = offset_adjust;
+}
+
+void RoseInstrCheckExhausted::write(void *dest, RoseEngineBlob &blob,
+                                    const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->ekey = ekey;
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
+void RoseInstrCheckMinLength::write(void *dest, RoseEngineBlob &blob,
+                                    const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->end_adj = end_adj;
+    inst->min_length = min_length;
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
+void RoseInstrSetState::write(void *dest, RoseEngineBlob &blob,
+                              const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->index = index;
+}
+
+void RoseInstrSetGroups::write(void *dest, RoseEngineBlob &blob,
+                               const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->groups = groups;
+}
+
+void RoseInstrSquashGroups::write(void *dest, RoseEngineBlob &blob,
+                                  const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->groups = groups;
+}
+
+void RoseInstrCheckState::write(void *dest, RoseEngineBlob &blob,
+                                const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->index = index;
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
+void RoseInstrSparseIterBegin::write(void *dest, RoseEngineBlob &blob,
+                                     const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->fail_jump = calc_jump(offset_map, this, target);
+
+    // Resolve and write the multibit sparse iterator and the jump table.
+    vector<u32> keys;
+    vector<u32> jump_offsets;
+    for (const auto &jump : jump_table) {
+        keys.push_back(jump.first);
+        assert(contains(offset_map, jump.second));
+        jump_offsets.push_back(offset_map.at(jump.second));
+    }
+
+    vector<mmbit_sparse_iter> iter;
+    mmbBuildSparseIterator(iter, keys, num_keys);
+    assert(!iter.empty());
+    inst->iter_offset = blob.add(iter.begin(), iter.end());
+    inst->jump_table = blob.add(jump_offsets.begin(), jump_offsets.end());
+
+    // Store offsets for corresponding SPARSE_ITER_NEXT operations.
+    is_written = true;
+    iter_offset = inst->iter_offset;
+    jump_table_offset = inst->jump_table;
+}
+
+void RoseInstrSparseIterNext::write(void *dest, RoseEngineBlob &blob,
+                                    const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->state = state;
+    inst->fail_jump = calc_jump(offset_map, this, target);
+
+    // Use the same sparse iterator and jump table as the SPARSE_ITER_BEGIN
+    // instruction.
+    assert(begin);
+    assert(contains(offset_map, begin));
+    assert(begin->is_written);
+    inst->iter_offset = begin->iter_offset;
+    inst->jump_table = begin->jump_table_offset;
+}
+
+void RoseInstrSparseIterAny::write(void *dest, RoseEngineBlob &blob,
+                                   const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->fail_jump = calc_jump(offset_map, this, target);
+
+    // Write the multibit sparse iterator.
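+    // (Unlike SPARSE_ITER_BEGIN, no jump table is needed here: ANY only
+    // tests whether some key in the iterator is switched on.)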
+    vector<mmbit_sparse_iter> iter;
+    mmbBuildSparseIterator(iter, keys, num_keys);
+    assert(!iter.empty());
+    inst->iter_offset = blob.add(iter.begin(), iter.end());
+}
+
+void RoseInstrEnginesEod::write(void *dest, RoseEngineBlob &blob,
+                                const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->iter_offset = iter_offset;
+}
+
+static
+OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) {
+    OffsetMap offset_map;
+    u32 offset = 0;
+    for (const auto &ri : program) {
+        offset = ROUNDUP_N(offset, ROSE_INSTR_MIN_ALIGN);
+        DEBUG_PRINTF("instr %p (opcode %d) -> offset %u\n", ri.get(),
+                     ri->code(), offset);
+        assert(!contains(offset_map, ri.get()));
+        offset_map.emplace(ri.get(), offset);
+        offset += ri->byte_length();
+    }
+    *total_len = offset;
+    return offset_map;
+}
+
+aligned_unique_ptr<char>
+writeProgram(RoseEngineBlob &blob, const RoseProgram &program, u32 *total_len) {
+    const auto offset_map = makeOffsetMap(program, total_len);
+    DEBUG_PRINTF("%zu instructions, len %u\n", program.size(), *total_len);
+
+    auto bytecode = aligned_zmalloc_unique<char>(*total_len);
+    char *ptr = bytecode.get();
+
+    for (const auto &ri : program) {
+        assert(contains(offset_map, ri.get()));
+        const u32 offset = offset_map.at(ri.get());
+        ri->write(ptr + offset, blob, offset_map);
+    }
+
+    return bytecode;
+}
+
+bool RoseProgramEquivalence::operator()(const RoseProgram &prog1,
+                                        const RoseProgram &prog2) const {
+    if (prog1.size() != prog2.size()) {
+        return false;
+    }
+
+    u32 len_1 = 0, len_2 = 0;
+    const auto offset_map_1 = makeOffsetMap(prog1, &len_1);
+    const auto offset_map_2 = makeOffsetMap(prog2, &len_2);
+
+    if (len_1 != len_2) {
+        return false;
+    }
+
+    auto is_equiv = [&](const unique_ptr<RoseInstruction> &a,
+                        const unique_ptr<RoseInstruction> &b) {
+        assert(a && b);
+        return a->equiv(*b, offset_map_1, offset_map_2);
+    };
+
+    return std::equal(prog1.begin(), prog1.end(), prog2.begin(), is_equiv);
+}
+
+} // namespace ue2
diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h
new file mode 100644
index 00000000..0853210b
--- /dev/null
+++ b/src/rose/rose_build_program.h
@@ -0,0 +1,1802 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_PROGRAM_H
+#define ROSE_BUILD_PROGRAM_H
+
+#include "rose_build_impl.h"
+#include "rose_program.h"
+#include "som/som_operation.h"
+#include "util/alloc.h"
+#include "util/container.h"
+#include "util/make_unique.h"
+#include "util/ue2_containers.h"
+
+#include <array>
+#include <cstring>
+#include <memory>
+#include <vector>
+
+namespace ue2 {
+
+class RoseEngineBlob;
+
+/**
+ * \brief Abstract base class representing a single Rose instruction.
+ */
+class RoseInstruction {
+public:
+    virtual ~RoseInstruction();
+
+    /** \brief Opcode used for the instruction in the bytecode. */
+    virtual RoseInstructionCode code() const = 0;
+
+    /**
+     * \brief Simple hash used for program equivalence.
+     *
+     * Note that pointers (jumps, for example) should not be used when
+     * calculating the hash: they will be converted to instruction offsets when
+     * compared later.
+     */
+    virtual size_t hash() const = 0;
+
+    /** \brief Length of the bytecode instruction in bytes. */
+    virtual size_t byte_length() const = 0;
+
+    using OffsetMap = unordered_map<const RoseInstruction *, u32>;
+
+    /**
+     * \brief Writes a concrete implementation of this instruction.
+     *
+     * Other data that this instruction depends on is written directly into the
+     * blob, while the instruction structure itself (of size given by
+     * the byte_length() function) is written to dest.
+     */
+    virtual void write(void *dest, RoseEngineBlob &blob,
+                       const OffsetMap &offset_map) const = 0;
+
+    /**
+     * \brief Update a target pointer.
+     *
+     * If this instruction contains any reference to the old target, replace it
+     * with the new one.
+     */
+    virtual void update_target(const RoseInstruction *old_target,
+                               const RoseInstruction *new_target) = 0;
+
+    /**
+     * \brief True if these instructions are equivalent within their own
+     * programs.
+     *
+     * Checks that any pointers to other instructions point to the same
+     * offsets.
+     */
+    bool equiv(const RoseInstruction &other, const OffsetMap &offsets,
+               const OffsetMap &other_offsets) const {
+        return equiv_impl(other, offsets, other_offsets);
+    }
+
+private:
+    virtual bool equiv_impl(const RoseInstruction &other,
+                            const OffsetMap &offsets,
+                            const OffsetMap &other_offsets) const = 0;
+};
+
+/**
+ * \brief Templated implementation class to handle boring boilerplate code.
+ */
+template <RoseInstructionCode Opcode, class ImplType, class RoseInstrType>
+class RoseInstrBase : public RoseInstruction {
+protected:
+    static constexpr RoseInstructionCode opcode = Opcode;
+    using impl_type = ImplType;
+
+public:
+    RoseInstructionCode code() const override { return opcode; }
+
+    size_t byte_length() const override {
+        return sizeof(impl_type);
+    }
+
+    /**
+     * Note: this implementation simply zeroes the destination region and
+     * writes in the correct opcode. This is sufficient for trivial
+     * instructions, but instructions with data members will want to override
+     * it.
+     */
+    void write(void *dest, RoseEngineBlob &,
+               const RoseInstruction::OffsetMap &) const override {
+        assert(dest != nullptr);
+        assert(ISALIGNED_N(dest, ROSE_INSTR_MIN_ALIGN));
+
+        impl_type *inst = static_cast<impl_type *>(dest);
+        memset(inst, 0, sizeof(impl_type));
+        inst->code = verify_u8(opcode);
+    }
+
+private:
+    bool equiv_impl(const RoseInstruction &other, const OffsetMap &offsets,
+                    const OffsetMap &other_offsets) const override {
+        const auto *ri_that = dynamic_cast<const RoseInstrType *>(&other);
+        if (!ri_that) {
+            return false;
+        }
+        const auto *ri_this = dynamic_cast<const RoseInstrType *>(this);
+        assert(ri_this);
+        return ri_this->equiv_to(*ri_that, offsets, other_offsets);
+    }
+};
+
+/**
+ * \brief Refinement of RoseInstrBase to use for instructions that have
+ * just a single target member, called "target".
+ */
+template <RoseInstructionCode Opcode, class ImplType, class RoseInstrType>
+class RoseInstrBaseOneTarget
+    : public RoseInstrBase<Opcode, ImplType, RoseInstrType> {
+public:
+    void update_target(const RoseInstruction *old_target,
+                       const RoseInstruction *new_target) override {
+        RoseInstrType *ri = dynamic_cast<RoseInstrType *>(this);
+        assert(ri);
+        if (ri->target == old_target) {
+            ri->target = new_target;
+        }
+    }
+};
+
+/**
+ * \brief Refinement of RoseInstrBase to use for instructions that have no
+ * targets.
+ */
+template <RoseInstructionCode Opcode, class ImplType, class RoseInstrType>
+class RoseInstrBaseNoTargets
+    : public RoseInstrBase<Opcode, ImplType, RoseInstrType> {
+public:
+    void update_target(const RoseInstruction *,
+                       const RoseInstruction *) override {}
+};
+
+/**
+ * \brief Refinement of RoseInstrBaseNoTargets to use for instructions that
+ * have no members at all, just an opcode.
+ */
+template <RoseInstructionCode Opcode, class ImplType, class RoseInstrType>
+class RoseInstrBaseTrivial
+    : public RoseInstrBaseNoTargets<Opcode, ImplType, RoseInstrType> {
+public:
+    virtual bool operator==(const RoseInstrType &) const { return true; }
+
+    size_t hash() const override {
+        return Opcode;
+    }
+
+    bool equiv_to(const RoseInstrType &, const RoseInstruction::OffsetMap &,
+                  const RoseInstruction::OffsetMap &) const {
+        return true;
+    }
+};
+
+////
+//// Concrete implementation classes start here.
+////
+
+class RoseInstrAnchoredDelay
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_ANCHORED_DELAY,
+                                    ROSE_STRUCT_ANCHORED_DELAY,
+                                    RoseInstrAnchoredDelay> {
+public:
+    rose_group groups;
+    const RoseInstruction *target;
+
+    RoseInstrAnchoredDelay(rose_group groups_in,
+                           const RoseInstruction *target_in)
+        : groups(groups_in), target(target_in) {}
+
+    bool operator==(const RoseInstrAnchoredDelay &ri) const {
+        return groups == ri.groups && target == ri.target;
+    }
+
+    size_t hash() const override {
+        size_t v = opcode;
+        boost::hash_combine(v, groups);
+        return v;
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrAnchoredDelay &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return groups == ri.groups &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckLitEarly
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_CHECK_LIT_EARLY,
+                                    ROSE_STRUCT_CHECK_LIT_EARLY,
+                                    RoseInstrCheckLitEarly> {
+public:
+    u32 min_offset;
+
+    explicit RoseInstrCheckLitEarly(u32 min) : min_offset(min) {}
+
+    bool operator==(const RoseInstrCheckLitEarly &ri) const {
+        return min_offset == ri.min_offset;
+    }
+
+    size_t hash() const override {
+        size_t v = opcode;
+        boost::hash_combine(v, min_offset);
+        return v;
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckLitEarly &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return min_offset == ri.min_offset;
+    }
+};
+
+class RoseInstrCheckGroups
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_CHECK_GROUPS,
+                                    ROSE_STRUCT_CHECK_GROUPS,
+                                    RoseInstrCheckGroups> {
+public:
+    rose_group groups;
+
+    explicit RoseInstrCheckGroups(rose_group groups_in) : groups(groups_in) {}
+
+    bool operator==(const RoseInstrCheckGroups &ri) const {
+        return groups == ri.groups;
+    }
+
+    size_t hash() const override {
+        size_t v = opcode;
+        boost::hash_combine(v, groups);
+        return v;
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckGroups &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return groups == ri.groups;
+    }
+};
+
+class RoseInstrCheckOnlyEod
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_ONLY_EOD,
+                                    ROSE_STRUCT_CHECK_ONLY_EOD,
+                                    RoseInstrCheckOnlyEod> {
+public:
+    const RoseInstruction *target;
+
+    explicit RoseInstrCheckOnlyEod(const RoseInstruction *target_in)
+        : target(target_in) {}
+
+    bool operator==(const RoseInstrCheckOnlyEod &ri) const {
+        return target == ri.target;
+    }
+
+    size_t hash() const override {
+        size_t v = opcode;
+        return v;
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckOnlyEod &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckBounds
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_BOUNDS,
+                                    ROSE_STRUCT_CHECK_BOUNDS,
+                                    RoseInstrCheckBounds> {
+public:
+    u64a min_bound;
+    u64a max_bound;
+    const RoseInstruction *target;
+
+    RoseInstrCheckBounds(u64a min, u64a max, const RoseInstruction *target_in)
+        : min_bound(min), max_bound(max), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckBounds &ri) const {
+        return min_bound == ri.min_bound && max_bound == ri.max_bound &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        size_t v = opcode;
+        boost::hash_combine(v, min_bound);
+        boost::hash_combine(v, max_bound);
+        return v;
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckBounds &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return min_bound == ri.min_bound && max_bound == ri.max_bound &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckNotHandled
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_NOT_HANDLED,
+                                    ROSE_STRUCT_CHECK_NOT_HANDLED,
+                                    RoseInstrCheckNotHandled> {
+public:
+    u32 key;
+    const RoseInstruction *target;
+
+    RoseInstrCheckNotHandled(u32 key_in, const RoseInstruction *target_in)
+        : key(key_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckNotHandled &ri) const {
+        return key == ri.key && target == ri.target;
+    }
+
+    size_t hash() const override {
+        size_t v = opcode;
+        boost::hash_combine(v, key);
+        return v;
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckNotHandled &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return key == ri.key &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckLookaround
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_LOOKAROUND,
+                                    ROSE_STRUCT_CHECK_LOOKAROUND,
+                                    RoseInstrCheckLookaround> {
+public:
+    u32 index;
+    u32 count;
+    const RoseInstruction *target;
+
+    RoseInstrCheckLookaround(u32 index_in, u32 count_in,
+                             const RoseInstruction *target_in)
+        : index(index_in), count(count_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckLookaround &ri) const {
+        return index == ri.index && count == ri.count && target == ri.target;
+    }
+
+    size_t hash() const override {
+        size_t v = opcode;
+        boost::hash_combine(v, index);
+        boost::hash_combine(v, count);
+        return v;
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckLookaround &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return index == ri.index && count == ri.count &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckMask
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MASK,
+                                    ROSE_STRUCT_CHECK_MASK,
+                                    RoseInstrCheckMask> {
+public:
+    u64a and_mask;
+    u64a cmp_mask;
+    u64a neg_mask;
+    s32 offset;
+    const RoseInstruction *target;
+
+    RoseInstrCheckMask(u64a and_mask_in, u64a cmp_mask_in, u64a neg_mask_in,
+                       s32 offset_in, const RoseInstruction *target_in)
+        : and_mask(and_mask_in), cmp_mask(cmp_mask_in), neg_mask(neg_mask_in),
+          offset(offset_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckMask &ri) const {
+        return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        size_t v = opcode;
+        boost::hash_combine(v, and_mask);
+        boost::hash_combine(v, cmp_mask);
+        boost::hash_combine(v, neg_mask);
+        boost::hash_combine(v, offset);
+        return v;
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckMask &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckMask32
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MASK_32,
+                                    ROSE_STRUCT_CHECK_MASK_32,
+                                    RoseInstrCheckMask32> {
+public:
+    std::array<u8, 32> and_mask;
+    std::array<u8, 32> cmp_mask;
+    u32 neg_mask;
+    s32 offset;
+    const RoseInstruction *target;
+
+    RoseInstrCheckMask32(std::array<u8, 32> and_mask_in,
+                         std::array<u8, 32> cmp_mask_in, u32 neg_mask_in,
+                         s32 offset_in, const RoseInstruction *target_in)
+        : and_mask(move(and_mask_in)), cmp_mask(move(cmp_mask_in)),
+          neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckMask32 &ri) const {
+        return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        size_t v = opcode;
+        boost::hash_combine(v, and_mask);
+        boost::hash_combine(v, cmp_mask);
+        boost::hash_combine(v, neg_mask);
+        boost::hash_combine(v, offset);
+        return v;
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckMask32 &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckByte
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_BYTE,
+                                    ROSE_STRUCT_CHECK_BYTE,
+                                    RoseInstrCheckByte> {
+public:
+    u8 and_mask;
+    u8 cmp_mask;
+    u8 negation;
+    s32 offset;
+    const RoseInstruction *target;
+
+    RoseInstrCheckByte(u8 and_mask_in, u8 cmp_mask_in, u8 negation_in,
+                       s32 offset_in, const RoseInstruction *target_in)
+        : and_mask(and_mask_in), cmp_mask(cmp_mask_in), negation(negation_in),
+          offset(offset_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckByte &ri) const {
+        return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask &&
+               negation == ri.negation && offset == ri.offset &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        size_t v = opcode;
+        boost::hash_combine(v, and_mask);
+        boost::hash_combine(v, cmp_mask);
+        boost::hash_combine(v, negation);
+        boost::hash_combine(v, offset);
+        return v;
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckByte &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask &&
+               negation == ri.negation && offset == ri.offset &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckInfix
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_INFIX,
+                                    ROSE_STRUCT_CHECK_INFIX,
+                                    RoseInstrCheckInfix> {
+public:
+    u32 queue;
+    u32 lag;
+    ReportID report;
+    const RoseInstruction *target;
+
+    RoseInstrCheckInfix(u32 queue_in, u32 lag_in, ReportID report_in,
+                        const RoseInstruction *target_in)
+        : queue(queue_in), lag(lag_in), report(report_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckInfix &ri) const {
+        return queue == ri.queue && lag == ri.lag && report == ri.report &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        size_t v = opcode;
+        boost::hash_combine(v, queue);
+        boost::hash_combine(v, lag);
+        boost::hash_combine(v, report);
+        return v;
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckInfix &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return queue == ri.queue && lag == ri.lag && report == ri.report &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckPrefix
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_PREFIX,
+                                    ROSE_STRUCT_CHECK_PREFIX,
+                                    RoseInstrCheckPrefix> {
+public:
+    u32 queue;
+    u32 lag;
+    ReportID report;
+    const RoseInstruction *target;
+
+    RoseInstrCheckPrefix(u32 queue_in, u32 lag_in, ReportID report_in,
+                         const RoseInstruction *target_in)
+        : queue(queue_in), lag(lag_in), report(report_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckPrefix &ri) const {
+        return queue == ri.queue && lag == ri.lag && report == ri.report &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        size_t v = opcode;
+        boost::hash_combine(v, queue);
+        boost::hash_combine(v, lag);
+        boost::hash_combine(v, report);
+        return v;
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
&blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckPrefix &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return queue == ri.queue && lag == ri.lag && report == ri.report && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrPushDelayed + : public RoseInstrBaseNoTargets { +public: + u8 delay; + u32 index; + + RoseInstrPushDelayed(u8 delay_in, u32 index_in) + : delay(delay_in), index(index_in) {} + + bool operator==(const RoseInstrPushDelayed &ri) const { + return delay == ri.delay && index == ri.index; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, delay); + boost::hash_combine(v, index); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrPushDelayed &ri, const OffsetMap &, + const OffsetMap &) const { + return delay == ri.delay && index == ri.index; + } +}; + +class RoseInstrRecordAnchored + : public RoseInstrBaseNoTargets { +public: + u32 id; + + explicit RoseInstrRecordAnchored(u32 id_in) : id(id_in) {} + + bool operator==(const RoseInstrRecordAnchored &ri) const { + return id == ri.id; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, id); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrRecordAnchored &ri, const OffsetMap &, + const OffsetMap &) const { + return id == ri.id; + } +}; + +class RoseInstrCatchUp + : public RoseInstrBaseTrivial { +public: + ~RoseInstrCatchUp() override; +}; + +class RoseInstrCatchUpMpv + : public RoseInstrBaseTrivial { +public: + ~RoseInstrCatchUpMpv() override; +}; + +class RoseInstrSomAdjust + : public RoseInstrBaseNoTargets { +public: + u32 distance; + + explicit RoseInstrSomAdjust(u32 distance_in) : distance(distance_in) {} + + bool operator==(const RoseInstrSomAdjust &ri) const { + return distance == ri.distance; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, distance); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSomAdjust &ri, const OffsetMap &, + const OffsetMap &) const { + return distance == ri.distance; + } +}; + +class RoseInstrSomLeftfix + : public RoseInstrBaseNoTargets { +public: + u32 queue; + u32 lag; + + RoseInstrSomLeftfix(u32 queue_in, u32 lag_in) + : queue(queue_in), lag(lag_in) {} + + bool operator==(const RoseInstrSomLeftfix &ri) const { + return queue == ri.queue && lag == ri.lag; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, queue); + boost::hash_combine(v, lag); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSomLeftfix &ri, const OffsetMap &, + const OffsetMap &) const { + return queue == ri.queue && lag == ri.lag; + } +}; + +class RoseInstrSomFromReport + : public RoseInstrBaseNoTargets { +public: + som_operation som; + + RoseInstrSomFromReport() { + std::memset(&som, 0, sizeof(som)); + } + + bool operator==(const RoseInstrSomFromReport &ri) const { + return std::memcmp(&som, &ri.som, sizeof(som)) == 0; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, som.type); + boost::hash_combine(v, som.onmatch); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const 
OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSomFromReport &ri, const OffsetMap &, + const OffsetMap &) const { + return std::memcmp(&som, &ri.som, sizeof(som)) == 0; + } +}; + +class RoseInstrSomZero + : public RoseInstrBaseTrivial { +public: + ~RoseInstrSomZero() override; +}; + +class RoseInstrTriggerInfix + : public RoseInstrBaseNoTargets { +public: + u8 cancel; + u32 queue; + u32 event; + + RoseInstrTriggerInfix(u8 cancel_in, u32 queue_in, u32 event_in) + : cancel(cancel_in), queue(queue_in), event(event_in) {} + + bool operator==(const RoseInstrTriggerInfix &ri) const { + return cancel == ri.cancel && queue == ri.queue && event == ri.event; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, cancel); + boost::hash_combine(v, queue); + boost::hash_combine(v, event); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrTriggerInfix &ri, const OffsetMap &, + const OffsetMap &) const { + return cancel == ri.cancel && queue == ri.queue && event == ri.event; + } +}; + +class RoseInstrTriggerSuffix + : public RoseInstrBaseNoTargets { +public: + u32 queue; + u32 event; + + RoseInstrTriggerSuffix(u32 queue_in, u32 event_in) + : queue(queue_in), event(event_in) {} + + bool operator==(const RoseInstrTriggerSuffix &ri) const { + return queue == ri.queue && event == ri.event; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, queue); + boost::hash_combine(v, event); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrTriggerSuffix &ri, const OffsetMap &, + const OffsetMap &) const { + return queue == ri.queue && event == ri.event; + } +}; + +class RoseInstrDedupe + : public RoseInstrBaseOneTarget { +public: + u8 quash_som; + u32 dkey; + s32 offset_adjust; + const RoseInstruction *target; + + RoseInstrDedupe(u8 quash_som_in, u32 dkey_in, s32 offset_adjust_in, + const RoseInstruction *target_in) + : quash_som(quash_som_in), dkey(dkey_in), + offset_adjust(offset_adjust_in), target(target_in) {} + + bool operator==(const RoseInstrDedupe &ri) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + offset_adjust == ri.offset_adjust && target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, quash_som); + boost::hash_combine(v, dkey); + boost::hash_combine(v, offset_adjust); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrDedupe &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + offset_adjust == ri.offset_adjust && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrDedupeSom + : public RoseInstrBaseOneTarget { +public: + u8 quash_som; + u32 dkey; + s32 offset_adjust; + const RoseInstruction *target; + + RoseInstrDedupeSom(u8 quash_som_in, u32 dkey_in, s32 offset_adjust_in, + const RoseInstruction *target_in) + : quash_som(quash_som_in), dkey(dkey_in), + offset_adjust(offset_adjust_in), target(target_in) {} + + bool operator==(const RoseInstrDedupeSom &ri) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + offset_adjust == ri.offset_adjust && target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, 
quash_som);
+        boost::hash_combine(v, dkey);
+        boost::hash_combine(v, offset_adjust);
+        return v;
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrDedupeSom &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return quash_som == ri.quash_som && dkey == ri.dkey &&
+               offset_adjust == ri.offset_adjust &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrReportChain
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_CHAIN,
+                                    ROSE_STRUCT_REPORT_CHAIN,
+                                    RoseInstrReportChain> {
+public:
+    u32 event;
+    u64a top_squash_distance;
+
+    RoseInstrReportChain(u32 event_in, u64a top_squash_distance_in)
+        : event(event_in), top_squash_distance(top_squash_distance_in) {}
+
+    bool operator==(const RoseInstrReportChain &ri) const {
+        return event == ri.event &&
+               top_squash_distance == ri.top_squash_distance;
+    }
+
+    size_t hash() const override {
+        size_t v = opcode;
+        boost::hash_combine(v, event);
+        boost::hash_combine(v, top_squash_distance);
+        return v;
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrReportChain &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return event == ri.event &&
+               top_squash_distance == ri.top_squash_distance;
+    }
+};
+
+class RoseInstrReportSomInt
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_SOM_INT,
+                                    ROSE_STRUCT_REPORT_SOM_INT,
+                                    RoseInstrReportSomInt> {
+public:
+    som_operation som;
+
+    RoseInstrReportSomInt() {
+        std::memset(&som, 0, sizeof(som));
+    }
+
+    bool operator==(const RoseInstrReportSomInt &ri) const {
+        return std::memcmp(&som, &ri.som, sizeof(som)) == 0;
+    }
+
+    size_t hash() const override {
+        size_t v = opcode;
+        boost::hash_combine(v, som.type);
+        boost::hash_combine(v, som.onmatch);
+        return v;
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrReportSomInt &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return std::memcmp(&som, &ri.som, sizeof(som)) == 0;
+    }
+};
+
+class RoseInstrReportSomAware
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_SOM_AWARE,
+                                    ROSE_STRUCT_REPORT_SOM_AWARE,
+                                    RoseInstrReportSomAware> {
+public:
+    som_operation som;
+
+    RoseInstrReportSomAware() {
+        std::memset(&som, 0, sizeof(som));
+    }
+
+    bool operator==(const RoseInstrReportSomAware &ri) const {
+        return std::memcmp(&som, &ri.som, sizeof(som)) == 0;
+    }
+
+    size_t hash() const override {
+        size_t v = opcode;
+        boost::hash_combine(v, som.type);
+        boost::hash_combine(v, som.onmatch);
+        return v;
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrReportSomAware &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return std::memcmp(&som, &ri.som, sizeof(som)) == 0;
+    }
+};
+
+class RoseInstrReport
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT,
+                                    ROSE_STRUCT_REPORT,
+                                    RoseInstrReport> {
+public:
+    ReportID onmatch;
+    s32 offset_adjust;
+
+    RoseInstrReport(ReportID onmatch_in, s32 offset_adjust_in)
+        : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {}
+
+    bool operator==(const RoseInstrReport &ri) const {
+        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust;
+    }
+
+    size_t hash() const override {
+        size_t v = opcode;
+        boost::hash_combine(v, onmatch);
+        boost::hash_combine(v, offset_adjust);
+        return v;
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrReport &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust;
+    }
+};
+
+class RoseInstrReportExhaust
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_EXHAUST,
+                                    ROSE_STRUCT_REPORT_EXHAUST,
+                                    RoseInstrReportExhaust> {
+public:
+    ReportID
onmatch; + s32 offset_adjust; + u32 ekey; + + RoseInstrReportExhaust(ReportID onmatch_in, s32 offset_adjust_in, + u32 ekey_in) + : onmatch(onmatch_in), offset_adjust(offset_adjust_in), ekey(ekey_in) {} + + bool operator==(const RoseInstrReportExhaust &ri) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + ekey == ri.ekey; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, onmatch); + boost::hash_combine(v, offset_adjust); + boost::hash_combine(v, ekey); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReportExhaust &ri, const OffsetMap &, + const OffsetMap &) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + ekey == ri.ekey; + } +}; + +class RoseInstrReportSom + : public RoseInstrBaseNoTargets { +public: + ReportID onmatch; + s32 offset_adjust; + + RoseInstrReportSom(ReportID onmatch_in, s32 offset_adjust_in) + : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {} + + bool operator==(const RoseInstrReportSom &ri) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, onmatch); + boost::hash_combine(v, offset_adjust); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReportSom &ri, const OffsetMap &, + const OffsetMap &) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; + } +}; + +class RoseInstrReportSomExhaust + : public RoseInstrBaseNoTargets { +public: + ReportID onmatch; + s32 offset_adjust; + u32 ekey; + + RoseInstrReportSomExhaust(ReportID onmatch_in, s32 offset_adjust_in, + u32 ekey_in) + : onmatch(onmatch_in), offset_adjust(offset_adjust_in), ekey(ekey_in) {} + + bool operator==(const RoseInstrReportSomExhaust &ri) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + ekey == ri.ekey; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, onmatch); + boost::hash_combine(v, offset_adjust); + boost::hash_combine(v, ekey); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReportSomExhaust &ri, const OffsetMap &, + const OffsetMap &) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + ekey == ri.ekey; + } +}; + +class RoseInstrDedupeAndReport + : public RoseInstrBaseOneTarget { +public: + u8 quash_som; + u32 dkey; + ReportID onmatch; + s32 offset_adjust; + const RoseInstruction *target; + + RoseInstrDedupeAndReport(u8 quash_som_in, u32 dkey_in, ReportID onmatch_in, + s32 offset_adjust_in, + const RoseInstruction *target_in) + : quash_som(quash_som_in), dkey(dkey_in), onmatch(onmatch_in), + offset_adjust(offset_adjust_in), target(target_in) {} + + bool operator==(const RoseInstrDedupeAndReport &ri) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, quash_som); + boost::hash_combine(v, dkey); + boost::hash_combine(v, onmatch); + boost::hash_combine(v, offset_adjust); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const 
RoseInstrDedupeAndReport &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrFinalReport + : public RoseInstrBaseNoTargets { +public: + ReportID onmatch; + s32 offset_adjust; + + RoseInstrFinalReport(ReportID onmatch_in, s32 offset_adjust_in) + : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {} + + bool operator==(const RoseInstrFinalReport &ri) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, onmatch); + boost::hash_combine(v, offset_adjust); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrFinalReport &ri, const OffsetMap &, + const OffsetMap &) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; + } +}; + +class RoseInstrCheckExhausted + : public RoseInstrBaseOneTarget { +public: + u32 ekey; + const RoseInstruction *target; + + RoseInstrCheckExhausted(u32 ekey_in, const RoseInstruction *target_in) + : ekey(ekey_in), target(target_in) {} + + bool operator==(const RoseInstrCheckExhausted &ri) const { + return ekey == ri.ekey && target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, ekey); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckExhausted &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return ekey == ri.ekey && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMinLength + : public RoseInstrBaseOneTarget { +public: + s32 end_adj; + u64a min_length; + const RoseInstruction *target; + + RoseInstrCheckMinLength(s32 end_adj_in, u64a min_length_in, + const RoseInstruction *target_in) + : end_adj(end_adj_in), min_length(min_length_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMinLength &ri) const { + return end_adj == ri.end_adj && min_length == ri.min_length && + target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, end_adj); + boost::hash_combine(v, min_length); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMinLength &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return end_adj == ri.end_adj && min_length == ri.min_length && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrSetState + : public RoseInstrBaseNoTargets { +public: + u32 index; + + explicit RoseInstrSetState(u32 index_in) : index(index_in) {} + + bool operator==(const RoseInstrSetState &ri) const { + return index == ri.index; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, index); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSetState &ri, const OffsetMap &, + const OffsetMap &) const { + return index == ri.index; + } +}; + +class RoseInstrSetGroups + : public RoseInstrBaseNoTargets { +public: + rose_group groups; + + explicit RoseInstrSetGroups(rose_group groups_in) : groups(groups_in) {} + + bool 
operator==(const RoseInstrSetGroups &ri) const { + return groups == ri.groups; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, groups); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSetGroups &ri, const OffsetMap &, + const OffsetMap &) const { + return groups == ri.groups; + } +}; + +class RoseInstrSquashGroups + : public RoseInstrBaseNoTargets { +public: + rose_group groups; + + explicit RoseInstrSquashGroups(rose_group groups_in) : groups(groups_in) {} + + bool operator==(const RoseInstrSquashGroups &ri) const { + return groups == ri.groups; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, groups); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSquashGroups &ri, const OffsetMap &, + const OffsetMap &) const { + return groups == ri.groups; + } +}; + +class RoseInstrCheckState + : public RoseInstrBaseOneTarget { +public: + u32 index; + const RoseInstruction *target; + + RoseInstrCheckState(u32 index_in, const RoseInstruction *target_in) + : index(index_in), target(target_in) {} + + bool operator==(const RoseInstrCheckState &ri) const { + return index == ri.index && target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, index); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckState &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return index == ri.index && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrSparseIterBegin + : public RoseInstrBase { +public: + u32 num_keys; // total number of multibit keys + std::vector> jump_table; + const RoseInstruction *target; + + RoseInstrSparseIterBegin(u32 num_keys_in, + const RoseInstruction *target_in) + : num_keys(num_keys_in), target(target_in) {} + + bool operator==(const RoseInstrSparseIterBegin &ri) const { + return num_keys == ri.num_keys && jump_table == ri.jump_table && + target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, num_keys); + for (const auto &jump : jump_table) { + boost::hash_combine(v, jump.first); + } + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + void update_target(const RoseInstruction *old_target, + const RoseInstruction *new_target) override { + if (target == old_target) { + target = new_target; + } + for (auto &jump : jump_table) { + if (jump.second == old_target) { + jump.second = new_target; + } + } + } + + bool equiv_to(const RoseInstrSparseIterBegin &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + if (iter_offset != ri.iter_offset || + offsets.at(target) != other_offsets.at(ri.target)) { + return false; + } + if (jump_table.size() != ri.jump_table.size()) { + return false; + } + auto it1 = jump_table.begin(), it2 = ri.jump_table.begin(); + for (; it1 != jump_table.end(); ++it1, ++it2) { + if (it1->first != it2->first) { + return false; + } + if (offsets.at(it1->second) != other_offsets.at(it2->second)) { + return false; + } + } + return true; + } + +private: + friend class RoseInstrSparseIterNext; + + // These variables allow us to use the same multibit iterator and jump + // table in subsequent 
SPARSE_ITER_NEXT write() operations. + mutable bool is_written = false; + mutable u32 iter_offset = 0; + mutable u32 jump_table_offset = 0; +}; + +class RoseInstrSparseIterNext + : public RoseInstrBase { +public: + u32 state; + const RoseInstrSparseIterBegin *begin; + const RoseInstruction *target; + + RoseInstrSparseIterNext(u32 state_in, + const RoseInstrSparseIterBegin *begin_in, + const RoseInstruction *target_in) + : state(state_in), begin(begin_in), target(target_in) {} + + bool operator==(const RoseInstrSparseIterNext &ri) const { + return state == ri.state && begin == ri.begin && target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, state); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + void update_target(const RoseInstruction *old_target, + const RoseInstruction *new_target) override { + if (target == old_target) { + target = new_target; + } + if (begin == old_target) { + assert(new_target->code() == ROSE_INSTR_SPARSE_ITER_BEGIN); + begin = static_cast(new_target); + } + } + + bool equiv_to(const RoseInstrSparseIterNext &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return state == ri.state && + offsets.at(begin) == other_offsets.at(ri.begin) && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrSparseIterAny + : public RoseInstrBaseOneTarget { +public: + u32 num_keys; // total number of multibit keys + std::vector keys; + const RoseInstruction *target; + + RoseInstrSparseIterAny(u32 num_keys_in, std::vector keys_in, + const RoseInstruction *target_in) + : num_keys(num_keys_in), keys(std::move(keys_in)), target(target_in) {} + + bool operator==(const RoseInstrSparseIterAny &ri) const { + return num_keys == ri.num_keys && keys == ri.keys && + target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, num_keys); + boost::hash_combine(v, keys); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSparseIterAny &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return num_keys == ri.num_keys && keys == ri.keys && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrEnginesEod + : public RoseInstrBaseNoTargets { +public: + u32 iter_offset; + + explicit RoseInstrEnginesEod(u32 iter_in) : iter_offset(iter_in) {} + + bool operator==(const RoseInstrEnginesEod &ri) const { + return iter_offset == ri.iter_offset; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, iter_offset); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrEnginesEod &ri, const OffsetMap &, + const OffsetMap &) const { + return iter_offset == ri.iter_offset; + } +}; + +class RoseInstrSuffixesEod + : public RoseInstrBaseTrivial { +public: + ~RoseInstrSuffixesEod() override; +}; + +class RoseInstrMatcherEod : public RoseInstrBaseTrivial { +public: + ~RoseInstrMatcherEod() override; +}; + +class RoseInstrEnd + : public RoseInstrBaseTrivial { +public: + ~RoseInstrEnd() override; +}; + +/** + * \brief Container for a list of program instructions. 
+ */
+class RoseProgram {
+private:
+    std::vector<std::unique_ptr<RoseInstruction>> prog;
+
+public:
+    RoseProgram() {
+        prog.push_back(make_unique<RoseInstrEnd>());
+    }
+
+    bool empty() const {
+        assert(!prog.empty());
+        assert(prog.back()->code() == ROSE_INSTR_END);
+        // Empty if we only have one element, the END instruction.
+        return std::next(prog.begin()) == prog.end();
+    }
+
+    size_t size() const { return prog.size(); }
+
+    const RoseInstruction &back() const { return *prog.back(); }
+    const RoseInstruction &front() const { return *prog.front(); }
+
+    using iterator = decltype(prog)::iterator;
+    iterator begin() { return prog.begin(); }
+    iterator end() { return prog.end(); }
+
+    using const_iterator = decltype(prog)::const_iterator;
+    const_iterator begin() const { return prog.begin(); }
+    const_iterator end() const { return prog.end(); }
+
+    using reverse_iterator = decltype(prog)::reverse_iterator;
+    reverse_iterator rbegin() { return prog.rbegin(); }
+    reverse_iterator rend() { return prog.rend(); }
+
+    using const_reverse_iterator = decltype(prog)::const_reverse_iterator;
+    const_reverse_iterator rbegin() const { return prog.rbegin(); }
+    const_reverse_iterator rend() const { return prog.rend(); }
+
+    /** \brief Retrieve a pointer to the terminating ROSE_INSTR_END. */
+    const RoseInstruction *end_instruction() const {
+        assert(!prog.empty());
+        assert(prog.back()->code() == ROSE_INSTR_END);
+
+        return prog.back().get();
+    }
+
+private:
+    static void update_targets(iterator it, iterator it_end,
+                               const RoseInstruction *old_target,
+                               const RoseInstruction *new_target) {
+        assert(old_target && new_target && old_target != new_target);
+        for (; it != it_end; ++it) {
+            std::unique_ptr<RoseInstruction> &ri = *it;
+            assert(ri);
+            ri->update_target(old_target, new_target);
+        }
+    }
+
+public:
+    iterator insert(iterator it, std::unique_ptr<RoseInstruction> ri) {
+        assert(!prog.empty());
+        assert(it != end());
+        assert(prog.back()->code() == ROSE_INSTR_END);
+
+        return prog.insert(it, std::move(ri));
+    }
+
+    iterator insert(iterator it, RoseProgram &&block) {
+        assert(!prog.empty());
+        assert(it != end());
+        assert(prog.back()->code() == ROSE_INSTR_END);
+
+        if (block.empty()) {
+            return it;
+        }
+
+        const RoseInstruction *end_ptr = block.end_instruction();
+        assert(end_ptr->code() == ROSE_INSTR_END);
+        block.prog.pop_back();
+
+        const RoseInstruction *new_target = it->get();
+        update_targets(block.prog.begin(), block.prog.end(), end_ptr,
+                       new_target);
+
+        // Workaround: container insert() for ranges doesn't return an iterator
+        // in the version of the STL distributed with gcc 4.8.
+        auto dist = distance(prog.begin(), it);
+        prog.insert(it, std::make_move_iterator(block.prog.begin()),
+                    std::make_move_iterator(block.prog.end()));
+        it = prog.begin();
+        std::advance(it, dist);
+        return it;
+    }
+
+    /**
+     * \brief Adds this instruction to the program just before the terminating
+     * ROSE_INSTR_END.
+     */
+    void add_before_end(std::unique_ptr<RoseInstruction> ri) {
+        assert(!prog.empty());
+        insert(std::prev(prog.end()), std::move(ri));
+    }
+
+    /**
+     * \brief Adds this block to the program just before the terminating
+     * ROSE_INSTR_END.
+     */
+    void add_before_end(RoseProgram &&block) {
+        assert(!prog.empty());
+        assert(prog.back()->code() == ROSE_INSTR_END);
+
+        if (block.empty()) {
+            return;
+        }
+
+        insert(std::prev(prog.end()), std::move(block));
+    }
+
+    /**
+     * \brief Append this program block, replacing our current ROSE_INSTR_END.
+ */ + void add_block(RoseProgram &&block) { + assert(!prog.empty()); + assert(prog.back()->code() == ROSE_INSTR_END); + + if (block.empty()) { + return; + } + + // Replace pointers to the current END with pointers to the first + // instruction in the new sequence. + const RoseInstruction *end_ptr = end_instruction(); + prog.pop_back(); + update_targets(prog.begin(), prog.end(), end_ptr, + block.prog.front().get()); + prog.insert(prog.end(), std::make_move_iterator(block.prog.begin()), + std::make_move_iterator(block.prog.end())); + } + + /** + * \brief Replace the instruction pointed to by the given iterator. + */ + template + void replace(Iter it, std::unique_ptr ri) { + assert(!prog.empty()); + assert(prog.back()->code() == ROSE_INSTR_END); + + const RoseInstruction *old_ptr = it->get(); + *it = move(ri); + update_targets(prog.begin(), prog.end(), old_ptr, it->get()); + + assert(prog.back()->code() == ROSE_INSTR_END); + } +}; + +aligned_unique_ptr +writeProgram(RoseEngineBlob &blob, const RoseProgram &program, u32 *total_len); + +class RoseProgramHash { +public: + size_t operator()(const RoseProgram &program) const { + size_t v = 0; + for (const auto &ri : program) { + assert(ri); + boost::hash_combine(v, ri->hash()); + } + return v; + } +}; + +class RoseProgramEquivalence { +public: + bool operator()(const RoseProgram &prog1, const RoseProgram &prog2) const; +}; + +} // namespace ue2 + +#endif // ROSE_BUILD_PROGRAM_H diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 95f43d6c..40057d78 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -511,6 +511,12 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SPARSE_ITER_ANY) { + os << " iter_offset " << ri->iter_offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(ENGINES_EOD) { os << " iter_offset " << ri->iter_offset << endl; } @@ -577,9 +583,8 @@ void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) { ofstream os(filename); const char *base = (const char *)t; - os << "EOD Program:" << endl; - if (t->eodProgramOffset) { + os << "EOD Program @ " << t->eodProgramOffset << ":" << endl; dumpProgram(os, t, base + t->eodProgramOffset); os << endl; } else { diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 007eb70d..ba3e586b 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -99,6 +99,7 @@ enum RoseInstructionCode { ROSE_INSTR_CHECK_STATE, //!< Test a single bit in the state multibit. ROSE_INSTR_SPARSE_ITER_BEGIN, //!< Begin running a sparse iter over states. ROSE_INSTR_SPARSE_ITER_NEXT, //!< Continue running sparse iter over states. + ROSE_INSTR_SPARSE_ITER_ANY, //!< Test for any bit in the sparse iterator. /** \brief Check outfixes and suffixes for EOD and fire reports if so. */ ROSE_INSTR_ENGINES_EOD, @@ -386,6 +387,12 @@ struct ROSE_STRUCT_SPARSE_ITER_NEXT { u32 fail_jump; //!< Jump forward this many bytes on failure. }; +struct ROSE_STRUCT_SPARSE_ITER_ANY { + u8 code; //!< From enum RoseInstructionCode. + u32 iter_offset; //!< Offset of mmbit_sparse_iter structure. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + struct ROSE_STRUCT_ENGINES_EOD { u8 code; //!< From enum RoseInstructionCode. u32 iter_offset; //!< Offset of mmbit_sparse_iter structure. 
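A note on the container in the patch above: RoseProgram splices whole blocks together by popping its terminating END instruction and re-pointing every instruction that branched to it. The following standalone C++ sketch distils that pointer-rewiring idea; Instr, Program, updateTargets and addBlock are illustrative stand-ins, not the Hyperscan types.

#include <cassert>
#include <memory>
#include <vector>

// Toy instruction: an opcode plus an optional branch target. The real
// instructions carry per-opcode fields behind the RoseInstruction interface.
struct Instr {
    int opcode = 0;               // opcode 0 plays the role of ROSE_INSTR_END
    const Instr *target = nullptr;
};

using Program = std::vector<std::unique_ptr<Instr>>;

// Re-point every reference to old_t at new_t, as update_targets() does.
void updateTargets(Program &prog, const Instr *old_t, const Instr *new_t) {
    for (auto &ri : prog) {
        if (ri->target == old_t) {
            ri->target = new_t;
        }
    }
}

// Append block, replacing prog's END sentinel, in the style of add_block().
void addBlock(Program &prog, Program &&block) {
    assert(!prog.empty() && prog.back()->opcode == 0);
    assert(!block.empty() && block.back()->opcode == 0);
    const Instr *end_ptr = prog.back().get();
    prog.pop_back();
    // Branches that used to land on END now land on the block's first
    // instruction; the block's own END becomes the program's terminator.
    updateTargets(prog, end_ptr, block.front().get());
    for (auto &ri : block) {
        prog.push_back(std::move(ri));
    }
}

Because each instruction is held by a stable unique_ptr, the raw target pointers survive the splice unchanged; only references to the discarded END need patching.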
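The write()/hash()/equiv_to() trio on every instruction class exists so whole programs can be deduplicated: two programs compare equal only if corresponding instructions match field-for-field, with branch targets compared by their position in each program rather than by pointer identity. A hedged sketch of that position-based comparison, reusing the toy Instr/Program types from the previous example (the real code compares bytecode offsets via an OffsetMap):

#include <cstddef>
#include <unordered_map>

// Index of each instruction within its own program.
using PosMap = std::unordered_map<const Instr *, size_t>;

PosMap makePositions(const Program &prog) {
    PosMap pm;
    for (size_t i = 0; i < prog.size(); i++) {
        pm.emplace(prog[i].get(), i);
    }
    return pm;
}

// Equivalent iff opcodes match and targets land on the same position in
// each program; pointer identity is deliberately ignored.
bool equivalent(const Program &a, const Program &b) {
    if (a.size() != b.size()) {
        return false;
    }
    PosMap pa = makePositions(a), pb = makePositions(b);
    for (size_t i = 0; i < a.size(); i++) {
        if (a[i]->opcode != b[i]->opcode) {
            return false;
        }
        const Instr *ta = a[i]->target, *tb = b[i]->target;
        if ((ta == nullptr) != (tb == nullptr)) {
            return false;
        }
        if (ta && pa.at(ta) != pb.at(tb)) {
            return false;
        }
    }
    return true;
}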
From 13b6023a1861d820f255e9b397a2b9bf2c87b8bd Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 23 Aug 2016 16:12:34 +1000 Subject: [PATCH 015/103] hash: add hash_all variadic tpl func, use in rose --- CMakeLists.txt | 1 + src/rose/rose_build_program.h | 213 +++++++++------------------------- src/util/hash.h | 74 ++++++++++++ 3 files changed, 127 insertions(+), 161 deletions(-) create mode 100644 src/util/hash.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 0a236845..de51c016 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -928,6 +928,7 @@ SET (hs_SRCS src/util/dump_mask.cpp src/util/dump_mask.h src/util/graph.h + src/util/hash.h src/util/multibit_build.cpp src/util/multibit_build.h src/util/order_check.h diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h index 0853210b..27aeffbe 100644 --- a/src/rose/rose_build_program.h +++ b/src/rose/rose_build_program.h @@ -34,13 +34,16 @@ #include "som/som_operation.h" #include "util/alloc.h" #include "util/container.h" +#include "util/hash.h" #include "util/make_unique.h" #include "util/ue2_containers.h" #include #include #include + #include +#include namespace ue2 { @@ -193,7 +196,7 @@ public: virtual bool operator==(const RoseInstrType &) const { return true; } size_t hash() const override { - return Opcode; + return boost::hash_value(static_cast(Opcode)); } bool equiv_to(const RoseInstrType &, const RoseInstruction::OffsetMap &, @@ -223,9 +226,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, groups); - return v; + return hash_all(static_cast(opcode), groups); } void write(void *dest, RoseEngineBlob &blob, @@ -252,9 +253,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, min_offset); - return v; + return hash_all(static_cast(opcode), min_offset); } void write(void *dest, RoseEngineBlob &blob, @@ -280,9 +279,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, groups); - return v; + return hash_all(static_cast(opcode), groups); } void write(void *dest, RoseEngineBlob &blob, @@ -309,8 +306,7 @@ public: } size_t hash() const override { - size_t v = opcode; - return v; + return boost::hash_value(static_cast(opcode)); } void write(void *dest, RoseEngineBlob &blob, @@ -340,10 +336,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, min_bound); - boost::hash_combine(v, max_bound); - return v; + return hash_all(static_cast(opcode), min_bound, max_bound); } void write(void *dest, RoseEngineBlob &blob, @@ -372,9 +365,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, key); - return v; + return hash_all(static_cast(opcode), key); } void write(void *dest, RoseEngineBlob &blob, @@ -405,10 +396,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, index); - boost::hash_combine(v, count); - return v; + return hash_all(static_cast(opcode), index, count); } void write(void *dest, RoseEngineBlob &blob, @@ -444,12 +432,8 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, and_mask); - boost::hash_combine(v, cmp_mask); - boost::hash_combine(v, neg_mask); - boost::hash_combine(v, offset); - return v; + return hash_all(static_cast(opcode), and_mask, cmp_mask, neg_mask, + offset); } void write(void *dest, RoseEngineBlob &blob, @@ -487,12 +471,8 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, and_mask); - 
boost::hash_combine(v, cmp_mask); - boost::hash_combine(v, neg_mask); - boost::hash_combine(v, offset); - return v; + return hash_all(static_cast(opcode), and_mask, cmp_mask, neg_mask, + offset); } void write(void *dest, RoseEngineBlob &blob, @@ -529,12 +509,8 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, and_mask); - boost::hash_combine(v, cmp_mask); - boost::hash_combine(v, negation); - boost::hash_combine(v, offset); - return v; + return hash_all(static_cast(opcode), and_mask, cmp_mask, negation, + offset); } void write(void *dest, RoseEngineBlob &blob, @@ -568,11 +544,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, queue); - boost::hash_combine(v, lag); - boost::hash_combine(v, report); - return v; + return hash_all(static_cast(opcode), queue, lag, report); } void write(void *dest, RoseEngineBlob &blob, @@ -605,11 +577,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, queue); - boost::hash_combine(v, lag); - boost::hash_combine(v, report); - return v; + return hash_all(static_cast(opcode), queue, lag, report); } void write(void *dest, RoseEngineBlob &blob, @@ -638,10 +606,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, delay); - boost::hash_combine(v, index); - return v; + return hash_all(static_cast(opcode), delay, index); } void write(void *dest, RoseEngineBlob &blob, @@ -667,9 +632,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, id); - return v; + return hash_all(static_cast(opcode), id); } void write(void *dest, RoseEngineBlob &blob, @@ -710,9 +673,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, distance); - return v; + return hash_all(static_cast(opcode), distance); } void write(void *dest, RoseEngineBlob &blob, @@ -740,10 +701,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, queue); - boost::hash_combine(v, lag); - return v; + return hash_all(static_cast(opcode), queue, lag); } void write(void *dest, RoseEngineBlob &blob, @@ -771,10 +729,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, som.type); - boost::hash_combine(v, som.onmatch); - return v; + return hash_all(static_cast(opcode), som.type, som.onmatch); } void write(void *dest, RoseEngineBlob &blob, @@ -810,11 +765,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, cancel); - boost::hash_combine(v, queue); - boost::hash_combine(v, event); - return v; + return hash_all(static_cast(opcode), cancel, queue, event); } void write(void *dest, RoseEngineBlob &blob, @@ -842,10 +793,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, queue); - boost::hash_combine(v, event); - return v; + return hash_all(static_cast(opcode), queue, event); } void write(void *dest, RoseEngineBlob &blob, @@ -877,11 +825,8 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, quash_som); - boost::hash_combine(v, dkey); - boost::hash_combine(v, offset_adjust); - return v; + return hash_all(static_cast(opcode), quash_som, dkey, + offset_adjust); } void write(void *dest, RoseEngineBlob &blob, @@ -916,11 +861,8 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, quash_som); - boost::hash_combine(v, dkey); - boost::hash_combine(v, offset_adjust); - return v; + return 
hash_all(static_cast(opcode), quash_som, dkey, + offset_adjust); } void write(void *dest, RoseEngineBlob &blob, @@ -951,10 +893,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, event); - boost::hash_combine(v, top_squash_distance); - return v; + return hash_all(static_cast(opcode), event, top_squash_distance); } void write(void *dest, RoseEngineBlob &blob, @@ -983,10 +922,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, som.type); - boost::hash_combine(v, som.onmatch); - return v; + return hash_all(static_cast(opcode), som.type, som.onmatch); } void write(void *dest, RoseEngineBlob &blob, @@ -1014,10 +950,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, som.type); - boost::hash_combine(v, som.onmatch); - return v; + return hash_all(static_cast(opcode), som.type, som.onmatch); } void write(void *dest, RoseEngineBlob &blob, @@ -1044,10 +977,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, onmatch); - boost::hash_combine(v, offset_adjust); - return v; + return hash_all(static_cast(opcode), onmatch, offset_adjust); } void write(void *dest, RoseEngineBlob &blob, @@ -1078,11 +1008,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, onmatch); - boost::hash_combine(v, offset_adjust); - boost::hash_combine(v, ekey); - return v; + return hash_all(static_cast(opcode), onmatch, offset_adjust, ekey); } void write(void *dest, RoseEngineBlob &blob, @@ -1111,10 +1037,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, onmatch); - boost::hash_combine(v, offset_adjust); - return v; + return hash_all(static_cast(opcode), onmatch, offset_adjust); } void write(void *dest, RoseEngineBlob &blob, @@ -1145,11 +1068,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, onmatch); - boost::hash_combine(v, offset_adjust); - boost::hash_combine(v, ekey); - return v; + return hash_all(static_cast(opcode), onmatch, offset_adjust, ekey); } void write(void *dest, RoseEngineBlob &blob, @@ -1186,12 +1105,8 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, quash_som); - boost::hash_combine(v, dkey); - boost::hash_combine(v, onmatch); - boost::hash_combine(v, offset_adjust); - return v; + return hash_all(static_cast(opcode), quash_som, dkey, onmatch, + offset_adjust); } void write(void *dest, RoseEngineBlob &blob, @@ -1221,10 +1136,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, onmatch); - boost::hash_combine(v, offset_adjust); - return v; + return hash_all(static_cast(opcode), onmatch, offset_adjust); } void write(void *dest, RoseEngineBlob &blob, @@ -1252,9 +1164,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, ekey); - return v; + return hash_all(static_cast(opcode), ekey); } void write(void *dest, RoseEngineBlob &blob, @@ -1286,10 +1196,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, end_adj); - boost::hash_combine(v, min_length); - return v; + return hash_all(static_cast(opcode), end_adj, min_length); } void write(void *dest, RoseEngineBlob &blob, @@ -1315,9 +1222,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, index); - return v; + return hash_all(static_cast(opcode), index); } void write(void *dest, RoseEngineBlob &blob, @@ 
-1343,9 +1248,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, groups); - return v; + return hash_all(static_cast(opcode), groups); } void write(void *dest, RoseEngineBlob &blob, @@ -1371,9 +1274,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, groups); - return v; + return hash_all(static_cast(opcode), groups); } void write(void *dest, RoseEngineBlob &blob, @@ -1401,9 +1302,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, index); - return v; + return hash_all(static_cast(opcode), index); } void write(void *dest, RoseEngineBlob &blob, @@ -1435,10 +1334,9 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, num_keys); - for (const auto &jump : jump_table) { - boost::hash_combine(v, jump.first); + size_t v = hash_all(static_cast(opcode), num_keys); + for (const u32 &key : jump_table | boost::adaptors::map_keys) { + boost::hash_combine(v, key); } return v; } @@ -1508,9 +1406,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, state); - return v; + return hash_all(static_cast(opcode), state); } void write(void *dest, RoseEngineBlob &blob, @@ -1554,10 +1450,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, num_keys); - boost::hash_combine(v, keys); - return v; + return hash_all(static_cast(opcode), num_keys, keys); } void write(void *dest, RoseEngineBlob &blob, @@ -1584,9 +1477,7 @@ public: } size_t hash() const override { - size_t v = opcode; - boost::hash_combine(v, iter_offset); - return v; + return hash_all(static_cast(opcode), iter_offset); } void write(void *dest, RoseEngineBlob &blob, diff --git a/src/util/hash.h b/src/util/hash.h new file mode 100644 index 00000000..0b571772 --- /dev/null +++ b/src/util/hash.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Hashing utility functions. 
+ */ + +#ifndef UTIL_HASH_H +#define UTIL_HASH_H + +#include + +namespace ue2 { + +namespace hash_detail { + +template +void hash_build(size_t &v, const T &obj) { + boost::hash_combine(v, obj); +} + +template +void hash_build(size_t &v, const T &obj, Args&&... args) { + hash_build(v, obj); + hash_build(v, args...); // recursive +} + +} // namespace hash_detail + +/** + * \brief Computes the combined hash of all its arguments. + * + * Simply use: + * + * size_t hash = hash_all(a, b, c, d); + * + * Where a, b, c and d are hashable. + */ +template +size_t hash_all(Args&&... args) { + size_t v = 0; + hash_detail::hash_build(v, args...); + return v; +} + +} // namespace ue2 + +#endif // UTIL_HASH_H From 913912364244e90c9b02865d4c5dd24ac8b1978b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 24 Aug 2016 10:16:12 +1000 Subject: [PATCH 016/103] rose: move sparse iter cache to RoseEngineBlob This enables its use for iterators written by instructions. --- src/rose/rose_build_bytecode.cpp | 28 +++------------------------- src/rose/rose_build_engine_blob.h | 18 ++++++++++++++++++ src/rose/rose_build_program.cpp | 4 ++-- src/util/multibit_build.h | 18 ++++++++++-------- 4 files changed, 33 insertions(+), 35 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 68812b44..3356d214 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -186,10 +186,6 @@ struct build_context : boost::noncopyable { */ size_t numStates = 0; - /** \brief Very simple cache from sparse iter to offset, used when building - * up iterators in early misc. */ - map, u32> iterCache; - /** \brief Simple cache of programs written to engine blob, used for * deduplication. */ ue2::unordered_map &iter) { - if (contains(bc.iterCache, iter)) { - DEBUG_PRINTF("cache hit\n"); - u32 offset = bc.iterCache.at(iter); - return offset; - } - - u32 offset = bc.engine_blob.add(iter.begin(), iter.end()); - - bc.iterCache.insert(make_pair(iter, offset)); - - return offset; -} - static u32 buildLastByteIter(const RoseGraph &g, build_context &bc) { vector lb_roles; @@ -2217,7 +2195,7 @@ u32 buildLastByteIter(const RoseGraph &g, build_context &bc) { vector iter; mmbBuildSparseIterator(iter, lb_roles, bc.numStates); - return addIteratorToTable(bc, iter); + return bc.engine_blob.add_iterator(iter); } static @@ -2329,7 +2307,7 @@ u32 buildEodNfaIterator(build_context &bc, const u32 activeQueueCount) { vector iter; mmbBuildSparseIterator(iter, keys, activeQueueCount); - return addIteratorToTable(bc, iter); + return bc.engine_blob.add_iterator(iter); } static @@ -4669,7 +4647,7 @@ u32 buildEagerQueueIter(const set &eager, u32 leftfixBeginQueue, vector iter; mmbBuildSparseIterator(iter, vec, queue_count - leftfixBeginQueue); - return addIteratorToTable(bc, iter); + return bc.engine_blob.add_iterator(iter); } static diff --git a/src/rose/rose_build_engine_blob.h b/src/rose/rose_build_engine_blob.h index 0914502e..8542b87b 100644 --- a/src/rose/rose_build_engine_blob.h +++ b/src/rose/rose_build_engine_blob.h @@ -34,6 +34,8 @@ #include "ue2common.h" #include "util/alloc.h" #include "util/container.h" +#include "util/multibit_build.h" +#include "util/ue2_containers.h" #include "util/verify_types.h" #include @@ -104,6 +106,19 @@ public: return offset; } + u32 add_iterator(const std::vector &iter) { + auto cache_it = cached_iters.find(iter); + if (cache_it != cached_iters.end()) { + u32 offset = cache_it->second; + DEBUG_PRINTF("cache hit for iter at %u\n", offset); + return 
offset; + } + + u32 offset = add(iter.begin(), iter.end()); + cached_iters.emplace(iter, offset); + return offset; + } + void write_bytes(RoseEngine *engine) { copy_bytes((char *)engine + base_offset, blob); } @@ -120,6 +135,9 @@ private: blob.resize(s + align - s % align); } + /** \brief Cache of previously-written sparse iterators. */ + unordered_map, u32> cached_iters; + /** * \brief Contents of the Rose bytecode immediately following the * RoseEngine. diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index 73740976..168022f3 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -387,7 +387,7 @@ void RoseInstrSparseIterBegin::write(void *dest, RoseEngineBlob &blob, vector iter; mmbBuildSparseIterator(iter, keys, num_keys); assert(!iter.empty()); - inst->iter_offset = blob.add(iter.begin(), iter.end()); + inst->iter_offset = blob.add_iterator(iter); inst->jump_table = blob.add(jump_offsets.begin(), jump_offsets.end()); // Store offsets for corresponding SPARSE_ITER_NEXT operations. @@ -422,7 +422,7 @@ void RoseInstrSparseIterAny::write(void *dest, RoseEngineBlob &blob, vector iter; mmbBuildSparseIterator(iter, keys, num_keys); assert(!iter.empty()); - inst->iter_offset = blob.add(iter.begin(), iter.end()); + inst->iter_offset = blob.add_iterator(iter); } void RoseInstrEnginesEod::write(void *dest, RoseEngineBlob &blob, diff --git a/src/util/multibit_build.h b/src/util/multibit_build.h index ac263552..5fbaab87 100644 --- a/src/util/multibit_build.h +++ b/src/util/multibit_build.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,16 +34,18 @@ #define MULTIBIT_BUILD_H #include "multibit_internal.h" +#include "hash.h" #include -/** \brief Comparator for \ref mmbit_sparse_iter structures. 
*/ -static inline -bool operator<(const mmbit_sparse_iter &a, const mmbit_sparse_iter &b) { - if (a.mask != b.mask) { - return a.mask < b.mask; - } - return a.val < b.val; +inline +bool operator==(const mmbit_sparse_iter &a, const mmbit_sparse_iter &b) { + return a.mask == b.mask && a.val == b.val; +} + +inline +size_t hash_value(const mmbit_sparse_iter &iter) { + return ue2::hash_all(iter.mask, iter.val); } namespace ue2 { From 9c99a923a1147c3dfc35ca2ea8aeb6e384674009 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 1 Sep 2016 16:30:08 +1000 Subject: [PATCH 017/103] limex: remove constant do_br arg from TESTEOD_FN --- src/nfa/limex_common_impl.h | 10 +++------- src/nfa/limex_runtime_impl.h | 4 ++-- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/nfa/limex_common_impl.h b/src/nfa/limex_common_impl.h index 187a661b..86dd3103 100644 --- a/src/nfa/limex_common_impl.h +++ b/src/nfa/limex_common_impl.h @@ -162,7 +162,7 @@ char PROCESS_ACCEPTS_NOSQUASH_FN(const STATE_T *s, static really_inline char TESTEOD_FN(const IMPL_NFA_T *limex, const STATE_T *s, const union RepeatControl *repeat_ctrl, - const char *repeat_state, u64a offset, char do_br, + const char *repeat_state, u64a offset, NfaCallback callback, void *context) { assert(limex && s); @@ -174,12 +174,8 @@ char TESTEOD_FN(const IMPL_NFA_T *limex, const STATE_T *s, const STATE_T acceptEodMask = LOAD_FROM_ENG(&limex->acceptAtEOD); STATE_T foundAccepts = AND_STATE(*s, acceptEodMask); - if (do_br) { - SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, - offset + 1 /* EOD 'symbol' */, &foundAccepts); - } else { - assert(!limex->repeatCount); - } + SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, + offset + 1 /* EOD 'symbol' */, &foundAccepts); if (unlikely(ISNONZERO_STATE(foundAccepts))) { const struct NFAAccept *acceptEodTable = getAcceptEodTable(limex); diff --git a/src/nfa/limex_runtime_impl.h b/src/nfa/limex_runtime_impl.h index 644ddd6a..4f456388 100644 --- a/src/nfa/limex_runtime_impl.h +++ b/src/nfa/limex_runtime_impl.h @@ -883,8 +883,8 @@ char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state, const union RepeatControl *repeat_ctrl = getRepeatControlBaseConst(state, sizeof(STATE_T)); const char *repeat_state = streamState + limex->stateSize; - return TESTEOD_FN(limex, sptr, repeat_ctrl, repeat_state, offset, 1, - callback, context); + return TESTEOD_FN(limex, sptr, repeat_ctrl, repeat_state, offset, callback, + context); } char JOIN(LIMEX_API_ROOT, _reportCurrent)(const struct NFA *n, struct mq *q) { From 3dcfea19e0008fb70c0b4faac8d5d809983e1736 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 1 Sep 2016 16:42:00 +1000 Subject: [PATCH 018/103] limex: fold TESTEOD_REV_FN use into TESTEOD_FN --- src/nfa/limex_common_impl.h | 32 ++------------------------------ src/nfa/limex_runtime_impl.h | 9 +++++---- 2 files changed, 7 insertions(+), 34 deletions(-) diff --git a/src/nfa/limex_common_impl.h b/src/nfa/limex_common_impl.h index 86dd3103..ae0d1da8 100644 --- a/src/nfa/limex_common_impl.h +++ b/src/nfa/limex_common_impl.h @@ -39,7 +39,6 @@ #define IMPL_NFA_T JOIN(struct LimExNFA, SIZE) #define TESTEOD_FN JOIN(moNfaTestEod, SIZE) -#define TESTEOD_REV_FN JOIN(moNfaRevTestEod, SIZE) #define LIMEX_INACCEPT_FN JOIN(limexInAccept, SIZE) #define LIMEX_INANYACCEPT_FN JOIN(limexInAnyAccept, SIZE) #define EXPIRE_ESTATE_FN JOIN(limexExpireExtendedState, SIZE) @@ -158,7 +157,8 @@ char PROCESS_ACCEPTS_NOSQUASH_FN(const STATE_T *s, return 0; } -// Run EOD accepts. +// Run EOD accepts. 
Note that repeat_ctrl and repeat_state may be NULL if this
+// LimEx contains no repeat structures.
 static really_inline
 char TESTEOD_FN(const IMPL_NFA_T *limex, const STATE_T *s,
                 const union RepeatControl *repeat_ctrl,
@@ -189,33 +189,6 @@ char TESTEOD_FN(const IMPL_NFA_T *limex, const STATE_T *s,
     return MO_CONTINUE_MATCHING;
 }
 
-static really_inline
-char TESTEOD_REV_FN(const IMPL_NFA_T *limex, const STATE_T *s, u64a offset,
-                    NfaCallback callback, void *context) {
-    assert(limex && s);
-
-    // There may not be any EOD accepts in this NFA.
-    if (!limex->acceptEodCount) {
-        return MO_CONTINUE_MATCHING;
-    }
-
-    STATE_T acceptEodMask = LOAD_FROM_ENG(&limex->acceptAtEOD);
-    STATE_T foundAccepts = AND_STATE(*s, acceptEodMask);
-
-    assert(!limex->repeatCount);
-
-    if (unlikely(ISNONZERO_STATE(foundAccepts))) {
-        const struct NFAAccept *acceptEodTable = getAcceptEodTable(limex);
-        if (PROCESS_ACCEPTS_NOSQUASH_FN(&foundAccepts, acceptEodTable,
-                                        limex->acceptEodCount, offset, callback,
-                                        context)) {
-            return MO_HALT_MATCHING;
-        }
-    }
-
-    return MO_CONTINUE_MATCHING;
-}
-
 // Run accepts corresponding to current state.
 static really_inline
 char REPORTCURRENT_FN(const IMPL_NFA_T *limex, const struct mq *q) {
@@ -391,7 +364,6 @@ char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
 }
 
 #undef TESTEOD_FN
-#undef TESTEOD_REV_FN
 #undef REPORTCURRENT_FN
 #undef EXPIRE_ESTATE_FN
 #undef LIMEX_INACCEPT_FN
diff --git a/src/nfa/limex_runtime_impl.h b/src/nfa/limex_runtime_impl.h
index 4f456388..5bc79c24 100644
--- a/src/nfa/limex_runtime_impl.h
+++ b/src/nfa/limex_runtime_impl.h
@@ -46,7 +46,6 @@
 #define IMPL_NFA_T JOIN(struct LimExNFA, SIZE)
 
 #define TESTEOD_FN JOIN(moNfaTestEod, SIZE)
-#define TESTEOD_REV_FN JOIN(moNfaRevTestEod, SIZE)
 #define INITIAL_FN JOIN(moNfaInitial, SIZE)
 #define TOP_FN JOIN(moNfaTop, SIZE)
 #define TOPN_FN JOIN(moNfaTopN, SIZE)
@@ -927,8 +926,11 @@ char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset,
         REV_STREAM_FN(limex, hbuf, hlen, &ctx, offset);
     }
 
-    if (offset == 0 && ISNONZERO_STATE(ctx.s)) {
-        TESTEOD_REV_FN(limex, &ctx.s, offset, cb, context);
+    if (offset == 0 && limex->acceptEodCount && ISNONZERO_STATE(ctx.s)) {
+        const union RepeatControl *repeat_ctrl = NULL;
+        const char *repeat_state = NULL;
+        TESTEOD_FN(limex, &ctx.s, repeat_ctrl, repeat_state, offset, cb,
+                   context);
     }
 
     // NOTE: return value is unused.
@@ -991,7 +993,6 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
 }
 
 #undef TESTEOD_FN
-#undef TESTEOD_REV_FN
 #undef INITIAL_FN
 #undef TOP_FN
 #undef TOPN_FN

From 77fe1ef6e5294fb0082c2d9c5d3dca93be405ad8 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Tue, 30 Aug 2016 14:24:23 +1000
Subject: [PATCH 019/103] limex: rework accept handling

Rather than iterating over NFAAccept structures and testing individual
bits in the state structure, iterate over the state vector and index
into accept structures.

Adds report list support to this path, unified with the report lists
used for exception handling.
--- src/nfa/limex_common_impl.h | 181 ++++++++++++++++--------- src/nfa/limex_compile.cpp | 253 ++++++++++++++++++++++------------- src/nfa/limex_dump.cpp | 53 +++++--- src/nfa/limex_exceptional.h | 17 +-- src/nfa/limex_internal.h | 14 +- src/nfa/limex_native.c | 7 +- src/nfa/limex_runtime.h | 36 ++++- src/nfa/limex_runtime_impl.h | 32 ++--- 8 files changed, 374 insertions(+), 219 deletions(-) diff --git a/src/nfa/limex_common_impl.h b/src/nfa/limex_common_impl.h index ae0d1da8..5bd5187b 100644 --- a/src/nfa/limex_common_impl.h +++ b/src/nfa/limex_common_impl.h @@ -46,6 +46,7 @@ #define INITIAL_FN JOIN(moNfaInitial, SIZE) #define TOP_FN JOIN(moNfaTop, SIZE) #define TOPN_FN JOIN(moNfaTopN, SIZE) +#define PROCESS_ACCEPTS_IMPL_FN JOIN(moProcessAcceptsImpl, SIZE) #define PROCESS_ACCEPTS_FN JOIN(moProcessAccepts, SIZE) #define PROCESS_ACCEPTS_NOSQUASH_FN JOIN(moProcessAcceptsNoSquash, SIZE) #define CONTEXT_T JOIN(NFAContext, SIZE) @@ -60,6 +61,20 @@ #define SQUASH_UNTUG_BR_FN JOIN(lazyTug, SIZE) #define GET_NFA_REPEAT_INFO_FN JOIN(getNfaRepeatInfo, SIZE) +#if defined(ARCH_64_BIT) && (SIZE >= 64) +#define CHUNK_T u64a +#define FIND_AND_CLEAR_FN findAndClearLSB_64 +#define POPCOUNT_FN popcount64 +#define RANK_IN_MASK_FN rank_in_mask64 +#else +#define CHUNK_T u32 +#define FIND_AND_CLEAR_FN findAndClearLSB_32 +#define POPCOUNT_FN popcount32 +#define RANK_IN_MASK_FN rank_in_mask32 +#endif + +#define NUM_STATE_CHUNKS (sizeof(STATE_T) / sizeof(CHUNK_T)) + static really_inline void SQUASH_UNTUG_BR_FN(const IMPL_NFA_T *limex, const union RepeatControl *repeat_ctrl, @@ -98,63 +113,84 @@ void SQUASH_UNTUG_BR_FN(const IMPL_NFA_T *limex, } } -static never_inline -char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s, - const struct NFAAccept *acceptTable, u32 acceptCount, - u64a offset, NfaCallback callback, void *context) { +static really_inline +char PROCESS_ACCEPTS_IMPL_FN(const IMPL_NFA_T *limex, const STATE_T *s, + STATE_T *squash, const ENG_STATE_T *squashMasks, + const STATE_T *acceptMask, + const struct NFAAccept *acceptTable, u64a offset, + NfaCallback callback, void *context) { assert(s); assert(limex); assert(callback); - assert(acceptCount); + const STATE_T accept_mask = *acceptMask; + STATE_T accepts = AND_STATE(*s, accept_mask); + + // Caller must ensure that we have at least one accept state on. + assert(ISNONZERO_STATE(accepts)); + + CHUNK_T chunks[NUM_STATE_CHUNKS]; + memcpy(chunks, &accepts, sizeof(accepts)); + + CHUNK_T mask_chunks[NUM_STATE_CHUNKS]; + memcpy(mask_chunks, &accept_mask, sizeof(accept_mask)); + + u32 base_index = 0; // Cumulative sum of mask popcount up to current chunk. 
+    for (u32 i = 0; i < NUM_STATE_CHUNKS; i++) {
+        CHUNK_T chunk = chunks[i];
+        while (chunk != 0) {
+            u32 bit = FIND_AND_CLEAR_FN(&chunk);
+            u32 local_idx = RANK_IN_MASK_FN(mask_chunks[i], bit);
+            u32 idx = local_idx + base_index;
+            const struct NFAAccept *a = &acceptTable[idx];
+            DEBUG_PRINTF("state %u: firing report list=%u, offset=%llu\n",
+                         bit + i * (u32)sizeof(chunk) * 8, a->reports, offset);
+            int rv = limexRunAccept((const char *)limex, a, callback, context,
+                                    offset);
+            if (unlikely(rv == MO_HALT_MATCHING)) {
+                return 1;
+            }
+            if (squash != NULL && a->squash != MO_INVALID_IDX) {
+                assert(squashMasks);
+                assert(a->squash < limex->squashCount);
+                const ENG_STATE_T *sq = &squashMasks[a->squash];
+                DEBUG_PRINTF("squash mask %u @ %p\n", a->squash, sq);
+                *squash = AND_STATE(*squash, LOAD_FROM_ENG(sq));
+            }
+        }
+        base_index += POPCOUNT_FN(mask_chunks[i]);
+    }
+
+    return 0;
+}
+
+static never_inline
+char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s,
+                        const STATE_T *acceptMask,
+                        const struct NFAAccept *acceptTable, u64a offset,
+                        NfaCallback callback, void *context) {
     // We have squash masks we might have to apply after firing reports.
     STATE_T squash = ONES_STATE;
     const ENG_STATE_T *squashMasks = (const ENG_STATE_T *)
         ((const char *)limex + limex->squashOffset);
 
-    for (u32 i = 0; i < acceptCount; i++) {
-        const struct NFAAccept *a = &acceptTable[i];
-        if (TESTBIT_STATE(*s, a->state)) {
-            DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n",
-                         a->state, a->externalId, offset);
-            int rv = callback(0, offset, a->externalId, context);
-            if (unlikely(rv == MO_HALT_MATCHING)) {
-                return 1;
-            }
-            if (a->squash != MO_INVALID_IDX) {
-                assert(a->squash < limex->squashCount);
-                const ENG_STATE_T *sq = &squashMasks[a->squash];
-                DEBUG_PRINTF("squash mask %u @ %p\n", a->squash, sq);
-                squash = AND_STATE(squash, LOAD_FROM_ENG(sq));
-            }
-        }
-    }
+    char rv = PROCESS_ACCEPTS_IMPL_FN(limex, s, &squash, squashMasks,
+                                      acceptMask, acceptTable, offset,
+                                      callback, context);
 
     *s = AND_STATE(*s, squash);
 
-    return 0;
+    return rv;
 }
 
 static never_inline
-char PROCESS_ACCEPTS_NOSQUASH_FN(const STATE_T *s,
+char PROCESS_ACCEPTS_NOSQUASH_FN(const IMPL_NFA_T *limex, const STATE_T *s,
+                                 const STATE_T *acceptMask,
                                  const struct NFAAccept *acceptTable,
-                                 u32 acceptCount, u64a offset,
-                                 NfaCallback callback, void *context) {
-    assert(s);
-    assert(callback);
-    assert(acceptCount);
+                                 u64a offset, NfaCallback callback,
+                                 void *context) {
+    STATE_T *squash = NULL;
+    const ENG_STATE_T *squashMasks = NULL;
 
-    for (u32 i = 0; i < acceptCount; i++) {
-        const struct NFAAccept *a = &acceptTable[i];
-        if (TESTBIT_STATE(*s, a->state)) {
-            DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n",
-                         a->state, a->externalId, offset);
-            int rv = callback(0, offset, a->externalId, context);
-            if (unlikely(rv == MO_HALT_MATCHING)) {
-                return 1;
-            }
-        }
-    }
-    return 0;
+    return PROCESS_ACCEPTS_IMPL_FN(limex, s, squash, squashMasks, acceptMask,
+                                   acceptTable, offset, callback, context);
 }
 
 // Run EOD accepts.
Note that repeat_ctrl and repeat_state may be NULL if this @@ -179,8 +215,8 @@ char TESTEOD_FN(const IMPL_NFA_T *limex, const STATE_T *s, if (unlikely(ISNONZERO_STATE(foundAccepts))) { const struct NFAAccept *acceptEodTable = getAcceptEodTable(limex); - if (PROCESS_ACCEPTS_NOSQUASH_FN(&foundAccepts, acceptEodTable, - limex->acceptEodCount, offset, callback, + if (PROCESS_ACCEPTS_NOSQUASH_FN(limex, &foundAccepts, &acceptEodMask, + acceptEodTable, offset, callback, context)) { return MO_HALT_MATCHING; } @@ -206,8 +242,8 @@ char REPORTCURRENT_FN(const IMPL_NFA_T *limex, const struct mq *q) { const struct NFAAccept *acceptTable = getAcceptTable(limex); u64a offset = q_cur_offset(q); - if (PROCESS_ACCEPTS_NOSQUASH_FN(&foundAccepts, acceptTable, - limex->acceptCount, offset, q->cb, + if (PROCESS_ACCEPTS_NOSQUASH_FN(limex, &foundAccepts, &acceptMask, + acceptTable, offset, q->cb, q->context)) { return MO_HALT_MATCHING; } @@ -307,37 +343,45 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, u64a offset, ReportID report) { assert(limex); - const STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); - STATE_T accstate = AND_STATE(state, acceptMask); + const STATE_T accept_mask = LOAD_FROM_ENG(&limex->accept); + STATE_T accepts = AND_STATE(state, accept_mask); // Are we in an accept state? - if (ISZERO_STATE(accstate)) { + if (ISZERO_STATE(accepts)) { DEBUG_PRINTF("no accept states are on\n"); return 0; } - SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &accstate); + SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &accepts); DEBUG_PRINTF("looking for report %u\n", report); -#ifdef DEBUG - DEBUG_PRINTF("accept states that are on: "); - for (u32 i = 0; i < sizeof(STATE_T) * 8; i++) { - if (TESTBIT_STATE(accstate, i)) printf("%u ", i); - } - printf("\n"); -#endif - - // Does one of our states match the given report ID? const struct NFAAccept *acceptTable = getAcceptTable(limex); - for (u32 i = 0; i < limex->acceptCount; i++) { - const struct NFAAccept *a = &acceptTable[i]; - DEBUG_PRINTF("checking idx=%u, externalId=%u\n", a->state, - a->externalId); - if (a->externalId == report && TESTBIT_STATE(accstate, a->state)) { - DEBUG_PRINTF("report is on!\n"); - return 1; + + CHUNK_T chunks[NUM_STATE_CHUNKS]; + memcpy(chunks, &accepts, sizeof(accepts)); + + CHUNK_T mask_chunks[NUM_STATE_CHUNKS]; + memcpy(mask_chunks, &accept_mask, sizeof(accept_mask)); + + u32 base_index = 0; // Cumulative sum of mask popcount up to current chunk. 
+ for (u32 i = 0; i < NUM_STATE_CHUNKS; i++) { + CHUNK_T chunk = chunks[i]; + while (chunk != 0) { + u32 bit = FIND_AND_CLEAR_FN(&chunk); + u32 local_idx = RANK_IN_MASK_FN(mask_chunks[i], bit); + u32 idx = local_idx + base_index; + assert(idx < limex->acceptCount); + const struct NFAAccept *a = &acceptTable[idx]; + DEBUG_PRINTF("state %u is on, report list at %u\n", + bit + i * (u32)sizeof(chunk) * 8, a->reports); + + if (limexAcceptHasReport((const char *)limex, a, report)) { + DEBUG_PRINTF("report %u is on\n", report); + return 1; + } } + base_index += POPCOUNT_FN(mask_chunks[i]); } return 0; @@ -381,7 +425,14 @@ char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, #undef TESTBIT_STATE #undef ISNONZERO_STATE #undef ISZERO_STATE +#undef PROCESS_ACCEPTS_IMPL_FN #undef PROCESS_ACCEPTS_FN #undef PROCESS_ACCEPTS_NOSQUASH_FN #undef SQUASH_UNTUG_BR_FN #undef GET_NFA_REPEAT_INFO_FN + +#undef CHUNK_T +#undef FIND_AND_CLEAR_FN +#undef POPCOUNT_FN +#undef RANK_IN_MASK_FN +#undef NUM_STATE_CHUNKS diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 78b9729f..89eaf10a 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -992,14 +992,105 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask, } static -void buildAccepts(const build_info &args, NFAStateSet &acceptMask, - NFAStateSet &acceptEodMask, vector &accepts, - vector &acceptsEod, vector &squash) { +u32 addSquashMask(const build_info &args, const NFAVertex &v, + vector &squash) { + auto sit = args.reportSquashMap.find(v); + if (sit == args.reportSquashMap.end()) { + return MO_INVALID_IDX; + } + + // This state has a squash mask. Paw through the existing vector to + // see if we've already seen it, otherwise add a new one. + auto it = find(squash.begin(), squash.end(), sit->second); + if (it != squash.end()) { + return verify_u32(distance(squash.begin(), it)); + } + u32 idx = verify_u32(squash.size()); + squash.push_back(sit->second); + return idx; +} + +static +u32 addReports(const flat_set &r, vector &reports, + unordered_map, u32> &reportListCache) { + assert(!r.empty()); + + vector my_reports(begin(r), end(r)); + my_reports.push_back(MO_INVALID_IDX); // sentinel + + auto cache_it = reportListCache.find(my_reports); + if (cache_it != end(reportListCache)) { + u32 offset = cache_it->second; + DEBUG_PRINTF("reusing cached report list at %u\n", offset); + return offset; + } + + auto it = search(begin(reports), end(reports), begin(my_reports), + end(my_reports)); + if (it != end(reports)) { + u32 offset = verify_u32(distance(begin(reports), it)); + DEBUG_PRINTF("reusing found report list at %u\n", offset); + return offset; + } + + u32 offset = verify_u32(reports.size()); + insert(&reports, reports.end(), my_reports); + reportListCache.emplace(move(my_reports), offset); + return offset; +} + +static +void buildAcceptsList(const build_info &args, + unordered_map, u32> &reports_cache, + vector &verts, vector &accepts, + vector &reports, vector &squash) { + if (verts.empty()) { + return; + } + + DEBUG_PRINTF("building accept lists for %zu states\n", verts.size()); + + auto cmp_state_id = [&args](NFAVertex a, NFAVertex b) { + u32 a_state = args.state_ids.at(a); + u32 b_state = args.state_ids.at(b); + assert(a_state != b_state || a == b); + return a_state < b_state; + }; + + sort(begin(verts), end(verts), cmp_state_id); + + const NGHolder &h = args.h; + for (const auto &v : verts) { + DEBUG_PRINTF("state=%u, reports: [%s]\n", args.state_ids.at(v), + as_string_list(h[v].reports).c_str()); + 
NFAAccept a; + memset(&a, 0, sizeof(a)); + assert(!h[v].reports.empty()); + if (h[v].reports.size() == 1) { + a.single_report = 1; + a.reports = *h[v].reports.begin(); + } else { + a.single_report = 0; + a.reports = addReports(h[v].reports, reports, reports_cache); + } + a.squash = addSquashMask(args, v, squash); + accepts.push_back(move(a)); + } +} + +static +void buildAccepts(const build_info &args, + unordered_map, u32> &reports_cache, + NFAStateSet &acceptMask, NFAStateSet &acceptEodMask, + vector &accepts, vector &acceptsEod, + vector &reports, vector &squash) { const NGHolder &h = args.h; acceptMask.resize(args.num_states); acceptEodMask.resize(args.num_states); + vector verts_accept, verts_accept_eod; + for (auto v : vertices_range(h)) { u32 state_id = args.state_ids.at(v); @@ -1007,41 +1098,20 @@ void buildAccepts(const build_info &args, NFAStateSet &acceptMask, continue; } - u32 squashMaskOffset = MO_INVALID_IDX; - auto sit = args.reportSquashMap.find(v); - if (sit != args.reportSquashMap.end()) { - // This state has a squash mask. Paw through the existing vector to - // see if we've already seen it, otherwise add a new one. - auto it = find(squash.begin(), squash.end(), sit->second); - if (it != squash.end()) { - squashMaskOffset = verify_u32(distance(squash.begin(), it)); - } else { - squashMaskOffset = verify_u32(squash.size()); - squash.push_back(sit->second); - } - } - - // Add an accept (or acceptEod) per report ID. - - vector *accepts_out; if (edge(v, h.accept, h).second) { acceptMask.set(state_id); - accepts_out = &accepts; + verts_accept.push_back(v); } else { assert(edge(v, h.acceptEod, h).second); acceptEodMask.set(state_id); - accepts_out = &acceptsEod; - } - - for (auto report : h[v].reports) { - accepts_out->push_back(NFAAccept()); - NFAAccept &a = accepts_out->back(); - a.state = state_id; - a.externalId = report; - a.squash = squashMaskOffset; - DEBUG_PRINTF("Accept: state=%u, externalId=%u\n", state_id, report); + verts_accept_eod.push_back(v); } } + + buildAcceptsList(args, reports_cache, verts_accept, accepts, reports, + squash); + buildAcceptsList(args, reports_cache, verts_accept_eod, acceptsEod, reports, + squash); } static @@ -1314,36 +1384,12 @@ struct ExceptionProto { } }; -static -u32 getReportListIndex(const flat_set &reports, - vector &exceptionReports, - map, u32> &reportListCache) { - if (reports.empty()) { - return MO_INVALID_IDX; - } - - const vector r(reports.begin(), reports.end()); - - auto it = reportListCache.find(r); - if (it != reportListCache.end()) { - u32 idx = it->second; - assert(idx < exceptionReports.size()); - assert(equal(r.begin(), r.end(), exceptionReports.begin() + idx)); - return idx; - } - - u32 idx = verify_u32(exceptionReports.size()); - reportListCache[r] = idx; - exceptionReports.insert(exceptionReports.end(), r.begin(), r.end()); - exceptionReports.push_back(MO_INVALID_IDX); // terminator - return idx; -} - static u32 buildExceptionMap(const build_info &args, + unordered_map, u32> &reports_cache, const ue2::unordered_set &exceptional, - map > &exceptionMap, - vector &exceptionReports) { + map> &exceptionMap, + vector &reportList) { const NGHolder &h = args.h; const u32 num_states = args.num_states; u32 exceptionCount = 0; @@ -1361,10 +1407,6 @@ u32 buildExceptionMap(const build_info &args, } } - // We track report lists that have already been written into the global - // list in case we can reuse them. 
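For intuition, a minimal sketch of the list-reuse scheme behind
addReports above, using std::map in place of the ue2 hashed containers
(hashing a vector key needs ue2 helpers in the real code):

#include <algorithm>
#include <cstdint>
#include <map>
#include <vector>

typedef uint32_t ReportID;
static const ReportID SENTINEL = ~0u; // stands in for MO_INVALID_IDX

// Add a sentinel-terminated report list to the global list, reusing any
// identical run already present; 'cache' remembers lists written before.
static uint32_t add_reports(std::vector<ReportID> r,
                            std::vector<ReportID> &global,
                            std::map<std::vector<ReportID>, uint32_t> &cache) {
    r.push_back(SENTINEL);
    auto c = cache.find(r);
    if (c != cache.end()) {
        return c->second; // exact list seen before
    }
    auto it = std::search(global.begin(), global.end(), r.begin(), r.end());
    if (it != global.end()) {
        return (uint32_t)(it - global.begin()); // found as an existing run
    }
    uint32_t offset = (uint32_t)global.size();
    global.insert(global.end(), r.begin(), r.end());
    cache.emplace(std::move(r), offset);
    return offset;
}

Since every stored list ends with the sentinel, any match found by
std::search is itself a correctly terminated list.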
- map, u32> reportListCache; - for (auto v : vertices_range(h)) { const u32 i = args.state_ids.at(v); @@ -1383,8 +1425,12 @@ u32 buildExceptionMap(const build_info &args, DEBUG_PRINTF("state %u is exceptional due to accept " "(%zu reports)\n", i, reports.size()); - e.reports_index = - getReportListIndex(reports, exceptionReports, reportListCache); + if (reports.empty()) { + e.reports_index = MO_INVALID_IDX; + } else { + e.reports_index = + addReports(reports, reportList, reports_cache); + } // We may be applying a report squash too. auto mi = args.reportSquashMap.find(v); @@ -1810,9 +1856,10 @@ struct Factory { } static - void writeExceptions(const map > &exceptionMap, - const vector &repeatOffsets, - implNFA_t *limex, const u32 exceptionsOffset) { + void writeExceptions(const map> &exceptionMap, + const vector &repeatOffsets, implNFA_t *limex, + const u32 exceptionsOffset, + const u32 reportListOffset) { DEBUG_PRINTF("exceptionsOffset=%u\n", exceptionsOffset); exception_t *etable = (exception_t *)((char *)limex + exceptionsOffset); @@ -1839,7 +1886,12 @@ struct Factory { exception_t &e = etable[ecount]; maskSetBits(e.squash, proto.squash_states); maskSetBits(e.successors, proto.succ_states); - e.reports = proto.reports_index; + if (proto.reports_index == MO_INVALID_IDX) { + e.reports = MO_INVALID_IDX; + } else { + e.reports = reportListOffset + + proto.reports_index * sizeof(ReportID); + } e.hasSquash = verify_u8(proto.squash); e.trigger = verify_u8(proto.trigger); u32 repeat_offset = proto.repeat_index == MO_INVALID_IDX @@ -1958,7 +2010,9 @@ struct Factory { const vector &acceptsEod, const vector &squash, implNFA_t *limex, const u32 acceptsOffset, const u32 acceptsEodOffset, - const u32 squashOffset) { + const u32 squashOffset, const u32 reportListOffset) { + char *limex_base = (char *)limex; + DEBUG_PRINTF("acceptsOffset=%u, acceptsEodOffset=%u, squashOffset=%u\n", acceptsOffset, acceptsEodOffset, squashOffset); @@ -1966,27 +2020,38 @@ struct Factory { maskSetBits(limex->accept, acceptMask); maskSetBits(limex->acceptAtEOD, acceptEodMask); + // Transforms the index into the report list into an offset relative to + // the base of the limex. + auto report_offset_fn = [&](NFAAccept a) { + if (!a.single_report) { + a.reports = reportListOffset + a.reports * sizeof(ReportID); + } + return a; + }; + // Write accept table. limex->acceptOffset = acceptsOffset; limex->acceptCount = verify_u32(accepts.size()); DEBUG_PRINTF("NFA has %zu accepts\n", accepts.size()); - NFAAccept *acceptsTable = (NFAAccept *)((char *)limex + acceptsOffset); + NFAAccept *acceptsTable = (NFAAccept *)(limex_base + acceptsOffset); assert(ISALIGNED(acceptsTable)); - copy(accepts.begin(), accepts.end(), acceptsTable); + transform(accepts.begin(), accepts.end(), acceptsTable, + report_offset_fn); // Write eod accept table. limex->acceptEodOffset = acceptsEodOffset; limex->acceptEodCount = verify_u32(acceptsEod.size()); DEBUG_PRINTF("NFA has %zu EOD accepts\n", acceptsEod.size()); - NFAAccept *acceptsEodTable = (NFAAccept *)((char *)limex + acceptsEodOffset); + NFAAccept *acceptsEodTable = (NFAAccept *)(limex_base + acceptsEodOffset); assert(ISALIGNED(acceptsEodTable)); - copy(acceptsEod.begin(), acceptsEod.end(), acceptsEodTable); + transform(acceptsEod.begin(), acceptsEod.end(), acceptsEodTable, + report_offset_fn); // Write squash mask table. 
limex->squashCount = verify_u32(squash.size()); limex->squashOffset = squashOffset; DEBUG_PRINTF("NFA has %zu report squash masks\n", squash.size()); - tableRow_t *mask = (tableRow_t *)((char *)limex + squashOffset); + tableRow_t *mask = (tableRow_t *)(limex_base + squashOffset); assert(ISALIGNED(mask)); for (size_t i = 0, end = squash.size(); i < end; i++) { maskSetBits(mask[i], squash[i]); @@ -2023,15 +2088,12 @@ struct Factory { } static - void writeExceptionReports(const vector &reports, - implNFA_t *limex, - const u32 exceptionReportsOffset) { - DEBUG_PRINTF("exceptionReportsOffset=%u\n", exceptionReportsOffset); - - limex->exReportOffset = exceptionReportsOffset; - assert(ISALIGNED_N((char *)limex + exceptionReportsOffset, + void writeReportList(const vector &reports, implNFA_t *limex, + const u32 reportListOffset) { + DEBUG_PRINTF("reportListOffset=%u\n", reportListOffset); + assert(ISALIGNED_N((char *)limex + reportListOffset, alignof(ReportID))); - copy_bytes((char *)limex + exceptionReportsOffset, reports); + copy_bytes((char *)limex + reportListOffset, reports); } static @@ -2050,6 +2112,10 @@ struct Factory { repeatSize += repeats[i].second; } + // We track report lists that have already been written into the global + // list in case we can reuse them. + unordered_map, u32> reports_cache; + ue2::unordered_set exceptional; u32 shiftCount = findBestNumOfVarShifts(args); assert(shiftCount); @@ -2057,9 +2123,10 @@ struct Factory { findExceptionalTransitions(args, exceptional, maxShift); map > exceptionMap; - vector exceptionReports; - u32 exceptionCount = buildExceptionMap(args, exceptional, exceptionMap, - exceptionReports); + vector reportList; + + u32 exceptionCount = buildExceptionMap(args, reports_cache, exceptional, + exceptionMap, reportList); assert(exceptionCount <= args.num_states); @@ -2076,8 +2143,8 @@ struct Factory { NFAStateSet acceptMask, acceptEodMask; vector accepts, acceptsEod; vector squash; - buildAccepts(args, acceptMask, acceptEodMask, accepts, acceptsEod, - squash); + buildAccepts(args, reports_cache, acceptMask, acceptEodMask, accepts, + acceptsEod, reportList, squash); // Build all our accel info. NFAStateSet accelMask, accelFriendsMask; @@ -2118,8 +2185,8 @@ struct Factory { const u32 exceptionsOffset = offset; offset += sizeof(exception_t) * exceptionCount; - const u32 exceptionReportsOffset = offset; - offset += sizeof(ReportID) * exceptionReports.size(); + const u32 reportListOffset = offset; + offset += sizeof(ReportID) * reportList.size(); const u32 repeatOffsetsOffset = offset; offset += sizeof(u32) * args.repeats.size(); @@ -2146,7 +2213,8 @@ struct Factory { limex, accelTableOffset, accelAuxOffset); writeAccepts(acceptMask, acceptEodMask, accepts, acceptsEod, squash, - limex, acceptsOffset, acceptsEodOffset, squashOffset); + limex, acceptsOffset, acceptsEodOffset, squashOffset, + reportListOffset); limex->shiftCount = shiftCount; writeShiftMasks(args, limex); @@ -2154,14 +2222,15 @@ struct Factory { // Determine the state required for our state vector. findStateSize(args, limex); - writeExceptionReports(exceptionReports, limex, exceptionReportsOffset); + writeReportList(reportList, limex, reportListOffset); // Repeat structures and offset table. 
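The running-offset pattern above, which carves typed sections out of one
flat engine allocation, in miniature (section sizes here are invented):

#include <cstdint>
#include <cstdio>

int main(void) {
    uint32_t offset = 128; // say, the end of the fixed-size header
    const uint32_t exceptionsOffset = offset;
    offset += 16 * 4; // four 16-byte exception records
    const uint32_t reportListOffset = offset;
    offset += sizeof(uint32_t) * 10; // ten ReportID entries
    const uint32_t repeatOffsetsOffset = offset;
    offset += sizeof(uint32_t) * 2; // two repeat offsets
    printf("exceptions@%u reports@%u repeats@%u total=%u\n",
           exceptionsOffset, reportListOffset, repeatOffsetsOffset, offset);
    return 0;
}

Each table then records its offset in the engine header, so the runtime
can recover typed pointers from the single base pointer.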
vector repeatOffsets; writeRepeats(repeats, repeatOffsets, limex, repeatOffsetsOffset, repeatsOffset); - writeExceptions(exceptionMap, repeatOffsets, limex, exceptionsOffset); + writeExceptions(exceptionMap, repeatOffsets, limex, exceptionsOffset, + reportListOffset); writeLimexMasks(args, limex); diff --git a/src/nfa/limex_dump.cpp b/src/nfa/limex_dump.cpp index 181951dc..149e8107 100644 --- a/src/nfa/limex_dump.cpp +++ b/src/nfa/limex_dump.cpp @@ -180,26 +180,40 @@ void dumpAccel(const limex_type *limex, FILE *f) { } } +static +void dumpAcceptList(const char *limex_base, const struct NFAAccept *accepts, + u32 acceptCount, FILE *f) { + for (u32 i = 0; i < acceptCount; i++) { + const NFAAccept &a = accepts[i]; + if (a.single_report) { + fprintf(f, " idx %u fires single report %u\n", i, a.reports); + continue; + } + fprintf(f, " idx %u fires report list %u:", i, a.reports); + const ReportID *report = (const ReportID *)(limex_base + a.reports); + for (; *report != MO_INVALID_IDX; report++) { + fprintf(f, " %u", *report); + } + fprintf(f, "\n"); + } +} + template static void dumpAccepts(const limex_type *limex, FILE *f) { - u32 acceptCount = limex->acceptCount; - u32 acceptEodCount = limex->acceptEodCount; + const char *limex_base = (const char *)limex; + + const u32 acceptCount = limex->acceptCount; + const u32 acceptEodCount = limex->acceptEodCount; fprintf(f, "\n%u accepts.\n", acceptCount); - const struct NFAAccept *accepts - = (const struct NFAAccept *)((const char *)limex + limex->acceptOffset); - for (u32 i = 0; i < acceptCount; i++) { - fprintf(f, " state %u fires report %u\n", accepts[i].state, - accepts[i].externalId); - } + const auto *accepts = + (const struct NFAAccept *)(limex_base + limex->acceptOffset); + dumpAcceptList(limex_base, accepts, acceptCount, f); fprintf(f, "\n%u accepts at EOD.\n", acceptEodCount); - accepts = (const struct NFAAccept *)((const char *)limex - + limex->acceptEodOffset); - for (u32 i = 0; i < acceptEodCount; i++) { - fprintf(f, " state %u fires report %u\n", accepts[i].state, - accepts[i].externalId); - } + const auto *accepts_eod = + (const struct NFAAccept *)(limex_base + limex->acceptEodOffset); + dumpAcceptList(limex_base, accepts_eod, acceptEodCount, f); fprintf(f, "\n"); } @@ -226,20 +240,15 @@ getExceptionTable(const limex_type *limex) { ((const char *)limex + limex->exceptionOffset); } -template -static -const ReportID *getReportList(const limex_type *limex) { - return (const ReportID *)((const char *)limex + limex->exReportOffset); -} - template static void dumpLimexExceptions(const limex_type *limex, FILE *f) { const typename limex_traits::exception_type *e = getExceptionTable(limex); - const ReportID *reports = getReportList(limex); const u32 size = limex_traits::size; + const char *limex_base = (const char *)limex; + fprintf(f, "\n"); for (u32 i = 0; i < limex->exceptionCount; i++) { fprintf(f, "exception %u: hasSquash=%u, reports offset=%u\n", @@ -255,7 +264,7 @@ void dumpLimexExceptions(const limex_type *limex, FILE *f) { if (e[i].reports == MO_INVALID_IDX) { fprintf(f, " \n"); } else { - const ReportID *r = reports + e[i].reports; + const ReportID *r = (const ReportID *)(limex_base + e[i].reports); while (*r != MO_INVALID_IDX) { fprintf(f, " %u", *r++); } diff --git a/src/nfa/limex_exceptional.h b/src/nfa/limex_exceptional.h index c8296f91..e770c327 100644 --- a/src/nfa/limex_exceptional.h +++ b/src/nfa/limex_exceptional.h @@ -95,7 +95,6 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, STATE_T *local_succ, #endif const struct 
IMPL_NFA_T *limex, - const ReportID *exReports, u64a offset, struct CONTEXT_T *ctx, struct proto_cache *new_cache, @@ -161,7 +160,8 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, // Some exceptions fire accepts. if (e->reports != MO_INVALID_IDX) { if (flags & CALLBACK_OUTPUT) { - const ReportID *reports = exReports + e->reports; + const ReportID *reports = + (const ReportID *)((const char *)limex + e->reports); if (unlikely(limexRunReports(reports, ctx->callback, ctx->context, offset) == MO_HALT_MATCHING)) { @@ -210,8 +210,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, static really_inline int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, const struct IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, - const ReportID *exReports, u64a offset, struct CONTEXT_T *ctx, - char in_rev, char flags) { + u64a offset, struct CONTEXT_T *ctx, char in_rev, char flags) { assert(diffmask > 0); // guaranteed by caller macro if (EQ_STATE(estate, ctx->cached_estate)) { @@ -271,8 +270,8 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, #ifndef BIG_MODEL &local_succ, #endif - limex, exReports, offset, ctx, &new_cache, - &cacheable, in_rev, flags)) { + limex, offset, ctx, &new_cache, &cacheable, + in_rev, flags)) { return PE_RV_HALT; } } while (word); @@ -326,7 +325,9 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, #undef STATE_ARG_NAME #undef STATE_ARG_P +#undef IMPL_NFA_T + #undef CHUNK_T #undef FIND_AND_CLEAR_FN -#undef IMPL_NFA_T -#undef GET_NFA_REPEAT_INFO_FN +#undef POPCOUNT_FN +#undef RANK_IN_MASK_FN diff --git a/src/nfa/limex_internal.h b/src/nfa/limex_internal.h index 03ebb384..0d46732f 100644 --- a/src/nfa/limex_internal.h +++ b/src/nfa/limex_internal.h @@ -132,7 +132,6 @@ struct LimExNFA##size { \ u32 acceptEodOffset; /* rel. to start of LimExNFA */ \ u32 exceptionCount; \ u32 exceptionOffset; /* rel. to start of LimExNFA */ \ - u32 exReportOffset; /* rel. to start of LimExNFA */ \ u32 repeatCount; \ u32 repeatOffset; \ u32 squashOffset; /* rel. to start of LimExNFA; for accept squashing */ \ @@ -184,9 +183,16 @@ struct NFARepeatInfo { }; struct NFAAccept { - u32 state; //!< state ID of triggering state - ReportID externalId; //!< report ID to raise - u32 squash; //!< offset into masks, or MO_INVALID_IDX + u8 single_report; //!< If true, 'reports' is report id. + + /** + * \brief If single report is true, this is the report id to fire. + * Otherwise, it is the offset (relative to the start of the LimExNFA + * structure) of a list of reports, terminated with MO_INVALID_IDX. + */ + u32 reports; + + u32 squash; //!< Offset into squash masks, or MO_INVALID_IDX. 
}; #endif diff --git a/src/nfa/limex_native.c b/src/nfa/limex_native.c index c9949836..f6f5809c 100644 --- a/src/nfa/limex_native.c +++ b/src/nfa/limex_native.c @@ -73,8 +73,7 @@ static really_inline int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ, const struct LimExNFA32 *limex, - const struct NFAException32 *exceptions, - const ReportID *exReports, u64a offset, + const struct NFAException32 *exceptions, u64a offset, struct NFAContext32 *ctx, char in_rev, char flags) { assert(estate != 0); // guaranteed by calling macro @@ -104,8 +103,8 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ, u32 bit = findAndClearLSB_32(&estate); u32 idx = rank_in_mask32(limex->exceptionMask, bit); const struct NFAException32 *e = &exceptions[idx]; - if (!runException32(e, s, succ, &local_succ, limex, exReports, offset, - ctx, &new_cache, &cacheable, in_rev, flags)) { + if (!runException32(e, s, succ, &local_succ, limex, offset, ctx, + &new_cache, &cacheable, in_rev, flags)) { return PE_RV_HALT; } } while (estate != 0); diff --git a/src/nfa/limex_runtime.h b/src/nfa/limex_runtime.h index 75094ef6..6109d382 100644 --- a/src/nfa/limex_runtime.h +++ b/src/nfa/limex_runtime.h @@ -103,14 +103,42 @@ int limexRunReports(const ReportID *reports, NfaCallback callback, return MO_CONTINUE_MATCHING; // continue } +static really_inline +int limexRunAccept(const char *limex_base, const struct NFAAccept *accept, + NfaCallback callback, void *context, u64a offset) { + if (accept->single_report) { + const ReportID report = accept->reports; + DEBUG_PRINTF("firing single report for id %u at offset %llu\n", report, + offset); + return callback(0, offset, report, context); + } + const ReportID *reports = (const ReportID *)(limex_base + accept->reports); + return limexRunReports(reports, callback, context, offset); +} + +static really_inline +int limexAcceptHasReport(const char *limex_base, const struct NFAAccept *accept, + ReportID report) { + if (accept->single_report) { + return accept->reports == report; + } + + const ReportID *reports = (const ReportID *)(limex_base + accept->reports); + assert(*reports != MO_INVALID_IDX); + do { + if (*reports == report) { + return 1; + } + reports++; + } while (*reports != MO_INVALID_IDX); + + return 0; +} + /** \brief Return a (correctly typed) pointer to the exception table. */ #define getExceptionTable(exc_type, lim) \ ((const exc_type *)((const char *)(lim) + (lim)->exceptionOffset)) -/** \brief Return a pointer to the exceptional reports list. */ -#define getExReports(lim) \ - ((const ReportID *)((const char *)(lim) + (lim)->exReportOffset)) - /** \brief Return a pointer to the ordinary accepts table. */ #define getAcceptTable(lim) \ ((const struct NFAAccept *)((const char *)(lim) + (lim)->acceptOffset)) diff --git a/src/nfa/limex_runtime_impl.h b/src/nfa/limex_runtime_impl.h index 5bc79c24..45ceb2b5 100644 --- a/src/nfa/limex_runtime_impl.h +++ b/src/nfa/limex_runtime_impl.h @@ -103,8 +103,7 @@ // continue, 1 if an accept was fired and the user instructed us to halt. 
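As a standalone model of the dispatch performed by the limexRunAccept
helper above (names here are illustrative; the engine base pointer
stands in for the LimExNFA structure):

#include <cstdint>
#include <cstdio>

typedef uint32_t ReportID;
static const ReportID INVALID = ~0u; // stands in for MO_INVALID_IDX

// Mirrors the new NFAAccept shape: 'reports' holds either a report id or
// a byte offset from the engine base, selected by 'single_report'.
struct Accept {
    uint8_t single_report;
    uint32_t reports;
};

// Fire every report attached to an accept entry.
static void fire(const char *engine_base, const struct Accept *a) {
    if (a->single_report) {
        printf("report %u\n", a->reports); // common case: no list walk
        return;
    }
    const ReportID *r = (const ReportID *)(engine_base + a->reports);
    for (; *r != INVALID; r++) { // list is sentinel-terminated
        printf("report %u\n", *r);
    }
}

Storing the id inline when there is exactly one report avoids an extra
indirection in the common case.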
static really_inline char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, - const ReportID *exReports, STATE_T s, - const STATE_T emask, size_t i, u64a offset, + STATE_T s, const STATE_T emask, size_t i, u64a offset, STATE_T *succ, u64a *final_loc, struct CONTEXT_T *ctx, const char flags, const char in_rev, const char first_match) { @@ -131,7 +130,7 @@ char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, char localflags = (!i && !in_rev) ? NO_OUTPUT | FIRST_BYTE : flags; int rv = JOIN(processExceptional, SIZE)( - pass_state, pass_estate, diffmask, succ, limex, exceptions, exReports, + pass_state, pass_estate, diffmask, succ, limex, exceptions, callback_offset, ctx, in_rev, localflags); if (rv == PE_RV_HALT) { return 1; // Halt matching. @@ -207,7 +206,6 @@ char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, const union AccelAux *accelAux = (const union AccelAux *)((const char *)limex + limex->accelAuxOffset); const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); - const ReportID *exReports = getExReports(limex); STATE_T s = ctx->s; /* assert(ISALIGNED_16(exceptions)); */ @@ -235,9 +233,8 @@ without_accel: STATE_T succ; NFA_EXEC_GET_LIM_SUCC(limex, s, succ); - if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK, - i, offset, &succ, final_loc, ctx, flags, 0, - first_match)) { + if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset, + &succ, final_loc, ctx, flags, 0, first_match)) { return MO_HALT_MATCHING; } @@ -286,9 +283,8 @@ with_accel: STATE_T succ; NFA_EXEC_GET_LIM_SUCC(limex, s, succ); - if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK, - i, offset, &succ, final_loc, ctx, flags, 0, - first_match)) { + if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset, + &succ, final_loc, ctx, flags, 0, first_match)) { return MO_HALT_MATCHING; } @@ -300,8 +296,6 @@ with_accel: if ((first_match || (flags & CALLBACK_OUTPUT)) && limex->acceptCount) { STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); const struct NFAAccept *acceptTable = getAcceptTable(limex); - const u32 acceptCount = limex->acceptCount; - STATE_T foundAccepts = AND_STATE(s, acceptMask); if (unlikely(ISNONZERO_STATE(foundAccepts))) { if (first_match) { @@ -309,8 +303,8 @@ with_accel: assert(final_loc); *final_loc = length; return MO_HALT_MATCHING; - } else if (PROCESS_ACCEPTS_FN(limex, &ctx->s, acceptTable, - acceptCount, offset + length, + } else if (PROCESS_ACCEPTS_FN(limex, &ctx->s, &acceptMask, + acceptTable, offset + length, ctx->callback, ctx->context)) { return MO_HALT_MATCHING; } @@ -331,7 +325,6 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask); #endif const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); - const ReportID *exReports = getExReports(limex); STATE_T s = ctx->s; /* assert(ISALIGNED_16(exceptions)); */ @@ -351,9 +344,8 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, STATE_T succ; NFA_EXEC_GET_LIM_SUCC(limex, s, succ); - if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, - EXCEPTION_MASK, i, offset, &succ, final_loc, ctx, - flags, 1, 0)) { + if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset, + &succ, final_loc, ctx, flags, 1, 0)) { return MO_HALT_MATCHING; } @@ -369,8 +361,8 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, if (acceptCount) { STATE_T foundAccepts 
= AND_STATE(s, acceptMask);
         if (unlikely(ISNONZERO_STATE(foundAccepts))) {
-            if (PROCESS_ACCEPTS_NOSQUASH_FN(&ctx->s, acceptTable, acceptCount,
-                                            offset, ctx->callback,
+            if (PROCESS_ACCEPTS_NOSQUASH_FN(limex, &ctx->s, &acceptMask,
+                                            acceptTable, offset, ctx->callback,
                                             ctx->context)) {
                 return MO_HALT_MATCHING;
             }

From 5e3fa7a26610d98715a3e2c9372716f2660ccc9a Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Mon, 5 Sep 2016 09:56:58 +1000
Subject: [PATCH 020/103] limex: make NFAAccept::squash rel to LimEx base

---
 src/nfa/limex_common_impl.h | 23 ++++++++---------------
 src/nfa/limex_compile.cpp   | 11 ++++++-----
 src/nfa/limex_internal.h    |  2 +-
 3 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/src/nfa/limex_common_impl.h b/src/nfa/limex_common_impl.h
index 5bd5187b..e441945d 100644
--- a/src/nfa/limex_common_impl.h
+++ b/src/nfa/limex_common_impl.h
@@ -115,8 +115,7 @@ void SQUASH_UNTUG_BR_FN(const IMPL_NFA_T *limex,
 
 static really_inline
 char PROCESS_ACCEPTS_IMPL_FN(const IMPL_NFA_T *limex, const STATE_T *s,
-                             STATE_T *squash, const ENG_STATE_T *squashMasks,
-                             const STATE_T *acceptMask,
+                             STATE_T *squash, const STATE_T *acceptMask,
                              const struct NFAAccept *acceptTable, u64a offset,
                              NfaCallback callback, void *context) {
     assert(s);
@@ -151,10 +150,9 @@ char PROCESS_ACCEPTS_IMPL_FN(const IMPL_NFA_T *limex, const STATE_T *s,
                 return 1;
             }
             if (squash != NULL && a->squash != MO_INVALID_IDX) {
-                assert(squashMasks);
-                assert(a->squash < limex->squashCount);
-                const ENG_STATE_T *sq = &squashMasks[a->squash];
-                DEBUG_PRINTF("squash mask %u @ %p\n", a->squash, sq);
+                DEBUG_PRINTF("applying squash mask at offset %u\n", a->squash);
+                const ENG_STATE_T *sq =
+                    (const ENG_STATE_T *)((const char *)limex + a->squash);
                 *squash = AND_STATE(*squash, LOAD_FROM_ENG(sq));
             }
         }
@@ -171,11 +169,8 @@ char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s,
                         NfaCallback callback, void *context) {
     // We have squash masks we might have to apply after firing reports.
     STATE_T squash = ONES_STATE;
-    const ENG_STATE_T *squashMasks = (const ENG_STATE_T *)
-        ((const char *)limex + limex->squashOffset);
-
-    char rv = PROCESS_ACCEPTS_IMPL_FN(limex, s, &squash, squashMasks,
-                                      acceptMask, acceptTable, offset,
-                                      callback, context);
+    char rv = PROCESS_ACCEPTS_IMPL_FN(limex, s, &squash, acceptMask,
+                                      acceptTable, offset, callback, context);
 
     *s = AND_STATE(*s, squash);
@@ -187,10 +182,8 @@ char PROCESS_ACCEPTS_NOSQUASH_FN(const IMPL_NFA_T *limex, const STATE_T *s,
                                  u64a offset, NfaCallback callback,
                                  void *context) {
     STATE_T *squash = NULL;
-    const ENG_STATE_T *squashMasks = NULL;
-
-    return PROCESS_ACCEPTS_IMPL_FN(limex, s, squash, squashMasks, acceptMask,
-                                   acceptTable, offset, callback, context);
+    return PROCESS_ACCEPTS_IMPL_FN(limex, s, squash, acceptMask, acceptTable,
+                                   offset, callback, context);
 }
 
 // Run EOD accepts. Note that repeat_ctrl and repeat_state may be NULL if this
diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp
index 89eaf10a..53a003e3 100644
--- a/src/nfa/limex_compile.cpp
+++ b/src/nfa/limex_compile.cpp
@@ -2020,12 +2020,13 @@ struct Factory {
         maskSetBits(limex->accept, acceptMask);
         maskSetBits(limex->acceptAtEOD, acceptEodMask);
 
-        // Transforms the index into the report list into an offset relative to
-        // the base of the limex.
-        auto report_offset_fn = [&](NFAAccept a) {
+        // Transforms the indices (report list, squash mask) into offsets
+        // relative to the base of the limex.
+ auto transform_offset_fn = [&](NFAAccept a) { if (!a.single_report) { a.reports = reportListOffset + a.reports * sizeof(ReportID); } + a.squash = squashOffset + a.squash * sizeof(tableRow_t); return a; }; @@ -2036,7 +2037,7 @@ struct Factory { NFAAccept *acceptsTable = (NFAAccept *)(limex_base + acceptsOffset); assert(ISALIGNED(acceptsTable)); transform(accepts.begin(), accepts.end(), acceptsTable, - report_offset_fn); + transform_offset_fn); // Write eod accept table. limex->acceptEodOffset = acceptsEodOffset; @@ -2045,7 +2046,7 @@ struct Factory { NFAAccept *acceptsEodTable = (NFAAccept *)(limex_base + acceptsEodOffset); assert(ISALIGNED(acceptsEodTable)); transform(acceptsEod.begin(), acceptsEod.end(), acceptsEodTable, - report_offset_fn); + transform_offset_fn); // Write squash mask table. limex->squashCount = verify_u32(squash.size()); diff --git a/src/nfa/limex_internal.h b/src/nfa/limex_internal.h index 0d46732f..723803c1 100644 --- a/src/nfa/limex_internal.h +++ b/src/nfa/limex_internal.h @@ -192,7 +192,7 @@ struct NFAAccept { */ u32 reports; - u32 squash; //!< Offset into squash masks, or MO_INVALID_IDX. + u32 squash; //!< Offset (from LimEx) into squash masks, or MO_INVALID_IDX. }; #endif From 04d79629de8b3da96d8b90f760fff433a6ba01d9 Mon Sep 17 00:00:00 2001 From: "Xu, Chi" Date: Thu, 1 Sep 2016 07:48:04 +0800 Subject: [PATCH 021/103] rose: add shufti-based lookaround instructions More lookaround specialisations that use the shufti approach. --- CMakeLists.txt | 1 + src/rose/program_runtime.h | 275 +++++++++++++++++++++++++++++++ src/rose/rose_build_bytecode.cpp | 180 ++++++++++++++++++++ src/rose/rose_build_program.cpp | 54 ++++++ src/rose/rose_build_program.h | 184 +++++++++++++++++++++ src/rose/rose_dump.cpp | 65 ++++++++ src/rose/rose_program.h | 46 ++++++ src/rose/validate_shufti.h | 175 ++++++++++++++++++++ src/util/simd_utils.h | 33 ++++ 9 files changed, 1013 insertions(+) create mode 100644 src/rose/validate_shufti.h diff --git a/CMakeLists.txt b/CMakeLists.txt index de51c016..76d79821 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -555,6 +555,7 @@ set (hs_exec_SRCS src/rose/rose_types.h src/rose/rose_common.h src/rose/validate_mask.h + src/rose/validate_shufti.h src/util/bitutils.h src/util/copybytes.h src/util/exhaust.h diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 100d9140..57f39bbe 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -45,6 +45,7 @@ #include "rose_program.h" #include "rose_types.h" #include "validate_mask.h" +#include "validate_shufti.h" #include "runtime.h" #include "scratch.h" #include "ue2common.h" @@ -793,6 +794,231 @@ int roseCheckMask32(const struct core_info *ci, const u8 *and_mask, return 0; } +// get 128/256 bits data from history and current buffer. +// return data and valid_data_mask. +static rose_inline +u32 getBufferDataComplex(const struct core_info *ci, const s64a loc, + u8 *data, const u32 data_len) { + assert(data_len == 16 || data_len == 32); + s32 c_shift = 0; // blank bytes after current. + s32 h_shift = 0; // blank bytes before history. + s32 h_len = data_len; // number of bytes from history buffer. + s32 c_len = 0; // number of bytes from current buffer. + if (loc < 0) { + s32 h_offset = 0; // the start offset in history buffer. 
+ if (loc < -(s64a)ci->hlen) { + if (loc + data_len <= -(s64a)ci->hlen) { + DEBUG_PRINTF("all before history\n"); + return 0; + } + h_shift = -(loc + (s64a)ci->hlen); + h_len = data_len - h_shift; + } else { + h_offset = ci->hlen + loc; + } + if (loc + data_len > 0) { + // part in current buffer. + c_len = loc + data_len; + h_len = -(loc + h_shift); + if (c_len > (s64a)ci->len) { + // out of current buffer. + c_shift = c_len - ci->len; + c_len = ci->len; + } + copy_upto_32_bytes(data - loc, ci->buf, c_len); + } + assert(h_shift + h_len + c_len + c_shift == (s32)data_len); + copy_upto_32_bytes(data + h_shift, ci->hbuf + h_offset, h_len); + } else { + if (loc + data_len > (s64a)ci->len) { + if (loc >= (s64a)ci->len) { + DEBUG_PRINTF("all in the future.\n"); + return 0; + } + c_len = ci->len - loc; + c_shift = data_len - c_len; + copy_upto_32_bytes(data, ci->buf + loc, c_len); + } else { + if (data_len == 16) { + storeu128(data, loadu128(ci->buf + loc)); + return 0xffff; + } else { + storeu256(data, loadu256(ci->buf + loc)); + return 0xffffffff; + } + } + } + DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift); + DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len); + + if (data_len == 16) { + return (u16)(0xffff << (h_shift + c_shift)) >> c_shift; + } else { + return (~0u) << (h_shift + c_shift) >> c_shift; + } +} + +static rose_inline +m128 getData128(const struct core_info *ci, s64a offset, u16 *valid_data_mask) { + if (offset > 0 && offset + sizeof(m128) <= ci->len) { + *valid_data_mask = 0xffff; + return loadu128(ci->buf + offset); + } + u8 data[sizeof(m128)] ALIGN_DIRECTIVE; + *valid_data_mask = (u16)getBufferDataComplex(ci, offset, data, 16); + return *(m128 *)data; +} + +static rose_inline +m256 getData256(const struct core_info *ci, s64a offset, u32 *valid_data_mask) { + if (offset > 0 && offset + sizeof(m256) <= ci->len) { + *valid_data_mask = ~0u; + return loadu256(ci->buf + offset); + } + u8 data[sizeof(m256)] ALIGN_DIRECTIVE; + *valid_data_mask = getBufferDataComplex(ci, offset, data, 32); + return *(m256 *)data; +} + +static rose_inline +int roseCheckShufti16x8(const struct core_info *ci, const u8 *nib_mask, + const u8 *bucket_select_mask, u32 neg_mask, + s32 checkOffset, u64a end) { + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + u16 valid_data_mask = 0; + m128 data = getData128(ci, offset, &valid_data_mask); + if (unlikely(!valid_data_mask)) { + return 1; + } + + m256 nib_mask_m256 = loadu256(nib_mask); + m128 bucket_select_mask_m128 = loadu128(bucket_select_mask); + if (validateShuftiMask16x8(data, nib_mask_m256, + bucket_select_mask_m128, + neg_mask, valid_data_mask)) { + DEBUG_PRINTF("check shufti 16x8 successfully\n"); + return 1; + } else { + return 0; + } +} + +static rose_inline +int roseCheckShufti16x16(const struct core_info *ci, const u8 *hi_mask, + const u8 *lo_mask, const u8 *bucket_select_mask, + u32 neg_mask, s32 checkOffset, u64a end) { + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too 
early, fail\n"); + return 0; + } + + u16 valid_data_mask = 0; + m128 data = getData128(ci, offset, &valid_data_mask); + if (unlikely(!valid_data_mask)) { + return 1; + } + + m256 data_m256 = set2x128(data); + m256 hi_mask_m256 = loadu256(hi_mask); + m256 lo_mask_m256 = loadu256(lo_mask); + m256 bucket_select_mask_m256 = loadu256(bucket_select_mask); + if (validateShuftiMask16x16(data_m256, hi_mask_m256, lo_mask_m256, + bucket_select_mask_m256, + neg_mask, valid_data_mask)) { + DEBUG_PRINTF("check shufti 16x16 successfully\n"); + return 1; + } else { + return 0; + } +} + +static rose_inline +int roseCheckShufti32x8(const struct core_info *ci, const u8 *hi_mask, + const u8 *lo_mask, const u8 *bucket_select_mask, + u32 neg_mask, s32 checkOffset, u64a end) { + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + u32 valid_data_mask = 0; + m256 data = getData256(ci, offset, &valid_data_mask); + if (unlikely(!valid_data_mask)) { + return 1; + } + + m128 hi_mask_m128 = loadu128(hi_mask); + m128 lo_mask_m128 = loadu128(lo_mask); + m256 hi_mask_m256 = set2x128(hi_mask_m128); + m256 lo_mask_m256 = set2x128(lo_mask_m128); + m256 bucket_select_mask_m256 = loadu256(bucket_select_mask); + if (validateShuftiMask32x8(data, hi_mask_m256, lo_mask_m256, + bucket_select_mask_m256, + neg_mask, valid_data_mask)) { + DEBUG_PRINTF("check shufti 32x8 successfully\n"); + return 1; + } else { + return 0; + } +} + +static rose_inline +int roseCheckShufti32x16(const struct core_info *ci, const u8 *hi_mask, + const u8 *lo_mask, const u8 *bucket_select_mask_hi, + const u8 *bucket_select_mask_lo, u32 neg_mask, + s32 checkOffset, u64a end) { + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + u32 valid_data_mask = 0; + m256 data = getData256(ci, offset, &valid_data_mask); + if (unlikely(!valid_data_mask)) { + return 1; + } + + m256 hi_mask_1 = loadu2x128(hi_mask); + m256 hi_mask_2 = loadu2x128(hi_mask + 16); + m256 lo_mask_1 = loadu2x128(lo_mask); + m256 lo_mask_2 = loadu2x128(lo_mask + 16); + + m256 bucket_mask_hi = loadu256(bucket_select_mask_hi); + m256 bucket_mask_lo = loadu256(bucket_select_mask_lo); + if (validateShuftiMask32x16(data, hi_mask_1, hi_mask_2, + lo_mask_1, lo_mask_2, bucket_mask_hi, + bucket_mask_lo, neg_mask, valid_data_mask)) { + DEBUG_PRINTF("check shufti 32x16 successfully\n"); + return 1; + } else { + return 0; + } +} + /** * \brief Scan around a literal, checking that that "lookaround" reach masks * are satisfied. 
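As a scalar model of the valid_data_mask idea used by
getBufferDataComplex and getData128 above (the real code stitches the
window with bulk copies and SIMD loads; this byte loop is illustrative
only):

#include <cstdint>
#include <cstring>

// Build a 16-byte window starting at 'loc', relative to the start of the
// current buffer; negative 'loc' reaches back into history. Returns one
// bit per byte, set where the byte came from real data.
static uint16_t get_window16(const uint8_t *hist, int64_t hlen,
                             const uint8_t *buf, int64_t len,
                             int64_t loc, uint8_t out[16]) {
    memset(out, 0, 16);
    uint16_t valid = 0;
    for (int i = 0; i < 16; i++) {
        int64_t pos = loc + i;
        if (pos < 0 && pos >= -hlen) {
            out[i] = hist[hlen + pos]; // byte lives in the history buffer
            valid |= (uint16_t)(1u << i);
        } else if (pos >= 0 && pos < len) {
            out[i] = buf[pos]; // byte lives in the current buffer
            valid |= (uint16_t)(1u << i);
        }
        // otherwise: before history or in the future; byte and bit stay 0
    }
    return valid;
}

The validators mask their per-byte results with these bits, so bytes
outside the known data are simply ignored.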
@@ -1235,6 +1461,55 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_SHUFTI_16x8) { + const struct core_info *ci = &scratch->core_info; + if (!roseCheckShufti16x8(ci, ri->nib_mask, + ri->bucket_select_mask, + ri->neg_mask, ri->offset, end)) { + assert(ri->fail_jump); + pc += ri-> fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_SHUFTI_32x8) { + const struct core_info *ci = &scratch->core_info; + if (!roseCheckShufti32x8(ci, ri->hi_mask, ri->lo_mask, + ri->bucket_select_mask, + ri->neg_mask, ri->offset, end)) { + assert(ri->fail_jump); + pc += ri-> fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_SHUFTI_16x16) { + const struct core_info *ci = &scratch->core_info; + if (!roseCheckShufti16x16(ci, ri->hi_mask, ri->lo_mask, + ri->bucket_select_mask, + ri->neg_mask, ri->offset, end)) { + assert(ri->fail_jump); + pc += ri-> fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_SHUFTI_32x16) { + const struct core_info *ci = &scratch->core_info; + if (!roseCheckShufti32x16(ci, ri->hi_mask, ri->lo_mask, + ri->bucket_select_mask_hi, + ri->bucket_select_mask_lo, + ri->neg_mask, ri->offset, end)) { + assert(ri->fail_jump); + pc += ri-> fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_INFIX) { if (!roseTestInfix(t, scratch, ri->queue, ri->lag, ri->report, end)) { diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 3356d214..04ab52ff 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -88,6 +88,7 @@ #include "util/verify_types.h" #include +#include #include #include #include @@ -2888,6 +2889,181 @@ bool makeRoleMask32(const vector &look, return true; } +// Sorting by the size of every bucket. +// Used in map, cmpNibble>. +struct cmpNibble { + bool operator()(const u32 data1, const u32 data2) const{ + u32 size1 = popcount32(data1 >> 16) * popcount32(data1 << 16); + u32 size2 = popcount32(data2 >> 16) * popcount32(data2 << 16); + return std::tie(size1, data1) < std::tie(size2, data2); + } +}; + +// Insert all pairs of bucket and offset into buckets. +static really_inline +void getAllBuckets(const vector &look, + map, cmpNibble> &buckets, u32 &neg_mask) { + s32 base_offset = verify_s32(look.front().offset); + for (const auto &entry : look) { + CharReach cr = entry.reach; + // Flip heavy character classes to save buckets. + if (cr.count() > 128 ) { + cr.flip(); + } else { + neg_mask ^= 1 << (entry.offset - base_offset); + } + map lo2hi; + // We treat Ascii Table as a 16x16 grid. + // Push every row in cr into lo2hi and mark the row number. + for (size_t i = cr.find_first(); i != CharReach::npos;) { + u8 it_hi = i >> 4; + u16 low_encode = 0; + while (i != CharReach::npos && (i >> 4) == it_hi) { + low_encode |= 1 << (i & 0xf); + i = cr.find_next(i); + } + lo2hi[low_encode] |= 1 << it_hi; + } + for (const auto &it : lo2hi) { + u32 hi_lo = (it.second << 16) | it.first; + buckets[hi_lo].push_back(entry.offset); + } + } +} + +// Once we have a new bucket, we'll try to combine it with all old buckets. 
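For intuition, a scalar sketch of the 16x16 decomposition that
getAllBuckets performs, assuming a plain 256-entry boolean reach table in
place of CharReach:

#include <cstdint>
#include <map>

// Treat the byte space as a 16x16 grid (hi nibble = row, lo nibble =
// column) and collect, for each distinct column pattern, the set of rows
// that use it. Characters sharing a column pattern across rows can share
// one shufti bucket.
static std::map<uint16_t, uint16_t> decompose(const bool reach[256]) {
    std::map<uint16_t, uint16_t> lo2hi;
    for (int hi = 0; hi < 16; hi++) {
        uint16_t lo_bits = 0;
        for (int lo = 0; lo < 16; lo++) {
            if (reach[(hi << 4) | lo]) {
                lo_bits |= (uint16_t)(1u << lo);
            }
        }
        if (lo_bits) {
            lo2hi[lo_bits] |= (uint16_t)(1u << hi);
        }
    }
    return lo2hi;
}

Each (column pattern, row set) pair is a candidate bucket; the nibUpdate
pass below then merges pairs that agree on either nibble.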
+static really_inline +void nibUpdate(map &nib, u32 hi_lo) { + u16 hi = hi_lo >> 16; + u16 lo = hi_lo & 0xffff; + for (const auto pairs : nib) { + u32 old = pairs.first; + if ((old >> 16) == hi || (old & 0xffff) == lo) { + if (!nib[old | hi_lo]) { + nib[old | hi_lo] = nib[old] | nib[hi_lo]; + } + } + } +} + +static really_inline +void nibMaskUpdate(array &mask, u32 data, u8 bit_index) { + for (u8 index = 0; data > 0; data >>= 1, index++) { + if (data & 1) { + // 0 ~ 7 bucket in first 16 bytes, + // 8 ~ 15 bucket in second 16 bytes. + if (bit_index >= 8) { + mask[index + 16] |= 1 << (bit_index - 8); + } else { + mask[index] |= 1 << bit_index; + } + } + } +} + +static +bool makeRoleShufti(const vector &look, + RoseProgram &program) { + + s32 base_offset = verify_s32(look.front().offset); + if (look.back().offset >= base_offset + 32) { + return false; + } + array hi_mask, lo_mask; + hi_mask.fill(0); + lo_mask.fill(0); + array bucket_select_hi, bucket_select_lo; + bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8. + bucket_select_lo.fill(0); + u8 bit_index = 0; // number of buckets + map nib; // map every bucket to its bucket number. + map, cmpNibble> bucket2offsets; + u32 neg_mask = ~0u; + + getAllBuckets(look, bucket2offsets, neg_mask); + + for (const auto &it : bucket2offsets) { + u32 hi_lo = it.first; + // New bucket. + if (!nib[hi_lo]) { + if (bit_index >= 16) { + return false; + } + nib[hi_lo] = 1 << bit_index; + + nibUpdate(nib, hi_lo); + nibMaskUpdate(hi_mask, hi_lo >> 16, bit_index); + nibMaskUpdate(lo_mask, hi_lo & 0xffff, bit_index); + bit_index++; + } + + DEBUG_PRINTF("hi_lo %x bucket %x\n", hi_lo, nib[hi_lo]); + + // Update bucket_select_mask. + u8 nib_hi = nib[hi_lo] >> 8; + u8 nib_lo = nib[hi_lo] & 0xff; + for (const auto offset : it.second) { + bucket_select_hi[offset - base_offset] |= nib_hi; + bucket_select_lo[offset - base_offset] |= nib_lo; + } + } + + DEBUG_PRINTF("hi_mask %s\n", + convertMaskstoString(hi_mask.data(), 32).c_str()); + DEBUG_PRINTF("lo_mask %s\n", + convertMaskstoString(lo_mask.data(), 32).c_str()); + DEBUG_PRINTF("bucket_select_hi %s\n", + convertMaskstoString(bucket_select_hi.data(), 32).c_str()); + DEBUG_PRINTF("bucket_select_lo %s\n", + convertMaskstoString(bucket_select_lo.data(), 32).c_str()); + + const auto *end_inst = program.end_instruction(); + if (bit_index < 8) { + if (look.back().offset < base_offset + 16) { + neg_mask &= 0xffff; + array nib_mask; + array bucket_select_mask_16; + copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin()); + copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin() + 16); + copy(bucket_select_lo.begin(), bucket_select_lo.begin() + 16, + bucket_select_mask_16.begin()); + auto ri = make_unique + (nib_mask, bucket_select_mask_16, + neg_mask, base_offset, end_inst); + program.add_before_end(move(ri)); + } else { + array hi_mask_16; + array lo_mask_16; + copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_16.begin()); + copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_16.begin()); + auto ri = make_unique + (hi_mask_16, lo_mask_16, bucket_select_lo, + neg_mask, base_offset, end_inst); + program.add_before_end(move(ri)); + } + } else { + if (look.back().offset < base_offset + 16) { + neg_mask &= 0xffff; + array bucket_select_mask_32; + copy(bucket_select_lo.begin(), bucket_select_lo.begin() + 16, + bucket_select_mask_32.begin()); + copy(bucket_select_hi.begin(), bucket_select_hi.begin() + 16, + bucket_select_mask_32.begin() + 16); + auto ri = make_unique + (hi_mask, lo_mask, bucket_select_mask_32, 
+ neg_mask, base_offset, end_inst); + program.add_before_end(move(ri)); + } else { + return false; + auto ri = make_unique + (hi_mask, lo_mask, bucket_select_hi, bucket_select_lo, + neg_mask, base_offset, end_inst); + program.add_before_end(move(ri)); + } + } + return true; +} + /** * Builds a lookaround instruction, or an appropriate specialization if one is * available. @@ -2909,6 +3085,10 @@ void makeLookaroundInstruction(build_context &bc, const vector &look, return; } + if (makeRoleShufti(look, program)) { + return; + } + u32 look_idx = addLookaround(bc, look); u32 look_count = verify_u32(look.size()); diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index 168022f3..69ad31a9 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -154,6 +154,60 @@ void RoseInstrCheckByte::write(void *dest, RoseEngineBlob &blob, inst->fail_jump = calc_jump(offset_map, this, target); } +void RoseInstrCheckShufti16x8::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(nib_mask), end(nib_mask), inst->nib_mask); + copy(begin(bucket_select_mask), end(bucket_select_mask), + inst->bucket_select_mask); + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckShufti32x8::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(hi_mask), end(hi_mask), inst->hi_mask); + copy(begin(lo_mask), end(lo_mask), inst->lo_mask); + copy(begin(bucket_select_mask), end(bucket_select_mask), + inst->bucket_select_mask); + + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckShufti16x16::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(hi_mask), end(hi_mask), inst->hi_mask); + copy(begin(lo_mask), end(lo_mask), inst->lo_mask); + copy(begin(bucket_select_mask), end(bucket_select_mask), + inst->bucket_select_mask); + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckShufti32x16::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(hi_mask), end(hi_mask), inst->hi_mask); + copy(begin(lo_mask), end(lo_mask), inst->lo_mask); + copy(begin(bucket_select_mask_hi), end(bucket_select_mask_hi), + inst->bucket_select_mask_hi); + copy(begin(bucket_select_mask_lo), end(bucket_select_mask_lo), + inst->bucket_select_mask_lo); + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + void RoseInstrCheckInfix::write(void *dest, RoseEngineBlob &blob, const OffsetMap &offset_map) const { RoseInstrBase::write(dest, blob, offset_map); diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h index 27aeffbe..309a1b3e 100644 --- a/src/rose/rose_build_program.h +++ b/src/rose/rose_build_program.h @@ -524,6 +524,190 @@ public: } }; +class RoseInstrCheckShufti16x8 + : public RoseInstrBaseOneTarget { +public: + std::array nib_mask; + std::array bucket_select_mask; + u32 neg_mask; + s32 offset; + const 
+
+    RoseInstrCheckShufti16x8(std::array<u8, 32> nib_mask_in,
+                             std::array<u8, 16> bucket_select_mask_in,
+                             u32 neg_mask_in, s32 offset_in,
+                             const RoseInstruction *target_in)
+        : nib_mask(move(nib_mask_in)),
+          bucket_select_mask(move(bucket_select_mask_in)),
+          neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckShufti16x8 &ri) const {
+        return nib_mask == ri.nib_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), nib_mask,
+                        bucket_select_mask, neg_mask, offset);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckShufti16x8 &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return nib_mask == ri.nib_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckShufti32x8
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SHUFTI_32x8,
+                                    ROSE_STRUCT_CHECK_SHUFTI_32x8,
+                                    RoseInstrCheckShufti32x8> {
+public:
+    std::array<u8, 16> hi_mask;
+    std::array<u8, 16> lo_mask;
+    std::array<u8, 32> bucket_select_mask;
+    u32 neg_mask;
+    s32 offset;
+    const RoseInstruction *target;
+
+    RoseInstrCheckShufti32x8(std::array<u8, 16> hi_mask_in,
+                             std::array<u8, 16> lo_mask_in,
+                             std::array<u8, 32> bucket_select_mask_in,
+                             u32 neg_mask_in, s32 offset_in,
+                             const RoseInstruction *target_in)
+        : hi_mask(move(hi_mask_in)), lo_mask(move(lo_mask_in)),
+          bucket_select_mask(move(bucket_select_mask_in)),
+          neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckShufti32x8 &ri) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), hi_mask, lo_mask,
+                        bucket_select_mask, neg_mask, offset);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckShufti32x8 &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckShufti16x16
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SHUFTI_16x16,
+                                    ROSE_STRUCT_CHECK_SHUFTI_16x16,
+                                    RoseInstrCheckShufti16x16> {
+public:
+    std::array<u8, 32> hi_mask;
+    std::array<u8, 32> lo_mask;
+    std::array<u8, 32> bucket_select_mask;
+    u32 neg_mask;
+    s32 offset;
+    const RoseInstruction *target;
+
+    RoseInstrCheckShufti16x16(std::array<u8, 32> hi_mask_in,
+                              std::array<u8, 32> lo_mask_in,
+                              std::array<u8, 32> bucket_select_mask_in,
+                              u32 neg_mask_in, s32 offset_in,
+                              const RoseInstruction *target_in)
+        : hi_mask(move(hi_mask_in)), lo_mask(move(lo_mask_in)),
+          bucket_select_mask(move(bucket_select_mask_in)),
+          neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckShufti16x16 &ri) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), hi_mask, lo_mask,
+                        bucket_select_mask, neg_mask, offset);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckShufti16x16 &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckShufti32x16
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SHUFTI_32x16,
+                                    ROSE_STRUCT_CHECK_SHUFTI_32x16,
+                                    RoseInstrCheckShufti32x16> {
+public:
+    std::array<u8, 32> hi_mask;
+    std::array<u8, 32> lo_mask;
+    std::array<u8, 32> bucket_select_mask_hi;
+    std::array<u8, 32> bucket_select_mask_lo;
+    u32 neg_mask;
+    s32 offset;
+    const RoseInstruction *target;
+
+    RoseInstrCheckShufti32x16(std::array<u8, 32> hi_mask_in,
+                              std::array<u8, 32> lo_mask_in,
+                              std::array<u8, 32> bucket_select_mask_hi_in,
+                              std::array<u8, 32> bucket_select_mask_lo_in,
+                              u32 neg_mask_in, s32 offset_in,
+                              const RoseInstruction *target_in)
+        : hi_mask(move(hi_mask_in)), lo_mask(move(lo_mask_in)),
+          bucket_select_mask_hi(move(bucket_select_mask_hi_in)),
+          bucket_select_mask_lo(move(bucket_select_mask_lo_in)),
+          neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckShufti32x16 &ri) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask_hi == ri.bucket_select_mask_hi &&
+               bucket_select_mask_lo == ri.bucket_select_mask_lo &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), hi_mask, lo_mask,
+                        bucket_select_mask_hi, bucket_select_mask_lo,
+                        neg_mask, offset);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckShufti32x16 &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask_hi == ri.bucket_select_mask_hi &&
+               bucket_select_mask_lo == ri.bucket_select_mask_lo &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
 class RoseInstrCheckInfix
     : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_INFIX,
                                     ROSE_STRUCT_CHECK_INFIX,
                                     RoseInstrCheckInfix> {
diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp
--- a/src/rose/rose_dump.cpp
+++ b/src/rose/rose_dump.cpp
@@ ... @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
+            PROGRAM_CASE(CHECK_SHUFTI_16x8) {
+                os << "    nib_mask "
+                   << dumpStrMask(ri->nib_mask, sizeof(ri->nib_mask))
+                   << endl;
+                os << "    bucket_select_mask "
+                   << dumpStrMask(ri->bucket_select_mask,
+                                  sizeof(ri->bucket_select_mask))
+                   << endl;
+                os << "    offset " << ri->offset << endl;
+                os << "    fail_jump " << offset + ri->fail_jump << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_SHUFTI_32x8) {
+                os << "    hi_mask "
+                   << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
+                   << endl;
+                os << "    lo_mask "
+                   << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask))
+                   << endl;
+                os << "    bucket_select_mask "
+                   << dumpStrMask(ri->bucket_select_mask,
+                                  sizeof(ri->bucket_select_mask))
+                   << endl;
+                os << "    offset " << ri->offset << endl;
+                os << "    fail_jump " << offset + ri->fail_jump << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_SHUFTI_16x16) {
+                os << "    hi_mask "
+                   << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
+                   << endl;
+                os << "    lo_mask "
+                   << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask))
+                   << endl;
+                os << "    bucket_select_mask "
+                   << dumpStrMask(ri->bucket_select_mask,
+                                  sizeof(ri->bucket_select_mask))
+                   << endl;
+                os << "    offset " << ri->offset << endl;
+                os << "    fail_jump " << offset + ri->fail_jump << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_SHUFTI_32x16) {
+                os << "    hi_mask "
+                   << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
+                   << endl;
+                os << "    lo_mask "
+                   << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask))
+                   << endl;
+                os << "    bucket_select_mask_hi "
+                   << dumpStrMask(ri->bucket_select_mask_hi,
+                                  sizeof(ri->bucket_select_mask_hi))
+                   << endl;
+                os << "    bucket_select_mask_lo "
+                   << dumpStrMask(ri->bucket_select_mask_lo,
+                                  sizeof(ri->bucket_select_mask_lo))
+                   << endl;
+                os << "    offset " << ri->offset << endl;
+                os << "    fail_jump " << offset + ri->fail_jump << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(CHECK_INFIX) {
                 os << "    queue " << ri->queue << endl;
                 os << "    lag " << ri->lag << endl;
diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h
index ba3e586b..44d5d524 100644
--- a/src/rose/rose_program.h
+++ b/src/rose/rose_program.h
@@ -52,6 +52,10 @@ enum RoseInstructionCode {
     ROSE_INSTR_CHECK_MASK,     //!< 8-byte mask check.
     ROSE_INSTR_CHECK_MASK_32,  //!< 32-byte and/cmp/neg mask check.
     ROSE_INSTR_CHECK_BYTE,     //!< Single byte check.
+    ROSE_INSTR_CHECK_SHUFTI_16x8,  //!< Check 16-byte data by 8-bucket shufti.
+    ROSE_INSTR_CHECK_SHUFTI_32x8,  //!< Check 32-byte data by 8-bucket shufti.
+    ROSE_INSTR_CHECK_SHUFTI_16x16, //!< Check 16-byte data by 16-bucket shufti.
+    ROSE_INSTR_CHECK_SHUFTI_32x16, //!< Check 32-byte data by 16-bucket shufti.
     ROSE_INSTR_CHECK_INFIX,    //!< Infix engine must be in accept state.
     ROSE_INSTR_CHECK_PREFIX,   //!< Prefix engine must be in accept state.
     ROSE_INSTR_PUSH_DELAYED,   //!< Push delayed literal matches.
@@ -184,6 +188,48 @@ struct ROSE_STRUCT_CHECK_BYTE {
     u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
 
+// Since m128 and m256 could be misaligned in the bytecode, we use u8[16]
+// and u8[32] instead in all rose_check_shufti structures.
+struct ROSE_STRUCT_CHECK_SHUFTI_16x8 {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 nib_mask[32]; //!< High (bytes 0..15) and low (16..31) nibble masks.
+    u8 bucket_select_mask[16]; //!< Mask for bucket assignment.
+    u32 neg_mask; //!< Negation mask in low 16 bits.
+    s32 offset; //!< Relative offset of the first byte.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+struct ROSE_STRUCT_CHECK_SHUFTI_32x8 {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 hi_mask[16]; //!< High nibble mask in shufti.
+    u8 lo_mask[16]; //!< Low nibble mask in shufti.
+    u8 bucket_select_mask[32]; //!< Mask for bucket assignment.
+    u32 neg_mask; //!< 32-bit negation mask.
+    s32 offset; //!< Relative offset of the first byte.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+struct ROSE_STRUCT_CHECK_SHUFTI_16x16 {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 hi_mask[32]; //!< High nibble mask in shufti.
+    u8 lo_mask[32]; //!< Low nibble mask in shufti.
+    u8 bucket_select_mask[32]; //!< Mask for bucket assignment.
+    u32 neg_mask; //!< Negation mask in low 16 bits.
+    s32 offset; //!< Relative offset of the first byte.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+struct ROSE_STRUCT_CHECK_SHUFTI_32x16 {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 hi_mask[32]; //!< High nibble mask in shufti.
+    u8 lo_mask[32]; //!< Low nibble mask in shufti.
+    u8 bucket_select_mask_hi[32]; //!< Bucket mask for high 8 buckets.
+    u8 bucket_select_mask_lo[32]; //!< Bucket mask for low 8 buckets.
+    u32 neg_mask; //!< 32-bit negation mask.
+    s32 offset; //!< Relative offset of the first byte.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
 struct ROSE_STRUCT_CHECK_INFIX {
     u8 code; //!< From enum RoseInstructionCode.
     u32 queue; //!< Queue of leftfix to check.
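Before the runtime validators in the next file, it is worth pinning down what these masks mean. The shufti check is a per-byte, per-nibble table lookup: the high nibble of each data byte indexes one 16-byte table, the low nibble indexes another, and a bucket survives at a position only if its bit is set in both lookups and in that position's bucket_select mask. The following scalar model of the 16x8 variant is a sketch for illustration only (the function name is not part of the patch, and the partial-buffer valid_data_mask handling is omitted, i.e. all 16 bytes are assumed valid):

    #include <stdint.h>

    /* Scalar model of CHECK_SHUFTI_16x8. nib_mask holds the hi-nibble table
     * in bytes 0..15 and the lo-nibble table in bytes 16..31, matching
     * ROSE_STRUCT_CHECK_SHUFTI_16x8. Returns 1 on success. */
    static int check_shufti_16x8_model(const uint8_t data[16],
                                       const uint8_t nib_mask[32],
                                       const uint8_t bucket_select_mask[16],
                                       uint32_t neg_mask) {
        uint32_t match = 0;
        for (uint32_t i = 0; i < 16; i++) {
            uint8_t hi = nib_mask[data[i] >> 4];         /* hi-nibble lookup */
            uint8_t lo = nib_mask[16 + (data[i] & 0xf)]; /* lo-nibble lookup */
            if (hi & lo & bucket_select_mask[i]) {
                match |= 1u << i; /* a selected bucket matched position i */
            }
        }
        /* neg_mask bits flag positions that must NOT match, or that lie
         * outside the lookaround and are don't-cares. */
        return ((~match ^ neg_mask) & 0xffff) == 0;
    }

In the SIMD code below, the two table lookups are a pair of vpshufb operations and the final comparison is a movemask against neg_mask, but the semantics are the same.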
diff --git a/src/rose/validate_shufti.h b/src/rose/validate_shufti.h new file mode 100644 index 00000000..49d2c2fe --- /dev/null +++ b/src/rose/validate_shufti.h @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef VALIDATE_SHUFTI_H +#define VALIDATE_SHUFTI_H + +#include "ue2common.h" +#include "util/simd_utils.h" + +#if defined(DEBUG) +static +void dumpMask(const void *mask, int len) { + const u8 *c = (const u8 *)mask; + for (int i = 0; i < len; i++) { + printf("%02x", c[i]); + } + printf("\n"); +} +#endif + +static really_inline +int validateShuftiMask16x16(const m256 data, const m256 hi_mask, + const m256 lo_mask, const m256 and_mask, + const u32 neg_mask, const u16 valid_data_mask) { + m256 low4bits = set32x8(0xf); + m256 c_lo = vpshufb(lo_mask, and256(data, low4bits)); + m256 c_hi = vpshufb(hi_mask, rshift64_m256(andnot256(low4bits, data), 4)); + m256 t = and256(c_lo, c_hi); + u32 nresult = movemask256(eq256(and256(t, and_mask), zeroes256())); +#ifdef DEBUG + DEBUG_PRINTF("data\n"); + dumpMask(&data, 32); + DEBUG_PRINTF("hi_mask\n"); + dumpMask(&hi_mask, 32); + DEBUG_PRINTF("lo_mask\n"); + dumpMask(&lo_mask, 32); + DEBUG_PRINTF("c_lo\n"); + dumpMask(&c_lo, 32); + DEBUG_PRINTF("c_hi\n"); + dumpMask(&c_hi, 32); + DEBUG_PRINTF("and_mask\n"); + dumpMask(&and_mask, 32); + DEBUG_PRINTF("nresult %x\n", nresult); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); +#endif + u32 cmp_result = (((nresult >> 16) & nresult) ^ neg_mask) & valid_data_mask; + return !cmp_result; +} + +static really_inline +int validateShuftiMask16x8(const m128 data, const m256 nib_mask, + const m128 and_mask, const u32 neg_mask, + const u16 valid_data_mask) { + m256 data_m256 = combine2x128(rshift64_m128(data, 4), data); + m256 low4bits = set32x8(0xf); + m256 c_nib = vpshufb(nib_mask, and256(data_m256, low4bits)); + m128 t = and128(movdq_hi(c_nib), movdq_lo(c_nib)); + m128 nresult = eq128(and128(t, and_mask), zeroes128()); +#ifdef DEBUG + DEBUG_PRINTF("data\n"); + dumpMask(&data_m256, 32); + 
DEBUG_PRINTF("nib_mask\n"); + dumpMask(&nib_mask, 32); + DEBUG_PRINTF("c_nib\n"); + dumpMask(&c_nib, 32); + DEBUG_PRINTF("nresult\n"); + dumpMask(&nresult, 16); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); +#endif + u32 cmp_result = (movemask128(nresult) ^ neg_mask) & valid_data_mask; + return !cmp_result; +} + +static really_inline +int validateShuftiMask32x8(const m256 data, const m256 hi_mask, + const m256 lo_mask, const m256 and_mask, + const u32 neg_mask, const u32 valid_data_mask) { + m256 low4bits = set32x8(0xf); + m256 c_lo = vpshufb(lo_mask, and256(data, low4bits)); + m256 c_hi = vpshufb(hi_mask, rshift64_m256(andnot256(low4bits, data), 4)); + m256 t = and256(c_lo, c_hi); + m256 nresult = eq256(and256(t, and_mask), zeroes256()); +#ifdef DEBUG + DEBUG_PRINTF("data\n"); + dumpMask(&data, 32); + DEBUG_PRINTF("hi_mask\n"); + dumpMask(&hi_mask, 32); + DEBUG_PRINTF("lo_mask\n"); + dumpMask(&lo_mask, 32); + DEBUG_PRINTF("c_lo\n"); + dumpMask(&c_lo, 32); + DEBUG_PRINTF("c_hi\n"); + dumpMask(&c_hi, 32); + DEBUG_PRINTF("nresult\n"); + dumpMask(&nresult, 32); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); +#endif + u32 cmp_result = (movemask256(nresult) ^ neg_mask) & valid_data_mask; + return !cmp_result; +} + +static really_inline +int validateShuftiMask32x16(const m256 data, + const m256 hi_mask_1, const m256 hi_mask_2, + const m256 lo_mask_1, const m256 lo_mask_2, + const m256 bucket_mask_hi, + const m256 bucket_mask_lo, const u32 neg_mask, + const u32 valid_data_mask) { + m256 low4bits = set32x8(0xf); + m256 data_lo = and256(data, low4bits); + m256 data_hi = and256(rshift64_m256(data, 4), low4bits); + m256 c_lo_1 = vpshufb(lo_mask_1, data_lo); + m256 c_lo_2 = vpshufb(lo_mask_2, data_lo); + m256 c_hi_1 = vpshufb(hi_mask_1, data_hi); + m256 c_hi_2 = vpshufb(hi_mask_2, data_hi); + m256 t1 = and256(c_lo_1, c_hi_1); + m256 t2 = and256(c_lo_2, c_hi_2); + m256 result = or256(and256(t1, bucket_mask_lo), and256(t2, bucket_mask_hi)); + u32 nresult = movemask256(eq256(result, zeroes256())); +#ifdef DEBUG + DEBUG_PRINTF("data\n"); + dumpMask(&data, 32); + DEBUG_PRINTF("data_lo\n"); + dumpMask(&data_lo, 32); + DEBUG_PRINTF("data_hi\n"); + dumpMask(&data_hi, 32); + DEBUG_PRINTF("hi_mask_1\n"); + dumpMask(&hi_mask_1, 16); + DEBUG_PRINTF("hi_mask_2\n"); + dumpMask(&hi_mask_2, 16); + DEBUG_PRINTF("lo_mask_1\n"); + dumpMask(&lo_mask_1, 16); + DEBUG_PRINTF("lo_mask_2\n"); + dumpMask(&lo_mask_2, 16); + DEBUG_PRINTF("c_lo_1\n"); + dumpMask(&c_lo_1, 32); + DEBUG_PRINTF("c_lo_2\n"); + dumpMask(&c_lo_2, 32); + DEBUG_PRINTF("c_hi_1\n"); + dumpMask(&c_hi_1, 32); + DEBUG_PRINTF("c_hi_2\n"); + dumpMask(&c_hi_2, 32); + DEBUG_PRINTF("result\n"); + dumpMask(&result, 32); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); +#endif + u32 cmp_result = (nresult ^ neg_mask) & valid_data_mask; + return !cmp_result; +} +#endif diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index b7cb1c0f..4bb055df 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -384,6 +384,11 @@ u32 movemask256(m256 a) { return lo_mask | (hi_mask << 16); } +static really_inline +m256 set2x128(m128 a) { + m256 rv = {a, a}; + return rv; +} #endif static really_inline m256 zeroes256(void) { @@ -534,6 +539,10 @@ static really_inline m256 load2x128(const void *ptr) { #endif } +static really_inline m256 loadu2x128(const void *ptr) { + return set2x128(loadu128(ptr)); +} + // aligned store static really_inline void store256(void *ptr, m256 a) { assert(ISALIGNED_N(ptr, alignof(m256))); @@ -632,6 +641,22 @@ char 
testbit256(m256 val, unsigned int n) { return testbit128(sub, n); } +static really_really_inline +m128 movdq_hi(m256 x) { + return x.hi; +} + +static really_really_inline +m128 movdq_lo(m256 x) { + return x.lo; +} + +static really_inline +m256 combine2x128(m128 a, m128 b) { + m256 rv = {a, b}; + return rv; +} + #else // AVX2 // switches on bit N in the given vector. @@ -676,6 +701,14 @@ m128 movdq_lo(m256 x) { #define interleave256lo(a, b) _mm256_unpacklo_epi8(a, b); #define vpalignr(r, l, offset) _mm256_alignr_epi8(r, l, offset) +static really_inline +m256 combine2x128(m128 hi, m128 lo) { +#if defined(_mm256_set_m128i) + return _mm256_set_m128i(hi, lo); +#else + return insert128to256(cast128to256(hi), lo, 1); +#endif +} #endif //AVX2 /**** From c3b5efefb6464cf1fae1ddc739cdea7d0481f520 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 22 Aug 2016 15:59:32 +1000 Subject: [PATCH 022/103] Add short avx2 shufti form --- src/nfa/shufti.c | 147 +++++++++++++++++++++++++++++++++++---- unit/internal/shufti.cpp | 29 ++++++-- 2 files changed, 159 insertions(+), 17 deletions(-) diff --git a/src/nfa/shufti.c b/src/nfa/shufti.c index 903e04da..57890478 100644 --- a/src/nfa/shufti.c +++ b/src/nfa/shufti.c @@ -242,6 +242,7 @@ const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi, #endif u32 z = movemask128(eq128(t2, ones)); + DEBUG_PRINTF(" z: 0x%08x\n", z); return firstMatch(buf, z); } @@ -302,6 +303,40 @@ const u8 *firstMatch(const u8 *buf, u32 z) { } } +static really_inline +const u8 *fwdBlockShort(m256 mask, m128 chars, const u8 *buf, + const m256 low4bits) { + // do the hi and lo shuffles in the one avx register + m256 c = set2x128(chars); + c = _mm256_srlv_epi64(c, _mm256_set_epi64x(0, 0, 4, 4)); + c = and256(c, low4bits); + m256 c_shuf = vpshufb(mask, c); + m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf)); + // the upper 32-bits can't match + u32 z = 0xffff0000U | movemask128(eq128(t, zeroes128())); + + return firstMatch(buf, z); +} + +static really_inline +const u8 *shuftiFwdShort(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end, const m256 low4bits) { + // run shufti over two overlapping 16-byte unaligned reads + const m256 mask = combine2x128(mask_hi, mask_lo); + m128 chars = loadu128(buf); + const u8 *rv = fwdBlockShort(mask, chars, buf, low4bits); + if (rv) { + return rv; + } + + chars = loadu128(buf_end - 16); + rv = fwdBlockShort(mask, chars, buf_end - 16, low4bits); + if (rv) { + return rv; + } + return buf_end; +} + static really_inline const u8 *fwdBlock(m256 mask_lo, m256 mask_hi, m256 chars, const u8 *buf, const m256 low4bits, const m256 zeroes) { @@ -315,15 +350,21 @@ const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, const u8 *buf_end) { assert(buf && buf_end); assert(buf < buf_end); + DEBUG_PRINTF("shufti %p len %zu\n", buf, buf_end - buf); // Slow path for small cases. 
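/* An aside on fwdBlockShort above: it folds both nibble lookups into one AVX
 * register by duplicating the 16 input bytes into the two 128-bit lanes and
 * shifting only one copy right by four (_mm256_srlv_epi64 with shifts 4,4,0,0),
 * so a single vpshufb serves both tables. In scalar terms, a sketch only
 * (not patch code; mask_lo/mask_hi are the shufti nibble tables):
 *
 *     for (i = 0; i < 16; i++) {
 *         u8 lo = mask_lo[chars[i] & 0xf];
 *         u8 hi = mask_hi[chars[i] >> 4];
 *         if (lo & hi) { return buf + i; }  // first byte in the class
 *     }
 *
 * shuftiFwdShort then covers any 17..32 byte range with two such blocks over
 * overlapping unaligned reads; rescanning the overlap cannot miss a match.
 */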
-    if (buf_end - buf < 32) {
+    if (buf_end - buf < 16) {
         return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi,
                              buf, buf_end);
     }
 
-    const m256 zeroes = zeroes256();
     const m256 low4bits = set32x8(0xf);
+
+    if (buf_end - buf <= 32) {
+        return shuftiFwdShort(mask_lo, mask_hi, buf, buf_end, low4bits);
+    }
+
+    const m256 zeroes = zeroes256();
     const m256 wide_mask_lo = set2x128(mask_lo);
     const m256 wide_mask_hi = set2x128(mask_hi);
     const u8 *rv;
@@ -365,12 +406,7 @@ const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
 }
 
 static really_inline
-const u8 *lastMatch(const u8 *buf, m256 t, m256 compare) {
-#ifdef DEBUG
-    DEBUG_PRINTF("confirming match in:"); dumpMsk256(t); printf("\n");
-#endif
-
-    u32 z = movemask256(eq256(t, compare));
+const u8 *lastMatch(const u8 *buf, u32 z) {
     if (unlikely(z != 0xffffffff)) {
         u32 pos = clz32(~z);
         DEBUG_PRINTF("buf=%p, pos=%u\n", buf, pos);
@@ -395,9 +431,46 @@ const u8 *revBlock(m256 mask_lo, m256 mask_hi, m256 chars, const u8 *buf,
     DEBUG_PRINTF("  t: "); dumpMsk256(t); printf("\n");
 #endif
 
-    return lastMatch(buf, t, zeroes);
+    u32 z = movemask256(eq256(t, zeroes));
+    return lastMatch(buf, z);
 }
 
+static really_inline
+const u8 *revBlockShort(m256 mask, m128 chars, const u8 *buf,
+                        const m256 low4bits) {
+    // do the hi and lo shuffles in the one avx register
+    m256 c = set2x128(chars);
+    c = _mm256_srlv_epi64(c, _mm256_set_epi64x(0, 0, 4, 4));
+    c = and256(c, low4bits);
+    m256 c_shuf = vpshufb(mask, c);
+    m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf));
+    // the upper 32 bits of z can't match
+    u32 z = 0xffff0000U | movemask128(eq128(t, zeroes128()));
+
+    return lastMatch(buf, z);
+}
+
+static really_inline
+const u8 *shuftiRevShort(m128 mask_lo, m128 mask_hi, const u8 *buf,
+                         const u8 *buf_end, const m256 low4bits) {
+    // run shufti over two overlapping 16-byte unaligned reads
+    const m256 mask = combine2x128(mask_hi, mask_lo);
+
+    m128 chars = loadu128(buf_end - 16);
+    const u8 *rv = revBlockShort(mask, chars, buf_end - 16, low4bits);
+    if (rv) {
+        return rv;
+    }
+
+    chars = loadu128(buf);
+    rv = revBlockShort(mask, chars, buf, low4bits);
+    if (rv) {
+        return rv;
+    }
+    return buf - 1;
+}
+
 /* takes 128 bit masks, but operates on 256 bits of data */
 const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
                       const u8 *buf_end) {
     assert(buf && buf_end);
     assert(buf < buf_end);
 
     // Slow path for small cases.
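/* The forward and reverse scans above differ only in how they pick a bit out
 * of the movemask. With z built so that a clear bit marks a matching byte,
 * a scalar sketch using the existing bitutils helpers:
 *
 *     u32 found = ~z;                                  // set bits = matches
 *     first = found ? buf + ctz32(found) : NULL;       // as in firstMatch()
 *     last  = found ? buf + 31 - clz32(found) : NULL;  // as in lastMatch()
 */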
- if (buf_end - buf < 64) { + if (buf_end - buf < 16) { return shuftiRevSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, buf, buf_end); } - const m256 zeroes = zeroes256(); const m256 low4bits = set32x8(0xf); + + if (buf_end - buf <= 32) { + return shuftiRevShort(mask_lo, mask_hi, buf, buf_end, low4bits); + } + + const m256 zeroes = zeroes256(); const m256 wide_mask_lo = set2x128(mask_lo); const m256 wide_mask_hi = set2x128(mask_hi); const u8 *rv; @@ -482,14 +560,57 @@ const u8 *fwdBlock2(m256 mask1_lo, m256 mask1_hi, m256 mask2_lo, m256 mask2_hi, return firstMatch(buf, z); } +static really_inline +const u8 *fwdBlockShort2(m256 mask1, m256 mask2, m128 chars, const u8 *buf, + const m256 low4bits) { + // do the hi and lo shuffles in the one avx register + m256 c = set2x128(chars); + c = _mm256_srlv_epi64(c, _mm256_set_epi64x(0, 0, 4, 4)); + c = and256(c, low4bits); + m256 c_shuf1 = vpshufb(mask1, c); + m256 c_shuf2 = rshift128_m256(vpshufb(mask2, c), 1); + m256 t0 = or256(c_shuf1, c_shuf2); + m128 t = or128(movdq_hi(t0), cast256to128(t0)); + // the upper 32-bits can't match + u32 z = 0xffff0000U | movemask128(eq128(t, ones128())); + + return firstMatch(buf, z); +} + +static really_inline +const u8 *shuftiDoubleShort(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, + m128 mask2_hi, const u8 *buf, const u8 *buf_end) { + DEBUG_PRINTF("buf %p len %zu\n", buf, buf_end - buf); + const m256 low4bits = set32x8(0xf); + // run shufti over two overlapping 16-byte unaligned reads + const m256 mask1 = combine2x128(mask1_hi, mask1_lo); + const m256 mask2 = combine2x128(mask2_hi, mask2_lo); + m128 chars = loadu128(buf); + const u8 *rv = fwdBlockShort2(mask1, mask2, chars, buf, low4bits); + if (rv) { + return rv; + } + + chars = loadu128(buf_end - 16); + rv = fwdBlockShort2(mask1, mask2, chars, buf_end - 16, low4bits); + if (rv) { + return rv; + } + return buf_end; +} + /* takes 128 bit masks, but operates on 256 bits of data */ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi, const u8 *buf, const u8 *buf_end) { + /* we should always have at least 16 bytes */ + assert(buf_end - buf >= 16); + if (buf_end - buf < 32) { - // not worth it - return buf; + return shuftiDoubleShort(mask1_lo, mask1_hi, mask2_lo, mask2_hi, buf, + buf_end); } + const m256 ones = ones256(); const m256 low4bits = set32x8(0xf); const m256 wide_mask1_lo = set2x128(mask1_lo); diff --git a/unit/internal/shufti.cpp b/unit/internal/shufti.cpp index 81495a9c..67ceadc5 100644 --- a/unit/internal/shufti.cpp +++ b/unit/internal/shufti.cpp @@ -118,7 +118,7 @@ TEST(Shufti, ExecNoMatch1) { char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; - for (size_t i = 0; i < 16; i++) { + for (size_t i = 0; i < 32; i++) { const u8 *rv = shuftiExec(lo, hi, (u8 *)t1 + i, (u8 *)t1 + strlen(t1)); ASSERT_LE(((size_t)t1 + strlen(t1)) & ~0xf, (size_t)rv); @@ -172,12 +172,12 @@ TEST(Shufti, ExecMatch1) { ASSERT_NE(-1, ret); /* 0123456789012345678901234567890 */ - char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbbb"; + char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbabbbbbbbbbbbb"; - for (size_t i = 0; i < 16; i++) { + for (size_t i = 0; i < 32; i++) { const u8 *rv = shuftiExec(lo, hi, (u8 *)t1 + i, (u8 *)t1 + strlen(t1)); - ASSERT_EQ((size_t)t1 + 17, (size_t)rv); + ASSERT_EQ((size_t)t1 + 33, (size_t)rv); } } @@ -601,6 +601,27 @@ TEST(DoubleShufti, ExecNoMatch3b) { } } +TEST(DoubleShufti, ExecMatchShort1) { + m128 lo1, hi1, lo2, hi2; + + flat_set> lits; + + 
lits.insert(make_pair('a','b')); + + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(ret); + + /* 0123456789012345678901234567890 */ + char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbb"; + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = shuftiDoubleExec(lo1, hi1, lo2, hi2, + (u8 *)t1 + i, (u8 *)t1 + strlen(t1)); + + ASSERT_EQ((size_t)t1 + 17, (size_t)rv); + } +} + TEST(DoubleShufti, ExecMatch1) { m128 lo1, hi1, lo2, hi2; From efa3299774411bc0c5f9f10b06a7bdf60c9b3cd5 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 1 Sep 2016 09:55:53 +1000 Subject: [PATCH 023/103] remove code preventing firing callbacks in the history buffer --- src/nfa/gough.c | 21 ++------------------- src/nfa/mcclellan.c | 20 ++------------------ src/nfa/sheng.c | 4 +--- 3 files changed, 5 insertions(+), 40 deletions(-) diff --git a/src/nfa/gough.c b/src/nfa/gough.c index 520aca93..44acd4c2 100644 --- a/src/nfa/gough.c +++ b/src/nfa/gough.c @@ -655,12 +655,6 @@ char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, const u8 *cur_buf = sp < 0 ? hend : buffer; - char report = 1; - if (mode == CALLBACK_OUTPUT) { - /* we are starting inside the history buffer: matches are suppressed */ - report = !(sp < 0); - } - if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { /* this is as far as we go */ q->cur--; @@ -691,8 +685,7 @@ char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, const u8 *final_look; if (goughExec8_i_ni(m, som, &s, cur_buf + sp, local_ep - sp, - offset + sp, cb, context, &final_look, - report ? mode : NO_MATCHES) + offset + sp, cb, context, &final_look, mode) == MO_HALT_MATCHING) { *(u8 *)q->state = 0; return 0; @@ -724,7 +717,6 @@ char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, if (sp == 0) { cur_buf = buffer; - report = 1; } if (sp != ep) { @@ -789,12 +781,6 @@ char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, const u8 *cur_buf = sp < 0 ? hend : buffer; - char report = 1; - if (mode == CALLBACK_OUTPUT) { - /* we are starting inside the history buffer: matches are suppressed */ - report = !(sp < 0); - } - assert(q->cur); if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { /* this is as far as we go */ @@ -822,10 +808,8 @@ char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, /* do main buffer region */ const u8 *final_look; if (goughExec16_i_ni(m, som, &s, cur_buf + sp, local_ep - sp, - offset + sp, cb, context, &final_look, - report ? mode : NO_MATCHES) + offset + sp, cb, context, &final_look, mode) == MO_HALT_MATCHING) { - assert(report); *(u16 *)q->state = 0; return 0; } @@ -856,7 +840,6 @@ char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, if (sp == 0) { cur_buf = buffer; - report = 1; } if (sp != ep) { diff --git a/src/nfa/mcclellan.c b/src/nfa/mcclellan.c index 88da27c0..992f78e2 100644 --- a/src/nfa/mcclellan.c +++ b/src/nfa/mcclellan.c @@ -496,12 +496,6 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, const u8 *cur_buf = sp < 0 ? 
hend : buffer; - char report = 1; - if (mode == CALLBACK_OUTPUT) { - /* we are starting inside the history buffer: matches are suppressed */ - report = !(sp < 0); - } - assert(q->cur); if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { DEBUG_PRINTF("this is as far as we go\n"); @@ -530,9 +524,8 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, const u8 *final_look; if (mcclellanExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp, offset + sp, cb, context, single, &final_look, - report ? mode : NO_MATCHES) + mode) == MO_HALT_MATCHING) { - assert(report); *(u16 *)q->state = 0; return 0; } @@ -563,7 +556,6 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, if (sp == 0) { cur_buf = buffer; - report = 1; } if (sp != ep) { @@ -653,12 +645,6 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, const u8 *cur_buf = sp < 0 ? hend : buffer; - char report = 1; - if (mode == CALLBACK_OUTPUT) { - /* we are starting inside the history buffer: matches are suppressed */ - report = !(sp < 0); - } - if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { DEBUG_PRINTF("this is as far as we go\n"); q->cur--; @@ -687,8 +673,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, const u8 *final_look; if (mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, offset + sp, - cb, context, single, &final_look, - report ? mode : NO_MATCHES) + cb, context, single, &final_look, mode) == MO_HALT_MATCHING) { *(u8 *)q->state = 0; return 0; @@ -720,7 +705,6 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, if (sp == 0) { cur_buf = buffer; - report = 1; } if (sp != ep) { diff --git a/src/nfa/sheng.c b/src/nfa/sheng.c index bbbf1f20..a5f96805 100644 --- a/src/nfa/sheng.c +++ b/src/nfa/sheng.c @@ -405,9 +405,7 @@ char runSheng(const struct sheng *sh, struct mq *q, s64a b_end, const u8 * scanned = cur_buf; char rv; - /* if we're in nomatch mode or if we're scanning history buffer */ - if (mode == NO_MATCHES || - (cur_start < 0 && mode == CALLBACK_OUTPUT)) { + if (mode == NO_MATCHES) { runShengNm(sh, q->cb, q->context, q->offset, &cached_accept_state, &cached_accept_id, cur_buf, cur_buf + cur_start, cur_buf + cur_end, can_die, From 385f71b44e6f1704b6bc828128f811295ed474d9 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 7 Sep 2016 10:33:44 +1000 Subject: [PATCH 024/103] rose: enable generation of shufti32x16 case --- src/rose/rose_build_bytecode.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 04ab52ff..7061cb32 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -3054,7 +3054,6 @@ bool makeRoleShufti(const vector &look, neg_mask, base_offset, end_inst); program.add_before_end(move(ri)); } else { - return false; auto ri = make_unique (hi_mask, lo_mask, bucket_select_hi, bucket_select_lo, neg_mask, base_offset, end_inst); From 997787bd4b71bde59b5b7b113b80bde3f02ae3b0 Mon Sep 17 00:00:00 2001 From: "Xu, Chi" Date: Mon, 5 Sep 2016 12:19:15 +0800 Subject: [PATCH 025/103] rose: add CHECK_SINGLE_LOOKAROUND instruction This specialisation is cheaper than the shufti-based variants, so we prefer it for single character class tests. 
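A single-entry lookaround, for example the one byte of context a pattern like /foo(?=[0-9])/ requires, needs only one probe of a 256-bit reach bitvector rather than a vector load and shuffle. A sketch of the core membership test, assuming the LSB-first bit order of the reach tables (the helper name here is illustrative; the runtime uses reachHasBit):

    static int reach_has_bit(const u8 *reach, u8 c) {
        return (reach[c / 8] >> (c % 8)) & 1; /* 256-bit class bitmap */
    }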
---
 src/rose/program_runtime.h       | 51 ++++++++++++++++++++++++++++++++++++
 src/rose/rose_build_bytecode.cpp |  9 ++++++
 src/rose/rose_build_program.cpp  |  9 ++++++
 src/rose/rose_build_program.h    | 33 +++++++++++++++++++++
 src/rose/rose_dump.cpp           | 14 +++++++++
 src/rose/rose_program.h          |  8 +++++
 6 files changed, 124 insertions(+)

diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h
index 57f39bbe..735f8cdb 100644
--- a/src/rose/program_runtime.h
+++ b/src/rose/program_runtime.h
@@ -1019,6 +1019,46 @@ int roseCheckShufti32x16(const struct core_info *ci, const u8 *hi_mask,
     }
 }
 
+static rose_inline
+int roseCheckSingleLookaround(const struct RoseEngine *t,
+                              const struct hs_scratch *scratch,
+                              s8 checkOffset, u32 lookaroundIndex, u64a end) {
+    assert(lookaroundIndex != MO_INVALID_IDX);
+    const struct core_info *ci = &scratch->core_info;
+    DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
+                 ci->buf_offset, ci->buf_offset + ci->len);
+
+    const s64a base_offset = end - ci->buf_offset;
+    const s64a offset = base_offset + checkOffset;
+    DEBUG_PRINTF("base_offset=%lld\n", base_offset);
+    DEBUG_PRINTF("checkOffset=%d offset=%lld\n", checkOffset, offset);
+
+    if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
+        DEBUG_PRINTF("too early, fail\n");
+        return 0;
+    }
+
+    const u8 *reach_base = (const u8 *)t + t->lookaroundReachOffset;
+    const u8 *reach = reach_base + lookaroundIndex * REACH_BITVECTOR_LEN;
+
+    u8 c;
+    if (offset >= 0 && offset < (s64a)ci->len) {
+        c = ci->buf[offset];
+    } else if (offset < 0 && offset >= -(s64a)ci->hlen) {
+        c = ci->hbuf[ci->hlen + offset];
+    } else {
+        return 1;
+    }
+
+    if (!reachHasBit(reach, c)) {
+        DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
+        return 0;
+    }
+
+    DEBUG_PRINTF("OK :)\n");
+    return 1;
+}
+
 /**
  * \brief Scan around a literal, checking that the "lookaround" reach masks
  * are satisfied.
@@ -1415,6 +1455,17 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(CHECK_SINGLE_LOOKAROUND) {
+                if (!roseCheckSingleLookaround(t, scratch, ri->offset,
+                                               ri->reach_index, end)) {
+                    DEBUG_PRINTF("failed lookaround check\n");
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    continue;
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(CHECK_LOOKAROUND) {
                 if (!roseCheckLookaround(t, scratch, ri->index,
                                          ri->count, end)) {
diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index 7061cb32..5421f1cb 100644
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@@ -3076,6 +3076,15 @@ void makeLookaroundInstruction(build_context &bc, const vector<LookEntry> &look,
         return;
     }
 
+    if (look.size() == 1) {
+        s8 offset = look.begin()->offset;
+        u32 look_idx = addLookaround(bc, look);
+        auto ri = make_unique<RoseInstrCheckSingleLookaround>(offset, look_idx,
+                                                  program.end_instruction());
+        program.add_before_end(move(ri));
+        return;
+    }
+
     if (makeRoleMask(look, program)) {
         return;
     }
diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp
index 69ad31a9..fc157b88 100644
--- a/src/rose/rose_build_program.cpp
+++ b/src/rose/rose_build_program.cpp
@@ -112,6 +112,15 @@ void RoseInstrCheckNotHandled::write(void *dest, RoseEngineBlob &blob,
     inst->fail_jump = calc_jump(offset_map, this, target);
 }
 
+void RoseInstrCheckSingleLookaround::write(void *dest, RoseEngineBlob &blob,
+                                           const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<ROSE_STRUCT_CHECK_SINGLE_LOOKAROUND *>(dest);
+    inst->offset = offset;
+    inst->reach_index = reach_index;
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
 void RoseInstrCheckLookaround::write(void *dest, RoseEngineBlob &blob,
                                      const OffsetMap &offset_map) const {
     RoseInstrBase::write(dest, blob, offset_map);
diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h
index 309a1b3e..c76456cc 100644
--- a/src/rose/rose_build_program.h
+++ b/src/rose/rose_build_program.h
@@ -378,6 +378,39 @@ public:
     }
 };
 
+class RoseInstrCheckSingleLookaround
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SINGLE_LOOKAROUND,
+                                    ROSE_STRUCT_CHECK_SINGLE_LOOKAROUND,
+                                    RoseInstrCheckSingleLookaround> {
+public:
+    s8 offset;
+    u32 reach_index;
+    const RoseInstruction *target;
+
+    RoseInstrCheckSingleLookaround(s8 offset_in, u32 reach_index_in,
+                                   const RoseInstruction *target_in)
+        : offset(offset_in), reach_index(reach_index_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckSingleLookaround &ri) const {
+        return offset == ri.offset && reach_index == ri.reach_index &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), offset, reach_index);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckSingleLookaround &ri,
+                  const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return offset == ri.offset && reach_index == ri.reach_index &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
 class RoseInstrCheckLookaround
     : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_LOOKAROUND,
                                     ROSE_STRUCT_CHECK_LOOKAROUND,
diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp
--- a/src/rose/rose_dump.cpp
+++ b/src/rose/rose_dump.cpp
@@ ... @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
+            PROGRAM_CASE(CHECK_SINGLE_LOOKAROUND) {
+                os << "    offset " << int{ri->offset} << endl;
+                os << "    reach_index " << ri->reach_index << endl;
+                os << "    fail_jump " << offset + ri->fail_jump << endl;
+                const u8 *base = (const u8 *)t;
+                const u8 *reach_base = base + t->lookaroundReachOffset;
+                const u8 *reach = reach_base +
+                                  ri->reach_index * REACH_BITVECTOR_LEN;
+                os << "    contents:" << endl;
+                describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT);
+                os << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(CHECK_LOOKAROUND) {
os << " index " << ri->index << endl; os << " count " << ri->count << endl; diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 44d5d524..370fc826 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -48,6 +48,7 @@ enum RoseInstructionCode { ROSE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD. ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance from offset 0. ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled". + ROSE_INSTR_CHECK_SINGLE_LOOKAROUND, //!< Single lookaround check. ROSE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check. ROSE_INSTR_CHECK_MASK, //!< 8-bytes mask check. ROSE_INSTR_CHECK_MASK_32, //!< 32-bytes and/cmp/neg mask check. @@ -154,6 +155,13 @@ struct ROSE_STRUCT_CHECK_NOT_HANDLED { u32 fail_jump; //!< Jump forward this many bytes if we have seen key before. }; +struct ROSE_STRUCT_CHECK_SINGLE_LOOKAROUND { + u8 code; //!< From enum RoseInstructionCode. + s8 offset; //!< The offset of the byte to examine. + u32 reach_index; //!< The index of the reach table entry to use. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + struct ROSE_STRUCT_CHECK_LOOKAROUND { u8 code; //!< From enum RoseInstructionCode. u32 index; From 6e533589bb7390380ca05943f9fc641be913645d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 13 Sep 2016 10:55:26 +1000 Subject: [PATCH 026/103] rose: move END instruction to start of enum Stop overloading END as the last Rose interpreter instruction, use new sentinel LAST_ROSE_INSTRUCTION for that. This change will also make it easier to add new instructions without renumbering END and thus changing all generated bytecodes. --- src/rose/program_runtime.h | 14 +++++++------- src/rose/rose_dump.cpp | 8 ++++---- src/rose/rose_program.h | 11 ++++++----- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 735f8cdb..198b8e13 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -1389,9 +1389,15 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, assert(pc >= pc_base); assert((size_t)(pc - pc_base) < t->size); const u8 code = *(const u8 *)pc; - assert(code <= ROSE_INSTR_END); + assert(code <= LAST_ROSE_INSTRUCTION); switch ((enum RoseInstructionCode)code) { + PROGRAM_CASE(END) { + DEBUG_PRINTF("finished\n"); + return HWLM_CONTINUE_MATCHING; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(ANCHORED_DELAY) { if (in_anchored && end > t->floatingMinLiteralMatchOffset) { DEBUG_PRINTF("delay until playback\n"); @@ -1971,12 +1977,6 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, } } PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(END) { - DEBUG_PRINTF("finished\n"); - return HWLM_CONTINUE_MATCHING; - } - PROGRAM_NEXT_INSTRUCTION } } diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index f0bec701..4a0d297e 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -234,9 +234,12 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { const char *pc_base = pc; for (;;) { u8 code = *(const u8 *)pc; - assert(code <= ROSE_INSTR_END); + assert(code <= LAST_ROSE_INSTRUCTION); const size_t offset = pc - pc_base; switch (code) { + PROGRAM_CASE(END) { return; } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(ANCHORED_DELAY) { os << " groups 0x" << std::hex << ri->groups << std::dec << endl; @@ -607,9 +610,6 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { PROGRAM_CASE(MATCHER_EOD) {} PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(END) { return; } - 
PROGRAM_NEXT_INSTRUCTION - default: os << " UNKNOWN (code " << int{code} << ")" << endl; os << " " << endl; diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 370fc826..4714960c 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -42,6 +42,7 @@ /** \brief Role program instruction opcodes. */ enum RoseInstructionCode { + ROSE_INSTR_END, //!< End of program. ROSE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher. ROSE_INSTR_CHECK_LIT_EARLY, //!< Skip matches before floating min offset. ROSE_INSTR_CHECK_GROUPS, //!< Check that literal groups are on. @@ -116,7 +117,11 @@ enum RoseInstructionCode { /** \brief Run the EOD-anchored HWLM literal matcher. */ ROSE_INSTR_MATCHER_EOD, - ROSE_INSTR_END //!< End of program. + LAST_ROSE_INSTRUCTION = ROSE_INSTR_MATCHER_EOD //!< Sentinel. +}; + +struct ROSE_STRUCT_END { + u8 code; //!< From enum RoseInstructionCode. }; struct ROSE_STRUCT_ANCHORED_DELAY { @@ -460,8 +465,4 @@ struct ROSE_STRUCT_MATCHER_EOD { u8 code; //!< From enum RoseInstructionCode. }; -struct ROSE_STRUCT_END { - u8 code; //!< From enum RoseInstructionCode. -}; - #endif // ROSE_ROSE_PROGRAM_H From 2e5a2ab2a9c00c2768eb0a20460a3751fc0a35a3 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 7 Sep 2016 14:09:49 +1000 Subject: [PATCH 027/103] cmake: don't be so heavyhanded with flags --- CMakeLists.txt | 70 +++++++++++++++++++------------------------------- 1 file changed, 27 insertions(+), 43 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 76d79821..4ac9e52d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,8 +1,4 @@ cmake_minimum_required (VERSION 2.8.11) - -# don't use the built-in default configs -set (CMAKE_NOT_USING_CONFIG_FLAGS TRUE) - project (Hyperscan C CXX) set (HS_MAJOR_VERSION 4) @@ -10,10 +6,6 @@ set (HS_MINOR_VERSION 3) set (HS_PATCH_VERSION 1) set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION}) -# since we are doing this manually, we only have three types -set (CMAKE_CONFIGURATION_TYPES "Debug;Release;RelWithDebInfo" - CACHE STRING "" FORCE) - set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) @@ -141,6 +133,12 @@ CMAKE_DEPENDENT_OPTION(DUMP_SUPPORT "Dump code support; normally on, except in r CMAKE_DEPENDENT_OPTION(DISABLE_ASSERTS "Disable assert(); Asserts are enabled in debug builds, disabled in release builds" OFF "NOT RELEASE_BUILD" ON) +if (DISABLE_ASSERTS) + if (CMAKE_BUILD_TYPE STREQUAL "DEBUG") + add_definitions(-DNDEBUG) + endif() +endif() + option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC to be set in project" OFF) # TODO: per platform config files? 
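# The pattern the hunks below move to, sketched in isolation (the exact flag
# lists are the patch's own, not implied here): leave CMake's per-config
# defaults such as -g or /Zi intact, and strip only the optimisation level,
# which this build controls itself via OPT_C_FLAG/OPT_CXX_FLAG:
#
#   foreach (CONFIG ${CMAKE_BUILD_TYPE} ${CMAKE_CONFIGURATION_TYPES})
#     string(REGEX REPLACE "-O[^ ]*" "" CMAKE_C_FLAGS_${CONFIG}
#            "${CMAKE_C_FLAGS_${CONFIG}}")
#   endforeach ()
#   set(EXTRA_C_FLAGS "${OPT_C_FLAG} -std=c99 -Wall -Wextra")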
@@ -151,27 +149,16 @@ if(MSVC OR MSVC_IDE) if (MSVC_VERSION LESS 1700) message(FATAL_ERROR "The project requires C++11 features.") else() - # set base flags - set(CMAKE_C_FLAGS "/DWIN32 /D_WINDOWS /W3") - set(CMAKE_C_FLAGS_DEBUG "/D_DEBUG /MDd /Zi /Od") - set(CMAKE_C_FLAGS_RELEASE "/MD /O2 /Ob2 /Oi") - set(CMAKE_C_FLAGS_RELWITHDEBINFO "/Zi /MD /O2 /Ob2 /Oi") - - set(CMAKE_CXX_FLAGS "/DWIN32 /D_WINDOWS /W3 /GR /EHsc") - set(CMAKE_CXX_FLAGS_DEBUG "/D_DEBUG /MDd /Zi /Od") - set(CMAKE_CXX_FLAGS_RELEASE "/MD /O2 /Ob2 /Oi") - set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/Zi /MD /O2 /Ob2 /Oi") - if (WINDOWS_ICC) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99 /Qrestrict /QxHost /wd4267 /Qdiag-disable:remark") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qstd=c++11 /Qrestrict /QxHost /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O3 /Qstd=c99 /Qrestrict /QxHost /wd4267 /Qdiag-disable:remark") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /Qstd=c++11 /Qrestrict /QxHost /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS") else() #TODO: don't hardcode arch - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX /wd4267") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX /wd4244 /wd4267 /wd4800 /wd2586 /wd1170 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 /arch:AVX /wd4267") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /arch:AVX /wd4244 /wd4267 /wd4800 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS") endif() - - + string(REPLACE "/RTC1" "" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") + string(REPLACE "/RTC1" "" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}") endif() else() @@ -192,6 +179,12 @@ else() unset(_GXX_OUTPUT) endif() + # remove CMake's idea of optimisation + foreach (CONFIG ${CMAKE_BUILD_TYPE} ${CMAKE_CONFIGURATION_TYPES}) + string(REGEX REPLACE "-O[^ ]*" "" CMAKE_C_FLAGS_${CONFIG} "${CMAKE_C_FLAGS_${CONFIG}}") + string(REGEX REPLACE "-O[^ ]*" "" CMAKE_CXX_FLAGS_${CONFIG} "${CMAKE_CXX_FLAGS_${CONFIG}}") + endforeach () + if(OPTIMISE) set(OPT_C_FLAG "-O3") set(OPT_CXX_FLAG "-O2") @@ -200,25 +193,16 @@ else() set(OPT_CXX_FLAG "-O0") endif(OPTIMISE) - # set up base flags for build types - set(CMAKE_C_FLAGS_DEBUG "-g ${OPT_C_FLAG} -Werror") - set(CMAKE_C_FLAGS_RELWITHDEBINFO "-g ${OPT_C_FLAG}") - set(CMAKE_C_FLAGS_RELEASE "${OPT_C_FLAG}") - - set(CMAKE_CXX_FLAGS_DEBUG "-g ${OPT_CXX_FLAG} -Werror") - set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-g ${OPT_CXX_FLAG}") - set(CMAKE_CXX_FLAGS_RELEASE "${OPT_CXX_FLAG}") - - if (DISABLE_ASSERTS) - # usually true for release builds, false for debug - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNDEBUG") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNDEBUG") - endif() - - # set compiler flags - more are tested and added later - set(EXTRA_C_FLAGS "-std=c99 -Wall -Wextra -Wshadow -Wcast-qual -fno-strict-aliasing") - set(EXTRA_CXX_FLAGS "-std=c++11 -Wall -Wextra -Wshadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor -fno-strict-aliasing") + set(EXTRA_C_FLAGS "${OPT_C_FLAG} -std=c99 -Wall -Wextra -Wshadow -Wcast-qual -fno-strict-aliasing") + set(EXTRA_CXX_FLAGS "${OPT_CXX_FLAG} -std=c++11 -Wall -Wextra -Wshadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor -fno-strict-aliasing") + + if (NOT RELEASE_BUILD) + # -Werror is most useful during development, don't potentially break + # release builds + set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror") + 
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror") + endif() if (NOT CMAKE_C_FLAGS MATCHES .*march.*) message(STATUS "Building for current host CPU") From 7849b9d611c92f3181cb03d224663fcc75ae10ab Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 7 Sep 2016 11:50:00 +1000 Subject: [PATCH 028/103] MSVC prefers the attrib at the beginning --- src/rose/program_runtime.h | 4 ++-- src/util/simd_utils.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 198b8e13..b9036422 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -864,7 +864,7 @@ m128 getData128(const struct core_info *ci, s64a offset, u16 *valid_data_mask) { *valid_data_mask = 0xffff; return loadu128(ci->buf + offset); } - u8 data[sizeof(m128)] ALIGN_DIRECTIVE; + ALIGN_DIRECTIVE u8 data[sizeof(m128)]; *valid_data_mask = (u16)getBufferDataComplex(ci, offset, data, 16); return *(m128 *)data; } @@ -875,7 +875,7 @@ m256 getData256(const struct core_info *ci, s64a offset, u32 *valid_data_mask) { *valid_data_mask = ~0u; return loadu256(ci->buf + offset); } - u8 data[sizeof(m256)] ALIGN_DIRECTIVE; + ALIGN_DIRECTIVE u8 data[sizeof(m256)]; *valid_data_mask = getBufferDataComplex(ci, offset, data, 32); return *(m256 *)data; } diff --git a/src/util/simd_utils.c b/src/util/simd_utils.c index a86c568d..54b5b4ba 100644 --- a/src/util/simd_utils.c +++ b/src/util/simd_utils.c @@ -32,7 +32,7 @@ #include "simd_utils.h" -const char vbs_mask_data[] ALIGN_CL_DIRECTIVE = { +ALIGN_CL_DIRECTIVE const char vbs_mask_data[] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, @@ -48,7 +48,7 @@ const char vbs_mask_data[] ALIGN_CL_DIRECTIVE = { #define ZEROES_32 ZEROES_8, ZEROES_8, ZEROES_8, ZEROES_8 /** \brief LUT for the mask1bit functions. */ -const u8 simd_onebit_masks[] ALIGN_CL_DIRECTIVE = { +ALIGN_CL_DIRECTIVE const u8 simd_onebit_masks[] = { ZEROES_31, 0x01, ZEROES_32, ZEROES_31, 0x02, ZEROES_32, ZEROES_31, 0x04, ZEROES_32, From 707fe675eaabe1dd4992ba1357e88c19b27e0403 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 7 Sep 2016 15:47:26 +1000 Subject: [PATCH 029/103] Operator precedence matters --- src/rose/rose_build_misc.cpp | 2 +- src/rose/rose_build_role_aliasing.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index c2f9f580..b9aeabd0 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -1309,7 +1309,7 @@ bool canImplementGraphs(const RoseBuildImpl &tbi) { } if (g[v].left.graph) { assert(g[v].left.graph->kind - == tbi.isRootSuccessor(v) ? NFA_PREFIX : NFA_INFIX); + == (tbi.isRootSuccessor(v) ? NFA_PREFIX : NFA_INFIX)); if (!isImplementableNFA(*g[v].left.graph, nullptr, tbi.cc)) { DEBUG_PRINTF("nfa prefix %zu failed (%zu vertices)\n", g[v].idx, num_vertices(*g[v].left.graph)); diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp index c2366f0e..b223fa92 100644 --- a/src/rose/rose_build_role_aliasing.cpp +++ b/src/rose/rose_build_role_aliasing.cpp @@ -786,8 +786,8 @@ void pruneReportIfUnused(const RoseBuildImpl &build, shared_ptr h, // unimplementable. DEBUG_PRINTF("report %u has been merged away, pruning\n", report); - assert(h->kind == build.isRootSuccessor(*verts.begin()) ? NFA_PREFIX - : NFA_INFIX); + assert(h->kind == (build.isRootSuccessor(*verts.begin()) ? 
NFA_PREFIX + : NFA_INFIX)); unique_ptr h_new = cloneHolder(*h); pruneReport(*h_new, report); From aca89e66d22bca697724610e7c7f0f37816bf052 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 1 Sep 2016 14:40:17 +1000 Subject: [PATCH 030/103] hinted insert operations for flat_set --- src/util/ue2_containers.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/util/ue2_containers.h b/src/util/ue2_containers.h index 217d08ea..b6425f77 100644 --- a/src/util/ue2_containers.h +++ b/src/util/ue2_containers.h @@ -207,6 +207,10 @@ public: return std::make_pair(iterator(it), false); } + iterator insert(UNUSED const_iterator hint, const value_type &value) { + return insert(value).first; + } + std::pair insert(value_type &&value) { auto it = std::lower_bound(data.begin(), data.end(), value, comp); if (it == data.end() || comp(value, *it)) { @@ -216,6 +220,10 @@ public: return std::make_pair(iterator(it), false); } + iterator insert(UNUSED const_iterator hint, value_type &&value) { + return insert(value).first; + } + template void insert(InputIt first, InputIt second) { for (; first != second; ++first) { From c94899dd44f4b6a697721f33d337804c9863fb2d Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 1 Sep 2016 14:58:55 +1000 Subject: [PATCH 031/103] allow sets of tops on edges --- src/nfa/castlecompile.cpp | 6 +- src/nfagraph/ng_dump.cpp | 6 +- src/nfagraph/ng_equivalence.cpp | 29 +++++----- src/nfagraph/ng_graph.h | 6 +- src/nfagraph/ng_holder.cpp | 1 - src/nfagraph/ng_holder.h | 2 + src/nfagraph/ng_is_equal.cpp | 10 ++-- src/nfagraph/ng_limex.cpp | 6 +- src/nfagraph/ng_redundancy.cpp | 2 +- src/nfagraph/ng_repeat.cpp | 62 +++++++++++--------- src/nfagraph/ng_restructuring.cpp | 7 ++- src/nfagraph/ng_rose.cpp | 14 ++++- src/nfagraph/ng_split.cpp | 6 +- src/nfagraph/ng_split.h | 4 +- src/nfagraph/ng_uncalc_components.cpp | 13 +++-- src/nfagraph/ng_util.cpp | 44 ++++++++++----- src/nfagraph/ng_util.h | 12 +++- src/nfagraph/ng_violet.cpp | 26 ++++++++- src/nfagraph/ng_width.cpp | 4 +- src/rose/rose_build_add.cpp | 2 + src/rose/rose_build_add_mask.cpp | 4 +- src/rose/rose_build_bytecode.cpp | 12 ++-- src/rose/rose_build_compile.cpp | 52 ++--------------- src/rose/rose_build_convert.cpp | 7 ++- src/rose/rose_build_impl.h | 3 +- src/rose/rose_build_infix.cpp | 17 +++--- src/rose/rose_build_lookaround.cpp | 2 +- src/rose/rose_build_merge.cpp | 9 ++- src/rose/rose_build_misc.cpp | 81 ++++++++++++++++++--------- src/rose/rose_build_role_aliasing.cpp | 33 ++++++----- src/util/ue2_containers.h | 2 +- 31 files changed, 284 insertions(+), 200 deletions(-) diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 4bddf767..11ae2000 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -904,7 +904,7 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) { if (min_bound == 0) { // Vacuous case, we can only do this once. 
assert(!edge(g.start, g.accept, g).second); NFAEdge e = add_edge(g.start, g.accept, g).first; - g[e].top = top; + g[e].tops.insert(top); g[u].reports.insert(pr.reports.begin(), pr.reports.end()); min_bound = 1; } @@ -914,7 +914,7 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) { g[v].char_reach = pr.reach; NFAEdge e = add_edge(u, v, g).first; if (u == g.start) { - g[e].top = top; + g[e].tops.insert(top); } u = v; } @@ -933,7 +933,7 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) { } NFAEdge e = add_edge(u, v, g).first; if (u == g.start) { - g[e].top = top; + g[e].tops.insert(top); } u = v; } diff --git a/src/nfagraph/ng_dump.cpp b/src/nfagraph/ng_dump.cpp index 57668caf..7c1894a3 100644 --- a/src/nfagraph/ng_dump.cpp +++ b/src/nfagraph/ng_dump.cpp @@ -234,9 +234,9 @@ public: void operator()(ostream& os, const EdgeT& e) const { // Edge label. Print priority. os << "[fontsize=9,label=\""; - // If it's an edge from start, print top id. - if (is_any_start(source(e, g), g) && !is_any_start(target(e, g), g)) { - os << "TOP " << g[e].top << "\\n"; + // print tops if any set. + if (!g[e].tops.empty()) { + os << "TOP " << as_string_list(g[e].tops) << "\\n"; } // If it's an assert vertex, then display its info. diff --git a/src/nfagraph/ng_equivalence.cpp b/src/nfagraph/ng_equivalence.cpp index d0ab7c4a..383b6c75 100644 --- a/src/nfagraph/ng_equivalence.cpp +++ b/src/nfagraph/ng_equivalence.cpp @@ -72,7 +72,7 @@ struct VertexInfoPtrCmp { class VertexInfo { public: VertexInfo(NFAVertex v_in, const NGHolder &g) - : v(v_in), vert_index(g[v].index), cr(g[v].char_reach), edge_top(~0), + : v(v_in), vert_index(g[v].index), cr(g[v].char_reach), equivalence_class(~0), vertex_flags(g[v].assert_flags) {} flat_set pred; //!< predecessors of this vertex @@ -82,7 +82,7 @@ public: CharReach cr; CharReach pred_cr; CharReach succ_cr; - unsigned edge_top; + flat_set edge_tops; /**< tops on edge from start */ unsigned equivalence_class; unsigned vertex_flags; }; @@ -120,7 +120,7 @@ public: EquivalenceType eq) : /* reports only matter for right-equiv */ rs(eq == RIGHT_EQUIVALENCE ? g[vi.v].reports : flat_set()), - vertex_flags(vi.vertex_flags), edge_top(vi.edge_top), cr(vi.cr), + vertex_flags(vi.vertex_flags), edge_tops(vi.edge_tops), cr(vi.cr), adjacent_cr(eq == LEFT_EQUIVALENCE ? 
vi.pred_cr : vi.succ_cr), /* treat non-special vertices the same */ node_type(min(g[vi.v].index, u32{N_SPECIALS})), depth(d_in) {} @@ -128,7 +128,7 @@ public: bool operator==(const ClassInfo &b) const { return node_type == b.node_type && depth.d1 == b.depth.d1 && depth.d2 == b.depth.d2 && cr == b.cr && - adjacent_cr == b.adjacent_cr && edge_top == b.edge_top && + adjacent_cr == b.adjacent_cr && edge_tops == b.edge_tops && vertex_flags == b.vertex_flags && rs == b.rs; } @@ -136,7 +136,6 @@ public: size_t val = 0; boost::hash_combine(val, boost::hash_range(begin(c.rs), end(c.rs))); boost::hash_combine(val, c.vertex_flags); - boost::hash_combine(val, c.edge_top); boost::hash_combine(val, c.cr); boost::hash_combine(val, c.adjacent_cr); boost::hash_combine(val, c.node_type); @@ -148,7 +147,7 @@ public: private: flat_set<ReportID> rs; /* for right equiv only */ unsigned vertex_flags; - u32 edge_top; + flat_set<u32> edge_tops; CharReach cr; CharReach adjacent_cr; unsigned node_type; @@ -307,7 +306,7 @@ ptr_vector<VertexInfo> getVertexInfos(const NGHolder &g) { // also set up edge tops if (is_triggered(g) && u == g.start) { - cur_vi.edge_top = g[e].top; + cur_vi.edge_tops = g[e].tops; } } @@ -544,7 +543,7 @@ void mergeClass(ptr_vector<VertexInfo> &infos, NGHolder &g, unsigned eq_class, infos.push_back(new_vertex_info_eod); } - const unsigned edgetop = (*cur_class_vertices.begin())->edge_top; + const auto &edgetops = (*cur_class_vertices.begin())->edge_tops; for (VertexInfo *old_vertex_info : cur_class_vertices) { assert(old_vertex_info->equivalence_class == eq_class); @@ -565,9 +564,10 @@ void mergeClass(ptr_vector<VertexInfo> &infos, NGHolder &g, unsigned eq_class, // if edge doesn't exist, create it NFAEdge e = add_edge_if_not_present(pred_info->v, new_v, g).first; - // put edge top, if applicable - if (edgetop != (unsigned) -1) { - g[e].top = edgetop; + // put edge tops, if applicable + if (!edgetops.empty()) { + assert(g[e].tops.empty() || g[e].tops == edgetops); + g[e].tops = edgetops; } pred_info->succ.insert(new_vertex_info); @@ -576,9 +576,10 @@ void mergeClass(ptr_vector<VertexInfo> &infos, NGHolder &g, unsigned eq_class, NFAEdge ee = add_edge_if_not_present(pred_info->v, new_v_eod, g).first; - // put edge top, if applicable - if (edgetop != (unsigned) -1) { - g[ee].top = edgetop; + // put edge tops, if applicable + if (!edgetops.empty()) { + assert(g[ee].tops.empty() || g[ee].tops == edgetops); + g[ee].tops = edgetops; } pred_info->succ.insert(new_vertex_info_eod); diff --git a/src/nfagraph/ng_graph.h b/src/nfagraph/ng_graph.h index 64b32839..2d6fea13 100644 --- a/src/nfagraph/ng_graph.h +++ b/src/nfagraph/ng_graph.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -69,8 +69,8 @@ struct NFAGraphEdgeProps { u32 index = 0; /** \brief For graphs that will be implemented as multi-top engines, this - * specifies the top event. Only used on edges from the start vertex. */ - u32 top = 0; + * specifies the top events. Only used on edges from the start vertex. */ + ue2::flat_set<u32> tops; /** \brief Flags associated with assertions.
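A toy model of the NFAGraphEdgeProps change above (illustration only, std containers instead of ue2 types): once an edge carries a set of tops rather than a single u32, two start edges to the same successor that differed only in their top event can collapse into one edge carrying both tops.

#include <cassert>
#include <map>
#include <set>

struct EdgeProps {
    std::set<unsigned> tops; // was: unsigned top;
};

int main() {
    // Start edges keyed by successor vertex index.
    std::map<unsigned, EdgeProps> start_edges;
    start_edges[7].tops.insert(0); // TOP 0 leads to vertex 7
    start_edges[7].tops.insert(1); // TOP 1 leads to vertex 7 as well
    assert(start_edges.size() == 1);         // one edge...
    assert(start_edges[7].tops.size() == 2); // ...carrying two tops
    return 0;
}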
*/ u32 assert_flags = 0; diff --git a/src/nfagraph/ng_holder.cpp b/src/nfagraph/ng_holder.cpp index 53566891..5d83e626 100644 --- a/src/nfagraph/ng_holder.cpp +++ b/src/nfagraph/ng_holder.cpp @@ -178,7 +178,6 @@ std::pair add_edge(NFAVertex u, NFAVertex v, NGHolder &h) { pair e = add_edge(u, v, h.g); h.g[e.first].index = h.numEdges++; assert(!h.isValidNumEdges || h.numEdges > 0); // no wrapping - h.g[e.first].top = 0; return e; } diff --git a/src/nfagraph/ng_holder.h b/src/nfagraph/ng_holder.h index f0a387d0..49050808 100644 --- a/src/nfagraph/ng_holder.h +++ b/src/nfagraph/ng_holder.h @@ -315,6 +315,8 @@ void remove_edges(Iter begin, Iter end, NGHolder &h, bool renumber = true) { } } +#define DEFAULT_TOP 0U + /** \brief Clear and remove all of the edges pointed to by the edge descriptors * in the given container. * diff --git a/src/nfagraph/ng_is_equal.cpp b/src/nfagraph/ng_is_equal.cpp index cc65fa17..8e71c337 100644 --- a/src/nfagraph/ng_is_equal.cpp +++ b/src/nfagraph/ng_is_equal.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -153,14 +153,14 @@ bool is_equal_i(const NGHolder &a, const NGHolder &b, } /* check top for edges out of start */ - vector> top_a; - vector> top_b; + vector>> top_a; + vector>> top_b; for (const auto &e : out_edges_range(a.start, a)) { - top_a.emplace_back(a[target(e, a)].index, a[e].top); + top_a.emplace_back(a[target(e, a)].index, a[e].tops); } for (const auto &e : out_edges_range(b.start, b)) { - top_b.emplace_back(b[target(e, b)].index, b[e].top); + top_b.emplace_back(b[target(e, b)].index, b[e].tops); } sort(top_a.begin(), top_a.end()); diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp index 06ea5de3..b9f3434b 100644 --- a/src/nfagraph/ng_limex.cpp +++ b/src/nfagraph/ng_limex.cpp @@ -148,14 +148,16 @@ void dropRedundantStartEdges(NGHolder &g) { static void makeTopStates(NGHolder &g, map &tops, const map &top_reach) { + /* TODO: more intelligent creation of top states */ map> top_succs; for (const auto &e : out_edges_range(g.start, g)) { NFAVertex v = target(e, g); if (v == g.startDs) { continue; } - u32 t = g[e].top; - top_succs[t].push_back(v); + for (u32 t : g[e].tops) { + top_succs[t].push_back(v); + } } for (const auto &top : top_succs) { diff --git a/src/nfagraph/ng_redundancy.cpp b/src/nfagraph/ng_redundancy.cpp index 26599251..8fc5d5f3 100644 --- a/src/nfagraph/ng_redundancy.cpp +++ b/src/nfagraph/ng_redundancy.cpp @@ -310,7 +310,7 @@ bool hasInEdgeTops(const NGHolder &g, NFAVertex v) { bool exists; NFAEdge e; tie(e, exists) = edge_by_target(g.start, v, g); - if (exists && g[e].top != 0) { + if (exists && !g[e].tops.empty()) { return true; } return false; diff --git a/src/nfagraph/ng_repeat.cpp b/src/nfagraph/ng_repeat.cpp index bc7e73d3..5bff21b0 100644 --- a/src/nfagraph/ng_repeat.cpp +++ b/src/nfagraph/ng_repeat.cpp @@ -215,8 +215,8 @@ bool rogueSuccessor(const NGHolder &g, NFAVertex v, static bool hasDifferentTops(const NGHolder &g, const vector &verts) { - bool found = false; - u32 top = 0; + /* TODO: check that we need this now that we allow multiple tops */ + const flat_set *tops = nullptr; for (auto v : verts) { for (const auto &e : in_edges_range(v, g)) { @@ -224,17 +224,12 @@ bool hasDifferentTops(const NGHolder &g, const vector &verts) { if (u != g.start && u != g.startDs) { continue; // Only edges from starts 
have valid top properties. } - u32 t = g[e].top; - DEBUG_PRINTF("edge (%u,%u) with top %u\n", g[u].index, - g[v].index, t); - assert(t < NFA_MAX_TOP_MASKS); - if (!found) { - found = true; - top = t; - } else { - if (t != top) { - return true; // More than one top. - } + DEBUG_PRINTF("edge (%u,%u) with %zu tops\n", g[u].index, g[v].index, + g[e].tops.size()); + if (!tops) { + tops = &g[e].tops; + } else if (g[e].tops != *tops) { + return true; // More than one set of tops. } } } @@ -1123,7 +1118,7 @@ NFAVertex buildTriggerStates(NGHolder &g, const vector &trigger, g[v].char_reach = cr; add_edge(u, v, g); if (u == g.start) { - g[edge(u, v, g).first].top = top; + g[edge(u, v, g).first].tops.insert(top); } u = v; } @@ -1153,18 +1148,21 @@ void addTriggers(NGHolder &g, continue; } - const auto &top = g[e].top; + const auto &tops = g[e].tops; // The caller may not have given us complete trigger information. If we // don't have any triggers for a particular top, we should just leave // it alone. - if (!contains(triggers, top)) { - DEBUG_PRINTF("no triggers for top %u\n", top); - continue; - } + for (u32 top : tops) { + if (!contains(triggers, top)) { + DEBUG_PRINTF("no triggers for top %u\n", top); + goto next_edge; + } - starts_by_top[top].push_back(v); + starts_by_top[top].push_back(v); + } dead.push_back(e); + next_edge:; } remove_edges(dead, g); @@ -2105,14 +2103,26 @@ void populateFixedTopInfo(const map &fixed_depth_tops, if (v == g.startDs) { continue; } - u32 top = g[e].top; + depth td = depth::infinity(); - if (contains(fixed_depth_tops, top)) { - td = fixed_depth_tops.at(top); + for (u32 top : g[e].tops) { + if (!contains(fixed_depth_tops, top)) { + td = depth::infinity(); + break; + } + depth td_t = fixed_depth_tops.at(top); + if (td == td_t) { + continue; + } else if (td == depth::infinity()) { + td = td_t; + } else { + td = depth::infinity(); + break; + } } - DEBUG_PRINTF("scanning from %u top=%u depth=%s\n", - g[v].index, top, td.str().c_str()); + DEBUG_PRINTF("scanning from %u depth=%s\n", g[v].index, + td.str().c_str()); /* for each vertex reachable from v update its map to reflect that it is * reachable from a top of depth td. */ @@ -2428,7 +2438,7 @@ bool isPureRepeat(const NGHolder &g, PureRepeat &repeat) { } // Must have precisely one top. - if (!onlyOneTop(g)) { + if (is_triggered(g) && !onlyOneTop(g)) { DEBUG_PRINTF("Too many tops\n"); return false; } diff --git a/src/nfagraph/ng_restructuring.cpp b/src/nfagraph/ng_restructuring.cpp index 09abf775..c85860c7 100644 --- a/src/nfagraph/ng_restructuring.cpp +++ b/src/nfagraph/ng_restructuring.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -174,6 +174,7 @@ numberStates(NGHolder &h, const map &tops) { u32 countStates(const NGHolder &g, const ue2::unordered_map &state_ids, bool addTops) { + /* TODO: smarter top state allocation, move to limex? 
*/ if (state_ids.empty()) { return 0; } @@ -188,11 +189,11 @@ u32 countStates(const NGHolder &g, u32 num_states = max_state + 1; assert(contains(state_ids, g.start)); - if (addTops && state_ids.at(g.start) != NO_STATE) { + if (addTops && is_triggered(g) && state_ids.at(g.start) != NO_STATE) { num_states--; set tops; for (auto e : out_edges_range(g.start, g)) { - tops.insert(g[e].top); + insert(&tops, g[e].tops); } num_states += tops.size(); } diff --git a/src/nfagraph/ng_rose.cpp b/src/nfagraph/ng_rose.cpp index 137ac5cc..108134a6 100644 --- a/src/nfagraph/ng_rose.cpp +++ b/src/nfagraph/ng_rose.cpp @@ -811,6 +811,7 @@ bool can_match(const NGHolder &g, const ue2_literal &lit, bool overhang_ok) { u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, u32 max_delay, bool overhang_ok) { + assert(isCorrectlyTopped(g)); if (max_delay == MO_INVALID_IDX) { max_delay--; } @@ -878,12 +879,16 @@ u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, sort(verts.begin(), verts.end(), VertexIndexOrdering(g)); for (auto v : verts) { - add_edge(v, g.accept, g); + NFAEdge e = add_edge(v, g.accept, g).first; g[v].reports.insert(0); + if (is_triggered(g) && v == g.start) { + g[e].tops.insert(DEFAULT_TOP); + } } pruneUseless(g); assert(allMatchStatesHaveReports(g)); + assert(isCorrectlyTopped(g)); DEBUG_PRINTF("graph has %zu vertices left\n", num_vertices(g)); return delay; @@ -892,6 +897,7 @@ u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, u32 delay, const vector &preds) { assert(delay <= lit.length()); + assert(isCorrectlyTopped(g)); DEBUG_PRINTF("adding on '%s' %u\n", dumpString(lit).c_str(), delay); NFAVertex prev = g.accept; @@ -906,7 +912,10 @@ void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, } for (auto v : preds) { - add_edge(v, prev, g); + NFAEdge e = add_edge(v, prev, g).first; + if (v == g.start && is_triggered(g)) { + g[e].tops.insert(DEFAULT_TOP); + } } // Every predecessor of accept must have a report. @@ -917,6 +926,7 @@ void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, g.renumberVertices(); g.renumberEdges(); assert(allMatchStatesHaveReports(g)); + assert(isCorrectlyTopped(g)); } void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, diff --git a/src/nfagraph/ng_split.cpp b/src/nfagraph/ng_split.cpp index bce638c0..4576a498 100644 --- a/src/nfagraph/ng_split.cpp +++ b/src/nfagraph/ng_split.cpp @@ -151,7 +151,8 @@ void splitRHS(const NGHolder &base, const vector &pivots, for (auto pivot : pivots) { assert(contains(*rhs_map, pivot)); - add_edge(rhs->start, (*rhs_map)[pivot], *rhs); + NFAEdge e = add_edge(rhs->start, (*rhs_map)[pivot], *rhs).first; + (*rhs)[e].tops.insert(DEFAULT_TOP); } /* should do the renumbering unconditionally as we know edges are already @@ -215,6 +216,7 @@ void splitGraph(const NGHolder &base, const vector &pivots, DEBUG_PRINTF("splitting graph at %zu vertices\n", pivots.size()); assert(!has_parallel_edge(base)); + assert(isCorrectlyTopped(base)); /* RHS pivots are built from the common set of successors of pivots. 
*/ vector<NFAVertex> rhs_pivots; @@ -228,6 +230,8 @@ void splitGraph(const NGHolder &base, const vector<NFAVertex> &pivots, assert(!has_parallel_edge(*lhs)); assert(!has_parallel_edge(*rhs)); + assert(isCorrectlyTopped(*lhs)); + assert(isCorrectlyTopped(*rhs)); } void splitGraph(const NGHolder &base, NFAVertex pivot, diff --git a/src/nfagraph/ng_split.h b/src/nfagraph/ng_split.h index 75577e97..31c1cf35 100644 --- a/src/nfagraph/ng_split.h +++ b/src/nfagraph/ng_split.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -47,6 +47,8 @@ class NGHolder; * is in the lhs if it is reachable from start without going through the * pivot. The pivot ends up in the LHS and any adjacent vertices in the RHS. * + * Note: The RHS is set up to be triggered by TOP 0 + * * When multiple split vertices are provided: * - RHS contains all vertices reachable from every pivot * - LHS contains all vertices which are reachable from start ignoring any diff --git a/src/nfagraph/ng_uncalc_components.cpp b/src/nfagraph/ng_uncalc_components.cpp index 217183de..fd6dfc3e 100644 --- a/src/nfagraph/ng_uncalc_components.cpp +++ b/src/nfagraph/ng_uncalc_components.cpp @@ -205,11 +205,10 @@ u32 commonPrefixLength(const NGHolder &ga, break; } - if (ga[*ei].top != gb[b_edge].top) { + if (ga[*ei].tops != gb[b_edge].tops) { max = i; ok = false; - DEBUG_PRINTF("tops don't match on edge %zu->%u\n", - i, sid); + DEBUG_PRINTF("tops don't match on edge %zu->%u\n", i, sid); } } @@ -318,7 +317,7 @@ void mergeNfa(NGHolder &dest, vector<NFAVertex> &destStateMap, DEBUG_PRINTF("skipping common edge\n"); assert(edge(u, v, dest).second); // Should never merge edges with different top values. - assert(vic[e].top == dest[edge(u, v, dest).first].top); + assert(vic[e].tops == dest[edge(u, v, dest).first].tops); continue; } else { assert(is_any_accept(v, dest)); @@ -506,11 +505,13 @@ bool mergeableStarts(const NGHolder &h1, const NGHolder &h2) { return false; } + /* TODO: relax top checks if reports match */ + // If both graphs have edge (start, accept), the tops must match.
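As a toy illustration of the start-edge checks that follow (not Hyperscan code): with tops held as sets, "the tops must match" becomes plain set equality rather than a comparison of two u32 values.

#include <cassert>
#include <set>

int main() {
    std::set<unsigned> tops_a = {0, 2};
    std::set<unsigned> tops_b = {0, 2};
    std::set<unsigned> tops_c = {0};
    assert(tops_a == tops_b);    // starts considered mergeable
    assert(!(tops_a == tops_c)); // tops differ: refuse to merge
    return 0;
}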
auto e1_accept = edge(h1.start, h1.accept, h1); auto e2_accept = edge(h2.start, h2.accept, h2); if (e1_accept.second && e2_accept.second && - h1[e1_accept.first].top != h2[e2_accept.first].top) { + h1[e1_accept.first].tops != h2[e2_accept.first].tops) { return false; } @@ -518,7 +519,7 @@ auto e1_eod = edge(h1.start, h1.acceptEod, h1); auto e2_eod = edge(h2.start, h2.acceptEod, h2); if (e1_eod.second && e2_eod.second && - h1[e1_eod.first].top != h2[e2_eod.first].top) { + h1[e1_eod.first].tops != h2[e2_eod.first].tops) { return false; } diff --git a/src/nfagraph/ng_util.cpp b/src/nfagraph/ng_util.cpp index c629d553..da9c2438 100644 --- a/src/nfagraph/ng_util.cpp +++ b/src/nfagraph/ng_util.cpp @@ -165,12 +165,7 @@ void clone_in_edges(NGHolder &g, NFAVertex s, NFAVertex dest) { } bool onlyOneTop(const NGHolder &g) { - set<u32> tops; - for (const auto &e : out_edges_range(g.start, g)) { - tops.insert(g[e].top); - } - assert(!tops.empty()); - return tops.size() == 1; + return getTops(g).size() == 1; } namespace { @@ -465,17 +460,21 @@ void appendLiteral(NGHolder &h, const ue2_literal &s) { ue2::flat_set<u32> getTops(const NGHolder &h) { ue2::flat_set<u32> tops; for (const auto &e : out_edges_range(h.start, h)) { - NFAVertex v = target(e, h); - if (v == h.startDs) { - continue; - } - u32 top = h[e].top; - assert(top < NFA_MAX_TOP_MASKS); - tops.insert(top); + insert(&tops, h[e].tops); } return tops; } +void setTops(NGHolder &h, u32 top) { + for (const auto &e : out_edges_range(h.start, h)) { + assert(h[e].tops.empty()); + if (target(e, h) == h.startDs) { + continue; + } + h[e].tops.insert(top); + } +} + void clearReports(NGHolder &g) { DEBUG_PRINTF("clearing reports without an accept edge\n"); ue2::unordered_set<NFAVertex> allow; @@ -694,6 +693,25 @@ bool hasCorrectlyNumberedEdges(const NGHolder &g) { && num_edges(g) == num_edges(g.g); } +bool isCorrectlyTopped(const NGHolder &g) { + if (is_triggered(g)) { + for (const auto &e : out_edges_range(g.start, g)) { + if (g[e].tops.empty() != (target(e, g) == g.startDs)) { + return false; + } + } + } else { + for (const auto &e : out_edges_range(g.start, g)) { + if (!g[e].tops.empty()) { + return false; + } + } + } + + return true; +} + #endif // NDEBUG } // namespace ue2 diff --git a/src/nfagraph/ng_util.h b/src/nfagraph/ng_util.h index 4f58dc45..1c6dd461 100644 --- a/src/nfagraph/ng_util.h +++ b/src/nfagraph/ng_util.h @@ -198,9 +198,13 @@ VertexIndexOrdering<Graph> make_index_ordering(const Graph &g) { bool onlyOneTop(const NGHolder &g); -/** Return a mask of the tops on the given graph. */ +/** Return the set of the tops on the given graph. */ flat_set<u32> getTops(const NGHolder &h); +/** Initialise the tops on h to the provided top. Assumes that h is triggered and + * no tops have been set on h. */ +void setTops(NGHolder &h, u32 top = DEFAULT_TOP); + /** adds a vertex to g with all the same vertex properties as \p v (aside from * index) */ NFAVertex clone_vertex(NGHolder &g, NFAVertex v); @@ -319,6 +323,12 @@ bool hasCorrectlyNumberedVertices(const NGHolder &g); */ bool hasCorrectlyNumberedEdges(const NGHolder &g); +/** * Assertion: returns true if the graph is triggered and all edges out of start * have tops OR if the graph is not triggered and all edges out of start have no * tops.
+ */ +bool isCorrectlyTopped(const NGHolder &g); #endif // NDEBUG } // namespace ue2 diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 94e0a998..538c945d 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -1076,8 +1076,10 @@ bool splitRoseEdge(const NGHolder &base_graph, RoseInGraph &vg, assert(hasCorrectlyNumberedVertices(*rhs)); assert(hasCorrectlyNumberedEdges(*rhs)); + assert(isCorrectlyTopped(*rhs)); assert(hasCorrectlyNumberedVertices(*lhs)); assert(hasCorrectlyNumberedEdges(*lhs)); + assert(isCorrectlyTopped(*lhs)); return true; } @@ -1152,7 +1154,11 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg, /* want to cut off paths to pivot from things other than the pivot - * makes a more svelte graphy */ clear_in_edges(temp_map[pivot], *new_lhs); - add_edge(temp_map[prev_v], temp_map[pivot], *new_lhs); + NFAEdge pivot_edge = add_edge(temp_map[prev_v], temp_map[pivot], + *new_lhs).first; + if (is_triggered(h) && prev_v == h.start) { + (*new_lhs)[pivot_edge].tops.insert(DEFAULT_TOP); + } pruneUseless(*new_lhs, false); renumber_vertices(*new_lhs); @@ -1162,6 +1168,7 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg, assert(hasCorrectlyNumberedVertices(*new_lhs)); assert(hasCorrectlyNumberedEdges(*new_lhs)); + assert(isCorrectlyTopped(*new_lhs)); const set &lits = cut_lits.at(e); for (const auto &lit : lits) { @@ -1228,6 +1235,7 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg, DEBUG_PRINTF(" into rhs %s\n", to_string(new_rhs->kind).c_str()); done_rhs.emplace(adj, new_rhs); + assert(isCorrectlyTopped(*new_rhs)); } assert(done_rhs[adj].get()); @@ -1235,6 +1243,7 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg, assert(hasCorrectlyNumberedVertices(*new_rhs)); assert(hasCorrectlyNumberedEdges(*new_rhs)); + assert(isCorrectlyTopped(*new_rhs)); if (vg[dest].type == RIV_LITERAL && !can_match(*new_rhs, vg[dest].s, true)) { @@ -1380,6 +1389,7 @@ void avoidOutfixes(RoseInGraph &vg, const CompileContext &cc) { RoseInEdge e = *edges(vg).first; NGHolder &h = *vg[e].graph; + assert(isCorrectlyTopped(h)); renumber_vertices(h); renumber_edges(h); @@ -1602,6 +1612,7 @@ void removeRedundantLiteralsFromInfix(const NGHolder &h, RoseInGraph &ig, continue; } + assert(isCorrectlyTopped(*h_new)); graphs[right] = make_pair(h_new, delay); } @@ -1720,6 +1731,8 @@ unique_ptr make_chain(u32 count) { h[u].reports.insert(0); add_edge(u, h.accept, h); + setTops(h); + return rv; } @@ -1777,6 +1790,7 @@ bool makeTransientFromLongLiteral(NGHolder &h, RoseInGraph &vg, assert(willBeTransient(findMaxWidth(*h_new), cc) || willBeAnchoredTable(findMaxWidth(*h_new), cc.grey)); + assert(isCorrectlyTopped(*h_new)); graphs[v] = h_new; } @@ -1811,6 +1825,7 @@ bool improvePrefix(NGHolder &h, RoseInGraph &vg, const vector &ee, const CompileContext &cc) { DEBUG_PRINTF("trying to improve prefix %p, %zu verts\n", &h, num_vertices(h)); + assert(isCorrectlyTopped(h)); renumber_vertices(h); renumber_edges(h); @@ -1860,6 +1875,7 @@ bool improvePrefix(NGHolder &h, RoseInGraph &vg, const vector &ee, for (const auto &e : ee) { shared_ptr hh = cloneHolder(h); auto succ_lit = vg[target(e, vg)].s; + assert(isCorrectlyTopped(*hh)); u32 delay = removeTrailingLiteralStates(*hh, succ_lit, succ_lit.length(), false /* can't overhang start */); @@ -1868,6 +1884,7 @@ bool improvePrefix(NGHolder &h, RoseInGraph &vg, const vector &ee, continue; } + assert(isCorrectlyTopped(*hh)); trimmed[hh].emplace_back(e, delay); } @@ -2110,10 +2127,15 @@ void splitEdgesForSuffix(const NGHolder &base_graph, 
RoseInGraph &vg, add_edge(lhs->accept, lhs->acceptEod, *lhs); clearReports(*lhs); for (NFAVertex v : splitters) { - add_edge(v_map[v], lhs->accept, *lhs); + NFAEdge e = add_edge(v_map[v], lhs->accept, *lhs).first; + if (v == base_graph.start) { + (*lhs)[e].tops.insert(DEFAULT_TOP); + } (*lhs)[v_map[v]].reports.insert(0); + } pruneUseless(*lhs); + assert(isCorrectlyTopped(*lhs)); /* create literal vertices and connect preds */ for (const auto &lit : split.lit) { diff --git a/src/nfagraph/ng_width.cpp b/src/nfagraph/ng_width.cpp index 470f9343..5fb58ee4 100644 --- a/src/nfagraph/ng_width.cpp +++ b/src/nfagraph/ng_width.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -69,7 +69,7 @@ struct SpecialEdgeFilter { return false; } if (single_top) { - if (u == h->start && g[e].top != top) { + if (u == h->start && !contains(g[e].tops, top)) { return false; } if (u == h->startDs) { diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index 0f0e8d18..72a791ba 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -1619,6 +1619,8 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter, } NGHolder *h = in[e].graph.get(); + + assert(isCorrectlyTopped(*h)); if (!contains(graphs, h)) { ordered_graphs.push_back(h); } diff --git a/src/rose/rose_build_add_mask.cpp b/src/rose/rose_build_add_mask.cpp index 45333a38..ef83cae1 100644 --- a/src/rose/rose_build_add_mask.cpp +++ b/src/rose/rose_build_add_mask.cpp @@ -574,7 +574,8 @@ unique_ptr buildMaskRhs(const ue2::flat_set &reports, succ = u; } - add_edge(h.start, succ, h); + NFAEdge e = add_edge(h.start, succ, h).first; + h[e].tops.insert(DEFAULT_TOP); return rhs; } @@ -632,6 +633,7 @@ void doAddMask(RoseBuildImpl &tbi, bool anchored, = buildMaskLhs(true, minBound - prefix2_len + overlap, mask3); mhs->kind = NFA_INFIX; + setTops(*mhs); add_edge(u, v, RoseInEdgeProps(mhs, delay), ig); DEBUG_PRINTF("add anch literal too!\n"); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 5421f1cb..a7979c4f 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -512,7 +512,7 @@ bool nfaStuckOn(const NGHolder &g) { set done_tops; for (const auto &e : out_edges_range(g.start, g)) { - tops.insert(g[e].top); + insert(&tops, g[e].tops); if (!g[target(e, g)].char_reach.all()) { continue; } @@ -521,7 +521,7 @@ bool nfaStuckOn(const NGHolder &g) { insert(&asucc, adjacent_vertices(target(e, g), g)); if (asucc == succ) { - done_tops.insert(g[e].top); + insert(&done_tops, g[e].tops); } } @@ -842,8 +842,8 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, if (!n && !is_prefix && left.graph() && onlyOneTop(*left.graph())) { map > > triggers; findTriggerSequences(tbi, infixTriggers.at(left), &triggers); - assert(contains(triggers, 0)); // single top - n = constructLBR(*left.graph(), triggers[0], cc, rm); + assert(triggers.size() == 1); // single top + n = constructLBR(*left.graph(), triggers.begin()->second, cc, rm); } if (!n && left.graph()) { @@ -1435,7 +1435,7 @@ void findExclusiveInfixes(RoseBuildImpl &build, build_context &bc, // Sanity check: our NFA should contain each of the tops mentioned on // our in-edges. - assert(roseHasTops(g, v)); + assert(roseHasTops(build, v)); if (contains(leftfixes, leftfix)) { // NFA already built. 
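A note on the makeLeftNfa hunk above: with per-edge top sets, a graph that passes onlyOneTop() is no longer guaranteed to use top 0, so the trigger map is indexed by "the single entry" rather than by literal key 0. A standalone sketch of the pattern (std::map in place of the build's trigger map; the trigger data here is hypothetical):

#include <cassert>
#include <map>
#include <string>
#include <vector>

int main() {
    std::map<unsigned, std::vector<std::string>> triggers;
    triggers[3] = {"abc"}; // the graph's only top happens to be 3, not 0
    assert(triggers.size() == 1);
    // Correct for any top id; indexing triggers[0] here would silently
    // create and return an empty entry instead.
    const auto &seq = triggers.begin()->second;
    return seq.front() == "abc" ? 0 : 1;
}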
@@ -1504,7 +1504,7 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc, // Sanity check: our NFA should contain each of the tops mentioned on // our in-edges. - assert(roseHasTops(g, v)); + assert(roseHasTops(tbi, v)); bool is_transient = contains(tbi.transient, leftfix); diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 3f82a9cc..c93f4eac 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -43,7 +43,6 @@ #include "nfa/nfa_internal.h" #include "nfa/rdfa.h" #include "nfagraph/ng_holder.h" -#include "nfagraph/ng_dump.h" #include "nfagraph/ng_execute.h" #include "nfagraph/ng_is_equal.h" #include "nfagraph/ng_limex.h" @@ -1554,53 +1553,6 @@ bool roleOffsetsAreValid(const RoseGraph &g) { } return true; } - -static UNUSED -bool hasOrphanedTops(const RoseBuildImpl &tbi) { - const RoseGraph &g = tbi.g; - - ue2::unordered_map > roses; - ue2::unordered_map > suffixes; - - for (auto v : vertices_range(g)) { - if (g[v].left) { - set &tops = roses[g[v].left]; - if (tbi.isRootSuccessor(v)) { - // Prefix, has only one top. - tops.insert(0); - } else { - // Tops for infixes come from the in-edges. - for (const auto &e : in_edges_range(v, g)) { - tops.insert(g[e].rose_top); - } - } - } - if (g[v].suffix) { - suffixes[g[v].suffix].insert(g[v].suffix.top); - } - } - - for (const auto &e : roses) { - if (all_tops(e.first) != e.second) { - DEBUG_PRINTF("rose tops (%s) don't match rose graph (%s)\n", - as_string_list(all_tops(e.first)).c_str(), - as_string_list(e.second).c_str()); - return true; - } - } - - for (const auto &e : suffixes) { - if (all_tops(e.first) != e.second) { - DEBUG_PRINTF("suffix tops (%s) don't match rose graph (%s)\n", - as_string_list(all_tops(e.first)).c_str(), - as_string_list(e.second).c_str()); - return true; - } - } - - return false; -} - #endif // NDEBUG aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { @@ -1681,13 +1633,17 @@ aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { mergeSmallLeftfixes(*this); } + assert(!hasOrphanedTops(*this)); + // Do a rose-merging aliasing pass. aliasRoles(*this, true); + assert(!hasOrphanedTops(*this)); // Run a merge pass over the outfixes as well. mergeOutfixes(*this); assert(!danglingVertexRef(*this)); + assert(!hasOrphanedTops(*this)); findMoreLiteralMasks(*this); diff --git a/src/rose/rose_build_convert.cpp b/src/rose/rose_build_convert.cpp index 1578dda1..d3fa1ac6 100644 --- a/src/rose/rose_build_convert.cpp +++ b/src/rose/rose_build_convert.cpp @@ -163,6 +163,8 @@ unique_ptr convertLeafToHolder(const RoseGraph &g, } } + setTops(*out); + // Literal vertices wired to accept. 
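The roseHasTops()/hasOrphanedTops() helpers being rearranged here are debug-only. For readers unfamiliar with the convention, a minimal sketch of the pattern (illustration, not project code): the checker exists only under !NDEBUG and is invoked solely from assert(), so release builds drop the call entirely.

#include <cassert>

#ifndef NDEBUG
static bool invariantHolds(int value) {
    return value >= 0; // stand-in for a real structural check
}
#endif

int main() {
    int value = 1;
    // With -DNDEBUG the assert expands to nothing, so the call vanishes.
    assert(invariantHolds(value));
    return value == 1 ? 0 : 1;
}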
NFAVertex litfirst, litlast; tie(litfirst, litlast) = addLiteralVertices(g, literals, t_v, *out); @@ -400,7 +402,10 @@ unique_ptr makeFloodProneSuffix(const ue2_literal &s, size_t len, NFAVertex u = h->start; for (auto it = s.begin() + s.length() - len; it != s.end(); ++it) { NFAVertex v = addHolderVertex(*it, *h); - add_edge(u, v, *h); + NFAEdge e = add_edge(u, v, *h).first; + if (u == h->start) { + (*h)[e].tops.insert(DEFAULT_TOP); + } u = v; } diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index d239a698..cc00603a 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -615,7 +615,8 @@ ue2_literal findNonOverlappingTail(const std::set &lits, void setReportId(NGHolder &g, ReportID id); #ifndef NDEBUG -bool roseHasTops(const RoseGraph &g, RoseVertex v); +bool roseHasTops(const RoseBuildImpl &build, RoseVertex v); +bool hasOrphanedTops(const RoseBuildImpl &build); #endif u64a findMaxOffset(const std::set &reports, const ReportManager &rm); diff --git a/src/rose/rose_build_infix.cpp b/src/rose/rose_build_infix.cpp index e81a7b00..73f9e99b 100644 --- a/src/rose/rose_build_infix.cpp +++ b/src/rose/rose_build_infix.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -108,15 +108,10 @@ void contractVertex(NGHolder &g, NFAVertex v, } static -u32 findMaxInfixMatches(const NGHolder &h, const set &lits) { +u32 findMaxLiteralMatches(const NGHolder &h, const set &lits) { DEBUG_PRINTF("h=%p, %zu literals\n", &h, lits.size()); //dumpGraph("infix.dot", h.g); - if (!onlyOneTop(h)) { - DEBUG_PRINTF("more than one top!n"); - return NO_MATCH_LIMIT; - } - // Indices of vertices that could terminate any of the literals in 'lits'. 
set<NFAVertex> terms; @@ -262,7 +257,11 @@ u32 findMaxInfixMatches(const left_id &left, const set<ue2_literal> &lits) { return findMaxInfixMatches(*left.castle(), lits); } if (left.graph()) { - return findMaxInfixMatches(*left.graph(), lits); + if (!onlyOneTop(*left.graph())) { + DEBUG_PRINTF("more than one top!\n"); + return NO_MATCH_LIMIT; + } + return findMaxLiteralMatches(*left.graph(), lits); } return NO_MATCH_LIMIT; @@ -315,7 +314,7 @@ void findCountingMiracleInfo(const left_id &left, const vector<u8> &stopTable, lits.insert(ue2_literal(c, false)); } - u32 count = findMaxInfixMatches(*left.graph(), lits); + u32 count = findMaxLiteralMatches(*left.graph(), lits); DEBUG_PRINTF("counting miracle %u\n", count + 1); if (count && count < 50) { *cm_count = count + 1; diff --git a/src/rose/rose_build_lookaround.cpp b/src/rose/rose_build_lookaround.cpp index ba77b402..7c58f931 100644 --- a/src/rose/rose_build_lookaround.cpp +++ b/src/rose/rose_build_lookaround.cpp @@ -72,7 +72,7 @@ void getForwardReach(const NGHolder &g, u32 top, map<s32, CharReach> &look) { if (v == g.startDs) { continue; } - if (g[e].top == top) { + if (contains(g[e].tops, top)) { curr.insert(v); } } diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index dbd580ed..01134736 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -1762,9 +1762,12 @@ void replaceTops(NGHolder &h, const map<u32, u32> &top_mapping) { if (v == h.startDs) { continue; } - DEBUG_PRINTF("vertex %u has top %u\n", h[v].index, h[e].top); - assert(contains(top_mapping, h[e].top)); - h[e].top = top_mapping.at(h[e].top); + flat_set<u32> new_tops; + for (u32 t : h[e].tops) { + DEBUG_PRINTF("vertex %u has top %u\n", h[v].index, t); + new_tops.insert(top_mapping.at(t)); + } + h[e].tops = move(new_tops); } } diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index b9aeabd0..dcb2a4eb 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -903,12 +903,15 @@ RoseVertex RoseBuildImpl::cloneVertex(RoseVertex v) { } #ifndef NDEBUG -bool roseHasTops(const RoseGraph &g, RoseVertex v) { +bool roseHasTops(const RoseBuildImpl &build, RoseVertex v) { + const RoseGraph &g = build.g; assert(g[v].left); set<u32> graph_tops; - for (const auto &e : in_edges_range(v, g)) { - graph_tops.insert(g[e].rose_top); + if (!build.isRootSuccessor(v)) { + for (const auto &e : in_edges_range(v, g)) { + graph_tops.insert(g[e].rose_top); + } } return is_subset_of(graph_tops, all_tops(g[v].left)); @@ -1073,18 +1076,9 @@ bool has_non_eod_accepts(const suffix_id &s) { set<u32> all_tops(const suffix_id &s) { assert(s.graph() || s.castle() || s.haig() || s.dfa()); if (s.graph()) { - set<u32> tops; - const NGHolder &h = *s.graph(); - for (const auto &e : out_edges_range(h.start, h)) { - if (target(e, h) == h.startDs) { - continue; - } - tops.insert(h[e].top); - } - if (tops.empty()) { - tops.insert(0); // Vacuous graph, triggered on zero top. - } - return tops; + flat_set<u32> tops = getTops(*s.graph()); + assert(!tops.empty()); + return {tops.begin(), tops.end()}; } if (s.castle()) { @@ -1142,18 +1136,8 @@ depth findMaxWidth(const left_id &r) { set<u32> all_tops(const left_id &r) { assert(r.graph() || r.castle() || r.haig() || r.dfa()); if (r.graph()) { - set<u32> tops; - const NGHolder &h = *r.graph(); - for (const auto &e : out_edges_range(h.start, h)) { - if (target(e, h) == h.startDs) { - continue; - } - tops.insert(h[e].top); - } - if (tops.empty()) { - tops.insert(0); // Vacuous graph, triggered on zero top.
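The replaceTops() hunk above remaps every top on every start edge through an old-to-new mapping. A self-contained sketch of the same pattern with std containers (not the Rose code itself):

#include <cassert>
#include <map>
#include <set>
#include <utility>

int main() {
    const std::map<unsigned, unsigned> top_mapping = {{0, 2}, {1, 5}};
    std::set<unsigned> tops = {0, 1};
    std::set<unsigned> new_tops;
    for (unsigned t : tops) {
        // at() throws on an incomplete mapping, mirroring the contains()
        // assertion that the original code replaces.
        new_tops.insert(top_mapping.at(t));
    }
    tops = std::move(new_tops);
    assert((tops == std::set<unsigned>{2, 5}));
    return 0;
}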
- } - return tops; + flat_set tops = getTops(*r.graph()); + return {tops.begin(), tops.end()}; } if (r.castle()) { @@ -1348,6 +1332,49 @@ bool canImplementGraphs(const RoseBuildImpl &tbi) { return true; } + +bool hasOrphanedTops(const RoseBuildImpl &build) { + const RoseGraph &g = build.g; + + ue2::unordered_map > roses; + ue2::unordered_map > suffixes; + + for (auto v : vertices_range(g)) { + if (g[v].left) { + set &tops = roses[g[v].left]; + if (!build.isRootSuccessor(v)) { + // Tops for infixes come from the in-edges. + for (const auto &e : in_edges_range(v, g)) { + tops.insert(g[e].rose_top); + } + } + } + if (g[v].suffix) { + suffixes[g[v].suffix].insert(g[v].suffix.top); + } + } + + for (const auto &e : roses) { + if (all_tops(e.first) != e.second) { + DEBUG_PRINTF("rose tops (%s) don't match rose graph (%s)\n", + as_string_list(all_tops(e.first)).c_str(), + as_string_list(e.second).c_str()); + return true; + } + } + + for (const auto &e : suffixes) { + if (all_tops(e.first) != e.second) { + DEBUG_PRINTF("suffix tops (%s) don't match rose graph (%s)\n", + as_string_list(all_tops(e.first)).c_str(), + as_string_list(e.second).c_str()); + return true; + } + } + + return false; +} + #endif // NDEBUG } // namespace ue2 diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp index b223fa92..66e44b3e 100644 --- a/src/rose/rose_build_role_aliasing.cpp +++ b/src/rose/rose_build_role_aliasing.cpp @@ -863,7 +863,7 @@ void pruneUnusedTops(CastleProto &castle, const RoseGraph &g, static void pruneUnusedTops(NGHolder &h, const RoseGraph &g, const set &verts) { - ue2::unordered_set used_tops; + ue2::flat_set used_tops; for (auto v : verts) { assert(g[v].left.graph.get() == &h); @@ -879,10 +879,13 @@ void pruneUnusedTops(NGHolder &h, const RoseGraph &g, if (v == h.startDs) { continue; // stylised edge, leave it alone. 
} - u32 top = h[e].top; - if (!contains(used_tops, top)) { - DEBUG_PRINTF("edge (start,%u) has unused top %u\n", - h[v].index, top); + flat_set pruned_tops; + auto pt_inserter = inserter(pruned_tops, pruned_tops.end()); + set_intersection(h[e].tops.begin(), h[e].tops.end(), + used_tops.begin(), used_tops.end(), pt_inserter); + h[e].tops = move(pruned_tops); + if (h[e].tops.empty()) { + DEBUG_PRINTF("edge (start,%u) has only unused tops\n", h[v].index); dead.push_back(e); } } @@ -1327,8 +1330,8 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, DEBUG_PRINTF("winner %zu states\n", num_vertices(*b_h)); if (!setDistinctRoseTops(g, victim, *b_h, deque(1, a))) { - assert(roseHasTops(g, a)); - assert(roseHasTops(g, b)); + assert(roseHasTops(build, a)); + assert(roseHasTops(build, b)); return false; } @@ -1341,8 +1344,8 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, for (const auto &e : in_edges_range(a, g)) { g[e] = a_props[source(e, g)]; } - assert(roseHasTops(g, a)); - assert(roseHasTops(g, b)); + assert(roseHasTops(build, a)); + assert(roseHasTops(build, b)); return false; } @@ -1365,8 +1368,8 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, reduceImplementableGraph(*b_h, SOM_NONE, nullptr, build.cc); - assert(roseHasTops(g, a)); - assert(roseHasTops(g, b)); + assert(roseHasTops(build, a)); + assert(roseHasTops(build, b)); assert(isImplementableNFA(*b_h, nullptr, build.cc)); return true; } @@ -1417,8 +1420,8 @@ bool attemptRoseMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, return false; } - assert(roseHasTops(g, a)); - assert(roseHasTops(g, b)); + assert(roseHasTops(build, a)); + assert(roseHasTops(build, b)); if (a_left_id.graph() && b_left_id.graph()) { return attemptRoseGraphMerge(build, preds_same, a, b, trivialCasesOnly, @@ -1737,6 +1740,7 @@ void leftMergePass(CandidateSet &candidates, RoseBuildImpl &build, } DEBUG_PRINTF("%zu candidates remaining\n", candidates.size()); + assert(!hasOrphanedTops(build)); } // Can't merge vertices with different root predecessors. 
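One detail of attemptRoseGraphMerge() above worth spelling out: the merge is speculative, so the in-edge properties of the surviving vertex are copied aside first and restored wholesale if the merge is abandoned. A minimal sketch of that save/try/restore shape (illustration only):

#include <cassert>
#include <map>

int main() {
    std::map<int, int> edge_props = {{1, 10}, {2, 20}}; // pred -> property
    const std::map<int, int> saved = edge_props;        // copy aside

    edge_props[1] = 99;    // speculative mutation during the merge attempt
    bool merge_ok = false; // pretend the merge failed

    if (!merge_ok) {
        edge_props = saved; // roll the properties back
    }
    assert(edge_props.at(1) == 10);
    return 0;
}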
@@ -1952,6 +1956,7 @@ void rightMergePass(CandidateSet &candidates, RoseBuildImpl &build, } DEBUG_PRINTF("%zu candidates remaining\n", candidates.size()); + assert(!hasOrphanedTops(build)); } /** @@ -2017,6 +2022,7 @@ void filterDiamondCandidates(RoseGraph &g, CandidateSet &candidates) { void aliasRoles(RoseBuildImpl &build, bool mergeRoses) { const CompileContext &cc = build.cc; RoseGraph &g = build.g; + assert(!hasOrphanedTops(build)); if (!cc.grey.roseRoleAliasing || !cc.grey.roseGraphReduction) { return; @@ -2050,6 +2056,7 @@ void aliasRoles(RoseBuildImpl &build, bool mergeRoses) { DEBUG_PRINTF("killed %zu vertices\n", dead.size()); build.removeVertices(dead); + assert(!hasOrphanedTops(build)); } } // namespace ue2 diff --git a/src/util/ue2_containers.h b/src/util/ue2_containers.h index b6425f77..5bbf4cfe 100644 --- a/src/util/ue2_containers.h +++ b/src/util/ue2_containers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: From 7d3eff8648361174282c497d653dfdf902b25ca6 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 14 Sep 2016 14:00:43 +1000 Subject: [PATCH 032/103] extern "C" for mask1bit table --- src/util/simd_utils.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index 4bb055df..afa8c7f8 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -260,7 +260,13 @@ m128 loadbytes128(const void *ptr, unsigned int n) { return a; } +#ifdef __cplusplus +extern "C" { +#endif extern const u8 simd_onebit_masks[]; +#ifdef __cplusplus +} +#endif static really_inline m128 mask1bit128(unsigned int n) { From 648a3c48247ad9ba1ec0386843a9813a47436050 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 15 Sep 2016 11:22:29 +1000 Subject: [PATCH 033/103] UE-3025: There is no need to prune tops from non-triggered graphs --- src/nfagraph/ng_limex.cpp | 3 +++ src/rose/rose_build_role_aliasing.cpp | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp index b9f3434b..c6e4c24e 100644 --- a/src/nfagraph/ng_limex.cpp +++ b/src/nfagraph/ng_limex.cpp @@ -512,6 +512,9 @@ u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm, if (!cc.grey.allowLimExNFA) { return false; } + + assert(!can_never_match(g)); + // Quick check: we can always implement an NFA with less than NFA_MAX_STATES // states. Note that top masks can generate extra states, so we account for // those here too. 
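For context on the extern "C" hunk in PATCH 032 above: simd_onebit_masks is defined in a C translation unit, and without C linkage a C++ includer would look up a mangled symbol and fail at link time. A two-file sketch of the pattern (the file names and the _demo identifier are hypothetical, not from the patch):

/* table.c, compiled as C:
 *     const unsigned char onebit_masks_demo[2] = { 0x01, 0x02 };
 */

/* consumer.cpp: the declaration must carry C linkage when seen by C++. */
#ifdef __cplusplus
extern "C" {
#endif
extern const unsigned char onebit_masks_demo[];
#ifdef __cplusplus
}
#endif

int first_mask(void) {
    return onebit_masks_demo[0];
}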
diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp index 66e44b3e..6a0c767d 100644 --- a/src/rose/rose_build_role_aliasing.cpp +++ b/src/rose/rose_build_role_aliasing.cpp @@ -863,6 +863,12 @@ void pruneUnusedTops(CastleProto &castle, const RoseGraph &g, static void pruneUnusedTops(NGHolder &h, const RoseGraph &g, const set<RoseVertex> &verts) { + if (!is_triggered(h)) { + DEBUG_PRINTF("not triggered, no tops\n"); + return; + } + assert(isCorrectlyTopped(h)); + DEBUG_PRINTF("pruning unused tops\n"); ue2::flat_set<u32> used_tops; for (auto v : verts) { assert(g[v].left.graph.get() == &h); @@ -2023,6 +2029,7 @@ void aliasRoles(RoseBuildImpl &build, bool mergeRoses) { const CompileContext &cc = build.cc; RoseGraph &g = build.g; assert(!hasOrphanedTops(build)); + assert(canImplementGraphs(build)); if (!cc.grey.roseRoleAliasing || !cc.grey.roseGraphReduction) { return; } @@ -2057,6 +2064,7 @@ void aliasRoles(RoseBuildImpl &build, bool mergeRoses) { DEBUG_PRINTF("killed %zu vertices\n", dead.size()); build.removeVertices(dead); assert(!hasOrphanedTops(build)); + assert(canImplementGraphs(build)); } } // namespace ue2 From 6ed30194ce03ccee632dc8f2112a3f04b14c0a81 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Thu, 15 Sep 2016 16:45:00 +1000 Subject: [PATCH 034/103] Disable asserts everywhere when we say so --- CMakeLists.txt | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ac9e52d..2c53d6c5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -133,12 +133,6 @@ CMAKE_DEPENDENT_OPTION(DUMP_SUPPORT "Dump code support; normally on, except in r CMAKE_DEPENDENT_OPTION(DISABLE_ASSERTS "Disable assert(); Asserts are enabled in debug builds, disabled in release builds" OFF "NOT RELEASE_BUILD" ON) -if (DISABLE_ASSERTS) - if (CMAKE_BUILD_TYPE STREQUAL "DEBUG") - add_definitions(-DNDEBUG) - endif() -endif() - option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC to be set in project" OFF) # TODO: per platform config files? @@ -159,6 +153,11 @@ if(MSVC OR MSVC_IDE) endif() string(REPLACE "/RTC1" "" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") string(REPLACE "/RTC1" "" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}") + + if (DISABLE_ASSERTS) + set(CMAKE_C_FLAGS_DEBUG "/DNDEBUG ${CMAKE_C_FLAGS_DEBUG}") + set(CMAKE_CXX_FLAGS_DEBUG "/DNDEBUG ${CMAKE_CXX_FLAGS_DEBUG}") + endif () endif() else() @@ -204,6 +203,11 @@ else() set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror") endif() + if (DISABLE_ASSERTS) + set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -DNDEBUG") + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -DNDEBUG") + endif() + if (NOT CMAKE_C_FLAGS MATCHES .*march.*) message(STATUS "Building for current host CPU") set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -march=native -mtune=native") From 68bf473e2e51112ff62baa1a3e401aa00e3f47b8 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 7 Sep 2016 15:59:23 +1000 Subject: [PATCH 035/103] fdr: move long literal handling into Rose Move the hash table used for long literal support in streaming mode from FDR to Rose, and introduce new instructions CHECK_LONG_LIT and CHECK_LONG_LIT_NOCASE for doing literal confirm for long literals. This simplifies FDR confirm, and guarantees that HWLM matchers will only be used for literals < 256 bytes long.
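Before the diffstat, a note on what "long literal support" has to solve, with a deliberately simplified sketch (not the Rose hash-table implementation): in streaming mode a literal longer than the retained history can straddle a stream-write boundary, so the matcher must keep some record of the previous write in order to confirm it.

#include <cassert>
#include <string>

static bool matchAt(const std::string &hist, const std::string &block,
                    const std::string &lit, size_t end_in_block) {
    // Bytes [0, end_in_block) of 'block' are new data; everything before
    // them came from 'hist'. Reconstruct the window covering the literal.
    std::string window = hist + block.substr(0, end_in_block);
    if (window.size() < lit.size()) {
        return false;
    }
    return window.compare(window.size() - lit.size(), lit.size(), lit) == 0;
}

int main() {
    // The literal "longliteral" is split across two stream writes.
    assert(matchAt("longlit", "eral", "longliteral", 4));
    assert(!matchAt("longlit", "oops", "longliteral", 4));
    return 0;
}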
--- CMakeLists.txt | 7 +- src/fdr/fdr.c | 9 +- src/fdr/fdr.h | 7 +- src/fdr/fdr_compile.cpp | 23 +- src/fdr/fdr_confirm_compile.cpp | 2 +- src/fdr/fdr_confirm_runtime.h | 9 +- src/fdr/fdr_internal.h | 2 - src/fdr/fdr_streaming_compile.cpp | 425 ----------------------------- src/fdr/fdr_streaming_internal.h | 152 ----------- src/fdr/fdr_streaming_runtime.h | 368 ------------------------- src/hwlm/hwlm.c | 10 +- src/hwlm/hwlm.h | 5 +- src/hwlm/hwlm_build.cpp | 7 +- src/hwlm/hwlm_build.h | 6 +- src/hwlm/hwlm_literal.cpp | 1 + src/hwlm/hwlm_literal.h | 3 + src/rose/init.c | 5 - src/rose/program_runtime.h | 92 +++++++ src/rose/rose_build_bytecode.cpp | 351 +++++++++++++++++++++++- src/rose/rose_build_compile.cpp | 174 +----------- src/rose/rose_build_dump.cpp | 16 +- src/rose/rose_build_impl.h | 5 + src/rose/rose_build_long_lit.cpp | 348 ++++++++++++++++++++++++ src/rose/rose_build_long_lit.h | 51 ++++ src/rose/rose_build_matchers.cpp | 51 ++-- src/rose/rose_build_matchers.h | 7 +- src/rose/rose_build_program.cpp | 18 ++ src/rose/rose_build_program.h | 57 ++++ src/rose/rose_dump.cpp | 53 +++- src/rose/rose_internal.h | 95 ++++++- src/rose/rose_program.h | 28 +- src/rose/runtime.h | 4 +- src/rose/stream.c | 27 +- src/rose/stream_long_lit.h | 434 ++++++++++++++++++++++++++++++ src/rose/stream_long_lit_hash.h | 65 +++++ src/runtime.c | 13 +- src/scratch.h | 20 ++ src/util/ue2string.h | 23 ++ unit/internal/fdr.cpp | 90 +------ unit/internal/fdr_flood.cpp | 6 +- 40 files changed, 1759 insertions(+), 1310 deletions(-) delete mode 100644 src/fdr/fdr_streaming_compile.cpp delete mode 100644 src/fdr/fdr_streaming_internal.h delete mode 100644 src/fdr/fdr_streaming_runtime.h create mode 100644 src/rose/rose_build_long_lit.cpp create mode 100644 src/rose/rose_build_long_lit.h create mode 100644 src/rose/stream_long_lit.h create mode 100644 src/rose/stream_long_lit_hash.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 2c53d6c5..c9a6f8db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -425,7 +425,6 @@ set (hs_exec_SRCS src/fdr/fdr_internal.h src/fdr/fdr_confirm.h src/fdr/fdr_confirm_runtime.h - src/fdr/fdr_streaming_runtime.h src/fdr/flood_runtime.h src/fdr/fdr_loadval.h src/fdr/teddy.c @@ -531,6 +530,8 @@ set (hs_exec_SRCS src/rose/init.h src/rose/init.c src/rose/stream.c + src/rose/stream_long_lit.h + src/rose/stream_long_lit_hash.h src/rose/match.h src/rose/match.c src/rose/miracle.h @@ -612,8 +613,6 @@ SET (hs_SRCS src/fdr/fdr_engine_description.cpp src/fdr/fdr_engine_description.h src/fdr/fdr_internal.h - src/fdr/fdr_streaming_compile.cpp - src/fdr/fdr_streaming_internal.h src/fdr/flood_compile.cpp src/fdr/teddy_compile.cpp src/fdr/teddy_compile.h @@ -874,6 +873,8 @@ SET (hs_SRCS src/rose/rose_build_impl.h src/rose/rose_build_infix.cpp src/rose/rose_build_infix.h + src/rose/rose_build_long_lit.cpp + src/rose/rose_build_long_lit.h src/rose/rose_build_lookaround.cpp src/rose/rose_build_lookaround.h src/rose/rose_build_matchers.cpp diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index 4230c2b1..23416c70 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -31,7 +31,6 @@ #include "fdr_confirm_runtime.h" #include "fdr_internal.h" #include "fdr_loadval.h" -#include "fdr_streaming_runtime.h" #include "flood_runtime.h" #include "teddy.h" #include "teddy_internal.h" @@ -809,8 +808,6 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, len, hbuf, 0, - hbuf, // nocase - 0, start, cb, ctxt, @@ -828,14 +825,12 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, 
hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf, size_t hlen, const u8 *buf, size_t len, size_t start, HWLMCallback cb, void *ctxt, - hwlm_group_t groups, u8 *stream_state) { + hwlm_group_t groups) { struct FDR_Runtime_Args a = { buf, len, hbuf, hlen, - hbuf, // nocase - start same as caseful, override later if needed - hlen, // nocase start, cb, ctxt, @@ -844,7 +839,6 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf, * the history buffer (they may be garbage). */ hbuf ? unaligned_load_u64a(hbuf + hlen - sizeof(u64a)) : (u64a)0 }; - fdrUnpackState(fdr, &a, stream_state); hwlm_error_t ret; if (unlikely(a.start_offset >= a.len)) { @@ -854,6 +848,5 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf, ret = funcs[fdr->engineID](fdr, &a, groups); } - fdrPackState(fdr, &a, stream_state); return ret; } diff --git a/src/fdr/fdr.h b/src/fdr/fdr.h index e0aa594f..e2b80056 100644 --- a/src/fdr/fdr.h +++ b/src/fdr/fdr.h @@ -43,10 +43,6 @@ extern "C" { struct FDR; -/** \brief Returns non-zero if the contents of the stream state indicate that - * there is active FDR history beyond the regularly used history. */ -u32 fdrStreamStateActive(const struct FDR *fdr, const u8 *stream_state); - /** * \brief Block-mode scan. * @@ -74,12 +70,11 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, * \param cb Callback to call when a match is found. * \param ctxt Caller-provided context pointer supplied to callback on match. * \param groups Initial groups mask. - * \param stream_state Persistent stream state for use by FDR. */ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf, size_t hlen, const u8 *buf, size_t len, size_t start, HWLMCallback cb, void *ctxt, - hwlm_group_t groups, u8 *stream_state); + hwlm_group_t groups); #ifdef __cplusplus } diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 89a0ff72..937513a8 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -39,6 +39,7 @@ #include "teddy_engine_description.h" #include "grey.h" #include "ue2common.h" +#include "hwlm/hwlm_build.h" #include "util/alloc.h" #include "util/compare.h" #include "util/dump_mask.h" @@ -495,14 +496,34 @@ FDRCompiler::build(pair, size_t> &link) { } // namespace +static +size_t maxMaskLen(const vector &lits) { + size_t rv = 0; + for (const auto &lit : lits) { + rv = max(rv, lit.msk.size()); + } + return rv; +} + +static +void setHistoryRequired(hwlmStreamingControl &stream_ctl, + const vector &lits) { + size_t max_mask_len = maxMaskLen(lits); + + // we want enough history to manage the longest literal and the longest + // mask. + stream_ctl.literal_history_required = max(maxLen(lits), max_mask_len) - 1; +} + static aligned_unique_ptr fdrBuildTableInternal(const vector &lits, bool make_small, const target_t &target, const Grey &grey, u32 hint, hwlmStreamingControl *stream_control) { pair, size_t> link(nullptr, 0); + if (stream_control) { - link = fdrBuildTableStreaming(lits, *stream_control); + setHistoryRequired(*stream_control, lits); } DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? 
"avx2" : "no-avx2"); diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index 23437fe2..f84ed402 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -339,7 +339,7 @@ getFDRConfirm(const vector &lits, bool applyOneCharOpt, ptr = ROUNDUP_PTR(ptr, alignof(LitInfo)); if (next(i) == e) { - finalLI.next = 0x0; + finalLI.next = 0; } else { // our next field represents an adjustment on top of // current address + the actual size of the literal diff --git a/src/fdr/fdr_confirm_runtime.h b/src/fdr/fdr_confirm_runtime.h index 9b1df593..2b0cd595 100644 --- a/src/fdr/fdr_confirm_runtime.h +++ b/src/fdr/fdr_confirm_runtime.h @@ -74,10 +74,8 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a if (loc < buf) { u32 full_overhang = buf - loc; - const u8 *history = caseless ? a->buf_history_nocase - : a->buf_history; - size_t len_history = caseless ? a->len_history_nocase - : a->len_history; + const u8 *history = a->buf_history; + size_t len_history = a->len_history; // can't do a vectored confirm either if we don't have // the bytes @@ -123,8 +121,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a const u8 *loc2 = buf + i - li->extended_size + 1 - pullBackAmount; if (loc2 < buf) { u32 full_overhang = buf - loc2; - size_t len_history = caseless ? a->len_history_nocase - : a->len_history; + size_t len_history = a->len_history; if (full_overhang > len_history) { goto out; } diff --git a/src/fdr/fdr_internal.h b/src/fdr/fdr_internal.h index 6272b69e..3bf82837 100644 --- a/src/fdr/fdr_internal.h +++ b/src/fdr/fdr_internal.h @@ -100,8 +100,6 @@ struct FDR_Runtime_Args { size_t len; const u8 *buf_history; size_t len_history; - const u8 *buf_history_nocase; - size_t len_history_nocase; size_t start_offset; HWLMCallback cb; void *ctxt; diff --git a/src/fdr/fdr_streaming_compile.cpp b/src/fdr/fdr_streaming_compile.cpp deleted file mode 100644 index b2e1656c..00000000 --- a/src/fdr/fdr_streaming_compile.cpp +++ /dev/null @@ -1,425 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "fdr_internal.h" -#include "fdr_streaming_internal.h" -#include "fdr_compile_internal.h" -#include "hwlm/hwlm_build.h" -#include "util/alloc.h" -#include "util/bitutils.h" -#include "util/target_info.h" -#include "util/verify_types.h" - -#include -#include -#include -#include -#include -#include - -#include - -using namespace std; -using boost::dynamic_bitset; - -namespace ue2 { - -namespace { -struct LongLitOrder { - bool operator()(const hwlmLiteral &i1, const hwlmLiteral &i2) const { - if (i1.nocase != i2.nocase) { - return i1.nocase < i2.nocase; - } else { - return i1.s < i2.s; - } - } -}; -} - -static -bool hwlmLitEqual(const hwlmLiteral &l1, const hwlmLiteral &l2) { - return l1.s == l2.s && l1.nocase == l2.nocase; -} - -static -u32 roundUpToPowerOfTwo(u32 x) { - x -= 1; - x |= (x >> 1); - x |= (x >> 2); - x |= (x >> 4); - x |= (x >> 8); - x |= (x >> 16); - return x + 1; -} - -/** - * \brief Creates a long literals vector containing all literals of length > max_len. - * - * The last char of each literal is trimmed as we're not interested in full - * matches, only partial matches. - * - * Literals are sorted (by caseful/caseless, then lexicographical order) and - * made unique. - * - * The ID of each literal is set to its position in the vector. - * - * \return False if there aren't any long literals. 
- */ -static -bool setupLongLits(const vector<hwlmLiteral> &lits, - vector<hwlmLiteral> &long_lits, size_t max_len) { - long_lits.reserve(lits.size()); - for (const auto &lit : lits) { - if (lit.s.length() > max_len) { - hwlmLiteral tmp = lit; // copy - tmp.s.pop_back(); - tmp.id = 0; // recalc later - tmp.groups = 0; // filled in later by hash bucket(s) - long_lits.push_back(move(tmp)); - } - } - - if (long_lits.empty()) { - return false; - } - - // sort long_literals by caseful/caseless and in lexicographical order, - // remove duplicates - stable_sort(long_lits.begin(), long_lits.end(), LongLitOrder()); - auto new_end = unique(long_lits.begin(), long_lits.end(), hwlmLitEqual); - long_lits.erase(new_end, long_lits.end()); - - // fill in ids; not currently used - for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) { - i->id = distance(long_lits.begin(), i); - } - return true; -} - -// boundaries are the 'start' boundaries for each 'mode' -// so boundary[CASEFUL] is the index one above the largest caseful index -// positions[CASEFUL] is the # of positions in caseful strings (stream) -// hashedPositions[CASEFUL] is the # of positions in caseful strings -// (not returned - a temporary) -// hashEntries[CASEFUL] is the # of positions hashed for caseful strings -// (rounded up to the nearest power of two) -static -void analyzeLits(const vector<hwlmLiteral> &long_lits, size_t max_len, - u32 *boundaries, u32 *positions, u32 *hashEntries) { - u32 hashedPositions[MAX_MODES]; - - for (u32 m = CASEFUL; m < MAX_MODES; ++m) { - boundaries[m] = verify_u32(long_lits.size()); - positions[m] = 0; - hashedPositions[m] = 0; - } - - for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) { - if (i->nocase) { - boundaries[CASEFUL] = verify_u32(distance(long_lits.begin(), i)); - break; - } - } - - for (const auto &lit : long_lits) { - Modes m = lit.nocase ? CASELESS : CASEFUL; - for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) { - hashedPositions[m]++; - } - positions[m] += lit.s.size(); - } - - for (u32 m = CASEFUL; m < MAX_MODES; m++) { - hashEntries[m] = hashedPositions[m] - ? roundUpToPowerOfTwo(MAX(4096, hashedPositions[m])) - : 0; - } - -#ifdef DEBUG_COMPILE - printf("analyzeLits:\n"); - for (Modes m = CASEFUL; m < MAX_MODES; m++) { - printf("mode %s boundary %d positions %d hashedPositions %d " - "hashEntries %d\n", - (m == CASEFUL) ? "caseful" : "caseless", boundaries[m], - positions[m], hashedPositions[m], hashEntries[m]); - } - printf("\n"); -#endif -} - -static -u32 hashLit(const hwlmLiteral &l, u32 offset, size_t max_len, Modes m) { - return streaming_hash((const u8 *)l.s.c_str() + offset, max_len, m); -} - -// sort by 'distance from start' -namespace { -struct OffsetIDFromEndOrder { - const vector<hwlmLiteral> &lits; // not currently used - explicit OffsetIDFromEndOrder(const vector<hwlmLiteral> &lits_in) - : lits(lits_in) {} - bool operator()(const pair<u32, u32> &i1, const pair<u32, u32> &i2) const { - if (i1.second != i2.second) { - // longest is 'first', so > not < - return i1.second > i2.second; - } - return i1.first < i2.first; - } -}; -} - -static -void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len, - FDRSHashEntry *tab, size_t numEntries, Modes mode, - map<u32, u32> &litToOffsetVal) { - const u32 nbits = lg2(numEntries); - map<u32, deque<pair<u32, u32> > > bucketToLitOffPairs; - map<u32, u64a> bucketToBitfield; - - for (const auto &lit : long_lits) { - if ((mode == CASELESS) != lit.nocase) { - continue; - } - for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) { - u32 h = hashLit(lit, j, max_len, mode); - u32 h_ent = h & ((1U << nbits) - 1); - u32 h_low = (h >> nbits) & 63; - bucketToLitOffPairs[h_ent].emplace_back(lit.id, j); - bucketToBitfield[h_ent] |= (1ULL << h_low); - } - } - - // this used to be a set, but a bitset is much much faster given that - // we're using it only for membership testing. - dynamic_bitset<> filledBuckets(numEntries); // all bits zero by default. - - // sweep out bitfield entries and save the results swapped accordingly - // also, anything with bitfield entries is put in filledBuckets - for (const auto &m : bucketToBitfield) { - const u32 &bucket = m.first; - const u64a &contents = m.second; - tab[bucket].bitfield = contents; - filledBuckets.set(bucket); - } - - // store out all our chains based on free values in our hash table. - // find nearest free locations that are empty (there will always be more - // entries than strings, at present) - for (auto &m : bucketToLitOffPairs) { - u32 bucket = m.first; - deque<pair<u32, u32>> &d = m.second; - - // sort d by distance of the residual string (len minus our depth into - // the string). We need to put the 'furthest back' string first... - stable_sort(d.begin(), d.end(), OffsetIDFromEndOrder(long_lits)); - - while (1) { - // first time through is always at bucket, then we fill in links - filledBuckets.set(bucket); - FDRSHashEntry *ent = &tab[bucket]; - u32 lit_id = d.front().first; - u32 offset = d.front().second; - - ent->state = verify_u32(litToOffsetVal[lit_id] + offset + max_len); - ent->link = (u32)LINK_INVALID; - - d.pop_front(); - if (d.empty()) { - break; - } - // now, if there is another value - // find a bucket for it and put in 'bucket' and repeat - // all we really need to do is find something not in filledBuckets, - // ideally something close to bucket - // we search backward and forward from bucket, trying to stay as - // close as possible. - UNUSED bool found = false; - int bucket_candidate = 0; - for (u32 k = 1; k < numEntries * 2; k++) { - bucket_candidate = bucket + (((k & 1) == 0) - ? (-(int)k / 2) : (k / 2)); - if (bucket_candidate < 0 || - (size_t)bucket_candidate >= numEntries) { - continue; - } - if (!filledBuckets.test(bucket_candidate)) { - found = true; - break; - } - } - - assert(found); - bucket = bucket_candidate; - ent->link = bucket; - } - } -} - -static -size_t maxMaskLen(const vector<hwlmLiteral> &lits) { - size_t rv = 0; - for (const auto &lit : lits) { - rv = max(rv, lit.msk.size()); - } - return rv; -} - -pair<aligned_unique_ptr<u8>, size_t> -fdrBuildTableStreaming(const vector<hwlmLiteral> &lits, - hwlmStreamingControl &stream_control) { - // refuse to compile if we are forced to have smaller than minimum - // history required for long-literal support, full stop - // otherwise, choose the maximum of the preferred history quantity - // (currently a fairly extravagant 32) or the already used history - // quantity - subject to the limitation of stream_control.history_max - - const size_t MIN_HISTORY_REQUIRED = 32; - - if (MIN_HISTORY_REQUIRED > stream_control.history_max) { - throw std::logic_error("Cannot set history to minimum history required"); - } - - size_t max_len = - MIN(stream_control.history_max, - MAX(MIN_HISTORY_REQUIRED, stream_control.history_min)); - assert(max_len >= MIN_HISTORY_REQUIRED); - size_t max_mask_len = maxMaskLen(lits); - - vector<hwlmLiteral> long_lits; - if (!setupLongLits(lits, long_lits, max_len) || false) { - // "Don't need to do anything" path, not really a fail - DEBUG_PRINTF("Streaming literal path produces no table\n"); - - // we want enough history to manage the longest literal and the longest - // mask. - stream_control.literal_history_required = - max(maxLen(lits), max_mask_len) - 1; - stream_control.literal_stream_state_required = 0; - return {nullptr, size_t{0}}; - } - - // Ensure that we have enough room for the longest mask. - if (max_mask_len) { - max_len = max(max_len, max_mask_len - 1); - } - - u32 boundary[MAX_MODES]; - u32 positions[MAX_MODES]; - u32 hashEntries[MAX_MODES]; - - analyzeLits(long_lits, max_len, boundary, positions, hashEntries); - - // first assess the size and find our caseless threshold - size_t headerSize = ROUNDUP_16(sizeof(FDRSTableHeader)); - - size_t litTabOffset = headerSize; - - size_t litTabNumEntries = long_lits.size() + 1; - size_t litTabSize = ROUNDUP_16(litTabNumEntries * sizeof(FDRSLiteral)); - - size_t wholeLitTabOffset = litTabOffset + litTabSize; - size_t totalWholeLitTabSize = ROUNDUP_16(positions[CASEFUL] + - positions[CASELESS]); - - size_t htOffset[MAX_MODES]; - size_t htSize[MAX_MODES]; - - htOffset[CASEFUL] = wholeLitTabOffset + totalWholeLitTabSize; - htSize[CASEFUL] = hashEntries[CASEFUL] * sizeof(FDRSHashEntry); - htOffset[CASELESS] = htOffset[CASEFUL] + htSize[CASEFUL]; - htSize[CASELESS] = hashEntries[CASELESS] * sizeof(FDRSHashEntry); - - size_t tabSize = ROUNDUP_16(htOffset[CASELESS] + htSize[CASELESS]); - - // need to add +2 to both of these to allow space for the actual largest - // value as well as handling the fact that we add one to the space when - // storing out a position to allow zero to mean "no stream state value" - u8 streamBits[MAX_MODES]; - streamBits[CASEFUL] = lg2(roundUpToPowerOfTwo(positions[CASEFUL] + 2)); - streamBits[CASELESS] = lg2(roundUpToPowerOfTwo(positions[CASELESS] + 2)); - u32 tot_state_bytes = (streamBits[CASEFUL] + streamBits[CASELESS] + 7) / 8; - - auto secondaryTable = aligned_zmalloc_unique<u8>(tabSize); - assert(secondaryTable); // otherwise would have thrown std::bad_alloc - - // then fill it in - u8 * ptr = secondaryTable.get(); - FDRSTableHeader * header = (FDRSTableHeader *)ptr; - // fill
in header - header->pseudoEngineID = (u32)0xffffffff; - header->N = verify_u8(max_len); // u8 so doesn't matter; won't go > 255 - for (u32 m = CASEFUL; m < MAX_MODES; ++m) { - header->boundary[m] = boundary[m]; - header->hashOffset[m] = verify_u32(htOffset[m]); - header->hashNBits[m] = lg2(hashEntries[m]); - header->streamStateBits[m] = streamBits[m]; - } - assert(tot_state_bytes < sizeof(u64a)); - header->streamStateBytes = verify_u8(tot_state_bytes); // u8 - - ptr += headerSize; - - // now fill in the rest - - FDRSLiteral * litTabPtr = (FDRSLiteral *)ptr; - ptr += litTabSize; - - map litToOffsetVal; - for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) { - u32 entry = verify_u32(i - long_lits.begin()); - u32 offset = verify_u32(ptr - secondaryTable.get()); - - // point the table entry to the string location - litTabPtr[entry].offset = offset; - - litToOffsetVal[entry] = offset; - - // copy the string into the string location - memcpy(ptr, i->s.c_str(), i->s.size()); - - ptr += i->s.size(); // and the string location - } - - // fill in final lit table entry with current ptr (serves as end value) - litTabPtr[long_lits.size()].offset = verify_u32(ptr - secondaryTable.get()); - - // fill hash tables - ptr = secondaryTable.get() + htOffset[CASEFUL]; - for (u32 m = CASEFUL; m < MAX_MODES; ++m) { - fillHashes(long_lits, max_len, (FDRSHashEntry *)ptr, hashEntries[m], - (Modes)m, litToOffsetVal); - ptr += htSize[m]; - } - - // tell the world what we did - stream_control.literal_history_required = max_len; - stream_control.literal_stream_state_required = tot_state_bytes; - return {move(secondaryTable), tabSize}; -} - -} // namespace ue2 diff --git a/src/fdr/fdr_streaming_internal.h b/src/fdr/fdr_streaming_internal.h deleted file mode 100644 index 11b07b56..00000000 --- a/src/fdr/fdr_streaming_internal.h +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef FDR_STREAMING_INTERNAL_H -#define FDR_STREAMING_INTERNAL_H - -#include "ue2common.h" -#include "fdr_internal.h" -#include "util/unaligned.h" - -// tertiary table: -// a header (FDRSTableHeader) -// long_lits.size()+1 entries holding an offset to the string in the -// 'whole literal table' (FDRSLiteral structure) -// the whole literal table - every string packed in (freeform) -// hash table (caseful) (FDRSHashEntry) -// hash table (caseless) (FDRSHashEntry) - -enum Modes { - CASEFUL = 0, - CASELESS = 1, - MAX_MODES = 2 -}; - -// We have one of these structures hanging off the 'link' of our secondary -// FDR table that handles streaming strings -struct FDRSTableHeader { - u32 pseudoEngineID; // set to 0xffffffff to indicate this isn't an FDR - - // string id one beyond the maximum entry for this type of literal - // boundary[CASEFUL] is the end of the caseful literals - // boundary[CASELESS] is the end of the caseless literals and one beyond - // the largest literal id (the size of the littab) - u32 boundary[MAX_MODES]; - - // offsets are 0 if no such table exists - // offset from the base of the tertiary structure to the hash table - u32 hashOffset[MAX_MODES]; - u32 hashNBits[MAX_MODES]; // lg2 of the size of the hash table - - u8 streamStateBits[MAX_MODES]; - u8 streamStateBytes; // total size of packed stream state in bytes - u8 N; // prefix lengths - u16 pad; -}; - -// One of these structures per literal entry in our secondary FDR table. -struct FDRSLiteral { - u32 offset; - // potentially - another u32 to point to the 'next lesser included literal' - // which would be a literal that overlaps this one in such a way that a - // failure to match _this_ literal can leave us in a state that we might - // still match that literal. Offset information might also be called for, - // in which case we might be wanting to use a FDRSLiteralOffset -}; - -typedef u32 FDRSLiteralOffset; - -#define LINK_INVALID 0xffffffff - -// One of these structures per hash table entry in our secondary FDR table -struct FDRSHashEntry { - u64a bitfield; - FDRSLiteralOffset state; - u32 link; -}; - -static really_inline -u32 get_start_lit_idx(const struct FDRSTableHeader * h, enum Modes m) { - return m == CASEFUL ? 
0 : h->boundary[m-1]; -} - -static really_inline -u32 get_end_lit_idx(const struct FDRSTableHeader * h, enum Modes m) { - return h->boundary[m]; -} - -static really_inline -const struct FDRSLiteral * getLitTab(const struct FDRSTableHeader * h) { - return (const struct FDRSLiteral *) (((const u8 *)h) + - ROUNDUP_16(sizeof(struct FDRSTableHeader))); -} - -static really_inline -u32 getBaseOffsetOfLits(const struct FDRSTableHeader * h, enum Modes m) { - return getLitTab(h)[get_start_lit_idx(h, m)].offset; -} - -static really_inline -u32 packStateVal(const struct FDRSTableHeader * h, enum Modes m, u32 v) { - return v - getBaseOffsetOfLits(h, m) + 1; -} - -static really_inline -u32 unpackStateVal(const struct FDRSTableHeader * h, enum Modes m, u32 v) { - return v + getBaseOffsetOfLits(h, m) - 1; -} - -static really_inline -u32 has_bit(const struct FDRSHashEntry * ent, u32 bit) { - return (ent->bitfield >> bit) & 0x1; -} - -static really_inline -u32 streaming_hash(const u8 *ptr, UNUSED size_t len, enum Modes mode) { - const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL; - const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL; - assert(len >= 32); - - u64a v1 = unaligned_load_u64a(ptr); - u64a v2 = unaligned_load_u64a(ptr + 8); - u64a v3 = unaligned_load_u64a(ptr + 16); - if (mode == CASELESS) { - v1 &= CASEMASK; - v2 &= CASEMASK; - v3 &= CASEMASK; - } - v1 *= MULTIPLIER; - v2 *= (MULTIPLIER*MULTIPLIER); - v3 *= (MULTIPLIER*MULTIPLIER*MULTIPLIER); - v1 >>= 32; - v2 >>= 32; - v3 >>= 32; - return v1 ^ v2 ^ v3; -} - -#endif diff --git a/src/fdr/fdr_streaming_runtime.h b/src/fdr/fdr_streaming_runtime.h deleted file mode 100644 index 8e264c76..00000000 --- a/src/fdr/fdr_streaming_runtime.h +++ /dev/null @@ -1,368 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef FDR_STREAMING_RUNTIME_H -#define FDR_STREAMING_RUNTIME_H - -#include "fdr_streaming_internal.h" -#include "util/partial_store.h" - -#include - -static really_inline -const struct FDRSTableHeader * getSHDR(const struct FDR * fdr) { - const u8 * linkPtr = ((const u8 *)fdr) + fdr->link; - // test if it's not really a engineID, but a 'pseudo engine id' - assert(*(const u32 *)linkPtr == 0xffffffff); - assert(linkPtr); - return (const struct FDRSTableHeader *)linkPtr; -} - -// Reads from stream state and unpacks values into stream state table. -static really_inline -void getStreamStates(const struct FDRSTableHeader * streamingTable, - const u8 * stream_state, u32 * table) { - assert(streamingTable); - assert(stream_state); - assert(table); - - u8 ss_bytes = streamingTable->streamStateBytes; - u8 ssb = streamingTable->streamStateBits[CASEFUL]; - UNUSED u8 ssb_nc = streamingTable->streamStateBits[CASELESS]; - assert(ss_bytes == (ssb + ssb_nc + 7) / 8); - -#if defined(ARCH_32_BIT) - // On 32-bit hosts, we may be able to avoid having to do any u64a - // manipulation at all. - if (ss_bytes <= 4) { - u32 ssb_mask = (1U << ssb) - 1; - u32 streamVal = partial_load_u32(stream_state, ss_bytes); - table[CASEFUL] = (u32)(streamVal & ssb_mask); - table[CASELESS] = (u32)(streamVal >> ssb); - return; - } -#endif - - u64a ssb_mask = (1ULL << ssb) - 1; - u64a streamVal = partial_load_u64a(stream_state, ss_bytes); - table[CASEFUL] = (u32)(streamVal & ssb_mask); - table[CASELESS] = (u32)(streamVal >> (u64a)ssb); -} - -#ifndef NDEBUG -// Defensive checking (used in assert) that these table values don't overflow -// outside the range available. -static really_inline UNUSED -u32 streamingTableOverflow(u32 * table, u8 ssb, u8 ssb_nc) { - u32 ssb_mask = (1ULL << (ssb)) - 1; - if (table[CASEFUL] & ~ssb_mask) { - return 1; - } - u32 ssb_nc_mask = (1ULL << (ssb_nc)) - 1; - if (table[CASELESS] & ~ssb_nc_mask) { - return 1; - } - return 0; -} -#endif - -// Reads from stream state table and packs values into stream state. -static really_inline -void setStreamStates(const struct FDRSTableHeader * streamingTable, - u8 * stream_state, u32 * table) { - assert(streamingTable); - assert(stream_state); - assert(table); - - u8 ss_bytes = streamingTable->streamStateBytes; - u8 ssb = streamingTable->streamStateBits[CASEFUL]; - UNUSED u8 ssb_nc = streamingTable->streamStateBits[CASELESS]; - assert(ss_bytes == (ssb + ssb_nc + 7) / 8); - assert(!streamingTableOverflow(table, ssb, ssb_nc)); - -#if defined(ARCH_32_BIT) - // On 32-bit hosts, we may be able to avoid having to do any u64a - // manipulation at all. - if (ss_bytes <= 4) { - u32 stagingStreamState = table[CASEFUL]; - stagingStreamState |= (table[CASELESS] << ssb); - - partial_store_u32(stream_state, stagingStreamState, ss_bytes); - return; - } -#endif - - u64a stagingStreamState = (u64a)table[CASEFUL]; - stagingStreamState |= (u64a)table[CASELESS] << ((u64a)ssb); - partial_store_u64a(stream_state, stagingStreamState, ss_bytes); -} - -u32 fdrStreamStateActive(const struct FDR * fdr, const u8 * stream_state) { - if (!stream_state) { - return 0; - } - const struct FDRSTableHeader * streamingTable = getSHDR(fdr); - u8 ss_bytes = streamingTable->streamStateBytes; - - // We just care if there are any bits set, and the test below is faster - // than a partial_load_u64a (especially on 32-bit hosts). 
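/* Illustrative model (editorial sketch, not part of the original file):
 * getStreamStates() and setStreamStates() above pack the two per-mode
 * state values into a little-endian byte array, with the caseful value in
 * the low ssb bits and the caseless value above it. Assuming
 * ssb + ssb_nc <= 56 (the builder asserts tot_state_bytes < sizeof(u64a)):
 *
 *     static u64a packStreamState(u32 caseful, u32 caseless, u8 ssb) {
 *         return (u64a)caseful | ((u64a)caseless << ssb);
 *     }
 *     static void unpackStreamState(u64a v, u8 ssb, u32 *caseful,
 *                                   u32 *caseless) {
 *         *caseful = (u32)(v & ((1ULL << ssb) - 1));
 *         *caseless = (u32)(v >> ssb);
 *     }
 *
 * The loop below therefore only has to test whether any packed byte is
 * non-zero to decide if stream state is active.
 */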
- for (u32 i = 0; i < ss_bytes; i++) { - if (*stream_state) { - return 1; - } - ++stream_state; - } - return 0; -} - -// binary search for the literal index that contains the current state -static really_inline -u32 findLitTabEntry(const struct FDRSTableHeader * streamingTable, - u32 stateValue, enum Modes m) { - const struct FDRSLiteral * litTab = getLitTab(streamingTable); - u32 lo = get_start_lit_idx(streamingTable, m); - u32 hi = get_end_lit_idx(streamingTable, m); - - // Now move stateValue back by one so that we're looking for the - // litTab entry that includes it the string, not the one 'one past' it - stateValue -= 1; - assert(lo != hi); - assert(litTab[lo].offset <= stateValue); - assert(litTab[hi].offset > stateValue); - - // binary search to find the entry e such that: - // litTab[e].offsetToLiteral <= stateValue < litTab[e+1].offsetToLiteral - while (lo + 1 < hi) { - u32 mid = (lo + hi) / 2; - if (litTab[mid].offset <= stateValue) { - lo = mid; - } else { //(litTab[mid].offset > stateValue) { - hi = mid; - } - } - assert(litTab[lo].offset <= stateValue); - assert(litTab[hi].offset > stateValue); - return lo; -} - -static really_inline -void fdrUnpackStateMode(struct FDR_Runtime_Args *a, - const struct FDRSTableHeader *streamingTable, - const struct FDRSLiteral * litTab, - const u32 *state_table, - const enum Modes m) { - if (!state_table[m]) { - return; - } - - u32 stateValue = unpackStateVal(streamingTable, m, state_table[m]); - u32 idx = findLitTabEntry(streamingTable, stateValue, m); - size_t found_offset = litTab[idx].offset; - const u8 * found_buf = found_offset + (const u8 *)streamingTable; - size_t found_sz = stateValue - found_offset; - if (m == CASEFUL) { - a->buf_history = found_buf; - a->len_history = found_sz; - } else { - a->buf_history_nocase = found_buf; - a->len_history_nocase = found_sz; - } -} - -static really_inline -void fdrUnpackState(const struct FDR * fdr, struct FDR_Runtime_Args * a, - const u8 * stream_state) { - // nothing to do if there's no stream state for the case - if (!stream_state) { - return; - } - - const struct FDRSTableHeader * streamingTable = getSHDR(fdr); - const struct FDRSLiteral * litTab = getLitTab(streamingTable); - - u32 state_table[MAX_MODES]; - getStreamStates(streamingTable, stream_state, state_table); - - fdrUnpackStateMode(a, streamingTable, litTab, state_table, CASEFUL); - fdrUnpackStateMode(a, streamingTable, litTab, state_table, CASELESS); -} - -static really_inline -u32 do_single_confirm(const struct FDRSTableHeader *streamingTable, - const struct FDR_Runtime_Args *a, u32 hashState, - enum Modes m) { - const struct FDRSLiteral * litTab = getLitTab(streamingTable); - u32 idx = findLitTabEntry(streamingTable, hashState, m); - size_t found_offset = litTab[idx].offset; - const u8 * s1 = found_offset + (const u8 *)streamingTable; - assert(hashState > found_offset); - size_t l1 = hashState - found_offset; - const u8 * buf = a->buf; - size_t len = a->len; - const char nocase = m != CASEFUL; - - if (l1 > len) { - const u8 * hist = nocase ? a->buf_history_nocase : a->buf_history; - size_t hist_len = nocase ? 
a->len_history_nocase : a->len_history; - - if (l1 > len+hist_len) { - return 0; // Break out - not enough total history - } - - size_t overhang = l1 - len; - assert(overhang <= hist_len); - - if (cmpForward(hist + hist_len - overhang, s1, overhang, nocase)) { - return 0; - } - s1 += overhang; - l1 -= overhang; - } - // if we got here, we don't need history or we compared ok out of history - assert(l1 <= len); - - if (cmpForward(buf + len - l1, s1, l1, nocase)) { - return 0; - } - return hashState; // our new state -} - -static really_inline -void fdrFindStreamingHash(const struct FDR_Runtime_Args *a, - const struct FDRSTableHeader *streamingTable, - u8 hash_len, u32 *hashes) { - u8 tempbuf[128]; - const u8 *base; - if (hash_len > a->len) { - assert(hash_len <= 128); - size_t overhang = hash_len - a->len; - assert(overhang <= a->len_history); - memcpy(tempbuf, a->buf_history + a->len_history - overhang, overhang); - memcpy(tempbuf + overhang, a->buf, a->len); - base = tempbuf; - } else { - assert(hash_len <= a->len); - base = a->buf + a->len - hash_len; - } - - if (streamingTable->hashNBits[CASEFUL]) { - hashes[CASEFUL] = streaming_hash(base, hash_len, CASEFUL); - } - if (streamingTable->hashNBits[CASELESS]) { - hashes[CASELESS] = streaming_hash(base, hash_len, CASELESS); - } -} - -static really_inline -const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable, - u32 h, const enum Modes m) { - u32 nbits = streamingTable->hashNBits[m]; - if (!nbits) { - return NULL; - } - - u32 h_ent = h & ((1 << nbits) - 1); - u32 h_low = (h >> nbits) & 63; - - const struct FDRSHashEntry *tab = - (const struct FDRSHashEntry *)((const u8 *)streamingTable - + streamingTable->hashOffset[m]); - const struct FDRSHashEntry *ent = tab + h_ent; - - if (!has_bit(ent, h_low)) { - return NULL; - } - - return ent; -} - -static really_inline -void fdrPackStateMode(u32 *state_table, const struct FDR_Runtime_Args *a, - const struct FDRSTableHeader *streamingTable, - const struct FDRSHashEntry *ent, const enum Modes m) { - assert(ent); - assert(streamingTable->hashNBits[m]); - - const struct FDRSHashEntry *tab = - (const struct FDRSHashEntry *)((const u8 *)streamingTable - + streamingTable->hashOffset[m]); - - while (1) { - u32 tmp = 0; - if ((tmp = do_single_confirm(streamingTable, a, ent->state, m))) { - state_table[m] = packStateVal(streamingTable, m, tmp); - break; - } - if (ent->link == LINK_INVALID) { - break; - } - ent = tab + ent->link; - } -} - -static really_inline -void fdrPackState(const struct FDR *fdr, const struct FDR_Runtime_Args *a, - u8 *stream_state) { - // nothing to do if there's no stream state for the case - if (!stream_state) { - return; - } - - // get pointers to the streamer FDR and the tertiary structure - const struct FDRSTableHeader *streamingTable = getSHDR(fdr); - - assert(streamingTable->N); - - u32 state_table[MAX_MODES] = {0, 0}; - - // if we don't have enough history, we don't need to do anything - if (streamingTable->N <= a->len + a->len_history) { - u32 hashes[MAX_MODES] = {0, 0}; - - fdrFindStreamingHash(a, streamingTable, streamingTable->N, hashes); - - const struct FDRSHashEntry *ent_ful = getEnt(streamingTable, - hashes[CASEFUL], CASEFUL); - const struct FDRSHashEntry *ent_less = getEnt(streamingTable, - hashes[CASELESS], CASELESS); - - if (ent_ful) { - fdrPackStateMode(state_table, a, streamingTable, ent_ful, - CASEFUL); - } - - if (ent_less) { - fdrPackStateMode(state_table, a, streamingTable, ent_less, - CASELESS); - } - } - - 
setStreamStates(streamingTable, stream_state, state_table); -} - -#endif diff --git a/src/hwlm/hwlm.c b/src/hwlm/hwlm.c index 2e16f1ac..3c7615a7 100644 --- a/src/hwlm/hwlm.c +++ b/src/hwlm/hwlm.c @@ -200,8 +200,7 @@ hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len, hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch, size_t len, size_t start, HWLMCallback cb, - void *ctxt, hwlm_group_t groups, - u8 *stream_state) { + void *ctxt, hwlm_group_t groups) { const u8 *hbuf = scratch->core_info.hbuf; const size_t hlen = scratch->core_info.hlen; const u8 *buf = scratch->core_info.buf; @@ -234,13 +233,10 @@ hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch, DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type); aa = &t->accel1; } - // if no active stream state, use acceleration - if (!fdrStreamStateActive(HWLM_C_DATA(t), stream_state)) { - do_accel_streaming(aa, hbuf, hlen, buf, len, &start); - } + do_accel_streaming(aa, hbuf, hlen, buf, len, &start); DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, start); return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, - start, cb, ctxt, groups, stream_state); + start, cb, ctxt, groups); } } diff --git a/src/hwlm/hwlm.h b/src/hwlm/hwlm.h index 009550e9..a17575df 100644 --- a/src/hwlm/hwlm.h +++ b/src/hwlm/hwlm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -132,8 +132,7 @@ hwlm_error_t hwlmExec(const struct HWLM *tab, const u8 *buf, size_t len, hwlm_error_t hwlmExecStreaming(const struct HWLM *tab, struct hs_scratch *scratch, size_t len, size_t start, HWLMCallback callback, - void *context, hwlm_group_t groups, - u8 *stream_state); + void *context, hwlm_group_t groups); #ifdef __cplusplus } /* extern "C" */ diff --git a/src/hwlm/hwlm_build.cpp b/src/hwlm/hwlm_build.cpp index b1814245..32de6bd0 100644 --- a/src/hwlm/hwlm_build.cpp +++ b/src/hwlm/hwlm_build.cpp @@ -552,6 +552,12 @@ aligned_unique_ptr hwlmBuild(const vector &lits, if (stream_control) { assert(stream_control->history_min <= stream_control->history_max); + + // We should not have been passed any literals that are too long to + // match with a maximally-sized history buffer. + assert(all_of(begin(lits), end(lits), [&](const hwlmLiteral &lit) { + return lit.s.length() <= stream_control->history_max + 1; + })); } // Check that we haven't exceeded the maximum number of literals. @@ -602,7 +608,6 @@ aligned_unique_ptr hwlmBuild(const vector &lits, stream_control->literal_history_required = lit.s.length() - 1; assert(stream_control->literal_history_required <= stream_control->history_max); - stream_control->literal_stream_state_required = 0; } eng = move(noodle); } else { diff --git a/src/hwlm/hwlm_build.h b/src/hwlm/hwlm_build.h index b5bdb0ea..fbf359e6 100644 --- a/src/hwlm/hwlm_build.h +++ b/src/hwlm/hwlm_build.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -63,10 +63,6 @@ struct hwlmStreamingControl { /** \brief OUT parameter: History required by the literal matcher to * correctly match all literals. 
*/ size_t literal_history_required; - - /** OUT parameter: Stream state required by literal matcher in bytes. Can - * be zero, and generally will be small (0-8 bytes). */ - size_t literal_stream_state_required; }; /** \brief Build an \ref HWLM literal matcher runtime structure for a group of diff --git a/src/hwlm/hwlm_literal.cpp b/src/hwlm/hwlm_literal.cpp index 9e365a0c..b0968d79 100644 --- a/src/hwlm/hwlm_literal.cpp +++ b/src/hwlm/hwlm_literal.cpp @@ -86,6 +86,7 @@ hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in, const vector &msk_in, const vector &cmp_in) : s(s_in), id(id_in), nocase(nocase_in), noruns(noruns_in), groups(groups_in), msk(msk_in), cmp(cmp_in) { + assert(s.size() <= HWLM_LITERAL_MAX_LEN); assert(msk.size() <= HWLM_MASKLEN); assert(msk.size() == cmp.size()); diff --git a/src/hwlm/hwlm_literal.h b/src/hwlm/hwlm_literal.h index 7e63a6f3..b7af99d3 100644 --- a/src/hwlm/hwlm_literal.h +++ b/src/hwlm/hwlm_literal.h @@ -41,6 +41,9 @@ namespace ue2 { +/** \brief Max length of the literal passed to HWLM. */ +#define HWLM_LITERAL_MAX_LEN 255 + /** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */ #define HWLM_MASKLEN 8 diff --git a/src/rose/init.c b/src/rose/init.c index 511eafe4..025ecca0 100644 --- a/src/rose/init.c +++ b/src/rose/init.c @@ -85,9 +85,4 @@ void roseInitState(const struct RoseEngine *t, char *state) { init_state(t, state); init_outfixes(t, state); - - // Clear the floating matcher state, if any. - DEBUG_PRINTF("clearing %u bytes of floating matcher state\n", - t->floatingStreamState); - memset(getFloatingMatcherState(t, state), 0, t->floatingStreamState); } diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index b9036422..3c94f543 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -1331,6 +1331,78 @@ hwlmcb_rv_t roseMatcherEod(const struct RoseEngine *rose, return HWLM_CONTINUE_MATCHING; } +static rose_inline +int roseCheckLongLiteral(const struct RoseEngine *t, + const struct hs_scratch *scratch, u64a end, + u32 lit_offset, u32 lit_length, char nocase) { + const struct core_info *ci = &scratch->core_info; + const u8 *lit = getByOffset(t, lit_offset); + + DEBUG_PRINTF("check lit at %llu, length %u\n", end, lit_length); + DEBUG_PRINTF("base buf_offset=%llu\n", ci->buf_offset); + + if (end < lit_length) { + DEBUG_PRINTF("too short!\n"); + return 0; + } + + // If any portion of the literal matched in the current buffer, check it. + if (end > ci->buf_offset) { + u32 scan_len = MIN(end - ci->buf_offset, lit_length); + u64a scan_start = end - ci->buf_offset - scan_len; + DEBUG_PRINTF("checking suffix (%u bytes) in buf[%llu:%llu]\n", scan_len, + scan_start, end); + if (cmpForward(ci->buf + scan_start, lit + lit_length - scan_len, + scan_len, nocase)) { + DEBUG_PRINTF("cmp of suffix failed\n"); + return 0; + } + } + + // If the entirety of the literal was in the current block, we are done. + if (end - lit_length >= ci->buf_offset) { + DEBUG_PRINTF("literal confirmed in current block\n"); + return 1; + } + + // We still have a prefix which we must test against the buffer prepared by + // the long literal table. This is only done in streaming mode. 
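/* Worked example (editorial sketch, not part of the patch): suppose
 * ci->buf_offset = 100, end = 104 and lit_length = 10, so the literal
 * spans stream offsets [94, 104). The code above has already compared the
 * 4-byte suffix against buf[0..4) and established that the literal starts
 * before the current block. Below, hist_rewind = 100 - 94 = 6 and
 * prefix_len = 6, so lit[0..6) is compared against the last 6 bytes of
 * the long literal buffer ll_buf. */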
+ + assert(t->mode != HS_MODE_BLOCK); + + const u8 *ll_buf; + size_t ll_len; + if (nocase) { + ll_buf = scratch->tctxt.ll_buf_nocase; + ll_len = scratch->tctxt.ll_len_nocase; + } else { + ll_buf = scratch->tctxt.ll_buf; + ll_len = scratch->tctxt.ll_len; + } + + assert(ll_buf); + + u64a lit_start_offset = end - lit_length; + u32 prefix_len = MIN(lit_length, ci->buf_offset - lit_start_offset); + u32 hist_rewind = ci->buf_offset - lit_start_offset; + DEBUG_PRINTF("ll_len=%zu, hist_rewind=%u\n", ll_len, hist_rewind); + if (hist_rewind > ll_len) { + DEBUG_PRINTF("not enough history\n"); + return 0; + } + + DEBUG_PRINTF("check prefix len=%u from hist (len %zu, rewind %u)\n", + prefix_len, ll_len, hist_rewind); + assert(hist_rewind <= ll_len); + if (cmpForward(ll_buf + ll_len - hist_rewind, lit, prefix_len, nocase)) { + DEBUG_PRINTF("cmp of prefix failed\n"); + return 0; + } + + DEBUG_PRINTF("cmp succeeded\n"); + return 1; +} + static void updateSeqPoint(struct RoseContext *tctxt, u64a offset, const char from_mpv) { @@ -1977,6 +2049,26 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, } } PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LONG_LIT) { + const char nocase = 0; + if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("halt: failed long lit check\n"); + return HWLM_CONTINUE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) { + const char nocase = 1; + if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("halt: failed nocase long lit check\n"); + return HWLM_CONTINUE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION } } diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index a7979c4f..2871138a 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -37,14 +37,17 @@ #include "rose_build_exclusive.h" #include "rose_build_groups.h" #include "rose_build_infix.h" +#include "rose_build_long_lit.h" #include "rose_build_lookaround.h" #include "rose_build_matchers.h" #include "rose_build_program.h" #include "rose_build_scatter.h" #include "rose_build_util.h" #include "rose_build_width.h" +#include "rose_internal.h" #include "rose_program.h" #include "hwlm/hwlm.h" /* engine types */ +#include "hwlm/hwlm_literal.h" #include "nfa/castlecompile.h" #include "nfa/goughcompile.h" #include "nfa/mcclellancompile.h" @@ -165,6 +168,7 @@ struct RoseResources { bool has_states = false; bool checks_groups = false; bool has_lit_delay = false; + bool has_lit_check = false; // long literal support bool has_anchored = false; bool has_eod = false; }; @@ -210,9 +214,16 @@ struct build_context : boost::noncopyable { * written to the engine_blob. */ vector<u32> litPrograms; + /** \brief List of long literals (ones with CHECK_LITERAL instructions) + * that need hash table support. */ + vector<ue2_case_string> longLiterals; + /** \brief Minimum offset of a match from the floating table. */ u32 floatingMinLiteralMatchOffset = 0; + /** \brief Long literal length threshold, used in streaming mode. */ + size_t longLitLengthThreshold = 0; + /** \brief Contents of the Rose bytecode immediately following the * RoseEngine.
*/ RoseEngineBlob engine_blob; @@ -314,7 +325,7 @@ bool needsCatchup(const RoseBuildImpl &build, } static -bool isPureFloating(const RoseResources &resources) { +bool isPureFloating(const RoseResources &resources, const CompileContext &cc) { if (resources.has_outfixes || resources.has_suffixes || resources.has_leftfixes) { DEBUG_PRINTF("has engines\n"); @@ -341,6 +352,12 @@ bool isPureFloating(const RoseResources &resources) { return false; } + if (cc.streaming && resources.has_lit_check) { + DEBUG_PRINTF("has long literals in streaming mode, which needs " + "long literal table support\n"); + return false; + } + if (resources.checks_groups) { DEBUG_PRINTF("has group checks\n"); return false; @@ -384,10 +401,11 @@ u8 pickRuntimeImpl(const RoseBuildImpl &build, const build_context &bc, DEBUG_PRINTF("has_states=%d\n", bc.resources.has_states); DEBUG_PRINTF("checks_groups=%d\n", bc.resources.checks_groups); DEBUG_PRINTF("has_lit_delay=%d\n", bc.resources.has_lit_delay); + DEBUG_PRINTF("has_lit_check=%d\n", bc.resources.has_lit_check); DEBUG_PRINTF("has_anchored=%d\n", bc.resources.has_anchored); DEBUG_PRINTF("has_eod=%d\n", bc.resources.has_eod); - if (isPureFloating(bc.resources)) { + if (isPureFloating(bc.resources, build.cc)) { return ROSE_RUNTIME_PURE_LITERAL; } @@ -427,7 +445,7 @@ static void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount, u32 anchorStateSize, u32 activeArrayCount, u32 activeLeftCount, u32 laggedRoseCount, - u32 floatingStreamStateRequired, u32 historyRequired, + u32 longLitStreamStateRequired, u32 historyRequired, RoseStateOffsets *so) { u32 curr_offset = 0; @@ -445,8 +463,8 @@ void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount, so->activeLeftArray_size = mmbit_size(activeLeftCount); curr_offset += so->activeLeftArray_size; - so->floatingMatcherState = curr_offset; - curr_offset += floatingStreamStateRequired; + so->longLitState = curr_offset; + curr_offset += longLitStreamStateRequired; // ONE WHOLE BYTE for each active leftfix with lag. 
so->leftfixLagTable = curr_offset; @@ -2514,6 +2532,10 @@ void recordResources(RoseResources &resources, const RoseProgram &program) { case ROSE_INSTR_PUSH_DELAYED: resources.has_lit_delay = true; break; + case ROSE_INSTR_CHECK_LONG_LIT: + case ROSE_INSTR_CHECK_LONG_LIT_NOCASE: + resources.has_lit_check = true; + break; default: break; } @@ -2546,6 +2568,25 @@ void recordResources(RoseResources &resources, } } +static +void recordLongLiterals(build_context &bc, const RoseProgram &program) { + for (const auto &ri : program) { + if (const auto *ri_check = + dynamic_cast<const RoseInstrCheckLongLit *>(ri.get())) { + DEBUG_PRINTF("found CHECK_LITERAL for string '%s'\n", + escapeString(ri_check->literal).c_str()); + bc.longLiterals.emplace_back(ri_check->literal, false); + continue; + } + if (const auto *ri_check = + dynamic_cast<const RoseInstrCheckLongLitNocase *>(ri.get())) { + DEBUG_PRINTF("found CHECK_LITERAL_NOCASE for string '%s'\n", + escapeString(ri_check->literal).c_str()); + bc.longLiterals.emplace_back(ri_check->literal, true); + } + } +} + static u32 writeProgram(build_context &bc, RoseProgram &&program) { if (program.empty()) { @@ -2560,6 +2601,7 @@ u32 writeProgram(build_context &bc, RoseProgram &&program) { } recordResources(bc.resources, program); + recordLongLiterals(bc, program); u32 len = 0; auto prog_bytecode = writeProgram(bc.engine_blob, program, &len); @@ -4285,6 +4327,48 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc, program.add_before_end(make_unique<RoseInstrCheckLitEarly>(min_offset)); } +static +void makeCheckLiteralInstruction(const RoseBuildImpl &build, + const build_context &bc, u32 final_id, + RoseProgram &program) { + const auto &lits = build.final_id_to_literal.at(final_id); + if (lits.size() != 1) { + // Long literals should not share a final_id. + assert(all_of(begin(lits), end(lits), [&](u32 lit_id) { + const rose_literal_id &lit = build.literals.right.at(lit_id); + return lit.table != ROSE_FLOATING || + lit.s.length() <= bc.longLitLengthThreshold; + })); + return; + } + + u32 lit_id = *lits.begin(); + if (build.isDelayed(lit_id)) { + return; + } + + const rose_literal_id &lit = build.literals.right.at(lit_id); + if (lit.table != ROSE_FLOATING) { + return; + } + if (lit.s.length() <= bc.longLitLengthThreshold) { + return; + } + + // Check resource limits as well. + if (lit.s.length() > build.cc.grey.limitLiteralLength) { + throw ResourceLimitError(); + } + + unique_ptr<RoseInstruction> ri; + if (lit.s.any_nocase()) { + ri = make_unique<RoseInstrCheckLongLitNocase>(lit.s.get_string()); + } else { + ri = make_unique<RoseInstrCheckLongLit>(lit.s.get_string()); + } + program.add_before_end(move(ri)); +} + static bool hasDelayedLiteral(RoseBuildImpl &build, const vector<RoseEdge> &lit_edges) { @@ -4312,6 +4396,9 @@ RoseProgram buildLitInitialProgram(RoseBuildImpl &build, build_context &bc, DEBUG_PRINTF("final_id %u\n", final_id); + // Check long literal info. + makeCheckLiteralInstruction(build, bc, final_id, program); + // Check lit mask.
makeCheckLitMaskInstruction(build, bc, final_id, program); @@ -4838,6 +4925,172 @@ u32 buildEagerQueueIter(const set<u32> &eager, u32 leftfixBeginQueue, return bc.engine_blob.add_iterator(iter); } +static +void allocateFinalIdToSet(RoseBuildImpl &build, const set<u32> &lits, + size_t longLitLengthThreshold, u32 *next_final_id) { + const auto &g = build.g; + auto &literal_info = build.literal_info; + auto &final_id_to_literal = build.final_id_to_literal; + + /* We can allocate the same final id to multiple literals of the same type + * if they share the same vertex set and trigger the same delayed literal + * ids and squash the same roles and have the same group squashing + * behaviour. Benefits literals cannot be merged. */ + + for (u32 int_id : lits) { + rose_literal_info &curr_info = literal_info[int_id]; + const rose_literal_id &lit = build.literals.right.at(int_id); + const auto &verts = curr_info.vertices; + + // Literals with benefits cannot be merged. + if (curr_info.requires_benefits) { + DEBUG_PRINTF("id %u has benefits\n", int_id); + goto assign_new_id; + } + + // Long literals (that require CHECK_LITERAL instructions) cannot be + // merged. + if (lit.s.length() > longLitLengthThreshold) { + DEBUG_PRINTF("id %u is a long literal\n", int_id); + goto assign_new_id; + } + + if (!verts.empty() && curr_info.delayed_ids.empty()) { + vector<u32> cand; + insert(&cand, cand.end(), g[*verts.begin()].literals); + for (auto v : verts) { + vector<u32> temp; + set_intersection(cand.begin(), cand.end(), + g[v].literals.begin(), + g[v].literals.end(), + inserter(temp, temp.end())); + cand.swap(temp); + } + + for (u32 cand_id : cand) { + if (cand_id >= int_id) { + break; + } + + const auto &cand_info = literal_info[cand_id]; + const auto &cand_lit = build.literals.right.at(cand_id); + + if (cand_lit.s.length() > longLitLengthThreshold) { + continue; + } + + if (cand_info.requires_benefits) { + continue; + } + + if (!cand_info.delayed_ids.empty()) { + /* TODO: allow cases where delayed ids are equivalent. + * This is awkward currently as they have not had their + * final ids allocated yet */ + continue; + } + + if (lits.find(cand_id) == lits.end() + || cand_info.vertices.size() != verts.size() + || cand_info.squash_group != curr_info.squash_group) { + continue; + } + + /* if we are squashing groups we need to check if they are the + * same group */ + if (cand_info.squash_group + && cand_info.group_mask != curr_info.group_mask) { + continue; + } + + u32 final_id = cand_info.final_id; + assert(final_id != MO_INVALID_IDX); + assert(curr_info.final_id == MO_INVALID_IDX); + curr_info.final_id = final_id; + final_id_to_literal[final_id].insert(int_id); + goto next_lit; + } + } + + assign_new_id: + /* oh well, have to give it a fresh one, hang the expense */ + DEBUG_PRINTF("allocating final id %u to %u\n", *next_final_id, int_id); + assert(curr_info.final_id == MO_INVALID_IDX); + curr_info.final_id = *next_final_id; + final_id_to_literal[*next_final_id].insert(int_id); + (*next_final_id)++; + next_lit:; + } +} + +static +bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) { + assert(lit_id < build.literal_info.size()); + const auto &info = build.literal_info[lit_id]; + if (!info.vertices.empty()) { + return true; + } + + for (const u32 &delayed_id : info.delayed_ids) { + assert(delayed_id < build.literal_info.size()); + const rose_literal_info &delayed_info = build.literal_info[delayed_id]; + if (!delayed_info.vertices.empty()) { + return true; + } + } + + DEBUG_PRINTF("literal %u has no refs\n", lit_id); + return false; +} + +/** \brief Allocate final literal IDs for all literals. */ +static +void allocateFinalLiteralId(RoseBuildImpl &build, + size_t longLitLengthThreshold) { + set<u32> anch; + set<u32> norm; + set<u32> delay; + + /* undelayed ids come first */ + assert(build.final_id_to_literal.empty()); + u32 next_final_id = 0; + for (u32 i = 0; i < build.literal_info.size(); i++) { + assert(!build.hasFinalId(i)); + + if (!isUsedLiteral(build, i)) { + /* what is this literal good for? absolutely nothing */ + continue; + } + + // The special EOD event literal has its own program and does not need + // a real literal ID. + if (i == build.eod_event_literal_id) { + assert(build.eod_event_literal_id != MO_INVALID_IDX); + continue; + } + + if (build.isDelayed(i)) { + assert(!build.literal_info[i].requires_benefits); + delay.insert(i); + } else if (build.literals.right.at(i).table == ROSE_ANCHORED) { + anch.insert(i); + } else { + norm.insert(i); + } + } + + /* normal lits */ + allocateFinalIdToSet(build, norm, longLitLengthThreshold, &next_final_id); + + /* next anchored stuff */ + build.anchored_base_id = next_final_id; + allocateFinalIdToSet(build, anch, longLitLengthThreshold, &next_final_id); + + /* delayed ids come last */ + build.delay_base_id = next_final_id; + allocateFinalIdToSet(build, delay, longLitLengthThreshold, &next_final_id); +} + static aligned_unique_ptr<RoseEngine> addSmallWriteEngine(RoseBuildImpl &build, aligned_unique_ptr<RoseEngine> rose) { @@ -4873,16 +5126,89 @@ aligned_unique_ptr<RoseEngine> addSmallWriteEngine(RoseBuildImpl &build, return rose2; } +/** + * \brief Returns the pair (number of literals, max length) for all real + * literals in the floating table that are in-use.
+ */ +static +pair<size_t, size_t> floatingCountAndMaxLen(const RoseBuildImpl &build) { + size_t num = 0; + size_t max_len = 0; + + for (const auto &e : build.literals.right) { + const u32 id = e.first; + const rose_literal_id &lit = e.second; + + if (lit.table != ROSE_FLOATING) { + continue; + } + if (lit.delay) { + // Skip delayed literals, so that we only count the undelayed + // version that ends up in the HWLM table. + continue; + } + if (!isUsedLiteral(build, id)) { + continue; + } + + num++; + max_len = max(max_len, lit.s.length()); + } + DEBUG_PRINTF("%zu floating literals with max_len=%zu\n", num, max_len); + return {num, max_len}; +} + +size_t calcLongLitThreshold(const RoseBuildImpl &build, + const size_t historyRequired) { + const auto &cc = build.cc; + + // In block mode, we should only use the long literal support for literals + // that cannot be handled by HWLM. + if (!cc.streaming) { + return HWLM_LITERAL_MAX_LEN; + } + + size_t longLitLengthThreshold = ROSE_LONG_LITERAL_THRESHOLD_MIN; + + // Expand to size of history we've already allocated. Note that we need N-1 + // bytes of history to match a literal of length N. + longLitLengthThreshold = max(longLitLengthThreshold, historyRequired + 1); + + // If we only have one literal, allow for a larger value in order to avoid + // building a long literal table for a trivial Noodle case that we could + // fit in history. + const auto num_len = floatingCountAndMaxLen(build); + if (num_len.first == 1) { + if (num_len.second > longLitLengthThreshold) { + DEBUG_PRINTF("expanding for single literal of length %zu\n", + num_len.second); + longLitLengthThreshold = num_len.second; + } + } + + // Clamp to max history available. + longLitLengthThreshold = + min(longLitLengthThreshold, size_t{cc.grey.maxHistoryAvailable} + 1); + + return longLitLengthThreshold; +} + aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) { DerivedBoundaryReports dboundary(boundary); size_t historyRequired = calcHistoryRequired(); // Updated by HWLM. + size_t longLitLengthThreshold = calcLongLitThreshold(*this, + historyRequired); + DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold); + + allocateFinalLiteralId(*this, longLitLengthThreshold); auto anchored_dfas = buildAnchoredDfas(*this); build_context bc; bc.floatingMinLiteralMatchOffset = findMinFloatingLiteralMatch(*this, anchored_dfas); + bc.longLitLengthThreshold = longLitLengthThreshold; bc.needs_catchup = needsCatchup(*this, anchored_dfas); recordResources(bc.resources, *this); if (!anchored_dfas.empty()) { @@ -4944,6 +5270,11 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 eodProgramOffset = writeEodProgram(*this, bc, eodNfaIterOffset); + size_t longLitStreamStateRequired = 0; + u32 longLitTableOffset = buildLongLiteralTable(*this, bc.engine_blob, + bc.longLiterals, longLitLengthThreshold, &historyRequired, + &longLitStreamStateRequired); + vector<mmbit_sparse_iter> activeLeftIter; buildActiveLeftIter(leftInfoTable, activeLeftIter); @@ -4982,9 +5313,8 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Build floating HWLM matcher.
rose_group fgroups = 0; size_t fsize = 0; - size_t floatingStreamStateRequired = 0; - auto ftable = buildFloatingMatcher(*this, &fgroups, &fsize, &historyRequired, - &floatingStreamStateRequired); + auto ftable = buildFloatingMatcher(*this, bc.longLitLengthThreshold, + &fgroups, &fsize, &historyRequired); u32 fmatcherOffset = 0; if (ftable) { currOffset = ROUNDUP_CL(currOffset); @@ -5057,7 +5387,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { memset(&stateOffsets, 0, sizeof(stateOffsets)); fillStateOffsets(*this, bc.numStates, anchorStateSize, activeArrayCount, activeLeftCount, laggedRoseCount, - floatingStreamStateRequired, historyRequired, + longLitStreamStateRequired, historyRequired, &stateOffsets); scatter_plan_raw state_scatter; @@ -5173,6 +5503,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->ematcherOffset = ematcherOffset; engine->sbmatcherOffset = sbmatcherOffset; engine->fmatcherOffset = fmatcherOffset; + engine->longLitTableOffset = longLitTableOffset; engine->amatcherMinWidth = findMinWidth(*this, ROSE_ANCHORED); engine->fmatcherMinWidth = findMinWidth(*this, ROSE_FLOATING); engine->eodmatcherMinWidth = findMinWidth(*this, ROSE_EOD_ANCHORED); @@ -5198,7 +5529,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->totalNumLiterals = verify_u32(literal_info.size()); engine->asize = verify_u32(asize); engine->ematcherRegionSize = ematcher_region_size; - engine->floatingStreamState = verify_u32(floatingStreamStateRequired); + engine->longLitStreamState = verify_u32(longLitStreamStateRequired); engine->boundary.reportEodOffset = boundary_out.reportEodOffset; engine->boundary.reportZeroOffset = boundary_out.reportZeroOffset; diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index c93f4eac..6b19549b 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -87,172 +87,6 @@ namespace ue2 { #define ANCHORED_REHOME_DEEP 25 #define ANCHORED_REHOME_SHORT_LEN 3 -#ifdef DEBUG -static UNUSED -void printLitInfo(const rose_literal_info &li, u32 id) { - DEBUG_PRINTF("lit_info %u\n", id); - DEBUG_PRINTF(" parent %u%s", li.undelayed_id, - li.delayed_ids.empty() ? "":", children:"); - for (u32 d_id : li.delayed_ids) { - printf(" %u", d_id); - } - printf("\n"); - DEBUG_PRINTF(" group %llu %s\n", li.group_mask, li.squash_group ? "s":""); -} -#endif - -static -void allocateFinalIdToSet(const RoseGraph &g, const set &lits, - deque *literal_info, - map > *final_id_to_literal, - u32 *next_final_id) { - /* We can allocate the same final id to multiple literals of the same type - * if they share the same vertex set and trigger the same delayed literal - * ids and squash the same roles and have the same group squashing - * behaviour. Benefits literals cannot be merged. 
*/ - - for (u32 int_id : lits) { - rose_literal_info &curr_info = (*literal_info)[int_id]; - const auto &verts = curr_info.vertices; - - if (!verts.empty() && !curr_info.requires_benefits - && curr_info.delayed_ids.empty()) { - vector cand; - insert(&cand, cand.end(), g[*verts.begin()].literals); - for (auto v : verts) { - vector temp; - set_intersection(cand.begin(), cand.end(), - g[v].literals.begin(), - g[v].literals.end(), - inserter(temp, temp.end())); - cand.swap(temp); - } - - for (u32 cand_id : cand) { - if (cand_id >= int_id) { - break; - } - - const rose_literal_info &cand_info = (*literal_info)[cand_id]; - - if (cand_info.requires_benefits) { - continue; - } - - if (!cand_info.delayed_ids.empty()) { - /* TODO: allow cases where delayed ids are equivalent. - * This is awkward currently as the have not had their - * final ids allocated yet */ - continue; - } - - if (lits.find(cand_id) == lits.end() - || cand_info.vertices.size() != verts.size() - || cand_info.squash_group != curr_info.squash_group) { - continue; - } - - /* if we are squashing groups we need to check if they are the - * same group */ - if (cand_info.squash_group - && cand_info.group_mask != curr_info.group_mask) { - continue; - } - - u32 final_id = cand_info.final_id; - assert(final_id != MO_INVALID_IDX); - assert(curr_info.final_id == MO_INVALID_IDX); - curr_info.final_id = final_id; - (*final_id_to_literal)[final_id].insert(int_id); - goto next_lit; - } - } - - /* oh well, have to give it a fresh one, hang the expense */ - DEBUG_PRINTF("allocating final id %u to %u\n", *next_final_id, int_id); - assert(curr_info.final_id == MO_INVALID_IDX); - curr_info.final_id = *next_final_id; - (*final_id_to_literal)[*next_final_id].insert(int_id); - (*next_final_id)++; - next_lit:; - } -} - -static -bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) { - assert(lit_id < build.literal_info.size()); - const auto &info = build.literal_info[lit_id]; - if (!info.vertices.empty()) { - return true; - } - - for (const u32 &delayed_id : info.delayed_ids) { - assert(delayed_id < build.literal_info.size()); - const rose_literal_info &delayed_info = build.literal_info[delayed_id]; - if (!delayed_info.vertices.empty()) { - return true; - } - } - - DEBUG_PRINTF("literal %u has no refs\n", lit_id); - return false; -} - -/** \brief Allocate final literal IDs for all literals. - * - * These are the literal ids used in the bytecode. - */ -static -void allocateFinalLiteralId(RoseBuildImpl &tbi) { - RoseGraph &g = tbi.g; - - set anch; - set norm; - set delay; - - /* undelayed ids come first */ - assert(tbi.final_id_to_literal.empty()); - u32 next_final_id = 0; - for (u32 i = 0; i < tbi.literal_info.size(); i++) { - assert(!tbi.hasFinalId(i)); - - if (!isUsedLiteral(tbi, i)) { - /* what is this literal good for? absolutely nothing */ - continue; - } - - // The special EOD event literal has its own program and does not need - // a real literal ID. 
- if (i == tbi.eod_event_literal_id) { - assert(tbi.eod_event_literal_id != MO_INVALID_IDX); - continue; - } - - if (tbi.isDelayed(i)) { - assert(!tbi.literal_info[i].requires_benefits); - delay.insert(i); - } else if (tbi.literals.right.at(i).table == ROSE_ANCHORED) { - anch.insert(i); - } else { - norm.insert(i); - } - } - - /* normal lits */ - allocateFinalIdToSet(g, norm, &tbi.literal_info, &tbi.final_id_to_literal, - &next_final_id); - - /* next anchored stuff */ - tbi.anchored_base_id = next_final_id; - allocateFinalIdToSet(g, anch, &tbi.literal_info, &tbi.final_id_to_literal, - &next_final_id); - - /* delayed ids come last */ - tbi.delay_base_id = next_final_id; - allocateFinalIdToSet(g, delay, &tbi.literal_info, &tbi.final_id_to_literal, - &next_final_id); -} - #define MAX_EXPLOSION_NC 3 static bool limited_explosion(const ue2_literal &s) { @@ -284,7 +118,12 @@ void RoseBuildImpl::handleMixedSensitivity(void) { continue; } - if (limited_explosion(lit.s)) { + // We don't want to explode long literals, as they require confirmation + // with a CHECK_LITERAL instruction and need unique final_ids. + // TODO: we could allow explosion for literals where the prefixes + // covered by CHECK_LITERAL are identical. + if (lit.s.length() <= ROSE_LONG_LITERAL_THRESHOLD_MIN && + limited_explosion(lit.s)) { DEBUG_PRINTF("need to explode existing string '%s'\n", dumpString(lit.s).c_str()); literal_info[id].requires_explode = true; @@ -1653,7 +1492,6 @@ aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { /* final prep work */ remapCastleTops(*this); - allocateFinalLiteralId(*this); inspectRoseTops(*this); buildRoseSquashMasks(*this); diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 5fb27c55..516548b3 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -442,20 +442,26 @@ void dumpTestLiterals(const string &filename, const vector &lits) { static void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) { - auto lits = fillHamsterLiteralList(build, ROSE_ANCHORED); + size_t historyRequired = build.calcHistoryRequired(); + size_t longLitLengthThreshold = + calcLongLitThreshold(build, historyRequired); + + auto lits = fillHamsterLiteralList(build, ROSE_ANCHORED, + longLitLengthThreshold); dumpTestLiterals(base + "rose_anchored_test_literals.txt", lits); - lits = fillHamsterLiteralList(build, ROSE_FLOATING); + lits = fillHamsterLiteralList(build, ROSE_FLOATING, longLitLengthThreshold); dumpTestLiterals(base + "rose_float_test_literals.txt", lits); - lits = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED); + lits = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED, + build.ematcher_region_size); dumpTestLiterals(base + "rose_eod_test_literals.txt", lits); if (!build.cc.streaming) { lits = fillHamsterLiteralList(build, ROSE_FLOATING, - ROSE_SMALL_BLOCK_LEN); + ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); auto lits2 = fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK, - ROSE_SMALL_BLOCK_LEN); + ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); lits.insert(end(lits), begin(lits2), end(lits2)); dumpTestLiterals(base + "rose_smallblock_test_literals.txt", lits); } diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index cc00603a..b3f986aa 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -56,6 +56,8 @@ namespace ue2 { #define ROSE_GROUPS_MAX 64 +#define ROSE_LONG_LITERAL_THRESHOLD_MIN 33 + struct BoundaryReports; struct CastleProto; struct CompileContext; @@ -603,6 +605,9 @@ private: 
ReportID next_nfa_report; }; +size_t calcLongLitThreshold(const RoseBuildImpl &build, + const size_t historyRequired); + // Free functions, in rose_build_misc.cpp bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v); diff --git a/src/rose/rose_build_long_lit.cpp b/src/rose/rose_build_long_lit.cpp new file mode 100644 index 00000000..c01bdc8f --- /dev/null +++ b/src/rose/rose_build_long_lit.cpp @@ -0,0 +1,348 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "rose_build_long_lit.h" + +#include "rose_build_engine_blob.h" +#include "rose_build_impl.h" +#include "stream_long_lit_hash.h" +#include "util/alloc.h" +#include "util/bitutils.h" +#include "util/verify_types.h" +#include "util/compile_context.h" + +using namespace std; + +namespace ue2 { + +/** \brief Minimum size for a non-empty hash table. */ +static constexpr u32 MIN_HASH_TABLE_SIZE = 4096; + +struct LongLitModeInfo { + u32 boundary = 0; //!< One above the largest index for this mode. + u32 positions = 0; //!< Total number of string positions. + u32 hashEntries = 0; //!< Number of hash table entries. +}; + +struct LongLitInfo { + LongLitModeInfo caseful; + LongLitModeInfo nocase; +}; + +static +u32 roundUpToPowerOfTwo(u32 x) { + assert(x != 0); + u32 bits = lg2(x - 1) + 1; + assert(bits < 32); + return 1U << bits; +} + +static +LongLitInfo analyzeLongLits(const vector &lits, + size_t max_len) { + LongLitInfo info; + u32 hashedPositionsCase = 0; + u32 hashedPositionsNocase = 0; + + // Caseful boundary is the index of the first nocase literal, as we're + // ordered (caseful, nocase). + auto first_nocase = find_if(begin(lits), end(lits), + [](const ue2_case_string &lit) { return lit.nocase; }); + info.caseful.boundary = verify_u32(distance(lits.begin(), first_nocase)); + + // Nocase boundary is the size of the literal set. 
+ info.nocase.boundary = verify_u32(lits.size()); + + for (const auto &lit : lits) { + if (lit.nocase) { + hashedPositionsNocase += lit.s.size() - max_len; + info.nocase.positions += lit.s.size(); + } else { + hashedPositionsCase += lit.s.size() - max_len; + info.caseful.positions += lit.s.size(); + } + } + + info.caseful.hashEntries = hashedPositionsCase + ? roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, hashedPositionsCase)) + : 0; + info.nocase.hashEntries = hashedPositionsNocase + ? roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, hashedPositionsNocase)) + : 0; + + DEBUG_PRINTF("caseful: boundary=%u, positions=%u, hashedPositions=%u, " + "hashEntries=%u\n", + info.caseful.boundary, info.caseful.positions, + hashedPositionsCase, info.caseful.hashEntries); + DEBUG_PRINTF("nocase: boundary=%u, positions=%u, hashedPositions=%u, " + "hashEntries=%u\n", + info.nocase.boundary, info.nocase.positions, + hashedPositionsNocase, info.nocase.hashEntries); + + return info; +} + +static +void fillHashes(const vector &lits, size_t max_len, + RoseLongLitHashEntry *tab, size_t numEntries, bool nocase, + const map &litToOffsetVal) { + const u32 nbits = lg2(numEntries); + map>> bucketToLitOffPairs; + map bucketToBitfield; + + for (u32 lit_id = 0; lit_id < lits.size(); lit_id++) { + const ue2_case_string &lit = lits[lit_id]; + if (nocase != lit.nocase) { + continue; + } + for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) { + const u8 *substr = (const u8 *)lit.s.c_str() + offset; + u32 h = hashLongLiteral(substr, max_len, lit.nocase); + u32 h_ent = h & ((1U << nbits) - 1); + u32 h_low = (h >> nbits) & 63; + bucketToLitOffPairs[h_ent].emplace_back(lit_id, offset); + bucketToBitfield[h_ent] |= (1ULL << h_low); + } + } + + // this used to be a set, but a bitset is much much faster given that + // we're using it only for membership testing. + boost::dynamic_bitset<> filledBuckets(numEntries); // all zero by default. + + // sweep out bitfield entries and save the results swapped accordingly + // also, anything with bitfield entries is put in filledBuckets + for (const auto &m : bucketToBitfield) { + const u32 &bucket = m.first; + const u64a &contents = m.second; + tab[bucket].bitfield = contents; + filledBuckets.set(bucket); + } + + // store out all our chains based on free values in our hash table. + // find nearest free locations that are empty (there will always be more + // entries than strings, at present) + for (auto &m : bucketToLitOffPairs) { + u32 bucket = m.first; + deque> &d = m.second; + + // sort d by distance of the residual string (len minus our depth into + // the string). We need to put the 'furthest back' string first... + stable_sort(d.begin(), d.end(), + [](const pair &a, const pair &b) { + if (a.second != b.second) { + return a.second > b.second; /* longest is first */ + } + return a.first < b.first; + }); + + while (1) { + // first time through is always at bucket, then we fill in links + filledBuckets.set(bucket); + RoseLongLitHashEntry *ent = &tab[bucket]; + u32 lit_id = d.front().first; + u32 offset = d.front().second; + + ent->state = verify_u32(litToOffsetVal.at(lit_id) + + offset + max_len); + ent->link = (u32)LINK_INVALID; + + d.pop_front(); + if (d.empty()) { + break; + } + // now, if there is another value + // find a bucket for it and put in 'bucket' and repeat + // all we really need to do is find something not in filledBuckets, + // ideally something close to bucket + // we search backward and forward from bucket, trying to stay as + // close as possible. 
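+            // The candidate offsets from 'bucket' go 0, -1, +1, -2, +2, ...
+            // so the nearest free slot is always the one chosen.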
+ UNUSED bool found = false; + int bucket_candidate = 0; + for (u32 k = 1; k < numEntries * 2; k++) { + bucket_candidate = bucket + (((k & 1) == 0) + ? (-(int)k / 2) : (k / 2)); + if (bucket_candidate < 0 || + (size_t)bucket_candidate >= numEntries) { + continue; + } + if (!filledBuckets.test(bucket_candidate)) { + found = true; + break; + } + } + + assert(found); + bucket = bucket_candidate; + ent->link = bucket; + } + } +} + +u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob, + vector &lits, + size_t longLitLengthThreshold, + size_t *historyRequired, + size_t *longLitStreamStateRequired) { + // Work in terms of history requirement (i.e. literal len - 1). + const size_t max_len = longLitLengthThreshold - 1; + + // We should only be building the long literal hash table in streaming mode. + if (!build.cc.streaming) { + return 0; + } + + if (lits.empty()) { + DEBUG_PRINTF("no long literals\n"); + return 0; + } + + // The last char of each literal is trimmed as we're not interested in full + // matches, only partial matches. + for (auto &lit : lits) { + assert(!lit.s.empty()); + lit.s.pop_back(); + } + + // Sort by caseful/caseless and in lexicographical order. + stable_sort(begin(lits), end(lits), [](const ue2_case_string &a, + const ue2_case_string &b) { + if (a.nocase != b.nocase) { + return a.nocase < b.nocase; + } + return a.s < b.s; + }); + + // Find literals that are prefixes of other literals (including + // duplicates). Note that we iterate in reverse, since we want to retain + // only the longest string from a set of prefixes. + auto it = unique(lits.rbegin(), lits.rend(), [](const ue2_case_string &a, + const ue2_case_string &b) { + return a.nocase == b.nocase && a.s.size() >= b.s.size() && + equal(b.s.begin(), b.s.end(), a.s.begin()); + }); + + // Erase dupes found by unique(). 
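+    // (unique() ran over reverse iterators, so it.base() points at the
+    // first literal we keep in forward order.)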
+ lits.erase(lits.begin(), it.base()); + + LongLitInfo info = analyzeLongLits(lits, max_len); + + // first assess the size and find our caseless threshold + size_t headerSize = ROUNDUP_16(sizeof(RoseLongLitTable)); + + size_t litTabOffset = headerSize; + + size_t litTabNumEntries = lits.size() + 1; + size_t litTabSize = ROUNDUP_16(litTabNumEntries * sizeof(RoseLongLiteral)); + + size_t wholeLitTabOffset = litTabOffset + litTabSize; + size_t totalWholeLitTabSize = + ROUNDUP_16(info.caseful.positions + info.nocase.positions); + + size_t htOffsetCase = wholeLitTabOffset + totalWholeLitTabSize; + size_t htSizeCase = info.caseful.hashEntries * sizeof(RoseLongLitHashEntry); + size_t htOffsetNocase = htOffsetCase + htSizeCase; + size_t htSizeNocase = + info.nocase.hashEntries * sizeof(RoseLongLitHashEntry); + + size_t tabSize = ROUNDUP_16(htOffsetNocase + htSizeNocase); + + // need to add +2 to both of these to allow space for the actual largest + // value as well as handling the fact that we add one to the space when + // storing out a position to allow zero to mean "no stream state value" + u8 streamBitsCase = lg2(roundUpToPowerOfTwo(info.caseful.positions + 2)); + u8 streamBitsNocase = lg2(roundUpToPowerOfTwo(info.nocase.positions + 2)); + u32 tot_state_bytes = ROUNDUP_N(streamBitsCase + streamBitsNocase, 8) / 8; + + auto table = aligned_zmalloc_unique(tabSize); + assert(table); // otherwise would have thrown std::bad_alloc + + // then fill it in + char *ptr = table.get(); + RoseLongLitTable *header = (RoseLongLitTable *)ptr; + // fill in header + header->maxLen = verify_u8(max_len); // u8 so doesn't matter; won't go > 255 + header->boundaryCase = info.caseful.boundary; + header->hashOffsetCase = verify_u32(htOffsetCase); + header->hashNBitsCase = lg2(info.caseful.hashEntries); + header->streamStateBitsCase = streamBitsCase; + header->boundaryNocase = info.nocase.boundary; + header->hashOffsetNocase = verify_u32(htOffsetNocase); + header->hashNBitsNocase = lg2(info.nocase.hashEntries); + header->streamStateBitsNocase = streamBitsNocase; + assert(tot_state_bytes < sizeof(u64a)); + header->streamStateBytes = verify_u8(tot_state_bytes); // u8 + + ptr += headerSize; + + // now fill in the rest + + RoseLongLiteral *litTabPtr = (RoseLongLiteral *)ptr; + ptr += litTabSize; + + map litToOffsetVal; + for (auto i = lits.begin(), e = lits.end(); i != e; ++i) { + u32 entry = verify_u32(i - lits.begin()); + u32 offset = verify_u32(ptr - table.get()); + + // point the table entry to the string location + litTabPtr[entry].offset = offset; + + litToOffsetVal[entry] = offset; + + // copy the string into the string location + const auto &s = i->s; + memcpy(ptr, s.c_str(), s.size()); + + ptr += s.size(); // and the string location + } + + // fill in final lit table entry with current ptr (serves as end value) + litTabPtr[lits.size()].offset = verify_u32(ptr - table.get()); + + // fill hash tables + ptr = table.get() + htOffsetCase; + fillHashes(lits, max_len, (RoseLongLitHashEntry *)ptr, + info.caseful.hashEntries, false, litToOffsetVal); + ptr += htSizeCase; + fillHashes(lits, max_len, (RoseLongLitHashEntry *)ptr, + info.nocase.hashEntries, true, litToOffsetVal); + ptr += htSizeNocase; + + assert(ptr <= table.get() + tabSize); + + DEBUG_PRINTF("built streaming table, size=%zu\n", tabSize); + DEBUG_PRINTF("requires %zu bytes of history\n", max_len); + DEBUG_PRINTF("requires %u bytes of stream state\n", tot_state_bytes); + + *historyRequired = max(*historyRequired, max_len); + *longLitStreamStateRequired = 
tot_state_bytes; + + return blob.add(table.get(), tabSize, 16); +} + +} // namespace ue2 diff --git a/src/rose/rose_build_long_lit.h b/src/rose/rose_build_long_lit.h new file mode 100644 index 00000000..a77b1b69 --- /dev/null +++ b/src/rose/rose_build_long_lit.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef ROSE_BUILD_LONG_LIT_H +#define ROSE_BUILD_LONG_LIT_H + +#include "ue2common.h" + +#include + +namespace ue2 { + +class RoseBuildImpl; +class RoseEngineBlob; +struct ue2_case_string; + +u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob, + std::vector &lits, + size_t longLitLengthThreshold, + size_t *historyRequired, + size_t *longLitStreamStateRequired); + +} // namespace ue2 + + +#endif // ROSE_BUILD_LONG_LIT_H diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 2eb70f60..522ff6b6 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -485,7 +485,7 @@ bool isNoRunsVertex(const RoseBuildImpl &build, RoseVertex u) { static bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id, - const rose_literal_info &info) { + const rose_literal_info &info, const size_t max_len) { DEBUG_PRINTF("lit id %u\n", id); if (info.requires_benefits) { @@ -493,6 +493,11 @@ bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id, return false; } + if (build.literals.right.at(id).s.length() > max_len) { + DEBUG_PRINTF("requires literal check\n"); + return false; + } + if (isDirectHighlander(build, id, info)) { DEBUG_PRINTF("highlander direct report\n"); return true; @@ -625,7 +630,7 @@ u64a literalMinReportOffset(const RoseBuildImpl &build, vector fillHamsterLiteralList(const RoseBuildImpl &build, rose_literal_table table, - u32 max_offset) { + size_t max_len, u32 max_offset) { vector lits; for (const auto &e : build.literals.right) { @@ -663,10 +668,14 @@ vector fillHamsterLiteralList(const RoseBuildImpl &build, const vector &msk = e.second.msk; const vector &cmp = e.second.cmp; - bool noruns = isNoRunsLiteral(build, id, info); + bool noruns = isNoRunsLiteral(build, id, info, max_len); if (info.requires_explode) { DEBUG_PRINTF("exploding lit\n"); + + // We do not require_explode for long literals. + assert(lit.length() <= max_len); + case_iter cit = caseIterateBegin(lit); case_iter cite = caseIterateEnd(); for (; cit != cite; ++cit) { @@ -687,20 +696,28 @@ vector fillHamsterLiteralList(const RoseBuildImpl &build, msk, cmp); } } else { - const std::string &s = lit.get_string(); - const bool nocase = lit.any_nocase(); + string s = lit.get_string(); + bool nocase = lit.any_nocase(); DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, " "cmp=%s\n", final_id, escapeString(s).c_str(), (int)nocase, noruns, dumpMask(msk).c_str(), dumpMask(cmp).c_str()); + if (s.length() > max_len) { + DEBUG_PRINTF("truncating to tail of length %zu\n", max_len); + s.erase(0, s.length() - max_len); + // We shouldn't have set a threshold below 8 chars. 
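+                // (msk/cmp are limited to 8 bytes, so they always fit
+                // within the retained tail.)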
+ assert(msk.size() <= max_len); + } + if (!maskIsConsistent(s, nocase, msk, cmp)) { DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); continue; } - lits.emplace_back(s, nocase, noruns, final_id, groups, msk, cmp); + lits.emplace_back(move(s), nocase, noruns, final_id, groups, msk, + cmp); } } @@ -708,14 +725,15 @@ vector fillHamsterLiteralList(const RoseBuildImpl &build, } aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, + size_t longLitLengthThreshold, rose_group *fgroups, size_t *fsize, - size_t *historyRequired, - size_t *streamStateRequired) { + size_t *historyRequired) { *fsize = 0; *fgroups = 0; - auto fl = fillHamsterLiteralList(build, ROSE_FLOATING); + auto fl = fillHamsterLiteralList(build, ROSE_FLOATING, + longLitLengthThreshold); if (fl.empty()) { DEBUG_PRINTF("empty floating matcher\n"); return nullptr; @@ -747,13 +765,10 @@ aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, if (build.cc.streaming) { DEBUG_PRINTF("literal_history_required=%zu\n", ctl.literal_history_required); - DEBUG_PRINTF("literal_stream_state_required=%zu\n", - ctl.literal_stream_state_required); assert(ctl.literal_history_required <= build.cc.grey.maxHistoryAvailable); *historyRequired = max(*historyRequired, ctl.literal_history_required); - *streamStateRequired = ctl.literal_stream_state_required; } *fsize = hwlmSize(ftable.get()); @@ -778,8 +793,8 @@ aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, return nullptr; } - auto lits = fillHamsterLiteralList(build, ROSE_FLOATING, - ROSE_SMALL_BLOCK_LEN); + auto lits = fillHamsterLiteralList( + build, ROSE_FLOATING, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); if (lits.empty()) { DEBUG_PRINTF("no floating table\n"); return nullptr; @@ -788,8 +803,9 @@ aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, return nullptr; } - auto anchored_lits = fillHamsterLiteralList(build, - ROSE_ANCHORED_SMALL_BLOCK, ROSE_SMALL_BLOCK_LEN); + auto anchored_lits = + fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK, + ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); if (anchored_lits.empty()) { DEBUG_PRINTF("no small-block anchored literals\n"); return nullptr; @@ -823,7 +839,8 @@ aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, size_t *esize) { *esize = 0; - auto el = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED); + auto el = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED, + build.ematcher_region_size); if (el.empty()) { DEBUG_PRINTF("no eod anchored literals\n"); diff --git a/src/rose/rose_build_matchers.h b/src/rose/rose_build_matchers.h index 2a225bf5..a25dbca3 100644 --- a/src/rose/rose_build_matchers.h +++ b/src/rose/rose_build_matchers.h @@ -51,13 +51,14 @@ struct hwlmLiteral; * only lead to a pattern match after max_offset may be excluded. 
*/ std::vector fillHamsterLiteralList(const RoseBuildImpl &build, - rose_literal_table table, u32 max_offset = ROSE_BOUND_INF); + rose_literal_table table, size_t max_len, + u32 max_offset = ROSE_BOUND_INF); aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, + size_t longLitLengthThreshold, rose_group *fgroups, size_t *fsize, - size_t *historyRequired, - size_t *streamStateRequired); + size_t *historyRequired); aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, size_t *sbsize); diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index fc157b88..ee237639 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -495,6 +495,24 @@ void RoseInstrEnginesEod::write(void *dest, RoseEngineBlob &blob, inst->iter_offset = iter_offset; } +void RoseInstrCheckLongLit::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + assert(!literal.empty()); + inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); + inst->lit_length = verify_u32(literal.size()); +} + +void RoseInstrCheckLongLitNocase::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + assert(!literal.empty()); + inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); + inst->lit_length = verify_u32(literal.size()); +} + static OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) { OffsetMap offset_map; diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h index c76456cc..0c725b46 100644 --- a/src/rose/rose_build_program.h +++ b/src/rose/rose_build_program.h @@ -37,6 +37,7 @@ #include "util/hash.h" #include "util/make_unique.h" #include "util/ue2_containers.h" +#include "util/ue2string.h" #include #include @@ -1721,6 +1722,62 @@ public: ~RoseInstrMatcherEod() override; }; +class RoseInstrCheckLongLit + : public RoseInstrBaseNoTargets { +public: + std::string literal; + + RoseInstrCheckLongLit(std::string literal_in) + : literal(std::move(literal_in)) {} + + bool operator==(const RoseInstrCheckLongLit &ri) const { + return literal == ri.literal; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), literal); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckLongLit &ri, const OffsetMap &, + const OffsetMap &) const { + return literal == ri.literal; + } +}; + +class RoseInstrCheckLongLitNocase + : public RoseInstrBaseNoTargets { +public: + std::string literal; + + RoseInstrCheckLongLitNocase(std::string literal_in) + : literal(std::move(literal_in)) { + upperString(literal); + } + + bool operator==(const RoseInstrCheckLongLitNocase &ri) const { + return literal == ri.literal; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), literal); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckLongLitNocase &ri, const OffsetMap &, + const OffsetMap &) const { + return literal == ri.literal; + } +}; + class RoseInstrEnd : public RoseInstrBaseTrivial { diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 4a0d297e..9a0bd28c 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -610,6 +610,24 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const 
char *pc) { PROGRAM_CASE(MATCHER_EOD) {} PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_LONG_LIT) { + os << " lit_offset " << ri->lit_offset << endl; + os << " lit_length " << ri->lit_length << endl; + const char *lit = (const char *)t + ri->lit_offset; + os << " literal: \"" + << escapeString(string(lit, ri->lit_length)) << "\"" << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) { + os << " lit_offset " << ri->lit_offset << endl; + os << " lit_length " << ri->lit_length << endl; + const char *lit = (const char *)t + ri->lit_offset; + os << " literal: \"" + << escapeString(string(lit, ri->lit_length)) << "\"" << endl; + } + PROGRAM_NEXT_INSTRUCTION + default: os << " UNKNOWN (code " << int{code} << ")" << endl; os << " " << endl; @@ -1031,6 +1049,32 @@ void dumpAnchoredStats(const void *atable, FILE *f) { } +static +void dumpLongLiteralTable(const RoseEngine *t, FILE *f) { + if (!t->longLitTableOffset) { + return; + } + + fprintf(f, "\n"); + fprintf(f, "Long literal table (streaming):\n"); + + const auto *ll_table = + (const struct RoseLongLitTable *)loadFromByteCodeOffset( + t, t->longLitTableOffset); + + u32 num_caseful = ll_table->boundaryCase; + u32 num_caseless = ll_table->boundaryNocase - num_caseful; + + fprintf(f, " longest len: %u\n", ll_table->maxLen); + fprintf(f, " counts: %u caseful, %u caseless\n", num_caseful, + num_caseless); + fprintf(f, " hash bits: %u caseful, %u caseless\n", + ll_table->hashNBitsCase, ll_table->hashNBitsNocase); + fprintf(f, " state bits: %u caseful, %u caseless\n", + ll_table->streamStateBitsCase, ll_table->streamStateBitsNocase); + fprintf(f, " stream state: %u bytes\n", ll_table->streamStateBytes); +} + // Externally accessible functions void roseDumpText(const RoseEngine *t, FILE *f) { @@ -1106,7 +1150,7 @@ void roseDumpText(const RoseEngine *t, FILE *f) { fprintf(f, " - history buffer : %u bytes\n", t->historyRequired); fprintf(f, " - exhaustion vector : %u bytes\n", (t->ekeyCount + 7) / 8); fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize); - fprintf(f, " - floating matcher : %u bytes\n", t->floatingStreamState); + fprintf(f, " - long lit matcher : %u bytes\n", t->longLitStreamState); fprintf(f, " - active array : %u bytes\n", mmbit_size(t->activeArrayCount)); fprintf(f, " - active rose : %u bytes\n", @@ -1160,6 +1204,8 @@ void roseDumpText(const RoseEngine *t, FILE *f) { fprintf(f, "\nSmall-block literal matcher stats:\n\n"); hwlmPrintStats(sbtable, f); } + + dumpLongLiteralTable(t, f); } #define DUMP_U8(o, member) \ @@ -1196,6 +1242,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, ematcherOffset); DUMP_U32(t, fmatcherOffset); DUMP_U32(t, sbmatcherOffset); + DUMP_U32(t, longLitTableOffset); DUMP_U32(t, amatcherMinWidth); DUMP_U32(t, fmatcherMinWidth); DUMP_U32(t, eodmatcherMinWidth); @@ -1245,7 +1292,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, stateOffsets.anchorState); DUMP_U32(t, stateOffsets.groups); DUMP_U32(t, stateOffsets.groups_size); - DUMP_U32(t, stateOffsets.floatingMatcherState); + DUMP_U32(t, stateOffsets.longLitState); DUMP_U32(t, stateOffsets.somLocation); DUMP_U32(t, stateOffsets.somValid); DUMP_U32(t, stateOffsets.somWritable); @@ -1264,7 +1311,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, ematcherRegionSize); DUMP_U32(t, somRevCount); DUMP_U32(t, somRevOffsetOffset); - DUMP_U32(t, floatingStreamState); + DUMP_U32(t, longLitStreamState); fprintf(f, "}\n"); fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine)); 
} diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 51913984..32805ab3 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -217,8 +217,8 @@ struct RoseStateOffsets { /** Size of packed Rose groups value, in bytes. */ u32 groups_size; - /** State for floating literal matcher (managed by HWLM). */ - u32 floatingMatcherState; + /** State for long literal support. */ + u32 longLitState; /** Packed SOM location slots. */ u32 somLocation; @@ -325,6 +325,7 @@ struct RoseEngine { u32 ematcherOffset; // offset of the eod-anchored literal matcher (bytes) u32 fmatcherOffset; // offset of the floating literal matcher (bytes) u32 sbmatcherOffset; // offset of the small-block literal matcher (bytes) + u32 longLitTableOffset; // offset of the long literal table u32 amatcherMinWidth; /**< minimum number of bytes required for a pattern * involved with the anchored table to produce a full * match. */ @@ -434,7 +435,7 @@ struct RoseEngine { u32 ematcherRegionSize; /* max region size to pass to ematcher */ u32 somRevCount; /**< number of som reverse nfas */ u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */ - u32 floatingStreamState; // size in bytes + u32 longLitStreamState; // size in bytes struct scatter_full_plan state_init; }; @@ -445,6 +446,94 @@ struct ALIGN_CL_DIRECTIVE anchored_matcher_info { u32 anchoredMinDistance; /* start of region to run anchored table over */ }; +/** + * \brief Long literal table header. + */ +struct RoseLongLitTable { + /** \brief String ID one beyond the maximum entry for caseful literals. */ + u32 boundaryCase; + + /** + * \brief String ID one beyond the maximum entry for caseless literals. + * This is also the total size of the literal table. + */ + u32 boundaryNocase; + + /** + * \brief Offset of the caseful hash table (relative to RoseLongLitTable + * base). + * + * Offset is zero if no such table exists. + */ + u32 hashOffsetCase; + + /** + * \brief Offset of the caseless hash table (relative to RoseLongLitTable + * base). + * + * Offset is zero if no such table exists. + */ + u32 hashOffsetNocase; + + /** \brief lg2 of the size of the caseful hash table. */ + u32 hashNBitsCase; + + /** \brief lg2 of the size of the caseless hash table. */ + u32 hashNBitsNocase; + + /** + * \brief Number of bits of packed stream state for the caseful hash table. + */ + u8 streamStateBitsCase; + + /** + * \brief Number of bits of packed stream state for the caseless hash + * table. + */ + u8 streamStateBitsNocase; + + /** \brief Total size of packed stream state in bytes. */ + u8 streamStateBytes; + + /** \brief Max length of literal prefixes. */ + u8 maxLen; +}; + +/** + * \brief One of these structures per literal entry in our long literal table. + */ +struct RoseLongLiteral { + /** + * \brief Offset of the literal string itself, relative to + * RoseLongLitTable base. + */ + u32 offset; +}; + +/** \brief "No further links" value used for \ref RoseLongLitHashEntry::link. */ +#define LINK_INVALID 0xffffffff + +/** + * \brief One of these structures per hash table entry in our long literal + * table. + */ +struct RoseLongLitHashEntry { + /** + * \brief Bitfield used as a quick guard for hash buckets. + * + * For a given hash value N, the low six bits of N are taken and the + * corresponding bit is switched on in this bitfield if this bucket is used + * for that hash. + */ + u64a bitfield; + + /** \brief Offset in the literal table for this string. 
*/ + u32 state; + + /** \brief Hash table index of next entry in the chain for this bucket. */ + u32 link; +}; + static really_inline const struct anchored_matcher_info *getALiteralMatcher( const struct RoseEngine *t) { diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 4714960c..ed913316 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -117,7 +117,19 @@ enum RoseInstructionCode { /** \brief Run the EOD-anchored HWLM literal matcher. */ ROSE_INSTR_MATCHER_EOD, - LAST_ROSE_INSTRUCTION = ROSE_INSTR_MATCHER_EOD //!< Sentinel. + /** + * \brief Confirm a case-sensitive literal at the current offset. In + * streaming mode, this makes use of the long literal table. + */ + ROSE_INSTR_CHECK_LONG_LIT, + + /** + * \brief Confirm a case-insensitive literal at the current offset. In + * streaming mode, this makes use of the long literal table. + */ + ROSE_INSTR_CHECK_LONG_LIT_NOCASE, + + LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_LONG_LIT_NOCASE //!< Sentinel. }; struct ROSE_STRUCT_END { @@ -465,4 +477,18 @@ struct ROSE_STRUCT_MATCHER_EOD { u8 code; //!< From enum RoseInstructionCode. }; +/** Note: check failure will halt program. */ +struct ROSE_STRUCT_CHECK_LONG_LIT { + u8 code; //!< From enum RoseInstructionCode. + u32 lit_offset; //!< Offset of literal string. + u32 lit_length; //!< Length of literal string. +}; + +/** Note: check failure will halt program. */ +struct ROSE_STRUCT_CHECK_LONG_LIT_NOCASE { + u8 code; //!< From enum RoseInstructionCode. + u32 lit_offset; //!< Offset of literal string. + u32 lit_length; //!< Length of literal string. +}; + #endif // ROSE_ROSE_PROGRAM_H diff --git a/src/rose/runtime.h b/src/rose/runtime.h index 60c7d34b..d2a4b5d7 100644 --- a/src/rose/runtime.h +++ b/src/rose/runtime.h @@ -97,8 +97,8 @@ void storeGroups(const struct RoseEngine *t, char *state, rose_group groups) { } static really_inline -u8 *getFloatingMatcherState(const struct RoseEngine *t, char *state) { - return (u8 *)(state + t->stateOffsets.floatingMatcherState); +u8 *getLongLitState(const struct RoseEngine *t, char *state) { + return (u8 *)(state + t->stateOffsets.longLitState); } static really_inline diff --git a/src/rose/stream.c b/src/rose/stream.c index b934f98f..72286b4b 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -33,6 +33,8 @@ #include "miracle.h" #include "program_runtime.h" #include "rose.h" +#include "rose_internal.h" +#include "stream_long_lit.h" #include "hwlm/hwlm.h" #include "nfa/mcclellan.h" #include "nfa/nfa_api.h" @@ -406,6 +408,7 @@ void ensureStreamNeatAndTidy(const struct RoseEngine *t, char *state, roseFlushLastByteHistory(t, scratch, offset + length); tctxt->lastEndOffset = offset + length; storeGroups(t, state, tctxt->groups); + storeLongLiteralState(t, state, scratch); } static really_inline @@ -588,11 +591,17 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { } size_t hlength = scratch->core_info.hlen; + char rebuild = 0; + + if (hlength) { + // Can only have long literal state or rebuild if this is not the + // first write to this stream. 
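+        // (On the first write, hlen is zero: there is no saved state to
+        // load and nothing for the delay rebuild to catch up on.)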
+ loadLongLiteralState(t, state, scratch); + rebuild = (scratch->core_info.status & STATUS_DELAY_DIRTY) && + (t->maxFloatingDelayedMatch == ROSE_BOUND_INF || + offset < t->maxFloatingDelayedMatch); + } - char rebuild = hlength && - (scratch->core_info.status & STATUS_DELAY_DIRTY) && - (t->maxFloatingDelayedMatch == ROSE_BOUND_INF || - offset < t->maxFloatingDelayedMatch); DEBUG_PRINTF("**rebuild %hhd status %hhu mfdm %u, offset %llu\n", rebuild, scratch->core_info.status, t->maxFloatingDelayedMatch, offset); @@ -621,17 +630,9 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { } DEBUG_PRINTF("start=%zu\n", start); - u8 *stream_state; - if (t->floatingStreamState) { - stream_state = getFloatingMatcherState(t, state); - } else { - stream_state = NULL; - } - DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length); hwlmExecStreaming(ftable, scratch, flen, start, roseFloatingCallback, - scratch, tctxt->groups & t->floating_group_mask, - stream_state); + scratch, tctxt->groups & t->floating_group_mask); } flush_delay_and_exit: diff --git a/src/rose/stream_long_lit.h b/src/rose/stream_long_lit.h new file mode 100644 index 00000000..676544d7 --- /dev/null +++ b/src/rose/stream_long_lit.h @@ -0,0 +1,434 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef STREAM_LONG_LIT_H +#define STREAM_LONG_LIT_H + +#include "rose.h" +#include "rose_common.h" +#include "rose_internal.h" +#include "stream_long_lit_hash.h" +#include "util/copybytes.h" + +static really_inline +const struct RoseLongLiteral * +getLitTab(const struct RoseLongLitTable *ll_table) { + return (const struct RoseLongLiteral *)((const char *)ll_table + + ROUNDUP_16(sizeof(struct RoseLongLitTable))); +} + +static really_inline +u32 get_start_lit_idx(const struct RoseLongLitTable *ll_table, + const char nocase) { + return nocase ? ll_table->boundaryCase : 0; +} + +static really_inline +u32 get_end_lit_idx(const struct RoseLongLitTable *ll_table, + const char nocase) { + return nocase ? 
ll_table->boundaryNocase : ll_table->boundaryCase; +} + +// search for the literal index that contains the current state +static rose_inline +u32 findLitTabEntry(const struct RoseLongLitTable *ll_table, + u32 stateValue, const char nocase) { + const struct RoseLongLiteral *litTab = getLitTab(ll_table); + u32 lo = get_start_lit_idx(ll_table, nocase); + u32 hi = get_end_lit_idx(ll_table, nocase); + + // Now move stateValue back by one so that we're looking for the + // litTab entry that includes it the string, not the one 'one past' it + stateValue -= 1; + assert(lo != hi); + assert(litTab[lo].offset <= stateValue); + assert(litTab[hi].offset > stateValue); + + // binary search to find the entry e such that: + // litTab[e].offsetToLiteral <= stateValue < litTab[e+1].offsetToLiteral + while (lo + 1 < hi) { + u32 mid = (lo + hi) / 2; + if (litTab[mid].offset <= stateValue) { + lo = mid; + } else { // (litTab[mid].offset > stateValue) { + hi = mid; + } + } + assert(litTab[lo].offset <= stateValue); + assert(litTab[hi].offset > stateValue); + return lo; +} + +// Reads from stream state and unpacks values into stream state table. +static really_inline +void loadLongLitStreamState(const struct RoseLongLitTable *ll_table, + const u8 *ll_state, u32 *state_case, + u32 *state_nocase) { + assert(ll_table); + assert(ll_state); + assert(state_case && state_nocase); + + u8 ss_bytes = ll_table->streamStateBytes; + u8 ssb = ll_table->streamStateBitsCase; + UNUSED u8 ssb_nc = ll_table->streamStateBitsNocase; + assert(ss_bytes == (ssb + ssb_nc + 7) / 8); + +#if defined(ARCH_32_BIT) + // On 32-bit hosts, we may be able to avoid having to do any u64a + // manipulation at all. + if (ss_bytes <= 4) { + u32 ssb_mask = (1U << ssb) - 1; + u32 streamVal = partial_load_u32(ll_state, ss_bytes); + *state_case = (u32)(streamVal & ssb_mask); + *state_nocase = (u32)(streamVal >> ssb); + return; + } +#endif + + u64a ssb_mask = (1ULL << ssb) - 1; + u64a streamVal = partial_load_u64a(ll_state, ss_bytes); + *state_case = (u32)(streamVal & ssb_mask); + *state_nocase = (u32)(streamVal >> ssb); +} + +static really_inline +u32 getBaseOffsetOfLits(const struct RoseLongLitTable *ll_table, + const char nocase) { + u32 lit_idx = get_start_lit_idx(ll_table, nocase); + return getLitTab(ll_table)[lit_idx].offset; +} + +static really_inline +u32 unpackStateVal(const struct RoseLongLitTable *ll_table, const char nocase, + u32 v) { + return v + getBaseOffsetOfLits(ll_table, nocase) - 1; +} + +static really_inline +u32 packStateVal(const struct RoseLongLitTable *ll_table, const char nocase, + u32 v) { + return v - getBaseOffsetOfLits(ll_table, nocase) + 1; +} + +static rose_inline +void loadLongLiteralStateMode(struct hs_scratch *scratch, + const struct RoseLongLitTable *ll_table, + const struct RoseLongLiteral *litTab, + const u32 state, const char nocase) { + if (!state) { + DEBUG_PRINTF("no state for %s\n", nocase ? 
"caseless" : "caseful"); + return; + } + + u32 stateValue = unpackStateVal(ll_table, nocase, state); + u32 idx = findLitTabEntry(ll_table, stateValue, nocase); + size_t found_offset = litTab[idx].offset; + const u8 *found_buf = found_offset + (const u8 *)ll_table; + size_t found_sz = stateValue - found_offset; + + struct RoseContext *tctxt = &scratch->tctxt; + if (nocase) { + tctxt->ll_buf_nocase = found_buf; + tctxt->ll_len_nocase = found_sz; + } else { + tctxt->ll_buf = found_buf; + tctxt->ll_len = found_sz; + } +} + +static rose_inline +void loadLongLiteralState(const struct RoseEngine *t, char *state, + struct hs_scratch *scratch) { + if (!t->longLitTableOffset) { + return; + } + + scratch->tctxt.ll_buf = scratch->core_info.hbuf; + scratch->tctxt.ll_len = scratch->core_info.hlen; + scratch->tctxt.ll_buf_nocase = scratch->core_info.hbuf; + scratch->tctxt.ll_len_nocase = scratch->core_info.hlen; + + const struct RoseLongLitTable *ll_table = + getByOffset(t, t->longLitTableOffset); + const struct RoseLongLiteral *litTab = getLitTab(ll_table); + const u8 *ll_state = getLongLitState(t, state); + + u32 state_case; + u32 state_nocase; + loadLongLitStreamState(ll_table, ll_state, &state_case, &state_nocase); + + loadLongLiteralStateMode(scratch, ll_table, litTab, state_case, 0); + loadLongLiteralStateMode(scratch, ll_table, litTab, state_nocase, 1); +} + +static rose_inline +char confirmLongLiteral(const struct RoseLongLitTable *ll_table, + const hs_scratch_t *scratch, u32 hashState, + const char nocase) { + const struct RoseLongLiteral *litTab = getLitTab(ll_table); + u32 idx = findLitTabEntry(ll_table, hashState, nocase); + size_t found_offset = litTab[idx].offset; + const u8 *s = found_offset + (const u8 *)ll_table; + assert(hashState > found_offset); + size_t len = hashState - found_offset; + const u8 *buf = scratch->core_info.buf; + const size_t buf_len = scratch->core_info.len; + + if (len > buf_len) { + const struct RoseContext *tctxt = &scratch->tctxt; + const u8 *hist = nocase ? tctxt->ll_buf_nocase : tctxt->ll_buf; + size_t hist_len = nocase ? tctxt->ll_len_nocase : tctxt->ll_len; + + if (len > buf_len + hist_len) { + return 0; // Break out - not enough total history + } + + size_t overhang = len - buf_len; + assert(overhang <= hist_len); + + if (cmpForward(hist + hist_len - overhang, s, overhang, nocase)) { + return 0; + } + s += overhang; + len -= overhang; + } + + // if we got here, we don't need history or we compared ok out of history + assert(len <= buf_len); + + if (cmpForward(buf + buf_len - len, s, len, nocase)) { + return 0; + } + + DEBUG_PRINTF("confirmed hashState=%u\n", hashState); + return 1; +} + +static rose_inline +void calcStreamingHash(const struct core_info *ci, + const struct RoseLongLitTable *ll_table, u8 hash_len, + u32 *hash_case, u32 *hash_nocase) { + assert(hash_len >= LONG_LIT_HASH_LEN); + + // Our hash function operates over LONG_LIT_HASH_LEN bytes, starting from + // location (end of buffer - hash_len). If this block can be satisfied + // entirely from either the current buffer or the history buffer, we pass + // in the pointer directly; otherwise we must make a copy. + + u8 tempbuf[LONG_LIT_HASH_LEN]; + const u8 *base; + + if (hash_len > ci->len) { + size_t overhang = hash_len - ci->len; + if (overhang >= LONG_LIT_HASH_LEN) { + // Can read enough to hash from inside the history buffer. + assert(overhang <= ci->hlen); + base = ci->hbuf + ci->hlen - overhang; + } else { + // Copy: first chunk from history buffer. 
+ assert(overhang <= ci->hlen); + copy_upto_32_bytes(tempbuf, ci->hbuf + ci->hlen - overhang, + overhang); + // Copy: second chunk from current buffer. + size_t copy_buf_len = LONG_LIT_HASH_LEN - overhang; + assert(copy_buf_len <= ci->len); + copy_upto_32_bytes(tempbuf + overhang, ci->buf, copy_buf_len); + // Read from our temporary buffer for the hash. + base = tempbuf; + } + } else { + // Can read enough to hash from inside the current buffer. + base = ci->buf + ci->len - hash_len; + } + + if (ll_table->hashNBitsCase) { + *hash_case = hashLongLiteral(base, LONG_LIT_HASH_LEN, 0); + DEBUG_PRINTF("caseful hash %u\n", *hash_case); + } + if (ll_table->hashNBitsNocase) { + *hash_nocase = hashLongLiteral(base, LONG_LIT_HASH_LEN, 1); + DEBUG_PRINTF("caseless hash %u\n", *hash_nocase); + } +} + +static really_inline +const struct RoseLongLitHashEntry * +getHashTableBase(const struct RoseLongLitTable *ll_table, const char nocase) { + const u32 hashOffset = nocase ? ll_table->hashOffsetNocase + : ll_table->hashOffsetCase; + return (const struct RoseLongLitHashEntry *)((const char *)ll_table + + hashOffset); +} + +static rose_inline +const struct RoseLongLitHashEntry * +getLongLitHashEnt(const struct RoseLongLitTable *ll_table, u32 h, + const char nocase) { + u32 nbits = nocase ? ll_table->hashNBitsNocase : ll_table->hashNBitsCase; + if (!nbits) { + return NULL; + } + + u32 h_ent = h & ((1 << nbits) - 1); + u32 h_low = (h >> nbits) & 63; + + const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, nocase); + const struct RoseLongLitHashEntry *ent = tab + h_ent; + + if (!((ent->bitfield >> h_low) & 0x1)) { + return NULL; + } + + return ent; +} + +static rose_inline +u32 storeLongLiteralStateMode(const struct hs_scratch *scratch, + const struct RoseLongLitTable *ll_table, + const struct RoseLongLitHashEntry *ent, + const char nocase) { + assert(ent); + assert(nocase ? ll_table->hashNBitsNocase : ll_table->hashNBitsCase); + + const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, nocase); + + u32 packed_state = 0; + while (1) { + if (confirmLongLiteral(ll_table, scratch, ent->state, nocase)) { + packed_state = packStateVal(ll_table, nocase, ent->state); + DEBUG_PRINTF("set %s state to %u\n", nocase ? "nocase" : "case", + packed_state); + break; + } + if (ent->link == LINK_INVALID) { + break; + } + ent = tab + ent->link; + } + return packed_state; +} + +#ifndef NDEBUG +// Defensive checking (used in assert) that these table values don't overflow +// the range available. +static really_inline +char streamingTableOverflow(u32 state_case, u32 state_nocase, u8 ssb, + u8 ssb_nc) { + u32 ssb_mask = (1ULL << (ssb)) - 1; + if (state_case & ~ssb_mask) { + return 1; + } + u32 ssb_nc_mask = (1ULL << (ssb_nc)) - 1; + if (state_nocase & ~ssb_nc_mask) { + return 1; + } + return 0; +} +#endif + +// Reads from stream state table and packs values into stream state. +static rose_inline +void storeLongLitStreamState(const struct RoseLongLitTable *ll_table, + u8 *ll_state, u32 state_case, u32 state_nocase) { + assert(ll_table); + assert(ll_state); + + u8 ss_bytes = ll_table->streamStateBytes; + u8 ssb = ll_table->streamStateBitsCase; + UNUSED u8 ssb_nc = ll_table->streamStateBitsNocase; + assert(ss_bytes == ROUNDUP_N(ssb + ssb_nc, 8) / 8); + assert(!streamingTableOverflow(state_case, state_nocase, ssb, ssb_nc)); + +#if defined(ARCH_32_BIT) + // On 32-bit hosts, we may be able to avoid having to do any u64a + // manipulation at all. 
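+    // Either way, the packed layout is the same: the caseful value occupies
+    // the low ssb bits, with the caseless value stored directly above it.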
+ if (ss_bytes <= 4) { + u32 stagingStreamState = state_case; + stagingStreamState |= (state_nocase << ssb); + partial_store_u32(ll_state, stagingStreamState, ss_bytes); + return; + } +#endif + + u64a stagingStreamState = (u64a)state_case; + stagingStreamState |= (u64a)state_nocase << ssb; + partial_store_u64a(ll_state, stagingStreamState, ss_bytes); +} + +static rose_inline +void storeLongLiteralState(const struct RoseEngine *t, char *state, + struct hs_scratch *scratch) { + if (!t->longLitTableOffset) { + DEBUG_PRINTF("no table\n"); + return; + } + + struct core_info *ci = &scratch->core_info; + const struct RoseLongLitTable *ll_table = + getByOffset(t, t->longLitTableOffset); + assert(ll_table->maxLen); + + DEBUG_PRINTF("maxLen=%u, len=%zu, hlen=%zu\n", ll_table->maxLen, ci->len, + ci->hlen); + + u32 state_case = 0; + u32 state_nocase = 0; + + // If we don't have enough history, we don't need to do anything. + if (ll_table->maxLen <= ci->len + ci->hlen) { + u32 hash_case = 0; + u32 hash_nocase = 0; + + calcStreamingHash(ci, ll_table, ll_table->maxLen, &hash_case, + &hash_nocase); + + const struct RoseLongLitHashEntry *ent_case = + getLongLitHashEnt(ll_table, hash_case, 0); + const struct RoseLongLitHashEntry *ent_nocase = + getLongLitHashEnt(ll_table, hash_nocase, 1); + + DEBUG_PRINTF("ent_caseful=%p, ent_caseless=%p\n", ent_case, ent_nocase); + + if (ent_case) { + state_case = storeLongLiteralStateMode(scratch, ll_table, + ent_case, 0); + } + + if (ent_nocase) { + state_nocase = storeLongLiteralStateMode(scratch, ll_table, + ent_nocase, 1); + } + } + + DEBUG_PRINTF("store {%u, %u}\n", state_case, state_nocase); + + u8 *ll_state = getLongLitState(t, state); + storeLongLitStreamState(ll_table, ll_state, state_case, state_nocase); +} + +#endif // STREAM_LONG_LIT_H diff --git a/src/rose/stream_long_lit_hash.h b/src/rose/stream_long_lit_hash.h new file mode 100644 index 00000000..0e1606c5 --- /dev/null +++ b/src/rose/stream_long_lit_hash.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef STREAM_LONG_LIT_HASH_H +#define STREAM_LONG_LIT_HASH_H + +#include "ue2common.h" +#include "util/unaligned.h" + +/** \brief Length of the buffer operated on by \ref hashLongLiteral(). */ +#define LONG_LIT_HASH_LEN 24 + +/** \brief Hash function used for long literal table in streaming mode. */ +static really_inline +u32 hashLongLiteral(const u8 *ptr, UNUSED size_t len, char nocase) { + const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL; + const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL; + + // We unconditionally hash LONG_LIT_HASH_LEN bytes; all use cases of this + // hash are for strings longer than this. + assert(len >= 24); + + u64a v1 = unaligned_load_u64a(ptr); + u64a v2 = unaligned_load_u64a(ptr + 8); + u64a v3 = unaligned_load_u64a(ptr + 16); + if (nocase) { + v1 &= CASEMASK; + v2 &= CASEMASK; + v3 &= CASEMASK; + } + v1 *= MULTIPLIER; + v2 *= MULTIPLIER * MULTIPLIER; + v3 *= MULTIPLIER * MULTIPLIER * MULTIPLIER; + v1 >>= 32; + v2 >>= 32; + v3 >>= 32; + return v1 ^ v2 ^ v3; +} + +#endif // STREAM_LONG_LIT_HASH_H diff --git a/src/runtime.c b/src/runtime.c index e761acc2..30745d81 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -736,20 +736,11 @@ void pureLiteralStreamExec(struct hs_stream *stream_state, assert(scratch); assert(!can_stop_matching(scratch)); - char *state = getMultiState(stream_state); - const struct RoseEngine *rose = stream_state->rose; const struct HWLM *ftable = getFLiteralMatcher(rose); size_t len2 = scratch->core_info.len; - u8 *hwlm_stream_state; - if (rose->floatingStreamState) { - hwlm_stream_state = getFloatingMatcherState(rose, state); - } else { - hwlm_stream_state = NULL; - } - DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n", stream_state->offset, scratch->core_info.len); @@ -761,8 +752,8 @@ void pureLiteralStreamExec(struct hs_stream *stream_state, // start the match region at zero. const size_t start = 0; - hwlmExecStreaming(ftable, scratch, len2, start, roseCallback, - scratch, rose->initialGroups, hwlm_stream_state); + hwlmExecStreaming(ftable, scratch, len2, start, roseCallback, scratch, + rose->initialGroups); if (!told_to_stop_matching(scratch) && isAllExhausted(rose, scratch->core_info.exhaustionVector)) { diff --git a/src/scratch.h b/src/scratch.h index a2f02503..73a35149 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -122,6 +122,26 @@ struct RoseContext { u32 filledDelayedSlots; u32 curr_qi; /**< currently executing main queue index during * \ref nfaQueueExec */ + + /** + * \brief Buffer for caseful long literal support, used in streaming mode + * only. + * + * If a long literal prefix was at the end of the buffer at the end of a + * stream write, then the long lit table hashes it and stores the result in + * stream state. At the start of the next write, this value is used to set + * this buffer to the matching prefix string (stored in the bytecode. + */ + const u8 *ll_buf; + + /** \brief Length in bytes of the string pointed to by ll_buf. */ + size_t ll_len; + + /** \brief Caseless version of ll_buf. */ + const u8 *ll_buf_nocase; + + /** \brief Length in bytes of the string pointed to by ll_buf_nocase. */ + size_t ll_len_nocase; }; struct match_deduper { diff --git a/src/util/ue2string.h b/src/util/ue2string.h index 3c7be473..08b6a544 100644 --- a/src/util/ue2string.h +++ b/src/util/ue2string.h @@ -55,6 +55,29 @@ size_t maxStringSelfOverlap(const std::string &a, bool nocase); /// Compares two strings, returns non-zero if they're different. 
u32 cmp(const char *a, const char *b, size_t len, bool nocase); +/** + * \brief String type that also records whether the whole string is caseful or + * caseless. + * + * You should use \ref ue2_literal if you need to represent a mixed-case + * literal. + */ +struct ue2_case_string { + ue2_case_string(std::string s_in, bool nocase_in) + : s(std::move(s_in)), nocase(nocase_in) { + if (nocase) { + upperString(s); + } + } + + bool operator==(const ue2_case_string &other) const { + return s == other.s && nocase == other.nocase; + } + + std::string s; + bool nocase; +}; + struct ue2_literal { public: /// Single element proxy, pointed to by our const_iterator. diff --git a/unit/internal/fdr.cpp b/unit/internal/fdr.cpp index c66ab4c5..6116bfdb 100644 --- a/unit/internal/fdr.cpp +++ b/unit/internal/fdr.cpp @@ -337,8 +337,8 @@ TEST_P(FDRp, NoRepeat3) { static hwlm_error_t safeExecStreaming(const FDR *fdr, const u8 *hbuf, size_t hlen, const u8 *buf, size_t len, size_t start, - HWLMCallback cb, void *ctxt, hwlm_group_t groups, - u8 *stream_state) { + HWLMCallback cb, void *ctxt, + hwlm_group_t groups) { array wrapped_history = {{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}}; if (hlen < 16) { @@ -346,8 +346,7 @@ hwlm_error_t safeExecStreaming(const FDR *fdr, const u8 *hbuf, size_t hlen, memcpy(new_hbuf, hbuf, hlen); hbuf = new_hbuf; } - return fdrExecStreaming(fdr, hbuf, hlen, buf, len, start, cb, ctxt, groups, - stream_state); + return fdrExecStreaming(fdr, hbuf, hlen, buf, len, start, cb, ctxt, groups); } TEST_P(FDRp, SmallStreaming) { @@ -366,7 +365,7 @@ TEST_P(FDRp, SmallStreaming) { expected.push_back(match(2, 2, 1)); safeExecStreaming(fdr.get(), (const u8 *)"", 0, (const u8 *)"aaar", 4, 0, - decentCallback, &matches, HWLM_ALL_GROUPS, nullptr); + decentCallback, &matches, HWLM_ALL_GROUPS); for (u32 i = 0; i < MIN(expected.size(), matches.size()); i++) { EXPECT_EQ(expected[i], matches[i]); } @@ -378,7 +377,7 @@ TEST_P(FDRp, SmallStreaming) { expected.push_back(match(1, 8, 10)); safeExecStreaming(fdr.get(), (const u8 *)"aaar", 4, (const u8 *)"dvark", 5, - 0, decentCallback, &matches, HWLM_ALL_GROUPS, nullptr); + 0, decentCallback, &matches, HWLM_ALL_GROUPS); for (u32 i = 0; i < MIN(expected.size(), matches.size()); i++) { EXPECT_EQ(expected[i], matches[i] + 4); @@ -407,7 +406,7 @@ TEST_P(FDRp, SmallStreaming2) { safeExecStreaming(fdr.get(), (const u8 *)"foobar", 6, (const u8 *)"aardvarkkk", 10, 0, decentCallback, &matches, - HWLM_ALL_GROUPS, nullptr); + HWLM_ALL_GROUPS); for (u32 i = 0; i < MIN(expected.size(), matches.size()); i++) { EXPECT_EQ(expected[i], matches[i] + 6); @@ -445,44 +444,6 @@ TEST_P(FDRp, LongLiteral) { EXPECT_EQ(0U, count); } -TEST_P(FDRp, VeryLongLiteral) { - const u32 hint = GetParam(); - SCOPED_TRACE(hint); - vector lits; - - string s1000; - for(int i = 0; i < 1000; i++) { - s1000 += char('A' + i % 10); - } - - string s66k; - for(int i = 0; i < 66; i++) { - s66k += s1000; - } - - string corpus = s66k + s66k; - lits.push_back(hwlmLiteral(s66k.c_str(), 0, 10)); - - auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); - CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); - - vector matches; - u32 rv = fdrExec(fdr.get(), (const u8 *)s66k.c_str(), s66k.size(), 0, - decentCallback, &matches, HWLM_ALL_GROUPS); - EXPECT_EQ(0U, rv); - ASSERT_EQ(1U, matches.size()); - ASSERT_EQ(match(0, 65999, 10), matches[0]); - - matches.clear(); - rv = fdrExec(fdr.get(), (const u8 *)corpus.c_str(), corpus.size(), 0, - decentCallback, &matches, 
HWLM_ALL_GROUPS); - EXPECT_EQ(0U, rv); - for (u32 i = 0; i < matches.size(); i++) { - ASSERT_EQ(match(10 * i, 65999 + 10 * i, 10), matches[i]); - } - EXPECT_EQ(6601U, matches.size()); -} - TEST_P(FDRp, moveByteStream) { const u32 hint = GetParam(); SCOPED_TRACE(hint); @@ -538,9 +499,9 @@ TEST_P(FDRp, Stream1) { // check matches vector matches; - fdrStatus = safeExecStreaming( - fdr.get(), (const u8 *)data1, data_len1, (const u8 *)data2, data_len2, - 0, decentCallback, &matches, HWLM_ALL_GROUPS, nullptr); + fdrStatus = safeExecStreaming(fdr.get(), (const u8 *)data1, data_len1, + (const u8 *)data2, data_len2, 0, + decentCallback, &matches, HWLM_ALL_GROUPS); ASSERT_EQ(0, fdrStatus); ASSERT_EQ(4U, matches.size()); @@ -783,9 +744,9 @@ TEST(FDR, FDRTermS) { // check matches vector matches; - fdrStatus = safeExecStreaming( - fdr.get(), (const u8 *)data1, data_len1, (const u8 *)data2, data_len2, - 0, decentCallbackT, &matches, HWLM_ALL_GROUPS, nullptr); + fdrStatus = safeExecStreaming(fdr.get(), (const u8 *)data1, data_len1, + (const u8 *)data2, data_len2, 0, + decentCallbackT, &matches, HWLM_ALL_GROUPS); ASSERT_EQ(HWLM_TERMINATED, fdrStatus); ASSERT_EQ(1U, matches.size()); @@ -812,30 +773,3 @@ TEST(FDR, FDRTermB) { ASSERT_EQ(1U, matches.size()); } - -TEST(FDR, ManyLengths) { - // UE-2400: we had a crash due to div by zero in the compiler when given a - // set of literals with precisely 512 different lengths. - const u32 num = 512; - vector lits; - char c = 0; - string s; - for (u32 i = 0; i < num; i++) { - s.push_back(c++); - lits.push_back(hwlmLiteral(s, false, i + 1)); - } - - auto fdr = fdrBuildTable(lits, false, get_current_target(), Grey()); - ASSERT_TRUE(fdr != nullptr); - - // Confirm that we can scan against this FDR table as well. - - vector matches; - - hwlm_error_t fdrStatus = - fdrExec(fdr.get(), (const u8 *)s.c_str(), s.size(), 0, decentCallback, - &matches, HWLM_ALL_GROUPS); - ASSERT_EQ(HWLM_SUCCESS, fdrStatus); - - ASSERT_EQ(768U, matches.size()); -} diff --git a/unit/internal/fdr_flood.cpp b/unit/internal/fdr_flood.cpp index 68d8f632..7b00ac4c 100644 --- a/unit/internal/fdr_flood.cpp +++ b/unit/internal/fdr_flood.cpp @@ -495,7 +495,7 @@ TEST_P(FDRFloodp, StreamingMask) { const u8 *fhist = fake_history.data() + fake_history_size; fdrStatus = fdrExecStreaming(fdr.get(), fhist, 0, d, streamChunk, 0, countCallback, &matchesCounts, - HWLM_ALL_GROUPS, nullptr); + HWLM_ALL_GROUPS); ASSERT_EQ(0, fdrStatus); for (u32 j = streamChunk; j < dataSize; j += streamChunk) { if (j < 16) { @@ -506,12 +506,12 @@ TEST_P(FDRFloodp, StreamingMask) { fdrStatus = fdrExecStreaming(fdr.get(), tmp_d, j, tmp_d + j, streamChunk, 0, countCallback, &matchesCounts, - HWLM_ALL_GROUPS, nullptr); + HWLM_ALL_GROUPS); } else { fdrStatus = fdrExecStreaming(fdr.get(), d + j - 8, 8, d + j, streamChunk, 0, countCallback, &matchesCounts, - HWLM_ALL_GROUPS, nullptr); + HWLM_ALL_GROUPS); } ASSERT_EQ(0, fdrStatus); } From 8869dee6434fab8b84ffccbe9f18b6261fad67b2 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 22 Sep 2016 13:58:42 +1000 Subject: [PATCH 036/103] rose: simplify long lit table, add bloom filter Replaces the original long lit hash table (used in streaming mode) with a smaller, simpler linear probing approach. Adds a bloom filter in front of it to reduce time spent on false positives. Sizing of both the hash table and bloom filter are done based on max load. 
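
In outline, the runtime lookup path after this change is the sketch
below. This is illustrative only, not code from the patch: ToyEntry,
bloomMaybeHas() and probe() are invented stand-ins for
RoseLongLitHashEntry and the checkBloomFilter()/checkHashTable()
routines added in stream_long_lit.h, and the three hash values stand
in for bloomHash_1/2/3.

    #include <cstdint>
    #include <vector>

    using u8 = uint8_t;
    using u32 = uint32_t;

    struct ToyEntry {
        u32 str_offset; // zero marks an empty slot, as in the real table
        u32 str_len;
    };

    static bool hasBit(const std::vector<u8> &bloom, u32 key) {
        return (bloom[key / 8] >> (key % 8)) & 1;
    }

    // Bloom filter gate: every hash must hit, or the key is definitely
    // absent and the hash table (and its string confirms) is skipped.
    static bool bloomMaybeHas(const std::vector<u8> &bloom, u32 h1,
                              u32 h2, u32 h3) {
        const u32 mask = (u32)(bloom.size() * 8) - 1; // power-of-two bits
        return hasBit(bloom, h1 & mask) && hasBit(bloom, h2 & mask) &&
               hasBit(bloom, h3 & mask);
    }

    // Linear probing: scan forward from the home bucket until an empty
    // slot proves absence. Returns bucket + 1 on a hit, zero otherwise,
    // so that zero can mean "no state" when packed into stream state.
    static u32 probe(const std::vector<ToyEntry> &tab, u32 hash,
                     bool (*confirm)(const ToyEntry &)) {
        u32 bucket = hash & ((u32)tab.size() - 1); // power-of-two size
        while (tab[bucket].str_offset != 0) {
            if (confirm(tab[bucket])) {
                return bucket + 1;
            }
            bucket = (bucket + 1) & ((u32)tab.size() - 1);
        }
        return 0;
    }

Both structures are sized the same way at compile time: start at a
power-of-two minimum, build, measure occupancy, and double until the
load factor drops under the mode's cap (0.7 for the hash table, 0.25
for the bloom filter).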
--- src/rose/rose_build_bytecode.cpp | 3 + src/rose/rose_build_long_lit.cpp | 433 +++++++++++++++++++------------ src/rose/rose_dump.cpp | 54 +++- src/rose/rose_internal.h | 102 +++----- src/rose/stream.c | 21 +- src/rose/stream_long_lit.h | 283 ++++++++------------ src/rose/stream_long_lit_hash.h | 58 ++++- src/util/bitutils.h | 1 + 8 files changed, 520 insertions(+), 435 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 2871138a..80e6450d 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4351,6 +4351,7 @@ void makeCheckLiteralInstruction(const RoseBuildImpl &build, if (lit.table != ROSE_FLOATING) { return; } + assert(bc.longLitLengthThreshold > 0); if (lit.s.length() <= bc.longLitLengthThreshold) { return; } @@ -4937,6 +4938,8 @@ void allocateFinalIdToSet(RoseBuildImpl &build, const set &lits, * ids and squash the same roles and have the same group squashing * behaviour. Benefits literals cannot be merged. */ + assert(longLitLengthThreshold > 0); + for (u32 int_id : lits) { rose_literal_info &curr_info = literal_info[int_id]; const rose_literal_id &lit = build.literals.right.at(int_id); diff --git a/src/rose/rose_build_long_lit.cpp b/src/rose/rose_build_long_lit.cpp index c01bdc8f..c32f49d0 100644 --- a/src/rose/rose_build_long_lit.cpp +++ b/src/rose/rose_build_long_lit.cpp @@ -36,17 +36,28 @@ #include "util/verify_types.h" #include "util/compile_context.h" +#include +#include + using namespace std; namespace ue2 { -/** \brief Minimum size for a non-empty hash table. */ -static constexpr u32 MIN_HASH_TABLE_SIZE = 4096; +/** \brief Minimum size for a non-empty hash table. Must be a power of two. */ +static constexpr u32 MIN_HASH_TABLE_SIZE = 128; + +/** \brief Maximum load factor (between zero and one) for a hash table. */ +static constexpr double MAX_HASH_TABLE_LOAD = 0.7; + +/** \brief Minimum size (in bits) for a bloom filter. Must be a power of two. */ +static constexpr u32 MIN_BLOOM_FILTER_SIZE = 256; + +/** \brief Maximum load factor (between zero and one) for a bloom filter. */ +static constexpr double MAX_BLOOM_FILTER_LOAD = 0.25; struct LongLitModeInfo { - u32 boundary = 0; //!< One above the largest index for this mode. - u32 positions = 0; //!< Total number of string positions. - u32 hashEntries = 0; //!< Number of hash table entries. + u32 num_literals = 0; //!< Number of strings for this mode. + u32 hashed_positions = 0; //!< Number of hashable string positions. }; struct LongLitInfo { @@ -66,54 +77,120 @@ static LongLitInfo analyzeLongLits(const vector &lits, size_t max_len) { LongLitInfo info; - u32 hashedPositionsCase = 0; - u32 hashedPositionsNocase = 0; - - // Caseful boundary is the index of the first nocase literal, as we're - // ordered (caseful, nocase). - auto first_nocase = find_if(begin(lits), end(lits), - [](const ue2_case_string &lit) { return lit.nocase; }); - info.caseful.boundary = verify_u32(distance(lits.begin(), first_nocase)); - - // Nocase boundary is the size of the literal set. - info.nocase.boundary = verify_u32(lits.size()); for (const auto &lit : lits) { - if (lit.nocase) { - hashedPositionsNocase += lit.s.size() - max_len; - info.nocase.positions += lit.s.size(); - } else { - hashedPositionsCase += lit.s.size() - max_len; - info.caseful.positions += lit.s.size(); - } + auto &lit_info = lit.nocase ? 
info.nocase : info.caseful; + assert(lit.s.size() > max_len); + lit_info.num_literals++; + lit_info.hashed_positions += lit.s.size() - max_len; } - info.caseful.hashEntries = hashedPositionsCase - ? roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, hashedPositionsCase)) - : 0; - info.nocase.hashEntries = hashedPositionsNocase - ? roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, hashedPositionsNocase)) - : 0; - - DEBUG_PRINTF("caseful: boundary=%u, positions=%u, hashedPositions=%u, " - "hashEntries=%u\n", - info.caseful.boundary, info.caseful.positions, - hashedPositionsCase, info.caseful.hashEntries); - DEBUG_PRINTF("nocase: boundary=%u, positions=%u, hashedPositions=%u, " - "hashEntries=%u\n", - info.nocase.boundary, info.nocase.positions, - hashedPositionsNocase, info.nocase.hashEntries); + DEBUG_PRINTF("case: hashed %u positions\n", info.caseful.hashed_positions); + DEBUG_PRINTF("nocase: hashed %u positions\n", info.nocase.hashed_positions); return info; } static -void fillHashes(const vector &lits, size_t max_len, - RoseLongLitHashEntry *tab, size_t numEntries, bool nocase, - const map &litToOffsetVal) { - const u32 nbits = lg2(numEntries); - map>> bucketToLitOffPairs; - map bucketToBitfield; +void addToBloomFilter(vector &bloom, const u8 *substr, bool nocase) { + const u32 num_keys = verify_u32(bloom.size() * 8); + const u32 key_mask = (1U << lg2(num_keys)) -1; + + const auto hash_functions = { bloomHash_1, bloomHash_2, bloomHash_3 }; + for (const auto &hash_func : hash_functions) { + u32 hash = hash_func(substr, nocase); + u32 key = hash & key_mask; + DEBUG_PRINTF("set key %u (of %zu)\n", key, bloom.size() * 8); + bloom[key / 8] |= 1U << (key % 8); + } +} + +static +size_t bloomOccupancy(const vector &bloom) { + return accumulate(begin(bloom), end(bloom), 0, + [](const size_t &sum, const u8 &elem) { + return sum + popcount32(elem); + }); +} + +static +double bloomLoad(const vector &bloom) { + return (double)bloomOccupancy(bloom) / (double)(bloom.size() * 8); +} + +static +vector buildBloomFilter(const vector &lits, size_t max_len, + size_t num_entries, bool nocase) { + assert(num_entries % 8 == 0); + assert((num_entries & (num_entries - 1)) == 0); // Must be power of two. + + vector bloom(num_entries / 8, 0); + + if (!num_entries) { + return bloom; + } + + for (const auto &lit : lits) { + if (nocase != lit.nocase) { + continue; + } + for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) { + const u8 *substr = (const u8 *)lit.s.c_str() + offset; + addToBloomFilter(bloom, substr, nocase); + } + } + + DEBUG_PRINTF("%s bloom filter occupancy %zu of %zu entries\n", + nocase ? "nocase" : "caseful", bloomOccupancy(bloom), + num_entries); + + return bloom; +} + + +static +vector makeBloomFilter(const vector &lits, + size_t max_len, bool nocase) { + vector bloom; + + size_t num_entries = MIN_BLOOM_FILTER_SIZE; + for (;;) { + bloom = buildBloomFilter(lits, max_len, num_entries, nocase); + DEBUG_PRINTF("built %s bloom for %zu entries: load %f\n", + nocase ? 
"nocase" : "caseful", num_entries, + bloomLoad(bloom)); + if (bloomLoad(bloom) < MAX_BLOOM_FILTER_LOAD) { + break; + } + num_entries *= 2; + } + return bloom; +} + +static +size_t hashTableOccupancy(const vector &tab) { + return count_if(begin(tab), end(tab), [](const RoseLongLitHashEntry &ent) { + return ent.str_offset != 0; + }); +} + +static +double hashTableLoad(const vector &tab) { + return (double)hashTableOccupancy(tab) / (double)(tab.size()); +} + +static +vector buildHashTable(const vector &lits, + size_t max_len, + const vector &litToOffsetVal, + size_t numEntries, bool nocase) { + vector tab(numEntries, {0,0}); + + if (!numEntries) { + return tab; + } + + map>> hashToLitOffPairs; for (u32 lit_id = 0; lit_id < lits.size(); lit_id++) { const ue2_case_string &lit = lits[lit_id]; @@ -122,37 +199,41 @@ void fillHashes(const vector &lits, size_t max_len, } for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) { const u8 *substr = (const u8 *)lit.s.c_str() + offset; - u32 h = hashLongLiteral(substr, max_len, lit.nocase); - u32 h_ent = h & ((1U << nbits) - 1); - u32 h_low = (h >> nbits) & 63; - bucketToLitOffPairs[h_ent].emplace_back(lit_id, offset); - bucketToBitfield[h_ent] |= (1ULL << h_low); + u32 hash = hashLongLiteral(substr, max_len, lit.nocase); + hashToLitOffPairs[hash].emplace_back(lit_id, offset); } } - // this used to be a set, but a bitset is much much faster given that - // we're using it only for membership testing. - boost::dynamic_bitset<> filledBuckets(numEntries); // all zero by default. + for (auto &m : hashToLitOffPairs) { + u32 hash = m.first; + vector> &d = m.second; - // sweep out bitfield entries and save the results swapped accordingly - // also, anything with bitfield entries is put in filledBuckets - for (const auto &m : bucketToBitfield) { - const u32 &bucket = m.first; - const u64a &contents = m.second; - tab[bucket].bitfield = contents; - filledBuckets.set(bucket); - } + // Sort by (offset, string) so that we'll be able to remove identical + // string prefixes. + stable_sort(begin(d), end(d), + [&](const pair &a, const pair &b) { + const auto &str_a = lits[a.first].s; + const auto &str_b = lits[b.first].s; + return tie(a.second, str_a) < tie(b.second, str_b); + }); - // store out all our chains based on free values in our hash table. - // find nearest free locations that are empty (there will always be more - // entries than strings, at present) - for (auto &m : bucketToLitOffPairs) { - u32 bucket = m.first; - deque> &d = m.second; + // Remove entries that point to the same literal prefix. + d.erase(unique(begin(d), end(d), + [&](const pair &a, const pair &b) { + if (a.second != b.second) { + return false; + } + const auto &str_a = lits[a.first].s; + const auto &str_b = lits[b.first].s; + const size_t len = max_len + a.second; + return equal(begin(str_a), begin(str_a) + len, + begin(str_b)); + }), + end(d)); - // sort d by distance of the residual string (len minus our depth into - // the string). We need to put the 'furthest back' string first... - stable_sort(d.begin(), d.end(), + // Sort d by distance of the residual string (len minus our depth into + // the string). We need to put the 'furthest back' string first. 
+ stable_sort(begin(d), end(d), [](const pair &a, const pair &b) { if (a.second != b.second) { return a.second > b.second; /* longest is first */ @@ -160,47 +241,79 @@ void fillHashes(const vector &lits, size_t max_len, return a.first < b.first; }); - while (1) { - // first time through is always at bucket, then we fill in links - filledBuckets.set(bucket); - RoseLongLitHashEntry *ent = &tab[bucket]; - u32 lit_id = d.front().first; - u32 offset = d.front().second; + u32 bucket = hash % numEntries; - ent->state = verify_u32(litToOffsetVal.at(lit_id) + - offset + max_len); - ent->link = (u32)LINK_INVALID; - - d.pop_front(); - if (d.empty()) { - break; - } - // now, if there is another value - // find a bucket for it and put in 'bucket' and repeat - // all we really need to do is find something not in filledBuckets, - // ideally something close to bucket - // we search backward and forward from bucket, trying to stay as - // close as possible. - UNUSED bool found = false; - int bucket_candidate = 0; - for (u32 k = 1; k < numEntries * 2; k++) { - bucket_candidate = bucket + (((k & 1) == 0) - ? (-(int)k / 2) : (k / 2)); - if (bucket_candidate < 0 || - (size_t)bucket_candidate >= numEntries) { - continue; - } - if (!filledBuckets.test(bucket_candidate)) { - found = true; - break; + // Placement via linear probing. + for (const auto &lit_offset : d) { + while (tab[bucket].str_offset != 0) { + bucket++; + if (bucket == numEntries) { + bucket = 0; } } - assert(found); - bucket = bucket_candidate; - ent->link = bucket; + u32 lit_id = lit_offset.first; + u32 offset = lit_offset.second; + + DEBUG_PRINTF("hash 0x%08x lit_id %u offset %u bucket %u\n", hash, + lit_id, offset, bucket); + + auto &entry = tab[bucket]; + entry.str_offset = verify_u32(litToOffsetVal.at(lit_id)); + assert(entry.str_offset != 0); + entry.str_len = offset + max_len; } } + + DEBUG_PRINTF("%s hash table occupancy %zu of %zu entries\n", + nocase ? "nocase" : "caseful", hashTableOccupancy(tab), + numEntries); + + return tab; +} + +static +vector makeHashTable(const vector &lits, + size_t max_len, + const vector &litToOffsetVal, + u32 numPositions, bool nocase) { + vector tab; + + // Note: for the hash table, we must always have at least enough entries + // for the number of hashable positions. + size_t num_entries = roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, + numPositions)); + + for (;;) { + tab = buildHashTable(lits, max_len, litToOffsetVal, num_entries, + nocase); + DEBUG_PRINTF("built %s hash table for %zu entries: load %f\n", + nocase ? 
"nocase" : "caseful", num_entries, + hashTableLoad(tab)); + if (hashTableLoad(tab) < MAX_HASH_TABLE_LOAD) { + break; + } + num_entries *= 2; + } + return tab; +} + +static +vector buildLits(const vector &lits, u32 baseOffset, + vector &litToOffsetVal) { + vector blob; + litToOffsetVal.resize(lits.size(), 0); + + u32 lit_id = 0; + for (const auto &lit : lits) { + u32 offset = baseOffset + verify_u32(blob.size()); + blob.insert(blob.end(), begin(lit.s), end(lit.s)); + litToOffsetVal[lit_id] = offset; + lit_id++; + } + + DEBUG_PRINTF("built %zu bytes of strings\n", blob.size()); + return blob; } u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob, @@ -251,89 +364,69 @@ u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob, LongLitInfo info = analyzeLongLits(lits, max_len); - // first assess the size and find our caseless threshold - size_t headerSize = ROUNDUP_16(sizeof(RoseLongLitTable)); + vector litToOffsetVal; + const size_t headerSize = ROUNDUP_16(sizeof(RoseLongLitTable)); + vector lit_blob = buildLits(lits, headerSize, litToOffsetVal); - size_t litTabOffset = headerSize; + // Build caseful bloom filter and hash table. + vector bloom_case; + vector tab_case; + if (info.caseful.num_literals) { + bloom_case = makeBloomFilter(lits, max_len, false); + tab_case = makeHashTable(lits, max_len, litToOffsetVal, + info.caseful.hashed_positions, false); + } - size_t litTabNumEntries = lits.size() + 1; - size_t litTabSize = ROUNDUP_16(litTabNumEntries * sizeof(RoseLongLiteral)); + // Build nocase bloom filter and hash table. + vector bloom_nocase; + vector tab_nocase; + if (info.nocase.num_literals) { + bloom_nocase = makeBloomFilter(lits, max_len, true); + tab_nocase = makeHashTable(lits, max_len, litToOffsetVal, + info.nocase.hashed_positions, true); + } - size_t wholeLitTabOffset = litTabOffset + litTabSize; - size_t totalWholeLitTabSize = - ROUNDUP_16(info.caseful.positions + info.nocase.positions); + size_t wholeLitTabSize = ROUNDUP_16(byte_length(lit_blob)); + size_t htOffsetCase = headerSize + wholeLitTabSize; + size_t htOffsetNocase = htOffsetCase + byte_length(tab_case); + size_t bloomOffsetCase = htOffsetNocase + byte_length(tab_nocase); + size_t bloomOffsetNocase = bloomOffsetCase + byte_length(bloom_case); - size_t htOffsetCase = wholeLitTabOffset + totalWholeLitTabSize; - size_t htSizeCase = info.caseful.hashEntries * sizeof(RoseLongLitHashEntry); - size_t htOffsetNocase = htOffsetCase + htSizeCase; - size_t htSizeNocase = - info.nocase.hashEntries * sizeof(RoseLongLitHashEntry); - - size_t tabSize = ROUNDUP_16(htOffsetNocase + htSizeNocase); + size_t tabSize = ROUNDUP_16(bloomOffsetNocase + byte_length(bloom_nocase)); // need to add +2 to both of these to allow space for the actual largest // value as well as handling the fact that we add one to the space when // storing out a position to allow zero to mean "no stream state value" - u8 streamBitsCase = lg2(roundUpToPowerOfTwo(info.caseful.positions + 2)); - u8 streamBitsNocase = lg2(roundUpToPowerOfTwo(info.nocase.positions + 2)); + u8 streamBitsCase = lg2(roundUpToPowerOfTwo(tab_case.size() + 2)); + u8 streamBitsNocase = lg2(roundUpToPowerOfTwo(tab_nocase.size() + 2)); u32 tot_state_bytes = ROUNDUP_N(streamBitsCase + streamBitsNocase, 8) / 8; auto table = aligned_zmalloc_unique(tabSize); assert(table); // otherwise would have thrown std::bad_alloc - // then fill it in - char *ptr = table.get(); - RoseLongLitTable *header = (RoseLongLitTable *)ptr; - // fill in header + // Fill in the 
RoseLongLitTable header structure. + RoseLongLitTable *header = (RoseLongLitTable *)(table.get()); + header->size = verify_u32(tabSize); header->maxLen = verify_u8(max_len); // u8 so doesn't matter; won't go > 255 - header->boundaryCase = info.caseful.boundary; - header->hashOffsetCase = verify_u32(htOffsetCase); - header->hashNBitsCase = lg2(info.caseful.hashEntries); - header->streamStateBitsCase = streamBitsCase; - header->boundaryNocase = info.nocase.boundary; - header->hashOffsetNocase = verify_u32(htOffsetNocase); - header->hashNBitsNocase = lg2(info.nocase.hashEntries); - header->streamStateBitsNocase = streamBitsNocase; + header->caseful.hashOffset = verify_u32(htOffsetCase); + header->caseful.hashBits = lg2(tab_case.size()); + header->caseful.streamStateBits = streamBitsCase; + header->caseful.bloomOffset = verify_u32(bloomOffsetCase); + header->caseful.bloomBits = lg2(bloom_case.size() * 8); + header->nocase.hashOffset = verify_u32(htOffsetNocase); + header->nocase.hashBits = lg2(tab_nocase.size()); + header->nocase.streamStateBits = streamBitsNocase; + header->nocase.bloomOffset = verify_u32(bloomOffsetNocase); + header->nocase.bloomBits = lg2(bloom_nocase.size() * 8); assert(tot_state_bytes < sizeof(u64a)); header->streamStateBytes = verify_u8(tot_state_bytes); // u8 - ptr += headerSize; - - // now fill in the rest - - RoseLongLiteral *litTabPtr = (RoseLongLiteral *)ptr; - ptr += litTabSize; - - map litToOffsetVal; - for (auto i = lits.begin(), e = lits.end(); i != e; ++i) { - u32 entry = verify_u32(i - lits.begin()); - u32 offset = verify_u32(ptr - table.get()); - - // point the table entry to the string location - litTabPtr[entry].offset = offset; - - litToOffsetVal[entry] = offset; - - // copy the string into the string location - const auto &s = i->s; - memcpy(ptr, s.c_str(), s.size()); - - ptr += s.size(); // and the string location - } - - // fill in final lit table entry with current ptr (serves as end value) - litTabPtr[lits.size()].offset = verify_u32(ptr - table.get()); - - // fill hash tables - ptr = table.get() + htOffsetCase; - fillHashes(lits, max_len, (RoseLongLitHashEntry *)ptr, - info.caseful.hashEntries, false, litToOffsetVal); - ptr += htSizeCase; - fillHashes(lits, max_len, (RoseLongLitHashEntry *)ptr, - info.nocase.hashEntries, true, litToOffsetVal); - ptr += htSizeNocase; - - assert(ptr <= table.get() + tabSize); + // Copy in the literal strings, hash tables and bloom filters, + copy_bytes(table.get() + headerSize, lit_blob); + copy_bytes(table.get() + htOffsetCase, tab_case); + copy_bytes(table.get() + bloomOffsetCase, bloom_case); + copy_bytes(table.get() + htOffsetNocase, tab_nocase); + copy_bytes(table.get() + bloomOffsetNocase, bloom_nocase); DEBUG_PRINTF("built streaming table, size=%zu\n", tabSize); DEBUG_PRINTF("requires %zu bytes of history\n", max_len); diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 9a0bd28c..1ab11f9f 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -49,9 +49,10 @@ #include #include #include +#include #include -#include #include +#include #include #ifndef DUMP_SUPPORT @@ -1049,6 +1050,39 @@ void dumpAnchoredStats(const void *atable, FILE *f) { } +static +void dumpLongLiteralSubtable(const RoseLongLitTable *ll_table, + const RoseLongLitSubtable *ll_sub, FILE *f) { + if (!ll_sub->hashBits) { + fprintf(f, " \n"); + return; + } + + const char *base = (const char *)ll_table; + + u32 nbits = ll_sub->hashBits; + u32 num_entries = 1U << nbits; + const auto *tab = (const RoseLongLitHashEntry *)(base 
+ ll_sub->hashOffset); + u32 hash_occ = + count_if(tab, tab + num_entries, [](const RoseLongLitHashEntry &ent) { + return ent.str_offset != 0; + }); + float hash_occ_percent = ((float)hash_occ / (float)num_entries) * 100; + + fprintf(f, " hash table : %u bits, occupancy %u/%u (%0.1f%%)\n", + nbits, hash_occ, num_entries, hash_occ_percent); + + u32 bloom_bits = ll_sub->bloomBits; + u32 bloom_size = 1U << bloom_bits; + const u8 *bloom = (const u8 *)base + ll_sub->bloomOffset; + u32 bloom_occ = accumulate(bloom, bloom + bloom_size / 8, 0, + [](const u32 &sum, const u8 &elem) { return sum + popcount32(elem); }); + float bloom_occ_percent = ((float)bloom_occ / (float)(bloom_size)) * 100; + + fprintf(f, " bloom filter : %u bits, occupancy %u/%u (%0.1f%%)\n", + bloom_bits, bloom_occ, bloom_size, bloom_occ_percent); +} + static void dumpLongLiteralTable(const RoseEngine *t, FILE *f) { if (!t->longLitTableOffset) { @@ -1062,17 +1096,15 @@ void dumpLongLiteralTable(const RoseEngine *t, FILE *f) { (const struct RoseLongLitTable *)loadFromByteCodeOffset( t, t->longLitTableOffset); - u32 num_caseful = ll_table->boundaryCase; - u32 num_caseless = ll_table->boundaryNocase - num_caseful; + fprintf(f, " total size : %u bytes\n", ll_table->size); + fprintf(f, " longest len : %u\n", ll_table->maxLen); + fprintf(f, " stream state : %u bytes\n", ll_table->streamStateBytes); - fprintf(f, " longest len: %u\n", ll_table->maxLen); - fprintf(f, " counts: %u caseful, %u caseless\n", num_caseful, - num_caseless); - fprintf(f, " hash bits: %u caseful, %u caseless\n", - ll_table->hashNBitsCase, ll_table->hashNBitsNocase); - fprintf(f, " state bits: %u caseful, %u caseless\n", - ll_table->streamStateBitsCase, ll_table->streamStateBitsNocase); - fprintf(f, " stream state: %u bytes\n", ll_table->streamStateBytes); + fprintf(f, " caseful:\n"); + dumpLongLiteralSubtable(ll_table, &ll_table->caseful, f); + + fprintf(f, " nocase:\n"); + dumpLongLiteralSubtable(ll_table, &ll_table->nocase, f); } // Externally accessible functions diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 32805ab3..3a366f0d 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -446,51 +446,49 @@ struct ALIGN_CL_DIRECTIVE anchored_matcher_info { u32 anchoredMinDistance; /* start of region to run anchored table over */ }; +/** + * \brief Long literal subtable for a particular mode (caseful or nocase). + */ +struct RoseLongLitSubtable { + /** + * \brief Offset of the hash table (relative to RoseLongLitTable base). + * + * Offset is zero if no such table exists. + */ + u32 hashOffset; + + /** + * \brief Offset of the bloom filter (relative to RoseLongLitTable base). + * + * Offset is zero if no such table exists. + */ + u32 bloomOffset; + + /** \brief lg2 of the size of the hash table. */ + u8 hashBits; + + /** \brief Size of the bloom filter in bits. */ + u8 bloomBits; + + /** \brief Number of bits of packed stream state used. */ + u8 streamStateBits; +}; + /** * \brief Long literal table header. */ struct RoseLongLitTable { - /** \brief String ID one beyond the maximum entry for caseful literals. */ - u32 boundaryCase; - /** - * \brief String ID one beyond the maximum entry for caseless literals. - * This is also the total size of the literal table. + * \brief Total size of the whole table (including strings, bloom filters, + * hash tables). */ - u32 boundaryNocase; + u32 size; - /** - * \brief Offset of the caseful hash table (relative to RoseLongLitTable - * base). - * - * Offset is zero if no such table exists. 
- */ - u32 hashOffsetCase; + /** \brief Caseful sub-table (hash table and bloom filter). */ + struct RoseLongLitSubtable caseful; - /** - * \brief Offset of the caseless hash table (relative to RoseLongLitTable - * base). - * - * Offset is zero if no such table exists. - */ - u32 hashOffsetNocase; - - /** \brief lg2 of the size of the caseful hash table. */ - u32 hashNBitsCase; - - /** \brief lg2 of the size of the caseless hash table. */ - u32 hashNBitsNocase; - - /** - * \brief Number of bits of packed stream state for the caseful hash table. - */ - u8 streamStateBitsCase; - - /** - * \brief Number of bits of packed stream state for the caseless hash - * table. - */ - u8 streamStateBitsNocase; + /** \brief Caseless sub-table (hash table and bloom filter). */ + struct RoseLongLitSubtable nocase; /** \brief Total size of packed stream state in bytes. */ u8 streamStateBytes; @@ -499,39 +497,19 @@ struct RoseLongLitTable { u8 maxLen; }; -/** - * \brief One of these structures per literal entry in our long literal table. - */ -struct RoseLongLiteral { - /** - * \brief Offset of the literal string itself, relative to - * RoseLongLitTable base. - */ - u32 offset; -}; - -/** \brief "No further links" value used for \ref RoseLongLitHashEntry::link. */ -#define LINK_INVALID 0xffffffff - /** * \brief One of these structures per hash table entry in our long literal * table. */ struct RoseLongLitHashEntry { /** - * \brief Bitfield used as a quick guard for hash buckets. - * - * For a given hash value N, the low six bits of N are taken and the - * corresponding bit is switched on in this bitfield if this bucket is used - * for that hash. + * \brief Offset of the literal string itself, relative to + * RoseLongLitTable base. Zero if this bucket is empty. */ - u64a bitfield; + u32 str_offset; - /** \brief Offset in the literal table for this string. */ - u32 state; - - /** \brief Hash table index of next entry in the chain for this bucket. */ - u32 link; + /** \brief Length of the literal string. */ + u32 str_len; }; static really_inline diff --git a/src/rose/stream.c b/src/rose/stream.c index 72286b4b..6e4d0add 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -551,6 +551,11 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { tctxt->minMatchOffset = offset; tctxt->minNonMpvMatchOffset = offset; tctxt->next_mpv_offset = 0; + tctxt->ll_buf = scratch->core_info.hbuf; + tctxt->ll_len = scratch->core_info.hlen; + tctxt->ll_buf_nocase = scratch->core_info.hbuf; + tctxt->ll_len_nocase = scratch->core_info.hlen; + DEBUG_PRINTF("BEGIN: history len=%zu, buffer len=%zu groups=%016llx\n", scratch->core_info.hlen, scratch->core_info.len, tctxt->groups); @@ -590,18 +595,14 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { MIN(t->floatingDistance, length + offset) - offset : 0; } + loadLongLiteralState(t, state, scratch); + size_t hlength = scratch->core_info.hlen; - char rebuild = 0; - - if (hlength) { - // Can only have long literal state or rebuild if this is not the - // first write to this stream. 
- loadLongLiteralState(t, state, scratch); - rebuild = (scratch->core_info.status & STATUS_DELAY_DIRTY) && - (t->maxFloatingDelayedMatch == ROSE_BOUND_INF || - offset < t->maxFloatingDelayedMatch); - } + char rebuild = hlength && + (scratch->core_info.status & STATUS_DELAY_DIRTY) && + (t->maxFloatingDelayedMatch == ROSE_BOUND_INF || + offset < t->maxFloatingDelayedMatch); DEBUG_PRINTF("**rebuild %hhd status %hhu mfdm %u, offset %llu\n", rebuild, scratch->core_info.status, t->maxFloatingDelayedMatch, offset); diff --git a/src/rose/stream_long_lit.h b/src/rose/stream_long_lit.h index 676544d7..d78e2863 100644 --- a/src/rose/stream_long_lit.h +++ b/src/rose/stream_long_lit.h @@ -36,52 +36,12 @@ #include "util/copybytes.h" static really_inline -const struct RoseLongLiteral * -getLitTab(const struct RoseLongLitTable *ll_table) { - return (const struct RoseLongLiteral *)((const char *)ll_table + - ROUNDUP_16(sizeof(struct RoseLongLitTable))); -} - -static really_inline -u32 get_start_lit_idx(const struct RoseLongLitTable *ll_table, - const char nocase) { - return nocase ? ll_table->boundaryCase : 0; -} - -static really_inline -u32 get_end_lit_idx(const struct RoseLongLitTable *ll_table, - const char nocase) { - return nocase ? ll_table->boundaryNocase : ll_table->boundaryCase; -} - -// search for the literal index that contains the current state -static rose_inline -u32 findLitTabEntry(const struct RoseLongLitTable *ll_table, - u32 stateValue, const char nocase) { - const struct RoseLongLiteral *litTab = getLitTab(ll_table); - u32 lo = get_start_lit_idx(ll_table, nocase); - u32 hi = get_end_lit_idx(ll_table, nocase); - - // Now move stateValue back by one so that we're looking for the - // litTab entry that includes it the string, not the one 'one past' it - stateValue -= 1; - assert(lo != hi); - assert(litTab[lo].offset <= stateValue); - assert(litTab[hi].offset > stateValue); - - // binary search to find the entry e such that: - // litTab[e].offsetToLiteral <= stateValue < litTab[e+1].offsetToLiteral - while (lo + 1 < hi) { - u32 mid = (lo + hi) / 2; - if (litTab[mid].offset <= stateValue) { - lo = mid; - } else { // (litTab[mid].offset > stateValue) { - hi = mid; - } - } - assert(litTab[lo].offset <= stateValue); - assert(litTab[hi].offset > stateValue); - return lo; +const struct RoseLongLitHashEntry * +getHashTableBase(const struct RoseLongLitTable *ll_table, + const struct RoseLongLitSubtable *ll_sub) { + assert(ll_sub->hashOffset); + return (const struct RoseLongLitHashEntry *)((const char *)ll_table + + ll_sub->hashOffset); } // Reads from stream state and unpacks values into stream state table. 
@@ -94,8 +54,8 @@ void loadLongLitStreamState(const struct RoseLongLitTable *ll_table, assert(state_case && state_nocase); u8 ss_bytes = ll_table->streamStateBytes; - u8 ssb = ll_table->streamStateBitsCase; - UNUSED u8 ssb_nc = ll_table->streamStateBitsNocase; + u8 ssb = ll_table->caseful.streamStateBits; + UNUSED u8 ssb_nc = ll_table->nocase.streamStateBits; assert(ss_bytes == (ssb + ssb_nc + 7) / 8); #if defined(ARCH_32_BIT) @@ -116,40 +76,22 @@ void loadLongLitStreamState(const struct RoseLongLitTable *ll_table, *state_nocase = (u32)(streamVal >> ssb); } -static really_inline -u32 getBaseOffsetOfLits(const struct RoseLongLitTable *ll_table, - const char nocase) { - u32 lit_idx = get_start_lit_idx(ll_table, nocase); - return getLitTab(ll_table)[lit_idx].offset; -} - -static really_inline -u32 unpackStateVal(const struct RoseLongLitTable *ll_table, const char nocase, - u32 v) { - return v + getBaseOffsetOfLits(ll_table, nocase) - 1; -} - -static really_inline -u32 packStateVal(const struct RoseLongLitTable *ll_table, const char nocase, - u32 v) { - return v - getBaseOffsetOfLits(ll_table, nocase) + 1; -} - static rose_inline void loadLongLiteralStateMode(struct hs_scratch *scratch, const struct RoseLongLitTable *ll_table, - const struct RoseLongLiteral *litTab, + const struct RoseLongLitSubtable *ll_sub, const u32 state, const char nocase) { if (!state) { DEBUG_PRINTF("no state for %s\n", nocase ? "caseless" : "caseful"); return; } - u32 stateValue = unpackStateVal(ll_table, nocase, state); - u32 idx = findLitTabEntry(ll_table, stateValue, nocase); - size_t found_offset = litTab[idx].offset; - const u8 *found_buf = found_offset + (const u8 *)ll_table; - size_t found_sz = stateValue - found_offset; + const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, ll_sub); + const struct RoseLongLitHashEntry *ent = tab + state - 1; + + assert(ent->str_offset + ent->str_len <= ll_table->size); + const u8 *found_buf = (const u8 *)ll_table + ent->str_offset; + size_t found_sz = ent->str_len; struct RoseContext *tctxt = &scratch->tctxt; if (nocase) { @@ -168,34 +110,42 @@ void loadLongLiteralState(const struct RoseEngine *t, char *state, return; } + // If we don't have any long literals in play, these values must point to + // the real history buffer so that CHECK_LITERAL instructions examine the + // history buffer. 
scratch->tctxt.ll_buf = scratch->core_info.hbuf; scratch->tctxt.ll_len = scratch->core_info.hlen; scratch->tctxt.ll_buf_nocase = scratch->core_info.hbuf; scratch->tctxt.ll_len_nocase = scratch->core_info.hlen; + if (!scratch->core_info.hlen) { + return; + } + const struct RoseLongLitTable *ll_table = getByOffset(t, t->longLitTableOffset); - const struct RoseLongLiteral *litTab = getLitTab(ll_table); const u8 *ll_state = getLongLitState(t, state); u32 state_case; u32 state_nocase; loadLongLitStreamState(ll_table, ll_state, &state_case, &state_nocase); - loadLongLiteralStateMode(scratch, ll_table, litTab, state_case, 0); - loadLongLiteralStateMode(scratch, ll_table, litTab, state_nocase, 1); + DEBUG_PRINTF("loaded {%u, %u}\n", state_case, state_nocase); + + loadLongLiteralStateMode(scratch, ll_table, &ll_table->caseful, + state_case, 0); + loadLongLiteralStateMode(scratch, ll_table, &ll_table->nocase, + state_nocase, 1); } static rose_inline char confirmLongLiteral(const struct RoseLongLitTable *ll_table, - const hs_scratch_t *scratch, u32 hashState, + const struct hs_scratch *scratch, + const struct RoseLongLitHashEntry *ent, const char nocase) { - const struct RoseLongLiteral *litTab = getLitTab(ll_table); - u32 idx = findLitTabEntry(ll_table, hashState, nocase); - size_t found_offset = litTab[idx].offset; - const u8 *s = found_offset + (const u8 *)ll_table; - assert(hashState > found_offset); - size_t len = hashState - found_offset; + assert(ent->str_offset + ent->str_len <= ll_table->size); + const u8 *s = (const u8 *)ll_table + ent->str_offset; + size_t len = ent->str_len; const u8 *buf = scratch->core_info.buf; const size_t buf_len = scratch->core_info.len; @@ -225,14 +175,13 @@ char confirmLongLiteral(const struct RoseLongLitTable *ll_table, return 0; } - DEBUG_PRINTF("confirmed hashState=%u\n", hashState); return 1; } static rose_inline -void calcStreamingHash(const struct core_info *ci, - const struct RoseLongLitTable *ll_table, u8 hash_len, - u32 *hash_case, u32 *hash_nocase) { +const u8 *prepScanBuffer(const struct core_info *ci, + const struct RoseLongLitTable *ll_table, u8 *tempbuf) { + const u8 hash_len = ll_table->maxLen; assert(hash_len >= LONG_LIT_HASH_LEN); // Our hash function operates over LONG_LIT_HASH_LEN bytes, starting from @@ -240,7 +189,6 @@ void calcStreamingHash(const struct core_info *ci, // entirely from either the current buffer or the history buffer, we pass // in the pointer directly; otherwise we must make a copy. - u8 tempbuf[LONG_LIT_HASH_LEN]; const u8 *base; if (hash_len > ci->len) { @@ -266,71 +214,7 @@ void calcStreamingHash(const struct core_info *ci, base = ci->buf + ci->len - hash_len; } - if (ll_table->hashNBitsCase) { - *hash_case = hashLongLiteral(base, LONG_LIT_HASH_LEN, 0); - DEBUG_PRINTF("caseful hash %u\n", *hash_case); - } - if (ll_table->hashNBitsNocase) { - *hash_nocase = hashLongLiteral(base, LONG_LIT_HASH_LEN, 1); - DEBUG_PRINTF("caseless hash %u\n", *hash_nocase); - } -} - -static really_inline -const struct RoseLongLitHashEntry * -getHashTableBase(const struct RoseLongLitTable *ll_table, const char nocase) { - const u32 hashOffset = nocase ? ll_table->hashOffsetNocase - : ll_table->hashOffsetCase; - return (const struct RoseLongLitHashEntry *)((const char *)ll_table + - hashOffset); -} - -static rose_inline -const struct RoseLongLitHashEntry * -getLongLitHashEnt(const struct RoseLongLitTable *ll_table, u32 h, - const char nocase) { - u32 nbits = nocase ? 
ll_table->hashNBitsNocase : ll_table->hashNBitsCase; - if (!nbits) { - return NULL; - } - - u32 h_ent = h & ((1 << nbits) - 1); - u32 h_low = (h >> nbits) & 63; - - const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, nocase); - const struct RoseLongLitHashEntry *ent = tab + h_ent; - - if (!((ent->bitfield >> h_low) & 0x1)) { - return NULL; - } - - return ent; -} - -static rose_inline -u32 storeLongLiteralStateMode(const struct hs_scratch *scratch, - const struct RoseLongLitTable *ll_table, - const struct RoseLongLitHashEntry *ent, - const char nocase) { - assert(ent); - assert(nocase ? ll_table->hashNBitsNocase : ll_table->hashNBitsCase); - - const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, nocase); - - u32 packed_state = 0; - while (1) { - if (confirmLongLiteral(ll_table, scratch, ent->state, nocase)) { - packed_state = packStateVal(ll_table, nocase, ent->state); - DEBUG_PRINTF("set %s state to %u\n", nocase ? "nocase" : "case", - packed_state); - break; - } - if (ent->link == LINK_INVALID) { - break; - } - ent = tab + ent->link; - } - return packed_state; + return base; } #ifndef NDEBUG @@ -359,8 +243,8 @@ void storeLongLitStreamState(const struct RoseLongLitTable *ll_table, assert(ll_state); u8 ss_bytes = ll_table->streamStateBytes; - u8 ssb = ll_table->streamStateBitsCase; - UNUSED u8 ssb_nc = ll_table->streamStateBitsNocase; + u8 ssb = ll_table->caseful.streamStateBits; + UNUSED u8 ssb_nc = ll_table->nocase.streamStateBits; assert(ss_bytes == ROUNDUP_N(ssb + ssb_nc, 8) / 8); assert(!streamingTableOverflow(state_case, state_nocase, ssb, ssb_nc)); @@ -380,6 +264,65 @@ void storeLongLitStreamState(const struct RoseLongLitTable *ll_table, partial_store_u64a(ll_state, stagingStreamState, ss_bytes); } +static really_inline +char has_bit(const u8 *data, u32 bit) { + return (data[bit / 8] >> (bit % 8)) & 1; +} + +static rose_inline +char bloomHasKey(const u8 *bloom, u32 bloom_mask, u32 hash) { + return has_bit(bloom, hash & bloom_mask); +} + +static rose_inline +char checkBloomFilter(const struct RoseLongLitTable *ll_table, + const struct RoseLongLitSubtable *ll_sub, + const u8 *scan_buf, char nocase) { + assert(ll_sub->bloomBits); + + const u8 *bloom = (const u8 *)ll_table + ll_sub->bloomOffset; + const u32 bloom_mask = (1U << ll_sub->bloomBits) - 1; + + char v = 1; + v &= bloomHasKey(bloom, bloom_mask, bloomHash_1(scan_buf, nocase)); + v &= bloomHasKey(bloom, bloom_mask, bloomHash_2(scan_buf, nocase)); + v &= bloomHasKey(bloom, bloom_mask, bloomHash_3(scan_buf, nocase)); + return v; +} + +/** + * \brief Look for a hit in the hash table. + * + * Returns zero if not found, otherwise returns (bucket + 1). 
+ */ +static rose_inline +u32 checkHashTable(const struct RoseLongLitTable *ll_table, + const struct RoseLongLitSubtable *ll_sub, const u8 *scan_buf, + const struct hs_scratch *scratch, char nocase) { + const u32 nbits = ll_sub->hashBits; + assert(nbits && nbits < 32); + const u32 num_entries = 1U << nbits; + + const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, ll_sub); + + u32 hash = hashLongLiteral(scan_buf, LONG_LIT_HASH_LEN, nocase); + u32 bucket = hash & ((1U << nbits) - 1); + + while (tab[bucket].str_offset != 0) { + DEBUG_PRINTF("checking bucket %u\n", bucket); + if (confirmLongLiteral(ll_table, scratch, &tab[bucket], nocase)) { + DEBUG_PRINTF("found hit for bucket %u\n", bucket); + return bucket + 1; + } + + if (++bucket == num_entries) { + bucket = 0; + } + } + + return 0; +} + static rose_inline void storeLongLiteralState(const struct RoseEngine *t, char *state, struct hs_scratch *scratch) { @@ -401,28 +344,22 @@ void storeLongLiteralState(const struct RoseEngine *t, char *state, // If we don't have enough history, we don't need to do anything. if (ll_table->maxLen <= ci->len + ci->hlen) { - u32 hash_case = 0; - u32 hash_nocase = 0; + u8 tempbuf[LONG_LIT_HASH_LEN]; + const u8 *scan_buf = prepScanBuffer(ci, ll_table, tempbuf); - calcStreamingHash(ci, ll_table, ll_table->maxLen, &hash_case, - &hash_nocase); - - const struct RoseLongLitHashEntry *ent_case = - getLongLitHashEnt(ll_table, hash_case, 0); - const struct RoseLongLitHashEntry *ent_nocase = - getLongLitHashEnt(ll_table, hash_nocase, 1); - - DEBUG_PRINTF("ent_caseful=%p, ent_caseless=%p\n", ent_case, ent_nocase); - - if (ent_case) { - state_case = storeLongLiteralStateMode(scratch, ll_table, - ent_case, 0); + if (ll_table->caseful.hashBits && + checkBloomFilter(ll_table, &ll_table->caseful, scan_buf, 0)) { + state_case = checkHashTable(ll_table, &ll_table->caseful, scan_buf, + scratch, 0); } - if (ent_nocase) { - state_nocase = storeLongLiteralStateMode(scratch, ll_table, - ent_nocase, 1); + if (ll_table->nocase.hashBits && + checkBloomFilter(ll_table, &ll_table->nocase, scan_buf, 1)) { + state_nocase = checkHashTable(ll_table, &ll_table->nocase, scan_buf, + scratch, 1); } + } else { + DEBUG_PRINTF("not enough history (%zu bytes)\n", ci->len + ci->hlen); } DEBUG_PRINTF("store {%u, %u}\n", state_case, state_nocase); diff --git a/src/rose/stream_long_lit_hash.h b/src/rose/stream_long_lit_hash.h index 0e1606c5..041f05e6 100644 --- a/src/rose/stream_long_lit_hash.h +++ b/src/rose/stream_long_lit_hash.h @@ -30,17 +30,18 @@ #define STREAM_LONG_LIT_HASH_H #include "ue2common.h" +#include "util/bitutils.h" #include "util/unaligned.h" /** \brief Length of the buffer operated on by \ref hashLongLiteral(). */ #define LONG_LIT_HASH_LEN 24 +/** \brief Multiplier used by al the hash functions below. */ +#define HASH_MULTIPLIER 0x0b4e0ef37bc32127ULL + /** \brief Hash function used for long literal table in streaming mode. */ static really_inline u32 hashLongLiteral(const u8 *ptr, UNUSED size_t len, char nocase) { - const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL; - const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL; - // We unconditionally hash LONG_LIT_HASH_LEN bytes; all use cases of this // hash are for strings longer than this. 
assert(len >= 24); @@ -49,17 +50,56 @@ u32 hashLongLiteral(const u8 *ptr, UNUSED size_t len, char nocase) { u64a v2 = unaligned_load_u64a(ptr + 8); u64a v3 = unaligned_load_u64a(ptr + 16); if (nocase) { - v1 &= CASEMASK; - v2 &= CASEMASK; - v3 &= CASEMASK; + v1 &= OCTO_CASE_CLEAR; + v2 &= OCTO_CASE_CLEAR; + v3 &= OCTO_CASE_CLEAR; } - v1 *= MULTIPLIER; - v2 *= MULTIPLIER * MULTIPLIER; - v3 *= MULTIPLIER * MULTIPLIER * MULTIPLIER; + v1 *= HASH_MULTIPLIER; + v2 *= HASH_MULTIPLIER * HASH_MULTIPLIER; + v3 *= HASH_MULTIPLIER * HASH_MULTIPLIER * HASH_MULTIPLIER; v1 >>= 32; v2 >>= 32; v3 >>= 32; return v1 ^ v2 ^ v3; } +/** + * \brief Internal, used by the bloom filter hash functions below. Hashes 16 + * bytes beginning at (ptr + offset). + */ +static really_inline +u32 bloomHash_i(const u8 *ptr, u32 offset, u64a multiplier, char nocase) { + assert(offset + 16 <= LONG_LIT_HASH_LEN); + + u64a v = unaligned_load_u64a(ptr + offset); + if (nocase) { + v &= OCTO_CASE_CLEAR; + } + v *= multiplier; + return v >> 32; +} + +/* + * We ensure that we see every byte the first LONG_LIT_HASH_LEN bytes of input + * data (using at least one of the following functions). + */ + +static really_inline +u32 bloomHash_1(const u8 *ptr, char nocase) { + const u64a multiplier = HASH_MULTIPLIER; + return bloomHash_i(ptr, 0, multiplier, nocase); +} + +static really_inline +u32 bloomHash_2(const u8 *ptr, char nocase) { + const u64a multiplier = HASH_MULTIPLIER * HASH_MULTIPLIER; + return bloomHash_i(ptr, 4, multiplier, nocase); +} + +static really_inline +u32 bloomHash_3(const u8 *ptr, char nocase) { + const u64a multiplier = HASH_MULTIPLIER * HASH_MULTIPLIER * HASH_MULTIPLIER; + return bloomHash_i(ptr, 8, multiplier, nocase); +} + #endif // STREAM_LONG_LIT_HASH_H diff --git a/src/util/bitutils.h b/src/util/bitutils.h index 6f1bcd09..b7a09ca7 100644 --- a/src/util/bitutils.h +++ b/src/util/bitutils.h @@ -70,6 +70,7 @@ #define CASE_BIT 0x20 #define CASE_CLEAR 0xdf #define DOUBLE_CASE_CLEAR 0xdfdf +#define OCTO_CASE_CLEAR 0xdfdfdfdfdfdfdfdfULL static really_inline u32 clz32(u32 x) { From 98c791dc6e6c55c402aa8d41b62564ed51b6a681 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 19 Sep 2016 11:23:37 +1000 Subject: [PATCH 037/103] noodle: correct history req calculation --- src/hwlm/hwlm_build.cpp | 2 +- src/scratch.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hwlm/hwlm_build.cpp b/src/hwlm/hwlm_build.cpp index 32de6bd0..3ab10f23 100644 --- a/src/hwlm/hwlm_build.cpp +++ b/src/hwlm/hwlm_build.cpp @@ -523,7 +523,7 @@ bool isNoodleable(const vector &lits, } if (stream_control) { // nullptr if in block mode - if (lits.front().s.length() + 1 > stream_control->history_max) { + if (lits.front().s.length() > stream_control->history_max + 1) { DEBUG_PRINTF("length of %zu too long for history max %zu\n", lits.front().s.length(), stream_control->history_max); diff --git a/src/scratch.h b/src/scratch.h index 73a35149..8c7a1281 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -45,7 +45,7 @@ extern "C" #endif UNUSED static const u32 SCRATCH_MAGIC = 0x544F4259; -#define FDR_TEMP_BUF_SIZE 220 +#define FDR_TEMP_BUF_SIZE 222 struct fatbit; struct hs_scratch; From f7cc8a618d7c5b0d331dbca6ff95062e50436a88 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 13 Sep 2016 15:52:39 +1000 Subject: [PATCH 038/103] fdr: reduce confirm size to a u8 Also removes the flexible array member from the LitInfo structure. 
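
For illustration, the layout change amounts to the following sketch.
LitInfoLike and litPrefix() are invented stand-ins (the real struct is
LitInfo in fdr_confirm.h), and the field types shown for v/msk/groups
are assumptions standing in for CONF_TYPE and hwlm_group_t:

    #include <cstdint>

    // The over-long literal's prefix bytes are written directly after
    // the fixed-size struct in the bytecode, so the trailing "u8 s[1]"
    // flexible-array-member idiom is no longer needed.
    struct LitInfoLike {
        uint64_t v;
        uint64_t msk;
        uint64_t groups;
        uint32_t id;
        uint8_t size;  // was u32; the build side verifies lengths fit
        uint8_t flags;
        uint8_t next;
        uint8_t extended_size;
    };

    // How the runtime now reaches the trailing prefix string:
    static const uint8_t *litPrefix(const LitInfoLike *li) {
        return reinterpret_cast<const uint8_t *>(li) + sizeof(*li);
    }

With id moved ahead of the byte-sized fields, the u32 and the four u8s
pack without internal padding, and the prefix string follows at
sizeof(struct), reached with pointer arithmetic rather than through a
trailing array member.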
--- src/fdr/fdr_confirm.h | 9 ++++----- src/fdr/fdr_confirm_compile.cpp | 6 +++--- src/fdr/fdr_confirm_runtime.h | 5 +++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/fdr/fdr_confirm.h b/src/fdr/fdr_confirm.h index 865218b4..6ce85afd 100644 --- a/src/fdr/fdr_confirm.h +++ b/src/fdr/fdr_confirm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -52,19 +52,18 @@ typedef enum LitInfoFlags { /** * \brief Structure describing a literal, linked to by FDRConfirm. * - * This structure is followed in memory by a variable-sized string prefix at - * LitInfo::s, for strings that are longer than CONF_TYPE. + * This structure is followed in memory by a variable-sized string prefix, for + * strings that are longer than CONF_TYPE. */ struct LitInfo { CONF_TYPE v; CONF_TYPE msk; hwlm_group_t groups; - u32 size; u32 id; // literal ID as passed in + u8 size; u8 flags; /* LitInfoFlags */ u8 next; u8 extended_size; - u8 s[1]; // literal prefix, which continues "beyond" this struct. }; #define FDRC_FLAG_NO_CONFIRM 1 diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index f84ed402..e77c46d1 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -107,7 +107,7 @@ void fillLitInfo(const vector &lits, vector &tmpLitInfo, info.extended_size = verify_u8(lit.msk.size()); } info.flags = flags; - info.size = verify_u32(lit.s.size()); + info.size = verify_u8(lit.s.size()); info.groups = lit.groups; // these are built up assuming a LE machine @@ -333,8 +333,8 @@ getFDRConfirm(const vector &lits, bool applyOneCharOpt, const string &t = lits[litIdx].s; if (t.size() > sizeof(CONF_TYPE)) { size_t prefix_len = t.size() - sizeof(CONF_TYPE); - memcpy(&finalLI.s[0], t.c_str(), prefix_len); - ptr = &finalLI.s[0] + prefix_len; + memcpy(ptr, t.c_str(), prefix_len); + ptr += prefix_len; } ptr = ROUNDUP_PTR(ptr, alignof(LitInfo)); diff --git a/src/fdr/fdr_confirm_runtime.h b/src/fdr/fdr_confirm_runtime.h index 2b0cd595..87ade9fe 100644 --- a/src/fdr/fdr_confirm_runtime.h +++ b/src/fdr/fdr_confirm_runtime.h @@ -86,7 +86,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a // as for the regular case, no need to do a full confirm if // we're a short literal if (unlikely(li->size > sizeof(CONF_TYPE))) { - const u8 *s1 = li->s; + const u8 *s1 = (const u8 *)li + sizeof(*li); const u8 *s2 = s1 + full_overhang; const u8 *loc1 = history + len_history - full_overhang; const u8 *loc2 = buf; @@ -106,7 +106,8 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a // if string < conf_type we don't need regular string cmp if (unlikely(li->size > sizeof(CONF_TYPE))) { - if (cmpForward(loc, li->s, li->size - sizeof(CONF_TYPE), + const u8 *s = (const u8 *)li + sizeof(*li); + if (cmpForward(loc, s, li->size - sizeof(CONF_TYPE), caseless)) { goto out; } From 445cf987a8050e3413514d878d0268bd860d9888 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 28 Sep 2016 13:56:15 +1000 Subject: [PATCH 039/103] remove unused includes --- src/nfagraph/ng_rose.cpp | 2 -- src/nfagraph/ng_som.cpp | 1 - src/nfagraph/ng_squash.cpp | 1 - src/nfagraph/ng_violet.cpp | 2 -- src/rose/rose_build_role_aliasing.cpp | 1 - unit/internal/limex_nfa.cpp | 1 - 6 files changed, 8 deletions(-) diff --git a/src/nfagraph/ng_rose.cpp 
b/src/nfagraph/ng_rose.cpp index 108134a6..46f180a8 100644 --- a/src/nfagraph/ng_rose.cpp +++ b/src/nfagraph/ng_rose.cpp @@ -68,8 +68,6 @@ #include #include #include -#include -#include #define NDEBUG_PRINTF(x, ...) \ do { if (0) { DEBUG_PRINTF(x, ## __VA_ARGS__); } } while (0) diff --git a/src/nfagraph/ng_som.cpp b/src/nfagraph/ng_som.cpp index ed2942bb..09687c4f 100644 --- a/src/nfagraph/ng_som.cpp +++ b/src/nfagraph/ng_som.cpp @@ -40,7 +40,6 @@ #include "ng_redundancy.h" #include "ng_region.h" #include "ng_reports.h" -#include "ng_restructuring.h" #include "ng_rose.h" #include "ng_som.h" #include "ng_som_add_redundancy.h" diff --git a/src/nfagraph/ng_squash.cpp b/src/nfagraph/ng_squash.cpp index 6577673f..21703f8b 100644 --- a/src/nfagraph/ng_squash.cpp +++ b/src/nfagraph/ng_squash.cpp @@ -102,7 +102,6 @@ #include "ng_holder.h" #include "ng_prune.h" #include "ng_region.h" -#include "ng_restructuring.h" #include "ng_som_util.h" #include "ng_util.h" #include "ng_util.h" diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 538c945d..9c99ba8a 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -67,8 +67,6 @@ #include #include #include -#include -#include #include #define STAGE_DEBUG_PRINTF DEBUG_PRINTF diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp index 6a0c767d..4757eb11 100644 --- a/src/rose/rose_build_role_aliasing.cpp +++ b/src/rose/rose_build_role_aliasing.cpp @@ -40,7 +40,6 @@ #include "nfagraph/ng_is_equal.h" #include "nfagraph/ng_limex.h" #include "nfagraph/ng_prune.h" -#include "nfagraph/ng_restructuring.h" #include "nfagraph/ng_uncalc_components.h" #include "nfagraph/ng_util.h" #include "util/bitutils.h" diff --git a/unit/internal/limex_nfa.cpp b/unit/internal/limex_nfa.cpp index 1c742793..804fcb1f 100644 --- a/unit/internal/limex_nfa.cpp +++ b/unit/internal/limex_nfa.cpp @@ -37,7 +37,6 @@ #include "nfa/nfa_internal.h" #include "nfagraph/ng.h" #include "nfagraph/ng_limex.h" -#include "nfagraph/ng_restructuring.h" #include "nfagraph/ng_util.h" #include "util/alloc.h" #include "util/target_info.h" From 592ce06eeb7be6145c95b78568f85cec81637f0a Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 28 Sep 2016 13:57:24 +1000 Subject: [PATCH 040/103] Create combo tops for trigger limexes --- src/nfa/castlecompile.cpp | 5 - src/nfa/limex_compile.cpp | 57 ++-- src/nfa/limex_compile.h | 4 +- src/nfa/limex_limits.h | 3 +- src/nfagraph/ng_haig.cpp | 39 ++- src/nfagraph/ng_limex.cpp | 386 +++++++++++++++++++++----- src/nfagraph/ng_mcclellan.cpp | 47 +++- src/nfagraph/ng_mcclellan_internal.h | 10 +- src/nfagraph/ng_restructuring.cpp | 233 ++++------------ src/nfagraph/ng_restructuring.h | 39 +-- src/nfagraph/ng_uncalc_components.cpp | 225 ++++++++------- src/nfagraph/ng_uncalc_components.h | 9 +- src/nfagraph/ng_util.cpp | 97 +++++++ src/nfagraph/ng_util.h | 10 +- src/rose/rose_build_compile.cpp | 273 ++++++++++++++++-- src/rose/rose_build_merge.cpp | 28 +- 16 files changed, 928 insertions(+), 537 deletions(-) diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 11ae2000..b76078f9 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -978,11 +978,6 @@ unique_ptr makeHolder(const CastleProto &proto, auto g = ue2::make_unique(proto.kind); for (const auto &m : proto.repeats) { - if (m.first >= NFA_MAX_TOP_MASKS) { - DEBUG_PRINTF("top %u too big for an NFA\n", m.first); - return nullptr; - } - addToHolder(*g, m.first, m.second); } diff --git a/src/nfa/limex_compile.cpp 
b/src/nfa/limex_compile.cpp index 53a003e3..2c164090 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -41,7 +41,6 @@ #include "nfagraph/ng_holder.h" #include "nfagraph/ng_limex_accel.h" #include "nfagraph/ng_repeat.h" -#include "nfagraph/ng_restructuring.h" #include "nfagraph/ng_squash.h" #include "nfagraph/ng_util.h" #include "ue2common.h" @@ -74,6 +73,12 @@ using boost::adaptors::map_values; namespace ue2 { +/** + * \brief Special state index value meaning that the vertex will not + * participate in an (NFA/DFA/etc) implementation. + */ +static constexpr u32 NO_STATE = ~0; + namespace { struct precalcAccel { @@ -91,7 +96,7 @@ struct precalcAccel { struct limex_accel_info { ue2::unordered_set accelerable; map precalc; - ue2::unordered_map > friends; + ue2::unordered_map> friends; ue2::unordered_map accel_map; }; @@ -134,7 +139,7 @@ struct build_info { const vector &ri, const map &rsmi, const map &smi, - const map &ti, const set &zi, + const map> &ti, const set &zi, bool dai, bool sci, const CompileContext &cci, u32 nsi) : h(hi), state_ids(states_in), repeats(ri), tops(ti), zombies(zi), @@ -160,7 +165,7 @@ struct build_info { map reportSquashMap; map squashMap; - const map &tops; + const map> &tops; ue2::unordered_set tugs; map br_cyclic; const set &zombies; @@ -522,20 +527,25 @@ struct fas_visitor : public boost::default_bfs_visitor { }; static -void filterAccelStates(NGHolder &g, const map &tops, +void filterAccelStates(NGHolder &g, const map> &tops, ue2::unordered_map *accel_map) { /* We want the NFA_MAX_ACCEL_STATES best acceleration states, everything * else should be ditched. We use a simple BFS to choose accel states near * the start. */ - // Temporarily wire start to each top for the BFS. - vector topEdges; - wireStartToTops(g, tops, topEdges); + vector tempEdges; + for (const auto &vv : tops | map_values) { + for (NFAVertex v : vv) { + if (!edge(g.start, v, g).second) { + tempEdges.push_back(add_edge(g.start, v, g).first); + } + } + } // Similarly, connect (start, startDs) if necessary. if (!edge(g.start, g.startDs, g).second) { auto e = add_edge(g.start, g.startDs, g).first; - topEdges.push_back(e); // Remove edge later. + tempEdges.push_back(e); // Remove edge later. } ue2::unordered_map out; @@ -551,7 +561,7 @@ void filterAccelStates(NGHolder &g, const map &tops, ; /* found max accel_states */ } - remove_edges(topEdges, g); + remove_edges(tempEdges, g); assert(out.size() <= NFA_MAX_ACCEL_STATES); accel_map->swap(out); @@ -705,7 +715,7 @@ void fillAccelInfo(build_info &bi) { /** The AccelAux structure has large alignment specified, and this makes some * compilers do odd things unless we specify a custom allocator. 
*/ -typedef vector > +typedef vector> AccelAuxVector; #define IMPOSSIBLE_ACCEL_MASK (~0U) @@ -1122,19 +1132,20 @@ void buildTopMasks(const build_info &args, vector &topMasks) { u32 numMasks = args.tops.rbegin()->first + 1; // max mask index DEBUG_PRINTF("we have %u top masks\n", numMasks); - assert(numMasks <= NFA_MAX_TOP_MASKS); topMasks.assign(numMasks, NFAStateSet(args.num_states)); // all zeroes for (const auto &m : args.tops) { u32 mask_idx = m.first; - u32 state_id = args.state_ids.at(m.second); - DEBUG_PRINTF("state %u is in top mask %u\n", state_id, mask_idx); + for (NFAVertex v : m.second) { + u32 state_id = args.state_ids.at(v); + DEBUG_PRINTF("state %u is in top mask %u\n", state_id, mask_idx); - assert(mask_idx < numMasks); - assert(state_id != NO_STATE); + assert(mask_idx < numMasks); + assert(state_id != NO_STATE); - topMasks[mask_idx].set(state_id); + topMasks[mask_idx].set(state_id); + } } } @@ -2123,7 +2134,7 @@ struct Factory { u32 maxShift = findMaxVarShift(args, shiftCount); findExceptionalTransitions(args, exceptional, maxShift); - map > exceptionMap; + map> exceptionMap; vector reportList; u32 exceptionCount = buildExceptionMap(args, reports_cache, exceptional, @@ -2315,13 +2326,13 @@ MAKE_LIMEX_TRAITS(512) #ifndef NDEBUG // Some sanity tests, called by an assertion in generate(). static UNUSED -bool isSane(const NGHolder &h, const map &tops, +bool isSane(const NGHolder &h, const map> &tops, const ue2::unordered_map &state_ids, u32 num_states) { ue2::unordered_set seen; ue2::unordered_set top_starts; - for (const auto &m : tops) { - top_starts.insert(m.second); + for (const auto &vv : tops | map_values) { + insert(&top_starts, vv); } for (auto v : vertices_range(h)) { @@ -2385,7 +2396,7 @@ aligned_unique_ptr generate(NGHolder &h, const vector &repeats, const map &reportSquashMap, const map &squashMap, - const map &tops, + const map> &tops, const set &zombies, bool do_accel, bool stateCompression, @@ -2457,7 +2468,7 @@ u32 countAccelStates(NGHolder &h, const vector &repeats, const map &reportSquashMap, const map &squashMap, - const map &tops, + const map> &tops, const set &zombies, const CompileContext &cc) { const u32 num_states = max_state(states) + 1; diff --git a/src/nfa/limex_compile.h b/src/nfa/limex_compile.h index 62a07e10..21cb7608 100644 --- a/src/nfa/limex_compile.h +++ b/src/nfa/limex_compile.h @@ -71,7 +71,7 @@ aligned_unique_ptr generate(NGHolder &g, const std::vector &repeats, const std::map &reportSquashMap, const std::map &squashMap, - const std::map &tops, + const std::map> &tops, const std::set &zombies, bool do_accel, bool stateCompression, @@ -89,7 +89,7 @@ u32 countAccelStates(NGHolder &h, const std::vector &repeats, const std::map &reportSquashMap, const std::map &squashMap, - const std::map &tops, + const std::map> &tops, const std::set &zombies, const CompileContext &cc); diff --git a/src/nfa/limex_limits.h b/src/nfa/limex_limits.h index 9b35b115..f4df54a4 100644 --- a/src/nfa/limex_limits.h +++ b/src/nfa/limex_limits.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,6 +31,5 @@ #define NFA_MAX_STATES 512 /**< max states in an NFA */ #define NFA_MAX_ACCEL_STATES 8 /**< max accel states in a NFA */ -#define NFA_MAX_TOP_MASKS 32 /**< max number of MQE_TOP_N event types */ #endif diff --git a/src/nfagraph/ng_haig.cpp 
b/src/nfagraph/ng_haig.cpp index e70b7708..143dca16 100644 --- a/src/nfagraph/ng_haig.cpp +++ b/src/nfagraph/ng_haig.cpp @@ -35,7 +35,6 @@ #include "nfa/goughcompile.h" #include "ng_holder.h" #include "ng_mcclellan_internal.h" -#include "ng_restructuring.h" #include "ng_som_util.h" #include "ng_squash.h" #include "ng_util.h" @@ -118,11 +117,11 @@ public: using StateMap = typename Automaton_Traits::StateMap; protected: - Automaton_Base(const NGHolder &graph_in, - const flat_set &unused_in, som_type som, + Automaton_Base(const NGHolder &graph_in, som_type som, const vector> &triggers, bool unordered_som) - : graph(graph_in), numStates(num_vertices(graph)), unused(unused_in), + : graph(graph_in), numStates(num_vertices(graph)), + unused(getRedundantStarts(graph_in)), init(Automaton_Traits::init_states(numStates)), initDS(Automaton_Traits::init_states(numStates)), squash(Automaton_Traits::init_states(numStates)), @@ -210,7 +209,7 @@ public: const NGHolder &graph; const u32 numStates; - const flat_set &unused; + const flat_set unused; array alpha; array unalpha; @@ -251,10 +250,9 @@ struct Big_Traits { class Automaton_Big : public Automaton_Base { public: - Automaton_Big(const NGHolder &graph_in, - const flat_set &unused_in, som_type som, + Automaton_Big(const NGHolder &graph_in, som_type som, const vector> &triggers, bool unordered_som) - : Automaton_Base(graph_in, unused_in, som, triggers, unordered_som) {} + : Automaton_Base(graph_in, som, triggers, unordered_som) {} }; struct Graph_Traits { @@ -278,11 +276,10 @@ struct Graph_Traits { class Automaton_Graph : public Automaton_Base { public: - Automaton_Graph(const NGHolder &graph_in, - const flat_set &unused_in, som_type som, + Automaton_Graph(const NGHolder &graph_in, som_type som, const vector> &triggers, bool unordered_som) - : Automaton_Base(graph_in, unused_in, som, triggers, unordered_som) {} + : Automaton_Base(graph_in, som, triggers, unordered_som) {} }; class Automaton_Haig_Merge { @@ -512,15 +509,14 @@ void haig_note_starts(const NGHolder &g, map *out) { template static -bool doHaig(const NGHolder &g, - const flat_set &unused, - som_type som, const vector> &triggers, - bool unordered_som, raw_som_dfa *rdfa) { +bool doHaig(const NGHolder &g, som_type som, + const vector> &triggers, bool unordered_som, + raw_som_dfa *rdfa) { u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from a fight */ typedef typename Auto::StateSet StateSet; vector nfa_state_map; - Auto n(g, unused, som, triggers, unordered_som); + Auto n(g, som, triggers, unordered_som); try { if (determinise(n, rdfa->states, state_limit, &nfa_state_map)) { DEBUG_PRINTF("state limit exceeded\n"); @@ -550,9 +546,9 @@ bool doHaig(const NGHolder &g, haig_do_preds(g, source_states, n.v_by_index, rdfa->state_som.back().preds); - haig_do_report(g, unused, g.accept, source_states, n.v_by_index, + haig_do_report(g, n.unused, g.accept, source_states, n.v_by_index, rdfa->state_som.back().reports); - haig_do_report(g, unused, g.acceptEod, source_states, n.v_by_index, + haig_do_report(g, n.unused, g.acceptEod, source_states, n.v_by_index, rdfa->state_som.back().reports_eod); } @@ -577,8 +573,6 @@ attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision, assert(allMatchStatesHaveReports(g)); assert(hasCorrectlyNumberedVertices(g)); - auto unused = findUnusedStates(g); - u32 numStates = num_vertices(g); if (numStates > HAIG_MAX_NFA_STATE) { DEBUG_PRINTF("giving up... 
looks too big\n"); @@ -592,12 +586,11 @@ attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision, bool rv; if (numStates <= NFA_STATE_LIMIT) { /* fast path */ - rv = doHaig(g, unused, som, triggers, unordered_som, + rv = doHaig(g, som, triggers, unordered_som, rdfa.get()); } else { /* not the fast path */ - rv = doHaig(g, unused, som, triggers, unordered_som, - rdfa.get()); + rv = doHaig(g, som, triggers, unordered_som, rdfa.get()); } if (!rv) { diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp index c6e4c24e..66494c77 100644 --- a/src/nfagraph/ng_limex.cpp +++ b/src/nfagraph/ng_limex.cpp @@ -54,10 +54,15 @@ #include "util/ue2_containers.h" #include "util/verify_types.h" +#include #include #include +#include + using namespace std; +using boost::adaptors::map_values; +using boost::adaptors::map_keys; namespace ue2 { @@ -146,78 +151,310 @@ void dropRedundantStartEdges(NGHolder &g) { } static -void makeTopStates(NGHolder &g, map &tops, - const map &top_reach) { - /* TODO: more intelligent creation of top states */ - map> top_succs; - for (const auto &e : out_edges_range(g.start, g)) { - NFAVertex v = target(e, g); - if (v == g.startDs) { - continue; - } - for (u32 t : g[e].tops) { - top_succs[t].push_back(v); - } - } - - for (const auto &top : top_succs) { - u32 t = top.first; - - CharReach top_cr; +CharReach calcTopVertexReach(const flat_set &tops, + const map &top_reach) { + CharReach top_cr; + for (u32 t : tops) { if (contains(top_reach, t)) { - top_cr = top_reach.at(t); + top_cr |= top_reach.at(t); } else { top_cr = CharReach::dot(); - } - - assert(!contains(tops, t)); - - NFAVertex s = NGHolder::null_vertex(); - flat_set succs; - insert(&succs, top.second); - - for (auto v : top.second) { - if (!top_cr.isSubsetOf(g[v].char_reach)) { - continue; - } - - flat_set vsuccs; - insert(&vsuccs, adjacent_vertices(v, g)); - - if (succs != vsuccs) { - continue; - } - - if (g[v].reports != g[g.start].reports) { - continue; - } - s = v; break; } + } + return top_cr; +} - if (!s) { - s = add_vertex(g[g.start], g); - g[s].char_reach = top_cr; - for (auto v : top.second) { - add_edge(s, v, g); +static +NFAVertex makeTopStartVertex(NGHolder &g, const flat_set &tops, + const flat_set &succs, + const map &top_reach) { + assert(!succs.empty()); + assert(!tops.empty()); + + bool reporter = false; + + NFAVertex u = add_vertex(g[g.start], g); + CharReach top_cr = calcTopVertexReach(tops, top_reach); + g[u].char_reach = top_cr; + for (auto v : succs) { + if (v == g.accept || v == g.acceptEod) { + reporter = true; + } + add_edge(u, v, g); + } + + // Only retain reports (which we copied on add_vertex above) for new top + // vertices connected to accepts. 
+ if (!reporter) { + g[u].reports.clear(); + } + + return u; +} + +static +void pickNextTopStateToHandle(const map> &top_succs, + const map> &succ_tops, + flat_set *picked_tops, + flat_set *picked_succs) { + /* pick top or vertex we want to handle */ + if (top_succs.size() < succ_tops.size()) { + auto best = top_succs.end(); + for (auto it = top_succs.begin(); it != top_succs.end(); ++it) { + if (best == top_succs.end() + || it->second.size() < best->second.size()) { + best = it; } } - tops[t] = s; + assert(best != top_succs.end()); + assert(!best->second.empty()); /* should already been pruned */ + + *picked_tops = { best->first }; + *picked_succs = best->second; + } else { + auto best = succ_tops.end(); + for (auto it = succ_tops.begin(); it != succ_tops.end(); ++it) { + /* have to worry about determinism for this one */ + if (best == succ_tops.end() + || it->second.size() < best->second.size() + || (it->second.size() == best->second.size() + && it->second < best->second)) { + best = it; + } + } + assert(best != succ_tops.end()); + assert(!best->second.empty()); /* should already been pruned */ + + *picked_succs = { best->first }; + *picked_tops = best->second; } +} + +static +void expandCbsByTops(const map> &unhandled_top_succs, + const map> &top_succs, + const map> &succ_tops, + flat_set &picked_tops, + flat_set &picked_succs) { + NFAVertex v = *picked_succs.begin(); /* arbitrary successor - all equiv */ + const auto &cand_tops = succ_tops.at(v); + + for (u32 t : cand_tops) { + if (!contains(unhandled_top_succs, t)) { + continue; + } + if (!has_intersection(unhandled_top_succs.at(t), picked_succs)) { + continue; /* not adding any useful work that hasn't already been + * done */ + } + if (!is_subset_of(picked_succs, top_succs.at(t))) { + continue; /* will not form a cbs */ + } + picked_tops.insert(t); + } +} + +static +void expandCbsBySuccs(const map> &unhandled_succ_tops, + const map> &top_succs, + const map> &succ_tops, + flat_set &picked_tops, + flat_set &picked_succs) { + u32 t = *picked_tops.begin(); /* arbitrary top - all equiv */ + const auto &cand_succs = top_succs.at(t); + + for (NFAVertex v : cand_succs) { + if (!contains(unhandled_succ_tops, v)) { + continue; + } + if (!has_intersection(unhandled_succ_tops.at(v), picked_tops)) { + continue; /* not adding any useful work that hasn't already been + * done */ + } + if (!is_subset_of(picked_tops, succ_tops.at(v))) { + continue; /* will not form a cbs */ + } + picked_succs.insert(v); + } +} + +/* See if we can expand the complete bipartite subgraph (cbs) specified by the + * picked tops/succs by adding more to either of the tops or succs. 
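+ * A cbs here is a set of tops T and a set of successors S such that every
+ * top in T triggers every successor in S; one start vertex can then serve
+ * all of T.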
+ */ +static +void expandTopSuccCbs(const map> &top_succs, + const map> &succ_tops, + const map> &unhandled_top_succs, + const map> &unhandled_succ_tops, + flat_set &picked_tops, + flat_set &picked_succs) { + /* Note: all picked (tops|succs) are equivalent */ + + /* Try to expand first (as we are more likely to succeed) on the side + * with fewest remaining things to be handled */ + + if (unhandled_top_succs.size() < unhandled_succ_tops.size()) { + expandCbsByTops(unhandled_top_succs, top_succs, succ_tops, + picked_tops, picked_succs); + expandCbsBySuccs(unhandled_succ_tops, top_succs, succ_tops, + picked_tops, picked_succs); + } else { + expandCbsBySuccs(unhandled_succ_tops, top_succs, succ_tops, + picked_tops, picked_succs); + expandCbsByTops(unhandled_top_succs, top_succs, succ_tops, + picked_tops, picked_succs); + } +} + +static +void markTopSuccAsHandled(NFAVertex start_v, + const flat_set &handled_tops, + const flat_set &handled_succs, + map> &tops_out, + map> &unhandled_top_succs, + map> &unhandled_succ_tops) { + for (u32 t : handled_tops) { + tops_out[t].insert(start_v); + assert(contains(unhandled_top_succs, t)); + erase_all(&unhandled_top_succs[t], handled_succs); + if (unhandled_top_succs[t].empty()) { + unhandled_top_succs.erase(t); + } + } + + for (NFAVertex v : handled_succs) { + assert(contains(unhandled_succ_tops, v)); + erase_all(&unhandled_succ_tops[v], handled_tops); + if (unhandled_succ_tops[v].empty()) { + unhandled_succ_tops.erase(v); + } + } +} + +static +void attemptToUseAsStart(const NGHolder &g, NFAVertex u, + const map &top_reach, + map> &unhandled_top_succs, + map> &unhandled_succ_tops, + map> &tops_out) { + flat_set top_inter = unhandled_succ_tops.at(u); + flat_set succs; + for (NFAVertex v : adjacent_vertices_range(u, g)) { + if (!contains(unhandled_succ_tops, v)) { + return; + } + const flat_set &v_tops = unhandled_succ_tops.at(v); + flat_set new_inter; + auto ni_inserter = inserter(new_inter, new_inter.end()); + set_intersection(top_inter.begin(), top_inter.end(), + v_tops.begin(), v_tops.end(), ni_inserter); + top_inter = move(new_inter); + succs.insert(v); + } + + if (top_inter.empty()) { + return; + } + + auto top_cr = calcTopVertexReach(top_inter, top_reach); + if (!top_cr.isSubsetOf(g[u].char_reach)) { + return; + } + + markTopSuccAsHandled(u, top_inter, succs, tops_out, unhandled_top_succs, + unhandled_succ_tops); +} + +/* We may have cases where a top triggers something that starts with a .* (or + * similar state). In these cases we can make use of that state as a start + * state. + */ +static +void reusePredsAsStarts(const NGHolder &g, const map &top_reach, + map> &unhandled_top_succs, + map> &unhandled_succ_tops, + map> &tops_out) { + /* create list of candidates first, to avoid issues of iter invalidation + * and determinism */ + vector cand_starts; + for (NFAVertex u : unhandled_succ_tops | map_keys) { + if (hasSelfLoop(u, g)) { + cand_starts.push_back(u); + } + } + sort(cand_starts.begin(), cand_starts.end(), make_index_ordering(g)); + + for (NFAVertex u : cand_starts) { + if (!contains(unhandled_succ_tops, u)) { + continue; + } + attemptToUseAsStart(g, u, top_reach, unhandled_top_succs, + unhandled_succ_tops, tops_out); + } +} + +static +void makeTopStates(NGHolder &g, map> &tops_out, + const map &top_reach) { + /* Ideally, we want to add the smallest number of states to the graph for + * tops to turn on so that they can accurately trigger their successors. + * + * The relationships between tops and their successors forms a bipartite + * graph. 
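+ * (For example, two tops that lead to exactly the same set of successors
+ * form a complete bipartite block and can share one start state.)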
Finding the optimal number of start states to add is equivalent to + * finding a minimal biclique coverings. Unfortunately, this is known to be + * NP-complete. + * + * Given this, we will just do something simple to avoid creating something + * truly wasteful: + * 1) Try to find any cyclic states which can act as their own start states + * 2) Pick a top or a succ to create a start state for and then try to find + * the largest complete bipartite subgraph that it is part of. + */ + + map> top_succs; + map> succ_tops; + for (const auto &e : out_edges_range(g.start, g)) { + NFAVertex v = target(e, g); + for (u32 t : g[e].tops) { + top_succs[t].insert(v); + succ_tops[v].insert(t); + } + } + + auto unhandled_top_succs = top_succs; + auto unhandled_succ_tops = succ_tops; + + reusePredsAsStarts(g, top_reach, unhandled_top_succs, unhandled_succ_tops, + tops_out); + + /* Note: there may be successors which are equivalent (in terms of + top-triggering), it may be more efficient to discover this and treat them + as a unit. TODO */ + + while (!unhandled_succ_tops.empty()) { + assert(!unhandled_top_succs.empty()); + flat_set u_tops; + flat_set u_succs; + pickNextTopStateToHandle(unhandled_top_succs, unhandled_succ_tops, + &u_tops, &u_succs); + + expandTopSuccCbs(top_succs, succ_tops, unhandled_top_succs, + unhandled_succ_tops, u_tops, u_succs); + + /* create start vertex to handle this top/succ combination */ + NFAVertex u = makeTopStartVertex(g, u_tops, u_succs, top_reach); + + /* update maps */ + markTopSuccAsHandled(u, u_tops, u_succs, tops_out, unhandled_top_succs, + unhandled_succ_tops); + } + assert(unhandled_top_succs.empty()); // We are completely replacing the start vertex, so clear its reports. clear_out_edges(g.start, g); add_edge(g.start, g.startDs, g); g[g.start].reports.clear(); - - // Only retain reports (which we copied on add_vertex above) for new top - // vertices connected to accepts. - for (const auto &m : tops) { - NFAVertex v = m.second; - if (!edge(v, g.accept, g).second && !edge(v, g.acceptEod, g).second) { - g[v].reports.clear(); - } - } } static @@ -325,7 +562,8 @@ prepareGraph(const NGHolder &h_in, const ReportManager *rm, const map>> &triggers, bool impl_test_only, const CompileContext &cc, ue2::unordered_map &state_ids, - vector &repeats, map &tops) { + vector &repeats, + map> &tops) { assert(is_triggered(h_in) || fixed_depth_tops.empty()); unique_ptr h = cloneHolder(h_in); @@ -335,15 +573,19 @@ prepareGraph(const NGHolder &h_in, const ReportManager *rm, impl_test_only, cc.grey); // If we're building a rose/suffix, do the top dance. + flat_set topVerts; if (is_triggered(*h)) { makeTopStates(*h, tops, findTopReach(triggers)); + + for (const auto &vv : tops | map_values) { + insert(&topVerts, vv); + } } dropRedundantStartEdges(*h); // Do state numbering - state_ids = numberStates(*h, tops); - dropUnusedStarts(*h, state_ids); + state_ids = numberStates(*h, topVerts); // In debugging, we sometimes like to reverse the state numbering to stress // the NFA construction code. @@ -389,14 +631,14 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm, ue2::unordered_map state_ids; vector repeats; - map tops; + map> tops; unique_ptr h = prepareGraph(h_in, rm, fixed_depth_tops, triggers, impl_test_only, cc, state_ids, repeats, tops); // Quick exit: if we've got an embarrassment of riches, i.e. more states // than we can implement in our largest NFA model, bail here. 
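For intuition, the covering loop in makeTopStates above reduces to a few lines over plain containers. The sketch below is illustrative only: Top, Succ and the sample data are invented, and the real heuristics for picking and expanding blocks are richer (they consider both sides of the bipartite relation). It greedily picks the top with the fewest unhandled successors, grows the largest complete bipartite block around it, and emits one start state per block.

// Toy model of the combo-top covering performed by makeTopStates().
// Illustrative only: Top/Succ and the sample data are invented; the real
// code operates on NGHolder vertices.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <iterator>
#include <map>
#include <set>

using Top = uint32_t;
using Succ = char;

int main() {
    std::map<Top, std::set<Succ>> top_succs = {
        {0, {'a', 'b'}}, {1, {'a', 'b'}}, {2, {'a', 'b', 'c'}}};
    auto unhandled = top_succs;
    int starts = 0;

    while (!unhandled.empty()) {
        // Pick the top with the fewest unhandled successors...
        auto best = unhandled.begin();
        for (auto it = std::next(best); it != unhandled.end(); ++it) {
            if (it->second.size() < best->second.size()) {
                best = it;
            }
        }
        std::set<Top> tops = {best->first};
        std::set<Succ> succs = best->second;

        // ...then pull in every top that also triggers all of those
        // successors: together they form a complete bipartite block.
        for (const auto &e : unhandled) {
            if (std::includes(e.second.begin(), e.second.end(),
                              succs.begin(), succs.end())) {
                tops.insert(e.first);
            }
        }

        // One new start state serves the whole block.
        starts++;
        std::printf("start %d covers %zu tops, %zu succs\n", starts,
                    tops.size(), succs.size());

        // Mark the block's (top, successor) pairs as handled.
        for (Top t : tops) {
            for (Succ s : succs) {
                unhandled[t].erase(s);
            }
            if (unhandled[t].empty()) {
                unhandled.erase(t);
            }
        }
    }
    return 0;
}

On this input the sketch emits two start states where the naive one-state-per-top approach would emit three.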
- u32 numStates = countStates(*h, state_ids, false); + u32 numStates = countStates(state_ids); if (numStates > NFA_MAX_STATES) { DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates); return nullptr; @@ -469,13 +711,11 @@ aligned_unique_ptr constructReversedNFA_i(const NGHolder &h_in, u32 hint, assert(h.kind == NFA_REV_PREFIX); /* triggered, raises internal callbacks */ // Do state numbering. - auto state_ids = numberStates(h); - - dropUnusedStarts(h, state_ids); + auto state_ids = numberStates(h, {}); // Quick exit: if we've got an embarrassment of riches, i.e. more states // than we can implement in our largest NFA model, bail here. - u32 numStates = countStates(h, state_ids, false); + u32 numStates = countStates(state_ids); if (numStates > NFA_MAX_STATES) { DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates); return nullptr; @@ -483,7 +723,7 @@ aligned_unique_ptr constructReversedNFA_i(const NGHolder &h_in, u32 hint, assert(sanityCheckGraph(h, state_ids)); - map tops; /* only the standards tops for nfas */ + map> tops; /* only the standards tops for nfas */ set zombies; vector repeats; map reportSquashMap; @@ -518,7 +758,7 @@ u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm, // Quick check: we can always implement an NFA with less than NFA_MAX_STATES // states. Note that top masks can generate extra states, so we account for // those here too. - if (num_vertices(g) + NFA_MAX_TOP_MASKS < NFA_MAX_STATES) { + if (num_vertices(g) + getTops(g).size() < NFA_MAX_STATES) { return true; } @@ -539,12 +779,12 @@ u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm, ue2::unordered_map state_ids; vector repeats; - map tops; + map> tops; unique_ptr h = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc, state_ids, repeats, tops); assert(h); - u32 numStates = countStates(*h, state_ids, false); + u32 numStates = countStates(state_ids); if (numStates <= NFA_MAX_STATES) { return numStates; } @@ -586,12 +826,12 @@ u32 countAccelStates(const NGHolder &g, const ReportManager *rm, ue2::unordered_map state_ids; vector repeats; - map tops; + map> tops; unique_ptr h = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc, state_ids, repeats, tops); - if (!h || countStates(*h, state_ids, false) > NFA_MAX_STATES) { + if (!h || countStates(state_ids) > NFA_MAX_STATES) { DEBUG_PRINTF("not constructible\n"); return NFA_MAX_ACCEL_STATES + 1; } diff --git a/src/nfagraph/ng_mcclellan.cpp b/src/nfagraph/ng_mcclellan.cpp index 39788570..71c9a05e 100644 --- a/src/nfagraph/ng_mcclellan.cpp +++ b/src/nfagraph/ng_mcclellan.cpp @@ -36,7 +36,6 @@ #include "nfa/rdfa.h" #include "ng_holder.h" #include "ng_mcclellan_internal.h" -#include "ng_restructuring.h" #include "ng_squash.h" #include "ng_util.h" #include "ue2common.h" @@ -348,10 +347,11 @@ public: using StateMap = typename Automaton_Traits::StateMap; Automaton_Base(const ReportManager *rm_in, const NGHolder &graph_in, - const flat_set &unused_in, bool single_trigger, + bool single_trigger, const vector> &triggers, bool prunable_in) : rm(rm_in), graph(graph_in), numStates(num_vertices(graph)), - unused(unused_in), init(Automaton_Traits::init_states(numStates)), + unused(getRedundantStarts(graph_in)), + init(Automaton_Traits::init_states(numStates)), initDS(Automaton_Traits::init_states(numStates)), squash(Automaton_Traits::init_states(numStates)), accept(Automaton_Traits::init_states(numStates)), @@ -444,7 +444,7 @@ private: public: const NGHolder &graph; u32 numStates; - const flat_set &unused; + 
const flat_set unused; vector v_by_index; vector cr_by_index; /* pre alpha'ed */ StateSet init; @@ -482,9 +482,9 @@ struct Big_Traits { class Automaton_Big : public Automaton_Base { public: Automaton_Big(const ReportManager *rm_in, const NGHolder &graph_in, - const flat_set &unused_in, bool single_trigger, + bool single_trigger, const vector> &triggers, bool prunable_in) - : Automaton_Base(rm_in, graph_in, unused_in, single_trigger, triggers, + : Automaton_Base(rm_in, graph_in, single_trigger, triggers, prunable_in) {} }; @@ -510,14 +510,36 @@ struct Graph_Traits { class Automaton_Graph : public Automaton_Base { public: Automaton_Graph(const ReportManager *rm_in, const NGHolder &graph_in, - const flat_set &unused_in, bool single_trigger, - const vector> &triggers, bool prunable_in) - : Automaton_Base(rm_in, graph_in, unused_in, single_trigger, triggers, + bool single_trigger, + const vector> &triggers, bool prunable_in) + : Automaton_Base(rm_in, graph_in, single_trigger, triggers, prunable_in) {} }; } // namespace +static +bool startIsRedundant(const NGHolder &g) { + set start; + set startDs; + + insert(&start, adjacent_vertices(g.start, g)); + insert(&startDs, adjacent_vertices(g.startDs, g)); + + return start == startDs; +} + +flat_set getRedundantStarts(const NGHolder &g) { + flat_set dead; + if (startIsRedundant(g)) { + dead.insert(g.start); + } + if (proper_out_degree(g.startDs, g) == 0) { + dead.insert(g.startDs); + } + return dead; +} + unique_ptr buildMcClellan(const NGHolder &graph, const ReportManager *rm, bool single_trigger, const vector> &triggers, @@ -526,8 +548,6 @@ unique_ptr buildMcClellan(const NGHolder &graph, return nullptr; } - auto unused = findUnusedStates(graph); - DEBUG_PRINTF("attempting to build ?%d? mcclellan\n", (int)graph.kind); assert(allMatchStatesHaveReports(graph)); @@ -553,8 +573,7 @@ unique_ptr buildMcClellan(const NGHolder &graph, if (numStates <= NFA_STATE_LIMIT) { /* Fast path. Automaton_Graph uses a bitfield internally to represent * states and is quicker than Automaton_Big. */ - Automaton_Graph n(rm, graph, unused, single_trigger, triggers, - prunable); + Automaton_Graph n(rm, graph, single_trigger, triggers, prunable); if (determinise(n, rdfa->states, state_limit)) { DEBUG_PRINTF("state limit exceeded\n"); return nullptr; /* over state limit */ @@ -566,7 +585,7 @@ unique_ptr buildMcClellan(const NGHolder &graph, rdfa->alpha_remap = n.alpha; } else { /* Slow path. Too many states to use Automaton_Graph. 
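 * Automaton_Big falls back to dynamically-sized state sets, trading the
 * speed of Automaton_Graph's fixed-width bitfield for capacity.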
*/ - Automaton_Big n(rm, graph, unused, single_trigger, triggers, prunable); + Automaton_Big n(rm, graph, single_trigger, triggers, prunable); if (determinise(n, rdfa->states, state_limit)) { DEBUG_PRINTF("state limit exceeded\n"); return nullptr; /* over state limit */ diff --git a/src/nfagraph/ng_mcclellan_internal.h b/src/nfagraph/ng_mcclellan_internal.h index 22fcf01e..b78dac3b 100644 --- a/src/nfagraph/ng_mcclellan_internal.h +++ b/src/nfagraph/ng_mcclellan_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,7 +36,6 @@ #include "ue2common.h" #include "nfa/mcclellancompile.h" #include "nfagraph/ng_holder.h" -#include "nfagraph/ng_restructuring.h" // for NO_STATE #include "util/charreach.h" #include "util/graph_range.h" #include "util/ue2_containers.h" @@ -69,6 +68,13 @@ void markToppableStarts(const NGHolder &g, const flat_set &unused, const std::vector> &triggers, boost::dynamic_bitset<> *out); +/** + * \brief Returns a set of start vertices that will not participate in an + * implementation of this graph. These are either starts with no successors or + * starts which are redundant with startDs. + */ +flat_set getRedundantStarts(const NGHolder &g); + template void transition_graph(autom &nfa, const std::vector &vByStateId, const typename autom::StateSet &in, diff --git a/src/nfagraph/ng_restructuring.cpp b/src/nfagraph/ng_restructuring.cpp index c85860c7..46990330 100644 --- a/src/nfagraph/ng_restructuring.cpp +++ b/src/nfagraph/ng_restructuring.cpp @@ -49,37 +49,71 @@ namespace ue2 { /** Connect the start vertex to each of the vertices in \p tops. This is useful * temporarily for when we need to run a graph algorithm that expects a single * source vertex. */ -void wireStartToTops(NGHolder &g, const map &tops, - vector &topEdges) { - for (const auto &top : tops) { - NFAVertex v = top.second; +static +void wireStartToTops(NGHolder &g, const flat_set &tops, + vector &tempEdges) { + for (NFAVertex v : tops) { assert(!isLeafNode(v, g)); const NFAEdge &e = add_edge(g.start, v, g).first; - topEdges.push_back(e); + tempEdges.push_back(e); } } +/** + * Returns true if start's successors (aside from startDs) are subset of + * startDs's proper successors or if start has no successors other than startDs. + */ static -void getStateOrdering(NGHolder &g, const map &tops, +bool startIsRedundant(const NGHolder &g) { + /* We ignore startDs as the self-loop may have been stripped as an + * optimisation for repeats (improveLeadingRepeats()). */ + set start; + insert(&start, adjacent_vertices_range(g.start, g)); + start.erase(g.startDs); + + // Trivial case: start has no successors other than startDs. + if (start.empty()) { + DEBUG_PRINTF("start has no out-edges other than to startDs\n"); + return true; + } + + set startDs; + insert(&startDs, adjacent_vertices_range(g.startDs, g)); + startDs.erase(g.startDs); + + if (!is_subset_of(start, startDs)) { + DEBUG_PRINTF("out-edges of start and startDs aren't equivalent\n"); + return false; + } + + return true; +} + +static +void getStateOrdering(NGHolder &g, const flat_set &tops, vector &ordering) { // First, wire up our "tops" to start so that we have a single source, // which will give a nicer topo order. 
- vector topEdges; - wireStartToTops(g, tops, topEdges); + vector tempEdges; + wireStartToTops(g, tops, tempEdges); renumberGraphVertices(g); vector temp = getTopoOrdering(g); - remove_edges(topEdges, g); + remove_edges(tempEdges, g); // Move {start, startDs} to the end, so they'll be first when we reverse - // the ordering. + // the ordering (if they are required). temp.erase(remove(temp.begin(), temp.end(), g.startDs)); temp.erase(remove(temp.begin(), temp.end(), g.start)); - temp.push_back(g.startDs); - temp.push_back(g.start); + if (proper_out_degree(g.startDs, g)) { + temp.push_back(g.startDs); + } + if (!startIsRedundant(g)) { + temp.push_back(g.start); + } // Walk ordering, remove vertices that shouldn't be participating in state // numbering, such as accepts. @@ -149,16 +183,15 @@ void optimiseTightLoops(const NGHolder &g, vector &ordering) { continue; } - DEBUG_PRINTF("moving vertex %u next to %u\n", - g[v].index, g[u].index); + DEBUG_PRINTF("moving vertex %u next to %u\n", g[v].index, g[u].index); ordering.erase(v_it); ordering.insert(++u_it, v); } } -ue2::unordered_map -numberStates(NGHolder &h, const map &tops) { +unordered_map +numberStates(NGHolder &h, const flat_set &tops) { DEBUG_PRINTF("numbering states for holder %p\n", &h); vector ordering; @@ -166,15 +199,10 @@ numberStates(NGHolder &h, const map &tops) { optimiseTightLoops(h, ordering); - ue2::unordered_map states = getStateIndices(h, ordering); - - return states; + return getStateIndices(h, ordering); } -u32 countStates(const NGHolder &g, - const ue2::unordered_map &state_ids, - bool addTops) { - /* TODO: smarter top state allocation, move to limex? */ +u32 countStates(const unordered_map &state_ids) { if (state_ids.empty()) { return 0; } @@ -185,168 +213,9 @@ u32 countStates(const NGHolder &g, max_state = max(m.second, max_state); } } - u32 num_states = max_state + 1; - assert(contains(state_ids, g.start)); - if (addTops && is_triggered(g) && state_ids.at(g.start) != NO_STATE) { - num_states--; - set tops; - for (auto e : out_edges_range(g.start, g)) { - insert(&tops, g[e].tops); - } - num_states += tops.size(); - } - return num_states; } -/** - * Returns true if start leads to all of startDs's proper successors or if - * start has no successors other than startDs. - */ -static -bool startIsRedundant(const NGHolder &g) { - set start, startDs; - - for (const auto &e : out_edges_range(g.start, g)) { - NFAVertex v = target(e, g); - if (v == g.startDs) { - continue; - } - start.insert(v); - } - - for (const auto &e : out_edges_range(g.startDs, g)) { - NFAVertex v = target(e, g); - if (v == g.startDs) { - continue; - } - startDs.insert(v); - } - - // Trivial case: start has no successors other than startDs. - if (start.empty()) { - DEBUG_PRINTF("start has no out-edges other than to startDs\n"); - return true; - } - - if (start != startDs) { - DEBUG_PRINTF("out-edges of start and startDs aren't equivalent\n"); - return false; - } - - return true; -} - -/** One final, FINAL optimisation. Drop either start or startDs if it's unused - * in this graph. We leave this until this late because having both vertices in - * the graph, with fixed state indices, is useful for merging and other - * analyses. 
*/ -void dropUnusedStarts(NGHolder &g, ue2::unordered_map &states) { - u32 adj = 0; - - if (startIsRedundant(g)) { - DEBUG_PRINTF("dropping unused start\n"); - states[g.start] = NO_STATE; - adj++; - } - - if (proper_out_degree(g.startDs, g) == 0) { - DEBUG_PRINTF("dropping unused startDs\n"); - states[g.startDs] = NO_STATE; - adj++; - } - - if (!adj) { - DEBUG_PRINTF("both start and startDs must remain\n"); - return; - } - - // We have removed one or both of the starts. Walk the non-special vertices - // in the graph with state indices assigned to them and subtract - // adj from all of them. - for (auto v : vertices_range(g)) { - u32 &state = states[v]; // note ref - if (state == NO_STATE) { - continue; - } - if (is_any_start(v, g)) { - assert(state <= 1); - state = 0; // one start remains - } else { - assert(!is_special(v, g)); - assert(state >= adj); - state -= adj; - } - } -} - -flat_set findUnusedStates(const NGHolder &g) { - flat_set dead; - if (startIsRedundant(g)) { - dead.insert(g.start); - } - if (proper_out_degree(g.startDs, g) == 0) { - dead.insert(g.startDs); - } - return dead; -} - -/** Construct a reversed copy of an arbitrary NGHolder, mapping starts to - * accepts. */ -void reverseHolder(const NGHolder &g_in, NGHolder &g) { - // Make the BGL do the grunt work. - ue2::unordered_map vertexMap; - boost::transpose_graph(g_in.g, g.g, - orig_to_copy(boost::make_assoc_property_map(vertexMap)). - vertex_index_map(get(&NFAGraphVertexProps::index, g_in.g))); - - // The transpose_graph operation will have created extra copies of our - // specials. We have to rewire their neighbours to the 'real' specials and - // delete them. - NFAVertex start = vertexMap[g_in.acceptEod]; - NFAVertex startDs = vertexMap[g_in.accept]; - NFAVertex accept = vertexMap[g_in.startDs]; - NFAVertex acceptEod = vertexMap[g_in.start]; - - // Successors of starts. - for (const auto &e : out_edges_range(start, g)) { - NFAVertex v = target(e, g); - add_edge(g.start, v, g[e], g); - } - for (const auto &e : out_edges_range(startDs, g)) { - NFAVertex v = target(e, g); - add_edge(g.startDs, v, g[e], g); - } - - // Predecessors of accepts. - for (const auto &e : in_edges_range(accept, g)) { - NFAVertex u = source(e, g); - add_edge(u, g.accept, g[e], g); - } - for (const auto &e : in_edges_range(acceptEod, g)) { - NFAVertex u = source(e, g); - add_edge(u, g.acceptEod, g[e], g); - } - - // Remove our impostors. - clear_vertex(start, g); - remove_vertex(start, g); - clear_vertex(startDs, g); - remove_vertex(startDs, g); - clear_vertex(accept, g); - remove_vertex(accept, g); - clear_vertex(acceptEod, g); - remove_vertex(acceptEod, g); - - // Renumber so that g's properties (number of vertices, edges) are - // accurate. 
- g.renumberVertices(); - g.renumberEdges(); - - assert(num_vertices(g) == num_vertices(g_in)); - assert(num_edges(g) == num_edges(g_in)); -} - } // namespace ue2 diff --git a/src/nfagraph/ng_restructuring.h b/src/nfagraph/ng_restructuring.h index 5e244bf6..bbd478d5 100644 --- a/src/nfagraph/ng_restructuring.h +++ b/src/nfagraph/ng_restructuring.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,23 +37,8 @@ #include "ue2common.h" #include "util/ue2_containers.h" -#include -#include - namespace ue2 { -class NGHolder; - -/** Construct a reversed copy of an arbitrary NGHolder, mapping starts to - * accepts. */ -void reverseHolder(const NGHolder &g, NGHolder &out); - -/** Connect the start vertex to each of the vertices in \p tops. This is useful - * temporarily for when we need to run a graph algorithm that expects a single - * source vertex. */ -void wireStartToTops(NGHolder &g, const std::map &tops, - std::vector &topEdges); - /** * \brief Special state index value meaning that the vertex will not * participate in an (NFA/DFA/etc) implementation. @@ -63,30 +48,14 @@ static constexpr u32 NO_STATE = ~0; /** * \brief Gives each participating vertex in the graph a unique state index. */ -ue2::unordered_map -numberStates(NGHolder &h, - const std::map &tops = std::map{}); +unordered_map +numberStates(NGHolder &h, const flat_set &tops); /** * \brief Counts the number of states (vertices with state indices) in the * graph. - * - * If addTops is true, also accounts for states that will be constructed for - * each unique top. */ -u32 countStates(const NGHolder &g, - const ue2::unordered_map &state_ids, - bool addTops = true); - -/** Optimisation: drop unnecessary start states. */ -void dropUnusedStarts(NGHolder &g, ue2::unordered_map &states); - -/** - * \brief Returns a set of vertices that will not participate in an - * implementation (NFA, DFA etc) of this graph. For example, starts with no - * successors. - */ -flat_set findUnusedStates(const NGHolder &g); +u32 countStates(const unordered_map &state_ids); } // namespace ue2 diff --git a/src/nfagraph/ng_uncalc_components.cpp b/src/nfagraph/ng_uncalc_components.cpp index fd6dfc3e..3326d6f4 100644 --- a/src/nfagraph/ng_uncalc_components.cpp +++ b/src/nfagraph/ng_uncalc_components.cpp @@ -39,7 +39,6 @@ #include "ng_limex.h" #include "ng_redundancy.h" #include "ng_region.h" -#include "ng_restructuring.h" #include "ng_uncalc_components.h" #include "ng_util.h" #include "ue2common.h" @@ -55,42 +54,52 @@ #include #include +#include + using namespace std; +using boost::adaptors::map_values; namespace ue2 { static const u32 FAST_STATE_LIMIT = 256; /**< largest possible desirable NFA */ /** Sentinel value meaning no component has yet been selected. 
*/ -static const u32 NO_COMPONENT = 0xffffffffu; +static const u32 NO_COMPONENT = ~0U; -static -vector getSortedVA(const NGHolder &g, - const ue2::unordered_map &state_ids) { - vector out; - out.reserve(num_vertices(g)); +static const u32 UNUSED_STATE = ~0U; - for (auto v : vertices_range(g)) { - assert(contains(state_ids, v)); - if (state_ids.at(v) == NO_STATE) { - continue; +namespace { +struct ranking_info { + explicit ranking_info(const NGHolder &h) : to_vertex(getTopoOrdering(h)) { + u32 rank = 0; + + reverse(to_vertex.begin(), to_vertex.end()); + + for (NFAVertex v : to_vertex) { + to_rank[v] = rank++; + } + + for (NFAVertex v : vertices_range(h)) { + if (!contains(to_rank, v)) { + to_rank[v] = UNUSED_STATE; + } } - out.push_back(v); } - // Order vertices by their state indices. - sort(begin(out), end(out), [&state_ids](NFAVertex a, NFAVertex b) { - return state_ids.at(a) < state_ids.at(b); - }); - -#ifndef NDEBUG - // State indices should match vector indices. - for (u32 i = 0; i < out.size(); i++) { - assert(state_ids.at(out.at(i)) == i); + NFAVertex at(u32 ranking) const { return to_vertex.at(ranking); } + u32 get(NFAVertex v) const { return to_rank.at(v); } + u32 size() const { return (u32)to_vertex.size(); } + u32 add_to_tail(NFAVertex v) { + u32 rank = size(); + to_rank[v] = rank; + to_vertex.push_back(v); + return rank; } -#endif - return out; +private: + vector to_vertex; + unordered_map to_rank; +}; } static never_inline @@ -122,9 +131,9 @@ bool cplVerticesMatch(const NGHolder &ga, NFAVertex va, } static never_inline -u32 cplCommonReachAndSimple(const NGHolder &ga, const vector &a, - const NGHolder &gb, const vector &b) { - u32 ml = min(a.size(), b.size()); +u32 cplCommonReachAndSimple(const NGHolder &ga, const ranking_info &a_ranking, + const NGHolder &gb, const ranking_info &b_ranking) { + u32 ml = min(a_ranking.size(), b_ranking.size()); if (ml > 65535) { ml = 65535; } @@ -133,7 +142,7 @@ u32 cplCommonReachAndSimple(const NGHolder &ga, const vector &a, // "startedness" properties. 
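    // Walk the two rankings in lockstep from rank 0; the first pair of
    // vertices that fails to match bounds the common prefix from above.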
u32 max = 0; for (; max < ml; max++) { - if (!cplVerticesMatch(ga, a[max], gb, b[max])) { + if (!cplVerticesMatch(ga, a_ranking.at(max), gb, b_ranking.at(max))) { break; } } @@ -141,34 +150,30 @@ u32 cplCommonReachAndSimple(const NGHolder &ga, const vector &a, return max; } -u32 commonPrefixLength(const NGHolder &ga, - const ue2::unordered_map &a_state_ids, - const NGHolder &gb, - const ue2::unordered_map &b_state_ids) { - vector a = getSortedVA(ga, a_state_ids); - vector b = getSortedVA(gb, b_state_ids); - +static +u32 commonPrefixLength(const NGHolder &ga, const ranking_info &a_ranking, + const NGHolder &gb, const ranking_info &b_ranking) { /* upper bound on the common region based on local properties */ - u32 max = cplCommonReachAndSimple(ga, a, gb, b); + u32 max = cplCommonReachAndSimple(ga, a_ranking, gb, b_ranking); DEBUG_PRINTF("cpl upper bound %u\n", max); while (max > 0) { - bool ok = true; - /* shrink max region based on in-edges from outside the region */ for (size_t j = max; j > 0; j--) { - for (auto u : inv_adjacent_vertices_range(a[j - 1], ga)) { - u32 state_id = a_state_ids.at(u); - if (state_id != NO_STATE && state_id >= max) { + NFAVertex a_v = a_ranking.at(j - 1); + NFAVertex b_v = b_ranking.at(j - 1); + for (auto u : inv_adjacent_vertices_range(a_v, ga)) { + u32 state_id = a_ranking.get(u); + if (state_id != UNUSED_STATE && state_id >= max) { max = j - 1; DEBUG_PRINTF("lowering max to %u\n", max); goto next_vertex; } } - for (auto u : inv_adjacent_vertices_range(b[j - 1], gb)) { - u32 state_id = b_state_ids.at(u); - if (state_id != NO_STATE && state_id >= max) { + for (auto u : inv_adjacent_vertices_range(b_v, gb)) { + u32 state_id = b_ranking.get(u); + if (state_id != UNUSED_STATE && state_id >= max) { max = j - 1; DEBUG_PRINTF("lowering max to %u\n", max); goto next_vertex; @@ -180,14 +185,13 @@ u32 commonPrefixLength(const NGHolder &ga, /* Ensure that every pair of vertices has same out-edges to vertices in the region. 
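 * Edge tops must agree as well, so that triggered graphs behave identically
 * over the shared region.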
*/ - for (size_t i = 0; ok && i < max; i++) { + for (size_t i = 0; i < max; i++) { size_t a_count = 0; size_t b_count = 0; - NGHolder::out_edge_iterator ei, ee; - for (tie(ei, ee) = out_edges(a[i], ga); ok && ei != ee; ++ei) { - u32 sid = a_state_ids.at(target(*ei, ga)); - if (sid == NO_STATE || sid >= max) { + for (NFAEdge a_edge : out_edges_range(a_ranking.at(i), ga)) { + u32 sid = a_ranking.get(target(a_edge, ga)); + if (sid == UNUSED_STATE || sid >= max) { continue; } @@ -195,28 +199,26 @@ u32 commonPrefixLength(const NGHolder &ga, NFAEdge b_edge; bool has_b_edge; - tie(b_edge, has_b_edge) = edge(b[i], b[sid], gb); + tie(b_edge, has_b_edge) = edge(b_ranking.at(i), + b_ranking.at(sid), gb); if (!has_b_edge) { max = i; - ok = false; DEBUG_PRINTF("lowering max to %u due to edge %zu->%u\n", max, i, sid); - break; + goto try_smaller; } - if (ga[*ei].tops != gb[b_edge].tops) { + if (ga[a_edge].tops != gb[b_edge].tops) { max = i; - ok = false; DEBUG_PRINTF("tops don't match on edge %zu->%u\n", i, sid); + goto try_smaller; } } - NGHolder::adjacency_iterator ai, ae; - for (tie(ai, ae) = adjacent_vertices(b[i], gb); ok && ai != ae; - ++ai) { - u32 sid = b_state_ids.at(*ai); - if (sid == NO_STATE || sid >= max) { + for (NFAVertex b_v : adjacent_vertices_range(b_ranking.at(i), gb)) { + u32 sid = b_ranking.get(b_v); + if (sid == UNUSED_STATE || sid >= max) { continue; } @@ -225,28 +227,32 @@ u32 commonPrefixLength(const NGHolder &ga, if (a_count != b_count) { max = i; - DEBUG_PRINTF("lowering max to %u due to a,b count " - "(a_count=%zu, b_count=%zu)\n", max, a_count, - b_count); - ok = false; + DEBUG_PRINTF("lowering max to %u due to a,b count (a_count=%zu," + " b_count=%zu)\n", max, a_count, b_count); + goto try_smaller; } } - if (ok) { - DEBUG_PRINTF("survived checks, returning cpl %u\n", max); - return max; - } + DEBUG_PRINTF("survived checks, returning cpl %u\n", max); + return max; + try_smaller:; } DEBUG_PRINTF("failed to find any common region\n"); return 0; } +u32 commonPrefixLength(const NGHolder &ga, const NGHolder &gb) { + return commonPrefixLength(ga, ranking_info(ga), gb, ranking_info(gb)); +} + static never_inline -void mergeNfa(NGHolder &dest, vector &destStateMap, - ue2::unordered_map &dest_state_ids, - NGHolder &vic, vector &vicStateMap, - size_t common_len) { +void mergeNfaComponent(NGHolder &dest, const NGHolder &vic, size_t common_len) { + assert(&dest != &vic); + + auto dest_info = ranking_info(dest); + auto vic_info = ranking_info(vic); + map vmap; // vic -> dest vmap[vic.start] = dest.start; @@ -255,22 +261,20 @@ void mergeNfa(NGHolder &dest, vector &destStateMap, vmap[vic.acceptEod] = dest.acceptEod; vmap[nullptr] = nullptr; - u32 stateNum = countStates(dest, dest_state_ids); - // For vertices in the common len, add to vmap and merge in the reports, if // any. for (u32 i = 0; i < common_len; i++) { - NFAVertex v_old = vicStateMap[i], v = destStateMap[i]; + NFAVertex v_old = vic_info.at(i); + NFAVertex v = dest_info.at(i); vmap[v_old] = v; const auto &reports = vic[v_old].reports; dest[v].reports.insert(reports.begin(), reports.end()); } - // Add in vertices beyond the common len, giving them state numbers - // starting at stateNum. - for (u32 i = common_len; i < vicStateMap.size(); i++) { - NFAVertex v_old = vicStateMap[i]; + // Add in vertices beyond the common len + for (u32 i = common_len; i < vic_info.size(); i++) { + NFAVertex v_old = vic_info.at(i); if (is_special(v_old, vic)) { // Dest already has start vertices, just merge the reports. 
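As a contract sketch for the merge API after this change (h1, h2, rm and cc are assumed to be an existing pair of graph pointers, a report manager and a compile context; illustrative only):

    // mergeNfaPair() recomputes the common prefix length internally and
    // declines unprofitable merges, so callers only check its return value.
    u32 cpl = commonPrefixLength(*h1, *h2); // length of the shared region
    DEBUG_PRINTF("cpl %u\n", cpl);
    if (mergeNfaPair(*h1, *h2, rm, cc)) {
        // *h2 now implements both machines; *h1 (const) is unchanged and
        // may be discarded by the caller.
    }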
@@ -282,15 +286,17 @@ void mergeNfa(NGHolder &dest, vector &destStateMap, } NFAVertex v = add_vertex(vic[v_old], dest); - dest_state_ids[v] = stateNum++; + dest_info.add_to_tail(v); vmap[v_old] = v; } /* add edges */ DEBUG_PRINTF("common_len=%zu\n", common_len); for (const auto &e : edges_range(vic)) { - NFAVertex u_old = source(e, vic), v_old = target(e, vic); - NFAVertex u = vmap[u_old], v = vmap[v_old]; + NFAVertex u_old = source(e, vic); + NFAVertex v_old = target(e, vic); + NFAVertex u = vmap[u_old]; + NFAVertex v = vmap[v_old]; bool uspecial = is_special(u, dest); bool vspecial = is_special(v, dest); @@ -301,15 +307,14 @@ void mergeNfa(NGHolder &dest, vector &destStateMap, // We're in the common region if v's state ID is low enough, unless v // is a special (an accept), in which case we use u's state ID. - assert(contains(dest_state_ids, v)); - bool in_common_region = dest_state_ids.at(v) < common_len; - if (vspecial && dest_state_ids.at(u) < common_len) { + bool in_common_region = dest_info.get(v) < common_len; + if (vspecial && dest_info.get(u) < common_len) { in_common_region = true; } DEBUG_PRINTF("adding idx=%u (state %u) -> idx=%u (state %u)%s\n", - dest[u].index, dest_state_ids.at(u), - dest[v].index, dest_state_ids.at(v), + dest[u].index, dest_info.get(u), + dest[v].index, dest_info.get(v), in_common_region ? " [common]" : ""); if (in_common_region) { @@ -337,18 +342,6 @@ void mergeNfa(NGHolder &dest, vector &destStateMap, dest.renumberVertices(); } -static never_inline -void mergeNfaComponent(NGHolder &pholder, NGHolder &vholder, size_t cpl) { - assert(&pholder != &vholder); - - auto v_state_ids = numberStates(vholder); - auto p_state_ids = numberStates(pholder); - auto vhvmap = getSortedVA(vholder, v_state_ids); - auto phvmap = getSortedVA(pholder, p_state_ids); - - mergeNfa(pholder, phvmap, p_state_ids, vholder, vhvmap, cpl); -} - namespace { struct NfaMergeCandidateH { NfaMergeCandidateH(size_t cpl_in, NGHolder *first_in, NGHolder *second_in, @@ -373,14 +366,19 @@ struct NfaMergeCandidateH { /** Returns true if graphs \p h1 and \p h2 can (and should) be merged. */ static -bool shouldMerge(NGHolder &ha, - const ue2::unordered_map &a_state_ids, - NGHolder &hb, - const ue2::unordered_map &b_state_ids, - size_t cpl, const ReportManager *rm, - const CompileContext &cc) { - size_t combinedStateCount = - countStates(ha, a_state_ids) + countStates(hb, b_state_ids) - cpl; +bool shouldMerge(const NGHolder &ha, const NGHolder &hb, size_t cpl, + const ReportManager *rm, const CompileContext &cc) { + size_t combinedStateCount = num_vertices(ha) + num_vertices(hb) - cpl; + + combinedStateCount -= 2 * 2; /* discount accepts from both */ + + if (is_triggered(ha)) { + /* allow for a state for each top, ignore existing starts */ + combinedStateCount -= 2; /* for start, startDs */ + auto tops = getTops(ha); + insert(&tops, getTops(hb)); + combinedStateCount += tops.size(); + } if (combinedStateCount > FAST_STATE_LIMIT) { // More complex implementability check. @@ -423,11 +421,13 @@ void buildNfaMergeQueue(const vector &cluster, // First, make sure all holders have numbered states and collect their // counts. 
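    // (With ranking_info, the "numbering" is each vertex's reverse
    // topological rank; explicit state IDs are no longer required here.)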
- vector> states_map(cs); + vector states_map; + states_map.reserve(cs); for (size_t i = 0; i < cs; i++) { assert(cluster[i]); - NGHolder &g = *(cluster[i]); - states_map[i] = numberStates(g); + assert(states_map.size() == i); + const NGHolder &g = *(cluster[i]); + states_map.emplace_back(g); } vector seen_cpl(cs * cs, 0); @@ -536,11 +536,9 @@ bool mergeableStarts(const NGHolder &h1, const NGHolder &h2) { } /** Merge graph \p ga into graph \p gb. Returns false on failure. */ -bool mergeNfaPair(NGHolder &ga, NGHolder &gb, const ReportManager *rm, +bool mergeNfaPair(const NGHolder &ga, NGHolder &gb, const ReportManager *rm, const CompileContext &cc) { assert(ga.kind == gb.kind); - auto a_state_ids = numberStates(ga); - auto b_state_ids = numberStates(gb); // Vacuous NFAs require special checks on their starts to ensure that tops // match, and that reports match for mixed-accept cases. @@ -549,14 +547,13 @@ bool mergeNfaPair(NGHolder &ga, NGHolder &gb, const ReportManager *rm, return false; } - u32 cpl = commonPrefixLength(ga, a_state_ids, gb, b_state_ids); - if (!shouldMerge(gb, b_state_ids, ga, a_state_ids, cpl, rm, cc)) { + u32 cpl = commonPrefixLength(ga, gb); + if (!shouldMerge(gb, ga, cpl, rm, cc)) { return false; } mergeNfaComponent(gb, ga, cpl); reduceImplementableGraph(gb, SOM_NONE, rm, cc); - b_state_ids = numberStates(gb); return true; } diff --git a/src/nfagraph/ng_uncalc_components.h b/src/nfagraph/ng_uncalc_components.h index 5f341961..ddab8825 100644 --- a/src/nfagraph/ng_uncalc_components.h +++ b/src/nfagraph/ng_uncalc_components.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -52,10 +52,7 @@ class ReportManager; * The CPL is calculated based the topological ordering given by the state * indices for each graph. */ -u32 commonPrefixLength(const NGHolder &ga, - const ue2::unordered_map &a_state_ids, - const NGHolder &gb, - const ue2::unordered_map &b_state_ids); +u32 commonPrefixLength(const NGHolder &ga, const NGHolder &gb); /** * \brief Merge the group of graphs in \p cluster where possible. @@ -73,7 +70,7 @@ void mergeNfaCluster(const std::vector &cluster, * Returns false on failure. On success, \p gb is reduced via \ref * reduceImplementableGraph and renumbered. */ -bool mergeNfaPair(NGHolder &ga, NGHolder &gb, const ReportManager *rm, +bool mergeNfaPair(const NGHolder &ga, NGHolder &gb, const ReportManager *rm, const CompileContext &cc); } // namespace ue2 diff --git a/src/nfagraph/ng_util.cpp b/src/nfagraph/ng_util.cpp index da9c2438..71eef7eb 100644 --- a/src/nfagraph/ng_util.cpp +++ b/src/nfagraph/ng_util.cpp @@ -343,6 +343,47 @@ bool is_virtual_start(NFAVertex v, const NGHolder &g) { return g[v].assert_flags & POS_FLAG_VIRTUAL_START; } +static +void reorderSpecials(const NGHolder &g, vector &topoOrder) { + // Start is last element of reverse topo ordering. + auto it = find(topoOrder.begin(), topoOrder.end(), g.start); + if (it != topoOrder.end() - 1) { + DEBUG_PRINTF("repositioning start\n"); + assert(it != topoOrder.end()); + topoOrder.erase(it); + topoOrder.insert(topoOrder.end(), g.start); + } + + // StartDs is second-to-last element of reverse topo ordering. 
+ it = find(topoOrder.begin(), topoOrder.end(), g.startDs); + if (it != topoOrder.end() - 2) { + DEBUG_PRINTF("repositioning start ds\n"); + assert(it != topoOrder.end()); + topoOrder.erase(it); + topoOrder.insert(topoOrder.end() - 1, g.startDs); + } + + // AcceptEOD is first element of reverse topo ordering. + it = find(topoOrder.begin(), topoOrder.end(), g.acceptEod); + if (it != topoOrder.begin()) { + DEBUG_PRINTF("repositioning accept\n"); + assert(it != topoOrder.end()); + topoOrder.erase(it); + topoOrder.insert(topoOrder.begin(), g.acceptEod); + } + + // Accept is second element of reverse topo ordering, if it's connected. + it = find(topoOrder.begin(), topoOrder.end(), g.accept); + if (it != topoOrder.begin() + 1) { + DEBUG_PRINTF("repositioning accept\n"); + assert(it != topoOrder.end()); + topoOrder.erase(it); + if (in_degree(g.accept, g) != 0) { + topoOrder.insert(topoOrder.begin() + 1, g.accept); + } + } +} + vector getTopoOrdering(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); @@ -372,6 +413,8 @@ vector getTopoOrdering(const NGHolder &g) { color_map(make_iterator_property_map(colour.begin(), index_map)) .vertex_index_map(index_map)); + reorderSpecials(g, ordering); + return ordering; } @@ -629,6 +672,60 @@ unique_ptr cloneHolder(const NGHolder &in) { return h; } +void reverseHolder(const NGHolder &g_in, NGHolder &g) { + // Make the BGL do the grunt work. + ue2::unordered_map vertexMap; + boost::transpose_graph(g_in.g, g.g, + orig_to_copy(boost::make_assoc_property_map(vertexMap)). + vertex_index_map(get(&NFAGraphVertexProps::index, g_in.g))); + + // The transpose_graph operation will have created extra copies of our + // specials. We have to rewire their neighbours to the 'real' specials and + // delete them. + NFAVertex start = vertexMap[g_in.acceptEod]; + NFAVertex startDs = vertexMap[g_in.accept]; + NFAVertex accept = vertexMap[g_in.startDs]; + NFAVertex acceptEod = vertexMap[g_in.start]; + + // Successors of starts. + for (const auto &e : out_edges_range(start, g)) { + NFAVertex v = target(e, g); + add_edge(g.start, v, g[e], g); + } + for (const auto &e : out_edges_range(startDs, g)) { + NFAVertex v = target(e, g); + add_edge(g.startDs, v, g[e], g); + } + + // Predecessors of accepts. + for (const auto &e : in_edges_range(accept, g)) { + NFAVertex u = source(e, g); + add_edge(u, g.accept, g[e], g); + } + for (const auto &e : in_edges_range(acceptEod, g)) { + NFAVertex u = source(e, g); + add_edge(u, g.acceptEod, g[e], g); + } + + // Remove our impostors. + clear_vertex(start, g); + remove_vertex(start, g); + clear_vertex(startDs, g); + remove_vertex(startDs, g); + clear_vertex(accept, g); + remove_vertex(accept, g); + clear_vertex(acceptEod, g); + remove_vertex(acceptEod, g); + + // Renumber so that g's properties (number of vertices, edges) are + // accurate. + g.renumberVertices(); + g.renumberEdges(); + + assert(num_vertices(g) == num_vertices(g_in)); + assert(num_edges(g) == num_edges(g_in)); +} + #ifndef NDEBUG bool allMatchStatesHaveReports(const NGHolder &g) { diff --git a/src/nfagraph/ng_util.h b/src/nfagraph/ng_util.h index 1c6dd461..6c6907a3 100644 --- a/src/nfagraph/ng_util.h +++ b/src/nfagraph/ng_util.h @@ -174,7 +174,11 @@ bool is_match_vertex(NFAVertex v, const GraphT &g) { } /** Generate a reverse topological ordering for a back-edge filtered version of - * our graph (as it must be a DAG and correctly numbered) */ + * our graph (as it must be a DAG and correctly numbered). 
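+ * (Back edges are filtered out before the sort, so cycles in the NFA do not
+ * prevent a valid ordering over the acyclic skeleton.)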
+ * + * Note: we ensure that we produce a topo ordering that begins with acceptEod + * and accept (if present) and ends with startDs followed by start. + */ std::vector getTopoOrdering(const NGHolder &g); /** Comparison functor used to sort by vertex_index. */ @@ -300,6 +304,10 @@ void clearReports(NGHolder &g); * r_old. */ void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new); +/** Construct a reversed copy of an arbitrary NGHolder, mapping starts to + * accepts. */ +void reverseHolder(const NGHolder &g, NGHolder &out); + #ifndef NDEBUG // Assertions: only available in internal builds. diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 6b19549b..38c488be 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -47,6 +47,7 @@ #include "nfagraph/ng_is_equal.h" #include "nfagraph/ng_limex.h" #include "nfagraph/ng_mcclellan.h" +#include "nfagraph/ng_prune.h" #include "nfagraph/ng_repeat.h" #include "nfagraph/ng_reports.h" #include "nfagraph/ng_stop.h" @@ -788,19 +789,230 @@ void RoseBuildImpl::findTransientLeftfixes(void) { /** Find all the different roses and their associated literals. */ static -map> findLeftSucc(RoseBuildImpl &tbi) { +map> findLeftSucc(const RoseBuildImpl &build) { map> leftfixes; - for (auto v : vertices_range(tbi.g)) { - if (tbi.g[v].left) { - const LeftEngInfo &lei = tbi.g[v].left; + for (auto v : vertices_range(build.g)) { + if (build.g[v].left) { + const LeftEngInfo &lei = build.g[v].left; leftfixes[lei].push_back(v); } } return leftfixes; } +namespace { +struct infix_info { + set preds; + set succs; +}; +} + static -bool triggerKillsRoseGraph(const RoseBuildImpl &tbi, const left_id &left, +map findInfixGraphInfo(const RoseBuildImpl &build) { + map rv; + + for (auto v : vertices_range(build.g)) { + if (!build.g[v].left) { + continue; + } + + if (build.isRootSuccessor(v)) { + DEBUG_PRINTF("a prefix is never an infix\n"); + continue; + } + + /* ensure only proper nfas */ + const LeftEngInfo &lei = build.g[v].left; + if (!lei.graph) { + continue; + } + if (lei.haig || lei.dfa) { + continue; + } + assert(!lei.castle); + infix_info &info = rv[lei.graph.get()]; + insert(&info.preds, inv_adjacent_vertices_range(v, build.g)); + info.succs.insert(v); + } + + return rv; +} + +static +map> getTopInfo(const NGHolder &h) { + map> rv; + for (NFAEdge e : out_edges_range(h.start, h)) { + for (u32 t : h[e].tops) { + rv[t].insert(e); + } + } + return rv; +} + +static +u32 findUnusedTop(const map> &tops) { + u32 i = 0; + while (contains(tops, i)) { + i++; + } + return i; +} + +static +bool reduceTopTriggerLoad(RoseBuildImpl &build, NGHolder &h, RoseVertex u) { + RoseGraph &g = build.g; + + set tops; /* tops triggered by u */ + for (RoseEdge e : out_edges_range(u, g)) { + RoseVertex v = target(e, g); + if (g[v].left.graph.get() != &h) { + continue; + } + tops.insert(g[e].rose_top); + } + + assert(!tops.empty()); + if (tops.size() <= 1) { + return false; + } + DEBUG_PRINTF("%zu triggers %zu tops for %p\n", build.g[u].idx, tops.size(), + &h); + + auto h_top_info = getTopInfo(h); + flat_set edges_to_trigger; + for (u32 t : tops) { + insert(&edges_to_trigger, h_top_info[t]); + } + + u32 new_top = ~0U; + /* check if there is already a top with the right the successor set */ + for (const auto &elem : h_top_info) { + if (elem.second == edges_to_trigger) { + new_top = elem.first; + break; + } + } + + /* if no existing suitable top, add a new top for us */ + if (new_top == ~0U) { + new_top = findUnusedTop(h_top_info); + + 
/* add top to edges out of start */ + for (NFAEdge e : out_edges_range(h.start, h)) { + if (has_intersection(tops, h[e].tops)) { + h[e].tops.insert(new_top); + } + } + + /* check still implementable if we add a new top */ + if (!isImplementableNFA(h, nullptr, build.cc)) { + DEBUG_PRINTF("unable to add new top\n"); + for (NFAEdge e : out_edges_range(h.start, h)) { + h[e].tops.erase(new_top); + } + /* we should be back to the original graph */ + assert(isImplementableNFA(h, nullptr, build.cc)); + return false; + } + } + + DEBUG_PRINTF("using new merged top %u\n", new_top); + assert(new_top != ~0U); + for (RoseEdge e: out_edges_range(u, g)) { + RoseVertex v = target(e, g); + if (g[v].left.graph.get() != &h) { + continue; + } + g[e].rose_top = new_top; + } + + return true; +} + +static +void packInfixTops(NGHolder &h, RoseGraph &g, + const set &verts) { + if (!is_triggered(h)) { + DEBUG_PRINTF("not triggered, no tops\n"); + return; + } + assert(isCorrectlyTopped(h)); + DEBUG_PRINTF("pruning unused tops\n"); + flat_set used_tops; + for (auto v : verts) { + assert(g[v].left.graph.get() == &h); + + for (const auto &e : in_edges_range(v, g)) { + u32 top = g[e].rose_top; + used_tops.insert(top); + } + } + + map top_mapping; + for (u32 t : used_tops) { + u32 new_top = top_mapping.size(); + top_mapping[t] = new_top; + } + + for (auto v : verts) { + assert(g[v].left.graph.get() == &h); + + for (const auto &e : in_edges_range(v, g)) { + g[e].rose_top = top_mapping.at(g[e].rose_top); + } + } + + vector dead; + for (const auto &e : out_edges_range(h.start, h)) { + NFAVertex v = target(e, h); + if (v == h.startDs) { + continue; // stylised edge, leave it alone. + } + flat_set updated_tops; + for (u32 t : h[e].tops) { + if (contains(top_mapping, t)) { + updated_tops.insert(top_mapping.at(t)); + } + } + h[e].tops = move(updated_tops); + if (h[e].tops.empty()) { + DEBUG_PRINTF("edge (start,%u) has only unused tops\n", h[v].index); + dead.push_back(e); + } + } + + if (dead.empty()) { + return; + } + + remove_edges(dead, h); + pruneUseless(h); + clearReports(h); // As we may have removed vacuous edges. 
+} + +static +void reduceTopTriggerLoad(RoseBuildImpl &build) { + auto infixes = findInfixGraphInfo(build); + + for (auto &p : infixes) { + if (onlyOneTop(*p.first)) { + continue; + } + + bool changed = false; + for (RoseVertex v : p.second.preds) { + changed |= reduceTopTriggerLoad(build, *p.first, v); + } + + if (changed) { + packInfixTops(*p.first, build.g, p.second.succs); + reduceImplementableGraph(*p.first, SOM_NONE, nullptr, build.cc); + } + } +} + +static +bool triggerKillsRoseGraph(const RoseBuildImpl &build, const left_id &left, const set &all_lits, const RoseEdge &e) { assert(left.graph()); @@ -816,8 +1028,8 @@ bool triggerKillsRoseGraph(const RoseBuildImpl &tbi, const left_id &left, /* check each pred literal to see if they all kill previous graph * state */ - for (u32 lit_id : tbi.g[source(e, tbi.g)].literals) { - const rose_literal_id &pred_lit = tbi.literals.right.at(lit_id); + for (u32 lit_id : build.g[source(e, build.g)].literals) { + const rose_literal_id &pred_lit = build.literals.right.at(lit_id); const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s); DEBUG_PRINTF("running graph %zu\n", states.size()); @@ -833,7 +1045,7 @@ bool triggerKillsRoseGraph(const RoseBuildImpl &tbi, const left_id &left, } static -bool triggerKillsRose(const RoseBuildImpl &tbi, const left_id &left, +bool triggerKillsRose(const RoseBuildImpl &build, const left_id &left, const set &all_lits, const RoseEdge &e) { if (left.haig()) { /* TODO: To allow this for som-based engines we would also need to @@ -843,32 +1055,30 @@ bool triggerKillsRose(const RoseBuildImpl &tbi, const left_id &left, } if (left.graph()) { - return triggerKillsRoseGraph(tbi, left, all_lits, e); + return triggerKillsRoseGraph(build, left, all_lits, e); } if (left.castle()) { - return triggerKillsRoseCastle(tbi, left, all_lits, e); + return triggerKillsRoseCastle(build, left, all_lits, e); } return false; } +/* Sometimes the arrival of a top for a rose infix can ensure that the nfa would + * be dead at that time. In the case of multiple trigger literals, we can only + * base our decision on that portion of literal after any overlapping literals. + */ static -void inspectRoseTops(RoseBuildImpl &tbi) { - /* Sometimes the arrival of a top for a rose infix can ensure that the nfa - * would be dead at that time. 
In the case of multiple trigger literals we - * can only base our decision on that portion of literal after any - * overlapping literals */ +void findTopTriggerCancels(RoseBuildImpl &build) { + auto left_succ = findLeftSucc(build); /* leftfixes -> succ verts */ - map> roses = - findLeftSucc(tbi); /* rose -> succ verts */ - - for (const auto &r : roses) { + for (const auto &r : left_succ) { const left_id &left = r.first; const vector &succs = r.second; assert(!succs.empty()); - if (tbi.isRootSuccessor(*succs.begin())) { + if (build.isRootSuccessor(*succs.begin())) { /* a prefix is never an infix */ continue; } @@ -878,10 +1088,10 @@ void inspectRoseTops(RoseBuildImpl &tbi) { set pred_lit_ids; for (auto v : succs) { - for (const auto &e : in_edges_range(v, tbi.g)) { - RoseVertex u = source(e, tbi.g); - tops_seen.insert(tbi.g[e].rose_top); - insert(&pred_lit_ids, tbi.g[u].literals); + for (const auto &e : in_edges_range(v, build.g)) { + RoseVertex u = source(e, build.g); + tops_seen.insert(build.g[e].rose_top); + insert(&pred_lit_ids, build.g[u].literals); rose_edges.insert(e); } } @@ -893,7 +1103,7 @@ void inspectRoseTops(RoseBuildImpl &tbi) { } for (u32 lit_id : pred_lit_ids) { - const rose_literal_id &p_lit = tbi.literals.right.at(lit_id); + const rose_literal_id &p_lit = build.literals.right.at(lit_id); if (p_lit.delay || p_lit.table == ROSE_ANCHORED) { goto next_rose; } @@ -905,15 +1115,22 @@ void inspectRoseTops(RoseBuildImpl &tbi) { all_lits.size(), rose_edges.size()); for (const auto &e : rose_edges) { - if (triggerKillsRose(tbi, left, all_lits, e)) { + if (triggerKillsRose(build, left, all_lits, e)) { DEBUG_PRINTF("top will override previous rose state\n"); - tbi.g[e].rose_cancel_prev_top = true; + build.g[e].rose_cancel_prev_top = true; } } next_rose:; } } +static +void optimiseRoseTops(RoseBuildImpl &build) { + reduceTopTriggerLoad(build); + /* prune unused tops ? 
*/ + findTopTriggerCancels(build); +} + static void buildRoseSquashMasks(RoseBuildImpl &tbi) { /* Rose nfa squash masks are applied to the groups when the nfa can no @@ -1492,7 +1709,7 @@ aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { /* final prep work */ remapCastleTops(*this); - inspectRoseTops(*this); + optimiseRoseTops(*this); buildRoseSquashMasks(*this); rm.assignDkeys(this); diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 01134736..054dd12f 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -53,7 +53,6 @@ #include "nfagraph/ng_redundancy.h" #include "nfagraph/ng_repeat.h" #include "nfagraph/ng_reports.h" -#include "nfagraph/ng_restructuring.h" #include "nfagraph/ng_stop.h" #include "nfagraph/ng_uncalc_components.h" #include "nfagraph/ng_util.h" @@ -1457,11 +1456,7 @@ bool hasReformedStartDotStar(const NGHolder &h, const Grey &grey) { static u32 commonPrefixLength(left_id &r1, left_id &r2) { if (r1.graph() && r2.graph()) { - auto &g1 = *r1.graph(); - auto &g2 = *r2.graph(); - auto state_ids_1 = numberStates(g1); - auto state_ids_2 = numberStates(g2); - return commonPrefixLength(g1, state_ids_1, g2, state_ids_2); + return commonPrefixLength(*r1.graph(), *r2.graph()); } else if (r1.castle() && r2.castle()) { return min(findMinWidth(*r1.castle()), findMinWidth(*r2.castle())); } @@ -1750,7 +1745,6 @@ u32 findUnusedTop(const ue2::flat_set &tops) { while (contains(tops, i)) { i++; } - assert(i < NFA_MAX_TOP_MASKS); return i; } @@ -1779,11 +1773,6 @@ bool setDistinctTops(NGHolder &h1, const NGHolder &h2, DEBUG_PRINTF("before: h1 has %zu tops, h2 has %zu tops\n", tops1.size(), tops2.size()); - if (tops1.size() + tops2.size() > NFA_MAX_TOP_MASKS) { - DEBUG_PRINTF("too many tops!\n"); - return false; - } - // If our tops don't intersect, we're OK to merge with no changes. if (!has_intersection(tops1, tops2)) { DEBUG_PRINTF("tops don't intersect\n"); @@ -1856,11 +1845,6 @@ bool setDistinctSuffixTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, return true; } -static -bool hasMaxTops(const NGHolder &h) { - return getTops(h).size() == NFA_MAX_TOP_MASKS; -} - /** \brief Estimate the number of accel states in the given graph when built as * an NFA. * @@ -1899,11 +1883,6 @@ void mergeNfaLeftfixes(RoseBuildImpl &tbi, RoseBouquet &roses) { "with %p (%zu verts)\n", r1.graph(), verts1.size(), r2.graph(), verts2.size()); - if (hasMaxTops(*r1.graph())) { - DEBUG_PRINTF("h1 has hit max tops\n"); - break; // next h1 - } - u32 accel1 = accel_count[r1]; if (accel1 >= NFA_MAX_ACCEL_STATES) { DEBUG_PRINTF("h1 has hit max accel\n"); @@ -2203,11 +2182,6 @@ void mergeSuffixes(RoseBuildImpl &tbi, SuffixBouquet &suffixes, const deque &verts2 = suffixes.vertices(s2); assert(s2.graph() && s2.graph()->kind == NFA_SUFFIX); - if (hasMaxTops(*s1.graph())) { - DEBUG_PRINTF("h1 has hit max tops\n"); - break; // next h1 - } - if (!acyclic) { u32 accel1 = accel_count[s1]; if (accel1 >= NFA_MAX_ACCEL_STATES) { From b9650d4fd08ae895ad40470abac1873ecd21a8d1 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 28 Sep 2016 17:17:58 +1000 Subject: [PATCH 041/103] rose: don't unconditionally init ll_buf etc This is only necessary (and already always done) if there is a long literal table. 
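
Background for this change: ll_buf/ll_len and their nocase twins form the
"fake history" window consulted by the CHECK_LONG_LIT instruction. A rough
sketch of the conditional setup this patch moves towards (the
longLitTableOffset guard is an assumption for illustration; the assignments
mirror the lines deleted below):

    // Sketch only: set up the long-literal "fake history" window just for
    // bytecodes that actually carry a long literal table.
    static void initLongLitState(const struct RoseEngine *t,
                                 struct hs_scratch *scratch) {
        struct RoseContext *tctxt = &scratch->tctxt;
        if (!t->longLitTableOffset) { // assumed guard: no long lit table
            return;
        }
        tctxt->ll_buf = scratch->core_info.hbuf;
        tctxt->ll_len = scratch->core_info.hlen;
        tctxt->ll_buf_nocase = scratch->core_info.hbuf;
        tctxt->ll_len_nocase = scratch->core_info.hlen;
    }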
---
 src/rose/stream.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/rose/stream.c b/src/rose/stream.c
index 6e4d0add..703c0940 100644
--- a/src/rose/stream.c
+++ b/src/rose/stream.c
@@ -551,10 +551,6 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
     tctxt->minMatchOffset = offset;
     tctxt->minNonMpvMatchOffset = offset;
     tctxt->next_mpv_offset = 0;
-    tctxt->ll_buf = scratch->core_info.hbuf;
-    tctxt->ll_len = scratch->core_info.hlen;
-    tctxt->ll_buf_nocase = scratch->core_info.hbuf;
-    tctxt->ll_len_nocase = scratch->core_info.hlen;
 
     DEBUG_PRINTF("BEGIN: history len=%zu, buffer len=%zu groups=%016llx\n",
                  scratch->core_info.hlen, scratch->core_info.len,
                  tctxt->groups);

From 924089d95e15e0025b6de0176cffef10cb612f17 Mon Sep 17 00:00:00 2001
From: Alex Coyte
Date: Thu, 29 Sep 2016 10:42:43 +1000
Subject: [PATCH 042/103] properly consider report behaviour when finding
 start verts

---
 src/nfagraph/ng_limex.cpp | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp
index 66494c77..5f782460 100644
--- a/src/nfagraph/ng_limex.cpp
+++ b/src/nfagraph/ng_limex.cpp
@@ -344,6 +344,13 @@ void attemptToUseAsStart(const NGHolder &g, NFAVertex u,
     if (!contains(unhandled_succ_tops, v)) {
         return;
     }
+    /* if it has vacuous reports we need to make sure that the report sets
+     * are the same */
+    if ((v == g.accept || v == g.acceptEod)
+        && g[g.start].reports != g[u].reports) {
+        DEBUG_PRINTF("different report behaviour\n");
+        return;
+    }
     const flat_set &v_tops = unhandled_succ_tops.at(v);
     flat_set new_inter;
     auto ni_inserter = inserter(new_inter, new_inter.end());
@@ -362,6 +369,7 @@ void attemptToUseAsStart(const NGHolder &g, NFAVertex u,
         return;
     }
 
+    DEBUG_PRINTF("reusing %u as a start vertex\n", g[u].index);
     markTopSuccAsHandled(u, top_inter, succs, tops_out, unhandled_top_succs,
                          unhandled_succ_tops);
 }
@@ -377,6 +385,7 @@ void reusePredsAsStarts(const NGHolder &g, const map &top_reach,
                         map> &tops_out) {
     /* create list of candidates first, to avoid issues of iter invalidation
      * and determinism */
+    DEBUG_PRINTF("attempting to reuse vertices for top starts\n");
     vector cand_starts;
     for (NFAVertex u : unhandled_succ_tops | map_keys) {
         if (hasSelfLoop(u, g)) {
@@ -434,6 +443,7 @@ void makeTopStates(NGHolder &g, map> &tops_out,
     while (!unhandled_succ_tops.empty()) {
         assert(!unhandled_top_succs.empty());
+        DEBUG_PRINTF("creating top start vertex\n");
         flat_set u_tops;
         flat_set u_succs;
         pickNextTopStateToHandle(unhandled_top_succs, unhandled_succ_tops,
@@ -473,7 +483,7 @@ set findZombies(const NGHolder &h,
     }
 
     if (in_degree(h.acceptEod, h) != 1 || all_reports(h).size() != 1) {
-        DEBUG_PRINTF("can be made undead - bad reports\n");
+        DEBUG_PRINTF("cannot be made undead - bad reports\n");
         return zombies;
     }

From 8cadba0bdd8c61de0b93250049dbd09fce76962a Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Thu, 29 Sep 2016 10:29:42 +1000
Subject: [PATCH 043/103] rose: call loadLongLiteralState() earlier

The ll_buf, ll_buf_nocase buffers must be initialised before any path
that could lead to storeLongLiteralState().
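
The invariant behind this fix, as a toy model (illustrative code only, none
of these types are from the tree): loadLongLiteralState() must dominate every
path that can reach storeLongLiteralState().

    #include <cassert>

    struct StreamCtx {
        bool ll_loaded = false;
        void load() { ll_loaded = true; }   // stands in for loadLongLiteralState()
        void store() { assert(ll_loaded); } // stands in for storeLongLiteralState()
    };

    void scanBlock(StreamCtx &ctx, bool noFloatingRoots, bool inflight) {
        ctx.load(); // hoisted above the early-exit branch, as in the diff below
        if (noFloatingRoots && !inflight) {
            ctx.store(); // the flush-and-exit path can now store safely
            return;
        }
        /* ... run the floating matcher ... */
        ctx.store();
    }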
--- src/rose/stream.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/rose/stream.c b/src/rose/stream.c index 703c0940..9599612f 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -580,6 +580,12 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { const struct HWLM *ftable = getFLiteralMatcher(t); if (ftable) { + // Load in long literal table state and set up "fake history" buffers + // (ll_buf, etc, used by the CHECK_LONG_LIT instruction). Note that this + // must be done here in order to ensure that it happens before any path + // that leads to storeLongLiteralState(), which relies on these buffers. + loadLongLiteralState(t, state, scratch); + if (t->noFloatingRoots && !roseHasInFlightMatches(t, state, scratch)) { DEBUG_PRINTF("skip FLOATING: no inflight matches\n"); goto flush_delay_and_exit; @@ -591,8 +597,6 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { MIN(t->floatingDistance, length + offset) - offset : 0; } - loadLongLiteralState(t, state, scratch); - size_t hlength = scratch->core_info.hlen; char rebuild = hlength && From 47f53f63a7b27377ec2b596d18231a17c10802e2 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 6 Oct 2016 15:54:48 +1100 Subject: [PATCH 044/103] simple pass to pick up paths redundant with those from cyclic's succs --- src/nfa/limex_compile.cpp | 6 +- src/nfagraph/ng.cpp | 1 + src/nfagraph/ng_depth.cpp | 6 +- src/nfagraph/ng_extparam.cpp | 6 +- src/nfagraph/ng_misc_opt.cpp | 155 ++++++++++++++++++++++++++++++-- src/nfagraph/ng_misc_opt.h | 9 +- src/nfagraph/ng_region.cpp | 6 +- src/nfagraph/ng_repeat.cpp | 3 +- src/nfagraph/ng_utf8.cpp | 15 ++-- src/nfagraph/ng_util.cpp | 7 +- src/nfagraph/ng_util.h | 55 ++++++++---- src/rose/rose_build_convert.cpp | 15 ++-- src/util/graph.h | 18 +++- 13 files changed, 234 insertions(+), 68 deletions(-) diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 2c164090..b7ea93d9 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -788,10 +788,8 @@ u32 getEffectiveAccelStates(const build_info &args, if (!is_subset_of(h[v].reports, h[a].reports)) { continue; } - flat_set v_succ; - flat_set a_succ; - succ(h, v, &v_succ); - succ(h, a, &a_succ); + auto v_succ = succs(v, h); + auto a_succ = succs(a, h); if (is_subset_of(v_succ, a_succ)) { dominated_by[accel_id] |= 1U << accel_id_map[a]; } diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp index deca3fd5..071e5c63 100644 --- a/src/nfagraph/ng.cpp +++ b/src/nfagraph/ng.cpp @@ -421,6 +421,7 @@ bool NG::addGraph(NGWrapper &w) { // Perform a reduction pass to merge sibling character classes together. 
if (cc.grey.performGraphSimplification) { removeRedundancy(w, som); + prunePathsRedundantWithSuccessorOfCyclics(w, som); } dumpDotWrapper(w, "04_reduced", cc.grey); diff --git a/src/nfagraph/ng_depth.cpp b/src/nfagraph/ng_depth.cpp index d7945be9..8afa644a 100644 --- a/src/nfagraph/ng_depth.cpp +++ b/src/nfagraph/ng_depth.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -134,8 +134,8 @@ void findLoopReachable(const GraphT &g, const NFAVertex srcVertex, depth_first_search(g, visitor(be).root_vertex(srcVertex).vertex_index_map( index_map)); - AcyclicFilter af(&deadEdges); - filtered_graph > acyclic_g(g, af); + auto af = make_bad_edge_filter(&deadEdges); + auto acyclic_g = make_filtered_graph(g, af); vector topoOrder; /* actually reverse topological order */ topoOrder.reserve(deadNodes.size()); diff --git a/src/nfagraph/ng_extparam.cpp b/src/nfagraph/ng_extparam.cpp index bc101df2..eeb15299 100644 --- a/src/nfagraph/ng_extparam.cpp +++ b/src/nfagraph/ng_extparam.cpp @@ -382,8 +382,7 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { while (v != cyclic) { DEBUG_PRINTF("vertex %u\n", g[v].index); width++; - tie(ai, ae) = adjacent_vertices(v, g); - set succ(ai, ae); + auto succ = succs(v, g); if (contains(succ, cyclic)) { if (succ.size() == 1) { v = cyclic; @@ -421,8 +420,7 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { while (!is_any_accept(v, g)) { DEBUG_PRINTF("vertex %u\n", g[v].index); width++; - tie(ai, ae) = adjacent_vertices(v, g); - set succ(ai, ae); + auto succ = succs(v, g); if (succ.size() != 1) { DEBUG_PRINTF("bad form\n"); return false; diff --git a/src/nfagraph/ng_misc_opt.cpp b/src/nfagraph/ng_misc_opt.cpp index 2e02933a..3f685226 100644 --- a/src/nfagraph/ng_misc_opt.cpp +++ b/src/nfagraph/ng_misc_opt.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -69,8 +69,12 @@ #include "util/charreach.h" #include "util/container.h" #include "util/graph_range.h" +#include "util/ue2_containers.h" #include "ue2common.h" +#include +#include + #include #include #include @@ -94,8 +98,8 @@ void findCandidates(NGHolder &g, const vector &ordering, // For `v' to be a candidate, its predecessors must all have the same // successor set as `v'. - set succ_v, succ_u; - succ(g, v, &succ_v); + auto succ_v = succs(v, g); + flat_set succ_u; for (auto u : inv_adjacent_vertices_range(v, g)) { succ_u.clear(); @@ -125,8 +129,8 @@ void findCandidates_rev(NGHolder &g, const vector &ordering, // For `v' to be a candidate, its predecessors must all have the same // successor set as `v'. 
- set pred_v, pred_u; - pred(g, v, &pred_v); + auto pred_v = preds(v, g); + flat_set pred_u; for (auto u : adjacent_vertices_range(v, g)) { pred_u.clear(); @@ -172,8 +176,7 @@ void succCRIntersection(const NGHolder &g, NFAVertex v, CharReach &add) { static set findSustainSet(const NGHolder &g, NFAVertex p, bool ignore_starts, const CharReach &new_cr) { - set cand; - pred(g, p, &cand); + auto cand = preds>(p, g); if (ignore_starts) { cand.erase(g.startDs); } @@ -209,8 +212,7 @@ set findSustainSet(const NGHolder &g, NFAVertex p, static set findSustainSet_rev(const NGHolder &g, NFAVertex p, const CharReach &new_cr) { - set cand; - succ(g, p, &cand); + auto cand = succs>(p, g); /* remove elements from cand until the sustain set property holds */ bool changed; do { @@ -546,4 +548,139 @@ bool mergeCyclicDotStars(NGHolder &g) { return true; } +/** + * Returns the set of vertices that cannot be on if v is not on. + */ +static +flat_set findDependentVertices(const NGHolder &g, NFAVertex v) { + auto v_pred = preds(v, g); + flat_set may_be_on; + + /* We need to exclude any vertex that may be reached on a path which is + * incompatible with the vertex v being on. */ + + /* A vertex u is bad if: + * 1) its reach may be incompatible with v (not a subset) + * 2) it if there is an edge from a bad vertex b and there is either not an + * edge v->u or not an edge b->v. + * Note: 2) means v is never bad as it has a selfloop + * + * Can do this with a DFS from all the initial bad states with a conditional + * check down edges. Alternately can just filter these edges out of the + * graph first. + */ + flat_set no_explore; + for (NFAVertex t : adjacent_vertices_range(v, g)) { + for (NFAEdge e : in_edges_range(t, g)) { + NFAVertex s = source(e, g); + if (edge(s, v, g).second) { + no_explore.insert(e); + } + } + } + + auto filtered_g = make_filtered_graph(g.g, + make_bad_edge_filter(&no_explore)); + + vector color_raw(num_vertices(g)); + auto color = make_iterator_property_map(color_raw.begin(), + get(&NFAGraphVertexProps::index, g.g)); + flat_set bad; + for (NFAVertex b : vertices_range(g)) { + if (b != g.start && g[b].char_reach.isSubsetOf(g[v].char_reach)) { + continue; + } + boost::depth_first_visit(filtered_g, b, make_vertex_recorder(bad), + color); + } + + flat_set rv; + for (NFAVertex u : vertices_range(g)) { + if (!contains(bad, u)) { + DEBUG_PRINTF("%u is good\n", g[u].index); + rv.insert(u); + } + } + return rv; +} + +static +bool pruneUsingSuccessors(NGHolder &g, NFAVertex u, som_type som) { + if (som && (is_virtual_start(u, g) || u == g.startDs)) { + return false; + } + + bool changed = false; + DEBUG_PRINTF("using cyclic %u as base\n", g[u].index); + auto children = findDependentVertices(g, u); + vector u_succs; + for (NFAVertex v : adjacent_vertices_range(u, g)) { + if (som && is_virtual_start(v, g)) { + /* as v is virtual start, its som has been reset so can not override + * existing in progress matches. 
*/ + continue; + } + u_succs.push_back(v); + } + sort(u_succs.begin(), u_succs.end(), + [&](NFAVertex a, NFAVertex b) { + return g[a].char_reach.count() > g[b].char_reach.count(); + }); + for (NFAVertex v : u_succs) { + DEBUG_PRINTF(" using %u as killer\n", g[v].index); + set dead; + for (NFAVertex s : adjacent_vertices_range(v, g)) { + DEBUG_PRINTF(" looking at preds of %u\n", g[s].index); + for (NFAEdge e : in_edges_range(s, g)) { + NFAVertex p = source(e, g); + if (!contains(children, p) || p == v || p == u + || p == g.accept) { + DEBUG_PRINTF("%u not a cand\n", g[p].index); + continue; + } + if (is_any_accept(s, g) && g[p].reports != g[v].reports) { + DEBUG_PRINTF("%u bad reports\n", g[p].index); + continue; + } + if (g[p].char_reach.isSubsetOf(g[v].char_reach)) { + dead.insert(e); + changed = true; + DEBUG_PRINTF("removing edge %u->%u\n", g[p].index, + g[s].index); + } else if (is_subset_of(succs(p, g), succs(u, g))) { + if (is_match_vertex(p, g) + && !is_subset_of(g[p].reports, g[v].reports)) { + continue; + } + DEBUG_PRINTF("updating reach on %u\n", g[p].index); + changed |= (g[p].char_reach & g[v].char_reach).any(); + g[p].char_reach &= ~g[v].char_reach; + } + + } + } + remove_edges(dead, g); + } + + DEBUG_PRINTF("changed %d\n", (int)changed); + return changed; +} + +bool prunePathsRedundantWithSuccessorOfCyclics(NGHolder &g, som_type som) { + /* TODO: the reverse form of this is also possible */ + bool changed = false; + for (NFAVertex v : vertices_range(g)) { + if (hasSelfLoop(v, g) && g[v].char_reach.all()) { + changed |= pruneUsingSuccessors(g, v, som); + } + } + + if (changed) { + pruneUseless(g); + clearReports(g); + } + + return changed; +} + } // namespace ue2 diff --git a/src/nfagraph/ng_misc_opt.h b/src/nfagraph/ng_misc_opt.h index 4955c7af..5ed089dc 100644 --- a/src/nfagraph/ng_misc_opt.h +++ b/src/nfagraph/ng_misc_opt.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -72,6 +72,13 @@ std::vector reduced_cr(const NGHolder &g, /** Remove cyclic stars connected to start */ bool mergeCyclicDotStars(NGHolder &g); +/** + * Given a cyclic state 'c' with a broad reach and a later state 'v' that is + * only reachable if c is still on, then any edges to a successor of a direct + * successor of c with reach a superset of v are redundant. + */ +bool prunePathsRedundantWithSuccessorOfCyclics(NGHolder &h, som_type som); + } // namespace ue2 #endif diff --git a/src/nfagraph/ng_region.cpp b/src/nfagraph/ng_region.cpp index 124e9fa5..c7472e0d 100644 --- a/src/nfagraph/ng_region.cpp +++ b/src/nfagraph/ng_region.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -71,7 +71,7 @@ using namespace std; namespace ue2 { typedef ue2::unordered_set BackEdgeSet; -typedef boost::filtered_graph> +typedef boost::filtered_graph> AcyclicGraph; namespace { @@ -454,7 +454,7 @@ ue2::unordered_map assignRegions(const NGHolder &g) { .color_map(make_iterator_property_map( colours.begin(), get(&NFAGraphVertexProps::index, g.g)))); - AcyclicFilter af(&deadEdges); + auto af = make_bad_edge_filter(&deadEdges); AcyclicGraph acyclic_g(g.g, af); // Build a (reverse) topological ordering. 
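
An aside on the Boost.Graph plumbing in this commit: the old AcyclicFilter
was really just an edge-set filter, and the new bad_edge_filter /
make_filtered_graph spelling makes that explicit (its definition appears in
the ng_util.h hunk further down). A self-contained sketch of the same pattern
using plain Boost types rather than ue2's NGHolder (not code from this tree):

    #include <boost/graph/adjacency_list.hpp>
    #include <boost/graph/filtered_graph.hpp>
    #include <boost/graph/topological_sort.hpp>
    #include <boost/property_map/property_map.hpp>
    #include <iterator>
    #include <set>
    #include <vector>

    using Graph = boost::adjacency_list<boost::vecS, boost::vecS,
                                        boost::directedS>;
    using Edge = Graph::edge_descriptor;
    using Vertex = Graph::vertex_descriptor;

    struct bad_edge_filter {
        bad_edge_filter() = default;
        explicit bad_edge_filter(const std::set<Edge> *bad) : bad_edges(bad) {}
        bool operator()(const Edge &e) const {
            return !bad_edges->count(e); // keep edges not in the bad set
        }
        const std::set<Edge> *bad_edges = nullptr;
    };

    // Topological sort of g viewed with the given (e.g. back) edges removed.
    std::vector<Vertex> topoOrderIgnoring(const Graph &g,
                                          const std::set<Edge> &bad) {
        boost::filtered_graph<Graph, bad_edge_filter> acyclic(
            g, bad_edge_filter(&bad));
        std::vector<boost::default_color_type> colour(num_vertices(g));
        std::vector<Vertex> order; /* reverse topological order */
        boost::topological_sort(acyclic, std::back_inserter(order),
            boost::color_map(boost::make_iterator_property_map(
                colour.begin(), get(boost::vertex_index, g))));
        return order;
    }

The predicate stays a single pointer wide, which matters: filtered_graph
copies it into every iterator it hands out.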
diff --git a/src/nfagraph/ng_repeat.cpp b/src/nfagraph/ng_repeat.cpp index 5bff21b0..6eb2a9d7 100644 --- a/src/nfagraph/ng_repeat.cpp +++ b/src/nfagraph/ng_repeat.cpp @@ -138,8 +138,7 @@ void buildTopoOrder(const Graph &g, vector &topoOrder) { depth_first_search(g, visitor(BackEdges(deadEdges)). color_map(make_assoc_property_map(colours))); - AcyclicFilter af(&deadEdges); - boost::filtered_graph > acyclic_g(g, af); + auto acyclic_g = make_filtered_graph(g, make_bad_edge_filter(&deadEdges)); topological_sort(acyclic_g, back_inserter(topoOrder), color_map(make_assoc_property_map(colours))); diff --git a/src/nfagraph/ng_utf8.cpp b/src/nfagraph/ng_utf8.cpp index 719e42e2..352359f2 100644 --- a/src/nfagraph/ng_utf8.cpp +++ b/src/nfagraph/ng_utf8.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -187,10 +187,9 @@ bool expandCyclic(NGHolder &h, NFAVertex v) { DEBUG_PRINTF("inspecting %u\n", h[v].index); bool changes = false; - set v_preds; - set v_succs; - pred(h, v, &v_preds); - succ(h, v, &v_succs); + auto v_preds = preds(v, h); + auto v_succs = succs(v, h); + set start_siblings; set end_siblings; @@ -199,8 +198,7 @@ bool expandCyclic(NGHolder &h, NFAVertex v) { /* We need to find start vertices which have all of our preds. * As we have a self loop, it must be one of our succs. */ for (auto a : adjacent_vertices_range(v, h)) { - set a_preds; - pred(h, a, &a_preds); + auto a_preds = preds(a, h); if (a_preds == v_preds && isutf8start(h[a].char_reach)) { DEBUG_PRINTF("%u is a start v\n", h[a].index); @@ -211,8 +209,7 @@ bool expandCyclic(NGHolder &h, NFAVertex v) { /* We also need to find full cont vertices which have all our own succs; * As we have a self loop, it must be one of our preds. */ for (auto a : inv_adjacent_vertices_range(v, h)) { - set a_succs; - succ(h, a, &a_succs); + auto a_succs = succs(a, h); if (a_succs == v_succs && h[a].char_reach == UTF_CONT_CR) { DEBUG_PRINTF("%u is a full tail cont\n", h[a].index); diff --git a/src/nfagraph/ng_util.cpp b/src/nfagraph/ng_util.cpp index 71eef7eb..de4ca656 100644 --- a/src/nfagraph/ng_util.cpp +++ b/src/nfagraph/ng_util.cpp @@ -403,8 +403,7 @@ vector getTopoOrdering(const NGHolder &g) { colour.begin(), index_map)) .vertex_index_map(index_map)); - AcyclicFilter af(&be.backEdges); - filtered_graph> acyclic_g(g.g, af); + auto acyclic_g = make_filtered_graph(g.g, make_bad_edge_filter(&backEdges)); vector ordering; ordering.reserve(num_verts); @@ -435,9 +434,7 @@ void mustBeSetBefore_int(NFAVertex u, const NGHolder &g, } } - // The AcyclicFilter is badly named, it's really just an edge-set filter. 
- filtered_graph>> prefix(g.g, - AcyclicFilter>(&dead)); + auto prefix = make_filtered_graph(g.g, make_bad_edge_filter(&dead)); depth_first_visit( prefix, g.start, make_dfs_visitor(boost::null_visitor()), diff --git a/src/nfagraph/ng_util.h b/src/nfagraph/ng_util.h index 6c6907a3..6b5090ce 100644 --- a/src/nfagraph/ng_util.h +++ b/src/nfagraph/ng_util.h @@ -70,6 +70,13 @@ void succ(const NGHolder &g, NFAVertex v, U *s) { s->insert(ai, ae); } +template> +ContTemp succs(NFAVertex u, const NGHolder &g) { + ContTemp rv; + succ(g, u, &rv); + return rv; +} + /** adds predecessors of v to s */ template static really_inline @@ -79,6 +86,13 @@ void pred(const NGHolder &g, NFAVertex v, U *p) { p->insert(it, ite); } +template> +ContTemp preds(NFAVertex u, const NGHolder &g) { + ContTemp rv; + pred(g, u, &rv); + return rv; +} + /** returns a vertex with an out edge from v and is not v. * v must have exactly one out-edge excluding self-loops. * will return NGHolder::null_vertex() if the preconditions don't hold. @@ -88,6 +102,30 @@ NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex v); /** Like getSoleDestVertex but for in-edges */ NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex v); +/** \brief edge filtered graph. + * + * This will give you a view over the graph that has none of the edges from + * the provided set included. + * + * If this is provided with the back edges of the graph, this will result in an + * acyclic subgraph view. This is useful for topological_sort and other + * algorithms that require a DAG. + */ +template +struct bad_edge_filter { + bad_edge_filter() {} + explicit bad_edge_filter(const EdgeSet *bad_e) : bad_edges(bad_e) {} + bool operator()(const typename EdgeSet::value_type &e) const { + return !contains(*bad_edges, e); /* keep edges not in the bad set */ + } + const EdgeSet *bad_edges = nullptr; +}; + +template +bad_edge_filter make_bad_edge_filter(const EdgeSet *e) { + return bad_edge_filter(e); +} + /** Visitor that records back edges */ template class BackEdges : public boost::default_dfs_visitor { @@ -100,23 +138,6 @@ public: BackEdgeSet &backEdges; }; -/** \brief Acyclic filtered graph. - * - * This will give you a view over the graph that is directed and acyclic: - * useful for topological_sort and other algorithms that require a DAG. - */ -template -struct AcyclicFilter { - AcyclicFilter() {} - explicit AcyclicFilter(const BackEdgeSet *edges) : backEdges(edges) {} - template - bool operator()(const EdgeT &e) const { - // Only keep edges that aren't in the back edge set. - return (backEdges->find(e) == backEdges->end()); - } - const BackEdgeSet *backEdges = nullptr; -}; - /** * Generic code to renumber all the vertices in a graph. Assumes that we're * using a vertex_index property of type u32, and that we always have diff --git a/src/rose/rose_build_convert.cpp b/src/rose/rose_build_convert.cpp index d3fa1ac6..dfc0ed23 100644 --- a/src/rose/rose_build_convert.cpp +++ b/src/rose/rose_build_convert.cpp @@ -733,10 +733,8 @@ bool handleStartDsPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, u32 repeatCount = 0; NFAVertex hu = h.startDs; - set start_succ; - set startds_succ; - succ(h, h.start, &start_succ); - succ(h, h.startDs, &startds_succ); + auto start_succ = succs>(h.start, h); + auto startds_succ = succs>(h.startDs, h); if (!is_subset_of(start_succ, startds_succ)) { DEBUG_PRINTF("not a simple chain\n"); @@ -790,10 +788,8 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, NFAVertex base = anchored ? 
h.start : h.startDs; if (!anchored) { - set start_succ; - set startds_succ; - succ(h, h.start, &start_succ); - succ(h, h.startDs, &startds_succ); + auto start_succ = succs>(h.start, h); + auto startds_succ = succs>(h.startDs, h); if (!is_subset_of(start_succ, startds_succ)) { DEBUG_PRINTF("not a simple chain\n"); @@ -852,8 +848,7 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, exits = exits_and_repeat_verts; erase_all(&exits, rep_verts); - set base_succ; - succ(h, base, &base_succ); + auto base_succ = succs>(base, h); base_succ.erase(h.startDs); if (is_subset_of(base_succ, rep_verts)) { diff --git a/src/util/graph.h b/src/util/graph.h index 90589f14..d15e77aa 100644 --- a/src/util/graph.h +++ b/src/util/graph.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -291,6 +291,22 @@ bool is_dag(const Graph &g, bool ignore_self_loops = false) { return true; } +template +class vertex_recorder : public boost::default_dfs_visitor { +public: + explicit vertex_recorder(Cont &o) : out(o) {} + template + void discover_vertex(typename Cont::value_type v, const G &) { + out.insert(v); + } + Cont &out; +}; + +template +vertex_recorder make_vertex_recorder(Cont &o) { + return vertex_recorder(o); +} + template std::pair add_edge_if_not_present(typename Graph::vertex_descriptor u, From 779bebfd127b33dc45a3805cc09e9956c032242f Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Fri, 7 Oct 2016 10:06:46 +1100 Subject: [PATCH 045/103] fix for analysis in previous commit Properly distinguish between vertices that will get set after the cyclic from vertices that may get set alongside the cyclic --- src/nfagraph/ng_misc_opt.cpp | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/nfagraph/ng_misc_opt.cpp b/src/nfagraph/ng_misc_opt.cpp index 3f685226..584c001f 100644 --- a/src/nfagraph/ng_misc_opt.cpp +++ b/src/nfagraph/ng_misc_opt.cpp @@ -604,6 +604,18 @@ flat_set findDependentVertices(const NGHolder &g, NFAVertex v) { return rv; } +static +bool willBeEnabledConcurrently(NFAVertex main_cyclic, NFAVertex v, + const NGHolder &g) { + return is_subset_of(preds(main_cyclic, g), preds(v, g)); +} + +static +bool sometimesEnabledConcurrently(NFAVertex main_cyclic, NFAVertex v, + const NGHolder &g) { + return has_intersection(preds(main_cyclic, g), preds(v, g)); +} + static bool pruneUsingSuccessors(NGHolder &g, NFAVertex u, som_type som) { if (som && (is_virtual_start(u, g) || u == g.startDs)) { @@ -628,6 +640,10 @@ bool pruneUsingSuccessors(NGHolder &g, NFAVertex u, som_type som) { }); for (NFAVertex v : u_succs) { DEBUG_PRINTF(" using %u as killer\n", g[v].index); + /* Need to distinguish between vertices that are switched on after the + * cyclic vs vertices that are switched on concurrently with the cyclic + * if (subject to a suitable reach) */ + bool v_peer_of_cyclic = willBeEnabledConcurrently(u, v, g); set dead; for (NFAVertex s : adjacent_vertices_range(v, g)) { DEBUG_PRINTF(" looking at preds of %u\n", g[s].index); @@ -642,6 +658,17 @@ bool pruneUsingSuccessors(NGHolder &g, NFAVertex u, som_type som) { DEBUG_PRINTF("%u bad reports\n", g[p].index); continue; } + /* the out-edges of a vertex that may be enabled on the same + * byte as the cyclic can only be killed by the out-edges of a + * peer vertex which will be enabled with the cyclic (a non-peer + 
* may not be switched on until another byte is processed). */ + if (!v_peer_of_cyclic + && sometimesEnabledConcurrently(u, p, g)) { + DEBUG_PRINTF("%u can only be squashed by a proper peer\n", + g[p].index); + continue; + } + if (g[p].char_reach.isSubsetOf(g[v].char_reach)) { dead.insert(e); changed = true; From 1a24b0b4dbb5fc8a7a56887ab7ba30136c6fcde2 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 11 Oct 2016 14:03:28 +1100 Subject: [PATCH 046/103] ng_equivalence: don't use ptr_vector Switch over ptr_vector to vector>. This works around some issues we were seeing with MSVC builds, where the contents of the ptr_vector were being destroyed when it was returned. --- src/nfagraph/ng_equivalence.cpp | 87 +++++++++++++++++---------------- 1 file changed, 45 insertions(+), 42 deletions(-) diff --git a/src/nfagraph/ng_equivalence.cpp b/src/nfagraph/ng_equivalence.cpp index 383b6c75..6f8f6532 100644 --- a/src/nfagraph/ng_equivalence.cpp +++ b/src/nfagraph/ng_equivalence.cpp @@ -38,17 +38,16 @@ #include "ng_util.h" #include "util/compile_context.h" #include "util/graph_range.h" +#include "util/make_unique.h" #include "util/ue2_containers.h" #include +#include #include #include #include -#include - using namespace std; -using boost::ptr_vector; namespace ue2 { @@ -276,47 +275,47 @@ bool hasEdgeAsserts(NFAVertex v, const NGHolder &g) { // populate VertexInfo table static -ptr_vector getVertexInfos(const NGHolder &g) { +vector> getVertexInfos(const NGHolder &g) { const size_t num_verts = num_vertices(g); - ptr_vector infos; + vector> infos; infos.reserve(num_verts * 2); vector vertex_map; // indexed by vertex_index property vertex_map.resize(num_verts); for (auto v : vertices_range(g)) { - VertexInfo *vi = new VertexInfo(v, g); - - // insert our new shiny VertexInfo into the info map - infos.push_back(vi); - - vertex_map[g[v].index] = vi; + infos.push_back(make_unique(v, g)); + vertex_map[g[v].index] = infos.back().get(); } - // now, go through each vertex and populate its predecessor and successor lists - for (VertexInfo &cur_vi : infos) { - // find predecessors - for (const auto &e : in_edges_range(cur_vi.v, g)) { - NFAVertex u = source(e, g); - VertexInfo *vmi = vertex_map[g[u].index]; + // now, go through each vertex and populate its predecessor and successor + // lists + for (auto &vi : infos) { + assert(vi); + NFAVertex v = vi->v; - cur_vi.pred_cr |= vmi->cr; - cur_vi.pred.insert(vmi); + // find predecessors + for (const auto &e : in_edges_range(v, g)) { + NFAVertex u = source(e, g); + VertexInfo *u_vi = vertex_map[g[u].index]; + + vi->pred_cr |= u_vi->cr; + vi->pred.insert(u_vi); // also set up edge tops if (is_triggered(g) && u == g.start) { - cur_vi.edge_tops = g[e].tops; + vi->edge_tops = g[e].tops; } } // find successors - for (auto w : adjacent_vertices_range(cur_vi.v, g)) { - VertexInfo *vmi = vertex_map[g[w].index]; - cur_vi.succ_cr |= vmi->cr; - cur_vi.succ.insert(vmi); + for (auto w : adjacent_vertices_range(v, g)) { + VertexInfo *w_vi = vertex_map[g[w].index]; + vi->succ_cr |= w_vi->cr; + vi->succ.insert(w_vi); } - assert(!hasEdgeAsserts(cur_vi.v, g)); + assert(!hasEdgeAsserts(vi->v, g)); } return infos; @@ -324,7 +323,7 @@ ptr_vector getVertexInfos(const NGHolder &g) { // store equivalence class in VertexInfo for each vertex static -vector partitionGraph(ptr_vector &infos, +vector partitionGraph(vector> &infos, WorkQueue &work_queue, const NGHolder &g, EquivalenceType eq) { const size_t num_verts = infos.size(); @@ -349,28 +348,30 @@ vector partitionGraph(ptr_vector &infos, 
} // partition the graph based on CharReach - for (VertexInfo &vi : infos) { + for (auto &vi : infos) { + assert(vi); + ClassInfo::ClassDepth depth; if (eq == LEFT_EQUIVALENCE) { - depth = depths[vi.vert_index]; + depth = depths[vi->vert_index]; } else { - depth = rdepths[vi.vert_index]; + depth = rdepths[vi->vert_index]; } - ClassInfo ci(g, vi, depth, eq); + ClassInfo ci(g, *vi, depth, eq); auto ii = classinfomap.find(ci); if (ii == classinfomap.end()) { // vertex is in a new equivalence class by itself. unsigned eq_class = classes.size(); - vi.equivalence_class = eq_class; - classes.push_back({&vi}); + vi->equivalence_class = eq_class; + classes.push_back({vi.get()}); classinfomap.emplace(move(ci), eq_class); } else { // vertex is added to an existing class. unsigned eq_class = ii->second; - vi.equivalence_class = eq_class; - classes.at(eq_class).insert(&vi); + vi->equivalence_class = eq_class; + classes.at(eq_class).insert(vi.get()); // we now know that this particular class has more than one // vertex, so we add it to the work queue @@ -500,8 +501,9 @@ bool require_separate_eod_vertex(const VertexInfoSet &vert_infos, } static -void mergeClass(ptr_vector &infos, NGHolder &g, unsigned eq_class, - VertexInfoSet &cur_class_vertices, set *toRemove) { +void mergeClass(vector> &infos, NGHolder &g, + unsigned eq_class, VertexInfoSet &cur_class_vertices, + set *toRemove) { DEBUG_PRINTF("Replacing %zd vertices from equivalence class %u with a " "single vertex.\n", cur_class_vertices.size(), eq_class); @@ -529,9 +531,9 @@ void mergeClass(ptr_vector &infos, NGHolder &g, unsigned eq_class, * props */ g[new_v].reports.clear(); /* populated as we pull in succs */ - VertexInfo *new_vertex_info = new VertexInfo(new_v, g); // store this vertex in our global vertex list - infos.push_back(new_vertex_info); + infos.push_back(make_unique(new_v, g)); + VertexInfo *new_vertex_info = infos.back().get(); NFAVertex new_v_eod = NGHolder::null_vertex(); VertexInfo *new_vertex_info_eod = nullptr; @@ -539,8 +541,8 @@ void mergeClass(ptr_vector &infos, NGHolder &g, unsigned eq_class, if (require_separate_eod_vertex(cur_class_vertices, g)) { new_v_eod = clone_vertex(g, old_v); g[new_v_eod].reports.clear(); - new_vertex_info_eod = new VertexInfo(new_v_eod, g); - infos.push_back(new_vertex_info_eod); + infos.push_back(make_unique(new_v_eod, g)); + new_vertex_info_eod = infos.back().get(); } const auto &edgetops = (*cur_class_vertices.begin())->edge_tops; @@ -627,7 +629,8 @@ void mergeClass(ptr_vector &infos, NGHolder &g, unsigned eq_class, // report behaviour with a single vertex). 
static bool mergeEquivalentClasses(vector &classes, - ptr_vector &infos, NGHolder &g) { + vector> &infos, + NGHolder &g) { bool merged = false; set toRemove; @@ -657,7 +660,7 @@ bool reduceGraphEquivalences(NGHolder &g, EquivalenceType eq_type) { // get information on every vertex in the graph // new vertices are allocated here, and stored in infos - ptr_vector infos = getVertexInfos(g); + auto infos = getVertexInfos(g); // partition the graph auto classes = partitionGraph(infos, work_queue, g, eq_type); From e108cb841f585e0d88e5f3c58e1f50d16cdf42e9 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 12 Oct 2016 09:49:09 +1100 Subject: [PATCH 047/103] ng_restructuring: wire start to tops in idx order --- src/nfagraph/ng_restructuring.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/nfagraph/ng_restructuring.cpp b/src/nfagraph/ng_restructuring.cpp index 46990330..3b30a689 100644 --- a/src/nfagraph/ng_restructuring.cpp +++ b/src/nfagraph/ng_restructuring.cpp @@ -52,7 +52,11 @@ namespace ue2 { static void wireStartToTops(NGHolder &g, const flat_set &tops, vector &tempEdges) { - for (NFAVertex v : tops) { + // Construct edges in vertex index order, for determinism. + vector ordered_tops(begin(tops), end(tops)); + sort(begin(ordered_tops), end(ordered_tops), make_index_ordering(g)); + + for (NFAVertex v : ordered_tops) { assert(!isLeafNode(v, g)); const NFAEdge &e = add_edge(g.start, v, g).first; From 21a1b47637f064515240188d351f54347fa188fa Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 12 Oct 2016 09:53:12 +1100 Subject: [PATCH 048/103] ng_limex: add edges in deterministic ordering --- src/nfagraph/ng_limex.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp index 5f782460..5e5a18d9 100644 --- a/src/nfagraph/ng_limex.cpp +++ b/src/nfagraph/ng_limex.cpp @@ -177,7 +177,12 @@ NFAVertex makeTopStartVertex(NGHolder &g, const flat_set &tops, NFAVertex u = add_vertex(g[g.start], g); CharReach top_cr = calcTopVertexReach(tops, top_reach); g[u].char_reach = top_cr; - for (auto v : succs) { + + // Add edges in vertex index order, for determinism. 
+ vector ordered_succs(begin(succs), end(succs)); + sort(begin(ordered_succs), end(ordered_succs), make_index_ordering(g)); + + for (auto v : ordered_succs) { if (v == g.accept || v == g.acceptEod) { reporter = true; } From c67a3610804dd2299cf75f5746dd45a2744385b1 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 11 Oct 2016 11:39:26 +1100 Subject: [PATCH 049/103] smallwrite: prune overlong nfa graphs early --- src/smallwrite/smallwrite_build.cpp | 80 ++++++++++++++++++++++++++--- 1 file changed, 74 insertions(+), 6 deletions(-) diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index 90770ba5..65361c6f 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -36,10 +36,11 @@ #include "nfa/rdfa_merge.h" #include "nfa/shengcompile.h" #include "nfagraph/ng.h" +#include "nfagraph/ng_depth.h" #include "nfagraph/ng_holder.h" #include "nfagraph/ng_mcclellan.h" +#include "nfagraph/ng_prune.h" #include "nfagraph/ng_util.h" -#include "nfagraph/ng_width.h" #include "smallwrite/smallwrite_internal.h" #include "util/alloc.h" #include "util/charreach.h" @@ -101,6 +102,74 @@ SmallWriteBuildImpl::SmallWriteBuildImpl(size_t num_patterns, || num_patterns > cc.grey.smallWriteMaxPatterns) { } +/** + * \brief Remove any reports from the given vertex that cannot match within + * max_depth due to their constraints. + */ +static +bool pruneOverlongReports(NFAVertex v, NGHolder &g, const depth &max_depth, + const ReportManager &rm) { + assert(!g[v].reports.empty()); + + vector bad_reports; + + for (ReportID id : g[v].reports) { + const auto &report = rm.getReport(id); + if (report.minOffset > max_depth) { + bad_reports.push_back(id); + } + } + + for (ReportID id : bad_reports) { + g[v].reports.erase(id); + } + + if (g[v].reports.empty()) { + DEBUG_PRINTF("none of vertex %u's reports can match, cut accepts\n", + g[v].index); + remove_edge(v, g.accept, g); + remove_edge(v, g.acceptEod, g); + } + + return !bad_reports.empty(); +} + +/** + * \brief Prune vertices and reports from the graph that cannot match within + * max_depth. + */ +static +bool pruneOverlong(NGHolder &g, const depth &max_depth, + const ReportManager &rm) { + bool modified = false; + std::vector depths; + calcDepths(g, depths); + + for (auto v : vertices_range(g)) { + if (is_special(v, g)) { + continue; + } + const auto &d = depths.at(g[v].index); + depth min_depth = min(d.fromStart.min, d.fromStartDotStar.min); + if (min_depth > max_depth) { + clear_vertex(v, g); + modified = true; + continue; + } + + if (is_match_vertex(v, g)) { + modified |= pruneOverlongReports(v, g, max_depth, rm); + } + } + + if (modified) { + pruneUseless(g); + DEBUG_PRINTF("pruned graph down to %zu vertices\n", num_vertices(g)); + } + + return modified; +} + void SmallWriteBuildImpl::add(const NGWrapper &w) { // If the graph is poisoned (i.e. we can't build a SmallWrite version), // we don't even try. @@ -118,13 +187,12 @@ void SmallWriteBuildImpl::add(const NGWrapper &w) { // make a copy of the graph so that we can modify it for our purposes unique_ptr h = cloneHolder(w); + pruneOverlong(*h, depth(cc.grey.smallWriteLargestBuffer), rm); + reduceGraph(*h, SOM_NONE, w.utf8, cc); - // If the earliest match location is outside the small write region, - // then we don't need to build a SmallWrite version. - // However, we don't poison this case either, since it is simply a case, - // where we know the resulting graph won't match. 
- if (findMinWidth(*h) > depth(cc.grey.smallWriteLargestBuffer)) { + if (can_never_match(*h)) { + DEBUG_PRINTF("graph can never match in small block\n"); return; } From 054749f9ee22cf4bdbe700ab2c86136f520d6499 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 11 Oct 2016 15:36:16 +1100 Subject: [PATCH 050/103] smallwrite: minimize DFAs if they have been pruned --- src/smallwrite/smallwrite_build.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index 65361c6f..d395a7af 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -30,6 +30,7 @@ #include "grey.h" #include "ue2common.h" +#include "nfa/dfa_min.h" #include "nfa/mcclellancompile.h" #include "nfa/mcclellancompile_util.h" #include "nfa/nfa_internal.h" @@ -208,7 +209,9 @@ void SmallWriteBuildImpl::add(const NGWrapper &w) { return; } - prune_overlong(*r, cc.grey.smallWriteLargestBuffer); + if (prune_overlong(*r, cc.grey.smallWriteLargestBuffer)) { + minimize_hopcroft(*r, cc.grey); + } if (rdfa) { // do a merge of the new dfa with the existing dfa @@ -418,6 +421,7 @@ aligned_unique_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, return nullptr; } if (prune_overlong(rdfa, *small_region - *start_offset)) { + minimize_hopcroft(rdfa, cc.grey); if (rdfa.start_anchored == DEAD_STATE) { DEBUG_PRINTF("all patterns pruned out\n"); return nullptr; From 91a7ce1cda6b90c9fa2d1b5a81dbe2f019a8d64f Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 12 Oct 2016 14:59:20 +1100 Subject: [PATCH 051/103] getData256(): data needs to be 32-byte aligned --- src/rose/program_runtime.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 3c94f543..5b2c829f 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -875,7 +875,7 @@ m256 getData256(const struct core_info *ci, s64a offset, u32 *valid_data_mask) { *valid_data_mask = ~0u; return loadu256(ci->buf + offset); } - ALIGN_DIRECTIVE u8 data[sizeof(m256)]; + ALIGN_AVX_DIRECTIVE u8 data[sizeof(m256)]; *valid_data_mask = getBufferDataComplex(ci, offset, data, 32); return *(m256 *)data; } From 29472c7b7142111d54ed0ee54e86d7367ddff5b1 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 14 Oct 2016 09:14:28 +1100 Subject: [PATCH 052/103] rose_dump: remove stray newline --- src/rose/rose_dump.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 1ab11f9f..36156a42 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -285,7 +285,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { const u8 *reach_base = base + t->lookaroundReachOffset; const u8 *reach = reach_base + ri->reach_index * REACH_BITVECTOR_LEN; - os << " contents:" << endl; + os << " contents "; describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT); os << endl; } From 2341fe7baaba71c3abadb3fd41f45df2d67e906d Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 27 Oct 2016 14:26:00 +1100 Subject: [PATCH 053/103] use stable_sort in analysis from 47f53f6; missed review comment --- src/nfagraph/ng_misc_opt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nfagraph/ng_misc_opt.cpp b/src/nfagraph/ng_misc_opt.cpp index 584c001f..716802ba 100644 --- a/src/nfagraph/ng_misc_opt.cpp +++ b/src/nfagraph/ng_misc_opt.cpp @@ -634,7 +634,7 @@ bool pruneUsingSuccessors(NGHolder &g, NFAVertex u, 
som_type som) { } u_succs.push_back(v); } - sort(u_succs.begin(), u_succs.end(), + stable_sort(u_succs.begin(), u_succs.end(), [&](NFAVertex a, NFAVertex b) { return g[a].char_reach.count() > g[b].char_reach.count(); }); From 05683655cb4b08e2fd6703591968cc22eece47ef Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 27 Oct 2016 14:29:34 +1100 Subject: [PATCH 054/103] remove unused define and old inlining controls --- src/nfa/mcclellan.c | 4 ++-- src/nfa/mcclellan_common_impl.h | 8 -------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/src/nfa/mcclellan.c b/src/nfa/mcclellan.c index 992f78e2..b8ca75e9 100644 --- a/src/nfa/mcclellan.c +++ b/src/nfa/mcclellan.c @@ -583,7 +583,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, } } -static really_inline really_flatten +static really_inline char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, const u8 *buffer, size_t length, NfaCallback cb, void *context, char single) { @@ -732,7 +732,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, } } -static really_inline really_flatten +static really_inline char nfaExecMcClellan8_Bi(const struct NFA *n, u64a offset, const u8 *buffer, size_t length, NfaCallback cb, void *context, char single) { diff --git a/src/nfa/mcclellan_common_impl.h b/src/nfa/mcclellan_common_impl.h index e3bcf43e..4906ce5b 100644 --- a/src/nfa/mcclellan_common_impl.h +++ b/src/nfa/mcclellan_common_impl.h @@ -26,14 +26,6 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#if defined(__INTEL_COMPILER) || defined(__clang__) || defined(_WIN32) || defined(__GNUC__) && (__GNUC__ < 4) -#define really_flatten -#else -#define really_flatten __attribute__ ((flatten)) -#endif - -#define CASE_MASK 0xdf - enum MatchMode { CALLBACK_OUTPUT, STOP_AT_MATCH, From e1e9010cac3bc1b23ef5caf5f94b2984ca82a881 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 24 Aug 2016 16:12:51 +1000 Subject: [PATCH 055/103] Introduce custom adjacency-list based graph --- CMakeLists.txt | 2 +- src/compiler/asserts.cpp | 25 +- src/nfa/accel_dfa_build_strat.cpp | 9 - src/nfa/castlecompile.cpp | 3 +- src/nfa/limex_compile.cpp | 25 +- src/nfagraph/ng.cpp | 1 + src/nfagraph/ng_anchored_dots.cpp | 29 +- src/nfagraph/ng_asserts.cpp | 18 +- src/nfagraph/ng_builder.cpp | 20 +- src/nfagraph/ng_calc_components.cpp | 20 +- src/nfagraph/ng_cyclic_redundancy.cpp | 49 +- src/nfagraph/ng_depth.cpp | 68 +- src/nfagraph/ng_dominators.cpp | 30 +- src/nfagraph/ng_dump.cpp | 12 +- src/nfagraph/ng_edge_redundancy.cpp | 14 +- src/nfagraph/ng_equivalence.cpp | 6 +- src/nfagraph/ng_execute.cpp | 13 +- src/nfagraph/ng_expr_info.cpp | 4 +- src/nfagraph/ng_extparam.cpp | 29 +- src/nfagraph/ng_fixed_width.cpp | 4 +- src/nfagraph/ng_graph.h | 114 -- src/nfagraph/ng_haig.cpp | 8 +- src/nfagraph/ng_holder.cpp | 163 +-- src/nfagraph/ng_holder.h | 266 ++-- src/nfagraph/ng_is_equal.cpp | 22 +- src/nfagraph/ng_limex.cpp | 27 +- src/nfagraph/ng_limex_accel.cpp | 8 +- src/nfagraph/ng_literal_analysis.cpp | 5 +- src/nfagraph/ng_literal_component.cpp | 6 +- src/nfagraph/ng_literal_decorated.cpp | 15 +- src/nfagraph/ng_mcclellan.cpp | 2 +- src/nfagraph/ng_misc_opt.cpp | 42 +- src/nfagraph/ng_netflow.cpp | 16 +- src/nfagraph/ng_prefilter.cpp | 18 +- src/nfagraph/ng_prune.cpp | 52 +- src/nfagraph/ng_puff.cpp | 28 +- src/nfagraph/ng_redundancy.cpp | 54 +- src/nfagraph/ng_region.cpp | 61 +- src/nfagraph/ng_region.h | 8 +- src/nfagraph/ng_region_redundancy.cpp | 16 +- src/nfagraph/ng_repeat.cpp | 169 ++- 
src/nfagraph/ng_restructuring.cpp | 12 +- src/nfagraph/ng_rose.cpp | 43 +- src/nfagraph/ng_small_literal_set.cpp | 8 +- src/nfagraph/ng_som.cpp | 87 +- src/nfagraph/ng_som_add_redundancy.cpp | 6 +- src/nfagraph/ng_som_util.cpp | 30 +- src/nfagraph/ng_split.cpp | 6 +- src/nfagraph/ng_squash.cpp | 25 +- src/nfagraph/ng_uncalc_components.cpp | 8 +- src/nfagraph/ng_uncalc_components.h | 3 +- src/nfagraph/ng_undirected.h | 29 +- src/nfagraph/ng_utf8.cpp | 16 +- src/nfagraph/ng_util.cpp | 121 +- src/nfagraph/ng_util.h | 80 +- src/nfagraph/ng_violet.cpp | 15 +- src/nfagraph/ng_width.cpp | 27 +- src/rose/rose_build_add.cpp | 57 +- src/rose/rose_build_add_mask.cpp | 2 +- src/rose/rose_build_anchored.cpp | 2 +- src/rose/rose_build_bytecode.cpp | 72 +- src/rose/rose_build_castle.cpp | 4 +- src/rose/rose_build_compile.cpp | 43 +- src/rose/rose_build_convert.cpp | 32 +- src/rose/rose_build_dump.cpp | 12 +- src/rose/rose_build_groups.cpp | 11 +- src/rose/rose_build_impl.h | 3 - src/rose/rose_build_infix.cpp | 6 +- src/rose/rose_build_lookaround.cpp | 4 +- src/rose/rose_build_matchers.cpp | 24 +- src/rose/rose_build_merge.cpp | 48 +- src/rose/rose_build_misc.cpp | 26 +- src/rose/rose_build_role_aliasing.cpp | 54 +- src/rose/rose_build_util.h | 25 - src/rose/rose_build_width.cpp | 15 +- src/rose/rose_graph.h | 22 +- src/rose/rose_in_dump.cpp | 2 +- src/rose/rose_in_graph.h | 12 +- src/rose/rose_in_util.cpp | 27 +- src/smallwrite/smallwrite_build.cpp | 2 +- src/som/slot_manager.h | 4 +- src/util/dump_charclass.cpp | 11 +- src/util/dump_charclass.h | 5 + src/util/graph.h | 157 +-- src/util/graph_range.h | 3 +- src/util/ue2_graph.h | 1083 +++++++++++++++ unit/internal/graph.cpp | 1689 ++++++++++++++++++++++-- unit/internal/nfagraph_equivalence.cpp | 35 +- unit/internal/nfagraph_redundancy.cpp | 36 +- unit/internal/rose_build_merge.cpp | 1 - util/ng_corpus_generator.cpp | 14 +- util/ng_find_matches.cpp | 2 +- 92 files changed, 3730 insertions(+), 1812 deletions(-) delete mode 100644 src/nfagraph/ng_graph.h create mode 100644 src/util/ue2_graph.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 6f506e9b..8def2baf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -711,7 +711,6 @@ SET (hs_SRCS src/nfagraph/ng_extparam.h src/nfagraph/ng_fixed_width.cpp src/nfagraph/ng_fixed_width.h - src/nfagraph/ng_graph.h src/nfagraph/ng_haig.cpp src/nfagraph/ng_haig.h src/nfagraph/ng_holder.cpp @@ -933,6 +932,7 @@ SET (hs_SRCS src/util/target_info.cpp src/util/target_info.h src/util/ue2_containers.h + src/util/ue2_graph.h src/util/ue2string.cpp src/util/ue2string.h src/util/unaligned.h diff --git a/src/compiler/asserts.cpp b/src/compiler/asserts.cpp index 0365e268..e67fd8bc 100644 --- a/src/compiler/asserts.cpp +++ b/src/compiler/asserts.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -117,11 +117,11 @@ typedef map, NFAEdge> edge_cache_t; static void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache, u32 &assert_edge_count) { - DEBUG_PRINTF("replacing assert vertex %u\n", g[t].index); + DEBUG_PRINTF("replacing assert vertex %zu\n", g[t].index); const u32 flags = g[t].assert_flags; - DEBUG_PRINTF("consider assert vertex %u with flags %u\n", - g[t].index, flags); + DEBUG_PRINTF("consider assert vertex %zu with flags %u\n", g[t].index, + flags); // Wire up all the predecessors to all the 
successors. @@ -142,7 +142,7 @@ void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache, for (const auto &outEdge : out_edges_range(t, g)) { NFAVertex v = target(outEdge, g); - DEBUG_PRINTF("consider path [%u,%u,%u]\n", g[u].index, + DEBUG_PRINTF("consider path [%zu,%zu,%zu]\n", g[u].index, g[t].index, g[v].index); if (v == t) { @@ -173,8 +173,7 @@ void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache, auto cache_key = make_pair(u, v); auto ecit = edge_cache.find(cache_key); if (ecit == edge_cache.end()) { - DEBUG_PRINTF("adding edge %u %u\n", g[u].index, - g[v].index); + DEBUG_PRINTF("adding edge %zu %zu\n", g[u].index, g[v].index); NFAEdge e = add_edge(u, v, g).first; edge_cache.emplace(cache_key, e); g[e].assert_flags = flags; @@ -184,7 +183,7 @@ void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache, } } else { NFAEdge e = ecit->second; - DEBUG_PRINTF("updating edge %u %u [a %u]\n", g[u].index, + DEBUG_PRINTF("updating edge %zu %zu [a %zu]\n", g[u].index, g[v].index, g[t].index); // Edge already exists. u32 &e_flags = g[e].assert_flags; @@ -211,8 +210,7 @@ void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) { Report r = rm.getBasicInternalReport(g, adj); g[v].reports.insert(rm.getInternalId(r)); - DEBUG_PRINTF("set report id for vertex %u, adj %d\n", - g[v].index, adj); + DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj); } static @@ -222,8 +220,7 @@ void checkForMultilineStart(ReportManager &rm, NGWrapper &g) { if (!(g[v].assert_flags & POS_FLAG_MULTILINE_START)) { continue; } - DEBUG_PRINTF("mls %u %08x\n", g[v].index, - g[v].assert_flags); + DEBUG_PRINTF("mls %zu %08x\n", g[v].index, g[v].assert_flags); /* we have found a multi-line start (maybe more than one) */ @@ -299,8 +296,8 @@ void removeAssertVertices(ReportManager &rm, NGWrapper &g) { DEBUG_PRINTF("resolved %zu assert vertices\n", num); pruneUseless(g); pruneEmptyVertices(g); - g.renumberVertices(); - g.renumberEdges(); + renumber_vertices(g); + renumber_edges(g); } DEBUG_PRINTF("after: graph has %zu vertices\n", num_vertices(g)); diff --git a/src/nfa/accel_dfa_build_strat.cpp b/src/nfa/accel_dfa_build_strat.cpp index ba21adc7..70d2d103 100644 --- a/src/nfa/accel_dfa_build_strat.cpp +++ b/src/nfa/accel_dfa_build_strat.cpp @@ -56,15 +56,6 @@ struct path { }; }; -static UNUSED -string describeClasses(const vector &v) { - std::ostringstream oss; - for (const auto &cr : v) { - describeClass(oss, cr); - } - return oss.str(); -} - static void dump_paths(const vector &paths) { for (UNUSED const auto &p : paths) { diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index b76078f9..fb685f21 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -58,6 +58,7 @@ #include #include +#include #include using namespace std; @@ -981,7 +982,7 @@ unique_ptr makeHolder(const CastleProto &proto, addToHolder(*g, m.first, m.second); } - //dumpGraph("castle_holder.dot", g->g); + //dumpGraph("castle_holder.dot", *g); // Sanity checks. assert(allMatchStatesHaveReports(*g)); diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index b7ea93d9..481113e3 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -494,7 +494,7 @@ void nfaFindAccelSchemes(const NGHolder &g, // We want to skip any vertices that don't lead to at least one other // (self-loops don't count) vertex. 
if (!has_proper_successor(v, g)) { - DEBUG_PRINTF("skipping vertex %u\n", g[v].index); + DEBUG_PRINTF("skipping vertex %zu\n", g[v].index); continue; } @@ -502,7 +502,7 @@ void nfaFindAccelSchemes(const NGHolder &g, AccelScheme as; if (nfaCheckAccel(g, v, refined_cr, br_cyclic, &as, allow_wide)) { - DEBUG_PRINTF("graph vertex %u is accelerable with offset %u.\n", + DEBUG_PRINTF("graph vertex %zu is accelerable with offset %u.\n", g[v].index, as.offset); (*out)[v] = as; } @@ -514,7 +514,7 @@ struct fas_visitor : public boost::default_bfs_visitor { ue2::unordered_map *out_in) : accel_map(am_in), out(out_in) {} - void discover_vertex(NFAVertex v, const NFAGraph &) { + void discover_vertex(NFAVertex v, const NGHolder &) { if (accel_map.find(v) != accel_map.end()) { (*out)[v] = accel_map.find(v)->second; } @@ -552,11 +552,10 @@ void filterAccelStates(NGHolder &g, const map> &tops, try { vector colour(num_vertices(g)); - breadth_first_search( - g.g, g.start, + boost::breadth_first_search(g, g.start, visitor(fas_visitor(*accel_map, &out)) - .color_map(make_iterator_property_map( - colour.begin(), get(&NFAGraphVertexProps::index, g.g)))); + .color_map(make_iterator_property_map(colour.begin(), + get(vertex_index, g)))); } catch (fas_visitor *) { ; /* found max accel_states */ } @@ -628,7 +627,7 @@ void fillAccelInfo(build_info &bi) { /* for each subset of the accel keys need to find an accel scheme */ assert(astates.size() < 32); - sort(astates.begin(), astates.end(), make_index_ordering(g)); + sort(astates.begin(), astates.end()); for (u32 i = 1, i_end = 1U << astates.size(); i < i_end; i++) { DEBUG_PRINTF("saving info for accel %u\n", i); @@ -2335,8 +2334,7 @@ bool isSane(const NGHolder &h, const map> &tops, for (auto v : vertices_range(h)) { if (!contains(state_ids, v)) { - DEBUG_PRINTF("no entry for vertex %u in state map\n", - h[v].index); + DEBUG_PRINTF("no entry for vertex %zu in state map\n", h[v].index); return false; } const u32 i = state_ids.at(v); @@ -2344,8 +2342,7 @@ bool isSane(const NGHolder &h, const map> &tops, continue; } - DEBUG_PRINTF("checking vertex %u (state %u)\n", h[v].index, - i); + DEBUG_PRINTF("checking vertex %zu (state %u)\n", h[v].index, i); if (i >= num_states || contains(seen, i)) { DEBUG_PRINTF("vertex %u/%u has invalid state\n", i, num_states); @@ -2355,7 +2352,7 @@ bool isSane(const NGHolder &h, const map> &tops, // All our states should be reachable and have a state assigned. if (h[v].char_reach.none()) { - DEBUG_PRINTF("vertex %u has empty reachability\n", h[v].index); + DEBUG_PRINTF("vertex %zu has empty reachability\n", h[v].index); return false; } @@ -2363,7 +2360,7 @@ bool isSane(const NGHolder &h, const map> &tops, // must have at least one predecessor that is not itself. 
if (v != h.start && v != h.startDs && !contains(top_starts, v) && !proper_in_degree(v, h)) { - DEBUG_PRINTF("vertex %u has no pred\n", h[v].index); + DEBUG_PRINTF("vertex %zu has no pred\n", h[v].index); return false; } } diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp index 071e5c63..dff9c7e8 100644 --- a/src/nfagraph/ng.cpp +++ b/src/nfagraph/ng.cpp @@ -203,6 +203,7 @@ static bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, const u32 comp_id) { const CompileContext &cc = ng.cc; + assert(hasCorrectlyNumberedVertices(g)); DEBUG_PRINTF("expr=%u, comp=%u: %zu vertices, %zu edges\n", w.expressionIndex, comp_id, num_vertices(g), num_edges(g)); diff --git a/src/nfagraph/ng_anchored_dots.cpp b/src/nfagraph/ng_anchored_dots.cpp index ba352e60..ed9c7f48 100644 --- a/src/nfagraph/ng_anchored_dots.cpp +++ b/src/nfagraph/ng_anchored_dots.cpp @@ -202,7 +202,7 @@ void reformAnchoredRepeatsComponent(NGHolder &g, } if (!isStartNode(dotV, g.start, g, true)) { - DEBUG_PRINTF("fleeing: vertex %u has other preds\n", g[dotV].index); + DEBUG_PRINTF("fleeing: vertex %zu has other preds\n", g[dotV].index); return; } @@ -249,7 +249,7 @@ void reformAnchoredRepeatsComponent(NGHolder &g, remove_edge(g.start, v, g); } - DEBUG_PRINTF("removing vertex %u\n", g[dotV].index); + DEBUG_PRINTF("removing vertex %zu\n", g[dotV].index); clear_vertex(dotV, g); dead.insert(dotV); compAnchoredStarts.erase(dotV); @@ -313,14 +313,15 @@ void reformUnanchoredRepeatsComponent(NGHolder &g, } // A self-loop indicates that this is a '.+' or '.*' - DEBUG_PRINTF("self-loop detected on %u\n", g[dotV].index); + DEBUG_PRINTF("self-loop detected on %zu\n", g[dotV].index); *startEnd = depth::infinity(); remove_edge(dotV, dotV, g); return; } if (!isStartNode(dotV, g.startDs, g, true)) { - DEBUG_PRINTF("fleeing: vertex %u has other preds\n", g[dotV].index); + DEBUG_PRINTF("fleeing: vertex %zu has other preds\n", + g[dotV].index); return; } @@ -362,14 +363,14 @@ void reformUnanchoredRepeatsComponent(NGHolder &g, compUnanchoredStarts.clear(); for (auto t : adjacent_vertices_range(dotV, g)) { if (t != dotV) { - DEBUG_PRINTF("connecting sds -> %u\n", g[t].index); + DEBUG_PRINTF("connecting sds -> %zu\n", g[t].index); add_edge(g.startDs, t, g); add_edge(g.start, t, g); compUnanchoredStarts.insert(t); } } - DEBUG_PRINTF("removing vertex %u\n", g[dotV].index); + DEBUG_PRINTF("removing vertex %zu\n", g[dotV].index); dead.insert(dotV); clear_vertex(dotV, g); compUnanchoredStarts.erase(dotV); @@ -416,7 +417,7 @@ bool gatherParticipants(const NGHolder &g, if (isOptionalDot(t, v, g)) { // another dot; bail if we've seen it once already if (dots.find(t) != dots.end()) { - DEBUG_PRINTF("cycle detected at vertex %u\n", g[t].index); + DEBUG_PRINTF("cycle detected at vertex %zu\n", g[t].index); return false; } dots.insert(t); @@ -432,7 +433,7 @@ bool gatherParticipants(const NGHolder &g, for (auto w : adjacent_vertices_range(v, g)) { succ.insert(w); if (!edge(start, w, g).second) { - DEBUG_PRINTF("failing, vertex %u does not have edge from start\n", + DEBUG_PRINTF("failing, vertex %zu does not have edge from start\n", g[w].index); return false; } @@ -474,7 +475,7 @@ void collapseVariableDotRepeat(NGHolder &g, NFAVertex start, return; } initialDot = v; - DEBUG_PRINTF("initial dot vertex is %u\n", g[v].index); + DEBUG_PRINTF("initial dot vertex is %zu\n", g[v].index); } } @@ -507,12 +508,8 @@ void collapseVariableDotRepeat(NGHolder &g, NFAVertex start, } assert(startEnd->is_reachable()); - // For determinism, copy and sort our 
successor vertices. - deque s(succ.begin(), succ.end()); - sort(s.begin(), s.end(), make_index_ordering(g)); - // Connect our successor vertices to both start and startDs. - for (auto v : s) { + for (auto v : succ) { add_edge_if_not_present(g.start, v, g); add_edge_if_not_present(g.startDs, v, g); } @@ -637,8 +634,8 @@ void restoreLeadingDots(NGHolder &g, const depth &startBegin, } addDotsBetween(g, root, rhs, startBegin, startEnd); - g.renumberVertices(); - g.renumberEdges(); + renumber_vertices(g); + renumber_edges(g); } // Entry point. diff --git a/src/nfagraph/ng_asserts.cpp b/src/nfagraph/ng_asserts.cpp index e9e39345..e0d43e7b 100644 --- a/src/nfagraph/ng_asserts.cpp +++ b/src/nfagraph/ng_asserts.cpp @@ -101,7 +101,7 @@ vector getAsserts(const NGHolder &g) { static void addToSplit(const NGHolder &g, NFAVertex v, map *to_split) { - DEBUG_PRINTF("%u needs splitting\n", g[v].index); + DEBUG_PRINTF("%zu needs splitting\n", g[v].index); to_split->emplace(g[v].index, v); } @@ -194,7 +194,7 @@ void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) { Report ir = rm.getBasicInternalReport(g, adj); g[v].reports.insert(rm.getInternalId(ir)); - DEBUG_PRINTF("set report id for vertex %u, adj %d\n", g[v].index, adj); + DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj); } static @@ -224,7 +224,7 @@ void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) { assert(v != g.start); assert(v != g.accept); assert(v != g.acceptEod); - DEBUG_PRINTF("partitioning vertex %u ucp:%d\n", g[v].index, (int)ucp); + DEBUG_PRINTF("partitioning vertex %zu ucp:%d\n", g[v].index, (int)ucp); CharReach cr_word = ucp ? CHARREACH_WORD_UCP_PRE : CHARREACH_WORD; CharReach cr_nonword = ucp ? CHARREACH_NONWORD_UCP_PRE : CHARREACH_NONWORD; @@ -267,8 +267,8 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set *dead) { bool impassable = true; bool ucp = flags & UCP_ASSERT_FLAGS; - DEBUG_PRINTF("resolving edge %u->%u (flags=0x%x, ucp=%d)\n", g[u].index, - g[v].index, flags, (int)ucp); + DEBUG_PRINTF("resolving edge %zu->%zu (flags=0x%x, ucp=%d)\n", + g[u].index, g[v].index, flags, (int)ucp); while (flags && impassable) { u32 flag = 1U << findAndClearLSB_32(&flags); switch (flag) { @@ -482,12 +482,12 @@ void resolveAsserts(ReportManager &rm, NGWrapper &g) { resolveEdges(rm, g, &dead); remove_edges(dead, g); - g.renumberVertices(); + renumber_vertices(g); pruneUseless(g); pruneEmptyVertices(g); - g.renumberVertices(); - g.renumberEdges(); + renumber_vertices(g); + renumber_edges(g); clearReports(g); } @@ -552,7 +552,7 @@ void ensureCodePointStart(ReportManager &rm, NGWrapper &g) { add_edge(g.start, v_4, g); add_edge(g.startDs, v_4, g); remove_edge(orig, g); - g.renumberEdges(); + renumber_edges(g); clearReports(g); } } diff --git a/src/nfagraph/ng_builder.cpp b/src/nfagraph/ng_builder.cpp index 8a92b7ee..6e1ea71e 100644 --- a/src/nfagraph/ng_builder.cpp +++ b/src/nfagraph/ng_builder.cpp @@ -132,7 +132,7 @@ NFAVertex NFABuilderImpl::getVertex(Position pos) const { assert(id2vertex.size() >= pos); const NFAVertex v = id2vertex[pos]; assert(v != NGHolder::null_vertex()); - assert(graph->g[v].index == pos); + assert((*graph)[v].index == pos); return v; } @@ -147,7 +147,7 @@ void NFABuilderImpl::addVertex(Position pos) { id2vertex.resize(pos + 1); } id2vertex[pos] = v; - graph->g[v].index = pos; + (*graph)[v].index = pos; } unique_ptr NFABuilderImpl::getGraph() { @@ -177,22 +177,22 @@ void NFABuilderImpl::setNodeReportID(Position pos, int offsetAdjust) { void 
NFABuilderImpl::addCharReach(Position pos, const CharReach &cr) { NFAVertex v = getVertex(pos); - graph->g[v].char_reach |= cr; + (*graph)[v].char_reach |= cr; } void NFABuilderImpl::setAssertFlag(Position pos, u32 flag) { NFAVertex v = getVertex(pos); - graph->g[v].assert_flags |= flag; + (*graph)[v].assert_flags |= flag; } u32 NFABuilderImpl::getAssertFlag(Position pos) { NFAVertex v = getVertex(pos); - return graph->g[v].assert_flags; + return (*graph)[v].assert_flags; } pair NFABuilderImpl::addEdge(NFAVertex u, NFAVertex v) { // assert that the edge doesn't already exist - assert(edge(u, v, graph->g).second == false); + assert(edge(u, v, *graph).second == false); pair e = add_edge(u, v, *graph); assert(e.second); @@ -209,16 +209,16 @@ void NFABuilderImpl::addEdge(Position startPos, Position endPos) { if ((u == graph->start || u == graph->startDs) && v == graph->startDs) { /* standard special -> special edges already exist */ - assert(edge(u, v, graph->g).second == true); + assert(edge(u, v, *graph).second == true); return; } - assert(edge(u, v, graph->g).second == false); + assert(edge(u, v, *graph).second == false); addEdge(u, v); } bool NFABuilderImpl::hasEdge(Position startPos, Position endPos) const { - return edge(getVertex(startPos), getVertex(endPos), graph->g).second; + return edge(getVertex(startPos), getVertex(endPos), *graph).second; } Position NFABuilderImpl::getStart() const { @@ -252,7 +252,7 @@ Position NFABuilderImpl::makePositions(size_t nPositions) { } void NFABuilderImpl::cloneRegion(Position first, Position last, unsigned posOffset) { - NFAGraph &g = graph->g; + NGHolder &g = *graph; assert(posOffset > 0); // walk the nodes between first and last and copy their vertex properties diff --git a/src/nfagraph/ng_calc_components.cpp b/src/nfagraph/ng_calc_components.cpp index 658e7001..da6775e4 100644 --- a/src/nfagraph/ng_calc_components.cpp +++ b/src/nfagraph/ng_calc_components.cpp @@ -162,7 +162,7 @@ flat_set findHeadShell(const NGHolder &g, } for (UNUSED auto v : shell) { - DEBUG_PRINTF("shell: %u\n", g[v].index); + DEBUG_PRINTF("shell: %zu\n", g[v].index); } return shell; @@ -184,7 +184,7 @@ flat_set findTailShell(const NGHolder &g, } for (UNUSED auto v : shell) { - DEBUG_PRINTF("shell: %u\n", g[v].index); + DEBUG_PRINTF("shell: %zu\n", g[v].index); } return shell; @@ -209,7 +209,8 @@ vector findShellEdges(const NGHolder &g, if ((is_special(u, g) || contains(head_shell, u)) && (is_special(v, g) || contains(tail_shell, v))) { - DEBUG_PRINTF("edge (%u,%u) is a shell edge\n", g[u].index, g[v].index); + DEBUG_PRINTF("edge (%zu,%zu) is a shell edge\n", g[u].index, + g[v].index); shell_edges.push_back(e); } } @@ -275,9 +276,8 @@ void splitIntoComponents(const NGHolder &g, deque> &comps, NFAUndirectedGraph ug; ue2::unordered_map old2new; - ue2::unordered_map newIdx2old; - createUnGraph(g.g, true, true, ug, old2new, newIdx2old); + createUnGraph(g, true, true, ug, old2new); // Construct reverse mapping. 
ue2::unordered_map new2old; @@ -313,7 +313,7 @@ void splitIntoComponents(const NGHolder &g, deque> &comps, assert(contains(new2old, uv)); NFAVertex v = new2old.at(uv); verts[c].push_back(v); - DEBUG_PRINTF("vertex %u is in comp %u\n", g[v].index, c); + DEBUG_PRINTF("vertex %zu is in comp %u\n", g[v].index, c); } ue2::unordered_map v_map; // temp map for fillHolder @@ -322,8 +322,9 @@ void splitIntoComponents(const NGHolder &g, deque> &comps, vv.insert(vv.end(), begin(head_shell), end(head_shell)); vv.insert(vv.end(), begin(tail_shell), end(tail_shell)); - // Sort by vertex index for determinism. - sort(begin(vv), end(vv), VertexIndexOrdering(g)); + /* Sort for determinism. Still required as NFAUndirectedVertex have + * no deterministic ordering (split_components map). */ + sort(begin(vv), end(vv)); auto gc = ue2::make_unique(); v_map.clear(); @@ -349,9 +350,6 @@ void splitIntoComponents(const NGHolder &g, deque> &comps, vv.insert(vv.end(), begin(head_shell), end(head_shell)); vv.insert(vv.end(), begin(tail_shell), end(tail_shell)); - // Sort by vertex index for determinism. - sort(begin(vv), end(vv), VertexIndexOrdering(g)); - auto gc = ue2::make_unique(); v_map.clear(); fillHolder(gc.get(), g, vv, &v_map); diff --git a/src/nfagraph/ng_cyclic_redundancy.cpp b/src/nfagraph/ng_cyclic_redundancy.cpp index e2272264..9ae4458c 100644 --- a/src/nfagraph/ng_cyclic_redundancy.cpp +++ b/src/nfagraph/ng_cyclic_redundancy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -99,7 +99,7 @@ class SearchVisitor : public boost::default_dfs_visitor { template void discover_vertex(const Vertex &v, const Graph &g) const { - DEBUG_PRINTF("vertex %u\n", g[v].index); + DEBUG_PRINTF("vertex %zu\n", g[v].index); if (is_special(v, g)) { DEBUG_PRINTF("start or accept\n"); throw SearchFailed(); @@ -141,24 +141,16 @@ bool searchForward(const Graph &g, const CharReach &reach, } static -NFAEdge to_raw(const NFAEdge &e, const NFAGraph &, const NGHolder &) { +NFAEdge to_raw(const NFAEdge &e, const NGHolder &) { return e; } static -NFAEdge to_raw(const reverse_graph::edge_descriptor &e, - const reverse_graph &g, - const NGHolder &raw) { - /* clang doesn't seem to like edge_underlying */ - NFAVertex t = source(e, g); - NFAVertex s = target(e, g); - - assert(edge(s, t, raw).second); - - return edge(s, t, raw).first; +NFAEdge to_raw(const reverse_graph::edge_descriptor &e, + const reverse_graph &g) { + return get(boost::edge_underlying, g, e); } - /* returns true if we did stuff */ template static @@ -185,7 +177,7 @@ bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v, continue; } - DEBUG_PRINTF("- checking u %u\n", g[u].index); + DEBUG_PRINTF("- checking u %zu\n", g[u].index); // let s be intersection(succ(u), succ(v)) s.clear(); @@ -206,17 +198,18 @@ bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v, continue; } - DEBUG_PRINTF(" - checking w %u\n", g[w].index); + DEBUG_PRINTF(" - checking w %zu\n", g[w].index); - if (searchForward(g, reach, s, w)) { - DEBUG_PRINTF("removing edge (%u,%u)\n", - g[u].index, g[w].index); - /* we are currently iterating over the in-edges of v, so it - would be unwise to remove edges to v. 
However, */ - assert(w != v); /* as v is in s */ - remove_edge(to_raw(e_u, g, raw), raw); - did_stuff = true; + if (!searchForward(g, reach, s, w)) { + continue; } + + DEBUG_PRINTF("removing edge (%zu,%zu)\n", g[u].index, g[w].index); + /* we are currently iterating over the in-edges of v, so it + would be unwise to remove edges to v. However, */ + assert(w != v); /* as v is in s */ + remove_edge(to_raw(e_u, g), raw); + did_stuff = true; } } @@ -233,7 +226,7 @@ bool cyclicPathRedundancyPass(Graph &g, NGHolder &raw) { continue; } - DEBUG_PRINTF("examining cyclic vertex %u\n", g[v].index); + DEBUG_PRINTF("examining cyclic vertex %zu\n", g[v].index); did_stuff |= removeCyclicPathRedundancy(g, v, raw); } @@ -242,7 +235,7 @@ bool cyclicPathRedundancyPass(Graph &g, NGHolder &raw) { bool removeCyclicPathRedundancy(NGHolder &g) { // Forward pass. - bool f_changed = cyclicPathRedundancyPass(g.g, g); + bool f_changed = cyclicPathRedundancyPass(g, g); if (f_changed) { DEBUG_PRINTF("edges removed by forward pass\n"); pruneUseless(g); @@ -250,8 +243,8 @@ bool removeCyclicPathRedundancy(NGHolder &g) { // Reverse pass. DEBUG_PRINTF("REVERSE PASS\n"); - typedef reverse_graph RevGraph; - RevGraph revg(g.g); + typedef reverse_graph RevGraph; + RevGraph revg(g); bool r_changed = cyclicPathRedundancyPass(revg, g); if (r_changed) { DEBUG_PRINTF("edges removed by reverse pass\n"); diff --git a/src/nfagraph/ng_depth.cpp b/src/nfagraph/ng_depth.cpp index 8afa644a..5111b752 100644 --- a/src/nfagraph/ng_depth.cpp +++ b/src/nfagraph/ng_depth.cpp @@ -44,11 +44,14 @@ #include #include #include +#include using namespace std; using boost::filtered_graph; +using boost::make_filtered_graph; using boost::make_constant_property; using boost::reverse_graph; +using boost::adaptors::reverse; namespace ue2 { @@ -122,25 +125,23 @@ private: template static -void findLoopReachable(const GraphT &g, const NFAVertex srcVertex, +void findLoopReachable(const GraphT &g, + const typename GraphT::vertex_descriptor srcVertex, vector &deadNodes) { typedef typename GraphT::edge_descriptor EdgeT; + typedef typename GraphT::vertex_descriptor VertexT; typedef set EdgeSet; EdgeSet deadEdges; BackEdges be(deadEdges); - auto index_map = get(&NFAGraphVertexProps::index, g); - - depth_first_search(g, visitor(be).root_vertex(srcVertex).vertex_index_map( - index_map)); + depth_first_search(g, visitor(be).root_vertex(srcVertex)); auto af = make_bad_edge_filter(&deadEdges); auto acyclic_g = make_filtered_graph(g, af); - vector topoOrder; /* actually reverse topological order */ + vector topoOrder; /* actually reverse topological order */ topoOrder.reserve(deadNodes.size()); - topological_sort(acyclic_g, back_inserter(topoOrder), - vertex_index_map(index_map)); + topological_sort(acyclic_g, back_inserter(topoOrder)); for (const auto &e : deadEdges) { u32 srcIdx = g[source(e, g)].index; @@ -149,8 +150,7 @@ void findLoopReachable(const GraphT &g, const NFAVertex srcVertex, } } - for (auto it = topoOrder.rbegin(); it != topoOrder.rend(); ++it) { - NFAVertex v = *it; + for (VertexT v : reverse(topoOrder)) { for (const auto &e : in_edges_range(v, g)) { if (deadNodes[g[source(e, g)].index]) { deadNodes[g[v].index] = true; @@ -194,22 +194,20 @@ void calcDepthFromSource(const NGHolder &graph, const GraphT &g, using boost::make_iterator_property_map; - auto min_index_map = get(&NFAGraphVertexProps::index, mindist_g); + auto min_index_map = get(vertex_index, mindist_g); breadth_first_search(mindist_g, srcVertex, - boost::vertex_index_map(min_index_map). 
visitor(make_bfs_visitor(record_distances( - make_iterator_property_map( - dMin.begin(), min_index_map), + make_iterator_property_map(dMin.begin(), + min_index_map), boost::on_tree_edge())))); - auto max_index_map = get(&NFAGraphVertexProps::index, maxdist_g); + auto max_index_map = get(vertex_index, maxdist_g); dag_shortest_paths(maxdist_g, srcVertex, - boost::vertex_index_map(max_index_map). - distance_map(make_iterator_property_map(dMax.begin(), - max_index_map)). - weight_map(make_constant_property(-1))); + distance_map(make_iterator_property_map(dMax.begin(), + max_index_map)) + .weight_map(make_constant_property(-1))); for (size_t i = 0; i < numVerts; i++) { if (dMin[i] > DIST_UNREACHABLE) { @@ -285,14 +283,14 @@ void calcDepths(const NGHolder &g, std::vector &depths) { * reachable from a loop need to be removed */ vector deadNodes(numVertices); - findLoopReachable(g.g, g.start, deadNodes); + findLoopReachable(g, g.start, deadNodes); DEBUG_PRINTF("doing start\n"); - calcAndStoreDepth(g, g.g, g.start, deadNodes, dMin, dMax, - depths, &NFAVertexDepth::fromStart); + calcAndStoreDepth(g, g, g.start, deadNodes, dMin, dMax, depths, + &NFAVertexDepth::fromStart); DEBUG_PRINTF("doing startds\n"); - calcAndStoreDepth(g, g.g, g.startDs, deadNodes, dMin, dMax, - depths, &NFAVertexDepth::fromStartDotStar); + calcAndStoreDepth(g, g, g.startDs, deadNodes, dMin, dMax, depths, + &NFAVertexDepth::fromStartDotStar); } void calcDepths(const NGHolder &g, std::vector &depths) { @@ -305,8 +303,8 @@ void calcDepths(const NGHolder &g, std::vector &depths) { vector dMax; /* reverse the graph before walking it */ - typedef reverse_graph RevNFAGraph; - const RevNFAGraph rg(g.g); + typedef reverse_graph RevNFAGraph; + const RevNFAGraph rg(g); /* * create a filtered graph for max depth calculations: all nodes/edges @@ -340,20 +338,20 @@ void calcDepths(const NGHolder &g, vector &depths) { * reachable from a loop need to be removed */ vector deadNodes(numVertices); - findLoopReachable(g.g, g.start, deadNodes); + findLoopReachable(g, g.start, deadNodes); DEBUG_PRINTF("doing start\n"); - calcAndStoreDepth( - g, g.g, g.start, deadNodes, dMin, dMax, depths, + calcAndStoreDepth( + g, g, g.start, deadNodes, dMin, dMax, depths, &NFAVertexBidiDepth::fromStart); DEBUG_PRINTF("doing startds\n"); - calcAndStoreDepth( - g, g.g, g.startDs, deadNodes, dMin, dMax, depths, + calcAndStoreDepth( + g, g, g.startDs, deadNodes, dMin, dMax, depths, &NFAVertexBidiDepth::fromStartDotStar); /* Now go backwards */ - typedef reverse_graph RevNFAGraph; - const RevNFAGraph rg(g.g); + typedef reverse_graph RevNFAGraph; + const RevNFAGraph rg(g); deadNodes.assign(numVertices, false); findLoopReachable(rg, g.acceptEod, deadNodes); @@ -374,10 +372,10 @@ void calcDepthsFrom(const NGHolder &g, const NFAVertex src, const size_t numVertices = num_vertices(g); vector deadNodes(numVertices); - findLoopReachable(g.g, g.start, deadNodes); + findLoopReachable(g, g.start, deadNodes); vector dMin, dMax; - calcDepthFromSource(g, g.g, src, deadNodes, dMin, dMax); + calcDepthFromSource(g, g, src, deadNodes, dMin, dMax); depths.clear(); depths.resize(numVertices); diff --git a/src/nfagraph/ng_dominators.cpp b/src/nfagraph/ng_dominators.cpp index 05650aaf..d01af994 100644 --- a/src/nfagraph/ng_dominators.cpp +++ b/src/nfagraph/ng_dominators.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-16, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided 
that the following conditions are met: @@ -48,37 +48,45 @@ using boost::make_iterator_property_map; namespace ue2 { template -ue2::unordered_map calcDominators(const Graph &g, - NFAVertex source) { +unordered_map calcDominators(const Graph &g, + typename Graph::vertex_descriptor source) { + using Vertex = typename Graph::vertex_descriptor; const size_t num_verts = num_vertices(g); auto index_map = get(&NFAGraphVertexProps::index, g); vector dfnum(num_verts, 0); - vector parents(num_verts, Graph::null_vertex()); + vector parents(num_verts, Graph::null_vertex()); auto dfnum_map = make_iterator_property_map(dfnum.begin(), index_map); auto parent_map = make_iterator_property_map(parents.begin(), index_map); - vector vertices_by_dfnum(num_verts, Graph::null_vertex()); + vector vertices_by_dfnum(num_verts, Graph::null_vertex()); // Output map. - unordered_map doms; + unordered_map doms; auto dom_map = make_assoc_property_map(doms); boost_ue2::lengauer_tarjan_dominator_tree(g, source, index_map, dfnum_map, parent_map, vertices_by_dfnum, dom_map); - return doms; + /* Translate back to an NFAVertex map */ + unordered_map doms2; + for (const auto &e : doms) { + NFAVertex f(e.first); + NFAVertex s(e.second); + doms2[f] = s; + } + return doms2; } -ue2::unordered_map findDominators(const NGHolder &g) { +unordered_map findDominators(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); - return calcDominators(g.g, g.start); + return calcDominators(g, g.start); } -ue2::unordered_map findPostDominators(const NGHolder &g) { +unordered_map findPostDominators(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); - return calcDominators(boost::reverse_graph(g.g), + return calcDominators(boost::reverse_graph(g), g.acceptEod); } diff --git a/src/nfagraph/ng_dump.cpp b/src/nfagraph/ng_dump.cpp index 7c1894a3..fc840f25 100644 --- a/src/nfagraph/ng_dump.cpp +++ b/src/nfagraph/ng_dump.cpp @@ -285,7 +285,7 @@ void dumpGraphImpl(const char *name, const GraphT &g, } // manual instantiation of templated dumpGraph above. 
-template void dumpGraphImpl(const char *, const NFAGraph &); +template void dumpGraphImpl(const char *, const NGHolder &); void dumpDotWrapperImpl(const NGWrapper &nw, const char *name, const Grey &grey) { @@ -293,7 +293,7 @@ void dumpDotWrapperImpl(const NGWrapper &nw, const char *name, stringstream ss; ss << grey.dumpPath << "Expr_" << nw.expressionIndex << "_" << name << ".dot"; DEBUG_PRINTF("dumping dot graph to '%s'\n", ss.str().c_str()); - dumpGraphImpl(ss.str().c_str(), nw.g); + dumpGraphImpl(ss.str().c_str(), nw); } } @@ -304,7 +304,7 @@ void dumpComponentImpl(const NGHolder &g, const char *name, u32 expr, ss << grey.dumpPath << "Comp_" << expr << "-" << comp << "_" << name << ".dot"; DEBUG_PRINTF("dumping dot graph to '%s'\n", ss.str().c_str()); - dumpGraphImpl(ss.str().c_str(), g.g); + dumpGraphImpl(ss.str().c_str(), g); } } @@ -315,7 +315,7 @@ void dumpSomSubComponentImpl(const NGHolder &g, const char *name, u32 expr, ss << grey.dumpPath << "Comp_" << expr << "-" << comp << "_" << name << "_" << plan << ".dot"; DEBUG_PRINTF("dumping dot graph to '%s'\n", ss.str().c_str()); - dumpGraphImpl(ss.str().c_str(), g.g); + dumpGraphImpl(ss.str().c_str(), g); } } @@ -325,7 +325,7 @@ void dumpHolderImpl(const NGHolder &h, unsigned int stageNumber, stringstream ss; ss << grey.dumpPath << "Holder_X_" << stageNumber << "-" << stageName << ".dot"; - dumpGraphImpl(ss.str().c_str(), h.g); + dumpGraphImpl(ss.str().c_str(), h); } } @@ -337,7 +337,7 @@ void dumpHolderImpl(const NGHolder &h, stringstream ss; ss << grey.dumpPath << "Holder_X_" << stageNumber << "-" << stageName << ".dot"; - dumpGraphImpl(ss.str().c_str(), h.g, region_map); + dumpGraphImpl(ss.str().c_str(), h, region_map); } } diff --git a/src/nfagraph/ng_edge_redundancy.cpp b/src/nfagraph/ng_edge_redundancy.cpp index 5944cfef..3ce62c41 100644 --- a/src/nfagraph/ng_edge_redundancy.cpp +++ b/src/nfagraph/ng_edge_redundancy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -297,9 +297,8 @@ bool checkFwdCandidate(const NGHolder &g, NFAVertex fixed_src, return false; } - DEBUG_PRINTF("edge (%u, %u) killed by edge (%u, %u)\n", - g[w].index, g[v].index, - g[fixed_src].index, g[v].index); + DEBUG_PRINTF("edge (%zu, %zu) killed by edge (%zu, %zu)\n", + g[w].index, g[v].index, g[fixed_src].index, g[v].index); return true; } @@ -415,7 +414,7 @@ bool removeEdgeRedundancyFwd(NGHolder &g, bool ignore_starts) { pred(g, u, &parents_u); done.clear(); - if (hasGreaterOutDegree(1, u, g)) { + if (out_degree(u, g) > 1) { checkLargeOutU(g, u, parents_u, possible_w, done, &dead); } else { checkSmallOutU(g, u, parents_u, done, &dead); @@ -460,7 +459,7 @@ bool removeSiblingsOfStartDotStar(NGHolder &g) { vector dead; for (auto v : adjacent_vertices_range(g.startDs, g)) { - DEBUG_PRINTF("checking %u\n", g[v].index); + DEBUG_PRINTF("checking %zu\n", g[v].index); if (is_special(v, g)) { continue; } @@ -470,8 +469,7 @@ bool removeSiblingsOfStartDotStar(NGHolder &g) { if (is_special(u, g)) { continue; } - DEBUG_PRINTF("removing %u->%u\n", g[u].index, - g[v].index); + DEBUG_PRINTF("removing %zu->%zu\n", g[u].index, g[v].index); dead.push_back(e); } } diff --git a/src/nfagraph/ng_equivalence.cpp b/src/nfagraph/ng_equivalence.cpp index 6f8f6532..7e1f7c6f 100644 --- a/src/nfagraph/ng_equivalence.cpp +++ b/src/nfagraph/ng_equivalence.cpp @@ -77,7 +77,7 @@ 
public: flat_set pred; //!< predecessors of this vertex flat_set succ; //!< successors of this vertex NFAVertex v; - u32 vert_index; + size_t vert_index; CharReach cr; CharReach pred_cr; CharReach succ_cr; @@ -122,7 +122,7 @@ public: vertex_flags(vi.vertex_flags), edge_tops(vi.edge_tops), cr(vi.cr), adjacent_cr(eq == LEFT_EQUIVALENCE ? vi.pred_cr : vi.succ_cr), /* treat non-special vertices the same */ - node_type(min(g[vi.v].index, u32{N_SPECIALS})), depth(d_in) {} + node_type(min(g[vi.v].index, size_t{N_SPECIALS})), depth(d_in) {} bool operator==(const ClassInfo &b) const { return node_type == b.node_type && depth.d1 == b.depth.d1 && @@ -678,7 +678,7 @@ bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc) { DEBUG_PRINTF("equivalence processing disabled in grey box\n"); return false; } - g.renumberVertices(); + renumber_vertices(g); // Cheap check: if all the non-special vertices have in-degree one and // out-degree one, there's no redundancy in this here graph and we can diff --git a/src/nfagraph/ng_execute.cpp b/src/nfagraph/ng_execute.cpp index 4ffd89c0..9d904894 100644 --- a/src/nfagraph/ng_execute.cpp +++ b/src/nfagraph/ng_execute.cpp @@ -183,8 +183,6 @@ flat_set execute_graph(const NGHolder &g, return getVertices(work_states, info); } -typedef boost::reverse_graph RevNFAGraph; - namespace { class eg_visitor : public boost::default_dfs_visitor { public: @@ -195,13 +193,14 @@ public: info(info_in), input_g(input_g_in), states(states_in), succs(vertex_count) {} - void finish_vertex(NFAVertex input_v, const RevNFAGraph &) { + void finish_vertex(NFAVertex input_v, + const boost::reverse_graph &) { if (input_v == input_g.accept) { return; } assert(input_v != input_g.acceptEod); - DEBUG_PRINTF("finished p%u\n", input_g[input_v].index); + DEBUG_PRINTF("finished p%zu\n", input_g[input_v].index); /* finish vertex is called on vertex --> implies that all its parents * (in the forward graph) are also finished. Our parents will have @@ -236,7 +235,7 @@ public: /* we need to push into all our (forward) children their successors * from us. 
*/ for (auto v : adjacent_vertices_range(input_v, input_g)) { - DEBUG_PRINTF("pushing our states to pstate %u\n", + DEBUG_PRINTF("pushing our states to pstate %zu\n", input_g[v].index); if (v == input_g.startDs) { /* no need for intra start edges */ @@ -289,7 +288,7 @@ flat_set execute_graph(const NGHolder &running_g, map colours; /* could just a topo order, but really it is time to pull a slightly bigger * gun: DFS */ - RevNFAGraph revg(input_dag.g); + boost::reverse_graph revg(input_dag); map > dfs_states; auto info = makeInfoTable(running_g); @@ -308,7 +307,7 @@ flat_set execute_graph(const NGHolder &running_g, #ifdef DEBUG DEBUG_PRINTF(" output rstates:"); for (const auto &v : states) { - printf(" %u", running_g[v].index); + printf(" %zu", running_g[v].index); } printf("\n"); #endif diff --git a/src/nfagraph/ng_expr_info.cpp b/src/nfagraph/ng_expr_info.cpp index cfd34ce6..b43c7fd1 100644 --- a/src/nfagraph/ng_expr_info.cpp +++ b/src/nfagraph/ng_expr_info.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -118,7 +118,7 @@ void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v, rd.max = min(rd.max, max_offset); } - DEBUG_PRINTF("vertex %u report %u: %s\n", w[v].index, report_id, + DEBUG_PRINTF("vertex %zu report %u: %s\n", w[v].index, report_id, rd.str().c_str()); info = unionDepthMinMax(info, rd); diff --git a/src/nfagraph/ng_extparam.cpp b/src/nfagraph/ng_extparam.cpp index eeb15299..a504ac50 100644 --- a/src/nfagraph/ng_extparam.cpp +++ b/src/nfagraph/ng_extparam.cpp @@ -172,8 +172,7 @@ void updateReportBounds(ReportManager &rm, NGWrapper &g, NFAVertex accept, new_reports.insert(rm.getInternalId(ir)); } - DEBUG_PRINTF("swapping reports on vertex %u\n", - g[v].index); + DEBUG_PRINTF("swapping reports on vertex %zu\n", g[v].index); reports.swap(new_reports); } } @@ -286,8 +285,8 @@ bool anchorPatternWithBoundedRepeat(NGWrapper &g, const depth &minWidth, add_edge(u, v, g); } - g.renumberVertices(); - g.renumberEdges(); + renumber_vertices(g); + renumber_edges(g); return true; } @@ -309,7 +308,7 @@ NFAVertex findSingleCyclic(const NGHolder &g) { } if (v != NGHolder::null_vertex()) { - DEBUG_PRINTF("cyclic is %u\n", g[v].index); + DEBUG_PRINTF("cyclic is %zu\n", g[v].index); assert(!is_special(v, g)); } return v; @@ -380,7 +379,7 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { // Walk from the start vertex to the cyclic state and ensure we have a // chain of vertices. while (v != cyclic) { - DEBUG_PRINTF("vertex %u\n", g[v].index); + DEBUG_PRINTF("vertex %zu\n", g[v].index); width++; auto succ = succs(v, g); if (contains(succ, cyclic)) { @@ -418,7 +417,7 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { // Walk from the cyclic state to an accept and ensure we have a chain of // vertices. 
while (!is_any_accept(v, g)) { - DEBUG_PRINTF("vertex %u\n", g[v].index); + DEBUG_PRINTF("vertex %zu\n", g[v].index); width++; auto succ = succs(v, g); if (succ.size() != 1) { @@ -435,7 +434,7 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { DEBUG_PRINTF("adjusting width by %d\n", offsetAdjust); width += offsetAdjust; - DEBUG_PRINTF("width=%u, vertex %u is cyclic\n", width, + DEBUG_PRINTF("width=%u, vertex %zu is cyclic\n", width, g[cyclic].index); if (width >= g.min_length) { @@ -448,7 +447,7 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { vector preds; vector dead; for (auto u : inv_adjacent_vertices_range(cyclic, g)) { - DEBUG_PRINTF("pred %u\n", g[u].index); + DEBUG_PRINTF("pred %zu\n", g[u].index); if (u == cyclic) { continue; } @@ -484,8 +483,8 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { add_edge(u, cyclic, g); } - g.renumberVertices(); - g.renumberEdges(); + renumber_vertices(g); + renumber_edges(g); clearReports(g); g.min_length = 0; @@ -542,8 +541,7 @@ bool isEdgePrunable(const NGWrapper &g, const NFAVertex u = source(e, g); const NFAVertex v = target(e, g); - DEBUG_PRINTF("edge (%u,%u)\n", g[u].index, - g[v].index); + DEBUG_PRINTF("edge (%zu,%zu)\n", g[u].index, g[v].index); // Leave our special-to-special edges alone. if (is_special(u, g) && is_special(v, g)) { @@ -716,8 +714,7 @@ static bool isUnanchored(const NGHolder &g) { for (auto v : adjacent_vertices_range(g.start, g)) { if (!edge(g.startDs, v, g).second) { - DEBUG_PRINTF("fail, %u is anchored vertex\n", - g[v].index); + DEBUG_PRINTF("fail, %zu is anchored vertex\n", g[v].index); return false; } } @@ -862,7 +859,7 @@ void handleExtendedParams(ReportManager &rm, NGWrapper &g, } } } - //dumpGraph("final.dot", g.g); + //dumpGraph("final.dot", g); if (!hasExtParams(g)) { return; diff --git a/src/nfagraph/ng_fixed_width.cpp b/src/nfagraph/ng_fixed_width.cpp index 46d77913..978dad44 100644 --- a/src/nfagraph/ng_fixed_width.cpp +++ b/src/nfagraph/ng_fixed_width.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -77,7 +77,7 @@ bool findMask(const NGHolder &g, vector *mask, bool *anchored, NFAVertex v = *succs.begin(); while (true) { - DEBUG_PRINTF("validating vertex %u\n", g[v].index); + DEBUG_PRINTF("validating vertex %zu\n", g[v].index); assert(v != g.acceptEod); diff --git a/src/nfagraph/ng_graph.h b/src/nfagraph/ng_graph.h deleted file mode 100644 index 2d6fea13..00000000 --- a/src/nfagraph/ng_graph.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Definition of the NFAGraph type used for all NFA graph - * representations. - * - * Note that most of the time we don't work on a bare NFAGraph: instead - * we use an NGHolder, which wraps the graph and defines our special vertices, - * etc. - */ - -#ifndef NG_GRAPH_H -#define NG_GRAPH_H - -#include "util/charreach.h" -#include "util/ue2_containers.h" -#include "ue2common.h" - -#include -#include -#include - -namespace ue2 { - -/** \brief Properties associated with each vertex in an NFAGraph. */ -struct NFAGraphVertexProps { - /** \brief Set of characters on which this vertex is reachable. */ - CharReach char_reach; - - /** \brief Set of reports raised by this vertex. */ - ue2::flat_set reports; - - /** \brief Unique index for this vertex, used for BGL algorithms. */ - u32 index = 0; - - /** \brief Flags associated with assertions. */ - u32 assert_flags = 0; -}; - -/** \brief Properties associated with each edge in an NFAGraph. */ -struct NFAGraphEdgeProps { - /** \brief Unique index for this edge, used for BGL algorithms. */ - u32 index = 0; - - /** \brief For graphs that will be implemented as multi-top engines, this - * specifies the top events. Only used on edges from the start vertex. */ - ue2::flat_set tops; - - /** \brief Flags associated with assertions. */ - u32 assert_flags = 0; -}; - -// For flexibility: boost::listS, boost::listS for out-edge and vertex lists. -// boost::bidirectionalS for directed graph so that we can get at in-edges. -typedef boost::adjacency_list NFAGraph; - -typedef NFAGraph::vertex_descriptor NFAVertex; -typedef NFAGraph::edge_descriptor NFAEdge; - -/** \brief vertex_index values for special nodes in the NFAGraph. */ -enum SpecialNodes { - /** \brief Anchored start vertex. WARNING: this may be triggered at various - * locations (not just zero) for triggered graphs. */ - NODE_START, - - /** \brief Unanchored start-dotstar vertex. WARNING: this may not have a - * proper self-loop. */ - NODE_START_DOTSTAR, - - /** \brief Accept vertex. All vertices that can match at arbitrary offsets - * must have an edge to this vertex. */ - NODE_ACCEPT, - - /** \brief Accept-EOD vertex. Vertices that must raise a match at EOD only - * must have an edge to this vertex. */ - NODE_ACCEPT_EOD, - - /** \brief Sentinel, number of special vertices. 
*/ - N_SPECIALS -}; - -} // namespace ue2 - -#endif diff --git a/src/nfagraph/ng_haig.cpp b/src/nfagraph/ng_haig.cpp index 143dca16..e4be14c3 100644 --- a/src/nfagraph/ng_haig.cpp +++ b/src/nfagraph/ng_haig.cpp @@ -37,10 +37,10 @@ #include "ng_mcclellan_internal.h" #include "ng_som_util.h" #include "ng_squash.h" -#include "ng_util.h" #include "util/bitfield.h" #include "util/container.h" #include "util/determinise.h" +#include "util/graph.h" #include "util/graph_range.h" #include "util/make_unique.h" #include "util/ue2_containers.h" @@ -449,7 +449,7 @@ void haig_do_preds(const NGHolder &g, const stateset &nfa_states, NFAVertex v = state_mapping[i]; s32 slot_id = g[v].index; - DEBUG_PRINTF("d vertex %u\n", g[v].index); + DEBUG_PRINTF("d vertex %zu\n", g[v].index); vector &out_map = preds[slot_id]; for (auto u : inv_adjacent_vertices_range(v, g)) { out_map.push_back(g[u].index); @@ -490,7 +490,7 @@ void haig_note_starts(const NGHolder &g, map *out) { for (auto v : vertices_range(g)) { if (is_any_start_inc_virtual(v, g)) { - DEBUG_PRINTF("%u creates new som value\n", g[v].index); + DEBUG_PRINTF("%zu creates new som value\n", g[v].index); out->emplace(g[v].index, 0U); continue; } @@ -501,7 +501,7 @@ void haig_note_starts(const NGHolder &g, map *out) { const DepthMinMax &d = depths[g[v].index]; if (d.min == d.max && d.min.is_finite()) { - DEBUG_PRINTF("%u is fixed at %u\n", g[v].index, (u32)d.min); + DEBUG_PRINTF("%zu is fixed at %u\n", g[v].index, (u32)d.min); out->emplace(g[v].index, d.min); } } diff --git a/src/nfagraph/ng_holder.cpp b/src/nfagraph/ng_holder.cpp index 5d83e626..a2fbb288 100644 --- a/src/nfagraph/ng_holder.cpp +++ b/src/nfagraph/ng_holder.cpp @@ -36,123 +36,33 @@ using namespace std; namespace ue2 { // internal use only -static NFAVertex addSpecialVertex(NFAGraph &g, SpecialNodes id) { - NFAVertex v = add_vertex(g); +static NFAVertex addSpecialVertex(NGHolder &g, SpecialNodes id) { + NFAVertex v(add_vertex(g)); g[v].index = id; return v; } -NGHolder::NGHolder(void) - : g(), - // add initial special nodes - start(addSpecialVertex(g, NODE_START)), - startDs(addSpecialVertex(g, NODE_START_DOTSTAR)), - accept(addSpecialVertex(g, NODE_ACCEPT)), - acceptEod(addSpecialVertex(g, NODE_ACCEPT_EOD)), - // misc data - numVertices(N_SPECIALS), - numEdges(0), - isValidNumEdges(true), - isValidNumVertices(true) { - - // wire up some fake edges for the stylized bits of the NFA - add_edge(start, startDs, *this); - add_edge(startDs, startDs, *this); - add_edge(accept, acceptEod, *this); - - g[start].char_reach.setall(); - g[startDs].char_reach.setall(); -} - NGHolder::NGHolder(nfa_kind k) - : kind (k), g(), + : kind (k), // add initial special nodes - start(addSpecialVertex(g, NODE_START)), - startDs(addSpecialVertex(g, NODE_START_DOTSTAR)), - accept(addSpecialVertex(g, NODE_ACCEPT)), - acceptEod(addSpecialVertex(g, NODE_ACCEPT_EOD)), - // misc data - numVertices(N_SPECIALS), - numEdges(0), - isValidNumEdges(true), - isValidNumVertices(true) { + start(addSpecialVertex(*this, NODE_START)), + startDs(addSpecialVertex(*this, NODE_START_DOTSTAR)), + accept(addSpecialVertex(*this, NODE_ACCEPT)), + acceptEod(addSpecialVertex(*this, NODE_ACCEPT_EOD)) { // wire up some fake edges for the stylized bits of the NFA add_edge(start, startDs, *this); add_edge(startDs, startDs, *this); add_edge(accept, acceptEod, *this); - g[start].char_reach.setall(); - g[startDs].char_reach.setall(); + (*this)[start].char_reach.setall(); + (*this)[startDs].char_reach.setall(); } NGHolder::~NGHolder(void) { 
DEBUG_PRINTF("destroying holder @ %p\n", this); } -size_t num_edges(NGHolder &h) { - if (!h.isValidNumEdges) { - h.numEdges = num_edges(h.g); - h.isValidNumEdges = true; - } - return h.numEdges; -} - -size_t num_edges(const NGHolder &h) { - if (!h.isValidNumEdges) { - return num_edges(h.g); - } - return h.numEdges; -} - -size_t num_vertices(NGHolder &h) { - if (!h.isValidNumVertices) { - h.numVertices = num_vertices(h.g); - h.isValidNumVertices = true; - } - return h.numVertices; -} - -size_t num_vertices(const NGHolder &h) { - if (!h.isValidNumVertices) { - return num_vertices(h.g); - } - return h.numVertices; -} - -void remove_edge(const NFAEdge &e, NGHolder &h) { - remove_edge(e, h.g); - assert(!h.isValidNumEdges || h.numEdges > 0); - h.numEdges--; -} - -void remove_edge(NFAVertex u, NFAVertex v, NGHolder &h) { - remove_edge(u, v, h.g); - assert(!h.isValidNumEdges || h.numEdges > 0); - h.numEdges--; -} - -void remove_vertex(NFAVertex v, NGHolder &h) { - remove_vertex(v, h.g); - assert(!h.isValidNumVertices || h.numVertices > 0); - h.numVertices--; -} - -void clear_vertex(NFAVertex v, NGHolder &h) { - h.isValidNumEdges = false; - clear_vertex_faster(v, h.g); -} - -void clear_in_edges(NFAVertex v, NGHolder &h) { - h.isValidNumEdges = false; - clear_in_edges(v, h.g); -} - -void clear_out_edges(NFAVertex v, NGHolder &h) { - h.isValidNumEdges = false; - clear_out_edges(v, h.g); -} - void clear_graph(NGHolder &h) { NGHolder::vertex_iterator vi, ve; for (tie(vi, ve) = vertices(h); vi != ve;) { @@ -166,6 +76,8 @@ void clear_graph(NGHolder &h) { } assert(num_vertices(h) == N_SPECIALS); + renumber_vertices(h); /* ensure that we reset our next allocated index */ + renumber_edges(h); // Recreate special stylised edges. add_edge(h.start, h.startDs, h); @@ -173,56 +85,13 @@ void clear_graph(NGHolder &h) { add_edge(h.accept, h.acceptEod, h); } -std::pair add_edge(NFAVertex u, NFAVertex v, NGHolder &h) { - assert(edge(u, v, h.g).second == false); - pair e = add_edge(u, v, h.g); - h.g[e.first].index = h.numEdges++; - assert(!h.isValidNumEdges || h.numEdges > 0); // no wrapping - return e; -} - -std::pair add_edge(NFAVertex u, NFAVertex v, - const NFAGraph::edge_property_type &ep, - NGHolder &h) { - assert(edge(u, v, h.g).second == false); - pair e = add_edge(u, v, ep, h.g); - h.g[e.first].index = h.numEdges++; - assert(!h.isValidNumEdges || h.numEdges > 0); // no wrapping - return e; -} - -NFAVertex add_vertex(NGHolder &h) { - NFAVertex v = add_vertex(h.g); - h[v].index = h.numVertices++; - assert(h.numVertices > 0); // no wrapping - return v; -} - -NFAVertex add_vertex(const NFAGraph::vertex_property_type &vp, NGHolder &h) { - NFAVertex v = add_vertex(h); - u32 i = h.g[v].index; /* preserve index */ - h.g[v] = vp; - h.g[v].index = i; - return v; -} - -void NGHolder::renumberEdges() { - numEdges = renumberGraphEdges(g); - isValidNumEdges = true; -} - -void NGHolder::renumberVertices() { - numVertices = renumberGraphVertices(g); - isValidNumVertices = true; -} - NFAVertex NGHolder::getSpecialVertex(u32 id) const { switch (id) { - case NODE_START: return start; - case NODE_START_DOTSTAR: return startDs; - case NODE_ACCEPT: return accept; - case NODE_ACCEPT_EOD: return acceptEod; - default: return nullptr; + case NODE_START: return start; + case NODE_START_DOTSTAR: return startDs; + case NODE_ACCEPT: return accept; + case NODE_ACCEPT_EOD: return acceptEod; + default: return null_vertex(); } } diff --git a/src/nfagraph/ng_holder.h b/src/nfagraph/ng_holder.h index 49050808..fbb6ac52 100644 --- 
a/src/nfagraph/ng_holder.h
+++ b/src/nfagraph/ng_holder.h
@@ -26,19 +26,75 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
+/** \file
+ * \brief Definition of the NGHolder type used to represent general NFA
+ * graphs as well as all associated types (vertex and edge properties, etc).
+ *
+ * The NGHolder also contains the special vertices used to represent starts and
+ * accepts.
+ */
+
 #ifndef NG_HOLDER_H
 #define NG_HOLDER_H
-#include "ng_graph.h"
 #include "ue2common.h"
 #include "nfa/nfa_kind.h"
-
-#include
-#include
-#include
+#include "util/charreach.h"
+#include "util/ue2_containers.h"
+#include "util/ue2_graph.h"
 namespace ue2 {
+/** \brief Properties associated with each vertex in an NFAGraph. */
+struct NFAGraphVertexProps {
+    /** \brief Set of characters on which this vertex is reachable. */
+    CharReach char_reach;
+
+    /** \brief Set of reports raised by this vertex. */
+    flat_set<ReportID> reports;
+
+    /** \brief Unique index for this vertex, used for BGL algorithms. */
+    size_t index = 0;
+
+    /** \brief Flags associated with assertions. */
+    u32 assert_flags = 0;
+};
+
+/** \brief Properties associated with each edge in an NFAGraph. */
+struct NFAGraphEdgeProps {
+    /** \brief Unique index for this edge, used for BGL algorithms. */
+    size_t index = 0;
+
+    /** \brief For graphs that will be implemented as multi-top engines, this
+     * specifies the top events. Only used on edges from the start vertex. */
+    ue2::flat_set<u32> tops;
+
+    /** \brief Flags associated with assertions. */
+    u32 assert_flags = 0;
+};
+
+/** \brief vertex_index values for special nodes in the NFAGraph. */
+enum SpecialNodes {
+    /** \brief Anchored start vertex. WARNING: this may be triggered at various
+     * locations (not just zero) for triggered graphs. */
+    NODE_START,
+
+    /** \brief Unanchored start-dotstar vertex. WARNING: this may not have a
+     * proper self-loop. */
+    NODE_START_DOTSTAR,
+
+    /** \brief Accept vertex. All vertices that can match at arbitrary offsets
+     * must have an edge to this vertex. */
+    NODE_ACCEPT,
+
+    /** \brief Accept-EOD vertex. Vertices that must raise a match at EOD only
+     * must have an edge to this vertex. */
+    NODE_ACCEPT_EOD,
+
+    /** \brief Sentinel, number of special vertices. */
+    N_SPECIALS
+};
+
 /** \brief Encapsulates an NFAGraph, stores special vertices and other
  * metadata.
  *
@@ -49,188 +105,34 @@ namespace ue2 {
  * - (startDs, startDs) (self-loop)
  * - (accept, acceptEod)
  */
-class NGHolder : boost::noncopyable {
+class NGHolder : public ue2_graph<NGHolder, NFAGraphVertexProps, NFAGraphEdgeProps> {
 public:
-    NGHolder(void);
     explicit NGHolder(nfa_kind kind);
+    NGHolder(void) : NGHolder(NFA_OUTFIX) {};
     virtual ~NGHolder(void);
-    // Pack edge and vertex indices.
-    // Note: maintaining edge index order can be expensive due to the frequency
-    // of edge removal/addition, so only renumberEdges() when required by
-    // operations on edge lists.
-    void renumberEdges();
-    void renumberVertices();
+    nfa_kind kind; /* Role that this plays in Rose */
-    NFAVertex getSpecialVertex(u32 id) const;
+    static const size_t N_SPECIAL_VERTICES = N_SPECIALS;
+public:
+    const vertex_descriptor start;     //!< Anchored start vertex.
+    const vertex_descriptor startDs;   //!< Unanchored start-dotstar vertex.
+    const vertex_descriptor accept;    //!< Accept vertex.
+    const vertex_descriptor acceptEod; //!< Accept at EOD vertex.
-    nfa_kind kind = NFA_OUTFIX; /* Role that this plays in Rose */
-
-    /** \brief Underlying graph object */
-    NFAGraph g;
-
-    const NFAVertex start; //!< Anchored start vertex.
-    const NFAVertex startDs; //!< Unanchored start-dotstar vertex.
- const NFAVertex accept; //!< Accept vertex. - const NFAVertex acceptEod; //!< Accept at EOD vertex. - - using directed_category = NFAGraph::directed_category; - using edge_parallel_category = NFAGraph::edge_parallel_category; - using traversal_category = NFAGraph::traversal_category; - - using vertex_descriptor = NFAGraph::vertex_descriptor; - using edge_descriptor = NFAGraph::edge_descriptor; - using adjacency_iterator = NFAGraph::adjacency_iterator; - using edge_iterator = NFAGraph::edge_iterator; - using in_edge_iterator = NFAGraph::in_edge_iterator; - using inv_adjacency_iterator = NFAGraph::inv_adjacency_iterator; - using out_edge_iterator = NFAGraph::out_edge_iterator; - using vertex_iterator = NFAGraph::vertex_iterator; - using edge_property_type = NFAGraph::edge_property_type; - using vertex_property_type = NFAGraph::vertex_property_type; - - // These free functions, which follow the BGL model, are the interface to - // the graph held by this class. - friend size_t num_vertices(NGHolder &h); - friend size_t num_vertices(const NGHolder &h); - friend size_t num_edges(NGHolder &h); - friend size_t num_edges(const NGHolder &h); - friend void remove_vertex(NFAVertex v, NGHolder &h); - friend void clear_vertex(NFAVertex v, NGHolder &h); - friend void clear_in_edges(NFAVertex v, NGHolder &h); - friend void clear_out_edges(NFAVertex v, NGHolder &h); - friend void remove_edge(const NFAEdge &e, NGHolder &h); - friend void remove_edge(NFAVertex u, NFAVertex v, NGHolder &h); - - template - friend void remove_out_edge_if(NFAVertex v, Predicate pred, NGHolder &h) { - boost::remove_out_edge_if(v, pred, h.g); - h.isValidNumEdges = false; - } - - template - friend void remove_in_edge_if(NFAVertex v, Predicate pred, NGHolder &h) { - boost::remove_in_edge_if(v, pred, h.g); - h.isValidNumEdges = false; - } - - template - friend void remove_edge_if(Predicate pred, NGHolder &h) { - boost::remove_edge_if(pred, h.g); - h.isValidNumEdges = false; - } - - friend std::pair add_edge(NFAVertex u, NFAVertex v, - NGHolder &h); - friend std::pair add_edge(NFAVertex u, NFAVertex v, - const edge_property_type &ep, - NGHolder &h); - friend NFAVertex add_vertex(NGHolder &h); - friend NFAVertex add_vertex(const vertex_property_type &vp, NGHolder &h); - - static NFAVertex null_vertex(void) { return NFAGraph::null_vertex(); } - - // Subscript operators for BGL bundled properties. - using graph_bundled = NFAGraph::graph_bundled; - using vertex_bundled = NFAGraph::vertex_bundled; - using edge_bundled = NFAGraph::edge_bundled; - - vertex_bundled &operator[](NFAVertex v) { - return get(boost::vertex_bundle, g)[v]; - } - const vertex_bundled &operator[](NFAVertex v) const { - return get(boost::vertex_bundle, g)[v]; - } - edge_bundled &operator[](const NFAEdge &e) { - return get(boost::edge_bundle, g)[e]; - } - const edge_bundled &operator[](const NFAEdge &e) const { - return get(boost::edge_bundle, g)[e]; - } - -protected: - - /* Since the NFAGraph vertex/edge list selectors are std::lists, computing - * num_vertices and num_edges is O(N). We use these members to store a - * cached copy of the size. - * - * In the future, with C++11's constant-time std::list::size, these may - * become obsolete. */ - - u32 numVertices; - u32 numEdges; - bool isValidNumEdges; - bool isValidNumVertices; + vertex_descriptor getSpecialVertex(u32 id) const; }; +typedef NGHolder::vertex_descriptor NFAVertex; +typedef NGHolder::edge_descriptor NFAEdge; + /** \brief True if the vertex \p v is one of our special vertices. 
*/ template -static really_inline -bool is_special(const NFAVertex v, const GraphT &g) { +bool is_special(const typename GraphT::vertex_descriptor v, const GraphT &g) { return g[v].index < N_SPECIALS; } -static really_inline -std::pair -adjacent_vertices(NFAVertex v, const NGHolder &h) { - return adjacent_vertices(v, h.g); -} - -static really_inline -std::pair edge(NFAVertex u, NFAVertex v, const NGHolder &h) { - return boost::edge(u, v, h.g); -} - -static really_inline -std::pair -edges(const NGHolder &h) { - return edges(h.g); -} - -static really_inline -size_t in_degree(NFAVertex v, const NGHolder &h) { - return in_degree(v, h.g); -} - -static really_inline -std::pair -in_edges(NFAVertex v, const NGHolder &h) { - return in_edges(v, h.g); -} - -static really_inline -std::pair -inv_adjacent_vertices(NFAVertex v, const NGHolder &h) { - return inv_adjacent_vertices(v, h.g); -} - -static really_inline -size_t out_degree(NFAVertex v, const NGHolder &h) { - return out_degree(v, h.g); -} - -static really_inline -std::pair -out_edges(NFAVertex v, const NGHolder &h) { - return out_edges(v, h.g); -} - -static really_inline -NFAVertex source(const NFAEdge &e, const NGHolder &h) { - return source(e, h.g); -} - -static really_inline -NFAVertex target(const NFAEdge &e, const NGHolder &h) { - return target(e, h.g); -} - -static really_inline -std::pair -vertices(const NGHolder &h) { - return vertices(h.g); -} - /** * \brief Clears all non-special vertices and edges from the graph. * @@ -239,16 +141,6 @@ vertices(const NGHolder &h) { */ void clear_graph(NGHolder &h); -inline -void renumber_edges(NGHolder &h) { - h.renumberEdges(); -} - -inline -void renumber_vertices(NGHolder &h) { - h.renumberVertices(); -} - /* * \brief Clear and remove all of the vertices pointed to by the given iterator * range. @@ -275,8 +167,8 @@ void remove_vertices(Iter begin, Iter end, NGHolder &h, bool renumber = true) { } if (renumber) { - h.renumberEdges(); - h.renumberVertices(); + renumber_edges(h); + renumber_vertices(h); } } @@ -311,7 +203,7 @@ void remove_edges(Iter begin, Iter end, NGHolder &h, bool renumber = true) { } if (renumber) { - h.renumberEdges(); + renumber_edges(h); } } diff --git a/src/nfagraph/ng_is_equal.cpp b/src/nfagraph/ng_is_equal.cpp index 8e71c337..2df79f50 100644 --- a/src/nfagraph/ng_is_equal.cpp +++ b/src/nfagraph/ng_is_equal.cpp @@ -77,6 +77,26 @@ private: ReportID a_rep; ReportID b_rep; }; + +/** Comparison functor used to sort by vertex_index. */ +template +struct VertexIndexOrdering { + explicit VertexIndexOrdering(const Graph &g_in) : g(g_in) {} + bool operator()(typename Graph::vertex_descriptor a, + typename Graph::vertex_descriptor b) const { + assert(a == b || g[a].index != g[b].index); + return g[a].index < g[b].index; + } +private: + const Graph &g; +}; + +template +static +VertexIndexOrdering make_index_ordering(const Graph &g) { + return VertexIndexOrdering(g); +} + } static @@ -109,7 +129,7 @@ bool is_equal_i(const NGHolder &a, const NGHolder &b, for (size_t i = 0; i < vert_a.size(); i++) { NFAVertex va = vert_a[i]; NFAVertex vb = vert_b[i]; - DEBUG_PRINTF("vertex %u\n", a[va].index); + DEBUG_PRINTF("vertex %zu\n", a[va].index); // Vertex index must be the same. if (a[va].index != b[vb].index) { diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp index 5e5a18d9..e92790b9 100644 --- a/src/nfagraph/ng_limex.cpp +++ b/src/nfagraph/ng_limex.cpp @@ -78,8 +78,7 @@ bool sanityCheckGraph(const NGHolder &g, // Non-specials should have non-empty reachability. 
if (!is_special(v, g)) { if (g[v].char_reach.none()) { - DEBUG_PRINTF("vertex %u has empty reach\n", - g[v].index); + DEBUG_PRINTF("vertex %zu has empty reach\n", g[v].index); return false; } } @@ -88,25 +87,23 @@ bool sanityCheckGraph(const NGHolder &g, // other vertices must not have them. if (is_match_vertex(v, g) && v != g.accept) { if (g[v].reports.empty()) { - DEBUG_PRINTF("vertex %u has no reports\n", g[v].index); + DEBUG_PRINTF("vertex %zu has no reports\n", g[v].index); return false; } } else if (!g[v].reports.empty()) { - DEBUG_PRINTF("vertex %u has reports but no accept edge\n", + DEBUG_PRINTF("vertex %zu has reports but no accept edge\n", g[v].index); return false; } // Participant vertices should have distinct state indices. if (!contains(state_ids, v)) { - DEBUG_PRINTF("vertex %u has no state index!\n", - g[v].index); + DEBUG_PRINTF("vertex %zu has no state index!\n", g[v].index); return false; } u32 s = state_ids.at(v); if (s != NO_STATE && !seen_states.insert(s).second) { - DEBUG_PRINTF("vertex %u has dupe state %u\n", - g[v].index, s); + DEBUG_PRINTF("vertex %zu has dupe state %u\n", g[v].index, s); return false; } } @@ -178,11 +175,7 @@ NFAVertex makeTopStartVertex(NGHolder &g, const flat_set &tops, CharReach top_cr = calcTopVertexReach(tops, top_reach); g[u].char_reach = top_cr; - // Add edges in vertex index order, for determinism. - vector ordered_succs(begin(succs), end(succs)); - sort(begin(ordered_succs), end(ordered_succs), make_index_ordering(g)); - - for (auto v : ordered_succs) { + for (auto v : succs) { if (v == g.accept || v == g.acceptEod) { reporter = true; } @@ -374,7 +367,7 @@ void attemptToUseAsStart(const NGHolder &g, NFAVertex u, return; } - DEBUG_PRINTF("reusing %u is a start vertex\n", g[u].index); + DEBUG_PRINTF("reusing %zu is a start vertex\n", g[u].index); markTopSuccAsHandled(u, top_inter, succs, tops_out, unhandled_top_succs, unhandled_succ_tops); } @@ -388,8 +381,7 @@ void reusePredsAsStarts(const NGHolder &g, const map &top_reach, map> &unhandled_top_succs, map> &unhandled_succ_tops, map> &tops_out) { - /* create list of candidates first, to avoid issues of iter invalidation - * and determinism */ + /* create list of candidates first, to avoid issues of iter invalidation */ DEBUG_PRINTF("attempting to reuse vertices for top starts\n"); vector cand_starts; for (NFAVertex u : unhandled_succ_tops | map_keys) { @@ -397,7 +389,6 @@ void reusePredsAsStarts(const NGHolder &g, const map &top_reach, cand_starts.push_back(u); } } - sort(cand_starts.begin(), cand_starts.end(), make_index_ordering(g)); for (NFAVertex u : cand_starts) { if (!contains(unhandled_succ_tops, u)) { @@ -625,7 +616,7 @@ void remapReportsToPrograms(NGHolder &h, const ReportManager &rm) { u32 program = rm.getProgramOffset(id); reports.insert(program); } - DEBUG_PRINTF("vertex %u: remapped reports {%s} to programs {%s}\n", + DEBUG_PRINTF("vertex %zu: remapped reports {%s} to programs {%s}\n", h[v].index, as_string_list(old_reports).c_str(), as_string_list(reports).c_str()); } diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp index deaf2ffd..bfba7c71 100644 --- a/src/nfagraph/ng_limex_accel.cpp +++ b/src/nfagraph/ng_limex_accel.cpp @@ -69,7 +69,7 @@ void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr, } const CharReach &acr = g[v].char_reach; - DEBUG_PRINTF("checking %u\n", g[v].index); + DEBUG_PRINTF("checking %zu\n", g[v].index); if (acr.count() < WIDE_FRIEND_MIN || !acr.isSubsetOf(cr)) { DEBUG_PRINTF("bad reach %zu\n", acr.count()); 
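The dominant mechanical change in these hunks is that the bundled vertex/edge index property is size_t under the new graph type, so every DEBUG_PRINTF conversion moves from %u to %zu. For illustration, a minimal stand-alone sketch of the pattern (VertexProps and the values here are invented, not from this patch):

    #include <cstddef>
    #include <cstdio>

    // Stand-in for the bundled vertex properties: under the new graph type
    // the index member is size_t rather than the old u32.
    struct VertexProps {
        std::size_t index = 0;
    };

    int main(void) {
        VertexProps vp;
        vp.index = 42;
        // %zu is the portable conversion for size_t; %u would be wrong on
        // LP64 targets where size_t is wider than unsigned int.
        std::printf("vertex %zu\n", vp.index);
        return 0;
    }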
@@ -86,7 +86,7 @@ void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr, next_preds->insert(v); insert(next_cands, adjacent_vertices(v, g)); - DEBUG_PRINTF("%u is a friend indeed\n", g[v].index); + DEBUG_PRINTF("%zu is a friend indeed\n", g[v].index); friends->insert(v); next_cand:; } @@ -675,7 +675,7 @@ NFAVertex get_sds_or_proxy(const NGHolder &g) { while (true) { if (hasSelfLoop(v, g)) { - DEBUG_PRINTF("woot %u\n", g[v].index); + DEBUG_PRINTF("woot %zu\n", g[v].index); return v; } if (out_degree(v, g) != 1) { @@ -837,7 +837,7 @@ bool nfaCheckAccel(const NGHolder &g, NFAVertex v, CharReach terminating = g[v].char_reach; terminating.flip(); - DEBUG_PRINTF("vertex %u is cyclic and has %zu stop chars%s\n", + DEBUG_PRINTF("vertex %zu is cyclic and has %zu stop chars%s\n", g[v].index, terminating.count(), allow_wide ? " (w)" : ""); diff --git a/src/nfagraph/ng_literal_analysis.cpp b/src/nfagraph/ng_literal_analysis.cpp index 9229457c..68c1bdd6 100644 --- a/src/nfagraph/ng_literal_analysis.cpp +++ b/src/nfagraph/ng_literal_analysis.cpp @@ -46,6 +46,7 @@ #include #include +#include #include using namespace std; @@ -335,7 +336,7 @@ void processWorkQueue(const NGHolder &g, const NFAEdge &e, // Our literal set should contain no literal that is a suffix of another. assert(!hasSuffixLiterals(s)); - DEBUG_PRINTF("edge %u (%u->%u) produced %zu literals\n", g[e].index, + DEBUG_PRINTF("edge %zu (%zu->%zu) produced %zu literals\n", g[e].index, g[source(e, g)].index, g[target(e, g)].index, s.size()); } @@ -791,7 +792,7 @@ bool splitOffLeadingLiteral_i(const NGHolder &g, bool anch, } while (true) { - DEBUG_PRINTF("validating vertex %u\n", g[v].index); + DEBUG_PRINTF("validating vertex %zu\n", g[v].index); assert(v != g.acceptEod && v != g.accept); diff --git a/src/nfagraph/ng_literal_component.cpp b/src/nfagraph/ng_literal_component.cpp index 871c8ac7..e3cfe867 100644 --- a/src/nfagraph/ng_literal_component.cpp +++ b/src/nfagraph/ng_literal_component.cpp @@ -95,7 +95,7 @@ void addToString(string &s, const NGHolder &g, NFAVertex v) { static bool splitOffLiteral(NG &ng, NGWrapper &g, NFAVertex v, const bool anchored, set &dead) { - DEBUG_PRINTF("examine vertex %u\n", g[v].index); + DEBUG_PRINTF("examine vertex %zu\n", g[v].index); bool nocase = false, casefixed = false; assert(!is_special(v, g)); @@ -109,7 +109,7 @@ bool splitOffLiteral(NG &ng, NGWrapper &g, NFAVertex v, const bool anchored, assert(edge(g.start, v, g).second); assert(edge(g.startDs, v, g).second); } - if (hasGreaterInDegree(reqInDegree, v, g)) { + if (in_degree(v, g) > reqInDegree) { DEBUG_PRINTF("extra in-edges\n"); return false; } @@ -134,7 +134,7 @@ bool splitOffLiteral(NG &ng, NGWrapper &g, NFAVertex v, const bool anchored, u = v; // previous vertex v = *(adjacent_vertices(v, g).first); - DEBUG_PRINTF("loop, v=%u\n", g[v].index); + DEBUG_PRINTF("loop, v=%zu\n", g[v].index); if (is_special(v, g)) { if (v == g.accept || v == g.acceptEod) { diff --git a/src/nfagraph/ng_literal_decorated.cpp b/src/nfagraph/ng_literal_decorated.cpp index 02b25a73..89c01a6c 100644 --- a/src/nfagraph/ng_literal_decorated.cpp +++ b/src/nfagraph/ng_literal_decorated.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -77,7 +77,7 @@ bool findPaths(const NGHolder &g, vector &paths) { read_count[g[v].index] = out_degree(v, g); - 
DEBUG_PRINTF("setting read_count to %zu for %u\n", + DEBUG_PRINTF("setting read_count to %zu for %zu\n", read_count[g[v].index], g[v].index); if (v == g.start || v == g.startDs) { @@ -117,7 +117,7 @@ bool findPaths(const NGHolder &g, vector &paths) { read_count[g[u].index]--; if (!read_count[g[u].index]) { - DEBUG_PRINTF("clearing %u as finished reading\n", g[u].index); + DEBUG_PRINTF("clearing %zu as finished reading\n", g[u].index); built[g[u].index].clear(); built[g[u].index].shrink_to_fit(); } @@ -138,9 +138,9 @@ bool hasLargeDegreeVertex(const NGHolder &g) { if (is_special(v, g)) { // specials can have large degree continue; } - if (has_greater_degree(MAX_VERTEX_DEGREE, v, g)) { - DEBUG_PRINTF("vertex %u has degree %zu\n", g[v].index, - boost::degree(v, g.g)); + if (degree(v, g) > MAX_VERTEX_DEGREE) { + DEBUG_PRINTF("vertex %zu has degree %zu\n", g[v].index, + degree(v, g)); return true; } } @@ -188,7 +188,8 @@ struct PathMask { } // Reports are attached to the second-to-last vertex. - reports = g[*next(path.rbegin())].reports; + NFAVertex u = *std::next(path.rbegin()); + reports = g[u].reports; assert(!reports.empty()); } diff --git a/src/nfagraph/ng_mcclellan.cpp b/src/nfagraph/ng_mcclellan.cpp index 71c9a05e..375086a4 100644 --- a/src/nfagraph/ng_mcclellan.cpp +++ b/src/nfagraph/ng_mcclellan.cpp @@ -328,7 +328,7 @@ void markToppableStarts(const NGHolder &g, const flat_set &unused, } for (const auto &trigger : triggers) { if (triggerAllowed(g, v, triggers, trigger)) { - DEBUG_PRINTF("idx %u is valid location for top\n", g[v].index); + DEBUG_PRINTF("idx %zu is valid location for top\n", g[v].index); out->set(g[v].index); break; } diff --git a/src/nfagraph/ng_misc_opt.cpp b/src/nfagraph/ng_misc_opt.cpp index 716802ba..29939fec 100644 --- a/src/nfagraph/ng_misc_opt.cpp +++ b/src/nfagraph/ng_misc_opt.cpp @@ -80,6 +80,7 @@ #include using namespace std; +using boost::make_filtered_graph; namespace ue2 { @@ -108,7 +109,7 @@ void findCandidates(NGHolder &g, const vector &ordering, goto next_cand; } } - DEBUG_PRINTF("vertex %u is a candidate\n", g[v].index); + DEBUG_PRINTF("vertex %zu is a candidate\n", g[v].index); cand->push_back(v); next_cand:; } @@ -139,7 +140,7 @@ void findCandidates_rev(NGHolder &g, const vector &ordering, goto next_cand; } } - DEBUG_PRINTF("vertex %u is a candidate\n", g[v].index); + DEBUG_PRINTF("vertex %zu is a candidate\n", g[v].index); cand->push_back(v); next_cand:; } @@ -242,7 +243,7 @@ set findSustainSet_rev(const NGHolder &g, NFAVertex p, static bool enlargeCyclicVertex(NGHolder &g, som_type som, NFAVertex v) { - DEBUG_PRINTF("considering vertex %u\n", g[v].index); + DEBUG_PRINTF("considering vertex %zu\n", g[v].index); const CharReach &v_cr = g[v].char_reach; CharReach add; @@ -261,7 +262,7 @@ bool enlargeCyclicVertex(NGHolder &g, som_type som, NFAVertex v) { if (p == v) { continue; } - DEBUG_PRINTF("looking at pred %u\n", g[p].index); + DEBUG_PRINTF("looking at pred %zu\n", g[p].index); bool ignore_sds = som; /* if we are tracking som, entries into a state from sds are significant. 
*/ @@ -291,13 +292,13 @@ bool enlargeCyclicVertex(NGHolder &g, som_type som, NFAVertex v) { /* the cr can be increased */ g[v].char_reach = add; - DEBUG_PRINTF("vertex %u was widened\n", g[v].index); + DEBUG_PRINTF("vertex %zu was widened\n", g[v].index); return true; } static bool enlargeCyclicVertex_rev(NGHolder &g, NFAVertex v) { - DEBUG_PRINTF("considering vertex %u\n", g[v].index); + DEBUG_PRINTF("considering vertex %zu\n", g[v].index); const CharReach &v_cr = g[v].char_reach; CharReach add; @@ -316,7 +317,7 @@ bool enlargeCyclicVertex_rev(NGHolder &g, NFAVertex v) { if (p == v) { continue; } - DEBUG_PRINTF("looking at succ %u\n", g[p].index); + DEBUG_PRINTF("looking at succ %zu\n", g[p].index); set sustain = findSustainSet_rev(g, p, add); DEBUG_PRINTF("sustain set is %zu\n", sustain.size()); @@ -341,7 +342,7 @@ bool enlargeCyclicVertex_rev(NGHolder &g, NFAVertex v) { /* the cr can be increased */ g[v].char_reach = add; - DEBUG_PRINTF("vertex %u was widened\n", g[v].index); + DEBUG_PRINTF("vertex %zu was widened\n", g[v].index); return true; } @@ -390,7 +391,7 @@ bool improveGraph(NGHolder &g, som_type som) { * enlargeCyclicCR. */ CharReach reduced_cr(NFAVertex v, const NGHolder &g, const map &br_cyclic) { - DEBUG_PRINTF("find minimal cr for %u\n", g[v].index); + DEBUG_PRINTF("find minimal cr for %zu\n", g[v].index); CharReach v_cr = g[v].char_reach; if (proper_in_degree(v, g) != 1) { return v_cr; @@ -579,12 +580,11 @@ flat_set findDependentVertices(const NGHolder &g, NFAVertex v) { } } - auto filtered_g = make_filtered_graph(g.g, - make_bad_edge_filter(&no_explore)); + auto filtered_g = make_filtered_graph(g, make_bad_edge_filter(&no_explore)); vector color_raw(num_vertices(g)); auto color = make_iterator_property_map(color_raw.begin(), - get(&NFAGraphVertexProps::index, g.g)); + get(vertex_index, g)); flat_set bad; for (NFAVertex b : vertices_range(g)) { if (b != g.start && g[b].char_reach.isSubsetOf(g[v].char_reach)) { @@ -597,7 +597,7 @@ flat_set findDependentVertices(const NGHolder &g, NFAVertex v) { flat_set rv; for (NFAVertex u : vertices_range(g)) { if (!contains(bad, u)) { - DEBUG_PRINTF("%u is good\n", g[u].index); + DEBUG_PRINTF("%zu is good\n", g[u].index); rv.insert(u); } } @@ -623,7 +623,7 @@ bool pruneUsingSuccessors(NGHolder &g, NFAVertex u, som_type som) { } bool changed = false; - DEBUG_PRINTF("using cyclic %u as base\n", g[u].index); + DEBUG_PRINTF("using cyclic %zu as base\n", g[u].index); auto children = findDependentVertices(g, u); vector u_succs; for (NFAVertex v : adjacent_vertices_range(u, g)) { @@ -639,23 +639,23 @@ bool pruneUsingSuccessors(NGHolder &g, NFAVertex u, som_type som) { return g[a].char_reach.count() > g[b].char_reach.count(); }); for (NFAVertex v : u_succs) { - DEBUG_PRINTF(" using %u as killer\n", g[v].index); + DEBUG_PRINTF(" using %zu as killer\n", g[v].index); /* Need to distinguish between vertices that are switched on after the * cyclic vs vertices that are switched on concurrently with the cyclic * if (subject to a suitable reach) */ bool v_peer_of_cyclic = willBeEnabledConcurrently(u, v, g); set dead; for (NFAVertex s : adjacent_vertices_range(v, g)) { - DEBUG_PRINTF(" looking at preds of %u\n", g[s].index); + DEBUG_PRINTF(" looking at preds of %zu\n", g[s].index); for (NFAEdge e : in_edges_range(s, g)) { NFAVertex p = source(e, g); if (!contains(children, p) || p == v || p == u || p == g.accept) { - DEBUG_PRINTF("%u not a cand\n", g[p].index); + DEBUG_PRINTF("%zu not a cand\n", g[p].index); continue; } if (is_any_accept(s, g) && 
g[p].reports != g[v].reports) { - DEBUG_PRINTF("%u bad reports\n", g[p].index); + DEBUG_PRINTF("%zu bad reports\n", g[p].index); continue; } /* the out-edges of a vertex that may be enabled on the same @@ -664,7 +664,7 @@ bool pruneUsingSuccessors(NGHolder &g, NFAVertex u, som_type som) { * may not be switched on until another byte is processed). */ if (!v_peer_of_cyclic && sometimesEnabledConcurrently(u, p, g)) { - DEBUG_PRINTF("%u can only be squashed by a proper peer\n", + DEBUG_PRINTF("%zu can only be squashed by a proper peer\n", g[p].index); continue; } @@ -672,14 +672,14 @@ bool pruneUsingSuccessors(NGHolder &g, NFAVertex u, som_type som) { if (g[p].char_reach.isSubsetOf(g[v].char_reach)) { dead.insert(e); changed = true; - DEBUG_PRINTF("removing edge %u->%u\n", g[p].index, + DEBUG_PRINTF("removing edge %zu->%zu\n", g[p].index, g[s].index); } else if (is_subset_of(succs(p, g), succs(u, g))) { if (is_match_vertex(p, g) && !is_subset_of(g[p].reports, g[v].reports)) { continue; } - DEBUG_PRINTF("updating reach on %u\n", g[p].index); + DEBUG_PRINTF("updating reach on %zu\n", g[p].index); changed |= (g[p].char_reach & g[v].char_reach).any(); g[p].char_reach &= ~g[v].char_reach; } diff --git a/src/nfagraph/ng_netflow.cpp b/src/nfagraph/ng_netflow.cpp index 9004024f..4859d864 100644 --- a/src/nfagraph/ng_netflow.cpp +++ b/src/nfagraph/ng_netflow.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -111,6 +111,7 @@ static void removeEdgesFromIndex(NGHolder &g, vector &capacityMap, u32 idx) { remove_edge_if([&](const NFAEdge &e) { return g[e].index >= idx; }, g); capacityMap.resize(idx); + renumber_edges(g); } /** A wrapper around boykov_kolmogorov_max_flow, returns the max flow and @@ -142,11 +143,10 @@ u64a getMaxFlow(NGHolder &h, const vector &capacityMap_in, vector distances(numVertices); assert(colorMap.size() == numVertices); - const NFAGraph &g = h.g; - auto v_index_map = get(&NFAGraphVertexProps::index, g); - auto e_index_map = get(&NFAGraphEdgeProps::index, g); + auto v_index_map = get(vertex_index, h); + auto e_index_map = get(edge_index, h); - u64a flow = boykov_kolmogorov_max_flow(g, + u64a flow = boykov_kolmogorov_max_flow(h, make_iterator_property_map(capacityMap.begin(), e_index_map), make_iterator_property_map(edgeResiduals.begin(), e_index_map), make_iterator_property_map(reverseEdges.begin(), e_index_map), @@ -158,7 +158,7 @@ u64a getMaxFlow(NGHolder &h, const vector &capacityMap_in, // Remove reverse edges from graph. 
removeEdgesFromIndex(h, capacityMap, numRealEdges); - assert(num_edges(h.g) == numRealEdges); + assert(num_edges(h) == numRealEdges); DEBUG_PRINTF("flow = %llu\n", flow); return flow; @@ -190,14 +190,14 @@ vector findMinCut(NGHolder &h, const vector &scores) { if (fromColor != boost::white_color && toColor == boost::white_color) { assert(ec <= INVALID_EDGE_CAP); - DEBUG_PRINTF("found white cut edge %u->%u cap %llu\n", + DEBUG_PRINTF("found white cut edge %zu->%zu cap %llu\n", h[from].index, h[to].index, ec); observed_white_flow += ec; picked_white.push_back(e); } if (fromColor == boost::black_color && toColor != boost::black_color) { assert(ec <= INVALID_EDGE_CAP); - DEBUG_PRINTF("found black cut edge %u->%u cap %llu\n", + DEBUG_PRINTF("found black cut edge %zu->%zu cap %llu\n", h[from].index, h[to].index, ec); observed_black_flow += ec; picked_black.push_back(e); diff --git a/src/nfagraph/ng_prefilter.cpp b/src/nfagraph/ng_prefilter.cpp index 8abc45b3..012b4e8d 100644 --- a/src/nfagraph/ng_prefilter.cpp +++ b/src/nfagraph/ng_prefilter.cpp @@ -220,13 +220,7 @@ void copyInEdges(NGHolder &g, NFAVertex from, NFAVertex to, continue; } - // Check with edge_by_target to cope with predecessors with large - // fan-out. - if (edge_by_target(u, to, g).second) { - continue; - } - - add_edge(u, to, g[e], g); + add_edge_if_not_present(u, to, g[e], g); } } @@ -361,7 +355,7 @@ void reduceRegions(NGHolder &h) { // We may have vertices that have edges to both accept and acceptEod: in // this case, we can optimize for performance by removing the acceptEod // edges. - remove_in_edge_if(h.acceptEod, SourceHasEdgeToAccept(h), h.g); + remove_in_edge_if(h.acceptEod, SourceHasEdgeToAccept(h), h); } void prefilterReductions(NGHolder &h, const CompileContext &cc) { @@ -378,13 +372,13 @@ void prefilterReductions(NGHolder &h, const CompileContext &cc) { DEBUG_PRINTF("before: graph with %zu vertices, %zu edges\n", num_vertices(h), num_edges(h)); - h.renumberVertices(); - h.renumberEdges(); + renumber_vertices(h); + renumber_edges(h); reduceRegions(h); - h.renumberVertices(); - h.renumberEdges(); + renumber_vertices(h); + renumber_edges(h); DEBUG_PRINTF("after: graph with %zu vertices, %zu edges\n", num_vertices(h), num_edges(h)); diff --git a/src/nfagraph/ng_prune.cpp b/src/nfagraph/ng_prune.cpp index 473b9586..88f1880f 100644 --- a/src/nfagraph/ng_prune.cpp +++ b/src/nfagraph/ng_prune.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -57,9 +57,8 @@ namespace ue2 { void pruneUnreachable(NGHolder &g) { deque dead; - if (!hasGreaterInDegree(1, g.acceptEod, g) && - !hasGreaterInDegree(0, g.accept, g) && - edge(g.accept, g.acceptEod, g).second) { + if (in_degree(g.acceptEod, g) == 1 && !in_degree(g.accept, g) + && edge(g.accept, g.acceptEod, g).second) { // Trivial case: there are no in-edges to our accepts (other than // accept->acceptEod), so all non-specials are unreachable. for (auto v : vertices_range(g)) { @@ -70,10 +69,10 @@ void pruneUnreachable(NGHolder &g) { } else { // Walk a reverse graph from acceptEod with Boost's depth_first_visit // call. 
- typedef reverse_graph RevNFAGraph; - RevNFAGraph revg(g.g); + typedef reverse_graph RevNFAGraph; + RevNFAGraph revg(g); - map colours; + map colours; depth_first_visit(revg, g.acceptEod, make_dfs_visitor(boost::null_visitor()), @@ -104,7 +103,8 @@ void pruneUnreachable(NGHolder &g) { template static -bool pruneForwardUseless(NGHolder &h, const nfag_t &g, NFAVertex s, +bool pruneForwardUseless(NGHolder &h, const nfag_t &g, + typename nfag_t::vertex_descriptor s, vector &vertexColor) { // Begin with all vertices set to white, as DFV only marks visited // vertices. @@ -122,9 +122,9 @@ bool pruneForwardUseless(NGHolder &h, const nfag_t &g, NFAVertex s, for (auto v : vertices_range(g)) { u32 idx = g[v].index; if (!is_special(v, g) && vertexColor[idx] == boost::white_color) { - DEBUG_PRINTF("vertex %u is unreachable from %u\n", + DEBUG_PRINTF("vertex %zu is unreachable from %zu\n", g[v].index, g[s].index); - dead.push_back(v); + dead.push_back(NFAVertex(v)); } } @@ -145,17 +145,17 @@ void pruneUseless(NGHolder &g, bool renumber) { assert(hasCorrectlyNumberedVertices(g)); vector vertexColor(num_vertices(g)); - bool work_done = pruneForwardUseless(g, g.g, g.start, vertexColor); - work_done |= pruneForwardUseless( - g, reverse_graph(g.g), g.acceptEod, vertexColor); + bool work_done = pruneForwardUseless(g, g, g.start, vertexColor); + work_done |= pruneForwardUseless(g, reverse_graph(g), + g.acceptEod, vertexColor); if (!work_done) { return; } if (renumber) { - g.renumberEdges(); - g.renumberVertices(); + renumber_edges(g); + renumber_vertices(g); } } @@ -172,7 +172,7 @@ void pruneEmptyVertices(NGHolder &g) { const CharReach &cr = g[v].char_reach; if (cr.none()) { - DEBUG_PRINTF("empty: %u\n", g[v].index); + DEBUG_PRINTF("empty: %zu\n", g[v].index); dead.push_back(v); } } @@ -234,7 +234,7 @@ bool isDominatedByReporter(const NGHolder &g, // Note: reporters with edges only to acceptEod are not considered to // dominate. 
if (edge(u, g.accept, g).second && contains(g[u].reports, report_id)) { - DEBUG_PRINTF("%u is dominated by %u, and both report %u\n", + DEBUG_PRINTF("%zu is dominated by %zu, and both report %u\n", g[v].index, g[u].index, report_id); return true; } @@ -296,7 +296,7 @@ void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) { } - sort(begin(reporters), end(reporters), make_index_ordering(g)); + sort(begin(reporters), end(reporters)); reporters.erase(unique(begin(reporters), end(reporters)), end(reporters)); DEBUG_PRINTF("%zu vertices have simple exhaustible reports\n", @@ -315,14 +315,14 @@ void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) { continue; } if (isDominatedByReporter(g, dom, v, report_id)) { - DEBUG_PRINTF("removed dominated report %u from vertex %u\n", + DEBUG_PRINTF("removed dominated report %u from vertex %zu\n", report_id, g[v].index); g[v].reports.erase(report_id); } } if (g[v].reports.empty()) { - DEBUG_PRINTF("removed edges to accepts from %u, no reports left\n", + DEBUG_PRINTF("removed edges to accepts from %zu, no reports left\n", g[v].index); remove_edge(v, g.accept, g); remove_edge(v, g.acceptEod, g); @@ -337,7 +337,7 @@ void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) { if (hasOnlySelfLoopAndExhaustibleAccepts(g, rm, v)) { remove_edge(v, v, g); modified = true; - DEBUG_PRINTF("removed self-loop on %u\n", g[v].index); + DEBUG_PRINTF("removed self-loop on %zu\n", g[v].index); } } @@ -349,7 +349,7 @@ void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) { // We may have only removed self-loops, in which case pruneUseless wouldn't // renumber, so we do edge renumbering explicitly here. - g.renumberEdges(); + renumber_edges(g); } /** Removes the given Report ID from vertices connected to accept, and then @@ -388,8 +388,8 @@ void pruneReport(NGHolder &g, ReportID report) { remove_edges(dead, g); pruneUnreachable(g); - g.renumberVertices(); - g.renumberEdges(); + renumber_vertices(g); + renumber_edges(g); } /** Removes all Report IDs bar the given one from vertices connected to accept, @@ -431,8 +431,8 @@ void pruneAllOtherReports(NGHolder &g, ReportID report) { remove_edges(dead, g); pruneUnreachable(g); - g.renumberVertices(); - g.renumberEdges(); + renumber_vertices(g); + renumber_edges(g); } } // namespace ue2 diff --git a/src/nfagraph/ng_puff.cpp b/src/nfagraph/ng_puff.cpp index 00b2e8ac..7281471f 100644 --- a/src/nfagraph/ng_puff.cpp +++ b/src/nfagraph/ng_puff.cpp @@ -59,7 +59,7 @@ static size_t countChain(const NGHolder &g, NFAVertex v) { size_t count = 0; while (v) { - DEBUG_PRINTF("counting vertex %u\n", g[v].index); + DEBUG_PRINTF("counting vertex %zu\n", g[v].index); if (is_special(v, g)) { break; } @@ -79,7 +79,7 @@ void wireNewAccepts(NGHolder &g, NFAVertex head, continue; } - DEBUG_PRINTF("adding edge: %u -> accept\n", g[u].index); + DEBUG_PRINTF("adding edge: %zu -> accept\n", g[u].index); assert(!edge(u, g.accept, g).second); assert(!edge(u, g.acceptEod, g).second); add_edge(u, g.accept, g); @@ -136,13 +136,13 @@ bool singleStart(const NGHolder &g) { for (auto v : adjacent_vertices_range(g.start, g)) { if (!is_special(v, g)) { - DEBUG_PRINTF("saw %u\n", g[v].index); + DEBUG_PRINTF("saw %zu\n", g[v].index); seen.insert(v); } } for (auto v : adjacent_vertices_range(g.startDs, g)) { if (!is_special(v, g)) { - DEBUG_PRINTF("saw %u\n", g[v].index); + DEBUG_PRINTF("saw %zu\n", g[v].index); seen.insert(v); } } @@ -158,7 +158,7 @@ bool triggerResetsPuff(const NGHolder &g, NFAVertex head) { for (auto u : 
inv_adjacent_vertices_range(head, g)) { if (!g[u].char_reach.isSubsetOf(puff_escapes)) { - DEBUG_PRINTF("no reset on trigger %u %u\n", g[u].index, + DEBUG_PRINTF("no reset on trigger %zu %zu\n", g[u].index, g[head].index); return false; } @@ -172,7 +172,7 @@ bool triggerResetsPuff(const NGHolder &g, NFAVertex head) { * */ static bool triggerFloodsPuff(const NGHolder &g, NFAVertex head) { - DEBUG_PRINTF("head = %u\n", g[head].index); + DEBUG_PRINTF("head = %zu\n", g[head].index); const CharReach &puff_cr = g[head].char_reach; @@ -186,14 +186,14 @@ bool triggerFloodsPuff(const NGHolder &g, NFAVertex head) { if (proper_in_degree(head, g) == 1 && puff_cr == g[getSoleSourceVertex(g, head)].char_reach) { head = getSoleSourceVertex(g, head); - DEBUG_PRINTF("temp new head = %u\n", g[head].index); + DEBUG_PRINTF("temp new head = %zu\n", g[head].index); } for (auto s : inv_adjacent_vertices_range(head, g)) { - DEBUG_PRINTF("s = %u\n", g[s].index); + DEBUG_PRINTF("s = %zu\n", g[s].index); if (!puff_cr.isSubsetOf(g[s].char_reach)) { - DEBUG_PRINTF("no flood on trigger %u %u\n", - g[s].index, g[head].index); + DEBUG_PRINTF("no flood on trigger %zu %zu\n", g[s].index, + g[head].index); return false; } @@ -268,7 +268,7 @@ void constructPuff(NGHolder &g, const NFAVertex a, const NFAVertex puffv, RoseBuild &rose, ReportManager &rm, flat_set &chain_reports, bool prefilter) { DEBUG_PRINTF("constructing Puff for report %u\n", report); - DEBUG_PRINTF("a = %u\n", g[a].index); + DEBUG_PRINTF("a = %zu\n", g[a].index); const Report &puff_report = rm.getReport(report); const bool simple_exhaust = isSimpleExhaustible(puff_report); @@ -349,7 +349,7 @@ bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a, } nodes.push_back(a); - DEBUG_PRINTF("vertex %u has in_degree %zu\n", g[a].index, + DEBUG_PRINTF("vertex %zu has in_degree %zu\n", g[a].index, in_degree(a, g)); a = getSoleSourceVertex(g, a); @@ -387,10 +387,10 @@ bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a, bool auto_restart = false; - DEBUG_PRINTF("a = %u\n", g[a].index); + DEBUG_PRINTF("a = %zu\n", g[a].index); if (nodes.size() < MIN_PUFF_LENGTH || a == g.startDs) { - DEBUG_PRINTF("bad %zu %u\n", nodes.size(), g[a].index); + DEBUG_PRINTF("bad %zu %zu\n", nodes.size(), g[a].index); if (nodes.size() < MIN_PUFF_LENGTH) { return false; } else { diff --git a/src/nfagraph/ng_redundancy.cpp b/src/nfagraph/ng_redundancy.cpp index 8fc5d5f3..4ca695d8 100644 --- a/src/nfagraph/ng_redundancy.cpp +++ b/src/nfagraph/ng_redundancy.cpp @@ -309,14 +309,10 @@ static bool hasInEdgeTops(const NGHolder &g, NFAVertex v) { bool exists; NFAEdge e; - tie(e, exists) = edge_by_target(g.start, v, g); - if (exists && !g[e].tops.empty()) { - return true; - } - return false; + tie(e, exists) = edge(g.start, v, g); + return exists && !g[e].tops.empty(); } - /** Transform (1), removal of redundant vertices. 
*/ static bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, @@ -348,8 +344,7 @@ bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, } if (info.pred.empty() || info.succ.empty()) { - DEBUG_PRINTF("vertex %u has empty pred/succ list\n", - g[v].index); + DEBUG_PRINTF("vertex %zu has empty pred/succ list\n", g[v].index); assert(0); // non-special states should always have succ/pred lists continue; } @@ -448,7 +443,7 @@ bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, CharReach &otherReach = g[t].char_reach; if (currReach.isSubsetOf(otherReach)) { - DEBUG_PRINTF("removing redundant vertex %u (keeping %u)\n", + DEBUG_PRINTF("removing redundant vertex %zu (keeping %zu)\n", g[v].index, g[t].index); markForRemoval(v, infoMap, removable); changed = true; @@ -539,9 +534,6 @@ bool doDiamondMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, continue; } - /* ensure that we look for candidates in the same order */ - sort(intersection.begin(), intersection.end(), make_index_ordering(g)); - const CharReach &currReach = g[v].char_reach; const auto &currReports = g[v].reports; for (auto t : intersection) { @@ -578,8 +570,8 @@ bool doDiamondMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, CharReach &otherReach = g[t].char_reach; otherReach |= currReach; // v can be removed - DEBUG_PRINTF("removing redundant vertex %u and merging " - "reachability with vertex %u\n", + DEBUG_PRINTF("removing redundant vertex %zu and merging " + "reachability with vertex %zu\n", g[v].index, g[t].index); markForRemoval(v, infoMap, removable); changed = true; @@ -645,14 +637,14 @@ bool reversePathReachSubset(const NFAEdge &e, const NFAVertex &dom, } NFAVertex start = source(e, g); - using RevGraph = boost::reverse_graph; + using RevGraph = boost::reverse_graph; map vertexColor; // Walk the graph backwards from v, examining each node. We fail (return // false) if we encounter a node with reach NOT a subset of domReach, and // we stop searching at dom. try { - depth_first_visit(RevGraph(g.g), start, + depth_first_visit(RevGraph(g), start, ReachSubsetVisitor(domReach), make_assoc_property_map(vertexColor), VertexIs(dom)); @@ -674,16 +666,15 @@ bool forwardPathReachSubset(const NFAEdge &e, const NFAVertex &dom, } NFAVertex start = target(e, g); - map vertexColor; + map vertexColor; // Walk the graph forward from v, examining each node. We fail (return // false) if we encounter a node with reach NOT a subset of domReach, and // we stop searching at dom. try { - depth_first_visit(g.g, start, - ReachSubsetVisitor(domReach), + depth_first_visit(g, start, ReachSubsetVisitor(domReach), make_assoc_property_map(vertexColor), - VertexIs(dom)); + VertexIs(dom)); } catch(ReachMismatch&) { return false; } @@ -775,9 +766,8 @@ void findCyclicDom(NGHolder &g, vector &cyclic, continue; } - DEBUG_PRINTF("vertex %u is dominated by directly-connected cyclic " - "vertex %u\n", g[v].index, - g[dom].index); + DEBUG_PRINTF("vertex %zu is dominated by directly-connected cyclic " + "vertex %zu\n", g[v].index, g[dom].index); // iff all paths through in-edge e of v involve vertices whose // reachability is a subset of reach(dom), we can delete edge e. 
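The redundancy passes above now run their reachability walks directly over the holder: boost::reverse_graph is instantiated on NGHolder itself, and depth_first_visit takes an associative colour map, so no index renumbering is needed first. A rough generic sketch of that BGL pattern, using a plain adjacency_list as a stand-in for the real NGHolder:

    #include <boost/graph/adjacency_list.hpp>
    #include <boost/graph/depth_first_search.hpp>
    #include <boost/graph/reverse_graph.hpp>
    #include <boost/property_map/property_map.hpp>
    #include <map>

    int main(void) {
        using Graph = boost::adjacency_list<boost::vecS, boost::vecS,
                                            boost::bidirectionalS>;
        Graph g(4);
        boost::add_edge(0, 1, g);
        boost::add_edge(1, 2, g);
        boost::add_edge(2, 3, g);

        // Reversed view of g; requires a bidirectional graph.
        boost::reverse_graph<Graph, const Graph &> revg(g);

        // Associative colour map: usable even when vertex indices are stale.
        std::map<Graph::vertex_descriptor, boost::default_color_type> colours;
        boost::depth_first_visit(revg, 3, boost::default_dfs_visitor(),
                                 boost::make_assoc_property_map(colours));
        return 0;
    }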
@@ -787,8 +777,8 @@ void findCyclicDom(NGHolder &g, vector &cyclic, } if (reversePathReachSubset(e, dom, g)) { - DEBUG_PRINTF("edge (%u, %u) can be removed: leading paths " - "share dom reach\n", + DEBUG_PRINTF("edge (%zu, %zu) can be removed: leading " + "paths share dom reach\n", g[source(e, g)].index, g[target(e, g)].index); dead.insert(e); if (source(e, g) == v) { @@ -814,11 +804,9 @@ void findCyclicPostDom(NGHolder &g, vector &cyclic, // Path out through a post-dominator (e.g. a?.+foobar') NFAVertex postdom = postdominators[v]; - if (postdom && cyclic[g[postdom].index] - && edge(v, postdom, g).second) { - DEBUG_PRINTF("vertex %u is postdominated by directly-connected " - "cyclic vertex %u\n", g[v].index, - g[postdom].index); + if (postdom && cyclic[g[postdom].index] && edge(v, postdom, g).second) { + DEBUG_PRINTF("vertex %zu is postdominated by directly-connected " + "cyclic vertex %zu\n", g[v].index, g[postdom].index); // iff all paths through in-edge e of v involve vertices whose // reachability is a subset of reach(dom), we can delete edge e. @@ -828,8 +816,8 @@ void findCyclicPostDom(NGHolder &g, vector &cyclic, } if (forwardPathReachSubset(e, postdom, g)) { - DEBUG_PRINTF("edge (%u, %u) can be removed: trailing paths " - "share postdom reach\n", + DEBUG_PRINTF("edge (%zu, %zu) can be removed: trailing " + "paths share postdom reach\n", g[source(e, g)].index, g[target(e, g)].index); if (target(e, g) == v) { cyclic[g[v].index] = false; @@ -844,7 +832,7 @@ void findCyclicPostDom(NGHolder &g, vector &cyclic, bool removeRedundancy(NGHolder &g, som_type som) { DEBUG_PRINTF("rr som = %d\n", (int)som); - g.renumberVertices(); + renumber_vertices(g); // Cheap check: if all the non-special vertices have in-degree one and // out-degree one, there's no redundancy in this here graph and we can diff --git a/src/nfagraph/ng_region.cpp b/src/nfagraph/ng_region.cpp index c7472e0d..0ecd7bd6 100644 --- a/src/nfagraph/ng_region.cpp +++ b/src/nfagraph/ng_region.cpp @@ -71,7 +71,7 @@ using namespace std; namespace ue2 { typedef ue2::unordered_set BackEdgeSet; -typedef boost::filtered_graph> +typedef boost::filtered_graph> AcyclicGraph; namespace { @@ -92,17 +92,17 @@ void checkAndAddExitCandidate(const AcyclicGraph &g, /* find the set of vertices reachable from v which are not in r */ for (auto w : adjacent_vertices_range(v, g)) { - if (!contains(r, w)) { + if (!contains(r, NFAVertex(w))) { if (!open) { - exits->push_back(exit_info(v)); + exits->push_back(exit_info(NFAVertex(v))); open = &exits->back().open; } - open->insert(w); + open->insert(NFAVertex(w)); } } if (open) { - DEBUG_PRINTF("exit %u\n", g[v].index); + DEBUG_PRINTF("exit %zu\n", g[v].index); } } @@ -141,7 +141,7 @@ bool exitValid(UNUSED const AcyclicGraph &g, const vector &exits, return true; } if (exits.size() == 1 && open_jumps.size() == 1) { - DEBUG_PRINTF("oj %u, e %u\n", g[*open_jumps.begin()].index, + DEBUG_PRINTF("oj %zu, e %zu\n", g[*open_jumps.begin()].index, g[exits[0].exit].index); if (*open_jumps.begin() == exits[0].exit) { return true; @@ -190,7 +190,7 @@ void buildInitialCandidate(const AcyclicGraph &g, if (exits->empty()) { DEBUG_PRINTF("odd\n"); candidate->clear(); - DEBUG_PRINTF("adding %u to initial\n", g[*it].index); + DEBUG_PRINTF("adding %zu to initial\n", g[*it].index); candidate->insert(*it); open_jumps->erase(*it); checkAndAddExitCandidate(g, *candidate, *it, exits); @@ -202,7 +202,7 @@ void buildInitialCandidate(const AcyclicGraph &g, candidate->clear(); for (; it != ite; ++it) { - DEBUG_PRINTF("adding %u to 
initial\n", g[*it].index); + DEBUG_PRINTF("adding %zu to initial\n", g[*it].index); candidate->insert(*it); if (contains(enters, *it)) { break; @@ -231,10 +231,10 @@ void findDagLeaders(const NGHolder &h, const AcyclicGraph &g, vector exits; ue2::unordered_set candidate; ue2::unordered_set open_jumps; - DEBUG_PRINTF("adding %u to current\n", g[*t_it].index); + DEBUG_PRINTF("adding %zu to current\n", g[*t_it].index); assert(t_it != topo.rend()); candidate.insert(*t_it++); - DEBUG_PRINTF("adding %u to current\n", g[*t_it].index); + DEBUG_PRINTF("adding %zu to current\n", g[*t_it].index); assert(t_it != topo.rend()); candidate.insert(*t_it++); findExits(g, candidate, &exits); @@ -257,7 +257,7 @@ void findDagLeaders(const NGHolder &h, const AcyclicGraph &g, &open_jumps); } else { NFAVertex curr = *t_it; - DEBUG_PRINTF("adding %u to current\n", g[curr].index); + DEBUG_PRINTF("adding %zu to current\n", g[curr].index); candidate.insert(curr); open_jumps.erase(curr); refineExits(g, candidate, *t_it, &exits); @@ -284,7 +284,7 @@ void mergeUnderBackEdges(const NGHolder &g, const vector &topo, continue; } - DEBUG_PRINTF("merging v = %u(%u), u = %u(%u)\n", g[v].index, rv, + DEBUG_PRINTF("merging v = %zu(%u), u = %zu(%u)\n", g[v].index, rv, g[u].index, ru); assert(rv < ru); @@ -350,8 +350,8 @@ void liftSinks(const AcyclicGraph &acyclic_g, vector &topoOrder) { } if (isLeafNode(v, acyclic_g)) { - DEBUG_PRINTF("sink found %u\n", acyclic_g[v].index); - sinks.insert(v); + DEBUG_PRINTF("sink found %zu\n", acyclic_g[v].index); + sinks.insert(NFAVertex(v)); } } @@ -365,18 +365,18 @@ void liftSinks(const AcyclicGraph &acyclic_g, vector &topoOrder) { DEBUG_PRINTF("look\n"); changed = false; for (auto v : vertices_range(acyclic_g)) { - if (is_special(v, acyclic_g) || contains(sinks, v)) { + if (is_special(v, acyclic_g) || contains(sinks, NFAVertex(v))) { continue; } for (auto w : adjacent_vertices_range(v, acyclic_g)) { - if (!contains(sinks, w)) { + if (!contains(sinks, NFAVertex(w))) { goto next; } } - DEBUG_PRINTF("sink found %u\n", acyclic_g[v].index); - sinks.insert(v); + DEBUG_PRINTF("sink found %zu\n", acyclic_g[v].index); + sinks.insert(NFAVertex(v)); changed = true; next:; } @@ -387,10 +387,10 @@ void liftSinks(const AcyclicGraph &acyclic_g, vector &topoOrder) { continue; } NFAVertex s = *ri; - DEBUG_PRINTF("handling sink %u\n", acyclic_g[s].index); + DEBUG_PRINTF("handling sink %zu\n", acyclic_g[s].index); ue2::unordered_set parents; for (const auto &e : in_edges_range(s, acyclic_g)) { - parents.insert(source(e, acyclic_g)); + parents.insert(NFAVertex(source(e, acyclic_g))); } /* vertex has no children not reachable on a back edge, bubble the @@ -417,10 +417,9 @@ vector buildTopoOrder(const NGHolder &w, vector &colours) { vector topoOrder; - topological_sort( - acyclic_g, back_inserter(topoOrder), - color_map(make_iterator_property_map( - colours.begin(), get(&NFAGraphVertexProps::index, acyclic_g)))); + topological_sort(acyclic_g, back_inserter(topoOrder), + color_map(make_iterator_property_map(colours.begin(), + get(vertex_index, acyclic_g)))); reorderSpecials(w, acyclic_g, topoOrder); @@ -432,7 +431,7 @@ vector buildTopoOrder(const NGHolder &w, DEBUG_PRINTF("TOPO ORDER\n"); for (auto ri = topoOrder.rbegin(); ri != topoOrder.rend(); ++ri) { - DEBUG_PRINTF("[%u]\n", acyclic_g[*ri].index); + DEBUG_PRINTF("[%zu]\n", acyclic_g[*ri].index); } DEBUG_PRINTF("----------\n"); @@ -448,14 +447,14 @@ ue2::unordered_map assignRegions(const NGHolder &g) { // Build an acyclic graph for this NGHolder. 
BackEdgeSet deadEdges; - depth_first_search( - g.g, visitor(BackEdges(deadEdges)) - .root_vertex(g.start) - .color_map(make_iterator_property_map( - colours.begin(), get(&NFAGraphVertexProps::index, g.g)))); + depth_first_search(g, + visitor(BackEdges(deadEdges)) + .root_vertex(g.start) + .color_map(make_iterator_property_map(colours.begin(), + get(vertex_index, g)))); auto af = make_bad_edge_filter(&deadEdges); - AcyclicGraph acyclic_g(g.g, af); + AcyclicGraph acyclic_g(g, af); // Build a (reverse) topological ordering. vector topoOrder = buildTopoOrder(g, acyclic_g, colours); diff --git a/src/nfagraph/ng_region.h b/src/nfagraph/ng_region.h index 464a6838..a56933dc 100644 --- a/src/nfagraph/ng_region.h +++ b/src/nfagraph/ng_region.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -181,7 +181,7 @@ bool isOptionalRegion(const Graph &g, NFAVertex v, const ue2::unordered_map ®ion_map) { assert(isRegionEntry(g, v, region_map)); - DEBUG_PRINTF("check if r%u is optional (inspecting v%u)\n", + DEBUG_PRINTF("check if r%u is optional (inspecting v%zu)\n", region_map.at(v), g[v].index); // Region zero is never optional. @@ -198,12 +198,12 @@ bool isOptionalRegion(const Graph &g, NFAVertex v, if (inSameRegion(g, v, u, region_map)) { continue; } - DEBUG_PRINTF(" searching from u=%u\n", g[u].index); + DEBUG_PRINTF(" searching from u=%zu\n", g[u].index); assert(inEarlierRegion(g, v, u, region_map)); for (auto w : adjacent_vertices_range(u, g)) { - DEBUG_PRINTF(" searching to w=%u\n", g[w].index); + DEBUG_PRINTF(" searching to w=%zu\n", g[w].index); if (inLaterRegion(g, v, w, region_map)) { return true; } diff --git a/src/nfagraph/ng_region_redundancy.cpp b/src/nfagraph/ng_region_redundancy.cpp index 5cd266dc..264e4312 100644 --- a/src/nfagraph/ng_region_redundancy.cpp +++ b/src/nfagraph/ng_region_redundancy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -90,7 +90,7 @@ void processCyclicStateForward(NGHolder &h, NFAVertex cyc, CharReach cr = h[cyc].char_reach; auto reports = h[cyc].reports; - DEBUG_PRINTF("going forward from %u/%u\n", h[cyc].index, + DEBUG_PRINTF("going forward from %zu/%u\n", h[cyc].index, region); map::const_iterator it; @@ -98,7 +98,7 @@ void processCyclicStateForward(NGHolder &h, NFAVertex cyc, NFAVertex v = it->second.entry; const CharReach ®ion_cr = it->second.cr; assert(isRegionEntry(h, v, region_map) && !is_special(v, h)); - DEBUG_PRINTF("checking %u\n", h[v].index); + DEBUG_PRINTF("checking %zu\n", h[v].index); if (!region_cr.isSubsetOf(cr)) { DEBUG_PRINTF("doesn't cover the reach of region %u\n", region); @@ -107,8 +107,8 @@ void processCyclicStateForward(NGHolder &h, NFAVertex cyc, if (isOptionalRegion(h, v, region_map) && !regionHasUnexpectedAccept(h, region, reports, region_map)) { - DEBUG_PRINTF("cyclic state %u leads to optional region leader %u\n", - h[cyc].index, h[v].index); + DEBUG_PRINTF("cyclic state %zu leads to optional region leader" + " %zu\n", h[cyc].index, h[v].index); deadRegions.insert(region); } else if (isSingletonRegion(h, v, region_map)) { /* we can use this region as straw and suck in optional regions on @@ -136,14 +136,14 @@ void 
processCyclicStateReverse(NGHolder &h, NFAVertex cyc, CharReach cr = h[cyc].char_reach; auto reports = h[cyc].reports; - DEBUG_PRINTF("going back from %u/%u\n", h[cyc].index, region); + DEBUG_PRINTF("going back from %zu/%u\n", h[cyc].index, region); map::const_iterator it; while ((it = info.find(--region)) != info.end()) { NFAVertex v = it->second.entry; const CharReach ®ion_cr = it->second.cr; assert(isRegionEntry(h, v, region_map) && !is_special(v, h)); - DEBUG_PRINTF("checking %u\n", h[v].index); + DEBUG_PRINTF("checking %zu\n", h[v].index); if (!region_cr.isSubsetOf(cr)) { DEBUG_PRINTF("doesn't cover the reach of region %u\n", region); @@ -152,7 +152,7 @@ void processCyclicStateReverse(NGHolder &h, NFAVertex cyc, if (isOptionalRegion(h, v, region_map) && !regionHasUnexpectedAccept(h, region, reports, region_map)) { - DEBUG_PRINTF("cyclic state %u trails optional region leader %u\n", + DEBUG_PRINTF("cyclic state %zu trails optional region leader %zu\n", h[cyc].index, h[v].index); deadRegions.insert(region); } else if (isSingletonRegion(h, v, region_map)) { diff --git a/src/nfagraph/ng_repeat.cpp b/src/nfagraph/ng_repeat.cpp index 6eb2a9d7..0aa6dc4b 100644 --- a/src/nfagraph/ng_repeat.cpp +++ b/src/nfagraph/ng_repeat.cpp @@ -61,6 +61,8 @@ #include using namespace std; +using boost::depth_first_search; +using boost::depth_first_visit; namespace ue2 { @@ -99,7 +101,7 @@ struct ReachFilter { const Graph *g = nullptr; }; -typedef boost::filtered_graph > RepeatGraph; +typedef boost::filtered_graph> RepeatGraph; struct ReachSubgraph { vector vertices; @@ -126,9 +128,11 @@ void findInitDepths(const NGHolder &g, } } -template static -void buildTopoOrder(const Graph &g, vector &topoOrder) { +vector buildTopoOrder(const RepeatGraph &g) { + /* Note: RepeatGraph is a filtered version of NGHolder and still has + * NFAVertex as its vertex descriptor */ + typedef ue2::unordered_set EdgeSet; EdgeSet deadEdges; @@ -140,10 +144,13 @@ void buildTopoOrder(const Graph &g, vector &topoOrder) { color_map(make_assoc_property_map(colours))); auto acyclic_g = make_filtered_graph(g, make_bad_edge_filter(&deadEdges)); + vector topoOrder; topological_sort(acyclic_g, back_inserter(topoOrder), color_map(make_assoc_property_map(colours))); reverse(topoOrder.begin(), topoOrder.end()); + + return topoOrder; } static @@ -171,7 +178,7 @@ bool roguePredecessor(const NGHolder &g, NFAVertex v, continue; } if (!contains(pred, u)) { - DEBUG_PRINTF("%u is a rogue pred\n", g[u].index); + DEBUG_PRINTF("%zu is a rogue pred\n", g[u].index); return true; } @@ -197,7 +204,7 @@ bool rogueSuccessor(const NGHolder &g, NFAVertex v, } if (!contains(succ, w)) { - DEBUG_PRINTF("%u is a rogue succ\n", g[w].index); + DEBUG_PRINTF("%zu is a rogue succ\n", g[w].index); return true; } @@ -223,8 +230,8 @@ bool hasDifferentTops(const NGHolder &g, const vector &verts) { if (u != g.start && u != g.startDs) { continue; // Only edges from starts have valid top properties. 
} - DEBUG_PRINTF("edge (%u,%u) with %zu tops\n", g[u].index, g[v].index, - g[e].tops.size()); + DEBUG_PRINTF("edge (%zu,%zu) with %zu tops\n", g[u].index, + g[v].index, g[e].tops.size()); if (!tops) { tops = &g[e].tops; } else if (g[e].tops != *tops) { @@ -243,14 +250,14 @@ bool vertexIsBad(const NGHolder &g, NFAVertex v, const ue2::unordered_set &pred, const ue2::unordered_set &succ, const flat_set &reports) { - DEBUG_PRINTF("check vertex %u\n", g[v].index); + DEBUG_PRINTF("check vertex %zu\n", g[v].index); // We must drop any vertex that is the target of a back-edge within // our subgraph. The tail set contains all vertices that are after v in a // topo ordering. for (auto u : inv_adjacent_vertices_range(v, g)) { if (contains(tail, u)) { - DEBUG_PRINTF("back-edge (%u,%u) in subgraph found\n", + DEBUG_PRINTF("back-edge (%zu,%zu) in subgraph found\n", g[u].index, g[v].index); return true; } @@ -260,18 +267,18 @@ bool vertexIsBad(const NGHolder &g, NFAVertex v, // edges from *all* the vertices in pred and no other external entries. // Similarly for exits. if (roguePredecessor(g, v, involved, pred)) { - DEBUG_PRINTF("preds for %u not well-formed\n", g[v].index); + DEBUG_PRINTF("preds for %zu not well-formed\n", g[v].index); return true; } if (rogueSuccessor(g, v, involved, succ)) { - DEBUG_PRINTF("succs for %u not well-formed\n", g[v].index); + DEBUG_PRINTF("succs for %zu not well-formed\n", g[v].index); return true; } // All reporting vertices should have the same reports. if (is_match_vertex(v, g) && reports != g[v].reports) { - DEBUG_PRINTF("report mismatch to %u\n", g[v].index); + DEBUG_PRINTF("report mismatch to %zu\n", g[v].index); return true; } @@ -291,8 +298,7 @@ void splitSubgraph(const NGHolder &g, const deque &verts, NFAUndirectedGraph ug; ue2::unordered_map old2new; - ue2::unordered_map newIdx2old; - createUnGraph(verts_g.g, true, true, ug, old2new, newIdx2old); + createUnGraph(verts_g, true, true, ug, old2new); ue2::unordered_map repeatMap; @@ -517,7 +523,7 @@ bool processSubgraph(const NGHolder &g, ReachSubgraph &rsi, if (u == first) { continue; // no self-loops } - DEBUG_PRINTF("pred vertex %u\n", g[u].index); + DEBUG_PRINTF("pred vertex %zu\n", g[u].index); dist[u].insert(0); } @@ -619,7 +625,7 @@ void buildTugTrigger(NGHolder &g, NFAVertex cyclic, NFAVertex v, vector &tugs) { if (allPredsInSubgraph(v, g, involved)) { // We can transform this vertex into a tug trigger in-place. 
- DEBUG_PRINTF("all preds in subgraph, vertex %u becomes tug\n", + DEBUG_PRINTF("all preds in subgraph, vertex %zu becomes tug\n", g[v].index); add_edge(cyclic, v, g); tugs.push_back(v); @@ -631,7 +637,7 @@ void buildTugTrigger(NGHolder &g, NFAVertex cyclic, NFAVertex v, NFAVertex t = clone_vertex(g, v); depths[t] = depths[v]; - DEBUG_PRINTF("there are other paths, cloned tug %u from vertex %u\n", + DEBUG_PRINTF("there are other paths, cloned tug %zu from vertex %zu\n", g[t].index, g[v].index); tugs.push_back(t); @@ -648,7 +654,7 @@ NFAVertex createCyclic(NGHolder &g, ReachSubgraph &rsi) { NFAVertex cyclic = clone_vertex(g, last); add_edge(cyclic, cyclic, g); - DEBUG_PRINTF("created cyclic vertex %u\n", g[cyclic].index); + DEBUG_PRINTF("created cyclic vertex %zu\n", g[cyclic].index); return cyclic; } @@ -659,7 +665,7 @@ NFAVertex createPos(NGHolder &g, ReachSubgraph &rsi) { g[pos].char_reach = g[first].char_reach; - DEBUG_PRINTF("created pos vertex %u\n", g[pos].index); + DEBUG_PRINTF("created pos vertex %zu\n", g[pos].index); return pos; } @@ -705,7 +711,7 @@ void unpeelNearEnd(NGHolder &g, ReachSubgraph &rsi, NFAVertex d = clone_vertex(g, last); depths[d] = depths[last]; - DEBUG_PRINTF("created vertex %u\n", g[d].index); + DEBUG_PRINTF("created vertex %zu\n", g[d].index); for (auto v : *succs) { add_edge(d, v, g); @@ -946,7 +952,7 @@ bool peelSubgraph(const NGHolder &g, const Grey &grey, ReachSubgraph &rsi, zap = it; break; } else { - DEBUG_PRINTF("%u is involved in another repeat\n", g[*it].index); + DEBUG_PRINTF("%zu is involved in another repeat\n", g[*it].index); } } DEBUG_PRINTF("peeling %zu vertices from front\n", @@ -963,7 +969,7 @@ bool peelSubgraph(const NGHolder &g, const Grey &grey, ReachSubgraph &rsi, zap = it.base(); // Note: erases everything after it. break; } else { - DEBUG_PRINTF("%u is involved in another repeat\n", g[*it].index); + DEBUG_PRINTF("%zu is involved in another repeat\n", g[*it].index); } } DEBUG_PRINTF("peeling %zu vertices from back\n", @@ -974,7 +980,7 @@ bool peelSubgraph(const NGHolder &g, const Grey &grey, ReachSubgraph &rsi, // no-no. 
for (auto v : rsi.vertices) { if (contains(created, v)) { - DEBUG_PRINTF("vertex %u is in another repeat\n", g[v].index); + DEBUG_PRINTF("vertex %zu is in another repeat\n", g[v].index); return false; } } @@ -997,7 +1003,7 @@ void peelStartDotStar(const NGHolder &g, NFAVertex first = rsi.vertices.front(); if (depths.at(first).fromStartDotStar.min == depth(1)) { - DEBUG_PRINTF("peeling start front vertex %u\n", g[first].index); + DEBUG_PRINTF("peeling start front vertex %zu\n", g[first].index); rsi.vertices.erase(rsi.vertices.begin()); reprocessSubgraph(g, grey, rsi); } @@ -1006,8 +1012,8 @@ void peelStartDotStar(const NGHolder &g, static void buildReachSubgraphs(const NGHolder &g, vector &rs, const u32 minNumVertices) { - const ReachFilter fil(&g.g); - const RepeatGraph rg(g.g, fil); + const ReachFilter fil(&g); + const RepeatGraph rg(g, fil); if (!isCompBigEnough(rg, minNumVertices)) { DEBUG_PRINTF("component not big enough, bailing\n"); @@ -1015,19 +1021,17 @@ void buildReachSubgraphs(const NGHolder &g, vector &rs, } NFAUndirectedGraph ug; - ue2::unordered_map old2new; - ue2::unordered_map newIdx2old; - createUnGraph(rg, true, true, ug, old2new, newIdx2old); + unordered_map old2new; + createUnGraph(rg, true, true, ug, old2new); - ue2::unordered_map repeatMap; + unordered_map repeatMap; unsigned int num; num = connected_components(ug, make_assoc_property_map(repeatMap)); DEBUG_PRINTF("found %u connected repeat components\n", num); // Now, we build a set of topo-ordered ReachSubgraphs. - vector topoOrder; - buildTopoOrder(rg, topoOrder); + vector topoOrder = buildTopoOrder(rg); rs.resize(num); @@ -1078,7 +1082,7 @@ bool entered_at_fixed_offset(NFAVertex v, const NGHolder &g, if (is_triggered(g) && !contains(reached_by_fixed_tops, v)) { /* can't do this for infix/suffixes unless we know trigger literals * can only occur at one offset */ - DEBUG_PRINTF("bad top(s) for %u\n", g[v].index); + DEBUG_PRINTF("bad top(s) for %zu\n", g[v].index); return false; } @@ -1098,8 +1102,8 @@ bool entered_at_fixed_offset(NFAVertex v, const NGHolder &g, for (auto u : inv_adjacent_vertices_range(v, g)) { const depth &u_max_depth = depths.at(u).fromStart.max; - DEBUG_PRINTF("pred %u max depth %s from start\n", - g[u].index, u_max_depth.str().c_str()); + DEBUG_PRINTF("pred %zu max depth %s from start\n", g[u].index, + u_max_depth.str().c_str()); if (u_max_depth != first - depth(1)) { return false; } @@ -1122,7 +1126,7 @@ NFAVertex buildTriggerStates(NGHolder &g, const vector &trigger, u = v; } - DEBUG_PRINTF("trigger len=%zu has sink %u\n", trigger.size(), g[u].index); + DEBUG_PRINTF("trigger len=%zu has sink %zu\n", trigger.size(), g[u].index); return u; } @@ -1252,7 +1256,7 @@ void buildRepeatGraph(NGHolder &rg, if (is_triggered(rg)) { // Add vertices for all our triggers addTriggers(rg, triggers); - rg.renumberVertices(); + renumber_vertices(rg); // We don't know anything about how often this graph is triggered, so we // make the start vertex cyclic for the purposes of this analysis ONLY. 
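renumber_vertices() and renumber_edges() are now free functions over the graph rather than NGHolder member functions. As a loose illustration of what such a renumbering does (a sketch with invented types, not the real ue2_graph implementation):

    #include <boost/graph/adjacency_list.hpp>
    #include <cstddef>

    struct Props {
        std::size_t index = 0;
    };

    // listS storage provides no implicit vertex_index, which is why a dense
    // bundled index must be maintained explicitly.
    using Graph = boost::adjacency_list<boost::listS, boost::listS,
                                        boost::directedS, Props>;

    static void renumber_vertices(Graph &g) {
        std::size_t i = 0;
        for (auto [it, end] = boost::vertices(g); it != end; ++it) {
            g[*it].index = i++; // assign dense, zero-based indices
        }
    }

    int main(void) {
        Graph g;
        boost::add_vertex(g);
        boost::add_vertex(g);
        renumber_vertices(g); // indices are 0 and 1 afterwards
        return 0;
    }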
@@ -1274,30 +1278,26 @@ void buildInputGraph(NGHolder &lhs, ue2::unordered_map &lhs_map, const NGHolder &g, const NFAVertex first, const map>> &triggers) { - DEBUG_PRINTF("building lhs with first=%u\n", g[first].index); + DEBUG_PRINTF("building lhs with first=%zu\n", g[first].index); cloneHolder(lhs, g, &lhs_map); assert(g.kind == lhs.kind); addTriggers(lhs, triggers); - lhs.renumberVertices(); + renumber_vertices(lhs); // Replace each back-edge (u,v) with an edge (startDs,v), which will // generate entries at at least the rate of the loop created by that // back-edge. set dead; BackEdges > backEdgeVisitor(dead); - depth_first_search( - lhs.g, visitor(backEdgeVisitor) - .root_vertex(lhs.start) - .vertex_index_map(get(&NFAGraphVertexProps::index, lhs.g))); + depth_first_search(lhs, visitor(backEdgeVisitor).root_vertex(lhs.start)); for (const auto &e : dead) { const NFAVertex u = source(e, lhs), v = target(e, lhs); if (u == v) { continue; // Self-loops are OK. } - DEBUG_PRINTF("replacing back-edge (%u,%u) with edge (startDs,%u)\n", - lhs[u].index, lhs[v].index, - lhs[v].index); + DEBUG_PRINTF("replacing back-edge (%zu,%zu) with edge (startDs,%zu)\n", + lhs[u].index, lhs[v].index, lhs[v].index); add_edge_if_not_present(lhs.startDs, v, lhs); remove_edge(e, lhs); @@ -1384,13 +1384,13 @@ bool hasSoleEntry(const NGHolder &g, const ReachSubgraph &rsi, for (const auto &v : rsi.vertices) { assert(!is_special(v, g)); // no specials in repeats assert(contains(rg_map, v)); - DEBUG_PRINTF("rg vertex %u in repeat\n", rg[rg_map.at(v)].index); + DEBUG_PRINTF("rg vertex %zu in repeat\n", rg[rg_map.at(v)].index); region_map.emplace(rg_map.at(v), repeat_region); } for (const auto &v : vertices_range(rg)) { if (!contains(region_map, v)) { - DEBUG_PRINTF("rg vertex %u in lhs (trigger)\n", rg[v].index); + DEBUG_PRINTF("rg vertex %zu in lhs (trigger)\n", rg[v].index); region_map.emplace(v, lhs_region); } } @@ -1432,7 +1432,7 @@ struct StrawWalker { if (next == v) { // Ignore self loop. 
++ai; if (ai == ae) { - return NFAGraph::null_vertex(); + return NGHolder::null_vertex(); } next = *ai; } @@ -1447,7 +1447,7 @@ struct StrawWalker { succs.erase(v); for (tie(ai, ae) = adjacent_vertices(v, g); ai != ae; ++ai) { next = *ai; - DEBUG_PRINTF("checking %u\n", g[next].index); + DEBUG_PRINTF("checking %zu\n", g[next].index); if (next == v) { continue; } @@ -1468,32 +1468,31 @@ struct StrawWalker { return next; } DEBUG_PRINTF("bailing\n"); - return NFAGraph::null_vertex(); + return NGHolder::null_vertex(); } return next; } NFAVertex walk(NFAVertex v, vector &straw) const { - DEBUG_PRINTF("walk from %u\n", g[v].index); + DEBUG_PRINTF("walk from %zu\n", g[v].index); ue2::unordered_set visited; straw.clear(); while (!is_special(v, g)) { - DEBUG_PRINTF("checking %u\n", g[v].index); + DEBUG_PRINTF("checking %zu\n", g[v].index); NFAVertex next = step(v); - if (next == NFAGraph::null_vertex()) { + if (next == NGHolder::null_vertex()) { break; } if (!visited.insert(next).second) { - DEBUG_PRINTF("already visited %u, bailing\n", - g[next].index); + DEBUG_PRINTF("already visited %zu, bailing\n", g[next].index); break; /* don't want to get stuck in any complicated loops */ } const CharReach &reach_v = g[v].char_reach; const CharReach &reach_next = g[next].char_reach; if (!reach_v.isSubsetOf(reach_next)) { - DEBUG_PRINTF("%u's reach is not a superset of %u's\n", + DEBUG_PRINTF("%zu's reach is not a superset of %zu's\n", g[next].index, g[v].index); break; } @@ -1501,7 +1500,7 @@ struct StrawWalker { // If this is cyclic with the right reach, we're done. Note that // startDs fulfils this requirement. if (hasSelfLoop(next, g) && !isBoundedRepeatCyclic(next)) { - DEBUG_PRINTF("found cyclic %u\n", g[next].index); + DEBUG_PRINTF("found cyclic %zu\n", g[next].index); return next; } @@ -1510,7 +1509,7 @@ struct StrawWalker { } straw.clear(); - return NFAGraph::null_vertex(); + return NGHolder::null_vertex(); } private: @@ -1525,8 +1524,8 @@ static NFAVertex walkStrawToCyclicRev(const NGHolder &g, NFAVertex v, const vector &all_repeats, vector &straw) { - typedef boost::reverse_graph RevGraph; - const RevGraph revg(g.g); + typedef boost::reverse_graph RevGraph; + const RevGraph revg(g); auto cyclic = StrawWalker(g, revg, all_repeats).walk(v, straw); reverse(begin(straw), end(straw)); // path comes from cyclic @@ -1537,7 +1536,7 @@ static NFAVertex walkStrawToCyclicFwd(const NGHolder &g, NFAVertex v, const vector &all_repeats, vector &straw) { - return StrawWalker(g, g.g, all_repeats).walk(v, straw); + return StrawWalker(g, g, all_repeats).walk(v, straw); } /** True if entries to this subgraph must pass through a cyclic state with @@ -1553,7 +1552,7 @@ bool hasCyclicSupersetEntryPath(const NGHolder &g, const ReachSubgraph &rsi, // until we encounter our cyclic, all of which must have superset reach. vector straw; return walkStrawToCyclicRev(g, rsi.vertices.front(), all_repeats, straw) != - NFAGraph::null_vertex(); + NGHolder::null_vertex(); } static @@ -1561,7 +1560,7 @@ bool hasCyclicSupersetExitPath(const NGHolder &g, const ReachSubgraph &rsi, const vector &all_repeats) { vector straw; return walkStrawToCyclicFwd(g, rsi.vertices.back(), all_repeats, straw) != - NFAGraph::null_vertex(); + NGHolder::null_vertex(); } static @@ -1844,7 +1843,7 @@ void buildFeeder(NGHolder &g, const BoundedRepeatData &rd, add_edge(u, feeder, g); } - DEBUG_PRINTF("added feeder %u\n", g[feeder].index); + DEBUG_PRINTF("added feeder %zu\n", g[feeder].index); } else { // No neg trigger means feeder is empty, and unnecessary. 
assert(g[rd.pos_trigger].char_reach.all()); @@ -1892,13 +1891,13 @@ bool improveLeadingRepeat(NGHolder &g, BoundedRepeatData &rd, // This transformation is only safe if the straw path from startDs that // we've discovered can *only* lead to this repeat, since we're going to // remove the self-loop on startDs. - if (hasGreaterOutDegree(2, g.startDs, g)) { + if (proper_out_degree(g.startDs, g) > 1) { DEBUG_PRINTF("startDs has other successors\n"); return false; } for (const auto &v : straw) { if (proper_out_degree(v, g) != 1) { - DEBUG_PRINTF("branch between startDs and repeat, from vertex %u\n", + DEBUG_PRINTF("branch between startDs and repeat, from vertex %zu\n", g[v].index); return false; } @@ -2068,8 +2067,8 @@ public: const depth &our_depth_in) : top_depths(top_depths_in), our_depth(our_depth_in) {} - void discover_vertex(NFAVertex v, UNUSED const NFAGraph &g) { - DEBUG_PRINTF("discovered %u (depth %s)\n", g[v].index, + void discover_vertex(NFAVertex v, UNUSED const NGHolder &g) { + DEBUG_PRINTF("discovered %zu (depth %s)\n", g[v].index, our_depth.str().c_str()); auto it = top_depths.find(v); @@ -2120,22 +2119,21 @@ void populateFixedTopInfo(const map &fixed_depth_tops, } } - DEBUG_PRINTF("scanning from %u depth=%s\n", g[v].index, + DEBUG_PRINTF("scanning from %zu depth=%s\n", g[v].index, td.str().c_str()); /* for each vertex reachable from v update its map to reflect that it is * reachable from a top of depth td. */ - depth_first_visit( - g.g, v, pfti_visitor(top_depths, td), - make_iterator_property_map(colours.begin(), - get(&NFAGraphVertexProps::index, g.g))); + depth_first_visit(g, v, pfti_visitor(top_depths, td), + make_iterator_property_map(colours.begin(), + get(vertex_index, g))); } for (const auto &v_depth : top_depths) { const NFAVertex v = v_depth.first; const depth &d = v_depth.second; if (d.is_finite()) { - DEBUG_PRINTF("%u reached by fixed tops at depth %s\n", + DEBUG_PRINTF("%zu reached by fixed tops at depth %s\n", g[v].index, d.str().c_str()); reached_by_fixed_tops->insert(v); } @@ -2152,19 +2150,16 @@ bool hasOverlappingRepeats(UNUSED const NGHolder &g, for (const auto &br : repeats) { if (contains(involved, br.cyclic)) { - DEBUG_PRINTF("already seen cyclic %u\n", - g[br.cyclic].index); + DEBUG_PRINTF("already seen cyclic %zu\n", g[br.cyclic].index); return true; } if (contains(involved, br.pos_trigger)) { - DEBUG_PRINTF("already seen pos %u\n", - g[br.pos_trigger].index); + DEBUG_PRINTF("already seen pos %zu\n", g[br.pos_trigger].index); return true; } for (auto v : br.tug_triggers) { if (contains(involved, v)) { - DEBUG_PRINTF("already seen tug %u\n", - g[v].index); + DEBUG_PRINTF("already seen tug %zu\n", g[v].index); return true; } } @@ -2310,7 +2305,7 @@ void analyseRepeats(NGHolder &g, const ReportManager *rm, // Go to town on the remaining acceptable subgraphs. ue2::unordered_set created; for (auto &rsi : rs) { - DEBUG_PRINTF("subgraph (beginning vertex %u) is a {%s,%s} repeat\n", + DEBUG_PRINTF("subgraph (beginning vertex %zu) is a {%s,%s} repeat\n", g[rsi.vertices.front()].index, rsi.repeatMin.str().c_str(), rsi.repeatMax.str().c_str()); @@ -2343,7 +2338,7 @@ void analyseRepeats(NGHolder &g, const ReportManager *rm, // Some of our analyses require correctly numbered vertices, so we // renumber after changes. 
- g.renumberVertices(); + renumber_vertices(g); } bool modified_start_ds = false; @@ -2384,8 +2379,8 @@ void analyseRepeats(NGHolder &g, const ReportManager *rm, // We have modified the graph, so we need to ensure that our edges // and vertices are correctly numbered. - g.renumberVertices(); - g.renumberEdges(); + renumber_vertices(g); + renumber_edges(g); // Remove stray report IDs. clearReports(g); } @@ -2424,14 +2419,14 @@ bool isPureRepeat(const NGHolder &g, PureRepeat &repeat) { // Must be start anchored. assert(edge(g.startDs, g.startDs, g).second); - if (hasGreaterOutDegree(1, g.startDs, g)) { + if (out_degree(g.startDs, g) > 1) { DEBUG_PRINTF("Unanchored\n"); return false; } // Must not be EOD-anchored. assert(edge(g.accept, g.acceptEod, g).second); - if (hasGreaterInDegree(1, g.acceptEod, g)) { + if (in_degree(g.acceptEod, g) > 1) { DEBUG_PRINTF("EOD anchored\n"); return false; } diff --git a/src/nfagraph/ng_restructuring.cpp b/src/nfagraph/ng_restructuring.cpp index 3b30a689..7bb3e991 100644 --- a/src/nfagraph/ng_restructuring.cpp +++ b/src/nfagraph/ng_restructuring.cpp @@ -52,11 +52,7 @@ namespace ue2 { static void wireStartToTops(NGHolder &g, const flat_set &tops, vector &tempEdges) { - // Construct edges in vertex index order, for determinism. - vector ordered_tops(begin(tops), end(tops)); - sort(begin(ordered_tops), end(ordered_tops), make_index_ordering(g)); - - for (NFAVertex v : ordered_tops) { + for (NFAVertex v : tops) { assert(!isLeafNode(v, g)); const NFAEdge &e = add_edge(g.start, v, g).first; @@ -102,7 +98,7 @@ void getStateOrdering(NGHolder &g, const flat_set &tops, vector tempEdges; wireStartToTops(g, tops, tempEdges); - renumberGraphVertices(g); + renumber_vertices(g); vector temp = getTopoOrdering(g); @@ -144,7 +140,7 @@ getStateIndices(const NGHolder &h, const vector &ordering) { u32 stateNum = 0; for (auto v : ordering) { - DEBUG_PRINTF("assigning state num %u to vertex %u\n", stateNum, + DEBUG_PRINTF("assigning state num %u to vertex %zu\n", stateNum, h[v].index); states[v] = stateNum++; } @@ -187,7 +183,7 @@ void optimiseTightLoops(const NGHolder &g, vector &ordering) { continue; } - DEBUG_PRINTF("moving vertex %u next to %u\n", g[v].index, g[u].index); + DEBUG_PRINTF("moving vertex %zu next to %zu\n", g[v].index, g[u].index); ordering.erase(v_it); ordering.insert(++u_it, v); diff --git a/src/nfagraph/ng_rose.cpp b/src/nfagraph/ng_rose.cpp index 46f180a8..b3649ce0 100644 --- a/src/nfagraph/ng_rose.cpp +++ b/src/nfagraph/ng_rose.cpp @@ -538,7 +538,7 @@ void getRegionRoseLiterals(const NGHolder &g, DEBUG_PRINTF("inspecting region %u\n", region); set s; for (auto v : vv) { - DEBUG_PRINTF(" exit vertex: %u\n", g[v].index); + DEBUG_PRINTF(" exit vertex: %zu\n", g[v].index); /* Note: RHS can not be depended on to take all subsequent revisits * to this vertex */ set ss = getLiteralSet(g, v, false); @@ -573,8 +573,7 @@ void gatherBackEdges(const NGHolder &g, ue2::unordered_map> *out) { set backEdges; BackEdges> be(backEdges); - depth_first_search(g.g, visitor(be).root_vertex(g.start).vertex_index_map( - get(&NFAGraphVertexProps::index, g.g))); + depth_first_search(g, visitor(be).root_vertex(g.start)); for (const auto &e : backEdges) { (*out)[source(e, g)].push_back(target(e, g)); @@ -757,7 +756,7 @@ unique_ptr LitCollection::pickNext() { unique_ptr rv = move(lits.back()); lits.pop_back(); poisonCandidates(*rv); - DEBUG_PRINTF("best is '%s' %u a%d t%d\n", + DEBUG_PRINTF("best is '%s' %zu a%d t%d\n", dumpString(*(rv->lit.begin())).c_str(), g[rv->vv.front()].index, 
(int)createsAnchoredLHS(g, rv->vv, depths, grey), @@ -863,8 +862,6 @@ u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, assert(delay <= lit.length()); DEBUG_PRINTF("managed delay %u (of max %u)\n", delay, max_delay); - // For determinism, we make sure that we create these edges from vertices - // in index-sorted order. set pred; for (auto v : curr) { insert(&pred, inv_adjacent_vertices_range(v, g)); @@ -873,10 +870,7 @@ u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, clear_in_edges(g.accept, g); clearReports(g); - vector verts(pred.begin(), pred.end()); - sort(verts.begin(), verts.end(), VertexIndexOrdering(g)); - - for (auto v : verts) { + for (auto v : pred) { NFAEdge e = add_edge(v, g.accept, g).first; g[v].reports.insert(0); if (is_triggered(g) && v == g.start) { @@ -921,8 +915,8 @@ void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, g[u].reports.insert(0); } - g.renumberVertices(); - g.renumberEdges(); + renumber_vertices(g); + renumber_edges(g); assert(allMatchStatesHaveReports(g)); assert(isCorrectlyTopped(g)); } @@ -1152,7 +1146,7 @@ void deanchorIfNeeded(NGHolder &g, bool *orig_anch) { succ_g.erase(g.startDs); for (auto v : adjacent_vertices_range(g.start, g)) { - DEBUG_PRINTF("inspecting cand %u || =%zu\n", g[v].index, + DEBUG_PRINTF("inspecting cand %zu || =%zu\n", g[v].index, g[v].char_reach.size()); if (v == g.startDs || !g[v].char_reach.all()) { @@ -1170,7 +1164,7 @@ void deanchorIfNeeded(NGHolder &g, bool *orig_anch) { } clear_vertex(v, g); remove_vertex(v, g); - g.renumberVertices(); + renumber_vertices(g); return; } @@ -1701,7 +1695,7 @@ void splitEdgesByCut(RoseInGraph &ig, const vector &to_cut, /* TODO need to update v_mapping (if we were doing more cuts) */ } - DEBUG_PRINTF("splitting on pivot %u\n", h[pivot].index); + DEBUG_PRINTF("splitting on pivot %zu\n", h[pivot].index); ue2::unordered_map temp_map; shared_ptr new_lhs = make_shared(); splitLHS(h, pivot, new_lhs.get(), &temp_map); @@ -1774,8 +1768,8 @@ bool doNetflowCut(RoseInGraph &ig, const vector &to_cut, return false; } - h.renumberVertices(); - h.renumberEdges(); + renumber_vertices(h); + renumber_edges(h); /* Step 1: Get scores for all edges */ vector scores = scoreEdges(h); /* scores by edge_index */ /* Step 2: poison scores for edges covered by successor literal */ @@ -2573,7 +2567,7 @@ bool followedByStar(const vector &vv, const NGHolder &g) { static bool isEodPrefixCandidate(const NGHolder &g) { - if (hasGreaterInDegree(0, g.accept, g)) { + if (in_degree(g.accept, g)) { DEBUG_PRINTF("graph isn't eod anchored\n"); return false; } @@ -2644,7 +2638,7 @@ void processEodPrefixes(RoseInGraph &g) { } // TODO: handle cases with multiple out-edges. - if (hasGreaterOutDegree(1, source(e, g), g)) { + if (out_degree(source(e, g), g) > 1) { continue; } @@ -2671,7 +2665,7 @@ void processEodPrefixes(RoseInGraph &g) { } for (auto v : accepts) { - if (!hasGreaterInDegree(0, v, g)) { + if (!in_degree(v, g)) { remove_vertex(v, g); } } @@ -2813,6 +2807,7 @@ unique_ptr buildRose(const NGHolder &h, bool desperation, dumpPreRoseGraph(ig, cc.grey); + renumber_vertices(ig); calcVertexOffsets(ig); return igp; } @@ -2829,6 +2824,7 @@ void desperationImprove(RoseInGraph &ig, const CompileContext &cc) { handleLongMixedSensitivityLiterals(ig); dedupe(ig); pruneUseless(ig); + renumber_vertices(ig); calcVertexOffsets(ig); } @@ -2839,8 +2835,7 @@ bool splitOffRose(RoseBuild &rose, const NGHolder &h, bool prefilter, } // We should have at least one edge into accept or acceptEod! 
- assert(hasGreaterInDegree(0, h.accept, h) || - hasGreaterInDegree(1, h.acceptEod, h)); + assert(in_degree(h.accept, h) || in_degree(h.acceptEod, h) > 1); unique_ptr igp = buildRose(h, false, cc); if (igp && rose.addRose(*igp, prefilter)) { @@ -2932,6 +2927,7 @@ bool finalChanceRose(RoseBuild &rose, const NGHolder &h, bool prefilter, add_edge(v, a, RoseInEdgeProps(rhs, 0U), ig); } + renumber_vertices(ig); calcVertexOffsets(ig); return rose.addRose(ig, prefilter, true /* final chance */); @@ -2944,8 +2940,7 @@ bool checkRose(const ReportManager &rm, const NGHolder &h, bool prefilter, } // We should have at least one edge into accept or acceptEod! - assert(hasGreaterInDegree(0, h.accept, h) || - hasGreaterInDegree(1, h.acceptEod, h)); + assert(in_degree(h.accept, h) || in_degree(h.acceptEod, h) > 1); unique_ptr igp; diff --git a/src/nfagraph/ng_small_literal_set.cpp b/src/nfagraph/ng_small_literal_set.cpp index b5867bb9..1d7be65b 100644 --- a/src/nfagraph/ng_small_literal_set.cpp +++ b/src/nfagraph/ng_small_literal_set.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -125,7 +125,7 @@ bool findLiterals(const NGHolder &g, set &out = built[g[v].index]; read_count[g[v].index] = out_degree(v, g); - DEBUG_PRINTF("setting read_count to %zu for %u\n", + DEBUG_PRINTF("setting read_count to %zu for %zu\n", read_count[g[v].index], g[v].index); assert(out.empty()); @@ -154,7 +154,7 @@ bool findLiterals(const NGHolder &g, } set &in = built[g[u].index]; - DEBUG_PRINTF("getting from %u (%zu reads to go)\n", + DEBUG_PRINTF("getting from %zu (%zu reads to go)\n", g[u].index, read_count[g[u].index]); assert(!in.empty()); assert(read_count[g[u].index]); @@ -188,7 +188,7 @@ bool findLiterals(const NGHolder &g, read_count[g[u].index]--; if (!read_count[g[u].index]) { - DEBUG_PRINTF("clearing %u as finished reading\n", g[u].index); + DEBUG_PRINTF("clearing %zu as finished reading\n", g[u].index); in.clear(); } } diff --git a/src/nfagraph/ng_som.cpp b/src/nfagraph/ng_som.cpp index 09687c4f..862f5b53 100644 --- a/src/nfagraph/ng_som.cpp +++ b/src/nfagraph/ng_som.cpp @@ -110,7 +110,7 @@ bool regionCanEstablishSom(const NGHolder &g, DEBUG_PRINTF("region %u\n", region); for (UNUSED auto v : r_exits) { - DEBUG_PRINTF(" exit %u\n", g[v].index); + DEBUG_PRINTF(" exit %zu\n", g[v].index); } /* simple if each region exit is at fixed distance from SOM. Note SOM does @@ -119,12 +119,12 @@ bool regionCanEstablishSom(const NGHolder &g, assert(regions.at(v) == region); const DepthMinMax &d = depths.at(g[v].index); if (d.min != d.max) { - DEBUG_PRINTF("failing %u as %s != %s\n", g[v].index, + DEBUG_PRINTF("failing %zu as %s != %s\n", g[v].index, d.min.str().c_str(), d.max.str().c_str()); return false; } } - DEBUG_PRINTF("region %u/%u is good\n", regions.at(r_exits[0]), + DEBUG_PRINTF("region %u/%zu is good\n", regions.at(r_exits[0]), g[r_exits[0]].index); return true; @@ -178,10 +178,7 @@ void buildRegionMapping(const NGHolder &g, set be; BackEdges > backEdgeVisitor(be); - depth_first_search( - g.g, visitor(backEdgeVisitor) - .root_vertex(g.start) - .vertex_index_map(get(&NFAGraphVertexProps::index, g.g))); + boost::depth_first_search(g, visitor(backEdgeVisitor).root_vertex(g.start)); for (const auto &e : be) { NFAVertex u = source(e, g); @@ -208,17 +205,17 @@ void buildRegionMapping(const NGHolder &g, r_i.optional ? 
" (optional)" : ""); DEBUG_PRINTF(" enters:"); for (u32 i = 0; i < r_i.enters.size(); i++) { - printf(" %u", g[r_i.enters[i]].index); + printf(" %zu", g[r_i.enters[i]].index); } printf("\n"); DEBUG_PRINTF(" exits:"); for (u32 i = 0; i < r_i.exits.size(); i++) { - printf(" %u", g[r_i.exits[i]].index); + printf(" %zu", g[r_i.exits[i]].index); } printf("\n"); DEBUG_PRINTF(" all:"); for (u32 i = 0; i < r_i.full.size(); i++) { - printf(" %u", g[r_i.full[i]].index); + printf(" %zu", g[r_i.full[i]].index); } printf("\n"); } @@ -235,8 +232,7 @@ bool validateXSL(const NGHolder &g, u32 v_region = regions.at(v); if (!is_special(v, g) && v_region > region && (escapes & g[v].char_reach).any()) { - DEBUG_PRINTF("problem with escapes for %u\n", - g[v].index); + DEBUG_PRINTF("problem with escapes for %zu\n", g[v].index); first_bad_region = MIN(first_bad_region, v_region); } } @@ -402,7 +398,7 @@ makePrefix(const NGHolder &g, const ue2::unordered_map ®ions, vector to_clear; assert(contains(lhs_map, curr_exits.front())); NFAVertex p_u = lhs_map[curr_exits.front()]; - DEBUG_PRINTF("p_u: %u\n", prefix[p_u].index); + DEBUG_PRINTF("p_u: %zu\n", prefix[p_u].index); for (auto p_v : adjacent_vertices_range(p_u, prefix)) { auto v = rev_map.at(p_v); if (p_v == prefix.accept || regions.at(v) < dead_region) { @@ -412,7 +408,7 @@ makePrefix(const NGHolder &g, const ue2::unordered_map ®ions, } for (auto v : to_clear) { - DEBUG_PRINTF("clearing in_edges on %u\n", prefix[v].index); + DEBUG_PRINTF("clearing in_edges on %zu\n", prefix[v].index); clear_in_edges(v, prefix); } @@ -575,7 +571,7 @@ void replaceExternalReportsWithSomRep(ReportManager &rm, NGHolder &g, ir.somDistance = param; ReportID rep = rm.getInternalId(ir); - DEBUG_PRINTF("vertex %u, replacing report %u with %u (type %u)\n", + DEBUG_PRINTF("vertex %zu, replacing report %u with %u (type %u)\n", g[v].index, report_id, rep, ir_type); r_new.insert(rep); } @@ -713,7 +709,7 @@ void fillHolderForLockCheck(NGHolder *out, const NGHolder &g, /* add all vertices in region, create mapping */ for (auto v : jt->second.full) { - DEBUG_PRINTF("adding v %u to midfix\n", g[v].index); + DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index); if (contains(v_map, v)) { continue; } @@ -758,7 +754,7 @@ void fillHolderForLockCheck(NGHolder *out, const NGHolder &g, } assert(in_degree(midfix.accept, midfix)); - midfix.renumberVertices(); + renumber_vertices(midfix); } static @@ -785,7 +781,7 @@ void fillRoughMidfix(NGHolder *out, const NGHolder &g, /* add all vertices in region, create mapping */ for (auto v : jt->second.full) { - DEBUG_PRINTF("adding v %u to midfix\n", g[v].index); + DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index); NFAVertex vnew = add_vertex(g[v], midfix); v_map[v] = vnew; } @@ -825,7 +821,7 @@ void fillRoughMidfix(NGHolder *out, const NGHolder &g, do { for (auto v : jt->second.exits) { - DEBUG_PRINTF("adding v %u to midfix\n", g[v].index); + DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index); NFAVertex vnew = add_vertex(g[v], midfix); v_map[v] = vnew; @@ -1012,8 +1008,7 @@ bool addPlan(vector &plan, u32 parent) { // Fetches all preds of {accept, acceptEod} for this graph. static void addReporterVertices(const NGHolder &g, vector &reporters) { - // Order reporter vertices by index for determinism. 
- set > tmp(g); + set tmp; insert(&tmp, inv_adjacent_vertices(g.accept, g)); insert(&tmp, inv_adjacent_vertices(g.acceptEod, g)); tmp.erase(g.accept); @@ -1021,7 +1016,7 @@ void addReporterVertices(const NGHolder &g, vector &reporters) { #ifdef DEBUG DEBUG_PRINTF("add reporters:"); for (UNUSED auto v : tmp) { - printf(" %u", g[v].index); + printf(" %zu", g[v].index); } printf("\n"); #endif @@ -1035,7 +1030,7 @@ void addReporterVertices(const region_info &r, const NGHolder &g, vector &reporters) { for (auto v : r.exits) { if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) { - DEBUG_PRINTF("add reporter %u\n", g[v].index); + DEBUG_PRINTF("add reporter %zu\n", g[v].index); reporters.push_back(v); } } @@ -1048,7 +1043,7 @@ void addMappedReporterVertices(const region_info &r, const NGHolder &g, vector &reporters) { for (auto v : r.exits) { if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) { - DEBUG_PRINTF("adding v=%u\n", g[v].index); + DEBUG_PRINTF("adding v=%zu\n", g[v].index); ue2::unordered_map::const_iterator it = mapping.find(v); assert(it != mapping.end()); @@ -1105,7 +1100,7 @@ void expandGraph(NGHolder &g, ue2::unordered_map ®ions, } for (auto enter : enters) { - DEBUG_PRINTF("processing enter %u\n", g[enter].index); + DEBUG_PRINTF("processing enter %zu\n", g[enter].index); map orig_to_copy; // Make a copy of all of the tail vertices, storing region info along @@ -1155,7 +1150,7 @@ void expandGraph(NGHolder &g, ue2::unordered_map ®ions, [&](const NFAEdge &e) { NFAVertex u = source(e, g); return regions.at(u) < split_region; - }, g.g); + }, g); } new_enters.push_back(orig_to_copy[enter]); @@ -1327,7 +1322,7 @@ bool doTreePlanning(NGHolder &g, dumpHolder(g, g_regions, 14, "som_expandedtree", grey); for (auto v : enters) { - DEBUG_PRINTF("enter %u\n", g[v].index); + DEBUG_PRINTF("enter %zu\n", g[v].index); // For this entry vertex, construct a version of the graph without the // other entries in this region (g_path), and calculate its depths and @@ -1562,12 +1557,12 @@ void dumpSomPlan(UNUSED const NGHolder &g, UNUSED const som_plan &p, p.is_reset, p.parent); printf(" reporters:"); for (auto v : p.reporters) { - printf(" %u", g[v].index); + printf(" %zu", g[v].index); } printf("\n"); printf(" reporters_in:"); for (auto v : p.reporters_in) { - printf(" %u", g[v].index); + printf(" %zu", g[v].index); } printf("\n"); #endif @@ -1633,7 +1628,7 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g, /* create prefix to set the som_loc */ if (!plan.front().no_implement) { - plan.front().prefix->renumberVertices(); + renumber_vertices(*plan.front().prefix); assert(plan.front().prefix->kind == NFA_OUTFIX); if (!ng.addHolder(*plan.front().prefix)) { throw CompileError(w.expressionIndex, "Pattern is too large."); @@ -1745,7 +1740,7 @@ aligned_unique_ptr makeBareSomRevNfa(const NGHolder &g, setZeroReports(g_rev); // Prep for actual construction. - g_rev.renumberVertices(); + renumber_vertices(g_rev); g_rev.kind = NFA_REV_PREFIX; reduceGraphEquivalences(g_rev, cc); removeRedundancy(g_rev, SOM_NONE); @@ -1785,7 +1780,7 @@ bool makeSomRevNfa(vector &som_nfas, const NGHolder &g, return true; } - g2.renumberVertices(); // for findMinWidth, findMaxWidth. + renumber_vertices(g2); // for findMinWidth, findMaxWidth. 
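
[Annotation, not part of the original patch: the renumber_vertices(g2) call
above matters because the width analyses size per-vertex arrays by
num_vertices() and address them through an iterator property map over
vertex_index (see the ng_width.cpp hunks later in this patch). A toy sketch
of that addressing pattern follows; all names (G, dist, dmap) are invented.
The pattern is only safe when indices are dense in [0, num_vertices), which
is exactly what renumbering re-establishes after graph surgery.

    #include <boost/graph/adjacency_list.hpp>
    #include <boost/property_map/property_map.hpp>
    #include <cassert>
    #include <vector>

    int main() {
        using G = boost::adjacency_list<boost::vecS, boost::vecS,
                                        boost::bidirectionalS>;
        G g(4);
        boost::add_edge(0, 1, g);
        boost::add_edge(1, 2, g);

        // Dense per-vertex storage, addressed via vertex_index.
        std::vector<int> dist(boost::num_vertices(g), -1);
        auto dmap = boost::make_iterator_property_map(
            dist.begin(), boost::get(boost::vertex_index, g));

        boost::put(dmap, 0, 0); // source vertex is at distance 0
        assert(boost::get(dmap, 0) == 0);
        return 0;
    }
]
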
aligned_unique_ptr nfa = makeBareSomRevNfa(g2, cc); if (!nfa) { @@ -2220,7 +2215,7 @@ bool leadingLiterals(const NGHolder &g, set *lits, for (const auto &m : curr) { const NFAVertex u = m.first; const vector &base = m.second; - DEBUG_PRINTF("expanding from %u\n", g[u].index); + DEBUG_PRINTF("expanding from %zu\n", g[u].index); for (auto v : adjacent_vertices_range(u, g)) { if (v == g.startDs) { continue; @@ -2233,8 +2228,7 @@ bool leadingLiterals(const NGHolder &g, set *lits, DEBUG_PRINTF("match\n"); goto skip_to_next_terminal; } - if (g[v].char_reach.count() - > 2 * MAX_LEADING_LITERALS) { + if (g[v].char_reach.count() > 2 * MAX_LEADING_LITERALS) { DEBUG_PRINTF("wide\n"); goto skip_to_next_terminal; } @@ -2250,8 +2244,8 @@ bool leadingLiterals(const NGHolder &g, set *lits, CharReach cr = g[v].char_reach; vector &out = next[v]; - DEBUG_PRINTF("expanding to %u (|| = %zu)\n", - g[v].index, cr.count()); + DEBUG_PRINTF("expanding to %zu (|| = %zu)\n", g[v].index, + cr.count()); for (size_t c = cr.find_first(); c != CharReach::npos; c = cr.find_next(c)) { bool nocase = ourisalpha(c) && cr.test(mytoupper(c)) @@ -2327,7 +2321,7 @@ bool splitOffLeadingLiterals(const NGHolder &g, set *lit_out, set adj_term1; insert(&adj_term1, adjacent_vertices(*terms.begin(), g)); for (auto v : terms) { - DEBUG_PRINTF("term %u\n", g[v].index); + DEBUG_PRINTF("term %zu\n", g[v].index); set temp; insert(&temp, adjacent_vertices(v, g)); if (temp != adj_term1) { @@ -2354,7 +2348,7 @@ void findBestLiteral(const NGHolder &g, buildRegionMapping(g, regions, info, false); ue2_literal best; - NFAVertex best_v = nullptr; + NFAVertex best_v = NGHolder::null_vertex(); map::const_iterator lit = info.begin(); while (1) { @@ -2390,7 +2384,7 @@ bool splitOffBestLiteral(const NGHolder &g, const ue2::unordered_map ®ions, ue2_literal *lit_out, NGHolder *lhs, NGHolder *rhs, const CompileContext &cc) { - NFAVertex v = nullptr; + NFAVertex v = NGHolder::null_vertex(); findBestLiteral(g, regions, lit_out, &v, cc); if (lit_out->empty()) { @@ -2404,7 +2398,7 @@ bool splitOffBestLiteral(const NGHolder &g, splitGraph(g, v, lhs, &lhs_map, rhs, &rhs_map); - DEBUG_PRINTF("v = %u\n", g[v].index); + DEBUG_PRINTF("v = %zu\n", g[v].index); return true; } @@ -2624,7 +2618,7 @@ bool doHaigLitHaigSom(NG &ng, NGHolder &g, } } else { DEBUG_PRINTF("has start->accept edge\n"); - if (hasGreaterInDegree(1, g.acceptEod, g)) { + if (in_degree(g.acceptEod, g) > 1) { DEBUG_PRINTF("also has a path to EOD\n"); return false; } @@ -2825,7 +2819,7 @@ map::const_iterator tryForLaterRevNfaCut(const NGHolder &g, reverseHolder(*prefix, g_rev); anchorStarts(g_rev); - g_rev.renumberVertices(); + renumber_vertices(g_rev); g_rev.kind = NFA_REV_PREFIX; reduceGraphEquivalences(g_rev, cc); removeRedundancy(g_rev, SOM_NONE); @@ -2869,7 +2863,7 @@ unique_ptr makePrefixForChain(NGHolder &g, } depths->clear(); /* renumbering invalidates depths */ - prefix->renumberVertices(); + renumber_vertices(*prefix); DEBUG_PRINTF("done\n"); return prefix; @@ -2885,8 +2879,7 @@ sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, // Special case: if g is completely anchored or begins with a dot-star, we // know that we have an absolute SOM of zero all the time. 
- assert(edge(g.startDs, g.startDs, g).second); - if (!hasGreaterOutDegree(1, g.startDs, g) || beginsWithDotStar(g)) { + if (!proper_out_degree(g.startDs, g) || beginsWithDotStar(g)) { makeSomAbsReports(rm, g, g.accept); makeSomAbsReports(rm, g, g.acceptEod); return SOMBE_HANDLED_INTERNAL; @@ -3003,7 +2996,7 @@ sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, u32 rev_comp_id = doSomRevNfaPrefix(ng, w, *prefix, cc); updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id); } - prefix->renumberVertices(); + renumber_vertices(*prefix); if (!ng.addHolder(*prefix)) { DEBUG_PRINTF("failed to add holder\n"); clear_graph(g); diff --git a/src/nfagraph/ng_som_add_redundancy.cpp b/src/nfagraph/ng_som_add_redundancy.cpp index 924cfad1..33544ec1 100644 --- a/src/nfagraph/ng_som_add_redundancy.cpp +++ b/src/nfagraph/ng_som_add_redundancy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -155,13 +155,13 @@ bool addSomRedundancy(NGHolder &g, vector &depths) { if (is_special(v, g)) { continue; } - if (!hasGreaterInDegree(0, v, g)) { + if (!in_degree(v, g)) { continue; // unreachable, probably killed } const DepthMinMax &d = getDepth(v, g, depths); - DEBUG_PRINTF("vertex %u has depths %s\n", g[v].index, + DEBUG_PRINTF("vertex %zu has depths %s\n", g[v].index, d.str().c_str()); if (d.min == d.max) { diff --git a/src/nfagraph/ng_som_util.cpp b/src/nfagraph/ng_som_util.cpp index 676fb523..c4337341 100644 --- a/src/nfagraph/ng_som_util.cpp +++ b/src/nfagraph/ng_som_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -76,7 +76,7 @@ vector getDistancesFromSOM(const NGHolder &g_orig) { clear_in_edges(v, g); } - //dumpGraph("som_depth.dot", g.g); + //dumpGraph("som_depth.dot", g); vector temp_depths; // numbered by vertex index in g calcDepthsFrom(g, g.start, temp_depths); @@ -143,7 +143,7 @@ bool firstMatchIsFirst(const NGHolder &p) { for (auto v : vertices_range(p)) { assert(!is_virtual_start(v, p)); if (!is_special(v, p)) { - DEBUG_PRINTF("turning on %u\n", p[v].index); + DEBUG_PRINTF("turning on %zu\n", p[v].index); states.insert(v); } } @@ -154,9 +154,9 @@ bool firstMatchIsFirst(const NGHolder &p) { for (auto v : states) { /* need to check if this vertex may represent an infix match - ie * it does not have an edge to accept. 
*/ - DEBUG_PRINTF("check %u\n", p[v].index); + DEBUG_PRINTF("check %zu\n", p[v].index); if (!edge(v, p.accept, p).second) { - DEBUG_PRINTF("fail %u\n", p[v].index); + DEBUG_PRINTF("fail %zu\n", p[v].index); return false; } } @@ -186,14 +186,11 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g, return cache.smgb[u]; } - DEBUG_PRINTF("checking if som can go backwards on %u\n", g[u].index); + DEBUG_PRINTF("checking if som can go backwards on %zu\n", g[u].index); set be; BackEdges> backEdgeVisitor(be); - depth_first_search( - g.g, visitor(backEdgeVisitor) - .root_vertex(g.start) - .vertex_index_map(get(&NFAGraphVertexProps::index, g.g))); + boost::depth_first_search(g, visitor(backEdgeVisitor).root_vertex(g.start)); bool rv; if (0) { @@ -210,8 +207,7 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g, NFAVertex s = source(e, g); NFAVertex t = target(e, g); /* only need to worry about big cycles including/before u */ - DEBUG_PRINTF("back edge %u %u\n", g[s].index, - g[t].index); + DEBUG_PRINTF("back edge %zu %zu\n", g[s].index, g[t].index); if (s != t && region_map.at(s) <= u_region) { DEBUG_PRINTF("eek big cycle\n"); rv = true; /* big cycle -> eek */ @@ -268,13 +264,13 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g, pruneUseless(c_g); be.clear(); - depth_first_search(c_g.g, visitor(backEdgeVisitor).root_vertex(c_g.start). - vertex_index_map(get(&NFAGraphVertexProps::index, c_g.g))); + boost::depth_first_search(c_g, visitor(backEdgeVisitor) + .root_vertex(c_g.start)); for (const auto &e : be) { NFAVertex s = source(e, c_g); NFAVertex t = target(e, c_g); - DEBUG_PRINTF("back edge %u %u\n", c_g[s].index, c_g[t].index); + DEBUG_PRINTF("back edge %zu %zu\n", c_g[s].index, c_g[t].index); if (s != t) { assert(0); DEBUG_PRINTF("eek big cycle\n"); @@ -326,7 +322,7 @@ bool sentClearsTail(const NGHolder &g, } for (UNUSED auto v : states) { - DEBUG_PRINTF("start state: %u\n", g[v].index); + DEBUG_PRINTF("start state: %zu\n", g[v].index); } /* run the prefix the main graph */ @@ -338,7 +334,7 @@ bool sentClearsTail(const NGHolder &g, continue; /* not in tail */ } - DEBUG_PRINTF("v %u is still on\n", g[v].index); + DEBUG_PRINTF("v %zu is still on\n", g[v].index); assert(v != g.accept && v != g.acceptEod); /* no cr */ assert(contains(region_map, v)); diff --git a/src/nfagraph/ng_split.cpp b/src/nfagraph/ng_split.cpp index 4576a498..ce267d0f 100644 --- a/src/nfagraph/ng_split.cpp +++ b/src/nfagraph/ng_split.cpp @@ -87,7 +87,7 @@ void splitLHS(const NGHolder &base, const vector &pivots, clearAccepts(*lhs); for (auto pivot : pivots) { - DEBUG_PRINTF("pivot is %u lv %zu lm %zu\n", base[pivot].index, + DEBUG_PRINTF("pivot is %zu lv %zu lm %zu\n", base[pivot].index, num_vertices(*lhs), lhs_map->size()); assert(contains(*lhs_map, pivot)); @@ -191,8 +191,8 @@ void findCommonSuccessors(const NGHolder &g, const vector &pivots, vector &succ) { assert(!pivots.empty()); - // Note: for determinism, we must sort our successor sets by vertex_index. 
- set > adj(g), adj_temp(g); + set adj; + set adj_temp; insert(&adj, adjacent_vertices(pivots.at(0), g)); diff --git a/src/nfagraph/ng_squash.cpp b/src/nfagraph/ng_squash.cpp index 21703f8b..ebec3a4a 100644 --- a/src/nfagraph/ng_squash.cpp +++ b/src/nfagraph/ng_squash.cpp @@ -134,8 +134,7 @@ void buildPDomTree(const NGHolder &g, PostDomTree &tree) { } NFAVertex pdom = postdominators[v]; if (pdom) { - DEBUG_PRINTF("vertex %u -> %u\n", g[pdom].index, - g[v].index); + DEBUG_PRINTF("vertex %zu -> %zu\n", g[pdom].index, g[v].index); tree[pdom].insert(v); } } @@ -153,8 +152,7 @@ void buildSquashMask(NFAStateSet &mask, const NGHolder &g, NFAVertex v, som_type som, const vector &som_depths, const ue2::unordered_map ®ion_map, smgb_cache &cache) { - DEBUG_PRINTF("build base squash mask for vertex %u)\n", - g[v].index); + DEBUG_PRINTF("build base squash mask for vertex %zu)\n", g[v].index); vector q; @@ -301,7 +299,7 @@ void findDerivedSquashers(const NGHolder &g, const vector &vByIndex, } NFAStateSet u_squash(init.size()); - u32 u_index = g[u].index; + size_t u_index = g[u].index; buildSquashMask(u_squash, g, u, g[u].char_reach, init, vByIndex, pdom_tree, som, som_depths, region_map, cache); @@ -309,7 +307,7 @@ void findDerivedSquashers(const NGHolder &g, const vector &vByIndex, u_squash.set(u_index); /* never clear ourselves */ if ((~u_squash).any()) { // i.e. some bits unset in mask - DEBUG_PRINTF("%u is an upstream squasher of %u\n", u_index, + DEBUG_PRINTF("%zu is an upstream squasher of %zu\n", u_index, g[v].index); (*squash)[u] = u_squash; remaining.push_back(u); @@ -521,8 +519,7 @@ void filterSquashers(const NGHolder &g, if (!contains(squash, v)) { continue; } - DEBUG_PRINTF("looking at squash set for vertex %u\n", - g[v].index); + DEBUG_PRINTF("looking at squash set for vertex %zu\n", g[v].index); if (!hasSelfLoop(v, g)) { DEBUG_PRINTF("acyclic\n"); @@ -600,7 +597,7 @@ void removeEdgesToAccept(NGHolder &g, NFAVertex v) { NFAVertex u = source(e, g); const auto &r = g[u].reports; if (!r.empty() && is_subset_of(r, reports)) { - DEBUG_PRINTF("vertex %u\n", g[u].index); + DEBUG_PRINTF("vertex %zu\n", g[u].index); dead.insert(e); } } @@ -609,7 +606,7 @@ void removeEdgesToAccept(NGHolder &g, NFAVertex v) { NFAVertex u = source(e, g); const auto &r = g[u].reports; if (!r.empty() && is_subset_of(r, reports)) { - DEBUG_PRINTF("vertex %u\n", g[u].index); + DEBUG_PRINTF("vertex %zu\n", g[u].index); dead.insert(e); } } @@ -620,7 +617,7 @@ void removeEdgesToAccept(NGHolder &g, NFAVertex v) { static vector findUnreachable(const NGHolder &g) { - const boost::reverse_graph revg(g.g); + const boost::reverse_graph revg(g); ue2::unordered_map colours; colours.reserve(num_vertices(g)); @@ -633,7 +630,7 @@ vector findUnreachable(const NGHolder &g) { vector unreach; for (auto v : vertices_range(revg)) { if (!contains(colours, v)) { - unreach.push_back(v); + unreach.push_back(NFAVertex(v)); } } return unreach; @@ -656,7 +653,7 @@ findHighlanderSquashers(const NGHolder &g, const ReportManager &rm) { const u32 numStates = num_vertices(g); for (auto v : verts) { - DEBUG_PRINTF("vertex %u with %zu reports\n", g[v].index, + DEBUG_PRINTF("vertex %zu with %zu reports\n", g[v].index, g[v].reports.size()); // Find the set of vertices that lead to v or any other reporter with a @@ -683,7 +680,7 @@ findHighlanderSquashers(const NGHolder &g, const ReportManager &rm) { NFAStateSet &mask = squash[v]; for (auto uv : unreach) { - DEBUG_PRINTF("squashes index %u\n", h[uv].index); + DEBUG_PRINTF("squashes index %zu\n", h[uv].index); 
mask.reset(h[uv].index); } } diff --git a/src/nfagraph/ng_uncalc_components.cpp b/src/nfagraph/ng_uncalc_components.cpp index 3326d6f4..baab3b0f 100644 --- a/src/nfagraph/ng_uncalc_components.cpp +++ b/src/nfagraph/ng_uncalc_components.cpp @@ -259,7 +259,7 @@ void mergeNfaComponent(NGHolder &dest, const NGHolder &vic, size_t common_len) { vmap[vic.startDs] = dest.startDs; vmap[vic.accept] = dest.accept; vmap[vic.acceptEod] = dest.acceptEod; - vmap[nullptr] = nullptr; + vmap[NGHolder::null_vertex()] = NGHolder::null_vertex(); // For vertices in the common len, add to vmap and merge in the reports, if // any. @@ -312,7 +312,7 @@ void mergeNfaComponent(NGHolder &dest, const NGHolder &vic, size_t common_len) { in_common_region = true; } - DEBUG_PRINTF("adding idx=%u (state %u) -> idx=%u (state %u)%s\n", + DEBUG_PRINTF("adding idx=%zu (state %u) -> idx=%zu (state %u)%s\n", dest[u].index, dest_info.get(u), dest[v].index, dest_info.get(v), in_common_region ? " [common]" : ""); @@ -338,8 +338,8 @@ void mergeNfaComponent(NGHolder &dest, const NGHolder &vic, size_t common_len) { add_edge(u, v, vic[e], dest); } - dest.renumberEdges(); - dest.renumberVertices(); + renumber_edges(dest); + renumber_vertices(dest); } namespace { diff --git a/src/nfagraph/ng_uncalc_components.h b/src/nfagraph/ng_uncalc_components.h index ddab8825..d7883578 100644 --- a/src/nfagraph/ng_uncalc_components.h +++ b/src/nfagraph/ng_uncalc_components.h @@ -36,14 +36,13 @@ #include #include -#include "nfagraph/ng_graph.h" +#include "nfagraph/ng_holder.h" #include "util/ue2_containers.h" namespace ue2 { struct CompileContext; struct Grey; -class NGHolder; class ReportManager; /** diff --git a/src/nfagraph/ng_undirected.h b/src/nfagraph/ng_undirected.h index 12632e05..7df6c7dc 100644 --- a/src/nfagraph/ng_undirected.h +++ b/src/nfagraph/ng_undirected.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,6 +39,10 @@ #include "util/graph_range.h" #include "util/ue2_containers.h" +#include + +#include + namespace ue2 { /** @@ -51,7 +55,7 @@ namespace ue2 { typedef boost::adjacency_list > + boost::property > NFAUndirectedGraph; typedef NFAUndirectedGraph::vertex_descriptor NFAUndirectedVertex; @@ -60,16 +64,18 @@ typedef NFAUndirectedGraph::vertex_descriptor NFAUndirectedVertex; * Make a copy of an NFAGraph with undirected edges, optionally without start * vertices. Mappings from the original graph to the new one are provided. * - * Note that new vertex indices are assigned contiguously in \a vertices(g) order. + * Note that new vertex indices are assigned contiguously in \a vertices(g) + * order. 
*/ template void createUnGraph(const GraphT &g, - bool excludeStarts, - bool excludeAccepts, - NFAUndirectedGraph &ug, - ue2::unordered_map &old2new, - ue2::unordered_map &newIdx2old) { - u32 idx = 0; + bool excludeStarts, + bool excludeAccepts, + NFAUndirectedGraph &ug, + ue2::unordered_map &old2new) { + size_t idx = 0; + typedef typename GraphT::vertex_descriptor VertexT; for (auto v : ue2::vertices_range(g)) { // skip all accept nodes @@ -84,13 +90,12 @@ void createUnGraph(const GraphT &g, NFAUndirectedVertex nuv = boost::add_vertex(ug); old2new[v] = nuv; - newIdx2old[idx] = v; boost::put(boost::vertex_index, ug, nuv, idx++); } for (const auto &e : ue2::edges_range(g)) { - NFAVertex src = source(e, g); - NFAVertex targ = target(e, g); + VertexT src = source(e, g); + VertexT targ = target(e, g); if ((excludeAccepts && is_any_accept(src, g)) || (excludeStarts && is_any_start(src, g))) { diff --git a/src/nfagraph/ng_utf8.cpp b/src/nfagraph/ng_utf8.cpp index 352359f2..383aa142 100644 --- a/src/nfagraph/ng_utf8.cpp +++ b/src/nfagraph/ng_utf8.cpp @@ -176,7 +176,7 @@ void findSeeds(const NGHolder &h, const bool som, vector *seeds) { continue; } - DEBUG_PRINTF("%u is a seed\n", h[v].index); + DEBUG_PRINTF("%zu is a seed\n", h[v].index); seeds->push_back(v); already_seeds.insert(v); } @@ -184,7 +184,7 @@ void findSeeds(const NGHolder &h, const bool som, vector *seeds) { static bool expandCyclic(NGHolder &h, NFAVertex v) { - DEBUG_PRINTF("inspecting %u\n", h[v].index); + DEBUG_PRINTF("inspecting %zu\n", h[v].index); bool changes = false; auto v_preds = preds(v, h); @@ -201,7 +201,7 @@ bool expandCyclic(NGHolder &h, NFAVertex v) { auto a_preds = preds(a, h); if (a_preds == v_preds && isutf8start(h[a].char_reach)) { - DEBUG_PRINTF("%u is a start v\n", h[a].index); + DEBUG_PRINTF("%zu is a start v\n", h[a].index); start_siblings.insert(a); } } @@ -212,7 +212,7 @@ bool expandCyclic(NGHolder &h, NFAVertex v) { auto a_succs = succs(a, h); if (a_succs == v_succs && h[a].char_reach == UTF_CONT_CR) { - DEBUG_PRINTF("%u is a full tail cont\n", h[a].index); + DEBUG_PRINTF("%zu is a full tail cont\n", h[a].index); end_siblings.insert(a); } } @@ -226,7 +226,7 @@ bool expandCyclic(NGHolder &h, NFAVertex v) { if (cr.isSubsetOf(UTF_TWO_START_CR)) { if (end_siblings.find(*adjacent_vertices(s, h).first) == end_siblings.end()) { - DEBUG_PRINTF("%u is odd\n", h[s].index); + DEBUG_PRINTF("%zu is odd\n", h[s].index); continue; } } else if (cr.isSubsetOf(UTF_THREE_START_CR)) { @@ -238,7 +238,7 @@ bool expandCyclic(NGHolder &h, NFAVertex v) { } if (end_siblings.find(*adjacent_vertices(m, h).first) == end_siblings.end()) { - DEBUG_PRINTF("%u is odd\n", h[s].index); + DEBUG_PRINTF("%zu is odd\n", h[s].index); continue; } } else if (cr.isSubsetOf(UTF_FOUR_START_CR)) { @@ -258,11 +258,11 @@ bool expandCyclic(NGHolder &h, NFAVertex v) { if (end_siblings.find(*adjacent_vertices(m2, h).first) == end_siblings.end()) { - DEBUG_PRINTF("%u is odd\n", h[s].index); + DEBUG_PRINTF("%zu is odd\n", h[s].index); continue; } } else { - DEBUG_PRINTF("%u is bad\n", h[s].index); + DEBUG_PRINTF("%zu is bad\n", h[s].index); continue; } diff --git a/src/nfagraph/ng_util.cpp b/src/nfagraph/ng_util.cpp index de4ca656..ad40debe 100644 --- a/src/nfagraph/ng_util.cpp +++ b/src/nfagraph/ng_util.cpp @@ -52,7 +52,7 @@ using namespace std; using boost::default_color_type; -using boost::filtered_graph; +using boost::make_filtered_graph; using boost::make_assoc_property_map; using boost::adaptors::map_values; @@ -172,15 +172,14 @@ namespace { struct 
CycleFound {}; struct DetectCycles : public boost::default_dfs_visitor { explicit DetectCycles(const NGHolder &g) : startDs(g.startDs) {} - void back_edge(const NFAEdge &e, const NFAGraph &g) const { + void back_edge(const NFAEdge &e, const NGHolder &g) const { NFAVertex u = source(e, g), v = target(e, g); // We ignore the startDs self-loop. if (u == startDs && v == startDs) { return; } // Any other back-edge indicates a cycle. - DEBUG_PRINTF("back edge %u->%u found\n", g[u].index, - g[v].index); + DEBUG_PRINTF("back edge %zu->%zu found\n", g[u].index, g[v].index); throw CycleFound(); } private: @@ -215,10 +214,8 @@ bool isFloating(const NGHolder &g) { bool isAcyclic(const NGHolder &g) { try { - depth_first_search( - g.g, visitor(DetectCycles(g)) - .root_vertex(g.start) - .vertex_index_map(get(&NFAGraphVertexProps::index, g.g))); + boost::depth_first_search(g, visitor(DetectCycles(g)) + .root_vertex(g.start)); } catch (const CycleFound &) { return false; } @@ -234,11 +231,11 @@ bool hasReachableCycle(const NGHolder &g, NFAVertex src) { try { // Use depth_first_visit, rather than depth_first_search, so that we // only search from src. - auto index_map = get(&NFAGraphVertexProps::index, g.g); - depth_first_visit( - g.g, src, DetectCycles(g), - make_iterator_property_map(colors.begin(), index_map)); - } catch (const CycleFound&) { + auto index_map = get(vertex_index, g); + boost::depth_first_visit(g, src, DetectCycles(g), + make_iterator_property_map(colors.begin(), + index_map)); + } catch (const CycleFound &) { return true; } @@ -249,10 +246,7 @@ bool hasBigCycles(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); set dead; BackEdges> backEdgeVisitor(dead); - depth_first_search( - g.g, visitor(backEdgeVisitor) - .root_vertex(g.start) - .vertex_index_map(get(&NFAGraphVertexProps::index, g.g))); + boost::depth_first_search(g, visitor(backEdgeVisitor).root_vertex(g.start)); for (const auto &e : dead) { if (source(e, g) != target(e, g)) { @@ -266,8 +260,7 @@ bool hasBigCycles(const NGHolder &g) { set findVerticesInCycles(const NGHolder &g) { map comp_map; - strong_components(g.g, make_assoc_property_map(comp_map), - vertex_index_map(get(&NFAGraphVertexProps::index, g.g))); + strong_components(g, make_assoc_property_map(comp_map)); map > comps; @@ -298,8 +291,7 @@ set findVerticesInCycles(const NGHolder &g) { bool can_never_match(const NGHolder &g) { assert(edge(g.accept, g.acceptEod, g).second); - if (!hasGreaterInDegree(0, g.accept, g) - && !hasGreaterInDegree(1, g.acceptEod, g)) { + if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) { DEBUG_PRINTF("no paths into accept\n"); return true; } @@ -308,7 +300,7 @@ bool can_never_match(const NGHolder &g) { } bool can_match_at_eod(const NGHolder &h) { - if (hasGreaterInDegree(1, h.acceptEod, h)) { + if (in_degree(h.acceptEod, h) > 1) { DEBUG_PRINTF("more than one edge to acceptEod\n"); return true; } @@ -396,21 +388,17 @@ vector getTopoOrdering(const NGHolder &g) { EdgeSet backEdges; BackEdges be(backEdges); - auto index_map = get(&NFAGraphVertexProps::index, g.g); - depth_first_search(g.g, visitor(be) - .root_vertex(g.start) - .color_map(make_iterator_property_map( - colour.begin(), index_map)) - .vertex_index_map(index_map)); + auto index_map = get(vertex_index, g); + depth_first_search(g, visitor(be).root_vertex(g.start) + .color_map(make_iterator_property_map( + colour.begin(), index_map))); - auto acyclic_g = make_filtered_graph(g.g, make_bad_edge_filter(&backEdges)); + auto acyclic_g = make_filtered_graph(g, 
make_bad_edge_filter(&backEdges)); vector ordering; ordering.reserve(num_verts); - topological_sort( - acyclic_g, back_inserter(ordering), - color_map(make_iterator_property_map(colour.begin(), index_map)) - .vertex_index_map(index_map)); + topological_sort(acyclic_g, back_inserter(ordering), + color_map(make_iterator_property_map(colour.begin(), index_map))); reorderSpecials(g, ordering); @@ -434,12 +422,12 @@ void mustBeSetBefore_int(NFAVertex u, const NGHolder &g, } } - auto prefix = make_filtered_graph(g.g, make_bad_edge_filter(&dead)); + auto prefix = make_filtered_graph(g, make_bad_edge_filter(&dead)); depth_first_visit( prefix, g.start, make_dfs_visitor(boost::null_visitor()), make_iterator_property_map(vertexColor.begin(), - get(&NFAGraphVertexProps::index, g.g))); + get(vertex_index, g))); } bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g, @@ -456,15 +444,14 @@ bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g, mustBeSetBefore_int(u, g, vertexColor); for (auto vi : vertices_range(g)) { - auto key2 = make_pair(g[u].index, - g[vi].index); - DEBUG_PRINTF("adding %u %u\n", key2.first, key2.second); + auto key2 = make_pair(g[u].index, g[vi].index); + DEBUG_PRINTF("adding %zu %zu\n", key2.first, key2.second); assert(!contains(cache.cache, key2)); bool value = vertexColor[g[vi].index] == boost::white_color; cache.cache[key2] = value; assert(contains(cache.cache, key2)); } - DEBUG_PRINTF("cache miss %u %u (%zu)\n", key.first, key.second, + DEBUG_PRINTF("cache miss %zu %zu (%zu)\n", key.first, key.second, cache.cache.size()); return cache.cache[key]; } @@ -592,12 +579,13 @@ void fillHolder(NGHolder *outp, const NGHolder &in, const deque &vv, fillHolderOutEdges(out, in, v_map, u); } - out.renumberEdges(); - out.renumberVertices(); + renumber_edges(out); + renumber_vertices(out); } void cloneHolder(NGHolder &out, const NGHolder &in) { assert(hasCorrectlyNumberedVertices(in)); + assert(hasCorrectlyNumberedVertices(out)); out.kind = in.kind; // Note: depending on the state of the input graph, some stylized edges @@ -607,6 +595,7 @@ void cloneHolder(NGHolder &out, const NGHolder &in) { /* remove the existing special edges */ clear_vertex(out.startDs, out); clear_vertex(out.accept, out); + renumber_edges(out); vector out_mapping(num_vertices(in)); out_mapping[NODE_START] = out.start; @@ -642,8 +631,8 @@ void cloneHolder(NGHolder &out, const NGHolder &in) { } // Safety checks. - assert(num_vertices(in.g) == num_vertices(out.g)); - assert(num_edges(in.g) == num_edges(out.g)); + assert(num_vertices(in) == num_vertices(out)); + assert(num_edges(in) == num_edges(out)); assert(hasCorrectlyNumberedVertices(out)); } @@ -672,9 +661,8 @@ unique_ptr cloneHolder(const NGHolder &in) { void reverseHolder(const NGHolder &g_in, NGHolder &g) { // Make the BGL do the grunt work. ue2::unordered_map vertexMap; - boost::transpose_graph(g_in.g, g.g, - orig_to_copy(boost::make_assoc_property_map(vertexMap)). - vertex_index_map(get(&NFAGraphVertexProps::index, g_in.g))); + boost::transpose_graph(g_in, g, + orig_to_copy(boost::make_assoc_property_map(vertexMap))); // The transpose_graph operation will have created extra copies of our // specials. We have to rewire their neighbours to the 'real' specials and @@ -716,8 +704,8 @@ void reverseHolder(const NGHolder &g_in, NGHolder &g) { // Renumber so that g's properties (number of vertices, edges) are // accurate. 
- g.renumberVertices(); - g.renumberEdges(); + renumber_vertices(g); + renumber_edges(g); assert(num_vertices(g) == num_vertices(g_in)); assert(num_edges(g) == num_edges(g_in)); @@ -729,8 +717,7 @@ bool allMatchStatesHaveReports(const NGHolder &g) { unordered_set reporters; for (auto v : inv_adjacent_vertices_range(g.accept, g)) { if (g[v].reports.empty()) { - DEBUG_PRINTF("vertex %u has no reports!\n", - g[v].index); + DEBUG_PRINTF("vertex %zu has no reports!\n", g[v].index); return false; } reporters.insert(v); @@ -741,8 +728,7 @@ bool allMatchStatesHaveReports(const NGHolder &g) { continue; // stylised edge } if (g[v].reports.empty()) { - DEBUG_PRINTF("vertex %u has no reports!\n", - g[v].index); + DEBUG_PRINTF("vertex %zu has no reports!\n", g[v].index); return false; } reporters.insert(v); @@ -750,7 +736,7 @@ bool allMatchStatesHaveReports(const NGHolder &g) { for (auto v : vertices_range(g)) { if (!contains(reporters, v) && !g[v].reports.empty()) { - DEBUG_PRINTF("vertex %u is not a match state, but has reports!\n", + DEBUG_PRINTF("vertex %zu is not a match state, but has reports!\n", g[v].index); return false; } @@ -759,34 +745,6 @@ bool allMatchStatesHaveReports(const NGHolder &g) { return true; } -bool hasCorrectlyNumberedVertices(const NGHolder &g) { - size_t count = num_vertices(g); - vector ids(count, false); - for (auto v : vertices_range(g)) { - u32 id = g[v].index; - if (id >= count || ids[id]) { - return false; // duplicate - } - ids[id] = true; - } - return find(ids.begin(), ids.end(), false) == ids.end() - && num_vertices(g) == num_vertices(g.g); -} - -bool hasCorrectlyNumberedEdges(const NGHolder &g) { - size_t count = num_edges(g); - vector ids(count, false); - for (const auto &e : edges_range(g)) { - u32 id = g[e].index; - if (id >= count || ids[id]) { - return false; // duplicate - } - ids[id] = true; - } - return find(ids.begin(), ids.end(), false) == ids.end() - && num_edges(g) == num_edges(g.g); -} - bool isCorrectlyTopped(const NGHolder &g) { if (is_triggered(g)) { for (const auto &e : out_edges_range(g.start, g)) { @@ -805,7 +763,6 @@ bool isCorrectlyTopped(const NGHolder &g) { return true; } - #endif // NDEBUG } // namespace ue2 diff --git a/src/nfagraph/ng_util.h b/src/nfagraph/ng_util.h index 6b5090ce..a0752533 100644 --- a/src/nfagraph/ng_util.h +++ b/src/nfagraph/ng_util.h @@ -65,9 +65,8 @@ bool is_dot(NFAVertex v, const GraphT &g) { template static really_inline void succ(const NGHolder &g, NFAVertex v, U *s) { - NGHolder::adjacency_iterator ai, ae; - tie(ai, ae) = adjacent_vertices(v, g); - s->insert(ai, ae); + auto rv = adjacent_vertices(v, g); + s->insert(rv.first, rv.second); } template> @@ -81,9 +80,8 @@ ContTemp succs(NFAVertex u, const NGHolder &g) { template static really_inline void pred(const NGHolder &g, NFAVertex v, U *p) { - NGHolder::inv_adjacency_iterator it, ite; - tie(it, ite) = inv_adjacent_vertices(v, g); - p->insert(it, ite); + auto rv = inv_adjacent_vertices(v, g); + p->insert(rv.first, rv.second); } template> @@ -138,42 +136,11 @@ public: BackEdgeSet &backEdges; }; -/** - * Generic code to renumber all the vertices in a graph. Assumes that we're - * using a vertex_index property of type u32, and that we always have - * N_SPECIALS special vertices already present (which we don't want to - * renumber). 
- */
-template <class GraphT>
-static really_inline
-size_t renumberGraphVertices(GraphT &g) {
-    size_t num = N_SPECIALS;
-    for (const auto &v : vertices_range(g)) {
-        if (!is_special(v, g)) {
-            g[v].index = num++;
-            assert(num > 0); // no wrapping
-        }
-    }
-    return num;
-}
-
-/** Renumber all the edges in a graph. */
-template <class GraphT>
-static really_inline
-size_t renumberGraphEdges(GraphT &g) {
-    size_t num = 0;
-    for (const auto &e : edges_range(g)) {
-        g[e].index = num++;
-        assert(num > 0); // no wrapping
-    }
-    return num;
-}
-
 /** Returns true if the vertex is either of the real starts (NODE_START,
  * NODE_START_DOTSTAR). */
 template <class GraphT>
 static really_inline
-bool is_any_start(const NFAVertex v, const GraphT &g) {
+bool is_any_start(typename GraphT::vertex_descriptor v, const GraphT &g) {
     u32 i = g[v].index;
     return i == NODE_START || i == NODE_START_DOTSTAR;
 }
@@ -181,16 +148,14 @@ bool is_any_start(const NFAVertex v, const GraphT &g) {
 bool is_virtual_start(NFAVertex v, const NGHolder &g);
 
 template <class GraphT>
-static really_inline
-bool is_any_accept(const NFAVertex v, const GraphT &g) {
+bool is_any_accept(typename GraphT::vertex_descriptor v, const GraphT &g) {
     u32 i = g[v].index;
     return i == NODE_ACCEPT || i == NODE_ACCEPT_EOD;
 }
 
 /** returns true iff v has an edge to accept or acceptEod */
 template <class GraphT>
-static really_inline
-bool is_match_vertex(NFAVertex v, const GraphT &g) {
+bool is_match_vertex(typename GraphT::vertex_descriptor v, const GraphT &g) {
     return edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second;
 }
 
@@ -202,25 +167,6 @@ bool is_match_vertex(NFAVertex v, const GraphT &g) {
  */
 std::vector<NFAVertex> getTopoOrdering(const NGHolder &g);
 
-/** Comparison functor used to sort by vertex_index. */
-template <class Graph>
-struct VertexIndexOrdering {
-    VertexIndexOrdering(const Graph &g_in) : g(&g_in) {}
-    bool operator()(typename Graph::vertex_descriptor a,
-                    typename Graph::vertex_descriptor b) const {
-        assert(a == b || (*g)[a].index != (*g)[b].index);
-        return (*g)[a].index < (*g)[b].index;
-    }
-private:
-    const Graph *g;
-};
-
-template <class Graph>
-static
-VertexIndexOrdering<Graph> make_index_ordering(const Graph &g) {
-    return VertexIndexOrdering<Graph>(g);
-}
-
 bool onlyOneTop(const NGHolder &g);
 
 /** Return the set of the tops on the given graph. */
@@ -340,18 +286,6 @@ void reverseHolder(const NGHolder &g, NGHolder &out);
  */
 bool allMatchStatesHaveReports(const NGHolder &g);
 
-/**
- * Assertion: returns true if the vertices in this graph are contiguously (and
- * uniquely) numbered from zero.
- */
-bool hasCorrectlyNumberedVertices(const NGHolder &g);
-
-/**
- * Assertion: returns true if the edges in this graph are contiguously (and
- * uniquely) numbered from zero.
- */ -bool hasCorrectlyNumberedEdges(const NGHolder &g); - /** * Assertion: returns true if the graph is triggered and all edges out of start * have tops OR if the graph is not-triggered and all edges out of start have no diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 9c99ba8a..9e50ea3d 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -464,7 +464,7 @@ void getRegionRoseLiterals(const NGHolder &g, bool seeking_anchored, DEBUG_PRINTF("inspecting region %u\n", region); set s; for (auto v : vv) { - DEBUG_PRINTF(" exit vertex: %u\n", g[v].index); + DEBUG_PRINTF(" exit vertex: %zu\n", g[v].index); /* Note: RHS can not be depended on to take all subsequent revisits * to this vertex */ set ss = getLiteralSet(g, v, false); @@ -669,7 +669,7 @@ unique_ptr findBestSplit(const NGHolder &g, lits.pop_back(); } - DEBUG_PRINTF("best is '%s' %u a%d t%d\n", + DEBUG_PRINTF("best is '%s' %zu a%d t%d\n", dumpString(*best->lit.begin()).c_str(), g[best->vv.front()].index, depths ? (int)createsAnchoredLHS(g, best->vv, *depths, cc.grey) : 0, @@ -777,7 +777,7 @@ set poisonVertices(const NGHolder &h, const RoseInGraph &vg, set bad_vertices; for (const NFAEdge &e : bad_edges) { bad_vertices.insert(target(e, h)); - DEBUG_PRINTF("bad: %u->%u\n", h[source(e, h)].index, + DEBUG_PRINTF("bad: %zu->%zu\n", h[source(e, h)].index, h[target(e, h)].index); } @@ -1144,7 +1144,7 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg, NFAVertex prev_v = source(e, h); NFAVertex pivot = target(e, h); - DEBUG_PRINTF("splitting on pivot %u\n", h[pivot].index); + DEBUG_PRINTF("splitting on pivot %zu\n", h[pivot].index); ue2::unordered_map temp_map; shared_ptr new_lhs = make_shared(); splitLHS(h, pivot, new_lhs.get(), &temp_map); @@ -1324,7 +1324,7 @@ bool deanchorIfNeeded(NGHolder &g) { succ_g.erase(g.startDs); for (auto v : adjacent_vertices_range(g.start, g)) { - DEBUG_PRINTF("inspecting cand %u || = %zu\n", g[v].index, + DEBUG_PRINTF("inspecting cand %zu || = %zu\n", g[v].index, g[v].char_reach.count()); if (v == g.startDs || !g[v].char_reach.all()) { @@ -2339,7 +2339,7 @@ bool leadingDotStartLiteral(const NGHolder &h, VertLitInfo *out) { make_nocase(&lit); } - DEBUG_PRINTF("%u found %s\n", h[v].index, dumpString(lit).c_str()); + DEBUG_PRINTF("%zu found %s\n", h[v].index, dumpString(lit).c_str()); out->vv = {v}; out->lit = {lit}; return true; @@ -2468,7 +2468,7 @@ bool trailingDotStarLiteral(const NGHolder &h, VertLitInfo *out) { } ue2_literal lit = reverse_literal(rv.second); - DEBUG_PRINTF("%u found %s\n", h[v].index, dumpString(lit).c_str()); + DEBUG_PRINTF("%zu found %s\n", h[v].index, dumpString(lit).c_str()); if (bad_mixed_sensitivity(lit)) { make_nocase(&lit); @@ -2672,6 +2672,7 @@ bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, pruneUseless(vg); dumpPreRoseGraph(vg, cc.grey); + renumber_vertices(vg); calcVertexOffsets(vg); bool rv = rose.addRose(vg, prefilter); DEBUG_PRINTF("violet: %s\n", rv ? 
"success" : "fail"); diff --git a/src/nfagraph/ng_width.cpp b/src/nfagraph/ng_width.cpp index 5fb58ee4..d596b7b5 100644 --- a/src/nfagraph/ng_width.cpp +++ b/src/nfagraph/ng_width.cpp @@ -58,18 +58,18 @@ namespace { struct SpecialEdgeFilter { SpecialEdgeFilter() {} explicit SpecialEdgeFilter(const NGHolder &h_in) : h(&h_in) {} - explicit SpecialEdgeFilter(const NGHolder &h_in, u32 top_in) + SpecialEdgeFilter(const NGHolder &h_in, u32 top_in) : h(&h_in), single_top(true), top(top_in) {} bool operator()(const NFAEdge &e) const { - const NFAGraph &g = h->g; - NFAVertex u = source(e, g), v = target(e, g); - if ((is_any_start(u, g) && is_any_start(v, g)) || - (is_any_accept(u, g) && is_any_accept(v, g))) { + NFAVertex u = source(e, *h); + NFAVertex v = target(e, *h); + if ((is_any_start(u, *h) && is_any_start(v, *h)) || + (is_any_accept(u, *h) && is_any_accept(v, *h))) { return false; } if (single_top) { - if (u == h->start && !contains(g[e].tops, top)) { + if (u == h->start && !contains((*h)[e].tops, top)) { return false; } if (u == h->startDs) { @@ -94,7 +94,7 @@ depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter, return depth::unreachable(); } - boost::filtered_graph g(h.g, filter); + boost::filtered_graph g(h, filter); assert(hasCorrectlyNumberedVertices(h)); const size_t num = num_vertices(h); @@ -106,11 +106,10 @@ depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter, // Since we are interested in the single-source shortest paths on a graph // with the same weight on every edge, using BFS will be faster than // Dijkstra here. - breadth_first_search( - g, src, + breadth_first_search(g, src, visitor(make_bfs_visitor(record_distances( make_iterator_property_map(distance.begin(), index_map), - boost::on_tree_edge()))).vertex_index_map(index_map)); + boost::on_tree_edge())))); DEBUG_PRINTF("d[accept]=%s, d[acceptEod]=%s\n", distance.at(NODE_ACCEPT).str().c_str(), @@ -130,7 +129,7 @@ depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter, static depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter, NFAVertex src) { - if (isLeafNode(src, h.g)) { + if (isLeafNode(src, h)) { return depth::unreachable(); } @@ -139,7 +138,7 @@ depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter, return depth::infinity(); } - boost::filtered_graph g(h.g, filter); + boost::filtered_graph g(h, filter); assert(hasCorrectlyNumberedVertices(h)); const size_t num = num_vertices(h); @@ -149,11 +148,9 @@ depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter, auto index_map = get(&NFAGraphVertexProps::index, g); // DAG shortest paths with negative edge weights. 
- dag_shortest_paths( - g, src, + dag_shortest_paths(g, src, distance_map(make_iterator_property_map(distance.begin(), index_map)) .weight_map(boost::make_constant_property(-1)) - .vertex_index_map(index_map) .color_map(make_iterator_property_map(colors.begin(), index_map))); depth acceptDepth, acceptEodDepth; diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index 72a791ba..e185bb37 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -112,11 +112,10 @@ RoseVertex createVertex(RoseBuildImpl *build, u32 literalId, u32 min_offset, RoseGraph &g = build->g; // add to tree RoseVertex v = add_vertex(g); - g[v].idx = build->vertexIndex++; g[v].min_offset = min_offset; g[v].max_offset = max_offset; - DEBUG_PRINTF("insert vertex %zu into literal %u's vertex set\n", g[v].idx, + DEBUG_PRINTF("insert vertex %zu into literal %u's vertex set\n", g[v].index, literalId); g[v].literals.insert(literalId); build->literal_info[literalId].vertices.insert(v); @@ -167,7 +166,7 @@ RoseVertex createAnchoredVertex(RoseBuildImpl *build, u32 literalId, RoseGraph &g = build->g; RoseVertex v = createVertex(build, literalId, min_offset, max_offset); - DEBUG_PRINTF("created anchored vertex %zu with lit id %u\n", g[v].idx, + DEBUG_PRINTF("created anchored vertex %zu with lit id %u\n", g[v].index, literalId); RoseEdge e = add_edge(build->anchored_root, v, g).first; @@ -181,8 +180,7 @@ static RoseVertex duplicate(RoseBuildImpl *build, RoseVertex v) { RoseGraph &g = build->g; RoseVertex w = add_vertex(g[v], g); - g[w].idx = build->vertexIndex++; - DEBUG_PRINTF("added vertex %zu\n", g[w].idx); + DEBUG_PRINTF("added vertex %zu\n", g[w].index); for (auto lit_id : g[w].literals) { build->literal_info[lit_id].vertices.insert(w); @@ -191,7 +189,7 @@ RoseVertex duplicate(RoseBuildImpl *build, RoseVertex v) { for (const auto &e : in_edges_range(v, g)) { RoseVertex s = source(e, g); add_edge(s, w, g[e], g); - DEBUG_PRINTF("added edge (%zu,%zu)\n", g[s].idx, g[w].idx); + DEBUG_PRINTF("added edge (%zu,%zu)\n", g[s].index, g[w].index); } return w; @@ -227,7 +225,7 @@ RoseRoleHistory selectHistory(const RoseBuildImpl &tbi, const RoseBuildData &bd, const bool has_bounds = g[e].minBound || (g[e].maxBound != ROSE_BOUND_INF); DEBUG_PRINTF("edge %zu->%zu, bounds=[%u,%u], fixed_u=%d, prefix=%d\n", - g[u].idx, g[v].idx, g[e].minBound, g[e].maxBound, + g[u].index, g[v].index, g[e].minBound, g[e].maxBound, (int)g[u].fixedOffset(), (int)g[v].left); if (g[v].left) { @@ -309,7 +307,7 @@ void createVertices(RoseBuildImpl *tbi, DEBUG_PRINTF("set som_adjust to %u\n", g[w].som_adjust); } - DEBUG_PRINTF(" adding new vertex idx=%zu\n", tbi->g[w].idx); + DEBUG_PRINTF(" adding new vertex index=%zu\n", tbi->g[w].index); vertex_map[iv].push_back(w); } else { w = created[key]; @@ -383,7 +381,7 @@ void removeFalsePaths(NGHolder &g, const ue2_literal &lit) { for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) { next.clear(); for (auto curr_v : curr) { - DEBUG_PRINTF("handling %u\n", g[curr_v].index); + DEBUG_PRINTF("handling %zu\n", g[curr_v].index); vector next_cand; insert(&next_cand, next_cand.end(), inv_adjacent_vertices(curr_v, g)); @@ -401,7 +399,7 @@ void removeFalsePaths(NGHolder &g, const ue2_literal &lit) { const CharReach &cr = g[v].char_reach; if (!overlaps(*it, cr)) { - DEBUG_PRINTF("false edge %u\n", g[v].index); + DEBUG_PRINTF("false edge %zu\n", g[v].index); continue; } @@ -409,7 +407,7 @@ void removeFalsePaths(NGHolder &g, const ue2_literal &lit) { clone_in_edges(g, v, v2); add_edge(v2, 
curr_v, g); g[v2].char_reach &= *it; - DEBUG_PRINTF("next <- %u\n", g[v2].index); + DEBUG_PRINTF("next <- %zu\n", g[v2].index); next.insert(v2); } } @@ -557,7 +555,7 @@ void findRoseLiteralMask(const NGHolder &h, const u32 lag, vector &msk, next.clear(); CharReach cr; for (auto v : curr) { - DEBUG_PRINTF("vertex %u, reach %s\n", h[v].index, + DEBUG_PRINTF("vertex %zu, reach %s\n", h[v].index, describeClass(h[v].char_reach).c_str()); cr |= h[v].char_reach; insert(&next, inv_adjacent_vertices(v, h)); @@ -705,7 +703,6 @@ void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u, for (const auto &report_mapping : report_remap) { RoseVertex v = add_vertex(g); - g[v].idx = build.vertexIndex++; g[v].literals.insert(eod_event); build.literal_info[eod_event].vertices.insert(v); @@ -728,7 +725,6 @@ void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u, g[e1].history = ROSE_ROLE_HISTORY_NONE; // handled by prefix RoseVertex w = add_vertex(g); - g[w].idx = build.vertexIndex++; g[w].eod_accept = true; g[w].reports = report_mapping.first; g[w].min_offset = g[v].min_offset; @@ -737,7 +733,7 @@ void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u, g[e].minBound = 0; g[e].maxBound = 0; g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE; - DEBUG_PRINTF("accept eod vertex (idx=%zu)\n", g[w].idx); + DEBUG_PRINTF("accept eod vertex (index=%zu)\n", g[w].index); } } @@ -769,7 +765,7 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi, || (ig[iv].type == RIV_ACCEPT_EOD && out_degree(u, g) && !edge_props.graph) || (!isLeafNode(u, g) && !tbi->isAnyStart(u))) { - DEBUG_PRINTF("duplicating for parent %zu\n", g[u].idx); + DEBUG_PRINTF("duplicating for parent %zu\n", g[u].index); assert(!tbi->isAnyStart(u)); u = duplicate(tbi, u); g[u].suffix.reset(); @@ -780,20 +776,20 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi, if (ig[iv].type == RIV_ACCEPT) { assert(!tbi->isAnyStart(u)); if (contains(bd.early_dfas, edge_props.graph.get())) { - DEBUG_PRINTF("adding early dfa suffix to i%zu\n", g[u].idx); + DEBUG_PRINTF("adding early dfa suffix to i%zu\n", g[u].index); g[u].suffix.rdfa = bd.early_dfas.at(edge_props.graph.get()); g[u].suffix.dfa_min_width = findMinWidth(*edge_props.graph); g[u].suffix.dfa_max_width = findMaxWidth(*edge_props.graph); } else if (edge_props.graph) { - DEBUG_PRINTF("adding suffix to i%zu\n", g[u].idx); + DEBUG_PRINTF("adding suffix to i%zu\n", g[u].index); g[u].suffix.graph = edge_props.graph; assert(g[u].suffix.graph->kind == NFA_SUFFIX); /* TODO: set dfa_(min|max)_width */ } else if (edge_props.haig) { - DEBUG_PRINTF("adding suffaig to i%zu\n", g[u].idx); + DEBUG_PRINTF("adding suffaig to i%zu\n", g[u].index); g[u].suffix.haig = edge_props.haig; } else { - DEBUG_PRINTF("adding boring accept to i%zu\n", g[u].idx); + DEBUG_PRINTF("adding boring accept to i%zu\n", g[u].index); assert(!g[u].eod_accept); g[u].reports = ig[iv].reports; } @@ -803,7 +799,6 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi, if (!edge_props.graph) { RoseVertex w = add_vertex(g); - g[w].idx = tbi->vertexIndex++; g[w].eod_accept = true; g[w].reports = ig[iv].reports; g[w].min_offset = g[u].min_offset; @@ -812,7 +807,7 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi, g[e].minBound = 0; g[e].maxBound = 0; g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE; - DEBUG_PRINTF("accept eod vertex (idx=%zu)\n", g[w].idx); + DEBUG_PRINTF("accept eod vertex (index=%zu)\n", g[w].index); continue; } @@ -824,7 +819,7 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi, assert(h.kind == NFA_SUFFIX); assert(!tbi->isAnyStart(u)); /* etable can't/shouldn't use eod 
event */ - DEBUG_PRINTF("adding suffix to i%zu\n", g[u].idx); + DEBUG_PRINTF("adding suffix to i%zu\n", g[u].index); g[u].suffix.graph = edge_props.graph; continue; } @@ -976,7 +971,7 @@ void populateRoseGraph(RoseBuildImpl *tbi, RoseBuildData &bd) { || ig[v_order.front()].type == RIV_ANCHORED_START); for (RoseInVertex iv : v_order) { - DEBUG_PRINTF("vertex %p\n", iv); + DEBUG_PRINTF("vertex %zu\n", ig[iv].index); if (ig[iv].type == RIV_START) { DEBUG_PRINTF("is root\n"); @@ -1588,6 +1583,7 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter, bool finalChance) { DEBUG_PRINTF("trying to rose\n"); assert(validateKinds(ig)); + assert(hasCorrectlyNumberedVertices(ig)); if (::ue2::empty(ig)) { assert(0); @@ -1603,7 +1599,8 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter, transformAnchoredLiteralOverlap(in, bd, cc); transformSuffixDelay(in, cc); - assert(validateKinds(ig)); + renumber_vertices(in); + assert(validateKinds(in)); map > graphs; vector ordered_graphs; // Stored in first-encounter order. @@ -1762,8 +1759,7 @@ static u32 findMaxBAWidth(const NGHolder &h) { // Must be bi-anchored: no out-edges from startDs (other than its // self-loop), no in-edges to accept. - if (hasGreaterOutDegree(1, h.startDs, h) || - hasGreaterInDegree(0, h.accept, h)) { + if (out_degree(h.startDs, h) > 1 || in_degree(h.accept, h)) { return ROSE_BOUND_INF; } depth d = findMaxWidth(h); @@ -1889,9 +1885,9 @@ bool prepAcceptForAddAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &w, map &allocated_reports, flat_set &added_lit_ids) { const depth max_anchored_depth(tbi.cc.grey.maxAnchoredRegion); - const u32 idx = w[u].index; - assert(idx < vertexDepths.size()); - const DepthMinMax &d = vertexDepths.at(idx); + const size_t index = w[u].index; + assert(index < vertexDepths.size()); + const DepthMinMax &d = vertexDepths.at(index); for (const auto &int_report : w[u].reports) { assert(int_report != MO_INVALID_IDX); @@ -2008,7 +2004,6 @@ bool RoseBuildImpl::addAnchoredAcyclic(const NGHolder &h) { RoseVertex v = createAnchoredVertex(this, lit_id, minBound, maxBound); RoseVertex eod = add_vertex(g); - g[eod].idx = vertexIndex++; g[eod].eod_accept = true; g[eod].reports.insert(report); g[eod].min_offset = g[v].min_offset; diff --git a/src/rose/rose_build_add_mask.cpp b/src/rose/rose_build_add_mask.cpp index ef83cae1..f46e1004 100644 --- a/src/rose/rose_build_add_mask.cpp +++ b/src/rose/rose_build_add_mask.cpp @@ -532,7 +532,7 @@ void addTransientMask(RoseBuildImpl &build, const vector &mask, g[v].left.leftfix_report = mask_report; } else { // Make sure our edge bounds are correct. - auto e = edge_by_target(parent, v, g).first; + auto e = edge(parent, v, g).first; g[e].minBound = 0; g[e].maxBound = anchored ? 0 : ROSE_BOUND_INF; g[e].history = anchored ? 
ROSE_ROLE_HISTORY_ANCH diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index 60732ff9..3d0affc6 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -549,7 +549,7 @@ bool isSimple(const NGHolder &h, u32 *min_bound, u32 *max_bound, /* lit should only be connected to dot vertices */ for (auto u : inv_adjacent_vertices_range(lit_head, h)) { - DEBUG_PRINTF("checking %u\n", h[u].index); + DEBUG_PRINTF("checking %zu\n", h[u].index); if (!h[u].char_reach.all()) { return false; } diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 80e6450d..43df7962 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -314,7 +314,7 @@ bool needsCatchup(const RoseBuildImpl &build, continue; } if (g[v].suffix) { - DEBUG_PRINTF("vertex %zu has suffix\n", g[v].idx); + DEBUG_PRINTF("vertex %zu has suffix\n", g[v].index); return true; } @@ -947,7 +947,7 @@ void appendTailToHolder(NGHolder &h, const vector &tail) { appendTailToHolder(h, e.first, e.second, tail); } - h.renumberEdges(); + renumber_edges(h); } static @@ -1232,11 +1232,11 @@ void updateTops(const RoseGraph &g, const TamaInfo &tamaInfo, for (const auto &n : tamaInfo.subengines) { for (const auto &v : subengines[i].vertices) { if (is_suffix) { - tamaProto.add(n, g[v].idx, g[v].suffix.top, + tamaProto.add(n, g[v].index, g[v].suffix.top, out_top_remap); } else { for (const auto &e : in_edges_range(v, g)) { - tamaProto.add(n, g[v].idx, g[e].rose_top, + tamaProto.add(n, g[v].index, g[e].rose_top, out_top_remap); } } @@ -1280,7 +1280,7 @@ void buildInfixContainer(RoseGraph &g, build_context &bc, for (const auto &sub : subengines) { const auto &verts = sub.vertices; for (const auto &v : verts) { - DEBUG_PRINTF("vert id:%zu\n", g[v].idx); + DEBUG_PRINTF("vert id:%zu\n", g[v].index); g[v].left.tamarama = tamaProto; } } @@ -1299,7 +1299,7 @@ void buildSuffixContainer(RoseGraph &g, build_context &bc, for (const auto &sub : subengines) { const auto &verts = sub.vertices; for (const auto &v : verts) { - DEBUG_PRINTF("vert id:%zu\n", g[v].idx); + DEBUG_PRINTF("vert id:%zu\n", g[v].index); g[v].suffix.tamarama = tamaProto; } const auto &v = verts[0]; @@ -1790,7 +1790,7 @@ void assignSuffixQueues(RoseBuildImpl &build, build_context &bc) { const suffix_id s(g[v].suffix); - DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].idx, s.graph()); + DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].index, s.graph()); // We may have already built this NFA. if (contains(bc.suffixes, s)) { @@ -1887,7 +1887,7 @@ void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc, const suffix_id s(g[v].suffix); - DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].idx, s.graph()); + DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].index, s.graph()); // We may have already built this NFA. if (contains(suffixes, s)) { @@ -1977,24 +1977,13 @@ bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc, } static -void buildCountingMiracles(RoseBuildImpl &build, build_context &bc) { +void buildCountingMiracles(build_context &bc) { map, u32> pre_built; - // To ensure compile determinism, we need to iterate over our leftfixes in - // a stronger order than directly over bc.leftfix_info. 
- vector cm_vertices; - for (const auto &m : bc.leftfix_info) { - if (m.second.countingMiracleCount) { - cm_vertices.push_back(m.first); + for (left_build_info &lbi : bc.leftfix_info | map_values) { + if (!lbi.countingMiracleCount) { + continue; } - } - sort(begin(cm_vertices), end(cm_vertices), VertexIndexComp(build.g)); - - DEBUG_PRINTF("%zu vertices with counting miracles\n", cm_vertices.size()); - - for (const auto &v : cm_vertices) { - auto &lbi = bc.leftfix_info.at(v); - assert(lbi.countingMiracleCount); const CharReach &cr = lbi.countingMiracleReach; assert(!cr.all() && !cr.none()); @@ -2255,12 +2244,12 @@ u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build, u32 minWidth = ROSE_BOUND_INF; for (auto v : vertices_range(g)) { if (build.isAnchored(v) || build.isVirtualVertex(v)) { - DEBUG_PRINTF("skipping %zu anchored or root\n", g[v].idx); + DEBUG_PRINTF("skipping %zu anchored or root\n", g[v].index); continue; } u32 w = g[v].min_offset; - DEBUG_PRINTF("%zu m_o = %u\n", g[v].idx, w); + DEBUG_PRINTF("%zu m_o = %u\n", g[v].index, w); if (w < minWidth) { minWidth = w; @@ -3540,7 +3529,7 @@ void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v, auto tamaProto = g[v].suffix.tamarama.get(); assert(tamaProto); u32 top = (u32)MQE_TOP_FIRST + - tamaProto->top_remap.at(make_pair(g[v].idx, + tamaProto->top_remap.at(make_pair(g[v].index, g[v].suffix.top)); assert(top < MQE_INVALID); suffixEvent = top; @@ -3622,7 +3611,7 @@ void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, auto tamaProto = g[v].left.tamarama.get(); assert(tamaProto); top = MQE_TOP_FIRST + tamaProto->top_remap.at( - make_pair(g[v].idx, g[e].rose_top)); + make_pair(g[v].index, g[e].rose_top)); assert(top < MQE_INVALID); } else if (!isMultiTopType(nfa->type)) { assert(num_tops(g[v].left) == 1); @@ -3782,7 +3771,7 @@ RoseProgram makeProgram(RoseBuildImpl &build, build_context &bc, // This program may be triggered by different predecessors, with different // offset bounds. We must ensure we put this check/set operation after the // bounds check to deal with this case. - if (hasGreaterInDegree(1, v, g)) { + if (in_degree(v, g) > 1) { makeRoleCheckNotHandled(bc, v, program); } @@ -4438,8 +4427,8 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, if (build.isAnyStart(u)) { continue; // Root roles are not handled with sparse iterator. 
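The buildCountingMiracles rewrite above can drop its explicit sort by vertex index because, after this patch, containers keyed by graph descriptors iterate in a stable order: ue2_graph-style descriptors carry an allocation serial and compare by it, rather than by raw pointer value as the old listS-based adjacency_list descriptors did. That reading is an inference from these hunks (see also the later removal of VertexIndexOrdering, VertexIndexComp and RoseVertexSet); the Vertex type below is hypothetical and only illustrates the ordering idea.

    #include <cstdint>
    #include <cstdio>
    #include <map>

    // Hypothetical descriptor in the style of ue2_graph: ordered by a serial
    // number handed out at add_vertex() time, so the order is identical on
    // every run, regardless of where the underlying nodes land in memory.
    struct Vertex {
        void *impl;      // underlying node storage; address varies per run
        uint64_t serial; // stable creation-order stamp
        bool operator<(const Vertex &b) const { return serial < b.serial; }
    };

    int main() {
        int nodeA = 0, nodeB = 0;
        Vertex a = {&nodeA, 0};
        Vertex b = {&nodeB, 1};

        std::map<Vertex, const char *> info;
        info[b] = "second";
        info[a] = "first";

        // Iteration follows serial order (a, then b) on every run; a map
        // keyed by bare pointers would follow address order, which is not
        // stable and forced the old explicit index sorts.
        for (const auto &m : info) {
            printf("serial %llu -> %s\n",
                   (unsigned long long)m.first.serial, m.second);
        }
        return 0;
    }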
} - DEBUG_PRINTF("sparse iter edge (%zu,%zu)\n", g[u].idx, - g[target(e, g)].idx); + DEBUG_PRINTF("sparse iter edge (%zu,%zu)\n", g[u].index, + g[target(e, g)].index); assert(contains(bc.roleStateIndices, u)); u32 pred_state = bc.roleStateIndices.at(u); pred_blocks[pred_state].add_block(makeProgram(build, bc, e)); @@ -4455,7 +4444,8 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, if (!build.isAnyStart(u)) { continue; } - DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].idx, g[target(e, g)].idx); + DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].index, + g[target(e, g)].index); program.add_block(makeProgram(build, bc, e)); } @@ -4531,8 +4521,8 @@ map> findEdgesByLiteral(const RoseBuildImpl &build) { auto edge_list = vector(begin(m.second), end(m.second)); sort(begin(edge_list), end(edge_list), [&g](const RoseEdge &a, const RoseEdge &b) { - return tie(g[source(a, g)].idx, g[target(a, g)].idx) < - tie(g[source(b, g)].idx, g[target(b, g)].idx); + return tie(g[source(a, g)].index, g[target(a, g)].index) < + tie(g[source(b, g)].index, g[target(b, g)].index); }); lit_edge_map.emplace(m.first, edge_list); } @@ -4658,7 +4648,7 @@ bool hasEodAnchoredSuffix(const RoseBuildImpl &build) { for (auto v : vertices_range(g)) { if (g[v].suffix && build.isInETable(v)) { DEBUG_PRINTF("vertex %zu is in eod table and has a suffix\n", - g[v].idx); + g[v].index); return true; } } @@ -4670,7 +4660,7 @@ bool hasEodMatcher(const RoseBuildImpl &build) { const RoseGraph &g = build.g; for (auto v : vertices_range(g)) { if (build.isInETable(v)) { - DEBUG_PRINTF("vertex %zu is in eod table\n", g[v].idx); + DEBUG_PRINTF("vertex %zu is in eod table\n", g[v].index); return true; } } @@ -4690,19 +4680,19 @@ void addEodAnchorProgram(RoseBuildImpl &build, build_context &bc, continue; } - DEBUG_PRINTF("vertex %zu (with %zu preds) fires on EOD\n", g[v].idx, + DEBUG_PRINTF("vertex %zu (with %zu preds) fires on EOD\n", g[v].index, in_degree(v, g)); vector edge_list; for (const auto &e : in_edges_range(v, g)) { RoseVertex u = source(e, g); if (build.isInETable(u) != in_etable) { - DEBUG_PRINTF("pred %zu %s in etable\n", g[u].idx, + DEBUG_PRINTF("pred %zu %s in etable\n", g[u].index, in_etable ? "is not" : "is"); continue; } if (canEagerlyReportAtEod(build, e)) { - DEBUG_PRINTF("already done report for vertex %zu\n", g[u].idx); + DEBUG_PRINTF("already done report for vertex %zu\n", g[u].index); continue; } edge_list.push_back(e); @@ -4745,8 +4735,8 @@ void addEodEventProgram(RoseBuildImpl &build, build_context &bc, // Sort edge list for determinism, prettiness. 
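The comparator introduced above ("sort edge list for determinism, prettiness") orders edges lexicographically by the dense indices of their endpoints, which keeps the emitted bytecode identical from run to run now that descriptor values themselves are not meaningful sort keys. A standalone sketch of the same std::tie pattern follows; the Edge pair type is a stand-in for RoseEdge plus the source/target index lookups used in the hunk.

    #include <algorithm>
    #include <cstdio>
    #include <tuple>
    #include <utility>
    #include <vector>

    // (source index, target index) pairs standing in for graph edges.
    using Edge = std::pair<size_t, size_t>;

    int main() {
        std::vector<Edge> edges = {{2, 1}, {0, 3}, {2, 0}, {0, 1}};

        // Lexicographic (source, target) order via std::tie, mirroring the
        // lambda above. For a plain std::pair the default operator< would do
        // the same; std::tie generalises cleanly to any number of keys.
        std::sort(edges.begin(), edges.end(),
                  [](const Edge &a, const Edge &b) {
                      return std::tie(a.first, a.second) <
                             std::tie(b.first, b.second);
                  });

        for (const auto &e : edges) {
            printf("(%zu,%zu)\n", e.first, e.second); // (0,1) (0,3) (2,0) (2,1)
        }
        return 0;
    }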
sort(begin(edge_list), end(edge_list), [&g](const RoseEdge &a, const RoseEdge &b) { - return tie(g[source(a, g)].idx, g[target(a, g)].idx) < - tie(g[source(b, g)].idx, g[target(b, g)].idx); + return tie(g[source(a, g)].index, g[target(a, g)].index) < + tie(g[source(b, g)].index, g[target(b, g)].index); }); program.add_block( @@ -5247,7 +5237,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { return nullptr; } u32 eodNfaIterOffset = buildEodNfaIterator(bc, leftfixBeginQueue); - buildCountingMiracles(*this, bc); + buildCountingMiracles(bc); u32 queue_count = qif.allocated_count(); /* excludes anchored matcher q; * som rev nfas */ diff --git a/src/rose/rose_build_castle.cpp b/src/rose/rose_build_castle.cpp index c65e840d..7987b0f6 100644 --- a/src/rose/rose_build_castle.cpp +++ b/src/rose/rose_build_castle.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -163,7 +163,7 @@ void renovateCastle(RoseBuildImpl &tbi, CastleProto *castle, for (RoseVertex v : verts) { assert(g[v].left.castle.get() == castle); - DEBUG_PRINTF("%zu checks at lag %u\n", g[v].idx, g[v].left.lag); + DEBUG_PRINTF("%zu checks at lag %u\n", g[v].index, g[v].left.lag); vector lits = literals_for_vertex(tbi, v); for (const auto &e : lits) { DEBUG_PRINTF("%s +%u\n", dumpString(e.s).c_str(), e.delay); diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 38c488be..2f1af8a4 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -205,14 +205,6 @@ bool RoseBuildImpl::hasOnlyPseudoStarInEdges(RoseVertex v) const { return true; } -void RoseBuildImpl::renumberVertices() { - vertexIndex = 0; - DEBUG_PRINTF("renumbering vertices\n"); - for (auto v : vertices_range(g)) { - g[v].idx = vertexIndex++; - } -} - static size_t trailerDueToSelf(const rose_literal_id &lit) { size_t trailer = lit.s.length() - maxPeriod(lit.s); @@ -231,7 +223,7 @@ RoseRoleHistory findHistoryScheme(const RoseBuildImpl &tbi, const RoseEdge &e) { const RoseVertex u = source(e, g); /* pred role */ const RoseVertex v = target(e, g); /* current role */ - DEBUG_PRINTF("find history for [%zu,%zu]\n", g[u].idx, g[v].idx); + DEBUG_PRINTF("find history for [%zu,%zu]\n", g[u].index, g[v].index); DEBUG_PRINTF("u has min_offset=%u, max_offset=%u\n", g[u].min_offset, g[u].max_offset); @@ -285,7 +277,7 @@ RoseRoleHistory findHistoryScheme(const RoseBuildImpl &tbi, const RoseEdge &e) { // Non-EOD cases. 
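The deleted RoseBuildImpl::renumberVertices above, like the renumberGraphVertices/renumberGraphEdges helpers removed from ng_util.h earlier in this patch, reduces to one operation that the new renumber_vertices()/renumber_edges() utilities now provide centrally: walk the container in order and hand out contiguous indices from zero, so that index-keyed property maps and assertions in the style of hasCorrectlyNumberedVertices hold again after removals. A minimal sketch with a hypothetical property struct:

    #include <cstdio>
    #include <vector>

    // Hypothetical per-vertex property block, as in RoseVertexProps.
    struct VertexProps {
        size_t index;
    };

    // What a renumber_vertices()-style helper boils down to: contiguous,
    // unique indices assigned in container iteration order.
    static void renumberVertices(std::vector<VertexProps> &verts) {
        size_t num = 0;
        for (VertexProps &v : verts) {
            v.index = num++;
        }
    }

    int main() {
        // Indices are stale (sparse) after vertex removals.
        std::vector<VertexProps> verts = {{7}, {3}, {9}};
        renumberVertices(verts);
        for (const VertexProps &v : verts) {
            printf("%zu\n", v.index); // prints 0, 1, 2
        }
        return 0;
    }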
DEBUG_PRINTF("examining edge [%zu,%zu] with bounds {%u,%u}\n", - g[u].idx, g[v].idx, g[e].minBound, g[e].maxBound); + g[u].index, g[v].index, g[e].minBound, g[e].maxBound); if (tbi.isAnchored(v)) { // Matches for literals in the anchored table will always arrive at the @@ -875,8 +867,8 @@ bool reduceTopTriggerLoad(RoseBuildImpl &build, NGHolder &h, RoseVertex u) { if (tops.size() <= 1) { return false; } - DEBUG_PRINTF("%zu triggers %zu tops for %p\n", build.g[u].idx, tops.size(), - &h); + DEBUG_PRINTF("%zu triggers %zu tops for %p\n", build.g[u].index, + tops.size(), &h); auto h_top_info = getTopInfo(h); flat_set edges_to_trigger; @@ -976,7 +968,7 @@ void packInfixTops(NGHolder &h, RoseGraph &g, } h[e].tops = move(updated_tops); if (h[e].tops.empty()) { - DEBUG_PRINTF("edge (start,%u) has only unused tops\n", h[v].index); + DEBUG_PRINTF("edge (start,%zu) has only unused tops\n", h[v].index); dead.push_back(e); } } @@ -1311,15 +1303,9 @@ void addSmallBlockLiteral(RoseBuildImpl &tbi, const simple_anchored_info &sai, assert(old_id < tbi.literal_info.size()); const rose_literal_info &li = tbi.literal_info[old_id]; - // For compile determinism, operate over literal vertices in index - // order. - vector lit_verts(begin(li.vertices), end(li.vertices)); - sort(begin(lit_verts), end(lit_verts), VertexIndexComp(g)); - - for (auto lit_v : lit_verts) { + for (auto lit_v : li.vertices) { // Clone vertex with the new literal ID. RoseVertex v = add_vertex(g[lit_v], g); - g[v].idx = tbi.vertexIndex++; g[v].literals.clear(); g[v].literals.insert(lit_id); g[v].min_offset = sai.min_bound + sai.literal.length(); @@ -1347,7 +1333,6 @@ void addSmallBlockLiteral(RoseBuildImpl &tbi, const ue2_literal &lit, RoseGraph &g = tbi.g; RoseVertex v = add_vertex(g); - g[v].idx = tbi.vertexIndex++; g[v].literals.insert(lit_id); g[v].reports = reports; @@ -1557,7 +1542,7 @@ bool historiesAreValid(const RoseGraph &g) { for (const auto &e : edges_range(g)) { if (g[e].history == ROSE_ROLE_HISTORY_INVALID) { DEBUG_PRINTF("edge [%zu,%zu] has invalid history\n", - g[source(e, g)].idx, g[target(e, g)].idx); + g[source(e, g)].index, g[target(e, g)].index); return false; } } @@ -1576,18 +1561,20 @@ bool danglingVertexRef(RoseBuildImpl &tbi) { const ue2::unordered_set valid_vertices(vi, ve); if (!contains(valid_vertices, tbi.anchored_root)) { - DEBUG_PRINTF("anchored root vertex %p not in graph\n", - tbi.anchored_root); + DEBUG_PRINTF("anchored root vertex %zu not in graph\n", + tbi.g[tbi.anchored_root].index); return true; } for (const auto &e : tbi.ghost) { if (!contains(valid_vertices, e.first)) { - DEBUG_PRINTF("ghost key vertex %p not in graph\n", e.first); + DEBUG_PRINTF("ghost key vertex %zu not in graph\n", + tbi.g[e.first].index); return true; } if (!contains(valid_vertices, e.second)) { - DEBUG_PRINTF("ghost value vertex %p not in graph\n", e.second); + DEBUG_PRINTF("ghost value vertex %zu not in graph\n", + tbi.g[e.second].index); return true; } } @@ -1599,11 +1586,11 @@ static bool roleOffsetsAreValid(const RoseGraph &g) { for (auto v : vertices_range(g)) { if (g[v].min_offset >= ROSE_BOUND_INF) { - DEBUG_PRINTF("invalid min_offset for role %zu\n", g[v].idx); + DEBUG_PRINTF("invalid min_offset for role %zu\n", g[v].index); return false; } if (g[v].min_offset > g[v].max_offset) { - DEBUG_PRINTF("min_offset > max_offset for %zu\n", g[v].idx); + DEBUG_PRINTF("min_offset > max_offset for %zu\n", g[v].index); return false; } } diff --git a/src/rose/rose_build_convert.cpp b/src/rose/rose_build_convert.cpp index 
dfc0ed23..ae08b7cb 100644 --- a/src/rose/rose_build_convert.cpp +++ b/src/rose/rose_build_convert.cpp @@ -290,7 +290,7 @@ bool isUnconvertibleLeaf(const RoseBuildImpl &tbi, const RoseVertex v) { // Find all of the leaves with literals whose length is <= len. static -void findBadLeaves(RoseBuildImpl &tbi, RoseVertexSet &bad) { +void findBadLeaves(RoseBuildImpl &tbi, set &bad) { RoseGraph &g = tbi.g; u32 len = tbi.cc.grey.roseMaxBadLeafLength; @@ -309,15 +309,7 @@ void findBadLeaves(RoseBuildImpl &tbi, RoseVertexSet &bad) { const rose_literal_info &info = tbi.literal_info[lid]; - // Because we do the "clone pred and re-home" trick below, we need to - // iterate over our vertices in a defined ordering, otherwise we'll get - // non-determinism in our bytecode. So, copy and sort this literal's - // vertices. - - vector verts(info.vertices.begin(), info.vertices.end()); - sort(verts.begin(), verts.end(), VertexIndexComp(g)); - - for (auto v : verts) { + for (auto v : info.vertices) { if (!isLeafNode(v, g)) { continue; } @@ -331,7 +323,7 @@ void findBadLeaves(RoseBuildImpl &tbi, RoseVertexSet &bad) { const RoseEdge &e = *in_edges(v, g).first; RoseVertex u = source(e, g); if (out_degree(u, g) != 1) { - DEBUG_PRINTF("re-homing %zu to cloned pred\n", g[v].idx); + DEBUG_PRINTF("re-homing %zu to cloned pred\n", g[v].index); RoseVertex u2 = tbi.cloneVertex(u); for (const auto &e_in : in_edges_range(u, g)) { add_edge(source(e_in, g), u2, g[e_in], g); @@ -340,7 +332,7 @@ void findBadLeaves(RoseBuildImpl &tbi, RoseVertexSet &bad) { remove_edge(e, g); } - DEBUG_PRINTF("%zu is a bad leaf vertex\n", g[v].idx); + DEBUG_PRINTF("%zu is a bad leaf vertex\n", g[v].index); bad.insert(v); } } @@ -348,7 +340,7 @@ void findBadLeaves(RoseBuildImpl &tbi, RoseVertexSet &bad) { void convertBadLeaves(RoseBuildImpl &tbi) { RoseGraph &g = tbi.g; - RoseVertexSet bad(g); + set bad; findBadLeaves(tbi, bad); DEBUG_PRINTF("found %zu bad leaves\n", bad.size()); @@ -371,7 +363,7 @@ void convertBadLeaves(RoseBuildImpl &tbi) { RoseVertex u = source(e, g); assert(!g[u].suffix); g[u].suffix.graph = h; - DEBUG_PRINTF("%zu's nfa holder %p\n", g[u].idx, h.get()); + DEBUG_PRINTF("%zu's nfa holder %p\n", g[u].index, h.get()); dead.push_back(v); } @@ -784,7 +776,7 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, assert(in_degree(h.acceptEod, h) == 1); bool anchored = !proper_out_degree(h.startDs, h); - NFAVertex key = nullptr; + NFAVertex key = NGHolder::null_vertex(); NFAVertex base = anchored ? 
h.start : h.startDs; if (!anchored) { @@ -798,7 +790,7 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, } for (auto w : adjacent_vertices_range(base, h)) { - DEBUG_PRINTF("checking %u\n", h[w].index); + DEBUG_PRINTF("checking %zu\n", h[w].index); if (!h[w].char_reach.all()) { continue; } @@ -833,7 +825,7 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, set exits_and_repeat_verts; for (auto repeat_v : ri.vertices) { - DEBUG_PRINTF("repeat vertex %u\n", h[repeat_v].index); + DEBUG_PRINTF("repeat vertex %zu\n", h[repeat_v].index); succ(h, repeat_v, &exits_and_repeat_verts); exits_and_repeat_verts.insert(repeat_v); } @@ -963,7 +955,7 @@ void convertPrefixToBounds(RoseBuildImpl &tbi) { continue; } - DEBUG_PRINTF("inspecting prefix of %zu\n", g[v].idx); + DEBUG_PRINTF("inspecting prefix of %zu\n", g[v].index); if (!proper_out_degree(h.startDs, h)) { if (handleStartPrefixCliche(h, g, v, e, ar, &to_delete)) { @@ -1009,7 +1001,7 @@ void convertPrefixToBounds(RoseBuildImpl &tbi) { continue; } - DEBUG_PRINTF("inspecting prefix of %zu\n", g[v].idx); + DEBUG_PRINTF("inspecting prefix of %zu\n", g[v].index); if (!proper_out_degree(h.startDs, h)) { if (handleStartPrefixCliche(h, g, v, e, ar, &to_delete)) { @@ -1044,7 +1036,7 @@ void convertAnchPrefixToBounds(RoseBuildImpl &tbi) { continue; } - DEBUG_PRINTF("vertex %zu\n", g[v].idx); + DEBUG_PRINTF("vertex %zu\n", g[v].index); // This pass runs after makeCastles, so we use the fact that bounded // repeat detection has already been done for us. diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 516548b3..105ee338 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -104,7 +104,7 @@ public: } os << "[label=\""; - os << "idx=" << g[v].idx <<"\\n"; + os << "index=" << g[v].index <<"\\n"; for (u32 lit_id : g[v].literals) { writeLiteral(os, lit_id); @@ -267,14 +267,14 @@ void dumpRoseGraph(const RoseBuild &build_base, const RoseEngine *t, ofstream os(ss.str()); RoseGraphWriter writer(build, t); - writeGraphviz(os, build.g, writer, get(&RoseVertexProps::idx, build.g)); + writeGraphviz(os, build.g, writer, get(boost::vertex_index, build.g)); } namespace { struct CompareVertexRole { explicit CompareVertexRole(const RoseGraph &g_in) : g(g_in) {} inline bool operator()(const RoseVertex &a, const RoseVertex &b) const { - return g[a].idx < g[b].idx; + return g[a].index < g[b].index; } private: const RoseGraph &g; @@ -372,7 +372,7 @@ void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) { for (RoseVertex v : verts) { // role info - os << " Index " << g[v].idx << ": groups=0x" << hex << setw(16) + os << " Index " << g[v].index << ": groups=0x" << hex << setw(16) << setfill('0') << g[v].groups << dec; if (g[v].reports.empty()) { @@ -386,13 +386,13 @@ void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) { // pred info for (const auto &ie : in_edges_range(v, g)) { const auto &u = source(ie, g); - os << " Predecessor idx="; + os << " Predecessor index="; if (u == build.root) { os << "ROOT"; } else if (u == build.anchored_root) { os << "ANCHORED_ROOT"; } else { - os << g[u].idx; + os << g[u].index; } os << ": bounds [" << g[ie].minBound << ", "; if (g[ie].maxBound == ROSE_BOUND_INF) { diff --git a/src/rose/rose_build_groups.cpp b/src/rose/rose_build_groups.cpp index 5e477e3b..0a1c501f 100644 --- a/src/rose/rose_build_groups.cpp +++ b/src/rose/rose_build_groups.cpp @@ -136,7 +136,7 @@ rose_group calcLocalGroup(const RoseVertex 
v, const RoseGraph &g, } } else { DEBUG_PRINTF("not sibling different mother %zu %zu\n", - g[v].idx, g[w].idx); + g[v].index, g[w].index); } } } @@ -382,7 +382,7 @@ void assignGroupsToRoles(RoseBuildImpl &build) { g[ghost_it->second].groups |= succ_groups; } - DEBUG_PRINTF("vertex %zu: groups=%llx\n", g[v].idx, g[v].groups); + DEBUG_PRINTF("vertex %zu: groups=%llx\n", g[v].index, g[v].groups); } } @@ -397,8 +397,7 @@ getVertexGroupMap(const RoseBuildImpl &build) { vector v_order; v_order.reserve(num_vertices(g)); - boost::topological_sort(g, back_inserter(v_order), - vertex_index_map(get(&RoseVertexProps::idx, g))); + boost::topological_sort(g, back_inserter(v_order)); unordered_map vertex_group_map; vertex_group_map.reserve(num_vertices(g)); @@ -406,7 +405,7 @@ getVertexGroupMap(const RoseBuildImpl &build) { const rose_group initial_groups = build.getInitialGroups(); for (const auto &v : boost::adaptors::reverse(v_order)) { - DEBUG_PRINTF("vertex %zu\n", g[v].idx); + DEBUG_PRINTF("vertex %zu\n", g[v].index); if (build.isAnyStart(v)) { DEBUG_PRINTF("start vertex, groups=0x%llx\n", initial_groups); @@ -419,7 +418,7 @@ getVertexGroupMap(const RoseBuildImpl &build) { assert(in_degree(v, g) > 0); rose_group pred_groups = ~rose_group{0}; for (auto u : inv_adjacent_vertices_range(v, g)) { - DEBUG_PRINTF("pred %zu\n", g[u].idx); + DEBUG_PRINTF("pred %zu\n", g[u].index); assert(contains(vertex_group_map, u)); pred_groups &= vertex_group_map.at(u); } diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index b3f986aa..6b326d34 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -527,8 +527,6 @@ public: // max overlap considered for every pair (ulit, vlit). size_t maxLiteralOverlap(RoseVertex u, RoseVertex v) const; - void renumberVertices(void); - bool isPseudoStar(const RoseEdge &e) const; bool isPseudoStarOrFirstOnly(const RoseEdge &e) const; bool hasOnlyPseudoStarInEdges(RoseVertex v) const; @@ -551,7 +549,6 @@ public: const RoseVertex anchored_root; RoseLiteralMap literals; std::map ghost; - size_t vertexIndex; ReportID getNewNfaReport() override { return next_nfa_report++; } diff --git a/src/rose/rose_build_infix.cpp b/src/rose/rose_build_infix.cpp index 73f9e99b..f3e7680f 100644 --- a/src/rose/rose_build_infix.cpp +++ b/src/rose/rose_build_infix.cpp @@ -110,7 +110,7 @@ void contractVertex(NGHolder &g, NFAVertex v, static u32 findMaxLiteralMatches(const NGHolder &h, const set &lits) { DEBUG_PRINTF("h=%p, %zu literals\n", &h, lits.size()); - //dumpGraph("infix.dot", h.g); + //dumpGraph("infix.dot", h); // Indices of vertices that could terminate any of the literals in 'lits'. 
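The getVertexGroupMap hunk above walks the graph in reverse topological order and intersects the group bits of each vertex's predecessors, so a literal group survives at a vertex only if every path into that vertex carries it. Below is a skeleton of just that propagation step on a plain predecessor-list DAG; it ignores the per-vertex group contributions and ghost handling of the real code, and assumes vertices are numbered in topological order.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    using rose_group = uint64_t; // bitmask of literal groups, as above

    // preds[v] lists the predecessors of v; every edge u->v has u < v.
    static std::vector<rose_group>
    propagateGroups(const std::vector<std::vector<int>> &preds,
                    rose_group initial) {
        std::vector<rose_group> groups(preds.size(), 0);
        for (size_t v = 0; v < preds.size(); v++) {
            if (preds[v].empty()) {
                groups[v] = initial; // start vertices get the initial groups
                continue;
            }
            rose_group g = ~rose_group{0};
            for (int u : preds[v]) {
                g &= groups[u]; // a group survives only if every pred has it
            }
            groups[v] = g;
        }
        return groups;
    }

    int main() {
        // 0 is a start; 1 and 2 hang off 0; 3 has predecessors {1, 2}.
        std::vector<std::vector<int>> preds = {{}, {0}, {0}, {1, 2}};
        std::vector<rose_group> out = propagateGroups(preds, 0xffULL);
        printf("groups of 3: 0x%llx\n", (unsigned long long)out[3]);
        return 0;
    }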
set terms; @@ -163,7 +163,7 @@ u32 findMaxLiteralMatches(const NGHolder &h, const set &lits) { } remove_vertices(dead, g); - //dumpGraph("relaxed.dot", g.g); + //dumpGraph("relaxed.dot", g); depth maxWidth = findMaxWidth(g); DEBUG_PRINTF("maxWidth=%s\n", maxWidth.str().c_str()); @@ -286,7 +286,7 @@ void findCountingMiracleInfo(const left_id &left, const vector &stopTable, CharReach cyclic_cr; for (NFAVertex v : cyclics) { - DEBUG_PRINTF("considering %u ||=%zu\n", g[v].index, + DEBUG_PRINTF("considering %zu ||=%zu\n", g[v].index, g[v].char_reach.count()); cyclic_cr |= g[v].char_reach; } diff --git a/src/rose/rose_build_lookaround.cpp b/src/rose/rose_build_lookaround.cpp index 7c58f931..d2c4b541 100644 --- a/src/rose/rose_build_lookaround.cpp +++ b/src/rose/rose_build_lookaround.cpp @@ -261,7 +261,7 @@ void findForwardReach(const RoseGraph &g, const RoseVertex v, for (const auto &e : out_edges_range(v, g)) { RoseVertex t = target(e, g); if (!g[t].left) { - DEBUG_PRINTF("successor %zu has no leftfix\n", g[t].idx); + DEBUG_PRINTF("successor %zu has no leftfix\n", g[t].index); return; } rose_look.push_back(map()); @@ -585,7 +585,7 @@ bool getTransientPrefixReach(const NGHolder &g, u32 lag, NFAVertex v = *(inv_adjacent_vertices(g.accept, g).first); u32 i = lag + 1; while (v != g.startDs) { - DEBUG_PRINTF("i=%u, v=%u\n", i, g[v].index); + DEBUG_PRINTF("i=%u, v=%zu\n", i, g[v].index); if (is_special(v, g)) { DEBUG_PRINTF("special\n"); return false; diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 522ff6b6..f9251b8a 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -102,7 +102,7 @@ bool maskFromLeftGraph(const LeftEngInfo &left, vector &msk, CharReach cr; for (NFAVertex v : curr) { const auto &v_cr = h[v].char_reach; - DEBUG_PRINTF("vertex %u, reach %s\n", h[v].index, + DEBUG_PRINTF("vertex %zu, reach %s\n", h[v].index, describeClass(v_cr).c_str()); cr |= v_cr; insert(&next, inv_adjacent_vertices(v, h)); @@ -438,45 +438,45 @@ static bool isNoRunsVertex(const RoseBuildImpl &build, RoseVertex u) { const RoseGraph &g = build.g; if (!g[u].isBoring()) { - DEBUG_PRINTF("u=%zu is not boring\n", g[u].idx); + DEBUG_PRINTF("u=%zu is not boring\n", g[u].index); return false; } if (!g[u].reports.empty()) { - DEBUG_PRINTF("u=%zu has accept\n", g[u].idx); + DEBUG_PRINTF("u=%zu has accept\n", g[u].index); return false; } /* TODO: handle non-root roles as well. It can't be that difficult... 
*/ - if (!in_degree_equal_to(u, g, 1)) { - DEBUG_PRINTF("u=%zu is not a root role\n", g[u].idx); + if (in_degree(u, g) != 1) { + DEBUG_PRINTF("u=%zu is not a root role\n", g[u].index); return false; } RoseEdge e; bool exists; - tie(e, exists) = edge_by_target(build.root, u, g); + tie(e, exists) = edge(build.root, u, g); if (!exists) { - DEBUG_PRINTF("u=%zu is not a root role\n", g[u].idx); + DEBUG_PRINTF("u=%zu is not a root role\n", g[u].index); return false; } if (g[e].minBound != 0 || g[e].maxBound != ROSE_BOUND_INF) { - DEBUG_PRINTF("u=%zu has bounds from root\n", g[u].idx); + DEBUG_PRINTF("u=%zu has bounds from root\n", g[u].index); return false; } for (const auto &oe : out_edges_range(u, g)) { RoseVertex v = target(oe, g); if (g[oe].maxBound != ROSE_BOUND_INF) { - DEBUG_PRINTF("edge (%zu,%zu) has max bound\n", g[u].idx, - g[target(oe, g)].idx); + DEBUG_PRINTF("edge (%zu,%zu) has max bound\n", g[u].index, + g[v].index); return false; } if (g[v].left) { - DEBUG_PRINTF("v=%zu has rose prefix\n", g[v].idx); + DEBUG_PRINTF("v=%zu has rose prefix\n", g[v].index); return false; } } @@ -563,7 +563,7 @@ u64a literalMinReportOffset(const RoseBuildImpl &build, u64a lit_min_offset = UINT64_MAX; for (const auto &v : info.vertices) { - DEBUG_PRINTF("vertex %zu min_offset=%u\n", g[v].idx, g[v].min_offset); + DEBUG_PRINTF("vertex %zu min_offset=%u\n", g[v].index, g[v].min_offset); u64a vert_offset = g[v].min_offset; diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 054dd12f..01db84a1 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -206,8 +206,9 @@ void mergeDupeLeaves(RoseBuildImpl &tbi) { continue; } - DEBUG_PRINTF("inspecting vertex idx=%zu in_degree %zu out_degree %zu\n", - g[v].idx, in_degree(v, g), out_degree(v, g)); + DEBUG_PRINTF("inspecting vertex index=%zu in_degree %zu " + "out_degree %zu\n", g[v].index, in_degree(v, g), + out_degree(v, g)); // Vertex must be a reporting leaf node if (g[v].reports.empty() || !isLeafNode(v, g)) { @@ -227,13 +228,13 @@ void mergeDupeLeaves(RoseBuildImpl &tbi) { } RoseVertex t = leaves.find(dupe)->second; - DEBUG_PRINTF("found two leaf dupe roles, idx=%zu,%zu\n", g[v].idx, - g[t].idx); + DEBUG_PRINTF("found two leaf dupe roles, index=%zu,%zu\n", g[v].index, + g[t].index); vector deadEdges; for (const auto &e : in_edges_range(v, g)) { RoseVertex u = source(e, g); - DEBUG_PRINTF("u idx=%zu\n", g[u].idx); + DEBUG_PRINTF("u index=%zu\n", g[u].index); RoseEdge et; bool exists; tie (et, exists) = edge(u, t, g); @@ -244,7 +245,8 @@ void mergeDupeLeaves(RoseBuildImpl &tbi) { deadEdges.push_back(e); } } else { - DEBUG_PRINTF("rehome edge: add %zu->%zu\n", g[u].idx, g[t].idx); + DEBUG_PRINTF("rehome edge: add %zu->%zu\n", + g[u].index, g[t].index); add_edge(u, t, g[e], g); deadEdges.push_back(e); } @@ -279,7 +281,7 @@ void mergeDupeLeaves(RoseBuildImpl &tbi) { // if we've removed anything, we need to renumber vertices if (countRemovals) { - tbi.renumberVertices(); + renumber_vertices(g); DEBUG_PRINTF("removed %zu vertices.\n", countRemovals); } } @@ -350,7 +352,7 @@ void findUncalcLeavesCandidates(RoseBuildImpl &tbi, // Ref count all suffixes, as we don't want to merge a suffix // that happens to be shared with a non-leaf vertex somewhere. 
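The "ref count all suffixes" comment above means exactly what it says: the same suffix NGHolder can hang off several vertices via shared_ptr, and a suffix is only a safe merge candidate if no other vertex shares it. A small sketch of the counting idiom follows, with Graph and Vertex as hypothetical stand-ins for NGHolder and the RoseGraph vertex properties.

    #include <cstdio>
    #include <map>
    #include <memory>
    #include <vector>

    struct Graph {}; // stand-in for NGHolder
    struct Vertex {
        std::shared_ptr<Graph> suffix; // as in RoseSuffixInfo::graph
    };

    int main() {
        auto shared = std::make_shared<Graph>();
        auto unique = std::make_shared<Graph>();
        std::vector<Vertex> verts = {{shared}, {shared}, {unique}};

        // Count how many vertices refer to each suffix graph; only a suffix
        // referenced exactly once is safe to rewrite or merge in place.
        std::map<const Graph *, size_t> fcount;
        for (const Vertex &v : verts) {
            fcount[v.suffix.get()]++;
        }

        for (const Vertex &v : verts) {
            printf("%s\n",
                   fcount[v.suffix.get()] == 1 ? "private" : "shared");
        }
        return 0;
    }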
- DEBUG_PRINTF("vertex %zu has suffix %p\n", g[v].idx, + DEBUG_PRINTF("vertex %zu has suffix %p\n", g[v].index, g[v].suffix.graph.get()); fcount[g[v].suffix.graph.get()]++; @@ -459,7 +461,7 @@ struct RoseGroup { const RoseGraph &g = build.g; assert(in_degree(v, g) == 1); RoseVertex u = *inv_adjacent_vertices(v, g).first; - parent = g[u].idx; + parent = g[u].index; } bool operator<(const RoseGroup &b) const { @@ -580,14 +582,14 @@ bool dedupeLeftfixes(RoseBuildImpl &tbi) { } // Scan the rest of the list for dupes. - for (auto kt = next(jt); kt != jte; ++kt) { + for (auto kt = std::next(jt); kt != jte; ++kt) { if (g[v].left == g[*kt].left || !rosecmp(v, *kt)) { continue; } // Dupe found. DEBUG_PRINTF("rose at vertex %zu is a dupe of %zu\n", - g[*kt].idx, g[v].idx); + g[*kt].index, g[v].index); assert(g[v].left.lag == g[*kt].left.lag); g[*kt].left = g[v].left; work_done = true; @@ -1070,8 +1072,8 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, return false; } - DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].idx, - tbi.g[v].idx); + DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].index, + tbi.g[v].index); return true; } @@ -1387,7 +1389,7 @@ void processMergeQueue(RoseBuildImpl &tbi, RoseBouquet &roses, static bool nfaHasNarrowStart(const NGHolder &g) { - if (hasGreaterOutDegree(1, g.startDs, g)) { + if (out_degree(g.startDs, g) > 1) { return false; // unanchored } @@ -1409,7 +1411,7 @@ bool nfaHasFiniteMaxWidth(const NGHolder &g) { namespace { struct RoseMergeKey { - RoseMergeKey(const RoseVertexSet &parents_in, + RoseMergeKey(const set &parents_in, bool narrowStart_in, bool hasMaxWidth_in) : narrowStart(narrowStart_in), hasMaxWidth(hasMaxWidth_in), @@ -1427,7 +1429,7 @@ struct RoseMergeKey { bool narrowStart; bool hasMaxWidth; - RoseVertexSet parents; + set parents; }; } @@ -1491,7 +1493,7 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &tbi) { map rosesByParent; RoseGraph &g = tbi.g; - RoseVertexSet parents(g); + set parents; DEBUG_PRINTF("-----\n"); DEBUG_PRINTF("entry\n"); @@ -1626,7 +1628,7 @@ struct DedupeLeftKey { : left_hash(hashLeftfix(build.g[v].left)) { const auto &g = build.g; for (const auto &e : in_edges_range(v, g)) { - preds.emplace(g[source(e, g)].idx, g[e].rose_top); + preds.emplace(g[source(e, g)].index, g[e].rose_top); } } @@ -1726,7 +1728,7 @@ void dedupeLeftfixesVariableLag(RoseBuildImpl &tbi) { for (auto v : verts1) { DEBUG_PRINTF("replacing report %u with %u on %zu\n", g[v].left.leftfix_report, - v2_left.leftfix_report, g[v].idx); + v2_left.leftfix_report, g[v].index); u32 orig_lag = g[v].left.lag; g[v].left = v2_left; g[v].left.lag = orig_lag; @@ -1758,7 +1760,7 @@ void replaceTops(NGHolder &h, const map &top_mapping) { } flat_set new_tops; for (u32 t : h[e].tops) { - DEBUG_PRINTF("vertex %u has top %u\n", h[v].index, t); + DEBUG_PRINTF("vertex %zu has top %u\n", h[v].index, t); new_tops.insert(top_mapping.at(t)); } h[e].tops = move(new_tops); @@ -1806,7 +1808,7 @@ bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, } for (auto v : verts1) { - DEBUG_PRINTF("vertex %zu\n", g[v].idx); + DEBUG_PRINTF("vertex %zu\n", g[v].index); assert(!g[v].left.haig); assert(!g[v].left.dfa); for (const auto &e : in_edges_range(v, g)) { @@ -1815,7 +1817,7 @@ bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, assert(contains(top_mapping, t)); g[e].rose_top = top_mapping[t]; DEBUG_PRINTF("edge (%zu,%zu) went from top %u to %u\n", - g[source(e, g)].idx, g[target(e, g)].idx, t, + g[source(e, 
g)].index, g[target(e, g)].index, t, top_mapping[t]); } } @@ -1836,7 +1838,7 @@ bool setDistinctSuffixTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, } for (auto v : verts1) { - DEBUG_PRINTF("vertex %zu\n", g[v].idx); + DEBUG_PRINTF("vertex %zu\n", g[v].index); u32 t = g[v].suffix.top; assert(contains(top_mapping, t)); g[v].suffix.top = top_mapping[t]; diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index dcb2a4eb..50ca1d9e 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -75,7 +75,6 @@ RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, : cc(cc_in), root(add_vertex(g)), anchored_root(add_vertex(g)), - vertexIndex(0), delay_base_id(MO_INVALID_IDX), hasSom(false), group_end(0), @@ -89,11 +88,9 @@ RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, boundary(boundary_in), next_nfa_report(0) { // add root vertices to graph - g[root].idx = vertexIndex++; g[root].min_offset = 0; g[root].max_offset = 0; - g[anchored_root].idx = vertexIndex++; g[anchored_root].min_offset = 0; g[anchored_root].max_offset = 0; } @@ -193,7 +190,7 @@ bool RoseBuildImpl::hasLiteralInTable(RoseVertex v, bool RoseBuildImpl::hasNoFloatingRoots() const { for (auto v : adjacent_vertices_range(root, g)) { if (isFloating(v)) { - DEBUG_PRINTF("direct floating root %zu\n", g[v].idx); + DEBUG_PRINTF("direct floating root %zu\n", g[v].index); return false; } } @@ -201,7 +198,7 @@ bool RoseBuildImpl::hasNoFloatingRoots() const { /* need to check if the anchored_root has any literals which are too deep */ for (auto v : adjacent_vertices_range(anchored_root, g)) { if (isFloating(v)) { - DEBUG_PRINTF("indirect floating root %zu\n", g[v].idx); + DEBUG_PRINTF("indirect floating root %zu\n", g[v].index); return false; } } @@ -337,14 +334,14 @@ size_t RoseBuildImpl::maxLiteralOverlap(RoseVertex u, RoseVertex v) const { void RoseBuildImpl::removeVertices(const vector &dead) { for (auto v : dead) { assert(!isAnyStart(v)); - DEBUG_PRINTF("removing vertex %zu\n", g[v].idx); + DEBUG_PRINTF("removing vertex %zu\n", g[v].index); for (auto lit_id : g[v].literals) { literal_info[lit_id].vertices.erase(v); } - clear_vertex_faster(v, g); + clear_vertex(v, g); remove_vertex(v, g); } - renumberVertices(); + renumber_vertices(g); } // Find the maximum bound on the edges to this vertex's successors ignoring @@ -893,7 +890,6 @@ bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b) { // Note: only clones the vertex, you'll have to wire up your own edges. RoseVertex RoseBuildImpl::cloneVertex(RoseVertex v) { RoseVertex v2 = add_vertex(g[v], g); - g[v2].idx = vertexIndex++; for (const auto &lit_id : g[v2].literals) { literal_info[lit_id].vertices.insert(v2); @@ -1277,7 +1273,7 @@ bool canImplementGraphs(const RoseBuildImpl &tbi) { // First, check the Rose leftfixes. for (auto v : vertices_range(g)) { - DEBUG_PRINTF("leftfix: check vertex %zu\n", g[v].idx); + DEBUG_PRINTF("leftfix: check vertex %zu\n", g[v].index); if (g[v].left.castle) { DEBUG_PRINTF("castle ok\n"); @@ -1295,8 +1291,8 @@ bool canImplementGraphs(const RoseBuildImpl &tbi) { assert(g[v].left.graph->kind == (tbi.isRootSuccessor(v) ? 
NFA_PREFIX : NFA_INFIX)); if (!isImplementableNFA(*g[v].left.graph, nullptr, tbi.cc)) { - DEBUG_PRINTF("nfa prefix %zu failed (%zu vertices)\n", g[v].idx, - num_vertices(*g[v].left.graph)); + DEBUG_PRINTF("nfa prefix %zu failed (%zu vertices)\n", + g[v].index, num_vertices(*g[v].left.graph)); return false; } } @@ -1305,7 +1301,7 @@ bool canImplementGraphs(const RoseBuildImpl &tbi) { // Suffix graphs. for (auto v : vertices_range(g)) { - DEBUG_PRINTF("suffix: check vertex %zu\n", g[v].idx); + DEBUG_PRINTF("suffix: check vertex %zu\n", g[v].index); const RoseSuffixInfo &suffix = g[v].suffix; if (suffix.castle) { @@ -1323,8 +1319,8 @@ bool canImplementGraphs(const RoseBuildImpl &tbi) { if (suffix.graph) { assert(suffix.graph->kind == NFA_SUFFIX); if (!isImplementableNFA(*suffix.graph, &tbi.rm, tbi.cc)) { - DEBUG_PRINTF("nfa suffix %zu failed (%zu vertices)\n", g[v].idx, - num_vertices(*suffix.graph)); + DEBUG_PRINTF("nfa suffix %zu failed (%zu vertices)\n", + g[v].index, num_vertices(*suffix.graph)); return false; } } diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp index 4757eb11..099e3e7a 100644 --- a/src/rose/rose_build_role_aliasing.cpp +++ b/src/rose/rose_build_role_aliasing.cpp @@ -111,11 +111,9 @@ struct AliasInEdge : EdgeAndVertex { class CandidateSet { public: - typedef RoseVertexSet::iterator iterator; + typedef set::iterator iterator; typedef RoseVertex key_type; - explicit CandidateSet(const VertexIndexComp &comp) : main_cont(comp) {} - iterator begin() { return main_cont.begin(); } iterator end() { return main_cont.end(); } @@ -151,7 +149,7 @@ public: private: /* if a vertex is worth storing, it is worth storing twice */ - RoseVertexSet main_cont; /* deterministic iterator */ + set main_cont; /* deterministic iterator */ ue2::unordered_set hash_cont; /* member checks */ }; @@ -258,7 +256,7 @@ bool samePredecessors(RoseVertex a, RoseVertex b, const RoseGraph &g) { for (const auto &e_a : in_edges_range(a, g)) { bool exists; RoseEdge e; - tie(e, exists) = edge_by_target(source(e_a, g), b, g); + tie(e, exists) = edge(source(e_a, g), b, g); if (!exists || g[e].rose_top != g[e_a].rose_top) { DEBUG_PRINTF("bad tops\n"); return false; @@ -297,7 +295,7 @@ bool hasCommonPredWithBadBounds(RoseVertex a, RoseVertex b, for (const auto &e_a : in_edges_range(a, g)) { bool exists; RoseEdge e; - tie(e, exists) = edge_by_target(source(e_a, g), b, g); + tie(e, exists) = edge(source(e_a, g), b, g); if (exists) { if (g[e_a].maxBound < g[e].minBound || g[e].maxBound < g[e_a].minBound) { @@ -498,11 +496,11 @@ void mergeEdgeAdd(RoseVertex u, RoseVertex v, const RoseEdge &from_edge, const RoseEdgeProps &from_props = g[from_edge]; if (!to_edge) { - DEBUG_PRINTF("adding edge [%zu,%zu]\n", g[u].idx, g[v].idx); + DEBUG_PRINTF("adding edge [%zu,%zu]\n", g[u].index, g[v].index); add_edge(u, v, from_props, g); } else { // union of the two edges. 
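The "union of the two edges" step in mergeEdgeAdd above combines two [minBound, maxBound] ranges by taking the smaller minimum and the larger maximum, i.e. the convex hull of the two intervals. Note this can admit distances that neither original edge allowed (the gap between disjoint ranges); the surrounding merge logic is responsible for only proposing pairs where that widening is acceptable. A standalone sketch, with ROSE_BOUND_INF redefined locally to mirror the unbounded maximum:

    #include <algorithm>
    #include <cstdio>

    static const unsigned ROSE_BOUND_INF = ~0U; // local mirror of the macro

    struct Bounds {
        unsigned minBound;
        unsigned maxBound;
    };

    // Convex hull of two bound ranges, as in mergeEdgeAdd: anything allowed
    // by either edge must remain allowed by the merged edge.
    static Bounds unionBounds(const Bounds &a, const Bounds &b) {
        return {std::min(a.minBound, b.minBound),
                std::max(a.maxBound, b.maxBound)};
    }

    int main() {
        Bounds merged = unionBounds({2, 5}, {10, ROSE_BOUND_INF});
        printf("[%u, %s]\n", merged.minBound,
               merged.maxBound == ROSE_BOUND_INF ? "inf" : "finite");
        return 0;
    }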
- DEBUG_PRINTF("updating edge [%zu,%zu]\n", g[u].idx, g[v].idx); + DEBUG_PRINTF("updating edge [%zu,%zu]\n", g[u].index, g[v].index); RoseEdgeProps &to_props = g[*to_edge]; to_props.minBound = min(to_props.minBound, from_props.minBound); to_props.maxBound = max(to_props.maxBound, from_props.maxBound); @@ -626,7 +624,7 @@ static void mergeVerticesLeft(RoseVertex a, RoseVertex b, RoseBuildImpl &build, RoseAliasingInfo &rai) { RoseGraph &g = build.g; - DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].idx, g[b].idx); + DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].index, g[b].index); insert(&g[b].reports, g[a].reports); @@ -648,7 +646,7 @@ static void mergeVerticesRight(RoseVertex a, RoseVertex b, RoseBuildImpl &build, RoseAliasingInfo &rai) { RoseGraph &g = build.g; - DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].idx, g[b].idx); + DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].index, g[b].index); insert(&g[b].reports, g[a].reports); g[b].min_offset = min(g[a].min_offset, g[b].min_offset); @@ -666,7 +664,7 @@ static void mergeVerticesDiamond(RoseVertex a, RoseVertex b, RoseBuildImpl &build, RoseAliasingInfo &rai) { RoseGraph &g = build.g; - DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].idx, g[b].idx); + DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].index, g[b].index); // For a diamond merge, most properties are already the same (with the // notable exception of the literal set). @@ -683,7 +681,7 @@ static never_inline void findCandidates(const RoseBuildImpl &build, CandidateSet *candidates) { for (auto v : vertices_range(build.g)) { if (isAliasingCandidate(v, build)) { - DEBUG_PRINTF("candidate %zu\n", build.g[v].idx); + DEBUG_PRINTF("candidate %zu\n", build.g[v].index); DEBUG_PRINTF("lits: %u\n", *build.g[v].literals.begin()); candidates->insert(v); } @@ -748,7 +746,7 @@ bool hasCommonPredWithDiffRoses(RoseVertex a, RoseVertex b, for (const auto &e_a : in_edges_range(a, g)) { bool exists; RoseEdge e; - tie(e, exists) = edge_by_target(source(e_a, g), b, g); + tie(e, exists) = edge(source(e_a, g), b, g); if (exists) { DEBUG_PRINTF("common pred, e_r=%d r_t %u,%u\n", (int)equal_roses, g[e].rose_top, g[e_a].rose_top); @@ -890,7 +888,7 @@ void pruneUnusedTops(NGHolder &h, const RoseGraph &g, used_tops.begin(), used_tops.end(), pt_inserter); h[e].tops = move(pruned_tops); if (h[e].tops.empty()) { - DEBUG_PRINTF("edge (start,%u) has only unused tops\n", h[v].index); + DEBUG_PRINTF("edge (start,%zu) has only unused tops\n", h[v].index); dead.push_back(e); } } @@ -1295,7 +1293,7 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, } DEBUG_PRINTF("attempting merge of roses on vertices %zu and %zu\n", - g[a].idx, g[b].idx); + g[a].index, g[b].index); set &b_verts = rai.rev_leftfix[b_left]; set aa; @@ -1387,7 +1385,7 @@ bool attemptRoseMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, RoseVertex b, bool trivialCasesOnly, RoseAliasingInfo &rai) { DEBUG_PRINTF("attempting rose merge, vertices a=%zu, b=%zu\n", - build.g[a].idx, build.g[b].idx); + build.g[a].index, build.g[b].index); assert(a != b); RoseGraph &g = build.g; @@ -1600,7 +1598,7 @@ void diamondMergePass(CandidateSet &candidates, RoseBuildImpl &build, assert(contains(candidates, a)); - DEBUG_PRINTF("trying to merge %zu into somebody\n", g[a].idx); + DEBUG_PRINTF("trying to merge %zu into somebody\n", g[a].index); for (auto jt = it; jt != siblings.end(); ++jt) { RoseVertex b = *jt; assert(contains(candidates, b)); @@ -1714,8 +1712,8 @@ void leftMergePass(CandidateSet &candidates, 
RoseBuildImpl &build, RoseVertex pred = pickPred(a, g, build); siblings.clear(); - if (pred == RoseGraph::null_vertex() || build.isAnyStart(pred) || - hasGreaterOutDegree(verts.size(), pred, g)) { + if (pred == RoseGraph::null_vertex() || build.isAnyStart(pred) + || out_degree(pred, g) > verts.size()) { // Select sibling from amongst the vertices that share a literal. siblings.insert(siblings.end(), verts.begin(), verts.end()); } else { @@ -1724,8 +1722,6 @@ void leftMergePass(CandidateSet &candidates, RoseBuildImpl &build, insert(&siblings, siblings.end(), adjacent_vertices(pred, g)); } - sort(siblings.begin(), siblings.end(), VertexIndexComp(g)); - auto jt = findLeftMergeSibling(siblings.begin(), siblings.end(), a, build, rai, candidates); if (jt == siblings.end()) { @@ -1754,12 +1750,12 @@ bool safeRootPreds(RoseVertex a, RoseVertex b, const RoseGraph &g) { set a_roots, b_roots; for (auto u : inv_adjacent_vertices_range(a, g)) { - if (!hasGreaterInDegree(0, u, g)) { + if (!in_degree(u, g)) { a_roots.insert(u); } } for (auto u : inv_adjacent_vertices_range(b, g)) { - if (!hasGreaterInDegree(0, u, g)) { + if (!in_degree(u, g)) { b_roots.insert(u); } } @@ -1867,8 +1863,8 @@ void buildCandidateRightSiblings(CandidateSet &candidates, RoseBuildImpl &build, u32 lit_id = *g[a].literals.begin(); RoseVertex succ = pickSucc(a, g); const auto &verts = build.literal_info.at(lit_id).vertices; - if (succ != RoseGraph::null_vertex() && - !hasGreaterInDegree(verts.size(), succ, g)) { + if (succ != RoseGraph::null_vertex() + && in_degree(succ, g) < verts.size()) { if (!done_succ.insert(succ).second) { continue; // succ already in done_succ. } @@ -1901,7 +1897,7 @@ void buildCandidateRightSiblings(CandidateSet &candidates, RoseBuildImpl &build, } for (auto &siblings : sibling_cache | map_values) { - sort(siblings.begin(), siblings.end(), VertexIndexComp(build.g)); + sort(siblings.begin(), siblings.end()); } } @@ -1976,7 +1972,7 @@ bool hasNoDiamondSiblings(const RoseGraph &g, RoseVertex v) { if (has_successor(v, g)) { bool only_succ = true; for (const auto &w : adjacent_vertices_range(v, g)) { - if (hasGreaterInDegree(1, w, g)) { + if (in_degree(w, g) > 1) { only_succ = false; break; } @@ -1992,7 +1988,7 @@ bool hasNoDiamondSiblings(const RoseGraph &g, RoseVertex v) { bool only_pred = true; for (const auto &u : inv_adjacent_vertices_range(v, g)) { - if (hasGreaterOutDegree(1, u, g)) { + if (out_degree(u, g) > 1) { only_pred = false; break; } @@ -2040,7 +2036,7 @@ void aliasRoles(RoseBuildImpl &build, bool mergeRoses) { mergeRoses &= cc.grey.mergeRose & cc.grey.roseMergeRosesDuringAliasing; - CandidateSet candidates(g); + CandidateSet candidates; findCandidates(build, &candidates); DEBUG_PRINTF("candidates %zu\n", candidates.size()); diff --git a/src/rose/rose_build_util.h b/src/rose/rose_build_util.h index 85cfc010..81bb6845 100644 --- a/src/rose/rose_build_util.h +++ b/src/rose/rose_build_util.h @@ -39,31 +39,6 @@ namespace ue2 { /** Max allowed width for transient graphs in block mode */ #define ROSE_BLOCK_TRANSIENT_MAX_WIDTH 255U -// Comparator for vertices using their index property. -struct VertexIndexComp { - VertexIndexComp(const RoseGraph &gg) : g(gg) {} - - bool operator()(const RoseVertex &a, const RoseVertex &b) const { - const RoseVertexProps &pa = g[a]; - const RoseVertexProps &pb = g[b]; - - if (pa.idx < pb.idx) { - return true; - } - if (pa.idx > pb.idx) { - return false; - } - - assert(a == b); // All vertex indices should be distinct. 
- return a < b; - } - - const RoseGraph &g; -}; - -// Vertex set type, ordered by index. Construct with a graph reference. -typedef std::set RoseVertexSet; - /** * \brief Add two Rose depths together, coping correctly with infinity at * ROSE_BOUND_INF. diff --git a/src/rose/rose_build_width.cpp b/src/rose/rose_build_width.cpp index 6bfcee48..182b62ee 100644 --- a/src/rose/rose_build_width.cpp +++ b/src/rose/rose_build_width.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -77,19 +77,20 @@ u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) { u32 minWidth = ROSE_BOUND_INF; for (auto v : reachable) { if (g[v].eod_accept) { - DEBUG_PRINTF("skipping %zu - not a real vertex\n", g[v].idx); + DEBUG_PRINTF("skipping %zu - not a real vertex\n", g[v].index); continue; } const u32 w = g[v].min_offset; if (!g[v].reports.empty()) { - DEBUG_PRINTF("%zu can fire report at offset %u\n", g[v].idx, w); + DEBUG_PRINTF("%zu can fire report at offset %u\n", g[v].index, w); minWidth = min(minWidth, w); } if (is_end_anchored(g, v)) { - DEBUG_PRINTF("%zu can fire eod report at offset %u\n", g[v].idx, w); + DEBUG_PRINTF("%zu can fire eod report at offset %u\n", g[v].index, + w); minWidth = min(minWidth, w); } @@ -98,7 +99,7 @@ u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) { assert(suffix_width.is_reachable()); DEBUG_PRINTF("%zu has suffix with top %u (width %s), can fire " "report at %u\n", - g[v].idx, g[v].suffix.top, suffix_width.str().c_str(), + g[v].index, g[v].suffix.top, suffix_width.str().c_str(), w + suffix_width); minWidth = min(minWidth, w + suffix_width); } @@ -203,10 +204,10 @@ u32 findMaxBAWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) { // Everyone's anchored, so the max width can be taken from the max // max_offset on our vertices (so long as all accepts are ACCEPT_EOD). for (auto v : reachable) { - DEBUG_PRINTF("inspecting vert %zu\n", g[v].idx); + DEBUG_PRINTF("inspecting vert %zu\n", g[v].index); if (g[v].eod_accept) { - DEBUG_PRINTF("skipping %zu - not a real vertex\n", g[v].idx); + DEBUG_PRINTF("skipping %zu - not a real vertex\n", g[v].index); continue; } diff --git a/src/rose/rose_graph.h b/src/rose/rose_graph.h index 6abe629b..c3af749f 100644 --- a/src/rose/rose_graph.h +++ b/src/rose/rose_graph.h @@ -44,11 +44,10 @@ #include "util/charreach.h" #include "util/depth.h" #include "util/ue2_containers.h" +#include "util/ue2_graph.h" #include #include -#include -#include namespace ue2 { @@ -139,7 +138,7 @@ struct RoseSuffixInfo { /** \brief Properties attached to each Rose graph vertex. */ struct RoseVertexProps { /** \brief Unique dense vertex index. Used for BGL algorithms. */ - size_t idx = ~size_t{0}; + size_t index = ~size_t{0}; /** \brief IDs of literals in the Rose literal map. */ flat_set literals; @@ -183,6 +182,9 @@ struct RoseVertexProps { /** \brief Properties attached to each Rose graph edge. */ /* bounds are distance from end of prev to start of the next */ struct RoseEdgeProps { + /** \brief Unique dense vertex index. Used for BGL algorithms. */ + size_t index = ~size_t{0}; + /** * \brief Minimum distance from the end of the source role's match to the * start of the target role's match. 
@@ -215,18 +217,10 @@ bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b); /** * \brief Core Rose graph structure. - * - * Note that we use the list selector for the edge and vertex lists: we depend - * on insertion order for determinism, so we must use these containers. */ -using RoseGraph = boost::adjacency_list; - +struct RoseGraph : public ue2_graph { + friend class RoseBuildImpl; /* to allow index renumbering */ +}; using RoseVertex = RoseGraph::vertex_descriptor; using RoseEdge = RoseGraph::edge_descriptor; diff --git a/src/rose/rose_in_dump.cpp b/src/rose/rose_in_dump.cpp index fbd6858b..172b58e8 100644 --- a/src/rose/rose_in_dump.cpp +++ b/src/rose/rose_in_dump.cpp @@ -122,7 +122,7 @@ void dumpPreRoseGraph(const RoseInGraph &ig, const Grey &grey, ostringstream name; name << grey.dumpPath << "pre_rose_" << id << ".dot"; - dumpGraph(name.str().c_str(), h->g); + dumpGraph(name.str().c_str(), *h); assert(allMatchStatesHaveReports(*h)); } diff --git a/src/rose/rose_in_graph.h b/src/rose/rose_in_graph.h index 14d4d9b2..0e218576 100644 --- a/src/rose/rose_in_graph.h +++ b/src/rose/rose_in_graph.h @@ -46,13 +46,11 @@ #include "ue2common.h" #include "rose/rose_common.h" #include "util/ue2_containers.h" +#include "util/ue2_graph.h" #include "util/ue2string.h" #include -#include -#include - namespace ue2 { class NGHolder; @@ -128,6 +126,7 @@ public: flat_set reports; /**< for RIV_ACCEPT/RIV_ACCEPT_EOD */ u32 min_offset; /**< Minimum offset at which this vertex can match. */ u32 max_offset; /**< Maximum offset at which this vertex can match. */ + size_t index = 0; }; struct RoseInEdgeProps { @@ -174,11 +173,12 @@ struct RoseInEdgeProps { std::shared_ptr haig; u32 graph_lag; + size_t index = 0; }; -typedef boost::adjacency_list RoseInGraph; +struct RoseInGraph + : public ue2_graph { +}; typedef RoseInGraph::vertex_descriptor RoseInVertex; typedef RoseInGraph::edge_descriptor RoseInEdge; diff --git a/src/rose/rose_in_util.cpp b/src/rose/rose_in_util.cpp index cce6ff35..3b31b38e 100644 --- a/src/rose/rose_in_util.cpp +++ b/src/rose/rose_in_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,27 +48,15 @@ using namespace std; namespace ue2 { -static -void populateIndexMap(const RoseInGraph &in, - map *index_map) { - size_t i = 0; - for (auto v : vertices_range(in)) { - (*index_map)[v] = i++; - } -} - /* Returns a topological ordering of the vertices in g. That is the starts are * at the front and all the predecessors of a vertex occur earlier in the list * than the vertex. 
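 * For example, with edges (a,b) and (b,c), a valid returned order is
 * [a, b, c].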
*/ vector topo_order(const RoseInGraph &g) { - map index_map; - populateIndexMap(g, &index_map); - + assert(hasCorrectlyNumberedVertices(g)); vector v_order; - v_order.reserve(index_map.size()); + v_order.reserve(num_vertices(g)); - topological_sort(g, back_inserter(v_order), - vertex_index_map(boost::make_assoc_property_map(index_map))); + boost::topological_sort(g, back_inserter(v_order)); reverse(v_order.begin(), v_order.end()); /* put starts at the front */ @@ -105,6 +93,7 @@ private: } unique_ptr cloneRoseGraph(const RoseInGraph &ig) { + assert(hasCorrectlyNumberedVertices(ig)); unique_ptr out = make_unique(); unordered_map> graph_map; @@ -120,12 +109,8 @@ unique_ptr cloneRoseGraph(const RoseInGraph &ig) { } } - map index_map; - populateIndexMap(ig, &index_map); - copy_graph(ig, *out, - boost::edge_copy(RoseEdgeCopier(ig, *out, graph_map, haig_map)) - .vertex_index_map(boost::make_assoc_property_map(index_map))); + boost::edge_copy(RoseEdgeCopier(ig, *out, graph_map, haig_map))); return out; } diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index d395a7af..108bca8a 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -126,7 +126,7 @@ bool pruneOverlongReports(NFAVertex v, NGHolder &g, const depth &max_depth, } if (g[v].reports.empty()) { - DEBUG_PRINTF("none of vertex %u's reports can match, cut accepts\n", + DEBUG_PRINTF("none of vertex %zu's reports can match, cut accepts\n", g[v].index); remove_edge(v, g.accept, g); remove_edge(v, g.acceptEod, g); diff --git a/src/som/slot_manager.h b/src/som/slot_manager.h index 9de78f44..971ea362 100644 --- a/src/som/slot_manager.h +++ b/src/som/slot_manager.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,7 +34,7 @@ #define SLOT_MANAGER_H #include "ue2common.h" -#include "nfagraph/ng_graph.h" +#include "nfagraph/ng_holder.h" #include "util/alloc.h" #include "util/ue2_containers.h" diff --git a/src/util/dump_charclass.cpp b/src/util/dump_charclass.cpp index 74b45414..4c159ec2 100644 --- a/src/util/dump_charclass.cpp +++ b/src/util/dump_charclass.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -249,6 +249,15 @@ string describeClass(const CharReach &cr, size_t maxLength, return oss.str(); } +string describeClasses(const std::vector &v, size_t maxClassLength, + enum cc_output_t out_type) { + std::ostringstream oss; + for (const auto &cr : v) { + describeClass(oss, cr, maxClassLength, out_type); + } + return oss.str(); +} + // C stdio wrapper void describeClass(FILE *f, const CharReach &cr, size_t maxLength, enum cc_output_t out_type) { diff --git a/src/util/dump_charclass.h b/src/util/dump_charclass.h index 9c3362bc..45b707f1 100644 --- a/src/util/dump_charclass.h +++ b/src/util/dump_charclass.h @@ -38,6 +38,7 @@ #include #include #include +#include namespace ue2 { @@ -54,6 +55,10 @@ void describeClass(std::ostream &os, const CharReach &cr, size_t maxLength = 16, std::string describeClass(const CharReach &cr, size_t maxLength = 16, enum cc_output_t out_type = CC_OUT_TEXT); +std::string describeClasses(const std::vector &v, + size_t maxClassLength 
= 16, + enum cc_output_t out_type = CC_OUT_TEXT); + void describeClass(FILE *f, const CharReach &cr, size_t maxLength, enum cc_output_t out_type); diff --git a/src/util/graph.h b/src/util/graph.h index d15e77aa..ae7c2c90 100644 --- a/src/util/graph.h +++ b/src/util/graph.h @@ -38,71 +38,18 @@ #include "util/graph_range.h" #include "util/ue2_containers.h" -#include -#include #include -#include + +#include +#include +#include namespace ue2 { /** \brief True if the given vertex has no out-edges. */ template bool isLeafNode(const typename Graph::vertex_descriptor& v, const Graph& g) { - typename Graph::adjacency_iterator ai, ae; - std::tie(ai, ae) = adjacent_vertices(v, g); - return ai == ae; // no out edges -} - -/** \brief True if the out-degree of vertex \a v is greater than the given - * limit. */ -template -bool hasGreaterOutDegree(size_t limit, - const typename Graph::vertex_descriptor& v, - const Graph& g) { - typename Graph::out_edge_iterator ei, ee; - for (std::tie(ei, ee) = out_edges(v, g); ei != ee; ++ei) { - if (limit-- == 0) { - return true; - } - } - return false; -} - -/** \brief Returns true if the in-degree of vertex \a v is greater than the - * given limit. */ -template -bool hasGreaterInDegree(size_t limit, - const typename Graph::vertex_descriptor& v, - const Graph& g) { - typename Graph::in_edge_iterator ei, ee; - for (std::tie(ei, ee) = in_edges(v, g); ei != ee; ++ei) { - if (limit-- == 0) { - return true; - } - } - return false; -} - -/** - * \brief True if the degree of vertex \a v is greater than the given limit. - */ -template -bool has_greater_degree(size_t limit, - const typename Graph::vertex_descriptor &v, - const Graph &g) { - typename Graph::in_edge_iterator ei, ee; - for (std::tie(ei, ee) = in_edges(v, g); ei != ee; ++ei) { - if (limit-- == 0) { - return true; - } - } - typename Graph::out_edge_iterator oi, oe; - for (std::tie(oi, oe) = out_edges(v, g); oi != oe; ++oi) { - if (limit-- == 0) { - return true; - } - } - return false; + return out_degree(v, g) == 0; } /** \brief True if vertex \a v has an edge to itself. */ @@ -137,48 +84,10 @@ size_t proper_in_degree(const typename Graph::vertex_descriptor &v, return in_degree(v, g) - (edge(v, v, g).second ? 1 : 0); } -/** \brief Returns true iff the in-degree of vertex \a v is \a expected */ -template -bool in_degree_equal_to(const typename Graph::vertex_descriptor &v, - const Graph &g, size_t expected) { - size_t seen = 0; - typename Graph::in_edge_iterator ei, ee; - for (std::tie(ei, ee) = in_edges(v, g);; ++ei, seen++) { - if (seen == expected) { - return ei == ee; - } - if (ei == ee) { - return false; - } - } -} - -/** \brief same as edge(s, t, g) by finds edge by inspecting in-edges of target. - * Should be used when it is known that t has a small in-degree and when s - * may have a large out-degree. - */ -template -std::pair -edge_by_target(const typename Graph::vertex_descriptor &s, - const typename Graph::vertex_descriptor &t, const Graph &g) { - typename Graph::in_edge_iterator ei, ee; - for (std::tie(ei, ee) = in_edges(t, g); ei != ee; ++ei) { - if (source(*ei, g) == s) { - return std::make_pair(*ei, true); - } - } - - return std::make_pair(typename Graph::edge_descriptor(), false); -} - - /** \brief True if vertex \a v has at least one successor. 
*/ template bool has_successor(const typename Graph::vertex_descriptor &v, const Graph &g) { - typename Graph::adjacency_iterator ai, ae; - std::tie(ai, ae) = adjacent_vertices(v, g); - - return ai != ae; + return out_degree(v, g) > 0; } /** \brief True if vertex \a v has at least one successor other than itself. */ @@ -197,26 +106,6 @@ bool has_proper_successor(const typename Graph::vertex_descriptor &v, return ai != ae; } -/** \brief A version of clear_vertex that explicitly removes in- and out-edges - * for vertex \a v. For many graphs, this is faster than the BGL clear_vertex - * function, which walks the graph's full edge list. */ -template -void clear_vertex_faster(typename Graph::vertex_descriptor v, Graph &g) { - typename Graph::in_edge_iterator ei, ee; - tie(ei, ee) = in_edges(v, g); - while (ei != ee) { - remove_edge(*ei++, g); - } - - typename Graph::out_edge_iterator oi, oe; - tie(oi, oe) = out_edges(v, g); - while (oi != oe) { - // NOTE: version that takes out_edge_iterator is faster according to - // the BGL docs. - remove_edge(oi++, g); - } -} - /** \brief Find the set of vertices that are reachable from the vertices in \a * sources. */ template @@ -329,6 +218,40 @@ std::pair add_edge_if_not_present( return e; } +#ifndef NDEBUG + +template +bool hasCorrectlyNumberedVertices(const Graph &g) { + auto count = num_vertices(g); + std::vector ids(count, false); + for (auto v : vertices_range(g)) { + auto id = g[v].index; + if (id >= count || ids[id]) { + return false; // duplicate + } + ids[id] = true; + } + return std::find(ids.begin(), ids.end(), false) == ids.end() + && count == vertex_index_upper_bound(g); +} + +template +bool hasCorrectlyNumberedEdges(const Graph &g) { + auto count = num_edges(g); + std::vector ids(count, false); + for (const auto &e : edges_range(g)) { + auto id = g[e].index; + if (id >= count || ids[id]) { + return false; // duplicate + } + ids[id] = true; + } + return std::find(ids.begin(), ids.end(), false) == ids.end() + && count == edge_index_upper_bound(g); +} + +#endif + } // namespace ue2 #endif // UTIL_GRAPH_H diff --git a/src/util/graph_range.h b/src/util/graph_range.h index 82814695..3df06911 100644 --- a/src/util/graph_range.h +++ b/src/util/graph_range.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -51,7 +51,6 @@ #ifndef UTIL_GRAPH_RANGE_H #define UTIL_GRAPH_RANGE_H -#include #include namespace ue2 { diff --git a/src/util/ue2_graph.h b/src/util/ue2_graph.h new file mode 100644 index 00000000..07c24746 --- /dev/null +++ b/src/util/ue2_graph.h @@ -0,0 +1,1083 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UE2_GRAPH_H +#define UE2_GRAPH_H + +#include "ue2common.h" +#include "util/graph_range.h" + +#include +#include +#include /* vertex_index_t, ... */ +#include /* no_property */ +#include +#include +#include +#include + +#include /* tie */ +#include /* pair, declval */ + +/* + * Basic design of ue2_graph: + * + * Fairly standard adjacency list type graph structure. The main internal + * structures are vertex_node and edge_node. + * + * Each vertex_node maintains lists of incoming and outgoing edge_nodes, a + * serial number and the vertex properties. + * + * Each edge_node contains pointers to the source and target vertex as well as + * the serial number and edge properties. + * + * Every time an edge_node or vertex_node is created in the graph, it is given a + * unique serial number by increasing a private counter in the graph. + * + * The main thing to note is that the in and out edge lists are intrusive lists + * with the edge_node containing the necessary hooks. This means that we can + * easily convert the edge_node to iterators of the in_edge_list and + * out_edge_list and remove them from the lists. + * + * vertex_descriptor and edge_descriptor structures both just wrap pointers to + * the relevant node structure along with the serial number. operator<() for the + * descriptors is overridden to look at the serial member of the node. + * We do not use: + * - the address of the node structure as this would lead to an unstable + * ordering of vertices between runs. + * - the index field as this would mean that the generation of new index + * values (during say renumbering of vertex nodes after removing some + * vertices) would potentially reorder vertices and corrupt containers + * such as std::set<>. + * The serial number is copied into the descriptors so that we can still have + * descriptors in a container (such as set or unordered_set) after removing the + * underlying node. + * + * Hashing of descriptors is based on the serial field for similar reasons. + * + * + * + * Main differences from boost::adjacency_list<> with listS: + * + * (1) Deterministic ordering for vertices and edges + * boost::adjacency_list<> uses pointer ordering for vertex_descriptors. As + * a result, ordering of vertices and edges between runs is + * non-deterministic unless containers, etc use custom comparators. + * + * (2) Proper types for descriptors, etc. + * No more void * for vertex_descriptors and trying to use it for the wrong + * graph type. 
+ *
+ * (3) Constant time num_edges(), num_vertices(), degree(), in_degree() and
+ * out_degree()
+ * std::list::size() is meant to be constant time in C++11, but this is
+ * not always implemented as people want to keep ABI compatibility with
+ * existing C++98 standard libraries (gcc 4.8). As ue2_graph uses
+ * intrusive lists rather than std::list this is not an issue for us.
+ *
+ * (4) Constant time remove_edge(e, g)
+ * ue2_graph uses boost::intrusive_lists internally so we can easily unlink
+ * an edge from the in and out edge lists of its source and target.
+ *
+ * (5) More efficient edge(u, v, g) and remove_edge(u, v, g)
+ * ue2_graph will check which of u and v has the smaller relevant degree
+ * and use that to search for the edge(s).
+ *
+ * (6) Automatically populate the index field of vertex and edge bundles.
+ * Saves us from doing it manually. Naturally there is nothing to prevent
+ * the user from stuffing up the index properties later.
+ *
+ * (7) Different edge iteration order
+ * ue2_graph does not maintain an explicit global edge list, so the
+ * edge_iterator is constructed out of vertex_iterator and
+ * out_edge_iterators by iterating the out_edges of each vertex. This
+ * means that edge iteration order is not insertion order as it is for
+ * adjacency_list.
+ *
+ * (8) null_edge()
+ * Because why not?
+ *
+ * (9) vertex and edge properties must have an index field.
+ * We generally need them, so no effort has been put into specialising
+ * for the case where they are not present.
+ *
+ *
+ *
+ * Possible Future Work:
+ *
+ * (1) Improve edge(u, v, g) performance
+ * This function sees a fair amount of use and is O(n) in the smaller of
+ * the source out_degree and target in_degree. This could be improved by
+ * changing one of the edge containers to something similar to a multiset.
+ *
+ * (2) 'Lie' about the number of edges / vertices
+ *
+ * One of the main uses of num_edges() and num_vertices() is to allocate a
+ * vector, etc. so that it can be indexed by edge or vertex index. If
+ * num_edges() and num_vertices() returned the appropriate size for such a
+ * vector (at least one more than the largest index), we would be able to
+ * avoid some renumbering operations. Functions would have to be provided to
+ * get the real number of vertices and edges. Having num_vertices() and
+ * num_edges() return an over-estimate is not without precedent in the BGL
+ * - the filtered_graph adaptor does the same thing and is compatible with
+ * various (all?) BGL algorithms. It is not clear that this was done
+ * deliberately for the same reason or because it is difficult for
+ * filtered_graph to get the true counts.
+ *
+ * (3) Investigate slab/pooled allocation schemes for nodes.
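+ *
+ *
+ * Usage sketch (illustrative only; MyGraph, MyVertexProps and MyEdgeProps
+ * are hypothetical names): a concrete graph type derives from ue2_graph,
+ * passing itself and its index-bearing property bundles as template
+ * parameters, after which the BGL-style free functions work directly:
+ *
+ *     struct MyVertexProps { size_t index; };
+ *     struct MyEdgeProps { size_t index; };
+ *     struct MyGraph
+ *         : public ue2_graph<MyGraph, MyVertexProps, MyEdgeProps> { };
+ *
+ *     MyGraph g;
+ *     auto u = add_vertex(g);
+ *     auto v = add_vertex(g);
+ *     auto e = add_edge(u, v, g).first;
+ *     assert(g[u].index == 0 && g[v].index == 1); // auto-populated
+ *     assert(source(e, g) == u && target(e, g) == v);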
+ */ + +namespace ue2 { + +namespace graph_detail { + +class graph_base : boost::noncopyable { +}; + +struct default_edge_property { + size_t index; +}; + +struct default_vertex_property { + size_t index; +}; + +} + +template +class ue2_graph : graph_detail::graph_base { +private: + struct in_edge_tag { }; + struct out_edge_tag { }; + + struct vertex_node; + + using out_edge_hook + = boost::intrusive::list_base_hook >; + + /* in_edge_hook does not use safe mode as during graph destruction we do not + * maintain the in edge lists */ + using in_edge_hook + = boost::intrusive::list_base_hook, + boost::intrusive::link_mode >; + + struct edge_node : public out_edge_hook, public in_edge_hook { + explicit edge_node(u64a serial_in) : serial(serial_in) { } + + vertex_node *source = nullptr; + vertex_node *target = nullptr; + const u64a serial; /*< used to order edges. We do not use props.index so + * that there is no danger of invalidating sets or + * other containers by changing the index due to + * renumbering */ + EdgePropertyType props; + }; + + template using vertex_edge_list + = boost::intrusive::list >; + + struct vertex_node : public boost::intrusive::list_base_hook<> { + explicit vertex_node(u64a serial_in) : serial(serial_in) { } + + VertexPropertyType props; + const u64a serial; /*< used to order vertices. We do not use props.index + * so that there is no danger of invalidating sets or + * other containers by changing the index due to + * renumbering */ + + /* The incoming edges are not considered owned by the vertex */ + vertex_edge_list in_edge_list; + + /* The out going edges are considered owned by the vertex and + * need to be freed when the graph is begin destroyed */ + vertex_edge_list out_edge_list; + + /* The destructor only frees memory owned by the vertex and will leave + * the neighbour's edges in a bad state. If a vertex is being removed + * (rather than the graph being destroyed), then the more gentle clean + * up of clear_vertex() is required to be called first */ + ~vertex_node() { + out_edge_list.clear_and_dispose(delete_disposer()); + } + }; + + struct delete_disposer { + template void operator()(const T *d) const { delete d; } + }; + + struct in_edge_disposer { + void operator()(edge_node *e) const { + /* remove from source's out edge list before deleting */ + vertex_node *u = e->source; + u->out_edge_list.erase(u->out_edge_list.iterator_to(*e)); + delete e; + } + }; + + struct out_edge_disposer { + void operator()(edge_node *e) const { + /* remove from target's in edge list before deleting */ + vertex_node *v = e->target; + v->in_edge_list.erase(v->in_edge_list.iterator_to(*e)); + delete e; + } + }; + + using vertices_list_type + = boost::intrusive::list > >; + + vertices_list_type vertices_list; + +protected: /* to allow renumbering */ + static const size_t N_SPECIAL_VERTICES = 0; /* override in derived class */ + size_t next_vertex_index = 0; + size_t next_edge_index = 0; + +private: + size_t graph_edge_count = 0; /* maintained explicitly as we have no global + edge list */ + + u64a next_serial = 0; + u64a new_serial() { + u64a serial = next_serial++; + if (!next_serial) { + /* if we have created enough graph edges/vertices to overflow a u64a + * we must have spent close to an eternity adding to this graph so + * something must have gone very wrong and we will not be producing + * a final bytecode in a reasonable amount of time. Or, more likely, + * the next_serial value has become corrupt. 
*/ + throw std::overflow_error("too many graph edges/vertices created"); + } + return serial; + } +public: + using vertices_size_type = typename vertices_list_type::size_type; + using degree_size_type + = typename vertex_edge_list::size_type; + using edges_size_type = size_t; + + using vertex_property_type = VertexPropertyType; + using edge_property_type = EdgePropertyType; + + using graph_bundled = boost::no_property; + using vertex_bundled = VertexPropertyType; + using edge_bundled = EdgePropertyType; + + class vertex_descriptor : boost::totally_ordered { + public: + vertex_descriptor() : p(nullptr), serial(0) { } + explicit vertex_descriptor(vertex_node *pp) + : p(pp), serial(pp->serial) { } + + operator bool() const { return p; } + bool operator<(const vertex_descriptor b) const { + if (p && b.p) { + /* no vertices in the same graph can have the same serial */ + assert(p == b.p || serial != b.serial); + return serial < b.serial; + } else { + return p < b.p; + } + } + bool operator==(const vertex_descriptor b) const { + return p == b.p; + } + + friend size_t hash_value(vertex_descriptor v) { + using boost::hash_value; + return hash_value(v.serial); + } + + private: + vertex_node *p; + u64a serial; + friend ue2_graph; + }; + + class edge_descriptor : boost::totally_ordered { + public: + edge_descriptor() : p(nullptr), serial(0) { } + explicit edge_descriptor(edge_node *pp) : p(pp), serial(pp->serial) { } + + operator bool() const { return p; } + bool operator<(const edge_descriptor b) const { + if (p && b.p) { + /* no edges in the same graph can have the same serial */ + assert(p == b.p || serial != b.serial); + return serial < b.serial; + } else { + return p < b.p; + } + } + bool operator==(const edge_descriptor b) const { + return p == b.p; + } + + friend size_t hash_value(edge_descriptor e) { + using boost::hash_value; + return hash_value(e.serial); + } + + private: + edge_node *p; + u64a serial; + friend ue2_graph; + }; + +private: + static + vertex_node *raw(vertex_descriptor v) { return v.p; } + + static + edge_node *raw(edge_descriptor e) { return e.p; } + + /* Note: apparently, nested class templates cannot be fully specialised but + * they can be partially specialised. Sigh, ... 
*/ + template + struct bundle_key_type { + }; + + template + struct bundle_key_type { + using type = vertex_descriptor; + }; + + template + struct bundle_key_type { + using type = edge_descriptor; + }; + +public: + class out_edge_iterator : public boost::iterator_adaptor< + out_edge_iterator, + typename vertex_edge_list::const_iterator, + edge_descriptor, + boost::bidirectional_traversal_tag, + edge_descriptor> { + using super = typename out_edge_iterator::iterator_adaptor_; + public: + out_edge_iterator() : super() { } + explicit out_edge_iterator( + typename vertex_edge_list::const_iterator it) + : super(it) { } + edge_descriptor dereference() const { + /* :( const_cast makes me sad but constness is defined by the graph + * parameter of bgl api calls */ + return edge_descriptor(const_cast(&*super::base())); + } + }; + + class in_edge_iterator : public boost::iterator_adaptor< + in_edge_iterator, + typename vertex_edge_list::const_iterator, + edge_descriptor, + boost::bidirectional_traversal_tag, + edge_descriptor> { + using super = typename in_edge_iterator::iterator_adaptor_; + public: + in_edge_iterator() : super() { } + explicit in_edge_iterator( + typename vertex_edge_list::const_iterator it) + : super(it) { } + edge_descriptor dereference() const { + /* :( const_cast makes me sad but constness is defined by the graph + * parameter of bgl api calls */ + return edge_descriptor(const_cast(&*super::base())); + } + }; + + class adjacency_iterator : public boost::iterator_adaptor< + adjacency_iterator, + out_edge_iterator, + vertex_descriptor, + boost::bidirectional_traversal_tag, + vertex_descriptor> { + using super = typename adjacency_iterator::iterator_adaptor_; + public: + adjacency_iterator(out_edge_iterator a) : super(std::move(a)) { } + adjacency_iterator() { } + + vertex_descriptor dereference() const { + return vertex_descriptor(super::base()->p->target); + } + }; + + class inv_adjacency_iterator : public boost::iterator_adaptor< + inv_adjacency_iterator, + in_edge_iterator, + vertex_descriptor, + boost::bidirectional_traversal_tag, + vertex_descriptor> { + using super = typename inv_adjacency_iterator::iterator_adaptor_; + public: + inv_adjacency_iterator(in_edge_iterator a) : super(std::move(a)) { } + inv_adjacency_iterator() { } + + vertex_descriptor dereference() const { + return vertex_descriptor(super::base()->p->source); + } + }; + + class vertex_iterator : public boost::iterator_adaptor< + vertex_iterator, + typename vertices_list_type::const_iterator, + vertex_descriptor, + boost::bidirectional_traversal_tag, + vertex_descriptor> { + using super = typename vertex_iterator::iterator_adaptor_; + public: + vertex_iterator() : super() { } + explicit vertex_iterator(typename vertices_list_type::const_iterator it) + : super(it) { } + vertex_descriptor dereference() const { + /* :( const_cast makes me sad but constness is defined by the graph + * parameter of bgl api calls */ + return vertex_descriptor( + const_cast(&*super::base())); + } + }; + + class edge_iterator : public boost::iterator_facade< + edge_iterator, + edge_descriptor, + boost::forward_traversal_tag, /* TODO: make bidi */ + edge_descriptor> { + public: + using main_base_iter_type = vertex_iterator; + using aux_base_iter_type = out_edge_iterator; + + edge_iterator(main_base_iter_type b, main_base_iter_type e) + : main(std::move(b)), main_end(std::move(e)) { + if (main == main_end) { + return; + } + std::tie(aux, aux_end) = out_edges_i(*main); + while (aux == aux_end) { + ++main; + if (main == main_end) { + 
break; + } + std::tie(aux, aux_end) = out_edges_i(*main); + } + } + edge_iterator() { } + + friend class boost::iterator_core_access; + void increment() { + ++aux; + while (aux == aux_end) { + ++main; + if (main == main_end) { + break; + } + std::tie(aux, aux_end) = out_edges_i(*main); + } + } + bool equal(const edge_iterator &other) const { + return main == other.main && (main == main_end || aux == other.aux); + } + edge_descriptor dereference() const { + return *aux; + } + + main_base_iter_type main; + main_base_iter_type main_end; + aux_base_iter_type aux; + aux_base_iter_type aux_end; + }; + +private: + static + std::pair + out_edges_i(vertex_descriptor v) { + return {out_edge_iterator(raw(v)->out_edge_list.begin()), + out_edge_iterator(raw(v)->out_edge_list.end())}; + } + +public: + static + vertex_descriptor null_vertex() { return vertex_descriptor(); } + + friend + vertex_descriptor add_vertex(Graph &g) { + vertex_node *v = new vertex_node(g.new_serial()); + v->props.index = g.next_vertex_index++; + g.vertices_list.push_back(*v); + return vertex_descriptor(v); + } + + friend + void remove_vertex(vertex_descriptor v, Graph &g) { + vertex_node *vv = Graph::raw(v); + assert(vv->in_edge_list.empty()); + assert(vv->out_edge_list.empty()); + g.vertices_list.erase_and_dispose(g.vertices_list.iterator_to(*vv), + delete_disposer()); + } + + friend + void clear_in_edges(vertex_descriptor v, Graph &g) { + g.graph_edge_count -= Graph::raw(v)->in_edge_list.size(); + Graph::raw(v)->in_edge_list.clear_and_dispose(in_edge_disposer()); + } + + friend + void clear_out_edges(vertex_descriptor v, Graph &g) { + g.graph_edge_count -= Graph::raw(v)->out_edge_list.size(); + Graph::raw(v)->out_edge_list.clear_and_dispose(out_edge_disposer()); + } + + friend + void clear_vertex(vertex_descriptor v, Graph &g) { + clear_in_edges(v, g); + clear_out_edges(v, g); + } + + /* IncidenceGraph concept functions */ + + friend + vertex_descriptor source(edge_descriptor e, const Graph &) { + return vertex_descriptor(Graph::raw(e)->source); + } + + friend + vertex_descriptor target(edge_descriptor e, const Graph &) { + return vertex_descriptor(Graph::raw(e)->target); + } + + friend + degree_size_type out_degree(vertex_descriptor v, const Graph &) { + return Graph::raw(v)->out_edge_list.size(); + } + + friend + std::pair + out_edges(vertex_descriptor v, const Graph &) { + return Graph::out_edges_i(v); + } + + /* BidirectionalGraph concept functions */ + + friend + degree_size_type in_degree(vertex_descriptor v, const Graph &) { + return Graph::raw(v)->in_edge_list.size(); + } + + friend + std::pair + in_edges(vertex_descriptor v, const Graph &) { + return {in_edge_iterator(Graph::raw(v)->in_edge_list.begin()), + in_edge_iterator(Graph::raw(v)->in_edge_list.end())}; + } + + /* Note: this is defined so that self loops are counted twice - which may or + * may not be what you want. Actually, you probably don't want this at + * all. 
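+     * e.g. a vertex v whose only incident edge is the self loop (v, v) has
+     * in_degree(v, g) == 1 and out_degree(v, g) == 1, so degree(v, g) == 2.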
*/ + friend + degree_size_type degree(vertex_descriptor v, const Graph &g) { + return in_degree(v, g) + out_degree(v, g); + } + + /* AdjacencyList concept functions */ + + friend + std::pair + adjacent_vertices(vertex_descriptor v, const Graph &g) { + auto out_edge_its = out_edges(v, g); + return {adjacency_iterator(out_edge_its.first), + adjacency_iterator(out_edge_its.second)}; + } + + /* AdjacencyMatrix concept functions + * (Note: complexity guarantee is not met) */ + + friend + std::pair edge(vertex_descriptor u, + vertex_descriptor v, const Graph &g) { + if (in_degree(v, g) < out_degree(u, g)) { + for (const edge_descriptor &e : in_edges_range(v, g)) { + if (source(e, g) == u) { + return {e, true}; + } + } + } else { + for (const edge_descriptor &e : out_edges_range(u, g)) { + if (target(e, g) == v) { + return {e, true}; + } + } + } + + return {edge_descriptor(), false}; + } + + /* Misc functions that don't actually seem to belong to a formal BGL + concept. */ + static + edge_descriptor null_edge() { return edge_descriptor(); } + + friend + std::pair + inv_adjacent_vertices(vertex_descriptor v, const Graph &g) { + auto in_edge_its = in_edges(v, g); + return {inv_adjacency_iterator(in_edge_its.first), + inv_adjacency_iterator(in_edge_its.second)}; + } + + /* MutableGraph concept functions */ + + friend + std::pair + add_edge(vertex_descriptor u, vertex_descriptor v, Graph &g) { + bool added = true; /* we always allow parallel edges */ + edge_node *e = new edge_node(g.new_serial()); + e->source = Graph::raw(u); + e->target = Graph::raw(v); + e->props.index = g.next_edge_index++; + + Graph::raw(u)->out_edge_list.push_back(*e); + Graph::raw(v)->in_edge_list.push_back(*e); + + g.graph_edge_count++; + return {edge_descriptor(e), added}; + } + + friend + void remove_edge(edge_descriptor e, Graph &g) { + g.graph_edge_count--; + + vertex_node *u = Graph::raw(source(e, g)); + vertex_node *v = Graph::raw(target(e, g)); + + v->in_edge_list.erase(v->in_edge_list.iterator_to(*Graph::raw(e))); + u->out_edge_list.erase(u->out_edge_list.iterator_to(*Graph::raw(e))); + + delete Graph::raw(e); + } + + template + friend + void remove_edge(Iter it, Graph &g) { + remove_edge(*it, g); + } + + template + friend + void remove_out_edge_if(vertex_descriptor v, Predicate pred, Graph &g) { + out_edge_iterator it, ite; + std::tie(it, ite) = out_edges(v, g); + while (it != ite) { + auto jt = it; + ++it; + if (pred(*jt)) { + remove_edge(*jt, g); + } + } + } + + template + friend + void remove_in_edge_if(vertex_descriptor v, Predicate pred, Graph &g) { + in_edge_iterator it, ite; + std::tie(it, ite) = in_edges(v, g); + while (it != ite) { + auto jt = it; + ++it; + if (pred(*jt)) { + remove_edge(*jt, g); + } + } + } + + template + friend + void remove_edge_if(Predicate pred, Graph &g) { + edge_iterator it, ite; + std::tie(it, ite) = edges(g); + while (it != ite) { + auto jt = it; + ++it; + if (pred(*jt)) { + remove_edge(*jt, g); + } + } + } + +private: + /* GCC 4.8 has bugs with lambdas in templated friend functions, so: */ + struct source_match { + source_match(const vertex_descriptor &uu, const Graph &gg) + : u(uu), g(gg) { } + bool operator()(edge_descriptor e) const { return source(e, g) == u; } + const vertex_descriptor &u; + const Graph &g; + }; + + struct target_match { + target_match(const vertex_descriptor &vv, const Graph &gg) + : v(vv), g(gg) { } + bool operator()(edge_descriptor e) const { return target(e, g) == v; } + const vertex_descriptor &v; + const Graph &g; + }; +public: + + /* Note: (u,v) variant 
needs to remove all (parallel) edges between (u,v). + * + * The edge_descriptor version should be strongly preferred if the + * edge_descriptor is available. + */ + friend + void remove_edge(const vertex_descriptor &u, + const vertex_descriptor &v, + Graph &g) { + if (in_degree(v, g) < out_degree(u, g)) { + remove_in_edge_if(v, source_match(u, g), g); + } else { + remove_out_edge_if(u, target_match(v, g), g); + } + } + + /* VertexListGraph concept functions */ + + friend + vertices_size_type num_vertices(const Graph &g) { + return g.vertices_list.size(); + } + + friend + std::pair vertices(const Graph &g) { + return {vertex_iterator(g.vertices_list.begin()), + vertex_iterator(g.vertices_list.end())}; + } + + /* EdgeListGraph concept functions (aside from those in IncidenceGraph) */ + + friend + edges_size_type num_edges(const Graph &g) { + return g.graph_edge_count; + } + + friend + std::pair edges(const Graph &g) { + vertex_iterator vi, ve; + std::tie(vi, ve) = vertices(g); + + return {edge_iterator(vi, ve), edge_iterator(ve, ve)}; + } + + /* bundled properties functions */ + + vertex_property_type &operator[](vertex_descriptor v) { + return raw(v)->props; + } + + const vertex_property_type &operator[](vertex_descriptor v) const { + return raw(v)->props; + } + + edge_property_type &operator[](edge_descriptor e) { + return raw(e)->props; + } + + const edge_property_type &operator[](edge_descriptor e) const { + return raw(e)->props; + } + + /* PropertyGraph concept functions & helpers */ + + template + struct prop_map : public boost::put_get_helper > { + using value_type = typename std::decay::type; + using reference = R; + using key_type = typename bundle_key_type::type; + + typedef typename boost::lvalue_property_map_tag category; + + prop_map(value_type P_of::*m_in) : member(m_in) { } + + reference operator[](key_type k) const { + return Graph::raw(k)->props.*member; + } + reference operator()(key_type k) const { return (*this)[k]; } + + private: + value_type P_of::*member; + }; + + template + struct prop_map_all : public boost::put_get_helper > { + using value_type = typename std::decay::type; + using reference = R; + using key_type = typename bundle_key_type::type; + + typedef typename boost::lvalue_property_map_tag category; + + reference operator[](key_type k) const { + return Graph::raw(k)->props; + } + reference operator()(key_type k) const { return (*this)[k]; } + }; + + template + friend + prop_map get(P_type P_of::*t, Graph &) { + return prop_map(t); + } + + template + friend + prop_map get(P_type P_of::*t, const Graph &) { + return prop_map(t); + } + + /* We can't seem to use auto/decltype returns here as it seems that the + * templated member functions are not yet visible when the compile is + * evaluating the decltype for the return value. We could probably work + * around it by making this a dummy templated function. 
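+     *
+     * Illustrative use of these index maps (per the BGL property map
+     * protocol):
+     *     auto idx = get(boost::vertex_index, g); // map over props.index
+     *     size_t i = idx[v];                      // same value as g[v].index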
*/ + friend + prop_map + get(boost::vertex_index_t, Graph &g) { + return get(&VertexPropertyType::index, g); + } + + friend + prop_map + get(boost::vertex_index_t, const Graph &g) { + return get(&VertexPropertyType::index, g); + } + + friend + prop_map + get(boost::edge_index_t, Graph &g) { + return get(&EdgePropertyType::index, g); + } + + friend + prop_map + get(boost::edge_index_t, const Graph &g) { + return get(&EdgePropertyType::index, g); + } + + friend + prop_map_all get(boost::vertex_all_t, Graph &) { + return {}; + } + + friend + prop_map_all get(boost::vertex_all_t, + const Graph &) { + return {}; + } + + friend + prop_map_all get(boost::edge_all_t, Graph &) { + return {}; + } + + friend + prop_map_all get(boost::edge_all_t, + const Graph &) { + return {}; + } + + friend + prop_map_all get(boost::vertex_bundle_t, Graph &) { + return {}; + } + + friend + prop_map_all get(boost::vertex_bundle_t, + const Graph &) { + return {}; + } + + friend + prop_map_all get(boost::edge_bundle_t, Graph &) { + return {}; + } + + friend + prop_map_all get(boost::edge_bundle_t, + const Graph &) { + return {}; + } + + template + friend + auto get(Prop p, Graph &g, K key) -> decltype(get(p, g)[key]) { + return get(p, g)[key]; + } + + template + friend + auto get(Prop p, const Graph &g, K key) -> decltype(get(p, g)[key]) { + return get(p, g)[key]; + } + + template + friend + void put(Prop p, Graph &g, K key, const V &value) { + get(p, g)[key] = value; + } + + /* MutablePropertyGraph concept functions */ + + /* Note: add_vertex(g, vp) allocates a next index value for the vertex + * rather than using the index in vp. i.e., except for in rare coincidences: + * g[add_vertex(g, vp)].index != vp.index + */ + friend + vertex_descriptor add_vertex(const VertexPropertyType &vp, Graph &g) { + vertex_descriptor v = add_vertex(g); + auto i = g[v].index; + g[v] = vp; + g[v].index = i; + + return v; + } + + /* Note: add_edge(u, v, g, vp) allocates a next index value for the edge + * rather than using the index in ep. i.e., except for in rare coincidences: + * g[add_edge(u, v, g, ep)].index != ep.index + */ + friend + std::pair + add_edge(vertex_descriptor u, vertex_descriptor v, + const EdgePropertyType &ep, Graph &g) { + auto e = add_edge(u, v, g); + auto i = g[e.first].index; + g[e.first] = ep; + g[e.first].index = i; + + return e; + } + + /* End MutablePropertyGraph */ + + /** Pack the edge index into a contiguous range [ 0, num_edges(g) ). */ + friend + void renumber_edges(Graph &g) { + g.next_edge_index = 0; + for (const auto &e : edges_range(g)) { + g[e].index = g.next_edge_index++; + } + } + + /** Pack the vertex index into a contiguous range [ 0, num_vertices(g) ). + * Vertices with indices less than N_SPECIAL_VERTICES are not renumbered. + */ + friend + void renumber_vertices(Graph &g) { + DEBUG_PRINTF("renumbering above %zu\n", Graph::N_SPECIAL_VERTICES); + g.next_vertex_index = Graph::N_SPECIAL_VERTICES; + for (const auto &v : vertices_range(g)) { + if (g[v].index < Graph::N_SPECIAL_VERTICES) { + continue; + } + + g[v].index = g.next_vertex_index++; + } + } + + /** Returns what the next allocated vertex index will be. This is an upper + * on the values of index for vertices (vertex removal means that there may + * be gaps). */ + friend + vertices_size_type vertex_index_upper_bound(const Graph &g) { + return g.next_vertex_index; + } + + /** Returns what the next allocated edge index will be. This is an upper on + * the values of index for edges (edge removal means that there may be + * gaps). 
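+     * e.g. a vector to be indexed by edge index should be sized as
+     *     std::vector<u32> by_edge(edge_index_upper_bound(g));
+     * rather than with num_edges(g), which may be smaller when gaps exist.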
*/ + friend + vertices_size_type edge_index_upper_bound(const Graph &g) { + return g.next_edge_index; + } + + using directed_category = boost::directed_tag; + using edge_parallel_category = boost::allow_parallel_edge_tag; + struct traversal_category : + public virtual boost::bidirectional_graph_tag, + public virtual boost::adjacency_graph_tag, + public virtual boost::vertex_list_graph_tag, + public virtual boost::edge_list_graph_tag { }; + + ue2_graph() = default; + + ue2_graph(ue2_graph &&old) + : next_vertex_index(old.next_vertex_index), + next_edge_index(old.next_edge_index), + graph_edge_count(old.graph_edge_count), + next_serial(old.next_serial) { + using std::swap; + swap(vertices_list, old.vertices_list); + } + + ue2_graph &operator=(ue2_graph &&old) { + next_vertex_index = old.next_vertex_index; + next_edge_index = old.next_edge_index; + graph_edge_count = old.graph_edge_count; + next_serial = old.next_serial; + using std::swap; + swap(vertices_list, old.vertices_list); + return *this; + } + + ~ue2_graph() { + vertices_list.clear_and_dispose(delete_disposer()); + } +}; + +using boost::vertex_index; +using boost::edge_index; + +} + +namespace boost { + +/* Install partial specialisation of property_map - this is required for + * adaptors (like filtered_graph) to know the type of the property maps */ +template +struct property_map::value + >::type > { + typedef decltype(get(std::declval(), + std::declval())) type; + typedef decltype(get(std::declval(), + std::declval())) const_type; +}; + +} +#endif diff --git a/unit/internal/graph.cpp b/unit/internal/graph.cpp index 3ab3326d..3f81ac13 100644 --- a/unit/internal/graph.cpp +++ b/unit/internal/graph.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,10 +29,14 @@ #include "config.h" #include "gtest/gtest.h" #include "util/graph.h" +#include "util/ue2_graph.h" #include #include #include +#include + +#include using namespace boost; using namespace std; @@ -167,107 +171,1614 @@ TEST(graph_util, degrees) { ASSERT_TRUE( has_proper_successor(d, g)); ASSERT_FALSE(has_proper_successor(e, g)); ASSERT_TRUE( has_proper_successor(f, g)); - - ASSERT_TRUE( hasGreaterInDegree(0, a, g)); - ASSERT_FALSE(hasGreaterInDegree(1, a, g)); - ASSERT_TRUE( hasGreaterInDegree(2, b, g)); - ASSERT_FALSE(hasGreaterInDegree(3, b, g)); - ASSERT_TRUE( hasGreaterInDegree(1, c, g)); - ASSERT_FALSE(hasGreaterInDegree(2, c, g)); - ASSERT_FALSE(hasGreaterInDegree(0, d, g)); - ASSERT_TRUE( hasGreaterInDegree(1, e, g)); - ASSERT_FALSE(hasGreaterInDegree(2, e, g)); - ASSERT_FALSE(hasGreaterInDegree(0, f, g)); - - ASSERT_TRUE( hasGreaterOutDegree(0, a, g)); - ASSERT_FALSE(hasGreaterOutDegree(1, a, g)); - ASSERT_TRUE( hasGreaterOutDegree(1, b, g)); - ASSERT_FALSE(hasGreaterOutDegree(2, b, g)); - ASSERT_FALSE(hasGreaterOutDegree(0, c, g)); - ASSERT_TRUE( hasGreaterOutDegree(0, d, g)); - ASSERT_FALSE(hasGreaterOutDegree(1, d, g)); - ASSERT_TRUE( hasGreaterOutDegree(0, e, g)); - ASSERT_FALSE(hasGreaterOutDegree(1, e, g)); - ASSERT_TRUE( hasGreaterOutDegree(2, f, g)); - ASSERT_FALSE(hasGreaterOutDegree(3, f, g)); } -TEST(graph_util, in_degree_equal_to_1) { - unit_graph g; +struct SimpleV { + size_t index; + string test_v = "SimpleV"; +}; - unit_vertex a = add_vertex(g); - unit_vertex b = add_vertex(g); - unit_vertex c = add_vertex(g); - unit_vertex d = add_vertex(g); 
+struct SimpleE { + size_t index; + string test_e = "SimpleE"; +}; - ASSERT_TRUE(in_degree_equal_to(a, g, 0)); - ASSERT_FALSE(in_degree_equal_to(a, g, 1)); - ASSERT_FALSE(in_degree_equal_to(a, g, 2)); +struct SimpleG : public ue2_graph { +}; + +TEST(ue2_graph, graph_concept) { + static_assert(std::is_same::vertex_descriptor>::value, + "vertex_descriptor"); + static_assert(std::is_same::edge_descriptor>::value, + "edge_descriptor"); + static_assert(std::is_same::directed_category>::value, + "directed_category"); + static_assert(std::is_same::edge_parallel_category>::value, + "edge_parallel_category"); + static_assert(std::is_same::traversal_category>::value, + "traversal_category"); + + UNUSED SimpleG::vertex_descriptor n = SimpleG::null_vertex(); + + BOOST_CONCEPT_ASSERT((GraphConcept)); +} + +TEST(ue2_graph, vertex_list_concept) { + BOOST_CONCEPT_ASSERT((VertexListGraphConcept)); +} + +TEST(ue2_graph, edge_list_concept) { + BOOST_CONCEPT_ASSERT((EdgeListGraphConcept)); +} + +TEST(ue2_graph, incidence_concept) { + BOOST_CONCEPT_ASSERT((IncidenceGraphConcept)); +} + +TEST(ue2_graph, bidi_concept) { + BOOST_CONCEPT_ASSERT((BidirectionalGraphConcept)); +} + +TEST(ue2_graph, mutable_concept) { + BOOST_CONCEPT_ASSERT((MutableGraphConcept)); +} + +TEST(ue2_graph, property_concept) { + static_assert(std::is_same::value, + "vertex_property_type"); + static_assert(std::is_same::value, + "edge_property_type"); + + /* Although documented as part of the MutablePropertyGraph concept, + * (vertex|edge)_property_type don't appear to exist in the traits for any + * existing graph types and the typedefs are not installed by default */ + + // static_assert(std::is_same< + // typename graph_traits::vertex_property_type, + // SimpleV>::value, + // "vertex_property_type"); + // static_assert(std::is_same< + // typename graph_traits::edge_property_type, + // SimpleE>::value, + // "edge_property_type"); + + /* However, there does seem to be an undocumented templated structure + * paralleling the main graph_traits */ + static_assert(std::is_same< + typename vertex_property_type::type, + SimpleV>::value, + "vertex_property_type"); + static_assert(std::is_same< + typename edge_property_type::type, + SimpleE>::value, + "edge_property_type"); + + BOOST_CONCEPT_ASSERT((VertexMutablePropertyGraphConcept)); + BOOST_CONCEPT_ASSERT((EdgeMutablePropertyGraphConcept)); +} + +TEST(ue2_graph, add_vertex) { + SimpleG g; + SimpleG::vertex_descriptor a = add_vertex(g); + ASSERT_NE(SimpleG::null_vertex(), a); +} + +TEST(ue2_graph, add_and_remove_vertex) { + SimpleG g; + ASSERT_EQ(0U, num_vertices(g)); + + SimpleG::vertex_descriptor a = add_vertex(g); + ASSERT_EQ(1U, num_vertices(g)); + ASSERT_NE(SimpleG::null_vertex(), a); + auto p = vertices(g); + ASSERT_NE(p.first, p.second); + ASSERT_EQ(a, *p.first); + ++p.first; + ASSERT_EQ(p.first, p.second); + + remove_vertex(a, g); + ASSERT_EQ(0U, num_vertices(g)); + auto q = vertices(g); + ASSERT_EQ(q.first, q.second); +} + +TEST(ue2_graph, add_edge) { + SimpleG g; + SimpleG::vertex_descriptor a = add_vertex(g); + ASSERT_NE(SimpleG::null_vertex(), a); + SimpleG::vertex_descriptor b = add_vertex(g); + ASSERT_NE(SimpleG::null_vertex(), b); + ASSERT_NE(a, b); + auto p = add_edge(a, b, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(1U, num_edges(g)); + + ASSERT_EQ(a, source(p.first, g)); + ASSERT_EQ(b, target(p.first, g)); + + auto q = edge(a, b, g); + ASSERT_TRUE(q.second); + ASSERT_EQ(p.second, q.first); +} + +TEST(ue2_graph, add_remove_edge1) { + SimpleG g; + SimpleG::vertex_descriptor a = 
add_vertex(g); + ASSERT_NE(SimpleG::null_vertex(), a); + SimpleG::vertex_descriptor b = add_vertex(g); + ASSERT_NE(SimpleG::null_vertex(), b); + ASSERT_NE(a, b); + auto p = add_edge(a, b, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(1U, num_edges(g)); + + ASSERT_EQ(a, source(p.first, g)); + ASSERT_EQ(b, target(p.first, g)); + + remove_edge(p.first, g); + auto q = edge(a, b, g); + ASSERT_FALSE(q.second); + ASSERT_EQ(q.first, SimpleG::null_edge()); + ASSERT_EQ(0U, num_edges(g)); +} + +TEST(ue2_graph, add_remove_edge2) { + SimpleG g; + SimpleG::vertex_descriptor a = add_vertex(g); + ASSERT_NE(SimpleG::null_vertex(), a); + SimpleG::vertex_descriptor b = add_vertex(g); + ASSERT_NE(SimpleG::null_vertex(), b); + ASSERT_NE(a, b); + auto p = add_edge(a, b, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(1U, num_edges(g)); + + ASSERT_EQ(a, source(p.first, g)); + ASSERT_EQ(b, target(p.first, g)); + + remove_edge(a, b, g); + auto q = edge(a, b, g); + ASSERT_FALSE(q.second); + ASSERT_EQ(q.first, SimpleG::null_edge()); + ASSERT_EQ(0U, num_edges(g)); +} + +TEST(ue2_graph, add_edge_clear1) { + SimpleG g; + SimpleG::vertex_descriptor a = add_vertex(g); + ASSERT_NE(SimpleG::null_vertex(), a); + SimpleG::vertex_descriptor b = add_vertex(g); + ASSERT_NE(SimpleG::null_vertex(), b); + ASSERT_NE(a, b); + auto p = add_edge(a, b, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(1U, num_edges(g)); + + ASSERT_EQ(a, source(p.first, g)); + ASSERT_EQ(b, target(p.first, g)); + + clear_vertex(a, g); + auto q = edge(a, b, g); + ASSERT_FALSE(q.second); + ASSERT_EQ(q.first, SimpleG::null_edge()); + ASSERT_EQ(0U, num_edges(g)); +} + +TEST(ue2_graph, add_edge_clear2) { + SimpleG g; + SimpleG::vertex_descriptor a = add_vertex(g); + ASSERT_NE(SimpleG::null_vertex(), a); + SimpleG::vertex_descriptor b = add_vertex(g); + ASSERT_NE(SimpleG::null_vertex(), b); + ASSERT_NE(a, b); + auto p = add_edge(a, b, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(1U, num_edges(g)); + + ASSERT_EQ(a, source(p.first, g)); + ASSERT_EQ(b, target(p.first, g)); + + clear_vertex(b, g); + auto q = edge(a, b, g); + ASSERT_FALSE(q.second); + ASSERT_EQ(q.first, SimpleG::null_edge()); + ASSERT_EQ(0U, num_edges(g)); +} + +TEST(ue2_graph, add_edge_clear_out) { + SimpleG g; + SimpleG::vertex_descriptor a = add_vertex(g); + ASSERT_NE(SimpleG::null_vertex(), a); + SimpleG::vertex_descriptor b = add_vertex(g); + ASSERT_NE(SimpleG::null_vertex(), b); + ASSERT_NE(a, b); + auto p = add_edge(a, b, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(1U, num_edges(g)); + + ASSERT_EQ(a, source(p.first, g)); + ASSERT_EQ(b, target(p.first, g)); + + clear_out_edges(a, g); + auto q = edge(a, b, g); + ASSERT_FALSE(q.second); + ASSERT_EQ(q.first, SimpleG::null_edge()); + ASSERT_EQ(0U, num_edges(g)); +} + +TEST(ue2_graph, add_edge_clear_in) { + SimpleG g; + SimpleG::vertex_descriptor a = add_vertex(g); + ASSERT_NE(SimpleG::null_vertex(), a); + SimpleG::vertex_descriptor b = add_vertex(g); + ASSERT_NE(SimpleG::null_vertex(), b); + ASSERT_NE(a, b); + auto p = add_edge(a, b, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(1U, num_edges(g)); + + ASSERT_EQ(a, source(p.first, g)); + ASSERT_EQ(b, target(p.first, g)); + + clear_in_edges(b, g); + auto q = edge(a, b, g); + ASSERT_FALSE(q.second); + ASSERT_EQ(q.first, SimpleG::null_edge()); + ASSERT_EQ(0U, num_edges(g)); +} + +TEST(ue2_graph, add_remove_edge_iter) { + SimpleG g; + SimpleG::vertex_descriptor a = add_vertex(g); + ASSERT_NE(SimpleG::null_vertex(), a); + SimpleG::vertex_descriptor b = add_vertex(g); + ASSERT_NE(SimpleG::null_vertex(), b); + ASSERT_NE(a, b); + auto p 
= add_edge(a, b, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(1U, num_edges(g)); + + ASSERT_EQ(a, source(p.first, g)); + ASSERT_EQ(b, target(p.first, g)); + + remove_edge(edges(g).first, g); + auto q = edge(a, b, g); + ASSERT_FALSE(q.second); + ASSERT_EQ(q.first, SimpleG::null_edge()); + ASSERT_EQ(0U, num_edges(g)); +} + +TEST(ue2_graph, vertices_0) { + SimpleG g; + auto p = vertices(g); + ASSERT_EQ(p.first, p.second); +} + +TEST(ue2_graph, vertices_1) { + SimpleG g; + SimpleG::vertex_iterator vi; + SimpleG::vertex_iterator ve; + auto a = add_vertex(g); + + ASSERT_EQ(1U, num_vertices(g)); + tie(vi, ve) = vertices(g); + ASSERT_EQ(a, *vi++); + ASSERT_EQ(vi, ve); + + auto b = add_vertex(g); + auto c = add_vertex(g); + auto d = add_vertex(g); + + ASSERT_EQ(4U, num_vertices(g)); + tie(vi, ve) = vertices(g); + ASSERT_EQ(a, *vi++); + ASSERT_EQ(b, *vi++); + ASSERT_EQ(c, *vi++); + ASSERT_EQ(d, *vi++); + ASSERT_EQ(vi, ve); + + remove_vertex(c, g); + + ASSERT_EQ(3U, num_vertices(g)); + tie(vi, ve) = vertices(g); + ASSERT_EQ(a, *vi++); + ASSERT_EQ(b, *vi++); + ASSERT_EQ(d, *vi++); + ASSERT_EQ(vi, ve); + + remove_vertex(a, g); + + ASSERT_EQ(2U, num_vertices(g)); + tie(vi, ve) = vertices(g); + ASSERT_EQ(b, *vi++); + ASSERT_EQ(d, *vi++); + ASSERT_EQ(vi, ve); + + auto e = add_vertex(g); + + ASSERT_EQ(3U, num_vertices(g)); + tie(vi, ve) = vertices(g); + ASSERT_EQ(b, *vi++); + ASSERT_EQ(d, *vi++); + ASSERT_EQ(e, *vi++); + ASSERT_EQ(vi, ve); + + remove_vertex(e, g); + + ASSERT_EQ(2U, num_vertices(g)); + tie(vi, ve) = vertices(g); + ASSERT_EQ(b, *vi++); + ASSERT_EQ(d, *vi++); + ASSERT_EQ(vi, ve); + + remove_vertex(b, g); + remove_vertex(d, g); + + ASSERT_EQ(0U, num_vertices(g)); + tie(vi, ve) = vertices(g); + ASSERT_EQ(vi, ve); +} + +TEST(ue2_graph, out_edges_1) { + SimpleG g; + auto a = add_vertex(g); + + ASSERT_EQ(1U, num_vertices(g)); + ASSERT_EQ(0U, out_degree(a, g)); + + SimpleG::out_edge_iterator ei; + SimpleG::out_edge_iterator ee; + + tie(ei, ee) = out_edges(a, g); + ASSERT_TRUE(ei == ee); + + auto p = add_edge(a, a, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(1U, num_edges(g)); + SimpleG::edge_descriptor e1 = p.first; + + ASSERT_EQ(1U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ei, ee); + + p = add_edge(a, a, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(2U, num_edges(g)); + SimpleG::edge_descriptor e2 = p.first; + + ASSERT_EQ(2U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ei, ee); +} + +TEST(ue2_graph, out_edges_2) { + SimpleG g; + auto a = add_vertex(g); + auto b = add_vertex(g); + auto c = add_vertex(g); + + ASSERT_EQ(3U, num_vertices(g)); + ASSERT_EQ(0U, out_degree(a, g)); + + SimpleG::out_edge_iterator ei; + SimpleG::out_edge_iterator ee; + + tie(ei, ee) = out_edges(a, g); + ASSERT_TRUE(ei == ee); + + auto p = add_edge(a, b, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(1U, num_edges(g)); + SimpleG::edge_descriptor e1 = p.first; + + ASSERT_EQ(1U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ei, ee); + + p = add_edge(a, c, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(2U, num_edges(g)); + SimpleG::edge_descriptor e2 = p.first; + + ASSERT_EQ(2U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ei, ee); + + p = add_edge(c, b, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(3U, num_edges(g)); + + ASSERT_EQ(2U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e2, *ei++); + 
ASSERT_EQ(ei, ee); + + p = add_edge(b, a, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(4U, num_edges(g)); + + ASSERT_EQ(2U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ei, ee); + + remove_edge(a, c, g); + ASSERT_EQ(3U, num_edges(g)); + + ASSERT_EQ(1U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ei, ee); + + p = add_edge(a, a, g); + ASSERT_EQ(4U, num_edges(g)); + ASSERT_TRUE(p.second); + SimpleG::edge_descriptor e3 = p.first; + + ASSERT_EQ(2U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e3, *ei++); + ASSERT_EQ(ei, ee); + + clear_out_edges(a, g); + ASSERT_EQ(2U, num_edges(g)); + + ASSERT_EQ(0U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(ei, ee); +} + +TEST(ue2_graph, in_edges_1) { + SimpleG g; + auto a = add_vertex(g); + + ASSERT_EQ(1U, num_vertices(g)); + ASSERT_EQ(0U, in_degree(a, g)); + + SimpleG::in_edge_iterator ei; + SimpleG::in_edge_iterator ee; + + tie(ei, ee) = in_edges(a, g); + ASSERT_TRUE(ei == ee); + + auto p = add_edge(a, a, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(1U, num_edges(g)); + SimpleG::edge_descriptor e1 = p.first; + + ASSERT_EQ(1U, in_degree(a, g)); + tie(ei, ee) = in_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ei, ee); + + p = add_edge(a, a, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(2U, num_edges(g)); + SimpleG::edge_descriptor e2 = p.first; + + ASSERT_EQ(2U, in_degree(a, g)); + tie(ei, ee) = in_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ei, ee); +} + +TEST(ue2_graph, in_edges_2) { + SimpleG g; + auto a = add_vertex(g); + auto b = add_vertex(g); + auto c = add_vertex(g); + + ASSERT_EQ(3U, num_vertices(g)); + ASSERT_EQ(0U, in_degree(a, g)); + + SimpleG::in_edge_iterator ei; + SimpleG::in_edge_iterator ee; + + tie(ei, ee) = in_edges(a, g); + ASSERT_TRUE(ei == ee); + + auto p = add_edge(b, a, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(1U, num_edges(g)); + SimpleG::edge_descriptor e1 = p.first; + + ASSERT_EQ(1U, in_degree(a, g)); + tie(ei, ee) = in_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ei, ee); + + p = add_edge(c, a, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(2U, num_edges(g)); + SimpleG::edge_descriptor e2 = p.first; + + ASSERT_EQ(2U, in_degree(a, g)); + tie(ei, ee) = in_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ei, ee); + + p = add_edge(c, b, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(3U, num_edges(g)); + + ASSERT_EQ(2U, in_degree(a, g)); + tie(ei, ee) = in_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ei, ee); + + p = add_edge(a, b, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(4U, num_edges(g)); + + ASSERT_EQ(2U, in_degree(a, g)); + tie(ei, ee) = in_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ei, ee); + + remove_edge(c, a, g); + ASSERT_EQ(3U, num_edges(g)); + + ASSERT_EQ(1U, in_degree(a, g)); + tie(ei, ee) = in_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ei, ee); + + p = add_edge(a, a, g); + ASSERT_EQ(4U, num_edges(g)); + ASSERT_TRUE(p.second); + SimpleG::edge_descriptor e3 = p.first; + + ASSERT_EQ(2U, in_degree(a, g)); + tie(ei, ee) = in_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e3, *ei++); + ASSERT_EQ(ei, ee); + + clear_in_edges(a, g); + ASSERT_EQ(2U, num_edges(g)); + + ASSERT_EQ(0U, in_degree(a, g)); + tie(ei, ee) = in_edges(a, g); + ASSERT_EQ(ei, ee); +} + +TEST(ue2_graph, parallel_1) { + SimpleG g; + SimpleG::vertex_iterator vi; + SimpleG::vertex_iterator ve; + 
auto a = add_vertex(g); + + ASSERT_EQ(1U, num_vertices(g)); + ASSERT_EQ(0U, out_degree(a, g)); + + SimpleG::out_edge_iterator ei; + SimpleG::out_edge_iterator ee; + + tie(ei, ee) = out_edges(a, g); + ASSERT_TRUE(ei == ee); + + auto p = add_edge(a, a, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(1U, num_edges(g)); + SimpleG::edge_descriptor e1 = p.first; + + ASSERT_EQ(1U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ei, ee); + + p = add_edge(a, a, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(2U, num_edges(g)); + SimpleG::edge_descriptor e2 = p.first; + + ASSERT_EQ(2U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ei, ee); + + remove_edge(e1, g); + + ASSERT_EQ(1U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ei, ee); + + p = add_edge(a, a, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(2U, num_edges(g)); + SimpleG::edge_descriptor e3 = p.first; + + ASSERT_EQ(2U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(e3, *ei++); + ASSERT_EQ(ei, ee); + + remove_edge(a, a, g); + ASSERT_EQ(0U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(ei, ee); +} + +TEST(ue2_graph, edges_0a) { + SimpleG g; + auto p = edges(g); + ASSERT_EQ(p.first, p.second); +} + +TEST(ue2_graph, edges_0b) { + SimpleG g; + add_vertex(g); + ASSERT_EQ(1U, num_vertices(g)); + auto p = edges(g); + ASSERT_EQ(p.first, p.second); +} + +TEST(ue2_graph, edges_0c) { + SimpleG g; + add_vertex(g); + add_vertex(g); + ASSERT_EQ(2U, num_vertices(g)); + auto p = edges(g); + ASSERT_EQ(p.first, p.second); +} + +TEST(ue2_graph, edges_1a) { + SimpleG g; + ASSERT_EQ(0U, num_edges(g)); + + auto v = add_vertex(g); + + ASSERT_EQ(0U, num_edges(g)); + auto e1 = add_edge(v, v, g).first; + + SimpleG::edge_iterator ei, ee; + + ASSERT_EQ(1U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ee, ei); + + remove_edge(e1, g); + + ASSERT_EQ(0U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(ee, ei); +} + +TEST(ue2_graph, edges_1b) { + SimpleG g; + ASSERT_EQ(0U, num_edges(g)); + + auto u = add_vertex(g); + auto v = add_vertex(g); + + ASSERT_EQ(0U, num_edges(g)); + auto e1 = add_edge(u, v, g).first; + + SimpleG::edge_iterator ei, ee; + + ASSERT_EQ(1U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ee, ei); + + remove_edge(e1, g); + + ASSERT_EQ(0U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(ee, ei); +} + +TEST(ue2_graph, edges_1c) { + SimpleG g; + ASSERT_EQ(0U, num_edges(g)); + + auto u = add_vertex(g); + auto v = add_vertex(g); + + ASSERT_EQ(0U, num_edges(g)); + auto e1 = add_edge(v, u, g).first; + + SimpleG::edge_iterator ei, ee; + + ASSERT_EQ(1U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ee, ei); + + remove_edge(e1, g); + + ASSERT_EQ(0U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(ee, ei); +} + +TEST(ue2_graph, edges_1d) { + SimpleG g; + ASSERT_EQ(0U, num_edges(g)); + + UNUSED auto u = add_vertex(g); + UNUSED auto v = add_vertex(g); + auto w = add_vertex(g); + auto x = add_vertex(g); + UNUSED auto y = add_vertex(g); + UNUSED auto z = add_vertex(g); + + ASSERT_EQ(0U, num_edges(g)); + auto e1 = add_edge(w, x, g).first; + + SimpleG::edge_iterator ei, ee; + + ASSERT_EQ(1U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ee, ei); + + remove_edge(e1, g); + + ASSERT_EQ(0U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(ee, ei); +} 
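+
+/* Illustrative sketch, not part of the original patch: the edges_2c and
+ * edges_3a tests below rely on the ordering behaviour of the global edge
+ * iterator, which enumerates edges grouped by source vertex (in vertex
+ * order) rather than in overall insertion order. A minimal example of the
+ * same property: */
+TEST(ue2_graph, edges_order_sketch) {
+    SimpleG g;
+    auto u = add_vertex(g); // vertex index 0
+    auto v = add_vertex(g); // vertex index 1
+
+    auto e1 = add_edge(v, v, g).first; // inserted first, but source is v
+    auto e2 = add_edge(u, v, g).first; // inserted second, source is u
+
+    SimpleG::edge_iterator ei, ee;
+    tie(ei, ee) = edges(g);
+    ASSERT_EQ(e2, *ei++); // u's out-edges are enumerated before v's
+    ASSERT_EQ(e1, *ei++);
+    ASSERT_EQ(ee, ei);
+}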
+ +TEST(ue2_graph, edges_2a) { + SimpleG g; + ASSERT_EQ(0U, num_edges(g)); + + auto v = add_vertex(g); + + ASSERT_EQ(0U, num_edges(g)); + auto e1 = add_edge(v, v, g).first; + auto e2 = add_edge(v, v, g).first; + + SimpleG::edge_iterator ei, ee; + + ASSERT_EQ(2U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ee, ei); + + remove_edge(e1, g); + + ASSERT_EQ(1U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ee, ei); + + remove_edge(e2, g); + + ASSERT_EQ(0U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(ee, ei); +} + +TEST(ue2_graph, edges_2b) { + SimpleG g; + ASSERT_EQ(0U, num_edges(g)); + + auto u = add_vertex(g); + auto v = add_vertex(g); + + ASSERT_EQ(0U, num_edges(g)); + auto e1 = add_edge(u, v, g).first; + auto e2 = add_edge(v, u, g).first; + + SimpleG::edge_iterator ei, ee; + + ASSERT_EQ(2U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ee, ei); + + remove_edge(e1, g); + + ASSERT_EQ(1U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ee, ei); + + remove_edge(e2, g); + + ASSERT_EQ(0U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(ee, ei); +} + +TEST(ue2_graph, edges_2c) { + SimpleG g; + ASSERT_EQ(0U, num_edges(g)); + + UNUSED auto s = add_vertex(g); + UNUSED auto t = add_vertex(g); + auto u = add_vertex(g); + UNUSED auto v = add_vertex(g); + auto w = add_vertex(g); + auto x = add_vertex(g); + UNUSED auto y = add_vertex(g); + UNUSED auto z = add_vertex(g); + + ASSERT_EQ(0U, num_edges(g)); + auto e1 = add_edge(w, x, g).first; + auto e2 = add_edge(u, x, g).first; + + SimpleG::edge_iterator ei, ee; + + ASSERT_EQ(2U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ee, ei); + + clear_in_edges(x, g); + + ASSERT_EQ(0U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(ee, ei); +} + +TEST(ue2_graph, edges_3a) { + SimpleG g; + ASSERT_EQ(0U, num_edges(g)); + + UNUSED auto s = add_vertex(g); + UNUSED auto t = add_vertex(g); + auto u = add_vertex(g); + auto v = add_vertex(g); + auto w = add_vertex(g); + auto x = add_vertex(g); + UNUSED auto y = add_vertex(g); + auto z = add_vertex(g); + + ASSERT_EQ(0U, num_edges(g)); + auto e1 = add_edge(w, x, g).first; + auto e2 = add_edge(u, v, g).first; + auto e3 = add_edge(u, z, g).first; + + SimpleG::edge_iterator ei, ee; + + ASSERT_EQ(3U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(e3, *ei++); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ee, ei); + + remove_edge(e1, g); + + ASSERT_EQ(2U, num_edges(g)); + clear_out_edges(u, g); + + ASSERT_EQ(0U, num_edges(g)); + + tie(ei, ee) = edges(g); + ASSERT_EQ(ee, ei); +} + +TEST(ue2_graph, degree) { + SimpleG g; + auto a = add_vertex(g); + auto b = add_vertex(g); + auto c = add_vertex(g); + auto d = add_vertex(g); + + add_edge(a, b, g); + add_edge(a, c, g); + add_edge(a, d, g); + + ASSERT_EQ(3U, degree(a, g)); + ASSERT_EQ(1U, degree(b, g)); + ASSERT_EQ(1U, degree(c, g)); + ASSERT_EQ(1U, degree(d, g)); + + add_edge(b, c, g); + + ASSERT_EQ(3U, degree(a, g)); + ASSERT_EQ(2U, degree(b, g)); + ASSERT_EQ(2U, degree(c, g)); + ASSERT_EQ(1U, degree(d, g)); + + add_edge(d, d, g); + ASSERT_EQ(3U, degree(a, g)); + ASSERT_EQ(2U, degree(b, g)); + ASSERT_EQ(2U, degree(c, g)); + ASSERT_EQ(3U, degree(d, g)); add_edge(b, a, g); + ASSERT_EQ(4U, degree(a, g)); + ASSERT_EQ(3U, degree(b, g)); + ASSERT_EQ(2U, degree(c, g)); + ASSERT_EQ(3U, degree(d, g)); - 
ASSERT_FALSE(in_degree_equal_to(a, g, 0)); - ASSERT_TRUE(in_degree_equal_to(a, g, 1)); - ASSERT_FALSE(in_degree_equal_to(a, g, 2)); + add_edge(b, a, g); + ASSERT_EQ(5U, degree(a, g)); + ASSERT_EQ(4U, degree(b, g)); + ASSERT_EQ(2U, degree(c, g)); + ASSERT_EQ(3U, degree(d, g)); - add_edge(c, a, g); - - ASSERT_FALSE(in_degree_equal_to(a, g, 0)); - ASSERT_FALSE(in_degree_equal_to(a, g, 1)); - ASSERT_TRUE(in_degree_equal_to(a, g, 2)); - - add_edge(d, a, g); - - ASSERT_FALSE(in_degree_equal_to(a, g, 0)); - ASSERT_FALSE(in_degree_equal_to(a, g, 1)); - ASSERT_FALSE(in_degree_equal_to(a, g, 2)); + add_edge(d, d, g); + ASSERT_EQ(5U, degree(a, g)); + ASSERT_EQ(4U, degree(b, g)); + ASSERT_EQ(2U, degree(c, g)); + ASSERT_EQ(5U, degree(d, g)); } -TEST(graph_util, edge_by_target_1) { - unit_graph g; +TEST(ue2_graph, adj) { + SimpleG g; + auto a = add_vertex(g); + auto b = add_vertex(g); + auto c = add_vertex(g); + auto d = add_vertex(g); - unit_vertex a = add_vertex(g); - unit_vertex b = add_vertex(g); - unit_vertex c = add_vertex(g); + add_edge(a, b, g); + add_edge(a, c, g); + add_edge(a, d, g); + add_edge(b, a, g); + add_edge(b, b, g); - ASSERT_FALSE(edge_by_target(a, a, g).second); - ASSERT_FALSE(edge_by_target(a, b, g).second); - ASSERT_FALSE(edge_by_target(a, c, g).second); - ASSERT_FALSE(edge_by_target(b, a, g).second); - ASSERT_FALSE(edge_by_target(c, b, g).second); + SimpleG::adjacency_iterator ai, ae; + tie(ai, ae) = adjacent_vertices(a, g); + ASSERT_EQ(b, *ai++); + ASSERT_EQ(c, *ai++); + ASSERT_EQ(d, *ai++); + ASSERT_EQ(ai, ae); - unit_edge ab = add_edge(a, b, g).first; + tie(ai, ae) = adjacent_vertices(b, g); + ASSERT_EQ(a, *ai++); + ASSERT_EQ(b, *ai++); + ASSERT_EQ(ai, ae); - ASSERT_FALSE(edge_by_target(a, a, g).second); - ASSERT_TRUE(edge_by_target(a, b, g).second); - ASSERT_TRUE(ab == edge_by_target(a, b, g).first); - ASSERT_FALSE(edge_by_target(a, c, g).second); - ASSERT_FALSE(edge_by_target(b, a, g).second); - ASSERT_FALSE(edge_by_target(b, b, g).second); - ASSERT_FALSE(edge_by_target(c, b, g).second); + tie(ai, ae) = adjacent_vertices(c, g); + ASSERT_EQ(ai, ae); - unit_edge cb = add_edge(c, b, g).first; - - ASSERT_FALSE(edge_by_target(a, a, g).second); - ASSERT_TRUE(edge_by_target(a, b, g).second); - ASSERT_TRUE(ab == edge_by_target(a, b, g).first); - ASSERT_FALSE(edge_by_target(a, c, g).second); - ASSERT_FALSE(edge_by_target(b, a, g).second); - ASSERT_FALSE(edge_by_target(b, b, g).second); - ASSERT_TRUE(edge_by_target(c, b, g).second); - ASSERT_TRUE(cb == edge_by_target(c, b, g).first); - - unit_edge aa = add_edge(a, a, g).first; - unit_edge bb = add_edge(b, b, g).first; - - ASSERT_TRUE(edge_by_target(a, a, g).second); - ASSERT_TRUE(aa == edge_by_target(a, a, g).first); - ASSERT_TRUE(edge_by_target(a, b, g).second); - ASSERT_TRUE(ab == edge_by_target(a, b, g).first); - ASSERT_FALSE(edge_by_target(a, c, g).second); - ASSERT_FALSE(edge_by_target(b, a, g).second); - ASSERT_TRUE(edge_by_target(b, b, g).second); - ASSERT_TRUE(bb == edge_by_target(b, b, g).first); - ASSERT_TRUE(edge_by_target(c, b, g).second); - ASSERT_TRUE(cb == edge_by_target(c, b, g).first); + tie(ai, ae) = adjacent_vertices(d, g); + ASSERT_EQ(ai, ae); +} + +TEST(ue2_graph, inv_adj) { + SimpleG g; + auto a = add_vertex(g); + auto b = add_vertex(g); + auto c = add_vertex(g); + auto d = add_vertex(g); + + add_edge(a, b, g); + add_edge(a, c, g); + add_edge(a, d, g); + add_edge(b, a, g); + add_edge(b, b, g); + + SimpleG::inv_adjacency_iterator ai, ae; + tie(ai, ae) = inv_adjacent_vertices(a, g); + ASSERT_EQ(b, *ai++); + 
ASSERT_EQ(ai, ae); + + tie(ai, ae) = inv_adjacent_vertices(b, g); + ASSERT_EQ(a, *ai++); + ASSERT_EQ(b, *ai++); + ASSERT_EQ(ai, ae); + + tie(ai, ae) = inv_adjacent_vertices(c, g); + ASSERT_EQ(a, *ai++); + ASSERT_EQ(ai, ae); + + tie(ai, ae) = inv_adjacent_vertices(d, g); + ASSERT_EQ(a, *ai++); + ASSERT_EQ(ai, ae); +} + +TEST(ue2_graph, square_brackets_v) { + SimpleG g; + auto a = add_vertex(g); + auto b = add_vertex(g); + auto c = add_vertex(g); + auto d = add_vertex(g); + + ASSERT_EQ(0U, g[a].index); + ASSERT_EQ(1U, g[b].index); + ASSERT_EQ(2U, g[c].index); + ASSERT_EQ(3U, g[d].index); + + ASSERT_EQ("SimpleV", g[a].test_v); + ASSERT_EQ("SimpleV", g[b].test_v); + ASSERT_EQ("SimpleV", g[c].test_v); + ASSERT_EQ("SimpleV", g[d].test_v); + + g[a].test_v = "a"; + g[b].test_v = "b"; + g[c].test_v = "c"; + g[d].test_v = "d"; + + ASSERT_EQ("a", g[a].test_v); + ASSERT_EQ("b", g[b].test_v); + ASSERT_EQ("c", g[c].test_v); + ASSERT_EQ("d", g[d].test_v); +} + +TEST(ue2_graph, square_brackets_e) { + SimpleG g; + auto u = add_vertex(g); + auto v = add_vertex(g); + auto a = add_edge(u, v, g).first; + auto b = add_edge(u, v, g).first; + auto c = add_edge(u, u, g).first; + auto d = add_edge(v, u, g).first; + + ASSERT_EQ(0U, g[a].index); + ASSERT_EQ(1U, g[b].index); + ASSERT_EQ(2U, g[c].index); + ASSERT_EQ(3U, g[d].index); + + ASSERT_EQ("SimpleE", g[a].test_e); + ASSERT_EQ("SimpleE", g[b].test_e); + ASSERT_EQ("SimpleE", g[c].test_e); + ASSERT_EQ("SimpleE", g[d].test_e); + + g[a].test_e = "a"; + g[b].test_e = "b"; + g[c].test_e = "c"; + g[d].test_e = "d"; + + ASSERT_EQ("a", g[a].test_e); + ASSERT_EQ("b", g[b].test_e); + ASSERT_EQ("c", g[c].test_e); + ASSERT_EQ("d", g[d].test_e); +} + +TEST(ue2_graph, vertex_ordering_1) { + SimpleG g; + auto a = add_vertex(g); + auto b = add_vertex(g); + auto c = add_vertex(g); + auto d = add_vertex(g); + + ASSERT_LE(a, b); + ASSERT_LE(a, c); + ASSERT_LE(a, d); + ASSERT_LE(b, c); + ASSERT_LE(b, d); + ASSERT_LE(c, d); + + g[a].index = 5; + g[b].index = 0; + g[c].index = 3; + g[d].index = 1; + + ASSERT_LE(a, b); + ASSERT_LE(a, c); + ASSERT_LE(a, d); + ASSERT_LE(b, c); + ASSERT_LE(b, d); + ASSERT_LE(c, d); +} + +TEST(ue2_graph, vertex_ordering_2) { + SimpleG g; + auto a = add_vertex(g); + auto b = add_vertex(g); + auto c = add_vertex(g); + auto d = add_vertex(g); + + set s; + s.insert(a); + s.insert(b); + s.insert(c); + s.insert(d); + + auto it = s.begin(); + ASSERT_EQ(a, *it++); + ASSERT_EQ(b, *it++); + ASSERT_EQ(c, *it++); + ASSERT_EQ(d, *it++); + ASSERT_EQ(it, s.end()); + + g[a].index = 5; + g[b].index = 0; + g[c].index = 3; + g[d].index = 1; + + it = s.begin(); + ASSERT_EQ(a, *it++); + ASSERT_EQ(b, *it++); + ASSERT_EQ(c, *it++); + ASSERT_EQ(d, *it++); + ASSERT_EQ(it, s.end()); +} + +TEST(ue2_graph, get_v_2_arg) { + SimpleG g; + auto a = add_vertex(g); + auto b = add_vertex(g); + + auto pm = get(&SimpleV::test_v, g); + + ASSERT_EQ("SimpleV", pm[a]); + ASSERT_EQ("SimpleV", pm[b]); + + pm[a] = "a"; + pm[b] = "b"; + + ASSERT_EQ("a", pm[a]); + ASSERT_EQ("b", pm[b]); + + ASSERT_EQ("a", g[a].test_v); + ASSERT_EQ("b", g[b].test_v); + + g[a].test_v = "X"; + g[b].test_v = "Y"; + + ASSERT_EQ("X", pm[a]); + ASSERT_EQ("Y", pm[b]); + + ASSERT_EQ("X", get(pm, a)); + ASSERT_EQ("Y", get(pm, b)); + + put(pm, a, "A"); + put(pm, b, "B"); + + ASSERT_EQ("A", g[a].test_v); + ASSERT_EQ("B", g[b].test_v); +} + +TEST(ue2_graph, get_v_2_arg_const) { + SimpleG g; + const SimpleG &gg = g; + auto a = add_vertex(g); + auto b = add_vertex(g); + + auto pm = get(&SimpleV::test_v, gg); + + ASSERT_EQ("SimpleV", 
pm[a]); + ASSERT_EQ("SimpleV", pm[b]); + + g[a].test_v = "a"; + g[b].test_v = "b"; + + ASSERT_EQ("a", pm[a]); + ASSERT_EQ("b", pm[b]); + + ASSERT_EQ("a", get(pm, a)); + ASSERT_EQ("b", get(pm, b)); +} + +TEST(ue2_graph, get_e_2_arg) { + SimpleG g; + auto u = add_vertex(g); + auto v = add_vertex(g); + auto a = add_edge(u, v, g).first; + auto b = add_edge(v, u, g).first; + + auto pm = get(&SimpleE::test_e, g); + + ASSERT_EQ("SimpleE", pm[a]); + ASSERT_EQ("SimpleE", pm[b]); + + pm[a] = "a"; + pm[b] = "b"; + + ASSERT_EQ("a", pm[a]); + ASSERT_EQ("b", pm[b]); + + ASSERT_EQ("a", g[a].test_e); + ASSERT_EQ("b", g[b].test_e); + + g[a].test_e = "X"; + g[b].test_e = "Y"; + + ASSERT_EQ("X", pm[a]); + ASSERT_EQ("Y", pm[b]); + + ASSERT_EQ("X", get(pm, a)); + ASSERT_EQ("Y", get(pm, b)); + + put(pm, a, "A"); + put(pm, b, "B"); + + ASSERT_EQ("A", g[a].test_e); + ASSERT_EQ("B", g[b].test_e); +} + +TEST(ue2_graph, get_e_2_arg_const) { + SimpleG g; + const SimpleG &gg = g; + auto u = add_vertex(g); + auto v = add_vertex(g); + auto a = add_edge(u, v, g).first; + auto b = add_edge(v, u, g).first; + + auto pm = get(&SimpleE::test_e, gg); + + ASSERT_EQ("SimpleE", pm[a]); + ASSERT_EQ("SimpleE", pm[b]); + + g[a].test_e = "a"; + g[b].test_e = "b"; + + ASSERT_EQ("a", pm[a]); + ASSERT_EQ("b", pm[b]); + + ASSERT_EQ("a", get(pm, a)); + ASSERT_EQ("b", get(pm, b)); +} + +TEST(ue2_graph, get_v_3_arg) { + SimpleG g; + auto a = add_vertex(g); + auto b = add_vertex(g); + + ASSERT_EQ("SimpleV", get(&SimpleV::test_v, g, a)); + ASSERT_EQ("SimpleV", get(&SimpleV::test_v, g, a)); + + get(&SimpleV::test_v, g, a) = "a"; + get(&SimpleV::test_v, g, b) = "b"; + + ASSERT_EQ("a", get(&SimpleV::test_v, g, a)); + ASSERT_EQ("b", get(&SimpleV::test_v, g, b)); + + ASSERT_EQ("a", g[a].test_v); + ASSERT_EQ("b", g[b].test_v); + + g[a].test_v = "X"; + g[b].test_v = "Y"; + + ASSERT_EQ("X", get(&SimpleV::test_v, g, a)); + ASSERT_EQ("Y", get(&SimpleV::test_v, g, b)); + + //std::decay::type x = "A"; + + put(&SimpleV::test_v, g, a, "A"); + put(&SimpleV::test_v, g, b, "B"); + + ASSERT_EQ("A", g[a].test_v); + ASSERT_EQ("B", g[b].test_v); +} + +TEST(ue2_graph, get_v_3_arg_const) { + SimpleG g; + const SimpleG &gg = g; + auto a = add_vertex(g); + auto b = add_vertex(g); + + ASSERT_EQ("SimpleV", get(&SimpleV::test_v, gg, a)); + ASSERT_EQ("SimpleV", get(&SimpleV::test_v, gg, b)); + + g[a].test_v = "a"; + g[b].test_v = "b"; + + ASSERT_EQ("a", get(&SimpleV::test_v, gg, a)); + ASSERT_EQ("b", get(&SimpleV::test_v, gg, b)); +} + +TEST(ue2_graph, get_e_3_arg) { + SimpleG g; + auto u = add_vertex(g); + auto v = add_vertex(g); + auto a = add_edge(u, v, g).first; + auto b = add_edge(v, u, g).first; + + ASSERT_EQ("SimpleE", get(&SimpleE::test_e, g, a)); + ASSERT_EQ("SimpleE", get(&SimpleE::test_e, g, b)); + + get(&SimpleE::test_e, g, a) = "a"; + get(&SimpleE::test_e, g, b) = "b"; + + ASSERT_EQ("a", get(&SimpleE::test_e, g, a)); + ASSERT_EQ("b", get(&SimpleE::test_e, g, b)); + + ASSERT_EQ("a", g[a].test_e); + ASSERT_EQ("b", g[b].test_e); + + g[a].test_e = "X"; + g[b].test_e = "Y"; + + ASSERT_EQ("X", get(&SimpleE::test_e, g, a)); + ASSERT_EQ("Y", get(&SimpleE::test_e, g, b)); +} + +TEST(ue2_graph, get_e_3_arg_const) { + SimpleG g; + const SimpleG &gg = g; + auto u = add_vertex(g); + auto v = add_vertex(g); + auto a = add_edge(u, v, g).first; + auto b = add_edge(v, u, g).first; + + ASSERT_EQ("SimpleE", get(&SimpleE::test_e, gg, a)); + ASSERT_EQ("SimpleE", get(&SimpleE::test_e, gg, b)); + + g[a].test_e = "a"; + g[b].test_e = "b"; + + ASSERT_EQ("a", get(&SimpleE::test_e, 
gg, a)); + ASSERT_EQ("b", get(&SimpleE::test_e, gg, b)); +} + +TEST(ue2_graph, get_vertex_index) { + SimpleG g; + auto a = add_vertex(g); + auto pm = get(vertex_index, g); + ASSERT_EQ(0U, pm(a)); + pm(a) = 1; + ASSERT_EQ(1U, pm[a]); + ASSERT_EQ(1U, g[a].index); + ASSERT_EQ(1U, get(vertex_index, g, a)); +} + +TEST(ue2_graph, get_vertex_index_const) { + SimpleG g; + const SimpleG &gg = g; + auto a = add_vertex(g); + auto pm = get(vertex_index, gg); + ASSERT_EQ(0U, pm(a)); + g[a].index = 1; + ASSERT_EQ(1U, pm[a]); + ASSERT_EQ(1U, get(vertex_index, gg, a)); +} + +TEST(ue2_graph, get_edge_index) { + SimpleG g; + auto u = add_vertex(g); + auto v = add_vertex(g); + auto a = add_edge(u, v, g).first; + auto pm = get(edge_index, g); + ASSERT_EQ(0U, pm(a)); + pm(a) = 1; + ASSERT_EQ(1U, pm[a]); + ASSERT_EQ(1U, g[a].index); + ASSERT_EQ(1U, get(edge_index, g, a)); +} + +TEST(ue2_graph, get_edge_index_const) { + SimpleG g; + const SimpleG &gg = g; + auto u = add_vertex(g); + auto v = add_vertex(g); + auto a = add_edge(u, v, g).first; + auto pm = get(edge_index, gg); + ASSERT_EQ(0U, pm(a)); + g[a].index = 1; + ASSERT_EQ(1U, pm[a]); + ASSERT_EQ(1U, get(edge_index, gg, a)); +} + +TEST(ue2_graph, get_vertex_all) { + SimpleG g; + auto a = add_vertex(g); + auto pm = get(vertex_all, g); + ASSERT_EQ(0U, pm(a).index); + pm(a).index = 1; + ASSERT_EQ(1U, pm[a].index); + ASSERT_EQ(1U, g[a].index); + ASSERT_EQ(1U, get(vertex_all, g, a).index); + auto &a_all = get(vertex_all, g, a); + ASSERT_EQ(1U, a_all.index); + g[a].index = 2; + ASSERT_EQ(2U, a_all.index); +} + +TEST(ue2_graph, get_vertex_all_const) { + SimpleG g; + const SimpleG &gg = g; + auto a = add_vertex(g); + auto pm = get(vertex_all, gg); + ASSERT_EQ(0U, pm(a).index); + g[a].index = 1; + ASSERT_EQ(1U, pm[a].index); + ASSERT_EQ(1U, get(vertex_all, gg, a).index); + auto &a_all = get(vertex_all, gg, a); + ASSERT_EQ(1U, a_all.index); + g[a].index = 2; + ASSERT_EQ(2U, a_all.index); +} + +TEST(ue2_graph, get_vertex_bundle) { + SimpleG g; + auto a = add_vertex(g); + auto pm = get(vertex_bundle, g); + ASSERT_EQ(0U, pm(a).index); + pm(a).index = 1; + ASSERT_EQ(1U, pm[a].index); + ASSERT_EQ(1U, g[a].index); + ASSERT_EQ(1U, get(vertex_bundle, g, a).index); + auto &a_bundle = get(vertex_bundle, g, a); + ASSERT_EQ(1U, a_bundle.index); + g[a].index = 2; + ASSERT_EQ(2U, a_bundle.index); +} + +TEST(ue2_graph, get_vertex_bundle_const) { + SimpleG g; + const SimpleG &gg = g; + auto a = add_vertex(g); + auto pm = get(vertex_bundle, gg); + ASSERT_EQ(0U, pm(a).index); + g[a].index = 1; + ASSERT_EQ(1U, pm[a].index); + ASSERT_EQ(1U, get(vertex_bundle, gg, a).index); + auto &a_bundle = get(vertex_bundle, gg, a); + ASSERT_EQ(1U, a_bundle.index); + g[a].index = 2; + ASSERT_EQ(2U, a_bundle.index); +} + +TEST(ue2_graph, get_edge_all) { + SimpleG g; + auto u = add_vertex(g); + auto v = add_vertex(g); + auto a = add_edge(u, v, g).first; + auto pm = get(edge_all, g); + ASSERT_EQ(0U, pm(a).index); + pm(a).index = 1; + ASSERT_EQ(1U, pm[a].index); + ASSERT_EQ(1U, g[a].index); + ASSERT_EQ(1U, get(edge_all, g, a).index); + auto &a_all = get(edge_all, g, a); + ASSERT_EQ(1U, a_all.index); + g[a].index = 2; + ASSERT_EQ(2U, a_all.index); +} + +TEST(ue2_graph, get_edge_all_const) { + SimpleG g; + const SimpleG &gg = g; + auto u = add_vertex(g); + auto v = add_vertex(g); + auto a = add_edge(u, v, g).first; + auto pm = get(edge_all, gg); + ASSERT_EQ(0U, pm(a).index); + g[a].index = 1; + ASSERT_EQ(1U, pm[a].index); + ASSERT_EQ(1U, get(edge_all, gg, a).index); + auto &a_all = get(edge_all, gg, a); + 
ASSERT_EQ(1U, a_all.index); + g[a].index = 2; + ASSERT_EQ(2U, a_all.index); +} + +TEST(ue2_graph, get_edge_bundle) { + SimpleG g; + auto u = add_vertex(g); + auto v = add_vertex(g); + auto a = add_edge(u, v, g).first; + auto pm = get(edge_bundle, g); + ASSERT_EQ(0U, pm(a).index); + pm(a).index = 1; + ASSERT_EQ(1U, pm[a].index); + ASSERT_EQ(1U, g[a].index); + ASSERT_EQ(1U, get(edge_bundle, g, a).index); + auto &a_bundle = get(edge_bundle, g, a); + ASSERT_EQ(1U, a_bundle.index); + g[a].index = 2; + ASSERT_EQ(2U, a_bundle.index); +} + +TEST(ue2_graph, get_edge_bundle_const) { + SimpleG g; + const SimpleG &gg = g; + auto u = add_vertex(g); + auto v = add_vertex(g); + auto a = add_edge(u, v, g).first; + auto pm = get(edge_bundle, gg); + ASSERT_EQ(0U, pm(a).index); + g[a].index = 1; + ASSERT_EQ(1U, pm[a].index); + ASSERT_EQ(1U, get(edge_bundle, gg, a).index); + auto &a_bundle = get(edge_bundle, gg, a); + ASSERT_EQ(1U, a_bundle.index); + g[a].index = 2; + ASSERT_EQ(2U, a_bundle.index); +} + +TEST(ue2_graph, add_vertex_prop) { + SimpleG g; + SimpleV vp; + vp.index = 42; + vp.test_v = "prop"; + auto u = add_vertex(vp, g); + auto v = add_vertex(vp, g); + + ASSERT_EQ(0U, g[u].index); + ASSERT_EQ(1U, g[v].index); + + ASSERT_EQ("prop", g[u].test_v); + ASSERT_EQ("prop", g[v].test_v); +} + +TEST(ue2_graph, add_edge_prop) { + SimpleG g; + SimpleE ep; + ep.index = 42; + ep.test_e = "prop"; + auto u = add_vertex(g); + auto v = add_vertex(g); + + auto e = add_edge(u, v, ep, g).first; + auto f = add_edge(u, v, ep, g).first; + + ASSERT_EQ(0U, g[e].index); + ASSERT_EQ(1U, g[f].index); + + ASSERT_EQ("prop", g[e].test_e); + ASSERT_EQ("prop", g[f].test_e); +} + +TEST(ue2_graph, reverse_graph) { + SimpleG g; + auto a = add_vertex(g); + auto b = add_vertex(g); + auto e = add_edge(a, b, g).first; + reverse_graph rg(g); + auto index_map = get(vertex_index, rg); + + ASSERT_EQ(0U, rg[a].index); + ASSERT_EQ(1U, rg[b].index); + ASSERT_EQ(0U, rg[e].index); + + ASSERT_EQ(0U, get(vertex_index, rg, a)); + ASSERT_EQ(1U, get(vertex_index, rg, b)); + ASSERT_EQ(0U, get(edge_index, rg, edge(b, a, rg).first)); + + ASSERT_EQ(0U, index_map(a)); + ASSERT_EQ(1U, index_map(b)); + + ASSERT_TRUE(edge(b, a, rg).second); + ASSERT_FALSE(edge(a, b, rg).second); +} + +TEST(ue2_graph, reverse_graph_const) { + SimpleG g; + auto a = add_vertex(g); + auto b = add_vertex(g); + auto e = add_edge(a, b, g).first; + reverse_graph rg(g); + auto index_map = get(&SimpleV::index, rg); + + // Note: reverse_graph fails to make bundles const so things break. 
+ // ASSERT_EQ(0U, rg[a].index); + // ASSERT_EQ(1U, rg[b].index); + // ASSERT_EQ(0U, rg[e].index); + + ASSERT_EQ(0U, get(vertex_index, g, a)); + ASSERT_EQ(1U, get(vertex_index, g, b)); + ASSERT_EQ(0U, get(edge_index, g, e)); + + ASSERT_EQ(0U, index_map(a)); + ASSERT_EQ(1U, index_map(b)); + + ASSERT_TRUE(edge(b, a, rg).second); + ASSERT_FALSE(edge(a, b, rg).second); +} + +TEST(ue2_graph, default_param) { + struct TestGraph : ue2_graph { }; + TestGraph g; + + auto v = add_vertex(g); + auto e = add_edge(v, v, g).first; + + ASSERT_EQ(0U, get(vertex_index, g, v)); + ASSERT_EQ(0U, get(&ue2::graph_detail::default_edge_property::index, g, e)); + ASSERT_EQ(0U, get(edge_index, g, e)); } diff --git a/unit/internal/nfagraph_equivalence.cpp b/unit/internal/nfagraph_equivalence.cpp index 3ca1923f..8fda9223 100644 --- a/unit/internal/nfagraph_equivalence.cpp +++ b/unit/internal/nfagraph_equivalence.cpp @@ -84,7 +84,7 @@ TEST(NFAGraph, RemoveEquivalence1) { ASSERT_TRUE(tmpcr.test('a')); } // check if we found our vertex - ASSERT_TRUE(a != nullptr); + ASSERT_TRUE(a != NGHolder::null_vertex()); // There should be two edges from v to nodes with reachability 'b' and 'c' NFAVertex b = NGHolder::null_vertex(); @@ -101,8 +101,8 @@ TEST(NFAGraph, RemoveEquivalence1) { } } // check if we found our vertices - ASSERT_TRUE(b != nullptr); - ASSERT_TRUE(c != nullptr); + ASSERT_TRUE(b != NGHolder::null_vertex()); + ASSERT_TRUE(c != NGHolder::null_vertex()); // both vertices should have an edge to accept ASSERT_TRUE(edge(b, g.accept, g).second); @@ -145,7 +145,7 @@ TEST(NFAGraph, RemoveEquivalence2) { ASSERT_TRUE(tmpcr.test('a')); } // check if we found our vertex - ASSERT_TRUE(a != nullptr); + ASSERT_TRUE(a != NGHolder::null_vertex()); // There should be two edges from v to nodes with reachability 'b' and 'c' NFAVertex b = NGHolder::null_vertex(); @@ -162,8 +162,8 @@ TEST(NFAGraph, RemoveEquivalence2) { } } // check if we found our vertices - ASSERT_TRUE(b != nullptr); - ASSERT_TRUE(c != nullptr); + ASSERT_TRUE(b != NGHolder::null_vertex()); + ASSERT_TRUE(c != NGHolder::null_vertex()); // both new vertices should have edges from startDs ASSERT_TRUE(edge(g.startDs, b, g).second); @@ -207,7 +207,7 @@ TEST(NFAGraph, RemoveEquivalence3) { ASSERT_TRUE(tmpcr.test('a')); } // check if we found our 'a' - ASSERT_TRUE(a != nullptr); + ASSERT_TRUE(a != NGHolder::null_vertex()); // There should be an edge from 'a' to '.' 
ASSERT_EQ(1U, out_degree(a, g)); @@ -234,7 +234,6 @@ TEST(NFAGraph, RemoveEquivalence3) { NFAVertex X = NGHolder::null_vertex(); NFAVertex Y = NGHolder::null_vertex(); for (NFAVertex tmp : adjacent_vertices_range(dot2, g)) { - // we already know about dot1, so skip it if (tmp == dot1) { continue; @@ -251,8 +250,8 @@ TEST(NFAGraph, RemoveEquivalence3) { } } // check if we found both vertices - ASSERT_TRUE(X != nullptr); - ASSERT_TRUE(Y != nullptr); + ASSERT_TRUE(X != NGHolder::null_vertex()); + ASSERT_TRUE(Y != NGHolder::null_vertex()); // finally, check if these two vertices only have edges to accept ASSERT_EQ(1U, out_degree(X, g)); @@ -306,8 +305,8 @@ TEST(NFAGraph, RemoveEquivalence4) { } } // check if we found both vertices - ASSERT_TRUE(X != nullptr); - ASSERT_TRUE(Y != nullptr); + ASSERT_TRUE(X != NGHolder::null_vertex()); + ASSERT_TRUE(Y != NGHolder::null_vertex()); // now, find first dot from X ASSERT_EQ(1U, out_degree(X, g)); @@ -351,7 +350,7 @@ TEST(NFAGraph, RemoveEquivalence4) { } } // make sure we found our 'a' - ASSERT_TRUE(a != nullptr); + ASSERT_TRUE(a != NGHolder::null_vertex()); // now, check if 'a' has an edge to accept ASSERT_EQ(1U, out_degree(a, g)); @@ -396,7 +395,7 @@ TEST(NFAGraph, RemoveEquivalence5) { ASSERT_TRUE(edge(v, v, g).second); } // check if we found our vertex - ASSERT_TRUE(v != nullptr); + ASSERT_TRUE(v != NGHolder::null_vertex()); // now, find the vertex leading to accept NFAVertex v2 = NGHolder::null_vertex(); @@ -414,7 +413,7 @@ TEST(NFAGraph, RemoveEquivalence5) { ASSERT_TRUE(edge(tmp, g.accept, g).second); } // check if we found our vertex - ASSERT_TRUE(v2 != nullptr); + ASSERT_TRUE(v2 != NGHolder::null_vertex()); } // catching UE-2692 @@ -452,7 +451,7 @@ TEST(NFAGraph, RemoveEquivalence6) { ASSERT_TRUE(edge(v, g.accept, g).second); } // check if we found our vertex - ASSERT_TRUE(v != nullptr); + ASSERT_TRUE(v != NGHolder::null_vertex()); } // catching UE-2692 @@ -492,7 +491,7 @@ TEST(NFAGraph, RemoveEquivalence7) { ASSERT_EQ(1U, proper_out_degree(v, g)); } // check if we found our vertex - ASSERT_TRUE(v != nullptr); + ASSERT_TRUE(v != NGHolder::null_vertex()); // find the next vertex and ensure it has an edge to accept NFAVertex v2 = NGHolder::null_vertex(); @@ -511,7 +510,7 @@ TEST(NFAGraph, RemoveEquivalence7) { ASSERT_TRUE(edge(v2, g.accept, g).second); } // check if we found our vertex - ASSERT_TRUE(v2 != nullptr); + ASSERT_TRUE(v2 != NGHolder::null_vertex()); } TEST(NFAGraph, RemoveEquivalence_Reports1) { diff --git a/unit/internal/nfagraph_redundancy.cpp b/unit/internal/nfagraph_redundancy.cpp index acb3cc7b..be9527fd 100644 --- a/unit/internal/nfagraph_redundancy.cpp +++ b/unit/internal/nfagraph_redundancy.cpp @@ -55,13 +55,13 @@ TEST(NFAGraph, RemoveRedundancy1) { unique_ptr graph(constructGraphWithCC("(a|b)c", cc, 0)); ASSERT_TRUE(graph.get() != nullptr); + NGHolder &g = *graph; // Run removeRedundancy - removeRedundancy(*graph, SOM_NONE); - NFAGraph &g = graph->g; + removeRedundancy(g, SOM_NONE); // Our graph should only have two non-special nodes - ASSERT_EQ((size_t)N_SPECIALS + 2, num_vertices(*graph)); + ASSERT_EQ((size_t)N_SPECIALS + 2, num_vertices(g)); // Dot-star start state should be connected to itself and a single other // vertex @@ -98,13 +98,13 @@ TEST(NFAGraph, RemoveRedundancy2) { unique_ptr graph(constructGraphWithCC("a.*b?c", cc, HS_FLAG_DOTALL)); ASSERT_TRUE(graph.get() != nullptr); + NGHolder &g = *graph; // Run removeRedundancy - removeRedundancy(*graph, SOM_NONE); - NFAGraph &g = graph->g; + removeRedundancy(g, 
SOM_NONE); // Our graph should now have only 3 non-special vertices - ASSERT_EQ((size_t)N_SPECIALS + 3, num_vertices(*graph)); + ASSERT_EQ((size_t)N_SPECIALS + 3, num_vertices(g)); // Dot-star start state should be connected to itself and a single other // vertex @@ -156,12 +156,12 @@ TEST(NFAGraph, RemoveRedundancy3) { cc, 0)); ASSERT_TRUE(graph.get() != nullptr); - unsigned countBefore = num_vertices(graph->g); + unsigned countBefore = num_vertices(*graph); removeRedundancy(*graph, SOM_NONE); // The '(a|b)?' construction (two states) should have disappeared, leaving // this expr as 'foobar.*teakettle' - ASSERT_EQ(countBefore - 2, num_vertices(graph->g)); + ASSERT_EQ(countBefore - 2, num_vertices(*graph)); } TEST(NFAGraph, RemoveRedundancy4) { @@ -169,11 +169,11 @@ TEST(NFAGraph, RemoveRedundancy4) { unique_ptr graph(constructGraphWithCC("foo([A-Z]|a|b|q)", cc, 0)); ASSERT_TRUE(graph.get() != nullptr); - unsigned countBefore = num_vertices(graph->g); + unsigned countBefore = num_vertices(*graph); removeRedundancy(*graph, SOM_NONE); // We should end up with the alternation collapsing into one state - ASSERT_EQ(countBefore - 3, num_vertices(graph->g)); + ASSERT_EQ(countBefore - 3, num_vertices(*graph)); } TEST(NFAGraph, RemoveRedundancy5) { @@ -182,12 +182,12 @@ TEST(NFAGraph, RemoveRedundancy5) { cc, 0)); ASSERT_TRUE(graph.get() != nullptr); - unsigned countBefore = num_vertices(graph->g); + unsigned countBefore = num_vertices(*graph); removeRedundancy(*graph, SOM_NONE); // Since we don't return a start offset, the first state ('[0-9]?') is // redundant. - ASSERT_EQ(countBefore - 1, num_vertices(graph->g)); + ASSERT_EQ(countBefore - 1, num_vertices(*graph)); } TEST(NFAGraph, RemoveEdgeRedundancy1) { @@ -196,12 +196,12 @@ TEST(NFAGraph, RemoveEdgeRedundancy1) { auto graph = constructGraphWithCC("A+hatstand", cc, HS_FLAG_DOTALL); ASSERT_TRUE(graph.get() != nullptr); - unsigned countBefore = num_edges(graph->g); + unsigned countBefore = num_edges(*graph); removeEdgeRedundancy(*graph, SOM_NONE, cc); // One edge (the self-loop on the leading A+) should have been removed. - ASSERT_EQ(countBefore - 1, num_edges(graph->g)); + ASSERT_EQ(countBefore - 1, num_edges(*graph)); } TEST(NFAGraph, RemoveEdgeRedundancy2) { @@ -210,12 +210,12 @@ TEST(NFAGraph, RemoveEdgeRedundancy2) { auto graph = constructGraphWithCC("foo.*A*bar", cc, HS_FLAG_DOTALL); ASSERT_TRUE(graph.get() != nullptr); - size_t numEdgesBefore = num_edges(graph->g); - size_t numVertsBefore = num_vertices(graph->g); + size_t numEdgesBefore = num_edges(*graph); + size_t numVertsBefore = num_vertices(*graph); removeEdgeRedundancy(*graph, SOM_NONE, cc); // The .* should swallow up the A* and its self-loop. 
- ASSERT_EQ(numEdgesBefore - 4, num_edges(graph->g)); - ASSERT_EQ(numVertsBefore - 1, num_vertices(graph->g)); + ASSERT_EQ(numEdgesBefore - 4, num_edges(*graph)); + ASSERT_EQ(numVertsBefore - 1, num_vertices(*graph)); } diff --git a/unit/internal/rose_build_merge.cpp b/unit/internal/rose_build_merge.cpp index 3f5a8382..291c241a 100644 --- a/unit/internal/rose_build_merge.cpp +++ b/unit/internal/rose_build_merge.cpp @@ -64,7 +64,6 @@ RoseVertex addVertex(RoseBuildImpl &build, RoseVertex parent, u32 lit_id) { RoseGraph &g = build.g; RoseVertex v = add_vertex(g); - g[v].idx = build.vertexIndex++; g[v].min_offset = 0; g[v].max_offset = ROSE_BOUND_INF; g[v].literals.insert(lit_id); diff --git a/util/ng_corpus_generator.cpp b/util/ng_corpus_generator.cpp index 9fa6743e..ca7c413a 100644 --- a/util/ng_corpus_generator.cpp +++ b/util/ng_corpus_generator.cpp @@ -144,7 +144,7 @@ void findPaths(const NGHolder &g, CorpusProperties &cProps, ue2::unordered_set one_way_in; for (const auto &v : vertices_range(g)) { - if (!hasGreaterInDegree(1, v, g)) { + if (in_degree(v, g) <= 1) { one_way_in.insert(v); } } @@ -155,7 +155,7 @@ void findPaths(const NGHolder &g, CorpusProperties &cProps, ptr_vector::auto_type p = open.pop_back(); NFAVertex u = p->back(); - DEBUG_PRINTF("dequeuing path %s, back %u\n", + DEBUG_PRINTF("dequeuing path %s, back %zu\n", pathToString(g, *p).c_str(), g[u].index); NGHolder::adjacency_iterator ai, ae; @@ -187,7 +187,7 @@ void findPaths(const NGHolder &g, CorpusProperties &cProps, // Note that vertices that only have one predecessor don't need // their cycle limit checked, as their predecessors will have // the same count. - DEBUG_PRINTF("exceeded cycle limit for v=%u, pruning path\n", + DEBUG_PRINTF("exceeded cycle limit for v=%zu, pruning path\n", g[v].index); continue; } @@ -301,7 +301,7 @@ void CorpusGeneratorImpl::addRandom(const min_max &mm, string *out) { } unsigned char CorpusGeneratorImpl::getChar(NFAVertex v) { - const CharReach &cr = graph.g[v].char_reach; + const CharReach &cr = graph[v].char_reach; switch (cProps.throwDice()) { case CorpusProperties::ROLLED_MATCH: @@ -521,7 +521,7 @@ CorpusGeneratorUtf8::pathToCorpus(const vector &path) { } static -u32 classify_vertex(const NFAGraph &g, NFAVertex v) { +u32 classify_vertex(const NGHolder &g, NFAVertex v) { const CharReach &cr = g[v].char_reach; if (cr.isSubsetOf(UTF_ASCII_CR)) { return 1; @@ -560,7 +560,7 @@ void expandCodePointSet(const CharReach &cr, CodePointSet *out, u32 mask, } static -void decodePath(const NFAGraph &g, const VertexPath &in, +void decodePath(const NGHolder &g, const VertexPath &in, vector &out) { VertexPath::const_iterator it = in.begin(); while (it != in.end()) { @@ -618,7 +618,7 @@ void translatePaths(const NGHolder &graph, assert(out); for (const auto &path : allPathsTemp) { out->push_back(vector()); - decodePath(graph.g, path, out->back()); + decodePath(graph, path, out->back()); } } diff --git a/util/ng_find_matches.cpp b/util/ng_find_matches.cpp index 60ff0a17..2b337365 100644 --- a/util/ng_find_matches.cpp +++ b/util/ng_find_matches.cpp @@ -34,7 +34,7 @@ #include "ng_find_matches.h" -#include "nfagraph/ng_graph.h" +#include "nfagraph/ng_holder.h" #include "nfagraph/ng_util.h" #include "parser/position.h" #include "util/container.h" From 530d84c6f3fdbc4d2d368940b60df2b719bdcf61 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Tue, 30 Aug 2016 16:08:49 +1000 Subject: [PATCH 056/103] allow edge_descriptors to be created from pair --- src/compiler/asserts.cpp | 2 +- src/nfa/castlecompile.cpp | 6 
++--- src/nfa/limex_compile.cpp | 2 +- src/nfagraph/ng_asserts.cpp | 36 +++++++++++---------------- src/nfagraph/ng_builder.cpp | 4 +-- src/nfagraph/ng_equivalence.cpp | 4 +-- src/nfagraph/ng_netflow.cpp | 2 +- src/nfagraph/ng_redundancy.cpp | 11 +++----- src/nfagraph/ng_repeat.cpp | 2 +- src/nfagraph/ng_restructuring.cpp | 2 +- src/nfagraph/ng_rose.cpp | 8 +++--- src/nfagraph/ng_split.cpp | 2 +- src/nfagraph/ng_uncalc_components.cpp | 26 ++++++++----------- src/nfagraph/ng_util.cpp | 15 ++++------- src/nfagraph/ng_violet.cpp | 4 +-- src/rose/rose_build_add.cpp | 20 ++++++--------- src/rose/rose_build_add_mask.cpp | 6 ++--- src/rose/rose_build_compile.cpp | 4 +-- src/rose/rose_build_convert.cpp | 12 +++------ src/rose/rose_build_matchers.cpp | 6 ++--- src/rose/rose_build_merge.cpp | 9 +++---- src/rose/rose_build_role_aliasing.cpp | 24 +++++------------- src/util/ue2_graph.h | 9 +++++++ 23 files changed, 86 insertions(+), 130 deletions(-) diff --git a/src/compiler/asserts.cpp b/src/compiler/asserts.cpp index e67fd8bc..be836b06 100644 --- a/src/compiler/asserts.cpp +++ b/src/compiler/asserts.cpp @@ -174,7 +174,7 @@ void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache, auto ecit = edge_cache.find(cache_key); if (ecit == edge_cache.end()) { DEBUG_PRINTF("adding edge %zu %zu\n", g[u].index, g[v].index); - NFAEdge e = add_edge(u, v, g).first; + NFAEdge e = add_edge(u, v, g); edge_cache.emplace(cache_key, e); g[e].assert_flags = flags; if (++assert_edge_count > MAX_ASSERT_EDGES) { diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index fb685f21..a956c92a 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -904,7 +904,7 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) { u32 min_bound = pr.bounds.min; // always finite if (min_bound == 0) { // Vacuous case, we can only do this once. assert(!edge(g.start, g.accept, g).second); - NFAEdge e = add_edge(g.start, g.accept, g).first; + NFAEdge e = add_edge(g.start, g.accept, g); g[e].tops.insert(top); g[u].reports.insert(pr.reports.begin(), pr.reports.end()); min_bound = 1; @@ -913,7 +913,7 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) { for (u32 i = 0; i < min_bound; i++) { NFAVertex v = add_vertex(g); g[v].char_reach = pr.reach; - NFAEdge e = add_edge(u, v, g).first; + NFAEdge e = add_edge(u, v, g); if (u == g.start) { g[e].tops.insert(top); } @@ -932,7 +932,7 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) { if (head != u) { add_edge(head, v, g); } - NFAEdge e = add_edge(u, v, g).first; + NFAEdge e = add_edge(u, v, g); if (u == g.start) { g[e].tops.insert(top); } diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 481113e3..ba4d0f0d 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -544,7 +544,7 @@ void filterAccelStates(NGHolder &g, const map> &tops, // Similarly, connect (start, startDs) if necessary. if (!edge(g.start, g.startDs, g).second) { - auto e = add_edge(g.start, g.startDs, g).first; + NFAEdge e = add_edge(g.start, g.startDs, g); tempEdges.push_back(e); // Remove edge later. 
} diff --git a/src/nfagraph/ng_asserts.cpp b/src/nfagraph/ng_asserts.cpp index e0d43e7b..c2f0d68f 100644 --- a/src/nfagraph/ng_asserts.cpp +++ b/src/nfagraph/ng_asserts.cpp @@ -377,17 +377,14 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set *dead) { add_edge(vv, g.accept, g); g[e].assert_flags = 0; add_edge(u, vv, g[e], g); - if (!edge(u, g.acceptEod, g).second) { - add_edge(u, g.acceptEod, g[e], g); - } else { - /* there may already be a different edge from start to eod - * if so we need to make it unconditional and alive - */ - NFAEdge start_eod = edge(u, g.acceptEod, g).first; - + /* there may already be a different edge from start to eod if so + * we need to make it unconditional and alive + */ + if (NFAEdge start_eod = edge(u, g.acceptEod, g)) { g[start_eod].assert_flags = 0; dead->erase(start_eod); - + } else { + add_edge(u, g.acceptEod, g[e], g); } dead->insert(e); } @@ -433,17 +430,14 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set *dead) { add_edge(vv, g.accept, g); g[e].assert_flags = 0; add_edge(u, vv, g[e], g); - if (!edge(u, g.acceptEod, g).second) { - add_edge(u, g.acceptEod, g[e], g); - } else { - /* there may already be a different edge from start to eod - * if so we need to make it unconditional and alive - */ - NFAEdge start_eod = edge(u, g.acceptEod, g).first; - + /* there may already be a different edge from start to eod if so + * we need to make it unconditional and alive + */ + if (NFAEdge start_eod = edge(u, g.acceptEod, g)) { g[start_eod].assert_flags = 0; dead->erase(start_eod); - + } else { + add_edge(u, g.acceptEod, g[e], g); } dead->insert(e); } @@ -496,10 +490,8 @@ void ensureCodePointStart(ReportManager &rm, NGWrapper &g) { * boundaries. Assert resolution handles the badness coming from asserts. * The only other source of trouble is startDs->accept connections. 
*/ - bool exists; - NFAEdge orig; - tie(orig, exists) = edge(g.startDs, g.accept, g); - if (g.utf8 && exists) { + NFAEdge orig = edge(g.startDs, g.accept, g); + if (g.utf8 && orig) { DEBUG_PRINTF("rectifying %u\n", g.reportId); Report ir = rm.getBasicInternalReport(g); ReportID rep = rm.getInternalId(ir); diff --git a/src/nfagraph/ng_builder.cpp b/src/nfagraph/ng_builder.cpp index 6e1ea71e..4ca0b37e 100644 --- a/src/nfagraph/ng_builder.cpp +++ b/src/nfagraph/ng_builder.cpp @@ -194,9 +194,7 @@ pair NFABuilderImpl::addEdge(NFAVertex u, NFAVertex v) { // assert that the edge doesn't already exist assert(edge(u, v, *graph).second == false); - pair e = add_edge(u, v, *graph); - assert(e.second); - return e; + return add_edge(u, v, *graph); } void NFABuilderImpl::addEdge(Position startPos, Position endPos) { diff --git a/src/nfagraph/ng_equivalence.cpp b/src/nfagraph/ng_equivalence.cpp index 7e1f7c6f..32a392a6 100644 --- a/src/nfagraph/ng_equivalence.cpp +++ b/src/nfagraph/ng_equivalence.cpp @@ -564,7 +564,7 @@ void mergeClass(vector> &infos, NGHolder &g, pred_info->succ.erase(old_vertex_info); // if edge doesn't exist, create it - NFAEdge e = add_edge_if_not_present(pred_info->v, new_v, g).first; + NFAEdge e = add_edge_if_not_present(pred_info->v, new_v, g); // put edge tops, if applicable if (!edgetops.empty()) { @@ -576,7 +576,7 @@ void mergeClass(vector> &infos, NGHolder &g, if (new_v_eod) { NFAEdge ee = add_edge_if_not_present(pred_info->v, new_v_eod, - g).first; + g); // put edge tops, if applicable if (!edgetops.empty()) { diff --git a/src/nfagraph/ng_netflow.cpp b/src/nfagraph/ng_netflow.cpp index 4859d864..cff26358 100644 --- a/src/nfagraph/ng_netflow.cpp +++ b/src/nfagraph/ng_netflow.cpp @@ -92,7 +92,7 @@ void addReverseEdges(NGHolder &g, vector &reverseEdge, if (it == allEdges.end()) { // No reverse edge, add one. NFAVertex u = source(fwd, g), v = target(fwd, g); - NFAEdge rev = add_edge(v, u, g).first; + NFAEdge rev = add_edge(v, u, g); it = allEdges.insert(make_pair(make_pair(vidx, uidx), rev)).first; // Add to capacity map. u32 revIndex = g[rev].index; diff --git a/src/nfagraph/ng_redundancy.cpp b/src/nfagraph/ng_redundancy.cpp index 4ca695d8..76bc93da 100644 --- a/src/nfagraph/ng_redundancy.cpp +++ b/src/nfagraph/ng_redundancy.cpp @@ -307,10 +307,8 @@ void markForRemoval(const NFAVertex v, VertexInfoMap &infoMap, static bool hasInEdgeTops(const NGHolder &g, NFAVertex v) { - bool exists; - NFAEdge e; - tie(e, exists) = edge(g.start, v, g); - return exists && !g[e].tops.empty(); + NFAEdge e = edge(g.start, v, g); + return e && !g[e].tops.empty(); } /** Transform (1), removal of redundant vertices. 
*/ @@ -737,11 +735,10 @@ u32 findCyclic(const NGHolder &g, vector &cyclic) { for (auto v : vertices_range(g)) { assert(g[v].index < cyclic.size()); - bool c = edge(v, v, g).second; - if (c) { + if (hasSelfLoop(v, g)) { count++; + cyclic[g[v].index] = true; } - cyclic[g[v].index] = c; } return count; diff --git a/src/nfagraph/ng_repeat.cpp b/src/nfagraph/ng_repeat.cpp index 0aa6dc4b..a16e2715 100644 --- a/src/nfagraph/ng_repeat.cpp +++ b/src/nfagraph/ng_repeat.cpp @@ -1121,7 +1121,7 @@ NFAVertex buildTriggerStates(NGHolder &g, const vector &trigger, g[v].char_reach = cr; add_edge(u, v, g); if (u == g.start) { - g[edge(u, v, g).first].tops.insert(top); + g[edge(u, v, g)].tops.insert(top); } u = v; } diff --git a/src/nfagraph/ng_restructuring.cpp b/src/nfagraph/ng_restructuring.cpp index 7bb3e991..32cdac23 100644 --- a/src/nfagraph/ng_restructuring.cpp +++ b/src/nfagraph/ng_restructuring.cpp @@ -55,7 +55,7 @@ void wireStartToTops(NGHolder &g, const flat_set &tops, for (NFAVertex v : tops) { assert(!isLeafNode(v, g)); - const NFAEdge &e = add_edge(g.start, v, g).first; + const NFAEdge &e = add_edge(g.start, v, g); tempEdges.push_back(e); } } diff --git a/src/nfagraph/ng_rose.cpp b/src/nfagraph/ng_rose.cpp index b3649ce0..1c4163ce 100644 --- a/src/nfagraph/ng_rose.cpp +++ b/src/nfagraph/ng_rose.cpp @@ -871,7 +871,7 @@ u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, clearReports(g); for (auto v : pred) { - NFAEdge e = add_edge(v, g.accept, g).first; + NFAEdge e = add_edge(v, g.accept, g); g[v].reports.insert(0); if (is_triggered(g) && v == g.start) { g[e].tops.insert(DEFAULT_TOP); @@ -904,7 +904,7 @@ void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, } for (auto v : preds) { - NFAEdge e = add_edge(v, prev, g).first; + NFAEdge e = add_edge(v, prev, g); if (v == g.start && is_triggered(g)) { g[e].tops.insert(DEFAULT_TOP); } @@ -2408,14 +2408,14 @@ void explodeLiteral(RoseInGraph &g, RoseInVertex v, g[v_new].s = lit; for (const auto &e : in_edges_range(v, g)) { - RoseInEdge e2 = add_edge(source(e, g), v_new, g[e], g).first; + RoseInEdge e2 = add_edge(source(e, g), v_new, g[e], g); // FIXME: are we safe to share graphs here? For now, make our very // own copy. g[e2].graph = makeGraphCopy(g[e].graph.get()); } for (const auto &e : out_edges_range(v, g)) { - RoseInEdge e2 = add_edge(v_new, target(e, g), g[e], g).first; + RoseInEdge e2 = add_edge(v_new, target(e, g), g[e], g); // FIXME: are we safe to share graphs here? For now, make our very // own copy. 
g[e2].graph = makeGraphCopy(g[e].graph.get()); diff --git a/src/nfagraph/ng_split.cpp b/src/nfagraph/ng_split.cpp index ce267d0f..3c2baee4 100644 --- a/src/nfagraph/ng_split.cpp +++ b/src/nfagraph/ng_split.cpp @@ -151,7 +151,7 @@ void splitRHS(const NGHolder &base, const vector &pivots, for (auto pivot : pivots) { assert(contains(*rhs_map, pivot)); - NFAEdge e = add_edge(rhs->start, (*rhs_map)[pivot], *rhs).first; + NFAEdge e = add_edge(rhs->start, (*rhs_map)[pivot], *rhs); (*rhs)[e].tops.insert(DEFAULT_TOP); } diff --git a/src/nfagraph/ng_uncalc_components.cpp b/src/nfagraph/ng_uncalc_components.cpp index baab3b0f..877c396c 100644 --- a/src/nfagraph/ng_uncalc_components.cpp +++ b/src/nfagraph/ng_uncalc_components.cpp @@ -197,12 +197,9 @@ u32 commonPrefixLength(const NGHolder &ga, const ranking_info &a_ranking, a_count++; - NFAEdge b_edge; - bool has_b_edge; - tie(b_edge, has_b_edge) = edge(b_ranking.at(i), - b_ranking.at(sid), gb); + NFAEdge b_edge = edge(b_ranking.at(i), b_ranking.at(sid), gb); - if (!has_b_edge) { + if (!b_edge) { max = i; DEBUG_PRINTF("lowering max to %u due to edge %zu->%u\n", max, i, sid); @@ -322,7 +319,7 @@ void mergeNfaComponent(NGHolder &dest, const NGHolder &vic, size_t common_len) { DEBUG_PRINTF("skipping common edge\n"); assert(edge(u, v, dest).second); // Should never merge edges with different top values. - assert(vic[e].tops == dest[edge(u, v, dest).first].tops); + assert(vic[e].tops == dest[edge(u, v, dest)].tops); continue; } else { assert(is_any_accept(v, dest)); @@ -508,25 +505,22 @@ bool mergeableStarts(const NGHolder &h1, const NGHolder &h2) { /* TODO: relax top checks if reports match */ // If both graphs have edge (start, accept), the tops must match. - auto e1_accept = edge(h1.start, h1.accept, h1); - auto e2_accept = edge(h2.start, h2.accept, h2); - if (e1_accept.second && e2_accept.second && - h1[e1_accept.first].tops != h2[e2_accept.first].tops) { + NFAEdge e1_accept = edge(h1.start, h1.accept, h1); + NFAEdge e2_accept = edge(h2.start, h2.accept, h2); + if (e1_accept && e2_accept && h1[e1_accept].tops != h2[e2_accept].tops) { return false; } // If both graphs have edge (start, acceptEod), the tops must match. - auto e1_eod = edge(h1.start, h1.acceptEod, h1); - auto e2_eod = edge(h2.start, h2.acceptEod, h2); - if (e1_eod.second && e2_eod.second && - h1[e1_eod.first].tops != h2[e2_eod.first].tops) { + NFAEdge e1_eod = edge(h1.start, h1.acceptEod, h1); + NFAEdge e2_eod = edge(h2.start, h2.acceptEod, h2); + if (e1_eod && e2_eod && h1[e1_eod].tops != h2[e2_eod].tops) { return false; } // If one graph has an edge to accept and the other has an edge to // acceptEod, the reports must match for the merge to be safe. 
- if ((e1_accept.second && e2_eod.second) || - (e2_accept.second && e1_eod.second)) { + if ((e1_accept && e2_eod) || (e2_accept && e1_eod)) { if (h1[h1.start].reports != h2[h2.start].reports) { return false; } diff --git a/src/nfagraph/ng_util.cpp b/src/nfagraph/ng_util.cpp index ad40debe..948cd7f1 100644 --- a/src/nfagraph/ng_util.cpp +++ b/src/nfagraph/ng_util.cpp @@ -146,7 +146,7 @@ void clone_out_edges(NGHolder &g, NFAVertex source, NFAVertex dest) { if (edge(dest, t, g).second) { continue; } - NFAEdge clone = add_edge(dest, t, g).first; + NFAEdge clone = add_edge(dest, t, g); u32 idx = g[clone].index; g[clone] = g[e]; g[clone].index = idx; @@ -157,7 +157,7 @@ void clone_in_edges(NGHolder &g, NFAVertex s, NFAVertex dest) { for (const auto &e : in_edges_range(s, g)) { NFAVertex ss = source(e, g); assert(!edge(ss, dest, g).second); - NFAEdge clone = add_edge(ss, dest, g).first; + NFAEdge clone = add_edge(ss, dest, g); u32 idx = g[clone].index; g[clone] = g[e]; g[clone].index = idx; @@ -324,11 +324,9 @@ bool can_only_match_at_eod(const NGHolder &g) { } bool matches_everywhere(const NGHolder &h) { - NFAEdge e; - bool exists; - tie(e, exists) = edge(h.startDs, h.accept, h); + NFAEdge e = edge(h.startDs, h.accept, h); - return exists && !h[e].assert_flags; + return e && !h[e].assert_flags; } bool is_virtual_start(NFAVertex v, const NGHolder &g) { @@ -623,10 +621,7 @@ void cloneHolder(NGHolder &out, const NGHolder &in) { NFAVertex s = out_mapping[si]; NFAVertex t = out_mapping[ti]; - UNUSED bool added; - NFAEdge e2; - tie(e2, added) = add_edge(s, t, out); - assert(added); + NFAEdge e2 = add_edge(s, t, out); out[e2] = in[e]; } diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 9e50ea3d..985246f0 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -1153,7 +1153,7 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg, * makes a more svelte graphy */ clear_in_edges(temp_map[pivot], *new_lhs); NFAEdge pivot_edge = add_edge(temp_map[prev_v], temp_map[pivot], - *new_lhs).first; + *new_lhs); if (is_triggered(h) && prev_v == h.start) { (*new_lhs)[pivot_edge].tops.insert(DEFAULT_TOP); } @@ -2125,7 +2125,7 @@ void splitEdgesForSuffix(const NGHolder &base_graph, RoseInGraph &vg, add_edge(lhs->accept, lhs->acceptEod, *lhs); clearReports(*lhs); for (NFAVertex v : splitters) { - NFAEdge e = add_edge(v_map[v], lhs->accept, *lhs).first; + NFAEdge e = add_edge(v_map[v], lhs->accept, *lhs); if (v == base_graph.start) { (*lhs)[e].tops.insert(DEFAULT_TOP); } diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index e185bb37..8b10bc7d 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -136,10 +136,7 @@ RoseVertex createVertex(RoseBuildImpl *build, const RoseVertex parent, /* fill in report information */ g[v].reports.insert(reports.begin(), reports.end()); - RoseEdge e; - bool added; - tie(e, added) = add_edge(parent, v, g); - assert(added); + RoseEdge e = add_edge(parent, v, g); DEBUG_PRINTF("adding edge (%u, %u) to parent\n", minBound, maxBound); g[e].minBound = minBound; @@ -169,7 +166,7 @@ RoseVertex createAnchoredVertex(RoseBuildImpl *build, u32 literalId, DEBUG_PRINTF("created anchored vertex %zu with lit id %u\n", g[v].index, literalId); - RoseEdge e = add_edge(build->anchored_root, v, g).first; + RoseEdge e = add_edge(build->anchored_root, v, g); g[e].minBound = min_offset; g[e].maxBound = max_offset; @@ -315,10 +312,7 @@ void createVertices(RoseBuildImpl *tbi, RoseVertex p = pv.first; - RoseEdge e; - bool added; - 
tie(e, added) = add_edge(p, w, g); - assert(added); + RoseEdge e = add_edge(p, w, g); DEBUG_PRINTF("adding edge (%u,%u) to parent\n", edge_props.minBound, edge_props.maxBound); g[e].minBound = edge_props.minBound; @@ -356,7 +350,7 @@ void createVertices(RoseBuildImpl *tbi, for (const auto &pv : parents) { const RoseInEdgeProps &edge_props = bd.ig[pv.second]; - RoseEdge e = add_edge(pv.first, g_v, tbi->g).first; + RoseEdge e = add_edge(pv.first, g_v, tbi->g); g[e].minBound = edge_props.minBound; g[e].maxBound = edge_props.maxBound; g[e].history = selectHistory(*tbi, bd, pv.second, e); @@ -709,7 +703,7 @@ void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u, g[v].left.graph = eod_leftfix; g[v].left.leftfix_report = report_mapping.second; g[v].left.lag = 0; - RoseEdge e1 = add_edge(u, v, g).first; + RoseEdge e1 = add_edge(u, v, g); g[e1].minBound = 0; g[e1].maxBound = ROSE_BOUND_INF; g[v].min_offset = add_rose_depth(g[u].min_offset, @@ -729,7 +723,7 @@ void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u, g[w].reports = report_mapping.first; g[w].min_offset = g[v].min_offset; g[w].max_offset = g[v].max_offset; - RoseEdge e = add_edge(v, w, g).first; + RoseEdge e = add_edge(v, w, g); g[e].minBound = 0; g[e].maxBound = 0; g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE; @@ -803,7 +797,7 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi, g[w].reports = ig[iv].reports; g[w].min_offset = g[u].min_offset; g[w].max_offset = g[u].max_offset; - RoseEdge e = add_edge(u, w, g).first; + RoseEdge e = add_edge(u, w, g); g[e].minBound = 0; g[e].maxBound = 0; g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE; diff --git a/src/rose/rose_build_add_mask.cpp b/src/rose/rose_build_add_mask.cpp index f46e1004..de3bdf0a 100644 --- a/src/rose/rose_build_add_mask.cpp +++ b/src/rose/rose_build_add_mask.cpp @@ -532,7 +532,7 @@ void addTransientMask(RoseBuildImpl &build, const vector &mask, g[v].left.leftfix_report = mask_report; } else { // Make sure our edge bounds are correct. - auto e = edge(parent, v, g).first; + RoseEdge e = edge(parent, v, g); g[e].minBound = 0; g[e].maxBound = anchored ? 0 : ROSE_BOUND_INF; g[e].history = anchored ? 
ROSE_ROLE_HISTORY_ANCH @@ -544,7 +544,7 @@ void addTransientMask(RoseBuildImpl &build, const vector &mask, g[v].max_offset = v_max_offset; if (eod) { - auto e = add_edge(v, eod_v, g).first; + RoseEdge e = add_edge(v, eod_v, g); g[e].minBound = 0; g[e].maxBound = 0; g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE; @@ -574,7 +574,7 @@ unique_ptr buildMaskRhs(const ue2::flat_set &reports, succ = u; } - NFAEdge e = add_edge(h.start, succ, h).first; + NFAEdge e = add_edge(h.start, succ, h); h[e].tops.insert(DEFAULT_TOP); return rhs; diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 2f1af8a4..e13d7c5c 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -1312,7 +1312,7 @@ void addSmallBlockLiteral(RoseBuildImpl &tbi, const simple_anchored_info &sai, g[v].max_offset = sai.max_bound + sai.literal.length(); lit_info.vertices.insert(v); - RoseEdge e = add_edge(anchored_root, v, g).first; + RoseEdge e = add_edge(anchored_root, v, g); g[e].minBound = sai.min_bound; g[e].maxBound = sai.max_bound; } @@ -1336,7 +1336,7 @@ void addSmallBlockLiteral(RoseBuildImpl &tbi, const ue2_literal &lit, g[v].literals.insert(lit_id); g[v].reports = reports; - RoseEdge e = add_edge(tbi.root, v, g).first; + RoseEdge e = add_edge(tbi.root, v, g); g[e].minBound = 0; g[e].maxBound = ROSE_BOUND_INF; g[v].min_offset = 1; diff --git a/src/rose/rose_build_convert.cpp b/src/rose/rose_build_convert.cpp index ae08b7cb..b151c0c9 100644 --- a/src/rose/rose_build_convert.cpp +++ b/src/rose/rose_build_convert.cpp @@ -394,7 +394,7 @@ unique_ptr makeFloodProneSuffix(const ue2_literal &s, size_t len, NFAVertex u = h->start; for (auto it = s.begin() + s.length() - len; it != s.end(); ++it) { NFAVertex v = addHolderVertex(*it, *h); - NFAEdge e = add_edge(u, v, *h).first; + NFAEdge e = add_edge(u, v, *h); if (u == h->start) { (*h)[e].tops.insert(DEFAULT_TOP); } @@ -705,10 +705,7 @@ bool handleStartPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, assert(g[e_old].maxBound >= bound_max); setEdgeBounds(g, e_old, bound_min, bound_max); } else { - RoseEdge e_new; - UNUSED bool added; - tie(e_new, added) = add_edge(ar, v, g); - assert(added); + RoseEdge e_new = add_edge(ar, v, g); setEdgeBounds(g, e_new, bound_min, bound_max); to_delete->push_back(e_old); } @@ -900,10 +897,7 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, if (source(e_old, g) == ar) { setEdgeBounds(g, e_old, ri.repeatMin + width, ri.repeatMax + width); } else { - RoseEdge e_new; - UNUSED bool added; - tie(e_new, added) = add_edge(ar, v, g); - assert(added); + RoseEdge e_new = add_edge(ar, v, g); setEdgeBounds(g, e_new, ri.repeatMin + width, ri.repeatMax + width); to_delete->push_back(e_old); } diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index f9251b8a..01633c06 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -454,11 +454,9 @@ bool isNoRunsVertex(const RoseBuildImpl &build, RoseVertex u) { return false; } - RoseEdge e; - bool exists; - tie(e, exists) = edge(build.root, u, g); + RoseEdge e = edge(build.root, u, g); - if (!exists) { + if (!e) { DEBUG_PRINTF("u=%zu is not a root role\n", g[u].index); return false; } diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 01db84a1..2643bdca 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -235,18 +235,15 @@ void mergeDupeLeaves(RoseBuildImpl &tbi) { for (const auto &e : in_edges_range(v, g)) { 
RoseVertex u = source(e, g);
             DEBUG_PRINTF("u index=%zu\n", g[u].index);
 
-            RoseEdge et;
-            bool exists;
-            tie (et, exists) = edge(u, t, g);
-            if (exists) {
+            if (RoseEdge et = edge(u, t, g)) {
                 if (g[et].minBound <= g[e].minBound
                     && g[et].maxBound >= g[e].maxBound) {
                     DEBUG_PRINTF("remove more constrained edge\n");
                     deadEdges.push_back(e);
                 }
             } else {
-                DEBUG_PRINTF("rehome edge: add %zu->%zu\n",
-                             g[u].index, g[t].index);
+                DEBUG_PRINTF("rehome edge: add %zu->%zu\n", g[u].index,
+                             g[t].index);
                 add_edge(u, t, g[e], g);
                 deadEdges.push_back(e);
             }
diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp
index 099e3e7a..c6139097 100644
--- a/src/rose/rose_build_role_aliasing.cpp
+++ b/src/rose/rose_build_role_aliasing.cpp
@@ -254,10 +254,8 @@ bool samePredecessors(RoseVertex a, RoseVertex b, const RoseGraph &g) {
     }
 
     for (const auto &e_a : in_edges_range(a, g)) {
-        bool exists;
-        RoseEdge e;
-        tie(e, exists) = edge(source(e_a, g), b, g);
-        if (!exists || g[e].rose_top != g[e_a].rose_top) {
+        RoseEdge e = edge(source(e_a, g), b, g);
+        if (!e || g[e].rose_top != g[e_a].rose_top) {
             DEBUG_PRINTF("bad tops\n");
             return false;
         }
@@ -271,10 +269,7 @@ static
 bool hasCommonSuccWithBadBounds(RoseVertex a, RoseVertex b,
                                 const RoseGraph &g) {
     for (const auto &e_a : out_edges_range(a, g)) {
-        bool exists;
-        RoseEdge e;
-        tie(e, exists) = edge(b, target(e_a, g), g);
-        if (exists) {
+        if (RoseEdge e = edge(b, target(e_a, g), g)) {
             if (g[e_a].maxBound < g[e].minBound
                 || g[e].maxBound < g[e_a].minBound) {
                 return true;
@@ -293,10 +288,7 @@ static
 bool hasCommonPredWithBadBounds(RoseVertex a, RoseVertex b,
                                 const RoseGraph &g) {
     for (const auto &e_a : in_edges_range(a, g)) {
-        bool exists;
-        RoseEdge e;
-        tie(e, exists) = edge(source(e_a, g), b, g);
-        if (exists) {
+        if (RoseEdge e = edge(source(e_a, g), b, g)) {
             if (g[e_a].maxBound < g[e].minBound
                 || g[e].maxBound < g[e_a].minBound) {
                 return true;
@@ -744,10 +736,7 @@ bool hasCommonPredWithDiffRoses(RoseVertex a, RoseVertex b,
     const bool equal_roses = hasEqualLeftfixes(a, b, g);
 
     for (const auto &e_a : in_edges_range(a, g)) {
-        bool exists;
-        RoseEdge e;
-        tie(e, exists) = edge(source(e_a, g), b, g);
-        if (exists) {
+        if (RoseEdge e = edge(source(e_a, g), b, g)) {
             DEBUG_PRINTF("common pred, e_r=%d r_t %u,%u\n",
                          (int)equal_roses, g[e].rose_top, g[e_a].rose_top);
             if (!equal_roses) {
@@ -1122,8 +1111,7 @@ bool attemptRoseCastleMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a,
         // We should be protected from merging common preds with tops leading
         // to completely different repeats by earlier checks, but just in
         // case...
-        if (edge(source(e, g), a, g).second) {
-            RoseEdge a_edge = edge(source(e, g), a, g).first;
+        if (RoseEdge a_edge = edge(source(e, g), a, g)) {
             u32 a_top = g[a_edge].rose_top;
             const PureRepeat &a_pr = m_castle->repeats[a_top]; // new report
             if (pr != a_pr) {
diff --git a/src/util/ue2_graph.h b/src/util/ue2_graph.h
index 07c24746..7751e332 100644
--- a/src/util/ue2_graph.h
+++ b/src/util/ue2_graph.h
@@ -328,6 +328,15 @@ public:
     edge_descriptor() : p(nullptr), serial(0) { }
     explicit edge_descriptor(edge_node *pp) : p(pp), serial(pp->serial) { }
 
+    /* Convenience ctor to allow us to directly get an edge_descriptor from
+     * edge() and add_edge(). As we have null_edges and we always allow
+     * parallel edges, the bool component of the return from these functions
+     * is not required.
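+     *
+     * For example (a minimal usage sketch, with u and v vertices in a
+     * graph g), call sites can now write:
+     *
+     *     edge_descriptor e = add_edge(u, v, g);   // no ".first" needed
+     *     if (edge_descriptor f = edge(u, v, g)) { // operator bool checks
+     *         ...                                  // that the edge exists
+     *     }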
*/ + edge_descriptor(const std::pair &tup) + : p(tup.first.p), serial(tup.first.serial) { + assert(tup.second == (bool)tup.first); + } + operator bool() const { return p; } bool operator<(const edge_descriptor b) const { if (p && b.p) { From 1614c73eeb18a1feb81138eca9bb704f77cd5dab Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 15 Sep 2016 15:44:35 +1000 Subject: [PATCH 057/103] Implement some ue2_graph functions using TMP rather than friends This helps work around issues with some compilers --- src/util/ue2_graph.h | 568 +++++++++++++++++++++++++++++-------------- 1 file changed, 390 insertions(+), 178 deletions(-) diff --git a/src/util/ue2_graph.h b/src/util/ue2_graph.h index 7751e332..9634b032 100644 --- a/src/util/ue2_graph.h +++ b/src/util/ue2_graph.h @@ -318,6 +318,7 @@ public: } private: + vertex_node *raw(void) { return p; } vertex_node *p; u64a serial; friend ue2_graph; @@ -357,18 +358,13 @@ public: } private: + edge_node *raw(void) { return p; } edge_node *p; u64a serial; friend ue2_graph; }; private: - static - vertex_node *raw(vertex_descriptor v) { return v.p; } - - static - edge_node *raw(edge_descriptor e) { return e.p; } - /* Note: apparently, nested class templates cannot be fully specialised but * they can be partially specialised. Sigh, ... */ template @@ -489,13 +485,13 @@ public: if (main == main_end) { return; } - std::tie(aux, aux_end) = out_edges_i(*main); + std::tie(aux, aux_end) = out_edges_impl(*main); while (aux == aux_end) { ++main; if (main == main_end) { break; } - std::tie(aux, aux_end) = out_edges_i(*main); + std::tie(aux, aux_end) = out_edges_impl(*main); } } edge_iterator() { } @@ -508,7 +504,7 @@ public: if (main == main_end) { break; } - std::tie(aux, aux_end) = out_edges_i(*main); + std::tie(aux, aux_end) = out_edges_impl(*main); } } bool equal(const edge_iterator &other) const { @@ -524,104 +520,87 @@ public: aux_base_iter_type aux_end; }; -private: - static - std::pair - out_edges_i(vertex_descriptor v) { - return {out_edge_iterator(raw(v)->out_edge_list.begin()), - out_edge_iterator(raw(v)->out_edge_list.end())}; - } - public: static vertex_descriptor null_vertex() { return vertex_descriptor(); } - friend - vertex_descriptor add_vertex(Graph &g) { - vertex_node *v = new vertex_node(g.new_serial()); - v->props.index = g.next_vertex_index++; - g.vertices_list.push_back(*v); + vertex_descriptor add_vertex_impl() { + vertex_node *v = new vertex_node(new_serial()); + v->props.index = next_vertex_index++; + vertices_list.push_back(*v); return vertex_descriptor(v); } - friend - void remove_vertex(vertex_descriptor v, Graph &g) { - vertex_node *vv = Graph::raw(v); + void remove_vertex_impl(vertex_descriptor v) { + vertex_node *vv = v.raw(); assert(vv->in_edge_list.empty()); assert(vv->out_edge_list.empty()); - g.vertices_list.erase_and_dispose(g.vertices_list.iterator_to(*vv), - delete_disposer()); + vertices_list.erase_and_dispose(vertices_list.iterator_to(*vv), + delete_disposer()); } - friend - void clear_in_edges(vertex_descriptor v, Graph &g) { - g.graph_edge_count -= Graph::raw(v)->in_edge_list.size(); - Graph::raw(v)->in_edge_list.clear_and_dispose(in_edge_disposer()); + void clear_in_edges_impl(vertex_descriptor v) { + graph_edge_count -= v.raw()->in_edge_list.size(); + v.raw()->in_edge_list.clear_and_dispose(in_edge_disposer()); } - friend - void clear_out_edges(vertex_descriptor v, Graph &g) { - g.graph_edge_count -= Graph::raw(v)->out_edge_list.size(); - Graph::raw(v)->out_edge_list.clear_and_dispose(out_edge_disposer()); - } - - friend - 
void clear_vertex(vertex_descriptor v, Graph &g) { - clear_in_edges(v, g); - clear_out_edges(v, g); + void clear_out_edges_impl(vertex_descriptor v) { + graph_edge_count -= v.raw()->out_edge_list.size(); + v.raw()->out_edge_list.clear_and_dispose(out_edge_disposer()); } /* IncidenceGraph concept functions */ - friend - vertex_descriptor source(edge_descriptor e, const Graph &) { - return vertex_descriptor(Graph::raw(e)->source); + static + vertex_descriptor source_impl(edge_descriptor e) { + return vertex_descriptor(e.raw()->source); } - friend - vertex_descriptor target(edge_descriptor e, const Graph &) { - return vertex_descriptor(Graph::raw(e)->target); + static + vertex_descriptor target_impl(edge_descriptor e) { + return vertex_descriptor(e.raw()->target); } - friend - degree_size_type out_degree(vertex_descriptor v, const Graph &) { - return Graph::raw(v)->out_edge_list.size(); + static + degree_size_type out_degree_impl(vertex_descriptor v) { + return v.raw()->out_edge_list.size(); } - friend + static std::pair - out_edges(vertex_descriptor v, const Graph &) { - return Graph::out_edges_i(v); + out_edges_impl(vertex_descriptor v) { + return {out_edge_iterator(v.raw()->out_edge_list.begin()), + out_edge_iterator(v.raw()->out_edge_list.end())}; } /* BidirectionalGraph concept functions */ - friend - degree_size_type in_degree(vertex_descriptor v, const Graph &) { - return Graph::raw(v)->in_edge_list.size(); + static + degree_size_type in_degree_impl(vertex_descriptor v) { + return v.raw()->in_edge_list.size(); } - friend + static std::pair - in_edges(vertex_descriptor v, const Graph &) { - return {in_edge_iterator(Graph::raw(v)->in_edge_list.begin()), - in_edge_iterator(Graph::raw(v)->in_edge_list.end())}; + in_edges_impl(vertex_descriptor v) { + return {in_edge_iterator(v.raw()->in_edge_list.begin()), + in_edge_iterator(v.raw()->in_edge_list.end())}; } /* Note: this is defined so that self loops are counted twice - which may or * may not be what you want. Actually, you probably don't want this at * all. 
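      *
      * Worked example: a vertex v whose only incident edge is the self loop
      * (v, v) has in_degree(v, g) == 1 and out_degree(v, g) == 1, so
      * degree(v, g) returns 2 even though only a single edge is present.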
*/ - friend - degree_size_type degree(vertex_descriptor v, const Graph &g) { - return in_degree(v, g) + out_degree(v, g); + static + degree_size_type degree_impl(vertex_descriptor v) { + return in_degree_impl(v) + out_degree_impl(v); } /* AdjacencyList concept functions */ - friend + static std::pair - adjacent_vertices(vertex_descriptor v, const Graph &g) { - auto out_edge_its = out_edges(v, g); + adjacent_vertices_impl(vertex_descriptor v) { + auto out_edge_its = out_edges_impl(v); return {adjacency_iterator(out_edge_its.first), adjacency_iterator(out_edge_its.second)}; } @@ -629,18 +608,17 @@ public: /* AdjacencyMatrix concept functions * (Note: complexity guarantee is not met) */ - friend - std::pair edge(vertex_descriptor u, - vertex_descriptor v, const Graph &g) { - if (in_degree(v, g) < out_degree(u, g)) { - for (const edge_descriptor &e : in_edges_range(v, g)) { - if (source(e, g) == u) { + std::pair edge_impl(vertex_descriptor u, + vertex_descriptor v) const { + if (in_degree_impl(v) < out_degree_impl(u)) { + for (const edge_descriptor &e : in_edges_range(v, *this)) { + if (source_impl(e) == u) { return {e, true}; } } } else { - for (const edge_descriptor &e : out_edges_range(u, g)) { - if (target(e, g) == v) { + for (const edge_descriptor &e : out_edges_range(u, *this)) { + if (target_impl(e) == v) { return {e, true}; } } @@ -654,89 +632,78 @@ public: static edge_descriptor null_edge() { return edge_descriptor(); } - friend + static std::pair - inv_adjacent_vertices(vertex_descriptor v, const Graph &g) { - auto in_edge_its = in_edges(v, g); + inv_adjacent_vertices_impl(vertex_descriptor v) { + auto in_edge_its = in_edges_impl(v); return {inv_adjacency_iterator(in_edge_its.first), inv_adjacency_iterator(in_edge_its.second)}; } /* MutableGraph concept functions */ - friend std::pair - add_edge(vertex_descriptor u, vertex_descriptor v, Graph &g) { + add_edge_impl(vertex_descriptor u, vertex_descriptor v) { bool added = true; /* we always allow parallel edges */ - edge_node *e = new edge_node(g.new_serial()); - e->source = Graph::raw(u); - e->target = Graph::raw(v); - e->props.index = g.next_edge_index++; + edge_node *e = new edge_node(new_serial()); + e->source = u.raw(); + e->target = v.raw(); + e->props.index = next_edge_index++; - Graph::raw(u)->out_edge_list.push_back(*e); - Graph::raw(v)->in_edge_list.push_back(*e); + u.raw()->out_edge_list.push_back(*e); + v.raw()->in_edge_list.push_back(*e); - g.graph_edge_count++; + graph_edge_count++; return {edge_descriptor(e), added}; } - friend - void remove_edge(edge_descriptor e, Graph &g) { - g.graph_edge_count--; + void remove_edge_impl(edge_descriptor e) { + graph_edge_count--; - vertex_node *u = Graph::raw(source(e, g)); - vertex_node *v = Graph::raw(target(e, g)); + vertex_node *u = e.raw()->source; + vertex_node *v = e.raw()->target; - v->in_edge_list.erase(v->in_edge_list.iterator_to(*Graph::raw(e))); - u->out_edge_list.erase(u->out_edge_list.iterator_to(*Graph::raw(e))); + v->in_edge_list.erase(v->in_edge_list.iterator_to(*e.raw())); + u->out_edge_list.erase(u->out_edge_list.iterator_to(*e.raw())); - delete Graph::raw(e); - } - - template - friend - void remove_edge(Iter it, Graph &g) { - remove_edge(*it, g); + delete e.raw(); } template - friend - void remove_out_edge_if(vertex_descriptor v, Predicate pred, Graph &g) { + void remove_out_edge_if_impl(vertex_descriptor v, Predicate pred) { out_edge_iterator it, ite; - std::tie(it, ite) = out_edges(v, g); + std::tie(it, ite) = out_edges_impl(v); while (it != ite) { auto jt = it; 
++it; if (pred(*jt)) { - remove_edge(*jt, g); + this->remove_edge_impl(*jt); } } } template - friend - void remove_in_edge_if(vertex_descriptor v, Predicate pred, Graph &g) { + void remove_in_edge_if_impl(vertex_descriptor v, Predicate pred) { in_edge_iterator it, ite; - std::tie(it, ite) = in_edges(v, g); + std::tie(it, ite) = in_edges_impl(v); while (it != ite) { auto jt = it; ++it; if (pred(*jt)) { - remove_edge(*jt, g); + remove_edge_impl(*jt); } } } template - friend - void remove_edge_if(Predicate pred, Graph &g) { + void remove_edge_if_impl(Predicate pred) { edge_iterator it, ite; - std::tie(it, ite) = edges(g); + std::tie(it, ite) = edges_impl(); while (it != ite) { auto jt = it; ++it; if (pred(*jt)) { - remove_edge(*jt, g); + remove_edge_impl(*jt); } } } @@ -744,62 +711,50 @@ public: private: /* GCC 4.8 has bugs with lambdas in templated friend functions, so: */ struct source_match { - source_match(const vertex_descriptor &uu, const Graph &gg) - : u(uu), g(gg) { } - bool operator()(edge_descriptor e) const { return source(e, g) == u; } + explicit source_match(const vertex_descriptor &uu) : u(uu) { } + bool operator()(edge_descriptor e) const { return source_impl(e) == u; } const vertex_descriptor &u; - const Graph &g; }; struct target_match { - target_match(const vertex_descriptor &vv, const Graph &gg) - : v(vv), g(gg) { } - bool operator()(edge_descriptor e) const { return target(e, g) == v; } + explicit target_match(const vertex_descriptor &vv) : v(vv) { } + bool operator()(edge_descriptor e) const { return target_impl(e) == v; } const vertex_descriptor &v; - const Graph &g; }; public: - /* Note: (u,v) variant needs to remove all (parallel) edges between (u,v). * * The edge_descriptor version should be strongly preferred if the * edge_descriptor is available. 
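      *
      * For instance, remove_edge(e, g) unlinks exactly the edge described
      * by e from its two intrusive lists in constant time, whereas
      * remove_edge(u, v, g) must walk the shorter of u's out-edge list and
      * v's in-edge list and remove every parallel (u, v) edge it finds.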
*/ - friend - void remove_edge(const vertex_descriptor &u, - const vertex_descriptor &v, - Graph &g) { - if (in_degree(v, g) < out_degree(u, g)) { - remove_in_edge_if(v, source_match(u, g), g); + void remove_edge_impl(const vertex_descriptor &u, + const vertex_descriptor &v) { + if (in_degree_impl(v) < out_degree_impl(u)) { + remove_in_edge_if_impl(v, source_match(u)); } else { - remove_out_edge_if(u, target_match(v, g), g); + remove_out_edge_if_impl(u, target_match(v)); } } /* VertexListGraph concept functions */ - - friend - vertices_size_type num_vertices(const Graph &g) { - return g.vertices_list.size(); + vertices_size_type num_vertices_impl() const { + return vertices_list.size(); } - friend - std::pair vertices(const Graph &g) { - return {vertex_iterator(g.vertices_list.begin()), - vertex_iterator(g.vertices_list.end())}; + std::pair vertices_impl() const { + return {vertex_iterator(vertices_list.begin()), + vertex_iterator(vertices_list.end())}; } /* EdgeListGraph concept functions (aside from those in IncidenceGraph) */ - friend - edges_size_type num_edges(const Graph &g) { - return g.graph_edge_count; + edges_size_type num_edges_impl() const { + return graph_edge_count; } - friend - std::pair edges(const Graph &g) { + std::pair edges_impl() const { vertex_iterator vi, ve; - std::tie(vi, ve) = vertices(g); + std::tie(vi, ve) = vertices_impl(); return {edge_iterator(vi, ve), edge_iterator(ve, ve)}; } @@ -807,19 +762,19 @@ public: /* bundled properties functions */ vertex_property_type &operator[](vertex_descriptor v) { - return raw(v)->props; + return v.raw()->props; } const vertex_property_type &operator[](vertex_descriptor v) const { - return raw(v)->props; + return v.raw()->props; } edge_property_type &operator[](edge_descriptor e) { - return raw(e)->props; + return e.raw()->props; } const edge_property_type &operator[](edge_descriptor e) const { - return raw(e)->props; + return e.raw()->props; } /* PropertyGraph concept functions & helpers */ @@ -835,7 +790,7 @@ public: prop_map(value_type P_of::*m_in) : member(m_in) { } reference operator[](key_type k) const { - return Graph::raw(k)->props.*member; + return k.raw()->props.*member; } reference operator()(key_type k) const { return (*this)[k]; } @@ -852,7 +807,7 @@ public: typedef typename boost::lvalue_property_map_tag category; reference operator[](key_type k) const { - return Graph::raw(k)->props; + return k.raw()->props; } reference operator()(key_type k) const { return (*this)[k]; } }; @@ -965,12 +920,11 @@ public: * rather than using the index in vp. i.e., except for in rare coincidences: * g[add_vertex(g, vp)].index != vp.index */ - friend - vertex_descriptor add_vertex(const VertexPropertyType &vp, Graph &g) { - vertex_descriptor v = add_vertex(g); - auto i = g[v].index; - g[v] = vp; - g[v].index = i; + vertex_descriptor add_vertex_impl(const VertexPropertyType &vp) { + vertex_descriptor v = add_vertex_impl(); + auto i = (*this)[v].index; + (*this)[v] = vp; + (*this)[v].index = i; return v; } @@ -979,14 +933,13 @@ public: * rather than using the index in ep. 
i.e., except for in rare coincidences:
     * g[add_edge(u, v, g, ep)].index != ep.index */
-    friend
     std::pair<edge_descriptor, bool>
-    add_edge(vertex_descriptor u, vertex_descriptor v,
-             const EdgePropertyType &ep, Graph &g) {
-        auto e = add_edge(u, v, g);
-        auto i = g[e.first].index;
-        g[e.first] = ep;
-        g[e.first].index = i;
+    add_edge_impl(vertex_descriptor u, vertex_descriptor v,
+                  const EdgePropertyType &ep) {
+        auto e = add_edge_impl(u, v);
+        auto i = (*this)[e.first].index;
+        (*this)[e.first] = ep;
+        (*this)[e.first].index = i;
         return e;
     }
@@ -994,44 +947,44 @@ public:
 
     /* End MutablePropertyGraph */
 
     /** Pack the edge index into a contiguous range [ 0, num_edges(g) ). */
-    friend
-    void renumber_edges(Graph &g) {
-        g.next_edge_index = 0;
-        for (const auto &e : edges_range(g)) {
-            g[e].index = g.next_edge_index++;
+    void renumber_edges_impl() {
+        next_edge_index = 0;
+        edge_iterator it;
+        edge_iterator ite;
+        for (std::tie(it, ite) = edges_impl(); it != ite; ++it) {
+            (*this)[*it].index = next_edge_index++;
         }
     }
 
     /** Pack the vertex index into a contiguous range [ 0, num_vertices(g) ).
      * Vertices with indices less than N_SPECIAL_VERTICES are not renumbered.
      */
-    friend
-    void renumber_vertices(Graph &g) {
+    void renumber_vertices_impl() {
         DEBUG_PRINTF("renumbering above %zu\n", Graph::N_SPECIAL_VERTICES);
-        g.next_vertex_index = Graph::N_SPECIAL_VERTICES;
-        for (const auto &v : vertices_range(g)) {
-            if (g[v].index < Graph::N_SPECIAL_VERTICES) {
+        next_vertex_index = Graph::N_SPECIAL_VERTICES;
+        vertex_iterator it;
+        vertex_iterator ite;
+        for (std::tie(it, ite) = vertices_impl(); it != ite; ++it) {
+            if ((*this)[*it].index < Graph::N_SPECIAL_VERTICES) {
                 continue;
             }
-            g[v].index = g.next_vertex_index++;
+            (*this)[*it].index = next_vertex_index++;
        }
    }
 
     /** Returns what the next allocated vertex index will be. This is an
      * upper bound on the values of index for vertices (vertex removal means
      * that there may be gaps). */
-    friend
-    vertices_size_type vertex_index_upper_bound(const Graph &g) {
-        return g.next_vertex_index;
+    vertices_size_type vertex_index_upper_bound_impl() const {
+        return next_vertex_index;
     }
 
     /** Returns what the next allocated edge index will be. This is an upper
      * bound on the values of index for edges (edge removal means that there
      * may be gaps).
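      *
      * e.g. three add_edge() calls allocate edge indices 0, 1 and 2;
      * removing one edge leaves two live indices but an upper bound of 3,
      * so a table indexed by edge index should be sized by this bound (or
      * the graph packed first with renumber_edges()) rather than by
      * num_edges(g).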
*/ - friend - vertices_size_type edge_index_upper_bound(const Graph &g) { - return g.next_edge_index; + vertices_size_type edge_index_upper_bound_impl() const { + return next_edge_index; } using directed_category = boost::directed_tag; @@ -1068,6 +1021,265 @@ public: } }; +template +typename std::enable_if< + std::is_base_of::value, + typename Graph::vertex_descriptor>::type +add_vertex(Graph &g) { + return g.add_vertex_impl(); +} + +template +typename std::enable_if< + std::is_base_of::value>::type +remove_vertex(typename Graph::vertex_descriptor v, Graph &g) { + g.remove_vertex_impl(v); +} + +template +typename std::enable_if< + std::is_base_of::value>::type +clear_in_edges(typename Graph::vertex_descriptor v, Graph &g) { + g.clear_in_edges_impl(v); +} + +template +typename std::enable_if< + std::is_base_of::value>::type +clear_out_edges(typename Graph::vertex_descriptor v, Graph &g) { + g.clear_out_edges_impl(v); +} + +template +typename std::enable_if< + std::is_base_of::value>::type +clear_vertex(typename Graph::vertex_descriptor v, Graph &g) { + g.clear_in_edges_impl(v); + g.clear_out_edges_impl(v); +} + +template +typename std::enable_if< + std::is_base_of::value, + typename Graph::vertex_descriptor>::type +source(typename Graph::edge_descriptor e, const Graph &) { + return Graph::source_impl(e); +} + +template +typename std::enable_if< + std::is_base_of::value, + typename Graph::vertex_descriptor>::type +target(typename Graph::edge_descriptor e, const Graph &) { + return Graph::target_impl(e); +} + +template +typename std::enable_if< + std::is_base_of::value, + typename Graph::degree_size_type>::type +out_degree(typename Graph::vertex_descriptor v, const Graph &) { + return Graph::out_degree_impl(v); +} + +template +typename std::enable_if< + std::is_base_of::value, + std::pair>::type +out_edges(typename Graph::vertex_descriptor v, const Graph &) { + return Graph::out_edges_impl(v); +} + +template +typename std::enable_if< + std::is_base_of::value, + typename Graph::degree_size_type>::type +in_degree(typename Graph::vertex_descriptor v, const Graph &) { + return Graph::in_degree_impl(v); +} + +template +typename std::enable_if< + std::is_base_of::value, + std::pair>::type +in_edges(typename Graph::vertex_descriptor v, const Graph &) { + return Graph::in_edges_impl(v); +} + +template +typename std::enable_if< + std::is_base_of::value, + typename Graph::degree_size_type>::type +degree(typename Graph::vertex_descriptor v, const Graph &) { + return Graph::degree_impl(v); +} + +template +typename std::enable_if< + std::is_base_of::value, + std::pair>::type +adjacent_vertices(typename Graph::vertex_descriptor v, const Graph &) { + return Graph::adjacent_vertices_impl(v); +} + +template +typename std::enable_if< + std::is_base_of::value, + std::pair>::type +edge(typename Graph::vertex_descriptor u, typename Graph::vertex_descriptor v, + const Graph &g) { + return g.edge_impl(u, v); +} + +template +typename std::enable_if< + std::is_base_of::value, + std::pair>::type +inv_adjacent_vertices(typename Graph::vertex_descriptor v, const Graph &) { + return Graph::inv_adjacent_vertices_impl(v); +} + +template +typename std::enable_if< + std::is_base_of::value, + std::pair>::type +add_edge(typename Graph::vertex_descriptor u, + typename Graph::vertex_descriptor v, Graph &g) { + return g.add_edge_impl(u, v); +} + +template +typename std::enable_if< + std::is_base_of::value>::type +remove_edge(typename Graph::edge_descriptor e, Graph &g) { + g.remove_edge_impl(e); +} + +template +typename 
std::enable_if< + !std::is_convertible::value + && std::is_base_of::value>::type +remove_edge(Iter it, Graph &g) { + g.remove_edge_impl(*it); +} + +template +typename std::enable_if< + std::is_base_of::value>::type +remove_out_edge_if(typename Graph::vertex_descriptor v, Predicate pred, + Graph &g) { + g.remove_out_edge_if_impl(v, pred); +} + +template +typename std::enable_if< + std::is_base_of::value>::type +remove_in_edge_if(typename Graph::vertex_descriptor v, Predicate pred, + Graph &g) { + g.remove_in_edge_if_impl(v, pred); +} + +template +typename std::enable_if< + std::is_base_of::value>::type +remove_edge_if(Predicate pred, Graph &g) { + g.remove_edge_if_impl(pred); +} + +template +typename std::enable_if< + std::is_base_of::value>::type +remove_edge(const typename Graph::vertex_descriptor &u, + const typename Graph::vertex_descriptor &v, Graph &g) { + g.remove_edge_impl(u, v); +} + +template +typename std::enable_if< + std::is_base_of::value, + typename Graph::vertices_size_type>::type +num_vertices(const Graph &g) { + return g.num_vertices_impl(); +} + +template +typename std::enable_if< + std::is_base_of::value, + std::pair>::type +vertices(const Graph &g) { + return g.vertices_impl(); +} + +template +typename std::enable_if< + std::is_base_of::value, + typename Graph::edges_size_type>::type +num_edges(const Graph &g) { + return g.num_edges_impl(); +} + +template +typename std::enable_if< + std::is_base_of::value, + std::pair>::type +edges(const Graph &g) { + return g.edges_impl(); +} + +template +typename std::enable_if< + std::is_base_of::value, + typename Graph::vertex_descriptor>::type +add_vertex(const typename Graph::vertex_property_type &vp, Graph &g) { + return g.add_vertex_impl(vp); +} + +template +typename std::enable_if< + std::is_base_of::value, + std::pair>::type +add_edge(typename Graph::vertex_descriptor u, + typename Graph::vertex_descriptor v, + const typename Graph::edge_property_type &ep, Graph &g) { + return g.add_edge_impl(u, v, ep); +} + +template +typename std::enable_if< + std::is_base_of::value>::type +renumber_edges(Graph &g) { + g.renumber_edges_impl(); +} + +template +typename std::enable_if< + std::is_base_of::value>::type +renumber_vertices(Graph &g) { + g.renumber_vertices_impl(); +} + +template +typename std::enable_if< + std::is_base_of::value, + typename Graph::vertices_size_type>::type +vertex_index_upper_bound(const Graph &g) { + return g.vertex_index_upper_bound_impl(); +} + +template +typename std::enable_if< + std::is_base_of::value, + typename Graph::edges_size_type>::type +edge_index_upper_bound(const Graph &g) { + return g.edge_index_upper_bound_impl(); +} + using boost::vertex_index; using boost::edge_index; From 71ff480b7767a94db32712d0d28ab434bca6ef22 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Fri, 28 Oct 2016 09:50:09 +1100 Subject: [PATCH 058/103] nfa_api: remove subtype from dispatch --- src/nfa/castle.c | 55 +++++++++++---------- src/nfa/castle.h | 34 ++++++------- src/nfa/castle_dump.cpp | 6 +-- src/nfa/castle_dump.h | 5 +- src/nfa/castlecompile.cpp | 2 +- src/nfa/lbr.c | 20 ++++---- src/nfa/lbr_dump.cpp | 10 ++-- src/nfa/mpv.c | 28 +++++------ src/nfa/mpv.h | 34 ++++++------- src/nfa/mpv_dump.cpp | 6 +-- src/nfa/mpv_dump.h | 6 +-- src/nfa/mpvcompile.cpp | 2 +- src/nfa/nfa_api_dispatch.c | 52 ++++++++++---------- src/nfa/nfa_build_util.cpp | 90 +++++++++++++++++------------------ src/nfa/nfa_dump_dispatch.cpp | 54 ++++++++++----------- src/nfa/nfa_internal.h | 30 ++++++------ src/nfa/sheng.c | 43 ++++++++--------- 
src/nfa/sheng.h | 38 +++++++-------- src/nfa/shengcompile.cpp | 2 +- src/nfa/shengdump.cpp | 8 ++-- src/nfa/shengdump.h | 6 +-- src/nfa/tamarama.c | 36 +++++++------- src/nfa/tamarama.h | 39 ++++++++------- src/nfa/tamarama_dump.cpp | 6 +-- src/nfa/tamarama_dump.h | 6 +-- src/nfa/tamaramacompile.cpp | 2 +- src/nfagraph/ng_lbr.cpp | 17 +++---- src/rose/catchup.c | 4 +- src/runtime.c | 6 +-- 29 files changed, 318 insertions(+), 329 deletions(-) diff --git a/src/nfa/castle.c b/src/nfa/castle.c index 6a72ae31..7c158b31 100644 --- a/src/nfa/castle.c +++ b/src/nfa/castle.c @@ -745,10 +745,10 @@ void clear_repeats(const struct Castle *c, const struct mq *q, u8 *active) { } static really_inline -char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end, - enum MatchMode mode) { +char nfaExecCastle_Q_i(const struct NFA *n, struct mq *q, s64a end, + enum MatchMode mode) { assert(n && q); - assert(n->type == CASTLE_NFA_0); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("state=%p, streamState=%p\n", q->state, q->streamState); @@ -856,14 +856,14 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end, return mmbit_any_precise(active, c->numRepeats); } -char nfaExecCastle0_Q(const struct NFA *n, struct mq *q, s64a end) { +char nfaExecCastle_Q(const struct NFA *n, struct mq *q, s64a end) { DEBUG_PRINTF("entry\n"); - return nfaExecCastle0_Q_i(n, q, end, CALLBACK_OUTPUT); + return nfaExecCastle_Q_i(n, q, end, CALLBACK_OUTPUT); } -char nfaExecCastle0_Q2(const struct NFA *n, struct mq *q, s64a end) { +char nfaExecCastle_Q2(const struct NFA *n, struct mq *q, s64a end) { DEBUG_PRINTF("entry\n"); - return nfaExecCastle0_Q_i(n, q, end, STOP_AT_MATCH); + return nfaExecCastle_Q_i(n, q, end, STOP_AT_MATCH); } static @@ -896,9 +896,9 @@ s64a castleLastKillLoc(const struct Castle *c, struct mq *q) { return sp - 1; /* the repeats are never killed */ } -char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) { +char nfaExecCastle_QR(const struct NFA *n, struct mq *q, ReportID report) { assert(n && q); - assert(n->type == CASTLE_NFA_0); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry\n"); if (q->cur == q->end) { @@ -959,9 +959,9 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) { return 1; } -char nfaExecCastle0_reportCurrent(const struct NFA *n, struct mq *q) { +char nfaExecCastle_reportCurrent(const struct NFA *n, struct mq *q) { assert(n && q); - assert(n->type == CASTLE_NFA_0); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry\n"); const struct Castle *c = getImplNfa(n); @@ -969,19 +969,19 @@ char nfaExecCastle0_reportCurrent(const struct NFA *n, struct mq *q) { return 0; } -char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report, - struct mq *q) { +char nfaExecCastle_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { assert(n && q); - assert(n->type == CASTLE_NFA_0); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry\n"); const struct Castle *c = getImplNfa(n); return castleInAccept(c, q, report, q_cur_offset(q)); } -char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q) { +char nfaExecCastle_inAnyAccept(const struct NFA *n, struct mq *q) { assert(n && q); - assert(n->type == CASTLE_NFA_0); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry\n"); const struct Castle *c = getImplNfa(n); @@ -1019,9 +1019,9 @@ char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q) { } -char nfaExecCastle0_queueInitState(UNUSED const struct NFA *n, struct mq *q) { +char nfaExecCastle_queueInitState(UNUSED 
const struct NFA *n, struct mq *q) { assert(n && q); - assert(n->type == CASTLE_NFA_0); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry\n"); const struct Castle *c = getImplNfa(n); @@ -1038,10 +1038,10 @@ char nfaExecCastle0_queueInitState(UNUSED const struct NFA *n, struct mq *q) { return 0; } -char nfaExecCastle0_initCompressedState(const struct NFA *n, UNUSED u64a offset, - void *state, UNUSED u8 key) { +char nfaExecCastle_initCompressedState(const struct NFA *n, UNUSED u64a offset, + void *state, UNUSED u8 key) { assert(n && state); - assert(n->type == CASTLE_NFA_0); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry\n"); const struct Castle *c = getImplNfa(n); @@ -1070,10 +1070,10 @@ void subCastleQueueCompressState(const struct Castle *c, const u32 subIdx, repeatPack(packed, info, rctrl, offset); } -char nfaExecCastle0_queueCompressState(const struct NFA *n, const struct mq *q, - s64a loc) { +char nfaExecCastle_queueCompressState(const struct NFA *n, const struct mq *q, + s64a loc) { assert(n && q); - assert(n->type == CASTLE_NFA_0); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry, loc=%lld\n", loc); const struct Castle *c = getImplNfa(n); @@ -1118,11 +1118,10 @@ void subCastleExpandState(const struct Castle *c, const u32 subIdx, packed + info->packedCtrlSize, offset)); } -char nfaExecCastle0_expandState(const struct NFA *n, void *dest, - const void *src, u64a offset, - UNUSED u8 key) { +char nfaExecCastle_expandState(const struct NFA *n, void *dest, const void *src, + u64a offset, UNUSED u8 key) { assert(n && dest && src); - assert(n->type == CASTLE_NFA_0); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry, src=%p, dest=%p, offset=%llu\n", src, dest, offset); const struct Castle *c = getImplNfa(n); diff --git a/src/nfa/castle.h b/src/nfa/castle.h index 84d79097..cc7496ca 100644 --- a/src/nfa/castle.h +++ b/src/nfa/castle.h @@ -38,24 +38,24 @@ extern "C" { struct mq; struct NFA; -char nfaExecCastle0_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecCastle0_Q2(const struct NFA *n, struct mq *q, s64a end); -char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecCastle0_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report, - struct mq *q); -char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecCastle0_queueInitState(const struct NFA *n, struct mq *q); -char nfaExecCastle0_initCompressedState(const struct NFA *n, u64a offset, - void *state, u8 key); -char nfaExecCastle0_queueCompressState(const struct NFA *nfa, - const struct mq *q, s64a loc); -char nfaExecCastle0_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, u8 key); +char nfaExecCastle_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecCastle_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecCastle_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecCastle_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecCastle_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecCastle_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecCastle_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecCastle_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecCastle_queueCompressState(const struct NFA *nfa, const struct mq *q, + s64a loc); +char nfaExecCastle_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 
key); -#define nfaExecCastle0_testEOD NFA_API_NO_IMPL -#define nfaExecCastle0_B_Reverse NFA_API_NO_IMPL -#define nfaExecCastle0_zombie_status NFA_API_ZOMBIE_NO_IMPL +#define nfaExecCastle_testEOD NFA_API_NO_IMPL +#define nfaExecCastle_B_Reverse NFA_API_NO_IMPL +#define nfaExecCastle_zombie_status NFA_API_ZOMBIE_NO_IMPL #ifdef __cplusplus } diff --git a/src/nfa/castle_dump.cpp b/src/nfa/castle_dump.cpp index fd1521a5..6d51b2ce 100644 --- a/src/nfa/castle_dump.cpp +++ b/src/nfa/castle_dump.cpp @@ -48,8 +48,8 @@ namespace ue2 { -void nfaExecCastle0_dumpDot(const struct NFA *, FILE *, - UNUSED const std::string &base) { +void nfaExecCastle_dumpDot(const struct NFA *, FILE *, + UNUSED const std::string &base) { // No GraphViz output for Castles. } @@ -68,7 +68,7 @@ void dumpTextSubCastle(const SubCastle &sub, FILE *f) { fprintf(f, "\n"); } -void nfaExecCastle0_dumpText(const struct NFA *nfa, FILE *f) { +void nfaExecCastle_dumpText(const struct NFA *nfa, FILE *f) { const Castle *c = (const Castle *)getImplNfa(nfa); fprintf(f, "Castle multi-tenant repeat engine\n"); diff --git a/src/nfa/castle_dump.h b/src/nfa/castle_dump.h index 94dadec0..d5ec7d3d 100644 --- a/src/nfa/castle_dump.h +++ b/src/nfa/castle_dump.h @@ -38,9 +38,8 @@ struct NFA; namespace ue2 { -void nfaExecCastle0_dumpDot(const NFA *nfa, FILE *file, - const std::string &base); -void nfaExecCastle0_dumpText(const NFA *nfa, FILE *file); +void nfaExecCastle_dumpDot(const NFA *nfa, FILE *file, const std::string &base); +void nfaExecCastle_dumpText(const NFA *nfa, FILE *file); } // namespace ue2 diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index a956c92a..1f767353 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -577,7 +577,7 @@ buildCastle(const CastleProto &proto, total_size += byte_length(stale_iter); // stale sparse iter aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size); - nfa->type = verify_u8(CASTLE_NFA_0); + nfa->type = verify_u8(CASTLE_NFA); nfa->length = verify_u32(total_size); nfa->nPositions = verify_u32(subs.size()); nfa->streamStateSize = streamStateSize; diff --git a/src/nfa/lbr.c b/src/nfa/lbr.c index 07e59239..3075be33 100644 --- a/src/nfa/lbr.c +++ b/src/nfa/lbr.c @@ -307,7 +307,7 @@ char lbrRevScanDot(UNUSED const struct NFA *nfa, UNUSED const u8 *buf, UNUSED size_t begin, UNUSED size_t end, UNUSED size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_Dot); + assert(nfa->type == LBR_NFA_DOT); // Nothing can kill a dot! 
return 0; } @@ -316,7 +316,7 @@ static really_inline char lbrRevScanVerm(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_Verm); + assert(nfa->type == LBR_NFA_VERM); const struct lbr_verm *l = getImplNfa(nfa); if (begin == end) { @@ -340,7 +340,7 @@ static really_inline char lbrRevScanNVerm(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_NVerm); + assert(nfa->type == LBR_NFA_NVERM); const struct lbr_verm *l = getImplNfa(nfa); if (begin == end) { @@ -365,7 +365,7 @@ char lbrRevScanShuf(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_Shuf); + assert(nfa->type == LBR_NFA_SHUF); const struct lbr_shuf *l = getImplNfa(nfa); if (begin == end) { @@ -389,7 +389,7 @@ char lbrRevScanTruf(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_Truf); + assert(nfa->type == LBR_NFA_TRUF); const struct lbr_truf *l = getImplNfa(nfa); if (begin == end) { @@ -413,7 +413,7 @@ char lbrFwdScanDot(UNUSED const struct NFA *nfa, UNUSED const u8 *buf, UNUSED size_t begin, UNUSED size_t end, UNUSED size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_Dot); + assert(nfa->type == LBR_NFA_DOT); // Nothing can kill a dot! return 0; } @@ -422,7 +422,7 @@ static really_inline char lbrFwdScanVerm(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_Verm); + assert(nfa->type == LBR_NFA_VERM); const struct lbr_verm *l = getImplNfa(nfa); if (begin == end) { @@ -446,7 +446,7 @@ static really_inline char lbrFwdScanNVerm(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_NVerm); + assert(nfa->type == LBR_NFA_NVERM); const struct lbr_verm *l = getImplNfa(nfa); if (begin == end) { @@ -471,7 +471,7 @@ char lbrFwdScanShuf(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_Shuf); + assert(nfa->type == LBR_NFA_SHUF); const struct lbr_shuf *l = getImplNfa(nfa); if (begin == end) { @@ -495,7 +495,7 @@ char lbrFwdScanTruf(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_Truf); + assert(nfa->type == LBR_NFA_TRUF); const struct lbr_truf *l = getImplNfa(nfa); if (begin == end) { diff --git a/src/nfa/lbr_dump.cpp b/src/nfa/lbr_dump.cpp index 3412ddf5..9619b8d6 100644 --- a/src/nfa/lbr_dump.cpp +++ b/src/nfa/lbr_dump.cpp @@ -90,7 +90,7 @@ void lbrDumpCommon(const lbr_common *lc, FILE *f) { void nfaExecLbrDot_dumpText(const NFA *nfa, FILE *f) { assert(nfa); - assert(nfa->type == LBR_NFA_Dot); + assert(nfa->type == LBR_NFA_DOT); const lbr_dot *ld = (const lbr_dot *)getImplNfa(nfa); lbrDumpCommon(&ld->common, f); fprintf(f, "DOT model\n"); @@ -100,7 +100,7 @@ void nfaExecLbrDot_dumpText(const NFA *nfa, FILE *f) { void nfaExecLbrVerm_dumpText(const NFA *nfa, FILE *f) { assert(nfa); - assert(nfa->type == LBR_NFA_Verm); + assert(nfa->type == LBR_NFA_VERM); const lbr_verm *lv = (const lbr_verm *)getImplNfa(nfa); lbrDumpCommon(&lv->common, f); fprintf(f, "VERM model, scanning for 0x%02x\n", lv->c); @@ -110,7 +110,7 @@ void nfaExecLbrVerm_dumpText(const NFA *nfa, FILE *f) { void 
nfaExecLbrNVerm_dumpText(const NFA *nfa, FILE *f) { assert(nfa); - assert(nfa->type == LBR_NFA_NVerm); + assert(nfa->type == LBR_NFA_NVERM); const lbr_verm *lv = (const lbr_verm *)getImplNfa(nfa); lbrDumpCommon(&lv->common, f); fprintf(f, "NEGATED VERM model, scanning for 0x%02x\n", lv->c); @@ -120,7 +120,7 @@ void nfaExecLbrNVerm_dumpText(const NFA *nfa, FILE *f) { void nfaExecLbrShuf_dumpText(const NFA *nfa, FILE *f) { assert(nfa); - assert(nfa->type == LBR_NFA_Shuf); + assert(nfa->type == LBR_NFA_SHUF); const lbr_shuf *ls = (const lbr_shuf *)getImplNfa(nfa); lbrDumpCommon(&ls->common, f); @@ -133,7 +133,7 @@ void nfaExecLbrShuf_dumpText(const NFA *nfa, FILE *f) { void nfaExecLbrTruf_dumpText(const NFA *nfa, FILE *f) { assert(nfa); - assert(nfa->type == LBR_NFA_Truf); + assert(nfa->type == LBR_NFA_TRUF); const lbr_truf *lt = (const lbr_truf *)getImplNfa(nfa); lbrDumpCommon(<->common, f); diff --git a/src/nfa/mpv.c b/src/nfa/mpv.c index c6c8cb88..552754d6 100644 --- a/src/nfa/mpv.c +++ b/src/nfa/mpv.c @@ -825,21 +825,21 @@ void mpvStoreState(const struct NFA *n, char *state, } } -char nfaExecMpv0_queueCompressState(const struct NFA *nfa, const struct mq *q, - UNUSED s64a loc) { +char nfaExecMpv_queueCompressState(const struct NFA *nfa, const struct mq *q, + UNUSED s64a loc) { void *dest = q->streamState; const void *src = q->state; mpvStoreState(nfa, dest, src); return 0; } -char nfaExecMpv0_expandState(const struct NFA *nfa, void *dest, const void *src, - UNUSED u64a offset, UNUSED u8 key) { +char nfaExecMpv_expandState(const struct NFA *nfa, void *dest, const void *src, + UNUSED u64a offset, UNUSED u8 key) { mpvLoadState(dest, nfa, src); return 0; } -char nfaExecMpv0_reportCurrent(const struct NFA *n, struct mq *q) { +char nfaExecMpv_reportCurrent(const struct NFA *n, struct mq *q) { const struct mpv *m = getImplNfa(n); u64a offset = q_cur_offset(q); struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state; @@ -855,7 +855,7 @@ char nfaExecMpv0_reportCurrent(const struct NFA *n, struct mq *q) { return 0; } -char nfaExecMpv0_queueInitState(const struct NFA *n, struct mq *q) { +char nfaExecMpv_queueInitState(const struct NFA *n, struct mq *q) { struct mpv_decomp_state *out = (void *)q->state; const struct mpv *m = getImplNfa(n); assert(sizeof(*out) <= n->scratchStateSize); @@ -880,8 +880,8 @@ char nfaExecMpv0_queueInitState(const struct NFA *n, struct mq *q) { return 0; } -char nfaExecMpv0_initCompressedState(const struct NFA *n, u64a offset, - void *state, UNUSED u8 key) { +char nfaExecMpv_initCompressedState(const struct NFA *n, u64a offset, + void *state, UNUSED u8 key) { const struct mpv *m = getImplNfa(n); memset(state, 0, m->active_offset); /* active_offset marks end of comp * counters */ @@ -896,7 +896,7 @@ char nfaExecMpv0_initCompressedState(const struct NFA *n, u64a offset, } static really_inline -char nfaExecMpv0_Q_i(const struct NFA *n, struct mq *q, s64a end) { +char nfaExecMpv_Q_i(const struct NFA *n, struct mq *q, s64a end) { u64a offset = q->offset; const u8 *buffer = q->buffer; size_t length = q->length; @@ -1021,18 +1021,18 @@ char nfaExecMpv0_Q_i(const struct NFA *n, struct mq *q, s64a end) { return alive; } -char nfaExecMpv0_Q(const struct NFA *n, struct mq *q, s64a end) { +char nfaExecMpv_Q(const struct NFA *n, struct mq *q, s64a end) { DEBUG_PRINTF("_Q %lld\n", end); - return nfaExecMpv0_Q_i(n, q, end); + return nfaExecMpv_Q_i(n, q, end); } -s64a nfaExecMpv0_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end) { +s64a nfaExecMpv_QueueExecRaw(const 
struct NFA *nfa, struct mq *q, s64a end) { DEBUG_PRINTF("nfa=%p end=%lld\n", nfa, end); #ifdef DEBUG debugQueue(q); #endif - assert(nfa->type == MPV_NFA_0); + assert(nfa->type == MPV_NFA); assert(q && q->context && q->state); assert(end >= 0); assert(q->cur < q->end); @@ -1058,7 +1058,7 @@ s64a nfaExecMpv0_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end) { /* TODO: restore max offset stuff, if/when _interesting_ max offset stuff * is filled in */ - char rv = nfaExecMpv0_Q_i(nfa, q, end); + char rv = nfaExecMpv_Q_i(nfa, q, end); assert(!q->report_current); DEBUG_PRINTF("returned rv=%d, q_trimmed=%d\n", rv, q_trimmed); diff --git a/src/nfa/mpv.h b/src/nfa/mpv.h index a3f90719..3780728d 100644 --- a/src/nfa/mpv.h +++ b/src/nfa/mpv.h @@ -34,27 +34,27 @@ struct mq; struct NFA; -char nfaExecMpv0_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecMpv0_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecMpv0_queueInitState(const struct NFA *n, struct mq *q); -char nfaExecMpv0_initCompressedState(const struct NFA *n, u64a offset, - void *state, u8 key); -char nfaExecMpv0_queueCompressState(const struct NFA *nfa, const struct mq *q, - s64a loc); -char nfaExecMpv0_expandState(const struct NFA *nfa, void *dest, const void *src, - u64a offset, u8 key); +char nfaExecMpv_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMpv_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecMpv_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecMpv_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecMpv_queueCompressState(const struct NFA *nfa, const struct mq *q, + s64a loc); +char nfaExecMpv_expandState(const struct NFA *nfa, void *dest, const void *src, + u64a offset, u8 key); -#define nfaExecMpv0_testEOD NFA_API_NO_IMPL -#define nfaExecMpv0_inAccept NFA_API_NO_IMPL -#define nfaExecMpv0_inAnyAccept NFA_API_NO_IMPL -#define nfaExecMpv0_QR NFA_API_NO_IMPL -#define nfaExecMpv0_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */ -#define nfaExecMpv0_B_Reverse NFA_API_NO_IMPL -#define nfaExecMpv0_zombie_status NFA_API_ZOMBIE_NO_IMPL +#define nfaExecMpv_testEOD NFA_API_NO_IMPL +#define nfaExecMpv_inAccept NFA_API_NO_IMPL +#define nfaExecMpv_inAnyAccept NFA_API_NO_IMPL +#define nfaExecMpv_QR NFA_API_NO_IMPL +#define nfaExecMpv_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */ +#define nfaExecMpv_B_Reverse NFA_API_NO_IMPL +#define nfaExecMpv_zombie_status NFA_API_ZOMBIE_NO_IMPL /** * return 0 if the mpv dies, otherwise returns the location of the next possible * match (given the currently known events). 
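  *
  * A sketch of the expected calling pattern (variable names here are
  * illustrative only):
  *
  *     s64a next_loc = nfaExecMpv_QueueExecRaw(nfa, q, end);
  *     if (!next_loc) {
  *         // MPV is dead; it can be dropped from the active queues
  *     } else {
  *         // re-run the engine when scanning reaches next_loc
  *     }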
*/ -s64a nfaExecMpv0_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end); +s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end); #endif diff --git a/src/nfa/mpv_dump.cpp b/src/nfa/mpv_dump.cpp index da21d7cf..e91d378f 100644 --- a/src/nfa/mpv_dump.cpp +++ b/src/nfa/mpv_dump.cpp @@ -48,8 +48,8 @@ namespace ue2 { -void nfaExecMpv0_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *file, - UNUSED const std::string &base) { +void nfaExecMpv_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *file, + UNUSED const std::string &base) { } static really_inline @@ -128,7 +128,7 @@ void dumpCounter(FILE *f, const mpv_counter_info *c) { fprintf(f, "\n"); } -void nfaExecMpv0_dumpText(const NFA *nfa, FILE *f) { +void nfaExecMpv_dumpText(const NFA *nfa, FILE *f) { const mpv *m = (const mpv *)getImplNfa(nfa); fprintf(f, "Puff the Magic Engines\n"); diff --git a/src/nfa/mpv_dump.h b/src/nfa/mpv_dump.h index 23910dce..b44d2b74 100644 --- a/src/nfa/mpv_dump.h +++ b/src/nfa/mpv_dump.h @@ -38,9 +38,9 @@ struct NFA; namespace ue2 { -void nfaExecMpv0_dumpDot(const struct NFA *nfa, FILE *file, - const std::string &base); -void nfaExecMpv0_dumpText(const struct NFA *nfa, FILE *file); +void nfaExecMpv_dumpDot(const struct NFA *nfa, FILE *file, + const std::string &base); +void nfaExecMpv_dumpText(const struct NFA *nfa, FILE *file); } // namespace ue2 diff --git a/src/nfa/mpvcompile.cpp b/src/nfa/mpvcompile.cpp index b024b530..4d70fa2d 100644 --- a/src/nfa/mpvcompile.cpp +++ b/src/nfa/mpvcompile.cpp @@ -207,7 +207,7 @@ void writeCoreNfa(NFA *nfa, u32 len, u32 min_width, u32 max_counter, nfa->length = len; nfa->nPositions = max_counter - 1; - nfa->type = MPV_NFA_0; + nfa->type = MPV_NFA; nfa->streamStateSize = streamStateSize; assert(16 >= sizeof(mpv_decomp_kilo)); nfa->scratchStateSize = scratchStateSize; diff --git a/src/nfa/nfa_api_dispatch.c b/src/nfa/nfa_api_dispatch.c index 2a213ed6..d4e9eb78 100644 --- a/src/nfa/nfa_api_dispatch.c +++ b/src/nfa/nfa_api_dispatch.c @@ -45,36 +45,36 @@ #include "sheng.h" #include "tamarama.h" -#define DISPATCH_CASE(dc_ltype, dc_ftype, dc_subtype, dc_func_call) \ - case dc_ltype##_NFA_##dc_subtype: \ - return nfaExec##dc_ftype##dc_subtype##dc_func_call; \ +#define DISPATCH_CASE(dc_ltype, dc_ftype, dc_func_call) \ + case dc_ltype: \ + return nfaExec##dc_ftype##dc_func_call; \ break // general framework calls -#define DISPATCH_BY_NFA_TYPE(dbnt_func) \ - switch (nfa->type) { \ - DISPATCH_CASE(LIMEX, LimEx, 32, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 64, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 128, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 256, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 384, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 512, dbnt_func); \ - DISPATCH_CASE(MCCLELLAN, McClellan, 8, dbnt_func); \ - DISPATCH_CASE(MCCLELLAN, McClellan, 16, dbnt_func); \ - DISPATCH_CASE(GOUGH, Gough, 8, dbnt_func); \ - DISPATCH_CASE(GOUGH, Gough, 16, dbnt_func); \ - DISPATCH_CASE(MPV, Mpv, 0, dbnt_func); \ - DISPATCH_CASE(LBR, Lbr, Dot, dbnt_func); \ - DISPATCH_CASE(LBR, Lbr, Verm, dbnt_func); \ - DISPATCH_CASE(LBR, Lbr, NVerm, dbnt_func); \ - DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \ - DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \ - DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \ - DISPATCH_CASE(SHENG, Sheng, 0, dbnt_func); \ - DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func); \ - default: \ - assert(0); \ +#define DISPATCH_BY_NFA_TYPE(dbnt_func) \ + switch (nfa->type) { \ + DISPATCH_CASE(LIMEX_NFA_32, LimEx32, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_64, LimEx64, 
dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_128, LimEx128, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_256, LimEx256, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_384, LimEx384, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_512, LimEx512, dbnt_func); \ + DISPATCH_CASE(MCCLELLAN_NFA_8, McClellan8, dbnt_func); \ + DISPATCH_CASE(MCCLELLAN_NFA_16, McClellan16, dbnt_func); \ + DISPATCH_CASE(GOUGH_NFA_8, Gough8, dbnt_func); \ + DISPATCH_CASE(GOUGH_NFA_16, Gough16, dbnt_func); \ + DISPATCH_CASE(MPV_NFA, Mpv, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_DOT, LbrDot, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_VERM, LbrVerm, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_NVERM, LbrNVerm, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_SHUF, LbrShuf, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_TRUF, LbrTruf, dbnt_func); \ + DISPATCH_CASE(CASTLE_NFA, Castle, dbnt_func); \ + DISPATCH_CASE(SHENG_NFA, Sheng, dbnt_func); \ + DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \ + default: \ + assert(0); \ } char nfaCheckFinalState(const struct NFA *nfa, const char *state, diff --git a/src/nfa/nfa_build_util.cpp b/src/nfa/nfa_build_util.cpp index f3b5329d..3b235bf4 100644 --- a/src/nfa/nfa_build_util.cpp +++ b/src/nfa/nfa_build_util.cpp @@ -269,7 +269,7 @@ const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = d const char *NFATraits::name = "Goughfish 16"; #endif -template<> struct NFATraits { +template<> struct NFATraits { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; @@ -278,14 +278,14 @@ template<> struct NFATraits { static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; -const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; +const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) -const char *NFATraits::name = "Mega-Puff-Vac"; +const char *NFATraits::name = "Mega-Puff-Vac"; #endif -template<> struct NFATraits { +template<> struct NFATraits { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; @@ -294,14 +294,14 @@ template<> struct NFATraits { static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; -const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; +const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) -const char *NFATraits::name = "Castle"; +const char *NFATraits::name = "Castle"; #endif -template<> struct NFATraits { +template<> struct NFATraits { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; @@ -310,14 +310,14 @@ template<> struct NFATraits { static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; -const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; -const nfa_dispatch_fn 
NFATraits<LBR_NFA_Dot>::has_repeats_other_than_firsts = dispatch_false;
+const nfa_dispatch_fn NFATraits<LBR_NFA_DOT>::has_accel = dispatch_false;
+const nfa_dispatch_fn NFATraits<LBR_NFA_DOT>::has_repeats = dispatch_false;
+const nfa_dispatch_fn NFATraits<LBR_NFA_DOT>::has_repeats_other_than_firsts = dispatch_false;
 #if defined(DUMP_SUPPORT)
-const char *NFATraits<LBR_NFA_Dot>::name = "Lim Bounded Repeat (D)";
+const char *NFATraits<LBR_NFA_DOT>::name = "Lim Bounded Repeat (D)";
 #endif
 
-template<> struct NFATraits<LBR_NFA_Verm> {
+template<> struct NFATraits<LBR_NFA_VERM> {
     UNUSED static const char *name;
     static const NFACategory category = NFA_OTHER;
     static const u32 stateAlign = 8;
@@ -326,14 +326,14 @@ template<> struct NFATraits<LBR_NFA_Verm> {
     static const nfa_dispatch_fn has_repeats;
     static const nfa_dispatch_fn has_repeats_other_than_firsts;
 };
-const nfa_dispatch_fn NFATraits<LBR_NFA_Verm>::has_accel = dispatch_false;
-const nfa_dispatch_fn NFATraits<LBR_NFA_Verm>::has_repeats = dispatch_false;
-const nfa_dispatch_fn NFATraits<LBR_NFA_Verm>::has_repeats_other_than_firsts = dispatch_false;
+const nfa_dispatch_fn NFATraits<LBR_NFA_VERM>::has_accel = dispatch_false;
+const nfa_dispatch_fn NFATraits<LBR_NFA_VERM>::has_repeats = dispatch_false;
+const nfa_dispatch_fn NFATraits<LBR_NFA_VERM>::has_repeats_other_than_firsts = dispatch_false;
 #if defined(DUMP_SUPPORT)
-const char *NFATraits<LBR_NFA_Verm>::name = "Lim Bounded Repeat (V)";
+const char *NFATraits<LBR_NFA_VERM>::name = "Lim Bounded Repeat (V)";
 #endif
 
-template<> struct NFATraits<LBR_NFA_NVerm> {
+template<> struct NFATraits<LBR_NFA_NVERM> {
     UNUSED static const char *name;
     static const NFACategory category = NFA_OTHER;
     static const u32 stateAlign = 8;
@@ -342,14 +342,14 @@ template<> struct NFATraits<LBR_NFA_NVerm> {
     static const nfa_dispatch_fn has_repeats;
     static const nfa_dispatch_fn has_repeats_other_than_firsts;
 };
-const nfa_dispatch_fn NFATraits<LBR_NFA_NVerm>::has_accel = dispatch_false;
-const nfa_dispatch_fn NFATraits<LBR_NFA_NVerm>::has_repeats = dispatch_false;
-const nfa_dispatch_fn NFATraits<LBR_NFA_NVerm>::has_repeats_other_than_firsts = dispatch_false;
+const nfa_dispatch_fn NFATraits<LBR_NFA_NVERM>::has_accel = dispatch_false;
+const nfa_dispatch_fn NFATraits<LBR_NFA_NVERM>::has_repeats = dispatch_false;
+const nfa_dispatch_fn NFATraits<LBR_NFA_NVERM>::has_repeats_other_than_firsts = dispatch_false;
 #if defined(DUMP_SUPPORT)
-const char *NFATraits<LBR_NFA_NVerm>::name = "Lim Bounded Repeat (NV)";
+const char *NFATraits<LBR_NFA_NVERM>::name = "Lim Bounded Repeat (NV)";
 #endif
 
-template<> struct NFATraits<LBR_NFA_Shuf> {
+template<> struct NFATraits<LBR_NFA_SHUF> {
     UNUSED static const char *name;
     static const NFACategory category = NFA_OTHER;
     static const u32 stateAlign = 8;
@@ -358,14 +358,14 @@ template<> struct NFATraits<LBR_NFA_Shuf> {
     static const nfa_dispatch_fn has_repeats;
     static const nfa_dispatch_fn has_repeats_other_than_firsts;
 };
-const nfa_dispatch_fn NFATraits<LBR_NFA_Shuf>::has_accel = dispatch_false;
-const nfa_dispatch_fn NFATraits<LBR_NFA_Shuf>::has_repeats = dispatch_false;
-const nfa_dispatch_fn NFATraits<LBR_NFA_Shuf>::has_repeats_other_than_firsts = dispatch_false;
+const nfa_dispatch_fn NFATraits<LBR_NFA_SHUF>::has_accel = dispatch_false;
+const nfa_dispatch_fn NFATraits<LBR_NFA_SHUF>::has_repeats = dispatch_false;
+const nfa_dispatch_fn NFATraits<LBR_NFA_SHUF>::has_repeats_other_than_firsts = dispatch_false;
 #if defined(DUMP_SUPPORT)
-const char *NFATraits<LBR_NFA_Shuf>::name = "Lim Bounded Repeat (S)";
+const char *NFATraits<LBR_NFA_SHUF>::name = "Lim Bounded Repeat (S)";
 #endif
 
-template<> struct NFATraits<LBR_NFA_Truf> {
+template<> struct NFATraits<LBR_NFA_TRUF> {
     UNUSED static const char *name;
     static const NFACategory category = NFA_OTHER;
     static const u32 stateAlign = 8;
@@ -374,14 +374,14 @@ template<> struct NFATraits<LBR_NFA_Truf> {
     static const nfa_dispatch_fn has_repeats;
     static const nfa_dispatch_fn has_repeats_other_than_firsts;
 };
-const nfa_dispatch_fn NFATraits<LBR_NFA_Truf>::has_accel = dispatch_false;
-const nfa_dispatch_fn NFATraits<LBR_NFA_Truf>::has_repeats = dispatch_false;
-const nfa_dispatch_fn NFATraits<LBR_NFA_Truf>::has_repeats_other_than_firsts = dispatch_false;
+const nfa_dispatch_fn NFATraits<LBR_NFA_TRUF>::has_accel = dispatch_false;
+const nfa_dispatch_fn NFATraits<LBR_NFA_TRUF>::has_repeats = dispatch_false;
+const nfa_dispatch_fn NFATraits<LBR_NFA_TRUF>::has_repeats_other_than_firsts = dispatch_false;
 #if defined(DUMP_SUPPORT)
-const char *NFATraits<LBR_NFA_Truf>::name = "Lim Bounded Repeat (M)";
+const char *NFATraits<LBR_NFA_TRUF>::name = "Lim Bounded Repeat (M)";
 #endif
 
-template<> struct NFATraits<SHENG_NFA_0> {
+template<> struct NFATraits<SHENG_NFA> {
     UNUSED static const char *name;
     static const NFACategory category = NFA_OTHER;
     static const u32 stateAlign = 1;
@@ -390,14 +390,14 @@ template<> struct NFATraits<SHENG_NFA_0> {
     static const nfa_dispatch_fn has_repeats;
     static const nfa_dispatch_fn has_repeats_other_than_firsts;
 };
-const nfa_dispatch_fn NFATraits<SHENG_NFA_0>::has_accel = has_accel_sheng;
-const nfa_dispatch_fn NFATraits<SHENG_NFA_0>::has_repeats = dispatch_false;
-const nfa_dispatch_fn NFATraits<SHENG_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
+const nfa_dispatch_fn NFATraits<SHENG_NFA>::has_accel = has_accel_sheng;
+const nfa_dispatch_fn NFATraits<SHENG_NFA>::has_repeats = dispatch_false;
+const nfa_dispatch_fn NFATraits<SHENG_NFA>::has_repeats_other_than_firsts = dispatch_false;
 #if defined(DUMP_SUPPORT)
-const char *NFATraits<SHENG_NFA_0>::name = "Sheng";
+const char *NFATraits<SHENG_NFA>::name = "Sheng";
 #endif
 
-template<> struct NFATraits<TAMARAMA_NFA_0> {
+template<> struct NFATraits<TAMARAMA_NFA> {
     UNUSED static const char *name;
     static const NFACategory category = NFA_OTHER;
     static const u32 stateAlign = 32;
@@ -406,11 +406,11 @@ template<> struct NFATraits<TAMARAMA_NFA_0> {
     static const nfa_dispatch_fn has_repeats;
     static const nfa_dispatch_fn has_repeats_other_than_firsts;
 };
-const nfa_dispatch_fn NFATraits<TAMARAMA_NFA_0>::has_accel = dispatch_false;
-const nfa_dispatch_fn NFATraits<TAMARAMA_NFA_0>::has_repeats = dispatch_false;
-const nfa_dispatch_fn NFATraits<TAMARAMA_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
+const nfa_dispatch_fn NFATraits<TAMARAMA_NFA>::has_accel = dispatch_false;
+const nfa_dispatch_fn NFATraits<TAMARAMA_NFA>::has_repeats = dispatch_false;
+const nfa_dispatch_fn NFATraits<TAMARAMA_NFA>::has_repeats_other_than_firsts = dispatch_false;
 #if defined(DUMP_SUPPORT)
-const char *NFATraits<TAMARAMA_NFA_0>::name = "Tamarama";
+const char *NFATraits<TAMARAMA_NFA>::name = "Tamarama";
 #endif
 
 } // namespace

diff --git a/src/nfa/nfa_dump_dispatch.cpp b/src/nfa/nfa_dump_dispatch.cpp
index f7a5e05d..84190232 100644
--- a/src/nfa/nfa_dump_dispatch.cpp
+++ b/src/nfa/nfa_dump_dispatch.cpp
@@ -49,37 +49,37 @@
 
 namespace ue2 {
 
-#define DISPATCH_CASE(dc_ltype, dc_ftype, dc_subtype, dc_func_call) \
-    case dc_ltype##_NFA_##dc_subtype: \
-        nfaExec##dc_ftype##dc_subtype##dc_func_call; \
+#define DISPATCH_CASE(dc_ltype, dc_ftype, dc_func_call) \
+    case dc_ltype: \
+        nfaExec##dc_ftype##dc_func_call; \
     break
 
 // general framework calls
 
-#define DISPATCH_BY_NFA_TYPE(dbnt_func) \
-    DEBUG_PRINTF("dispatch for NFA type %u\n", nfa->type); \
-    switch (nfa->type) { \
-        DISPATCH_CASE(LIMEX, LimEx, 32, dbnt_func); \
-        DISPATCH_CASE(LIMEX, LimEx, 64, dbnt_func); \
-        DISPATCH_CASE(LIMEX, LimEx, 128, dbnt_func); \
-        DISPATCH_CASE(LIMEX, LimEx, 256, dbnt_func); \
-        DISPATCH_CASE(LIMEX, LimEx, 384, dbnt_func); \
-        DISPATCH_CASE(LIMEX, LimEx, 512, dbnt_func); \
-        DISPATCH_CASE(MCCLELLAN, McClellan, 8, dbnt_func); \
-        DISPATCH_CASE(MCCLELLAN, McClellan, 16, dbnt_func); \
-        DISPATCH_CASE(GOUGH, Gough, 8, dbnt_func); \
-        DISPATCH_CASE(GOUGH, Gough, 16, dbnt_func); \
-        DISPATCH_CASE(MPV, Mpv, 0, dbnt_func); \
-        DISPATCH_CASE(LBR, Lbr, Dot, dbnt_func); \
-        DISPATCH_CASE(LBR, Lbr, Verm, dbnt_func); \
-        DISPATCH_CASE(LBR, Lbr, NVerm, dbnt_func); \
-        DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \
- DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \ - DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \ - DISPATCH_CASE(SHENG, Sheng, 0, dbnt_func); \ - DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func); \ - default: \ - assert(0); \ +#define DISPATCH_BY_NFA_TYPE(dbnt_func) \ + DEBUG_PRINTF("dispatch for NFA type %u\n", nfa->type); \ + switch (nfa->type) { \ + DISPATCH_CASE(LIMEX_NFA_32, LimEx32, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_64, LimEx64, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_128, LimEx128, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_256, LimEx256, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_384, LimEx384, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_512, LimEx512, dbnt_func); \ + DISPATCH_CASE(MCCLELLAN_NFA_8, McClellan8, dbnt_func); \ + DISPATCH_CASE(MCCLELLAN_NFA_16, McClellan16, dbnt_func); \ + DISPATCH_CASE(GOUGH_NFA_8, Gough8, dbnt_func); \ + DISPATCH_CASE(GOUGH_NFA_16, Gough16, dbnt_func); \ + DISPATCH_CASE(MPV_NFA, Mpv, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_DOT, LbrDot, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_VERM, LbrVerm, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_NVERM, LbrNVerm, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_SHUF, LbrShuf, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_TRUF, LbrTruf, dbnt_func); \ + DISPATCH_CASE(CASTLE_NFA, Castle, dbnt_func); \ + DISPATCH_CASE(SHENG_NFA, Sheng, dbnt_func); \ + DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \ + default: \ + assert(0); \ } void nfaDumpDot(const struct NFA *nfa, FILE *dotFile, diff --git a/src/nfa/nfa_internal.h b/src/nfa/nfa_internal.h index 58c3da6c..1ce566ff 100644 --- a/src/nfa/nfa_internal.h +++ b/src/nfa/nfa_internal.h @@ -61,15 +61,15 @@ enum NFAEngineType { MCCLELLAN_NFA_16, /**< magic pseudo nfa */ GOUGH_NFA_8, /**< magic pseudo nfa */ GOUGH_NFA_16, /**< magic pseudo nfa */ - MPV_NFA_0, /**< magic pseudo nfa */ - LBR_NFA_Dot, /**< magic pseudo nfa */ - LBR_NFA_Verm, /**< magic pseudo nfa */ - LBR_NFA_NVerm, /**< magic pseudo nfa */ - LBR_NFA_Shuf, /**< magic pseudo nfa */ - LBR_NFA_Truf, /**< magic pseudo nfa */ - CASTLE_NFA_0, /**< magic pseudo nfa */ - SHENG_NFA_0, /**< magic pseudo nfa */ - TAMARAMA_NFA_0, /**< magic nfa container */ + MPV_NFA, /**< magic pseudo nfa */ + LBR_NFA_DOT, /**< magic pseudo nfa */ + LBR_NFA_VERM, /**< magic pseudo nfa */ + LBR_NFA_NVERM, /**< magic pseudo nfa */ + LBR_NFA_SHUF, /**< magic pseudo nfa */ + LBR_NFA_TRUF, /**< magic pseudo nfa */ + CASTLE_NFA, /**< magic pseudo nfa */ + SHENG_NFA, /**< magic pseudo nfa */ + TAMARAMA_NFA, /**< magic nfa container */ /** \brief bogus NFA - not used */ INVALID_NFA }; @@ -150,7 +150,7 @@ static really_inline int isGoughType(u8 t) { /** \brief True if the given type (from NFA::type) is a Sheng DFA. */ static really_inline int isShengType(u8 t) { - return t == SHENG_NFA_0; + return t == SHENG_NFA; } /** @@ -180,14 +180,14 @@ static really_inline int isNfaType(u8 t) { /** \brief True if the given type (from NFA::type) is an LBR. */ static really_inline int isLbrType(u8 t) { - return t == LBR_NFA_Dot || t == LBR_NFA_Verm || t == LBR_NFA_NVerm || - t == LBR_NFA_Shuf || t == LBR_NFA_Truf; + return t == LBR_NFA_DOT || t == LBR_NFA_VERM || t == LBR_NFA_NVERM || + t == LBR_NFA_SHUF || t == LBR_NFA_TRUF; } /** \brief True if the given type (from NFA::type) is a container engine. */ static really_inline int isContainerType(u8 t) { - return t == TAMARAMA_NFA_0; + return t == TAMARAMA_NFA; } static really_inline @@ -202,14 +202,14 @@ int isMultiTopType(u8 t) { /* Use for functions that return an integer. */ #define NFA_API_NO_IMPL(...) 
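The two hunks above are the point of the enum rename: once the type constants (LIMEX_NFA_32, LBR_NFA_DOT, SHENG_NFA, ...) line up one-for-one with the executable function prefixes (LimEx32, LbrDot, Sheng, ...), DISPATCH_CASE no longer needs a separate subtype argument to rebuild either name. A minimal standalone sketch of the token-pasting pattern follows; DemoEngineType and the nfaExecDemo*_run functions are invented for illustration and this is not the real dispatch header:

#include <cstdio>

enum DemoEngineType { DEMO_NFA_32, DEMO_NFA_64 };

static void nfaExecDemo32_run(void) { printf("32-state path\n"); }
static void nfaExecDemo64_run(void) { printf("64-state path\n"); }

// The enum constant is used directly as the case label; the engine name and
// the call suffix are pasted onto the nfaExec prefix.
#define DEMO_DISPATCH_CASE(dc_ltype, dc_ftype, dc_func_call) \
    case dc_ltype: \
        nfaExec##dc_ftype##dc_func_call; \
        break

int main(void) {
    DemoEngineType type = DEMO_NFA_64;
    switch (type) {
        DEMO_DISPATCH_CASE(DEMO_NFA_32, Demo32, _run());
        DEMO_DISPATCH_CASE(DEMO_NFA_64, Demo64, _run());
    }
    return 0;
}

The ## operator pastes Demo64 and _run into the single token nfaExecDemo64_run before the argument list is applied, which is how suffixes such as _dumpText(nfa, txtFile) are routed in the real macro; previously each case had to stitch dc_ltype##_NFA_##dc_subtype back together, which is why the mixed-case and _0 suffixes had to go.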
\ ({ \ - assert(!"not implemented for this engine!"); \ + assert(!"not implemented for this engine!"); \ 0; /* return value, for places that need it */ \ }) /* Use for _zombie_status functions. */ #define NFA_API_ZOMBIE_NO_IMPL(...) \ ({ \ - assert(!"not implemented for this engine!"); \ + assert(!"not implemented for this engine!"); \ NFA_ZOMBIE_NO; \ }) diff --git a/src/nfa/sheng.c b/src/nfa/sheng.c index a5f96805..837aa7df 100644 --- a/src/nfa/sheng.c +++ b/src/nfa/sheng.c @@ -504,10 +504,10 @@ char runSheng(const struct sheng *sh, struct mq *q, s64a b_end, } } -char nfaExecSheng0_B(const struct NFA *n, u64a offset, const u8 *buffer, - size_t length, NfaCallback cb, void *context) { +char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer, + size_t length, NfaCallback cb, void *context) { DEBUG_PRINTF("smallwrite Sheng\n"); - assert(n->type == SHENG_NFA_0); + assert(n->type == SHENG_NFA); const struct sheng *sh = getImplNfa(n); u8 state = sh->anchored; u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE; @@ -543,32 +543,31 @@ char nfaExecSheng0_B(const struct NFA *n, u64a offset, const u8 *buffer, return state & SHENG_STATE_DEAD ? MO_DEAD : MO_ALIVE; } -char nfaExecSheng0_Q(const struct NFA *n, struct mq *q, s64a end) { +char nfaExecSheng_Q(const struct NFA *n, struct mq *q, s64a end) { const struct sheng *sh = get_sheng(n); char rv = runSheng(sh, q, end, CALLBACK_OUTPUT); return rv; } -char nfaExecSheng0_Q2(const struct NFA *n, struct mq *q, s64a end) { +char nfaExecSheng_Q2(const struct NFA *n, struct mq *q, s64a end) { const struct sheng *sh = get_sheng(n); char rv = runSheng(sh, q, end, STOP_AT_MATCH); return rv; } -char nfaExecSheng0_QR(const struct NFA *n, struct mq *q, ReportID report) { +char nfaExecSheng_QR(const struct NFA *n, struct mq *q, ReportID report) { assert(q_cur_type(q) == MQE_START); const struct sheng *sh = get_sheng(n); char rv = runSheng(sh, q, 0 /* end */, NO_MATCHES); - if (rv && nfaExecSheng0_inAccept(n, report, q)) { + if (rv && nfaExecSheng_inAccept(n, report, q)) { return MO_MATCHES_PENDING; } return rv; } -char nfaExecSheng0_inAccept(const struct NFA *n, ReportID report, - struct mq *q) { +char nfaExecSheng_inAccept(const struct NFA *n, ReportID report, struct mq *q) { assert(n && q); const struct sheng *sh = get_sheng(n); @@ -584,7 +583,7 @@ char nfaExecSheng0_inAccept(const struct NFA *n, ReportID report, return shengHasAccept(sh, aux, report); } -char nfaExecSheng0_inAnyAccept(const struct NFA *n, struct mq *q) { +char nfaExecSheng_inAnyAccept(const struct NFA *n, struct mq *q) { assert(n && q); const struct sheng *sh = get_sheng(n); @@ -595,9 +594,9 @@ char nfaExecSheng0_inAnyAccept(const struct NFA *n, struct mq *q) { return !!aux->accept; } -char nfaExecSheng0_testEOD(const struct NFA *nfa, const char *state, - UNUSED const char *streamState, u64a offset, - NfaCallback cb, void *ctxt) { +char nfaExecSheng_testEOD(const struct NFA *nfa, const char *state, + UNUSED const char *streamState, u64a offset, + NfaCallback cb, void *ctxt) { assert(nfa); const struct sheng *sh = get_sheng(nfa); @@ -613,7 +612,7 @@ char nfaExecSheng0_testEOD(const struct NFA *nfa, const char *state, return fireReports(sh, cb, ctxt, s, offset, NULL, NULL, 1); } -char nfaExecSheng0_reportCurrent(const struct NFA *n, struct mq *q) { +char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q) { const struct sheng *sh = (const struct sheng *)getImplNfa(n); NfaCallback cb = q->cb; void *ctxt = q->context; @@ -636,15 +635,15 @@ char nfaExecSheng0_reportCurrent(const 
struct NFA *n, struct mq *q) { return 0; } -char nfaExecSheng0_initCompressedState(const struct NFA *nfa, u64a offset, - void *state, UNUSED u8 key) { +char nfaExecSheng_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, UNUSED u8 key) { const struct sheng *sh = get_sheng(nfa); u8 *s = (u8 *)state; *s = offset ? sh->floating: sh->anchored; return !(*s & SHENG_STATE_DEAD); } -char nfaExecSheng0_queueInitState(const struct NFA *nfa, struct mq *q) { +char nfaExecSheng_queueInitState(const struct NFA *nfa, struct mq *q) { assert(nfa->scratchStateSize == 1); /* starting in floating state */ @@ -654,8 +653,8 @@ char nfaExecSheng0_queueInitState(const struct NFA *nfa, struct mq *q) { return 0; } -char nfaExecSheng0_queueCompressState(UNUSED const struct NFA *nfa, - const struct mq *q, UNUSED s64a loc) { +char nfaExecSheng_queueCompressState(UNUSED const struct NFA *nfa, + const struct mq *q, UNUSED s64a loc) { void *dest = q->streamState; const void *src = q->state; assert(nfa->scratchStateSize == 1); @@ -664,9 +663,9 @@ char nfaExecSheng0_queueCompressState(UNUSED const struct NFA *nfa, return 0; } -char nfaExecSheng0_expandState(UNUSED const struct NFA *nfa, void *dest, - const void *src, UNUSED u64a offset, - UNUSED u8 key) { +char nfaExecSheng_expandState(UNUSED const struct NFA *nfa, void *dest, + const void *src, UNUSED u64a offset, + UNUSED u8 key) { assert(nfa->scratchStateSize == 1); assert(nfa->streamStateSize == 1); *(u8 *)dest = *(const u8 *)src; diff --git a/src/nfa/sheng.h b/src/nfa/sheng.h index 46ead180..84a2b6b5 100644 --- a/src/nfa/sheng.h +++ b/src/nfa/sheng.h @@ -35,27 +35,27 @@ struct mq; struct NFA; -#define nfaExecSheng0_B_Reverse NFA_API_NO_IMPL -#define nfaExecSheng0_zombie_status NFA_API_ZOMBIE_NO_IMPL +#define nfaExecSheng_B_Reverse NFA_API_NO_IMPL +#define nfaExecSheng_zombie_status NFA_API_ZOMBIE_NO_IMPL -char nfaExecSheng0_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecSheng0_Q2(const struct NFA *n, struct mq *q, s64a end); -char nfaExecSheng0_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecSheng0_inAccept(const struct NFA *n, ReportID report, struct mq *q); -char nfaExecSheng0_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecSheng0_queueInitState(const struct NFA *nfa, struct mq *q); -char nfaExecSheng0_queueCompressState(const struct NFA *nfa, const struct mq *q, - s64a loc); -char nfaExecSheng0_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, u8 key); -char nfaExecSheng0_initCompressedState(const struct NFA *nfa, u64a offset, - void *state, u8 key); -char nfaExecSheng0_testEOD(const struct NFA *nfa, const char *state, - const char *streamState, u64a offset, - NfaCallback callback, void *context); -char nfaExecSheng0_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecSheng_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecSheng_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecSheng_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecSheng_inAccept(const struct NFA *n, ReportID report, struct mq *q); +char nfaExecSheng_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecSheng_queueInitState(const struct NFA *nfa, struct mq *q); +char nfaExecSheng_queueCompressState(const struct NFA *nfa, const struct mq *q, + s64a loc); +char nfaExecSheng_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); +char nfaExecSheng_initCompressedState(const struct NFA *nfa, u64a offset, + void 
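The compress/expand/init plumbing above is trivial for sheng because the engine's entire stream state is a single byte (scratchStateSize and streamStateSize are both 1): a small state id lives in the low bits and status flags such as SHENG_STATE_DEAD ride in the high bits, so initCompressedState just picks the anchored or floating start state and tests the dead flag in place. A hedged sketch of that style of packing follows; the DEMO_ constants and bit positions are assumptions for illustration, not the real SHENG_STATE_* values:

#include <cassert>
#include <cstdint>

// Assumed layout: low bits hold the DFA state id, high bits hold flags.
constexpr uint8_t DEMO_STATE_MASK = 0x0f; // sheng is limited to 16 states
constexpr uint8_t DEMO_STATE_DEAD = 0x20; // engine can never match again

static uint8_t demo_state_id(uint8_t s) { return s & DEMO_STATE_MASK; }
static bool demo_alive(uint8_t s) { return (s & DEMO_STATE_DEAD) == 0; }

// Mirrors the *s = offset ? sh->floating : sh->anchored pattern above.
static uint8_t demo_init(bool floating, uint8_t floatStart, uint8_t anchStart) {
    return floating ? floatStart : anchStart;
}

int main() {
    uint8_t s = demo_init(true, 0x03, 0x01);
    assert(demo_alive(s) && demo_state_id(s) == 0x03);
    return 0;
}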
*state, u8 key); +char nfaExecSheng_testEOD(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context); +char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecSheng0_B(const struct NFA *n, u64a offset, const u8 *buffer, +char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer, size_t length, NfaCallback cb, void *context); #endif /* SHENG_H_ */ diff --git a/src/nfa/shengcompile.cpp b/src/nfa/shengcompile.cpp index 911f6d70..3902dbaf 100644 --- a/src/nfa/shengcompile.cpp +++ b/src/nfa/shengcompile.cpp @@ -358,7 +358,7 @@ void populateBasicInfo(struct NFA *n, dfa_info &info, n->scratchStateSize = 1; n->streamStateSize = 1; n->nPositions = info.size(); - n->type = SHENG_NFA_0; + n->type = SHENG_NFA; n->flags |= info.raw.hasEodReports() ? NFA_ACCEPTS_EOD : 0; sheng *s = (sheng *)getMutableImplNfa(n); diff --git a/src/nfa/shengdump.cpp b/src/nfa/shengdump.cpp index 037dfb05..a7e7fc2b 100644 --- a/src/nfa/shengdump.cpp +++ b/src/nfa/shengdump.cpp @@ -115,8 +115,8 @@ void dumpMasks(FILE *f, const sheng *s) { } } -void nfaExecSheng0_dumpText(const NFA *nfa, FILE *f) { - assert(nfa->type == SHENG_NFA_0); +void nfaExecSheng_dumpText(const NFA *nfa, FILE *f) { + assert(nfa->type == SHENG_NFA); const sheng *s = (const sheng *)getImplNfa(nfa); fprintf(f, "sheng DFA\n"); @@ -243,8 +243,8 @@ void shengGetTransitions(const NFA *n, u16 state, u16 *t) { t[TOP] = aux->top & SHENG_STATE_MASK; } -void nfaExecSheng0_dumpDot(const NFA *nfa, FILE *f, const string &) { - assert(nfa->type == SHENG_NFA_0); +void nfaExecSheng_dumpDot(const NFA *nfa, FILE *f, const string &) { + assert(nfa->type == SHENG_NFA); const sheng *s = (const sheng *)getImplNfa(nfa); dumpDotPreambleDfa(f); diff --git a/src/nfa/shengdump.h b/src/nfa/shengdump.h index 5334894f..008d2aba 100644 --- a/src/nfa/shengdump.h +++ b/src/nfa/shengdump.h @@ -38,9 +38,9 @@ struct NFA; namespace ue2 { -void nfaExecSheng0_dumpDot(const struct NFA *nfa, FILE *file, - const std::string &base); -void nfaExecSheng0_dumpText(const struct NFA *nfa, FILE *file); +void nfaExecSheng_dumpDot(const struct NFA *nfa, FILE *file, + const std::string &base); +void nfaExecSheng_dumpText(const struct NFA *nfa, FILE *file); } // namespace ue2 diff --git a/src/nfa/tamarama.c b/src/nfa/tamarama.c index b5f90e85..43480f06 100644 --- a/src/nfa/tamarama.c +++ b/src/nfa/tamarama.c @@ -265,9 +265,9 @@ void copyBack(const struct Tamarama *t, struct mq *q, struct mq *q1) { #endif } -char nfaExecTamarama0_testEOD(const struct NFA *n, const char *state, - const char *streamState, u64a offset, - NfaCallback callback, void *context) { +char nfaExecTamarama_testEOD(const struct NFA *n, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context) { const struct Tamarama *t = getImplNfa(n); u32 activeIdx = loadActiveIdx(streamState, t->activeIdxSize); if (activeIdx == t->numSubEngines) { @@ -285,8 +285,7 @@ char nfaExecTamarama0_testEOD(const struct NFA *n, const char *state, return MO_CONTINUE_MATCHING; } -char nfaExecTamarama0_QR(const struct NFA *n, struct mq *q, - ReportID report) { +char nfaExecTamarama_QR(const struct NFA *n, struct mq *q, ReportID report) { DEBUG_PRINTF("exec rose\n"); struct mq q1; q1.cur = q1.end = 0; @@ -304,7 +303,7 @@ char nfaExecTamarama0_QR(const struct NFA *n, struct mq *q, return rv; } -char nfaExecTamarama0_reportCurrent(const struct NFA *n, struct mq *q) { +char nfaExecTamarama_reportCurrent(const struct NFA *n, struct 
mq *q) { const struct Tamarama *t = getImplNfa(n); u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); if (activeIdx == t->numSubEngines) { @@ -317,8 +316,8 @@ char nfaExecTamarama0_reportCurrent(const struct NFA *n, struct mq *q) { return nfaReportCurrentMatches(sub, &q1); } -char nfaExecTamarama0_inAccept(const struct NFA *n, ReportID report, - struct mq *q) { +char nfaExecTamarama_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { const struct Tamarama *t = getImplNfa(n); u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); if (activeIdx == t->numSubEngines) { @@ -331,7 +330,7 @@ char nfaExecTamarama0_inAccept(const struct NFA *n, ReportID report, return nfaInAcceptState(sub, report, &q1); } -char nfaExecTamarama0_inAnyAccept(const struct NFA *n, struct mq *q) { +char nfaExecTamarama_inAnyAccept(const struct NFA *n, struct mq *q) { const struct Tamarama *t = getImplNfa(n); u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); if (activeIdx == t->numSubEngines) { @@ -344,7 +343,7 @@ char nfaExecTamarama0_inAnyAccept(const struct NFA *n, struct mq *q) { return nfaInAnyAcceptState(sub, &q1); } -char nfaExecTamarama0_queueInitState(const struct NFA *n, struct mq *q) { +char nfaExecTamarama_queueInitState(const struct NFA *n, struct mq *q) { DEBUG_PRINTF("init state\n"); const struct Tamarama *t = getImplNfa(n); char *ptr = q->streamState; @@ -354,8 +353,8 @@ char nfaExecTamarama0_queueInitState(const struct NFA *n, struct mq *q) { return 0; } -char nfaExecTamarama0_queueCompressState(const struct NFA *n, - const struct mq *q, s64a loc) { +char nfaExecTamarama_queueCompressState(const struct NFA *n, const struct mq *q, + s64a loc) { const struct Tamarama *t = getImplNfa(n); u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); if (activeIdx == t->numSubEngines) { @@ -369,8 +368,8 @@ char nfaExecTamarama0_queueCompressState(const struct NFA *n, return nfaQueueCompressState(sub, &q1, loc); } -char nfaExecTamarama0_expandState(const struct NFA *n, void *dest, - const void *src, u64a offset, u8 key) { +char nfaExecTamarama_expandState(const struct NFA *n, void *dest, + const void *src, u64a offset, u8 key) { const struct Tamarama *t = getImplNfa(n); u32 activeIdx = loadActiveIdx(src, t->activeIdxSize); if (activeIdx == t->numSubEngines) { @@ -383,8 +382,8 @@ char nfaExecTamarama0_expandState(const struct NFA *n, void *dest, return nfaExpandState(sub, dest, subStreamState, offset, key); } -enum nfa_zombie_status nfaExecTamarama0_zombie_status(const struct NFA *n, - struct mq *q, s64a loc) { +enum nfa_zombie_status nfaExecTamarama_zombie_status(const struct NFA *n, + struct mq *q, s64a loc) { const struct Tamarama *t = getImplNfa(n); u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); if (activeIdx == t->numSubEngines) { @@ -397,7 +396,7 @@ enum nfa_zombie_status nfaExecTamarama0_zombie_status(const struct NFA *n, return nfaGetZombieStatus(sub, &q1, loc); } -char nfaExecTamarama0_Q(const struct NFA *n, struct mq *q, s64a end) { +char nfaExecTamarama_Q(const struct NFA *n, struct mq *q, s64a end) { DEBUG_PRINTF("exec\n"); struct mq q1; char rv = MO_ALIVE; @@ -418,8 +417,7 @@ char nfaExecTamarama0_Q(const struct NFA *n, struct mq *q, s64a end) { return rv; } -char nfaExecTamarama0_Q2(const struct NFA *n, - struct mq *q, s64a end) { +char nfaExecTamarama_Q2(const struct NFA *n, struct mq *q, s64a end) { DEBUG_PRINTF("exec to match\n"); struct mq q1; char rv = 0; diff --git a/src/nfa/tamarama.h b/src/nfa/tamarama.h index 
7ccfa5a0..3b52d8de 100644 --- a/src/nfa/tamarama.h +++ b/src/nfa/tamarama.h @@ -41,28 +41,27 @@ struct mq; struct NFA; struct hs_scratch; -char nfaExecTamarama0_testEOD(const struct NFA *n, const char *state, - const char *streamState, u64a offset, - NfaCallback callback, void *context); -char nfaExecTamarama0_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecTamarama0_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecTamarama0_inAccept(const struct NFA *n, ReportID report, - struct mq *q); -char nfaExecTamarama0_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecTamarama0_queueInitState(const struct NFA *n, struct mq *q); -char nfaExecTamarama0_queueCompressState(const struct NFA *n, - const struct mq *q, - s64a loc); -char nfaExecTamarama0_expandState(const struct NFA *n, void *dest, - const void *src, u64a offset, u8 key); -enum nfa_zombie_status nfaExecTamarama0_zombie_status(const struct NFA *n, - struct mq *q, s64a loc); -char nfaExecTamarama0_Q(const struct NFA *nfa, struct mq *q, s64a end); -char nfaExecTamarama0_Q2(const struct NFA *nfa, struct mq *q, s64a end); +char nfaExecTamarama_testEOD(const struct NFA *n, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context); +char nfaExecTamarama_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecTamarama_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecTamarama_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecTamarama_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecTamarama_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecTamarama_queueCompressState(const struct NFA *n, const struct mq *q, + s64a loc); +char nfaExecTamarama_expandState(const struct NFA *n, void *dest, + const void *src, u64a offset, u8 key); +enum nfa_zombie_status nfaExecTamarama_zombie_status(const struct NFA *n, + struct mq *q, s64a loc); +char nfaExecTamarama_Q(const struct NFA *nfa, struct mq *q, s64a end); +char nfaExecTamarama_Q2(const struct NFA *nfa, struct mq *q, s64a end); // only used by outfix and miracles, no implementation for tamarama -#define nfaExecTamarama0_initCompressedState NFA_API_NO_IMPL -#define nfaExecTamarama0_B_Reverse NFA_API_NO_IMPL +#define nfaExecTamarama_initCompressedState NFA_API_NO_IMPL +#define nfaExecTamarama_B_Reverse NFA_API_NO_IMPL #ifdef __cplusplus } diff --git a/src/nfa/tamarama_dump.cpp b/src/nfa/tamarama_dump.cpp index 181fa9af..f03b842c 100644 --- a/src/nfa/tamarama_dump.cpp +++ b/src/nfa/tamarama_dump.cpp @@ -48,8 +48,8 @@ namespace ue2 { -void nfaExecTamarama0_dumpDot(const struct NFA *nfa, UNUSED FILE *f, - const std::string &base) { +void nfaExecTamarama_dumpDot(const struct NFA *nfa, UNUSED FILE *f, + const std::string &base) { const Tamarama *t = (const Tamarama *)getImplNfa(nfa); const u32 *subOffset = (const u32 *)((const char *)t + sizeof(struct Tamarama) + @@ -65,7 +65,7 @@ void nfaExecTamarama0_dumpDot(const struct NFA *nfa, UNUSED FILE *f, } } -void nfaExecTamarama0_dumpText(const struct NFA *nfa, FILE *f) { +void nfaExecTamarama_dumpText(const struct NFA *nfa, FILE *f) { const Tamarama *t = (const Tamarama *)getImplNfa(nfa); fprintf(f, "Tamarama container engine\n"); diff --git a/src/nfa/tamarama_dump.h b/src/nfa/tamarama_dump.h index dc976004..6e3f80ca 100644 --- a/src/nfa/tamarama_dump.h +++ b/src/nfa/tamarama_dump.h @@ -38,9 +38,9 @@ struct NFA; namespace ue2 { -void nfaExecTamarama0_dumpDot(const NFA *nfa, FILE *file, - const std::string 
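A detail worth noting in the Tamarama functions above: the container keeps the index of the currently active sub-engine packed into the first activeIdxSize bytes of its stream state, and an index equal to numSubEngines is the sentinel for "no sub-engine active", which is what all the early returns test (the +1 in calcPackedBytes(subSize + 1) in tamaramacompile.cpp below reserves room for that sentinel). A hedged sketch of the idea follows; DemoContainer and demoLoadActiveIdx are invented stand-ins for the real Tamarama struct and partial-load helpers:

#include <cstdint>
#include <cstring>

struct DemoContainer {
    uint32_t numSubEngines;
    uint32_t activeIdxSize; // bytes needed for values 0..numSubEngines
};

// Stand-in for the real packed-load helper: read activeIdxSize bytes into a
// u32 (little-endian assumption, for this sketch only).
static uint32_t demoLoadActiveIdx(const char *streamState, uint32_t numBytes) {
    uint32_t v = 0;
    memcpy(&v, streamState, numBytes);
    return v;
}

static bool demoAnySubEngineActive(const DemoContainer &t,
                                   const char *streamState) {
    // An index equal to numSubEngines means "nothing active yet".
    return demoLoadActiveIdx(streamState, t.activeIdxSize) != t.numSubEngines;
}

int main() {
    DemoContainer t{4, 1};
    char state[1] = {4}; // sentinel value: no active sub-engine
    return demoAnySubEngineActive(t, state) ? 1 : 0;
}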
&base);
-void nfaExecTamarama0_dumpText(const NFA *nfa, FILE *file);
+void nfaExecTamarama_dumpDot(const NFA *nfa, FILE *file,
+                             const std::string &base);
+void nfaExecTamarama_dumpText(const NFA *nfa, FILE *file);
 
 } // namespace ue2

diff --git a/src/nfa/tamaramacompile.cpp b/src/nfa/tamaramacompile.cpp
index 521c9bb2..c28caacb 100644
--- a/src/nfa/tamaramacompile.cpp
+++ b/src/nfa/tamaramacompile.cpp
@@ -134,7 +134,7 @@ aligned_unique_ptr<NFA> buildTamarama(const TamaInfo &tamaInfo, const u32 queue,
     // so add one to subSize here
     u32 activeIdxSize = calcPackedBytes(subSize + 1);
     aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);
-    nfa->type = verify_u8(TAMARAMA_NFA_0);
+    nfa->type = verify_u8(TAMARAMA_NFA);
     nfa->length = verify_u32(total_size);
     nfa->queueIndex = queue;

diff --git a/src/nfagraph/ng_lbr.cpp b/src/nfagraph/ng_lbr.cpp
index d7183817..d68c7681 100644
--- a/src/nfagraph/ng_lbr.cpp
+++ b/src/nfagraph/ng_lbr.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -153,8 +153,7 @@ aligned_unique_ptr<NFA> buildLbrDot(const CharReach &cr, const depth &repeatMin,
     enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
                                              is_reset);
-    aligned_unique_ptr<NFA> nfa
-        = makeLbrNfa<lbr_dot>(LBR_NFA_Dot, rtype, repeatMax);
+    auto nfa = makeLbrNfa<lbr_dot>(LBR_NFA_DOT, rtype, repeatMax);
     struct lbr_dot *ld = (struct lbr_dot *)getMutableImplNfa(nfa.get());
 
     fillNfa(nfa.get(), &ld->common, report, repeatMin, repeatMax,
@@ -177,8 +176,7 @@ aligned_unique_ptr<NFA> buildLbrVerm(const CharReach &cr,
     enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
                                              is_reset);
-    aligned_unique_ptr<NFA> nfa
-        = makeLbrNfa<lbr_verm>(LBR_NFA_Verm, rtype, repeatMax);
+    auto nfa = makeLbrNfa<lbr_verm>(LBR_NFA_VERM, rtype, repeatMax);
     struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get());
     lv->c = escapes.find_first();
 
@@ -202,8 +200,7 @@ aligned_unique_ptr<NFA> buildLbrNVerm(const CharReach &cr,
     enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
                                              is_reset);
-    aligned_unique_ptr<NFA> nfa
-        = makeLbrNfa<lbr_verm>(LBR_NFA_NVerm, rtype, repeatMax);
+    auto nfa = makeLbrNfa<lbr_verm>(LBR_NFA_NVERM, rtype, repeatMax);
     struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get());
     lv->c = escapes.find_first();
 
@@ -221,8 +218,7 @@ aligned_unique_ptr<NFA> buildLbrShuf(const CharReach &cr,
                                      bool is_reset, ReportID report) {
     enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
                                              is_reset);
-    aligned_unique_ptr<NFA> nfa
-        = makeLbrNfa<lbr_shuf>(LBR_NFA_Shuf, rtype, repeatMax);
+    auto nfa = makeLbrNfa<lbr_shuf>(LBR_NFA_SHUF, rtype, repeatMax);
     struct lbr_shuf *ls = (struct lbr_shuf *)getMutableImplNfa(nfa.get());
 
     fillNfa(nfa.get(), &ls->common, report, repeatMin, repeatMax,
@@ -243,8 +239,7 @@ aligned_unique_ptr<NFA> buildLbrTruf(const CharReach &cr,
                                      bool is_reset, ReportID report) {
     enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
                                              is_reset);
-    aligned_unique_ptr<NFA> nfa
-        = makeLbrNfa<lbr_truf>(LBR_NFA_Truf, rtype, repeatMax);
+    auto nfa = makeLbrNfa<lbr_truf>(LBR_NFA_TRUF, rtype, repeatMax);
     struct lbr_truf *lc = (struct lbr_truf *)getMutableImplNfa(nfa.get());
 
     fillNfa(nfa.get(), &lc->common, report, repeatMin, repeatMax,

diff --git a/src/rose/catchup.c b/src/rose/catchup.c
index 017a6bf0..82537241 100644
--- a/src/rose/catchup.c
+++ b/src/rose/catchup.c
@@ -401,7 +401,7 @@ hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc,
scratch->tctxt.mpv_inactive = 0; /* we know it is going to be an mpv, skip the indirection */ - next_pos_match_loc = nfaExecMpv0_QueueExecRaw(q->nfa, q, loc); + next_pos_match_loc = nfaExecMpv_QueueExecRaw(q->nfa, q, loc); assert(!q->report_current); if (!next_pos_match_loc) { /* 0 means dead */ @@ -441,7 +441,7 @@ char in_mpv(const struct RoseEngine *rose, const struct hs_scratch *scratch) { const struct RoseContext *tctxt = &scratch->tctxt; assert(tctxt->curr_qi < rose->queueCount); if (tctxt->curr_qi < rose->outfixBeginQueue) { - assert(getNfaByQueue(rose, tctxt->curr_qi)->type == MPV_NFA_0); + assert(getNfaByQueue(rose, tctxt->curr_qi)->type == MPV_NFA); return 1; } return 0; diff --git a/src/runtime.c b/src/runtime.c index 30745d81..d8e2f28d 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -291,12 +291,12 @@ void runSmallWriteEngine(const struct SmallWriteEngine *smwr, if (nfa->type == MCCLELLAN_NFA_8) { nfaExecMcClellan8_B(nfa, smwr->start_offset, local_buffer, local_alen, roseReportAdaptor, scratch); - } else if (nfa->type == MCCLELLAN_NFA_16){ + } else if (nfa->type == MCCLELLAN_NFA_16) { nfaExecMcClellan16_B(nfa, smwr->start_offset, local_buffer, local_alen, roseReportAdaptor, scratch); } else { - nfaExecSheng0_B(nfa, smwr->start_offset, local_buffer, - local_alen, roseReportAdaptor, scratch); + nfaExecSheng_B(nfa, smwr->start_offset, local_buffer, + local_alen, roseReportAdaptor, scratch); } } From 32c826e9c6ab69586f386ee7b83887f52686f6e3 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 2 Nov 2016 10:36:24 +1100 Subject: [PATCH 059/103] have single dump function per engine --- CMakeLists.txt | 2 + src/nfa/castle_dump.cpp | 15 +++++--- src/nfa/castle_dump.h | 4 +- src/nfa/goughdump.cpp | 31 +++++++++++++-- src/nfa/goughdump.h | 8 +--- src/nfa/lbr_dump.cpp | 57 ++++++++++++++-------------- src/nfa/lbr_dump.h | 21 +++-------- src/nfa/limex.h | 4 +- src/nfa/limex_dump.cpp | 40 +++++++++----------- src/nfa/mcclellandump.cpp | 31 +++++++++++++-- src/nfa/mcclellandump.h | 10 ++--- src/nfa/mpv_dump.cpp | 14 ++++--- src/nfa/mpv_dump.h | 5 +-- src/nfa/nfa_dump_api.h | 11 ++---- src/nfa/nfa_dump_dispatch.cpp | 9 +---- src/nfa/shengdump.cpp | 15 +++++++- src/nfa/shengdump.h | 5 +-- src/nfa/tamarama_dump.cpp | 30 +++++---------- src/nfa/tamarama_dump.h | 5 +-- src/rose/rose_dump.cpp | 60 ++++++++---------------------- src/smallwrite/smallwrite_dump.cpp | 11 +----- src/util/dump_util.cpp | 42 +++++++++++++++++++++ src/util/dump_util.h | 39 +++++++++++++++++++ 23 files changed, 262 insertions(+), 207 deletions(-) create mode 100644 src/util/dump_util.cpp create mode 100644 src/util/dump_util.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 8def2baf..52d54955 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -986,6 +986,8 @@ set(hs_dump_SRCS src/rose/rose_dump.h src/util/dump_charclass.cpp src/util/dump_charclass.h + src/util/dump_util.cpp + src/util/dump_util.h ) if (DUMP_SUPPORT) diff --git a/src/nfa/castle_dump.cpp b/src/nfa/castle_dump.cpp index 6d51b2ce..9426b6db 100644 --- a/src/nfa/castle_dump.cpp +++ b/src/nfa/castle_dump.cpp @@ -40,18 +40,18 @@ #include "shufticompile.h" #include "trufflecompile.h" #include "util/charreach.h" +#include "util/dump_util.h" #include "util/dump_charclass.h" #ifndef DUMP_SUPPORT #error No dump support! #endif -namespace ue2 { +/* Note: No dot files for castle */ -void nfaExecCastle_dumpDot(const struct NFA *, FILE *, - UNUSED const std::string &base) { - // No GraphViz output for Castles. 
-} +using namespace std; + +namespace ue2 { static void dumpTextSubCastle(const SubCastle &sub, FILE *f) { @@ -68,9 +68,11 @@ void dumpTextSubCastle(const SubCastle &sub, FILE *f) { fprintf(f, "\n"); } -void nfaExecCastle_dumpText(const struct NFA *nfa, FILE *f) { +void nfaExecCastle_dump(const struct NFA *nfa, const string &base) { const Castle *c = (const Castle *)getImplNfa(nfa); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + fprintf(f, "Castle multi-tenant repeat engine\n"); fprintf(f, "\n"); fprintf(f, "Number of repeat tenants: %u\n", c->numRepeats); @@ -113,6 +115,7 @@ void nfaExecCastle_dumpText(const struct NFA *nfa, FILE *f) { fprintf(f, "Sub %u:\n", i); dumpTextSubCastle(sub[i], f); } + fclose(f); } } // namespace ue2 diff --git a/src/nfa/castle_dump.h b/src/nfa/castle_dump.h index d5ec7d3d..06e7e36e 100644 --- a/src/nfa/castle_dump.h +++ b/src/nfa/castle_dump.h @@ -31,15 +31,13 @@ #if defined(DUMP_SUPPORT) -#include #include struct NFA; namespace ue2 { -void nfaExecCastle_dumpDot(const NFA *nfa, FILE *file, const std::string &base); -void nfaExecCastle_dumpText(const NFA *nfa, FILE *file); +void nfaExecCastle_dump(const NFA *nfa, const std::string &base); } // namespace ue2 diff --git a/src/nfa/goughdump.cpp b/src/nfa/goughdump.cpp index 4e6e5425..1b37a0b1 100644 --- a/src/nfa/goughdump.cpp +++ b/src/nfa/goughdump.cpp @@ -37,6 +37,7 @@ #include "ue2common.h" #include "util/charreach.h" #include "util/dump_charclass.h" +#include "util/dump_util.h" #include "util/unaligned.h" #include @@ -259,8 +260,8 @@ void dumpTransitions(const NFA *nfa, FILE *f, fprintf(f, "\n"); } -void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f, - UNUSED const string &base) { +static +void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f) { assert(nfa->type == GOUGH_NFA_8); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); @@ -279,6 +280,7 @@ void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f, fprintf(f, "}\n"); } +static void nfaExecGough8_dumpText(const struct NFA *nfa, FILE *f) { assert(nfa->type == GOUGH_NFA_8); @@ -303,8 +305,8 @@ void nfaExecGough8_dumpText(const struct NFA *nfa, FILE *f) { dumpTextReverse(nfa, f); } -void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f, - UNUSED const string &base) { +static +void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f) { assert(nfa->type == GOUGH_NFA_16); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); @@ -323,6 +325,7 @@ void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f, fprintf(f, "}\n"); } +static void nfaExecGough16_dumpText(const struct NFA *nfa, FILE *f) { assert(nfa->type == GOUGH_NFA_16); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); @@ -348,4 +351,24 @@ void nfaExecGough16_dumpText(const struct NFA *nfa, FILE *f) { dumpTextReverse(nfa, f); } +void nfaExecGough16_dump(const NFA *nfa, const string &base) { + assert(nfa->type == GOUGH_NFA_16); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + nfaExecGough16_dumpText(nfa, f); + fclose(f); + f = fopen_or_throw((base + ".dot").c_str(), "w"); + nfaExecGough16_dumpDot(nfa, f); + fclose(f); +} + +void nfaExecGough8_dump(const NFA *nfa, const string &base) { + assert(nfa->type == GOUGH_NFA_8); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + nfaExecGough8_dumpText(nfa, f); + fclose(f); + f = fopen_or_throw((base + ".dot").c_str(), "w"); + nfaExecGough8_dumpDot(nfa, f); + fclose(f); +} + } // namespace ue2 diff --git a/src/nfa/goughdump.h b/src/nfa/goughdump.h index b96938e4..2d204d5a 100644 --- 
a/src/nfa/goughdump.h +++ b/src/nfa/goughdump.h @@ -39,12 +39,8 @@ struct NFA; namespace ue2 { -void nfaExecGough8_dumpDot(const NFA *nfa, FILE *file, - const std::string &base); -void nfaExecGough16_dumpDot(const NFA *nfa, FILE *file, - const std::string &base); -void nfaExecGough8_dumpText(const NFA *nfa, FILE *file); -void nfaExecGough16_dumpText(const NFA *nfa, FILE *file); +void nfaExecGough8_dump(const NFA *nfa, const std::string &base); +void nfaExecGough16_dump(const NFA *nfa, const std::string &base); } // namespace ue2 diff --git a/src/nfa/lbr_dump.cpp b/src/nfa/lbr_dump.cpp index 9619b8d6..92cf7e03 100644 --- a/src/nfa/lbr_dump.cpp +++ b/src/nfa/lbr_dump.cpp @@ -42,38 +42,17 @@ #include "trufflecompile.h" #include "util/charreach.h" #include "util/dump_charclass.h" +#include "util/dump_util.h" #ifndef DUMP_SUPPORT #error No dump support! #endif +/* Note: No dot files for LBR */ +using namespace std; + namespace ue2 { -void nfaExecLbrDot_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f, - UNUSED const std::string &base) { - // No impl -} - -void nfaExecLbrVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f, - UNUSED const std::string &base) { - // No impl -} - -void nfaExecLbrNVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f, - UNUSED const std::string &base) { - // No impl -} - -void nfaExecLbrShuf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f, - UNUSED const std::string &base) { - // No impl -} - -void nfaExecLbrTruf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f, - UNUSED const std::string &base) { - // No impl -} - static void lbrDumpCommon(const lbr_common *lc, FILE *f) { const RepeatInfo *info @@ -88,39 +67,52 @@ void lbrDumpCommon(const lbr_common *lc, FILE *f) { fprintf(f, "min period: %u\n", info->minPeriod); } -void nfaExecLbrDot_dumpText(const NFA *nfa, FILE *f) { +void nfaExecLbrDot_dump(const NFA *nfa, const string &base) { assert(nfa); assert(nfa->type == LBR_NFA_DOT); const lbr_dot *ld = (const lbr_dot *)getImplNfa(nfa); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); lbrDumpCommon(&ld->common, f); fprintf(f, "DOT model\n"); fprintf(f, "\n"); dumpTextReverse(nfa, f); + fclose(f); } -void nfaExecLbrVerm_dumpText(const NFA *nfa, FILE *f) { +void nfaExecLbrVerm_dump(const NFA *nfa, const string &base) { assert(nfa); assert(nfa->type == LBR_NFA_VERM); const lbr_verm *lv = (const lbr_verm *)getImplNfa(nfa); + + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + lbrDumpCommon(&lv->common, f); fprintf(f, "VERM model, scanning for 0x%02x\n", lv->c); fprintf(f, "\n"); dumpTextReverse(nfa, f); + fclose(f); } -void nfaExecLbrNVerm_dumpText(const NFA *nfa, FILE *f) { +void nfaExecLbrNVerm_dump(const NFA *nfa, const string &base) { assert(nfa); assert(nfa->type == LBR_NFA_NVERM); const lbr_verm *lv = (const lbr_verm *)getImplNfa(nfa); + + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + lbrDumpCommon(&lv->common, f); fprintf(f, "NEGATED VERM model, scanning for 0x%02x\n", lv->c); fprintf(f, "\n"); dumpTextReverse(nfa, f); + fclose(f); } -void nfaExecLbrShuf_dumpText(const NFA *nfa, FILE *f) { +void nfaExecLbrShuf_dump(const NFA *nfa, const string &base) { assert(nfa); assert(nfa->type == LBR_NFA_SHUF); + + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + const lbr_shuf *ls = (const lbr_shuf *)getImplNfa(nfa); lbrDumpCommon(&ls->common, f); @@ -129,11 +121,15 @@ void nfaExecLbrShuf_dumpText(const NFA *nfa, FILE *f) { describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count()); fprintf(f, "\n"); dumpTextReverse(nfa, f); + fclose(f); } -void 
nfaExecLbrTruf_dumpText(const NFA *nfa, FILE *f) {
+void nfaExecLbrTruf_dump(const NFA *nfa, const string &base) {
     assert(nfa);
     assert(nfa->type == LBR_NFA_TRUF);
+
+    FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
+
     const lbr_truf *lt = (const lbr_truf *)getImplNfa(nfa);
 
     lbrDumpCommon(&lt->common, f);
@@ -142,6 +138,7 @@ void nfaExecLbrTruf_dumpText(const NFA *nfa, FILE *f) {
                describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count());
     fprintf(f, "\n");
     dumpTextReverse(nfa, f);
+    fclose(f);
 }
 
 } // namespace ue2

diff --git a/src/nfa/lbr_dump.h b/src/nfa/lbr_dump.h
index 06ed51e2..ea4e3f38 100644
--- a/src/nfa/lbr_dump.h
+++ b/src/nfa/lbr_dump.h
@@ -31,28 +31,17 @@
 
 #ifdef DUMP_SUPPORT
 
-#include <cstdio>
 #include <string>
 
 struct NFA;
 
 namespace ue2 {
 
-void nfaExecLbrDot_dumpDot(const struct NFA *nfa, FILE *file,
-                           const std::string &base);
-void nfaExecLbrVerm_dumpDot(const struct NFA *nfa, FILE *file,
-                            const std::string &base);
-void nfaExecLbrNVerm_dumpDot(const struct NFA *nfa, FILE *file,
-                             const std::string &base);
-void nfaExecLbrShuf_dumpDot(const struct NFA *nfa, FILE *file,
-                            const std::string &base);
-void nfaExecLbrTruf_dumpDot(const struct NFA *nfa, FILE *file,
-                            const std::string &base);
-void nfaExecLbrDot_dumpText(const struct NFA *nfa, FILE *file);
-void nfaExecLbrVerm_dumpText(const struct NFA *nfa, FILE *file);
-void nfaExecLbrNVerm_dumpText(const struct NFA *nfa, FILE *file);
-void nfaExecLbrTruf_dumpText(const struct NFA *nfa, FILE *file);
-void nfaExecLbrShuf_dumpText(const struct NFA *nfa, FILE *file);
+void nfaExecLbrDot_dump(const struct NFA *nfa, const std::string &base);
+void nfaExecLbrVerm_dump(const struct NFA *nfa, const std::string &base);
+void nfaExecLbrNVerm_dump(const struct NFA *nfa, const std::string &base);
+void nfaExecLbrShuf_dump(const struct NFA *nfa, const std::string &base);
+void nfaExecLbrTruf_dump(const struct NFA *nfa, const std::string &base);
 
 } // namespace ue2

diff --git a/src/nfa/limex.h b/src/nfa/limex.h
index 70bcdd1c..0223604d 100644
--- a/src/nfa/limex.h
+++ b/src/nfa/limex.h
@@ -41,9 +41,7 @@ extern "C"
 
 #define GENERATE_NFA_DUMP_DECL(gf_name) \
     } /* extern "C" */ \
     namespace ue2 { \
-    void gf_name##_dumpDot(const struct NFA *nfa, FILE *file, \
-                           const std::string &base); \
-    void gf_name##_dumpText(const struct NFA *nfa, FILE *file); \
+    void gf_name##_dump(const struct NFA *nfa, const std::string &base); \
     } /* namespace ue2 */ \
     extern "C" {

diff --git a/src/nfa/limex_dump.cpp b/src/nfa/limex_dump.cpp
index 149e8107..852639ea 100644
--- a/src/nfa/limex_dump.cpp
+++ b/src/nfa/limex_dump.cpp
@@ -35,9 +35,10 @@
 #include "limex_internal.h"
 #include "nfa_dump_internal.h"
 #include "ue2common.h"
+#include "util/charreach.h"
 #include "util/dump_charclass.h"
 #include "util/dump_mask.h"
-#include "util/charreach.h"
+#include "util/dump_util.h"
 
 #include
 #include
 
@@ -472,37 +473,32 @@ void dumpLimDotInfo(const limex_type *limex, u32 state, FILE *f) {
     }
 }
 
-#define DUMP_TEXT_FN(ddf_n) \
-    void nfaExecLimEx##ddf_n##_dumpText(const NFA *nfa, FILE *f) { \
-        dumpLimexText((const LimExNFA##ddf_n *)getImplNfa(nfa), f); \
-    }
-
-#define DUMP_DOT_FN(ddf_n) \
-    void nfaExecLimEx##ddf_n##_dumpDot(const NFA *nfa, FILE *f, \
-                                       UNUSED const string &base) { \
-        const LimExNFA##ddf_n *limex = \
-            (const LimExNFA##ddf_n *)getImplNfa(nfa); \
+#define LIMEX_DUMP_FN(size) \
+    void nfaExecLimEx##size##_dump(const NFA *nfa, const string &base) { \
+        auto limex = (const LimExNFA##size *)getImplNfa(nfa); \
 \
+        FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); \
+
dumpLimexText(limex, f); \ + fclose(f); \ + \ + f = fopen_or_throw((base + ".dot").c_str(), "w"); \ dumpDotPreamble(f); \ u32 state_count = nfa->nPositions; \ dumpVertexDotInfo(limex, state_count, f, \ - limex_labeller(limex)); \ + limex_labeller(limex)); \ for (u32 i = 0; i < state_count; i++) { \ dumpLimDotInfo(limex, i, f); \ dumpExDotInfo(limex, i, f); \ } \ dumpDotTrailer(f); \ + fclose(f); \ } -#define LIMEX_DUMP_FNS(size) \ - DUMP_TEXT_FN(size) \ - DUMP_DOT_FN(size) - -LIMEX_DUMP_FNS(32) -LIMEX_DUMP_FNS(64) -LIMEX_DUMP_FNS(128) -LIMEX_DUMP_FNS(256) -LIMEX_DUMP_FNS(384) -LIMEX_DUMP_FNS(512) +LIMEX_DUMP_FN(32) +LIMEX_DUMP_FN(64) +LIMEX_DUMP_FN(128) +LIMEX_DUMP_FN(256) +LIMEX_DUMP_FN(384) +LIMEX_DUMP_FN(512) } // namespace ue2 diff --git a/src/nfa/mcclellandump.cpp b/src/nfa/mcclellandump.cpp index dcbb0915..9e04ad63 100644 --- a/src/nfa/mcclellandump.cpp +++ b/src/nfa/mcclellandump.cpp @@ -39,6 +39,7 @@ #include "ue2common.h" #include "util/charreach.h" #include "util/dump_charclass.h" +#include "util/dump_util.h" #include "util/unaligned.h" #include @@ -267,8 +268,8 @@ void dumpDotPreambleDfa(FILE *f) { fprintf(f, "0 [style=invis];\n"); } -void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f, - UNUSED const string &base) { +static +void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f) { assert(nfa->type == MCCLELLAN_NFA_16); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); @@ -287,8 +288,8 @@ void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f, fprintf(f, "}\n"); } -void nfaExecMcClellan8_dumpDot(const NFA *nfa, FILE *f, - UNUSED const string &base) { +static +void nfaExecMcClellan8_dumpDot(const NFA *nfa, FILE *f) { assert(nfa->type == MCCLELLAN_NFA_8); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); @@ -397,6 +398,7 @@ void dumpTransitions(FILE *f, const NFA *nfa, const mcclellan *m, } } +static void nfaExecMcClellan16_dumpText(const NFA *nfa, FILE *f) { assert(nfa->type == MCCLELLAN_NFA_16); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); @@ -417,6 +419,7 @@ void nfaExecMcClellan16_dumpText(const NFA *nfa, FILE *f) { dumpTextReverse(nfa, f); } +static void nfaExecMcClellan8_dumpText(const NFA *nfa, FILE *f) { assert(nfa->type == MCCLELLAN_NFA_8); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); @@ -437,4 +440,24 @@ void nfaExecMcClellan8_dumpText(const NFA *nfa, FILE *f) { dumpTextReverse(nfa, f); } +void nfaExecMcClellan16_dump(const NFA *nfa, const string &base) { + assert(nfa->type == MCCLELLAN_NFA_16); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + nfaExecMcClellan16_dumpText(nfa, f); + fclose(f); + f = fopen_or_throw((base + ".dot").c_str(), "w"); + nfaExecMcClellan16_dumpDot(nfa, f); + fclose(f); +} + +void nfaExecMcClellan8_dump(const NFA *nfa, const string &base) { + assert(nfa->type == MCCLELLAN_NFA_8); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + nfaExecMcClellan8_dumpText(nfa, f); + fclose(f); + f = fopen_or_throw((base + ".dot").c_str(), "w"); + nfaExecMcClellan8_dumpDot(nfa, f); + fclose(f); +} + } // namespace ue2 diff --git a/src/nfa/mcclellandump.h b/src/nfa/mcclellandump.h index efa61544..5b63a206 100644 --- a/src/nfa/mcclellandump.h +++ b/src/nfa/mcclellandump.h @@ -43,14 +43,10 @@ union AccelAux; namespace ue2 { -void nfaExecMcClellan8_dumpDot(const struct NFA *nfa, FILE *file, - const std::string &base); -void nfaExecMcClellan16_dumpDot(const struct NFA *nfa, FILE *file, - const std::string &base); -void nfaExecMcClellan8_dumpText(const struct NFA *nfa, FILE *file); -void 
nfaExecMcClellan16_dumpText(const struct NFA *nfa, FILE *file);
+void nfaExecMcClellan8_dump(const struct NFA *nfa, const std::string &base);
+void nfaExecMcClellan16_dump(const struct NFA *nfa, const std::string &base);
 
-/* These functions are shared with the Haig dump code. */
+/* These functions are shared with the Gough dump code. */
 const mstate_aux *getAux(const NFA *n, dstate_id_t i);
 void describeEdge(FILE *f, const u16 *t, u16 i);

diff --git a/src/nfa/mpv_dump.cpp b/src/nfa/mpv_dump.cpp
index e91d378f..9a8a4067 100644
--- a/src/nfa/mpv_dump.cpp
+++ b/src/nfa/mpv_dump.cpp
@@ -36,6 +36,7 @@
 #include "ue2common.h"
 #include "util/compare.h"
 #include "util/dump_mask.h"
+#include "util/dump_util.h"
 
 #include
 #include
 
@@ -46,11 +47,11 @@
 #error No dump support!
 #endif
 
-namespace ue2 {
+/* Note: No dot files for MPV */
 
-void nfaExecMpv_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *file,
-                        UNUSED const std::string &base) {
-}
+using namespace std;
+
+namespace ue2 {
 
 static really_inline
 u32 largest_puff_repeat(const mpv *m, const mpv_kilopuff *kp) {
@@ -128,9 +129,11 @@ void dumpCounter(FILE *f, const mpv_counter_info *c) {
     fprintf(f, "\n");
 }
 
-void nfaExecMpv_dumpText(const NFA *nfa, FILE *f) {
+void nfaExecMpv_dump(const NFA *nfa, const string &base) {
     const mpv *m = (const mpv *)getImplNfa(nfa);
 
+    FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
+
     fprintf(f, "Puff the Magic Engines\n");
     fprintf(f, "\n");
     fprintf(f, "%u puffettes in %u kilopuffs\n", m->puffette_count,
@@ -151,6 +154,7 @@
     }
 
     dumpTextReverse(nfa, f);
+    fclose(f);
 }
 
 } // namespace ue2

diff --git a/src/nfa/mpv_dump.h b/src/nfa/mpv_dump.h
index b44d2b74..e587619e 100644
--- a/src/nfa/mpv_dump.h
+++ b/src/nfa/mpv_dump.h
@@ -31,16 +31,13 @@
 
 #if defined(DUMP_SUPPORT)
 
-#include <cstdio>
 #include <string>
 
 struct NFA;
 
 namespace ue2 {
 
-void nfaExecMpv_dumpDot(const struct NFA *nfa, FILE *file,
-                        const std::string &base);
-void nfaExecMpv_dumpText(const struct NFA *nfa, FILE *file);
+void nfaExecMpv_dump(const struct NFA *nfa, const std::string &base);
 
 } // namespace ue2

diff --git a/src/nfa/nfa_dump_api.h b/src/nfa/nfa_dump_api.h
index 1054a204..a0c4a9c9 100644
--- a/src/nfa/nfa_dump_api.h
+++ b/src/nfa/nfa_dump_api.h
@@ -35,7 +35,6 @@
 
 #if defined(DUMP_SUPPORT)
 
-#include <cstdio>
 #include <string>
 
 struct NFA;
@@ -43,13 +42,11 @@ struct NFA;
 namespace ue2 {
 
 /**
- * \brief Dump (in Graphviz 'dot' format) a representation of the NFA into the
- * file pointed to by dotFile.
+ * \brief Dump files representing the engine. All files dumped should begin
+ * with the path/prefix specified by base. Generally a text file and a
+ * graphviz (dot) file should be produced.
 */
-void nfaDumpDot(const struct NFA *nfa, FILE *dotFile, const std::string &base);
-
-/** \brief Dump a textual representation of the NFA.
*/ -void nfaDumpText(const struct NFA *fact, FILE *textFile); +void nfaGenerateDumpFiles(const struct NFA *nfa, const std::string &base); } // namespace ue2 diff --git a/src/nfa/nfa_dump_dispatch.cpp b/src/nfa/nfa_dump_dispatch.cpp index 84190232..3dea5ef7 100644 --- a/src/nfa/nfa_dump_dispatch.cpp +++ b/src/nfa/nfa_dump_dispatch.cpp @@ -82,13 +82,8 @@ namespace ue2 { assert(0); \ } -void nfaDumpDot(const struct NFA *nfa, FILE *dotFile, - const std::string &base) { - DISPATCH_BY_NFA_TYPE(_dumpDot(nfa, dotFile, base)); -} - -void nfaDumpText(const struct NFA *nfa, FILE *txtFile) { - DISPATCH_BY_NFA_TYPE(_dumpText(nfa, txtFile)); +void nfaGenerateDumpFiles(const struct NFA *nfa, const std::string &base) { + DISPATCH_BY_NFA_TYPE(_dump(nfa, base)); } } // namespace ue2 diff --git a/src/nfa/shengdump.cpp b/src/nfa/shengdump.cpp index a7e7fc2b..c2371601 100644 --- a/src/nfa/shengdump.cpp +++ b/src/nfa/shengdump.cpp @@ -38,6 +38,7 @@ #include "ue2common.h" #include "util/charreach.h" #include "util/dump_charclass.h" +#include "util/dump_util.h" #include "util/simd_utils.h" @@ -115,6 +116,7 @@ void dumpMasks(FILE *f, const sheng *s) { } } +static void nfaExecSheng_dumpText(const NFA *nfa, FILE *f) { assert(nfa->type == SHENG_NFA); const sheng *s = (const sheng *)getImplNfa(nfa); @@ -243,7 +245,8 @@ void shengGetTransitions(const NFA *n, u16 state, u16 *t) { t[TOP] = aux->top & SHENG_STATE_MASK; } -void nfaExecSheng_dumpDot(const NFA *nfa, FILE *f, const string &) { +static +void nfaExecSheng_dumpDot(const NFA *nfa, FILE *f) { assert(nfa->type == SHENG_NFA); const sheng *s = (const sheng *)getImplNfa(nfa); @@ -262,4 +265,14 @@ void nfaExecSheng_dumpDot(const NFA *nfa, FILE *f, const string &) { fprintf(f, "}\n"); } +void nfaExecSheng_dump(const NFA *nfa, const string &base) { + assert(nfa->type == SHENG_NFA); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + nfaExecSheng_dumpText(nfa, f); + fclose(f); + f = fopen_or_throw((base + ".dot").c_str(), "w"); + nfaExecSheng_dumpDot(nfa, f); + fclose(f); +} + } // namespace ue2 diff --git a/src/nfa/shengdump.h b/src/nfa/shengdump.h index 008d2aba..2bdffeb9 100644 --- a/src/nfa/shengdump.h +++ b/src/nfa/shengdump.h @@ -31,16 +31,13 @@ #ifdef DUMP_SUPPORT -#include #include struct NFA; namespace ue2 { -void nfaExecSheng_dumpDot(const struct NFA *nfa, FILE *file, - const std::string &base); -void nfaExecSheng_dumpText(const struct NFA *nfa, FILE *file); +void nfaExecSheng_dump(const struct NFA *nfa, const std::string &base); } // namespace ue2 diff --git a/src/nfa/tamarama_dump.cpp b/src/nfa/tamarama_dump.cpp index f03b842c..88cb33cc 100644 --- a/src/nfa/tamarama_dump.cpp +++ b/src/nfa/tamarama_dump.cpp @@ -38,6 +38,7 @@ #include "nfa_dump_api.h" #include "nfa_dump_internal.h" #include "nfa_internal.h" +#include "util/dump_util.h" #include #include @@ -46,27 +47,14 @@ #error No dump support! 
#endif +using namespace std; + namespace ue2 { -void nfaExecTamarama_dumpDot(const struct NFA *nfa, UNUSED FILE *f, - const std::string &base) { +void nfaExecTamarama_dump(const struct NFA *nfa, const string &base) { const Tamarama *t = (const Tamarama *)getImplNfa(nfa); - const u32 *subOffset = - (const u32 *)((const char *)t + sizeof(struct Tamarama) + - t->numSubEngines * sizeof(u32)); - for (u32 i = 0; i < t->numSubEngines; i++) { - std::stringstream ssdot; - ssdot << base << "rose_nfa_" << nfa->queueIndex - << "_sub_" << i << ".dot"; - const NFA *sub = (const struct NFA *)((const char *)t + subOffset[i]); - FILE *f1 = fopen(ssdot.str().c_str(), "w"); - nfaDumpDot(sub, f1, base); - fclose(f1); - } -} -void nfaExecTamarama_dumpText(const struct NFA *nfa, FILE *f) { - const Tamarama *t = (const Tamarama *)getImplNfa(nfa); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); fprintf(f, "Tamarama container engine\n"); fprintf(f, "\n"); @@ -75,15 +63,17 @@ void nfaExecTamarama_dumpText(const struct NFA *nfa, FILE *f) { fprintf(f, "\n"); dumpTextReverse(nfa, f); fprintf(f, "\n"); + fclose(f); const u32 *subOffset = (const u32 *)((const char *)t + sizeof(struct Tamarama) + t->numSubEngines * sizeof(u32)); for (u32 i = 0; i < t->numSubEngines; i++) { - fprintf(f, "Sub %u:\n", i); const NFA *sub = (const struct NFA *)((const char *)t + subOffset[i]); - nfaDumpText(sub, f); - fprintf(f, "\n"); + + stringstream sssub; + sssub << base << "_sub_" << i; + nfaGenerateDumpFiles(sub, sssub.str()); } } diff --git a/src/nfa/tamarama_dump.h b/src/nfa/tamarama_dump.h index 6e3f80ca..f40b7ecf 100644 --- a/src/nfa/tamarama_dump.h +++ b/src/nfa/tamarama_dump.h @@ -31,16 +31,13 @@ #if defined(DUMP_SUPPORT) -#include #include struct NFA; namespace ue2 { -void nfaExecTamarama_dumpDot(const NFA *nfa, FILE *file, - const std::string &base); -void nfaExecTamarama_dumpText(const NFA *nfa, FILE *file); +void nfaExecTamarama_dump(const NFA *nfa, const std::string &base); } // namespace ue2 diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 36156a42..c0272348 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -917,24 +917,14 @@ void dumpNfas(const RoseEngine *t, bool dump_raw, const string &base) { const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); const NFA *n = getNfaByInfo(t, nfa_info); - stringstream sstxt, ssdot, ssraw; - - sstxt << base << "rose_nfa_" << i << ".txt"; - ssdot << base << "rose_nfa_" << i << ".dot"; - ssraw << base << "rose_nfa_" << i << ".raw"; - - FILE *f; - - f = fopen(ssdot.str().c_str(), "w"); - nfaDumpDot(n, f, base); - fclose(f); - - f = fopen(sstxt.str().c_str(), "w"); - nfaDumpText(n, f); - fclose(f); + stringstream ssbase; + ssbase << base << "rose_nfa_" << i; + nfaGenerateDumpFiles(n, ssbase.str()); if (dump_raw) { - f = fopen(ssraw.str().c_str(), "w"); + stringstream ssraw; + ssraw << base << "rose_nfa_" << i << ".raw"; + FILE *f = fopen(ssraw.str().c_str(), "w"); fwrite(n, 1, n->length, f); fclose(f); } @@ -977,24 +967,14 @@ void dumpRevNfas(const RoseEngine *t, bool dump_raw, const string &base) { for (u32 i = 0; i < t->somRevCount; i++) { const NFA *n = (const NFA *)(tp + rev_offsets[i]); - stringstream sstxt, ssdot, ssraw; - - sstxt << base << "som_rev_nfa_" << i << ".txt"; - ssdot << base << "som_rev_nfa_" << i << ".dot"; - ssraw << base << "som_nfa_nfa_" << i << ".raw"; - - FILE *f; - - f = fopen(ssdot.str().c_str(), "w"); - nfaDumpDot(n, f, base); - fclose(f); - - f = fopen(sstxt.str().c_str(), "w"); - nfaDumpText(n, f); - fclose(f); + 
stringstream ssbase; + ssbase << base << "rose_nfa_" << i; + nfaGenerateDumpFiles(n, ssbase.str()); if (dump_raw) { - f = fopen(ssraw.str().c_str(), "w"); + stringstream ssraw; + ssraw << base << "som_rev_nfa_" << i << ".raw"; + FILE *f = fopen(ssraw.str().c_str(), "w"); fwrite(n, 1, n->length, f); fclose(f); } @@ -1009,20 +989,10 @@ void dumpAnchored(const RoseEngine *t, const string &base) { while (curr) { const NFA *n = (const NFA *)((const char *)curr + sizeof(*curr)); - stringstream sstxt, ssdot; - sstxt << base << "anchored_" << i << ".txt"; - ssdot << base << "anchored_" << i << ".dot"; - - FILE *f; - - f = fopen(ssdot.str().c_str(), "w"); - nfaDumpDot(n, f, base); - fclose(f); - - f = fopen(sstxt.str().c_str(), "w"); - nfaDumpText(n, f); - fclose(f); + stringstream ssbase; + ssbase << base << "anchored_" << i; + nfaGenerateDumpFiles(n, ssbase.str()); curr = curr->next_offset ? (const anchored_matcher_info *) ((const char *)curr + curr->next_offset) : nullptr; diff --git a/src/smallwrite/smallwrite_dump.cpp b/src/smallwrite/smallwrite_dump.cpp index 0db97df5..bdf55c30 100644 --- a/src/smallwrite/smallwrite_dump.cpp +++ b/src/smallwrite/smallwrite_dump.cpp @@ -70,18 +70,11 @@ void smwrDumpNFA(const SmallWriteEngine *smwr, bool dump_raw, } const struct NFA *n = getSmwrNfa(smwr); - FILE *f; - f = fopen((base + "smallwrite_nfa.dot").c_str(), "w"); - nfaDumpDot(n, f, base); - fclose(f); - - f = fopen((base + "smallwrite_nfa.txt").c_str(), "w"); - nfaDumpText(n, f); - fclose(f); + nfaGenerateDumpFiles(n, base + "smallwrite_nfa"); if (dump_raw) { - f = fopen((base + "smallwrite_nfa.raw").c_str(), "w"); + FILE *f = fopen((base + "smallwrite_nfa.raw").c_str(), "w"); fwrite(n, 1, n->length, f); fclose(f); } diff --git a/src/util/dump_util.cpp b/src/util/dump_util.cpp new file mode 100644 index 00000000..5b961367 --- /dev/null +++ b/src/util/dump_util.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "dump_util.h" + +#include <stdexcept> +#include <string> + +using namespace std; + +FILE *fopen_or_throw(const char *path, const char *mode) { + FILE *f = fopen(path, mode); + if (!f) { + throw runtime_error(string("Unable to open file: ") + path); + } + return f; +} diff --git a/src/util/dump_util.h b/src/util/dump_util.h new file mode 100644 index 00000000..487d2e7c --- /dev/null +++ b/src/util/dump_util.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DUMP_UTIL +#define DUMP_UTIL + +#include <cstdio> + +/** + * Same as fopen(), but on error throws an exception rather than returning NULL.
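+ *
+ * A minimal usage sketch (illustrative only; callers in the dump code are
+ * assumed to let the exception propagate to whoever requested the dump):
+ *
+ *     FILE *f = fopen_or_throw("litgraph.dot", "w");
+ *     fprintf(f, "digraph G {}\n");
+ *     fclose(f);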
+ */ +FILE *fopen_or_throw(const char *path, const char *mode); + +#endif From 8ff7a3cdbb44ebbd4d940b2afba167fb02399cfd Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Fri, 4 Nov 2016 10:59:33 +1100 Subject: [PATCH 060/103] correct dump filenames of som rev engines --- src/rose/rose_dump.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index c0272348..47249587 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -968,7 +968,7 @@ void dumpRevNfas(const RoseEngine *t, bool dump_raw, const string &base) { const NFA *n = (const NFA *)(tp + rev_offsets[i]); stringstream ssbase; - ssbase << base << "rose_nfa_" << i; + ssbase << base << "som_rev_nfa_" << i; nfaGenerateDumpFiles(n, ssbase.str()); if (dump_raw) { From 99e14df117d679cfa212e326dcc63900814565ce Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 13 Sep 2016 15:07:16 +1000 Subject: [PATCH 061/103] Fix combine2x128 --- src/nfa/shufti.c | 6 +++--- src/rose/rose_build_bytecode.cpp | 4 ++-- src/util/simd_utils.h | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/nfa/shufti.c b/src/nfa/shufti.c index 57890478..2e63be9f 100644 --- a/src/nfa/shufti.c +++ b/src/nfa/shufti.c @@ -308,7 +308,7 @@ const u8 *fwdBlockShort(m256 mask, m128 chars, const u8 *buf, const m256 low4bits) { // do the hi and lo shuffles in the one avx register m256 c = set2x128(chars); - c = _mm256_srlv_epi64(c, _mm256_set_epi64x(0, 0, 4, 4)); + c = _mm256_srlv_epi64(c, _mm256_set_epi64x(4, 4, 0, 0)); c = and256(c, low4bits); m256 c_shuf = vpshufb(mask, c); m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf)); @@ -440,7 +440,7 @@ const u8 *revBlockShort(m256 mask, m128 chars, const u8 *buf, const m256 low4bits) { // do the hi and lo shuffles in the one avx register m256 c = set2x128(chars); - c = _mm256_srlv_epi64(c, _mm256_set_epi64x(0, 0, 4, 4)); + c = _mm256_srlv_epi64(c, _mm256_set_epi64x(4, 4, 0, 0)); c = and256(c, low4bits); m256 c_shuf = vpshufb(mask, c); m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf)); @@ -565,7 +565,7 @@ const u8 *fwdBlockShort2(m256 mask1, m256 mask2, m128 chars, const u8 *buf, const m256 low4bits) { // do the hi and lo shuffles in the one avx register m256 c = set2x128(chars); - c = _mm256_srlv_epi64(c, _mm256_set_epi64x(0, 0, 4, 4)); + c = _mm256_srlv_epi64(c, _mm256_set_epi64x(4, 4, 0, 0)); c = and256(c, low4bits); m256 c_shuf1 = vpshufb(mask1, c); m256 c_shuf2 = rshift128_m256(vpshufb(mask2, c), 1); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 43df7962..f074973d 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -3054,8 +3054,8 @@ bool makeRoleShufti(const vector &look, neg_mask &= 0xffff; array nib_mask; array bucket_select_mask_16; - copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin()); - copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin() + 16); + copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin()); + copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16); copy(bucket_select_lo.begin(), bucket_select_lo.begin() + 16, bucket_select_mask_16.begin()); auto ri = make_unique diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index afa8c7f8..35e1a390 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -658,8 +658,8 @@ m128 movdq_lo(m256 x) { } static really_inline -m256 combine2x128(m128 a, m128 b) { - m256 rv = {a, b}; +m256 combine2x128(m128 hi, m128 lo) { + m256 rv = {lo, hi}; return 
rv; } @@ -712,7 +712,7 @@ m256 combine2x128(m128 hi, m128 lo) { #if defined(_mm256_set_m128i) return _mm256_set_m128i(hi, lo); #else - return insert128to256(cast128to256(hi), lo, 1); + return insert128to256(cast128to256(lo), hi, 1); #endif } #endif //AVX2 From 5a842caaf1227523b22de4a2bc0ebee459ea3d30 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 13 Sep 2016 15:07:31 +1000 Subject: [PATCH 062/103] shufti: slightly faster short shufti operation It is better to shift the high lane values in an XMM before then using insert to combine the high and low lanes. --- src/nfa/shufti.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/nfa/shufti.c b/src/nfa/shufti.c index 2e63be9f..d68b1b04 100644 --- a/src/nfa/shufti.c +++ b/src/nfa/shufti.c @@ -307,8 +307,7 @@ static really_inline const u8 *fwdBlockShort(m256 mask, m128 chars, const u8 *buf, const m256 low4bits) { // do the hi and lo shuffles in the one avx register - m256 c = set2x128(chars); - c = _mm256_srlv_epi64(c, _mm256_set_epi64x(4, 4, 0, 0)); + m256 c = combine2x128(rshift64_m128(chars, 4), chars); c = and256(c, low4bits); m256 c_shuf = vpshufb(mask, c); m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf)); @@ -439,8 +438,7 @@ static really_inline const u8 *revBlockShort(m256 mask, m128 chars, const u8 *buf, const m256 low4bits) { // do the hi and lo shuffles in the one avx register - m256 c = set2x128(chars); - c = _mm256_srlv_epi64(c, _mm256_set_epi64x(4, 4, 0, 0)); + m256 c = combine2x128(rshift64_m128(chars, 4), chars); c = and256(c, low4bits); m256 c_shuf = vpshufb(mask, c); m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf)); @@ -564,8 +562,7 @@ static really_inline const u8 *fwdBlockShort2(m256 mask1, m256 mask2, m128 chars, const u8 *buf, const m256 low4bits) { // do the hi and lo shuffles in the one avx register - m256 c = set2x128(chars); - c = _mm256_srlv_epi64(c, _mm256_set_epi64x(4, 4, 0, 0)); + m256 c = combine2x128(rshift64_m128(chars, 4), chars); c = and256(c, low4bits); m256 c_shuf1 = vpshufb(mask1, c); m256 c_shuf2 = rshift128_m256(vpshufb(mask2, c), 1); From 16aa22a361df9ab06cf1b1a1be9a634af9fd542c Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 16 Nov 2016 10:56:37 +1100 Subject: [PATCH 063/103] gough: don't dump int data unless it's requested --- src/nfa/goughcompile_dump.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/nfa/goughcompile_dump.cpp b/src/nfa/goughcompile_dump.cpp index dd76b9ec..cb361cdb 100644 --- a/src/nfa/goughcompile_dump.cpp +++ b/src/nfa/goughcompile_dump.cpp @@ -275,7 +275,7 @@ void dump_vars(const GoughGraph &g, const string &base, const Grey &grey) { } void dump(const GoughGraph &g, const string &base, const Grey &grey) { - if (!grey.dumpFlags) { + if (!(grey.dumpFlags & Grey::DUMP_INT_GRAPH)) { return; } @@ -311,9 +311,9 @@ void dump_block(FILE *f, const gough_edge_id &e, } } -void dump_blocks(const map > &blocks, +void dump_blocks(const map> &blocks, const string &base, const Grey &grey) { - if (!grey.dumpFlags) { + if (!(grey.dumpFlags & Grey::DUMP_INT_GRAPH)) { return; } From f605ca0dc1fcaf0b0323f20d379107841fc86377 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 17 Nov 2016 09:41:23 +1100 Subject: [PATCH 064/103] check for partial matches at end of dverm --- src/nfa/vermicelli.h | 41 ++++++++++++++++++++++-------------- unit/internal/vermicelli.cpp | 28 +++++++++++++----------- 2 files changed, 41 insertions(+), 28 deletions(-) diff --git a/src/nfa/vermicelli.h b/src/nfa/vermicelli.h index 
ba8afcf1..b2f2ab7c 100644 --- a/src/nfa/vermicelli.h +++ b/src/nfa/vermicelli.h @@ -74,9 +74,7 @@ const u8 *vermicelliExec(char c, char nocase, const u8 *buf, } buf += VERM_BOUNDARY - min; - if (buf >= buf_end) { - return buf_end; - } + assert(buf < buf_end); } // Aligned loops from here on in @@ -129,9 +127,7 @@ const u8 *nvermicelliExec(char c, char nocase, const u8 *buf, } buf += VERM_BOUNDARY - min; - if (buf >= buf_end) { - return buf_end; - } + assert(buf < buf_end); } // Aligned loops from here on in @@ -172,9 +168,7 @@ const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf, } buf += VERM_BOUNDARY - min; - if (buf >= buf_end) { - return buf_end - 1; - } + assert(buf < buf_end); } // Aligned loops from here on in @@ -190,9 +184,18 @@ const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf, ptr = nocase ? dvermPreconditionNocase(chars1, chars2, buf_end - VERM_BOUNDARY) : dvermPrecondition(chars1, chars2, buf_end - VERM_BOUNDARY); - /* buf_end - 1 to be conservative in case last byte is a partial match */ - return ptr ? ptr : buf_end - 1; + if (ptr) { + return ptr; + } + + /* check for partial match at end */ + u8 mask = nocase ? CASE_CLEAR : 0xff; + if ((buf_end[-1] & mask) == c1) { + return buf_end - 1; + } + + return buf_end; } static really_inline @@ -220,9 +223,7 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2, } buf += VERM_BOUNDARY - min; - if (buf >= buf_end) { - return buf_end - 1; - } + assert(buf < buf_end); } // Aligned loops from here on in @@ -235,9 +236,17 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2, // Tidy up the mess at the end ptr = dvermPreconditionMasked(chars1, chars2, mask1, mask2, buf_end - VERM_BOUNDARY); - /* buf_end - 1 to be conservative in case last byte is a partial match */ - return ptr ? ptr : buf_end - 1; + if (ptr) { + return ptr; + } + + /* check for partial match at end */ + if ((buf_end[-1] & m1) == c1) { + return buf_end - 1; + } + + return buf_end; } // Reverse vermicelli scan. 
Provides exact semantics and returns (buf - 1) if diff --git a/unit/internal/vermicelli.cpp b/unit/internal/vermicelli.cpp index 5d66a332..5e4a8253 100644 --- a/unit/internal/vermicelli.cpp +++ b/unit/internal/vermicelli.cpp @@ -126,27 +126,29 @@ TEST(DoubleVermicelli, ExecNoMatch1) { const u8 *rv = vermicelliDoubleExec('a', 'b', 0, (u8 *)t1 + i, (u8 *)t1 + strlen(t1) - j); - ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1 + strlen(t1) - j), (size_t)rv); rv = vermicelliDoubleExec('B', 'b', 0, (u8 *)t1 + i, (u8 *)t1 + strlen(t1) - j); - ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1 + strlen(t1) - j), (size_t)rv); rv = vermicelliDoubleExec('A', 'B', 1, (u8 *)t1 + i, (u8 *)t1 + strlen(t1) - j); - ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1 + strlen(t1) - j), (size_t)rv); + /* partial match */ rv = vermicelliDoubleExec('b', 'B', 0, (u8 *)t1 + i, (u8 *)t1 + strlen(t1) - j); - ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); + /* partial match */ rv = vermicelliDoubleExec('B', 'A', 1, (u8 *)t1 + i, (u8 *)t1 + strlen(t1) - j); - ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); } } } @@ -353,30 +355,32 @@ TEST(DoubleVermicelliMasked, ExecNoMatch1) { t1_raw + i, t1_raw + t1.length() - i - j); - ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv); - rv = vermicelliDoubleMaskedExec('B', 'b', 0xff, CASE_CLEAR, + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j), (size_t)rv); + + rv = vermicelliDoubleMaskedExec('B', 'B', 0xff, CASE_CLEAR, t1_raw + i, t1_raw + t1.length() - i - j); - ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j), (size_t)rv); rv = vermicelliDoubleMaskedExec('A', 'B', CASE_CLEAR, CASE_CLEAR, t1_raw + i, t1_raw + t1.length() -i - j); - ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j), (size_t)rv); - rv = vermicelliDoubleMaskedExec('b', 'B', CASE_CLEAR, 0xff, + /* partial match */ + rv = vermicelliDoubleMaskedExec('B', 'B', CASE_CLEAR, 0xff, t1_raw + i, t1_raw + t1.length() - i - j); - ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv); rv = vermicelliDoubleMaskedExec('B', 'A', 0xff, 0xff, t1_raw + i, t1_raw + t1.length() - i - j); - ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j), (size_t)rv); } } } From 8b94eb86f8ac800698240ca68ef4c45a5913ab34 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 17 Nov 2016 16:47:20 +1100 Subject: [PATCH 065/103] precise dverm fix for 5a32993: handle sign extension correctly --- src/nfa/vermicelli.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/nfa/vermicelli.h b/src/nfa/vermicelli.h index b2f2ab7c..817e681a 100644 --- a/src/nfa/vermicelli.h +++ b/src/nfa/vermicelli.h @@ -191,7 +191,8 @@ const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf, /* check for partial match at end */ u8 mask = nocase ? 
CASE_CLEAR : 0xff; - if ((buf_end[-1] & mask) == c1) { + if ((buf_end[-1] & mask) == (u8)c1) { + DEBUG_PRINTF("partial!!!\n"); return buf_end - 1; } @@ -242,7 +243,7 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2, } /* check for partial match at end */ - if ((buf_end[-1] & m1) == c1) { + if ((buf_end[-1] & m1) == (u8)c1) { return buf_end - 1; } From eb7759361da9ae210af37f683dd96431b4d70735 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Tue, 22 Nov 2016 13:48:00 +1100 Subject: [PATCH 066/103] Rework the creation of startDs edges in fillHolderForLockCheck(). This prevents clearing of edges created for virtual starts in late regions. --- src/nfagraph/ng_som.cpp | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/src/nfagraph/ng_som.cpp b/src/nfagraph/ng_som.cpp index 862f5b53..f6ba0fa7 100644 --- a/src/nfagraph/ng_som.cpp +++ b/src/nfagraph/ng_som.cpp @@ -685,27 +685,22 @@ void fillHolderForLockCheck(NGHolder *out, const NGHolder &g, map::const_iterator picked) { /* NOTE: This is appropriate for firstMatchIsFirst */ DEBUG_PRINTF("prepping for lock check\n"); + NGHolder &midfix = *out; - add_edge(midfix.startDs, midfix.accept, midfix); map v_map; v_map[g.start] = midfix.start; v_map[g.startDs] = midfix.startDs; - map::const_iterator jt = picked; - /* include the lock region */ - assert(jt != info.end()); - ++jt; - assert(!jt->second.dag); - assert(jt->second.full.size() == 1); + assert(picked != info.end()); + auto graph_last = next(picked); - for (; ; --jt) { + assert(!graph_last->second.dag); + assert(graph_last->second.full.size() == 1); + + for (auto jt = graph_last; ; --jt) { DEBUG_PRINTF("adding r %u to midfix\n", jt->first); - if (!jt->second.optional) { - clear_out_edges(midfix.startDs, midfix); - add_edge(midfix.startDs, midfix.startDs, midfix); - } /* add all vertices in region, create mapping */ for (auto v : jt->second.full) { @@ -741,14 +736,27 @@ void fillHolderForLockCheck(NGHolder *out, const NGHolder &g, } } - /* add edges from startds to enters */ + if (jt == info.begin()) { + break; + } + } + + /* add edges from startds to the enters of all the initial optional + * regions and the first mandatory region. */ + for (auto jt = info.begin(); ; ++jt) { for (auto enter : jt->second.enters) { assert(contains(v_map, enter)); NFAVertex v = v_map[enter]; add_edge_if_not_present(midfix.startDs, v, midfix); } - if (jt == info.begin()) { + if (!jt->second.optional) { + break; + } + + if (jt == graph_last) { + /* all regions are optional - add a direct edge to accept */ + add_edge_if_not_present(midfix.startDs, midfix.accept, midfix); break; } } From ef99ae108f0c37d6e3ee1d582f8eb1ac3eb95011 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 23 Nov 2016 15:38:26 +1100 Subject: [PATCH 067/103] rose_build_merge: correctly merge NFA outfixes We were not doing our bookkeeping properly for merges where the number of NFAs was greater than the batch size of 200. 
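As a self-contained illustration of the fixed bookkeeping (toy types standing in for the ue2 internals, not this patch's actual code), the per-batch merge results must be accumulated rather than assigned:

    #include <cstddef>
    #include <map>
    #include <vector>

    // Each batch merge returns a (from, to) mapping, analogous to what
    // mergeNfaCluster now returns; here everything in a batch is merged
    // into its first element.
    using MergeMap = std::map<int, int>;

    static MergeMap mergeBatch(const std::vector<int> &batch) {
        MergeMap m;
        for (std::size_t i = 1; i < batch.size(); i++) {
            m[batch[i]] = batch[0];
        }
        return m;
    }

    static MergeMap mergeChunked(const std::vector<std::vector<int>> &batches) {
        MergeMap merged;
        for (const auto &batch : batches) {
            MergeMap batch_merged = mergeBatch(batch);
            // Accumulate: insert() preserves mappings from earlier batches,
            // where a plain assignment would have discarded them.
            merged.insert(batch_merged.begin(), batch_merged.end());
        }
        return merged;
    }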
--- src/nfagraph/ng_uncalc_components.cpp | 16 ++++++++-------- src/nfagraph/ng_uncalc_components.h | 13 +++++-------- src/rose/rose_build_merge.cpp | 6 +++--- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/src/nfagraph/ng_uncalc_components.cpp b/src/nfagraph/ng_uncalc_components.cpp index 877c396c..4ad5ff78 100644 --- a/src/nfagraph/ng_uncalc_components.cpp +++ b/src/nfagraph/ng_uncalc_components.cpp @@ -551,18 +551,16 @@ bool mergeNfaPair(const NGHolder &ga, NGHolder &gb, const ReportManager *rm, return true; } -/** Merge the group of graphs in \p cluster where possible. The (from, to) - * mapping of merged graphs is returned in \p merged. */ -void mergeNfaCluster(const vector &cluster, - const ReportManager *rm, - map &merged, - const CompileContext &cc) { +map mergeNfaCluster(const vector &cluster, + const ReportManager *rm, + const CompileContext &cc) { + map merged; + if (cluster.size() < 2) { - return; + return merged; } DEBUG_PRINTF("new cluster, size %zu\n", cluster.size()); - merged.clear(); priority_queue pq; buildNfaMergeQueue(cluster, &pq); @@ -591,6 +589,8 @@ void mergeNfaCluster(const vector &cluster, } } } + + return merged; } } // namespace ue2 diff --git a/src/nfagraph/ng_uncalc_components.h b/src/nfagraph/ng_uncalc_components.h index d7883578..b0f42670 100644 --- a/src/nfagraph/ng_uncalc_components.h +++ b/src/nfagraph/ng_uncalc_components.h @@ -36,13 +36,11 @@ #include #include -#include "nfagraph/ng_holder.h" -#include "util/ue2_containers.h" - namespace ue2 { struct CompileContext; struct Grey; +class NGHolder; class ReportManager; /** @@ -56,12 +54,11 @@ u32 commonPrefixLength(const NGHolder &ga, const NGHolder &gb); /** * \brief Merge the group of graphs in \p cluster where possible. * - * The (from, to) mapping of merged graphs is returned in \p merged. + * The (from, to) mapping of merged graphs is returned. */ -void mergeNfaCluster(const std::vector &cluster, - const ReportManager *rm, - std::map &merged, - const CompileContext &cc); +std::map +mergeNfaCluster(const std::vector &cluster, const ReportManager *rm, + const CompileContext &cc); /** * \brief Merge graph \p ga into graph \p gb. diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 2643bdca..54a7390e 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -311,8 +311,7 @@ void mergeCluster(RoseGraph &g, const ReportManager &rm, it = it2; DEBUG_PRINTF("merging cluster %zu\n", cluster.size()); - map merged; - mergeNfaCluster(cluster, &rm, merged, cc); + auto merged = mergeNfaCluster(cluster, &rm, cc); DEBUG_PRINTF("done\n"); for (const auto &m : merged) { @@ -2414,7 +2413,8 @@ map chunkedNfaMerge(RoseBuildImpl &build, batch.push_back(*it); assert((*it)->kind == NFA_OUTFIX); if (batch.size() == MERGE_GROUP_SIZE_MAX || next(it) == ite) { - mergeNfaCluster(batch, &build.rm, merged, build.cc); + auto batch_merged = mergeNfaCluster(batch, &build.rm, build.cc); + insert(&merged, batch_merged); batch.clear(); } } From 106667e24e87b8abe065acc946deadfc0ffdba8a Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Fri, 25 Nov 2016 15:33:27 +1100 Subject: [PATCH 068/103] refactor mcclellan runtime 1. use u32 to hold the state during runtime to prevent repeated zero extension 2. 
have a tight small loop for processing characters which breaks when something interesting happens --- src/nfa/mcclellan.c | 460 +++++++++++++++++++------------- src/nfa/mcclellan_common_impl.h | 18 +- src/nfa/mcclellan_internal.h | 8 +- 3 files changed, 288 insertions(+), 198 deletions(-) diff --git a/src/nfa/mcclellan.c b/src/nfa/mcclellan.c index b8ca75e9..896c0703 100644 --- a/src/nfa/mcclellan.c +++ b/src/nfa/mcclellan.c @@ -42,10 +42,10 @@ static really_inline char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m, - u16 s, u64a loc, char eod, u16 *const cached_accept_state, - u32 *const cached_accept_id) { - DEBUG_PRINTF("reporting state = %hu, loc=%llu, eod %hhu\n", - (u16)(s & STATE_MASK), loc, eod); + u32 s, u64a loc, char eod, u32 *cached_accept_state, + u32 *cached_accept_id) { + DEBUG_PRINTF("reporting state = %u, loc=%llu, eod %hhu\n", + s & STATE_MASK, loc, eod); if (!eod && s == *cached_accept_state) { if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { @@ -89,25 +89,106 @@ char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m, } static really_inline -char mcclellanExec16_i(const struct mcclellan *m, u16 *state, const u8 *buf, +const u8 *run_mcclellan_accel(const struct mcclellan *m, + const struct mstate_aux *aux, u32 s, + const u8 **min_accel_offset, + const u8 *c, const u8 *c_end) { + DEBUG_PRINTF("skipping\n"); + u32 accel_offset = aux[s].accel_offset; + + assert(aux[s].accel_offset); + assert(accel_offset >= m->aux_offset); + assert(!m->sherman_offset || accel_offset < m->sherman_offset); + + const union AccelAux *aaux = (const void *)((const char *)m + accel_offset); + const u8 *c2 = run_accel(aaux, c, c_end); + + if (c2 < *min_accel_offset + BAD_ACCEL_DIST) { + *min_accel_offset = c2 + BIG_ACCEL_PENALTY; + } else { + *min_accel_offset = c2 + SMALL_ACCEL_PENALTY; + } + + if (*min_accel_offset >= c_end - ACCEL_MIN_LEN) { + *min_accel_offset = c_end; + } + + DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n", + c2 - c, *min_accel_offset - c2, c_end - c2); + + return c2; +} + +static really_inline +u32 doNormal16(const struct mcclellan *m, const u8 **c_inout, const u8 *end, + u32 s, char do_accel, enum MatchMode mode) { + const u8 *c = *c_inout; + + const u16 *succ_table + = (const u16 *)((const char *)m + sizeof(struct mcclellan)); + assert(ISALIGNED_N(succ_table, 2)); + u32 sherman_base = m->sherman_limit; + const char *sherman_base_offset + = (const char *)m - sizeof(struct NFA) + m->sherman_offset; + u32 as = m->alphaShift; + + s &= STATE_MASK; + + while (c < end && s) { + u8 cprime = m->remap[*c]; + DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u)\n", *c, + ourisprint(*c) ? 
*c : '?', cprime, s); + if (s < sherman_base) { + DEBUG_PRINTF("doing normal\n"); + assert(s < m->state_count); + s = succ_table[(s << as) + cprime]; + } else { + const char *sherman_state + = findShermanState(m, sherman_base_offset, sherman_base, s); + DEBUG_PRINTF("doing sherman (%hu)\n", s); + s = doSherman16(sherman_state, cprime, succ_table, as); + } + + DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK); + c++; + + if (do_accel && (s & ACCEL_FLAG)) { + break; + } + if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { + break; + } + + s &= STATE_MASK; + } + + *c_inout = c; + return s; +} + +static really_inline +char mcclellanExec16_i(const struct mcclellan *m, u32 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **c_final, enum MatchMode mode) { assert(ISALIGNED_N(state, 2)); + if (!len) { + if (mode == STOP_AT_MATCH) { + *c_final = buf; + } + return MO_CONTINUE_MATCHING; + } - u16 s = *state; - const u8 *c = buf, *c_end = buf + len; - const u16 *succ_table = (const u16 *)((const char *)m - + sizeof(struct mcclellan)); - assert(ISALIGNED_N(succ_table, 2)); - const u16 sherman_base = m->sherman_limit; - const char *sherman_base_offset - = (const char *)m - sizeof(struct NFA) + m->sherman_offset; - const u32 as = m->alphaShift; + u32 s = *state; + const u8 *c = buf; + const u8 *c_end = buf + len; + const struct mstate_aux *aux + = (const struct mstate_aux *)((const char *)m + m->aux_offset + - sizeof(struct NFA)); s &= STATE_MASK; u32 cached_accept_id = 0; - u16 cached_accept_state = 0; + u32 cached_accept_state = 0; DEBUG_PRINTF("s: %hu, len %zu\n", s, len); @@ -120,20 +201,13 @@ char mcclellanExec16_i(const struct mcclellan *m, u16 *state, const u8 *buf, goto with_accel; without_accel: - while (c < min_accel_offset && s) { - u8 cprime = m->remap[*(c++)]; - DEBUG_PRINTF("c: %02hhx cp:%02hhx (s=%hu)\n", *(c-1), cprime, s); - if (s < sherman_base) { - DEBUG_PRINTF("doing normal\n"); - assert(s < m->state_count); - s = succ_table[((u32)s << as) + cprime]; - } else { - const char *sherman_state - = findShermanState(m, sherman_base_offset, sherman_base, s); - DEBUG_PRINTF("doing sherman (%hu)\n", s); - s = doSherman16(sherman_state, cprime, succ_table, as); + do { + assert(c < min_accel_offset); + if (!s) { + goto exit; } - DEBUG_PRINTF("s: %hu (%hu)\n", s, (u16)(s & STATE_MASK)); + + s = doNormal16(m, &c, min_accel_offset, s, 0, mode); if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { if (mode == STOP_AT_MATCH) { @@ -150,30 +224,42 @@ without_accel: return MO_HALT_MATCHING; /* termination requested */ } } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, - &cached_accept_state, - &cached_accept_id) == MO_HALT_MATCHING) { + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } } - s &= STATE_MASK; + assert(c <= min_accel_offset); + } while (c < min_accel_offset); + + s &= STATE_MASK; + + if (c == c_end) { + goto exit; + } else { + goto with_accel; } with_accel: - while (c < c_end && s) { - u8 cprime = m->remap[*(c++)]; - DEBUG_PRINTF("c: %02hhx cp:%02hhx (s=%hu)\n", *(c-1), cprime, s); - if (s < sherman_base) { - DEBUG_PRINTF("doing normal\n"); - assert(s < m->state_count); - s = succ_table[((u32)s << as) + cprime]; - } else { - const char *sherman_state - = findShermanState(m, sherman_base_offset, sherman_base, s); - DEBUG_PRINTF("doing sherman (%hu)\n", s); - s = doSherman16(sherman_state, cprime, succ_table, as); + do { + assert(c < c_end); + if (!s) { + goto exit; } - DEBUG_PRINTF("s: %hu (%hu)\n", 
s, (u16)(s & STATE_MASK)); + + if (s & ACCEL_FLAG) { + DEBUG_PRINTF("skipping\n"); + s &= STATE_MASK; + c = run_mcclellan_accel(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { + goto without_accel; + } + } + + s = doNormal16(m, &c, c_end, s, 1, mode); if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { if (mode == STOP_AT_MATCH) { @@ -190,42 +276,17 @@ with_accel: return MO_HALT_MATCHING; /* termination requested */ } } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, - &cached_accept_state, - &cached_accept_id) == MO_HALT_MATCHING) { + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } - } else if (s & ACCEL_FLAG) { - DEBUG_PRINTF("skipping\n"); - const struct mstate_aux *this_aux = get_aux(m, s & STATE_MASK); - u32 accel_offset = this_aux->accel_offset; - - assert(accel_offset >= m->aux_offset); - assert(accel_offset < m->sherman_offset); - - const union AccelAux *aaux - = (const void *)((const char *)m + accel_offset); - const u8 *c2 = run_accel(aaux, c, c_end); - - if (c2 < min_accel_offset + BAD_ACCEL_DIST) { - min_accel_offset = c2 + BIG_ACCEL_PENALTY; - } else { - min_accel_offset = c2 + SMALL_ACCEL_PENALTY; - } - - if (min_accel_offset >= c_end - ACCEL_MIN_LEN) { - min_accel_offset = c_end; - } - - DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n", - c2 - c, min_accel_offset - c2, c_end - c2); - - c = c2; - s &= STATE_MASK; - goto without_accel; } - s &= STATE_MASK; - } + assert(c <= c_end); + } while (c < c_end); + +exit: + s &= STATE_MASK; if (mode == STOP_AT_MATCH) { *c_final = c_end; @@ -236,7 +297,7 @@ with_accel: } static never_inline -char mcclellanExec16_i_cb(const struct mcclellan *m, u16 *state, const u8 *buf, +char mcclellanExec16_i_cb(const struct mcclellan *m, u32 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **final_point) { return mcclellanExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, @@ -244,7 +305,7 @@ char mcclellanExec16_i_cb(const struct mcclellan *m, u16 *state, const u8 *buf, } static never_inline -char mcclellanExec16_i_sam(const struct mcclellan *m, u16 *state, const u8 *buf, +char mcclellanExec16_i_sam(const struct mcclellan *m, u32 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **final_point) { return mcclellanExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, @@ -252,15 +313,15 @@ char mcclellanExec16_i_sam(const struct mcclellan *m, u16 *state, const u8 *buf, } static never_inline -char mcclellanExec16_i_nm(const struct mcclellan *m, u16 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { +char mcclellanExec16_i_nm(const struct mcclellan *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { return mcclellanExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, final_point, NO_MATCHES); } static really_inline -char mcclellanExec16_i_ni(const struct mcclellan *m, u16 *state, const u8 *buf, +char mcclellanExec16_i_ni(const struct mcclellan *m, u32 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **final_point, enum MatchMode mode) { @@ -271,35 +332,68 @@ char mcclellanExec16_i_ni(const struct mcclellan *m, u16 *state, const u8 *buf, return mcclellanExec16_i_sam(m, state, buf, len, offAdj, cb, ctxt, single, final_point); } else { - assert (mode == NO_MATCHES); + 
assert(mode == NO_MATCHES); return mcclellanExec16_i_nm(m, state, buf, len, offAdj, cb, ctxt, single, final_point); } } static really_inline -char mcclellanExec8_i(const struct mcclellan *m, u8 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **c_final, enum MatchMode mode) { - u8 s = *state; - const u8 *c = buf, *c_end = buf + len; +u32 doNormal8(const struct mcclellan *m, const u8 **c_inout, const u8 *end, + u32 s, char do_accel, enum MatchMode mode) { + const u8 *c = *c_inout; + u32 accel_limit = m->accel_limit_8; + u32 accept_limit = m->accept_limit_8; + + const u32 as = m->alphaShift; const u8 *succ_table = (const u8 *)((const char *)m + sizeof(struct mcclellan)); - const u32 as = m->alphaShift; - const struct mstate_aux *aux; + while (c < end && s) { + u8 cprime = m->remap[*c]; + DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *c, + ourisprint(*c) ? *c : '?', cprime); + s = succ_table[(s << as) + cprime]; - aux = (const struct mstate_aux *)((const char *)m + m->aux_offset + DEBUG_PRINTF("s: %hhu\n", s); + c++; + if (do_accel) { + if (s >= accel_limit) { + break; + } + } else { + if (mode != NO_MATCHES && s >= accept_limit) { + break; + } + } + } + *c_inout = c; + return s; +} + +static really_inline +char mcclellanExec8_i(const struct mcclellan *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **c_final, enum MatchMode mode) { + if (!len) { + *c_final = buf; + return MO_CONTINUE_MATCHING; + } + u32 s = *state; + const u8 *c = buf; + const u8 *c_end = buf + len; + + const struct mstate_aux *aux + = (const struct mstate_aux *)((const char *)m + m->aux_offset - sizeof(struct NFA)); - - u16 accel_limit = m->accel_limit_8; - u16 accept_limit = m->accept_limit_8; + u32 accept_limit = m->accept_limit_8; u32 cached_accept_id = 0; - u16 cached_accept_state = 0; + u32 cached_accept_state = 0; - DEBUG_PRINTF("accel %hu, accept %hu\n", accel_limit, accept_limit); + DEBUG_PRINTF("accel %hu, accept %hu\n", + m->accel_limit_8, m->accept_limit_8); - DEBUG_PRINTF("s: %hhu, len %zu\n", s, len); + DEBUG_PRINTF("s: %u, len %zu\n", s, len); const u8 *min_accel_offset = c; if (!m->has_accel || len < ACCEL_MIN_LEN) { @@ -310,12 +404,13 @@ char mcclellanExec8_i(const struct mcclellan *m, u8 *state, const u8 *buf, goto with_accel; without_accel: - while (c < min_accel_offset && s) { - u8 cprime = m->remap[*(c++)]; - DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *(c-1), - ourisprint(*(c-1)) ? *(c-1) : '?', cprime); - s = succ_table[((u32)s << as) + cprime]; - DEBUG_PRINTF("s: %hhu\n", s); + do { + assert(c < min_accel_offset); + if (!s) { + goto exit; + } + + s = doNormal8(m, &c, min_accel_offset, s, 0, mode); if (mode != NO_MATCHES && s >= accept_limit) { if (mode == STOP_AT_MATCH) { @@ -332,69 +427,63 @@ without_accel: return MO_HALT_MATCHING; } } else if (doComplexReport(cb, ctxt, m, s, loc, 0, - &cached_accept_state, - &cached_accept_id) + &cached_accept_state, &cached_accept_id) == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } } + + assert(c <= min_accel_offset); + } while (c < min_accel_offset); + + if (c == c_end) { + goto exit; } with_accel: - while (c < c_end && s) { - u8 cprime = m->remap[*(c++)]; - DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *(c-1), - ourisprint(*(c-1)) ? 
*(c-1) : '?', cprime); - s = succ_table[((u32)s << as) + cprime]; - DEBUG_PRINTF("s: %hhu\n", s); + do { + u32 accel_limit = m->accel_limit_8; + assert(c < c_end); - if (s >= accel_limit) { /* accept_limit >= accel_limit */ - if (mode != NO_MATCHES && s >= accept_limit) { - if (mode == STOP_AT_MATCH) { - DEBUG_PRINTF("match - pausing\n"); - *state = s; - *c_final = c - 1; - return MO_CONTINUE_MATCHING; - } + if (!s) { + goto exit; + } - u64a loc = (c - 1) - buf + offAdj + 1; - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); - if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else if (doComplexReport(cb, ctxt, m, s, loc, 0, - &cached_accept_state, - &cached_accept_id) - == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else if (aux[s].accel_offset) { - DEBUG_PRINTF("skipping\n"); - - const union AccelAux *aaux = (const void *)((const char *)m - + aux[s].accel_offset); - const u8 *c2 = run_accel(aaux, c, c_end); - - if (c2 < min_accel_offset + BAD_ACCEL_DIST) { - min_accel_offset = c2 + BIG_ACCEL_PENALTY; - } else { - min_accel_offset = c2 + SMALL_ACCEL_PENALTY; - } - - if (min_accel_offset >= c_end - ACCEL_MIN_LEN) { - min_accel_offset = c_end; - } - - DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n", - c2 - c, min_accel_offset - c2, c_end - c2); - - c = c2; + if (s >= accel_limit && aux[s].accel_offset) { + c = run_mcclellan_accel(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { goto without_accel; } } - } + s = doNormal8(m, &c, c_end, s, 1, mode); + if (mode != NO_MATCHES && s >= accept_limit) { + if (mode == STOP_AT_MATCH) { + DEBUG_PRINTF("match - pausing\n"); + *state = s; + *c_final = c - 1; + return MO_CONTINUE_MATCHING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else if (doComplexReport(cb, ctxt, m, s, loc, 0, + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + + assert(c <= c_end); + } while (c < c_end); + +exit: *state = s; if (mode == STOP_AT_MATCH) { *c_final = c_end; @@ -403,31 +492,31 @@ with_accel: } static never_inline -char mcclellanExec8_i_cb(const struct mcclellan *m, u8 *state, const u8 *buf, +char mcclellanExec8_i_cb(const struct mcclellan *m, u32 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **final_point) { return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, CALLBACK_OUTPUT); + final_point, CALLBACK_OUTPUT); } static never_inline -char mcclellanExec8_i_sam(const struct mcclellan *m, u8 *state, const u8 *buf, +char mcclellanExec8_i_sam(const struct mcclellan *m, u32 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **final_point) { return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, STOP_AT_MATCH); + final_point, STOP_AT_MATCH); } static never_inline -char mcclellanExec8_i_nm(const struct mcclellan *m, u8 *state, const u8 *buf, +char mcclellanExec8_i_nm(const struct mcclellan *m, u32 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **final_point) { return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, NO_MATCHES); + final_point, NO_MATCHES); } static really_inline -char mcclellanExec8_i_ni(const struct 
mcclellan *m, u8 *state, const u8 *buf, +char mcclellanExec8_i_ni(const struct mcclellan *m, u32 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **final_point, enum MatchMode mode) { @@ -445,7 +534,7 @@ char mcclellanExec8_i_ni(const struct mcclellan *m, u8 *state, const u8 *buf, } static really_inline -char mcclellanCheckEOD(const struct NFA *nfa, u16 s, u64a offset, +char mcclellanCheckEOD(const struct NFA *nfa, u32 s, u64a offset, NfaCallback cb, void *ctxt) { const struct mcclellan *m = getImplNfa(nfa); const struct mstate_aux *aux = get_aux(m, s); @@ -466,7 +555,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, s64a sp; assert(ISALIGNED_N(q->state, 2)); - u16 s = *(u16 *)q->state; + u32 s = *(u16 *)q->state; if (q->report_current) { assert(s); @@ -478,7 +567,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, rv = cb(0, q_cur_offset(q), m->arb_report, context); } else { u32 cached_accept_id = 0; - u16 cached_accept_state = 0; + u32 cached_accept_state = 0; rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0, &cached_accept_state, &cached_accept_id); @@ -532,8 +621,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) { DEBUG_PRINTF("this is as far as we go\n"); assert(q->cur); - DEBUG_PRINTF("state %hu final_look %zd\n", s, - final_look - cur_buf); + DEBUG_PRINTF("state %hu final_look %zd\n", s, final_look - cur_buf); q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = final_look - cur_buf + 1; /* due to @@ -584,12 +672,12 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, } static really_inline -char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, - const u8 *buffer, size_t length, - NfaCallback cb, void *context, char single) { +char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, const u8 *buffer, + size_t length, NfaCallback cb, void *context, + char single) { assert(n->type == MCCLELLAN_NFA_16); const struct mcclellan *m = getImplNfa(n); - u16 s = m->start_anchored; + u32 s = m->start_anchored; if (mcclellanExec16_i(m, &s, buffer, length, offset, cb, context, single, NULL, CALLBACK_OUTPUT) @@ -608,14 +696,14 @@ char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, static really_inline char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, - const u8 *hend, NfaCallback cb, void *context, - struct mq *q, char single, s64a end, - enum MatchMode mode) { + const u8 *hend, NfaCallback cb, void *context, + struct mq *q, char single, s64a end, + enum MatchMode mode) { assert(n->type == MCCLELLAN_NFA_8); const struct mcclellan *m = getImplNfa(n); s64a sp; - u8 s = *(u8 *)q->state; + u32 s = *(u8 *)q->state; if (q->report_current) { assert(s); @@ -627,7 +715,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, rv = cb(0, q_cur_offset(q), m->arb_report, context); } else { u32 cached_accept_id = 0; - u16 cached_accept_state = 0; + u32 cached_accept_state = 0; rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0, &cached_accept_state, &cached_accept_id); @@ -738,7 +826,7 @@ char nfaExecMcClellan8_Bi(const struct NFA *n, u64a offset, const u8 *buffer, char single) { assert(n->type == MCCLELLAN_NFA_8); const struct mcclellan *m = getImplNfa(n); - u8 s = (u8)m->start_anchored; + u32 s = m->start_anchored; if (mcclellanExec8_i(m, &s, buffer, 
length, offset, cb, context, single, NULL, CALLBACK_OUTPUT) @@ -811,7 +899,7 @@ char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) { const struct mcclellan *m = getImplNfa(n); NfaCallback cb = q->cb; void *ctxt = q->context; - u8 s = *(u8 *)q->state; + u32 s = *(u8 *)q->state; u8 single = m->flags & MCCLELLAN_FLAG_SINGLE; u64a offset = q_cur_offset(q); assert(q_cur_type(q) == MQE_START); @@ -823,7 +911,7 @@ char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) { cb(0, offset, m->arb_report, ctxt); } else { u32 cached_accept_id = 0; - u16 cached_accept_state = 0; + u32 cached_accept_state = 0; doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state, &cached_accept_id); @@ -837,7 +925,7 @@ char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) { const struct mcclellan *m = getImplNfa(n); NfaCallback cb = q->cb; void *ctxt = q->context; - u16 s = *(u16 *)q->state; + u32 s = *(u16 *)q->state; const struct mstate_aux *aux = get_aux(m, s); u8 single = m->flags & MCCLELLAN_FLAG_SINGLE; u64a offset = q_cur_offset(q); @@ -851,7 +939,7 @@ char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) { cb(0, offset, m->arb_report, ctxt); } else { u32 cached_accept_id = 0; - u16 cached_accept_state = 0; + u32 cached_accept_state = 0; doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state, &cached_accept_id); @@ -1025,7 +1113,7 @@ void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state, size_t len, NfaCallback cb, void *ctxt) { const struct mcclellan *m = getImplNfa(nfa); - u8 s = top ? m->start_anchored : *(u8 *)state; + u32 s = top ? m->start_anchored : *(u8 *)state; if (m->flags & MCCLELLAN_FLAG_SINGLE) { mcclellanExec8_i(m, &s, buf + start_off, len - start_off, @@ -1043,14 +1131,14 @@ void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state, size_t len, NfaCallback cb, void *ctxt) { const struct mcclellan *m = getImplNfa(nfa); - u16 s = top ? m->start_anchored : unaligned_load_u16(state); + u32 s = top ? 
m->start_anchored : unaligned_load_u16(state); if (m->flags & MCCLELLAN_FLAG_SINGLE) { mcclellanExec16_i(m, &s, buf + start_off, len - start_off, - start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT); + start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT); } else { mcclellanExec16_i(m, &s, buf + start_off, len - start_off, - start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT); + start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT); } unaligned_store_u16(state, s); @@ -1071,13 +1159,15 @@ char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state, context); } -char nfaExecMcClellan8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { +char nfaExecMcClellan8_queueInitState(UNUSED const struct NFA *nfa, + struct mq *q) { assert(nfa->scratchStateSize == 1); *(u8 *)q->state = 0; return 0; } -char nfaExecMcClellan16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { +char nfaExecMcClellan16_queueInitState(UNUSED const struct NFA *nfa, + struct mq *q) { assert(nfa->scratchStateSize == 2); assert(ISALIGNED_N(q->state, 2)); *(u16 *)q->state = 0; diff --git a/src/nfa/mcclellan_common_impl.h b/src/nfa/mcclellan_common_impl.h index 4906ce5b..751ef05a 100644 --- a/src/nfa/mcclellan_common_impl.h +++ b/src/nfa/mcclellan_common_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,7 +33,7 @@ enum MatchMode { }; static really_inline -const struct mstate_aux *get_aux(const struct mcclellan *m, u16 s) { +const struct mstate_aux *get_aux(const struct mcclellan *m, u32 s) { const char *nfa = (const char *)m - sizeof(struct NFA); const struct mstate_aux *aux = s + (const struct mstate_aux *)(nfa + m->aux_offset); @@ -43,7 +43,7 @@ const struct mstate_aux *get_aux(const struct mcclellan *m, u16 s) { } static really_inline -u16 mcclellanEnableStarts(const struct mcclellan *m, u16 s) { +u32 mcclellanEnableStarts(const struct mcclellan *m, u32 s) { const struct mstate_aux *aux = get_aux(m, s); DEBUG_PRINTF("enabling starts %hu->%hu\n", s, aux->top); @@ -51,7 +51,7 @@ u16 mcclellanEnableStarts(const struct mcclellan *m, u16 s) { } static really_inline -u16 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table, +u32 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table, u32 as) { assert(ISALIGNED_N(sherman_state, 16)); @@ -70,15 +70,15 @@ u16 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table, if (z) { u32 i = ctz32(z & ~0xf) - 4; - u16 s_out = unaligned_load_u16((const u8 *)sherman_state + u32 s_out = unaligned_load_u16((const u8 *)sherman_state + SHERMAN_STATES_OFFSET(len) + sizeof(u16) * i); - DEBUG_PRINTF("found sherman match at %u/%u for c'=%hhu " - "s=%hu\n", i, len, cprime, s_out); + DEBUG_PRINTF("found sherman match at %u/%u for c'=%hhu s=%u\n", i, + len, cprime, s_out); return s_out; } } - u16 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET); - return succ_table[((u32)daddy << as) + cprime]; + u32 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET); + return succ_table[(daddy << as) + cprime]; } diff --git a/src/nfa/mcclellan_internal.h b/src/nfa/mcclellan_internal.h index aad296c4..4a27aadb 100644 --- a/src/nfa/mcclellan_internal.h +++ b/src/nfa/mcclellan_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in 
source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -90,8 +90,8 @@ struct mcclellan { static really_inline const char *findShermanState(UNUSED const struct mcclellan *m, - const char *sherman_base_offset, u16 sherman_base, - u16 s) { + const char *sherman_base_offset, u32 sherman_base, + u32 s) { const char *rv = sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base); assert(rv < (const char *)m + m->length - sizeof(struct NFA)); @@ -102,7 +102,7 @@ const char *findShermanState(UNUSED const struct mcclellan *m, static really_inline char *findMutableShermanState(char *sherman_base_offset, u16 sherman_base, - u16 s) { + u32 s) { return sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base); } From 32af5fa79457b8dbca4680afa76b17e872bd7718 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 28 Nov 2016 16:19:42 +1100 Subject: [PATCH 069/103] mcclellan: fix printf format specifiers --- src/nfa/mcclellan.c | 10 +++++----- src/nfa/mcclellan_common_impl.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/nfa/mcclellan.c b/src/nfa/mcclellan.c index 896c0703..63f5f535 100644 --- a/src/nfa/mcclellan.c +++ b/src/nfa/mcclellan.c @@ -145,7 +145,7 @@ u32 doNormal16(const struct mcclellan *m, const u8 **c_inout, const u8 *end, } else { const char *sherman_state = findShermanState(m, sherman_base_offset, sherman_base, s); - DEBUG_PRINTF("doing sherman (%hu)\n", s); + DEBUG_PRINTF("doing sherman (%u)\n", s); s = doSherman16(sherman_state, cprime, succ_table, as); } @@ -190,7 +190,7 @@ char mcclellanExec16_i(const struct mcclellan *m, u32 *state, const u8 *buf, u32 cached_accept_id = 0; u32 cached_accept_state = 0; - DEBUG_PRINTF("s: %hu, len %zu\n", s, len); + DEBUG_PRINTF("s: %u, len %zu\n", s, len); const u8 *min_accel_offset = c; if (!m->has_accel || len < ACCEL_MIN_LEN) { @@ -354,7 +354,7 @@ u32 doNormal8(const struct mcclellan *m, const u8 **c_inout, const u8 *end, ourisprint(*c) ? 
*c : '?', cprime); s = succ_table[(s << as) + cprime]; - DEBUG_PRINTF("s: %hhu\n", s); + DEBUG_PRINTF("s: %u\n", s); c++; if (do_accel) { if (s >= accel_limit) { @@ -621,7 +621,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) { DEBUG_PRINTF("this is as far as we go\n"); assert(q->cur); - DEBUG_PRINTF("state %hu final_look %zd\n", s, final_look - cur_buf); + DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = final_look - cur_buf + 1; /* due to @@ -930,7 +930,7 @@ char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) { u8 single = m->flags & MCCLELLAN_FLAG_SINGLE; u64a offset = q_cur_offset(q); assert(q_cur_type(q) == MQE_START); - DEBUG_PRINTF("state %hu\n", s); + DEBUG_PRINTF("state %u\n", s); assert(s); if (aux->accept) { diff --git a/src/nfa/mcclellan_common_impl.h b/src/nfa/mcclellan_common_impl.h index 751ef05a..be130715 100644 --- a/src/nfa/mcclellan_common_impl.h +++ b/src/nfa/mcclellan_common_impl.h @@ -46,7 +46,7 @@ static really_inline u32 mcclellanEnableStarts(const struct mcclellan *m, u32 s) { const struct mstate_aux *aux = get_aux(m, s); - DEBUG_PRINTF("enabling starts %hu->%hu\n", s, aux->top); + DEBUG_PRINTF("enabling starts %u->%hu\n", s, aux->top); return aux->top; } From 06cde4c94dfa7524b8d7eeb75ac1b446f5fea9eb Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 29 Nov 2016 14:49:01 +1100 Subject: [PATCH 070/103] ng_literal_analysis: use ue2_graph This reduces compile time ~10% on a number of large cases. --- src/nfagraph/ng_literal_analysis.cpp | 160 +++++++++++++-------------- 1 file changed, 76 insertions(+), 84 deletions(-) diff --git a/src/nfagraph/ng_literal_analysis.cpp b/src/nfagraph/ng_literal_analysis.cpp index 68c1bdd6..a5f3468b 100644 --- a/src/nfagraph/ng_literal_analysis.cpp +++ b/src/nfagraph/ng_literal_analysis.cpp @@ -40,17 +40,16 @@ #include "util/depth.h" #include "util/graph.h" #include "util/graph_range.h" +#include "util/ue2_graph.h" #include "util/ue2string.h" #include #include #include -#include #include using namespace std; -using boost::vertex_index; namespace ue2 { @@ -65,24 +64,29 @@ namespace { /* Small literal graph type used for the suffix tree used in * compressAndScore. */ - struct LitGraphVertexProps { - LitGraphVertexProps() {} - explicit LitGraphVertexProps(const ue2_literal::elem &c_in) : c(c_in) {} + LitGraphVertexProps() = default; + explicit LitGraphVertexProps(ue2_literal::elem c_in) : c(move(c_in)) {} ue2_literal::elem c; // string element (char + bool) + size_t index; // managed by ue2_graph }; struct LitGraphEdgeProps { - LitGraphEdgeProps() {} + LitGraphEdgeProps() = default; explicit LitGraphEdgeProps(u64a score_in) : score(score_in) {} u64a score = NO_LITERAL_AT_EDGE_SCORE; - size_t index; /* only initialised when the reverse edges are added. */ + size_t index; // managed by ue2_graph +}; + +struct LitGraph + : public ue2_graph { + + LitGraph() : root(add_vertex(*this)), sink(add_vertex(*this)) {} + + const vertex_descriptor root; + const vertex_descriptor sink; }; -/* keep edgeList = listS as you cannot remove edges if edgeList = vecS */ -typedef boost::adjacency_list LitGraph; typedef LitGraph::vertex_descriptor LitVertex; typedef LitGraph::edge_descriptor LitEdge; @@ -95,17 +99,16 @@ typedef std::queue LitVertexQ; /** \brief Dump the literal graph in Graphviz format. 
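 * (The resulting file can be rendered with Graphviz, e.g.
 * "dot -Tpng litgraph.dot -o litgraph.png".)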
*/ static UNUSED -void dumpGraph(const char *filename, const LitGraph &lg, const LitVertex &root, - const LitVertex &sink) { +void dumpGraph(const char *filename, const LitGraph &lg) { ofstream fout(filename); fout << "digraph G {" << endl; for (auto v : vertices_range(lg)) { - fout << boost::get(vertex_index, lg, v); - if (v == root) { + fout << lg[v].index; + if (v == lg.root) { fout << "[label=\"ROOT\"];"; - } else if (v == sink) { + } else if (v == lg.sink) { fout << "[label=\"SINK\"];"; } else { ue2_literal s; @@ -117,10 +120,9 @@ void dumpGraph(const char *filename, const LitGraph &lg, const LitVertex &root, for (const auto &e : edges_range(lg)) { LitVertex u = source(e, lg), v = target(e, lg); - fout << boost::get(vertex_index, lg, u) << " -> " << - boost::get(vertex_index, lg, v) << - "[label=\"" << lg[e].score << "\"]" << - ";" << endl; + fout << lg[u].index << " -> " << lg[v].index << "[label=\"" + << lg[e].score << "\"]" + << ";" << endl; } fout << "}" << endl; @@ -142,11 +144,11 @@ bool allowExpand(size_t numItems, size_t totalPathsSoFar) { } static -LitVertex addToLitGraph(LitGraph &lg, LitVertex sink, - LitVertex pred, const ue2_literal::elem &c) { +LitVertex addToLitGraph(LitGraph &lg, LitVertex pred, + const ue2_literal::elem &c) { // Check if we already have this in the graph. for (auto v : adjacent_vertices_range(pred, lg)) { - if (v == sink) { + if (v == lg.sink) { continue; } if (lg[v].c == c) { @@ -160,9 +162,10 @@ LitVertex addToLitGraph(LitGraph &lg, LitVertex sink, } static -void addToQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex sink, - LitVertex pred, const CharReach &cr, NFAVertex v) { - for (size_t i = cr.find_first(); i != CharReach::npos; i = cr.find_next(i)) { +void addToQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex pred, + const CharReach &cr, NFAVertex v) { + for (size_t i = cr.find_first(); i != CharReach::npos; + i = cr.find_next(i)) { if (myisupper(i) && cr.test(mytolower(i))) { // ignore upper half of a nocase pair continue; @@ -170,14 +173,14 @@ void addToQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex sink, bool nocase = myislower(i) && cr.test(mytoupper(i)); ue2_literal::elem c((char)i, nocase); - LitVertex lv = addToLitGraph(lg, sink, pred, c); + LitVertex lv = addToLitGraph(lg, pred, c); workQ.push(VertexPair(lv, v)); } } static -void initWorkQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex root, - LitVertex sink, const NGHolder &g, const NFAEdge &e) { +void initWorkQueue(LitVertexQ &workQ, LitGraph &lg, const NGHolder &g, + const NFAEdge &e) { NFAVertex u = source(e, g); NFAVertex v = target(e, g); const CharReach &cr = g[v].char_reach; @@ -186,7 +189,7 @@ void initWorkQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex root, return; } - addToQueue(workQ, lg, sink, root, cr, u); + addToQueue(workQ, lg, lg.root, cr, u); } static @@ -198,7 +201,8 @@ u32 crCardinality(const CharReach &cr) { } u32 rv = 0; - for (size_t i = cr.find_first(); i != CharReach::npos; i = cr.find_next(i)) { + for (size_t i = cr.find_first(); i != CharReach::npos; + i = cr.find_next(i)) { if (myisupper(i) && cr.test(mytolower(i))) { // ignore upper half of a nocase pair continue; @@ -213,10 +217,10 @@ u32 crCardinality(const CharReach &cr) { * identifying vertices connected to the sink and removing their other * out-edges. 
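 * For example, if the tree holds both "bc" and "abc" (stored reversed, so
 * both share the 'c' and 'b' vertices), the vertex completing "bc" is
 * connected to the sink; removing its other out-edges also removes "abc",
 * which contains "bc" as a suffix.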
*/ static -void filterLitGraph(LitGraph &lg, const LitVertex sink) { - for (auto v : inv_adjacent_vertices_range(sink, lg)) { - remove_out_edge_if(v, [&lg, &sink](const LitEdge &e) { - return target(e, lg) != sink; +void filterLitGraph(LitGraph &lg) { + for (auto v : inv_adjacent_vertices_range(lg.sink, lg)) { + remove_out_edge_if(v, [&lg](const LitEdge &e) { + return target(e, lg) != lg.sink; }, lg); } @@ -229,13 +233,12 @@ void filterLitGraph(LitGraph &lg, const LitVertex sink) { * from each predecessor of the sink (note: it's a suffix tree except for this * convenience) towards the source, storing each string as we go. */ static -void extractLiterals(const LitGraph &lg, const LitVertex root, - const LitVertex sink, set &s) { +void extractLiterals(const LitGraph &lg, set &s) { ue2_literal lit; - for (auto u : inv_adjacent_vertices_range(sink, lg)) { + for (auto u : inv_adjacent_vertices_range(lg.sink, lg)) { lit.clear(); - while (u != root) { + while (u != lg.root) { lit.push_back(lg[u].c); assert(in_degree(u, lg) <= 1); LitGraph::inv_adjacency_iterator ai2, ae2; @@ -277,11 +280,9 @@ void processWorkQueue(const NGHolder &g, const NFAEdge &e, } LitGraph lg; - LitVertex root = add_vertex(lg); - LitVertex sink = add_vertex(lg); LitVertexQ workQ; - initWorkQueue(workQ, lg, root, sink, g, e); + initWorkQueue(workQ, lg, g, e); while (!workQ.empty()) { const LitVertex lv = workQ.front().first; @@ -290,18 +291,18 @@ void processWorkQueue(const NGHolder &g, const NFAEdge &e, u32 cr_card = crCardinality(cr); size_t numItems = cr_card * in_degree(t, g); - size_t committed_count = workQ.size() + in_degree(sink, lg) - 1; + size_t committed_count = workQ.size() + in_degree(lg.sink, lg) - 1; if (g[t].index == NODE_START) { // reached start, add to literal set - add_edge_if_not_present(lv, sink, lg); + add_edge_if_not_present(lv, lg.sink, lg); goto next_work_elem; } // Expand next vertex if (allowExpand(numItems, committed_count)) { for (auto u : inv_adjacent_vertices_range(t, g)) { - addToQueue(workQ, lg, sink, lv, cr, u); + addToQueue(workQ, lg, lv, cr, u); } goto next_work_elem; } @@ -317,21 +318,21 @@ void processWorkQueue(const NGHolder &g, const NFAEdge &e, bool nocase = myislower(i) && cr.test(mytoupper(i)); ue2_literal::elem c((char)i, nocase); - LitVertex lt = addToLitGraph(lg, sink, lv, c); - add_edge_if_not_present(lt, sink, lg); + LitVertex lt = addToLitGraph(lg, lv, c); + add_edge_if_not_present(lt, lg.sink, lg); } goto next_work_elem; } // add to literal set - add_edge_if_not_present(lv, sink, lg); + add_edge_if_not_present(lv, lg.sink, lg); next_work_elem: workQ.pop(); } - filterLitGraph(lg, sink); - //dumpGraph("litgraph.dot", lg, root, sink); - extractLiterals(lg, root, sink, s); + filterLitGraph(lg); + //dumpGraph("litgraph.dot", lg); + extractLiterals(lg, s); // Our literal set should contain no literal that is a suffix of another. assert(!hasSuffixLiterals(s)); @@ -410,16 +411,15 @@ u64a calculateScore(const ue2_literal &s) { /** Adds a literal in reverse order, building up a suffix tree. 
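 * For example, adding "abc" walks 'c', then 'b', then 'a' from the root,
 * reusing any existing vertices along the way, and finally wires the last
 * vertex to the sink; literals sharing a suffix thus share a path from the
 * root.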
*/ static -void addReversedLiteral(const ue2_literal &lit, LitGraph &lg, - const LitVertex &root, const LitVertex &sink) { +void addReversedLiteral(const ue2_literal &lit, LitGraph &lg) { DEBUG_PRINTF("literal: '%s'\n", escapeString(lit).c_str()); ue2_literal suffix; - LitVertex v = root; + LitVertex v = lg.root; for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) { suffix.push_back(*it); LitVertex w; for (auto v2 : adjacent_vertices_range(v, lg)) { - if (v2 != sink && lg[v2].c == *it) { + if (v2 != lg.sink && lg[v2].c == *it) { w = v2; goto next_char; } @@ -431,17 +431,18 @@ next_char: } // Wire the last vertex to the sink. - add_edge(v, sink, lg); + add_edge(v, lg.sink, lg); } static void extractLiterals(const vector &cutset, const LitGraph &lg, - const LitVertex &root, set &s) { + set &s) { for (const auto &e : cutset) { - LitVertex u = source(e, lg), v = target(e, lg); + LitVertex u = source(e, lg); + LitVertex v = target(e, lg); ue2_literal lit; lit.push_back(lg[v].c); - while (u != root) { + while (u != lg.root) { lit.push_back(lg[u].c); assert(in_degree(u, lg) == 1); LitGraph::inv_adjacency_iterator ai, ae; @@ -488,10 +489,7 @@ const char *describeColor(boost::default_color_type c) { static vector add_reverse_edges_and_index(LitGraph &lg) { vector fwd_edges; - - size_t next_index = 0; for (const auto &e : edges_range(lg)) { - lg[e].index = next_index++; fwd_edges.push_back(e); } @@ -503,9 +501,7 @@ vector add_reverse_edges_and_index(LitGraph &lg) { assert(!edge(v, u, lg).second); - LitEdge rev = add_edge(v, u, lg).first; - lg[rev].score = 0; - lg[rev].index = next_index++; + LitEdge rev = add_edge(v, u, LitGraphEdgeProps(0), lg).first; rev_map[lg[e].index] = rev; rev_map[lg[rev].index] = e; } @@ -514,20 +510,19 @@ vector add_reverse_edges_and_index(LitGraph &lg) { } static -void findMinCut(LitGraph &lg, const LitVertex &root, const LitVertex &sink, - vector &cutset) { +void findMinCut(LitGraph &lg, vector &cutset) { cutset.clear(); - //dumpGraph("litgraph.dot", lg, root, sink); + //dumpGraph("litgraph.dot", lg); - assert(!in_degree(root, lg)); - assert(!out_degree(sink, lg)); + assert(!in_degree(lg.root, lg)); + assert(!out_degree(lg.sink, lg)); size_t num_real_edges = num_edges(lg); // Add reverse edges for the convenience of the BGL's max flow algorithm. 
vector rev_edges = add_reverse_edges_and_index(lg); - const auto v_index_map = get(vertex_index, lg); + const auto v_index_map = get(&LitGraphVertexProps::index, lg); const auto e_index_map = get(&LitGraphEdgeProps::index, lg); const size_t num_verts = num_vertices(lg); vector colors(num_verts); @@ -542,7 +537,7 @@ void findMinCut(LitGraph &lg, const LitVertex &root, const LitVertex &sink, make_iterator_property_map(predecessors.begin(), v_index_map), make_iterator_property_map(colors.begin(), v_index_map), make_iterator_property_map(distances.begin(), v_index_map), - v_index_map, root, sink); + v_index_map, lg.root, lg.sink); DEBUG_PRINTF("done, flow = %llu\n", flow); /* remove reverse edges */ @@ -555,21 +550,20 @@ void findMinCut(LitGraph &lg, const LitVertex &root, const LitVertex &sink, for (const auto &e : edges_range(lg)) { const LitVertex u = source(e, lg), v = target(e, lg); - const auto ucolor = colors[boost::get(vertex_index, lg, u)]; - const auto vcolor = colors[boost::get(vertex_index, lg, v)]; + const auto ucolor = colors[lg[u].index]; + const auto vcolor = colors[lg[v].index]; - DEBUG_PRINTF("edge %zu:%s -> %zu:%s score %llu\n", - boost::get(vertex_index, lg, u), describeColor(ucolor), - boost::get(vertex_index, lg, v), describeColor(vcolor), + DEBUG_PRINTF("edge %zu:%s -> %zu:%s score %llu\n", lg[u].index, + describeColor(ucolor), lg[v].index, describeColor(vcolor), lg[e].score); if (ucolor != boost::white_color && vcolor == boost::white_color) { - assert(target(e, lg) != sink); + assert(v != lg.sink); white_cut.push_back(e); white_flow += lg[e].score; } if (ucolor == boost::black_color && vcolor != boost::black_color) { - assert(target(e, lg) != sink); + assert(v != lg.sink); black_cut.push_back(e); black_flow += lg[e].score; } @@ -609,21 +603,19 @@ u64a compressAndScore(set &s) { initialScore); LitGraph lg; - const LitVertex root = add_vertex(lg); - const LitVertex sink = add_vertex(lg); for (const auto &lit : s) { - addReversedLiteral(lit, lg, root, sink); + addReversedLiteral(lit, lg); } DEBUG_PRINTF("suffix tree has %zu vertices and %zu edges\n", num_vertices(lg), num_edges(lg)); vector cutset; - findMinCut(lg, root, sink, cutset); + findMinCut(lg, cutset); s.clear(); - extractLiterals(cutset, lg, root, s); + extractLiterals(cutset, lg, s); u64a score = scoreSet(s); DEBUG_PRINTF("compressed score is %llu\n", score); From f626276271fbc0ea209ada0e8c499c53a23284c8 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 14 Dec 2016 15:26:01 +1100 Subject: [PATCH 071/103] hsbench: add Hyperscan benchmarker The hsbench tool provides an easy way to measure Hyperscan's performance for a particular set of patterns and corpus of data to be scanned. 
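In essence, each benchmark thread compiles the supplied patterns, allocates
scratch, and then times repeated scans of the corpus, reporting throughput in
Mbit/sec computed as bytes / (seconds * 125000) (see calc_mbps() in main.cpp).
A minimal single-threaded, block-mode sketch of that measurement loop follows.
It is illustrative only: the pattern and corpus are placeholders, and hsbench
itself adds signature files, SQLite corpora, repeats, threading and per-scan
statistics.

    #include <hs.h>

    #include <chrono>
    #include <cstdio>
    #include <string>

    // Match callback: count matches, as hsbench's onMatch() does.
    static int on_match(unsigned int, unsigned long long, unsigned long long,
                        unsigned int, void *ctx) {
        ++*static_cast<unsigned long long *>(ctx);
        return 0; // returning 0 tells Hyperscan to continue scanning
    }

    int main() {
        const char *pattern = "foo.*bar";  // placeholder pattern
        std::string corpus(1 << 20, 'a');  // placeholder 1 MB corpus

        hs_database_t *db = nullptr;
        hs_compile_error_t *err = nullptr;
        if (hs_compile(pattern, 0, HS_MODE_BLOCK, nullptr, &db, &err) !=
            HS_SUCCESS) {
            std::printf("compile failed: %s\n", err->message);
            hs_free_compile_error(err);
            return 1;
        }

        hs_scratch_t *scratch = nullptr;
        if (hs_alloc_scratch(db, &scratch) != HS_SUCCESS) {
            hs_free_database(db);
            return 1;
        }

        unsigned long long matches = 0;
        auto begin = std::chrono::steady_clock::now();
        hs_scan(db, corpus.data(), (unsigned int)corpus.size(), 0, scratch,
                on_match, &matches);
        std::chrono::duration<double> secs =
            std::chrono::steady_clock::now() - begin;

        // Throughput in Mbit/sec: bytes / (seconds * 125000).
        std::printf("%llu matches, %0.2f Mbit/sec\n", matches,
                    corpus.size() / (secs.count() * 125000));

        hs_free_scratch(scratch);
        hs_free_database(db);
        return 0;
    }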
--- cmake/sqlite3.cmake | 53 ++ tools/CMakeLists.txt | 19 + tools/hsbench/CMakeLists.txt | 36 ++ tools/hsbench/README.md | 8 + tools/hsbench/common.h | 42 ++ tools/hsbench/data_corpus.cpp | 133 ++++ tools/hsbench/data_corpus.h | 63 ++ tools/hsbench/engine_hyperscan.cpp | 411 ++++++++++++ tools/hsbench/engine_hyperscan.h | 97 +++ tools/hsbench/heapstats.cpp | 146 +++++ tools/hsbench/heapstats.h | 36 ++ tools/hsbench/huge.cpp | 201 ++++++ tools/hsbench/huge.h | 37 ++ tools/hsbench/main.cpp | 780 +++++++++++++++++++++++ tools/hsbench/scripts/CorpusBuilder.py | 58 ++ tools/hsbench/scripts/gutenbergCorpus.py | 68 ++ tools/hsbench/scripts/linebasedCorpus.py | 53 ++ tools/hsbench/scripts/pcapCorpus.py | 301 +++++++++ tools/hsbench/thread_barrier.h | 71 +++ tools/hsbench/timer.h | 59 ++ util/CMakeLists.txt | 16 +- util/cross_compile.cpp | 115 ++++ util/cross_compile.h | 42 ++ util/database_util.cpp | 155 +++++ util/database_util.h | 39 ++ util/expression_path.h | 107 ++++ 26 files changed, 3145 insertions(+), 1 deletion(-) create mode 100644 cmake/sqlite3.cmake create mode 100644 tools/CMakeLists.txt create mode 100644 tools/hsbench/CMakeLists.txt create mode 100644 tools/hsbench/README.md create mode 100644 tools/hsbench/common.h create mode 100644 tools/hsbench/data_corpus.cpp create mode 100644 tools/hsbench/data_corpus.h create mode 100644 tools/hsbench/engine_hyperscan.cpp create mode 100644 tools/hsbench/engine_hyperscan.h create mode 100644 tools/hsbench/heapstats.cpp create mode 100644 tools/hsbench/heapstats.h create mode 100644 tools/hsbench/huge.cpp create mode 100644 tools/hsbench/huge.h create mode 100644 tools/hsbench/main.cpp create mode 100755 tools/hsbench/scripts/CorpusBuilder.py create mode 100755 tools/hsbench/scripts/gutenbergCorpus.py create mode 100755 tools/hsbench/scripts/linebasedCorpus.py create mode 100755 tools/hsbench/scripts/pcapCorpus.py create mode 100644 tools/hsbench/thread_barrier.h create mode 100644 tools/hsbench/timer.h create mode 100644 util/cross_compile.cpp create mode 100644 util/cross_compile.h create mode 100644 util/database_util.cpp create mode 100644 util/database_util.h create mode 100644 util/expression_path.h diff --git a/cmake/sqlite3.cmake b/cmake/sqlite3.cmake new file mode 100644 index 00000000..c07f1161 --- /dev/null +++ b/cmake/sqlite3.cmake @@ -0,0 +1,53 @@ +# +# a lot of noise to find sqlite +# + +option(SQLITE_PREFER_STATIC "Build sqlite3 statically instead of using an installed lib" OFF) + +if(NOT WIN32 AND NOT SQLITE_PREFER_STATIC) +find_package(PkgConfig QUIET) + +# first check for sqlite on the system +pkg_check_modules(SQLITE3 sqlite3) +endif() + +if (NOT SQLITE3_FOUND) + message(STATUS "looking for sqlite3 in source tree") + # look in the source tree + if (EXISTS "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.h" AND + EXISTS "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.c") + message(STATUS " found sqlite3 in source tree") + set(SQLITE3_FOUND TRUE) + set(SQLITE3_BUILD_SOURCE TRUE) + set(SQLITE3_INCLUDE_DIRS "${PROJECT_SOURCE_DIR}/sqlite3") + set(SQLITE3_LDFLAGS sqlite3_static) + else() + message(FATAL_ERROR " no sqlite3 in source tree") + endif() +endif() + +# now do version checks +if (SQLITE3_FOUND) + list(INSERT CMAKE_REQUIRED_INCLUDES 0 "${SQLITE3_INCLUDE_DIRS}") + CHECK_C_SOURCE_COMPILES("#include \n#if SQLITE_VERSION_NUMBER >= 3008007 && SQLITE_VERSION_NUMBER < 3008010\n#error broken sqlite\n#endif\nint main() {return 0;}" SQLITE_VERSION_OK) + if (NOT SQLITE_VERSION_OK) + message(FATAL_ERROR "sqlite3 is broken from 3.8.7 to 3.8.10 - please 
find a working version") + endif() +if (NOT SQLITE3_BUILD_SOURCE) + set(_SAVED_FLAGS ${CMAKE_REQUIRED_FLAGS}) + list(INSERT CMAKE_REQUIRED_LIBRARIES 0 ${SQLITE3_LDFLAGS}) + CHECK_SYMBOL_EXISTS(sqlite3_open_v2 sqlite3.h HAVE_SQLITE3_OPEN_V2) + list(REMOVE_ITEM CMAKE_REQUIRED_INCLUDES "${SQLITE3_INCLUDE_DIRS}") + list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES ${SQLITE3_LDFLAGS}) +else() + if (NOT TARGET sqlite3_static) + # build sqlite as a static lib to compile into our test programs + add_library(sqlite3_static STATIC "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.c") + if (NOT WIN32) + set_target_properties(sqlite3_static PROPERTIES COMPILE_FLAGS "-Wno-unused -Wno-cast-qual -DSQLITE_OMIT_LOAD_EXTENSION") + endif() + endif() +endif() +endif() + +# that's enough about sqlite diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt new file mode 100644 index 00000000..049fd368 --- /dev/null +++ b/tools/CMakeLists.txt @@ -0,0 +1,19 @@ +find_package(Threads) + +# remove some warnings +if(CMAKE_CXX_FLAGS MATCHES "-Wmissing-declarations" ) + string(REPLACE "-Wmissing-declarations" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") +endif() + +include_directories(${PROJECT_SOURCE_DIR}) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src) +include_directories(${PROJECT_SOURCE_DIR}/util) + +# add any subdir with a cmake file +file(GLOB dirents RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *) +foreach(e ${dirents}) + if(IS_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/${e} AND + EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${e}/CMakeLists.txt) + add_subdirectory(${e}) + endif () +endforeach () diff --git a/tools/hsbench/CMakeLists.txt b/tools/hsbench/CMakeLists.txt new file mode 100644 index 00000000..25a833d0 --- /dev/null +++ b/tools/hsbench/CMakeLists.txt @@ -0,0 +1,36 @@ +include (${CMAKE_MODULE_PATH}/sqlite3.cmake) + +if (NOT XCODE) + include_directories(SYSTEM ${SQLITE3_INCLUDE_DIRS}) +else() + # cmake doesn't think Xcode supports isystem + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -isystem ${SQLITE3_INCLUDE_DIRS}") +endif() + +CHECK_FUNCTION_EXISTS(malloc_info HAVE_MALLOC_INFO) +CHECK_FUNCTION_EXISTS(shmget HAVE_SHMGET) +set(HAVE_SHMGET ${HAVE_SHMGET} CACHE BOOL "shmget()") + +# only set these after all tests are done +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") + + +SET(hsbench_SOURCES + common.h + data_corpus.cpp + data_corpus.h + engine_hyperscan.cpp + engine_hyperscan.h + heapstats.cpp + heapstats.h + huge.cpp + huge.h + main.cpp + thread_barrier.h + timer.h +) + +add_executable(hsbench ${hsbench_SOURCES}) +target_link_libraries(hsbench hs databaseutil expressionutil ${SQLITE3_LDFLAGS} + ${CMAKE_THREAD_LIBS_INIT}) diff --git a/tools/hsbench/README.md b/tools/hsbench/README.md new file mode 100644 index 00000000..344a6c00 --- /dev/null +++ b/tools/hsbench/README.md @@ -0,0 +1,8 @@ +Hyperscan Benchmarker: hsbench +============================== + +The `hsbench` tool provides an easy way to measure Hyperscan's performance +for a particular set of patterns and corpus of data to be scanned. + +Documentation describing its operation is available in the Tools section of the +[Developer Reference Guide](http://01org.github.io/hyperscan/dev-reference/). 
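A note on the corpus format: the file passed via -c is an SQLite database whose
blocks are read back by readCorpus() with "SELECT id, stream_id, data FROM
chunk ORDER BY id;" (see data_corpus.cpp below). The scripts under
tools/hsbench/scripts/ are what generate these databases; the standalone C++
sketch below only illustrates the layout inferred from that query, and the
exact schema details (column affinities, indexes) used by the scripts are an
assumption here.

    #include <sqlite3.h>

    #include <cstdio>
    #include <string>

    // Write a toy corpus: one stream (stream_id 0) holding two blocks, with
    // block ids ascending so that "ORDER BY id" replays them in order.
    int main() {
        sqlite3 *db = nullptr;
        if (sqlite3_open("corpus.db", &db) != SQLITE_OK) {
            std::fprintf(stderr, "open failed: %s\n", sqlite3_errmsg(db));
            return 1;
        }

        sqlite3_exec(db,
                     "CREATE TABLE chunk (id INTEGER PRIMARY KEY, "
                     "stream_id INTEGER, data BLOB);",
                     nullptr, nullptr, nullptr);

        sqlite3_stmt *ins = nullptr;
        if (sqlite3_prepare_v2(db,
                               "INSERT INTO chunk (id, stream_id, data) "
                               "VALUES (?, ?, ?);",
                               -1, &ins, nullptr) != SQLITE_OK) {
            sqlite3_close(db);
            return 1;
        }

        const std::string blocks[] = {"first block of stream 0",
                                      "second block of stream 0"};
        for (int i = 0; i < 2; i++) {
            sqlite3_bind_int(ins, 1, i);  // id: unique, ascending
            sqlite3_bind_int(ins, 2, 0);  // stream_id: all one stream
            sqlite3_bind_blob(ins, 3, blocks[i].data(),
                              (int)blocks[i].size(), SQLITE_TRANSIENT);
            sqlite3_step(ins);
            sqlite3_reset(ins);
        }

        sqlite3_finalize(ins);
        sqlite3_close(db);
        return 0;
    }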
diff --git a/tools/hsbench/common.h b/tools/hsbench/common.h new file mode 100644 index 00000000..a4d60021 --- /dev/null +++ b/tools/hsbench/common.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef COMMON_H +#define COMMON_H + +#include + +enum class ScanMode { BLOCK, STREAMING, VECTORED }; + +extern bool echo_matches; +extern bool saveDatabases; +extern bool loadDatabases; +extern std::string serializePath; +extern unsigned int somPrecisionMode; + +#endif // COMMON_H diff --git a/tools/hsbench/data_corpus.cpp b/tools/hsbench/data_corpus.cpp new file mode 100644 index 00000000..55bfe93a --- /dev/null +++ b/tools/hsbench/data_corpus.cpp @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include "data_corpus.h" + +#include "util/container.h" +#include "ue2common.h" + +#include +#include +#include +#include +#include + +#include + +using namespace std; +using namespace ue2; + +static +void readRow(sqlite3_stmt *statement, vector &blocks, + map &stream_indices) { + unsigned int id = sqlite3_column_int(statement, 0); + unsigned int stream_id = sqlite3_column_int(statement, 1); + const char *blob = (const char *)sqlite3_column_blob(statement, 2); + unsigned int bytes = sqlite3_column_bytes(statement, 2); + + if (!contains(stream_indices, stream_id)) { + unsigned int internal_stream_index = stream_indices.size(); + stream_indices[stream_id] = internal_stream_index; + } + auto internal_stream_index = stream_indices[stream_id]; + + assert(blob || bytes > 0); + blocks.emplace_back(id, stream_id, internal_stream_index, + string(blob, blob + bytes)); +} + +vector readCorpus(const string &filename) { + int status; + sqlite3 *db = nullptr; + + status = sqlite3_open_v2(filename.c_str(), &db, SQLITE_OPEN_READONLY, + nullptr); + + assert(db); + if (status != SQLITE_OK) { + ostringstream err; + err << "Unable to open database '" << filename << "': " + << sqlite3_errmsg(db); + status = sqlite3_close(db); + assert(status == SQLITE_OK); + throw DataCorpusError(err.str()); + } + + static const string query("SELECT id, stream_id, data " + "FROM chunk ORDER BY id;"); + + sqlite3_stmt *statement = nullptr; + + status = sqlite3_prepare_v2(db, query.c_str(), query.size(), &statement, + nullptr); + if (status != SQLITE_OK) { + status = sqlite3_finalize(statement); + assert(status == SQLITE_OK); + status = sqlite3_close(db); + assert(status == SQLITE_OK); + + ostringstream oss; + oss << "Query failed: " << query; + throw DataCorpusError(oss.str()); + } + + vector blocks; + map stream_indices; + + status = sqlite3_step(statement); + while (status == SQLITE_ROW) { + readRow(statement, blocks, stream_indices); + status = sqlite3_step(statement); + } + + if (status != SQLITE_DONE) { + ostringstream oss; + oss << "Error retrieving blocks from corpus: " + << sqlite3_errstr(status); + + status = sqlite3_finalize(statement); + assert(status == SQLITE_OK); + status = sqlite3_close(db); + assert(status == SQLITE_OK); + + throw DataCorpusError(oss.str()); + } + + status = sqlite3_finalize(statement); + assert(status == SQLITE_OK); + status = sqlite3_close(db); + assert(status == SQLITE_OK); + + if (blocks.empty()) { + throw DataCorpusError("Database contains no blocks."); + } + + return blocks; +} diff --git a/tools/hsbench/data_corpus.h b/tools/hsbench/data_corpus.h new file mode 100644 index 00000000..91a87acc --- /dev/null +++ b/tools/hsbench/data_corpus.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright 
notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DATACORPUS_H +#define DATACORPUS_H + +#include +#include + +class DataBlock { +public: + DataBlock(unsigned int in_id, unsigned int in_stream, + unsigned int int_stream_index_in, std::string in_data) + : id(in_id), stream_id(in_stream), + internal_stream_index(int_stream_index_in), + payload(std::move(in_data)) {} + + unsigned int id; // unique block identifier + unsigned int stream_id; // unique stream identifier (from corpus file) + unsigned int internal_stream_index; /* dense index for this stream + * (allocated by hsbench) */ + std::string payload; // actual block payload +}; + +/** Exception thrown if an error occurs. */ +class DataCorpusError { +public: + explicit DataCorpusError(std::string msg_in) : msg(std::move(msg_in)) {} + std::string msg; +}; + +/** + * Interface to a corpus database. Any error will produce a DataCorpusError + * and should be considered fatal. + */ +std::vector readCorpus(const std::string &filename); + +#endif // DATACORPUS_H diff --git a/tools/hsbench/engine_hyperscan.cpp b/tools/hsbench/engine_hyperscan.cpp new file mode 100644 index 00000000..f5abb9fa --- /dev/null +++ b/tools/hsbench/engine_hyperscan.cpp @@ -0,0 +1,411 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include "ExpressionParser.h" +#include "common.h" +#include "engine_hyperscan.h" +#include "expressions.h" +#include "heapstats.h" +#include "huge.h" +#include "timer.h" + +#include "crc32.h" +#include "database.h" +#include "hs_compile.h" +#include "hs_internal.h" +#include "hs_runtime.h" +#include "util/database_util.h" +#include "util/make_unique.h" + +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +EngineContext::EngineContext(const hs_database_t *db) { + hs_alloc_scratch(db, &scratch); + assert(scratch); +} + +EngineContext::~EngineContext() { + hs_free_scratch(scratch); +} + +namespace /* anonymous */ { + +/** Scan context structure passed to the onMatch callback function. */ +struct ScanContext { + ScanContext(unsigned id_in, ResultEntry &result_in, + const EngineStream *stream_in) + : id(id_in), result(result_in), stream(stream_in) {} + unsigned id; + ResultEntry &result; + const EngineStream *stream; // nullptr except in streaming mode. +}; + +} // namespace + +/** + * Callback function called for every match that Hyperscan produces, used when + * "echo matches" is off. + */ +static +int onMatch(unsigned int, unsigned long long, unsigned long long, unsigned int, + void *ctx) { + ScanContext *sc = static_cast(ctx); + assert(sc); + sc->result.matches++; + + return 0; +} + +/** + * Callback function called for every match that Hyperscan produces when "echo + * matches" is enabled. + */ +static +int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to, + unsigned int, void *ctx) { + ScanContext *sc = static_cast(ctx); + assert(sc); + sc->result.matches++; + + if (sc->stream) { + printf("Match @%u:%u:%llu for %u\n", sc->stream->sn, sc->id, to, id); + } else { + printf("Match @%u:%llu for %u\n", sc->id, to, id); + } + + return 0; +} + +EngineHyperscan::EngineHyperscan(hs_database_t *db_in) : db(db_in) { + assert(db); +} + +EngineHyperscan::~EngineHyperscan() { + release_huge(db); +} + +unique_ptr EngineHyperscan::makeContext() const { + return ue2::make_unique(db); +} + +void EngineHyperscan::scan(const char *data, unsigned int len, unsigned int id, + ResultEntry &result, EngineContext &ctx) const { + assert(data); + + ScanContext sc(id, result, nullptr); + auto callback = echo_matches ? onMatchEcho : onMatch; + hs_error_t rv = hs_scan(db, data, len, 0, ctx.scratch, callback, &sc); + + if (rv != HS_SUCCESS) { + printf("Fatal error: hs_scan returned error %d\n", rv); + abort(); + } +} + +void EngineHyperscan::scan_vectored(const char *const *data, + const unsigned int *len, unsigned int count, + unsigned streamId, ResultEntry &result, + EngineContext &ctx) const { + assert(data); + assert(len); + + ScanContext sc(streamId, result, nullptr); + auto callback = echo_matches ? 
onMatchEcho : onMatch; + hs_error_t rv = + hs_scan_vector(db, data, len, count, 0, ctx.scratch, callback, &sc); + + if (rv != HS_SUCCESS) { + printf("Fatal error: hs_scan_vector returned error %d\n", rv); + abort(); + } +} + +unique_ptr EngineHyperscan::streamOpen(EngineContext &ctx, + unsigned streamId) const { + auto stream = ue2::make_unique(); + stream->ctx = &ctx; + + hs_open_stream(db, 0, &stream->id); + if (!stream->id) { + // an error occurred, propagate to caller + return nullptr; + } + stream->sn = streamId; + return stream; +} + +void EngineHyperscan::streamClose(unique_ptr stream, + ResultEntry &result) const { + assert(stream); + + auto &s = static_cast(*stream); + EngineContext &ctx = *s.ctx; + + ScanContext sc(0, result, &s); + auto callback = echo_matches ? onMatchEcho : onMatch; + + assert(s.id); + hs_close_stream(s.id, ctx.scratch, callback, &sc); + s.id = nullptr; +} + +void EngineHyperscan::streamScan(EngineStream &stream, const char *data, + unsigned len, unsigned id, + ResultEntry &result) const { + assert(data); + + auto &s = static_cast(stream); + EngineContext &ctx = *s.ctx; + + ScanContext sc(id, result, &s); + auto callback = echo_matches ? onMatchEcho : onMatch; + hs_error_t rv = + hs_scan_stream(s.id, data, len, 0, ctx.scratch, callback, &sc); + + if (rv != HS_SUCCESS) { + printf("Fatal error: hs_scan_stream returned error %d\n", rv); + abort(); + } +} + +static +unsigned makeModeFlags(ScanMode scan_mode) { + switch (scan_mode) { + case ScanMode::BLOCK: + return HS_MODE_BLOCK; + case ScanMode::STREAMING: + return HS_MODE_STREAM; + case ScanMode::VECTORED: + return HS_MODE_VECTORED; + } + assert(0); + return HS_MODE_STREAM; +} + +/** + * Hash the settings used to compile a database, returning a string that can be + * used as a filename. 
+ */ +static +string dbSettingsHash(const string &filename, u32 mode) { + ostringstream info_oss; + + info_oss << filename.c_str() << ' '; + info_oss << mode << ' '; + + string info = info_oss.str(); + + u32 crc = Crc32c_ComputeBuf(0, info.data(), info.size()); + + // return STL string with printable version of digest + ostringstream oss; + oss << hex << setw(8) << setfill('0') << crc << dec; + + return oss.str(); +} + +static +string dbFilename(const std::string &name, unsigned mode) { + ostringstream oss; + oss << serializePath << '/' << dbSettingsHash(name, mode) << ".db"; + return oss.str(); +} + +std::unique_ptr +buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode, + const std::string &name, UNUSED const ue2::Grey &grey) { + if (expressions.empty()) { + assert(0); + return nullptr; + } + + long double compileSecs = 0.0; + size_t compiledSize = 0.0; + size_t streamSize = 0; + size_t scratchSize = 0; + unsigned int peakMemorySize = 0; + unsigned int crc = 0; + std::string db_info; + + unsigned int mode = makeModeFlags(scan_mode); + + hs_database_t *db; + hs_error_t err; + + if (loadDatabases) { + db = loadDatabase(dbFilename(name, mode).c_str()); + if (!db) { + return nullptr; + } + } else { + const unsigned int count = expressions.size(); + + vector exprs; + vector flags, ids; + vector ext; + + for (const auto &m : expressions) { + string expr; + unsigned int f = 0; + hs_expr_ext extparam; + extparam.flags = 0; + if (!readExpression(m.second, expr, &f, &extparam)) { + printf("Error parsing PCRE: %s (id %u)\n", m.second.c_str(), + m.first); + return nullptr; + } + + exprs.push_back(expr); + ids.push_back(m.first); + flags.push_back(f); + ext.push_back(extparam); + } + + unsigned full_mode = mode; + if (mode == HS_MODE_STREAM) { + full_mode |= somPrecisionMode; + } + + // Our compiler takes an array of plain ol' C strings. + vector patterns(count); + for (unsigned int i = 0; i < count; i++) { + patterns[i] = exprs[i].c_str(); + } + + // Extended parameters are passed as pointers to hs_expr_ext structures. 
+ vector ext_ptr(count); + for (unsigned int i = 0; i < count; i++) { + ext_ptr[i] = &ext[i]; + } + + Timer timer; + timer.start(); + + hs_compile_error_t *compile_err; + +#ifndef RELEASE_BUILD + err = hs_compile_multi_int(patterns.data(), flags.data(), ids.data(), + ext_ptr.data(), count, full_mode, nullptr, + &db, &compile_err, grey); +#else + err = hs_compile_ext_multi(patterns.data(), flags.data(), ids.data(), + ext_ptr.data(), count, full_mode, nullptr, + &db, &compile_err); +#endif + + timer.complete(); + compileSecs = timer.seconds(); + peakMemorySize = getPeakHeap(); + + if (err == HS_COMPILER_ERROR) { + if (compile_err->expression >= 0) { + printf("Compile error for signature #%u: %s\n", + compile_err->expression, compile_err->message); + } else { + printf("Compile error: %s\n", compile_err->message); + } + hs_free_compile_error(compile_err); + return nullptr; + } + } + + // copy the db into huge pages (where available) to reduce TLB pressure + db = get_huge(db); + if (!db) { + return nullptr; + } + + err = hs_database_size(db, &compiledSize); + if (err != HS_SUCCESS) { + return nullptr; + } + assert(compiledSize > 0); + + crc = db->crc32; + + if (saveDatabases) { + saveDatabase(db, dbFilename(name, mode).c_str()); + } + + if (mode & HS_MODE_STREAM) { + err = hs_stream_size(db, &streamSize); + if (err != HS_SUCCESS) { + return nullptr; + } + } else { + streamSize = 0; + } + + char *info; + err = hs_database_info(db, &info); + if (err != HS_SUCCESS) { + return nullptr; + } else { + db_info = string(info); + free(info); + } + + // Allocate scratch temporarily to find its size: this is a good test + // anyway. + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + if (err != HS_SUCCESS) { + return nullptr; + } + + err = hs_scratch_size(scratch, &scratchSize); + if (err != HS_SUCCESS) { + return nullptr; + } + hs_free_scratch(scratch); + + // Output summary information. + printf("Signatures: %s\n", name.c_str()); + printf("Hyperscan info: %s\n", db_info.c_str()); + printf("Expression count: %'zu\n", expressions.size()); + printf("Bytecode size: %'zu bytes\n", compiledSize); + printf("Database CRC: 0x%x\n", crc); + if (mode & HS_MODE_STREAM) { + printf("Stream state size: %'zu bytes\n", streamSize); + } + printf("Scratch size: %'zu bytes\n", scratchSize); + printf("Compile time: %'0.3Lf seconds\n", compileSecs); + printf("Peak heap usage: %'u bytes\n", peakMemorySize); + + return ue2::make_unique(db); +} diff --git a/tools/hsbench/engine_hyperscan.h b/tools/hsbench/engine_hyperscan.h new file mode 100644 index 00000000..7875decc --- /dev/null +++ b/tools/hsbench/engine_hyperscan.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ENGINEHYPERSCAN_H +#define ENGINEHYPERSCAN_H + +#include "expressions.h" +#include "common.h" +#include "hs_runtime.h" + +#include + +/** Structure for the result of a single complete scan. */ +struct ResultEntry { + double seconds = 0; //!< Time taken for scan. + unsigned int matches = 0; //!< Count of matches found. +}; + +/** Engine context which is allocated on a per-thread basis. */ +class EngineContext { +public: + explicit EngineContext(const hs_database_t *db); + ~EngineContext(); + + hs_scratch_t *scratch = nullptr; +}; + +/** Streaming mode scans have persistent stream state associated with them. */ +class EngineStream { +public: + hs_stream_t *id; + unsigned int sn; + EngineContext *ctx; +}; + +/** Hyperscan Engine for scanning data. */ +class EngineHyperscan { +public: + explicit EngineHyperscan(hs_database_t *db); + ~EngineHyperscan(); + + std::unique_ptr makeContext() const; + + void scan(const char *data, unsigned int len, unsigned int id, + ResultEntry &result, EngineContext &ctx) const; + + void scan_vectored(const char *const *data, const unsigned int *len, + unsigned int count, unsigned int streamId, + ResultEntry &result, EngineContext &ctx) const; + + std::unique_ptr streamOpen(EngineContext &ctx, + unsigned id) const; + + void streamClose(std::unique_ptr stream, + ResultEntry &result) const; + + void streamScan(EngineStream &stream, const char *data, unsigned int len, + unsigned int id, ResultEntry &result) const; + +private: + hs_database_t *db; +}; + +namespace ue2 { +struct Grey; +} + +std::unique_ptr +buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode, + const std::string &name, const ue2::Grey &grey); + +#endif // ENGINEHYPERSCAN_H diff --git a/tools/hsbench/heapstats.cpp b/tools/hsbench/heapstats.cpp new file mode 100644 index 00000000..d0dffdb3 --- /dev/null +++ b/tools/hsbench/heapstats.cpp @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2015-2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Peak heap usage code. + * + * At present, we only have an implementation for modern glibc systems, using + * the malloc_info() call. We return zero elsewhere. + */ + +#include "config.h" + +#include "heapstats.h" + +#if defined HAVE_MALLOC_INFO + +#include +#include +#include +#include + +#include + +size_t getPeakHeap(void) { + FILE *tmpf = tmpfile(); + if (!tmpf) { + return 0; + } + + int rv = malloc_info(0, tmpf); + if (rv != 0) { + fclose(tmpf); + return 0; + } + + rewind(tmpf); + + // We don't want to depend on a real XML parser. This is ugly and brittle + // and hopefully good enough for the time being. We look for the last + // system tag with type max, which should be the malloc-wide one. + + static const char begin[] = " +#include +#include +#include + +#include +#include + +using namespace std; + +size_t getPeakHeap(void) { + // Modern Linux kernels write a 'VmPeak' value into /proc/$PID/status. This + // is a reasonable approximation, though it likely includes shared libs and + // the like as well... + ostringstream path; + path << "/proc/" << getpid() << "/status"; + + ifstream f(path.str().c_str()); + if (!f.good()) { + return 0; + } + + const string vmpeak("VmPeak:"); + + string line; + while (getline(f, line)) { + istringstream iss(line, istringstream::in); + string word; + iss >> word; + if (word != vmpeak) { + continue; + } + + // Skip spaces + while (iss.good() && !isdigit(iss.peek())) { + iss.ignore(); + } + + size_t num = 0; + iss >> num; + return num * 1024; + } + + f.close(); + return 0; +} + +#else + +// Stub. +size_t getPeakHeap(void) { + return 0; +} + +#endif diff --git a/tools/hsbench/heapstats.h b/tools/hsbench/heapstats.h new file mode 100644 index 00000000..c2c37998 --- /dev/null +++ b/tools/hsbench/heapstats.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef HEAPSTATS_H +#define HEAPSTATS_H + +#include // for size_t + +size_t getPeakHeap(void); + +#endif diff --git a/tools/hsbench/huge.cpp b/tools/hsbench/huge.cpp new file mode 100644 index 00000000..dbb453b2 --- /dev/null +++ b/tools/hsbench/huge.cpp @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" + +#include "hs.h" +#include "ue2common.h" + +#include "common.h" +#include "huge.h" + +#ifndef _WIN32 +#include +#include +#include +#include +#include +#include +#include +#if defined(HAVE_SHMGET) +#include +#include +#endif + +UNUSED static int hsdb_shmid; + +using namespace std; + +long gethugepagesize(void); + +hs_database_t *get_huge(hs_database_t *db) { +#if defined(HAVE_SHMGET) && defined(SHM_HUGETLB) + /* move the database to huge pages where possible, but fail politely */ + hs_error_t err; + size_t len; + char *bytes; + + long hpage_size = gethugepagesize(); + if (hpage_size < 0) { + printf("Couldn't determine huge page size\n"); + hsdb_shmid = -1; + return db; + } + + err = hs_serialize_database(db, &bytes, &len); + if (err != HS_SUCCESS) { + printf("Failed to serialize database for copy: %d\n", err); + // this is weird - don't fail gracefully this time + return nullptr; + } + + size_t size; + err = hs_serialized_database_size(bytes, len, &size); + if (err != HS_SUCCESS) { + printf("Failed to get database size: %d\n", err); + // this is weird - don't fail gracefully this time + return nullptr; + } + + void *shmaddr; + if ((hsdb_shmid = shmget(IPC_PRIVATE, ROUNDUP_N(size, gethugepagesize()), + SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) { + // This could fail if the user doesn't have permission to shmget(), + // which is OK. + goto fini; + } + + shmaddr = shmat(hsdb_shmid, nullptr, SHM_RND); + if (shmaddr == (char *)-1) { + perror("Shared memory attach failure"); + goto fini; + } + + // Mark this segment to be destroyed after this process detaches. + shmctl(hsdb_shmid, IPC_RMID, nullptr); + + err = hs_deserialize_database_at(bytes, len, (hs_database_t *)shmaddr); + if (err != HS_SUCCESS) { + printf("Failed to deserialize database into shm: %d\n", err); + shmdt((const void *)shmaddr); + goto fini; + } + + free(bytes); + hs_free_database(db); + return (hs_database_t *)shmaddr; + +fini: + free(bytes); + hsdb_shmid = -1; + return db; +#else + return db; +#endif +} + +void release_huge(hs_database_t *db) { +#if defined(HAVE_SHMGET) && defined(SHM_HUGETLB) + if (hsdb_shmid != -1) { + if (shmdt((const void *)db) != 0) { + perror("Detach failure"); + } + } else { + // fallback + hs_free_database(db); + } +#else + hs_free_database(db); +#endif +} + +#define BUF_SIZE 4096 +static long read_meminfo(const char *tag) { + int fd; + char buf[BUF_SIZE]; + int len; + char *p, *q; + long val; + + fd = open("/proc/meminfo", O_RDONLY); + if (fd < 0) { + perror("Couldn't open /proc/meminfo"); + return -1; + } + + len = read(fd, buf, sizeof(buf)); + close(fd); + if (len < 0) { + perror("Error reading /proc/meminfo"); + return -1; + } + if (len == sizeof(buf)) { + printf("/proc/meminfo is too large\n"); + return -1; + } + buf[len] = '\0'; + + p = strstr(buf, tag); + if (!p) { + return -1; + } + + p += strlen(tag); + val = strtol(p, &q, 0); + if (!isspace(*q)) { + printf("Couldn't parse /proc/meminfo value\n"); + return -1; + } + + return val; +} + +long gethugepagesize(void) { + long hpage_size; + int hpage_kb; + + hpage_kb = read_meminfo("Hugepagesize:"); + if (hpage_kb < 0) { + hpage_size = -1; + } else { + /* convert from kb to bytes */ + hpage_size = 1024 * hpage_kb; + } + + return hpage_size; +} + +#else + +/* No huge page support on WIN32. 
*/ + +hs_database_t *get_huge(hs_database_t *db) { return db; } + +void release_huge(hs_database_t *db) { hs_free_database(db); } + +#endif diff --git a/tools/hsbench/huge.h b/tools/hsbench/huge.h new file mode 100644 index 00000000..da539bd6 --- /dev/null +++ b/tools/hsbench/huge.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef HUGE_H +#define HUGE_H + +#include "hs.h" + +hs_database_t *get_huge(hs_database_t *db); +void release_huge(hs_database_t *db); + +#endif /* HUGE_H */ diff --git a/tools/hsbench/main.cpp b/tools/hsbench/main.cpp new file mode 100644 index 00000000..4298963b --- /dev/null +++ b/tools/hsbench/main.cpp @@ -0,0 +1,780 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include "common.h" +#include "data_corpus.h" +#include "engine_hyperscan.h" +#include "expressions.h" +#include "thread_barrier.h" +#include "timer.h" +#include "util/expression_path.h" +#include "util/string_util.h" + +#include "grey.h" +#include "hs.h" +#include "ue2common.h" +#include "util/make_unique.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#ifndef _WIN32 +#include +#include +#endif + +#include +#include + +using namespace std; +using namespace ue2; +using boost::adaptors::map_keys; + +// Globals common to all files. +bool echo_matches = false; +bool saveDatabases = false; +bool loadDatabases = false; +string serializePath(""); +unsigned int somPrecisionMode = HS_MODE_SOM_HORIZON_LARGE; + +namespace /* anonymous */ { + +// Globals local to this file. +bool display_per_scan = false; +ScanMode scan_mode = ScanMode::STREAMING; +unsigned repeats = 20; +string exprPath(""); +string corpusFile(""); +vector threadCores; +Timer totalTimer; +double totalSecs = 0; + +typedef void (*thread_func_t)(void *context); + +class ThreadContext : boost::noncopyable { +public: + ThreadContext(unsigned num_in, const EngineHyperscan &db_in, + thread_barrier &tb_in, thread_func_t function_in, + vector corpus_data_in) + : num(num_in), results(repeats), engine(db_in), + enginectx(db_in.makeContext()), corpus_data(move(corpus_data_in)), + tb(tb_in), function(function_in) {} + + // Start the thread. + bool start(int cpu) { + thr = thread(function, this); + + // affine if it's asked for + if (cpu >= 0) { + return affine(cpu); + } + return true; + } + + // Wait for the thread to exit. + void join() { + thr.join(); + } + + // Serialise all threads on a global barrier. + void barrier() { + tb.wait(); + } + + // Apply processor affinity (if available) to this thread. + bool affine(UNUSED int cpu) { +#ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + assert(cpu >= 0 && cpu < CPU_SETSIZE); + + // The 'clang' compiler complains about an unused result here, so we + // silence it. + (void)CPU_SET(cpu, &cpuset); + + int rv = pthread_setaffinity_np(thr.native_handle(), sizeof(cpuset), + &cpuset); + return (rv == 0); +#endif + return false; // not available + } + + unsigned num; + Timer timer; + vector results; + const EngineHyperscan &engine; + unique_ptr enginectx; + vector corpus_data; + +protected: + thread_barrier &tb; // shared barrier for time sync + thread_func_t function; + thread thr; +}; + +/** Display usage information, with an optional error. 
*/ +static +void usage(const char *error) { + printf("Usage: hsbench [OPTIONS...]\n\n"); + printf("Options:\n\n"); + printf(" -h Display help and exit.\n"); + printf(" -G OVERRIDES Overrides for the grey box.\n"); + printf(" -e PATH Path to expression directory.\n"); + printf(" -s FILE Signature file to use.\n"); + printf(" -z NUM Signature ID to use.\n"); + printf(" -c FILE File to use as corpus.\n"); + printf(" -n NUMBER Repeat scan NUMBER times (default 20).\n"); + printf(" -N Benchmark in block mode" + " (default: streaming).\n"); + printf(" -V Benchmark in vectored mode" + " (default: streaming).\n"); + printf(" -T CPU,CPU,... Benchmark with threads on these CPUs.\n"); + printf(" -i DIR Don't compile, load from files in DIR" + " instead.\n"); + printf(" -w DIR After compiling, save to files in DIR.\n"); + printf(" -d NUMBER Set SOM precision mode (default: 8 (large)).\n"); + printf("\n"); + printf(" --per-scan Display per-scan Mbit/sec results.\n"); + printf(" --echo-matches Display all matches that occur during scan.\n"); + printf("\n\n"); + + if (error) { + printf("Error: %s\n", error); + } +} + +/** Wraps up a name and the set of signature IDs it refers to. */ +struct BenchmarkSigs { + BenchmarkSigs(string name_in, SignatureSet sigs_in) + : name(move(name_in)), sigs(move(sigs_in)) {} + string name; + SignatureSet sigs; +}; + +/** Process command-line arguments. Prints usage and exits on error. */ +static +void processArgs(int argc, char *argv[], vector &sigSets, + UNUSED Grey &grey) { + const char options[] = "-b:c:Cd:e:G:hi:n:No:p:sT:Vw:z:"; + int in_sigfile = 0; + int do_per_scan = 0; + int do_echo_matches = 0; + vector sigFiles; + + static struct option longopts[] = { + {"per-scan", 0, &do_per_scan, 1}, + {"echo-matches", 0, &do_echo_matches, 1}, + {nullptr, 0, nullptr, 0} + }; + + for (;;) { + int c = getopt_long(argc, argv, options, longopts, nullptr); + if (c < 0) { + break; + } + switch (c) { + case 'c': + corpusFile.assign(optarg); + break; + case 'd': { + unsigned dist; + if (!fromString(optarg, dist)) { + usage("Must provide an integer argument to '-d' flag"); + exit(1); + } + switch (dist) { + case 2: + somPrecisionMode = HS_MODE_SOM_HORIZON_SMALL; + break; + case 4: + somPrecisionMode = HS_MODE_SOM_HORIZON_MEDIUM; + break; + case 8: + somPrecisionMode = HS_MODE_SOM_HORIZON_LARGE; + break; + default: + usage("SOM precision must be 2, 4 or 8"); + exit(1); + } + break; + } + case 'e': + exprPath.assign(optarg); + break; +#ifndef RELEASE_BUILD + case 'G': + applyGreyOverrides(&grey, string(optarg)); + break; +#endif + case 'h': + usage(nullptr); + exit(0); + break; + case 'n': + if (!fromString(optarg, repeats) || repeats == 0) { + usage("Couldn't parse argument to -n flag, should be" + " a positive integer."); + exit(1); + } + break; + case 's': + in_sigfile = 2; + break; + case 'N': + scan_mode = ScanMode::BLOCK; + break; + case 'V': + scan_mode = ScanMode::VECTORED; + break; + case 'T': + if (!strToList(optarg, threadCores)) { + usage("Couldn't parse argument to -T flag, should be" + " a list of positive integers."); + exit(1); + } + break; + case 'z': { + unsigned int sinumber; + if (!fromString(optarg, sinumber)) { + usage("Argument to '-z' flag must be an integer"); + exit(1); + } + SignatureSet sigs = {sinumber}; + sigSets.emplace_back(string("-z ") + optarg, sigs); + break; + } + case 'i': + loadDatabases = true; + serializePath = optarg; + break; + case 'w': + saveDatabases = true; + serializePath = optarg; + break; + case 1: + if (in_sigfile) { + 
sigFiles.push_back(optarg); + in_sigfile = 2; + break; + } + case 0: + break; + default: + usage("Unrecognised command line argument."); + exit(1); + } + + if (in_sigfile) { + in_sigfile--; + } + } + + if (do_echo_matches) { + echo_matches = true; + } + if (do_per_scan) { + display_per_scan = true; + } + + if (exprPath.empty() && !sigFiles.empty()) { + /* attempt to infer an expression directory */ + auto si = sigFiles.begin(); + exprPath = inferExpressionPath(*si); + for (++si; si != sigFiles.end(); ++si) { + if (exprPath != inferExpressionPath(*si)) { + usage("Unable to infer consistent expression directory"); + exit(1); + } + } + } + + // Must have a valid expression path + if (exprPath.empty()) { + usage("Must specify an expression path with the -e option."); + exit(1); + } + + // Must have valid database to scan + if (corpusFile.empty()) { + usage("Must specify a corpus file with the -c option."); + exit(1); + } + + // Cannot ask for both loading and saving + if (loadDatabases && saveDatabases) { + usage("You cannot both load and save databases."); + exit(1); + } + + // Read in any -s signature sets. + for (const auto &file : sigFiles) { + SignatureSet sigs; + loadSignatureList(file, sigs); + sigSets.emplace_back(file, move(sigs)); + } +} + +/** Start the global timer. */ +static +void startTotalTimer(ThreadContext *ctx) { + if (ctx->num != 0) { + return; // only runs in the first thread + } + totalTimer.start(); +} + +/** Stop the global timer and calculate totals. */ +static +void stopTotalTimer(ThreadContext *ctx) { + if (ctx->num != 0) { + return; // only runs in the first thread + } + totalTimer.complete(); + totalSecs = totalTimer.seconds(); +} + +/** Run a benchmark over a given engine and corpus in block mode. */ +static +void benchBlock(void *context) { + ThreadContext *ctx = (ThreadContext *)context; + + // Synchronization point + ctx->barrier(); + + startTotalTimer(ctx); + + for (ResultEntry &r : ctx->results) { + ctx->timer.start(); + + for (const DataBlock &block : ctx->corpus_data) { + ctx->engine.scan(block.payload.c_str(), block.payload.size(), + block.id, r, *ctx->enginectx); + } + + ctx->timer.complete(); + r.seconds = ctx->timer.seconds(); + } + + // Synchronization point + ctx->barrier(); + + // Now that all threads are finished, we can stop the clock. + stopTotalTimer(ctx); +} + +/** Structure used to represent a stream. */ +struct StreamInfo { + unsigned int stream_id = ~0U; + unsigned int first_block_id = ~0U; + unsigned int last_block_id = 0; + unique_ptr eng_handle; +}; + +static +u64a count_streams(const vector &corpus_blocks) { + set streams; + for (const DataBlock &block : corpus_blocks) { + streams.insert(block.stream_id); + } + + return (u64a)streams.size(); +} + +/** + * Take a ThreadContext and prepare a vector for streaming mode + * scanning from it. 
+ */
+static
+vector<StreamInfo> prepStreamingData(const ThreadContext *ctx) {
+    vector<StreamInfo> info(count_streams(ctx->corpus_data));
+    for (const DataBlock &block : ctx->corpus_data) {
+        assert(block.internal_stream_index < info.size());
+        StreamInfo &si = info[block.internal_stream_index];
+
+        /* check if this is the first time we have encountered this stream */
+        if (si.first_block_id > si.last_block_id) {
+            si.stream_id = block.stream_id;
+            si.first_block_id = block.id;
+            si.last_block_id = block.id;
+        } else {
+            assert(block.stream_id == si.stream_id);
+            assert(block.id > si.last_block_id);
+            assert(block.id > si.first_block_id);
+            si.last_block_id = block.id;
+        }
+    }
+    return info;
+}
+
+static
+void benchStreamingInternal(ThreadContext *ctx, vector<StreamInfo> &streams) {
+    assert(ctx);
+    const EngineHyperscan &e = ctx->engine;
+    const vector<DataBlock> &blocks = ctx->corpus_data;
+
+    for (ResultEntry &r : ctx->results) {
+        ctx->timer.start();
+
+        for (const auto &b : blocks) {
+            StreamInfo &stream = streams[b.internal_stream_index];
+            assert(stream.stream_id == b.stream_id);
+
+            // If this is the first block in the stream, open the stream
+            // handle.
+            if (b.id == stream.first_block_id) {
+                assert(!stream.eng_handle);
+                stream.eng_handle = e.streamOpen(*ctx->enginectx, b.stream_id);
+                if (!stream.eng_handle) {
+                    printf("Fatal error: stream open failed!\n");
+                    exit(1);
+                }
+            }
+
+            assert(stream.eng_handle);
+
+            e.streamScan(*stream.eng_handle, b.payload.c_str(),
+                         b.payload.size(), b.id, r);
+
+            // If this was the last block in the stream, close the stream
+            // handle.
+            if (b.id == stream.last_block_id) {
+                e.streamClose(move(stream.eng_handle), r);
+                stream.eng_handle = nullptr;
+            }
+        }
+
+        ctx->timer.complete();
+        r.seconds = ctx->timer.seconds();
+    }
+}
+
+/** Run a benchmark over a given engine and corpus in streaming mode. */
+static
+void benchStreaming(void *context) {
+    ThreadContext *ctx = (ThreadContext *)context;
+    vector<StreamInfo> streams = prepStreamingData(ctx);
+
+    // Synchronization point
+    ctx->barrier();
+
+    startTotalTimer(ctx);
+
+    benchStreamingInternal(ctx, streams);
+
+    // Synchronization point
+    ctx->barrier();
+
+    // Now that all threads are finished, we can stop the clock.
+    stopTotalTimer(ctx);
+}
+
+/** In-memory structure for a data block to be scanned in vectored mode. */
+struct VectoredInfo {
+    vector<const char *> data;
+    vector<unsigned int> len;
+    unsigned int stream_id;
+};
+
+/**
+ * Take a ThreadContext and prepare a vector<VectoredInfo> for vectored mode
+ * scanning from it.
+ */
+static
+vector<VectoredInfo> prepVectorData(const ThreadContext *ctx) {
+    vector<VectoredInfo> out(count_streams(ctx->corpus_data));
+    for (const DataBlock &block : ctx->corpus_data) {
+        VectoredInfo &vi = out[block.internal_stream_index];
+        if (vi.data.empty()) {
+            vi.stream_id = block.stream_id;
+        } else {
+            assert(vi.stream_id == block.stream_id);
+        }
+        vi.data.push_back(block.payload.c_str());
+        vi.len.push_back(block.payload.size());
+    }
+
+    return out;
+}
+
+/** Run a benchmark over a given engine and corpus in vectored mode.
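+ *
+ * (Each stream's blocks are gathered into parallel data/len arrays by
+ * prepVectorData, so a stream is scanned with a single scan_vectored call
+ * per iteration.)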
+ */
+static
+void benchVectored(void *context) {
+    ThreadContext *ctx = (ThreadContext *)context;
+
+    vector<VectoredInfo> v_plans = prepVectorData(ctx);
+
+    // Synchronization point
+    ctx->barrier();
+
+    startTotalTimer(ctx);
+
+    for (ResultEntry &r : ctx->results) {
+        ctx->timer.start();
+
+        for (const VectoredInfo &v_plan : v_plans) {
+            ctx->engine.scan_vectored(&v_plan.data[0], &v_plan.len[0],
+                                      v_plan.data.size(), v_plan.stream_id, r,
+                                      *ctx->enginectx);
+        }
+
+        ctx->timer.complete();
+        r.seconds = ctx->timer.seconds();
+    }
+
+    // Synchronization point
+    ctx->barrier();
+
+    // Now that all threads are finished, we can stop the clock.
+    stopTotalTimer(ctx);
+}
+
+/** Given a time and a size, compute the throughput in megabits/sec. */
+static
+long double calc_mbps(double seconds, u64a bytes) {
+    assert(seconds > 0);
+    return (long double)bytes / ((long double)seconds * 125000);
+}
+
+/** Dump per-scan throughput data to screen. */
+static
+void displayPerScanResults(const vector<unique_ptr<ThreadContext>> &threads,
+                           u64a bytesPerRun) {
+    for (const auto &t : threads) {
+        const auto &results = t->results;
+        for (size_t j = 0; j != results.size(); j++) {
+            const auto &r = results[j];
+            double mbps = calc_mbps(r.seconds, bytesPerRun);
+            printf("T %2u Scan %2zu: %'0.2f Mbit/sec\n", t->num, j, mbps);
+        }
+    }
+    printf("\n");
+}
+
+static
+u64a byte_size(const vector<DataBlock> &corpus_blocks) {
+    u64a total = 0;
+    for (const DataBlock &block : corpus_blocks) {
+        total += block.payload.size();
+    }
+
+    return total;
+}
+
+/** Dump benchmark results to screen. */
+static
+void displayResults(const vector<unique_ptr<ThreadContext>> &threads,
+                    const vector<DataBlock> &corpus_blocks) {
+    u64a bytesPerRun = byte_size(corpus_blocks);
+    u64a matchesPerRun = threads[0]->results[0].matches;
+
+    // Sanity check: all of our results should have the same match count.
+    for (const auto &t : threads) {
+        if (!all_of(begin(t->results), end(t->results),
+                    [&matchesPerRun](const ResultEntry &e) {
+                        return e.matches == matchesPerRun;
+                    })) {
+            printf("\nWARNING: PER-SCAN MATCH COUNTS ARE INCONSISTENT!\n\n");
+            break;
+        }
+    }
+
+    printf("Time spent scanning:   %'0.3f seconds\n", totalSecs);
+    printf("Corpus size:           %'llu bytes ", bytesPerRun);
+    switch (scan_mode) {
+    case ScanMode::STREAMING:
+        printf("(%'zu blocks in %'llu streams)\n", corpus_blocks.size(),
+               count_streams(corpus_blocks));
+        break;
+    case ScanMode::VECTORED:
+        printf("(%'zu blocks in %'llu vectors)\n", corpus_blocks.size(),
+               count_streams(corpus_blocks));
+        break;
+    case ScanMode::BLOCK:
+        printf("(%'zu blocks)\n", corpus_blocks.size());
+        break;
+    }
+
+    u64a totalBytes = bytesPerRun * repeats * threads.size();
+    u64a totalBlocks = corpus_blocks.size() * repeats * threads.size();
+
+    double matchRate = ((double)matchesPerRun * 1024) / bytesPerRun;
+    printf("Matches per iteration: %'llu (%'0.3f matches/kilobyte)\n",
+           matchesPerRun, matchRate);
+
+    double blockRate = (double)totalBlocks / (double)totalSecs;
+    printf("Overall block rate:    %'0.2f blocks/sec\n", blockRate);
+    printf("Overall throughput:    %'0.2Lf Mbit/sec\n",
+           calc_mbps(totalSecs, totalBytes));
+    printf("\n");
+
+    if (display_per_scan) {
+        displayPerScanResults(threads, bytesPerRun);
+    }
+}
+
+/**
+ * Construct a thread context for this scanning mode.
+ *
+ * Note: does not take blocks by reference. This is to give every thread their
+ * own copy of the data. It would be unrealistic for every thread to be
+ * scanning the same copy of the data.
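+ *
+ * (The copy is made by the ThreadContext constructor, which stores the
+ * block vector by value.)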
+ */
+static
+unique_ptr<ThreadContext> makeThreadContext(const EngineHyperscan &db,
+                                            const vector<DataBlock> &blocks,
+                                            unsigned id,
+                                            thread_barrier &sync_barrier) {
+    thread_func_t fn = nullptr;
+    switch (scan_mode) {
+    case ScanMode::STREAMING:
+        fn = benchStreaming;
+        break;
+    case ScanMode::VECTORED:
+        fn = benchVectored;
+        break;
+    case ScanMode::BLOCK:
+        fn = benchBlock;
+        break;
+    }
+    assert(fn);
+
+    return ue2::make_unique<ThreadContext>(id, db, sync_barrier, fn, blocks);
+}
+
+/** Run the given benchmark. */
+static
+void runBenchmark(const EngineHyperscan &db,
+                  const vector<DataBlock> &corpus_blocks) {
+    size_t numThreads;
+    bool useAffinity = false;
+
+    if (threadCores.empty()) {
+        numThreads = 1;
+    } else {
+        numThreads = threadCores.size();
+#ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP
+        useAffinity = true;
+#else
+        useAffinity = false;
+#endif
+    }
+
+    // Initialise a barrier that will let us sync threads before/after scanning
+    // for timer measurements.
+    thread_barrier sync_barrier(numThreads);
+
+    vector<unique_ptr<ThreadContext>> threads;
+
+    for (unsigned i = 0; i < numThreads; i++) {
+        auto t = makeThreadContext(db, corpus_blocks, i, sync_barrier);
+        int core = useAffinity ? (int)threadCores[i] : -1;
+        if (!t->start(core)) {
+            printf("Unable to start processing thread %u\n", i);
+            exit(1);
+        }
+        threads.push_back(move(t));
+    }
+
+    // Reap threads.
+    for (auto &t : threads) {
+        t->join();
+    }
+
+    // Display global results.
+    displayResults(threads, corpus_blocks);
+}
+
+} // namespace
+
+/** Main driver. */
+int main(int argc, char *argv[]) {
+    Grey grey;
+
+    setlocale(LC_ALL, ""); // use the user's locale
+
+#ifndef NDEBUG
+    printf("\nWARNING: DO NOT BENCHMARK A HYPERSCAN BUILD WITH ASSERTIONS\n\n");
+#endif
+
+    vector<BenchmarkSigs> sigSets;
+    processArgs(argc, argv, sigSets, grey);
+
+    // read in and process our expressions
+    ExpressionMap exprMapTemplate;
+    loadExpressions(exprPath, exprMapTemplate);
+
+    // If we have no signature sets, the user wants us to benchmark all the
+    // known expressions together.
+    if (sigSets.empty()) {
+        SignatureSet sigs;
+        for (auto i : exprMapTemplate | map_keys) {
+            sigs.push_back(i);
+        }
+        sigSets.emplace_back(exprPath, move(sigs));
+    }
+
+    // read in and process our corpus
+    vector<DataBlock> corpus_blocks;
+    try {
+        corpus_blocks = readCorpus(corpusFile);
+    } catch (const DataCorpusError &e) {
+        printf("Corpus data error: %s\n", e.msg.c_str());
+        return 1;
+    }
+
+    for (const auto &s : sigSets) {
+        ExpressionMap exprMap = exprMapTemplate; // copy
+
+        limitBySignature(exprMap, s.sigs);
+        if (exprMap.empty()) {
+            continue;
+        }
+
+        auto engine = buildEngineHyperscan(exprMap, scan_mode, s.name, grey);
+        if (!engine) {
+            printf("Error: expressions failed to compile.\n");
+            exit(1);
+        }
+
+        printf("\n");
+
+        runBenchmark(*engine, corpus_blocks);
+    }
+
+    return 0;
+}
diff --git a/tools/hsbench/scripts/CorpusBuilder.py b/tools/hsbench/scripts/CorpusBuilder.py
new file mode 100755
index 00000000..5baed2bd
--- /dev/null
+++ b/tools/hsbench/scripts/CorpusBuilder.py
@@ -0,0 +1,58 @@
+#!/usr/bin/python
+
+'''
+A module to construct corpora databases for the Hyperscan benchmarker
+(hsbench).
+
+After construction, simply add blocks with the add_chunk() method, then call
+finish() when you're done.
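+
+A minimal usage sketch ('example.db' is a hypothetical output name; the
+file must not already exist):
+
+    from CorpusBuilder import CorpusBuilder
+
+    builder = CorpusBuilder('example.db')
+    builder.add_chunk(0, 'first block of stream 0')
+    builder.add_chunk(0, 'second block of stream 0')
+    builder.finish()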
+'''
+
+import os.path
+
+try:
+    from sqlite3 import dbapi2 as sqlite
+except:
+    from pysqlite2 import dbapi2 as sqlite
+
+class CorpusBuilder:
+    SCHEMA = '''
+CREATE TABLE chunk (
+    id integer primary key,
+    stream_id integer not null,
+    data blob
+);
+'''
+
+    def __init__(self, outfile):
+        if os.path.exists(outfile):
+            raise RuntimeError("Database '%s' already exists" % outfile)
+        self.outfile = outfile
+        self.db = sqlite.connect(self.outfile)
+        self.db.executescript(CorpusBuilder.SCHEMA)
+        self.current_chunk_id = 0
+
+    def add_chunk(self, stream_id, data):
+        chunk_id = self.current_chunk_id
+        c = self.db.cursor()
+        q = 'insert into chunk (id, stream_id, data) values (?, ?, ?)'
+        c.execute(q, (chunk_id, stream_id, sqlite.Binary(data)))
+        self.current_chunk_id += 1
+        return chunk_id
+
+    def finish(self):
+        self.db.commit()
+
+        c = self.db.cursor()
+        q = 'create index chunk_stream_id_idx on chunk(stream_id)'
+        c.execute(q)
+
+        c = self.db.cursor()
+        q = 'vacuum'
+        c.execute(q)
+
+        c = self.db.cursor()
+        q = 'analyze'
+        c.execute(q)
+
+        self.db.commit()
diff --git a/tools/hsbench/scripts/gutenbergCorpus.py b/tools/hsbench/scripts/gutenbergCorpus.py
new file mode 100755
index 00000000..fa1b1570
--- /dev/null
+++ b/tools/hsbench/scripts/gutenbergCorpus.py
@@ -0,0 +1,68 @@
+#!/usr/bin/python
+
+'''
+This script creates a Hyperscan benchmarking corpus database from a supplied
+group of Project Gutenberg texts.
+'''
+
+import sys, getopt, os.path
+import gutenberg.acquire, gutenberg.cleanup, gutenberg.query
+from CorpusBuilder import CorpusBuilder
+
+stream_id = 0
+stream_bytes = 0
+
+def addBlocks(builder, block_size, stream_size, text_id, text):
+    global stream_id
+    global stream_bytes
+
+    print "text", text_id, "len", len(text)
+    i = 0
+    while i < len(text):
+        chunk = text[i:min(len(text), i + block_size)]
+        builder.add_chunk(stream_id, chunk)
+        i += block_size
+        stream_bytes += len(chunk)
+        if stream_bytes >= stream_size:
+            stream_id += 1
+            stream_bytes = 0
+    print "Text", text_id, ": added", i/block_size, "blocks of", block_size, "bytes."
+
+def buildCorpus(outFN, block_size, stream_size, text_ids):
+    if len(text_ids) == 0:
+        print >>sys.stderr, "Must provide at least one input ID"
+        sys.exit(-1)
+
+    builder = CorpusBuilder(outFN)
+
+    total_bytes = 0
+
+    for text_id in text_ids:
+        text_id = int(text_id)
+        text = gutenberg.acquire.load_etext(text_id)
+        text = gutenberg.cleanup.strip_headers(text).strip()
+        addBlocks(builder, block_size, stream_size, text_id, text)
+        total_bytes += len(text)
+
+    builder.finish()
+
+    print "Total:", total_bytes, "bytes."
+
+def usage(exeName):
+    errmsg = "Usage: %s -o <output file> -b <block size> -s <stream size> <text id>..."
+    errmsg = errmsg % exeName
+    print >> sys.stderr, errmsg
+    sys.exit(-1)
+
+if __name__ == '__main__':
+    opts, args = getopt.getopt(sys.argv[1:], 'o:b:s:')
+    opts = dict(opts)
+
+    requiredKeys = [ '-o', '-b', '-s' ]
+    for k in requiredKeys:
+        if not opts.has_key(k):
+            usage(os.path.basename(sys.argv[0]))
+
+    buildCorpus(opts['-o'], int(opts['-b']), int(opts['-s']), args)
diff --git a/tools/hsbench/scripts/linebasedCorpus.py b/tools/hsbench/scripts/linebasedCorpus.py
new file mode 100755
index 00000000..bde20e39
--- /dev/null
+++ b/tools/hsbench/scripts/linebasedCorpus.py
@@ -0,0 +1,53 @@
+#!/usr/bin/python
+
+'''
+Simple script to take a file full of lines of text and push them into a
+Hyperscan benchmarking corpus database, one block per line.
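+
+For example, assuming a hypothetical input file 'lines.txt':
+
+    python linebasedCorpus.py -i lines.txt -o lines.db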
+'''
+
+import sys, getopt, os.path
+from CorpusBuilder import CorpusBuilder
+
+def lineCorpus(inFN, outFN):
+    '''
+    Read lines from file name @inFN and write them as blocks to a new db with
+    name @outFN.
+    '''
+
+    if not os.path.exists(inFN):
+        print >> sys.stderr, "Input file '%s' does not exist. Exiting." % inFN
+        sys.exit(-1)
+
+    lines = open(inFN).readlines()
+
+    if len(lines) == 0:
+        print >> sys.stderr, "Input file contained no lines. Exiting."
+        sys.exit(0)
+
+    builder = CorpusBuilder(outFN)
+
+    # write a single stream to contain everything
+    streamId = 0
+
+    for l in lines:
+        builder.add_chunk(streamId, l.rstrip())
+
+    builder.finish()
+
+def usage(exeName):
+    errmsg = "Usage: %s -i <input file> -o <output db>"
+    errmsg = errmsg % exeName
+    print >> sys.stderr, errmsg
+    sys.exit(-1)
+
+if __name__ == '__main__':
+    args = getopt.getopt(sys.argv[1:], 'i:o:c:')
+    args = dict(args[0])
+
+    requiredKeys = [ '-i', '-o' ]
+    for k in requiredKeys:
+        if not args.has_key(k):
+            usage(os.path.basename(sys.argv[0]))
+
+    fnArgs = tuple([args[k] for k in requiredKeys])
+    lineCorpus(*fnArgs)
diff --git a/tools/hsbench/scripts/pcapCorpus.py b/tools/hsbench/scripts/pcapCorpus.py
new file mode 100755
index 00000000..c10bfef3
--- /dev/null
+++ b/tools/hsbench/scripts/pcapCorpus.py
@@ -0,0 +1,301 @@
+#!/usr/bin/env python
+
+'''
+Script to convert a pcap file containing UDP and TCP packets to a corpus file.
+'''
+
+import sys, getopt, pprint, os
+from sqlite3 import dbapi2 as sqlite
+import pcap
+from optparse import OptionParser
+from socket import AF_INET, IPPROTO_UDP, IPPROTO_TCP, inet_ntop, ntohs, ntohl, inet_ntoa
+import struct
+from CorpusBuilder import CorpusBuilder
+
+ETHERTYPE_IP = 0x0800        # IP protocol
+ETHERTYPE_ARP = 0x0806       # Addr. resolution protocol
+ETHERTYPE_REVARP = 0x8035    # reverse Addr. resolution protocol
+ETHERTYPE_VLAN = 0x8100      # IEEE 802.1Q VLAN tagging
+ETHERTYPE_IPV6 = 0x86dd      # IPv6
+
+#
+# A dictionary of active TCP streams
+#
+tcp_streams = {}
+
+#
+# A dictionary of UDP streams
+#
+udp_streams = {}
+
+#
+# Current stream id
+#
+cur_stream_id = 0
+
+def usage(exeName):
+    errmsg = "Usage: %s -i <pcap file> -o <corpus db>"
+    errmsg = errmsg % exeName
+    print >> sys.stderr, errmsg
+    sys.exit(-1)
+
+class FiveTuple(object):
+    def __init__(self, protocol, src_addr, src_port, dst_addr, dst_port):
+        self.protocol = protocol
+        self.src_addr = src_addr
+        self.src_port = src_port
+        self.dst_addr = dst_addr
+        self.dst_port = dst_port
+
+    def __str__(self):
+        return "%d,%s,%d,%s,%d" % (self.protocol, self.src_addr, self.src_port, self.dst_addr, self.dst_port)
+
+class UdpSegment:
+    """Definition of a UDP segment
+    """
+    def __init__(self, five_tuple, header, payload):
+        self.five_tuple = five_tuple
+        self.udp_header = header
+        self.udp_payload = payload
+
+class TcpSegment:
+    """Definition of a TCP segment
+    """
+    def __init__(self, five_tuple, header, payload):
+        self.five_tuple = five_tuple
+        self.tcp_header = header
+        self.tcp_payload = payload
+        self.tcp_sequence_number, self.tcp_acknowledgement_number = struct.unpack('!LL', header[4:12])
+
+    def opt_isset_FIN(self):
+        opts = ord(self.tcp_header[13]) & 0x3F
+        return (opts & 0x01)
+
+    def opt_isset_SYN(self):
+        opts = ord(self.tcp_header[13]) & 0x3F
+        return (opts & 0x02)
+
+    def get_sequence_number(self):
+        return self.tcp_sequence_number
+
+    def __cmp__(self, other):
+        return cmp(self.tcp_sequence_number, other.tcp_sequence_number)
+
+class TcpStream:
+    """Definition of a TCP stream.
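+
+    Segments are accumulated in arrival order and only sorted by TCP
+    sequence number (via get_segments_sorted) when the stream is written
+    out to the database.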
+    """
+    TCP_STREAM_ACTIVE = 0x01
+    TCP_STREAM_CLOSED = 0x02
+
+    def __init__(self, five_tuple):
+        self.five_tuple = five_tuple
+        self.initial_sequence_number = 0
+        self.segments = []
+
+    def reset_stream(self):
+        self.segments = []
+        self.initial_sequence_number = 0
+
+    def set_initial_sequence_number(self, sequence_number):
+        self.initial_sequence_number = sequence_number
+
+    def append_segment(self, tcp_segment):
+        if len(self.segments) == 0:
+            self.set_initial_sequence_number(tcp_segment.get_sequence_number())
+        self.segments.append(tcp_segment)
+
+    def get_segments_sorted(self):
+        return sorted(self.segments)
+
+class UdpStream:
+    """A container for UDP packets that share the same 5-tuple
+    """
+    def __init__(self, five_tuple):
+        self.five_tuple = five_tuple
+        self.segments = []
+
+    def append_segment(self, udp_segment):
+        self.segments.append(udp_segment)
+
+
+def newStream(five_tuple):
+    '''
+    Allocate a new stream ID for the given 5-tuple and return it.
+    '''
+    global cur_stream_id
+    stream_id = cur_stream_id
+    cur_stream_id += 1
+    return stream_id
+
+def process_tcp_segment(builder, segment):
+    """Process a TCP segment. It checks for the SYN and FIN flags and, if
+    either is set, modifies the associated stream.
+    """
+    segment_id = str(segment.five_tuple)
+    if segment_id in tcp_streams:
+        m_tcp_stream = tcp_streams[segment_id]
+        m_tcp_stream.append_segment(segment)
+    else:
+        m_tcp_stream = TcpStream(segment.five_tuple)
+        m_tcp_stream.append_segment(segment)
+        tcp_streams[segment_id] = m_tcp_stream
+
+    if segment.opt_isset_SYN():
+        m_tcp_stream.segments = []
+
+    if segment.opt_isset_FIN():
+        #
+        # Finished with the stream - add the segments in the
+        # stream to the db, allowing the stream to be reused.
+        #
+        db_add_tcp_stream_segments(builder, m_tcp_stream)
+        del tcp_streams[segment_id]
+
+def process_udp_segment(builder, segment):
+    """Process a UDP segment. Given the connectionless nature of the UDP
+    protocol, we simply accumulate the segment for later processing
+    when all the packets have been read.
+    """
+    segment_id = str(segment.five_tuple)
+    if segment_id in udp_streams:
+        m_udp_stream = udp_streams[segment_id]
+        m_udp_stream.append_segment(segment)
+    else:
+        m_udp_stream = UdpStream(segment.five_tuple)
+        m_udp_stream.append_segment(segment)
+        udp_streams[segment_id] = m_udp_stream
+
+
+def db_add_tcp_stream_segments(builder, tcp_stream):
+    """Add the contents of a TCP stream to the database.
+    """
+    tcp_segments = tcp_stream.get_segments_sorted()
+    last_sequence_num = 0
+    streamID = None
+
+    for tcp_segment in tcp_segments:
+        if (len(tcp_segment.tcp_payload) > 0) and (tcp_segment.tcp_sequence_number > last_sequence_num):
+            #
+            # Segment with an actual payload - add it to the stream's
+            # list of chunks.
+            #
+            # Note: delay creating the stream until we have a viable chunk
+            # to commit to it.
+            #
+            if streamID is None:
+                streamID = newStream(tcp_stream.five_tuple)
+            builder.add_chunk(streamID, tcp_segment.tcp_payload)
+            last_sequence_num = tcp_segment.tcp_sequence_number
+
+
+def db_add_udp_stream_segments(builder, udp_stream):
+    """Add the contents of a UDP stream to the database. Since UDP is
+    connection-less, a UDP stream object is really just an accumulation
+    of all the packets associated with a given 5-tuple.
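+
+    Each non-empty payload becomes one chunk; creating the stream is
+    deferred until the first such payload is seen.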
+    """
+    udp_segments = udp_stream.segments
+    streamID = None
+    for udp_segment in udp_segments:
+        if len(udp_segment.udp_payload) > 0:
+            if streamID is None:
+                streamID = newStream(udp_stream.five_tuple)
+            builder.add_chunk(streamID, udp_segment.udp_payload)
+
+def enchunk_pcap(pcapFN, sqliteFN):
+    """Read the contents of a pcap file with name @pcapFN and produce
+    a sqlite db with name @sqliteFN. It will contain chunks of data
+    from TCP and UDP streams.
+    """
+
+    if not os.path.exists(pcapFN):
+        print >> sys.stderr, "Input file '%s' does not exist. Exiting." % pcapFN
+        sys.exit(-1)
+
+    builder = CorpusBuilder(sqliteFN)
+
+    #
+    # Read in the contents of the pcap file, adding stream segments as found
+    #
+    pkt_cnt = 0
+    ip_pkt_cnt = 0
+    unsupported_ip_protocol_cnt = 0
+    pcap_ref = pcap.pcap(pcapFN)
+    done = False
+
+    while not done:
+        try:
+            ts, packet = pcap_ref.next()
+        except:
+            break
+
+        pkt_cnt += 1
+
+        linkLayerType = struct.unpack('!H', packet[(pcap_ref.dloff - 2):pcap_ref.dloff])[0]
+        if linkLayerType != ETHERTYPE_IP:
+            #
+            # We're only interested in IP packets
+            #
+            continue
+
+        ip_pkt_cnt += 1
+
+        ip_pkt_total_len = struct.unpack('!H', packet[pcap_ref.dloff + 2: pcap_ref.dloff + 4])[0]
+        ip_pkt = packet[pcap_ref.dloff:pcap_ref.dloff + ip_pkt_total_len]
+        pkt_protocol = struct.unpack('B', ip_pkt[9])[0]
+
+        if (pkt_protocol != IPPROTO_UDP) and (pkt_protocol != IPPROTO_TCP):
+            #
+            # We're only interested in UDP and TCP packets at the moment
+            #
+            continue
+
+        pkt_src_addr = inet_ntoa(ip_pkt[12:16])
+        pkt_dst_addr = inet_ntoa(ip_pkt[16:20])
+
+        ip_hdr_len_offset = (ord(ip_pkt[0]) & 0x0f) * 4
+        ip_payload = ip_pkt[ip_hdr_len_offset:len(ip_pkt)]
+
+        pkt_src_port, pkt_dst_port = struct.unpack('!HH', ip_payload[0:4])
+        five_tuple = FiveTuple(pkt_protocol, pkt_src_addr, pkt_src_port, pkt_dst_addr, pkt_dst_port)
+        five_tuple_id = str(five_tuple)
+
+        if pkt_protocol == IPPROTO_UDP:
+            udp_payload_len = struct.unpack('!H', ip_payload[4:6])[0] - 8
+            udp_header = ip_payload[0:8]
+            udp_payload = ip_payload[8:len(ip_payload)]
+            udp_segment = UdpSegment(five_tuple, udp_header, udp_payload)
+            process_udp_segment(builder, udp_segment)
+        elif pkt_protocol == IPPROTO_TCP:
+            tcp_hdr_len = (ord(ip_payload[12]) >> 4) * 4
+            tcp_header = ip_payload[0:tcp_hdr_len]
+            tcp_payload = ip_payload[tcp_hdr_len:len(ip_payload)]
+            segment = TcpSegment(five_tuple, tcp_header, tcp_payload)
+            process_tcp_segment(builder, segment)
+
+    #
+    # Having read the contents of the pcap, we fill the database with any
+    # remaining TCP and UDP segments
+    #
+    for tcp_stream in tcp_streams.itervalues():
+        db_add_tcp_stream_segments(builder, tcp_stream)
+
+    for udp_stream in udp_streams.itervalues():
+        db_add_udp_stream_segments(builder, udp_stream)
+
+    #
+    # We've finished with the database
+    #
+    builder.finish()
+
+if __name__ == '__main__':
+
+    args = getopt.getopt(sys.argv[1:], 'i:o:')
+    args = dict(args[0])
+
+    requiredKeys = [ '-i', '-o' ]
+    for k in requiredKeys:
+        if not args.has_key(k):
+            usage(os.path.basename(sys.argv[0]))
+
+    fnArgs = tuple([args[k] for k in requiredKeys])
+    enchunk_pcap(*fnArgs)
diff --git a/tools/hsbench/thread_barrier.h b/tools/hsbench/thread_barrier.h
new file mode 100644
index 00000000..1c3a53e7
--- /dev/null
+++ b/tools/hsbench/thread_barrier.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * \brief Simple thread barrier.
+ */
+
+#ifndef TOOLS_THREAD_BARRIER_H
+#define TOOLS_THREAD_BARRIER_H
+
+#include <condition_variable>
+#include <mutex>
+
+/**
+ * \brief Simple thread barrier class.
+ *
+ * Blocks until wait() has been called N times.
+ */
+class thread_barrier {
+public:
+    explicit thread_barrier(unsigned int n) : max(n) {
+        if (max == 0) {
+            throw std::runtime_error("invalid barrier");
+        }
+    }
+
+    void wait() {
+        std::unique_lock<std::mutex> lock(mtx);
+        count++;
+        if (count >= max) {
+            count = 0;
+            condvar.notify_all();
+        } else {
+            condvar.wait(lock);
+        }
+    }
+
+private:
+    std::mutex mtx;
+    std::condition_variable condvar;
+    unsigned int count = 0;
+    unsigned int max;
+};
+
+#endif // TOOLS_THREAD_BARRIER_H
diff --git a/tools/hsbench/timer.h b/tools/hsbench/timer.h
new file mode 100644
index 00000000..85bd294c
--- /dev/null
+++ b/tools/hsbench/timer.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TIMER_H
+#define TIMER_H
+
+#include "ue2common.h"
+
+#include <chrono>
+
+class Timer {
+public:
+    Timer() = default;
+
+    void start() {
+        clock_start = Clock::now();
+    }
+
+    void complete() {
+        clock_end = Clock::now();
+    }
+
+    double seconds() const {
+        std::chrono::duration<double> secs = clock_end - clock_start;
+        return secs.count();
+    }
+
+protected:
+    using Clock = std::chrono::steady_clock;
+    std::chrono::time_point<Clock> clock_start;
+    std::chrono::time_point<Clock> clock_end;
+};
+
+#endif // TIMER_H
diff --git a/util/CMakeLists.txt b/util/CMakeLists.txt
index dc731322..c0a6bc21 100644
--- a/util/CMakeLists.txt
+++ b/util/CMakeLists.txt
@@ -1,7 +1,10 @@
 # utility libs
 
+CHECK_FUNCTION_EXISTS(mmap HAVE_MMAP)
+
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
 
-include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
+include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}
+                    ${PROJECT_SOURCE_DIR})
 
 set_source_files_properties(
     ${CMAKE_BINARY_DIR}/tools/ExpressionParser.cpp
@@ -31,3 +34,14 @@ SET(corpusomatic_SRCS
 )
 add_library(corpusomatic STATIC ${corpusomatic_SRCS})
 
+set(databaseutil_SRCS
+    database_util.cpp
+    database_util.h
+)
+add_library(databaseutil STATIC ${databaseutil_SRCS})
+
+set(crosscompileutil_SRCS
+    cross_compile.cpp
+    cross_compile.h
+    )
+add_library(crosscompileutil STATIC ${crosscompileutil_SRCS})
diff --git a/util/cross_compile.cpp b/util/cross_compile.cpp
new file mode 100644
index 00000000..b4d1f5f1
--- /dev/null
+++ b/util/cross_compile.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2015-2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "cross_compile.h"
+#include "src/ue2common.h"
+#include "src/hs_compile.h"
+#include "src/util/make_unique.h"
+
+#include <cassert>
+#include <sstream>
+
+using namespace std;
+
+struct XcompileMode {
+    const char *name;
+    unsigned long long cpu_features;
+};
+
+static const XcompileMode xcompile_options[] = {
+    { "avx2", HS_CPU_FEATURES_AVX2 },
+    { "base", 0 },
+};
+
+unique_ptr<hs_platform_info> xcompileReadMode(const char *s) {
+    hs_platform_info rv;
+    UNUSED hs_error_t err;
+    err = hs_populate_platform(&rv);
+    assert(!err);
+
+    string str(s);
+    string mode = str.substr(0, str.find(":"));
+    string opt = str.substr(str.find(":") + 1, str.npos);
+    bool found_mode = false;
+
+    if (!opt.empty()) {
+        const size_t numOpts = ARRAY_LENGTH(xcompile_options);
+        for (size_t i = 0; i < numOpts; i++) {
+            if (opt.compare(xcompile_options[i].name) == 0) {
+                DEBUG_PRINTF("found opt %zu:%llu\n", i,
+                             xcompile_options[i].cpu_features);
+                rv.cpu_features = xcompile_options[i].cpu_features;
+                found_mode = true;
+                break;
+            }
+        }
+    }
+
+    if (!found_mode) {
+        return nullptr;
+    } else {
+        DEBUG_PRINTF("cpu_features %llx\n", rv.cpu_features);
+        return ue2::make_unique<hs_platform_info>(rv);
+    }
+}
+
+string to_string(const hs_platform_info &p) {
+    ostringstream out;
+    if (p.tune) {
+        out << p.tune;
+    }
+
+    if (p.cpu_features) {
+        u64a features = p.cpu_features;
+        if (features & HS_CPU_FEATURES_AVX2) {
+            out << " avx2";
+            features &= ~HS_CPU_FEATURES_AVX2;
+        }
+
+        if (features) {
+            out << " " << "?cpu_features?:" << features;
+        }
+    }
+
+    return out.str();
+}
+
+string xcompileUsage(void) {
+    string variants = "Instruction set options: ";
+    const size_t numOpts = ARRAY_LENGTH(xcompile_options);
+    for (size_t i = 0; i < numOpts; i++) {
+        variants += xcompile_options[i].name;
+        if (i + 1 != numOpts) {
+            variants += ", ";
+        }
+    }
+
+    return variants;
+}
diff --git a/util/cross_compile.h b/util/cross_compile.h
new file mode 100644
index 00000000..ddfc7b10
--- /dev/null
+++ b/util/cross_compile.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2015-2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CROSS_COMPILE_H
+#define CROSS_COMPILE_H
+
+#include <memory>
+#include <string>
+
+struct hs_platform_info;
+
+std::unique_ptr<hs_platform_info> xcompileReadMode(const char *s);
+std::string xcompileUsage(void);
+
+std::string to_string(const hs_platform_info &p);
+
+#endif /* CROSS_COMPILE_H */
diff --git a/util/database_util.cpp b/util/database_util.cpp
new file mode 100644
index 00000000..3df75e2a
--- /dev/null
+++ b/util/database_util.cpp
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2015-2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "database_util.h"
+
+#include "hs_common.h"
+
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+
+#if defined(HAVE_MMAP)
+#include <sys/mman.h> // for mmap
+#include <unistd.h> // for close
+#include <fcntl.h>
+#include <sys/stat.h>
+#endif
+
+using namespace std;
+
+bool saveDatabase(const hs_database_t *db, const char *filename,
+                  bool verbose) {
+    assert(db);
+    assert(filename);
+
+    if (verbose) {
+        cout << "Saving database to: " << filename << endl;
+    }
+
+    char *bytes = nullptr;
+    size_t length = 0;
+    hs_error_t err = hs_serialize_database(db, &bytes, &length);
+    if (err != HS_SUCCESS) {
+        return false;
+    }
+
+    assert(bytes);
+    assert(length > 0);
+
+    ofstream out(filename, ios::binary);
+    out.write(bytes, length);
+    out.close();
+
+    ::free(bytes);
+
+    return true;
+}
+
+hs_database_t *loadDatabase(const char *filename, bool verbose) {
+    assert(filename);
+
+    if (verbose) {
+        cout << "Loading database from: " << filename << endl;
+    }
+
+    char *bytes = nullptr;
+
+#if defined(HAVE_MMAP)
+    // Use mmap to read the file
+    int fd = open(filename, O_RDONLY);
+    if (fd < 0) {
+        return nullptr;
+    }
+    struct stat st;
+    if (fstat(fd, &st) < 0) {
+        close(fd);
+        return nullptr;
+    }
+    size_t len = st.st_size;
+
+    bytes = (char *)mmap(nullptr, len, PROT_READ, MAP_SHARED, fd, 0);
+    if (bytes == MAP_FAILED) {
+        cout << "mmap failed" << endl;
+        close(fd);
+        return nullptr;
+    }
+#else
+    // Fall back on stream IO
+    ifstream is;
+    is.open(filename, ios::in | ios::binary);
+    if (!is.is_open()) {
+        return nullptr;
+    }
+    is.seekg(0, ios::end);
+    size_t len = is.tellg();
+    if (verbose) {
+        cout << "Reading " << len << " bytes" << endl;
+    }
+    is.seekg(0, ios::beg);
+    bytes = new char[len];
+    is.read(bytes, len);
+    is.close();
+#endif
+
+    assert(bytes);
+
+    if (verbose) {
+        char *info = nullptr;
+        hs_error_t err = hs_serialized_database_info(bytes, len, &info);
+        if (err) {
+            cout << "Unable to decode serialized database info: " << err
+                 << endl;
+        } else if (info) {
+            cout << "Serialized database info: " << info << endl;
+            std::free(info);
+        } else {
+            cout << "Unable to decode serialized database info." << endl;
+        }
+    }
+
+    hs_database_t *db = nullptr;
+    hs_error_t err = hs_deserialize_database(bytes, len, &db);
+
+#if defined(HAVE_MMAP)
+    munmap(bytes, len);
+    close(fd);
+#else
+    delete [] bytes;
+#endif
+
+    if (err != HS_SUCCESS) {
+        cout << "hs_deserialize_database call failed: " << err << endl;
+        return nullptr;
+    }
+
+    assert(db);
+
+    return db;
+}
diff --git a/util/database_util.h b/util/database_util.h
new file mode 100644
index 00000000..badd036d
--- /dev/null
+++ b/util/database_util.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2015-2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DATABASE_UTIL_H
+#define DATABASE_UTIL_H
+
+struct hs_database;
+
+bool saveDatabase(const hs_database *db, const char *filename,
+                  bool verbose = false);
+
+hs_database *loadDatabase(const char *filename, bool verbose = false);
+
+#endif /* DATABASE_UTIL_H */
diff --git a/util/expression_path.h b/util/expression_path.h
new file mode 100644
index 00000000..3075b4d4
--- /dev/null
+++ b/util/expression_path.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2015-2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef EXPRESSION_PATH_H
+#define EXPRESSION_PATH_H
+
+#include "ue2common.h"
+
+#include <cerrno>
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include <sys/stat.h>
+#if !defined(_WIN32)
+#include <libgen.h>
+#include <unistd.h>
+#endif
+
+//
+// Utility functions
+//
+
+/**
+ * Given a path to a signature file, infer the path of the pcre directory.
+ */
+static inline
+std::string inferExpressionPath(const std::string &sigFile) {
+#ifndef _WIN32
+    // POSIX variant.
+
+    // dirname() may modify its argument, so we must make a copy.
+    std::vector<char> path(sigFile.size() + 1);
+    memcpy(path.data(), sigFile.c_str(), sigFile.size());
+    path[sigFile.size()] = 0; // ensure null termination.
+
+    std::string rv = dirname(path.data());
+#else
+    // Windows variant.
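+    // _splitpath() decomposes the path into drive/dir/name/ext components;
+    // only the directory component is needed here.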
+ if (sigFile.size() >= _MAX_DIR) { + return std::string(); + } + char path[_MAX_DIR]; + _splitpath(sigFile.c_str(), nullptr, path, nullptr, nullptr); + std::string rv(path); +#endif + + rv += "/../pcre"; + return rv; +} + +#if defined(_WIN32) +#define stat _stat +#define S_IFREG _S_IFREG +#endif + +static inline +bool isDir(const std::string &filename) { + struct stat s; + + if (stat(filename.c_str(), &s) == -1) { + std::cerr << "stat: " << strerror(errno) << std::endl; + return false; + } + + return (S_IFDIR & s.st_mode); +} + +static inline +bool isFile(const std::string &filename) { + struct stat s; + + if (stat(filename.c_str(), &s) == -1) { + std::cerr << "stat: " << strerror(errno) << std::endl; + return false; + } + + return (S_IFREG & s.st_mode); +} + +#endif /* EXPRESSION_PATH_H */ From e51b6d23b9caa8362e0283fc806e227a5fd9f494 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 1 Dec 2016 14:32:47 +1100 Subject: [PATCH 072/103] introduce Sheng-McClellan hybrid --- CMakeLists.txt | 18 +- src/grey.cpp | 2 + src/grey.h | 1 + src/nfa/limex_accel.c | 6 +- src/nfa/limex_shuffle.h | 46 - src/nfa/mcclellan.c | 92 +- src/nfa/mcclellan_internal.h | 12 +- src/nfa/mcclellancompile.cpp | 10 +- src/nfa/mcclellancompile.h | 2 - src/nfa/mcsheng.c | 1406 ++++++++++++++++++++++++++++++ src/nfa/mcsheng.h | 84 ++ src/nfa/mcsheng_compile.cpp | 1144 ++++++++++++++++++++++++ src/nfa/mcsheng_compile.h | 59 ++ src/nfa/mcsheng_data.c | 43 + src/nfa/mcsheng_dump.cpp | 415 +++++++++ src/nfa/mcsheng_dump.h | 50 ++ src/nfa/mcsheng_internal.h | 95 ++ src/nfa/nfa_api_dispatch.c | 3 + src/nfa/nfa_build_util.cpp | 33 + src/nfa/nfa_dump_dispatch.cpp | 3 + src/nfa/nfa_internal.h | 19 +- src/nfa/rdfa_graph.cpp | 68 ++ src/nfa/rdfa_graph.h | 54 ++ src/nfa/shengcompile.cpp | 5 +- src/nfagraph/ng_util.cpp | 34 - src/rose/rose_build_bytecode.cpp | 26 +- src/rose/rose_build_infix.cpp | 2 +- src/rose/rose_build_misc.cpp | 2 +- src/util/bitutils.h | 51 ++ src/util/graph.h | 39 + src/util/simd_utils.h | 23 + unit/internal/bitutils.cpp | 15 +- unit/internal/nfagraph_util.cpp | 20 +- unit/internal/shuffle.cpp | 76 +- unit/internal/simd_utils.cpp | 52 ++ 35 files changed, 3804 insertions(+), 206 deletions(-) create mode 100644 src/nfa/mcsheng.c create mode 100644 src/nfa/mcsheng.h create mode 100644 src/nfa/mcsheng_compile.cpp create mode 100644 src/nfa/mcsheng_compile.h create mode 100644 src/nfa/mcsheng_data.c create mode 100644 src/nfa/mcsheng_dump.cpp create mode 100644 src/nfa/mcsheng_dump.h create mode 100644 src/nfa/mcsheng_internal.h create mode 100644 src/nfa/rdfa_graph.cpp create mode 100644 src/nfa/rdfa_graph.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 52d54955..9062c287 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -448,10 +448,6 @@ set (hs_exec_SRCS src/nfa/lbr.h src/nfa/lbr_common_impl.h src/nfa/lbr_internal.h - src/nfa/mcclellan.c - src/nfa/mcclellan.h - src/nfa/mcclellan_common_impl.h - src/nfa/mcclellan_internal.h src/nfa/limex_accel.c src/nfa/limex_accel.h src/nfa/limex_exceptional.h @@ -470,6 +466,14 @@ set (hs_exec_SRCS src/nfa/limex_runtime_impl.h src/nfa/limex_shuffle.h src/nfa/limex_state_impl.h + src/nfa/mcclellan.c + src/nfa/mcclellan.h + src/nfa/mcclellan_common_impl.h + src/nfa/mcclellan_internal.h + src/nfa/mcsheng.c + src/nfa/mcsheng_data.c + src/nfa/mcsheng.h + src/nfa/mcsheng_internal.h src/nfa/mpv.h src/nfa/mpv.c src/nfa/mpv_internal.h @@ -650,6 +654,8 @@ SET (hs_SRCS src/nfa/mcclellancompile.h src/nfa/mcclellancompile_util.cpp src/nfa/mcclellancompile_util.h + 
src/nfa/mcsheng_compile.cpp + src/nfa/mcsheng_compile.h src/nfa/limex_compile.cpp src/nfa/limex_compile.h src/nfa/limex_accel.h @@ -667,6 +673,8 @@ SET (hs_SRCS src/nfa/nfa_internal.h src/nfa/nfa_kind.h src/nfa/rdfa.h + src/nfa/rdfa_graph.cpp + src/nfa/rdfa_graph.h src/nfa/rdfa_merge.cpp src/nfa/rdfa_merge.h src/nfa/repeat_internal.h @@ -962,6 +970,8 @@ set(hs_dump_SRCS src/nfa/limex_dump.cpp src/nfa/mcclellandump.cpp src/nfa/mcclellandump.h + src/nfa/mcsheng_dump.cpp + src/nfa/mcsheng_dump.h src/nfa/mpv_dump.cpp src/nfa/nfa_dump_api.h src/nfa/nfa_dump_dispatch.cpp diff --git a/src/grey.cpp b/src/grey.cpp index bad56b56..340a34bf 100644 --- a/src/grey.cpp +++ b/src/grey.cpp @@ -51,6 +51,7 @@ Grey::Grey(void) : allowLbr(true), allowMcClellan(true), allowSheng(true), + allowMcSheng(true), allowPuff(true), allowLiteral(true), allowRose(true), @@ -217,6 +218,7 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(allowLbr); G_UPDATE(allowMcClellan); G_UPDATE(allowSheng); + G_UPDATE(allowMcSheng); G_UPDATE(allowPuff); G_UPDATE(allowLiteral); G_UPDATE(allowRose); diff --git a/src/grey.h b/src/grey.h index 90f5f826..4882af7d 100644 --- a/src/grey.h +++ b/src/grey.h @@ -51,6 +51,7 @@ struct Grey { bool allowLbr; bool allowMcClellan; bool allowSheng; + bool allowMcSheng; bool allowPuff; bool allowLiteral; bool allowRose; diff --git a/src/nfa/limex_accel.c b/src/nfa/limex_accel.c index f883973e..c74c7079 100644 --- a/src/nfa/limex_accel.c +++ b/src/nfa/limex_accel.c @@ -78,7 +78,7 @@ size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux, size_t doAccel32(u32 s, u32 accel, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, size_t end) { - u32 idx = packedExtract32(s, accel); + u32 idx = pext32(s, accel); return accelScanWrapper(accelTable, aux, input, idx, i, end); } @@ -86,14 +86,14 @@ size_t doAccel32(u32 s, u32 accel, const u8 *accelTable, size_t doAccel64(u64a s, u64a accel, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, size_t end) { - u32 idx = packedExtract64(s, accel); + u32 idx = pext64(s, accel); return accelScanWrapper(accelTable, aux, input, idx, i, end); } #else size_t doAccel64(m128 s, m128 accel, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, size_t end) { - u32 idx = packedExtract64(movq(s), movq(accel)); + u32 idx = pext64(movq(s), movq(accel)); return accelScanWrapper(accelTable, aux, input, idx, i, end); } #endif diff --git a/src/nfa/limex_shuffle.h b/src/nfa/limex_shuffle.h index e45e4331..5ca8fce0 100644 --- a/src/nfa/limex_shuffle.h +++ b/src/nfa/limex_shuffle.h @@ -41,52 +41,6 @@ #include "util/bitutils.h" #include "util/simd_utils.h" -#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__)) -#define HAVE_PEXT -#endif - -static really_inline -u32 packedExtract32(u32 x, u32 mask) { -#if defined(HAVE_PEXT) - // Intel BMI2 can do this operation in one instruction. - return _pext_u32(x, mask); -#else - - u32 result = 0, num = 1; - while (mask != 0) { - u32 bit = findAndClearLSB_32(&mask); - if (x & (1U << bit)) { - assert(num != 0); // more than 32 bits! - result |= num; - } - num <<= 1; - } - return result; -#endif -} - -static really_inline -u32 packedExtract64(u64a x, u64a mask) { -#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) - // Intel BMI2 can do this operation in one instruction. 
- return _pext_u64(x, mask); -#else - - u32 result = 0, num = 1; - while (mask != 0) { - u32 bit = findAndClearLSB_64(&mask); - if (x & (1ULL << bit)) { - assert(num != 0); // more than 32 bits! - result |= num; - } - num <<= 1; - } - return result; -#endif -} - -#undef HAVE_PEXT - static really_inline u32 packedExtract128(m128 s, const m128 permute, const m128 compare) { m128 shuffled = pshufb(s, permute); diff --git a/src/nfa/mcclellan.c b/src/nfa/mcclellan.c index 63f5f535..584670c2 100644 --- a/src/nfa/mcclellan.c +++ b/src/nfa/mcclellan.c @@ -175,7 +175,7 @@ char mcclellanExec16_i(const struct mcclellan *m, u32 *state, const u8 *buf, if (mode == STOP_AT_MATCH) { *c_final = buf; } - return MO_CONTINUE_MATCHING; + return MO_ALIVE; } u32 s = *state; @@ -213,7 +213,7 @@ without_accel: if (mode == STOP_AT_MATCH) { *state = s & STATE_MASK; *c_final = c - 1; - return MO_CONTINUE_MATCHING; + return MO_MATCHES_PENDING; } u64a loc = (c - 1) - buf + offAdj + 1; @@ -221,12 +221,12 @@ without_accel: if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ + return MO_DEAD; /* termination requested */ } } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, &cached_accept_state, &cached_accept_id) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + return MO_DEAD; } } @@ -265,7 +265,7 @@ with_accel: if (mode == STOP_AT_MATCH) { *state = s & STATE_MASK; *c_final = c - 1; - return MO_CONTINUE_MATCHING; + return MO_MATCHES_PENDING; } u64a loc = (c - 1) - buf + offAdj + 1; @@ -273,12 +273,12 @@ with_accel: if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ + return MO_DEAD; /* termination requested */ } } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, &cached_accept_state, &cached_accept_id) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + return MO_DEAD; } } @@ -293,7 +293,7 @@ exit: } *state = s; - return MO_CONTINUE_MATCHING; + return MO_ALIVE; } static never_inline @@ -376,7 +376,7 @@ char mcclellanExec8_i(const struct mcclellan *m, u32 *state, const u8 *buf, char single, const u8 **c_final, enum MatchMode mode) { if (!len) { *c_final = buf; - return MO_CONTINUE_MATCHING; + return MO_ALIVE; } u32 s = *state; const u8 *c = buf; @@ -390,8 +390,7 @@ char mcclellanExec8_i(const struct mcclellan *m, u32 *state, const u8 *buf, u32 cached_accept_id = 0; u32 cached_accept_state = 0; - DEBUG_PRINTF("accel %hu, accept %hu\n", - m->accel_limit_8, m->accept_limit_8); + DEBUG_PRINTF("accel %hu, accept %u\n", m->accel_limit_8, accept_limit); DEBUG_PRINTF("s: %u, len %zu\n", s, len); @@ -417,19 +416,19 @@ without_accel: DEBUG_PRINTF("match - pausing\n"); *state = s; *c_final = c - 1; - return MO_CONTINUE_MATCHING; + return MO_MATCHES_PENDING; } u64a loc = (c - 1) - buf + offAdj + 1; if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + return MO_DEAD; } } else if (doComplexReport(cb, ctxt, m, s, loc, 0, &cached_accept_state, &cached_accept_id) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + return MO_DEAD; } } @@ -464,19 +463,19 @@ with_accel: DEBUG_PRINTF("match - pausing\n"); *state = s; *c_final = c - 1; - return MO_CONTINUE_MATCHING; + return MO_MATCHES_PENDING; } u64a loc = (c - 1) - buf + offAdj + 1; if (single) { DEBUG_PRINTF("reporting %u\n", 
m->arb_report); if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + return MO_DEAD; } } else if (doComplexReport(cb, ctxt, m, s, loc, 0, &cached_accept_state, &cached_accept_id) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + return MO_DEAD; } } @@ -488,7 +487,7 @@ exit: if (mode == STOP_AT_MATCH) { *c_final = c_end; } - return MO_CONTINUE_MATCHING; + return MO_ALIVE; } static never_inline @@ -576,7 +575,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, q->report_current = 0; if (rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + return MO_DEAD; } } @@ -611,17 +610,20 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, /* do main buffer region */ const u8 *final_look; - if (mcclellanExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp, - offset + sp, cb, context, single, &final_look, - mode) - == MO_HALT_MATCHING) { + char rv = mcclellanExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp, + offset + sp, cb, context, single, + &final_look, mode); + if (rv == MO_DEAD) { *(u16 *)q->state = 0; - return 0; + return MO_DEAD; } - if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) { + if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { DEBUG_PRINTF("this is as far as we go\n"); - assert(q->cur); DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); + + assert(q->cur); + assert(final_look != cur_buf + local_ep); + q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = final_look - cur_buf + 1; /* due to @@ -630,6 +632,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, return MO_MATCHES_PENDING; } + assert(rv == MO_ALIVE); assert(q->cur); if (mode != NO_MATCHES && q->items[q->cur].location > end) { DEBUG_PRINTF("this is as far as we go\n"); @@ -662,7 +665,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, case MQE_END: *(u16 *)q->state = s; q->cur++; - return s ? MO_ALIVE : 0; + return s ? MO_ALIVE : MO_DEAD; default: assert(!"invalid queue event"); } @@ -681,8 +684,8 @@ char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, const u8 *buffer, if (mcclellanExec16_i(m, &s, buffer, length, offset, cb, context, single, NULL, CALLBACK_OUTPUT) - == MO_HALT_MATCHING) { - return 0; + == MO_DEAD) { + return s ? 
MO_ALIVE : MO_DEAD; } const struct mstate_aux *aux = get_aux(m, s); @@ -691,7 +694,7 @@ char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, const u8 *buffer, doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL); } - return !!s; + return MO_ALIVE; } static really_inline @@ -724,7 +727,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, q->report_current = 0; if (rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + return MO_DEAD; } } @@ -760,16 +763,20 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, } const u8 *final_look; - if (mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, offset + sp, - cb, context, single, &final_look, mode) - == MO_HALT_MATCHING) { + char rv = mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, + offset + sp, cb, context, single, + &final_look, mode); + if (rv == MO_HALT_MATCHING) { *(u8 *)q->state = 0; - return 0; + return MO_DEAD; } - if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) { - /* found a match */ - DEBUG_PRINTF("found a match\n"); + if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { + DEBUG_PRINTF("this is as far as we go\n"); + DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); + assert(q->cur); + assert(final_look != cur_buf + local_ep); + q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = final_look - cur_buf + 1; /* due to @@ -778,6 +785,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, return MO_MATCHES_PENDING; } + assert(rv == MO_ALIVE); assert(q->cur); if (mode != NO_MATCHES && q->items[q->cur].location > end) { DEBUG_PRINTF("this is as far as we go\n"); @@ -811,7 +819,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, case MQE_END: *(u8 *)q->state = s; q->cur++; - return s ? MO_ALIVE : 0; + return s ? MO_ALIVE : MO_DEAD; default: assert(!"invalid queue event"); } @@ -830,8 +838,8 @@ char nfaExecMcClellan8_Bi(const struct NFA *n, u64a offset, const u8 *buffer, if (mcclellanExec8_i(m, &s, buffer, length, offset, cb, context, single, NULL, CALLBACK_OUTPUT) - == MO_HALT_MATCHING) { - return 0; + == MO_DEAD) { + return MO_DEAD; } const struct mstate_aux *aux = get_aux(m, s); @@ -840,7 +848,7 @@ char nfaExecMcClellan8_Bi(const struct NFA *n, u64a offset, const u8 *buffer, doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL); } - return s; + return s ? 
MO_ALIVE : MO_DEAD; } char nfaExecMcClellan8_B(const struct NFA *n, u64a offset, const u8 *buffer, diff --git a/src/nfa/mcclellan_internal.h b/src/nfa/mcclellan_internal.h index 4a27aadb..549bccf5 100644 --- a/src/nfa/mcclellan_internal.h +++ b/src/nfa/mcclellan_internal.h @@ -71,17 +71,17 @@ struct mcclellan { u16 start_floating; /**< floating start state */ u32 aux_offset; /**< offset of the aux structures relative to the start of * the nfa structure */ - u32 sherman_offset; /**< offset of to array of sherman state offsets - * the state_info structures relative to the start of the - * nfa structure */ - u32 sherman_end; /**< offset of the end of the state_info structures relative - * to the start of the nfa structure */ + u32 sherman_offset; /**< offset of array of sherman state offsets the + * state_info structures relative to the start of the + * nfa structure */ + u32 sherman_end; /**< offset of the end of the state_info structures + * relative to the start of the nfa structure */ u16 accel_limit_8; /**< 8 bit, lowest accelerable state */ u16 accept_limit_8; /**< 8 bit, lowest accept state */ u16 sherman_limit; /**< lowest sherman state */ u8 alphaShift; u8 flags; - u8 has_accel; /**< 1 iff there are any accel planes */ + u8 has_accel; /**< 1 iff there are any accel plans */ u8 remap[256]; /**< remaps characters to a smaller alphabet */ ReportID arb_report; /**< one of the accepts that this dfa may raise */ u32 accel_offset; /**< offset of the accel structures from start of NFA */ diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 09006d5b..7a73c9d4 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -415,9 +415,9 @@ void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info, : info.raw.start_floating); } -/* returns non-zero on error */ +/* returns false on error */ static -int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) { +bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) { info.states[0].impl_id = 0; /* dead is always 0 */ vector norm; @@ -426,7 +426,7 @@ int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) { if (info.size() > (1 << 16)) { DEBUG_PRINTF("too many states\n"); *sherman_base = 0; - return 1; + return false; } for (u32 i = 1; i < info.size(); i++) { @@ -452,7 +452,7 @@ int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) { /* Check to see if we haven't over allocated our states */ DEBUG_PRINTF("next sherman %u masked %u\n", next_sherman, (dstate_id_t)(next_sherman & STATE_MASK)); - return (next_sherman - 1) != ((next_sherman - 1) & STATE_MASK); + return (next_sherman - 1) == ((next_sherman - 1) & STATE_MASK); } static @@ -470,7 +470,7 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, assert(alphaShift <= 8); u16 count_real_states; - if (allocateFSN16(info, &count_real_states)) { + if (!allocateFSN16(info, &count_real_states)) { DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", info.size()); return nullptr; diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h index e6f548a7..8d8dfb19 100644 --- a/src/nfa/mcclellancompile.h +++ b/src/nfa/mcclellancompile.h @@ -32,9 +32,7 @@ #include "accel_dfa_build_strat.h" #include "rdfa.h" #include "ue2common.h" -#include "util/accel_scheme.h" #include "util/alloc.h" -#include "util/charreach.h" #include "util/ue2_containers.h" #include diff --git a/src/nfa/mcsheng.c b/src/nfa/mcsheng.c new file mode 100644 index 00000000..98db3f0a --- /dev/null +++ b/src/nfa/mcsheng.c @@ -0,0 +1,1406 @@ 
+/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "mcsheng.h" + +#include "accel.h" +#include "mcsheng_internal.h" +#include "nfa_api.h" +#include "nfa_api_queue.h" +#include "nfa_internal.h" +#include "util/bitutils.h" +#include "util/compare.h" +#include "util/simd_utils.h" +#include "ue2common.h" + +enum MatchMode { + CALLBACK_OUTPUT, + STOP_AT_MATCH, + NO_MATCHES +}; + +static really_inline +const struct mstate_aux *get_aux(const struct mcsheng *m, u32 s) { + const char *nfa = (const char *)m - sizeof(struct NFA); + const struct mstate_aux *aux + = s + (const struct mstate_aux *)(nfa + m->aux_offset); + + assert(ISALIGNED(aux)); + return aux; +} + +static really_inline +u32 mcshengEnableStarts(const struct mcsheng *m, u32 s) { + const struct mstate_aux *aux = get_aux(m, s); + + DEBUG_PRINTF("enabling starts %u->%hu\n", s, aux->top); + return aux->top; +} + +static really_inline +u32 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table, + u32 as) { + assert(ISALIGNED_N(sherman_state, 16)); + + u8 len = *(const u8 *)(sherman_state + SHERMAN_LEN_OFFSET); + + if (len) { + m128 ss_char = load128(sherman_state); + m128 cur_char = set16x8(cprime); + + u32 z = movemask128(eq128(ss_char, cur_char)); + + /* remove header cruft: type 1, len 1, daddy 2*/ + z &= ~0xf; + z &= (1U << (len + 4)) - 1; + + if (z) { + u32 i = ctz32(z & ~0xf) - 4; + + u32 s_out = unaligned_load_u16((const u8 *)sherman_state + + SHERMAN_STATES_OFFSET(len) + + sizeof(u16) * i); + DEBUG_PRINTF("found sherman match at %u/%u for c'=%hhu s=%u\n", i, + len, cprime, s_out); + return s_out; + } + } + + u32 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET); + return succ_table[(daddy << as) + cprime]; +} + +static really_inline +char doComplexReport(NfaCallback cb, void *ctxt, const struct mcsheng *m, + u32 s, u64a loc, char eod, u32 *cached_accept_state, + u32 *cached_accept_id) { + DEBUG_PRINTF("reporting state = %u, loc=%llu, eod %hhu\n", + s & STATE_MASK, loc, eod); + + if (!eod && s == 
*cached_accept_state) { + if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + + const struct mstate_aux *aux = get_aux(m, s); + size_t offset = eod ? aux->accept_eod : aux->accept; + + assert(offset); + const struct report_list *rl + = (const void *)((const char *)m + offset - sizeof(struct NFA)); + assert(ISALIGNED(rl)); + + DEBUG_PRINTF("report list size %u\n", rl->count); + u32 count = rl->count; + + if (!eod && count == 1) { + *cached_accept_state = s; + *cached_accept_id = rl->report[0]; + + DEBUG_PRINTF("reporting %u\n", rl->report[0]); + if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + + for (u32 i = 0; i < count; i++) { + DEBUG_PRINTF("reporting %u\n", rl->report[i]); + if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + } + + return MO_CONTINUE_MATCHING; /* continue execution */ +} + +#define SHENG_CHUNK 8 + +static really_inline +u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, + const u8 *hard_c_end, u32 s_in, char do_accel) { + assert(s_in < m->sheng_end); + assert(s_in); /* should not already be dead */ + assert(soft_c_end <= hard_c_end); + DEBUG_PRINTF("s_in = %u (adjusted %u)\n", s_in, s_in - 1); + m128 s = set16x8(s_in - 1); + const u8 *c = *c_inout; + const u8 *c_end = hard_c_end - SHENG_CHUNK + 1; + if (!do_accel) { + c_end = MIN(soft_c_end, hard_c_end - SHENG_CHUNK + 1); + } + const m128 *masks = m->sheng_masks; + u8 sheng_limit = m->sheng_end - 1; /* - 1: no dead state */ + u8 sheng_stop_limit = do_accel ? m->sheng_accel_limit : sheng_limit; + + /* When we use movd to get a u32 containing our state, it will have 4 lanes + * all duplicating the state. We can create versions of our limits with 4 + * copies to directly compare against, this prevents us generating code to + * extract a single copy of the state from the u32 for checking. 
 */
+    u32 sheng_stop_limit_x4 = sheng_stop_limit * 0x01010101;
+
+#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
+    u32 sheng_limit_x4 = sheng_limit * 0x01010101;
+    m128 simd_stop_limit = set4x32(sheng_stop_limit_x4);
+    m128 accel_delta = set16x8(sheng_limit - sheng_stop_limit);
+    DEBUG_PRINTF("end %hu, accel %hhu --> limit %hhu\n", sheng_limit,
+                 m->sheng_accel_limit, sheng_stop_limit);
+#endif
+
+#define SHENG_SINGLE_ITER do {                                          \
+        m128 shuffle_mask = masks[*(c++)];                              \
+        s = pshufb(shuffle_mask, s);                                    \
+        u32 s_gpr_x4 = movd(s); /* convert to u8 */                     \
+        DEBUG_PRINTF("c %hhu (%c) --> s %u\n", c[-1], c[-1], s_gpr_x4); \
+        if (s_gpr_x4 >= sheng_stop_limit_x4) {                          \
+            s_gpr = s_gpr_x4;                                           \
+            goto exit;                                                  \
+        }                                                               \
+    } while (0)
+
+    u8 s_gpr;
+    while (c < c_end) {
+#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
+        /* This version uses pext for efficiently bitbashing out scaled
+         * versions of the bytes to process from a u64a */
+
+        u64a data_bytes = unaligned_load_u64a(c);
+        u64a cc0 = pdep64(data_bytes, 0xff0); /* extract scaled low byte */
+        data_bytes &= ~0xffULL; /* clear low bits for scale space */
+        m128 shuffle_mask0 = load128((const char *)masks + cc0);
+        s = pshufb(shuffle_mask0, s);
+        m128 s_max = s;
+        m128 s_max0 = s_max;
+        DEBUG_PRINTF("c %02llx --> s %hhu\n", cc0 >> 4, movd(s));
+
+#define SHENG_SINGLE_UNROLL_ITER(iter)                                       \
+        assert(iter);                                                        \
+        u64a cc##iter = pext64(data_bytes, mcsheng_pext_mask[iter]);         \
+        assert(cc##iter == (u64a)c[iter] << 4);                              \
+        m128 shuffle_mask##iter = load128((const char *)masks + cc##iter);   \
+        s = pshufb(shuffle_mask##iter, s);                                   \
+        if (do_accel && iter == 7) {                                         \
+            /* in the final iteration we also have to check against accel */ \
+            m128 s_temp = sadd_u8_m128(s, accel_delta);                      \
+            s_max = max_u8_m128(s_max, s_temp);                              \
+        } else {                                                             \
+            s_max = max_u8_m128(s_max, s);                                   \
+        }                                                                    \
+        m128 s_max##iter = s_max;                                            \
+        DEBUG_PRINTF("c %02llx --> s %hhu max %hhu\n", cc##iter >> 4,        \
+                     movd(s), movd(s_max));
+
+        SHENG_SINGLE_UNROLL_ITER(1);
+
+        SHENG_SINGLE_UNROLL_ITER(2);
+        SHENG_SINGLE_UNROLL_ITER(3);
+
+        SHENG_SINGLE_UNROLL_ITER(4);
+        SHENG_SINGLE_UNROLL_ITER(5);
+
+        SHENG_SINGLE_UNROLL_ITER(6);
+        SHENG_SINGLE_UNROLL_ITER(7);
+
+        if (movd(s_max7) >= sheng_limit_x4) {
+            DEBUG_PRINTF("exit found\n");
+
+            /* Explicitly check whether the exit is at the last byte, as that
+             * is the most likely case: the final iteration also checks for
+             * acceleration.
*/ + if (movd(s_max6) < sheng_limit_x4) { + c += SHENG_CHUNK; + s_gpr = movq(s); + assert(s_gpr >= sheng_stop_limit); + goto exit; + } + + /* use shift-xor to create a register containing all of the max + * values */ + m128 blended = rshift64_m128(s_max0, 56); + blended = xor128(blended, rshift64_m128(s_max1, 48)); + blended = xor128(blended, rshift64_m128(s_max2, 40)); + blended = xor128(blended, rshift64_m128(s_max3, 32)); + blended = xor128(blended, rshift64_m128(s_max4, 24)); + blended = xor128(blended, rshift64_m128(s_max5, 16)); + blended = xor128(blended, rshift64_m128(s_max6, 8)); + blended = xor128(blended, s); + blended = xor128(blended, rshift64_m128(blended, 8)); + DEBUG_PRINTF("blended %016llx\n", movq(blended)); + + m128 final = min_u8_m128(blended, simd_stop_limit); + m128 cmp = sub_u8_m128(final, simd_stop_limit); + u64a stops = ~movemask128(cmp); + assert(stops); + u32 earliest = ctz32(stops); + DEBUG_PRINTF("stops %02llx, earliest %u\n", stops, earliest); + assert(earliest < 8); + c += earliest + 1; + s_gpr = movq(blended) >> (earliest * 8); + assert(s_gpr >= sheng_stop_limit); + goto exit; + } else { + c += SHENG_CHUNK; + } +#else + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; + + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; +#endif + } + + assert(c_end - c < SHENG_CHUNK); + if (c < soft_c_end) { + assert(soft_c_end - c < SHENG_CHUNK); + switch (soft_c_end - c) { + case 7: + SHENG_SINGLE_ITER; + case 6: + SHENG_SINGLE_ITER; + case 5: + SHENG_SINGLE_ITER; + case 4: + SHENG_SINGLE_ITER; + case 3: + SHENG_SINGLE_ITER; + case 2: + SHENG_SINGLE_ITER; + case 1: + SHENG_SINGLE_ITER; + } + } + + assert(c >= soft_c_end); + + s_gpr = movd(s); +exit: + assert(c <= hard_c_end); + DEBUG_PRINTF("%zu from end; s %hhu\n", c_end - c, s_gpr); + assert(c >= soft_c_end || s_gpr >= sheng_stop_limit); + /* undo state adjustment to match mcclellan view */ + if (s_gpr == sheng_limit) { + s_gpr = 0; + } else if (s_gpr < sheng_limit) { + s_gpr++; + } + + *c_inout = c; + return s_gpr; +} + +static really_inline +const char *findShermanState(UNUSED const struct mcsheng *m, + const char *sherman_base_offset, u32 sherman_base, + u32 s) { + const char *rv + = sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base); + assert(rv < (const char *)m + m->length - sizeof(struct NFA)); + UNUSED u8 type = *(const u8 *)(rv + SHERMAN_TYPE_OFFSET); + assert(type == SHERMAN_STATE); + return rv; +} + +static really_inline +const u8 *run_mcsheng_accel(const struct mcsheng *m, + const struct mstate_aux *aux, u32 s, + const u8 **min_accel_offset, + const u8 *c, const u8 *c_end) { + DEBUG_PRINTF("skipping\n"); + u32 accel_offset = aux[s].accel_offset; + + assert(aux[s].accel_offset); + assert(accel_offset >= m->aux_offset); + assert(!m->sherman_offset || accel_offset < m->sherman_offset); + + const union AccelAux *aaux = (const void *)((const char *)m + accel_offset); + const u8 *c2 = run_accel(aaux, c, c_end); + + if (c2 < *min_accel_offset + BAD_ACCEL_DIST) { + *min_accel_offset = c2 + BIG_ACCEL_PENALTY; + } else { + *min_accel_offset = c2 + SMALL_ACCEL_PENALTY; + } + + if (*min_accel_offset >= c_end - ACCEL_MIN_LEN) { + *min_accel_offset = c_end; + } + + DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n", + c2 - c, *min_accel_offset - c2, c_end - c2); + + return c2; +} + +static really_inline +u32 doNormal16(const struct mcsheng *m, const u8 **c_inout, const u8 *end, + u32 s, char do_accel, enum MatchMode mode) { + const u8 *c 
= *c_inout; + + const u16 *succ_table + = (const u16 *)((const char *)m + sizeof(struct mcsheng)); + assert(ISALIGNED_N(succ_table, 2)); + u32 sheng_end = m->sheng_end; + u32 sherman_base = m->sherman_limit; + const char *sherman_base_offset + = (const char *)m - sizeof(struct NFA) + m->sherman_offset; + u32 as = m->alphaShift; + + /* Adjust start of succ table so we can index into using state id (rather + * than adjust to normal id). As we will not be processing states with low + * state ids, we will not be accessing data before the succ table. Note: due + * to the size of the sheng tables, the succ_table pointer will still be + * inside the engine.*/ + succ_table -= sheng_end << as; + + s &= STATE_MASK; + + while (c < end && s >= sheng_end) { + u8 cprime = m->remap[*c]; + DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u)\n", *c, + ourisprint(*c) ? *c : '?', cprime, s); + if (s < sherman_base) { + DEBUG_PRINTF("doing normal\n"); + assert(s < m->state_count); + s = succ_table[(s << as) + cprime]; + } else { + const char *sherman_state + = findShermanState(m, sherman_base_offset, sherman_base, s); + DEBUG_PRINTF("doing sherman (%u)\n", s); + s = doSherman16(sherman_state, cprime, succ_table, as); + } + + DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK); + c++; + + if (do_accel && (s & ACCEL_FLAG)) { + break; + } + if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { + break; + } + + s &= STATE_MASK; + } + + *c_inout = c; + return s; +} + +static really_inline +char mcshengExec16_i(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **c_final, enum MatchMode mode) { + assert(ISALIGNED_N(state, 2)); + if (!len) { + if (mode == STOP_AT_MATCH) { + *c_final = buf; + } + return MO_ALIVE; + } + + u32 s = *state; + const u8 *c = buf; + const u8 *c_end = buf + len; + const u8 sheng_end = m->sheng_end; + const struct mstate_aux *aux + = (const struct mstate_aux *)((const char *)m + m->aux_offset + - sizeof(struct NFA)); + + s &= STATE_MASK; + + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + DEBUG_PRINTF("s: %u, len %zu\n", s, len); + + const u8 *min_accel_offset = c; + if (!m->has_accel || len < ACCEL_MIN_LEN) { + min_accel_offset = c_end; + goto without_accel; + } + + goto with_accel; + +without_accel: + do { + assert(c < min_accel_offset); + int do_accept; + if (!s) { + goto exit; + } else if (s < sheng_end) { + s = doSheng(m, &c, min_accel_offset, c_end, s, 0); + do_accept = mode != NO_MATCHES && get_aux(m, s)->accept; + } else { + s = doNormal16(m, &c, min_accel_offset, s, 0, mode); + + do_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG); + } + + if (do_accept) { + if (mode == STOP_AT_MATCH) { + *state = s & STATE_MASK; + *c_final = c - 1; + return MO_MATCHES_PENDING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; /* termination requested */ + } + } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + assert(c <= c_end); /* sheng is fuzzy for min_accel_offset */ + } while (c < min_accel_offset); + + if (c == c_end) { + goto exit; + } + +with_accel: + do { + assert(c < c_end); + int do_accept; + + if (!s) { + goto exit; + } else if (s < sheng_end) { + if (s > m->sheng_accel_limit) { + c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + 
goto exit; + } else { + goto without_accel; + } + } + s = doSheng(m, &c, c_end, c_end, s, 1); + do_accept = mode != NO_MATCHES && get_aux(m, s)->accept; + } else { + if (s & ACCEL_FLAG) { + DEBUG_PRINTF("skipping\n"); + s &= STATE_MASK; + c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { + goto without_accel; + } + } + + s = doNormal16(m, &c, c_end, s, 1, mode); + do_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG); + } + + if (do_accept) { + if (mode == STOP_AT_MATCH) { + *state = s & STATE_MASK; + *c_final = c - 1; + return MO_MATCHES_PENDING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; /* termination requested */ + } + } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + assert(c <= c_end); + } while (c < c_end); + +exit: + s &= STATE_MASK; + + if (mode == STOP_AT_MATCH) { + *c_final = c_end; + } + *state = s; + + return MO_ALIVE; +} + +static never_inline +char mcshengExec16_i_cb(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, CALLBACK_OUTPUT); +} + +static never_inline +char mcshengExec16_i_sam(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, STOP_AT_MATCH); +} + +static never_inline +char mcshengExec16_i_nm(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, NO_MATCHES); +} + +static really_inline +char mcshengExec16_i_ni(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point, + enum MatchMode mode) { + if (mode == CALLBACK_OUTPUT) { + return mcshengExec16_i_cb(m, state, buf, len, offAdj, cb, ctxt, + single, final_point); + } else if (mode == STOP_AT_MATCH) { + return mcshengExec16_i_sam(m, state, buf, len, offAdj, cb, ctxt, + single, final_point); + } else { + assert (mode == NO_MATCHES); + return mcshengExec16_i_nm(m, state, buf, len, offAdj, cb, ctxt, + single, final_point); + } +} + +static really_inline +u32 doNormal8(const struct mcsheng *m, const u8 **c_inout, const u8 *end, u32 s, + char do_accel, enum MatchMode mode) { + const u8 *c = *c_inout; + u32 sheng_end = m->sheng_end; + u32 accel_limit = m->accel_limit_8; + u32 accept_limit = m->accept_limit_8; + + const u32 as = m->alphaShift; + const u8 *succ_table = (const u8 *)((const char *)m + + sizeof(struct mcsheng)); + /* Adjust start of succ table so we can index into using state id (rather + * than adjust to normal id). As we will not be processing states with low + * state ids, we will not be accessing data before the succ table. 
Note: due + * to the size of the sheng tables, the succ_table pointer will still be + * inside the engine.*/ + succ_table -= sheng_end << as; + + assert(s >= sheng_end); + + while (c < end && s >= sheng_end) { + u8 cprime = m->remap[*c]; + DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *c, + ourisprint(*c) ? *c : '?', cprime); + s = succ_table[(s << as) + cprime]; + + DEBUG_PRINTF("s: %u\n", s); + c++; + if (do_accel) { + if (s >= accel_limit) { + break; + } + } else { + if (mode != NO_MATCHES && s >= accept_limit) { + break; + } + } + } + *c_inout = c; + return s; +} + +static really_inline +char mcshengExec8_i(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **c_final, enum MatchMode mode) { + if (!len) { + *c_final = buf; + return MO_ALIVE; + } + u32 s = *state; + const u8 *c = buf; + const u8 *c_end = buf + len; + const u8 sheng_end = m->sheng_end; + + const struct mstate_aux *aux + = (const struct mstate_aux *)((const char *)m + m->aux_offset + - sizeof(struct NFA)); + u32 accept_limit = m->accept_limit_8; + + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + DEBUG_PRINTF("accel %hu, accept %u\n", m->accel_limit_8, accept_limit); + + DEBUG_PRINTF("s: %u, len %zu\n", s, len); + + const u8 *min_accel_offset = c; + if (!m->has_accel || len < ACCEL_MIN_LEN) { + min_accel_offset = c_end; + goto without_accel; + } + + goto with_accel; + +without_accel: + do { + assert(c < min_accel_offset); + if (!s) { + goto exit; + } else if (s < sheng_end) { + s = doSheng(m, &c, min_accel_offset, c_end, s, 0); + } else { + s = doNormal8(m, &c, min_accel_offset, s, 0, mode); + assert(c <= min_accel_offset); + } + + if (mode != NO_MATCHES && s >= accept_limit) { + if (mode == STOP_AT_MATCH) { + DEBUG_PRINTF("match - pausing\n"); + *state = s; + *c_final = c - 1; + return MO_MATCHES_PENDING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; + } + } else if (doComplexReport(cb, ctxt, m, s, loc, 0, + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + assert(c <= c_end); /* sheng is fuzzy for min_accel_offset */ + } while (c < min_accel_offset); + + if (c == c_end) { + goto exit; + } + +with_accel: + do { + u32 accel_limit = m->accel_limit_8; + + assert(c < c_end); + if (!s) { + goto exit; + } else if (s < sheng_end) { + if (s > m->sheng_accel_limit) { + c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { + goto without_accel; + } + } + s = doSheng(m, &c, c_end, c_end, s, 1); + } else { + if (s >= accel_limit && aux[s].accel_offset) { + c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { + goto without_accel; + } + } + s = doNormal8(m, &c, c_end, s, 1, mode); + } + + if (mode != NO_MATCHES && s >= accept_limit) { + if (mode == STOP_AT_MATCH) { + DEBUG_PRINTF("match - pausing\n"); + *state = s; + *c_final = c - 1; + return MO_MATCHES_PENDING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; + } + } else if (doComplexReport(cb, ctxt, m, s, loc, 0, + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + assert(c <= c_end); + } while (c < c_end); + +exit: 
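+    /* Editorial note (not part of the original patch): falling through to
+     * this label with s == 0 still returns MO_ALIVE; it is the queue-level
+     * wrapper (nfaExecMcSheng8_Q2i below) that converts a dead state into
+     * MO_DEAD when it handles MQE_END. */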
+ *state = s; + if (mode == STOP_AT_MATCH) { + *c_final = c_end; + } + return MO_ALIVE; +} + +static never_inline +char mcshengExec8_i_cb(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, CALLBACK_OUTPUT); +} + +static never_inline +char mcshengExec8_i_sam(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, STOP_AT_MATCH); +} + +static never_inline +char mcshengExec8_i_nm(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, NO_MATCHES); +} + +static really_inline +char mcshengExec8_i_ni(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point, + enum MatchMode mode) { + if (mode == CALLBACK_OUTPUT) { + return mcshengExec8_i_cb(m, state, buf, len, offAdj, cb, ctxt, single, + final_point); + } else if (mode == STOP_AT_MATCH) { + return mcshengExec8_i_sam(m, state, buf, len, offAdj, cb, ctxt, + single, final_point); + } else { + assert(mode == NO_MATCHES); + return mcshengExec8_i_nm(m, state, buf, len, offAdj, cb, ctxt, single, + final_point); + } +} + +static really_inline +char mcshengCheckEOD(const struct NFA *nfa, u32 s, u64a offset, + NfaCallback cb, void *ctxt) { + const struct mcsheng *m = getImplNfa(nfa); + const struct mstate_aux *aux = get_aux(m, s); + + if (!aux->accept_eod) { + return MO_CONTINUE_MATCHING; + } + return doComplexReport(cb, ctxt, m, s, offset, 1, NULL, NULL); +} + +static really_inline +char nfaExecMcSheng16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, + const u8 *hend, NfaCallback cb, void *context, + struct mq *q, char single, s64a end, + enum MatchMode mode) { + assert(n->type == MCSHENG_NFA_16); + const struct mcsheng *m = getImplNfa(n); + s64a sp; + + assert(ISALIGNED_N(q->state, 2)); + u32 s = *(u16 *)q->state; + + if (q->report_current) { + assert(s); + assert(get_aux(m, s)->accept); + + int rv; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + rv = cb(0, q_cur_offset(q), m->arb_report, context); + } else { + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0, + &cached_accept_state, &cached_accept_id); + } + + q->report_current = 0; + + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + sp = q_cur_loc(q); + q->cur++; + + const u8 *cur_buf = sp < 0 ? 
hend : buffer; + + assert(q->cur); + if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u16 *)q->state = s; + return MO_ALIVE; + } + + while (1) { + assert(q->cur < q->end); + s64a ep = q->items[q->cur].location; + if (mode != NO_MATCHES) { + ep = MIN(ep, end); + } + + assert(ep >= sp); + + s64a local_ep = ep; + if (sp < 0) { + local_ep = MIN(0, ep); + } + + /* do main buffer region */ + const u8 *final_look; + char rv = mcshengExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp, + offset + sp, cb, context, single, + &final_look, mode); + if (rv == MO_DEAD) { + *(u16 *)q->state = 0; + return MO_DEAD; + } + if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { + DEBUG_PRINTF("this is as far as we go\n"); + DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); + + assert(q->cur); + assert(final_look != cur_buf + local_ep); + + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = final_look - cur_buf + 1; /* due to + * early -1 */ + *(u16 *)q->state = s; + return MO_MATCHES_PENDING; + } + + assert(rv == MO_ALIVE); + assert(q->cur); + if (mode != NO_MATCHES && q->items[q->cur].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u16 *)q->state = s; + return MO_ALIVE; + } + + sp = local_ep; + + if (sp == 0) { + cur_buf = buffer; + } + + if (sp != ep) { + continue; + } + + switch (q->items[q->cur].type) { + case MQE_TOP: + assert(sp + offset || !s); + if (sp + offset == 0) { + s = m->start_anchored; + break; + } + s = mcshengEnableStarts(m, s); + break; + case MQE_END: + *(u16 *)q->state = s; + q->cur++; + return s ? MO_ALIVE : MO_DEAD; + default: + assert(!"invalid queue event"); + } + + q->cur++; + } +} + +static really_inline +char nfaExecMcSheng8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, + const u8 *hend, NfaCallback cb, void *context, + struct mq *q, char single, s64a end, + enum MatchMode mode) { + assert(n->type == MCSHENG_NFA_8); + const struct mcsheng *m = getImplNfa(n); + s64a sp; + + u32 s = *(u8 *)q->state; + + if (q->report_current) { + assert(s); + assert(s >= m->accept_limit_8); + + int rv; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + rv = cb(0, q_cur_offset(q), m->arb_report, context); + } else { + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0, + &cached_accept_state, &cached_accept_id); + } + + q->report_current = 0; + + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + sp = q_cur_loc(q); + q->cur++; + + const u8 *cur_buf = sp < 0 ? hend : buffer; + + if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u8 *)q->state = s; + return MO_ALIVE; + } + + while (1) { + DEBUG_PRINTF("%s @ %llu\n", q->items[q->cur].type == MQE_TOP ? "TOP" : + q->items[q->cur].type == MQE_END ? 
"END" : "???", + q->items[q->cur].location + offset); + assert(q->cur < q->end); + s64a ep = q->items[q->cur].location; + if (mode != NO_MATCHES) { + ep = MIN(ep, end); + } + + assert(ep >= sp); + + s64a local_ep = ep; + if (sp < 0) { + local_ep = MIN(0, ep); + } + + const u8 *final_look; + char rv = mcshengExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, + offset + sp, cb, context, single, + &final_look, mode); + if (rv == MO_HALT_MATCHING) { + *(u8 *)q->state = 0; + return MO_DEAD; + } + if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { + DEBUG_PRINTF("this is as far as we go\n"); + DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); + + assert(q->cur); + assert(final_look != cur_buf + local_ep); + + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = final_look - cur_buf + 1; /* due to + * early -1 */ + *(u8 *)q->state = s; + return MO_MATCHES_PENDING; + } + + assert(rv == MO_ALIVE); + assert(q->cur); + if (mode != NO_MATCHES && q->items[q->cur].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + assert(q->cur); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u8 *)q->state = s; + return MO_ALIVE; + } + + sp = local_ep; + + if (sp == 0) { + cur_buf = buffer; + } + + if (sp != ep) { + continue; + } + + switch (q->items[q->cur].type) { + case MQE_TOP: + assert(sp + offset || !s); + if (sp + offset == 0) { + s = (u8)m->start_anchored; + break; + } + s = mcshengEnableStarts(m, s); + break; + case MQE_END: + *(u8 *)q->state = s; + q->cur++; + return s ? MO_ALIVE : MO_DEAD; + default: + assert(!"invalid queue event"); + } + + q->cur++; + } +} + +char nfaExecMcSheng8_Q(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_8); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcSheng8_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, end, + CALLBACK_OUTPUT); +} + +char nfaExecMcSheng16_Q(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_16); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcSheng16_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, end, + CALLBACK_OUTPUT); +} + +char nfaExecMcSheng8_reportCurrent(const struct NFA *n, struct mq *q) { + const struct mcsheng *m = getImplNfa(n); + NfaCallback cb = q->cb; + void *ctxt = q->context; + u32 s = *(u8 *)q->state; + u8 single = m->flags & MCSHENG_FLAG_SINGLE; + u64a offset = q_cur_offset(q); + assert(q_cur_type(q) == MQE_START); + assert(s); + + if (s >= m->accept_limit_8) { + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + cb(0, offset, m->arb_report, ctxt); + } else { + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state, + &cached_accept_id); + } + } + + return 0; +} + +char nfaExecMcSheng16_reportCurrent(const struct NFA *n, struct mq *q) { + const struct mcsheng *m = getImplNfa(n); + NfaCallback cb = q->cb; + void *ctxt = q->context; + u32 s = *(u16 *)q->state; + const struct mstate_aux *aux = get_aux(m, s); + u8 single = m->flags & MCSHENG_FLAG_SINGLE; + u64a offset = q_cur_offset(q); + 
assert(q_cur_type(q) == MQE_START); + DEBUG_PRINTF("state %u\n", s); + assert(s); + + if (aux->accept) { + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + cb(0, offset, m->arb_report, ctxt); + } else { + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state, + &cached_accept_id); + } + } + + return 0; +} + +static +char mcshengHasAccept(const struct mcsheng *m, const struct mstate_aux *aux, + ReportID report) { + assert(m && aux); + + if (!aux->accept) { + return 0; + } + + const struct report_list *rl = (const struct report_list *) + ((const char *)m + aux->accept - sizeof(struct NFA)); + assert(ISALIGNED_N(rl, 4)); + + DEBUG_PRINTF("report list has %u entries\n", rl->count); + + for (u32 i = 0; i < rl->count; i++) { + if (rl->report[i] == report) { + return 1; + } + } + + return 0; +} + +char nfaExecMcSheng8_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { + assert(n && q); + + const struct mcsheng *m = getImplNfa(n); + u8 s = *(u8 *)q->state; + DEBUG_PRINTF("checking accepts for %hhu\n", s); + + return mcshengHasAccept(m, get_aux(m, s), report); +} + +char nfaExecMcSheng8_inAnyAccept(const struct NFA *n, struct mq *q) { + assert(n && q); + + const struct mcsheng *m = getImplNfa(n); + u8 s = *(u8 *)q->state; + DEBUG_PRINTF("checking accepts for %hhu\n", s); + + return !!get_aux(m, s)->accept; +} + +char nfaExecMcSheng16_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { + assert(n && q); + + const struct mcsheng *m = getImplNfa(n); + u16 s = *(u16 *)q->state; + DEBUG_PRINTF("checking accepts for %hu\n", s); + + return mcshengHasAccept(m, get_aux(m, s), report); +} + +char nfaExecMcSheng16_inAnyAccept(const struct NFA *n, struct mq *q) { + assert(n && q); + + const struct mcsheng *m = getImplNfa(n); + u16 s = *(u16 *)q->state; + DEBUG_PRINTF("checking accepts for %hu\n", s); + + return !!get_aux(m, s)->accept; +} + +char nfaExecMcSheng8_Q2(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_8); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcSheng8_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, end, + STOP_AT_MATCH); +} + +char nfaExecMcSheng16_Q2(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_16); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcSheng16_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, end, + STOP_AT_MATCH); +} + +char nfaExecMcSheng8_QR(const struct NFA *n, struct mq *q, ReportID report) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_8); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + char rv = nfaExecMcSheng8_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, 0 /* end */, + NO_MATCHES); + if (rv && nfaExecMcSheng8_inAccept(n, report, q)) { + return MO_MATCHES_PENDING; + } else { + return rv; + } +} + +char nfaExecMcSheng16_QR(const struct NFA *n, struct mq *q, ReportID report) { + u64a offset = q->offset; + const u8 *buffer = 
q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_16); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + char rv = nfaExecMcSheng16_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, 0 /* end */, + NO_MATCHES); + + if (rv && nfaExecMcSheng16_inAccept(n, report, q)) { + return MO_MATCHES_PENDING; + } else { + return rv; + } +} + +char nfaExecMcSheng8_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, UNUSED u8 key) { + const struct mcsheng *m = getImplNfa(nfa); + u8 s = offset ? m->start_floating : m->start_anchored; + if (s) { + *(u8 *)state = s; + return 1; + } + return 0; +} + +char nfaExecMcSheng16_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, UNUSED u8 key) { + const struct mcsheng *m = getImplNfa(nfa); + u16 s = offset ? m->start_floating : m->start_anchored; + if (s) { + unaligned_store_u16(state, s); + return 1; + } + return 0; +} + +char nfaExecMcSheng8_testEOD(const struct NFA *nfa, const char *state, + UNUSED const char *streamState, u64a offset, + NfaCallback callback, void *context) { + return mcshengCheckEOD(nfa, *(const u8 *)state, offset, callback, + context); +} + +char nfaExecMcSheng16_testEOD(const struct NFA *nfa, const char *state, + UNUSED const char *streamState, u64a offset, + NfaCallback callback, void *context) { + assert(ISALIGNED_N(state, 2)); + return mcshengCheckEOD(nfa, *(const u16 *)state, offset, callback, + context); +} + +char nfaExecMcSheng8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { + assert(nfa->scratchStateSize == 1); + *(u8 *)q->state = 0; + return 0; +} + +char nfaExecMcSheng16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { + assert(nfa->scratchStateSize == 2); + assert(ISALIGNED_N(q->state, 2)); + *(u16 *)q->state = 0; + return 0; +} + +char nfaExecMcSheng8_queueCompressState(UNUSED const struct NFA *nfa, + const struct mq *q, UNUSED s64a loc) { + void *dest = q->streamState; + const void *src = q->state; + assert(nfa->scratchStateSize == 1); + assert(nfa->streamStateSize == 1); + *(u8 *)dest = *(const u8 *)src; + return 0; +} + +char nfaExecMcSheng8_expandState(UNUSED const struct NFA *nfa, void *dest, + const void *src, UNUSED u64a offset, + UNUSED u8 key) { + assert(nfa->scratchStateSize == 1); + assert(nfa->streamStateSize == 1); + *(u8 *)dest = *(const u8 *)src; + return 0; +} + +char nfaExecMcSheng16_queueCompressState(UNUSED const struct NFA *nfa, + const struct mq *q, + UNUSED s64a loc) { + void *dest = q->streamState; + const void *src = q->state; + assert(nfa->scratchStateSize == 2); + assert(nfa->streamStateSize == 2); + assert(ISALIGNED_N(src, 2)); + unaligned_store_u16(dest, *(const u16 *)(src)); + return 0; +} + +char nfaExecMcSheng16_expandState(UNUSED const struct NFA *nfa, void *dest, + const void *src, UNUSED u64a offset, + UNUSED u8 key) { + assert(nfa->scratchStateSize == 2); + assert(nfa->streamStateSize == 2); + assert(ISALIGNED_N(dest, 2)); + *(u16 *)dest = unaligned_load_u16(src); + return 0; +} diff --git a/src/nfa/mcsheng.h b/src/nfa/mcsheng.h new file mode 100644 index 00000000..19fd6961 --- /dev/null +++ b/src/nfa/mcsheng.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list 
of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MCSHENG_H +#define MCSHENG_H + +#include "callback.h" +#include "ue2common.h" + +struct mq; +struct NFA; + +/* 8-bit Sheng-McClellan hybrid */ + +char nfaExecMcSheng8_testEOD(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context); +char nfaExecMcSheng8_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcSheng8_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcSheng8_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecMcSheng8_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecMcSheng8_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecMcSheng8_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecMcSheng8_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecMcSheng8_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecMcSheng8_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecMcSheng8_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); + +#define nfaExecMcSheng8_B_Reverse NFA_API_NO_IMPL +#define nfaExecMcSheng8_zombie_status NFA_API_ZOMBIE_NO_IMPL + +/* 16-bit Sheng-McClellan hybrid */ + +char nfaExecMcSheng16_testEOD(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context); +char nfaExecMcSheng16_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcSheng16_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcSheng16_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecMcSheng16_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecMcSheng16_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecMcSheng16_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecMcSheng16_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecMcSheng16_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecMcSheng16_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecMcSheng16_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); + 
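+/* Editorial usage sketch (not part of the original patch): these entry
+ * points are normally reached through the generic NFA API rather than
+ * called directly. Assuming an engine 'nfa' of type MCSHENG_NFA_8 or
+ * MCSHENG_NFA_16 and the queue helpers from nfa_api_queue.h, block-mode
+ * scanning looks roughly like ('my_cb' and 'state_buf' are hypothetical):
+ *
+ *     struct mq q;
+ *     memset(&q, 0, sizeof(q));
+ *     q.buffer = data;
+ *     q.length = data_len;
+ *     q.state = state_buf;    // >= nfa->scratchStateSize bytes
+ *     q.cb = my_cb;           // a NfaCallback
+ *     q.context = my_ctxt;
+ *     pushQueue(&q, MQE_START, 0);
+ *     pushQueue(&q, MQE_TOP, 0);
+ *     pushQueue(&q, MQE_END, data_len);
+ *     nfaQueueInitState(nfa, &q);
+ *     nfaQueueExec(nfa, &q, data_len); // dispatches to nfaExecMcSheng*_Q
+ */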
+#define nfaExecMcSheng16_B_Reverse NFA_API_NO_IMPL +#define nfaExecMcSheng16_zombie_status NFA_API_ZOMBIE_NO_IMPL + +#endif diff --git a/src/nfa/mcsheng_compile.cpp b/src/nfa/mcsheng_compile.cpp new file mode 100644 index 00000000..666c3b1d --- /dev/null +++ b/src/nfa/mcsheng_compile.cpp @@ -0,0 +1,1144 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "mcsheng_compile.h"
+
+#include "accel.h"
+#include "accelcompile.h"
+#include "grey.h"
+#include "mcclellancompile.h"
+#include "mcclellancompile_util.h"
+#include "mcsheng_internal.h"
+#include "nfa_internal.h"
+#include "rdfa_graph.h"
+#include "shufticompile.h"
+#include "trufflecompile.h"
+#include "ue2common.h"
+#include "util/alloc.h"
+#include "util/bitutils.h"
+#include "util/charreach.h"
+#include "util/compare.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
+#include "util/make_unique.h"
+#include "util/order_check.h"
+#include "util/report_manager.h"
+#include "util/ue2_containers.h"
+#include "util/unaligned.h"
+#include "util/verify_types.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <deque>
+#include <map>
+#include <memory>
+#include <set>
+#include <unordered_map>
+#include <vector>
+
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_keys;
+
+namespace ue2 {
+
+namespace /* anon */ {
+
+#define MIN_SHENG_SIZE 6
+#define INVALID_SHENG_ID 255
+
+struct dstate_extra {
+    u16 daddytaken = 0;
+    bool shermanState = false;
+    bool sheng_succ = false;
+    u8 sheng_id = INVALID_SHENG_ID;
+};
+
+struct dfa_info {
+    accel_dfa_build_strat &strat;
+    raw_dfa &raw;
+    vector<dstate> &states;
+    vector<dstate_extra> extra;
+    const u16 alpha_size; /* including special symbols */
+    const array<u16, ALPHABET_SIZE> &alpha_remap;
+    vector<CharReach> rev_alpha;
+    const u16 impl_alpha_size;
+
+    u8 getAlphaShift() const;
+
+    explicit dfa_info(accel_dfa_build_strat &s)
+        : strat(s),
+          raw(s.get_raw()),
+          states(raw.states),
+          extra(raw.states.size()),
+          alpha_size(raw.alpha_size),
+          alpha_remap(raw.alpha_remap),
+          impl_alpha_size(raw.getImplAlphaSize()) {
+        rev_alpha.resize(impl_alpha_size);
+        for (u32 i = 0; i < N_CHARS; i++) {
+            rev_alpha[alpha_remap[i]].set(i);
+        }
+    }
+
+    dstate_id_t implId(dstate_id_t raw_id) const {
+        return states[raw_id].impl_id;
+    }
+
+    bool is_sherman(dstate_id_t raw_id) const {
+        return extra[raw_id].shermanState;
+    }
+
+    bool is_sheng(dstate_id_t raw_id) const {
+        return extra[raw_id].sheng_id != INVALID_SHENG_ID;
+    }
+
+    bool is_sheng_succ(dstate_id_t raw_id) const {
+        return extra[raw_id].sheng_succ;
+    }
+
+    /* states which use the normal transition/successor table */
+    bool is_normal(dstate_id_t raw_id) const {
+        return raw_id != DEAD_STATE && !is_sheng(raw_id) && !is_sherman(raw_id);
+    }
+    size_t size(void) const { return states.size(); }
+};
+
+u8 dfa_info::getAlphaShift() const {
+    if (impl_alpha_size < 2) {
+        return 1;
+    } else {
+        /* log2 round up */
+        return 32 - clz32(impl_alpha_size - 1);
+    }
+}
+
+} // namespace
+
+static
+mstate_aux *getAux(NFA *n, dstate_id_t i) {
+    mcsheng *m = (mcsheng *)getMutableImplNfa(n);
+    mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset);
+
+    mstate_aux *aux = aux_base + i;
+    assert((const char *)aux < (const char *)n + m->length);
+    return aux;
+}
+
+static
+void createShuffleMasks(mcsheng *m, const dfa_info &info,
+                        dstate_id_t sheng_end,
+                        const map<dstate_id_t, AccelScheme> &accel_escape_info) {
+    DEBUG_PRINTF("using first %hu states for a sheng\n", sheng_end);
+    assert(sheng_end > DEAD_STATE + 1);
+    assert(sheng_end <= sizeof(m128) + 1);
+    vector<array<u8, sizeof(m128)>> masks;
+    masks.resize(info.alpha_size);
+    /* -1 to avoid wasting a slot as we do not include dead state */
+    vector<dstate_id_t> raw_ids;
+    raw_ids.resize(sheng_end - 1);
+    for (dstate_id_t s = DEAD_STATE + 1; s < info.states.size(); s++) {
+        assert(info.implId(s)); /* should not map to DEAD_STATE */
+        if (info.is_sheng(s)) {
+            raw_ids[info.extra[s].sheng_id] = s;
+        }
+    }
+    for (u32 i = 0; i < info.alpha_size; i++)
{ + if (i == info.alpha_remap[TOP]) { + continue; + } + auto &mask = masks[i]; + assert(sizeof(mask) == sizeof(m128)); + mask.fill(0); + + for (dstate_id_t sheng_id = 0; sheng_id < sheng_end - 1; sheng_id++) { + dstate_id_t raw_id = raw_ids[sheng_id]; + dstate_id_t next_id = info.implId(info.states[raw_id].next[i]); + if (next_id == DEAD_STATE) { + next_id = sheng_end - 1; + } else if (next_id < sheng_end) { + next_id--; + } + DEBUG_PRINTF("%hu: %u->next %hu\n", sheng_id, i, next_id); + mask[sheng_id] = verify_u8(next_id); + } + } + for (u32 i = 0; i < N_CHARS; i++) { + assert(info.alpha_remap[i] != info.alpha_remap[TOP]); + m->sheng_masks[i] = loadu128(masks[info.alpha_remap[i]].data()); + } + m->sheng_end = sheng_end; + m->sheng_accel_limit = sheng_end - 1; + + for (dstate_id_t s : raw_ids) { + if (contains(accel_escape_info, s)) { + LIMIT_TO_AT_MOST(&m->sheng_accel_limit, info.extra[s].sheng_id); + } + } +} + +static +void populateBasicInfo(size_t state_size, const dfa_info &info, + u32 total_size, u32 aux_offset, u32 accel_offset, + u32 accel_count, ReportID arb, bool single, NFA *nfa) { + assert(state_size == sizeof(u16) || state_size == sizeof(u8)); + + nfa->length = total_size; + nfa->nPositions = info.states.size(); + + nfa->scratchStateSize = verify_u32(state_size); + nfa->streamStateSize = verify_u32(state_size); + + if (state_size == sizeof(u8)) { + nfa->type = MCSHENG_NFA_8; + } else { + nfa->type = MCSHENG_NFA_16; + } + + mcsheng *m = (mcsheng *)getMutableImplNfa(nfa); + for (u32 i = 0; i < 256; i++) { + m->remap[i] = verify_u8(info.alpha_remap[i]); + } + m->alphaShift = info.getAlphaShift(); + m->length = total_size; + m->aux_offset = aux_offset; + m->accel_offset = accel_offset; + m->arb_report = arb; + m->state_count = verify_u16(info.size()); + m->start_anchored = info.implId(info.raw.start_anchored); + m->start_floating = info.implId(info.raw.start_floating); + m->has_accel = accel_count ? 
1 : 0;
+
+    if (single) {
+        m->flags |= MCSHENG_FLAG_SINGLE;
+    }
+}
+
+namespace {
+
+struct raw_report_list {
+    flat_set<ReportID> reports;
+
+    raw_report_list(const flat_set<ReportID> &reports_in,
+                    const ReportManager &rm, bool do_remap) {
+        if (do_remap) {
+            for (auto &id : reports_in) {
+                reports.insert(rm.getProgramOffset(id));
+            }
+        } else {
+            reports = reports_in;
+        }
+    }
+
+    bool operator<(const raw_report_list &b) const {
+        return reports < b.reports;
+    }
+};
+
+struct raw_report_info_impl : public raw_report_info {
+    vector<raw_report_list> rl;
+    u32 getReportListSize() const override;
+    size_t size() const override;
+    void fillReportLists(NFA *n, size_t base_offset,
+                         std::vector<u32> &ro /* out */) const override;
+};
+}
+
+u32 raw_report_info_impl::getReportListSize() const {
+    u32 rv = 0;
+
+    for (const auto &reps : rl) {
+        rv += sizeof(report_list);
+        rv += sizeof(ReportID) * reps.reports.size();
+    }
+
+    return rv;
+}
+
+size_t raw_report_info_impl::size() const {
+    return rl.size();
+}
+
+void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset,
+                                           vector<u32> &ro) const {
+    for (const auto &reps : rl) {
+        ro.push_back(base_offset);
+
+        report_list *p = (report_list *)((char *)n + base_offset);
+
+        u32 i = 0;
+        for (const ReportID report : reps.reports) {
+            p->report[i++] = report;
+        }
+        p->count = verify_u32(reps.reports.size());
+
+        base_offset += sizeof(report_list);
+        base_offset += sizeof(ReportID) * reps.reports.size();
+    }
+}
+
+static
+void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info,
+                  set<dstate_id_t> *accel_states) {
+    for (dstate_id_t i : accel_escape_info | map_keys) {
+        accel_states->insert(i);
+    }
+}
+
+static
+size_t calcShermanRegionSize(const dfa_info &info) {
+    size_t rv = 0;
+
+    for (size_t i = 0; i < info.size(); i++) {
+        if (info.is_sherman(i)) {
+            rv += SHERMAN_FIXED_SIZE;
+        }
+    }
+
+    return ROUNDUP_16(rv);
+}
+
+static
+void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info,
+               const vector<u32> &reports, const vector<u32> &reports_eod,
+               const vector<u32> &reportOffsets) {
+    const dstate &raw_state = info.states[i];
+    aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]];
+    aux->accept_eod = raw_state.reports_eod.empty() ? 0
+                                              : reportOffsets[reports_eod[i]];
+    aux->top = info.implId(i ?
raw_state.next[info.alpha_remap[TOP]]
+                              : info.raw.start_floating);
+}
+
+/* returns false on error */
+static
+bool allocateImplId16(dfa_info &info, dstate_id_t sheng_end,
+                      dstate_id_t *sherman_base) {
+    info.states[0].impl_id = 0; /* dead is always 0 */
+
+    vector<dstate_id_t> norm;
+    vector<dstate_id_t> sherm;
+    vector<dstate_id_t> norm_sheng_succ;
+    vector<dstate_id_t> sherm_sheng_succ;
+
+    if (info.size() > (1 << 16)) {
+        DEBUG_PRINTF("too many states\n");
+        *sherman_base = 0;
+        return false;
+    }
+
+    for (u32 i = 1; i < info.size(); i++) {
+        if (info.is_sheng(i)) {
+            continue; /* sheng impl ids have already been allocated */
+        } else if (info.is_sherman(i)) {
+            if (info.is_sheng_succ(i)) {
+                sherm_sheng_succ.push_back(i);
+            } else {
+                sherm.push_back(i);
+            }
+        } else {
+            if (info.is_sheng_succ(i)) {
+                norm_sheng_succ.push_back(i);
+            } else {
+                norm.push_back(i);
+            }
+        }
+    }
+
+    dstate_id_t next_norm = sheng_end;
+    for (dstate_id_t s : norm_sheng_succ) {
+        info.states[s].impl_id = next_norm++;
+    }
+    if (next_norm + norm.size() + sherm_sheng_succ.size() > UINT8_MAX) {
+        /* we need to give sheng_succs ids which fit into a u8 -- demote these
+         * to normal states */
+        for (dstate_id_t s : sherm_sheng_succ) {
+            info.states[s].impl_id = next_norm++;
+            info.extra[s].shermanState = false;
+        }
+        sherm_sheng_succ.clear();
+    }
+    for (dstate_id_t s : norm) {
+        info.states[s].impl_id = next_norm++;
+    }
+
+    *sherman_base = next_norm;
+    dstate_id_t next_sherman = next_norm;
+
+    for (dstate_id_t s : sherm_sheng_succ) {
+        info.states[s].impl_id = next_sherman++;
+    }
+
+    for (dstate_id_t s : sherm) {
+        info.states[s].impl_id = next_sherman++;
+    }
+
+    /* Check to see if we haven't over allocated our states */
+    DEBUG_PRINTF("next sherman %u masked %u\n", next_sherman,
+                 (dstate_id_t)(next_sherman & STATE_MASK));
+    return (next_sherman - 1) == ((next_sherman - 1) & STATE_MASK);
+}
+
+typedef RdfaGraph::vertex_descriptor RdfaVertex;
+
+static
+bool mark_sheng_succs(const RdfaGraph &g, dfa_info &info,
+                      const flat_set<RdfaVertex> &sheng_states) {
+    u32 exit_count = 0;
+
+    for (auto v : sheng_states) {
+        dstate_id_t s = g[v].index;
+        for (u32 i = 0; i != info.alpha_size; i++) {
+            if (i == info.alpha_remap[TOP]) {
+                continue;
+            }
+            dstate_id_t next = info.states[s].next[i];
+            if (!next || info.is_sheng(next) || info.is_sheng_succ(next)) {
+                continue;
+            }
+            exit_count++;
+            info.extra[next].sheng_succ = true;
+        }
+    }
+
+    if (exit_count + sheng_states.size() < UINT8_MAX) {
+        return true;
+    } else {
+        DEBUG_PRINTF("fail: unable to fit %u exits in byte", exit_count);
+        return false;
+    }
+}
+
+static
+CharReach get_edge_reach(dstate_id_t u, dstate_id_t v, const dfa_info &info) {
+    CharReach rv;
+    for (u32 i = 0; i < info.impl_alpha_size; i++) {
+        if (info.raw.states[u].next[i] == v) {
+            assert(info.rev_alpha[i].any());
+            rv |= info.rev_alpha[i];
+        }
+    }
+    assert(rv.any());
+    return rv;
+}
+
+#define MAX_SHENG_STATES 16
+#define MAX_SHENG_LEAKINESS 0.05
+
+/**
+ * Returns the proportion of strings of length 'depth' which will leave the
+ * sheng region when starting at state 'u'.
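+ *
+ * (Editorial example, not in the original patch: if 128 of the 256
+ * characters leave the region from 'u' and the other 128 lead to a sheng
+ * state 'v', then at depth 1 the leakiness of 'u' is 128/256 = 0.5, and at
+ * depth 2 it is 0.5 + 0.5 * leakiness(v, depth 1), per the recursion below.)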
+ */ +static +double leakiness(const RdfaGraph &g, dfa_info &info, + const flat_set &sheng_states, RdfaVertex u, + u32 depth, + unordered_map, double> &cache) { + double rv = 0; + if (contains(cache, make_pair(u, depth))) { + return cache[make_pair(u, depth)]; + } + for (RdfaVertex v : adjacent_vertices_range(u, g)) { + if (g[v].index == DEAD_STATE) { + continue; + } + double width = get_edge_reach(g[u].index, g[v].index, info).count(); + width /= N_CHARS; + + double weight; + if (!contains(sheng_states, v)) { + weight = 1; + } else if (depth > 1) { + weight = leakiness(g, info, sheng_states, v, depth - 1, cache); + } else { + continue; /* weight = 0 */ + } + rv += width * weight; + } + + cache[make_pair(u, depth)] = rv; + DEBUG_PRINTF("%zu [%u] q = %g\n", g[u].index, depth, rv); + return rv; +} + +/** + * Returns the proportion of 8-byte strings which will leave the sheng region + * when starting at state 'u'. + */ +static +double leakiness(const RdfaGraph &g, dfa_info &info, + const flat_set &sheng_states, RdfaVertex u) { + unordered_map, double> cache; + double rv = leakiness(g, info, sheng_states, u, 8, cache); + return rv; +} + +static +dstate_id_t find_sheng_states(dfa_info &info, + map &accel_escape_info) { + RdfaGraph g(info.raw); + auto cyclics = find_vertices_in_cycles(g); + + auto base_cyclic = RdfaGraph::null_vertex(); + for (const auto &v : cyclics) { + if (g[v].index == DEAD_STATE) { + continue; + } + DEBUG_PRINTF("considering cyclic %zu\n", g[v].index); + /* get an estimate of the stickiness of the cyclic: assume any edges + * from states with larger state ids are back edges */ + CharReach est_back_reach; + for (const auto &u : inv_adjacent_vertices_range(v, g)) { + if (g[u].index < g[v].index) { + continue; + } + est_back_reach |= get_edge_reach(g[u].index, g[v].index, info); + } + + if (est_back_reach.count() < 30) { + continue; + } + base_cyclic = v; + break; + } + if (!base_cyclic) { + return DEAD_STATE; + } + + flat_set sheng_states; + deque to_consider = { base_cyclic }; + flat_set considered = { DEAD_STATE }; + bool seen_back_edge = false; + while (!to_consider.empty() + && sheng_states.size() < MAX_SHENG_STATES) { + auto v = to_consider.front(); + to_consider.pop_front(); + if (!considered.insert(g[v].index).second) { + continue; + } + + assert(!contains(sheng_states, v)); + + if (generates_callbacks(info.raw.kind) + && !info.states[g[v].index].reports.empty()) { + /* cannot raise callbacks from sheng region */ + continue; + } + + sheng_states.insert(v); + for (const auto &t : adjacent_vertices_range(v, g)) { + if (!contains(considered, g[t].index)) { + to_consider.push_back(t); + } + if (t == base_cyclic) { + seen_back_edge = true; + } + } + } + + /* allocate normal ids */ + dstate_id_t sheng_end = DEAD_STATE + 1; + for (auto v : sheng_states) { + dstate_id_t s = g[v].index; + if (!contains(accel_escape_info, s)) { + info.states[s].impl_id = sheng_end++; + info.extra[s].sheng_id = info.states[s].impl_id - 1; + } + } + + /* allocate accel ids */ + for (auto v : sheng_states) { + dstate_id_t s = g[v].index; + if (contains(accel_escape_info, s)) { + assert(!info.states[s].impl_id); + info.states[s].impl_id = sheng_end++; + info.extra[s].sheng_id = info.states[s].impl_id - 1; + } + } + + if (sheng_states.size() < MIN_SHENG_SIZE) { + DEBUG_PRINTF("sheng region too small\n"); + return DEAD_STATE; + } + + if (!seen_back_edge) { + DEBUG_PRINTF("did not include cyclic\n"); + return DEAD_STATE; + } + + double leak = leakiness(g, info, sheng_states, base_cyclic); + if (leak >
MAX_SHENG_LEAKINESS) { + DEBUG_PRINTF("too leaky (%g)\n", leak); + return DEAD_STATE; + } + + if (!mark_sheng_succs(g, info, sheng_states)) { + return DEAD_STATE; + } + + /* TODO: ensure sufficiently 'sticky' */ + /* TODO: check not all states accel */ + DEBUG_PRINTF("sheng_end = %hu\n", sheng_end); + return sheng_end; +} + +static +void fill_in_aux_info(NFA *nfa, const dfa_info &info, + const map &accel_escape_info, + u32 accel_offset, UNUSED u32 accel_end_offset, + const vector &reports, + const vector &reports_eod, + u32 report_base_offset, + const raw_report_info &ri) { + mcsheng *m = (mcsheng *)getMutableImplNfa(nfa); + + vector reportOffsets; + + ri.fillReportLists(nfa, report_base_offset, reportOffsets); + + for (u32 i = 0; i < info.size(); i++) { + u16 impl_id = info.implId(i); + mstate_aux *this_aux = getAux(nfa, impl_id); + + fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets); + if (contains(accel_escape_info, i)) { + this_aux->accel_offset = accel_offset; + accel_offset += info.strat.accelSize(); + assert(accel_offset <= accel_end_offset); + assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); + info.strat.buildAccel(i, accel_escape_info.at(i), + (void *)((char *)m + this_aux->accel_offset)); + } + } +} + +static +u16 get_edge_flags(NFA *nfa, dstate_id_t target_impl_id) { + mstate_aux *aux = getAux(nfa, target_impl_id); + u16 flags = 0; + + if (aux->accept) { + flags |= ACCEPT_FLAG; + } + + if (aux->accel_offset) { + flags |= ACCEL_FLAG; + } + + return flags; +} + +static +void fill_in_succ_table_16(NFA *nfa, const dfa_info &info, + dstate_id_t sheng_end, + UNUSED dstate_id_t sherman_base) { + u16 *succ_table = (u16 *)((char *)nfa + sizeof(NFA) + sizeof(mcsheng)); + + u8 alphaShift = info.getAlphaShift(); + assert(alphaShift <= 8); + + for (size_t i = 0; i < info.size(); i++) { + if (!info.is_normal(i)) { + assert(info.implId(i) < sheng_end || info.is_sherman(i)); + continue; + } + + assert(info.implId(i) < sherman_base); + u16 normal_id = verify_u16(info.implId(i) - sheng_end); + + for (size_t s = 0; s < info.impl_alpha_size; s++) { + dstate_id_t raw_succ = info.states[i].next[s]; + u16 &entry = succ_table[(normal_id << alphaShift) + s]; + + entry = info.implId(raw_succ); + entry |= get_edge_flags(nfa, entry); + } + } +} + +#define MAX_SHERMAN_LIST_LEN 8 + +static +void addIfEarlier(set &dest, dstate_id_t candidate, + dstate_id_t max) { + if (candidate < max) { + dest.insert(candidate); + } +} + +static +void addSuccessors(set &dest, const dstate &source, + u16 alphasize, dstate_id_t curr_id) { + for (symbol_t s = 0; s < alphasize; s++) { + addIfEarlier(dest, source.next[s], curr_id); + } +} + +#define MAX_SHERMAN_SELF_LOOP 20 + +static +void find_better_daddy(dfa_info &info, dstate_id_t curr_id, + bool any_cyclic_near_anchored_state, const Grey &grey) { + if (!grey.allowShermanStates) { + return; + } + + const u16 width = sizeof(u16); + const u16 alphasize = info.impl_alpha_size; + + if (info.raw.start_anchored != DEAD_STATE + && any_cyclic_near_anchored_state + && curr_id < alphasize * 3) { + /* crude attempt to prevent frequent states from being sherman'ed + * depends on the fact that state numbers are currently in bfs + * order */ + DEBUG_PRINTF("%hu is banned\n", curr_id); + return; + } + + if (info.raw.start_floating != DEAD_STATE + && curr_id >= info.raw.start_floating + && curr_id < info.raw.start_floating + alphasize * 3) { + /* crude attempt to prevent frequent states from being sherman'ed + * depends on the fact that state numbers are
currently in bfs + * order */ + DEBUG_PRINTF("%hu is banned (%hu)\n", curr_id, info.raw.start_floating); + return; + } + + const u16 full_state_size = width * alphasize; + const u16 max_list_len = MIN(MAX_SHERMAN_LIST_LEN, + (full_state_size - 2)/(width + 1)); + u16 best_score = 0; + dstate_id_t best_daddy = 0; + dstate &currState = info.states[curr_id]; + + set hinted; /* set of states to search for a better daddy */ + addIfEarlier(hinted, 0, curr_id); + addIfEarlier(hinted, info.raw.start_anchored, curr_id); + addIfEarlier(hinted, info.raw.start_floating, curr_id); + + dstate_id_t mydaddy = currState.daddy; + if (mydaddy) { + addIfEarlier(hinted, mydaddy, curr_id); + addSuccessors(hinted, info.states[mydaddy], alphasize, curr_id); + dstate_id_t mygranddaddy = info.states[mydaddy].daddy; + if (mygranddaddy) { + addIfEarlier(hinted, mygranddaddy, curr_id); + addSuccessors(hinted, info.states[mygranddaddy], alphasize, + curr_id); + } + } + + for (const dstate_id_t &donor : hinted) { + assert(donor < curr_id); + u32 score = 0; + + if (!info.is_normal(donor)) { + continue; + } + + const dstate &donorState = info.states[donor]; + for (symbol_t s = 0; s < alphasize; s++) { + if (currState.next[s] == donorState.next[s]) { + score++; + } + } + + /* prefer lower ids to provide some stability amongst potential + * siblings */ + if (score > best_score || (score == best_score && donor < best_daddy)) { + best_daddy = donor; + best_score = score; + + if (score == alphasize) { + break; + } + } + } + + currState.daddy = best_daddy; + info.extra[curr_id].daddytaken = best_score; + DEBUG_PRINTF("%hu -> daddy %hu: %u/%u BF\n", curr_id, best_daddy, + best_score, alphasize); + + if (best_daddy == DEAD_STATE) { + return; /* No good daddy */ + } + + if (best_score + max_list_len < alphasize) { + return; /* even the best daddy leaves more than max_list_len + * transitions to override, so a sherman state cannot + * represent this state
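+ * (e.g. with an impl alphabet of 40 symbols and max_list_len 8, any + * daddy sharing fewer than 32 transitions is rejected)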
*/ + } + + assert(info.is_normal(currState.daddy)); + + u32 self_loop_width = 0; + const dstate curr_raw = info.states[curr_id]; + for (unsigned i = 0; i < N_CHARS; i++) { + if (curr_raw.next[info.alpha_remap[i]] == curr_id) { + self_loop_width++; + } + } + + if (self_loop_width > MAX_SHERMAN_SELF_LOOP) { + DEBUG_PRINTF("%hu is banned wide self loop (%u)\n", curr_id, + self_loop_width); + return; + } + + if (info.is_sheng(curr_id)) { + return; + } + + DEBUG_PRINTF("%hu is sherman\n", curr_id); + info.extra[curr_id].shermanState = true; +} + +static +bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { + symbol_t alphasize = raw.getImplAlphaSize(); + for (symbol_t s = 0; s < alphasize; s++) { + dstate_id_t succ_id = raw.states[root].next[s]; + if (succ_id == DEAD_STATE) { + continue; + } + + const dstate &succ = raw.states[succ_id]; + for (symbol_t t = 0; t < alphasize; t++) { + if (succ.next[t] == root || succ.next[t] == succ_id) { + return true; + } + } + } + return false; +} + +static +void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) { + char *nfa_base = (char *)nfa; + mcsheng *m = (mcsheng *)getMutableImplNfa(nfa); + char *sherman_table = nfa_base + m->sherman_offset; + + assert(ISALIGNED_16(sherman_table)); + for (size_t i = 0; i < info.size(); i++) { + if (!info.is_sherman(i)) { + continue; + } + u16 fs = verify_u16(info.implId(i)); + DEBUG_PRINTF("building sherman %zu impl %hu\n", i, fs); + + assert(fs >= sherman_limit); + + char *curr_sherman_entry + = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE; + assert(curr_sherman_entry <= nfa_base + m->length); + + u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken); + assert(len <= 9); + dstate_id_t d = info.states[i].daddy; + + *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE; + *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len; + *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d); + u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET); + + for (u16 s = 0; s < info.impl_alpha_size; s++) { + if (info.states[i].next[s] != info.states[d].next[s]) { + *(chars++) = (u8)s; + } + } + + u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len)); + for (u16 s = 0; s < info.impl_alpha_size; s++) { + if (info.states[i].next[s] != info.states[d].next[s]) { + DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs, + info.implId(d), + info.implId(info.states[i].next[s])); + u16 entry_val = info.implId(info.states[i].next[s]); + entry_val |= get_edge_flags(nfa, entry_val); + unaligned_store_u16((u8 *)states++, entry_val); + } + } + } +} + +static +aligned_unique_ptr mcshengCompile16(dfa_info &info, dstate_id_t sheng_end, + const map &accel_escape_info, + const Grey &grey) { + DEBUG_PRINTF("building mcsheng 16\n"); + + vector reports; /* index in ri for the appropriate report list */ + vector reports_eod; /* as above */ + ReportID arb; + u8 single; + + assert(info.getAlphaShift() <= 8); + + u16 total_daddy = 0; + for (u32 i = 0; i < info.size(); i++) { + find_better_daddy(info, i, + is_cyclic_near(info.raw, info.raw.start_anchored), + grey); + total_daddy += info.extra[i].daddytaken; + } + + DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy, + info.size() * info.impl_alpha_size, info.size(), + info.impl_alpha_size); + + u16 sherman_limit; + if (!allocateImplId16(info, sheng_end, &sherman_limit)) { + DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", + info.size()); + return nullptr; + } + u16 
count_real_states = sherman_limit - sheng_end; + + auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); + + size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16) + * count_real_states; + + size_t aux_size = sizeof(mstate_aux) * info.size(); + + size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng) + tran_size); + size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); + size_t accel_offset = ROUNDUP_N(aux_offset + aux_size + + ri->getReportListSize(), 32); + size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size); + size_t sherman_size = calcShermanRegionSize(info); + + size_t total_size = sherman_offset + sherman_size; + + accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ + assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); + + aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size); + mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get()); + + populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset, + accel_escape_info.size(), arb, single, nfa.get()); + createShuffleMasks(m, info, sheng_end, accel_escape_info); + + /* copy in the mc header information */ + m->sherman_offset = sherman_offset; + m->sherman_end = total_size; + m->sherman_limit = sherman_limit; + + DEBUG_PRINTF("%hu sheng, %hu norm, %zu total\n", sheng_end, + count_real_states, info.size()); + + fill_in_aux_info(nfa.get(), info, accel_escape_info, accel_offset, + sherman_offset - sizeof(NFA), reports, reports_eod, + aux_offset + aux_size, *ri); + + fill_in_succ_table_16(nfa.get(), info, sheng_end, sherman_limit); + + fill_in_sherman(nfa.get(), info, sherman_limit); + + return nfa; +} + +static +void fill_in_succ_table_8(NFA *nfa, const dfa_info &info, + dstate_id_t sheng_end) { + u8 *succ_table = (u8 *)nfa + sizeof(NFA) + sizeof(mcsheng); + + u8 alphaShift = info.getAlphaShift(); + assert(alphaShift <= 8); + + for (size_t i = 0; i < info.size(); i++) { + assert(!info.is_sherman(i)); + if (!info.is_normal(i)) { + assert(info.implId(i) < sheng_end); + continue; + } + u8 normal_id = verify_u8(info.implId(i) - sheng_end); + + for (size_t s = 0; s < info.impl_alpha_size; s++) { + dstate_id_t raw_succ = info.states[i].next[s]; + succ_table[(normal_id << alphaShift) + s] = info.implId(raw_succ); + } + } +} + +static +void allocateImplId8(dfa_info &info, dstate_id_t sheng_end, + const map &accel_escape_info, + u16 *accel_limit, u16 *accept_limit) { + info.states[0].impl_id = 0; /* dead is always 0 */ + + vector norm; + vector accel; + vector accept; + + assert(info.size() <= (1 << 8)); + + for (u32 i = 1; i < info.size(); i++) { + if (info.is_sheng(i)) { + continue; /* already allocated */ + } else if (!info.states[i].reports.empty()) { + accept.push_back(i); + } else if (contains(accel_escape_info, i)) { + accel.push_back(i); + } else { + norm.push_back(i); + } + } + + u32 j = sheng_end; + for (const dstate_id_t &s : norm) { + assert(j <= 256); + DEBUG_PRINTF("mapping state %u to %u\n", s, j); + info.states[s].impl_id = j++; + } + *accel_limit = j; + for (const dstate_id_t &s : accel) { + assert(j <= 256); + DEBUG_PRINTF("mapping state %u to %u\n", s, j); + info.states[s].impl_id = j++; + } + *accept_limit = j; + for (const dstate_id_t &s : accept) { + assert(j <= 256); + DEBUG_PRINTF("mapping state %u to %u\n", s, j); + info.states[s].impl_id = j++; + } +} + +static +aligned_unique_ptr mcshengCompile8(dfa_info &info, dstate_id_t sheng_end, + const map &accel_escape_info) { + DEBUG_PRINTF("building mcsheng 8\n"); + + vector 
reports; + vector reports_eod; + ReportID arb; + u8 single; + + auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); + + size_t normal_count = info.size() - sheng_end; + + size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * normal_count; + size_t aux_size = sizeof(mstate_aux) * info.size(); + size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng) + tran_size); + size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); + size_t accel_offset = ROUNDUP_N(aux_offset + aux_size + + ri->getReportListSize(), 32); + size_t total_size = accel_offset + accel_size; + + DEBUG_PRINTF("aux_size %zu\n", aux_size); + DEBUG_PRINTF("aux_offset %zu\n", aux_offset); + DEBUG_PRINTF("rl size %u\n", ri->getReportListSize()); + DEBUG_PRINTF("accel_size %zu\n", accel_size); + DEBUG_PRINTF("accel_offset %zu\n", accel_offset); + DEBUG_PRINTF("total_size %zu\n", total_size); + + accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ + assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); + + aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size); + mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get()); + + allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8, + &m->accept_limit_8); + + populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset, + accel_escape_info.size(), arb, single, nfa.get()); + createShuffleMasks(m, info, sheng_end, accel_escape_info); + + fill_in_aux_info(nfa.get(), info, accel_escape_info, accel_offset, + total_size - sizeof(NFA), reports, reports_eod, + aux_offset + aux_size, *ri); + + fill_in_succ_table_8(nfa.get(), info, sheng_end); + + DEBUG_PRINTF("rl size %zu\n", ri->size()); + + return nfa; +} + +aligned_unique_ptr mcshengCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm, + set *accel_states) { + if (!cc.grey.allowMcSheng) { + return nullptr; + } + + mcclellan_build_strat mbs(raw, rm); + dfa_info info(mbs); + bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256; + + if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming + * mode with our semantics */ + raw.stripExtraEodReports(); + } + + bool has_eod_reports = raw.hasEodReports(); + + map accel_escape_info + = info.strat.getAccelInfo(cc.grey); + + dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info); + if (sheng_end <= DEAD_STATE + 1) { + return nullptr; + } + + aligned_unique_ptr nfa; + if (!using8bit) { + nfa = mcshengCompile16(info, sheng_end, accel_escape_info, cc.grey); + } else { + nfa = mcshengCompile8(info, sheng_end, accel_escape_info); + } + + if (!nfa) { + return nfa; + } + + if (has_eod_reports) { + nfa->flags |= NFA_ACCEPTS_EOD; + } + + if (accel_states) { + fillAccelOut(accel_escape_info, accel_states); + } + + DEBUG_PRINTF("compile done\n"); + return nfa; +} + +bool has_accel_mcsheng(const NFA *) { + return true; /* consider the sheng region as accelerated */ +} + +} // namespace ue2 diff --git a/src/nfa/mcsheng_compile.h b/src/nfa/mcsheng_compile.h new file mode 100644 index 00000000..24cc66e9 --- /dev/null +++ b/src/nfa/mcsheng_compile.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MCSHENGCOMPILE_H +#define MCSHENGCOMPILE_H + +#include "accel_dfa_build_strat.h" +#include "rdfa.h" +#include "ue2common.h" +#include "util/alloc.h" +#include "util/ue2_containers.h" + +#include +#include + +struct NFA; + +namespace ue2 { + +class ReportManager; +struct CompileContext; + +/* accel_states: (optional) on success, is filled with the set of accelerable + * states */ +ue2::aligned_unique_ptr +mcshengCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm, + std::set *accel_states = nullptr); + +bool has_accel_mcsheng(const NFA *nfa); + +} // namespace ue2 + +#endif diff --git a/src/nfa/mcsheng_data.c b/src/nfa/mcsheng_data.c new file mode 100644 index 00000000..eaf3cbbb --- /dev/null +++ b/src/nfa/mcsheng_data.c @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "mcsheng_internal.h" + +/* This table is in a separate translation unit from mcsheng.c as we want to + * prevent the compiler from seeing these constants. We have load resources to + * spare at runtime, so loading the masks there is not a problem. */ +const u64a mcsheng_pext_mask[8] = { + 0, /* dummy */ + 0x000000000000ff0f, + 0x0000000000ff000f, + 0x00000000ff00000f, + 0x000000ff0000000f, + 0x0000ff000000000f, + 0x00ff00000000000f, + 0xff0000000000000f, +}; diff --git a/src/nfa/mcsheng_dump.cpp b/src/nfa/mcsheng_dump.cpp new file mode 100644 index 00000000..f5c058af --- /dev/null +++ b/src/nfa/mcsheng_dump.cpp @@ -0,0 +1,415 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include "mcsheng_dump.h" + +#include "accel.h" +#include "accel_dump.h" +#include "nfa_dump_internal.h" +#include "nfa_internal.h" +#include "mcsheng_internal.h" +#include "rdfa.h" +#include "ue2common.h" +#include "util/charreach.h" +#include "util/dump_charclass.h" +#include "util/dump_util.h" +#include "util/unaligned.h" + +#include +#include +#include +#include +#include + +#ifndef DUMP_SUPPORT +#error No dump support!
+#endif + +using namespace std; + +namespace ue2 { + +static +const mstate_aux *getAux(const NFA *n, dstate_id_t i) { + auto *m = (const mcsheng *)getImplNfa(n); + auto *aux_base = (const mstate_aux *)((const char *)n + m->aux_offset); + + const mstate_aux *aux = aux_base + i; + + assert((const char *)aux < (const char *)n + m->length); + return aux; +} + +static +void next_states(const NFA *n, u16 s, u16 *t) { + const mcsheng *m = (const mcsheng *)getImplNfa(n); + const mstate_aux *aux = getAux(n, s); + const u32 as = m->alphaShift; + assert(s != DEAD_STATE); + + if (s < m->sheng_end) { + for (u16 c = 0; c < N_CHARS; c++) { + u8 sheng_s = s - 1; + auto trans_for_c = (const char *)&m->sheng_masks[c]; + assert(sheng_s < sizeof(m128)); + u8 raw_succ = trans_for_c[sheng_s]; + if (raw_succ == m->sheng_end - 1) { + t[c] = DEAD_STATE; + } else if (raw_succ < m->sheng_end) { + t[c] = raw_succ + 1; + } else { + t[c] = raw_succ; + } + } + } else if (n->type == MCSHENG_NFA_8) { + const u8 *succ_table = (const u8 *)((const char *)m + sizeof(mcsheng)); + for (u16 c = 0; c < N_CHARS; c++) { + u32 normal_id = s - m->sheng_end; + t[c] = succ_table[(normal_id << as) + m->remap[c]]; + } + } else { + u16 base_s = s; + const char *winfo_base = (const char *)n + m->sherman_offset; + const char *state_base + = winfo_base + SHERMAN_FIXED_SIZE * (s - m->sherman_limit); + + if (s >= m->sherman_limit) { + base_s = unaligned_load_u16(state_base + SHERMAN_DADDY_OFFSET); + assert(base_s >= m->sheng_end); + } + + const u16 *succ_table = (const u16 *)((const char *)m + + sizeof(mcsheng)); + for (u16 c = 0; c < N_CHARS; c++) { + u32 normal_id = base_s - m->sheng_end; + t[c] = succ_table[(normal_id << as) + m->remap[c]]; + } + + if (s >= m->sherman_limit) { + UNUSED char type = *(state_base + SHERMAN_TYPE_OFFSET); + assert(type == SHERMAN_STATE); + u8 len = *(const u8 *)(SHERMAN_LEN_OFFSET + state_base); + const char *chars = state_base + SHERMAN_CHARS_OFFSET; + const u16 *states = (const u16 *)(state_base + + SHERMAN_STATES_OFFSET(len)); + + for (u8 i = 0; i < len; i++) { + for (u16 c = 0; c < N_CHARS; c++) { + if (m->remap[c] == chars[i]) { + t[c] = unaligned_load_u16((const u8*)&states[i]); + } + } + } + } + + for (u16 c = 0; c < N_CHARS; c++) { + t[c] &= STATE_MASK; + } + + } + + t[TOP] = aux->top & STATE_MASK; +} + +static +void describeEdge(FILE *f, const mcsheng *m, const u16 *t, u16 i) { + for (u16 s = 0; s < N_CHARS; s++) { + if (!t[s]) { + continue; + } + + u16 ss; + for (ss = 0; ss < s; ss++) { + if (t[s] == t[ss]) { + break; + } + } + + if (ss != s) { + continue; + } + + CharReach reach; + for (ss = s; ss < 256; ss++) { + if (t[s] == t[ss]) { + reach.set(ss); + } + } + + fprintf(f, "%u -> %u [ ", i, t[s]); + if (i < m->sheng_end && t[s] < m->sheng_end) { + fprintf(f, "color = red, fontcolor = red "); + } + fprintf(f, "label = \""); + describeClass(f, reach, 5, CC_OUT_DOT); + + fprintf(f, "\" ];\n"); + } +} + +static +void dumpAccelDot(FILE *f, u16 i, const union AccelAux *accel) { + switch(accel->accel_type) { + case ACCEL_NONE: + break; + case ACCEL_VERM: + case ACCEL_VERM_NOCASE: + case ACCEL_DVERM: + case ACCEL_DVERM_NOCASE: + fprintf(f, "%u [ color = forestgreen style=diagonals];\n", i); + break; + case ACCEL_SHUFTI: + case ACCEL_DSHUFTI: + case ACCEL_TRUFFLE: + fprintf(f, "%u [ color = darkgreen style=diagonals ];\n", i); + break; + default: + fprintf(f, "%u [ color = yellow style=diagonals ];\n", i); + break; + } +} + +static +void describeNode(const NFA *n, const mcsheng *m, u16 i, FILE *f) { + const 
mstate_aux *aux = getAux(n, i); + + bool isSherman = m->sherman_limit && i >= m->sherman_limit; + + fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, " + "label = \"%u%s\" ]; \n", i, i, isSherman ? "w":""); + + if (aux->accel_offset) { + dumpAccelDot(f, i, (const union AccelAux *) + ((const char *)m + aux->accel_offset)); + } + + if (i && i < m->sheng_end) { + fprintf(f, "%u [color = red, fontcolor = red]; \n", i); + } + + if (aux->accept_eod) { + fprintf(f, "%u [ color = darkorchid ];\n", i); + } + + if (aux->accept) { + fprintf(f, "%u [ shape = doublecircle ];\n", i); + } + + if (aux->top && aux->top != i) { + fprintf(f, "%u -> %u [color = darkgoldenrod weight=0.1 ]\n", i, + aux->top); + } + + if (i == m->start_anchored) { + fprintf(f, "STARTA -> %u [color = blue ]\n", i); + } + + if (i == m->start_floating) { + fprintf(f, "STARTF -> %u [color = red ]\n", i); + } + + if (isSherman) { + const char *winfo_base = (const char *)n + m->sherman_offset; + const char *state_base + = winfo_base + SHERMAN_FIXED_SIZE * (i - m->sherman_limit); + assert(state_base < (const char *)m + m->length - sizeof(NFA)); + UNUSED u8 type = *(const u8 *)(state_base + SHERMAN_TYPE_OFFSET); + assert(type == SHERMAN_STATE); + fprintf(f, "%u [ fillcolor = lightblue style=filled ];\n", i); + u16 daddy = *(const u16 *)(state_base + SHERMAN_DADDY_OFFSET); + if (daddy) { + fprintf(f, "%u -> %u [ color=royalblue style=dashed weight=0.1]\n", + i, daddy); + } + } + + if (i && i < m->sheng_end) { + fprintf(f, "subgraph cluster_sheng { %u } \n", i); + } + +} + +static +void dumpDotPreambleDfa(FILE *f) { + dumpDotPreamble(f); + + // DFA specific additions. + fprintf(f, "STARTF [style=invis];\n"); + fprintf(f, "STARTA [style=invis];\n"); + fprintf(f, "0 [style=invis];\n"); + fprintf(f, "subgraph cluster_sheng { style = dashed }\n"); +} + +static +void dump_dot_16(const NFA *nfa, FILE *f) { + auto *m = (const mcsheng *)getImplNfa(nfa); + + dumpDotPreambleDfa(f); + + for (u16 i = 1; i < m->state_count; i++) { + describeNode(nfa, m, i, f); + + u16 t[ALPHABET_SIZE]; + + next_states(nfa, i, t); + + describeEdge(f, m, t, i); + } + + fprintf(f, "}\n"); +} + +static +void dump_dot_8(const NFA *nfa, FILE *f) { + auto m = (const mcsheng *)getImplNfa(nfa); + + dumpDotPreambleDfa(f); + + for (u16 i = 1; i < m->state_count; i++) { + describeNode(nfa, m, i, f); + + u16 t[ALPHABET_SIZE]; + + next_states(nfa, i, t); + + describeEdge(f, m, t, i); + } + + fprintf(f, "}\n"); +} + +static +void dumpAccelMasks(FILE *f, const mcsheng *m, const mstate_aux *aux) { + fprintf(f, "\n"); + fprintf(f, "Acceleration\n"); + fprintf(f, "------------\n"); + + for (u16 i = 0; i < m->state_count; i++) { + if (!aux[i].accel_offset) { + continue; + } + + auto accel = (const AccelAux *)((const char *)m + aux[i].accel_offset); + fprintf(f, "%05hu ", i); + dumpAccelInfo(f, *accel); + } +} + +static +void describeAlphabet(FILE *f, const mcsheng *m) { + map rev; + + for (u16 i = 0; i < N_CHARS; i++) { + rev[m->remap[i]].clear(); + } + + for (u16 i = 0; i < N_CHARS; i++) { + rev[m->remap[i]].set(i); + } + + map::const_iterator it; + fprintf(f, "\nAlphabet\n"); + for (it = rev.begin(); it != rev.end(); ++it) { + fprintf(f, "%3hhu: ", it->first); + describeClass(f, it->second, 10240, CC_OUT_TEXT); + fprintf(f, "\n"); + } + fprintf(f, "\n"); +} + +static +void dumpCommonHeader(FILE *f, const mcsheng *m) { + fprintf(f, "report: %u, states: %u, length: %u\n", m->arb_report, + m->state_count, m->length); + fprintf(f, "astart: %hu, fstart: %hu\n", m->start_anchored, 
+ m->start_floating); + fprintf(f, "single accept: %d, has_accel: %d\n", + !!(int)(m->flags & MCSHENG_FLAG_SINGLE), m->has_accel); + fprintf(f, "sheng_end: %hu\n", m->sheng_end); + fprintf(f, "sheng_accel_limit: %hu\n", m->sheng_accel_limit); +} + +static +void dump_text_16(const NFA *nfa, FILE *f) { + auto *m = (const mcsheng *)getImplNfa(nfa); + auto *aux = (const mstate_aux *)((const char *)nfa + m->aux_offset); + + fprintf(f, "mcsheng 16\n"); + dumpCommonHeader(f, m); + fprintf(f, "sherman_limit: %d, sherman_end: %d\n", (int)m->sherman_limit, + (int)m->sherman_end); + fprintf(f, "\n"); + + describeAlphabet(f, m); + dumpAccelMasks(f, m, aux); + + fprintf(f, "\n"); + dumpTextReverse(nfa, f); +} + +static +void dump_text_8(const NFA *nfa, FILE *f) { + auto m = (const mcsheng *)getImplNfa(nfa); + auto aux = (const mstate_aux *)((const char *)nfa + m->aux_offset); + + fprintf(f, "mcsheng 8\n"); + dumpCommonHeader(f, m); + fprintf(f, "accel_limit: %hu, accept_limit: %hu\n", m->accel_limit_8, + m->accept_limit_8); + fprintf(f, "\n"); + + describeAlphabet(f, m); + dumpAccelMasks(f, m, aux); + + fprintf(f, "\n"); + dumpTextReverse(nfa, f); +} + +void nfaExecMcSheng16_dump(const NFA *nfa, const string &base) { + assert(nfa->type == MCSHENG_NFA_16); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + dump_text_16(nfa, f); + fclose(f); + f = fopen_or_throw((base + ".dot").c_str(), "w"); + dump_dot_16(nfa, f); + fclose(f); +} + +void nfaExecMcSheng8_dump(const NFA *nfa, const string &base) { + assert(nfa->type == MCSHENG_NFA_8); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + dump_text_8(nfa, f); + fclose(f); + f = fopen_or_throw((base + ".dot").c_str(), "w"); + dump_dot_8(nfa, f); + fclose(f); +} + +} // namespace ue2 diff --git a/src/nfa/mcsheng_dump.h b/src/nfa/mcsheng_dump.h new file mode 100644 index 00000000..1b699367 --- /dev/null +++ b/src/nfa/mcsheng_dump.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE.
+ */ + +#ifndef MCSHENG_DUMP_H +#define MCSHENG_DUMP_H + +#ifdef DUMP_SUPPORT + +#include "rdfa.h" + +#include +#include + +struct NFA; + +namespace ue2 { + +void nfaExecMcSheng8_dump(const struct NFA *nfa, const std::string &base); +void nfaExecMcSheng16_dump(const struct NFA *nfa, const std::string &base); + +} // namespace ue2 + +#endif // DUMP_SUPPORT + +#endif // MCSHENG_DUMP_H diff --git a/src/nfa/mcsheng_internal.h b/src/nfa/mcsheng_internal.h new file mode 100644 index 00000000..5ced6f76 --- /dev/null +++ b/src/nfa/mcsheng_internal.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef MCSHENG_INTERNAL_H +#define MCSHENG_INTERNAL_H + +#include "nfa_internal.h" +#include "ue2common.h" +#include "util/simd_utils.h" + +#define ACCEPT_FLAG 0x8000 +#define ACCEL_FLAG 0x4000 +#define STATE_MASK 0x3fff + +#define SHERMAN_STATE 1 + +#define SHERMAN_TYPE_OFFSET 0 +#define SHERMAN_FIXED_SIZE 32 + +#define SHERMAN_LEN_OFFSET 1 +#define SHERMAN_DADDY_OFFSET 2 +#define SHERMAN_CHARS_OFFSET 4 +#define SHERMAN_STATES_OFFSET(sso_len) (4 + (sso_len)) + +struct report_list { + u32 count; + ReportID report[]; +}; + +struct mstate_aux { + u32 accept; + u32 accept_eod; + u16 top; + u32 accel_offset; /* relative to start of struct mcsheng; 0 if no accel */ +}; + +#define MCSHENG_FLAG_SINGLE 1 /**< we raise only single accept id */ + +struct mcsheng { + u16 state_count; /**< total number of states */ + u32 length; /**< length of dfa in bytes */ + u16 start_anchored; /**< anchored start state */ + u16 start_floating; /**< floating start state */ + u32 aux_offset; /**< offset of the aux structures relative to the start of + * the nfa structure */ + u32 sherman_offset; /**< offset of the sherman state_info structures + * relative to the start of the + * nfa structure */ + u32 sherman_end; /**< offset of the end of the state_info structures + * relative to the start of the nfa structure */ + u16 sheng_end; /**< first non-sheng state */ + u16 sheng_accel_limit; /**< first sheng accel state. state given in terms of + * internal sheng ids */ + u16 accel_limit_8; /**< 8 bit, lowest accelerable state */ + u16 accept_limit_8; /**< 8 bit, lowest accept state */ + u16 sherman_limit; /**< lowest sherman state */ + u8 alphaShift; + u8 flags; + u8 has_accel; /**< 1 iff there are any accel plans */ + u8 remap[256]; /**< remaps characters to a smaller alphabet */ + ReportID arb_report; /**< one of the accepts that this dfa may raise */ + u32 accel_offset; /**< offset of the accel structures from start of NFA */ + m128 sheng_masks[N_CHARS]; +}; + +/* PEXT masks for the runtime to extract the appropriate copy of bytes 1..7 + * of the data from a u64a.
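+ * Mask N selects byte N together with the low nibble of byte 0: e.g. + * pext64(x, 0x000000000000ff0f) packs byte 1 of x and the low nibble of x + * into the bottom 12 bits of the result.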
*/ +extern const u64a mcsheng_pext_mask[8]; + +#endif diff --git a/src/nfa/nfa_api_dispatch.c b/src/nfa/nfa_api_dispatch.c index d4e9eb78..f4b7552e 100644 --- a/src/nfa/nfa_api_dispatch.c +++ b/src/nfa/nfa_api_dispatch.c @@ -41,6 +41,7 @@ #include "lbr.h" #include "limex.h" #include "mcclellan.h" +#include "mcsheng.h" #include "mpv.h" #include "sheng.h" #include "tamarama.h" @@ -73,6 +74,8 @@ DISPATCH_CASE(CASTLE_NFA, Castle, dbnt_func); \ DISPATCH_CASE(SHENG_NFA, Sheng, dbnt_func); \ DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \ + DISPATCH_CASE(MCSHENG_NFA_8, McSheng8, dbnt_func); \ + DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \ default: \ assert(0); \ } diff --git a/src/nfa/nfa_build_util.cpp b/src/nfa/nfa_build_util.cpp index 3b235bf4..3103cd29 100644 --- a/src/nfa/nfa_build_util.cpp +++ b/src/nfa/nfa_build_util.cpp @@ -30,6 +30,7 @@ #include "limex_internal.h" #include "mcclellancompile.h" +#include "mcsheng_compile.h" #include "shengcompile.h" #include "nfa_internal.h" #include "repeat_internal.h" @@ -413,6 +414,38 @@ const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = d const char *NFATraits::name = "Tamarama"; #endif +template<> struct NFATraits { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 1; + static const bool fast = true; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; +}; +const nfa_dispatch_fn NFATraits::has_accel = has_accel_mcsheng; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; +#if defined(DUMP_SUPPORT) +const char *NFATraits::name = "Shengy McShengFace 8"; +#endif + +template<> struct NFATraits { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 2; + static const bool fast = true; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; +}; +const nfa_dispatch_fn NFATraits::has_accel = has_accel_mcsheng; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; +#if defined(DUMP_SUPPORT) +const char *NFATraits::name = "Shengy McShengFace 16"; +#endif + } // namespace #if defined(DUMP_SUPPORT) diff --git a/src/nfa/nfa_dump_dispatch.cpp b/src/nfa/nfa_dump_dispatch.cpp index 3dea5ef7..5607ed27 100644 --- a/src/nfa/nfa_dump_dispatch.cpp +++ b/src/nfa/nfa_dump_dispatch.cpp @@ -39,6 +39,7 @@ #include "lbr_dump.h" #include "limex.h" #include "mcclellandump.h" +#include "mcsheng_dump.h" #include "mpv_dump.h" #include "shengdump.h" #include "tamarama_dump.h" @@ -78,6 +79,8 @@ namespace ue2 { DISPATCH_CASE(CASTLE_NFA, Castle, dbnt_func); \ DISPATCH_CASE(SHENG_NFA, Sheng, dbnt_func); \ DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \ + DISPATCH_CASE(MCSHENG_NFA_8, McSheng8, dbnt_func); \ + DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \ default: \ assert(0); \ } diff --git a/src/nfa/nfa_internal.h b/src/nfa/nfa_internal.h index 1ce566ff..9d280822 100644 --- a/src/nfa/nfa_internal.h +++ b/src/nfa/nfa_internal.h @@ -70,6 +70,8 @@ enum NFAEngineType { CASTLE_NFA, /**< magic pseudo nfa */ SHENG_NFA, /**< magic pseudo nfa */ TAMARAMA_NFA, /**< magic nfa container */ + MCSHENG_NFA_8, /**< magic pseudo nfa */ + MCSHENG_NFA_16, /**< magic pseudo 
nfa */ /** \brief bogus NFA - not used */ INVALID_NFA }; @@ -143,6 +145,12 @@ static really_inline int isMcClellanType(u8 t) { return t == MCCLELLAN_NFA_8 || t == MCCLELLAN_NFA_16; } +/** \brief True if the given type (from NFA::type) is a Sheng-McClellan hybrid + * DFA. */ +static really_inline int isShengMcClellanType(u8 t) { + return t == MCSHENG_NFA_8 || t == MCSHENG_NFA_16; +} + /** \brief True if the given type (from NFA::type) is a Gough DFA. */ static really_inline int isGoughType(u8 t) { return t == GOUGH_NFA_8 || t == GOUGH_NFA_16; @@ -158,7 +166,16 @@ static really_inline int isShengType(u8 t) { * Sheng DFA. */ static really_inline int isDfaType(u8 t) { - return isMcClellanType(t) || isGoughType(t) || isShengType(t); + return isMcClellanType(t) || isGoughType(t) || isShengType(t) + || isShengMcClellanType(t); +} + +static really_inline int isBigDfaType(u8 t) { + return t == MCCLELLAN_NFA_16 || t == MCSHENG_NFA_16 || t == GOUGH_NFA_16; +} + +static really_inline int isSmallDfaType(u8 t) { + return isDfaType(t) && !isBigDfaType(t); } /** \brief True if the given type (from NFA::type) is an NFA. */ diff --git a/src/nfa/rdfa_graph.cpp b/src/nfa/rdfa_graph.cpp new file mode 100644 index 00000000..2467748b --- /dev/null +++ b/src/nfa/rdfa_graph.cpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include "rdfa_graph.h" + +#include "rdfa.h" +#include "util/container.h" + +#include + +using namespace std; + +namespace ue2 { + +RdfaGraph::RdfaGraph(const raw_dfa &rdfa) { + RdfaGraph &g = *this; + + vector verts; + verts.reserve(rdfa.states.size()); + for (dstate_id_t i = 0; i < rdfa.states.size(); i++) { + verts.push_back(add_vertex(g)); + assert(g[verts.back()].index == i); + } + + symbol_t symbol_end = rdfa.alpha_size - 1; + + flat_set local_succs; + for (dstate_id_t i = 0; i < rdfa.states.size(); i++) { + local_succs.clear(); + for (symbol_t s = 0; s < symbol_end; s++) { + dstate_id_t next = rdfa.states[i].next[s]; + if (contains(local_succs, next)) { + continue; + } + DEBUG_PRINTF("%hu->%hu\n", i, next); + add_edge(verts[i], verts[next], g); + local_succs.insert(next); + } + } +} + +} diff --git a/src/nfa/rdfa_graph.h b/src/nfa/rdfa_graph.h new file mode 100644 index 00000000..6d166c2f --- /dev/null +++ b/src/nfa/rdfa_graph.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef RDFA_GRAPH_H +#define RDFA_GRAPH_H + +#include "ue2common.h" +#include "util/ue2_graph.h" + +namespace ue2 { + +struct raw_dfa; + +struct RdfaVertexProps { + size_t index = 0; +}; + +struct RdfaEdgeProps { + size_t index = 0; +}; + +struct RdfaGraph : public ue2_graph { + RdfaGraph(const raw_dfa &rdfa); +}; + + +} + +#endif diff --git a/src/nfa/shengcompile.cpp b/src/nfa/shengcompile.cpp index 3902dbaf..a02a9b96 100644 --- a/src/nfa/shengcompile.cpp +++ b/src/nfa/shengcompile.cpp @@ -447,9 +447,8 @@ void createShuffleMasks(sheng *s, dfa_info &info, } } -bool has_accel_sheng(const NFA *nfa) { - const sheng *s = (const sheng *)getImplNfa(nfa); - return s->flags & SHENG_FLAG_HAS_ACCEL; +bool has_accel_sheng(const NFA *) { + return true; /* consider the sheng region as accelerated */ } aligned_unique_ptr shengCompile(raw_dfa &raw, diff --git a/src/nfagraph/ng_util.cpp b/src/nfagraph/ng_util.cpp index 948cd7f1..5252eb18 100644 --- a/src/nfagraph/ng_util.cpp +++ b/src/nfagraph/ng_util.cpp @@ -46,7 +46,6 @@ #include #include #include -#include #include #include @@ -54,7 +53,6 @@ using namespace std; using boost::default_color_type; using boost::make_filtered_graph; using boost::make_assoc_property_map; -using boost::adaptors::map_values; namespace ue2 { @@ -257,38 +255,6 @@ bool hasBigCycles(const NGHolder &g) { return false; } -set findVerticesInCycles(const NGHolder &g) { - map comp_map; - - strong_components(g, make_assoc_property_map(comp_map)); - - map > comps; - - for (const auto &e : comp_map) { - comps[e.second].insert(e.first); - } - - - set rv; - - for (const auto &comp : comps | map_values) { - /* every vertex in a strongly connected component is reachable from - * every other vertex in the component. A vertex is involved in a cycle - * therefore if it is in a strongly connected component with more than - * one vertex or if it is the only vertex and it has a self loop. */ - assert(!comp.empty()); - if (comp.size() > 1) { - insert(&rv, comp); - } - NFAVertex v = *comp.begin(); - if (hasSelfLoop(v, g)) { - rv.insert(v); - } - } - - return rv; -} - bool can_never_match(const NGHolder &g) { assert(edge(g.accept, g.acceptEod, g).second); if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) { diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index f074973d..ef74619d 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -52,6 +52,7 @@ #include "nfa/goughcompile.h" #include "nfa/mcclellancompile.h" #include "nfa/mcclellancompile_util.h" +#include "nfa/mcsheng_compile.h" #include "nfa/nfa_api_queue.h" #include "nfa/nfa_build_util.h" #include "nfa/nfa_internal.h" @@ -615,7 +616,7 @@ aligned_unique_ptr pickImpl(aligned_unique_ptr dfa_impl, bool d_accel = has_accel(*dfa_impl); bool n_accel = has_accel(*nfa_impl); - bool d_big = dfa_impl->type == MCCLELLAN_NFA_16; + bool d_big = isBigDfaType(dfa_impl->type); bool n_vsmall = nfa_impl->nPositions <= 32; bool n_br = has_bounded_repeats(*nfa_impl); DEBUG_PRINTF("da %d na %d db %d nvs %d nbr %d\n", (int)d_accel, @@ -666,10 +667,17 @@ buildRepeatEngine(const CastleProto &proto, } static -aligned_unique_ptr getDfa(raw_dfa &rdfa, const CompileContext &cc, +aligned_unique_ptr getDfa(raw_dfa &rdfa, bool is_transient, + const CompileContext &cc, const ReportManager &rm) { // Unleash the Sheng!! auto dfa = shengCompile(rdfa, cc, rm); + if (!dfa && !is_transient) { + // Sheng wasn't successful, so unleash McClellan! 
+ /* We don't try the hybrid for transient prefixes due to the extra + * bytecode and that they are usually run on small blocks */ + dfa = mcshengCompile(rdfa, cc, rm); + } if (!dfa) { // Sheng wasn't successful, so unleash McClellan! dfa = mcclellanCompile(rdfa, cc, rm); @@ -697,7 +705,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, } if (suff.dfa()) { - auto d = getDfa(*suff.dfa(), cc, rm); + auto d = getDfa(*suff.dfa(), false, cc, rm); assert(d); return d; } @@ -726,7 +734,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, auto rdfa = buildMcClellan(holder, &rm, false, triggers.at(0), cc.grey); if (rdfa) { - auto d = getDfa(*rdfa, cc, rm); + auto d = getDfa(*rdfa, false, cc, rm); assert(d); if (cc.grey.roseMcClellanSuffix != 2) { n = pickImpl(move(d), move(n)); @@ -846,12 +854,12 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, } if (left.dfa()) { - n = getDfa(*left.dfa(), cc, rm); + n = getDfa(*left.dfa(), is_transient, cc, rm); } else if (left.graph() && cc.grey.roseMcClellanPrefix == 2 && is_prefix && !is_transient) { auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey); if (rdfa) { - n = getDfa(*rdfa, cc, rm); + n = getDfa(*rdfa, is_transient, cc, rm); assert(n); } } @@ -878,7 +886,7 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, && (!n || !has_bounded_repeats_other_than_firsts(*n) || !is_fast(*n))) { auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey); if (rdfa) { - auto d = getDfa(*rdfa, cc, rm); + auto d = getDfa(*rdfa, is_transient, cc, rm); assert(d); n = pickImpl(move(d), move(n)); } @@ -1614,7 +1622,7 @@ public: aligned_unique_ptr operator()(unique_ptr &rdfa) const { // Unleash the mighty DFA! - return getDfa(*rdfa, build.cc, build.rm); + return getDfa(*rdfa, false, build.cc, build.rm); } aligned_unique_ptr operator()(unique_ptr &haig) const { @@ -1642,7 +1650,7 @@ public: !has_bounded_repeats_other_than_firsts(*n)) { auto rdfa = buildMcClellan(h, &rm, cc.grey); if (rdfa) { - auto d = getDfa(*rdfa, cc, rm); + auto d = getDfa(*rdfa, false, cc, rm); if (d) { n = pickImpl(move(d), move(n)); } diff --git a/src/rose/rose_build_infix.cpp b/src/rose/rose_build_infix.cpp index f3e7680f..4bbb3525 100644 --- a/src/rose/rose_build_infix.cpp +++ b/src/rose/rose_build_infix.cpp @@ -278,7 +278,7 @@ void findCountingMiracleInfo(const left_id &left, const vector &stopTable, const NGHolder &g = *left.graph(); - auto cyclics = findVerticesInCycles(g); + auto cyclics = find_vertices_in_cycles(g); if (!proper_out_degree(g.startDs, g)) { cyclics.erase(g.startDs); diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 50ca1d9e..28b885bd 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -1206,7 +1206,7 @@ u32 roseQuality(const RoseEngine *t) { } const NFA *nfa = (const NFA *)((const char *)atable + sizeof(*atable)); - if (nfa->type != MCCLELLAN_NFA_8) { + if (!isSmallDfaType(nfa->type)) { DEBUG_PRINTF("m16 atable engine\n"); return 0; } diff --git a/src/util/bitutils.h b/src/util/bitutils.h index b7a09ca7..d144e879 100644 --- a/src/util/bitutils.h +++ b/src/util/bitutils.h @@ -471,4 +471,55 @@ u32 rank_in_mask64(u64a mask, u32 bit) { return popcount64(mask); } +#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__)) +#define HAVE_PEXT +#endif + +static really_inline +u32 pext32(u32 x, u32 mask) { +#if defined(HAVE_PEXT) + // Intel BMI2 can do this operation in one instruction. 
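+ // For example, pext32(0xCAFEBABE, 0xF0F0F0F0) gathers the high nibble of + // each byte into the low 16 bits, giving 0xCFBB; the software fallback + // below computes the same result bit by bit.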
+ return _pext_u32(x, mask); +#else + + u32 result = 0, num = 1; + while (mask != 0) { + u32 bit = findAndClearLSB_32(&mask); + if (x & (1U << bit)) { + assert(num != 0); // more than 32 bits! + result |= num; + } + num <<= 1; + } + return result; +#endif +} + +static really_inline +u64a pext64(u64a x, u64a mask) { +#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) + // Intel BMI2 can do this operation in one instruction. + return _pext_u64(x, mask); +#else + + u64a result = 0, num = 1; + while (mask != 0) { + u32 bit = findAndClearLSB_64(&mask); + if (x & (1ULL << bit)) { + assert(num != 0); // more than 64 bits! + result |= num; + } + num <<= 1; + } + return result; +#endif +} + +#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) +static really_inline +u64a pdep64(u64a x, u64a mask) { + return _pdep_u64(x, mask); +} +#endif + #endif // BITUTILS_H diff --git a/src/util/graph.h b/src/util/graph.h index ae7c2c90..4c2876f1 100644 --- a/src/util/graph.h +++ b/src/util/graph.h @@ -39,8 +39,12 @@ #include "util/ue2_containers.h" #include +#include +#include #include +#include +#include #include #include @@ -140,6 +144,41 @@ void find_unreachable(const Graph &g, const SourceCont &sources, OutCont *out) { } } +template +ue2::flat_set +find_vertices_in_cycles(const Graph &g) { + using vertex_descriptor = typename Graph::vertex_descriptor; + + std::map comp_map; + + boost::strong_components(g, boost::make_assoc_property_map(comp_map)); + + std::map> comps; + + for (const auto &e : comp_map) { + comps[e.second].push_back(e.first); + } + + ue2::flat_set rv; + + for (const auto &comp : comps | boost::adaptors::map_values) { + /* every vertex in a strongly connected component is reachable from + * every other vertex in the component. A vertex is therefore involved + * in a cycle if it is in a strongly connected component with more than + * one vertex or if it is the only vertex and it has a self loop.
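+ * For example, with edges a->b, b->a and b->c: {a, b} form a two-vertex + * component, so both are cyclic, while c is in a singleton component and + * is cyclic only if it has a self loop.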
+        assert(!comp.empty());
+        if (comp.size() > 1) {
+            insert(&rv, comp);
+        }
+        vertex_descriptor v = *comp.begin();
+        if (hasSelfLoop(v, g)) {
+            rv.insert(v);
+        }
+    }
+
+    return rv;
+}
+
 template <class Graph>
 bool has_parallel_edge(const Graph &g) {
     using vertex_descriptor = typename Graph::vertex_descriptor;
diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h
index 35e1a390..e8676249 100644
--- a/src/util/simd_utils.h
+++ b/src/util/simd_utils.h
@@ -159,6 +159,10 @@ static really_inline m128 set16x8(u8 c) {
     return _mm_set1_epi8(c);
 }
 
+static really_inline m128 set4x32(u32 c) {
+    return _mm_set1_epi32(c);
+}
+
 static really_inline u32 movd(const m128 in) {
     return _mm_cvtsi128_si32(in);
 }
@@ -328,6 +332,25 @@ m128 variable_byte_shift_m128(m128 in, s32 amount) {
     return pshufb(in, shift_mask);
 }
 
+static really_inline
+m128 max_u8_m128(m128 a, m128 b) {
+    return _mm_max_epu8(a, b);
+}
+
+static really_inline
+m128 min_u8_m128(m128 a, m128 b) {
+    return _mm_min_epu8(a, b);
+}
+
+static really_inline
+m128 sadd_u8_m128(m128 a, m128 b) {
+    return _mm_adds_epu8(a, b);
+}
+
+static really_inline
+m128 sub_u8_m128(m128 a, m128 b) {
+    return _mm_sub_epi8(a, b);
+}
 
 /****
  **** 256-bit Primitives
diff --git a/unit/internal/bitutils.cpp b/unit/internal/bitutils.cpp
index 4d476932..31aaf17f 100644
--- a/unit/internal/bitutils.cpp
+++ b/unit/internal/bitutils.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -436,3 +436,16 @@ TEST(BitUtils, rank_in_mask64) {
     ASSERT_EQ(15, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 31));
     ASSERT_EQ(31, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 63));
 }
+
+#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
+TEST(BitUtils, pdep64) {
+    u64a data = 0xF123456789ABCDEF;
+    ASSERT_EQ(0xfULL, pdep64(data, 0xf));
+    ASSERT_EQ(0xefULL, pdep64(data, 0xff));
+    ASSERT_EQ(0xf0ULL, pdep64(data, 0xf0));
+    ASSERT_EQ(0xfULL, pdep64(data, 0xf));
+    ASSERT_EQ(0xef0ULL, pdep64(data, 0xff0));
+    ASSERT_EQ(0xef00ULL, pdep64(data, 0xff00));
+    ASSERT_EQ(0xd0e0f00ULL, pdep64(data, 0xf0f0f00));
+}
+#endif
diff --git a/unit/internal/nfagraph_util.cpp b/unit/internal/nfagraph_util.cpp
index 135276dd..b6952f5a 100644
--- a/unit/internal/nfagraph_util.cpp
+++ b/unit/internal/nfagraph_util.cpp
@@ -320,9 +320,9 @@ TEST(NFAGraph, cyclicVerts1) {
     add_edge(a, b, g);
     add_edge(b, a, g);
 
-    auto cyclics = findVerticesInCycles(g);
+    auto cyclics = find_vertices_in_cycles(g);
 
-    ASSERT_EQ(set<NFAVertex>({g.startDs, a, b}), cyclics);
+    ASSERT_EQ(flat_set<NFAVertex>({g.startDs, a, b}), cyclics);
 }
 
 TEST(NFAGraph, cyclicVerts2) {
@@ -341,9 +341,9 @@ TEST(NFAGraph, cyclicVerts2) {
     add_edge(c, d, g);
     add_edge(a, e, g);
 
-    auto cyclics = findVerticesInCycles(g);
+    auto cyclics = find_vertices_in_cycles(g);
 
-    ASSERT_EQ(set<NFAVertex>({g.startDs, a, b, c}), cyclics);
+    ASSERT_EQ(flat_set<NFAVertex>({g.startDs, a, b, c}), cyclics);
 }
 
 TEST(NFAGraph, cyclicVerts3) {
@@ -369,9 +369,9 @@ TEST(NFAGraph, cyclicVerts3) {
     add_edge(f, h, g);
     add_edge(h, h, g);
 
-    auto cyclics = findVerticesInCycles(g);
+    auto cyclics = find_vertices_in_cycles(g);
 
-    ASSERT_EQ(set<NFAVertex>({g.startDs, a, b, c, d, e, h}), cyclics);
+    ASSERT_EQ(flat_set<NFAVertex>({g.startDs, a, b, c, d, e, h}), cyclics);
 }
 
 TEST(NFAGraph, cyclicVerts4) {
@@ -396,9 +396,9 @@ TEST(NFAGraph, cyclicVerts4) {
     add_edge(e, f, g);
     add_edge(f, h, g);
 
-    auto cyclics = findVerticesInCycles(g);
+    auto cyclics = find_vertices_in_cycles(g);
 
-    ASSERT_EQ(set<NFAVertex>({g.startDs, a, b, c, d, e}), cyclics);
+    ASSERT_EQ(flat_set<NFAVertex>({g.startDs, a, b, c, d, e}), cyclics);
 }
 
 TEST(NFAGraph, cyclicVerts5) {
@@ -418,7 +418,7 @@
     add_edge(c, d, g);
     add_edge(e, c, g);
 
-    auto cyclics = findVerticesInCycles(g);
+    auto cyclics = find_vertices_in_cycles(g);
 
-    ASSERT_EQ(set<NFAVertex>({g.startDs, b, c}), cyclics);
+    ASSERT_EQ(flat_set<NFAVertex>({g.startDs, b, c}), cyclics);
 }
diff --git a/unit/internal/shuffle.cpp b/unit/internal/shuffle.cpp
index 614b641d..a4632c36 100644
--- a/unit/internal/shuffle.cpp
+++ b/unit/internal/shuffle.cpp
@@ -54,14 +54,14 @@ TEST(Shuffle, PackedExtract32_1) {
     for (unsigned int i = 0; i < 32; i++) {
         // shuffle a single 1 bit to the front
         u32 mask = 1U << i;
-        EXPECT_EQ(1U, packedExtract32(mask, mask));
-        EXPECT_EQ(1U, packedExtract32(~0U, mask));
+        EXPECT_EQ(1U, pext32(mask, mask));
+        EXPECT_EQ(1U, pext32(~0U, mask));
         // we should get zero out of these cases
-        EXPECT_EQ(0U, packedExtract32(0, mask));
-        EXPECT_EQ(0U, packedExtract32(~mask, mask));
+        EXPECT_EQ(0U, pext32(0, mask));
+        EXPECT_EQ(0U, pext32(~mask, mask));
         // we should get zero out of all the other bit positions
         for (unsigned int j = 0; (j != i && j < 32); j++) {
-            EXPECT_EQ(0U, packedExtract32((1U << j), mask));
+            EXPECT_EQ(0U, pext32((1U << j), mask));
         }
     }
 }
@@ -69,10 +69,10 @@
 TEST(Shuffle, PackedExtract32_2) {
     // All 32 bits in mask are on
     u32 mask = ~0U;
-    EXPECT_EQ(0U, packedExtract32(0, mask));
-    EXPECT_EQ(mask, packedExtract32(mask, mask));
+    EXPECT_EQ(0U, pext32(0, mask));
+    EXPECT_EQ(mask, pext32(mask, mask));
     for (unsigned int i = 0; i < 32; i++) {
-        EXPECT_EQ(1U << i, packedExtract32(1U << i, mask));
+        EXPECT_EQ(1U << i, pext32(1U << i, mask));
     }
 }
 
@@ -84,16 +84,16 @@
     }
 
     // Test both cases (all even bits, all odd bits)
-    EXPECT_EQ((1U << 16) - 1, packedExtract32(mask, mask));
-    EXPECT_EQ((1U << 16) - 1, packedExtract32(~mask, ~mask));
-    EXPECT_EQ(0U, packedExtract32(~mask, mask));
-    EXPECT_EQ(0U, packedExtract32(mask, ~mask));
+    EXPECT_EQ((1U << 16) - 1, pext32(mask, mask));
+    EXPECT_EQ((1U << 16) - 1, pext32(~mask, ~mask));
+    EXPECT_EQ(0U, pext32(~mask, mask));
+    EXPECT_EQ(0U, pext32(mask, ~mask));
 
     for (unsigned int i = 0; i < 32; i += 2) {
-        EXPECT_EQ(1U << (i/2), packedExtract32(1U << i, mask));
-        EXPECT_EQ(0U, packedExtract32(1U << i, ~mask));
-        EXPECT_EQ(1U << (i/2), packedExtract32(1U << (i+1), ~mask));
-        EXPECT_EQ(0U, packedExtract32(1U << (i+1), mask));
+        EXPECT_EQ(1U << (i/2), pext32(1U << i, mask));
+        EXPECT_EQ(0U, pext32(1U << i, ~mask));
+        EXPECT_EQ(1U << (i/2), pext32(1U << (i+1), ~mask));
+        EXPECT_EQ(0U, pext32(1U << (i+1), mask));
     }
 }
 
@@ -102,14 +102,14 @@ TEST(Shuffle, PackedExtract64_1) {
     for (unsigned int i = 0; i < 64; i++) {
         // shuffle a single 1 bit to the front
         u64a mask = 1ULL << i;
-        EXPECT_EQ(1U, packedExtract64(mask, mask));
-        EXPECT_EQ(1U, packedExtract64(~0ULL, mask));
+        EXPECT_EQ(1U, pext64(mask, mask));
+        EXPECT_EQ(1U, pext64(~0ULL, mask));
         // we should get zero out of these cases
-        EXPECT_EQ(0U, packedExtract64(0, mask));
-        EXPECT_EQ(0U, packedExtract64(~mask, mask));
+        EXPECT_EQ(0U, pext64(0, mask));
+        EXPECT_EQ(0U, pext64(~mask, mask));
         // we should get zero out of all the other bit positions
         for (unsigned int j = 0; (j != i && j < 64); j++) {
-            EXPECT_EQ(0U, packedExtract64((1ULL << j), mask));
+            EXPECT_EQ(0U, pext64((1ULL << j), mask));
        }
    }
 }
 
@@ -117,26 +117,26 @@
 TEST(Shuffle, PackedExtract64_2) {
     // Fill 
first half of mask u64a mask = 0x00000000ffffffffULL; - EXPECT_EQ(0U, packedExtract64(0, mask)); - EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask)); + EXPECT_EQ(0U, pext64(0, mask)); + EXPECT_EQ(0xffffffffU, pext64(mask, mask)); for (unsigned int i = 0; i < 32; i++) { - EXPECT_EQ(1U << i, packedExtract64(1ULL << i, mask)); + EXPECT_EQ(1U << i, pext64(1ULL << i, mask)); } // Fill second half of mask mask = 0xffffffff00000000ULL; - EXPECT_EQ(0U, packedExtract64(0, mask)); - EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask)); + EXPECT_EQ(0U, pext64(0, mask)); + EXPECT_EQ(0xffffffffU, pext64(mask, mask)); for (unsigned int i = 32; i < 64; i++) { - EXPECT_EQ(1U << (i - 32), packedExtract64(1ULL << i, mask)); + EXPECT_EQ(1U << (i - 32), pext64(1ULL << i, mask)); } // Try one in the middle mask = 0x0000ffffffff0000ULL; - EXPECT_EQ(0U, packedExtract64(0, mask)); - EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask)); + EXPECT_EQ(0U, pext64(0, mask)); + EXPECT_EQ(0xffffffffU, pext64(mask, mask)); for (unsigned int i = 16; i < 48; i++) { - EXPECT_EQ(1U << (i - 16), packedExtract64(1ULL << i, mask)); + EXPECT_EQ(1U << (i - 16), pext64(1ULL << i, mask)); } } @@ -148,16 +148,16 @@ TEST(Shuffle, PackedExtract64_3) { } // Test both cases (all even bits, all odd bits) - EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask)); - EXPECT_EQ(0xffffffffU, packedExtract64(~mask, ~mask)); - EXPECT_EQ(0U, packedExtract64(~mask, mask)); - EXPECT_EQ(0U, packedExtract64(mask, ~mask)); + EXPECT_EQ(0xffffffffU, pext64(mask, mask)); + EXPECT_EQ(0xffffffffU, pext64(~mask, ~mask)); + EXPECT_EQ(0U, pext64(~mask, mask)); + EXPECT_EQ(0U, pext64(mask, ~mask)); for (unsigned int i = 0; i < 64; i += 2) { - EXPECT_EQ(1U << (i/2), packedExtract64(1ULL << i, mask)); - EXPECT_EQ(0U, packedExtract64(1ULL << i, ~mask)); - EXPECT_EQ(1U << (i/2), packedExtract64(1ULL << (i+1), ~mask)); - EXPECT_EQ(0U, packedExtract64(1ULL << (i+1), mask)); + EXPECT_EQ(1U << (i/2), pext64(1ULL << i, mask)); + EXPECT_EQ(0U, pext64(1ULL << i, ~mask)); + EXPECT_EQ(1U << (i/2), pext64(1ULL << (i+1), ~mask)); + EXPECT_EQ(0U, pext64(1ULL << (i+1), mask)); } } diff --git a/unit/internal/simd_utils.cpp b/unit/internal/simd_utils.cpp index 31d4b925..7b34d92e 100644 --- a/unit/internal/simd_utils.cpp +++ b/unit/internal/simd_utils.cpp @@ -614,6 +614,12 @@ TEST(SimdUtilsTest, set16x8) { } } +TEST(SimdUtilsTest, set4x32) { + u32 cmp[4] = { 0x12345678, 0x12345678, 0x12345678, 0x12345678 }; + m128 simd = set4x32(cmp[0]); + ASSERT_EQ(0, memcmp(cmp, &simd, sizeof(simd))); +} + #if defined(__AVX2__) TEST(SimdUtilsTest, set32x8) { char cmp[sizeof(m256)]; @@ -693,4 +699,50 @@ TEST(SimdUtilsTest, variableByteShift128) { EXPECT_TRUE(!diff128(zeroes128(), variable_byte_shift_m128(in, -16))); } +TEST(SimdUtilsTest, max_u8_m128) { + char base1[] = "0123456789ABCDE\xfe"; + char base2[] = "!!23455889aBCd\xff\xff"; + char expec[] = "0123456889aBCd\xff\xff"; + m128 in1 = loadu128(base1); + m128 in2 = loadu128(base2); + m128 result = max_u8_m128(in1, in2); + EXPECT_TRUE(!diff128(result, loadu128(expec))); +} + +TEST(SimdUtilsTest, min_u8_m128) { + char base1[] = "0123456789ABCDE\xfe"; + char base2[] = "!!23455889aBCd\xff\xff"; + char expec[] = "!!23455789ABCDE\xfe"; + m128 in1 = loadu128(base1); + m128 in2 = loadu128(base2); + m128 result = min_u8_m128(in1, in2); + EXPECT_TRUE(!diff128(result, loadu128(expec))); +} + +TEST(SimdUtilsTest, sadd_u8_m128) { + unsigned char base1[] = {0, 0x80, 0xff, 'A', '1', '2', '3', '4', + '1', '2', '3', '4', '1', '2', '3', '4'}; + unsigned char 
base2[] = {'a', 0x80, 'b', 'A', 0x10, 0x10, 0x10, 0x10, + 0x30, 0x30, 0x30, 0x30, 0, 0, 0, 0}; + unsigned char expec[] = {'a', 0xff, 0xff, 0x82, 'A', 'B', 'C', 'D', + 'a', 'b', 'c', 'd', '1', '2', '3', '4'}; + m128 in1 = loadu128(base1); + m128 in2 = loadu128(base2); + m128 result = sadd_u8_m128(in1, in2); + EXPECT_TRUE(!diff128(result, loadu128(expec))); +} + +TEST(SimdUtilsTest, sub_u8_m128) { + unsigned char base1[] = {'a', 0xff, 0xff, 0x82, 'A', 'B', 'C', 'D', + 'a', 'b', 'c', 'd', '1', '2', '3', '4'}; + unsigned char base2[] = {0, 0x80, 0xff, 'A', '1', '2', '3', '4', + '1', '2', '3', '4', '1', '2', '3', '4'}; + unsigned char expec[] = {'a', 0x7f, 0, 'A', 0x10, 0x10, 0x10, 0x10, + 0x30, 0x30, 0x30, 0x30, 0, 0, 0, 0}; + m128 in1 = loadu128(base1); + m128 in2 = loadu128(base2); + m128 result = sub_u8_m128(in1, in2); + EXPECT_TRUE(!diff128(result, loadu128(expec))); +} + } // namespace From 582f71c5bb5f4b56c835386b9082e3979fd24195 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Fri, 2 Dec 2016 10:42:26 +1100 Subject: [PATCH 073/103] mcsheng: remove dead code --- src/nfa/mcsheng_compile.cpp | 78 +------------------------------------ src/nfa/mcsheng_compile.h | 6 +-- 2 files changed, 2 insertions(+), 82 deletions(-) diff --git a/src/nfa/mcsheng_compile.cpp b/src/nfa/mcsheng_compile.cpp index 666c3b1d..bc12cc5c 100644 --- a/src/nfa/mcsheng_compile.cpp +++ b/src/nfa/mcsheng_compile.cpp @@ -242,77 +242,6 @@ void populateBasicInfo(size_t state_size, const dfa_info &info, } } -namespace { - -struct raw_report_list { - flat_set reports; - - raw_report_list(const flat_set &reports_in, - const ReportManager &rm, bool do_remap) { - if (do_remap) { - for (auto &id : reports_in) { - reports.insert(rm.getProgramOffset(id)); - } - } else { - reports = reports_in; - } - } - - bool operator<(const raw_report_list &b) const { - return reports < b.reports; - } -}; - -struct raw_report_info_impl : public raw_report_info { - vector rl; - u32 getReportListSize() const override; - size_t size() const override; - void fillReportLists(NFA *n, size_t base_offset, - std::vector &ro /* out */) const override; -}; -} - -u32 raw_report_info_impl::getReportListSize() const { - u32 rv = 0; - - for (const auto &reps : rl) { - rv += sizeof(report_list); - rv += sizeof(ReportID) * reps.reports.size(); - } - - return rv; -} - -size_t raw_report_info_impl::size() const { - return rl.size(); -} - -void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset, - vector &ro) const { - for (const auto &reps : rl) { - ro.push_back(base_offset); - - report_list *p = (report_list *)((char *)n + base_offset); - - u32 i = 0; - for (const ReportID report : reps.reports) { - p->report[i++] = report; - } - p->count = verify_u32(reps.reports.size()); - - base_offset += sizeof(report_list); - base_offset += sizeof(ReportID) * reps.reports.size(); - } -} - -static -void fillAccelOut(const map &accel_escape_info, - set *accel_states) { - for (dstate_id_t i : accel_escape_info | map_keys) { - accel_states->insert(i); - } -} - static size_t calcShermanRegionSize(const dfa_info &info) { size_t rv = 0; @@ -1089,8 +1018,7 @@ aligned_unique_ptr mcshengCompile8(dfa_info &info, dstate_id_t sheng_end, } aligned_unique_ptr mcshengCompile(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm, - set *accel_states) { + const ReportManager &rm) { if (!cc.grey.allowMcSheng) { return nullptr; } @@ -1129,10 +1057,6 @@ aligned_unique_ptr mcshengCompile(raw_dfa &raw, const CompileContext &cc, nfa->flags |= NFA_ACCEPTS_EOD; } - if 
(accel_states) { - fillAccelOut(accel_escape_info, accel_states); - } - DEBUG_PRINTF("compile done\n"); return nfa; } diff --git a/src/nfa/mcsheng_compile.h b/src/nfa/mcsheng_compile.h index 24cc66e9..d1ae1e32 100644 --- a/src/nfa/mcsheng_compile.h +++ b/src/nfa/mcsheng_compile.h @@ -36,7 +36,6 @@ #include "util/ue2_containers.h" #include -#include struct NFA; @@ -45,12 +44,9 @@ namespace ue2 { class ReportManager; struct CompileContext; -/* accel_states: (optional) on success, is filled with the set of accelerable - * states */ ue2::aligned_unique_ptr mcshengCompile(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm, - std::set *accel_states = nullptr); + const ReportManager &rm); bool has_accel_mcsheng(const NFA *nfa); From 8b7b06d2a407d21cd777cfcdc123f625c93e5599 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Fri, 2 Dec 2016 16:02:09 +1100 Subject: [PATCH 074/103] calcDepthFromSource: only take one copy of the graph --- src/nfagraph/ng_depth.cpp | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/nfagraph/ng_depth.cpp b/src/nfagraph/ng_depth.cpp index 5111b752..63e0e46b 100644 --- a/src/nfagraph/ng_depth.cpp +++ b/src/nfagraph/ng_depth.cpp @@ -162,13 +162,13 @@ void findLoopReachable(const GraphT &g, template static -void calcDepthFromSource(const NGHolder &graph, const GraphT &g, +void calcDepthFromSource(const GraphT &g, typename GraphT::vertex_descriptor srcVertex, - const vector &deadNodes, - vector &dMin, vector &dMax) { + const vector &deadNodes, vector &dMin, + vector &dMax) { typedef typename GraphT::edge_descriptor EdgeT; - const size_t numVerts = num_vertices(graph); + const size_t numVerts = num_vertices(g); NodeFilter nf(&deadNodes, &g); StartFilter sf(&g); @@ -252,14 +252,14 @@ DepthMinMax getDepths(u32 idx, const vector &dMin, template static -void calcAndStoreDepth(const NGHolder &h, const Graph &g, +void calcAndStoreDepth(const Graph &g, const typename Graph::vertex_descriptor src, const vector &deadNodes, vector &dMin /* util */, vector &dMax /* util */, vector &depths, DepthMinMax Output::*store) { - calcDepthFromSource(h, g, src, deadNodes, dMin, dMax); + calcDepthFromSource(g, src, deadNodes, dMin, dMax); for (auto v : vertices_range(g)) { u32 idx = g[v].index; @@ -286,10 +286,10 @@ void calcDepths(const NGHolder &g, std::vector &depths) { findLoopReachable(g, g.start, deadNodes); DEBUG_PRINTF("doing start\n"); - calcAndStoreDepth(g, g, g.start, deadNodes, dMin, dMax, depths, + calcAndStoreDepth(g, g.start, deadNodes, dMin, dMax, depths, &NFAVertexDepth::fromStart); DEBUG_PRINTF("doing startds\n"); - calcAndStoreDepth(g, g, g.startDs, deadNodes, dMin, dMax, depths, + calcAndStoreDepth(g, g.startDs, deadNodes, dMin, dMax, depths, &NFAVertexDepth::fromStartDotStar); } @@ -306,6 +306,8 @@ void calcDepths(const NGHolder &g, std::vector &depths) { typedef reverse_graph RevNFAGraph; const RevNFAGraph rg(g); + assert(num_vertices(g) == num_vertices(rg)); + /* * create a filtered graph for max depth calculations: all nodes/edges * reachable from a loop need to be removed @@ -315,12 +317,12 @@ void calcDepths(const NGHolder &g, std::vector &depths) { DEBUG_PRINTF("doing accept\n"); calcAndStoreDepth( - g, rg, g.accept, deadNodes, dMin, dMax, depths, + rg, g.accept, deadNodes, dMin, dMax, depths, &NFAVertexRevDepth::toAccept); DEBUG_PRINTF("doing accepteod\n"); deadNodes[NODE_ACCEPT] = true; // Hide accept->acceptEod edge. 
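+    /* Every accept vertex also has an edge to acceptEod, so if accept were
+     * left visible here the toAcceptEod depths would simply mirror the
+     * toAccept depths computed above. */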
calcAndStoreDepth( - g, rg, g.acceptEod, deadNodes, dMin, dMax, depths, + rg, g.acceptEod, deadNodes, dMin, dMax, depths, &NFAVertexRevDepth::toAcceptEod); } @@ -342,11 +344,11 @@ void calcDepths(const NGHolder &g, vector &depths) { DEBUG_PRINTF("doing start\n"); calcAndStoreDepth( - g, g, g.start, deadNodes, dMin, dMax, depths, + g, g.start, deadNodes, dMin, dMax, depths, &NFAVertexBidiDepth::fromStart); DEBUG_PRINTF("doing startds\n"); calcAndStoreDepth( - g, g, g.startDs, deadNodes, dMin, dMax, depths, + g, g.startDs, deadNodes, dMin, dMax, depths, &NFAVertexBidiDepth::fromStartDotStar); /* Now go backwards */ @@ -357,12 +359,12 @@ void calcDepths(const NGHolder &g, vector &depths) { DEBUG_PRINTF("doing accept\n"); calcAndStoreDepth( - g, rg, g.accept, deadNodes, dMin, dMax, depths, + rg, g.accept, deadNodes, dMin, dMax, depths, &NFAVertexBidiDepth::toAccept); DEBUG_PRINTF("doing accepteod\n"); deadNodes[NODE_ACCEPT] = true; // Hide accept->acceptEod edge. calcAndStoreDepth( - g, rg, g.acceptEod, deadNodes, dMin, dMax, depths, + rg, g.acceptEod, deadNodes, dMin, dMax, depths, &NFAVertexBidiDepth::toAcceptEod); } @@ -375,7 +377,7 @@ void calcDepthsFrom(const NGHolder &g, const NFAVertex src, findLoopReachable(g, g.start, deadNodes); vector dMin, dMax; - calcDepthFromSource(g, g, src, deadNodes, dMin, dMax); + calcDepthFromSource(g, src, deadNodes, dMin, dMax); depths.clear(); depths.resize(numVertices); From e271781d955e9e57b373b9b5ea97f4801e7b1717 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 5 Dec 2016 16:20:52 +1100 Subject: [PATCH 075/103] multibit, fatbit: make _size build-time only This commit makes mmbit_size() and fatbit_size compile-time only, and adds a resource limit for very large multibits. --- CMakeLists.txt | 3 +- src/nfa/castlecompile.cpp | 1 - src/nfa/mpvcompile.cpp | 2 +- src/nfa/repeatcompile.cpp | 4 +- src/rose/rose_build_bytecode.cpp | 11 +++- src/rose/rose_dump.cpp | 8 ++- src/rose/rose_internal.h | 10 +++- src/runtime.c | 1 - src/scratch.c | 75 +++++++++++++++---------- src/scratch.h | 9 ++- src/scratch_dump.cpp | 9 ++- src/util/fatbit.h | 13 +++-- src/util/{fatbit.c => fatbit_build.cpp} | 16 +++++- src/util/fatbit_build.h | 48 ++++++++++++++++ src/util/multibit.c | 61 +------------------- src/util/multibit_build.cpp | 30 +++++++++- src/util/multibit_build.h | 9 +++ src/util/multibit_internal.h | 12 ++-- unit/internal/multi_bit.cpp | 8 ++- 19 files changed, 201 insertions(+), 129 deletions(-) rename src/util/{fatbit.c => fatbit_build.cpp} (86%) create mode 100644 src/util/fatbit_build.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 9062c287..98804923 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -553,7 +553,6 @@ set (hs_exec_SRCS src/util/copybytes.h src/util/exhaust.h src/util/fatbit.h - src/util/fatbit.c src/util/join.h src/util/masked_move.h src/util/multibit.h @@ -924,6 +923,8 @@ SET (hs_SRCS src/util/determinise.h src/util/dump_mask.cpp src/util/dump_mask.h + src/util/fatbit_build.cpp + src/util/fatbit_build.h src/util/graph.h src/util/hash.h src/util/multibit_build.cpp diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 1f767353..4f3bcf2e 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -49,7 +49,6 @@ #include "util/graph.h" #include "util/make_unique.h" #include "util/multibit_build.h" -#include "util/multibit_internal.h" #include "util/report_manager.h" #include "util/ue2_containers.h" #include "util/verify_types.h" diff --git a/src/nfa/mpvcompile.cpp b/src/nfa/mpvcompile.cpp index 
4d70fa2d..908267be 100644 --- a/src/nfa/mpvcompile.cpp +++ b/src/nfa/mpvcompile.cpp @@ -34,7 +34,7 @@ #include "shufticompile.h" #include "trufflecompile.h" #include "util/alloc.h" -#include "util/multibit_internal.h" +#include "util/multibit_build.h" #include "util/order_check.h" #include "util/report_manager.h" #include "util/verify_types.h" diff --git a/src/nfa/repeatcompile.cpp b/src/nfa/repeatcompile.cpp index 2e1010bb..934dd29e 100644 --- a/src/nfa/repeatcompile.cpp +++ b/src/nfa/repeatcompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,7 +34,7 @@ #include "util/charreach.h" #include "util/depth.h" #include "util/dump_charclass.h" -#include "util/multibit_internal.h" +#include "util/multibit_build.h" #include "util/verify_types.h" #include diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index ef74619d..3d89f87a 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -81,6 +81,7 @@ #include "util/compile_context.h" #include "util/compile_error.h" #include "util/container.h" +#include "util/fatbit_build.h" #include "util/graph_range.h" #include "util/make_unique.h" #include "util/multibit_build.h" @@ -5435,11 +5436,13 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->ekeyCount = rm.numEkeys(); engine->dkeyCount = rm.numDkeys(); + engine->dkeyLogSize = fatbit_size(engine->dkeyCount); engine->invDkeyOffset = dkeyOffset; copy_bytes(ptr + dkeyOffset, rm.getDkeyToReportTable()); engine->somHorizon = ssm.somPrecision(); engine->somLocationCount = ssm.numSomSlots(); + engine->somLocationFatbitSize = fatbit_size(engine->somLocationCount); engine->needsCatchup = bc.needs_catchup ? 
1 : 0; @@ -5454,8 +5457,10 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->activeArrayCount = activeArrayCount; engine->activeLeftCount = activeLeftCount; engine->queueCount = queue_count; + engine->activeQueueArraySize = fatbit_size(queue_count); engine->eagerIterOffset = eagerIterOffset; engine->handledKeyCount = bc.handledKeys.size(); + engine->handledKeyFatbitSize = fatbit_size(engine->handledKeyCount); engine->rolesWithStateCount = bc.numStates; @@ -5475,11 +5480,13 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->lastByteHistoryIterOffset = lastByteOffset; - u32 delay_count = verify_u32(final_id_to_literal.size() - delay_base_id); - engine->delay_count = delay_count; + engine->delay_count = + verify_u32(final_id_to_literal.size() - delay_base_id); + engine->delay_fatbit_size = fatbit_size(engine->delay_count); engine->delay_base_id = delay_base_id; engine->anchored_base_id = anchored_base_id; engine->anchored_count = delay_base_id - anchored_base_id; + engine->anchored_fatbit_size = fatbit_size(engine->anchored_count); engine->rosePrefixCount = rosePrefixCount; diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 47249587..1867be50 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -42,7 +42,7 @@ #include "nfa/nfa_internal.h" #include "nfa/nfa_kind.h" #include "util/dump_charclass.h" -#include "util/multibit_internal.h" +#include "util/multibit_build.h" #include "util/multibit.h" #include @@ -1232,8 +1232,10 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, historyRequired); DUMP_U32(t, ekeyCount); DUMP_U32(t, dkeyCount); + DUMP_U32(t, dkeyLogSize); DUMP_U32(t, invDkeyOffset); DUMP_U32(t, somLocationCount); + DUMP_U32(t, somLocationFatbitSize); DUMP_U32(t, rolesWithStateCount); DUMP_U32(t, stateSize); DUMP_U32(t, anchorStateSize); @@ -1258,8 +1260,10 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, activeArrayCount); DUMP_U32(t, activeLeftCount); DUMP_U32(t, queueCount); + DUMP_U32(t, activeQueueArraySize); DUMP_U32(t, eagerIterOffset); DUMP_U32(t, handledKeyCount); + DUMP_U32(t, handledKeyFatbitSize); DUMP_U32(t, leftOffset); DUMP_U32(t, roseCount); DUMP_U32(t, lookaroundTableOffset); @@ -1280,8 +1284,10 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U64(t, floating_group_mask); DUMP_U32(t, size); DUMP_U32(t, delay_count); + DUMP_U32(t, delay_fatbit_size); DUMP_U32(t, delay_base_id); DUMP_U32(t, anchored_count); + DUMP_U32(t, anchored_fatbit_size); DUMP_U32(t, anchored_base_id); DUMP_U32(t, maxFloatingDelayedMatch); DUMP_U32(t, delayRebuildLength); diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 3a366f0d..411ce03f 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -309,9 +309,11 @@ struct RoseEngine { u32 historyRequired; /**< max amount of history required for streaming */ u32 ekeyCount; /**< number of exhaustion keys */ u32 dkeyCount; /**< number of dedupe keys */ + u32 dkeyLogSize; /**< size of fatbit for storing dkey log (bytes) */ u32 invDkeyOffset; /**< offset to table mapping from dkeys to the external * report ids */ u32 somLocationCount; /**< number of som locations required */ + u32 somLocationFatbitSize; /**< size of SOM location fatbit (bytes) */ u32 rolesWithStateCount; // number of roles with entries in state bitset u32 stateSize; /* size of the state bitset * WARNING: not the size of the rose state */ @@ -370,14 +372,18 @@ struct RoseEngine { u32 activeArrayCount; //number of nfas 
tracked in the active array u32 activeLeftCount; //number of nfas tracked in the active rose array u32 queueCount; /**< number of nfa queues */ + u32 activeQueueArraySize; //!< size of fatbit for active queues (bytes) u32 eagerIterOffset; /**< offset to sparse iter for eager prefixes or 0 if * none */ /** \brief Number of keys used by CHECK_SET_HANDLED instructions in role - * programs. Used to size the handled_roles fatbit in scratch. */ + * programs. */ u32 handledKeyCount; + /** \brief Size of the handled keys fatbit in scratch (bytes). */ + u32 handledKeyFatbitSize; + u32 leftOffset; u32 roseCount; u32 lookaroundTableOffset; //!< base of lookaround offset list (of s8 values) @@ -412,9 +418,11 @@ struct RoseEngine { rose_group floating_group_mask; /* groups that are used by the ftable */ u32 size; // (bytes) u32 delay_count; /* number of delayed literal ids. */ + u32 delay_fatbit_size; //!< size of each delay fatbit in scratch (bytes) u32 delay_base_id; /* literal id of the first delayed literal. * delayed literal ids are contiguous */ u32 anchored_count; /* number of anchored literal ids */ + u32 anchored_fatbit_size; //!< size of each anch fatbit in scratch (bytes) u32 anchored_base_id; /* literal id of the first literal in the A table. * anchored literal ids are contiguous */ u32 maxFloatingDelayedMatch; /* max offset that a delayed literal can diff --git a/src/runtime.c b/src/runtime.c index d8e2f28d..88e866dc 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -55,7 +55,6 @@ #include "state.h" #include "ue2common.h" #include "util/exhaust.h" -#include "util/fatbit.h" #include "util/multibit.h" static really_inline diff --git a/src/scratch.c b/src/scratch.c index dae2c672..8cbe9760 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -43,17 +43,19 @@ #include "nfa/nfa_api_queue.h" #include "rose/rose_internal.h" #include "util/fatbit.h" -#include "util/multibit.h" /** * Determine the space required for a correctly aligned array of fatbit * structure, laid out as: * * - an array of num_entries pointers, each to a fatbit. - * - an array of fatbit structures, each of size fatbit_size(num_keys). + * - an array of fatbit structures, each of size fatbit_len. + * + * fatbit_len should have been determined at compile time, via the + * fatbit_size() call. */ static -size_t fatbit_array_size(u32 num_entries, u32 num_keys) { +size_t fatbit_array_size(u32 num_entries, u32 fatbit_len) { size_t len = 0; // Array of pointers to each fatbit entry. @@ -61,7 +63,7 @@ size_t fatbit_array_size(u32 num_entries, u32 num_keys) { // Fatbit entries themselves. len = ROUNDUP_N(len, alignof(struct fatbit)); - len += (size_t)fatbit_size(num_keys) * num_entries; + len += (size_t)fatbit_len * num_entries; return ROUNDUP_N(len, 8); // Round up for potential padding. 
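+    /* e.g. on a 64-bit target, 4 entries with fatbit_len 32 need 32 bytes
+     * of pointers plus 4 * 32 bytes of fatbit storage: 160 bytes total. */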
} @@ -71,17 +73,19 @@ size_t fatbit_array_size(u32 num_entries, u32 num_keys) { static hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { u32 queueCount = proto->queueCount; - u32 deduperCount = proto->deduper.log_size; + u32 activeQueueArraySize = proto->activeQueueArraySize; + u32 deduperCount = proto->deduper.dkey_count; + u32 deduperLogSize = proto->deduper.log_size; u32 bStateSize = proto->bStateSize; u32 tStateSize = proto->tStateSize; u32 fullStateSize = proto->fullStateSize; u32 anchored_literal_region_len = proto->anchored_literal_region_len; - u32 anchored_literal_region_width = proto->anchored_literal_count; + u32 anchored_literal_fatbit_size = proto->anchored_literal_fatbit_size; u32 som_store_size = proto->som_store_count * sizeof(u64a); u32 som_attempted_store_size = proto->som_store_count * sizeof(u64a); - u32 som_now_size = fatbit_size(proto->som_store_count); - u32 som_attempted_size = fatbit_size(proto->som_store_count); + u32 som_now_size = proto->som_fatbit_size; + u32 som_attempted_size = proto->som_fatbit_size; struct hs_scratch *s; struct hs_scratch *s_tmp; @@ -91,18 +95,18 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { assert(anchored_literal_region_len < 8 * sizeof(s->al_log_sum)); size_t anchored_literal_region_size = fatbit_array_size( - anchored_literal_region_len, anchored_literal_region_width); + anchored_literal_region_len, proto->anchored_literal_fatbit_size); size_t delay_region_size = - fatbit_array_size(DELAY_SLOT_COUNT, proto->delay_count); + fatbit_array_size(DELAY_SLOT_COUNT, proto->delay_fatbit_size); // the size is all the allocated stuff, not including the struct itself size_t size = queue_size + 63 + bStateSize + tStateSize + fullStateSize + 63 /* cacheline padding */ - + fatbit_size(proto->handledKeyCount) /* handled roles */ - + fatbit_size(queueCount) /* active queue array */ - + 2 * fatbit_size(deduperCount) /* need odd and even logs */ - + 2 * fatbit_size(deduperCount) /* ditto som logs */ + + proto->handledKeyFatbitSize /* handled roles */ + + activeQueueArraySize /* active queue array */ + + 2 * deduperLogSize /* need odd and even logs */ + + 2 * deduperLogSize /* ditto som logs */ + 2 * sizeof(u64a) * deduperCount /* start offsets for som */ + anchored_literal_region_size + qmpq_size + delay_region_size @@ -157,7 +161,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { for (u32 i = 0; i < DELAY_SLOT_COUNT; i++) { s->delay_slots[i] = (struct fatbit *)current; assert(ISALIGNED(s->delay_slots[i])); - current += fatbit_size(proto->delay_count); + current += proto->delay_fatbit_size; } current = ROUNDUP_PTR(current, alignof(struct fatbit *)); @@ -167,7 +171,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { for (u32 i = 0; i < anchored_literal_region_len; i++) { s->al_log[i] = (struct fatbit *)current; assert(ISALIGNED(s->al_log[i])); - current += fatbit_size(anchored_literal_region_width); + current += anchored_literal_fatbit_size; } current = ROUNDUP_PTR(current, 8); @@ -193,22 +197,22 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { assert(ISALIGNED_N(current, 8)); s->aqa = (struct fatbit *)current; - current += fatbit_size(queueCount); + current += activeQueueArraySize; s->handled_roles = (struct fatbit *)current; - current += fatbit_size(proto->handledKeyCount); + current += proto->handledKeyFatbitSize; s->deduper.log[0] = (struct fatbit *)current; - current += fatbit_size(deduperCount); 
+ current += deduperLogSize; s->deduper.log[1] = (struct fatbit *)current; - current += fatbit_size(deduperCount); + current += deduperLogSize; s->deduper.som_log[0] = (struct fatbit *)current; - current += fatbit_size(deduperCount); + current += deduperLogSize; s->deduper.som_log[1] = (struct fatbit *)current; - current += fatbit_size(deduperCount); + current += deduperLogSize; s->som_set_now = (struct fatbit *)current; current += som_now_size; @@ -293,19 +297,19 @@ hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) { proto->anchored_literal_region_len = rose->anchoredDistance; } - if (rose->anchored_count > proto->anchored_literal_count) { + if (rose->anchored_fatbit_size > proto->anchored_literal_fatbit_size) { resize = 1; - proto->anchored_literal_count = rose->anchored_count; + proto->anchored_literal_fatbit_size = rose->anchored_fatbit_size; } - if (rose->delay_count > proto->delay_count) { + if (rose->delay_fatbit_size > proto->delay_fatbit_size) { resize = 1; - proto->delay_count = rose->delay_count; + proto->delay_fatbit_size = rose->delay_fatbit_size; } - if (rose->handledKeyCount > proto->handledKeyCount) { + if (rose->handledKeyFatbitSize > proto->handledKeyFatbitSize) { resize = 1; - proto->handledKeyCount = rose->handledKeyCount; + proto->handledKeyFatbitSize = rose->handledKeyFatbitSize; } if (rose->tStateSize > proto->tStateSize) { @@ -319,12 +323,22 @@ hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) { proto->som_store_count = som_store_count; } + if (rose->somLocationFatbitSize > proto->som_fatbit_size) { + resize = 1; + proto->som_fatbit_size = rose->somLocationFatbitSize; + } + u32 queueCount = rose->queueCount; if (queueCount > proto->queueCount) { resize = 1; proto->queueCount = queueCount; } + if (rose->activeQueueArraySize > proto->activeQueueArraySize) { + resize = 1; + proto->activeQueueArraySize = rose->activeQueueArraySize; + } + u32 bStateSize = 0; if (rose->mode == HS_MODE_BLOCK) { bStateSize = rose->stateOffsets.end; @@ -344,9 +358,10 @@ hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) { proto->fullStateSize = fullStateSize; } - if (rose->dkeyCount > proto->deduper.log_size) { + if (rose->dkeyCount > proto->deduper.dkey_count) { resize = 1; - proto->deduper.log_size = rose->dkeyCount; + proto->deduper.dkey_count = rose->dkeyCount; + proto->deduper.log_size = rose->dkeyLogSize; } if (resize) { diff --git a/src/scratch.h b/src/scratch.h index 8c7a1281..b59dc8d4 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -148,6 +148,7 @@ struct match_deduper { struct fatbit *log[2]; /**< even, odd logs */ struct fatbit *som_log[2]; /**< even, odd fatbit logs for som */ u64a *som_start_log[2]; /**< even, odd start offset logs for som */ + u32 dkey_count; u32 log_size; u64a current_report_offset; u8 som_log_dirty; @@ -162,6 +163,7 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { u32 magic; u8 in_use; /**< non-zero when being used by an API call. 
*/ u32 queueCount; + u32 activeQueueArraySize; /**< size of active queue array fatbit in bytes */ u32 bStateSize; /**< sizeof block mode states */ u32 tStateSize; /**< sizeof transient rose states */ u32 fullStateSize; /**< size of uncompressed nfa state */ @@ -179,7 +181,7 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { struct core_info core_info; struct match_deduper deduper; u32 anchored_literal_region_len; - u32 anchored_literal_count; + u32 anchored_literal_fatbit_size; /**< size of each anch fatbit in bytes */ struct fatbit *handled_roles; /**< fatbit of ROLES (not states) already * handled by this literal */ u64a *som_store; /**< array of som locations */ @@ -191,8 +193,9 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { * location had been writable */ u64a som_set_now_offset; /**< offset at which som_set_now represents */ u32 som_store_count; - u32 handledKeyCount; - u32 delay_count; + u32 som_fatbit_size; /**< size of som location fatbit structures in bytes */ + u32 handledKeyFatbitSize; /**< size of handled_keys fatbit in bytes */ + u32 delay_fatbit_size; /**< size of each delay fatbit in bytes */ u32 scratchSize; char *scratch_alloc; /* user allocated scratch object */ u8 ALIGN_DIRECTIVE fdr_temp_buf[FDR_TEMP_BUF_SIZE]; diff --git a/src/scratch_dump.cpp b/src/scratch_dump.cpp index 78a854bb..47c93c37 100644 --- a/src/scratch_dump.cpp +++ b/src/scratch_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,7 +32,7 @@ #include "scratch_dump.h" #include "hs_internal.h" #include "ue2common.h" -#include "util/multibit_internal.h" +#include "util/multibit_build.h" #include "nfa/nfa_api_queue.h" #include "rose/rose_internal.h" @@ -54,12 +54,11 @@ void dumpScratch(const struct hs_scratch *s, FILE *f) { fprintf(f, " queues : %zu bytes\n", s->queueCount * sizeof(struct mq)); fprintf(f, " bStateSize : %u bytes\n", s->bStateSize); - fprintf(f, " active queue array : %u bytes\n", - mmbit_size(s->queueCount)); + fprintf(f, " active queue array : %u bytes\n", s->activeQueueArraySize); fprintf(f, " qmpq : %zu bytes\n", s->queueCount * sizeof(struct queue_match)); fprintf(f, " delay info : %u bytes\n", - mmbit_size(s->delay_count) * DELAY_SLOT_COUNT); + s->delay_fatbit_size * DELAY_SLOT_COUNT); } } // namespace ue2 diff --git a/src/util/fatbit.h b/src/util/fatbit.h index ad607638..3c65db1a 100644 --- a/src/util/fatbit.h +++ b/src/util/fatbit.h @@ -40,6 +40,10 @@ #include "multibit.h" #include "ue2common.h" +#ifdef __cplusplus +extern "C" { +#endif + #define MIN_FAT_SIZE 32 struct fatbit { @@ -82,11 +86,8 @@ u32 fatbit_iterate(const struct fatbit *bits, u32 total_bits, u32 it_in) { return mmbit_iterate(bits->fb_int.raw, total_bits, it_in); } -/** \brief Return the size in bytes of a fatbit that can store the given - * number of bits. - * - * Not for use in performance-critical code, implementation is in fatbit.c. 
- */ -u32 fatbit_size(u32 total_bits); +#ifdef __cplusplus +} // extern "C" +#endif #endif diff --git a/src/util/fatbit.c b/src/util/fatbit_build.cpp similarity index 86% rename from src/util/fatbit.c rename to src/util/fatbit_build.cpp index a80c3165..77f4b550 100644 --- a/src/util/fatbit.c +++ b/src/util/fatbit_build.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,9 +26,19 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include "fatbit_build.h" + #include "fatbit.h" -#include "multibit.h" +#include "multibit_build.h" + +#include + +using namespace std; + +namespace ue2 { u32 fatbit_size(u32 total_bits) { - return MAX(sizeof(struct fatbit), mmbit_size(total_bits)); + return max(u32{sizeof(struct fatbit)}, mmbit_size(total_bits)); } + +} // namespace ue2 diff --git a/src/util/fatbit_build.h b/src/util/fatbit_build.h new file mode 100644 index 00000000..d7611657 --- /dev/null +++ b/src/util/fatbit_build.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Fatbit: build code + */ + +#ifndef FATBIT_BUILD_H +#define FATBIT_BUILD_H + +#include "ue2common.h" + +namespace ue2 { + +/** + * \brief Return the size in bytes of a fatbit that can store the given + * number of bits. 
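+ *
+ * The result is the larger of sizeof(struct fatbit) and the equivalent
+ * mmbit_size(), so e.g. fatbit_size(1) still returns sizeof(struct fatbit)
+ * rather than the one-byte flat multibit size.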
+ */ +u32 fatbit_size(u32 total_bits); + +} // namespace ue2 + +#endif // FATBIT_BUILD_H diff --git a/src/util/multibit.c b/src/util/multibit.c index c22b73ff..de192d7d 100644 --- a/src/util/multibit.c +++ b/src/util/multibit.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -138,62 +138,3 @@ const u32 mmbit_root_offset_from_level[7] = { 1 + (1 << MMB_KEY_SHIFT) + (1 << MMB_KEY_SHIFT * 2) + (1 << MMB_KEY_SHIFT * 3) + (1 << MMB_KEY_SHIFT * 4), 1 + (1 << MMB_KEY_SHIFT) + (1 << MMB_KEY_SHIFT * 2) + (1 << MMB_KEY_SHIFT * 3) + (1 << MMB_KEY_SHIFT * 4) + (1 << MMB_KEY_SHIFT * 5), }; - -u32 mmbit_size(u32 total_bits) { - MDEBUG_PRINTF("%u\n", total_bits); - - // Flat model multibit structures are just stored as a bit vector. - if (total_bits <= MMB_FLAT_MAX_BITS) { - return ROUNDUP_N(total_bits, 8) / 8; - } - - u64a current_level = 1; // Number of blocks on current level. - u64a total = 0; // Total number of blocks. - while (current_level * MMB_KEY_BITS < total_bits) { - total += current_level; - current_level <<= MMB_KEY_SHIFT; - } - - // Last level is a one-for-one bit vector. It needs room for total_bits - // elements, rounded up to the nearest block. - u64a last_level = ((u64a)total_bits + MMB_KEY_BITS - 1) / MMB_KEY_BITS; - total += last_level; - - assert(total * sizeof(MMB_TYPE) <= UINT32_MAX); - return (u32)(total * sizeof(MMB_TYPE)); -} - -#ifdef DUMP_SUPPORT - -#include -#include - -/** \brief Dump a sparse iterator's keys to stdout. */ -void mmbit_sparse_iter_dump(const struct mmbit_sparse_iter *it, - u32 total_bits) { - // Expediency and future-proofing: create a temporary multibit of the right - // size with all the bits on, then walk it with this sparse iterator. - size_t bytes = mmbit_size(total_bits); - u8 *bits = malloc(bytes); - if (!bits) { - printf("Failed to alloc %zu bytes for temp multibit", bytes); - return; - } - for (u32 i = 0; i < total_bits; i++) { - mmbit_set_i(bits, total_bits, i); - } - - struct mmbit_sparse_state s[MAX_SPARSE_ITER_STATES]; - u32 idx = 0; - for (u32 i = mmbit_sparse_iter_begin(bits, total_bits, &idx, it, s); - i != MMB_INVALID; - i = mmbit_sparse_iter_next(bits, total_bits, i, &idx, it, s)) { - printf("%u ", i); - } - - printf("(%u keys)", idx + 1); - - free(bits); -} - -#endif // DUMP_SUPPORT diff --git a/src/util/multibit_build.cpp b/src/util/multibit_build.cpp index 2a402d8c..5fe2d617 100644 --- a/src/util/multibit_build.cpp +++ b/src/util/multibit_build.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,6 +34,7 @@ #include "scatter.h" #include "ue2common.h" #include "rose/rose_build_scatter.h" +#include "util/compile_error.h" #include #include // for memset @@ -45,6 +46,32 @@ using namespace std; namespace ue2 { +u32 mmbit_size(u32 total_bits) { + if (total_bits > MMB_MAX_BITS) { + throw ResourceLimitError(); + } + + // Flat model multibit structures are just stored as a bit vector. + if (total_bits <= MMB_FLAT_MAX_BITS) { + return ROUNDUP_N(total_bits, 8) / 8; + } + + u64a current_level = 1; // Number of blocks on current level. + u64a total = 0; // Total number of blocks. 
+ while (current_level * MMB_KEY_BITS < total_bits) { + total += current_level; + current_level <<= MMB_KEY_SHIFT; + } + + // Last level is a one-for-one bit vector. It needs room for total_bits + // elements, rounded up to the nearest block. + u64a last_level = ((u64a)total_bits + MMB_KEY_BITS - 1) / MMB_KEY_BITS; + total += last_level; + + assert(total * sizeof(MMB_TYPE) <= UINT32_MAX); + return (u32)(total * sizeof(MMB_TYPE)); +} + namespace { struct TreeNode { MMB_TYPE mask = 0; @@ -133,6 +160,7 @@ void mmbBuildSparseIterator(vector &out, assert(out.empty()); assert(!bits.empty()); assert(total_bits > 0); + assert(total_bits <= MMB_MAX_BITS); DEBUG_PRINTF("building sparse iter for %zu of %u bits\n", bits.size(), total_bits); diff --git a/src/util/multibit_build.h b/src/util/multibit_build.h index 5fbaab87..951f1fb4 100644 --- a/src/util/multibit_build.h +++ b/src/util/multibit_build.h @@ -50,6 +50,15 @@ size_t hash_value(const mmbit_sparse_iter &iter) { namespace ue2 { +/** + * \brief Return the size in bytes of a multibit that can store the given + * number of bits. + * + * This will throw a resource limit assertion if the requested mmbit is too + * large. + */ +u32 mmbit_size(u32 total_bits); + /** \brief Construct a sparse iterator over the values in \a bits for a * multibit of size \a total_bits. */ void mmbBuildSparseIterator(std::vector &out, diff --git a/src/util/multibit_internal.h b/src/util/multibit_internal.h index de87fe2a..350f3bfd 100644 --- a/src/util/multibit_internal.h +++ b/src/util/multibit_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -47,6 +47,9 @@ extern "C" { typedef u64a MMB_TYPE; /**< Basic block type for mmbit operations. */ #define MMB_MAX_LEVEL 6 /**< Maximum level in the mmbit pyramid. */ +/** \brief Maximum number of keys (bits) in a multibit. */ +#define MMB_MAX_BITS (1U << 31) + /** \brief Sparse iterator record type. * * A sparse iterator is a tree of these records, where val identifies the @@ -71,13 +74,6 @@ struct mmbit_sparse_state { /** \brief Maximum number of \ref mmbit_sparse_state that could be needed. */ #define MAX_SPARSE_ITER_STATES (6 + 1) -/** \brief Return the size in bytes of a multibit that can store the given - * number of bits. - * - * Not for use in performance-critical code, implementation is in multibit.c. 
- */ -u32 mmbit_size(u32 total_bits); - #ifdef __cplusplus } // extern "C" #endif diff --git a/unit/internal/multi_bit.cpp b/unit/internal/multi_bit.cpp index 925092b3..38da1d8a 100644 --- a/unit/internal/multi_bit.cpp +++ b/unit/internal/multi_bit.cpp @@ -30,10 +30,10 @@ #include "gtest/gtest.h" #include "ue2common.h" +#include "util/compile_error.h" #include "util/make_unique.h" #include "util/multibit.h" #include "util/multibit_build.h" -#include "util/target_info.h" #include #include @@ -1303,9 +1303,11 @@ static const MultiBitTestParam multibitTests[] = { { 1U << 29, 24413 }, { 1U << 30, 50377 }, { 1U << 31, 104729 }, - - // { UINT32_MAX, 104729 }, // Very slow }; INSTANTIATE_TEST_CASE_P(MultiBit, MultiBitTest, ValuesIn(multibitTests)); +TEST(MultiBit, SizeTooBig) { + ASSERT_NO_THROW(mmbit_size(MMB_MAX_BITS)); + ASSERT_THROW(mmbit_size(MMB_MAX_BITS + 1), ResourceLimitError); +} From e993fe1eb0ebdc7e3eac452f59004252cb47990c Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Thu, 6 Oct 2016 13:51:14 +1100 Subject: [PATCH 076/103] Remove erroneous semicolon --- src/util/multibit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/multibit.h b/src/util/multibit.h index ddc8bbdd..4df8733a 100644 --- a/src/util/multibit.h +++ b/src/util/multibit.h @@ -162,7 +162,7 @@ u32 mmb_popcount(MMB_TYPE val) { } #ifndef MMMB_DEBUG -#define MDEBUG_PRINTF(x, ...) do { } while(0); +#define MDEBUG_PRINTF(x, ...) do { } while(0) #else #define MDEBUG_PRINTF DEBUG_PRINTF #endif From 2214296b7f4302bf1f81d1e25c2b1f3fa250222f Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Thu, 6 Oct 2016 15:33:24 +1100 Subject: [PATCH 077/103] Convert compile-time code to not require SIMD --- CMakeLists.txt | 8 +- src/hwlm/hwlm_build.cpp | 5 +- src/nfa/accel_dfa_build_strat.cpp | 13 ++-- src/nfa/accel_dump.cpp | 88 +++++++++++++--------- src/nfa/accelcompile.cpp | 63 ++++++++-------- src/nfa/castle_dump.cpp | 6 +- src/nfa/castlecompile.cpp | 6 +- src/nfa/lbr_dump.cpp | 6 +- src/nfa/mcsheng_compile.cpp | 2 +- src/nfa/mcsheng_internal.h | 2 +- src/nfa/mpvcompile.cpp | 7 +- src/nfa/multishufti.h | 4 +- src/nfa/sheng_internal.h | 2 +- src/nfa/shengcompile.cpp | 5 +- src/nfa/shengdump.cpp | 6 +- src/nfa/shufticompile.cpp | 8 +- src/nfa/shufticompile.h | 6 +- src/nfa/trufflecompile.cpp | 24 +++--- src/nfa/trufflecompile.h | 6 +- src/nfagraph/ng_lbr.cpp | 4 +- src/rose/rose_build_bytecode.cpp | 2 +- src/util/simd_types.h | 4 + unit/CMakeLists.txt | 2 +- unit/internal/shufti.cpp | 117 +++++++++++++++++------------- unit/internal/truffle.cpp | 70 +++++++++--------- 25 files changed, 254 insertions(+), 212 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 98804923..0559932d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -210,10 +210,12 @@ else() if (NOT CMAKE_C_FLAGS MATCHES .*march.*) message(STATUS "Building for current host CPU") - set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -march=native -mtune=native") + set(ARCH_C_FLAGS "${ARCH_C_FLAGS} -march=native -mtune=native") endif() + + # we don't use these for the lib, but other tools/unit tests if (NOT CMAKE_CXX_FLAGS MATCHES .*march.*) - set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -march=native -mtune=native") + set(ARCH_CXX_FLAGS "${ARCH_CXX_FLAGS} -march=native -mtune=native") endif() if(CMAKE_COMPILER_IS_GNUCC) @@ -389,7 +391,7 @@ if (NOT WIN32) endif() # only set these after all tests are done -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS} ${EXTRA_C_FLAGS}") set(CMAKE_CXX_FLAGS 
"${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") diff --git a/src/hwlm/hwlm_build.cpp b/src/hwlm/hwlm_build.cpp index 3ab10f23..fa6335c9 100644 --- a/src/hwlm/hwlm_build.cpp +++ b/src/hwlm/hwlm_build.cpp @@ -461,7 +461,8 @@ void findForwardAccelScheme(const vector &lits, } const CharReach &cr = reach[min_offset]; - if (shuftiBuildMasks(cr, &aux->shufti.lo, &aux->shufti.hi) != -1) { + if (-1 != + shuftiBuildMasks(cr, (u8 *)&aux->shufti.lo, (u8 *)&aux->shufti.hi)) { DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n", describeClass(cr).c_str(), cr.count(), min_offset); aux->shufti.accel_type = ACCEL_SHUFTI; @@ -469,7 +470,7 @@ void findForwardAccelScheme(const vector &lits, return; } - truffleBuildMasks(cr, &aux->truffle.mask1, &aux->truffle.mask2); + truffleBuildMasks(cr, (u8 *)&aux->truffle.mask1, (u8 *)&aux->truffle.mask2); DEBUG_PRINTF("built truffle for %s (%zu chars, offset %u)\n", describeClass(cr).c_str(), cr.count(), min_offset); aux->truffle.accel_type = ACCEL_TRUFFLE; diff --git a/src/nfa/accel_dfa_build_strat.cpp b/src/nfa/accel_dfa_build_strat.cpp index 70d2d103..d257b530 100644 --- a/src/nfa/accel_dfa_build_strat.cpp +++ b/src/nfa/accel_dfa_build_strat.cpp @@ -473,9 +473,10 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx, } if (double_byte_ok(info) && - shuftiBuildDoubleMasks(info.double_cr, info.double_byte, - &accel->dshufti.lo1, &accel->dshufti.hi1, - &accel->dshufti.lo2, &accel->dshufti.hi2)) { + shuftiBuildDoubleMasks( + info.double_cr, info.double_byte, (u8 *)&accel->dshufti.lo1, + (u8 *)&accel->dshufti.hi1, (u8 *)&accel->dshufti.lo2, + (u8 *)&accel->dshufti.hi2)) { accel->accel_type = ACCEL_DSHUFTI; accel->dshufti.offset = verify_u8(info.double_offset); DEBUG_PRINTF("state %hu is double shufti\n", this_idx); @@ -511,14 +512,16 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx, } accel->accel_type = ACCEL_SHUFTI; - if (-1 != shuftiBuildMasks(info.cr, &accel->shufti.lo, &accel->shufti.hi)) { + if (-1 != shuftiBuildMasks(info.cr, (u8 *)&accel->shufti.lo, + (u8 *)&accel->shufti.hi)) { DEBUG_PRINTF("state %hu is shufti\n", this_idx); return; } assert(!info.cr.none()); accel->accel_type = ACCEL_TRUFFLE; - truffleBuildMasks(info.cr, &accel->truffle.mask1, &accel->truffle.mask2); + truffleBuildMasks(info.cr, (u8 *)&accel->truffle.mask1, + (u8 *)&accel->truffle.mask2); DEBUG_PRINTF("state %hu is truffle\n", this_idx); } diff --git a/src/nfa/accel_dump.cpp b/src/nfa/accel_dump.cpp index 6e2b8f41..e99e71a5 100644 --- a/src/nfa/accel_dump.cpp +++ b/src/nfa/accel_dump.cpp @@ -41,7 +41,7 @@ #include "util/charreach.h" #include "util/dump_charclass.h" #include "util/dump_mask.h" -#include "util/simd_utils.h" +#include "util/simd_types.h" #include #include @@ -147,16 +147,20 @@ const char *accelName(u8 accel_type) { } static -void dumpShuftiCharReach(FILE *f, const m128 &lo, const m128 &hi) { +void dumpShuftiCharReach(FILE *f, const u8 *lo, const u8 *hi) { CharReach cr = shufti2cr(lo, hi); fprintf(f, "count %zu class %s\n", cr.count(), describeClass(cr).c_str()); } static -vector shufti2cr_array(const m128 lo_in, const m128 hi_in) { - const u8 *lo = (const u8 *)&lo_in; - const u8 *hi = (const u8 *)&hi_in; +vector dshufti2cr_array(const u8 *lo_in, const u8 *hi_in) { + u8 lo[16]; + u8 hi[16]; + for (u32 i = 0; i < 16; i++) { + lo[i] = ~lo_in[i]; + hi[i] = ~hi_in[i]; + } vector crs(8); for (u32 i = 0; i < 256; i++) { u32 combined = lo[(u8)i & 0xf] & hi[(u8)i >> 4]; @@ -169,10 +173,10 @@ vector shufti2cr_array(const m128 lo_in, const m128 hi_in) { } static 
-void dumpDShuftiCharReach(FILE *f, const m128 &lo1, const m128 &hi1, - const m128 &lo2, const m128 &hi2) { - vector cr1 = shufti2cr_array(not128(lo1), not128(hi1)); - vector cr2 = shufti2cr_array(not128(lo2), not128(hi2)); +void dumpDShuftiCharReach(FILE *f, const u8 *lo1, const u8 *hi1, + const u8 *lo2, const u8 *hi2) { + vector cr1 = dshufti2cr_array(lo1, hi1); + vector cr2 = dshufti2cr_array(lo2, hi2); map > cr1_group; assert(cr1.size() == 8 && cr2.size() == 8); for (u32 i = 0; i < 8; i++) { @@ -208,26 +212,22 @@ void dumpDShuftiCharReach(FILE *f, const m128 &lo1, const m128 &hi1, } static -void dumpShuftiMasks(FILE *f, const m128 &lo, const m128 &hi) { - fprintf(f, "lo %s\n", - dumpMask((const u8 *)&lo, 128).c_str()); - fprintf(f, "hi %s\n", - dumpMask((const u8 *)&hi, 128).c_str()); +void dumpShuftiMasks(FILE *f, const u8 *lo, const u8 *hi) { + fprintf(f, "lo %s\n", dumpMask(lo, 128).c_str()); + fprintf(f, "hi %s\n", dumpMask(hi, 128).c_str()); } static -void dumpTruffleCharReach(FILE *f, const m128 &hiset, const m128 &hiclear) { +void dumpTruffleCharReach(FILE *f, const u8 *hiset, const u8 *hiclear) { CharReach cr = truffle2cr(hiset, hiclear); fprintf(f, "count %zu class %s\n", cr.count(), describeClass(cr).c_str()); } static -void dumpTruffleMasks(FILE *f, const m128 &hiset, const m128 &hiclear) { - fprintf(f, "lo %s\n", - dumpMask((const u8 *)&hiset, 128).c_str()); - fprintf(f, "hi %s\n", - dumpMask((const u8 *)&hiclear, 128).c_str()); +void dumpTruffleMasks(FILE *f, const u8 *hiset, const u8 *hiclear) { + fprintf(f, "lo %s\n", dumpMask(hiset, 128).c_str()); + fprintf(f, "hi %s\n", dumpMask(hiclear, 128).c_str()); } @@ -256,23 +256,31 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) { break; case ACCEL_SHUFTI: { fprintf(f, "\n"); - dumpShuftiMasks(f, accel.shufti.lo, accel.shufti.hi); - dumpShuftiCharReach(f, accel.shufti.lo, accel.shufti.hi); + dumpShuftiMasks(f, (const u8 *)&accel.shufti.lo, + (const u8 *)&accel.shufti.hi); + dumpShuftiCharReach(f, (const u8 *)&accel.shufti.lo, + (const u8 *)&accel.shufti.hi); break; } case ACCEL_DSHUFTI: fprintf(f, "\n"); fprintf(f, "mask 1\n"); - dumpShuftiMasks(f, accel.dshufti.lo1, accel.dshufti.hi1); + dumpShuftiMasks(f, (const u8 *)&accel.dshufti.lo1, + (const u8 *)&accel.dshufti.hi1); fprintf(f, "mask 2\n"); - dumpShuftiMasks(f, accel.dshufti.lo2, accel.dshufti.hi2); - dumpDShuftiCharReach(f, accel.dshufti.lo1, accel.dshufti.hi1, - accel.dshufti.lo2, accel.dshufti.hi2); + dumpShuftiMasks(f, (const u8 *)&accel.dshufti.lo2, + (const u8 *)&accel.dshufti.hi2); + dumpDShuftiCharReach(f, (const u8 *)&accel.dshufti.lo1, + (const u8 *)&accel.dshufti.hi1, + (const u8 *)&accel.dshufti.lo2, + (const u8 *)&accel.dshufti.hi2); break; case ACCEL_TRUFFLE: { fprintf(f, "\n"); - dumpTruffleMasks(f, accel.truffle.mask1, accel.truffle.mask2); - dumpTruffleCharReach(f, accel.truffle.mask1, accel.truffle.mask2); + dumpTruffleMasks(f, (const u8 *)&accel.truffle.mask1, + (const u8 *)&accel.truffle.mask2); + dumpTruffleCharReach(f, (const u8 *)&accel.truffle.mask1, + (const u8 *)&accel.truffle.mask2); break; } case ACCEL_MLVERM: @@ -297,28 +305,36 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) { case ACCEL_MSSHUFTI: case ACCEL_MSGSHUFTI: fprintf(f, " len:%u\n", accel.mshufti.len); - dumpShuftiMasks(f, accel.mshufti.lo, accel.mshufti.hi); - dumpShuftiCharReach(f, accel.mshufti.lo, accel.mshufti.hi); + dumpShuftiMasks(f, (const u8 *)&accel.mshufti.lo, + (const u8 *)&accel.mshufti.hi); + dumpShuftiCharReach(f, (const u8 *)&accel.mshufti.lo, + (const u8 
*)&accel.mshufti.hi); break; case ACCEL_MDSSHUFTI: case ACCEL_MDSGSHUFTI: fprintf(f, " len1:%u len2:%u\n", accel.mdshufti.len1, accel.mdshufti.len2); - dumpShuftiMasks(f, accel.mdshufti.lo, accel.mdshufti.hi); - dumpShuftiCharReach(f, accel.mdshufti.lo, accel.mdshufti.hi); + dumpShuftiMasks(f, (const u8 *)&accel.mdshufti.lo, + (const u8 *)&accel.mdshufti.hi); + dumpShuftiCharReach(f, (const u8 *)&accel.mdshufti.lo, + (const u8 *)&accel.mdshufti.hi); break; case ACCEL_MLTRUFFLE: case ACCEL_MLGTRUFFLE: case ACCEL_MSTRUFFLE: case ACCEL_MSGTRUFFLE: fprintf(f, " len:%u\n", accel.mtruffle.len); - dumpTruffleMasks(f, accel.mtruffle.mask1, accel.mtruffle.mask2); - dumpTruffleCharReach(f, accel.mtruffle.mask1, accel.mtruffle.mask2); + dumpTruffleMasks(f, (const u8 *)&accel.mtruffle.mask1, + (const u8 *)&accel.mtruffle.mask2); + dumpTruffleCharReach(f, (const u8 *)&accel.mtruffle.mask1, + (const u8 *)&accel.mtruffle.mask2); break; case ACCEL_MDSTRUFFLE: case ACCEL_MDSGTRUFFLE: fprintf(f, " len1:%u len2:%u\n", accel.mdtruffle.len1, accel.mdtruffle.len2); - dumpTruffleMasks(f, accel.mdtruffle.mask1, accel.mdtruffle.mask2); - dumpTruffleCharReach(f, accel.mdtruffle.mask1, accel.mdtruffle.mask2); + dumpTruffleMasks(f, (const u8 *)&accel.mdtruffle.mask1, + (const u8 *)&accel.mdtruffle.mask2); + dumpTruffleCharReach(f, (const u8 *)&accel.mdtruffle.mask1, + (const u8 *)&accel.mdtruffle.mask2); break; default: fprintf(f, "\n"); diff --git a/src/nfa/accelcompile.cpp b/src/nfa/accelcompile.cpp index 75960dda..32e569ba 100644 --- a/src/nfa/accelcompile.cpp +++ b/src/nfa/accelcompile.cpp @@ -72,8 +72,8 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) { } DEBUG_PRINTF("attempting shufti for %zu chars\n", outs); - if (-1 != shuftiBuildMasks(info.single_stops, &aux->shufti.lo, - &aux->shufti.hi)) { + if (-1 != shuftiBuildMasks(info.single_stops, (u8 *)&aux->shufti.lo, + (u8 *)&aux->shufti.hi)) { aux->accel_type = ACCEL_SHUFTI; aux->shufti.offset = offset; DEBUG_PRINTF("shufti built OK\n"); @@ -86,8 +86,8 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) { DEBUG_PRINTF("building Truffle for %zu chars\n", outs); aux->accel_type = ACCEL_TRUFFLE; aux->truffle.offset = offset; - truffleBuildMasks(info.single_stops, &aux->truffle.mask1, - &aux->truffle.mask2); + truffleBuildMasks(info.single_stops, (u8 *)&aux->truffle.mask1, + (u8 *)&aux->truffle.mask2); return; } @@ -212,9 +212,10 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) { " two-byte literals\n", outs1, outs2); aux->accel_type = ACCEL_DSHUFTI; aux->dshufti.offset = offset; - if (shuftiBuildDoubleMasks(info.double_stop1, info.double_stop2, - &aux->dshufti.lo1, &aux->dshufti.hi1, - &aux->dshufti.lo2, &aux->dshufti.hi2)) { + if (shuftiBuildDoubleMasks( + info.double_stop1, info.double_stop2, (u8 *)&aux->dshufti.lo1, + (u8 *)&aux->dshufti.hi1, (u8 *)&aux->dshufti.lo2, + (u8 *)&aux->dshufti.hi2)) { return; } } @@ -372,8 +373,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) { switch (info.ma_type) { case MultibyteAccelInfo::MAT_LONG: - if (shuftiBuildMasks(stops, &aux->mshufti.lo, - &aux->mshufti.hi) == -1) { + if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo, + (u8 *)&aux->mshufti.hi) == -1) { break; } aux->accel_type = ACCEL_MLSHUFTI; @@ -381,8 +382,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) { aux->mshufti.len = info.ma_len1; return; case MultibyteAccelInfo::MAT_LONGGRAB: - if (shuftiBuildMasks(stops, &aux->mshufti.lo, - &aux->mshufti.hi) == -1) { + if (shuftiBuildMasks(stops, (u8 
*)&aux->mshufti.lo, + (u8 *)&aux->mshufti.hi) == -1) { break; } aux->accel_type = ACCEL_MLGSHUFTI; @@ -390,8 +391,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) { aux->mshufti.len = info.ma_len1; return; case MultibyteAccelInfo::MAT_SHIFT: - if (shuftiBuildMasks(stops, &aux->mshufti.lo, - &aux->mshufti.hi) == -1) { + if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo, + (u8 *)&aux->mshufti.hi) == -1) { break; } aux->accel_type = ACCEL_MSSHUFTI; @@ -399,8 +400,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) { aux->mshufti.len = info.ma_len1; return; case MultibyteAccelInfo::MAT_SHIFTGRAB: - if (shuftiBuildMasks(stops, &aux->mshufti.lo, - &aux->mshufti.hi) == -1) { + if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo, + (u8 *)&aux->mshufti.hi) == -1) { break; } aux->accel_type = ACCEL_MSGSHUFTI; @@ -408,8 +409,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) { aux->mshufti.len = info.ma_len1; return; case MultibyteAccelInfo::MAT_DSHIFT: - if (shuftiBuildMasks(stops, &aux->mdshufti.lo, - &aux->mdshufti.hi) == -1) { + if (shuftiBuildMasks(stops, (u8 *)&aux->mdshufti.lo, + (u8 *)&aux->mdshufti.hi) == -1) { break; } aux->accel_type = ACCEL_MDSSHUFTI; @@ -418,8 +419,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) { aux->mdshufti.len2 = info.ma_len2; return; case MultibyteAccelInfo::MAT_DSHIFTGRAB: - if (shuftiBuildMasks(stops, &aux->mdshufti.lo, - &aux->mdshufti.hi) == -1) { + if (shuftiBuildMasks(stops, (u8 *)&aux->mdshufti.lo, + (u8 *)&aux->mdshufti.hi) == -1) { break; } aux->accel_type = ACCEL_MDSGSHUFTI; @@ -441,45 +442,45 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) { aux->accel_type = ACCEL_MLTRUFFLE; aux->mtruffle.offset = offset; aux->mtruffle.len = info.ma_len1; - truffleBuildMasks(stops, &aux->mtruffle.mask1, - &aux->mtruffle.mask2); + truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, + (u8 *)&aux->mtruffle.mask2); break; case MultibyteAccelInfo::MAT_LONGGRAB: aux->accel_type = ACCEL_MLGTRUFFLE; aux->mtruffle.offset = offset; aux->mtruffle.len = info.ma_len1; - truffleBuildMasks(stops, &aux->mtruffle.mask1, - &aux->mtruffle.mask2); + truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, + (u8 *)&aux->mtruffle.mask2); break; case MultibyteAccelInfo::MAT_SHIFT: aux->accel_type = ACCEL_MSTRUFFLE; aux->mtruffle.offset = offset; aux->mtruffle.len = info.ma_len1; - truffleBuildMasks(stops, &aux->mtruffle.mask1, - &aux->mtruffle.mask2); + truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, + (u8 *)&aux->mtruffle.mask2); break; case MultibyteAccelInfo::MAT_SHIFTGRAB: aux->accel_type = ACCEL_MSGTRUFFLE; aux->mtruffle.offset = offset; aux->mtruffle.len = info.ma_len1; - truffleBuildMasks(stops, &aux->mtruffle.mask1, - &aux->mtruffle.mask2); + truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, + (u8 *)&aux->mtruffle.mask2); break; case MultibyteAccelInfo::MAT_DSHIFT: aux->accel_type = ACCEL_MDSTRUFFLE; aux->mdtruffle.offset = offset; aux->mdtruffle.len1 = info.ma_len1; aux->mdtruffle.len2 = info.ma_len2; - truffleBuildMasks(stops, &aux->mtruffle.mask1, - &aux->mdtruffle.mask2); + truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, + (u8 *)&aux->mdtruffle.mask2); break; case MultibyteAccelInfo::MAT_DSHIFTGRAB: aux->accel_type = ACCEL_MDSGTRUFFLE; aux->mdtruffle.offset = offset; aux->mdtruffle.len1 = info.ma_len1; aux->mdtruffle.len2 = info.ma_len2; - truffleBuildMasks(stops, &aux->mtruffle.mask1, - &aux->mdtruffle.mask2); + truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, + (u8 *)&aux->mdtruffle.mask2); 
        break;
    default:
        // shouldn't happen
diff --git a/src/nfa/castle_dump.cpp b/src/nfa/castle_dump.cpp
index 9426b6db..1514ca8c 100644
--- a/src/nfa/castle_dump.cpp
+++ b/src/nfa/castle_dump.cpp
@@ -88,13 +88,15 @@ void nfaExecCastle_dump(const struct NFA *nfa, const string &base) {
         fprintf(f, "negated verm, scanning for 0x%02x\n", c->u.verm.c);
         break;
     case CASTLE_SHUFTI: {
-        const CharReach cr = shufti2cr(c->u.shuf.mask_lo, c->u.shuf.mask_hi);
+        const CharReach cr = shufti2cr((const u8 *)&c->u.shuf.mask_lo,
+                                       (const u8 *)&c->u.shuf.mask_hi);
         fprintf(f, "shufti, scanning for %s (%zu chars)\n",
                 describeClass(cr).c_str(), cr.count());
         break;
     }
     case CASTLE_TRUFFLE: {
-        const CharReach cr = truffle2cr(c->u.truffle.mask1, c->u.truffle.mask2);
+        const CharReach cr = truffle2cr((const u8 *)&c->u.truffle.mask1,
+                                        (const u8 *)&c->u.truffle.mask2);
         fprintf(f, "truffle, scanning for %s (%zu chars)\n",
                 describeClass(cr).c_str(), cr.count());
         break;
diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp
index 4f3bcf2e..3b40ab9a 100644
--- a/src/nfa/castlecompile.cpp
+++ b/src/nfa/castlecompile.cpp
@@ -100,13 +100,15 @@ void writeCastleScanEngine(const CharReach &cr, Castle *c) {
         return;
     }
 
-    if (shuftiBuildMasks(negated, &c->u.shuf.mask_lo, &c->u.shuf.mask_hi) != -1) {
+    if (shuftiBuildMasks(negated, (u8 *)&c->u.shuf.mask_lo,
+                         (u8 *)&c->u.shuf.mask_hi) != -1) {
         c->type = CASTLE_SHUFTI;
         return;
     }
 
     c->type = CASTLE_TRUFFLE;
-    truffleBuildMasks(negated, &c->u.truffle.mask1, &c->u.truffle.mask2);
+    truffleBuildMasks(negated, (u8 *)&c->u.truffle.mask1,
+                      (u8 *)&c->u.truffle.mask2);
 }
 
 static
diff --git a/src/nfa/lbr_dump.cpp b/src/nfa/lbr_dump.cpp
index 92cf7e03..0948e122 100644
--- a/src/nfa/lbr_dump.cpp
+++ b/src/nfa/lbr_dump.cpp
@@ -116,7 +116,8 @@ void nfaExecLbrShuf_dump(const NFA *nfa, const string &base) {
     const lbr_shuf *ls = (const lbr_shuf *)getImplNfa(nfa);
     lbrDumpCommon(&ls->common, f);
-    CharReach cr = shufti2cr(ls->mask_lo, ls->mask_hi);
+    CharReach cr = shufti2cr((const u8 *)&ls->mask_lo,
+                             (const u8 *)&ls->mask_hi);
     fprintf(f, "SHUF model, scanning for: %s (%zu chars)\n",
             describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count());
     fprintf(f, "\n");
@@ -133,7 +134,8 @@ void nfaExecLbrTruf_dump(const NFA *nfa, const string &base) {
     const lbr_truf *lt = (const lbr_truf *)getImplNfa(nfa);
     lbrDumpCommon(&lt->common, f);
-    CharReach cr = truffle2cr(lt->mask1, lt->mask2);
+    CharReach cr = truffle2cr((const u8 *)&lt->mask1,
+                              (const u8 *)&lt->mask2);
     fprintf(f, "TRUFFLE model, scanning for: %s (%zu chars)\n",
             describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count());
     fprintf(f, "\n");
diff --git a/src/nfa/mcsheng_compile.cpp b/src/nfa/mcsheng_compile.cpp
index bc12cc5c..b7570af4 100644
--- a/src/nfa/mcsheng_compile.cpp
+++ b/src/nfa/mcsheng_compile.cpp
@@ -193,7 +193,7 @@ void createShuffleMasks(mcsheng *m, const dfa_info &info,
     }
     for (u32 i = 0; i < N_CHARS; i++) {
         assert(info.alpha_remap[i] != info.alpha_remap[TOP]);
-        m->sheng_masks[i] = loadu128(masks[info.alpha_remap[i]].data());
+        memcpy((u8*)&m->sheng_masks[i], (u8*)masks[info.alpha_remap[i]].data(), sizeof(m128));
     }
     m->sheng_end = sheng_end;
     m->sheng_accel_limit = sheng_end - 1;
diff --git a/src/nfa/mcsheng_internal.h b/src/nfa/mcsheng_internal.h
index 5ced6f76..81a658e0 100644
--- a/src/nfa/mcsheng_internal.h
+++ b/src/nfa/mcsheng_internal.h
@@ -31,7 +31,7 @@
 
 #include "nfa_internal.h"
 #include "ue2common.h"
-#include "util/simd_utils.h"
+#include "util/simd_types.h"
 
 #define ACCEPT_FLAG 0x8000
 #define ACCEL_FLAG 0x4000
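
The castle, LBR and mcsheng call sites above now pass shufti and truffle masks around as raw byte tables, like the accel call sites before them. For reference, the membership test each of the two encodings implements can be written in scalar form. The sketch below is illustrative only — the helper names are invented, not part of the patch — but it is consistent with the shufti2cr and truffle2cr reconstruction code later in this series:

    /* Illustrative scalar form of the two classifier encodings. */
    typedef unsigned char u8; /* stands in for ue2common's u8 */

    /* shufti: byte c matches if its low-nibble row and its high-nibble
     * row share at least one bucket bit. */
    static int shufti_matches(const u8 lo[16], const u8 hi[16], u8 c) {
        return (lo[c & 0xf] & hi[c >> 4]) != 0;
    }

    /* truffle: bit 7 of c picks the highclear/highset table, the low
     * nibble indexes it, and bits 4..6 select a bit in the entry. */
    static int truffle_matches(const u8 highclear[16], const u8 highset[16],
                               u8 c) {
        const u8 *mask = (c & 0x80) ? highset : highclear;
        return (mask[c & 0xf] >> ((c >> 4) & 0x7)) & 1;
    }

Both tests index a 16-byte table by nibble, which is what lets the SIMD implementations perform the same lookup with a single PSHUFB per table.

diff --git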
a/src/nfa/mpvcompile.cpp b/src/nfa/mpvcompile.cpp index 908267be..87fb462e 100644 --- a/src/nfa/mpvcompile.cpp +++ b/src/nfa/mpvcompile.cpp @@ -175,12 +175,13 @@ void writeKiloPuff(const map>::const_iterator &it, size_t set = reach.find_first(); assert(set != CharReach::npos); kp->u.verm.c = (char)set; - } else if (shuftiBuildMasks(~reach, &kp->u.shuf.mask_lo, - &kp->u.shuf.mask_hi) != -1) { + } else if (shuftiBuildMasks(~reach, (u8 *)&kp->u.shuf.mask_lo, + (u8 *)&kp->u.shuf.mask_hi) != -1) { kp->type = MPV_SHUFTI; } else { kp->type = MPV_TRUFFLE; - truffleBuildMasks(~reach, &kp->u.truffle.mask1, &kp->u.truffle.mask2); + truffleBuildMasks(~reach, (u8 *)&kp->u.truffle.mask1, + (u8 *)&kp->u.truffle.mask2); } kp->count = verify_u32(puffs.size()); diff --git a/src/nfa/multishufti.h b/src/nfa/multishufti.h index bcccf607..af578483 100644 --- a/src/nfa/multishufti.h +++ b/src/nfa/multishufti.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,7 +36,7 @@ #define MULTISHUFTI_H #include "ue2common.h" -#include "util/simd_utils.h" +#include "util/simd_types.h" #ifdef __cplusplus extern "C" diff --git a/src/nfa/sheng_internal.h b/src/nfa/sheng_internal.h index 046eb759..ff843ebe 100644 --- a/src/nfa/sheng_internal.h +++ b/src/nfa/sheng_internal.h @@ -30,7 +30,7 @@ #define SHENG_INTERNAL_H_ #include "ue2common.h" -#include "util/simd_utils.h" +#include "util/simd_types.h" #define SHENG_STATE_ACCEPT 0x10 #define SHENG_STATE_DEAD 0x20 diff --git a/src/nfa/shengcompile.cpp b/src/nfa/shengcompile.cpp index a02a9b96..53f2c131 100644 --- a/src/nfa/shengcompile.cpp +++ b/src/nfa/shengcompile.cpp @@ -48,7 +48,7 @@ #include "util/compile_context.h" #include "util/make_unique.h" #include "util/verify_types.h" -#include "util/simd_utils.h" +#include "util/simd_types.h" #include #include @@ -442,8 +442,7 @@ void createShuffleMasks(sheng *s, dfa_info &info, #ifdef DEBUG dumpShuffleMask(chr, buf, sizeof(buf)); #endif - m128 mask = loadu128(buf); - s->shuffle_masks[chr] = mask; + memcpy(&s->shuffle_masks[chr], buf, sizeof(m128)); } } diff --git a/src/nfa/shengdump.cpp b/src/nfa/shengdump.cpp index c2371601..ce87beaf 100644 --- a/src/nfa/shengdump.cpp +++ b/src/nfa/shengdump.cpp @@ -39,7 +39,7 @@ #include "util/charreach.h" #include "util/dump_charclass.h" #include "util/dump_util.h" -#include "util/simd_utils.h" +#include "util/simd_types.h" #ifndef DUMP_SUPPORT @@ -101,7 +101,7 @@ void dumpMasks(FILE *f, const sheng *s) { for (u32 chr = 0; chr < 256; chr++) { u8 buf[16]; m128 shuffle_mask = s->shuffle_masks[chr]; - store128(buf, shuffle_mask); + memcpy(buf, &shuffle_mask, sizeof(m128)); fprintf(f, "%3u: ", chr); for (u32 pos = 0; pos < 16; pos++) { @@ -237,7 +237,7 @@ void shengGetTransitions(const NFA *n, u16 state, u16 *t) { u8 buf[16]; m128 shuffle_mask = s->shuffle_masks[i]; - store128(buf, shuffle_mask); + memcpy(buf, &shuffle_mask, sizeof(m128)); t[i] = buf[state] & SHENG_STATE_MASK; } diff --git a/src/nfa/shufticompile.cpp b/src/nfa/shufticompile.cpp index 217fcee0..12a94b7b 100644 --- a/src/nfa/shufticompile.cpp +++ b/src/nfa/shufticompile.cpp @@ -51,7 +51,7 @@ namespace ue2 { * * Note: always able to construct masks for 8 or fewer characters. 
 */
-int shuftiBuildMasks(const CharReach &c, m128 *lo, m128 *hi) {
+int shuftiBuildMasks(const CharReach &c, u8 *lo, u8 *hi) {
     /* Things could be packed much more optimally, but this should be able to
      * handle any set of characters entirely in the lower half.  */
@@ -134,7 +134,7 @@ void set_buckets_from_mask(u16 nibble_mask, u32 bucket,
 bool shuftiBuildDoubleMasks(const CharReach &onechar,
                             const flat_set<pair<u8, u8>> &twochar,
-                            m128 *lo1, m128 *hi1, m128 *lo2, m128 *hi2) {
+                            u8 *lo1, u8 *hi1, u8 *lo2, u8 *hi2) {
     DEBUG_PRINTF("unibytes %zu dibytes %zu\n", onechar.size(), twochar.size());
     array<u8, 16> lo1_a;
@@ -210,9 +210,7 @@ bool shuftiBuildDoubleMasks(const CharReach &onechar,
 
 #ifdef DUMP_SUPPORT
 
-CharReach shufti2cr(const m128 lo_in, const m128 hi_in) {
-    const u8 *lo = (const u8 *)&lo_in;
-    const u8 *hi = (const u8 *)&hi_in;
+CharReach shufti2cr(const u8 *lo, const u8 *hi) {
     CharReach cr;
     for (u32 i = 0; i < 256; i++) {
         if (lo[(u8)i & 0xf] & hi[(u8)i >> 4]) {
diff --git a/src/nfa/shufticompile.h b/src/nfa/shufticompile.h
index 59126b0b..a72904e0 100644
--- a/src/nfa/shufticompile.h
+++ b/src/nfa/shufticompile.h
@@ -48,7 +48,7 @@ namespace ue2 {
  *
  * Note: always able to construct masks for 8 or fewer characters.
  */
-int shuftiBuildMasks(const CharReach &chars, m128 *lo, m128 *hi);
+int shuftiBuildMasks(const CharReach &chars, u8 *lo, u8 *hi);
 
 /** \brief Double-byte variant
  *
@@ -56,7 +56,7 @@ int shuftiBuildMasks(const CharReach &chars, m128 *lo, m128 *hi);
  */
 bool shuftiBuildDoubleMasks(const CharReach &onechar,
                             const flat_set<pair<u8, u8>> &twochar,
-                            m128 *lo1, m128 *hi1, m128 *lo2, m128 *hi2);
+                            u8 *lo1, u8 *hi1, u8 *lo2, u8 *hi2);
 
 #ifdef DUMP_SUPPORT
 
@@ -64,7 +64,7 @@ bool shuftiBuildDoubleMasks(const CharReach &onechar,
  * \brief Dump code: returns a CharReach with the reach that would match this
  * shufti.
  */
-CharReach shufti2cr(const m128 lo, const m128 hi);
+CharReach shufti2cr(const u8 *lo, const u8 *hi);
 
 #endif // DUMP_SUPPORT
 
diff --git a/src/nfa/trufflecompile.cpp b/src/nfa/trufflecompile.cpp
index 6bde7abb..9442d046 100644
--- a/src/nfa/trufflecompile.cpp
+++ b/src/nfa/trufflecompile.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -36,7 +36,7 @@
 #include "ue2common.h"
 #include "util/charreach.h"
 #include "util/simd_types.h"
-#include "util/simd_utils.h"
+
 #include "util/dump_mask.h"
 
 using namespace std;
@@ -53,17 +53,15 @@ namespace ue2 {
  * bits 456 is the bit that is set at that offset.
 */
-void truffleBuildMasks(const CharReach &cr, m128 *shuf_mask_lo_highclear,
-                       m128 *shuf_mask_lo_highset) {
-    *shuf_mask_lo_highset = zeroes128();
-    *shuf_mask_lo_highclear = zeroes128();
-    u8 *lo_highset = (u8 *)shuf_mask_lo_highset;
-    u8 *lo_highclear = (u8 *)shuf_mask_lo_highclear;
+void truffleBuildMasks(const CharReach &cr, u8 *shuf_mask_lo_highclear,
+                       u8 *shuf_mask_lo_highset) {
+    memset(shuf_mask_lo_highset, 0, sizeof(m128));
+    memset(shuf_mask_lo_highclear, 0, sizeof(m128));
 
     for (size_t v = cr.find_first(); v != CharReach::npos;
          v = cr.find_next(v)) {
         DEBUG_PRINTF("adding 0x%02x to %s\n", (u8)v,
                      (v & 0x80) ? "highset" : "highclear");
-        u8 *change_mask = (v & 0x80) ? lo_highset : lo_highclear;
+        u8 *change_mask = (v & 0x80) ?
shuf_mask_lo_highset : shuf_mask_lo_highclear; u8 low_nibble = v & 0xf; u8 bits_456 = (v & 0x70) >> 4; change_mask[low_nibble] |= 1 << bits_456; @@ -73,18 +71,16 @@ void truffleBuildMasks(const CharReach &cr, m128 *shuf_mask_lo_highclear, /* * Reconstruct the charclass that the truffle masks represent */ -CharReach truffle2cr(const m128 highclear, const m128 highset) { - const u8 *lo = (const u8 *)&highclear; - const u8 *hi = (const u8 *)&highset; +CharReach truffle2cr(const u8 *highclear, const u8 *highset) { CharReach cr; for (u8 i = 0; i < 16; i++) { - u32 bits_456 = lo[i]; + u32 bits_456 = highclear[i]; while (bits_456) { u32 pos = findAndClearLSB_32(&bits_456); assert(pos < 8); cr.set(pos << 4 | i); } - bits_456 = hi[i]; + bits_456 = highset[i]; while (bits_456) { u32 pos = findAndClearLSB_32(&bits_456); assert(pos < 8); diff --git a/src/nfa/trufflecompile.h b/src/nfa/trufflecompile.h index 19d3eb54..14b314f3 100644 --- a/src/nfa/trufflecompile.h +++ b/src/nfa/trufflecompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,8 +34,8 @@ namespace ue2 { -void truffleBuildMasks(const CharReach &cr, m128 *mask1, m128 *mask2); -CharReach truffle2cr(const m128 lo_in, const m128 hi_in); +void truffleBuildMasks(const CharReach &cr, u8 *mask1, u8 *mask2); +CharReach truffle2cr(const u8 *lo_in, const u8 *hi_in); } diff --git a/src/nfagraph/ng_lbr.cpp b/src/nfagraph/ng_lbr.cpp index d68c7681..d832bdaa 100644 --- a/src/nfagraph/ng_lbr.cpp +++ b/src/nfagraph/ng_lbr.cpp @@ -224,7 +224,7 @@ aligned_unique_ptr buildLbrShuf(const CharReach &cr, fillNfa(nfa.get(), &ls->common, report, repeatMin, repeatMax, minPeriod, rtype); - if (shuftiBuildMasks(~cr, &ls->mask_lo, &ls->mask_hi) == -1) { + if (shuftiBuildMasks(~cr, (u8 *)&ls->mask_lo, (u8 *)&ls->mask_hi) == -1) { return nullptr; } @@ -245,7 +245,7 @@ aligned_unique_ptr buildLbrTruf(const CharReach &cr, fillNfa(nfa.get(), &lc->common, report, repeatMin, repeatMax, minPeriod, rtype); - truffleBuildMasks(~cr, &lc->mask1, &lc->mask2); + truffleBuildMasks(~cr, (u8 *)&lc->mask1, (u8 *)&lc->mask2); DEBUG_PRINTF("built truffle lbr\n"); return nfa; diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 3d89f87a..9f4abcad 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2010,7 +2010,7 @@ void buildCountingMiracles(build_context &bc) { rcm.c = cr.find_first(); } else { rcm.shufti = 1; - int rv = shuftiBuildMasks(cr, &rcm.lo, &rcm.hi); + int rv = shuftiBuildMasks(cr, (u8 *)&rcm.lo, (u8 *)&rcm.hi); if (rv == -1) { DEBUG_PRINTF("failed to build shufti\n"); lbi.countingMiracleCount = 0; /* remove counting miracle */ diff --git a/src/util/simd_types.h b/src/util/simd_types.h index e4541411..35f27e66 100644 --- a/src/util/simd_types.h +++ b/src/util/simd_types.h @@ -62,6 +62,10 @@ #endif typedef __m128i m128; +#else +typedef struct ALIGN_DIRECTIVE {u64a hi; u64a lo;} m128; +#endif + #if defined(__AVX2__) typedef __m256i m256; #else diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index 17818cac..77f3ac3b 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -1,5 +1,5 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") 
set(gtest_SOURCES gtest/gtest-all.cc gtest/gtest.h) if(NOT XCODE) diff --git a/unit/internal/shufti.cpp b/unit/internal/shufti.cpp index 67ceadc5..06407c41 100644 --- a/unit/internal/shufti.cpp +++ b/unit/internal/shufti.cpp @@ -47,7 +47,7 @@ TEST(Shufti, BuildMask1) { chars.set('a'); - int ret = shuftiBuildMasks(chars, &lomask, &himask); + int ret = shuftiBuildMasks(chars, (u8 *)&lomask, (u8 *)&himask); ASSERT_NE(-1, ret); u8 *lo = (u8 *)&lomask; @@ -75,7 +75,7 @@ TEST(Shufti, BuildMask2) { chars.set('a'); chars.set('B'); - int ret = shuftiBuildMasks(chars, &lomask, &himask); + int ret = shuftiBuildMasks(chars, (u8 *)&lomask, (u8 *)&himask); ASSERT_NE(-1, ret); u8 *lo = (u8 *)&lomask; @@ -96,7 +96,7 @@ TEST(Shufti, BuildMask4) { chars.set('A'); chars.set('b'); - int ret = shuftiBuildMasks(chars, &lomask, &himask); + int ret = shuftiBuildMasks(chars, (u8 *)&lomask, (u8 *)&himask); ASSERT_NE(-1, ret); u8 *lo = (u8 *)&lomask; @@ -113,7 +113,7 @@ TEST(Shufti, ExecNoMatch1) { CharReach chars; chars.set('a'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -132,7 +132,7 @@ TEST(Shufti, ExecNoMatch2) { chars.set('a'); chars.set('B'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -150,7 +150,7 @@ TEST(Shufti, ExecNoMatch3) { CharReach chars; chars.set('V'); /* V = 0x56, e = 0x65 */ - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); char t1[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"; @@ -168,7 +168,7 @@ TEST(Shufti, ExecMatch1) { CharReach chars; chars.set('a'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); /* 0123456789012345678901234567890 */ @@ -187,7 +187,7 @@ TEST(Shufti, ExecMatch2) { CharReach chars; chars.set('a'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); /* 0123456789012345678901234567890 */ @@ -207,7 +207,7 @@ TEST(Shufti, ExecMatch3) { chars.set('a'); chars.set('B'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); /* 0123456789012345678901234567890 */ @@ -229,7 +229,7 @@ TEST(Shufti, ExecMatch4) { chars.set('A'); chars.set('c'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); /* 0123456789012345678901234567890 */ @@ -263,7 +263,7 @@ TEST(Shufti, ExecMatch5) { CharReach chars; chars.set('a'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -283,8 +283,8 @@ TEST(DoubleShufti, BuildMask1) { lits.insert(make_pair('a', 'B')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, - &lo2m, &hi2m); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1m, (u8 *)&hi1m, + (u8 *)&lo2m, (u8 *)&hi2m); ASSERT_TRUE(ret); u8 *lo1 = (u8 *)&lo1m; @@ -326,8 +326,8 @@ TEST(DoubleShufti, BuildMask2) { lits.insert(make_pair('a','z')); lits.insert(make_pair('B','z')); - bool ret = 
shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, - &lo2m, &hi2m); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1m, (u8 *)&hi1m, + (u8 *)&lo2m, (u8 *)&hi2m); ASSERT_TRUE(ret); u8 *lo1 = (u8 *)&lo1m; @@ -354,8 +354,8 @@ TEST(DoubleShufti, BuildMask4) { lits.insert(make_pair('A','z')); lits.insert(make_pair('b','z')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, - &lo2m, &hi2m); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1m, (u8 *)&hi1m, + (u8 *)&lo2m, (u8 *)&hi2m); ASSERT_TRUE(ret); u8 *lo1 = (u8 *)&lo1m; @@ -383,8 +383,8 @@ TEST(DoubleShufti, BuildMask5) { CharReach bytes; bytes.set('X'); - bool ret = shuftiBuildDoubleMasks(bytes, lits, &lo1m, &hi1m, - &lo2m, &hi2m); + bool ret = shuftiBuildDoubleMasks(bytes, lits, (u8 *)&lo1m, (u8 *)&hi1m, + (u8 *)&lo2m, (u8 *)&hi2m); ASSERT_TRUE(ret); u8 *lo1 = (u8 *)&lo1m; @@ -421,8 +421,8 @@ TEST(DoubleShufti, BuildMask6) { lits.insert(make_pair('A','x')); lits.insert(make_pair('b','x')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, - &lo2m, &hi2m); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1m, (u8 *)&hi1m, + (u8 *)&lo2m, (u8 *)&hi2m); ASSERT_TRUE(ret); u8 *lo1 = (u8 *)&lo1m; @@ -473,8 +473,8 @@ TEST(DoubleShufti, BuildMask7) { lits.insert(make_pair('u','v')); lits.insert(make_pair('w','x')); - bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, - &lo2m, &hi2m); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1m, (u8 *)&hi1m, + (u8 *)&lo2m, (u8 *)&hi2m); ASSERT_FALSE(rv); } @@ -485,8 +485,8 @@ TEST(DoubleShufti, ExecNoMatch1) { lits.insert(make_pair('a','b')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, - &lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -506,7 +506,8 @@ TEST(DoubleShufti, ExecNoMatch1b) { lits.insert(make_pair('b','a')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -527,7 +528,8 @@ TEST(DoubleShufti, ExecNoMatch2) { lits.insert(make_pair('a','b')); lits.insert(make_pair('B','b')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -548,14 +550,15 @@ TEST(DoubleShufti, ExecNoMatch2b) { lits.insert(make_pair('b','a')); lits.insert(make_pair('b','B')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; for (size_t i = 0; i < 16; i++) { - const u8 *rv = shuftiDoubleExec(lo1, hi1, lo2, hi2, - (u8 *)t1 + i, (u8 *)t1 + strlen(t1)); + const u8 *rv = shuftiDoubleExec(lo1, hi1, lo2, hi2, (u8 *)t1 + i, + (u8 *)t1 + strlen(t1)); ASSERT_EQ((size_t)t1 + i + 15, (size_t)rv); } @@ -568,7 +571,8 @@ TEST(DoubleShufti, ExecNoMatch3) { lits.insert(make_pair('V','e')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, 
&lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); char t1[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"; @@ -588,7 +592,8 @@ TEST(DoubleShufti, ExecNoMatch3b) { lits.insert(make_pair('e','V')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); char t1[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"; @@ -608,7 +613,8 @@ TEST(DoubleShufti, ExecMatchShort1) { lits.insert(make_pair('a','b')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); /* 0123456789012345678901234567890 */ @@ -629,7 +635,8 @@ TEST(DoubleShufti, ExecMatch1) { lits.insert(make_pair('a','b')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); /* 0123456789012345678901234567890 */ @@ -650,7 +657,8 @@ TEST(DoubleShufti, ExecMatch2) { lits.insert(make_pair('a','a')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); /* 0123456789012345678901234567890 */ @@ -672,7 +680,8 @@ TEST(DoubleShufti, ExecMatch3) { lits.insert(make_pair('B','a')); lits.insert(make_pair('a','a')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); /* 0123456789012345678901234567890 */ @@ -696,7 +705,8 @@ TEST(DoubleShufti, ExecMatch4) { lits.insert(make_pair('C','a')); lits.insert(make_pair('c','a')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); /* 0123456789012345678901234567890 */ @@ -738,7 +748,8 @@ TEST(DoubleShufti, ExecMatch4b) { lits.insert(make_pair('a','C')); lits.insert(make_pair('a','c')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); /* 0123456789012345678901234567890 */ @@ -777,7 +788,8 @@ TEST(DoubleShufti, ExecMatch5) { lits.insert(make_pair('a','A')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -801,7 +813,8 @@ TEST(DoubleShufti, ExecMatchMixed1) { // just one one-byte literal onebyte.set('a'); - bool ret = shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(onebyte, twobyte, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -825,7 +838,8 @@ TEST(DoubleShufti, ExecMatchMixed2) { onebyte.set('a'); twobyte.insert(make_pair('x', 'y')); - 
bool ret = shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(onebyte, twobyte, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -859,7 +873,8 @@ TEST(DoubleShufti, ExecMatchMixed3) { onebyte.set('a'); twobyte.insert(make_pair('x', 'y')); - bool ret = shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(onebyte, twobyte, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); const int len = 420; @@ -892,7 +907,7 @@ TEST(ReverseShufti, ExecNoMatch1) { CharReach chars; chars.set('a'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -911,7 +926,7 @@ TEST(ReverseShufti, ExecNoMatch2) { chars.set('a'); chars.set('B'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -929,7 +944,7 @@ TEST(ReverseShufti, ExecNoMatch3) { CharReach chars; chars.set('V'); /* V = 0x56, e = 0x65 */ - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); char t1[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"; @@ -947,7 +962,7 @@ TEST(ReverseShufti, ExecMatch1) { CharReach chars; chars.set('a'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); /* 0123456789012345678901234567890 */ @@ -968,7 +983,7 @@ TEST(ReverseShufti, ExecMatch2) { CharReach chars; chars.set('a'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); /* 0123456789012345678901234567890 */ @@ -990,7 +1005,7 @@ TEST(ReverseShufti, ExecMatch3) { chars.set('a'); chars.set('B'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); /* 0123456789012345678901234567890 */ @@ -1024,7 +1039,7 @@ TEST(ReverseShufti, ExecMatch4) { chars.set('A'); chars.set('c'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); /* 0123456789012345678901234567890 */ @@ -1059,7 +1074,7 @@ TEST(ReverseShufti, ExecMatch5) { CharReach chars; chars.set('a'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -1079,7 +1094,7 @@ TEST(ReverseShufti, ExecMatch6) { CharReach chars; chars.set('a'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); const size_t len = 256; diff --git a/unit/internal/truffle.cpp b/unit/internal/truffle.cpp index 859c8a08..e9e4f19c 100644 --- a/unit/internal/truffle.cpp +++ b/unit/internal/truffle.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -45,9 +45,9 @@ TEST(Truffle, CompileDot) { 
chars.setall(); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); - CharReach out = truffle2cr(mask1, mask2); + CharReach out = truffle2cr((u8 *)&mask1, (u8 *)&mask2); ASSERT_EQ(out, chars); @@ -64,8 +64,8 @@ TEST(Truffle, CompileChars) { mask2 = zeroes128(); chars.clear(); chars.set((u8)c); - truffleBuildMasks(chars, &mask1, &mask2); - CharReach out = truffle2cr(mask1, mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); + CharReach out = truffle2cr((u8 *)&mask1, (u8 *)&mask2); ASSERT_EQ(out, chars); } @@ -74,8 +74,8 @@ TEST(Truffle, CompileChars) { mask1 = zeroes128(); mask2 = zeroes128(); chars.set((u8)c); - truffleBuildMasks(chars, &mask1, &mask2); - CharReach out = truffle2cr(mask1, mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); + CharReach out = truffle2cr((u8 *)&mask1, (u8 *)&mask2); ASSERT_EQ(out, chars); } @@ -84,8 +84,8 @@ TEST(Truffle, CompileChars) { mask1 = zeroes128(); mask2 = zeroes128(); chars.clear((u8)c); - truffleBuildMasks(chars, &mask1, &mask2); - CharReach out = truffle2cr(mask1, mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); + CharReach out = truffle2cr((u8 *)&mask1, (u8 *)&mask2); ASSERT_EQ(out, chars); } @@ -100,7 +100,7 @@ TEST(Truffle, ExecNoMatch1) { chars.set('a'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\xff"; @@ -119,7 +119,7 @@ TEST(Truffle, ExecNoMatch2) { chars.set('a'); chars.set('B'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -137,7 +137,7 @@ TEST(Truffle, ExecNoMatch3) { chars.set('V'); /* V = 0x56, e = 0x65 */ - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); char t1[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"; @@ -154,7 +154,7 @@ TEST(Truffle, ExecMiniMatch0) { CharReach chars; chars.set('a'); - truffleBuildMasks(chars, &lo, &hi); + truffleBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); char t1[] = "a"; @@ -169,7 +169,7 @@ TEST(Truffle, ExecMiniMatch1) { CharReach chars; chars.set('a'); - truffleBuildMasks(chars, &lo, &hi); + truffleBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); char t1[] = "bbbbbbbabbb"; @@ -184,7 +184,7 @@ TEST(Truffle, ExecMiniMatch2) { CharReach chars; chars.set(0); - truffleBuildMasks(chars, &lo, &hi); + truffleBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); char t1[] = "bbbbbbb\0bbb"; @@ -199,7 +199,7 @@ TEST(Truffle, ExecMiniMatch3) { CharReach chars; chars.set('a'); - truffleBuildMasks(chars, &lo, &hi); + truffleBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); char t1[] = "\0\0\0\0\0\0\0a\0\0\0"; @@ -214,7 +214,7 @@ TEST(Truffle, ExecMatchBig) { CharReach chars; chars.set('a'); - truffleBuildMasks(chars, &lo, &hi); + truffleBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); std::array t1; t1.fill('b'); @@ -234,7 +234,7 @@ TEST(Truffle, ExecMatch1) { chars.set('a'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -253,7 +253,7 @@ TEST(Truffle, ExecMatch2) { chars.set('a'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); /* 0123456789012345678901234567890 */ char t1[] = 
"bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -273,7 +273,7 @@ TEST(Truffle, ExecMatch3) { chars.set('a'); chars.set('B'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbBaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -295,7 +295,7 @@ TEST(Truffle, ExecMatch4) { chars.set('A'); chars.set('c'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbAaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -329,7 +329,7 @@ TEST(Truffle, ExecMatch5) { chars.set('a'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -349,7 +349,7 @@ TEST(Truffle, ExecMatch6) { // [0-Z] - includes some graph chars chars.setRange('0', 'Z'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); std::array t1; t1.fill('*'); // it's full of stars! @@ -370,7 +370,7 @@ TEST(Truffle, ExecMatch7) { // hi bits chars.setRange(127, 255); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); std::array t1; t1.fill('*'); // it's full of stars! @@ -389,7 +389,7 @@ TEST(ReverseTruffle, ExecNoMatch1) { CharReach chars; chars.set('a'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; size_t len = strlen(t1); @@ -408,7 +408,7 @@ TEST(ReverseTruffle, ExecNoMatch2) { chars.set('a'); chars.set('B'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; size_t len = strlen(t1); @@ -425,7 +425,7 @@ TEST(ReverseTruffle, ExecNoMatch3) { CharReach chars; chars.set('V'); /* V = 0x56, e = 0x65 */ - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); char t1[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"; size_t len = strlen(t1); @@ -442,7 +442,7 @@ TEST(ReverseTruffle, ExecMiniMatch0) { CharReach chars; chars.set('a'); - truffleBuildMasks(chars, &lo, &hi); + truffleBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); char t1[] = "a"; @@ -457,7 +457,7 @@ TEST(ReverseTruffle, ExecMiniMatch1) { CharReach chars; chars.set('a'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbabbbb"; @@ -475,7 +475,7 @@ TEST(ReverseTruffle, ExecMiniMatch2) { CharReach chars; chars.set('a'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); /* 0123456789012345678901234567890 */ char t1[] = "babbbbbabbbb"; @@ -494,7 +494,7 @@ TEST(ReverseTruffle, ExecMatch1) { CharReach chars; chars.set('a'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbabbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -514,7 +514,7 @@ TEST(ReverseTruffle, ExecMatch2) { CharReach chars; chars.set('a'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); /* 0123456789012345678901234567890 */ 
char t1[] = "bbbbabbbbbbbbbbbbaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -535,7 +535,7 @@ TEST(ReverseTruffle, ExecMatch3) { chars.set('a'); chars.set('B'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaBbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -568,7 +568,7 @@ TEST(ReverseTruffle, ExecMatch4) { chars.set('A'); chars.set('c'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaAbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -602,7 +602,7 @@ TEST(ReverseTruffle, ExecMatch5) { CharReach chars; chars.set('a'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; size_t len = strlen(t1); From c3a73446ee0c1f534e84ae31b085ab48f1d01686 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 2 Nov 2016 11:01:28 +1100 Subject: [PATCH 078/103] Fat runtime --- CMakeLists.txt | 165 ++++++++++++++++++++++++++++++------ cmake/arch.cmake | 11 +-- cmake/attrib.cmake | 3 + cmake/build_wrapper.sh | 27 ++++++ cmake/config.h.in | 3 + cmake/keep.syms.in | 11 +++ src/compiler/compiler.cpp | 41 +++++++++ src/database.c | 39 +-------- src/database.h | 3 +- src/dispatcher.c | 122 ++++++++++++++++++++++++++ src/hs.cpp | 20 +++++ src/hs_common.h | 11 +++ src/nfa/mcsheng_compile.cpp | 3 +- src/util/cpuid_flags.c | 23 ++++- src/util/cpuid_flags.h | 5 ++ src/util/simd_types.h | 1 + unit/CMakeLists.txt | 6 +- 17 files changed, 411 insertions(+), 83 deletions(-) create mode 100644 cmake/attrib.cmake create mode 100755 cmake/build_wrapper.sh create mode 100644 cmake/keep.syms.in create mode 100644 src/dispatcher.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 0559932d..9f953c6e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -213,7 +213,6 @@ else() set(ARCH_C_FLAGS "${ARCH_C_FLAGS} -march=native -mtune=native") endif() - # we don't use these for the lib, but other tools/unit tests if (NOT CMAKE_CXX_FLAGS MATCHES .*march.*) set(ARCH_CXX_FLAGS "${ARCH_CXX_FLAGS} -march=native -mtune=native") endif() @@ -257,9 +256,24 @@ if (RELEASE_BUILD) endif() endif() -# ensure we are building for the right target arch +if (CMAKE_SYSTEM_NAME MATCHES "Linux") + # This is a Linux-only feature for now - requires platform support + # elsewhere + option(FAT_RUNTIME "Build a library that supports multiple microarchitecures" RELEASE_BUILD) + if (FAT_RUNTIME) + include (${CMAKE_MODULE_PATH}/attrib.cmake) + if (NOT HAS_C_ATTR_IFUNC) + message(FATAL_ERROR "Compiler does not support ifunc attribute, cannot build fat runtime") + endif() + endif() +endif () + include (${CMAKE_MODULE_PATH}/arch.cmake) +if (NOT FAT_RUNTIME AND NOT HAVE_SSSE3) + message(FATAL_ERROR "A minimum of SSSE3 compiler support is required") +endif () + # testing a builtin takes a little more work CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED) CHECK_CXX_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED) @@ -365,6 +379,14 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Intel") endif() endif() +if (NOT FAT_RUNTIME) +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS}") +set(CMAKE_CXX_FLAGS 
"${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS}") +else() +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") +endif() + add_subdirectory(util) add_subdirectory(unit) add_subdirectory(doc/dev-reference) @@ -391,8 +413,13 @@ if (NOT WIN32) endif() # only set these after all tests are done -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS} ${EXTRA_C_FLAGS}") +if (NOT FAT_RUNTIME) +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") +else() +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") +endif() if(NOT WIN32) @@ -414,12 +441,19 @@ SET(hs_HEADERS ) install(FILES ${hs_HEADERS} DESTINATION include/hs) +set (hs_exec_common_SRCS + src/alloc.c + src/scratch.c + src/util/multibit.c + ) + set (hs_exec_SRCS ${hs_HEADERS} src/hs_version.h src/ue2common.h - src/alloc.c src/allocator.h + src/crc32.c + src/crc32.h src/report.h src/runtime.c src/fdr/fdr.c @@ -558,8 +592,8 @@ set (hs_exec_SRCS src/util/join.h src/util/masked_move.h src/util/multibit.h - src/util/multibit_internal.h src/util/multibit.c + src/util/multibit_internal.h src/util/pack_bits.h src/util/popcount.h src/util/pqueue.h @@ -571,21 +605,14 @@ set (hs_exec_SRCS src/util/state_compress.c src/util/unaligned.h src/util/uniform_ops.h - src/scratch.h - src/scratch.c - src/crc32.c - src/crc32.h src/database.c src/database.h ) -if (HAVE_AVX2) - set (hs_exec_SRCS - ${hs_exec_SRCS} - src/fdr/teddy_avx2.c - src/util/masked_move.c - ) -endif () +set (hs_exec_avx2_SRCS + src/fdr/teddy_avx2.c + src/util/masked_move.c +) SET (hs_SRCS @@ -1013,27 +1040,101 @@ endif() set (LIB_VERSION ${HS_VERSION}) set (LIB_SOVERSION ${HS_MAJOR_VERSION}) -add_library(hs_exec OBJECT ${hs_exec_SRCS}) +if (NOT FAT_RUNTIME) + + if (HAVE_AVX2) + add_library(hs_exec OBJECT ${hs_exec_common_SRCS} ${hs_exec_SRCS} + ${hs_exec_avx2_SRCS}) + else() + add_library(hs_exec OBJECT ${hs_exec_common_SRCS} ${hs_exec_SRCS}) + endif() + + add_library(hs_runtime STATIC src/hs_version.c $) + set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) + + if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) + add_library(hs_exec_shared OBJECT ${hs_exec_SRCS}) + set_target_properties(hs_exec_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE) + endif() + +else (FAT_RUNTIME) + set(BUILD_WRAPPER "${PROJECT_SOURCE_DIR}/cmake/build_wrapper.sh") + add_library(hs_exec_core2 OBJECT ${hs_exec_SRCS}) + set_target_properties(hs_exec_core2 PROPERTIES + COMPILE_FLAGS "-march=core2" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + + add_library(hs_exec_corei7 OBJECT ${hs_exec_SRCS}) + set_target_properties(hs_exec_corei7 PROPERTIES + COMPILE_FLAGS "-march=corei7" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + + add_library(hs_exec_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + set_target_properties(hs_exec_avx2 PROPERTIES + COMPILE_FLAGS "-march=core-avx2" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + + add_library(hs_exec_common OBJECT + ${hs_exec_common_SRCS} + src/dispatcher.c + ) + set_source_files_properties(src/dispatcher.c PROPERTIES + COMPILE_FLAGS "-Wno-unused-parameter -Wno-unused-function") + set_source_files_properties(${hs_exec_common_SRCS} PROPERTIES + COMPILE_FLAGS "-march=core-avx2") + + if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) + add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS}) + 
set_target_properties(hs_exec_shared_core2 PROPERTIES + COMPILE_FLAGS "-march=core2" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + add_library(hs_exec_shared_corei7 OBJECT ${hs_exec_SRCS}) + set_target_properties(hs_exec_shared_corei7 PROPERTIES + COMPILE_FLAGS "-march=corei7" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + add_library(hs_exec_shared_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + set_target_properties(hs_exec_shared_avx2 PROPERTIES + COMPILE_FLAGS "-march=core-avx2" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + add_library(hs_exec_common_shared OBJECT + ${hs_exec_common_SRCS} + src/dispatcher.c + ) + set_target_properties(hs_exec_common_shared PROPERTIES + OUTPUT_NAME hs_exec_common + POSITION_INDEPENDENT_CODE TRUE) + endif() # SHARED -if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) -add_library(hs_exec_shared OBJECT ${hs_exec_SRCS}) -set_target_properties(hs_exec_shared PROPERTIES - POSITION_INDEPENDENT_CODE TRUE) -endif() # hs_version.c is added explicitly to avoid some build systems that refuse to # create a lib without any src (I'm looking at you Xcode) -add_library(hs_runtime STATIC src/hs_version.c $) + add_library(hs_runtime STATIC src/hs_version.c + $ $ + $ $) +endif (NOT FAT_RUNTIME) -set_target_properties(hs_runtime PROPERTIES - LINKER_LANGUAGE C) + +set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) if (NOT BUILD_SHARED_LIBS) install(TARGETS hs_runtime DESTINATION lib) endif() if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) - add_library(hs_runtime_shared SHARED src/hs_version.c $) + if (NOT FAT_RUNTIME) + add_library(hs_runtime_shared SHARED src/hs_version.c $) + else() + add_library(hs_runtime_shared SHARED src/hs_version.c $ $ $ $) + endif() set_target_properties(hs_runtime_shared PROPERTIES VERSION ${LIB_VERSION} SOVERSION ${LIB_SOVERSION} @@ -1046,8 +1147,12 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) LIBRARY DESTINATION lib) endif() +if (NOT FAT_RUNTIME) + add_library(hs STATIC ${hs_SRCS} $) +else() # we want the static lib for testing -add_library(hs STATIC ${hs_SRCS} $) +add_library(hs STATIC src/hs_version.c ${hs_SRCS} $ $ $ $) +endif() add_dependencies(hs ragel_Parser) @@ -1056,7 +1161,11 @@ install(TARGETS hs DESTINATION lib) endif() if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) - add_library(hs_shared SHARED ${hs_SRCS} $) + if (NOT FAT_RUNTIME) + add_library(hs_shared SHARED src/hs_version.c ${hs_SRCS} $) + else() + add_library(hs_shared SHARED src/hs_version.c ${hs_SRCS} $ $ $ $) + endif() add_dependencies(hs_shared ragel_Parser) set_target_properties(hs_shared PROPERTIES OUTPUT_NAME hs diff --git a/cmake/arch.cmake b/cmake/arch.cmake index c00401dd..e98fbf22 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -11,7 +11,8 @@ else () endif () -set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") +set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${ARCH_C_FLAGS}") + # ensure we have the minimum of SSSE3 - call a SSSE3 intrinsic CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}> int main() { @@ -19,10 +20,6 @@ int main() { (void)_mm_shuffle_epi8(a, a); }" HAVE_SSSE3) -if (NOT HAVE_SSSE3) - message(FATAL_ERROR "A minimum of SSSE3 compiler support is required") -endif () - # now look for AVX2 CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}> #if !defined(__AVX2__) @@ -34,9 
+31,5 @@ int main(){ (void)_mm256_xor_si256(z, z); }" HAVE_AVX2) -if (NOT HAVE_AVX2) - message(STATUS "Building without AVX2 support") -endif () - unset (CMAKE_REQUIRED_FLAGS) unset (INTRIN_INC_H) diff --git a/cmake/attrib.cmake b/cmake/attrib.cmake new file mode 100644 index 00000000..6ce3f2a7 --- /dev/null +++ b/cmake/attrib.cmake @@ -0,0 +1,3 @@ +# tests for compiler properties + +CHECK_C_SOURCE_COMPILES("int foo(int) __attribute__ ((ifunc(\"foo_i\"))); int f1(int i) { return i; } void (*foo_i()) { return f1; } int main(void) { return 0; }" HAS_C_ATTR_IFUNC) diff --git a/cmake/build_wrapper.sh b/cmake/build_wrapper.sh new file mode 100755 index 00000000..5baf209b --- /dev/null +++ b/cmake/build_wrapper.sh @@ -0,0 +1,27 @@ +#!/bin/sh -e +# This is used for renaming symbols for the fat runtime, don't call directly +# TODO: make this a lot less fragile! +PREFIX=$1 +KEEPSYMS_IN=$2 +shift 2 +BUILD=$@ +OUT=$(echo $BUILD | sed 's/.* -o \(.*\.o\).*/\1/') +SYMSFILE=/tmp/${PREFIX}_rename.syms.$$ +KEEPSYMS=/tmp/keep.syms.$$ +# grab the command without the target obj or src file flags +# we don't just call gcc directly as there may be flags modifying the arch +CC_CMD=$(echo $BUILD | sed 's/ -o .*\.o//;s/ -c //;s/ .[^ ]*\.c//;') +# find me a libc +LIBC_SO=$(${CC_CMD} --print-file-name=libc.so.6) +cp ${KEEPSYMS_IN} ${KEEPSYMS} +# get all symbols from libc and turn them into patterns +nm -f p -g -D ${LIBC_SO} | sed -s 's/\([^ ]*\).*/^\1$/' >> ${KEEPSYMS} +# build the object +${BUILD} +# rename the symbols in the object +nm -f p -g ${OUT} | cut -f1 -d' ' | grep -v -f ${KEEPSYMS} | sed -e "s/\(.*\)/\1\ ${PREFIX}_\1/" >> ${SYMSFILE} +if test -s ${SYMSFILE} +then + objcopy --redefine-syms=${SYMSFILE} ${OUT} +fi +rm -f ${SYMSFILE} ${KEEPSYMS} diff --git a/cmake/config.h.in b/cmake/config.h.in index 75c27b3e..198d96c5 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -15,6 +15,9 @@ /* internal build, switch on dump support. */ #cmakedefine DUMP_SUPPORT +/* Define if building "fat" runtime. */ +#cmakedefine FAT_RUNTIME + /* Define to 1 if `backtrace' works. */ #cmakedefine HAVE_BACKTRACE diff --git a/cmake/keep.syms.in b/cmake/keep.syms.in new file mode 100644 index 00000000..ab6f82a5 --- /dev/null +++ b/cmake/keep.syms.in @@ -0,0 +1,11 @@ +# names to exclude +hs_misc_alloc +hs_misc_free +hs_free_scratch +hs_stream_alloc +hs_stream_free +hs_scratch_alloc +hs_scratch_free +hs_database_alloc +hs_database_free +^_ diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index d56aff88..4a4afc64 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -29,8 +29,10 @@ /** \file * \brief Compiler front-end interface. */ +#include "allocator.h" #include "asserts.h" #include "compiler.h" +#include "crc32.h" #include "database.h" #include "grey.h" #include "hs_internal.h" @@ -321,6 +323,45 @@ platform_t target_to_platform(const target_t &target_info) { return p; } +/** \brief Encapsulate the given bytecode (RoseEngine) in a newly-allocated + * \ref hs_database, ensuring that it is padded correctly to give cacheline + * alignment. 
*/ +static +hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) { + size_t db_len = sizeof(struct hs_database) + len; + DEBUG_PRINTF("db size %zu\n", db_len); + DEBUG_PRINTF("db platform %llx\n", platform); + + struct hs_database *db = (struct hs_database *)hs_database_alloc(db_len); + if (hs_check_alloc(db) != HS_SUCCESS) { + hs_database_free(db); + return nullptr; + } + + // So that none of our database is uninitialized + memset(db, 0, db_len); + + // we need to align things manually + size_t shift = (uintptr_t)db->bytes & 0x3f; + DEBUG_PRINTF("shift is %zu\n", shift); + + db->bytecode = offsetof(struct hs_database, bytes) - shift; + char *bytecode = (char *)db + db->bytecode; + assert(ISALIGNED_CL(bytecode)); + + db->magic = HS_DB_MAGIC; + db->version = HS_DB_VERSION; + db->length = len; + db->platform = platform; + + // Copy bytecode + memcpy(bytecode, in_bytecode, len); + + db->crc32 = Crc32c_ComputeBuf(0, bytecode, db->length); + return db; +} + + struct hs_database *build(NG &ng, unsigned int *length) { assert(length); diff --git a/src/database.c b/src/database.c index a4e10c22..61eb021f 100644 --- a/src/database.c +++ b/src/database.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -348,43 +348,6 @@ hs_error_t dbIsValid(const hs_database_t *db) { return HS_SUCCESS; } -/** \brief Encapsulate the given bytecode (RoseEngine) in a newly-allocated - * \ref hs_database, ensuring that it is padded correctly to give cacheline - * alignment. */ -hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) { - size_t db_len = sizeof(struct hs_database) + len; - DEBUG_PRINTF("db size %zu\n", db_len); - DEBUG_PRINTF("db platform %llx\n", platform); - - struct hs_database *db = (struct hs_database *)hs_database_alloc(db_len); - if (hs_check_alloc(db) != HS_SUCCESS) { - hs_database_free(db); - return NULL; - } - - // So that none of our database is uninitialized - memset(db, 0, db_len); - - // we need to align things manually - size_t shift = (uintptr_t)db->bytes & 0x3f; - DEBUG_PRINTF("shift is %zu\n", shift); - - db->bytecode = offsetof(struct hs_database, bytes) - shift; - char *bytecode = (char *)db + db->bytecode; - assert(ISALIGNED_CL(bytecode)); - - db->magic = HS_DB_MAGIC; - db->version = HS_DB_VERSION; - db->length = len; - db->platform = platform; - - // Copy bytecode - memcpy(bytecode, in_bytecode, len); - - db->crc32 = Crc32c_ComputeBuf(0, bytecode, db->length); - return db; -} - #if defined(_WIN32) #define SNPRINTF_COMPAT _snprintf #else diff --git a/src/database.h b/src/database.h index 5488c93d..399513fc 100644 --- a/src/database.h +++ b/src/database.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -110,7 +110,6 @@ hs_error_t validDatabase(const hs_database_t *db) { } hs_error_t dbIsValid(const struct hs_database *db); -struct hs_database *dbCreate(const char *bytecode, size_t len, u64a platform); #ifdef __cplusplus } /* extern "C" */ diff --git a/src/dispatcher.c b/src/dispatcher.c new file mode 100644 index 00000000..810a5299 --- /dev/null +++ b/src/dispatcher.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2016, Intel 
Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "hs_common.h" +#include "hs_runtime.h" +#include "ue2common.h" +#include "util/cpuid_flags.h" +#include "util/join.h" + +#define CREATE_DISPATCH(RTYPE, NAME, ...) \ + /* create defns */ \ + RTYPE JOIN(avx2_, NAME)(__VA_ARGS__); \ + RTYPE JOIN(corei7_, NAME)(__VA_ARGS__); \ + RTYPE JOIN(core2_, NAME)(__VA_ARGS__); \ + \ + /* error func */ \ + static inline RTYPE JOIN(error_, NAME)(__VA_ARGS__) { \ + return (RTYPE)HS_ARCH_ERROR; \ + } \ + \ + /* resolver */ \ + static void(*JOIN(resolve_, NAME)(void)) { \ + if (check_avx2()) { \ + return JOIN(avx2_, NAME); \ + } \ + if (check_sse42() && check_popcnt()) { \ + return JOIN(corei7_, NAME); \ + } \ + if (check_ssse3()) { \ + return JOIN(core2_, NAME); \ + } \ + /* anything else is fail */ \ + return JOIN(error_, NAME); \ + } \ + \ + /* function */ \ + RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME))) + +CREATE_DISPATCH(hs_error_t, hs_scan, const hs_database_t *db, const char *data, + unsigned length, unsigned flags, hs_scratch_t *scratch, + match_event_handler onEvent, void *userCtx); + +CREATE_DISPATCH(hs_error_t, hs_stream_size, const hs_database_t *database, + size_t *stream_size); + +CREATE_DISPATCH(hs_error_t, hs_database_size, const hs_database_t *db, + size_t *size); +CREATE_DISPATCH(hs_error_t, dbIsValid, const hs_database_t *db); +CREATE_DISPATCH(hs_error_t, hs_free_database, hs_database_t *db); + +CREATE_DISPATCH(hs_error_t, hs_open_stream, const hs_database_t *db, + unsigned int flags, hs_stream_t **stream); + +CREATE_DISPATCH(hs_error_t, hs_scan_stream, hs_stream_t *id, const char *data, + unsigned int length, unsigned int flags, hs_scratch_t *scratch, + match_event_handler onEvent, void *ctxt); + +CREATE_DISPATCH(hs_error_t, hs_close_stream, hs_stream_t *id, + hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt); + +CREATE_DISPATCH(hs_error_t, hs_scan_vector, const hs_database_t *db, + const char *const *data, const unsigned int *length, + unsigned int count, unsigned int 
flags, hs_scratch_t *scratch, + match_event_handler onevent, void *context); + +CREATE_DISPATCH(hs_error_t, hs_database_info, const hs_database_t *db, char **info); + +CREATE_DISPATCH(hs_error_t, hs_copy_stream, hs_stream_t **to_id, + const hs_stream_t *from_id); + +CREATE_DISPATCH(hs_error_t, hs_reset_stream, hs_stream_t *id, + unsigned int flags, hs_scratch_t *scratch, + match_event_handler onEvent, void *context); + +CREATE_DISPATCH(hs_error_t, hs_reset_and_copy_stream, hs_stream_t *to_id, + const hs_stream_t *from_id, hs_scratch_t *scratch, + match_event_handler onEvent, void *context); + +CREATE_DISPATCH(hs_error_t, hs_serialize_database, const hs_database_t *db, + char **bytes, size_t *length); + +CREATE_DISPATCH(hs_error_t, hs_deserialize_database, const char *bytes, + const size_t length, hs_database_t **db); + +CREATE_DISPATCH(hs_error_t, hs_deserialize_database_at, const char *bytes, + const size_t length, hs_database_t *db); + +CREATE_DISPATCH(hs_error_t, hs_serialized_database_info, const char *bytes, + size_t length, char **info); + +CREATE_DISPATCH(hs_error_t, hs_serialized_database_size, const char *bytes, + const size_t length, size_t *deserialized_size); + +/** INTERNALS **/ + +CREATE_DISPATCH(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen); diff --git a/src/hs.cpp b/src/hs.cpp index 07f6d2c1..f64e867a 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -192,6 +192,14 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags, return HS_COMPILER_ERROR; } +#if defined(FAT_RUNTIME) + if (!check_ssse3()) { + *db = nullptr; + *comp_error = generateCompileError("Unsupported architecture", -1); + return HS_ARCH_ERROR; + } +#endif + if (!checkMode(mode, comp_error)) { *db = nullptr; assert(*comp_error); // set by checkMode. @@ -319,6 +327,13 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, return HS_COMPILER_ERROR; } +#if defined(FAT_RUNTIME) + if (!check_ssse3()) { + *error = generateCompileError("Unsupported architecture", -1); + return HS_ARCH_ERROR; + } +#endif + if (!info) { *error = generateCompileError("Invalid parameter: info is NULL", -1); return HS_COMPILER_ERROR; @@ -426,6 +441,11 @@ hs_error_t hs_populate_platform(hs_platform_info_t *platform) { extern "C" HS_PUBLIC_API hs_error_t hs_free_compile_error(hs_compile_error_t *error) { +#if defined(FAT_RUNTIME) + if (!check_ssse3()) { + return HS_ARCH_ERROR; + } +#endif freeCompileError(error); return HS_SUCCESS; } diff --git a/src/hs_common.h b/src/hs_common.h index 4bf31146..ad8d9880 100644 --- a/src/hs_common.h +++ b/src/hs_common.h @@ -519,6 +519,17 @@ const char *hs_version(void); */ #define HS_SCRATCH_IN_USE (-10) +/** + * Unsupported CPU architecture. + * + * This error is returned when Hyperscan is able to detect that the current + * system does not support the required instruction set. + * + * At a minimum, Hyperscan requires Supplemental Streaming SIMD Extensions 3 + * (SSSE3). 
+ */ +#define HS_ARCH_ERROR (-11) + /** @} */ #ifdef __cplusplus diff --git a/src/nfa/mcsheng_compile.cpp b/src/nfa/mcsheng_compile.cpp index b7570af4..a7713bb0 100644 --- a/src/nfa/mcsheng_compile.cpp +++ b/src/nfa/mcsheng_compile.cpp @@ -193,7 +193,8 @@ void createShuffleMasks(mcsheng *m, const dfa_info &info, } for (u32 i = 0; i < N_CHARS; i++) { assert(info.alpha_remap[i] != info.alpha_remap[TOP]); - memcpy((u8*)&m->sheng_masks[i], (u8*)masks[info.alpha_remap[i]].data(), sizeof(m128)); + memcpy((u8 *)&m->sheng_masks[i], + (u8 *)masks[info.alpha_remap[i]].data(), sizeof(m128)); } m->sheng_end = sheng_end; m->sheng_accel_limit = sheng_end - 1; diff --git a/src/util/cpuid_flags.c b/src/util/cpuid_flags.c index 9a8bd922..dba147ee 100644 --- a/src/util/cpuid_flags.c +++ b/src/util/cpuid_flags.c @@ -40,12 +40,14 @@ #define SSSE3 (1 << 9) #define SSE4_1 (1 << 19) #define SSE4_2 (1 << 20) +#define POPCNT (1 << 23) #define XSAVE (1 << 27) #define AVX (1 << 28) // EDX +#define FXSAVE (1 << 24) #define SSE (1 << 25) -#define SSE2 (1 << 25) +#define SSE2 (1 << 26) #define HTT (1 << 28) // Structured Extended Feature Flags Enumeration Leaf ECX values @@ -87,7 +89,6 @@ u64a xgetbv(u32 op) { #endif } -static int check_avx2(void) { #if defined(__INTEL_COMPILER) return _may_i_use_cpu_feature(_FEATURE_AVX2); @@ -137,6 +138,24 @@ u64a cpuid_flags(void) { return cap; } +int check_ssse3(void) { + unsigned int eax, ebx, ecx, edx; + cpuid(1, 0, &eax, &ebx, &ecx, &edx); + return !!(ecx & SSSE3); +} + +int check_sse42(void) { + unsigned int eax, ebx, ecx, edx; + cpuid(1, 0, &eax, &ebx, &ecx, &edx); + return !!(ecx & SSE4_2); +} + +int check_popcnt(void) { + unsigned int eax, ebx, ecx, edx; + cpuid(1, 0, &eax, &ebx, &ecx, &edx); + return !!(ecx & POPCNT); +} + struct family_id { u32 full_family; u32 full_model; diff --git a/src/util/cpuid_flags.h b/src/util/cpuid_flags.h index 2df97ab5..8b23d495 100644 --- a/src/util/cpuid_flags.h +++ b/src/util/cpuid_flags.h @@ -41,6 +41,11 @@ u64a cpuid_flags(void); u32 cpuid_tune(void); +int check_avx2(void); +int check_ssse3(void); +int check_sse42(void); +int check_popcnt(void); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/util/simd_types.h b/src/util/simd_types.h index 35f27e66..74e2abec 100644 --- a/src/util/simd_types.h +++ b/src/util/simd_types.h @@ -61,6 +61,7 @@ #error no intrinsics! 
#endif +#if defined(__SSE2__) typedef __m128i m128; #else typedef struct ALIGN_DIRECTIVE {u64a hi; u64a lo;} m128; diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index 77f3ac3b..8b494444 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -1,5 +1,5 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") set(gtest_SOURCES gtest/gtest-all.cc gtest/gtest.h) if(NOT XCODE) @@ -34,7 +34,7 @@ add_library(gtest STATIC ${gtest_SOURCES}) add_definitions(-DGTEST_HAS_PTHREAD=0 -DSRCDIR=${PROJECT_SOURCE_DIR}) -if (NOT RELEASE_BUILD) +if (NOT (RELEASE_BUILD OR FAT_RUNTIME)) set(unit_internal_SOURCES internal/bitfield.cpp internal/bitutils.cpp @@ -89,7 +89,7 @@ set(unit_internal_SOURCES add_executable(unit-internal ${unit_internal_SOURCES}) target_link_libraries(unit-internal hs gtest corpusomatic) -endif(NOT RELEASE_BUILD) +endif(NOT (RELEASE_BUILD OR FAT_RUNTIME)) set(unit_hyperscan_SOURCES hyperscan/allocators.cpp From 0d0e1a5106b080c1ba5675dc5f3040dd09a4b2c9 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 29 Nov 2016 16:08:14 +1100 Subject: [PATCH 079/103] api: hs_valid_platform --- CMakeLists.txt | 31 +++++++++++++++++++-------- src/hs_common.h | 17 +++++++++++++++ src/hs_valid_platform.c | 40 +++++++++++++++++++++++++++++++++++ unit/hyperscan/arg_checks.cpp | 6 ++++++ 4 files changed, 85 insertions(+), 9 deletions(-) create mode 100644 src/hs_valid_platform.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 9f953c6e..e9e8f3fa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1049,7 +1049,7 @@ if (NOT FAT_RUNTIME) add_library(hs_exec OBJECT ${hs_exec_common_SRCS} ${hs_exec_SRCS}) endif() - add_library(hs_runtime STATIC src/hs_version.c $) + add_library(hs_runtime STATIC src/hs_version.c src/hs_valid_platform.c $) set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) @@ -1131,9 +1131,15 @@ endif() if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) if (NOT FAT_RUNTIME) - add_library(hs_runtime_shared SHARED src/hs_version.c $) - else() - add_library(hs_runtime_shared SHARED src/hs_version.c $ $ $ $) + add_library(hs_runtime_shared SHARED src/hs_version.c src/hs_valid_platform.c +$) + else() + add_library(hs_runtime_shared SHARED src/hs_version.c + src/hs_valid_platform.c + $ + $ + $ + $) endif() set_target_properties(hs_runtime_shared PROPERTIES VERSION ${LIB_VERSION} @@ -1148,10 +1154,12 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) endif() if (NOT FAT_RUNTIME) - add_library(hs STATIC ${hs_SRCS} $) + add_library(hs STATIC ${hs_SRCS} src/hs_valid_platform.c $) else() -# we want the static lib for testing -add_library(hs STATIC src/hs_version.c ${hs_SRCS} $ $ $ $) + # we want the static lib for testing + add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c + ${hs_SRCS} $ $ + $ $) endif() add_dependencies(hs ragel_Parser) @@ -1162,9 +1170,14 @@ endif() if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) if (NOT FAT_RUNTIME) - add_library(hs_shared SHARED src/hs_version.c ${hs_SRCS} $) + add_library(hs_shared SHARED src/hs_version.c src/hs_valid_platform.c + ${hs_SRCS} $) else() - add_library(hs_shared SHARED src/hs_version.c ${hs_SRCS} $ $ $ $) + add_library(hs_shared SHARED src/hs_version.c src/hs_valid_platform.c + ${hs_SRCS} $ + $ + $ + $) endif() add_dependencies(hs_shared ragel_Parser) set_target_properties(hs_shared PROPERTIES diff --git a/src/hs_common.h 
b/src/hs_common.h index ad8d9880..b25b1842 100644 --- a/src/hs_common.h +++ b/src/hs_common.h @@ -435,6 +435,23 @@ hs_error_t hs_set_stream_allocator(hs_alloc_t alloc_func, hs_free_t free_func); */ const char *hs_version(void); +/** + * Utility function to test the current system architecture. + * + * Hyperscan requires the Supplemental Streaming SIMD Extensions 3 instruction + * set. This function can be called on any x86 platform to determine if the + * system provides the required instruction set. + * + * This function does not test for more advanced features if Hyperscan has + * been built for a more specific architecture, for example the AVX2 + * instruction set. + * + * @return + * @ref HS_SUCCESS on success, @ref HS_ARCH_ERROR if system does not + * support Hyperscan. + */ +hs_error_t hs_valid_platform(void); + /** * @defgroup HS_ERROR hs_error_t values * diff --git a/src/hs_valid_platform.c b/src/hs_valid_platform.c new file mode 100644 index 00000000..939cde1f --- /dev/null +++ b/src/hs_valid_platform.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "hs_common.h" +#include "util/cpuid_flags.h" + +HS_PUBLIC_API +hs_error_t hs_valid_platform(void) { + /* Hyperscan requires SSSE3, anything else is a bonus */ + if (check_ssse3()) { + return HS_SUCCESS; + } else { + return HS_ARCH_ERROR; + } +} diff --git a/unit/hyperscan/arg_checks.cpp b/unit/hyperscan/arg_checks.cpp index d277a26b..8e86cc64 100644 --- a/unit/hyperscan/arg_checks.cpp +++ b/unit/hyperscan/arg_checks.cpp @@ -84,6 +84,12 @@ void breakDatabaseBytecode(hs_database *db) { *bytecode += 3; } +// Check that hs_valid_platform says we can run here +TEST(HyperscanArgChecks, ValidPlatform) { + hs_error_t error = hs_valid_platform(); + ASSERT_EQ(HS_SUCCESS, error) << "hs_valid_platform should return zero"; +} + // Check that hs_version gives us a reasonable string back TEST(HyperscanArgChecks, Version) { const char *version = hs_version(); From 6967c7ddf1bf38685ff9ea9c2478c4626ce84ad2 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 29 Nov 2016 16:19:04 +1100 Subject: [PATCH 080/103] cmake: unneeded header check --- CMakeLists.txt | 1 - cmake/config.h.in | 3 --- 2 files changed, 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e9e8f3fa..cfc33848 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -236,7 +236,6 @@ endif() CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H) CHECK_INCLUDE_FILES(intrin.h HAVE_C_INTRIN_H) CHECK_INCLUDE_FILE_CXX(intrin.h HAVE_CXX_INTRIN_H) -CHECK_INCLUDE_FILES(tmmintrin.h HAVE_TMMINTRIN_H) CHECK_INCLUDE_FILES(x86intrin.h HAVE_C_X86INTRIN_H) CHECK_INCLUDE_FILE_CXX(x86intrin.h HAVE_CXX_X86INTRIN_H) diff --git a/cmake/config.h.in b/cmake/config.h.in index 198d96c5..d8430f22 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -70,9 +70,6 @@ /* Define if the sqlite3_open_v2 call is available */ #cmakedefine HAVE_SQLITE3_OPEN_V2 -/* Define to 1 if you have the header file. */ -#cmakedefine HAVE_TMMINTRIN_H - /* Define to 1 if you have the header file. 
*/ #cmakedefine HAVE_UNISTD_H From c337ac665b3284881c8a978a8c20c58c8d1b6dbd Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 12 Dec 2016 11:34:15 +1100 Subject: [PATCH 081/103] cmake: default to fat runtime for release builds --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cfc33848..760906b7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -258,7 +258,7 @@ endif() if (CMAKE_SYSTEM_NAME MATCHES "Linux") # This is a Linux-only feature for now - requires platform support # elsewhere - option(FAT_RUNTIME "Build a library that supports multiple microarchitecures" RELEASE_BUILD) + option(FAT_RUNTIME "Build a library that supports multiple microarchitecures" ${RELEASE_BUILD}) if (FAT_RUNTIME) include (${CMAKE_MODULE_PATH}/attrib.cmake) if (NOT HAS_C_ATTR_IFUNC) From 1ed2bdc46fdf2e7c8ad412e556ef15e6cfd9ba5c Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 12 Dec 2016 11:55:56 +1100 Subject: [PATCH 082/103] Don't set flags for common files --- CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 760906b7..a6ef3fe8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1082,8 +1082,6 @@ else (FAT_RUNTIME) ) set_source_files_properties(src/dispatcher.c PROPERTIES COMPILE_FLAGS "-Wno-unused-parameter -Wno-unused-function") - set_source_files_properties(${hs_exec_common_SRCS} PROPERTIES - COMPILE_FLAGS "-march=core-avx2") if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS}) From 5754709dd51b93526444b4ea8122bec58c420514 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 12 Dec 2016 14:52:06 +1100 Subject: [PATCH 083/103] Don't ignore unused attrib warnings in test --- cmake/attrib.cmake | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/cmake/attrib.cmake b/cmake/attrib.cmake index 6ce3f2a7..5600ce6b 100644 --- a/cmake/attrib.cmake +++ b/cmake/attrib.cmake @@ -1,3 +1,13 @@ # tests for compiler properties -CHECK_C_SOURCE_COMPILES("int foo(int) __attribute__ ((ifunc(\"foo_i\"))); int f1(int i) { return i; } void (*foo_i()) { return f1; } int main(void) { return 0; }" HAS_C_ATTR_IFUNC) +# set -Werror so we can't ignore unused attribute warnings +set (CMAKE_REQUIRED_FLAGS "-Werror") + +CHECK_C_SOURCE_COMPILES(" + int foo(int) __attribute__ ((ifunc(\"foo_i\"))); + int f1(int i) { return i; } + void (*foo_i()) { return f1; } + int main(void) { return 0; } + " HAS_C_ATTR_IFUNC) + +unset(CMAKE_REQUIRED_FLAGS) From f29b203a8613bee82836ff38278f70db1d1c39a3 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 12 Dec 2016 16:37:38 +1100 Subject: [PATCH 084/103] cmake: combine hs_exec src lists correctly --- CMakeLists.txt | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a6ef3fe8..8a4864f4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -441,10 +441,12 @@ SET(hs_HEADERS install(FILES ${hs_HEADERS} DESTINATION include/hs) set (hs_exec_common_SRCS - src/alloc.c - src/scratch.c - src/util/multibit.c - ) + src/alloc.c + src/scratch.c + src/util/cpuid_flags.c + src/util/cpuid_flags.h + src/util/multibit.c + ) set (hs_exec_SRCS ${hs_HEADERS} @@ -944,8 +946,6 @@ SET (hs_SRCS src/util/compile_error.cpp src/util/compile_error.h src/util/container.h - src/util/cpuid_flags.c - src/util/cpuid_flags.h src/util/depth.cpp src/util/depth.h src/util/determinise.h @@ -1041,13 +1041,14 @@ set (LIB_SOVERSION ${HS_MAJOR_VERSION}) 
if (NOT FAT_RUNTIME) + set(hs_exec_SRCS ${hs_exec_SRCS} ${hs_exec_common_SRCS}) + if (HAVE_AVX2) - add_library(hs_exec OBJECT ${hs_exec_common_SRCS} ${hs_exec_SRCS} - ${hs_exec_avx2_SRCS}) - else() - add_library(hs_exec OBJECT ${hs_exec_common_SRCS} ${hs_exec_SRCS}) + set(hs_exec_SRCS ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) endif() + add_library(hs_exec OBJECT ${hs_exec_SRCS}) + add_library(hs_runtime STATIC src/hs_version.c src/hs_valid_platform.c $) set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) @@ -1175,6 +1176,7 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) $ $ $) + endif() add_dependencies(hs_shared ragel_Parser) set_target_properties(hs_shared PROPERTIES From 0d87116d332c6f49c939ae9063d857a0bafe4789 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 12 Dec 2016 16:42:32 +1100 Subject: [PATCH 085/103] cmake: check clang version for fat runtime support --- CMakeLists.txt | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8a4864f4..61eb9893 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -258,13 +258,20 @@ endif() if (CMAKE_SYSTEM_NAME MATCHES "Linux") # This is a Linux-only feature for now - requires platform support # elsewhere - option(FAT_RUNTIME "Build a library that supports multiple microarchitecures" ${RELEASE_BUILD}) - if (FAT_RUNTIME) + if (CMAKE_C_COMPILER_ID MATCHES "Clang" AND + CMAKE_C_COMPILER_VERSION VERSION_LESS "3.9") + message (STATUS "Clang v3.9 or higher required for fat runtime, cannot build fat runtime") + set (FAT_RUNTIME_REQUISITES FALSE) + else () include (${CMAKE_MODULE_PATH}/attrib.cmake) if (NOT HAS_C_ATTR_IFUNC) - message(FATAL_ERROR "Compiler does not support ifunc attribute, cannot build fat runtime") + message(STATUS "Compiler does not support ifunc attribute, cannot build fat runtime") + set (FAT_RUNTIME_REQUISITES FALSE) + else () + set (FAT_RUNTIME_REQUISITES TRUE) endif() endif() + CMAKE_DEPENDENT_OPTION(FAT_RUNTIME "Build a library that supports multiple microarchitecures" ${RELEASE_BUILD} "FAT_RUNTIME_REQUISITES" OFF) endif () include (${CMAKE_MODULE_PATH}/arch.cmake) From ea5e3bc0b1d21005fc30df8f525f5848657cfce7 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 13 Dec 2016 09:03:36 +1100 Subject: [PATCH 086/103] Mark dispatch functions as public API --- src/dispatcher.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dispatcher.c b/src/dispatcher.c index 810a5299..fb2f4f02 100644 --- a/src/dispatcher.c +++ b/src/dispatcher.c @@ -60,6 +60,7 @@ } \ \ /* function */ \ + HS_PUBLIC_API \ RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME))) CREATE_DISPATCH(hs_error_t, hs_scan, const hs_database_t *db, const char *data, From 833474506318255832ca808de592ed44138cb982 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 13 Dec 2016 10:01:16 +1100 Subject: [PATCH 087/103] SSE2 preprocessor test for MSVC MSVC doesn't define __SSE2__ but we can figure it out from other macros. --- src/util/simd_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/simd_types.h b/src/util/simd_types.h index 74e2abec..d6e5d6a3 100644 --- a/src/util/simd_types.h +++ b/src/util/simd_types.h @@ -61,7 +61,7 @@ #error no intrinsics! 
#endif -#if defined(__SSE2__) +#if defined(__SSE2__) || defined(_M_X64) || (_M_IX86_FP >= 2) typedef __m128i m128; #else typedef struct ALIGN_DIRECTIVE {u64a hi; u64a lo;} m128; From 2f57681bb612e8272f2c7109185c3085c23c5719 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 14 Dec 2016 15:15:37 +1100 Subject: [PATCH 088/103] cmake: check the generator for fat runtime builds The RULE_LAUNCH_COMPILE property only works for Unix Makefiles, or for Ninja with CMake newer than v3.0. --- CMakeLists.txt | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 61eb9893..e1f7cd72 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -209,7 +209,6 @@ else() endif() if (NOT CMAKE_C_FLAGS MATCHES .*march.*) - message(STATUS "Building for current host CPU") set(ARCH_C_FLAGS "${ARCH_C_FLAGS} -march=native -mtune=native") endif() @@ -258,11 +257,16 @@ endif() if (CMAKE_SYSTEM_NAME MATCHES "Linux") # This is a Linux-only feature for now - requires platform support # elsewhere + message(STATUS "generator is ${CMAKE_GENERATOR}") if (CMAKE_C_COMPILER_ID MATCHES "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_LESS "3.9") message (STATUS "Clang v3.9 or higher required for fat runtime, cannot build fat runtime") set (FAT_RUNTIME_REQUISITES FALSE) - else () + elseif (NOT (CMAKE_GENERATOR MATCHES "Unix Makefiles" OR + (CMAKE_VERSION VERSION_GREATER "3.0" AND CMAKE_GENERATOR MATCHES "Ninja"))) + message (STATUS "Building the fat runtime requires the Unix Makefiles generator, or Ninja with CMake v3.0 or higher") + set (FAT_RUNTIME_REQUISITES FALSE) + else() include (${CMAKE_MODULE_PATH}/attrib.cmake) if (NOT HAS_C_ATTR_IFUNC) message(STATUS "Compiler does not support ifunc attribute, cannot build fat runtime") @@ -386,9 +390,11 @@ endif() endif() if (NOT FAT_RUNTIME) +message(STATUS "Building for current host CPU") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS}") else() +message(STATUS "Building runtime for multiple microarchitectures") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") endif() From 83e69cc195b7677810bd7904561bb9e389baf514 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 23 Nov 2016 11:07:10 +1100 Subject: [PATCH 089/103] hyperscan#41: ignore carriage-return in pattern files --- examples/patbench.cc | 4 +++- examples/pcapscan.cc | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/examples/patbench.cc b/examples/patbench.cc index 9c2b41fa..f82f47a7 100644 --- a/examples/patbench.cc +++ b/examples/patbench.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -833,6 +833,8 @@ static unsigned parseFlags(const string &flagsStr) { flags |= HS_FLAG_UTF8; break; case 'W': flags |= HS_FLAG_UCP; break; + case '\r': // stray carriage-return + break; default: cerr << "Unsupported flag \'" << c << "\'" << endl; exit(-1); diff --git a/examples/pcapscan.cc b/examples/pcapscan.cc index 032b19cd..12b94438 100644 --- a/examples/pcapscan.cc +++ b/examples/pcapscan.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -621,6 +621,8 @@ 
static unsigned parseFlags(const string &flagsStr) { flags |= HS_FLAG_UTF8; break; case 'W': flags |= HS_FLAG_UCP; break; + case '\r': // stray carriage-return + break; default: cerr << "Unsupported flag \'" << c << "\'" << endl; exit(-1); From c699e987500d8553a6f81d50990129be4f3ab828 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 19 Dec 2016 12:42:34 +1100 Subject: [PATCH 090/103] Add explicit casts to succ table entry calculations. Although overflow should not be possible given the range of alphaShift, this resolves coverity scan issues CID 158536 and CID 158537. --- src/nfa/mcsheng_compile.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/nfa/mcsheng_compile.cpp b/src/nfa/mcsheng_compile.cpp index a7713bb0..7b4e58ab 100644 --- a/src/nfa/mcsheng_compile.cpp +++ b/src/nfa/mcsheng_compile.cpp @@ -606,7 +606,7 @@ void fill_in_succ_table_16(NFA *nfa, const dfa_info &info, for (size_t s = 0; s < info.impl_alpha_size; s++) { dstate_id_t raw_succ = info.states[i].next[s]; - u16 &entry = succ_table[(normal_id << alphaShift) + s]; + u16 &entry = succ_table[((size_t)normal_id << alphaShift) + s]; entry = info.implId(raw_succ); entry |= get_edge_flags(nfa, entry); @@ -916,7 +916,8 @@ void fill_in_succ_table_8(NFA *nfa, const dfa_info &info, for (size_t s = 0; s < info.impl_alpha_size; s++) { dstate_id_t raw_succ = info.states[i].next[s]; - succ_table[(normal_id << alphaShift) + s] = info.implId(raw_succ); + succ_table[((size_t)normal_id << alphaShift) + s] + = info.implId(raw_succ); } } } From 10346f53f7428442122a0c80dc12019b900737ab Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Tue, 3 Jan 2017 12:57:10 +1100 Subject: [PATCH 091/103] mcclellan: handle 0 length block scans correctly --- src/nfa/mcclellan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/nfa/mcclellan.c b/src/nfa/mcclellan.c index 584670c2..6b6848d6 100644 --- a/src/nfa/mcclellan.c +++ b/src/nfa/mcclellan.c @@ -375,7 +375,9 @@ char mcclellanExec8_i(const struct mcclellan *m, u32 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **c_final, enum MatchMode mode) { if (!len) { - *c_final = buf; + if (mode == STOP_AT_MATCH) { + *c_final = buf; + } return MO_ALIVE; } u32 s = *state; From 988ee0eb55f1617fd0401d6988d9d56d5b19c1cd Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Thu, 5 Jan 2017 15:12:43 +1100 Subject: [PATCH 092/103] MSVC requires the attribute before the type --- src/util/masked_move.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/masked_move.c b/src/util/masked_move.c index 71406308..8560af3f 100644 --- a/src/util/masked_move.c +++ b/src/util/masked_move.c @@ -34,7 +34,7 @@ /* masks for masked moves */ /* magic mask for maskload (vmmaskmovq) - described in UE-2424 */ -const u32 mm_mask_mask[16] ALIGN_CL_DIRECTIVE = { +const ALIGN_CL_DIRECTIVE u32 mm_mask_mask[16] = { 0x00000000U, 0x00000000U, 0x00000000U, From 981e9896b4d57d48bb672a92357166c89a09eacd Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Thu, 5 Jan 2017 15:12:56 +1100 Subject: [PATCH 093/103] MSVC gets confused by one line of the graph tests --- unit/internal/graph.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/unit/internal/graph.cpp b/unit/internal/graph.cpp index 3f81ac13..5e3952a6 100644 --- a/unit/internal/graph.cpp +++ b/unit/internal/graph.cpp @@ -1779,6 +1779,9 @@ TEST(ue2_graph, default_param) { auto e = add_edge(v, v, g).first; ASSERT_EQ(0U, get(vertex_index, g, v)); - ASSERT_EQ(0U, 
get(&ue2::graph_detail::default_edge_property::index, g, e));
     ASSERT_EQ(0U, get(edge_index, g, e));
+#if !defined(_MSC_VER)
+    /* This makes MSVC up to VS2015 sad in ways that shouldn't happen. */
+    ASSERT_EQ(0U, get(&ue2::graph_detail::default_edge_property::index, g, e));
+#endif
 }

From 46f74300f28b0b089cdd523d56de966ea8223c0f Mon Sep 17 00:00:00 2001
From: Matthew Barr
Date: Mon, 9 Jan 2017 11:35:03 +1100
Subject: [PATCH 094/103] update copyright year

---
 src/nfa/mcclellan.c     | 2 +-
 src/util/masked_move.c  | 2 +-
 unit/internal/graph.cpp | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/nfa/mcclellan.c b/src/nfa/mcclellan.c
index 6b6848d6..ceedb9db 100644
--- a/src/nfa/mcclellan.c
+++ b/src/nfa/mcclellan.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
diff --git a/src/util/masked_move.c b/src/util/masked_move.c
index 8560af3f..ec788db7 100644
--- a/src/util/masked_move.c
+++ b/src/util/masked_move.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
diff --git a/unit/internal/graph.cpp b/unit/internal/graph.cpp
index 5e3952a6..b7ec7b03 100644
--- a/unit/internal/graph.cpp
+++ b/unit/internal/graph.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:

From 1c9f38e98d36aa6ab108d0edc2b347788ef4b1f3 Mon Sep 17 00:00:00 2001
From: Matthew Barr
Date: Fri, 13 Jan 2017 10:14:23 +1100
Subject: [PATCH 095/103] Silence ABI notes in release builds

---
 CMakeLists.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e1f7cd72..dee408e3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -230,6 +230,12 @@ else()
         set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer")
     endif()

+    if (RELEASE_BUILD)
+        # we don't need the noise of ABI warnings in a release build
+        set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-abi")
+        set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-abi")
+    endif ()
+
 endif()

 CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H)

From e23316f453d5ccbd5df24e25e75b3d984cb955f2 Mon Sep 17 00:00:00 2001
From: Matthew Barr
Date: Fri, 13 Jan 2017 10:23:00 +1100
Subject: [PATCH 096/103] Boost 1.61 required for clang/libc++

The libc++ headers aren't specific enough and cause the compiler to think
there are ambiguous functions when using older Boost versions.
--- CMakeLists.txt | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index dee408e3..bc60fe48 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -62,7 +62,14 @@ include_directories(SYSTEM include) set(BOOST_USE_STATIC_LIBS OFF) set(BOOST_USE_MULTITHREADED OFF) set(BOOST_USE_STATIC_RUNTIME OFF) -set(BOOST_MINVERSION 1.57.0) +if (CMAKE_SYSTEM_NAME MATCHES "Darwin" + OR (CMAKE_SYSTEM_NAME MATCHES "FreeBSD" + AND CMAKE_C_COMPILER_ID MATCHES "Clang")) + # we need a more recent boost for libc++ used by clang on OSX and FreeBSD + set(BOOST_MINVERSION 1.61.0) +else () + set(BOOST_MINVERSION 1.57.0) +endif () set(BOOST_NO_BOOST_CMAKE ON) # first check for Boost installed on the system From 35716c70ef01b18e2c4a971fd6ded77f912fe84d Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 16 Jan 2017 10:58:31 +1100 Subject: [PATCH 097/103] ensure that we add default top when creating a new infix graph --- src/nfagraph/ng_rose.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/nfagraph/ng_rose.cpp b/src/nfagraph/ng_rose.cpp index 1c4163ce..7066ab27 100644 --- a/src/nfagraph/ng_rose.cpp +++ b/src/nfagraph/ng_rose.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -2368,12 +2368,17 @@ void makeNocaseWithPrefixMask(RoseInGraph &g, RoseInVertex v) { h[ds].char_reach = CharReach::dot(); - add_edge(h.start, ds, h); + NFAEdge e_start_to_ds = add_edge(h.start, ds, h); add_edge(ds, ds, h); add_edge(ds, h.accept, h); h[h.start].reports.insert(0); h[ds].reports.insert(0); + + if (g[u].type == RIV_LITERAL) { + h[e_start_to_ds].tops.insert(DEFAULT_TOP); + } } else { + assert(g[u].type == RIV_ANCHORED_START); add_edge(h.start, h.accept, h); h[h.start].reports.insert(0); } From 734eb2ce62624c8500b2d7fd3677280bdeeca94a Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 16 Jan 2017 10:14:41 +1100 Subject: [PATCH 098/103] we can only trim lookarounds based on information common to all literals --- src/rose/rose_build_lookaround.cpp | 34 +++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/src/rose/rose_build_lookaround.cpp b/src/rose/rose_build_lookaround.cpp index d2c4b541..10bd59de 100644 --- a/src/rose/rose_build_lookaround.cpp +++ b/src/rose/rose_build_lookaround.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -460,17 +460,41 @@ void findFloodReach(const RoseBuildImpl &tbi, const RoseVertex v, } } +static +map findLiteralReach(const rose_literal_id &lit) { + map look; + + u32 i = lit.delay + 1; + for (auto it = lit.s.rbegin(), ite = lit.s.rend(); it != ite; ++it) { + look[0 - i] |= *it; + i++; + } + + return look; +} + static map findLiteralReach(const RoseBuildImpl &build, const RoseVertex v) { + bool first = true; map look; for (u32 lit_id : build.g[v].literals) { const rose_literal_id &lit = build.literals.right.at(lit_id); + auto lit_look = findLiteralReach(lit); - u32 i = lit.delay + 1; - for (auto it = lit.s.rbegin(), ite = lit.s.rend(); it != ite; ++it) { - look[0 - i] |= *it; - i++; + if (first) { + look = move(lit_look); + first = false; + } else { + 
for (auto it = look.begin(); it != look.end();) { + auto l_it = lit_look.find(it->first); + if (l_it == lit_look.end()) { + it = look.erase(it); + } else { + it->second |= l_it->second; + ++it; + } + } } } From cacf07fe9bf7fd396ca841f7ab8ae849500ca048 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 18 Jan 2017 11:33:57 +1100 Subject: [PATCH 099/103] prefilter: workaround for \b in UCP and !UTF8 mode For now, just drop the assertion (which will still return a superset of matches, as per prefiltering semantics). --- src/parser/prefilter.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/parser/prefilter.cpp b/src/parser/prefilter.cpp index ea58a134..f69362e4 100644 --- a/src/parser/prefilter.cpp +++ b/src/parser/prefilter.cpp @@ -295,6 +295,16 @@ public: Component *visit(ComponentWordBoundary *c) override { assert(c); + + // TODO: Right now, we do not have correct code for resolving these + // when prefiltering is on, UCP is on, and UTF-8 is *off*. For now, we + // just replace with an empty sequence (as that will return a superset + // of matches). + if (mode.ucp && !mode.utf8) { + return new ComponentSequence(); + } + + // All other cases can be prefiltered. c->setPrefilter(true); return c; } From 8c2e033540ec5eb5601c3e89e18af8ff12a2ffe2 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 18 Jan 2017 15:16:18 +1100 Subject: [PATCH 100/103] cmake: test reverse_graph instead of using version It seems that some distros are using a patched Boost 1.62.0 which means our workaround in reverse_graph has a conflict. Add a CMake test to see if we need to use the patched reverse_graph. --- CMakeLists.txt | 1 + cmake/boost.cmake | 41 +++++++++++++++++++ cmake/config.h.in | 2 + include/boost-patched/graph/reverse_graph.hpp | 2 +- 4 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 cmake/boost.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index bc60fe48..97039b13 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,6 +84,7 @@ if(NOT Boost_FOUND) endif() endif() +include (${CMAKE_MODULE_PATH}/boost.cmake) # -- make this work? set(python_ADDITIONAL_VERSIONS 2.7 2.6) find_package(PythonInterp) diff --git a/cmake/boost.cmake b/cmake/boost.cmake new file mode 100644 index 00000000..3d513deb --- /dev/null +++ b/cmake/boost.cmake @@ -0,0 +1,41 @@ +# Boost 1.62 has a bug that we've patched around, check if it is required +if (Boost_VERSION EQUAL 106200) + set (CMAKE_REQUIRED_INCLUDES ${BOOST_INCLUDEDIR} "${PROJECT_SOURCE_DIR}/include") + set (BOOST_REV_TEST " +#include +#include +#include +#include + +int main(int,char*[]) +{ + using namespace boost; + // Check const reverse_graph + { + typedef adjacency_list< vecS, vecS, bidirectionalS, + property, + property, + property + > AdjList; + typedef reverse_graph Graph; + BOOST_CONCEPT_ASSERT(( BidirectionalGraphConcept )); + } + return 0; +} +") + + CHECK_CXX_SOURCE_COMPILES("${BOOST_REV_TEST}" BOOST_REVGRAPH_OK) + + if (NOT BOOST_REVGRAPH_OK) + message(STATUS "trying patched") + CHECK_CXX_SOURCE_COMPILES(" +#include +${BOOST_REV_TEST}" BOOST_REVGRAPH_PATCH) + endif() + + if (NOT BOOST_REVGRAPH_OK AND NOT BOOST_REVGRAPH_PATCH) + message(FATAL_ERROR "Something is wrong with this copy of boost::reverse_graph") + endif() + + unset (CMAKE_REQUIRED_INCLUDES) +endif () # Boost 1.62.0 diff --git a/cmake/config.h.in b/cmake/config.h.in index d8430f22..c7b577c2 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -89,3 +89,5 @@ /* define if this is a release build. 
*/
 #cmakedefine RELEASE_BUILD

+/* define if reverse_graph requires patch for boost 1.62.0 */
+#cmakedefine BOOST_REVGRAPH_PATCH
diff --git a/include/boost-patched/graph/reverse_graph.hpp b/include/boost-patched/graph/reverse_graph.hpp
index 07a11f9b..8f98a1d5 100644
--- a/include/boost-patched/graph/reverse_graph.hpp
+++ b/include/boost-patched/graph/reverse_graph.hpp
@@ -5,7 +5,7 @@

 #include 

-#if (BOOST_VERSION == 106200)
+#if defined(BOOST_REVGRAPH_PATCH)

 // Boost 1.62.0 does not implement degree() in reverse_graph which is required
 // by BidirectionalGraph, so add it.

From c6e28567c145f3e31bd3251fdc824c37847762ba Mon Sep 17 00:00:00 2001
From: Matthew Barr
Date: Fri, 20 Jan 2017 13:42:26 +1100
Subject: [PATCH 101/103] Documentation updates for the fat runtime

---
 doc/dev-reference/getting_started.rst | 67 +++++++++++++++++++++++++--
 1 file changed, 62 insertions(+), 5 deletions(-)

diff --git a/doc/dev-reference/getting_started.rst b/doc/dev-reference/getting_started.rst
index 826349a7..1794f3e9 100644
--- a/doc/dev-reference/getting_started.rst
+++ b/doc/dev-reference/getting_started.rst
@@ -169,6 +169,9 @@ Common options for CMake include:
 +------------------------+----------------------------------------------------+
 | DEBUG_OUTPUT           | Enable very verbose debug output. Default off.     |
 +------------------------+----------------------------------------------------+
+| FAT_RUNTIME            | Build the :ref:`fat runtime`. Default              |
+|                        | true on Linux, not available elsewhere.            |
++------------------------+----------------------------------------------------+

 For example, to generate a ``Debug`` build: ::

@@ -199,11 +202,11 @@ The other types of builds are:
 Target Architecture
 -------------------

-By default, Hyperscan will be compiled to target the instruction set of the
-processor of the machine that being used for compilation. This is done via
-the use of ``-march=native``. The result of this means that a library built on
-one machine may not work on a different machine if they differ in supported
-instruction subsets.
+Unless using the :ref:`fat runtime`, by default Hyperscan will be
+compiled to target the instruction set of the processor of the machine that
+is being used for compilation. This is done via the use of ``-march=native``.
+This means that a library built on one machine may not work on a different
+machine if they differ in supported instruction subsets.

 To override the use of ``-march=native``, set appropriate flags for the
 compiler in ``CFLAGS`` and ``CXXFLAGS`` environment variables before invoking
@@ -215,3 +218,57 @@ example, to set the instruction subsets up to ``SSE4.2`` using GCC 4.8: ::

 For more information, refer to :ref:`instr_specialization`.

+.. _fat_runtime:
+
+Fat Runtime
+-----------
+
+A feature introduced in Hyperscan v4.4 is the ability for the Hyperscan
+library to dispatch the most appropriate runtime code for the host processor.
+This feature is called the "fat runtime", as a single Hyperscan library
+contains multiple copies of the runtime code for different instruction sets.
+
+.. note::
+
+   The fat runtime feature is only available on Linux. Release builds of
+   Hyperscan will default to having the fat runtime enabled where supported.
+
+When building the library with the fat runtime, the Hyperscan runtime code
+will be compiled multiple times for these different instruction sets, and
+these compiled objects are combined into one library. There are no changes to
+how user applications are built against this library.
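+
+As a minimal sketch, the dispatch mechanism described in the following
+paragraphs looks roughly like the C below. This example is illustrative
+only: the function names and signatures are invented for this guide, and it
+uses GCC's ``__builtin_cpu_supports`` builtin in place of Hyperscan's own
+internal CPUID helpers: ::
+
+    /* two pre-built variants of the same routine */
+    int scan_avx2(const char *data, unsigned len);
+    int scan_core2(const char *data, unsigned len);
+
+    /* resolver: run once by the loader to choose a variant */
+    static int (*resolve_scan(void))(const char *, unsigned) {
+        __builtin_cpu_init();
+        if (__builtin_cpu_supports("avx2")) {
+            return scan_avx2;
+        }
+        return scan_core2; /* baseline variant, requires SSSE3 */
+    }
+
+    /* the public symbol is bound to the variant the resolver chose */
+    int scan(const char *data, unsigned len)
+        __attribute__((ifunc("resolve_scan")));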
+
+When applications are executed, the correct version of the runtime is selected
+for the machine that it is running on. This is done using a ``CPUID`` check
+for the presence of the instruction set, and then an indirect function is
+resolved so that the right version of each API function is used. There is no
+impact on function call performance, as this check and resolution is performed
+by the ELF loader once when the binary is loaded.
+
+If the Hyperscan library is used on x86 systems without ``SSSE3``, the runtime
+API functions will resolve to functions that return :c:member:`HS_ARCH_ERROR`
+instead of potentially executing illegal instructions. The API function
+:c:func:`hs_valid_platform` can be used by application writers to determine if
+the current platform is supported by Hyperscan.
+
+As of this release, the variants of the runtime that are built, and the CPU
+capability that is required, are the following:
+
++----------+-------------------------------+---------------------+
+| Variant  | CPU Feature Flag(s) Required  | gcc arch flag       |
++==========+===============================+=====================+
+| Core 2   | ``SSSE3``                     | ``-march=core2``    |
++----------+-------------------------------+---------------------+
+| Core i7  | ``SSE4_2`` and ``POPCNT``     | ``-march=corei7``   |
++----------+-------------------------------+---------------------+
+| AVX 2    | ``AVX2``                      | ``-march=avx2``     |
++----------+-------------------------------+---------------------+
+
+As this requires compiler, libc, and binutils support, at this time the fat
+runtime will only be enabled for Linux builds where the compiler supports the
+`indirect function "ifunc" function attribute
+`_.
+
+This attribute should be available on all supported versions of GCC, and
+recent versions of Clang and ICC. There is currently no operating system
+support for this feature on non-Linux systems.

From 64c9bdc7b048d2fdb217aa53dbdc84f805ed885d Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Tue, 10 Jan 2017 10:31:09 +1100
Subject: [PATCH 102/103] changelog: updates for 4.4 release

---
 CHANGELOG.md | 36 ++++++++++++++++++++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 729e58ef..03eba64d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,8 +2,40 @@

 This is a list of notable changes to Hyperscan, in reverse chronological order.

-## [4.3.2] 2016-11-15
+## [4.4.0] 2017-01-20
+- Introduce the "fat runtime" build. This will build several variants of the
+  Hyperscan scanning engine specialised for different processor feature sets,
+  and use the appropriate one for the host at runtime. This uses the "ifunc"
+  indirect function attribute provided by GCC and is currently available on
+  Linux only, where it is the default for release builds.
+- New API function: add the `hs_valid_platform()` function. This function tests
+  whether the host provides the SSSE3 instruction set required by Hyperscan.
+- Introduce a new standard benchmarking tool, "hsbench". This provides an easy
+  way to measure Hyperscan's performance for a particular set of patterns and
+  corpus of data to be scanned.
+- Introduce a 64-bit GPR LimEx NFA model, which uses 64-bit GPRs on 64-bit
+  hosts and SSE registers on 32-bit hosts.
+- Introduce a new DFA model ("McSheng") which is a hybrid of the existing
+  McClellan and Sheng models. This improves scanning performance for some
+  cases.
+- Introduce lookaround specialisations to improve scanning performance.
+- Improve the handling of long literals by moving confirmation to the Rose + interpreter and simplifying the hash table used to track them in streaming + mode. +- Improve compile time optimisation for removing redundant paths from + expression graphs. +- Build: improve support for building with MSVC toolchain. +- Reduce the size of small write DFAs used for small scans in block mode. +- Introduce a custom graph type (`ue2_graph`) used in place of the Boost Graph + Library's `adjacency_list` type. Improves compile time performance and type + safety. +- Improve scanning performance of the McClellan DFA. +- Bugfix for a very unusual SOM case where the incorrect start offset was + reported for a match. +- Bugfix for issue #37, removing execute permissions from some source files. +- Bugfix for issue #41, handle Windows line endings in pattern files. +## [4.3.2] 2016-11-15 - Bugfix for issue #39. This small change is a workaround for an issue in Boost 1.62. The fix has been submitted to Boost for inclusion in a future release. @@ -11,7 +43,7 @@ This is a list of notable changes to Hyperscan, in reverse chronological order. ## [4.3.1] 2016-08-29 - Bugfix for issue #30. In recent versions of Clang, a write to a variable was being elided, resulting in corrupted stream state after calling - hs_reset_stream(). + `hs_reset_stream()`. ## [4.3.0] 2016-08-24 - Introduce a new analysis pass ("Violet") used for decomposition of patterns From 53a8d97e7751e95144be926fec847b24285df245 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Fri, 20 Jan 2017 13:59:51 +1100 Subject: [PATCH 103/103] Bump version number for release --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 97039b13..3a7d40ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,8 +2,8 @@ cmake_minimum_required (VERSION 2.8.11) project (Hyperscan C CXX) set (HS_MAJOR_VERSION 4) -set (HS_MINOR_VERSION 3) -set (HS_PATCH_VERSION 2) +set (HS_MINOR_VERSION 4) +set (HS_PATCH_VERSION 0) set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION}) set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
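A closing usage sketch (illustrative only; this program is not part of the
patch series above): the `hs_valid_platform()` API added in [PATCH 079/103]
lets an application built against the fat runtime check the host CPU before
calling any other Hyperscan function.

    #include <stdio.h>
    #include <hs.h>

    int main(void) {
        /* Refuse to run on hosts without SSSE3, rather than letting the
         * dispatched API functions return HS_ARCH_ERROR later on. */
        if (hs_valid_platform() != HS_SUCCESS) {
            fprintf(stderr, "Hyperscan is not supported on this CPU\n");
            return 1;
        }
        /* It is now safe to call the compile and scan APIs. */
        return 0;
    }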