From 2fda8c0b2035f894344f4e8df1abd5f18b1fe096 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 30 Nov 2016 16:55:52 +1100 Subject: [PATCH] hwlm: move accel from hwlm build to rose --- CMakeLists.txt | 2 + src/hwlm/hwlm_build.cpp | 446 +--------------------------- src/rose/rose_build_lit_accel.cpp | 471 ++++++++++++++++++++++++++++++ src/rose/rose_build_lit_accel.h | 71 +++++ src/rose/rose_build_matchers.cpp | 7 + 5 files changed, 555 insertions(+), 442 deletions(-) create mode 100644 src/rose/rose_build_lit_accel.cpp create mode 100644 src/rose/rose_build_lit_accel.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 7ede52b4..8fe82a70 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -936,6 +936,8 @@ SET (hs_SRCS src/rose/rose_build_impl.h src/rose/rose_build_infix.cpp src/rose/rose_build_infix.h + src/rose/rose_build_lit_accel.cpp + src/rose/rose_build_lit_accel.h src/rose/rose_build_long_lit.cpp src/rose/rose_build_long_lit.h src/rose/rose_build_lookaround.cpp diff --git a/src/hwlm/hwlm_build.cpp b/src/hwlm/hwlm_build.cpp index 29e71293..a05ca1a2 100644 --- a/src/hwlm/hwlm_build.cpp +++ b/src/hwlm/hwlm_build.cpp @@ -29,9 +29,11 @@ /** \file * \brief Hamster Wheel Literal Matcher: build code. */ + +#include "hwlm_build.h" + #include "grey.h" #include "hwlm.h" -#include "hwlm_build.h" #include "hwlm_internal.h" #include "hwlm_literal.h" #include "noodle_engine.h" @@ -39,22 +41,11 @@ #include "scratch.h" #include "ue2common.h" #include "fdr/fdr_compile.h" -#include "nfa/shufticompile.h" -#include "nfa/trufflecompile.h" -#include "util/alloc.h" -#include "util/bitutils.h" -#include "util/charreach.h" -#include "util/compare.h" #include "util/compile_context.h" #include "util/compile_error.h" -#include "util/dump_charclass.h" -#include "util/target_info.h" #include "util/ue2string.h" -#include "util/verify_types.h" #include -#include -#include #include #include @@ -62,431 +53,6 @@ using namespace std; namespace ue2 { -static const unsigned int MAX_ACCEL_OFFSET = 16; -static const unsigned int MAX_SHUFTI_WIDTH = 240; - -static -size_t mask_overhang(const hwlmLiteral &lit) { - size_t msk_true_size = lit.msk.size(); - assert(msk_true_size <= HWLM_MASKLEN); - assert(HWLM_MASKLEN <= MAX_ACCEL_OFFSET); - for (u8 c : lit.msk) { - if (!c) { - msk_true_size--; - } else { - break; - } - } - - if (lit.s.length() >= msk_true_size) { - return 0; - } - - /* only short literals should be able to have a mask which overhangs */ - assert(lit.s.length() < MAX_ACCEL_OFFSET); - return msk_true_size - lit.s.length(); -} - -static -bool findDVerm(const vector &lits, AccelAux *aux) { - const hwlmLiteral &first = *lits.front(); - - struct candidate { - candidate(void) - : c1(0), c2(0), max_offset(0), b5insens(false), valid(false) {} - candidate(const hwlmLiteral &base, u32 offset) - : c1(base.s[offset]), c2(base.s[offset + 1]), max_offset(0), - b5insens(false), valid(true) {} - char c1; - char c2; - u32 max_offset; - bool b5insens; - bool valid; - - bool operator>(const candidate &other) const { - if (!valid) { - return false; - } - - if (!other.valid) { - return true; - } - - if (other.cdiffers() && !cdiffers()) { - return false; - } - - if (!other.cdiffers() && cdiffers()) { - return true; - } - - if (!other.b5insens && b5insens) { - return false; - } - - if (other.b5insens && !b5insens) { - return true; - } - - if (max_offset > other.max_offset) { - return false; - } - - return true; - } - - bool cdiffers(void) const { - if (!b5insens) { - return c1 != c2; - } - return (c1 & CASE_CLEAR) != (c2 & CASE_CLEAR); - } - }; - - candidate best; - - for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()) - 1; i++) { - candidate curr(first, i); - - /* check to see if this pair appears in each string */ - for (const auto &lit_ptr : lits) { - const hwlmLiteral &lit = *lit_ptr; - if (lit.nocase && (ourisalpha(curr.c1) || ourisalpha(curr.c2))) { - curr.b5insens = true; /* no choice but to be case insensitive */ - } - - bool found = false; - bool found_nc = false; - for (u32 j = 0; - !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; j++) { - found |= curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1]; - found_nc |= (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR) - && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR); - - if (curr.b5insens) { - found = found_nc; - } - } - - if (!curr.b5insens && !found && found_nc) { - curr.b5insens = true; - found = true; - } - - if (!found) { - goto next_candidate; - } - } - - /* check to find the max offset where this appears */ - for (const auto &lit_ptr : lits) { - const hwlmLiteral &lit = *lit_ptr; - for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; - j++) { - bool found = false; - if (curr.b5insens) { - found = (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR) - && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR); - } else { - found = curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1]; - } - - if (found) { - assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET); - ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit)); - break; - } - } - } - - if (curr > best) { - best = curr; - } - - next_candidate:; - } - - if (!best.valid) { - return false; - } - - aux->dverm.offset = verify_u8(best.max_offset); - - if (!best.b5insens) { - aux->dverm.accel_type = ACCEL_DVERM; - aux->dverm.c1 = best.c1; - aux->dverm.c2 = best.c2; - DEBUG_PRINTF("built dverm for %02hhx%02hhx\n", - aux->dverm.c1, aux->dverm.c2); - } else { - aux->dverm.accel_type = ACCEL_DVERM_NOCASE; - aux->dverm.c1 = best.c1 & CASE_CLEAR; - aux->dverm.c2 = best.c2 & CASE_CLEAR; - DEBUG_PRINTF("built dverm nc for %02hhx%02hhx\n", - aux->dverm.c1, aux->dverm.c2); - } - return true; -} - -static -bool findSVerm(const vector &lits, AccelAux *aux) { - const hwlmLiteral &first = *lits.front(); - - struct candidate { - candidate(void) - : c(0), max_offset(0), b5insens(false), valid(false) {} - candidate(const hwlmLiteral &base, u32 offset) - : c(base.s[offset]), max_offset(0), - b5insens(false), valid(true) {} - char c; - u32 max_offset; - bool b5insens; - bool valid; - - bool operator>(const candidate &other) const { - if (!valid) { - return false; - } - - if (!other.valid) { - return true; - } - - if (!other.b5insens && b5insens) { - return false; - } - - if (other.b5insens && !b5insens) { - return true; - } - - if (max_offset > other.max_offset) { - return false; - } - - return true; - } - }; - - candidate best; - - for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()); i++) { - candidate curr(first, i); - - /* check to see if this pair appears in each string */ - for (const auto &lit_ptr : lits) { - const hwlmLiteral &lit = *lit_ptr; - if (lit.nocase && ourisalpha(curr.c)) { - curr.b5insens = true; /* no choice but to be case insensitive */ - } - - bool found = false; - bool found_nc = false; - for (u32 j = 0; - !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) { - found |= curr.c == lit.s[j]; - found_nc |= (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR); - - if (curr.b5insens) { - found = found_nc; - } - } - - if (!curr.b5insens && !found && found_nc) { - curr.b5insens = true; - found = true; - } - - if (!found) { - goto next_candidate; - } - } - - /* check to find the max offset where this appears */ - for (const auto &lit_ptr : lits) { - const hwlmLiteral &lit = *lit_ptr; - for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) { - bool found = false; - if (curr.b5insens) { - found = (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR); - } else { - found = curr.c == lit.s[j]; - } - - if (found) { - assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET); - ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit)); - } - } - } - - if (curr > best) { - best = curr; - } - - next_candidate:; - } - - if (!best.valid) { - return false; - } - - if (!best.b5insens) { - aux->verm.accel_type = ACCEL_VERM; - aux->verm.c = best.c; - DEBUG_PRINTF("built verm for %02hhx\n", aux->verm.c); - } else { - aux->verm.accel_type = ACCEL_VERM_NOCASE; - aux->verm.c = best.c & CASE_CLEAR; - DEBUG_PRINTF("built verm nc for %02hhx\n", aux->verm.c); - } - aux->verm.offset = verify_u8(best.max_offset); - - return true; -} - -static -void filterLits(const vector &lits, hwlm_group_t expected_groups, - vector *filtered_lits, u32 *min_len) { - *min_len = MAX_ACCEL_OFFSET; - - for (const auto &lit : lits) { - if (!(lit.groups & expected_groups)) { - continue; - } - - const size_t lit_len = lit.s.length(); - if (lit_len < *min_len) { - *min_len = verify_u32(lit_len); - } - - filtered_lits->push_back(&lit); - -#ifdef DEBUG - DEBUG_PRINTF("lit:"); - for (u32 i = 0; i < lit.s.length(); i++) { - printf("%02hhx", lit.s[i]); - } - printf("\n"); -#endif - } -} - -static -bool litGuardedByCharReach(const CharReach &cr, const hwlmLiteral &lit, - u32 max_offset) { - for (u32 i = 0; i <= max_offset && i < lit.s.length(); i++) { - unsigned char c = lit.s[i]; - if (lit.nocase) { - if (cr.test(mytoupper(c)) && cr.test(mytolower(c))) { - return true; - } - } else { - if (cr.test(c)) { - return true; - } - } - } - - return false; -} - -static -void findForwardAccelScheme(const vector &lits, - hwlm_group_t expected_groups, AccelAux *aux) { - DEBUG_PRINTF("building accel expected=%016llx\n", expected_groups); - u32 min_len = MAX_ACCEL_OFFSET; - vector filtered_lits; - - filterLits(lits, expected_groups, &filtered_lits, &min_len); - if (filtered_lits.empty()) { - return; - } - - if (findDVerm(filtered_lits, aux) - || findSVerm(filtered_lits, aux)) { - return; - } - - /* look for shufti/truffle */ - - vector reach(MAX_ACCEL_OFFSET, CharReach()); - for (const auto &lit : lits) { - if (!(lit.groups & expected_groups)) { - continue; - } - - u32 overhang = mask_overhang(lit); - for (u32 i = 0; i < overhang; i++) { - /* this offset overhangs the start of the real literal; look at the - * msk/cmp */ - for (u32 j = 0; j < N_CHARS; j++) { - if ((j & lit.msk[i]) == lit.cmp[i]) { - reach[i].set(j); - } - } - } - for (u32 i = overhang; i < MAX_ACCEL_OFFSET; i++) { - CharReach &reach_i = reach[i]; - u32 i_effective = i - overhang; - - if (litGuardedByCharReach(reach_i, lit, i_effective)) { - continue; - } - unsigned char c = i_effective < lit.s.length() ? lit.s[i_effective] - : lit.s.back(); - if (lit.nocase) { - reach_i.set(mytoupper(c)); - reach_i.set(mytolower(c)); - } else { - reach_i.set(c); - } - } - } - - u32 min_count = ~0U; - u32 min_offset = ~0U; - for (u32 i = 0; i < MAX_ACCEL_OFFSET; i++) { - size_t count = reach[i].count(); - DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i, - describeClass(reach[i]).c_str(), count); - if (count < min_count) { - min_count = (u32)count; - min_offset = i; - } - } - - if (min_count > MAX_SHUFTI_WIDTH) { - DEBUG_PRINTF("FAIL: min shufti with %u chars is too wide\n", min_count); - return; - } - - const CharReach &cr = reach[min_offset]; - if (-1 != - shuftiBuildMasks(cr, (u8 *)&aux->shufti.lo, (u8 *)&aux->shufti.hi)) { - DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n", - describeClass(cr).c_str(), cr.count(), min_offset); - aux->shufti.accel_type = ACCEL_SHUFTI; - aux->shufti.offset = verify_u8(min_offset); - return; - } - - truffleBuildMasks(cr, (u8 *)&aux->truffle.mask1, (u8 *)&aux->truffle.mask2); - DEBUG_PRINTF("built truffle for %s (%zu chars, offset %u)\n", - describeClass(cr).c_str(), cr.count(), min_offset); - aux->truffle.accel_type = ACCEL_TRUFFLE; - aux->truffle.offset = verify_u8(min_offset); -} - -static -void buildForwardAccel(HWLM *h, const vector &lits, - hwlm_group_t expected_groups) { - findForwardAccelScheme(lits, expected_groups, &h->accel1); - findForwardAccelScheme(lits, HWLM_ALL_GROUPS, &h->accel0); - - h->accel1_groups = expected_groups; -} - static void dumpLits(UNUSED const vector &lits) { #ifdef DEBUG @@ -533,7 +99,7 @@ bool isNoodleable(const vector &lits, aligned_unique_ptr hwlmBuild(const vector &lits, bool make_small, const CompileContext &cc, - hwlm_group_t expected_groups) { + UNUSED hwlm_group_t expected_groups) { assert(!lits.empty()); dumpLits(lits); @@ -604,10 +170,6 @@ aligned_unique_ptr hwlmBuild(const vector &lits, h->type = engType; memcpy(HWLM_DATA(h.get()), eng.get(), engSize); - if (engType == HWLM_ENGINE_FDR && cc.grey.hamsterAccelForward) { - buildForwardAccel(h.get(), lits, expected_groups); - } - return h; } diff --git a/src/rose/rose_build_lit_accel.cpp b/src/rose/rose_build_lit_accel.cpp new file mode 100644 index 00000000..1388069d --- /dev/null +++ b/src/rose/rose_build_lit_accel.cpp @@ -0,0 +1,471 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "rose_build_lit_accel.h" + +#include "grey.h" +#include "ue2common.h" +#include "hwlm/hwlm_build.h" +#include "hwlm/hwlm_internal.h" +#include "hwlm/hwlm_literal.h" +#include "nfa/accel.h" +#include "nfa/shufticompile.h" +#include "nfa/trufflecompile.h" +#include "util/compare.h" +#include "util/dump_charclass.h" +#include "util/verify_types.h" + +using namespace std; + +namespace ue2 { + +static const unsigned int MAX_ACCEL_OFFSET = 16; +static const unsigned int MAX_SHUFTI_WIDTH = 240; + +static +size_t mask_overhang(const AccelString &lit) { + size_t msk_true_size = lit.msk.size(); + assert(msk_true_size <= HWLM_MASKLEN); + assert(HWLM_MASKLEN <= MAX_ACCEL_OFFSET); + for (u8 c : lit.msk) { + if (!c) { + msk_true_size--; + } else { + break; + } + } + + if (lit.s.length() >= msk_true_size) { + return 0; + } + + /* only short literals should be able to have a mask which overhangs */ + assert(lit.s.length() < MAX_ACCEL_OFFSET); + return msk_true_size - lit.s.length(); +} + +static +bool findDVerm(const vector &lits, AccelAux *aux) { + const AccelString &first = *lits.front(); + + struct candidate { + candidate(void) + : c1(0), c2(0), max_offset(0), b5insens(false), valid(false) {} + candidate(const AccelString &base, u32 offset) + : c1(base.s[offset]), c2(base.s[offset + 1]), max_offset(0), + b5insens(false), valid(true) {} + char c1; + char c2; + u32 max_offset; + bool b5insens; + bool valid; + + bool operator>(const candidate &other) const { + if (!valid) { + return false; + } + + if (!other.valid) { + return true; + } + + if (other.cdiffers() && !cdiffers()) { + return false; + } + + if (!other.cdiffers() && cdiffers()) { + return true; + } + + if (!other.b5insens && b5insens) { + return false; + } + + if (other.b5insens && !b5insens) { + return true; + } + + if (max_offset > other.max_offset) { + return false; + } + + return true; + } + + bool cdiffers(void) const { + if (!b5insens) { + return c1 != c2; + } + return (c1 & CASE_CLEAR) != (c2 & CASE_CLEAR); + } + }; + + candidate best; + + for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()) - 1; i++) { + candidate curr(first, i); + + /* check to see if this pair appears in each string */ + for (const auto &lit_ptr : lits) { + const AccelString &lit = *lit_ptr; + if (lit.nocase && (ourisalpha(curr.c1) || ourisalpha(curr.c2))) { + curr.b5insens = true; /* no choice but to be case insensitive */ + } + + bool found = false; + bool found_nc = false; + for (u32 j = 0; + !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; j++) { + found |= curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1]; + found_nc |= (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR) + && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR); + + if (curr.b5insens) { + found = found_nc; + } + } + + if (!curr.b5insens && !found && found_nc) { + curr.b5insens = true; + found = true; + } + + if (!found) { + goto next_candidate; + } + } + + /* check to find the max offset where this appears */ + for (const auto &lit_ptr : lits) { + const AccelString &lit = *lit_ptr; + for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; + j++) { + bool found = false; + if (curr.b5insens) { + found = (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR) + && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR); + } else { + found = curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1]; + } + + if (found) { + assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET); + ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit)); + break; + } + } + } + + if (curr > best) { + best = curr; + } + + next_candidate:; + } + + if (!best.valid) { + return false; + } + + aux->dverm.offset = verify_u8(best.max_offset); + + if (!best.b5insens) { + aux->dverm.accel_type = ACCEL_DVERM; + aux->dverm.c1 = best.c1; + aux->dverm.c2 = best.c2; + DEBUG_PRINTF("built dverm for %02hhx%02hhx\n", + aux->dverm.c1, aux->dverm.c2); + } else { + aux->dverm.accel_type = ACCEL_DVERM_NOCASE; + aux->dverm.c1 = best.c1 & CASE_CLEAR; + aux->dverm.c2 = best.c2 & CASE_CLEAR; + DEBUG_PRINTF("built dverm nc for %02hhx%02hhx\n", + aux->dverm.c1, aux->dverm.c2); + } + return true; +} + +static +bool findSVerm(const vector &lits, AccelAux *aux) { + const AccelString &first = *lits.front(); + + struct candidate { + candidate(void) + : c(0), max_offset(0), b5insens(false), valid(false) {} + candidate(const AccelString &base, u32 offset) + : c(base.s[offset]), max_offset(0), + b5insens(false), valid(true) {} + char c; + u32 max_offset; + bool b5insens; + bool valid; + + bool operator>(const candidate &other) const { + if (!valid) { + return false; + } + + if (!other.valid) { + return true; + } + + if (!other.b5insens && b5insens) { + return false; + } + + if (other.b5insens && !b5insens) { + return true; + } + + if (max_offset > other.max_offset) { + return false; + } + + return true; + } + }; + + candidate best; + + for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()); i++) { + candidate curr(first, i); + + /* check to see if this pair appears in each string */ + for (const auto &lit_ptr : lits) { + const AccelString &lit = *lit_ptr; + if (lit.nocase && ourisalpha(curr.c)) { + curr.b5insens = true; /* no choice but to be case insensitive */ + } + + bool found = false; + bool found_nc = false; + for (u32 j = 0; + !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) { + found |= curr.c == lit.s[j]; + found_nc |= (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR); + + if (curr.b5insens) { + found = found_nc; + } + } + + if (!curr.b5insens && !found && found_nc) { + curr.b5insens = true; + found = true; + } + + if (!found) { + goto next_candidate; + } + } + + /* check to find the max offset where this appears */ + for (const auto &lit_ptr : lits) { + const AccelString &lit = *lit_ptr; + for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) { + bool found = false; + if (curr.b5insens) { + found = (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR); + } else { + found = curr.c == lit.s[j]; + } + + if (found) { + assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET); + ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit)); + } + } + } + + if (curr > best) { + best = curr; + } + + next_candidate:; + } + + if (!best.valid) { + return false; + } + + if (!best.b5insens) { + aux->verm.accel_type = ACCEL_VERM; + aux->verm.c = best.c; + DEBUG_PRINTF("built verm for %02hhx\n", aux->verm.c); + } else { + aux->verm.accel_type = ACCEL_VERM_NOCASE; + aux->verm.c = best.c & CASE_CLEAR; + DEBUG_PRINTF("built verm nc for %02hhx\n", aux->verm.c); + } + aux->verm.offset = verify_u8(best.max_offset); + + return true; +} + +static +void filterLits(const vector &lits, hwlm_group_t expected_groups, + vector *filtered_lits, u32 *min_len) { + *min_len = MAX_ACCEL_OFFSET; + + for (const auto &lit : lits) { + if (!(lit.groups & expected_groups)) { + continue; + } + + const size_t lit_len = lit.s.length(); + if (lit_len < *min_len) { + *min_len = verify_u32(lit_len); + } + + filtered_lits->push_back(&lit); + +#ifdef DEBUG + DEBUG_PRINTF("lit:"); + for (u32 i = 0; i < lit.s.length(); i++) { + printf("%02hhx", lit.s[i]); + } + printf("\n"); +#endif + } +} + +static +bool litGuardedByCharReach(const CharReach &cr, const AccelString &lit, + u32 max_offset) { + for (u32 i = 0; i <= max_offset && i < lit.s.length(); i++) { + unsigned char c = lit.s[i]; + if (lit.nocase) { + if (cr.test(mytoupper(c)) && cr.test(mytolower(c))) { + return true; + } + } else { + if (cr.test(c)) { + return true; + } + } + } + + return false; +} + +static +void findForwardAccelScheme(const vector &lits, + hwlm_group_t expected_groups, AccelAux *aux) { + DEBUG_PRINTF("building accel expected=%016llx\n", expected_groups); + u32 min_len = MAX_ACCEL_OFFSET; + vector filtered_lits; + + filterLits(lits, expected_groups, &filtered_lits, &min_len); + if (filtered_lits.empty()) { + return; + } + + if (findDVerm(filtered_lits, aux) + || findSVerm(filtered_lits, aux)) { + return; + } + + /* look for shufti/truffle */ + + vector reach(MAX_ACCEL_OFFSET, CharReach()); + for (const auto &lit : lits) { + if (!(lit.groups & expected_groups)) { + continue; + } + + u32 overhang = mask_overhang(lit); + for (u32 i = 0; i < overhang; i++) { + /* this offset overhangs the start of the real literal; look at the + * msk/cmp */ + for (u32 j = 0; j < N_CHARS; j++) { + if ((j & lit.msk[i]) == lit.cmp[i]) { + reach[i].set(j); + } + } + } + for (u32 i = overhang; i < MAX_ACCEL_OFFSET; i++) { + CharReach &reach_i = reach[i]; + u32 i_effective = i - overhang; + + if (litGuardedByCharReach(reach_i, lit, i_effective)) { + continue; + } + unsigned char c = i_effective < lit.s.length() ? lit.s[i_effective] + : lit.s.back(); + if (lit.nocase) { + reach_i.set(mytoupper(c)); + reach_i.set(mytolower(c)); + } else { + reach_i.set(c); + } + } + } + + u32 min_count = ~0U; + u32 min_offset = ~0U; + for (u32 i = 0; i < MAX_ACCEL_OFFSET; i++) { + size_t count = reach[i].count(); + DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i, + describeClass(reach[i]).c_str(), count); + if (count < min_count) { + min_count = (u32)count; + min_offset = i; + } + } + + if (min_count > MAX_SHUFTI_WIDTH) { + DEBUG_PRINTF("FAIL: min shufti with %u chars is too wide\n", min_count); + return; + } + + const CharReach &cr = reach[min_offset]; + if (-1 != + shuftiBuildMasks(cr, (u8 *)&aux->shufti.lo, (u8 *)&aux->shufti.hi)) { + DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n", + describeClass(cr).c_str(), cr.count(), min_offset); + aux->shufti.accel_type = ACCEL_SHUFTI; + aux->shufti.offset = verify_u8(min_offset); + return; + } + + truffleBuildMasks(cr, (u8 *)&aux->truffle.mask1, (u8 *)&aux->truffle.mask2); + DEBUG_PRINTF("built truffle for %s (%zu chars, offset %u)\n", + describeClass(cr).c_str(), cr.count(), min_offset); + aux->truffle.accel_type = ACCEL_TRUFFLE; + aux->truffle.offset = verify_u8(min_offset); +} + +void buildForwardAccel(HWLM *h, const vector &lits, + hwlm_group_t expected_groups) { + findForwardAccelScheme(lits, expected_groups, &h->accel1); + findForwardAccelScheme(lits, HWLM_ALL_GROUPS, &h->accel0); + + h->accel1_groups = expected_groups; +} + +} // namespace ue2 diff --git a/src/rose/rose_build_lit_accel.h b/src/rose/rose_build_lit_accel.h new file mode 100644 index 00000000..f0c01434 --- /dev/null +++ b/src/rose/rose_build_lit_accel.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ROSE_BUILD_LIT_ACCEL_H +#define ROSE_BUILD_LIT_ACCEL_H + +#include "hwlm/hwlm.h" + +#include +#include +#include +#include + +struct HWLM; + +namespace ue2 { + +struct AccelString { + AccelString(std::string s_in, bool nocase_in, std::vector msk_in, + std::vector cmp_in, hwlm_group_t groups_in) + : s(std::move(s_in)), nocase(nocase_in), msk(std::move(msk_in)), + cmp(std::move(cmp_in)), groups(groups_in) {} + + std::string s; + bool nocase; + std::vector msk; + std::vector cmp; + hwlm_group_t groups; + + bool operator==(const AccelString &a) const { + return s == a.s && nocase == a.nocase && msk == a.msk && cmp == a.cmp && + groups == a.groups; + } + + bool operator<(const AccelString &a) const { + return std::tie(s, nocase, msk, cmp, groups) < + std::tie(a.s, a.nocase, a.msk, a.cmp, a.groups); + } +}; + +void buildForwardAccel(HWLM *h, const std::vector &lits, + hwlm_group_t expected_groups); + +} // namespace ue2 + +#endif // ROSE_BUILD_LIT_ACCEL_H diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index c51905ca..8d6f68df 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -34,6 +34,7 @@ #include "rose_build_matchers.h" #include "rose_build_impl.h" +#include "rose_build_lit_accel.h" #include "rose_build_width.h" #include "hwlm/hwlm_build.h" #include "hwlm/hwlm_literal.h" @@ -801,6 +802,8 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, throw CompileError("Unable to generate bytecode."); } + buildForwardAccel(hwlm.get(), mp.lits, build.getInitialGroups()); + if (build.cc.streaming) { DEBUG_PRINTF("history_required=%zu\n", mp.history_required); assert(mp.history_required <= build.cc.grey.maxHistoryAvailable); @@ -866,6 +869,8 @@ buildSmallBlockMatcher(const RoseBuildImpl &build, throw CompileError("Unable to generate bytecode."); } + buildForwardAccel(hwlm.get(), mp.lits, build.getInitialGroups()); + *sbsize = hwlmSize(hwlm.get()); assert(*sbsize); DEBUG_PRINTF("built small block literal table size %zu bytes\n", *sbsize); @@ -893,6 +898,8 @@ buildEodAnchoredMatcher(const RoseBuildImpl &build, throw CompileError("Unable to generate bytecode."); } + buildForwardAccel(hwlm.get(), mp.lits, build.getInitialGroups()); + *esize = hwlmSize(hwlm.get()); assert(*esize); DEBUG_PRINTF("built eod-anchored literal table size %zu bytes\n", *esize);