/*
 * Patch overview. The patch text that follows this comment is unchanged.
 *
 * NOTE(review): the patch text appears to have lost its original line breaks
 * and indentation, and `const vector &lits` carries no template argument
 * list - presumably stripped during extraction. Confirm against the original
 * commit before trying to apply it.
 *
 * src/hwlm/hwlm_build.cpp
 *   - Adds the static helper mask_overhang(const hwlmLiteral &lit). It starts
 *     from lit.msk.size(), subtracts one for every leading zero byte of
 *     lit.msk (stopping at the first non-zero byte), and returns 0 when
 *     lit.s.length() is at least that trimmed size. Otherwise it returns
 *     trimmed size minus lit.s.length(), i.e. the number of mask bytes that
 *     overhang the start of the literal string. It asserts that the mask size
 *     is <= HWLM_MASKLEN, that HWLM_MASKLEN <= MAX_ACCEL_OFFSET, and (only in
 *     the overhang case) that lit.s.length() < MAX_ACCEL_OFFSET.
 *   - findDVerm and findSVerm: `curr.max_offset = MAX(curr.max_offset, j)` is
 *     replaced by an assert that j + mask_overhang(lit) <= MAX_ACCEL_OFFSET
 *     followed by ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit)).
 *     NOTE(review): the findSVerm hunk also removes the `break;` that followed
 *     the old assignment, while the findDVerm hunk keeps its `break;` as
 *     context - confirm that this difference is intended.
 *   - findForwardAccelScheme: the single loop over [0, MAX_ACCEL_OFFSET) is
 *     split in two. For i < overhang, reach[i] gets every j in [0, N_CHARS)
 *     with (j & lit.msk[i]) == lit.cmp[i]. For i >= overhang, the existing
 *     body runs with i_effective = i - overhang used for the
 *     litGuardedByCharReach call and for indexing lit.s.
 *     NOTE(review): the first loop indexes lit.msk / lit.cmp from 0, whereas
 *     mask_overhang discounts leading zero bytes of lit.msk - confirm the two
 *     agree for masks that have leading zero bytes.
 *
 * src/rose/rose_build_bytecode.cpp
 *   - fillMatcherDistances: adds a DEBUG_PRINTF of lit_id,
 *     key.elength_including_mask(), min_d and max_d, and in the ROSE_FLOATING
 *     case compares and subtracts key.elength_including_mask() where it
 *     previously used key.elength().
 *
 * src/rose/rose_build_impl.h
 *   - rose_literal_id gains elength_including_mask(): it trims leading zero
 *     bytes from the msk length in the same way as mask_overhang and returns
 *     MAX(trimmed mask length, s.length()) + delay.
 */
diff --git a/src/hwlm/hwlm_build.cpp b/src/hwlm/hwlm_build.cpp index 7ba82fcc..b1814245 100644 --- a/src/hwlm/hwlm_build.cpp +++ b/src/hwlm/hwlm_build.cpp @@ -64,6 +64,28 @@ namespace ue2 { static const unsigned int MAX_ACCEL_OFFSET = 16; static const unsigned int MAX_SHUFTI_WIDTH = 240; +static +size_t mask_overhang(const hwlmLiteral &lit) { + size_t msk_true_size = lit.msk.size(); + assert(msk_true_size <= HWLM_MASKLEN); + assert(HWLM_MASKLEN <= MAX_ACCEL_OFFSET); + for (u8 c : lit.msk) { + if (!c) { + msk_true_size--; + } else { + break; + } + } + + if (lit.s.length() >= msk_true_size) { + return 0; + } + + /* only short literals should be able to have a mask which overhangs */ + assert(lit.s.length() < MAX_ACCEL_OFFSET); + return msk_true_size - lit.s.length(); +} + static bool findDVerm(const vector &lits, AccelAux *aux) { const hwlmLiteral &first = *lits.front(); @@ -169,7 +191,8 @@ bool findDVerm(const vector &lits, AccelAux *aux) { } if (found) { - curr.max_offset = MAX(curr.max_offset, j); + assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET); + ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit)); break; } } @@ -290,8 +313,8 @@ bool findSVerm(const vector &lits, AccelAux *aux) { } if (found) { - curr.max_offset = MAX(curr.max_offset, j); - break; + assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET); + ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit)); } } } @@ -392,13 +415,25 @@ void findForwardAccelScheme(const vector &lits, continue; } - for (u32 i = 0; i < MAX_ACCEL_OFFSET; i++) { + u32 overhang = mask_overhang(lit); + for (u32 i = 0; i < overhang; i++) { + /* this offset overhangs the start of the real literal; look at the + * msk/cmp */ + for (u32 j = 0; j < N_CHARS; j++) { + if ((j & lit.msk[i]) == lit.cmp[i]) { + reach[i].set(j); + } + } + } + for (u32 i = overhang; i < MAX_ACCEL_OFFSET; i++) { CharReach &reach_i = reach[i]; + u32 i_effective = i - overhang; - if (litGuardedByCharReach(reach_i, lit, i)) { + if 
(litGuardedByCharReach(reach_i, lit, i_effective)) { continue; } - unsigned char c = i < lit.s.length() ? lit.s[i] : lit.s.back(); + unsigned char c = i_effective < lit.s.length() ? lit.s[i_effective] + : lit.s.back(); if (lit.nocase) { reach_i.set(mytoupper(c)); reach_i.set(mytolower(c)); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index a8440916..c0d8d0a7 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -5031,6 +5031,9 @@ void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) { u32 max_d = g[v].max_offset; u32 min_d = g[v].min_offset; + DEBUG_PRINTF("checking %u: elen %zu min/max %u/%u\n", lit_id, + key.elength_including_mask(), min_d, max_d); + if (build.literal_info[lit_id].undelayed_id != lit_id) { /* this is a delayed match; need to update delay properties */ /* TODO: can delayed literals ever be in another table ? */ @@ -5050,9 +5053,9 @@ void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) { switch (key.table) { case ROSE_FLOATING: ENSURE_AT_LEAST(&engine->floatingDistance, max_d); - if (min_d >= key.elength()) { + if (min_d >= key.elength_including_mask()) { LIMIT_TO_AT_MOST(&engine->floatingMinDistance, - min_d - (u32)key.elength()); + min_d - (u32)key.elength_including_mask()); } else { /* overlapped literals from rose + anchored table can * cause us to underflow due to sloppiness in diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 19f803b2..15047491 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -277,6 +277,17 @@ struct rose_literal_id { u32 distinctiveness; size_t elength(void) const { return s.length() + delay; } + size_t elength_including_mask(void) const { + size_t mask_len = msk.size(); + for (u8 c : msk) { + if (!c) { + mask_len--; + } else { + break; + } + } + return MAX(mask_len, s.length()) + delay; + } }; static inline