diff --git a/src/hwlm/hwlm.c b/src/hwlm/hwlm.c index 37e56ae0..4af987c5 100644 --- a/src/hwlm/hwlm.c +++ b/src/hwlm/hwlm.c @@ -184,20 +184,18 @@ hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len, if (t->type == HWLM_ENGINE_NOOD) { DEBUG_PRINTF("calling noodExec\n"); - return noodExec(HWLM_C_DATA(t), buf + start, len - start, start, cb, - ctxt); - } else { - assert(t->type == HWLM_ENGINE_FDR); - const union AccelAux *aa = &t->accel0; - if ((groups & ~t->accel1_groups) == 0) { - DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type); - aa = &t->accel1; - } - do_accel_block(aa, buf, len, &start); - DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, - start); - return fdrExec(HWLM_C_DATA(t), buf, len, start, cb, ctxt, groups); + return noodExec(HWLM_C_DATA(t), buf, len, start, cb, ctxt); } + + assert(t->type == HWLM_ENGINE_FDR); + const union AccelAux *aa = &t->accel0; + if ((groups & ~t->accel1_groups) == 0) { + DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type); + aa = &t->accel1; + } + do_accel_block(aa, buf, len, &start); + DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, start); + return fdrExec(HWLM_C_DATA(t), buf, len, start, cb, ctxt, groups); } hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch, @@ -224,23 +222,21 @@ hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch, // If we've been handed a start offset, we can use a block mode scan at // that offset. if (start) { - return noodExec(HWLM_C_DATA(t), buf + start, len - start, start, - cb, ctxt); + return noodExec(HWLM_C_DATA(t), buf, len, start, cb, ctxt); } else { return noodExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, cb, ctxt); } - } else { - // t->type == HWLM_ENGINE_FDR - const union AccelAux *aa = &t->accel0; - if ((groups & ~t->accel1_groups) == 0) { - DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type); - aa = &t->accel1; - } - do_accel_streaming(aa, hbuf, hlen, buf, len, &start); - DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, - start); - return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, - start, cb, ctxt, groups); } + + assert(t->type == HWLM_ENGINE_FDR); + const union AccelAux *aa = &t->accel0; + if ((groups & ~t->accel1_groups) == 0) { + DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type); + aa = &t->accel1; + } + do_accel_streaming(aa, hbuf, hlen, buf, len, &start); + DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, start); + return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, start, cb, + ctxt, groups); } diff --git a/src/hwlm/hwlm_build.cpp b/src/hwlm/hwlm_build.cpp index 2f61ea6d..c2db5480 100644 --- a/src/hwlm/hwlm_build.cpp +++ b/src/hwlm/hwlm_build.cpp @@ -89,11 +89,6 @@ bool isNoodleable(const vector &lits, return false; } - if (!lits.front().msk.empty()) { - DEBUG_PRINTF("noodle can't handle supplementary masks\n"); - return false; - } - return true; } diff --git a/src/hwlm/noodle_build.cpp b/src/hwlm/noodle_build.cpp index 63fdf072..4a6ac8d7 100644 --- a/src/hwlm/noodle_build.cpp +++ b/src/hwlm/noodle_build.cpp @@ -35,14 +35,33 @@ #include "hwlm_literal.h" #include "noodle_internal.h" +#include "util/bitutils.h" #include "util/compare.h" #include "util/verify_types.h" #include "ue2common.h" #include // for memcpy +#include + +using std::vector; namespace ue2 { +static +u64a make_u64a_mask(const vector &v) { + assert(v.size() <= sizeof(u64a)); + if (v.size() > sizeof(u64a)) { + throw std::exception(); + } + + u64a mask = 0; + size_t len = v.size(); + unsigned char *m = (unsigned char *)&mask; + DEBUG_PRINTF("making mask len %zu\n", len); + memcpy(m, &v[0], len); + return mask; +} + static size_t findNoodFragOffset(const hwlmLiteral &lit) { const auto &s = lit.s; @@ -67,30 +86,60 @@ size_t findNoodFragOffset(const hwlmLiteral &lit) { } bytecode_ptr noodBuildTable(const hwlmLiteral &lit) { - if (!lit.msk.empty()) { - DEBUG_PRINTF("noodle can't handle supplementary masks\n"); - return nullptr; + const auto &s = lit.s; + + size_t mask_len = std::max(s.length(), lit.msk.size()); + DEBUG_PRINTF("mask is %zu bytes\n", lit.msk.size()); + assert(mask_len <= 8); + assert(lit.msk.size() == lit.cmp.size()); + + vector n_msk(mask_len); + vector n_cmp(mask_len); + + for (unsigned i = mask_len - lit.msk.size(), j = 0; i < mask_len; + i++, j++) { + DEBUG_PRINTF("m[%u] %hhx c[%u] %hhx\n", i, lit.msk[j], i, lit.cmp[j]); + n_msk[i] = lit.msk[j]; + n_cmp[i] = lit.cmp[j]; } - const auto &s = lit.s; - size_t noodle_len = sizeof(noodTable) + s.length(); - auto n = make_zeroed_bytecode_ptr(noodle_len); + size_t s_off = mask_len - s.length(); + for (unsigned i = s_off; i < mask_len; i++) { + u8 c = s[i - s_off]; + u8 si_msk = lit.nocase && ourisalpha(c) ? (u8)CASE_CLEAR : (u8)0xff; + n_msk[i] |= si_msk; + n_cmp[i] |= c & si_msk; + assert((n_cmp[i] & si_msk) == c); + DEBUG_PRINTF("m[%u] %hhx c[%u] %hhx '%c'\n", i, n_msk[i], i, n_cmp[i], + ourisprint(c) ? (char)c : '.'); + } + + auto n = make_zeroed_bytecode_ptr(sizeof(noodTable)); assert(n); + DEBUG_PRINTF("size of nood %zu\n", sizeof(noodTable)); size_t key_offset = findNoodFragOffset(lit); n->id = lit.id; - n->len = verify_u32(s.length()); - n->key_offset = verify_u32(key_offset); + n->lit_len = s.length(); + n->single = s.length() == 1 ? 1 : 0; + n->key_offset = verify_u8(n->lit_len - key_offset); n->nocase = lit.nocase ? 1 : 0; - memcpy(n->str, s.c_str(), s.length()); + n->key0 = s[key_offset]; + if (n->single) { + n->key1 = 0; + } else { + n->key1 = s[key_offset + 1]; + } + n->msk = make_u64a_mask(n_msk); + n->cmp = make_u64a_mask(n_cmp); + n->msk_len = mask_len; return n; } -size_t noodSize(const noodTable *n) { - assert(n); // shouldn't call with null - return sizeof(*n) + n->len; +size_t noodSize(const noodTable *) { + return sizeof(noodTable); } } // namespace ue2 @@ -102,13 +151,17 @@ namespace ue2 { void noodPrintStats(const noodTable *n, FILE *f) { fprintf(f, "Noodle table\n"); - fprintf(f, "Len: %u Key Offset: %u\n", n->len, n->key_offset); + fprintf(f, "Len: %u Key Offset: %u\n", n->lit_len, n->key_offset); + fprintf(f, "Msk: %llx Cmp: %llx MskLen %u\n", + n->msk >> 8 * (8 - n->msk_len), n->cmp >> 8 * (8 - n->msk_len), + n->msk_len); fprintf(f, "String: "); - for (u32 i = 0; i < n->len; i++) { - if (isgraph(n->str[i]) && n->str[i] != '\\') { - fprintf(f, "%c", n->str[i]); + for (u32 i = n->msk_len - n->lit_len; i < n->msk_len; i++) { + const u8 *m = (const u8 *)&n->cmp; + if (isgraph(m[i]) && m[i] != '\\') { + fprintf(f, "%c", m[i]); } else { - fprintf(f, "\\x%02hhx", n->str[i]); + fprintf(f, "\\x%02hhx", m[i]); } } fprintf(f, "\n"); diff --git a/src/hwlm/noodle_engine.c b/src/hwlm/noodle_engine.c index cdc07dfc..ba8d6913 100644 --- a/src/hwlm/noodle_engine.c +++ b/src/hwlm/noodle_engine.c @@ -84,9 +84,8 @@ struct cb_info { while (unlikely(z)) { \ Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \ size_t matchPos = d - buf + pos; \ - DEBUG_PRINTF("match pos %zu\n", matchPos); \ - hwlmcb_rv_t rv = final(buf, len, key, 1, 0, 0, noCase, cbi, \ - matchPos); \ + DEBUG_PRINTF("match pos %zu\n", matchPos); \ + hwlmcb_rv_t rv = final(n, buf, len, 1, cbi, matchPos); \ RETURN_IF_TERMINATED(rv); \ } \ } while (0) @@ -96,9 +95,8 @@ struct cb_info { while (unlikely(z)) { \ Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \ size_t matchPos = d - buf + pos - 1; \ - DEBUG_PRINTF("match pos %zu\n", matchPos); \ - hwlmcb_rv_t rv = final(buf, len, key, keyLen, keyOffset, 1, \ - noCase, cbi, matchPos); \ + DEBUG_PRINTF("match pos %zu\n", matchPos); \ + hwlmcb_rv_t rv = final(n, buf, len, 0, cbi, matchPos); \ RETURN_IF_TERMINATED(rv); \ } \ } while (0) @@ -112,21 +110,28 @@ u8 caseClear8(u8 x, bool noCase) { // is used only for single chars with case insensitivity used correctly, // so it can go straight to the callback if we get this far. static really_inline -hwlm_error_t final(const u8 *buf, size_t len, const u8 *key, size_t keyLen, - size_t keyOffset, bool is_double, bool noCase, - const struct cb_info *cbi, size_t pos) { - pos -= keyOffset; - if (is_double) { - if (pos + keyLen > len) { - return HWLM_SUCCESS; - } - if (cmpForward(buf + pos, key, keyLen, noCase)) { // ret 1 on mismatch - return HWLM_SUCCESS; +hwlm_error_t final(const struct noodTable *n, const u8 *buf, UNUSED size_t len, + char single, const struct cb_info *cbi, size_t pos) { + if (single) { + if (n->msk_len == 1) { + goto match; } } - pos += cbi->offsetAdj; - DEBUG_PRINTF("match @ %zu->%zu\n", pos, (pos + keyLen - 1)); - hwlmcb_rv_t rv = cbi->cb(pos, (pos + keyLen - 1), cbi->id, cbi->ctx); + assert(len >= n->msk_len); + u64a v = + partial_load_u64a(buf + pos + n->key_offset - n->msk_len, n->msk_len); + DEBUG_PRINTF("v %016llx msk %016llx cmp %016llx\n", v, n->msk, n->cmp); + if ((v & n->msk) != n->cmp) { + /* mask didn't match */ + return HWLM_SUCCESS; + } + +match: + pos -= cbi->offsetAdj; + DEBUG_PRINTF("match @ %zu->%zu\n", pos + n->key_offset - n->lit_len, + pos + n->key_offset); + hwlmcb_rv_t rv = cbi->cb(pos + n->key_offset - n->lit_len, + pos + n->key_offset - 1, cbi->id, cbi->ctx); if (rv == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATED; } @@ -148,38 +153,43 @@ hwlm_error_t final(const u8 *buf, size_t len, const u8 *key, size_t keyLen, #endif static really_inline -hwlm_error_t scanSingleMain(const u8 *buf, size_t len, const u8 *key, - bool noCase, const struct cb_info *cbi) { +hwlm_error_t scanSingleMain(const struct noodTable *n, const u8 *buf, + size_t len, size_t start, bool noCase, + const struct cb_info *cbi) { - const MASK_TYPE mask1 = getMask(key[0], noCase); + const MASK_TYPE mask1 = getMask(n->key0, noCase); const MASK_TYPE caseMask = getCaseMask(); + size_t offset = start + n->msk_len - 1; + size_t end = len; + assert(offset < end); + #if !defined(HAVE_AVX512) hwlm_error_t rv; - size_t end = len; - if (len < CHUNKSIZE) { - rv = scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, 0, len); + if (end - offset < CHUNKSIZE) { + rv = scanSingleShort(n, buf, len, noCase, caseMask, mask1, cbi, offset, + end); return rv; } - if (len == CHUNKSIZE) { - rv = scanSingleUnaligned(buf, len, 0, key, noCase, caseMask, mask1, cbi, - 0, len); + if (end - offset == CHUNKSIZE) { + rv = scanSingleUnaligned(n, buf, len, 0, noCase, caseMask, mask1, cbi, + offset, end); return rv; } uintptr_t data = (uintptr_t)buf; - uintptr_t s2Start = ROUNDUP_N(data, CHUNKSIZE) - data; + uintptr_t s2Start = ROUNDUP_N(data + offset, CHUNKSIZE) - data; uintptr_t last = data + end; uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data; - uintptr_t s3Start = len - CHUNKSIZE; + uintptr_t s3Start = end - CHUNKSIZE; - if (s2Start) { + if (offset != s2Start) { // first scan out to the fast scan starting point DEBUG_PRINTF("stage 1: -> %zu\n", s2Start); - rv = scanSingleUnaligned(buf, len, 0, key, noCase, caseMask, mask1, cbi, - 0, s2Start); + rv = scanSingleUnaligned(n, buf, len, 0, noCase, caseMask, mask1, cbi, + offset, s2Start); RETURN_IF_TERMINATED(rv); } @@ -187,68 +197,70 @@ hwlm_error_t scanSingleMain(const u8 *buf, size_t len, const u8 *key, // scan as far as we can, bounded by the last point this key can // possibly match DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s2End); - rv = scanSingleFast(buf, len, key, noCase, caseMask, mask1, cbi, - s2Start, s2End); + rv = scanSingleFast(n, buf, len, noCase, caseMask, mask1, cbi, s2Start, + s2End); RETURN_IF_TERMINATED(rv); } // if we are done bail out - if (s2End == end) { + if (s2End == len) { return HWLM_SUCCESS; } - DEBUG_PRINTF("stage 3: %zu -> %zu\n", s2End, end); - rv = scanSingleUnaligned(buf, len, s3Start, key, noCase, caseMask, mask1, - cbi, s2End, end); + DEBUG_PRINTF("stage 3: %zu -> %zu\n", s2End, len); + rv = scanSingleUnaligned(n, buf, len, s3Start, noCase, caseMask, mask1, cbi, + s2End, len); return rv; #else // HAVE_AVX512 - return scanSingle512(buf, len, key, noCase, caseMask, mask1, cbi); + return scanSingle512(n, buf, len, noCase, caseMask, mask1, cbi, offset, + end); #endif } static really_inline -hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key, - size_t keyLen, size_t keyOffset, bool noCase, +hwlm_error_t scanDoubleMain(const struct noodTable *n, const u8 *buf, + size_t len, size_t start, bool noCase, const struct cb_info *cbi) { // we stop scanning for the key-fragment when the rest of the key can't // possibly fit in the remaining buffer - size_t end = len - keyLen + keyOffset + 2; + size_t end = len - n->key_offset + 2; + + // the first place the key can match + size_t offset = start + n->msk_len - n->key_offset; const MASK_TYPE caseMask = getCaseMask(); - const MASK_TYPE mask1 = getMask(key[keyOffset + 0], noCase); - const MASK_TYPE mask2 = getMask(key[keyOffset + 1], noCase); + const MASK_TYPE mask1 = getMask(n->key0, noCase); + const MASK_TYPE mask2 = getMask(n->key1, noCase); #if !defined(HAVE_AVX512) hwlm_error_t rv; - if (end - keyOffset < CHUNKSIZE) { - rv = scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask, - mask1, mask2, cbi, keyOffset, end); + if (end - offset < CHUNKSIZE) { + rv = scanDoubleShort(n, buf, len, noCase, caseMask, mask1, mask2, cbi, + offset, end); return rv; } - if (end - keyOffset == CHUNKSIZE) { - rv = scanDoubleUnaligned(buf, len, keyOffset, key, keyLen, keyOffset, - noCase, caseMask, mask1, mask2, cbi, keyOffset, - end); + if (end - offset == CHUNKSIZE) { + rv = scanDoubleUnaligned(n, buf, len, offset, noCase, caseMask, mask1, + mask2, cbi, offset, end); return rv; } uintptr_t data = (uintptr_t)buf; - uintptr_t s2Start = ROUNDUP_N(data + keyOffset, CHUNKSIZE) - data; + uintptr_t s2Start = ROUNDUP_N(data + offset, CHUNKSIZE) - data; uintptr_t s1End = s2Start + 1; uintptr_t last = data + end; uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data; uintptr_t s3Start = end - CHUNKSIZE; - uintptr_t off = keyOffset; + uintptr_t off = offset; - if (s2Start != keyOffset) { + if (s2Start != off) { // first scan out to the fast scan starting point plus one char past to // catch the key on the overlap - DEBUG_PRINTF("stage 1: -> %zu\n", s2Start); - rv = scanDoubleUnaligned(buf, len, keyOffset, key, keyLen, keyOffset, - noCase, caseMask, mask1, mask2, cbi, off, - s1End); + DEBUG_PRINTF("stage 1: %zu -> %zu\n", off, s2Start); + rv = scanDoubleUnaligned(n, buf, len, offset, noCase, caseMask, mask1, + mask2, cbi, off, s1End); RETURN_IF_TERMINATED(rv); } off = s1End; @@ -262,8 +274,8 @@ hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key, // scan as far as we can, bounded by the last point this key can // possibly match DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s3Start); - rv = scanDoubleFast(buf, len, key, keyLen, keyOffset, noCase, caseMask, - mask1, mask2, cbi, s2Start, s2End); + rv = scanDoubleFast(n, buf, len, noCase, caseMask, mask1, mask2, cbi, + s2Start, s2End); RETURN_IF_TERMINATED(rv); off = s2End; } @@ -274,98 +286,101 @@ hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key, } DEBUG_PRINTF("stage 3: %zu -> %zu\n", s3Start, end); - rv = scanDoubleUnaligned(buf, len, s3Start, key, keyLen, keyOffset, noCase, - caseMask, mask1, mask2, cbi, off, end); + rv = scanDoubleUnaligned(n, buf, len, s3Start, noCase, caseMask, mask1, + mask2, cbi, off, end); return rv; #else // AVX512 - return scanDouble512(buf, len, key, keyLen, keyOffset, noCase, caseMask, - mask1, mask2, cbi, keyOffset, end); + return scanDouble512(n, buf, len, noCase, caseMask, mask1, mask2, cbi, + offset, end); #endif // AVX512 } static really_inline -hwlm_error_t scanSingleNoCase(const u8 *buf, size_t len, const u8 *key, +hwlm_error_t scanSingleNoCase(const struct noodTable *n, const u8 *buf, + size_t len, size_t start, const struct cb_info *cbi) { - return scanSingleMain(buf, len, key, 1, cbi); + return scanSingleMain(n, buf, len, start, 1, cbi); } static really_inline -hwlm_error_t scanSingleCase(const u8 *buf, size_t len, const u8 *key, +hwlm_error_t scanSingleCase(const struct noodTable *n, const u8 *buf, + size_t len, size_t start, const struct cb_info *cbi) { - return scanSingleMain(buf, len, key, 0, cbi); + return scanSingleMain(n, buf, len, start, 0, cbi); } // Single-character specialisation, used when keyLen = 1 static really_inline -hwlm_error_t scanSingle(const u8 *buf, size_t len, const u8 *key, bool noCase, - const struct cb_info *cbi) { - if (!ourisalpha(key[0])) { +hwlm_error_t scanSingle(const struct noodTable *n, const u8 *buf, size_t len, + size_t start, bool noCase, const struct cb_info *cbi) { + if (!ourisalpha(n->key0)) { noCase = 0; // force noCase off if we don't have an alphabetic char } // kinda ugly, but this forces constant propagation if (noCase) { - return scanSingleNoCase(buf, len, key, cbi); + return scanSingleNoCase(n, buf, len, start, cbi); } else { - return scanSingleCase(buf, len, key, cbi); + return scanSingleCase(n, buf, len, start, cbi); } } static really_inline -hwlm_error_t scanDoubleNoCase(const u8 *buf, size_t len, const u8 *key, - size_t keyLen, size_t keyOffset, +hwlm_error_t scanDoubleNoCase(const struct noodTable *n, const u8 *buf, + size_t len, size_t start, const struct cb_info *cbi) { - return scanDoubleMain(buf, len, key, keyLen, keyOffset, 1, cbi); + return scanDoubleMain(n, buf, len, start, 1, cbi); } static really_inline -hwlm_error_t scanDoubleCase(const u8 *buf, size_t len, const u8 *key, - size_t keyLen, size_t keyOffset, +hwlm_error_t scanDoubleCase(const struct noodTable *n, const u8 *buf, + size_t len, size_t start, const struct cb_info *cbi) { - return scanDoubleMain(buf, len, key, keyLen, keyOffset, 0, cbi); + return scanDoubleMain(n, buf, len, start, 0, cbi); } static really_inline -hwlm_error_t scanDouble(const u8 *buf, size_t len, const u8 *key, size_t keyLen, - size_t keyOffset, bool noCase, - const struct cb_info *cbi) { +hwlm_error_t scanDouble(const struct noodTable *n, const u8 *buf, size_t len, + size_t start, bool noCase, const struct cb_info *cbi) { // kinda ugly, but this forces constant propagation if (noCase) { - return scanDoubleNoCase(buf, len, key, keyLen, keyOffset, cbi); + return scanDoubleNoCase(n, buf, len, start, cbi); } else { - return scanDoubleCase(buf, len, key, keyLen, keyOffset, cbi); + return scanDoubleCase(n, buf, len, start, cbi); } } // main entry point for the scan code static really_inline -hwlm_error_t scan(const u8 *buf, size_t len, const u8 *key, size_t keyLen, - size_t keyOffset, bool noCase, const struct cb_info *cbi) { - if (len < keyLen) { +hwlm_error_t scan(const struct noodTable *n, const u8 *buf, size_t len, + size_t start, char single, bool noCase, + const struct cb_info *cbi) { + if (len - start < n->msk_len) { // can't find string of length keyLen in a shorter buffer return HWLM_SUCCESS; } - if (keyLen == 1) { - assert(keyOffset == 0); - return scanSingle(buf, len, key, noCase, cbi); + if (single) { + return scanSingle(n, buf, len, start, noCase, cbi); } else { - return scanDouble(buf, len, key, keyLen, keyOffset, noCase, cbi); + return scanDouble(n, buf, len, start, noCase, cbi); } } /** \brief Block-mode scanner. */ hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len, - size_t offset_adj, HWLMCallback cb, void *ctxt) { + size_t start, HWLMCallback cb, void *ctxt) { assert(n && buf); - struct cb_info cbi = { cb, n->id, ctxt, offset_adj }; - DEBUG_PRINTF("nood scan of %zu bytes for %*s\n", len, n->len, n->str); - return scan(buf, len, n->str, n->len, n->key_offset, n->nocase, &cbi); + struct cb_info cbi = {cb, n->id, ctxt, 0}; + DEBUG_PRINTF("nood scan of %zu bytes for %*s @ %p\n", len, n->lit_len, + (const char *)&n->cmp + n->msk_len - n->lit_len, buf); + + return scan(n, buf, len, start, n->single, n->nocase, &cbi); } /** \brief Streaming-mode scanner. */ @@ -375,34 +390,49 @@ hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf, assert(n); struct cb_info cbi = {cb, n->id, ctxt, 0}; - hwlm_error_t rv; + DEBUG_PRINTF("nood scan of %zu bytes (%zu hlen) for %*s @ %p\n", len, hlen, + n->lit_len, (const char *)&n->cmp + n->msk_len - n->lit_len, + buf); if (hlen) { + /* + * we have history, so build up a buffer from enough of the history + * buffer plus what we've been given to scan. Since this is relatively + * short, just check against msk+cmp per byte offset for matches. + */ assert(hbuf); - u8 ALIGN_DIRECTIVE temp_buf[HWLM_LITERAL_MAX_LEN * 2]; memset(temp_buf, 0, sizeof(temp_buf)); - size_t tl1 = MIN(n->len - 1, hlen); - size_t tl2 = MIN(n->len - 1, len); - size_t temp_len = tl1 + tl2; - assert(temp_len < sizeof(temp_buf)); + assert(n->msk_len); + size_t tl1 = MIN((size_t)n->msk_len - 1, hlen); + size_t tl2 = MIN((size_t)n->msk_len - 1, len); + + assert(tl1 + tl2 <= sizeof(temp_buf)); assert(tl1 <= sizeof(u64a)); assert(tl2 <= sizeof(u64a)); + DEBUG_PRINTF("using %zu bytes of hist and %zu bytes of buf\n", tl1, tl2); + unaligned_store_u64a(temp_buf, partial_load_u64a(hbuf + hlen - tl1, tl1)); unaligned_store_u64a(temp_buf + tl1, partial_load_u64a(buf, tl2)); - cbi.offsetAdj = -tl1; - rv = scan(temp_buf, temp_len, n->str, n->len, n->key_offset, n->nocase, - &cbi); - if (rv == HWLM_TERMINATED) { - return HWLM_TERMINATED; + for (size_t i = 0; i < tl1; i++) { + u64a v = unaligned_load_u64a(temp_buf + i); + if ((v & n->msk) == n->cmp) { + size_t m_end = -tl1 + i + n->msk_len - 1; + size_t m_start = m_end - n->lit_len; + DEBUG_PRINTF("match @ %zu->%zu (i %zu)\n", m_start, m_end, i); + hwlmcb_rv_t rv = cb(m_start, m_end, n->id, ctxt); + if (rv == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATED; + } + } } } assert(buf); cbi.offsetAdj = 0; - return scan(buf, len, n->str, n->len, n->key_offset, n->nocase, &cbi); + return scan(n, buf, len, 0, n->single, n->nocase, &cbi); } diff --git a/src/hwlm/noodle_engine_avx2.c b/src/hwlm/noodle_engine_avx2.c index a3f46047..f10e4a7b 100644 --- a/src/hwlm/noodle_engine_avx2.c +++ b/src/hwlm/noodle_engine_avx2.c @@ -38,10 +38,11 @@ static really_inline m256 getCaseMask(void) { } static really_inline -hwlm_error_t scanSingleUnaligned(const u8 *buf, size_t len, size_t offset, - const u8 *key, bool noCase, m256 caseMask, - m256 mask1, const struct cb_info *cbi, - size_t start, size_t end) { +hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf, + size_t len, size_t offset, bool noCase, + m256 caseMask, m256 mask1, + const struct cb_info *cbi, size_t start, + size_t end) { const u8 *d = buf + offset; DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset); const size_t l = end - start; @@ -66,11 +67,11 @@ hwlm_error_t scanSingleUnaligned(const u8 *buf, size_t len, size_t offset, } static really_inline -hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset, - const u8 *key, size_t keyLen, size_t keyOffset, - bool noCase, m256 caseMask, m256 mask1, - m256 mask2, const struct cb_info *cbi, - size_t start, size_t end) { +hwlm_error_t scanDoubleUnaligned(const struct noodTable *n, const u8 *buf, + size_t len, size_t offset, bool noCase, + m256 caseMask, m256 mask1, m256 mask2, + const struct cb_info *cbi, size_t start, + size_t end) { const u8 *d = buf + offset; DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset); size_t l = end - start; @@ -100,8 +101,8 @@ hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset, // alignment boundary if needed and to finish off data that the aligned scan // function can't handle (due to small/unaligned chunk at end) static really_inline -hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key, - bool noCase, m256 caseMask, m256 mask1, +hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m256 caseMask, m256 mask1, const struct cb_info *cbi, size_t start, size_t end) { const u8 *d = buf + start; @@ -140,11 +141,10 @@ hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key, } static really_inline -hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key, - size_t keyLen, size_t keyOffset, bool noCase, - m256 caseMask, m256 mask1, m256 mask2, - const struct cb_info *cbi, size_t start, - size_t end) { +hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m256 caseMask, m256 mask1, + m256 mask2, const struct cb_info *cbi, + size_t start, size_t end) { const u8 *d = buf + start; size_t l = end - start; if (!l) { @@ -182,8 +182,8 @@ hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key, } static really_inline -hwlm_error_t scanSingleFast(const u8 *buf, size_t len, const u8 *key, - bool noCase, m256 caseMask, m256 mask1, +hwlm_error_t scanSingleFast(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m256 caseMask, m256 mask1, const struct cb_info *cbi, size_t start, size_t end) { const u8 *d = buf + start, *e = buf + end; @@ -203,10 +203,9 @@ hwlm_error_t scanSingleFast(const u8 *buf, size_t len, const u8 *key, } static really_inline -hwlm_error_t scanDoubleFast(const u8 *buf, size_t len, const u8 *key, - size_t keyLen, size_t keyOffset, bool noCase, - m256 caseMask, m256 mask1, m256 mask2, - const struct cb_info *cbi, size_t start, +hwlm_error_t scanDoubleFast(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m256 caseMask, m256 mask1, + m256 mask2, const struct cb_info *cbi, size_t start, size_t end) { const u8 *d = buf + start, *e = buf + end; DEBUG_PRINTF("start %zu end %zu \n", start, end); diff --git a/src/hwlm/noodle_engine_avx512.c b/src/hwlm/noodle_engine_avx512.c index d4e6527f..8cac1b15 100644 --- a/src/hwlm/noodle_engine_avx512.c +++ b/src/hwlm/noodle_engine_avx512.c @@ -43,8 +43,8 @@ m512 getCaseMask(void) { // alignment boundary if needed and to finish off data that the aligned scan // function can't handle (due to small/unaligned chunk at end) static really_inline -hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key, - bool noCase, m512 caseMask, m512 mask1, +hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m512 caseMask, m512 mask1, const struct cb_info *cbi, size_t start, size_t end) { const u8 *d = buf + start; @@ -73,11 +73,12 @@ hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key, } static really_inline -hwlm_error_t scanSingle512(const u8 *buf, size_t len, const u8 *key, +hwlm_error_t scanSingle512(const struct noodTable *n, const u8 *buf, size_t len, bool noCase, m512 caseMask, m512 mask1, - const struct cb_info *cbi) { - const u8 *d = buf; - const u8 *e = buf + len; + const struct cb_info *cbi, size_t start, + size_t end) { + const u8 *d = buf + start; + const u8 *e = buf + end; DEBUG_PRINTF("start %p end %p \n", d, e); assert(d < e); if (d + 64 >= e) { @@ -86,8 +87,8 @@ hwlm_error_t scanSingle512(const u8 *buf, size_t len, const u8 *key, // peel off first part to cacheline boundary const u8 *d1 = ROUNDUP_PTR(d, 64); - if (scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, 0, - d1 - d) == HWLM_TERMINATED) { + if (scanSingleShort(n, buf, len, noCase, caseMask, mask1, cbi, start, + d1 - buf) == HWLM_TERMINATED) { return HWLM_TERMINATED; } d = d1; @@ -106,16 +107,15 @@ tail: DEBUG_PRINTF("d %p e %p \n", d, e); // finish off tail - return scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, d - buf, + return scanSingleShort(n, buf, len, noCase, caseMask, mask1, cbi, d - buf, e - buf); } static really_inline -hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key, - size_t keyLen, size_t keyOffset, bool noCase, - m512 caseMask, m512 mask1, m512 mask2, - const struct cb_info *cbi, u64a *lastz0, - size_t start, size_t end) { +hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m512 caseMask, m512 mask1, + m512 mask2, const struct cb_info *cbi, + u64a *lastz0, size_t start, size_t end) { DEBUG_PRINTF("start %zu end %zu last 0x%016llx\n", start, end, *lastz0); const u8 *d = buf + start; ptrdiff_t scan_len = end - start; @@ -142,9 +142,8 @@ hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key, } static really_inline -hwlm_error_t scanDouble512(const u8 *buf, size_t len, const u8 *key, - size_t keyLen, size_t keyOffset, bool noCase, - m512 caseMask, m512 mask1, m512 mask2, +hwlm_error_t scanDouble512(const struct noodTable *n, const u8 *buf, size_t len, + bool noCase, m512 caseMask, m512 mask1, m512 mask2, const struct cb_info *cbi, size_t start, size_t end) { const u8 *d = buf + start; @@ -158,9 +157,8 @@ hwlm_error_t scanDouble512(const u8 *buf, size_t len, const u8 *key, // peel off first part to cacheline boundary const u8 *d1 = ROUNDUP_PTR(d, 64); - if (scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask, - mask1, mask2, cbi, &lastz0, start, - d1 - buf) == HWLM_TERMINATED) { + if (scanDoubleShort(n, buf, len, noCase, caseMask, mask1, mask2, cbi, + &lastz0, start, d1 - buf) == HWLM_TERMINATED) { return HWLM_TERMINATED; } d = d1; @@ -188,6 +186,6 @@ tail: DEBUG_PRINTF("d %p e %p off %zu \n", d, e, d - buf); // finish off tail - return scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask, - mask1, mask2, cbi, &lastz0, d - buf, end); + return scanDoubleShort(n, buf, len, noCase, caseMask, mask1, mask2, cbi, + &lastz0, d - buf, end); } diff --git a/src/hwlm/noodle_engine_sse.c b/src/hwlm/noodle_engine_sse.c index 40575409..7cd53d7c 100644 --- a/src/hwlm/noodle_engine_sse.c +++ b/src/hwlm/noodle_engine_sse.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,8 +38,8 @@ static really_inline m128 getCaseMask(void) { } static really_inline -hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key, - bool noCase, m128 caseMask, m128 mask1, +hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m128 caseMask, m128 mask1, const struct cb_info *cbi, size_t start, size_t end) { const u8 *d = buf + start; @@ -67,10 +67,11 @@ hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key, } static really_inline -hwlm_error_t scanSingleUnaligned(const u8 *buf, size_t len, size_t offset, - const u8 *key, bool noCase, m128 caseMask, - m128 mask1, const struct cb_info *cbi, - size_t start, size_t end) { +hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf, + size_t len, size_t offset, bool noCase, + m128 caseMask, m128 mask1, + const struct cb_info *cbi, size_t start, + size_t end) { const u8 *d = buf + offset; DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset); const size_t l = end - start; @@ -96,11 +97,10 @@ hwlm_error_t scanSingleUnaligned(const u8 *buf, size_t len, size_t offset, } static really_inline -hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key, - size_t keyLen, size_t keyOffset, bool noCase, - m128 caseMask, m128 mask1, m128 mask2, - const struct cb_info *cbi, size_t start, - size_t end) { +hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m128 caseMask, m128 mask1, + m128 mask2, const struct cb_info *cbi, + size_t start, size_t end) { const u8 *d = buf + start; size_t l = end - start; if (!l) { @@ -128,11 +128,11 @@ hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key, } static really_inline -hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset, - const u8 *key, size_t keyLen, size_t keyOffset, - bool noCase, m128 caseMask, m128 mask1, - m128 mask2, const struct cb_info *cbi, - size_t start, size_t end) { +hwlm_error_t scanDoubleUnaligned(const struct noodTable *n, const u8 *buf, + size_t len, size_t offset, bool noCase, + m128 caseMask, m128 mask1, m128 mask2, + const struct cb_info *cbi, size_t start, + size_t end) { const u8 *d = buf + offset; DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset); size_t l = end - start; @@ -158,8 +158,8 @@ hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset, } static really_inline -hwlm_error_t scanSingleFast(const u8 *buf, size_t len, const u8 *key, - bool noCase, m128 caseMask, m128 mask1, +hwlm_error_t scanSingleFast(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m128 caseMask, m128 mask1, const struct cb_info *cbi, size_t start, size_t end) { const u8 *d = buf + start, *e = buf + end; @@ -179,10 +179,9 @@ hwlm_error_t scanSingleFast(const u8 *buf, size_t len, const u8 *key, } static really_inline -hwlm_error_t scanDoubleFast(const u8 *buf, size_t len, const u8 *key, - size_t keyLen, size_t keyOffset, bool noCase, - m128 caseMask, m128 mask1, m128 mask2, - const struct cb_info *cbi, size_t start, +hwlm_error_t scanDoubleFast(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m128 caseMask, m128 mask1, + m128 mask2, const struct cb_info *cbi, size_t start, size_t end) { const u8 *d = buf + start, *e = buf + end; assert(d < e); diff --git a/src/hwlm/noodle_internal.h b/src/hwlm/noodle_internal.h index cc287816..bfb1a9e2 100644 --- a/src/hwlm/noodle_internal.h +++ b/src/hwlm/noodle_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,18 +30,23 @@ * \brief Data structures for Noodle literal matcher engine. */ -#ifndef NOODLE_INTERNAL_H_25D751C42E34A6 -#define NOODLE_INTERNAL_H_25D751C42E34A6 +#ifndef NOODLE_INTERNAL_H +#define NOODLE_INTERNAL_H #include "ue2common.h" struct noodTable { u32 id; - u32 len; - u32 key_offset; - u8 nocase; - u8 str[]; + u64a msk; + u64a cmp; + u8 lit_len; + u8 msk_len; + u8 key_offset; + u8 nocase; + u8 single; + u8 key0; + u8 key1; }; -#endif /* NOODLE_INTERNAL_H_25D751C42E34A6 */ +#endif /* NOODLE_INTERNAL_H */