diff --git a/CMakeLists.txt b/CMakeLists.txt index 2c53d6c5..c9a6f8db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -425,7 +425,6 @@ set (hs_exec_SRCS src/fdr/fdr_internal.h src/fdr/fdr_confirm.h src/fdr/fdr_confirm_runtime.h - src/fdr/fdr_streaming_runtime.h src/fdr/flood_runtime.h src/fdr/fdr_loadval.h src/fdr/teddy.c @@ -531,6 +530,8 @@ set (hs_exec_SRCS src/rose/init.h src/rose/init.c src/rose/stream.c + src/rose/stream_long_lit.h + src/rose/stream_long_lit_hash.h src/rose/match.h src/rose/match.c src/rose/miracle.h @@ -612,8 +613,6 @@ SET (hs_SRCS src/fdr/fdr_engine_description.cpp src/fdr/fdr_engine_description.h src/fdr/fdr_internal.h - src/fdr/fdr_streaming_compile.cpp - src/fdr/fdr_streaming_internal.h src/fdr/flood_compile.cpp src/fdr/teddy_compile.cpp src/fdr/teddy_compile.h @@ -874,6 +873,8 @@ SET (hs_SRCS src/rose/rose_build_impl.h src/rose/rose_build_infix.cpp src/rose/rose_build_infix.h + src/rose/rose_build_long_lit.cpp + src/rose/rose_build_long_lit.h src/rose/rose_build_lookaround.cpp src/rose/rose_build_lookaround.h src/rose/rose_build_matchers.cpp diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index 4230c2b1..23416c70 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -31,7 +31,6 @@ #include "fdr_confirm_runtime.h" #include "fdr_internal.h" #include "fdr_loadval.h" -#include "fdr_streaming_runtime.h" #include "flood_runtime.h" #include "teddy.h" #include "teddy_internal.h" @@ -809,8 +808,6 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, len, hbuf, 0, - hbuf, // nocase - 0, start, cb, ctxt, @@ -828,14 +825,12 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf, size_t hlen, const u8 *buf, size_t len, size_t start, HWLMCallback cb, void *ctxt, - hwlm_group_t groups, u8 *stream_state) { + hwlm_group_t groups) { struct FDR_Runtime_Args a = { buf, len, hbuf, hlen, - hbuf, // nocase - start same as caseful, override later if needed - 
hlen, // nocase start, cb, ctxt, @@ -844,7 +839,6 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf, * the history buffer (they may be garbage). */ hbuf ? unaligned_load_u64a(hbuf + hlen - sizeof(u64a)) : (u64a)0 }; - fdrUnpackState(fdr, &a, stream_state); hwlm_error_t ret; if (unlikely(a.start_offset >= a.len)) { @@ -854,6 +848,5 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf, ret = funcs[fdr->engineID](fdr, &a, groups); } - fdrPackState(fdr, &a, stream_state); return ret; } diff --git a/src/fdr/fdr.h b/src/fdr/fdr.h index e0aa594f..e2b80056 100644 --- a/src/fdr/fdr.h +++ b/src/fdr/fdr.h @@ -43,10 +43,6 @@ extern "C" { struct FDR; -/** \brief Returns non-zero if the contents of the stream state indicate that - * there is active FDR history beyond the regularly used history. */ -u32 fdrStreamStateActive(const struct FDR *fdr, const u8 *stream_state); - /** * \brief Block-mode scan. * @@ -74,12 +70,11 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, * \param cb Callback to call when a match is found. * \param ctxt Caller-provided context pointer supplied to callback on match. * \param groups Initial groups mask. - * \param stream_state Persistent stream state for use by FDR. 
*/ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf, size_t hlen, const u8 *buf, size_t len, size_t start, HWLMCallback cb, void *ctxt, - hwlm_group_t groups, u8 *stream_state); + hwlm_group_t groups); #ifdef __cplusplus } diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 89a0ff72..937513a8 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -39,6 +39,7 @@ #include "teddy_engine_description.h" #include "grey.h" #include "ue2common.h" +#include "hwlm/hwlm_build.h" #include "util/alloc.h" #include "util/compare.h" #include "util/dump_mask.h" @@ -495,14 +496,34 @@ FDRCompiler::build(pair, size_t> &link) { } // namespace +static +size_t maxMaskLen(const vector &lits) { + size_t rv = 0; + for (const auto &lit : lits) { + rv = max(rv, lit.msk.size()); + } + return rv; +} + +static +void setHistoryRequired(hwlmStreamingControl &stream_ctl, + const vector &lits) { + size_t max_mask_len = maxMaskLen(lits); + + // we want enough history to manage the longest literal and the longest + // mask. + stream_ctl.literal_history_required = max(maxLen(lits), max_mask_len) - 1; +} + static aligned_unique_ptr fdrBuildTableInternal(const vector &lits, bool make_small, const target_t &target, const Grey &grey, u32 hint, hwlmStreamingControl *stream_control) { pair, size_t> link(nullptr, 0); + if (stream_control) { - link = fdrBuildTableStreaming(lits, *stream_control); + setHistoryRequired(*stream_control, lits); } DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? 
"avx2" : "no-avx2"); diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index 23437fe2..f84ed402 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -339,7 +339,7 @@ getFDRConfirm(const vector &lits, bool applyOneCharOpt, ptr = ROUNDUP_PTR(ptr, alignof(LitInfo)); if (next(i) == e) { - finalLI.next = 0x0; + finalLI.next = 0; } else { // our next field represents an adjustment on top of // current address + the actual size of the literal diff --git a/src/fdr/fdr_confirm_runtime.h b/src/fdr/fdr_confirm_runtime.h index 9b1df593..2b0cd595 100644 --- a/src/fdr/fdr_confirm_runtime.h +++ b/src/fdr/fdr_confirm_runtime.h @@ -74,10 +74,8 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a if (loc < buf) { u32 full_overhang = buf - loc; - const u8 *history = caseless ? a->buf_history_nocase - : a->buf_history; - size_t len_history = caseless ? a->len_history_nocase - : a->len_history; + const u8 *history = a->buf_history; + size_t len_history = a->len_history; // can't do a vectored confirm either if we don't have // the bytes @@ -123,8 +121,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a const u8 *loc2 = buf + i - li->extended_size + 1 - pullBackAmount; if (loc2 < buf) { u32 full_overhang = buf - loc2; - size_t len_history = caseless ? 
a->len_history_nocase - : a->len_history; + size_t len_history = a->len_history; if (full_overhang > len_history) { goto out; } diff --git a/src/fdr/fdr_internal.h b/src/fdr/fdr_internal.h index 6272b69e..3bf82837 100644 --- a/src/fdr/fdr_internal.h +++ b/src/fdr/fdr_internal.h @@ -100,8 +100,6 @@ struct FDR_Runtime_Args { size_t len; const u8 *buf_history; size_t len_history; - const u8 *buf_history_nocase; - size_t len_history_nocase; size_t start_offset; HWLMCallback cb; void *ctxt; diff --git a/src/fdr/fdr_streaming_compile.cpp b/src/fdr/fdr_streaming_compile.cpp deleted file mode 100644 index b2e1656c..00000000 --- a/src/fdr/fdr_streaming_compile.cpp +++ /dev/null @@ -1,425 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "fdr_internal.h" -#include "fdr_streaming_internal.h" -#include "fdr_compile_internal.h" -#include "hwlm/hwlm_build.h" -#include "util/alloc.h" -#include "util/bitutils.h" -#include "util/target_info.h" -#include "util/verify_types.h" - -#include -#include -#include -#include -#include -#include - -#include - -using namespace std; -using boost::dynamic_bitset; - -namespace ue2 { - -namespace { -struct LongLitOrder { - bool operator()(const hwlmLiteral &i1, const hwlmLiteral &i2) const { - if (i1.nocase != i2.nocase) { - return i1.nocase < i2.nocase; - } else { - return i1.s < i2.s; - } - } -}; -} - -static -bool hwlmLitEqual(const hwlmLiteral &l1, const hwlmLiteral &l2) { - return l1.s == l2.s && l1.nocase == l2.nocase; -} - -static -u32 roundUpToPowerOfTwo(u32 x) { - x -= 1; - x |= (x >> 1); - x |= (x >> 2); - x |= (x >> 4); - x |= (x >> 8); - x |= (x >> 16); - return x + 1; -} - -/** - * \brief Creates a long literals vector containing all literals of length > max_len. - * - * The last char of each literal is trimmed as we're not interested in full - * matches, only partial matches. - * - * Literals are sorted (by caseful/caseless, then lexicographical order) and - * made unique. - * - * The ID of each literal is set to its position in the vector. - * - * \return False if there aren't any long literals. 
- */ -static -bool setupLongLits(const vector &lits, - vector &long_lits, size_t max_len) { - long_lits.reserve(lits.size()); - for (const auto &lit : lits) { - if (lit.s.length() > max_len) { - hwlmLiteral tmp = lit; // copy - tmp.s.pop_back(); - tmp.id = 0; // recalc later - tmp.groups = 0; // filled in later by hash bucket(s) - long_lits.push_back(move(tmp)); - } - } - - if (long_lits.empty()) { - return false; - } - - // sort long_literals by caseful/caseless and in lexicographical order, - // remove duplicates - stable_sort(long_lits.begin(), long_lits.end(), LongLitOrder()); - auto new_end = unique(long_lits.begin(), long_lits.end(), hwlmLitEqual); - long_lits.erase(new_end, long_lits.end()); - - // fill in ids; not currently used - for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) { - i->id = distance(long_lits.begin(), i); - } - return true; -} - -// boundaries are the 'start' boundaries for each 'mode' -// so boundary[CASEFUL] is the index one above the largest caseful index -// positions[CASEFUL] is the # of positions in caseful strings (stream) -// hashedPositions[CASEFUL] is the # of positions in caseful strings -// (not returned - a temporary) -// hashEntries[CASEFUL] is the # of positions hashed for caseful strings -// (rounded up to the nearest power of two) -static -void analyzeLits(const vector &long_lits, size_t max_len, - u32 *boundaries, u32 *positions, u32 *hashEntries) { - u32 hashedPositions[MAX_MODES]; - - for (u32 m = CASEFUL; m < MAX_MODES; ++m) { - boundaries[m] = verify_u32(long_lits.size()); - positions[m] = 0; - hashedPositions[m] = 0; - } - - for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) { - if (i->nocase) { - boundaries[CASEFUL] = verify_u32(distance(long_lits.begin(), i)); - break; - } - } - - for (const auto &lit : long_lits) { - Modes m = lit.nocase ? 
CASELESS : CASEFUL; - for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) { - hashedPositions[m]++; - } - positions[m] += lit.s.size(); - } - - for (u32 m = CASEFUL; m < MAX_MODES; m++) { - hashEntries[m] = hashedPositions[m] - ? roundUpToPowerOfTwo(MAX(4096, hashedPositions[m])) - : 0; - } - -#ifdef DEBUG_COMPILE - printf("analyzeLits:\n"); - for (Modes m = CASEFUL; m < MAX_MODES; m++) { - printf("mode %s boundary %d positions %d hashedPositions %d " - "hashEntries %d\n", - (m == CASEFUL) ? "caseful" : "caseless", boundaries[m], - positions[m], hashedPositions[m], hashEntries[m]); - } - printf("\n"); -#endif -} - -static -u32 hashLit(const hwlmLiteral &l, u32 offset, size_t max_len, Modes m) { - return streaming_hash((const u8 *)l.s.c_str() + offset, max_len, m); -} - -// sort by 'distance from start' -namespace { -struct OffsetIDFromEndOrder { - const vector &lits; // not currently used - explicit OffsetIDFromEndOrder(const vector &lits_in) - : lits(lits_in) {} - bool operator()(const pair &i1, const pair &i2) const { - if (i1.second != i2.second) { - // longest is 'first', so > not < - return i1.second > i2.second; - } - return i1.first < i2.first; - } -}; -} - -static -void fillHashes(const vector &long_lits, size_t max_len, - FDRSHashEntry *tab, size_t numEntries, Modes mode, - map &litToOffsetVal) { - const u32 nbits = lg2(numEntries); - map > > bucketToLitOffPairs; - map bucketToBitfield; - - for (const auto &lit : long_lits) { - if ((mode == CASELESS) != lit.nocase) { - continue; - } - for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) { - u32 h = hashLit(lit, j, max_len, mode); - u32 h_ent = h & ((1U << nbits) - 1); - u32 h_low = (h >> nbits) & 63; - bucketToLitOffPairs[h_ent].emplace_back(lit.id, j); - bucketToBitfield[h_ent] |= (1ULL << h_low); - } - } - - // this used to be a set, but a bitset is much much faster given that - // we're using it only for membership testing. - dynamic_bitset<> filledBuckets(numEntries); // all bits zero by default. 
- - // sweep out bitfield entries and save the results swapped accordingly - // also, anything with bitfield entries is put in filledBuckets - for (const auto &m : bucketToBitfield) { - const u32 &bucket = m.first; - const u64a &contents = m.second; - tab[bucket].bitfield = contents; - filledBuckets.set(bucket); - } - - // store out all our chains based on free values in our hash table. - // find nearest free locations that are empty (there will always be more - // entries than strings, at present) - for (auto &m : bucketToLitOffPairs) { - u32 bucket = m.first; - deque> &d = m.second; - - // sort d by distance of the residual string (len minus our depth into - // the string). We need to put the 'furthest back' string first... - stable_sort(d.begin(), d.end(), OffsetIDFromEndOrder(long_lits)); - - while (1) { - // first time through is always at bucket, then we fill in links - filledBuckets.set(bucket); - FDRSHashEntry *ent = &tab[bucket]; - u32 lit_id = d.front().first; - u32 offset = d.front().second; - - ent->state = verify_u32(litToOffsetVal[lit_id] + offset + max_len); - ent->link = (u32)LINK_INVALID; - - d.pop_front(); - if (d.empty()) { - break; - } - // now, if there is another value - // find a bucket for it and put in 'bucket' and repeat - // all we really need to do is find something not in filledBuckets, - // ideally something close to bucket - // we search backward and forward from bucket, trying to stay as - // close as possible. - UNUSED bool found = false; - int bucket_candidate = 0; - for (u32 k = 1; k < numEntries * 2; k++) { - bucket_candidate = bucket + (((k & 1) == 0) - ? 
(-(int)k / 2) : (k / 2)); - if (bucket_candidate < 0 || - (size_t)bucket_candidate >= numEntries) { - continue; - } - if (!filledBuckets.test(bucket_candidate)) { - found = true; - break; - } - } - - assert(found); - bucket = bucket_candidate; - ent->link = bucket; - } - } -} - -static -size_t maxMaskLen(const vector &lits) { - size_t rv = 0; - for (const auto &lit : lits) { - rv = max(rv, lit.msk.size()); - } - return rv; -} - -pair, size_t> -fdrBuildTableStreaming(const vector &lits, - hwlmStreamingControl &stream_control) { - // refuse to compile if we are forced to have smaller than minimum - // history required for long-literal support, full stop - // otherwise, choose the maximum of the preferred history quantity - // (currently a fairly extravagant 32) or the already used history - // quantity - subject to the limitation of stream_control.history_max - - const size_t MIN_HISTORY_REQUIRED = 32; - - if (MIN_HISTORY_REQUIRED > stream_control.history_max) { - throw std::logic_error("Cannot set history to minimum history required"); - } - - size_t max_len = - MIN(stream_control.history_max, - MAX(MIN_HISTORY_REQUIRED, stream_control.history_min)); - assert(max_len >= MIN_HISTORY_REQUIRED); - size_t max_mask_len = maxMaskLen(lits); - - vector long_lits; - if (!setupLongLits(lits, long_lits, max_len) || false) { - // "Don't need to do anything" path, not really a fail - DEBUG_PRINTF("Streaming literal path produces no table\n"); - - // we want enough history to manage the longest literal and the longest - // mask. - stream_control.literal_history_required = - max(maxLen(lits), max_mask_len) - 1; - stream_control.literal_stream_state_required = 0; - return {nullptr, size_t{0}}; - } - - // Ensure that we have enough room for the longest mask. 
- if (max_mask_len) { - max_len = max(max_len, max_mask_len - 1); - } - - u32 boundary[MAX_MODES]; - u32 positions[MAX_MODES]; - u32 hashEntries[MAX_MODES]; - - analyzeLits(long_lits, max_len, boundary, positions, hashEntries); - - // first assess the size and find our caseless threshold - size_t headerSize = ROUNDUP_16(sizeof(FDRSTableHeader)); - - size_t litTabOffset = headerSize; - - size_t litTabNumEntries = long_lits.size() + 1; - size_t litTabSize = ROUNDUP_16(litTabNumEntries * sizeof(FDRSLiteral)); - - size_t wholeLitTabOffset = litTabOffset + litTabSize; - size_t totalWholeLitTabSize = ROUNDUP_16(positions[CASEFUL] + - positions[CASELESS]); - - size_t htOffset[MAX_MODES]; - size_t htSize[MAX_MODES]; - - htOffset[CASEFUL] = wholeLitTabOffset + totalWholeLitTabSize; - htSize[CASEFUL] = hashEntries[CASEFUL] * sizeof(FDRSHashEntry); - htOffset[CASELESS] = htOffset[CASEFUL] + htSize[CASEFUL]; - htSize[CASELESS] = hashEntries[CASELESS] * sizeof(FDRSHashEntry); - - size_t tabSize = ROUNDUP_16(htOffset[CASELESS] + htSize[CASELESS]); - - // need to add +2 to both of these to allow space for the actual largest - // value as well as handling the fact that we add one to the space when - // storing out a position to allow zero to mean "no stream state value" - u8 streamBits[MAX_MODES]; - streamBits[CASEFUL] = lg2(roundUpToPowerOfTwo(positions[CASEFUL] + 2)); - streamBits[CASELESS] = lg2(roundUpToPowerOfTwo(positions[CASELESS] + 2)); - u32 tot_state_bytes = (streamBits[CASEFUL] + streamBits[CASELESS] + 7) / 8; - - auto secondaryTable = aligned_zmalloc_unique(tabSize); - assert(secondaryTable); // otherwise would have thrown std::bad_alloc - - // then fill it in - u8 * ptr = secondaryTable.get(); - FDRSTableHeader * header = (FDRSTableHeader *)ptr; - // fill in header - header->pseudoEngineID = (u32)0xffffffff; - header->N = verify_u8(max_len); // u8 so doesn't matter; won't go > 255 - for (u32 m = CASEFUL; m < MAX_MODES; ++m) { - header->boundary[m] = boundary[m]; - 
header->hashOffset[m] = verify_u32(htOffset[m]); - header->hashNBits[m] = lg2(hashEntries[m]); - header->streamStateBits[m] = streamBits[m]; - } - assert(tot_state_bytes < sizeof(u64a)); - header->streamStateBytes = verify_u8(tot_state_bytes); // u8 - - ptr += headerSize; - - // now fill in the rest - - FDRSLiteral * litTabPtr = (FDRSLiteral *)ptr; - ptr += litTabSize; - - map litToOffsetVal; - for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) { - u32 entry = verify_u32(i - long_lits.begin()); - u32 offset = verify_u32(ptr - secondaryTable.get()); - - // point the table entry to the string location - litTabPtr[entry].offset = offset; - - litToOffsetVal[entry] = offset; - - // copy the string into the string location - memcpy(ptr, i->s.c_str(), i->s.size()); - - ptr += i->s.size(); // and the string location - } - - // fill in final lit table entry with current ptr (serves as end value) - litTabPtr[long_lits.size()].offset = verify_u32(ptr - secondaryTable.get()); - - // fill hash tables - ptr = secondaryTable.get() + htOffset[CASEFUL]; - for (u32 m = CASEFUL; m < MAX_MODES; ++m) { - fillHashes(long_lits, max_len, (FDRSHashEntry *)ptr, hashEntries[m], - (Modes)m, litToOffsetVal); - ptr += htSize[m]; - } - - // tell the world what we did - stream_control.literal_history_required = max_len; - stream_control.literal_stream_state_required = tot_state_bytes; - return {move(secondaryTable), tabSize}; -} - -} // namespace ue2 diff --git a/src/fdr/fdr_streaming_internal.h b/src/fdr/fdr_streaming_internal.h deleted file mode 100644 index 11b07b56..00000000 --- a/src/fdr/fdr_streaming_internal.h +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following 
disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef FDR_STREAMING_INTERNAL_H -#define FDR_STREAMING_INTERNAL_H - -#include "ue2common.h" -#include "fdr_internal.h" -#include "util/unaligned.h" - -// tertiary table: -// a header (FDRSTableHeader) -// long_lits.size()+1 entries holding an offset to the string in the -// 'whole literal table' (FDRSLiteral structure) -// the whole literal table - every string packed in (freeform) -// hash table (caseful) (FDRSHashEntry) -// hash table (caseless) (FDRSHashEntry) - -enum Modes { - CASEFUL = 0, - CASELESS = 1, - MAX_MODES = 2 -}; - -// We have one of these structures hanging off the 'link' of our secondary -// FDR table that handles streaming strings -struct FDRSTableHeader { - u32 pseudoEngineID; // set to 0xffffffff to indicate this isn't an FDR - - // string id one beyond the maximum entry for this type of literal - // boundary[CASEFUL] is the end of the caseful literals - // boundary[CASELESS] is the end of the caseless literals and one beyond - // the largest literal id (the size of the littab) - u32 boundary[MAX_MODES]; - - // offsets are 0 if no such table exists - // offset from the base of the tertiary structure to the hash table - u32 hashOffset[MAX_MODES]; - u32 hashNBits[MAX_MODES]; // lg2 of the size of the hash table - - u8 streamStateBits[MAX_MODES]; - u8 streamStateBytes; // total size of packed stream state in bytes - u8 N; // prefix lengths - u16 pad; -}; - -// One of these structures per literal entry in our secondary FDR table. -struct FDRSLiteral { - u32 offset; - // potentially - another u32 to point to the 'next lesser included literal' - // which would be a literal that overlaps this one in such a way that a - // failure to match _this_ literal can leave us in a state that we might - // still match that literal. 
Offset information might also be called for, - // in which case we might be wanting to use a FDRSLiteralOffset -}; - -typedef u32 FDRSLiteralOffset; - -#define LINK_INVALID 0xffffffff - -// One of these structures per hash table entry in our secondary FDR table -struct FDRSHashEntry { - u64a bitfield; - FDRSLiteralOffset state; - u32 link; -}; - -static really_inline -u32 get_start_lit_idx(const struct FDRSTableHeader * h, enum Modes m) { - return m == CASEFUL ? 0 : h->boundary[m-1]; -} - -static really_inline -u32 get_end_lit_idx(const struct FDRSTableHeader * h, enum Modes m) { - return h->boundary[m]; -} - -static really_inline -const struct FDRSLiteral * getLitTab(const struct FDRSTableHeader * h) { - return (const struct FDRSLiteral *) (((const u8 *)h) + - ROUNDUP_16(sizeof(struct FDRSTableHeader))); -} - -static really_inline -u32 getBaseOffsetOfLits(const struct FDRSTableHeader * h, enum Modes m) { - return getLitTab(h)[get_start_lit_idx(h, m)].offset; -} - -static really_inline -u32 packStateVal(const struct FDRSTableHeader * h, enum Modes m, u32 v) { - return v - getBaseOffsetOfLits(h, m) + 1; -} - -static really_inline -u32 unpackStateVal(const struct FDRSTableHeader * h, enum Modes m, u32 v) { - return v + getBaseOffsetOfLits(h, m) - 1; -} - -static really_inline -u32 has_bit(const struct FDRSHashEntry * ent, u32 bit) { - return (ent->bitfield >> bit) & 0x1; -} - -static really_inline -u32 streaming_hash(const u8 *ptr, UNUSED size_t len, enum Modes mode) { - const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL; - const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL; - assert(len >= 32); - - u64a v1 = unaligned_load_u64a(ptr); - u64a v2 = unaligned_load_u64a(ptr + 8); - u64a v3 = unaligned_load_u64a(ptr + 16); - if (mode == CASELESS) { - v1 &= CASEMASK; - v2 &= CASEMASK; - v3 &= CASEMASK; - } - v1 *= MULTIPLIER; - v2 *= (MULTIPLIER*MULTIPLIER); - v3 *= (MULTIPLIER*MULTIPLIER*MULTIPLIER); - v1 >>= 32; - v2 >>= 32; - v3 >>= 32; - return v1 ^ v2 ^ v3; -} - -#endif diff 
--git a/src/fdr/fdr_streaming_runtime.h b/src/fdr/fdr_streaming_runtime.h deleted file mode 100644 index 8e264c76..00000000 --- a/src/fdr/fdr_streaming_runtime.h +++ /dev/null @@ -1,368 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef FDR_STREAMING_RUNTIME_H -#define FDR_STREAMING_RUNTIME_H - -#include "fdr_streaming_internal.h" -#include "util/partial_store.h" - -#include - -static really_inline -const struct FDRSTableHeader * getSHDR(const struct FDR * fdr) { - const u8 * linkPtr = ((const u8 *)fdr) + fdr->link; - // test if it's not really a engineID, but a 'pseudo engine id' - assert(*(const u32 *)linkPtr == 0xffffffff); - assert(linkPtr); - return (const struct FDRSTableHeader *)linkPtr; -} - -// Reads from stream state and unpacks values into stream state table. -static really_inline -void getStreamStates(const struct FDRSTableHeader * streamingTable, - const u8 * stream_state, u32 * table) { - assert(streamingTable); - assert(stream_state); - assert(table); - - u8 ss_bytes = streamingTable->streamStateBytes; - u8 ssb = streamingTable->streamStateBits[CASEFUL]; - UNUSED u8 ssb_nc = streamingTable->streamStateBits[CASELESS]; - assert(ss_bytes == (ssb + ssb_nc + 7) / 8); - -#if defined(ARCH_32_BIT) - // On 32-bit hosts, we may be able to avoid having to do any u64a - // manipulation at all. - if (ss_bytes <= 4) { - u32 ssb_mask = (1U << ssb) - 1; - u32 streamVal = partial_load_u32(stream_state, ss_bytes); - table[CASEFUL] = (u32)(streamVal & ssb_mask); - table[CASELESS] = (u32)(streamVal >> ssb); - return; - } -#endif - - u64a ssb_mask = (1ULL << ssb) - 1; - u64a streamVal = partial_load_u64a(stream_state, ss_bytes); - table[CASEFUL] = (u32)(streamVal & ssb_mask); - table[CASELESS] = (u32)(streamVal >> (u64a)ssb); -} - -#ifndef NDEBUG -// Defensive checking (used in assert) that these table values don't overflow -// outside the range available. 
-static really_inline UNUSED -u32 streamingTableOverflow(u32 * table, u8 ssb, u8 ssb_nc) { - u32 ssb_mask = (1ULL << (ssb)) - 1; - if (table[CASEFUL] & ~ssb_mask) { - return 1; - } - u32 ssb_nc_mask = (1ULL << (ssb_nc)) - 1; - if (table[CASELESS] & ~ssb_nc_mask) { - return 1; - } - return 0; -} -#endif - -// Reads from stream state table and packs values into stream state. -static really_inline -void setStreamStates(const struct FDRSTableHeader * streamingTable, - u8 * stream_state, u32 * table) { - assert(streamingTable); - assert(stream_state); - assert(table); - - u8 ss_bytes = streamingTable->streamStateBytes; - u8 ssb = streamingTable->streamStateBits[CASEFUL]; - UNUSED u8 ssb_nc = streamingTable->streamStateBits[CASELESS]; - assert(ss_bytes == (ssb + ssb_nc + 7) / 8); - assert(!streamingTableOverflow(table, ssb, ssb_nc)); - -#if defined(ARCH_32_BIT) - // On 32-bit hosts, we may be able to avoid having to do any u64a - // manipulation at all. - if (ss_bytes <= 4) { - u32 stagingStreamState = table[CASEFUL]; - stagingStreamState |= (table[CASELESS] << ssb); - - partial_store_u32(stream_state, stagingStreamState, ss_bytes); - return; - } -#endif - - u64a stagingStreamState = (u64a)table[CASEFUL]; - stagingStreamState |= (u64a)table[CASELESS] << ((u64a)ssb); - partial_store_u64a(stream_state, stagingStreamState, ss_bytes); -} - -u32 fdrStreamStateActive(const struct FDR * fdr, const u8 * stream_state) { - if (!stream_state) { - return 0; - } - const struct FDRSTableHeader * streamingTable = getSHDR(fdr); - u8 ss_bytes = streamingTable->streamStateBytes; - - // We just care if there are any bits set, and the test below is faster - // than a partial_load_u64a (especially on 32-bit hosts). 
- for (u32 i = 0; i < ss_bytes; i++) { - if (*stream_state) { - return 1; - } - ++stream_state; - } - return 0; -} - -// binary search for the literal index that contains the current state -static really_inline -u32 findLitTabEntry(const struct FDRSTableHeader * streamingTable, - u32 stateValue, enum Modes m) { - const struct FDRSLiteral * litTab = getLitTab(streamingTable); - u32 lo = get_start_lit_idx(streamingTable, m); - u32 hi = get_end_lit_idx(streamingTable, m); - - // Now move stateValue back by one so that we're looking for the - // litTab entry that includes it the string, not the one 'one past' it - stateValue -= 1; - assert(lo != hi); - assert(litTab[lo].offset <= stateValue); - assert(litTab[hi].offset > stateValue); - - // binary search to find the entry e such that: - // litTab[e].offsetToLiteral <= stateValue < litTab[e+1].offsetToLiteral - while (lo + 1 < hi) { - u32 mid = (lo + hi) / 2; - if (litTab[mid].offset <= stateValue) { - lo = mid; - } else { //(litTab[mid].offset > stateValue) { - hi = mid; - } - } - assert(litTab[lo].offset <= stateValue); - assert(litTab[hi].offset > stateValue); - return lo; -} - -static really_inline -void fdrUnpackStateMode(struct FDR_Runtime_Args *a, - const struct FDRSTableHeader *streamingTable, - const struct FDRSLiteral * litTab, - const u32 *state_table, - const enum Modes m) { - if (!state_table[m]) { - return; - } - - u32 stateValue = unpackStateVal(streamingTable, m, state_table[m]); - u32 idx = findLitTabEntry(streamingTable, stateValue, m); - size_t found_offset = litTab[idx].offset; - const u8 * found_buf = found_offset + (const u8 *)streamingTable; - size_t found_sz = stateValue - found_offset; - if (m == CASEFUL) { - a->buf_history = found_buf; - a->len_history = found_sz; - } else { - a->buf_history_nocase = found_buf; - a->len_history_nocase = found_sz; - } -} - -static really_inline -void fdrUnpackState(const struct FDR * fdr, struct FDR_Runtime_Args * a, - const u8 * stream_state) { - // nothing to 
do if there's no stream state for the case - if (!stream_state) { - return; - } - - const struct FDRSTableHeader * streamingTable = getSHDR(fdr); - const struct FDRSLiteral * litTab = getLitTab(streamingTable); - - u32 state_table[MAX_MODES]; - getStreamStates(streamingTable, stream_state, state_table); - - fdrUnpackStateMode(a, streamingTable, litTab, state_table, CASEFUL); - fdrUnpackStateMode(a, streamingTable, litTab, state_table, CASELESS); -} - -static really_inline -u32 do_single_confirm(const struct FDRSTableHeader *streamingTable, - const struct FDR_Runtime_Args *a, u32 hashState, - enum Modes m) { - const struct FDRSLiteral * litTab = getLitTab(streamingTable); - u32 idx = findLitTabEntry(streamingTable, hashState, m); - size_t found_offset = litTab[idx].offset; - const u8 * s1 = found_offset + (const u8 *)streamingTable; - assert(hashState > found_offset); - size_t l1 = hashState - found_offset; - const u8 * buf = a->buf; - size_t len = a->len; - const char nocase = m != CASEFUL; - - if (l1 > len) { - const u8 * hist = nocase ? a->buf_history_nocase : a->buf_history; - size_t hist_len = nocase ? 
a->len_history_nocase : a->len_history; - - if (l1 > len+hist_len) { - return 0; // Break out - not enough total history - } - - size_t overhang = l1 - len; - assert(overhang <= hist_len); - - if (cmpForward(hist + hist_len - overhang, s1, overhang, nocase)) { - return 0; - } - s1 += overhang; - l1 -= overhang; - } - // if we got here, we don't need history or we compared ok out of history - assert(l1 <= len); - - if (cmpForward(buf + len - l1, s1, l1, nocase)) { - return 0; - } - return hashState; // our new state -} - -static really_inline -void fdrFindStreamingHash(const struct FDR_Runtime_Args *a, - const struct FDRSTableHeader *streamingTable, - u8 hash_len, u32 *hashes) { - u8 tempbuf[128]; - const u8 *base; - if (hash_len > a->len) { - assert(hash_len <= 128); - size_t overhang = hash_len - a->len; - assert(overhang <= a->len_history); - memcpy(tempbuf, a->buf_history + a->len_history - overhang, overhang); - memcpy(tempbuf + overhang, a->buf, a->len); - base = tempbuf; - } else { - assert(hash_len <= a->len); - base = a->buf + a->len - hash_len; - } - - if (streamingTable->hashNBits[CASEFUL]) { - hashes[CASEFUL] = streaming_hash(base, hash_len, CASEFUL); - } - if (streamingTable->hashNBits[CASELESS]) { - hashes[CASELESS] = streaming_hash(base, hash_len, CASELESS); - } -} - -static really_inline -const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable, - u32 h, const enum Modes m) { - u32 nbits = streamingTable->hashNBits[m]; - if (!nbits) { - return NULL; - } - - u32 h_ent = h & ((1 << nbits) - 1); - u32 h_low = (h >> nbits) & 63; - - const struct FDRSHashEntry *tab = - (const struct FDRSHashEntry *)((const u8 *)streamingTable - + streamingTable->hashOffset[m]); - const struct FDRSHashEntry *ent = tab + h_ent; - - if (!has_bit(ent, h_low)) { - return NULL; - } - - return ent; -} - -static really_inline -void fdrPackStateMode(u32 *state_table, const struct FDR_Runtime_Args *a, - const struct FDRSTableHeader *streamingTable, - const 
struct FDRSHashEntry *ent, const enum Modes m) { - assert(ent); - assert(streamingTable->hashNBits[m]); - - const struct FDRSHashEntry *tab = - (const struct FDRSHashEntry *)((const u8 *)streamingTable - + streamingTable->hashOffset[m]); - - while (1) { - u32 tmp = 0; - if ((tmp = do_single_confirm(streamingTable, a, ent->state, m))) { - state_table[m] = packStateVal(streamingTable, m, tmp); - break; - } - if (ent->link == LINK_INVALID) { - break; - } - ent = tab + ent->link; - } -} - -static really_inline -void fdrPackState(const struct FDR *fdr, const struct FDR_Runtime_Args *a, - u8 *stream_state) { - // nothing to do if there's no stream state for the case - if (!stream_state) { - return; - } - - // get pointers to the streamer FDR and the tertiary structure - const struct FDRSTableHeader *streamingTable = getSHDR(fdr); - - assert(streamingTable->N); - - u32 state_table[MAX_MODES] = {0, 0}; - - // if we don't have enough history, we don't need to do anything - if (streamingTable->N <= a->len + a->len_history) { - u32 hashes[MAX_MODES] = {0, 0}; - - fdrFindStreamingHash(a, streamingTable, streamingTable->N, hashes); - - const struct FDRSHashEntry *ent_ful = getEnt(streamingTable, - hashes[CASEFUL], CASEFUL); - const struct FDRSHashEntry *ent_less = getEnt(streamingTable, - hashes[CASELESS], CASELESS); - - if (ent_ful) { - fdrPackStateMode(state_table, a, streamingTable, ent_ful, - CASEFUL); - } - - if (ent_less) { - fdrPackStateMode(state_table, a, streamingTable, ent_less, - CASELESS); - } - } - - setStreamStates(streamingTable, stream_state, state_table); -} - -#endif diff --git a/src/hwlm/hwlm.c b/src/hwlm/hwlm.c index 2e16f1ac..3c7615a7 100644 --- a/src/hwlm/hwlm.c +++ b/src/hwlm/hwlm.c @@ -200,8 +200,7 @@ hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len, hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch, size_t len, size_t start, HWLMCallback cb, - void *ctxt, hwlm_group_t groups, - u8 *stream_state) { + 
void *ctxt, hwlm_group_t groups) { const u8 *hbuf = scratch->core_info.hbuf; const size_t hlen = scratch->core_info.hlen; const u8 *buf = scratch->core_info.buf; @@ -234,13 +233,10 @@ hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch, DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type); aa = &t->accel1; } - // if no active stream state, use acceleration - if (!fdrStreamStateActive(HWLM_C_DATA(t), stream_state)) { - do_accel_streaming(aa, hbuf, hlen, buf, len, &start); - } + do_accel_streaming(aa, hbuf, hlen, buf, len, &start); DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, start); return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, - start, cb, ctxt, groups, stream_state); + start, cb, ctxt, groups); } } diff --git a/src/hwlm/hwlm.h b/src/hwlm/hwlm.h index 009550e9..a17575df 100644 --- a/src/hwlm/hwlm.h +++ b/src/hwlm/hwlm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -132,8 +132,7 @@ hwlm_error_t hwlmExec(const struct HWLM *tab, const u8 *buf, size_t len, hwlm_error_t hwlmExecStreaming(const struct HWLM *tab, struct hs_scratch *scratch, size_t len, size_t start, HWLMCallback callback, - void *context, hwlm_group_t groups, - u8 *stream_state); + void *context, hwlm_group_t groups); #ifdef __cplusplus } /* extern "C" */ diff --git a/src/hwlm/hwlm_build.cpp b/src/hwlm/hwlm_build.cpp index b1814245..32de6bd0 100644 --- a/src/hwlm/hwlm_build.cpp +++ b/src/hwlm/hwlm_build.cpp @@ -552,6 +552,12 @@ aligned_unique_ptr hwlmBuild(const vector &lits, if (stream_control) { assert(stream_control->history_min <= stream_control->history_max); + + // We should not have been passed any literals that are too long to + // match with a maximally-sized history buffer. 
+ assert(all_of(begin(lits), end(lits), [&](const hwlmLiteral &lit) { + return lit.s.length() <= stream_control->history_max + 1; + })); } // Check that we haven't exceeded the maximum number of literals. @@ -602,7 +608,6 @@ aligned_unique_ptr hwlmBuild(const vector &lits, stream_control->literal_history_required = lit.s.length() - 1; assert(stream_control->literal_history_required <= stream_control->history_max); - stream_control->literal_stream_state_required = 0; } eng = move(noodle); } else { diff --git a/src/hwlm/hwlm_build.h b/src/hwlm/hwlm_build.h index b5bdb0ea..fbf359e6 100644 --- a/src/hwlm/hwlm_build.h +++ b/src/hwlm/hwlm_build.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -63,10 +63,6 @@ struct hwlmStreamingControl { /** \brief OUT parameter: History required by the literal matcher to * correctly match all literals. */ size_t literal_history_required; - - /** OUT parameter: Stream state required by literal matcher in bytes. Can - * be zero, and generally will be small (0-8 bytes). 
*/ - size_t literal_stream_state_required; }; /** \brief Build an \ref HWLM literal matcher runtime structure for a group of diff --git a/src/hwlm/hwlm_literal.cpp b/src/hwlm/hwlm_literal.cpp index 9e365a0c..b0968d79 100644 --- a/src/hwlm/hwlm_literal.cpp +++ b/src/hwlm/hwlm_literal.cpp @@ -86,6 +86,7 @@ hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in, const vector &msk_in, const vector &cmp_in) : s(s_in), id(id_in), nocase(nocase_in), noruns(noruns_in), groups(groups_in), msk(msk_in), cmp(cmp_in) { + assert(s.size() <= HWLM_LITERAL_MAX_LEN); assert(msk.size() <= HWLM_MASKLEN); assert(msk.size() == cmp.size()); diff --git a/src/hwlm/hwlm_literal.h b/src/hwlm/hwlm_literal.h index 7e63a6f3..b7af99d3 100644 --- a/src/hwlm/hwlm_literal.h +++ b/src/hwlm/hwlm_literal.h @@ -41,6 +41,9 @@ namespace ue2 { +/** \brief Max length of the literal passed to HWLM. */ +#define HWLM_LITERAL_MAX_LEN 255 + /** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */ #define HWLM_MASKLEN 8 diff --git a/src/rose/init.c b/src/rose/init.c index 511eafe4..025ecca0 100644 --- a/src/rose/init.c +++ b/src/rose/init.c @@ -85,9 +85,4 @@ void roseInitState(const struct RoseEngine *t, char *state) { init_state(t, state); init_outfixes(t, state); - - // Clear the floating matcher state, if any. 
- DEBUG_PRINTF("clearing %u bytes of floating matcher state\n", - t->floatingStreamState); - memset(getFloatingMatcherState(t, state), 0, t->floatingStreamState); } diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index b9036422..3c94f543 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -1331,6 +1331,78 @@ hwlmcb_rv_t roseMatcherEod(const struct RoseEngine *rose, return HWLM_CONTINUE_MATCHING; } +static rose_inline +int roseCheckLongLiteral(const struct RoseEngine *t, + const struct hs_scratch *scratch, u64a end, + u32 lit_offset, u32 lit_length, char nocase) { + const struct core_info *ci = &scratch->core_info; + const u8 *lit = getByOffset(t, lit_offset); + + DEBUG_PRINTF("check lit at %llu, length %u\n", end, lit_length); + DEBUG_PRINTF("base buf_offset=%llu\n", ci->buf_offset); + + if (end < lit_length) { + DEBUG_PRINTF("too short!\n"); + return 0; + } + + // If any portion of the literal matched in the current buffer, check it. + if (end > ci->buf_offset) { + u32 scan_len = MIN(end - ci->buf_offset, lit_length); + u64a scan_start = end - ci->buf_offset - scan_len; + DEBUG_PRINTF("checking suffix (%u bytes) in buf[%llu:%llu]\n", scan_len, + scan_start, end); + if (cmpForward(ci->buf + scan_start, lit + lit_length - scan_len, + scan_len, nocase)) { + DEBUG_PRINTF("cmp of suffix failed\n"); + return 0; + } + } + + // If the entirety of the literal was in the current block, we are done. + if (end - lit_length >= ci->buf_offset) { + DEBUG_PRINTF("literal confirmed in current block\n"); + return 1; + } + + // We still have a prefix which we must test against the buffer prepared by + // the long literal table. This is only done in streaming mode. 
+ + assert(t->mode != HS_MODE_BLOCK); + + const u8 *ll_buf; + size_t ll_len; + if (nocase) { + ll_buf = scratch->tctxt.ll_buf_nocase; + ll_len = scratch->tctxt.ll_len_nocase; + } else { + ll_buf = scratch->tctxt.ll_buf; + ll_len = scratch->tctxt.ll_len; + } + + assert(ll_buf); + + u64a lit_start_offset = end - lit_length; + u32 prefix_len = MIN(lit_length, ci->buf_offset - lit_start_offset); + u32 hist_rewind = ci->buf_offset - lit_start_offset; + DEBUG_PRINTF("ll_len=%zu, hist_rewind=%u\n", ll_len, hist_rewind); + if (hist_rewind > ll_len) { + DEBUG_PRINTF("not enough history\n"); + return 0; + } + + DEBUG_PRINTF("check prefix len=%u from hist (len %zu, rewind %u)\n", + prefix_len, ll_len, hist_rewind); + assert(hist_rewind <= ll_len); + if (cmpForward(ll_buf + ll_len - hist_rewind, lit, prefix_len, nocase)) { + DEBUG_PRINTF("cmp of prefix failed\n"); + return 0; + } + + DEBUG_PRINTF("cmp succeeded\n"); + return 1; +} + static void updateSeqPoint(struct RoseContext *tctxt, u64a offset, const char from_mpv) { @@ -1977,6 +2049,26 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, } } PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LONG_LIT) { + const char nocase = 0; + if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("halt: failed long lit check\n"); + return HWLM_CONTINUE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) { + const char nocase = 1; + if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("halt: failed nocase long lit check\n"); + return HWLM_CONTINUE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION } } diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index a7979c4f..2871138a 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -37,14 +37,17 @@ #include "rose_build_exclusive.h" #include "rose_build_groups.h" #include "rose_build_infix.h" 
+#include "rose_build_long_lit.h" #include "rose_build_lookaround.h" #include "rose_build_matchers.h" #include "rose_build_program.h" #include "rose_build_scatter.h" #include "rose_build_util.h" #include "rose_build_width.h" +#include "rose_internal.h" #include "rose_program.h" #include "hwlm/hwlm.h" /* engine types */ +#include "hwlm/hwlm_literal.h" #include "nfa/castlecompile.h" #include "nfa/goughcompile.h" #include "nfa/mcclellancompile.h" @@ -165,6 +168,7 @@ struct RoseResources { bool has_states = false; bool checks_groups = false; bool has_lit_delay = false; + bool has_lit_check = false; // long literal support bool has_anchored = false; bool has_eod = false; }; @@ -210,9 +214,16 @@ struct build_context : boost::noncopyable { * written to the engine_blob. */ vector litPrograms; + /** \brief List of long literals (ones with CHECK_LITERAL instructions) + * that need hash table support. */ + vector longLiterals; + /** \brief Minimum offset of a match from the floating table. */ u32 floatingMinLiteralMatchOffset = 0; + /** \brief Long literal length threshold, used in streaming mode. */ + size_t longLitLengthThreshold = 0; + /** \brief Contents of the Rose bytecode immediately following the * RoseEngine. 
*/ RoseEngineBlob engine_blob; @@ -314,7 +325,7 @@ bool needsCatchup(const RoseBuildImpl &build, } static -bool isPureFloating(const RoseResources &resources) { +bool isPureFloating(const RoseResources &resources, const CompileContext &cc) { if (resources.has_outfixes || resources.has_suffixes || resources.has_leftfixes) { DEBUG_PRINTF("has engines\n"); @@ -341,6 +352,12 @@ bool isPureFloating(const RoseResources &resources) { return false; } + if (cc.streaming && resources.has_lit_check) { + DEBUG_PRINTF("has long literals in streaming mode, which needs " + "long literal table support\n"); + return false; + } + if (resources.checks_groups) { DEBUG_PRINTF("has group checks\n"); return false; @@ -384,10 +401,11 @@ u8 pickRuntimeImpl(const RoseBuildImpl &build, const build_context &bc, DEBUG_PRINTF("has_states=%d\n", bc.resources.has_states); DEBUG_PRINTF("checks_groups=%d\n", bc.resources.checks_groups); DEBUG_PRINTF("has_lit_delay=%d\n", bc.resources.has_lit_delay); + DEBUG_PRINTF("has_lit_check=%d\n", bc.resources.has_lit_check); DEBUG_PRINTF("has_anchored=%d\n", bc.resources.has_anchored); DEBUG_PRINTF("has_eod=%d\n", bc.resources.has_eod); - if (isPureFloating(bc.resources)) { + if (isPureFloating(bc.resources, build.cc)) { return ROSE_RUNTIME_PURE_LITERAL; } @@ -427,7 +445,7 @@ static void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount, u32 anchorStateSize, u32 activeArrayCount, u32 activeLeftCount, u32 laggedRoseCount, - u32 floatingStreamStateRequired, u32 historyRequired, + u32 longLitStreamStateRequired, u32 historyRequired, RoseStateOffsets *so) { u32 curr_offset = 0; @@ -445,8 +463,8 @@ void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount, so->activeLeftArray_size = mmbit_size(activeLeftCount); curr_offset += so->activeLeftArray_size; - so->floatingMatcherState = curr_offset; - curr_offset += floatingStreamStateRequired; + so->longLitState = curr_offset; + curr_offset += longLitStreamStateRequired; // ONE WHOLE BYTE 
for each active leftfix with lag. so->leftfixLagTable = curr_offset; @@ -2514,6 +2532,10 @@ void recordResources(RoseResources &resources, const RoseProgram &program) { case ROSE_INSTR_PUSH_DELAYED: resources.has_lit_delay = true; break; + case ROSE_INSTR_CHECK_LONG_LIT: + case ROSE_INSTR_CHECK_LONG_LIT_NOCASE: + resources.has_lit_check = true; + break; default: break; } @@ -2546,6 +2568,25 @@ void recordResources(RoseResources &resources, } } +static +void recordLongLiterals(build_context &bc, const RoseProgram &program) { + for (const auto &ri : program) { + if (const auto *ri_check = + dynamic_cast(ri.get())) { + DEBUG_PRINTF("found CHECK_LITERAL for string '%s'\n", + escapeString(ri_check->literal).c_str()); + bc.longLiterals.emplace_back(ri_check->literal, false); + continue; + } + if (const auto *ri_check = + dynamic_cast(ri.get())) { + DEBUG_PRINTF("found CHECK_LITERAL_NOCASE for string '%s'\n", + escapeString(ri_check->literal).c_str()); + bc.longLiterals.emplace_back(ri_check->literal, true); + } + } +} + static u32 writeProgram(build_context &bc, RoseProgram &&program) { if (program.empty()) { @@ -2560,6 +2601,7 @@ u32 writeProgram(build_context &bc, RoseProgram &&program) { } recordResources(bc.resources, program); + recordLongLiterals(bc, program); u32 len = 0; auto prog_bytecode = writeProgram(bc.engine_blob, program, &len); @@ -4285,6 +4327,48 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc, program.add_before_end(make_unique(min_offset)); } +static +void makeCheckLiteralInstruction(const RoseBuildImpl &build, + const build_context &bc, u32 final_id, + RoseProgram &program) { + const auto &lits = build.final_id_to_literal.at(final_id); + if (lits.size() != 1) { + // Long literals should not share a final_id. 
+ assert(all_of(begin(lits), end(lits), [&](u32 lit_id) { + const rose_literal_id &lit = build.literals.right.at(lit_id); + return lit.table != ROSE_FLOATING || + lit.s.length() <= bc.longLitLengthThreshold; + })); + return; + } + + u32 lit_id = *lits.begin(); + if (build.isDelayed(lit_id)) { + return; + } + + const rose_literal_id &lit = build.literals.right.at(lit_id); + if (lit.table != ROSE_FLOATING) { + return; + } + if (lit.s.length() <= bc.longLitLengthThreshold) { + return; + } + + // Check resource limits as well. + if (lit.s.length() > build.cc.grey.limitLiteralLength) { + throw ResourceLimitError(); + } + + unique_ptr ri; + if (lit.s.any_nocase()) { + ri = make_unique(lit.s.get_string()); + } else { + ri = make_unique(lit.s.get_string()); + } + program.add_before_end(move(ri)); +} + static bool hasDelayedLiteral(RoseBuildImpl &build, const vector &lit_edges) { @@ -4312,6 +4396,9 @@ RoseProgram buildLitInitialProgram(RoseBuildImpl &build, build_context &bc, DEBUG_PRINTF("final_id %u\n", final_id); + // Check long literal info. + makeCheckLiteralInstruction(build, bc, final_id, program); + // Check lit mask. makeCheckLitMaskInstruction(build, bc, final_id, program); @@ -4838,6 +4925,172 @@ u32 buildEagerQueueIter(const set &eager, u32 leftfixBeginQueue, return bc.engine_blob.add_iterator(iter); } +static +void allocateFinalIdToSet(RoseBuildImpl &build, const set &lits, + size_t longLitLengthThreshold, u32 *next_final_id) { + const auto &g = build.g; + auto &literal_info = build.literal_info; + auto &final_id_to_literal = build.final_id_to_literal; + + /* We can allocate the same final id to multiple literals of the same type + * if they share the same vertex set and trigger the same delayed literal + * ids and squash the same roles and have the same group squashing + * behaviour. Benefits literals cannot be merged. 
*/ + + for (u32 int_id : lits) { + rose_literal_info &curr_info = literal_info[int_id]; + const rose_literal_id &lit = build.literals.right.at(int_id); + const auto &verts = curr_info.vertices; + + // Literals with benefits cannot be merged. + if (curr_info.requires_benefits) { + DEBUG_PRINTF("id %u has benefits\n", int_id); + goto assign_new_id; + } + + // Long literals (that require CHECK_LITERAL instructions) cannot be + // merged. + if (lit.s.length() > longLitLengthThreshold) { + DEBUG_PRINTF("id %u is a long literal\n", int_id); + goto assign_new_id; + } + + if (!verts.empty() && curr_info.delayed_ids.empty()) { + vector cand; + insert(&cand, cand.end(), g[*verts.begin()].literals); + for (auto v : verts) { + vector temp; + set_intersection(cand.begin(), cand.end(), + g[v].literals.begin(), + g[v].literals.end(), + inserter(temp, temp.end())); + cand.swap(temp); + } + + for (u32 cand_id : cand) { + if (cand_id >= int_id) { + break; + } + + const auto &cand_info = literal_info[cand_id]; + const auto &cand_lit = build.literals.right.at(cand_id); + + if (cand_lit.s.length() > longLitLengthThreshold) { + continue; + } + + if (cand_info.requires_benefits) { + continue; + } + + if (!cand_info.delayed_ids.empty()) { + /* TODO: allow cases where delayed ids are equivalent. 
+                     * This is awkward currently as they have not had their + * final ids allocated yet */ + continue; + } + + if (lits.find(cand_id) == lits.end() + || cand_info.vertices.size() != verts.size() + || cand_info.squash_group != curr_info.squash_group) { + continue; + } + + /* if we are squashing groups we need to check if they are the + * same group */ + if (cand_info.squash_group + && cand_info.group_mask != curr_info.group_mask) { + continue; + } + + u32 final_id = cand_info.final_id; + assert(final_id != MO_INVALID_IDX); + assert(curr_info.final_id == MO_INVALID_IDX); + curr_info.final_id = final_id; + final_id_to_literal[final_id].insert(int_id); + goto next_lit; + } + } + + assign_new_id: + /* oh well, have to give it a fresh one, hang the expense */ + DEBUG_PRINTF("allocating final id %u to %u\n", *next_final_id, int_id); + assert(curr_info.final_id == MO_INVALID_IDX); + curr_info.final_id = *next_final_id; + final_id_to_literal[*next_final_id].insert(int_id); + (*next_final_id)++; + next_lit:; + } +} + +static +bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) { + assert(lit_id < build.literal_info.size()); + const auto &info = build.literal_info[lit_id]; + if (!info.vertices.empty()) { + return true; + } + + for (const u32 &delayed_id : info.delayed_ids) { + assert(delayed_id < build.literal_info.size()); + const rose_literal_info &delayed_info = build.literal_info[delayed_id]; + if (!delayed_info.vertices.empty()) { + return true; + } + } + + DEBUG_PRINTF("literal %u has no refs\n", lit_id); + return false; +} + +/** \brief Allocate final literal IDs for all literals. 
*/ +static +void allocateFinalLiteralId(RoseBuildImpl &build, + size_t longLitLengthThreshold) { + set anch; + set norm; + set delay; + + /* undelayed ids come first */ + assert(build.final_id_to_literal.empty()); + u32 next_final_id = 0; + for (u32 i = 0; i < build.literal_info.size(); i++) { + assert(!build.hasFinalId(i)); + + if (!isUsedLiteral(build, i)) { + /* what is this literal good for? absolutely nothing */ + continue; + } + + // The special EOD event literal has its own program and does not need + // a real literal ID. + if (i == build.eod_event_literal_id) { + assert(build.eod_event_literal_id != MO_INVALID_IDX); + continue; + } + + if (build.isDelayed(i)) { + assert(!build.literal_info[i].requires_benefits); + delay.insert(i); + } else if (build.literals.right.at(i).table == ROSE_ANCHORED) { + anch.insert(i); + } else { + norm.insert(i); + } + } + + /* normal lits */ + allocateFinalIdToSet(build, norm, longLitLengthThreshold, &next_final_id); + + /* next anchored stuff */ + build.anchored_base_id = next_final_id; + allocateFinalIdToSet(build, anch, longLitLengthThreshold, &next_final_id); + + /* delayed ids come last */ + build.delay_base_id = next_final_id; + allocateFinalIdToSet(build, delay, longLitLengthThreshold, &next_final_id); +} + static aligned_unique_ptr addSmallWriteEngine(RoseBuildImpl &build, aligned_unique_ptr rose) { @@ -4873,16 +5126,89 @@ aligned_unique_ptr addSmallWriteEngine(RoseBuildImpl &build, return rose2; } +/** + * \brief Returns the pair (number of literals, max length) for all real + * literals in the floating table that are in-use. 
+ */ +static +pair floatingCountAndMaxLen(const RoseBuildImpl &build) { + size_t num = 0; + size_t max_len = 0; + + for (const auto &e : build.literals.right) { + const u32 id = e.first; + const rose_literal_id &lit = e.second; + + if (lit.table != ROSE_FLOATING) { + continue; + } + if (lit.delay) { + // Skip delayed literals, so that we only count the undelayed + // version that ends up in the HWLM table. + continue; + } + if (!isUsedLiteral(build, id)) { + continue; + } + + num++; + max_len = max(max_len, lit.s.length()); + } + DEBUG_PRINTF("%zu floating literals with max_len=%zu\n", num, max_len); + return {num, max_len}; +} + +size_t calcLongLitThreshold(const RoseBuildImpl &build, + const size_t historyRequired) { + const auto &cc = build.cc; + + // In block mode, we should only use the long literal support for literals + // that cannot be handled by HWLM. + if (!cc.streaming) { + return HWLM_LITERAL_MAX_LEN; + } + + size_t longLitLengthThreshold = ROSE_LONG_LITERAL_THRESHOLD_MIN; + + // Expand to size of history we've already allocated. Note that we need N-1 + // bytes of history to match a literal of length N. + longLitLengthThreshold = max(longLitLengthThreshold, historyRequired + 1); + + // If we only have one literal, allow for a larger value in order to avoid + // building a long literal table for a trivial Noodle case that we could + // fit in history. + const auto num_len = floatingCountAndMaxLen(build); + if (num_len.first == 1) { + if (num_len.second > longLitLengthThreshold) { + DEBUG_PRINTF("expanding for single literal of length %zu\n", + num_len.second); + longLitLengthThreshold = num_len.second; + } + } + + // Clamp to max history available. 
+ longLitLengthThreshold = + min(longLitLengthThreshold, size_t{cc.grey.maxHistoryAvailable} + 1); + + return longLitLengthThreshold; +} + aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { DerivedBoundaryReports dboundary(boundary); size_t historyRequired = calcHistoryRequired(); // Updated by HWLM. + size_t longLitLengthThreshold = calcLongLitThreshold(*this, + historyRequired); + DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold); + + allocateFinalLiteralId(*this, longLitLengthThreshold); auto anchored_dfas = buildAnchoredDfas(*this); build_context bc; bc.floatingMinLiteralMatchOffset = findMinFloatingLiteralMatch(*this, anchored_dfas); + bc.longLitLengthThreshold = longLitLengthThreshold; bc.needs_catchup = needsCatchup(*this, anchored_dfas); recordResources(bc.resources, *this); if (!anchored_dfas.empty()) { @@ -4944,6 +5270,11 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 eodProgramOffset = writeEodProgram(*this, bc, eodNfaIterOffset); + size_t longLitStreamStateRequired = 0; + u32 longLitTableOffset = buildLongLiteralTable(*this, bc.engine_blob, + bc.longLiterals, longLitLengthThreshold, &historyRequired, + &longLitStreamStateRequired); + vector activeLeftIter; buildActiveLeftIter(leftInfoTable, activeLeftIter); @@ -4982,9 +5313,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Build floating HWLM matcher. 
rose_group fgroups = 0; size_t fsize = 0; - size_t floatingStreamStateRequired = 0; - auto ftable = buildFloatingMatcher(*this, &fgroups, &fsize, &historyRequired, - &floatingStreamStateRequired); + auto ftable = buildFloatingMatcher(*this, bc.longLitLengthThreshold, + &fgroups, &fsize, &historyRequired); u32 fmatcherOffset = 0; if (ftable) { currOffset = ROUNDUP_CL(currOffset); @@ -5057,7 +5387,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { memset(&stateOffsets, 0, sizeof(stateOffsets)); fillStateOffsets(*this, bc.numStates, anchorStateSize, activeArrayCount, activeLeftCount, laggedRoseCount, - floatingStreamStateRequired, historyRequired, + longLitStreamStateRequired, historyRequired, &stateOffsets); scatter_plan_raw state_scatter; @@ -5173,6 +5503,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->ematcherOffset = ematcherOffset; engine->sbmatcherOffset = sbmatcherOffset; engine->fmatcherOffset = fmatcherOffset; + engine->longLitTableOffset = longLitTableOffset; engine->amatcherMinWidth = findMinWidth(*this, ROSE_ANCHORED); engine->fmatcherMinWidth = findMinWidth(*this, ROSE_FLOATING); engine->eodmatcherMinWidth = findMinWidth(*this, ROSE_EOD_ANCHORED); @@ -5198,7 +5529,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->totalNumLiterals = verify_u32(literal_info.size()); engine->asize = verify_u32(asize); engine->ematcherRegionSize = ematcher_region_size; - engine->floatingStreamState = verify_u32(floatingStreamStateRequired); + engine->longLitStreamState = verify_u32(longLitStreamStateRequired); engine->boundary.reportEodOffset = boundary_out.reportEodOffset; engine->boundary.reportZeroOffset = boundary_out.reportZeroOffset; diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index c93f4eac..6b19549b 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -87,172 +87,6 @@ namespace ue2 { #define ANCHORED_REHOME_DEEP 25 
#define ANCHORED_REHOME_SHORT_LEN 3 -#ifdef DEBUG -static UNUSED -void printLitInfo(const rose_literal_info &li, u32 id) { - DEBUG_PRINTF("lit_info %u\n", id); - DEBUG_PRINTF(" parent %u%s", li.undelayed_id, - li.delayed_ids.empty() ? "":", children:"); - for (u32 d_id : li.delayed_ids) { - printf(" %u", d_id); - } - printf("\n"); - DEBUG_PRINTF(" group %llu %s\n", li.group_mask, li.squash_group ? "s":""); -} -#endif - -static -void allocateFinalIdToSet(const RoseGraph &g, const set &lits, - deque *literal_info, - map > *final_id_to_literal, - u32 *next_final_id) { - /* We can allocate the same final id to multiple literals of the same type - * if they share the same vertex set and trigger the same delayed literal - * ids and squash the same roles and have the same group squashing - * behaviour. Benefits literals cannot be merged. */ - - for (u32 int_id : lits) { - rose_literal_info &curr_info = (*literal_info)[int_id]; - const auto &verts = curr_info.vertices; - - if (!verts.empty() && !curr_info.requires_benefits - && curr_info.delayed_ids.empty()) { - vector cand; - insert(&cand, cand.end(), g[*verts.begin()].literals); - for (auto v : verts) { - vector temp; - set_intersection(cand.begin(), cand.end(), - g[v].literals.begin(), - g[v].literals.end(), - inserter(temp, temp.end())); - cand.swap(temp); - } - - for (u32 cand_id : cand) { - if (cand_id >= int_id) { - break; - } - - const rose_literal_info &cand_info = (*literal_info)[cand_id]; - - if (cand_info.requires_benefits) { - continue; - } - - if (!cand_info.delayed_ids.empty()) { - /* TODO: allow cases where delayed ids are equivalent. 
- * This is awkward currently as the have not had their - * final ids allocated yet */ - continue; - } - - if (lits.find(cand_id) == lits.end() - || cand_info.vertices.size() != verts.size() - || cand_info.squash_group != curr_info.squash_group) { - continue; - } - - /* if we are squashing groups we need to check if they are the - * same group */ - if (cand_info.squash_group - && cand_info.group_mask != curr_info.group_mask) { - continue; - } - - u32 final_id = cand_info.final_id; - assert(final_id != MO_INVALID_IDX); - assert(curr_info.final_id == MO_INVALID_IDX); - curr_info.final_id = final_id; - (*final_id_to_literal)[final_id].insert(int_id); - goto next_lit; - } - } - - /* oh well, have to give it a fresh one, hang the expense */ - DEBUG_PRINTF("allocating final id %u to %u\n", *next_final_id, int_id); - assert(curr_info.final_id == MO_INVALID_IDX); - curr_info.final_id = *next_final_id; - (*final_id_to_literal)[*next_final_id].insert(int_id); - (*next_final_id)++; - next_lit:; - } -} - -static -bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) { - assert(lit_id < build.literal_info.size()); - const auto &info = build.literal_info[lit_id]; - if (!info.vertices.empty()) { - return true; - } - - for (const u32 &delayed_id : info.delayed_ids) { - assert(delayed_id < build.literal_info.size()); - const rose_literal_info &delayed_info = build.literal_info[delayed_id]; - if (!delayed_info.vertices.empty()) { - return true; - } - } - - DEBUG_PRINTF("literal %u has no refs\n", lit_id); - return false; -} - -/** \brief Allocate final literal IDs for all literals. - * - * These are the literal ids used in the bytecode. 
- */ -static -void allocateFinalLiteralId(RoseBuildImpl &tbi) { - RoseGraph &g = tbi.g; - - set anch; - set norm; - set delay; - - /* undelayed ids come first */ - assert(tbi.final_id_to_literal.empty()); - u32 next_final_id = 0; - for (u32 i = 0; i < tbi.literal_info.size(); i++) { - assert(!tbi.hasFinalId(i)); - - if (!isUsedLiteral(tbi, i)) { - /* what is this literal good for? absolutely nothing */ - continue; - } - - // The special EOD event literal has its own program and does not need - // a real literal ID. - if (i == tbi.eod_event_literal_id) { - assert(tbi.eod_event_literal_id != MO_INVALID_IDX); - continue; - } - - if (tbi.isDelayed(i)) { - assert(!tbi.literal_info[i].requires_benefits); - delay.insert(i); - } else if (tbi.literals.right.at(i).table == ROSE_ANCHORED) { - anch.insert(i); - } else { - norm.insert(i); - } - } - - /* normal lits */ - allocateFinalIdToSet(g, norm, &tbi.literal_info, &tbi.final_id_to_literal, - &next_final_id); - - /* next anchored stuff */ - tbi.anchored_base_id = next_final_id; - allocateFinalIdToSet(g, anch, &tbi.literal_info, &tbi.final_id_to_literal, - &next_final_id); - - /* delayed ids come last */ - tbi.delay_base_id = next_final_id; - allocateFinalIdToSet(g, delay, &tbi.literal_info, &tbi.final_id_to_literal, - &next_final_id); -} - #define MAX_EXPLOSION_NC 3 static bool limited_explosion(const ue2_literal &s) { @@ -284,7 +118,12 @@ void RoseBuildImpl::handleMixedSensitivity(void) { continue; } - if (limited_explosion(lit.s)) { + // We don't want to explode long literals, as they require confirmation + // with a CHECK_LITERAL instruction and need unique final_ids. + // TODO: we could allow explosion for literals where the prefixes + // covered by CHECK_LITERAL are identical. 
+ if (lit.s.length() <= ROSE_LONG_LITERAL_THRESHOLD_MIN && + limited_explosion(lit.s)) { DEBUG_PRINTF("need to explode existing string '%s'\n", dumpString(lit.s).c_str()); literal_info[id].requires_explode = true; @@ -1653,7 +1492,6 @@ aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { /* final prep work */ remapCastleTops(*this); - allocateFinalLiteralId(*this); inspectRoseTops(*this); buildRoseSquashMasks(*this); diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 5fb27c55..516548b3 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -442,20 +442,26 @@ void dumpTestLiterals(const string &filename, const vector &lits) { static void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) { - auto lits = fillHamsterLiteralList(build, ROSE_ANCHORED); + size_t historyRequired = build.calcHistoryRequired(); + size_t longLitLengthThreshold = + calcLongLitThreshold(build, historyRequired); + + auto lits = fillHamsterLiteralList(build, ROSE_ANCHORED, + longLitLengthThreshold); dumpTestLiterals(base + "rose_anchored_test_literals.txt", lits); - lits = fillHamsterLiteralList(build, ROSE_FLOATING); + lits = fillHamsterLiteralList(build, ROSE_FLOATING, longLitLengthThreshold); dumpTestLiterals(base + "rose_float_test_literals.txt", lits); - lits = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED); + lits = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED, + build.ematcher_region_size); dumpTestLiterals(base + "rose_eod_test_literals.txt", lits); if (!build.cc.streaming) { lits = fillHamsterLiteralList(build, ROSE_FLOATING, - ROSE_SMALL_BLOCK_LEN); + ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); auto lits2 = fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK, - ROSE_SMALL_BLOCK_LEN); + ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); lits.insert(end(lits), begin(lits2), end(lits2)); dumpTestLiterals(base + "rose_smallblock_test_literals.txt", lits); } diff --git a/src/rose/rose_build_impl.h 
b/src/rose/rose_build_impl.h index cc00603a..b3f986aa 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -56,6 +56,8 @@ namespace ue2 { #define ROSE_GROUPS_MAX 64 +#define ROSE_LONG_LITERAL_THRESHOLD_MIN 33 + struct BoundaryReports; struct CastleProto; struct CompileContext; @@ -603,6 +605,9 @@ private: ReportID next_nfa_report; }; +size_t calcLongLitThreshold(const RoseBuildImpl &build, + const size_t historyRequired); + // Free functions, in rose_build_misc.cpp bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v); diff --git a/src/rose/rose_build_long_lit.cpp b/src/rose/rose_build_long_lit.cpp new file mode 100644 index 00000000..c01bdc8f --- /dev/null +++ b/src/rose/rose_build_long_lit.cpp @@ -0,0 +1,348 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "rose_build_long_lit.h" + +#include "rose_build_engine_blob.h" +#include "rose_build_impl.h" +#include "stream_long_lit_hash.h" +#include "util/alloc.h" +#include "util/bitutils.h" +#include "util/verify_types.h" +#include "util/compile_context.h" + +using namespace std; + +namespace ue2 { + +/** \brief Minimum size for a non-empty hash table. */ +static constexpr u32 MIN_HASH_TABLE_SIZE = 4096; + +struct LongLitModeInfo { + u32 boundary = 0; //!< One above the largest index for this mode. + u32 positions = 0; //!< Total number of string positions. + u32 hashEntries = 0; //!< Number of hash table entries. +}; + +struct LongLitInfo { + LongLitModeInfo caseful; + LongLitModeInfo nocase; +}; + +static +u32 roundUpToPowerOfTwo(u32 x) { + assert(x != 0); + u32 bits = lg2(x - 1) + 1; + assert(bits < 32); + return 1U << bits; +} + +static +LongLitInfo analyzeLongLits(const vector &lits, + size_t max_len) { + LongLitInfo info; + u32 hashedPositionsCase = 0; + u32 hashedPositionsNocase = 0; + + // Caseful boundary is the index of the first nocase literal, as we're + // ordered (caseful, nocase). + auto first_nocase = find_if(begin(lits), end(lits), + [](const ue2_case_string &lit) { return lit.nocase; }); + info.caseful.boundary = verify_u32(distance(lits.begin(), first_nocase)); + + // Nocase boundary is the size of the literal set. 
+ info.nocase.boundary = verify_u32(lits.size()); + + for (const auto &lit : lits) { + if (lit.nocase) { + hashedPositionsNocase += lit.s.size() - max_len; + info.nocase.positions += lit.s.size(); + } else { + hashedPositionsCase += lit.s.size() - max_len; + info.caseful.positions += lit.s.size(); + } + } + + info.caseful.hashEntries = hashedPositionsCase + ? roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, hashedPositionsCase)) + : 0; + info.nocase.hashEntries = hashedPositionsNocase + ? roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, hashedPositionsNocase)) + : 0; + + DEBUG_PRINTF("caseful: boundary=%u, positions=%u, hashedPositions=%u, " + "hashEntries=%u\n", + info.caseful.boundary, info.caseful.positions, + hashedPositionsCase, info.caseful.hashEntries); + DEBUG_PRINTF("nocase: boundary=%u, positions=%u, hashedPositions=%u, " + "hashEntries=%u\n", + info.nocase.boundary, info.nocase.positions, + hashedPositionsNocase, info.nocase.hashEntries); + + return info; +} + +static +void fillHashes(const vector &lits, size_t max_len, + RoseLongLitHashEntry *tab, size_t numEntries, bool nocase, + const map &litToOffsetVal) { + const u32 nbits = lg2(numEntries); + map>> bucketToLitOffPairs; + map bucketToBitfield; + + for (u32 lit_id = 0; lit_id < lits.size(); lit_id++) { + const ue2_case_string &lit = lits[lit_id]; + if (nocase != lit.nocase) { + continue; + } + for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) { + const u8 *substr = (const u8 *)lit.s.c_str() + offset; + u32 h = hashLongLiteral(substr, max_len, lit.nocase); + u32 h_ent = h & ((1U << nbits) - 1); + u32 h_low = (h >> nbits) & 63; + bucketToLitOffPairs[h_ent].emplace_back(lit_id, offset); + bucketToBitfield[h_ent] |= (1ULL << h_low); + } + } + + // this used to be a set, but a bitset is much much faster given that + // we're using it only for membership testing. + boost::dynamic_bitset<> filledBuckets(numEntries); // all zero by default. 
+ + // sweep out bitfield entries and save the results swapped accordingly + // also, anything with bitfield entries is put in filledBuckets + for (const auto &m : bucketToBitfield) { + const u32 &bucket = m.first; + const u64a &contents = m.second; + tab[bucket].bitfield = contents; + filledBuckets.set(bucket); + } + + // store out all our chains based on free values in our hash table. + // find nearest free locations that are empty (there will always be more + // entries than strings, at present) + for (auto &m : bucketToLitOffPairs) { + u32 bucket = m.first; + deque> &d = m.second; + + // sort d by distance of the residual string (len minus our depth into + // the string). We need to put the 'furthest back' string first... + stable_sort(d.begin(), d.end(), + [](const pair &a, const pair &b) { + if (a.second != b.second) { + return a.second > b.second; /* longest is first */ + } + return a.first < b.first; + }); + + while (1) { + // first time through is always at bucket, then we fill in links + filledBuckets.set(bucket); + RoseLongLitHashEntry *ent = &tab[bucket]; + u32 lit_id = d.front().first; + u32 offset = d.front().second; + + ent->state = verify_u32(litToOffsetVal.at(lit_id) + + offset + max_len); + ent->link = (u32)LINK_INVALID; + + d.pop_front(); + if (d.empty()) { + break; + } + // now, if there is another value + // find a bucket for it and put in 'bucket' and repeat + // all we really need to do is find something not in filledBuckets, + // ideally something close to bucket + // we search backward and forward from bucket, trying to stay as + // close as possible. + UNUSED bool found = false; + int bucket_candidate = 0; + for (u32 k = 1; k < numEntries * 2; k++) { + bucket_candidate = bucket + (((k & 1) == 0) + ? 
(-(int)k / 2) : (k / 2)); + if (bucket_candidate < 0 || + (size_t)bucket_candidate >= numEntries) { + continue; + } + if (!filledBuckets.test(bucket_candidate)) { + found = true; + break; + } + } + + assert(found); + bucket = bucket_candidate; + ent->link = bucket; + } + } +} + +u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob, + vector &lits, + size_t longLitLengthThreshold, + size_t *historyRequired, + size_t *longLitStreamStateRequired) { + // Work in terms of history requirement (i.e. literal len - 1). + const size_t max_len = longLitLengthThreshold - 1; + + // We should only be building the long literal hash table in streaming mode. + if (!build.cc.streaming) { + return 0; + } + + if (lits.empty()) { + DEBUG_PRINTF("no long literals\n"); + return 0; + } + + // The last char of each literal is trimmed as we're not interested in full + // matches, only partial matches. + for (auto &lit : lits) { + assert(!lit.s.empty()); + lit.s.pop_back(); + } + + // Sort by caseful/caseless and in lexicographical order. + stable_sort(begin(lits), end(lits), [](const ue2_case_string &a, + const ue2_case_string &b) { + if (a.nocase != b.nocase) { + return a.nocase < b.nocase; + } + return a.s < b.s; + }); + + // Find literals that are prefixes of other literals (including + // duplicates). Note that we iterate in reverse, since we want to retain + // only the longest string from a set of prefixes. + auto it = unique(lits.rbegin(), lits.rend(), [](const ue2_case_string &a, + const ue2_case_string &b) { + return a.nocase == b.nocase && a.s.size() >= b.s.size() && + equal(b.s.begin(), b.s.end(), a.s.begin()); + }); + + // Erase dupes found by unique(). 
+ lits.erase(lits.begin(), it.base()); + + LongLitInfo info = analyzeLongLits(lits, max_len); + + // first assess the size and find our caseless threshold + size_t headerSize = ROUNDUP_16(sizeof(RoseLongLitTable)); + + size_t litTabOffset = headerSize; + + size_t litTabNumEntries = lits.size() + 1; + size_t litTabSize = ROUNDUP_16(litTabNumEntries * sizeof(RoseLongLiteral)); + + size_t wholeLitTabOffset = litTabOffset + litTabSize; + size_t totalWholeLitTabSize = + ROUNDUP_16(info.caseful.positions + info.nocase.positions); + + size_t htOffsetCase = wholeLitTabOffset + totalWholeLitTabSize; + size_t htSizeCase = info.caseful.hashEntries * sizeof(RoseLongLitHashEntry); + size_t htOffsetNocase = htOffsetCase + htSizeCase; + size_t htSizeNocase = + info.nocase.hashEntries * sizeof(RoseLongLitHashEntry); + + size_t tabSize = ROUNDUP_16(htOffsetNocase + htSizeNocase); + + // need to add +2 to both of these to allow space for the actual largest + // value as well as handling the fact that we add one to the space when + // storing out a position to allow zero to mean "no stream state value" + u8 streamBitsCase = lg2(roundUpToPowerOfTwo(info.caseful.positions + 2)); + u8 streamBitsNocase = lg2(roundUpToPowerOfTwo(info.nocase.positions + 2)); + u32 tot_state_bytes = ROUNDUP_N(streamBitsCase + streamBitsNocase, 8) / 8; + + auto table = aligned_zmalloc_unique(tabSize); + assert(table); // otherwise would have thrown std::bad_alloc + + // then fill it in + char *ptr = table.get(); + RoseLongLitTable *header = (RoseLongLitTable *)ptr; + // fill in header + header->maxLen = verify_u8(max_len); // u8 so doesn't matter; won't go > 255 + header->boundaryCase = info.caseful.boundary; + header->hashOffsetCase = verify_u32(htOffsetCase); + header->hashNBitsCase = lg2(info.caseful.hashEntries); + header->streamStateBitsCase = streamBitsCase; + header->boundaryNocase = info.nocase.boundary; + header->hashOffsetNocase = verify_u32(htOffsetNocase); + header->hashNBitsNocase = 
lg2(info.nocase.hashEntries); + header->streamStateBitsNocase = streamBitsNocase; + assert(tot_state_bytes < sizeof(u64a)); + header->streamStateBytes = verify_u8(tot_state_bytes); // u8 + + ptr += headerSize; + + // now fill in the rest + + RoseLongLiteral *litTabPtr = (RoseLongLiteral *)ptr; + ptr += litTabSize; + + map litToOffsetVal; + for (auto i = lits.begin(), e = lits.end(); i != e; ++i) { + u32 entry = verify_u32(i - lits.begin()); + u32 offset = verify_u32(ptr - table.get()); + + // point the table entry to the string location + litTabPtr[entry].offset = offset; + + litToOffsetVal[entry] = offset; + + // copy the string into the string location + const auto &s = i->s; + memcpy(ptr, s.c_str(), s.size()); + + ptr += s.size(); // and the string location + } + + // fill in final lit table entry with current ptr (serves as end value) + litTabPtr[lits.size()].offset = verify_u32(ptr - table.get()); + + // fill hash tables + ptr = table.get() + htOffsetCase; + fillHashes(lits, max_len, (RoseLongLitHashEntry *)ptr, + info.caseful.hashEntries, false, litToOffsetVal); + ptr += htSizeCase; + fillHashes(lits, max_len, (RoseLongLitHashEntry *)ptr, + info.nocase.hashEntries, true, litToOffsetVal); + ptr += htSizeNocase; + + assert(ptr <= table.get() + tabSize); + + DEBUG_PRINTF("built streaming table, size=%zu\n", tabSize); + DEBUG_PRINTF("requires %zu bytes of history\n", max_len); + DEBUG_PRINTF("requires %u bytes of stream state\n", tot_state_bytes); + + *historyRequired = max(*historyRequired, max_len); + *longLitStreamStateRequired = tot_state_bytes; + + return blob.add(table.get(), tabSize, 16); +} + +} // namespace ue2 diff --git a/src/rose/rose_build_long_lit.h b/src/rose/rose_build_long_lit.h new file mode 100644 index 00000000..a77b1b69 --- /dev/null +++ b/src/rose/rose_build_long_lit.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted 
provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef ROSE_BUILD_LONG_LIT_H +#define ROSE_BUILD_LONG_LIT_H + +#include "ue2common.h" + +#include + +namespace ue2 { + +class RoseBuildImpl; +class RoseEngineBlob; +struct ue2_case_string; + +u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob, + std::vector &lits, + size_t longLitLengthThreshold, + size_t *historyRequired, + size_t *longLitStreamStateRequired); + +} // namespace ue2 + + +#endif // ROSE_BUILD_LONG_LIT_H diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 2eb70f60..522ff6b6 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -485,7 +485,7 @@ bool isNoRunsVertex(const RoseBuildImpl &build, RoseVertex u) { static bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id, - const rose_literal_info &info) { + const rose_literal_info &info, const size_t max_len) { DEBUG_PRINTF("lit id %u\n", id); if (info.requires_benefits) { @@ -493,6 +493,11 @@ bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id, return false; } + if (build.literals.right.at(id).s.length() > max_len) { + DEBUG_PRINTF("requires literal check\n"); + return false; + } + if (isDirectHighlander(build, id, info)) { DEBUG_PRINTF("highlander direct report\n"); return true; @@ -625,7 +630,7 @@ u64a literalMinReportOffset(const RoseBuildImpl &build, vector fillHamsterLiteralList(const RoseBuildImpl &build, rose_literal_table table, - u32 max_offset) { + size_t max_len, u32 max_offset) { vector lits; for (const auto &e : build.literals.right) { @@ -663,10 +668,14 @@ vector fillHamsterLiteralList(const RoseBuildImpl &build, const vector &msk = e.second.msk; const vector &cmp = e.second.cmp; - bool noruns = isNoRunsLiteral(build, id, info); + bool noruns = isNoRunsLiteral(build, id, info, max_len); if (info.requires_explode) { DEBUG_PRINTF("exploding lit\n"); + + // We do not require_explode for long literals. 
+ assert(lit.length() <= max_len); + case_iter cit = caseIterateBegin(lit); case_iter cite = caseIterateEnd(); for (; cit != cite; ++cit) { @@ -687,20 +696,28 @@ vector fillHamsterLiteralList(const RoseBuildImpl &build, msk, cmp); } } else { - const std::string &s = lit.get_string(); - const bool nocase = lit.any_nocase(); + string s = lit.get_string(); + bool nocase = lit.any_nocase(); DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, " "cmp=%s\n", final_id, escapeString(s).c_str(), (int)nocase, noruns, dumpMask(msk).c_str(), dumpMask(cmp).c_str()); + if (s.length() > max_len) { + DEBUG_PRINTF("truncating to tail of length %zu\n", max_len); + s.erase(0, s.length() - max_len); + // We shouldn't have set a threshold below 8 chars. + assert(msk.size() <= max_len); + } + if (!maskIsConsistent(s, nocase, msk, cmp)) { DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); continue; } - lits.emplace_back(s, nocase, noruns, final_id, groups, msk, cmp); + lits.emplace_back(move(s), nocase, noruns, final_id, groups, msk, + cmp); } } @@ -708,14 +725,15 @@ vector fillHamsterLiteralList(const RoseBuildImpl &build, } aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, + size_t longLitLengthThreshold, rose_group *fgroups, size_t *fsize, - size_t *historyRequired, - size_t *streamStateRequired) { + size_t *historyRequired) { *fsize = 0; *fgroups = 0; - auto fl = fillHamsterLiteralList(build, ROSE_FLOATING); + auto fl = fillHamsterLiteralList(build, ROSE_FLOATING, + longLitLengthThreshold); if (fl.empty()) { DEBUG_PRINTF("empty floating matcher\n"); return nullptr; @@ -747,13 +765,10 @@ aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, if (build.cc.streaming) { DEBUG_PRINTF("literal_history_required=%zu\n", ctl.literal_history_required); - DEBUG_PRINTF("literal_stream_state_required=%zu\n", - ctl.literal_stream_state_required); assert(ctl.literal_history_required <= build.cc.grey.maxHistoryAvailable); *historyRequired = 
max(*historyRequired, ctl.literal_history_required); - *streamStateRequired = ctl.literal_stream_state_required; } *fsize = hwlmSize(ftable.get()); @@ -778,8 +793,8 @@ aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, return nullptr; } - auto lits = fillHamsterLiteralList(build, ROSE_FLOATING, - ROSE_SMALL_BLOCK_LEN); + auto lits = fillHamsterLiteralList( + build, ROSE_FLOATING, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); if (lits.empty()) { DEBUG_PRINTF("no floating table\n"); return nullptr; @@ -788,8 +803,9 @@ aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, return nullptr; } - auto anchored_lits = fillHamsterLiteralList(build, - ROSE_ANCHORED_SMALL_BLOCK, ROSE_SMALL_BLOCK_LEN); + auto anchored_lits = + fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK, + ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); if (anchored_lits.empty()) { DEBUG_PRINTF("no small-block anchored literals\n"); return nullptr; @@ -823,7 +839,8 @@ aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, size_t *esize) { *esize = 0; - auto el = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED); + auto el = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED, + build.ematcher_region_size); if (el.empty()) { DEBUG_PRINTF("no eod anchored literals\n"); diff --git a/src/rose/rose_build_matchers.h b/src/rose/rose_build_matchers.h index 2a225bf5..a25dbca3 100644 --- a/src/rose/rose_build_matchers.h +++ b/src/rose/rose_build_matchers.h @@ -51,13 +51,14 @@ struct hwlmLiteral; * only lead to a pattern match after max_offset may be excluded. 
*/ std::vector fillHamsterLiteralList(const RoseBuildImpl &build, - rose_literal_table table, u32 max_offset = ROSE_BOUND_INF); + rose_literal_table table, size_t max_len, + u32 max_offset = ROSE_BOUND_INF); aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, + size_t longLitLengthThreshold, rose_group *fgroups, size_t *fsize, - size_t *historyRequired, - size_t *streamStateRequired); + size_t *historyRequired); aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, size_t *sbsize); diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index fc157b88..ee237639 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -495,6 +495,24 @@ void RoseInstrEnginesEod::write(void *dest, RoseEngineBlob &blob, inst->iter_offset = iter_offset; } +void RoseInstrCheckLongLit::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + assert(!literal.empty()); + inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); + inst->lit_length = verify_u32(literal.size()); +} + +void RoseInstrCheckLongLitNocase::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + assert(!literal.empty()); + inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); + inst->lit_length = verify_u32(literal.size()); +} + static OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) { OffsetMap offset_map; diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h index c76456cc..0c725b46 100644 --- a/src/rose/rose_build_program.h +++ b/src/rose/rose_build_program.h @@ -37,6 +37,7 @@ #include "util/hash.h" #include "util/make_unique.h" #include "util/ue2_containers.h" +#include "util/ue2string.h" #include #include @@ -1721,6 +1722,62 @@ public: ~RoseInstrMatcherEod() 
override; }; +class RoseInstrCheckLongLit + : public RoseInstrBaseNoTargets { +public: + std::string literal; + + RoseInstrCheckLongLit(std::string literal_in) + : literal(std::move(literal_in)) {} + + bool operator==(const RoseInstrCheckLongLit &ri) const { + return literal == ri.literal; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), literal); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckLongLit &ri, const OffsetMap &, + const OffsetMap &) const { + return literal == ri.literal; + } +}; + +class RoseInstrCheckLongLitNocase + : public RoseInstrBaseNoTargets { +public: + std::string literal; + + RoseInstrCheckLongLitNocase(std::string literal_in) + : literal(std::move(literal_in)) { + upperString(literal); + } + + bool operator==(const RoseInstrCheckLongLitNocase &ri) const { + return literal == ri.literal; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), literal); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckLongLitNocase &ri, const OffsetMap &, + const OffsetMap &) const { + return literal == ri.literal; + } +}; + class RoseInstrEnd : public RoseInstrBaseTrivial { diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 4a0d297e..9a0bd28c 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -610,6 +610,24 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { PROGRAM_CASE(MATCHER_EOD) {} PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_LONG_LIT) { + os << " lit_offset " << ri->lit_offset << endl; + os << " lit_length " << ri->lit_length << endl; + const char *lit = (const char *)t + ri->lit_offset; + os << " literal: \"" + << escapeString(string(lit, ri->lit_length)) << "\"" << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) { + os << " lit_offset " << 
ri->lit_offset << endl; + os << " lit_length " << ri->lit_length << endl; + const char *lit = (const char *)t + ri->lit_offset; + os << " literal: \"" + << escapeString(string(lit, ri->lit_length)) << "\"" << endl; + } + PROGRAM_NEXT_INSTRUCTION + default: os << " UNKNOWN (code " << int{code} << ")" << endl; os << " " << endl; @@ -1031,6 +1049,32 @@ void dumpAnchoredStats(const void *atable, FILE *f) { } +static +void dumpLongLiteralTable(const RoseEngine *t, FILE *f) { + if (!t->longLitTableOffset) { + return; + } + + fprintf(f, "\n"); + fprintf(f, "Long literal table (streaming):\n"); + + const auto *ll_table = + (const struct RoseLongLitTable *)loadFromByteCodeOffset( + t, t->longLitTableOffset); + + u32 num_caseful = ll_table->boundaryCase; + u32 num_caseless = ll_table->boundaryNocase - num_caseful; + + fprintf(f, " longest len: %u\n", ll_table->maxLen); + fprintf(f, " counts: %u caseful, %u caseless\n", num_caseful, + num_caseless); + fprintf(f, " hash bits: %u caseful, %u caseless\n", + ll_table->hashNBitsCase, ll_table->hashNBitsNocase); + fprintf(f, " state bits: %u caseful, %u caseless\n", + ll_table->streamStateBitsCase, ll_table->streamStateBitsNocase); + fprintf(f, " stream state: %u bytes\n", ll_table->streamStateBytes); +} + // Externally accessible functions void roseDumpText(const RoseEngine *t, FILE *f) { @@ -1106,7 +1150,7 @@ void roseDumpText(const RoseEngine *t, FILE *f) { fprintf(f, " - history buffer : %u bytes\n", t->historyRequired); fprintf(f, " - exhaustion vector : %u bytes\n", (t->ekeyCount + 7) / 8); fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize); - fprintf(f, " - floating matcher : %u bytes\n", t->floatingStreamState); + fprintf(f, " - long lit matcher : %u bytes\n", t->longLitStreamState); fprintf(f, " - active array : %u bytes\n", mmbit_size(t->activeArrayCount)); fprintf(f, " - active rose : %u bytes\n", @@ -1160,6 +1204,8 @@ void roseDumpText(const RoseEngine *t, FILE *f) { fprintf(f, "\nSmall-block literal matcher 
stats:\n\n"); hwlmPrintStats(sbtable, f); } + + dumpLongLiteralTable(t, f); } #define DUMP_U8(o, member) \ @@ -1196,6 +1242,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, ematcherOffset); DUMP_U32(t, fmatcherOffset); DUMP_U32(t, sbmatcherOffset); + DUMP_U32(t, longLitTableOffset); DUMP_U32(t, amatcherMinWidth); DUMP_U32(t, fmatcherMinWidth); DUMP_U32(t, eodmatcherMinWidth); @@ -1245,7 +1292,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, stateOffsets.anchorState); DUMP_U32(t, stateOffsets.groups); DUMP_U32(t, stateOffsets.groups_size); - DUMP_U32(t, stateOffsets.floatingMatcherState); + DUMP_U32(t, stateOffsets.longLitState); DUMP_U32(t, stateOffsets.somLocation); DUMP_U32(t, stateOffsets.somValid); DUMP_U32(t, stateOffsets.somWritable); @@ -1264,7 +1311,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, ematcherRegionSize); DUMP_U32(t, somRevCount); DUMP_U32(t, somRevOffsetOffset); - DUMP_U32(t, floatingStreamState); + DUMP_U32(t, longLitStreamState); fprintf(f, "}\n"); fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine)); } diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 51913984..32805ab3 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -217,8 +217,8 @@ struct RoseStateOffsets { /** Size of packed Rose groups value, in bytes. */ u32 groups_size; - /** State for floating literal matcher (managed by HWLM). */ - u32 floatingMatcherState; + /** State for long literal support. */ + u32 longLitState; /** Packed SOM location slots. 
*/ u32 somLocation; @@ -325,6 +325,7 @@ struct RoseEngine { u32 ematcherOffset; // offset of the eod-anchored literal matcher (bytes) u32 fmatcherOffset; // offset of the floating literal matcher (bytes) u32 sbmatcherOffset; // offset of the small-block literal matcher (bytes) + u32 longLitTableOffset; // offset of the long literal table u32 amatcherMinWidth; /**< minimum number of bytes required for a pattern * involved with the anchored table to produce a full * match. */ @@ -434,7 +435,7 @@ struct RoseEngine { u32 ematcherRegionSize; /* max region size to pass to ematcher */ u32 somRevCount; /**< number of som reverse nfas */ u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */ - u32 floatingStreamState; // size in bytes + u32 longLitStreamState; // size in bytes struct scatter_full_plan state_init; }; @@ -445,6 +446,94 @@ struct ALIGN_CL_DIRECTIVE anchored_matcher_info { u32 anchoredMinDistance; /* start of region to run anchored table over */ }; +/** + * \brief Long literal table header. + */ +struct RoseLongLitTable { + /** \brief String ID one beyond the maximum entry for caseful literals. */ + u32 boundaryCase; + + /** + * \brief String ID one beyond the maximum entry for caseless literals. + * This is also the total size of the literal table. + */ + u32 boundaryNocase; + + /** + * \brief Offset of the caseful hash table (relative to RoseLongLitTable + * base). + * + * Offset is zero if no such table exists. + */ + u32 hashOffsetCase; + + /** + * \brief Offset of the caseless hash table (relative to RoseLongLitTable + * base). + * + * Offset is zero if no such table exists. + */ + u32 hashOffsetNocase; + + /** \brief lg2 of the size of the caseful hash table. */ + u32 hashNBitsCase; + + /** \brief lg2 of the size of the caseless hash table. */ + u32 hashNBitsNocase; + + /** + * \brief Number of bits of packed stream state for the caseful hash table. 
+ */ + u8 streamStateBitsCase; + + /** + * \brief Number of bits of packed stream state for the caseless hash + * table. + */ + u8 streamStateBitsNocase; + + /** \brief Total size of packed stream state in bytes. */ + u8 streamStateBytes; + + /** \brief Max length of literal prefixes. */ + u8 maxLen; +}; + +/** + * \brief One of these structures per literal entry in our long literal table. + */ +struct RoseLongLiteral { + /** + * \brief Offset of the literal string itself, relative to + * RoseLongLitTable base. + */ + u32 offset; +}; + +/** \brief "No further links" value used for \ref RoseLongLitHashEntry::link. */ +#define LINK_INVALID 0xffffffff + +/** + * \brief One of these structures per hash table entry in our long literal + * table. + */ +struct RoseLongLitHashEntry { + /** + * \brief Bitfield used as a quick guard for hash buckets. + * + * For a given hash value N, the low six bits of N are taken and the + * corresponding bit is switched on in this bitfield if this bucket is used + * for that hash. + */ + u64a bitfield; + + /** \brief Offset in the literal table for this string. */ + u32 state; + + /** \brief Hash table index of next entry in the chain for this bucket. */ + u32 link; +}; + static really_inline const struct anchored_matcher_info *getALiteralMatcher( const struct RoseEngine *t) { diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 4714960c..ed913316 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -117,7 +117,19 @@ enum RoseInstructionCode { /** \brief Run the EOD-anchored HWLM literal matcher. */ ROSE_INSTR_MATCHER_EOD, - LAST_ROSE_INSTRUCTION = ROSE_INSTR_MATCHER_EOD //!< Sentinel. + /** + * \brief Confirm a case-sensitive literal at the current offset. In + * streaming mode, this makes use of the long literal table. + */ + ROSE_INSTR_CHECK_LONG_LIT, + + /** + * \brief Confirm a case-insensitive literal at the current offset. In + * streaming mode, this makes use of the long literal table. 
+ */ + ROSE_INSTR_CHECK_LONG_LIT_NOCASE, + + LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_LONG_LIT_NOCASE //!< Sentinel. }; struct ROSE_STRUCT_END { @@ -465,4 +477,18 @@ struct ROSE_STRUCT_MATCHER_EOD { u8 code; //!< From enum RoseInstructionCode. }; +/** Note: check failure will halt program. */ +struct ROSE_STRUCT_CHECK_LONG_LIT { + u8 code; //!< From enum RoseInstructionCode. + u32 lit_offset; //!< Offset of literal string. + u32 lit_length; //!< Length of literal string. +}; + +/** Note: check failure will halt program. */ +struct ROSE_STRUCT_CHECK_LONG_LIT_NOCASE { + u8 code; //!< From enum RoseInstructionCode. + u32 lit_offset; //!< Offset of literal string. + u32 lit_length; //!< Length of literal string. +}; + #endif // ROSE_ROSE_PROGRAM_H diff --git a/src/rose/runtime.h b/src/rose/runtime.h index 60c7d34b..d2a4b5d7 100644 --- a/src/rose/runtime.h +++ b/src/rose/runtime.h @@ -97,8 +97,8 @@ void storeGroups(const struct RoseEngine *t, char *state, rose_group groups) { } static really_inline -u8 *getFloatingMatcherState(const struct RoseEngine *t, char *state) { - return (u8 *)(state + t->stateOffsets.floatingMatcherState); +u8 *getLongLitState(const struct RoseEngine *t, char *state) { + return (u8 *)(state + t->stateOffsets.longLitState); } static really_inline diff --git a/src/rose/stream.c b/src/rose/stream.c index b934f98f..72286b4b 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -33,6 +33,8 @@ #include "miracle.h" #include "program_runtime.h" #include "rose.h" +#include "rose_internal.h" +#include "stream_long_lit.h" #include "hwlm/hwlm.h" #include "nfa/mcclellan.h" #include "nfa/nfa_api.h" @@ -406,6 +408,7 @@ void ensureStreamNeatAndTidy(const struct RoseEngine *t, char *state, roseFlushLastByteHistory(t, scratch, offset + length); tctxt->lastEndOffset = offset + length; storeGroups(t, state, tctxt->groups); + storeLongLiteralState(t, state, scratch); } static really_inline @@ -588,11 +591,17 @@ void roseStreamExec(const struct RoseEngine *t, 
struct hs_scratch *scratch) { } size_t hlength = scratch->core_info.hlen; + char rebuild = 0; + + if (hlength) { + // Can only have long literal state or rebuild if this is not the + // first write to this stream. + loadLongLiteralState(t, state, scratch); + rebuild = (scratch->core_info.status & STATUS_DELAY_DIRTY) && + (t->maxFloatingDelayedMatch == ROSE_BOUND_INF || + offset < t->maxFloatingDelayedMatch); + } - char rebuild = hlength && - (scratch->core_info.status & STATUS_DELAY_DIRTY) && - (t->maxFloatingDelayedMatch == ROSE_BOUND_INF || - offset < t->maxFloatingDelayedMatch); DEBUG_PRINTF("**rebuild %hhd status %hhu mfdm %u, offset %llu\n", rebuild, scratch->core_info.status, t->maxFloatingDelayedMatch, offset); @@ -621,17 +630,9 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { } DEBUG_PRINTF("start=%zu\n", start); - u8 *stream_state; - if (t->floatingStreamState) { - stream_state = getFloatingMatcherState(t, state); - } else { - stream_state = NULL; - } - DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length); hwlmExecStreaming(ftable, scratch, flen, start, roseFloatingCallback, - scratch, tctxt->groups & t->floating_group_mask, - stream_state); + scratch, tctxt->groups & t->floating_group_mask); } flush_delay_and_exit: diff --git a/src/rose/stream_long_lit.h b/src/rose/stream_long_lit.h new file mode 100644 index 00000000..676544d7 --- /dev/null +++ b/src/rose/stream_long_lit.h @@ -0,0 +1,434 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef STREAM_LONG_LIT_H +#define STREAM_LONG_LIT_H + +#include "rose.h" +#include "rose_common.h" +#include "rose_internal.h" +#include "stream_long_lit_hash.h" +#include "util/copybytes.h" + +static really_inline +const struct RoseLongLiteral * +getLitTab(const struct RoseLongLitTable *ll_table) { + return (const struct RoseLongLiteral *)((const char *)ll_table + + ROUNDUP_16(sizeof(struct RoseLongLitTable))); +} + +static really_inline +u32 get_start_lit_idx(const struct RoseLongLitTable *ll_table, + const char nocase) { + return nocase ? ll_table->boundaryCase : 0; +} + +static really_inline +u32 get_end_lit_idx(const struct RoseLongLitTable *ll_table, + const char nocase) { + return nocase ? 
ll_table->boundaryNocase : ll_table->boundaryCase; +} + +// search for the literal index that contains the current state +static rose_inline +u32 findLitTabEntry(const struct RoseLongLitTable *ll_table, + u32 stateValue, const char nocase) { + const struct RoseLongLiteral *litTab = getLitTab(ll_table); + u32 lo = get_start_lit_idx(ll_table, nocase); + u32 hi = get_end_lit_idx(ll_table, nocase); + + // Now move stateValue back by one so that we're looking for the + // litTab entry that includes the string, not the one 'one past' it + stateValue -= 1; + assert(lo != hi); + assert(litTab[lo].offset <= stateValue); + assert(litTab[hi].offset > stateValue); + + // binary search to find the entry e such that: + // litTab[e].offsetToLiteral <= stateValue < litTab[e+1].offsetToLiteral + while (lo + 1 < hi) { + u32 mid = (lo + hi) / 2; + if (litTab[mid].offset <= stateValue) { + lo = mid; + } else { // (litTab[mid].offset > stateValue) { + hi = mid; + } + } + assert(litTab[lo].offset <= stateValue); + assert(litTab[hi].offset > stateValue); + return lo; +} + +// Reads from stream state and unpacks values into stream state table. +static really_inline +void loadLongLitStreamState(const struct RoseLongLitTable *ll_table, + const u8 *ll_state, u32 *state_case, + u32 *state_nocase) { + assert(ll_table); + assert(ll_state); + assert(state_case && state_nocase); + + u8 ss_bytes = ll_table->streamStateBytes; + u8 ssb = ll_table->streamStateBitsCase; + UNUSED u8 ssb_nc = ll_table->streamStateBitsNocase; + assert(ss_bytes == (ssb + ssb_nc + 7) / 8); + +#if defined(ARCH_32_BIT) + // On 32-bit hosts, we may be able to avoid having to do any u64a + // manipulation at all. 
+ if (ss_bytes <= 4) { + u32 ssb_mask = (1U << ssb) - 1; + u32 streamVal = partial_load_u32(ll_state, ss_bytes); + *state_case = (u32)(streamVal & ssb_mask); + *state_nocase = (u32)(streamVal >> ssb); + return; + } +#endif + + u64a ssb_mask = (1ULL << ssb) - 1; + u64a streamVal = partial_load_u64a(ll_state, ss_bytes); + *state_case = (u32)(streamVal & ssb_mask); + *state_nocase = (u32)(streamVal >> ssb); +} + +static really_inline +u32 getBaseOffsetOfLits(const struct RoseLongLitTable *ll_table, + const char nocase) { + u32 lit_idx = get_start_lit_idx(ll_table, nocase); + return getLitTab(ll_table)[lit_idx].offset; +} + +static really_inline +u32 unpackStateVal(const struct RoseLongLitTable *ll_table, const char nocase, + u32 v) { + return v + getBaseOffsetOfLits(ll_table, nocase) - 1; +} + +static really_inline +u32 packStateVal(const struct RoseLongLitTable *ll_table, const char nocase, + u32 v) { + return v - getBaseOffsetOfLits(ll_table, nocase) + 1; +} + +static rose_inline +void loadLongLiteralStateMode(struct hs_scratch *scratch, + const struct RoseLongLitTable *ll_table, + const struct RoseLongLiteral *litTab, + const u32 state, const char nocase) { + if (!state) { + DEBUG_PRINTF("no state for %s\n", nocase ? 
"caseless" : "caseful"); + return; + } + + u32 stateValue = unpackStateVal(ll_table, nocase, state); + u32 idx = findLitTabEntry(ll_table, stateValue, nocase); + size_t found_offset = litTab[idx].offset; + const u8 *found_buf = found_offset + (const u8 *)ll_table; + size_t found_sz = stateValue - found_offset; + + struct RoseContext *tctxt = &scratch->tctxt; + if (nocase) { + tctxt->ll_buf_nocase = found_buf; + tctxt->ll_len_nocase = found_sz; + } else { + tctxt->ll_buf = found_buf; + tctxt->ll_len = found_sz; + } +} + +static rose_inline +void loadLongLiteralState(const struct RoseEngine *t, char *state, + struct hs_scratch *scratch) { + if (!t->longLitTableOffset) { + return; + } + + scratch->tctxt.ll_buf = scratch->core_info.hbuf; + scratch->tctxt.ll_len = scratch->core_info.hlen; + scratch->tctxt.ll_buf_nocase = scratch->core_info.hbuf; + scratch->tctxt.ll_len_nocase = scratch->core_info.hlen; + + const struct RoseLongLitTable *ll_table = + getByOffset(t, t->longLitTableOffset); + const struct RoseLongLiteral *litTab = getLitTab(ll_table); + const u8 *ll_state = getLongLitState(t, state); + + u32 state_case; + u32 state_nocase; + loadLongLitStreamState(ll_table, ll_state, &state_case, &state_nocase); + + loadLongLiteralStateMode(scratch, ll_table, litTab, state_case, 0); + loadLongLiteralStateMode(scratch, ll_table, litTab, state_nocase, 1); +} + +static rose_inline +char confirmLongLiteral(const struct RoseLongLitTable *ll_table, + const hs_scratch_t *scratch, u32 hashState, + const char nocase) { + const struct RoseLongLiteral *litTab = getLitTab(ll_table); + u32 idx = findLitTabEntry(ll_table, hashState, nocase); + size_t found_offset = litTab[idx].offset; + const u8 *s = found_offset + (const u8 *)ll_table; + assert(hashState > found_offset); + size_t len = hashState - found_offset; + const u8 *buf = scratch->core_info.buf; + const size_t buf_len = scratch->core_info.len; + + if (len > buf_len) { + const struct RoseContext *tctxt = &scratch->tctxt; + const 
u8 *hist = nocase ? tctxt->ll_buf_nocase : tctxt->ll_buf; + size_t hist_len = nocase ? tctxt->ll_len_nocase : tctxt->ll_len; + + if (len > buf_len + hist_len) { + return 0; // Break out - not enough total history + } + + size_t overhang = len - buf_len; + assert(overhang <= hist_len); + + if (cmpForward(hist + hist_len - overhang, s, overhang, nocase)) { + return 0; + } + s += overhang; + len -= overhang; + } + + // if we got here, we don't need history or we compared ok out of history + assert(len <= buf_len); + + if (cmpForward(buf + buf_len - len, s, len, nocase)) { + return 0; + } + + DEBUG_PRINTF("confirmed hashState=%u\n", hashState); + return 1; +} + +static rose_inline +void calcStreamingHash(const struct core_info *ci, + const struct RoseLongLitTable *ll_table, u8 hash_len, + u32 *hash_case, u32 *hash_nocase) { + assert(hash_len >= LONG_LIT_HASH_LEN); + + // Our hash function operates over LONG_LIT_HASH_LEN bytes, starting from + // location (end of buffer - hash_len). If this block can be satisfied + // entirely from either the current buffer or the history buffer, we pass + // in the pointer directly; otherwise we must make a copy. + + u8 tempbuf[LONG_LIT_HASH_LEN]; + const u8 *base; + + if (hash_len > ci->len) { + size_t overhang = hash_len - ci->len; + if (overhang >= LONG_LIT_HASH_LEN) { + // Can read enough to hash from inside the history buffer. + assert(overhang <= ci->hlen); + base = ci->hbuf + ci->hlen - overhang; + } else { + // Copy: first chunk from history buffer. + assert(overhang <= ci->hlen); + copy_upto_32_bytes(tempbuf, ci->hbuf + ci->hlen - overhang, + overhang); + // Copy: second chunk from current buffer. + size_t copy_buf_len = LONG_LIT_HASH_LEN - overhang; + assert(copy_buf_len <= ci->len); + copy_upto_32_bytes(tempbuf + overhang, ci->buf, copy_buf_len); + // Read from our temporary buffer for the hash. + base = tempbuf; + } + } else { + // Can read enough to hash from inside the current buffer. 
+ base = ci->buf + ci->len - hash_len; + } + + if (ll_table->hashNBitsCase) { + *hash_case = hashLongLiteral(base, LONG_LIT_HASH_LEN, 0); + DEBUG_PRINTF("caseful hash %u\n", *hash_case); + } + if (ll_table->hashNBitsNocase) { + *hash_nocase = hashLongLiteral(base, LONG_LIT_HASH_LEN, 1); + DEBUG_PRINTF("caseless hash %u\n", *hash_nocase); + } +} + +static really_inline +const struct RoseLongLitHashEntry * +getHashTableBase(const struct RoseLongLitTable *ll_table, const char nocase) { + const u32 hashOffset = nocase ? ll_table->hashOffsetNocase + : ll_table->hashOffsetCase; + return (const struct RoseLongLitHashEntry *)((const char *)ll_table + + hashOffset); +} + +static rose_inline +const struct RoseLongLitHashEntry * +getLongLitHashEnt(const struct RoseLongLitTable *ll_table, u32 h, + const char nocase) { + u32 nbits = nocase ? ll_table->hashNBitsNocase : ll_table->hashNBitsCase; + if (!nbits) { + return NULL; + } + + u32 h_ent = h & ((1 << nbits) - 1); + u32 h_low = (h >> nbits) & 63; + + const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, nocase); + const struct RoseLongLitHashEntry *ent = tab + h_ent; + + if (!((ent->bitfield >> h_low) & 0x1)) { + return NULL; + } + + return ent; +} + +static rose_inline +u32 storeLongLiteralStateMode(const struct hs_scratch *scratch, + const struct RoseLongLitTable *ll_table, + const struct RoseLongLitHashEntry *ent, + const char nocase) { + assert(ent); + assert(nocase ? ll_table->hashNBitsNocase : ll_table->hashNBitsCase); + + const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, nocase); + + u32 packed_state = 0; + while (1) { + if (confirmLongLiteral(ll_table, scratch, ent->state, nocase)) { + packed_state = packStateVal(ll_table, nocase, ent->state); + DEBUG_PRINTF("set %s state to %u\n", nocase ? 
"nocase" : "case", + packed_state); + break; + } + if (ent->link == LINK_INVALID) { + break; + } + ent = tab + ent->link; + } + return packed_state; +} + +#ifndef NDEBUG +// Defensive checking (used in assert) that these table values don't overflow +// the range available. +static really_inline +char streamingTableOverflow(u32 state_case, u32 state_nocase, u8 ssb, + u8 ssb_nc) { + u32 ssb_mask = (1ULL << (ssb)) - 1; + if (state_case & ~ssb_mask) { + return 1; + } + u32 ssb_nc_mask = (1ULL << (ssb_nc)) - 1; + if (state_nocase & ~ssb_nc_mask) { + return 1; + } + return 0; +} +#endif + +// Reads from stream state table and packs values into stream state. +static rose_inline +void storeLongLitStreamState(const struct RoseLongLitTable *ll_table, + u8 *ll_state, u32 state_case, u32 state_nocase) { + assert(ll_table); + assert(ll_state); + + u8 ss_bytes = ll_table->streamStateBytes; + u8 ssb = ll_table->streamStateBitsCase; + UNUSED u8 ssb_nc = ll_table->streamStateBitsNocase; + assert(ss_bytes == ROUNDUP_N(ssb + ssb_nc, 8) / 8); + assert(!streamingTableOverflow(state_case, state_nocase, ssb, ssb_nc)); + +#if defined(ARCH_32_BIT) + // On 32-bit hosts, we may be able to avoid having to do any u64a + // manipulation at all. 
+ if (ss_bytes <= 4) { + u32 stagingStreamState = state_case; + stagingStreamState |= (state_nocase << ssb); + partial_store_u32(ll_state, stagingStreamState, ss_bytes); + return; + } +#endif + + u64a stagingStreamState = (u64a)state_case; + stagingStreamState |= (u64a)state_nocase << ssb; + partial_store_u64a(ll_state, stagingStreamState, ss_bytes); +} + +static rose_inline +void storeLongLiteralState(const struct RoseEngine *t, char *state, + struct hs_scratch *scratch) { + if (!t->longLitTableOffset) { + DEBUG_PRINTF("no table\n"); + return; + } + + struct core_info *ci = &scratch->core_info; + const struct RoseLongLitTable *ll_table = + getByOffset(t, t->longLitTableOffset); + assert(ll_table->maxLen); + + DEBUG_PRINTF("maxLen=%u, len=%zu, hlen=%zu\n", ll_table->maxLen, ci->len, + ci->hlen); + + u32 state_case = 0; + u32 state_nocase = 0; + + // If we don't have enough history, we don't need to do anything. + if (ll_table->maxLen <= ci->len + ci->hlen) { + u32 hash_case = 0; + u32 hash_nocase = 0; + + calcStreamingHash(ci, ll_table, ll_table->maxLen, &hash_case, + &hash_nocase); + + const struct RoseLongLitHashEntry *ent_case = + getLongLitHashEnt(ll_table, hash_case, 0); + const struct RoseLongLitHashEntry *ent_nocase = + getLongLitHashEnt(ll_table, hash_nocase, 1); + + DEBUG_PRINTF("ent_caseful=%p, ent_caseless=%p\n", ent_case, ent_nocase); + + if (ent_case) { + state_case = storeLongLiteralStateMode(scratch, ll_table, + ent_case, 0); + } + + if (ent_nocase) { + state_nocase = storeLongLiteralStateMode(scratch, ll_table, + ent_nocase, 1); + } + } + + DEBUG_PRINTF("store {%u, %u}\n", state_case, state_nocase); + + u8 *ll_state = getLongLitState(t, state); + storeLongLitStreamState(ll_table, ll_state, state_case, state_nocase); +} + +#endif // STREAM_LONG_LIT_H diff --git a/src/rose/stream_long_lit_hash.h b/src/rose/stream_long_lit_hash.h new file mode 100644 index 00000000..0e1606c5 --- /dev/null +++ b/src/rose/stream_long_lit_hash.h @@ -0,0 +1,65 @@ +/* + * 
Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef STREAM_LONG_LIT_HASH_H +#define STREAM_LONG_LIT_HASH_H + +#include "ue2common.h" +#include "util/unaligned.h" + +/** \brief Length of the buffer operated on by \ref hashLongLiteral(). */ +#define LONG_LIT_HASH_LEN 24 + +/** \brief Hash function used for long literal table in streaming mode. 
*/ +static really_inline +u32 hashLongLiteral(const u8 *ptr, UNUSED size_t len, char nocase) { + const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL; + const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL; + + // We unconditionally hash LONG_LIT_HASH_LEN bytes; all use cases of this + // hash are for strings longer than this. + assert(len >= 24); + + u64a v1 = unaligned_load_u64a(ptr); + u64a v2 = unaligned_load_u64a(ptr + 8); + u64a v3 = unaligned_load_u64a(ptr + 16); + if (nocase) { + v1 &= CASEMASK; + v2 &= CASEMASK; + v3 &= CASEMASK; + } + v1 *= MULTIPLIER; + v2 *= MULTIPLIER * MULTIPLIER; + v3 *= MULTIPLIER * MULTIPLIER * MULTIPLIER; + v1 >>= 32; + v2 >>= 32; + v3 >>= 32; + return v1 ^ v2 ^ v3; +} + +#endif // STREAM_LONG_LIT_HASH_H diff --git a/src/runtime.c b/src/runtime.c index e761acc2..30745d81 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -736,20 +736,11 @@ void pureLiteralStreamExec(struct hs_stream *stream_state, assert(scratch); assert(!can_stop_matching(scratch)); - char *state = getMultiState(stream_state); - const struct RoseEngine *rose = stream_state->rose; const struct HWLM *ftable = getFLiteralMatcher(rose); size_t len2 = scratch->core_info.len; - u8 *hwlm_stream_state; - if (rose->floatingStreamState) { - hwlm_stream_state = getFloatingMatcherState(rose, state); - } else { - hwlm_stream_state = NULL; - } - DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n", stream_state->offset, scratch->core_info.len); @@ -761,8 +752,8 @@ void pureLiteralStreamExec(struct hs_stream *stream_state, // start the match region at zero. 
const size_t start = 0; - hwlmExecStreaming(ftable, scratch, len2, start, roseCallback, - scratch, rose->initialGroups, hwlm_stream_state); + hwlmExecStreaming(ftable, scratch, len2, start, roseCallback, scratch, + rose->initialGroups); if (!told_to_stop_matching(scratch) && isAllExhausted(rose, scratch->core_info.exhaustionVector)) { diff --git a/src/scratch.h b/src/scratch.h index a2f02503..73a35149 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -122,6 +122,26 @@ struct RoseContext { u32 filledDelayedSlots; u32 curr_qi; /**< currently executing main queue index during * \ref nfaQueueExec */ + + /** + * \brief Buffer for caseful long literal support, used in streaming mode + * only. + * + * If a long literal prefix was at the end of the buffer at the end of a + * stream write, then the long lit table hashes it and stores the result in + * stream state. At the start of the next write, this value is used to set + * this buffer to the matching prefix string (stored in the bytecode). + */ + const u8 *ll_buf; + + /** \brief Length in bytes of the string pointed to by ll_buf. */ + size_t ll_len; + + /** \brief Caseless version of ll_buf. */ + const u8 *ll_buf_nocase; + + /** \brief Length in bytes of the string pointed to by ll_buf_nocase. */ + size_t ll_len_nocase; }; struct match_deduper { diff --git a/src/util/ue2string.h b/src/util/ue2string.h index 3c7be473..08b6a544 100644 --- a/src/util/ue2string.h +++ b/src/util/ue2string.h @@ -55,6 +55,29 @@ size_t maxStringSelfOverlap(const std::string &a, bool nocase); /// Compares two strings, returns non-zero if they're different. u32 cmp(const char *a, const char *b, size_t len, bool nocase); +/** + * \brief String type that also records whether the whole string is caseful or + * caseless. + * + * You should use \ref ue2_literal if you need to represent a mixed-case + * literal. 
+ */ +struct ue2_case_string { + ue2_case_string(std::string s_in, bool nocase_in) + : s(std::move(s_in)), nocase(nocase_in) { + if (nocase) { + upperString(s); + } + } + + bool operator==(const ue2_case_string &other) const { + return s == other.s && nocase == other.nocase; + } + + std::string s; + bool nocase; +}; + struct ue2_literal { public: /// Single element proxy, pointed to by our const_iterator. diff --git a/unit/internal/fdr.cpp b/unit/internal/fdr.cpp index c66ab4c5..6116bfdb 100644 --- a/unit/internal/fdr.cpp +++ b/unit/internal/fdr.cpp @@ -337,8 +337,8 @@ TEST_P(FDRp, NoRepeat3) { static hwlm_error_t safeExecStreaming(const FDR *fdr, const u8 *hbuf, size_t hlen, const u8 *buf, size_t len, size_t start, - HWLMCallback cb, void *ctxt, hwlm_group_t groups, - u8 *stream_state) { + HWLMCallback cb, void *ctxt, + hwlm_group_t groups) { array<u8, 16> wrapped_history = {{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}}; if (hlen < 16) { @@ -346,8 +346,7 @@ hwlm_error_t safeExecStreaming(const FDR *fdr, const u8 *hbuf, size_t hlen, memcpy(new_hbuf, hbuf, hlen); hbuf = new_hbuf; } - return fdrExecStreaming(fdr, hbuf, hlen, buf, len, start, cb, ctxt, groups, - stream_state); + return fdrExecStreaming(fdr, hbuf, hlen, buf, len, start, cb, ctxt, groups); } TEST_P(FDRp, SmallStreaming) { @@ -366,7 +365,7 @@ TEST_P(FDRp, SmallStreaming) { expected.push_back(match(2, 2, 1)); safeExecStreaming(fdr.get(), (const u8 *)"", 0, (const u8 *)"aaar", 4, 0, - decentCallback, &matches, HWLM_ALL_GROUPS, nullptr); + decentCallback, &matches, HWLM_ALL_GROUPS); for (u32 i = 0; i < MIN(expected.size(), matches.size()); i++) { EXPECT_EQ(expected[i], matches[i]); } @@ -378,7 +377,7 @@ TEST_P(FDRp, SmallStreaming) { expected.push_back(match(1, 8, 10)); safeExecStreaming(fdr.get(), (const u8 *)"aaar", 4, (const u8 *)"dvark", 5, - 0, decentCallback, &matches, HWLM_ALL_GROUPS, nullptr); + 0, decentCallback, &matches, HWLM_ALL_GROUPS); for (u32 i = 0; i < 
MIN(expected.size(), matches.size()); i++) { EXPECT_EQ(expected[i], matches[i] + 4); @@ -407,7 +406,7 @@ TEST_P(FDRp, SmallStreaming2) { safeExecStreaming(fdr.get(), (const u8 *)"foobar", 6, (const u8 *)"aardvarkkk", 10, 0, decentCallback, &matches, - HWLM_ALL_GROUPS, nullptr); + HWLM_ALL_GROUPS); for (u32 i = 0; i < MIN(expected.size(), matches.size()); i++) { EXPECT_EQ(expected[i], matches[i] + 6); @@ -445,44 +444,6 @@ TEST_P(FDRp, LongLiteral) { EXPECT_EQ(0U, count); } -TEST_P(FDRp, VeryLongLiteral) { - const u32 hint = GetParam(); - SCOPED_TRACE(hint); - vector<hwlmLiteral> lits; - - string s1000; - for(int i = 0; i < 1000; i++) { - s1000 += char('A' + i % 10); - } - - string s66k; - for(int i = 0; i < 66; i++) { - s66k += s1000; - } - - string corpus = s66k + s66k; - lits.push_back(hwlmLiteral(s66k.c_str(), 0, 10)); - - auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); - CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); - - vector<match> matches; - u32 rv = fdrExec(fdr.get(), (const u8 *)s66k.c_str(), s66k.size(), 0, - decentCallback, &matches, HWLM_ALL_GROUPS); - EXPECT_EQ(0U, rv); - ASSERT_EQ(1U, matches.size()); - ASSERT_EQ(match(0, 65999, 10), matches[0]); - - matches.clear(); - rv = fdrExec(fdr.get(), (const u8 *)corpus.c_str(), corpus.size(), 0, - decentCallback, &matches, HWLM_ALL_GROUPS); - EXPECT_EQ(0U, rv); - for (u32 i = 0; i < matches.size(); i++) { - ASSERT_EQ(match(10 * i, 65999 + 10 * i, 10), matches[i]); - } - EXPECT_EQ(6601U, matches.size()); -} - TEST_P(FDRp, moveByteStream) { const u32 hint = GetParam(); SCOPED_TRACE(hint); @@ -538,9 +499,9 @@ TEST_P(FDRp, Stream1) { // check matches vector<match> matches; - fdrStatus = safeExecStreaming( - fdr.get(), (const u8 *)data1, data_len1, (const u8 *)data2, data_len2, - 0, decentCallback, &matches, HWLM_ALL_GROUPS, nullptr); + fdrStatus = safeExecStreaming(fdr.get(), (const u8 *)data1, data_len1, + (const u8 *)data2, data_len2, 0, + decentCallback, &matches, HWLM_ALL_GROUPS); ASSERT_EQ(0, fdrStatus); 
ASSERT_EQ(4U, matches.size()); @@ -783,9 +744,9 @@ TEST(FDR, FDRTermS) { // check matches vector<match> matches; - fdrStatus = safeExecStreaming( - fdr.get(), (const u8 *)data1, data_len1, (const u8 *)data2, data_len2, - 0, decentCallbackT, &matches, HWLM_ALL_GROUPS, nullptr); + fdrStatus = safeExecStreaming(fdr.get(), (const u8 *)data1, data_len1, + (const u8 *)data2, data_len2, 0, + decentCallbackT, &matches, HWLM_ALL_GROUPS); ASSERT_EQ(HWLM_TERMINATED, fdrStatus); ASSERT_EQ(1U, matches.size()); @@ -812,30 +773,3 @@ ASSERT_EQ(1U, matches.size()); } - -TEST(FDR, ManyLengths) { - // UE-2400: we had a crash due to div by zero in the compiler when given a - // set of literals with precisely 512 different lengths. - const u32 num = 512; - vector<hwlmLiteral> lits; - char c = 0; - string s; - for (u32 i = 0; i < num; i++) { - s.push_back(c++); - lits.push_back(hwlmLiteral(s, false, i + 1)); - } - - auto fdr = fdrBuildTable(lits, false, get_current_target(), Grey()); - ASSERT_TRUE(fdr != nullptr); - - // Confirm that we can scan against this FDR table as well. 
- - vector<match> matches; - - hwlm_error_t fdrStatus = - fdrExec(fdr.get(), (const u8 *)s.c_str(), s.size(), 0, decentCallback, - &matches, HWLM_ALL_GROUPS); - ASSERT_EQ(HWLM_SUCCESS, fdrStatus); - - ASSERT_EQ(768U, matches.size()); -} diff --git a/unit/internal/fdr_flood.cpp b/unit/internal/fdr_flood.cpp index 68d8f632..7b00ac4c 100644 --- a/unit/internal/fdr_flood.cpp +++ b/unit/internal/fdr_flood.cpp @@ -495,7 +495,7 @@ TEST_P(FDRFloodp, StreamingMask) { const u8 *fhist = fake_history.data() + fake_history_size; fdrStatus = fdrExecStreaming(fdr.get(), fhist, 0, d, streamChunk, 0, countCallback, &matchesCounts, - HWLM_ALL_GROUPS, nullptr); + HWLM_ALL_GROUPS); ASSERT_EQ(0, fdrStatus); for (u32 j = streamChunk; j < dataSize; j += streamChunk) { if (j < 16) { @@ -506,12 +506,12 @@ fdrStatus = fdrExecStreaming(fdr.get(), tmp_d, j, tmp_d + j, streamChunk, 0, countCallback, &matchesCounts, - HWLM_ALL_GROUPS, nullptr); + HWLM_ALL_GROUPS); } else { fdrStatus = fdrExecStreaming(fdr.get(), d + j - 8, 8, d + j, streamChunk, 0, countCallback, &matchesCounts, - HWLM_ALL_GROUPS, nullptr); + HWLM_ALL_GROUPS); } ASSERT_EQ(0, fdrStatus); }