diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 2871138a..80e6450d 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4351,6 +4351,7 @@ void makeCheckLiteralInstruction(const RoseBuildImpl &build, if (lit.table != ROSE_FLOATING) { return; } + assert(bc.longLitLengthThreshold > 0); if (lit.s.length() <= bc.longLitLengthThreshold) { return; } @@ -4937,6 +4938,8 @@ void allocateFinalIdToSet(RoseBuildImpl &build, const set &lits, * ids and squash the same roles and have the same group squashing * behaviour. Benefits literals cannot be merged. */ + assert(longLitLengthThreshold > 0); + for (u32 int_id : lits) { rose_literal_info &curr_info = literal_info[int_id]; const rose_literal_id &lit = build.literals.right.at(int_id); diff --git a/src/rose/rose_build_long_lit.cpp b/src/rose/rose_build_long_lit.cpp index c01bdc8f..c32f49d0 100644 --- a/src/rose/rose_build_long_lit.cpp +++ b/src/rose/rose_build_long_lit.cpp @@ -36,17 +36,28 @@ #include "util/verify_types.h" #include "util/compile_context.h" +#include +#include + using namespace std; namespace ue2 { -/** \brief Minimum size for a non-empty hash table. */ -static constexpr u32 MIN_HASH_TABLE_SIZE = 4096; +/** \brief Minimum size for a non-empty hash table. Must be a power of two. */ +static constexpr u32 MIN_HASH_TABLE_SIZE = 128; + +/** \brief Maximum load factor (between zero and one) for a hash table. */ +static constexpr double MAX_HASH_TABLE_LOAD = 0.7; + +/** \brief Minimum size (in bits) for a bloom filter. Must be a power of two. */ +static constexpr u32 MIN_BLOOM_FILTER_SIZE = 256; + +/** \brief Maximum load factor (between zero and one) for a bloom filter. */ +static constexpr double MAX_BLOOM_FILTER_LOAD = 0.25; struct LongLitModeInfo { - u32 boundary = 0; //!< One above the largest index for this mode. - u32 positions = 0; //!< Total number of string positions. - u32 hashEntries = 0; //!< Number of hash table entries. 
+ u32 num_literals = 0; //!< Number of strings for this mode. + u32 hashed_positions = 0; //!< Number of hashable string positions. }; struct LongLitInfo { @@ -66,54 +77,120 @@ static LongLitInfo analyzeLongLits(const vector &lits, size_t max_len) { LongLitInfo info; - u32 hashedPositionsCase = 0; - u32 hashedPositionsNocase = 0; - - // Caseful boundary is the index of the first nocase literal, as we're - // ordered (caseful, nocase). - auto first_nocase = find_if(begin(lits), end(lits), - [](const ue2_case_string &lit) { return lit.nocase; }); - info.caseful.boundary = verify_u32(distance(lits.begin(), first_nocase)); - - // Nocase boundary is the size of the literal set. - info.nocase.boundary = verify_u32(lits.size()); for (const auto &lit : lits) { - if (lit.nocase) { - hashedPositionsNocase += lit.s.size() - max_len; - info.nocase.positions += lit.s.size(); - } else { - hashedPositionsCase += lit.s.size() - max_len; - info.caseful.positions += lit.s.size(); - } + auto &lit_info = lit.nocase ? info.nocase : info.caseful; + assert(lit.s.size() > max_len); + lit_info.num_literals++; + lit_info.hashed_positions += lit.s.size() - max_len; } - info.caseful.hashEntries = hashedPositionsCase - ? roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, hashedPositionsCase)) - : 0; - info.nocase.hashEntries = hashedPositionsNocase - ? 
roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, hashedPositionsNocase)) - : 0; - - DEBUG_PRINTF("caseful: boundary=%u, positions=%u, hashedPositions=%u, " - "hashEntries=%u\n", - info.caseful.boundary, info.caseful.positions, - hashedPositionsCase, info.caseful.hashEntries); - DEBUG_PRINTF("nocase: boundary=%u, positions=%u, hashedPositions=%u, " - "hashEntries=%u\n", - info.nocase.boundary, info.nocase.positions, - hashedPositionsNocase, info.nocase.hashEntries); + DEBUG_PRINTF("case: hashed %u positions\n", info.caseful.hashed_positions); + DEBUG_PRINTF("nocase: hashed %u positions\n", info.nocase.hashed_positions); return info; } static -void fillHashes(const vector &lits, size_t max_len, - RoseLongLitHashEntry *tab, size_t numEntries, bool nocase, - const map &litToOffsetVal) { - const u32 nbits = lg2(numEntries); - map>> bucketToLitOffPairs; - map bucketToBitfield; +void addToBloomFilter(vector &bloom, const u8 *substr, bool nocase) { + const u32 num_keys = verify_u32(bloom.size() * 8); + const u32 key_mask = (1U << lg2(num_keys)) -1; + + const auto hash_functions = { bloomHash_1, bloomHash_2, bloomHash_3 }; + for (const auto &hash_func : hash_functions) { + u32 hash = hash_func(substr, nocase); + u32 key = hash & key_mask; + DEBUG_PRINTF("set key %u (of %zu)\n", key, bloom.size() * 8); + bloom[key / 8] |= 1U << (key % 8); + } +} + +static +size_t bloomOccupancy(const vector &bloom) { + return accumulate(begin(bloom), end(bloom), 0, + [](const size_t &sum, const u8 &elem) { + return sum + popcount32(elem); + }); +} + +static +double bloomLoad(const vector &bloom) { + return (double)bloomOccupancy(bloom) / (double)(bloom.size() * 8); +} + +static +vector buildBloomFilter(const vector &lits, size_t max_len, + size_t num_entries, bool nocase) { + assert(num_entries % 8 == 0); + assert((num_entries & (num_entries - 1)) == 0); // Must be power of two. 
+ + vector bloom(num_entries / 8, 0); + + if (!num_entries) { + return bloom; + } + + for (const auto &lit : lits) { + if (nocase != lit.nocase) { + continue; + } + for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) { + const u8 *substr = (const u8 *)lit.s.c_str() + offset; + addToBloomFilter(bloom, substr, nocase); + } + } + + DEBUG_PRINTF("%s bloom filter occupancy %zu of %zu entries\n", + nocase ? "nocase" : "caseful", bloomOccupancy(bloom), + num_entries); + + return bloom; +} + + +static +vector makeBloomFilter(const vector &lits, + size_t max_len, bool nocase) { + vector bloom; + + size_t num_entries = MIN_BLOOM_FILTER_SIZE; + for (;;) { + bloom = buildBloomFilter(lits, max_len, num_entries, nocase); + DEBUG_PRINTF("built %s bloom for %zu entries: load %f\n", + nocase ? "nocase" : "caseful", num_entries, + bloomLoad(bloom)); + if (bloomLoad(bloom) < MAX_BLOOM_FILTER_LOAD) { + break; + } + num_entries *= 2; + } + return bloom; +} + +static +size_t hashTableOccupancy(const vector &tab) { + return count_if(begin(tab), end(tab), [](const RoseLongLitHashEntry &ent) { + return ent.str_offset != 0; + }); +} + +static +double hashTableLoad(const vector &tab) { + return (double)hashTableOccupancy(tab) / (double)(tab.size()); +} + +static +vector buildHashTable(const vector &lits, + size_t max_len, + const vector &litToOffsetVal, + size_t numEntries, bool nocase) { + vector tab(numEntries, {0,0}); + + if (!numEntries) { + return tab; + } + + map>> hashToLitOffPairs; for (u32 lit_id = 0; lit_id < lits.size(); lit_id++) { const ue2_case_string &lit = lits[lit_id]; @@ -122,37 +199,41 @@ void fillHashes(const vector &lits, size_t max_len, } for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) { const u8 *substr = (const u8 *)lit.s.c_str() + offset; - u32 h = hashLongLiteral(substr, max_len, lit.nocase); - u32 h_ent = h & ((1U << nbits) - 1); - u32 h_low = (h >> nbits) & 63; - bucketToLitOffPairs[h_ent].emplace_back(lit_id, offset); - 
bucketToBitfield[h_ent] |= (1ULL << h_low); + u32 hash = hashLongLiteral(substr, max_len, lit.nocase); + hashToLitOffPairs[hash].emplace_back(lit_id, offset); } } - // this used to be a set, but a bitset is much much faster given that - // we're using it only for membership testing. - boost::dynamic_bitset<> filledBuckets(numEntries); // all zero by default. + for (auto &m : hashToLitOffPairs) { + u32 hash = m.first; + vector> &d = m.second; - // sweep out bitfield entries and save the results swapped accordingly - // also, anything with bitfield entries is put in filledBuckets - for (const auto &m : bucketToBitfield) { - const u32 &bucket = m.first; - const u64a &contents = m.second; - tab[bucket].bitfield = contents; - filledBuckets.set(bucket); - } + // Sort by (offset, string) so that we'll be able to remove identical + // string prefixes. + stable_sort(begin(d), end(d), + [&](const pair &a, const pair &b) { + const auto &str_a = lits[a.first].s; + const auto &str_b = lits[b.first].s; + return tie(a.second, str_a) < tie(b.second, str_b); + }); - // store out all our chains based on free values in our hash table. - // find nearest free locations that are empty (there will always be more - // entries than strings, at present) - for (auto &m : bucketToLitOffPairs) { - u32 bucket = m.first; - deque> &d = m.second; + // Remove entries that point to the same literal prefix. + d.erase(unique(begin(d), end(d), + [&](const pair &a, const pair &b) { + if (a.second != b.second) { + return false; + } + const auto &str_a = lits[a.first].s; + const auto &str_b = lits[b.first].s; + const size_t len = max_len + a.second; + return equal(begin(str_a), begin(str_a) + len, + begin(str_b)); + }), + end(d)); - // sort d by distance of the residual string (len minus our depth into - // the string). We need to put the 'furthest back' string first... - stable_sort(d.begin(), d.end(), + // Sort d by distance of the residual string (len minus our depth into + // the string). 
We need to put the 'furthest back' string first. + stable_sort(begin(d), end(d), [](const pair &a, const pair &b) { if (a.second != b.second) { return a.second > b.second; /* longest is first */ @@ -160,47 +241,79 @@ void fillHashes(const vector &lits, size_t max_len, return a.first < b.first; }); - while (1) { - // first time through is always at bucket, then we fill in links - filledBuckets.set(bucket); - RoseLongLitHashEntry *ent = &tab[bucket]; - u32 lit_id = d.front().first; - u32 offset = d.front().second; + u32 bucket = hash % numEntries; - ent->state = verify_u32(litToOffsetVal.at(lit_id) + - offset + max_len); - ent->link = (u32)LINK_INVALID; - - d.pop_front(); - if (d.empty()) { - break; - } - // now, if there is another value - // find a bucket for it and put in 'bucket' and repeat - // all we really need to do is find something not in filledBuckets, - // ideally something close to bucket - // we search backward and forward from bucket, trying to stay as - // close as possible. - UNUSED bool found = false; - int bucket_candidate = 0; - for (u32 k = 1; k < numEntries * 2; k++) { - bucket_candidate = bucket + (((k & 1) == 0) - ? (-(int)k / 2) : (k / 2)); - if (bucket_candidate < 0 || - (size_t)bucket_candidate >= numEntries) { - continue; - } - if (!filledBuckets.test(bucket_candidate)) { - found = true; - break; + // Placement via linear probing. + for (const auto &lit_offset : d) { + while (tab[bucket].str_offset != 0) { + bucket++; + if (bucket == numEntries) { + bucket = 0; } } - assert(found); - bucket = bucket_candidate; - ent->link = bucket; + u32 lit_id = lit_offset.first; + u32 offset = lit_offset.second; + + DEBUG_PRINTF("hash 0x%08x lit_id %u offset %u bucket %u\n", hash, + lit_id, offset, bucket); + + auto &entry = tab[bucket]; + entry.str_offset = verify_u32(litToOffsetVal.at(lit_id)); + assert(entry.str_offset != 0); + entry.str_len = offset + max_len; } } + + DEBUG_PRINTF("%s hash table occupancy %zu of %zu entries\n", + nocase ? 
"nocase" : "caseful", hashTableOccupancy(tab), + numEntries); + + return tab; +} + +static +vector makeHashTable(const vector &lits, + size_t max_len, + const vector &litToOffsetVal, + u32 numPositions, bool nocase) { + vector tab; + + // Note: for the hash table, we must always have at least enough entries + // for the number of hashable positions. + size_t num_entries = roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, + numPositions)); + + for (;;) { + tab = buildHashTable(lits, max_len, litToOffsetVal, num_entries, + nocase); + DEBUG_PRINTF("built %s hash table for %zu entries: load %f\n", + nocase ? "nocase" : "caseful", num_entries, + hashTableLoad(tab)); + if (hashTableLoad(tab) < MAX_HASH_TABLE_LOAD) { + break; + } + num_entries *= 2; + } + return tab; +} + +static +vector buildLits(const vector &lits, u32 baseOffset, + vector &litToOffsetVal) { + vector blob; + litToOffsetVal.resize(lits.size(), 0); + + u32 lit_id = 0; + for (const auto &lit : lits) { + u32 offset = baseOffset + verify_u32(blob.size()); + blob.insert(blob.end(), begin(lit.s), end(lit.s)); + litToOffsetVal[lit_id] = offset; + lit_id++; + } + + DEBUG_PRINTF("built %zu bytes of strings\n", blob.size()); + return blob; } u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob, @@ -251,89 +364,69 @@ u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob, LongLitInfo info = analyzeLongLits(lits, max_len); - // first assess the size and find our caseless threshold - size_t headerSize = ROUNDUP_16(sizeof(RoseLongLitTable)); + vector litToOffsetVal; + const size_t headerSize = ROUNDUP_16(sizeof(RoseLongLitTable)); + vector lit_blob = buildLits(lits, headerSize, litToOffsetVal); - size_t litTabOffset = headerSize; + // Build caseful bloom filter and hash table. 
+ vector bloom_case; + vector tab_case; + if (info.caseful.num_literals) { + bloom_case = makeBloomFilter(lits, max_len, false); + tab_case = makeHashTable(lits, max_len, litToOffsetVal, + info.caseful.hashed_positions, false); + } - size_t litTabNumEntries = lits.size() + 1; - size_t litTabSize = ROUNDUP_16(litTabNumEntries * sizeof(RoseLongLiteral)); + // Build nocase bloom filter and hash table. + vector bloom_nocase; + vector tab_nocase; + if (info.nocase.num_literals) { + bloom_nocase = makeBloomFilter(lits, max_len, true); + tab_nocase = makeHashTable(lits, max_len, litToOffsetVal, + info.nocase.hashed_positions, true); + } - size_t wholeLitTabOffset = litTabOffset + litTabSize; - size_t totalWholeLitTabSize = - ROUNDUP_16(info.caseful.positions + info.nocase.positions); + size_t wholeLitTabSize = ROUNDUP_16(byte_length(lit_blob)); + size_t htOffsetCase = headerSize + wholeLitTabSize; + size_t htOffsetNocase = htOffsetCase + byte_length(tab_case); + size_t bloomOffsetCase = htOffsetNocase + byte_length(tab_nocase); + size_t bloomOffsetNocase = bloomOffsetCase + byte_length(bloom_case); - size_t htOffsetCase = wholeLitTabOffset + totalWholeLitTabSize; - size_t htSizeCase = info.caseful.hashEntries * sizeof(RoseLongLitHashEntry); - size_t htOffsetNocase = htOffsetCase + htSizeCase; - size_t htSizeNocase = - info.nocase.hashEntries * sizeof(RoseLongLitHashEntry); - - size_t tabSize = ROUNDUP_16(htOffsetNocase + htSizeNocase); + size_t tabSize = ROUNDUP_16(bloomOffsetNocase + byte_length(bloom_nocase)); // need to add +2 to both of these to allow space for the actual largest // value as well as handling the fact that we add one to the space when // storing out a position to allow zero to mean "no stream state value" - u8 streamBitsCase = lg2(roundUpToPowerOfTwo(info.caseful.positions + 2)); - u8 streamBitsNocase = lg2(roundUpToPowerOfTwo(info.nocase.positions + 2)); + u8 streamBitsCase = lg2(roundUpToPowerOfTwo(tab_case.size() + 2)); + u8 streamBitsNocase = 
lg2(roundUpToPowerOfTwo(tab_nocase.size() + 2)); u32 tot_state_bytes = ROUNDUP_N(streamBitsCase + streamBitsNocase, 8) / 8; auto table = aligned_zmalloc_unique(tabSize); assert(table); // otherwise would have thrown std::bad_alloc - // then fill it in - char *ptr = table.get(); - RoseLongLitTable *header = (RoseLongLitTable *)ptr; - // fill in header + // Fill in the RoseLongLitTable header structure. + RoseLongLitTable *header = (RoseLongLitTable *)(table.get()); + header->size = verify_u32(tabSize); header->maxLen = verify_u8(max_len); // u8 so doesn't matter; won't go > 255 - header->boundaryCase = info.caseful.boundary; - header->hashOffsetCase = verify_u32(htOffsetCase); - header->hashNBitsCase = lg2(info.caseful.hashEntries); - header->streamStateBitsCase = streamBitsCase; - header->boundaryNocase = info.nocase.boundary; - header->hashOffsetNocase = verify_u32(htOffsetNocase); - header->hashNBitsNocase = lg2(info.nocase.hashEntries); - header->streamStateBitsNocase = streamBitsNocase; + header->caseful.hashOffset = verify_u32(htOffsetCase); + header->caseful.hashBits = lg2(tab_case.size()); + header->caseful.streamStateBits = streamBitsCase; + header->caseful.bloomOffset = verify_u32(bloomOffsetCase); + header->caseful.bloomBits = lg2(bloom_case.size() * 8); + header->nocase.hashOffset = verify_u32(htOffsetNocase); + header->nocase.hashBits = lg2(tab_nocase.size()); + header->nocase.streamStateBits = streamBitsNocase; + header->nocase.bloomOffset = verify_u32(bloomOffsetNocase); + header->nocase.bloomBits = lg2(bloom_nocase.size() * 8); assert(tot_state_bytes < sizeof(u64a)); header->streamStateBytes = verify_u8(tot_state_bytes); // u8 - ptr += headerSize; - - // now fill in the rest - - RoseLongLiteral *litTabPtr = (RoseLongLiteral *)ptr; - ptr += litTabSize; - - map litToOffsetVal; - for (auto i = lits.begin(), e = lits.end(); i != e; ++i) { - u32 entry = verify_u32(i - lits.begin()); - u32 offset = verify_u32(ptr - table.get()); - - // point the table 
entry to the string location - litTabPtr[entry].offset = offset; - - litToOffsetVal[entry] = offset; - - // copy the string into the string location - const auto &s = i->s; - memcpy(ptr, s.c_str(), s.size()); - - ptr += s.size(); // and the string location - } - - // fill in final lit table entry with current ptr (serves as end value) - litTabPtr[lits.size()].offset = verify_u32(ptr - table.get()); - - // fill hash tables - ptr = table.get() + htOffsetCase; - fillHashes(lits, max_len, (RoseLongLitHashEntry *)ptr, - info.caseful.hashEntries, false, litToOffsetVal); - ptr += htSizeCase; - fillHashes(lits, max_len, (RoseLongLitHashEntry *)ptr, - info.nocase.hashEntries, true, litToOffsetVal); - ptr += htSizeNocase; - - assert(ptr <= table.get() + tabSize); + // Copy in the literal strings, hash tables and bloom filters, + copy_bytes(table.get() + headerSize, lit_blob); + copy_bytes(table.get() + htOffsetCase, tab_case); + copy_bytes(table.get() + bloomOffsetCase, bloom_case); + copy_bytes(table.get() + htOffsetNocase, tab_nocase); + copy_bytes(table.get() + bloomOffsetNocase, bloom_nocase); DEBUG_PRINTF("built streaming table, size=%zu\n", tabSize); DEBUG_PRINTF("requires %zu bytes of history\n", max_len); diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 9a0bd28c..1ab11f9f 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -49,9 +49,10 @@ #include #include #include +#include #include -#include #include +#include #include #ifndef DUMP_SUPPORT @@ -1049,6 +1050,39 @@ void dumpAnchoredStats(const void *atable, FILE *f) { } +static +void dumpLongLiteralSubtable(const RoseLongLitTable *ll_table, + const RoseLongLitSubtable *ll_sub, FILE *f) { + if (!ll_sub->hashBits) { + fprintf(f, " \n"); + return; + } + + const char *base = (const char *)ll_table; + + u32 nbits = ll_sub->hashBits; + u32 num_entries = 1U << nbits; + const auto *tab = (const RoseLongLitHashEntry *)(base + ll_sub->hashOffset); + u32 hash_occ = + count_if(tab, tab + 
num_entries, [](const RoseLongLitHashEntry &ent) { + return ent.str_offset != 0; + }); + float hash_occ_percent = ((float)hash_occ / (float)num_entries) * 100; + + fprintf(f, " hash table : %u bits, occupancy %u/%u (%0.1f%%)\n", + nbits, hash_occ, num_entries, hash_occ_percent); + + u32 bloom_bits = ll_sub->bloomBits; + u32 bloom_size = 1U << bloom_bits; + const u8 *bloom = (const u8 *)base + ll_sub->bloomOffset; + u32 bloom_occ = accumulate(bloom, bloom + bloom_size / 8, 0, + [](const u32 &sum, const u8 &elem) { return sum + popcount32(elem); }); + float bloom_occ_percent = ((float)bloom_occ / (float)(bloom_size)) * 100; + + fprintf(f, " bloom filter : %u bits, occupancy %u/%u (%0.1f%%)\n", + bloom_bits, bloom_occ, bloom_size, bloom_occ_percent); +} + static void dumpLongLiteralTable(const RoseEngine *t, FILE *f) { if (!t->longLitTableOffset) { @@ -1062,17 +1096,15 @@ void dumpLongLiteralTable(const RoseEngine *t, FILE *f) { (const struct RoseLongLitTable *)loadFromByteCodeOffset( t, t->longLitTableOffset); - u32 num_caseful = ll_table->boundaryCase; - u32 num_caseless = ll_table->boundaryNocase - num_caseful; + fprintf(f, " total size : %u bytes\n", ll_table->size); + fprintf(f, " longest len : %u\n", ll_table->maxLen); + fprintf(f, " stream state : %u bytes\n", ll_table->streamStateBytes); - fprintf(f, " longest len: %u\n", ll_table->maxLen); - fprintf(f, " counts: %u caseful, %u caseless\n", num_caseful, - num_caseless); - fprintf(f, " hash bits: %u caseful, %u caseless\n", - ll_table->hashNBitsCase, ll_table->hashNBitsNocase); - fprintf(f, " state bits: %u caseful, %u caseless\n", - ll_table->streamStateBitsCase, ll_table->streamStateBitsNocase); - fprintf(f, " stream state: %u bytes\n", ll_table->streamStateBytes); + fprintf(f, " caseful:\n"); + dumpLongLiteralSubtable(ll_table, &ll_table->caseful, f); + + fprintf(f, " nocase:\n"); + dumpLongLiteralSubtable(ll_table, &ll_table->nocase, f); } // Externally accessible functions diff --git 
a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 32805ab3..3a366f0d 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -446,51 +446,49 @@ struct ALIGN_CL_DIRECTIVE anchored_matcher_info { u32 anchoredMinDistance; /* start of region to run anchored table over */ }; +/** + * \brief Long literal subtable for a particular mode (caseful or nocase). + */ +struct RoseLongLitSubtable { + /** + * \brief Offset of the hash table (relative to RoseLongLitTable base). + * + * Offset is zero if no such table exists. + */ + u32 hashOffset; + + /** + * \brief Offset of the bloom filter (relative to RoseLongLitTable base). + * + * Offset is zero if no such table exists. + */ + u32 bloomOffset; + + /** \brief lg2 of the size of the hash table. */ + u8 hashBits; + + /** \brief lg2 of the size of the bloom filter in bits. */ + u8 bloomBits; + + /** \brief Number of bits of packed stream state used. */ + u8 streamStateBits; +}; + /** * \brief Long literal table header. */ struct RoseLongLitTable { - /** \brief String ID one beyond the maximum entry for caseful literals. */ - u32 boundaryCase; - /** - * \brief String ID one beyond the maximum entry for caseless literals. - * This is also the total size of the literal table. + * \brief Total size of the whole table (including strings, bloom filters, + * hash tables). */ - u32 boundaryNocase; + u32 size; - /** - * \brief Offset of the caseful hash table (relative to RoseLongLitTable - * base). - * - * Offset is zero if no such table exists. - */ - u32 hashOffsetCase; + /** \brief Caseful sub-table (hash table and bloom filter). */ + struct RoseLongLitSubtable caseful; - /** - * \brief Offset of the caseless hash table (relative to RoseLongLitTable - * base). - * - * Offset is zero if no such table exists. - */ - u32 hashOffsetNocase; - - /** \brief lg2 of the size of the caseful hash table. */ - u32 hashNBitsCase; - - /** \brief lg2 of the size of the caseless hash table. 
*/ - u32 hashNBitsNocase; - - /** - * \brief Number of bits of packed stream state for the caseful hash table. - */ - u8 streamStateBitsCase; - - /** - * \brief Number of bits of packed stream state for the caseless hash - * table. - */ - u8 streamStateBitsNocase; + /** \brief Caseless sub-table (hash table and bloom filter). */ + struct RoseLongLitSubtable nocase; /** \brief Total size of packed stream state in bytes. */ u8 streamStateBytes; @@ -499,39 +497,19 @@ struct RoseLongLitTable { u8 maxLen; }; -/** - * \brief One of these structures per literal entry in our long literal table. - */ -struct RoseLongLiteral { - /** - * \brief Offset of the literal string itself, relative to - * RoseLongLitTable base. - */ - u32 offset; -}; - -/** \brief "No further links" value used for \ref RoseLongLitHashEntry::link. */ -#define LINK_INVALID 0xffffffff - /** * \brief One of these structures per hash table entry in our long literal * table. */ struct RoseLongLitHashEntry { /** - * \brief Bitfield used as a quick guard for hash buckets. - * - * For a given hash value N, the low six bits of N are taken and the - * corresponding bit is switched on in this bitfield if this bucket is used - * for that hash. + * \brief Offset of the literal string itself, relative to + * RoseLongLitTable base. Zero if this bucket is empty. */ - u64a bitfield; + u32 str_offset; - /** \brief Offset in the literal table for this string. */ - u32 state; - - /** \brief Hash table index of next entry in the chain for this bucket. */ - u32 link; + /** \brief Length of the literal prefix that this entry refers to. 
*/ + u32 str_len; }; static really_inline diff --git a/src/rose/stream.c b/src/rose/stream.c index 72286b4b..6e4d0add 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -551,6 +551,11 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { tctxt->minMatchOffset = offset; tctxt->minNonMpvMatchOffset = offset; tctxt->next_mpv_offset = 0; + tctxt->ll_buf = scratch->core_info.hbuf; + tctxt->ll_len = scratch->core_info.hlen; + tctxt->ll_buf_nocase = scratch->core_info.hbuf; + tctxt->ll_len_nocase = scratch->core_info.hlen; + DEBUG_PRINTF("BEGIN: history len=%zu, buffer len=%zu groups=%016llx\n", scratch->core_info.hlen, scratch->core_info.len, tctxt->groups); @@ -590,18 +595,14 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { MIN(t->floatingDistance, length + offset) - offset : 0; } + loadLongLiteralState(t, state, scratch); + size_t hlength = scratch->core_info.hlen; - char rebuild = 0; - - if (hlength) { - // Can only have long literal state or rebuild if this is not the - // first write to this stream. 
- loadLongLiteralState(t, state, scratch); - rebuild = (scratch->core_info.status & STATUS_DELAY_DIRTY) && - (t->maxFloatingDelayedMatch == ROSE_BOUND_INF || - offset < t->maxFloatingDelayedMatch); - } + char rebuild = hlength && + (scratch->core_info.status & STATUS_DELAY_DIRTY) && + (t->maxFloatingDelayedMatch == ROSE_BOUND_INF || + offset < t->maxFloatingDelayedMatch); DEBUG_PRINTF("**rebuild %hhd status %hhu mfdm %u, offset %llu\n", rebuild, scratch->core_info.status, t->maxFloatingDelayedMatch, offset); diff --git a/src/rose/stream_long_lit.h b/src/rose/stream_long_lit.h index 676544d7..d78e2863 100644 --- a/src/rose/stream_long_lit.h +++ b/src/rose/stream_long_lit.h @@ -36,52 +36,12 @@ #include "util/copybytes.h" static really_inline -const struct RoseLongLiteral * -getLitTab(const struct RoseLongLitTable *ll_table) { - return (const struct RoseLongLiteral *)((const char *)ll_table + - ROUNDUP_16(sizeof(struct RoseLongLitTable))); -} - -static really_inline -u32 get_start_lit_idx(const struct RoseLongLitTable *ll_table, - const char nocase) { - return nocase ? ll_table->boundaryCase : 0; -} - -static really_inline -u32 get_end_lit_idx(const struct RoseLongLitTable *ll_table, - const char nocase) { - return nocase ? 
ll_table->boundaryNocase : ll_table->boundaryCase; -} - -// search for the literal index that contains the current state -static rose_inline -u32 findLitTabEntry(const struct RoseLongLitTable *ll_table, - u32 stateValue, const char nocase) { - const struct RoseLongLiteral *litTab = getLitTab(ll_table); - u32 lo = get_start_lit_idx(ll_table, nocase); - u32 hi = get_end_lit_idx(ll_table, nocase); - - // Now move stateValue back by one so that we're looking for the - // litTab entry that includes it the string, not the one 'one past' it - stateValue -= 1; - assert(lo != hi); - assert(litTab[lo].offset <= stateValue); - assert(litTab[hi].offset > stateValue); - - // binary search to find the entry e such that: - // litTab[e].offsetToLiteral <= stateValue < litTab[e+1].offsetToLiteral - while (lo + 1 < hi) { - u32 mid = (lo + hi) / 2; - if (litTab[mid].offset <= stateValue) { - lo = mid; - } else { // (litTab[mid].offset > stateValue) { - hi = mid; - } - } - assert(litTab[lo].offset <= stateValue); - assert(litTab[hi].offset > stateValue); - return lo; +const struct RoseLongLitHashEntry * +getHashTableBase(const struct RoseLongLitTable *ll_table, + const struct RoseLongLitSubtable *ll_sub) { + assert(ll_sub->hashOffset); + return (const struct RoseLongLitHashEntry *)((const char *)ll_table + + ll_sub->hashOffset); } // Reads from stream state and unpacks values into stream state table. 
@@ -94,8 +54,8 @@ void loadLongLitStreamState(const struct RoseLongLitTable *ll_table, assert(state_case && state_nocase); u8 ss_bytes = ll_table->streamStateBytes; - u8 ssb = ll_table->streamStateBitsCase; - UNUSED u8 ssb_nc = ll_table->streamStateBitsNocase; + u8 ssb = ll_table->caseful.streamStateBits; + UNUSED u8 ssb_nc = ll_table->nocase.streamStateBits; assert(ss_bytes == (ssb + ssb_nc + 7) / 8); #if defined(ARCH_32_BIT) @@ -116,40 +76,22 @@ void loadLongLitStreamState(const struct RoseLongLitTable *ll_table, *state_nocase = (u32)(streamVal >> ssb); } -static really_inline -u32 getBaseOffsetOfLits(const struct RoseLongLitTable *ll_table, - const char nocase) { - u32 lit_idx = get_start_lit_idx(ll_table, nocase); - return getLitTab(ll_table)[lit_idx].offset; -} - -static really_inline -u32 unpackStateVal(const struct RoseLongLitTable *ll_table, const char nocase, - u32 v) { - return v + getBaseOffsetOfLits(ll_table, nocase) - 1; -} - -static really_inline -u32 packStateVal(const struct RoseLongLitTable *ll_table, const char nocase, - u32 v) { - return v - getBaseOffsetOfLits(ll_table, nocase) + 1; -} - static rose_inline void loadLongLiteralStateMode(struct hs_scratch *scratch, const struct RoseLongLitTable *ll_table, - const struct RoseLongLiteral *litTab, + const struct RoseLongLitSubtable *ll_sub, const u32 state, const char nocase) { if (!state) { DEBUG_PRINTF("no state for %s\n", nocase ? 
"caseless" : "caseful"); return; } - u32 stateValue = unpackStateVal(ll_table, nocase, state); - u32 idx = findLitTabEntry(ll_table, stateValue, nocase); - size_t found_offset = litTab[idx].offset; - const u8 *found_buf = found_offset + (const u8 *)ll_table; - size_t found_sz = stateValue - found_offset; + const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, ll_sub); + const struct RoseLongLitHashEntry *ent = tab + state - 1; + + assert(ent->str_offset + ent->str_len <= ll_table->size); + const u8 *found_buf = (const u8 *)ll_table + ent->str_offset; + size_t found_sz = ent->str_len; struct RoseContext *tctxt = &scratch->tctxt; if (nocase) { @@ -168,34 +110,42 @@ void loadLongLiteralState(const struct RoseEngine *t, char *state, return; } + // If we don't have any long literals in play, these values must point to + // the real history buffer so that CHECK_LITERAL instructions examine the + // history buffer. scratch->tctxt.ll_buf = scratch->core_info.hbuf; scratch->tctxt.ll_len = scratch->core_info.hlen; scratch->tctxt.ll_buf_nocase = scratch->core_info.hbuf; scratch->tctxt.ll_len_nocase = scratch->core_info.hlen; + if (!scratch->core_info.hlen) { + return; + } + const struct RoseLongLitTable *ll_table = getByOffset(t, t->longLitTableOffset); - const struct RoseLongLiteral *litTab = getLitTab(ll_table); const u8 *ll_state = getLongLitState(t, state); u32 state_case; u32 state_nocase; loadLongLitStreamState(ll_table, ll_state, &state_case, &state_nocase); - loadLongLiteralStateMode(scratch, ll_table, litTab, state_case, 0); - loadLongLiteralStateMode(scratch, ll_table, litTab, state_nocase, 1); + DEBUG_PRINTF("loaded {%u, %u}\n", state_case, state_nocase); + + loadLongLiteralStateMode(scratch, ll_table, &ll_table->caseful, + state_case, 0); + loadLongLiteralStateMode(scratch, ll_table, &ll_table->nocase, + state_nocase, 1); } static rose_inline char confirmLongLiteral(const struct RoseLongLitTable *ll_table, - const hs_scratch_t *scratch, u32 
hashState, + const struct hs_scratch *scratch, + const struct RoseLongLitHashEntry *ent, const char nocase) { - const struct RoseLongLiteral *litTab = getLitTab(ll_table); - u32 idx = findLitTabEntry(ll_table, hashState, nocase); - size_t found_offset = litTab[idx].offset; - const u8 *s = found_offset + (const u8 *)ll_table; - assert(hashState > found_offset); - size_t len = hashState - found_offset; + assert(ent->str_offset + ent->str_len <= ll_table->size); + const u8 *s = (const u8 *)ll_table + ent->str_offset; + size_t len = ent->str_len; const u8 *buf = scratch->core_info.buf; const size_t buf_len = scratch->core_info.len; @@ -225,14 +175,13 @@ char confirmLongLiteral(const struct RoseLongLitTable *ll_table, return 0; } - DEBUG_PRINTF("confirmed hashState=%u\n", hashState); return 1; } static rose_inline -void calcStreamingHash(const struct core_info *ci, - const struct RoseLongLitTable *ll_table, u8 hash_len, - u32 *hash_case, u32 *hash_nocase) { +const u8 *prepScanBuffer(const struct core_info *ci, + const struct RoseLongLitTable *ll_table, u8 *tempbuf) { + const u8 hash_len = ll_table->maxLen; assert(hash_len >= LONG_LIT_HASH_LEN); // Our hash function operates over LONG_LIT_HASH_LEN bytes, starting from @@ -240,7 +189,6 @@ void calcStreamingHash(const struct core_info *ci, // entirely from either the current buffer or the history buffer, we pass // in the pointer directly; otherwise we must make a copy. 
- u8 tempbuf[LONG_LIT_HASH_LEN]; const u8 *base; if (hash_len > ci->len) { @@ -266,71 +214,7 @@ void calcStreamingHash(const struct core_info *ci, base = ci->buf + ci->len - hash_len; } - if (ll_table->hashNBitsCase) { - *hash_case = hashLongLiteral(base, LONG_LIT_HASH_LEN, 0); - DEBUG_PRINTF("caseful hash %u\n", *hash_case); - } - if (ll_table->hashNBitsNocase) { - *hash_nocase = hashLongLiteral(base, LONG_LIT_HASH_LEN, 1); - DEBUG_PRINTF("caseless hash %u\n", *hash_nocase); - } -} - -static really_inline -const struct RoseLongLitHashEntry * -getHashTableBase(const struct RoseLongLitTable *ll_table, const char nocase) { - const u32 hashOffset = nocase ? ll_table->hashOffsetNocase - : ll_table->hashOffsetCase; - return (const struct RoseLongLitHashEntry *)((const char *)ll_table + - hashOffset); -} - -static rose_inline -const struct RoseLongLitHashEntry * -getLongLitHashEnt(const struct RoseLongLitTable *ll_table, u32 h, - const char nocase) { - u32 nbits = nocase ? ll_table->hashNBitsNocase : ll_table->hashNBitsCase; - if (!nbits) { - return NULL; - } - - u32 h_ent = h & ((1 << nbits) - 1); - u32 h_low = (h >> nbits) & 63; - - const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, nocase); - const struct RoseLongLitHashEntry *ent = tab + h_ent; - - if (!((ent->bitfield >> h_low) & 0x1)) { - return NULL; - } - - return ent; -} - -static rose_inline -u32 storeLongLiteralStateMode(const struct hs_scratch *scratch, - const struct RoseLongLitTable *ll_table, - const struct RoseLongLitHashEntry *ent, - const char nocase) { - assert(ent); - assert(nocase ? ll_table->hashNBitsNocase : ll_table->hashNBitsCase); - - const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, nocase); - - u32 packed_state = 0; - while (1) { - if (confirmLongLiteral(ll_table, scratch, ent->state, nocase)) { - packed_state = packStateVal(ll_table, nocase, ent->state); - DEBUG_PRINTF("set %s state to %u\n", nocase ? 
"nocase" : "case", - packed_state); - break; - } - if (ent->link == LINK_INVALID) { - break; - } - ent = tab + ent->link; - } - return packed_state; + return base; } #ifndef NDEBUG @@ -359,8 +243,8 @@ void storeLongLitStreamState(const struct RoseLongLitTable *ll_table, assert(ll_state); u8 ss_bytes = ll_table->streamStateBytes; - u8 ssb = ll_table->streamStateBitsCase; - UNUSED u8 ssb_nc = ll_table->streamStateBitsNocase; + u8 ssb = ll_table->caseful.streamStateBits; + UNUSED u8 ssb_nc = ll_table->nocase.streamStateBits; assert(ss_bytes == ROUNDUP_N(ssb + ssb_nc, 8) / 8); assert(!streamingTableOverflow(state_case, state_nocase, ssb, ssb_nc)); @@ -380,6 +264,65 @@ void storeLongLitStreamState(const struct RoseLongLitTable *ll_table, partial_store_u64a(ll_state, stagingStreamState, ss_bytes); } +static really_inline +char has_bit(const u8 *data, u32 bit) { + return (data[bit / 8] >> (bit % 8)) & 1; +} + +static rose_inline +char bloomHasKey(const u8 *bloom, u32 bloom_mask, u32 hash) { + return has_bit(bloom, hash & bloom_mask); +} + +static rose_inline +char checkBloomFilter(const struct RoseLongLitTable *ll_table, + const struct RoseLongLitSubtable *ll_sub, + const u8 *scan_buf, char nocase) { + assert(ll_sub->bloomBits); + + const u8 *bloom = (const u8 *)ll_table + ll_sub->bloomOffset; + const u32 bloom_mask = (1U << ll_sub->bloomBits) - 1; + + char v = 1; + v &= bloomHasKey(bloom, bloom_mask, bloomHash_1(scan_buf, nocase)); + v &= bloomHasKey(bloom, bloom_mask, bloomHash_2(scan_buf, nocase)); + v &= bloomHasKey(bloom, bloom_mask, bloomHash_3(scan_buf, nocase)); + return v; +} + +/** + * \brief Look for a hit in the hash table. + * + * Returns zero if not found, otherwise returns (bucket + 1). 
+ */ +static rose_inline +u32 checkHashTable(const struct RoseLongLitTable *ll_table, + const struct RoseLongLitSubtable *ll_sub, const u8 *scan_buf, + const struct hs_scratch *scratch, char nocase) { + const u32 nbits = ll_sub->hashBits; + assert(nbits && nbits < 32); + const u32 num_entries = 1U << nbits; + + const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, ll_sub); + + u32 hash = hashLongLiteral(scan_buf, LONG_LIT_HASH_LEN, nocase); + u32 bucket = hash & ((1U << nbits) - 1); + + while (tab[bucket].str_offset != 0) { + DEBUG_PRINTF("checking bucket %u\n", bucket); + if (confirmLongLiteral(ll_table, scratch, &tab[bucket], nocase)) { + DEBUG_PRINTF("found hit for bucket %u\n", bucket); + return bucket + 1; + } + + if (++bucket == num_entries) { + bucket = 0; + } + } + + return 0; +} + static rose_inline void storeLongLiteralState(const struct RoseEngine *t, char *state, struct hs_scratch *scratch) { @@ -401,28 +344,22 @@ void storeLongLiteralState(const struct RoseEngine *t, char *state, // If we don't have enough history, we don't need to do anything. 
if (ll_table->maxLen <= ci->len + ci->hlen) { - u32 hash_case = 0; - u32 hash_nocase = 0; + u8 tempbuf[LONG_LIT_HASH_LEN]; + const u8 *scan_buf = prepScanBuffer(ci, ll_table, tempbuf); - calcStreamingHash(ci, ll_table, ll_table->maxLen, &hash_case, - &hash_nocase); - - const struct RoseLongLitHashEntry *ent_case = - getLongLitHashEnt(ll_table, hash_case, 0); - const struct RoseLongLitHashEntry *ent_nocase = - getLongLitHashEnt(ll_table, hash_nocase, 1); - - DEBUG_PRINTF("ent_caseful=%p, ent_caseless=%p\n", ent_case, ent_nocase); - - if (ent_case) { - state_case = storeLongLiteralStateMode(scratch, ll_table, - ent_case, 0); + if (ll_table->caseful.hashBits && + checkBloomFilter(ll_table, &ll_table->caseful, scan_buf, 0)) { + state_case = checkHashTable(ll_table, &ll_table->caseful, scan_buf, + scratch, 0); } - if (ent_nocase) { - state_nocase = storeLongLiteralStateMode(scratch, ll_table, - ent_nocase, 1); + if (ll_table->nocase.hashBits && + checkBloomFilter(ll_table, &ll_table->nocase, scan_buf, 1)) { + state_nocase = checkHashTable(ll_table, &ll_table->nocase, scan_buf, + scratch, 1); } + } else { + DEBUG_PRINTF("not enough history (%zu bytes)\n", ci->len + ci->hlen); } DEBUG_PRINTF("store {%u, %u}\n", state_case, state_nocase); diff --git a/src/rose/stream_long_lit_hash.h b/src/rose/stream_long_lit_hash.h index 0e1606c5..041f05e6 100644 --- a/src/rose/stream_long_lit_hash.h +++ b/src/rose/stream_long_lit_hash.h @@ -30,17 +30,18 @@ #define STREAM_LONG_LIT_HASH_H #include "ue2common.h" +#include "util/bitutils.h" #include "util/unaligned.h" /** \brief Length of the buffer operated on by \ref hashLongLiteral(). */ #define LONG_LIT_HASH_LEN 24 +/** \brief Multiplier used by all the hash functions below. */ +#define HASH_MULTIPLIER 0x0b4e0ef37bc32127ULL + /** \brief Hash function used for long literal table in streaming mode. 
*/ static really_inline u32 hashLongLiteral(const u8 *ptr, UNUSED size_t len, char nocase) { - const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL; - const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL; - // We unconditionally hash LONG_LIT_HASH_LEN bytes; all use cases of this // hash are for strings longer than this. assert(len >= 24); @@ -49,17 +50,56 @@ u32 hashLongLiteral(const u8 *ptr, UNUSED size_t len, char nocase) { u64a v2 = unaligned_load_u64a(ptr + 8); u64a v3 = unaligned_load_u64a(ptr + 16); if (nocase) { - v1 &= CASEMASK; - v2 &= CASEMASK; - v3 &= CASEMASK; + v1 &= OCTO_CASE_CLEAR; + v2 &= OCTO_CASE_CLEAR; + v3 &= OCTO_CASE_CLEAR; } - v1 *= MULTIPLIER; - v2 *= MULTIPLIER * MULTIPLIER; - v3 *= MULTIPLIER * MULTIPLIER * MULTIPLIER; + v1 *= HASH_MULTIPLIER; + v2 *= HASH_MULTIPLIER * HASH_MULTIPLIER; + v3 *= HASH_MULTIPLIER * HASH_MULTIPLIER * HASH_MULTIPLIER; v1 >>= 32; v2 >>= 32; v3 >>= 32; return v1 ^ v2 ^ v3; } +/** + * \brief Internal, used by the bloom filter hash functions below. Hashes 16 + * bytes beginning at (ptr + offset). + */ +static really_inline +u32 bloomHash_i(const u8 *ptr, u32 offset, u64a multiplier, char nocase) { + assert(offset + 16 <= LONG_LIT_HASH_LEN); + + u64a v = unaligned_load_u64a(ptr + offset); + if (nocase) { + v &= OCTO_CASE_CLEAR; + } + v *= multiplier; + return v >> 32; +} + +/* + * We ensure that we see every byte of the first LONG_LIT_HASH_LEN bytes of input + * data (using at least one of the following functions). 
+ */ + +static really_inline +u32 bloomHash_1(const u8 *ptr, char nocase) { + const u64a multiplier = HASH_MULTIPLIER; + return bloomHash_i(ptr, 0, multiplier, nocase); +} + +static really_inline +u32 bloomHash_2(const u8 *ptr, char nocase) { + const u64a multiplier = HASH_MULTIPLIER * HASH_MULTIPLIER; + return bloomHash_i(ptr, 4, multiplier, nocase); +} + +static really_inline +u32 bloomHash_3(const u8 *ptr, char nocase) { + const u64a multiplier = HASH_MULTIPLIER * HASH_MULTIPLIER * HASH_MULTIPLIER; + return bloomHash_i(ptr, 8, multiplier, nocase); +} + #endif // STREAM_LONG_LIT_HASH_H diff --git a/src/util/bitutils.h b/src/util/bitutils.h index 6f1bcd09..b7a09ca7 100644 --- a/src/util/bitutils.h +++ b/src/util/bitutils.h @@ -70,6 +70,7 @@ #define CASE_BIT 0x20 #define CASE_CLEAR 0xdf #define DOUBLE_CASE_CLEAR 0xdfdf +#define OCTO_CASE_CLEAR 0xdfdfdfdfdfdfdfdfULL static really_inline u32 clz32(u32 x) {