mirror of https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
rose: simplify long lit table, add bloom filter
Replaces the original long lit hash table (used in streaming mode) with a smaller, simpler linear probing approach. Adds a bloom filter in front of it to reduce time spent on false positives. Sizing of both the hash table and the bloom filter is done based on max load.
This commit is contained in:
parent 68bf473e2e
commit 8869dee643
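The sizing strategy the commit message describes can be illustrated with a small, self-contained sketch. This is a toy illustration only, not vectorscan code: the names (LitTable, hashN, buildTable, lookup) and the load constants are invented here, and the real tables hash string positions rather than whole strings. It shows the two ideas together: a bloom filter consulted before the hash table, and both structures rebuilt at double the size until their measured load drops under a cap.

```cpp
#include <cstddef>
#include <cstdint>
#include <functional>
#include <string>
#include <vector>

struct LitTable {
    std::vector<uint8_t> bloom;      // bloom filter, one bit per key
    std::vector<std::string> slots;  // open-addressed table; "" == vacant
};

// Stand-in for the real hashLongLiteral/bloomHash_* functions.
static uint32_t hashN(const std::string &s, uint32_t salt) {
    return static_cast<uint32_t>(std::hash<std::string>{}(s + char('A' + salt)));
}

static void bloomSet(std::vector<uint8_t> &bloom, uint32_t key) {
    bloom[(key / 8) % bloom.size()] |= 1u << (key % 8);
}

static bool bloomTest(const std::vector<uint8_t> &bloom, uint32_t key) {
    return bloom[(key / 8) % bloom.size()] & (1u << (key % 8));
}

static LitTable buildTable(const std::vector<std::string> &lits) {
    const double kMaxHashLoad = 0.7;   // cap on occupied hash slots
    const double kMaxBloomLoad = 0.25; // cap on set bloom bits
    LitTable t;

    // Hash table: start with enough entries for the keys, rebuild at double
    // the size until the load factor falls below the cap.
    size_t entries = 128;
    while (entries < lits.size()) entries *= 2;
    for (;;) {
        t.slots.assign(entries, std::string());
        for (const auto &lit : lits) {
            size_t bucket = hashN(lit, 0) % entries;
            while (!t.slots[bucket].empty()) {      // linear probing
                bucket = (bucket + 1) % entries;
            }
            t.slots[bucket] = lit;
        }
        if (static_cast<double>(lits.size()) / entries < kMaxHashLoad) break;
        entries *= 2;
    }

    // Bloom filter: same doubling strategy, three hashes per key.
    size_t bits = 256;
    for (;;) {
        t.bloom.assign(bits / 8, 0);
        for (const auto &lit : lits) {
            for (uint32_t salt = 1; salt <= 3; salt++) {
                bloomSet(t.bloom, hashN(lit, salt) % bits);
            }
        }
        size_t set_bits = 0;
        for (uint8_t b : t.bloom) {
            while (b) { set_bits += b & 1u; b >>= 1; }
        }
        if (static_cast<double>(set_bits) / bits < kMaxBloomLoad) break;
        bits *= 2;
    }
    return t;
}

static bool lookup(const LitTable &t, const std::string &s) {
    const size_t bits = t.bloom.size() * 8;
    for (uint32_t salt = 1; salt <= 3; salt++) {
        if (!bloomTest(t.bloom, hashN(s, salt) % bits)) {
            return false; // bloom filter rejects most misses cheaply
        }
    }
    size_t bucket = hashN(s, 0) % t.slots.size();
    while (!t.slots[bucket].empty()) {               // probe until a hole
        if (t.slots[bucket] == s) return true;
        bucket = (bucket + 1) % t.slots.size();
    }
    return false;
}
```

In this sketch, buildTable plays the role of the compile-time makeHashTable/makeBloomFilter pair introduced below, and lookup mirrors the runtime checkBloomFilter followed by checkHashTable sequence that this change adds.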
@@ -4351,6 +4351,7 @@ void makeCheckLiteralInstruction(const RoseBuildImpl &build,
if (lit.table != ROSE_FLOATING) {
return;
}
assert(bc.longLitLengthThreshold > 0);
if (lit.s.length() <= bc.longLitLengthThreshold) {
return;
}
@@ -4937,6 +4938,8 @@ void allocateFinalIdToSet(RoseBuildImpl &build, const set<u32> &lits,
* ids and squash the same roles and have the same group squashing
* behaviour. Benefits literals cannot be merged. */

assert(longLitLengthThreshold > 0);

for (u32 int_id : lits) {
rose_literal_info &curr_info = literal_info[int_id];
const rose_literal_id &lit = build.literals.right.at(int_id);
@@ -36,17 +36,28 @@
#include "util/verify_types.h"
#include "util/compile_context.h"

#include <algorithm>
#include <numeric>

using namespace std;

namespace ue2 {

/** \brief Minimum size for a non-empty hash table. */
static constexpr u32 MIN_HASH_TABLE_SIZE = 4096;
/** \brief Minimum size for a non-empty hash table. Must be a power of two. */
static constexpr u32 MIN_HASH_TABLE_SIZE = 128;

/** \brief Maximum load factor (between zero and one) for a hash table. */
static constexpr double MAX_HASH_TABLE_LOAD = 0.7;

/** \brief Minimum size (in bits) for a bloom filter. Must be a power of two. */
static constexpr u32 MIN_BLOOM_FILTER_SIZE = 256;

/** \brief Maximum load factor (between zero and one) for a bloom filter. */
static constexpr double MAX_BLOOM_FILTER_LOAD = 0.25;

struct LongLitModeInfo {
u32 boundary = 0; //!< One above the largest index for this mode.
u32 positions = 0; //!< Total number of string positions.
u32 hashEntries = 0; //!< Number of hash table entries.
u32 num_literals = 0; //!< Number of strings for this mode.
u32 hashed_positions = 0; //!< Number of hashable string positions.
};

struct LongLitInfo {
@@ -66,54 +77,120 @@ static
LongLitInfo analyzeLongLits(const vector<ue2_case_string> &lits,
size_t max_len) {
LongLitInfo info;
u32 hashedPositionsCase = 0;
u32 hashedPositionsNocase = 0;

// Caseful boundary is the index of the first nocase literal, as we're
// ordered (caseful, nocase).
auto first_nocase = find_if(begin(lits), end(lits),
[](const ue2_case_string &lit) { return lit.nocase; });
info.caseful.boundary = verify_u32(distance(lits.begin(), first_nocase));

// Nocase boundary is the size of the literal set.
info.nocase.boundary = verify_u32(lits.size());

for (const auto &lit : lits) {
if (lit.nocase) {
hashedPositionsNocase += lit.s.size() - max_len;
info.nocase.positions += lit.s.size();
} else {
hashedPositionsCase += lit.s.size() - max_len;
info.caseful.positions += lit.s.size();
}
auto &lit_info = lit.nocase ? info.nocase : info.caseful;
assert(lit.s.size() > max_len);
lit_info.num_literals++;
lit_info.hashed_positions += lit.s.size() - max_len;
}

info.caseful.hashEntries = hashedPositionsCase
? roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, hashedPositionsCase))
: 0;
info.nocase.hashEntries = hashedPositionsNocase
? roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, hashedPositionsNocase))
: 0;

DEBUG_PRINTF("caseful: boundary=%u, positions=%u, hashedPositions=%u, "
"hashEntries=%u\n",
info.caseful.boundary, info.caseful.positions,
hashedPositionsCase, info.caseful.hashEntries);
DEBUG_PRINTF("nocase: boundary=%u, positions=%u, hashedPositions=%u, "
"hashEntries=%u\n",
info.nocase.boundary, info.nocase.positions,
hashedPositionsNocase, info.nocase.hashEntries);
DEBUG_PRINTF("case: hashed %u positions\n", info.caseful.hashed_positions);
DEBUG_PRINTF("nocase: hashed %u positions\n", info.nocase.hashed_positions);

return info;
}

static
void fillHashes(const vector<ue2_case_string> &lits, size_t max_len,
RoseLongLitHashEntry *tab, size_t numEntries, bool nocase,
const map<u32, u32> &litToOffsetVal) {
const u32 nbits = lg2(numEntries);
map<u32, deque<pair<u32, u32>>> bucketToLitOffPairs;
map<u32, u64a> bucketToBitfield;
void addToBloomFilter(vector<u8> &bloom, const u8 *substr, bool nocase) {
const u32 num_keys = verify_u32(bloom.size() * 8);
const u32 key_mask = (1U << lg2(num_keys)) -1;

const auto hash_functions = { bloomHash_1, bloomHash_2, bloomHash_3 };
for (const auto &hash_func : hash_functions) {
u32 hash = hash_func(substr, nocase);
u32 key = hash & key_mask;
DEBUG_PRINTF("set key %u (of %zu)\n", key, bloom.size() * 8);
bloom[key / 8] |= 1U << (key % 8);
}
}

static
size_t bloomOccupancy(const vector<u8> &bloom) {
return accumulate(begin(bloom), end(bloom), 0,
[](const size_t &sum, const u8 &elem) {
return sum + popcount32(elem);
});
}

static
double bloomLoad(const vector<u8> &bloom) {
return (double)bloomOccupancy(bloom) / (double)(bloom.size() * 8);
}

static
vector<u8> buildBloomFilter(const vector<ue2_case_string> &lits, size_t max_len,
size_t num_entries, bool nocase) {
assert(num_entries % 8 == 0);
assert((num_entries & (num_entries - 1)) == 0); // Must be power of two.

vector<u8> bloom(num_entries / 8, 0);

if (!num_entries) {
return bloom;
}

for (const auto &lit : lits) {
if (nocase != lit.nocase) {
continue;
}
for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) {
const u8 *substr = (const u8 *)lit.s.c_str() + offset;
addToBloomFilter(bloom, substr, nocase);
}
}

DEBUG_PRINTF("%s bloom filter occupancy %zu of %zu entries\n",
nocase ? "nocase" : "caseful", bloomOccupancy(bloom),
num_entries);

return bloom;
}

static
vector<u8> makeBloomFilter(const vector<ue2_case_string> &lits,
size_t max_len, bool nocase) {
vector<u8> bloom;

size_t num_entries = MIN_BLOOM_FILTER_SIZE;
for (;;) {
bloom = buildBloomFilter(lits, max_len, num_entries, nocase);
DEBUG_PRINTF("built %s bloom for %zu entries: load %f\n",
nocase ? "nocase" : "caseful", num_entries,
bloomLoad(bloom));
if (bloomLoad(bloom) < MAX_BLOOM_FILTER_LOAD) {
break;
}
num_entries *= 2;
}
return bloom;
}

static
size_t hashTableOccupancy(const vector<RoseLongLitHashEntry> &tab) {
return count_if(begin(tab), end(tab), [](const RoseLongLitHashEntry &ent) {
return ent.str_offset != 0;
});
}

static
double hashTableLoad(const vector<RoseLongLitHashEntry> &tab) {
return (double)hashTableOccupancy(tab) / (double)(tab.size());
}

static
vector<RoseLongLitHashEntry> buildHashTable(const vector<ue2_case_string> &lits,
size_t max_len,
const vector<u32> &litToOffsetVal,
size_t numEntries, bool nocase) {
vector<RoseLongLitHashEntry> tab(numEntries, {0,0});

if (!numEntries) {
return tab;
}

map<u32, vector<pair<u32, u32>>> hashToLitOffPairs;

for (u32 lit_id = 0; lit_id < lits.size(); lit_id++) {
const ue2_case_string &lit = lits[lit_id];
@@ -122,37 +199,41 @@ void fillHashes(const vector<ue2_case_string> &lits, size_t max_len,
}
for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) {
const u8 *substr = (const u8 *)lit.s.c_str() + offset;
u32 h = hashLongLiteral(substr, max_len, lit.nocase);
u32 h_ent = h & ((1U << nbits) - 1);
u32 h_low = (h >> nbits) & 63;
bucketToLitOffPairs[h_ent].emplace_back(lit_id, offset);
bucketToBitfield[h_ent] |= (1ULL << h_low);
u32 hash = hashLongLiteral(substr, max_len, lit.nocase);
hashToLitOffPairs[hash].emplace_back(lit_id, offset);
}
}

// this used to be a set<u32>, but a bitset is much much faster given that
// we're using it only for membership testing.
boost::dynamic_bitset<> filledBuckets(numEntries); // all zero by default.
for (auto &m : hashToLitOffPairs) {
u32 hash = m.first;
vector<pair<u32, u32>> &d = m.second;

// sweep out bitfield entries and save the results swapped accordingly
// also, anything with bitfield entries is put in filledBuckets
for (const auto &m : bucketToBitfield) {
const u32 &bucket = m.first;
const u64a &contents = m.second;
tab[bucket].bitfield = contents;
filledBuckets.set(bucket);
}
// Sort by (offset, string) so that we'll be able to remove identical
// string prefixes.
stable_sort(begin(d), end(d),
[&](const pair<u32, u32> &a, const pair<u32, u32> &b) {
const auto &str_a = lits[a.first].s;
const auto &str_b = lits[b.first].s;
return tie(a.second, str_a) < tie(b.second, str_b);
});

// store out all our chains based on free values in our hash table.
// find nearest free locations that are empty (there will always be more
// entries than strings, at present)
for (auto &m : bucketToLitOffPairs) {
u32 bucket = m.first;
deque<pair<u32, u32>> &d = m.second;
// Remove entries that point to the same literal prefix.
d.erase(unique(begin(d), end(d),
[&](const pair<u32, u32> &a, const pair<u32, u32> &b) {
if (a.second != b.second) {
return false;
}
const auto &str_a = lits[a.first].s;
const auto &str_b = lits[b.first].s;
const size_t len = max_len + a.second;
return equal(begin(str_a), begin(str_a) + len,
begin(str_b));
}),
end(d));

// sort d by distance of the residual string (len minus our depth into
// the string). We need to put the 'furthest back' string first...
stable_sort(d.begin(), d.end(),
// Sort d by distance of the residual string (len minus our depth into
// the string). We need to put the 'furthest back' string first.
stable_sort(begin(d), end(d),
[](const pair<u32, u32> &a, const pair<u32, u32> &b) {
if (a.second != b.second) {
return a.second > b.second; /* longest is first */
@@ -160,47 +241,79 @@ void fillHashes(const vector<ue2_case_string> &lits, size_t max_len,
return a.first < b.first;
});

while (1) {
// first time through is always at bucket, then we fill in links
filledBuckets.set(bucket);
RoseLongLitHashEntry *ent = &tab[bucket];
u32 lit_id = d.front().first;
u32 offset = d.front().second;
u32 bucket = hash % numEntries;

ent->state = verify_u32(litToOffsetVal.at(lit_id) +
offset + max_len);
ent->link = (u32)LINK_INVALID;

d.pop_front();
if (d.empty()) {
break;
}
// now, if there is another value
// find a bucket for it and put in 'bucket' and repeat
// all we really need to do is find something not in filledBuckets,
// ideally something close to bucket
// we search backward and forward from bucket, trying to stay as
// close as possible.
UNUSED bool found = false;
int bucket_candidate = 0;
for (u32 k = 1; k < numEntries * 2; k++) {
bucket_candidate = bucket + (((k & 1) == 0)
? (-(int)k / 2) : (k / 2));
if (bucket_candidate < 0 ||
(size_t)bucket_candidate >= numEntries) {
continue;
}
if (!filledBuckets.test(bucket_candidate)) {
found = true;
break;
// Placement via linear probing.
for (const auto &lit_offset : d) {
while (tab[bucket].str_offset != 0) {
bucket++;
if (bucket == numEntries) {
bucket = 0;
}
}

assert(found);
bucket = bucket_candidate;
ent->link = bucket;
u32 lit_id = lit_offset.first;
u32 offset = lit_offset.second;

DEBUG_PRINTF("hash 0x%08x lit_id %u offset %u bucket %u\n", hash,
lit_id, offset, bucket);

auto &entry = tab[bucket];
entry.str_offset = verify_u32(litToOffsetVal.at(lit_id));
assert(entry.str_offset != 0);
entry.str_len = offset + max_len;
}
}

DEBUG_PRINTF("%s hash table occupancy %zu of %zu entries\n",
nocase ? "nocase" : "caseful", hashTableOccupancy(tab),
numEntries);

return tab;
}

static
vector<RoseLongLitHashEntry> makeHashTable(const vector<ue2_case_string> &lits,
size_t max_len,
const vector<u32> &litToOffsetVal,
u32 numPositions, bool nocase) {
vector<RoseLongLitHashEntry> tab;

// Note: for the hash table, we must always have at least enough entries
// for the number of hashable positions.
size_t num_entries = roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE,
numPositions));

for (;;) {
tab = buildHashTable(lits, max_len, litToOffsetVal, num_entries,
nocase);
DEBUG_PRINTF("built %s hash table for %zu entries: load %f\n",
nocase ? "nocase" : "caseful", num_entries,
hashTableLoad(tab));
if (hashTableLoad(tab) < MAX_HASH_TABLE_LOAD) {
break;
}
num_entries *= 2;
}
return tab;
}

static
vector<u8> buildLits(const vector<ue2_case_string> &lits, u32 baseOffset,
vector<u32> &litToOffsetVal) {
vector<u8> blob;
litToOffsetVal.resize(lits.size(), 0);

u32 lit_id = 0;
for (const auto &lit : lits) {
u32 offset = baseOffset + verify_u32(blob.size());
blob.insert(blob.end(), begin(lit.s), end(lit.s));
litToOffsetVal[lit_id] = offset;
lit_id++;
}

DEBUG_PRINTF("built %zu bytes of strings\n", blob.size());
return blob;
}

u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob,
@@ -251,89 +364,69 @@ u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob,

LongLitInfo info = analyzeLongLits(lits, max_len);

// first assess the size and find our caseless threshold
size_t headerSize = ROUNDUP_16(sizeof(RoseLongLitTable));
vector<u32> litToOffsetVal;
const size_t headerSize = ROUNDUP_16(sizeof(RoseLongLitTable));
vector<u8> lit_blob = buildLits(lits, headerSize, litToOffsetVal);

size_t litTabOffset = headerSize;
// Build caseful bloom filter and hash table.
vector<u8> bloom_case;
vector<RoseLongLitHashEntry> tab_case;
if (info.caseful.num_literals) {
bloom_case = makeBloomFilter(lits, max_len, false);
tab_case = makeHashTable(lits, max_len, litToOffsetVal,
info.caseful.hashed_positions, false);
}

size_t litTabNumEntries = lits.size() + 1;
size_t litTabSize = ROUNDUP_16(litTabNumEntries * sizeof(RoseLongLiteral));
// Build nocase bloom filter and hash table.
vector<u8> bloom_nocase;
vector<RoseLongLitHashEntry> tab_nocase;
if (info.nocase.num_literals) {
bloom_nocase = makeBloomFilter(lits, max_len, true);
tab_nocase = makeHashTable(lits, max_len, litToOffsetVal,
info.nocase.hashed_positions, true);
}

size_t wholeLitTabOffset = litTabOffset + litTabSize;
size_t totalWholeLitTabSize =
ROUNDUP_16(info.caseful.positions + info.nocase.positions);
size_t wholeLitTabSize = ROUNDUP_16(byte_length(lit_blob));
size_t htOffsetCase = headerSize + wholeLitTabSize;
size_t htOffsetNocase = htOffsetCase + byte_length(tab_case);
size_t bloomOffsetCase = htOffsetNocase + byte_length(tab_nocase);
size_t bloomOffsetNocase = bloomOffsetCase + byte_length(bloom_case);

size_t htOffsetCase = wholeLitTabOffset + totalWholeLitTabSize;
size_t htSizeCase = info.caseful.hashEntries * sizeof(RoseLongLitHashEntry);
size_t htOffsetNocase = htOffsetCase + htSizeCase;
size_t htSizeNocase =
info.nocase.hashEntries * sizeof(RoseLongLitHashEntry);

size_t tabSize = ROUNDUP_16(htOffsetNocase + htSizeNocase);
size_t tabSize = ROUNDUP_16(bloomOffsetNocase + byte_length(bloom_nocase));

// need to add +2 to both of these to allow space for the actual largest
// value as well as handling the fact that we add one to the space when
// storing out a position to allow zero to mean "no stream state value"
u8 streamBitsCase = lg2(roundUpToPowerOfTwo(info.caseful.positions + 2));
u8 streamBitsNocase = lg2(roundUpToPowerOfTwo(info.nocase.positions + 2));
u8 streamBitsCase = lg2(roundUpToPowerOfTwo(tab_case.size() + 2));
u8 streamBitsNocase = lg2(roundUpToPowerOfTwo(tab_nocase.size() + 2));
u32 tot_state_bytes = ROUNDUP_N(streamBitsCase + streamBitsNocase, 8) / 8;

auto table = aligned_zmalloc_unique<char>(tabSize);
assert(table); // otherwise would have thrown std::bad_alloc

// then fill it in
char *ptr = table.get();
RoseLongLitTable *header = (RoseLongLitTable *)ptr;
// fill in header
// Fill in the RoseLongLitTable header structure.
RoseLongLitTable *header = (RoseLongLitTable *)(table.get());
header->size = verify_u32(tabSize);
header->maxLen = verify_u8(max_len); // u8 so doesn't matter; won't go > 255
header->boundaryCase = info.caseful.boundary;
header->hashOffsetCase = verify_u32(htOffsetCase);
header->hashNBitsCase = lg2(info.caseful.hashEntries);
header->streamStateBitsCase = streamBitsCase;
header->boundaryNocase = info.nocase.boundary;
header->hashOffsetNocase = verify_u32(htOffsetNocase);
header->hashNBitsNocase = lg2(info.nocase.hashEntries);
header->streamStateBitsNocase = streamBitsNocase;
header->caseful.hashOffset = verify_u32(htOffsetCase);
header->caseful.hashBits = lg2(tab_case.size());
header->caseful.streamStateBits = streamBitsCase;
header->caseful.bloomOffset = verify_u32(bloomOffsetCase);
header->caseful.bloomBits = lg2(bloom_case.size() * 8);
header->nocase.hashOffset = verify_u32(htOffsetNocase);
header->nocase.hashBits = lg2(tab_nocase.size());
header->nocase.streamStateBits = streamBitsNocase;
header->nocase.bloomOffset = verify_u32(bloomOffsetNocase);
header->nocase.bloomBits = lg2(bloom_nocase.size() * 8);
assert(tot_state_bytes < sizeof(u64a));
header->streamStateBytes = verify_u8(tot_state_bytes); // u8

ptr += headerSize;

// now fill in the rest

RoseLongLiteral *litTabPtr = (RoseLongLiteral *)ptr;
ptr += litTabSize;

map<u32, u32> litToOffsetVal;
for (auto i = lits.begin(), e = lits.end(); i != e; ++i) {
u32 entry = verify_u32(i - lits.begin());
u32 offset = verify_u32(ptr - table.get());

// point the table entry to the string location
litTabPtr[entry].offset = offset;

litToOffsetVal[entry] = offset;

// copy the string into the string location
const auto &s = i->s;
memcpy(ptr, s.c_str(), s.size());

ptr += s.size(); // and the string location
}

// fill in final lit table entry with current ptr (serves as end value)
litTabPtr[lits.size()].offset = verify_u32(ptr - table.get());

// fill hash tables
ptr = table.get() + htOffsetCase;
fillHashes(lits, max_len, (RoseLongLitHashEntry *)ptr,
info.caseful.hashEntries, false, litToOffsetVal);
ptr += htSizeCase;
fillHashes(lits, max_len, (RoseLongLitHashEntry *)ptr,
info.nocase.hashEntries, true, litToOffsetVal);
ptr += htSizeNocase;

assert(ptr <= table.get() + tabSize);
// Copy in the literal strings, hash tables and bloom filters,
copy_bytes(table.get() + headerSize, lit_blob);
copy_bytes(table.get() + htOffsetCase, tab_case);
copy_bytes(table.get() + bloomOffsetCase, bloom_case);
copy_bytes(table.get() + htOffsetNocase, tab_nocase);
copy_bytes(table.get() + bloomOffsetNocase, bloom_nocase);

DEBUG_PRINTF("built streaming table, size=%zu\n", tabSize);
DEBUG_PRINTF("requires %zu bytes of history\n", max_len);
@@ -49,9 +49,10 @@
#include <fstream>
#include <iomanip>
#include <map>
#include <numeric>
#include <ostream>
#include <string>
#include <sstream>
#include <string>
#include <utility>

#ifndef DUMP_SUPPORT
@@ -1049,6 +1050,39 @@ void dumpAnchoredStats(const void *atable, FILE *f) {

}

static
void dumpLongLiteralSubtable(const RoseLongLitTable *ll_table,
const RoseLongLitSubtable *ll_sub, FILE *f) {
if (!ll_sub->hashBits) {
fprintf(f, " <no table>\n");
return;
}

const char *base = (const char *)ll_table;

u32 nbits = ll_sub->hashBits;
u32 num_entries = 1U << nbits;
const auto *tab = (const RoseLongLitHashEntry *)(base + ll_sub->hashOffset);
u32 hash_occ =
count_if(tab, tab + num_entries, [](const RoseLongLitHashEntry &ent) {
return ent.str_offset != 0;
});
float hash_occ_percent = ((float)hash_occ / (float)num_entries) * 100;

fprintf(f, " hash table : %u bits, occupancy %u/%u (%0.1f%%)\n",
nbits, hash_occ, num_entries, hash_occ_percent);

u32 bloom_bits = ll_sub->bloomBits;
u32 bloom_size = 1U << bloom_bits;
const u8 *bloom = (const u8 *)base + ll_sub->bloomOffset;
u32 bloom_occ = accumulate(bloom, bloom + bloom_size / 8, 0,
[](const u32 &sum, const u8 &elem) { return sum + popcount32(elem); });
float bloom_occ_percent = ((float)bloom_occ / (float)(bloom_size)) * 100;

fprintf(f, " bloom filter : %u bits, occupancy %u/%u (%0.1f%%)\n",
bloom_bits, bloom_occ, bloom_size, bloom_occ_percent);
}

static
void dumpLongLiteralTable(const RoseEngine *t, FILE *f) {
if (!t->longLitTableOffset) {
@@ -1062,17 +1096,15 @@ void dumpLongLiteralTable(const RoseEngine *t, FILE *f) {
(const struct RoseLongLitTable *)loadFromByteCodeOffset(
t, t->longLitTableOffset);

u32 num_caseful = ll_table->boundaryCase;
u32 num_caseless = ll_table->boundaryNocase - num_caseful;
fprintf(f, " total size : %u bytes\n", ll_table->size);
fprintf(f, " longest len : %u\n", ll_table->maxLen);
fprintf(f, " stream state : %u bytes\n", ll_table->streamStateBytes);

fprintf(f, " longest len: %u\n", ll_table->maxLen);
fprintf(f, " counts: %u caseful, %u caseless\n", num_caseful,
num_caseless);
fprintf(f, " hash bits: %u caseful, %u caseless\n",
ll_table->hashNBitsCase, ll_table->hashNBitsNocase);
fprintf(f, " state bits: %u caseful, %u caseless\n",
ll_table->streamStateBitsCase, ll_table->streamStateBitsNocase);
fprintf(f, " stream state: %u bytes\n", ll_table->streamStateBytes);
fprintf(f, " caseful:\n");
dumpLongLiteralSubtable(ll_table, &ll_table->caseful, f);

fprintf(f, " nocase:\n");
dumpLongLiteralSubtable(ll_table, &ll_table->nocase, f);
}

// Externally accessible functions
@@ -446,51 +446,49 @@ struct ALIGN_CL_DIRECTIVE anchored_matcher_info {
u32 anchoredMinDistance; /* start of region to run anchored table over */
};

/**
* \brief Long literal subtable for a particular mode (caseful or nocase).
*/
struct RoseLongLitSubtable {
/**
* \brief Offset of the hash table (relative to RoseLongLitTable base).
*
* Offset is zero if no such table exists.
*/
u32 hashOffset;

/**
* \brief Offset of the bloom filter (relative to RoseLongLitTable base).
*
* Offset is zero if no such table exists.
*/
u32 bloomOffset;

/** \brief lg2 of the size of the hash table. */
u8 hashBits;

/** \brief Size of the bloom filter in bits. */
u8 bloomBits;

/** \brief Number of bits of packed stream state used. */
u8 streamStateBits;
};

/**
* \brief Long literal table header.
*/
struct RoseLongLitTable {
/** \brief String ID one beyond the maximum entry for caseful literals. */
u32 boundaryCase;

/**
* \brief String ID one beyond the maximum entry for caseless literals.
* This is also the total size of the literal table.
* \brief Total size of the whole table (including strings, bloom filters,
* hash tables).
*/
u32 boundaryNocase;
u32 size;

/**
* \brief Offset of the caseful hash table (relative to RoseLongLitTable
* base).
*
* Offset is zero if no such table exists.
*/
u32 hashOffsetCase;
/** \brief Caseful sub-table (hash table and bloom filter). */
struct RoseLongLitSubtable caseful;

/**
* \brief Offset of the caseless hash table (relative to RoseLongLitTable
* base).
*
* Offset is zero if no such table exists.
*/
u32 hashOffsetNocase;

/** \brief lg2 of the size of the caseful hash table. */
u32 hashNBitsCase;

/** \brief lg2 of the size of the caseless hash table. */
u32 hashNBitsNocase;

/**
* \brief Number of bits of packed stream state for the caseful hash table.
*/
u8 streamStateBitsCase;

/**
* \brief Number of bits of packed stream state for the caseless hash
* table.
*/
u8 streamStateBitsNocase;
/** \brief Caseless sub-table (hash table and bloom filter). */
struct RoseLongLitSubtable nocase;

/** \brief Total size of packed stream state in bytes. */
u8 streamStateBytes;
@@ -499,39 +497,19 @@ struct RoseLongLitTable {
u8 maxLen;
};

/**
* \brief One of these structures per literal entry in our long literal table.
*/
struct RoseLongLiteral {
/**
* \brief Offset of the literal string itself, relative to
* RoseLongLitTable base.
*/
u32 offset;
};

/** \brief "No further links" value used for \ref RoseLongLitHashEntry::link. */
#define LINK_INVALID 0xffffffff

/**
* \brief One of these structures per hash table entry in our long literal
* table.
*/
struct RoseLongLitHashEntry {
/**
* \brief Bitfield used as a quick guard for hash buckets.
*
* For a given hash value N, the low six bits of N are taken and the
* corresponding bit is switched on in this bitfield if this bucket is used
* for that hash.
* \brief Offset of the literal string itself, relative to
* RoseLongLitTable base. Zero if this bucket is empty.
*/
u64a bitfield;
u32 str_offset;

/** \brief Offset in the literal table for this string. */
u32 state;

/** \brief Hash table index of next entry in the chain for this bucket. */
u32 link;
/** \brief Length of the literal string. */
u32 str_len;
};

static really_inline
@@ -551,6 +551,11 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
tctxt->minMatchOffset = offset;
tctxt->minNonMpvMatchOffset = offset;
tctxt->next_mpv_offset = 0;
tctxt->ll_buf = scratch->core_info.hbuf;
tctxt->ll_len = scratch->core_info.hlen;
tctxt->ll_buf_nocase = scratch->core_info.hbuf;
tctxt->ll_len_nocase = scratch->core_info.hlen;

DEBUG_PRINTF("BEGIN: history len=%zu, buffer len=%zu groups=%016llx\n",
scratch->core_info.hlen, scratch->core_info.len, tctxt->groups);
@@ -590,18 +595,14 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
MIN(t->floatingDistance, length + offset) - offset : 0;
}

loadLongLiteralState(t, state, scratch);

size_t hlength = scratch->core_info.hlen;
char rebuild = 0;

if (hlength) {
// Can only have long literal state or rebuild if this is not the
// first write to this stream.
loadLongLiteralState(t, state, scratch);
rebuild = (scratch->core_info.status & STATUS_DELAY_DIRTY) &&
(t->maxFloatingDelayedMatch == ROSE_BOUND_INF ||
offset < t->maxFloatingDelayedMatch);
}

char rebuild = hlength &&
(scratch->core_info.status & STATUS_DELAY_DIRTY) &&
(t->maxFloatingDelayedMatch == ROSE_BOUND_INF ||
offset < t->maxFloatingDelayedMatch);
DEBUG_PRINTF("**rebuild %hhd status %hhu mfdm %u, offset %llu\n",
rebuild, scratch->core_info.status,
t->maxFloatingDelayedMatch, offset);
@@ -36,52 +36,12 @@
#include "util/copybytes.h"

static really_inline
const struct RoseLongLiteral *
getLitTab(const struct RoseLongLitTable *ll_table) {
return (const struct RoseLongLiteral *)((const char *)ll_table +
ROUNDUP_16(sizeof(struct RoseLongLitTable)));
}

static really_inline
u32 get_start_lit_idx(const struct RoseLongLitTable *ll_table,
const char nocase) {
return nocase ? ll_table->boundaryCase : 0;
}

static really_inline
u32 get_end_lit_idx(const struct RoseLongLitTable *ll_table,
const char nocase) {
return nocase ? ll_table->boundaryNocase : ll_table->boundaryCase;
}

// search for the literal index that contains the current state
static rose_inline
u32 findLitTabEntry(const struct RoseLongLitTable *ll_table,
u32 stateValue, const char nocase) {
const struct RoseLongLiteral *litTab = getLitTab(ll_table);
u32 lo = get_start_lit_idx(ll_table, nocase);
u32 hi = get_end_lit_idx(ll_table, nocase);

// Now move stateValue back by one so that we're looking for the
// litTab entry that includes it the string, not the one 'one past' it
stateValue -= 1;
assert(lo != hi);
assert(litTab[lo].offset <= stateValue);
assert(litTab[hi].offset > stateValue);

// binary search to find the entry e such that:
// litTab[e].offsetToLiteral <= stateValue < litTab[e+1].offsetToLiteral
while (lo + 1 < hi) {
u32 mid = (lo + hi) / 2;
if (litTab[mid].offset <= stateValue) {
lo = mid;
} else { // (litTab[mid].offset > stateValue) {
hi = mid;
}
}
assert(litTab[lo].offset <= stateValue);
assert(litTab[hi].offset > stateValue);
return lo;
const struct RoseLongLitHashEntry *
getHashTableBase(const struct RoseLongLitTable *ll_table,
const struct RoseLongLitSubtable *ll_sub) {
assert(ll_sub->hashOffset);
return (const struct RoseLongLitHashEntry *)((const char *)ll_table +
ll_sub->hashOffset);
}

// Reads from stream state and unpacks values into stream state table.
@@ -94,8 +54,8 @@ void loadLongLitStreamState(const struct RoseLongLitTable *ll_table,
assert(state_case && state_nocase);

u8 ss_bytes = ll_table->streamStateBytes;
u8 ssb = ll_table->streamStateBitsCase;
UNUSED u8 ssb_nc = ll_table->streamStateBitsNocase;
u8 ssb = ll_table->caseful.streamStateBits;
UNUSED u8 ssb_nc = ll_table->nocase.streamStateBits;
assert(ss_bytes == (ssb + ssb_nc + 7) / 8);

#if defined(ARCH_32_BIT)
@@ -116,40 +76,22 @@ void loadLongLitStreamState(const struct RoseLongLitTable *ll_table,
*state_nocase = (u32)(streamVal >> ssb);
}

static really_inline
u32 getBaseOffsetOfLits(const struct RoseLongLitTable *ll_table,
const char nocase) {
u32 lit_idx = get_start_lit_idx(ll_table, nocase);
return getLitTab(ll_table)[lit_idx].offset;
}

static really_inline
u32 unpackStateVal(const struct RoseLongLitTable *ll_table, const char nocase,
u32 v) {
return v + getBaseOffsetOfLits(ll_table, nocase) - 1;
}

static really_inline
u32 packStateVal(const struct RoseLongLitTable *ll_table, const char nocase,
u32 v) {
return v - getBaseOffsetOfLits(ll_table, nocase) + 1;
}

static rose_inline
void loadLongLiteralStateMode(struct hs_scratch *scratch,
const struct RoseLongLitTable *ll_table,
const struct RoseLongLiteral *litTab,
const struct RoseLongLitSubtable *ll_sub,
const u32 state, const char nocase) {
if (!state) {
DEBUG_PRINTF("no state for %s\n", nocase ? "caseless" : "caseful");
return;
}

u32 stateValue = unpackStateVal(ll_table, nocase, state);
u32 idx = findLitTabEntry(ll_table, stateValue, nocase);
size_t found_offset = litTab[idx].offset;
const u8 *found_buf = found_offset + (const u8 *)ll_table;
size_t found_sz = stateValue - found_offset;
const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, ll_sub);
const struct RoseLongLitHashEntry *ent = tab + state - 1;

assert(ent->str_offset + ent->str_len <= ll_table->size);
const u8 *found_buf = (const u8 *)ll_table + ent->str_offset;
size_t found_sz = ent->str_len;

struct RoseContext *tctxt = &scratch->tctxt;
if (nocase) {
@@ -168,34 +110,42 @@ void loadLongLiteralState(const struct RoseEngine *t, char *state,
return;
}

// If we don't have any long literals in play, these values must point to
// the real history buffer so that CHECK_LITERAL instructions examine the
// history buffer.
scratch->tctxt.ll_buf = scratch->core_info.hbuf;
scratch->tctxt.ll_len = scratch->core_info.hlen;
scratch->tctxt.ll_buf_nocase = scratch->core_info.hbuf;
scratch->tctxt.ll_len_nocase = scratch->core_info.hlen;

if (!scratch->core_info.hlen) {
return;
}

const struct RoseLongLitTable *ll_table =
getByOffset(t, t->longLitTableOffset);
const struct RoseLongLiteral *litTab = getLitTab(ll_table);
const u8 *ll_state = getLongLitState(t, state);

u32 state_case;
u32 state_nocase;
loadLongLitStreamState(ll_table, ll_state, &state_case, &state_nocase);

loadLongLiteralStateMode(scratch, ll_table, litTab, state_case, 0);
loadLongLiteralStateMode(scratch, ll_table, litTab, state_nocase, 1);
DEBUG_PRINTF("loaded {%u, %u}\n", state_case, state_nocase);

loadLongLiteralStateMode(scratch, ll_table, &ll_table->caseful,
state_case, 0);
loadLongLiteralStateMode(scratch, ll_table, &ll_table->nocase,
state_nocase, 1);
}

static rose_inline
char confirmLongLiteral(const struct RoseLongLitTable *ll_table,
const hs_scratch_t *scratch, u32 hashState,
const struct hs_scratch *scratch,
const struct RoseLongLitHashEntry *ent,
const char nocase) {
const struct RoseLongLiteral *litTab = getLitTab(ll_table);
u32 idx = findLitTabEntry(ll_table, hashState, nocase);
size_t found_offset = litTab[idx].offset;
const u8 *s = found_offset + (const u8 *)ll_table;
assert(hashState > found_offset);
size_t len = hashState - found_offset;
assert(ent->str_offset + ent->str_len <= ll_table->size);
const u8 *s = (const u8 *)ll_table + ent->str_offset;
size_t len = ent->str_len;
const u8 *buf = scratch->core_info.buf;
const size_t buf_len = scratch->core_info.len;
@@ -225,14 +175,13 @@ char confirmLongLiteral(const struct RoseLongLitTable *ll_table,
return 0;
}

DEBUG_PRINTF("confirmed hashState=%u\n", hashState);
return 1;
}

static rose_inline
void calcStreamingHash(const struct core_info *ci,
const struct RoseLongLitTable *ll_table, u8 hash_len,
u32 *hash_case, u32 *hash_nocase) {
const u8 *prepScanBuffer(const struct core_info *ci,
const struct RoseLongLitTable *ll_table, u8 *tempbuf) {
const u8 hash_len = ll_table->maxLen;
assert(hash_len >= LONG_LIT_HASH_LEN);

// Our hash function operates over LONG_LIT_HASH_LEN bytes, starting from
@@ -240,7 +189,6 @@ void calcStreamingHash(const struct core_info *ci,
// entirely from either the current buffer or the history buffer, we pass
// in the pointer directly; otherwise we must make a copy.

u8 tempbuf[LONG_LIT_HASH_LEN];
const u8 *base;

if (hash_len > ci->len) {
@@ -266,71 +214,7 @@ void calcStreamingHash(const struct core_info *ci,
base = ci->buf + ci->len - hash_len;
}

if (ll_table->hashNBitsCase) {
*hash_case = hashLongLiteral(base, LONG_LIT_HASH_LEN, 0);
DEBUG_PRINTF("caseful hash %u\n", *hash_case);
}
if (ll_table->hashNBitsNocase) {
*hash_nocase = hashLongLiteral(base, LONG_LIT_HASH_LEN, 1);
DEBUG_PRINTF("caseless hash %u\n", *hash_nocase);
}
}

static really_inline
const struct RoseLongLitHashEntry *
getHashTableBase(const struct RoseLongLitTable *ll_table, const char nocase) {
const u32 hashOffset = nocase ? ll_table->hashOffsetNocase
: ll_table->hashOffsetCase;
return (const struct RoseLongLitHashEntry *)((const char *)ll_table +
hashOffset);
}

static rose_inline
const struct RoseLongLitHashEntry *
getLongLitHashEnt(const struct RoseLongLitTable *ll_table, u32 h,
const char nocase) {
u32 nbits = nocase ? ll_table->hashNBitsNocase : ll_table->hashNBitsCase;
if (!nbits) {
return NULL;
}

u32 h_ent = h & ((1 << nbits) - 1);
u32 h_low = (h >> nbits) & 63;

const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, nocase);
const struct RoseLongLitHashEntry *ent = tab + h_ent;

if (!((ent->bitfield >> h_low) & 0x1)) {
return NULL;
}

return ent;
}

static rose_inline
u32 storeLongLiteralStateMode(const struct hs_scratch *scratch,
const struct RoseLongLitTable *ll_table,
const struct RoseLongLitHashEntry *ent,
const char nocase) {
assert(ent);
assert(nocase ? ll_table->hashNBitsNocase : ll_table->hashNBitsCase);

const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, nocase);

u32 packed_state = 0;
while (1) {
if (confirmLongLiteral(ll_table, scratch, ent->state, nocase)) {
packed_state = packStateVal(ll_table, nocase, ent->state);
DEBUG_PRINTF("set %s state to %u\n", nocase ? "nocase" : "case",
packed_state);
break;
}
if (ent->link == LINK_INVALID) {
break;
}
ent = tab + ent->link;
}
return packed_state;
return base;
}

#ifndef NDEBUG
@@ -359,8 +243,8 @@ void storeLongLitStreamState(const struct RoseLongLitTable *ll_table,
assert(ll_state);

u8 ss_bytes = ll_table->streamStateBytes;
u8 ssb = ll_table->streamStateBitsCase;
UNUSED u8 ssb_nc = ll_table->streamStateBitsNocase;
u8 ssb = ll_table->caseful.streamStateBits;
UNUSED u8 ssb_nc = ll_table->nocase.streamStateBits;
assert(ss_bytes == ROUNDUP_N(ssb + ssb_nc, 8) / 8);
assert(!streamingTableOverflow(state_case, state_nocase, ssb, ssb_nc));
@@ -380,6 +264,65 @@ void storeLongLitStreamState(const struct RoseLongLitTable *ll_table,
partial_store_u64a(ll_state, stagingStreamState, ss_bytes);
}

static really_inline
char has_bit(const u8 *data, u32 bit) {
return (data[bit / 8] >> (bit % 8)) & 1;
}

static rose_inline
char bloomHasKey(const u8 *bloom, u32 bloom_mask, u32 hash) {
return has_bit(bloom, hash & bloom_mask);
}

static rose_inline
char checkBloomFilter(const struct RoseLongLitTable *ll_table,
const struct RoseLongLitSubtable *ll_sub,
const u8 *scan_buf, char nocase) {
assert(ll_sub->bloomBits);

const u8 *bloom = (const u8 *)ll_table + ll_sub->bloomOffset;
const u32 bloom_mask = (1U << ll_sub->bloomBits) - 1;

char v = 1;
v &= bloomHasKey(bloom, bloom_mask, bloomHash_1(scan_buf, nocase));
v &= bloomHasKey(bloom, bloom_mask, bloomHash_2(scan_buf, nocase));
v &= bloomHasKey(bloom, bloom_mask, bloomHash_3(scan_buf, nocase));
return v;
}

/**
* \brief Look for a hit in the hash table.
*
* Returns zero if not found, otherwise returns (bucket + 1).
*/
static rose_inline
u32 checkHashTable(const struct RoseLongLitTable *ll_table,
const struct RoseLongLitSubtable *ll_sub, const u8 *scan_buf,
const struct hs_scratch *scratch, char nocase) {
const u32 nbits = ll_sub->hashBits;
assert(nbits && nbits < 32);
const u32 num_entries = 1U << nbits;

const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, ll_sub);

u32 hash = hashLongLiteral(scan_buf, LONG_LIT_HASH_LEN, nocase);
u32 bucket = hash & ((1U << nbits) - 1);

while (tab[bucket].str_offset != 0) {
DEBUG_PRINTF("checking bucket %u\n", bucket);
if (confirmLongLiteral(ll_table, scratch, &tab[bucket], nocase)) {
DEBUG_PRINTF("found hit for bucket %u\n", bucket);
return bucket + 1;
}

if (++bucket == num_entries) {
bucket = 0;
}
}

return 0;
}

static rose_inline
void storeLongLiteralState(const struct RoseEngine *t, char *state,
struct hs_scratch *scratch) {
@@ -401,28 +344,22 @@ void storeLongLiteralState(const struct RoseEngine *t, char *state,

// If we don't have enough history, we don't need to do anything.
if (ll_table->maxLen <= ci->len + ci->hlen) {
u32 hash_case = 0;
u32 hash_nocase = 0;
u8 tempbuf[LONG_LIT_HASH_LEN];
const u8 *scan_buf = prepScanBuffer(ci, ll_table, tempbuf);

calcStreamingHash(ci, ll_table, ll_table->maxLen, &hash_case,
&hash_nocase);

const struct RoseLongLitHashEntry *ent_case =
getLongLitHashEnt(ll_table, hash_case, 0);
const struct RoseLongLitHashEntry *ent_nocase =
getLongLitHashEnt(ll_table, hash_nocase, 1);

DEBUG_PRINTF("ent_caseful=%p, ent_caseless=%p\n", ent_case, ent_nocase);

if (ent_case) {
state_case = storeLongLiteralStateMode(scratch, ll_table,
ent_case, 0);
if (ll_table->caseful.hashBits &&
checkBloomFilter(ll_table, &ll_table->caseful, scan_buf, 0)) {
state_case = checkHashTable(ll_table, &ll_table->caseful, scan_buf,
scratch, 0);
}

if (ent_nocase) {
state_nocase = storeLongLiteralStateMode(scratch, ll_table,
ent_nocase, 1);
if (ll_table->nocase.hashBits &&
checkBloomFilter(ll_table, &ll_table->nocase, scan_buf, 1)) {
state_nocase = checkHashTable(ll_table, &ll_table->nocase, scan_buf,
scratch, 1);
}
} else {
DEBUG_PRINTF("not enough history (%zu bytes)\n", ci->len + ci->hlen);
}

DEBUG_PRINTF("store {%u, %u}\n", state_case, state_nocase);
@@ -30,17 +30,18 @@
#define STREAM_LONG_LIT_HASH_H

#include "ue2common.h"
#include "util/bitutils.h"
#include "util/unaligned.h"

/** \brief Length of the buffer operated on by \ref hashLongLiteral(). */
#define LONG_LIT_HASH_LEN 24

/** \brief Multiplier used by all the hash functions below. */
#define HASH_MULTIPLIER 0x0b4e0ef37bc32127ULL

/** \brief Hash function used for long literal table in streaming mode. */
static really_inline
u32 hashLongLiteral(const u8 *ptr, UNUSED size_t len, char nocase) {
const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL;
const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL;

// We unconditionally hash LONG_LIT_HASH_LEN bytes; all use cases of this
// hash are for strings longer than this.
assert(len >= 24);
@@ -49,17 +50,56 @@ u32 hashLongLiteral(const u8 *ptr, UNUSED size_t len, char nocase) {
u64a v2 = unaligned_load_u64a(ptr + 8);
u64a v3 = unaligned_load_u64a(ptr + 16);
if (nocase) {
v1 &= CASEMASK;
v2 &= CASEMASK;
v3 &= CASEMASK;
v1 &= OCTO_CASE_CLEAR;
v2 &= OCTO_CASE_CLEAR;
v3 &= OCTO_CASE_CLEAR;
}
v1 *= MULTIPLIER;
v2 *= MULTIPLIER * MULTIPLIER;
v3 *= MULTIPLIER * MULTIPLIER * MULTIPLIER;
v1 *= HASH_MULTIPLIER;
v2 *= HASH_MULTIPLIER * HASH_MULTIPLIER;
v3 *= HASH_MULTIPLIER * HASH_MULTIPLIER * HASH_MULTIPLIER;
v1 >>= 32;
v2 >>= 32;
v3 >>= 32;
return v1 ^ v2 ^ v3;
}

/**
* \brief Internal, used by the bloom filter hash functions below. Hashes 16
* bytes beginning at (ptr + offset).
*/
static really_inline
u32 bloomHash_i(const u8 *ptr, u32 offset, u64a multiplier, char nocase) {
assert(offset + 16 <= LONG_LIT_HASH_LEN);

u64a v = unaligned_load_u64a(ptr + offset);
if (nocase) {
v &= OCTO_CASE_CLEAR;
}
v *= multiplier;
return v >> 32;
}

/*
* We ensure that we see every byte of the first LONG_LIT_HASH_LEN bytes of
* input data (using at least one of the following functions).
*/

static really_inline
u32 bloomHash_1(const u8 *ptr, char nocase) {
const u64a multiplier = HASH_MULTIPLIER;
return bloomHash_i(ptr, 0, multiplier, nocase);
}

static really_inline
u32 bloomHash_2(const u8 *ptr, char nocase) {
const u64a multiplier = HASH_MULTIPLIER * HASH_MULTIPLIER;
return bloomHash_i(ptr, 4, multiplier, nocase);
}

static really_inline
u32 bloomHash_3(const u8 *ptr, char nocase) {
const u64a multiplier = HASH_MULTIPLIER * HASH_MULTIPLIER * HASH_MULTIPLIER;
return bloomHash_i(ptr, 8, multiplier, nocase);
}

#endif // STREAM_LONG_LIT_HASH_H
@@ -70,6 +70,7 @@
#define CASE_BIT 0x20
#define CASE_CLEAR 0xdf
#define DOUBLE_CASE_CLEAR 0xdfdf
#define OCTO_CASE_CLEAR 0xdfdfdfdfdfdfdfdfULL

static really_inline
u32 clz32(u32 x) {