fdr: move long literal handling into Rose

Move the hash table used for long literal support in streaming mode from
FDR to Rose, and introduce new instructions CHECK_LONG_LIT and
CHECK_LONG_LIT_NOCASE for doing literal confirm for long literals.

This simplifies FDR confirm, and guarantees that HWLM matchers will only
be used for literals < 256 bytes long.
This commit is contained in:
Justin Viiret
2016-09-07 15:59:23 +10:00
committed by Matthew Barr
parent 6ed30194ce
commit 68bf473e2e
40 changed files with 1759 additions and 1310 deletions

View File

@@ -0,0 +1,348 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "rose_build_long_lit.h"
#include "rose_build_engine_blob.h"
#include "rose_build_impl.h"
#include "stream_long_lit_hash.h"
#include "util/alloc.h"
#include "util/bitutils.h"
#include "util/verify_types.h"
#include "util/compile_context.h"
using namespace std;
namespace ue2 {
/**
 * \brief Minimum size for a non-empty hash table.
 *
 * Acts as a floor on the entry count when a mode's hash table is sized from
 * its number of hashed positions. A mode with no hashed positions does not use
 * this floor: it is given zero entries instead.
 */
static constexpr u32 MIN_HASH_TABLE_SIZE = 4096;
/** \brief Summary figures for the literals of a single case mode. */
struct LongLitModeInfo {
    /** One above the largest index for this mode. */
    u32 boundary{0};

    /** Total number of string positions. */
    u32 positions{0};

    /** Number of hash table entries. */
    u32 hashEntries{0};
};
/** \brief Long literal statistics, tracked separately for each case mode. */
struct LongLitInfo {
LongLitModeInfo caseful; //!< Figures for literals with nocase unset.
LongLitModeInfo nocase; //!< Figures for literals with nocase set.
};
/**
 * \brief Round \a x up to a power of two.
 *
 * The result is (1 << (lg2(x - 1) + 1)). Assuming lg2() yields the index of
 * the highest set bit (TODO confirm against util/bitutils.h), this is the
 * smallest power of two that is >= x for any x >= 2.
 *
 * \param x Value to round; must be non-zero (asserted).
 * \return The rounded value. The shift amount is asserted to be below 32 so
 *         that the result fits in a u32.
 */
static
u32 roundUpToPowerOfTwo(u32 x) {
    assert(x != 0);

    // Working from (x - 1) keeps exact powers of two unchanged.
    const u32 shift = 1 + lg2(x - 1);
    assert(shift < 32);

    return 1U << shift;
}
/**
 * \brief Gather per-mode statistics used to size the long literal table.
 *
 * For each case mode this computes the boundary index, the total number of
 * string positions (sum of literal lengths) and the number of hash table
 * entries. The entry count is zero when the mode has no hashed positions;
 * otherwise it is the hashed position count, floored at MIN_HASH_TABLE_SIZE
 * and rounded up to a power of two.
 *
 * \param lits Literals, expected to be ordered with all caseful literals
 *             before all nocase literals.
 * \param max_len Number of bytes hashed at each position. Every literal must
 *                be at least this long (asserted): a literal contributes
 *                (length - max_len) hashed positions and that subtraction is
 *                unsigned.
 * \return The statistics for both modes.
 */
static
LongLitInfo analyzeLongLits(const vector<ue2_case_string> &lits,
                            size_t max_len) {
    LongLitInfo info;
    u32 hashedPositionsCase = 0;
    u32 hashedPositionsNocase = 0;

    // Caseful boundary is the index of the first nocase literal, as we're
    // ordered (caseful, nocase).
    const auto first_nocase =
        find_if(begin(lits), end(lits),
                [](const ue2_case_string &lit) { return lit.nocase; });
    info.caseful.boundary = verify_u32(distance(lits.begin(), first_nocase));

    // Nocase boundary is the size of the literal set.
    info.nocase.boundary = verify_u32(lits.size());

    for (const auto &lit : lits) {
        // A literal shorter than max_len would make the unsigned subtraction
        // below wrap around and silently produce a huge position count.
        assert(lit.s.size() >= max_len);

        auto &mode = lit.nocase ? info.nocase : info.caseful;
        u32 &hashedPositions =
            lit.nocase ? hashedPositionsNocase : hashedPositionsCase;

        hashedPositions += lit.s.size() - max_len;
        mode.positions += lit.s.size();
    }

    // An unused mode gets no table at all; a used one gets at least
    // MIN_HASH_TABLE_SIZE entries, rounded up to a power of two.
    const auto entriesFor = [](u32 hashedPositions) -> u32 {
        if (!hashedPositions) {
            return 0;
        }
        return roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, hashedPositions));
    };
    info.caseful.hashEntries = entriesFor(hashedPositionsCase);
    info.nocase.hashEntries = entriesFor(hashedPositionsNocase);

    DEBUG_PRINTF("caseful: boundary=%u, positions=%u, hashedPositions=%u, "
                 "hashEntries=%u\n",
                 info.caseful.boundary, info.caseful.positions,
                 hashedPositionsCase, info.caseful.hashEntries);
    DEBUG_PRINTF("nocase: boundary=%u, positions=%u, hashedPositions=%u, "
                 "hashEntries=%u\n",
                 info.nocase.boundary, info.nocase.positions,
                 hashedPositionsNocase, info.nocase.hashEntries);

    return info;
}
static
void fillHashes(const vector<ue2_case_string> &lits, size_t max_len,
RoseLongLitHashEntry *tab, size_t numEntries, bool nocase,
const map<u32, u32> &litToOffsetVal) {
const u32 nbits = lg2(numEntries);
map<u32, deque<pair<u32, u32>>> bucketToLitOffPairs;
map<u32, u64a> bucketToBitfield;
for (u32 lit_id = 0; lit_id < lits.size(); lit_id++) {
const ue2_case_string &lit = lits[lit_id];
if (nocase != lit.nocase) {
continue;
}
for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) {
const u8 *substr = (const u8 *)lit.s.c_str() + offset;
u32 h = hashLongLiteral(substr, max_len, lit.nocase);
u32 h_ent = h & ((1U << nbits) - 1);
u32 h_low = (h >> nbits) & 63;
bucketToLitOffPairs[h_ent].emplace_back(lit_id, offset);
bucketToBitfield[h_ent] |= (1ULL << h_low);
}
}
// this used to be a set<u32>, but a bitset is much much faster given that
// we're using it only for membership testing.
boost::dynamic_bitset<> filledBuckets(numEntries); // all zero by default.
// sweep out bitfield entries and save the results swapped accordingly
// also, anything with bitfield entries is put in filledBuckets
for (const auto &m : bucketToBitfield) {
const u32 &bucket = m.first;
const u64a &contents = m.second;
tab[bucket].bitfield = contents;
filledBuckets.set(bucket);
}
// store out all our chains based on free values in our hash table.
// find nearest free locations that are empty (there will always be more
// entries than strings, at present)
for (auto &m : bucketToLitOffPairs) {
u32 bucket = m.first;
deque<pair<u32, u32>> &d = m.second;
// sort d by distance of the residual string (len minus our depth into
// the string). We need to put the 'furthest back' string first...
stable_sort(d.begin(), d.end(),
[](const pair<u32, u32> &a, const pair<u32, u32> &b) {
if (a.second != b.second) {
return a.second > b.second; /* longest is first */
}
return a.first < b.first;
});
while (1) {
// first time through is always at bucket, then we fill in links
filledBuckets.set(bucket);
RoseLongLitHashEntry *ent = &tab[bucket];
u32 lit_id = d.front().first;
u32 offset = d.front().second;
ent->state = verify_u32(litToOffsetVal.at(lit_id) +
offset + max_len);
ent->link = (u32)LINK_INVALID;
d.pop_front();
if (d.empty()) {
break;
}
// now, if there is another value
// find a bucket for it and put in 'bucket' and repeat
// all we really need to do is find something not in filledBuckets,
// ideally something close to bucket
// we search backward and forward from bucket, trying to stay as
// close as possible.
UNUSED bool found = false;
int bucket_candidate = 0;
for (u32 k = 1; k < numEntries * 2; k++) {
bucket_candidate = bucket + (((k & 1) == 0)
? (-(int)k / 2) : (k / 2));
if (bucket_candidate < 0 ||
(size_t)bucket_candidate >= numEntries) {
continue;
}
if (!filledBuckets.test(bucket_candidate)) {
found = true;
break;
}
}
assert(found);
bucket = bucket_candidate;
ent->link = bucket;
}
}
}
/**
 * \brief Build the long literal table and add it to \a blob.
 *
 * The table consists of, in order: a RoseLongLitTable header, an array of
 * RoseLongLiteral offsets (one per literal plus a terminating end entry), the
 * literal strings themselves, the caseful hash table and the nocase hash
 * table.
 *
 * \param build Build state; the table is only built when build.cc.streaming
 *              is set.
 * \param blob Engine blob that receives the finished table.
 * \param lits Long literals. Modified in place: the last character of each is
 *             removed, the set is sorted (caseful first, then lexicographic),
 *             and literals that are prefixes of another literal in the same
 *             mode (including duplicates) are erased.
 * \param longLitLengthThreshold Literal length threshold; the table works
 *             with this value minus one.
 * \param historyRequired In/out: raised to at least the threshold minus one
 *             when a table is built.
 * \param longLitStreamStateRequired Out: number of stream state bytes
 *             required, written when a table is built.
 * \return The value returned by blob.add() for the table, or 0 if no table was
 *         built (not streaming, or no literals); in that case neither output
 *         parameter is written.
 */
u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob,
                          vector<ue2_case_string> &lits,
                          size_t longLitLengthThreshold,
                          size_t *historyRequired,
                          size_t *longLitStreamStateRequired) {
    // Everything below is expressed as a history requirement, i.e. literal
    // length minus one.
    const size_t max_len = longLitLengthThreshold - 1;

    // The long literal hash table is only built in streaming mode.
    if (!build.cc.streaming) {
        return 0;
    }

    if (lits.empty()) {
        DEBUG_PRINTF("no long literals\n");
        return 0;
    }

    // Only partial matches are of interest here, not full matches, so the
    // final character of every literal is dropped.
    for (auto &literal : lits) {
        assert(!literal.s.empty());
        literal.s.pop_back();
    }

    // Caseful literals first, then nocase; lexicographic within each mode.
    const auto byModeThenText = [](const ue2_case_string &lhs,
                                   const ue2_case_string &rhs) -> bool {
        return lhs.nocase != rhs.nocase ? lhs.nocase < rhs.nocase
                                        : lhs.s < rhs.s;
    };
    stable_sort(lits.begin(), lits.end(), byModeThenText);

    // Remove literals that are a prefix of another literal in the same mode
    // (duplicates included). The scan runs in reverse so that the longest
    // string of each prefix family is the one retained.
    const auto coveredBy = [](const ue2_case_string &kept,
                              const ue2_case_string &next) -> bool {
        if (kept.nocase != next.nocase || kept.s.size() < next.s.size()) {
            return false;
        }
        return equal(next.s.begin(), next.s.end(), kept.s.begin());
    };
    const auto survivors = unique(lits.rbegin(), lits.rend(), coveredBy);
    // In forward order the discarded elements sit at the front.
    lits.erase(lits.begin(), survivors.base());

    const LongLitInfo info = analyzeLongLits(lits, max_len);

    // Work out the layout: header, literal offset table (one extra entry
    // marks the end), literal strings, caseful hash table, nocase hash table.
    const size_t headerSize = ROUNDUP_16(sizeof(RoseLongLitTable));
    const size_t litTabOffset = headerSize;
    const size_t litTabNumEntries = lits.size() + 1;
    const size_t litTabSize =
        ROUNDUP_16(litTabNumEntries * sizeof(RoseLongLiteral));
    const size_t stringsOffset = litTabOffset + litTabSize;
    const size_t stringsSize =
        ROUNDUP_16(info.caseful.positions + info.nocase.positions);
    const size_t htOffsetCase = stringsOffset + stringsSize;
    const size_t htSizeCase =
        info.caseful.hashEntries * sizeof(RoseLongLitHashEntry);
    const size_t htOffsetNocase = htOffsetCase + htSizeCase;
    const size_t htSizeNocase =
        info.nocase.hashEntries * sizeof(RoseLongLitHashEntry);
    const size_t tabSize = ROUNDUP_16(htOffsetNocase + htSizeNocase);

    // The +2 leaves room for the largest value itself and for the fact that
    // stored positions are offset by one so that zero can mean "no stream
    // state value".
    u8 bitsCase = lg2(roundUpToPowerOfTwo(info.caseful.positions + 2));
    u8 bitsNocase = lg2(roundUpToPowerOfTwo(info.nocase.positions + 2));
    u32 stateBytes = ROUNDUP_N(bitsCase + bitsNocase, 8) / 8;

    auto table = aligned_zmalloc_unique<char>(tabSize);
    assert(table); // otherwise would have thrown std::bad_alloc

    char *cursor = table.get();

    // Header.
    auto *header = reinterpret_cast<RoseLongLitTable *>(cursor);
    header->maxLen = verify_u8(max_len); // u8, so must not exceed 255

    header->boundaryCase = info.caseful.boundary;
    header->hashOffsetCase = verify_u32(htOffsetCase);
    header->hashNBitsCase = lg2(info.caseful.hashEntries);
    header->streamStateBitsCase = bitsCase;

    header->boundaryNocase = info.nocase.boundary;
    header->hashOffsetNocase = verify_u32(htOffsetNocase);
    header->hashNBitsNocase = lg2(info.nocase.hashEntries);
    header->streamStateBitsNocase = bitsNocase;

    assert(stateBytes < sizeof(u64a));
    header->streamStateBytes = verify_u8(stateBytes); // u8
    cursor += headerSize;

    // Literal offset table, followed immediately by the string data.
    auto *litTab = reinterpret_cast<RoseLongLiteral *>(cursor);
    cursor += litTabSize;

    map<u32, u32> litToOffsetVal;
    for (size_t idx = 0; idx < lits.size(); idx++) {
        const u32 entry = verify_u32(idx);
        const u32 offset = verify_u32(cursor - table.get());

        // Record where this literal's bytes start...
        litTab[entry].offset = offset;
        litToOffsetVal.emplace(entry, offset);

        // ...and copy them in, advancing past them.
        const auto &s = lits[idx].s;
        memcpy(cursor, s.c_str(), s.size());
        cursor += s.size();
    }

    // The extra final entry holds the end of the string data.
    litTab[lits.size()].offset = verify_u32(cursor - table.get());

    // Hash tables: caseful first, nocase directly after it.
    cursor = table.get() + htOffsetCase;
    fillHashes(lits, max_len, reinterpret_cast<RoseLongLitHashEntry *>(cursor),
               info.caseful.hashEntries, false, litToOffsetVal);
    cursor += htSizeCase;

    fillHashes(lits, max_len, reinterpret_cast<RoseLongLitHashEntry *>(cursor),
               info.nocase.hashEntries, true, litToOffsetVal);
    cursor += htSizeNocase;

    assert(cursor <= table.get() + tabSize);

    DEBUG_PRINTF("built streaming table, size=%zu\n", tabSize);
    DEBUG_PRINTF("requires %zu bytes of history\n", max_len);
    DEBUG_PRINTF("requires %u bytes of stream state\n", stateBytes);

    *historyRequired = max(*historyRequired, max_len);
    *longLitStreamStateRequired = stateBytes;

    return blob.add(table.get(), tabSize, 16);
}
} // namespace ue2