fdr: move long literal handling into Rose

Move the hash table used for long literal support in streaming mode from
FDR to Rose, and introduce new instructions CHECK_LONG_LIT and
CHECK_LONG_LIT_NOCASE to perform literal confirm for long literals.

This simplifies FDR confirm and guarantees that HWLM matchers will only
be used for literals fewer than 256 bytes long.
Justin Viiret 2016-09-07 15:59:23 +10:00 committed by Matthew Barr
parent 6ed30194ce
commit 68bf473e2e
40 changed files with 1759 additions and 1310 deletions
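
In outline, the new scheme splits confirm for a long literal in two: the HWLM
matcher reports a match on the tail of the literal that lies in the current
buffer, and a CHECK_LONG_LIT / CHECK_LONG_LIT_NOCASE instruction in the role
program then verifies the remaining prefix against the long-literal history
buffer that Rose now maintains (see roseCheckLongLiteral below). A minimal
caseful sketch of that check, with illustrative names rather than the
engine's real API:

#include <cstddef>
#include <cstring>

// Sketch only: confirm a long literal given the suffix bytes visible in the
// current scan buffer plus the history Rose keeps for long literals. 'suf'
// points at where the literal's suffix begins in the current buffer; the
// name and signature are illustrative, not the real API.
static bool confirmLongLit(const unsigned char *suf, std::size_t suffix_len,
                           const unsigned char *hist, std::size_t hist_len,
                           const unsigned char *lit, std::size_t lit_len) {
    if (suffix_len > lit_len) {
        suffix_len = lit_len; // whole literal lies in the current buffer
    }
    std::size_t prefix_len = lit_len - suffix_len;
    if (prefix_len > hist_len) {
        return false; // not enough buffered history to confirm
    }
    // The prefix must match the end of the history buffer...
    if (std::memcmp(hist + hist_len - prefix_len, lit, prefix_len) != 0) {
        return false;
    }
    // ...and the suffix must match the bytes in the current buffer.
    return std::memcmp(suf, lit + prefix_len, suffix_len) == 0;
}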

View File

@ -425,7 +425,6 @@ set (hs_exec_SRCS
src/fdr/fdr_internal.h
src/fdr/fdr_confirm.h
src/fdr/fdr_confirm_runtime.h
src/fdr/fdr_streaming_runtime.h
src/fdr/flood_runtime.h
src/fdr/fdr_loadval.h
src/fdr/teddy.c
@ -531,6 +530,8 @@ set (hs_exec_SRCS
src/rose/init.h
src/rose/init.c
src/rose/stream.c
src/rose/stream_long_lit.h
src/rose/stream_long_lit_hash.h
src/rose/match.h
src/rose/match.c
src/rose/miracle.h
@ -612,8 +613,6 @@ SET (hs_SRCS
src/fdr/fdr_engine_description.cpp
src/fdr/fdr_engine_description.h
src/fdr/fdr_internal.h
src/fdr/fdr_streaming_compile.cpp
src/fdr/fdr_streaming_internal.h
src/fdr/flood_compile.cpp
src/fdr/teddy_compile.cpp
src/fdr/teddy_compile.h
@ -874,6 +873,8 @@ SET (hs_SRCS
src/rose/rose_build_impl.h
src/rose/rose_build_infix.cpp
src/rose/rose_build_infix.h
src/rose/rose_build_long_lit.cpp
src/rose/rose_build_long_lit.h
src/rose/rose_build_lookaround.cpp
src/rose/rose_build_lookaround.h
src/rose/rose_build_matchers.cpp

View File

@ -31,7 +31,6 @@
#include "fdr_confirm_runtime.h"
#include "fdr_internal.h"
#include "fdr_loadval.h"
#include "fdr_streaming_runtime.h"
#include "flood_runtime.h"
#include "teddy.h"
#include "teddy_internal.h"
@ -809,8 +808,6 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
len,
hbuf,
0,
hbuf, // nocase
0,
start,
cb,
ctxt,
@ -828,14 +825,12 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
size_t hlen, const u8 *buf, size_t len,
size_t start, HWLMCallback cb, void *ctxt,
hwlm_group_t groups, u8 *stream_state) {
hwlm_group_t groups) {
struct FDR_Runtime_Args a = {
buf,
len,
hbuf,
hlen,
hbuf, // nocase - start same as caseful, override later if needed
hlen, // nocase
start,
cb,
ctxt,
@ -844,7 +839,6 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
* the history buffer (they may be garbage). */
hbuf ? unaligned_load_u64a(hbuf + hlen - sizeof(u64a)) : (u64a)0
};
fdrUnpackState(fdr, &a, stream_state);
hwlm_error_t ret;
if (unlikely(a.start_offset >= a.len)) {
@ -854,6 +848,5 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
ret = funcs[fdr->engineID](fdr, &a, groups);
}
fdrPackState(fdr, &a, stream_state);
return ret;
}

View File

@ -43,10 +43,6 @@ extern "C" {
struct FDR;
/** \brief Returns non-zero if the contents of the stream state indicate that
* there is active FDR history beyond the regularly used history. */
u32 fdrStreamStateActive(const struct FDR *fdr, const u8 *stream_state);
/**
* \brief Block-mode scan.
*
@ -74,12 +70,11 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
* \param cb Callback to call when a match is found.
* \param ctxt Caller-provided context pointer supplied to callback on match.
* \param groups Initial groups mask.
* \param stream_state Persistent stream state for use by FDR.
*/
hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
size_t hlen, const u8 *buf, size_t len,
size_t start, HWLMCallback cb, void *ctxt,
hwlm_group_t groups, u8 *stream_state);
hwlm_group_t groups);
#ifdef __cplusplus
}

View File

@ -39,6 +39,7 @@
#include "teddy_engine_description.h"
#include "grey.h"
#include "ue2common.h"
#include "hwlm/hwlm_build.h"
#include "util/alloc.h"
#include "util/compare.h"
#include "util/dump_mask.h"
@ -495,14 +496,34 @@ FDRCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) {
} // namespace
static
size_t maxMaskLen(const vector<hwlmLiteral> &lits) {
size_t rv = 0;
for (const auto &lit : lits) {
rv = max(rv, lit.msk.size());
}
return rv;
}
static
void setHistoryRequired(hwlmStreamingControl &stream_ctl,
const vector<hwlmLiteral> &lits) {
size_t max_mask_len = maxMaskLen(lits);
// we want enough history to manage the longest literal and the longest
// mask.
stream_ctl.literal_history_required = max(maxLen(lits), max_mask_len) - 1;
}
static
aligned_unique_ptr<FDR>
fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
const target_t &target, const Grey &grey, u32 hint,
hwlmStreamingControl *stream_control) {
pair<aligned_unique_ptr<u8>, size_t> link(nullptr, 0);
if (stream_control) {
link = fdrBuildTableStreaming(lits, *stream_control);
setHistoryRequired(*stream_control, lits);
}
DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");

View File

@ -339,7 +339,7 @@ getFDRConfirm(const vector<hwlmLiteral> &lits, bool applyOneCharOpt,
ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));
if (next(i) == e) {
finalLI.next = 0x0;
finalLI.next = 0;
} else {
// our next field represents an adjustment on top of
// current address + the actual size of the literal

View File

@ -74,10 +74,8 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
if (loc < buf) {
u32 full_overhang = buf - loc;
const u8 *history = caseless ? a->buf_history_nocase
: a->buf_history;
size_t len_history = caseless ? a->len_history_nocase
: a->len_history;
const u8 *history = a->buf_history;
size_t len_history = a->len_history;
// can't do a vectored confirm either if we don't have
// the bytes
@ -123,8 +121,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
const u8 *loc2 = buf + i - li->extended_size + 1 - pullBackAmount;
if (loc2 < buf) {
u32 full_overhang = buf - loc2;
size_t len_history = caseless ? a->len_history_nocase
: a->len_history;
size_t len_history = a->len_history;
if (full_overhang > len_history) {
goto out;
}

View File

@ -100,8 +100,6 @@ struct FDR_Runtime_Args {
size_t len;
const u8 *buf_history;
size_t len_history;
const u8 *buf_history_nocase;
size_t len_history_nocase;
size_t start_offset;
HWLMCallback cb;
void *ctxt;

View File

@ -1,425 +0,0 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "fdr_internal.h"
#include "fdr_streaming_internal.h"
#include "fdr_compile_internal.h"
#include "hwlm/hwlm_build.h"
#include "util/alloc.h"
#include "util/bitutils.h"
#include "util/target_info.h"
#include "util/verify_types.h"
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <deque>
#include <set>
#include <boost/dynamic_bitset.hpp>
using namespace std;
using boost::dynamic_bitset;
namespace ue2 {
namespace {
struct LongLitOrder {
bool operator()(const hwlmLiteral &i1, const hwlmLiteral &i2) const {
if (i1.nocase != i2.nocase) {
return i1.nocase < i2.nocase;
} else {
return i1.s < i2.s;
}
}
};
}
static
bool hwlmLitEqual(const hwlmLiteral &l1, const hwlmLiteral &l2) {
return l1.s == l2.s && l1.nocase == l2.nocase;
}
static
u32 roundUpToPowerOfTwo(u32 x) {
x -= 1;
x |= (x >> 1);
x |= (x >> 2);
x |= (x >> 4);
x |= (x >> 8);
x |= (x >> 16);
return x + 1;
}
/**
* \brief Creates a long literals vector containing all literals of length > max_len.
*
* The last char of each literal is trimmed as we're not interested in full
* matches, only partial matches.
*
* Literals are sorted (by caseful/caseless, then lexicographical order) and
* made unique.
*
* The ID of each literal is set to its position in the vector.
*
* \return False if there aren't any long literals.
*/
static
bool setupLongLits(const vector<hwlmLiteral> &lits,
vector<hwlmLiteral> &long_lits, size_t max_len) {
long_lits.reserve(lits.size());
for (const auto &lit : lits) {
if (lit.s.length() > max_len) {
hwlmLiteral tmp = lit; // copy
tmp.s.pop_back();
tmp.id = 0; // recalc later
tmp.groups = 0; // filled in later by hash bucket(s)
long_lits.push_back(move(tmp));
}
}
if (long_lits.empty()) {
return false;
}
// sort long_literals by caseful/caseless and in lexicographical order,
// remove duplicates
stable_sort(long_lits.begin(), long_lits.end(), LongLitOrder());
auto new_end = unique(long_lits.begin(), long_lits.end(), hwlmLitEqual);
long_lits.erase(new_end, long_lits.end());
// fill in ids; not currently used
for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
i->id = distance(long_lits.begin(), i);
}
return true;
}
// boundaries are the 'start' boundaries for each 'mode': boundary[CASEFUL]
// is the index one above the largest caseful index, i.e. where the caseless
// literals begin
// positions[CASEFUL] is the total number of character positions in caseful
// strings (stream)
// hashedPositions[CASEFUL] is the number of positions hashed for caseful
// strings (not returned - a temporary)
// hashEntries[CASEFUL] is the number of hash table entries for caseful
// strings (hashedPositions rounded up to the nearest power of two, with a
// floor of 4096)
static
void analyzeLits(const vector<hwlmLiteral> &long_lits, size_t max_len,
u32 *boundaries, u32 *positions, u32 *hashEntries) {
u32 hashedPositions[MAX_MODES];
for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
boundaries[m] = verify_u32(long_lits.size());
positions[m] = 0;
hashedPositions[m] = 0;
}
for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
if (i->nocase) {
boundaries[CASEFUL] = verify_u32(distance(long_lits.begin(), i));
break;
}
}
for (const auto &lit : long_lits) {
Modes m = lit.nocase ? CASELESS : CASEFUL;
for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) {
hashedPositions[m]++;
}
positions[m] += lit.s.size();
}
for (u32 m = CASEFUL; m < MAX_MODES; m++) {
hashEntries[m] = hashedPositions[m]
? roundUpToPowerOfTwo(MAX(4096, hashedPositions[m]))
: 0;
}
#ifdef DEBUG_COMPILE
printf("analyzeLits:\n");
for (Modes m = CASEFUL; m < MAX_MODES; m++) {
printf("mode %s boundary %d positions %d hashedPositions %d "
"hashEntries %d\n",
(m == CASEFUL) ? "caseful" : "caseless", boundaries[m],
positions[m], hashedPositions[m], hashEntries[m]);
}
printf("\n");
#endif
}
static
u32 hashLit(const hwlmLiteral &l, u32 offset, size_t max_len, Modes m) {
return streaming_hash((const u8 *)l.s.c_str() + offset, max_len, m);
}
// sort by 'distance from start'
namespace {
struct OffsetIDFromEndOrder {
const vector<hwlmLiteral> &lits; // not currently used
explicit OffsetIDFromEndOrder(const vector<hwlmLiteral> &lits_in)
: lits(lits_in) {}
bool operator()(const pair<u32, u32> &i1, const pair<u32, u32> &i2) const {
if (i1.second != i2.second) {
// longest is 'first', so > not <
return i1.second > i2.second;
}
return i1.first < i2.first;
}
};
}
static
void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
FDRSHashEntry *tab, size_t numEntries, Modes mode,
map<u32, u32> &litToOffsetVal) {
const u32 nbits = lg2(numEntries);
map<u32, deque<pair<u32, u32> > > bucketToLitOffPairs;
map<u32, u64a> bucketToBitfield;
for (const auto &lit : long_lits) {
if ((mode == CASELESS) != lit.nocase) {
continue;
}
for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) {
u32 h = hashLit(lit, j, max_len, mode);
u32 h_ent = h & ((1U << nbits) - 1);
u32 h_low = (h >> nbits) & 63;
bucketToLitOffPairs[h_ent].emplace_back(lit.id, j);
bucketToBitfield[h_ent] |= (1ULL << h_low);
}
}
// this used to be a set<u32>, but a bitset is much much faster given that
// we're using it only for membership testing.
dynamic_bitset<> filledBuckets(numEntries); // all bits zero by default.
// sweep out bitfield entries and save the results swapped accordingly
// also, anything with bitfield entries is put in filledBuckets
for (const auto &m : bucketToBitfield) {
const u32 &bucket = m.first;
const u64a &contents = m.second;
tab[bucket].bitfield = contents;
filledBuckets.set(bucket);
}
// store out all our chains based on free values in our hash table.
// find nearest free locations that are empty (there will always be more
// entries than strings, at present)
for (auto &m : bucketToLitOffPairs) {
u32 bucket = m.first;
deque<pair<u32, u32>> &d = m.second;
// sort d by distance of the residual string (len minus our depth into
// the string). We need to put the 'furthest back' string first...
stable_sort(d.begin(), d.end(), OffsetIDFromEndOrder(long_lits));
while (1) {
// first time through is always at bucket, then we fill in links
filledBuckets.set(bucket);
FDRSHashEntry *ent = &tab[bucket];
u32 lit_id = d.front().first;
u32 offset = d.front().second;
ent->state = verify_u32(litToOffsetVal[lit_id] + offset + max_len);
ent->link = (u32)LINK_INVALID;
d.pop_front();
if (d.empty()) {
break;
}
// now, if there is another value
// find a bucket for it and put in 'bucket' and repeat
// all we really need to do is find something not in filledBuckets,
// ideally something close to bucket
// we search backward and forward from bucket, trying to stay as
// close as possible.
UNUSED bool found = false;
int bucket_candidate = 0;
for (u32 k = 1; k < numEntries * 2; k++) {
bucket_candidate = bucket + (((k & 1) == 0)
? (-(int)k / 2) : (k / 2));
if (bucket_candidate < 0 ||
(size_t)bucket_candidate >= numEntries) {
continue;
}
if (!filledBuckets.test(bucket_candidate)) {
found = true;
break;
}
}
assert(found);
bucket = bucket_candidate;
ent->link = bucket;
}
}
}
static
size_t maxMaskLen(const vector<hwlmLiteral> &lits) {
size_t rv = 0;
for (const auto &lit : lits) {
rv = max(rv, lit.msk.size());
}
return rv;
}
pair<aligned_unique_ptr<u8>, size_t>
fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
hwlmStreamingControl &stream_control) {
// Refuse to compile if we are forced to have less than the minimum
// history required for long-literal support, full stop.
// Otherwise, choose the maximum of the preferred history quantity
// (currently a fairly extravagant 32) and the already-used history
// quantity, subject to the limit of stream_control.history_max.
const size_t MIN_HISTORY_REQUIRED = 32;
if (MIN_HISTORY_REQUIRED > stream_control.history_max) {
throw std::logic_error("Cannot set history to minimum history required");
}
size_t max_len =
MIN(stream_control.history_max,
MAX(MIN_HISTORY_REQUIRED, stream_control.history_min));
assert(max_len >= MIN_HISTORY_REQUIRED);
size_t max_mask_len = maxMaskLen(lits);
vector<hwlmLiteral> long_lits;
if (!setupLongLits(lits, long_lits, max_len) || false) {
// "Don't need to do anything" path, not really a fail
DEBUG_PRINTF("Streaming literal path produces no table\n");
// we want enough history to manage the longest literal and the longest
// mask.
stream_control.literal_history_required =
max(maxLen(lits), max_mask_len) - 1;
stream_control.literal_stream_state_required = 0;
return {nullptr, size_t{0}};
}
// Ensure that we have enough room for the longest mask.
if (max_mask_len) {
max_len = max(max_len, max_mask_len - 1);
}
u32 boundary[MAX_MODES];
u32 positions[MAX_MODES];
u32 hashEntries[MAX_MODES];
analyzeLits(long_lits, max_len, boundary, positions, hashEntries);
// first assess the size and find our caseless threshold
size_t headerSize = ROUNDUP_16(sizeof(FDRSTableHeader));
size_t litTabOffset = headerSize;
size_t litTabNumEntries = long_lits.size() + 1;
size_t litTabSize = ROUNDUP_16(litTabNumEntries * sizeof(FDRSLiteral));
size_t wholeLitTabOffset = litTabOffset + litTabSize;
size_t totalWholeLitTabSize = ROUNDUP_16(positions[CASEFUL] +
positions[CASELESS]);
size_t htOffset[MAX_MODES];
size_t htSize[MAX_MODES];
htOffset[CASEFUL] = wholeLitTabOffset + totalWholeLitTabSize;
htSize[CASEFUL] = hashEntries[CASEFUL] * sizeof(FDRSHashEntry);
htOffset[CASELESS] = htOffset[CASEFUL] + htSize[CASEFUL];
htSize[CASELESS] = hashEntries[CASELESS] * sizeof(FDRSHashEntry);
size_t tabSize = ROUNDUP_16(htOffset[CASELESS] + htSize[CASELESS]);
// We need to add +2 to both of these: one to allow space for the actual
// largest value, and one because we add one when storing out a position so
// that zero can mean "no stream state value"
u8 streamBits[MAX_MODES];
streamBits[CASEFUL] = lg2(roundUpToPowerOfTwo(positions[CASEFUL] + 2));
streamBits[CASELESS] = lg2(roundUpToPowerOfTwo(positions[CASELESS] + 2));
u32 tot_state_bytes = (streamBits[CASEFUL] + streamBits[CASELESS] + 7) / 8;
auto secondaryTable = aligned_zmalloc_unique<u8>(tabSize);
assert(secondaryTable); // otherwise would have thrown std::bad_alloc
// then fill it in
u8 * ptr = secondaryTable.get();
FDRSTableHeader * header = (FDRSTableHeader *)ptr;
// fill in header
header->pseudoEngineID = (u32)0xffffffff;
header->N = verify_u8(max_len); // u8 so doesn't matter; won't go > 255
for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
header->boundary[m] = boundary[m];
header->hashOffset[m] = verify_u32(htOffset[m]);
header->hashNBits[m] = lg2(hashEntries[m]);
header->streamStateBits[m] = streamBits[m];
}
assert(tot_state_bytes < sizeof(u64a));
header->streamStateBytes = verify_u8(tot_state_bytes); // u8
ptr += headerSize;
// now fill in the rest
FDRSLiteral * litTabPtr = (FDRSLiteral *)ptr;
ptr += litTabSize;
map<u32, u32> litToOffsetVal;
for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
u32 entry = verify_u32(i - long_lits.begin());
u32 offset = verify_u32(ptr - secondaryTable.get());
// point the table entry to the string location
litTabPtr[entry].offset = offset;
litToOffsetVal[entry] = offset;
// copy the string into the string location
memcpy(ptr, i->s.c_str(), i->s.size());
ptr += i->s.size(); // advance past the copied string
}
// fill in final lit table entry with current ptr (serves as end value)
litTabPtr[long_lits.size()].offset = verify_u32(ptr - secondaryTable.get());
// fill hash tables
ptr = secondaryTable.get() + htOffset[CASEFUL];
for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
fillHashes(long_lits, max_len, (FDRSHashEntry *)ptr, hashEntries[m],
(Modes)m, litToOffsetVal);
ptr += htSize[m];
}
// tell the world what we did
stream_control.literal_history_required = max_len;
stream_control.literal_stream_state_required = tot_state_bytes;
return {move(secondaryTable), tabSize};
}
} // namespace ue2

View File

@ -1,152 +0,0 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FDR_STREAMING_INTERNAL_H
#define FDR_STREAMING_INTERNAL_H
#include "ue2common.h"
#include "fdr_internal.h"
#include "util/unaligned.h"
// tertiary table:
// a header (FDRSTableHeader)
// long_lits.size()+1 entries holding an offset to the string in the
// 'whole literal table' (FDRSLiteral structure)
// the whole literal table - every string packed in (freeform)
// hash table (caseful) (FDRSHashEntry)
// hash table (caseless) (FDRSHashEntry)
enum Modes {
CASEFUL = 0,
CASELESS = 1,
MAX_MODES = 2
};
// We have one of these structures hanging off the 'link' of our secondary
// FDR table that handles streaming strings
struct FDRSTableHeader {
u32 pseudoEngineID; // set to 0xffffffff to indicate this isn't an FDR
// string id one beyond the maximum entry for this type of literal
// boundary[CASEFUL] is the end of the caseful literals
// boundary[CASELESS] is the end of the caseless literals and one beyond
// the largest literal id (the size of the littab)
u32 boundary[MAX_MODES];
// offsets are 0 if no such table exists
// offset from the base of the tertiary structure to the hash table
u32 hashOffset[MAX_MODES];
u32 hashNBits[MAX_MODES]; // lg2 of the size of the hash table
u8 streamStateBits[MAX_MODES];
u8 streamStateBytes; // total size of packed stream state in bytes
u8 N; // prefix lengths
u16 pad;
};
// One of these structures per literal entry in our secondary FDR table.
struct FDRSLiteral {
u32 offset;
// potentially - another u32 to point to the 'next lesser included literal'
// which would be a literal that overlaps this one in such a way that a
// failure to match _this_ literal can leave us in a state that we might
// still match that literal. Offset information might also be called for,
// in which case we might be wanting to use a FDRSLiteralOffset
};
typedef u32 FDRSLiteralOffset;
#define LINK_INVALID 0xffffffff
// One of these structures per hash table entry in our secondary FDR table
struct FDRSHashEntry {
u64a bitfield;
FDRSLiteralOffset state;
u32 link;
};
static really_inline
u32 get_start_lit_idx(const struct FDRSTableHeader * h, enum Modes m) {
return m == CASEFUL ? 0 : h->boundary[m-1];
}
static really_inline
u32 get_end_lit_idx(const struct FDRSTableHeader * h, enum Modes m) {
return h->boundary[m];
}
static really_inline
const struct FDRSLiteral * getLitTab(const struct FDRSTableHeader * h) {
return (const struct FDRSLiteral *) (((const u8 *)h) +
ROUNDUP_16(sizeof(struct FDRSTableHeader)));
}
static really_inline
u32 getBaseOffsetOfLits(const struct FDRSTableHeader * h, enum Modes m) {
return getLitTab(h)[get_start_lit_idx(h, m)].offset;
}
static really_inline
u32 packStateVal(const struct FDRSTableHeader * h, enum Modes m, u32 v) {
return v - getBaseOffsetOfLits(h, m) + 1;
}
static really_inline
u32 unpackStateVal(const struct FDRSTableHeader * h, enum Modes m, u32 v) {
return v + getBaseOffsetOfLits(h, m) - 1;
}
static really_inline
u32 has_bit(const struct FDRSHashEntry * ent, u32 bit) {
return (ent->bitfield >> bit) & 0x1;
}
static really_inline
u32 streaming_hash(const u8 *ptr, UNUSED size_t len, enum Modes mode) {
const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL;
const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL;
assert(len >= 32);
u64a v1 = unaligned_load_u64a(ptr);
u64a v2 = unaligned_load_u64a(ptr + 8);
u64a v3 = unaligned_load_u64a(ptr + 16);
if (mode == CASELESS) {
v1 &= CASEMASK;
v2 &= CASEMASK;
v3 &= CASEMASK;
}
v1 *= MULTIPLIER;
v2 *= (MULTIPLIER*MULTIPLIER);
v3 *= (MULTIPLIER*MULTIPLIER*MULTIPLIER);
v1 >>= 32;
v2 >>= 32;
v3 >>= 32;
return v1 ^ v2 ^ v3;
}
#endif
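
One detail of streaming_hash above is worth calling out: the caseless path
does not lowercase its input, it masks each byte with 0xdf, which clears the
0x20 bit and so folds ASCII 'a'-'z' onto 'A'-'Z' before the multiply. A tiny
standalone illustration of just that bit trick:

#include <cassert>

int main() {
    const unsigned char CASEMASK = 0xdf; // clears the 0x20 bit in each byte
    assert(('a' & CASEMASK) == 'A');     // lowercase folds to uppercase
    assert(('A' & CASEMASK) == 'A');     // uppercase is unchanged
    assert(('z' & CASEMASK) == 'Z');
    return 0;
}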

View File

@ -1,368 +0,0 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FDR_STREAMING_RUNTIME_H
#define FDR_STREAMING_RUNTIME_H
#include "fdr_streaming_internal.h"
#include "util/partial_store.h"
#include <string.h>
static really_inline
const struct FDRSTableHeader * getSHDR(const struct FDR * fdr) {
const u8 * linkPtr = ((const u8 *)fdr) + fdr->link;
assert(linkPtr);
// test that it's not really an engineID, but a 'pseudo engine id'
assert(*(const u32 *)linkPtr == 0xffffffff);
return (const struct FDRSTableHeader *)linkPtr;
}
// Reads from stream state and unpacks values into stream state table.
static really_inline
void getStreamStates(const struct FDRSTableHeader * streamingTable,
const u8 * stream_state, u32 * table) {
assert(streamingTable);
assert(stream_state);
assert(table);
u8 ss_bytes = streamingTable->streamStateBytes;
u8 ssb = streamingTable->streamStateBits[CASEFUL];
UNUSED u8 ssb_nc = streamingTable->streamStateBits[CASELESS];
assert(ss_bytes == (ssb + ssb_nc + 7) / 8);
#if defined(ARCH_32_BIT)
// On 32-bit hosts, we may be able to avoid having to do any u64a
// manipulation at all.
if (ss_bytes <= 4) {
u32 ssb_mask = (1U << ssb) - 1;
u32 streamVal = partial_load_u32(stream_state, ss_bytes);
table[CASEFUL] = (u32)(streamVal & ssb_mask);
table[CASELESS] = (u32)(streamVal >> ssb);
return;
}
#endif
u64a ssb_mask = (1ULL << ssb) - 1;
u64a streamVal = partial_load_u64a(stream_state, ss_bytes);
table[CASEFUL] = (u32)(streamVal & ssb_mask);
table[CASELESS] = (u32)(streamVal >> (u64a)ssb);
}
#ifndef NDEBUG
// Defensive checking (used in assert) that these table values don't overflow
// outside the range available.
static really_inline UNUSED
u32 streamingTableOverflow(u32 * table, u8 ssb, u8 ssb_nc) {
u32 ssb_mask = (1ULL << (ssb)) - 1;
if (table[CASEFUL] & ~ssb_mask) {
return 1;
}
u32 ssb_nc_mask = (1ULL << (ssb_nc)) - 1;
if (table[CASELESS] & ~ssb_nc_mask) {
return 1;
}
return 0;
}
#endif
// Reads from stream state table and packs values into stream state.
static really_inline
void setStreamStates(const struct FDRSTableHeader * streamingTable,
u8 * stream_state, u32 * table) {
assert(streamingTable);
assert(stream_state);
assert(table);
u8 ss_bytes = streamingTable->streamStateBytes;
u8 ssb = streamingTable->streamStateBits[CASEFUL];
UNUSED u8 ssb_nc = streamingTable->streamStateBits[CASELESS];
assert(ss_bytes == (ssb + ssb_nc + 7) / 8);
assert(!streamingTableOverflow(table, ssb, ssb_nc));
#if defined(ARCH_32_BIT)
// On 32-bit hosts, we may be able to avoid having to do any u64a
// manipulation at all.
if (ss_bytes <= 4) {
u32 stagingStreamState = table[CASEFUL];
stagingStreamState |= (table[CASELESS] << ssb);
partial_store_u32(stream_state, stagingStreamState, ss_bytes);
return;
}
#endif
u64a stagingStreamState = (u64a)table[CASEFUL];
stagingStreamState |= (u64a)table[CASELESS] << ((u64a)ssb);
partial_store_u64a(stream_state, stagingStreamState, ss_bytes);
}
u32 fdrStreamStateActive(const struct FDR * fdr, const u8 * stream_state) {
if (!stream_state) {
return 0;
}
const struct FDRSTableHeader * streamingTable = getSHDR(fdr);
u8 ss_bytes = streamingTable->streamStateBytes;
// We just care if there are any bits set, and the test below is faster
// than a partial_load_u64a (especially on 32-bit hosts).
for (u32 i = 0; i < ss_bytes; i++) {
if (*stream_state) {
return 1;
}
++stream_state;
}
return 0;
}
// binary search for the literal index that contains the current state
static really_inline
u32 findLitTabEntry(const struct FDRSTableHeader * streamingTable,
u32 stateValue, enum Modes m) {
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
u32 lo = get_start_lit_idx(streamingTable, m);
u32 hi = get_end_lit_idx(streamingTable, m);
// Now move stateValue back by one so that we're looking for the
// litTab entry that includes the string, not the one 'one past' it
stateValue -= 1;
assert(lo != hi);
assert(litTab[lo].offset <= stateValue);
assert(litTab[hi].offset > stateValue);
// binary search to find the entry e such that:
// litTab[e].offsetToLiteral <= stateValue < litTab[e+1].offsetToLiteral
while (lo + 1 < hi) {
u32 mid = (lo + hi) / 2;
if (litTab[mid].offset <= stateValue) {
lo = mid;
} else { // litTab[mid].offset > stateValue
hi = mid;
}
}
assert(litTab[lo].offset <= stateValue);
assert(litTab[hi].offset > stateValue);
return lo;
}
static really_inline
void fdrUnpackStateMode(struct FDR_Runtime_Args *a,
const struct FDRSTableHeader *streamingTable,
const struct FDRSLiteral * litTab,
const u32 *state_table,
const enum Modes m) {
if (!state_table[m]) {
return;
}
u32 stateValue = unpackStateVal(streamingTable, m, state_table[m]);
u32 idx = findLitTabEntry(streamingTable, stateValue, m);
size_t found_offset = litTab[idx].offset;
const u8 * found_buf = found_offset + (const u8 *)streamingTable;
size_t found_sz = stateValue - found_offset;
if (m == CASEFUL) {
a->buf_history = found_buf;
a->len_history = found_sz;
} else {
a->buf_history_nocase = found_buf;
a->len_history_nocase = found_sz;
}
}
static really_inline
void fdrUnpackState(const struct FDR * fdr, struct FDR_Runtime_Args * a,
const u8 * stream_state) {
// nothing to do if there's no stream state for the case
if (!stream_state) {
return;
}
const struct FDRSTableHeader * streamingTable = getSHDR(fdr);
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
u32 state_table[MAX_MODES];
getStreamStates(streamingTable, stream_state, state_table);
fdrUnpackStateMode(a, streamingTable, litTab, state_table, CASEFUL);
fdrUnpackStateMode(a, streamingTable, litTab, state_table, CASELESS);
}
static really_inline
u32 do_single_confirm(const struct FDRSTableHeader *streamingTable,
const struct FDR_Runtime_Args *a, u32 hashState,
enum Modes m) {
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
u32 idx = findLitTabEntry(streamingTable, hashState, m);
size_t found_offset = litTab[idx].offset;
const u8 * s1 = found_offset + (const u8 *)streamingTable;
assert(hashState > found_offset);
size_t l1 = hashState - found_offset;
const u8 * buf = a->buf;
size_t len = a->len;
const char nocase = m != CASEFUL;
if (l1 > len) {
const u8 * hist = nocase ? a->buf_history_nocase : a->buf_history;
size_t hist_len = nocase ? a->len_history_nocase : a->len_history;
if (l1 > len+hist_len) {
return 0; // Break out - not enough total history
}
size_t overhang = l1 - len;
assert(overhang <= hist_len);
if (cmpForward(hist + hist_len - overhang, s1, overhang, nocase)) {
return 0;
}
s1 += overhang;
l1 -= overhang;
}
// if we got here, either we didn't need history or the comparison
// against history succeeded
assert(l1 <= len);
if (cmpForward(buf + len - l1, s1, l1, nocase)) {
return 0;
}
return hashState; // our new state
}
static really_inline
void fdrFindStreamingHash(const struct FDR_Runtime_Args *a,
const struct FDRSTableHeader *streamingTable,
u8 hash_len, u32 *hashes) {
u8 tempbuf[128];
const u8 *base;
if (hash_len > a->len) {
assert(hash_len <= 128);
size_t overhang = hash_len - a->len;
assert(overhang <= a->len_history);
memcpy(tempbuf, a->buf_history + a->len_history - overhang, overhang);
memcpy(tempbuf + overhang, a->buf, a->len);
base = tempbuf;
} else {
assert(hash_len <= a->len);
base = a->buf + a->len - hash_len;
}
if (streamingTable->hashNBits[CASEFUL]) {
hashes[CASEFUL] = streaming_hash(base, hash_len, CASEFUL);
}
if (streamingTable->hashNBits[CASELESS]) {
hashes[CASELESS] = streaming_hash(base, hash_len, CASELESS);
}
}
static really_inline
const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable,
u32 h, const enum Modes m) {
u32 nbits = streamingTable->hashNBits[m];
if (!nbits) {
return NULL;
}
u32 h_ent = h & ((1 << nbits) - 1);
u32 h_low = (h >> nbits) & 63;
const struct FDRSHashEntry *tab =
(const struct FDRSHashEntry *)((const u8 *)streamingTable
+ streamingTable->hashOffset[m]);
const struct FDRSHashEntry *ent = tab + h_ent;
if (!has_bit(ent, h_low)) {
return NULL;
}
return ent;
}
static really_inline
void fdrPackStateMode(u32 *state_table, const struct FDR_Runtime_Args *a,
const struct FDRSTableHeader *streamingTable,
const struct FDRSHashEntry *ent, const enum Modes m) {
assert(ent);
assert(streamingTable->hashNBits[m]);
const struct FDRSHashEntry *tab =
(const struct FDRSHashEntry *)((const u8 *)streamingTable
+ streamingTable->hashOffset[m]);
while (1) {
u32 tmp = 0;
if ((tmp = do_single_confirm(streamingTable, a, ent->state, m))) {
state_table[m] = packStateVal(streamingTable, m, tmp);
break;
}
if (ent->link == LINK_INVALID) {
break;
}
ent = tab + ent->link;
}
}
static really_inline
void fdrPackState(const struct FDR *fdr, const struct FDR_Runtime_Args *a,
u8 *stream_state) {
// nothing to do if there's no stream state for the case
if (!stream_state) {
return;
}
// get pointers to the streamer FDR and the tertiary structure
const struct FDRSTableHeader *streamingTable = getSHDR(fdr);
assert(streamingTable->N);
u32 state_table[MAX_MODES] = {0, 0};
// if we don't have enough history, we don't need to do anything
if (streamingTable->N <= a->len + a->len_history) {
u32 hashes[MAX_MODES] = {0, 0};
fdrFindStreamingHash(a, streamingTable, streamingTable->N, hashes);
const struct FDRSHashEntry *ent_ful = getEnt(streamingTable,
hashes[CASEFUL], CASEFUL);
const struct FDRSHashEntry *ent_less = getEnt(streamingTable,
hashes[CASELESS], CASELESS);
if (ent_ful) {
fdrPackStateMode(state_table, a, streamingTable, ent_ful,
CASEFUL);
}
if (ent_less) {
fdrPackStateMode(state_table, a, streamingTable, ent_less,
CASELESS);
}
}
setStreamStates(streamingTable, stream_state, state_table);
}
#endif
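
The pack/unpack pair above stores both mode values in one integer: the
caseful value occupies the low streamStateBits[CASEFUL] bits and the caseless
value sits immediately above it, with partial_load/partial_store trimming the
result to streamStateBytes. A minimal round trip of that layout (plain stdint
code; the 13-bit width is just an assumed example):

#include <cassert>
#include <cstdint>

int main() {
    const unsigned ssb = 13;         // caseful field width in bits
    const uint64_t caseful = 0x1234; // fits in 13 bits
    const uint64_t caseless = 0x56;
    // pack: caseful in the low bits, caseless shifted above it
    const uint64_t state = caseful | (caseless << ssb);
    // unpack: mask off the low field, shift down for the high field
    assert((state & ((1ULL << ssb) - 1)) == caseful);
    assert((state >> ssb) == caseless);
    return 0;
}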

View File

@ -200,8 +200,7 @@ hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len,
hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch,
size_t len, size_t start, HWLMCallback cb,
void *ctxt, hwlm_group_t groups,
u8 *stream_state) {
void *ctxt, hwlm_group_t groups) {
const u8 *hbuf = scratch->core_info.hbuf;
const size_t hlen = scratch->core_info.hlen;
const u8 *buf = scratch->core_info.buf;
@ -234,13 +233,10 @@ hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch,
DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type);
aa = &t->accel1;
}
// if no active stream state, use acceleration
if (!fdrStreamStateActive(HWLM_C_DATA(t), stream_state)) {
do_accel_streaming(aa, hbuf, hlen, buf, len, &start);
}
do_accel_streaming(aa, hbuf, hlen, buf, len, &start);
DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups,
start);
return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len,
start, cb, ctxt, groups, stream_state);
start, cb, ctxt, groups);
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -132,8 +132,7 @@ hwlm_error_t hwlmExec(const struct HWLM *tab, const u8 *buf, size_t len,
hwlm_error_t hwlmExecStreaming(const struct HWLM *tab,
struct hs_scratch *scratch, size_t len,
size_t start, HWLMCallback callback,
void *context, hwlm_group_t groups,
u8 *stream_state);
void *context, hwlm_group_t groups);
#ifdef __cplusplus
} /* extern "C" */

View File

@ -552,6 +552,12 @@ aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
if (stream_control) {
assert(stream_control->history_min <= stream_control->history_max);
// We should not have been passed any literals that are too long to
// match with a maximally-sized history buffer.
assert(all_of(begin(lits), end(lits), [&](const hwlmLiteral &lit) {
return lit.s.length() <= stream_control->history_max + 1;
}));
}
// Check that we haven't exceeded the maximum number of literals.
@ -602,7 +608,6 @@ aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
stream_control->literal_history_required = lit.s.length() - 1;
assert(stream_control->literal_history_required
<= stream_control->history_max);
stream_control->literal_stream_state_required = 0;
}
eng = move(noodle);
} else {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -63,10 +63,6 @@ struct hwlmStreamingControl {
/** \brief OUT parameter: History required by the literal matcher to
* correctly match all literals. */
size_t literal_history_required;
/** OUT parameter: Stream state required by literal matcher in bytes. Can
* be zero, and generally will be small (0-8 bytes). */
size_t literal_stream_state_required;
};
/** \brief Build an \ref HWLM literal matcher runtime structure for a group of

View File

@ -86,6 +86,7 @@ hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in,
const vector<u8> &msk_in, const vector<u8> &cmp_in)
: s(s_in), id(id_in), nocase(nocase_in), noruns(noruns_in),
groups(groups_in), msk(msk_in), cmp(cmp_in) {
assert(s.size() <= HWLM_LITERAL_MAX_LEN);
assert(msk.size() <= HWLM_MASKLEN);
assert(msk.size() == cmp.size());

View File

@ -41,6 +41,9 @@
namespace ue2 {
/** \brief Max length of the literal passed to HWLM. */
#define HWLM_LITERAL_MAX_LEN 255
/** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */
#define HWLM_MASKLEN 8
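
These limits back the guarantee in the commit message: HWLM itself never sees
a literal longer than 255 bytes. For anything longer than the long literal
threshold, the matcher can be given just the literal's trailing bytes (so a
match still ends where the full literal ends), with a CHECK_LONG_LIT
instruction confirming the whole string at match time. A sketch of that
trimming under those assumptions; the real logic lives in the Rose matcher
build code, which this diff does not show:

#include <cstddef>
#include <string>

// Sketch: trim an over-long literal to the tail that HWLM will scan for.
// The helper name and shape are illustrative, not the build code's real API.
struct TrimmedLit {
    std::string hwlm_lit; // what the HWLM matcher is given
    bool needs_confirm;   // true => emit CHECK_LONG_LIT for the full string
};

static TrimmedLit trimForHWLM(const std::string &s, std::size_t threshold) {
    if (s.length() <= threshold) {
        return {s, false};
    }
    // Keep the trailing bytes so a reported match still ends at the same
    // offset as the full literal; the Rose program checks the leading bytes.
    return {s.substr(s.length() - threshold), true};
}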

View File

@ -85,9 +85,4 @@ void roseInitState(const struct RoseEngine *t, char *state) {
init_state(t, state);
init_outfixes(t, state);
// Clear the floating matcher state, if any.
DEBUG_PRINTF("clearing %u bytes of floating matcher state\n",
t->floatingStreamState);
memset(getFloatingMatcherState(t, state), 0, t->floatingStreamState);
}

View File

@ -1331,6 +1331,78 @@ hwlmcb_rv_t roseMatcherEod(const struct RoseEngine *rose,
return HWLM_CONTINUE_MATCHING;
}
static rose_inline
int roseCheckLongLiteral(const struct RoseEngine *t,
const struct hs_scratch *scratch, u64a end,
u32 lit_offset, u32 lit_length, char nocase) {
const struct core_info *ci = &scratch->core_info;
const u8 *lit = getByOffset(t, lit_offset);
DEBUG_PRINTF("check lit at %llu, length %u\n", end, lit_length);
DEBUG_PRINTF("base buf_offset=%llu\n", ci->buf_offset);
if (end < lit_length) {
DEBUG_PRINTF("too short!\n");
return 0;
}
// If any portion of the literal matched in the current buffer, check it.
if (end > ci->buf_offset) {
u32 scan_len = MIN(end - ci->buf_offset, lit_length);
u64a scan_start = end - ci->buf_offset - scan_len;
DEBUG_PRINTF("checking suffix (%u bytes) in buf[%llu:%llu]\n", scan_len,
scan_start, end);
if (cmpForward(ci->buf + scan_start, lit + lit_length - scan_len,
scan_len, nocase)) {
DEBUG_PRINTF("cmp of suffix failed\n");
return 0;
}
}
// If the entirety of the literal was in the current block, we are done.
if (end - lit_length >= ci->buf_offset) {
DEBUG_PRINTF("literal confirmed in current block\n");
return 1;
}
// We still have a prefix which we must test against the buffer prepared by
// the long literal table. This is only done in streaming mode.
assert(t->mode != HS_MODE_BLOCK);
const u8 *ll_buf;
size_t ll_len;
if (nocase) {
ll_buf = scratch->tctxt.ll_buf_nocase;
ll_len = scratch->tctxt.ll_len_nocase;
} else {
ll_buf = scratch->tctxt.ll_buf;
ll_len = scratch->tctxt.ll_len;
}
assert(ll_buf);
u64a lit_start_offset = end - lit_length;
u32 prefix_len = MIN(lit_length, ci->buf_offset - lit_start_offset);
u32 hist_rewind = ci->buf_offset - lit_start_offset;
DEBUG_PRINTF("ll_len=%zu, hist_rewind=%u\n", ll_len, hist_rewind);
if (hist_rewind > ll_len) {
DEBUG_PRINTF("not enough history\n");
return 0;
}
DEBUG_PRINTF("check prefix len=%u from hist (len %zu, rewind %u)\n",
prefix_len, ll_len, hist_rewind);
assert(hist_rewind <= ll_len);
if (cmpForward(ll_buf + ll_len - hist_rewind, lit, prefix_len, nocase)) {
DEBUG_PRINTF("cmp of prefix failed\n");
return 0;
}
DEBUG_PRINTF("cmp succeeded\n");
return 1;
}
static
void updateSeqPoint(struct RoseContext *tctxt, u64a offset,
const char from_mpv) {
@ -1977,6 +2049,26 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
}
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CHECK_LONG_LIT) {
const char nocase = 0;
if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset,
ri->lit_length, nocase)) {
DEBUG_PRINTF("halt: failed long lit check\n");
return HWLM_CONTINUE_MATCHING;
}
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) {
const char nocase = 1;
if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset,
ri->lit_length, nocase)) {
DEBUG_PRINTF("halt: failed nocase long lit check\n");
return HWLM_CONTINUE_MATCHING;
}
}
PROGRAM_NEXT_INSTRUCTION
}
}
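
Both new cases read a literal offset and length out of the instruction; a
sketch of the layout those accesses imply (the actual ROSE_STRUCT definition
lives in rose_program.h, which this diff does not show, so the member order
here is an assumption):

#include <cstdint>

// Sketch assuming only the fields used above (ri->lit_offset, resolved via
// getByOffset, and ri->lit_length); the real layout may differ.
struct CheckLongLitSketch {
    uint8_t code;        // ROSE_INSTR_CHECK_LONG_LIT or ..._NOCASE
    uint32_t lit_offset; // offset of the literal's bytes in the bytecode blob
    uint32_t lit_length; // length of the literal in bytes
};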

View File

@ -37,14 +37,17 @@
#include "rose_build_exclusive.h"
#include "rose_build_groups.h"
#include "rose_build_infix.h"
#include "rose_build_long_lit.h"
#include "rose_build_lookaround.h"
#include "rose_build_matchers.h"
#include "rose_build_program.h"
#include "rose_build_scatter.h"
#include "rose_build_util.h"
#include "rose_build_width.h"
#include "rose_internal.h"
#include "rose_program.h"
#include "hwlm/hwlm.h" /* engine types */
#include "hwlm/hwlm_literal.h"
#include "nfa/castlecompile.h"
#include "nfa/goughcompile.h"
#include "nfa/mcclellancompile.h"
@ -165,6 +168,7 @@ struct RoseResources {
bool has_states = false;
bool checks_groups = false;
bool has_lit_delay = false;
bool has_lit_check = false; // long literal support
bool has_anchored = false;
bool has_eod = false;
};
@ -210,9 +214,16 @@ struct build_context : boost::noncopyable {
* written to the engine_blob. */
vector<u32> litPrograms;
/** \brief List of long literals (ones with CHECK_LONG_LIT instructions)
* that need hash table support. */
vector<ue2_case_string> longLiterals;
/** \brief Minimum offset of a match from the floating table. */
u32 floatingMinLiteralMatchOffset = 0;
/** \brief Long literal length threshold, used in streaming mode. */
size_t longLitLengthThreshold = 0;
/** \brief Contents of the Rose bytecode immediately following the
* RoseEngine. */
RoseEngineBlob engine_blob;
@ -314,7 +325,7 @@ bool needsCatchup(const RoseBuildImpl &build,
}
static
bool isPureFloating(const RoseResources &resources) {
bool isPureFloating(const RoseResources &resources, const CompileContext &cc) {
if (resources.has_outfixes || resources.has_suffixes ||
resources.has_leftfixes) {
DEBUG_PRINTF("has engines\n");
@ -341,6 +352,12 @@ bool isPureFloating(const RoseResources &resources) {
return false;
}
if (cc.streaming && resources.has_lit_check) {
DEBUG_PRINTF("has long literals in streaming mode, which needs "
"long literal table support\n");
return false;
}
if (resources.checks_groups) {
DEBUG_PRINTF("has group checks\n");
return false;
@ -384,10 +401,11 @@ u8 pickRuntimeImpl(const RoseBuildImpl &build, const build_context &bc,
DEBUG_PRINTF("has_states=%d\n", bc.resources.has_states);
DEBUG_PRINTF("checks_groups=%d\n", bc.resources.checks_groups);
DEBUG_PRINTF("has_lit_delay=%d\n", bc.resources.has_lit_delay);
DEBUG_PRINTF("has_lit_check=%d\n", bc.resources.has_lit_check);
DEBUG_PRINTF("has_anchored=%d\n", bc.resources.has_anchored);
DEBUG_PRINTF("has_eod=%d\n", bc.resources.has_eod);
if (isPureFloating(bc.resources)) {
if (isPureFloating(bc.resources, build.cc)) {
return ROSE_RUNTIME_PURE_LITERAL;
}
@ -427,7 +445,7 @@ static
void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount,
u32 anchorStateSize, u32 activeArrayCount,
u32 activeLeftCount, u32 laggedRoseCount,
u32 floatingStreamStateRequired, u32 historyRequired,
u32 longLitStreamStateRequired, u32 historyRequired,
RoseStateOffsets *so) {
u32 curr_offset = 0;
@ -445,8 +463,8 @@ void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount,
so->activeLeftArray_size = mmbit_size(activeLeftCount);
curr_offset += so->activeLeftArray_size;
so->floatingMatcherState = curr_offset;
curr_offset += floatingStreamStateRequired;
so->longLitState = curr_offset;
curr_offset += longLitStreamStateRequired;
// ONE WHOLE BYTE for each active leftfix with lag.
so->leftfixLagTable = curr_offset;
@ -2514,6 +2532,10 @@ void recordResources(RoseResources &resources, const RoseProgram &program) {
case ROSE_INSTR_PUSH_DELAYED:
resources.has_lit_delay = true;
break;
case ROSE_INSTR_CHECK_LONG_LIT:
case ROSE_INSTR_CHECK_LONG_LIT_NOCASE:
resources.has_lit_check = true;
break;
default:
break;
}
@ -2546,6 +2568,25 @@ void recordResources(RoseResources &resources,
}
}
static
void recordLongLiterals(build_context &bc, const RoseProgram &program) {
for (const auto &ri : program) {
if (const auto *ri_check =
dynamic_cast<const RoseInstrCheckLongLit *>(ri.get())) {
DEBUG_PRINTF("found CHECK_LITERAL for string '%s'\n",
escapeString(ri_check->literal).c_str());
bc.longLiterals.emplace_back(ri_check->literal, false);
continue;
}
if (const auto *ri_check =
dynamic_cast<const RoseInstrCheckLongLitNocase *>(ri.get())) {
DEBUG_PRINTF("found CHECK_LITERAL_NOCASE for string '%s'\n",
escapeString(ri_check->literal).c_str());
bc.longLiterals.emplace_back(ri_check->literal, true);
}
}
}
static
u32 writeProgram(build_context &bc, RoseProgram &&program) {
if (program.empty()) {
@ -2560,6 +2601,7 @@ u32 writeProgram(build_context &bc, RoseProgram &&program) {
}
recordResources(bc.resources, program);
recordLongLiterals(bc, program);
u32 len = 0;
auto prog_bytecode = writeProgram(bc.engine_blob, program, &len);
@ -4285,6 +4327,48 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc,
program.add_before_end(make_unique<RoseInstrCheckLitEarly>(min_offset));
}
static
void makeCheckLiteralInstruction(const RoseBuildImpl &build,
const build_context &bc, u32 final_id,
RoseProgram &program) {
const auto &lits = build.final_id_to_literal.at(final_id);
if (lits.size() != 1) {
// Long literals should not share a final_id.
assert(all_of(begin(lits), end(lits), [&](u32 lit_id) {
const rose_literal_id &lit = build.literals.right.at(lit_id);
return lit.table != ROSE_FLOATING ||
lit.s.length() <= bc.longLitLengthThreshold;
}));
return;
}
u32 lit_id = *lits.begin();
if (build.isDelayed(lit_id)) {
return;
}
const rose_literal_id &lit = build.literals.right.at(lit_id);
if (lit.table != ROSE_FLOATING) {
return;
}
if (lit.s.length() <= bc.longLitLengthThreshold) {
return;
}
// Check resource limits as well.
if (lit.s.length() > build.cc.grey.limitLiteralLength) {
throw ResourceLimitError();
}
unique_ptr<RoseInstruction> ri;
if (lit.s.any_nocase()) {
ri = make_unique<RoseInstrCheckLongLitNocase>(lit.s.get_string());
} else {
ri = make_unique<RoseInstrCheckLongLit>(lit.s.get_string());
}
program.add_before_end(move(ri));
}
static
bool hasDelayedLiteral(RoseBuildImpl &build,
const vector<RoseEdge> &lit_edges) {
@ -4312,6 +4396,9 @@ RoseProgram buildLitInitialProgram(RoseBuildImpl &build, build_context &bc,
DEBUG_PRINTF("final_id %u\n", final_id);
// Check long literal info.
makeCheckLiteralInstruction(build, bc, final_id, program);
// Check lit mask.
makeCheckLitMaskInstruction(build, bc, final_id, program);
@ -4838,6 +4925,172 @@ u32 buildEagerQueueIter(const set<u32> &eager, u32 leftfixBeginQueue,
return bc.engine_blob.add_iterator(iter);
}
static
void allocateFinalIdToSet(RoseBuildImpl &build, const set<u32> &lits,
size_t longLitLengthThreshold, u32 *next_final_id) {
const auto &g = build.g;
auto &literal_info = build.literal_info;
auto &final_id_to_literal = build.final_id_to_literal;
/* We can allocate the same final id to multiple literals of the same type
* if they share the same vertex set and trigger the same delayed literal
* ids and squash the same roles and have the same group squashing
* behaviour. Benefits literals cannot be merged. */
for (u32 int_id : lits) {
rose_literal_info &curr_info = literal_info[int_id];
const rose_literal_id &lit = build.literals.right.at(int_id);
const auto &verts = curr_info.vertices;
// Literals with benefits cannot be merged.
if (curr_info.requires_benefits) {
DEBUG_PRINTF("id %u has benefits\n", int_id);
goto assign_new_id;
}
// Long literals (those that require CHECK_LONG_LIT instructions) cannot
// be merged.
if (lit.s.length() > longLitLengthThreshold) {
DEBUG_PRINTF("id %u is a long literal\n", int_id);
goto assign_new_id;
}
if (!verts.empty() && curr_info.delayed_ids.empty()) {
vector<u32> cand;
insert(&cand, cand.end(), g[*verts.begin()].literals);
for (auto v : verts) {
vector<u32> temp;
set_intersection(cand.begin(), cand.end(),
g[v].literals.begin(),
g[v].literals.end(),
inserter(temp, temp.end()));
cand.swap(temp);
}
for (u32 cand_id : cand) {
if (cand_id >= int_id) {
break;
}
const auto &cand_info = literal_info[cand_id];
const auto &cand_lit = build.literals.right.at(cand_id);
if (cand_lit.s.length() > longLitLengthThreshold) {
continue;
}
if (cand_info.requires_benefits) {
continue;
}
if (!cand_info.delayed_ids.empty()) {
/* TODO: allow cases where delayed ids are equivalent.
* This is awkward currently as they have not had their
* final ids allocated yet */
continue;
}
if (lits.find(cand_id) == lits.end()
|| cand_info.vertices.size() != verts.size()
|| cand_info.squash_group != curr_info.squash_group) {
continue;
}
/* if we are squashing groups we need to check if they are the
* same group */
if (cand_info.squash_group
&& cand_info.group_mask != curr_info.group_mask) {
continue;
}
u32 final_id = cand_info.final_id;
assert(final_id != MO_INVALID_IDX);
assert(curr_info.final_id == MO_INVALID_IDX);
curr_info.final_id = final_id;
final_id_to_literal[final_id].insert(int_id);
goto next_lit;
}
}
assign_new_id:
/* oh well, have to give it a fresh one, hang the expense */
DEBUG_PRINTF("allocating final id %u to %u\n", *next_final_id, int_id);
assert(curr_info.final_id == MO_INVALID_IDX);
curr_info.final_id = *next_final_id;
final_id_to_literal[*next_final_id].insert(int_id);
(*next_final_id)++;
next_lit:;
}
}
static
bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) {
assert(lit_id < build.literal_info.size());
const auto &info = build.literal_info[lit_id];
if (!info.vertices.empty()) {
return true;
}
for (const u32 &delayed_id : info.delayed_ids) {
assert(delayed_id < build.literal_info.size());
const rose_literal_info &delayed_info = build.literal_info[delayed_id];
if (!delayed_info.vertices.empty()) {
return true;
}
}
DEBUG_PRINTF("literal %u has no refs\n", lit_id);
return false;
}
/** \brief Allocate final literal IDs for all literals. */
static
void allocateFinalLiteralId(RoseBuildImpl &build,
size_t longLitLengthThreshold) {
set<u32> anch;
set<u32> norm;
set<u32> delay;
/* undelayed ids come first */
assert(build.final_id_to_literal.empty());
u32 next_final_id = 0;
for (u32 i = 0; i < build.literal_info.size(); i++) {
assert(!build.hasFinalId(i));
if (!isUsedLiteral(build, i)) {
/* what is this literal good for? absolutely nothing */
continue;
}
// The special EOD event literal has its own program and does not need
// a real literal ID.
if (i == build.eod_event_literal_id) {
assert(build.eod_event_literal_id != MO_INVALID_IDX);
continue;
}
if (build.isDelayed(i)) {
assert(!build.literal_info[i].requires_benefits);
delay.insert(i);
} else if (build.literals.right.at(i).table == ROSE_ANCHORED) {
anch.insert(i);
} else {
norm.insert(i);
}
}
/* normal lits */
allocateFinalIdToSet(build, norm, longLitLengthThreshold, &next_final_id);
/* next anchored stuff */
build.anchored_base_id = next_final_id;
allocateFinalIdToSet(build, anch, longLitLengthThreshold, &next_final_id);
/* delayed ids come last */
build.delay_base_id = next_final_id;
allocateFinalIdToSet(build, delay, longLitLengthThreshold, &next_final_id);
}
static
aligned_unique_ptr<RoseEngine> addSmallWriteEngine(RoseBuildImpl &build,
aligned_unique_ptr<RoseEngine> rose) {
@ -4873,16 +5126,89 @@ aligned_unique_ptr<RoseEngine> addSmallWriteEngine(RoseBuildImpl &build,
return rose2;
}
/**
* \brief Returns the pair (number of literals, max length) for all real
* literals in the floating table that are in-use.
*/
static
pair<size_t, size_t> floatingCountAndMaxLen(const RoseBuildImpl &build) {
size_t num = 0;
size_t max_len = 0;
for (const auto &e : build.literals.right) {
const u32 id = e.first;
const rose_literal_id &lit = e.second;
if (lit.table != ROSE_FLOATING) {
continue;
}
if (lit.delay) {
// Skip delayed literals, so that we only count the undelayed
// version that ends up in the HWLM table.
continue;
}
if (!isUsedLiteral(build, id)) {
continue;
}
num++;
max_len = max(max_len, lit.s.length());
}
DEBUG_PRINTF("%zu floating literals with max_len=%zu\n", num, max_len);
return {num, max_len};
}
size_t calcLongLitThreshold(const RoseBuildImpl &build,
const size_t historyRequired) {
const auto &cc = build.cc;
// In block mode, we should only use the long literal support for literals
// that cannot be handled by HWLM.
if (!cc.streaming) {
return HWLM_LITERAL_MAX_LEN;
}
size_t longLitLengthThreshold = ROSE_LONG_LITERAL_THRESHOLD_MIN;
// Expand to size of history we've already allocated. Note that we need N-1
// bytes of history to match a literal of length N.
longLitLengthThreshold = max(longLitLengthThreshold, historyRequired + 1);
// If we only have one literal, allow for a larger value in order to avoid
// building a long literal table for a trivial Noodle case that we could
// fit in history.
const auto num_len = floatingCountAndMaxLen(build);
if (num_len.first == 1) {
if (num_len.second > longLitLengthThreshold) {
DEBUG_PRINTF("expanding for single literal of length %zu\n",
num_len.second);
longLitLengthThreshold = num_len.second;
}
}
// Clamp to max history available.
longLitLengthThreshold =
min(longLitLengthThreshold, size_t{cc.grey.maxHistoryAvailable} + 1);
return longLitLengthThreshold;
}
aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
DerivedBoundaryReports dboundary(boundary);
size_t historyRequired = calcHistoryRequired(); // Updated by HWLM.
size_t longLitLengthThreshold = calcLongLitThreshold(*this,
historyRequired);
DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold);
allocateFinalLiteralId(*this, longLitLengthThreshold);
auto anchored_dfas = buildAnchoredDfas(*this);
build_context bc;
bc.floatingMinLiteralMatchOffset =
findMinFloatingLiteralMatch(*this, anchored_dfas);
bc.longLitLengthThreshold = longLitLengthThreshold;
bc.needs_catchup = needsCatchup(*this, anchored_dfas);
recordResources(bc.resources, *this);
if (!anchored_dfas.empty()) {
@ -4944,6 +5270,11 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
u32 eodProgramOffset = writeEodProgram(*this, bc, eodNfaIterOffset);
size_t longLitStreamStateRequired = 0;
u32 longLitTableOffset = buildLongLiteralTable(*this, bc.engine_blob,
bc.longLiterals, longLitLengthThreshold, &historyRequired,
&longLitStreamStateRequired);
vector<mmbit_sparse_iter> activeLeftIter;
buildActiveLeftIter(leftInfoTable, activeLeftIter);
@ -4982,9 +5313,8 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
// Build floating HWLM matcher.
rose_group fgroups = 0;
size_t fsize = 0;
size_t floatingStreamStateRequired = 0;
auto ftable = buildFloatingMatcher(*this, &fgroups, &fsize, &historyRequired,
&floatingStreamStateRequired);
auto ftable = buildFloatingMatcher(*this, bc.longLitLengthThreshold,
&fgroups, &fsize, &historyRequired);
u32 fmatcherOffset = 0;
if (ftable) {
currOffset = ROUNDUP_CL(currOffset);
@ -5057,7 +5387,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
memset(&stateOffsets, 0, sizeof(stateOffsets));
fillStateOffsets(*this, bc.numStates, anchorStateSize,
activeArrayCount, activeLeftCount, laggedRoseCount,
floatingStreamStateRequired, historyRequired,
longLitStreamStateRequired, historyRequired,
&stateOffsets);
scatter_plan_raw state_scatter;
@ -5173,6 +5503,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
engine->ematcherOffset = ematcherOffset;
engine->sbmatcherOffset = sbmatcherOffset;
engine->fmatcherOffset = fmatcherOffset;
engine->longLitTableOffset = longLitTableOffset;
engine->amatcherMinWidth = findMinWidth(*this, ROSE_ANCHORED);
engine->fmatcherMinWidth = findMinWidth(*this, ROSE_FLOATING);
engine->eodmatcherMinWidth = findMinWidth(*this, ROSE_EOD_ANCHORED);
@ -5198,7 +5529,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
engine->totalNumLiterals = verify_u32(literal_info.size());
engine->asize = verify_u32(asize);
engine->ematcherRegionSize = ematcher_region_size;
engine->floatingStreamState = verify_u32(floatingStreamStateRequired);
engine->longLitStreamState = verify_u32(longLitStreamStateRequired);
engine->boundary.reportEodOffset = boundary_out.reportEodOffset;
engine->boundary.reportZeroOffset = boundary_out.reportZeroOffset;

View File

@ -87,172 +87,6 @@ namespace ue2 {
#define ANCHORED_REHOME_DEEP 25
#define ANCHORED_REHOME_SHORT_LEN 3
#ifdef DEBUG
static UNUSED
void printLitInfo(const rose_literal_info &li, u32 id) {
DEBUG_PRINTF("lit_info %u\n", id);
DEBUG_PRINTF(" parent %u%s", li.undelayed_id,
li.delayed_ids.empty() ? "":", children:");
for (u32 d_id : li.delayed_ids) {
printf(" %u", d_id);
}
printf("\n");
DEBUG_PRINTF(" group %llu %s\n", li.group_mask, li.squash_group ? "s":"");
}
#endif
static
void allocateFinalIdToSet(const RoseGraph &g, const set<u32> &lits,
deque<rose_literal_info> *literal_info,
map<u32, set<u32> > *final_id_to_literal,
u32 *next_final_id) {
/* We can allocate the same final id to multiple literals of the same type
* if they share the same vertex set and trigger the same delayed literal
* ids and squash the same roles and have the same group squashing
* behaviour. Literals that require benefits cannot be merged. */
for (u32 int_id : lits) {
rose_literal_info &curr_info = (*literal_info)[int_id];
const auto &verts = curr_info.vertices;
if (!verts.empty() && !curr_info.requires_benefits
&& curr_info.delayed_ids.empty()) {
vector<u32> cand;
insert(&cand, cand.end(), g[*verts.begin()].literals);
for (auto v : verts) {
vector<u32> temp;
set_intersection(cand.begin(), cand.end(),
g[v].literals.begin(),
g[v].literals.end(),
inserter(temp, temp.end()));
cand.swap(temp);
}
for (u32 cand_id : cand) {
if (cand_id >= int_id) {
break;
}
const rose_literal_info &cand_info = (*literal_info)[cand_id];
if (cand_info.requires_benefits) {
continue;
}
if (!cand_info.delayed_ids.empty()) {
/* TODO: allow cases where delayed ids are equivalent.
* This is awkward currently as they have not had their
* final ids allocated yet */
continue;
}
if (lits.find(cand_id) == lits.end()
|| cand_info.vertices.size() != verts.size()
|| cand_info.squash_group != curr_info.squash_group) {
continue;
}
/* if we are squashing groups we need to check if they are the
* same group */
if (cand_info.squash_group
&& cand_info.group_mask != curr_info.group_mask) {
continue;
}
u32 final_id = cand_info.final_id;
assert(final_id != MO_INVALID_IDX);
assert(curr_info.final_id == MO_INVALID_IDX);
curr_info.final_id = final_id;
(*final_id_to_literal)[final_id].insert(int_id);
goto next_lit;
}
}
/* oh well, have to give it a fresh one, hang the expense */
DEBUG_PRINTF("allocating final id %u to %u\n", *next_final_id, int_id);
assert(curr_info.final_id == MO_INVALID_IDX);
curr_info.final_id = *next_final_id;
(*final_id_to_literal)[*next_final_id].insert(int_id);
(*next_final_id)++;
next_lit:;
}
}
static
bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) {
assert(lit_id < build.literal_info.size());
const auto &info = build.literal_info[lit_id];
if (!info.vertices.empty()) {
return true;
}
for (const u32 &delayed_id : info.delayed_ids) {
assert(delayed_id < build.literal_info.size());
const rose_literal_info &delayed_info = build.literal_info[delayed_id];
if (!delayed_info.vertices.empty()) {
return true;
}
}
DEBUG_PRINTF("literal %u has no refs\n", lit_id);
return false;
}
/** \brief Allocate final literal IDs for all literals.
*
* These are the literal ids used in the bytecode.
*/
static
void allocateFinalLiteralId(RoseBuildImpl &tbi) {
RoseGraph &g = tbi.g;
set<u32> anch;
set<u32> norm;
set<u32> delay;
/* undelayed ids come first */
assert(tbi.final_id_to_literal.empty());
u32 next_final_id = 0;
for (u32 i = 0; i < tbi.literal_info.size(); i++) {
assert(!tbi.hasFinalId(i));
if (!isUsedLiteral(tbi, i)) {
/* what is this literal good for? absolutely nothing */
continue;
}
// The special EOD event literal has its own program and does not need
// a real literal ID.
if (i == tbi.eod_event_literal_id) {
assert(tbi.eod_event_literal_id != MO_INVALID_IDX);
continue;
}
if (tbi.isDelayed(i)) {
assert(!tbi.literal_info[i].requires_benefits);
delay.insert(i);
} else if (tbi.literals.right.at(i).table == ROSE_ANCHORED) {
anch.insert(i);
} else {
norm.insert(i);
}
}
/* normal lits */
allocateFinalIdToSet(g, norm, &tbi.literal_info, &tbi.final_id_to_literal,
&next_final_id);
/* next anchored stuff */
tbi.anchored_base_id = next_final_id;
allocateFinalIdToSet(g, anch, &tbi.literal_info, &tbi.final_id_to_literal,
&next_final_id);
/* delayed ids come last */
tbi.delay_base_id = next_final_id;
allocateFinalIdToSet(g, delay, &tbi.literal_info, &tbi.final_id_to_literal,
&next_final_id);
}
#define MAX_EXPLOSION_NC 3
static
bool limited_explosion(const ue2_literal &s) {
@ -284,7 +118,12 @@ void RoseBuildImpl::handleMixedSensitivity(void) {
continue;
}
if (limited_explosion(lit.s)) {
// We don't want to explode long literals, as they require confirmation
// with a CHECK_LONG_LIT instruction and need unique final_ids.
// TODO: we could allow explosion for literals where the prefixes
// covered by CHECK_LONG_LIT are identical.
if (lit.s.length() <= ROSE_LONG_LITERAL_THRESHOLD_MIN &&
limited_explosion(lit.s)) {
DEBUG_PRINTF("need to explode existing string '%s'\n",
dumpString(lit.s).c_str());
literal_info[id].requires_explode = true;
@ -1653,7 +1492,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildRose(u32 minWidth) {
/* final prep work */
remapCastleTops(*this);
allocateFinalLiteralId(*this);
inspectRoseTops(*this);
buildRoseSquashMasks(*this);

View File

@ -442,20 +442,26 @@ void dumpTestLiterals(const string &filename, const vector<hwlmLiteral> &lits) {
static
void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) {
auto lits = fillHamsterLiteralList(build, ROSE_ANCHORED);
size_t historyRequired = build.calcHistoryRequired();
size_t longLitLengthThreshold =
calcLongLitThreshold(build, historyRequired);
auto lits = fillHamsterLiteralList(build, ROSE_ANCHORED,
longLitLengthThreshold);
dumpTestLiterals(base + "rose_anchored_test_literals.txt", lits);
lits = fillHamsterLiteralList(build, ROSE_FLOATING);
lits = fillHamsterLiteralList(build, ROSE_FLOATING, longLitLengthThreshold);
dumpTestLiterals(base + "rose_float_test_literals.txt", lits);
lits = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED);
lits = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED,
build.ematcher_region_size);
dumpTestLiterals(base + "rose_eod_test_literals.txt", lits);
if (!build.cc.streaming) {
lits = fillHamsterLiteralList(build, ROSE_FLOATING,
ROSE_SMALL_BLOCK_LEN);
ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
auto lits2 = fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK,
ROSE_SMALL_BLOCK_LEN);
ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
lits.insert(end(lits), begin(lits2), end(lits2));
dumpTestLiterals(base + "rose_smallblock_test_literals.txt", lits);
}

View File

@ -56,6 +56,8 @@ namespace ue2 {
#define ROSE_GROUPS_MAX 64
#define ROSE_LONG_LITERAL_THRESHOLD_MIN 33
struct BoundaryReports;
struct CastleProto;
struct CompileContext;
@ -603,6 +605,9 @@ private:
ReportID next_nfa_report;
};
size_t calcLongLitThreshold(const RoseBuildImpl &build,
const size_t historyRequired);
// Free functions, in rose_build_misc.cpp
bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v);

View File

@ -0,0 +1,348 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "rose_build_long_lit.h"
#include "rose_build_engine_blob.h"
#include "rose_build_impl.h"
#include "stream_long_lit_hash.h"
#include "util/alloc.h"
#include "util/bitutils.h"
#include "util/verify_types.h"
#include "util/compile_context.h"
using namespace std;
namespace ue2 {
/** \brief Minimum size for a non-empty hash table. */
static constexpr u32 MIN_HASH_TABLE_SIZE = 4096;
struct LongLitModeInfo {
u32 boundary = 0; //!< One above the largest index for this mode.
u32 positions = 0; //!< Total number of string positions.
u32 hashEntries = 0; //!< Number of hash table entries.
};
struct LongLitInfo {
LongLitModeInfo caseful;
LongLitModeInfo nocase;
};
static
u32 roundUpToPowerOfTwo(u32 x) {
assert(x != 0);
u32 bits = lg2(x - 1) + 1;
assert(bits < 32);
return 1U << bits;
}
static
LongLitInfo analyzeLongLits(const vector<ue2_case_string> &lits,
size_t max_len) {
LongLitInfo info;
u32 hashedPositionsCase = 0;
u32 hashedPositionsNocase = 0;
// Caseful boundary is the index of the first nocase literal, as we're
// ordered (caseful, nocase).
auto first_nocase = find_if(begin(lits), end(lits),
[](const ue2_case_string &lit) { return lit.nocase; });
info.caseful.boundary = verify_u32(distance(lits.begin(), first_nocase));
// Nocase boundary is the size of the literal set.
info.nocase.boundary = verify_u32(lits.size());
for (const auto &lit : lits) {
if (lit.nocase) {
hashedPositionsNocase += lit.s.size() - max_len;
info.nocase.positions += lit.s.size();
} else {
hashedPositionsCase += lit.s.size() - max_len;
info.caseful.positions += lit.s.size();
}
}
info.caseful.hashEntries = hashedPositionsCase
? roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, hashedPositionsCase))
: 0;
info.nocase.hashEntries = hashedPositionsNocase
? roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, hashedPositionsNocase))
: 0;
DEBUG_PRINTF("caseful: boundary=%u, positions=%u, hashedPositions=%u, "
"hashEntries=%u\n",
info.caseful.boundary, info.caseful.positions,
hashedPositionsCase, info.caseful.hashEntries);
DEBUG_PRINTF("nocase: boundary=%u, positions=%u, hashedPositions=%u, "
"hashEntries=%u\n",
info.nocase.boundary, info.nocase.positions,
hashedPositionsNocase, info.nocase.hashEntries);
return info;
}
static
void fillHashes(const vector<ue2_case_string> &lits, size_t max_len,
RoseLongLitHashEntry *tab, size_t numEntries, bool nocase,
const map<u32, u32> &litToOffsetVal) {
const u32 nbits = lg2(numEntries);
map<u32, deque<pair<u32, u32>>> bucketToLitOffPairs;
map<u32, u64a> bucketToBitfield;
for (u32 lit_id = 0; lit_id < lits.size(); lit_id++) {
const ue2_case_string &lit = lits[lit_id];
if (nocase != lit.nocase) {
continue;
}
for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) {
const u8 *substr = (const u8 *)lit.s.c_str() + offset;
u32 h = hashLongLiteral(substr, max_len, lit.nocase);
u32 h_ent = h & ((1U << nbits) - 1);
u32 h_low = (h >> nbits) & 63;
bucketToLitOffPairs[h_ent].emplace_back(lit_id, offset);
bucketToBitfield[h_ent] |= (1ULL << h_low);
}
}
// this used to be a set<u32>, but a bitset is much much faster given that
// we're using it only for membership testing.
boost::dynamic_bitset<> filledBuckets(numEntries); // all zero by default.
// sweep out bitfield entries and save the results swapped accordingly
// also, anything with bitfield entries is put in filledBuckets
for (const auto &m : bucketToBitfield) {
const u32 &bucket = m.first;
const u64a &contents = m.second;
tab[bucket].bitfield = contents;
filledBuckets.set(bucket);
}
// store out all our chains based on free values in our hash table.
// find nearest free locations that are empty (there will always be more
// entries than strings, at present)
for (auto &m : bucketToLitOffPairs) {
u32 bucket = m.first;
deque<pair<u32, u32>> &d = m.second;
// sort d by distance of the residual string (len minus our depth into
// the string). We need to put the 'furthest back' string first...
stable_sort(d.begin(), d.end(),
[](const pair<u32, u32> &a, const pair<u32, u32> &b) {
if (a.second != b.second) {
return a.second > b.second; /* longest is first */
}
return a.first < b.first;
});
while (1) {
// first time through is always at bucket, then we fill in links
filledBuckets.set(bucket);
RoseLongLitHashEntry *ent = &tab[bucket];
u32 lit_id = d.front().first;
u32 offset = d.front().second;
ent->state = verify_u32(litToOffsetVal.at(lit_id) +
offset + max_len);
ent->link = (u32)LINK_INVALID;
d.pop_front();
if (d.empty()) {
break;
}
// Now, if there is another value, find a bucket for it, store it in
// 'bucket' and repeat. All we really need is a slot not yet in
// filledBuckets, ideally one close to the original bucket: we search
// backward and forward from it, staying as close as possible.
UNUSED bool found = false;
int bucket_candidate = 0;
for (u32 k = 1; k < numEntries * 2; k++) {
bucket_candidate = bucket + (((k & 1) == 0)
? (-(int)k / 2) : (k / 2));
if (bucket_candidate < 0 ||
(size_t)bucket_candidate >= numEntries) {
continue;
}
if (!filledBuckets.test(bucket_candidate)) {
found = true;
break;
}
}
assert(found);
bucket = bucket_candidate;
ent->link = bucket;
}
}
}
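A minimal sketch of the outward probe order used above when a bucket is already occupied; the bucket index and table size are hypothetical:
#include <cstdio>
int main() {
    const int bucket = 8;
    const unsigned numEntries = 16;
    // Mirrors the candidate sequence in fillHashes(): bucket itself first,
    // then bucket - 1, bucket + 1, bucket - 2, bucket + 2, ... with
    // out-of-range slots skipped.
    for (unsigned k = 1; k <= 8; k++) {
        int cand = bucket + (((k & 1) == 0) ? -(int)(k / 2) : (int)(k / 2));
        if (cand < 0 || (unsigned)cand >= numEntries) {
            continue;
        }
        std::printf("k=%u -> bucket %d\n", k, cand);
    }
    return 0;
}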
u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob,
vector<ue2_case_string> &lits,
size_t longLitLengthThreshold,
size_t *historyRequired,
size_t *longLitStreamStateRequired) {
// Work in terms of history requirement (i.e. literal len - 1).
const size_t max_len = longLitLengthThreshold - 1;
// We should only be building the long literal hash table in streaming mode.
if (!build.cc.streaming) {
return 0;
}
if (lits.empty()) {
DEBUG_PRINTF("no long literals\n");
return 0;
}
// The last char of each literal is trimmed as we're not interested in full
// matches, only partial matches.
for (auto &lit : lits) {
assert(!lit.s.empty());
lit.s.pop_back();
}
// Sort by caseful/caseless and in lexicographical order.
stable_sort(begin(lits), end(lits), [](const ue2_case_string &a,
const ue2_case_string &b) {
if (a.nocase != b.nocase) {
return a.nocase < b.nocase;
}
return a.s < b.s;
});
// Find literals that are prefixes of other literals (including
// duplicates). Note that we iterate in reverse, since we want to retain
// only the longest string from a set of prefixes.
auto it = unique(lits.rbegin(), lits.rend(), [](const ue2_case_string &a,
const ue2_case_string &b) {
return a.nocase == b.nocase && a.s.size() >= b.s.size() &&
equal(b.s.begin(), b.s.end(), a.s.begin());
});
// Erase dupes found by unique().
lits.erase(lits.begin(), it.base());
LongLitInfo info = analyzeLongLits(lits, max_len);
// first, lay out the table and compute the size of each region
size_t headerSize = ROUNDUP_16(sizeof(RoseLongLitTable));
size_t litTabOffset = headerSize;
size_t litTabNumEntries = lits.size() + 1;
size_t litTabSize = ROUNDUP_16(litTabNumEntries * sizeof(RoseLongLiteral));
size_t wholeLitTabOffset = litTabOffset + litTabSize;
size_t totalWholeLitTabSize =
ROUNDUP_16(info.caseful.positions + info.nocase.positions);
size_t htOffsetCase = wholeLitTabOffset + totalWholeLitTabSize;
size_t htSizeCase = info.caseful.hashEntries * sizeof(RoseLongLitHashEntry);
size_t htOffsetNocase = htOffsetCase + htSizeCase;
size_t htSizeNocase =
info.nocase.hashEntries * sizeof(RoseLongLitHashEntry);
size_t tabSize = ROUNDUP_16(htOffsetNocase + htSizeNocase);
// We add 2 to each position count: one to allow space for the actual
// largest value, and one because stored positions are offset by one so
// that zero can mean "no stream state value".
u8 streamBitsCase = lg2(roundUpToPowerOfTwo(info.caseful.positions + 2));
u8 streamBitsNocase = lg2(roundUpToPowerOfTwo(info.nocase.positions + 2));
u32 tot_state_bytes = ROUNDUP_N(streamBitsCase + streamBitsNocase, 8) / 8;
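// For example (hypothetical counts): with 40 caseful and 20 caseless
// positions, streamBitsCase = lg2(roundUpToPowerOfTwo(42)) = 6 and
// streamBitsNocase = lg2(roundUpToPowerOfTwo(22)) = 5, giving
// ROUNDUP_N(11, 8) / 8 = 2 bytes of packed stream state.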
auto table = aligned_zmalloc_unique<char>(tabSize);
assert(table); // otherwise would have thrown std::bad_alloc
// then fill it in
char *ptr = table.get();
RoseLongLitTable *header = (RoseLongLitTable *)ptr;
// fill in header
header->maxLen = verify_u8(max_len); // fits in a u8: thresholds never exceed 255
header->boundaryCase = info.caseful.boundary;
header->hashOffsetCase = verify_u32(htOffsetCase);
header->hashNBitsCase = lg2(info.caseful.hashEntries);
header->streamStateBitsCase = streamBitsCase;
header->boundaryNocase = info.nocase.boundary;
header->hashOffsetNocase = verify_u32(htOffsetNocase);
header->hashNBitsNocase = lg2(info.nocase.hashEntries);
header->streamStateBitsNocase = streamBitsNocase;
assert(tot_state_bytes < sizeof(u64a));
header->streamStateBytes = verify_u8(tot_state_bytes); // u8
ptr += headerSize;
// now fill in the rest
RoseLongLiteral *litTabPtr = (RoseLongLiteral *)ptr;
ptr += litTabSize;
map<u32, u32> litToOffsetVal;
for (auto i = lits.begin(), e = lits.end(); i != e; ++i) {
u32 entry = verify_u32(i - lits.begin());
u32 offset = verify_u32(ptr - table.get());
// point the table entry to the string location
litTabPtr[entry].offset = offset;
litToOffsetVal[entry] = offset;
// copy the string into the string location
const auto &s = i->s;
memcpy(ptr, s.c_str(), s.size());
ptr += s.size(); // advance past the string data
}
// fill in final lit table entry with current ptr (serves as end value)
litTabPtr[lits.size()].offset = verify_u32(ptr - table.get());
// fill hash tables
ptr = table.get() + htOffsetCase;
fillHashes(lits, max_len, (RoseLongLitHashEntry *)ptr,
info.caseful.hashEntries, false, litToOffsetVal);
ptr += htSizeCase;
fillHashes(lits, max_len, (RoseLongLitHashEntry *)ptr,
info.nocase.hashEntries, true, litToOffsetVal);
ptr += htSizeNocase;
assert(ptr <= table.get() + tabSize);
DEBUG_PRINTF("built streaming table, size=%zu\n", tabSize);
DEBUG_PRINTF("requires %zu bytes of history\n", max_len);
DEBUG_PRINTF("requires %u bytes of stream state\n", tot_state_bytes);
*historyRequired = max(*historyRequired, max_len);
*longLitStreamStateRequired = tot_state_bytes;
return blob.add(table.get(), tabSize, 16);
}
} // namespace ue2

View File

@ -0,0 +1,51 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_BUILD_LONG_LIT_H
#define ROSE_BUILD_LONG_LIT_H
#include "ue2common.h"
#include <vector>
namespace ue2 {
class RoseBuildImpl;
class RoseEngineBlob;
struct ue2_case_string;
u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob,
std::vector<ue2_case_string> &lits,
size_t longLitLengthThreshold,
size_t *historyRequired,
size_t *longLitStreamStateRequired);
} // namespace ue2
#endif // ROSE_BUILD_LONG_LIT_H

View File

@ -485,7 +485,7 @@ bool isNoRunsVertex(const RoseBuildImpl &build, RoseVertex u) {
static
bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id,
const rose_literal_info &info) {
const rose_literal_info &info, const size_t max_len) {
DEBUG_PRINTF("lit id %u\n", id);
if (info.requires_benefits) {
@ -493,6 +493,11 @@ bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id,
return false;
}
if (build.literals.right.at(id).s.length() > max_len) {
DEBUG_PRINTF("requires literal check\n");
return false;
}
if (isDirectHighlander(build, id, info)) {
DEBUG_PRINTF("highlander direct report\n");
return true;
@ -625,7 +630,7 @@ u64a literalMinReportOffset(const RoseBuildImpl &build,
vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
rose_literal_table table,
u32 max_offset) {
size_t max_len, u32 max_offset) {
vector<hwlmLiteral> lits;
for (const auto &e : build.literals.right) {
@ -663,10 +668,14 @@ vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
const vector<u8> &msk = e.second.msk;
const vector<u8> &cmp = e.second.cmp;
bool noruns = isNoRunsLiteral(build, id, info);
bool noruns = isNoRunsLiteral(build, id, info, max_len);
if (info.requires_explode) {
DEBUG_PRINTF("exploding lit\n");
// We do not require_explode for long literals.
assert(lit.length() <= max_len);
case_iter cit = caseIterateBegin(lit);
case_iter cite = caseIterateEnd();
for (; cit != cite; ++cit) {
@ -687,20 +696,28 @@ vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
msk, cmp);
}
} else {
const std::string &s = lit.get_string();
const bool nocase = lit.any_nocase();
string s = lit.get_string();
bool nocase = lit.any_nocase();
DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, "
"cmp=%s\n",
final_id, escapeString(s).c_str(), (int)nocase, noruns,
dumpMask(msk).c_str(), dumpMask(cmp).c_str());
if (s.length() > max_len) {
DEBUG_PRINTF("truncating to tail of length %zu\n", max_len);
s.erase(0, s.length() - max_len);
// We shouldn't have set a threshold below 8 chars.
assert(msk.size() <= max_len);
}
if (!maskIsConsistent(s, nocase, msk, cmp)) {
DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n");
continue;
}
lits.emplace_back(s, nocase, noruns, final_id, groups, msk, cmp);
lits.emplace_back(move(s), nocase, noruns, final_id, groups, msk,
cmp);
}
}
@ -708,14 +725,15 @@ vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
}
aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
size_t longLitLengthThreshold,
rose_group *fgroups,
size_t *fsize,
size_t *historyRequired,
size_t *streamStateRequired) {
size_t *historyRequired) {
*fsize = 0;
*fgroups = 0;
auto fl = fillHamsterLiteralList(build, ROSE_FLOATING);
auto fl = fillHamsterLiteralList(build, ROSE_FLOATING,
longLitLengthThreshold);
if (fl.empty()) {
DEBUG_PRINTF("empty floating matcher\n");
return nullptr;
@ -747,13 +765,10 @@ aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
if (build.cc.streaming) {
DEBUG_PRINTF("literal_history_required=%zu\n",
ctl.literal_history_required);
DEBUG_PRINTF("literal_stream_state_required=%zu\n",
ctl.literal_stream_state_required);
assert(ctl.literal_history_required <=
build.cc.grey.maxHistoryAvailable);
*historyRequired = max(*historyRequired,
ctl.literal_history_required);
*streamStateRequired = ctl.literal_stream_state_required;
}
*fsize = hwlmSize(ftable.get());
@ -778,8 +793,8 @@ aligned_unique_ptr<HWLM> buildSmallBlockMatcher(const RoseBuildImpl &build,
return nullptr;
}
auto lits = fillHamsterLiteralList(build, ROSE_FLOATING,
ROSE_SMALL_BLOCK_LEN);
auto lits = fillHamsterLiteralList(
build, ROSE_FLOATING, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
if (lits.empty()) {
DEBUG_PRINTF("no floating table\n");
return nullptr;
@ -788,8 +803,9 @@ aligned_unique_ptr<HWLM> buildSmallBlockMatcher(const RoseBuildImpl &build,
return nullptr;
}
auto anchored_lits = fillHamsterLiteralList(build,
ROSE_ANCHORED_SMALL_BLOCK, ROSE_SMALL_BLOCK_LEN);
auto anchored_lits =
fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK,
ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
if (anchored_lits.empty()) {
DEBUG_PRINTF("no small-block anchored literals\n");
return nullptr;
@ -823,7 +839,8 @@ aligned_unique_ptr<HWLM> buildEodAnchoredMatcher(const RoseBuildImpl &build,
size_t *esize) {
*esize = 0;
auto el = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED);
auto el = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED,
build.ematcher_region_size);
if (el.empty()) {
DEBUG_PRINTF("no eod anchored literals\n");

View File

@ -51,13 +51,14 @@ struct hwlmLiteral;
* only lead to a pattern match after max_offset may be excluded.
*/
std::vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
rose_literal_table table, u32 max_offset = ROSE_BOUND_INF);
rose_literal_table table, size_t max_len,
u32 max_offset = ROSE_BOUND_INF);
aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
size_t longLitLengthThreshold,
rose_group *fgroups,
size_t *fsize,
size_t *historyRequired,
size_t *streamStateRequired);
size_t *historyRequired);
aligned_unique_ptr<HWLM> buildSmallBlockMatcher(const RoseBuildImpl &build,
size_t *sbsize);

View File

@ -495,6 +495,24 @@ void RoseInstrEnginesEod::write(void *dest, RoseEngineBlob &blob,
inst->iter_offset = iter_offset;
}
void RoseInstrCheckLongLit::write(void *dest, RoseEngineBlob &blob,
const OffsetMap &offset_map) const {
RoseInstrBase::write(dest, blob, offset_map);
auto *inst = static_cast<impl_type *>(dest);
assert(!literal.empty());
inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1);
inst->lit_length = verify_u32(literal.size());
}
void RoseInstrCheckLongLitNocase::write(void *dest, RoseEngineBlob &blob,
const OffsetMap &offset_map) const {
RoseInstrBase::write(dest, blob, offset_map);
auto *inst = static_cast<impl_type *>(dest);
assert(!literal.empty());
inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1);
inst->lit_length = verify_u32(literal.size());
}
static
OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) {
OffsetMap offset_map;

View File

@ -37,6 +37,7 @@
#include "util/hash.h"
#include "util/make_unique.h"
#include "util/ue2_containers.h"
#include "util/ue2string.h"
#include <algorithm>
#include <array>
@ -1721,6 +1722,62 @@ public:
~RoseInstrMatcherEod() override;
};
class RoseInstrCheckLongLit
: public RoseInstrBaseNoTargets<ROSE_INSTR_CHECK_LONG_LIT,
ROSE_STRUCT_CHECK_LONG_LIT,
RoseInstrCheckLongLit> {
public:
std::string literal;
RoseInstrCheckLongLit(std::string literal_in)
: literal(std::move(literal_in)) {}
bool operator==(const RoseInstrCheckLongLit &ri) const {
return literal == ri.literal;
}
size_t hash() const override {
return hash_all(static_cast<int>(opcode), literal);
}
void write(void *dest, RoseEngineBlob &blob,
const OffsetMap &offset_map) const override;
bool equiv_to(const RoseInstrCheckLongLit &ri, const OffsetMap &,
const OffsetMap &) const {
return literal == ri.literal;
}
};
class RoseInstrCheckLongLitNocase
: public RoseInstrBaseNoTargets<ROSE_INSTR_CHECK_LONG_LIT_NOCASE,
ROSE_STRUCT_CHECK_LONG_LIT_NOCASE,
RoseInstrCheckLongLitNocase> {
public:
std::string literal;
RoseInstrCheckLongLitNocase(std::string literal_in)
: literal(std::move(literal_in)) {
upperString(literal);
}
bool operator==(const RoseInstrCheckLongLitNocase &ri) const {
return literal == ri.literal;
}
size_t hash() const override {
return hash_all(static_cast<int>(opcode), literal);
}
void write(void *dest, RoseEngineBlob &blob,
const OffsetMap &offset_map) const override;
bool equiv_to(const RoseInstrCheckLongLitNocase &ri, const OffsetMap &,
const OffsetMap &) const {
return literal == ri.literal;
}
};
class RoseInstrEnd
: public RoseInstrBaseTrivial<ROSE_INSTR_END, ROSE_STRUCT_END,
RoseInstrEnd> {

View File

@ -610,6 +610,24 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
PROGRAM_CASE(MATCHER_EOD) {}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CHECK_LONG_LIT) {
os << " lit_offset " << ri->lit_offset << endl;
os << " lit_length " << ri->lit_length << endl;
const char *lit = (const char *)t + ri->lit_offset;
os << " literal: \""
<< escapeString(string(lit, ri->lit_length)) << "\"" << endl;
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) {
os << " lit_offset " << ri->lit_offset << endl;
os << " lit_length " << ri->lit_length << endl;
const char *lit = (const char *)t + ri->lit_offset;
os << " literal: \""
<< escapeString(string(lit, ri->lit_length)) << "\"" << endl;
}
PROGRAM_NEXT_INSTRUCTION
default:
os << " UNKNOWN (code " << int{code} << ")" << endl;
os << " <stopping>" << endl;
@ -1031,6 +1049,32 @@ void dumpAnchoredStats(const void *atable, FILE *f) {
}
static
void dumpLongLiteralTable(const RoseEngine *t, FILE *f) {
if (!t->longLitTableOffset) {
return;
}
fprintf(f, "\n");
fprintf(f, "Long literal table (streaming):\n");
const auto *ll_table =
(const struct RoseLongLitTable *)loadFromByteCodeOffset(
t, t->longLitTableOffset);
u32 num_caseful = ll_table->boundaryCase;
u32 num_caseless = ll_table->boundaryNocase - num_caseful;
fprintf(f, " longest len: %u\n", ll_table->maxLen);
fprintf(f, " counts: %u caseful, %u caseless\n", num_caseful,
num_caseless);
fprintf(f, " hash bits: %u caseful, %u caseless\n",
ll_table->hashNBitsCase, ll_table->hashNBitsNocase);
fprintf(f, " state bits: %u caseful, %u caseless\n",
ll_table->streamStateBitsCase, ll_table->streamStateBitsNocase);
fprintf(f, " stream state: %u bytes\n", ll_table->streamStateBytes);
}
// Externally accessible functions
void roseDumpText(const RoseEngine *t, FILE *f) {
@ -1106,7 +1150,7 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
fprintf(f, " - history buffer : %u bytes\n", t->historyRequired);
fprintf(f, " - exhaustion vector : %u bytes\n", (t->ekeyCount + 7) / 8);
fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize);
fprintf(f, " - floating matcher : %u bytes\n", t->floatingStreamState);
fprintf(f, " - long lit matcher : %u bytes\n", t->longLitStreamState);
fprintf(f, " - active array : %u bytes\n",
mmbit_size(t->activeArrayCount));
fprintf(f, " - active rose : %u bytes\n",
@ -1160,6 +1204,8 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
fprintf(f, "\nSmall-block literal matcher stats:\n\n");
hwlmPrintStats(sbtable, f);
}
dumpLongLiteralTable(t, f);
}
#define DUMP_U8(o, member) \
@ -1196,6 +1242,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
DUMP_U32(t, ematcherOffset);
DUMP_U32(t, fmatcherOffset);
DUMP_U32(t, sbmatcherOffset);
DUMP_U32(t, longLitTableOffset);
DUMP_U32(t, amatcherMinWidth);
DUMP_U32(t, fmatcherMinWidth);
DUMP_U32(t, eodmatcherMinWidth);
@ -1245,7 +1292,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
DUMP_U32(t, stateOffsets.anchorState);
DUMP_U32(t, stateOffsets.groups);
DUMP_U32(t, stateOffsets.groups_size);
DUMP_U32(t, stateOffsets.floatingMatcherState);
DUMP_U32(t, stateOffsets.longLitState);
DUMP_U32(t, stateOffsets.somLocation);
DUMP_U32(t, stateOffsets.somValid);
DUMP_U32(t, stateOffsets.somWritable);
@ -1264,7 +1311,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
DUMP_U32(t, ematcherRegionSize);
DUMP_U32(t, somRevCount);
DUMP_U32(t, somRevOffsetOffset);
DUMP_U32(t, floatingStreamState);
DUMP_U32(t, longLitStreamState);
fprintf(f, "}\n");
fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine));
}

View File

@ -217,8 +217,8 @@ struct RoseStateOffsets {
/** Size of packed Rose groups value, in bytes. */
u32 groups_size;
/** State for floating literal matcher (managed by HWLM). */
u32 floatingMatcherState;
/** State for long literal support. */
u32 longLitState;
/** Packed SOM location slots. */
u32 somLocation;
@ -325,6 +325,7 @@ struct RoseEngine {
u32 ematcherOffset; // offset of the eod-anchored literal matcher (bytes)
u32 fmatcherOffset; // offset of the floating literal matcher (bytes)
u32 sbmatcherOffset; // offset of the small-block literal matcher (bytes)
u32 longLitTableOffset; // offset of the long literal table
u32 amatcherMinWidth; /**< minimum number of bytes required for a pattern
* involved with the anchored table to produce a full
* match. */
@ -434,7 +435,7 @@ struct RoseEngine {
u32 ematcherRegionSize; /* max region size to pass to ematcher */
u32 somRevCount; /**< number of som reverse nfas */
u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */
u32 floatingStreamState; // size in bytes
u32 longLitStreamState; // size in bytes
struct scatter_full_plan state_init;
};
@ -445,6 +446,94 @@ struct ALIGN_CL_DIRECTIVE anchored_matcher_info {
u32 anchoredMinDistance; /* start of region to run anchored table over */
};
/**
* \brief Long literal table header.
*/
struct RoseLongLitTable {
/** \brief String ID one beyond the maximum entry for caseful literals. */
u32 boundaryCase;
/**
* \brief String ID one beyond the maximum entry for caseless literals.
* This is also the total number of entries in the literal table.
*/
u32 boundaryNocase;
/**
* \brief Offset of the caseful hash table (relative to RoseLongLitTable
* base).
*
* Offset is zero if no such table exists.
*/
u32 hashOffsetCase;
/**
* \brief Offset of the caseless hash table (relative to RoseLongLitTable
* base).
*
* Offset is zero if no such table exists.
*/
u32 hashOffsetNocase;
/** \brief lg2 of the size of the caseful hash table. */
u32 hashNBitsCase;
/** \brief lg2 of the size of the caseless hash table. */
u32 hashNBitsNocase;
/**
* \brief Number of bits of packed stream state for the caseful hash table.
*/
u8 streamStateBitsCase;
/**
* \brief Number of bits of packed stream state for the caseless hash
* table.
*/
u8 streamStateBitsNocase;
/** \brief Total size of packed stream state in bytes. */
u8 streamStateBytes;
/** \brief Max length of literal prefixes. */
u8 maxLen;
};
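/*
 * Rough sketch of the layout produced by buildLongLiteralTable(), with all
 * offsets relative to this header:
 *
 *   RoseLongLitTable header
 *   RoseLongLiteral offset table (boundaryNocase + 1 entries)
 *   literal string data
 *   caseful hash table (at hashOffsetCase, 1 << hashNBitsCase entries)
 *   caseless hash table (at hashOffsetNocase, 1 << hashNBitsNocase entries)
 */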
/**
* \brief One of these structures per literal entry in our long literal table.
*/
struct RoseLongLiteral {
/**
* \brief Offset of the literal string itself, relative to
* RoseLongLitTable base.
*/
u32 offset;
};
/** \brief "No further links" value used for \ref RoseLongLitHashEntry::link. */
#define LINK_INVALID 0xffffffff
/**
* \brief One of these structures per hash table entry in our long literal
* table.
*/
struct RoseLongLitHashEntry {
/**
* \brief Bitfield used as a quick guard for hash buckets.
*
* For a given hash value N, the six bits of N just above the bucket index
* bits are taken and the corresponding bit is switched on in this bitfield
* if this bucket is used for that hash.
*/
u64a bitfield;
/** \brief Offset, relative to the RoseLongLitTable base, of the end of
* the literal prefix that this hash entry corresponds to. */
u32 state;
/** \brief Hash table index of next entry in the chain for this bucket. */
u32 link;
};
static really_inline
const struct anchored_matcher_info *getALiteralMatcher(
const struct RoseEngine *t) {

View File

@ -117,7 +117,19 @@ enum RoseInstructionCode {
/** \brief Run the EOD-anchored HWLM literal matcher. */
ROSE_INSTR_MATCHER_EOD,
LAST_ROSE_INSTRUCTION = ROSE_INSTR_MATCHER_EOD //!< Sentinel.
/**
* \brief Confirm a case-sensitive literal at the current offset. In
* streaming mode, this makes use of the long literal table.
*/
ROSE_INSTR_CHECK_LONG_LIT,
/**
* \brief Confirm a case-insensitive literal at the current offset. In
* streaming mode, this makes use of the long literal table.
*/
ROSE_INSTR_CHECK_LONG_LIT_NOCASE,
LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_LONG_LIT_NOCASE //!< Sentinel.
};
struct ROSE_STRUCT_END {
@ -465,4 +477,18 @@ struct ROSE_STRUCT_MATCHER_EOD {
u8 code; //!< From enum RoseInstructionCode.
};
/** Note: check failure will halt program. */
struct ROSE_STRUCT_CHECK_LONG_LIT {
u8 code; //!< From enum RoseInstructionCode.
u32 lit_offset; //!< Offset of literal string.
u32 lit_length; //!< Length of literal string.
};
/** Note: check failure will halt program. */
struct ROSE_STRUCT_CHECK_LONG_LIT_NOCASE {
u8 code; //!< From enum RoseInstructionCode.
u32 lit_offset; //!< Offset of literal string.
u32 lit_length; //!< Length of literal string.
};
#endif // ROSE_ROSE_PROGRAM_H

View File

@ -97,8 +97,8 @@ void storeGroups(const struct RoseEngine *t, char *state, rose_group groups) {
}
static really_inline
u8 *getFloatingMatcherState(const struct RoseEngine *t, char *state) {
return (u8 *)(state + t->stateOffsets.floatingMatcherState);
u8 *getLongLitState(const struct RoseEngine *t, char *state) {
return (u8 *)(state + t->stateOffsets.longLitState);
}
static really_inline

View File

@ -33,6 +33,8 @@
#include "miracle.h"
#include "program_runtime.h"
#include "rose.h"
#include "rose_internal.h"
#include "stream_long_lit.h"
#include "hwlm/hwlm.h"
#include "nfa/mcclellan.h"
#include "nfa/nfa_api.h"
@ -406,6 +408,7 @@ void ensureStreamNeatAndTidy(const struct RoseEngine *t, char *state,
roseFlushLastByteHistory(t, scratch, offset + length);
tctxt->lastEndOffset = offset + length;
storeGroups(t, state, tctxt->groups);
storeLongLiteralState(t, state, scratch);
}
static really_inline
@ -588,11 +591,17 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
}
size_t hlength = scratch->core_info.hlen;
char rebuild = 0;
if (hlength) {
// Can only have long literal state or rebuild if this is not the
// first write to this stream.
loadLongLiteralState(t, state, scratch);
rebuild = (scratch->core_info.status & STATUS_DELAY_DIRTY) &&
(t->maxFloatingDelayedMatch == ROSE_BOUND_INF ||
offset < t->maxFloatingDelayedMatch);
}
char rebuild = hlength &&
(scratch->core_info.status & STATUS_DELAY_DIRTY) &&
(t->maxFloatingDelayedMatch == ROSE_BOUND_INF ||
offset < t->maxFloatingDelayedMatch);
DEBUG_PRINTF("**rebuild %hhd status %hhu mfdm %u, offset %llu\n",
rebuild, scratch->core_info.status,
t->maxFloatingDelayedMatch, offset);
@ -621,17 +630,9 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
}
DEBUG_PRINTF("start=%zu\n", start);
u8 *stream_state;
if (t->floatingStreamState) {
stream_state = getFloatingMatcherState(t, state);
} else {
stream_state = NULL;
}
DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length);
hwlmExecStreaming(ftable, scratch, flen, start, roseFloatingCallback,
scratch, tctxt->groups & t->floating_group_mask,
stream_state);
scratch, tctxt->groups & t->floating_group_mask);
}
flush_delay_and_exit:

src/rose/stream_long_lit.h Normal file
View File

@ -0,0 +1,434 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef STREAM_LONG_LIT_H
#define STREAM_LONG_LIT_H
#include "rose.h"
#include "rose_common.h"
#include "rose_internal.h"
#include "stream_long_lit_hash.h"
#include "util/copybytes.h"
static really_inline
const struct RoseLongLiteral *
getLitTab(const struct RoseLongLitTable *ll_table) {
return (const struct RoseLongLiteral *)((const char *)ll_table +
ROUNDUP_16(sizeof(struct RoseLongLitTable)));
}
static really_inline
u32 get_start_lit_idx(const struct RoseLongLitTable *ll_table,
const char nocase) {
return nocase ? ll_table->boundaryCase : 0;
}
static really_inline
u32 get_end_lit_idx(const struct RoseLongLitTable *ll_table,
const char nocase) {
return nocase ? ll_table->boundaryNocase : ll_table->boundaryCase;
}
// search for the literal index that contains the current state
static rose_inline
u32 findLitTabEntry(const struct RoseLongLitTable *ll_table,
u32 stateValue, const char nocase) {
const struct RoseLongLiteral *litTab = getLitTab(ll_table);
u32 lo = get_start_lit_idx(ll_table, nocase);
u32 hi = get_end_lit_idx(ll_table, nocase);
// Now move stateValue back by one so that we're looking for the
// litTab entry that includes the string, not the one 'one past' it.
stateValue -= 1;
assert(lo != hi);
assert(litTab[lo].offset <= stateValue);
assert(litTab[hi].offset > stateValue);
// binary search to find the entry e such that:
// litTab[e].offset <= stateValue < litTab[e+1].offset
while (lo + 1 < hi) {
u32 mid = (lo + hi) / 2;
if (litTab[mid].offset <= stateValue) {
lo = mid;
} else { // (litTab[mid].offset > stateValue) {
hi = mid;
}
}
assert(litTab[lo].offset <= stateValue);
assert(litTab[hi].offset > stateValue);
return lo;
}
// Reads from stream state and unpacks the caseful and caseless values.
static really_inline
void loadLongLitStreamState(const struct RoseLongLitTable *ll_table,
const u8 *ll_state, u32 *state_case,
u32 *state_nocase) {
assert(ll_table);
assert(ll_state);
assert(state_case && state_nocase);
u8 ss_bytes = ll_table->streamStateBytes;
u8 ssb = ll_table->streamStateBitsCase;
UNUSED u8 ssb_nc = ll_table->streamStateBitsNocase;
assert(ss_bytes == (ssb + ssb_nc + 7) / 8);
#if defined(ARCH_32_BIT)
// On 32-bit hosts, we may be able to avoid having to do any u64a
// manipulation at all.
if (ss_bytes <= 4) {
u32 ssb_mask = (1U << ssb) - 1;
u32 streamVal = partial_load_u32(ll_state, ss_bytes);
*state_case = (u32)(streamVal & ssb_mask);
*state_nocase = (u32)(streamVal >> ssb);
return;
}
#endif
u64a ssb_mask = (1ULL << ssb) - 1;
u64a streamVal = partial_load_u64a(ll_state, ss_bytes);
*state_case = (u32)(streamVal & ssb_mask);
*state_nocase = (u32)(streamVal >> ssb);
}
static really_inline
u32 getBaseOffsetOfLits(const struct RoseLongLitTable *ll_table,
const char nocase) {
u32 lit_idx = get_start_lit_idx(ll_table, nocase);
return getLitTab(ll_table)[lit_idx].offset;
}
static really_inline
u32 unpackStateVal(const struct RoseLongLitTable *ll_table, const char nocase,
u32 v) {
return v + getBaseOffsetOfLits(ll_table, nocase) - 1;
}
static really_inline
u32 packStateVal(const struct RoseLongLitTable *ll_table, const char nocase,
u32 v) {
return v - getBaseOffsetOfLits(ll_table, nocase) + 1;
}
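// For example (hypothetical offsets): if the caseful string data begins at
// table-relative offset 100, a hash entry state of 150 packs to
// 150 - 100 + 1 = 51, and unpacking 51 restores 150. The packed value zero
// is reserved to mean "no stream state".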
static rose_inline
void loadLongLiteralStateMode(struct hs_scratch *scratch,
const struct RoseLongLitTable *ll_table,
const struct RoseLongLiteral *litTab,
const u32 state, const char nocase) {
if (!state) {
DEBUG_PRINTF("no state for %s\n", nocase ? "caseless" : "caseful");
return;
}
u32 stateValue = unpackStateVal(ll_table, nocase, state);
u32 idx = findLitTabEntry(ll_table, stateValue, nocase);
size_t found_offset = litTab[idx].offset;
const u8 *found_buf = found_offset + (const u8 *)ll_table;
size_t found_sz = stateValue - found_offset;
struct RoseContext *tctxt = &scratch->tctxt;
if (nocase) {
tctxt->ll_buf_nocase = found_buf;
tctxt->ll_len_nocase = found_sz;
} else {
tctxt->ll_buf = found_buf;
tctxt->ll_len = found_sz;
}
}
static rose_inline
void loadLongLiteralState(const struct RoseEngine *t, char *state,
struct hs_scratch *scratch) {
if (!t->longLitTableOffset) {
return;
}
scratch->tctxt.ll_buf = scratch->core_info.hbuf;
scratch->tctxt.ll_len = scratch->core_info.hlen;
scratch->tctxt.ll_buf_nocase = scratch->core_info.hbuf;
scratch->tctxt.ll_len_nocase = scratch->core_info.hlen;
const struct RoseLongLitTable *ll_table =
getByOffset(t, t->longLitTableOffset);
const struct RoseLongLiteral *litTab = getLitTab(ll_table);
const u8 *ll_state = getLongLitState(t, state);
u32 state_case;
u32 state_nocase;
loadLongLitStreamState(ll_table, ll_state, &state_case, &state_nocase);
loadLongLiteralStateMode(scratch, ll_table, litTab, state_case, 0);
loadLongLiteralStateMode(scratch, ll_table, litTab, state_nocase, 1);
}
static rose_inline
char confirmLongLiteral(const struct RoseLongLitTable *ll_table,
const hs_scratch_t *scratch, u32 hashState,
const char nocase) {
const struct RoseLongLiteral *litTab = getLitTab(ll_table);
u32 idx = findLitTabEntry(ll_table, hashState, nocase);
size_t found_offset = litTab[idx].offset;
const u8 *s = found_offset + (const u8 *)ll_table;
assert(hashState > found_offset);
size_t len = hashState - found_offset;
const u8 *buf = scratch->core_info.buf;
const size_t buf_len = scratch->core_info.len;
if (len > buf_len) {
const struct RoseContext *tctxt = &scratch->tctxt;
const u8 *hist = nocase ? tctxt->ll_buf_nocase : tctxt->ll_buf;
size_t hist_len = nocase ? tctxt->ll_len_nocase : tctxt->ll_len;
if (len > buf_len + hist_len) {
return 0; // Break out - not enough total history
}
size_t overhang = len - buf_len;
assert(overhang <= hist_len);
if (cmpForward(hist + hist_len - overhang, s, overhang, nocase)) {
return 0;
}
s += overhang;
len -= overhang;
}
// if we got here, we don't need history or we compared ok out of history
assert(len <= buf_len);
if (cmpForward(buf + buf_len - len, s, len, nocase)) {
return 0;
}
DEBUG_PRINTF("confirmed hashState=%u\n", hashState);
return 1;
}
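// For example: confirming a 20-byte prefix against a 12-byte stream write
// leaves an 8-byte overhang, which is compared against the tail of the
// long literal history (ll_buf / ll_buf_nocase) before the remaining 12
// bytes are compared against the current buffer.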
static rose_inline
void calcStreamingHash(const struct core_info *ci,
const struct RoseLongLitTable *ll_table, u8 hash_len,
u32 *hash_case, u32 *hash_nocase) {
assert(hash_len >= LONG_LIT_HASH_LEN);
// Our hash function operates over LONG_LIT_HASH_LEN bytes, starting from
// location (end of buffer - hash_len). If this block can be satisfied
// entirely from either the current buffer or the history buffer, we pass
// in the pointer directly; otherwise we must make a copy.
u8 tempbuf[LONG_LIT_HASH_LEN];
const u8 *base;
if (hash_len > ci->len) {
size_t overhang = hash_len - ci->len;
if (overhang >= LONG_LIT_HASH_LEN) {
// Can read enough to hash from inside the history buffer.
assert(overhang <= ci->hlen);
base = ci->hbuf + ci->hlen - overhang;
} else {
// Copy: first chunk from history buffer.
assert(overhang <= ci->hlen);
copy_upto_32_bytes(tempbuf, ci->hbuf + ci->hlen - overhang,
overhang);
// Copy: second chunk from current buffer.
size_t copy_buf_len = LONG_LIT_HASH_LEN - overhang;
assert(copy_buf_len <= ci->len);
copy_upto_32_bytes(tempbuf + overhang, ci->buf, copy_buf_len);
// Read from our temporary buffer for the hash.
base = tempbuf;
}
} else {
// Can read enough to hash from inside the current buffer.
base = ci->buf + ci->len - hash_len;
}
if (ll_table->hashNBitsCase) {
*hash_case = hashLongLiteral(base, LONG_LIT_HASH_LEN, 0);
DEBUG_PRINTF("caseful hash %u\n", *hash_case);
}
if (ll_table->hashNBitsNocase) {
*hash_nocase = hashLongLiteral(base, LONG_LIT_HASH_LEN, 1);
DEBUG_PRINTF("caseless hash %u\n", *hash_nocase);
}
}
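// For example: with hash_len (maxLen) = 32 and a 10-byte stream write, the
// 24-byte hash window needs a 22-byte overhang; since 22 < LONG_LIT_HASH_LEN,
// the window straddles the boundary, so 22 bytes are copied from the history
// tail and 2 bytes from the current buffer into tempbuf before hashing.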
static really_inline
const struct RoseLongLitHashEntry *
getHashTableBase(const struct RoseLongLitTable *ll_table, const char nocase) {
const u32 hashOffset = nocase ? ll_table->hashOffsetNocase
: ll_table->hashOffsetCase;
return (const struct RoseLongLitHashEntry *)((const char *)ll_table +
hashOffset);
}
static rose_inline
const struct RoseLongLitHashEntry *
getLongLitHashEnt(const struct RoseLongLitTable *ll_table, u32 h,
const char nocase) {
u32 nbits = nocase ? ll_table->hashNBitsNocase : ll_table->hashNBitsCase;
if (!nbits) {
return NULL;
}
u32 h_ent = h & ((1 << nbits) - 1);
u32 h_low = (h >> nbits) & 63;
const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, nocase);
const struct RoseLongLitHashEntry *ent = tab + h_ent;
if (!((ent->bitfield >> h_low) & 0x1)) {
return NULL;
}
return ent;
}
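// For example (hypothetical): with hashNBitsCase = 12, hash h selects bucket
// h & 0xfff, and bit (h >> 12) & 63 of that bucket's bitfield is the guard;
// if the guard bit is clear, no literal with this hash was stored, and the
// lookup bails out without running a confirm.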
static rose_inline
u32 storeLongLiteralStateMode(const struct hs_scratch *scratch,
const struct RoseLongLitTable *ll_table,
const struct RoseLongLitHashEntry *ent,
const char nocase) {
assert(ent);
assert(nocase ? ll_table->hashNBitsNocase : ll_table->hashNBitsCase);
const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, nocase);
u32 packed_state = 0;
while (1) {
if (confirmLongLiteral(ll_table, scratch, ent->state, nocase)) {
packed_state = packStateVal(ll_table, nocase, ent->state);
DEBUG_PRINTF("set %s state to %u\n", nocase ? "nocase" : "case",
packed_state);
break;
}
if (ent->link == LINK_INVALID) {
break;
}
ent = tab + ent->link;
}
return packed_state;
}
#ifndef NDEBUG
// Defensive checking (used in assert) that these table values don't overflow
// the range available.
static really_inline
char streamingTableOverflow(u32 state_case, u32 state_nocase, u8 ssb,
u8 ssb_nc) {
u32 ssb_mask = (1ULL << (ssb)) - 1;
if (state_case & ~ssb_mask) {
return 1;
}
u32 ssb_nc_mask = (1ULL << (ssb_nc)) - 1;
if (state_nocase & ~ssb_nc_mask) {
return 1;
}
return 0;
}
#endif
// Packs the caseful and caseless values and writes them out to stream state.
static rose_inline
void storeLongLitStreamState(const struct RoseLongLitTable *ll_table,
u8 *ll_state, u32 state_case, u32 state_nocase) {
assert(ll_table);
assert(ll_state);
u8 ss_bytes = ll_table->streamStateBytes;
u8 ssb = ll_table->streamStateBitsCase;
UNUSED u8 ssb_nc = ll_table->streamStateBitsNocase;
assert(ss_bytes == ROUNDUP_N(ssb + ssb_nc, 8) / 8);
assert(!streamingTableOverflow(state_case, state_nocase, ssb, ssb_nc));
#if defined(ARCH_32_BIT)
// On 32-bit hosts, we may be able to avoid having to do any u64a
// manipulation at all.
if (ss_bytes <= 4) {
u32 stagingStreamState = state_case;
stagingStreamState |= (state_nocase << ssb);
partial_store_u32(ll_state, stagingStreamState, ss_bytes);
return;
}
#endif
u64a stagingStreamState = (u64a)state_case;
stagingStreamState |= (u64a)state_nocase << ssb;
partial_store_u64a(ll_state, stagingStreamState, ss_bytes);
}
static rose_inline
void storeLongLiteralState(const struct RoseEngine *t, char *state,
struct hs_scratch *scratch) {
if (!t->longLitTableOffset) {
DEBUG_PRINTF("no table\n");
return;
}
struct core_info *ci = &scratch->core_info;
const struct RoseLongLitTable *ll_table =
getByOffset(t, t->longLitTableOffset);
assert(ll_table->maxLen);
DEBUG_PRINTF("maxLen=%u, len=%zu, hlen=%zu\n", ll_table->maxLen, ci->len,
ci->hlen);
u32 state_case = 0;
u32 state_nocase = 0;
// If we don't have enough history, we don't need to do anything.
if (ll_table->maxLen <= ci->len + ci->hlen) {
u32 hash_case = 0;
u32 hash_nocase = 0;
calcStreamingHash(ci, ll_table, ll_table->maxLen, &hash_case,
&hash_nocase);
const struct RoseLongLitHashEntry *ent_case =
getLongLitHashEnt(ll_table, hash_case, 0);
const struct RoseLongLitHashEntry *ent_nocase =
getLongLitHashEnt(ll_table, hash_nocase, 1);
DEBUG_PRINTF("ent_caseful=%p, ent_caseless=%p\n", ent_case, ent_nocase);
if (ent_case) {
state_case = storeLongLiteralStateMode(scratch, ll_table,
ent_case, 0);
}
if (ent_nocase) {
state_nocase = storeLongLiteralStateMode(scratch, ll_table,
ent_nocase, 1);
}
}
DEBUG_PRINTF("store {%u, %u}\n", state_case, state_nocase);
u8 *ll_state = getLongLitState(t, state);
storeLongLitStreamState(ll_table, ll_state, state_case, state_nocase);
}
#endif // STREAM_LONG_LIT_H

View File

@ -0,0 +1,65 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef STREAM_LONG_LIT_HASH_H
#define STREAM_LONG_LIT_HASH_H
#include "ue2common.h"
#include "util/unaligned.h"
/** \brief Length of the buffer operated on by \ref hashLongLiteral(). */
#define LONG_LIT_HASH_LEN 24
/** \brief Hash function used for long literal table in streaming mode. */
static really_inline
u32 hashLongLiteral(const u8 *ptr, UNUSED size_t len, char nocase) {
const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL;
const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL;
// We unconditionally hash LONG_LIT_HASH_LEN bytes; all use cases of this
// hash are for strings longer than this.
assert(len >= LONG_LIT_HASH_LEN);
u64a v1 = unaligned_load_u64a(ptr);
u64a v2 = unaligned_load_u64a(ptr + 8);
u64a v3 = unaligned_load_u64a(ptr + 16);
if (nocase) {
v1 &= CASEMASK;
v2 &= CASEMASK;
v3 &= CASEMASK;
}
v1 *= MULTIPLIER;
v2 *= MULTIPLIER * MULTIPLIER;
v3 *= MULTIPLIER * MULTIPLIER * MULTIPLIER;
v1 >>= 32;
v2 >>= 32;
v3 >>= 32;
return v1 ^ v2 ^ v3;
}
#endif // STREAM_LONG_LIT_HASH_H
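For reference, a self-contained sketch of the same hash, using memcpy in place of the codebase's unaligned_load_u64a; the test string is arbitrary:
#include <cstdint>
#include <cstdio>
#include <cstring>
static uint64_t load64(const uint8_t *p) {
    uint64_t v;
    std::memcpy(&v, p, sizeof(v)); // unaligned load, as unaligned_load_u64a does
    return v;
}
static uint32_t hashSketch(const uint8_t *ptr, int nocase) {
    const uint64_t CASEMASK = 0xdfdfdfdfdfdfdfdfULL;
    const uint64_t MULTIPLIER = 0x0b4e0ef37bc32127ULL;
    uint64_t v1 = load64(ptr);
    uint64_t v2 = load64(ptr + 8);
    uint64_t v3 = load64(ptr + 16);
    if (nocase) { // clearing bit 5 of each byte folds ASCII case
        v1 &= CASEMASK;
        v2 &= CASEMASK;
        v3 &= CASEMASK;
    }
    v1 *= MULTIPLIER;
    v2 *= MULTIPLIER * MULTIPLIER;
    v3 *= MULTIPLIER * MULTIPLIER * MULTIPLIER;
    return (uint32_t)(v1 >> 32) ^ (uint32_t)(v2 >> 32) ^ (uint32_t)(v3 >> 32);
}
int main() {
    const char buf[] = "abcdefghijklmnopqrstuvwx"; // 24 bytes hashed
    std::printf("caseful=%08x caseless=%08x\n",
                (unsigned)hashSketch((const uint8_t *)buf, 0),
                (unsigned)hashSketch((const uint8_t *)buf, 1));
    return 0;
}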

View File

@ -736,20 +736,11 @@ void pureLiteralStreamExec(struct hs_stream *stream_state,
assert(scratch);
assert(!can_stop_matching(scratch));
char *state = getMultiState(stream_state);
const struct RoseEngine *rose = stream_state->rose;
const struct HWLM *ftable = getFLiteralMatcher(rose);
size_t len2 = scratch->core_info.len;
u8 *hwlm_stream_state;
if (rose->floatingStreamState) {
hwlm_stream_state = getFloatingMatcherState(rose, state);
} else {
hwlm_stream_state = NULL;
}
DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n",
stream_state->offset, scratch->core_info.len);
@ -761,8 +752,8 @@ void pureLiteralStreamExec(struct hs_stream *stream_state,
// start the match region at zero.
const size_t start = 0;
hwlmExecStreaming(ftable, scratch, len2, start, roseCallback,
scratch, rose->initialGroups, hwlm_stream_state);
hwlmExecStreaming(ftable, scratch, len2, start, roseCallback, scratch,
rose->initialGroups);
if (!told_to_stop_matching(scratch) &&
isAllExhausted(rose, scratch->core_info.exhaustionVector)) {

View File

@ -122,6 +122,26 @@ struct RoseContext {
u32 filledDelayedSlots;
u32 curr_qi; /**< currently executing main queue index during
* \ref nfaQueueExec */
/**
* \brief Buffer for caseful long literal support, used in streaming mode
* only.
*
* If a long literal prefix reached the end of the buffer at the end of the
* previous stream write, the long lit table hashes it and stores the result
* in stream state. At the start of the next write, this value is used to
* point this buffer at the matching prefix string (stored in the bytecode).
*/
const u8 *ll_buf;
/** \brief Length in bytes of the string pointed to by ll_buf. */
size_t ll_len;
/** \brief Caseless version of ll_buf. */
const u8 *ll_buf_nocase;
/** \brief Length in bytes of the string pointed to by ll_buf_nocase. */
size_t ll_len_nocase;
};
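A minimal sketch of the repopulation step described above, with a hypothetical helper name; the literal pointers are assumed to reference prefix strings resident in the bytecode:
static really_inline
void exampleRestoreLongLitBuffers(struct RoseContext *tctxt,
const u8 *lit, size_t lit_len,
const u8 *lit_nocase, size_t lit_len_nocase) {
// Point the context at the bytecode-resident prefix strings.
tctxt->ll_buf = lit;
tctxt->ll_len = lit_len;
tctxt->ll_buf_nocase = lit_nocase;
tctxt->ll_len_nocase = lit_len_nocase;
}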
struct match_deduper {

View File

@ -55,6 +55,29 @@ size_t maxStringSelfOverlap(const std::string &a, bool nocase);
/// Compares two strings, returns non-zero if they're different.
u32 cmp(const char *a, const char *b, size_t len, bool nocase);
/**
* \brief String type that also records whether the whole string is caseful or
* caseless.
*
* You should use \ref ue2_literal if you need to represent a mixed-case
* literal.
*/
struct ue2_case_string {
ue2_case_string(std::string s_in, bool nocase_in)
: s(std::move(s_in)), nocase(nocase_in) {
if (nocase) {
upperString(s);
}
}
bool operator==(const ue2_case_string &other) const {
return s == other.s && nocase == other.nocase;
}
std::string s;
bool nocase;
};
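For example (illustrative only), two caseless spellings compare equal after construction, since both are canonicalised to uppercase:
ue2_case_string a("Teakettle", true);
ue2_case_string b("tEAKETTLE", true);
assert(a == b); // both hold s == "TEAKETTLE"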
struct ue2_literal {
public:
/// Single element proxy, pointed to by our const_iterator.

View File

@ -337,8 +337,8 @@ TEST_P(FDRp, NoRepeat3) {
static
hwlm_error_t safeExecStreaming(const FDR *fdr, const u8 *hbuf, size_t hlen,
const u8 *buf, size_t len, size_t start,
HWLMCallback cb, void *ctxt, hwlm_group_t groups,
u8 *stream_state) {
HWLMCallback cb, void *ctxt,
hwlm_group_t groups) {
array<u8, 16> wrapped_history = {{'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}};
if (hlen < 16) {
@ -346,8 +346,7 @@ hwlm_error_t safeExecStreaming(const FDR *fdr, const u8 *hbuf, size_t hlen,
memcpy(new_hbuf, hbuf, hlen);
hbuf = new_hbuf;
}
return fdrExecStreaming(fdr, hbuf, hlen, buf, len, start, cb, ctxt, groups,
stream_state);
return fdrExecStreaming(fdr, hbuf, hlen, buf, len, start, cb, ctxt, groups);
}
TEST_P(FDRp, SmallStreaming) {
@ -366,7 +365,7 @@ TEST_P(FDRp, SmallStreaming) {
expected.push_back(match(2, 2, 1));
safeExecStreaming(fdr.get(), (const u8 *)"", 0, (const u8 *)"aaar", 4, 0,
decentCallback, &matches, HWLM_ALL_GROUPS, nullptr);
decentCallback, &matches, HWLM_ALL_GROUPS);
for (u32 i = 0; i < MIN(expected.size(), matches.size()); i++) {
EXPECT_EQ(expected[i], matches[i]);
}
@ -378,7 +377,7 @@ TEST_P(FDRp, SmallStreaming) {
expected.push_back(match(1, 8, 10));
safeExecStreaming(fdr.get(), (const u8 *)"aaar", 4, (const u8 *)"dvark", 5,
0, decentCallback, &matches, HWLM_ALL_GROUPS, nullptr);
0, decentCallback, &matches, HWLM_ALL_GROUPS);
for (u32 i = 0; i < MIN(expected.size(), matches.size()); i++) {
EXPECT_EQ(expected[i], matches[i] + 4);
@ -407,7 +406,7 @@ TEST_P(FDRp, SmallStreaming2) {
safeExecStreaming(fdr.get(), (const u8 *)"foobar", 6,
(const u8 *)"aardvarkkk", 10, 0, decentCallback, &matches,
HWLM_ALL_GROUPS, nullptr);
HWLM_ALL_GROUPS);
for (u32 i = 0; i < MIN(expected.size(), matches.size()); i++) {
EXPECT_EQ(expected[i], matches[i] + 6);
@ -445,44 +444,6 @@ TEST_P(FDRp, LongLiteral) {
EXPECT_EQ(0U, count);
}
TEST_P(FDRp, VeryLongLiteral) {
const u32 hint = GetParam();
SCOPED_TRACE(hint);
vector<hwlmLiteral> lits;
string s1000;
for(int i = 0; i < 1000; i++) {
s1000 += char('A' + i % 10);
}
string s66k;
for(int i = 0; i < 66; i++) {
s66k += s1000;
}
string corpus = s66k + s66k;
lits.push_back(hwlmLiteral(s66k.c_str(), 0, 10));
auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey());
CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
vector<match> matches;
u32 rv = fdrExec(fdr.get(), (const u8 *)s66k.c_str(), s66k.size(), 0,
decentCallback, &matches, HWLM_ALL_GROUPS);
EXPECT_EQ(0U, rv);
ASSERT_EQ(1U, matches.size());
ASSERT_EQ(match(0, 65999, 10), matches[0]);
matches.clear();
rv = fdrExec(fdr.get(), (const u8 *)corpus.c_str(), corpus.size(), 0,
decentCallback, &matches, HWLM_ALL_GROUPS);
EXPECT_EQ(0U, rv);
for (u32 i = 0; i < matches.size(); i++) {
ASSERT_EQ(match(10 * i, 65999 + 10 * i, 10), matches[i]);
}
EXPECT_EQ(6601U, matches.size());
}
TEST_P(FDRp, moveByteStream) {
const u32 hint = GetParam();
SCOPED_TRACE(hint);
@ -538,9 +499,9 @@ TEST_P(FDRp, Stream1) {
// check matches
vector<match> matches;
fdrStatus = safeExecStreaming(
fdr.get(), (const u8 *)data1, data_len1, (const u8 *)data2, data_len2,
0, decentCallback, &matches, HWLM_ALL_GROUPS, nullptr);
fdrStatus = safeExecStreaming(fdr.get(), (const u8 *)data1, data_len1,
(const u8 *)data2, data_len2, 0,
decentCallback, &matches, HWLM_ALL_GROUPS);
ASSERT_EQ(0, fdrStatus);
ASSERT_EQ(4U, matches.size());
@ -783,9 +744,9 @@ TEST(FDR, FDRTermS) {
// check matches
vector<match> matches;
fdrStatus = safeExecStreaming(
fdr.get(), (const u8 *)data1, data_len1, (const u8 *)data2, data_len2,
0, decentCallbackT, &matches, HWLM_ALL_GROUPS, nullptr);
fdrStatus = safeExecStreaming(fdr.get(), (const u8 *)data1, data_len1,
(const u8 *)data2, data_len2, 0,
decentCallbackT, &matches, HWLM_ALL_GROUPS);
ASSERT_EQ(HWLM_TERMINATED, fdrStatus);
ASSERT_EQ(1U, matches.size());
@ -812,30 +773,3 @@ TEST(FDR, FDRTermB) {
ASSERT_EQ(1U, matches.size());
}
TEST(FDR, ManyLengths) {
// UE-2400: we had a crash due to div by zero in the compiler when given a
// set of literals with precisely 512 different lengths.
const u32 num = 512;
vector<hwlmLiteral> lits;
char c = 0;
string s;
for (u32 i = 0; i < num; i++) {
s.push_back(c++);
lits.push_back(hwlmLiteral(s, false, i + 1));
}
auto fdr = fdrBuildTable(lits, false, get_current_target(), Grey());
ASSERT_TRUE(fdr != nullptr);
// Confirm that we can scan against this FDR table as well.
vector<match> matches;
hwlm_error_t fdrStatus =
fdrExec(fdr.get(), (const u8 *)s.c_str(), s.size(), 0, decentCallback,
&matches, HWLM_ALL_GROUPS);
ASSERT_EQ(HWLM_SUCCESS, fdrStatus);
ASSERT_EQ(768U, matches.size());
}

View File

@ -495,7 +495,7 @@ TEST_P(FDRFloodp, StreamingMask) {
const u8 *fhist = fake_history.data() + fake_history_size;
fdrStatus = fdrExecStreaming(fdr.get(), fhist, 0, d, streamChunk, 0,
countCallback, &matchesCounts,
HWLM_ALL_GROUPS, nullptr);
HWLM_ALL_GROUPS);
ASSERT_EQ(0, fdrStatus);
for (u32 j = streamChunk; j < dataSize; j += streamChunk) {
if (j < 16) {
@ -506,12 +506,12 @@ TEST_P(FDRFloodp, StreamingMask) {
fdrStatus = fdrExecStreaming(fdr.get(), tmp_d, j, tmp_d + j,
streamChunk, 0, countCallback,
&matchesCounts,
HWLM_ALL_GROUPS, nullptr);
HWLM_ALL_GROUPS);
} else {
fdrStatus = fdrExecStreaming(fdr.get(), d + j - 8, 8, d + j,
streamChunk, 0, countCallback,
&matchesCounts,
HWLM_ALL_GROUPS, nullptr);
HWLM_ALL_GROUPS);
}
ASSERT_EQ(0, fdrStatus);
}