fdr: move long literal handling into Rose

Move the hash table used for long literal support in streaming mode from
FDR to Rose, and introduce new instructions CHECK_LONG_LIT and
CHECK_LONG_LIT_NOCASE for doing literal confirm for long literals.

This simplifies FDR confirm and guarantees that HWLM matchers will only be
used for literals fewer than 256 bytes long.
This commit is contained in:
Justin Viiret
2016-09-07 15:59:23 +10:00
committed by Matthew Barr
parent 6ed30194ce
commit 68bf473e2e
40 changed files with 1759 additions and 1310 deletions

View File

@@ -85,9 +85,4 @@ void roseInitState(const struct RoseEngine *t, char *state) {
init_state(t, state);
init_outfixes(t, state);
// Clear the floating matcher state, if any.
DEBUG_PRINTF("clearing %u bytes of floating matcher state\n",
t->floatingStreamState);
memset(getFloatingMatcherState(t, state), 0, t->floatingStreamState);
}

View File

@@ -1331,6 +1331,78 @@ hwlmcb_rv_t roseMatcherEod(const struct RoseEngine *rose,
return HWLM_CONTINUE_MATCHING;
}
/**
 * \brief Confirm a long literal match ending at stream offset \a end.
 *
 * The literal's bytes are fetched from the bytecode at \a lit_offset. Any
 * portion of the literal lying in the current scan buffer is compared
 * directly; any remaining prefix that precedes the current buffer is compared
 * against the long literal history buffer built by the long literal table
 * (streaming mode only).
 *
 * \param t          Rose engine bytecode.
 * \param scratch    Scratch, providing core_info and the long literal
 *                   history buffers (ll_buf / ll_buf_nocase).
 * \param end        Stream offset at which the literal ends.
 * \param lit_offset Offset of the literal's bytes within the bytecode.
 * \param lit_length Length of the literal in bytes.
 * \param nocase     Non-zero for a caseless comparison.
 * \return 1 if the literal is confirmed at \a end, 0 otherwise.
 */
static rose_inline
int roseCheckLongLiteral(const struct RoseEngine *t,
                         const struct hs_scratch *scratch, u64a end,
                         u32 lit_offset, u32 lit_length, char nocase) {
    const struct core_info *ci = &scratch->core_info;
    const u8 *lit = getByOffset(t, lit_offset);

    DEBUG_PRINTF("check lit at %llu, length %u\n", end, lit_length);
    DEBUG_PRINTF("base buf_offset=%llu\n", ci->buf_offset);

    // A literal cannot match before the start of the stream.
    if (end < lit_length) {
        DEBUG_PRINTF("too short!\n");
        return 0;
    }

    // If any portion of the literal matched in the current buffer, check it.
    if (end > ci->buf_offset) {
        // scan_len bytes of the literal's suffix lie in the current buffer.
        u32 scan_len = MIN(end - ci->buf_offset, lit_length);
        u64a scan_start = end - ci->buf_offset - scan_len;
        DEBUG_PRINTF("checking suffix (%u bytes) in buf[%llu:%llu]\n", scan_len,
                     scan_start, end);
        if (cmpForward(ci->buf + scan_start, lit + lit_length - scan_len,
                       scan_len, nocase)) {
            DEBUG_PRINTF("cmp of suffix failed\n");
            return 0;
        }
    }

    // If the entirety of the literal was in the current block, we are done.
    if (end - lit_length >= ci->buf_offset) {
        DEBUG_PRINTF("literal confirmed in current block\n");
        return 1;
    }

    // We still have a prefix which we must test against the buffer prepared by
    // the long literal table. This is only done in streaming mode.

    assert(t->mode != HS_MODE_BLOCK);

    // Select the history buffer matching the case-sensitivity of this check.
    const u8 *ll_buf;
    size_t ll_len;
    if (nocase) {
        ll_buf = scratch->tctxt.ll_buf_nocase;
        ll_len = scratch->tctxt.ll_len_nocase;
    } else {
        ll_buf = scratch->tctxt.ll_buf;
        ll_len = scratch->tctxt.ll_len;
    }

    assert(ll_buf);

    u64a lit_start_offset = end - lit_length;
    // prefix_len bytes of the literal precede the current buffer; they must
    // appear at the end of the history buffer, hist_rewind bytes back.
    u32 prefix_len = MIN(lit_length, ci->buf_offset - lit_start_offset);
    u32 hist_rewind = ci->buf_offset - lit_start_offset;
    DEBUG_PRINTF("ll_len=%zu, hist_rewind=%u\n", ll_len, hist_rewind);
    if (hist_rewind > ll_len) {
        DEBUG_PRINTF("not enough history\n");
        return 0;
    }

    DEBUG_PRINTF("check prefix len=%u from hist (len %zu, rewind %u)\n",
                 prefix_len, ll_len, hist_rewind);
    assert(hist_rewind <= ll_len);
    if (cmpForward(ll_buf + ll_len - hist_rewind, lit, prefix_len, nocase)) {
        DEBUG_PRINTF("cmp of prefix failed\n");
        return 0;
    }

    DEBUG_PRINTF("cmp succeeded\n");
    return 1;
}
static
void updateSeqPoint(struct RoseContext *tctxt, u64a offset,
const char from_mpv) {
@@ -1977,6 +2049,26 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
}
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CHECK_LONG_LIT) {
const char nocase = 0;
if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset,
ri->lit_length, nocase)) {
DEBUG_PRINTF("halt: failed long lit check\n");
return HWLM_CONTINUE_MATCHING;
}
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) {
const char nocase = 1;
if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset,
ri->lit_length, nocase)) {
DEBUG_PRINTF("halt: failed nocase long lit check\n");
return HWLM_CONTINUE_MATCHING;
}
}
PROGRAM_NEXT_INSTRUCTION
}
}

View File

@@ -37,14 +37,17 @@
#include "rose_build_exclusive.h"
#include "rose_build_groups.h"
#include "rose_build_infix.h"
#include "rose_build_long_lit.h"
#include "rose_build_lookaround.h"
#include "rose_build_matchers.h"
#include "rose_build_program.h"
#include "rose_build_scatter.h"
#include "rose_build_util.h"
#include "rose_build_width.h"
#include "rose_internal.h"
#include "rose_program.h"
#include "hwlm/hwlm.h" /* engine types */
#include "hwlm/hwlm_literal.h"
#include "nfa/castlecompile.h"
#include "nfa/goughcompile.h"
#include "nfa/mcclellancompile.h"
@@ -165,6 +168,7 @@ struct RoseResources {
bool has_states = false;
bool checks_groups = false;
bool has_lit_delay = false;
bool has_lit_check = false; // long literal support
bool has_anchored = false;
bool has_eod = false;
};
@@ -210,9 +214,16 @@ struct build_context : boost::noncopyable {
* written to the engine_blob. */
vector<u32> litPrograms;
/** \brief List of long literals (ones with CHECK_LITERAL instructions)
* that need hash table support. */
vector<ue2_case_string> longLiterals;
/** \brief Minimum offset of a match from the floating table. */
u32 floatingMinLiteralMatchOffset = 0;
/** \brief Long literal length threshold, used in streaming mode. */
size_t longLitLengthThreshold = 0;
/** \brief Contents of the Rose bytecode immediately following the
* RoseEngine. */
RoseEngineBlob engine_blob;
@@ -314,7 +325,7 @@ bool needsCatchup(const RoseBuildImpl &build,
}
static
bool isPureFloating(const RoseResources &resources) {
bool isPureFloating(const RoseResources &resources, const CompileContext &cc) {
if (resources.has_outfixes || resources.has_suffixes ||
resources.has_leftfixes) {
DEBUG_PRINTF("has engines\n");
@@ -341,6 +352,12 @@ bool isPureFloating(const RoseResources &resources) {
return false;
}
if (cc.streaming && resources.has_lit_check) {
DEBUG_PRINTF("has long literals in streaming mode, which needs "
"long literal table support\n");
return false;
}
if (resources.checks_groups) {
DEBUG_PRINTF("has group checks\n");
return false;
@@ -384,10 +401,11 @@ u8 pickRuntimeImpl(const RoseBuildImpl &build, const build_context &bc,
DEBUG_PRINTF("has_states=%d\n", bc.resources.has_states);
DEBUG_PRINTF("checks_groups=%d\n", bc.resources.checks_groups);
DEBUG_PRINTF("has_lit_delay=%d\n", bc.resources.has_lit_delay);
DEBUG_PRINTF("has_lit_check=%d\n", bc.resources.has_lit_check);
DEBUG_PRINTF("has_anchored=%d\n", bc.resources.has_anchored);
DEBUG_PRINTF("has_eod=%d\n", bc.resources.has_eod);
if (isPureFloating(bc.resources)) {
if (isPureFloating(bc.resources, build.cc)) {
return ROSE_RUNTIME_PURE_LITERAL;
}
@@ -427,7 +445,7 @@ static
void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount,
u32 anchorStateSize, u32 activeArrayCount,
u32 activeLeftCount, u32 laggedRoseCount,
u32 floatingStreamStateRequired, u32 historyRequired,
u32 longLitStreamStateRequired, u32 historyRequired,
RoseStateOffsets *so) {
u32 curr_offset = 0;
@@ -445,8 +463,8 @@ void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount,
so->activeLeftArray_size = mmbit_size(activeLeftCount);
curr_offset += so->activeLeftArray_size;
so->floatingMatcherState = curr_offset;
curr_offset += floatingStreamStateRequired;
so->longLitState = curr_offset;
curr_offset += longLitStreamStateRequired;
// ONE WHOLE BYTE for each active leftfix with lag.
so->leftfixLagTable = curr_offset;
@@ -2514,6 +2532,10 @@ void recordResources(RoseResources &resources, const RoseProgram &program) {
case ROSE_INSTR_PUSH_DELAYED:
resources.has_lit_delay = true;
break;
case ROSE_INSTR_CHECK_LONG_LIT:
case ROSE_INSTR_CHECK_LONG_LIT_NOCASE:
resources.has_lit_check = true;
break;
default:
break;
}
@@ -2546,6 +2568,25 @@ void recordResources(RoseResources &resources,
}
}
/**
 * \brief Scan \a program for long literal check instructions and record their
 * literals (with case-mode) in bc.longLiterals for later hash table
 * construction.
 */
static
void recordLongLiterals(build_context &bc, const RoseProgram &program) {
    for (const auto &instr : program) {
        const RoseInstruction *base = instr.get();
        if (const auto *check_case =
                dynamic_cast<const RoseInstrCheckLongLit *>(base)) {
            DEBUG_PRINTF("found CHECK_LITERAL for string '%s'\n",
                         escapeString(check_case->literal).c_str());
            bc.longLiterals.emplace_back(check_case->literal, false);
        } else if (const auto *check_nocase =
                       dynamic_cast<const RoseInstrCheckLongLitNocase *>(
                           base)) {
            DEBUG_PRINTF("found CHECK_LITERAL_NOCASE for string '%s'\n",
                         escapeString(check_nocase->literal).c_str());
            bc.longLiterals.emplace_back(check_nocase->literal, true);
        }
    }
}
static
u32 writeProgram(build_context &bc, RoseProgram &&program) {
if (program.empty()) {
@@ -2560,6 +2601,7 @@ u32 writeProgram(build_context &bc, RoseProgram &&program) {
}
recordResources(bc.resources, program);
recordLongLiterals(bc, program);
u32 len = 0;
auto prog_bytecode = writeProgram(bc.engine_blob, program, &len);
@@ -4285,6 +4327,48 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc,
program.add_before_end(make_unique<RoseInstrCheckLitEarly>(min_offset));
}
/**
 * \brief Add a long literal check instruction (CHECK_LONG_LIT or
 * CHECK_LONG_LIT_NOCASE) to \a program if this final_id's literal is a
 * floating literal longer than the long literal length threshold.
 *
 * Throws ResourceLimitError if the literal exceeds the grey-box literal
 * length limit.
 */
static
void makeCheckLiteralInstruction(const RoseBuildImpl &build,
                                 const build_context &bc, u32 final_id,
                                 RoseProgram &program) {
    const auto &lits = build.final_id_to_literal.at(final_id);
    if (lits.size() != 1) {
        // Long literals should not share a final_id.
        assert(all_of(begin(lits), end(lits), [&](u32 lit_id) {
            const rose_literal_id &lit = build.literals.right.at(lit_id);
            return lit.table != ROSE_FLOATING ||
                   lit.s.length() <= bc.longLitLengthThreshold;
        }));
        return;
    }

    u32 lit_id = *lits.begin();
    if (build.isDelayed(lit_id)) {
        // Delayed literals are not given long lit checks here.
        return;
    }

    const rose_literal_id &lit = build.literals.right.at(lit_id);
    if (lit.table != ROSE_FLOATING) {
        // Only floating-table literals use the long literal mechanism.
        return;
    }
    if (lit.s.length() <= bc.longLitLengthThreshold) {
        // Short enough to be handled by the literal matcher directly.
        return;
    }

    // Check resource limits as well.
    if (lit.s.length() > build.cc.grey.limitLiteralLength) {
        throw ResourceLimitError();
    }

    unique_ptr<RoseInstruction> ri;
    if (lit.s.any_nocase()) {
        ri = make_unique<RoseInstrCheckLongLitNocase>(lit.s.get_string());
    } else {
        ri = make_unique<RoseInstrCheckLongLit>(lit.s.get_string());
    }
    program.add_before_end(move(ri));
}
static
bool hasDelayedLiteral(RoseBuildImpl &build,
const vector<RoseEdge> &lit_edges) {
@@ -4312,6 +4396,9 @@ RoseProgram buildLitInitialProgram(RoseBuildImpl &build, build_context &bc,
DEBUG_PRINTF("final_id %u\n", final_id);
// Check long literal info.
makeCheckLiteralInstruction(build, bc, final_id, program);
// Check lit mask.
makeCheckLitMaskInstruction(build, bc, final_id, program);
@@ -4838,6 +4925,172 @@ u32 buildEagerQueueIter(const set<u32> &eager, u32 leftfixBeginQueue,
return bc.engine_blob.add_iterator(iter);
}
/**
 * \brief Allocate final literal IDs to the given set of internal literal IDs,
 * reusing an existing final ID where literals are safely mergeable.
 *
 * Updates build.literal_info (final_id fields) and
 * build.final_id_to_literal, advancing *next_final_id for each fresh ID
 * handed out.
 */
static
void allocateFinalIdToSet(RoseBuildImpl &build, const set<u32> &lits,
                          size_t longLitLengthThreshold, u32 *next_final_id) {
    const auto &g = build.g;
    auto &literal_info = build.literal_info;
    auto &final_id_to_literal = build.final_id_to_literal;

    /* We can allocate the same final id to multiple literals of the same type
     * if they share the same vertex set and trigger the same delayed literal
     * ids and squash the same roles and have the same group squashing
     * behaviour. Benefits literals cannot be merged. */

    for (u32 int_id : lits) {
        rose_literal_info &curr_info = literal_info[int_id];
        const rose_literal_id &lit = build.literals.right.at(int_id);
        const auto &verts = curr_info.vertices;

        // Literals with benefits cannot be merged.
        if (curr_info.requires_benefits) {
            DEBUG_PRINTF("id %u has benefits\n", int_id);
            goto assign_new_id;
        }

        // Long literals (that require CHECK_LITERAL instructions) cannot be
        // merged.
        if (lit.s.length() > longLitLengthThreshold) {
            DEBUG_PRINTF("id %u is a long literal\n", int_id);
            goto assign_new_id;
        }

        if (!verts.empty() && curr_info.delayed_ids.empty()) {
            // Merge candidates are the literals present on EVERY vertex this
            // literal is attached to: intersect the per-vertex literal sets.
            vector<u32> cand;
            insert(&cand, cand.end(), g[*verts.begin()].literals);
            for (auto v : verts) {
                vector<u32> temp;
                set_intersection(cand.begin(), cand.end(),
                                 g[v].literals.begin(),
                                 g[v].literals.end(),
                                 inserter(temp, temp.end()));
                cand.swap(temp);
            }

            for (u32 cand_id : cand) {
                // Only merge onto a lower-numbered literal, which has already
                // been given its final ID.
                if (cand_id >= int_id) {
                    break;
                }

                const auto &cand_info = literal_info[cand_id];
                const auto &cand_lit = build.literals.right.at(cand_id);

                if (cand_lit.s.length() > longLitLengthThreshold) {
                    continue;
                }

                if (cand_info.requires_benefits) {
                    continue;
                }

                if (!cand_info.delayed_ids.empty()) {
                    /* TODO: allow cases where delayed ids are equivalent.
                     * This is awkward currently as they have not had their
                     * final ids allocated yet */
                    continue;
                }

                if (lits.find(cand_id) == lits.end()
                    || cand_info.vertices.size() != verts.size()
                    || cand_info.squash_group != curr_info.squash_group) {
                    continue;
                }

                /* if we are squashing groups we need to check if they are the
                 * same group */
                if (cand_info.squash_group
                    && cand_info.group_mask != curr_info.group_mask) {
                    continue;
                }

                // Safe to share the candidate's final ID.
                u32 final_id = cand_info.final_id;
                assert(final_id != MO_INVALID_IDX);
                assert(curr_info.final_id == MO_INVALID_IDX);
                curr_info.final_id = final_id;
                final_id_to_literal[final_id].insert(int_id);
                goto next_lit;
            }
        }

    assign_new_id:
        /* oh well, have to give it a fresh one, hang the expense */
        DEBUG_PRINTF("allocating final id %u to %u\n", *next_final_id, int_id);
        assert(curr_info.final_id == MO_INVALID_IDX);
        curr_info.final_id = *next_final_id;
        final_id_to_literal[*next_final_id].insert(int_id);
        (*next_final_id)++;
    next_lit:;
    }
}
static
bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) {
assert(lit_id < build.literal_info.size());
const auto &info = build.literal_info[lit_id];
if (!info.vertices.empty()) {
return true;
}
for (const u32 &delayed_id : info.delayed_ids) {
assert(delayed_id < build.literal_info.size());
const rose_literal_info &delayed_info = build.literal_info[delayed_id];
if (!delayed_info.vertices.empty()) {
return true;
}
}
DEBUG_PRINTF("literal %u has no refs\n", lit_id);
return false;
}
/** \brief Allocate final literal IDs for all literals.
 *
 * Literals are partitioned into normal, anchored and delayed groups, and IDs
 * are handed out in that order; anchored_base_id and delay_base_id record
 * where each later group begins.
 */
static
void allocateFinalLiteralId(RoseBuildImpl &build,
                            size_t longLitLengthThreshold) {
    set<u32> anch;
    set<u32> norm;
    set<u32> delay;

    /* undelayed ids come first */
    assert(build.final_id_to_literal.empty());
    u32 next_final_id = 0;

    for (u32 id = 0; id < build.literal_info.size(); id++) {
        assert(!build.hasFinalId(id));

        if (!isUsedLiteral(build, id)) {
            /* what is this literal good for? absolutely nothing */
            continue;
        }

        // The special EOD event literal has its own program and does not need
        // a real literal ID.
        if (id == build.eod_event_literal_id) {
            assert(build.eod_event_literal_id != MO_INVALID_IDX);
            continue;
        }

        if (build.isDelayed(id)) {
            assert(!build.literal_info[id].requires_benefits);
            delay.insert(id);
            continue;
        }

        const bool anchored = build.literals.right.at(id).table == ROSE_ANCHORED;
        (anchored ? anch : norm).insert(id);
    }

    /* normal lits */
    allocateFinalIdToSet(build, norm, longLitLengthThreshold, &next_final_id);

    /* next anchored stuff */
    build.anchored_base_id = next_final_id;
    allocateFinalIdToSet(build, anch, longLitLengthThreshold, &next_final_id);

    /* delayed ids come last */
    build.delay_base_id = next_final_id;
    allocateFinalIdToSet(build, delay, longLitLengthThreshold, &next_final_id);
}
static
aligned_unique_ptr<RoseEngine> addSmallWriteEngine(RoseBuildImpl &build,
aligned_unique_ptr<RoseEngine> rose) {
@@ -4873,16 +5126,89 @@ aligned_unique_ptr<RoseEngine> addSmallWriteEngine(RoseBuildImpl &build,
return rose2;
}
/**
 * \brief Returns the pair (number of literals, max length) for all real
 * literals in the floating table that are in-use.
 */
static
pair<size_t, size_t> floatingCountAndMaxLen(const RoseBuildImpl &build) {
    size_t num = 0;
    size_t max_len = 0;

    for (const auto &e : build.literals.right) {
        const u32 id = e.first;
        const rose_literal_id &lit = e.second;

        // Only count undelayed, in-use floating literals; the delayed
        // variants are skipped so that we count only the undelayed version
        // that ends up in the HWLM table.
        const bool counted = lit.table == ROSE_FLOATING && !lit.delay &&
                             isUsedLiteral(build, id);
        if (!counted) {
            continue;
        }

        num++;
        max_len = max(max_len, lit.s.length());
    }

    DEBUG_PRINTF("%zu floating literals with max_len=%zu\n", num, max_len);
    return {num, max_len};
}
/**
 * \brief Compute the length above which a literal needs long literal table
 * support (streaming mode) rather than plain HWLM confirm.
 */
size_t calcLongLitThreshold(const RoseBuildImpl &build,
                            const size_t historyRequired) {
    const auto &cc = build.cc;

    // In block mode, we should only use the long literal support for literals
    // that cannot be handled by HWLM.
    if (!cc.streaming) {
        return HWLM_LITERAL_MAX_LEN;
    }

    // Streaming mode: start from the configured minimum and expand to the
    // size of history we've already allocated. Note that we need N-1 bytes of
    // history to match a literal of length N.
    size_t threshold = max(size_t{ROSE_LONG_LITERAL_THRESHOLD_MIN},
                           historyRequired + 1);

    // If we only have one literal, allow for a larger value in order to avoid
    // building a long literal table for a trivial Noodle case that we could
    // fit in history.
    const auto num_len = floatingCountAndMaxLen(build);
    if (num_len.first == 1 && num_len.second > threshold) {
        DEBUG_PRINTF("expanding for single literal of length %zu\n",
                     num_len.second);
        threshold = num_len.second;
    }

    // Clamp to max history available.
    return min(threshold, size_t{cc.grey.maxHistoryAvailable} + 1);
}
aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
DerivedBoundaryReports dboundary(boundary);
size_t historyRequired = calcHistoryRequired(); // Updated by HWLM.
size_t longLitLengthThreshold = calcLongLitThreshold(*this,
historyRequired);
DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold);
allocateFinalLiteralId(*this, longLitLengthThreshold);
auto anchored_dfas = buildAnchoredDfas(*this);
build_context bc;
bc.floatingMinLiteralMatchOffset =
findMinFloatingLiteralMatch(*this, anchored_dfas);
bc.longLitLengthThreshold = longLitLengthThreshold;
bc.needs_catchup = needsCatchup(*this, anchored_dfas);
recordResources(bc.resources, *this);
if (!anchored_dfas.empty()) {
@@ -4944,6 +5270,11 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
u32 eodProgramOffset = writeEodProgram(*this, bc, eodNfaIterOffset);
size_t longLitStreamStateRequired = 0;
u32 longLitTableOffset = buildLongLiteralTable(*this, bc.engine_blob,
bc.longLiterals, longLitLengthThreshold, &historyRequired,
&longLitStreamStateRequired);
vector<mmbit_sparse_iter> activeLeftIter;
buildActiveLeftIter(leftInfoTable, activeLeftIter);
@@ -4982,9 +5313,8 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
// Build floating HWLM matcher.
rose_group fgroups = 0;
size_t fsize = 0;
size_t floatingStreamStateRequired = 0;
auto ftable = buildFloatingMatcher(*this, &fgroups, &fsize, &historyRequired,
&floatingStreamStateRequired);
auto ftable = buildFloatingMatcher(*this, bc.longLitLengthThreshold,
&fgroups, &fsize, &historyRequired);
u32 fmatcherOffset = 0;
if (ftable) {
currOffset = ROUNDUP_CL(currOffset);
@@ -5057,7 +5387,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
memset(&stateOffsets, 0, sizeof(stateOffsets));
fillStateOffsets(*this, bc.numStates, anchorStateSize,
activeArrayCount, activeLeftCount, laggedRoseCount,
floatingStreamStateRequired, historyRequired,
longLitStreamStateRequired, historyRequired,
&stateOffsets);
scatter_plan_raw state_scatter;
@@ -5173,6 +5503,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
engine->ematcherOffset = ematcherOffset;
engine->sbmatcherOffset = sbmatcherOffset;
engine->fmatcherOffset = fmatcherOffset;
engine->longLitTableOffset = longLitTableOffset;
engine->amatcherMinWidth = findMinWidth(*this, ROSE_ANCHORED);
engine->fmatcherMinWidth = findMinWidth(*this, ROSE_FLOATING);
engine->eodmatcherMinWidth = findMinWidth(*this, ROSE_EOD_ANCHORED);
@@ -5198,7 +5529,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
engine->totalNumLiterals = verify_u32(literal_info.size());
engine->asize = verify_u32(asize);
engine->ematcherRegionSize = ematcher_region_size;
engine->floatingStreamState = verify_u32(floatingStreamStateRequired);
engine->longLitStreamState = verify_u32(longLitStreamStateRequired);
engine->boundary.reportEodOffset = boundary_out.reportEodOffset;
engine->boundary.reportZeroOffset = boundary_out.reportZeroOffset;

View File

@@ -87,172 +87,6 @@ namespace ue2 {
#define ANCHORED_REHOME_DEEP 25
#define ANCHORED_REHOME_SHORT_LEN 3
#ifdef DEBUG
/** \brief Dump a rose_literal_info (parent, delayed children, group mask) to
 * the debug log; debug builds only. */
static UNUSED
void printLitInfo(const rose_literal_info &li, u32 id) {
    DEBUG_PRINTF("lit_info %u\n", id);
    DEBUG_PRINTF(" parent %u%s", li.undelayed_id,
                 li.delayed_ids.empty() ? "":", children:");
    for (u32 d_id : li.delayed_ids) {
        printf(" %u", d_id);
    }
    printf("\n");
    DEBUG_PRINTF(" group %llu %s\n", li.group_mask, li.squash_group ? "s":"");
}
#endif
/**
 * \brief Allocate final literal IDs to the given set of internal literal IDs,
 * reusing an existing final ID where literals are safely mergeable.
 *
 * Updates *literal_info (final_id fields) and *final_id_to_literal,
 * advancing *next_final_id for each fresh ID handed out.
 */
static
void allocateFinalIdToSet(const RoseGraph &g, const set<u32> &lits,
                          deque<rose_literal_info> *literal_info,
                          map<u32, set<u32> > *final_id_to_literal,
                          u32 *next_final_id) {
    /* We can allocate the same final id to multiple literals of the same type
     * if they share the same vertex set and trigger the same delayed literal
     * ids and squash the same roles and have the same group squashing
     * behaviour. Benefits literals cannot be merged. */

    for (u32 int_id : lits) {
        rose_literal_info &curr_info = (*literal_info)[int_id];
        const auto &verts = curr_info.vertices;

        if (!verts.empty() && !curr_info.requires_benefits
            && curr_info.delayed_ids.empty()) {
            // Merge candidates are the literals present on EVERY vertex this
            // literal is attached to: intersect the per-vertex literal sets.
            vector<u32> cand;
            insert(&cand, cand.end(), g[*verts.begin()].literals);
            for (auto v : verts) {
                vector<u32> temp;
                set_intersection(cand.begin(), cand.end(),
                                 g[v].literals.begin(),
                                 g[v].literals.end(),
                                 inserter(temp, temp.end()));
                cand.swap(temp);
            }

            for (u32 cand_id : cand) {
                // Only merge onto a lower-numbered literal, which has already
                // been given its final ID.
                if (cand_id >= int_id) {
                    break;
                }

                const rose_literal_info &cand_info = (*literal_info)[cand_id];

                if (cand_info.requires_benefits) {
                    continue;
                }

                if (!cand_info.delayed_ids.empty()) {
                    /* TODO: allow cases where delayed ids are equivalent.
                     * This is awkward currently as they have not had their
                     * final ids allocated yet */
                    continue;
                }

                if (lits.find(cand_id) == lits.end()
                    || cand_info.vertices.size() != verts.size()
                    || cand_info.squash_group != curr_info.squash_group) {
                    continue;
                }

                /* if we are squashing groups we need to check if they are the
                 * same group */
                if (cand_info.squash_group
                    && cand_info.group_mask != curr_info.group_mask) {
                    continue;
                }

                // Safe to share the candidate's final ID.
                u32 final_id = cand_info.final_id;
                assert(final_id != MO_INVALID_IDX);
                assert(curr_info.final_id == MO_INVALID_IDX);
                curr_info.final_id = final_id;
                (*final_id_to_literal)[final_id].insert(int_id);
                goto next_lit;
            }
        }

        /* oh well, have to give it a fresh one, hang the expense */
        DEBUG_PRINTF("allocating final id %u to %u\n", *next_final_id, int_id);
        assert(curr_info.final_id == MO_INVALID_IDX);
        curr_info.final_id = *next_final_id;
        (*final_id_to_literal)[*next_final_id].insert(int_id);
        (*next_final_id)++;
    next_lit:;
    }
}
/**
 * \brief True if the given literal (or any of its delayed children) is
 * attached to at least one vertex in the Rose graph.
 */
static
bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) {
    assert(lit_id < build.literal_info.size());
    const auto &info = build.literal_info[lit_id];
    if (!info.vertices.empty()) {
        return true;
    }

    // Check the delayed variants of this literal as well.
    for (const u32 &delayed_id : info.delayed_ids) {
        assert(delayed_id < build.literal_info.size());
        const rose_literal_info &delayed_info = build.literal_info[delayed_id];
        if (!delayed_info.vertices.empty()) {
            return true;
        }
    }

    DEBUG_PRINTF("literal %u has no refs\n", lit_id);
    return false;
}
/** \brief Allocate final literal IDs for all literals.
 *
 * These are the literal ids used in the bytecode. Literals are partitioned
 * into normal, anchored and delayed groups, and IDs are handed out in that
 * order; anchored_base_id and delay_base_id record where each later group
 * begins.
 */
static
void allocateFinalLiteralId(RoseBuildImpl &tbi) {
    RoseGraph &g = tbi.g;

    set<u32> anch;
    set<u32> norm;
    set<u32> delay;

    /* undelayed ids come first */
    assert(tbi.final_id_to_literal.empty());
    u32 next_final_id = 0;
    for (u32 i = 0; i < tbi.literal_info.size(); i++) {
        assert(!tbi.hasFinalId(i));

        if (!isUsedLiteral(tbi, i)) {
            /* what is this literal good for? absolutely nothing */
            continue;
        }

        // The special EOD event literal has its own program and does not need
        // a real literal ID.
        if (i == tbi.eod_event_literal_id) {
            assert(tbi.eod_event_literal_id != MO_INVALID_IDX);
            continue;
        }

        if (tbi.isDelayed(i)) {
            assert(!tbi.literal_info[i].requires_benefits);
            delay.insert(i);
        } else if (tbi.literals.right.at(i).table == ROSE_ANCHORED) {
            anch.insert(i);
        } else {
            norm.insert(i);
        }
    }

    /* normal lits */
    allocateFinalIdToSet(g, norm, &tbi.literal_info, &tbi.final_id_to_literal,
                         &next_final_id);

    /* next anchored stuff */
    tbi.anchored_base_id = next_final_id;
    allocateFinalIdToSet(g, anch, &tbi.literal_info, &tbi.final_id_to_literal,
                         &next_final_id);

    /* delayed ids come last */
    tbi.delay_base_id = next_final_id;
    allocateFinalIdToSet(g, delay, &tbi.literal_info, &tbi.final_id_to_literal,
                         &next_final_id);
}
#define MAX_EXPLOSION_NC 3
static
bool limited_explosion(const ue2_literal &s) {
@@ -284,7 +118,12 @@ void RoseBuildImpl::handleMixedSensitivity(void) {
continue;
}
if (limited_explosion(lit.s)) {
// We don't want to explode long literals, as they require confirmation
// with a CHECK_LITERAL instruction and need unique final_ids.
// TODO: we could allow explosion for literals where the prefixes
// covered by CHECK_LITERAL are identical.
if (lit.s.length() <= ROSE_LONG_LITERAL_THRESHOLD_MIN &&
limited_explosion(lit.s)) {
DEBUG_PRINTF("need to explode existing string '%s'\n",
dumpString(lit.s).c_str());
literal_info[id].requires_explode = true;
@@ -1653,7 +1492,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildRose(u32 minWidth) {
/* final prep work */
remapCastleTops(*this);
allocateFinalLiteralId(*this);
inspectRoseTops(*this);
buildRoseSquashMasks(*this);

View File

@@ -442,20 +442,26 @@ void dumpTestLiterals(const string &filename, const vector<hwlmLiteral> &lits) {
static
void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) {
auto lits = fillHamsterLiteralList(build, ROSE_ANCHORED);
size_t historyRequired = build.calcHistoryRequired();
size_t longLitLengthThreshold =
calcLongLitThreshold(build, historyRequired);
auto lits = fillHamsterLiteralList(build, ROSE_ANCHORED,
longLitLengthThreshold);
dumpTestLiterals(base + "rose_anchored_test_literals.txt", lits);
lits = fillHamsterLiteralList(build, ROSE_FLOATING);
lits = fillHamsterLiteralList(build, ROSE_FLOATING, longLitLengthThreshold);
dumpTestLiterals(base + "rose_float_test_literals.txt", lits);
lits = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED);
lits = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED,
build.ematcher_region_size);
dumpTestLiterals(base + "rose_eod_test_literals.txt", lits);
if (!build.cc.streaming) {
lits = fillHamsterLiteralList(build, ROSE_FLOATING,
ROSE_SMALL_BLOCK_LEN);
ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
auto lits2 = fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK,
ROSE_SMALL_BLOCK_LEN);
ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
lits.insert(end(lits), begin(lits2), end(lits2));
dumpTestLiterals(base + "rose_smallblock_test_literals.txt", lits);
}

View File

@@ -56,6 +56,8 @@ namespace ue2 {
#define ROSE_GROUPS_MAX 64
#define ROSE_LONG_LITERAL_THRESHOLD_MIN 33
struct BoundaryReports;
struct CastleProto;
struct CompileContext;
@@ -603,6 +605,9 @@ private:
ReportID next_nfa_report;
};
size_t calcLongLitThreshold(const RoseBuildImpl &build,
const size_t historyRequired);
// Free functions, in rose_build_misc.cpp
bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v);

View File

@@ -0,0 +1,348 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "rose_build_long_lit.h"
#include "rose_build_engine_blob.h"
#include "rose_build_impl.h"
#include "stream_long_lit_hash.h"
#include "util/alloc.h"
#include "util/bitutils.h"
#include "util/verify_types.h"
#include "util/compile_context.h"
using namespace std;
namespace ue2 {
/** \brief Minimum size for a non-empty hash table. */
static constexpr u32 MIN_HASH_TABLE_SIZE = 4096;

/** \brief Sizing information for one case-mode (caseful or nocase) of the
 * long literal table. */
struct LongLitModeInfo {
    u32 boundary = 0; //!< One above the largest index for this mode.
    u32 positions = 0; //!< Total number of string positions.
    u32 hashEntries = 0; //!< Number of hash table entries.
};

/** \brief Aggregate sizing information for both case-modes. */
struct LongLitInfo {
    LongLitModeInfo caseful;
    LongLitModeInfo nocase;
};
/** \brief Round \a x up to the next power of two.
 *
 * NOTE(review): for x == 1 this evaluates lg2(0), whose behaviour depends on
 * the lg2 helper — confirm callers never pass 1. In this file callers always
 * pass at least MIN_HASH_TABLE_SIZE, so the case is not exercised.
 */
static
u32 roundUpToPowerOfTwo(u32 x) {
    assert(x != 0);
    u32 bits = lg2(x - 1) + 1;
    assert(bits < 32);
    return 1U << bits;
}
/**
 * \brief Compute the boundary, string-position and hash-entry counts for the
 * caseful and nocase partitions of the given long literal set.
 *
 * Expects \a lits to be ordered with all caseful literals before all nocase
 * ones. NOTE(review): assumes each literal is longer than \a max_len,
 * otherwise lit.s.size() - max_len underflows — confirm callers guarantee
 * this.
 */
static
LongLitInfo analyzeLongLits(const vector<ue2_case_string> &lits,
                            size_t max_len) {
    LongLitInfo info;
    u32 hashedPositionsCase = 0;
    u32 hashedPositionsNocase = 0;

    // Caseful boundary is the index of the first nocase literal, as we're
    // ordered (caseful, nocase).
    auto first_nocase = find_if(begin(lits), end(lits),
                        [](const ue2_case_string &lit) { return lit.nocase; });
    info.caseful.boundary = verify_u32(distance(lits.begin(), first_nocase));

    // Nocase boundary is the size of the literal set.
    info.nocase.boundary = verify_u32(lits.size());

    for (const auto &lit : lits) {
        if (lit.nocase) {
            hashedPositionsNocase += lit.s.size() - max_len;
            info.nocase.positions += lit.s.size();
        } else {
            hashedPositionsCase += lit.s.size() - max_len;
            info.caseful.positions += lit.s.size();
        }
    }

    // Each non-empty partition gets a power-of-two table no smaller than
    // MIN_HASH_TABLE_SIZE; an empty partition gets no table at all.
    info.caseful.hashEntries = hashedPositionsCase
        ? roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, hashedPositionsCase))
        : 0;
    info.nocase.hashEntries = hashedPositionsNocase
        ? roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, hashedPositionsNocase))
        : 0;

    DEBUG_PRINTF("caseful: boundary=%u, positions=%u, hashedPositions=%u, "
                 "hashEntries=%u\n",
                 info.caseful.boundary, info.caseful.positions,
                 hashedPositionsCase, info.caseful.hashEntries);
    DEBUG_PRINTF("nocase: boundary=%u, positions=%u, hashedPositions=%u, "
                 "hashEntries=%u\n",
                 info.nocase.boundary, info.nocase.positions,
                 hashedPositionsNocase, info.nocase.hashEntries);

    return info;
}
/**
 * \brief Populate the long literal hash table \a tab (of \a numEntries
 * power-of-two entries) for one case-mode.
 *
 * Each literal contributes one hashed position per offset into the string
 * (beyond the first); collisions are resolved by chaining each extra entry
 * into the nearest free bucket via the entry's link field.
 *
 * \param lits           All long literals (both case-modes; filtered here).
 * \param max_len        Hashed substring length.
 * \param tab            Output table, assumed zero-initialised.
 * \param numEntries     Table size; must be a power of two (nbits = lg2).
 * \param nocase         Which case-mode of \a lits to process.
 * \param litToOffsetVal Map from literal id to its offset value, used to
 *                       derive the stored state value.
 */
static
void fillHashes(const vector<ue2_case_string> &lits, size_t max_len,
                RoseLongLitHashEntry *tab, size_t numEntries, bool nocase,
                const map<u32, u32> &litToOffsetVal) {
    const u32 nbits = lg2(numEntries);
    map<u32, deque<pair<u32, u32>>> bucketToLitOffPairs;
    map<u32, u64a> bucketToBitfield;

    // Hash every (literal, offset) position into a bucket, and accumulate a
    // 64-bit filter of the hash's high bits for each bucket.
    // NOTE(review): offsets start at 1, not 0 — presumably position 0 is
    // covered elsewhere; confirm against the runtime's lookup.
    for (u32 lit_id = 0; lit_id < lits.size(); lit_id++) {
        const ue2_case_string &lit = lits[lit_id];
        if (nocase != lit.nocase) {
            continue;
        }
        for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) {
            const u8 *substr = (const u8 *)lit.s.c_str() + offset;
            u32 h = hashLongLiteral(substr, max_len, lit.nocase);
            u32 h_ent = h & ((1U << nbits) - 1);
            u32 h_low = (h >> nbits) & 63;
            bucketToLitOffPairs[h_ent].emplace_back(lit_id, offset);
            bucketToBitfield[h_ent] |= (1ULL << h_low);
        }
    }

    // this used to be a set<u32>, but a bitset is much much faster given that
    // we're using it only for membership testing.
    boost::dynamic_bitset<> filledBuckets(numEntries); // all zero by default.

    // sweep out bitfield entries and save the results swapped accordingly
    // also, anything with bitfield entries is put in filledBuckets
    for (const auto &m : bucketToBitfield) {
        const u32 &bucket = m.first;
        const u64a &contents = m.second;
        tab[bucket].bitfield = contents;
        filledBuckets.set(bucket);
    }

    // store out all our chains based on free values in our hash table.
    // find nearest free locations that are empty (there will always be more
    // entries than strings, at present)
    for (auto &m : bucketToLitOffPairs) {
        u32 bucket = m.first;
        deque<pair<u32, u32>> &d = m.second;

        // sort d by distance of the residual string (len minus our depth into
        // the string). We need to put the 'furthest back' string first...
        stable_sort(d.begin(), d.end(),
                    [](const pair<u32, u32> &a, const pair<u32, u32> &b) {
                        if (a.second != b.second) {
                            return a.second > b.second; /* longest is first */
                        }
                        return a.first < b.first;
                    });

        while (1) {
            // first time through is always at bucket, then we fill in links
            filledBuckets.set(bucket);
            RoseLongLitHashEntry *ent = &tab[bucket];
            u32 lit_id = d.front().first;
            u32 offset = d.front().second;

            ent->state = verify_u32(litToOffsetVal.at(lit_id) +
                                    offset + max_len);
            ent->link = (u32)LINK_INVALID;

            d.pop_front();
            if (d.empty()) {
                break;
            }
            // now, if there is another value
            // find a bucket for it and put in 'bucket' and repeat
            // all we really need to do is find something not in filledBuckets,
            // ideally something close to bucket
            // we search backward and forward from bucket, trying to stay as
            // close as possible.
            UNUSED bool found = false;
            int bucket_candidate = 0;
            for (u32 k = 1; k < numEntries * 2; k++) {
                // Alternate: bucket-1, bucket+1, bucket-2, bucket+2, ...
                bucket_candidate = bucket + (((k & 1) == 0)
                                             ? (-(int)k / 2) : (k / 2));
                if (bucket_candidate < 0 ||
                    (size_t)bucket_candidate >= numEntries) {
                    continue;
                }
                if (!filledBuckets.test(bucket_candidate)) {
                    found = true;
                    break;
                }
            }

            assert(found);
            bucket = bucket_candidate;
            // Chain the previous entry to the freshly chosen bucket.
            ent->link = bucket;
        }
    }
}
/**
 * \brief Build the streaming-mode long literal table and add it to the
 * engine blob.
 *
 * Layout built here (all 16-byte aligned regions, in order): the
 * RoseLongLitTable header, the RoseLongLiteral offset table (one entry per
 * literal plus a sentinel), the concatenated literal strings, the caseful
 * hash table, then the caseless hash table.
 *
 * \param[in,out] lits  long literals; trimmed and deduped in place.
 * \param[out] historyRequired  raised to max_len if needed.
 * \param[out] longLitStreamStateRequired  set to the packed stream state
 *             size in bytes.
 * \return blob offset of the table, or 0 if no table was built.
 */
u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob,
                          vector<ue2_case_string> &lits,
                          size_t longLitLengthThreshold,
                          size_t *historyRequired,
                          size_t *longLitStreamStateRequired) {
    // Work in terms of history requirement (i.e. literal len - 1).
    const size_t max_len = longLitLengthThreshold - 1;
    // We should only be building the long literal hash table in streaming mode.
    if (!build.cc.streaming) {
        return 0;
    }
    if (lits.empty()) {
        DEBUG_PRINTF("no long literals\n");
        return 0;
    }
    // The last char of each literal is trimmed as we're not interested in full
    // matches, only partial matches.
    for (auto &lit : lits) {
        assert(!lit.s.empty());
        lit.s.pop_back();
    }
    // Sort by caseful/caseless and in lexicographical order.
    stable_sort(begin(lits), end(lits), [](const ue2_case_string &a,
                                           const ue2_case_string &b) {
        if (a.nocase != b.nocase) {
            return a.nocase < b.nocase;
        }
        return a.s < b.s;
    });
    // Find literals that are prefixes of other literals (including
    // duplicates). Note that we iterate in reverse, since we want to retain
    // only the longest string from a set of prefixes.
    auto it = unique(lits.rbegin(), lits.rend(), [](const ue2_case_string &a,
                                                    const ue2_case_string &b) {
        return a.nocase == b.nocase && a.s.size() >= b.s.size() &&
               equal(b.s.begin(), b.s.end(), a.s.begin());
    });
    // Erase dupes found by unique().
    lits.erase(lits.begin(), it.base());
    LongLitInfo info = analyzeLongLits(lits, max_len);
    // first assess the size and find our caseless threshold
    size_t headerSize = ROUNDUP_16(sizeof(RoseLongLitTable));
    size_t litTabOffset = headerSize;
    // +1 entry: the final entry acts as an end-of-strings sentinel.
    size_t litTabNumEntries = lits.size() + 1;
    size_t litTabSize = ROUNDUP_16(litTabNumEntries * sizeof(RoseLongLiteral));
    size_t wholeLitTabOffset = litTabOffset + litTabSize;
    size_t totalWholeLitTabSize =
        ROUNDUP_16(info.caseful.positions + info.nocase.positions);
    size_t htOffsetCase = wholeLitTabOffset + totalWholeLitTabSize;
    size_t htSizeCase = info.caseful.hashEntries * sizeof(RoseLongLitHashEntry);
    size_t htOffsetNocase = htOffsetCase + htSizeCase;
    size_t htSizeNocase =
        info.nocase.hashEntries * sizeof(RoseLongLitHashEntry);
    size_t tabSize = ROUNDUP_16(htOffsetNocase + htSizeNocase);
    // need to add +2 to both of these to allow space for the actual largest
    // value as well as handling the fact that we add one to the space when
    // storing out a position to allow zero to mean "no stream state value"
    u8 streamBitsCase = lg2(roundUpToPowerOfTwo(info.caseful.positions + 2));
    u8 streamBitsNocase = lg2(roundUpToPowerOfTwo(info.nocase.positions + 2));
    u32 tot_state_bytes = ROUNDUP_N(streamBitsCase + streamBitsNocase, 8) / 8;
    auto table = aligned_zmalloc_unique<char>(tabSize);
    assert(table); // otherwise would have thrown std::bad_alloc
    // then fill it in
    char *ptr = table.get();
    RoseLongLitTable *header = (RoseLongLitTable *)ptr;
    // fill in header
    header->maxLen = verify_u8(max_len); // u8 so doesn't matter; won't go > 255
    header->boundaryCase = info.caseful.boundary;
    header->hashOffsetCase = verify_u32(htOffsetCase);
    header->hashNBitsCase = lg2(info.caseful.hashEntries);
    header->streamStateBitsCase = streamBitsCase;
    header->boundaryNocase = info.nocase.boundary;
    header->hashOffsetNocase = verify_u32(htOffsetNocase);
    header->hashNBitsNocase = lg2(info.nocase.hashEntries);
    header->streamStateBitsNocase = streamBitsNocase;
    assert(tot_state_bytes < sizeof(u64a));
    header->streamStateBytes = verify_u8(tot_state_bytes); // u8
    ptr += headerSize;
    // now fill in the rest
    RoseLongLiteral *litTabPtr = (RoseLongLiteral *)ptr;
    ptr += litTabSize;
    map<u32, u32> litToOffsetVal;
    for (auto i = lits.begin(), e = lits.end(); i != e; ++i) {
        u32 entry = verify_u32(i - lits.begin());
        u32 offset = verify_u32(ptr - table.get());
        // point the table entry to the string location
        litTabPtr[entry].offset = offset;
        litToOffsetVal[entry] = offset;
        // copy the string into the string location
        const auto &s = i->s;
        memcpy(ptr, s.c_str(), s.size());
        ptr += s.size(); // and the string location
    }
    // fill in final lit table entry with current ptr (serves as end value)
    litTabPtr[lits.size()].offset = verify_u32(ptr - table.get());
    // fill hash tables
    ptr = table.get() + htOffsetCase;
    fillHashes(lits, max_len, (RoseLongLitHashEntry *)ptr,
               info.caseful.hashEntries, false, litToOffsetVal);
    ptr += htSizeCase;
    fillHashes(lits, max_len, (RoseLongLitHashEntry *)ptr,
               info.nocase.hashEntries, true, litToOffsetVal);
    ptr += htSizeNocase;
    assert(ptr <= table.get() + tabSize);
    DEBUG_PRINTF("built streaming table, size=%zu\n", tabSize);
    DEBUG_PRINTF("requires %zu bytes of history\n", max_len);
    DEBUG_PRINTF("requires %u bytes of stream state\n", tot_state_bytes);
    *historyRequired = max(*historyRequired, max_len);
    *longLitStreamStateRequired = tot_state_bytes;
    return blob.add(table.get(), tabSize, 16);
}
} // namespace ue2

View File

@@ -0,0 +1,51 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_BUILD_LONG_LIT_H
#define ROSE_BUILD_LONG_LIT_H
#include "ue2common.h"
#include <vector>
namespace ue2 {
class RoseBuildImpl;
class RoseEngineBlob;
struct ue2_case_string;
/**
 * \brief Build the streaming-mode long literal table and add it to the
 * engine blob.
 *
 * \param[in,out] lits long literals; may be modified by the builder.
 * \param longLitLengthThreshold minimum literal length handled by this table.
 * \param[out] historyRequired updated with the history requirement.
 * \param[out] longLitStreamStateRequired set to the packed stream state size
 *             in bytes.
 * \return blob offset of the table, or 0 if no table was built.
 */
u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob,
                          std::vector<ue2_case_string> &lits,
                          size_t longLitLengthThreshold,
                          size_t *historyRequired,
                          size_t *longLitStreamStateRequired);
} // namespace ue2
#endif // ROSE_BUILD_LONG_LIT_H

View File

@@ -485,7 +485,7 @@ bool isNoRunsVertex(const RoseBuildImpl &build, RoseVertex u) {
static
bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id,
const rose_literal_info &info) {
const rose_literal_info &info, const size_t max_len) {
DEBUG_PRINTF("lit id %u\n", id);
if (info.requires_benefits) {
@@ -493,6 +493,11 @@ bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id,
return false;
}
if (build.literals.right.at(id).s.length() > max_len) {
DEBUG_PRINTF("requires literal check\n");
return false;
}
if (isDirectHighlander(build, id, info)) {
DEBUG_PRINTF("highlander direct report\n");
return true;
@@ -625,7 +630,7 @@ u64a literalMinReportOffset(const RoseBuildImpl &build,
vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
rose_literal_table table,
u32 max_offset) {
size_t max_len, u32 max_offset) {
vector<hwlmLiteral> lits;
for (const auto &e : build.literals.right) {
@@ -663,10 +668,14 @@ vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
const vector<u8> &msk = e.second.msk;
const vector<u8> &cmp = e.second.cmp;
bool noruns = isNoRunsLiteral(build, id, info);
bool noruns = isNoRunsLiteral(build, id, info, max_len);
if (info.requires_explode) {
DEBUG_PRINTF("exploding lit\n");
// We do not require_explode for long literals.
assert(lit.length() <= max_len);
case_iter cit = caseIterateBegin(lit);
case_iter cite = caseIterateEnd();
for (; cit != cite; ++cit) {
@@ -687,20 +696,28 @@ vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
msk, cmp);
}
} else {
const std::string &s = lit.get_string();
const bool nocase = lit.any_nocase();
string s = lit.get_string();
bool nocase = lit.any_nocase();
DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, "
"cmp=%s\n",
final_id, escapeString(s).c_str(), (int)nocase, noruns,
dumpMask(msk).c_str(), dumpMask(cmp).c_str());
if (s.length() > max_len) {
DEBUG_PRINTF("truncating to tail of length %zu\n", max_len);
s.erase(0, s.length() - max_len);
// We shouldn't have set a threshold below 8 chars.
assert(msk.size() <= max_len);
}
if (!maskIsConsistent(s, nocase, msk, cmp)) {
DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n");
continue;
}
lits.emplace_back(s, nocase, noruns, final_id, groups, msk, cmp);
lits.emplace_back(move(s), nocase, noruns, final_id, groups, msk,
cmp);
}
}
@@ -708,14 +725,15 @@ vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
}
aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
size_t longLitLengthThreshold,
rose_group *fgroups,
size_t *fsize,
size_t *historyRequired,
size_t *streamStateRequired) {
size_t *historyRequired) {
*fsize = 0;
*fgroups = 0;
auto fl = fillHamsterLiteralList(build, ROSE_FLOATING);
auto fl = fillHamsterLiteralList(build, ROSE_FLOATING,
longLitLengthThreshold);
if (fl.empty()) {
DEBUG_PRINTF("empty floating matcher\n");
return nullptr;
@@ -747,13 +765,10 @@ aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
if (build.cc.streaming) {
DEBUG_PRINTF("literal_history_required=%zu\n",
ctl.literal_history_required);
DEBUG_PRINTF("literal_stream_state_required=%zu\n",
ctl.literal_stream_state_required);
assert(ctl.literal_history_required <=
build.cc.grey.maxHistoryAvailable);
*historyRequired = max(*historyRequired,
ctl.literal_history_required);
*streamStateRequired = ctl.literal_stream_state_required;
}
*fsize = hwlmSize(ftable.get());
@@ -778,8 +793,8 @@ aligned_unique_ptr<HWLM> buildSmallBlockMatcher(const RoseBuildImpl &build,
return nullptr;
}
auto lits = fillHamsterLiteralList(build, ROSE_FLOATING,
ROSE_SMALL_BLOCK_LEN);
auto lits = fillHamsterLiteralList(
build, ROSE_FLOATING, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
if (lits.empty()) {
DEBUG_PRINTF("no floating table\n");
return nullptr;
@@ -788,8 +803,9 @@ aligned_unique_ptr<HWLM> buildSmallBlockMatcher(const RoseBuildImpl &build,
return nullptr;
}
auto anchored_lits = fillHamsterLiteralList(build,
ROSE_ANCHORED_SMALL_BLOCK, ROSE_SMALL_BLOCK_LEN);
auto anchored_lits =
fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK,
ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
if (anchored_lits.empty()) {
DEBUG_PRINTF("no small-block anchored literals\n");
return nullptr;
@@ -823,7 +839,8 @@ aligned_unique_ptr<HWLM> buildEodAnchoredMatcher(const RoseBuildImpl &build,
size_t *esize) {
*esize = 0;
auto el = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED);
auto el = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED,
build.ematcher_region_size);
if (el.empty()) {
DEBUG_PRINTF("no eod anchored literals\n");

View File

@@ -51,13 +51,14 @@ struct hwlmLiteral;
* only lead to a pattern match after max_offset may be excluded.
*/
std::vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
rose_literal_table table, u32 max_offset = ROSE_BOUND_INF);
rose_literal_table table, size_t max_len,
u32 max_offset = ROSE_BOUND_INF);
aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
size_t longLitLengthThreshold,
rose_group *fgroups,
size_t *fsize,
size_t *historyRequired,
size_t *streamStateRequired);
size_t *historyRequired);
aligned_unique_ptr<HWLM> buildSmallBlockMatcher(const RoseBuildImpl &build,
size_t *sbsize);

View File

@@ -495,6 +495,24 @@ void RoseInstrEnginesEod::write(void *dest, RoseEngineBlob &blob,
inst->iter_offset = iter_offset;
}
void RoseInstrCheckLongLit::write(void *dest, RoseEngineBlob &blob,
const OffsetMap &offset_map) const {
RoseInstrBase::write(dest, blob, offset_map);
auto *inst = static_cast<impl_type *>(dest);
assert(!literal.empty());
inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1);
inst->lit_length = verify_u32(literal.size());
}
void RoseInstrCheckLongLitNocase::write(void *dest, RoseEngineBlob &blob,
const OffsetMap &offset_map) const {
RoseInstrBase::write(dest, blob, offset_map);
auto *inst = static_cast<impl_type *>(dest);
assert(!literal.empty());
inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1);
inst->lit_length = verify_u32(literal.size());
}
static
OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) {
OffsetMap offset_map;

View File

@@ -37,6 +37,7 @@
#include "util/hash.h"
#include "util/make_unique.h"
#include "util/ue2_containers.h"
#include "util/ue2string.h"
#include <algorithm>
#include <array>
@@ -1721,6 +1722,62 @@ public:
~RoseInstrMatcherEod() override;
};
class RoseInstrCheckLongLit
: public RoseInstrBaseNoTargets<ROSE_INSTR_CHECK_LONG_LIT,
ROSE_STRUCT_CHECK_LONG_LIT,
RoseInstrCheckLongLit> {
public:
std::string literal;
RoseInstrCheckLongLit(std::string literal_in)
: literal(std::move(literal_in)) {}
bool operator==(const RoseInstrCheckLongLit &ri) const {
return literal == ri.literal;
}
size_t hash() const override {
return hash_all(static_cast<int>(opcode), literal);
}
void write(void *dest, RoseEngineBlob &blob,
const OffsetMap &offset_map) const override;
bool equiv_to(const RoseInstrCheckLongLit &ri, const OffsetMap &,
const OffsetMap &) const {
return literal == ri.literal;
}
};
class RoseInstrCheckLongLitNocase
: public RoseInstrBaseNoTargets<ROSE_INSTR_CHECK_LONG_LIT_NOCASE,
ROSE_STRUCT_CHECK_LONG_LIT_NOCASE,
RoseInstrCheckLongLitNocase> {
public:
std::string literal;
RoseInstrCheckLongLitNocase(std::string literal_in)
: literal(std::move(literal_in)) {
upperString(literal);
}
bool operator==(const RoseInstrCheckLongLitNocase &ri) const {
return literal == ri.literal;
}
size_t hash() const override {
return hash_all(static_cast<int>(opcode), literal);
}
void write(void *dest, RoseEngineBlob &blob,
const OffsetMap &offset_map) const override;
bool equiv_to(const RoseInstrCheckLongLitNocase &ri, const OffsetMap &,
const OffsetMap &) const {
return literal == ri.literal;
}
};
class RoseInstrEnd
: public RoseInstrBaseTrivial<ROSE_INSTR_END, ROSE_STRUCT_END,
RoseInstrEnd> {

View File

@@ -610,6 +610,24 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
PROGRAM_CASE(MATCHER_EOD) {}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CHECK_LONG_LIT) {
os << " lit_offset " << ri->lit_offset << endl;
os << " lit_length " << ri->lit_length << endl;
const char *lit = (const char *)t + ri->lit_offset;
os << " literal: \""
<< escapeString(string(lit, ri->lit_length)) << "\"" << endl;
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) {
os << " lit_offset " << ri->lit_offset << endl;
os << " lit_length " << ri->lit_length << endl;
const char *lit = (const char *)t + ri->lit_offset;
os << " literal: \""
<< escapeString(string(lit, ri->lit_length)) << "\"" << endl;
}
PROGRAM_NEXT_INSTRUCTION
default:
os << " UNKNOWN (code " << int{code} << ")" << endl;
os << " <stopping>" << endl;
@@ -1031,6 +1049,32 @@ void dumpAnchoredStats(const void *atable, FILE *f) {
}
static
void dumpLongLiteralTable(const RoseEngine *t, FILE *f) {
if (!t->longLitTableOffset) {
return;
}
fprintf(f, "\n");
fprintf(f, "Long literal table (streaming):\n");
const auto *ll_table =
(const struct RoseLongLitTable *)loadFromByteCodeOffset(
t, t->longLitTableOffset);
u32 num_caseful = ll_table->boundaryCase;
u32 num_caseless = ll_table->boundaryNocase - num_caseful;
fprintf(f, " longest len: %u\n", ll_table->maxLen);
fprintf(f, " counts: %u caseful, %u caseless\n", num_caseful,
num_caseless);
fprintf(f, " hash bits: %u caseful, %u caseless\n",
ll_table->hashNBitsCase, ll_table->hashNBitsNocase);
fprintf(f, " state bits: %u caseful, %u caseless\n",
ll_table->streamStateBitsCase, ll_table->streamStateBitsNocase);
fprintf(f, " stream state: %u bytes\n", ll_table->streamStateBytes);
}
// Externally accessible functions
void roseDumpText(const RoseEngine *t, FILE *f) {
@@ -1106,7 +1150,7 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
fprintf(f, " - history buffer : %u bytes\n", t->historyRequired);
fprintf(f, " - exhaustion vector : %u bytes\n", (t->ekeyCount + 7) / 8);
fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize);
fprintf(f, " - floating matcher : %u bytes\n", t->floatingStreamState);
fprintf(f, " - long lit matcher : %u bytes\n", t->longLitStreamState);
fprintf(f, " - active array : %u bytes\n",
mmbit_size(t->activeArrayCount));
fprintf(f, " - active rose : %u bytes\n",
@@ -1160,6 +1204,8 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
fprintf(f, "\nSmall-block literal matcher stats:\n\n");
hwlmPrintStats(sbtable, f);
}
dumpLongLiteralTable(t, f);
}
#define DUMP_U8(o, member) \
@@ -1196,6 +1242,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
DUMP_U32(t, ematcherOffset);
DUMP_U32(t, fmatcherOffset);
DUMP_U32(t, sbmatcherOffset);
DUMP_U32(t, longLitTableOffset);
DUMP_U32(t, amatcherMinWidth);
DUMP_U32(t, fmatcherMinWidth);
DUMP_U32(t, eodmatcherMinWidth);
@@ -1245,7 +1292,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
DUMP_U32(t, stateOffsets.anchorState);
DUMP_U32(t, stateOffsets.groups);
DUMP_U32(t, stateOffsets.groups_size);
DUMP_U32(t, stateOffsets.floatingMatcherState);
DUMP_U32(t, stateOffsets.longLitState);
DUMP_U32(t, stateOffsets.somLocation);
DUMP_U32(t, stateOffsets.somValid);
DUMP_U32(t, stateOffsets.somWritable);
@@ -1264,7 +1311,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
DUMP_U32(t, ematcherRegionSize);
DUMP_U32(t, somRevCount);
DUMP_U32(t, somRevOffsetOffset);
DUMP_U32(t, floatingStreamState);
DUMP_U32(t, longLitStreamState);
fprintf(f, "}\n");
fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine));
}

View File

@@ -217,8 +217,8 @@ struct RoseStateOffsets {
/** Size of packed Rose groups value, in bytes. */
u32 groups_size;
/** State for floating literal matcher (managed by HWLM). */
u32 floatingMatcherState;
/** State for long literal support. */
u32 longLitState;
/** Packed SOM location slots. */
u32 somLocation;
@@ -325,6 +325,7 @@ struct RoseEngine {
u32 ematcherOffset; // offset of the eod-anchored literal matcher (bytes)
u32 fmatcherOffset; // offset of the floating literal matcher (bytes)
u32 sbmatcherOffset; // offset of the small-block literal matcher (bytes)
u32 longLitTableOffset; // offset of the long literal table
u32 amatcherMinWidth; /**< minimum number of bytes required for a pattern
* involved with the anchored table to produce a full
* match. */
@@ -434,7 +435,7 @@ struct RoseEngine {
u32 ematcherRegionSize; /* max region size to pass to ematcher */
u32 somRevCount; /**< number of som reverse nfas */
u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */
u32 floatingStreamState; // size in bytes
u32 longLitStreamState; // size in bytes
struct scatter_full_plan state_init;
};
@@ -445,6 +446,94 @@ struct ALIGN_CL_DIRECTIVE anchored_matcher_info {
u32 anchoredMinDistance; /* start of region to run anchored table over */
};
/**
* \brief Long literal table header.
*/
struct RoseLongLitTable {
/** \brief String ID one beyond the maximum entry for caseful literals. */
u32 boundaryCase;
/**
* \brief String ID one beyond the maximum entry for caseless literals.
* This is also the total size of the literal table.
*/
u32 boundaryNocase;
/**
* \brief Offset of the caseful hash table (relative to RoseLongLitTable
* base).
*
* Offset is zero if no such table exists.
*/
u32 hashOffsetCase;
/**
* \brief Offset of the caseless hash table (relative to RoseLongLitTable
* base).
*
* Offset is zero if no such table exists.
*/
u32 hashOffsetNocase;
/** \brief lg2 of the size of the caseful hash table. */
u32 hashNBitsCase;
/** \brief lg2 of the size of the caseless hash table. */
u32 hashNBitsNocase;
/**
* \brief Number of bits of packed stream state for the caseful hash table.
*/
u8 streamStateBitsCase;
/**
* \brief Number of bits of packed stream state for the caseless hash
* table.
*/
u8 streamStateBitsNocase;
/** \brief Total size of packed stream state in bytes. */
u8 streamStateBytes;
/** \brief Max length of literal prefixes. */
u8 maxLen;
};
/**
* \brief One of these structures per literal entry in our long literal table.
*/
struct RoseLongLiteral {
/**
* \brief Offset of the literal string itself, relative to
* RoseLongLitTable base.
*/
u32 offset;
};
/** \brief "No further links" value used for \ref RoseLongLitHashEntry::link. */
#define LINK_INVALID 0xffffffff
/**
* \brief One of these structures per hash table entry in our long literal
* table.
*/
struct RoseLongLitHashEntry {
/**
* \brief Bitfield used as a quick guard for hash buckets.
*
* For a given hash value N, the low six bits of N are taken and the
* corresponding bit is switched on in this bitfield if this bucket is used
* for that hash.
*/
u64a bitfield;
/** \brief Offset in the literal table for this string. */
u32 state;
/** \brief Hash table index of next entry in the chain for this bucket. */
u32 link;
};
static really_inline
const struct anchored_matcher_info *getALiteralMatcher(
const struct RoseEngine *t) {

View File

@@ -117,7 +117,19 @@ enum RoseInstructionCode {
/** \brief Run the EOD-anchored HWLM literal matcher. */
ROSE_INSTR_MATCHER_EOD,
LAST_ROSE_INSTRUCTION = ROSE_INSTR_MATCHER_EOD //!< Sentinel.
/**
* \brief Confirm a case-sensitive literal at the current offset. In
* streaming mode, this makes use of the long literal table.
*/
ROSE_INSTR_CHECK_LONG_LIT,
/**
* \brief Confirm a case-insensitive literal at the current offset. In
* streaming mode, this makes use of the long literal table.
*/
ROSE_INSTR_CHECK_LONG_LIT_NOCASE,
LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_LONG_LIT_NOCASE //!< Sentinel.
};
struct ROSE_STRUCT_END {
@@ -465,4 +477,18 @@ struct ROSE_STRUCT_MATCHER_EOD {
u8 code; //!< From enum RoseInstructionCode.
};
/** Note: check failure will halt program. */
struct ROSE_STRUCT_CHECK_LONG_LIT {
u8 code; //!< From enum RoseInstructionCode.
u32 lit_offset; //!< Offset of literal string.
u32 lit_length; //!< Length of literal string.
};
/** Note: check failure will halt program. */
struct ROSE_STRUCT_CHECK_LONG_LIT_NOCASE {
u8 code; //!< From enum RoseInstructionCode.
u32 lit_offset; //!< Offset of literal string.
u32 lit_length; //!< Length of literal string.
};
#endif // ROSE_ROSE_PROGRAM_H

View File

@@ -97,8 +97,8 @@ void storeGroups(const struct RoseEngine *t, char *state, rose_group groups) {
}
static really_inline
u8 *getFloatingMatcherState(const struct RoseEngine *t, char *state) {
return (u8 *)(state + t->stateOffsets.floatingMatcherState);
u8 *getLongLitState(const struct RoseEngine *t, char *state) {
return (u8 *)(state + t->stateOffsets.longLitState);
}
static really_inline

View File

@@ -33,6 +33,8 @@
#include "miracle.h"
#include "program_runtime.h"
#include "rose.h"
#include "rose_internal.h"
#include "stream_long_lit.h"
#include "hwlm/hwlm.h"
#include "nfa/mcclellan.h"
#include "nfa/nfa_api.h"
@@ -406,6 +408,7 @@ void ensureStreamNeatAndTidy(const struct RoseEngine *t, char *state,
roseFlushLastByteHistory(t, scratch, offset + length);
tctxt->lastEndOffset = offset + length;
storeGroups(t, state, tctxt->groups);
storeLongLiteralState(t, state, scratch);
}
static really_inline
@@ -588,11 +591,17 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
}
size_t hlength = scratch->core_info.hlen;
char rebuild = 0;
if (hlength) {
// Can only have long literal state or rebuild if this is not the
// first write to this stream.
loadLongLiteralState(t, state, scratch);
rebuild = (scratch->core_info.status & STATUS_DELAY_DIRTY) &&
(t->maxFloatingDelayedMatch == ROSE_BOUND_INF ||
offset < t->maxFloatingDelayedMatch);
}
char rebuild = hlength &&
(scratch->core_info.status & STATUS_DELAY_DIRTY) &&
(t->maxFloatingDelayedMatch == ROSE_BOUND_INF ||
offset < t->maxFloatingDelayedMatch);
DEBUG_PRINTF("**rebuild %hhd status %hhu mfdm %u, offset %llu\n",
rebuild, scratch->core_info.status,
t->maxFloatingDelayedMatch, offset);
@@ -621,17 +630,9 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
}
DEBUG_PRINTF("start=%zu\n", start);
u8 *stream_state;
if (t->floatingStreamState) {
stream_state = getFloatingMatcherState(t, state);
} else {
stream_state = NULL;
}
DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length);
hwlmExecStreaming(ftable, scratch, flen, start, roseFloatingCallback,
scratch, tctxt->groups & t->floating_group_mask,
stream_state);
scratch, tctxt->groups & t->floating_group_mask);
}
flush_delay_and_exit:

434
src/rose/stream_long_lit.h Normal file
View File

@@ -0,0 +1,434 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef STREAM_LONG_LIT_H
#define STREAM_LONG_LIT_H
#include "rose.h"
#include "rose_common.h"
#include "rose_internal.h"
#include "stream_long_lit_hash.h"
#include "util/copybytes.h"
/** \brief Returns the literal offset table, which sits immediately after the
 * (16-byte-aligned) RoseLongLitTable header. */
static really_inline
const struct RoseLongLiteral *
getLitTab(const struct RoseLongLitTable *ll_table) {
    const char *base = (const char *)ll_table;
    const size_t hdr_size = ROUNDUP_16(sizeof(struct RoseLongLitTable));
    return (const struct RoseLongLiteral *)(base + hdr_size);
}
/** \brief First literal index for the given case-mode: caseful literals start
 * at zero, caseless ones at the caseful boundary. */
static really_inline
u32 get_start_lit_idx(const struct RoseLongLitTable *ll_table,
                      const char nocase) {
    if (nocase) {
        return ll_table->boundaryCase;
    }
    return 0;
}
/** \brief One-past-the-end literal index for the given case-mode. */
static really_inline
u32 get_end_lit_idx(const struct RoseLongLitTable *ll_table,
                    const char nocase) {
    if (nocase) {
        return ll_table->boundaryNocase;
    }
    return ll_table->boundaryCase;
}
/**
 * \brief Binary search for the literal table entry whose string contains the
 * given state value (an offset into the literal string region).
 */
static rose_inline
u32 findLitTabEntry(const struct RoseLongLitTable *ll_table,
                    u32 stateValue, const char nocase) {
    const struct RoseLongLiteral *litTab = getLitTab(ll_table);
    u32 left = get_start_lit_idx(ll_table, nocase);
    u32 right = get_end_lit_idx(ll_table, nocase);
    // Step the state value back by one so we look for the entry whose string
    // includes it, rather than the entry one past it.
    stateValue -= 1;
    assert(left != right);
    assert(litTab[left].offset <= stateValue);
    assert(litTab[right].offset > stateValue);
    // Maintain the invariant:
    //   litTab[left].offset <= stateValue < litTab[right].offset
    while (right - left > 1) {
        u32 mid = left + (right - left) / 2;
        if (litTab[mid].offset <= stateValue) {
            left = mid;
        } else {
            right = mid;
        }
    }
    assert(litTab[left].offset <= stateValue);
    assert(litTab[right].offset > stateValue);
    return left;
}
// Reads from stream state and unpacks values into stream state table.
// The packed layout is: caseful state in the low streamStateBitsCase bits,
// caseless state in the bits immediately above, totalling streamStateBytes
// bytes.
static really_inline
void loadLongLitStreamState(const struct RoseLongLitTable *ll_table,
                            const u8 *ll_state, u32 *state_case,
                            u32 *state_nocase) {
    assert(ll_table);
    assert(ll_state);
    assert(state_case && state_nocase);
    u8 ss_bytes = ll_table->streamStateBytes;
    u8 ssb = ll_table->streamStateBitsCase;
    UNUSED u8 ssb_nc = ll_table->streamStateBitsNocase;
    // Both fields must fit in exactly ss_bytes.
    assert(ss_bytes == (ssb + ssb_nc + 7) / 8);
#if defined(ARCH_32_BIT)
    // On 32-bit hosts, we may be able to avoid having to do any u64a
    // manipulation at all.
    if (ss_bytes <= 4) {
        u32 ssb_mask = (1U << ssb) - 1;
        u32 streamVal = partial_load_u32(ll_state, ss_bytes);
        *state_case = (u32)(streamVal & ssb_mask);
        *state_nocase = (u32)(streamVal >> ssb);
        return;
    }
#endif
    // General path: load up to eight bytes and split the packed value.
    u64a ssb_mask = (1ULL << ssb) - 1;
    u64a streamVal = partial_load_u64a(ll_state, ss_bytes);
    *state_case = (u32)(streamVal & ssb_mask);
    *state_nocase = (u32)(streamVal >> ssb);
}
/** \brief Offset (relative to the table base) of the first literal string for
 * the given case-mode. */
static really_inline
u32 getBaseOffsetOfLits(const struct RoseLongLitTable *ll_table,
                        const char nocase) {
    const struct RoseLongLiteral *litTab = getLitTab(ll_table);
    const u32 first_idx = get_start_lit_idx(ll_table, nocase);
    return litTab[first_idx].offset;
}
/** \brief Convert a packed stream state value back into an absolute offset
 * into the literal string region. Inverse of packStateVal(). */
static really_inline
u32 unpackStateVal(const struct RoseLongLitTable *ll_table, const char nocase,
                   u32 v) {
    const u32 base = getBaseOffsetOfLits(ll_table, nocase);
    // Packed values are biased by +1 so that zero means "no state".
    return (v - 1) + base;
}
/** \brief Convert an absolute literal-region offset into a packed stream
 * state value. Inverse of unpackStateVal(). */
static really_inline
u32 packStateVal(const struct RoseLongLitTable *ll_table, const char nocase,
                 u32 v) {
    const u32 base = getBaseOffsetOfLits(ll_table, nocase);
    // Bias by +1 so that a packed value of zero means "no state".
    return (v + 1) - base;
}
/**
 * \brief Restore the long literal history pointers in scratch for one
 * case-mode from its unpacked stream state value (zero means no state).
 */
static rose_inline
void loadLongLiteralStateMode(struct hs_scratch *scratch,
                              const struct RoseLongLitTable *ll_table,
                              const struct RoseLongLiteral *litTab,
                              const u32 state, const char nocase) {
    if (!state) {
        DEBUG_PRINTF("no state for %s\n", nocase ? "caseless" : "caseful");
        return;
    }
    // Recover the absolute offset encoded by the state value, then the
    // literal entry that contains it.
    const u32 unpacked = unpackStateVal(ll_table, nocase, state);
    const u32 ent = findLitTabEntry(ll_table, unpacked, nocase);
    const size_t lit_offset = litTab[ent].offset;
    const u8 *prefix_buf = (const u8 *)ll_table + lit_offset;
    const size_t prefix_len = unpacked - lit_offset;
    struct RoseContext *tctxt = &scratch->tctxt;
    if (!nocase) {
        tctxt->ll_buf = prefix_buf;
        tctxt->ll_len = prefix_len;
    } else {
        tctxt->ll_buf_nocase = prefix_buf;
        tctxt->ll_len_nocase = prefix_len;
    }
}
static rose_inline
void loadLongLiteralState(const struct RoseEngine *t, char *state,
struct hs_scratch *scratch) {
if (!t->longLitTableOffset) {
return;
}
scratch->tctxt.ll_buf = scratch->core_info.hbuf;
scratch->tctxt.ll_len = scratch->core_info.hlen;
scratch->tctxt.ll_buf_nocase = scratch->core_info.hbuf;
scratch->tctxt.ll_len_nocase = scratch->core_info.hlen;
const struct RoseLongLitTable *ll_table =
getByOffset(t, t->longLitTableOffset);
const struct RoseLongLiteral *litTab = getLitTab(ll_table);
const u8 *ll_state = getLongLitState(t, state);
u32 state_case;
u32 state_nocase;
loadLongLitStreamState(ll_table, ll_state, &state_case, &state_nocase);
loadLongLiteralStateMode(scratch, ll_table, litTab, state_case, 0);
loadLongLiteralStateMode(scratch, ll_table, litTab, state_nocase, 1);
}
/**
 * \brief Confirms that the literal prefix identified by \a hashState actually
 * matches the data at the end of the stream.
 *
 * The candidate bytes may straddle the boundary between the history saved in
 * scratch (ll_buf/ll_buf_nocase) and the current buffer; both portions are
 * compared with \ref cmpForward.
 *
 * \return 1 if the literal prefix is present, 0 otherwise.
 */
static rose_inline
char confirmLongLiteral(const struct RoseLongLitTable *ll_table,
                        const hs_scratch_t *scratch, u32 hashState,
                        const char nocase) {
    const struct RoseLongLiteral *litTab = getLitTab(ll_table);
    u32 idx = findLitTabEntry(ll_table, hashState, nocase);
    size_t found_offset = litTab[idx].offset;
    // s points at the literal's bytes inside the table's string blob.
    const u8 *s = found_offset + (const u8 *)ll_table;
    assert(hashState > found_offset);
    // len is the number of literal bytes that must match at the stream end.
    size_t len = hashState - found_offset;
    const u8 *buf = scratch->core_info.buf;
    const size_t buf_len = scratch->core_info.len;

    // If the candidate is longer than the current buffer, the leading part
    // must be compared against the saved history for this mode.
    if (len > buf_len) {
        const struct RoseContext *tctxt = &scratch->tctxt;
        const u8 *hist = nocase ? tctxt->ll_buf_nocase : tctxt->ll_buf;
        size_t hist_len = nocase ? tctxt->ll_len_nocase : tctxt->ll_len;

        if (len > buf_len + hist_len) {
            return 0; // Break out - not enough total history
        }

        // overhang = bytes of the candidate that lie before the current
        // buffer, i.e. at the tail of the history.
        size_t overhang = len - buf_len;
        assert(overhang <= hist_len);

        if (cmpForward(hist + hist_len - overhang, s, overhang, nocase)) {
            return 0;
        }
        // Advance past the portion already confirmed in history.
        s += overhang;
        len -= overhang;
    }

    // if we got here, we don't need history or we compared ok out of history
    assert(len <= buf_len);

    // Compare the remaining bytes against the tail of the current buffer.
    if (cmpForward(buf + buf_len - len, s, len, nocase)) {
        return 0;
    }

    DEBUG_PRINTF("confirmed hashState=%u\n", hashState);
    return 1;
}
/**
 * \brief Computes the caseful and caseless hashes over the last
 * LONG_LIT_HASH_LEN bytes of the window ending at the end of the current
 * buffer, beginning hash_len bytes back.
 *
 * Each hash output is only written when the corresponding hash table exists
 * (non-zero bit count in the table header); callers should pre-initialise
 * *hash_case and *hash_nocase.
 */
static rose_inline
void calcStreamingHash(const struct core_info *ci,
                       const struct RoseLongLitTable *ll_table, u8 hash_len,
                       u32 *hash_case, u32 *hash_nocase) {
    assert(hash_len >= LONG_LIT_HASH_LEN);

    // Our hash function operates over LONG_LIT_HASH_LEN bytes, starting from
    // location (end of buffer - hash_len). If this block can be satisfied
    // entirely from either the current buffer or the history buffer, we pass
    // in the pointer directly; otherwise we must make a copy.

    u8 tempbuf[LONG_LIT_HASH_LEN];
    const u8 *base;

    if (hash_len > ci->len) {
        // The hash window starts before the current buffer, in history.
        size_t overhang = hash_len - ci->len;
        if (overhang >= LONG_LIT_HASH_LEN) {
            // Can read enough to hash from inside the history buffer.
            assert(overhang <= ci->hlen);
            base = ci->hbuf + ci->hlen - overhang;
        } else {
            // Window straddles the history/current boundary: assemble the
            // LONG_LIT_HASH_LEN bytes in tempbuf.
            // Copy: first chunk from history buffer.
            assert(overhang <= ci->hlen);
            copy_upto_32_bytes(tempbuf, ci->hbuf + ci->hlen - overhang,
                               overhang);
            // Copy: second chunk from current buffer.
            size_t copy_buf_len = LONG_LIT_HASH_LEN - overhang;
            assert(copy_buf_len <= ci->len);
            copy_upto_32_bytes(tempbuf + overhang, ci->buf, copy_buf_len);
            // Read from our temporary buffer for the hash.
            base = tempbuf;
        }
    } else {
        // Can read enough to hash from inside the current buffer.
        base = ci->buf + ci->len - hash_len;
    }

    if (ll_table->hashNBitsCase) {
        *hash_case = hashLongLiteral(base, LONG_LIT_HASH_LEN, 0);
        DEBUG_PRINTF("caseful hash %u\n", *hash_case);
    }
    if (ll_table->hashNBitsNocase) {
        *hash_nocase = hashLongLiteral(base, LONG_LIT_HASH_LEN, 1);
        DEBUG_PRINTF("caseless hash %u\n", *hash_nocase);
    }
}
/**
 * \brief Returns a pointer to the hash table for the given case-sensitivity
 * mode.
 *
 * Each mode's table lives at an offset relative to the start of the long
 * literal table.
 */
static really_inline
const struct RoseLongLitHashEntry *
getHashTableBase(const struct RoseLongLitTable *ll_table, const char nocase) {
    u32 offset;
    if (nocase) {
        offset = ll_table->hashOffsetNocase;
    } else {
        offset = ll_table->hashOffsetCase;
    }
    const char *base = (const char *)ll_table;
    return (const struct RoseLongLitHashEntry *)(base + offset);
}
/**
 * \brief Looks up hash value \a h in the hash table for the given
 * case-sensitivity mode.
 *
 * \return Pointer to the head entry for this hash bucket, or NULL if there is
 * no table for this mode or the entry's filter bit for this hash is clear.
 */
static rose_inline
const struct RoseLongLitHashEntry *
getLongLitHashEnt(const struct RoseLongLitTable *ll_table, u32 h,
                  const char nocase) {
    u32 nbits = nocase ? ll_table->hashNBitsNocase : ll_table->hashNBitsCase;
    if (!nbits) {
        return NULL;
    }

    // Use an unsigned shift: "1 << nbits" with nbits == 31 would overflow a
    // signed int, which is undefined behaviour.
    u32 h_ent = h & ((1U << nbits) - 1);
    u32 h_low = (h >> nbits) & 63;

    const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, nocase);
    const struct RoseLongLitHashEntry *ent = tab + h_ent;

    // Entry bit h_low (keyed on the next six hash bits) acts as a filter: if
    // it is clear, no literal with this hash is present.
    if (!((ent->bitfield >> h_low) & 0x1)) {
        return NULL;
    }

    return ent;
}
/**
 * \brief Walks the hash chain starting at \a ent, looking for an entry whose
 * literal prefix is confirmed present at the end of the stream.
 *
 * \return The packed stream state value for the confirmed entry, or zero if
 * no entry in the chain confirms.
 */
static rose_inline
u32 storeLongLiteralStateMode(const struct hs_scratch *scratch,
                              const struct RoseLongLitTable *ll_table,
                              const struct RoseLongLitHashEntry *ent,
                              const char nocase) {
    assert(ent);
    assert(nocase ? ll_table->hashNBitsNocase : ll_table->hashNBitsCase);

    const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, nocase);

    u32 packed_state = 0;
    while (1) {
        // Verify this candidate against the actual stream bytes; on success,
        // pack its state value for storage.
        if (confirmLongLiteral(ll_table, scratch, ent->state, nocase)) {
            packed_state = packStateVal(ll_table, nocase, ent->state);
            DEBUG_PRINTF("set %s state to %u\n", nocase ? "nocase" : "case",
                         packed_state);
            break;
        }
        // Follow the chain to the next entry with the same hash, if any.
        if (ent->link == LINK_INVALID) {
            break;
        }
        ent = tab + ent->link;
    }
    return packed_state;
}
#ifndef NDEBUG
// Defensive checking (used in assert) that these table values don't overflow
// the range available.
static really_inline
char streamingTableOverflow(u32 state_case, u32 state_nocase, u8 ssb,
                            u8 ssb_nc) {
    // A value overflows if any bit above its allotted width is set.
    const u32 case_mask = (u32)((1ULL << ssb) - 1);
    const u32 nocase_mask = (u32)((1ULL << ssb_nc) - 1);
    return (state_case & ~case_mask) != 0 || (state_nocase & ~nocase_mask) != 0;
}
#endif
// Packs the caseful and caseless state values together and writes them out
// to the long literal stream state.
static rose_inline
void storeLongLitStreamState(const struct RoseLongLitTable *ll_table,
                             u8 *ll_state, u32 state_case, u32 state_nocase) {
    assert(ll_table);
    assert(ll_state);

    u8 ss_bytes = ll_table->streamStateBytes;
    u8 ssb = ll_table->streamStateBitsCase;
    UNUSED u8 ssb_nc = ll_table->streamStateBitsNocase;
    assert(ss_bytes == ROUNDUP_N(ssb + ssb_nc, 8) / 8);
    assert(!streamingTableOverflow(state_case, state_nocase, ssb, ssb_nc));

#if defined(ARCH_32_BIT)
    // On 32-bit hosts, we may be able to avoid having to do any u64a
    // manipulation at all.
    if (ss_bytes <= 4) {
        u32 stagingStreamState = state_case;
        stagingStreamState |= (state_nocase << ssb);
        partial_store_u32(ll_state, stagingStreamState, ss_bytes);
        return;
    }
#endif

    // Caseful state occupies the low ssb bits; caseless state sits above it.
    u64a stagingStreamState = (u64a)state_case;
    stagingStreamState |= (u64a)state_nocase << ssb;
    partial_store_u64a(ll_state, stagingStreamState, ss_bytes);
}
/**
 * \brief Computes and stores the long literal stream state for both
 * case-sensitivity modes at the end of a stream write.
 *
 * Hashes the last maxLen bytes of the stream, probes each mode's hash table,
 * confirms candidates against the actual data and writes the packed results
 * into the engine state.
 */
static rose_inline
void storeLongLiteralState(const struct RoseEngine *t, char *state,
                           struct hs_scratch *scratch) {
    if (!t->longLitTableOffset) {
        DEBUG_PRINTF("no table\n");
        return;
    }

    struct core_info *ci = &scratch->core_info;
    const struct RoseLongLitTable *ll_table =
        getByOffset(t, t->longLitTableOffset);
    assert(ll_table->maxLen);

    DEBUG_PRINTF("maxLen=%u, len=%zu, hlen=%zu\n", ll_table->maxLen, ci->len,
                 ci->hlen);

    u32 state_case = 0;
    u32 state_nocase = 0;

    // Only probe the hash tables when the stream (current buffer plus
    // history) holds at least maxLen bytes; otherwise no long literal can be
    // in progress and both states stay zero.
    if (ll_table->maxLen <= ci->len + ci->hlen) {
        u32 hash_case = 0;
        u32 hash_nocase = 0;

        calcStreamingHash(ci, ll_table, ll_table->maxLen, &hash_case,
                          &hash_nocase);

        const struct RoseLongLitHashEntry *ent_case =
            getLongLitHashEnt(ll_table, hash_case, 0);
        const struct RoseLongLitHashEntry *ent_nocase =
            getLongLitHashEnt(ll_table, hash_nocase, 1);

        DEBUG_PRINTF("ent_caseful=%p, ent_caseless=%p\n", ent_case, ent_nocase);

        if (ent_case) {
            state_case = storeLongLiteralStateMode(scratch, ll_table,
                                                   ent_case, 0);
        }

        if (ent_nocase) {
            state_nocase = storeLongLiteralStateMode(scratch, ll_table,
                                                     ent_nocase, 1);
        }
    }

    DEBUG_PRINTF("store {%u, %u}\n", state_case, state_nocase);

    u8 *ll_state = getLongLitState(t, state);
    storeLongLitStreamState(ll_table, ll_state, state_case, state_nocase);
}
#endif // STREAM_LONG_LIT_H

View File

@@ -0,0 +1,65 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef STREAM_LONG_LIT_HASH_H
#define STREAM_LONG_LIT_HASH_H
#include "ue2common.h"
#include "util/unaligned.h"
/** \brief Length of the buffer operated on by \ref hashLongLiteral(). */
#define LONG_LIT_HASH_LEN 24
/** \brief Hash function used for long literal table in streaming mode.
 *
 * Hashes exactly LONG_LIT_HASH_LEN bytes at \a ptr; \a len is used only for
 * defensive checking. When \a nocase is set, the ASCII case bit of every
 * byte is masked off before hashing so that 'a'-'z' and 'A'-'Z' collide.
 */
static really_inline
u32 hashLongLiteral(const u8 *ptr, UNUSED size_t len, char nocase) {
    // 0xdf == ~0x20: clearing bit 5 folds ASCII lowercase onto uppercase.
    const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL;
    const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL;

    // We unconditionally hash LONG_LIT_HASH_LEN bytes; all use cases of this
    // hash are for strings longer than this. (Use the named constant rather
    // than a magic 24 so the assert tracks LONG_LIT_HASH_LEN.)
    assert(len >= LONG_LIT_HASH_LEN);

    u64a v1 = unaligned_load_u64a(ptr);
    u64a v2 = unaligned_load_u64a(ptr + 8);
    u64a v3 = unaligned_load_u64a(ptr + 16);
    if (nocase) {
        v1 &= CASEMASK;
        v2 &= CASEMASK;
        v3 &= CASEMASK;
    }
    v1 *= MULTIPLIER;
    v2 *= MULTIPLIER * MULTIPLIER;
    v3 *= MULTIPLIER * MULTIPLIER * MULTIPLIER;
    v1 >>= 32;
    v2 >>= 32;
    v3 >>= 32;
    return v1 ^ v2 ^ v3;
}
#endif // STREAM_LONG_LIT_HASH_H