rose/hwlm: limit literals to eight bytes

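Rework HWLM to work over literals of at most eight bytes. Longer literals
are truncated to their eight-byte suffix and confirmed in the Rose
interpreter, either as "medium length" literals (confirmed from history) or
as long literals.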
This commit is contained in:
Justin Viiret 2016-11-28 16:46:03 +11:00 committed by Matthew Barr
parent 5c9c540424
commit 07a6b6510c
19 changed files with 452 additions and 348 deletions

View File

@ -545,35 +545,12 @@ FDRCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) {
} // namespace } // namespace
static static
size_t maxMaskLen(const vector<hwlmLiteral> &lits) { aligned_unique_ptr<FDR> fdrBuildTableInternal(const vector<hwlmLiteral> &lits,
size_t rv = 0; bool make_small,
for (const auto &lit : lits) { const target_t &target,
rv = max(rv, lit.msk.size()); const Grey &grey, u32 hint) {
}
return rv;
}
static
void setHistoryRequired(hwlmStreamingControl &stream_ctl,
const vector<hwlmLiteral> &lits) {
size_t max_mask_len = maxMaskLen(lits);
// we want enough history to manage the longest literal and the longest
// mask.
stream_ctl.literal_history_required = max(maxLen(lits), max_mask_len) - 1;
}
static
aligned_unique_ptr<FDR>
fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
const target_t &target, const Grey &grey, u32 hint,
hwlmStreamingControl *stream_control) {
pair<aligned_unique_ptr<u8>, size_t> link(nullptr, 0); pair<aligned_unique_ptr<u8>, size_t> link(nullptr, 0);
if (stream_control) {
setHistoryRequired(*stream_control, lits);
}
DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2"); DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");
if (grey.fdrAllowTeddy) { if (grey.fdrAllowTeddy) {
@ -606,21 +583,18 @@ fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
aligned_unique_ptr<FDR> fdrBuildTable(const vector<hwlmLiteral> &lits, aligned_unique_ptr<FDR> fdrBuildTable(const vector<hwlmLiteral> &lits,
bool make_small, const target_t &target, bool make_small, const target_t &target,
const Grey &grey, const Grey &grey) {
hwlmStreamingControl *stream_control) { return fdrBuildTableInternal(lits, make_small, target, grey, HINT_INVALID);
return fdrBuildTableInternal(lits, make_small, target, grey, HINT_INVALID,
stream_control);
} }
#if !defined(RELEASE_BUILD) #if !defined(RELEASE_BUILD)
aligned_unique_ptr<FDR> aligned_unique_ptr<FDR> fdrBuildTableHinted(const vector<hwlmLiteral> &lits,
fdrBuildTableHinted(const vector<hwlmLiteral> &lits, bool make_small, u32 hint, bool make_small, u32 hint,
const target_t &target, const Grey &grey, const target_t &target,
hwlmStreamingControl *stream_control) { const Grey &grey) {
pair<u8 *, size_t> link(nullptr, 0); pair<u8 *, size_t> link(nullptr, 0);
return fdrBuildTableInternal(lits, make_small, target, grey, hint, return fdrBuildTableInternal(lits, make_small, target, grey, hint);
stream_control);
} }
#endif #endif

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -43,21 +43,18 @@ struct FDR;
namespace ue2 { namespace ue2 {
struct hwlmLiteral; struct hwlmLiteral;
struct hwlmStreamingControl;
struct Grey; struct Grey;
struct target_t; struct target_t;
ue2::aligned_unique_ptr<FDR> ue2::aligned_unique_ptr<FDR>
fdrBuildTable(const std::vector<hwlmLiteral> &lits, bool make_small, fdrBuildTable(const std::vector<hwlmLiteral> &lits, bool make_small,
const target_t &target, const Grey &grey, const target_t &target, const Grey &grey);
hwlmStreamingControl *stream_control = nullptr);
#if !defined(RELEASE_BUILD) #if !defined(RELEASE_BUILD)
ue2::aligned_unique_ptr<FDR> ue2::aligned_unique_ptr<FDR>
fdrBuildTableHinted(const std::vector<hwlmLiteral> &lits, bool make_small, fdrBuildTableHinted(const std::vector<hwlmLiteral> &lits, bool make_small,
u32 hint, const target_t &target, const Grey &grey, u32 hint, const target_t &target, const Grey &grey);
hwlmStreamingControl *stream_control = nullptr);
#endif #endif

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -33,6 +33,7 @@
#include "hwlm.h" #include "hwlm.h"
#include "hwlm_build.h" #include "hwlm_build.h"
#include "hwlm_internal.h" #include "hwlm_internal.h"
#include "hwlm_literal.h"
#include "noodle_engine.h" #include "noodle_engine.h"
#include "noodle_build.h" #include "noodle_build.h"
#include "scratch.h" #include "scratch.h"
@ -512,7 +513,6 @@ bool everyoneHasGroups(const vector<hwlmLiteral> &lits) {
static static
bool isNoodleable(const vector<hwlmLiteral> &lits, bool isNoodleable(const vector<hwlmLiteral> &lits,
const hwlmStreamingControl *stream_control,
const CompileContext &cc) { const CompileContext &cc) {
if (!cc.grey.allowNoodle) { if (!cc.grey.allowNoodle) {
return false; return false;
@ -523,19 +523,6 @@ bool isNoodleable(const vector<hwlmLiteral> &lits,
return false; return false;
} }
if (stream_control) { // nullptr if in block mode
if (lits.front().s.length() > stream_control->history_max + 1) {
DEBUG_PRINTF("length of %zu too long for history max %zu\n",
lits.front().s.length(),
stream_control->history_max);
return false;
}
if (2 * lits.front().s.length() - 2 > FDR_TEMP_BUF_SIZE) {
assert(0);
return false;
}
}
if (!lits.front().msk.empty()) { if (!lits.front().msk.empty()) {
DEBUG_PRINTF("noodle can't handle supplementary masks\n"); DEBUG_PRINTF("noodle can't handle supplementary masks\n");
return false; return false;
@ -545,22 +532,11 @@ bool isNoodleable(const vector<hwlmLiteral> &lits,
} }
aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits, aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
hwlmStreamingControl *stream_control,
bool make_small, const CompileContext &cc, bool make_small, const CompileContext &cc,
hwlm_group_t expected_groups) { hwlm_group_t expected_groups) {
assert(!lits.empty()); assert(!lits.empty());
dumpLits(lits); dumpLits(lits);
if (stream_control) {
assert(stream_control->history_min <= stream_control->history_max);
// We should not have been passed any literals that are too long to
// match with a maximally-sized history buffer.
assert(all_of(begin(lits), end(lits), [&](const hwlmLiteral &lit) {
return lit.s.length() <= stream_control->history_max + 1;
}));
}
// Check that we haven't exceeded the maximum number of literals. // Check that we haven't exceeded the maximum number of literals.
if (lits.size() > cc.grey.limitLiteralCount) { if (lits.size() > cc.grey.limitLiteralCount) {
throw ResourceLimitError(); throw ResourceLimitError();
@ -595,7 +571,7 @@ aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
assert(everyoneHasGroups(lits)); assert(everyoneHasGroups(lits));
if (isNoodleable(lits, stream_control, cc)) { if (isNoodleable(lits, cc)) {
DEBUG_PRINTF("build noodle table\n"); DEBUG_PRINTF("build noodle table\n");
engType = HWLM_ENGINE_NOOD; engType = HWLM_ENGINE_NOOD;
const hwlmLiteral &lit = lits.front(); const hwlmLiteral &lit = lits.front();
@ -603,19 +579,11 @@ aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
if (noodle) { if (noodle) {
engSize = noodSize(noodle.get()); engSize = noodSize(noodle.get());
} }
if (stream_control) {
// For now, a single literal still goes to noodle and asks
// for a great big history
stream_control->literal_history_required = lit.s.length() - 1;
assert(stream_control->literal_history_required
<= stream_control->history_max);
}
eng = move(noodle); eng = move(noodle);
} else { } else {
DEBUG_PRINTF("building a new deal\n"); DEBUG_PRINTF("building a new deal\n");
engType = HWLM_ENGINE_FDR; engType = HWLM_ENGINE_FDR;
auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey, auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey);
stream_control);
if (fdr) { if (fdr) {
engSize = fdrSize(fdr.get()); engSize = fdrSize(fdr.get());
} }
@ -640,14 +608,6 @@ aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
buildForwardAccel(h.get(), lits, expected_groups); buildForwardAccel(h.get(), lits, expected_groups);
} }
if (stream_control) {
DEBUG_PRINTF("requires %zu (of max %zu) bytes of history\n",
stream_control->literal_history_required,
stream_control->history_max);
assert(stream_control->literal_history_required
<= stream_control->history_max);
}
return h; return h;
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -34,7 +34,6 @@
#define HWLM_BUILD_H #define HWLM_BUILD_H
#include "hwlm.h" #include "hwlm.h"
#include "hwlm_literal.h"
#include "ue2common.h" #include "ue2common.h"
#include "util/alloc.h" #include "util/alloc.h"
@ -47,30 +46,12 @@ namespace ue2 {
struct CompileContext; struct CompileContext;
struct Grey; struct Grey;
struct target_t; struct hwlmLiteral;
/** \brief Structure gathering together the input/output parameters related to
* streaming mode operation.
*
* Passed by pointer to hwlmBuild(); that pointer is NULL when the matcher is
* built for non-streaming (block) mode. The two IN fields are read by the
* build, and the OUT field is written by whichever engine build path is
* taken. All three values are history sizes in bytes. */
struct hwlmStreamingControl {
/** \brief IN parameter: Upper limit on the amount of history that can be
* requested.
*
* hwlmBuild() asserts that no literal is longer than history_max + 1 and
* that literal_history_required does not exceed this value. */
size_t history_max;
/** \brief IN parameter: History already known to be used before literal
* analysis.
*
* hwlmBuild() asserts history_min <= history_max. */
size_t history_min;
/** \brief OUT parameter: History required by the literal matcher to
* correctly match all literals.
*
* The FDR build path sets this to max(longest literal, longest mask) - 1;
* the single-literal noodle path sets it to the literal length - 1. */
size_t literal_history_required;
};
/** \brief Build an \ref HWLM literal matcher runtime structure for a group of /** \brief Build an \ref HWLM literal matcher runtime structure for a group of
* literals. * literals.
* *
* \param lits The group of literals. * \param lits The group of literals.
* \param stream_control Streaming control parameters. If the matcher will
* operate in non-streaming (block) mode, this pointer should be NULL.
* \param make_small Optimise matcher for small size. * \param make_small Optimise matcher for small size.
* \param cc Compile context. * \param cc Compile context.
* \param expected_groups FIXME: document me! * \param expected_groups FIXME: document me!
@ -80,8 +61,7 @@ struct hwlmStreamingControl {
* thrown. * thrown.
*/ */
aligned_unique_ptr<HWLM> aligned_unique_ptr<HWLM>
hwlmBuild(const std::vector<hwlmLiteral> &lits, hwlmBuild(const std::vector<hwlmLiteral> &lits, bool make_small,
hwlmStreamingControl *stream_control, bool make_small,
const CompileContext &cc, const CompileContext &cc,
hwlm_group_t expected_groups = HWLM_ALL_GROUPS); hwlm_group_t expected_groups = HWLM_ALL_GROUPS);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -42,7 +42,7 @@
namespace ue2 { namespace ue2 {
/** \brief Max length of the literal passed to HWLM. */ /** \brief Max length of the literal passed to HWLM. */
#define HWLM_LITERAL_MAX_LEN 255 #define HWLM_LITERAL_MAX_LEN 8
/** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */ /** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */
#define HWLM_MASKLEN 8 #define HWLM_MASKLEN 8

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -1409,6 +1409,68 @@ int roseCheckLongLiteral(const struct RoseEngine *t,
return 1; return 1;
} }
/**
 * \brief Confirm a "medium length" literal whose match ends at offset \a end.
 *
 * The literal bytes are fetched from the engine at \a lit_offset. The part of
 * the literal that lies in the current buffer is compared first; any part
 * that starts before the current buffer is then compared against the history
 * buffer.
 *
 * \param t Rose engine holding the literal bytes.
 * \param scratch Scratch space; only its core_info is read.
 * \param end Offset at which the literal match ends.
 * \param lit_offset Offset of the literal bytes, resolved via getByOffset().
 * \param lit_length Length of the literal in bytes.
 * \param nocase Passed through to cmpForward() to select the comparison mode.
 * \return 1 if the literal is confirmed; 0 if the match ends before
 * \a lit_length bytes have been seen, if there is not enough history, or if
 * either comparison fails.
 */
static rose_inline
int roseCheckMediumLiteral(const struct RoseEngine *t,
                           const struct hs_scratch *scratch, u64a end,
                           u32 lit_offset, u32 lit_length, char nocase) {
    const struct core_info *core = &scratch->core_info;
    const u8 *lit_bytes = getByOffset(t, lit_offset);

    DEBUG_PRINTF("check lit at %llu, length %u\n", end, lit_length);
    DEBUG_PRINTF("base buf_offset=%llu\n", core->buf_offset);

    // The match ends too early for the whole literal to fit before it.
    if (end < lit_length) {
        DEBUG_PRINTF("too short!\n");
        return 0;
    }

    // Offset of the literal's first byte; safe now that end >= lit_length.
    const u64a lit_begin = end - lit_length;

    // Step 1: compare whatever tail of the literal sits in the current buffer.
    if (end > core->buf_offset) {
        const u64a in_buf = end - core->buf_offset;
        const u32 tail_len = MIN(in_buf, lit_length);
        const u64a tail_pos = in_buf - tail_len;
        const u8 *lit_tail = lit_bytes + lit_length - tail_len;
        DEBUG_PRINTF("checking suffix (%u bytes) in buf[%llu:%llu]\n", tail_len,
                     tail_pos, end);
        // A nonzero result from cmpForward is treated as a mismatch.
        if (cmpForward(core->buf + tail_pos, lit_tail, tail_len, nocase)) {
            DEBUG_PRINTF("cmp of suffix failed\n");
            return 0;
        }
    }

    // Step 2: nothing more to do if the literal began inside this buffer.
    if (lit_begin >= core->buf_offset) {
        DEBUG_PRINTF("literal confirmed in current block\n");
        return 1;
    }

    // Step 3: the literal's head precedes the buffer, so it must be checked
    // against history, which is not expected in block mode.
    assert(t->mode != HS_MODE_BLOCK);

    const u64a before_buf = core->buf_offset - lit_begin;
    const u32 head_len = MIN(lit_length, before_buf);
    const u32 rewind = before_buf; // distance back from the end of history
    DEBUG_PRINTF("hlen=%zu, hist_rewind=%u\n", core->hlen, rewind);

    // The EOD and delayed rebuild paths can get here without enough history
    // available, so this has to be a runtime check rather than an assertion.
    if (rewind > core->hlen) {
        DEBUG_PRINTF("not enough history\n");
        return 0;
    }

    DEBUG_PRINTF("check prefix len=%u from hist (len %zu, rewind %u)\n",
                 head_len, core->hlen, rewind);
    assert(rewind <= core->hlen);
    if (cmpForward(core->hbuf + core->hlen - rewind, lit_bytes, head_len,
                   nocase)) {
        DEBUG_PRINTF("cmp of prefix failed\n");
        return 0;
    }

    DEBUG_PRINTF("cmp succeeded\n");
    return 1;
}
static static
void updateSeqPoint(struct RoseContext *tctxt, u64a offset, void updateSeqPoint(struct RoseContext *tctxt, u64a offset,
const char from_mpv) { const char from_mpv) {
@ -2060,8 +2122,10 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
const char nocase = 0; const char nocase = 0;
if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset,
ri->lit_length, nocase)) { ri->lit_length, nocase)) {
DEBUG_PRINTF("halt: failed long lit check\n"); DEBUG_PRINTF("failed long lit check\n");
return HWLM_CONTINUE_MATCHING; assert(ri->fail_jump); // must progress
pc += ri->fail_jump;
continue;
} }
} }
PROGRAM_NEXT_INSTRUCTION PROGRAM_NEXT_INSTRUCTION
@ -2070,8 +2134,34 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
const char nocase = 1; const char nocase = 1;
if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset,
ri->lit_length, nocase)) { ri->lit_length, nocase)) {
DEBUG_PRINTF("halt: failed nocase long lit check\n"); DEBUG_PRINTF("failed nocase long lit check\n");
return HWLM_CONTINUE_MATCHING; assert(ri->fail_jump); // must progress
pc += ri->fail_jump;
continue;
}
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CHECK_MED_LIT) {
const char nocase = 0;
if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset,
ri->lit_length, nocase)) {
DEBUG_PRINTF("failed lit check\n");
assert(ri->fail_jump); // must progress
pc += ri->fail_jump;
continue;
}
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CHECK_MED_LIT_NOCASE) {
const char nocase = 1;
if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset,
ri->lit_length, nocase)) {
DEBUG_PRINTF("failed long lit check\n");
assert(ri->fail_jump); // must progress
pc += ri->fail_jump;
continue;
} }
} }
PROGRAM_NEXT_INSTRUCTION PROGRAM_NEXT_INSTRUCTION

View File

@ -4353,13 +4353,18 @@ static
void makeCheckLiteralInstruction(const RoseBuildImpl &build, void makeCheckLiteralInstruction(const RoseBuildImpl &build,
const build_context &bc, u32 final_id, const build_context &bc, u32 final_id,
RoseProgram &program) { RoseProgram &program) {
assert(bc.longLitLengthThreshold > 0);
DEBUG_PRINTF("final_id %u, long lit threshold %zu\n", final_id,
bc.longLitLengthThreshold);
const auto &lits = build.final_id_to_literal.at(final_id); const auto &lits = build.final_id_to_literal.at(final_id);
if (lits.size() != 1) { if (lits.size() != 1) {
// Long literals should not share a final_id. // final_id sharing is only allowed for literals that are short enough
// to not require any additional confirm work.
assert(all_of(begin(lits), end(lits), [&](u32 lit_id) { assert(all_of(begin(lits), end(lits), [&](u32 lit_id) {
const rose_literal_id &lit = build.literals.right.at(lit_id); const rose_literal_id &lit = build.literals.right.at(lit_id);
return lit.table != ROSE_FLOATING || return lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX;
lit.s.length() <= bc.longLitLengthThreshold;
})); }));
return; return;
} }
@ -4370,11 +4375,9 @@ void makeCheckLiteralInstruction(const RoseBuildImpl &build,
} }
const rose_literal_id &lit = build.literals.right.at(lit_id); const rose_literal_id &lit = build.literals.right.at(lit_id);
if (lit.table != ROSE_FLOATING) {
return; if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX) {
} DEBUG_PRINTF("lit short enough to not need confirm\n");
assert(bc.longLitLengthThreshold > 0);
if (lit.s.length() <= bc.longLitLengthThreshold) {
return; return;
} }
@ -4383,11 +4386,34 @@ void makeCheckLiteralInstruction(const RoseBuildImpl &build,
throw ResourceLimitError(); throw ResourceLimitError();
} }
if (lit.s.length() <= bc.longLitLengthThreshold) {
DEBUG_PRINTF("is a medium-length literal\n");
const auto *end_inst = program.end_instruction();
unique_ptr<RoseInstruction> ri; unique_ptr<RoseInstruction> ri;
if (lit.s.any_nocase()) { if (lit.s.any_nocase()) {
ri = make_unique<RoseInstrCheckLongLitNocase>(lit.s.get_string()); ri = make_unique<RoseInstrCheckMedLitNocase>(lit.s.get_string(),
end_inst);
} else { } else {
ri = make_unique<RoseInstrCheckLongLit>(lit.s.get_string()); ri = make_unique<RoseInstrCheckMedLit>(lit.s.get_string(),
end_inst);
}
program.add_before_end(move(ri));
return;
}
// Long literal support should only really be used for the floating table
// in streaming mode.
assert(lit.table == ROSE_FLOATING && build.cc.streaming);
DEBUG_PRINTF("is a long literal\n");
const auto *end_inst = program.end_instruction();
unique_ptr<RoseInstruction> ri;
if (lit.s.any_nocase()) {
ri = make_unique<RoseInstrCheckLongLitNocase>(lit.s.get_string(),
end_inst);
} else {
ri = make_unique<RoseInstrCheckLongLit>(lit.s.get_string(), end_inst);
} }
program.add_before_end(move(ri)); program.add_before_end(move(ri));
} }
@ -4522,6 +4548,7 @@ u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc,
} }
RoseProgram program; RoseProgram program;
makeCheckLiteralInstruction(build, bc, final_id, program);
makeCheckLitMaskInstruction(build, bc, final_id, program); makeCheckLitMaskInstruction(build, bc, final_id, program);
makePushDelayedInstructions(build, final_id, program); makePushDelayedInstructions(build, final_id, program);
assert(!program.empty()); assert(!program.empty());
@ -4951,7 +4978,7 @@ u32 buildEagerQueueIter(const set<u32> &eager, u32 leftfixBeginQueue,
static static
void allocateFinalIdToSet(RoseBuildImpl &build, const set<u32> &lits, void allocateFinalIdToSet(RoseBuildImpl &build, const set<u32> &lits,
size_t longLitLengthThreshold, u32 *next_final_id) { u32 *next_final_id) {
const auto &g = build.g; const auto &g = build.g;
auto &literal_info = build.literal_info; auto &literal_info = build.literal_info;
auto &final_id_to_literal = build.final_id_to_literal; auto &final_id_to_literal = build.final_id_to_literal;
@ -4961,8 +4988,6 @@ void allocateFinalIdToSet(RoseBuildImpl &build, const set<u32> &lits,
* ids and squash the same roles and have the same group squashing * ids and squash the same roles and have the same group squashing
* behaviour. Benefits literals cannot be merged. */ * behaviour. Benefits literals cannot be merged. */
assert(longLitLengthThreshold > 0);
for (u32 int_id : lits) { for (u32 int_id : lits) {
rose_literal_info &curr_info = literal_info[int_id]; rose_literal_info &curr_info = literal_info[int_id];
const rose_literal_id &lit = build.literals.right.at(int_id); const rose_literal_id &lit = build.literals.right.at(int_id);
@ -4974,10 +4999,10 @@ void allocateFinalIdToSet(RoseBuildImpl &build, const set<u32> &lits,
goto assign_new_id; goto assign_new_id;
} }
// Long literals (that require CHECK_LONG_LIT instructions) cannot be // Literals that need confirmation with CHECK_LONG_LIT or CHECK_MED_LIT
// merged. // cannot be merged.
if (lit.s.length() > longLitLengthThreshold) { if (lit.s.length() > ROSE_SHORT_LITERAL_LEN_MAX) {
DEBUG_PRINTF("id %u is a long literal\n", int_id); DEBUG_PRINTF("id %u needs lit confirm\n", int_id);
goto assign_new_id; goto assign_new_id;
} }
@ -5001,7 +5026,7 @@ void allocateFinalIdToSet(RoseBuildImpl &build, const set<u32> &lits,
const auto &cand_info = literal_info[cand_id]; const auto &cand_info = literal_info[cand_id];
const auto &cand_lit = build.literals.right.at(cand_id); const auto &cand_lit = build.literals.right.at(cand_id);
if (cand_lit.s.length() > longLitLengthThreshold) { if (cand_lit.s.length() > ROSE_SHORT_LITERAL_LEN_MAX) {
continue; continue;
} }
@ -5071,8 +5096,7 @@ bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) {
/** \brief Allocate final literal IDs for all literals. */ /** \brief Allocate final literal IDs for all literals. */
static static
void allocateFinalLiteralId(RoseBuildImpl &build, void allocateFinalLiteralId(RoseBuildImpl &build) {
size_t longLitLengthThreshold) {
set<u32> anch; set<u32> anch;
set<u32> norm; set<u32> norm;
set<u32> delay; set<u32> delay;
@ -5106,15 +5130,15 @@ void allocateFinalLiteralId(RoseBuildImpl &build,
} }
/* normal lits */ /* normal lits */
allocateFinalIdToSet(build, norm, longLitLengthThreshold, &next_final_id); allocateFinalIdToSet(build, norm, &next_final_id);
/* next anchored stuff */ /* next anchored stuff */
build.anchored_base_id = next_final_id; build.anchored_base_id = next_final_id;
allocateFinalIdToSet(build, anch, longLitLengthThreshold, &next_final_id); allocateFinalIdToSet(build, anch, &next_final_id);
/* delayed ids come last */ /* delayed ids come last */
build.delay_base_id = next_final_id; build.delay_base_id = next_final_id;
allocateFinalIdToSet(build, delay, longLitLengthThreshold, &next_final_id); allocateFinalIdToSet(build, delay, &next_final_id);
} }
static static
@ -5188,10 +5212,11 @@ size_t calcLongLitThreshold(const RoseBuildImpl &build,
const size_t historyRequired) { const size_t historyRequired) {
const auto &cc = build.cc; const auto &cc = build.cc;
// In block mode, we should only use the long literal support for literals // In block mode, we don't have history, so we don't need long literal
// that cannot be handled by HWLM. // support and can just use "medium-length" literal confirm. TODO: we could
// specialize further and have a block mode literal confirm instruction.
if (!cc.streaming) { if (!cc.streaming) {
return HWLM_LITERAL_MAX_LEN; return SIZE_MAX;
} }
size_t longLitLengthThreshold = ROSE_LONG_LITERAL_THRESHOLD_MIN; size_t longLitLengthThreshold = ROSE_LONG_LITERAL_THRESHOLD_MIN;
@ -5227,7 +5252,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
historyRequired); historyRequired);
DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold); DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold);
allocateFinalLiteralId(*this, longLitLengthThreshold); allocateFinalLiteralId(*this);
auto anchored_dfas = buildAnchoredDfas(*this); auto anchored_dfas = buildAnchoredDfas(*this);

View File

@ -123,7 +123,7 @@ void RoseBuildImpl::handleMixedSensitivity(void) {
// with a CHECK_LONG_LIT instruction and need unique final_ids. // with a CHECK_LONG_LIT instruction and need unique final_ids.
// TODO: we could allow explosion for literals where the prefixes // TODO: we could allow explosion for literals where the prefixes
// covered by CHECK_LONG_LIT are identical. // covered by CHECK_LONG_LIT are identical.
if (lit.s.length() <= ROSE_LONG_LITERAL_THRESHOLD_MIN && if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX &&
limited_explosion(lit.s)) { limited_explosion(lit.s)) {
DEBUG_PRINTF("need to explode existing string '%s'\n", DEBUG_PRINTF("need to explode existing string '%s'\n",
dumpString(lit.s).c_str()); dumpString(lit.s).c_str());

View File

@ -35,7 +35,7 @@
#include "rose/rose_dump.h" #include "rose/rose_dump.h"
#include "rose_internal.h" #include "rose_internal.h"
#include "ue2common.h" #include "ue2common.h"
#include "hwlm/hwlm_build.h" #include "hwlm/hwlm_literal.h"
#include "nfa/castlecompile.h" #include "nfa/castlecompile.h"
#include "nfa/nfa_internal.h" #include "nfa/nfa_internal.h"
#include "nfagraph/ng_dump.h" #include "nfagraph/ng_dump.h"
@ -505,24 +505,22 @@ void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) {
size_t longLitLengthThreshold = size_t longLitLengthThreshold =
calcLongLitThreshold(build, historyRequired); calcLongLitThreshold(build, historyRequired);
auto lits = fillHamsterLiteralList(build, ROSE_ANCHORED, auto mp = makeMatcherProto(build, ROSE_ANCHORED, longLitLengthThreshold);
longLitLengthThreshold); dumpTestLiterals(base + "rose_anchored_test_literals.txt", mp.lits);
dumpTestLiterals(base + "rose_anchored_test_literals.txt", lits);
lits = fillHamsterLiteralList(build, ROSE_FLOATING, longLitLengthThreshold); mp = makeMatcherProto(build, ROSE_FLOATING, longLitLengthThreshold);
dumpTestLiterals(base + "rose_float_test_literals.txt", lits); dumpTestLiterals(base + "rose_float_test_literals.txt", mp.lits);
lits = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED, mp = makeMatcherProto(build, ROSE_EOD_ANCHORED, build.ematcher_region_size);
build.ematcher_region_size); dumpTestLiterals(base + "rose_eod_test_literals.txt", mp.lits);
dumpTestLiterals(base + "rose_eod_test_literals.txt", lits);
if (!build.cc.streaming) { if (!build.cc.streaming) {
lits = fillHamsterLiteralList(build, ROSE_FLOATING, mp = makeMatcherProto(build, ROSE_FLOATING, ROSE_SMALL_BLOCK_LEN,
ROSE_SMALL_BLOCK_LEN);
auto mp2 = makeMatcherProto(build, ROSE_ANCHORED_SMALL_BLOCK,
ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
auto lits2 = fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK, mp.lits.insert(end(mp.lits), begin(mp2.lits), end(mp2.lits));
ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); dumpTestLiterals(base + "rose_smallblock_test_literals.txt", mp.lits);
lits.insert(end(lits), begin(lits2), end(lits2));
dumpTestLiterals(base + "rose_smallblock_test_literals.txt", lits);
} }
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -58,6 +58,17 @@ namespace ue2 {
#define ROSE_LONG_LITERAL_THRESHOLD_MIN 33 #define ROSE_LONG_LITERAL_THRESHOLD_MIN 33
/**
* \brief The largest allowable "short" literal fragment which can be given to
* a literal matcher directly.
*
* Literals longer than this will be truncated to their suffix and confirmed in
* the Rose interpreter, either as "medium length" literals which can be
* confirmed from history, or "long literals" which make use of the streaming
* table support.
*/
#define ROSE_SHORT_LITERAL_LEN_MAX 8
struct BoundaryReports; struct BoundaryReports;
struct CastleProto; struct CastleProto;
struct CompileContext; struct CompileContext;

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016, Intel Corporation * Copyright (c) 2016-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -491,8 +491,14 @@ bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id,
return false; return false;
} }
if (build.literals.right.at(id).s.length() > max_len) { size_t len = build.literals.right.at(id).s.length();
DEBUG_PRINTF("requires literal check\n"); if (len > max_len) {
DEBUG_PRINTF("long literal, requires confirm\n");
return false;
}
if (len > ROSE_SHORT_LITERAL_LEN_MAX) {
DEBUG_PRINTF("medium-length literal, requires confirm\n");
return false; return false;
} }
@ -626,10 +632,10 @@ u64a literalMinReportOffset(const RoseBuildImpl &build,
return lit_min_offset; return lit_min_offset;
} }
vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build, MatcherProto makeMatcherProto(const RoseBuildImpl &build,
rose_literal_table table, rose_literal_table table, size_t max_len,
size_t max_len, u32 max_offset) { u32 max_offset) {
vector<hwlmLiteral> lits; MatcherProto mp;
for (const auto &e : build.literals.right) { for (const auto &e : build.literals.right) {
const u32 id = e.first; const u32 id = e.first;
@ -652,7 +658,8 @@ vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
/* Note: requires_benefits are handled in the literal entries */ /* Note: requires_benefits are handled in the literal entries */
const ue2_literal &lit = e.second.s; const ue2_literal &lit = e.second.s;
DEBUG_PRINTF("lit='%s'\n", escapeString(lit).c_str()); DEBUG_PRINTF("lit='%s' (len %zu)\n", escapeString(lit).c_str(),
lit.length());
if (max_offset != ROSE_BOUND_INF) { if (max_offset != ROSE_BOUND_INF) {
u64a min_report = literalMinReportOffset(build, e.second, info); u64a min_report = literalMinReportOffset(build, e.second, info);
@ -665,14 +672,22 @@ vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
const vector<u8> &msk = e.second.msk; const vector<u8> &msk = e.second.msk;
const vector<u8> &cmp = e.second.cmp; const vector<u8> &cmp = e.second.cmp;
bool noruns = isNoRunsLiteral(build, id, info, max_len); bool noruns = isNoRunsLiteral(build, id, info, max_len);
size_t lit_hist_len = 0;
if (build.cc.streaming) {
lit_hist_len = max(msk.size(), min(lit.length(), max_len));
lit_hist_len = lit_hist_len ? lit_hist_len - 1 : 0;
}
DEBUG_PRINTF("lit requires %zu bytes of history\n", lit_hist_len);
assert(lit_hist_len <= build.cc.grey.maxHistoryAvailable);
if (info.requires_explode) { if (info.requires_explode) {
DEBUG_PRINTF("exploding lit\n"); DEBUG_PRINTF("exploding lit\n");
// We do not require_explode for long literals. // We do not require_explode for literals that need confirm
assert(lit.length() <= max_len); // (long/medium length literals).
assert(lit.length() <= ROSE_SHORT_LITERAL_LEN_MAX);
case_iter cit = caseIterateBegin(lit); case_iter cit = caseIterateBegin(lit);
case_iter cite = caseIterateEnd(); case_iter cite = caseIterateEnd();
@ -690,7 +705,8 @@ vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
continue; continue;
} }
lits.emplace_back(move(s), nocase, noruns, final_id, groups, mp.history_required = max(mp.history_required, lit_hist_len);
mp.lits.emplace_back(move(s), nocase, noruns, final_id, groups,
msk, cmp); msk, cmp);
} }
} else { } else {
@ -702,11 +718,13 @@ vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
final_id, escapeString(s).c_str(), (int)nocase, noruns, final_id, escapeString(s).c_str(), (int)nocase, noruns,
dumpMask(msk).c_str(), dumpMask(cmp).c_str()); dumpMask(msk).c_str(), dumpMask(cmp).c_str());
if (s.length() > max_len) { if (s.length() > ROSE_SHORT_LITERAL_LEN_MAX) {
DEBUG_PRINTF("truncating to tail of length %zu\n", max_len); DEBUG_PRINTF("truncating to tail of length %zu\n",
s.erase(0, s.length() - max_len); size_t{ROSE_SHORT_LITERAL_LEN_MAX});
s.erase(0, s.length() - ROSE_SHORT_LITERAL_LEN_MAX);
// We shouldn't have set a threshold below 8 chars. // We shouldn't have set a threshold below 8 chars.
assert(msk.size() <= max_len); assert(msk.size() <= ROSE_SHORT_LITERAL_LEN_MAX);
assert(!noruns);
} }
if (!maskIsConsistent(s, nocase, msk, cmp)) { if (!maskIsConsistent(s, nocase, msk, cmp)) {
@ -714,12 +732,13 @@ vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
continue; continue;
} }
lits.emplace_back(move(s), nocase, noruns, final_id, groups, msk, mp.history_required = max(mp.history_required, lit_hist_len);
mp.lits.emplace_back(move(s), nocase, noruns, final_id, groups, msk,
cmp); cmp);
} }
} }
return lits; return mp;
} }
aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build, aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
@ -730,49 +749,31 @@ aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
*fsize = 0; *fsize = 0;
*fgroups = 0; *fgroups = 0;
auto fl = fillHamsterLiteralList(build, ROSE_FLOATING, auto mp = makeMatcherProto(build, ROSE_FLOATING, longLitLengthThreshold);
longLitLengthThreshold); if (mp.lits.empty()) {
if (fl.empty()) {
DEBUG_PRINTF("empty floating matcher\n"); DEBUG_PRINTF("empty floating matcher\n");
return nullptr; return nullptr;
} }
for (const hwlmLiteral &hlit : fl) { for (const hwlmLiteral &lit : mp.lits) {
*fgroups |= hlit.groups; *fgroups |= lit.groups;
} }
hwlmStreamingControl ctl; auto hwlm = hwlmBuild(mp.lits, false, build.cc, build.getInitialGroups());
hwlmStreamingControl *ctlp; if (!hwlm) {
if (build.cc.streaming) {
ctl.history_max = build.cc.grey.maxHistoryAvailable;
ctl.history_min = MAX(*historyRequired,
build.cc.grey.minHistoryAvailable);
DEBUG_PRINTF("streaming control, history max=%zu, min=%zu\n",
ctl.history_max, ctl.history_min);
ctlp = &ctl;
} else {
ctlp = nullptr; // Null for non-streaming.
}
aligned_unique_ptr<HWLM> ftable =
hwlmBuild(fl, ctlp, false, build.cc, build.getInitialGroups());
if (!ftable) {
throw CompileError("Unable to generate bytecode."); throw CompileError("Unable to generate bytecode.");
} }
if (build.cc.streaming) { if (build.cc.streaming) {
DEBUG_PRINTF("literal_history_required=%zu\n", DEBUG_PRINTF("history_required=%zu\n", mp.history_required);
ctl.literal_history_required); assert(mp.history_required <= build.cc.grey.maxHistoryAvailable);
assert(ctl.literal_history_required <= *historyRequired = max(*historyRequired, mp.history_required);
build.cc.grey.maxHistoryAvailable);
*historyRequired = max(*historyRequired,
ctl.literal_history_required);
} }
*fsize = hwlmSize(ftable.get()); *fsize = hwlmSize(hwlm.get());
assert(*fsize); assert(*fsize);
DEBUG_PRINTF("built floating literal table size %zu bytes\n", *fsize); DEBUG_PRINTF("built floating literal table size %zu bytes\n", *fsize);
return ftable; return hwlm;
} }
aligned_unique_ptr<HWLM> buildSmallBlockMatcher(const RoseBuildImpl &build, aligned_unique_ptr<HWLM> buildSmallBlockMatcher(const RoseBuildImpl &build,
@ -791,38 +792,38 @@ aligned_unique_ptr<HWLM> buildSmallBlockMatcher(const RoseBuildImpl &build,
return nullptr; return nullptr;
} }
auto lits = fillHamsterLiteralList( auto mp = makeMatcherProto(build, ROSE_FLOATING, ROSE_SMALL_BLOCK_LEN,
build, ROSE_FLOATING, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); ROSE_SMALL_BLOCK_LEN);
if (lits.empty()) { if (mp.lits.empty()) {
DEBUG_PRINTF("no floating table\n"); DEBUG_PRINTF("no floating table\n");
return nullptr; return nullptr;
} else if (lits.size() == 1) { } else if (mp.lits.size() == 1) {
DEBUG_PRINTF("single floating literal, noodle will be fast enough\n"); DEBUG_PRINTF("single floating literal, noodle will be fast enough\n");
return nullptr; return nullptr;
} }
auto anchored_lits = auto mp_anchored =
fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK, makeMatcherProto(build, ROSE_ANCHORED_SMALL_BLOCK, ROSE_SMALL_BLOCK_LEN,
ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); ROSE_SMALL_BLOCK_LEN);
if (anchored_lits.empty()) { if (mp_anchored.lits.empty()) {
DEBUG_PRINTF("no small-block anchored literals\n"); DEBUG_PRINTF("no small-block anchored literals\n");
return nullptr; return nullptr;
} }
lits.insert(lits.end(), anchored_lits.begin(), anchored_lits.end()); mp.lits.insert(mp.lits.end(), mp_anchored.lits.begin(),
mp_anchored.lits.end());
// None of our literals should be longer than the small block limit. // None of our literals should be longer than the small block limit.
assert(all_of(begin(lits), end(lits), [](const hwlmLiteral &lit) { assert(all_of(begin(mp.lits), end(mp.lits), [](const hwlmLiteral &lit) {
return lit.s.length() <= ROSE_SMALL_BLOCK_LEN; return lit.s.length() <= ROSE_SMALL_BLOCK_LEN;
})); }));
if (lits.empty()) { if (mp.lits.empty()) {
DEBUG_PRINTF("no literals shorter than small block len\n"); DEBUG_PRINTF("no literals shorter than small block len\n");
return nullptr; return nullptr;
} }
aligned_unique_ptr<HWLM> hwlm = auto hwlm = hwlmBuild(mp.lits, true, build.cc, build.getInitialGroups());
hwlmBuild(lits, nullptr, true, build.cc, build.getInitialGroups());
if (!hwlm) { if (!hwlm) {
throw CompileError("Unable to generate bytecode."); throw CompileError("Unable to generate bytecode.");
} }
@ -837,10 +838,10 @@ aligned_unique_ptr<HWLM> buildEodAnchoredMatcher(const RoseBuildImpl &build,
size_t *esize) { size_t *esize) {
*esize = 0; *esize = 0;
auto el = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED, auto mp =
build.ematcher_region_size); makeMatcherProto(build, ROSE_EOD_ANCHORED, build.ematcher_region_size);
if (el.empty()) { if (mp.lits.empty()) {
DEBUG_PRINTF("no eod anchored literals\n"); DEBUG_PRINTF("no eod anchored literals\n");
assert(!build.ematcher_region_size); assert(!build.ematcher_region_size);
return nullptr; return nullptr;
@ -848,17 +849,15 @@ aligned_unique_ptr<HWLM> buildEodAnchoredMatcher(const RoseBuildImpl &build,
assert(build.ematcher_region_size); assert(build.ematcher_region_size);
hwlmStreamingControl *ctlp = nullptr; // not a streaming case auto hwlm = hwlmBuild(mp.lits, true, build.cc, build.getInitialGroups());
aligned_unique_ptr<HWLM> etable = if (!hwlm) {
hwlmBuild(el, ctlp, true, build.cc, build.getInitialGroups());
if (!etable) {
throw CompileError("Unable to generate bytecode."); throw CompileError("Unable to generate bytecode.");
} }
*esize = hwlmSize(etable.get()); *esize = hwlmSize(hwlm.get());
assert(*esize); assert(*esize);
DEBUG_PRINTF("built eod-anchored literal table size %zu bytes\n", *esize); DEBUG_PRINTF("built eod-anchored literal table size %zu bytes\n", *esize);
return etable; return hwlm;
} }
} // namespace ue2 } // namespace ue2

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016, Intel Corporation * Copyright (c) 2016-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -44,13 +44,19 @@ namespace ue2 {
struct hwlmLiteral; struct hwlmLiteral;
/**
 * \brief Bundle returned by makeMatcherProto(): the literals for one matcher
 * table together with the amount of history those literals need.
 */
struct MatcherProto {
/** Literals collected for the table; this is what gets passed to hwlmBuild. */
std::vector<hwlmLiteral> lits;
/**
 * Largest per-literal history requirement seen while collecting \ref lits.
 * Per-literal values are only non-zero for streaming builds, so this stays
 * at zero otherwise.
 */
size_t history_required = 0;
};
/** /**
* \brief Build up a vector of literals for the given table. * \brief Build up a vector of literals (and associated other data) for the
* given table.
* *
* If max_offset is specified (and not ROSE_BOUND_INF), then literals that can * If max_offset is specified (and not ROSE_BOUND_INF), then literals that can
* only lead to a pattern match after max_offset may be excluded. * only lead to a pattern match after max_offset may be excluded.
*/ */
std::vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build, MatcherProto makeMatcherProto(const RoseBuildImpl &build,
rose_literal_table table, size_t max_len, rose_literal_table table, size_t max_len,
u32 max_offset = ROSE_BOUND_INF); u32 max_offset = ROSE_BOUND_INF);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -28,7 +28,7 @@
#include "rose_build_impl.h" #include "rose_build_impl.h"
#include "hwlm/hwlm_build.h" #include "hwlm/hwlm_literal.h"
#include "nfa/castlecompile.h" #include "nfa/castlecompile.h"
#include "nfa/goughcompile.h" #include "nfa/goughcompile.h"
#include "nfa/mcclellancompile_util.h" #include "nfa/mcclellancompile_util.h"

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016, Intel Corporation * Copyright (c) 2016-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -502,6 +502,7 @@ void RoseInstrCheckLongLit::write(void *dest, RoseEngineBlob &blob,
assert(!literal.empty()); assert(!literal.empty());
inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1);
inst->lit_length = verify_u32(literal.size()); inst->lit_length = verify_u32(literal.size());
inst->fail_jump = calc_jump(offset_map, this, target);
} }
void RoseInstrCheckLongLitNocase::write(void *dest, RoseEngineBlob &blob, void RoseInstrCheckLongLitNocase::write(void *dest, RoseEngineBlob &blob,
@ -511,6 +512,27 @@ void RoseInstrCheckLongLitNocase::write(void *dest, RoseEngineBlob &blob,
assert(!literal.empty()); assert(!literal.empty());
inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1);
inst->lit_length = verify_u32(literal.size()); inst->lit_length = verify_u32(literal.size());
inst->fail_jump = calc_jump(offset_map, this, target);
}
void RoseInstrCheckMedLit::write(void *dest, RoseEngineBlob &blob,
const OffsetMap &offset_map) const {
RoseInstrBase::write(dest, blob, offset_map);
auto *inst = static_cast<impl_type *>(dest);
assert(!literal.empty());
inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1);
inst->lit_length = verify_u32(literal.size());
inst->fail_jump = calc_jump(offset_map, this, target);
}
void RoseInstrCheckMedLitNocase::write(void *dest, RoseEngineBlob &blob,
const OffsetMap &offset_map) const {
RoseInstrBase::write(dest, blob, offset_map);
auto *inst = static_cast<impl_type *>(dest);
assert(!literal.empty());
inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1);
inst->lit_length = verify_u32(literal.size());
inst->fail_jump = calc_jump(offset_map, this, target);
} }
static static

View File

@ -1723,17 +1723,19 @@ public:
}; };
class RoseInstrCheckLongLit class RoseInstrCheckLongLit
: public RoseInstrBaseNoTargets<ROSE_INSTR_CHECK_LONG_LIT, : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_LONG_LIT,
ROSE_STRUCT_CHECK_LONG_LIT, ROSE_STRUCT_CHECK_LONG_LIT,
RoseInstrCheckLongLit> { RoseInstrCheckLongLit> {
public: public:
std::string literal; std::string literal;
const RoseInstruction *target;
explicit RoseInstrCheckLongLit(std::string literal_in) RoseInstrCheckLongLit(std::string literal_in,
: literal(std::move(literal_in)) {} const RoseInstruction *target_in)
: literal(std::move(literal_in)), target(target_in) {}
bool operator==(const RoseInstrCheckLongLit &ri) const { bool operator==(const RoseInstrCheckLongLit &ri) const {
return literal == ri.literal; return literal == ri.literal && target == ri.target;
} }
size_t hash() const override { size_t hash() const override {
@ -1743,26 +1745,29 @@ public:
void write(void *dest, RoseEngineBlob &blob, void write(void *dest, RoseEngineBlob &blob,
const OffsetMap &offset_map) const override; const OffsetMap &offset_map) const override;
bool equiv_to(const RoseInstrCheckLongLit &ri, const OffsetMap &, bool equiv_to(const RoseInstrCheckLongLit &ri, const OffsetMap &offsets,
const OffsetMap &) const { const OffsetMap &other_offsets) const {
return literal == ri.literal; return literal == ri.literal &&
offsets.at(target) == other_offsets.at(ri.target);
} }
}; };
class RoseInstrCheckLongLitNocase class RoseInstrCheckLongLitNocase
: public RoseInstrBaseNoTargets<ROSE_INSTR_CHECK_LONG_LIT_NOCASE, : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_LONG_LIT_NOCASE,
ROSE_STRUCT_CHECK_LONG_LIT_NOCASE, ROSE_STRUCT_CHECK_LONG_LIT_NOCASE,
RoseInstrCheckLongLitNocase> { RoseInstrCheckLongLitNocase> {
public: public:
std::string literal; std::string literal;
const RoseInstruction *target;
explicit RoseInstrCheckLongLitNocase(std::string literal_in) RoseInstrCheckLongLitNocase(std::string literal_in,
: literal(std::move(literal_in)) { const RoseInstruction *target_in)
: literal(std::move(literal_in)), target(target_in) {
upperString(literal); upperString(literal);
} }
bool operator==(const RoseInstrCheckLongLitNocase &ri) const { bool operator==(const RoseInstrCheckLongLitNocase &ri) const {
return literal == ri.literal; return literal == ri.literal && target == ri.target;
} }
size_t hash() const override { size_t hash() const override {
@ -1772,9 +1777,74 @@ public:
void write(void *dest, RoseEngineBlob &blob, void write(void *dest, RoseEngineBlob &blob,
const OffsetMap &offset_map) const override; const OffsetMap &offset_map) const override;
bool equiv_to(const RoseInstrCheckLongLitNocase &ri, const OffsetMap &, bool equiv_to(const RoseInstrCheckLongLitNocase &ri,
const OffsetMap &) const { const OffsetMap &offsets,
return literal == ri.literal; const OffsetMap &other_offsets) const {
return literal == ri.literal &&
offsets.at(target) == other_offsets.at(ri.target);
}
};
/**
 * \brief Build-time representation of ROSE_INSTR_CHECK_MED_LIT: confirm a
 * case-sensitive "medium length" literal, jumping to \ref target on failure.
 *
 * This instruction owns a jump target, so it derives from
 * RoseInstrBaseOneTarget, in the same way as the long-literal check
 * instructions that carry a \c target member.
 * NOTE(review): the base class templates are defined outside this view;
 * confirm RoseInstrBaseOneTarget is the base that maintains \c target.
 */
class RoseInstrCheckMedLit
    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MED_LIT,
                                    ROSE_STRUCT_CHECK_MED_LIT,
                                    RoseInstrCheckMedLit> {
public:
    /** Literal bytes to confirm. */
    std::string literal;
    /** Instruction to continue from when the confirm fails. */
    const RoseInstruction *target;

    explicit RoseInstrCheckMedLit(std::string literal_in,
                                  const RoseInstruction *target_in)
        : literal(std::move(literal_in)), target(target_in) {}

    /** Equal when both the literal and the failure target pointer match. */
    bool operator==(const RoseInstrCheckMedLit &ri) const {
        return literal == ri.literal && target == ri.target;
    }

    // The target is deliberately left out of the hash: instructions that
    // compare equal still hash equal, which is all that is required.
    size_t hash() const override {
        return hash_all(static_cast<int>(opcode), literal);
    }

    void write(void *dest, RoseEngineBlob &blob,
               const OffsetMap &offset_map) const override;

    /**
     * Equivalence across two programs: same literal, and the two targets
     * map to the same offset in their respective offset maps.
     */
    bool equiv_to(const RoseInstrCheckMedLit &ri, const OffsetMap &offsets,
                  const OffsetMap &other_offsets) const {
        return literal == ri.literal &&
               offsets.at(target) == other_offsets.at(ri.target);
    }
};
/**
 * \brief Build-time representation of ROSE_INSTR_CHECK_MED_LIT_NOCASE:
 * confirm a case-insensitive "medium length" literal, jumping to
 * \ref target on failure.
 *
 * This instruction owns a jump target, so it derives from
 * RoseInstrBaseOneTarget, in the same way as the long-literal check
 * instructions that carry a \c target member.
 * NOTE(review): the base class templates are defined outside this view;
 * confirm RoseInstrBaseOneTarget is the base that maintains \c target.
 */
class RoseInstrCheckMedLitNocase
    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MED_LIT_NOCASE,
                                    ROSE_STRUCT_CHECK_MED_LIT_NOCASE,
                                    RoseInstrCheckMedLitNocase> {
public:
    /** Literal bytes to confirm; passed through upperString() on construction. */
    std::string literal;
    /** Instruction to continue from when the confirm fails. */
    const RoseInstruction *target;

    explicit RoseInstrCheckMedLitNocase(std::string literal_in,
                                        const RoseInstruction *target_in)
        : literal(std::move(literal_in)), target(target_in) {
        // Normalise once here so comparisons and hashing see a single form.
        upperString(literal);
    }

    /** Equal when both the literal and the failure target pointer match. */
    bool operator==(const RoseInstrCheckMedLitNocase &ri) const {
        return literal == ri.literal && target == ri.target;
    }

    // The target is deliberately left out of the hash: instructions that
    // compare equal still hash equal, which is all that is required.
    size_t hash() const override {
        return hash_all(static_cast<int>(opcode), literal);
    }

    void write(void *dest, RoseEngineBlob &blob,
               const OffsetMap &offset_map) const override;

    /**
     * Equivalence across two programs: same literal, and the two targets
     * map to the same offset in their respective offset maps.
     */
    bool equiv_to(const RoseInstrCheckMedLitNocase &ri,
                  const OffsetMap &offsets,
                  const OffsetMap &other_offsets) const {
        return literal == ri.literal &&
               offsets.at(target) == other_offsets.at(ri.target);
    }
};

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -617,6 +617,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
const char *lit = (const char *)t + ri->lit_offset; const char *lit = (const char *)t + ri->lit_offset;
os << " literal: \"" os << " literal: \""
<< escapeString(string(lit, ri->lit_length)) << "\"" << endl; << escapeString(string(lit, ri->lit_length)) << "\"" << endl;
os << " fail_jump " << offset + ri->fail_jump << endl;
} }
PROGRAM_NEXT_INSTRUCTION PROGRAM_NEXT_INSTRUCTION
@ -626,6 +627,27 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
const char *lit = (const char *)t + ri->lit_offset; const char *lit = (const char *)t + ri->lit_offset;
os << " literal: \"" os << " literal: \""
<< escapeString(string(lit, ri->lit_length)) << "\"" << endl; << escapeString(string(lit, ri->lit_length)) << "\"" << endl;
os << " fail_jump " << offset + ri->fail_jump << endl;
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CHECK_MED_LIT) {
os << " lit_offset " << ri->lit_offset << endl;
os << " lit_length " << ri->lit_length << endl;
const char *lit = (const char *)t + ri->lit_offset;
os << " literal: \""
<< escapeString(string(lit, ri->lit_length)) << "\"" << endl;
os << " fail_jump " << offset + ri->fail_jump << endl;
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CHECK_MED_LIT_NOCASE) {
os << " lit_offset " << ri->lit_offset << endl;
os << " lit_length " << ri->lit_length << endl;
const char *lit = (const char *)t + ri->lit_offset;
os << " literal: \""
<< escapeString(string(lit, ri->lit_length)) << "\"" << endl;
os << " fail_jump " << offset + ri->fail_jump << endl;
} }
PROGRAM_NEXT_INSTRUCTION PROGRAM_NEXT_INSTRUCTION

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -129,7 +129,19 @@ enum RoseInstructionCode {
*/ */
ROSE_INSTR_CHECK_LONG_LIT_NOCASE, ROSE_INSTR_CHECK_LONG_LIT_NOCASE,
LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_LONG_LIT_NOCASE //!< Sentinel. /**
* \brief Confirm a case-sensitive "medium length" literal at the current
* offset. In streaming mode, this will check history if needed.
*/
ROSE_INSTR_CHECK_MED_LIT,
/**
* \brief Confirm a case-insensitive "medium length" literal at the current
* offset. In streaming mode, this will check history if needed.
*/
ROSE_INSTR_CHECK_MED_LIT_NOCASE,
LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_MED_LIT_NOCASE //!< Sentinel.
}; };
struct ROSE_STRUCT_END { struct ROSE_STRUCT_END {
@ -477,18 +489,32 @@ struct ROSE_STRUCT_MATCHER_EOD {
u8 code; //!< From enum RoseInstructionCode. u8 code; //!< From enum RoseInstructionCode.
}; };
/** Note: check failure will halt program. */
struct ROSE_STRUCT_CHECK_LONG_LIT { struct ROSE_STRUCT_CHECK_LONG_LIT {
u8 code; //!< From enum RoseInstructionCode. u8 code; //!< From enum RoseInstructionCode.
u32 lit_offset; //!< Offset of literal string. u32 lit_offset; //!< Offset of literal string.
u32 lit_length; //!< Length of literal string. u32 lit_length; //!< Length of literal string.
u32 fail_jump; //!< Jump forward this many bytes on failure.
}; };
/** Note: check failure will halt program. */
struct ROSE_STRUCT_CHECK_LONG_LIT_NOCASE { struct ROSE_STRUCT_CHECK_LONG_LIT_NOCASE {
u8 code; //!< From enum RoseInstructionCode. u8 code; //!< From enum RoseInstructionCode.
u32 lit_offset; //!< Offset of literal string. u32 lit_offset; //!< Offset of literal string.
u32 lit_length; //!< Length of literal string. u32 lit_length; //!< Length of literal string.
u32 fail_jump; //!< Jump forward this many bytes on failure.
};
/**
 * \brief Encoded form of ROSE_INSTR_CHECK_MED_LIT (case-sensitive confirm of
 * a "medium length" literal). On failure, execution continues \c fail_jump
 * bytes ahead.
 */
struct ROSE_STRUCT_CHECK_MED_LIT {
u8 code; //!< From enum RoseInstructionCode.
u32 lit_offset; //!< Offset of literal string.
u32 lit_length; //!< Length of literal string.
u32 fail_jump; //!< Jump forward this many bytes on failure.
};
/**
 * \brief Encoded form of ROSE_INSTR_CHECK_MED_LIT_NOCASE (case-insensitive
 * confirm of a "medium length" literal). On failure, execution continues
 * \c fail_jump bytes ahead.
 */
struct ROSE_STRUCT_CHECK_MED_LIT_NOCASE {
u8 code; //!< From enum RoseInstructionCode.
u32 lit_offset; //!< Offset of literal string.
u32 lit_length; //!< Length of literal string.
u32 fail_jump; //!< Jump forward this many bytes on failure.
}; };
#endif // ROSE_ROSE_PROGRAM_H #endif // ROSE_ROSE_PROGRAM_H

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -96,15 +96,6 @@ struct match {
}; };
extern "C" { extern "C" {
static
hwlmcb_rv_t countCallback(UNUSED size_t start, UNUSED size_t end, u32,
void *ctxt) {
if (ctxt) {
++*(u32 *)ctxt;
}
return HWLM_CONTINUE_MATCHING;
}
static static
hwlmcb_rv_t decentCallback(size_t start, size_t end, u32 id, void *ctxt) { hwlmcb_rv_t decentCallback(size_t start, size_t end, u32 id, void *ctxt) {
@ -231,42 +222,6 @@ TEST_P(FDRp, MultiLocation) {
} }
} }
TEST_P(FDRp, Flood) {
const u32 hint = GetParam();
SCOPED_TRACE(hint);
vector<hwlmLiteral> lits;
lits.push_back(hwlmLiteral("aaaa", 0, 1));
lits.push_back(hwlmLiteral("aaaaaaaa", 0, 2));
lits.push_back(hwlmLiteral("baaaaaaaa", 0, 3));
lits.push_back(hwlmLiteral("aaaaaaaab", 0, 4));
auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey());
CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
const u32 testSize = 1024;
vector<u8> data(testSize, 'a');
vector<match> matches;
fdrExec(fdr.get(), data.data(), testSize, 0, decentCallback, &matches,
HWLM_ALL_GROUPS);
ASSERT_EQ(testSize - 3 + testSize - 7, matches.size());
EXPECT_EQ(match(0, 3, 1), matches[0]);
EXPECT_EQ(match(1, 4, 1), matches[1]);
EXPECT_EQ(match(2, 5, 1), matches[2]);
EXPECT_EQ(match(3, 6, 1), matches[3]);
u32 currentMatch = 4;
for (u32 i = 7; i < testSize; i++, currentMatch += 2) {
EXPECT_TRUE(
(match(i - 3, i, 1) == matches[currentMatch] &&
match(i - 7, i, 2) == matches[currentMatch+1]) ||
(match(i - 7, i, 2) == matches[currentMatch+1] &&
match(i - 3, i, 1) == matches[currentMatch])
);
}
}
TEST_P(FDRp, NoRepeat1) { TEST_P(FDRp, NoRepeat1) {
const u32 hint = GetParam(); const u32 hint = GetParam();
SCOPED_TRACE(hint); SCOPED_TRACE(hint);
@ -414,36 +369,6 @@ TEST_P(FDRp, SmallStreaming2) {
ASSERT_EQ(expected.size(), matches.size()); ASSERT_EQ(expected.size(), matches.size());
} }
TEST_P(FDRp, LongLiteral) {
const u32 hint = GetParam();
SCOPED_TRACE(hint);
size_t sz;
const u8 *data;
vector<hwlmLiteral> lits;
string alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
string alpha4 = alpha+alpha+alpha+alpha;
lits.push_back(hwlmLiteral(alpha4.c_str(), 0,10));
auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey());
CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
u32 count = 0;
data = (const u8 *)alpha4.c_str();
sz = alpha4.size();
fdrExec(fdr.get(), data, sz, 0, countCallback, &count, HWLM_ALL_GROUPS);
EXPECT_EQ(1U, count);
count = 0;
fdrExec(fdr.get(), data, sz - 1, 0, countCallback, &count, HWLM_ALL_GROUPS);
EXPECT_EQ(0U, count);
count = 0;
fdrExec(fdr.get(), data + 1, sz - 1, 0, countCallback, &count,
HWLM_ALL_GROUPS);
EXPECT_EQ(0U, count);
}
TEST_P(FDRp, moveByteStream) { TEST_P(FDRp, moveByteStream) {
const u32 hint = GetParam(); const u32 hint = GetParam();
SCOPED_TRACE(hint); SCOPED_TRACE(hint);
@ -491,7 +416,7 @@ TEST_P(FDRp, Stream1) {
vector<hwlmLiteral> lits; vector<hwlmLiteral> lits;
lits.push_back(hwlmLiteral("f", 0, 0)); lits.push_back(hwlmLiteral("f", 0, 0));
lits.push_back(hwlmLiteral("longsigislong", 0, 1)); lits.push_back(hwlmLiteral("literal", 0, 1));
auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey());
CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
@ -514,7 +439,7 @@ INSTANTIATE_TEST_CASE_P(FDR, FDRp, ValuesIn(getValidFdrEngines()));
typedef struct { typedef struct {
string pattern; string pattern;
unsigned char alien; unsigned char alien; // character not present in pattern
} pattern_alien_t; } pattern_alien_t;
// gtest helper // gtest helper
@ -529,7 +454,6 @@ class FDRpp : public TestWithParam<tuple<u32, pattern_alien_t>> {};
// not happen if literal is partially (from 1 character up to full literal // not happen if literal is partially (from 1 character up to full literal
// length) is out of searched buffer - "too early" and "too late" conditions // length) is out of searched buffer - "too early" and "too late" conditions
TEST_P(FDRpp, AlignAndTooEarly) { TEST_P(FDRpp, AlignAndTooEarly) {
const size_t buf_alignment = 32; const size_t buf_alignment = 32;
// Buffer should be big enough to hold two instances of matching literals // Buffer should be big enough to hold two instances of matching literals
// (up to 64 bytes each) and room for offset (up to 32 bytes) // (up to 64 bytes each) and room for offset (up to 32 bytes)
@ -538,7 +462,7 @@ TEST_P(FDRpp, AlignAndTooEarly) {
const u32 hint = get<0>(GetParam()); const u32 hint = get<0>(GetParam());
SCOPED_TRACE(hint); SCOPED_TRACE(hint);
// pattern which is used to generate literals of variable size - from 1 to 64 // pattern which is used to generate literals of variable size - from 1 to 8
const string &pattern = get<1>(GetParam()).pattern; const string &pattern = get<1>(GetParam()).pattern;
const size_t patLen = pattern.size(); const size_t patLen = pattern.size();
const unsigned char alien = get<1>(GetParam()).alien; const unsigned char alien = get<1>(GetParam()).alien;
@ -551,7 +475,7 @@ TEST_P(FDRpp, AlignAndTooEarly) {
vector<hwlmLiteral> lits; vector<hwlmLiteral> lits;
for (size_t litLen = 1; litLen <= patLen; litLen++) { for (size_t litLen = 1; litLen <= patLen; litLen++) {
// building literal from pattern substring of variable length 1-64 // building literal from pattern substring of variable length 1-patLen
lits.push_back(hwlmLiteral(string(pattern, 0, litLen), 0, 0)); lits.push_back(hwlmLiteral(string(pattern, 0, litLen), 0, 0));
auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(),
Grey()); Grey());
@ -596,9 +520,9 @@ TEST_P(FDRpp, AlignAndTooEarly) {
} }
static const pattern_alien_t test_pattern[] = { static const pattern_alien_t test_pattern[] = {
{"abaabaaabaaabbaaaaabaaaaabbaaaaaaabaabbaaaabaaaaaaaabbbbaaaaaaab", 'x'}, {"abaabaaa", 'x'},
{"zzzyyzyzyyyyzyyyyyzzzzyyyyyyyyzyyyyyyyzzzzzyzzzzzzzzzyzzyzzzzzzz", (unsigned char)'\x99'}, {"zzzyyzyz", (unsigned char)'\x99'},
{"abcdef lafjk askldfjklf alfqwei9rui 'gldgkjnooiuswfs138746453583", '\0'} {"abcdef l", '\0'}
}; };
INSTANTIATE_TEST_CASE_P(FDR, FDRpp, Combine(ValuesIn(getValidFdrEngines()), INSTANTIATE_TEST_CASE_P(FDR, FDRpp, Combine(ValuesIn(getValidFdrEngines()),

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -161,8 +161,8 @@ TEST_P(FDRFloodp, NoMask) {
vector<hwlmLiteral> lits; vector<hwlmLiteral> lits;
// build literals of type "aaaa", "aaab", "baaa" // build literals of type "aaaa", "aaab", "baaa"
// of lengths 1, 2, 4, 8, 16, 32, both case-less and case-sensitive // of lengths 1, 2, 4, 8, both case-less and case-sensitive
for (int i = 0; i < 6 ; i++) { for (int i = 0; i < 4; i++) {
string s(1 << i, c); string s(1 << i, c);
lits.push_back(hwlmLiteral(s, false, i * 8 + 0)); lits.push_back(hwlmLiteral(s, false, i * 8 + 0));
s[0] = cAlt; s[0] = cAlt;
@ -189,7 +189,7 @@ TEST_P(FDRFloodp, NoMask) {
0, countCallback, (void *)&matchesCounts, HWLM_ALL_GROUPS); 0, countCallback, (void *)&matchesCounts, HWLM_ALL_GROUPS);
ASSERT_EQ(0, fdrStatus); ASSERT_EQ(0, fdrStatus);
for (u8 i = 0; i < 6 ; i++) { for (u8 i = 0; i < 4; i++) {
u32 cnt = dataSize - (1 << i) + 1; u32 cnt = dataSize - (1 << i) + 1;
ASSERT_EQ(cnt, matchesCounts[i * 8 + 0]); ASSERT_EQ(cnt, matchesCounts[i * 8 + 0]);
ASSERT_EQ(0, matchesCounts[i * 8 + 1]); ASSERT_EQ(0, matchesCounts[i * 8 + 1]);
@ -214,7 +214,7 @@ TEST_P(FDRFloodp, NoMask) {
0, countCallback, (void *)&matchesCounts, HWLM_ALL_GROUPS); 0, countCallback, (void *)&matchesCounts, HWLM_ALL_GROUPS);
ASSERT_EQ(0, fdrStatus); ASSERT_EQ(0, fdrStatus);
for (u8 i = 0; i < 6 ; i++) { for (u8 i = 0; i < 4; i++) {
u32 cnt = dataSize - (1 << i) + 1; u32 cnt = dataSize - (1 << i) + 1;
ASSERT_EQ(0, matchesCounts[i * 8 + 0]); ASSERT_EQ(0, matchesCounts[i * 8 + 0]);
ASSERT_EQ(i == 0 ? cnt : 0, matchesCounts[i * 8 + 1]); ASSERT_EQ(i == 0 ? cnt : 0, matchesCounts[i * 8 + 1]);