From 07a6b6510ca51255245d8198a44e36218f1c28a2 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 28 Nov 2016 16:46:03 +1100 Subject: [PATCH] rose/hwlm: limit literals to eight bytes Rework HWLM to work over literals of eight bytes ("medium length"), doing confirm in the Rose interpreter. --- src/fdr/fdr_compile.cpp | 48 +++-------- src/fdr/fdr_compile.h | 9 +- src/hwlm/hwlm_build.cpp | 48 +---------- src/hwlm/hwlm_build.h | 26 +----- src/hwlm/hwlm_literal.h | 4 +- src/rose/program_runtime.h | 100 ++++++++++++++++++++-- src/rose/rose_build_bytecode.cpp | 79 +++++++++++------ src/rose/rose_build_compile.cpp | 2 +- src/rose/rose_build_dump.cpp | 26 +++--- src/rose/rose_build_impl.h | 13 ++- src/rose/rose_build_matchers.cpp | 141 +++++++++++++++---------------- src/rose/rose_build_matchers.h | 16 ++-- src/rose/rose_build_misc.cpp | 4 +- src/rose/rose_build_program.cpp | 24 +++++- src/rose/rose_build_program.h | 98 ++++++++++++++++++--- src/rose/rose_dump.cpp | 24 +++++- src/rose/rose_program.h | 34 +++++++- unit/internal/fdr.cpp | 92 ++------------------ unit/internal/fdr_flood.cpp | 12 +-- 19 files changed, 452 insertions(+), 348 deletions(-) diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index f7451492..c9d6cbcb 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -545,35 +545,12 @@ FDRCompiler::build(pair, size_t> &link) { } // namespace static -size_t maxMaskLen(const vector &lits) { - size_t rv = 0; - for (const auto &lit : lits) { - rv = max(rv, lit.msk.size()); - } - return rv; -} - -static -void setHistoryRequired(hwlmStreamingControl &stream_ctl, - const vector &lits) { - size_t max_mask_len = maxMaskLen(lits); - - // we want enough history to manage the longest literal and the longest - // mask. - stream_ctl.literal_history_required = max(maxLen(lits), max_mask_len) - 1; -} - -static -aligned_unique_ptr -fdrBuildTableInternal(const vector &lits, bool make_small, - const target_t &target, const Grey &grey, u32 hint, - hwlmStreamingControl *stream_control) { +aligned_unique_ptr fdrBuildTableInternal(const vector &lits, + bool make_small, + const target_t &target, + const Grey &grey, u32 hint) { pair, size_t> link(nullptr, 0); - if (stream_control) { - setHistoryRequired(*stream_control, lits); - } - DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2"); if (grey.fdrAllowTeddy) { @@ -606,21 +583,18 @@ fdrBuildTableInternal(const vector &lits, bool make_small, aligned_unique_ptr fdrBuildTable(const vector &lits, bool make_small, const target_t &target, - const Grey &grey, - hwlmStreamingControl *stream_control) { - return fdrBuildTableInternal(lits, make_small, target, grey, HINT_INVALID, - stream_control); + const Grey &grey) { + return fdrBuildTableInternal(lits, make_small, target, grey, HINT_INVALID); } #if !defined(RELEASE_BUILD) -aligned_unique_ptr -fdrBuildTableHinted(const vector &lits, bool make_small, u32 hint, - const target_t &target, const Grey &grey, - hwlmStreamingControl *stream_control) { +aligned_unique_ptr fdrBuildTableHinted(const vector &lits, + bool make_small, u32 hint, + const target_t &target, + const Grey &grey) { pair link(nullptr, 0); - return fdrBuildTableInternal(lits, make_small, target, grey, hint, - stream_control); + return fdrBuildTableInternal(lits, make_small, target, grey, hint); } #endif diff --git a/src/fdr/fdr_compile.h b/src/fdr/fdr_compile.h index c12e0071..a135a6e1 100644 --- a/src/fdr/fdr_compile.h +++ b/src/fdr/fdr_compile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -43,21 +43,18 @@ struct FDR; namespace ue2 { struct hwlmLiteral; -struct hwlmStreamingControl; struct Grey; struct target_t; ue2::aligned_unique_ptr fdrBuildTable(const std::vector &lits, bool make_small, - const target_t &target, const Grey &grey, - hwlmStreamingControl *stream_control = nullptr); + const target_t &target, const Grey &grey); #if !defined(RELEASE_BUILD) ue2::aligned_unique_ptr fdrBuildTableHinted(const std::vector &lits, bool make_small, - u32 hint, const target_t &target, const Grey &grey, - hwlmStreamingControl *stream_control = nullptr); + u32 hint, const target_t &target, const Grey &grey); #endif diff --git a/src/hwlm/hwlm_build.cpp b/src/hwlm/hwlm_build.cpp index fa6335c9..29e71293 100644 --- a/src/hwlm/hwlm_build.cpp +++ b/src/hwlm/hwlm_build.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +33,7 @@ #include "hwlm.h" #include "hwlm_build.h" #include "hwlm_internal.h" +#include "hwlm_literal.h" #include "noodle_engine.h" #include "noodle_build.h" #include "scratch.h" @@ -512,7 +513,6 @@ bool everyoneHasGroups(const vector &lits) { static bool isNoodleable(const vector &lits, - const hwlmStreamingControl *stream_control, const CompileContext &cc) { if (!cc.grey.allowNoodle) { return false; @@ -523,19 +523,6 @@ bool isNoodleable(const vector &lits, return false; } - if (stream_control) { // nullptr if in block mode - if (lits.front().s.length() > stream_control->history_max + 1) { - DEBUG_PRINTF("length of %zu too long for history max %zu\n", - lits.front().s.length(), - stream_control->history_max); - return false; - } - if (2 * lits.front().s.length() - 2 > FDR_TEMP_BUF_SIZE) { - assert(0); - return false; - } - } - if (!lits.front().msk.empty()) { DEBUG_PRINTF("noodle can't handle supplementary masks\n"); return false; @@ -545,22 +532,11 @@ bool isNoodleable(const vector &lits, } aligned_unique_ptr hwlmBuild(const vector &lits, - hwlmStreamingControl *stream_control, bool make_small, const CompileContext &cc, hwlm_group_t expected_groups) { assert(!lits.empty()); dumpLits(lits); - if (stream_control) { - assert(stream_control->history_min <= stream_control->history_max); - - // We should not have been passed any literals that are too long to - // match with a maximally-sized history buffer. - assert(all_of(begin(lits), end(lits), [&](const hwlmLiteral &lit) { - return lit.s.length() <= stream_control->history_max + 1; - })); - } - // Check that we haven't exceeded the maximum number of literals. if (lits.size() > cc.grey.limitLiteralCount) { throw ResourceLimitError(); @@ -595,7 +571,7 @@ aligned_unique_ptr hwlmBuild(const vector &lits, assert(everyoneHasGroups(lits)); - if (isNoodleable(lits, stream_control, cc)) { + if (isNoodleable(lits, cc)) { DEBUG_PRINTF("build noodle table\n"); engType = HWLM_ENGINE_NOOD; const hwlmLiteral &lit = lits.front(); @@ -603,19 +579,11 @@ aligned_unique_ptr hwlmBuild(const vector &lits, if (noodle) { engSize = noodSize(noodle.get()); } - if (stream_control) { - // For now, a single literal still goes to noodle and asks - // for a great big history - stream_control->literal_history_required = lit.s.length() - 1; - assert(stream_control->literal_history_required - <= stream_control->history_max); - } eng = move(noodle); } else { DEBUG_PRINTF("building a new deal\n"); engType = HWLM_ENGINE_FDR; - auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey, - stream_control); + auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey); if (fdr) { engSize = fdrSize(fdr.get()); } @@ -640,14 +608,6 @@ aligned_unique_ptr hwlmBuild(const vector &lits, buildForwardAccel(h.get(), lits, expected_groups); } - if (stream_control) { - DEBUG_PRINTF("requires %zu (of max %zu) bytes of history\n", - stream_control->literal_history_required, - stream_control->history_max); - assert(stream_control->literal_history_required - <= stream_control->history_max); - } - return h; } diff --git a/src/hwlm/hwlm_build.h b/src/hwlm/hwlm_build.h index fbf359e6..5dd7dbc9 100644 --- a/src/hwlm/hwlm_build.h +++ b/src/hwlm/hwlm_build.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,7 +34,6 @@ #define HWLM_BUILD_H #include "hwlm.h" -#include "hwlm_literal.h" #include "ue2common.h" #include "util/alloc.h" @@ -47,30 +46,12 @@ namespace ue2 { struct CompileContext; struct Grey; -struct target_t; - -/** \brief Structure gathering together the input/output parameters related to - * streaming mode operation. */ -struct hwlmStreamingControl { - /** \brief IN parameter: Upper limit on the amount of history that can be - * requested. */ - size_t history_max; - - /** \brief IN parameter: History already known to be used before literal - * analysis. */ - size_t history_min; - - /** \brief OUT parameter: History required by the literal matcher to - * correctly match all literals. */ - size_t literal_history_required; -}; +struct hwlmLiteral; /** \brief Build an \ref HWLM literal matcher runtime structure for a group of * literals. * * \param lits The group of literals. - * \param stream_control Streaming control parameters. If the matcher will - * operate in non-streaming (block) mode, this pointer should be NULL. * \param make_small Optimise matcher for small size. * \param cc Compile context. * \param expected_groups FIXME: document me! @@ -80,8 +61,7 @@ struct hwlmStreamingControl { * thrown. */ aligned_unique_ptr -hwlmBuild(const std::vector &lits, - hwlmStreamingControl *stream_control, bool make_small, +hwlmBuild(const std::vector &lits, bool make_small, const CompileContext &cc, hwlm_group_t expected_groups = HWLM_ALL_GROUPS); diff --git a/src/hwlm/hwlm_literal.h b/src/hwlm/hwlm_literal.h index b7af99d3..a08b2ff6 100644 --- a/src/hwlm/hwlm_literal.h +++ b/src/hwlm/hwlm_literal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,7 +42,7 @@ namespace ue2 { /** \brief Max length of the literal passed to HWLM. */ -#define HWLM_LITERAL_MAX_LEN 255 +#define HWLM_LITERAL_MAX_LEN 8 /** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */ #define HWLM_MASKLEN 8 diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index e883c239..1a5f25e9 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -1409,6 +1409,68 @@ int roseCheckLongLiteral(const struct RoseEngine *t, return 1; } +static rose_inline +int roseCheckMediumLiteral(const struct RoseEngine *t, + const struct hs_scratch *scratch, u64a end, + u32 lit_offset, u32 lit_length, char nocase) { + const struct core_info *ci = &scratch->core_info; + const u8 *lit = getByOffset(t, lit_offset); + + DEBUG_PRINTF("check lit at %llu, length %u\n", end, lit_length); + DEBUG_PRINTF("base buf_offset=%llu\n", ci->buf_offset); + + if (end < lit_length) { + DEBUG_PRINTF("too short!\n"); + return 0; + } + + // If any portion of the literal matched in the current buffer, check it. + if (end > ci->buf_offset) { + u32 scan_len = MIN(end - ci->buf_offset, lit_length); + u64a scan_start = end - ci->buf_offset - scan_len; + DEBUG_PRINTF("checking suffix (%u bytes) in buf[%llu:%llu]\n", scan_len, + scan_start, end); + if (cmpForward(ci->buf + scan_start, lit + lit_length - scan_len, + scan_len, nocase)) { + DEBUG_PRINTF("cmp of suffix failed\n"); + return 0; + } + } + + // If the entirety of the literal was in the current block, we are done. + if (end - lit_length >= ci->buf_offset) { + DEBUG_PRINTF("literal confirmed in current block\n"); + return 1; + } + + // We still have a prefix which we must test against the history buffer. + assert(t->mode != HS_MODE_BLOCK); + + u64a lit_start_offset = end - lit_length; + u32 prefix_len = MIN(lit_length, ci->buf_offset - lit_start_offset); + u32 hist_rewind = ci->buf_offset - lit_start_offset; + DEBUG_PRINTF("hlen=%zu, hist_rewind=%u\n", ci->hlen, hist_rewind); + + // History length check required for confirm in the EOD and delayed + // rebuild paths. + if (hist_rewind > ci->hlen) { + DEBUG_PRINTF("not enough history\n"); + return 0; + } + + DEBUG_PRINTF("check prefix len=%u from hist (len %zu, rewind %u)\n", + prefix_len, ci->hlen, hist_rewind); + assert(hist_rewind <= ci->hlen); + if (cmpForward(ci->hbuf + ci->hlen - hist_rewind, lit, prefix_len, + nocase)) { + DEBUG_PRINTF("cmp of prefix failed\n"); + return 0; + } + + DEBUG_PRINTF("cmp succeeded\n"); + return 1; +} + static void updateSeqPoint(struct RoseContext *tctxt, u64a offset, const char from_mpv) { @@ -2060,8 +2122,10 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, const char nocase = 0; if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, ri->lit_length, nocase)) { - DEBUG_PRINTF("halt: failed long lit check\n"); - return HWLM_CONTINUE_MATCHING; + DEBUG_PRINTF("failed long lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; } } PROGRAM_NEXT_INSTRUCTION @@ -2070,8 +2134,34 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, const char nocase = 1; if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, ri->lit_length, nocase)) { - DEBUG_PRINTF("halt: failed nocase long lit check\n"); - return HWLM_CONTINUE_MATCHING; + DEBUG_PRINTF("failed nocase long lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MED_LIT) { + const char nocase = 0; + if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("failed lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MED_LIT_NOCASE) { + const char nocase = 1; + if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("failed long lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; } } PROGRAM_NEXT_INSTRUCTION diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index edf3e5e9..9f978134 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4353,13 +4353,18 @@ static void makeCheckLiteralInstruction(const RoseBuildImpl &build, const build_context &bc, u32 final_id, RoseProgram &program) { + assert(bc.longLitLengthThreshold > 0); + + DEBUG_PRINTF("final_id %u, long lit threshold %zu\n", final_id, + bc.longLitLengthThreshold); + const auto &lits = build.final_id_to_literal.at(final_id); if (lits.size() != 1) { - // Long literals should not share a final_id. + // final_id sharing is only allowed for literals that are short enough + // to not require any additional confirm work. assert(all_of(begin(lits), end(lits), [&](u32 lit_id) { const rose_literal_id &lit = build.literals.right.at(lit_id); - return lit.table != ROSE_FLOATING || - lit.s.length() <= bc.longLitLengthThreshold; + return lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX; })); return; } @@ -4370,11 +4375,9 @@ void makeCheckLiteralInstruction(const RoseBuildImpl &build, } const rose_literal_id &lit = build.literals.right.at(lit_id); - if (lit.table != ROSE_FLOATING) { - return; - } - assert(bc.longLitLengthThreshold > 0); - if (lit.s.length() <= bc.longLitLengthThreshold) { + + if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX) { + DEBUG_PRINTF("lit short enough to not need confirm\n"); return; } @@ -4383,11 +4386,34 @@ void makeCheckLiteralInstruction(const RoseBuildImpl &build, throw ResourceLimitError(); } + if (lit.s.length() <= bc.longLitLengthThreshold) { + DEBUG_PRINTF("is a medium-length literal\n"); + const auto *end_inst = program.end_instruction(); + unique_ptr ri; + if (lit.s.any_nocase()) { + ri = make_unique(lit.s.get_string(), + end_inst); + } else { + ri = make_unique(lit.s.get_string(), + end_inst); + } + program.add_before_end(move(ri)); + return; + } + + // Long literal support should only really be used for the floating table + // in streaming mode. + assert(lit.table == ROSE_FLOATING && build.cc.streaming); + + DEBUG_PRINTF("is a long literal\n"); + + const auto *end_inst = program.end_instruction(); unique_ptr ri; if (lit.s.any_nocase()) { - ri = make_unique(lit.s.get_string()); + ri = make_unique(lit.s.get_string(), + end_inst); } else { - ri = make_unique(lit.s.get_string()); + ri = make_unique(lit.s.get_string(), end_inst); } program.add_before_end(move(ri)); } @@ -4522,6 +4548,7 @@ u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc, } RoseProgram program; + makeCheckLiteralInstruction(build, bc, final_id, program); makeCheckLitMaskInstruction(build, bc, final_id, program); makePushDelayedInstructions(build, final_id, program); assert(!program.empty()); @@ -4951,7 +4978,7 @@ u32 buildEagerQueueIter(const set &eager, u32 leftfixBeginQueue, static void allocateFinalIdToSet(RoseBuildImpl &build, const set &lits, - size_t longLitLengthThreshold, u32 *next_final_id) { + u32 *next_final_id) { const auto &g = build.g; auto &literal_info = build.literal_info; auto &final_id_to_literal = build.final_id_to_literal; @@ -4961,8 +4988,6 @@ void allocateFinalIdToSet(RoseBuildImpl &build, const set &lits, * ids and squash the same roles and have the same group squashing * behaviour. Benefits literals cannot be merged. */ - assert(longLitLengthThreshold > 0); - for (u32 int_id : lits) { rose_literal_info &curr_info = literal_info[int_id]; const rose_literal_id &lit = build.literals.right.at(int_id); @@ -4974,10 +4999,10 @@ void allocateFinalIdToSet(RoseBuildImpl &build, const set &lits, goto assign_new_id; } - // Long literals (that require CHECK_LONG_LIT instructions) cannot be - // merged. - if (lit.s.length() > longLitLengthThreshold) { - DEBUG_PRINTF("id %u is a long literal\n", int_id); + // Literals that need confirmation with CHECK_LONG_LIT or CHECK_MED_LIT + // cannot be merged. + if (lit.s.length() > ROSE_SHORT_LITERAL_LEN_MAX) { + DEBUG_PRINTF("id %u needs lit confirm\n", int_id); goto assign_new_id; } @@ -5001,7 +5026,7 @@ void allocateFinalIdToSet(RoseBuildImpl &build, const set &lits, const auto &cand_info = literal_info[cand_id]; const auto &cand_lit = build.literals.right.at(cand_id); - if (cand_lit.s.length() > longLitLengthThreshold) { + if (cand_lit.s.length() > ROSE_SHORT_LITERAL_LEN_MAX) { continue; } @@ -5071,8 +5096,7 @@ bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) { /** \brief Allocate final literal IDs for all literals. */ static -void allocateFinalLiteralId(RoseBuildImpl &build, - size_t longLitLengthThreshold) { +void allocateFinalLiteralId(RoseBuildImpl &build) { set anch; set norm; set delay; @@ -5106,15 +5130,15 @@ void allocateFinalLiteralId(RoseBuildImpl &build, } /* normal lits */ - allocateFinalIdToSet(build, norm, longLitLengthThreshold, &next_final_id); + allocateFinalIdToSet(build, norm, &next_final_id); /* next anchored stuff */ build.anchored_base_id = next_final_id; - allocateFinalIdToSet(build, anch, longLitLengthThreshold, &next_final_id); + allocateFinalIdToSet(build, anch, &next_final_id); /* delayed ids come last */ build.delay_base_id = next_final_id; - allocateFinalIdToSet(build, delay, longLitLengthThreshold, &next_final_id); + allocateFinalIdToSet(build, delay, &next_final_id); } static @@ -5188,10 +5212,11 @@ size_t calcLongLitThreshold(const RoseBuildImpl &build, const size_t historyRequired) { const auto &cc = build.cc; - // In block mode, we should only use the long literal support for literals - // that cannot be handled by HWLM. + // In block mode, we don't have history, so we don't need long literal + // support and can just use "medium-length" literal confirm. TODO: we could + // specialize further and have a block mode literal confirm instruction. if (!cc.streaming) { - return HWLM_LITERAL_MAX_LEN; + return SIZE_MAX; } size_t longLitLengthThreshold = ROSE_LONG_LITERAL_THRESHOLD_MIN; @@ -5227,7 +5252,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { historyRequired); DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold); - allocateFinalLiteralId(*this, longLitLengthThreshold); + allocateFinalLiteralId(*this); auto anchored_dfas = buildAnchoredDfas(*this); diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 9b8ea7f7..0505a71e 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -123,7 +123,7 @@ void RoseBuildImpl::handleMixedSensitivity(void) { // with a CHECK_LONG_LIT instruction and need unique final_ids. // TODO: we could allow explosion for literals where the prefixes // covered by CHECK_LONG_LIT are identical. - if (lit.s.length() <= ROSE_LONG_LITERAL_THRESHOLD_MIN && + if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX && limited_explosion(lit.s)) { DEBUG_PRINTF("need to explode existing string '%s'\n", dumpString(lit.s).c_str()); diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 67740312..e7cef100 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -35,7 +35,7 @@ #include "rose/rose_dump.h" #include "rose_internal.h" #include "ue2common.h" -#include "hwlm/hwlm_build.h" +#include "hwlm/hwlm_literal.h" #include "nfa/castlecompile.h" #include "nfa/nfa_internal.h" #include "nfagraph/ng_dump.h" @@ -505,24 +505,22 @@ void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) { size_t longLitLengthThreshold = calcLongLitThreshold(build, historyRequired); - auto lits = fillHamsterLiteralList(build, ROSE_ANCHORED, - longLitLengthThreshold); - dumpTestLiterals(base + "rose_anchored_test_literals.txt", lits); + auto mp = makeMatcherProto(build, ROSE_ANCHORED, longLitLengthThreshold); + dumpTestLiterals(base + "rose_anchored_test_literals.txt", mp.lits); - lits = fillHamsterLiteralList(build, ROSE_FLOATING, longLitLengthThreshold); - dumpTestLiterals(base + "rose_float_test_literals.txt", lits); + mp = makeMatcherProto(build, ROSE_FLOATING, longLitLengthThreshold); + dumpTestLiterals(base + "rose_float_test_literals.txt", mp.lits); - lits = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED, - build.ematcher_region_size); - dumpTestLiterals(base + "rose_eod_test_literals.txt", lits); + mp = makeMatcherProto(build, ROSE_EOD_ANCHORED, build.ematcher_region_size); + dumpTestLiterals(base + "rose_eod_test_literals.txt", mp.lits); if (!build.cc.streaming) { - lits = fillHamsterLiteralList(build, ROSE_FLOATING, + mp = makeMatcherProto(build, ROSE_FLOATING, ROSE_SMALL_BLOCK_LEN, + ROSE_SMALL_BLOCK_LEN); + auto mp2 = makeMatcherProto(build, ROSE_ANCHORED_SMALL_BLOCK, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); - auto lits2 = fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK, - ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); - lits.insert(end(lits), begin(lits2), end(lits2)); - dumpTestLiterals(base + "rose_smallblock_test_literals.txt", lits); + mp.lits.insert(end(mp.lits), begin(mp2.lits), end(mp2.lits)); + dumpTestLiterals(base + "rose_smallblock_test_literals.txt", mp.lits); } } diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 6b326d34..02c5a389 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -58,6 +58,17 @@ namespace ue2 { #define ROSE_LONG_LITERAL_THRESHOLD_MIN 33 +/** + * \brief The largest allowable "short" literal fragment which can be given to + * a literal matcher directly. + * + * Literals longer than this will be truncated to their suffix and confirmed in + * the Rose interpreter, either as "medium length" literals which can be + * confirmed from history, or "long literals" which make use of the streaming + * table support. + */ +#define ROSE_SHORT_LITERAL_LEN_MAX 8 + struct BoundaryReports; struct CastleProto; struct CompileContext; diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 01633c06..f7c237a7 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -491,8 +491,14 @@ bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id, return false; } - if (build.literals.right.at(id).s.length() > max_len) { - DEBUG_PRINTF("requires literal check\n"); + size_t len = build.literals.right.at(id).s.length(); + if (len > max_len) { + DEBUG_PRINTF("long literal, requires confirm\n"); + return false; + } + + if (len > ROSE_SHORT_LITERAL_LEN_MAX) { + DEBUG_PRINTF("medium-length literal, requires confirm\n"); return false; } @@ -626,10 +632,10 @@ u64a literalMinReportOffset(const RoseBuildImpl &build, return lit_min_offset; } -vector fillHamsterLiteralList(const RoseBuildImpl &build, - rose_literal_table table, - size_t max_len, u32 max_offset) { - vector lits; +MatcherProto makeMatcherProto(const RoseBuildImpl &build, + rose_literal_table table, size_t max_len, + u32 max_offset) { + MatcherProto mp; for (const auto &e : build.literals.right) { const u32 id = e.first; @@ -652,7 +658,8 @@ vector fillHamsterLiteralList(const RoseBuildImpl &build, /* Note: requires_benefits are handled in the literal entries */ const ue2_literal &lit = e.second.s; - DEBUG_PRINTF("lit='%s'\n", escapeString(lit).c_str()); + DEBUG_PRINTF("lit='%s' (len %zu)\n", escapeString(lit).c_str(), + lit.length()); if (max_offset != ROSE_BOUND_INF) { u64a min_report = literalMinReportOffset(build, e.second, info); @@ -665,14 +672,22 @@ vector fillHamsterLiteralList(const RoseBuildImpl &build, const vector &msk = e.second.msk; const vector &cmp = e.second.cmp; - bool noruns = isNoRunsLiteral(build, id, info, max_len); + size_t lit_hist_len = 0; + if (build.cc.streaming) { + lit_hist_len = max(msk.size(), min(lit.length(), max_len)); + lit_hist_len = lit_hist_len ? lit_hist_len - 1 : 0; + } + DEBUG_PRINTF("lit requires %zu bytes of history\n", lit_hist_len); + assert(lit_hist_len <= build.cc.grey.maxHistoryAvailable); + if (info.requires_explode) { DEBUG_PRINTF("exploding lit\n"); - // We do not require_explode for long literals. - assert(lit.length() <= max_len); + // We do not require_explode for literals that need confirm + // (long/medium length literals). + assert(lit.length() <= ROSE_SHORT_LITERAL_LEN_MAX); case_iter cit = caseIterateBegin(lit); case_iter cite = caseIterateEnd(); @@ -690,8 +705,9 @@ vector fillHamsterLiteralList(const RoseBuildImpl &build, continue; } - lits.emplace_back(move(s), nocase, noruns, final_id, groups, - msk, cmp); + mp.history_required = max(mp.history_required, lit_hist_len); + mp.lits.emplace_back(move(s), nocase, noruns, final_id, groups, + msk, cmp); } } else { string s = lit.get_string(); @@ -702,11 +718,13 @@ vector fillHamsterLiteralList(const RoseBuildImpl &build, final_id, escapeString(s).c_str(), (int)nocase, noruns, dumpMask(msk).c_str(), dumpMask(cmp).c_str()); - if (s.length() > max_len) { - DEBUG_PRINTF("truncating to tail of length %zu\n", max_len); - s.erase(0, s.length() - max_len); + if (s.length() > ROSE_SHORT_LITERAL_LEN_MAX) { + DEBUG_PRINTF("truncating to tail of length %zu\n", + size_t{ROSE_SHORT_LITERAL_LEN_MAX}); + s.erase(0, s.length() - ROSE_SHORT_LITERAL_LEN_MAX); // We shouldn't have set a threshold below 8 chars. - assert(msk.size() <= max_len); + assert(msk.size() <= ROSE_SHORT_LITERAL_LEN_MAX); + assert(!noruns); } if (!maskIsConsistent(s, nocase, msk, cmp)) { @@ -714,12 +732,13 @@ vector fillHamsterLiteralList(const RoseBuildImpl &build, continue; } - lits.emplace_back(move(s), nocase, noruns, final_id, groups, msk, - cmp); + mp.history_required = max(mp.history_required, lit_hist_len); + mp.lits.emplace_back(move(s), nocase, noruns, final_id, groups, msk, + cmp); } } - return lits; + return mp; } aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, @@ -730,49 +749,31 @@ aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, *fsize = 0; *fgroups = 0; - auto fl = fillHamsterLiteralList(build, ROSE_FLOATING, - longLitLengthThreshold); - if (fl.empty()) { + auto mp = makeMatcherProto(build, ROSE_FLOATING, longLitLengthThreshold); + if (mp.lits.empty()) { DEBUG_PRINTF("empty floating matcher\n"); return nullptr; } - for (const hwlmLiteral &hlit : fl) { - *fgroups |= hlit.groups; + for (const hwlmLiteral &lit : mp.lits) { + *fgroups |= lit.groups; } - hwlmStreamingControl ctl; - hwlmStreamingControl *ctlp; - if (build.cc.streaming) { - ctl.history_max = build.cc.grey.maxHistoryAvailable; - ctl.history_min = MAX(*historyRequired, - build.cc.grey.minHistoryAvailable); - DEBUG_PRINTF("streaming control, history max=%zu, min=%zu\n", - ctl.history_max, ctl.history_min); - ctlp = &ctl; - } else { - ctlp = nullptr; // Null for non-streaming. - } - - aligned_unique_ptr ftable = - hwlmBuild(fl, ctlp, false, build.cc, build.getInitialGroups()); - if (!ftable) { + auto hwlm = hwlmBuild(mp.lits, false, build.cc, build.getInitialGroups()); + if (!hwlm) { throw CompileError("Unable to generate bytecode."); } if (build.cc.streaming) { - DEBUG_PRINTF("literal_history_required=%zu\n", - ctl.literal_history_required); - assert(ctl.literal_history_required <= - build.cc.grey.maxHistoryAvailable); - *historyRequired = max(*historyRequired, - ctl.literal_history_required); + DEBUG_PRINTF("history_required=%zu\n", mp.history_required); + assert(mp.history_required <= build.cc.grey.maxHistoryAvailable); + *historyRequired = max(*historyRequired, mp.history_required); } - *fsize = hwlmSize(ftable.get()); + *fsize = hwlmSize(hwlm.get()); assert(*fsize); DEBUG_PRINTF("built floating literal table size %zu bytes\n", *fsize); - return ftable; + return hwlm; } aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, @@ -791,38 +792,38 @@ aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, return nullptr; } - auto lits = fillHamsterLiteralList( - build, ROSE_FLOATING, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); - if (lits.empty()) { + auto mp = makeMatcherProto(build, ROSE_FLOATING, ROSE_SMALL_BLOCK_LEN, + ROSE_SMALL_BLOCK_LEN); + if (mp.lits.empty()) { DEBUG_PRINTF("no floating table\n"); return nullptr; - } else if (lits.size() == 1) { + } else if (mp.lits.size() == 1) { DEBUG_PRINTF("single floating literal, noodle will be fast enough\n"); return nullptr; } - auto anchored_lits = - fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK, - ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); - if (anchored_lits.empty()) { + auto mp_anchored = + makeMatcherProto(build, ROSE_ANCHORED_SMALL_BLOCK, ROSE_SMALL_BLOCK_LEN, + ROSE_SMALL_BLOCK_LEN); + if (mp_anchored.lits.empty()) { DEBUG_PRINTF("no small-block anchored literals\n"); return nullptr; } - lits.insert(lits.end(), anchored_lits.begin(), anchored_lits.end()); + mp.lits.insert(mp.lits.end(), mp_anchored.lits.begin(), + mp_anchored.lits.end()); // None of our literals should be longer than the small block limit. - assert(all_of(begin(lits), end(lits), [](const hwlmLiteral &lit) { + assert(all_of(begin(mp.lits), end(mp.lits), [](const hwlmLiteral &lit) { return lit.s.length() <= ROSE_SMALL_BLOCK_LEN; })); - if (lits.empty()) { + if (mp.lits.empty()) { DEBUG_PRINTF("no literals shorter than small block len\n"); return nullptr; } - aligned_unique_ptr hwlm = - hwlmBuild(lits, nullptr, true, build.cc, build.getInitialGroups()); + auto hwlm = hwlmBuild(mp.lits, true, build.cc, build.getInitialGroups()); if (!hwlm) { throw CompileError("Unable to generate bytecode."); } @@ -837,10 +838,10 @@ aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, size_t *esize) { *esize = 0; - auto el = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED, - build.ematcher_region_size); + auto mp = + makeMatcherProto(build, ROSE_EOD_ANCHORED, build.ematcher_region_size); - if (el.empty()) { + if (mp.lits.empty()) { DEBUG_PRINTF("no eod anchored literals\n"); assert(!build.ematcher_region_size); return nullptr; @@ -848,17 +849,15 @@ aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, assert(build.ematcher_region_size); - hwlmStreamingControl *ctlp = nullptr; // not a streaming case - aligned_unique_ptr etable = - hwlmBuild(el, ctlp, true, build.cc, build.getInitialGroups()); - if (!etable) { + auto hwlm = hwlmBuild(mp.lits, true, build.cc, build.getInitialGroups()); + if (!hwlm) { throw CompileError("Unable to generate bytecode."); } - *esize = hwlmSize(etable.get()); + *esize = hwlmSize(hwlm.get()); assert(*esize); DEBUG_PRINTF("built eod-anchored literal table size %zu bytes\n", *esize); - return etable; + return hwlm; } } // namespace ue2 diff --git a/src/rose/rose_build_matchers.h b/src/rose/rose_build_matchers.h index a25dbca3..15ccf278 100644 --- a/src/rose/rose_build_matchers.h +++ b/src/rose/rose_build_matchers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -44,15 +44,21 @@ namespace ue2 { struct hwlmLiteral; +struct MatcherProto { + std::vector lits; + size_t history_required = 0; +}; + /** - * \brief Build up a vector of literals for the given table. + * \brief Build up a vector of literals (and associated other data) for the + * given table. * * If max_offset is specified (and not ROSE_BOUND_INF), then literals that can * only lead to a pattern match after max_offset may be excluded. */ -std::vector fillHamsterLiteralList(const RoseBuildImpl &build, - rose_literal_table table, size_t max_len, - u32 max_offset = ROSE_BOUND_INF); +MatcherProto makeMatcherProto(const RoseBuildImpl &build, + rose_literal_table table, size_t max_len, + u32 max_offset = ROSE_BOUND_INF); aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 28b885bd..97579111 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -28,7 +28,7 @@ #include "rose_build_impl.h" -#include "hwlm/hwlm_build.h" +#include "hwlm/hwlm_literal.h" #include "nfa/castlecompile.h" #include "nfa/goughcompile.h" #include "nfa/mcclellancompile_util.h" diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index ee237639..5f7ab0bf 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -502,6 +502,7 @@ void RoseInstrCheckLongLit::write(void *dest, RoseEngineBlob &blob, assert(!literal.empty()); inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); inst->lit_length = verify_u32(literal.size()); + inst->fail_jump = calc_jump(offset_map, this, target); } void RoseInstrCheckLongLitNocase::write(void *dest, RoseEngineBlob &blob, @@ -511,6 +512,27 @@ void RoseInstrCheckLongLitNocase::write(void *dest, RoseEngineBlob &blob, assert(!literal.empty()); inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); inst->lit_length = verify_u32(literal.size()); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMedLit::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + assert(!literal.empty()); + inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); + inst->lit_length = verify_u32(literal.size()); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMedLitNocase::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + assert(!literal.empty()); + inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); + inst->lit_length = verify_u32(literal.size()); + inst->fail_jump = calc_jump(offset_map, this, target); } static diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h index 39e2e23c..440bf4e1 100644 --- a/src/rose/rose_build_program.h +++ b/src/rose/rose_build_program.h @@ -1723,17 +1723,19 @@ public: }; class RoseInstrCheckLongLit - : public RoseInstrBaseNoTargets { public: std::string literal; + const RoseInstruction *target; - explicit RoseInstrCheckLongLit(std::string literal_in) - : literal(std::move(literal_in)) {} + RoseInstrCheckLongLit(std::string literal_in, + const RoseInstruction *target_in) + : literal(std::move(literal_in)), target(target_in) {} bool operator==(const RoseInstrCheckLongLit &ri) const { - return literal == ri.literal; + return literal == ri.literal && target == ri.target; } size_t hash() const override { @@ -1743,26 +1745,29 @@ public: void write(void *dest, RoseEngineBlob &blob, const OffsetMap &offset_map) const override; - bool equiv_to(const RoseInstrCheckLongLit &ri, const OffsetMap &, - const OffsetMap &) const { - return literal == ri.literal; + bool equiv_to(const RoseInstrCheckLongLit &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return literal == ri.literal && + offsets.at(target) == other_offsets.at(ri.target); } }; class RoseInstrCheckLongLitNocase - : public RoseInstrBaseNoTargets { public: std::string literal; + const RoseInstruction *target; - explicit RoseInstrCheckLongLitNocase(std::string literal_in) - : literal(std::move(literal_in)) { + RoseInstrCheckLongLitNocase(std::string literal_in, + const RoseInstruction *target_in) + : literal(std::move(literal_in)), target(target_in) { upperString(literal); } bool operator==(const RoseInstrCheckLongLitNocase &ri) const { - return literal == ri.literal; + return literal == ri.literal && target == ri.target; } size_t hash() const override { @@ -1772,9 +1777,74 @@ public: void write(void *dest, RoseEngineBlob &blob, const OffsetMap &offset_map) const override; - bool equiv_to(const RoseInstrCheckLongLitNocase &ri, const OffsetMap &, - const OffsetMap &) const { - return literal == ri.literal; + bool equiv_to(const RoseInstrCheckLongLitNocase &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return literal == ri.literal && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMedLit + : public RoseInstrBaseNoTargets { +public: + std::string literal; + const RoseInstruction *target; + + explicit RoseInstrCheckMedLit(std::string literal_in, + const RoseInstruction *target_in) + : literal(std::move(literal_in)), target(target_in) {} + + bool operator==(const RoseInstrCheckMedLit &ri) const { + return literal == ri.literal && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), literal); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMedLit &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return literal == ri.literal && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMedLitNocase + : public RoseInstrBaseNoTargets { +public: + std::string literal; + const RoseInstruction *target; + + explicit RoseInstrCheckMedLitNocase(std::string literal_in, + const RoseInstruction *target_in) + : literal(std::move(literal_in)), target(target_in) { + upperString(literal); + } + + bool operator==(const RoseInstrCheckMedLitNocase &ri) const { + return literal == ri.literal && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), literal); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMedLitNocase &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return literal == ri.literal && + offsets.at(target) == other_offsets.at(ri.target); } }; diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 1867be50..5d79da2e 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -617,6 +617,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { const char *lit = (const char *)t + ri->lit_offset; os << " literal: \"" << escapeString(string(lit, ri->lit_length)) << "\"" << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION @@ -626,6 +627,27 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { const char *lit = (const char *)t + ri->lit_offset; os << " literal: \"" << escapeString(string(lit, ri->lit_length)) << "\"" << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MED_LIT) { + os << " lit_offset " << ri->lit_offset << endl; + os << " lit_length " << ri->lit_length << endl; + const char *lit = (const char *)t + ri->lit_offset; + os << " literal: \"" + << escapeString(string(lit, ri->lit_length)) << "\"" << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MED_LIT_NOCASE) { + os << " lit_offset " << ri->lit_offset << endl; + os << " lit_length " << ri->lit_length << endl; + const char *lit = (const char *)t + ri->lit_offset; + os << " literal: \"" + << escapeString(string(lit, ri->lit_length)) << "\"" << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index ed913316..c5ddc942 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -129,7 +129,19 @@ enum RoseInstructionCode { */ ROSE_INSTR_CHECK_LONG_LIT_NOCASE, - LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_LONG_LIT_NOCASE //!< Sentinel. + /** + * \brief Confirm a case-sensitive "medium length" literal at the current + * offset. In streaming mode, this will check history if needed. + */ + ROSE_INSTR_CHECK_MED_LIT, + + /** + * \brief Confirm a case-insensitive "medium length" literal at the current + * offset. In streaming mode, this will check history if needed. + */ + ROSE_INSTR_CHECK_MED_LIT_NOCASE, + + LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_MED_LIT_NOCASE //!< Sentinel. }; struct ROSE_STRUCT_END { @@ -477,18 +489,32 @@ struct ROSE_STRUCT_MATCHER_EOD { u8 code; //!< From enum RoseInstructionCode. }; -/** Note: check failure will halt program. */ struct ROSE_STRUCT_CHECK_LONG_LIT { u8 code; //!< From enum RoseInstructionCode. u32 lit_offset; //!< Offset of literal string. u32 lit_length; //!< Length of literal string. + u32 fail_jump; //!< Jump forward this many bytes on failure. }; -/** Note: check failure will halt program. */ struct ROSE_STRUCT_CHECK_LONG_LIT_NOCASE { u8 code; //!< From enum RoseInstructionCode. u32 lit_offset; //!< Offset of literal string. u32 lit_length; //!< Length of literal string. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_MED_LIT { + u8 code; //!< From enum RoseInstructionCode. + u32 lit_offset; //!< Offset of literal string. + u32 lit_length; //!< Length of literal string. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_MED_LIT_NOCASE { + u8 code; //!< From enum RoseInstructionCode. + u32 lit_offset; //!< Offset of literal string. + u32 lit_length; //!< Length of literal string. + u32 fail_jump; //!< Jump forward this many bytes on failure. }; #endif // ROSE_ROSE_PROGRAM_H diff --git a/unit/internal/fdr.cpp b/unit/internal/fdr.cpp index 6116bfdb..8ec72598 100644 --- a/unit/internal/fdr.cpp +++ b/unit/internal/fdr.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -96,15 +96,6 @@ struct match { }; extern "C" { -static -hwlmcb_rv_t countCallback(UNUSED size_t start, UNUSED size_t end, u32, - void *ctxt) { - if (ctxt) { - ++*(u32 *)ctxt; - } - - return HWLM_CONTINUE_MATCHING; -} static hwlmcb_rv_t decentCallback(size_t start, size_t end, u32 id, void *ctxt) { @@ -231,42 +222,6 @@ TEST_P(FDRp, MultiLocation) { } } -TEST_P(FDRp, Flood) { - const u32 hint = GetParam(); - SCOPED_TRACE(hint); - - vector lits; - lits.push_back(hwlmLiteral("aaaa", 0, 1)); - lits.push_back(hwlmLiteral("aaaaaaaa", 0, 2)); - lits.push_back(hwlmLiteral("baaaaaaaa", 0, 3)); - lits.push_back(hwlmLiteral("aaaaaaaab", 0, 4)); - - auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); - CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); - - const u32 testSize = 1024; - vector data(testSize, 'a'); - - vector matches; - fdrExec(fdr.get(), data.data(), testSize, 0, decentCallback, &matches, - HWLM_ALL_GROUPS); - ASSERT_EQ(testSize - 3 + testSize - 7, matches.size()); - EXPECT_EQ(match(0, 3, 1), matches[0]); - EXPECT_EQ(match(1, 4, 1), matches[1]); - EXPECT_EQ(match(2, 5, 1), matches[2]); - EXPECT_EQ(match(3, 6, 1), matches[3]); - - u32 currentMatch = 4; - for (u32 i = 7; i < testSize; i++, currentMatch += 2) { - EXPECT_TRUE( - (match(i - 3, i, 1) == matches[currentMatch] && - match(i - 7, i, 2) == matches[currentMatch+1]) || - (match(i - 7, i, 2) == matches[currentMatch+1] && - match(i - 3, i, 1) == matches[currentMatch]) - ); - } -} - TEST_P(FDRp, NoRepeat1) { const u32 hint = GetParam(); SCOPED_TRACE(hint); @@ -414,36 +369,6 @@ TEST_P(FDRp, SmallStreaming2) { ASSERT_EQ(expected.size(), matches.size()); } -TEST_P(FDRp, LongLiteral) { - const u32 hint = GetParam(); - SCOPED_TRACE(hint); - size_t sz; - const u8 *data; - vector lits; - - string alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - string alpha4 = alpha+alpha+alpha+alpha; - lits.push_back(hwlmLiteral(alpha4.c_str(), 0,10)); - - auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); - CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); - - u32 count = 0; - - data = (const u8 *)alpha4.c_str(); - sz = alpha4.size(); - - fdrExec(fdr.get(), data, sz, 0, countCallback, &count, HWLM_ALL_GROUPS); - EXPECT_EQ(1U, count); - count = 0; - fdrExec(fdr.get(), data, sz - 1, 0, countCallback, &count, HWLM_ALL_GROUPS); - EXPECT_EQ(0U, count); - count = 0; - fdrExec(fdr.get(), data + 1, sz - 1, 0, countCallback, &count, - HWLM_ALL_GROUPS); - EXPECT_EQ(0U, count); -} - TEST_P(FDRp, moveByteStream) { const u32 hint = GetParam(); SCOPED_TRACE(hint); @@ -491,7 +416,7 @@ TEST_P(FDRp, Stream1) { vector lits; lits.push_back(hwlmLiteral("f", 0, 0)); - lits.push_back(hwlmLiteral("longsigislong", 0, 1)); + lits.push_back(hwlmLiteral("literal", 0, 1)); auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); @@ -514,7 +439,7 @@ INSTANTIATE_TEST_CASE_P(FDR, FDRp, ValuesIn(getValidFdrEngines())); typedef struct { string pattern; - unsigned char alien; + unsigned char alien; // character not present in pattern } pattern_alien_t; // gtest helper @@ -529,7 +454,6 @@ class FDRpp : public TestWithParam> {}; // not happen if literal is partially (from 1 character up to full literal // length) is out of searched buffer - "too early" and "too late" conditions TEST_P(FDRpp, AlignAndTooEarly) { - const size_t buf_alignment = 32; // Buffer should be big enough to hold two instances of matching literals // (up to 64 bytes each) and room for offset (up to 32 bytes) @@ -538,7 +462,7 @@ TEST_P(FDRpp, AlignAndTooEarly) { const u32 hint = get<0>(GetParam()); SCOPED_TRACE(hint); - // pattern which is used to generate literals of variable size - from 1 to 64 + // pattern which is used to generate literals of variable size - from 1 to 8 const string &pattern = get<1>(GetParam()).pattern; const size_t patLen = pattern.size(); const unsigned char alien = get<1>(GetParam()).alien; @@ -551,7 +475,7 @@ TEST_P(FDRpp, AlignAndTooEarly) { vector lits; for (size_t litLen = 1; litLen <= patLen; litLen++) { - // building literal from pattern substring of variable length 1-64 + // building literal from pattern substring of variable length 1-patLen lits.push_back(hwlmLiteral(string(pattern, 0, litLen), 0, 0)); auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); @@ -596,9 +520,9 @@ TEST_P(FDRpp, AlignAndTooEarly) { } static const pattern_alien_t test_pattern[] = { - {"abaabaaabaaabbaaaaabaaaaabbaaaaaaabaabbaaaabaaaaaaaabbbbaaaaaaab", 'x'}, - {"zzzyyzyzyyyyzyyyyyzzzzyyyyyyyyzyyyyyyyzzzzzyzzzzzzzzzyzzyzzzzzzz", (unsigned char)'\x99'}, - {"abcdef lafjk askldfjklf alfqwei9rui 'gldgkjnooiuswfs138746453583", '\0'} + {"abaabaaa", 'x'}, + {"zzzyyzyz", (unsigned char)'\x99'}, + {"abcdef l", '\0'} }; INSTANTIATE_TEST_CASE_P(FDR, FDRpp, Combine(ValuesIn(getValidFdrEngines()), diff --git a/unit/internal/fdr_flood.cpp b/unit/internal/fdr_flood.cpp index 7b00ac4c..952fffc1 100644 --- a/unit/internal/fdr_flood.cpp +++ b/unit/internal/fdr_flood.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -161,8 +161,8 @@ TEST_P(FDRFloodp, NoMask) { vector lits; // build literals of type "aaaa", "aaab", "baaa" - // of lengths 1, 2, 4, 8, 16, 32, both case-less and case-sensitive - for (int i = 0; i < 6 ; i++) { + // of lengths 1, 2, 4, 8, both case-less and case-sensitive + for (int i = 0; i < 4; i++) { string s(1 << i, c); lits.push_back(hwlmLiteral(s, false, i * 8 + 0)); s[0] = cAlt; @@ -183,13 +183,13 @@ TEST_P(FDRFloodp, NoMask) { Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); - map matchesCounts; + map matchesCounts; hwlm_error_t fdrStatus = fdrExec(fdr.get(), &data[0], dataSize, 0, countCallback, (void *)&matchesCounts, HWLM_ALL_GROUPS); ASSERT_EQ(0, fdrStatus); - for (u8 i = 0; i < 6 ; i++) { + for (u8 i = 0; i < 4; i++) { u32 cnt = dataSize - (1 << i) + 1; ASSERT_EQ(cnt, matchesCounts[i * 8 + 0]); ASSERT_EQ(0, matchesCounts[i * 8 + 1]); @@ -214,7 +214,7 @@ TEST_P(FDRFloodp, NoMask) { 0, countCallback, (void *)&matchesCounts, HWLM_ALL_GROUPS); ASSERT_EQ(0, fdrStatus); - for (u8 i = 0; i < 6 ; i++) { + for (u8 i = 0; i < 4; i++) { u32 cnt = dataSize - (1 << i) + 1; ASSERT_EQ(0, matchesCounts[i * 8 + 0]); ASSERT_EQ(i == 0 ? cnt : 0, matchesCounts[i * 8 + 1]);