From bbd64f98ae49e34e6ce405f57b699201ed686b1c Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Tue, 31 Jan 2017 09:29:41 +1100 Subject: [PATCH] allow streams to be marked as exhausted in more cases At stream boundaries, we can mark streams as exhausted if there are no groups active and there are no other ways to report matches. This allows us to stop maintaining the history buffer on subsequent stream writes. Previously, streams were only marked as exhausted if a pure highlander case reported all patterns or the outfix in a sole outfix case died. --- src/hwlm/hwlm.c | 7 ++- src/rose/rose_build_add.cpp | 4 +- src/rose/rose_build_bytecode.cpp | 14 ++++- src/rose/rose_build_compile.cpp | 8 ++- src/rose/rose_build_groups.cpp | 95 ++++++++++++++++++++++++++------ src/rose/rose_build_impl.h | 2 + src/rose/rose_build_misc.cpp | 7 +++ src/rose/stream.c | 38 ++++++++++++- src/runtime.c | 6 +- src/scratch.h | 9 ++- src/util/container.h | 11 ++++ 11 files changed, 171 insertions(+), 30 deletions(-) diff --git a/src/hwlm/hwlm.c b/src/hwlm/hwlm.c index 3c7615a7..6eaa7ed1 100644 --- a/src/hwlm/hwlm.c +++ b/src/hwlm/hwlm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -172,6 +172,8 @@ void do_accel_streaming(const union AccelAux *aux, const u8 *hbuf, size_t hlen, hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len, size_t start, HWLMCallback cb, void *ctxt, hwlm_group_t groups) { + assert(t); + DEBUG_PRINTF("buf len=%zu, start=%zu, groups=%llx\n", len, start, groups); if (!groups) { DEBUG_PRINTF("groups all off\n"); @@ -201,6 +203,9 @@ hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len, hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch, size_t len, size_t start, HWLMCallback cb, void *ctxt, hwlm_group_t 
groups) { + assert(t); + assert(scratch); + const u8 *hbuf = scratch->core_info.hbuf; const size_t hlen = scratch->core_info.hlen; const u8 *buf = scratch->core_info.buf; diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index e6861ea4..01d7d827 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -722,7 +722,9 @@ void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u, RoseEdge e = add_edge(v, w, g); g[e].minBound = 0; g[e].maxBound = 0; - g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE; + /* No need to set history as the event is only delivered at the last + * byte anyway - no need to invalidate stale entries. */ + g[e].history = ROSE_ROLE_HISTORY_NONE; DEBUG_PRINTF("accept eod vertex (index=%zu)\n", g[w].index); } } diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index b4270278..736e0d35 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -172,6 +172,7 @@ struct RoseResources { bool has_lit_delay = false; bool has_lit_check = false; // long literal support bool has_anchored = false; + bool has_floating = false; bool has_eod = false; }; @@ -352,6 +353,11 @@ bool needsCatchup(const RoseBuildImpl &build, static bool isPureFloating(const RoseResources &resources, const CompileContext &cc) { + if (!resources.has_floating) { + DEBUG_PRINTF("no floating table\n"); + return false; + } + if (resources.has_outfixes || resources.has_suffixes || resources.has_leftfixes) { DEBUG_PRINTF("has engines\n"); @@ -429,6 +435,7 @@ u8 pickRuntimeImpl(const RoseBuildImpl &build, const build_context &bc, DEBUG_PRINTF("has_lit_delay=%d\n", bc.resources.has_lit_delay); DEBUG_PRINTF("has_lit_check=%d\n", bc.resources.has_lit_check); DEBUG_PRINTF("has_anchored=%d\n", bc.resources.has_anchored); + DEBUG_PRINTF("has_floating=%d\n", bc.resources.has_floating); DEBUG_PRINTF("has_eod=%d\n", bc.resources.has_eod); if (isPureFloating(bc.resources, build.cc)) { @@ -539,7 +546,10 @@ 
void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount, // Get the mask of initial vertices due to root and anchored_root. rose_group RoseBuildImpl::getInitialGroups() const { - rose_group groups = getSuccGroups(root) | getSuccGroups(anchored_root); + rose_group groups = getSuccGroups(root) + | getSuccGroups(anchored_root) + | boundary_group_mask; + DEBUG_PRINTF("initial groups = %016llx\n", groups); return groups; } @@ -2227,6 +2237,7 @@ u32 buildLastByteIter(const RoseGraph &g, build_context &bc) { auto it = bc.roleStateIndices.find(v); if (it != end(bc.roleStateIndices)) { lb_roles.push_back(it->second); + DEBUG_PRINTF("last byte %u\n", it->second); } } @@ -5521,6 +5532,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { currOffset = ROUNDUP_CL(currOffset); fmatcherOffset = currOffset; currOffset += verify_u32(fsize); + bc.resources.has_floating = true; } // Build EOD-anchored HWLM matcher. diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 24df8427..63b5bd0f 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -1236,11 +1236,15 @@ void buildRoseSquashMasks(RoseBuildImpl &tbi) { } } - rose_group unsquashable = 0; + rose_group unsquashable = tbi.boundary_group_mask; for (u32 lit_id : lit_ids) { const rose_literal_info &info = tbi.literal_info[lit_id]; - if (info.vertices.size() > 1 || !info.delayed_ids.empty()) { + if (!info.delayed_ids.empty() + || !all_of_in(info.vertices, + [&](RoseVertex v) { + return left == tbi.g[v].left; })) { + DEBUG_PRINTF("group %llu is unsquashable\n", info.group_mask); unsquashable |= info.group_mask; } } diff --git a/src/rose/rose_build_groups.cpp b/src/rose/rose_build_groups.cpp index 0a1c501f..a253ef04 100644 --- a/src/rose/rose_build_groups.cpp +++ b/src/rose/rose_build_groups.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and 
binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +33,9 @@ #include "rose_build_groups.h" +#include "util/boundary_reports.h" +#include "util/compile_context.h" + #include #include @@ -71,24 +74,18 @@ bool superStrong(const rose_literal_id &lit) { static bool eligibleForAlwaysOnGroup(const RoseBuildImpl &build, u32 id) { - /* returns true if it or any of its delay versions have root role */ - for (auto v : build.literal_info[id].vertices) { - if (build.isRootSuccessor(v)) { - NGHolder *h = build.g[v].left.graph.get(); - if (!h || proper_out_degree(h->startDs, *h)) { - return true; - } - } + auto eligble = [&](RoseVertex v) { + return build.isRootSuccessor(v) + && (!build.g[v].left || !isAnchored(build.g[v].left)); + }; + + if (any_of_in(build.literal_info[id].vertices, eligble)) { + return true; } for (u32 delayed_id : build.literal_info[id].delayed_ids) { - for (auto v : build.literal_info[delayed_id].vertices) { - if (build.isRootSuccessor(v)) { - NGHolder *h = build.g[v].left.graph.get(); - if (!h || proper_out_degree(h->startDs, *h)) { - return true; - } - } + if (any_of_in(build.literal_info[delayed_id].vertices, eligble)) { + return true; } } @@ -170,6 +167,64 @@ u32 next_available_group(u32 counter, u32 min_start_group) { return counter; } +static +void allocateGroupForBoundary(RoseBuildImpl &build, u32 group_always_on, + map &groupCount) { + /* Boundary reports at zero will always fired and forgotten, no need to + * worry about preventing the stream being marked as exhausted */ + if (build.boundary.report_at_eod.empty()) { + return; + } + + /* Group based stream exhaustion is only done at stream boundaries */ + if (!build.cc.streaming) { + return; + } + + DEBUG_PRINTF("allocating %u as boundary group id\n", group_always_on); + + build.boundary_group_mask = 1ULL << group_always_on; + groupCount[group_always_on]++; +} + +static +void allocateGroupForEvent(RoseBuildImpl &build, u32 
group_always_on, + map &groupCount, u32 *counter) { + if (build.eod_event_literal_id == MO_INVALID_IDX) { + return; + } + + /* Group based stream exhaustion is only done at stream boundaries */ + if (!build.cc.streaming) { + return; + } + + rose_literal_info &info = build.literal_info[build.eod_event_literal_id]; + + if (info.vertices.empty()) { + return; + } + + bool new_group = !groupCount[group_always_on]; + for (RoseVertex v : info.vertices) { + if (build.g[v].left && !isAnchored(build.g[v].left)) { + new_group = false; + } + } + + u32 group; + if (!new_group) { + group = group_always_on; + } else { + group = *counter; + *counter += 1; + } + + DEBUG_PRINTF("allocating %u as eod event group id\n", *counter); + info.group_mask = 1ULL << group; + groupCount[group]++; +} + void assignGroupsToLiterals(RoseBuildImpl &build) { auto &literals = build.literals; auto &literal_info = build.literal_info; @@ -211,6 +266,9 @@ void assignGroupsToLiterals(RoseBuildImpl &build) { counter++; } + allocateGroupForBoundary(build, group_always_on, groupCount); + allocateGroupForEvent(build, group_always_on, groupCount, &counter); + u32 min_start_group = counter; priority_queue> pq; @@ -453,6 +511,7 @@ rose_group getSquashableGroups(const RoseBuildImpl &build) { } DEBUG_PRINTF("squashable groups=0x%llx\n", squashable_groups); + assert(!(squashable_groups & build.boundary_group_mask)); return squashable_groups; } @@ -505,7 +564,7 @@ bool isGroupSquasher(const RoseBuildImpl &build, const u32 id /* literal id */, lit_info.group_mask); if (build.literals.right.at(id).table == ROSE_EVENT) { - DEBUG_PRINTF("event literal, has no groups to squash\n"); + DEBUG_PRINTF("event literal\n"); return false; } @@ -628,7 +687,7 @@ bool isGroupSquasher(const RoseBuildImpl &build, const u32 id /* literal id */, } void findGroupSquashers(RoseBuildImpl &build) { - rose_group forbidden_squash_group = 0; + rose_group forbidden_squash_group = build.boundary_group_mask; for (const auto &e : 
build.literals.right) { if (e.second.delay) { forbidden_squash_group |= build.literal_info[e.first].group_mask; diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 2cefb42a..bfdca80c 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -601,6 +601,8 @@ public: u32 max_rose_anchored_floating_overlap; + rose_group boundary_group_mask = 0; + QueueIndexFactory qif; ReportManager &rm; SomSlotManager &ssm; diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index b33192da..142bf138 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -1082,6 +1082,13 @@ bool isAnchored(const left_id &r) { if (r.graph()) { return isAnchored(*r.graph()); } + if (r.dfa()) { + return r.dfa()->start_anchored == DEAD_STATE; + } + if (r.haig()) { + return r.haig()->start_anchored == DEAD_STATE; + } + // All other types are explicitly anchored. return true; } diff --git a/src/rose/stream.c b/src/rose/stream.c index 9599612f..17139b25 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -512,6 +512,34 @@ void runEagerPrefixesStream(const struct RoseEngine *t, } } +static really_inline +int can_never_match(const struct RoseEngine *t, char *state, + struct hs_scratch *scratch, size_t length, u64a offset) { + struct RoseContext *tctxt = &scratch->tctxt; + + if (tctxt->groups) { + DEBUG_PRINTF("still has active groups\n"); + return 0; + } + + if (offset + length <= t->anchoredDistance) { /* not < as may have eod */ + DEBUG_PRINTF("still in anchored region\n"); + return 0; + } + + if (t->lastByteHistoryIterOffset) { /* last byte history is hard */ + DEBUG_PRINTF("last byte history\n"); + return 0; + } + + if (mmbit_any(getActiveLeafArray(t, 
state), t->activeArrayCount)) { + DEBUG_PRINTF("active leaf\n"); + return 0; + } + + return 1; +} + void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { DEBUG_PRINTF("OH HAI [%llu, %llu)\n", scratch->core_info.buf_offset, scratch->core_info.buf_offset + (u64a)scratch->core_info.len); @@ -647,6 +675,14 @@ exit: if (!can_stop_matching(scratch)) { ensureStreamNeatAndTidy(t, state, scratch, length, offset); } + + if (!told_to_stop_matching(scratch) + && can_never_match(t, state, scratch, length, offset)) { + DEBUG_PRINTF("PATTERN SET IS EXHAUSTED\n"); + scratch->core_info.status = STATUS_EXHAUSTED; + return; + } + DEBUG_PRINTF("DONE STREAMING SCAN, status = %u\n", scratch->core_info.status); return; diff --git a/src/runtime.c b/src/runtime.c index a2ed1026..1ee3efa5 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -214,7 +214,7 @@ void pureLiteralBlockExec(const struct RoseEngine *rose, scratch->tctxt.groups = rose->initialGroups; hwlmExec(ftable, buffer, length, 0, roseCallback, scratch, - rose->initialGroups); + rose->initialGroups & rose->floating_group_mask); } static really_inline @@ -762,7 +762,7 @@ void pureLiteralStreamExec(struct hs_stream *stream_state, const size_t start = 0; hwlmExecStreaming(ftable, scratch, len2, start, roseCallback, scratch, - rose->initialGroups); + rose->initialGroups & rose->floating_group_mask); if (!told_to_stop_matching(scratch) && isAllExhausted(rose, scratch->core_info.exhaustionVector)) { diff --git a/src/scratch.h b/src/scratch.h index b59dc8d4..47f8afa8 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in 
source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -73,8 +73,11 @@ struct catchup_pq { /** \brief Status flag: user requested termination. */ #define STATUS_TERMINATED (1U << 0) -/** \brief Status flag: all possible matches on this stream have - * been raised (i.e. all its exhaustion keys are on.) */ +/** \brief Status flag: it has been determined that it is not possible for this + * stream to raise any more matches. + * + * This may be because all its exhaustion keys are on or for other reasons + * (anchored sections not matching). */ #define STATUS_EXHAUSTED (1U << 1) /** \brief Status flag: Rose requires rebuild as delay literal matched in diff --git a/src/util/container.h b/src/util/container.h index 24f01fd2..e8a16418 100644 --- a/src/util/container.h +++ b/src/util/container.h @@ -202,6 +202,17 @@ void erase_all(C *container, const D &donor) { } } + +template +bool any_of_in(const C &c, Pred p) { + return std::any_of(c.begin(), c.end(), std::move(p)); +} + +template +bool all_of_in(const C &c, Pred p) { + return std::all_of(c.begin(), c.end(), std::move(p)); +} + } // namespace ue2 #ifdef DUMP_SUPPORT