Mirror of https://github.com/VectorCamp/vectorscan.git (synced 2025-09-30 03:34:25 +03:00)
allow streams to be marked as exhausted in more cases
At stream boundaries, we can mark streams as exhausted if there are no groups active and there are no other ways to report matches. This allows us to stop maintaining the history buffer on subsequent stream writes. Previously, streams were only marked as exhausted if a pure highlander case reported all patterns or the outfix in a sole outfix case died.
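Exhaustion is internal to the runtime; the public streaming API is unchanged. As a caller-side illustration, here is a minimal sketch against the public API (the pattern, buffers, and handler are hypothetical; with a single HS_FLAG_SINGLEMATCH pattern the pre-existing highlander path already exhausts, so treat this purely as a usage outline):

#include <hs.h>
#include <stdio.h>

/* Hypothetical match callback: returning 0 continues scanning. */
static int on_match(unsigned int id, unsigned long long from,
                    unsigned long long to, unsigned int flags, void *ctx) {
    printf("pattern %u matched ending at %llu\n", id, to);
    return 0;
}

int main(void) {
    hs_database_t *db = NULL;
    hs_compile_error_t *err = NULL;
    if (hs_compile("foo[0-9]+", HS_FLAG_SINGLEMATCH, HS_MODE_STREAM, NULL,
                   &db, &err) != HS_SUCCESS) {
        hs_free_compile_error(err);
        return 1;
    }

    hs_scratch_t *scratch = NULL;
    hs_alloc_scratch(db, &scratch);

    hs_stream_t *stream = NULL;
    hs_open_stream(db, 0, &stream);

    /* Once nothing in the pattern set can ever match again, the runtime can
     * flag the stream exhausted at a write boundary; later writes like the
     * second one below then skip history-buffer maintenance. */
    hs_scan_stream(stream, "foo123", 6, 0, scratch, on_match, NULL);
    hs_scan_stream(stream, "more data", 9, 0, scratch, on_match, NULL);

    hs_close_stream(stream, scratch, on_match, NULL);
    hs_free_scratch(scratch);
    hs_free_database(db);
    return 0;
}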
@@ -722,7 +722,9 @@ void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u,
         RoseEdge e = add_edge(v, w, g);
         g[e].minBound = 0;
         g[e].maxBound = 0;
-        g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE;
+        /* No need to set history as the event is only delivered at the last
+         * byte anyway - no need to invalidate stale entries. */
+        g[e].history = ROSE_ROLE_HISTORY_NONE;
         DEBUG_PRINTF("accept eod vertex (index=%zu)\n", g[w].index);
     }
 }
@@ -172,6 +172,7 @@ struct RoseResources {
     bool has_lit_delay = false;
     bool has_lit_check = false; // long literal support
     bool has_anchored = false;
+    bool has_floating = false;
     bool has_eod = false;
 };
 
@@ -352,6 +353,11 @@ bool needsCatchup(const RoseBuildImpl &build,
 
 static
 bool isPureFloating(const RoseResources &resources, const CompileContext &cc) {
+    if (!resources.has_floating) {
+        DEBUG_PRINTF("no floating table\n");
+        return false;
+    }
+
     if (resources.has_outfixes || resources.has_suffixes ||
         resources.has_leftfixes) {
         DEBUG_PRINTF("has engines\n");
@@ -429,6 +435,7 @@ u8 pickRuntimeImpl(const RoseBuildImpl &build, const build_context &bc,
     DEBUG_PRINTF("has_lit_delay=%d\n", bc.resources.has_lit_delay);
     DEBUG_PRINTF("has_lit_check=%d\n", bc.resources.has_lit_check);
     DEBUG_PRINTF("has_anchored=%d\n", bc.resources.has_anchored);
+    DEBUG_PRINTF("has_floating=%d\n", bc.resources.has_floating);
     DEBUG_PRINTF("has_eod=%d\n", bc.resources.has_eod);
 
     if (isPureFloating(bc.resources, build.cc)) {
@@ -539,7 +546,10 @@ void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount,
 
 // Get the mask of initial vertices due to root and anchored_root.
 rose_group RoseBuildImpl::getInitialGroups() const {
-    rose_group groups = getSuccGroups(root) | getSuccGroups(anchored_root);
+    rose_group groups = getSuccGroups(root)
+                      | getSuccGroups(anchored_root)
+                      | boundary_group_mask;
+
     DEBUG_PRINTF("initial groups = %016llx\n", groups);
     return groups;
 }
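For context (an assumption spelled out here, not shown in the hunk): rose_group is a 64-bit bitmask with one bit per literal group, and the runtime exhaustion test added below (can_never_match) bails out while any bit is set. ORing boundary_group_mask into the initial groups therefore keeps a stream with pending EOD boundary reports alive; schematically, with hypothetical values:

typedef unsigned long long rose_group; // one bit per literal group (u64a)

// The boundary group keeps the mask nonzero even when every ordinary
// group has been squashed off.
rose_group boundary_group_mask = 1ULL << 5; // hypothetical bit index
rose_group groups = 0;                      // ordinary groups all squashed
groups |= boundary_group_mask;

// can_never_match() (see the runtime hunk below) returns 0 while
// groups != 0, so the stream is not marked exhausted.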
@@ -2227,6 +2237,7 @@ u32 buildLastByteIter(const RoseGraph &g, build_context &bc) {
         auto it = bc.roleStateIndices.find(v);
         if (it != end(bc.roleStateIndices)) {
             lb_roles.push_back(it->second);
+            DEBUG_PRINTF("last byte %u\n", it->second);
         }
     }
 
@@ -5521,6 +5532,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
         currOffset = ROUNDUP_CL(currOffset);
         fmatcherOffset = currOffset;
         currOffset += verify_u32(fsize);
+        bc.resources.has_floating = true;
     }
 
     // Build EOD-anchored HWLM matcher.
@@ -1236,11 +1236,15 @@ void buildRoseSquashMasks(RoseBuildImpl &tbi) {
         }
     }
 
-    rose_group unsquashable = 0;
+    rose_group unsquashable = tbi.boundary_group_mask;
 
     for (u32 lit_id : lit_ids) {
         const rose_literal_info &info = tbi.literal_info[lit_id];
-        if (info.vertices.size() > 1 || !info.delayed_ids.empty()) {
+        if (!info.delayed_ids.empty()
+            || !all_of_in(info.vertices,
+                          [&](RoseVertex v) {
+                              return left == tbi.g[v].left; })) {
+            DEBUG_PRINTF("group %llu is unsquashable\n", info.group_mask);
             unsquashable |= info.group_mask;
         }
     }
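The all_of_in predicate used here (and any_of_in in the next file) are Hyperscan container helpers; assuming they are the usual thin wrappers over the standard algorithms, they behave like this sketch:

#include <algorithm>
#include <iterator>
#include <utility>

// Presumed shape of the util/container.h helpers: apply a predicate
// across a whole container.
template <typename C, typename Pred>
bool all_of_in(const C &c, Pred &&p) {
    return std::all_of(std::begin(c), std::end(c), std::forward<Pred>(p));
}

template <typename C, typename Pred>
bool any_of_in(const C &c, Pred &&p) {
    return std::any_of(std::begin(c), std::end(c), std::forward<Pred>(p));
}

With that reading, the new condition marks a group unsquashable unless the literal has no delayed variants and every one of its vertices shares the same leftfix, and the boundary group is unsquashable from the outset.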
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Intel Corporation
+ * Copyright (c) 2016-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -33,6 +33,9 @@
 
 #include "rose_build_groups.h"
 
+#include "util/boundary_reports.h"
+#include "util/compile_context.h"
+
 #include <queue>
 #include <vector>
 
@@ -71,24 +74,18 @@ bool superStrong(const rose_literal_id &lit) {
 
 static
 bool eligibleForAlwaysOnGroup(const RoseBuildImpl &build, u32 id) {
-    /* returns true if it or any of its delay versions have root role */
-    for (auto v : build.literal_info[id].vertices) {
-        if (build.isRootSuccessor(v)) {
-            NGHolder *h = build.g[v].left.graph.get();
-            if (!h || proper_out_degree(h->startDs, *h)) {
-                return true;
-            }
-        }
-    }
+    auto eligble = [&](RoseVertex v) {
+        return build.isRootSuccessor(v)
+               && (!build.g[v].left || !isAnchored(build.g[v].left));
+    };
+
+    if (any_of_in(build.literal_info[id].vertices, eligble)) {
+        return true;
+    }
 
     for (u32 delayed_id : build.literal_info[id].delayed_ids) {
-        for (auto v : build.literal_info[delayed_id].vertices) {
-            if (build.isRootSuccessor(v)) {
-                NGHolder *h = build.g[v].left.graph.get();
-                if (!h || proper_out_degree(h->startDs, *h)) {
-                    return true;
-                }
-            }
-        }
+        if (any_of_in(build.literal_info[delayed_id].vertices, eligble)) {
+            return true;
+        }
     }
 
@@ -170,6 +167,64 @@ u32 next_available_group(u32 counter, u32 min_start_group) {
     return counter;
 }
 
+static
+void allocateGroupForBoundary(RoseBuildImpl &build, u32 group_always_on,
+                              map<u8, u32> &groupCount) {
+    /* Boundary reports at zero will always fired and forgotten, no need to
+     * worry about preventing the stream being marked as exhausted */
+    if (build.boundary.report_at_eod.empty()) {
+        return;
+    }
+
+    /* Group based stream exhaustion is only done at stream boundaries */
+    if (!build.cc.streaming) {
+        return;
+    }
+
+    DEBUG_PRINTF("allocating %u as boundary group id\n", group_always_on);
+
+    build.boundary_group_mask = 1ULL << group_always_on;
+    groupCount[group_always_on]++;
+}
+
+static
+void allocateGroupForEvent(RoseBuildImpl &build, u32 group_always_on,
+                           map<u8, u32> &groupCount, u32 *counter) {
+    if (build.eod_event_literal_id == MO_INVALID_IDX) {
+        return;
+    }
+
+    /* Group based stream exhaustion is only done at stream boundaries */
+    if (!build.cc.streaming) {
+        return;
+    }
+
+    rose_literal_info &info = build.literal_info[build.eod_event_literal_id];
+
+    if (info.vertices.empty()) {
+        return;
+    }
+
+    bool new_group = !groupCount[group_always_on];
+    for (RoseVertex v : info.vertices) {
+        if (build.g[v].left && !isAnchored(build.g[v].left)) {
+            new_group = false;
+        }
+    }
+
+    u32 group;
+    if (!new_group) {
+        group = group_always_on;
+    } else {
+        group = *counter;
+        *counter += 1;
+    }
+
+    DEBUG_PRINTF("allocating %u as eod event group id\n", *counter);
+    info.group_mask = 1ULL << group;
+    groupCount[group]++;
+}
+
 void assignGroupsToLiterals(RoseBuildImpl &build) {
     auto &literals = build.literals;
     auto &literal_info = build.literal_info;
@@ -211,6 +266,9 @@ void assignGroupsToLiterals(RoseBuildImpl &build) {
         counter++;
     }
 
+    allocateGroupForBoundary(build, group_always_on, groupCount);
+    allocateGroupForEvent(build, group_always_on, groupCount, &counter);
+
     u32 min_start_group = counter;
     priority_queue<tuple<s32, s32, u32>> pq;
 
@@ -453,6 +511,7 @@ rose_group getSquashableGroups(const RoseBuildImpl &build) {
     }
 
     DEBUG_PRINTF("squashable groups=0x%llx\n", squashable_groups);
+    assert(!(squashable_groups & build.boundary_group_mask));
     return squashable_groups;
 }
 
@@ -505,7 +564,7 @@ bool isGroupSquasher(const RoseBuildImpl &build, const u32 id /* literal id */,
                  lit_info.group_mask);
 
     if (build.literals.right.at(id).table == ROSE_EVENT) {
-        DEBUG_PRINTF("event literal, has no groups to squash\n");
+        DEBUG_PRINTF("event literal\n");
         return false;
     }
 
@@ -628,7 +687,7 @@ bool isGroupSquasher(const RoseBuildImpl &build, const u32 id /* literal id */,
 }
 
 void findGroupSquashers(RoseBuildImpl &build) {
-    rose_group forbidden_squash_group = 0;
+    rose_group forbidden_squash_group = build.boundary_group_mask;
     for (const auto &e : build.literals.right) {
         if (e.second.delay) {
             forbidden_squash_group |= build.literal_info[e.first].group_mask;
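Taken together, the compile-side hunks thread the boundary group through three places; a schematic recap using the identifiers from the hunks above (not new API):

// 1. assignGroupsToLiterals(): reserve an always-on bit when EOD boundary
//    reports (or an EOD event literal) exist in streaming mode.
allocateGroupForBoundary(build, group_always_on, groupCount);
allocateGroupForEvent(build, group_always_on, groupCount, &counter);

// 2. getInitialGroups(): light that bit from the very start of the stream.
rose_group groups = getSuccGroups(root)
                  | getSuccGroups(anchored_root)
                  | boundary_group_mask;

// 3. findGroupSquashers() / buildRoseSquashMasks(): keep literal squashing
//    from ever clearing the bit, so a nonzero group mask reliably means
//    "this stream may still need to report something".
rose_group forbidden_squash_group = build.boundary_group_mask;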
@@ -601,6 +601,8 @@ public:
 
     u32 max_rose_anchored_floating_overlap;
 
+    rose_group boundary_group_mask = 0;
+
     QueueIndexFactory qif;
     ReportManager &rm;
     SomSlotManager &ssm;
@@ -1082,6 +1082,13 @@ bool isAnchored(const left_id &r) {
     if (r.graph()) {
         return isAnchored(*r.graph());
     }
+    if (r.dfa()) {
+        return r.dfa()->start_anchored == DEAD_STATE;
+    }
+    if (r.haig()) {
+        return r.haig()->start_anchored == DEAD_STATE;
+    }
+
     // All other types are explicitly anchored.
     return true;
 }
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -512,6 +512,34 @@ void runEagerPrefixesStream(const struct RoseEngine *t,
     }
 }
 
+static really_inline
+int can_never_match(const struct RoseEngine *t, char *state,
+                    struct hs_scratch *scratch, size_t length, u64a offset) {
+    struct RoseContext *tctxt = &scratch->tctxt;
+
+    if (tctxt->groups) {
+        DEBUG_PRINTF("still has active groups\n");
+        return 0;
+    }
+
+    if (offset + length <= t->anchoredDistance) { /* not < as may have eod */
+        DEBUG_PRINTF("still in anchored region\n");
+        return 0;
+    }
+
+    if (t->lastByteHistoryIterOffset) { /* last byte history is hard */
+        DEBUG_PRINTF("last byte history\n");
+        return 0;
+    }
+
+    if (mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) {
+        DEBUG_PRINTF("active leaf\n");
+        return 0;
+    }
+
+    return 1;
+}
+
 void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
     DEBUG_PRINTF("OH HAI [%llu, %llu)\n", scratch->core_info.buf_offset,
                  scratch->core_info.buf_offset + (u64a)scratch->core_info.len);
@@ -647,6 +675,14 @@ exit:
     if (!can_stop_matching(scratch)) {
         ensureStreamNeatAndTidy(t, state, scratch, length, offset);
     }
+
+    if (!told_to_stop_matching(scratch)
+        && can_never_match(t, state, scratch, length, offset)) {
+        DEBUG_PRINTF("PATTERN SET IS EXHAUSTED\n");
+        scratch->core_info.status = STATUS_EXHAUSTED;
+        return;
+    }
+
     DEBUG_PRINTF("DONE STREAMING SCAN, status = %u\n",
                  scratch->core_info.status);
     return;
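Setting STATUS_EXHAUSTED here is what makes later writes cheap: the stream-scan entry point elsewhere in the runtime already refuses to scan a stream whose status byte is broken. A paraphrased sketch of that pre-existing guard (not copied from this commit; exact wording in the runtime may differ):

/* Sketch of the guard at the top of the stream scan path: a stream whose
 * status byte has any "broken" bit set is never scanned again. */
u8 status = getStreamStatus(state);
if (status & (STATUS_TERMINATED | STATUS_EXHAUSTED | STATUS_ERROR)) {
    DEBUG_PRINTF("stream is broken, halting scan\n");
    if (status & STATUS_ERROR) {
        return HS_UNKNOWN_ERROR;
    } else if (status & STATUS_TERMINATED) {
        return HS_SCAN_TERMINATED;
    }
    return HS_SUCCESS; /* exhausted: accept and discard the write */
}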