mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
shift early_dfa construction earlier
This commit is contained in:
parent
caa46201f0
commit
512c049493
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -251,7 +251,7 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
|
||||
return true;
|
||||
}
|
||||
|
||||
if (doViolet(*ng.rose, g, w.prefilter, cc)) {
|
||||
if (doViolet(*ng.rose, g, w.prefilter, false, ng.rm, cc)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -272,7 +272,7 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
|
||||
return true;
|
||||
}
|
||||
|
||||
if (doViolet(*ng.rose, g, w.prefilter, cc)) {
|
||||
if (doViolet(*ng.rose, g, w.prefilter, true, ng.rm, cc)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -65,6 +65,26 @@ bool can_exhaust(const NGHolder &g, const ReportManager &rm) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Replaces all existing reports on the holder with a single report id.
 *
 * Every vertex has its report set emptied; afterwards \a internal_report is
 * attached to each predecessor of accept and to each predecessor of
 * acceptEod (accept itself is left untouched if it appears among the
 * latter).
 */
void set_report(NGHolder &g, ReportID internal_report) {
    // Start from a clean slate: no vertex keeps a previous report.
    for (auto vertex : vertices_range(g)) {
        g[vertex].reports.clear();
    }

    // Vertices leading into accept raise the new report.
    for (auto pred : inv_adjacent_vertices_range(g.accept, g)) {
        g[pred].reports.insert(internal_report);
    }

    // Vertices leading into acceptEod do too, with the exception of the
    // accept vertex itself.
    for (auto pred : inv_adjacent_vertices_range(g.acceptEod, g)) {
        if (pred != g.accept) {
            g[pred].reports.insert(internal_report);
        }
    }
}
|
||||
|
||||
/** Derive a maximum offset for the graph from the max_offset values of its
|
||||
* reports. Returns MAX_OFFSET for inf. */
|
||||
u64a findMaxOffset(const NGHolder &g, const ReportManager &rm) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -48,6 +48,10 @@ std::set<ReportID> all_reports(const NGHolder &g);
|
||||
/** True if *all* reports in the graph are exhaustible. */
|
||||
bool can_exhaust(const NGHolder &g, const ReportManager &rm);
|
||||
|
||||
/** Replaces all existing reports on the holder with the provided internal
|
||||
* report id. */
|
||||
void set_report(NGHolder &g, ReportID internal_report);
|
||||
|
||||
/** Derive a maximum offset for the graph from the max_offset values of its
|
||||
* reports. Returns MAX_OFFSET for inf. */
|
||||
u64a findMaxOffset(const NGHolder &g, const ReportManager &rm);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -38,6 +38,8 @@
|
||||
#include "ng_holder.h"
|
||||
#include "ng_is_equal.h"
|
||||
#include "ng_literal_analysis.h"
|
||||
#include "ng_limex.h"
|
||||
#include "ng_mcclellan.h"
|
||||
#include "ng_netflow.h"
|
||||
#include "ng_prune.h"
|
||||
#include "ng_redundancy.h"
|
||||
@ -47,6 +49,7 @@
|
||||
#include "ng_split.h"
|
||||
#include "ng_util.h"
|
||||
#include "ng_width.h"
|
||||
#include "nfa/rdfa.h"
|
||||
#include "rose/rose_build.h"
|
||||
#include "rose/rose_build_util.h"
|
||||
#include "rose/rose_in_dump.h"
|
||||
@ -2616,7 +2619,110 @@ void rehomeEodSuffixes(RoseInGraph &vg) {
|
||||
/* old accept vertices will be tidied up by final pruneUseless() call */
|
||||
}
|
||||
|
||||
static
|
||||
bool tryForEarlyDfa(const NGHolder &h, const CompileContext &cc) {
|
||||
switch (h.kind) {
|
||||
case NFA_OUTFIX: /* 'prefix' of eod */
|
||||
case NFA_PREFIX:
|
||||
return cc.grey.earlyMcClellanPrefix;
|
||||
case NFA_INFIX:
|
||||
return cc.grey.earlyMcClellanInfix;
|
||||
case NFA_SUFFIX:
|
||||
return cc.grey.earlyMcClellanSuffix;
|
||||
default:
|
||||
DEBUG_PRINTF("kind %u\n", (u32)h.kind);
|
||||
assert(0);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
vector<vector<CharReach>> getDfaTriggers(RoseInGraph &vg,
|
||||
const vector<RoseInEdge> &edges,
|
||||
bool *single_trigger) {
|
||||
vector<vector<CharReach>> triggers;
|
||||
u32 min_offset = ~0U;
|
||||
u32 max_offset = 0;
|
||||
for (const auto &e : edges) {
|
||||
RoseInVertex s = source(e, vg);
|
||||
if (vg[s].type == RIV_LITERAL) {
|
||||
triggers.push_back(as_cr_seq(vg[s].s));
|
||||
}
|
||||
ENSURE_AT_LEAST(&max_offset, vg[s].max_offset);
|
||||
LIMIT_TO_AT_MOST(&min_offset, vg[s].min_offset);
|
||||
}
|
||||
|
||||
*single_trigger = min_offset == max_offset;
|
||||
DEBUG_PRINTF("trigger offset (%u, %u)\n", min_offset, max_offset);
|
||||
|
||||
return triggers;
|
||||
}
|
||||
|
||||
static
|
||||
bool doEarlyDfa(RoseBuild &rose, RoseInGraph &vg, NGHolder &h,
|
||||
const vector<RoseInEdge> &edges, const ReportManager &rm,
|
||||
const CompileContext &cc) {
|
||||
DEBUG_PRINTF("trying for dfa\n");
|
||||
|
||||
bool single_trigger;
|
||||
for (const auto &e : edges) {
|
||||
if (vg[target(e, vg)].type == RIV_ACCEPT_EOD) {
|
||||
/* TODO: support eod prefixes */
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
auto triggers = getDfaTriggers(vg, edges, &single_trigger);
|
||||
|
||||
/* TODO: literal delay things */
|
||||
if (!generates_callbacks(h)) {
|
||||
set_report(h, rose.getNewNfaReport());
|
||||
}
|
||||
|
||||
shared_ptr<raw_dfa> dfa = buildMcClellan(h, &rm, single_trigger, triggers,
|
||||
cc.grey);
|
||||
|
||||
if (!dfa) {
|
||||
return false;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("dfa ok\n");
|
||||
for (const auto &e : edges) {
|
||||
vg[e].dfa = dfa;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
void ensureImplementable(RoseBuild &rose, RoseInGraph &vg,
|
||||
const ReportManager &rm, const CompileContext &cc) {
|
||||
map<const NGHolder *, vector<RoseInEdge> > edges_by_graph;
|
||||
vector<NGHolder *> graphs;
|
||||
for (const RoseInEdge &ve : edges_range(vg)) {
|
||||
if (vg[ve].graph) {
|
||||
NGHolder *h = vg[ve].graph.get();
|
||||
if (!contains(edges_by_graph, h)) {
|
||||
graphs.push_back(h);
|
||||
}
|
||||
edges_by_graph[h].push_back(ve);
|
||||
}
|
||||
}
|
||||
for (NGHolder *h : graphs) {
|
||||
if (isImplementableNFA(*h, &rm, cc)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (tryForEarlyDfa(*h, cc)
|
||||
&& doEarlyDfa(rose, vg, *h, edges_by_graph[h], rm, cc)) {
|
||||
continue;
|
||||
}
|
||||
DEBUG_PRINTF("eek\n");
|
||||
}
|
||||
}
|
||||
|
||||
bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter,
|
||||
bool last_chance, const ReportManager &rm,
|
||||
const CompileContext &cc) {
|
||||
assert(!can_never_match(h));
|
||||
|
||||
@ -2663,10 +2769,6 @@ bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter,
|
||||
decomposeLiteralChains(vg, cc);
|
||||
}
|
||||
|
||||
/* Step 5: avoid unimplementable, or overly large engines if possible */
|
||||
/* TODO: later - ng_rose is currently acting as a backstop */
|
||||
|
||||
/* Step 6: send to rose */
|
||||
rehomeEodSuffixes(vg);
|
||||
removeRedundantLiterals(vg, cc);
|
||||
|
||||
@ -2674,6 +2776,14 @@ bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter,
|
||||
dumpPreRoseGraph(vg, cc.grey);
|
||||
renumber_vertices(vg);
|
||||
calcVertexOffsets(vg);
|
||||
|
||||
|
||||
/* Step 5: avoid unimplementable, or overly large engines if possible */
|
||||
if (last_chance) {
|
||||
ensureImplementable(rose, vg, rm, cc);
|
||||
}
|
||||
|
||||
/* Step 6: send to rose */
|
||||
bool rv = rose.addRose(vg, prefilter);
|
||||
DEBUG_PRINTF("violet: %s\n", rv ? "success" : "fail");
|
||||
return rv;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -41,10 +41,12 @@ class NGHolder;
|
||||
class RoseBuild;
|
||||
|
||||
struct CompileContext;
|
||||
class ReportManager;
|
||||
|
||||
/** \brief Attempt to consume the entire pattern in graph \a h with Rose.
|
||||
* Returns true if successful. */
|
||||
bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter,
|
||||
bool last_chance, const ReportManager &rm,
|
||||
const CompileContext &cc);
|
||||
|
||||
} // namespace ue2
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -1033,8 +1033,8 @@ bool empty(const GraphT &g) {
|
||||
return vi == ve;
|
||||
}
|
||||
|
||||
/* We only try to implement as a dfa if a non-nullptr as_dfa is provided to return
|
||||
* the raw dfa to. */
|
||||
/* We only try to implement as a dfa if a non-nullptr as_dfa is provided to
|
||||
* return the raw dfa to. */
|
||||
static
|
||||
bool canImplementGraph(RoseBuildImpl *tbi, const RoseInGraph &in, NGHolder &h,
|
||||
const vector<RoseInEdge> &edges, bool prefilter,
|
||||
@ -1105,7 +1105,7 @@ bool canImplementGraph(RoseBuildImpl *tbi, const RoseInGraph &in, NGHolder &h,
|
||||
}
|
||||
|
||||
if (!generates_callbacks(h)) {
|
||||
setReportId(h, tbi->getNewNfaReport());
|
||||
set_report(h, tbi->getNewNfaReport());
|
||||
}
|
||||
|
||||
bool single_trigger = min_offset == max_offset;
|
||||
@ -1601,6 +1601,8 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter,
|
||||
|
||||
for (const auto &e : edges_range(in)) {
|
||||
if (!in[e].graph) {
|
||||
assert(!in[e].dfa);
|
||||
assert(!in[e].haig);
|
||||
continue; // no graph
|
||||
}
|
||||
|
||||
@ -1616,6 +1618,11 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter,
|
||||
ordered_graphs.push_back(h);
|
||||
}
|
||||
graphs[h].push_back(e);
|
||||
if (in[e].dfa) {
|
||||
assert(!contains(bd.early_dfas, h)
|
||||
|| bd.early_dfas[h] == in[e].dfa);
|
||||
bd.early_dfas[h] = in[e].dfa;
|
||||
}
|
||||
}
|
||||
|
||||
assert(ordered_graphs.size() == graphs.size());
|
||||
@ -1626,7 +1633,8 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter,
|
||||
const vector<RoseInEdge> &h_edges = graphs.at(h);
|
||||
unique_ptr<raw_dfa> as_dfa;
|
||||
/* allow finalChance as fallback is basically an outfix at this point */
|
||||
if (!canImplementGraph(this, in, *h, h_edges, prefilter, rm, cc,
|
||||
if (!contains(bd.early_dfas, h)
|
||||
&& !canImplementGraph(this, in, *h, h_edges, prefilter, rm, cc,
|
||||
finalChance, &as_dfa)) {
|
||||
return false;
|
||||
}
|
||||
@ -1649,7 +1657,7 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter,
|
||||
if (!generates_callbacks(whatRoseIsThis(in, e))
|
||||
&& !contains(bd.early_dfas, &h)
|
||||
&& in[target(e, in)].type != RIV_ACCEPT_EOD) {
|
||||
setReportId(h, getNewNfaReport());
|
||||
set_report(h, getNewNfaReport());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -480,7 +480,7 @@ void addTransientMask(RoseBuildImpl &build, const vector<CharReach> &mask,
|
||||
|
||||
// Everyone gets the same report ID.
|
||||
ReportID mask_report = build.getNewNfaReport();
|
||||
setReportId(*mask_graph, mask_report);
|
||||
set_report(*mask_graph, mask_report);
|
||||
|
||||
// Build the HWLM literal mask.
|
||||
vector<u8> msk, cmp;
|
||||
|
@ -166,7 +166,7 @@ bool delayLiteralWithPrefix(RoseBuildImpl &tbi, RoseVertex v, u32 lit_id,
|
||||
|
||||
shared_ptr<NGHolder> h = makeRosePrefix(lit.s);
|
||||
ReportID prefix_report = 0;
|
||||
setReportId(*h, prefix_report);
|
||||
set_report(*h, prefix_report);
|
||||
|
||||
if (!isImplementableNFA(*h, &tbi.rm, tbi.cc)) {
|
||||
DEBUG_PRINTF("prefix not implementable\n");
|
||||
|
@ -624,8 +624,6 @@ size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b);
|
||||
ue2_literal findNonOverlappingTail(const std::set<ue2_literal> &lits,
|
||||
const ue2_literal &s);
|
||||
|
||||
void setReportId(NGHolder &g, ReportID id);
|
||||
|
||||
#ifndef NDEBUG
|
||||
bool roseHasTops(const RoseBuildImpl &build, RoseVertex v);
|
||||
bool hasOrphanedTops(const RoseBuildImpl &build);
|
||||
|
@ -859,27 +859,6 @@ bool RoseDedupeAuxImpl::requiresDedupeSupport(
|
||||
return false;
|
||||
}
|
||||
|
||||
// Sets the report ID for all vertices connected to an accept to `id`.
|
||||
void setReportId(NGHolder &g, ReportID id) {
|
||||
// First, wipe the report IDs on all vertices.
|
||||
for (auto v : vertices_range(g)) {
|
||||
g[v].reports.clear();
|
||||
}
|
||||
|
||||
// Any predecessors of accept get our id.
|
||||
for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
|
||||
g[v].reports.insert(id);
|
||||
}
|
||||
|
||||
// Same for preds of acceptEod, except accept itself.
|
||||
for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
|
||||
if (v == g.accept) {
|
||||
continue;
|
||||
}
|
||||
g[v].reports.insert(id);
|
||||
}
|
||||
}
|
||||
|
||||
bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b) {
|
||||
ORDER_CHECK(minBound);
|
||||
ORDER_CHECK(maxBound);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -55,6 +55,7 @@ namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
struct raw_som_dfa;
|
||||
struct raw_dfa;
|
||||
|
||||
enum RoseInVertexType {
|
||||
RIV_LITERAL,
|
||||
@ -166,9 +167,12 @@ struct RoseInEdgeProps {
|
||||
/** \brief Maximum bound on 'dot' repeat between literals. */
|
||||
u32 maxBound;
|
||||
|
||||
/** \brief Prefix graph. Graph is end to (end - lag). */
|
||||
/** \brief Graph on edge. Graph is end to (end - lag). */
|
||||
std::shared_ptr<NGHolder> graph;
|
||||
|
||||
/** \brief DFA version of graph, if we have already determinised. */
|
||||
std::shared_ptr<raw_dfa> dfa;
|
||||
|
||||
/** \brief Haig version of graph, if required. */
|
||||
std::shared_ptr<raw_som_dfa> haig;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user