mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
rose: remove no-longer-used convertBadLeaves pass
This commit is contained in:
parent
bc2f336d9d
commit
8f1b3c89fa
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -105,8 +105,6 @@ Grey::Grey(void) :
|
||||
roseGraphReduction(true),
|
||||
roseRoleAliasing(true),
|
||||
roseMasks(true),
|
||||
roseMaxBadLeafLength(5),
|
||||
roseConvertInfBadLeaves(true),
|
||||
roseConvertFloodProneSuffixes(true),
|
||||
roseMergeRosesDuringAliasing(true),
|
||||
roseMultiTopRoses(true),
|
||||
@ -272,8 +270,6 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
||||
G_UPDATE(roseGraphReduction);
|
||||
G_UPDATE(roseRoleAliasing);
|
||||
G_UPDATE(roseMasks);
|
||||
G_UPDATE(roseMaxBadLeafLength);
|
||||
G_UPDATE(roseConvertInfBadLeaves);
|
||||
G_UPDATE(roseConvertFloodProneSuffixes);
|
||||
G_UPDATE(roseMergeRosesDuringAliasing);
|
||||
G_UPDATE(roseMultiTopRoses);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -118,8 +118,6 @@ struct Grey {
|
||||
bool roseGraphReduction;
|
||||
bool roseRoleAliasing;
|
||||
bool roseMasks;
|
||||
u32 roseMaxBadLeafLength;
|
||||
bool roseConvertInfBadLeaves;
|
||||
bool roseConvertFloodProneSuffixes;
|
||||
bool roseMergeRosesDuringAliasing;
|
||||
bool roseMultiTopRoses;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -1644,8 +1644,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildRose(u32 minWidth) {
|
||||
dedupeLeftfixes(*this);
|
||||
aliasRoles(*this, false); // Don't merge leftfixes.
|
||||
dedupeLeftfixes(*this);
|
||||
|
||||
convertBadLeaves(*this);
|
||||
uncalcLeaves(*this);
|
||||
|
||||
/* note the leftfixes which do not need to keep state across stream
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -76,301 +76,6 @@ NFAVertex addHolderVertex(const CharReach &cr, NGHolder &out) {
|
||||
return v;
|
||||
}
|
||||
|
||||
// Returns the first and last vertices.
|
||||
static
|
||||
pair<NFAVertex, NFAVertex> addLiteralVertices(const RoseGraph &g,
|
||||
const RoseLiteralMap &literals,
|
||||
const RoseVertex &t_v,
|
||||
NGHolder &out) {
|
||||
// We have limited cases that we support: one literal of arbitrary length,
|
||||
// or a bunch of literals of length one that just become a vertex with
|
||||
// their reach unioned together.
|
||||
|
||||
// TODO: generalise this and handle more cases.
|
||||
|
||||
const auto &litids = g[t_v].literals;
|
||||
if (litids.size() > 1) {
|
||||
// Multiple literals of len 1.
|
||||
CharReach v_cr;
|
||||
for (const auto &lit_id : litids) {
|
||||
const rose_literal_id &litv = literals.right.at(lit_id);
|
||||
assert(litv.s.length() == 1);
|
||||
v_cr |= *litv.s.begin();
|
||||
}
|
||||
|
||||
NFAVertex v = addHolderVertex(v_cr, out);
|
||||
return make_pair(v, v);
|
||||
}
|
||||
|
||||
// Otherwise, we have a single literal, could be of arbitrary length.
|
||||
assert(litids.size() == 1);
|
||||
u32 lit_id = *(litids.begin());
|
||||
const rose_literal_id &litv = literals.right.at(lit_id);
|
||||
assert(!litv.s.empty());
|
||||
|
||||
ue2_literal::const_iterator it = litv.s.begin(), ite = litv.s.end();
|
||||
NFAVertex first = addHolderVertex(*it, out), last = first;
|
||||
for (++it; it != ite; ++it) {
|
||||
NFAVertex v = addHolderVertex(*it, out);
|
||||
add_edge(last, v, out);
|
||||
last = v;
|
||||
}
|
||||
|
||||
return make_pair(first, last);
|
||||
}
|
||||
|
||||
static
|
||||
unique_ptr<NGHolder> convertLeafToHolder(const RoseGraph &g,
|
||||
const RoseEdge &t_e,
|
||||
const RoseLiteralMap &literals) {
|
||||
RoseVertex t_v = target(t_e, g); // leaf vertex for demolition.
|
||||
u32 minBound = g[t_e].minBound;
|
||||
u32 maxBound = g[t_e].maxBound;
|
||||
|
||||
const CharReach dot = CharReach::dot();
|
||||
|
||||
assert(!g[t_v].left);
|
||||
|
||||
auto out = ue2::make_unique<NGHolder>(NFA_SUFFIX);
|
||||
|
||||
// Repeats wired to the start of the graph.
|
||||
DEBUG_PRINTF("bounds [%u, %u]\n", minBound, maxBound);
|
||||
u32 i = 1;
|
||||
NFAVertex last = out->start;
|
||||
for (; i <= minBound; i++) {
|
||||
NFAVertex v = addHolderVertex(dot, *out);
|
||||
add_edge(last, v, *out);
|
||||
last = v;
|
||||
}
|
||||
NFAVertex last_mand = last;
|
||||
if (maxBound != ROSE_BOUND_INF) {
|
||||
for (; i <= maxBound; i++) {
|
||||
NFAVertex v = addHolderVertex(dot, *out);
|
||||
add_edge(last_mand, v, *out);
|
||||
if (last != last_mand) {
|
||||
add_edge(last, v, *out);
|
||||
}
|
||||
last = v;
|
||||
}
|
||||
} else {
|
||||
if (minBound) {
|
||||
add_edge(last_mand, last_mand, *out);
|
||||
} else {
|
||||
NFAVertex v = addHolderVertex(dot, *out);
|
||||
add_edge(last_mand, v, *out);
|
||||
add_edge(v, v, *out);
|
||||
last = v;
|
||||
}
|
||||
}
|
||||
|
||||
setTops(*out);
|
||||
|
||||
// Literal vertices wired to accept.
|
||||
NFAVertex litfirst, litlast;
|
||||
tie(litfirst, litlast) = addLiteralVertices(g, literals, t_v, *out);
|
||||
add_edge(last, litfirst, *out);
|
||||
if (last != last_mand) {
|
||||
add_edge(last_mand, litfirst, *out);
|
||||
}
|
||||
add_edge(litlast, out->accept, *out);
|
||||
insert(&(*out)[litlast].reports, g[t_v].reports);
|
||||
return out;
|
||||
}
|
||||
|
||||
static
|
||||
bool areLiteralsConvertible(const RoseLiteralMap &literals,
|
||||
const flat_set<u32> &ids) {
|
||||
// Every literal in v must have the same length.
|
||||
|
||||
// TODO: at the moment, we only handle two cases in construction: (a) one
|
||||
// literal of arbitrary length, and (b) many literals, but all with length
|
||||
// 1.
|
||||
|
||||
if (ids.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto it = ids.begin(), ite = ids.end();
|
||||
const size_t len = literals.right.at(*it).elength();
|
||||
|
||||
// Note: len may be 0 for cases with special literals, like EOD prefixes.
|
||||
|
||||
if (len != 1 && ids.size() != 1) {
|
||||
DEBUG_PRINTF("more than one literal of len > 1\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check the others all have the same length.
|
||||
while (++it != ite) {
|
||||
if (literals.right.at(*it).elength() != len) {
|
||||
DEBUG_PRINTF("literals have different lengths\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns true if the given vertex doesn't qualify as a bad leaf to be eaten
|
||||
// by an NFA.
|
||||
static
|
||||
bool isUnconvertibleLeaf(const RoseBuildImpl &tbi, const RoseVertex v) {
|
||||
const RoseGraph &g = tbi.g;
|
||||
|
||||
if (in_degree(v, g) != 1) {
|
||||
DEBUG_PRINTF("more than one in-edge\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
const RoseEdge &e = *(in_edges(v, g).first);
|
||||
RoseVertex u = source(e, g);
|
||||
|
||||
if (!g[u].reports.empty()) {
|
||||
DEBUG_PRINTF("pred has accept\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
if (g[u].suffix) {
|
||||
// TODO: this could be handled by adding new vertices to the existing
|
||||
// suffix.
|
||||
DEBUG_PRINTF("pred already has suffix\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
if (tbi.isAnyStart(u)) {
|
||||
DEBUG_PRINTF("fail start\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
if (tbi.isAnchored(u)) {
|
||||
/* TODO need to check for possible anchored queue overflow? maybe? */
|
||||
DEBUG_PRINTF("fail anchored\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
if (g[v].reports.empty() || g[v].eod_accept) {
|
||||
DEBUG_PRINTF("bad accept\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
if (g[v].suffix) {
|
||||
DEBUG_PRINTF("suffix\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
if (g[v].left) {
|
||||
/* TODO: we really should handle this case as we would be checking
|
||||
* an nfa each time. However it requires completely different graph
|
||||
* fiddling logic */
|
||||
DEBUG_PRINTF("rose prefix action\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!areLiteralsConvertible(tbi.literals, g[v].literals)) {
|
||||
DEBUG_PRINTF("fail length\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
u32 max_lit_len = tbi.maxLiteralLen(v);
|
||||
|
||||
u32 maxbound = max_lit_len == 1 ? 124 : 32; // arbitrary magic numbers
|
||||
if (g[e].maxBound > maxbound && g[e].maxBound != ROSE_BOUND_INF) {
|
||||
DEBUG_PRINTF("fail maxbound (%u)\n", maxbound);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (g[e].maxBound == ROSE_BOUND_INF) {
|
||||
/* slightly risky as nfa won't die */
|
||||
DEBUG_PRINTF("fail: .*\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Find all of the leaves with literals whose length is <= len.
|
||||
static
|
||||
void findBadLeaves(RoseBuildImpl &tbi, set<RoseVertex> &bad) {
|
||||
RoseGraph &g = tbi.g;
|
||||
u32 len = tbi.cc.grey.roseMaxBadLeafLength;
|
||||
|
||||
for (const auto &m : tbi.literals.right) {
|
||||
if (m.second.s.length() > len) {
|
||||
continue;
|
||||
}
|
||||
u32 lid = m.first;
|
||||
DEBUG_PRINTF("%u is a short lit (length %zu)\n", lid,
|
||||
m.second.s.length());
|
||||
|
||||
if (tbi.isDelayed(lid)) {
|
||||
DEBUG_PRINTF("delayed, skipping!\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
const rose_literal_info &info = tbi.literal_info[lid];
|
||||
|
||||
for (auto v : info.vertices) {
|
||||
if (!isLeafNode(v, g)) {
|
||||
continue;
|
||||
}
|
||||
if (isUnconvertibleLeaf(tbi, v)) {
|
||||
continue; // we don't want to touch it
|
||||
}
|
||||
|
||||
// This leaf may have a predecessor with more than one successor,
|
||||
// in which case we want to clone the pred just to support this
|
||||
// leaf.
|
||||
const RoseEdge &e = *in_edges(v, g).first;
|
||||
RoseVertex u = source(e, g);
|
||||
if (out_degree(u, g) != 1) {
|
||||
DEBUG_PRINTF("re-homing %zu to cloned pred\n", g[v].index);
|
||||
RoseVertex u2 = tbi.cloneVertex(u);
|
||||
for (const auto &e_in : in_edges_range(u, g)) {
|
||||
add_edge(source(e_in, g), u2, g[e_in], g);
|
||||
}
|
||||
add_edge(u2, v, g[e], g);
|
||||
remove_edge(e, g);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("%zu is a bad leaf vertex\n", g[v].index);
|
||||
bad.insert(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Replaces each bad leaf found by findBadLeaves() with an NFA suffix attached
// to the leaf's predecessor, then removes the converted leaf vertices from
// the Rose graph.
void convertBadLeaves(RoseBuildImpl &tbi) {
    RoseGraph &g = tbi.g;

    set<RoseVertex> bad;
    findBadLeaves(tbi, bad);
    DEBUG_PRINTF("found %zu bad leaves\n", bad.size());

    if (bad.empty()) {
        return;
    }

    vector<RoseVertex> dead;
    for (auto leaf : bad) {
        assert(in_degree(leaf, g));

        const RoseEdge &e = *(in_edges(leaf, g).first);

        shared_ptr<NGHolder> h = convertLeafToHolder(g, e, tbi.literals);
        if (num_vertices(*h) < NFA_MAX_STATES) {
            // Hang the new holder off the predecessor as its suffix; the
            // leaf itself is no longer needed.
            RoseVertex u = source(e, g);
            assert(!g[u].suffix);
            g[u].suffix.graph = h;
            DEBUG_PRINTF("%zu's nfa holder %p\n", g[u].index, h.get());

            dead.push_back(leaf);
        } else {
            assert(0); // too big!
        }
    }

    tbi.removeVertices(dead);
}
|
||||
|
||||
static
|
||||
size_t suffixFloodLen(const ue2_literal &s) {
|
||||
if (s.empty()) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -34,7 +34,6 @@ namespace ue2 {
|
||||
class RoseBuildImpl;
|
||||
|
||||
void convertFloodProneSuffixes(RoseBuildImpl &tbi);
|
||||
void convertBadLeaves(RoseBuildImpl &tbi);
|
||||
void convertPrefixToBounds(RoseBuildImpl &tbi);
|
||||
void convertAnchPrefixToBounds(RoseBuildImpl &tbi);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user