diff --git a/src/grey.cpp b/src/grey.cpp index 340a34bf..f0374b6d 100644 --- a/src/grey.cpp +++ b/src/grey.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -105,8 +105,6 @@ Grey::Grey(void) : roseGraphReduction(true), roseRoleAliasing(true), roseMasks(true), - roseMaxBadLeafLength(5), - roseConvertInfBadLeaves(true), roseConvertFloodProneSuffixes(true), roseMergeRosesDuringAliasing(true), roseMultiTopRoses(true), @@ -272,8 +270,6 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(roseGraphReduction); G_UPDATE(roseRoleAliasing); G_UPDATE(roseMasks); - G_UPDATE(roseMaxBadLeafLength); - G_UPDATE(roseConvertInfBadLeaves); G_UPDATE(roseConvertFloodProneSuffixes); G_UPDATE(roseMergeRosesDuringAliasing); G_UPDATE(roseMultiTopRoses); diff --git a/src/grey.h b/src/grey.h index 4882af7d..7a6a168b 100644 --- a/src/grey.h +++ b/src/grey.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -118,8 +118,6 @@ struct Grey { bool roseGraphReduction; bool roseRoleAliasing; bool roseMasks; - u32 roseMaxBadLeafLength; - bool roseConvertInfBadLeaves; bool roseConvertFloodProneSuffixes; bool roseMergeRosesDuringAliasing; bool roseMultiTopRoses; diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index e13d7c5c..1237a014 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -1644,8 +1644,6 @@ aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { dedupeLeftfixes(*this); aliasRoles(*this, false); // Don't merge leftfixes. dedupeLeftfixes(*this); - - convertBadLeaves(*this); uncalcLeaves(*this); /* note the leftfixes which do not need to keep state across stream diff --git a/src/rose/rose_build_convert.cpp b/src/rose/rose_build_convert.cpp index b151c0c9..a15d4dc6 100644 --- a/src/rose/rose_build_convert.cpp +++ b/src/rose/rose_build_convert.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -76,301 +76,6 @@ NFAVertex addHolderVertex(const CharReach &cr, NGHolder &out) { return v; } -// Returns the first and last vertices. -static -pair addLiteralVertices(const RoseGraph &g, - const RoseLiteralMap &literals, - const RoseVertex &t_v, - NGHolder &out) { - // We have limited cases that we support: one literal of arbitrary length, - // or a bunch of literals of length one that just become a vertex with - // their reach unioned together. - - // TODO: generalise this and handle more cases. - - const auto &litids = g[t_v].literals; - if (litids.size() > 1) { - // Multiple literals of len 1. - CharReach v_cr; - for (const auto &lit_id : litids) { - const rose_literal_id &litv = literals.right.at(lit_id); - assert(litv.s.length() == 1); - v_cr |= *litv.s.begin(); - } - - NFAVertex v = addHolderVertex(v_cr, out); - return make_pair(v, v); - } - - // Otherwise, we have a single literal, could be of arbitrary length. - assert(litids.size() == 1); - u32 lit_id = *(litids.begin()); - const rose_literal_id &litv = literals.right.at(lit_id); - assert(!litv.s.empty()); - - ue2_literal::const_iterator it = litv.s.begin(), ite = litv.s.end(); - NFAVertex first = addHolderVertex(*it, out), last = first; - for (++it; it != ite; ++it) { - NFAVertex v = addHolderVertex(*it, out); - add_edge(last, v, out); - last = v; - } - - return make_pair(first, last); -} - -static -unique_ptr convertLeafToHolder(const RoseGraph &g, - const RoseEdge &t_e, - const RoseLiteralMap &literals) { - RoseVertex t_v = target(t_e, g); // leaf vertex for demolition. - u32 minBound = g[t_e].minBound; - u32 maxBound = g[t_e].maxBound; - - const CharReach dot = CharReach::dot(); - - assert(!g[t_v].left); - - auto out = ue2::make_unique(NFA_SUFFIX); - - // Repeats wired to the start of the graph. - DEBUG_PRINTF("bounds [%u, %u]\n", minBound, maxBound); - u32 i = 1; - NFAVertex last = out->start; - for (; i <= minBound; i++) { - NFAVertex v = addHolderVertex(dot, *out); - add_edge(last, v, *out); - last = v; - } - NFAVertex last_mand = last; - if (maxBound != ROSE_BOUND_INF) { - for (; i <= maxBound; i++) { - NFAVertex v = addHolderVertex(dot, *out); - add_edge(last_mand, v, *out); - if (last != last_mand) { - add_edge(last, v, *out); - } - last = v; - } - } else { - if (minBound) { - add_edge(last_mand, last_mand, *out); - } else { - NFAVertex v = addHolderVertex(dot, *out); - add_edge(last_mand, v, *out); - add_edge(v, v, *out); - last = v; - } - } - - setTops(*out); - - // Literal vertices wired to accept. - NFAVertex litfirst, litlast; - tie(litfirst, litlast) = addLiteralVertices(g, literals, t_v, *out); - add_edge(last, litfirst, *out); - if (last != last_mand) { - add_edge(last_mand, litfirst, *out); - } - add_edge(litlast, out->accept, *out); - insert(&(*out)[litlast].reports, g[t_v].reports); - return out; -} - -static -bool areLiteralsConvertible(const RoseLiteralMap &literals, - const flat_set &ids) { - // Every literal in v must have the same length. - - // TODO: at the moment, we only handle two cases in construction: (a) one - // literal of arbitrary length, and (b) many literals, but all with length - // 1. - - if (ids.empty()) { - return false; - } - - auto it = ids.begin(), ite = ids.end(); - const size_t len = literals.right.at(*it).elength(); - - // Note: len may be 0 for cases with special literals, like EOD prefixes. - - if (len != 1 && ids.size() != 1) { - DEBUG_PRINTF("more than one literal of len > 1\n"); - return false; - } - - // Check the others all have the same length. - while (++it != ite) { - if (literals.right.at(*it).elength() != len) { - DEBUG_PRINTF("literals have different lengths\n"); - return false; - } - } - - return true; -} - -// Returns true if the given vertex doesn't qualify as a bad leaf to be eaten -// by an NFA. -static -bool isUnconvertibleLeaf(const RoseBuildImpl &tbi, const RoseVertex v) { - const RoseGraph &g = tbi.g; - - if (in_degree(v, g) != 1) { - DEBUG_PRINTF("more than one in-edge\n"); - return true; - } - - const RoseEdge &e = *(in_edges(v, g).first); - RoseVertex u = source(e, g); - - if (!g[u].reports.empty()) { - DEBUG_PRINTF("pred has accept\n"); - return true; - } - - if (g[u].suffix) { - // TODO: this could be handled by adding new vertices to the existing - // suffix. - DEBUG_PRINTF("pred already has suffix\n"); - return true; - } - - if (tbi.isAnyStart(u)) { - DEBUG_PRINTF("fail start\n"); - return true; - } - - if (tbi.isAnchored(u)) { - /* TODO need to check for possible anchored queue overflow? maybe? */ - DEBUG_PRINTF("fail anchored\n"); - return true; - } - - if (g[v].reports.empty() || g[v].eod_accept) { - DEBUG_PRINTF("bad accept\n"); - return true; - } - - if (g[v].suffix) { - DEBUG_PRINTF("suffix\n"); - return true; - } - - if (g[v].left) { - /* TODO: we really should handle this case as we would be checking - * an nfa each time. However it requires completely different graph - * fiddling logic */ - DEBUG_PRINTF("rose prefix action\n"); - return true; - } - - if (!areLiteralsConvertible(tbi.literals, g[v].literals)) { - DEBUG_PRINTF("fail length\n"); - return true; - } - - u32 max_lit_len = tbi.maxLiteralLen(v); - - u32 maxbound = max_lit_len == 1 ? 124 : 32; // arbitrary magic numbers - if (g[e].maxBound > maxbound && g[e].maxBound != ROSE_BOUND_INF) { - DEBUG_PRINTF("fail maxbound (%u)\n", maxbound); - return true; - } - - if (g[e].maxBound == ROSE_BOUND_INF) { - /* slightly risky as nfa won't die */ - DEBUG_PRINTF("fail: .*\n"); - return true; - } - - return false; -} - -// Find all of the leaves with literals whose length is <= len. -static -void findBadLeaves(RoseBuildImpl &tbi, set &bad) { - RoseGraph &g = tbi.g; - u32 len = tbi.cc.grey.roseMaxBadLeafLength; - - for (const auto &m : tbi.literals.right) { - if (m.second.s.length() > len) { - continue; - } - u32 lid = m.first; - DEBUG_PRINTF("%u is a short lit (length %zu)\n", lid, - m.second.s.length()); - - if (tbi.isDelayed(lid)) { - DEBUG_PRINTF("delayed, skipping!\n"); - continue; - } - - const rose_literal_info &info = tbi.literal_info[lid]; - - for (auto v : info.vertices) { - if (!isLeafNode(v, g)) { - continue; - } - if (isUnconvertibleLeaf(tbi, v)) { - continue; // we don't want to touch it - } - - // This leaf may have a predecessor with more than one successor, - // in which case we want to clone the pred just to support this - // leaf. - const RoseEdge &e = *in_edges(v, g).first; - RoseVertex u = source(e, g); - if (out_degree(u, g) != 1) { - DEBUG_PRINTF("re-homing %zu to cloned pred\n", g[v].index); - RoseVertex u2 = tbi.cloneVertex(u); - for (const auto &e_in : in_edges_range(u, g)) { - add_edge(source(e_in, g), u2, g[e_in], g); - } - add_edge(u2, v, g[e], g); - remove_edge(e, g); - } - - DEBUG_PRINTF("%zu is a bad leaf vertex\n", g[v].index); - bad.insert(v); - } - } -} - -void convertBadLeaves(RoseBuildImpl &tbi) { - RoseGraph &g = tbi.g; - set bad; - findBadLeaves(tbi, bad); - DEBUG_PRINTF("found %zu bad leaves\n", bad.size()); - - if (bad.empty()) { - return; - } - - vector dead; - for (auto v : bad) { - assert(in_degree(v, g)); - - const RoseEdge &e = *(in_edges(v, g).first); - - shared_ptr h = convertLeafToHolder(g, e, tbi.literals); - if (num_vertices(*h) >= NFA_MAX_STATES) { - assert(0); // too big! - continue; - } - - RoseVertex u = source(e, g); - assert(!g[u].suffix); - g[u].suffix.graph = h; - DEBUG_PRINTF("%zu's nfa holder %p\n", g[u].index, h.get()); - - dead.push_back(v); - } - - tbi.removeVertices(dead); -} - static size_t suffixFloodLen(const ue2_literal &s) { if (s.empty()) { diff --git a/src/rose/rose_build_convert.h b/src/rose/rose_build_convert.h index fd7c6d3e..7307c213 100644 --- a/src/rose/rose_build_convert.h +++ b/src/rose/rose_build_convert.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,7 +34,6 @@ namespace ue2 { class RoseBuildImpl; void convertFloodProneSuffixes(RoseBuildImpl &tbi); -void convertBadLeaves(RoseBuildImpl &tbi); void convertPrefixToBounds(RoseBuildImpl &tbi); void convertAnchPrefixToBounds(RoseBuildImpl &tbi);