/*
 * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief NFA acceleration analysis code.
 */
#include "ng_limex_accel.h"

#include "ng_holder.h"
#include "ng_misc_opt.h"
#include "ng_util.h"
#include "ue2common.h"

#include "nfa/accel.h"

#include "util/bitutils.h" // for CASE_CLEAR
#include "util/charreach.h"
#include "util/compile_context.h"
#include "util/container.h"
#include "util/dump_charclass.h"
#include "util/graph_range.h"
#include "util/small_vector.h"
#include "util/target_info.h"

#include <algorithm>
#include <map>

#include <boost/range/adaptor/map.hpp>

using namespace std;
using boost::adaptors::map_keys;

namespace ue2 {

/** \brief Minimum reach (in characters) a vertex needs to be considered as a
 * friend of an accel state. */
#define WIDE_FRIEND_MIN 200

/**
 * \brief Run one generation of the accel-friend search.
 *
 * Each vertex in \a cands that is not already in \a f_preds is accepted as a
 * friend when its reach has at least WIDE_FRIEND_MIN characters, its reach is
 * a subset of \a cr, and all of its predecessors are in \a f_preds.
 *
 * Accepted vertices are added to \a friends and \a next_preds; their
 * successors are added to \a next_cands for the following generation.
 */
static
void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr,
                               const flat_set<NFAVertex> &cands,
                               const flat_set<NFAVertex> &f_preds,
                               flat_set<NFAVertex> *next_cands,
                               flat_set<NFAVertex> *next_preds,
                               flat_set<NFAVertex> *friends) {
    for (auto v : cands) {
        if (contains(f_preds, v)) {
            continue;
        }

        const CharReach &acr = g[v].char_reach;
        DEBUG_PRINTF("checking %zu\n", g[v].index);

        if (acr.count() < WIDE_FRIEND_MIN || !acr.isSubsetOf(cr)) {
            DEBUG_PRINTF("bad reach %zu\n", acr.count());
            continue;
        }

        /* every predecessor must already be known to be on only when the
         * accel state is on */
        for (auto u : inv_adjacent_vertices_range(v, g)) {
            if (!contains(f_preds, u)) {
                DEBUG_PRINTF("bad pred\n");
                goto next_cand;
            }
        }

        next_preds->insert(v);
        insert(next_cands, adjacent_vertices(v, g));

        DEBUG_PRINTF("%zu is a friend indeed\n", g[v].index);
        friends->insert(v);
    next_cand:;
    }
}

/**
 * \brief Find the friends of accel state \a v and add them to \a friends.
 *
 * Runs (\a offset + 1) generations of findAccelFriendGeneration(), starting
 * from the successors of \a v. Nothing is added if \a v is a key of
 * \a br_cyclic whose summary is not unbounded.
 */
void findAccelFriends(const NGHolder &g, NFAVertex v,
                      const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
                      u32 offset, flat_set<NFAVertex> *friends) {
    /* A friend of an accel state is a successor state which can only be on when
     * the accel is on. This requires that it has a subset of the accel state's
     * preds and a charreach which is a subset of the accel state.
     *
     * A friend can be safely ignored when accelerating provided there is
     * sufficient back-off. A friend is useful if it has a wide reach.
     */

    /* BR cyclic states which may go stale cannot have friends as they may
     * suddenly turn off leading their so-called friends stranded and alone.
     * TODO: restrict to only stale going BR cyclics
     */
    if (contains(br_cyclic, v) && !br_cyclic.at(v).unbounded()) {
        return;
    }

    u32 friend_depth = offset + 1;

    flat_set<NFAVertex> f_preds;
    insert(&f_preds, inv_adjacent_vertices(v, g));

    const CharReach &cr = g[v].char_reach;

    flat_set<NFAVertex> cands;
    insert(&cands, adjacent_vertices(v, g));

    flat_set<NFAVertex> next_preds;
    flat_set<NFAVertex> next_cands;
    for (u32 i = 0; i < friend_depth; i++) {
        findAccelFriendGeneration(g, cr, cands, f_preds, &next_cands,
                                  &next_preds, friends);
        /* friends found this generation count as valid preds for the next */
        f_preds.insert(next_preds.begin(), next_preds.end());
        next_preds.clear();
        cands.swap(next_cands);
        next_cands.clear();
    }
}

/**
 * \brief Append to \a paths the reach sequences that can follow from \a v.
 *
 * Recurses through successors for at most \a depth steps. Each path is built
 * back-to-front: a vertex appends its own reach after the reaches of the
 * vertices that follow it, so callers reverse the paths afterwards.
 *
 * Exploration stops at a vertex with a self-loop or with at least
 * MAGIC_TOO_WIDE_NUMBER out-edges, and at successors in \a forbidden.
 */
static
void findPaths(const NGHolder &g, NFAVertex v,
               const vector<CharReach> &refined_cr,
               vector<vector<CharReach>> *paths,
               const flat_set<NFAVertex> &forbidden, u32 depth) {
    static const u32 MAGIC_TOO_WIDE_NUMBER = 16;
    if (!depth) {
        paths->push_back({});
        return;
    }
    if (v == g.accept || v == g.acceptEod) {
        paths->push_back({});
        if (!generates_callbacks(g) || v == g.acceptEod) {
            paths->back().emplace_back(CharReach()); /* red tape options */
        }
        return;
    }

    /* for the escape 'literals' we want to use the minimal cr so we
     * can be more selective */
    const CharReach &cr = refined_cr[g[v].index];

    if (out_degree(v, g) >= MAGIC_TOO_WIDE_NUMBER
        || hasSelfLoop(v, g)) {
        /* give up on pushing past this point */
        paths->push_back({cr});
        return;
    }

    vector<vector<CharReach>> curr;
    for (auto w : adjacent_vertices_range(v, g)) {
        if (contains(forbidden, w)) {
            /* path has looped back to one of the active+boring acceleration
             * states.  We can ignore this path if we have sufficient back-
             * off. */
            paths->push_back({cr});
            continue;
        }

        /* retry with a shallower depth while the successor yields too many
         * paths */
        u32 new_depth = depth - 1;
        do {
            curr.clear();
            findPaths(g, w, refined_cr, &curr, forbidden, new_depth);
        } while (new_depth-- && curr.size() >= MAGIC_TOO_WIDE_NUMBER);

        for (auto &c : curr) {
            c.emplace_back(cr);
            paths->emplace_back(std::move(c));
        }
    }
}

namespace {
/**
 * \brief Candidate single-byte acceleration scheme: a character class and the
 * offset at which it applies.
 *
 * A default-constructed scheme has a dot reach and an offset of
 * MAX_ACCEL_DEPTH + 1. operator< orders schemes by reach count first, then
 * by the ORDER_CHECK comparisons on offset and cr.
 */
struct SAccelScheme {
    SAccelScheme(CharReach cr_in, u32 offset_in)
        : cr(std::move(cr_in)), offset(offset_in) {
        assert(offset <= MAX_ACCEL_DEPTH);
    }

    SAccelScheme() {}

    bool operator<(const SAccelScheme &b) const {
        const SAccelScheme &a = *this;

        const size_t a_count = cr.count(), b_count = b.cr.count();
        if (a_count != b_count) {
            return a_count < b_count;
        }

        /* TODO: give bonus if one is a 'caseless' character */
        ORDER_CHECK(offset);
        ORDER_CHECK(cr);
        return false;
    }

    CharReach cr = CharReach::dot();
    u32 offset = MAX_ACCEL_DEPTH + 1;
};
}

/**
 * \brief Limit on the number of (recursive) calls to findBestInternal().
 */
static constexpr size_t MAX_FINDBEST_CALLS = 1000000;

/**
 * \brief Recursive search for the best single-byte scheme over the paths in
 * [\a pb, \a pe).
 *
 * For the path at \a pb, each element is OR-ed into \a curr to form a
 * candidate; candidates that compare worse than \a *best are dropped and the
 * remainder are tried in sorted order against the remaining paths. When no
 * paths are left, \a curr replaces \a *best if it compares less.
 *
 * \a num_calls is incremented on every call; the search returns without
 * further work once it exceeds MAX_FINDBEST_CALLS.
 */
static
void findBestInternal(vector<vector<CharReach>>::const_iterator pb,
                      vector<vector<CharReach>>::const_iterator pe,
                      size_t *num_calls, const SAccelScheme &curr,
                      SAccelScheme *best) {
    assert(curr.offset <= MAX_ACCEL_DEPTH);

    if (++(*num_calls) > MAX_FINDBEST_CALLS) {
        DEBUG_PRINTF("hit num_calls limit %zu\n", *num_calls);
        return;
    }

    DEBUG_PRINTF("paths left %zu\n", pe - pb);
    if (pb == pe) {
        if (curr < *best) {
            *best = curr;
            DEBUG_PRINTF("new best: count=%zu, class=%s, offset=%u\n",
                         best->cr.count(), describeClass(best->cr).c_str(),
                         best->offset);
        }
        return;
    }

    DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());

    small_vector<SAccelScheme, 10> priority_path;
    priority_path.reserve(pb->size());
    u32 i = 0;
    for (auto p = pb->begin(); p != pb->end(); ++p, i++) {
        SAccelScheme as(*p | curr.cr, max(i, curr.offset));
        if (*best < as) {
            DEBUG_PRINTF("worse\n");
            continue;
        }
        priority_path.emplace_back(std::move(as));
    }

    sort(priority_path.begin(), priority_path.end());
    /* drop the run of candidates directly after each entry whose reach is a
     * superset of that entry's reach */
    for (auto it = priority_path.begin(); it != priority_path.end(); ++it) {
        auto jt = next(it);
        for (; jt != priority_path.end(); ++jt) {
            if (!it->cr.isSubsetOf(jt->cr)) {
                break;
            }
        }
        priority_path.erase(next(it), jt);
        DEBUG_PRINTF("||%zu\n", it->cr.count());
    }
    DEBUG_PRINTF("---\n");

    for (const SAccelScheme &in : priority_path) {
        DEBUG_PRINTF("in: count %zu\n", in.cr.count());
        if (*best < in) {
            DEBUG_PRINTF("worse\n");
            continue;
        }
        findBestInternal(pb + 1, pe, num_calls, in, best);

        if (curr.cr == best->cr) {
            return; /* could only get better by offset */
        }
    }
}

/**
 * \brief Find the best single-byte scheme for \a paths, seeding the search
 * with the \a terminating characters at offset 0.
 *
 * Returns a default-constructed SAccelScheme if the search never reaches the
 * end of the path list.
 */
static
SAccelScheme findBest(const vector<vector<CharReach>> &paths,
                      const CharReach &terminating) {
    SAccelScheme curr(terminating, 0U);
    SAccelScheme best;
    size_t num_calls = 0;
    findBestInternal(paths.begin(), paths.end(), &num_calls, curr, &best);
    DEBUG_PRINTF("findBest completed, num_calls=%zu\n", num_calls);
    DEBUG_PRINTF("selected scheme: count=%zu, class=%s, offset=%u\n",
                 best.cr.count(), describeClass(best.cr).c_str(), best.offset);
    return best;
}

namespace {
/**
 * \brief Candidate double-byte acceleration scheme: a set of byte pairs, a
 * class of single stop characters and an offset.
 *
 * operator< prefers fewer single characters; when there are none, it prefers
 * a scheme for which buildDvermMask() succeeds on its pairs. Remaining ties
 * go through the ORDER_CHECK comparisons in the order written.
 */
struct DAccelScheme {
    DAccelScheme(CharReach cr_in, u32 offset_in)
        : double_cr(std::move(cr_in)), double_offset(offset_in) {
        assert(double_offset <= MAX_ACCEL_DEPTH);
    }

    bool operator<(const DAccelScheme &b) const {
        const DAccelScheme &a = *this;

        size_t a_dcount = a.double_cr.count();
        size_t b_dcount = b.double_cr.count();

        assert(!a.double_byte.empty() || a_dcount || a.double_offset);
        assert(!b.double_byte.empty() || b_dcount || b.double_offset);

        if (a_dcount != b_dcount) {
            return a_dcount < b_dcount;
        }

        if (!a_dcount) {
            bool cd_a = buildDvermMask(a.double_byte);
            bool cd_b = buildDvermMask(b.double_byte);
            if (cd_a != cd_b) {
                return cd_a;
            }
        }

        ORDER_CHECK(double_byte.size());
        ORDER_CHECK(double_offset);

        /* TODO: give bonus if one is a 'caseless' character */
        ORDER_CHECK(double_byte);
        ORDER_CHECK(double_cr);

        return false;
    }

    flat_set<pair<u8, u8>> double_byte;
    CharReach double_cr;
    u32 double_offset = 0;
};
}

/**
 * \brief Extend scheme \a as with the adjacent path elements \a cr_1 and
 * \a cr_2_in found at position \a offset_in, returning the result.
 *
 * Characters already in as.double_cr are removed from both elements first.
 * If the remaining pair count exceeds DOUBLE_SHUFTI_LIMIT, the smaller of the
 * two classes is added to double_cr instead of enumerating pairs; otherwise
 * every (first, second) pair is added to double_byte. double_offset is raised
 * to at least the offset used.
 */
static
DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1,
                               const CharReach &cr_2_in, u32 offset_in) {
    cr_1 &= ~as.double_cr;
    CharReach cr_2 = cr_2_in & ~as.double_cr;
    u32 offset = offset_in;

    if (cr_1.none()) {
        DEBUG_PRINTF("empty first element\n");
        ENSURE_AT_LEAST(&as.double_offset, offset);
        return as;
    }

    if (cr_2_in != cr_2 || cr_2.none()) {
        offset = offset_in + 1;
    }

    size_t two_count = cr_1.count() * cr_2.count();

    DEBUG_PRINTF("will generate raw %zu pairs\n", two_count);

    if (!two_count) {
        DEBUG_PRINTF("empty element\n");
        ENSURE_AT_LEAST(&as.double_offset, offset);
        return as;
    }

    if (two_count > DOUBLE_SHUFTI_LIMIT) {
        /* too many pairs: fall back to single stop characters, using
         * whichever element is smaller */
        if (cr_2.count() < cr_1.count()) {
            as.double_cr |= cr_2;
            offset = offset_in + 1;
        } else {
            as.double_cr |= cr_1;
        }
    } else {
        for (auto i = cr_1.find_first(); i != CharReach::npos;
             i = cr_1.find_next(i)) {
            for (auto j = cr_2.find_first(); j != CharReach::npos;
                 j = cr_2.find_next(j)) {
                as.double_byte.emplace(i, j);
            }
        }
    }

    ENSURE_AT_LEAST(&as.double_offset, offset);
    DEBUG_PRINTF("construct da %zu pairs, %zu singles, offset %u\n",
                 as.double_byte.size(), as.double_cr.count(), as.double_offset);
    return as;
}

/**
 * \brief Recursive search for the best double-byte scheme over the paths in
 * [\a pb, \a pe).
 *
 * For the path at \a pb, every pair of adjacent elements is combined with
 * \a curr via make_double_accel(); candidates that compare worse than
 * \a *best are skipped and the rest are tried in sorted order against the
 * remaining paths. When no paths are left, \a curr replaces \a *best if it
 * compares less.
 */
static
void findDoubleBest(vector<vector<CharReach> >::const_iterator pb,
                    vector<vector<CharReach> >::const_iterator pe,
                    const DAccelScheme &curr, DAccelScheme *best) {
    assert(curr.double_offset <= MAX_ACCEL_DEPTH);
    DEBUG_PRINTF("paths left %zu\n", pe - pb);
    DEBUG_PRINTF("current base: %zu pairs, %zu singles, offset %u\n",
                 curr.double_byte.size(), curr.double_cr.count(),
                 curr.double_offset);
    if (pb == pe) {
        if (curr < *best) {
            *best = curr;
            DEBUG_PRINTF("new best: %zu pairs, %zu singles, offset %u\n",
                         best->double_byte.size(), best->double_cr.count(),
                         best->double_offset);
        }
        return;
    }

    DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());

    small_vector<DAccelScheme, 10> priority_path;
    priority_path.reserve(pb->size());
    u32 i = 0;
    for (auto p = pb->begin(); p != pb->end() && next(p) != pb->end();
         ++p, i++) {
        DAccelScheme as = make_double_accel(curr, *p, *next(p), i);
        if (*best < as) {
            DEBUG_PRINTF("worse\n");
            continue;
        }
        priority_path.emplace_back(std::move(as));
    }

    sort(priority_path.begin(), priority_path.end());
    DEBUG_PRINTF("%zu candidates for this path\n", priority_path.size());
    DEBUG_PRINTF("input best: %zu pairs, %zu singles, offset %u\n",
                 best->double_byte.size(), best->double_cr.count(),
                 best->double_offset);

    for (const DAccelScheme &in : priority_path) {
        DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n",
                     in.double_byte.size(), in.double_cr.count(),
                     in.double_offset);
        if (*best < in) {
            DEBUG_PRINTF("worse\n");
            continue;
        }
        findDoubleBest(pb + 1, pe, in, best);
    }
}

#ifdef DEBUG
/** \brief Debug helper: print every path as a list of character classes. */
static
void dumpPaths(const vector<vector<CharReach>> &paths) {
    for (const auto &path : paths) {
        DEBUG_PRINTF("path: [");
        for (const auto &cr : path) {
            printf(" [");
            describeClass(stdout, cr, 20, CC_OUT_TEXT);
            printf("]");
        }
        printf(" ]\n");
    }
}
#endif

/**
 * \brief Within each path, replace any segment that is a superset of an
 * earlier segment with dot.
 */
static
void blowoutPathsLessStrictSegment(vector<vector<CharReach> > &paths) {
    /* path segments which are a superset of an earlier segment should never be
     * picked as an acceleration segment -> to improve processing just replace
     * with dot */
    // cppcheck-suppress constVariableReference
    for (auto &p : paths) {
        for (auto it = p.begin(); it != p.end(); ++it) {
            for (auto jt = next(it); jt != p.end(); ++jt) {
                if (it->isSubsetOf(*jt)) {
                    *jt = CharReach::dot();
                }
            }
        }
    }
}

/**
 * \brief Merge adjacent paths that are the same length and differ only in
 * their last segment.
 *
 * The last segment of the first path becomes the union of both last segments
 * and the second path is erased. Only neighbouring entries of \a paths are
 * compared. Empty paths are never merged.
 */
static
void unifyPathsLastSegment(vector<vector<CharReach> > &paths) {
    /* try to unify paths which only differ in the last segment */
    for (vector<vector<CharReach> >::iterator p = paths.begin();
         p != paths.end() && p + 1 != paths.end();) {
        vector<CharReach> &a = *p;
        vector<CharReach> &b = *(p + 1);

        /* An empty path has no last segment. Skipping it also keeps
         * a.size() - 1 below from wrapping around and indexing an empty
         * vector. */
        if (a.size() != b.size() || a.empty()) {
            ++p;
            continue;
        }

        u32 i = 0;
        for (; i < a.size() - 1; i++) {
            if (a[i] != b[i]) {
                break;
            }
        }
        if (i == a.size() - 1) {
            /* we can unify these paths */
            a[i] |= b[i];
            paths.erase(p + 1);
        } else {
            ++p;
        }
    }
}

/**
 * \brief Simplify \a paths before the single-byte search: blow out redundant
 * segments, sort, then unify paths differing only in their last segment.
 */
static
void improvePaths(vector<vector<CharReach> > &paths) {
#ifdef DEBUG
    DEBUG_PRINTF("orig paths\n");
    dumpPaths(paths);
#endif
    blowoutPathsLessStrictSegment(paths);

    sort(paths.begin(), paths.end());

    unifyPathsLastSegment(paths);

#ifdef DEBUG
    DEBUG_PRINTF("opt paths\n");
    dumpPaths(paths);
#endif
}

/** \brief Paths are shortened until at most this many remain before the
 * double-byte search runs. */
#define MAX_DOUBLE_ACCEL_PATHS 10

/**
 * \brief Find the best double-byte scheme for \a paths.
 *
 * Works on its own copy of \a paths. Returns a scheme holding only the
 * \a terminating characters (offset 0) if no paths remain or a path becomes
 * empty while shortening.
 */
static
DAccelScheme findBestDoubleAccelScheme(vector<vector<CharReach> > paths,
                                       const CharReach &terminating) {
    DEBUG_PRINTF("looking for double accel, %zu terminating symbols\n",
                 terminating.count());
    unifyPathsLastSegment(paths);

#ifdef DEBUG
    DEBUG_PRINTF("paths:\n");
    dumpPaths(paths);
#endif

    /* if there are too many paths, shorten the paths to reduce the number of
     * distinct paths we have to consider */
    while (paths.size() > MAX_DOUBLE_ACCEL_PATHS) {
        for (auto &p : paths) {
            if (p.empty()) {
                return DAccelScheme(terminating, 0U);
            }
            p.pop_back();
        }
        unifyPathsLastSegment(paths);
    }

    if (paths.empty()) {
        return DAccelScheme(terminating, 0U);
    }

    DAccelScheme curr(terminating, 0U);
    DAccelScheme best(CharReach::dot(), 0U);
    findDoubleBest(paths.begin(), paths.end(), curr, &best);
    DEBUG_PRINTF("da %zu pairs, %zu singles\n", best.double_byte.size(),
                 best.double_cr.count());
    return best;
}

/** \brief Above this many paths the single-byte search is not attempted. */
#define MAX_EXPLORE_PATHS 40

/**
 * \brief Choose an acceleration scheme for \a paths given the \a terminating
 * characters.
 *
 * If \a look_for_double_byte is set, the double-byte fields of the result are
 * filled from findBestDoubleAccelScheme() when its pair count is within
 * DOUBLE_SHUFTI_LIMIT. The single-byte fields come from findBest() on the
 * improved paths, unless there are more than MAX_EXPLORE_PATHS of them, in
 * which case the result is returned with only the double-byte fields set.
 */
AccelScheme findBestAccelScheme(vector<vector<CharReach>> paths,
                                const CharReach &terminating,
                                bool look_for_double_byte) {
    AccelScheme rv;
    if (look_for_double_byte) {
        DAccelScheme da = findBestDoubleAccelScheme(paths, terminating);
        if (da.double_byte.size() <= DOUBLE_SHUFTI_LIMIT) {
            rv.double_byte = std::move(da.double_byte);
            rv.double_cr = std::move(da.double_cr);
            rv.double_offset = da.double_offset;
        }
    }

    improvePaths(paths);

    DEBUG_PRINTF("we have %zu paths\n", paths.size());
    if (paths.size() > MAX_EXPLORE_PATHS) {
        return rv; /* too many paths to explore */
    }

    /* if we were smart we would do something netflowy on the paths to find the
     * best cut. But we aren't, so we will just brute force it.
     */
    SAccelScheme best = findBest(paths, terminating);

    /* find best is a bit lazy in terms of minimising the offset, see if we can
     * make it better. need to find the min max offset that we need.*/
    u32 offset = 0;
    for (const auto &path : paths) {
        u32 i = 0;
        for (const auto &cr : path) {
            if (cr.isSubsetOf(best.cr)) {
                break;
            }
            i++;
        }
        offset = MAX(offset, i);
    }
    assert(offset <= best.offset);
    best.offset = offset;

    rv.offset = best.offset;
    rv.cr = best.cr;
    /* the single-byte scheme has fewer stop characters than the double-byte
     * one: drop the pairs */
    if (rv.cr.count() < rv.double_cr.count()) {
        rv.double_byte.clear();
    }

    return rv;
}

/**
 * \brief Find an acceleration scheme for the set of states \a verts.
 *
 * Returns a default-constructed AccelScheme if any vertex lacks a self-loop
 * or if the vertices' combined stop characters exceed the limit selected by
 * \a allow_wide. Otherwise paths are collected from every non-self successor
 * of each vertex and passed to findBestAccelScheme().
 */
AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
                         const vector<CharReach> &refined_cr,
                         const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
                         bool allow_wide, bool look_for_double_byte) {
    CharReach terminating;
    for (auto v : verts) {
        if (!hasSelfLoop(v, g)) {
            DEBUG_PRINTF("no self loop\n");
            return AccelScheme(); /* invalid scheme */
        }

        // check that this state is reachable on most characters
        terminating |= ~g[v].char_reach;
    }

    DEBUG_PRINTF("set vertex has %zu stop chars\n", terminating.count());
    size_t limit = allow_wide ? ACCEL_MAX_FLOATING_STOP_CHAR
                              : ACCEL_MAX_STOP_CHAR;
    if (terminating.count() > limit) {
        return AccelScheme(); /* invalid scheme */
    }

    vector<vector<CharReach>> paths;
    flat_set<NFAVertex> ignore_vert_set(verts.begin(), verts.end());

    /* Note: we can not in general (TODO: ignore when possible) ignore entries
     * into the bounded repeat cyclic states as that is when the magic happens
     */
    for (auto v : br_cyclic | map_keys) {
        /* TODO: can allow if repeatMin <= 1 ? */
        ignore_vert_set.erase(v);
    }

    for (auto v : verts) {
        for (auto w : adjacent_vertices_range(v, g)) {
            if (w != v) {
                findPaths(g, w, refined_cr, &paths, ignore_vert_set,
                          MAX_ACCEL_DEPTH);
            }
        }
    }

    /* paths built wrong: reverse them */
    for (auto &path : paths) {
        reverse(path.begin(), path.end());
    }

    return findBestAccelScheme(std::move(paths), terminating,
                               look_for_double_byte);
}

/**
 * \brief Return startDs, or a vertex that can stand in for it.
 *
 * startDs is returned if it has proper out-edges, if start does not have
 * exactly one successor other than startDs, or if the walk below fails. The
 * walk begins at that single successor and follows sole out-edges while each
 * destination has full character reach; the first vertex found with a
 * self-loop is returned.
 */
NFAVertex get_sds_or_proxy(const NGHolder &g) {
    DEBUG_PRINTF("looking for sds proxy\n");
    if (proper_out_degree(g.startDs, g)) {
        return g.startDs;
    }

    NFAVertex v = NGHolder::null_vertex();
    for (auto w : adjacent_vertices_range(g.start, g)) {
        if (w != g.startDs) {
            if (!v) {
                v = w;
            } else {
                /* more than one candidate */
                return g.startDs;
            }
        }
    }

    if (!v) {
        return g.startDs;
    }

    while (true) {
        if (hasSelfLoop(v, g)) {
            DEBUG_PRINTF("woot %zu\n", g[v].index);
            return v;
        }
        if (out_degree(v, g) != 1) {
            break;
        }
        NFAVertex u = getSoleDestVertex(g, v);
        if (!g[u].char_reach.all()) {
            break;
        }
        v = u;
    }

    return g.startDs;
}

/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). */
bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
                   const vector<CharReach> &refined_cr,
                   const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
                   AccelScheme *as, bool allow_wide) {
    // For a state to be accelerable, our current criterion is that it be a
    // large character class with a self-loop and narrow set of possible other
    // successors (i.e. no special successors, union of successor reachability
    // is small).
    if (!hasSelfLoop(v, g)) {
        return false;
    }

    // check that this state is reachable on most characters
    /* we want to use the maximal reach here (in the graph) */
    CharReach terminating = g[v].char_reach;
    terminating.flip();

    DEBUG_PRINTF("vertex %zu is cyclic and has %zu stop chars%s\n",
                 g[v].index, terminating.count(),
                 allow_wide ? " (w)" : "");

    size_t limit = allow_wide ? ACCEL_MAX_FLOATING_STOP_CHAR
                              : ACCEL_MAX_STOP_CHAR;
    if (terminating.count() > limit) {
        DEBUG_PRINTF("too leaky\n");
        return false;
    }

    flat_set<NFAVertex> curr, next;

    insert(&curr, adjacent_vertices(v, g));
    curr.erase(v); // erase self-loop

    // We consider offsets of zero through three; this is fairly arbitrary at
    // present and could probably be increased (FIXME)
    /* WARNING: would/could do horrible things to compile time */
    bool stop = false;
    vector<CharReach> depthReach(MAX_ACCEL_DEPTH);
    unsigned int depth;
    for (depth = 0; !stop && depth < MAX_ACCEL_DEPTH; depth++) {
        CharReach &cr = depthReach[depth];
        for (auto t : curr) {
            if (is_special(t, g)) {
                // We've bumped into the edge of the graph, so we should stop
                // searching.
                // Exception: iff our cyclic state is not a dot, then we can
                // safely accelerate towards an EOD accept.

                /* Exception: nfas that don't generate callbacks so accepts are
                 * fine too */
                if (t == g.accept && !generates_callbacks(g)) {
                    stop = true; // don't search beyond this depth
                    continue;
                } else if (t == g.accept) {
                    goto depth_done;
                }

                assert(t == g.acceptEod);
                stop = true; // don't search beyond this depth
            } else {
                // Non-special vertex
                insert(&next, adjacent_vertices(t, g));
                /* for the escape 'literals' we want to use the minimal cr so we
                 * can be more selective */
                cr |= refined_cr[g[t].index];
            }
        }

        cr |= terminating;
        DEBUG_PRINTF("depth %u has unioned reach %zu\n", depth, cr.count());

        curr.swap(next);
        next.clear();
    }

depth_done:

    if (depth == 0) {
        return false;
    }

    DEBUG_PRINTF("selecting from depth 0..%u\n", depth);

    /* Look for the most awesome acceleration evar */
    for (unsigned int i = 0; i < depth; i++) {
        if (depthReach[i].none()) {
            DEBUG_PRINTF("red tape acceleration engine depth %u\n", i);
            *as = AccelScheme();
            as->offset = i;
            as->cr = CharReach();
            return true;
        }
    }

    // First, loop over our depths and see if we have a suitable 2-byte
    // caseful vermicelli option: this is the (second) fastest accel we have
    if (depth > 1) {
        for (unsigned int i = 0; i < (depth - 1); i++) {
            const CharReach &cra = depthReach[i];
            const CharReach &crb = depthReach[i + 1];
            if ((cra.count() == 1 && crb.count() == 1)
                || (cra.count() == 2 && crb.count() == 2
                    && cra.isBit5Insensitive() && crb.isBit5Insensitive())) {
                DEBUG_PRINTF("two-byte vermicelli, depth %u\n", i);
                *as = AccelScheme();
                as->offset = i;
                return true;
            }
        }

        // Second option: a two-byte shufti (i.e. less than eight 2-byte
        // literals)
        for (unsigned int i = 0; i < (depth - 1); i++) {
            if (depthReach[i].count() * depthReach[i+1].count()
                <= DOUBLE_SHUFTI_LIMIT) {
                DEBUG_PRINTF("two-byte shufti, depth %u\n", i);
                *as = AccelScheme();
                as->offset = i;
                return true;
            }
        }
    }

    // Look for offset accel schemes verm/shufti;
    vector<NFAVertex> verts(1, v);
    *as = nfaFindAccel(g, verts, refined_cr, br_cyclic, allow_wide, true);
    DEBUG_PRINTF("as width %zu\n", as->cr.count());
    return as->cr.count() <= ACCEL_MAX_STOP_CHAR || allow_wide;
}

} // namespace ue2