mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
914 lines
28 KiB
C++
914 lines
28 KiB
C++
/*
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
|
|
|
|
/** \file
 * \brief NFA acceleration analysis code.
 */
|
|
#include "ng_limex_accel.h"
|
|
|
|
#include "ng_holder.h"
|
|
#include "ng_misc_opt.h"
|
|
#include "ng_util.h"
|
|
#include "ue2common.h"
|
|
|
|
#include "nfa/accel.h"
|
|
#include "nfa/multiaccel_compilehelper.h"
|
|
|
|
#include "util/bitutils.h" // for CASE_CLEAR
|
|
#include "util/charreach.h"
|
|
#include "util/compile_context.h"
|
|
#include "util/container.h"
|
|
#include "util/dump_charclass.h"
|
|
#include "util/graph_range.h"
|
|
#include "util/target_info.h"
|
|
|
|
#include <algorithm>
|
|
#include <map>
|
|
|
|
using namespace std;
|
|
|
|
namespace ue2 {
|
|
|
|
/* Minimum number of characters a candidate's reach must contain for it to be
 * considered as an accel friend (narrower candidates are rejected). */
#define WIDE_FRIEND_MIN 200
|
|
|
|
static
|
|
void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr,
|
|
const flat_set<NFAVertex> &cands,
|
|
const flat_set<NFAVertex> &preds,
|
|
flat_set<NFAVertex> *next_cands,
|
|
flat_set<NFAVertex> *next_preds,
|
|
flat_set<NFAVertex> *friends) {
|
|
for (auto v : cands) {
|
|
if (contains(preds, v)) {
|
|
continue;
|
|
}
|
|
|
|
const CharReach &acr = g[v].char_reach;
|
|
DEBUG_PRINTF("checking %u\n", g[v].index);
|
|
|
|
if (acr.count() < WIDE_FRIEND_MIN || !acr.isSubsetOf(cr)) {
|
|
DEBUG_PRINTF("bad reach %zu\n", acr.count());
|
|
continue;
|
|
}
|
|
|
|
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
|
if (!contains(preds, u)) {
|
|
DEBUG_PRINTF("bad pred\n");
|
|
goto next_cand;
|
|
}
|
|
}
|
|
|
|
next_preds->insert(v);
|
|
insert(next_cands, adjacent_vertices(v, g));
|
|
|
|
DEBUG_PRINTF("%u is a friend indeed\n", g[v].index);
|
|
friends->insert(v);
|
|
next_cand:;
|
|
}
|
|
}
|
|
|
|
void findAccelFriends(const NGHolder &g, NFAVertex v,
|
|
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
|
|
u32 offset, flat_set<NFAVertex> *friends) {
|
|
/* A friend of an accel state is a successor state which can only be on when
|
|
* the accel is on. This requires that it has a subset of the accel state's
|
|
* preds and a charreach which is a subset of the accel state.
|
|
*
|
|
* A friend can be safely ignored when accelerating provided there is
|
|
* sufficient back-off. A friend is useful if it has a wide reach.
|
|
*/
|
|
|
|
/* BR cyclic states which may go stale cannot have friends as they may
|
|
* suddenly turn off leading their so-called friends stranded and alone.
|
|
* TODO: restrict to only stale going BR cyclics
|
|
*/
|
|
if (contains(br_cyclic, v) && !br_cyclic.at(v).unbounded()) {
|
|
return;
|
|
}
|
|
|
|
u32 friend_depth = offset + 1;
|
|
|
|
flat_set<NFAVertex> preds;
|
|
insert(&preds, inv_adjacent_vertices(v, g));
|
|
const CharReach &cr = g[v].char_reach;
|
|
|
|
flat_set<NFAVertex> cands;
|
|
insert(&cands, adjacent_vertices(v, g));
|
|
|
|
flat_set<NFAVertex> next_preds;
|
|
flat_set<NFAVertex> next_cands;
|
|
for (u32 i = 0; i < friend_depth; i++) {
|
|
findAccelFriendGeneration(g, cr, cands, preds, &next_cands, &next_preds,
|
|
friends);
|
|
preds.insert(next_preds.begin(), next_preds.end());
|
|
next_preds.clear();
|
|
cands.swap(next_cands);
|
|
next_cands.clear();
|
|
}
|
|
}
|
|
|
|
static
|
|
void buildTwoByteStops(flat_set<pair<u8, u8>> &twobyte, const CharReach &cr1,
|
|
const CharReach &cr2) {
|
|
for (size_t c1 = cr1.find_first(); c1 != cr1.npos; c1 = cr1.find_next(c1)) {
|
|
for (size_t c2 = cr2.find_first(); c2 != cr2.npos;
|
|
c2 = cr2.find_next(c2)) {
|
|
twobyte.emplace((u8)c1, (u8)c2);
|
|
}
|
|
}
|
|
}
|
|
|
|
static
|
|
void findStopLiteralsAtVertex(NFAVertex v, const NGHolder &g,
|
|
DoubleAccelInfo &build) {
|
|
DEBUG_PRINTF("state %u\n", g[v].index);
|
|
|
|
// double-byte accel is possible: calculate all single- and double-byte
|
|
// accel literals.
|
|
const CharReach &cr1 = g[v].char_reach;
|
|
|
|
if (edge(v, g.accept, g).second) {
|
|
// If this first byte is an accept state, it must contribute a
|
|
// single-byte escape. We can still go on and calculate additional
|
|
// double-byte ones, though.
|
|
/* TODO: fix for rose */
|
|
build.stop1 |= cr1;
|
|
}
|
|
|
|
flat_set<pair<u8, u8>> twobyte; // for just this starting state
|
|
bool single = false;
|
|
|
|
for (auto w : adjacent_vertices_range(v, g)) {
|
|
if (w == g.accept || w == g.acceptEod) {
|
|
continue;
|
|
}
|
|
const CharReach &cr2 = g[w].char_reach;
|
|
size_t count = cr1.count() * cr2.count() + build.stop2.size();
|
|
if (count > 0 && count <= 8) { // can't do more than 8 two-byte
|
|
buildTwoByteStops(twobyte, cr1, cr2);
|
|
} else {
|
|
// two many two-byte literals, add the first byte as single
|
|
single = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (single || twobyte.empty()) {
|
|
assert(!cr1.none());
|
|
build.stop1 |= cr1;
|
|
} else {
|
|
assert(!twobyte.empty());
|
|
build.stop2.insert(twobyte.begin(), twobyte.end());
|
|
}
|
|
}
|
|
|
|
static
|
|
bool is_bit5_insensitive(const flat_set<pair<u8, u8>> &stop) {
|
|
if (stop.size() != 4) {
|
|
return false;
|
|
}
|
|
|
|
const u8 a = stop.begin()->first & CASE_CLEAR;
|
|
const u8 b = stop.begin()->second & CASE_CLEAR;
|
|
|
|
for (flat_set<pair<u8, u8>>::const_iterator it = stop.begin();
|
|
it != stop.end(); ++it) {
|
|
if ((it->first & CASE_CLEAR) != a || (it->second & CASE_CLEAR) != b) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static
|
|
bool is_dverm(const DoubleAccelInfo &a) {
|
|
if (a.stop1.any()) {
|
|
return false;
|
|
}
|
|
|
|
if (a.stop2.size() == 1) {
|
|
return true;
|
|
}
|
|
|
|
return is_bit5_insensitive(a.stop2);
|
|
}
|
|
|
|
static
|
|
bool is_double_better(const DoubleAccelInfo &a, const DoubleAccelInfo &b) {
|
|
/* Note: this is not an operator< */
|
|
|
|
if (a.stop2.empty()) {
|
|
return false;
|
|
}
|
|
|
|
if (b.stop2.empty()) {
|
|
return true;
|
|
}
|
|
|
|
if (a.stop1.count() > b.stop1.count()) {
|
|
return false;
|
|
}
|
|
|
|
if (a.stop1.count() < b.stop1.count()) {
|
|
return true;
|
|
}
|
|
|
|
bool a_dvm = is_dverm(a);
|
|
bool b_dvm = is_dverm(b);
|
|
|
|
if (b_dvm && !a_dvm) {
|
|
return false;
|
|
}
|
|
|
|
if (!b_dvm && a_dvm) {
|
|
return true;
|
|
}
|
|
|
|
if (a.stop2.size() > b.stop2.size()) {
|
|
return false;
|
|
}
|
|
|
|
if (a.stop2.size() < b.stop2.size()) {
|
|
return true;
|
|
}
|
|
|
|
return a.offset < b.offset;
|
|
}
|
|
|
|
/** \brief Find the escape literals for a two byte accel at the given accel
|
|
* offset */
|
|
static
|
|
void findDoubleAccel(const NGHolder &g, NFAVertex v, u32 accel_offset,
|
|
DoubleAccelInfo &build) {
|
|
DEBUG_PRINTF("find double accel +%u for vertex %u\n", accel_offset,
|
|
g[v].index);
|
|
build.offset = accel_offset;
|
|
|
|
// Our accel state contributes single-byte escapes
|
|
build.stop1 |= ~g[v].char_reach;
|
|
|
|
flat_set<NFAVertex> searchStates; // states that contribute stop literals
|
|
searchStates.insert(v); /* TODO: verify */
|
|
|
|
/* Note: We cannot search past an accepting state */
|
|
/* TODO: remove restriction for non-callback generating */
|
|
flat_set<NFAVertex> nextStates;
|
|
|
|
insert(&nextStates, adjacent_vertices(v, g));
|
|
nextStates.erase(v);
|
|
nextStates.erase(g.accept);
|
|
nextStates.erase(g.acceptEod);
|
|
|
|
searchStates.swap(nextStates);
|
|
nextStates.clear();
|
|
|
|
// subsequent iterations are simpler, just follow all edges
|
|
for (u32 j = 1; j <= accel_offset; j++) {
|
|
for (auto u : searchStates) {
|
|
insert(&nextStates, adjacent_vertices(u, g));
|
|
if (edge(u, g.accept, g).second) {
|
|
nextStates.clear();
|
|
break;
|
|
}
|
|
nextStates.erase(g.accept);
|
|
nextStates.erase(g.acceptEod);
|
|
}
|
|
|
|
searchStates.swap(nextStates);
|
|
nextStates.clear();
|
|
}
|
|
|
|
vector<NFAVertex> sorted;
|
|
insert(&sorted, sorted.end(), searchStates);
|
|
sort(sorted.begin(), sorted.end(), make_index_ordering(g));
|
|
for (auto sv : sorted) {
|
|
findStopLiteralsAtVertex(sv, g, build);
|
|
}
|
|
}
|
|
|
|
/* Try every accel offset from 0 to MAX_ACCEL_DEPTH (inclusive) for vertex
 * \a v and return the double-byte accel info that is_double_better() ranks
 * highest. A default-constructed DoubleAccelInfo is returned if no offset
 * beats it. */
DoubleAccelInfo findBestDoubleAccelInfo(const NGHolder &g, NFAVertex v) {
    DoubleAccelInfo best;

    for (u32 accel_offset = 0; accel_offset <= MAX_ACCEL_DEPTH;
         accel_offset++) {
        DoubleAccelInfo candidate;
        findDoubleAccel(g, v, accel_offset, candidate);
        if (!is_double_better(candidate, best)) {
            continue;
        }
        best = candidate;
    }

    return best;
}
|
|
|
|
static
|
|
void findPaths(const NGHolder &g, NFAVertex v,
|
|
const vector<CharReach> &refined_cr,
|
|
vector<vector<CharReach> > *paths,
|
|
const flat_set<NFAVertex> &forbidden, u32 depth) {
|
|
static const u32 MAGIC_TOO_WIDE_NUMBER = 16;
|
|
if (!depth) {
|
|
paths->push_back(vector<CharReach>());
|
|
return;
|
|
}
|
|
if (v == g.accept || v == g.acceptEod) {
|
|
paths->push_back(vector<CharReach>());
|
|
if (!generates_callbacks(g) || v == g.acceptEod) {
|
|
paths->back().push_back(CharReach()); /* red tape options */
|
|
}
|
|
return;
|
|
}
|
|
|
|
/* for the escape 'literals' we want to use the minimal cr so we
|
|
* can be more selective */
|
|
const CharReach &cr = refined_cr[g[v].index];
|
|
|
|
if (out_degree(v, g) >= MAGIC_TOO_WIDE_NUMBER
|
|
|| hasSelfLoop(v, g)) {
|
|
/* give up on pushing past this point */
|
|
paths->push_back(vector<CharReach>());
|
|
vector<CharReach> &p = paths->back();
|
|
p.push_back(cr);
|
|
return;
|
|
}
|
|
|
|
for (auto w : adjacent_vertices_range(v, g)) {
|
|
if (contains(forbidden, w)) {
|
|
/* path has looped back to one of the active+boring acceleration
|
|
* states. We can ignore this path if we have sufficient back-
|
|
* off. */
|
|
paths->push_back(vector<CharReach>());
|
|
paths->back().push_back(CharReach());
|
|
continue;
|
|
}
|
|
|
|
u32 new_depth = depth - 1;
|
|
vector<vector<CharReach> > curr;
|
|
do {
|
|
curr.clear();
|
|
findPaths(g, w, refined_cr, &curr, forbidden, new_depth);
|
|
} while (new_depth-- && curr.size() >= MAGIC_TOO_WIDE_NUMBER);
|
|
|
|
for (vector<vector<CharReach> >::iterator it = curr.begin();
|
|
it != curr.end(); ++it) {
|
|
paths->push_back(vector<CharReach>());
|
|
vector<CharReach> &p = paths->back();
|
|
p.swap(*it);
|
|
p.push_back(cr);
|
|
}
|
|
}
|
|
}
|
|
|
|
static
|
|
AccelScheme merge(const AccelScheme &a, const AccelScheme &b) {
|
|
return AccelScheme(a.cr | b.cr, MAX(a.offset, b.offset));
|
|
}
|
|
|
|
static
|
|
void findBest(vector<vector<CharReach> >::const_iterator pb,
|
|
vector<vector<CharReach> >::const_iterator pe,
|
|
const AccelScheme &curr, AccelScheme *best) {
|
|
assert(curr.offset <= MAX_ACCEL_DEPTH);
|
|
DEBUG_PRINTF("paths left %zu\n", pe - pb);
|
|
if (pb == pe) {
|
|
*best = curr;
|
|
return;
|
|
}
|
|
|
|
DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
|
|
|
|
vector<AccelScheme> priority_path;
|
|
u32 i = 0;
|
|
for (vector<CharReach>::const_iterator p = pb->begin(); p != pb->end();
|
|
++p, i++) {
|
|
priority_path.push_back(AccelScheme(*p & ~curr.cr, i));
|
|
}
|
|
|
|
sort(priority_path.begin(), priority_path.end());
|
|
for (vector<AccelScheme>::iterator it = priority_path.begin();
|
|
it != priority_path.end(); ++it) {
|
|
vector<AccelScheme>::iterator jt = it + 1;
|
|
for (; jt != priority_path.end(); ++jt) {
|
|
if (!it->cr.isSubsetOf(jt->cr)) {
|
|
break;
|
|
}
|
|
}
|
|
priority_path.erase(it + 1, jt);
|
|
DEBUG_PRINTF("||%zu\n", it->cr.count());
|
|
}
|
|
DEBUG_PRINTF("---\n");
|
|
|
|
for (vector<AccelScheme>::const_iterator it = priority_path.begin();
|
|
it != priority_path.end(); ++it) {
|
|
DEBUG_PRINTF("%u:|| = %zu; p remaining len %zu\n", i, it->cr.count(),
|
|
priority_path.end() - it);
|
|
|
|
AccelScheme in = merge(curr, *it);
|
|
|
|
if (in > *best) {
|
|
DEBUG_PRINTF("worse\n");
|
|
continue;
|
|
}
|
|
AccelScheme temp = *best;
|
|
findBest(pb + 1, pe, in, &temp);
|
|
if (temp < *best) {
|
|
DEBUG_PRINTF("new best\n");
|
|
*best = temp;
|
|
if (curr.cr == best->cr) {
|
|
return; /* could only get better by offset */
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef DEBUG

/* Debug helper: print every path as a bracketed list of character classes. */
static
void dumpPaths(const vector<vector<CharReach> > &paths) {
    for (const auto &path : paths) {
        DEBUG_PRINTF("path: [");
        for (const auto &segment : path) {
            printf(" [");
            describeClass(stdout, segment, 20, CC_OUT_TEXT);
            printf("]");
        }
        printf(" ]\n");
    }
}
#endif
|
|
|
|
static
|
|
void blowoutPathsLessStrictSegment(vector<vector<CharReach> > &paths) {
|
|
/* paths segments which are a superset of an earlier segment should never be
|
|
* picked as an acceleration segment -> to improve processing just replace
|
|
* with dot */
|
|
for (auto &p : paths) {
|
|
for (auto it = p.begin(); it != p.end(); ++it) {
|
|
for (auto jt = next(it); jt != p.end(); ++jt) {
|
|
if (it->isSubsetOf(*jt)) {
|
|
*jt = CharReach::dot();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static
|
|
void unifyPathsLastSegment(vector<vector<CharReach> > &paths) {
|
|
/* try to unify paths which only differ in the last segment */
|
|
for (vector<vector<CharReach> >::iterator p = paths.begin();
|
|
p != paths.end() && p + 1 != paths.end();) {
|
|
vector<CharReach> &a = *p;
|
|
vector<CharReach> &b = *(p + 1);
|
|
|
|
if (a.size() != b.size()) {
|
|
++p;
|
|
continue;
|
|
}
|
|
|
|
u32 i = 0;
|
|
for (; i < a.size() - 1; i++) {
|
|
if (a[i] != b[i]) {
|
|
break;
|
|
}
|
|
}
|
|
if (i == a.size() - 1) {
|
|
/* we can unify these paths */
|
|
a[i] |= b[i];
|
|
paths.erase(p + 1);
|
|
} else {
|
|
++p;
|
|
}
|
|
}
|
|
}
|
|
|
|
static
|
|
void improvePaths(vector<vector<CharReach> > &paths) {
|
|
#ifdef DEBUG
|
|
DEBUG_PRINTF("orig paths\n");
|
|
dumpPaths(paths);
|
|
#endif
|
|
blowoutPathsLessStrictSegment(paths);
|
|
|
|
sort(paths.begin(), paths.end());
|
|
|
|
unifyPathsLastSegment(paths);
|
|
|
|
#ifdef DEBUG
|
|
DEBUG_PRINTF("opt paths\n");
|
|
dumpPaths(paths);
|
|
#endif
|
|
}
|
|
|
|
/* Pick an acceleration scheme for the given \a paths, starting from the
 * \a terminating characters at offset 0. A default-constructed AccelScheme is
 * returned when there are more than 40 paths after simplification. */
AccelScheme findBestAccelScheme(vector<vector<CharReach> > paths,
                                const CharReach &terminating) {
    improvePaths(paths);

    DEBUG_PRINTF("we have %zu paths\n", paths.size());
    if (paths.size() > 40) {
        return AccelScheme(); /* too many paths to explore */
    }

    /* if we were smart we would do something netflowy on the paths to find
     * the best cut. But we aren't, so we will just brute force it.
     */
    AccelScheme best;
    {
        const AccelScheme seed(terminating, 0U);
        findBest(paths.begin(), paths.end(), seed, &best);
    }

    /* findBest is a bit lazy in terms of minimising the offset, so recompute
     * it: for each path, count the segments before the first one covered by
     * best.cr, and take the maximum of those counts over all paths. */
    u32 needed_offset = 0;
    for (const auto &path : paths) {
        u32 covered_at = 0;
        for (const auto &segment : path) {
            if (segment.isSubsetOf(best.cr)) {
                break;
            }
            covered_at++;
        }
        if (covered_at > needed_offset) {
            needed_offset = covered_at;
        }
    }
    assert(needed_offset <= best.offset);
    best.offset = needed_offset;

    return best;
}
|
|
|
|
/* Find a single-byte acceleration scheme for the set of states \a verts.
 *
 * Every vertex must have a self-loop, and the union of the characters outside
 * their reaches must not exceed the stop character limit (the floating limit
 * is used when \a allow_wide is set); otherwise a default-constructed
 * (invalid) AccelScheme is returned.
 */
AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
                         const vector<CharReach> &refined_cr,
                         const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
                         bool allow_wide) {
    CharReach terminating;
    for (auto v : verts) {
        if (!hasSelfLoop(v, g)) {
            DEBUG_PRINTF("no self loop\n");
            return AccelScheme(); /* invalid scheme */
        }

        // characters on which this state turns off
        terminating |= ~g[v].char_reach;
    }

    DEBUG_PRINTF("set vertex has %zu stop chars\n", terminating.count());
    const size_t limit = allow_wide ? ACCEL_MAX_FLOATING_STOP_CHAR
                                    : ACCEL_MAX_STOP_CHAR;
    if (terminating.count() > limit) {
        return AccelScheme(); /* invalid scheme */
    }

    /* Paths leading back into the accel states themselves can be ignored.
     *
     * Note: we can not in general (TODO: ignore when possible) ignore entries
     * into the bounded repeat cyclic states as that is when the magic happens
     */
    flat_set<NFAVertex> ignore_vert_set(verts.begin(), verts.end());
    for (const auto &br : br_cyclic) {
        /* TODO: can allow if repeatMin <= 1 ? */
        ignore_vert_set.erase(br.first);
    }

    vector<vector<CharReach> > paths;
    for (auto v : verts) {
        for (auto w : adjacent_vertices_range(v, g)) {
            if (w == v) {
                continue; // skip the self-loop
            }
            findPaths(g, w, refined_cr, &paths, ignore_vert_set,
                      MAX_ACCEL_DEPTH);
        }
    }

    /* findPaths builds each path back to front: reverse them */
    for (auto &path : paths) {
        reverse(path.begin(), path.end());
    }

    return findBestAccelScheme(std::move(paths), terminating);
}
|
|
|
|
/* Returns startDs, or a vertex that can stand in for it.
 *
 * startDs itself is returned if it has proper successors. Otherwise, if start
 * has exactly one successor other than startDs, the chain from that successor
 * is followed while each vertex has a single out-edge leading to a vertex
 * with full reach; the first vertex with a self-loop found on the way is
 * returned. In all other cases startDs is returned.
 */
NFAVertex get_sds_or_proxy(const NGHolder &g) {
    DEBUG_PRINTF("looking for sds proxy\n");
    if (proper_out_degree(g.startDs, g)) {
        return g.startDs;
    }

    /* find the sole successor of start other than startDs */
    NFAVertex v = NFAGraph::null_vertex();
    for (auto w : adjacent_vertices_range(g.start, g)) {
        if (w == g.startDs) {
            continue;
        }
        if (!v) {
            v = w;
            continue;
        }
        return g.startDs; /* more than one candidate */
    }

    if (!v) {
        return g.startDs;
    }

    /* walk the chain until we find a self-loop or have to give up */
    while (!hasSelfLoop(v, g)) {
        if (out_degree(v, g) != 1) {
            return g.startDs;
        }
        NFAVertex succ = getSoleDestVertex(g, v);
        if (!g[succ].char_reach.all()) {
            return g.startDs;
        }
        v = succ;
    }

    DEBUG_PRINTF("woot %u\n", g[v].index);
    return v;
}
|
|
|
|
static
|
|
NFAVertex find_next(const NFAVertex v, const NGHolder &g) {
|
|
NFAVertex res = NFAGraph::null_vertex();
|
|
for (NFAVertex u : adjacent_vertices_range(v, g)) {
|
|
if (u != v) {
|
|
res = u;
|
|
break;
|
|
}
|
|
}
|
|
return res;
|
|
}
|
|
|
|
/** \brief Check if vertex \a v is a multi accelerable state (for a limex NFA). */
|
|
MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g,
|
|
const vector<NFAVertex> &states,
|
|
const CompileContext &cc) {
|
|
// For a set of states to be accelerable, we basically have to have only
|
|
// one state to accelerate.
|
|
if (states.size() != 1) {
|
|
DEBUG_PRINTF("can't accelerate multiple states\n");
|
|
return MultibyteAccelInfo();
|
|
}
|
|
|
|
// Get our base vertex
|
|
NFAVertex v = states[0];
|
|
|
|
// We need the base vertex to be a self-looping dotall leading to exactly
|
|
// one vertex.
|
|
if (!hasSelfLoop(v, g)) {
|
|
DEBUG_PRINTF("base vertex has self-loop\n");
|
|
return MultibyteAccelInfo();
|
|
}
|
|
|
|
if (!g[v].char_reach.all()) {
|
|
DEBUG_PRINTF("can't accelerate anything but dot\n");
|
|
return MultibyteAccelInfo();
|
|
}
|
|
|
|
if (proper_out_degree(v, g) != 1) {
|
|
DEBUG_PRINTF("can't accelerate states with multiple successors\n");
|
|
return MultibyteAccelInfo();
|
|
}
|
|
|
|
// find our start vertex
|
|
NFAVertex cur = find_next(v, g);
|
|
if (cur == NFAGraph::null_vertex()) {
|
|
DEBUG_PRINTF("invalid start vertex\n");
|
|
return MultibyteAccelInfo();
|
|
}
|
|
|
|
bool has_offset = false;
|
|
u32 offset = 0;
|
|
CharReach cr = g[cur].char_reach;
|
|
|
|
// if we start with a dot, we have an offset, so defer figuring out the
|
|
// real CharReach for this accel scheme
|
|
if (cr == CharReach::dot()) {
|
|
has_offset = true;
|
|
offset = 1;
|
|
}
|
|
|
|
// figure out our offset
|
|
while (has_offset) {
|
|
// vertices have to have no self loops
|
|
if (hasSelfLoop(cur, g)) {
|
|
DEBUG_PRINTF("can't have self-loops\n");
|
|
return MultibyteAccelInfo();
|
|
}
|
|
|
|
// we have to have exactly 1 successor to have this acceleration scheme
|
|
if (out_degree(cur, g) != 1) {
|
|
DEBUG_PRINTF("can't have multiple successors\n");
|
|
return MultibyteAccelInfo();
|
|
}
|
|
|
|
cur = *adjacent_vertices(cur, g).first;
|
|
|
|
// if we met a special vertex, bail out
|
|
if (is_special(cur, g)) {
|
|
DEBUG_PRINTF("can't have special vertices\n");
|
|
return MultibyteAccelInfo();
|
|
}
|
|
|
|
// now, get the real char reach
|
|
if (g[cur].char_reach != CharReach::dot()) {
|
|
cr = g[cur].char_reach;
|
|
has_offset = false;
|
|
} else {
|
|
offset++;
|
|
}
|
|
}
|
|
|
|
// now, fire up the compilation machinery
|
|
target_t ti = cc.target_info;
|
|
unsigned max_len = ti.has_avx2() ? MULTIACCEL_MAX_LEN_AVX2 : MULTIACCEL_MAX_LEN_SSE;
|
|
MultiaccelCompileHelper mac(cr, offset, max_len);
|
|
|
|
while (mac.canAdvance()) {
|
|
// vertices have to have no self loops
|
|
if (hasSelfLoop(cur, g)) {
|
|
break;
|
|
}
|
|
|
|
// we have to have exactly 1 successor to have this acceleration scheme
|
|
if (out_degree(cur, g) != 1) {
|
|
break;
|
|
}
|
|
|
|
cur = *adjacent_vertices(cur, g).first;
|
|
|
|
// if we met a special vertex, bail out
|
|
if (is_special(cur, g)) {
|
|
break;
|
|
}
|
|
|
|
mac.advance(g[cur].char_reach);
|
|
}
|
|
MultibyteAccelInfo mai = mac.getBestScheme();
|
|
#ifdef DEBUG
|
|
DEBUG_PRINTF("Multibyte acceleration scheme: type: %u offset: %u lengths: %u,%u\n",
|
|
mai.type, mai.offset, mai.len1, mai.len2);
|
|
for (size_t c = mai.cr.find_first(); c != CharReach::npos; c = mai.cr.find_next(c)) {
|
|
DEBUG_PRINTF("multibyte accel char: %zu\n", c);
|
|
}
|
|
#endif
|
|
return mai;
|
|
}
|
|
|
|
/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). */
|
|
bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
|
|
const vector<CharReach> &refined_cr,
|
|
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
|
|
AccelScheme *as, bool allow_wide) {
|
|
// For a state to be accelerable, our current criterion is that it be a
|
|
// large character class with a self-loop and narrow set of possible other
|
|
// successors (i.e. no special successors, union of successor reachability
|
|
// is small).
|
|
if (!hasSelfLoop(v, g)) {
|
|
return false;
|
|
}
|
|
|
|
// check that this state is reachable on most characters
|
|
/* we want to use the maximal reach here (in the graph) */
|
|
CharReach terminating = g[v].char_reach;
|
|
terminating.flip();
|
|
|
|
DEBUG_PRINTF("vertex %u is cyclic and has %zu stop chars%s\n",
|
|
g[v].index, terminating.count(),
|
|
allow_wide ? " (w)" : "");
|
|
|
|
size_t limit = allow_wide ? ACCEL_MAX_FLOATING_STOP_CHAR
|
|
: ACCEL_MAX_STOP_CHAR;
|
|
if (terminating.count() > limit) {
|
|
DEBUG_PRINTF("too leaky\n");
|
|
return false;
|
|
}
|
|
|
|
flat_set<NFAVertex> curr, next;
|
|
|
|
insert(&curr, adjacent_vertices(v, g));
|
|
curr.erase(v); // erase self-loop
|
|
|
|
// We consider offsets of zero through three; this is fairly arbitrary at
|
|
// present and could probably be increased (FIXME)
|
|
/* WARNING: would/could do horrible things to compile time */
|
|
bool stop = false;
|
|
vector<CharReach> depthReach(MAX_ACCEL_DEPTH);
|
|
unsigned int depth;
|
|
for (depth = 0; !stop && depth < MAX_ACCEL_DEPTH; depth++) {
|
|
CharReach &cr = depthReach[depth];
|
|
for (auto t : curr) {
|
|
if (is_special(t, g)) {
|
|
// We've bumped into the edge of the graph, so we should stop
|
|
// searching.
|
|
// Exception: iff our cyclic state is not a dot, than we can
|
|
// safely accelerate towards an EOD accept.
|
|
|
|
/* Exception: nfas that don't generate callbacks so accepts are
|
|
* fine too */
|
|
if (t == g.accept && !generates_callbacks(g)) {
|
|
stop = true; // don't search beyond this depth
|
|
continue;
|
|
} else if (t == g.accept) {
|
|
goto depth_done;
|
|
}
|
|
|
|
assert(t == g.acceptEod);
|
|
stop = true; // don't search beyond this depth
|
|
} else {
|
|
// Non-special vertex
|
|
insert(&next, adjacent_vertices(t, g));
|
|
/* for the escape 'literals' we want to use the minimal cr so we
|
|
* can be more selective */
|
|
cr |= refined_cr[g[t].index];
|
|
}
|
|
}
|
|
|
|
cr |= terminating;
|
|
DEBUG_PRINTF("depth %u has unioned reach %zu\n", depth, cr.count());
|
|
|
|
curr.swap(next);
|
|
next.clear();
|
|
}
|
|
|
|
depth_done:
|
|
|
|
if (depth == 0) {
|
|
return false;
|
|
}
|
|
|
|
DEBUG_PRINTF("selecting from depth 0..%u\n", depth);
|
|
|
|
/* Look for the most awesome acceleration evar */
|
|
for (unsigned int i = 0; i < depth; i++) {
|
|
if (depthReach[i].none()) {
|
|
DEBUG_PRINTF("red tape acceleration engine depth %u\n", i);
|
|
*as = AccelScheme(CharReach(), i);
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// First, loop over our depths and see if we have a suitable 2-byte
|
|
// caseful vermicelli option: this is the (second) fastest accel we have
|
|
if (depth > 1) {
|
|
for (unsigned int i = 0; i < (depth - 1); i++) {
|
|
const CharReach &cra = depthReach[i];
|
|
const CharReach &crb = depthReach[i + 1];
|
|
if ((cra.count() == 1 && crb.count() == 1)
|
|
|| (cra.count() == 2 && crb.count() == 2
|
|
&& cra.isBit5Insensitive() && crb.isBit5Insensitive())) {
|
|
DEBUG_PRINTF("two-byte vermicelli, depth %u\n", i);
|
|
*as = AccelScheme(CharReach::dot(), i);
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Second option: a two-byte shufti (i.e. less than eight 2-byte
|
|
// literals)
|
|
if (depth > 1) {
|
|
for (unsigned int i = 0; i < (depth - 1); i++) {
|
|
if (depthReach[i].count()*depthReach[i+1].count() <= 8) {
|
|
DEBUG_PRINTF("two-byte shufti, depth %u\n", i);
|
|
*as = AccelScheme(CharReach::dot(), i);
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Look for one byte accel schemes verm/shufti;
|
|
vector<NFAVertex> verts(1, v);
|
|
*as = nfaFindAccel(g, verts, refined_cr, br_cyclic, allow_wide);
|
|
DEBUG_PRINTF("as width %zu\n", as->cr.count());
|
|
return as->cr.count() <= ACCEL_MAX_STOP_CHAR || allow_wide;
|
|
}
|
|
|
|
} // namespace ue2
|