ng_limex_accel: compile time speedups, tidy up

This commit is contained in:
Justin Viiret 2017-04-06 13:47:30 +10:00 committed by Matthew Barr
parent 40f03929be
commit c0d7960954

View File

@ -45,12 +45,16 @@
#include "util/container.h" #include "util/container.h"
#include "util/dump_charclass.h" #include "util/dump_charclass.h"
#include "util/graph_range.h" #include "util/graph_range.h"
#include "util/small_vector.h"
#include "util/target_info.h" #include "util/target_info.h"
#include <algorithm> #include <algorithm>
#include <map> #include <map>
#include <boost/range/adaptor/map.hpp>
using namespace std; using namespace std;
using boost::adaptors::map_keys;
namespace ue2 { namespace ue2 {
@ -135,15 +139,15 @@ void findAccelFriends(const NGHolder &g, NFAVertex v,
static static
void findPaths(const NGHolder &g, NFAVertex v, void findPaths(const NGHolder &g, NFAVertex v,
const vector<CharReach> &refined_cr, const vector<CharReach> &refined_cr,
vector<vector<CharReach> > *paths, vector<vector<CharReach>> *paths,
const flat_set<NFAVertex> &forbidden, u32 depth) { const flat_set<NFAVertex> &forbidden, u32 depth) {
static const u32 MAGIC_TOO_WIDE_NUMBER = 16; static const u32 MAGIC_TOO_WIDE_NUMBER = 16;
if (!depth) { if (!depth) {
paths->push_back(vector<CharReach>()); paths->push_back({});
return; return;
} }
if (v == g.accept || v == g.acceptEod) { if (v == g.accept || v == g.acceptEod) {
paths->push_back(vector<CharReach>()); paths->push_back({});
if (!generates_callbacks(g) || v == g.acceptEod) { if (!generates_callbacks(g) || v == g.acceptEod) {
paths->back().push_back(CharReach()); /* red tape options */ paths->back().push_back(CharReach()); /* red tape options */
} }
@ -157,42 +161,37 @@ void findPaths(const NGHolder &g, NFAVertex v,
if (out_degree(v, g) >= MAGIC_TOO_WIDE_NUMBER if (out_degree(v, g) >= MAGIC_TOO_WIDE_NUMBER
|| hasSelfLoop(v, g)) { || hasSelfLoop(v, g)) {
/* give up on pushing past this point */ /* give up on pushing past this point */
paths->push_back(vector<CharReach>()); paths->push_back({cr});
vector<CharReach> &p = paths->back();
p.push_back(cr);
return; return;
} }
vector<vector<CharReach>> curr;
for (auto w : adjacent_vertices_range(v, g)) { for (auto w : adjacent_vertices_range(v, g)) {
if (contains(forbidden, w)) { if (contains(forbidden, w)) {
/* path has looped back to one of the active+boring acceleration /* path has looped back to one of the active+boring acceleration
* states. We can ignore this path if we have sufficient back- * states. We can ignore this path if we have sufficient back-
* off. */ * off. */
paths->push_back(vector<CharReach>()); paths->push_back({CharReach()});
paths->back().push_back(CharReach());
continue; continue;
} }
u32 new_depth = depth - 1; u32 new_depth = depth - 1;
vector<vector<CharReach> > curr;
do { do {
curr.clear(); curr.clear();
findPaths(g, w, refined_cr, &curr, forbidden, new_depth); findPaths(g, w, refined_cr, &curr, forbidden, new_depth);
} while (new_depth-- && curr.size() >= MAGIC_TOO_WIDE_NUMBER); } while (new_depth-- && curr.size() >= MAGIC_TOO_WIDE_NUMBER);
for (vector<vector<CharReach> >::iterator it = curr.begin(); for (auto &c : curr) {
it != curr.end(); ++it) { c.push_back(cr);
paths->push_back(vector<CharReach>()); paths->push_back(std::move(c));
vector<CharReach> &p = paths->back();
p.swap(*it);
p.push_back(cr);
} }
} }
} }
namespace {
struct SAccelScheme { struct SAccelScheme {
SAccelScheme(const CharReach &cr_in, u32 offset_in) SAccelScheme(CharReach cr_in, u32 offset_in)
: cr(cr_in), offset(offset_in) { : cr(std::move(cr_in)), offset(offset_in) {
assert(offset <= MAX_ACCEL_DEPTH); assert(offset <= MAX_ACCEL_DEPTH);
} }
@ -215,6 +214,7 @@ struct SAccelScheme {
CharReach cr = CharReach::dot(); CharReach cr = CharReach::dot();
u32 offset = MAX_ACCEL_DEPTH + 1; u32 offset = MAX_ACCEL_DEPTH + 1;
}; };
}
static static
void findBest(vector<vector<CharReach> >::const_iterator pb, void findBest(vector<vector<CharReach> >::const_iterator pb,
@ -233,12 +233,11 @@ void findBest(vector<vector<CharReach> >::const_iterator pb,
DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin()); DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
vector<SAccelScheme> priority_path; small_vector<SAccelScheme, 10> priority_path;
priority_path.reserve(pb->size()); priority_path.reserve(pb->size());
u32 i = 0; u32 i = 0;
for (vector<CharReach>::const_iterator p = pb->begin(); p != pb->end(); for (auto p = pb->begin(); p != pb->end(); ++p, i++) {
++p, i++) { SAccelScheme as(*p | curr.cr, max(i, curr.offset));
SAccelScheme as(*p | curr.cr, MAX(i, curr.offset));
if (*best < as) { if (*best < as) {
DEBUG_PRINTF("worse\n"); DEBUG_PRINTF("worse\n");
continue; continue;
@ -259,13 +258,8 @@ void findBest(vector<vector<CharReach> >::const_iterator pb,
} }
DEBUG_PRINTF("---\n"); DEBUG_PRINTF("---\n");
for (vector<SAccelScheme>::const_iterator it = priority_path.begin(); for (const SAccelScheme &in : priority_path) {
it != priority_path.end(); ++it) { DEBUG_PRINTF("in: count %zu\n", in.cr.count());
DEBUG_PRINTF("%u:|| = %zu; p remaining len %zu\n", i, it->cr.count(),
priority_path.end() - it);
SAccelScheme in = move(*it);
if (*best < in) { if (*best < in) {
DEBUG_PRINTF("worse\n"); DEBUG_PRINTF("worse\n");
continue; continue;
@ -278,9 +272,10 @@ void findBest(vector<vector<CharReach> >::const_iterator pb,
} }
} }
namespace {
struct DAccelScheme { struct DAccelScheme {
DAccelScheme(const CharReach &cr_in, u32 offset_in) DAccelScheme(CharReach cr_in, u32 offset_in)
: double_cr(cr_in), double_offset(offset_in) { : double_cr(std::move(cr_in)), double_offset(offset_in) {
assert(double_offset <= MAX_ACCEL_DEPTH); assert(double_offset <= MAX_ACCEL_DEPTH);
} }
@ -319,6 +314,7 @@ struct DAccelScheme {
CharReach double_cr; CharReach double_cr;
u32 double_offset = 0; u32 double_offset = 0;
}; };
}
static static
DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1, DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1,
@ -391,11 +387,10 @@ void findDoubleBest(vector<vector<CharReach> >::const_iterator pb,
DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin()); DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
vector<DAccelScheme> priority_path; small_vector<DAccelScheme, 10> priority_path;
priority_path.reserve(pb->size()); priority_path.reserve(pb->size());
u32 i = 0; u32 i = 0;
for (vector<CharReach>::const_iterator p = pb->begin(); for (auto p = pb->begin(); p != pb->end() && next(p) != pb->end();
p != pb->end() && next(p) != pb->end();
++p, i++) { ++p, i++) {
DAccelScheme as = make_double_accel(curr, *p, *next(p), i); DAccelScheme as = make_double_accel(curr, *p, *next(p), i);
if (*best < as) { if (*best < as) {
@ -411,9 +406,7 @@ void findDoubleBest(vector<vector<CharReach> >::const_iterator pb,
best->double_byte.size(), best->double_cr.count(), best->double_byte.size(), best->double_cr.count(),
best->double_offset); best->double_offset);
for (vector<DAccelScheme>::const_iterator it = priority_path.begin(); for (const DAccelScheme &in : priority_path) {
it != priority_path.end(); ++it) {
DAccelScheme in = move(*it);
DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n", DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n",
in.double_byte.size(), in.double_cr.count(), in.double_byte.size(), in.double_cr.count(),
in.double_offset); in.double_offset);
@ -427,14 +420,12 @@ void findDoubleBest(vector<vector<CharReach> >::const_iterator pb,
#ifdef DEBUG #ifdef DEBUG
static static
void dumpPaths(const vector<vector<CharReach> > &paths) { void dumpPaths(const vector<vector<CharReach>> &paths) {
for (vector<vector<CharReach> >::const_iterator p = paths.begin(); for (const auto &path : paths) {
p != paths.end(); ++p) {
DEBUG_PRINTF("path: ["); DEBUG_PRINTF("path: [");
for (vector<CharReach>::const_iterator it = p->begin(); it != p->end(); for (const auto &cr : path) {
++it) {
printf(" ["); printf(" [");
describeClass(stdout, *it, 20, CC_OUT_TEXT); describeClass(stdout, cr, 20, CC_OUT_TEXT);
printf("]"); printf("]");
} }
printf(" ]\n"); printf(" ]\n");
@ -545,7 +536,7 @@ DAccelScheme findBestDoubleAccelScheme(vector<vector<CharReach> > paths,
#define MAX_EXPLORE_PATHS 40 #define MAX_EXPLORE_PATHS 40
AccelScheme findBestAccelScheme(vector<vector<CharReach> > paths, AccelScheme findBestAccelScheme(vector<vector<CharReach>> paths,
const CharReach &terminating, const CharReach &terminating,
bool look_for_double_byte) { bool look_for_double_byte) {
AccelScheme rv; AccelScheme rv;
@ -575,14 +566,13 @@ AccelScheme findBestAccelScheme(vector<vector<CharReach> > paths,
/* find best is a bit lazy in terms of minimising the offset, see if we can /* find best is a bit lazy in terms of minimising the offset, see if we can
* make it better. need to find the min max offset that we need.*/ * make it better. need to find the min max offset that we need.*/
u32 offset = 0; u32 offset = 0;
for (vector<vector<CharReach> >::iterator p = paths.begin(); for (const auto &path : paths) {
p != paths.end(); ++p) {
u32 i = 0; u32 i = 0;
for (vector<CharReach>::iterator it = p->begin(); it != p->end(); for (const auto &cr : path) {
++it, i++) { if (cr.isSubsetOf(best.cr)) {
if (it->isSubsetOf(best.cr)) {
break; break;
} }
i++;
} }
offset = MAX(offset, i); offset = MAX(offset, i);
} }
@ -620,17 +610,15 @@ AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
return AccelScheme(); /* invalid scheme */ return AccelScheme(); /* invalid scheme */
} }
vector<vector<CharReach> > paths; vector<vector<CharReach>> paths;
flat_set<NFAVertex> ignore_vert_set(verts.begin(), verts.end()); flat_set<NFAVertex> ignore_vert_set(verts.begin(), verts.end());
/* Note: we can not in general (TODO: ignore when possible) ignore entries /* Note: we can not in general (TODO: ignore when possible) ignore entries
* into the bounded repeat cyclic states as that is when the magic happens * into the bounded repeat cyclic states as that is when the magic happens
*/ */
for (map<NFAVertex, BoundedRepeatSummary>::const_iterator it for (auto v : br_cyclic | map_keys) {
= br_cyclic.begin();
it != br_cyclic.end(); ++it) {
/* TODO: can allow if repeatMin <= 1 ? */ /* TODO: can allow if repeatMin <= 1 ? */
ignore_vert_set.erase(it->first); ignore_vert_set.erase(v);
} }
for (auto v : verts) { for (auto v : verts) {
@ -643,9 +631,8 @@ AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
} }
/* paths built wrong: reverse them */ /* paths built wrong: reverse them */
for (vector<vector<CharReach> >::iterator it = paths.begin(); for (auto &path : paths) {
it != paths.end(); ++it) { reverse(path.begin(), path.end());
reverse(it->begin(), it->end());
} }
return findBestAccelScheme(std::move(paths), terminating, return findBestAccelScheme(std::move(paths), terminating,