mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
ng_limex_accel: compile time speedups, tidy up
This commit is contained in:
parent
40f03929be
commit
c0d7960954
@ -45,12 +45,16 @@
|
|||||||
#include "util/container.h"
|
#include "util/container.h"
|
||||||
#include "util/dump_charclass.h"
|
#include "util/dump_charclass.h"
|
||||||
#include "util/graph_range.h"
|
#include "util/graph_range.h"
|
||||||
|
#include "util/small_vector.h"
|
||||||
#include "util/target_info.h"
|
#include "util/target_info.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <map>
|
#include <map>
|
||||||
|
|
||||||
|
#include <boost/range/adaptor/map.hpp>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
using boost::adaptors::map_keys;
|
||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
@ -135,15 +139,15 @@ void findAccelFriends(const NGHolder &g, NFAVertex v,
|
|||||||
static
|
static
|
||||||
void findPaths(const NGHolder &g, NFAVertex v,
|
void findPaths(const NGHolder &g, NFAVertex v,
|
||||||
const vector<CharReach> &refined_cr,
|
const vector<CharReach> &refined_cr,
|
||||||
vector<vector<CharReach> > *paths,
|
vector<vector<CharReach>> *paths,
|
||||||
const flat_set<NFAVertex> &forbidden, u32 depth) {
|
const flat_set<NFAVertex> &forbidden, u32 depth) {
|
||||||
static const u32 MAGIC_TOO_WIDE_NUMBER = 16;
|
static const u32 MAGIC_TOO_WIDE_NUMBER = 16;
|
||||||
if (!depth) {
|
if (!depth) {
|
||||||
paths->push_back(vector<CharReach>());
|
paths->push_back({});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (v == g.accept || v == g.acceptEod) {
|
if (v == g.accept || v == g.acceptEod) {
|
||||||
paths->push_back(vector<CharReach>());
|
paths->push_back({});
|
||||||
if (!generates_callbacks(g) || v == g.acceptEod) {
|
if (!generates_callbacks(g) || v == g.acceptEod) {
|
||||||
paths->back().push_back(CharReach()); /* red tape options */
|
paths->back().push_back(CharReach()); /* red tape options */
|
||||||
}
|
}
|
||||||
@ -157,42 +161,37 @@ void findPaths(const NGHolder &g, NFAVertex v,
|
|||||||
if (out_degree(v, g) >= MAGIC_TOO_WIDE_NUMBER
|
if (out_degree(v, g) >= MAGIC_TOO_WIDE_NUMBER
|
||||||
|| hasSelfLoop(v, g)) {
|
|| hasSelfLoop(v, g)) {
|
||||||
/* give up on pushing past this point */
|
/* give up on pushing past this point */
|
||||||
paths->push_back(vector<CharReach>());
|
paths->push_back({cr});
|
||||||
vector<CharReach> &p = paths->back();
|
|
||||||
p.push_back(cr);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vector<vector<CharReach>> curr;
|
||||||
for (auto w : adjacent_vertices_range(v, g)) {
|
for (auto w : adjacent_vertices_range(v, g)) {
|
||||||
if (contains(forbidden, w)) {
|
if (contains(forbidden, w)) {
|
||||||
/* path has looped back to one of the active+boring acceleration
|
/* path has looped back to one of the active+boring acceleration
|
||||||
* states. We can ignore this path if we have sufficient back-
|
* states. We can ignore this path if we have sufficient back-
|
||||||
* off. */
|
* off. */
|
||||||
paths->push_back(vector<CharReach>());
|
paths->push_back({CharReach()});
|
||||||
paths->back().push_back(CharReach());
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 new_depth = depth - 1;
|
u32 new_depth = depth - 1;
|
||||||
vector<vector<CharReach> > curr;
|
|
||||||
do {
|
do {
|
||||||
curr.clear();
|
curr.clear();
|
||||||
findPaths(g, w, refined_cr, &curr, forbidden, new_depth);
|
findPaths(g, w, refined_cr, &curr, forbidden, new_depth);
|
||||||
} while (new_depth-- && curr.size() >= MAGIC_TOO_WIDE_NUMBER);
|
} while (new_depth-- && curr.size() >= MAGIC_TOO_WIDE_NUMBER);
|
||||||
|
|
||||||
for (vector<vector<CharReach> >::iterator it = curr.begin();
|
for (auto &c : curr) {
|
||||||
it != curr.end(); ++it) {
|
c.push_back(cr);
|
||||||
paths->push_back(vector<CharReach>());
|
paths->push_back(std::move(c));
|
||||||
vector<CharReach> &p = paths->back();
|
|
||||||
p.swap(*it);
|
|
||||||
p.push_back(cr);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
struct SAccelScheme {
|
struct SAccelScheme {
|
||||||
SAccelScheme(const CharReach &cr_in, u32 offset_in)
|
SAccelScheme(CharReach cr_in, u32 offset_in)
|
||||||
: cr(cr_in), offset(offset_in) {
|
: cr(std::move(cr_in)), offset(offset_in) {
|
||||||
assert(offset <= MAX_ACCEL_DEPTH);
|
assert(offset <= MAX_ACCEL_DEPTH);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -215,6 +214,7 @@ struct SAccelScheme {
|
|||||||
CharReach cr = CharReach::dot();
|
CharReach cr = CharReach::dot();
|
||||||
u32 offset = MAX_ACCEL_DEPTH + 1;
|
u32 offset = MAX_ACCEL_DEPTH + 1;
|
||||||
};
|
};
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void findBest(vector<vector<CharReach> >::const_iterator pb,
|
void findBest(vector<vector<CharReach> >::const_iterator pb,
|
||||||
@ -233,12 +233,11 @@ void findBest(vector<vector<CharReach> >::const_iterator pb,
|
|||||||
|
|
||||||
DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
|
DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
|
||||||
|
|
||||||
vector<SAccelScheme> priority_path;
|
small_vector<SAccelScheme, 10> priority_path;
|
||||||
priority_path.reserve(pb->size());
|
priority_path.reserve(pb->size());
|
||||||
u32 i = 0;
|
u32 i = 0;
|
||||||
for (vector<CharReach>::const_iterator p = pb->begin(); p != pb->end();
|
for (auto p = pb->begin(); p != pb->end(); ++p, i++) {
|
||||||
++p, i++) {
|
SAccelScheme as(*p | curr.cr, max(i, curr.offset));
|
||||||
SAccelScheme as(*p | curr.cr, MAX(i, curr.offset));
|
|
||||||
if (*best < as) {
|
if (*best < as) {
|
||||||
DEBUG_PRINTF("worse\n");
|
DEBUG_PRINTF("worse\n");
|
||||||
continue;
|
continue;
|
||||||
@ -259,13 +258,8 @@ void findBest(vector<vector<CharReach> >::const_iterator pb,
|
|||||||
}
|
}
|
||||||
DEBUG_PRINTF("---\n");
|
DEBUG_PRINTF("---\n");
|
||||||
|
|
||||||
for (vector<SAccelScheme>::const_iterator it = priority_path.begin();
|
for (const SAccelScheme &in : priority_path) {
|
||||||
it != priority_path.end(); ++it) {
|
DEBUG_PRINTF("in: count %zu\n", in.cr.count());
|
||||||
DEBUG_PRINTF("%u:|| = %zu; p remaining len %zu\n", i, it->cr.count(),
|
|
||||||
priority_path.end() - it);
|
|
||||||
|
|
||||||
SAccelScheme in = move(*it);
|
|
||||||
|
|
||||||
if (*best < in) {
|
if (*best < in) {
|
||||||
DEBUG_PRINTF("worse\n");
|
DEBUG_PRINTF("worse\n");
|
||||||
continue;
|
continue;
|
||||||
@ -278,9 +272,10 @@ void findBest(vector<vector<CharReach> >::const_iterator pb,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
struct DAccelScheme {
|
struct DAccelScheme {
|
||||||
DAccelScheme(const CharReach &cr_in, u32 offset_in)
|
DAccelScheme(CharReach cr_in, u32 offset_in)
|
||||||
: double_cr(cr_in), double_offset(offset_in) {
|
: double_cr(std::move(cr_in)), double_offset(offset_in) {
|
||||||
assert(double_offset <= MAX_ACCEL_DEPTH);
|
assert(double_offset <= MAX_ACCEL_DEPTH);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -319,6 +314,7 @@ struct DAccelScheme {
|
|||||||
CharReach double_cr;
|
CharReach double_cr;
|
||||||
u32 double_offset = 0;
|
u32 double_offset = 0;
|
||||||
};
|
};
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1,
|
DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1,
|
||||||
@ -391,11 +387,10 @@ void findDoubleBest(vector<vector<CharReach> >::const_iterator pb,
|
|||||||
|
|
||||||
DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
|
DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
|
||||||
|
|
||||||
vector<DAccelScheme> priority_path;
|
small_vector<DAccelScheme, 10> priority_path;
|
||||||
priority_path.reserve(pb->size());
|
priority_path.reserve(pb->size());
|
||||||
u32 i = 0;
|
u32 i = 0;
|
||||||
for (vector<CharReach>::const_iterator p = pb->begin();
|
for (auto p = pb->begin(); p != pb->end() && next(p) != pb->end();
|
||||||
p != pb->end() && next(p) != pb->end();
|
|
||||||
++p, i++) {
|
++p, i++) {
|
||||||
DAccelScheme as = make_double_accel(curr, *p, *next(p), i);
|
DAccelScheme as = make_double_accel(curr, *p, *next(p), i);
|
||||||
if (*best < as) {
|
if (*best < as) {
|
||||||
@ -411,9 +406,7 @@ void findDoubleBest(vector<vector<CharReach> >::const_iterator pb,
|
|||||||
best->double_byte.size(), best->double_cr.count(),
|
best->double_byte.size(), best->double_cr.count(),
|
||||||
best->double_offset);
|
best->double_offset);
|
||||||
|
|
||||||
for (vector<DAccelScheme>::const_iterator it = priority_path.begin();
|
for (const DAccelScheme &in : priority_path) {
|
||||||
it != priority_path.end(); ++it) {
|
|
||||||
DAccelScheme in = move(*it);
|
|
||||||
DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n",
|
DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n",
|
||||||
in.double_byte.size(), in.double_cr.count(),
|
in.double_byte.size(), in.double_cr.count(),
|
||||||
in.double_offset);
|
in.double_offset);
|
||||||
@ -427,14 +420,12 @@ void findDoubleBest(vector<vector<CharReach> >::const_iterator pb,
|
|||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
static
|
static
|
||||||
void dumpPaths(const vector<vector<CharReach> > &paths) {
|
void dumpPaths(const vector<vector<CharReach>> &paths) {
|
||||||
for (vector<vector<CharReach> >::const_iterator p = paths.begin();
|
for (const auto &path : paths) {
|
||||||
p != paths.end(); ++p) {
|
|
||||||
DEBUG_PRINTF("path: [");
|
DEBUG_PRINTF("path: [");
|
||||||
for (vector<CharReach>::const_iterator it = p->begin(); it != p->end();
|
for (const auto &cr : path) {
|
||||||
++it) {
|
|
||||||
printf(" [");
|
printf(" [");
|
||||||
describeClass(stdout, *it, 20, CC_OUT_TEXT);
|
describeClass(stdout, cr, 20, CC_OUT_TEXT);
|
||||||
printf("]");
|
printf("]");
|
||||||
}
|
}
|
||||||
printf(" ]\n");
|
printf(" ]\n");
|
||||||
@ -545,7 +536,7 @@ DAccelScheme findBestDoubleAccelScheme(vector<vector<CharReach> > paths,
|
|||||||
|
|
||||||
#define MAX_EXPLORE_PATHS 40
|
#define MAX_EXPLORE_PATHS 40
|
||||||
|
|
||||||
AccelScheme findBestAccelScheme(vector<vector<CharReach> > paths,
|
AccelScheme findBestAccelScheme(vector<vector<CharReach>> paths,
|
||||||
const CharReach &terminating,
|
const CharReach &terminating,
|
||||||
bool look_for_double_byte) {
|
bool look_for_double_byte) {
|
||||||
AccelScheme rv;
|
AccelScheme rv;
|
||||||
@ -575,14 +566,13 @@ AccelScheme findBestAccelScheme(vector<vector<CharReach> > paths,
|
|||||||
/* find best is a bit lazy in terms of minimising the offset, see if we can
|
/* find best is a bit lazy in terms of minimising the offset, see if we can
|
||||||
* make it better. need to find the min max offset that we need.*/
|
* make it better. need to find the min max offset that we need.*/
|
||||||
u32 offset = 0;
|
u32 offset = 0;
|
||||||
for (vector<vector<CharReach> >::iterator p = paths.begin();
|
for (const auto &path : paths) {
|
||||||
p != paths.end(); ++p) {
|
|
||||||
u32 i = 0;
|
u32 i = 0;
|
||||||
for (vector<CharReach>::iterator it = p->begin(); it != p->end();
|
for (const auto &cr : path) {
|
||||||
++it, i++) {
|
if (cr.isSubsetOf(best.cr)) {
|
||||||
if (it->isSubsetOf(best.cr)) {
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
i++;
|
||||||
}
|
}
|
||||||
offset = MAX(offset, i);
|
offset = MAX(offset, i);
|
||||||
}
|
}
|
||||||
@ -620,17 +610,15 @@ AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
|
|||||||
return AccelScheme(); /* invalid scheme */
|
return AccelScheme(); /* invalid scheme */
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<vector<CharReach> > paths;
|
vector<vector<CharReach>> paths;
|
||||||
flat_set<NFAVertex> ignore_vert_set(verts.begin(), verts.end());
|
flat_set<NFAVertex> ignore_vert_set(verts.begin(), verts.end());
|
||||||
|
|
||||||
/* Note: we can not in general (TODO: ignore when possible) ignore entries
|
/* Note: we can not in general (TODO: ignore when possible) ignore entries
|
||||||
* into the bounded repeat cyclic states as that is when the magic happens
|
* into the bounded repeat cyclic states as that is when the magic happens
|
||||||
*/
|
*/
|
||||||
for (map<NFAVertex, BoundedRepeatSummary>::const_iterator it
|
for (auto v : br_cyclic | map_keys) {
|
||||||
= br_cyclic.begin();
|
|
||||||
it != br_cyclic.end(); ++it) {
|
|
||||||
/* TODO: can allow if repeatMin <= 1 ? */
|
/* TODO: can allow if repeatMin <= 1 ? */
|
||||||
ignore_vert_set.erase(it->first);
|
ignore_vert_set.erase(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto v : verts) {
|
for (auto v : verts) {
|
||||||
@ -643,9 +631,8 @@ AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* paths built wrong: reverse them */
|
/* paths built wrong: reverse them */
|
||||||
for (vector<vector<CharReach> >::iterator it = paths.begin();
|
for (auto &path : paths) {
|
||||||
it != paths.end(); ++it) {
|
reverse(path.begin(), path.end());
|
||||||
reverse(it->begin(), it->end());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return findBestAccelScheme(std::move(paths), terminating,
|
return findBestAccelScheme(std::move(paths), terminating,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user