mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
more efficient accel path discovery
This commit is contained in:
parent
ff82ea6d6e
commit
850636dbd6
@ -190,75 +190,140 @@ void findPaths(const NGHolder &g, NFAVertex v,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
struct SAccelScheme {
|
||||||
AccelScheme merge(AccelScheme a, const AccelScheme &b) {
|
SAccelScheme(const CharReach &cr_in, u32 offset_in)
|
||||||
a.cr |= b.cr;
|
: cr(cr_in), offset(offset_in) {
|
||||||
ENSURE_AT_LEAST(&a.offset, b.offset);
|
assert(offset <= MAX_ACCEL_DEPTH);
|
||||||
a.double_cr |= b.double_cr;
|
|
||||||
insert(&a.double_byte, b.double_byte);
|
|
||||||
ENSURE_AT_LEAST(&a.double_offset, b.double_offset);
|
|
||||||
return a;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SAccelScheme() {}
|
||||||
|
|
||||||
|
bool operator<(const SAccelScheme &b) const {
|
||||||
|
const SAccelScheme &a = *this;
|
||||||
|
|
||||||
|
const size_t a_count = cr.count(), b_count = b.cr.count();
|
||||||
|
if (a_count != b_count) {
|
||||||
|
return a_count < b_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* TODO: give bonus if one is a 'caseless' character */
|
||||||
|
ORDER_CHECK(offset);
|
||||||
|
ORDER_CHECK(cr);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
CharReach cr = CharReach::dot();
|
||||||
|
u32 offset = MAX_ACCEL_DEPTH + 1;
|
||||||
|
};
|
||||||
|
|
||||||
static
|
static
|
||||||
void findBest(vector<vector<CharReach> >::const_iterator pb,
|
void findBest(vector<vector<CharReach> >::const_iterator pb,
|
||||||
vector<vector<CharReach> >::const_iterator pe,
|
vector<vector<CharReach> >::const_iterator pe,
|
||||||
const AccelScheme &curr, AccelScheme *best) {
|
const SAccelScheme &curr, SAccelScheme *best) {
|
||||||
assert(curr.offset <= MAX_ACCEL_DEPTH);
|
assert(curr.offset <= MAX_ACCEL_DEPTH);
|
||||||
DEBUG_PRINTF("paths left %zu\n", pe - pb);
|
DEBUG_PRINTF("paths left %zu\n", pe - pb);
|
||||||
if (pb == pe) {
|
if (pb == pe) {
|
||||||
|
if (curr < *best) {
|
||||||
|
DEBUG_PRINTF("new best\n");
|
||||||
|
*best = curr;
|
||||||
|
}
|
||||||
*best = curr;
|
*best = curr;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
|
DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
|
||||||
|
|
||||||
vector<AccelScheme> priority_path;
|
vector<SAccelScheme> priority_path;
|
||||||
|
priority_path.reserve(pb->size());
|
||||||
u32 i = 0;
|
u32 i = 0;
|
||||||
for (vector<CharReach>::const_iterator p = pb->begin(); p != pb->end();
|
for (vector<CharReach>::const_iterator p = pb->begin(); p != pb->end();
|
||||||
++p, i++) {
|
++p, i++) {
|
||||||
priority_path.push_back(AccelScheme(*p & ~curr.cr, i));
|
SAccelScheme as(*p | curr.cr, MAX(i, curr.offset));
|
||||||
|
if (*best < as) {
|
||||||
|
DEBUG_PRINTF("worse\n");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
priority_path.push_back(move(as));
|
||||||
}
|
}
|
||||||
|
|
||||||
sort(priority_path.begin(), priority_path.end());
|
sort(priority_path.begin(), priority_path.end());
|
||||||
for (vector<AccelScheme>::iterator it = priority_path.begin();
|
for (auto it = priority_path.begin(); it != priority_path.end(); ++it) {
|
||||||
it != priority_path.end(); ++it) {
|
auto jt = next(it);
|
||||||
vector<AccelScheme>::iterator jt = it + 1;
|
|
||||||
for (; jt != priority_path.end(); ++jt) {
|
for (; jt != priority_path.end(); ++jt) {
|
||||||
if (!it->cr.isSubsetOf(jt->cr)) {
|
if (!it->cr.isSubsetOf(jt->cr)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
priority_path.erase(it + 1, jt);
|
priority_path.erase(next(it), jt);
|
||||||
DEBUG_PRINTF("||%zu\n", it->cr.count());
|
DEBUG_PRINTF("||%zu\n", it->cr.count());
|
||||||
}
|
}
|
||||||
DEBUG_PRINTF("---\n");
|
DEBUG_PRINTF("---\n");
|
||||||
|
|
||||||
for (vector<AccelScheme>::const_iterator it = priority_path.begin();
|
for (vector<SAccelScheme>::const_iterator it = priority_path.begin();
|
||||||
it != priority_path.end(); ++it) {
|
it != priority_path.end(); ++it) {
|
||||||
DEBUG_PRINTF("%u:|| = %zu; p remaining len %zu\n", i, it->cr.count(),
|
DEBUG_PRINTF("%u:|| = %zu; p remaining len %zu\n", i, it->cr.count(),
|
||||||
priority_path.end() - it);
|
priority_path.end() - it);
|
||||||
|
|
||||||
AccelScheme in = merge(curr, *it);
|
SAccelScheme in = move(*it);
|
||||||
|
|
||||||
if (in > *best) {
|
if (*best < in) {
|
||||||
DEBUG_PRINTF("worse\n");
|
DEBUG_PRINTF("worse\n");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
AccelScheme temp = *best;
|
findBest(pb + 1, pe, in, best);
|
||||||
findBest(pb + 1, pe, in, &temp);
|
|
||||||
if (temp < *best) {
|
|
||||||
DEBUG_PRINTF("new best\n");
|
|
||||||
*best = temp;
|
|
||||||
if (curr.cr == best->cr) {
|
if (curr.cr == best->cr) {
|
||||||
return; /* could only get better by offset */
|
return; /* could only get better by offset */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct DAccelScheme {
|
||||||
|
DAccelScheme(const CharReach &cr_in, u32 offset_in)
|
||||||
|
: double_cr(cr_in), double_offset(offset_in) {
|
||||||
|
assert(double_offset <= MAX_ACCEL_DEPTH);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DAccelScheme() {}
|
||||||
|
|
||||||
|
bool operator<(const DAccelScheme &b) const {
|
||||||
|
const DAccelScheme &a = *this;
|
||||||
|
|
||||||
|
size_t a_dcount = a.double_cr.count();
|
||||||
|
size_t b_dcount = b.double_cr.count();
|
||||||
|
|
||||||
|
assert(!a.double_byte.empty() || a_dcount || a.double_offset);
|
||||||
|
assert(!b.double_byte.empty() || b_dcount || b.double_offset);
|
||||||
|
|
||||||
|
if (a_dcount != b_dcount) {
|
||||||
|
return a_dcount < b_dcount;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!a_dcount) {
|
||||||
|
bool cd_a = buildDvermMask(a.double_byte);
|
||||||
|
bool cd_b = buildDvermMask(b.double_byte);
|
||||||
|
if (cd_a != cd_b) {
|
||||||
|
return cd_a > cd_b;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ORDER_CHECK(double_byte.size());
|
||||||
|
ORDER_CHECK(double_offset);
|
||||||
|
|
||||||
|
/* TODO: give bonus if one is a 'caseless' character */
|
||||||
|
ORDER_CHECK(double_byte);
|
||||||
|
ORDER_CHECK(double_cr);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
ue2::flat_set<std::pair<u8, u8> > double_byte;
|
||||||
|
CharReach double_cr;
|
||||||
|
u32 double_offset = 0;
|
||||||
|
};
|
||||||
|
|
||||||
static
|
static
|
||||||
AccelScheme make_double_accel(AccelScheme as, CharReach cr_1,
|
DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1,
|
||||||
const CharReach &cr_2_in, u32 offset_in) {
|
const CharReach &cr_2_in, u32 offset_in) {
|
||||||
cr_1 &= ~as.double_cr;
|
cr_1 &= ~as.double_cr;
|
||||||
CharReach cr_2 = cr_2_in & ~as.double_cr;
|
CharReach cr_2 = cr_2_in & ~as.double_cr;
|
||||||
@ -266,7 +331,7 @@ AccelScheme make_double_accel(AccelScheme as, CharReach cr_1,
|
|||||||
|
|
||||||
if (cr_1.none()) {
|
if (cr_1.none()) {
|
||||||
DEBUG_PRINTF("empty first element\n");
|
DEBUG_PRINTF("empty first element\n");
|
||||||
as.double_offset = offset;
|
ENSURE_AT_LEAST(&as.double_offset, offset);
|
||||||
return as;
|
return as;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -280,7 +345,7 @@ AccelScheme make_double_accel(AccelScheme as, CharReach cr_1,
|
|||||||
|
|
||||||
if (!two_count) {
|
if (!two_count) {
|
||||||
DEBUG_PRINTF("empty element\n");
|
DEBUG_PRINTF("empty element\n");
|
||||||
as.double_offset = offset;
|
ENSURE_AT_LEAST(&as.double_offset, offset);
|
||||||
return as;
|
return as;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -296,63 +361,69 @@ AccelScheme make_double_accel(AccelScheme as, CharReach cr_1,
|
|||||||
i = cr_1.find_next(i)) {
|
i = cr_1.find_next(i)) {
|
||||||
for (auto j = cr_2.find_first(); j != CharReach::npos;
|
for (auto j = cr_2.find_first(); j != CharReach::npos;
|
||||||
j = cr_2.find_next(j)) {
|
j = cr_2.find_next(j)) {
|
||||||
as.double_byte.insert(make_pair(i, j));
|
as.double_byte.emplace(i, j);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
as.double_offset = offset;
|
ENSURE_AT_LEAST(&as.double_offset, offset);
|
||||||
DEBUG_PRINTF("construct da %zu pairs, %zu singles, offset %u\n",
|
DEBUG_PRINTF("construct da %zu pairs, %zu singles, offset %u\n",
|
||||||
as.double_byte.size(), as.double_cr.count(), as.offset);
|
as.double_byte.size(), as.double_cr.count(), as.double_offset);
|
||||||
return as;
|
return as;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void findDoubleBest(vector<vector<CharReach> >::const_iterator pb,
|
void findDoubleBest(vector<vector<CharReach> >::const_iterator pb,
|
||||||
vector<vector<CharReach> >::const_iterator pe,
|
vector<vector<CharReach> >::const_iterator pe,
|
||||||
const AccelScheme &curr, AccelScheme *best) {
|
const DAccelScheme &curr, DAccelScheme *best) {
|
||||||
assert(curr.offset <= MAX_ACCEL_DEPTH);
|
assert(curr.double_offset <= MAX_ACCEL_DEPTH);
|
||||||
DEBUG_PRINTF("paths left %zu\n", pe - pb);
|
DEBUG_PRINTF("paths left %zu\n", pe - pb);
|
||||||
|
DEBUG_PRINTF("current base: %zu pairs, %zu singles, offset %u\n",
|
||||||
|
curr.double_byte.size(), curr.double_cr.count(),
|
||||||
|
curr.double_offset);
|
||||||
if (pb == pe) {
|
if (pb == pe) {
|
||||||
|
if (curr < *best) {
|
||||||
*best = curr;
|
*best = curr;
|
||||||
|
DEBUG_PRINTF("new best: %zu pairs, %zu singles, offset %u\n",
|
||||||
|
best->double_byte.size(), best->double_cr.count(),
|
||||||
|
best->double_offset);
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
|
DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
|
||||||
|
|
||||||
vector<AccelScheme> priority_path;
|
vector<DAccelScheme> priority_path;
|
||||||
|
priority_path.reserve(pb->size());
|
||||||
u32 i = 0;
|
u32 i = 0;
|
||||||
for (vector<CharReach>::const_iterator p = pb->begin();
|
for (vector<CharReach>::const_iterator p = pb->begin();
|
||||||
p != pb->end() && next(p) != pb->end();
|
p != pb->end() && next(p) != pb->end();
|
||||||
++p, i++) {
|
++p, i++) {
|
||||||
priority_path.push_back(make_double_accel(curr, *p, *next(p), i));
|
DAccelScheme as = make_double_accel(curr, *p, *next(p), i);
|
||||||
}
|
if (*best < as) {
|
||||||
|
|
||||||
sort(priority_path.begin(), priority_path.end());
|
|
||||||
|
|
||||||
DEBUG_PRINTF("input best: %zu pairs, %zu singles, offset %u\n",
|
|
||||||
best->double_byte.size(), best->double_cr.count(),
|
|
||||||
best->offset);
|
|
||||||
|
|
||||||
for (vector<AccelScheme>::const_iterator it = priority_path.begin();
|
|
||||||
it != priority_path.end(); ++it) {
|
|
||||||
|
|
||||||
AccelScheme in = merge(curr, *it);
|
|
||||||
DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n",
|
|
||||||
in.double_byte.size(), in.double_cr.count(), in.offset);
|
|
||||||
|
|
||||||
if (in > *best) {
|
|
||||||
DEBUG_PRINTF("worse\n");
|
DEBUG_PRINTF("worse\n");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
AccelScheme temp = *best;
|
priority_path.push_back(move(as));
|
||||||
findDoubleBest(pb + 1, pe, in, &temp);
|
|
||||||
if (temp < *best) {
|
|
||||||
*best = temp;
|
|
||||||
DEBUG_PRINTF("new best: %zu pairs, %zu singles, offset %u\n",
|
|
||||||
best->double_byte.size(), best->double_cr.count(),
|
|
||||||
best->offset);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sort(priority_path.begin(), priority_path.end());
|
||||||
|
DEBUG_PRINTF("%zu candidates for this path\n", priority_path.size());
|
||||||
|
DEBUG_PRINTF("input best: %zu pairs, %zu singles, offset %u\n",
|
||||||
|
best->double_byte.size(), best->double_cr.count(),
|
||||||
|
best->double_offset);
|
||||||
|
|
||||||
|
for (vector<DAccelScheme>::const_iterator it = priority_path.begin();
|
||||||
|
it != priority_path.end(); ++it) {
|
||||||
|
DAccelScheme in = move(*it);
|
||||||
|
DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n",
|
||||||
|
in.double_byte.size(), in.double_cr.count(),
|
||||||
|
in.double_offset);
|
||||||
|
if (*best < in) {
|
||||||
|
DEBUG_PRINTF("worse\n");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
findDoubleBest(pb + 1, pe, in, best);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -439,20 +510,23 @@ void improvePaths(vector<vector<CharReach> > &paths) {
|
|||||||
#define MAX_DOUBLE_ACCEL_PATHS 10
|
#define MAX_DOUBLE_ACCEL_PATHS 10
|
||||||
|
|
||||||
static
|
static
|
||||||
AccelScheme findBestDoubleAccelScheme(vector<vector<CharReach> > paths,
|
DAccelScheme findBestDoubleAccelScheme(vector<vector<CharReach> > paths,
|
||||||
const CharReach &terminating) {
|
const CharReach &terminating) {
|
||||||
DEBUG_PRINTF("looking for double accel, %zu terminating symbols\n",
|
DEBUG_PRINTF("looking for double accel, %zu terminating symbols\n",
|
||||||
terminating.count());
|
terminating.count());
|
||||||
unifyPathsLastSegment(paths);
|
unifyPathsLastSegment(paths);
|
||||||
AccelScheme curr;
|
|
||||||
curr.double_cr = terminating;
|
#ifdef DEBUG
|
||||||
curr.offset = 0;
|
DEBUG_PRINTF("paths:\n");
|
||||||
|
dumpPaths(paths);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* if there are too many paths, shorten the paths to reduce the number of
|
/* if there are too many paths, shorten the paths to reduce the number of
|
||||||
* distinct paths we have to consider */
|
* distinct paths we have to consider */
|
||||||
while (paths.size() > MAX_DOUBLE_ACCEL_PATHS) {
|
while (paths.size() > MAX_DOUBLE_ACCEL_PATHS) {
|
||||||
for (auto &p : paths) {
|
for (auto &p : paths) {
|
||||||
if (p.empty()) {
|
if (p.empty()) {
|
||||||
return curr;
|
return DAccelScheme(terminating, 0U);
|
||||||
}
|
}
|
||||||
p.pop_back();
|
p.pop_back();
|
||||||
}
|
}
|
||||||
@ -460,39 +534,44 @@ AccelScheme findBestDoubleAccelScheme(vector<vector<CharReach> > paths,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (paths.empty()) {
|
if (paths.empty()) {
|
||||||
return curr;
|
return DAccelScheme(terminating, 0U);
|
||||||
}
|
}
|
||||||
|
|
||||||
AccelScheme best;
|
DAccelScheme curr(terminating, 0U);
|
||||||
best.double_cr = CharReach::dot();
|
DAccelScheme best(CharReach::dot(), 0U);
|
||||||
findDoubleBest(paths.begin(), paths.end(), curr, &best);
|
findDoubleBest(paths.begin(), paths.end(), curr, &best);
|
||||||
curr = best;
|
DEBUG_PRINTF("da %zu pairs, %zu singles\n", best.double_byte.size(),
|
||||||
DEBUG_PRINTF("da %zu pairs, %zu singles\n", curr.double_byte.size(),
|
best.double_cr.count());
|
||||||
curr.double_cr.count());
|
return best;
|
||||||
return curr;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define MAX_EXPLORE_PATHS 40
|
||||||
|
|
||||||
AccelScheme findBestAccelScheme(vector<vector<CharReach> > paths,
|
AccelScheme findBestAccelScheme(vector<vector<CharReach> > paths,
|
||||||
const CharReach &terminating,
|
const CharReach &terminating,
|
||||||
bool look_for_double_byte) {
|
bool look_for_double_byte) {
|
||||||
AccelScheme da;
|
AccelScheme rv;
|
||||||
|
|
||||||
if (look_for_double_byte) {
|
if (look_for_double_byte) {
|
||||||
da = findBestDoubleAccelScheme(paths, terminating);
|
DAccelScheme da = findBestDoubleAccelScheme(paths, terminating);
|
||||||
|
if (da.double_byte.size() <= DOUBLE_SHUFTI_LIMIT) {
|
||||||
|
rv.double_byte = move(da.double_byte);
|
||||||
|
rv.double_cr = move(da.double_cr);
|
||||||
|
rv.double_offset = da.double_offset;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
improvePaths(paths);
|
improvePaths(paths);
|
||||||
|
|
||||||
DEBUG_PRINTF("we have %zu paths\n", paths.size());
|
DEBUG_PRINTF("we have %zu paths\n", paths.size());
|
||||||
if (paths.size() > 40) {
|
if (paths.size() > MAX_EXPLORE_PATHS) {
|
||||||
return da; /* too many paths to explore */
|
return rv; /* too many paths to explore */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* if we were smart we would do something netflowy on the paths to find the
|
/* if we were smart we would do something netflowy on the paths to find the
|
||||||
* best cut. But we aren't, so we will just brute force it.
|
* best cut. But we aren't, so we will just brute force it.
|
||||||
*/
|
*/
|
||||||
AccelScheme curr(terminating, 0U);
|
SAccelScheme curr(terminating, 0U);
|
||||||
AccelScheme best;
|
SAccelScheme best;
|
||||||
findBest(paths.begin(), paths.end(), curr, &best);
|
findBest(paths.begin(), paths.end(), curr, &best);
|
||||||
|
|
||||||
/* find best is a bit lazy in terms of minimising the offset, see if we can
|
/* find best is a bit lazy in terms of minimising the offset, see if we can
|
||||||
@ -512,15 +591,13 @@ AccelScheme findBestAccelScheme(vector<vector<CharReach> > paths,
|
|||||||
assert(offset <= best.offset);
|
assert(offset <= best.offset);
|
||||||
best.offset = offset;
|
best.offset = offset;
|
||||||
|
|
||||||
/* merge best single and best double */
|
rv.offset = best.offset;
|
||||||
if (!da.double_byte.empty() && da.double_byte.size() <= DOUBLE_SHUFTI_LIMIT
|
rv.cr = best.cr;
|
||||||
&& da.double_cr.count() < best.cr.count()) {
|
if (rv.cr.count() < rv.double_cr.count()) {
|
||||||
best.double_byte = da.double_byte;
|
rv.double_byte.clear();
|
||||||
best.double_cr = da.double_cr;
|
|
||||||
best.double_offset = da.double_offset;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return best;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
|
AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
|
||||||
@ -832,7 +909,9 @@ depth_done:
|
|||||||
for (unsigned int i = 0; i < depth; i++) {
|
for (unsigned int i = 0; i < depth; i++) {
|
||||||
if (depthReach[i].none()) {
|
if (depthReach[i].none()) {
|
||||||
DEBUG_PRINTF("red tape acceleration engine depth %u\n", i);
|
DEBUG_PRINTF("red tape acceleration engine depth %u\n", i);
|
||||||
*as = AccelScheme(CharReach(), i);
|
*as = AccelScheme();
|
||||||
|
as->offset = i;
|
||||||
|
as->cr = CharReach();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -847,7 +926,8 @@ depth_done:
|
|||||||
|| (cra.count() == 2 && crb.count() == 2
|
|| (cra.count() == 2 && crb.count() == 2
|
||||||
&& cra.isBit5Insensitive() && crb.isBit5Insensitive())) {
|
&& cra.isBit5Insensitive() && crb.isBit5Insensitive())) {
|
||||||
DEBUG_PRINTF("two-byte vermicelli, depth %u\n", i);
|
DEBUG_PRINTF("two-byte vermicelli, depth %u\n", i);
|
||||||
*as = AccelScheme(CharReach::dot(), i);
|
*as = AccelScheme();
|
||||||
|
as->offset = i;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -860,7 +940,8 @@ depth_done:
|
|||||||
if (depthReach[i].count() * depthReach[i+1].count()
|
if (depthReach[i].count() * depthReach[i+1].count()
|
||||||
<= DOUBLE_SHUFTI_LIMIT) {
|
<= DOUBLE_SHUFTI_LIMIT) {
|
||||||
DEBUG_PRINTF("two-byte shufti, depth %u\n", i);
|
DEBUG_PRINTF("two-byte shufti, depth %u\n", i);
|
||||||
*as = AccelScheme(CharReach::dot(), i);
|
*as = AccelScheme();
|
||||||
|
as->offset = i;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -66,71 +66,10 @@ void findAccelFriends(const NGHolder &g, NFAVertex v,
|
|||||||
#define DOUBLE_SHUFTI_LIMIT 20
|
#define DOUBLE_SHUFTI_LIMIT 20
|
||||||
|
|
||||||
struct AccelScheme {
|
struct AccelScheme {
|
||||||
AccelScheme(const CharReach &cr_in, u32 offset_in)
|
|
||||||
: cr(cr_in), offset(offset_in) {
|
|
||||||
assert(offset <= MAX_ACCEL_DEPTH);
|
|
||||||
}
|
|
||||||
AccelScheme() : cr(CharReach::dot()), offset(MAX_ACCEL_DEPTH + 1) {}
|
|
||||||
|
|
||||||
bool operator<(const AccelScheme &b) const {
|
|
||||||
const AccelScheme &a = *this;
|
|
||||||
|
|
||||||
// Don't use ORDER_CHECK as it will (stupidly) eval count() too many
|
|
||||||
// times.
|
|
||||||
size_t a_dcount = double_cr.count();
|
|
||||||
size_t b_dcount = b.double_cr.count();
|
|
||||||
|
|
||||||
bool feasible_double_a = !a.double_byte.empty()
|
|
||||||
&& a.double_byte.size() <= DOUBLE_SHUFTI_LIMIT;
|
|
||||||
bool feasible_double_b = !b.double_byte.empty()
|
|
||||||
&& b.double_byte.size() <= DOUBLE_SHUFTI_LIMIT;
|
|
||||||
|
|
||||||
if (feasible_double_a != feasible_double_b) {
|
|
||||||
return feasible_double_a > feasible_double_b;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (feasible_double_a) {
|
|
||||||
if (a_dcount != b_dcount) {
|
|
||||||
return a_dcount < b_dcount;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((a.double_byte.size() == 1) != (b.double_byte.size() == 1)) {
|
|
||||||
return a.double_byte.size() < b.double_byte.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!a_dcount) {
|
|
||||||
bool cd_a = buildDvermMask(a.double_byte);
|
|
||||||
bool cd_b = buildDvermMask(b.double_byte);
|
|
||||||
if (cd_a != cd_b) {
|
|
||||||
return cd_a > cd_b;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ORDER_CHECK(double_byte.size());
|
|
||||||
ORDER_CHECK(double_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
const size_t a_count = cr.count(), b_count = b.cr.count();
|
|
||||||
if (a_count != b_count) {
|
|
||||||
return a_count < b_count;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* TODO: give bonus if one is a 'caseless' character */
|
|
||||||
ORDER_CHECK(offset);
|
|
||||||
ORDER_CHECK(cr);
|
|
||||||
ORDER_CHECK(double_byte);
|
|
||||||
ORDER_CHECK(double_cr);
|
|
||||||
ORDER_CHECK(double_offset);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool operator>(const AccelScheme &b) const {
|
|
||||||
return b < *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
ue2::flat_set<std::pair<u8, u8> > double_byte;
|
ue2::flat_set<std::pair<u8, u8> > double_byte;
|
||||||
CharReach cr;
|
CharReach cr = CharReach::dot();
|
||||||
CharReach double_cr;
|
CharReach double_cr;
|
||||||
u32 offset;
|
u32 offset = MAX_ACCEL_DEPTH + 1;
|
||||||
u32 double_offset = 0;
|
u32 double_offset = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -153,7 +92,8 @@ bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
|
|||||||
const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
|
const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
|
||||||
AccelScheme *as, bool allow_wide);
|
AccelScheme *as, bool allow_wide);
|
||||||
|
|
||||||
/** \brief Check if vertex \a v is a multi accelerable state (for a limex NFA). */
|
/** \brief Check if vertex \a v is a multi accelerable state (for a limex NFA).
|
||||||
|
*/
|
||||||
MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g,
|
MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g,
|
||||||
const std::vector<NFAVertex> &verts,
|
const std::vector<NFAVertex> &verts,
|
||||||
const CompileContext &cc);
|
const CompileContext &cc);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user