make nfaExecCastle0_QR() more efficient

1. Reverse scan for the last escape and only process later events (sketched below).
2. Only check subcastles which may expire for staleness.
Alex Coyte 2015-12-02 15:49:49 +11:00 committed by Matthew Barr
parent b9c5d65f0e
commit e065c4d60b
6 changed files with 229 additions and 109 deletions
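
The core observation behind change 1: an escape character squashes every active repeat, so no queue event at or before the last escape in the scanned range can influence later state. A minimal sketch of the idea in plain C, independent of the Hyperscan types in the diff below (the byte-wise loop stands in for the accelerated reverse vermicelli/shufti/truffle scans the real code adds as castleRevScan()):

#include <stddef.h>

/* Index of the last occurrence of the escape byte in buf[0..len),
 * or -1 if it does not occur. */
static ptrdiff_t last_escape(const unsigned char *buf, size_t len,
                             unsigned char esc) {
    for (size_t i = len; i > 0; i--) {
        if (buf[i - 1] == esc) {
            return (ptrdiff_t)(i - 1);
        }
    }
    return -1;
}

/* Events at locations <= the last escape are moot: the escape kills all
 * repeats, so processing can resume at the first later event. */
static size_t first_live_event(const size_t *event_locs, size_t n_events,
                               ptrdiff_t last_escape_loc) {
    size_t i = 0;
    while (i < n_events && (ptrdiff_t)event_locs[i] <= last_escape_loc) {
        i++;
    }
    return i;
}

This is the shape of the new nfaExecCastle0_QR() below: one reverse scan (castleLastKillLoc()), at most one clear_repeats() call, then a skip over the moot events.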

View File

@@ -54,7 +54,7 @@ Grey::Grey(void) :
allowRose(true),
allowExtendedNFA(true), /* bounded repeats of course */
allowLimExNFA(true),
allowSidecar(true),
allowSidecar(false),
allowAnchoredAcyclic(true),
allowSmallLiteralSet(true),
allowCastle(true),

View File

@@ -162,6 +162,10 @@ static really_inline
char castleInAccept(const struct Castle *c, struct mq *q,
const ReportID report, const u64a offset) {
DEBUG_PRINTF("offset=%llu\n", offset);
/* ignore when just catching up due to full queue */
if (report == MO_INVALID_IDX) {
return 0;
}
if (c->exclusive) {
const u32 activeIdx = partial_load_u32(q->streamState,
@@ -216,6 +220,11 @@ void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
void *full_state, void *stream_state) {
DEBUG_PRINTF("offset=%llu\n", offset);
if (!c->staleIterOffset) {
DEBUG_PRINTF("{no repeats can go stale}\n");
return; /* no subcastle can ever go stale */
}
if (c->exclusive) {
const u32 activeIdx = partial_load_u32(stream_state, c->activeIdxSize);
if (activeIdx < c->numRepeats) {
@@ -227,19 +236,27 @@ void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
if (!c->pureExclusive) {
const u8 *active = (const u8 *)stream_state + c->activeIdxSize;
for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
i != MMB_INVALID;
i = mmbit_iterate(active, c->numRepeats, i)) {
const struct mmbit_sparse_iter *it
= (const void *)((const char *)c + c->staleIterOffset);
struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
u32 numRepeats = c->numRepeats;
u32 idx = 0;
u32 i = mmbit_sparse_iter_begin(active, numRepeats, &idx, it, si_state);
while (i != MMB_INVALID) {
DEBUG_PRINTF("subcastle %u\n", i);
subCastleDeactivateStaleSubs(c, offset, full_state,
stream_state, i);
subCastleDeactivateStaleSubs(c, offset, full_state, stream_state, i);
i = mmbit_sparse_iter_next(active, numRepeats, i, &idx, it,
si_state);
}
}
}
static really_inline
void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset,
void *full_state, void *stream_state) {
void *full_state, void *stream_state,
UNUSED char stale_checked) {
assert(top < c->numRepeats);
const struct SubCastle *sub = getSubCastle(c, top);
@@ -263,8 +280,8 @@ void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset,
} else {
DEBUG_PRINTF("repeat %u is already alive\n", top);
// Caller should ensure we're not stale.
assert(repeatHasMatch(info, rctrl, rstate, offset) !=
REPEAT_STALE);
assert(!stale_checked
|| repeatHasMatch(info, rctrl, rstate, offset) != REPEAT_STALE);
// Ignore duplicate top events.
u64a last = repeatLastTop(info, rctrl, rstate);
@@ -589,7 +606,103 @@ char castleScan(const struct Castle *c, const u8 *buf, const size_t begin,
}
static really_inline
void castleHandleEvent(const struct Castle *c, struct mq *q, const u64a sp) {
char castleRevScanVerm(const struct Castle *c, const u8 *buf,
const size_t begin, const size_t end, size_t *loc) {
const u8 *ptr = rvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end);
if (ptr == buf + begin - 1) {
DEBUG_PRINTF("no escape found\n");
return 0;
}
assert(loc);
assert(ptr >= buf && ptr < buf + end);
*loc = (size_t)(ptr - buf);
DEBUG_PRINTF("escape found at offset %zu\n", *loc);
return 1;
}
static really_inline
char castleRevScanNVerm(const struct Castle *c, const u8 *buf,
const size_t begin, const size_t end, size_t *loc) {
const u8 *ptr = rnvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end);
if (ptr == buf + begin - 1) {
DEBUG_PRINTF("no escape found\n");
return 0;
}
assert(loc);
assert(ptr >= buf && ptr < buf + end);
*loc = (size_t)(ptr - buf);
DEBUG_PRINTF("escape found at offset %zu\n", *loc);
return 1;
}
static really_inline
char castleRevScanShufti(const struct Castle *c, const u8 *buf,
const size_t begin, const size_t end, size_t *loc) {
const m128 mask_lo = c->u.shuf.mask_lo;
const m128 mask_hi = c->u.shuf.mask_hi;
const u8 *ptr = rshuftiExec(mask_lo, mask_hi, buf + begin, buf + end);
if (ptr == buf + begin - 1) {
DEBUG_PRINTF("no escape found\n");
return 0;
}
assert(loc);
assert(ptr >= buf && ptr < buf + end);
*loc = (size_t)(ptr - buf);
DEBUG_PRINTF("escape found at offset %zu\n", *loc);
return 1;
}
static really_inline
char castleRevScanTruffle(const struct Castle *c, const u8 *buf,
const size_t begin, const size_t end, size_t *loc) {
const u8 *ptr = rtruffleExec(c->u.truffle.mask1, c->u.truffle.mask2,
buf + begin, buf + end);
if (ptr == buf + begin - 1) {
DEBUG_PRINTF("no escape found\n");
return 0;
}
assert(loc);
assert(ptr >= buf && ptr < buf + end);
*loc = (size_t)(ptr - buf);
DEBUG_PRINTF("escape found at offset %zu\n", *loc);
return 1;
}
static really_inline
char castleRevScan(const struct Castle *c, const u8 *buf, const size_t begin,
const size_t end, size_t *loc) {
assert(begin <= end);
DEBUG_PRINTF("scanning backwards over (%zu,%zu]\n", begin, end);
if (begin == end) {
return 0;
}
switch (c->type) {
case CASTLE_DOT:
// Nothing can stop a dot scan!
return 0;
case CASTLE_VERM:
return castleRevScanVerm(c, buf, begin, end, loc);
case CASTLE_NVERM:
return castleRevScanNVerm(c, buf, begin, end, loc);
case CASTLE_SHUFTI:
return castleRevScanShufti(c, buf, begin, end, loc);
case CASTLE_TRUFFLE:
return castleRevScanTruffle(c, buf, begin, end, loc);
default:
DEBUG_PRINTF("unknown scan type!\n");
assert(0);
return 0;
}
}
static really_inline
void castleHandleEvent(const struct Castle *c, struct mq *q, const u64a sp,
char stale_checked) {
const u32 event = q->items[q->cur].type;
switch (event) {
case MQE_TOP:
@@ -603,11 +716,23 @@ void castleHandleEvent(const struct Castle *c, struct mq *q, const u64a sp) {
assert(event < MQE_INVALID);
u32 top = event - MQE_TOP_FIRST;
DEBUG_PRINTF("top %u at offset %llu\n", top, sp);
castleProcessTop(c, top, sp, q->state, q->streamState);
castleProcessTop(c, top, sp, q->state, q->streamState, stale_checked);
break;
}
}
static really_inline
void clear_repeats(const struct Castle *c, const struct mq *q, u8 *active) {
DEBUG_PRINTF("clearing active repeats due to escape\n");
if (c->exclusive) {
partial_store_u32(q->streamState, c->numRepeats, c->activeIdxSize);
}
if (!c->pureExclusive) {
mmbit_clear(active, c->numRepeats);
}
}
static really_inline
char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
enum MatchMode mode) {
@@ -698,15 +823,7 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
}
if (escape_found) {
DEBUG_PRINTF("clearing active repeats due to escape\n");
if (c->exclusive) {
partial_store_u32(q->streamState, c->numRepeats,
c->activeIdxSize);
}
if (!c->pureExclusive) {
mmbit_clear(active, c->numRepeats);
}
clear_repeats(c, q, active);
}
}
@@ -720,7 +837,7 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
}
sp = q_cur_offset(q);
castleHandleEvent(c, q, sp);
castleHandleEvent(c, q, sp, 1);
q->cur++;
}
@@ -745,28 +862,34 @@ char nfaExecCastle0_Q2(const struct NFA *n, struct mq *q, s64a end) {
return nfaExecCastle0_Q_i(n, q, end, STOP_AT_MATCH);
}
static really_inline
void castleStreamSilent(const struct Castle *c, u8 *active, const u8 *buf,
size_t length) {
DEBUG_PRINTF("entry\n");
static
s64a castleLastKillLoc(const struct Castle *c, struct mq *q) {
assert(q_cur_type(q) == MQE_START);
assert(q_last_type(q) == MQE_END);
s64a sp = q_cur_loc(q);
s64a ep = q_last_loc(q);
// This call doesn't produce matches, so we elide the castleMatchLoop call
// entirely and just do escape scans to maintain the repeat.
DEBUG_PRINTF("finding final squash in (%lld, %lld]\n", sp, ep);
size_t eloc = 0;
char escaped = castleScan(c, buf, 0, length, &eloc);
if (escaped) {
assert(eloc < length);
DEBUG_PRINTF("escape found at %zu, clearing castle\n", eloc);
if (c->exclusive) {
partial_store_u32(active - c->activeIdxSize,
c->numRepeats, c->activeIdxSize);
}
if (!c->pureExclusive) {
mmbit_clear(active, c->numRepeats);
size_t loc;
if (ep > 0) {
if (castleRevScan(c, q->buffer, sp > 0 ? sp : 0, ep, &loc)) {
return (s64a)loc;
}
ep = 0;
}
if (sp < 0) {
s64a hlen = q->hlength;
if (castleRevScan(c, q->history, sp + hlen, ep + hlen, &loc)) {
return (s64a)loc - hlen;
}
ep = 0;
}
return sp - 1; /* the repeats are never killed */
}
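
For context on the coordinate handling in castleLastKillLoc() above: queue locations are relative to the start of the main buffer, so history bytes occupy negative locations in [-hlen, 0). A hedged sketch of the same two-buffer reverse scan using plain byte comparisons (rscan() and last_kill_loc() are illustrative stand-ins, not functions in the codebase):

#include <stddef.h>

/* Scan b[0..n) backwards for the escape byte; NULL if absent. */
static const unsigned char *rscan(const unsigned char *b, size_t n,
                                  unsigned char esc) {
    while (n) {
        if (b[--n] == esc) {
            return b + n;
        }
    }
    return NULL;
}

/* Main buffer first (a later escape always wins), then history, translating
 * history indices back into negative queue locations. The fall-through
 * return is a sentinel before all data: the repeats are never killed. */
static ptrdiff_t last_kill_loc(const unsigned char *hist, size_t hlen,
                               const unsigned char *buf, size_t blen,
                               unsigned char esc) {
    const unsigned char *p = rscan(buf, blen, esc);
    if (p) {
        return p - buf;
    }
    p = rscan(hist, hlen, esc);
    if (p) {
        return (p - hist) - (ptrdiff_t)hlen;
    }
    return -(ptrdiff_t)hlen - 1;
}
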
char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) {
@@ -780,76 +903,40 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) {
assert(q->cur + 1 < q->end); /* require at least two items */
assert(q_cur_type(q) == MQE_START);
u64a sp = q_cur_offset(q);
q->cur++;
DEBUG_PRINTF("sp=%llu\n", sp);
const struct Castle *c = getImplNfa(n);
u8 *active = (u8 *)q->streamState + c->activeIdxSize;
char found = 0;
u64a end_offset = q_last_loc(q) + q->offset;
s64a last_kill_loc = castleLastKillLoc(c, q);
DEBUG_PRINTF("all repeats killed at %lld (exec range %lld, %lld)\n",
last_kill_loc, q_cur_loc(q), q_last_loc(q));
assert(last_kill_loc < q_last_loc(q));
if (last_kill_loc != q_cur_loc(q) - 1) {
clear_repeats(c, q, active);
}
q->cur++; /* skip start event */
/* skip events prior to the repeats being squashed */
while (q_cur_loc(q) <= last_kill_loc) {
DEBUG_PRINTF("skipping moot event at %lld\n", q_cur_loc(q));
q->cur++;
assert(q->cur < q->end);
}
while (q->cur < q->end) {
DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q),
q_cur_offset(q));
found = 0;
if (c->exclusive) {
const u32 activeIdx = partial_load_u32(q->streamState,
c->activeIdxSize);
if (activeIdx < c->numRepeats) {
found = 1;
} else if (c->pureExclusive) {
DEBUG_PRINTF("castle is dead\n");
goto scan_done;
}
}
if (!found && !mmbit_any(active, c->numRepeats)) {
DEBUG_PRINTF("castle is dead\n");
goto scan_done;
}
u64a ep = q_cur_offset(q);
if (sp < q->offset) {
DEBUG_PRINTF("HISTORY BUFFER SCAN\n");
assert(q->offset - sp <= q->hlength);
u64a local_ep = MIN(q->offset, ep);
const u8 *ptr = q->history + q->hlength + sp - q->offset;
castleStreamSilent(c, active, ptr, local_ep - sp);
sp = local_ep;
}
found = 0;
if (c->exclusive) {
const u32 activeIdx = partial_load_u32(q->streamState,
c->activeIdxSize);
if (activeIdx < c->numRepeats) {
found = 1;
} else if (c->pureExclusive) {
DEBUG_PRINTF("castle is dead\n");
goto scan_done;
}
}
if (!found && !mmbit_any(active, c->numRepeats)) {
DEBUG_PRINTF("castle is dead\n");
goto scan_done;
}
if (sp < ep) {
DEBUG_PRINTF("MAIN BUFFER SCAN\n");
assert(ep - q->offset <= q->length);
const u8 *ptr = q->buffer + sp - q->offset;
castleStreamSilent(c, active, ptr, ep - sp);
}
scan_done:
sp = q_cur_offset(q);
castleDeactivateStaleSubs(c, sp, q->state, q->streamState);
castleHandleEvent(c, q, sp);
u64a sp = q_cur_offset(q);
castleHandleEvent(c, q, sp, 0);
q->cur++;
}
found = 0;
castleDeactivateStaleSubs(c, end_offset, q->state, q->streamState);
char found = 0;
if (c->exclusive) {
const u32 activeIdx = partial_load_u32(q->streamState,
c->activeIdxSize);
@@ -866,7 +953,7 @@ scan_done:
return 0;
}
if (castleInAccept(c, q, report, sp)) {
if (castleInAccept(c, q, report, end_offset)) {
return MO_MATCHES_PENDING;
}
@@ -1013,4 +1100,3 @@ char nfaExecCastle0_expandState(const struct NFA *n, void *dest,
}
return 0;
}

View File

@@ -100,6 +100,7 @@ void nfaExecCastle0_dumpText(const struct NFA *nfa, FILE *f) {
fprintf(f, "unknown type %u\n", c->type);
break;
}
fprintf(f, "Stale Iter Offset: %u\n", c->staleIterOffset);
fprintf(f, "\n");
dumpTextReverse(nfa, f);

View File

@@ -63,6 +63,7 @@ struct SubCastle {
* - struct Castle
* - struct SubCastle[numRepeats]
* - tables for sparse model repeats
* - sparse iterator for subcastles that may be stale
*
* Castle stores an "active repeats" multibit in stream state, followed by the
packed repeat state for each SubCastle. If all SubCastles are mutually
@@ -83,6 +84,9 @@ struct ALIGN_AVX_DIRECTIVE Castle {
char pureExclusive; //!< tells us if all SubCastles are mutually exclusive
u8 activeIdxSize; //!< number of bytes in stream state to store
// active SubCastle id for exclusive mode
u32 staleIterOffset; //!< offset to a sparse iterator to check for stale
// sub castles
union {
struct {
char c;

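At run time the iterator is recovered with plain offset arithmetic from the Castle base pointer, exactly as castleDeactivateStaleSubs() does in castle.c above. A minimal sketch compiled against this header (getStaleIter() is an illustrative name, not a function in the codebase):

#include <stddef.h>

/* Hypothetical accessor: staleIterOffset == 0 means no subcastle has a
 * finite repeat bound, so nothing can ever go stale. Otherwise the sparse
 * iterator lives at that byte offset from the start of the Castle. */
static inline const struct mmbit_sparse_iter *
getStaleIter(const struct Castle *c) {
    if (!c->staleIterOffset) {
        return NULL;
    }
    return (const struct mmbit_sparse_iter *)
               ((const char *)c + c->staleIterOffset);
}
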
View File

@@ -32,6 +32,7 @@
#include "castlecompile.h"
#include "castle_internal.h"
#include "limex_limits.h"
#include "nfa_internal.h"
#include "repeatcompile.h"
#include "shufticompile.h"
@@ -47,6 +48,7 @@
#include "util/dump_charclass.h"
#include "util/graph.h"
#include "util/make_unique.h"
#include "util/multibit_build.h"
#include "util/multibit_internal.h"
#include "util/ue2_containers.h"
#include "util/verify_types.h"
@@ -63,7 +65,6 @@ using boost::adaptors::map_values;
namespace ue2 {
#define CASTLE_MAX_TOPS 32
#define CLIQUE_GRAPH_MAX_SIZE 1000
static
@@ -305,7 +306,7 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
const vector<pair<depth, bool>> &repeatInfoPair,
u32 &scratchStateSize, u32 &streamStateSize,
u32 &tableSize, vector<u64a> &tables, u32 &sparseRepeats,
const set<u32> &exclusiveGroup) {
const set<u32> &exclusiveGroup, vector<u32> &may_stale) {
u32 i = 0;
u32 maxStreamSize = 0;
bool exclusive = exclusiveGroup.size() > 1;
@@ -343,6 +344,10 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
streamStateSize += subStreamStateSize;
}
if (pr.bounds.max.is_finite()) {
may_stale.push_back(i);
}
info.type = verify_u8(rtype);
info.repeatMin = depth_to_u32(pr.bounds.min);
info.repeatMax = depth_to_u32(pr.bounds.max);
@@ -492,11 +497,20 @@ buildCastle(const CastleProto &proto,
u32 tableSize = 0;
u32 sparseRepeats = 0;
vector<u32> may_stale; /* sub castles that may go stale */
buildSubcastles(proto, subs, infos, patchSize, repeatInfoPair,
scratchStateSize, streamStateSize, tableSize,
tables, sparseRepeats, exclusiveGroup);
tables, sparseRepeats, exclusiveGroup, may_stale);
const size_t total_size =
DEBUG_PRINTF("%zu subcastles may go stale\n", may_stale.size());
vector<mmbit_sparse_iter> stale_iter;
if (!may_stale.empty()) {
mmbBuildSparseIterator(stale_iter, may_stale, numRepeats);
}
size_t total_size =
sizeof(NFA) + // initial NFA structure
sizeof(Castle) + // Castle structure
sizeof(SubCastle) * subs.size() + // SubCastles themselves
@@ -506,6 +520,9 @@ buildCastle(const CastleProto &proto,
sizeof(u64a) * sparseRepeats; // paddings for
// REPEAT_SPARSE_OPTIMAL_P tables
total_size = ROUNDUP_N(total_size, alignof(mmbit_sparse_iter));
total_size += byte_length(stale_iter); // stale sparse iter
aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);
nfa->type = verify_u8(CASTLE_NFA_0);
nfa->length = verify_u32(total_size);
@@ -515,7 +532,8 @@ buildCastle(const CastleProto &proto,
nfa->minWidth = verify_u32(minWidth);
nfa->maxWidth = maxWidth.is_finite() ? verify_u32(maxWidth) : 0;
char *ptr = (char *)nfa.get() + sizeof(NFA);
char * const base_ptr = (char *)nfa.get() + sizeof(NFA);
char *ptr = base_ptr;
Castle *c = (Castle *)ptr;
c->numRepeats = verify_u32(subs.size());
c->exclusive = exclusive;
@@ -560,6 +578,16 @@ buildCastle(const CastleProto &proto,
sub->exclusive = 0;
}
}
ptr = base_ptr + total_size - sizeof(NFA) - byte_length(stale_iter);
assert(ptr + byte_length(stale_iter) == base_ptr + total_size - sizeof(NFA));
if (!stale_iter.empty()) {
c->staleIterOffset = verify_u32(ptr - base_ptr);
copy_bytes(ptr, stale_iter);
ptr += byte_length(stale_iter);
}
return nfa;
}
@@ -893,7 +921,7 @@ unique_ptr<NGHolder> makeHolder(const CastleProto &proto, nfa_kind kind,
unique_ptr<NGHolder> g = ue2::make_unique<NGHolder>(kind);
for (const auto &m : proto.repeats) {
if (m.first >= CASTLE_MAX_TOPS) {
if (m.first >= NFA_MAX_TOP_MASKS) {
DEBUG_PRINTF("top %u too big for an NFA\n", m.first);
return nullptr;
}

View File

@@ -189,7 +189,8 @@ char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q);
* be monotonically increasing. If not all the data was processed during
* the call, the queue is updated to reflect the remaining work.
* @param report we are interested in, if set at the end of the scan returns
* @ref MO_MATCHES_PENDING
* @ref MO_MATCHES_PENDING. If no report is desired, MO_INVALID_IDX should
* be passed in.
* @return @ref MO_ALIVE if the nfa is still active with no matches pending,
* and @ref MO_MATCHES_PENDING if there are matches pending, 0 if not
* alive
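
This ties back to the new guard in castleInAccept(): a caller that only needs to catch a full queue up passes MO_INVALID_IDX, and all accept checks fall out immediately. A hedged usage sketch, assuming nfaQueueExecRose() is the entry point this comment documents:

/* Catch the queue up without requesting any report; engines such as the
 * castle now short-circuit their accept checks for MO_INVALID_IDX. */
static char catch_up_queue(const struct NFA *nfa, struct mq *q) {
    return nfaQueueExecRose(nfa, q, MO_INVALID_IDX);
}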