diff --git a/src/grey.cpp b/src/grey.cpp
index 3f169976..d0872415 100644
--- a/src/grey.cpp
+++ b/src/grey.cpp
@@ -54,7 +54,7 @@ Grey::Grey(void) :
                    allowRose(true),
                    allowExtendedNFA(true), /* bounded repeats of course */
                    allowLimExNFA(true),
-                   allowSidecar(true),
+                   allowSidecar(false),
                    allowAnchoredAcyclic(true),
                    allowSmallLiteralSet(true),
                    allowCastle(true),
diff --git a/src/nfa/castle.c b/src/nfa/castle.c
index 66e0ded6..274e5705 100644
--- a/src/nfa/castle.c
+++ b/src/nfa/castle.c
@@ -162,6 +162,10 @@ static really_inline
 char castleInAccept(const struct Castle *c, struct mq *q,
                     const ReportID report, const u64a offset) {
     DEBUG_PRINTF("offset=%llu\n", offset);
+    /* ignore when just catching up due to full queue */
+    if (report == MO_INVALID_IDX) {
+        return 0;
+    }
 
     if (c->exclusive) {
         const u32 activeIdx = partial_load_u32(q->streamState,
@@ -216,6 +220,11 @@ void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
                                void *full_state, void *stream_state) {
     DEBUG_PRINTF("offset=%llu\n", offset);
 
+    if (!c->staleIterOffset) {
+        DEBUG_PRINTF("{no repeats can go stale}\n");
+        return; /* no subcastle can ever go stale */
+    }
+
     if (c->exclusive) {
         const u32 activeIdx = partial_load_u32(stream_state, c->activeIdxSize);
         if (activeIdx < c->numRepeats) {
@@ -227,19 +236,27 @@ void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
 
     if (!c->pureExclusive) {
         const u8 *active = (const u8 *)stream_state + c->activeIdxSize;
-        for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
-             i != MMB_INVALID;
-             i = mmbit_iterate(active, c->numRepeats, i)) {
+        const struct mmbit_sparse_iter *it
+            = (const void *)((const char *)c + c->staleIterOffset);
+
+        struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
+        u32 numRepeats = c->numRepeats;
+        u32 idx = 0;
+
+        u32 i = mmbit_sparse_iter_begin(active, numRepeats, &idx, it, si_state);
+        while(i != MMB_INVALID) {
             DEBUG_PRINTF("subcastle %u\n", i);
-            subCastleDeactivateStaleSubs(c, offset, full_state,
-                                         stream_state, i);
+            subCastleDeactivateStaleSubs(c, offset, full_state, stream_state, i);
+            i = mmbit_sparse_iter_next(active, numRepeats, i, &idx, it,
+                                       si_state);
         }
     }
 }
 
 static really_inline
 void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset,
-                      void *full_state, void *stream_state) {
+                      void *full_state, void *stream_state,
+                      UNUSED char stale_checked) {
     assert(top < c->numRepeats);
 
     const struct SubCastle *sub = getSubCastle(c, top);
@@ -263,8 +280,8 @@ void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset,
     } else {
         DEBUG_PRINTF("repeat %u is already alive\n", top);
         // Caller should ensure we're not stale.
-        assert(repeatHasMatch(info, rctrl, rstate, offset) !=
-               REPEAT_STALE);
+        assert(!stale_checked
+               || repeatHasMatch(info, rctrl, rstate, offset) != REPEAT_STALE);
 
         // Ignore duplicate top events.
         u64a last = repeatLastTop(info, rctrl, rstate);
@@ -589,7 +606,103 @@ char castleScan(const struct Castle *c, const u8 *buf, const size_t begin,
 }
 
 static really_inline
-void castleHandleEvent(const struct Castle *c, struct mq *q, const u64a sp) {
+char castleRevScanVerm(const struct Castle *c, const u8 *buf,
+                       const size_t begin, const size_t end, size_t *loc) {
+    const u8 *ptr = rvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end);
+    if (ptr == buf + begin - 1) {
+        DEBUG_PRINTF("no escape found\n");
+        return 0;
+    }
+
+    assert(loc);
+    assert(ptr >= buf && ptr < buf + end);
+    *loc = (size_t)(ptr - buf);
+    DEBUG_PRINTF("escape found at offset %zu\n", *loc);
+    return 1;
+}
+
+static really_inline
+char castleRevScanNVerm(const struct Castle *c, const u8 *buf,
+                        const size_t begin, const size_t end, size_t *loc) {
+    const u8 *ptr = rnvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end);
+    if (ptr == buf + begin - 1) {
+        DEBUG_PRINTF("no escape found\n");
+        return 0;
+    }
+
+    assert(loc);
+    assert(ptr >= buf && ptr < buf + end);
+    *loc = (size_t)(ptr - buf);
+    DEBUG_PRINTF("escape found at offset %zu\n", *loc);
+    return 1;
+}
+
+static really_inline
+char castleRevScanShufti(const struct Castle *c, const u8 *buf,
+                         const size_t begin, const size_t end, size_t *loc) {
+    const m128 mask_lo = c->u.shuf.mask_lo;
+    const m128 mask_hi = c->u.shuf.mask_hi;
+    const u8 *ptr = rshuftiExec(mask_lo, mask_hi, buf + begin, buf + end);
+    if (ptr == buf + begin - 1) {
+        DEBUG_PRINTF("no escape found\n");
+        return 0;
+    }
+
+    assert(loc);
+    assert(ptr >= buf && ptr < buf + end);
+    *loc = (size_t)(ptr - buf);
+    DEBUG_PRINTF("escape found at offset %zu\n", *loc);
+    return 1;
+}
+
+static really_inline
+char castleRevScanTruffle(const struct Castle *c, const u8 *buf,
+                          const size_t begin, const size_t end, size_t *loc) {
+    const u8 *ptr = rtruffleExec(c->u.truffle.mask1, c->u.truffle.mask2,
+                                 buf + begin, buf + end);
+    if (ptr == buf + begin - 1) {
+        DEBUG_PRINTF("no escape found\n");
+        return 0;
+    }
+
+    assert(loc);
+    assert(ptr >= buf && ptr < buf + end);
+    *loc = (size_t)(ptr - buf);
+    DEBUG_PRINTF("escape found at offset %zu\n", *loc);
+    return 1;
+}
+
+static really_inline
+char castleRevScan(const struct Castle *c, const u8 *buf, const size_t begin,
+                   const size_t end, size_t *loc) {
+    assert(begin <= end);
+    DEBUG_PRINTF("scanning backwards over (%zu,%zu]\n", begin, end);
+    if (begin == end) {
+        return 0;
+    }
+
+    switch (c->type) {
+    case CASTLE_DOT:
+        // Nothing can stop a dot scan!
+        return 0;
+    case CASTLE_VERM:
+        return castleRevScanVerm(c, buf, begin, end, loc);
+    case CASTLE_NVERM:
+        return castleRevScanNVerm(c, buf, begin, end, loc);
+    case CASTLE_SHUFTI:
+        return castleRevScanShufti(c, buf, begin, end, loc);
+    case CASTLE_TRUFFLE:
+        return castleRevScanTruffle(c, buf, begin, end, loc);
+    default:
+        DEBUG_PRINTF("unknown scan type!\n");
+        assert(0);
+        return 0;
+    }
+}
+
+static really_inline
+void castleHandleEvent(const struct Castle *c, struct mq *q, const u64a sp,
+                       char stale_checked) {
     const u32 event = q->items[q->cur].type;
     switch (event) {
     case MQE_TOP:
@@ -603,11 +716,23 @@ void castleHandleEvent(const struct Castle *c, struct mq *q, const u64a sp) {
         assert(event < MQE_INVALID);
         u32 top = event - MQE_TOP_FIRST;
         DEBUG_PRINTF("top %u at offset %llu\n", top, sp);
-        castleProcessTop(c, top, sp, q->state, q->streamState);
+        castleProcessTop(c, top, sp, q->state, q->streamState, stale_checked);
         break;
     }
 }
 
+static really_inline
+void clear_repeats(const struct Castle *c, const struct mq *q, u8 *active) {
+    DEBUG_PRINTF("clearing active repeats due to escape\n");
+    if (c->exclusive) {
+        partial_store_u32(q->streamState, c->numRepeats, c->activeIdxSize);
+    }
+
+    if (!c->pureExclusive) {
+        mmbit_clear(active, c->numRepeats);
+    }
+}
+
 static really_inline
 char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
                         enum MatchMode mode) {
@@ -698,15 +823,7 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
         }
 
         if (escape_found) {
-            DEBUG_PRINTF("clearing active repeats due to escape\n");
-            if (c->exclusive) {
-                partial_store_u32(q->streamState, c->numRepeats,
-                                  c->activeIdxSize);
-            }
-
-            if (!c->pureExclusive) {
-                mmbit_clear(active, c->numRepeats);
-            }
+            clear_repeats(c, q, active);
         }
     }
 
@@ -720,7 +837,7 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
         }
 
         sp = q_cur_offset(q);
-        castleHandleEvent(c, q, sp);
+        castleHandleEvent(c, q, sp, 1);
         q->cur++;
     }
 
@@ -745,28 +862,34 @@ char nfaExecCastle0_Q2(const struct NFA *n, struct mq *q, s64a end) {
     return nfaExecCastle0_Q_i(n, q, end, STOP_AT_MATCH);
 }
 
-static really_inline
-void castleStreamSilent(const struct Castle *c, u8 *active, const u8 *buf,
-                        size_t length) {
-    DEBUG_PRINTF("entry\n");
+static
+s64a castleLastKillLoc(const struct Castle *c, struct mq *q) {
+    assert(q_cur_type(q) == MQE_START);
+    assert(q_last_type(q) == MQE_END);
+    s64a sp = q_cur_loc(q);
+    s64a ep = q_last_loc(q);
 
-    // This call doesn't produce matches, so we elide the castleMatchLoop call
-    // entirely and just do escape scans to maintain the repeat.
+    DEBUG_PRINTF("finding final squash in (%lld, %lld]\n", sp, ep);
 
-    size_t eloc = 0;
-    char escaped = castleScan(c, buf, 0, length, &eloc);
-    if (escaped) {
-        assert(eloc < length);
-        DEBUG_PRINTF("escape found at %zu, clearing castle\n", eloc);
-        if (c->exclusive) {
-            partial_store_u32(active - c->activeIdxSize,
-                              c->numRepeats, c->activeIdxSize);
-        }
-
-        if (!c->pureExclusive) {
-            mmbit_clear(active, c->numRepeats);
+    size_t loc;
+
+    if (ep > 0) {
+        if (castleRevScan(c, q->buffer, sp > 0 ? sp : 0, ep, &loc)) {
+            return (s64a)loc;
         }
+        ep = 0;
     }
+
+    if (sp < 0) {
+        s64a hlen = q->hlength;
+
+        if (castleRevScan(c, q->history, sp + hlen, ep + hlen, &loc)) {
+            return (s64a)loc - hlen;
+        }
+        ep = 0;
+    }
+
+    return sp - 1; /* the repeats are never killed */
 }
 
 char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) {
@@ -780,76 +903,40 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) {
     assert(q->cur + 1 < q->end); /* require at least two items */
     assert(q_cur_type(q) == MQE_START);
 
-    u64a sp = q_cur_offset(q);
-    q->cur++;
-    DEBUG_PRINTF("sp=%llu\n", sp);
-
     const struct Castle *c = getImplNfa(n);
     u8 *active = (u8 *)q->streamState + c->activeIdxSize;
-    char found = 0;
+
+    u64a end_offset = q_last_loc(q) + q->offset;
+    s64a last_kill_loc = castleLastKillLoc(c, q);
+    DEBUG_PRINTF("all repeats killed at %lld (exec range %lld, %lld)\n",
+                 last_kill_loc, q_cur_loc(q), q_last_loc(q));
+    assert(last_kill_loc < q_last_loc(q));
+
+    if (last_kill_loc != q_cur_loc(q) - 1) {
+        clear_repeats(c, q, active);
+    }
+
+    q->cur++; /* skip start event */
+
+    /* skip events prior to the repeats being squashed */
+    while (q_cur_loc(q) <= last_kill_loc) {
+        DEBUG_PRINTF("skipping moot event at %lld\n", q_cur_loc(q));
+        q->cur++;
+        assert(q->cur < q->end);
+    }
+
     while (q->cur < q->end) {
         DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q),
                      q_cur_offset(q));
 
-        found = 0;
-        if (c->exclusive) {
-            const u32 activeIdx = partial_load_u32(q->streamState,
-                                                   c->activeIdxSize);
-            if (activeIdx < c->numRepeats) {
-                found = 1;
-            } else if (c->pureExclusive) {
-                DEBUG_PRINTF("castle is dead\n");
-                goto scan_done;
-            }
-        }
-
-        if (!found && !mmbit_any(active, c->numRepeats)) {
-            DEBUG_PRINTF("castle is dead\n");
-            goto scan_done;
-        }
-
-        u64a ep = q_cur_offset(q);
-
-        if (sp < q->offset) {
-            DEBUG_PRINTF("HISTORY BUFFER SCAN\n");
-            assert(q->offset - sp <= q->hlength);
-            u64a local_ep = MIN(q->offset, ep);
-            const u8 *ptr = q->history + q->hlength + sp - q->offset;
-            castleStreamSilent(c, active, ptr, local_ep - sp);
-            sp = local_ep;
-        }
-
-        found = 0;
-        if (c->exclusive) {
-            const u32 activeIdx = partial_load_u32(q->streamState,
-                                                   c->activeIdxSize);
-            if (activeIdx < c->numRepeats) {
-                found = 1;
-            } else if (c->pureExclusive) {
-                DEBUG_PRINTF("castle is dead\n");
-                goto scan_done;
-            }
-        }
-
-        if (!found && !mmbit_any(active, c->numRepeats)) {
-            DEBUG_PRINTF("castle is dead\n");
-            goto scan_done;
-        }
-
-        if (sp < ep) {
-            DEBUG_PRINTF("MAIN BUFFER SCAN\n");
-            assert(ep - q->offset <= q->length);
-            const u8 *ptr = q->buffer + sp - q->offset;
-            castleStreamSilent(c, active, ptr, ep - sp);
-        }
-
-scan_done:
-        sp = q_cur_offset(q);
-        castleDeactivateStaleSubs(c, sp, q->state, q->streamState);
-        castleHandleEvent(c, q, sp);
+        u64a sp = q_cur_offset(q);
+        castleHandleEvent(c, q, sp, 0);
         q->cur++;
     }
 
-    found = 0;
+    castleDeactivateStaleSubs(c, end_offset, q->state, q->streamState);
+
+    char found = 0;
     if (c->exclusive) {
         const u32 activeIdx = partial_load_u32(q->streamState,
                                                c->activeIdxSize);
@@ -866,7 +953,7 @@ scan_done:
         return 0;
     }
 
-    if (castleInAccept(c, q, report, sp)) {
+    if (castleInAccept(c, q, report, end_offset)) {
         return MO_MATCHES_PENDING;
     }
 
@@ -1013,4 +1100,3 @@ char nfaExecCastle0_expandState(const struct NFA *n, void *dest,
     }
     return 0;
 }
-
diff --git a/src/nfa/castle_dump.cpp b/src/nfa/castle_dump.cpp
index 5f906c2d..dd0e369f 100644
--- a/src/nfa/castle_dump.cpp
+++ b/src/nfa/castle_dump.cpp
@@ -100,6 +100,7 @@ void nfaExecCastle0_dumpText(const struct NFA *nfa, FILE *f) {
         fprintf(f, "unknown type %u\n", c->type);
         break;
     }
+    fprintf(f, "Stale Iter Offset: %u\n", c->staleIterOffset);
     fprintf(f, "\n");
     dumpTextReverse(nfa, f);
 }
diff --git a/src/nfa/castle_internal.h b/src/nfa/castle_internal.h
index 7a061d3d..54578d67 100644
--- a/src/nfa/castle_internal.h
+++ b/src/nfa/castle_internal.h
@@ -63,6 +63,7 @@ struct SubCastle {
  *     - struct Castle
  *     - struct SubCastle[numRepeats]
  *     - tables for sparse model repeats
+ *     - sparse iterator for subcastles that may be stale
  *
  * Castle stores an "active repeats" multibit in stream state, followed by the
  * packed repeat state for each SubCastle. If all SubCastles are mutual
@@ -83,6 +84,9 @@ struct ALIGN_AVX_DIRECTIVE Castle {
     char pureExclusive; //!< tells us if all SubCastles are mutual exclusive
     u8 activeIdxSize; //!< number of bytes in stream state to store
                       // active SubCastle id for exclusive mode
+    u32 staleIterOffset; //!< offset to a sparse iterator to check for stale
+                         // sub castles
+
     union {
         struct {
             char c;
diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp
--- a/src/nfa/castlecompile.cpp
+++ b/src/nfa/castlecompile.cpp
@@ -300,8 +301,8 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
                      vector<RepeatInfo> &infos, vector<u64a> &patchSize,
                      const vector<pair<depth, bool>> &repeatInfoPair,
                      u32 &scratchStateSize, u32 &streamStateSize,
                      u32 &tableSize, vector<u64a> &tables, u32 &sparseRepeats,
-                     const set<u32> &exclusiveGroup) {
+                     const set<u32> &exclusiveGroup, vector<u32> &may_stale) {
     u32 i = 0;
     u32 maxStreamSize = 0;
     bool exclusive = exclusiveGroup.size() > 1;
@@ -343,6 +344,10 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
             streamStateSize += subStreamStateSize;
         }
 
+        if (pr.bounds.max.is_finite()) {
+            may_stale.push_back(i);
+        }
+
         info.type = verify_u8(rtype);
         info.repeatMin = depth_to_u32(pr.bounds.min);
         info.repeatMax = depth_to_u32(pr.bounds.max);
@@ -492,11 +497,20 @@ buildCastle(const CastleProto &proto,
     u32 tableSize = 0;
     u32 sparseRepeats = 0;
 
+    vector<u32> may_stale; /* sub castles that may go stale */
+
     buildSubcastles(proto, subs, infos, patchSize, repeatInfoPair,
                     scratchStateSize, streamStateSize, tableSize,
-                    tables, sparseRepeats, exclusiveGroup);
+                    tables, sparseRepeats, exclusiveGroup, may_stale);
 
-    const size_t total_size =
+    DEBUG_PRINTF("%zu subcastles may go stale\n", may_stale.size());
+    vector<mmbit_sparse_iter> stale_iter;
+    if (!may_stale.empty()) {
+        mmbBuildSparseIterator(stale_iter, may_stale, numRepeats);
+    }
+
+
+    size_t total_size =
         sizeof(NFA) +                     // initial NFA structure
         sizeof(Castle) +                  // Castle structure
         sizeof(SubCastle) * subs.size() + // SubCastles themselves
@@ -506,6 +520,9 @@ buildCastle(const CastleProto &proto,
         sizeof(u64a) * sparseRepeats;     // paddings for
                                           // REPEAT_SPARSE_OPTIMAL_P tables
 
+    total_size = ROUNDUP_N(total_size, alignof(mmbit_sparse_iter));
+    total_size += byte_length(stale_iter); // stale sparse iter
+
     aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);
     nfa->type = verify_u8(CASTLE_NFA_0);
     nfa->length = verify_u32(total_size);
@@ -515,7 +532,8 @@ buildCastle(const CastleProto &proto,
     nfa->minWidth = verify_u32(minWidth);
     nfa->maxWidth = maxWidth.is_finite() ? verify_u32(maxWidth) : 0;
 
-    char *ptr = (char *)nfa.get() + sizeof(NFA);
+    char * const base_ptr = (char *)nfa.get() + sizeof(NFA);
+    char *ptr = base_ptr;
     Castle *c = (Castle *)ptr;
     c->numRepeats = verify_u32(subs.size());
     c->exclusive = exclusive;
@@ -560,6 +578,16 @@ buildCastle(const CastleProto &proto,
             sub->exclusive = 0;
         }
     }
+
+    ptr = base_ptr + total_size - sizeof(NFA) - byte_length(stale_iter);
+
+    assert(ptr + byte_length(stale_iter) == base_ptr + total_size - sizeof(NFA));
+    if (!stale_iter.empty()) {
+        c->staleIterOffset = verify_u32(ptr - base_ptr);
+        copy_bytes(ptr, stale_iter);
+        ptr += byte_length(stale_iter);
+    }
+
     return nfa;
 }
@@ -893,7 +921,7 @@ unique_ptr<NGHolder> makeHolder(const CastleProto &proto, nfa_kind kind,
 
     unique_ptr<NGHolder> g = ue2::make_unique<NGHolder>(kind);
     for (const auto &m : proto.repeats) {
-        if (m.first >= CASTLE_MAX_TOPS) {
+        if (m.first >= NFA_MAX_TOP_MASKS) {
             DEBUG_PRINTF("top %u too big for an NFA\n", m.first);
             return nullptr;
         }
diff --git a/src/nfa/nfa_api.h b/src/nfa/nfa_api.h
index 25654990..4e31a625 100644
--- a/src/nfa/nfa_api.h
+++ b/src/nfa/nfa_api.h
@@ -189,7 +189,8 @@ char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q);
  * be monotonically increasing. If not all the data was processed during
  * the call, the queue is updated to reflect the remaining work.
  * @param report we are interested in, if set at the end of the scan returns
- * @ref MO_MATCHES_PENDING
+ * @ref MO_MATCHES_PENDING. If no report is desired, MO_INVALID_IDX should
+ * be passed in.
  * @return @ref MO_ALIVE if the nfa is still active with no matches pending,
  * and @ref MO_MATCHES_PENDING if there are matches pending, 0 if not
  * alive
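---

Reviewer note (not part of the patch): the key observation behind the
castleDeactivateStaleSubs change is that only subcastles with a finite repeat
bound (those where pr.bounds.max.is_finite() holds at compile time) can ever
go stale. Build time records exactly that subset as a precomputed sparse
iterator stored at staleIterOffset, and the runtime stale check then walks
the intersection of that subset with the active multibit instead of every
active repeat, returning immediately when staleIterOffset is zero. Below is
a minimal self-contained sketch of the idea; it uses plain 64-bit masks in
place of Hyperscan's mmbit/mmbit_sparse_iter machinery, so every name in it
is hypothetical, not the library's API.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical model: one bit per subcastle in a u64, standing in for the
 * variable-size multibit used by the real implementation. */
typedef struct {
    uint64_t may_stale; /* computed at build time: finite-bound repeats only */
} StaleIter;

/* Visit only repeats that are both active and able to go stale; repeats
 * with an unbounded upper limit are never examined at all. */
static void deactivate_stale(const StaleIter *it, uint64_t *active,
                             uint64_t expired) {
    uint64_t candidates = *active & it->may_stale;
    while (candidates) {
        /* index of lowest set bit (GCC/Clang builtin) */
        uint32_t i = (uint32_t)__builtin_ctzll(candidates);
        candidates &= candidates - 1; /* clear lowest set bit */
        if (expired & (1ULL << i)) {
            printf("subcastle %u is stale, deactivating\n", i);
            *active &= ~(1ULL << i);
        }
    }
}

int main(void) {
    /* Subcastles 0 and 2 have finite bounds; 1 is unbounded, e.g. {n,}. */
    StaleIter it = { .may_stale = 0x5 };
    uint64_t active = 0x7;  /* all three repeats currently active */
    uint64_t expired = 0x6; /* repeats 1 and 2 are past their windows */

    deactivate_stale(&it, &active, expired);
    printf("active after stale check: 0x%llx\n",
           (unsigned long long)active); /* 0x3: repeat 1 kept, it cannot stale */
    return 0;
}

The castleLastKillLoc change follows the same "do less work" theme: instead
of the old forward "silent" scans over history and buffer for every queue
event, a single backwards escape scan (the new rvermicelli/rshufti/rtruffle
paths) finds the last point at which all repeats were killed, so events at
or before that point are skipped outright and the repeats are cleared once.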