diff --git a/src/nfa/castle.c b/src/nfa/castle.c
index 13a44a97..bfdcf6b5 100644
--- a/src/nfa/castle.c
+++ b/src/nfa/castle.c
@@ -979,6 +979,46 @@ char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report,
     return castleInAccept(c, q, report, q_cur_offset(q));
 }
 
+char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q) {
+    assert(n && q);
+    assert(n->type == CASTLE_NFA_0);
+    DEBUG_PRINTF("entry\n");
+
+    const struct Castle *c = getImplNfa(n);
+    const u64a offset = q_cur_offset(q);
+    DEBUG_PRINTF("offset=%llu\n", offset);
+
+    if (c->exclusive) {
+        u8 *active = (u8 *)q->streamState;
+        u8 *groups = active + c->groupIterOffset;
+        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
+             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
+            u8 *cur = active + i * c->activeIdxSize;
+            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
+            DEBUG_PRINTF("subcastle %u\n", activeIdx);
+            const struct SubCastle *sub = getSubCastle(c, activeIdx);
+            if (subCastleInAccept(c, q, sub->report, offset, activeIdx)) {
+                return 1;
+            }
+        }
+    }
+
+    if (c->exclusive != PURE_EXCLUSIVE) {
+        const u8 *active = (const u8 *)q->streamState + c->activeOffset;
+        for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
+             i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
+            DEBUG_PRINTF("subcastle %u\n", i);
+            const struct SubCastle *sub = getSubCastle(c, i);
+            if (subCastleInAccept(c, q, sub->report, offset, i)) {
+                return 1;
+            }
+        }
+    }
+
+    return 0;
+}
+
 char nfaExecCastle0_queueInitState(UNUSED const struct NFA *n, struct mq *q) {
     assert(n && q);
     assert(n->type == CASTLE_NFA_0);
diff --git a/src/nfa/castle.h b/src/nfa/castle.h
index 8fc3514b..84d79097 100644
--- a/src/nfa/castle.h
+++ b/src/nfa/castle.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -44,6 +44,7 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report);
 char nfaExecCastle0_reportCurrent(const struct NFA *n, struct mq *q);
 char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report,
                              struct mq *q);
+char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q);
 char nfaExecCastle0_queueInitState(const struct NFA *n, struct mq *q);
 char nfaExecCastle0_initCompressedState(const struct NFA *n, u64a offset,
                                         void *state, u8 key);
diff --git a/src/nfa/gough.c b/src/nfa/gough.c
index c52bca06..3b7a115d 100644
--- a/src/nfa/gough.c
+++ b/src/nfa/gough.c
@@ -1048,6 +1048,14 @@ char nfaExecGough16_inAccept(const struct NFA *n, ReportID report,
     return nfaExecMcClellan16_inAccept(n, report, q);
 }
 
+char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q) {
+    return nfaExecMcClellan8_inAnyAccept(n, q);
+}
+
+char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q) {
+    return nfaExecMcClellan16_inAnyAccept(n, q);
+}
+
 static
 char goughCheckEOD(const struct NFA *nfa, u16 s,
                    const struct gough_som_info *som,
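The new `_inAnyAccept` entry points mirror the existing `_inAccept` calls but drop the report filter: the Castle version walks the same active-subcastle iterators and succeeds on the first accepting repeat, and the Gough versions simply delegate to McClellan. A standalone sketch of the distinction, using toy types rather than the real engine structures (`ToyEngine`, `inAccept` and `inAnyAccept` here are illustrative names only):

```cpp
// Toy model, not Hyperscan code: an engine tracks a set of live states plus
// the subset that are accepts. _inAccept asks "is an accept state carrying
// this specific report on?", while _inAnyAccept drops the report filter,
// which is all the eager-prefix runtime below needs.
#include <bitset>
#include <cstdint>
#include <map>
#include <set>

struct ToyEngine {
    std::bitset<64> accept_mask;                    // which states are accepts
    std::map<uint32_t, std::set<uint32_t>> reports; // state -> report IDs
};

bool inAccept(const ToyEngine &e, std::bitset<64> live, uint32_t report) {
    std::bitset<64> acc = live & e.accept_mask;
    for (uint32_t s = 0; s < 64; s++) {
        if (acc[s] && e.reports.at(s).count(report)) {
            return true;
        }
    }
    return false;
}

bool inAnyAccept(const ToyEngine &e, std::bitset<64> live) {
    return (live & e.accept_mask).any(); // no per-report scan needed
}
```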
diff --git a/src/nfa/gough.h b/src/nfa/gough.h
index 41d4cb5a..1a7dbd74 100644
--- a/src/nfa/gough.h
+++ b/src/nfa/gough.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -46,6 +46,7 @@ char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end);
 char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report);
 char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q);
 char nfaExecGough8_inAccept(const struct NFA *n, ReportID report, struct mq *q);
+char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q);
 char nfaExecGough8_queueInitState(const struct NFA *n, struct mq *q);
 char nfaExecGough8_initCompressedState(const struct NFA *n, u64a offset,
                                        void *state, u8 key);
@@ -68,6 +69,7 @@ char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end);
 char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report);
 char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q);
 char nfaExecGough16_inAccept(const struct NFA *n, ReportID report, struct mq *q);
+char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q);
 char nfaExecGough16_queueInitState(const struct NFA *n, struct mq *q);
 char nfaExecGough16_initCompressedState(const struct NFA *n, u64a offset,
                                         void *state, u8 key);
diff --git a/src/nfa/lbr.h b/src/nfa/lbr.h
index b770477d..a9e42046 100644
--- a/src/nfa/lbr.h
+++ b/src/nfa/lbr.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -46,6 +46,7 @@ char nfaExecLbrDot_Q2(const struct NFA *n, struct mq *q, s64a end);
 char nfaExecLbrDot_QR(const struct NFA *n, struct mq *q, ReportID report);
 char nfaExecLbrDot_reportCurrent(const struct NFA *n, struct mq *q);
 char nfaExecLbrDot_inAccept(const struct NFA *n, ReportID report, struct mq *q);
+char nfaExecLbrDot_inAnyAccept(const struct NFA *n, struct mq *q);
 char nfaExecLbrDot_queueInitState(const struct NFA *n, struct mq *q);
 char nfaExecLbrDot_initCompressedState(const struct NFA *n, u64a offset,
                                        void *state, u8 key);
@@ -66,6 +67,7 @@ char nfaExecLbrVerm_QR(const struct NFA *n, struct mq *q, ReportID report);
 char nfaExecLbrVerm_reportCurrent(const struct NFA *n, struct mq *q);
 char nfaExecLbrVerm_inAccept(const struct NFA *n, ReportID report,
                              struct mq *q);
+char nfaExecLbrVerm_inAnyAccept(const struct NFA *n, struct mq *q);
 char nfaExecLbrVerm_queueInitState(const struct NFA *n, struct mq *q);
 char nfaExecLbrVerm_initCompressedState(const struct NFA *n, u64a offset,
                                         void *state, u8 key);
@@ -86,6 +88,7 @@ char nfaExecLbrNVerm_QR(const struct NFA *n, struct mq *q, ReportID report);
 char nfaExecLbrNVerm_reportCurrent(const struct NFA *n, struct mq *q);
 char nfaExecLbrNVerm_inAccept(const struct NFA *n, ReportID report,
                               struct mq *q);
+char nfaExecLbrNVerm_inAnyAccept(const struct NFA *n, struct mq *q);
 char nfaExecLbrNVerm_queueInitState(const struct NFA *n, struct mq *q);
 char nfaExecLbrNVerm_initCompressedState(const struct NFA *n, u64a offset,
                                          void *state, u8 key);
@@ -106,6 +109,7 @@ char nfaExecLbrShuf_QR(const struct NFA *n, struct mq *q, ReportID report);
 char nfaExecLbrShuf_reportCurrent(const struct NFA *n, struct mq *q);
 char nfaExecLbrShuf_inAccept(const struct NFA *n, ReportID report,
                              struct mq *q);
+char nfaExecLbrShuf_inAnyAccept(const struct NFA *n, struct mq *q);
 char nfaExecLbrShuf_queueInitState(const struct NFA *n, struct mq *q);
 char nfaExecLbrShuf_initCompressedState(const struct NFA *n, u64a offset,
                                         void *state, u8 key);
@@ -126,6 +130,7 @@ char nfaExecLbrTruf_QR(const struct NFA *n, struct mq *q, ReportID report);
 char nfaExecLbrTruf_reportCurrent(const struct NFA *n, struct mq *q);
 char nfaExecLbrTruf_inAccept(const struct NFA *n, ReportID report,
                              struct mq *q);
+char nfaExecLbrTruf_inAnyAccept(const struct NFA *n, struct mq *q);
 char nfaExecLbrTruf_queueInitState(const struct NFA *n, struct mq *q);
 char nfaExecLbrTruf_initCompressedState(const struct NFA *n, u64a offset,
                                         void *state, u8 key);
diff --git a/src/nfa/lbr_common_impl.h b/src/nfa/lbr_common_impl.h
index 917a8e91..4fb8f62a 100644
--- a/src/nfa/lbr_common_impl.h
+++ b/src/nfa/lbr_common_impl.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -94,6 +94,15 @@ char JOIN(ENGINE_EXEC_NAME, _inAccept)(const struct NFA *nfa,
     return lbrInAccept(l, lstate, q->streamState, offset, report);
 }
 
+char JOIN(ENGINE_EXEC_NAME, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
+    assert(nfa && q);
+    assert(isLbrType(nfa->type));
+    DEBUG_PRINTF("entry\n");
+
+    const struct lbr_common *l = getImplNfa(nfa);
+    return JOIN(ENGINE_EXEC_NAME, _inAccept)(nfa, l->report, q);
+}
+
 char JOIN(ENGINE_EXEC_NAME, _queueInitState)(const struct NFA *nfa,
                                              struct mq *q) {
     assert(nfa && q);
diff --git a/src/nfa/limex.h b/src/nfa/limex.h
index 57ee46df..3d4d258b 100644
--- a/src/nfa/limex.h
+++ b/src/nfa/limex.h
@@ -60,6 +60,7 @@ extern "C"
     char gf_name##_reportCurrent(const struct NFA *n, struct mq *q);          \
     char gf_name##_inAccept(const struct NFA *n, ReportID report,             \
                             struct mq *q);                                    \
+    char gf_name##_inAnyAccept(const struct NFA *n, struct mq *q);            \
     char gf_name##_queueInitState(const struct NFA *n, struct mq *q);         \
     char gf_name##_initCompressedState(const struct NFA *n, u64a offset,      \
                                        void *state, u8 key);                  \
diff --git a/src/nfa/limex_common_impl.h b/src/nfa/limex_common_impl.h
index 6e4b7718..68e0c0ad 100644
--- a/src/nfa/limex_common_impl.h
+++ b/src/nfa/limex_common_impl.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -40,6 +40,7 @@
 #define TESTEOD_FN JOIN(moNfaTestEod, SIZE)
 #define TESTEOD_REV_FN JOIN(moNfaRevTestEod, SIZE)
 #define LIMEX_INACCEPT_FN JOIN(limexInAccept, SIZE)
+#define LIMEX_INANYACCEPT_FN JOIN(limexInAnyAccept, SIZE)
 #define EXPIRE_ESTATE_FN JOIN(limexExpireExtendedState, SIZE)
 #define REPORTCURRENT_FN JOIN(moNfaReportCurrent, SIZE)
 #define INITIAL_FN JOIN(moNfaInitial, SIZE)
@@ -374,11 +375,32 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
     return 0;
 }
 
+static really_inline
+char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
+                          union RepeatControl *repeat_ctrl, char *repeat_state,
+                          u64a offset) {
+    assert(limex);
+
+    const STATE_T acceptMask = LOAD_STATE(&limex->accept);
+    STATE_T accstate = AND_STATE(state, acceptMask);
+
+    // Are we in an accept state?
+    if (ISZERO_STATE(accstate)) {
+        DEBUG_PRINTF("no accept states are on\n");
+        return 0;
+    }
+
+    SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &accstate);
+
+    return ISNONZERO_STATE(accstate);
+}
+
 #undef TESTEOD_FN
 #undef TESTEOD_REV_FN
 #undef REPORTCURRENT_FN
 #undef EXPIRE_ESTATE_FN
 #undef LIMEX_INACCEPT_FN
+#undef LIMEX_INANYACCEPT_FN
 #undef INITIAL_FN
 #undef TOP_FN
 #undef TOPN_FN
diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp
index b8857922..79e6db1c 100644
--- a/src/nfa/limex_compile.cpp
+++ b/src/nfa/limex_compile.cpp
@@ -1008,7 +1008,8 @@ void findMaskedCompressionStates(const build_info &args,
     // Suffixes and outfixes can mask out leaf states, which should all be
     // accepts. Right now we can only do this when there is nothing in initDs,
     // as we switch that on unconditionally in the expand call.
-    if (generates_callbacks(h) && !hasInitDsStates(h, args.state_ids)) {
+    if (!inspects_states_for_accepts(h)
+        && !hasInitDsStates(h, args.state_ids)) {
         NFAStateSet nonleaf(args.num_states);
         for (const auto &e : edges_range(h)) {
             u32 from = args.state_ids.at(source(e, h));
diff --git a/src/nfa/limex_runtime_impl.h b/src/nfa/limex_runtime_impl.h
index 9924ef8c..19a5ebd3 100644
--- a/src/nfa/limex_runtime_impl.h
+++ b/src/nfa/limex_runtime_impl.h
@@ -650,7 +650,27 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
         ep = MIN(ep, end_abs);
         assert(ep >= sp);
 
-        assert(sp >= offset); // We no longer do history buffer scans here.
+        if (sp < offset) {
+            DEBUG_PRINTF("HISTORY BUFFER SCAN\n");
+            assert(offset - sp <= q->hlength);
+            u64a local_ep = MIN(offset, ep);
+            u64a final_look = 0;
+            /* we are starting inside the history buffer */
+            if (STREAMFIRST_FN(limex, q->history + q->hlength + sp - offset,
+                               local_ep - sp, &ctx, sp,
+                               &final_look) == MO_HALT_MATCHING) {
+                DEBUG_PRINTF("final_look:%llu sp:%llu end_abs:%llu "
+                             "offset:%llu\n", final_look, sp, end_abs, offset);
+                assert(q->cur);
+                q->cur--;
+                q->items[q->cur].type = MQE_START;
+                q->items[q->cur].location = sp + final_look - offset;
+                STORE_STATE(q->state, LOAD_STATE(&ctx.s));
+                return MO_MATCHES_PENDING;
+            }
+
+            sp = local_ep;
+        }
 
         if (sp >= ep) {
             goto scan_done;
@@ -868,6 +888,21 @@ char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa,
                               offset, report);
 }
 
+char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
+    assert(nfa && q);
+    assert(q->state && q->streamState);
+
+    const IMPL_NFA_T *limex = getImplNfa(nfa);
+    union RepeatControl *repeat_ctrl =
+        getRepeatControlBase(q->state, sizeof(STATE_T));
+    char *repeat_state = q->streamState + limex->stateSize;
+    STATE_T state = LOAD_STATE(q->state);
+    u64a offset = q->offset + q_last_loc(q) + 1;
+
+    return JOIN(limexInAnyAccept, SIZE)(limex, state, repeat_ctrl, repeat_state,
+                                        offset);
+}
+
 enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
                                                         const struct NFA *nfa,
                                                         struct mq *q,
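The LimEx implementation above is a straight bitwise test: intersect the live-state vector with the engine's accept mask, and only if that is non-empty pay for the bounded-repeat squash. A minimal standalone model, assuming a toy 64-bit state in place of the real `STATE_T` macros (`squashOutOfRangeRepeats` is a hypothetical stand-in for `SQUASH_UNTUG_BR_FN`):

```cpp
#include <cstdint>

using State = uint64_t;

// Stand-in for SQUASH_UNTUG_BR_FN: the real code consults the RepeatControl
// blocks and clears accept states whose bounded repeat is out of range at
// this offset. Here it is a no-op so the sketch is self-contained.
static State squashOutOfRangeRepeats(State accstate, uint64_t offset) {
    (void)offset;
    return accstate;
}

bool limexInAnyAcceptModel(State state, State acceptMask, uint64_t offset) {
    State accstate = state & acceptMask;
    if (!accstate) {
        return false; // no accept states are on: common case, exits early
    }
    accstate = squashOutOfRangeRepeats(accstate, offset);
    return accstate != 0;
}
```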
diff --git a/src/nfa/mcclellan.c b/src/nfa/mcclellan.c
index 314e88e7..ac26c6a1 100644
--- a/src/nfa/mcclellan.c
+++ b/src/nfa/mcclellan.c
@@ -850,7 +850,7 @@ char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) {
 }
 
 char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) {
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+    const struct mcclellan *m = getImplNfa(n);
     NfaCallback cb = q->cb;
     void *ctxt = q->context;
     u16 s = *(u16 *)q->state;
@@ -905,7 +905,7 @@ char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
                                 struct mq *q) {
     assert(n && q);
 
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+    const struct mcclellan *m = getImplNfa(n);
     u8 s = *(u8 *)q->state;
     DEBUG_PRINTF("checking accepts for %hhu\n", s);
     if (s < m->accept_limit_8) {
@@ -915,25 +915,45 @@ char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
     return mcclellanHasAccept(m, get_aux(m, s), report);
 }
 
+char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q) {
+    assert(n && q);
+
+    const struct mcclellan *m = getImplNfa(n);
+    u8 s = *(u8 *)q->state;
+    DEBUG_PRINTF("checking accepts for %hhu\n", s);
+    assert(s < m->accept_limit_8 || get_aux(m, s)->accept);
+
+    return s >= m->accept_limit_8;
+}
+
 char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report,
                                  struct mq *q) {
     assert(n && q);
 
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+    const struct mcclellan *m = getImplNfa(n);
     u16 s = *(u16 *)q->state;
     DEBUG_PRINTF("checking accepts for %hu\n", s);
 
     return mcclellanHasAccept(m, get_aux(m, s), report);
 }
 
+char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) {
+    assert(n && q);
+
+    const struct mcclellan *m = getImplNfa(n);
+    u16 s = *(u16 *)q->state;
+    DEBUG_PRINTF("checking accepts for %hu\n", s);
+
+    return !!get_aux(m, s)->accept;
+}
+
 char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end) {
     u64a offset = q->offset;
     const u8 *buffer = q->buffer;
     NfaCallback cb = q->cb;
     void *context = q->context;
     assert(n->type == MCCLELLAN_NFA_8);
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+    const struct mcclellan *m = getImplNfa(n);
     const u8 *hend = q->history + q->hlength;
 
     return nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q,
@@ -947,7 +967,7 @@ char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end) {
     NfaCallback cb = q->cb;
     void *context = q->context;
     assert(n->type == MCCLELLAN_NFA_16);
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+    const struct mcclellan *m = getImplNfa(n);
     const u8 *hend = q->history + q->hlength;
 
     return nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q,
@@ -961,7 +981,7 @@ char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report) {
     NfaCallback cb = q->cb;
     void *context = q->context;
     assert(n->type == MCCLELLAN_NFA_8);
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+    const struct mcclellan *m = getImplNfa(n);
     const u8 *hend = q->history + q->hlength;
 
     char rv = nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q,
@@ -980,7 +1000,7 @@ char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report) {
     NfaCallback cb = q->cb;
     void *context = q->context;
     assert(n->type == MCCLELLAN_NFA_16);
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+    const struct mcclellan *m = getImplNfa(n);
     const u8 *hend = q->history + q->hlength;
 
     char rv = nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q,
@@ -996,7 +1016,7 @@ char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report) {
 
 char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset,
                                            void *state, UNUSED u8 key) {
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
+    const struct mcclellan *m = getImplNfa(nfa);
     u8 s = offset ? m->start_floating : m->start_anchored;
     if (s) {
         *(u8 *)state = s;
@@ -1007,7 +1027,7 @@ char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset,
 
 char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset,
                                             void *state, UNUSED u8 key) {
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
+    const struct mcclellan *m = getImplNfa(nfa);
     u16 s = offset ? m->start_floating : m->start_anchored;
     if (s) {
         unaligned_store_u16(state, s);
@@ -1019,7 +1039,7 @@ char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset,
 void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state,
                                   const u8 *buf, char top, size_t start_off,
                                   size_t len, NfaCallback cb, void *ctxt) {
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
+    const struct mcclellan *m = getImplNfa(nfa);
 
     u8 s = top ? m->start_anchored : *(u8 *)state;
 
@@ -1037,7 +1057,7 @@ void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state,
 void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state,
                                    const u8 *buf, char top, size_t start_off,
                                    size_t len, NfaCallback cb, void *ctxt) {
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
+    const struct mcclellan *m = getImplNfa(nfa);
 
     u16 s = top ? m->start_anchored : unaligned_load_u16(state);
diff --git a/src/nfa/mcclellan.h b/src/nfa/mcclellan.h
index 6b4ec2d5..677265f5 100644
--- a/src/nfa/mcclellan.h
+++ b/src/nfa/mcclellan.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -47,6 +47,7 @@ char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report);
 char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q);
 char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
                                 struct mq *q);
+char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q);
 char nfaExecMcClellan8_queueInitState(const struct NFA *n, struct mq *q);
 char nfaExecMcClellan8_initCompressedState(const struct NFA *n, u64a offset,
                                            void *state, u8 key);
@@ -70,6 +71,7 @@ char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report);
 char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q);
 char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report,
                                  struct mq *q);
+char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q);
 char nfaExecMcClellan16_queueInitState(const struct NFA *n, struct mq *q);
 char nfaExecMcClellan16_initCompressedState(const struct NFA *n, u64a offset,
                                             void *state, u8 key);
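`nfaExecMcClellan8_inAnyAccept` can answer without touching the aux table because the 8-bit McClellan state numbering places every accept state at or above `accept_limit_8`, so the test collapses to a single compare. A sketch of that renumbering idea under toy types (this is not the real McClellan compiler, just the ordering trick):

```cpp
#include <cstdint>
#include <vector>

struct ToyDfaState {
    bool accept;
};

// Returns the accept limit and fills a remap table: states are ordered
// non-accepts first, so afterwards "s >= limit" <=> "s is an accept".
uint8_t renumberForAcceptLimit(const std::vector<ToyDfaState> &states,
                               std::vector<uint8_t> &remap) {
    remap.resize(states.size());
    uint8_t next = 0;
    for (size_t i = 0; i < states.size(); i++) { // non-accepts first
        if (!states[i].accept) { remap[i] = next++; }
    }
    uint8_t limit = next;
    for (size_t i = 0; i < states.size(); i++) { // accepts above the limit
        if (states[i].accept) { remap[i] = next++; }
    }
    return limit;
}
```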
diff --git a/src/nfa/mcclellancompile_util.cpp b/src/nfa/mcclellancompile_util.cpp
index 234574d8..2f1ffa02 100644
--- a/src/nfa/mcclellancompile_util.cpp
+++ b/src/nfa/mcclellancompile_util.cpp
@@ -395,4 +395,36 @@ dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
     }
 }
 
+static
+bool can_die_early(const raw_dfa &raw, dstate_id_t s,
+                   map<dstate_id_t, u32> &visited, u32 age_limit) {
+    if (contains(visited, s) && visited[s] >= age_limit) {
+        /* we have already visited (or are in the process of visiting) here
+         * with a looser limit. */
+        return false;
+    }
+    visited[s] = age_limit;
+
+    if (s == DEAD_STATE) {
+        return true;
+    }
+
+    if (age_limit == 0) {
+        return false;
+    }
+
+    for (const auto &next : raw.states[s].next) {
+        if (can_die_early(raw, next, visited, age_limit - 1)) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+bool can_die_early(const raw_dfa &raw, u32 age_limit) {
+    map<dstate_id_t, u32> visited;
+    return can_die_early(raw, raw.start_anchored, visited, age_limit);
+}
+
 } // namespace ue2
diff --git a/src/nfa/mcclellancompile_util.h b/src/nfa/mcclellancompile_util.h
index 7b6c033a..3d3ee2e7 100644
--- a/src/nfa/mcclellancompile_util.h
+++ b/src/nfa/mcclellancompile_util.h
@@ -57,6 +57,8 @@ size_t hash_dfa(const raw_dfa &rdfa);
 
 dstate_id_t get_sds_or_proxy(const raw_dfa &raw);
 
+bool can_die_early(const raw_dfa &raw, u32 age_limit);
+
 } // namespace ue2
 
 #endif
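`can_die_early` is a depth-bounded search for the dead state, memoised on the loosest budget each state has been explored with: a state is only revisited if the remaining budget is strictly larger than last time, which keeps the recursion finite even on cyclic DFAs. The same shape restated as a self-contained toy (4-symbol alphabet and plain integers instead of a `raw_dfa`):

```cpp
#include <array>
#include <cstdint>
#include <map>
#include <vector>

constexpr uint16_t TOY_DEAD = 0; // state 0 plays the role of DEAD_STATE

struct ToyDfa {
    std::vector<std::array<uint16_t, 4>> next; // per-state transitions
};

static bool canDieEarly(const ToyDfa &d, uint16_t s,
                        std::map<uint16_t, uint32_t> &visited,
                        uint32_t age_limit) {
    auto it = visited.find(s);
    if (it != visited.end() && it->second >= age_limit) {
        return false; // already explored with an equal or looser budget
    }
    visited[s] = age_limit;
    if (s == TOY_DEAD) {
        return true;
    }
    if (!age_limit) {
        return false; // budget exhausted before reaching the dead state
    }
    for (uint16_t nxt : d.next[s]) {
        if (canDieEarly(d, nxt, visited, age_limit - 1)) {
            return true;
        }
    }
    return false;
}
```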
diff --git a/src/nfa/mpv.h b/src/nfa/mpv.h
index dc5dad6f..a3f90719 100644
--- a/src/nfa/mpv.h
+++ b/src/nfa/mpv.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -36,7 +36,6 @@ struct NFA;
 
 char nfaExecMpv0_Q(const struct NFA *n, struct mq *q, s64a end);
 char nfaExecMpv0_reportCurrent(const struct NFA *n, struct mq *q);
-char nfaExecMpv0_inAccept(const struct NFA *n, ReportID report, struct mq *q);
 char nfaExecMpv0_queueInitState(const struct NFA *n, struct mq *q);
 char nfaExecMpv0_initCompressedState(const struct NFA *n, u64a offset,
                                      void *state, u8 key);
@@ -47,6 +46,7 @@ char nfaExecMpv0_expandState(const struct NFA *nfa, void *dest, const void *src,
 
 #define nfaExecMpv0_testEOD NFA_API_NO_IMPL
 #define nfaExecMpv0_inAccept NFA_API_NO_IMPL
+#define nfaExecMpv0_inAnyAccept NFA_API_NO_IMPL
 #define nfaExecMpv0_QR NFA_API_NO_IMPL
 #define nfaExecMpv0_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */
 #define nfaExecMpv0_B_Reverse NFA_API_NO_IMPL
diff --git a/src/nfa/nfa_api.h b/src/nfa/nfa_api.h
index 84f5c4a0..dad3894a 100644
--- a/src/nfa/nfa_api.h
+++ b/src/nfa/nfa_api.h
@@ -175,10 +175,16 @@ char nfaReportCurrentMatches(const struct NFA *nfa, struct mq *q);
  */
 char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q);
 
+/**
+ * Returns non-zero if the NFA is in any accept state regardless of report
+ * ID.
+ */
+char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q);
+
 /**
  * Process the queued commands on the given NFA up to end or the first match.
  *
- * Note: This version is meant for rose prefix NFAs:
+ * Note: This version is meant for rose prefix/infix NFAs:
  *  - never uses a callback
  *  - loading of state at a point in history is not special cased
  *
@@ -187,9 +193,9 @@ char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q);
 * end with some variant of end. The location field of the events must
 * be monotonically increasing. If not all the data was processed during
 * the call, the queue is updated to reflect the remaining work.
- * @param report we are interested in, if set at the end of the scan returns
- * @ref MO_MATCHES_PENDING. If no report is desired, MO_INVALID_IDX should
- * be passed in.
+ * @param report we are interested in. If the given report will be raised at
+ * the end location, the function returns @ref MO_MATCHES_PENDING. If no
+ * match information is desired, MO_INVALID_IDX should be passed in.
 * @return @ref MO_ALIVE if the nfa is still active with no matches pending,
 * and @ref MO_MATCHES_PENDING if there are matches pending, 0 if not
 * alive
diff --git a/src/nfa/nfa_api_dispatch.c b/src/nfa/nfa_api_dispatch.c
index 95b1898e..9591cad5 100644
--- a/src/nfa/nfa_api_dispatch.c
+++ b/src/nfa/nfa_api_dispatch.c
@@ -228,7 +228,6 @@ char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end) {
     assert(q);
     assert(end >= 0);
-    assert(q->context);
     assert(q->state);
     assert(q->cur < q->end);
     assert(q->end <= MAX_MQE_LEN);
@@ -285,6 +284,11 @@ char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q) {
     return 0;
 }
 
+char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q) {
+    DISPATCH_BY_NFA_TYPE(_inAnyAccept(nfa, q));
+    return 0;
+}
+
 char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID r) {
     DEBUG_PRINTF("nfa=%p\n", nfa);
 #ifdef DEBUG
diff --git a/src/nfa/nfa_kind.h b/src/nfa/nfa_kind.h
index 46d0bc4c..adc7045f 100644
--- a/src/nfa/nfa_kind.h
+++ b/src/nfa/nfa_kind.h
@@ -47,6 +47,7 @@ enum nfa_kind {
     NFA_OUTFIX, //!< "outfix" nfa not triggered by external events
     NFA_OUTFIX_RAW, //!< "outfix", but with unmanaged reports
     NFA_REV_PREFIX, //!< reverse running prefixes (for som)
+    NFA_EAGER_PREFIX, //!< rose prefix that is also run up to matches
 };
 
 /** \brief True if this kind of engine is triggered by a top event. */
@@ -63,8 +64,10 @@ bool is_triggered(enum nfa_kind k) {
 }
 
 /**
- * \brief True if this kind of engine generates callback events when it
- * enters accept states.
+ * \brief True if this kind of engine actively checks for accept states,
+ * either to halt matching or to raise a callback. Only engines generated
+ * with this property should be run with nfaQueueExec() or
+ * nfaQueueExecToMatch().
 */
 inline
 bool generates_callbacks(enum nfa_kind k) {
@@ -73,6 +76,24 @@ bool generates_callbacks(enum nfa_kind k) {
     case NFA_OUTFIX:
     case NFA_OUTFIX_RAW:
     case NFA_REV_PREFIX:
+    case NFA_EAGER_PREFIX:
+        return true;
+    default:
+        return false;
+    }
+}
+
+/**
+ * \brief True if this kind of engine has its state inspected to see if it is
+ * in an accept state. Engines generated with this property are commonly used
+ * with nfaQueueExecRose(), nfaInAcceptState(), and nfaInAnyAcceptState().
+ */
+inline
+bool inspects_states_for_accepts(enum nfa_kind k) {
+    switch (k) {
+    case NFA_PREFIX:
+    case NFA_INFIX:
+    case NFA_EAGER_PREFIX:
         return true;
     default:
         return false;
diff --git a/src/nfagraph/ng_execute.cpp b/src/nfagraph/ng_execute.cpp
index 92bef737..46307cd5 100644
--- a/src/nfagraph/ng_execute.cpp
+++ b/src/nfagraph/ng_execute.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -324,4 +324,49 @@ flat_set<NFAVertex> execute_graph(const NGHolder &running_g,
                          initial_states);
 }
 
+static
+bool can_die_early(const NGHolder &g, const vector<StateInfo> &info,
+                   const dynamic_bitset<> &s,
+                   map<dynamic_bitset<>, u32> &visited, u32 age_limit) {
+    if (contains(visited, s) && visited[s] >= age_limit) {
+        /* we have already visited (or are in the process of visiting) here
+         * with a looser limit. */
+        return false;
+    }
+    visited[s] = age_limit;
+
+    if (s.none()) {
+        DEBUG_PRINTF("dead\n");
+        return true;
+    }
+
+    if (age_limit == 0) {
+        return false;
+    }
+
+    dynamic_bitset<> all_succ(s.size());
+    step(g, info, s, &all_succ);
+    all_succ.reset(NODE_START_DOTSTAR);
+
+    for (u32 i = 0; i < N_CHARS; i++) {
+        dynamic_bitset<> next = all_succ;
+        filter_by_reach(info, &next, CharReach(i));
+        if (can_die_early(g, info, next, visited, age_limit - 1)) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+bool can_die_early(const NGHolder &g, u32 age_limit) {
+    if (proper_out_degree(g.startDs, g)) {
+        return false;
+    }
+    const vector<StateInfo> &info = makeInfoTable(g);
+    map<dynamic_bitset<>, u32> visited;
+    return can_die_early(g, info, makeStateBitset(g, {g.start}), visited,
+                         age_limit);
+}
+
 } // namespace ue2
diff --git a/src/nfagraph/ng_execute.h b/src/nfagraph/ng_execute.h
index e2c7c72d..bdcfecfd 100644
--- a/src/nfagraph/ng_execute.h
+++ b/src/nfagraph/ng_execute.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -64,6 +64,9 @@ flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag,
                                   const flat_set<NFAVertex> &input_start_states,
                                   const flat_set<NFAVertex> &initial);
 
+/* returns true if it is possible for the nfa to die within age_limit bytes */
+bool can_die_early(const NGHolder &g, u32 age_limit);
+
 } // namespace ue2
 
 #endif
diff --git a/src/nfagraph/ng_holder.h b/src/nfagraph/ng_holder.h
index 3243f665..07f21d0f 100644
--- a/src/nfagraph/ng_holder.h
+++ b/src/nfagraph/ng_holder.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -315,15 +315,26 @@ void remove_edges(const Container &c, NGHolder &h, bool renumber = true) {
     remove_edges(c.begin(), c.end(), h, renumber);
 }
 
-static UNUSED
+inline
 bool is_triggered(const NGHolder &g) {
     return is_triggered(g.kind);
 }
 
-static UNUSED
+inline
 bool generates_callbacks(const NGHolder &g) {
     return generates_callbacks(g.kind);
 }
+
+inline
+bool has_managed_reports(const NGHolder &g) {
+    return has_managed_reports(g.kind);
+}
+
+inline
+bool inspects_states_for_accepts(const NGHolder &g) {
+    return inspects_states_for_accepts(g.kind);
+}
+
 } // namespace ue2
 
 #endif
diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp
index 713fe370..af7779ba 100644
--- a/src/nfagraph/ng_limex.cpp
+++ b/src/nfagraph/ng_limex.cpp
@@ -373,7 +373,7 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm,
              const map<u32, vector<vector<CharReach>>> &triggers,
              bool compress_state, bool do_accel, bool impl_test_only, u32 hint,
              const CompileContext &cc) {
-    if (!generates_callbacks(h_in)) {
+    if (!has_managed_reports(h_in)) {
         rm = nullptr;
     } else {
         assert(rm);
@@ -413,7 +413,7 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm,
 
     set<NFAVertex> zombies = findZombies(*h, br_cyclic, state_ids, cc);
 
-    if (generates_callbacks(*h)) {
+    if (has_managed_reports(*h)) {
         assert(rm);
         remapReportsToPrograms(*h, *rm);
     }
@@ -508,7 +508,7 @@ u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm,
         return true;
     }
 
-    if (!generates_callbacks(g)) {
+    if (!has_managed_reports(g)) {
         rm = nullptr;
     } else {
         assert(rm);
@@ -547,7 +547,7 @@ void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm,
     removeRedundancy(g, som);
 
-    if (rm && generates_callbacks(g)) {
+    if (rm && has_managed_reports(g)) {
         pruneHighlanderDominated(g, *rm);
     }
 
@@ -560,7 +560,7 @@ void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm,
 u32 countAccelStates(const NGHolder &g, const ReportManager *rm,
                      const CompileContext &cc) {
-    if (!generates_callbacks(g)) {
+    if (!has_managed_reports(g)) {
         rm = nullptr;
     } else {
         assert(rm);
diff --git a/src/nfagraph/ng_mcclellan.cpp b/src/nfagraph/ng_mcclellan.cpp
index b1c6ff96..024cf2c1 100644
--- a/src/nfagraph/ng_mcclellan.cpp
+++ b/src/nfagraph/ng_mcclellan.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -531,9 +531,9 @@ unique_ptr<raw_dfa> buildMcClellan(const NGHolder &graph,
     DEBUG_PRINTF("attempting to build ?%d? mcclellan\n", (int)graph.kind);
     assert(allMatchStatesHaveReports(graph));
 
-    bool prunable = grey.highlanderPruneDFA && generates_callbacks(graph);
-    assert(rm || !generates_callbacks(graph));
-    if (!generates_callbacks(graph)) {
+    bool prunable = grey.highlanderPruneDFA && has_managed_reports(graph);
+    assert(rm || !has_managed_reports(graph));
+    if (!has_managed_reports(graph)) {
         rm = nullptr;
     }
 
diff --git a/src/nfagraph/ng_split.cpp b/src/nfagraph/ng_split.cpp
index 42157e1e..75150136 100644
--- a/src/nfagraph/ng_split.cpp
+++ b/src/nfagraph/ng_split.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -112,6 +112,12 @@ void splitLHS(const NGHolder &base, const vector<NFAVertex> &pivots,
     case NFA_SUFFIX:
         lhs->kind = NFA_INFIX;
         break;
+    case NFA_EAGER_PREFIX:
+        /* Current code should not be assigning eager until well after all the
+         * splitting is done. */
+        assert(0);
+        lhs->kind = NFA_EAGER_PREFIX;
+        break;
     case NFA_REV_PREFIX:
     case NFA_OUTFIX_RAW:
         assert(0);
@@ -154,6 +160,12 @@ void splitRHS(const NGHolder &base, const vector<NFAVertex> &pivots,
     case NFA_OUTFIX:
         rhs->kind = NFA_SUFFIX;
         break;
+    case NFA_EAGER_PREFIX:
+        /* Current code should not be assigning eager until well after all the
+         * splitting is done. */
+        assert(0);
+        rhs->kind = NFA_INFIX;
+        break;
     case NFA_REV_PREFIX:
     case NFA_OUTFIX_RAW:
         assert(0);
diff --git a/src/rose/block.c b/src/rose/block.c
index 55323c2e..a40d229b 100644
--- a/src/rose/block.c
+++ b/src/rose/block.c
@@ -266,6 +266,86 @@ int roseBlockFloating(const struct RoseEngine *t, struct hs_scratch *scratch) {
     return can_stop_matching(scratch);
 }
 
+static rose_inline
+void runEagerPrefixesBlock(const struct RoseEngine *t,
+                           struct hs_scratch *scratch) {
+    if (!t->eagerIterOffset) {
+        return;
+    }
+
+    char *state = scratch->core_info.state;
+    u8 *ara = getActiveLeftArray(t, state); /* indexed by offsets into
+                                             * left_table */
+    const u32 arCount = t->activeLeftCount;
+    const u32 qCount = t->queueCount;
+    const struct LeftNfaInfo *left_table = getLeftTable(t);
+    const struct mmbit_sparse_iter *it = getByOffset(t, t->eagerIterOffset);
+
+    struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
+
+    u32 idx = 0;
+    u32 ri = mmbit_sparse_iter_begin(ara, arCount, &idx, it, si_state);
+    for (; ri != MMB_INVALID;
+         ri = mmbit_sparse_iter_next(ara, arCount, ri, &idx, it, si_state)) {
+        const struct LeftNfaInfo *left = left_table + ri;
+        u32 qi = ri + t->leftfixBeginQueue;
+        DEBUG_PRINTF("leftfix %u/%u, maxLag=%u\n", ri, arCount, left->maxLag);
+
+        assert(!fatbit_isset(scratch->aqa, qCount, qi));
+        assert(left->eager);
+        assert(!left->infix);
+
+        struct mq *q = scratch->queues + qi;
+        const struct NFA *nfa = getNfaByQueue(t, qi);
+
+        if (scratch->core_info.len < nfa->minWidth) {
+            /* we know that there is not enough data for this to ever match,
+             * so we can immediately squash. */
+            mmbit_unset(ara, arCount, ri);
+            scratch->tctxt.groups &= left->squash_mask;
+        }
+
+        s64a loc = MIN(scratch->core_info.len, EAGER_STOP_OFFSET);
+
+        fatbit_set(scratch->aqa, qCount, qi);
+        initRoseQueue(t, qi, left, scratch);
+
+        pushQueueAt(q, 0, MQE_START, 0);
+        pushQueueAt(q, 1, MQE_TOP, 0);
+        pushQueueAt(q, 2, MQE_END, loc);
+        nfaQueueInitState(nfa, q);
+
+        char alive = nfaQueueExecToMatch(q->nfa, q, loc);
+
+        if (!alive) {
+            DEBUG_PRINTF("queue %u dead, squashing\n", qi);
+            mmbit_unset(ara, arCount, ri);
+            fatbit_unset(scratch->aqa, qCount, qi);
+            scratch->tctxt.groups &= left->squash_mask;
+        } else if (q->cur == q->end) {
+            assert(alive != MO_MATCHES_PENDING);
+            if (loc == (s64a)scratch->core_info.len) {
+                /* We know that the prefix does not match in the block so we
+                 * can squash the groups anyway even though it did not die */
+                /* TODO: if we knew the minimum lag the leftfix is checked at
+                 * we could make this check tighter */
+                DEBUG_PRINTF("queue %u has no match in block, squashing\n",
+                             qi);
+                mmbit_unset(ara, arCount, ri);
+                fatbit_unset(scratch->aqa, qCount, qi);
+                scratch->tctxt.groups &= left->squash_mask;
+            } else {
+                DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
+                q->cur = q->end = 0;
+                pushQueueAt(q, 0, MQE_START, loc);
+            }
+        } else {
+            assert(alive == MO_MATCHES_PENDING);
+            DEBUG_PRINTF("queue %u unfinished, nfa lives\n", qi);
+            q->end--; /* remove end item */
+        }
+    }
+}
+
 void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
     assert(t);
     assert(scratch);
@@ -314,6 +394,8 @@ void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
         hwlmExec(sbtable, scratch->core_info.buf, sblen, 0, roseCallback,
                  scratch, tctxt->groups);
     } else {
+        runEagerPrefixesBlock(t, scratch);
+
        if (roseBlockAnchored(t, scratch)) {
            return;
        }
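On every failure path the block-mode pass reports nothing; its whole value is the group squash, which turns off every literal group owned by the now-dead prefix so the literal matcher skips the corresponding roles. A toy demonstration of the mask arithmetic (the mask values are invented for illustration):

```cpp
#include <cassert>
#include <cstdint>

using rose_group = uint64_t;

int main() {
    rose_group groups = 0xffULL;      // eight groups initially on
    rose_group squash_mask = ~0x6ULL; // this prefix owns groups 1 and 2
    groups &= squash_mask;            // prefix died: its groups go dark
    assert(groups == 0xf9ULL);
    // Literals confined to groups 1 and 2 will no longer fire, so the role
    // programs that would have tested this leftfix are never entered.
    return 0;
}
```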
a/src/rose/program_runtime.h
+++ b/src/rose/program_runtime.h
@@ -424,7 +424,7 @@ char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch,
     }
 
     s64a loc = (s64a)end - ci->buf_offset - leftfixLag;
-    assert(loc >= q_cur_loc(q));
+    assert(loc >= q_cur_loc(q) || left->eager);
     assert(leftfixReport != MO_INVALID_IDX);
 
     if (!is_infix && left->transient) {
@@ -471,7 +471,13 @@ char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch,
         DEBUG_PRINTF("checking for report %u\n", leftfixReport);
         DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv);
         return rv == MO_MATCHES_PENDING;
+    } else if (q_cur_loc(q) > loc) {
+        /* an eager leftfix may have already progressed past loc if there is
+         * no match at loc. */
+        assert(left->eager);
+        return 0;
     } else {
+        assert(q_cur_loc(q) == loc);
         DEBUG_PRINTF("checking for report %u\n", leftfixReport);
         char rv = nfaInAcceptState(q->nfa, leftfixReport, q);
         DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv);
diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp
index ae155361..fe2c259e 100644
--- a/src/rose/rose_build_add.cpp
+++ b/src/rose/rose_build_add.cpp
@@ -1038,6 +1038,7 @@ bool canImplementGraph(RoseBuildImpl *tbi, const RoseInGraph &in, NGHolder &h,
             return false;
         }
         break;
+    case NFA_EAGER_PREFIX:
     case NFA_REV_PREFIX:
     case NFA_OUTFIX_RAW:
         DEBUG_PRINTF("kind %u\n", (u32)h.kind);
diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index 3f36a05e..3f56b101 100644
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@@ -50,6 +50,7 @@
 #include "nfa/nfa_build_util.h"
 #include "nfa/nfa_internal.h"
 #include "nfa/shufticompile.h"
+#include "nfagraph/ng_execute.h"
 #include "nfagraph/ng_holder.h"
 #include "nfagraph/ng_lbr.h"
 #include "nfagraph/ng_limex.h"
@@ -1046,8 +1047,9 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left,
     // streaming mode.
     const bool compress_state = !is_transient;
 
-    assert(!left.graph()
-           || left.graph()->kind == (is_prefix ? NFA_PREFIX : NFA_INFIX));
+    assert(is_prefix || !left.graph() || left.graph()->kind == NFA_INFIX);
+    assert(!is_prefix || !left.graph() || left.graph()->kind == NFA_PREFIX
+           || left.graph()->kind == NFA_EAGER_PREFIX);
 
     // Holder should be implementable as an NFA at the very least.
     if (!left.dfa() && left.graph()) {
@@ -1089,7 +1091,9 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left,
     if (!n && left.graph()) {
         map<u32, vector<vector<CharReach>>> triggers;
-        findTriggerSequences(tbi, infixTriggers.at(left), &triggers);
+        if (left.graph()->kind == NFA_INFIX) {
+            findTriggerSequences(tbi, infixTriggers.at(left), &triggers);
+        }
         n = constructNFA(*left.graph(), nullptr, fixed_depth_tops, triggers,
                          compress_state, cc);
     }
@@ -1125,17 +1129,309 @@ void setLeftNfaProperties(NFA &n, const left_id &left) {
     // graph.
 }
 
+static
+void appendTailToHolder(NGHolder &h, const flat_set<ReportID> &reports,
+                        const vector<NFAVertex> &starts,
+                        const vector<CharReach> &tail) {
+    assert(!tail.empty());
+    NFAVertex curr = add_vertex(h);
+    for (NFAVertex v : starts) {
+        assert(!edge(v, h.acceptEod, h).second);
+        assert(h[v].reports == reports);
+        h[v].reports.clear();
+        remove_edge(v, h.accept, h);
+        add_edge(v, curr, h);
+    }
+    auto it = tail.begin();
+    h[curr].char_reach = *it;
+    ++it;
+    while (it != tail.end()) {
+        NFAVertex old = curr;
+        curr = add_vertex(h);
+        add_edge(old, curr, h);
+        assert(!it->none());
+        h[curr].char_reach = *it;
+        ++it;
+    }
+
+    h[curr].reports = reports;
+    add_edge(curr, h.accept, h);
+}
+
+static
+void appendTailToHolder(NGHolder &h, const vector<CharReach> &tail) {
+    assert(in_degree(h.acceptEod, h) == 1);
+    assert(!tail.empty());
+
+    map<flat_set<ReportID>, vector<NFAVertex>> reporters;
+    for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
+        reporters[h[v].reports].push_back(v);
+    }
+
+    for (const auto &e : reporters) {
+        appendTailToHolder(h, e.first, e.second, tail);
+    }
+
+    h.renumberEdges();
+}
+
+static
+u32 decreaseLag(const RoseBuildImpl &build, NGHolder &h,
+                const vector<RoseVertex> &succs) {
+    const RoseGraph &rg = build.g;
+    static const size_t MAX_RESTORE_LEN = 5;
+
+    vector<CharReach> restored(MAX_RESTORE_LEN);
+    for (RoseVertex v : succs) {
+        u32 lag = rg[v].left.lag;
+        for (u32 lit_id : rg[v].literals) {
+            u32 delay = build.literals.right.at(lit_id).delay;
+            const ue2_literal &literal = build.literals.right.at(lit_id).s;
+            assert(lag <= literal.length() + delay);
+            size_t base = literal.length() + delay - lag;
+            if (base >= literal.length()) {
+                return 0;
+            }
+            size_t len = literal.length() - base;
+            len = MIN(len, restored.size());
+            restored.resize(len);
+            auto lit_it = literal.begin() + base;
+            for (u32 i = 0; i < len; i++) {
+                assert(lit_it != literal.end());
+                restored[i] |= *lit_it;
+                ++lit_it;
+            }
+        }
+    }
+
+    assert(!restored.empty());
+
+    appendTailToHolder(h, restored);
+
+    return restored.size();
+}
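`decreaseLag` rebuilds up to `MAX_RESTORE_LEN` trailing literal characters onto the prefix graph, so the eager NFA can be queried closer to the literal match itself. A worked restatement of the `base`/`len` arithmetic, with `std::string` standing in for `ue2_literal`: with literal "foobar", delay 0 and lag 2, the prefix currently stops two bytes short of the literal's end, so the tail "ar" is appended and the lag drops by 2.

```cpp
#include <algorithm>
#include <cassert>
#include <string>

size_t restoredTailLen(const std::string &literal, size_t delay, size_t lag,
                       size_t max_restore = 5) {
    assert(lag <= literal.size() + delay);
    size_t base = literal.size() + delay - lag;
    if (base >= literal.size()) {
        return 0; // the lag is covered entirely by the literal's delay
    }
    return std::min(literal.size() - base, max_restore);
}

int main() {
    assert(restoredTailLen("foobar", 0, 2) == 2); // tail "ar" is appended
    assert(restoredTailLen("foobar", 2, 2) == 0); // delay swallows the lag
    return 0;
}
```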
+
+#define EAGER_DIE_BEFORE_LIMIT 10
+
+struct eager_info {
+    shared_ptr<NGHolder> new_graph;
+    u32 lag_adjust = 0;
+};
+
+static
+bool checkSuitableForEager(bool is_prefix, const left_id &left,
+                           const RoseBuildImpl &build,
+                           const vector<RoseVertex> &succs,
+                           rose_group squash_mask, rose_group initial_groups,
+                           eager_info &ei, const CompileContext &cc) {
+    DEBUG_PRINTF("checking prefix --> %016llx...\n", squash_mask);
+
+    const RoseGraph &rg = build.g;
+
+    if (!is_prefix) {
+        DEBUG_PRINTF("not prefix\n");
+        return false; /* only prefixes (for now...) */
+    }
+
+    if ((initial_groups & squash_mask) == initial_groups) {
+        DEBUG_PRINTF("no squash -- useless\n");
+        return false;
+    }
+
+    for (RoseVertex s : succs) {
+        if (build.isInETable(s)
+            || contains(rg[s].literals, build.eod_event_literal_id)) {
+            return false; /* Ignore EOD related prefixes */
+        }
+    }
+
+    if (left.dfa()) {
+        const raw_dfa &dfa = *left.dfa();
+        if (dfa.start_floating != DEAD_STATE) {
+            return false; /* not purely anchored */
+        }
+        if (!dfa.states[dfa.start_anchored].reports.empty()) {
+            return false; /* vacuous (todo: handle?) */
+        }
+
+        if (!can_die_early(dfa, EAGER_DIE_BEFORE_LIMIT)) {
+            return false;
+        }
+        ei.new_graph = rg[succs[0]].left.graph;
+    } else if (left.graph()) {
+        const NGHolder &g = *left.graph();
+        if (proper_out_degree(g.startDs, g)) {
+            return false; /* not purely anchored */
+        }
+        if (is_match_vertex(g.start, g)) {
+            return false; /* vacuous (todo: handle?) */
+        }
+
+        ei.new_graph = cloneHolder(*left.graph());
+        auto gg = ei.new_graph;
+        gg->kind = NFA_EAGER_PREFIX;
+
+        ei.lag_adjust = decreaseLag(build, *gg, succs);
+
+        if (!can_die_early(*gg, EAGER_DIE_BEFORE_LIMIT)) {
+            DEBUG_PRINTF("not eager as stuck alive\n");
+            return false;
+        }
+
+        /* We need to ensure that adding in the literals does not cause us to
+         * no longer be able to build an nfa. */
+        bool ok = isImplementableNFA(*gg, nullptr, cc);
+        if (!ok) {
+            return false;
+        }
+    } else {
+        DEBUG_PRINTF("unable to determine if good for eager running\n");
+        return false;
+    }
+
+    DEBUG_PRINTF("eager prefix\n");
+    return true;
+}
+
+static
+left_id updateLeftfixWithEager(RoseGraph &g, const eager_info &ei,
+                               const vector<RoseVertex> &succs) {
+    u32 lag_adjust = ei.lag_adjust;
+    auto gg = ei.new_graph;
+    for (RoseVertex v : succs) {
+        g[v].left.graph = gg;
+        assert(g[v].left.lag >= lag_adjust);
+        g[v].left.lag -= lag_adjust;
+        DEBUG_PRINTF("added %u literal chars back, new lag %u\n", lag_adjust,
+                     g[v].left.lag);
+    }
+    left_id leftfix = g[succs[0]].left;
+
+    if (leftfix.graph()) {
+        assert(leftfix.graph()->kind == NFA_PREFIX
+               || leftfix.graph()->kind == NFA_EAGER_PREFIX);
+        leftfix.graph()->kind = NFA_EAGER_PREFIX;
+    }
+    if (leftfix.dfa()) {
+        assert(leftfix.dfa()->kind == NFA_PREFIX);
+        leftfix.dfa()->kind = NFA_EAGER_PREFIX;
+    }
+
+    return leftfix;
+}
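Note that the cheapest eligibility test in `checkSuitableForEager` fires first: if the prefix's squash mask leaves the initial group mask untouched, running it eagerly can never switch anything off, and the candidate is rejected before any graph work is done. Restated standalone (the helper name is illustrative only):

```cpp
#include <cstdint>

using rose_group = uint64_t;

// Eagerness is only worth considering if squashing this prefix's groups
// would actually clear some initially-on group.
bool eagerCouldHelp(rose_group initial_groups, rose_group squash_mask) {
    return (initial_groups & squash_mask) != initial_groups;
}
```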
+
+static
+bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi,
+                  const map<left_id, set<PredTopPair>> &infixTriggers,
+                  set<u32> *no_retrigger_queues, set<u32> *eager_queues,
+                  const map<left_id, eager_info> &eager,
+                  const vector<RoseVertex> &succs, left_id leftfix) {
+    RoseGraph &g = build.g;
+    const CompileContext &cc = build.cc;
+    const ReportManager &rm = build.rm;
+
+    bool is_transient = contains(build.transient, leftfix);
+    rose_group squash_mask = build.rose_squash_masks.at(leftfix);
+
+    DEBUG_PRINTF("making %sleftfix\n", is_transient ? "transient " : "");
+
+    if (contains(eager, leftfix)) {
+        eager_queues->insert(qi);
+        leftfix = updateLeftfixWithEager(g, eager.at(leftfix), succs);
+    }
+
+    aligned_unique_ptr<NFA> nfa;
+    // Need to build NFA, which is either predestined to be a Haig (in SOM
+    // mode) or could be all manner of things.
+    if (leftfix.haig()) {
+        nfa = goughCompile(*leftfix.haig(), build.ssm.somPrecision(), cc, rm);
+    } else {
+        nfa = makeLeftNfa(build, leftfix, prefix, is_transient, infixTriggers,
+                          cc);
+    }
+
+    if (!nfa) {
+        assert(!"failed to build leftfix");
+        return false;
+    }
+
+    setLeftNfaProperties(*nfa, leftfix);
+
+    build.leftfix_queue_map.emplace(leftfix, qi);
+    nfa->queueIndex = qi;
+
+    if (!prefix && !leftfix.haig() && leftfix.graph()
+        && nfaStuckOn(*leftfix.graph())) {
+        DEBUG_PRINTF("%u sticks on\n", qi);
+        no_retrigger_queues->insert(qi);
+    }
+
+    DEBUG_PRINTF("built leftfix, qi=%u\n", qi);
+    add_nfa_to_blob(bc, *nfa);
+
+    // Leftfixes can have stop alphabets.
+    vector<u8> stop(N_CHARS, 0);
+    /* haigs track som information - need more care */
+    som_type som = leftfix.haig() ? SOM_LEFT : SOM_NONE;
+    if (leftfix.graph()) {
+        stop = findLeftOffsetStopAlphabet(*leftfix.graph(), som);
+    } else if (leftfix.castle()) {
+        stop = findLeftOffsetStopAlphabet(*leftfix.castle(), som);
+    }
+
+    // Infix NFAs can have bounds on their queue lengths.
+    u32 max_queuelen = UINT32_MAX;
+    if (!prefix) {
+        set<ue2_literal> lits;
+        for (RoseVertex v : succs) {
+            for (auto u : inv_adjacent_vertices_range(v, g)) {
+                for (u32 lit_id : g[u].literals) {
+                    lits.insert(build.literals.right.at(lit_id).s);
+                }
+            }
+        }
+        DEBUG_PRINTF("%zu literals\n", lits.size());
+        max_queuelen = findMaxInfixMatches(leftfix, lits);
+        if (max_queuelen < UINT32_MAX) {
+            max_queuelen++;
+        }
+    }
+
+    u32 max_width;
+    if (is_transient) {
+        depth d = findMaxWidth(leftfix);
+        assert(d.is_finite());
+        max_width = d;
+    } else {
+        max_width = 0;
+    }
+
+    u8 cm_count = 0;
+    CharReach cm_cr;
+    if (cc.grey.allowCountingMiracles) {
+        findCountingMiracleInfo(leftfix, stop, &cm_count, &cm_cr);
+    }
+
+    for (RoseVertex v : succs) {
+        bc.leftfix_info.emplace(v, left_build_info(qi, g[v].left.lag, max_width,
+                                                   squash_mask, stop,
+                                                   max_queuelen, cm_count,
+                                                   cm_cr));
+    }
+
+    return true;
+}
+
 static
 bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc,
                     QueueIndexFactory &qif, set<u32> *no_retrigger_queues,
-                    bool do_prefix) {
-    const RoseGraph &g = tbi.g;
+                    set<u32> *eager_queues, bool do_prefix) {
+    RoseGraph &g = tbi.g;
     const CompileContext &cc = tbi.cc;
-    const ReportManager &rm = tbi.rm;
-
-    ue2::unordered_map<left_id, u32> seen; // already built queue indices
 
     map<left_id, set<PredTopPair>> infixTriggers;
+    vector<left_id> order;
+    unordered_map<left_id, vector<RoseVertex>> succs;
     findInfixTriggers(tbi, &infixTriggers);
 
     for (auto v : vertices_range(g)) {
@@ -1143,6 +1439,7 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc,
             continue;
         }
 
+        assert(tbi.isNonRootSuccessor(v) != tbi.isRootSuccessor(v));
         bool is_prefix = tbi.isRootSuccessor(v);
 
         if (do_prefix != is_prefix) {
@@ -1156,8 +1453,6 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc,
         // our in-edges.
         assert(roseHasTops(g, v));
 
-        u32 qi; // queue index, set below.
-        u32 lag = g[v].left.lag;
         bool is_transient = contains(tbi.transient, leftfix);
 
         // Transient leftfixes can sometimes be implemented solely with
@@ -1173,95 +1468,42 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc,
             }
         }
 
-        if (contains(seen, leftfix)) {
-            // NFA already built.
-            qi = seen[leftfix];
-            assert(contains(bc.engineOffsets, qi));
-            DEBUG_PRINTF("sharing leftfix, qi=%u\n", qi);
-        } else {
-            DEBUG_PRINTF("making %sleftfix\n", is_transient ? "transient " : "");
-
-            aligned_unique_ptr<NFA> nfa;
-
-            // Need to build NFA, which is either predestined to be a Haig (in
-            // SOM mode) or could be all manner of things.
-            if (leftfix.haig()) {
-                nfa = goughCompile(*leftfix.haig(), tbi.ssm.somPrecision(), cc,
-                                   rm);
-            } else {
-                assert(tbi.isNonRootSuccessor(v) != tbi.isRootSuccessor(v));
-                nfa = makeLeftNfa(tbi, leftfix, is_prefix, is_transient,
-                                  infixTriggers, cc);
-            }
-
-            if (!nfa) {
-                assert(!"failed to build leftfix");
-                return false;
-            }
-
-            setLeftNfaProperties(*nfa, leftfix);
-
-            qi = qif.get_queue();
-            tbi.leftfix_queue_map.emplace(leftfix, qi);
-            nfa->queueIndex = qi;
-
-            if (!is_prefix && !leftfix.haig() && leftfix.graph() &&
-                nfaStuckOn(*leftfix.graph())) {
-                DEBUG_PRINTF("%u sticks on\n", qi);
-                no_retrigger_queues->insert(qi);
-            }
-
-            DEBUG_PRINTF("built leftfix, qi=%u\n", qi);
-            add_nfa_to_blob(bc, *nfa);
-            seen.emplace(leftfix, qi);
+        if (!contains(succs, leftfix)) {
+            order.push_back(leftfix);
         }
+        succs[leftfix].push_back(v);
+    }
+
+    rose_group initial_groups = tbi.getInitialGroups();
+    rose_group combined_eager_squashed_mask = ~0ULL;
+
+    map<left_id, eager_info> eager;
+
+    for (const left_id &leftfix : order) {
+        const auto &left_succs = succs[leftfix];
 
-        // Leftfixes can have stop alphabets.
-        vector<u8> stop(N_CHARS, 0);
-        /* haigs track som information - need more care */
-        som_type som = leftfix.haig() ? SOM_LEFT : SOM_NONE;
-        if (leftfix.graph()) {
-            stop = findLeftOffsetStopAlphabet(*leftfix.graph(), som);
-        } else if (leftfix.castle()) {
-            stop = findLeftOffsetStopAlphabet(*leftfix.castle(), som);
+        rose_group squash_mask = tbi.rose_squash_masks.at(leftfix);
+        eager_info ei;
+
+        if (checkSuitableForEager(do_prefix, leftfix, tbi, left_succs,
+                                  squash_mask, initial_groups, ei, cc)) {
+            eager[leftfix] = ei;
+            combined_eager_squashed_mask &= squash_mask;
+            DEBUG_PRINTF("combo %016llx...\n", combined_eager_squashed_mask);
         }
+    }
 
-        // Infix NFAs can have bounds on their queue lengths.
-        u32 max_queuelen = UINT32_MAX;
-        if (!is_prefix) {
-            set<ue2_literal> lits;
-            for (auto u : inv_adjacent_vertices_range(v, tbi.g)) {
-                for (u32 lit_id : tbi.g[u].literals) {
-                    lits.insert(tbi.literals.right.at(lit_id).s);
-                }
-            }
-            DEBUG_PRINTF("%zu literals\n", lits.size());
-            max_queuelen = findMaxInfixMatches(leftfix, lits);
-            if (max_queuelen < UINT32_MAX) {
-                max_queuelen++;
-            }
-        }
+    if (do_prefix && combined_eager_squashed_mask & initial_groups) {
+        DEBUG_PRINTF("eager groups won't squash everyone - be lazy\n");
+        eager_queues->clear();
+        eager.clear();
+    }
 
-        u32 max_width;
-        if (is_transient) {
-            depth d = findMaxWidth(leftfix);
-            assert(d.is_finite());
-            max_width = d;
-        } else {
-            max_width = 0;
-        }
-
-        u8 cm_count = 0;
-        CharReach cm_cr;
-        if (cc.grey.allowCountingMiracles) {
-            findCountingMiracleInfo(leftfix, stop, &cm_count, &cm_cr);
-        }
-
-        bc.leftfix_info.emplace(
-            v, left_build_info(qi, lag, max_width, squash_mask, stop,
-                               max_queuelen, cm_count, cm_cr));
+    for (const left_id &leftfix : order) {
+        buildLeftfix(tbi, bc, do_prefix, qif.get_queue(), infixTriggers,
+                     no_retrigger_queues, eager_queues, eager, succs[leftfix],
+                     leftfix);
     }
 
     return true;
 }
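The fallback at the end of `buildLeftfixes` keeps eagerness only when the selected prefixes can, between them, squash every initially-on group; otherwise the main literal matcher has to run from offset 0 anyway and the early scan would be wasted work. The same condition as a self-contained check (toy helper, not the build code itself):

```cpp
#include <cstdint>
#include <vector>

using rose_group = uint64_t;

bool keepEagerPrefixes(const std::vector<rose_group> &eager_squash_masks,
                       rose_group initial_groups) {
    rose_group combined = ~0ULL;
    for (rose_group m : eager_squash_masks) {
        combined &= m; // groups that survive every prefix's squash
    }
    // If some initially-on group survives every squash, be lazy instead.
    return (combined & initial_groups) == 0;
}
```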
@@ -1613,9 +1855,11 @@ void buildCountingMiracles(RoseBuildImpl &build, build_context &bc) {
     }
 }
 
+/* Note: buildNfas may reduce the lag for vertices that have prefixes */
 static
 bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif,
-               set<u32> *no_retrigger_queues, u32 *leftfixBeginQueue) {
+               set<u32> *no_retrigger_queues, set<u32> *eager_queues,
+               u32 *leftfixBeginQueue) {
     assignSuffixQueues(tbi, bc);
 
     if (!buildSuffixes(tbi, bc, no_retrigger_queues)) {
@@ -1624,11 +1868,13 @@ bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif,
 
     *leftfixBeginQueue = qif.allocated_count();
 
-    if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, true)) {
+    if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, eager_queues,
+                        true)) {
         return false;
     }
 
-    if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, false)) {
+    if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, eager_queues,
+                        false)) {
         return false;
     }
 
@@ -1672,10 +1918,10 @@ static
 void findTransientQueues(const map<RoseVertex, left_build_info> &leftfix_info,
                          set<u32> *out) {
     DEBUG_PRINTF("curating transient queues\n");
-    for (const auto &rbi : leftfix_info | map_values) {
-        if (rbi.transient) {
-            DEBUG_PRINTF("q %u is transient\n", rbi.queue);
-            out->insert(rbi.queue);
+    for (const auto &build : leftfix_info | map_values) {
+        if (build.transient) {
+            DEBUG_PRINTF("q %u is transient\n", build.queue);
+            out->insert(build.queue);
         }
     }
 }
@@ -3301,9 +3547,9 @@ void assignStateIndices(const RoseBuildImpl &build, build_context &bc) {
 }
 
 static
-bool hasUsefulStops(const left_build_info &rbi) {
+bool hasUsefulStops(const left_build_info &build) {
     for (u32 i = 0; i < N_CHARS; i++) {
-        if (rbi.stopAlphabet[i]) {
+        if (build.stopAlphabet[i]) {
             return true;
         }
     }
@@ -3312,6 +3558,7 @@ static
 void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc,
+                        const set<u32> &eager_queues,
                         u32 leftfixBeginQueue, u32 leftfixCount,
                         vector<LeftNfaInfo> &leftTable, u32 *laggedRoseCount,
                         size_t *history) {
@@ -3371,6 +3618,7 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc,
         DEBUG_PRINTF("mw = %u\n", lbi.transient);
         left.transient = verify_u8(lbi.transient);
         left.infix = tbi.isNonRootSuccessor(v);
+        left.eager = contains(eager_queues, lbi.queue);
 
         // A rose has a lagIndex if it's non-transient and we are
         // streaming.
@@ -4271,6 +4519,25 @@ void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) {
     }
 }
 
+static
+u32 buildEagerQueueIter(const set<u32> &eager, u32 leftfixBeginQueue,
+                        u32 queue_count,
+                        build_context &bc) {
+    if (eager.empty()) {
+        return 0;
+    }
+
+    vector<u32> vec;
+    for (u32 q : eager) {
+        assert(q >= leftfixBeginQueue);
+        vec.push_back(q - leftfixBeginQueue);
+    }
+
+    vector<mmbit_sparse_iter> iter;
+    mmbBuildSparseIterator(iter, vec, queue_count - leftfixBeginQueue);
+    return addIteratorToTable(bc, iter);
+}
+
 aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     DerivedBoundaryReports dboundary(boundary);
 
@@ -4305,7 +4572,10 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     u32 outfixEndQueue = qif.allocated_count();
     u32 leftfixBeginQueue = outfixEndQueue;
 
-    if (!buildNfas(*this, bc, qif, &no_retrigger_queues,
+    set<u32> eager_queues;
+
+    /* Note: buildNfas may reduce the lag for vertices that have prefixes */
+    if (!buildNfas(*this, bc, qif, &no_retrigger_queues, &eager_queues,
                    &leftfixBeginQueue)) {
         return nullptr;
     }
@@ -4325,7 +4595,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
 
     u32 laggedRoseCount = 0;
     vector<LeftNfaInfo> leftInfoTable;
-    buildLeftInfoTable(*this, bc, leftfixBeginQueue,
+    buildLeftInfoTable(*this, bc, eager_queues, leftfixBeginQueue,
                        queue_count - leftfixBeginQueue, leftInfoTable,
                        &laggedRoseCount, &historyRequired);
 
@@ -4340,6 +4610,8 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     buildActiveLeftIter(leftInfoTable, activeLeftIter);
 
     u32 lastByteOffset = buildLastByteIter(g, bc);
+    u32 eagerIterOffset = buildEagerQueueIter(eager_queues, leftfixBeginQueue,
+                                              queue_count, bc);
 
     // Enforce role table resource limit.
     if (num_vertices(g) > cc.grey.limitRoseRoleCount) {
@@ -4513,6 +4785,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     engine->activeArrayCount = activeArrayCount;
     engine->activeLeftCount = activeLeftCount;
     engine->queueCount = queue_count;
+    engine->eagerIterOffset = eagerIterOffset;
     engine->handledKeyCount = bc.handledKeys.size();
 
     engine->group_weak_end = group_weak_end;
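`buildEagerQueueIter` keys the sparse iterator on `qi - leftfixBeginQueue` because, at runtime, the iterator is walked against the active-left array, which is indexed by leftfix number rather than absolute queue index. A toy restatement of that renumbering (illustrative helper, not build code):

```cpp
#include <cassert>
#include <cstdint>
#include <set>
#include <vector>

std::vector<uint32_t> eagerIterKeys(const std::set<uint32_t> &eager_queues,
                                    uint32_t leftfixBeginQueue) {
    std::vector<uint32_t> keys;
    for (uint32_t q : eager_queues) {
        assert(q >= leftfixBeginQueue);
        keys.push_back(q - leftfixBeginQueue); // ara index, not queue index
    }
    return keys; // these feed the sparse-iterator builder
}
```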
diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp
index 46d1676d..2c3f326e 100644
--- a/src/rose/rose_build_dump.cpp
+++ b/src/rose/rose_build_dump.cpp
@@ -76,6 +76,8 @@ string to_string(nfa_kind k) {
         return "REV_PREFIX";
     case NFA_OUTFIX_RAW:
         return "OUTFIX_RAW";
+    case NFA_EAGER_PREFIX:
+        return "EAGER_PREFIX";
     }
     assert(0);
     return "?";
diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h
index 5f1871e4..71940e07 100644
--- a/src/rose/rose_build_impl.h
+++ b/src/rose/rose_build_impl.h
@@ -150,7 +150,7 @@ struct left_id {
         : g(in.graph.get()), c(in.castle.get()), d(in.dfa.get()),
           h(in.haig.get()), dfa_min_width(in.dfa_min_width),
           dfa_max_width(in.dfa_max_width) {
-        assert(!g || !generates_callbacks(*g));
+        assert(!g || !has_managed_reports(*g));
     }
     bool operator==(const left_id &b) const {
         bool rv = g == b.g && c == b.c && h == b.h && d == b.d;
diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp
index 9f55dbf2..1d63c71a 100644
--- a/src/rose/rose_dump.cpp
+++ b/src/rose/rose_dump.cpp
@@ -605,6 +605,9 @@ void dumpNfaNotes(ofstream &fout, const RoseEngine *t, const NFA *n) {
     }
 
     const LeftNfaInfo *left = getLeftInfoByQueue(t, qindex);
+    if (left->eager) {
+        fout << "eager ";
+    }
     if (left->transient) {
         fout << "transient " << (u32)left->transient << " ";
     }
@@ -1018,6 +1021,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
     DUMP_U32(t, activeArrayCount);
     DUMP_U32(t, activeLeftCount);
     DUMP_U32(t, queueCount);
+    DUMP_U32(t, eagerIterOffset);
     DUMP_U32(t, handledKeyCount);
     DUMP_U32(t, leftOffset);
     DUMP_U32(t, roseCount);
diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h
index 9dd17350..5b6a9dc6 100644
--- a/src/rose/rose_internal.h
+++ b/src/rose/rose_internal.h
@@ -144,6 +144,7 @@ struct LeftNfaInfo {
     u32 stopTable; // stop table index, or ROSE_OFFSET_INVALID
     u8 transient; /**< 0 if not transient, else max width of transient prefix */
     char infix; /* TODO: make flags */
+    char eager; /**< nfa should be run eagerly to first match or death */
     char eod_check; /**< nfa is used by the event eod literal */
     u32 countingMiracleOffset; /** if not 0, offset to RoseCountingMiracle. */
     rose_group squash_mask; /* & mask applied when rose nfa dies */
@@ -366,6 +367,9 @@ struct RoseEngine {
     u32 activeLeftCount; //number of nfas tracked in the active rose array
     u32 queueCount; /**< number of nfa queues */
 
+    u32 eagerIterOffset; /**< offset to sparse iter for eager prefixes or 0 if
+                          * none */
+
     /** \brief Number of keys used by CHECK_SET_HANDLED instructions in role
      * programs. Used to size the handled_roles fatbit in scratch. */
     u32 handledKeyCount;
diff --git a/src/rose/runtime.h b/src/rose/runtime.h
index f7f6641d..60c7d34b 100644
--- a/src/rose/runtime.h
+++ b/src/rose/runtime.h
@@ -55,6 +55,11 @@
 
 #define rose_inline really_inline
 
+/* Maximum offset that we will eagerly run prefixes to. Beyond this point,
+ * eager prefixes are always run in exactly the same way as normal prefixes. */
+#define EAGER_STOP_OFFSET 64
+
+
 static really_inline
 const void *getByOffset(const struct RoseEngine *t, u32 offset) {
     assert(offset < t->size);
diff --git a/src/rose/stream.c b/src/rose/stream.c
index ffe965dd..181bfe65 100644
--- a/src/rose/stream.c
+++ b/src/rose/stream.c
@@ -423,6 +423,92 @@ void do_rebuild(const struct RoseEngine *t, const struct HWLM *ftable,
     assert(!can_stop_matching(scratch));
 }
 
+static rose_inline
+void runEagerPrefixesStream(const struct RoseEngine *t,
+                            struct hs_scratch *scratch) {
+    if (!t->eagerIterOffset
+        || scratch->core_info.buf_offset >= EAGER_STOP_OFFSET) {
+        return;
+    }
+
+    char *state = scratch->core_info.state;
+    u8 *ara = getActiveLeftArray(t, state); /* indexed by offsets into
+                                             * left_table */
+    const u32 arCount = t->activeLeftCount;
+    const u32 qCount = t->queueCount;
+    const struct LeftNfaInfo *left_table = getLeftTable(t);
+    const struct mmbit_sparse_iter *it = getByOffset(t, t->eagerIterOffset);
+
+    struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
+
+    u32 idx = 0;
+    u32 ri = mmbit_sparse_iter_begin(ara, arCount, &idx, it, si_state);
+    for (; ri != MMB_INVALID;
+         ri = mmbit_sparse_iter_next(ara, arCount, ri, &idx, it, si_state)) {
+        const struct LeftNfaInfo *left = left_table + ri;
+        u32 qi = ri + t->leftfixBeginQueue;
+        DEBUG_PRINTF("leftfix %u of %u, maxLag=%u\n", ri, arCount,
+                     left->maxLag);
+
+        assert(!fatbit_isset(scratch->aqa, qCount, qi));
+        assert(left->eager);
+        assert(!left->infix);
+
+        struct mq *q = scratch->queues + qi;
+        const struct NFA *nfa = getNfaByQueue(t, qi);
+        s64a loc = MIN(scratch->core_info.len,
+                       EAGER_STOP_OFFSET - scratch->core_info.buf_offset);
+
+        fatbit_set(scratch->aqa, qCount, qi);
+        initRoseQueue(t, qi, left, scratch);
+
+        if (scratch->core_info.buf_offset) {
+            s64a sp = left->transient ? -(s64a)scratch->core_info.hlen
+                                      : -(s64a)loadRoseDelay(t, state, left);
+            pushQueueAt(q, 0, MQE_START, sp);
+            if (scratch->core_info.buf_offset + sp > 0) {
+                loadStreamState(nfa, q, sp);
+                /* if the leftfix is currently in a match state, we cannot
+                 * advance it. */
+                if (nfaInAnyAcceptState(nfa, q)) {
+                    continue;
+                }
+                pushQueueAt(q, 1, MQE_END, loc);
+            } else {
+                pushQueueAt(q, 1, MQE_TOP, sp);
+                pushQueueAt(q, 2, MQE_END, loc);
+                nfaQueueInitState(q->nfa, q);
+            }
+        } else {
+            pushQueueAt(q, 0, MQE_START, 0);
+            pushQueueAt(q, 1, MQE_TOP, 0);
+            pushQueueAt(q, 2, MQE_END, loc);
+            nfaQueueInitState(nfa, q);
+        }
+
+        char alive = nfaQueueExecToMatch(q->nfa, q, loc);
+
+        if (!alive) {
+            DEBUG_PRINTF("queue %u dead, squashing\n", qi);
+            mmbit_unset(ara, arCount, ri);
+            fatbit_unset(scratch->aqa, qCount, qi);
+            scratch->tctxt.groups &= left->squash_mask;
+        } else if (q->cur == q->end) {
+            assert(alive != MO_MATCHES_PENDING);
+            /* unlike in block mode we cannot squash groups if there is no
+             * match in this block as we need the groups on for later stream
+             * writes */
+            /* TODO: investigate possibility of a method to suppress groups
+             * for a single stream block. */
+            DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
+            q->cur = q->end = 0;
+            pushQueueAt(q, 0, MQE_START, loc);
+        } else {
+            assert(alive == MO_MATCHES_PENDING);
+            DEBUG_PRINTF("queue %u unfinished, nfa lives\n", qi);
+            q->end--; /* remove end item */
+        }
+    }
+}
+
 void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
     DEBUG_PRINTF("OH HAI\n");
     assert(t);
@@ -472,6 +558,8 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
         streamInitSufPQ(t, state, scratch);
     }
 
+    runEagerPrefixesStream(t, scratch);
+
     u32 alen = t->anchoredDistance > offset ?
         MIN(length + offset, t->anchoredDistance) - offset : 0;
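Across stream writes, the eager pass is bounded by `EAGER_STOP_OFFSET`: it is skipped entirely once the stream has moved past 64 bytes, and earlier writes clamp their scan end so eager work never extends beyond that window regardless of how the writes are sliced. A self-contained sketch of the clamping (`eagerScanEnd` is an illustrative helper, not a Hyperscan API):

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>

constexpr int64_t kEagerStopOffset = 64; // mirrors EAGER_STOP_OFFSET

// Relative scan end for this stream write, or 0 when the eager pass should
// be skipped and the prefix run exactly like a normal (lazy) prefix.
int64_t eagerScanEnd(uint64_t buf_offset, uint64_t len) {
    if (buf_offset >= kEagerStopOffset) {
        return 0; // past the eager window
    }
    return std::min<int64_t>(len, kEagerStopOffset - buf_offset);
}

int main() {
    assert(eagerScanEnd(0, 100) == 64);  // first write: clamp to the window
    assert(eagerScanEnd(40, 100) == 24); // resume: 24 bytes of window left
    assert(eagerScanEnd(64, 100) == 0);  // window exhausted: run lazily
    return 0;
}
```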