From 8a0e4f82498f4c9adecdabda8a0719655120d3ff Mon Sep 17 00:00:00 2001 From: "Wang, Xiang W" Date: Wed, 26 Sep 2018 06:52:40 -0400 Subject: [PATCH 01/21] Use std::distance explicitly to avoid ambiguity with boost --- src/nfa/limex_compile.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 6053b56f..bbb26605 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -980,7 +980,7 @@ u32 addSquashMask(const build_info &args, const NFAVertex &v, // see if we've already seen it, otherwise add a new one. auto it = find(squash.begin(), squash.end(), sit->second); if (it != squash.end()) { - return verify_u32(distance(squash.begin(), it)); + return verify_u32(std::distance(squash.begin(), it)); } u32 idx = verify_u32(squash.size()); squash.push_back(sit->second); @@ -1007,7 +1007,7 @@ u32 addReports(const flat_set &r, vector &reports, auto it = search(begin(reports), end(reports), begin(my_reports), end(my_reports)); if (it != end(reports)) { - u32 offset = verify_u32(distance(begin(reports), it)); + u32 offset = verify_u32(std::distance(begin(reports), it)); DEBUG_PRINTF("reusing found report list at %u\n", offset); return offset; } From 62dfd48d5319ff56eda3f8a78323fd9b63912991 Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Wed, 8 Aug 2018 14:44:01 +0800 Subject: [PATCH 02/21] Jenkins-1080: fixed error reporting logical combination match in "A&!B" type by moving flush_comb behind report_eod_matches in hs_close_stream/hs_reset_stream. --- src/runtime.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/runtime.c b/src/runtime.c index 052449f6..9fbb8e81 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -934,12 +934,6 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, } } - if (rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) { - if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) { - scratch->core_info.status |= STATUS_TERMINATED; - } - } - setStreamStatus(state, scratch->core_info.status); if (likely(!can_stop_matching(scratch))) { @@ -994,6 +988,13 @@ hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch, unmarkScratchInUse(scratch); } + if (id->rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) { + if (roseRunFlushCombProgram(id->rose, scratch, ~0ULL) + == MO_HALT_MATCHING) { + scratch->core_info.status |= STATUS_TERMINATED; + } + } + hs_stream_free(id); return HS_SUCCESS; @@ -1019,6 +1020,13 @@ hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags, unmarkScratchInUse(scratch); } + if (id->rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) { + if (roseRunFlushCombProgram(id->rose, scratch, ~0ULL) + == MO_HALT_MATCHING) { + scratch->core_info.status |= STATUS_TERMINATED; + } + } + // history already initialised init_stream(id, id->rose, 0); From acffc9d36c4f6eccff22dc3eeb2e7ec122d0ae58 Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Wed, 31 Oct 2018 11:00:11 +0800 Subject: [PATCH 03/21] Jenkins-1424: fixed error which misses report of logical combination under vacuous input. 
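A vacuous scan goes straight to the set_retval label, which previously sat below the logical-combination flush, so combination matches due on empty input were never flushed. Moving the label above the flush block ensures roseRunFlushCombProgram still runs before the return value is decided.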
--- src/runtime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime.c b/src/runtime.c index 9fbb8e81..64a04fd7 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -445,6 +445,7 @@ done_scan: scratch); } +set_retval: if (rose->flushCombProgramOffset) { if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) { unmarkScratchInUse(scratch); @@ -452,7 +453,6 @@ done_scan: } } -set_retval: DEBUG_PRINTF("done. told_to_stop_matching=%d\n", told_to_stop_matching(scratch)); hs_error_t rv = told_to_stop_matching(scratch) ? HS_SCAN_TERMINATED From c7c411975015cfdb6de04cfb00b1b7b9b4687507 Mon Sep 17 00:00:00 2001 From: "Wang, Xiang W" Date: Sun, 2 Dec 2018 22:31:44 -0500 Subject: [PATCH 04/21] chimera: silence gcc-8 exception catch warning --- chimera/ch_compile.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chimera/ch_compile.cpp b/chimera/ch_compile.cpp index c71e26e0..374bd7ad 100644 --- a/chimera/ch_compile.cpp +++ b/chimera/ch_compile.cpp @@ -714,7 +714,7 @@ ch_error_t HS_CDECL ch_compile(const char *expression, unsigned flags, (int)e.index : -1); return CH_COMPILER_ERROR; } - catch (std::bad_alloc) { + catch (std::bad_alloc &) { *db = nullptr; *comp_error = const_cast(&ch_enomem); return CH_COMPILER_ERROR; @@ -782,7 +782,7 @@ ch_error_t HS_CDECL ch_compile_multi(const char *const *expressions, (int)e.index : -1); return CH_COMPILER_ERROR; } - catch (std::bad_alloc) { + catch (std::bad_alloc &) { *db = nullptr; *comp_error = const_cast(&ch_enomem); return CH_COMPILER_ERROR; @@ -855,7 +855,7 @@ ch_error_t HS_CDECL ch_compile_ext_multi( (int)e.index : -1); return CH_COMPILER_ERROR; } - catch (std::bad_alloc) { + catch (std::bad_alloc &) { *db = nullptr; *comp_error = const_cast(&ch_enomem); return CH_COMPILER_ERROR; From c06d5e1c148bbc3b4fc8ab47f903b8e1de0dcc1a Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Wed, 19 Dec 2018 17:49:09 +0800 Subject: [PATCH 05/21] DFA state compression: 16-bit wide and sherman co-exist --- src/grey.cpp | 4 +- src/grey.h | 3 +- src/nfa/accel_dfa_build_strat.h | 9 +- src/nfa/goughcompile.cpp | 1 + src/nfa/mcclellan.c | 216 +++++++++--- src/nfa/mcclellan_common_impl.h | 107 +++++- src/nfa/mcclellan_internal.h | 51 +++ src/nfa/mcclellancompile.cpp | 569 ++++++++++++++++++++++++++++++-- src/nfa/mcclellancompile.h | 3 +- src/nfa/shengcompile.h | 3 +- 10 files changed, 894 insertions(+), 72 deletions(-) diff --git a/src/grey.cpp b/src/grey.cpp index 3762a497..fa8da2b4 100644 --- a/src/grey.cpp +++ b/src/grey.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -82,6 +82,7 @@ Grey::Grey(void) : onlyOneOutfix(false), allowShermanStates(true), allowMcClellan8(true), + allowWideStates(true), // enable wide state for McClellan8 highlanderPruneDFA(true), minimizeDFA(true), accelerateDFA(true), @@ -251,6 +252,7 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(onlyOneOutfix); G_UPDATE(allowShermanStates); G_UPDATE(allowMcClellan8); + G_UPDATE(allowWideStates); G_UPDATE(highlanderPruneDFA); G_UPDATE(minimizeDFA); G_UPDATE(accelerateDFA); diff --git a/src/grey.h b/src/grey.h index 34c62918..ed2f845a 100644 --- a/src/grey.h +++ b/src/grey.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in 
source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -87,6 +87,7 @@ struct Grey { bool allowShermanStates; bool allowMcClellan8; + bool allowWideStates; // enable wide state for McClellan8 bool highlanderPruneDFA; bool minimizeDFA; diff --git a/src/nfa/accel_dfa_build_strat.h b/src/nfa/accel_dfa_build_strat.h index 881892ed..53a6f35b 100644 --- a/src/nfa/accel_dfa_build_strat.h +++ b/src/nfa/accel_dfa_build_strat.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,6 +40,11 @@ namespace ue2 { class ReportManager; struct Grey; +enum DfaType { + McClellan, + Sheng, + Gough +}; class accel_dfa_build_strat : public dfa_build_strat { public: @@ -53,6 +58,8 @@ public: virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info, void *accel_out); virtual std::map getAccelInfo(const Grey &grey); + virtual DfaType getType() const = 0; + private: bool only_accel_init; }; diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp index 3f1614dd..d41c6f42 100644 --- a/src/nfa/goughcompile.cpp +++ b/src/nfa/goughcompile.cpp @@ -91,6 +91,7 @@ public: void buildAccel(dstate_id_t this_idx, const AccelScheme &info, void *accel_out) override; u32 max_allowed_offset_accel() const override { return 0; } + DfaType getType() const override { return Gough; } raw_som_dfa &rdfa; const GoughGraph ≫ diff --git a/src/nfa/mcclellan.c b/src/nfa/mcclellan.c index ceedb9db..1521de5b 100644 --- a/src/nfa/mcclellan.c +++ b/src/nfa/mcclellan.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -167,9 +167,68 @@ u32 doNormal16(const struct mcclellan *m, const u8 **c_inout, const u8 *end, } static really_inline -char mcclellanExec16_i(const struct mcclellan *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **c_final, enum MatchMode mode) { +u32 doNormalWide16(const struct mcclellan *m, const u8 **c_inout, + const u8 *end, u32 s, char *qstate, u16 *offset, + char do_accel, enum MatchMode mode) { + const u8 *c = *c_inout; + + u32 wide_limit = m->wide_limit; + const char *wide_base + = (const char *)m - sizeof(struct NFA) + m->wide_offset; + + const u16 *succ_table + = (const u16 *)((const char *)m + sizeof(struct mcclellan)); + assert(ISALIGNED_N(succ_table, 2)); + u32 sherman_base = m->sherman_limit; + const char *sherman_base_offset + = (const char *)m - sizeof(struct NFA) + m->sherman_offset; + u32 as = m->alphaShift; + + s &= STATE_MASK; + + while (c < end && s) { + u8 cprime = m->remap[*c]; + DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u) &c: %p\n", *c, + ourisprint(*c) ? 
*c : '?', cprime, s, c); + + if (unlikely(s >= wide_limit)) { + const char *wide_entry + = findWideEntry16(m, wide_base, wide_limit, s); + DEBUG_PRINTF("doing wide head (%u)\n", s); + s = doWide16(wide_entry, &c, end, m->remap, (u16 *)&s, qstate, + offset); + } else if (s >= sherman_base) { + const char *sherman_state + = findShermanState(m, sherman_base_offset, sherman_base, s); + DEBUG_PRINTF("doing sherman (%u)\n", s); + s = doSherman16(sherman_state, cprime, succ_table, as); + } else { + DEBUG_PRINTF("doing normal\n"); + s = succ_table[(s << as) + cprime]; + } + + DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK); + c++; + + if (do_accel && (s & ACCEL_FLAG)) { + break; + } + if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { + break; + } + + s &= STATE_MASK; + } + + *c_inout = c; + return s; +} + +static really_inline +char mcclellanExec16_i(const struct mcclellan *m, u32 *state, char *qstate, + const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, + void *ctxt, char single, const u8 **c_final, + enum MatchMode mode) { assert(ISALIGNED_N(state, 2)); if (!len) { if (mode == STOP_AT_MATCH) { @@ -179,6 +238,7 @@ char mcclellanExec16_i(const struct mcclellan *m, u32 *state, const u8 *buf, } u32 s = *state; + u16 offset = 0; const u8 *c = buf; const u8 *c_end = buf + len; const struct mstate_aux *aux @@ -207,7 +267,12 @@ without_accel: goto exit; } - s = doNormal16(m, &c, min_accel_offset, s, 0, mode); + if (unlikely(m->has_wide)) { + s = doNormalWide16(m, &c, min_accel_offset, s, qstate, &offset, 0, + mode); + } else { + s = doNormal16(m, &c, min_accel_offset, s, 0, mode); + } if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { if (mode == STOP_AT_MATCH) { @@ -259,7 +324,11 @@ with_accel: } } - s = doNormal16(m, &c, c_end, s, 1, mode); + if (unlikely(m->has_wide)) { + s = doNormalWide16(m, &c, c_end, s, qstate, &offset, 1, mode); + } else { + s = doNormal16(m, &c, c_end, s, 1, mode); + } if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { if (mode == STOP_AT_MATCH) { @@ -297,44 +366,47 @@ exit: } static never_inline -char mcclellanExec16_i_cb(const struct mcclellan *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcclellanExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, CALLBACK_OUTPUT); +char mcclellanExec16_i_cb(const struct mcclellan *m, u32 *state, char *qstate, + const u8 *buf, size_t len, u64a offAdj, + NfaCallback cb, void *ctxt, char single, + const u8 **final_point) { + return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt, + single, final_point, CALLBACK_OUTPUT); } static never_inline -char mcclellanExec16_i_sam(const struct mcclellan *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcclellanExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, STOP_AT_MATCH); +char mcclellanExec16_i_sam(const struct mcclellan *m, u32 *state, char *qstate, + const u8 *buf, size_t len, u64a offAdj, + NfaCallback cb, void *ctxt, char single, + const u8 **final_point) { + return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt, + single, final_point, STOP_AT_MATCH); } static never_inline -char mcclellanExec16_i_nm(const struct mcclellan *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcclellanExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, NO_MATCHES); +char 
mcclellanExec16_i_nm(const struct mcclellan *m, u32 *state, char *qstate, + const u8 *buf, size_t len, u64a offAdj, + NfaCallback cb, void *ctxt, char single, + const u8 **final_point) { + return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt, + single, final_point, NO_MATCHES); } static really_inline -char mcclellanExec16_i_ni(const struct mcclellan *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point, - enum MatchMode mode) { +char mcclellanExec16_i_ni(const struct mcclellan *m, u32 *state, char *qstate, + const u8 *buf, size_t len, u64a offAdj, + NfaCallback cb, void *ctxt, char single, + const u8 **final_point, enum MatchMode mode) { if (mode == CALLBACK_OUTPUT) { - return mcclellanExec16_i_cb(m, state, buf, len, offAdj, cb, ctxt, - single, final_point); + return mcclellanExec16_i_cb(m, state, qstate, buf, len, offAdj, cb, + ctxt, single, final_point); } else if (mode == STOP_AT_MATCH) { - return mcclellanExec16_i_sam(m, state, buf, len, offAdj, cb, ctxt, - single, final_point); + return mcclellanExec16_i_sam(m, state, qstate, buf, len, offAdj, cb, + ctxt, single, final_point); } else { assert(mode == NO_MATCHES); - return mcclellanExec16_i_nm(m, state, buf, len, offAdj, cb, ctxt, - single, final_point); + return mcclellanExec16_i_nm(m, state, qstate, buf, len, offAdj, cb, + ctxt, single, final_point); } } @@ -540,6 +612,10 @@ char mcclellanCheckEOD(const struct NFA *nfa, u32 s, u64a offset, const struct mcclellan *m = getImplNfa(nfa); const struct mstate_aux *aux = get_aux(m, s); + if (m->has_wide == 1 && s >= m->wide_limit) { + return MO_CONTINUE_MATCHING; + } + if (!aux->accept_eod) { return MO_CONTINUE_MATCHING; } @@ -612,9 +688,9 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, /* do main buffer region */ const u8 *final_look; - char rv = mcclellanExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp, - offset + sp, cb, context, single, - &final_look, mode); + char rv = mcclellanExec16_i_ni(m, &s, q->state, cur_buf + sp, + local_ep - sp, offset + sp, cb, context, + single, &final_look, mode); if (rv == MO_DEAD) { *(u16 *)q->state = 0; return MO_DEAD; @@ -684,12 +760,16 @@ char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, const u8 *buffer, const struct mcclellan *m = getImplNfa(n); u32 s = m->start_anchored; - if (mcclellanExec16_i(m, &s, buffer, length, offset, cb, context, single, - NULL, CALLBACK_OUTPUT) + if (mcclellanExec16_i(m, &s, NULL, buffer, length, offset, cb, context, + single, NULL, CALLBACK_OUTPUT) == MO_DEAD) { return s ? MO_ALIVE : MO_DEAD; } + if (m->has_wide == 1 && s >= m->wide_limit) { + return MO_ALIVE; + } + const struct mstate_aux *aux = get_aux(m, s); if (aux->accept_eod) { @@ -768,6 +848,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, char rv = mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, offset + sp, cb, context, single, &final_look, mode); + if (rv == MO_HALT_MATCHING) { *(u8 *)q->state = 0; return MO_DEAD; @@ -1016,7 +1097,8 @@ char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report, u16 s = *(u16 *)q->state; DEBUG_PRINTF("checking accepts for %hu\n", s); - return mcclellanHasAccept(m, get_aux(m, s), report); + return (m->has_wide == 1 && s >= m->wide_limit) ? 
+ 0 : mcclellanHasAccept(m, get_aux(m, s), report); } char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) { @@ -1026,7 +1108,8 @@ char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) { u16 s = *(u16 *)q->state; DEBUG_PRINTF("checking accepts for %hu\n", s); - return !!get_aux(m, s)->accept; + return (m->has_wide == 1 && s >= m->wide_limit) ? + 0 : !!get_aux(m, s)->accept; } char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end) { @@ -1111,6 +1194,12 @@ char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset, void *state, UNUSED u8 key) { const struct mcclellan *m = getImplNfa(nfa); u16 s = offset ? m->start_floating : m->start_anchored; + + // new byte + if (m->has_wide) { + *((u16 *)state + 1) = 0; + } + if (s) { unaligned_store_u16(state, s); return 1; @@ -1140,14 +1229,24 @@ void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state, const u8 *buf, char top, size_t start_off, size_t len, NfaCallback cb, void *ctxt) { const struct mcclellan *m = getImplNfa(nfa); + u32 s; - u32 s = top ? m->start_anchored : unaligned_load_u16(state); + if (top) { + s = m->start_anchored; + + // new byte + if (m->has_wide) { + *((u16 *)state + 1) = 0; + } + } else { + s = unaligned_load_u16(state); + } if (m->flags & MCCLELLAN_FLAG_SINGLE) { - mcclellanExec16_i(m, &s, buf + start_off, len - start_off, + mcclellanExec16_i(m, &s, state, buf + start_off, len - start_off, start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT); } else { - mcclellanExec16_i(m, &s, buf + start_off, len - start_off, + mcclellanExec16_i(m, &s, state, buf + start_off, len - start_off, start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT); } @@ -1178,9 +1277,16 @@ char nfaExecMcClellan8_queueInitState(UNUSED const struct NFA *nfa, char nfaExecMcClellan16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { - assert(nfa->scratchStateSize == 2); + const struct mcclellan *m = getImplNfa(nfa); + assert(m->has_wide == 1 ? nfa->scratchStateSize == 4 + : nfa->scratchStateSize == 2); assert(ISALIGNED_N(q->state, 2)); *(u16 *)q->state = 0; + + // new byte + if (m->has_wide) { + *((u16 *)q->state + 1) = 0; + } return 0; } @@ -1206,21 +1312,39 @@ char nfaExecMcClellan8_expandState(UNUSED const struct NFA *nfa, void *dest, char nfaExecMcClellan16_queueCompressState(UNUSED const struct NFA *nfa, const struct mq *q, UNUSED s64a loc) { + const struct mcclellan *m = getImplNfa(nfa); void *dest = q->streamState; const void *src = q->state; - assert(nfa->scratchStateSize == 2); - assert(nfa->streamStateSize == 2); + assert(m->has_wide == 1 ? nfa->scratchStateSize == 4 + : nfa->scratchStateSize == 2); + assert(m->has_wide == 1 ? nfa->streamStateSize == 4 + : nfa->streamStateSize == 2); + assert(ISALIGNED_N(src, 2)); unaligned_store_u16(dest, *(const u16 *)(src)); + + // new byte + if (m->has_wide) { + *((u16 *)dest + 1) = *((const u16 *)src + 1); + } return 0; } char nfaExecMcClellan16_expandState(UNUSED const struct NFA *nfa, void *dest, const void *src, UNUSED u64a offset, UNUSED u8 key) { - assert(nfa->scratchStateSize == 2); - assert(nfa->streamStateSize == 2); + const struct mcclellan *m = getImplNfa(nfa); + assert(m->has_wide == 1 ? nfa->scratchStateSize == 4 + : nfa->scratchStateSize == 2); + assert(m->has_wide == 1 ? 
nfa->streamStateSize == 4 + : nfa->streamStateSize == 2); + assert(ISALIGNED_N(dest, 2)); *(u16 *)dest = unaligned_load_u16(src); + + // new byte + if (m->has_wide) { + *((u16 *)dest + 1) = *((const u16 *)src + 1); + } return 0; } diff --git a/src/nfa/mcclellan_common_impl.h b/src/nfa/mcclellan_common_impl.h index be130715..b6af672d 100644 --- a/src/nfa/mcclellan_common_impl.h +++ b/src/nfa/mcclellan_common_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -82,3 +82,108 @@ u32 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table, u32 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET); return succ_table[(daddy << as) + cprime]; } + +static really_inline +u16 doWide16(const char *wide_entry, const u8 **c_inout, const u8 *end, + const u8 *remap, const u16 *s, char *qstate, u16 *offset) { + // Internal relative offset after the last visit of the wide state. + if (qstate != NULL) { // stream mode + *offset = *(const u16 *)(qstate + 2); + } + + u8 successful = 0; + const u8 *c = *c_inout; + u32 len_c = end - c; + + u16 width = *(const u16 *)(wide_entry + WIDE_WIDTH_OFFSET); + assert(width >= 8); + const u8 *symbols = (const u8 *)(wide_entry + WIDE_SYMBOL_OFFSET16); + const u16 *trans = (const u16 *)(wide_entry + + WIDE_TRANSITION_OFFSET16(width)); + + assert(*offset < width); + u16 len_w = width - *offset; + const u8 *sym = symbols + *offset; + + char tmp[16]; + u16 pos = 0; + + if (*offset == 0 && remap[*c] != *sym) { + goto normal; + } + + // both in (16, +oo). + while (len_w >= 16 && len_c >= 16) { + m128 str_w = loadu128(sym); + for (size_t i = 0; i < 16; i++) { + tmp[i] = remap[*(c + i)]; + } + m128 str_c = loadu128(tmp); + + u32 z = movemask128(eq128(str_w, str_c)); + pos = ctz32(~z); + assert(pos <= 16); + + if (pos < 16) { + goto normal; + } + + sym += 16; + c += 16; + len_w -= 16; + len_c -= 16; + } + + pos = 0; + // at least one in (0, 16). + u32 loadLength_w = MIN(len_w, 16); + u32 loadLength_c = MIN(len_c, 16); + m128 str_w = loadbytes128(sym, loadLength_w); + for (size_t i = 0; i < loadLength_c; i++) { + tmp[i] = remap[*(c + i)]; + } + m128 str_c = loadbytes128(tmp, loadLength_c); + + u32 z = movemask128(eq128(str_w, str_c)); + pos = ctz32(~z); + + pos = MIN(pos, MIN(loadLength_w, loadLength_c)); + + if (loadLength_w <= loadLength_c) { + assert(pos <= loadLength_w); + // successful matching. + if (pos == loadLength_w) { + c -= 1; + successful = 1; + } + // failure, do nothing. + } else { + assert(pos <= loadLength_c); + // successful partial matching. + if (pos == loadLength_c) { + c -= 1; + goto partial; + } + // failure, do nothing. + } + +normal: + *offset = 0; + if (qstate != NULL) { + // Internal relative offset. + unaligned_store_u16(qstate + 2, *offset); + } + c += pos; + *c_inout = c; + return successful ? *trans : *(trans + 1 + remap[*c]); + +partial: + *offset = sym - symbols + pos; + if (qstate != NULL) { + // Internal relative offset. 
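+        // Persist progress through the symbol chain so the next stream
+        // write can resume matching from inside this wide state.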
+ unaligned_store_u16(qstate + 2, *offset); + } + c += pos; + *c_inout = c; + return *s; +} diff --git a/src/nfa/mcclellan_internal.h b/src/nfa/mcclellan_internal.h index 5289b074..0981f99e 100644 --- a/src/nfa/mcclellan_internal.h +++ b/src/nfa/mcclellan_internal.h @@ -50,6 +50,16 @@ extern "C" #define SHERMAN_CHARS_OFFSET 4 #define SHERMAN_STATES_OFFSET(sso_len) (4 + (sso_len)) +#define WIDE_STATE 2 +#define WIDE_ENTRY_OFFSET8(weo_pos) (2 + (weo_pos)) +#define WIDE_ENTRY_OFFSET16(weo_pos) (3 + (weo_pos)) + +#define WIDE_WIDTH_OFFSET 0 +#define WIDE_SYMBOL_OFFSET8 1 +#define WIDE_TRANSITION_OFFSET8(wto_width) (1 + (wto_width)) +#define WIDE_SYMBOL_OFFSET16 2 +#define WIDE_TRANSITION_OFFSET16(wto_width) (2 + (wto_width)) + struct report_list { u32 count; ReportID report[]; @@ -79,13 +89,17 @@ struct mcclellan { u16 accel_limit_8; /**< 8 bit, lowest accelerable state */ u16 accept_limit_8; /**< 8 bit, lowest accept state */ u16 sherman_limit; /**< lowest sherman state */ + u16 wide_limit; /**< 8/16 bit, lowest wide head state */ u8 alphaShift; u8 flags; u8 has_accel; /**< 1 iff there are any accel plans */ + u8 has_wide; /**< 1 iff there exists any wide state */ u8 remap[256]; /**< remaps characters to a smaller alphabet */ ReportID arb_report; /**< one of the accepts that this dfa may raise */ u32 accel_offset; /**< offset of accel structures from start of McClellan */ u32 haig_offset; /**< reserved for use by Haig, relative to start of NFA */ + u32 wide_offset; /**< offset of the wide state entries to the start of the + * nfa structure */ }; static really_inline @@ -106,6 +120,43 @@ char *findMutableShermanState(char *sherman_base_offset, u16 sherman_base, return sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base); } +static really_inline +const char *findWideEntry8(UNUSED const struct mcclellan *m, + const char *wide_base, u32 wide_limit, u32 s) { + UNUSED u8 type = *(const u8 *)wide_base; + assert(type == WIDE_STATE); + const u32 entry_offset + = *(const u32 *)(wide_base + + WIDE_ENTRY_OFFSET8((s - wide_limit) * sizeof(u32))); + + const char *rv = wide_base + entry_offset; + assert(rv < (const char *)m + m->length - sizeof(struct NFA)); + return rv; +} + +static really_inline +const char *findWideEntry16(UNUSED const struct mcclellan *m, + const char *wide_base, u32 wide_limit, u32 s) { + UNUSED u8 type = *(const u8 *)wide_base; + assert(type == WIDE_STATE); + const u32 entry_offset + = *(const u32 *)(wide_base + + WIDE_ENTRY_OFFSET16((s - wide_limit) * sizeof(u32))); + + const char *rv = wide_base + entry_offset; + assert(rv < (const char *)m + m->length - sizeof(struct NFA)); + return rv; +} + +static really_inline +char *findMutableWideEntry16(char *wide_base, u32 wide_limit, u32 s) { + u32 entry_offset + = *(const u32 *)(wide_base + + WIDE_ENTRY_OFFSET16((s - wide_limit) * sizeof(u32))); + + return wide_base + entry_offset; +} + #ifdef __cplusplus } #endif diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 8e3a744c..db142f86 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -56,13 +56,19 @@ #include #include #include +#include #include #include #include +#include "mcclellandump.h" +#include "util/dump_util.h" +#include "util/dump_charclass.h" + using namespace std; using boost::adaptors::map_keys; +using boost::dynamic_bitset; #define ACCEL_DFA_MAX_OFFSET_DEPTH 4 @@ -82,6 +88,8 @@ namespace /* anon */ { struct dstate_extra { u16 daddytaken = 0; bool shermanState = false; + bool wideState = false; + bool wideHead = 
false; }; struct dfa_info { @@ -89,6 +97,8 @@ struct dfa_info { raw_dfa &raw; vector &states; vector extra; + vector> wide_state_chain; + vector> wide_symbol_chain; const u16 alpha_size; /* including special symbols */ const array &alpha_remap; const u16 impl_alpha_size; @@ -112,6 +122,14 @@ struct dfa_info { return extra[raw_id].shermanState; } + bool is_widestate(dstate_id_t raw_id) const { + return extra[raw_id].wideState; + } + + bool is_widehead(dstate_id_t raw_id) const { + return extra[raw_id].wideHead; + } + size_t size(void) const { return states.size(); } }; @@ -124,6 +142,35 @@ u8 dfa_info::getAlphaShift() const { } } +struct state_prev_info { + vector> prev_vec; + explicit state_prev_info(size_t alpha_size) : prev_vec(alpha_size) {} +}; + +struct DfaPrevInfo { + u16 impl_alpha_size; + u16 state_num; + vector states; + set accepts; + + explicit DfaPrevInfo(raw_dfa &rdfa); +}; + +DfaPrevInfo::DfaPrevInfo(raw_dfa &rdfa) + : impl_alpha_size(rdfa.getImplAlphaSize()), state_num(rdfa.states.size()), + states(state_num, state_prev_info(impl_alpha_size)){ + for (size_t i = 0; i < states.size(); i++) { + for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { + dstate_id_t curr = rdfa.states[i].next[sym]; + states[curr].prev_vec[sym].push_back(i); + } + if (!rdfa.states[i].reports.empty() + || !rdfa.states[i].reports_eod.empty()) { + DEBUG_PRINTF("accept raw state: %ld\n", i); + accepts.insert(i); + } + } +} } // namespace static @@ -151,6 +198,11 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { for (size_t j = 0; j < alphaSize; j++) { size_t c_prime = (i << alphaShift) + j; + // wide state has no aux structure. + if (m->has_wide && succ_table[c_prime] >= m->wide_limit) { + continue; + } + mstate_aux *aux = getAux(n, succ_table[c_prime]); if (aux->accept) { @@ -165,7 +217,8 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { /* handle the sherman states */ char *sherman_base_offset = (char *)n + m->sherman_offset; - for (u16 j = m->sherman_limit; j < m->state_count; j++) { + u16 sherman_ceil = m->has_wide == 1 ? m->wide_limit : m->state_count; + for (u16 j = m->sherman_limit; j < sherman_ceil; j++) { char *sherman_cur = findMutableShermanState(sherman_base_offset, m->sherman_limit, j); assert(*(sherman_cur + SHERMAN_TYPE_OFFSET) == SHERMAN_STATE); @@ -174,6 +227,11 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { for (u8 i = 0; i < len; i++) { u16 succ_i = unaligned_load_u16((u8 *)&succs[i]); + // wide state has no aux structure. + if (m->has_wide && succ_i >= m->wide_limit) { + continue; + } + mstate_aux *aux = getAux(n, succ_i); if (aux->accept) { @@ -187,6 +245,49 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { unaligned_store_u16((u8 *)&succs[i], succ_i); } } + + /* handle the wide states */ + if (m->has_wide) { + u32 wide_limit = m->wide_limit; + char *wide_base = (char *)n + m->wide_offset; + assert(*wide_base == WIDE_STATE); + u16 wide_number = verify_u16(info.wide_symbol_chain.size()); + // traverse over wide head states. 
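+        // As with the normal and sherman entries above, OR the
+        // ACCEPT/ACCEL flags into each wide entry's transitions.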
+ for (u16 j = wide_limit; j < wide_limit + wide_number; j++) { + char *wide_cur + = findMutableWideEntry16(wide_base, wide_limit, j); + u16 width = *(const u16 *)(wide_cur + WIDE_WIDTH_OFFSET); + u16 *trans = (u16 *)(wide_cur + WIDE_TRANSITION_OFFSET16(width)); + + // check successful transition + u16 next = unaligned_load_u16((u8 *)trans); + if (next >= wide_limit) { + continue; + } + mstate_aux *aux = getAux(n, next); + if (aux->accept) { + next |= ACCEPT_FLAG; + } + if (aux->accel_offset) { + next |= ACCEL_FLAG; + } + unaligned_store_u16((u8 *)trans, next); + trans ++; + + // check failure transition + for (symbol_t k = 0; k < alphaSize; k++) { + u16 next_k = unaligned_load_u16((u8 *)&trans[k]); + mstate_aux *aux_k = getAux(n, next_k); + if (aux_k->accept) { + next_k |= ACCEPT_FLAG; + } + if (aux_k->accel_offset) { + next_k |= ACCEL_FLAG; + } + unaligned_store_u16((u8 *)&trans[k], next_k); + } + } + } } u32 mcclellan_build_strat::max_allowed_offset_accel() const { @@ -232,6 +333,19 @@ void populateBasicInfo(size_t state_size, const dfa_info &info, m->start_anchored = info.implId(info.raw.start_anchored); m->start_floating = info.implId(info.raw.start_floating); m->has_accel = accel_count ? 1 : 0; + m->has_wide = info.wide_state_chain.size() > 0 ? 1 : 0; + + if (state_size == sizeof(u8) && m->has_wide == 1) { + // allocate 1 more byte for wide state use. + nfa->scratchStateSize += sizeof(u8); + nfa->streamStateSize += sizeof(u8); + } + + if (state_size == sizeof(u16) && m->has_wide == 1) { + // allocate 2 more bytes for wide state use. + nfa->scratchStateSize += sizeof(u16); + nfa->streamStateSize += sizeof(u16); + } if (single) { m->flags |= MCCLELLAN_FLAG_SINGLE; @@ -404,6 +518,23 @@ size_t calcShermanRegionSize(const dfa_info &info) { return ROUNDUP_16(rv); } +static +size_t calcWideRegionSize(const dfa_info &info) { + if (info.wide_state_chain.empty()) { + return 0; + } + + // wide info header + size_t rv = info.wide_symbol_chain.size() * sizeof(u32) + 3; + + // wide info body + for (const auto &chain : info.wide_symbol_chain) { + rv += chain.size() + (info.impl_alpha_size + 1) * sizeof(u16) + 2; + } + + return ROUNDUP_16(rv); +} + static void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info, const vector &reports, const vector &reports_eod, @@ -418,42 +549,60 @@ void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info, /* returns false on error */ static -bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) { +bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base, + dstate_id_t *wide_limit) { info.states[0].impl_id = 0; /* dead is always 0 */ vector norm; vector sherm; + vector wideHead; + vector wideState; if (info.size() > (1 << 16)) { DEBUG_PRINTF("too many states\n"); - *sherman_base = 0; + *wide_limit = 0; return false; } for (u32 i = 1; i < info.size(); i++) { - if (info.is_sherman(i)) { + if (info.is_widehead(i)) { + wideHead.push_back(i); + } else if (info.is_widestate(i)) { + wideState.push_back(i); + } else if (info.is_sherman(i)) { sherm.push_back(i); } else { norm.push_back(i); } } - dstate_id_t next_norm = 1; + dstate_id_t next = 1; for (const dstate_id_t &s : norm) { - info.states[s].impl_id = next_norm++; + DEBUG_PRINTF("[norm] mapping state %u to %u\n", s, next); + info.states[s].impl_id = next++; } - *sherman_base = next_norm; - dstate_id_t next_sherman = next_norm; - + *sherman_base = next; for (const dstate_id_t &s : sherm) { - info.states[s].impl_id = next_sherman++; + DEBUG_PRINTF("[sherm] mapping state %u to %u\n", s, 
next); + info.states[s].impl_id = next++; + } + + *wide_limit = next; + for (const dstate_id_t &s : wideHead) { + DEBUG_PRINTF("[widehead] mapping state %u to %u\n", s, next); + info.states[s].impl_id = next++; + } + + for (const dstate_id_t &s : wideState) { + DEBUG_PRINTF("[wide] mapping state %u to %u\n", s, next); + info.states[s].impl_id = next++; } /* Check to see if we haven't over allocated our states */ - DEBUG_PRINTF("next sherman %u masked %u\n", next_sherman, - (dstate_id_t)(next_sherman & STATE_MASK)); - return (next_sherman - 1) == ((next_sherman - 1) & STATE_MASK); + DEBUG_PRINTF("next sherman %u masked %u\n", next, + (dstate_id_t)(next & STATE_MASK)); + return (next - 1) == ((next - 1) & STATE_MASK); } static @@ -470,12 +619,16 @@ bytecode_ptr mcclellanCompile16(dfa_info &info, const CompileContext &cc, assert(alphaShift <= 8); u16 count_real_states; - if (!allocateFSN16(info, &count_real_states)) { + u16 wide_limit; + if (!allocateFSN16(info, &count_real_states, &wide_limit)) { DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", info.size()); return nullptr; } + DEBUG_PRINTF("count_real_states: %d\n", count_real_states); + DEBUG_PRINTF("non_wide_states: %d\n", wide_limit); + auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); map accel_escape_info = info.strat.getAccelInfo(cc.grey); @@ -483,7 +636,7 @@ bytecode_ptr mcclellanCompile16(dfa_info &info, const CompileContext &cc, size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16) * count_real_states; - size_t aux_size = sizeof(mstate_aux) * info.size(); + size_t aux_size = sizeof(mstate_aux) * wide_limit; size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size); size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); @@ -491,12 +644,24 @@ bytecode_ptr mcclellanCompile16(dfa_info &info, const CompileContext &cc, + ri->getReportListSize(), 32); size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size); size_t sherman_size = calcShermanRegionSize(info); - - size_t total_size = sherman_offset + sherman_size; + size_t wide_offset = ROUNDUP_16(sherman_offset + sherman_size); + size_t wide_size = calcWideRegionSize(info); + size_t total_size = wide_offset + wide_size; accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); + DEBUG_PRINTF("aux_offset %zu\n", aux_offset); + DEBUG_PRINTF("aux_size %zu\n", aux_size); + DEBUG_PRINTF("rl size %u\n", ri->getReportListSize()); + DEBUG_PRINTF("accel_offset %zu\n", accel_offset + sizeof(NFA)); + DEBUG_PRINTF("accel_size %zu\n", accel_size); + DEBUG_PRINTF("sherman_offset %zu\n", sherman_offset); + DEBUG_PRINTF("sherman_size %zu\n", sherman_size); + DEBUG_PRINTF("wide_offset %zu\n", wide_offset); + DEBUG_PRINTF("wide_size %zu\n", wide_size); + DEBUG_PRINTF("total_size %zu\n", total_size); + auto nfa = make_zeroed_bytecode_ptr(total_size); char *nfa_base = (char *)nfa.get(); @@ -511,6 +676,9 @@ bytecode_ptr mcclellanCompile16(dfa_info &info, const CompileContext &cc, mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset); mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get()); + m->wide_limit = wide_limit; + m->wide_offset = wide_offset; + /* copy in the mc header information */ m->sherman_offset = sherman_offset; m->sherman_end = total_size; @@ -518,7 +686,7 @@ bytecode_ptr mcclellanCompile16(dfa_info &info, const CompileContext &cc, /* do normal states */ for (size_t i = 0; i < info.size(); i++) { - if (info.is_sherman(i)) { + if 
(info.is_sherman(i) || info.is_widestate(i)) { continue; } @@ -556,6 +724,7 @@ bytecode_ptr mcclellanCompile16(dfa_info &info, const CompileContext &cc, mstate_aux *this_aux = getAux(nfa.get(), fs); assert(fs >= count_real_states); + assert(fs < wide_limit); char *curr_sherman_entry = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE; @@ -599,6 +768,70 @@ bytecode_ptr mcclellanCompile16(dfa_info &info, const CompileContext &cc, } } + if (!info.wide_state_chain.empty()) { + /* do wide states using info */ + u16 wide_number = verify_u16(info.wide_symbol_chain.size()); + char *wide_base = nfa_base + m->wide_offset; + assert(ISALIGNED_16(wide_base)); + + char *wide_top = wide_base; + *(u8 *)(wide_top++) = WIDE_STATE; + *(u16 *)(wide_top) = wide_number; + wide_top += 2; + + char *curr_wide_entry = wide_top + wide_number * sizeof(u32); + u32 *wide_offset_list = (u32 *)wide_top; + + /* get the order of writing wide states */ + vector order(wide_number); + for (size_t i = 0; i < wide_number; i++) { + dstate_id_t head = info.wide_state_chain[i].front(); + size_t pos = info.implId(head) - m->wide_limit; + order[pos] = i; + } + + for (size_t i : order) { + vector &state_chain = info.wide_state_chain[i]; + vector &symbol_chain = info.wide_symbol_chain[i]; + + u16 width = verify_u16(symbol_chain.size()); + *(u16 *)(curr_wide_entry + WIDE_WIDTH_OFFSET) = width; + u8 *chars = (u8 *)(curr_wide_entry + WIDE_SYMBOL_OFFSET16); + + // store wide state symbol chain + for (size_t j = 0; j < width; j++) { + *(chars++) = verify_u8(symbol_chain[j]); + } + + // store wide state transition table + u16 *trans = (u16 *)(curr_wide_entry + + WIDE_TRANSITION_OFFSET16(width)); + dstate_id_t tail = state_chain[width - 1]; + symbol_t last = symbol_chain[width -1]; + dstate_id_t tran = info.states[tail].next[last]; + // 1. successful transition + *trans++ = info.implId(tran); + // 2. failure transition + for (size_t j = 0; verify_u16(j) < width - 1; j++) { + if (symbol_chain[j] != last) { + tran = info.states[state_chain[j]].next[last]; + } + } + for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) { + if (sym != last) { + *trans++ = info.implId(info.states[tail].next[sym]); + } + else { + *trans++ = info.implId(tran); + } + } + + *wide_offset_list++ = verify_u32(curr_wide_entry - wide_base); + + curr_wide_entry = (char *)trans; + } + } + markEdges(nfa.get(), succ_table, info); if (accel_states && nfa) { @@ -844,12 +1077,16 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, if (trust_daddy_states) { // Use the daddy already set for this state so long as it isn't already // a Sherman state. - if (!info.is_sherman(currState.daddy)) { + dstate_id_t daddy = currState.daddy; + if (!info.is_sherman(daddy) && !info.is_widestate(daddy)) { hinted.insert(currState.daddy); } else { // Fall back to granddaddy, which has already been processed (due // to BFS ordering) and cannot be a Sherman state. dstate_id_t granddaddy = info.states[currState.daddy].daddy; + if (info.is_widestate(granddaddy)) { + return; + } assert(!info.is_sherman(granddaddy)); hinted.insert(granddaddy); } @@ -861,7 +1098,7 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, assert(donor < curr_id); u32 score = 0; - if (info.is_sherman(donor)) { + if (info.is_sherman(donor) || info.is_widestate(donor)) { continue; } @@ -934,6 +1171,290 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { return false; } +/* \brief Test for only-one-predecessor property. 
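+ * A state qualifies only if exactly one (state, symbol) pair transitions
+ * into it; prev_id and prev_sym are set to that unique predecessor.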
*/ +static +bool check_property1(const DfaPrevInfo &info, const u16 impl_alpha_size, + const dstate_id_t curr_id, dstate_id_t &prev_id, + symbol_t &prev_sym) { + u32 num_prev = 0; + bool test_p1 = false; + + for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { + num_prev += info.states[curr_id].prev_vec[sym].size(); + DEBUG_PRINTF("Check symbol: %u, with its vector size: %lu\n", sym, + info.states[curr_id].prev_vec[sym].size()); + if (num_prev == 1 && !test_p1) { + test_p1 = true; + prev_id = info.states[curr_id].prev_vec[sym].front(); //[0] for sure??? + prev_sym = sym; + } + } + + return num_prev == 1; +} + +/* \brief Test for same-failure-action property. */ +static +bool check_property2(const raw_dfa &rdfa, const u16 impl_alpha_size, + const dstate_id_t curr_id, const dstate_id_t prev_id, + const symbol_t curr_sym, const symbol_t prev_sym) { + const dstate &prevState = rdfa.states[prev_id]; + const dstate &currState = rdfa.states[curr_id]; + + // Compare transition tables between currState and prevState. + u16 score = 0; + for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { + if (currState.next[sym] == prevState.next[sym] + && sym != curr_sym && sym != prev_sym) { + score++; + } + } + DEBUG_PRINTF("(Score: %u/%u)\n", score, impl_alpha_size); + + // 2 cases. + if (curr_sym != prev_sym && score >= impl_alpha_size - 2 + && currState.next[prev_sym] == prevState.next[curr_sym]) { + return true; + } else if (curr_sym == prev_sym && score == impl_alpha_size - 1) { + return true; + } + return false; +} + +/* \brief Check whether adding current prev_id will generate a circle.*/ +static +bool check_circle(const DfaPrevInfo &info, const u16 impl_alpha_size, + const vector &chain, const dstate_id_t id) { + const vector> &prev_vec = info.states[id].prev_vec; + const dstate_id_t tail = chain.front(); + for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { + auto iter = find(prev_vec[sym].begin(), prev_vec[sym].end(), tail); + if (iter != prev_vec[sym].end()) { + // Tail is one of id's predecessors, forming a circle. + return true; + } + } + return false; +} + +/* \brief Returns a chain of state ids and symbols. */ +static +dstate_id_t find_chain_candidate(const raw_dfa &rdfa, const DfaPrevInfo &info, + const dstate_id_t curr_id, + const symbol_t curr_sym, + vector &temp_chain) { + //Record current id first. + temp_chain.push_back(curr_id); + + const u16 size = info.impl_alpha_size; + + // Stop when entering root cloud. + if (rdfa.start_anchored != DEAD_STATE + && is_cyclic_near(rdfa, rdfa.start_anchored) + && curr_id < size) { + return curr_id; + } + if (rdfa.start_floating != DEAD_STATE + && curr_id >= rdfa.start_floating + && curr_id < rdfa.start_floating + size * 3) { + return curr_id; + } + + // Stop when reaching anchored or floating. + if (curr_id == rdfa.start_anchored || curr_id == rdfa.start_floating) { + return curr_id; + } + + dstate_id_t prev_id = 0; + symbol_t prev_sym = ALPHABET_SIZE; + + // Check the only-one-predecessor property. + if (!check_property1(info, size, curr_id, prev_id, prev_sym)) { + return curr_id; + } + assert(prev_id != 0 && prev_sym != ALPHABET_SIZE); + DEBUG_PRINTF("(P1 test passed.)\n"); + + // Circle testing for the prev_id that passes the P1 test. + if (check_circle(info, size, temp_chain, prev_id)) { + DEBUG_PRINTF("(A circle is found.)\n"); + return curr_id; + } + + // Check the same-failure-action property. 
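+    // Both states must fail to the same successors on every other symbol,
+    // so the packed chain can share a single failure transition row.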
+ if (!check_property2(rdfa, size, curr_id, prev_id, curr_sym, prev_sym)) { + return curr_id; + } + DEBUG_PRINTF("(P2 test passed.)\n"); + + if (!rdfa.states[prev_id].reports.empty() + || !rdfa.states[prev_id].reports_eod.empty()) { + return curr_id; + } else { + return find_chain_candidate(rdfa, info, prev_id, prev_sym, temp_chain); + } +} + +/* \brief Always store the non-extensible chains found till now. */ +static +bool store_chain_longest(vector> &candidate_chain, + vector &temp_chain, + dynamic_bitset<> &added, bool head_is_new) { + dstate_id_t head = temp_chain.front(); + u16 length = temp_chain.size(); + + if (head_is_new) { + DEBUG_PRINTF("This is a new chain!\n"); + + // Add this new chain and get it marked. + candidate_chain.push_back(temp_chain); + + for (auto &id : temp_chain) { + DEBUG_PRINTF("(Marking s%u ...)\n", id); + added.set(id); + } + + return true; + } + + DEBUG_PRINTF("This is a longer chain!\n"); + assert(!candidate_chain.empty()); + + auto chain = find_if(candidate_chain.begin(), candidate_chain.end(), + [&](const vector &it) { + return it.front() == head; + }); + + // Not a valid head, just do nothing and return. + if (chain == candidate_chain.end()) { + return false; + } + + u16 len = chain->size(); + + if (length > len) { + // Find out the branch node first. + size_t piv = 0; + for (; piv < length; piv++) { + if ((*chain)[piv] != temp_chain[piv]) { + break; + } + } + + for (size_t j = piv + 1; j < length; j++) { + DEBUG_PRINTF("(Marking s%u (new branch) ...)\n", temp_chain[j]); + added.set(temp_chain[j]); + } + + // Unmark old unuseful nodes. + // (Except the tail node, which is in working queue) + for (size_t j = piv + 1; j < verify_u16(len - 1); j++) { + DEBUG_PRINTF("(UnMarking s%u (old branch)...)\n", (*chain)[j]); + added.reset((*chain)[j]); + } + + chain->assign(temp_chain.begin(), temp_chain.end()); + } + + return false; +} + +/* \brief Generate wide_symbol_chain from wide_state_chain. */ +static +void generate_symbol_chain(dfa_info &info, vector &chain_tail) { + raw_dfa &rdfa = info.raw; + assert(chain_tail.size() == info.wide_state_chain.size()); + + for (size_t i = 0; i < info.wide_state_chain.size(); i++) { + vector &state_chain = info.wide_state_chain[i]; + vector symbol_chain; + + info.extra[state_chain[0]].wideHead = true; + size_t width = state_chain.size() - 1; + + for (size_t j = 0; j < width; j++) { + dstate_id_t curr_id = state_chain[j]; + dstate_id_t next_id = state_chain[j + 1]; + + // The last state of the chain doesn't belong to a wide state. + info.extra[curr_id].wideState = true; + + // The tail symbol comes from vector chain_tail; + if (j == width - 1) { + symbol_chain.push_back(chain_tail[i]); + } else { + for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) { + if (rdfa.states[curr_id].next[sym] == next_id) { + symbol_chain.push_back(sym); + break; + } + } + } + } + + info.wide_symbol_chain.push_back(symbol_chain); + } +} + +/* \brief Find potential regions of states to be packed into wide states. 
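+ * Walks backwards from the accept states over the predecessor relation,
+ * growing chains while properties 1 and 2 hold, and keeps only chains of
+ * at least 8 states.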
*/ +static +void find_wide_state(dfa_info &info) { + DfaPrevInfo dinfo(info.raw); + queue work_queue; + + dynamic_bitset<> added(info.raw.states.size()); + for (auto it : dinfo.accepts) { + work_queue.push(it); + added.set(it); + } + + vector chain_tail; + while (!work_queue.empty()) { + dstate_id_t curr_id = work_queue.front(); + work_queue.pop(); + DEBUG_PRINTF("Newly popped state: s%u\n", curr_id); + + for (symbol_t sym = 0; sym < dinfo.impl_alpha_size; sym++) { + for (auto info_it : dinfo.states[curr_id].prev_vec[sym]) { + if (added.test(info_it)) { + DEBUG_PRINTF("(s%u already marked.)\n", info_it); + continue; + } + + vector temp_chain; + // Head is a state failing the test of the chain. + dstate_id_t head = find_chain_candidate(info.raw, dinfo, + info_it, sym, + temp_chain); + + // A candidate chain should contain 8 substates at least. + if (temp_chain.size() < 8) { + DEBUG_PRINTF("(Not enough substates, continue.)\n"); + continue; + } + + bool head_is_new = !added.test(head); + if (head_is_new) { + added.set(head); + work_queue.push(head); + DEBUG_PRINTF("Newly pushed state: s%u\n", head); + } + + reverse(temp_chain.begin(), temp_chain.end()); + temp_chain.push_back(curr_id); + + assert(head > 0 && head == temp_chain.front()); + if (store_chain_longest(info.wide_state_chain, temp_chain, + added, head_is_new)) { + chain_tail.push_back(sym); + } + } + } + } + + generate_symbol_chain(info, chain_tail); +} + bytecode_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, const CompileContext &cc, bool trust_daddy_states, @@ -952,11 +1473,19 @@ bytecode_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, bytecode_ptr nfa; if (!using8bit) { + if (cc.grey.allowWideStates && strat.getType() == McClellan + && !is_triggered(raw.kind)) { + find_wide_state(info); + } + u16 total_daddy = 0; bool any_cyclic_near_anchored_state = is_cyclic_near(raw, raw.start_anchored); for (u32 i = 0; i < info.size(); i++) { + if (info.is_widestate(i)) { + continue; + } find_better_daddy(info, i, using8bit, any_cyclic_near_anchored_state, trust_daddy_states, cc.grey); diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h index ce63fbbf..73cb9fd7 100644 --- a/src/nfa/mcclellancompile.h +++ b/src/nfa/mcclellancompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -60,6 +60,7 @@ public: u32 max_allowed_offset_accel() const override; u32 max_stop_char() const override; u32 max_floating_stop_char() const override; + DfaType getType() const override { return McClellan; } private: raw_dfa &rdfa; diff --git a/src/nfa/shengcompile.h b/src/nfa/shengcompile.h index 2fe1e356..d795b362 100644 --- a/src/nfa/shengcompile.h +++ b/src/nfa/shengcompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017, Intel Corporation + * Copyright (c) 2016-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -61,6 +61,7 @@ public: u32 max_allowed_offset_accel() const override; u32 max_stop_char() const override; u32 max_floating_stop_char() const override; + DfaType getType() const override { return Sheng; } private: raw_dfa &rdfa; From f13cbd692ee658a6e619e41b115bb2aaee7f5eda Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Thu, 20 Dec 2018 15:28:07 +0800 
Subject: [PATCH 06/21] Update PCRE version to 8.42 (8.41 is also compatible) --- CMakeLists.txt | 4 ++-- cmake/pcre.cmake | 12 ++++++------ doc/dev-reference/compilation.rst | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 07000270..0e905db6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -456,7 +456,7 @@ set(PCRE_REQUIRED_MINOR_VERSION 41) set(PCRE_REQUIRED_VERSION ${PCRE_REQUIRED_MAJOR_VERSION}.${PCRE_REQUIRED_MINOR_VERSION}) include (${CMAKE_MODULE_PATH}/pcre.cmake) if (NOT CORRECT_PCRE_VERSION) - message(STATUS "PCRE ${PCRE_REQUIRED_VERSION} not found") + message(STATUS "PCRE ${PCRE_REQUIRED_VERSION} or above not found") endif() # we need static libs for Chimera - too much deep magic for shared libs @@ -508,7 +508,7 @@ set(PCRE_REQUIRED_MINOR_VERSION 41) set(PCRE_REQUIRED_VERSION ${PCRE_REQUIRED_MAJOR_VERSION}.${PCRE_REQUIRED_MINOR_VERSION}) include (${CMAKE_MODULE_PATH}/pcre.cmake) if (NOT CORRECT_PCRE_VERSION) - message(STATUS "PCRE ${PCRE_REQUIRED_VERSION} not found") + message(STATUS "PCRE ${PCRE_REQUIRED_VERSION} or above not found") endif() # we need static libs for Chimera - too much deep magic for shared libs diff --git a/cmake/pcre.cmake b/cmake/pcre.cmake index 2b0d23c7..e0acda5e 100644 --- a/cmake/pcre.cmake +++ b/cmake/pcre.cmake @@ -27,7 +27,7 @@ if (PCRE_BUILD_SOURCE) # first, check version number CHECK_C_SOURCE_COMPILES("#include - #if PCRE_MAJOR != ${PCRE_REQUIRED_MAJOR_VERSION} || PCRE_MINOR != ${PCRE_REQUIRED_MINOR_VERSION} + #if PCRE_MAJOR != ${PCRE_REQUIRED_MAJOR_VERSION} || PCRE_MINOR < ${PCRE_REQUIRED_MINOR_VERSION} #error Incorrect pcre version #endif main() {}" CORRECT_PCRE_VERSION) @@ -35,10 +35,10 @@ if (PCRE_BUILD_SOURCE) if (NOT CORRECT_PCRE_VERSION) unset(CORRECT_PCRE_VERSION CACHE) - message(STATUS "Incorrect version of pcre - version ${PCRE_REQUIRED_VERSION} is required") + message(STATUS "Incorrect version of pcre - version ${PCRE_REQUIRED_VERSION} or above is required") return () else() - message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION} - building from source.") + message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION} or above - building from source.") endif() # PCRE compile options @@ -52,12 +52,12 @@ if (PCRE_BUILD_SOURCE) else () # pkgconf should save us find_package(PkgConfig) - pkg_check_modules(PCRE libpcre=${PCRE_REQUIRED_VERSION}) + pkg_check_modules(PCRE libpcre>=${PCRE_REQUIRED_VERSION}) if (PCRE_FOUND) set(CORRECT_PCRE_VERSION TRUE) - message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION}") + message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION} or above") else () - message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION} not found") + message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION} or above not found") return () endif () endif (PCRE_BUILD_SOURCE) diff --git a/doc/dev-reference/compilation.rst b/doc/dev-reference/compilation.rst index 7a7f37ec..214f4abc 100644 --- a/doc/dev-reference/compilation.rst +++ b/doc/dev-reference/compilation.rst @@ -64,7 +64,7 @@ libpcre are supported. The use of unsupported constructs will result in compilation errors. The version of PCRE used to validate Hyperscan's interpretation of this syntax -is 8.41. +is 8.41 or above. 
==================== Supported Constructs From 922fe2ab20bf2d1e1b5ef6a21340a4658317a1bb Mon Sep 17 00:00:00 2001 From: Guangqing Chen Date: Tue, 4 Dec 2018 05:25:52 +0800 Subject: [PATCH 07/21] Rose: optimize switch-case with Labels-as-Values --- src/rose/match.c | 2 +- src/rose/program_runtime.c | 2721 +++++++++++++++++++++++++++++++++++- src/rose/program_runtime.h | 2652 +---------------------------------- src/rose/stream_long_lit.h | 1 + 4 files changed, 2722 insertions(+), 2654 deletions(-) diff --git a/src/rose/match.c b/src/rose/match.c index 97e93c93..8ad58b15 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -238,7 +238,7 @@ hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t, assert(id && id < t->size); // id is an offset into bytecode const u64a som = 0; const u8 flags = 0; - return roseRunProgram_i(t, scratch, id, som, end, flags); + return roseRunProgram(t, scratch, id, som, end, flags); } static rose_inline diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index 2f2a6aa3..1c6133ba 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +33,1464 @@ #include "program_runtime.h" +#include "catchup.h" +#include "counting_miracle.h" +#include "infix.h" +#include "match.h" +#include "miracle.h" +#include "report.h" +#include "rose_common.h" +#include "rose_internal.h" +#include "rose_program.h" +#include "rose_types.h" +#include "validate_mask.h" +#include "validate_shufti.h" +#include "runtime.h" +#include "util/compare.h" +#include "util/copybytes.h" +#include "util/fatbit.h" +#include "util/multibit.h" + +/* Inline implementation follows. 
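+ * (Moved here from program_runtime.h so the instruction dispatch in this
+ * file can be reworked with Labels-as-Values.)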
*/ + +static rose_inline +void rosePushDelayedMatch(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 delay, + u32 delay_index, u64a offset) { + assert(delay); + + const u32 src_slot_index = delay; + u32 slot_index = (src_slot_index + offset) & DELAY_MASK; + + struct RoseContext *tctxt = &scratch->tctxt; + if (offset + src_slot_index <= tctxt->delayLastEndOffset) { + DEBUG_PRINTF("skip too late\n"); + return; + } + + const u32 delay_count = t->delay_count; + struct fatbit **delaySlots = getDelaySlots(scratch); + struct fatbit *slot = delaySlots[slot_index]; + + DEBUG_PRINTF("pushing tab %u into slot %u\n", delay_index, slot_index); + if (!(tctxt->filledDelayedSlots & (1U << slot_index))) { + tctxt->filledDelayedSlots |= 1U << slot_index; + fatbit_clear(slot); + } + + fatbit_set(slot, delay_count, delay_index); +} + +static rose_inline +void recordAnchoredLiteralMatch(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 anch_id, + u64a end) { + assert(end); + + if (end <= t->floatingMinLiteralMatchOffset) { + return; + } + + struct fatbit **anchoredLiteralRows = getAnchoredLiteralLog(scratch); + + DEBUG_PRINTF("record %u (of %u) @ %llu\n", anch_id, t->anchored_count, end); + + if (!bf64_set(&scratch->al_log_sum, end - 1)) { + // first time, clear row + DEBUG_PRINTF("clearing %llu/%u\n", end - 1, t->anchored_count); + fatbit_clear(anchoredLiteralRows[end - 1]); + } + + assert(anch_id < t->anchored_count); + fatbit_set(anchoredLiteralRows[end - 1], t->anchored_count, anch_id); +} + +static rose_inline +char roseLeftfixCheckMiracles(const struct RoseEngine *t, + const struct LeftNfaInfo *left, + struct core_info *ci, struct mq *q, u64a end, + const char is_infix) { + if (!is_infix && left->transient) { + // Miracles won't help us with transient leftfix engines; they only + // scan for a limited time anyway. + return 1; + } + + if (!left->stopTable) { + return 1; + } + + DEBUG_PRINTF("looking for miracle on queue %u\n", q->nfa->queueIndex); + + const s64a begin_loc = q_cur_loc(q); + const s64a end_loc = end - ci->buf_offset; + + s64a miracle_loc; + if (roseMiracleOccurs(t, left, ci, begin_loc, end_loc, &miracle_loc)) { + goto found_miracle; + } + + if (roseCountingMiracleOccurs(t, left, ci, begin_loc, end_loc, + &miracle_loc)) { + goto found_miracle; + } + + return 1; + +found_miracle: + DEBUG_PRINTF("miracle at %lld\n", miracle_loc); + assert(miracle_loc >= begin_loc); + + // If we're a prefix, then a miracle effectively results in us needing to + // re-init our state and start fresh. + if (!is_infix) { + if (miracle_loc != begin_loc) { + DEBUG_PRINTF("re-init prefix state\n"); + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, miracle_loc); + pushQueueAt(q, 1, MQE_TOP, miracle_loc); + nfaQueueInitState(q->nfa, q); + } + return 1; + } + + // Otherwise, we're an infix. Remove tops before the miracle from the queue + // and re-init at that location. 
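+    // The miracle guarantees the engine state was dead at miracle_loc, so
+    // tops queued before that point can no longer influence the outcome.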
+ + q_skip_forward_to(q, miracle_loc); + + if (q_last_type(q) == MQE_START) { + DEBUG_PRINTF("miracle caused infix to die\n"); + return 0; + } + + DEBUG_PRINTF("re-init infix state\n"); + assert(q->items[q->cur].type == MQE_START); + q->items[q->cur].location = miracle_loc; + nfaQueueInitState(q->nfa, q); + + return 1; +} + +static rose_inline +hwlmcb_rv_t roseTriggerSuffix(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 qi, u32 top, + u64a som, u64a end) { + DEBUG_PRINTF("suffix qi=%u, top event=%u\n", qi, top); + + struct core_info *ci = &scratch->core_info; + u8 *aa = getActiveLeafArray(t, ci->state); + const u32 aaCount = t->activeArrayCount; + const u32 qCount = t->queueCount; + struct mq *q = &scratch->queues[qi]; + const struct NfaInfo *info = getNfaInfoByQueue(t, qi); + const struct NFA *nfa = getNfaByInfo(t, info); + + s64a loc = (s64a)end - ci->buf_offset; + assert(loc <= (s64a)ci->len && loc >= -(s64a)ci->hlen); + + if (!mmbit_set(aa, aaCount, qi)) { + initQueue(q, qi, t, scratch); + nfaQueueInitState(nfa, q); + pushQueueAt(q, 0, MQE_START, loc); + fatbit_set(scratch->aqa, qCount, qi); + } else if (info->no_retrigger) { + DEBUG_PRINTF("yawn\n"); + /* nfa only needs one top; we can go home now */ + return HWLM_CONTINUE_MATCHING; + } else if (!fatbit_set(scratch->aqa, qCount, qi)) { + initQueue(q, qi, t, scratch); + loadStreamState(nfa, q, 0); + pushQueueAt(q, 0, MQE_START, 0); + } else if (isQueueFull(q)) { + DEBUG_PRINTF("queue %u full -> catching up nfas\n", qi); + if (info->eod) { + /* can catch up suffix independently no pq */ + q->context = NULL; + pushQueueNoMerge(q, MQE_END, loc); + nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + } else if (ensureQueueFlushed(t, scratch, qi, loc) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + + assert(top == MQE_TOP || (top >= MQE_TOP_FIRST && top < MQE_INVALID)); + pushQueueSom(q, top, loc, som); + + if (q_cur_loc(q) == (s64a)ci->len && !info->eod) { + /* we may not run the nfa; need to ensure state is fine */ + DEBUG_PRINTF("empty run\n"); + pushQueueNoMerge(q, MQE_END, loc); + char alive = nfaQueueExec(nfa, q, loc); + if (alive) { + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + } else { + mmbit_unset(aa, aaCount, qi); + fatbit_unset(scratch->aqa, qCount, qi); + } + } + + return HWLM_CONTINUE_MATCHING; +} + +static really_inline +char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch, + u32 qi, u32 leftfixLag, ReportID leftfixReport, u64a end, + const char is_infix) { + struct core_info *ci = &scratch->core_info; + + u32 ri = queueToLeftIndex(t, qi); + const struct LeftNfaInfo *left = getLeftTable(t) + ri; + + DEBUG_PRINTF("testing %s %s %u/%u with lag %u (maxLag=%u)\n", + (left->transient ? "transient" : "active"), + (is_infix ? "infix" : "prefix"), + ri, qi, leftfixLag, left->maxLag); + + assert(leftfixLag <= left->maxLag); + assert(left->infix == is_infix); + assert(!is_infix || !left->transient); // Only prefixes can be transient. 
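+    // Catch the engine up to (end - lag) and check whether it is then in an
+    // accept state for leftfixReport; a dead engine is pruned from the
+    // active-left array and squashes its literal groups.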
+ + struct mq *q = scratch->queues + qi; + char *state = scratch->core_info.state; + u8 *activeLeftArray = getActiveLeftArray(t, state); + u32 qCount = t->queueCount; + u32 arCount = t->activeLeftCount; + + if (!mmbit_isset(activeLeftArray, arCount, ri)) { + DEBUG_PRINTF("engine is dead nothing to see here\n"); + return 0; + } + + if (unlikely(end < leftfixLag)) { + assert(0); /* lag is the literal length */ + return 0; + } + + if (nfaSupportsZombie(getNfaByQueue(t, qi)) && ci->buf_offset + && !fatbit_isset(scratch->aqa, qCount, qi) + && isZombie(t, state, left)) { + DEBUG_PRINTF("zombie\n"); + return 1; + } + + if (!fatbit_set(scratch->aqa, qCount, qi)) { + DEBUG_PRINTF("initing q %u\n", qi); + initRoseQueue(t, qi, left, scratch); + if (ci->buf_offset) { // there have been writes before us! + s32 sp; + if (!is_infix && left->transient) { + sp = -(s32)ci->hlen; + } else { + sp = -(s32)loadRoseDelay(t, state, left); + } + + /* transient nfas are always started fresh -> state not maintained + * at stream boundary */ + + pushQueueAt(q, 0, MQE_START, sp); + if (is_infix || (ci->buf_offset + sp > 0 && !left->transient)) { + loadStreamState(q->nfa, q, sp); + } else { + pushQueueAt(q, 1, MQE_TOP, sp); + nfaQueueInitState(q->nfa, q); + } + } else { // first write ever + pushQueueAt(q, 0, MQE_START, 0); + pushQueueAt(q, 1, MQE_TOP, 0); + nfaQueueInitState(q->nfa, q); + } + } + + s64a loc = (s64a)end - ci->buf_offset - leftfixLag; + assert(loc >= q_cur_loc(q) || left->eager); + assert(leftfixReport != MO_INVALID_IDX); + + if (!is_infix && left->transient) { + s64a start_loc = loc - left->transient; + if (q_cur_loc(q) < start_loc) { + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, start_loc); + pushQueueAt(q, 1, MQE_TOP, start_loc); + nfaQueueInitState(q->nfa, q); + } + } + + if (q_cur_loc(q) < loc || q_last_type(q) != MQE_START) { + if (is_infix) { + if (infixTooOld(q, loc)) { + DEBUG_PRINTF("infix %u died of old age\n", ri); + goto nfa_dead; + } + + reduceInfixQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); + } + + if (!roseLeftfixCheckMiracles(t, left, ci, q, end, is_infix)) { + DEBUG_PRINTF("leftfix %u died due to miracle\n", ri); + goto nfa_dead; + } + +#ifdef DEBUG + debugQueue(q); +#endif + + pushQueueNoMerge(q, MQE_END, loc); + + char rv = nfaQueueExecRose(q->nfa, q, leftfixReport); + if (!rv) { /* nfa is dead */ + DEBUG_PRINTF("leftfix %u died while trying to catch up\n", ri); + goto nfa_dead; + } + + // Queue must have next start loc before we call nfaInAcceptState. + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + + DEBUG_PRINTF("checking for report %u\n", leftfixReport); + DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); + return rv == MO_MATCHES_PENDING; + } else if (q_cur_loc(q) > loc) { + /* an eager leftfix may have already progressed past loc if there is no + * match at loc. 
*/ + assert(left->eager); + return 0; + } else { + assert(q_cur_loc(q) == loc); + DEBUG_PRINTF("checking for report %u\n", leftfixReport); + char rv = nfaInAcceptState(q->nfa, leftfixReport, q); + DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); + return rv; + } + +nfa_dead: + mmbit_unset(activeLeftArray, arCount, ri); + scratch->tctxt.groups &= left->squash_mask; + return 0; +} + +static rose_inline +char roseTestPrefix(const struct RoseEngine *t, struct hs_scratch *scratch, + u32 qi, u32 leftfixLag, ReportID leftfixReport, u64a end) { + return roseTestLeftfix(t, scratch, qi, leftfixLag, leftfixReport, end, 0); +} + +static rose_inline +char roseTestInfix(const struct RoseEngine *t, struct hs_scratch *scratch, + u32 qi, u32 leftfixLag, ReportID leftfixReport, u64a end) { + return roseTestLeftfix(t, scratch, qi, leftfixLag, leftfixReport, end, 1); +} + +static rose_inline +void roseTriggerInfix(const struct RoseEngine *t, struct hs_scratch *scratch, + u64a start, u64a end, u32 qi, u32 topEvent, u8 cancel) { + struct core_info *ci = &scratch->core_info; + s64a loc = (s64a)end - ci->buf_offset; + + u32 ri = queueToLeftIndex(t, qi); + assert(topEvent < MQE_INVALID); + + const struct LeftNfaInfo *left = getLeftInfoByQueue(t, qi); + assert(!left->transient); + + DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, topEvent); + + struct mq *q = scratch->queues + qi; + const struct NfaInfo *info = getNfaInfoByQueue(t, qi); + + char *state = ci->state; + u8 *activeLeftArray = getActiveLeftArray(t, state); + const u32 arCount = t->activeLeftCount; + char alive = mmbit_set(activeLeftArray, arCount, ri); + + if (alive && info->no_retrigger) { + DEBUG_PRINTF("yawn\n"); + return; + } + + struct fatbit *aqa = scratch->aqa; + const u32 qCount = t->queueCount; + + if (alive && nfaSupportsZombie(getNfaByInfo(t, info)) && ci->buf_offset && + !fatbit_isset(aqa, qCount, qi) && isZombie(t, state, left)) { + DEBUG_PRINTF("yawn - zombie\n"); + return; + } + + if (cancel) { + DEBUG_PRINTF("dominating top: (re)init\n"); + fatbit_set(aqa, qCount, qi); + initRoseQueue(t, qi, left, scratch); + pushQueueAt(q, 0, MQE_START, loc); + nfaQueueInitState(q->nfa, q); + } else if (!fatbit_set(aqa, qCount, qi)) { + DEBUG_PRINTF("initing %u\n", qi); + initRoseQueue(t, qi, left, scratch); + if (alive) { + s32 sp = -(s32)loadRoseDelay(t, state, left); + pushQueueAt(q, 0, MQE_START, sp); + loadStreamState(q->nfa, q, sp); + } else { + pushQueueAt(q, 0, MQE_START, loc); + nfaQueueInitState(q->nfa, q); + } + } else if (!alive) { + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + nfaQueueInitState(q->nfa, q); + } else if (isQueueFull(q)) { + reduceInfixQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); + + if (isQueueFull(q)) { + /* still full - reduceInfixQueue did nothing */ + DEBUG_PRINTF("queue %u full (%u items) -> catching up nfa\n", qi, + q->end - q->cur); + pushQueueNoMerge(q, MQE_END, loc); + nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); + + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + } + } + + pushQueueSom(q, topEvent, loc, start); +} + +static rose_inline +hwlmcb_rv_t roseReport(const struct RoseEngine *t, struct hs_scratch *scratch, + u64a end, ReportID onmatch, s32 offset_adjust, + u32 ekey) { + DEBUG_PRINTF("firing callback onmatch=%u, end=%llu\n", onmatch, end); + updateLastMatchOffset(&scratch->tctxt, end); + + int cb_rv = roseDeliverReport(end, onmatch, offset_adjust, scratch, ekey); + if (cb_rv == MO_HALT_MATCHING) { + DEBUG_PRINTF("termination requested\n"); + return 
HWLM_TERMINATE_MATCHING; + } + + if (ekey == INVALID_EKEY || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { + return HWLM_CONTINUE_MATCHING; + } + + return roseHaltIfExhausted(t, scratch); +} + +/* catches up engines enough to ensure any earlier mpv triggers are enqueued + * and then adds the trigger to the mpv queue. */ +static rose_inline +hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t, + struct hs_scratch *scratch, + u32 event, u64a top_squash_distance, + u64a end, const char in_catchup) { + if (!in_catchup && + roseCatchUpMpvFeeders(t, scratch, end) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + return roseHandleChainMatch(t, scratch, event, top_squash_distance, end, + in_catchup); +} + +static rose_inline +void roseHandleSom(struct hs_scratch *scratch, const struct som_operation *sr, + u64a end) { + DEBUG_PRINTF("end=%llu, minMatchOffset=%llu\n", end, + scratch->tctxt.minMatchOffset); + + updateLastMatchOffset(&scratch->tctxt, end); + handleSomInternal(scratch, sr, end); +} + +static rose_inline +hwlmcb_rv_t roseReportSom(const struct RoseEngine *t, + struct hs_scratch *scratch, u64a start, u64a end, + ReportID onmatch, s32 offset_adjust, u32 ekey) { + DEBUG_PRINTF("firing som callback onmatch=%u, start=%llu, end=%llu\n", + onmatch, start, end); + updateLastMatchOffset(&scratch->tctxt, end); + + int cb_rv = roseDeliverSomReport(start, end, onmatch, offset_adjust, + scratch, ekey); + if (cb_rv == MO_HALT_MATCHING) { + DEBUG_PRINTF("termination requested\n"); + return HWLM_TERMINATE_MATCHING; + } + + if (ekey == INVALID_EKEY || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { + return HWLM_CONTINUE_MATCHING; + } + + return roseHaltIfExhausted(t, scratch); +} + +static rose_inline +void roseHandleSomSom(struct hs_scratch *scratch, + const struct som_operation *sr, u64a start, u64a end) { + DEBUG_PRINTF("start=%llu, end=%llu, minMatchOffset=%llu\n", start, end, + scratch->tctxt.minMatchOffset); + + updateLastMatchOffset(&scratch->tctxt, end); + setSomFromSomAware(scratch, sr, start, end); +} + +static rose_inline +hwlmcb_rv_t roseSetExhaust(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 ekey) { + assert(scratch); + assert(scratch->magic == SCRATCH_MAGIC); + + struct core_info *ci = &scratch->core_info; + + assert(!can_stop_matching(scratch)); + assert(!isExhausted(ci->rose, ci->exhaustionVector, ekey)); + + markAsMatched(ci->rose, ci->exhaustionVector, ekey); + + return roseHaltIfExhausted(t, scratch); +} + +static really_inline +int reachHasBit(const u8 *reach, u8 c) { + return !!(reach[c / 8U] & (u8)1U << (c % 8U)); +} + +/* + * Generate a 8-byte valid_mask with #high bytes 0 from the highest side + * and #low bytes 0 from the lowest side + * and (8 - high - low) bytes '0xff' in the middle. + */ +static rose_inline +u64a generateValidMask(const s32 high, const s32 low) { + assert(high + low < 8); + DEBUG_PRINTF("high %d low %d\n", high, low); + const u64a ones = ~0ull; + return (ones << ((high + low) * 8)) >> (high * 8); +} + +/* + * Do the single-byte check if only one lookaround entry exists + * and it's a single mask. + * Return success if the byte is in the future or before history + * (offset is greater than (history) buffer length). 
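+ * The negation flag flips the (and_mask & c) == cmp_mask test, so the same
+ * instruction can also assert that a byte does not match.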
+ */ +static rose_inline +int roseCheckByte(const struct core_info *ci, u8 and_mask, u8 cmp_mask, + u8 negation, s32 checkOffset, u64a end) { + DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, + ci->buf_offset, ci->buf_offset + ci->len); + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + const s64a base_offset = end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("checkOffset=%d offset=%lld\n", checkOffset, offset); + u8 c; + if (offset >= 0) { + if (offset >= (s64a)ci->len) { + DEBUG_PRINTF("in the future\n"); + return 1; + } else { + assert(offset < (s64a)ci->len); + DEBUG_PRINTF("check byte in buffer\n"); + c = ci->buf[offset]; + } + } else { + if (offset >= -(s64a) ci->hlen) { + DEBUG_PRINTF("check byte in history\n"); + c = ci->hbuf[ci->hlen + offset]; + } else { + DEBUG_PRINTF("before history and return\n"); + return 1; + } + } + + if (((and_mask & c) != cmp_mask) ^ negation) { + DEBUG_PRINTF("char 0x%02x at offset %lld failed byte check\n", + c, offset); + return 0; + } + + DEBUG_PRINTF("real offset=%lld char=%02x\n", offset, c); + DEBUG_PRINTF("OK :)\n"); + return 1; +} + +static rose_inline +int roseCheckMask(const struct core_info *ci, u64a and_mask, u64a cmp_mask, + u64a neg_mask, s32 checkOffset, u64a end) { + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("rel offset %lld\n",base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + u64a data = 0; + u64a valid_data_mask = ~0ULL; // mask for validate check. + //A 0xff byte means that this byte is in the buffer. + s32 shift_l = 0; // size of bytes in the future. + s32 shift_r = 0; // size of bytes before the history. + s32 h_len = 0; // size of bytes in the history buffer. + s32 c_len = 8; // size of bytes in the current buffer. + if (offset < 0) { + // in or before history buffer. + if (offset + 8 <= -(s64a)ci->hlen) { + DEBUG_PRINTF("before history and return\n"); + return 1; + } + const u8 *h_start = ci->hbuf; // start pointer in history buffer. + if (offset < -(s64a)ci->hlen) { + // some bytes are before history. + shift_r = -(offset + (s64a)ci->hlen); + DEBUG_PRINTF("shift_r %d", shift_r); + } else { + h_start += ci->hlen + offset; + } + if (offset + 7 < 0) { + DEBUG_PRINTF("all in history buffer\n"); + data = partial_load_u64a(h_start, 8 - shift_r); + } else { + // history part + c_len = offset + 8; + h_len = -offset - shift_r; + DEBUG_PRINTF("%d bytes in history\n", h_len); + s64a data_h = 0; + data_h = partial_load_u64a(h_start, h_len); + // current part + if (c_len > (s64a)ci->len) { + shift_l = c_len - ci->len; + c_len = ci->len; + } + data = partial_load_u64a(ci->buf, c_len); + data <<= h_len << 3; + data |= data_h; + } + if (shift_r) { + data <<= shift_r << 3; + } + } else { + // current buffer. + if (offset + c_len > (s64a)ci->len) { + if (offset >= (s64a)ci->len) { + DEBUG_PRINTF("all in the future\n"); + return 1; + } + // some bytes in the future. 
+ shift_l = offset + c_len - ci->len; + c_len = ci->len - offset; + data = partial_load_u64a(ci->buf + offset, c_len); + } else { + data = unaligned_load_u64a(ci->buf + offset); + } + } + + if (shift_l || shift_r) { + valid_data_mask = generateValidMask(shift_l, shift_r); + } + DEBUG_PRINTF("valid_data_mask %llx\n", valid_data_mask); + + if (validateMask(data, valid_data_mask, + and_mask, cmp_mask, neg_mask)) { + DEBUG_PRINTF("check mask successfully\n"); + return 1; + } else { + return 0; + } +} + +static rose_inline +int roseCheckMask32(const struct core_info *ci, const u8 *and_mask, + const u8 *cmp_mask, const u32 neg_mask, + s32 checkOffset, u64a end) { + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + m256 data = zeroes256(); // consists of the following four parts. + s32 c_shift = 0; // blank bytes after current. + s32 h_shift = 0; // blank bytes before history. + s32 h_len = 32; // number of bytes from history buffer. + s32 c_len = 0; // number of bytes from current buffer. + /* h_shift + h_len + c_len + c_shift = 32 need to be hold.*/ + + if (offset < 0) { + s32 h_offset = 0; // the start offset in history buffer. + if (offset < -(s64a)ci->hlen) { + if (offset + 32 <= -(s64a)ci->hlen) { + DEBUG_PRINTF("all before history\n"); + return 1; + } + h_shift = -(offset + (s64a)ci->hlen); + h_len = 32 - h_shift; + } else { + h_offset = ci->hlen + offset; + } + if (offset + 32 > 0) { + // part in current buffer. + c_len = offset + 32; + h_len = -(offset + h_shift); + if (c_len > (s64a)ci->len) { + // out of current buffer. + c_shift = c_len - ci->len; + c_len = ci->len; + } + copy_upto_32_bytes((u8 *)&data - offset, ci->buf, c_len); + } + assert(h_shift + h_len + c_len + c_shift == 32); + copy_upto_32_bytes((u8 *)&data + h_shift, ci->hbuf + h_offset, h_len); + } else { + if (offset + 32 > (s64a)ci->len) { + if (offset >= (s64a)ci->len) { + DEBUG_PRINTF("all in the future.\n"); + return 1; + } + c_len = ci->len - offset; + c_shift = 32 - c_len; + copy_upto_32_bytes((u8 *)&data, ci->buf + offset, c_len); + } else { + data = loadu256(ci->buf + offset); + } + } + DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift); + DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len); + // we use valid_data_mask to blind bytes before history/in the future. + u32 valid_data_mask; + valid_data_mask = (~0u) << (h_shift + c_shift) >> (c_shift); + + m256 and_mask_m256 = loadu256(and_mask); + m256 cmp_mask_m256 = loadu256(cmp_mask); + if (validateMask32(data, valid_data_mask, and_mask_m256, + cmp_mask_m256, neg_mask)) { + DEBUG_PRINTF("Mask32 passed\n"); + return 1; + } + return 0; +} + +// get 128/256 bits data from history and current buffer. +// return data and valid_data_mask. +static rose_inline +u32 getBufferDataComplex(const struct core_info *ci, const s64a loc, + u8 *data, const u32 data_len) { + assert(data_len == 16 || data_len == 32); + s32 c_shift = 0; // blank bytes after current. + s32 h_shift = 0; // blank bytes before history. + s32 h_len = data_len; // number of bytes from history buffer. + s32 c_len = 0; // number of bytes from current buffer. + if (loc < 0) { + s32 h_offset = 0; // the start offset in history buffer. 
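+        // Split the window between history and current buffers; bytes
+        // falling off either end become the h_shift/c_shift blanks excluded
+        // from the returned valid mask.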
+ if (loc < -(s64a)ci->hlen) { + if (loc + data_len <= -(s64a)ci->hlen) { + DEBUG_PRINTF("all before history\n"); + return 0; + } + h_shift = -(loc + (s64a)ci->hlen); + h_len = data_len - h_shift; + } else { + h_offset = ci->hlen + loc; + } + if (loc + data_len > 0) { + // part in current buffer. + c_len = loc + data_len; + h_len = -(loc + h_shift); + if (c_len > (s64a)ci->len) { + // out of current buffer. + c_shift = c_len - ci->len; + c_len = ci->len; + } + copy_upto_32_bytes(data - loc, ci->buf, c_len); + } + assert(h_shift + h_len + c_len + c_shift == (s32)data_len); + copy_upto_32_bytes(data + h_shift, ci->hbuf + h_offset, h_len); + } else { + if (loc + data_len > (s64a)ci->len) { + if (loc >= (s64a)ci->len) { + DEBUG_PRINTF("all in the future.\n"); + return 0; + } + c_len = ci->len - loc; + c_shift = data_len - c_len; + copy_upto_32_bytes(data, ci->buf + loc, c_len); + } else { + if (data_len == 16) { + storeu128(data, loadu128(ci->buf + loc)); + return 0xffff; + } else { + storeu256(data, loadu256(ci->buf + loc)); + return 0xffffffff; + } + } + } + DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift); + DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len); + + if (data_len == 16) { + return (u16)(0xffff << (h_shift + c_shift)) >> c_shift; + } else { + return (~0u) << (h_shift + c_shift) >> c_shift; + } +} + +static rose_inline +m128 getData128(const struct core_info *ci, s64a offset, u32 *valid_data_mask) { + if (offset > 0 && offset + sizeof(m128) <= ci->len) { + *valid_data_mask = 0xffff; + return loadu128(ci->buf + offset); + } + ALIGN_DIRECTIVE u8 data[sizeof(m128)]; + *valid_data_mask = getBufferDataComplex(ci, offset, data, 16); + return *(m128 *)data; +} + +static rose_inline +m256 getData256(const struct core_info *ci, s64a offset, u32 *valid_data_mask) { + if (offset > 0 && offset + sizeof(m256) <= ci->len) { + *valid_data_mask = ~0u; + return loadu256(ci->buf + offset); + } + ALIGN_AVX_DIRECTIVE u8 data[sizeof(m256)]; + *valid_data_mask = getBufferDataComplex(ci, offset, data, 32); + return *(m256 *)data; +} + +static rose_inline +int roseCheckShufti16x8(const struct core_info *ci, const u8 *nib_mask, + const u8 *bucket_select_mask, u32 neg_mask, + s32 checkOffset, u64a end) { + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + u32 valid_data_mask = 0; + m128 data = getData128(ci, offset, &valid_data_mask); + if (unlikely(!valid_data_mask)) { + return 1; + } + + m256 nib_mask_m256 = loadu256(nib_mask); + m128 bucket_select_mask_m128 = loadu128(bucket_select_mask); + if (validateShuftiMask16x8(data, nib_mask_m256, + bucket_select_mask_m128, + neg_mask, valid_data_mask)) { + DEBUG_PRINTF("check shufti 16x8 successfully\n"); + return 1; + } else { + return 0; + } +} + +static rose_inline +int roseCheckShufti16x16(const struct core_info *ci, const u8 *hi_mask, + const u8 *lo_mask, const u8 *bucket_select_mask, + u32 neg_mask, s32 checkOffset, u64a end) { + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too 
early, fail\n"); + return 0; + } + + u32 valid_data_mask = 0; + m128 data = getData128(ci, offset, &valid_data_mask); + if (unlikely(!valid_data_mask)) { + return 1; + } + + m256 data_m256 = set2x128(data); + m256 hi_mask_m256 = loadu256(hi_mask); + m256 lo_mask_m256 = loadu256(lo_mask); + m256 bucket_select_mask_m256 = loadu256(bucket_select_mask); + if (validateShuftiMask16x16(data_m256, hi_mask_m256, lo_mask_m256, + bucket_select_mask_m256, + neg_mask, valid_data_mask)) { + DEBUG_PRINTF("check shufti 16x16 successfully\n"); + return 1; + } else { + return 0; + } +} + +static rose_inline +int roseCheckShufti32x8(const struct core_info *ci, const u8 *hi_mask, + const u8 *lo_mask, const u8 *bucket_select_mask, + u32 neg_mask, s32 checkOffset, u64a end) { + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + u32 valid_data_mask = 0; + m256 data = getData256(ci, offset, &valid_data_mask); + if (unlikely(!valid_data_mask)) { + return 1; + } + + m128 hi_mask_m128 = loadu128(hi_mask); + m128 lo_mask_m128 = loadu128(lo_mask); + m256 hi_mask_m256 = set2x128(hi_mask_m128); + m256 lo_mask_m256 = set2x128(lo_mask_m128); + m256 bucket_select_mask_m256 = loadu256(bucket_select_mask); + if (validateShuftiMask32x8(data, hi_mask_m256, lo_mask_m256, + bucket_select_mask_m256, + neg_mask, valid_data_mask)) { + DEBUG_PRINTF("check shufti 32x8 successfully\n"); + return 1; + } else { + return 0; + } +} + +static rose_inline +int roseCheckShufti32x16(const struct core_info *ci, const u8 *hi_mask, + const u8 *lo_mask, const u8 *bucket_select_mask_hi, + const u8 *bucket_select_mask_lo, u32 neg_mask, + s32 checkOffset, u64a end) { + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + u32 valid_data_mask = 0; + m256 data = getData256(ci, offset, &valid_data_mask); + if (unlikely(!valid_data_mask)) { + return 1; + } + + m256 hi_mask_1 = loadu2x128(hi_mask); + m256 hi_mask_2 = loadu2x128(hi_mask + 16); + m256 lo_mask_1 = loadu2x128(lo_mask); + m256 lo_mask_2 = loadu2x128(lo_mask + 16); + + m256 bucket_mask_hi = loadu256(bucket_select_mask_hi); + m256 bucket_mask_lo = loadu256(bucket_select_mask_lo); + if (validateShuftiMask32x16(data, hi_mask_1, hi_mask_2, + lo_mask_1, lo_mask_2, bucket_mask_hi, + bucket_mask_lo, neg_mask, valid_data_mask)) { + DEBUG_PRINTF("check shufti 32x16 successfully\n"); + return 1; + } else { + return 0; + } +} + +static rose_inline +int roseCheckSingleLookaround(const struct RoseEngine *t, + const struct hs_scratch *scratch, + s8 checkOffset, u32 lookaroundReachIndex, + u64a end) { + assert(lookaroundReachIndex != MO_INVALID_IDX); + const struct core_info *ci = &scratch->core_info; + DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, + ci->buf_offset, ci->buf_offset + ci->len); + + const s64a base_offset = end - ci->buf_offset; + const s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("base_offset=%lld\n", base_offset); + DEBUG_PRINTF("checkOffset=%d offset=%lld\n", 
checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + const u8 *reach = getByOffset(t, lookaroundReachIndex); + + u8 c; + if (offset >= 0 && offset < (s64a)ci->len) { + c = ci->buf[offset]; + } else if (offset < 0 && offset >= -(s64a)ci->hlen) { + c = ci->hbuf[ci->hlen + offset]; + } else { + return 1; + } + + if (!reachHasBit(reach, c)) { + DEBUG_PRINTF("char 0x%02x failed reach check\n", c); + return 0; + } + + DEBUG_PRINTF("OK :)\n"); + return 1; +} + +/** + * \brief Scan around a literal, checking that that "lookaround" reach masks + * are satisfied. + */ +static rose_inline +int roseCheckLookaround(const struct RoseEngine *t, + const struct hs_scratch *scratch, + u32 lookaroundLookIndex, u32 lookaroundReachIndex, + u32 lookaroundCount, u64a end) { + assert(lookaroundLookIndex != MO_INVALID_IDX); + assert(lookaroundReachIndex != MO_INVALID_IDX); + assert(lookaroundCount > 0); + + const struct core_info *ci = &scratch->core_info; + DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, + ci->buf_offset, ci->buf_offset + ci->len); + + const s8 *look = getByOffset(t, lookaroundLookIndex); + const s8 *look_end = look + lookaroundCount; + assert(look < look_end); + + const u8 *reach = getByOffset(t, lookaroundReachIndex); + + // The following code assumes that the lookaround structures are ordered by + // increasing offset. + + const s64a base_offset = end - ci->buf_offset; + DEBUG_PRINTF("base_offset=%lld\n", base_offset); + DEBUG_PRINTF("first look has offset %d\n", *look); + + // If our first check tells us we need to look at an offset before the + // start of the stream, this role cannot match. + if (unlikely(*look < 0 && (u64a)(0 - *look) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + // Skip over offsets that are before the history buffer. + do { + s64a offset = base_offset + *look; + if (offset >= -(s64a)ci->hlen) { + goto in_history; + } + DEBUG_PRINTF("look=%d before history\n", *look); + look++; + reach += REACH_BITVECTOR_LEN; + } while (look < look_end); + + // History buffer. + DEBUG_PRINTF("scan history (%zu looks left)\n", look_end - look); + for (; look < look_end; ++look, reach += REACH_BITVECTOR_LEN) { + in_history: + ; + s64a offset = base_offset + *look; + DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset); + + if (offset >= 0) { + DEBUG_PRINTF("in buffer\n"); + goto in_buffer; + } + + assert(offset >= -(s64a)ci->hlen && offset < 0); + u8 c = ci->hbuf[ci->hlen + offset]; + if (!reachHasBit(reach, c)) { + DEBUG_PRINTF("char 0x%02x failed reach check\n", c); + return 0; + } + } + // Current buffer. + DEBUG_PRINTF("scan buffer (%zu looks left)\n", look_end - look); + for (; look < look_end; ++look, reach += REACH_BITVECTOR_LEN) { + in_buffer: + ; + s64a offset = base_offset + *look; + DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset); + + if (offset >= (s64a)ci->len) { + DEBUG_PRINTF("in the future\n"); + break; + } + + assert(offset >= 0 && offset < (s64a)ci->len); + u8 c = ci->buf[offset]; + if (!reachHasBit(reach, c)) { + DEBUG_PRINTF("char 0x%02x failed reach check\n", c); + return 0; + } + } + + DEBUG_PRINTF("OK :)\n"); + return 1; +} + +/** + * \brief Trying to find a matching path by the corresponding path mask of + * every lookaround location. 
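+ * The path byte is seeded from the start_mask entry of the first in-range
+ * look and is ANDed with reach[c] at each later location; once it reaches
+ * zero, no path can match.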
+ */ +static rose_inline +int roseMultipathLookaround(const struct RoseEngine *t, + const struct hs_scratch *scratch, + u32 multipathLookaroundLookIndex, + u32 multipathLookaroundReachIndex, + u32 multipathLookaroundCount, + s32 last_start, const u8 *start_mask, + u64a end) { + assert(multipathLookaroundCount > 0); + + const struct core_info *ci = &scratch->core_info; + DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, + ci->buf_offset, ci->buf_offset + ci->len); + + const s8 *look = getByOffset(t, multipathLookaroundLookIndex); + const s8 *look_end = look + multipathLookaroundCount; + assert(look < look_end); + + const u8 *reach = getByOffset(t, multipathLookaroundReachIndex); + + const s64a base_offset = (s64a)end - ci->buf_offset; + DEBUG_PRINTF("base_offset=%lld\n", base_offset); + + u8 path = 0xff; + + assert(last_start < 0); + + if (unlikely((u64a)(0 - last_start) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + s8 base_look_offset = *look; + do { + s64a offset = base_offset + *look; + u32 start_offset = (u32)(*look - base_look_offset); + DEBUG_PRINTF("start_mask[%u] = %x\n", start_offset, + start_mask[start_offset]); + path = start_mask[start_offset]; + if (offset >= -(s64a)ci->hlen) { + break; + } + DEBUG_PRINTF("look=%d before history\n", *look); + look++; + reach += MULTI_REACH_BITVECTOR_LEN; + } while (look < look_end); + + DEBUG_PRINTF("scan history (%zu looks left)\n", look_end - look); + for (; look < look_end; ++look, reach += MULTI_REACH_BITVECTOR_LEN) { + s64a offset = base_offset + *look; + DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset); + + if (offset >= 0) { + DEBUG_PRINTF("in buffer\n"); + break; + } + + assert(offset >= -(s64a)ci->hlen && offset < 0); + u8 c = ci->hbuf[ci->hlen + offset]; + path &= reach[c]; + DEBUG_PRINTF("reach[%x] = %02x path = %0xx\n", c, reach[c], path); + if (!path) { + DEBUG_PRINTF("char 0x%02x failed reach check\n", c); + return 0; + } + } + + DEBUG_PRINTF("scan buffer (%zu looks left)\n", look_end - look); + for(; look < look_end; ++look, reach += MULTI_REACH_BITVECTOR_LEN) { + s64a offset = base_offset + *look; + DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset); + + if (offset >= (s64a)ci->len) { + DEBUG_PRINTF("in the future\n"); + break; + } + + assert(offset >= 0 && offset < (s64a)ci->len); + u8 c = ci->buf[offset]; + path &= reach[c]; + DEBUG_PRINTF("reach[%x] = %02x path = %0xx\n", c, reach[c], path); + if (!path) { + DEBUG_PRINTF("char 0x%02x failed reach check\n", c); + return 0; + } + } + + DEBUG_PRINTF("OK :)\n"); + return 1; +} + +static never_inline +int roseCheckMultipathShufti16x8(const struct hs_scratch *scratch, + const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_16x8 *ri, + u64a end) { + const struct core_info *ci = &scratch->core_info; + s32 checkOffset = ri->base_offset; + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + assert(ri->last_start <= 0); + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + if ((u64a)(0 - ri->last_start) > end) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + } + + u32 valid_data_mask; + m128 data_init = getData128(ci, offset, &valid_data_mask); + m128 data_select_mask = loadu128(ri->data_select_mask); + + u32 valid_path_mask = 0; + if (unlikely(!(valid_data_mask & 1))) { + DEBUG_PRINTF("lose part of backward data\n"); + 
DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); + + m128 expand_valid; + u64a expand_mask = 0x8080808080808080ULL; + u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); + u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); + DEBUG_PRINTF("expand_hi %llx\n", valid_hi); + DEBUG_PRINTF("expand_lo %llx\n", valid_lo); + expand_valid = set64x2(valid_hi, valid_lo); + valid_path_mask = ~movemask128(pshufb_m128(expand_valid, + data_select_mask)); + } + + m128 data = pshufb_m128(data_init, data_select_mask); + m256 nib_mask = loadu256(ri->nib_mask); + m128 bucket_select_mask = loadu128(ri->bucket_select_mask); + + u32 hi_bits_mask = ri->hi_bits_mask; + u32 lo_bits_mask = ri->lo_bits_mask; + u32 neg_mask = ri->neg_mask; + + if (validateMultipathShuftiMask16x8(data, nib_mask, + bucket_select_mask, + hi_bits_mask, lo_bits_mask, + neg_mask, valid_path_mask)) { + DEBUG_PRINTF("check multi-path shufti-16x8 successfully\n"); + return 1; + } else { + return 0; + } +} + +static never_inline +int roseCheckMultipathShufti32x8(const struct hs_scratch *scratch, + const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x8 *ri, + u64a end) { + const struct core_info *ci = &scratch->core_info; + s32 checkOffset = ri->base_offset; + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + assert(ri->last_start <= 0); + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + if ((u64a)(0 - ri->last_start) > end) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + } + + u32 valid_data_mask; + m128 data_m128 = getData128(ci, offset, &valid_data_mask); + m256 data_double = set2x128(data_m128); + m256 data_select_mask = loadu256(ri->data_select_mask); + + u32 valid_path_mask = 0; + m256 expand_valid; + if (unlikely(!(valid_data_mask & 1))) { + DEBUG_PRINTF("lose part of backward data\n"); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); + + u64a expand_mask = 0x8080808080808080ULL; + u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); + u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); + DEBUG_PRINTF("expand_hi %llx\n", valid_hi); + DEBUG_PRINTF("expand_lo %llx\n", valid_lo); + expand_valid = set64x4(valid_hi, valid_lo, valid_hi, + valid_lo); + valid_path_mask = ~movemask256(pshufb_m256(expand_valid, + data_select_mask)); + } + + m256 data = pshufb_m256(data_double, data_select_mask); + m256 hi_mask = loadu2x128(ri->hi_mask); + m256 lo_mask = loadu2x128(ri->lo_mask); + m256 bucket_select_mask = loadu256(ri->bucket_select_mask); + + u32 hi_bits_mask = ri->hi_bits_mask; + u32 lo_bits_mask = ri->lo_bits_mask; + u32 neg_mask = ri->neg_mask; + + if (validateMultipathShuftiMask32x8(data, hi_mask, lo_mask, + bucket_select_mask, + hi_bits_mask, lo_bits_mask, + neg_mask, valid_path_mask)) { + DEBUG_PRINTF("check multi-path shufti-32x8 successfully\n"); + return 1; + } else { + return 0; + } +} + +static never_inline +int roseCheckMultipathShufti32x16(const struct hs_scratch *scratch, + const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x16 *ri, + u64a end) { + const struct core_info *ci = &scratch->core_info; + const s64a base_offset = (s64a)end - ci->buf_offset; + s32 checkOffset = ri->base_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + 
assert(ri->last_start <= 0); + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + if ((u64a)(0 - ri->last_start) > end) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + } + + u32 valid_data_mask; + m128 data_m128 = getData128(ci, offset, &valid_data_mask); + m256 data_double = set2x128(data_m128); + m256 data_select_mask = loadu256(ri->data_select_mask); + + u32 valid_path_mask = 0; + m256 expand_valid; + if (unlikely(!(valid_data_mask & 1))) { + DEBUG_PRINTF("lose part of backward data\n"); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); + + u64a expand_mask = 0x8080808080808080ULL; + u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); + u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); + DEBUG_PRINTF("expand_hi %llx\n", valid_hi); + DEBUG_PRINTF("expand_lo %llx\n", valid_lo); + expand_valid = set64x4(valid_hi, valid_lo, valid_hi, + valid_lo); + valid_path_mask = ~movemask256(pshufb_m256(expand_valid, + data_select_mask)); + } + + m256 data = pshufb_m256(data_double, data_select_mask); + + m256 hi_mask_1 = loadu2x128(ri->hi_mask); + m256 hi_mask_2 = loadu2x128(ri->hi_mask + 16); + m256 lo_mask_1 = loadu2x128(ri->lo_mask); + m256 lo_mask_2 = loadu2x128(ri->lo_mask + 16); + + m256 bucket_select_mask_hi = loadu256(ri->bucket_select_mask_hi); + m256 bucket_select_mask_lo = loadu256(ri->bucket_select_mask_lo); + + u32 hi_bits_mask = ri->hi_bits_mask; + u32 lo_bits_mask = ri->lo_bits_mask; + u32 neg_mask = ri->neg_mask; + + if (validateMultipathShuftiMask32x16(data, hi_mask_1, hi_mask_2, + lo_mask_1, lo_mask_2, + bucket_select_mask_hi, + bucket_select_mask_lo, + hi_bits_mask, lo_bits_mask, + neg_mask, valid_path_mask)) { + DEBUG_PRINTF("check multi-path shufti-32x16 successfully\n"); + return 1; + } else { + return 0; + } +} + +static never_inline +int roseCheckMultipathShufti64(const struct hs_scratch *scratch, + const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64 *ri, + u64a end) { + const struct core_info *ci = &scratch->core_info; + const s64a base_offset = (s64a)end - ci->buf_offset; + s32 checkOffset = ri->base_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + if ((u64a)(0 - ri->last_start) > end) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + } + + u32 valid_data_mask; + m128 data_m128 = getData128(ci, offset, &valid_data_mask); + m256 data_m256 = set2x128(data_m128); + m256 data_select_mask_1 = loadu256(ri->data_select_mask); + m256 data_select_mask_2 = loadu256(ri->data_select_mask + 32); + + u64a valid_path_mask = 0; + m256 expand_valid; + if (unlikely(!(valid_data_mask & 1))) { + DEBUG_PRINTF("lose part of backward data\n"); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); + + u64a expand_mask = 0x8080808080808080ULL; + u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); + u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); + DEBUG_PRINTF("expand_hi %llx\n", valid_hi); + DEBUG_PRINTF("expand_lo %llx\n", valid_lo); + expand_valid = set64x4(valid_hi, valid_lo, valid_hi, + valid_lo); + u32 valid_path_1 = movemask256(pshufb_m256(expand_valid, + data_select_mask_1)); + u32 valid_path_2 = movemask256(pshufb_m256(expand_valid, + data_select_mask_2)); + valid_path_mask = ~((u64a)valid_path_1 | (u64a)valid_path_2 << 32); + } + + m256 data_1 = pshufb_m256(data_m256, data_select_mask_1); + m256 
data_2 = pshufb_m256(data_m256, data_select_mask_2); + + m256 hi_mask = loadu2x128(ri->hi_mask); + m256 lo_mask = loadu2x128(ri->lo_mask); + + m256 bucket_select_mask_1 = loadu256(ri->bucket_select_mask); + m256 bucket_select_mask_2 = loadu256(ri->bucket_select_mask + 32); + + u64a hi_bits_mask = ri->hi_bits_mask; + u64a lo_bits_mask = ri->lo_bits_mask; + u64a neg_mask = ri->neg_mask; + + if (validateMultipathShuftiMask64(data_1, data_2, hi_mask, lo_mask, + bucket_select_mask_1, + bucket_select_mask_2, hi_bits_mask, + lo_bits_mask, neg_mask, + valid_path_mask)) { + DEBUG_PRINTF("check multi-path shufti-64 successfully\n"); + return 1; + } else { + return 0; + } +} + +static rose_inline int roseNfaEarliestSom(u64a start, UNUSED u64a end, UNUSED ReportID id, void *context) { assert(context); @@ -41,8 +1499,1267 @@ int roseNfaEarliestSom(u64a start, UNUSED u64a end, UNUSED ReportID id, return MO_CONTINUE_MATCHING; } +static rose_inline +u64a roseGetHaigSom(const struct RoseEngine *t, struct hs_scratch *scratch, + const u32 qi, UNUSED const u32 leftfixLag) { + u32 ri = queueToLeftIndex(t, qi); + + UNUSED const struct LeftNfaInfo *left = getLeftTable(t) + ri; + + DEBUG_PRINTF("testing %s prefix %u/%u with lag %u (maxLag=%u)\n", + left->transient ? "transient" : "active", ri, qi, + leftfixLag, left->maxLag); + + assert(leftfixLag <= left->maxLag); + + struct mq *q = scratch->queues + qi; + + u64a start = ~0ULL; + + /* switch the callback + context for a fun one */ + q->cb = roseNfaEarliestSom; + q->context = &start; + + nfaReportCurrentMatches(q->nfa, q); + + /* restore the old callback + context */ + q->cb = roseNfaAdaptor; + q->context = NULL; + DEBUG_PRINTF("earliest som is %llu\n", start); + return start; +} + +static rose_inline +char roseCheckBounds(u64a end, u64a min_bound, u64a max_bound) { + DEBUG_PRINTF("check offset=%llu against bounds [%llu,%llu]\n", end, + min_bound, max_bound); + assert(min_bound <= max_bound); + return end >= min_bound && end <= max_bound; +} + +static rose_inline +hwlmcb_rv_t roseEnginesEod(const struct RoseEngine *rose, + struct hs_scratch *scratch, u64a offset, + u32 iter_offset) { + const char is_streaming = rose->mode != HS_MODE_BLOCK; + + /* data, len is used for state decompress, should be full available data */ + u8 key = 0; + if (is_streaming) { + const u8 *eod_data = scratch->core_info.hbuf; + size_t eod_len = scratch->core_info.hlen; + key = eod_len ? eod_data[eod_len - 1] : 0; + } + + const u8 *aa = getActiveLeafArray(rose, scratch->core_info.state); + const u32 aaCount = rose->activeArrayCount; + const u32 qCount = rose->queueCount; + struct fatbit *aqa = scratch->aqa; + + const struct mmbit_sparse_iter *it = getByOffset(rose, iter_offset); + assert(ISALIGNED(it)); + + u32 idx = 0; + struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; + + for (u32 qi = mmbit_sparse_iter_begin(aa, aaCount, &idx, it, si_state); + qi != MMB_INVALID; + qi = mmbit_sparse_iter_next(aa, aaCount, qi, &idx, it, si_state)) { + DEBUG_PRINTF("checking nfa %u\n", qi); + struct mq *q = scratch->queues + qi; + if (!fatbit_set(aqa, qCount, qi)) { + initQueue(q, qi, rose, scratch); + } + + assert(q->nfa == getNfaByQueue(rose, qi)); + assert(nfaAcceptsEod(q->nfa)); + + if (is_streaming) { + // Decompress stream state. 
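+            // The key byte computed above seeds the decompression.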
+ nfaExpandState(q->nfa, q->state, q->streamState, offset, key); + } + + if (nfaCheckFinalState(q->nfa, q->state, q->streamState, offset, + roseReportAdaptor, + scratch) == MO_HALT_MATCHING) { + DEBUG_PRINTF("user instructed us to stop\n"); + return HWLM_TERMINATE_MATCHING; + } + } + + return HWLM_CONTINUE_MATCHING; +} + +static rose_inline +hwlmcb_rv_t roseSuffixesEod(const struct RoseEngine *rose, + struct hs_scratch *scratch, u64a offset) { + const u8 *aa = getActiveLeafArray(rose, scratch->core_info.state); + const u32 aaCount = rose->activeArrayCount; + + for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID; + qi = mmbit_iterate(aa, aaCount, qi)) { + DEBUG_PRINTF("checking nfa %u\n", qi); + struct mq *q = scratch->queues + qi; + assert(q->nfa == getNfaByQueue(rose, qi)); + assert(nfaAcceptsEod(q->nfa)); + + /* We have just been triggered. */ + assert(fatbit_isset(scratch->aqa, rose->queueCount, qi)); + + pushQueueNoMerge(q, MQE_END, scratch->core_info.len); + q->context = NULL; + + /* rose exec is used as we don't want to / can't raise matches in the + * history buffer. */ + if (!nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX)) { + DEBUG_PRINTF("nfa is dead\n"); + continue; + } + if (nfaCheckFinalState(q->nfa, q->state, q->streamState, offset, + roseReportAdaptor, + scratch) == MO_HALT_MATCHING) { + DEBUG_PRINTF("user instructed us to stop\n"); + return HWLM_TERMINATE_MATCHING; + } + } + return HWLM_CONTINUE_MATCHING; +} + +static rose_inline +hwlmcb_rv_t roseMatcherEod(const struct RoseEngine *rose, + struct hs_scratch *scratch, u64a offset) { + assert(rose->ematcherOffset); + assert(rose->ematcherRegionSize); + + // Clear role state and active engines, since we have already handled all + // outstanding work there. + DEBUG_PRINTF("clear role state and active leaf array\n"); + char *state = scratch->core_info.state; + mmbit_clear(getRoleState(state), rose->rolesWithStateCount); + mmbit_clear(getActiveLeafArray(rose, state), rose->activeArrayCount); + + const char is_streaming = rose->mode != HS_MODE_BLOCK; + + size_t eod_len; + const u8 *eod_data; + if (!is_streaming) { /* Block */ + eod_data = scratch->core_info.buf; + eod_len = scratch->core_info.len; + } else { /* Streaming */ + eod_len = scratch->core_info.hlen; + eod_data = scratch->core_info.hbuf; + } + + assert(eod_data); + assert(eod_len); + + DEBUG_PRINTF("%zu bytes of eod data to scan at offset %llu\n", eod_len, + offset); + + // If we don't have enough bytes to produce a match from an EOD table scan, + // there's no point scanning. + if (eod_len < rose->eodmatcherMinWidth) { + DEBUG_PRINTF("too short for min width %u\n", rose->eodmatcherMinWidth); + return HWLM_CONTINUE_MATCHING; + } + + // Ensure that we only need scan the last N bytes, where N is the length of + // the eod-anchored matcher region. + size_t adj = eod_len - MIN(eod_len, rose->ematcherRegionSize); + + const struct HWLM *etable = getByOffset(rose, rose->ematcherOffset); + hwlmExec(etable, eod_data, eod_len, adj, roseCallback, scratch, + scratch->tctxt.groups); + + // We may need to fire delayed matches. 
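+    // cleanUpDelayed replays any delayed literals queued by the scan above.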
+ if (cleanUpDelayed(rose, scratch, 0, offset) == HWLM_TERMINATE_MATCHING) { + DEBUG_PRINTF("user instructed us to stop\n"); + return HWLM_TERMINATE_MATCHING; + } + + roseFlushLastByteHistory(rose, scratch, offset); + return HWLM_CONTINUE_MATCHING; +} + +static rose_inline +int roseCheckLongLiteral(const struct RoseEngine *t, + const struct hs_scratch *scratch, u64a end, + u32 lit_offset, u32 lit_length, char nocase) { + const struct core_info *ci = &scratch->core_info; + const u8 *lit = getByOffset(t, lit_offset); + + DEBUG_PRINTF("check lit at %llu, length %u\n", end, lit_length); + DEBUG_PRINTF("base buf_offset=%llu\n", ci->buf_offset); + + if (end < lit_length) { + DEBUG_PRINTF("too short!\n"); + return 0; + } + + // If any portion of the literal matched in the current buffer, check it. + if (end > ci->buf_offset) { + u32 scan_len = MIN(end - ci->buf_offset, lit_length); + u64a scan_start = end - ci->buf_offset - scan_len; + DEBUG_PRINTF("checking suffix (%u bytes) in buf[%llu:%llu]\n", scan_len, + scan_start, end); + if (cmpForward(ci->buf + scan_start, lit + lit_length - scan_len, + scan_len, nocase)) { + DEBUG_PRINTF("cmp of suffix failed\n"); + return 0; + } + } + + // If the entirety of the literal was in the current block, we are done. + if (end - lit_length >= ci->buf_offset) { + DEBUG_PRINTF("literal confirmed in current block\n"); + return 1; + } + + // We still have a prefix which we must test against the buffer prepared by + // the long literal table. This is only done in streaming mode. + + assert(t->mode != HS_MODE_BLOCK); + + const u8 *ll_buf; + size_t ll_len; + if (nocase) { + ll_buf = scratch->tctxt.ll_buf_nocase; + ll_len = scratch->tctxt.ll_len_nocase; + } else { + ll_buf = scratch->tctxt.ll_buf; + ll_len = scratch->tctxt.ll_len; + } + + assert(ll_buf); + + u64a lit_start_offset = end - lit_length; + u32 prefix_len = MIN(lit_length, ci->buf_offset - lit_start_offset); + u32 hist_rewind = ci->buf_offset - lit_start_offset; + DEBUG_PRINTF("ll_len=%zu, hist_rewind=%u\n", ll_len, hist_rewind); + if (hist_rewind > ll_len) { + DEBUG_PRINTF("not enough history\n"); + return 0; + } + + DEBUG_PRINTF("check prefix len=%u from hist (len %zu, rewind %u)\n", + prefix_len, ll_len, hist_rewind); + assert(hist_rewind <= ll_len); + if (cmpForward(ll_buf + ll_len - hist_rewind, lit, prefix_len, nocase)) { + DEBUG_PRINTF("cmp of prefix failed\n"); + return 0; + } + + DEBUG_PRINTF("cmp succeeded\n"); + return 1; +} + +static rose_inline +int roseCheckMediumLiteral(const struct RoseEngine *t, + const struct hs_scratch *scratch, u64a end, + u32 lit_offset, u32 lit_length, char nocase) { + const struct core_info *ci = &scratch->core_info; + const u8 *lit = getByOffset(t, lit_offset); + + DEBUG_PRINTF("check lit at %llu, length %u\n", end, lit_length); + DEBUG_PRINTF("base buf_offset=%llu\n", ci->buf_offset); + + if (end < lit_length) { + DEBUG_PRINTF("too short!\n"); + return 0; + } + + // If any portion of the literal matched in the current buffer, check it. + if (end > ci->buf_offset) { + u32 scan_len = MIN(end - ci->buf_offset, lit_length); + u64a scan_start = end - ci->buf_offset - scan_len; + DEBUG_PRINTF("checking suffix (%u bytes) in buf[%llu:%llu]\n", scan_len, + scan_start, end); + if (cmpForward(ci->buf + scan_start, lit + lit_length - scan_len, + scan_len, nocase)) { + DEBUG_PRINTF("cmp of suffix failed\n"); + return 0; + } + } + + // If the entirety of the literal was in the current block, we are done. 
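+    // (i.e. the literal's start offset, end - lit_length, is at or after
+    // the start of the current block)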
+ if (end - lit_length >= ci->buf_offset) { + DEBUG_PRINTF("literal confirmed in current block\n"); + return 1; + } + + // We still have a prefix which we must test against the history buffer. + assert(t->mode != HS_MODE_BLOCK); + + u64a lit_start_offset = end - lit_length; + u32 prefix_len = MIN(lit_length, ci->buf_offset - lit_start_offset); + u32 hist_rewind = ci->buf_offset - lit_start_offset; + DEBUG_PRINTF("hlen=%zu, hist_rewind=%u\n", ci->hlen, hist_rewind); + + // History length check required for confirm in the EOD and delayed + // rebuild paths. + if (hist_rewind > ci->hlen) { + DEBUG_PRINTF("not enough history\n"); + return 0; + } + + DEBUG_PRINTF("check prefix len=%u from hist (len %zu, rewind %u)\n", + prefix_len, ci->hlen, hist_rewind); + assert(hist_rewind <= ci->hlen); + if (cmpForward(ci->hbuf + ci->hlen - hist_rewind, lit, prefix_len, + nocase)) { + DEBUG_PRINTF("cmp of prefix failed\n"); + return 0; + } + + DEBUG_PRINTF("cmp succeeded\n"); + return 1; +} + +static +void updateSeqPoint(struct RoseContext *tctxt, u64a offset, + const char from_mpv) { + if (from_mpv) { + updateMinMatchOffsetFromMpv(tctxt, offset); + } else { + updateMinMatchOffset(tctxt, offset); + } +} + +static rose_inline +hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t, + struct hs_scratch *scratch) { + u8 *cvec = (u8 *)scratch->core_info.combVector; + if (!mmbit_any(cvec, t->ckeyCount)) { + return HWLM_CONTINUE_MATCHING; + } + u64a end = scratch->tctxt.lastCombMatchOffset; + for (u32 i = mmbit_iterate(cvec, t->ckeyCount, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(cvec, t->ckeyCount, i)) { + const struct CombInfo *combInfoMap = (const struct CombInfo *) + ((const char *)t + t->combInfoMapOffset); + const struct CombInfo *ci = combInfoMap + i; + if ((ci->min_offset != 0) && (end < ci->min_offset)) { + DEBUG_PRINTF("halt: before min_offset=%llu\n", ci->min_offset); + continue; + } + if ((ci->max_offset != MAX_OFFSET) && (end > ci->max_offset)) { + DEBUG_PRINTF("halt: after max_offset=%llu\n", ci->max_offset); + continue; + } + + DEBUG_PRINTF("check ekey %u\n", ci->ekey); + if (ci->ekey != INVALID_EKEY) { + assert(ci->ekey < t->ekeyCount); + const char *evec = scratch->core_info.exhaustionVector; + if (isExhausted(t, evec, ci->ekey)) { + DEBUG_PRINTF("ekey %u already set, match is exhausted\n", + ci->ekey); + continue; + } + } + + DEBUG_PRINTF("check ckey %u\n", i); + char *lvec = scratch->core_info.logicalVector; + if (!isLogicalCombination(t, lvec, ci->start, ci->result)) { + DEBUG_PRINTF("Logical Combination Failed!\n"); + continue; + } + + DEBUG_PRINTF("Logical Combination Passed!\n"); + if (roseReport(t, scratch, end, ci->id, 0, + ci->ekey) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + clearCvec(t, (char *)cvec); + return HWLM_CONTINUE_MATCHING; +} + +#define PROGRAM_CASE(name) \ + case ROSE_INSTR_##name: { \ + LABEL_ROSE_INSTR_##name: \ + DEBUG_PRINTF("instruction: " #name " (pc=%u)\n", \ + programOffset + (u32)(pc - pc_base)); \ + const struct ROSE_STRUCT_##name *ri = \ + (const struct ROSE_STRUCT_##name *)pc; + +#define PROGRAM_NEXT_INSTRUCTION \ + pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ + goto *(next_instr[*(const u8 *)pc]); \ + } + +#define PROGRAM_NEXT_INSTRUCTION_JUMP \ + goto *(next_instr[*(const u8 *)pc]); + hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, struct hs_scratch *scratch, u32 programOffset, u64a som, u64a end, u8 prog_flags) { - return roseRunProgram_i(t, scratch, programOffset, som, end, prog_flags); + 
DEBUG_PRINTF("program=%u, offsets [%llu,%llu], flags=%u\n", programOffset, + som, end, prog_flags); + + assert(programOffset != ROSE_INVALID_PROG_OFFSET); + assert(programOffset >= sizeof(struct RoseEngine)); + assert(programOffset < t->size); + + const char in_anchored = prog_flags & ROSE_PROG_FLAG_IN_ANCHORED; + const char in_catchup = prog_flags & ROSE_PROG_FLAG_IN_CATCHUP; + const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV; + const char skip_mpv_catchup = prog_flags & ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; + + const char *pc_base = getByOffset(t, programOffset); + const char *pc = pc_base; + + // Local sparse iterator state for programs that use the SPARSE_ITER_BEGIN + // and SPARSE_ITER_NEXT instructions. + struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; + + // If this program has an effect, work_done will be set to one (which may + // allow the program to squash groups). + int work_done = 0; + + struct RoseContext *tctxt = &scratch->tctxt; + + assert(*(const u8 *)pc != ROSE_INSTR_END); + + static const void *next_instr[] = { + &&LABEL_ROSE_INSTR_END, //!< End of program. + &&LABEL_ROSE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher. + &&LABEL_ROSE_INSTR_CHECK_LIT_EARLY, //!< Skip matches before floating min offset. + &&LABEL_ROSE_INSTR_CHECK_GROUPS, //!< Check that literal groups are on. + &&LABEL_ROSE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD. + &&LABEL_ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance from offset 0. + &&LABEL_ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled". + &&LABEL_ROSE_INSTR_CHECK_SINGLE_LOOKAROUND, //!< Single lookaround check. + &&LABEL_ROSE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check. + &&LABEL_ROSE_INSTR_CHECK_MASK, //!< 8-bytes mask check. + &&LABEL_ROSE_INSTR_CHECK_MASK_32, //!< 32-bytes and/cmp/neg mask check. + &&LABEL_ROSE_INSTR_CHECK_BYTE, //!< Single Byte check. + &&LABEL_ROSE_INSTR_CHECK_SHUFTI_16x8, //!< Check 16-byte data by 8-bucket shufti. + &&LABEL_ROSE_INSTR_CHECK_SHUFTI_32x8, //!< Check 32-byte data by 8-bucket shufti. + &&LABEL_ROSE_INSTR_CHECK_SHUFTI_16x16, //!< Check 16-byte data by 16-bucket shufti. + &&LABEL_ROSE_INSTR_CHECK_SHUFTI_32x16, //!< Check 32-byte data by 16-bucket shufti. + &&LABEL_ROSE_INSTR_CHECK_INFIX, //!< Infix engine must be in accept state. + &&LABEL_ROSE_INSTR_CHECK_PREFIX, //!< Prefix engine must be in accept state. + &&LABEL_ROSE_INSTR_PUSH_DELAYED, //!< Push delayed literal matches. + &&LABEL_ROSE_INSTR_DUMMY_NOP, //!< NOP. Should not exist in build programs. + &&LABEL_ROSE_INSTR_CATCH_UP, //!< Catch up engines, anchored matches. + &&LABEL_ROSE_INSTR_CATCH_UP_MPV, //!< Catch up the MPV. + &&LABEL_ROSE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM. + &&LABEL_ROSE_INSTR_SOM_LEFTFIX, //!< Acquire SOM from a leftfix engine. + &&LABEL_ROSE_INSTR_SOM_FROM_REPORT, //!< Acquire SOM from a som_operation. + &&LABEL_ROSE_INSTR_SOM_ZERO, //!< Set SOM to zero. + &&LABEL_ROSE_INSTR_TRIGGER_INFIX, //!< Trigger an infix engine. + &&LABEL_ROSE_INSTR_TRIGGER_SUFFIX, //!< Trigger a suffix engine. + &&LABEL_ROSE_INSTR_DEDUPE, //!< Run deduplication for report. + &&LABEL_ROSE_INSTR_DEDUPE_SOM, //!< Run deduplication for SOM report. + &&LABEL_ROSE_INSTR_REPORT_CHAIN, //!< Fire a chained report (MPV). + &&LABEL_ROSE_INSTR_REPORT_SOM_INT, //!< Manipulate SOM only. + &&LABEL_ROSE_INSTR_REPORT_SOM_AWARE, //!< Manipulate SOM from SOM-aware source. 
+        &&LABEL_ROSE_INSTR_REPORT, //!< Fire an ordinary report.
+        &&LABEL_ROSE_INSTR_REPORT_EXHAUST, //!< Fire a report and set its exhaustion key.
+        &&LABEL_ROSE_INSTR_REPORT_SOM, //!< Fire a SOM report.
+        &&LABEL_ROSE_INSTR_REPORT_SOM_EXHAUST, //!< Fire a SOM report and set its exhaustion key.
+        &&LABEL_ROSE_INSTR_DEDUPE_AND_REPORT, //!< Dedupe check, then fire a report.
+        &&LABEL_ROSE_INSTR_FINAL_REPORT, //!< Fire a report and end the program.
+        &&LABEL_ROSE_INSTR_CHECK_EXHAUSTED, //!< Check if an ekey has already been set.
+        &&LABEL_ROSE_INSTR_CHECK_MIN_LENGTH, //!< Check (EOM - SOM) against min length.
+        &&LABEL_ROSE_INSTR_SET_STATE, //!< Switch a state index on.
+        &&LABEL_ROSE_INSTR_SET_GROUPS, //!< Set some literal group bits.
+        &&LABEL_ROSE_INSTR_SQUASH_GROUPS, //!< Conditionally turn off some groups.
+        &&LABEL_ROSE_INSTR_CHECK_STATE, //!< Test a single bit in the state multibit.
+        &&LABEL_ROSE_INSTR_SPARSE_ITER_BEGIN, //!< Begin running a sparse iter over states.
+        &&LABEL_ROSE_INSTR_SPARSE_ITER_NEXT, //!< Continue running sparse iter over states.
+        &&LABEL_ROSE_INSTR_SPARSE_ITER_ANY, //!< Test for any bit in the sparse iterator.
+        &&LABEL_ROSE_INSTR_ENGINES_EOD, //!< Catch up and check engines at EOD.
+        &&LABEL_ROSE_INSTR_SUFFIXES_EOD, //!< Catch up and check suffixes at EOD.
+        &&LABEL_ROSE_INSTR_MATCHER_EOD, //!< Run the EOD-anchored matcher.
+        &&LABEL_ROSE_INSTR_CHECK_LONG_LIT, //!< Confirm a long literal (case-sensitive).
+        &&LABEL_ROSE_INSTR_CHECK_LONG_LIT_NOCASE, //!< Confirm a long literal (caseless).
+        &&LABEL_ROSE_INSTR_CHECK_MED_LIT, //!< Confirm a medium literal (case-sensitive).
+        &&LABEL_ROSE_INSTR_CHECK_MED_LIT_NOCASE, //!< Confirm a medium literal (caseless).
+        &&LABEL_ROSE_INSTR_CLEAR_WORK_DONE, //!< Clear the work_done flag.
+        &&LABEL_ROSE_INSTR_MULTIPATH_LOOKAROUND, //!< Multi-path lookaround check.
+        &&LABEL_ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_16x8, //!< Multi-path shufti, 16 bytes, 8 buckets.
+        &&LABEL_ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x8, //!< Multi-path shufti, 32 bytes, 8 buckets.
+        &&LABEL_ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x16, //!< Multi-path shufti, 32 bytes, 16 buckets.
+        &&LABEL_ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_64, //!< Multi-path shufti, 64 bytes.
+        &&LABEL_ROSE_INSTR_INCLUDED_JUMP, //!< Jump to the program of an included literal.
+        &&LABEL_ROSE_INSTR_SET_LOGICAL, //!< Set a logical key value.
+        &&LABEL_ROSE_INSTR_SET_COMBINATION, //!< Mark a combination key as active.
+        &&LABEL_ROSE_INSTR_FLUSH_COMBINATION, //!< Flush matches for active combinations.
+        &&LABEL_ROSE_INSTR_SET_EXHAUST //!< Set an exhaustion key.
+    };
+
+    for (;;) {
+        assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN));
+        assert(pc >= pc_base);
+        assert((size_t)(pc - pc_base) < t->size);
+        const u8 code = *(const u8 *)pc;
+        assert(code <= LAST_ROSE_INSTRUCTION);
+
+        switch ((enum RoseInstructionCode)code) {
+            PROGRAM_CASE(END) {
+                DEBUG_PRINTF("finished\n");
+                return HWLM_CONTINUE_MATCHING;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(ANCHORED_DELAY) {
+                if (in_anchored && end > t->floatingMinLiteralMatchOffset) {
+                    DEBUG_PRINTF("delay until playback\n");
+                    tctxt->groups |= ri->groups;
+                    work_done = 1;
+                    recordAnchoredLiteralMatch(t, scratch, ri->anch_id, end);
+
+                    assert(ri->done_jump); // must progress
+                    pc += ri->done_jump;
+                    PROGRAM_NEXT_INSTRUCTION_JUMP
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_LIT_EARLY) {
+                if (end < ri->min_offset) {
+                    DEBUG_PRINTF("halt: before min_offset=%u\n",
+                                 ri->min_offset);
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    PROGRAM_NEXT_INSTRUCTION_JUMP
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_GROUPS) {
+                DEBUG_PRINTF("groups=0x%llx, checking instr groups=0x%llx\n",
+                             tctxt->groups, ri->groups);
+                if (!(ri->groups & tctxt->groups)) {
+                    DEBUG_PRINTF("halt: no groups are set\n");
+                    return HWLM_CONTINUE_MATCHING;
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_ONLY_EOD) {
+                struct core_info *ci = &scratch->core_info;
+                if (end != ci->buf_offset + ci->len) {
+                    DEBUG_PRINTF("should only match at end of data\n");
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    PROGRAM_NEXT_INSTRUCTION_JUMP
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_BOUNDS) {
+                if (!roseCheckBounds(end, ri->min_bound, ri->max_bound)) {
+                    DEBUG_PRINTF("failed bounds check\n");
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    PROGRAM_NEXT_INSTRUCTION_JUMP
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            
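/* fatbit_set returns 1 if the key was already on: the role has already
+             * been handled at this offset and must not run again. */
+            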
PROGRAM_CASE(CHECK_NOT_HANDLED) {
+                struct fatbit *handled = scratch->handled_roles;
+                if (fatbit_set(handled, t->handledKeyCount, ri->key)) {
+                    DEBUG_PRINTF("key %u already set\n", ri->key);
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    PROGRAM_NEXT_INSTRUCTION_JUMP
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_SINGLE_LOOKAROUND) {
+                if (!roseCheckSingleLookaround(t, scratch, ri->offset,
+                                               ri->reach_index, end)) {
+                    DEBUG_PRINTF("failed lookaround check\n");
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    PROGRAM_NEXT_INSTRUCTION_JUMP
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_LOOKAROUND) {
+                if (!roseCheckLookaround(t, scratch, ri->look_index,
+                                         ri->reach_index, ri->count, end)) {
+                    DEBUG_PRINTF("failed lookaround check\n");
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    PROGRAM_NEXT_INSTRUCTION_JUMP
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_MASK) {
+                struct core_info *ci = &scratch->core_info;
+                if (!roseCheckMask(ci, ri->and_mask, ri->cmp_mask,
+                                   ri->neg_mask, ri->offset, end)) {
+                    DEBUG_PRINTF("failed mask check\n");
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    PROGRAM_NEXT_INSTRUCTION_JUMP
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_MASK_32) {
+                struct core_info *ci = &scratch->core_info;
+                if (!roseCheckMask32(ci, ri->and_mask, ri->cmp_mask,
+                                     ri->neg_mask, ri->offset, end)) {
+                    assert(ri->fail_jump);
+                    pc += ri->fail_jump;
+                    PROGRAM_NEXT_INSTRUCTION_JUMP
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_BYTE) {
+                const struct core_info *ci = &scratch->core_info;
+                if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask,
+                                   ri->negation, ri->offset, end)) {
+                    DEBUG_PRINTF("failed byte check\n");
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    PROGRAM_NEXT_INSTRUCTION_JUMP
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_SHUFTI_16x8) {
+                const struct core_info *ci = &scratch->core_info;
+                if (!roseCheckShufti16x8(ci, ri->nib_mask,
+                                         ri->bucket_select_mask,
+                                         ri->neg_mask, ri->offset, end)) {
+                    assert(ri->fail_jump);
+                    pc += ri->fail_jump;
+                    PROGRAM_NEXT_INSTRUCTION_JUMP
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_SHUFTI_32x8) {
+                const struct core_info *ci = &scratch->core_info;
+                if (!roseCheckShufti32x8(ci, ri->hi_mask, ri->lo_mask,
+                                         ri->bucket_select_mask,
+                                         ri->neg_mask, ri->offset, end)) {
+                    assert(ri->fail_jump);
+                    pc += ri->fail_jump;
+                    PROGRAM_NEXT_INSTRUCTION_JUMP
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_SHUFTI_16x16) {
+                const struct core_info *ci = &scratch->core_info;
+                if (!roseCheckShufti16x16(ci, ri->hi_mask, ri->lo_mask,
+                                          ri->bucket_select_mask,
+                                          ri->neg_mask, ri->offset, end)) {
+                    assert(ri->fail_jump);
+                    pc += ri->fail_jump;
+                    PROGRAM_NEXT_INSTRUCTION_JUMP
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_SHUFTI_32x16) {
+                const struct core_info *ci = &scratch->core_info;
+                if (!roseCheckShufti32x16(ci, ri->hi_mask, ri->lo_mask,
+                                          ri->bucket_select_mask_hi,
+                                          ri->bucket_select_mask_lo,
+                                          ri->neg_mask, ri->offset, end)) {
+                    assert(ri->fail_jump);
+                    pc += ri->fail_jump;
+                    PROGRAM_NEXT_INSTRUCTION_JUMP
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_INFIX) {
+                if (!roseTestInfix(t, scratch, ri->queue, ri->lag, ri->report,
+                                   end)) {
+                    DEBUG_PRINTF("failed infix check\n");
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    PROGRAM_NEXT_INSTRUCTION_JUMP
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_PREFIX) {
+                if 
(!roseTestPrefix(t, scratch, ri->queue, ri->lag, ri->report, + end)) { + DEBUG_PRINTF("failed prefix check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(PUSH_DELAYED) { + rosePushDelayedMatch(t, scratch, ri->delay, ri->index, end); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(DUMMY_NOP) { + assert(0); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CATCH_UP) { + if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CATCH_UP_MPV) { + if (from_mpv || skip_mpv_catchup) { + DEBUG_PRINTF("skipping mpv catchup\n"); + } else if (roseCatchUpMPV(t, + end - scratch->core_info.buf_offset, + scratch) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_ADJUST) { + assert(ri->distance <= end); + som = end - ri->distance; + DEBUG_PRINTF("som is (end - %u) = %llu\n", ri->distance, som); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_LEFTFIX) { + som = roseGetHaigSom(t, scratch, ri->queue, ri->lag); + DEBUG_PRINTF("som from leftfix is %llu\n", som); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_FROM_REPORT) { + som = handleSomExternal(scratch, &ri->som, end); + DEBUG_PRINTF("som from report %u is %llu\n", ri->som.onmatch, + som); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_ZERO) { + DEBUG_PRINTF("setting SOM to zero\n"); + som = 0; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(TRIGGER_INFIX) { + roseTriggerInfix(t, scratch, som, end, ri->queue, ri->event, + ri->cancel); + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(TRIGGER_SUFFIX) { + if (roseTriggerSuffix(t, scratch, ri->queue, ri->event, som, + end) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(DEDUPE) { + updateSeqPoint(tctxt, end, from_mpv); + const char do_som = t->hasSom; // TODO: constant propagate + const char is_external_report = 1; + enum DedupeResult rv = + dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, + ri->dkey, ri->offset_adjust, + is_external_report, ri->quash_som, do_som); + switch (rv) { + case DEDUPE_HALT: + return HWLM_TERMINATE_MATCHING; + case DEDUPE_SKIP: + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + case DEDUPE_CONTINUE: + break; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(DEDUPE_SOM) { + updateSeqPoint(tctxt, end, from_mpv); + const char is_external_report = 0; + const char do_som = 1; + enum DedupeResult rv = + dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, + ri->dkey, ri->offset_adjust, + is_external_report, ri->quash_som, do_som); + switch (rv) { + case DEDUPE_HALT: + return HWLM_TERMINATE_MATCHING; + case DEDUPE_SKIP: + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + case DEDUPE_CONTINUE: + break; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_CHAIN) { + // Note: sequence points updated inside this function. 
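+                // It first catches up any engines that feed the MPV, so that
+                // earlier chained triggers are enqueued in order.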
+ if (roseCatchUpAndHandleChainMatch( + t, scratch, ri->event, ri->top_squash_distance, end, + in_catchup) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_INT) { + updateSeqPoint(tctxt, end, from_mpv); + roseHandleSom(scratch, &ri->som, end); + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_AWARE) { + updateSeqPoint(tctxt, end, from_mpv); + roseHandleSomSom(scratch, &ri->som, som, end); + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, + INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_EXHAUST) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, + ri->ekey) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseReportSom(t, scratch, som, end, ri->onmatch, + ri->offset_adjust, + INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_EXHAUST) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseReportSom(t, scratch, som, end, ri->onmatch, + ri->offset_adjust, + ri->ekey) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(DEDUPE_AND_REPORT) { + updateSeqPoint(tctxt, end, from_mpv); + const char do_som = t->hasSom; // TODO: constant propagate + const char is_external_report = 1; + enum DedupeResult rv = + dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, + ri->dkey, ri->offset_adjust, + is_external_report, ri->quash_som, do_som); + switch (rv) { + case DEDUPE_HALT: + return HWLM_TERMINATE_MATCHING; + case DEDUPE_SKIP: + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + case DEDUPE_CONTINUE: + break; + } + + const u32 ekey = INVALID_EKEY; + if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, + ekey) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(FINAL_REPORT) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, + INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + /* One-shot specialisation: this instruction always terminates + * execution of the program. 
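+                 * Since execution stops here, no further dispatch is
+                 * required.
+                 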
*/ + return HWLM_CONTINUE_MATCHING; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_EXHAUSTED) { + DEBUG_PRINTF("check ekey %u\n", ri->ekey); + assert(ri->ekey != INVALID_EKEY); + assert(ri->ekey < t->ekeyCount); + const char *evec = scratch->core_info.exhaustionVector; + if (isExhausted(t, evec, ri->ekey)) { + DEBUG_PRINTF("ekey %u already set, match is exhausted\n", + ri->ekey); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MIN_LENGTH) { + DEBUG_PRINTF("check min length %llu (adj %d)\n", ri->min_length, + ri->end_adj); + assert(ri->min_length > 0); + assert(ri->end_adj == 0 || ri->end_adj == -1); + assert(som == HS_OFFSET_PAST_HORIZON || som <= end); + if (som != HS_OFFSET_PAST_HORIZON && + ((end + ri->end_adj) - som < ri->min_length)) { + DEBUG_PRINTF("failed check, match len %llu\n", + (u64a)((end + ri->end_adj) - som)); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_STATE) { + DEBUG_PRINTF("set state index %u\n", ri->index); + mmbit_set(getRoleState(scratch->core_info.state), + t->rolesWithStateCount, ri->index); + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_GROUPS) { + tctxt->groups |= ri->groups; + DEBUG_PRINTF("set groups 0x%llx -> 0x%llx\n", ri->groups, + tctxt->groups); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SQUASH_GROUPS) { + assert(popcount64(ri->groups) == 63); // Squash only one group. + if (work_done) { + tctxt->groups &= ri->groups; + DEBUG_PRINTF("squash groups 0x%llx -> 0x%llx\n", ri->groups, + tctxt->groups); + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_STATE) { + DEBUG_PRINTF("check state %u\n", ri->index); + const u8 *roles = getRoleState(scratch->core_info.state); + if (!mmbit_isset(roles, t->rolesWithStateCount, ri->index)) { + DEBUG_PRINTF("state not on\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SPARSE_ITER_BEGIN) { + DEBUG_PRINTF("iter_offset=%u\n", ri->iter_offset); + const struct mmbit_sparse_iter *it = + getByOffset(t, ri->iter_offset); + assert(ISALIGNED(it)); + + const u8 *roles = getRoleState(scratch->core_info.state); + + u32 idx = 0; + u32 i = mmbit_sparse_iter_begin(roles, t->rolesWithStateCount, + &idx, it, si_state); + if (i == MMB_INVALID) { + DEBUG_PRINTF("no states in sparse iter are on\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + + fatbit_clear(scratch->handled_roles); + + const u32 *jumps = getByOffset(t, ri->jump_table); + DEBUG_PRINTF("state %u (idx=%u) is on, jump to %u\n", i, idx, + jumps[idx]); + pc = pc_base + jumps[idx]; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SPARSE_ITER_NEXT) { + DEBUG_PRINTF("iter_offset=%u, state=%u\n", ri->iter_offset, + ri->state); + const struct mmbit_sparse_iter *it = + getByOffset(t, ri->iter_offset); + assert(ISALIGNED(it)); + + const u8 *roles = getRoleState(scratch->core_info.state); + + u32 idx = 0; + u32 i = mmbit_sparse_iter_next(roles, t->rolesWithStateCount, + ri->state, &idx, it, si_state); + if (i == MMB_INVALID) { + DEBUG_PRINTF("no more states in sparse iter are on\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + + const u32 *jumps = getByOffset(t, ri->jump_table); + 
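// jumps[] maps each sparse iterator index to a program offset.
+                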
DEBUG_PRINTF("state %u (idx=%u) is on, jump to %u\n", i, idx, + jumps[idx]); + pc = pc_base + jumps[idx]; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SPARSE_ITER_ANY) { + DEBUG_PRINTF("iter_offset=%u\n", ri->iter_offset); + const struct mmbit_sparse_iter *it = + getByOffset(t, ri->iter_offset); + assert(ISALIGNED(it)); + + const u8 *roles = getRoleState(scratch->core_info.state); + + u32 idx = 0; + u32 i = mmbit_sparse_iter_begin(roles, t->rolesWithStateCount, + &idx, it, si_state); + if (i == MMB_INVALID) { + DEBUG_PRINTF("no states in sparse iter are on\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + DEBUG_PRINTF("state %u (idx=%u) is on\n", i, idx); + fatbit_clear(scratch->handled_roles); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(ENGINES_EOD) { + if (roseEnginesEod(t, scratch, end, ri->iter_offset) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SUFFIXES_EOD) { + if (roseSuffixesEod(t, scratch, end) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(MATCHER_EOD) { + if (roseMatcherEod(t, scratch, end) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LONG_LIT) { + const char nocase = 0; + if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("failed long lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) { + const char nocase = 1; + if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("failed nocase long lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MED_LIT) { + const char nocase = 0; + if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("failed lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MED_LIT_NOCASE) { + const char nocase = 1; + if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("failed long lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CLEAR_WORK_DONE) { + DEBUG_PRINTF("clear work_done flag\n"); + work_done = 0; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(MULTIPATH_LOOKAROUND) { + if (!roseMultipathLookaround(t, scratch, ri->look_index, + ri->reach_index, ri->count, + ri->last_start, ri->start_mask, + end)) { + DEBUG_PRINTF("failed multi-path lookaround check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_16x8) { + if (!roseCheckMultipathShufti16x8(scratch, ri, end)) { + DEBUG_PRINTF("failed multi-path shufti 16x8 check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x8) { + if (!roseCheckMultipathShufti32x8(scratch, 
ri, end)) { + DEBUG_PRINTF("failed multi-path shufti 32x8 check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x16) { + if (!roseCheckMultipathShufti32x16(scratch, ri, end)) { + DEBUG_PRINTF("failed multi-path shufti 32x16 check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_64) { + if (!roseCheckMultipathShufti64(scratch, ri, end)) { + DEBUG_PRINTF("failed multi-path shufti 64 check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(INCLUDED_JUMP) { + if (scratch->fdr_conf) { + // squash the bucket of included literal + u8 shift = scratch->fdr_conf_offset & ~7U; + u64a mask = ((~(u64a)ri->squash) << shift); + *(scratch->fdr_conf) &= mask; + + pc = getByOffset(t, ri->child_offset); + pc_base = pc; + programOffset = (const u8 *)pc_base -(const u8 *)t; + DEBUG_PRINTF("pc_base %p pc %p child_offset %u squash %u\n", + pc_base, pc, ri->child_offset, ri->squash); + work_done = 0; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_LOGICAL) { + DEBUG_PRINTF("set logical value of lkey %u, offset_adjust=%d\n", + ri->lkey, ri->offset_adjust); + assert(ri->lkey != INVALID_LKEY); + assert(ri->lkey < t->lkeyCount); + char *lvec = scratch->core_info.logicalVector; + setLogicalVal(t, lvec, ri->lkey, 1); + updateLastCombMatchOffset(tctxt, end + ri->offset_adjust); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_COMBINATION) { + DEBUG_PRINTF("set ckey %u as active\n", ri->ckey); + assert(ri->ckey != INVALID_CKEY); + assert(ri->ckey < t->ckeyCount); + char *cvec = scratch->core_info.combVector; + setCombinationActive(t, cvec, ri->ckey); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(FLUSH_COMBINATION) { + assert(end >= tctxt->lastCombMatchOffset); + if (end > tctxt->lastCombMatchOffset) { + if (flushActiveCombinations(t, scratch) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_EXHAUST) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseSetExhaust(t, scratch, ri->ekey) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + } + } + + assert(0); // unreachable + return HWLM_CONTINUE_MATCHING; } + +#undef PROGRAM_CASE +#undef PROGRAM_NEXT_INSTRUCTION +#undef PROGRAM_NEXT_INSTRUCTION_JUMP diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 3c11300b..5b16118e 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -34,27 +34,10 @@ #ifndef PROGRAM_RUNTIME_H #define PROGRAM_RUNTIME_H -#include "catchup.h" -#include "counting_miracle.h" -#include "infix.h" -#include "match.h" -#include "miracle.h" -#include "report.h" +#include "hwlm/hwlm.h" // for hwlmcb_rv_t #include "rose.h" -#include "rose_common.h" -#include "rose_internal.h" -#include "rose_program.h" -#include "rose_types.h" -#include "validate_mask.h" -#include "validate_shufti.h" -#include "runtime.h" #include "scratch.h" #include "ue2common.h" -#include "hwlm/hwlm.h" // for hwlmcb_rv_t -#include "util/compare.h" -#include "util/copybytes.h" -#include "util/fatbit.h" -#include "util/multibit.h" /* * Program context flags, which control the behaviour of some instructions at @@ 
-71,2637 +54,4 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, struct hs_scratch *scratch, u32 programOffset, u64a som, u64a end, u8 prog_flags); -/* Inline implementation follows. */ - -static rose_inline -void rosePushDelayedMatch(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 delay, - u32 delay_index, u64a offset) { - assert(delay); - - const u32 src_slot_index = delay; - u32 slot_index = (src_slot_index + offset) & DELAY_MASK; - - struct RoseContext *tctxt = &scratch->tctxt; - if (offset + src_slot_index <= tctxt->delayLastEndOffset) { - DEBUG_PRINTF("skip too late\n"); - return; - } - - const u32 delay_count = t->delay_count; - struct fatbit **delaySlots = getDelaySlots(scratch); - struct fatbit *slot = delaySlots[slot_index]; - - DEBUG_PRINTF("pushing tab %u into slot %u\n", delay_index, slot_index); - if (!(tctxt->filledDelayedSlots & (1U << slot_index))) { - tctxt->filledDelayedSlots |= 1U << slot_index; - fatbit_clear(slot); - } - - fatbit_set(slot, delay_count, delay_index); -} - -static rose_inline -void recordAnchoredLiteralMatch(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 anch_id, - u64a end) { - assert(end); - - if (end <= t->floatingMinLiteralMatchOffset) { - return; - } - - struct fatbit **anchoredLiteralRows = getAnchoredLiteralLog(scratch); - - DEBUG_PRINTF("record %u (of %u) @ %llu\n", anch_id, t->anchored_count, end); - - if (!bf64_set(&scratch->al_log_sum, end - 1)) { - // first time, clear row - DEBUG_PRINTF("clearing %llu/%u\n", end - 1, t->anchored_count); - fatbit_clear(anchoredLiteralRows[end - 1]); - } - - assert(anch_id < t->anchored_count); - fatbit_set(anchoredLiteralRows[end - 1], t->anchored_count, anch_id); -} - -static rose_inline -char roseLeftfixCheckMiracles(const struct RoseEngine *t, - const struct LeftNfaInfo *left, - struct core_info *ci, struct mq *q, u64a end, - const char is_infix) { - if (!is_infix && left->transient) { - // Miracles won't help us with transient leftfix engines; they only - // scan for a limited time anyway. - return 1; - } - - if (!left->stopTable) { - return 1; - } - - DEBUG_PRINTF("looking for miracle on queue %u\n", q->nfa->queueIndex); - - const s64a begin_loc = q_cur_loc(q); - const s64a end_loc = end - ci->buf_offset; - - s64a miracle_loc; - if (roseMiracleOccurs(t, left, ci, begin_loc, end_loc, &miracle_loc)) { - goto found_miracle; - } - - if (roseCountingMiracleOccurs(t, left, ci, begin_loc, end_loc, - &miracle_loc)) { - goto found_miracle; - } - - return 1; - -found_miracle: - DEBUG_PRINTF("miracle at %lld\n", miracle_loc); - assert(miracle_loc >= begin_loc); - - // If we're a prefix, then a miracle effectively results in us needing to - // re-init our state and start fresh. - if (!is_infix) { - if (miracle_loc != begin_loc) { - DEBUG_PRINTF("re-init prefix state\n"); - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, miracle_loc); - pushQueueAt(q, 1, MQE_TOP, miracle_loc); - nfaQueueInitState(q->nfa, q); - } - return 1; - } - - // Otherwise, we're an infix. Remove tops before the miracle from the queue - // and re-init at that location. 
- - q_skip_forward_to(q, miracle_loc); - - if (q_last_type(q) == MQE_START) { - DEBUG_PRINTF("miracle caused infix to die\n"); - return 0; - } - - DEBUG_PRINTF("re-init infix state\n"); - assert(q->items[q->cur].type == MQE_START); - q->items[q->cur].location = miracle_loc; - nfaQueueInitState(q->nfa, q); - - return 1; -} - -static rose_inline -hwlmcb_rv_t roseTriggerSuffix(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 qi, u32 top, - u64a som, u64a end) { - DEBUG_PRINTF("suffix qi=%u, top event=%u\n", qi, top); - - struct core_info *ci = &scratch->core_info; - u8 *aa = getActiveLeafArray(t, ci->state); - const u32 aaCount = t->activeArrayCount; - const u32 qCount = t->queueCount; - struct mq *q = &scratch->queues[qi]; - const struct NfaInfo *info = getNfaInfoByQueue(t, qi); - const struct NFA *nfa = getNfaByInfo(t, info); - - s64a loc = (s64a)end - ci->buf_offset; - assert(loc <= (s64a)ci->len && loc >= -(s64a)ci->hlen); - - if (!mmbit_set(aa, aaCount, qi)) { - initQueue(q, qi, t, scratch); - nfaQueueInitState(nfa, q); - pushQueueAt(q, 0, MQE_START, loc); - fatbit_set(scratch->aqa, qCount, qi); - } else if (info->no_retrigger) { - DEBUG_PRINTF("yawn\n"); - /* nfa only needs one top; we can go home now */ - return HWLM_CONTINUE_MATCHING; - } else if (!fatbit_set(scratch->aqa, qCount, qi)) { - initQueue(q, qi, t, scratch); - loadStreamState(nfa, q, 0); - pushQueueAt(q, 0, MQE_START, 0); - } else if (isQueueFull(q)) { - DEBUG_PRINTF("queue %u full -> catching up nfas\n", qi); - if (info->eod) { - /* can catch up suffix independently no pq */ - q->context = NULL; - pushQueueNoMerge(q, MQE_END, loc); - nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - } else if (ensureQueueFlushed(t, scratch, qi, loc) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - - assert(top == MQE_TOP || (top >= MQE_TOP_FIRST && top < MQE_INVALID)); - pushQueueSom(q, top, loc, som); - - if (q_cur_loc(q) == (s64a)ci->len && !info->eod) { - /* we may not run the nfa; need to ensure state is fine */ - DEBUG_PRINTF("empty run\n"); - pushQueueNoMerge(q, MQE_END, loc); - char alive = nfaQueueExec(nfa, q, loc); - if (alive) { - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - } else { - mmbit_unset(aa, aaCount, qi); - fatbit_unset(scratch->aqa, qCount, qi); - } - } - - return HWLM_CONTINUE_MATCHING; -} - -static really_inline -char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch, - u32 qi, u32 leftfixLag, ReportID leftfixReport, u64a end, - const char is_infix) { - struct core_info *ci = &scratch->core_info; - - u32 ri = queueToLeftIndex(t, qi); - const struct LeftNfaInfo *left = getLeftTable(t) + ri; - - DEBUG_PRINTF("testing %s %s %u/%u with lag %u (maxLag=%u)\n", - (left->transient ? "transient" : "active"), - (is_infix ? "infix" : "prefix"), - ri, qi, leftfixLag, left->maxLag); - - assert(leftfixLag <= left->maxLag); - assert(left->infix == is_infix); - assert(!is_infix || !left->transient); // Only prefixes can be transient. 
- - struct mq *q = scratch->queues + qi; - char *state = scratch->core_info.state; - u8 *activeLeftArray = getActiveLeftArray(t, state); - u32 qCount = t->queueCount; - u32 arCount = t->activeLeftCount; - - if (!mmbit_isset(activeLeftArray, arCount, ri)) { - DEBUG_PRINTF("engine is dead nothing to see here\n"); - return 0; - } - - if (unlikely(end < leftfixLag)) { - assert(0); /* lag is the literal length */ - return 0; - } - - if (nfaSupportsZombie(getNfaByQueue(t, qi)) && ci->buf_offset - && !fatbit_isset(scratch->aqa, qCount, qi) - && isZombie(t, state, left)) { - DEBUG_PRINTF("zombie\n"); - return 1; - } - - if (!fatbit_set(scratch->aqa, qCount, qi)) { - DEBUG_PRINTF("initing q %u\n", qi); - initRoseQueue(t, qi, left, scratch); - if (ci->buf_offset) { // there have been writes before us! - s32 sp; - if (!is_infix && left->transient) { - sp = -(s32)ci->hlen; - } else { - sp = -(s32)loadRoseDelay(t, state, left); - } - - /* transient nfas are always started fresh -> state not maintained - * at stream boundary */ - - pushQueueAt(q, 0, MQE_START, sp); - if (is_infix || (ci->buf_offset + sp > 0 && !left->transient)) { - loadStreamState(q->nfa, q, sp); - } else { - pushQueueAt(q, 1, MQE_TOP, sp); - nfaQueueInitState(q->nfa, q); - } - } else { // first write ever - pushQueueAt(q, 0, MQE_START, 0); - pushQueueAt(q, 1, MQE_TOP, 0); - nfaQueueInitState(q->nfa, q); - } - } - - s64a loc = (s64a)end - ci->buf_offset - leftfixLag; - assert(loc >= q_cur_loc(q) || left->eager); - assert(leftfixReport != MO_INVALID_IDX); - - if (!is_infix && left->transient) { - s64a start_loc = loc - left->transient; - if (q_cur_loc(q) < start_loc) { - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, start_loc); - pushQueueAt(q, 1, MQE_TOP, start_loc); - nfaQueueInitState(q->nfa, q); - } - } - - if (q_cur_loc(q) < loc || q_last_type(q) != MQE_START) { - if (is_infix) { - if (infixTooOld(q, loc)) { - DEBUG_PRINTF("infix %u died of old age\n", ri); - goto nfa_dead; - } - - reduceInfixQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); - } - - if (!roseLeftfixCheckMiracles(t, left, ci, q, end, is_infix)) { - DEBUG_PRINTF("leftfix %u died due to miracle\n", ri); - goto nfa_dead; - } - -#ifdef DEBUG - debugQueue(q); -#endif - - pushQueueNoMerge(q, MQE_END, loc); - - char rv = nfaQueueExecRose(q->nfa, q, leftfixReport); - if (!rv) { /* nfa is dead */ - DEBUG_PRINTF("leftfix %u died while trying to catch up\n", ri); - goto nfa_dead; - } - - // Queue must have next start loc before we call nfaInAcceptState. - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - - DEBUG_PRINTF("checking for report %u\n", leftfixReport); - DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); - return rv == MO_MATCHES_PENDING; - } else if (q_cur_loc(q) > loc) { - /* an eager leftfix may have already progressed past loc if there is no - * match at loc. 
*/ - assert(left->eager); - return 0; - } else { - assert(q_cur_loc(q) == loc); - DEBUG_PRINTF("checking for report %u\n", leftfixReport); - char rv = nfaInAcceptState(q->nfa, leftfixReport, q); - DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); - return rv; - } - -nfa_dead: - mmbit_unset(activeLeftArray, arCount, ri); - scratch->tctxt.groups &= left->squash_mask; - return 0; -} - -static rose_inline -char roseTestPrefix(const struct RoseEngine *t, struct hs_scratch *scratch, - u32 qi, u32 leftfixLag, ReportID leftfixReport, u64a end) { - return roseTestLeftfix(t, scratch, qi, leftfixLag, leftfixReport, end, 0); -} - -static rose_inline -char roseTestInfix(const struct RoseEngine *t, struct hs_scratch *scratch, - u32 qi, u32 leftfixLag, ReportID leftfixReport, u64a end) { - return roseTestLeftfix(t, scratch, qi, leftfixLag, leftfixReport, end, 1); -} - -static rose_inline -void roseTriggerInfix(const struct RoseEngine *t, struct hs_scratch *scratch, - u64a start, u64a end, u32 qi, u32 topEvent, u8 cancel) { - struct core_info *ci = &scratch->core_info; - s64a loc = (s64a)end - ci->buf_offset; - - u32 ri = queueToLeftIndex(t, qi); - assert(topEvent < MQE_INVALID); - - const struct LeftNfaInfo *left = getLeftInfoByQueue(t, qi); - assert(!left->transient); - - DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, topEvent); - - struct mq *q = scratch->queues + qi; - const struct NfaInfo *info = getNfaInfoByQueue(t, qi); - - char *state = ci->state; - u8 *activeLeftArray = getActiveLeftArray(t, state); - const u32 arCount = t->activeLeftCount; - char alive = mmbit_set(activeLeftArray, arCount, ri); - - if (alive && info->no_retrigger) { - DEBUG_PRINTF("yawn\n"); - return; - } - - struct fatbit *aqa = scratch->aqa; - const u32 qCount = t->queueCount; - - if (alive && nfaSupportsZombie(getNfaByInfo(t, info)) && ci->buf_offset && - !fatbit_isset(aqa, qCount, qi) && isZombie(t, state, left)) { - DEBUG_PRINTF("yawn - zombie\n"); - return; - } - - if (cancel) { - DEBUG_PRINTF("dominating top: (re)init\n"); - fatbit_set(aqa, qCount, qi); - initRoseQueue(t, qi, left, scratch); - pushQueueAt(q, 0, MQE_START, loc); - nfaQueueInitState(q->nfa, q); - } else if (!fatbit_set(aqa, qCount, qi)) { - DEBUG_PRINTF("initing %u\n", qi); - initRoseQueue(t, qi, left, scratch); - if (alive) { - s32 sp = -(s32)loadRoseDelay(t, state, left); - pushQueueAt(q, 0, MQE_START, sp); - loadStreamState(q->nfa, q, sp); - } else { - pushQueueAt(q, 0, MQE_START, loc); - nfaQueueInitState(q->nfa, q); - } - } else if (!alive) { - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - nfaQueueInitState(q->nfa, q); - } else if (isQueueFull(q)) { - reduceInfixQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); - - if (isQueueFull(q)) { - /* still full - reduceInfixQueue did nothing */ - DEBUG_PRINTF("queue %u full (%u items) -> catching up nfa\n", qi, - q->end - q->cur); - pushQueueNoMerge(q, MQE_END, loc); - nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); - - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - } - } - - pushQueueSom(q, topEvent, loc, start); -} - -static rose_inline -hwlmcb_rv_t roseReport(const struct RoseEngine *t, struct hs_scratch *scratch, - u64a end, ReportID onmatch, s32 offset_adjust, - u32 ekey) { - DEBUG_PRINTF("firing callback onmatch=%u, end=%llu\n", onmatch, end); - updateLastMatchOffset(&scratch->tctxt, end); - - int cb_rv = roseDeliverReport(end, onmatch, offset_adjust, scratch, ekey); - if (cb_rv == MO_HALT_MATCHING) { - DEBUG_PRINTF("termination requested\n"); - return 
HWLM_TERMINATE_MATCHING; - } - - if (ekey == INVALID_EKEY || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { - return HWLM_CONTINUE_MATCHING; - } - - return roseHaltIfExhausted(t, scratch); -} - -/* catches up engines enough to ensure any earlier mpv triggers are enqueued - * and then adds the trigger to the mpv queue. */ -static rose_inline -hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t, - struct hs_scratch *scratch, - u32 event, u64a top_squash_distance, - u64a end, const char in_catchup) { - if (!in_catchup && - roseCatchUpMpvFeeders(t, scratch, end) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - return roseHandleChainMatch(t, scratch, event, top_squash_distance, end, - in_catchup); -} - -static rose_inline -void roseHandleSom(struct hs_scratch *scratch, const struct som_operation *sr, - u64a end) { - DEBUG_PRINTF("end=%llu, minMatchOffset=%llu\n", end, - scratch->tctxt.minMatchOffset); - - updateLastMatchOffset(&scratch->tctxt, end); - handleSomInternal(scratch, sr, end); -} - -static rose_inline -hwlmcb_rv_t roseReportSom(const struct RoseEngine *t, - struct hs_scratch *scratch, u64a start, u64a end, - ReportID onmatch, s32 offset_adjust, u32 ekey) { - DEBUG_PRINTF("firing som callback onmatch=%u, start=%llu, end=%llu\n", - onmatch, start, end); - updateLastMatchOffset(&scratch->tctxt, end); - - int cb_rv = roseDeliverSomReport(start, end, onmatch, offset_adjust, - scratch, ekey); - if (cb_rv == MO_HALT_MATCHING) { - DEBUG_PRINTF("termination requested\n"); - return HWLM_TERMINATE_MATCHING; - } - - if (ekey == INVALID_EKEY || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { - return HWLM_CONTINUE_MATCHING; - } - - return roseHaltIfExhausted(t, scratch); -} - -static rose_inline -void roseHandleSomSom(struct hs_scratch *scratch, - const struct som_operation *sr, u64a start, u64a end) { - DEBUG_PRINTF("start=%llu, end=%llu, minMatchOffset=%llu\n", start, end, - scratch->tctxt.minMatchOffset); - - updateLastMatchOffset(&scratch->tctxt, end); - setSomFromSomAware(scratch, sr, start, end); -} - -static rose_inline -hwlmcb_rv_t roseSetExhaust(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 ekey) { - assert(scratch); - assert(scratch->magic == SCRATCH_MAGIC); - - struct core_info *ci = &scratch->core_info; - - assert(!can_stop_matching(scratch)); - assert(!isExhausted(ci->rose, ci->exhaustionVector, ekey)); - - markAsMatched(ci->rose, ci->exhaustionVector, ekey); - - return roseHaltIfExhausted(t, scratch); -} - -static really_inline -int reachHasBit(const u8 *reach, u8 c) { - return !!(reach[c / 8U] & (u8)1U << (c % 8U)); -} - -/* - * Generate a 8-byte valid_mask with #high bytes 0 from the highest side - * and #low bytes 0 from the lowest side - * and (8 - high - low) bytes '0xff' in the middle. - */ -static rose_inline -u64a generateValidMask(const s32 high, const s32 low) { - assert(high + low < 8); - DEBUG_PRINTF("high %d low %d\n", high, low); - const u64a ones = ~0ull; - return (ones << ((high + low) * 8)) >> (high * 8); -} - -/* - * Do the single-byte check if only one lookaround entry exists - * and it's a single mask. - * Return success if the byte is in the future or before history - * (offset is greater than (history) buffer length). 
- */ -static rose_inline -int roseCheckByte(const struct core_info *ci, u8 and_mask, u8 cmp_mask, - u8 negation, s32 checkOffset, u64a end) { - DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, - ci->buf_offset, ci->buf_offset + ci->len); - if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - - const s64a base_offset = end - ci->buf_offset; - s64a offset = base_offset + checkOffset; - DEBUG_PRINTF("checkOffset=%d offset=%lld\n", checkOffset, offset); - u8 c; - if (offset >= 0) { - if (offset >= (s64a)ci->len) { - DEBUG_PRINTF("in the future\n"); - return 1; - } else { - assert(offset < (s64a)ci->len); - DEBUG_PRINTF("check byte in buffer\n"); - c = ci->buf[offset]; - } - } else { - if (offset >= -(s64a) ci->hlen) { - DEBUG_PRINTF("check byte in history\n"); - c = ci->hbuf[ci->hlen + offset]; - } else { - DEBUG_PRINTF("before history and return\n"); - return 1; - } - } - - if (((and_mask & c) != cmp_mask) ^ negation) { - DEBUG_PRINTF("char 0x%02x at offset %lld failed byte check\n", - c, offset); - return 0; - } - - DEBUG_PRINTF("real offset=%lld char=%02x\n", offset, c); - DEBUG_PRINTF("OK :)\n"); - return 1; -} - -static rose_inline -int roseCheckMask(const struct core_info *ci, u64a and_mask, u64a cmp_mask, - u64a neg_mask, s32 checkOffset, u64a end) { - const s64a base_offset = (s64a)end - ci->buf_offset; - s64a offset = base_offset + checkOffset; - DEBUG_PRINTF("rel offset %lld\n",base_offset); - DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); - if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - - u64a data = 0; - u64a valid_data_mask = ~0ULL; // mask for validate check. - //A 0xff byte means that this byte is in the buffer. - s32 shift_l = 0; // size of bytes in the future. - s32 shift_r = 0; // size of bytes before the history. - s32 h_len = 0; // size of bytes in the history buffer. - s32 c_len = 8; // size of bytes in the current buffer. - if (offset < 0) { - // in or before history buffer. - if (offset + 8 <= -(s64a)ci->hlen) { - DEBUG_PRINTF("before history and return\n"); - return 1; - } - const u8 *h_start = ci->hbuf; // start pointer in history buffer. - if (offset < -(s64a)ci->hlen) { - // some bytes are before history. - shift_r = -(offset + (s64a)ci->hlen); - DEBUG_PRINTF("shift_r %d", shift_r); - } else { - h_start += ci->hlen + offset; - } - if (offset + 7 < 0) { - DEBUG_PRINTF("all in history buffer\n"); - data = partial_load_u64a(h_start, 8 - shift_r); - } else { - // history part - c_len = offset + 8; - h_len = -offset - shift_r; - DEBUG_PRINTF("%d bytes in history\n", h_len); - s64a data_h = 0; - data_h = partial_load_u64a(h_start, h_len); - // current part - if (c_len > (s64a)ci->len) { - shift_l = c_len - ci->len; - c_len = ci->len; - } - data = partial_load_u64a(ci->buf, c_len); - data <<= h_len << 3; - data |= data_h; - } - if (shift_r) { - data <<= shift_r << 3; - } - } else { - // current buffer. - if (offset + c_len > (s64a)ci->len) { - if (offset >= (s64a)ci->len) { - DEBUG_PRINTF("all in the future\n"); - return 1; - } - // some bytes in the future. 
- shift_l = offset + c_len - ci->len; - c_len = ci->len - offset; - data = partial_load_u64a(ci->buf + offset, c_len); - } else { - data = unaligned_load_u64a(ci->buf + offset); - } - } - - if (shift_l || shift_r) { - valid_data_mask = generateValidMask(shift_l, shift_r); - } - DEBUG_PRINTF("valid_data_mask %llx\n", valid_data_mask); - - if (validateMask(data, valid_data_mask, - and_mask, cmp_mask, neg_mask)) { - DEBUG_PRINTF("check mask successfully\n"); - return 1; - } else { - return 0; - } -} - -static rose_inline -int roseCheckMask32(const struct core_info *ci, const u8 *and_mask, - const u8 *cmp_mask, const u32 neg_mask, - s32 checkOffset, u64a end) { - const s64a base_offset = (s64a)end - ci->buf_offset; - s64a offset = base_offset + checkOffset; - DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); - DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); - - if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - - m256 data = zeroes256(); // consists of the following four parts. - s32 c_shift = 0; // blank bytes after current. - s32 h_shift = 0; // blank bytes before history. - s32 h_len = 32; // number of bytes from history buffer. - s32 c_len = 0; // number of bytes from current buffer. - /* h_shift + h_len + c_len + c_shift = 32 need to be hold.*/ - - if (offset < 0) { - s32 h_offset = 0; // the start offset in history buffer. - if (offset < -(s64a)ci->hlen) { - if (offset + 32 <= -(s64a)ci->hlen) { - DEBUG_PRINTF("all before history\n"); - return 1; - } - h_shift = -(offset + (s64a)ci->hlen); - h_len = 32 - h_shift; - } else { - h_offset = ci->hlen + offset; - } - if (offset + 32 > 0) { - // part in current buffer. - c_len = offset + 32; - h_len = -(offset + h_shift); - if (c_len > (s64a)ci->len) { - // out of current buffer. - c_shift = c_len - ci->len; - c_len = ci->len; - } - copy_upto_32_bytes((u8 *)&data - offset, ci->buf, c_len); - } - assert(h_shift + h_len + c_len + c_shift == 32); - copy_upto_32_bytes((u8 *)&data + h_shift, ci->hbuf + h_offset, h_len); - } else { - if (offset + 32 > (s64a)ci->len) { - if (offset >= (s64a)ci->len) { - DEBUG_PRINTF("all in the future.\n"); - return 1; - } - c_len = ci->len - offset; - c_shift = 32 - c_len; - copy_upto_32_bytes((u8 *)&data, ci->buf + offset, c_len); - } else { - data = loadu256(ci->buf + offset); - } - } - DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift); - DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len); - // we use valid_data_mask to blind bytes before history/in the future. - u32 valid_data_mask; - valid_data_mask = (~0u) << (h_shift + c_shift) >> (c_shift); - - m256 and_mask_m256 = loadu256(and_mask); - m256 cmp_mask_m256 = loadu256(cmp_mask); - if (validateMask32(data, valid_data_mask, and_mask_m256, - cmp_mask_m256, neg_mask)) { - DEBUG_PRINTF("Mask32 passed\n"); - return 1; - } - return 0; -} - -// get 128/256 bits data from history and current buffer. -// return data and valid_data_mask. -static rose_inline -u32 getBufferDataComplex(const struct core_info *ci, const s64a loc, - u8 *data, const u32 data_len) { - assert(data_len == 16 || data_len == 32); - s32 c_shift = 0; // blank bytes after current. - s32 h_shift = 0; // blank bytes before history. - s32 h_len = data_len; // number of bytes from history buffer. - s32 c_len = 0; // number of bytes from current buffer. - if (loc < 0) { - s32 h_offset = 0; // the start offset in history buffer. 
- if (loc < -(s64a)ci->hlen) { - if (loc + data_len <= -(s64a)ci->hlen) { - DEBUG_PRINTF("all before history\n"); - return 0; - } - h_shift = -(loc + (s64a)ci->hlen); - h_len = data_len - h_shift; - } else { - h_offset = ci->hlen + loc; - } - if (loc + data_len > 0) { - // part in current buffer. - c_len = loc + data_len; - h_len = -(loc + h_shift); - if (c_len > (s64a)ci->len) { - // out of current buffer. - c_shift = c_len - ci->len; - c_len = ci->len; - } - copy_upto_32_bytes(data - loc, ci->buf, c_len); - } - assert(h_shift + h_len + c_len + c_shift == (s32)data_len); - copy_upto_32_bytes(data + h_shift, ci->hbuf + h_offset, h_len); - } else { - if (loc + data_len > (s64a)ci->len) { - if (loc >= (s64a)ci->len) { - DEBUG_PRINTF("all in the future.\n"); - return 0; - } - c_len = ci->len - loc; - c_shift = data_len - c_len; - copy_upto_32_bytes(data, ci->buf + loc, c_len); - } else { - if (data_len == 16) { - storeu128(data, loadu128(ci->buf + loc)); - return 0xffff; - } else { - storeu256(data, loadu256(ci->buf + loc)); - return 0xffffffff; - } - } - } - DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift); - DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len); - - if (data_len == 16) { - return (u16)(0xffff << (h_shift + c_shift)) >> c_shift; - } else { - return (~0u) << (h_shift + c_shift) >> c_shift; - } -} - -static rose_inline -m128 getData128(const struct core_info *ci, s64a offset, u32 *valid_data_mask) { - if (offset > 0 && offset + sizeof(m128) <= ci->len) { - *valid_data_mask = 0xffff; - return loadu128(ci->buf + offset); - } - ALIGN_DIRECTIVE u8 data[sizeof(m128)]; - *valid_data_mask = getBufferDataComplex(ci, offset, data, 16); - return *(m128 *)data; -} - -static rose_inline -m256 getData256(const struct core_info *ci, s64a offset, u32 *valid_data_mask) { - if (offset > 0 && offset + sizeof(m256) <= ci->len) { - *valid_data_mask = ~0u; - return loadu256(ci->buf + offset); - } - ALIGN_AVX_DIRECTIVE u8 data[sizeof(m256)]; - *valid_data_mask = getBufferDataComplex(ci, offset, data, 32); - return *(m256 *)data; -} - -static rose_inline -int roseCheckShufti16x8(const struct core_info *ci, const u8 *nib_mask, - const u8 *bucket_select_mask, u32 neg_mask, - s32 checkOffset, u64a end) { - const s64a base_offset = (s64a)end - ci->buf_offset; - s64a offset = base_offset + checkOffset; - DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); - DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); - - if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - - u32 valid_data_mask = 0; - m128 data = getData128(ci, offset, &valid_data_mask); - if (unlikely(!valid_data_mask)) { - return 1; - } - - m256 nib_mask_m256 = loadu256(nib_mask); - m128 bucket_select_mask_m128 = loadu128(bucket_select_mask); - if (validateShuftiMask16x8(data, nib_mask_m256, - bucket_select_mask_m128, - neg_mask, valid_data_mask)) { - DEBUG_PRINTF("check shufti 16x8 successfully\n"); - return 1; - } else { - return 0; - } -} - -static rose_inline -int roseCheckShufti16x16(const struct core_info *ci, const u8 *hi_mask, - const u8 *lo_mask, const u8 *bucket_select_mask, - u32 neg_mask, s32 checkOffset, u64a end) { - const s64a base_offset = (s64a)end - ci->buf_offset; - s64a offset = base_offset + checkOffset; - DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); - DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); - - if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { - DEBUG_PRINTF("too 
early, fail\n"); - return 0; - } - - u32 valid_data_mask = 0; - m128 data = getData128(ci, offset, &valid_data_mask); - if (unlikely(!valid_data_mask)) { - return 1; - } - - m256 data_m256 = set2x128(data); - m256 hi_mask_m256 = loadu256(hi_mask); - m256 lo_mask_m256 = loadu256(lo_mask); - m256 bucket_select_mask_m256 = loadu256(bucket_select_mask); - if (validateShuftiMask16x16(data_m256, hi_mask_m256, lo_mask_m256, - bucket_select_mask_m256, - neg_mask, valid_data_mask)) { - DEBUG_PRINTF("check shufti 16x16 successfully\n"); - return 1; - } else { - return 0; - } -} - -static rose_inline -int roseCheckShufti32x8(const struct core_info *ci, const u8 *hi_mask, - const u8 *lo_mask, const u8 *bucket_select_mask, - u32 neg_mask, s32 checkOffset, u64a end) { - const s64a base_offset = (s64a)end - ci->buf_offset; - s64a offset = base_offset + checkOffset; - DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); - DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); - - if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - - u32 valid_data_mask = 0; - m256 data = getData256(ci, offset, &valid_data_mask); - if (unlikely(!valid_data_mask)) { - return 1; - } - - m128 hi_mask_m128 = loadu128(hi_mask); - m128 lo_mask_m128 = loadu128(lo_mask); - m256 hi_mask_m256 = set2x128(hi_mask_m128); - m256 lo_mask_m256 = set2x128(lo_mask_m128); - m256 bucket_select_mask_m256 = loadu256(bucket_select_mask); - if (validateShuftiMask32x8(data, hi_mask_m256, lo_mask_m256, - bucket_select_mask_m256, - neg_mask, valid_data_mask)) { - DEBUG_PRINTF("check shufti 32x8 successfully\n"); - return 1; - } else { - return 0; - } -} - -static rose_inline -int roseCheckShufti32x16(const struct core_info *ci, const u8 *hi_mask, - const u8 *lo_mask, const u8 *bucket_select_mask_hi, - const u8 *bucket_select_mask_lo, u32 neg_mask, - s32 checkOffset, u64a end) { - const s64a base_offset = (s64a)end - ci->buf_offset; - s64a offset = base_offset + checkOffset; - DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); - DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); - - if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - - u32 valid_data_mask = 0; - m256 data = getData256(ci, offset, &valid_data_mask); - if (unlikely(!valid_data_mask)) { - return 1; - } - - m256 hi_mask_1 = loadu2x128(hi_mask); - m256 hi_mask_2 = loadu2x128(hi_mask + 16); - m256 lo_mask_1 = loadu2x128(lo_mask); - m256 lo_mask_2 = loadu2x128(lo_mask + 16); - - m256 bucket_mask_hi = loadu256(bucket_select_mask_hi); - m256 bucket_mask_lo = loadu256(bucket_select_mask_lo); - if (validateShuftiMask32x16(data, hi_mask_1, hi_mask_2, - lo_mask_1, lo_mask_2, bucket_mask_hi, - bucket_mask_lo, neg_mask, valid_data_mask)) { - DEBUG_PRINTF("check shufti 32x16 successfully\n"); - return 1; - } else { - return 0; - } -} - -static rose_inline -int roseCheckSingleLookaround(const struct RoseEngine *t, - const struct hs_scratch *scratch, - s8 checkOffset, u32 lookaroundReachIndex, - u64a end) { - assert(lookaroundReachIndex != MO_INVALID_IDX); - const struct core_info *ci = &scratch->core_info; - DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, - ci->buf_offset, ci->buf_offset + ci->len); - - const s64a base_offset = end - ci->buf_offset; - const s64a offset = base_offset + checkOffset; - DEBUG_PRINTF("base_offset=%lld\n", base_offset); - DEBUG_PRINTF("checkOffset=%d offset=%lld\n", 
checkOffset, offset); - - if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - - const u8 *reach = getByOffset(t, lookaroundReachIndex); - - u8 c; - if (offset >= 0 && offset < (s64a)ci->len) { - c = ci->buf[offset]; - } else if (offset < 0 && offset >= -(s64a)ci->hlen) { - c = ci->hbuf[ci->hlen + offset]; - } else { - return 1; - } - - if (!reachHasBit(reach, c)) { - DEBUG_PRINTF("char 0x%02x failed reach check\n", c); - return 0; - } - - DEBUG_PRINTF("OK :)\n"); - return 1; -} - -/** - * \brief Scan around a literal, checking that that "lookaround" reach masks - * are satisfied. - */ -static rose_inline -int roseCheckLookaround(const struct RoseEngine *t, - const struct hs_scratch *scratch, - u32 lookaroundLookIndex, u32 lookaroundReachIndex, - u32 lookaroundCount, u64a end) { - assert(lookaroundLookIndex != MO_INVALID_IDX); - assert(lookaroundReachIndex != MO_INVALID_IDX); - assert(lookaroundCount > 0); - - const struct core_info *ci = &scratch->core_info; - DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, - ci->buf_offset, ci->buf_offset + ci->len); - - const s8 *look = getByOffset(t, lookaroundLookIndex); - const s8 *look_end = look + lookaroundCount; - assert(look < look_end); - - const u8 *reach = getByOffset(t, lookaroundReachIndex); - - // The following code assumes that the lookaround structures are ordered by - // increasing offset. - - const s64a base_offset = end - ci->buf_offset; - DEBUG_PRINTF("base_offset=%lld\n", base_offset); - DEBUG_PRINTF("first look has offset %d\n", *look); - - // If our first check tells us we need to look at an offset before the - // start of the stream, this role cannot match. - if (unlikely(*look < 0 && (u64a)(0 - *look) > end)) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - - // Skip over offsets that are before the history buffer. - do { - s64a offset = base_offset + *look; - if (offset >= -(s64a)ci->hlen) { - goto in_history; - } - DEBUG_PRINTF("look=%d before history\n", *look); - look++; - reach += REACH_BITVECTOR_LEN; - } while (look < look_end); - - // History buffer. - DEBUG_PRINTF("scan history (%zu looks left)\n", look_end - look); - for (; look < look_end; ++look, reach += REACH_BITVECTOR_LEN) { - in_history: - ; - s64a offset = base_offset + *look; - DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset); - - if (offset >= 0) { - DEBUG_PRINTF("in buffer\n"); - goto in_buffer; - } - - assert(offset >= -(s64a)ci->hlen && offset < 0); - u8 c = ci->hbuf[ci->hlen + offset]; - if (!reachHasBit(reach, c)) { - DEBUG_PRINTF("char 0x%02x failed reach check\n", c); - return 0; - } - } - // Current buffer. - DEBUG_PRINTF("scan buffer (%zu looks left)\n", look_end - look); - for (; look < look_end; ++look, reach += REACH_BITVECTOR_LEN) { - in_buffer: - ; - s64a offset = base_offset + *look; - DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset); - - if (offset >= (s64a)ci->len) { - DEBUG_PRINTF("in the future\n"); - break; - } - - assert(offset >= 0 && offset < (s64a)ci->len); - u8 c = ci->buf[offset]; - if (!reachHasBit(reach, c)) { - DEBUG_PRINTF("char 0x%02x failed reach check\n", c); - return 0; - } - } - - DEBUG_PRINTF("OK :)\n"); - return 1; -} - -/** - * \brief Trying to find a matching path by the corresponding path mask of - * every lookaround location. 
- */ -static rose_inline -int roseMultipathLookaround(const struct RoseEngine *t, - const struct hs_scratch *scratch, - u32 multipathLookaroundLookIndex, - u32 multipathLookaroundReachIndex, - u32 multipathLookaroundCount, - s32 last_start, const u8 *start_mask, - u64a end) { - assert(multipathLookaroundCount > 0); - - const struct core_info *ci = &scratch->core_info; - DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, - ci->buf_offset, ci->buf_offset + ci->len); - - const s8 *look = getByOffset(t, multipathLookaroundLookIndex); - const s8 *look_end = look + multipathLookaroundCount; - assert(look < look_end); - - const u8 *reach = getByOffset(t, multipathLookaroundReachIndex); - - const s64a base_offset = (s64a)end - ci->buf_offset; - DEBUG_PRINTF("base_offset=%lld\n", base_offset); - - u8 path = 0xff; - - assert(last_start < 0); - - if (unlikely((u64a)(0 - last_start) > end)) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - - s8 base_look_offset = *look; - do { - s64a offset = base_offset + *look; - u32 start_offset = (u32)(*look - base_look_offset); - DEBUG_PRINTF("start_mask[%u] = %x\n", start_offset, - start_mask[start_offset]); - path = start_mask[start_offset]; - if (offset >= -(s64a)ci->hlen) { - break; - } - DEBUG_PRINTF("look=%d before history\n", *look); - look++; - reach += MULTI_REACH_BITVECTOR_LEN; - } while (look < look_end); - - DEBUG_PRINTF("scan history (%zu looks left)\n", look_end - look); - for (; look < look_end; ++look, reach += MULTI_REACH_BITVECTOR_LEN) { - s64a offset = base_offset + *look; - DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset); - - if (offset >= 0) { - DEBUG_PRINTF("in buffer\n"); - break; - } - - assert(offset >= -(s64a)ci->hlen && offset < 0); - u8 c = ci->hbuf[ci->hlen + offset]; - path &= reach[c]; - DEBUG_PRINTF("reach[%x] = %02x path = %0xx\n", c, reach[c], path); - if (!path) { - DEBUG_PRINTF("char 0x%02x failed reach check\n", c); - return 0; - } - } - - DEBUG_PRINTF("scan buffer (%zu looks left)\n", look_end - look); - for(; look < look_end; ++look, reach += MULTI_REACH_BITVECTOR_LEN) { - s64a offset = base_offset + *look; - DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset); - - if (offset >= (s64a)ci->len) { - DEBUG_PRINTF("in the future\n"); - break; - } - - assert(offset >= 0 && offset < (s64a)ci->len); - u8 c = ci->buf[offset]; - path &= reach[c]; - DEBUG_PRINTF("reach[%x] = %02x path = %0xx\n", c, reach[c], path); - if (!path) { - DEBUG_PRINTF("char 0x%02x failed reach check\n", c); - return 0; - } - } - - DEBUG_PRINTF("OK :)\n"); - return 1; -} - -static never_inline -int roseCheckMultipathShufti16x8(const struct hs_scratch *scratch, - const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_16x8 *ri, - u64a end) { - const struct core_info *ci = &scratch->core_info; - s32 checkOffset = ri->base_offset; - const s64a base_offset = (s64a)end - ci->buf_offset; - s64a offset = base_offset + checkOffset; - DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); - DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); - - assert(ri->last_start <= 0); - if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { - if ((u64a)(0 - ri->last_start) > end) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - } - - u32 valid_data_mask; - m128 data_init = getData128(ci, offset, &valid_data_mask); - m128 data_select_mask = loadu128(ri->data_select_mask); - - u32 valid_path_mask = 0; - if (unlikely(!(valid_data_mask & 1))) { - DEBUG_PRINTF("lose part of backward data\n"); - 
DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); - - m128 expand_valid; - u64a expand_mask = 0x8080808080808080ULL; - u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); - u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); - DEBUG_PRINTF("expand_hi %llx\n", valid_hi); - DEBUG_PRINTF("expand_lo %llx\n", valid_lo); - expand_valid = set64x2(valid_hi, valid_lo); - valid_path_mask = ~movemask128(pshufb_m128(expand_valid, - data_select_mask)); - } - - m128 data = pshufb_m128(data_init, data_select_mask); - m256 nib_mask = loadu256(ri->nib_mask); - m128 bucket_select_mask = loadu128(ri->bucket_select_mask); - - u32 hi_bits_mask = ri->hi_bits_mask; - u32 lo_bits_mask = ri->lo_bits_mask; - u32 neg_mask = ri->neg_mask; - - if (validateMultipathShuftiMask16x8(data, nib_mask, - bucket_select_mask, - hi_bits_mask, lo_bits_mask, - neg_mask, valid_path_mask)) { - DEBUG_PRINTF("check multi-path shufti-16x8 successfully\n"); - return 1; - } else { - return 0; - } -} - -static never_inline -int roseCheckMultipathShufti32x8(const struct hs_scratch *scratch, - const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x8 *ri, - u64a end) { - const struct core_info *ci = &scratch->core_info; - s32 checkOffset = ri->base_offset; - const s64a base_offset = (s64a)end - ci->buf_offset; - s64a offset = base_offset + checkOffset; - DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); - DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); - - assert(ri->last_start <= 0); - if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { - if ((u64a)(0 - ri->last_start) > end) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - } - - u32 valid_data_mask; - m128 data_m128 = getData128(ci, offset, &valid_data_mask); - m256 data_double = set2x128(data_m128); - m256 data_select_mask = loadu256(ri->data_select_mask); - - u32 valid_path_mask = 0; - m256 expand_valid; - if (unlikely(!(valid_data_mask & 1))) { - DEBUG_PRINTF("lose part of backward data\n"); - DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); - - u64a expand_mask = 0x8080808080808080ULL; - u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); - u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); - DEBUG_PRINTF("expand_hi %llx\n", valid_hi); - DEBUG_PRINTF("expand_lo %llx\n", valid_lo); - expand_valid = set64x4(valid_hi, valid_lo, valid_hi, - valid_lo); - valid_path_mask = ~movemask256(pshufb_m256(expand_valid, - data_select_mask)); - } - - m256 data = pshufb_m256(data_double, data_select_mask); - m256 hi_mask = loadu2x128(ri->hi_mask); - m256 lo_mask = loadu2x128(ri->lo_mask); - m256 bucket_select_mask = loadu256(ri->bucket_select_mask); - - u32 hi_bits_mask = ri->hi_bits_mask; - u32 lo_bits_mask = ri->lo_bits_mask; - u32 neg_mask = ri->neg_mask; - - if (validateMultipathShuftiMask32x8(data, hi_mask, lo_mask, - bucket_select_mask, - hi_bits_mask, lo_bits_mask, - neg_mask, valid_path_mask)) { - DEBUG_PRINTF("check multi-path shufti-32x8 successfully\n"); - return 1; - } else { - return 0; - } -} - -static never_inline -int roseCheckMultipathShufti32x16(const struct hs_scratch *scratch, - const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x16 *ri, - u64a end) { - const struct core_info *ci = &scratch->core_info; - const s64a base_offset = (s64a)end - ci->buf_offset; - s32 checkOffset = ri->base_offset; - s64a offset = base_offset + checkOffset; - DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); - DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); - - 
assert(ri->last_start <= 0); - if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { - if ((u64a)(0 - ri->last_start) > end) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - } - - u32 valid_data_mask; - m128 data_m128 = getData128(ci, offset, &valid_data_mask); - m256 data_double = set2x128(data_m128); - m256 data_select_mask = loadu256(ri->data_select_mask); - - u32 valid_path_mask = 0; - m256 expand_valid; - if (unlikely(!(valid_data_mask & 1))) { - DEBUG_PRINTF("lose part of backward data\n"); - DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); - - u64a expand_mask = 0x8080808080808080ULL; - u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); - u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); - DEBUG_PRINTF("expand_hi %llx\n", valid_hi); - DEBUG_PRINTF("expand_lo %llx\n", valid_lo); - expand_valid = set64x4(valid_hi, valid_lo, valid_hi, - valid_lo); - valid_path_mask = ~movemask256(pshufb_m256(expand_valid, - data_select_mask)); - } - - m256 data = pshufb_m256(data_double, data_select_mask); - - m256 hi_mask_1 = loadu2x128(ri->hi_mask); - m256 hi_mask_2 = loadu2x128(ri->hi_mask + 16); - m256 lo_mask_1 = loadu2x128(ri->lo_mask); - m256 lo_mask_2 = loadu2x128(ri->lo_mask + 16); - - m256 bucket_select_mask_hi = loadu256(ri->bucket_select_mask_hi); - m256 bucket_select_mask_lo = loadu256(ri->bucket_select_mask_lo); - - u32 hi_bits_mask = ri->hi_bits_mask; - u32 lo_bits_mask = ri->lo_bits_mask; - u32 neg_mask = ri->neg_mask; - - if (validateMultipathShuftiMask32x16(data, hi_mask_1, hi_mask_2, - lo_mask_1, lo_mask_2, - bucket_select_mask_hi, - bucket_select_mask_lo, - hi_bits_mask, lo_bits_mask, - neg_mask, valid_path_mask)) { - DEBUG_PRINTF("check multi-path shufti-32x16 successfully\n"); - return 1; - } else { - return 0; - } -} - -static never_inline -int roseCheckMultipathShufti64(const struct hs_scratch *scratch, - const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64 *ri, - u64a end) { - const struct core_info *ci = &scratch->core_info; - const s64a base_offset = (s64a)end - ci->buf_offset; - s32 checkOffset = ri->base_offset; - s64a offset = base_offset + checkOffset; - DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); - DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); - - if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { - if ((u64a)(0 - ri->last_start) > end) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - } - - u32 valid_data_mask; - m128 data_m128 = getData128(ci, offset, &valid_data_mask); - m256 data_m256 = set2x128(data_m128); - m256 data_select_mask_1 = loadu256(ri->data_select_mask); - m256 data_select_mask_2 = loadu256(ri->data_select_mask + 32); - - u64a valid_path_mask = 0; - m256 expand_valid; - if (unlikely(!(valid_data_mask & 1))) { - DEBUG_PRINTF("lose part of backward data\n"); - DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); - - u64a expand_mask = 0x8080808080808080ULL; - u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); - u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); - DEBUG_PRINTF("expand_hi %llx\n", valid_hi); - DEBUG_PRINTF("expand_lo %llx\n", valid_lo); - expand_valid = set64x4(valid_hi, valid_lo, valid_hi, - valid_lo); - u32 valid_path_1 = movemask256(pshufb_m256(expand_valid, - data_select_mask_1)); - u32 valid_path_2 = movemask256(pshufb_m256(expand_valid, - data_select_mask_2)); - valid_path_mask = ~((u64a)valid_path_1 | (u64a)valid_path_2 << 32); - } - - m256 data_1 = pshufb_m256(data_m256, data_select_mask_1); - m256 
data_2 = pshufb_m256(data_m256, data_select_mask_2); - - m256 hi_mask = loadu2x128(ri->hi_mask); - m256 lo_mask = loadu2x128(ri->lo_mask); - - m256 bucket_select_mask_1 = loadu256(ri->bucket_select_mask); - m256 bucket_select_mask_2 = loadu256(ri->bucket_select_mask + 32); - - u64a hi_bits_mask = ri->hi_bits_mask; - u64a lo_bits_mask = ri->lo_bits_mask; - u64a neg_mask = ri->neg_mask; - - if (validateMultipathShuftiMask64(data_1, data_2, hi_mask, lo_mask, - bucket_select_mask_1, - bucket_select_mask_2, hi_bits_mask, - lo_bits_mask, neg_mask, - valid_path_mask)) { - DEBUG_PRINTF("check multi-path shufti-64 successfully\n"); - return 1; - } else { - return 0; - } -} - -int roseNfaEarliestSom(u64a start, u64a end, ReportID id, void *context); - -static rose_inline -u64a roseGetHaigSom(const struct RoseEngine *t, struct hs_scratch *scratch, - const u32 qi, UNUSED const u32 leftfixLag) { - u32 ri = queueToLeftIndex(t, qi); - - UNUSED const struct LeftNfaInfo *left = getLeftTable(t) + ri; - - DEBUG_PRINTF("testing %s prefix %u/%u with lag %u (maxLag=%u)\n", - left->transient ? "transient" : "active", ri, qi, - leftfixLag, left->maxLag); - - assert(leftfixLag <= left->maxLag); - - struct mq *q = scratch->queues + qi; - - u64a start = ~0ULL; - - /* switch the callback + context for a fun one */ - q->cb = roseNfaEarliestSom; - q->context = &start; - - nfaReportCurrentMatches(q->nfa, q); - - /* restore the old callback + context */ - q->cb = roseNfaAdaptor; - q->context = NULL; - DEBUG_PRINTF("earliest som is %llu\n", start); - return start; -} - -static rose_inline -char roseCheckBounds(u64a end, u64a min_bound, u64a max_bound) { - DEBUG_PRINTF("check offset=%llu against bounds [%llu,%llu]\n", end, - min_bound, max_bound); - assert(min_bound <= max_bound); - return end >= min_bound && end <= max_bound; -} - -static rose_inline -hwlmcb_rv_t roseEnginesEod(const struct RoseEngine *rose, - struct hs_scratch *scratch, u64a offset, - u32 iter_offset) { - const char is_streaming = rose->mode != HS_MODE_BLOCK; - - /* data, len is used for state decompress, should be full available data */ - u8 key = 0; - if (is_streaming) { - const u8 *eod_data = scratch->core_info.hbuf; - size_t eod_len = scratch->core_info.hlen; - key = eod_len ? eod_data[eod_len - 1] : 0; - } - - const u8 *aa = getActiveLeafArray(rose, scratch->core_info.state); - const u32 aaCount = rose->activeArrayCount; - const u32 qCount = rose->queueCount; - struct fatbit *aqa = scratch->aqa; - - const struct mmbit_sparse_iter *it = getByOffset(rose, iter_offset); - assert(ISALIGNED(it)); - - u32 idx = 0; - struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; - - for (u32 qi = mmbit_sparse_iter_begin(aa, aaCount, &idx, it, si_state); - qi != MMB_INVALID; - qi = mmbit_sparse_iter_next(aa, aaCount, qi, &idx, it, si_state)) { - DEBUG_PRINTF("checking nfa %u\n", qi); - struct mq *q = scratch->queues + qi; - if (!fatbit_set(aqa, qCount, qi)) { - initQueue(q, qi, rose, scratch); - } - - assert(q->nfa == getNfaByQueue(rose, qi)); - assert(nfaAcceptsEod(q->nfa)); - - if (is_streaming) { - // Decompress stream state. 
- nfaExpandState(q->nfa, q->state, q->streamState, offset, key); - } - - if (nfaCheckFinalState(q->nfa, q->state, q->streamState, offset, - roseReportAdaptor, - scratch) == MO_HALT_MATCHING) { - DEBUG_PRINTF("user instructed us to stop\n"); - return HWLM_TERMINATE_MATCHING; - } - } - - return HWLM_CONTINUE_MATCHING; -} - -static rose_inline -hwlmcb_rv_t roseSuffixesEod(const struct RoseEngine *rose, - struct hs_scratch *scratch, u64a offset) { - const u8 *aa = getActiveLeafArray(rose, scratch->core_info.state); - const u32 aaCount = rose->activeArrayCount; - - for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID; - qi = mmbit_iterate(aa, aaCount, qi)) { - DEBUG_PRINTF("checking nfa %u\n", qi); - struct mq *q = scratch->queues + qi; - assert(q->nfa == getNfaByQueue(rose, qi)); - assert(nfaAcceptsEod(q->nfa)); - - /* We have just been triggered. */ - assert(fatbit_isset(scratch->aqa, rose->queueCount, qi)); - - pushQueueNoMerge(q, MQE_END, scratch->core_info.len); - q->context = NULL; - - /* rose exec is used as we don't want to / can't raise matches in the - * history buffer. */ - if (!nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX)) { - DEBUG_PRINTF("nfa is dead\n"); - continue; - } - if (nfaCheckFinalState(q->nfa, q->state, q->streamState, offset, - roseReportAdaptor, - scratch) == MO_HALT_MATCHING) { - DEBUG_PRINTF("user instructed us to stop\n"); - return HWLM_TERMINATE_MATCHING; - } - } - return HWLM_CONTINUE_MATCHING; -} - -static rose_inline -hwlmcb_rv_t roseMatcherEod(const struct RoseEngine *rose, - struct hs_scratch *scratch, u64a offset) { - assert(rose->ematcherOffset); - assert(rose->ematcherRegionSize); - - // Clear role state and active engines, since we have already handled all - // outstanding work there. - DEBUG_PRINTF("clear role state and active leaf array\n"); - char *state = scratch->core_info.state; - mmbit_clear(getRoleState(state), rose->rolesWithStateCount); - mmbit_clear(getActiveLeafArray(rose, state), rose->activeArrayCount); - - const char is_streaming = rose->mode != HS_MODE_BLOCK; - - size_t eod_len; - const u8 *eod_data; - if (!is_streaming) { /* Block */ - eod_data = scratch->core_info.buf; - eod_len = scratch->core_info.len; - } else { /* Streaming */ - eod_len = scratch->core_info.hlen; - eod_data = scratch->core_info.hbuf; - } - - assert(eod_data); - assert(eod_len); - - DEBUG_PRINTF("%zu bytes of eod data to scan at offset %llu\n", eod_len, - offset); - - // If we don't have enough bytes to produce a match from an EOD table scan, - // there's no point scanning. - if (eod_len < rose->eodmatcherMinWidth) { - DEBUG_PRINTF("too short for min width %u\n", rose->eodmatcherMinWidth); - return HWLM_CONTINUE_MATCHING; - } - - // Ensure that we only need scan the last N bytes, where N is the length of - // the eod-anchored matcher region. - size_t adj = eod_len - MIN(eod_len, rose->ematcherRegionSize); - - const struct HWLM *etable = getByOffset(rose, rose->ematcherOffset); - hwlmExec(etable, eod_data, eod_len, adj, roseCallback, scratch, - scratch->tctxt.groups); - - // We may need to fire delayed matches. 
- if (cleanUpDelayed(rose, scratch, 0, offset) == HWLM_TERMINATE_MATCHING) { - DEBUG_PRINTF("user instructed us to stop\n"); - return HWLM_TERMINATE_MATCHING; - } - - roseFlushLastByteHistory(rose, scratch, offset); - return HWLM_CONTINUE_MATCHING; -} - -static rose_inline -int roseCheckLongLiteral(const struct RoseEngine *t, - const struct hs_scratch *scratch, u64a end, - u32 lit_offset, u32 lit_length, char nocase) { - const struct core_info *ci = &scratch->core_info; - const u8 *lit = getByOffset(t, lit_offset); - - DEBUG_PRINTF("check lit at %llu, length %u\n", end, lit_length); - DEBUG_PRINTF("base buf_offset=%llu\n", ci->buf_offset); - - if (end < lit_length) { - DEBUG_PRINTF("too short!\n"); - return 0; - } - - // If any portion of the literal matched in the current buffer, check it. - if (end > ci->buf_offset) { - u32 scan_len = MIN(end - ci->buf_offset, lit_length); - u64a scan_start = end - ci->buf_offset - scan_len; - DEBUG_PRINTF("checking suffix (%u bytes) in buf[%llu:%llu]\n", scan_len, - scan_start, end); - if (cmpForward(ci->buf + scan_start, lit + lit_length - scan_len, - scan_len, nocase)) { - DEBUG_PRINTF("cmp of suffix failed\n"); - return 0; - } - } - - // If the entirety of the literal was in the current block, we are done. - if (end - lit_length >= ci->buf_offset) { - DEBUG_PRINTF("literal confirmed in current block\n"); - return 1; - } - - // We still have a prefix which we must test against the buffer prepared by - // the long literal table. This is only done in streaming mode. - - assert(t->mode != HS_MODE_BLOCK); - - const u8 *ll_buf; - size_t ll_len; - if (nocase) { - ll_buf = scratch->tctxt.ll_buf_nocase; - ll_len = scratch->tctxt.ll_len_nocase; - } else { - ll_buf = scratch->tctxt.ll_buf; - ll_len = scratch->tctxt.ll_len; - } - - assert(ll_buf); - - u64a lit_start_offset = end - lit_length; - u32 prefix_len = MIN(lit_length, ci->buf_offset - lit_start_offset); - u32 hist_rewind = ci->buf_offset - lit_start_offset; - DEBUG_PRINTF("ll_len=%zu, hist_rewind=%u\n", ll_len, hist_rewind); - if (hist_rewind > ll_len) { - DEBUG_PRINTF("not enough history\n"); - return 0; - } - - DEBUG_PRINTF("check prefix len=%u from hist (len %zu, rewind %u)\n", - prefix_len, ll_len, hist_rewind); - assert(hist_rewind <= ll_len); - if (cmpForward(ll_buf + ll_len - hist_rewind, lit, prefix_len, nocase)) { - DEBUG_PRINTF("cmp of prefix failed\n"); - return 0; - } - - DEBUG_PRINTF("cmp succeeded\n"); - return 1; -} - -static rose_inline -int roseCheckMediumLiteral(const struct RoseEngine *t, - const struct hs_scratch *scratch, u64a end, - u32 lit_offset, u32 lit_length, char nocase) { - const struct core_info *ci = &scratch->core_info; - const u8 *lit = getByOffset(t, lit_offset); - - DEBUG_PRINTF("check lit at %llu, length %u\n", end, lit_length); - DEBUG_PRINTF("base buf_offset=%llu\n", ci->buf_offset); - - if (end < lit_length) { - DEBUG_PRINTF("too short!\n"); - return 0; - } - - // If any portion of the literal matched in the current buffer, check it. - if (end > ci->buf_offset) { - u32 scan_len = MIN(end - ci->buf_offset, lit_length); - u64a scan_start = end - ci->buf_offset - scan_len; - DEBUG_PRINTF("checking suffix (%u bytes) in buf[%llu:%llu]\n", scan_len, - scan_start, end); - if (cmpForward(ci->buf + scan_start, lit + lit_length - scan_len, - scan_len, nocase)) { - DEBUG_PRINTF("cmp of suffix failed\n"); - return 0; - } - } - - // If the entirety of the literal was in the current block, we are done. 
- if (end - lit_length >= ci->buf_offset) { - DEBUG_PRINTF("literal confirmed in current block\n"); - return 1; - } - - // We still have a prefix which we must test against the history buffer. - assert(t->mode != HS_MODE_BLOCK); - - u64a lit_start_offset = end - lit_length; - u32 prefix_len = MIN(lit_length, ci->buf_offset - lit_start_offset); - u32 hist_rewind = ci->buf_offset - lit_start_offset; - DEBUG_PRINTF("hlen=%zu, hist_rewind=%u\n", ci->hlen, hist_rewind); - - // History length check required for confirm in the EOD and delayed - // rebuild paths. - if (hist_rewind > ci->hlen) { - DEBUG_PRINTF("not enough history\n"); - return 0; - } - - DEBUG_PRINTF("check prefix len=%u from hist (len %zu, rewind %u)\n", - prefix_len, ci->hlen, hist_rewind); - assert(hist_rewind <= ci->hlen); - if (cmpForward(ci->hbuf + ci->hlen - hist_rewind, lit, prefix_len, - nocase)) { - DEBUG_PRINTF("cmp of prefix failed\n"); - return 0; - } - - DEBUG_PRINTF("cmp succeeded\n"); - return 1; -} - -static -void updateSeqPoint(struct RoseContext *tctxt, u64a offset, - const char from_mpv) { - if (from_mpv) { - updateMinMatchOffsetFromMpv(tctxt, offset); - } else { - updateMinMatchOffset(tctxt, offset); - } -} - -static rose_inline -hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t, - struct hs_scratch *scratch) { - u8 *cvec = (u8 *)scratch->core_info.combVector; - if (!mmbit_any(cvec, t->ckeyCount)) { - return HWLM_CONTINUE_MATCHING; - } - u64a end = scratch->tctxt.lastCombMatchOffset; - for (u32 i = mmbit_iterate(cvec, t->ckeyCount, MMB_INVALID); - i != MMB_INVALID; i = mmbit_iterate(cvec, t->ckeyCount, i)) { - const struct CombInfo *combInfoMap = (const struct CombInfo *) - ((const char *)t + t->combInfoMapOffset); - const struct CombInfo *ci = combInfoMap + i; - if ((ci->min_offset != 0) && (end < ci->min_offset)) { - DEBUG_PRINTF("halt: before min_offset=%llu\n", ci->min_offset); - continue; - } - if ((ci->max_offset != MAX_OFFSET) && (end > ci->max_offset)) { - DEBUG_PRINTF("halt: after max_offset=%llu\n", ci->max_offset); - continue; - } - - DEBUG_PRINTF("check ekey %u\n", ci->ekey); - if (ci->ekey != INVALID_EKEY) { - assert(ci->ekey < t->ekeyCount); - const char *evec = scratch->core_info.exhaustionVector; - if (isExhausted(t, evec, ci->ekey)) { - DEBUG_PRINTF("ekey %u already set, match is exhausted\n", - ci->ekey); - continue; - } - } - - DEBUG_PRINTF("check ckey %u\n", i); - char *lvec = scratch->core_info.logicalVector; - if (!isLogicalCombination(t, lvec, ci->start, ci->result)) { - DEBUG_PRINTF("Logical Combination Failed!\n"); - continue; - } - - DEBUG_PRINTF("Logical Combination Passed!\n"); - if (roseReport(t, scratch, end, ci->id, 0, - ci->ekey) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - clearCvec(t, (char *)cvec); - return HWLM_CONTINUE_MATCHING; -} - -#define PROGRAM_CASE(name) \ - case ROSE_INSTR_##name: { \ - DEBUG_PRINTF("instruction: " #name " (pc=%u)\n", \ - programOffset + (u32)(pc - pc_base)); \ - const struct ROSE_STRUCT_##name *ri = \ - (const struct ROSE_STRUCT_##name *)pc; - -#define PROGRAM_NEXT_INSTRUCTION \ - pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ - break; \ - } - -static rose_inline -hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 programOffset, - u64a som, u64a end, u8 prog_flags) { - DEBUG_PRINTF("program=%u, offsets [%llu,%llu], flags=%u\n", programOffset, - som, end, prog_flags); - - assert(programOffset != ROSE_INVALID_PROG_OFFSET); - assert(programOffset >= sizeof(struct 
RoseEngine)); - assert(programOffset < t->size); - - const char in_anchored = prog_flags & ROSE_PROG_FLAG_IN_ANCHORED; - const char in_catchup = prog_flags & ROSE_PROG_FLAG_IN_CATCHUP; - const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV; - const char skip_mpv_catchup = prog_flags & ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; - - const char *pc_base = getByOffset(t, programOffset); - const char *pc = pc_base; - - // Local sparse iterator state for programs that use the SPARSE_ITER_BEGIN - // and SPARSE_ITER_NEXT instructions. - struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; - - // If this program has an effect, work_done will be set to one (which may - // allow the program to squash groups). - int work_done = 0; - - struct RoseContext *tctxt = &scratch->tctxt; - - assert(*(const u8 *)pc != ROSE_INSTR_END); - - for (;;) { - assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN)); - assert(pc >= pc_base); - assert((size_t)(pc - pc_base) < t->size); - const u8 code = *(const u8 *)pc; - assert(code <= LAST_ROSE_INSTRUCTION); - - switch ((enum RoseInstructionCode)code) { - PROGRAM_CASE(END) { - DEBUG_PRINTF("finished\n"); - return HWLM_CONTINUE_MATCHING; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(ANCHORED_DELAY) { - if (in_anchored && end > t->floatingMinLiteralMatchOffset) { - DEBUG_PRINTF("delay until playback\n"); - tctxt->groups |= ri->groups; - work_done = 1; - recordAnchoredLiteralMatch(t, scratch, ri->anch_id, end); - - assert(ri->done_jump); // must progress - pc += ri->done_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_LIT_EARLY) { - if (end < ri->min_offset) { - DEBUG_PRINTF("halt: before min_offset=%u\n", - ri->min_offset); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_GROUPS) { - DEBUG_PRINTF("groups=0x%llx, checking instr groups=0x%llx\n", - tctxt->groups, ri->groups); - if (!(ri->groups & tctxt->groups)) { - DEBUG_PRINTF("halt: no groups are set\n"); - return HWLM_CONTINUE_MATCHING; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_ONLY_EOD) { - struct core_info *ci = &scratch->core_info; - if (end != ci->buf_offset + ci->len) { - DEBUG_PRINTF("should only match at end of data\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_BOUNDS) { - if (!roseCheckBounds(end, ri->min_bound, ri->max_bound)) { - DEBUG_PRINTF("failed bounds check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_NOT_HANDLED) { - struct fatbit *handled = scratch->handled_roles; - if (fatbit_set(handled, t->handledKeyCount, ri->key)) { - DEBUG_PRINTF("key %u already set\n", ri->key); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_SINGLE_LOOKAROUND) { - if (!roseCheckSingleLookaround(t, scratch, ri->offset, - ri->reach_index, end)) { - DEBUG_PRINTF("failed lookaround check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_LOOKAROUND) { - if (!roseCheckLookaround(t, scratch, ri->look_index, - ri->reach_index, ri->count, end)) { - DEBUG_PRINTF("failed lookaround check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MASK) { - struct core_info *ci = 
&scratch->core_info; - if (!roseCheckMask(ci, ri->and_mask, ri->cmp_mask, - ri->neg_mask, ri->offset, end)) { - DEBUG_PRINTF("failed mask check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MASK_32) { - struct core_info *ci = &scratch->core_info; - if (!roseCheckMask32(ci, ri->and_mask, ri->cmp_mask, - ri->neg_mask, ri->offset, end)) { - assert(ri->fail_jump); - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_BYTE) { - const struct core_info *ci = &scratch->core_info; - if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask, - ri->negation, ri->offset, end)) { - DEBUG_PRINTF("failed byte check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_SHUFTI_16x8) { - const struct core_info *ci = &scratch->core_info; - if (!roseCheckShufti16x8(ci, ri->nib_mask, - ri->bucket_select_mask, - ri->neg_mask, ri->offset, end)) { - assert(ri->fail_jump); - pc += ri-> fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_SHUFTI_32x8) { - const struct core_info *ci = &scratch->core_info; - if (!roseCheckShufti32x8(ci, ri->hi_mask, ri->lo_mask, - ri->bucket_select_mask, - ri->neg_mask, ri->offset, end)) { - assert(ri->fail_jump); - pc += ri-> fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_SHUFTI_16x16) { - const struct core_info *ci = &scratch->core_info; - if (!roseCheckShufti16x16(ci, ri->hi_mask, ri->lo_mask, - ri->bucket_select_mask, - ri->neg_mask, ri->offset, end)) { - assert(ri->fail_jump); - pc += ri-> fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_SHUFTI_32x16) { - const struct core_info *ci = &scratch->core_info; - if (!roseCheckShufti32x16(ci, ri->hi_mask, ri->lo_mask, - ri->bucket_select_mask_hi, - ri->bucket_select_mask_lo, - ri->neg_mask, ri->offset, end)) { - assert(ri->fail_jump); - pc += ri-> fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_INFIX) { - if (!roseTestInfix(t, scratch, ri->queue, ri->lag, ri->report, - end)) { - DEBUG_PRINTF("failed infix check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_PREFIX) { - if (!roseTestPrefix(t, scratch, ri->queue, ri->lag, ri->report, - end)) { - DEBUG_PRINTF("failed prefix check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(PUSH_DELAYED) { - rosePushDelayedMatch(t, scratch, ri->delay, ri->index, end); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(DUMMY_NOP) { - assert(0); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CATCH_UP) { - if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CATCH_UP_MPV) { - if (from_mpv || skip_mpv_catchup) { - DEBUG_PRINTF("skipping mpv catchup\n"); - } else if (roseCatchUpMPV(t, - end - scratch->core_info.buf_offset, - scratch) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SOM_ADJUST) { - assert(ri->distance <= end); - som = end - ri->distance; - DEBUG_PRINTF("som is (end - %u) = %llu\n", ri->distance, som); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SOM_LEFTFIX) { - som = roseGetHaigSom(t, scratch, ri->queue, ri->lag); - 
DEBUG_PRINTF("som from leftfix is %llu\n", som); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SOM_FROM_REPORT) { - som = handleSomExternal(scratch, &ri->som, end); - DEBUG_PRINTF("som from report %u is %llu\n", ri->som.onmatch, - som); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SOM_ZERO) { - DEBUG_PRINTF("setting SOM to zero\n"); - som = 0; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(TRIGGER_INFIX) { - roseTriggerInfix(t, scratch, som, end, ri->queue, ri->event, - ri->cancel); - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(TRIGGER_SUFFIX) { - if (roseTriggerSuffix(t, scratch, ri->queue, ri->event, som, - end) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(DEDUPE) { - updateSeqPoint(tctxt, end, from_mpv); - const char do_som = t->hasSom; // TODO: constant propagate - const char is_external_report = 1; - enum DedupeResult rv = - dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, - ri->dkey, ri->offset_adjust, - is_external_report, ri->quash_som, do_som); - switch (rv) { - case DEDUPE_HALT: - return HWLM_TERMINATE_MATCHING; - case DEDUPE_SKIP: - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - case DEDUPE_CONTINUE: - break; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(DEDUPE_SOM) { - updateSeqPoint(tctxt, end, from_mpv); - const char is_external_report = 0; - const char do_som = 1; - enum DedupeResult rv = - dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, - ri->dkey, ri->offset_adjust, - is_external_report, ri->quash_som, do_som); - switch (rv) { - case DEDUPE_HALT: - return HWLM_TERMINATE_MATCHING; - case DEDUPE_SKIP: - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - case DEDUPE_CONTINUE: - break; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_CHAIN) { - // Note: sequence points updated inside this function. 
- if (roseCatchUpAndHandleChainMatch( - t, scratch, ri->event, ri->top_squash_distance, end, - in_catchup) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_SOM_INT) { - updateSeqPoint(tctxt, end, from_mpv); - roseHandleSom(scratch, &ri->som, end); - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_SOM_AWARE) { - updateSeqPoint(tctxt, end, from_mpv); - roseHandleSomSom(scratch, &ri->som, som, end); - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT) { - updateSeqPoint(tctxt, end, from_mpv); - if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, - INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_EXHAUST) { - updateSeqPoint(tctxt, end, from_mpv); - if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, - ri->ekey) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_SOM) { - updateSeqPoint(tctxt, end, from_mpv); - if (roseReportSom(t, scratch, som, end, ri->onmatch, - ri->offset_adjust, - INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_SOM_EXHAUST) { - updateSeqPoint(tctxt, end, from_mpv); - if (roseReportSom(t, scratch, som, end, ri->onmatch, - ri->offset_adjust, - ri->ekey) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(DEDUPE_AND_REPORT) { - updateSeqPoint(tctxt, end, from_mpv); - const char do_som = t->hasSom; // TODO: constant propagate - const char is_external_report = 1; - enum DedupeResult rv = - dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, - ri->dkey, ri->offset_adjust, - is_external_report, ri->quash_som, do_som); - switch (rv) { - case DEDUPE_HALT: - return HWLM_TERMINATE_MATCHING; - case DEDUPE_SKIP: - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - case DEDUPE_CONTINUE: - break; - } - - const u32 ekey = INVALID_EKEY; - if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, - ekey) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(FINAL_REPORT) { - updateSeqPoint(tctxt, end, from_mpv); - if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, - INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - /* One-shot specialisation: this instruction always terminates - * execution of the program. 
*/ - return HWLM_CONTINUE_MATCHING; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_EXHAUSTED) { - DEBUG_PRINTF("check ekey %u\n", ri->ekey); - assert(ri->ekey != INVALID_EKEY); - assert(ri->ekey < t->ekeyCount); - const char *evec = scratch->core_info.exhaustionVector; - if (isExhausted(t, evec, ri->ekey)) { - DEBUG_PRINTF("ekey %u already set, match is exhausted\n", - ri->ekey); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MIN_LENGTH) { - DEBUG_PRINTF("check min length %llu (adj %d)\n", ri->min_length, - ri->end_adj); - assert(ri->min_length > 0); - assert(ri->end_adj == 0 || ri->end_adj == -1); - assert(som == HS_OFFSET_PAST_HORIZON || som <= end); - if (som != HS_OFFSET_PAST_HORIZON && - ((end + ri->end_adj) - som < ri->min_length)) { - DEBUG_PRINTF("failed check, match len %llu\n", - (u64a)((end + ri->end_adj) - som)); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SET_STATE) { - DEBUG_PRINTF("set state index %u\n", ri->index); - mmbit_set(getRoleState(scratch->core_info.state), - t->rolesWithStateCount, ri->index); - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SET_GROUPS) { - tctxt->groups |= ri->groups; - DEBUG_PRINTF("set groups 0x%llx -> 0x%llx\n", ri->groups, - tctxt->groups); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SQUASH_GROUPS) { - assert(popcount64(ri->groups) == 63); // Squash only one group. - if (work_done) { - tctxt->groups &= ri->groups; - DEBUG_PRINTF("squash groups 0x%llx -> 0x%llx\n", ri->groups, - tctxt->groups); - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_STATE) { - DEBUG_PRINTF("check state %u\n", ri->index); - const u8 *roles = getRoleState(scratch->core_info.state); - if (!mmbit_isset(roles, t->rolesWithStateCount, ri->index)) { - DEBUG_PRINTF("state not on\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SPARSE_ITER_BEGIN) { - DEBUG_PRINTF("iter_offset=%u\n", ri->iter_offset); - const struct mmbit_sparse_iter *it = - getByOffset(t, ri->iter_offset); - assert(ISALIGNED(it)); - - const u8 *roles = getRoleState(scratch->core_info.state); - - u32 idx = 0; - u32 i = mmbit_sparse_iter_begin(roles, t->rolesWithStateCount, - &idx, it, si_state); - if (i == MMB_INVALID) { - DEBUG_PRINTF("no states in sparse iter are on\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - - fatbit_clear(scratch->handled_roles); - - const u32 *jumps = getByOffset(t, ri->jump_table); - DEBUG_PRINTF("state %u (idx=%u) is on, jump to %u\n", i, idx, - jumps[idx]); - pc = pc_base + jumps[idx]; - continue; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SPARSE_ITER_NEXT) { - DEBUG_PRINTF("iter_offset=%u, state=%u\n", ri->iter_offset, - ri->state); - const struct mmbit_sparse_iter *it = - getByOffset(t, ri->iter_offset); - assert(ISALIGNED(it)); - - const u8 *roles = getRoleState(scratch->core_info.state); - - u32 idx = 0; - u32 i = mmbit_sparse_iter_next(roles, t->rolesWithStateCount, - ri->state, &idx, it, si_state); - if (i == MMB_INVALID) { - DEBUG_PRINTF("no more states in sparse iter are on\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - - const u32 *jumps = getByOffset(t, ri->jump_table); - DEBUG_PRINTF("state %u (idx=%u) is on, jump to %u\n", i, idx, - jumps[idx]); - pc = pc_base + jumps[idx]; - continue; - } - 
PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SPARSE_ITER_ANY) { - DEBUG_PRINTF("iter_offset=%u\n", ri->iter_offset); - const struct mmbit_sparse_iter *it = - getByOffset(t, ri->iter_offset); - assert(ISALIGNED(it)); - - const u8 *roles = getRoleState(scratch->core_info.state); - - u32 idx = 0; - u32 i = mmbit_sparse_iter_begin(roles, t->rolesWithStateCount, - &idx, it, si_state); - if (i == MMB_INVALID) { - DEBUG_PRINTF("no states in sparse iter are on\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - DEBUG_PRINTF("state %u (idx=%u) is on\n", i, idx); - fatbit_clear(scratch->handled_roles); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(ENGINES_EOD) { - if (roseEnginesEod(t, scratch, end, ri->iter_offset) == - HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SUFFIXES_EOD) { - if (roseSuffixesEod(t, scratch, end) == - HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(MATCHER_EOD) { - if (roseMatcherEod(t, scratch, end) == - HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_LONG_LIT) { - const char nocase = 0; - if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, - ri->lit_length, nocase)) { - DEBUG_PRINTF("failed long lit check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) { - const char nocase = 1; - if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, - ri->lit_length, nocase)) { - DEBUG_PRINTF("failed nocase long lit check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MED_LIT) { - const char nocase = 0; - if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset, - ri->lit_length, nocase)) { - DEBUG_PRINTF("failed lit check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MED_LIT_NOCASE) { - const char nocase = 1; - if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset, - ri->lit_length, nocase)) { - DEBUG_PRINTF("failed long lit check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CLEAR_WORK_DONE) { - DEBUG_PRINTF("clear work_done flag\n"); - work_done = 0; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(MULTIPATH_LOOKAROUND) { - if (!roseMultipathLookaround(t, scratch, ri->look_index, - ri->reach_index, ri->count, - ri->last_start, ri->start_mask, - end)) { - DEBUG_PRINTF("failed multi-path lookaround check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_16x8) { - if (!roseCheckMultipathShufti16x8(scratch, ri, end)) { - DEBUG_PRINTF("failed multi-path shufti 16x8 check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x8) { - if (!roseCheckMultipathShufti32x8(scratch, ri, end)) { - DEBUG_PRINTF("failed multi-path shufti 32x8 check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x16) { - if (!roseCheckMultipathShufti32x16(scratch, ri, end)) 
{ - DEBUG_PRINTF("failed multi-path shufti 32x16 check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_64) { - if (!roseCheckMultipathShufti64(scratch, ri, end)) { - DEBUG_PRINTF("failed multi-path shufti 64 check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(INCLUDED_JUMP) { - if (scratch->fdr_conf) { - // squash the bucket of included literal - u8 shift = scratch->fdr_conf_offset & ~7U; - u64a mask = ((~(u64a)ri->squash) << shift); - *(scratch->fdr_conf) &= mask; - - pc = getByOffset(t, ri->child_offset); - pc_base = pc; - programOffset = (const u8 *)pc_base -(const u8 *)t; - DEBUG_PRINTF("pc_base %p pc %p child_offset %u squash %u\n", - pc_base, pc, ri->child_offset, ri->squash); - work_done = 0; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SET_LOGICAL) { - DEBUG_PRINTF("set logical value of lkey %u, offset_adjust=%d\n", - ri->lkey, ri->offset_adjust); - assert(ri->lkey != INVALID_LKEY); - assert(ri->lkey < t->lkeyCount); - char *lvec = scratch->core_info.logicalVector; - setLogicalVal(t, lvec, ri->lkey, 1); - updateLastCombMatchOffset(tctxt, end + ri->offset_adjust); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SET_COMBINATION) { - DEBUG_PRINTF("set ckey %u as active\n", ri->ckey); - assert(ri->ckey != INVALID_CKEY); - assert(ri->ckey < t->ckeyCount); - char *cvec = scratch->core_info.combVector; - setCombinationActive(t, cvec, ri->ckey); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(FLUSH_COMBINATION) { - assert(end >= tctxt->lastCombMatchOffset); - if (end > tctxt->lastCombMatchOffset) { - if (flushActiveCombinations(t, scratch) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SET_EXHAUST) { - updateSeqPoint(tctxt, end, from_mpv); - if (roseSetExhaust(t, scratch, ri->ekey) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - } - } - - assert(0); // unreachable - return HWLM_CONTINUE_MATCHING; -} - -#undef PROGRAM_CASE -#undef PROGRAM_NEXT_INSTRUCTION - #endif // PROGRAM_RUNTIME_H diff --git a/src/rose/stream_long_lit.h b/src/rose/stream_long_lit.h index 0736ec88..34867608 100644 --- a/src/rose/stream_long_lit.h +++ b/src/rose/stream_long_lit.h @@ -33,6 +33,7 @@ #include "rose_common.h" #include "rose_internal.h" #include "stream_long_lit_hash.h" +#include "util/compare.h" #include "util/copybytes.h" static really_inline From 6f3a0a323eca78f7be3b37045443bd474bcd8333 Mon Sep 17 00:00:00 2001 From: "Wang, Xiang W" Date: Fri, 21 Dec 2018 21:46:04 -0500 Subject: [PATCH 08/21] Silence clang warnings about unused variable --- src/rose/rose_build_dump.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 0cc5b5c3..2eb7bb51 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -115,9 +115,9 @@ class RoseGraphWriter { public: RoseGraphWriter(const RoseBuildImpl &b_in, const map &frag_map_in, const map &lqm_in, - const map &sqm_in, const RoseEngine *t_in) + const map &sqm_in) : frag_map(frag_map_in), leftfix_queue_map(lqm_in), - suffix_queue_map(sqm_in), build(b_in), t(t_in) { + suffix_queue_map(sqm_in), build(b_in) { for (const auto &m : build.ghost) { ghost.insert(m.second); } @@ -273,7 +273,6 @@ private: const map 
&leftfix_queue_map; const map &suffix_queue_map; const RoseBuildImpl &build; - const RoseEngine *t; }; } // namespace @@ -313,8 +312,7 @@ void dumpRoseGraph(const RoseBuildImpl &build, const RoseEngine *t, ofstream os(ss.str()); auto frag_map = makeFragMap(fragments); - RoseGraphWriter writer(build, frag_map, leftfix_queue_map, suffix_queue_map, - t); + RoseGraphWriter writer(build, frag_map, leftfix_queue_map, suffix_queue_map); writeGraphviz(os, build.g, writer, get(boost::vertex_index, build.g)); } From 805a550a0aa5b137eb989558d7969cc2964954d6 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Mon, 24 Dec 2018 04:49:47 +0800 Subject: [PATCH 09/21] mcclellan: wide state fixes for sanitisers and accept state construction --- src/nfa/mcclellan.c | 10 +++++----- src/nfa/mcclellan_common_impl.h | 2 +- src/nfa/mcclellan_internal.h | 4 ++-- src/nfa/mcclellancompile.cpp | 30 +++++++++++++++++------------- 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/src/nfa/mcclellan.c b/src/nfa/mcclellan.c index 1521de5b..71f71e32 100644 --- a/src/nfa/mcclellan.c +++ b/src/nfa/mcclellan.c @@ -1197,7 +1197,7 @@ char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset, // new byte if (m->has_wide) { - *((u16 *)state + 1) = 0; + unaligned_store_u16((u16 *)state + 1, 0); } if (s) { @@ -1236,7 +1236,7 @@ void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state, // new byte if (m->has_wide) { - *((u16 *)state + 1) = 0; + unaligned_store_u16((u16 *)state + 1, 0); } } else { s = unaligned_load_u16(state); @@ -1285,7 +1285,7 @@ char nfaExecMcClellan16_queueInitState(UNUSED const struct NFA *nfa, // new byte if (m->has_wide) { - *((u16 *)q->state + 1) = 0; + unaligned_store_u16((u16 *)q->state + 1, 0); } return 0; } @@ -1325,7 +1325,7 @@ char nfaExecMcClellan16_queueCompressState(UNUSED const struct NFA *nfa, // new byte if (m->has_wide) { - *((u16 *)dest + 1) = *((const u16 *)src + 1); + unaligned_store_u16((u16 *)dest + 1, *((const u16 *)src + 1)); } return 0; } @@ -1344,7 +1344,7 @@ char nfaExecMcClellan16_expandState(UNUSED const struct NFA *nfa, void *dest, // new byte if (m->has_wide) { - *((u16 *)dest + 1) = *((const u16 *)src + 1); + *((u16 *)dest + 1) = unaligned_load_u16((const u16 *)src + 1); } return 0; } diff --git a/src/nfa/mcclellan_common_impl.h b/src/nfa/mcclellan_common_impl.h index b6af672d..7b0e7f48 100644 --- a/src/nfa/mcclellan_common_impl.h +++ b/src/nfa/mcclellan_common_impl.h @@ -88,7 +88,7 @@ u16 doWide16(const char *wide_entry, const u8 **c_inout, const u8 *end, const u8 *remap, const u16 *s, char *qstate, u16 *offset) { // Internal relative offset after the last visit of the wide state. 
if (qstate != NULL) { // stream mode - *offset = *(const u16 *)(qstate + 2); + *offset = unaligned_load_u16((const u16 *)(qstate + 2)); } u8 successful = 0; diff --git a/src/nfa/mcclellan_internal.h b/src/nfa/mcclellan_internal.h index 0981f99e..482fdb1b 100644 --- a/src/nfa/mcclellan_internal.h +++ b/src/nfa/mcclellan_internal.h @@ -52,13 +52,13 @@ extern "C" #define WIDE_STATE 2 #define WIDE_ENTRY_OFFSET8(weo_pos) (2 + (weo_pos)) -#define WIDE_ENTRY_OFFSET16(weo_pos) (3 + (weo_pos)) +#define WIDE_ENTRY_OFFSET16(weo_pos) (4 + (weo_pos)) #define WIDE_WIDTH_OFFSET 0 #define WIDE_SYMBOL_OFFSET8 1 #define WIDE_TRANSITION_OFFSET8(wto_width) (1 + (wto_width)) #define WIDE_SYMBOL_OFFSET16 2 -#define WIDE_TRANSITION_OFFSET16(wto_width) (2 + (wto_width)) +#define WIDE_TRANSITION_OFFSET16(wto_width) (2 + ROUNDUP_N(wto_width, 2)) struct report_list { u32 count; diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index db142f86..c1a4f87f 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -261,22 +261,24 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { // check successful transition u16 next = unaligned_load_u16((u8 *)trans); - if (next >= wide_limit) { - continue; + if (next < wide_limit) { + mstate_aux *aux = getAux(n, next); + if (aux->accept) { + next |= ACCEPT_FLAG; + } + if (aux->accel_offset) { + next |= ACCEL_FLAG; + } + unaligned_store_u16((u8 *)trans, next); } - mstate_aux *aux = getAux(n, next); - if (aux->accept) { - next |= ACCEPT_FLAG; - } - if (aux->accel_offset) { - next |= ACCEL_FLAG; - } - unaligned_store_u16((u8 *)trans, next); - trans ++; + trans++; // check failure transition for (symbol_t k = 0; k < alphaSize; k++) { u16 next_k = unaligned_load_u16((u8 *)&trans[k]); + if (next_k >= wide_limit) { + continue; + } mstate_aux *aux_k = getAux(n, next_k); if (aux_k->accept) { next_k |= ACCEPT_FLAG; @@ -525,11 +527,12 @@ size_t calcWideRegionSize(const dfa_info &info) { } // wide info header - size_t rv = info.wide_symbol_chain.size() * sizeof(u32) + 3; + size_t rv = info.wide_symbol_chain.size() * sizeof(u32) + 4; // wide info body for (const auto &chain : info.wide_symbol_chain) { - rv += chain.size() + (info.impl_alpha_size + 1) * sizeof(u16) + 2; + rv += ROUNDUP_N(chain.size(), 2) + + (info.impl_alpha_size + 1) * sizeof(u16) + 2; } return ROUNDUP_16(rv); @@ -776,6 +779,7 @@ bytecode_ptr mcclellanCompile16(dfa_info &info, const CompileContext &cc, char *wide_top = wide_base; *(u8 *)(wide_top++) = WIDE_STATE; + wide_top = ROUNDUP_PTR(wide_top, 2); *(u16 *)(wide_top) = wide_number; wide_top += 2; From 35060958e40178f54daf7117c846da4c4921de8f Mon Sep 17 00:00:00 2001 From: "Wang, Xiang W" Date: Sun, 23 Dec 2018 21:37:30 -0500 Subject: [PATCH 10/21] doc: fix github repo address --- doc/dev-reference/getting_started.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/dev-reference/getting_started.rst b/doc/dev-reference/getting_started.rst index 4e4d36f3..45d4fbbb 100644 --- a/doc/dev-reference/getting_started.rst +++ b/doc/dev-reference/getting_started.rst @@ -10,7 +10,7 @@ Very Quick Start #. Clone Hyperscan :: cd - git clone git://github/intel/hyperscan + git clone git://github.com/intel/hyperscan #. 
Configure Hyperscan From ff9636e0229970b8830dc5239a3f20b39f177b42 Mon Sep 17 00:00:00 2001 From: "Wang, Xiang W" Date: Mon, 24 Dec 2018 00:45:59 -0500 Subject: [PATCH 11/21] rose: disable switch optimization for windows --- src/rose/program_runtime.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index 1c6133ba..4fffcd86 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -1875,6 +1875,7 @@ hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t, return HWLM_CONTINUE_MATCHING; } +#if !defined(_WIN32) #define PROGRAM_CASE(name) \ case ROSE_INSTR_##name: { \ LABEL_ROSE_INSTR_##name: \ @@ -1890,6 +1891,21 @@ hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t, #define PROGRAM_NEXT_INSTRUCTION_JUMP \ goto *(next_instr[*(const u8 *)pc]); +#else +#define PROGRAM_CASE(name) \ + case ROSE_INSTR_##name: { \ + DEBUG_PRINTF("instruction: " #name " (pc=%u)\n", \ + programOffset + (u32)(pc - pc_base)); \ + const struct ROSE_STRUCT_##name *ri = \ + (const struct ROSE_STRUCT_##name *)pc; + +#define PROGRAM_NEXT_INSTRUCTION \ + pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ + break; \ + } + +#define PROGRAM_NEXT_INSTRUCTION_JUMP +#endif hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, struct hs_scratch *scratch, u32 programOffset, @@ -1921,6 +1937,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, assert(*(const u8 *)pc != ROSE_INSTR_END); +#if !defined(_WIN32) static const void *next_instr[] = { &&LABEL_ROSE_INSTR_END, //!< End of program. &&LABEL_ROSE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher. @@ -1989,6 +2006,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, &&LABEL_ROSE_INSTR_FLUSH_COMBINATION, &&LABEL_ROSE_INSTR_SET_EXHAUST }; +#endif for (;;) { assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN)); From 5ad56093f892094fa637c0e96be1a6fdeede5329 Mon Sep 17 00:00:00 2001 From: "Wang, Xiang W" Date: Mon, 24 Dec 2018 01:32:02 -0500 Subject: [PATCH 12/21] hscollider: fix back inserter error for windows --- tools/hscollider/main.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/hscollider/main.cpp b/tools/hscollider/main.cpp index ec7cd6be..18d7a016 100644 --- a/tools/hscollider/main.cpp +++ b/tools/hscollider/main.cpp @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #include From 9ab674b18ecadb988a3b8d0e6b5db34d35d2a5a8 Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Tue, 25 Dec 2018 13:49:29 +0800 Subject: [PATCH 13/21] fix dead loop under win10 release bin. 
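
The dead loop follows on from PATCH 11 ("rose: disable switch
optimization for windows"): the computed-goto dispatch defines
PROGRAM_NEXT_INSTRUCTION_JUMP as a direct goto into the next handler,
but the Windows fallback left the macro empty, so handlers that update
pc and then invoke PROGRAM_NEXT_INSTRUCTION_JUMP could fall through to
the generic PROGRAM_NEXT_INSTRUCTION advance instead of re-entering the
dispatch loop, losing the just-computed jump target. A minimal sketch of
the two definitions (simplified from the macros in program_runtime.c;
the surrounding for(;;) dispatch loop is omitted):

    #if !defined(_WIN32)
    /* computed goto: dispatch straight to the next handler */
    #define PROGRAM_NEXT_INSTRUCTION_JUMP                                \
        goto *(next_instr[*(const u8 *)pc]);
    #else
    /* switch-based dispatch: must re-enter the loop explicitly; an
     * empty definition here is what caused the dead loop */
    #define PROGRAM_NEXT_INSTRUCTION_JUMP continue;
    #endif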
--- src/rose/program_runtime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index 4fffcd86..3350e167 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -1904,7 +1904,7 @@ hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t, break; \ } -#define PROGRAM_NEXT_INSTRUCTION_JUMP +#define PROGRAM_NEXT_INSTRUCTION_JUMP continue; #endif hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, From 63e7d89fccb25748fca9463618eb25cf3027fa0d Mon Sep 17 00:00:00 2001 From: "Wang, Xiang W" Date: Tue, 25 Dec 2018 23:19:51 -0500 Subject: [PATCH 14/21] build: avoid pcre error for hscollider when using installed PCRE package Fixes github issue #127 --- tools/hscollider/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/hscollider/CMakeLists.txt b/tools/hscollider/CMakeLists.txt index 4684964f..a4d71b2f 100644 --- a/tools/hscollider/CMakeLists.txt +++ b/tools/hscollider/CMakeLists.txt @@ -64,7 +64,6 @@ set_source_files_properties(${hscollider_SOURCES} PROPERTIES INCLUDE_DIRECTORIES ${CMAKE_CURRENT_SOURCE_DIR}) add_executable(hscollider ${hscollider_SOURCES}) add_dependencies(hscollider ragel_ColliderCorporaParser) -add_dependencies(hscollider pcre) if(NOT WIN32) if (BUILD_CHIMERA) From f68723a6061483a0a242cb3a6f65c3b259f9e669 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Wed, 9 Jan 2019 06:00:55 +0800 Subject: [PATCH 15/21] literal matching: separate path for pure literal patterns --- src/fdr/fdr_compile.cpp | 5 +- src/fdr/fdr_confirm.h | 3 +- src/fdr/fdr_confirm_compile.cpp | 3 +- src/fdr/fdr_confirm_runtime.h | 4 +- src/hwlm/hwlm_literal.cpp | 7 +- src/hwlm/hwlm_literal.h | 11 +- src/parser/shortcut_literal.cpp | 3 +- src/rose/match.c | 8 +- src/rose/program_runtime.c | 289 ++++++++++++++++++++++++++++++- src/rose/program_runtime.h | 6 +- src/rose/rose_build_bytecode.cpp | 33 +++- src/rose/rose_build_impl.h | 11 +- src/rose/rose_build_matchers.cpp | 5 +- src/runtime.c | 3 +- src/scratch.c | 3 +- src/scratch.h | 3 +- src/util/ue2string.h | 9 +- 17 files changed, 379 insertions(+), 27 deletions(-) diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 5e3c6a4e..65c5020e 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -807,6 +807,9 @@ void findIncludedLits(vector &lits, for (size_t i = 0; i < cnt; i++) { u32 bucket1 = group[i].first; u32 id1 = group[i].second; + if (lits[id1].pure) { + continue; + } buildSquashMask(lits, id1, bucket1, i + 1, group, parent_map, exception_map); } diff --git a/src/fdr/fdr_confirm.h b/src/fdr/fdr_confirm.h index d975747e..9490df43 100644 --- a/src/fdr/fdr_confirm.h +++ b/src/fdr/fdr_confirm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -62,6 +62,7 @@ struct LitInfo { u8 size; u8 flags; //!< bitfield of flags from FDR_LIT_FLAG_* above. u8 next; + u8 pure; //!< The pass-on of pure flag from hwlmLiteral. 
}; #define FDRC_FLAG_NO_CONFIRM 1 diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index c75f8d17..3eab21b2 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -87,6 +87,7 @@ void fillLitInfo(const vector &lits, vector &tmpLitInfo, info.flags = flags; info.size = verify_u8(max(lit.msk.size(), lit.s.size())); info.groups = lit.groups; + info.pure = lit.pure; // these are built up assuming a LE machine CONF_TYPE msk = all_ones; diff --git a/src/fdr/fdr_confirm_runtime.h b/src/fdr/fdr_confirm_runtime.h index 067e50e2..67e0d692 100644 --- a/src/fdr/fdr_confirm_runtime.h +++ b/src/fdr/fdr_confirm_runtime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -65,6 +65,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a u8 oldNext; // initialized in loop do { assert(ISALIGNED(li)); + scratch->pure = li->pure; if (unlikely((conf_key & li->msk) != li->v)) { goto out; @@ -99,6 +100,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a li++; } while (oldNext); scratch->fdr_conf = NULL; + scratch->pure = 0; } #endif diff --git a/src/hwlm/hwlm_literal.cpp b/src/hwlm/hwlm_literal.cpp index b0968d79..b257dfb0 100644 --- a/src/hwlm/hwlm_literal.cpp +++ b/src/hwlm/hwlm_literal.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -83,9 +83,10 @@ bool maskIsConsistent(const std::string &s, bool nocase, const vector &msk, * \ref HWLM_MASKLEN. */ hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in, bool noruns_in, u32 id_in, hwlm_group_t groups_in, - const vector &msk_in, const vector &cmp_in) + const vector &msk_in, const vector &cmp_in, + bool pure_in) : s(s_in), id(id_in), nocase(nocase_in), noruns(noruns_in), - groups(groups_in), msk(msk_in), cmp(cmp_in) { + groups(groups_in), msk(msk_in), cmp(cmp_in), pure(pure_in) { assert(s.size() <= HWLM_LITERAL_MAX_LEN); assert(msk.size() <= HWLM_MASKLEN); assert(msk.size() == cmp.size()); diff --git a/src/hwlm/hwlm_literal.h b/src/hwlm/hwlm_literal.h index 08510fb0..72a57f94 100644 --- a/src/hwlm/hwlm_literal.h +++ b/src/hwlm/hwlm_literal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -113,15 +113,20 @@ struct hwlmLiteral { */ std::vector cmp; + bool pure; //!< \brief The pass-on of pure flag from LitFragment. + /** \brief Complete constructor, takes group information and msk/cmp. * * This constructor takes a msk/cmp pair. Both must be vectors of length <= * \ref HWLM_MASKLEN. 
*/ hwlmLiteral(const std::string &s_in, bool nocase_in, bool noruns_in, u32 id_in, hwlm_group_t groups_in, - const std::vector &msk_in, const std::vector &cmp_in); + const std::vector &msk_in, const std::vector &cmp_in, + bool pure_in = false); - /** \brief Simple constructor: no group information, no msk/cmp. */ + /** \brief Simple constructor: no group information, no msk/cmp. + * + * This constructor is only used in internal unit test. */ hwlmLiteral(const std::string &s_in, bool nocase_in, u32 id_in) : hwlmLiteral(s_in, nocase_in, false, id_in, HWLM_ALL_GROUPS, {}, {}) {} }; diff --git a/src/parser/shortcut_literal.cpp b/src/parser/shortcut_literal.cpp index 7a7ab6ee..d08bab3c 100644 --- a/src/parser/shortcut_literal.cpp +++ b/src/parser/shortcut_literal.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -185,6 +185,7 @@ bool shortcutLiteral(NG &ng, const ParsedExpression &pe) { return false; } + vis.lit.set_pure(); const ue2_literal &lit = vis.lit; if (lit.empty()) { diff --git a/src/rose/match.c b/src/rose/match.c index 8ad58b15..192b4709 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -238,7 +238,11 @@ hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t, assert(id && id < t->size); // id is an offset into bytecode const u64a som = 0; const u8 flags = 0; - return roseRunProgram(t, scratch, id, som, end, flags); + if (!scratch->pure) { + return roseRunProgram(t, scratch, id, som, end, flags); + } else { + return roseRunProgram_l(t, scratch, id, som, end, flags); + } } static rose_inline diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index 3350e167..5a7f786e 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -2778,6 +2778,293 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, return HWLM_CONTINUE_MATCHING; } +#define L_PROGRAM_CASE(name) \ + case ROSE_INSTR_##name: { \ + DEBUG_PRINTF("l_instruction: " #name " (pc=%u)\n", \ + programOffset + (u32)(pc - pc_base)); \ + const struct ROSE_STRUCT_##name *ri = \ + (const struct ROSE_STRUCT_##name *)pc; + +#define L_PROGRAM_NEXT_INSTRUCTION \ + pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ + break; \ + } + +#define L_PROGRAM_NEXT_INSTRUCTION_JUMP continue; + +hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 programOffset, + u64a som, u64a end, u8 prog_flags) { + DEBUG_PRINTF("program=%u, offsets [%llu,%llu], flags=%u\n", programOffset, + som, end, prog_flags); + + assert(programOffset != ROSE_INVALID_PROG_OFFSET); + assert(programOffset >= sizeof(struct RoseEngine)); + assert(programOffset < t->size); + + const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV; + + const char *pc_base = getByOffset(t, programOffset); + const char *pc = pc_base; + + struct RoseContext *tctxt = &scratch->tctxt; 
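+
+    /* This literal-only interpreter implements just the subset of
+     * instructions a pure-literal program can contain: reports, dedupe,
+     * long/medium literal confirms, exhaustion and logical-combination
+     * handling. None of the NFA engine or state-tracking instructions
+     * appear in the switch below. */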
+ + assert(*(const u8 *)pc != ROSE_INSTR_END); + + for (;;) { + assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN)); + assert(pc >= pc_base); + assert((size_t)(pc - pc_base) < t->size); + const u8 code = *(const u8 *)pc; + assert(code <= LAST_ROSE_INSTRUCTION); + + switch ((enum RoseInstructionCode)code) { + L_PROGRAM_CASE(END) { + DEBUG_PRINTF("finished\n"); + return HWLM_CONTINUE_MATCHING; + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(CATCH_UP) { + if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(SOM_FROM_REPORT) { + som = handleSomExternal(scratch, &ri->som, end); + DEBUG_PRINTF("som from report %u is %llu\n", ri->som.onmatch, + som); + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(DEDUPE) { + updateSeqPoint(tctxt, end, from_mpv); + const char do_som = t->hasSom; // TODO: constant propagate + const char is_external_report = 1; + enum DedupeResult rv = + dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, + ri->dkey, ri->offset_adjust, + is_external_report, ri->quash_som, do_som); + switch (rv) { + case DEDUPE_HALT: + return HWLM_TERMINATE_MATCHING; + case DEDUPE_SKIP: + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + L_PROGRAM_NEXT_INSTRUCTION_JUMP + case DEDUPE_CONTINUE: + break; + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(DEDUPE_SOM) { + updateSeqPoint(tctxt, end, from_mpv); + const char is_external_report = 0; + const char do_som = 1; + enum DedupeResult rv = + dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, + ri->dkey, ri->offset_adjust, + is_external_report, ri->quash_som, do_som); + switch (rv) { + case DEDUPE_HALT: + return HWLM_TERMINATE_MATCHING; + case DEDUPE_SKIP: + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + L_PROGRAM_NEXT_INSTRUCTION_JUMP + case DEDUPE_CONTINUE: + break; + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(REPORT) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, + INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(REPORT_EXHAUST) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, + ri->ekey) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(REPORT_SOM) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseReportSom(t, scratch, som, end, ri->onmatch, + ri->offset_adjust, + INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(DEDUPE_AND_REPORT) { + updateSeqPoint(tctxt, end, from_mpv); + const char do_som = t->hasSom; // TODO: constant propagate + const char is_external_report = 1; + enum DedupeResult rv = + dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, + ri->dkey, ri->offset_adjust, + is_external_report, ri->quash_som, do_som); + switch (rv) { + case DEDUPE_HALT: + return HWLM_TERMINATE_MATCHING; + case DEDUPE_SKIP: + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + L_PROGRAM_NEXT_INSTRUCTION_JUMP + case DEDUPE_CONTINUE: + break; + } + + const u32 ekey = INVALID_EKEY; + if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, + ekey) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(FINAL_REPORT) { + updateSeqPoint(tctxt, end, 
from_mpv); + if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, + INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + /* One-shot specialisation: this instruction always terminates + * execution of the program. */ + return HWLM_CONTINUE_MATCHING; + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(CHECK_EXHAUSTED) { + DEBUG_PRINTF("check ekey %u\n", ri->ekey); + assert(ri->ekey != INVALID_EKEY); + assert(ri->ekey < t->ekeyCount); + const char *evec = scratch->core_info.exhaustionVector; + if (isExhausted(t, evec, ri->ekey)) { + DEBUG_PRINTF("ekey %u already set, match is exhausted\n", + ri->ekey); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + L_PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(CHECK_LONG_LIT) { + const char nocase = 0; + if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("failed long lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + L_PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) { + const char nocase = 1; + if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("failed nocase long lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + L_PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(CHECK_MED_LIT) { + const char nocase = 0; + if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("failed lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + L_PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(CHECK_MED_LIT_NOCASE) { + const char nocase = 1; + if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("failed long lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + L_PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(SET_LOGICAL) { + DEBUG_PRINTF("set logical value of lkey %u, offset_adjust=%d\n", + ri->lkey, ri->offset_adjust); + assert(ri->lkey != INVALID_LKEY); + assert(ri->lkey < t->lkeyCount); + char *lvec = scratch->core_info.logicalVector; + setLogicalVal(t, lvec, ri->lkey, 1); + updateLastCombMatchOffset(tctxt, end + ri->offset_adjust); + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(SET_COMBINATION) { + DEBUG_PRINTF("set ckey %u as active\n", ri->ckey); + assert(ri->ckey != INVALID_CKEY); + assert(ri->ckey < t->ckeyCount); + char *cvec = scratch->core_info.combVector; + setCombinationActive(t, cvec, ri->ckey); + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(FLUSH_COMBINATION) { + assert(end >= tctxt->lastCombMatchOffset); + if (end > tctxt->lastCombMatchOffset) { + if (flushActiveCombinations(t, scratch) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(SET_EXHAUST) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseSetExhaust(t, scratch, ri->ekey) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + L_PROGRAM_NEXT_INSTRUCTION + + default: { + assert(0); // unreachable + } + } + } + + assert(0); // unreachable + return HWLM_CONTINUE_MATCHING; +} + +#undef L_PROGRAM_CASE +#undef L_PROGRAM_NEXT_INSTRUCTION +#undef L_PROGRAM_NEXT_INSTRUCTION_JUMP + #undef PROGRAM_CASE #undef 
PROGRAM_NEXT_INSTRUCTION #undef PROGRAM_NEXT_INSTRUCTION_JUMP diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 5b16118e..50bf202c 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -54,4 +54,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, struct hs_scratch *scratch, u32 programOffset, u64a som, u64a end, u8 prog_flags); +hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 programOffset, + u64a som, u64a end, u8 prog_flags); + #endif // PROGRAM_RUNTIME_H diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 2c0a9b28..0ef20f21 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -2843,9 +2843,34 @@ vector groupByFragment(const RoseBuildImpl &build) { DEBUG_PRINTF("fragment candidate: lit_id=%u %s\n", lit_id, dumpString(lit.s).c_str()); - auto &fi = frag_info[getFragment(lit)]; - fi.lit_ids.push_back(lit_id); - fi.groups |= groups; + + /** 0:/xxabcdefgh/ */ + /** 1:/yyabcdefgh/ */ + /** 2:/yyabcdefgh.+/ */ + // Above 3 patterns should firstly convert into RoseLiteralMap with + // 2 elements ("xxabcdefgh" and "yyabcdefgh"), then convert into + // LitFragment with 1 element ("abcdefgh"). Special care should be + // taken to handle the 'pure' flag during the conversion. 
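+        // Merge rule: the fragment's key stays pure only while every
+        // literal mapped onto it is pure. The first impure literal re-keys
+        // the fragment (carrying over its accumulated lit_ids and groups)
+        // as impure, so none of its ids are routed down the literal-only
+        // runtime path.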
+ + rose_literal_id lit_frag = getFragment(lit); + auto it = frag_info.find(lit_frag); + if (it != frag_info.end()) { + if (!lit_frag.s.get_pure() && it->first.s.get_pure()) { + struct FragmentInfo f_info = it->second; + f_info.lit_ids.push_back(lit_id); + f_info.groups |= groups; + frag_info.erase(it->first); + frag_info.emplace(lit_frag, f_info); + } else { + it->second.lit_ids.push_back(lit_id); + it->second.groups |= groups; + } + } else { + struct FragmentInfo f_info; + f_info.lit_ids.push_back(lit_id); + f_info.groups |= groups; + frag_info.emplace(lit_frag, f_info); + } } for (auto &m : frag_info) { diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 900aee6c..fe48da4c 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -340,7 +340,14 @@ public: std::pair insert(const rose_literal_id &lit) { auto it = lits_index.find(lit); if (it != lits_index.end()) { - return {it->second, false}; + u32 idx = it->second; + auto &l = lits.at(idx); + if (!lit.s.get_pure() && l.s.get_pure()) { + lits_index.erase(l); + l.s.unset_pure(); + lits_index.emplace(l, idx); + } + return {idx, false}; } u32 id = verify_u32(lits.size()); lits.push_back(lit); diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 2c302a85..8c532cab 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017, Intel Corporation + * Copyright (c) 2016-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -727,6 +727,7 @@ void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp, const auto &s_final = lit_final.get_string(); bool nocase = lit_final.any_nocase(); + bool pure = f.s.get_pure(); DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, cmp=%s\n", f.fragment_id, escapeString(s_final).c_str(), (int)nocase, @@ -740,7 +741,7 @@ void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp, const auto &groups = f.groups; mp.lits.emplace_back(move(s_final), nocase, noruns, f.fragment_id, - groups, msk, cmp); + groups, msk, cmp, pure); } static diff --git a/src/runtime.c b/src/runtime.c index 64a04fd7..68f1f8a7 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -141,6 +141,7 @@ void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose, s->deduper.current_report_offset = ~0ULL; s->deduper.som_log_dirty = 1; /* som logs have not been cleared */ s->fdr_conf = NULL; + s->pure = 0; // Rose program execution (used for some report paths) depends on these // values being initialised. 
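For context, the "pure" case above is a pattern whose entire expression is a bare literal, which shortcutLiteral() converts and flags via set_pure(). A minimal sketch of driving that path through the public API — the pattern and buffer here are illustrative assumptions, and whether the shortcut actually fires is decided internally by the compiler:

#include <stdio.h>
#include <hs.h>

static int on_match(unsigned int id, unsigned long long from,
                    unsigned long long to, unsigned int flags, void *ctx) {
    (void)from; (void)flags; (void)ctx;
    printf("match id %u ending at %llu\n", id, to);
    return 0; /* continue matching */
}

int main(void) {
    hs_database_t *db = NULL;
    hs_compile_error_t *err = NULL;
    /* "abcdefgh" contains no metacharacters, so it is a candidate for the
     * pure-literal shortcut; a pattern such as "abc.*gh" is not. */
    if (hs_compile("abcdefgh", 0, HS_MODE_BLOCK, NULL, &db, &err)
            != HS_SUCCESS) {
        fprintf(stderr, "compile failed: %s\n", err->message);
        hs_free_compile_error(err);
        return 1;
    }
    hs_scratch_t *scratch = NULL;
    if (hs_alloc_scratch(db, &scratch) != HS_SUCCESS) {
        hs_free_database(db);
        return 1;
    }
    const char data[] = "xxabcdefghyy";
    hs_scan(db, data, sizeof(data) - 1, 0, scratch, on_match, NULL);
    hs_free_scratch(scratch);
    hs_free_database(db);
    return 0;
}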
diff --git a/src/scratch.c b/src/scratch.c index 8e082c77..c23b5b3c 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -137,6 +137,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { s->scratchSize = alloc_size; s->scratch_alloc = (char *)s_tmp; s->fdr_conf = NULL; + s->pure = 0; // each of these is at an offset from the previous char *current = (char *)s + sizeof(*s); diff --git a/src/scratch.h b/src/scratch.h index 59aa02c6..dab7bab7 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -208,6 +208,7 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { u64a *fdr_conf; /**< FDR confirm value */ u8 fdr_conf_offset; /**< offset where FDR/Teddy front end matches * in buffer */ + u8 pure; /**< indicator of pure-literal or cutting-literal */ }; /* array of fatbit ptr; TODO: why not an array of fatbits? */ diff --git a/src/util/ue2string.h b/src/util/ue2string.h index 0fa76c3a..1ce51b2f 100644 --- a/src/util/ue2string.h +++ b/src/util/ue2string.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -211,10 +211,17 @@ public: size_t hash() const; + void set_pure() { pure = true; } + void unset_pure() { pure = false; } + bool get_pure() const { return pure; } + + /* TODO: consider existing member functions possibly related with pure. */ + private: friend const_iterator; std::string s; boost::dynamic_bitset<> nocase; + bool pure = false; /**< born from cutting or not (pure literal). */ }; /// Return a reversed copy of this literal. From b5a8644b1f52a7d3cb0143b28bfe69654a64035f Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Fri, 18 Jan 2019 00:20:06 +0800 Subject: [PATCH 16/21] mcclellan: fix dump issue in wide-state case. --- src/nfa/mcclellandump.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/nfa/mcclellandump.cpp b/src/nfa/mcclellandump.cpp index a13795fd..92090bc5 100644 --- a/src/nfa/mcclellandump.cpp +++ b/src/nfa/mcclellandump.cpp @@ -275,7 +275,8 @@ void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f) { dumpDotPreambleDfa(f); - for (u16 i = 1; i < m->state_count; i++) { + u16 sherman_ceil = m->has_wide == 1 ? m->wide_limit : m->state_count; + for (u16 i = 1; i < sherman_ceil; i++) { describeNode(nfa, m, i, f); u16 t[ALPHABET_SIZE]; @@ -314,7 +315,8 @@ void dumpAccelMasks(FILE *f, const mcclellan *m, const mstate_aux *aux) { fprintf(f, "Acceleration\n"); fprintf(f, "------------\n"); - for (u16 i = 0; i < m->state_count; i++) { + u16 sherman_ceil = m->has_wide == 1 ? 
m->wide_limit : m->state_count; + for (u16 i = 0; i < sherman_ceil; i++) { if (!aux[i].accel_offset) { continue; } @@ -360,7 +362,8 @@ void dumpCommonHeader(FILE *f, const mcclellan *m) { static void dumpTransitions(FILE *f, const NFA *nfa, const mcclellan *m, const mstate_aux *aux) { - for (u16 i = 0; i < m->state_count; i++) { + u16 sherman_ceil = m->has_wide == 1 ? m->wide_limit : m->state_count; + for (u16 i = 0; i < sherman_ceil; i++) { fprintf(f, "%05hu", i); if (aux[i].accel_offset) { dumpAccelText(f, (const union AccelAux *)((const char *)m + From 229f3d508014dcbd5ccc7d471a9a0de299e03a76 Mon Sep 17 00:00:00 2001 From: "Wang, Xiang W" Date: Tue, 8 Jan 2019 06:45:31 -0500 Subject: [PATCH 17/21] tools: add catches for C++ exceptions --- src/grey.cpp | 10 +++++++++- tools/hsbench/main.cpp | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/grey.cpp b/src/grey.cpp index fa8da2b4..86a93d25 100644 --- a/src/grey.cpp +++ b/src/grey.cpp @@ -198,7 +198,15 @@ void applyGreyOverrides(Grey *g, const string &s) { string::const_iterator ve = find(ke, pe, ';'); - unsigned int value = lexical_cast(string(ke + 1, ve)); + unsigned int value = 0; + try { + value = lexical_cast(string(ke + 1, ve)); + } catch (boost::bad_lexical_cast &e) { + printf("Invalid grey override key %s:%s\n", key.c_str(), + string(ke + 1, ve).c_str()); + invalid_key_seen = true; + break; + } bool done = false; /* surely there exists a nice template to go with this macro to make diff --git a/tools/hsbench/main.cpp b/tools/hsbench/main.cpp index fecdd330..de9fde07 100644 --- a/tools/hsbench/main.cpp +++ b/tools/hsbench/main.cpp @@ -1065,6 +1065,9 @@ int HS_CDECL main(int argc, char *argv[]) { } catch (const SqlFailure &f) { cerr << f.message << '\n'; return -1; + } catch (const std::runtime_error &e) { + cerr << "Internal error: " << e.what() << '\n'; + return -1; } return 0; From eda1871f86759f0b73e9e076d9b203f0e5a4593b Mon Sep 17 00:00:00 2001 From: "Wang, Xiang W" Date: Mon, 21 Jan 2019 03:48:36 -0500 Subject: [PATCH 18/21] chimera: fix scratch space handling in shared scratch use case --- chimera/ch_scratch.c | 1 - 1 file changed, 1 deletion(-) diff --git a/chimera/ch_scratch.c b/chimera/ch_scratch.c index af49c34d..e413efe8 100644 --- a/chimera/ch_scratch.c +++ b/chimera/ch_scratch.c @@ -216,7 +216,6 @@ ch_error_t HS_CDECL ch_alloc_scratch(const ch_database_t *hydb, } if (db->flags & CHIMERA_FLAG_NO_MULTIMATCH) { - (*scratch)->multi_scratch = NULL; return CH_SUCCESS; } From b0c0d9cd929ac82d2d087ee24cfa5e3317c7af29 Mon Sep 17 00:00:00 2001 From: "Wang, Xiang W" Date: Tue, 29 Jan 2019 04:45:51 -0500 Subject: [PATCH 19/21] unit: check return value of malloc --- unit/chimera/arg_checks.cpp | 3 +++ unit/hyperscan/arg_checks.cpp | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/unit/chimera/arg_checks.cpp b/unit/chimera/arg_checks.cpp index ea1cda15..b9132a31 100644 --- a/unit/chimera/arg_checks.cpp +++ b/unit/chimera/arg_checks.cpp @@ -477,6 +477,7 @@ TEST(HybridArgChecks, AllocScratchBogusScratch) { makeDatabase(&db); ch_scratch_t *blah = (ch_scratch_t *)malloc(100); + ASSERT_TRUE(blah != nullptr); memset(blah, 0xf0, 100); ch_error_t err = ch_alloc_scratch(db, &blah); ASSERT_EQ(CH_INVALID, err); @@ -536,6 +537,7 @@ TEST(HybridArgChecks, DatabaseSizeNoDatabase) { TEST(HybridArgChecks, CloneBadScratch) { // Try cloning the scratch void *local_garbage = malloc(sizeof(garbage)); + ASSERT_TRUE(local_garbage != nullptr); memcpy(local_garbage, garbage, sizeof(garbage)); ch_scratch_t *cloned = nullptr; 
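    // local_garbage deliberately holds non-scratch bytes: the clone call
    // that follows must reject the bogus scratch cleanly instead of
    // crashing.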
ch_scratch_t *scratch = (ch_scratch_t *)local_garbage;
@@ -550,6 +552,7 @@ TEST(HybridArgChecks, ScanBadScratch) { makeDatabase(&db); void *local_garbage = malloc(sizeof(garbage)); + ASSERT_TRUE(local_garbage != nullptr); memcpy(local_garbage, garbage, sizeof(garbage)); ch_scratch_t *scratch = (ch_scratch_t *)local_garbage;
diff --git a/unit/hyperscan/arg_checks.cpp b/unit/hyperscan/arg_checks.cpp index 2cbd0842..21c8707f 100644 --- a/unit/hyperscan/arg_checks.cpp +++ b/unit/hyperscan/arg_checks.cpp
@@ -1370,6 +1370,7 @@ TEST(HyperscanArgChecks, AllocScratchBogusScratch) { ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(db != nullptr); hs_scratch_t *blah = (hs_scratch_t *)malloc(100); + ASSERT_TRUE(blah != nullptr); memset(blah, 0xf0, 100); err = hs_alloc_scratch(db, &blah); ASSERT_EQ(HS_INVALID, err);
@@ -2034,6 +2035,7 @@ TEST(HyperscanArgChecks, ScratchSizeBadScratch) { TEST(HyperscanArgChecks, CloneBadScratch) { // Try cloning the scratch void *local_garbage = malloc(sizeof(garbage)); + ASSERT_TRUE(local_garbage != nullptr); memcpy(local_garbage, garbage, sizeof(garbage)); hs_scratch_t *cloned = nullptr; hs_scratch_t *scratch = (hs_scratch_t *)local_garbage;
@@ -2052,6 +2054,7 @@ TEST(HyperscanArgChecks, ScanBadScratch) { ASSERT_TRUE(db != nullptr); void *local_garbage = malloc(sizeof(garbage)); + ASSERT_TRUE(local_garbage != nullptr); memcpy(local_garbage, garbage, sizeof(garbage)); hs_scratch_t *scratch = (hs_scratch_t *)local_garbage;
@@ -2072,6 +2075,7 @@ TEST(HyperscanArgChecks, ScanStreamBadScratch) { ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(db != nullptr); void *local_garbage = malloc(sizeof(garbage)); + ASSERT_TRUE(local_garbage != nullptr); memcpy(local_garbage, garbage, sizeof(garbage)); hs_scratch_t *scratch = (hs_scratch_t *)local_garbage;
@@ -2107,6 +2111,7 @@ TEST(HyperscanArgChecks, ResetStreamBadScratch) { ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(db != nullptr); void *local_garbage = malloc(sizeof(garbage)); + ASSERT_TRUE(local_garbage != nullptr); memcpy(local_garbage, garbage, sizeof(garbage)); hs_scratch_t *scratch = (hs_scratch_t *)local_garbage;
@@ -2142,6 +2147,7 @@ TEST(HyperscanArgChecks, ScanVectorBadScratch) { ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(db != nullptr); void *local_garbage = malloc(sizeof(garbage)); + ASSERT_TRUE(local_garbage != nullptr); memcpy(local_garbage, garbage, sizeof(garbage)); hs_scratch_t *scratch = (hs_scratch_t *)local_garbage;
From d4df39972866afdeaa9e299a3ab7e2536a8eb5a5 Mon Sep 17 00:00:00 2001 From: "Chang, Harry" <harry.chang@intel.com> Date: Thu, 3 Jan 2019 14:58:53 +0800 Subject: [PATCH 20/21] changelog: updates for 5.1.0 release --- CHANGELOG.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+)
diff --git a/CHANGELOG.md b/CHANGELOG.md index 6695e9fc..93336b50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md
@@ -2,6 +2,24 @@ This is a list of notable changes to Hyperscan, in reverse chronological order.
+## [5.1.0] 2019-01-17
+- Improve DFA state compression via a wide-state optimization to reduce
+  bytecode size.
+- Add a dedicated interpreter runtime path to boost the performance of pure
+  literal matching.
+- Optimize the original interpreter runtime (the "Rose" engine) to increase
+  overall performance.
+- Bugfix for logical combinations: fix erroneous reporting of a combination's
+  match when a sub-expression has an EOD match under streaming mode.
+- Bugfix for logical combinations: fix missed reporting of a combination's
+  match under vacuous input.
+- Bugfix for issue #104: fix compile error with Boost 1.68.0.
+- Bugfix for issue #127: avoid pcre error for hscollider with installed PCRE + package. +- Update version of PCRE used by testing tools as a syntax and semantic + reference to PCRE 8.41 or above. +- Fix github repo address in doc. + ## [5.0.0] 2018-07-09 - Introduce chimera hybrid engine of Hyperscan and PCRE, to fully support PCRE syntax as well as to take advantage of the high performance nature of From f9c78376d910eae93e9c23ae44f4304874425762 Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Thu, 17 Jan 2019 15:47:30 +0800 Subject: [PATCH 21/21] Bump version number for release --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0e905db6..cac4fab7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required (VERSION 2.8.11) project (hyperscan C CXX) set (HS_MAJOR_VERSION 5) -set (HS_MINOR_VERSION 0) +set (HS_MINOR_VERSION 1) set (HS_PATCH_VERSION 0) set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
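As a closing note, the bumped CMake variables above feed the version string the library reports at runtime. A minimal check, assuming a built and installed 5.1.0 library (the exact date suffix in the output depends on the build):

#include <stdio.h>
#include <hs.h>

int main(void) {
    /* hs_version() returns a string derived from HS_MAJOR/MINOR/PATCH_VERSION,
     * e.g. "5.1.0 2019-01-17", so it should reflect the bump above. */
    printf("linked against Hyperscan %s\n", hs_version());
    return 0;
}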