diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6695e9fc..93336b50 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,24 @@
 This is a list of notable changes to Hyperscan, in reverse chronological order.

+## [5.1.0] 2019-01-17
+- Improve DFA state compression with a wide-state optimization to reduce
+  bytecode size.
+- Add dedicated interpreter runtime handling to boost the performance of pure
+  literal matching.
+- Optimize the original interpreter (the "Rose" engine) to increase overall
+  performance.
+- Bugfix for logical combinations: fix reporting of a combination's match when
+  a sub-expression has an EOD match in streaming mode.
+- Bugfix for logical combinations: fix missed reporting of a combination's
+  match on vacuous input.
+- Bugfix for issue #104: fix compile error with Boost 1.68.0.
+- Bugfix for issue #127: avoid a PCRE error for hscollider with an installed
+  PCRE package.
+- Update the version of PCRE used by testing tools as a syntax and semantic
+  reference to PCRE 8.41 or above.
+- Fix the GitHub repository address in the documentation.
+
 ## [5.0.0] 2018-07-09
 - Introduce chimera hybrid engine of Hyperscan and PCRE, to fully support PCRE
   syntax as well as to take advantage of the high performance nature of
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 07000270..cac4fab7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,7 +2,7 @@ cmake_minimum_required (VERSION 2.8.11)
 project (hyperscan C CXX)

 set (HS_MAJOR_VERSION 5)
-set (HS_MINOR_VERSION 0)
+set (HS_MINOR_VERSION 1)
 set (HS_PATCH_VERSION 0)
 set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
@@ -456,7 +456,7 @@ set(PCRE_REQUIRED_MINOR_VERSION 41)
 set(PCRE_REQUIRED_VERSION ${PCRE_REQUIRED_MAJOR_VERSION}.${PCRE_REQUIRED_MINOR_VERSION})
 include (${CMAKE_MODULE_PATH}/pcre.cmake)
 if (NOT CORRECT_PCRE_VERSION)
-    message(STATUS "PCRE ${PCRE_REQUIRED_VERSION} not found")
+    message(STATUS "PCRE ${PCRE_REQUIRED_VERSION} or above not found")
 endif()

 # we need static libs for Chimera - too much deep magic for shared libs
@@ -508,7 +508,7 @@ set(PCRE_REQUIRED_MINOR_VERSION 41)
 set(PCRE_REQUIRED_VERSION ${PCRE_REQUIRED_MAJOR_VERSION}.${PCRE_REQUIRED_MINOR_VERSION})
 include (${CMAKE_MODULE_PATH}/pcre.cmake)
 if (NOT CORRECT_PCRE_VERSION)
-    message(STATUS "PCRE ${PCRE_REQUIRED_VERSION} not found")
+    message(STATUS "PCRE ${PCRE_REQUIRED_VERSION} or above not found")
 endif()

 # we need static libs for Chimera - too much deep magic for shared libs
diff --git a/chimera/ch_compile.cpp b/chimera/ch_compile.cpp
index c71e26e0..374bd7ad 100644
--- a/chimera/ch_compile.cpp
+++ b/chimera/ch_compile.cpp
@@ -714,7 +714,7 @@ ch_error_t HS_CDECL ch_compile(const char *expression, unsigned flags,
                       (int)e.index : -1);
         return CH_COMPILER_ERROR;
     }
-    catch (std::bad_alloc) {
+    catch (std::bad_alloc &) {
         *db = nullptr;
         *comp_error = const_cast<ch_compile_error_t *>(&ch_enomem);
         return CH_COMPILER_ERROR;
@@ -782,7 +782,7 @@ ch_error_t HS_CDECL ch_compile_multi(const char *const *expressions,
                       (int)e.index : -1);
         return CH_COMPILER_ERROR;
     }
-    catch (std::bad_alloc) {
+    catch (std::bad_alloc &) {
         *db = nullptr;
         *comp_error = const_cast<ch_compile_error_t *>(&ch_enomem);
         return CH_COMPILER_ERROR;
@@ -855,7 +855,7 @@ ch_error_t HS_CDECL ch_compile_ext_multi(
                       (int)e.index : -1);
         return CH_COMPILER_ERROR;
     }
-    catch (std::bad_alloc) {
+    catch (std::bad_alloc &) {
         *db = nullptr;
         *comp_error = const_cast<ch_compile_error_t *>(&ch_enomem);
         return CH_COMPILER_ERROR;
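Editorial note on the logical-combination fixes above: a combination pattern references the IDs of other patterns in the same database and is compiled with HS_FLAG_COMBINATION. Below is a minimal, illustrative sketch (not part of this patch) of compiling such a set in streaming mode, the mode affected by the EOD fix; the pattern strings and IDs are invented for the example.

```cpp
#include <hs.h>

#include <stdio.h>

int main(void) {
    // Patterns 1 and 2 are ordinary sub-expressions; pattern 3 is a logical
    // combination of them ("1 & 2"). HS_FLAG_QUIET suppresses the
    // sub-expressions' own match reports.
    const char *exprs[] = {"abc", "def", "1 & 2"};
    unsigned flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_COMBINATION};
    unsigned ids[] = {1, 2, 3};

    hs_database_t *db = NULL;
    hs_compile_error_t *err = NULL;
    if (hs_compile_multi(exprs, flags, ids, 3, HS_MODE_STREAM, NULL, &db,
                         &err) != HS_SUCCESS) {
        fprintf(stderr, "compile failed: %s\n", err->message);
        hs_free_compile_error(err);
        return 1;
    }
    hs_free_database(db);
    return 0;
}
```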
diff --git a/chimera/ch_scratch.c b/chimera/ch_scratch.c
index af49c34d..e413efe8 100644
--- a/chimera/ch_scratch.c
+++ b/chimera/ch_scratch.c
@@ -216,7 +216,6 @@ ch_error_t HS_CDECL ch_alloc_scratch(const ch_database_t *hydb,
     }

     if (db->flags & CHIMERA_FLAG_NO_MULTIMATCH) {
-        (*scratch)->multi_scratch = NULL;
         return CH_SUCCESS;
     }
diff --git a/cmake/pcre.cmake b/cmake/pcre.cmake
index 2b0d23c7..e0acda5e 100644
--- a/cmake/pcre.cmake
+++ b/cmake/pcre.cmake
@@ -27,7 +27,7 @@ if (PCRE_BUILD_SOURCE)
     # first, check version number
     CHECK_C_SOURCE_COMPILES("#include <pcre.h>
-    #if PCRE_MAJOR != ${PCRE_REQUIRED_MAJOR_VERSION} || PCRE_MINOR != ${PCRE_REQUIRED_MINOR_VERSION}
+    #if PCRE_MAJOR != ${PCRE_REQUIRED_MAJOR_VERSION} || PCRE_MINOR < ${PCRE_REQUIRED_MINOR_VERSION}
     #error Incorrect pcre version
     #endif
     main() {}" CORRECT_PCRE_VERSION)
@@ -35,10 +35,10 @@ if (PCRE_BUILD_SOURCE)
     if (NOT CORRECT_PCRE_VERSION)
         unset(CORRECT_PCRE_VERSION CACHE)
-        message(STATUS "Incorrect version of pcre - version ${PCRE_REQUIRED_VERSION} is required")
+        message(STATUS "Incorrect version of pcre - version ${PCRE_REQUIRED_VERSION} or above is required")
         return ()
     else()
-        message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION} - building from source.")
+        message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION} or above - building from source.")
     endif()

     # PCRE compile options
@@ -52,12 +52,12 @@ if (PCRE_BUILD_SOURCE)
 else ()
     # pkgconf should save us
     find_package(PkgConfig)
-    pkg_check_modules(PCRE libpcre=${PCRE_REQUIRED_VERSION})
+    pkg_check_modules(PCRE libpcre>=${PCRE_REQUIRED_VERSION})
     if (PCRE_FOUND)
         set(CORRECT_PCRE_VERSION TRUE)
-        message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION}")
+        message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION} or above")
     else ()
-        message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION} not found")
+        message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION} or above not found")
         return ()
     endif ()
 endif (PCRE_BUILD_SOURCE)
diff --git a/doc/dev-reference/compilation.rst b/doc/dev-reference/compilation.rst
index 7a7f37ec..214f4abc 100644
--- a/doc/dev-reference/compilation.rst
+++ b/doc/dev-reference/compilation.rst
@@ -64,7 +64,7 @@ libpcre are supported. The use of unsupported constructs will result in
 compilation errors.

 The version of PCRE used to validate Hyperscan's interpretation of this syntax
-is 8.41.
+is 8.41 or above.

 ====================
 Supported Constructs
 ====================
diff --git a/doc/dev-reference/getting_started.rst b/doc/dev-reference/getting_started.rst
index 4e4d36f3..45d4fbbb 100644
--- a/doc/dev-reference/getting_started.rst
+++ b/doc/dev-reference/getting_started.rst
@@ -10,7 +10,7 @@ Very Quick Start
 #. Clone Hyperscan ::

        cd <where-you-want-hyperscan-source>
-       git clone git://github/intel/hyperscan
+       git clone git://github.com/intel/hyperscan

 #. Configure Hyperscan
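For context on the pcre.cmake change above: the CMake probe compiles a tiny translation unit against pcre.h and now accepts any 8.x release at or above the required minor version. Compiled standalone, the generated check reduces to something like this sketch (assuming pcre.h's PCRE_MAJOR/PCRE_MINOR macros, with 8.41 as the floor):

```cpp
// Standalone equivalent of the version gate the CMake probe compiles:
// any libpcre with major version 8 and minor version >= 41 is accepted.
#include <pcre.h>

#if PCRE_MAJOR != 8 || PCRE_MINOR < 41
#error Incorrect pcre version
#endif

int main(void) { return 0; }
```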
diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp
index 5e3c6a4e..65c5020e 100644
--- a/src/fdr/fdr_compile.cpp
+++ b/src/fdr/fdr_compile.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -807,6 +807,9 @@ void findIncludedLits(vector<hwlmLiteral> &lits,
     for (size_t i = 0; i < cnt; i++) {
         u32 bucket1 = group[i].first;
         u32 id1 = group[i].second;
+        if (lits[id1].pure) {
+            continue;
+        }
         buildSquashMask(lits, id1, bucket1, i + 1, group, parent_map,
                         exception_map);
     }
diff --git a/src/fdr/fdr_confirm.h b/src/fdr/fdr_confirm.h
index d975747e..9490df43 100644
--- a/src/fdr/fdr_confirm.h
+++ b/src/fdr/fdr_confirm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -62,6 +62,7 @@ struct LitInfo {
     u8 size;
     u8 flags; //!< bitfield of flags from FDR_LIT_FLAG_* above.
     u8 next;
+    u8 pure; //!< pure flag passed on from hwlmLiteral.
 };

 #define FDRC_FLAG_NO_CONFIRM 1
diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp
index c75f8d17..3eab21b2 100644
--- a/src/fdr/fdr_confirm_compile.cpp
+++ b/src/fdr/fdr_confirm_compile.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -87,6 +87,7 @@ void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
         info.flags = flags;
         info.size = verify_u8(max(lit.msk.size(), lit.s.size()));
         info.groups = lit.groups;
+        info.pure = lit.pure;

         // these are built up assuming a LE machine
         CONF_TYPE msk = all_ones;
diff --git a/src/fdr/fdr_confirm_runtime.h b/src/fdr/fdr_confirm_runtime.h
index 067e50e2..67e0d692 100644
--- a/src/fdr/fdr_confirm_runtime.h
+++ b/src/fdr/fdr_confirm_runtime.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -65,6 +65,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
     u8 oldNext; // initialized in loop
     do {
         assert(ISALIGNED(li));
+        scratch->pure = li->pure;

         if (unlikely((conf_key & li->msk) != li->v)) {
             goto out;
@@ -99,6 +100,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
         li++;
     } while (oldNext);
     scratch->fdr_conf = NULL;
+    scratch->pure = 0;
 }
 #endif
diff --git a/src/grey.cpp b/src/grey.cpp
index 3762a497..86a93d25 100644
--- a/src/grey.cpp
+++ b/src/grey.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -82,6 +82,7 @@ Grey::Grey(void) :
     onlyOneOutfix(false),
     allowShermanStates(true),
     allowMcClellan8(true),
+    allowWideStates(true), // enable wide state for McClellan8
    highlanderPruneDFA(true),
    minimizeDFA(true),
    accelerateDFA(true),
@@ -197,7 +198,15 @@ void applyGreyOverrides(Grey *g, const string &s) {
         string::const_iterator ve = find(ke, pe, ';');

-        unsigned int value = lexical_cast<unsigned int>(string(ke + 1, ve));
+        unsigned int value = 0;
+        try {
+            value = lexical_cast<unsigned int>(string(ke + 1, ve));
+        } catch (boost::bad_lexical_cast &e) {
+            printf("Invalid grey override key %s:%s\n", key.c_str(),
+                   string(ke + 1, ve).c_str());
+            invalid_key_seen = true;
+            break;
+        }
         bool done = false;

         /* surely there exists a nice template to go with this macro to make
@@ -251,6 +260,7 @@ void applyGreyOverrides(Grey *g, const string &s) {
         G_UPDATE(onlyOneOutfix);
         G_UPDATE(allowShermanStates);
         G_UPDATE(allowMcClellan8);
+        G_UPDATE(allowWideStates);
         G_UPDATE(highlanderPruneDFA);
         G_UPDATE(minimizeDFA);
         G_UPDATE(accelerateDFA);
diff --git a/src/grey.h b/src/grey.h
index 34c62918..ed2f845a 100644
--- a/src/grey.h
+++ b/src/grey.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -87,6 +87,7 @@ struct Grey {

     bool allowShermanStates;
     bool allowMcClellan8;
+    bool allowWideStates; // enable wide state for McClellan8
     bool highlanderPruneDFA;
     bool minimizeDFA;
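The guarded conversion added to applyGreyOverrides() above is the standard boost::lexical_cast error-handling pattern: a malformed numeric value now reports the bad key instead of propagating an exception. A self-contained sketch of the same pattern on a hypothetical "key:value" override string (parseOverride() is an invented helper, not Hyperscan code):

```cpp
#include <boost/lexical_cast.hpp>

#include <cstdio>
#include <string>

// Guarded numeric conversion, mirroring the try/catch added around
// lexical_cast in applyGreyOverrides().
static bool parseOverride(const std::string &kv, unsigned int &out) {
    size_t colon = kv.find(':');
    if (colon == std::string::npos) {
        return false;
    }
    try {
        out = boost::lexical_cast<unsigned int>(kv.substr(colon + 1));
    } catch (const boost::bad_lexical_cast &) {
        // Previously a malformed value like this would have thrown out of
        // the parser; now it is reported and skipped.
        std::printf("Invalid grey override %s\n", kv.c_str());
        return false;
    }
    return true;
}

int main() {
    unsigned int v = 0;
    std::printf("%d\n", parseOverride("allowWideStates:1", v)); // prints 1
    std::printf("%d\n", parseOverride("allowWideStates:x", v)); // prints 0
}
```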
diff --git a/src/hwlm/hwlm_literal.cpp b/src/hwlm/hwlm_literal.cpp
index b0968d79..b257dfb0 100644
--- a/src/hwlm/hwlm_literal.cpp
+++ b/src/hwlm/hwlm_literal.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -83,9 +83,10 @@ bool maskIsConsistent(const std::string &s, bool nocase, const vector<u8> &msk,
  * \ref HWLM_MASKLEN. */
 hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in,
                          bool noruns_in, u32 id_in, hwlm_group_t groups_in,
-                         const vector<u8> &msk_in, const vector<u8> &cmp_in)
+                         const vector<u8> &msk_in, const vector<u8> &cmp_in,
+                         bool pure_in)
     : s(s_in), id(id_in), nocase(nocase_in), noruns(noruns_in),
-      groups(groups_in), msk(msk_in), cmp(cmp_in) {
+      groups(groups_in), msk(msk_in), cmp(cmp_in), pure(pure_in) {
     assert(s.size() <= HWLM_LITERAL_MAX_LEN);
     assert(msk.size() <= HWLM_MASKLEN);
     assert(msk.size() == cmp.size());
diff --git a/src/hwlm/hwlm_literal.h b/src/hwlm/hwlm_literal.h
index 08510fb0..72a57f94 100644
--- a/src/hwlm/hwlm_literal.h
+++ b/src/hwlm/hwlm_literal.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -113,15 +113,20 @@ struct hwlmLiteral {
      */
     std::vector<u8> cmp;

+    bool pure; //!< \brief pure flag passed on from LitFragment.
+
     /** \brief Complete constructor, takes group information and msk/cmp.
      *
      * This constructor takes a msk/cmp pair. Both must be vectors of length <=
      * \ref HWLM_MASKLEN. */
     hwlmLiteral(const std::string &s_in, bool nocase_in, bool noruns_in,
                 u32 id_in, hwlm_group_t groups_in,
-                const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in);
+                const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in,
+                bool pure_in = false);

-    /** \brief Simple constructor: no group information, no msk/cmp. */
+    /** \brief Simple constructor: no group information, no msk/cmp.
+     *
+     * This constructor is only used in internal unit tests. */
     hwlmLiteral(const std::string &s_in, bool nocase_in, u32 id_in)
         : hwlmLiteral(s_in, nocase_in, false, id_in, HWLM_ALL_GROUPS, {}, {}) {}
 };
diff --git a/src/nfa/accel_dfa_build_strat.h b/src/nfa/accel_dfa_build_strat.h
index 881892ed..53a6f35b 100644
--- a/src/nfa/accel_dfa_build_strat.h
+++ b/src/nfa/accel_dfa_build_strat.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -40,6 +40,11 @@ namespace ue2 {
 class ReportManager;
 struct Grey;

+enum DfaType {
+    McClellan,
+    Sheng,
+    Gough
+};

 class accel_dfa_build_strat : public dfa_build_strat {
 public:
@@ -53,6 +58,8 @@ public:
     virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info,
                             void *accel_out);
     virtual std::map<dstate_id_t, AccelScheme> getAccelInfo(const Grey &grey);
+    virtual DfaType getType() const = 0;
+
 private:
     bool only_accel_init;
 };
diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp
index 3f1614dd..d41c6f42 100644
--- a/src/nfa/goughcompile.cpp
+++ b/src/nfa/goughcompile.cpp
@@ -91,6 +91,7 @@ public:
     void buildAccel(dstate_id_t this_idx, const AccelScheme &info,
                     void *accel_out) override;
     u32 max_allowed_offset_accel() const override { return 0; }
+    DfaType getType() const override { return Gough; }

     raw_som_dfa &rdfa;
     const GoughGraph &gg;
diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp
index 6053b56f..bbb26605 100644
--- a/src/nfa/limex_compile.cpp
+++ b/src/nfa/limex_compile.cpp
@@ -980,7 +980,7 @@ u32 addSquashMask(const build_info &args, const NFAVertex &v,
     // see if we've already seen it, otherwise add a new one.
     auto it = find(squash.begin(), squash.end(), sit->second);
     if (it != squash.end()) {
-        return verify_u32(distance(squash.begin(), it));
+        return verify_u32(std::distance(squash.begin(), it));
     }
     u32 idx = verify_u32(squash.size());
     squash.push_back(sit->second);
@@ -1007,7 +1007,7 @@ u32 addReports(const flat_set<ReportID> &r, vector<ReportID> &reports,
     auto it = search(begin(reports), end(reports), begin(my_reports),
                      end(my_reports));
     if (it != end(reports)) {
-        u32 offset = verify_u32(distance(begin(reports), it));
+        u32 offset = verify_u32(std::distance(begin(reports), it));
         DEBUG_PRINTF("reusing found report list at %u\n", offset);
         return offset;
     }
diff --git a/src/nfa/mcclellan.c b/src/nfa/mcclellan.c
index ceedb9db..71f71e32 100644
--- a/src/nfa/mcclellan.c
+++ b/src/nfa/mcclellan.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -167,9 +167,68 @@ u32 doNormal16(const struct mcclellan *m, const u8 **c_inout, const u8 *end,
 }

 static really_inline
-char mcclellanExec16_i(const struct mcclellan *m, u32 *state, const u8 *buf,
-                       size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
-                       char single, const u8 **c_final, enum MatchMode mode) {
+u32 doNormalWide16(const struct mcclellan *m, const u8 **c_inout,
+                   const u8 *end, u32 s, char *qstate, u16 *offset,
+                   char do_accel, enum MatchMode mode) {
+    const u8 *c = *c_inout;
+
+    u32 wide_limit = m->wide_limit;
+    const char *wide_base
+        = (const char *)m - sizeof(struct NFA) + m->wide_offset;
+
+    const u16 *succ_table
+        = (const u16 *)((const char *)m + sizeof(struct mcclellan));
+    assert(ISALIGNED_N(succ_table, 2));
+    u32 sherman_base =
m->sherman_limit; + const char *sherman_base_offset + = (const char *)m - sizeof(struct NFA) + m->sherman_offset; + u32 as = m->alphaShift; + + s &= STATE_MASK; + + while (c < end && s) { + u8 cprime = m->remap[*c]; + DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u) &c: %p\n", *c, + ourisprint(*c) ? *c : '?', cprime, s, c); + + if (unlikely(s >= wide_limit)) { + const char *wide_entry + = findWideEntry16(m, wide_base, wide_limit, s); + DEBUG_PRINTF("doing wide head (%u)\n", s); + s = doWide16(wide_entry, &c, end, m->remap, (u16 *)&s, qstate, + offset); + } else if (s >= sherman_base) { + const char *sherman_state + = findShermanState(m, sherman_base_offset, sherman_base, s); + DEBUG_PRINTF("doing sherman (%u)\n", s); + s = doSherman16(sherman_state, cprime, succ_table, as); + } else { + DEBUG_PRINTF("doing normal\n"); + s = succ_table[(s << as) + cprime]; + } + + DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK); + c++; + + if (do_accel && (s & ACCEL_FLAG)) { + break; + } + if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { + break; + } + + s &= STATE_MASK; + } + + *c_inout = c; + return s; +} + +static really_inline +char mcclellanExec16_i(const struct mcclellan *m, u32 *state, char *qstate, + const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, + void *ctxt, char single, const u8 **c_final, + enum MatchMode mode) { assert(ISALIGNED_N(state, 2)); if (!len) { if (mode == STOP_AT_MATCH) { @@ -179,6 +238,7 @@ char mcclellanExec16_i(const struct mcclellan *m, u32 *state, const u8 *buf, } u32 s = *state; + u16 offset = 0; const u8 *c = buf; const u8 *c_end = buf + len; const struct mstate_aux *aux @@ -207,7 +267,12 @@ without_accel: goto exit; } - s = doNormal16(m, &c, min_accel_offset, s, 0, mode); + if (unlikely(m->has_wide)) { + s = doNormalWide16(m, &c, min_accel_offset, s, qstate, &offset, 0, + mode); + } else { + s = doNormal16(m, &c, min_accel_offset, s, 0, mode); + } if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { if (mode == STOP_AT_MATCH) { @@ -259,7 +324,11 @@ with_accel: } } - s = doNormal16(m, &c, c_end, s, 1, mode); + if (unlikely(m->has_wide)) { + s = doNormalWide16(m, &c, c_end, s, qstate, &offset, 1, mode); + } else { + s = doNormal16(m, &c, c_end, s, 1, mode); + } if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { if (mode == STOP_AT_MATCH) { @@ -297,44 +366,47 @@ exit: } static never_inline -char mcclellanExec16_i_cb(const struct mcclellan *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcclellanExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, CALLBACK_OUTPUT); +char mcclellanExec16_i_cb(const struct mcclellan *m, u32 *state, char *qstate, + const u8 *buf, size_t len, u64a offAdj, + NfaCallback cb, void *ctxt, char single, + const u8 **final_point) { + return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt, + single, final_point, CALLBACK_OUTPUT); } static never_inline -char mcclellanExec16_i_sam(const struct mcclellan *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcclellanExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, STOP_AT_MATCH); +char mcclellanExec16_i_sam(const struct mcclellan *m, u32 *state, char *qstate, + const u8 *buf, size_t len, u64a offAdj, + NfaCallback cb, void *ctxt, char single, + const u8 **final_point) { + return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt, + single, final_point, STOP_AT_MATCH); } static 
never_inline -char mcclellanExec16_i_nm(const struct mcclellan *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcclellanExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, NO_MATCHES); +char mcclellanExec16_i_nm(const struct mcclellan *m, u32 *state, char *qstate, + const u8 *buf, size_t len, u64a offAdj, + NfaCallback cb, void *ctxt, char single, + const u8 **final_point) { + return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt, + single, final_point, NO_MATCHES); } static really_inline -char mcclellanExec16_i_ni(const struct mcclellan *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point, - enum MatchMode mode) { +char mcclellanExec16_i_ni(const struct mcclellan *m, u32 *state, char *qstate, + const u8 *buf, size_t len, u64a offAdj, + NfaCallback cb, void *ctxt, char single, + const u8 **final_point, enum MatchMode mode) { if (mode == CALLBACK_OUTPUT) { - return mcclellanExec16_i_cb(m, state, buf, len, offAdj, cb, ctxt, - single, final_point); + return mcclellanExec16_i_cb(m, state, qstate, buf, len, offAdj, cb, + ctxt, single, final_point); } else if (mode == STOP_AT_MATCH) { - return mcclellanExec16_i_sam(m, state, buf, len, offAdj, cb, ctxt, - single, final_point); + return mcclellanExec16_i_sam(m, state, qstate, buf, len, offAdj, cb, + ctxt, single, final_point); } else { assert(mode == NO_MATCHES); - return mcclellanExec16_i_nm(m, state, buf, len, offAdj, cb, ctxt, - single, final_point); + return mcclellanExec16_i_nm(m, state, qstate, buf, len, offAdj, cb, + ctxt, single, final_point); } } @@ -540,6 +612,10 @@ char mcclellanCheckEOD(const struct NFA *nfa, u32 s, u64a offset, const struct mcclellan *m = getImplNfa(nfa); const struct mstate_aux *aux = get_aux(m, s); + if (m->has_wide == 1 && s >= m->wide_limit) { + return MO_CONTINUE_MATCHING; + } + if (!aux->accept_eod) { return MO_CONTINUE_MATCHING; } @@ -612,9 +688,9 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, /* do main buffer region */ const u8 *final_look; - char rv = mcclellanExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp, - offset + sp, cb, context, single, - &final_look, mode); + char rv = mcclellanExec16_i_ni(m, &s, q->state, cur_buf + sp, + local_ep - sp, offset + sp, cb, context, + single, &final_look, mode); if (rv == MO_DEAD) { *(u16 *)q->state = 0; return MO_DEAD; @@ -684,12 +760,16 @@ char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, const u8 *buffer, const struct mcclellan *m = getImplNfa(n); u32 s = m->start_anchored; - if (mcclellanExec16_i(m, &s, buffer, length, offset, cb, context, single, - NULL, CALLBACK_OUTPUT) + if (mcclellanExec16_i(m, &s, NULL, buffer, length, offset, cb, context, + single, NULL, CALLBACK_OUTPUT) == MO_DEAD) { return s ? 
MO_ALIVE : MO_DEAD; } + if (m->has_wide == 1 && s >= m->wide_limit) { + return MO_ALIVE; + } + const struct mstate_aux *aux = get_aux(m, s); if (aux->accept_eod) { @@ -768,6 +848,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, char rv = mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, offset + sp, cb, context, single, &final_look, mode); + if (rv == MO_HALT_MATCHING) { *(u8 *)q->state = 0; return MO_DEAD; @@ -1016,7 +1097,8 @@ char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report, u16 s = *(u16 *)q->state; DEBUG_PRINTF("checking accepts for %hu\n", s); - return mcclellanHasAccept(m, get_aux(m, s), report); + return (m->has_wide == 1 && s >= m->wide_limit) ? + 0 : mcclellanHasAccept(m, get_aux(m, s), report); } char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) { @@ -1026,7 +1108,8 @@ char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) { u16 s = *(u16 *)q->state; DEBUG_PRINTF("checking accepts for %hu\n", s); - return !!get_aux(m, s)->accept; + return (m->has_wide == 1 && s >= m->wide_limit) ? + 0 : !!get_aux(m, s)->accept; } char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end) { @@ -1111,6 +1194,12 @@ char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset, void *state, UNUSED u8 key) { const struct mcclellan *m = getImplNfa(nfa); u16 s = offset ? m->start_floating : m->start_anchored; + + // new byte + if (m->has_wide) { + unaligned_store_u16((u16 *)state + 1, 0); + } + if (s) { unaligned_store_u16(state, s); return 1; @@ -1140,14 +1229,24 @@ void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state, const u8 *buf, char top, size_t start_off, size_t len, NfaCallback cb, void *ctxt) { const struct mcclellan *m = getImplNfa(nfa); + u32 s; - u32 s = top ? m->start_anchored : unaligned_load_u16(state); + if (top) { + s = m->start_anchored; + + // new byte + if (m->has_wide) { + unaligned_store_u16((u16 *)state + 1, 0); + } + } else { + s = unaligned_load_u16(state); + } if (m->flags & MCCLELLAN_FLAG_SINGLE) { - mcclellanExec16_i(m, &s, buf + start_off, len - start_off, + mcclellanExec16_i(m, &s, state, buf + start_off, len - start_off, start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT); } else { - mcclellanExec16_i(m, &s, buf + start_off, len - start_off, + mcclellanExec16_i(m, &s, state, buf + start_off, len - start_off, start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT); } @@ -1178,9 +1277,16 @@ char nfaExecMcClellan8_queueInitState(UNUSED const struct NFA *nfa, char nfaExecMcClellan16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { - assert(nfa->scratchStateSize == 2); + const struct mcclellan *m = getImplNfa(nfa); + assert(m->has_wide == 1 ? nfa->scratchStateSize == 4 + : nfa->scratchStateSize == 2); assert(ISALIGNED_N(q->state, 2)); *(u16 *)q->state = 0; + + // new byte + if (m->has_wide) { + unaligned_store_u16((u16 *)q->state + 1, 0); + } return 0; } @@ -1206,21 +1312,39 @@ char nfaExecMcClellan8_expandState(UNUSED const struct NFA *nfa, void *dest, char nfaExecMcClellan16_queueCompressState(UNUSED const struct NFA *nfa, const struct mq *q, UNUSED s64a loc) { + const struct mcclellan *m = getImplNfa(nfa); void *dest = q->streamState; const void *src = q->state; - assert(nfa->scratchStateSize == 2); - assert(nfa->streamStateSize == 2); + assert(m->has_wide == 1 ? nfa->scratchStateSize == 4 + : nfa->scratchStateSize == 2); + assert(m->has_wide == 1 ? 
nfa->streamStateSize == 4 + : nfa->streamStateSize == 2); + assert(ISALIGNED_N(src, 2)); unaligned_store_u16(dest, *(const u16 *)(src)); + + // new byte + if (m->has_wide) { + unaligned_store_u16((u16 *)dest + 1, *((const u16 *)src + 1)); + } return 0; } char nfaExecMcClellan16_expandState(UNUSED const struct NFA *nfa, void *dest, const void *src, UNUSED u64a offset, UNUSED u8 key) { - assert(nfa->scratchStateSize == 2); - assert(nfa->streamStateSize == 2); + const struct mcclellan *m = getImplNfa(nfa); + assert(m->has_wide == 1 ? nfa->scratchStateSize == 4 + : nfa->scratchStateSize == 2); + assert(m->has_wide == 1 ? nfa->streamStateSize == 4 + : nfa->streamStateSize == 2); + assert(ISALIGNED_N(dest, 2)); *(u16 *)dest = unaligned_load_u16(src); + + // new byte + if (m->has_wide) { + *((u16 *)dest + 1) = unaligned_load_u16((const u16 *)src + 1); + } return 0; } diff --git a/src/nfa/mcclellan_common_impl.h b/src/nfa/mcclellan_common_impl.h index be130715..7b0e7f48 100644 --- a/src/nfa/mcclellan_common_impl.h +++ b/src/nfa/mcclellan_common_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -82,3 +82,108 @@ u32 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table, u32 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET); return succ_table[(daddy << as) + cprime]; } + +static really_inline +u16 doWide16(const char *wide_entry, const u8 **c_inout, const u8 *end, + const u8 *remap, const u16 *s, char *qstate, u16 *offset) { + // Internal relative offset after the last visit of the wide state. + if (qstate != NULL) { // stream mode + *offset = unaligned_load_u16((const u16 *)(qstate + 2)); + } + + u8 successful = 0; + const u8 *c = *c_inout; + u32 len_c = end - c; + + u16 width = *(const u16 *)(wide_entry + WIDE_WIDTH_OFFSET); + assert(width >= 8); + const u8 *symbols = (const u8 *)(wide_entry + WIDE_SYMBOL_OFFSET16); + const u16 *trans = (const u16 *)(wide_entry + + WIDE_TRANSITION_OFFSET16(width)); + + assert(*offset < width); + u16 len_w = width - *offset; + const u8 *sym = symbols + *offset; + + char tmp[16]; + u16 pos = 0; + + if (*offset == 0 && remap[*c] != *sym) { + goto normal; + } + + // both in (16, +oo). + while (len_w >= 16 && len_c >= 16) { + m128 str_w = loadu128(sym); + for (size_t i = 0; i < 16; i++) { + tmp[i] = remap[*(c + i)]; + } + m128 str_c = loadu128(tmp); + + u32 z = movemask128(eq128(str_w, str_c)); + pos = ctz32(~z); + assert(pos <= 16); + + if (pos < 16) { + goto normal; + } + + sym += 16; + c += 16; + len_w -= 16; + len_c -= 16; + } + + pos = 0; + // at least one in (0, 16). + u32 loadLength_w = MIN(len_w, 16); + u32 loadLength_c = MIN(len_c, 16); + m128 str_w = loadbytes128(sym, loadLength_w); + for (size_t i = 0; i < loadLength_c; i++) { + tmp[i] = remap[*(c + i)]; + } + m128 str_c = loadbytes128(tmp, loadLength_c); + + u32 z = movemask128(eq128(str_w, str_c)); + pos = ctz32(~z); + + pos = MIN(pos, MIN(loadLength_w, loadLength_c)); + + if (loadLength_w <= loadLength_c) { + assert(pos <= loadLength_w); + // successful matching. + if (pos == loadLength_w) { + c -= 1; + successful = 1; + } + // failure, do nothing. + } else { + assert(pos <= loadLength_c); + // successful partial matching. + if (pos == loadLength_c) { + c -= 1; + goto partial; + } + // failure, do nothing. 
+ } + +normal: + *offset = 0; + if (qstate != NULL) { + // Internal relative offset. + unaligned_store_u16(qstate + 2, *offset); + } + c += pos; + *c_inout = c; + return successful ? *trans : *(trans + 1 + remap[*c]); + +partial: + *offset = sym - symbols + pos; + if (qstate != NULL) { + // Internal relative offset. + unaligned_store_u16(qstate + 2, *offset); + } + c += pos; + *c_inout = c; + return *s; +} diff --git a/src/nfa/mcclellan_internal.h b/src/nfa/mcclellan_internal.h index 5289b074..482fdb1b 100644 --- a/src/nfa/mcclellan_internal.h +++ b/src/nfa/mcclellan_internal.h @@ -50,6 +50,16 @@ extern "C" #define SHERMAN_CHARS_OFFSET 4 #define SHERMAN_STATES_OFFSET(sso_len) (4 + (sso_len)) +#define WIDE_STATE 2 +#define WIDE_ENTRY_OFFSET8(weo_pos) (2 + (weo_pos)) +#define WIDE_ENTRY_OFFSET16(weo_pos) (4 + (weo_pos)) + +#define WIDE_WIDTH_OFFSET 0 +#define WIDE_SYMBOL_OFFSET8 1 +#define WIDE_TRANSITION_OFFSET8(wto_width) (1 + (wto_width)) +#define WIDE_SYMBOL_OFFSET16 2 +#define WIDE_TRANSITION_OFFSET16(wto_width) (2 + ROUNDUP_N(wto_width, 2)) + struct report_list { u32 count; ReportID report[]; @@ -79,13 +89,17 @@ struct mcclellan { u16 accel_limit_8; /**< 8 bit, lowest accelerable state */ u16 accept_limit_8; /**< 8 bit, lowest accept state */ u16 sherman_limit; /**< lowest sherman state */ + u16 wide_limit; /**< 8/16 bit, lowest wide head state */ u8 alphaShift; u8 flags; u8 has_accel; /**< 1 iff there are any accel plans */ + u8 has_wide; /**< 1 iff there exists any wide state */ u8 remap[256]; /**< remaps characters to a smaller alphabet */ ReportID arb_report; /**< one of the accepts that this dfa may raise */ u32 accel_offset; /**< offset of accel structures from start of McClellan */ u32 haig_offset; /**< reserved for use by Haig, relative to start of NFA */ + u32 wide_offset; /**< offset of the wide state entries to the start of the + * nfa structure */ }; static really_inline @@ -106,6 +120,43 @@ char *findMutableShermanState(char *sherman_base_offset, u16 sherman_base, return sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base); } +static really_inline +const char *findWideEntry8(UNUSED const struct mcclellan *m, + const char *wide_base, u32 wide_limit, u32 s) { + UNUSED u8 type = *(const u8 *)wide_base; + assert(type == WIDE_STATE); + const u32 entry_offset + = *(const u32 *)(wide_base + + WIDE_ENTRY_OFFSET8((s - wide_limit) * sizeof(u32))); + + const char *rv = wide_base + entry_offset; + assert(rv < (const char *)m + m->length - sizeof(struct NFA)); + return rv; +} + +static really_inline +const char *findWideEntry16(UNUSED const struct mcclellan *m, + const char *wide_base, u32 wide_limit, u32 s) { + UNUSED u8 type = *(const u8 *)wide_base; + assert(type == WIDE_STATE); + const u32 entry_offset + = *(const u32 *)(wide_base + + WIDE_ENTRY_OFFSET16((s - wide_limit) * sizeof(u32))); + + const char *rv = wide_base + entry_offset; + assert(rv < (const char *)m + m->length - sizeof(struct NFA)); + return rv; +} + +static really_inline +char *findMutableWideEntry16(char *wide_base, u32 wide_limit, u32 s) { + u32 entry_offset + = *(const u32 *)(wide_base + + WIDE_ENTRY_OFFSET16((s - wide_limit) * sizeof(u32))); + + return wide_base + entry_offset; +} + #ifdef __cplusplus } #endif diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 8e3a744c..c1a4f87f 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -56,13 +56,19 @@ #include #include #include +#include #include #include #include +#include "mcclellandump.h" +#include 
"util/dump_util.h" +#include "util/dump_charclass.h" + using namespace std; using boost::adaptors::map_keys; +using boost::dynamic_bitset; #define ACCEL_DFA_MAX_OFFSET_DEPTH 4 @@ -82,6 +88,8 @@ namespace /* anon */ { struct dstate_extra { u16 daddytaken = 0; bool shermanState = false; + bool wideState = false; + bool wideHead = false; }; struct dfa_info { @@ -89,6 +97,8 @@ struct dfa_info { raw_dfa &raw; vector &states; vector extra; + vector> wide_state_chain; + vector> wide_symbol_chain; const u16 alpha_size; /* including special symbols */ const array &alpha_remap; const u16 impl_alpha_size; @@ -112,6 +122,14 @@ struct dfa_info { return extra[raw_id].shermanState; } + bool is_widestate(dstate_id_t raw_id) const { + return extra[raw_id].wideState; + } + + bool is_widehead(dstate_id_t raw_id) const { + return extra[raw_id].wideHead; + } + size_t size(void) const { return states.size(); } }; @@ -124,6 +142,35 @@ u8 dfa_info::getAlphaShift() const { } } +struct state_prev_info { + vector> prev_vec; + explicit state_prev_info(size_t alpha_size) : prev_vec(alpha_size) {} +}; + +struct DfaPrevInfo { + u16 impl_alpha_size; + u16 state_num; + vector states; + set accepts; + + explicit DfaPrevInfo(raw_dfa &rdfa); +}; + +DfaPrevInfo::DfaPrevInfo(raw_dfa &rdfa) + : impl_alpha_size(rdfa.getImplAlphaSize()), state_num(rdfa.states.size()), + states(state_num, state_prev_info(impl_alpha_size)){ + for (size_t i = 0; i < states.size(); i++) { + for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { + dstate_id_t curr = rdfa.states[i].next[sym]; + states[curr].prev_vec[sym].push_back(i); + } + if (!rdfa.states[i].reports.empty() + || !rdfa.states[i].reports_eod.empty()) { + DEBUG_PRINTF("accept raw state: %ld\n", i); + accepts.insert(i); + } + } +} } // namespace static @@ -151,6 +198,11 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { for (size_t j = 0; j < alphaSize; j++) { size_t c_prime = (i << alphaShift) + j; + // wide state has no aux structure. + if (m->has_wide && succ_table[c_prime] >= m->wide_limit) { + continue; + } + mstate_aux *aux = getAux(n, succ_table[c_prime]); if (aux->accept) { @@ -165,7 +217,8 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { /* handle the sherman states */ char *sherman_base_offset = (char *)n + m->sherman_offset; - for (u16 j = m->sherman_limit; j < m->state_count; j++) { + u16 sherman_ceil = m->has_wide == 1 ? m->wide_limit : m->state_count; + for (u16 j = m->sherman_limit; j < sherman_ceil; j++) { char *sherman_cur = findMutableShermanState(sherman_base_offset, m->sherman_limit, j); assert(*(sherman_cur + SHERMAN_TYPE_OFFSET) == SHERMAN_STATE); @@ -174,6 +227,11 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { for (u8 i = 0; i < len; i++) { u16 succ_i = unaligned_load_u16((u8 *)&succs[i]); + // wide state has no aux structure. + if (m->has_wide && succ_i >= m->wide_limit) { + continue; + } + mstate_aux *aux = getAux(n, succ_i); if (aux->accept) { @@ -187,6 +245,51 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { unaligned_store_u16((u8 *)&succs[i], succ_i); } } + + /* handle the wide states */ + if (m->has_wide) { + u32 wide_limit = m->wide_limit; + char *wide_base = (char *)n + m->wide_offset; + assert(*wide_base == WIDE_STATE); + u16 wide_number = verify_u16(info.wide_symbol_chain.size()); + // traverse over wide head states. 

 static
@@ -151,6 +198,11 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
         for (size_t j = 0; j < alphaSize; j++) {
             size_t c_prime = (i << alphaShift) + j;

+            // wide state has no aux structure.
+            if (m->has_wide && succ_table[c_prime] >= m->wide_limit) {
+                continue;
+            }
+
             mstate_aux *aux = getAux(n, succ_table[c_prime]);

             if (aux->accept) {
@@ -165,7 +217,8 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {

     /* handle the sherman states */
     char *sherman_base_offset = (char *)n + m->sherman_offset;
-    for (u16 j = m->sherman_limit; j < m->state_count; j++) {
+    u16 sherman_ceil = m->has_wide == 1 ? m->wide_limit : m->state_count;
+    for (u16 j = m->sherman_limit; j < sherman_ceil; j++) {
         char *sherman_cur
             = findMutableShermanState(sherman_base_offset, m->sherman_limit, j);
         assert(*(sherman_cur + SHERMAN_TYPE_OFFSET) == SHERMAN_STATE);
@@ -174,6 +227,11 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {

         for (u8 i = 0; i < len; i++) {
             u16 succ_i = unaligned_load_u16((u8 *)&succs[i]);
+            // wide state has no aux structure.
+            if (m->has_wide && succ_i >= m->wide_limit) {
+                continue;
+            }
+
             mstate_aux *aux = getAux(n, succ_i);

             if (aux->accept) {
@@ -187,6 +245,51 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
             unaligned_store_u16((u8 *)&succs[i], succ_i);
         }
     }
+
+    /* handle the wide states */
+    if (m->has_wide) {
+        u32 wide_limit = m->wide_limit;
+        char *wide_base = (char *)n + m->wide_offset;
+        assert(*wide_base == WIDE_STATE);
+        u16 wide_number = verify_u16(info.wide_symbol_chain.size());
+        // traverse over wide head states.
+        for (u16 j = wide_limit; j < wide_limit + wide_number; j++) {
+            char *wide_cur
+                = findMutableWideEntry16(wide_base, wide_limit, j);
+            u16 width = *(const u16 *)(wide_cur + WIDE_WIDTH_OFFSET);
+            u16 *trans = (u16 *)(wide_cur + WIDE_TRANSITION_OFFSET16(width));
+
+            // check successful transition
+            u16 next = unaligned_load_u16((u8 *)trans);
+            if (next < wide_limit) {
+                mstate_aux *aux = getAux(n, next);
+                if (aux->accept) {
+                    next |= ACCEPT_FLAG;
+                }
+                if (aux->accel_offset) {
+                    next |= ACCEL_FLAG;
+                }
+                unaligned_store_u16((u8 *)trans, next);
+            }
+            trans++;
+
+            // check failure transition
+            for (symbol_t k = 0; k < alphaSize; k++) {
+                u16 next_k = unaligned_load_u16((u8 *)&trans[k]);
+                if (next_k >= wide_limit) {
+                    continue;
+                }
+                mstate_aux *aux_k = getAux(n, next_k);
+                if (aux_k->accept) {
+                    next_k |= ACCEPT_FLAG;
+                }
+                if (aux_k->accel_offset) {
+                    next_k |= ACCEL_FLAG;
+                }
+                unaligned_store_u16((u8 *)&trans[k], next_k);
+            }
+        }
+    }
 }

 u32 mcclellan_build_strat::max_allowed_offset_accel() const {
@@ -232,6 +335,19 @@ void populateBasicInfo(size_t state_size, const dfa_info &info,
     m->start_anchored = info.implId(info.raw.start_anchored);
     m->start_floating = info.implId(info.raw.start_floating);
     m->has_accel = accel_count ? 1 : 0;
+    m->has_wide = info.wide_state_chain.size() > 0 ? 1 : 0;
+
+    if (state_size == sizeof(u8) && m->has_wide == 1) {
+        // allocate 1 more byte for wide state use.
+        nfa->scratchStateSize += sizeof(u8);
+        nfa->streamStateSize += sizeof(u8);
+    }
+
+    if (state_size == sizeof(u16) && m->has_wide == 1) {
+        // allocate 2 more bytes for wide state use.
+        nfa->scratchStateSize += sizeof(u16);
+        nfa->streamStateSize += sizeof(u16);
+    }

     if (single) {
         m->flags |= MCCLELLAN_FLAG_SINGLE;
@@ -404,6 +520,24 @@ size_t calcShermanRegionSize(const dfa_info &info) {
     return ROUNDUP_16(rv);
 }

+static
+size_t calcWideRegionSize(const dfa_info &info) {
+    if (info.wide_state_chain.empty()) {
+        return 0;
+    }
+
+    // wide info header
+    size_t rv = info.wide_symbol_chain.size() * sizeof(u32) + 4;
+
+    // wide info body
+    for (const auto &chain : info.wide_symbol_chain) {
+        rv += ROUNDUP_N(chain.size(), 2)
+              + (info.impl_alpha_size + 1) * sizeof(u16) + 2;
+    }
+
+    return ROUNDUP_16(rv);
+}
+
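For a concrete feel of calcWideRegionSize() above: the region is a small header (a 4-byte stub plus one u32 offset per chain) followed by one entry per chain. A hedged re-statement with example numbers (wideRegionSize() is an illustrative mirror, not the shipped function):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

static std::size_t roundup2(std::size_t x) { return (x + 1) & ~std::size_t(1); }
static std::size_t roundup16(std::size_t x) { return (x + 15) & ~std::size_t(15); }

// Mirror of calcWideRegionSize(): header stub + one u32 offset per chain,
// then per chain the padded symbol string, an (impl_alpha_size + 1)-entry
// u16 transition table, and a 2-byte width field.
std::size_t wideRegionSize(const std::vector<std::size_t> &chain_widths,
                           std::size_t impl_alpha_size) {
    std::size_t rv = chain_widths.size() * sizeof(std::uint32_t) + 4;
    for (std::size_t width : chain_widths) {
        rv += roundup2(width) + (impl_alpha_size + 1) * sizeof(std::uint16_t)
              + 2;
    }
    return roundup16(rv);
}

int main() {
    // One 9-symbol chain over a 40-symbol implementation alphabet:
    // 8 (header) + 10 (padded symbols) + 82 (transitions) + 2 = 102 -> 112.
    std::printf("%zu\n", wideRegionSize({9}, 40));
}
```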
DEBUG_PRINTF("[sherm] mapping state %u to %u\n", s, next); + info.states[s].impl_id = next++; + } + + *wide_limit = next; + for (const dstate_id_t &s : wideHead) { + DEBUG_PRINTF("[widehead] mapping state %u to %u\n", s, next); + info.states[s].impl_id = next++; + } + + for (const dstate_id_t &s : wideState) { + DEBUG_PRINTF("[wide] mapping state %u to %u\n", s, next); + info.states[s].impl_id = next++; } /* Check to see if we haven't over allocated our states */ - DEBUG_PRINTF("next sherman %u masked %u\n", next_sherman, - (dstate_id_t)(next_sherman & STATE_MASK)); - return (next_sherman - 1) == ((next_sherman - 1) & STATE_MASK); + DEBUG_PRINTF("next sherman %u masked %u\n", next, + (dstate_id_t)(next & STATE_MASK)); + return (next - 1) == ((next - 1) & STATE_MASK); } static @@ -470,12 +622,16 @@ bytecode_ptr mcclellanCompile16(dfa_info &info, const CompileContext &cc, assert(alphaShift <= 8); u16 count_real_states; - if (!allocateFSN16(info, &count_real_states)) { + u16 wide_limit; + if (!allocateFSN16(info, &count_real_states, &wide_limit)) { DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", info.size()); return nullptr; } + DEBUG_PRINTF("count_real_states: %d\n", count_real_states); + DEBUG_PRINTF("non_wide_states: %d\n", wide_limit); + auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); map accel_escape_info = info.strat.getAccelInfo(cc.grey); @@ -483,7 +639,7 @@ bytecode_ptr mcclellanCompile16(dfa_info &info, const CompileContext &cc, size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16) * count_real_states; - size_t aux_size = sizeof(mstate_aux) * info.size(); + size_t aux_size = sizeof(mstate_aux) * wide_limit; size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size); size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); @@ -491,12 +647,24 @@ bytecode_ptr mcclellanCompile16(dfa_info &info, const CompileContext &cc, + ri->getReportListSize(), 32); size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size); size_t sherman_size = calcShermanRegionSize(info); - - size_t total_size = sherman_offset + sherman_size; + size_t wide_offset = ROUNDUP_16(sherman_offset + sherman_size); + size_t wide_size = calcWideRegionSize(info); + size_t total_size = wide_offset + wide_size; accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); + DEBUG_PRINTF("aux_offset %zu\n", aux_offset); + DEBUG_PRINTF("aux_size %zu\n", aux_size); + DEBUG_PRINTF("rl size %u\n", ri->getReportListSize()); + DEBUG_PRINTF("accel_offset %zu\n", accel_offset + sizeof(NFA)); + DEBUG_PRINTF("accel_size %zu\n", accel_size); + DEBUG_PRINTF("sherman_offset %zu\n", sherman_offset); + DEBUG_PRINTF("sherman_size %zu\n", sherman_size); + DEBUG_PRINTF("wide_offset %zu\n", wide_offset); + DEBUG_PRINTF("wide_size %zu\n", wide_size); + DEBUG_PRINTF("total_size %zu\n", total_size); + auto nfa = make_zeroed_bytecode_ptr(total_size); char *nfa_base = (char *)nfa.get(); @@ -511,6 +679,9 @@ bytecode_ptr mcclellanCompile16(dfa_info &info, const CompileContext &cc, mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset); mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get()); + m->wide_limit = wide_limit; + m->wide_offset = wide_offset; + /* copy in the mc header information */ m->sherman_offset = sherman_offset; m->sherman_end = total_size; @@ -518,7 +689,7 @@ bytecode_ptr mcclellanCompile16(dfa_info &info, const CompileContext &cc, /* do normal states */ for (size_t i = 0; i < 
-        if (info.is_sherman(i)) {
+        if (info.is_sherman(i) || info.is_widestate(i)) {
             continue;
         }
@@ -556,6 +727,7 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
         mstate_aux *this_aux = getAux(nfa.get(), fs);

         assert(fs >= count_real_states);
+        assert(fs < wide_limit);

         char *curr_sherman_entry
             = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE;
@@ -599,6 +771,71 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
         }
     }

+    if (!info.wide_state_chain.empty()) {
+        /* do wide states using info */
+        u16 wide_number = verify_u16(info.wide_symbol_chain.size());
+        char *wide_base = nfa_base + m->wide_offset;
+        assert(ISALIGNED_16(wide_base));
+
+        char *wide_top = wide_base;
+        *(u8 *)(wide_top++) = WIDE_STATE;
+        wide_top = ROUNDUP_PTR(wide_top, 2);
+        *(u16 *)(wide_top) = wide_number;
+        wide_top += 2;
+
+        char *curr_wide_entry = wide_top + wide_number * sizeof(u32);
+        u32 *wide_offset_list = (u32 *)wide_top;
+
+        /* get the order of writing wide states */
+        vector<size_t> order(wide_number);
+        for (size_t i = 0; i < wide_number; i++) {
+            dstate_id_t head = info.wide_state_chain[i].front();
+            size_t pos = info.implId(head) - m->wide_limit;
+            order[pos] = i;
+        }
+
+        for (size_t i : order) {
+            vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
+            vector<symbol_t> &symbol_chain = info.wide_symbol_chain[i];
+
+            u16 width = verify_u16(symbol_chain.size());
+            *(u16 *)(curr_wide_entry + WIDE_WIDTH_OFFSET) = width;
+            u8 *chars = (u8 *)(curr_wide_entry + WIDE_SYMBOL_OFFSET16);
+
+            // store wide state symbol chain
+            for (size_t j = 0; j < width; j++) {
+                *(chars++) = verify_u8(symbol_chain[j]);
+            }
+
+            // store wide state transition table
+            u16 *trans = (u16 *)(curr_wide_entry
+                                 + WIDE_TRANSITION_OFFSET16(width));
+            dstate_id_t tail = state_chain[width - 1];
+            symbol_t last = symbol_chain[width - 1];
+            dstate_id_t tran = info.states[tail].next[last];
+            // 1. successful transition
+            *trans++ = info.implId(tran);
+            // 2. failure transition
+            for (size_t j = 0; verify_u16(j) < width - 1; j++) {
+                if (symbol_chain[j] != last) {
+                    tran = info.states[state_chain[j]].next[last];
+                }
+            }
+            for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) {
+                if (sym != last) {
+                    *trans++ = info.implId(info.states[tail].next[sym]);
+                } else {
+                    *trans++ = info.implId(tran);
+                }
+            }
+
+            *wide_offset_list++ = verify_u32(curr_wide_entry - wide_base);
+
+            curr_wide_entry = (char *)trans;
+        }
+    }
+
     markEdges(nfa.get(), succ_table, info);

     if (accel_states && nfa) {
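The serialization loop above fixes the on-disk shape of each wide entry, matching the WIDE_*_OFFSET16 macros from mcclellan_internal.h. A read-side sketch of that layout (WideEntryView is invented for illustration, not a Hyperscan type):

```cpp
#include <cstdint>
#include <cstring>

// One serialized wide entry: u16 width, then `width` remapped symbols padded
// to 2 bytes, then u16 transitions ([0] taken on a full chain match,
// [1 + sym] taken on divergence at symbol sym).
struct WideEntryView {
    const char *base;

    std::uint16_t width() const {
        std::uint16_t w;
        std::memcpy(&w, base, sizeof(w)); // WIDE_WIDTH_OFFSET == 0
        return w;
    }
    const std::uint8_t *symbols() const {
        return (const std::uint8_t *)(base + 2); // WIDE_SYMBOL_OFFSET16
    }
    const std::uint16_t *transitions() const {
        std::size_t padded = (width() + 1u) & ~std::size_t(1); // ROUNDUP_N(w, 2)
        return (const std::uint16_t *)(base + 2 + padded);
    }
};
```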
@@ -844,12 +1081,16 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit,
     if (trust_daddy_states) {
         // Use the daddy already set for this state so long as it isn't already
         // a Sherman state.
-        if (!info.is_sherman(currState.daddy)) {
+        dstate_id_t daddy = currState.daddy;
+        if (!info.is_sherman(daddy) && !info.is_widestate(daddy)) {
             hinted.insert(currState.daddy);
         } else {
             // Fall back to granddaddy, which has already been processed (due
             // to BFS ordering) and cannot be a Sherman state.
             dstate_id_t granddaddy = info.states[currState.daddy].daddy;
+            if (info.is_widestate(granddaddy)) {
+                return;
+            }
             assert(!info.is_sherman(granddaddy));
             hinted.insert(granddaddy);
         }
@@ -861,7 +1102,7 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit,
         assert(donor < curr_id);
         u32 score = 0;

-        if (info.is_sherman(donor)) {
+        if (info.is_sherman(donor) || info.is_widestate(donor)) {
             continue;
         }
@@ -934,6 +1175,290 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
     return false;
 }

+/* \brief Test for only-one-predecessor property. */
+static
+bool check_property1(const DfaPrevInfo &info, const u16 impl_alpha_size,
+                     const dstate_id_t curr_id, dstate_id_t &prev_id,
+                     symbol_t &prev_sym) {
+    u32 num_prev = 0;
+    bool test_p1 = false;
+
+    for (symbol_t sym = 0; sym < impl_alpha_size; sym++) {
+        num_prev += info.states[curr_id].prev_vec[sym].size();
+        DEBUG_PRINTF("Check symbol: %u, with its vector size: %lu\n", sym,
+                     info.states[curr_id].prev_vec[sym].size());
+        if (num_prev == 1 && !test_p1) {
+            test_p1 = true;
+            prev_id = info.states[curr_id].prev_vec[sym].front(); //[0] for sure???
+            prev_sym = sym;
+        }
+    }
+
+    return num_prev == 1;
+}
+
+/* \brief Test for same-failure-action property. */
+static
+bool check_property2(const raw_dfa &rdfa, const u16 impl_alpha_size,
+                     const dstate_id_t curr_id, const dstate_id_t prev_id,
+                     const symbol_t curr_sym, const symbol_t prev_sym) {
+    const dstate &prevState = rdfa.states[prev_id];
+    const dstate &currState = rdfa.states[curr_id];
+
+    // Compare transition tables between currState and prevState.
+    u16 score = 0;
+    for (symbol_t sym = 0; sym < impl_alpha_size; sym++) {
+        if (currState.next[sym] == prevState.next[sym]
+            && sym != curr_sym && sym != prev_sym) {
+            score++;
+        }
+    }
+    DEBUG_PRINTF("(Score: %u/%u)\n", score, impl_alpha_size);
+
+    // 2 cases.
+    if (curr_sym != prev_sym && score >= impl_alpha_size - 2
+        && currState.next[prev_sym] == prevState.next[curr_sym]) {
+        return true;
+    } else if (curr_sym == prev_sym && score == impl_alpha_size - 1) {
+        return true;
+    }
+    return false;
+}
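Intuition for the two properties: the interior states of a long literal form exactly this shape, since each has a single incoming (predecessor, symbol) edge (property 1) and shares its failure behaviour with its predecessor (property 2). A simplified sketch of the property-1 test over the inverted table from the earlier example (not the ue2 implementation):

```cpp
#include <cstdint>
#include <vector>

using dstate_id_t = std::uint16_t;
using symbol_t = std::uint16_t;

// A state qualifies for a wide chain only if exactly one (predecessor,
// symbol) pair leads into it. prev_for_state is that state's row of the
// inverted table built by invert() in the earlier sketch.
bool onlyOnePredecessor(
        const std::vector<std::vector<dstate_id_t>> &prev_for_state,
        dstate_id_t &prev_id, symbol_t &prev_sym) {
    std::size_t num_prev = 0;
    for (symbol_t sym = 0; sym < prev_for_state.size(); sym++) {
        num_prev += prev_for_state[sym].size();
        if (num_prev == 1 && prev_for_state[sym].size() == 1) {
            prev_id = prev_for_state[sym].front();
            prev_sym = sym;
        }
    }
    return num_prev == 1; // mirrors check_property1() above
}
```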
+
+/* \brief Check whether adding current prev_id will generate a circle. */
+static
+bool check_circle(const DfaPrevInfo &info, const u16 impl_alpha_size,
+                  const vector<dstate_id_t> &chain, const dstate_id_t id) {
+    const vector<vector<dstate_id_t>> &prev_vec = info.states[id].prev_vec;
+    const dstate_id_t tail = chain.front();
+    for (symbol_t sym = 0; sym < impl_alpha_size; sym++) {
+        auto iter = find(prev_vec[sym].begin(), prev_vec[sym].end(), tail);
+        if (iter != prev_vec[sym].end()) {
+            // Tail is one of id's predecessors, forming a circle.
+            return true;
+        }
+    }
+    return false;
+}
+
+/* \brief Returns a chain of state ids and symbols. */
+static
+dstate_id_t find_chain_candidate(const raw_dfa &rdfa, const DfaPrevInfo &info,
+                                 const dstate_id_t curr_id,
+                                 const symbol_t curr_sym,
+                                 vector<dstate_id_t> &temp_chain) {
+    // Record current id first.
+    temp_chain.push_back(curr_id);
+
+    const u16 size = info.impl_alpha_size;
+
+    // Stop when entering root cloud.
+    if (rdfa.start_anchored != DEAD_STATE
+        && is_cyclic_near(rdfa, rdfa.start_anchored)
+        && curr_id < size) {
+        return curr_id;
+    }
+    if (rdfa.start_floating != DEAD_STATE
+        && curr_id >= rdfa.start_floating
+        && curr_id < rdfa.start_floating + size * 3) {
+        return curr_id;
+    }
+
+    // Stop when reaching anchored or floating.
+    if (curr_id == rdfa.start_anchored || curr_id == rdfa.start_floating) {
+        return curr_id;
+    }
+
+    dstate_id_t prev_id = 0;
+    symbol_t prev_sym = ALPHABET_SIZE;
+
+    // Check the only-one-predecessor property.
+    if (!check_property1(info, size, curr_id, prev_id, prev_sym)) {
+        return curr_id;
+    }
+    assert(prev_id != 0 && prev_sym != ALPHABET_SIZE);
+    DEBUG_PRINTF("(P1 test passed.)\n");
+
+    // Circle testing for the prev_id that passes the P1 test.
+    if (check_circle(info, size, temp_chain, prev_id)) {
+        DEBUG_PRINTF("(A circle is found.)\n");
+        return curr_id;
+    }
+
+    // Check the same-failure-action property.
+    if (!check_property2(rdfa, size, curr_id, prev_id, curr_sym, prev_sym)) {
+        return curr_id;
+    }
+    DEBUG_PRINTF("(P2 test passed.)\n");
+
+    if (!rdfa.states[prev_id].reports.empty()
+        || !rdfa.states[prev_id].reports_eod.empty()) {
+        return curr_id;
+    } else {
+        return find_chain_candidate(rdfa, info, prev_id, prev_sym, temp_chain);
+    }
+}
+
+/* \brief Always store the non-extensible chains found till now. */
+static
+bool store_chain_longest(vector<vector<dstate_id_t>> &candidate_chain,
+                         vector<dstate_id_t> &temp_chain,
+                         dynamic_bitset<> &added, bool head_is_new) {
+    dstate_id_t head = temp_chain.front();
+    u16 length = temp_chain.size();
+
+    if (head_is_new) {
+        DEBUG_PRINTF("This is a new chain!\n");
+
+        // Add this new chain and get it marked.
+        candidate_chain.push_back(temp_chain);
+
+        for (auto &id : temp_chain) {
+            DEBUG_PRINTF("(Marking s%u ...)\n", id);
+            added.set(id);
+        }
+
+        return true;
+    }
+
+    DEBUG_PRINTF("This is a longer chain!\n");
+    assert(!candidate_chain.empty());
+
+    auto chain = find_if(candidate_chain.begin(), candidate_chain.end(),
+                         [&](const vector<dstate_id_t> &it) {
+                             return it.front() == head;
+                         });
+
+    // Not a valid head, just do nothing and return.
+    if (chain == candidate_chain.end()) {
+        return false;
+    }
+
+    u16 len = chain->size();
+
+    if (length > len) {
+        // Find out the branch node first.
+        size_t piv = 0;
+        for (; piv < length; piv++) {
+            if ((*chain)[piv] != temp_chain[piv]) {
+                break;
+            }
+        }
+
+        for (size_t j = piv + 1; j < length; j++) {
+            DEBUG_PRINTF("(Marking s%u (new branch) ...)\n", temp_chain[j]);
+            added.set(temp_chain[j]);
+        }
+
+        // Unmark old unuseful nodes.
+        // (Except the tail node, which is in working queue)
+        for (size_t j = piv + 1; j < verify_u16(len - 1); j++) {
+            DEBUG_PRINTF("(UnMarking s%u (old branch)...)\n", (*chain)[j]);
+            added.reset((*chain)[j]);
+        }
+
+        chain->assign(temp_chain.begin(), temp_chain.end());
+    }
+
+    return false;
+}
+
+/* \brief Generate wide_symbol_chain from wide_state_chain. */
+static
+void generate_symbol_chain(dfa_info &info, vector<symbol_t> &chain_tail) {
+    raw_dfa &rdfa = info.raw;
+    assert(chain_tail.size() == info.wide_state_chain.size());
+
+    for (size_t i = 0; i < info.wide_state_chain.size(); i++) {
+        vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
+        vector<symbol_t> symbol_chain;
+
+        info.extra[state_chain[0]].wideHead = true;
+        size_t width = state_chain.size() - 1;
+
+        for (size_t j = 0; j < width; j++) {
+            dstate_id_t curr_id = state_chain[j];
+            dstate_id_t next_id = state_chain[j + 1];
+
+            // The last state of the chain doesn't belong to a wide state.
+            info.extra[curr_id].wideState = true;
+
+            // The tail symbol comes from vector chain_tail;
+            if (j == width - 1) {
+                symbol_chain.push_back(chain_tail[i]);
+            } else {
+                for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) {
+                    if (rdfa.states[curr_id].next[sym] == next_id) {
+                        symbol_chain.push_back(sym);
+                        break;
+                    }
+                }
+            }
+        }
+
+        info.wide_symbol_chain.push_back(symbol_chain);
+    }
+}
+
+/* \brief Find potential regions of states to be packed into wide states. */
+static
+void find_wide_state(dfa_info &info) {
+    DfaPrevInfo dinfo(info.raw);
+    queue<dstate_id_t> work_queue;
+
+    dynamic_bitset<> added(info.raw.states.size());
+    for (auto it : dinfo.accepts) {
+        work_queue.push(it);
+        added.set(it);
+    }
+
+    vector<symbol_t> chain_tail;
+    while (!work_queue.empty()) {
+        dstate_id_t curr_id = work_queue.front();
+        work_queue.pop();
+        DEBUG_PRINTF("Newly popped state: s%u\n", curr_id);
+
+        for (symbol_t sym = 0; sym < dinfo.impl_alpha_size; sym++) {
+            for (auto info_it : dinfo.states[curr_id].prev_vec[sym]) {
+                if (added.test(info_it)) {
+                    DEBUG_PRINTF("(s%u already marked.)\n", info_it);
+                    continue;
+                }
+
+                vector<dstate_id_t> temp_chain;
+                // Head is a state failing the test of the chain.
+                dstate_id_t head = find_chain_candidate(info.raw, dinfo,
+                                                        info_it, sym,
+                                                        temp_chain);
+
+                // A candidate chain should contain 8 substates at least.
+                if (temp_chain.size() < 8) {
+                    DEBUG_PRINTF("(Not enough substates, continue.)\n");
+                    continue;
+                }
+
+                bool head_is_new = !added.test(head);
+                if (head_is_new) {
+                    added.set(head);
+                    work_queue.push(head);
+                    DEBUG_PRINTF("Newly pushed state: s%u\n", head);
+                }
+
+                reverse(temp_chain.begin(), temp_chain.end());
+                temp_chain.push_back(curr_id);
+
+                assert(head > 0 && head == temp_chain.front());
+                if (store_chain_longest(info.wide_state_chain, temp_chain,
+                                        added, head_is_new)) {
+                    chain_tail.push_back(sym);
+                }
+            }
+        }
+    }
+
+    generate_symbol_chain(info, chain_tail);
+}
+
 bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
                                      const CompileContext &cc,
                                      bool trust_daddy_states,
@@ -952,11 +1477,19 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,

     bytecode_ptr<NFA> nfa;
     if (!using8bit) {
+        if (cc.grey.allowWideStates && strat.getType() == McClellan
+            && !is_triggered(raw.kind)) {
+            find_wide_state(info);
+        }
+
         u16 total_daddy = 0;
         bool any_cyclic_near_anchored_state
             = is_cyclic_near(raw, raw.start_anchored);

         for (u32 i = 0; i < info.size(); i++) {
+            if (info.is_widestate(i)) {
+                continue;
+            }
             find_better_daddy(info, i, using8bit,
                               any_cyclic_near_anchored_state,
                               trust_daddy_states, cc.grey);
diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h
index ce63fbbf..73cb9fd7 100644
--- a/src/nfa/mcclellancompile.h
+++ b/src/nfa/mcclellancompile.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -60,6 +60,7 @@ public:
     u32 max_allowed_offset_accel() const override;
     u32 max_stop_char() const override;
     u32 max_floating_stop_char() const override;
+    DfaType getType() const override { return McClellan; }

 private:
     raw_dfa &rdfa;
diff --git a/src/nfa/mcclellandump.cpp b/src/nfa/mcclellandump.cpp
index a13795fd..92090bc5 100644
--- a/src/nfa/mcclellandump.cpp
+++ b/src/nfa/mcclellandump.cpp
@@ -275,7 +275,8 @@ void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f) {
     dumpDotPreambleDfa(f);

-    for (u16 i = 1; i < m->state_count; i++)
{ + u16 sherman_ceil = m->has_wide == 1 ? m->wide_limit : m->state_count; + for (u16 i = 1; i < sherman_ceil; i++) { describeNode(nfa, m, i, f); u16 t[ALPHABET_SIZE]; @@ -314,7 +315,8 @@ void dumpAccelMasks(FILE *f, const mcclellan *m, const mstate_aux *aux) { fprintf(f, "Acceleration\n"); fprintf(f, "------------\n"); - for (u16 i = 0; i < m->state_count; i++) { + u16 sherman_ceil = m->has_wide == 1 ? m->wide_limit : m->state_count; + for (u16 i = 0; i < sherman_ceil; i++) { if (!aux[i].accel_offset) { continue; } @@ -360,7 +362,8 @@ void dumpCommonHeader(FILE *f, const mcclellan *m) { static void dumpTransitions(FILE *f, const NFA *nfa, const mcclellan *m, const mstate_aux *aux) { - for (u16 i = 0; i < m->state_count; i++) { + u16 sherman_ceil = m->has_wide == 1 ? m->wide_limit : m->state_count; + for (u16 i = 0; i < sherman_ceil; i++) { fprintf(f, "%05hu", i); if (aux[i].accel_offset) { dumpAccelText(f, (const union AccelAux *)((const char *)m + diff --git a/src/nfa/shengcompile.h b/src/nfa/shengcompile.h index 2fe1e356..d795b362 100644 --- a/src/nfa/shengcompile.h +++ b/src/nfa/shengcompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017, Intel Corporation + * Copyright (c) 2016-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -61,6 +61,7 @@ public: u32 max_allowed_offset_accel() const override; u32 max_stop_char() const override; u32 max_floating_stop_char() const override; + DfaType getType() const override { return Sheng; } private: raw_dfa &rdfa; diff --git a/src/parser/shortcut_literal.cpp b/src/parser/shortcut_literal.cpp index 7a7ab6ee..d08bab3c 100644 --- a/src/parser/shortcut_literal.cpp +++ b/src/parser/shortcut_literal.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -185,6 +185,7 @@ bool shortcutLiteral(NG &ng, const ParsedExpression &pe) { return false; } + vis.lit.set_pure(); const ue2_literal &lit = vis.lit; if (lit.empty()) { diff --git a/src/rose/match.c b/src/rose/match.c index 97e93c93..192b4709 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -238,7 +238,11 @@ hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t, assert(id && id < t->size); // id is an offset into bytecode const u64a som = 0; const u8 flags = 0; - return roseRunProgram_i(t, scratch, id, som, end, flags); + if (!scratch->pure) { + return roseRunProgram(t, scratch, id, som, end, flags); + } else { + return roseRunProgram_l(t, scratch, id, som, end, flags); + } } static rose_inline diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index 2f2a6aa3..5a7f786e 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +33,1464 @@ #include "program_runtime.h" +#include "catchup.h" +#include 
"counting_miracle.h" +#include "infix.h" +#include "match.h" +#include "miracle.h" +#include "report.h" +#include "rose_common.h" +#include "rose_internal.h" +#include "rose_program.h" +#include "rose_types.h" +#include "validate_mask.h" +#include "validate_shufti.h" +#include "runtime.h" +#include "util/compare.h" +#include "util/copybytes.h" +#include "util/fatbit.h" +#include "util/multibit.h" + +/* Inline implementation follows. */ + +static rose_inline +void rosePushDelayedMatch(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 delay, + u32 delay_index, u64a offset) { + assert(delay); + + const u32 src_slot_index = delay; + u32 slot_index = (src_slot_index + offset) & DELAY_MASK; + + struct RoseContext *tctxt = &scratch->tctxt; + if (offset + src_slot_index <= tctxt->delayLastEndOffset) { + DEBUG_PRINTF("skip too late\n"); + return; + } + + const u32 delay_count = t->delay_count; + struct fatbit **delaySlots = getDelaySlots(scratch); + struct fatbit *slot = delaySlots[slot_index]; + + DEBUG_PRINTF("pushing tab %u into slot %u\n", delay_index, slot_index); + if (!(tctxt->filledDelayedSlots & (1U << slot_index))) { + tctxt->filledDelayedSlots |= 1U << slot_index; + fatbit_clear(slot); + } + + fatbit_set(slot, delay_count, delay_index); +} + +static rose_inline +void recordAnchoredLiteralMatch(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 anch_id, + u64a end) { + assert(end); + + if (end <= t->floatingMinLiteralMatchOffset) { + return; + } + + struct fatbit **anchoredLiteralRows = getAnchoredLiteralLog(scratch); + + DEBUG_PRINTF("record %u (of %u) @ %llu\n", anch_id, t->anchored_count, end); + + if (!bf64_set(&scratch->al_log_sum, end - 1)) { + // first time, clear row + DEBUG_PRINTF("clearing %llu/%u\n", end - 1, t->anchored_count); + fatbit_clear(anchoredLiteralRows[end - 1]); + } + + assert(anch_id < t->anchored_count); + fatbit_set(anchoredLiteralRows[end - 1], t->anchored_count, anch_id); +} + +static rose_inline +char roseLeftfixCheckMiracles(const struct RoseEngine *t, + const struct LeftNfaInfo *left, + struct core_info *ci, struct mq *q, u64a end, + const char is_infix) { + if (!is_infix && left->transient) { + // Miracles won't help us with transient leftfix engines; they only + // scan for a limited time anyway. + return 1; + } + + if (!left->stopTable) { + return 1; + } + + DEBUG_PRINTF("looking for miracle on queue %u\n", q->nfa->queueIndex); + + const s64a begin_loc = q_cur_loc(q); + const s64a end_loc = end - ci->buf_offset; + + s64a miracle_loc; + if (roseMiracleOccurs(t, left, ci, begin_loc, end_loc, &miracle_loc)) { + goto found_miracle; + } + + if (roseCountingMiracleOccurs(t, left, ci, begin_loc, end_loc, + &miracle_loc)) { + goto found_miracle; + } + + return 1; + +found_miracle: + DEBUG_PRINTF("miracle at %lld\n", miracle_loc); + assert(miracle_loc >= begin_loc); + + // If we're a prefix, then a miracle effectively results in us needing to + // re-init our state and start fresh. + if (!is_infix) { + if (miracle_loc != begin_loc) { + DEBUG_PRINTF("re-init prefix state\n"); + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, miracle_loc); + pushQueueAt(q, 1, MQE_TOP, miracle_loc); + nfaQueueInitState(q->nfa, q); + } + return 1; + } + + // Otherwise, we're an infix. Remove tops before the miracle from the queue + // and re-init at that location. 
+ + q_skip_forward_to(q, miracle_loc); + + if (q_last_type(q) == MQE_START) { + DEBUG_PRINTF("miracle caused infix to die\n"); + return 0; + } + + DEBUG_PRINTF("re-init infix state\n"); + assert(q->items[q->cur].type == MQE_START); + q->items[q->cur].location = miracle_loc; + nfaQueueInitState(q->nfa, q); + + return 1; +} + +static rose_inline +hwlmcb_rv_t roseTriggerSuffix(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 qi, u32 top, + u64a som, u64a end) { + DEBUG_PRINTF("suffix qi=%u, top event=%u\n", qi, top); + + struct core_info *ci = &scratch->core_info; + u8 *aa = getActiveLeafArray(t, ci->state); + const u32 aaCount = t->activeArrayCount; + const u32 qCount = t->queueCount; + struct mq *q = &scratch->queues[qi]; + const struct NfaInfo *info = getNfaInfoByQueue(t, qi); + const struct NFA *nfa = getNfaByInfo(t, info); + + s64a loc = (s64a)end - ci->buf_offset; + assert(loc <= (s64a)ci->len && loc >= -(s64a)ci->hlen); + + if (!mmbit_set(aa, aaCount, qi)) { + initQueue(q, qi, t, scratch); + nfaQueueInitState(nfa, q); + pushQueueAt(q, 0, MQE_START, loc); + fatbit_set(scratch->aqa, qCount, qi); + } else if (info->no_retrigger) { + DEBUG_PRINTF("yawn\n"); + /* nfa only needs one top; we can go home now */ + return HWLM_CONTINUE_MATCHING; + } else if (!fatbit_set(scratch->aqa, qCount, qi)) { + initQueue(q, qi, t, scratch); + loadStreamState(nfa, q, 0); + pushQueueAt(q, 0, MQE_START, 0); + } else if (isQueueFull(q)) { + DEBUG_PRINTF("queue %u full -> catching up nfas\n", qi); + if (info->eod) { + /* can catch up suffix independently no pq */ + q->context = NULL; + pushQueueNoMerge(q, MQE_END, loc); + nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + } else if (ensureQueueFlushed(t, scratch, qi, loc) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + + assert(top == MQE_TOP || (top >= MQE_TOP_FIRST && top < MQE_INVALID)); + pushQueueSom(q, top, loc, som); + + if (q_cur_loc(q) == (s64a)ci->len && !info->eod) { + /* we may not run the nfa; need to ensure state is fine */ + DEBUG_PRINTF("empty run\n"); + pushQueueNoMerge(q, MQE_END, loc); + char alive = nfaQueueExec(nfa, q, loc); + if (alive) { + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + } else { + mmbit_unset(aa, aaCount, qi); + fatbit_unset(scratch->aqa, qCount, qi); + } + } + + return HWLM_CONTINUE_MATCHING; +} + +static really_inline +char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch, + u32 qi, u32 leftfixLag, ReportID leftfixReport, u64a end, + const char is_infix) { + struct core_info *ci = &scratch->core_info; + + u32 ri = queueToLeftIndex(t, qi); + const struct LeftNfaInfo *left = getLeftTable(t) + ri; + + DEBUG_PRINTF("testing %s %s %u/%u with lag %u (maxLag=%u)\n", + (left->transient ? "transient" : "active"), + (is_infix ? "infix" : "prefix"), + ri, qi, leftfixLag, left->maxLag); + + assert(leftfixLag <= left->maxLag); + assert(left->infix == is_infix); + assert(!is_infix || !left->transient); // Only prefixes can be transient. 
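+    // Note: leftfixLag is how far behind the literal's end offset the
+    // leftfix is queried; the engine is caught up to (end - leftfixLag)
+    // before its report is checked.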
+ + struct mq *q = scratch->queues + qi; + char *state = scratch->core_info.state; + u8 *activeLeftArray = getActiveLeftArray(t, state); + u32 qCount = t->queueCount; + u32 arCount = t->activeLeftCount; + + if (!mmbit_isset(activeLeftArray, arCount, ri)) { + DEBUG_PRINTF("engine is dead nothing to see here\n"); + return 0; + } + + if (unlikely(end < leftfixLag)) { + assert(0); /* lag is the literal length */ + return 0; + } + + if (nfaSupportsZombie(getNfaByQueue(t, qi)) && ci->buf_offset + && !fatbit_isset(scratch->aqa, qCount, qi) + && isZombie(t, state, left)) { + DEBUG_PRINTF("zombie\n"); + return 1; + } + + if (!fatbit_set(scratch->aqa, qCount, qi)) { + DEBUG_PRINTF("initing q %u\n", qi); + initRoseQueue(t, qi, left, scratch); + if (ci->buf_offset) { // there have been writes before us! + s32 sp; + if (!is_infix && left->transient) { + sp = -(s32)ci->hlen; + } else { + sp = -(s32)loadRoseDelay(t, state, left); + } + + /* transient nfas are always started fresh -> state not maintained + * at stream boundary */ + + pushQueueAt(q, 0, MQE_START, sp); + if (is_infix || (ci->buf_offset + sp > 0 && !left->transient)) { + loadStreamState(q->nfa, q, sp); + } else { + pushQueueAt(q, 1, MQE_TOP, sp); + nfaQueueInitState(q->nfa, q); + } + } else { // first write ever + pushQueueAt(q, 0, MQE_START, 0); + pushQueueAt(q, 1, MQE_TOP, 0); + nfaQueueInitState(q->nfa, q); + } + } + + s64a loc = (s64a)end - ci->buf_offset - leftfixLag; + assert(loc >= q_cur_loc(q) || left->eager); + assert(leftfixReport != MO_INVALID_IDX); + + if (!is_infix && left->transient) { + s64a start_loc = loc - left->transient; + if (q_cur_loc(q) < start_loc) { + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, start_loc); + pushQueueAt(q, 1, MQE_TOP, start_loc); + nfaQueueInitState(q->nfa, q); + } + } + + if (q_cur_loc(q) < loc || q_last_type(q) != MQE_START) { + if (is_infix) { + if (infixTooOld(q, loc)) { + DEBUG_PRINTF("infix %u died of old age\n", ri); + goto nfa_dead; + } + + reduceInfixQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); + } + + if (!roseLeftfixCheckMiracles(t, left, ci, q, end, is_infix)) { + DEBUG_PRINTF("leftfix %u died due to miracle\n", ri); + goto nfa_dead; + } + +#ifdef DEBUG + debugQueue(q); +#endif + + pushQueueNoMerge(q, MQE_END, loc); + + char rv = nfaQueueExecRose(q->nfa, q, leftfixReport); + if (!rv) { /* nfa is dead */ + DEBUG_PRINTF("leftfix %u died while trying to catch up\n", ri); + goto nfa_dead; + } + + // Queue must have next start loc before we call nfaInAcceptState. + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + + DEBUG_PRINTF("checking for report %u\n", leftfixReport); + DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); + return rv == MO_MATCHES_PENDING; + } else if (q_cur_loc(q) > loc) { + /* an eager leftfix may have already progressed past loc if there is no + * match at loc. 
*/ + assert(left->eager); + return 0; + } else { + assert(q_cur_loc(q) == loc); + DEBUG_PRINTF("checking for report %u\n", leftfixReport); + char rv = nfaInAcceptState(q->nfa, leftfixReport, q); + DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); + return rv; + } + +nfa_dead: + mmbit_unset(activeLeftArray, arCount, ri); + scratch->tctxt.groups &= left->squash_mask; + return 0; +} + +static rose_inline +char roseTestPrefix(const struct RoseEngine *t, struct hs_scratch *scratch, + u32 qi, u32 leftfixLag, ReportID leftfixReport, u64a end) { + return roseTestLeftfix(t, scratch, qi, leftfixLag, leftfixReport, end, 0); +} + +static rose_inline +char roseTestInfix(const struct RoseEngine *t, struct hs_scratch *scratch, + u32 qi, u32 leftfixLag, ReportID leftfixReport, u64a end) { + return roseTestLeftfix(t, scratch, qi, leftfixLag, leftfixReport, end, 1); +} + +static rose_inline +void roseTriggerInfix(const struct RoseEngine *t, struct hs_scratch *scratch, + u64a start, u64a end, u32 qi, u32 topEvent, u8 cancel) { + struct core_info *ci = &scratch->core_info; + s64a loc = (s64a)end - ci->buf_offset; + + u32 ri = queueToLeftIndex(t, qi); + assert(topEvent < MQE_INVALID); + + const struct LeftNfaInfo *left = getLeftInfoByQueue(t, qi); + assert(!left->transient); + + DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, topEvent); + + struct mq *q = scratch->queues + qi; + const struct NfaInfo *info = getNfaInfoByQueue(t, qi); + + char *state = ci->state; + u8 *activeLeftArray = getActiveLeftArray(t, state); + const u32 arCount = t->activeLeftCount; + char alive = mmbit_set(activeLeftArray, arCount, ri); + + if (alive && info->no_retrigger) { + DEBUG_PRINTF("yawn\n"); + return; + } + + struct fatbit *aqa = scratch->aqa; + const u32 qCount = t->queueCount; + + if (alive && nfaSupportsZombie(getNfaByInfo(t, info)) && ci->buf_offset && + !fatbit_isset(aqa, qCount, qi) && isZombie(t, state, left)) { + DEBUG_PRINTF("yawn - zombie\n"); + return; + } + + if (cancel) { + DEBUG_PRINTF("dominating top: (re)init\n"); + fatbit_set(aqa, qCount, qi); + initRoseQueue(t, qi, left, scratch); + pushQueueAt(q, 0, MQE_START, loc); + nfaQueueInitState(q->nfa, q); + } else if (!fatbit_set(aqa, qCount, qi)) { + DEBUG_PRINTF("initing %u\n", qi); + initRoseQueue(t, qi, left, scratch); + if (alive) { + s32 sp = -(s32)loadRoseDelay(t, state, left); + pushQueueAt(q, 0, MQE_START, sp); + loadStreamState(q->nfa, q, sp); + } else { + pushQueueAt(q, 0, MQE_START, loc); + nfaQueueInitState(q->nfa, q); + } + } else if (!alive) { + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + nfaQueueInitState(q->nfa, q); + } else if (isQueueFull(q)) { + reduceInfixQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); + + if (isQueueFull(q)) { + /* still full - reduceInfixQueue did nothing */ + DEBUG_PRINTF("queue %u full (%u items) -> catching up nfa\n", qi, + q->end - q->cur); + pushQueueNoMerge(q, MQE_END, loc); + nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); + + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + } + } + + pushQueueSom(q, topEvent, loc, start); +} + +static rose_inline +hwlmcb_rv_t roseReport(const struct RoseEngine *t, struct hs_scratch *scratch, + u64a end, ReportID onmatch, s32 offset_adjust, + u32 ekey) { + DEBUG_PRINTF("firing callback onmatch=%u, end=%llu\n", onmatch, end); + updateLastMatchOffset(&scratch->tctxt, end); + + int cb_rv = roseDeliverReport(end, onmatch, offset_adjust, scratch, ekey); + if (cb_rv == MO_HALT_MATCHING) { + DEBUG_PRINTF("termination requested\n"); + return 
HWLM_TERMINATE_MATCHING;
+    }
+
+    if (ekey == INVALID_EKEY || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
+        return HWLM_CONTINUE_MATCHING;
+    }
+
+    return roseHaltIfExhausted(t, scratch);
+}
+
+/* catches up engines enough to ensure any earlier mpv triggers are enqueued
+ * and then adds the trigger to the mpv queue. */
+static rose_inline
+hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t,
+                                           struct hs_scratch *scratch,
+                                           u32 event, u64a top_squash_distance,
+                                           u64a end, const char in_catchup) {
+    if (!in_catchup &&
+        roseCatchUpMpvFeeders(t, scratch, end) == HWLM_TERMINATE_MATCHING) {
+        return HWLM_TERMINATE_MATCHING;
+    }
+    return roseHandleChainMatch(t, scratch, event, top_squash_distance, end,
+                                in_catchup);
+}
+
+static rose_inline
+void roseHandleSom(struct hs_scratch *scratch, const struct som_operation *sr,
+                   u64a end) {
+    DEBUG_PRINTF("end=%llu, minMatchOffset=%llu\n", end,
+                 scratch->tctxt.minMatchOffset);
+
+    updateLastMatchOffset(&scratch->tctxt, end);
+    handleSomInternal(scratch, sr, end);
+}
+
+static rose_inline
+hwlmcb_rv_t roseReportSom(const struct RoseEngine *t,
+                          struct hs_scratch *scratch, u64a start, u64a end,
+                          ReportID onmatch, s32 offset_adjust, u32 ekey) {
+    DEBUG_PRINTF("firing som callback onmatch=%u, start=%llu, end=%llu\n",
+                 onmatch, start, end);
+    updateLastMatchOffset(&scratch->tctxt, end);
+
+    int cb_rv = roseDeliverSomReport(start, end, onmatch, offset_adjust,
+                                     scratch, ekey);
+    if (cb_rv == MO_HALT_MATCHING) {
+        DEBUG_PRINTF("termination requested\n");
+        return HWLM_TERMINATE_MATCHING;
+    }
+
+    if (ekey == INVALID_EKEY || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
+        return HWLM_CONTINUE_MATCHING;
+    }
+
+    return roseHaltIfExhausted(t, scratch);
+}
+
+static rose_inline
+void roseHandleSomSom(struct hs_scratch *scratch,
+                      const struct som_operation *sr, u64a start, u64a end) {
+    DEBUG_PRINTF("start=%llu, end=%llu, minMatchOffset=%llu\n", start, end,
+                 scratch->tctxt.minMatchOffset);
+
+    updateLastMatchOffset(&scratch->tctxt, end);
+    setSomFromSomAware(scratch, sr, start, end);
+}
+
+static rose_inline
+hwlmcb_rv_t roseSetExhaust(const struct RoseEngine *t,
+                           struct hs_scratch *scratch, u32 ekey) {
+    assert(scratch);
+    assert(scratch->magic == SCRATCH_MAGIC);
+
+    struct core_info *ci = &scratch->core_info;
+
+    assert(!can_stop_matching(scratch));
+    assert(!isExhausted(ci->rose, ci->exhaustionVector, ekey));
+
+    markAsMatched(ci->rose, ci->exhaustionVector, ekey);
+
+    return roseHaltIfExhausted(t, scratch);
+}
+
+static really_inline
+int reachHasBit(const u8 *reach, u8 c) {
+    return !!(reach[c / 8U] & (u8)1U << (c % 8U));
+}
+
+/*
+ * Generate an 8-byte valid_mask with #high bytes 0 from the highest side,
+ * #low bytes 0 from the lowest side,
+ * and (8 - high - low) bytes '0xff' in the middle.
+ */
+static rose_inline
+u64a generateValidMask(const s32 high, const s32 low) {
+    assert(high + low < 8);
+    DEBUG_PRINTF("high %d low %d\n", high, low);
+    const u64a ones = ~0ull;
+    return (ones << ((high + low) * 8)) >> (high * 8);
+}
+
+/*
+ * Do the single-byte check if only one lookaround entry exists
+ * and it's a single mask.
+ * Return success if the byte is in the future or before history
+ * (offset is greater than the (history) buffer length).
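+ * For example, checkOffset = -2 with end = 1 would have to inspect a byte
+ * one position before the start of the stream, so the check fails early.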
+ */
+static rose_inline
+int roseCheckByte(const struct core_info *ci, u8 and_mask, u8 cmp_mask,
+                  u8 negation, s32 checkOffset, u64a end) {
+    DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
+                 ci->buf_offset, ci->buf_offset + ci->len);
+    if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
+        DEBUG_PRINTF("too early, fail\n");
+        return 0;
+    }
+
+    const s64a base_offset = end - ci->buf_offset;
+    s64a offset = base_offset + checkOffset;
+    DEBUG_PRINTF("checkOffset=%d offset=%lld\n", checkOffset, offset);
+    u8 c;
+    if (offset >= 0) {
+        if (offset >= (s64a)ci->len) {
+            DEBUG_PRINTF("in the future\n");
+            return 1;
+        } else {
+            assert(offset < (s64a)ci->len);
+            DEBUG_PRINTF("check byte in buffer\n");
+            c = ci->buf[offset];
+        }
+    } else {
+        if (offset >= -(s64a)ci->hlen) {
+            DEBUG_PRINTF("check byte in history\n");
+            c = ci->hbuf[ci->hlen + offset];
+        } else {
+            DEBUG_PRINTF("before history and return\n");
+            return 1;
+        }
+    }
+
+    if (((and_mask & c) != cmp_mask) ^ negation) {
+        DEBUG_PRINTF("char 0x%02x at offset %lld failed byte check\n",
+                     c, offset);
+        return 0;
+    }
+
+    DEBUG_PRINTF("real offset=%lld char=%02x\n", offset, c);
+    DEBUG_PRINTF("OK :)\n");
+    return 1;
+}
+
+static rose_inline
+int roseCheckMask(const struct core_info *ci, u64a and_mask, u64a cmp_mask,
+                  u64a neg_mask, s32 checkOffset, u64a end) {
+    const s64a base_offset = (s64a)end - ci->buf_offset;
+    s64a offset = base_offset + checkOffset;
+    DEBUG_PRINTF("rel offset %lld\n", base_offset);
+    DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
+    if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
+        DEBUG_PRINTF("too early, fail\n");
+        return 0;
+    }
+
+    u64a data = 0;
+    u64a valid_data_mask = ~0ULL; // mask for validate check.
+    // A 0xff byte means that this byte is in the buffer.
+    s32 shift_l = 0; // number of bytes in the future.
+    s32 shift_r = 0; // number of bytes before the history.
+    s32 h_len = 0;   // number of bytes in the history buffer.
+    s32 c_len = 8;   // number of bytes in the current buffer.
+    if (offset < 0) {
+        // in or before history buffer.
+        if (offset + 8 <= -(s64a)ci->hlen) {
+            DEBUG_PRINTF("before history and return\n");
+            return 1;
+        }
+        const u8 *h_start = ci->hbuf; // start pointer in history buffer.
+        if (offset < -(s64a)ci->hlen) {
+            // some bytes are before history.
+            shift_r = -(offset + (s64a)ci->hlen);
+            DEBUG_PRINTF("shift_r %d\n", shift_r);
+        } else {
+            h_start += ci->hlen + offset;
+        }
+        if (offset + 7 < 0) {
+            DEBUG_PRINTF("all in history buffer\n");
+            data = partial_load_u64a(h_start, 8 - shift_r);
+        } else {
+            // history part
+            c_len = offset + 8;
+            h_len = -offset - shift_r;
+            DEBUG_PRINTF("%d bytes in history\n", h_len);
+            s64a data_h = 0;
+            data_h = partial_load_u64a(h_start, h_len);
+            // current part
+            if (c_len > (s64a)ci->len) {
+                shift_l = c_len - ci->len;
+                c_len = ci->len;
+            }
+            data = partial_load_u64a(ci->buf, c_len);
+            data <<= h_len << 3;
+            data |= data_h;
+        }
+        if (shift_r) {
+            data <<= shift_r << 3;
+        }
+    } else {
+        // current buffer.
+        if (offset + c_len > (s64a)ci->len) {
+            if (offset >= (s64a)ci->len) {
+                DEBUG_PRINTF("all in the future\n");
+                return 1;
+            }
+            // some bytes in the future.
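+            // e.g. if 3 bytes of the 8-byte window lie past the end of
+            // the buffer, shift_l becomes 3 and generateValidMask(3, 0)
+            // == 0x000000ffffffffffULL below blinds those high bytes.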
+            shift_l = offset + c_len - ci->len;
+            c_len = ci->len - offset;
+            data = partial_load_u64a(ci->buf + offset, c_len);
+        } else {
+            data = unaligned_load_u64a(ci->buf + offset);
+        }
+    }
+
+    if (shift_l || shift_r) {
+        valid_data_mask = generateValidMask(shift_l, shift_r);
+    }
+    DEBUG_PRINTF("valid_data_mask %llx\n", valid_data_mask);
+
+    if (validateMask(data, valid_data_mask,
+                     and_mask, cmp_mask, neg_mask)) {
+        DEBUG_PRINTF("check mask successfully\n");
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
+static rose_inline
+int roseCheckMask32(const struct core_info *ci, const u8 *and_mask,
+                    const u8 *cmp_mask, const u32 neg_mask,
+                    s32 checkOffset, u64a end) {
+    const s64a base_offset = (s64a)end - ci->buf_offset;
+    s64a offset = base_offset + checkOffset;
+    DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
+    DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
+
+    if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
+        DEBUG_PRINTF("too early, fail\n");
+        return 0;
+    }
+
+    m256 data = zeroes256(); // consists of the following four parts.
+    s32 c_shift = 0; // blank bytes after current.
+    s32 h_shift = 0; // blank bytes before history.
+    s32 h_len = 32;  // number of bytes from history buffer.
+    s32 c_len = 0;   // number of bytes from current buffer.
+    /* h_shift + h_len + c_len + c_shift == 32 must hold. */
+
+    if (offset < 0) {
+        s32 h_offset = 0; // the start offset in history buffer.
+        if (offset < -(s64a)ci->hlen) {
+            if (offset + 32 <= -(s64a)ci->hlen) {
+                DEBUG_PRINTF("all before history\n");
+                return 1;
+            }
+            h_shift = -(offset + (s64a)ci->hlen);
+            h_len = 32 - h_shift;
+        } else {
+            h_offset = ci->hlen + offset;
+        }
+        if (offset + 32 > 0) {
+            // part in current buffer.
+            c_len = offset + 32;
+            h_len = -(offset + h_shift);
+            if (c_len > (s64a)ci->len) {
+                // out of current buffer.
+                c_shift = c_len - ci->len;
+                c_len = ci->len;
+            }
+            copy_upto_32_bytes((u8 *)&data - offset, ci->buf, c_len);
+        }
+        assert(h_shift + h_len + c_len + c_shift == 32);
+        copy_upto_32_bytes((u8 *)&data + h_shift, ci->hbuf + h_offset, h_len);
+    } else {
+        if (offset + 32 > (s64a)ci->len) {
+            if (offset >= (s64a)ci->len) {
+                DEBUG_PRINTF("all in the future.\n");
+                return 1;
+            }
+            c_len = ci->len - offset;
+            c_shift = 32 - c_len;
+            copy_upto_32_bytes((u8 *)&data, ci->buf + offset, c_len);
+        } else {
+            data = loadu256(ci->buf + offset);
+        }
+    }
+    DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift);
+    DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len);
+    // we use valid_data_mask to blind bytes before history/in the future.
+    u32 valid_data_mask;
+    valid_data_mask = (~0u) << (h_shift + c_shift) >> (c_shift);
+
+    m256 and_mask_m256 = loadu256(and_mask);
+    m256 cmp_mask_m256 = loadu256(cmp_mask);
+    if (validateMask32(data, valid_data_mask, and_mask_m256,
+                       cmp_mask_m256, neg_mask)) {
+        DEBUG_PRINTF("Mask32 passed\n");
+        return 1;
+    }
+    return 0;
+}
+
+// Get 128/256 bits of data from the history and current buffers.
+// Return data and valid_data_mask.
+static rose_inline
+u32 getBufferDataComplex(const struct core_info *ci, const s64a loc,
+                         u8 *data, const u32 data_len) {
+    assert(data_len == 16 || data_len == 32);
+    s32 c_shift = 0;      // blank bytes after current.
+    s32 h_shift = 0;      // blank bytes before history.
+    s32 h_len = data_len; // number of bytes from history buffer.
+    s32 c_len = 0;        // number of bytes from current buffer.
+    if (loc < 0) {
+        s32 h_offset = 0; // the start offset in history buffer.
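+        // The assembled buffer is laid out, from data[0] upwards, as
+        // [h_shift blanks][h_len history][c_len current][c_shift blanks],
+        // summing to data_len, as the assert below verifies.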
+ if (loc < -(s64a)ci->hlen) { + if (loc + data_len <= -(s64a)ci->hlen) { + DEBUG_PRINTF("all before history\n"); + return 0; + } + h_shift = -(loc + (s64a)ci->hlen); + h_len = data_len - h_shift; + } else { + h_offset = ci->hlen + loc; + } + if (loc + data_len > 0) { + // part in current buffer. + c_len = loc + data_len; + h_len = -(loc + h_shift); + if (c_len > (s64a)ci->len) { + // out of current buffer. + c_shift = c_len - ci->len; + c_len = ci->len; + } + copy_upto_32_bytes(data - loc, ci->buf, c_len); + } + assert(h_shift + h_len + c_len + c_shift == (s32)data_len); + copy_upto_32_bytes(data + h_shift, ci->hbuf + h_offset, h_len); + } else { + if (loc + data_len > (s64a)ci->len) { + if (loc >= (s64a)ci->len) { + DEBUG_PRINTF("all in the future.\n"); + return 0; + } + c_len = ci->len - loc; + c_shift = data_len - c_len; + copy_upto_32_bytes(data, ci->buf + loc, c_len); + } else { + if (data_len == 16) { + storeu128(data, loadu128(ci->buf + loc)); + return 0xffff; + } else { + storeu256(data, loadu256(ci->buf + loc)); + return 0xffffffff; + } + } + } + DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift); + DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len); + + if (data_len == 16) { + return (u16)(0xffff << (h_shift + c_shift)) >> c_shift; + } else { + return (~0u) << (h_shift + c_shift) >> c_shift; + } +} + +static rose_inline +m128 getData128(const struct core_info *ci, s64a offset, u32 *valid_data_mask) { + if (offset > 0 && offset + sizeof(m128) <= ci->len) { + *valid_data_mask = 0xffff; + return loadu128(ci->buf + offset); + } + ALIGN_DIRECTIVE u8 data[sizeof(m128)]; + *valid_data_mask = getBufferDataComplex(ci, offset, data, 16); + return *(m128 *)data; +} + +static rose_inline +m256 getData256(const struct core_info *ci, s64a offset, u32 *valid_data_mask) { + if (offset > 0 && offset + sizeof(m256) <= ci->len) { + *valid_data_mask = ~0u; + return loadu256(ci->buf + offset); + } + ALIGN_AVX_DIRECTIVE u8 data[sizeof(m256)]; + *valid_data_mask = getBufferDataComplex(ci, offset, data, 32); + return *(m256 *)data; +} + +static rose_inline +int roseCheckShufti16x8(const struct core_info *ci, const u8 *nib_mask, + const u8 *bucket_select_mask, u32 neg_mask, + s32 checkOffset, u64a end) { + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + u32 valid_data_mask = 0; + m128 data = getData128(ci, offset, &valid_data_mask); + if (unlikely(!valid_data_mask)) { + return 1; + } + + m256 nib_mask_m256 = loadu256(nib_mask); + m128 bucket_select_mask_m128 = loadu128(bucket_select_mask); + if (validateShuftiMask16x8(data, nib_mask_m256, + bucket_select_mask_m128, + neg_mask, valid_data_mask)) { + DEBUG_PRINTF("check shufti 16x8 successfully\n"); + return 1; + } else { + return 0; + } +} + +static rose_inline +int roseCheckShufti16x16(const struct core_info *ci, const u8 *hi_mask, + const u8 *lo_mask, const u8 *bucket_select_mask, + u32 neg_mask, s32 checkOffset, u64a end) { + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too 
early, fail\n"); + return 0; + } + + u32 valid_data_mask = 0; + m128 data = getData128(ci, offset, &valid_data_mask); + if (unlikely(!valid_data_mask)) { + return 1; + } + + m256 data_m256 = set2x128(data); + m256 hi_mask_m256 = loadu256(hi_mask); + m256 lo_mask_m256 = loadu256(lo_mask); + m256 bucket_select_mask_m256 = loadu256(bucket_select_mask); + if (validateShuftiMask16x16(data_m256, hi_mask_m256, lo_mask_m256, + bucket_select_mask_m256, + neg_mask, valid_data_mask)) { + DEBUG_PRINTF("check shufti 16x16 successfully\n"); + return 1; + } else { + return 0; + } +} + +static rose_inline +int roseCheckShufti32x8(const struct core_info *ci, const u8 *hi_mask, + const u8 *lo_mask, const u8 *bucket_select_mask, + u32 neg_mask, s32 checkOffset, u64a end) { + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + u32 valid_data_mask = 0; + m256 data = getData256(ci, offset, &valid_data_mask); + if (unlikely(!valid_data_mask)) { + return 1; + } + + m128 hi_mask_m128 = loadu128(hi_mask); + m128 lo_mask_m128 = loadu128(lo_mask); + m256 hi_mask_m256 = set2x128(hi_mask_m128); + m256 lo_mask_m256 = set2x128(lo_mask_m128); + m256 bucket_select_mask_m256 = loadu256(bucket_select_mask); + if (validateShuftiMask32x8(data, hi_mask_m256, lo_mask_m256, + bucket_select_mask_m256, + neg_mask, valid_data_mask)) { + DEBUG_PRINTF("check shufti 32x8 successfully\n"); + return 1; + } else { + return 0; + } +} + +static rose_inline +int roseCheckShufti32x16(const struct core_info *ci, const u8 *hi_mask, + const u8 *lo_mask, const u8 *bucket_select_mask_hi, + const u8 *bucket_select_mask_lo, u32 neg_mask, + s32 checkOffset, u64a end) { + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + u32 valid_data_mask = 0; + m256 data = getData256(ci, offset, &valid_data_mask); + if (unlikely(!valid_data_mask)) { + return 1; + } + + m256 hi_mask_1 = loadu2x128(hi_mask); + m256 hi_mask_2 = loadu2x128(hi_mask + 16); + m256 lo_mask_1 = loadu2x128(lo_mask); + m256 lo_mask_2 = loadu2x128(lo_mask + 16); + + m256 bucket_mask_hi = loadu256(bucket_select_mask_hi); + m256 bucket_mask_lo = loadu256(bucket_select_mask_lo); + if (validateShuftiMask32x16(data, hi_mask_1, hi_mask_2, + lo_mask_1, lo_mask_2, bucket_mask_hi, + bucket_mask_lo, neg_mask, valid_data_mask)) { + DEBUG_PRINTF("check shufti 32x16 successfully\n"); + return 1; + } else { + return 0; + } +} + +static rose_inline +int roseCheckSingleLookaround(const struct RoseEngine *t, + const struct hs_scratch *scratch, + s8 checkOffset, u32 lookaroundReachIndex, + u64a end) { + assert(lookaroundReachIndex != MO_INVALID_IDX); + const struct core_info *ci = &scratch->core_info; + DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, + ci->buf_offset, ci->buf_offset + ci->len); + + const s64a base_offset = end - ci->buf_offset; + const s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("base_offset=%lld\n", base_offset); + DEBUG_PRINTF("checkOffset=%d offset=%lld\n", 
checkOffset, offset);
+
+    if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
+        DEBUG_PRINTF("too early, fail\n");
+        return 0;
+    }
+
+    const u8 *reach = getByOffset(t, lookaroundReachIndex);
+
+    u8 c;
+    if (offset >= 0 && offset < (s64a)ci->len) {
+        c = ci->buf[offset];
+    } else if (offset < 0 && offset >= -(s64a)ci->hlen) {
+        c = ci->hbuf[ci->hlen + offset];
+    } else {
+        return 1;
+    }
+
+    if (!reachHasBit(reach, c)) {
+        DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
+        return 0;
+    }
+
+    DEBUG_PRINTF("OK :)\n");
+    return 1;
+}
+
+/**
+ * \brief Scan around a literal, checking that the "lookaround" reach masks
+ * are satisfied.
+ */
+static rose_inline
+int roseCheckLookaround(const struct RoseEngine *t,
+                        const struct hs_scratch *scratch,
+                        u32 lookaroundLookIndex, u32 lookaroundReachIndex,
+                        u32 lookaroundCount, u64a end) {
+    assert(lookaroundLookIndex != MO_INVALID_IDX);
+    assert(lookaroundReachIndex != MO_INVALID_IDX);
+    assert(lookaroundCount > 0);
+
+    const struct core_info *ci = &scratch->core_info;
+    DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
+                 ci->buf_offset, ci->buf_offset + ci->len);
+
+    const s8 *look = getByOffset(t, lookaroundLookIndex);
+    const s8 *look_end = look + lookaroundCount;
+    assert(look < look_end);
+
+    const u8 *reach = getByOffset(t, lookaroundReachIndex);
+
+    // The following code assumes that the lookaround structures are ordered
+    // by increasing offset.
+
+    const s64a base_offset = end - ci->buf_offset;
+    DEBUG_PRINTF("base_offset=%lld\n", base_offset);
+    DEBUG_PRINTF("first look has offset %d\n", *look);
+
+    // If our first check tells us we need to look at an offset before the
+    // start of the stream, this role cannot match.
+    if (unlikely(*look < 0 && (u64a)(0 - *look) > end)) {
+        DEBUG_PRINTF("too early, fail\n");
+        return 0;
+    }
+
+    // Skip over offsets that are before the history buffer.
+    do {
+        s64a offset = base_offset + *look;
+        if (offset >= -(s64a)ci->hlen) {
+            goto in_history;
+        }
+        DEBUG_PRINTF("look=%d before history\n", *look);
+        look++;
+        reach += REACH_BITVECTOR_LEN;
+    } while (look < look_end);
+
+    // History buffer.
+    DEBUG_PRINTF("scan history (%zu looks left)\n", look_end - look);
+    for (; look < look_end; ++look, reach += REACH_BITVECTOR_LEN) {
+    in_history:
+        ;
+        s64a offset = base_offset + *look;
+        DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset);
+
+        if (offset >= 0) {
+            DEBUG_PRINTF("in buffer\n");
+            goto in_buffer;
+        }
+
+        assert(offset >= -(s64a)ci->hlen && offset < 0);
+        u8 c = ci->hbuf[ci->hlen + offset];
+        if (!reachHasBit(reach, c)) {
+            DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
+            return 0;
+        }
+    }
+    // Current buffer.
+    DEBUG_PRINTF("scan buffer (%zu looks left)\n", look_end - look);
+    for (; look < look_end; ++look, reach += REACH_BITVECTOR_LEN) {
+    in_buffer:
+        ;
+        s64a offset = base_offset + *look;
+        DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset);
+
+        if (offset >= (s64a)ci->len) {
+            DEBUG_PRINTF("in the future\n");
+            break;
+        }
+
+        assert(offset >= 0 && offset < (s64a)ci->len);
+        u8 c = ci->buf[offset];
+        if (!reachHasBit(reach, c)) {
+            DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
+            return 0;
+        }
+    }
+
+    DEBUG_PRINTF("OK :)\n");
+    return 1;
+}
+
+/**
+ * \brief Try to find a matching path using the corresponding path mask of
+ * every lookaround location.
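+ * A path bit stays set only while every visited location's reach mask
+ * keeps it alive; the check fails once no path bit survives.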
+ */
+static rose_inline
+int roseMultipathLookaround(const struct RoseEngine *t,
+                            const struct hs_scratch *scratch,
+                            u32 multipathLookaroundLookIndex,
+                            u32 multipathLookaroundReachIndex,
+                            u32 multipathLookaroundCount,
+                            s32 last_start, const u8 *start_mask,
+                            u64a end) {
+    assert(multipathLookaroundCount > 0);
+
+    const struct core_info *ci = &scratch->core_info;
+    DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
+                 ci->buf_offset, ci->buf_offset + ci->len);
+
+    const s8 *look = getByOffset(t, multipathLookaroundLookIndex);
+    const s8 *look_end = look + multipathLookaroundCount;
+    assert(look < look_end);
+
+    const u8 *reach = getByOffset(t, multipathLookaroundReachIndex);
+
+    const s64a base_offset = (s64a)end - ci->buf_offset;
+    DEBUG_PRINTF("base_offset=%lld\n", base_offset);
+
+    u8 path = 0xff;
+
+    assert(last_start < 0);
+
+    if (unlikely((u64a)(0 - last_start) > end)) {
+        DEBUG_PRINTF("too early, fail\n");
+        return 0;
+    }
+
+    s8 base_look_offset = *look;
+    do {
+        s64a offset = base_offset + *look;
+        u32 start_offset = (u32)(*look - base_look_offset);
+        DEBUG_PRINTF("start_mask[%u] = %x\n", start_offset,
+                     start_mask[start_offset]);
+        path = start_mask[start_offset];
+        if (offset >= -(s64a)ci->hlen) {
+            break;
+        }
+        DEBUG_PRINTF("look=%d before history\n", *look);
+        look++;
+        reach += MULTI_REACH_BITVECTOR_LEN;
+    } while (look < look_end);
+
+    DEBUG_PRINTF("scan history (%zu looks left)\n", look_end - look);
+    for (; look < look_end; ++look, reach += MULTI_REACH_BITVECTOR_LEN) {
+        s64a offset = base_offset + *look;
+        DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset);
+
+        if (offset >= 0) {
+            DEBUG_PRINTF("in buffer\n");
+            break;
+        }
+
+        assert(offset >= -(s64a)ci->hlen && offset < 0);
+        u8 c = ci->hbuf[ci->hlen + offset];
+        path &= reach[c];
+        DEBUG_PRINTF("reach[%x] = %02x path = %02x\n", c, reach[c], path);
+        if (!path) {
+            DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
+            return 0;
+        }
+    }
+
+    DEBUG_PRINTF("scan buffer (%zu looks left)\n", look_end - look);
+    for (; look < look_end; ++look, reach += MULTI_REACH_BITVECTOR_LEN) {
+        s64a offset = base_offset + *look;
+        DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset);
+
+        if (offset >= (s64a)ci->len) {
+            DEBUG_PRINTF("in the future\n");
+            break;
+        }
+
+        assert(offset >= 0 && offset < (s64a)ci->len);
+        u8 c = ci->buf[offset];
+        path &= reach[c];
+        DEBUG_PRINTF("reach[%x] = %02x path = %02x\n", c, reach[c], path);
+        if (!path) {
+            DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
+            return 0;
+        }
+    }
+
+    DEBUG_PRINTF("OK :)\n");
+    return 1;
+}
+
+static never_inline
+int roseCheckMultipathShufti16x8(const struct hs_scratch *scratch,
+                                 const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_16x8 *ri,
+                                 u64a end) {
+    const struct core_info *ci = &scratch->core_info;
+    s32 checkOffset = ri->base_offset;
+    const s64a base_offset = (s64a)end - ci->buf_offset;
+    s64a offset = base_offset + checkOffset;
+    DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
+    DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
+
+    assert(ri->last_start <= 0);
+    if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
+        if ((u64a)(0 - ri->last_start) > end) {
+            DEBUG_PRINTF("too early, fail\n");
+            return 0;
+        }
+    }
+
+    u32 valid_data_mask;
+    m128 data_init = getData128(ci, offset, &valid_data_mask);
+    m128 data_select_mask = loadu128(ri->data_select_mask);
+
+    u32 valid_path_mask = 0;
+    if (unlikely(!(valid_data_mask & 1))) {
+        DEBUG_PRINTF("lose part of backward data\n");
+        
DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); + + m128 expand_valid; + u64a expand_mask = 0x8080808080808080ULL; + u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); + u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); + DEBUG_PRINTF("expand_hi %llx\n", valid_hi); + DEBUG_PRINTF("expand_lo %llx\n", valid_lo); + expand_valid = set64x2(valid_hi, valid_lo); + valid_path_mask = ~movemask128(pshufb_m128(expand_valid, + data_select_mask)); + } + + m128 data = pshufb_m128(data_init, data_select_mask); + m256 nib_mask = loadu256(ri->nib_mask); + m128 bucket_select_mask = loadu128(ri->bucket_select_mask); + + u32 hi_bits_mask = ri->hi_bits_mask; + u32 lo_bits_mask = ri->lo_bits_mask; + u32 neg_mask = ri->neg_mask; + + if (validateMultipathShuftiMask16x8(data, nib_mask, + bucket_select_mask, + hi_bits_mask, lo_bits_mask, + neg_mask, valid_path_mask)) { + DEBUG_PRINTF("check multi-path shufti-16x8 successfully\n"); + return 1; + } else { + return 0; + } +} + +static never_inline +int roseCheckMultipathShufti32x8(const struct hs_scratch *scratch, + const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x8 *ri, + u64a end) { + const struct core_info *ci = &scratch->core_info; + s32 checkOffset = ri->base_offset; + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + assert(ri->last_start <= 0); + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + if ((u64a)(0 - ri->last_start) > end) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + } + + u32 valid_data_mask; + m128 data_m128 = getData128(ci, offset, &valid_data_mask); + m256 data_double = set2x128(data_m128); + m256 data_select_mask = loadu256(ri->data_select_mask); + + u32 valid_path_mask = 0; + m256 expand_valid; + if (unlikely(!(valid_data_mask & 1))) { + DEBUG_PRINTF("lose part of backward data\n"); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); + + u64a expand_mask = 0x8080808080808080ULL; + u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); + u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); + DEBUG_PRINTF("expand_hi %llx\n", valid_hi); + DEBUG_PRINTF("expand_lo %llx\n", valid_lo); + expand_valid = set64x4(valid_hi, valid_lo, valid_hi, + valid_lo); + valid_path_mask = ~movemask256(pshufb_m256(expand_valid, + data_select_mask)); + } + + m256 data = pshufb_m256(data_double, data_select_mask); + m256 hi_mask = loadu2x128(ri->hi_mask); + m256 lo_mask = loadu2x128(ri->lo_mask); + m256 bucket_select_mask = loadu256(ri->bucket_select_mask); + + u32 hi_bits_mask = ri->hi_bits_mask; + u32 lo_bits_mask = ri->lo_bits_mask; + u32 neg_mask = ri->neg_mask; + + if (validateMultipathShuftiMask32x8(data, hi_mask, lo_mask, + bucket_select_mask, + hi_bits_mask, lo_bits_mask, + neg_mask, valid_path_mask)) { + DEBUG_PRINTF("check multi-path shufti-32x8 successfully\n"); + return 1; + } else { + return 0; + } +} + +static never_inline +int roseCheckMultipathShufti32x16(const struct hs_scratch *scratch, + const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x16 *ri, + u64a end) { + const struct core_info *ci = &scratch->core_info; + const s64a base_offset = (s64a)end - ci->buf_offset; + s32 checkOffset = ri->base_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + 
assert(ri->last_start <= 0); + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + if ((u64a)(0 - ri->last_start) > end) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + } + + u32 valid_data_mask; + m128 data_m128 = getData128(ci, offset, &valid_data_mask); + m256 data_double = set2x128(data_m128); + m256 data_select_mask = loadu256(ri->data_select_mask); + + u32 valid_path_mask = 0; + m256 expand_valid; + if (unlikely(!(valid_data_mask & 1))) { + DEBUG_PRINTF("lose part of backward data\n"); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); + + u64a expand_mask = 0x8080808080808080ULL; + u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); + u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); + DEBUG_PRINTF("expand_hi %llx\n", valid_hi); + DEBUG_PRINTF("expand_lo %llx\n", valid_lo); + expand_valid = set64x4(valid_hi, valid_lo, valid_hi, + valid_lo); + valid_path_mask = ~movemask256(pshufb_m256(expand_valid, + data_select_mask)); + } + + m256 data = pshufb_m256(data_double, data_select_mask); + + m256 hi_mask_1 = loadu2x128(ri->hi_mask); + m256 hi_mask_2 = loadu2x128(ri->hi_mask + 16); + m256 lo_mask_1 = loadu2x128(ri->lo_mask); + m256 lo_mask_2 = loadu2x128(ri->lo_mask + 16); + + m256 bucket_select_mask_hi = loadu256(ri->bucket_select_mask_hi); + m256 bucket_select_mask_lo = loadu256(ri->bucket_select_mask_lo); + + u32 hi_bits_mask = ri->hi_bits_mask; + u32 lo_bits_mask = ri->lo_bits_mask; + u32 neg_mask = ri->neg_mask; + + if (validateMultipathShuftiMask32x16(data, hi_mask_1, hi_mask_2, + lo_mask_1, lo_mask_2, + bucket_select_mask_hi, + bucket_select_mask_lo, + hi_bits_mask, lo_bits_mask, + neg_mask, valid_path_mask)) { + DEBUG_PRINTF("check multi-path shufti-32x16 successfully\n"); + return 1; + } else { + return 0; + } +} + +static never_inline +int roseCheckMultipathShufti64(const struct hs_scratch *scratch, + const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64 *ri, + u64a end) { + const struct core_info *ci = &scratch->core_info; + const s64a base_offset = (s64a)end - ci->buf_offset; + s32 checkOffset = ri->base_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + if ((u64a)(0 - ri->last_start) > end) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + } + + u32 valid_data_mask; + m128 data_m128 = getData128(ci, offset, &valid_data_mask); + m256 data_m256 = set2x128(data_m128); + m256 data_select_mask_1 = loadu256(ri->data_select_mask); + m256 data_select_mask_2 = loadu256(ri->data_select_mask + 32); + + u64a valid_path_mask = 0; + m256 expand_valid; + if (unlikely(!(valid_data_mask & 1))) { + DEBUG_PRINTF("lose part of backward data\n"); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); + + u64a expand_mask = 0x8080808080808080ULL; + u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); + u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); + DEBUG_PRINTF("expand_hi %llx\n", valid_hi); + DEBUG_PRINTF("expand_lo %llx\n", valid_lo); + expand_valid = set64x4(valid_hi, valid_lo, valid_hi, + valid_lo); + u32 valid_path_1 = movemask256(pshufb_m256(expand_valid, + data_select_mask_1)); + u32 valid_path_2 = movemask256(pshufb_m256(expand_valid, + data_select_mask_2)); + valid_path_mask = ~((u64a)valid_path_1 | (u64a)valid_path_2 << 32); + } + + m256 data_1 = pshufb_m256(data_m256, data_select_mask_1); + m256 
data_2 = pshufb_m256(data_m256, data_select_mask_2); + + m256 hi_mask = loadu2x128(ri->hi_mask); + m256 lo_mask = loadu2x128(ri->lo_mask); + + m256 bucket_select_mask_1 = loadu256(ri->bucket_select_mask); + m256 bucket_select_mask_2 = loadu256(ri->bucket_select_mask + 32); + + u64a hi_bits_mask = ri->hi_bits_mask; + u64a lo_bits_mask = ri->lo_bits_mask; + u64a neg_mask = ri->neg_mask; + + if (validateMultipathShuftiMask64(data_1, data_2, hi_mask, lo_mask, + bucket_select_mask_1, + bucket_select_mask_2, hi_bits_mask, + lo_bits_mask, neg_mask, + valid_path_mask)) { + DEBUG_PRINTF("check multi-path shufti-64 successfully\n"); + return 1; + } else { + return 0; + } +} + +static rose_inline int roseNfaEarliestSom(u64a start, UNUSED u64a end, UNUSED ReportID id, void *context) { assert(context); @@ -41,8 +1499,1572 @@ int roseNfaEarliestSom(u64a start, UNUSED u64a end, UNUSED ReportID id, return MO_CONTINUE_MATCHING; } +static rose_inline +u64a roseGetHaigSom(const struct RoseEngine *t, struct hs_scratch *scratch, + const u32 qi, UNUSED const u32 leftfixLag) { + u32 ri = queueToLeftIndex(t, qi); + + UNUSED const struct LeftNfaInfo *left = getLeftTable(t) + ri; + + DEBUG_PRINTF("testing %s prefix %u/%u with lag %u (maxLag=%u)\n", + left->transient ? "transient" : "active", ri, qi, + leftfixLag, left->maxLag); + + assert(leftfixLag <= left->maxLag); + + struct mq *q = scratch->queues + qi; + + u64a start = ~0ULL; + + /* switch the callback + context for a fun one */ + q->cb = roseNfaEarliestSom; + q->context = &start; + + nfaReportCurrentMatches(q->nfa, q); + + /* restore the old callback + context */ + q->cb = roseNfaAdaptor; + q->context = NULL; + DEBUG_PRINTF("earliest som is %llu\n", start); + return start; +} + +static rose_inline +char roseCheckBounds(u64a end, u64a min_bound, u64a max_bound) { + DEBUG_PRINTF("check offset=%llu against bounds [%llu,%llu]\n", end, + min_bound, max_bound); + assert(min_bound <= max_bound); + return end >= min_bound && end <= max_bound; +} + +static rose_inline +hwlmcb_rv_t roseEnginesEod(const struct RoseEngine *rose, + struct hs_scratch *scratch, u64a offset, + u32 iter_offset) { + const char is_streaming = rose->mode != HS_MODE_BLOCK; + + /* data, len is used for state decompress, should be full available data */ + u8 key = 0; + if (is_streaming) { + const u8 *eod_data = scratch->core_info.hbuf; + size_t eod_len = scratch->core_info.hlen; + key = eod_len ? eod_data[eod_len - 1] : 0; + } + + const u8 *aa = getActiveLeafArray(rose, scratch->core_info.state); + const u32 aaCount = rose->activeArrayCount; + const u32 qCount = rose->queueCount; + struct fatbit *aqa = scratch->aqa; + + const struct mmbit_sparse_iter *it = getByOffset(rose, iter_offset); + assert(ISALIGNED(it)); + + u32 idx = 0; + struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; + + for (u32 qi = mmbit_sparse_iter_begin(aa, aaCount, &idx, it, si_state); + qi != MMB_INVALID; + qi = mmbit_sparse_iter_next(aa, aaCount, qi, &idx, it, si_state)) { + DEBUG_PRINTF("checking nfa %u\n", qi); + struct mq *q = scratch->queues + qi; + if (!fatbit_set(aqa, qCount, qi)) { + initQueue(q, qi, rose, scratch); + } + + assert(q->nfa == getNfaByQueue(rose, qi)); + assert(nfaAcceptsEod(q->nfa)); + + if (is_streaming) { + // Decompress stream state. 
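+            // Stream state is kept compressed between writes; key is the
+            // last history byte, which expansion may use as context.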
+ nfaExpandState(q->nfa, q->state, q->streamState, offset, key); + } + + if (nfaCheckFinalState(q->nfa, q->state, q->streamState, offset, + roseReportAdaptor, + scratch) == MO_HALT_MATCHING) { + DEBUG_PRINTF("user instructed us to stop\n"); + return HWLM_TERMINATE_MATCHING; + } + } + + return HWLM_CONTINUE_MATCHING; +} + +static rose_inline +hwlmcb_rv_t roseSuffixesEod(const struct RoseEngine *rose, + struct hs_scratch *scratch, u64a offset) { + const u8 *aa = getActiveLeafArray(rose, scratch->core_info.state); + const u32 aaCount = rose->activeArrayCount; + + for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID; + qi = mmbit_iterate(aa, aaCount, qi)) { + DEBUG_PRINTF("checking nfa %u\n", qi); + struct mq *q = scratch->queues + qi; + assert(q->nfa == getNfaByQueue(rose, qi)); + assert(nfaAcceptsEod(q->nfa)); + + /* We have just been triggered. */ + assert(fatbit_isset(scratch->aqa, rose->queueCount, qi)); + + pushQueueNoMerge(q, MQE_END, scratch->core_info.len); + q->context = NULL; + + /* rose exec is used as we don't want to / can't raise matches in the + * history buffer. */ + if (!nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX)) { + DEBUG_PRINTF("nfa is dead\n"); + continue; + } + if (nfaCheckFinalState(q->nfa, q->state, q->streamState, offset, + roseReportAdaptor, + scratch) == MO_HALT_MATCHING) { + DEBUG_PRINTF("user instructed us to stop\n"); + return HWLM_TERMINATE_MATCHING; + } + } + return HWLM_CONTINUE_MATCHING; +} + +static rose_inline +hwlmcb_rv_t roseMatcherEod(const struct RoseEngine *rose, + struct hs_scratch *scratch, u64a offset) { + assert(rose->ematcherOffset); + assert(rose->ematcherRegionSize); + + // Clear role state and active engines, since we have already handled all + // outstanding work there. + DEBUG_PRINTF("clear role state and active leaf array\n"); + char *state = scratch->core_info.state; + mmbit_clear(getRoleState(state), rose->rolesWithStateCount); + mmbit_clear(getActiveLeafArray(rose, state), rose->activeArrayCount); + + const char is_streaming = rose->mode != HS_MODE_BLOCK; + + size_t eod_len; + const u8 *eod_data; + if (!is_streaming) { /* Block */ + eod_data = scratch->core_info.buf; + eod_len = scratch->core_info.len; + } else { /* Streaming */ + eod_len = scratch->core_info.hlen; + eod_data = scratch->core_info.hbuf; + } + + assert(eod_data); + assert(eod_len); + + DEBUG_PRINTF("%zu bytes of eod data to scan at offset %llu\n", eod_len, + offset); + + // If we don't have enough bytes to produce a match from an EOD table scan, + // there's no point scanning. + if (eod_len < rose->eodmatcherMinWidth) { + DEBUG_PRINTF("too short for min width %u\n", rose->eodmatcherMinWidth); + return HWLM_CONTINUE_MATCHING; + } + + // Ensure that we only need scan the last N bytes, where N is the length of + // the eod-anchored matcher region. + size_t adj = eod_len - MIN(eod_len, rose->ematcherRegionSize); + + const struct HWLM *etable = getByOffset(rose, rose->ematcherOffset); + hwlmExec(etable, eod_data, eod_len, adj, roseCallback, scratch, + scratch->tctxt.groups); + + // We may need to fire delayed matches. 
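+    // The eod-anchored scan above may have pushed delayed literals; flush
+    // them now so their matches are reported before we return.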
+ if (cleanUpDelayed(rose, scratch, 0, offset) == HWLM_TERMINATE_MATCHING) { + DEBUG_PRINTF("user instructed us to stop\n"); + return HWLM_TERMINATE_MATCHING; + } + + roseFlushLastByteHistory(rose, scratch, offset); + return HWLM_CONTINUE_MATCHING; +} + +static rose_inline +int roseCheckLongLiteral(const struct RoseEngine *t, + const struct hs_scratch *scratch, u64a end, + u32 lit_offset, u32 lit_length, char nocase) { + const struct core_info *ci = &scratch->core_info; + const u8 *lit = getByOffset(t, lit_offset); + + DEBUG_PRINTF("check lit at %llu, length %u\n", end, lit_length); + DEBUG_PRINTF("base buf_offset=%llu\n", ci->buf_offset); + + if (end < lit_length) { + DEBUG_PRINTF("too short!\n"); + return 0; + } + + // If any portion of the literal matched in the current buffer, check it. + if (end > ci->buf_offset) { + u32 scan_len = MIN(end - ci->buf_offset, lit_length); + u64a scan_start = end - ci->buf_offset - scan_len; + DEBUG_PRINTF("checking suffix (%u bytes) in buf[%llu:%llu]\n", scan_len, + scan_start, end); + if (cmpForward(ci->buf + scan_start, lit + lit_length - scan_len, + scan_len, nocase)) { + DEBUG_PRINTF("cmp of suffix failed\n"); + return 0; + } + } + + // If the entirety of the literal was in the current block, we are done. + if (end - lit_length >= ci->buf_offset) { + DEBUG_PRINTF("literal confirmed in current block\n"); + return 1; + } + + // We still have a prefix which we must test against the buffer prepared by + // the long literal table. This is only done in streaming mode. + + assert(t->mode != HS_MODE_BLOCK); + + const u8 *ll_buf; + size_t ll_len; + if (nocase) { + ll_buf = scratch->tctxt.ll_buf_nocase; + ll_len = scratch->tctxt.ll_len_nocase; + } else { + ll_buf = scratch->tctxt.ll_buf; + ll_len = scratch->tctxt.ll_len; + } + + assert(ll_buf); + + u64a lit_start_offset = end - lit_length; + u32 prefix_len = MIN(lit_length, ci->buf_offset - lit_start_offset); + u32 hist_rewind = ci->buf_offset - lit_start_offset; + DEBUG_PRINTF("ll_len=%zu, hist_rewind=%u\n", ll_len, hist_rewind); + if (hist_rewind > ll_len) { + DEBUG_PRINTF("not enough history\n"); + return 0; + } + + DEBUG_PRINTF("check prefix len=%u from hist (len %zu, rewind %u)\n", + prefix_len, ll_len, hist_rewind); + assert(hist_rewind <= ll_len); + if (cmpForward(ll_buf + ll_len - hist_rewind, lit, prefix_len, nocase)) { + DEBUG_PRINTF("cmp of prefix failed\n"); + return 0; + } + + DEBUG_PRINTF("cmp succeeded\n"); + return 1; +} + +static rose_inline +int roseCheckMediumLiteral(const struct RoseEngine *t, + const struct hs_scratch *scratch, u64a end, + u32 lit_offset, u32 lit_length, char nocase) { + const struct core_info *ci = &scratch->core_info; + const u8 *lit = getByOffset(t, lit_offset); + + DEBUG_PRINTF("check lit at %llu, length %u\n", end, lit_length); + DEBUG_PRINTF("base buf_offset=%llu\n", ci->buf_offset); + + if (end < lit_length) { + DEBUG_PRINTF("too short!\n"); + return 0; + } + + // If any portion of the literal matched in the current buffer, check it. + if (end > ci->buf_offset) { + u32 scan_len = MIN(end - ci->buf_offset, lit_length); + u64a scan_start = end - ci->buf_offset - scan_len; + DEBUG_PRINTF("checking suffix (%u bytes) in buf[%llu:%llu]\n", scan_len, + scan_start, end); + if (cmpForward(ci->buf + scan_start, lit + lit_length - scan_len, + scan_len, nocase)) { + DEBUG_PRINTF("cmp of suffix failed\n"); + return 0; + } + } + + // If the entirety of the literal was in the current block, we are done. 
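+    // i.e. the literal's start offset (end - lit_length) lies within the
+    // current buffer, so no history comparison is required.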
+ if (end - lit_length >= ci->buf_offset) { + DEBUG_PRINTF("literal confirmed in current block\n"); + return 1; + } + + // We still have a prefix which we must test against the history buffer. + assert(t->mode != HS_MODE_BLOCK); + + u64a lit_start_offset = end - lit_length; + u32 prefix_len = MIN(lit_length, ci->buf_offset - lit_start_offset); + u32 hist_rewind = ci->buf_offset - lit_start_offset; + DEBUG_PRINTF("hlen=%zu, hist_rewind=%u\n", ci->hlen, hist_rewind); + + // History length check required for confirm in the EOD and delayed + // rebuild paths. + if (hist_rewind > ci->hlen) { + DEBUG_PRINTF("not enough history\n"); + return 0; + } + + DEBUG_PRINTF("check prefix len=%u from hist (len %zu, rewind %u)\n", + prefix_len, ci->hlen, hist_rewind); + assert(hist_rewind <= ci->hlen); + if (cmpForward(ci->hbuf + ci->hlen - hist_rewind, lit, prefix_len, + nocase)) { + DEBUG_PRINTF("cmp of prefix failed\n"); + return 0; + } + + DEBUG_PRINTF("cmp succeeded\n"); + return 1; +} + +static +void updateSeqPoint(struct RoseContext *tctxt, u64a offset, + const char from_mpv) { + if (from_mpv) { + updateMinMatchOffsetFromMpv(tctxt, offset); + } else { + updateMinMatchOffset(tctxt, offset); + } +} + +static rose_inline +hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t, + struct hs_scratch *scratch) { + u8 *cvec = (u8 *)scratch->core_info.combVector; + if (!mmbit_any(cvec, t->ckeyCount)) { + return HWLM_CONTINUE_MATCHING; + } + u64a end = scratch->tctxt.lastCombMatchOffset; + for (u32 i = mmbit_iterate(cvec, t->ckeyCount, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(cvec, t->ckeyCount, i)) { + const struct CombInfo *combInfoMap = (const struct CombInfo *) + ((const char *)t + t->combInfoMapOffset); + const struct CombInfo *ci = combInfoMap + i; + if ((ci->min_offset != 0) && (end < ci->min_offset)) { + DEBUG_PRINTF("halt: before min_offset=%llu\n", ci->min_offset); + continue; + } + if ((ci->max_offset != MAX_OFFSET) && (end > ci->max_offset)) { + DEBUG_PRINTF("halt: after max_offset=%llu\n", ci->max_offset); + continue; + } + + DEBUG_PRINTF("check ekey %u\n", ci->ekey); + if (ci->ekey != INVALID_EKEY) { + assert(ci->ekey < t->ekeyCount); + const char *evec = scratch->core_info.exhaustionVector; + if (isExhausted(t, evec, ci->ekey)) { + DEBUG_PRINTF("ekey %u already set, match is exhausted\n", + ci->ekey); + continue; + } + } + + DEBUG_PRINTF("check ckey %u\n", i); + char *lvec = scratch->core_info.logicalVector; + if (!isLogicalCombination(t, lvec, ci->start, ci->result)) { + DEBUG_PRINTF("Logical Combination Failed!\n"); + continue; + } + + DEBUG_PRINTF("Logical Combination Passed!\n"); + if (roseReport(t, scratch, end, ci->id, 0, + ci->ekey) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + clearCvec(t, (char *)cvec); + return HWLM_CONTINUE_MATCHING; +} + +#if !defined(_WIN32) +#define PROGRAM_CASE(name) \ + case ROSE_INSTR_##name: { \ + LABEL_ROSE_INSTR_##name: \ + DEBUG_PRINTF("instruction: " #name " (pc=%u)\n", \ + programOffset + (u32)(pc - pc_base)); \ + const struct ROSE_STRUCT_##name *ri = \ + (const struct ROSE_STRUCT_##name *)pc; + +#define PROGRAM_NEXT_INSTRUCTION \ + pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ + goto *(next_instr[*(const u8 *)pc]); \ + } + +#define PROGRAM_NEXT_INSTRUCTION_JUMP \ + goto *(next_instr[*(const u8 *)pc]); +#else +#define PROGRAM_CASE(name) \ + case ROSE_INSTR_##name: { \ + DEBUG_PRINTF("instruction: " #name " (pc=%u)\n", \ + programOffset + (u32)(pc - pc_base)); \ + const struct ROSE_STRUCT_##name *ri = \ 
+ (const struct ROSE_STRUCT_##name *)pc; + +#define PROGRAM_NEXT_INSTRUCTION \ + pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ + break; \ + } + +#define PROGRAM_NEXT_INSTRUCTION_JUMP continue; +#endif + hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, struct hs_scratch *scratch, u32 programOffset, u64a som, u64a end, u8 prog_flags) { - return roseRunProgram_i(t, scratch, programOffset, som, end, prog_flags); + DEBUG_PRINTF("program=%u, offsets [%llu,%llu], flags=%u\n", programOffset, + som, end, prog_flags); + + assert(programOffset != ROSE_INVALID_PROG_OFFSET); + assert(programOffset >= sizeof(struct RoseEngine)); + assert(programOffset < t->size); + + const char in_anchored = prog_flags & ROSE_PROG_FLAG_IN_ANCHORED; + const char in_catchup = prog_flags & ROSE_PROG_FLAG_IN_CATCHUP; + const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV; + const char skip_mpv_catchup = prog_flags & ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; + + const char *pc_base = getByOffset(t, programOffset); + const char *pc = pc_base; + + // Local sparse iterator state for programs that use the SPARSE_ITER_BEGIN + // and SPARSE_ITER_NEXT instructions. + struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; + + // If this program has an effect, work_done will be set to one (which may + // allow the program to squash groups). + int work_done = 0; + + struct RoseContext *tctxt = &scratch->tctxt; + + assert(*(const u8 *)pc != ROSE_INSTR_END); + +#if !defined(_WIN32) + static const void *next_instr[] = { + &&LABEL_ROSE_INSTR_END, //!< End of program. + &&LABEL_ROSE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher. + &&LABEL_ROSE_INSTR_CHECK_LIT_EARLY, //!< Skip matches before floating min offset. + &&LABEL_ROSE_INSTR_CHECK_GROUPS, //!< Check that literal groups are on. + &&LABEL_ROSE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD. + &&LABEL_ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance from offset 0. + &&LABEL_ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled". + &&LABEL_ROSE_INSTR_CHECK_SINGLE_LOOKAROUND, //!< Single lookaround check. + &&LABEL_ROSE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check. + &&LABEL_ROSE_INSTR_CHECK_MASK, //!< 8-bytes mask check. + &&LABEL_ROSE_INSTR_CHECK_MASK_32, //!< 32-bytes and/cmp/neg mask check. + &&LABEL_ROSE_INSTR_CHECK_BYTE, //!< Single Byte check. + &&LABEL_ROSE_INSTR_CHECK_SHUFTI_16x8, //!< Check 16-byte data by 8-bucket shufti. + &&LABEL_ROSE_INSTR_CHECK_SHUFTI_32x8, //!< Check 32-byte data by 8-bucket shufti. + &&LABEL_ROSE_INSTR_CHECK_SHUFTI_16x16, //!< Check 16-byte data by 16-bucket shufti. + &&LABEL_ROSE_INSTR_CHECK_SHUFTI_32x16, //!< Check 32-byte data by 16-bucket shufti. + &&LABEL_ROSE_INSTR_CHECK_INFIX, //!< Infix engine must be in accept state. + &&LABEL_ROSE_INSTR_CHECK_PREFIX, //!< Prefix engine must be in accept state. + &&LABEL_ROSE_INSTR_PUSH_DELAYED, //!< Push delayed literal matches. + &&LABEL_ROSE_INSTR_DUMMY_NOP, //!< NOP. Should not exist in build programs. + &&LABEL_ROSE_INSTR_CATCH_UP, //!< Catch up engines, anchored matches. + &&LABEL_ROSE_INSTR_CATCH_UP_MPV, //!< Catch up the MPV. + &&LABEL_ROSE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM. + &&LABEL_ROSE_INSTR_SOM_LEFTFIX, //!< Acquire SOM from a leftfix engine. + &&LABEL_ROSE_INSTR_SOM_FROM_REPORT, //!< Acquire SOM from a som_operation. + &&LABEL_ROSE_INSTR_SOM_ZERO, //!< Set SOM to zero. + &&LABEL_ROSE_INSTR_TRIGGER_INFIX, //!< Trigger an infix engine. + &&LABEL_ROSE_INSTR_TRIGGER_SUFFIX, //!< Trigger a suffix engine. 
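+ // Note: this table must mirror enum RoseInstructionCode exactly,
+ // since dispatch indexes it directly with the opcode byte, i.e.
+ // goto *(next_instr[*pc]). A minimal sketch of the same
+ // computed-goto technique (generic C with hypothetical opcodes,
+ // not Hyperscan API):
+ //     static void run(const unsigned char *pc) {
+ //         static const void *tbl[] = { &&op_halt, &&op_step };
+ //         goto *tbl[*pc];
+ //     op_step: ++pc; goto *tbl[*pc];
+ //     op_halt: return;
+ //     }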
+ &&LABEL_ROSE_INSTR_DEDUPE, //!< Run deduplication for report. + &&LABEL_ROSE_INSTR_DEDUPE_SOM, //!< Run deduplication for SOM report. + &&LABEL_ROSE_INSTR_REPORT_CHAIN, //!< Fire a chained report (MPV). + &&LABEL_ROSE_INSTR_REPORT_SOM_INT, //!< Manipulate SOM only. + &&LABEL_ROSE_INSTR_REPORT_SOM_AWARE, //!< Manipulate SOM from SOM-aware source. + &&LABEL_ROSE_INSTR_REPORT, + &&LABEL_ROSE_INSTR_REPORT_EXHAUST, + &&LABEL_ROSE_INSTR_REPORT_SOM, + &&LABEL_ROSE_INSTR_REPORT_SOM_EXHAUST, + &&LABEL_ROSE_INSTR_DEDUPE_AND_REPORT, + &&LABEL_ROSE_INSTR_FINAL_REPORT, + &&LABEL_ROSE_INSTR_CHECK_EXHAUSTED, //!< Check if an ekey has already been set. + &&LABEL_ROSE_INSTR_CHECK_MIN_LENGTH, //!< Check (EOM - SOM) against min length. + &&LABEL_ROSE_INSTR_SET_STATE, //!< Switch a state index on. + &&LABEL_ROSE_INSTR_SET_GROUPS, //!< Set some literal group bits. + &&LABEL_ROSE_INSTR_SQUASH_GROUPS, //!< Conditionally turn off some groups. + &&LABEL_ROSE_INSTR_CHECK_STATE, //!< Test a single bit in the state multibit. + &&LABEL_ROSE_INSTR_SPARSE_ITER_BEGIN, //!< Begin running a sparse iter over states. + &&LABEL_ROSE_INSTR_SPARSE_ITER_NEXT, //!< Continue running sparse iter over states. + &&LABEL_ROSE_INSTR_SPARSE_ITER_ANY, //!< Test for any bit in the sparse iterator. + &&LABEL_ROSE_INSTR_ENGINES_EOD, + &&LABEL_ROSE_INSTR_SUFFIXES_EOD, + &&LABEL_ROSE_INSTR_MATCHER_EOD, + &&LABEL_ROSE_INSTR_CHECK_LONG_LIT, + &&LABEL_ROSE_INSTR_CHECK_LONG_LIT_NOCASE, + &&LABEL_ROSE_INSTR_CHECK_MED_LIT, + &&LABEL_ROSE_INSTR_CHECK_MED_LIT_NOCASE, + &&LABEL_ROSE_INSTR_CLEAR_WORK_DONE, + &&LABEL_ROSE_INSTR_MULTIPATH_LOOKAROUND, + &&LABEL_ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_16x8, + &&LABEL_ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x8, + &&LABEL_ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x16, + &&LABEL_ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_64, + &&LABEL_ROSE_INSTR_INCLUDED_JUMP, + &&LABEL_ROSE_INSTR_SET_LOGICAL, + &&LABEL_ROSE_INSTR_SET_COMBINATION, + &&LABEL_ROSE_INSTR_FLUSH_COMBINATION, + &&LABEL_ROSE_INSTR_SET_EXHAUST + }; +#endif + + for (;;) { + assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN)); + assert(pc >= pc_base); + assert((size_t)(pc - pc_base) < t->size); + const u8 code = *(const u8 *)pc; + assert(code <= LAST_ROSE_INSTRUCTION); + + switch ((enum RoseInstructionCode)code) { + PROGRAM_CASE(END) { + DEBUG_PRINTF("finished\n"); + return HWLM_CONTINUE_MATCHING; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(ANCHORED_DELAY) { + if (in_anchored && end > t->floatingMinLiteralMatchOffset) { + DEBUG_PRINTF("delay until playback\n"); + tctxt->groups |= ri->groups; + work_done = 1; + recordAnchoredLiteralMatch(t, scratch, ri->anch_id, end); + + assert(ri->done_jump); // must progress + pc += ri->done_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LIT_EARLY) { + if (end < ri->min_offset) { + DEBUG_PRINTF("halt: before min_offset=%u\n", + ri->min_offset); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_GROUPS) { + DEBUG_PRINTF("groups=0x%llx, checking instr groups=0x%llx\n", + tctxt->groups, ri->groups); + if (!(ri->groups & tctxt->groups)) { + DEBUG_PRINTF("halt: no groups are set\n"); + return HWLM_CONTINUE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_ONLY_EOD) { + struct core_info *ci = &scratch->core_info; + if (end != ci->buf_offset + ci->len) { + DEBUG_PRINTF("should only match at end of data\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + 
PROGRAM_NEXT_INSTRUCTION_JUMP
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(CHECK_BOUNDS) {
+ if (!roseCheckBounds(end, ri->min_bound, ri->max_bound)) {
+ DEBUG_PRINTF("failed bounds check\n");
+ assert(ri->fail_jump); // must progress
+ pc += ri->fail_jump;
+ PROGRAM_NEXT_INSTRUCTION_JUMP
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(CHECK_NOT_HANDLED) {
+ struct fatbit *handled = scratch->handled_roles;
+ if (fatbit_set(handled, t->handledKeyCount, ri->key)) {
+ DEBUG_PRINTF("key %u already set\n", ri->key);
+ assert(ri->fail_jump); // must progress
+ pc += ri->fail_jump;
+ PROGRAM_NEXT_INSTRUCTION_JUMP
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(CHECK_SINGLE_LOOKAROUND) {
+ if (!roseCheckSingleLookaround(t, scratch, ri->offset,
+ ri->reach_index, end)) {
+ DEBUG_PRINTF("failed lookaround check\n");
+ assert(ri->fail_jump); // must progress
+ pc += ri->fail_jump;
+ PROGRAM_NEXT_INSTRUCTION_JUMP
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(CHECK_LOOKAROUND) {
+ if (!roseCheckLookaround(t, scratch, ri->look_index,
+ ri->reach_index, ri->count, end)) {
+ DEBUG_PRINTF("failed lookaround check\n");
+ assert(ri->fail_jump); // must progress
+ pc += ri->fail_jump;
+ PROGRAM_NEXT_INSTRUCTION_JUMP
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(CHECK_MASK) {
+ struct core_info *ci = &scratch->core_info;
+ if (!roseCheckMask(ci, ri->and_mask, ri->cmp_mask,
+ ri->neg_mask, ri->offset, end)) {
+ DEBUG_PRINTF("failed mask check\n");
+ assert(ri->fail_jump); // must progress
+ pc += ri->fail_jump;
+ PROGRAM_NEXT_INSTRUCTION_JUMP
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(CHECK_MASK_32) {
+ struct core_info *ci = &scratch->core_info;
+ if (!roseCheckMask32(ci, ri->and_mask, ri->cmp_mask,
+ ri->neg_mask, ri->offset, end)) {
+ assert(ri->fail_jump);
+ pc += ri->fail_jump;
+ PROGRAM_NEXT_INSTRUCTION_JUMP
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(CHECK_BYTE) {
+ const struct core_info *ci = &scratch->core_info;
+ if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask,
+ ri->negation, ri->offset, end)) {
+ DEBUG_PRINTF("failed byte check\n");
+ assert(ri->fail_jump); // must progress
+ pc += ri->fail_jump;
+ PROGRAM_NEXT_INSTRUCTION_JUMP
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(CHECK_SHUFTI_16x8) {
+ const struct core_info *ci = &scratch->core_info;
+ if (!roseCheckShufti16x8(ci, ri->nib_mask,
+ ri->bucket_select_mask,
+ ri->neg_mask, ri->offset, end)) {
+ assert(ri->fail_jump);
+ pc += ri->fail_jump;
+ PROGRAM_NEXT_INSTRUCTION_JUMP
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(CHECK_SHUFTI_32x8) {
+ const struct core_info *ci = &scratch->core_info;
+ if (!roseCheckShufti32x8(ci, ri->hi_mask, ri->lo_mask,
+ ri->bucket_select_mask,
+ ri->neg_mask, ri->offset, end)) {
+ assert(ri->fail_jump);
+ pc += ri->fail_jump;
+ PROGRAM_NEXT_INSTRUCTION_JUMP
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(CHECK_SHUFTI_16x16) {
+ const struct core_info *ci = &scratch->core_info;
+ if (!roseCheckShufti16x16(ci, ri->hi_mask, ri->lo_mask,
+ ri->bucket_select_mask,
+ ri->neg_mask, ri->offset, end)) {
+ assert(ri->fail_jump);
+ pc += ri->fail_jump;
+ PROGRAM_NEXT_INSTRUCTION_JUMP
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(CHECK_SHUFTI_32x16) {
+ const struct core_info *ci = &scratch->core_info;
+ if (!roseCheckShufti32x16(ci, ri->hi_mask, ri->lo_mask,
+ ri->bucket_select_mask_hi,
+ ri->bucket_select_mask_lo,
+ ri->neg_mask, ri->offset, end)) {
+ assert(ri->fail_jump);
+ pc += ri->fail_jump;
+ PROGRAM_NEXT_INSTRUCTION_JUMP
+ }
+ }
+
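+ // The CHECK_SHUFTI_* cases above differ only in window width (16 or
+ // 32 bytes) and bucket count (8 or 16): each loads data around the
+ // match offset, masks off bytes that fall outside the available
+ // buffers, and validates character-class membership with a shufti
+ // (shuffle-based) table lookup.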
PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_INFIX) { + if (!roseTestInfix(t, scratch, ri->queue, ri->lag, ri->report, + end)) { + DEBUG_PRINTF("failed infix check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_PREFIX) { + if (!roseTestPrefix(t, scratch, ri->queue, ri->lag, ri->report, + end)) { + DEBUG_PRINTF("failed prefix check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(PUSH_DELAYED) { + rosePushDelayedMatch(t, scratch, ri->delay, ri->index, end); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(DUMMY_NOP) { + assert(0); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CATCH_UP) { + if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CATCH_UP_MPV) { + if (from_mpv || skip_mpv_catchup) { + DEBUG_PRINTF("skipping mpv catchup\n"); + } else if (roseCatchUpMPV(t, + end - scratch->core_info.buf_offset, + scratch) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_ADJUST) { + assert(ri->distance <= end); + som = end - ri->distance; + DEBUG_PRINTF("som is (end - %u) = %llu\n", ri->distance, som); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_LEFTFIX) { + som = roseGetHaigSom(t, scratch, ri->queue, ri->lag); + DEBUG_PRINTF("som from leftfix is %llu\n", som); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_FROM_REPORT) { + som = handleSomExternal(scratch, &ri->som, end); + DEBUG_PRINTF("som from report %u is %llu\n", ri->som.onmatch, + som); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_ZERO) { + DEBUG_PRINTF("setting SOM to zero\n"); + som = 0; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(TRIGGER_INFIX) { + roseTriggerInfix(t, scratch, som, end, ri->queue, ri->event, + ri->cancel); + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(TRIGGER_SUFFIX) { + if (roseTriggerSuffix(t, scratch, ri->queue, ri->event, som, + end) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(DEDUPE) { + updateSeqPoint(tctxt, end, from_mpv); + const char do_som = t->hasSom; // TODO: constant propagate + const char is_external_report = 1; + enum DedupeResult rv = + dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, + ri->dkey, ri->offset_adjust, + is_external_report, ri->quash_som, do_som); + switch (rv) { + case DEDUPE_HALT: + return HWLM_TERMINATE_MATCHING; + case DEDUPE_SKIP: + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + case DEDUPE_CONTINUE: + break; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(DEDUPE_SOM) { + updateSeqPoint(tctxt, end, from_mpv); + const char is_external_report = 0; + const char do_som = 1; + enum DedupeResult rv = + dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, + ri->dkey, ri->offset_adjust, + is_external_report, ri->quash_som, do_som); + switch (rv) { + case DEDUPE_HALT: + return HWLM_TERMINATE_MATCHING; + case DEDUPE_SKIP: + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + case DEDUPE_CONTINUE: + break; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_CHAIN) { + // Note: sequence points updated inside this function. 
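+ // (roseCatchUpAndHandleChainMatch first catches up the engines that
+ // can feed the MPV, so that earlier MPV triggers are enqueued in
+ // order, and only then queues this chained match.)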
+ if (roseCatchUpAndHandleChainMatch( + t, scratch, ri->event, ri->top_squash_distance, end, + in_catchup) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_INT) { + updateSeqPoint(tctxt, end, from_mpv); + roseHandleSom(scratch, &ri->som, end); + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_AWARE) { + updateSeqPoint(tctxt, end, from_mpv); + roseHandleSomSom(scratch, &ri->som, som, end); + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, + INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_EXHAUST) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, + ri->ekey) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseReportSom(t, scratch, som, end, ri->onmatch, + ri->offset_adjust, + INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_EXHAUST) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseReportSom(t, scratch, som, end, ri->onmatch, + ri->offset_adjust, + ri->ekey) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(DEDUPE_AND_REPORT) { + updateSeqPoint(tctxt, end, from_mpv); + const char do_som = t->hasSom; // TODO: constant propagate + const char is_external_report = 1; + enum DedupeResult rv = + dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, + ri->dkey, ri->offset_adjust, + is_external_report, ri->quash_som, do_som); + switch (rv) { + case DEDUPE_HALT: + return HWLM_TERMINATE_MATCHING; + case DEDUPE_SKIP: + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + case DEDUPE_CONTINUE: + break; + } + + const u32 ekey = INVALID_EKEY; + if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, + ekey) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(FINAL_REPORT) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, + INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + /* One-shot specialisation: this instruction always terminates + * execution of the program. 
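+ * (Nothing follows a FINAL_REPORT in a program, so we return directly
+ * rather than dispatch one more instruction.)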
*/ + return HWLM_CONTINUE_MATCHING; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_EXHAUSTED) { + DEBUG_PRINTF("check ekey %u\n", ri->ekey); + assert(ri->ekey != INVALID_EKEY); + assert(ri->ekey < t->ekeyCount); + const char *evec = scratch->core_info.exhaustionVector; + if (isExhausted(t, evec, ri->ekey)) { + DEBUG_PRINTF("ekey %u already set, match is exhausted\n", + ri->ekey); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MIN_LENGTH) { + DEBUG_PRINTF("check min length %llu (adj %d)\n", ri->min_length, + ri->end_adj); + assert(ri->min_length > 0); + assert(ri->end_adj == 0 || ri->end_adj == -1); + assert(som == HS_OFFSET_PAST_HORIZON || som <= end); + if (som != HS_OFFSET_PAST_HORIZON && + ((end + ri->end_adj) - som < ri->min_length)) { + DEBUG_PRINTF("failed check, match len %llu\n", + (u64a)((end + ri->end_adj) - som)); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_STATE) { + DEBUG_PRINTF("set state index %u\n", ri->index); + mmbit_set(getRoleState(scratch->core_info.state), + t->rolesWithStateCount, ri->index); + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_GROUPS) { + tctxt->groups |= ri->groups; + DEBUG_PRINTF("set groups 0x%llx -> 0x%llx\n", ri->groups, + tctxt->groups); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SQUASH_GROUPS) { + assert(popcount64(ri->groups) == 63); // Squash only one group. + if (work_done) { + tctxt->groups &= ri->groups; + DEBUG_PRINTF("squash groups 0x%llx -> 0x%llx\n", ri->groups, + tctxt->groups); + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_STATE) { + DEBUG_PRINTF("check state %u\n", ri->index); + const u8 *roles = getRoleState(scratch->core_info.state); + if (!mmbit_isset(roles, t->rolesWithStateCount, ri->index)) { + DEBUG_PRINTF("state not on\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SPARSE_ITER_BEGIN) { + DEBUG_PRINTF("iter_offset=%u\n", ri->iter_offset); + const struct mmbit_sparse_iter *it = + getByOffset(t, ri->iter_offset); + assert(ISALIGNED(it)); + + const u8 *roles = getRoleState(scratch->core_info.state); + + u32 idx = 0; + u32 i = mmbit_sparse_iter_begin(roles, t->rolesWithStateCount, + &idx, it, si_state); + if (i == MMB_INVALID) { + DEBUG_PRINTF("no states in sparse iter are on\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + + fatbit_clear(scratch->handled_roles); + + const u32 *jumps = getByOffset(t, ri->jump_table); + DEBUG_PRINTF("state %u (idx=%u) is on, jump to %u\n", i, idx, + jumps[idx]); + pc = pc_base + jumps[idx]; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SPARSE_ITER_NEXT) { + DEBUG_PRINTF("iter_offset=%u, state=%u\n", ri->iter_offset, + ri->state); + const struct mmbit_sparse_iter *it = + getByOffset(t, ri->iter_offset); + assert(ISALIGNED(it)); + + const u8 *roles = getRoleState(scratch->core_info.state); + + u32 idx = 0; + u32 i = mmbit_sparse_iter_next(roles, t->rolesWithStateCount, + ri->state, &idx, it, si_state); + if (i == MMB_INVALID) { + DEBUG_PRINTF("no more states in sparse iter are on\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + + const u32 *jumps = getByOffset(t, ri->jump_table); + 
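+ // Both SPARSE_ITER_BEGIN and SPARSE_ITER_NEXT index the jump table
+ // with idx, the position of the set state in iteration order, so
+ // control resumes at the block for the next state that is on.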
DEBUG_PRINTF("state %u (idx=%u) is on, jump to %u\n", i, idx, + jumps[idx]); + pc = pc_base + jumps[idx]; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SPARSE_ITER_ANY) { + DEBUG_PRINTF("iter_offset=%u\n", ri->iter_offset); + const struct mmbit_sparse_iter *it = + getByOffset(t, ri->iter_offset); + assert(ISALIGNED(it)); + + const u8 *roles = getRoleState(scratch->core_info.state); + + u32 idx = 0; + u32 i = mmbit_sparse_iter_begin(roles, t->rolesWithStateCount, + &idx, it, si_state); + if (i == MMB_INVALID) { + DEBUG_PRINTF("no states in sparse iter are on\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + DEBUG_PRINTF("state %u (idx=%u) is on\n", i, idx); + fatbit_clear(scratch->handled_roles); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(ENGINES_EOD) { + if (roseEnginesEod(t, scratch, end, ri->iter_offset) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SUFFIXES_EOD) { + if (roseSuffixesEod(t, scratch, end) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(MATCHER_EOD) { + if (roseMatcherEod(t, scratch, end) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LONG_LIT) { + const char nocase = 0; + if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("failed long lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) { + const char nocase = 1; + if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("failed nocase long lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MED_LIT) { + const char nocase = 0; + if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("failed lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MED_LIT_NOCASE) { + const char nocase = 1; + if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("failed long lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CLEAR_WORK_DONE) { + DEBUG_PRINTF("clear work_done flag\n"); + work_done = 0; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(MULTIPATH_LOOKAROUND) { + if (!roseMultipathLookaround(t, scratch, ri->look_index, + ri->reach_index, ri->count, + ri->last_start, ri->start_mask, + end)) { + DEBUG_PRINTF("failed multi-path lookaround check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_16x8) { + if (!roseCheckMultipathShufti16x8(scratch, ri, end)) { + DEBUG_PRINTF("failed multi-path shufti 16x8 check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x8) { + if (!roseCheckMultipathShufti32x8(scratch, 
ri, end)) {
+ DEBUG_PRINTF("failed multi-path shufti 32x8 check\n");
+ assert(ri->fail_jump); // must progress
+ pc += ri->fail_jump;
+ PROGRAM_NEXT_INSTRUCTION_JUMP
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x16) {
+ if (!roseCheckMultipathShufti32x16(scratch, ri, end)) {
+ DEBUG_PRINTF("failed multi-path shufti 32x16 check\n");
+ assert(ri->fail_jump); // must progress
+ pc += ri->fail_jump;
+ PROGRAM_NEXT_INSTRUCTION_JUMP
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_64) {
+ if (!roseCheckMultipathShufti64(scratch, ri, end)) {
+ DEBUG_PRINTF("failed multi-path shufti 64 check\n");
+ assert(ri->fail_jump); // must progress
+ pc += ri->fail_jump;
+ PROGRAM_NEXT_INSTRUCTION_JUMP
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(INCLUDED_JUMP) {
+ if (scratch->fdr_conf) {
+ // squash the bucket of the included literal
+ u8 shift = scratch->fdr_conf_offset & ~7U;
+ u64a mask = ((~(u64a)ri->squash) << shift);
+ *(scratch->fdr_conf) &= mask;
+
+ pc = getByOffset(t, ri->child_offset);
+ pc_base = pc;
+ programOffset = (const u8 *)pc_base - (const u8 *)t;
+ DEBUG_PRINTF("pc_base %p pc %p child_offset %u squash %u\n",
+ pc_base, pc, ri->child_offset, ri->squash);
+ work_done = 0;
+ PROGRAM_NEXT_INSTRUCTION_JUMP
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(SET_LOGICAL) {
+ DEBUG_PRINTF("set logical value of lkey %u, offset_adjust=%d\n",
+ ri->lkey, ri->offset_adjust);
+ assert(ri->lkey != INVALID_LKEY);
+ assert(ri->lkey < t->lkeyCount);
+ char *lvec = scratch->core_info.logicalVector;
+ setLogicalVal(t, lvec, ri->lkey, 1);
+ updateLastCombMatchOffset(tctxt, end + ri->offset_adjust);
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(SET_COMBINATION) {
+ DEBUG_PRINTF("set ckey %u as active\n", ri->ckey);
+ assert(ri->ckey != INVALID_CKEY);
+ assert(ri->ckey < t->ckeyCount);
+ char *cvec = scratch->core_info.combVector;
+ setCombinationActive(t, cvec, ri->ckey);
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(FLUSH_COMBINATION) {
+ assert(end >= tctxt->lastCombMatchOffset);
+ if (end > tctxt->lastCombMatchOffset) {
+ if (flushActiveCombinations(t, scratch)
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(SET_EXHAUST) {
+ updateSeqPoint(tctxt, end, from_mpv);
+ if (roseSetExhaust(t, scratch, ri->ekey)
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ work_done = 1;
+ }
+ PROGRAM_NEXT_INSTRUCTION
+ }
+ }
+
+ assert(0); // unreachable
+ return HWLM_CONTINUE_MATCHING;
 }
+
+#define L_PROGRAM_CASE(name) \
+ case ROSE_INSTR_##name: { \
+ DEBUG_PRINTF("l_instruction: " #name " (pc=%u)\n", \
+ programOffset + (u32)(pc - pc_base)); \
+ const struct ROSE_STRUCT_##name *ri = \
+ (const struct ROSE_STRUCT_##name *)pc;
+
+#define L_PROGRAM_NEXT_INSTRUCTION \
+ pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \
+ break; \
+ }
+
+#define L_PROGRAM_NEXT_INSTRUCTION_JUMP continue;
+
+hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
+ struct hs_scratch *scratch, u32 programOffset,
+ u64a som, u64a end, u8 prog_flags) {
+ DEBUG_PRINTF("program=%u, offsets [%llu,%llu], flags=%u\n", programOffset,
+ som, end, prog_flags);
+
+ assert(programOffset != ROSE_INVALID_PROG_OFFSET);
+ assert(programOffset >= sizeof(struct RoseEngine));
+ assert(programOffset < t->size);
+
+ const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV;
+
+ const char *pc_base = getByOffset(t, programOffset);
+ const char *pc = pc_base;
+
+ struct RoseContext *tctxt =
&scratch->tctxt; + + assert(*(const u8 *)pc != ROSE_INSTR_END); + + for (;;) { + assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN)); + assert(pc >= pc_base); + assert((size_t)(pc - pc_base) < t->size); + const u8 code = *(const u8 *)pc; + assert(code <= LAST_ROSE_INSTRUCTION); + + switch ((enum RoseInstructionCode)code) { + L_PROGRAM_CASE(END) { + DEBUG_PRINTF("finished\n"); + return HWLM_CONTINUE_MATCHING; + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(CATCH_UP) { + if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(SOM_FROM_REPORT) { + som = handleSomExternal(scratch, &ri->som, end); + DEBUG_PRINTF("som from report %u is %llu\n", ri->som.onmatch, + som); + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(DEDUPE) { + updateSeqPoint(tctxt, end, from_mpv); + const char do_som = t->hasSom; // TODO: constant propagate + const char is_external_report = 1; + enum DedupeResult rv = + dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, + ri->dkey, ri->offset_adjust, + is_external_report, ri->quash_som, do_som); + switch (rv) { + case DEDUPE_HALT: + return HWLM_TERMINATE_MATCHING; + case DEDUPE_SKIP: + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + L_PROGRAM_NEXT_INSTRUCTION_JUMP + case DEDUPE_CONTINUE: + break; + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(DEDUPE_SOM) { + updateSeqPoint(tctxt, end, from_mpv); + const char is_external_report = 0; + const char do_som = 1; + enum DedupeResult rv = + dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, + ri->dkey, ri->offset_adjust, + is_external_report, ri->quash_som, do_som); + switch (rv) { + case DEDUPE_HALT: + return HWLM_TERMINATE_MATCHING; + case DEDUPE_SKIP: + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + L_PROGRAM_NEXT_INSTRUCTION_JUMP + case DEDUPE_CONTINUE: + break; + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(REPORT) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, + INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(REPORT_EXHAUST) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, + ri->ekey) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(REPORT_SOM) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseReportSom(t, scratch, som, end, ri->onmatch, + ri->offset_adjust, + INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(DEDUPE_AND_REPORT) { + updateSeqPoint(tctxt, end, from_mpv); + const char do_som = t->hasSom; // TODO: constant propagate + const char is_external_report = 1; + enum DedupeResult rv = + dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, + ri->dkey, ri->offset_adjust, + is_external_report, ri->quash_som, do_som); + switch (rv) { + case DEDUPE_HALT: + return HWLM_TERMINATE_MATCHING; + case DEDUPE_SKIP: + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + L_PROGRAM_NEXT_INSTRUCTION_JUMP + case DEDUPE_CONTINUE: + break; + } + + const u32 ekey = INVALID_EKEY; + if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, + ekey) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(FINAL_REPORT) { + 
updateSeqPoint(tctxt, end, from_mpv); + if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, + INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + /* One-shot specialisation: this instruction always terminates + * execution of the program. */ + return HWLM_CONTINUE_MATCHING; + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(CHECK_EXHAUSTED) { + DEBUG_PRINTF("check ekey %u\n", ri->ekey); + assert(ri->ekey != INVALID_EKEY); + assert(ri->ekey < t->ekeyCount); + const char *evec = scratch->core_info.exhaustionVector; + if (isExhausted(t, evec, ri->ekey)) { + DEBUG_PRINTF("ekey %u already set, match is exhausted\n", + ri->ekey); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + L_PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(CHECK_LONG_LIT) { + const char nocase = 0; + if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("failed long lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + L_PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) { + const char nocase = 1; + if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("failed nocase long lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + L_PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(CHECK_MED_LIT) { + const char nocase = 0; + if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("failed lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + L_PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(CHECK_MED_LIT_NOCASE) { + const char nocase = 1; + if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("failed long lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + L_PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(SET_LOGICAL) { + DEBUG_PRINTF("set logical value of lkey %u, offset_adjust=%d\n", + ri->lkey, ri->offset_adjust); + assert(ri->lkey != INVALID_LKEY); + assert(ri->lkey < t->lkeyCount); + char *lvec = scratch->core_info.logicalVector; + setLogicalVal(t, lvec, ri->lkey, 1); + updateLastCombMatchOffset(tctxt, end + ri->offset_adjust); + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(SET_COMBINATION) { + DEBUG_PRINTF("set ckey %u as active\n", ri->ckey); + assert(ri->ckey != INVALID_CKEY); + assert(ri->ckey < t->ckeyCount); + char *cvec = scratch->core_info.combVector; + setCombinationActive(t, cvec, ri->ckey); + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(FLUSH_COMBINATION) { + assert(end >= tctxt->lastCombMatchOffset); + if (end > tctxt->lastCombMatchOffset) { + if (flushActiveCombinations(t, scratch) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(SET_EXHAUST) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseSetExhaust(t, scratch, ri->ekey) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + L_PROGRAM_NEXT_INSTRUCTION + + default: { + assert(0); // unreachable + } + } + } + + assert(0); // unreachable + return HWLM_CONTINUE_MATCHING; +} + +#undef L_PROGRAM_CASE +#undef L_PROGRAM_NEXT_INSTRUCTION +#undef L_PROGRAM_NEXT_INSTRUCTION_JUMP + +#undef 
PROGRAM_CASE +#undef PROGRAM_NEXT_INSTRUCTION +#undef PROGRAM_NEXT_INSTRUCTION_JUMP diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 3c11300b..50bf202c 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,27 +34,10 @@ #ifndef PROGRAM_RUNTIME_H #define PROGRAM_RUNTIME_H -#include "catchup.h" -#include "counting_miracle.h" -#include "infix.h" -#include "match.h" -#include "miracle.h" -#include "report.h" +#include "hwlm/hwlm.h" // for hwlmcb_rv_t #include "rose.h" -#include "rose_common.h" -#include "rose_internal.h" -#include "rose_program.h" -#include "rose_types.h" -#include "validate_mask.h" -#include "validate_shufti.h" -#include "runtime.h" #include "scratch.h" #include "ue2common.h" -#include "hwlm/hwlm.h" // for hwlmcb_rv_t -#include "util/compare.h" -#include "util/copybytes.h" -#include "util/fatbit.h" -#include "util/multibit.h" /* * Program context flags, which control the behaviour of some instructions at @@ -71,2637 +54,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, struct hs_scratch *scratch, u32 programOffset, u64a som, u64a end, u8 prog_flags); -/* Inline implementation follows. */ - -static rose_inline -void rosePushDelayedMatch(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 delay, - u32 delay_index, u64a offset) { - assert(delay); - - const u32 src_slot_index = delay; - u32 slot_index = (src_slot_index + offset) & DELAY_MASK; - - struct RoseContext *tctxt = &scratch->tctxt; - if (offset + src_slot_index <= tctxt->delayLastEndOffset) { - DEBUG_PRINTF("skip too late\n"); - return; - } - - const u32 delay_count = t->delay_count; - struct fatbit **delaySlots = getDelaySlots(scratch); - struct fatbit *slot = delaySlots[slot_index]; - - DEBUG_PRINTF("pushing tab %u into slot %u\n", delay_index, slot_index); - if (!(tctxt->filledDelayedSlots & (1U << slot_index))) { - tctxt->filledDelayedSlots |= 1U << slot_index; - fatbit_clear(slot); - } - - fatbit_set(slot, delay_count, delay_index); -} - -static rose_inline -void recordAnchoredLiteralMatch(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 anch_id, - u64a end) { - assert(end); - - if (end <= t->floatingMinLiteralMatchOffset) { - return; - } - - struct fatbit **anchoredLiteralRows = getAnchoredLiteralLog(scratch); - - DEBUG_PRINTF("record %u (of %u) @ %llu\n", anch_id, t->anchored_count, end); - - if (!bf64_set(&scratch->al_log_sum, end - 1)) { - // first time, clear row - DEBUG_PRINTF("clearing %llu/%u\n", end - 1, t->anchored_count); - fatbit_clear(anchoredLiteralRows[end - 1]); - } - - assert(anch_id < t->anchored_count); - fatbit_set(anchoredLiteralRows[end - 1], t->anchored_count, anch_id); -} - -static rose_inline -char roseLeftfixCheckMiracles(const struct RoseEngine *t, - const struct LeftNfaInfo *left, - struct core_info *ci, struct mq *q, u64a end, - const char is_infix) { - if (!is_infix && left->transient) { - // Miracles won't help us with transient leftfix engines; they only - // scan for a limited time anyway. 
- return 1; - } - - if (!left->stopTable) { - return 1; - } - - DEBUG_PRINTF("looking for miracle on queue %u\n", q->nfa->queueIndex); - - const s64a begin_loc = q_cur_loc(q); - const s64a end_loc = end - ci->buf_offset; - - s64a miracle_loc; - if (roseMiracleOccurs(t, left, ci, begin_loc, end_loc, &miracle_loc)) { - goto found_miracle; - } - - if (roseCountingMiracleOccurs(t, left, ci, begin_loc, end_loc, - &miracle_loc)) { - goto found_miracle; - } - - return 1; - -found_miracle: - DEBUG_PRINTF("miracle at %lld\n", miracle_loc); - assert(miracle_loc >= begin_loc); - - // If we're a prefix, then a miracle effectively results in us needing to - // re-init our state and start fresh. - if (!is_infix) { - if (miracle_loc != begin_loc) { - DEBUG_PRINTF("re-init prefix state\n"); - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, miracle_loc); - pushQueueAt(q, 1, MQE_TOP, miracle_loc); - nfaQueueInitState(q->nfa, q); - } - return 1; - } - - // Otherwise, we're an infix. Remove tops before the miracle from the queue - // and re-init at that location. - - q_skip_forward_to(q, miracle_loc); - - if (q_last_type(q) == MQE_START) { - DEBUG_PRINTF("miracle caused infix to die\n"); - return 0; - } - - DEBUG_PRINTF("re-init infix state\n"); - assert(q->items[q->cur].type == MQE_START); - q->items[q->cur].location = miracle_loc; - nfaQueueInitState(q->nfa, q); - - return 1; -} - -static rose_inline -hwlmcb_rv_t roseTriggerSuffix(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 qi, u32 top, - u64a som, u64a end) { - DEBUG_PRINTF("suffix qi=%u, top event=%u\n", qi, top); - - struct core_info *ci = &scratch->core_info; - u8 *aa = getActiveLeafArray(t, ci->state); - const u32 aaCount = t->activeArrayCount; - const u32 qCount = t->queueCount; - struct mq *q = &scratch->queues[qi]; - const struct NfaInfo *info = getNfaInfoByQueue(t, qi); - const struct NFA *nfa = getNfaByInfo(t, info); - - s64a loc = (s64a)end - ci->buf_offset; - assert(loc <= (s64a)ci->len && loc >= -(s64a)ci->hlen); - - if (!mmbit_set(aa, aaCount, qi)) { - initQueue(q, qi, t, scratch); - nfaQueueInitState(nfa, q); - pushQueueAt(q, 0, MQE_START, loc); - fatbit_set(scratch->aqa, qCount, qi); - } else if (info->no_retrigger) { - DEBUG_PRINTF("yawn\n"); - /* nfa only needs one top; we can go home now */ - return HWLM_CONTINUE_MATCHING; - } else if (!fatbit_set(scratch->aqa, qCount, qi)) { - initQueue(q, qi, t, scratch); - loadStreamState(nfa, q, 0); - pushQueueAt(q, 0, MQE_START, 0); - } else if (isQueueFull(q)) { - DEBUG_PRINTF("queue %u full -> catching up nfas\n", qi); - if (info->eod) { - /* can catch up suffix independently no pq */ - q->context = NULL; - pushQueueNoMerge(q, MQE_END, loc); - nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - } else if (ensureQueueFlushed(t, scratch, qi, loc) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - - assert(top == MQE_TOP || (top >= MQE_TOP_FIRST && top < MQE_INVALID)); - pushQueueSom(q, top, loc, som); - - if (q_cur_loc(q) == (s64a)ci->len && !info->eod) { - /* we may not run the nfa; need to ensure state is fine */ - DEBUG_PRINTF("empty run\n"); - pushQueueNoMerge(q, MQE_END, loc); - char alive = nfaQueueExec(nfa, q, loc); - if (alive) { - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - } else { - mmbit_unset(aa, aaCount, qi); - fatbit_unset(scratch->aqa, qCount, qi); - } - } - - return HWLM_CONTINUE_MATCHING; -} - -static really_inline -char roseTestLeftfix(const struct RoseEngine *t, 
struct hs_scratch *scratch, - u32 qi, u32 leftfixLag, ReportID leftfixReport, u64a end, - const char is_infix) { - struct core_info *ci = &scratch->core_info; - - u32 ri = queueToLeftIndex(t, qi); - const struct LeftNfaInfo *left = getLeftTable(t) + ri; - - DEBUG_PRINTF("testing %s %s %u/%u with lag %u (maxLag=%u)\n", - (left->transient ? "transient" : "active"), - (is_infix ? "infix" : "prefix"), - ri, qi, leftfixLag, left->maxLag); - - assert(leftfixLag <= left->maxLag); - assert(left->infix == is_infix); - assert(!is_infix || !left->transient); // Only prefixes can be transient. - - struct mq *q = scratch->queues + qi; - char *state = scratch->core_info.state; - u8 *activeLeftArray = getActiveLeftArray(t, state); - u32 qCount = t->queueCount; - u32 arCount = t->activeLeftCount; - - if (!mmbit_isset(activeLeftArray, arCount, ri)) { - DEBUG_PRINTF("engine is dead nothing to see here\n"); - return 0; - } - - if (unlikely(end < leftfixLag)) { - assert(0); /* lag is the literal length */ - return 0; - } - - if (nfaSupportsZombie(getNfaByQueue(t, qi)) && ci->buf_offset - && !fatbit_isset(scratch->aqa, qCount, qi) - && isZombie(t, state, left)) { - DEBUG_PRINTF("zombie\n"); - return 1; - } - - if (!fatbit_set(scratch->aqa, qCount, qi)) { - DEBUG_PRINTF("initing q %u\n", qi); - initRoseQueue(t, qi, left, scratch); - if (ci->buf_offset) { // there have been writes before us! - s32 sp; - if (!is_infix && left->transient) { - sp = -(s32)ci->hlen; - } else { - sp = -(s32)loadRoseDelay(t, state, left); - } - - /* transient nfas are always started fresh -> state not maintained - * at stream boundary */ - - pushQueueAt(q, 0, MQE_START, sp); - if (is_infix || (ci->buf_offset + sp > 0 && !left->transient)) { - loadStreamState(q->nfa, q, sp); - } else { - pushQueueAt(q, 1, MQE_TOP, sp); - nfaQueueInitState(q->nfa, q); - } - } else { // first write ever - pushQueueAt(q, 0, MQE_START, 0); - pushQueueAt(q, 1, MQE_TOP, 0); - nfaQueueInitState(q->nfa, q); - } - } - - s64a loc = (s64a)end - ci->buf_offset - leftfixLag; - assert(loc >= q_cur_loc(q) || left->eager); - assert(leftfixReport != MO_INVALID_IDX); - - if (!is_infix && left->transient) { - s64a start_loc = loc - left->transient; - if (q_cur_loc(q) < start_loc) { - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, start_loc); - pushQueueAt(q, 1, MQE_TOP, start_loc); - nfaQueueInitState(q->nfa, q); - } - } - - if (q_cur_loc(q) < loc || q_last_type(q) != MQE_START) { - if (is_infix) { - if (infixTooOld(q, loc)) { - DEBUG_PRINTF("infix %u died of old age\n", ri); - goto nfa_dead; - } - - reduceInfixQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); - } - - if (!roseLeftfixCheckMiracles(t, left, ci, q, end, is_infix)) { - DEBUG_PRINTF("leftfix %u died due to miracle\n", ri); - goto nfa_dead; - } - -#ifdef DEBUG - debugQueue(q); -#endif - - pushQueueNoMerge(q, MQE_END, loc); - - char rv = nfaQueueExecRose(q->nfa, q, leftfixReport); - if (!rv) { /* nfa is dead */ - DEBUG_PRINTF("leftfix %u died while trying to catch up\n", ri); - goto nfa_dead; - } - - // Queue must have next start loc before we call nfaInAcceptState. - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - - DEBUG_PRINTF("checking for report %u\n", leftfixReport); - DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); - return rv == MO_MATCHES_PENDING; - } else if (q_cur_loc(q) > loc) { - /* an eager leftfix may have already progressed past loc if there is no - * match at loc. 
*/ - assert(left->eager); - return 0; - } else { - assert(q_cur_loc(q) == loc); - DEBUG_PRINTF("checking for report %u\n", leftfixReport); - char rv = nfaInAcceptState(q->nfa, leftfixReport, q); - DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); - return rv; - } - -nfa_dead: - mmbit_unset(activeLeftArray, arCount, ri); - scratch->tctxt.groups &= left->squash_mask; - return 0; -} - -static rose_inline -char roseTestPrefix(const struct RoseEngine *t, struct hs_scratch *scratch, - u32 qi, u32 leftfixLag, ReportID leftfixReport, u64a end) { - return roseTestLeftfix(t, scratch, qi, leftfixLag, leftfixReport, end, 0); -} - -static rose_inline -char roseTestInfix(const struct RoseEngine *t, struct hs_scratch *scratch, - u32 qi, u32 leftfixLag, ReportID leftfixReport, u64a end) { - return roseTestLeftfix(t, scratch, qi, leftfixLag, leftfixReport, end, 1); -} - -static rose_inline -void roseTriggerInfix(const struct RoseEngine *t, struct hs_scratch *scratch, - u64a start, u64a end, u32 qi, u32 topEvent, u8 cancel) { - struct core_info *ci = &scratch->core_info; - s64a loc = (s64a)end - ci->buf_offset; - - u32 ri = queueToLeftIndex(t, qi); - assert(topEvent < MQE_INVALID); - - const struct LeftNfaInfo *left = getLeftInfoByQueue(t, qi); - assert(!left->transient); - - DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, topEvent); - - struct mq *q = scratch->queues + qi; - const struct NfaInfo *info = getNfaInfoByQueue(t, qi); - - char *state = ci->state; - u8 *activeLeftArray = getActiveLeftArray(t, state); - const u32 arCount = t->activeLeftCount; - char alive = mmbit_set(activeLeftArray, arCount, ri); - - if (alive && info->no_retrigger) { - DEBUG_PRINTF("yawn\n"); - return; - } - - struct fatbit *aqa = scratch->aqa; - const u32 qCount = t->queueCount; - - if (alive && nfaSupportsZombie(getNfaByInfo(t, info)) && ci->buf_offset && - !fatbit_isset(aqa, qCount, qi) && isZombie(t, state, left)) { - DEBUG_PRINTF("yawn - zombie\n"); - return; - } - - if (cancel) { - DEBUG_PRINTF("dominating top: (re)init\n"); - fatbit_set(aqa, qCount, qi); - initRoseQueue(t, qi, left, scratch); - pushQueueAt(q, 0, MQE_START, loc); - nfaQueueInitState(q->nfa, q); - } else if (!fatbit_set(aqa, qCount, qi)) { - DEBUG_PRINTF("initing %u\n", qi); - initRoseQueue(t, qi, left, scratch); - if (alive) { - s32 sp = -(s32)loadRoseDelay(t, state, left); - pushQueueAt(q, 0, MQE_START, sp); - loadStreamState(q->nfa, q, sp); - } else { - pushQueueAt(q, 0, MQE_START, loc); - nfaQueueInitState(q->nfa, q); - } - } else if (!alive) { - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - nfaQueueInitState(q->nfa, q); - } else if (isQueueFull(q)) { - reduceInfixQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); - - if (isQueueFull(q)) { - /* still full - reduceInfixQueue did nothing */ - DEBUG_PRINTF("queue %u full (%u items) -> catching up nfa\n", qi, - q->end - q->cur); - pushQueueNoMerge(q, MQE_END, loc); - nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); - - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - } - } - - pushQueueSom(q, topEvent, loc, start); -} - -static rose_inline -hwlmcb_rv_t roseReport(const struct RoseEngine *t, struct hs_scratch *scratch, - u64a end, ReportID onmatch, s32 offset_adjust, - u32 ekey) { - DEBUG_PRINTF("firing callback onmatch=%u, end=%llu\n", onmatch, end); - updateLastMatchOffset(&scratch->tctxt, end); - - int cb_rv = roseDeliverReport(end, onmatch, offset_adjust, scratch, ekey); - if (cb_rv == MO_HALT_MATCHING) { - DEBUG_PRINTF("termination requested\n"); - return 
HWLM_TERMINATE_MATCHING; - } - - if (ekey == INVALID_EKEY || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { - return HWLM_CONTINUE_MATCHING; - } - - return roseHaltIfExhausted(t, scratch); -} - -/* catches up engines enough to ensure any earlier mpv triggers are enqueued - * and then adds the trigger to the mpv queue. */ -static rose_inline -hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t, - struct hs_scratch *scratch, - u32 event, u64a top_squash_distance, - u64a end, const char in_catchup) { - if (!in_catchup && - roseCatchUpMpvFeeders(t, scratch, end) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - return roseHandleChainMatch(t, scratch, event, top_squash_distance, end, - in_catchup); -} - -static rose_inline -void roseHandleSom(struct hs_scratch *scratch, const struct som_operation *sr, - u64a end) { - DEBUG_PRINTF("end=%llu, minMatchOffset=%llu\n", end, - scratch->tctxt.minMatchOffset); - - updateLastMatchOffset(&scratch->tctxt, end); - handleSomInternal(scratch, sr, end); -} - -static rose_inline -hwlmcb_rv_t roseReportSom(const struct RoseEngine *t, - struct hs_scratch *scratch, u64a start, u64a end, - ReportID onmatch, s32 offset_adjust, u32 ekey) { - DEBUG_PRINTF("firing som callback onmatch=%u, start=%llu, end=%llu\n", - onmatch, start, end); - updateLastMatchOffset(&scratch->tctxt, end); - - int cb_rv = roseDeliverSomReport(start, end, onmatch, offset_adjust, - scratch, ekey); - if (cb_rv == MO_HALT_MATCHING) { - DEBUG_PRINTF("termination requested\n"); - return HWLM_TERMINATE_MATCHING; - } - - if (ekey == INVALID_EKEY || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { - return HWLM_CONTINUE_MATCHING; - } - - return roseHaltIfExhausted(t, scratch); -} - -static rose_inline -void roseHandleSomSom(struct hs_scratch *scratch, - const struct som_operation *sr, u64a start, u64a end) { - DEBUG_PRINTF("start=%llu, end=%llu, minMatchOffset=%llu\n", start, end, - scratch->tctxt.minMatchOffset); - - updateLastMatchOffset(&scratch->tctxt, end); - setSomFromSomAware(scratch, sr, start, end); -} - -static rose_inline -hwlmcb_rv_t roseSetExhaust(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 ekey) { - assert(scratch); - assert(scratch->magic == SCRATCH_MAGIC); - - struct core_info *ci = &scratch->core_info; - - assert(!can_stop_matching(scratch)); - assert(!isExhausted(ci->rose, ci->exhaustionVector, ekey)); - - markAsMatched(ci->rose, ci->exhaustionVector, ekey); - - return roseHaltIfExhausted(t, scratch); -} - -static really_inline -int reachHasBit(const u8 *reach, u8 c) { - return !!(reach[c / 8U] & (u8)1U << (c % 8U)); -} - -/* - * Generate a 8-byte valid_mask with #high bytes 0 from the highest side - * and #low bytes 0 from the lowest side - * and (8 - high - low) bytes '0xff' in the middle. - */ -static rose_inline -u64a generateValidMask(const s32 high, const s32 low) { - assert(high + low < 8); - DEBUG_PRINTF("high %d low %d\n", high, low); - const u64a ones = ~0ull; - return (ones << ((high + low) * 8)) >> (high * 8); -} - -/* - * Do the single-byte check if only one lookaround entry exists - * and it's a single mask. - * Return success if the byte is in the future or before history - * (offset is greater than (history) buffer length). 
- */ -static rose_inline -int roseCheckByte(const struct core_info *ci, u8 and_mask, u8 cmp_mask, - u8 negation, s32 checkOffset, u64a end) { - DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, - ci->buf_offset, ci->buf_offset + ci->len); - if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - - const s64a base_offset = end - ci->buf_offset; - s64a offset = base_offset + checkOffset; - DEBUG_PRINTF("checkOffset=%d offset=%lld\n", checkOffset, offset); - u8 c; - if (offset >= 0) { - if (offset >= (s64a)ci->len) { - DEBUG_PRINTF("in the future\n"); - return 1; - } else { - assert(offset < (s64a)ci->len); - DEBUG_PRINTF("check byte in buffer\n"); - c = ci->buf[offset]; - } - } else { - if (offset >= -(s64a) ci->hlen) { - DEBUG_PRINTF("check byte in history\n"); - c = ci->hbuf[ci->hlen + offset]; - } else { - DEBUG_PRINTF("before history and return\n"); - return 1; - } - } - - if (((and_mask & c) != cmp_mask) ^ negation) { - DEBUG_PRINTF("char 0x%02x at offset %lld failed byte check\n", - c, offset); - return 0; - } - - DEBUG_PRINTF("real offset=%lld char=%02x\n", offset, c); - DEBUG_PRINTF("OK :)\n"); - return 1; -} - -static rose_inline -int roseCheckMask(const struct core_info *ci, u64a and_mask, u64a cmp_mask, - u64a neg_mask, s32 checkOffset, u64a end) { - const s64a base_offset = (s64a)end - ci->buf_offset; - s64a offset = base_offset + checkOffset; - DEBUG_PRINTF("rel offset %lld\n",base_offset); - DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); - if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - - u64a data = 0; - u64a valid_data_mask = ~0ULL; // mask for validate check. - //A 0xff byte means that this byte is in the buffer. - s32 shift_l = 0; // size of bytes in the future. - s32 shift_r = 0; // size of bytes before the history. - s32 h_len = 0; // size of bytes in the history buffer. - s32 c_len = 8; // size of bytes in the current buffer. - if (offset < 0) { - // in or before history buffer. - if (offset + 8 <= -(s64a)ci->hlen) { - DEBUG_PRINTF("before history and return\n"); - return 1; - } - const u8 *h_start = ci->hbuf; // start pointer in history buffer. - if (offset < -(s64a)ci->hlen) { - // some bytes are before history. - shift_r = -(offset + (s64a)ci->hlen); - DEBUG_PRINTF("shift_r %d", shift_r); - } else { - h_start += ci->hlen + offset; - } - if (offset + 7 < 0) { - DEBUG_PRINTF("all in history buffer\n"); - data = partial_load_u64a(h_start, 8 - shift_r); - } else { - // history part - c_len = offset + 8; - h_len = -offset - shift_r; - DEBUG_PRINTF("%d bytes in history\n", h_len); - s64a data_h = 0; - data_h = partial_load_u64a(h_start, h_len); - // current part - if (c_len > (s64a)ci->len) { - shift_l = c_len - ci->len; - c_len = ci->len; - } - data = partial_load_u64a(ci->buf, c_len); - data <<= h_len << 3; - data |= data_h; - } - if (shift_r) { - data <<= shift_r << 3; - } - } else { - // current buffer. - if (offset + c_len > (s64a)ci->len) { - if (offset >= (s64a)ci->len) { - DEBUG_PRINTF("all in the future\n"); - return 1; - } - // some bytes in the future. 
-            shift_l = offset + c_len - ci->len;
-            c_len = ci->len - offset;
-            data = partial_load_u64a(ci->buf + offset, c_len);
-        } else {
-            data = unaligned_load_u64a(ci->buf + offset);
-        }
-    }
-
-    if (shift_l || shift_r) {
-        valid_data_mask = generateValidMask(shift_l, shift_r);
-    }
-    DEBUG_PRINTF("valid_data_mask %llx\n", valid_data_mask);
-
-    if (validateMask(data, valid_data_mask,
-                     and_mask, cmp_mask, neg_mask)) {
-        DEBUG_PRINTF("check mask successfully\n");
-        return 1;
-    } else {
-        return 0;
-    }
-}
-
-static rose_inline
-int roseCheckMask32(const struct core_info *ci, const u8 *and_mask,
-                    const u8 *cmp_mask, const u32 neg_mask,
-                    s32 checkOffset, u64a end) {
-    const s64a base_offset = (s64a)end - ci->buf_offset;
-    s64a offset = base_offset + checkOffset;
-    DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
-    DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
-
-    if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
-        DEBUG_PRINTF("too early, fail\n");
-        return 0;
-    }
-
-    m256 data = zeroes256(); // consists of the following four parts.
-    s32 c_shift = 0; // blank bytes after current.
-    s32 h_shift = 0; // blank bytes before history.
-    s32 h_len = 32;  // number of bytes from history buffer.
-    s32 c_len = 0;   // number of bytes from current buffer.
-    /* h_shift + h_len + c_len + c_shift == 32 must hold. */
-
-    if (offset < 0) {
-        s32 h_offset = 0; // the start offset in history buffer.
-        if (offset < -(s64a)ci->hlen) {
-            if (offset + 32 <= -(s64a)ci->hlen) {
-                DEBUG_PRINTF("all before history\n");
-                return 1;
-            }
-            h_shift = -(offset + (s64a)ci->hlen);
-            h_len = 32 - h_shift;
-        } else {
-            h_offset = ci->hlen + offset;
-        }
-        if (offset + 32 > 0) {
-            // part in current buffer.
-            c_len = offset + 32;
-            h_len = -(offset + h_shift);
-            if (c_len > (s64a)ci->len) {
-                // out of current buffer.
-                c_shift = c_len - ci->len;
-                c_len = ci->len;
-            }
-            copy_upto_32_bytes((u8 *)&data - offset, ci->buf, c_len);
-        }
-        assert(h_shift + h_len + c_len + c_shift == 32);
-        copy_upto_32_bytes((u8 *)&data + h_shift, ci->hbuf + h_offset, h_len);
-    } else {
-        if (offset + 32 > (s64a)ci->len) {
-            if (offset >= (s64a)ci->len) {
-                DEBUG_PRINTF("all in the future.\n");
-                return 1;
-            }
-            c_len = ci->len - offset;
-            c_shift = 32 - c_len;
-            copy_upto_32_bytes((u8 *)&data, ci->buf + offset, c_len);
-        } else {
-            data = loadu256(ci->buf + offset);
-        }
-    }
-    DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift);
-    DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len);
-    // valid_data_mask masks off bytes before history / in the future.
-    u32 valid_data_mask;
-    valid_data_mask = (~0u) << (h_shift + c_shift) >> (c_shift);
-
-    m256 and_mask_m256 = loadu256(and_mask);
-    m256 cmp_mask_m256 = loadu256(cmp_mask);
-    if (validateMask32(data, valid_data_mask, and_mask_m256,
-                       cmp_mask_m256, neg_mask)) {
-        DEBUG_PRINTF("Mask32 passed\n");
-        return 1;
-    }
-    return 0;
-}
-
-// Get 128/256 bits of data from the history and current buffers.
-// Returns the data and sets valid_data_mask.
-static rose_inline
-u32 getBufferDataComplex(const struct core_info *ci, const s64a loc,
-                         u8 *data, const u32 data_len) {
-    assert(data_len == 16 || data_len == 32);
-    s32 c_shift = 0;      // blank bytes after current.
-    s32 h_shift = 0;      // blank bytes before history.
-    s32 h_len = data_len; // number of bytes from history buffer.
-    s32 c_len = 0;        // number of bytes from current buffer.
-    if (loc < 0) {
-        s32 h_offset = 0; // the start offset in history buffer.
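The same four-way window split recurs in roseCheckMask, roseCheckMask32 and getBufferDataComplex. The standalone sketch below (hypothetical names; only the arithmetic mirrors the functions above) re-derives the split for an 8-byte window poking past the end of the current buffer, and then the lane-validity mask that generateValidMask() builds from the two shifts:

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>

// Length of the intersection of window [b, e) with range [lo, hi).
static int64_t overlap(int64_t b, int64_t e, int64_t lo, int64_t hi) {
    int64_t s = std::max(b, lo);
    int64_t t = std::min(e, hi);
    return t > s ? t - s : 0;
}

int main() {
    const int64_t hlen = 16, len = 40; // history / current buffer lengths
    const int64_t width = 8;           // 8-byte window, as in roseCheckMask
    int64_t loc = 36;                  // window [36, 44) runs past the end
    int64_t b = loc, e = loc + width;

    int64_t h_shift = std::max<int64_t>(0, std::min(e, -hlen) - b);
    int64_t h_len   = overlap(b, e, -hlen, 0); // bytes from history
    int64_t c_len   = overlap(b, e, 0, len);   // bytes from current buffer
    int64_t c_shift = std::max<int64_t>(0, e - std::max(b, len));
    assert(h_shift + h_len + c_len + c_shift == width); // the invariant
    assert(c_len == 4 && c_shift == 4 && h_len == 0 && h_shift == 0);

    // generateValidMask(shift_l = c_shift, shift_r = h_shift) then leaves
    // 0xff only in the four low (actually loaded) byte lanes:
    uint64_t ones = ~0ull;
    uint64_t valid = (ones << ((c_shift + h_shift) * 8)) >> (c_shift * 8);
    assert(valid == 0x00000000ffffffffull);
    return 0;
}
```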
- if (loc < -(s64a)ci->hlen) { - if (loc + data_len <= -(s64a)ci->hlen) { - DEBUG_PRINTF("all before history\n"); - return 0; - } - h_shift = -(loc + (s64a)ci->hlen); - h_len = data_len - h_shift; - } else { - h_offset = ci->hlen + loc; - } - if (loc + data_len > 0) { - // part in current buffer. - c_len = loc + data_len; - h_len = -(loc + h_shift); - if (c_len > (s64a)ci->len) { - // out of current buffer. - c_shift = c_len - ci->len; - c_len = ci->len; - } - copy_upto_32_bytes(data - loc, ci->buf, c_len); - } - assert(h_shift + h_len + c_len + c_shift == (s32)data_len); - copy_upto_32_bytes(data + h_shift, ci->hbuf + h_offset, h_len); - } else { - if (loc + data_len > (s64a)ci->len) { - if (loc >= (s64a)ci->len) { - DEBUG_PRINTF("all in the future.\n"); - return 0; - } - c_len = ci->len - loc; - c_shift = data_len - c_len; - copy_upto_32_bytes(data, ci->buf + loc, c_len); - } else { - if (data_len == 16) { - storeu128(data, loadu128(ci->buf + loc)); - return 0xffff; - } else { - storeu256(data, loadu256(ci->buf + loc)); - return 0xffffffff; - } - } - } - DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift); - DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len); - - if (data_len == 16) { - return (u16)(0xffff << (h_shift + c_shift)) >> c_shift; - } else { - return (~0u) << (h_shift + c_shift) >> c_shift; - } -} - -static rose_inline -m128 getData128(const struct core_info *ci, s64a offset, u32 *valid_data_mask) { - if (offset > 0 && offset + sizeof(m128) <= ci->len) { - *valid_data_mask = 0xffff; - return loadu128(ci->buf + offset); - } - ALIGN_DIRECTIVE u8 data[sizeof(m128)]; - *valid_data_mask = getBufferDataComplex(ci, offset, data, 16); - return *(m128 *)data; -} - -static rose_inline -m256 getData256(const struct core_info *ci, s64a offset, u32 *valid_data_mask) { - if (offset > 0 && offset + sizeof(m256) <= ci->len) { - *valid_data_mask = ~0u; - return loadu256(ci->buf + offset); - } - ALIGN_AVX_DIRECTIVE u8 data[sizeof(m256)]; - *valid_data_mask = getBufferDataComplex(ci, offset, data, 32); - return *(m256 *)data; -} - -static rose_inline -int roseCheckShufti16x8(const struct core_info *ci, const u8 *nib_mask, - const u8 *bucket_select_mask, u32 neg_mask, - s32 checkOffset, u64a end) { - const s64a base_offset = (s64a)end - ci->buf_offset; - s64a offset = base_offset + checkOffset; - DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); - DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); - - if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - - u32 valid_data_mask = 0; - m128 data = getData128(ci, offset, &valid_data_mask); - if (unlikely(!valid_data_mask)) { - return 1; - } - - m256 nib_mask_m256 = loadu256(nib_mask); - m128 bucket_select_mask_m128 = loadu128(bucket_select_mask); - if (validateShuftiMask16x8(data, nib_mask_m256, - bucket_select_mask_m128, - neg_mask, valid_data_mask)) { - DEBUG_PRINTF("check shufti 16x8 successfully\n"); - return 1; - } else { - return 0; - } -} - -static rose_inline -int roseCheckShufti16x16(const struct core_info *ci, const u8 *hi_mask, - const u8 *lo_mask, const u8 *bucket_select_mask, - u32 neg_mask, s32 checkOffset, u64a end) { - const s64a base_offset = (s64a)end - ci->buf_offset; - s64a offset = base_offset + checkOffset; - DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); - DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); - - if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { - DEBUG_PRINTF("too 
early, fail\n"); - return 0; - } - - u32 valid_data_mask = 0; - m128 data = getData128(ci, offset, &valid_data_mask); - if (unlikely(!valid_data_mask)) { - return 1; - } - - m256 data_m256 = set2x128(data); - m256 hi_mask_m256 = loadu256(hi_mask); - m256 lo_mask_m256 = loadu256(lo_mask); - m256 bucket_select_mask_m256 = loadu256(bucket_select_mask); - if (validateShuftiMask16x16(data_m256, hi_mask_m256, lo_mask_m256, - bucket_select_mask_m256, - neg_mask, valid_data_mask)) { - DEBUG_PRINTF("check shufti 16x16 successfully\n"); - return 1; - } else { - return 0; - } -} - -static rose_inline -int roseCheckShufti32x8(const struct core_info *ci, const u8 *hi_mask, - const u8 *lo_mask, const u8 *bucket_select_mask, - u32 neg_mask, s32 checkOffset, u64a end) { - const s64a base_offset = (s64a)end - ci->buf_offset; - s64a offset = base_offset + checkOffset; - DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); - DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); - - if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - - u32 valid_data_mask = 0; - m256 data = getData256(ci, offset, &valid_data_mask); - if (unlikely(!valid_data_mask)) { - return 1; - } - - m128 hi_mask_m128 = loadu128(hi_mask); - m128 lo_mask_m128 = loadu128(lo_mask); - m256 hi_mask_m256 = set2x128(hi_mask_m128); - m256 lo_mask_m256 = set2x128(lo_mask_m128); - m256 bucket_select_mask_m256 = loadu256(bucket_select_mask); - if (validateShuftiMask32x8(data, hi_mask_m256, lo_mask_m256, - bucket_select_mask_m256, - neg_mask, valid_data_mask)) { - DEBUG_PRINTF("check shufti 32x8 successfully\n"); - return 1; - } else { - return 0; - } -} - -static rose_inline -int roseCheckShufti32x16(const struct core_info *ci, const u8 *hi_mask, - const u8 *lo_mask, const u8 *bucket_select_mask_hi, - const u8 *bucket_select_mask_lo, u32 neg_mask, - s32 checkOffset, u64a end) { - const s64a base_offset = (s64a)end - ci->buf_offset; - s64a offset = base_offset + checkOffset; - DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); - DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); - - if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - - u32 valid_data_mask = 0; - m256 data = getData256(ci, offset, &valid_data_mask); - if (unlikely(!valid_data_mask)) { - return 1; - } - - m256 hi_mask_1 = loadu2x128(hi_mask); - m256 hi_mask_2 = loadu2x128(hi_mask + 16); - m256 lo_mask_1 = loadu2x128(lo_mask); - m256 lo_mask_2 = loadu2x128(lo_mask + 16); - - m256 bucket_mask_hi = loadu256(bucket_select_mask_hi); - m256 bucket_mask_lo = loadu256(bucket_select_mask_lo); - if (validateShuftiMask32x16(data, hi_mask_1, hi_mask_2, - lo_mask_1, lo_mask_2, bucket_mask_hi, - bucket_mask_lo, neg_mask, valid_data_mask)) { - DEBUG_PRINTF("check shufti 32x16 successfully\n"); - return 1; - } else { - return 0; - } -} - -static rose_inline -int roseCheckSingleLookaround(const struct RoseEngine *t, - const struct hs_scratch *scratch, - s8 checkOffset, u32 lookaroundReachIndex, - u64a end) { - assert(lookaroundReachIndex != MO_INVALID_IDX); - const struct core_info *ci = &scratch->core_info; - DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, - ci->buf_offset, ci->buf_offset + ci->len); - - const s64a base_offset = end - ci->buf_offset; - const s64a offset = base_offset + checkOffset; - DEBUG_PRINTF("base_offset=%lld\n", base_offset); - DEBUG_PRINTF("checkOffset=%d offset=%lld\n", 
                 checkOffset, offset);
-
-    if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
-        DEBUG_PRINTF("too early, fail\n");
-        return 0;
-    }
-
-    const u8 *reach = getByOffset(t, lookaroundReachIndex);
-
-    u8 c;
-    if (offset >= 0 && offset < (s64a)ci->len) {
-        c = ci->buf[offset];
-    } else if (offset < 0 && offset >= -(s64a)ci->hlen) {
-        c = ci->hbuf[ci->hlen + offset];
-    } else {
-        return 1;
-    }
-
-    if (!reachHasBit(reach, c)) {
-        DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
-        return 0;
-    }
-
-    DEBUG_PRINTF("OK :)\n");
-    return 1;
-}
-
-/**
- * \brief Scan around a literal, checking that the "lookaround" reach masks
- * are satisfied.
- */
-static rose_inline
-int roseCheckLookaround(const struct RoseEngine *t,
-                        const struct hs_scratch *scratch,
-                        u32 lookaroundLookIndex, u32 lookaroundReachIndex,
-                        u32 lookaroundCount, u64a end) {
-    assert(lookaroundLookIndex != MO_INVALID_IDX);
-    assert(lookaroundReachIndex != MO_INVALID_IDX);
-    assert(lookaroundCount > 0);
-
-    const struct core_info *ci = &scratch->core_info;
-    DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
-                 ci->buf_offset, ci->buf_offset + ci->len);
-
-    const s8 *look = getByOffset(t, lookaroundLookIndex);
-    const s8 *look_end = look + lookaroundCount;
-    assert(look < look_end);
-
-    const u8 *reach = getByOffset(t, lookaroundReachIndex);
-
-    // The following code assumes that the lookaround structures are ordered
-    // by increasing offset.
-
-    const s64a base_offset = end - ci->buf_offset;
-    DEBUG_PRINTF("base_offset=%lld\n", base_offset);
-    DEBUG_PRINTF("first look has offset %d\n", *look);
-
-    // If our first check tells us we need to look at an offset before the
-    // start of the stream, this role cannot match.
-    if (unlikely(*look < 0 && (u64a)(0 - *look) > end)) {
-        DEBUG_PRINTF("too early, fail\n");
-        return 0;
-    }
-
-    // Skip over offsets that are before the history buffer.
-    do {
-        s64a offset = base_offset + *look;
-        if (offset >= -(s64a)ci->hlen) {
-            goto in_history;
-        }
-        DEBUG_PRINTF("look=%d before history\n", *look);
-        look++;
-        reach += REACH_BITVECTOR_LEN;
-    } while (look < look_end);
-
-    // History buffer.
-    DEBUG_PRINTF("scan history (%zu looks left)\n", look_end - look);
-    for (; look < look_end; ++look, reach += REACH_BITVECTOR_LEN) {
-    in_history:
-        ;
-        s64a offset = base_offset + *look;
-        DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset);
-
-        if (offset >= 0) {
-            DEBUG_PRINTF("in buffer\n");
-            goto in_buffer;
-        }
-
-        assert(offset >= -(s64a)ci->hlen && offset < 0);
-        u8 c = ci->hbuf[ci->hlen + offset];
-        if (!reachHasBit(reach, c)) {
-            DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
-            return 0;
-        }
-    }
-    // Current buffer.
-    DEBUG_PRINTF("scan buffer (%zu looks left)\n", look_end - look);
-    for (; look < look_end; ++look, reach += REACH_BITVECTOR_LEN) {
-    in_buffer:
-        ;
-        s64a offset = base_offset + *look;
-        DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset);
-
-        if (offset >= (s64a)ci->len) {
-            DEBUG_PRINTF("in the future\n");
-            break;
-        }
-
-        assert(offset >= 0 && offset < (s64a)ci->len);
-        u8 c = ci->buf[offset];
-        if (!reachHasBit(reach, c)) {
-            DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
-            return 0;
-        }
-    }
-
-    DEBUG_PRINTF("OK :)\n");
-    return 1;
-}
-
-/**
- * \brief Try to find a matching path using the path mask for each
- * lookaround location.
- */
-static rose_inline
-int roseMultipathLookaround(const struct RoseEngine *t,
-                            const struct hs_scratch *scratch,
-                            u32 multipathLookaroundLookIndex,
-                            u32 multipathLookaroundReachIndex,
-                            u32 multipathLookaroundCount,
-                            s32 last_start, const u8 *start_mask,
-                            u64a end) {
-    assert(multipathLookaroundCount > 0);
-
-    const struct core_info *ci = &scratch->core_info;
-    DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
-                 ci->buf_offset, ci->buf_offset + ci->len);
-
-    const s8 *look = getByOffset(t, multipathLookaroundLookIndex);
-    const s8 *look_end = look + multipathLookaroundCount;
-    assert(look < look_end);
-
-    const u8 *reach = getByOffset(t, multipathLookaroundReachIndex);
-
-    const s64a base_offset = (s64a)end - ci->buf_offset;
-    DEBUG_PRINTF("base_offset=%lld\n", base_offset);
-
-    u8 path = 0xff;
-
-    assert(last_start < 0);
-
-    if (unlikely((u64a)(0 - last_start) > end)) {
-        DEBUG_PRINTF("too early, fail\n");
-        return 0;
-    }
-
-    s8 base_look_offset = *look;
-    do {
-        s64a offset = base_offset + *look;
-        u32 start_offset = (u32)(*look - base_look_offset);
-        DEBUG_PRINTF("start_mask[%u] = %x\n", start_offset,
-                     start_mask[start_offset]);
-        path = start_mask[start_offset];
-        if (offset >= -(s64a)ci->hlen) {
-            break;
-        }
-        DEBUG_PRINTF("look=%d before history\n", *look);
-        look++;
-        reach += MULTI_REACH_BITVECTOR_LEN;
-    } while (look < look_end);
-
-    DEBUG_PRINTF("scan history (%zu looks left)\n", look_end - look);
-    for (; look < look_end; ++look, reach += MULTI_REACH_BITVECTOR_LEN) {
-        s64a offset = base_offset + *look;
-        DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset);
-
-        if (offset >= 0) {
-            DEBUG_PRINTF("in buffer\n");
-            break;
-        }
-
-        assert(offset >= -(s64a)ci->hlen && offset < 0);
-        u8 c = ci->hbuf[ci->hlen + offset];
-        path &= reach[c];
-        DEBUG_PRINTF("reach[%x] = %02x path = %02x\n", c, reach[c], path);
-        if (!path) {
-            DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
-            return 0;
-        }
-    }
-
-    DEBUG_PRINTF("scan buffer (%zu looks left)\n", look_end - look);
-    for (; look < look_end; ++look, reach += MULTI_REACH_BITVECTOR_LEN) {
-        s64a offset = base_offset + *look;
-        DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset);
-
-        if (offset >= (s64a)ci->len) {
-            DEBUG_PRINTF("in the future\n");
-            break;
-        }
-
-        assert(offset >= 0 && offset < (s64a)ci->len);
-        u8 c = ci->buf[offset];
-        path &= reach[c];
-        DEBUG_PRINTF("reach[%x] = %02x path = %02x\n", c, reach[c], path);
-        if (!path) {
-            DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
-            return 0;
-        }
-    }
-
-    DEBUG_PRINTF("OK :)\n");
-    return 1;
-}
-
-static never_inline
-int roseCheckMultipathShufti16x8(const struct hs_scratch *scratch,
-                                 const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_16x8 *ri,
-                                 u64a end) {
-    const struct core_info *ci = &scratch->core_info;
-    s32 checkOffset = ri->base_offset;
-    const s64a base_offset = (s64a)end - ci->buf_offset;
-    s64a offset = base_offset + checkOffset;
-    DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
-    DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
-
-    assert(ri->last_start <= 0);
-    if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
-        if ((u64a)(0 - ri->last_start) > end) {
-            DEBUG_PRINTF("too early, fail\n");
-            return 0;
-        }
-    }
-
-    u32 valid_data_mask;
-    m128 data_init = getData128(ci, offset, &valid_data_mask);
-    m128 data_select_mask = loadu128(ri->data_select_mask);
-
-    u32 valid_path_mask = 0;
-    if (unlikely(!(valid_data_mask & 1))) {
-        DEBUG_PRINTF("lose part of backward data\n");
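The multipath checks above and below all rest on the same idea as roseMultipathLookaround: each bit of `path` tracks one candidate path, and the reach entry for the character seen at a location clears the paths that cannot accept it. A toy model of that narrowing (the reach values on the right are invented for the demonstration):

```cpp
#include <cassert>
#include <cstdint>

int main() {
    // Two candidate paths: bit 0 = path A, bit 1 = path B.
    uint8_t path = 0x3;  // start_mask: both paths alive at the first look
    path &= 0x3;         // location 1: char acceptable on both paths
    assert(path == 0x3);
    path &= 0x2;         // location 2: char acceptable on path B only
    assert(path == 0x2); // path A is dead, path B survives
    path &= 0x1;         // location 3: char acceptable on path A only
    assert(path == 0x0); // no surviving path: the check returns 0
    return 0;
}
```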
DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); - - m128 expand_valid; - u64a expand_mask = 0x8080808080808080ULL; - u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); - u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); - DEBUG_PRINTF("expand_hi %llx\n", valid_hi); - DEBUG_PRINTF("expand_lo %llx\n", valid_lo); - expand_valid = set64x2(valid_hi, valid_lo); - valid_path_mask = ~movemask128(pshufb_m128(expand_valid, - data_select_mask)); - } - - m128 data = pshufb_m128(data_init, data_select_mask); - m256 nib_mask = loadu256(ri->nib_mask); - m128 bucket_select_mask = loadu128(ri->bucket_select_mask); - - u32 hi_bits_mask = ri->hi_bits_mask; - u32 lo_bits_mask = ri->lo_bits_mask; - u32 neg_mask = ri->neg_mask; - - if (validateMultipathShuftiMask16x8(data, nib_mask, - bucket_select_mask, - hi_bits_mask, lo_bits_mask, - neg_mask, valid_path_mask)) { - DEBUG_PRINTF("check multi-path shufti-16x8 successfully\n"); - return 1; - } else { - return 0; - } -} - -static never_inline -int roseCheckMultipathShufti32x8(const struct hs_scratch *scratch, - const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x8 *ri, - u64a end) { - const struct core_info *ci = &scratch->core_info; - s32 checkOffset = ri->base_offset; - const s64a base_offset = (s64a)end - ci->buf_offset; - s64a offset = base_offset + checkOffset; - DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); - DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); - - assert(ri->last_start <= 0); - if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { - if ((u64a)(0 - ri->last_start) > end) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - } - - u32 valid_data_mask; - m128 data_m128 = getData128(ci, offset, &valid_data_mask); - m256 data_double = set2x128(data_m128); - m256 data_select_mask = loadu256(ri->data_select_mask); - - u32 valid_path_mask = 0; - m256 expand_valid; - if (unlikely(!(valid_data_mask & 1))) { - DEBUG_PRINTF("lose part of backward data\n"); - DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); - - u64a expand_mask = 0x8080808080808080ULL; - u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); - u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); - DEBUG_PRINTF("expand_hi %llx\n", valid_hi); - DEBUG_PRINTF("expand_lo %llx\n", valid_lo); - expand_valid = set64x4(valid_hi, valid_lo, valid_hi, - valid_lo); - valid_path_mask = ~movemask256(pshufb_m256(expand_valid, - data_select_mask)); - } - - m256 data = pshufb_m256(data_double, data_select_mask); - m256 hi_mask = loadu2x128(ri->hi_mask); - m256 lo_mask = loadu2x128(ri->lo_mask); - m256 bucket_select_mask = loadu256(ri->bucket_select_mask); - - u32 hi_bits_mask = ri->hi_bits_mask; - u32 lo_bits_mask = ri->lo_bits_mask; - u32 neg_mask = ri->neg_mask; - - if (validateMultipathShuftiMask32x8(data, hi_mask, lo_mask, - bucket_select_mask, - hi_bits_mask, lo_bits_mask, - neg_mask, valid_path_mask)) { - DEBUG_PRINTF("check multi-path shufti-32x8 successfully\n"); - return 1; - } else { - return 0; - } -} - -static never_inline -int roseCheckMultipathShufti32x16(const struct hs_scratch *scratch, - const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x16 *ri, - u64a end) { - const struct core_info *ci = &scratch->core_info; - const s64a base_offset = (s64a)end - ci->buf_offset; - s32 checkOffset = ri->base_offset; - s64a offset = base_offset + checkOffset; - DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); - DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); - - 
assert(ri->last_start <= 0); - if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { - if ((u64a)(0 - ri->last_start) > end) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - } - - u32 valid_data_mask; - m128 data_m128 = getData128(ci, offset, &valid_data_mask); - m256 data_double = set2x128(data_m128); - m256 data_select_mask = loadu256(ri->data_select_mask); - - u32 valid_path_mask = 0; - m256 expand_valid; - if (unlikely(!(valid_data_mask & 1))) { - DEBUG_PRINTF("lose part of backward data\n"); - DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); - - u64a expand_mask = 0x8080808080808080ULL; - u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); - u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); - DEBUG_PRINTF("expand_hi %llx\n", valid_hi); - DEBUG_PRINTF("expand_lo %llx\n", valid_lo); - expand_valid = set64x4(valid_hi, valid_lo, valid_hi, - valid_lo); - valid_path_mask = ~movemask256(pshufb_m256(expand_valid, - data_select_mask)); - } - - m256 data = pshufb_m256(data_double, data_select_mask); - - m256 hi_mask_1 = loadu2x128(ri->hi_mask); - m256 hi_mask_2 = loadu2x128(ri->hi_mask + 16); - m256 lo_mask_1 = loadu2x128(ri->lo_mask); - m256 lo_mask_2 = loadu2x128(ri->lo_mask + 16); - - m256 bucket_select_mask_hi = loadu256(ri->bucket_select_mask_hi); - m256 bucket_select_mask_lo = loadu256(ri->bucket_select_mask_lo); - - u32 hi_bits_mask = ri->hi_bits_mask; - u32 lo_bits_mask = ri->lo_bits_mask; - u32 neg_mask = ri->neg_mask; - - if (validateMultipathShuftiMask32x16(data, hi_mask_1, hi_mask_2, - lo_mask_1, lo_mask_2, - bucket_select_mask_hi, - bucket_select_mask_lo, - hi_bits_mask, lo_bits_mask, - neg_mask, valid_path_mask)) { - DEBUG_PRINTF("check multi-path shufti-32x16 successfully\n"); - return 1; - } else { - return 0; - } -} - -static never_inline -int roseCheckMultipathShufti64(const struct hs_scratch *scratch, - const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64 *ri, - u64a end) { - const struct core_info *ci = &scratch->core_info; - const s64a base_offset = (s64a)end - ci->buf_offset; - s32 checkOffset = ri->base_offset; - s64a offset = base_offset + checkOffset; - DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); - DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); - - if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { - if ((u64a)(0 - ri->last_start) > end) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - } - - u32 valid_data_mask; - m128 data_m128 = getData128(ci, offset, &valid_data_mask); - m256 data_m256 = set2x128(data_m128); - m256 data_select_mask_1 = loadu256(ri->data_select_mask); - m256 data_select_mask_2 = loadu256(ri->data_select_mask + 32); - - u64a valid_path_mask = 0; - m256 expand_valid; - if (unlikely(!(valid_data_mask & 1))) { - DEBUG_PRINTF("lose part of backward data\n"); - DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); - - u64a expand_mask = 0x8080808080808080ULL; - u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); - u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); - DEBUG_PRINTF("expand_hi %llx\n", valid_hi); - DEBUG_PRINTF("expand_lo %llx\n", valid_lo); - expand_valid = set64x4(valid_hi, valid_lo, valid_hi, - valid_lo); - u32 valid_path_1 = movemask256(pshufb_m256(expand_valid, - data_select_mask_1)); - u32 valid_path_2 = movemask256(pshufb_m256(expand_valid, - data_select_mask_2)); - valid_path_mask = ~((u64a)valid_path_1 | (u64a)valid_path_2 << 32); - } - - m256 data_1 = pshufb_m256(data_m256, data_select_mask_1); - m256 
data_2 = pshufb_m256(data_m256, data_select_mask_2); - - m256 hi_mask = loadu2x128(ri->hi_mask); - m256 lo_mask = loadu2x128(ri->lo_mask); - - m256 bucket_select_mask_1 = loadu256(ri->bucket_select_mask); - m256 bucket_select_mask_2 = loadu256(ri->bucket_select_mask + 32); - - u64a hi_bits_mask = ri->hi_bits_mask; - u64a lo_bits_mask = ri->lo_bits_mask; - u64a neg_mask = ri->neg_mask; - - if (validateMultipathShuftiMask64(data_1, data_2, hi_mask, lo_mask, - bucket_select_mask_1, - bucket_select_mask_2, hi_bits_mask, - lo_bits_mask, neg_mask, - valid_path_mask)) { - DEBUG_PRINTF("check multi-path shufti-64 successfully\n"); - return 1; - } else { - return 0; - } -} - -int roseNfaEarliestSom(u64a start, u64a end, ReportID id, void *context); - -static rose_inline -u64a roseGetHaigSom(const struct RoseEngine *t, struct hs_scratch *scratch, - const u32 qi, UNUSED const u32 leftfixLag) { - u32 ri = queueToLeftIndex(t, qi); - - UNUSED const struct LeftNfaInfo *left = getLeftTable(t) + ri; - - DEBUG_PRINTF("testing %s prefix %u/%u with lag %u (maxLag=%u)\n", - left->transient ? "transient" : "active", ri, qi, - leftfixLag, left->maxLag); - - assert(leftfixLag <= left->maxLag); - - struct mq *q = scratch->queues + qi; - - u64a start = ~0ULL; - - /* switch the callback + context for a fun one */ - q->cb = roseNfaEarliestSom; - q->context = &start; - - nfaReportCurrentMatches(q->nfa, q); - - /* restore the old callback + context */ - q->cb = roseNfaAdaptor; - q->context = NULL; - DEBUG_PRINTF("earliest som is %llu\n", start); - return start; -} - -static rose_inline -char roseCheckBounds(u64a end, u64a min_bound, u64a max_bound) { - DEBUG_PRINTF("check offset=%llu against bounds [%llu,%llu]\n", end, - min_bound, max_bound); - assert(min_bound <= max_bound); - return end >= min_bound && end <= max_bound; -} - -static rose_inline -hwlmcb_rv_t roseEnginesEod(const struct RoseEngine *rose, - struct hs_scratch *scratch, u64a offset, - u32 iter_offset) { - const char is_streaming = rose->mode != HS_MODE_BLOCK; - - /* data, len is used for state decompress, should be full available data */ - u8 key = 0; - if (is_streaming) { - const u8 *eod_data = scratch->core_info.hbuf; - size_t eod_len = scratch->core_info.hlen; - key = eod_len ? eod_data[eod_len - 1] : 0; - } - - const u8 *aa = getActiveLeafArray(rose, scratch->core_info.state); - const u32 aaCount = rose->activeArrayCount; - const u32 qCount = rose->queueCount; - struct fatbit *aqa = scratch->aqa; - - const struct mmbit_sparse_iter *it = getByOffset(rose, iter_offset); - assert(ISALIGNED(it)); - - u32 idx = 0; - struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; - - for (u32 qi = mmbit_sparse_iter_begin(aa, aaCount, &idx, it, si_state); - qi != MMB_INVALID; - qi = mmbit_sparse_iter_next(aa, aaCount, qi, &idx, it, si_state)) { - DEBUG_PRINTF("checking nfa %u\n", qi); - struct mq *q = scratch->queues + qi; - if (!fatbit_set(aqa, qCount, qi)) { - initQueue(q, qi, rose, scratch); - } - - assert(q->nfa == getNfaByQueue(rose, qi)); - assert(nfaAcceptsEod(q->nfa)); - - if (is_streaming) { - // Decompress stream state. 
- nfaExpandState(q->nfa, q->state, q->streamState, offset, key); - } - - if (nfaCheckFinalState(q->nfa, q->state, q->streamState, offset, - roseReportAdaptor, - scratch) == MO_HALT_MATCHING) { - DEBUG_PRINTF("user instructed us to stop\n"); - return HWLM_TERMINATE_MATCHING; - } - } - - return HWLM_CONTINUE_MATCHING; -} - -static rose_inline -hwlmcb_rv_t roseSuffixesEod(const struct RoseEngine *rose, - struct hs_scratch *scratch, u64a offset) { - const u8 *aa = getActiveLeafArray(rose, scratch->core_info.state); - const u32 aaCount = rose->activeArrayCount; - - for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID; - qi = mmbit_iterate(aa, aaCount, qi)) { - DEBUG_PRINTF("checking nfa %u\n", qi); - struct mq *q = scratch->queues + qi; - assert(q->nfa == getNfaByQueue(rose, qi)); - assert(nfaAcceptsEod(q->nfa)); - - /* We have just been triggered. */ - assert(fatbit_isset(scratch->aqa, rose->queueCount, qi)); - - pushQueueNoMerge(q, MQE_END, scratch->core_info.len); - q->context = NULL; - - /* rose exec is used as we don't want to / can't raise matches in the - * history buffer. */ - if (!nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX)) { - DEBUG_PRINTF("nfa is dead\n"); - continue; - } - if (nfaCheckFinalState(q->nfa, q->state, q->streamState, offset, - roseReportAdaptor, - scratch) == MO_HALT_MATCHING) { - DEBUG_PRINTF("user instructed us to stop\n"); - return HWLM_TERMINATE_MATCHING; - } - } - return HWLM_CONTINUE_MATCHING; -} - -static rose_inline -hwlmcb_rv_t roseMatcherEod(const struct RoseEngine *rose, - struct hs_scratch *scratch, u64a offset) { - assert(rose->ematcherOffset); - assert(rose->ematcherRegionSize); - - // Clear role state and active engines, since we have already handled all - // outstanding work there. - DEBUG_PRINTF("clear role state and active leaf array\n"); - char *state = scratch->core_info.state; - mmbit_clear(getRoleState(state), rose->rolesWithStateCount); - mmbit_clear(getActiveLeafArray(rose, state), rose->activeArrayCount); - - const char is_streaming = rose->mode != HS_MODE_BLOCK; - - size_t eod_len; - const u8 *eod_data; - if (!is_streaming) { /* Block */ - eod_data = scratch->core_info.buf; - eod_len = scratch->core_info.len; - } else { /* Streaming */ - eod_len = scratch->core_info.hlen; - eod_data = scratch->core_info.hbuf; - } - - assert(eod_data); - assert(eod_len); - - DEBUG_PRINTF("%zu bytes of eod data to scan at offset %llu\n", eod_len, - offset); - - // If we don't have enough bytes to produce a match from an EOD table scan, - // there's no point scanning. - if (eod_len < rose->eodmatcherMinWidth) { - DEBUG_PRINTF("too short for min width %u\n", rose->eodmatcherMinWidth); - return HWLM_CONTINUE_MATCHING; - } - - // Ensure that we only need scan the last N bytes, where N is the length of - // the eod-anchored matcher region. - size_t adj = eod_len - MIN(eod_len, rose->ematcherRegionSize); - - const struct HWLM *etable = getByOffset(rose, rose->ematcherOffset); - hwlmExec(etable, eod_data, eod_len, adj, roseCallback, scratch, - scratch->tctxt.groups); - - // We may need to fire delayed matches. 
- if (cleanUpDelayed(rose, scratch, 0, offset) == HWLM_TERMINATE_MATCHING) { - DEBUG_PRINTF("user instructed us to stop\n"); - return HWLM_TERMINATE_MATCHING; - } - - roseFlushLastByteHistory(rose, scratch, offset); - return HWLM_CONTINUE_MATCHING; -} - -static rose_inline -int roseCheckLongLiteral(const struct RoseEngine *t, - const struct hs_scratch *scratch, u64a end, - u32 lit_offset, u32 lit_length, char nocase) { - const struct core_info *ci = &scratch->core_info; - const u8 *lit = getByOffset(t, lit_offset); - - DEBUG_PRINTF("check lit at %llu, length %u\n", end, lit_length); - DEBUG_PRINTF("base buf_offset=%llu\n", ci->buf_offset); - - if (end < lit_length) { - DEBUG_PRINTF("too short!\n"); - return 0; - } - - // If any portion of the literal matched in the current buffer, check it. - if (end > ci->buf_offset) { - u32 scan_len = MIN(end - ci->buf_offset, lit_length); - u64a scan_start = end - ci->buf_offset - scan_len; - DEBUG_PRINTF("checking suffix (%u bytes) in buf[%llu:%llu]\n", scan_len, - scan_start, end); - if (cmpForward(ci->buf + scan_start, lit + lit_length - scan_len, - scan_len, nocase)) { - DEBUG_PRINTF("cmp of suffix failed\n"); - return 0; - } - } - - // If the entirety of the literal was in the current block, we are done. - if (end - lit_length >= ci->buf_offset) { - DEBUG_PRINTF("literal confirmed in current block\n"); - return 1; - } - - // We still have a prefix which we must test against the buffer prepared by - // the long literal table. This is only done in streaming mode. - - assert(t->mode != HS_MODE_BLOCK); - - const u8 *ll_buf; - size_t ll_len; - if (nocase) { - ll_buf = scratch->tctxt.ll_buf_nocase; - ll_len = scratch->tctxt.ll_len_nocase; - } else { - ll_buf = scratch->tctxt.ll_buf; - ll_len = scratch->tctxt.ll_len; - } - - assert(ll_buf); - - u64a lit_start_offset = end - lit_length; - u32 prefix_len = MIN(lit_length, ci->buf_offset - lit_start_offset); - u32 hist_rewind = ci->buf_offset - lit_start_offset; - DEBUG_PRINTF("ll_len=%zu, hist_rewind=%u\n", ll_len, hist_rewind); - if (hist_rewind > ll_len) { - DEBUG_PRINTF("not enough history\n"); - return 0; - } - - DEBUG_PRINTF("check prefix len=%u from hist (len %zu, rewind %u)\n", - prefix_len, ll_len, hist_rewind); - assert(hist_rewind <= ll_len); - if (cmpForward(ll_buf + ll_len - hist_rewind, lit, prefix_len, nocase)) { - DEBUG_PRINTF("cmp of prefix failed\n"); - return 0; - } - - DEBUG_PRINTF("cmp succeeded\n"); - return 1; -} - -static rose_inline -int roseCheckMediumLiteral(const struct RoseEngine *t, - const struct hs_scratch *scratch, u64a end, - u32 lit_offset, u32 lit_length, char nocase) { - const struct core_info *ci = &scratch->core_info; - const u8 *lit = getByOffset(t, lit_offset); - - DEBUG_PRINTF("check lit at %llu, length %u\n", end, lit_length); - DEBUG_PRINTF("base buf_offset=%llu\n", ci->buf_offset); - - if (end < lit_length) { - DEBUG_PRINTF("too short!\n"); - return 0; - } - - // If any portion of the literal matched in the current buffer, check it. - if (end > ci->buf_offset) { - u32 scan_len = MIN(end - ci->buf_offset, lit_length); - u64a scan_start = end - ci->buf_offset - scan_len; - DEBUG_PRINTF("checking suffix (%u bytes) in buf[%llu:%llu]\n", scan_len, - scan_start, end); - if (cmpForward(ci->buf + scan_start, lit + lit_length - scan_len, - scan_len, nocase)) { - DEBUG_PRINTF("cmp of suffix failed\n"); - return 0; - } - } - - // If the entirety of the literal was in the current block, we are done. 
- if (end - lit_length >= ci->buf_offset) { - DEBUG_PRINTF("literal confirmed in current block\n"); - return 1; - } - - // We still have a prefix which we must test against the history buffer. - assert(t->mode != HS_MODE_BLOCK); - - u64a lit_start_offset = end - lit_length; - u32 prefix_len = MIN(lit_length, ci->buf_offset - lit_start_offset); - u32 hist_rewind = ci->buf_offset - lit_start_offset; - DEBUG_PRINTF("hlen=%zu, hist_rewind=%u\n", ci->hlen, hist_rewind); - - // History length check required for confirm in the EOD and delayed - // rebuild paths. - if (hist_rewind > ci->hlen) { - DEBUG_PRINTF("not enough history\n"); - return 0; - } - - DEBUG_PRINTF("check prefix len=%u from hist (len %zu, rewind %u)\n", - prefix_len, ci->hlen, hist_rewind); - assert(hist_rewind <= ci->hlen); - if (cmpForward(ci->hbuf + ci->hlen - hist_rewind, lit, prefix_len, - nocase)) { - DEBUG_PRINTF("cmp of prefix failed\n"); - return 0; - } - - DEBUG_PRINTF("cmp succeeded\n"); - return 1; -} - -static -void updateSeqPoint(struct RoseContext *tctxt, u64a offset, - const char from_mpv) { - if (from_mpv) { - updateMinMatchOffsetFromMpv(tctxt, offset); - } else { - updateMinMatchOffset(tctxt, offset); - } -} - -static rose_inline -hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t, - struct hs_scratch *scratch) { - u8 *cvec = (u8 *)scratch->core_info.combVector; - if (!mmbit_any(cvec, t->ckeyCount)) { - return HWLM_CONTINUE_MATCHING; - } - u64a end = scratch->tctxt.lastCombMatchOffset; - for (u32 i = mmbit_iterate(cvec, t->ckeyCount, MMB_INVALID); - i != MMB_INVALID; i = mmbit_iterate(cvec, t->ckeyCount, i)) { - const struct CombInfo *combInfoMap = (const struct CombInfo *) - ((const char *)t + t->combInfoMapOffset); - const struct CombInfo *ci = combInfoMap + i; - if ((ci->min_offset != 0) && (end < ci->min_offset)) { - DEBUG_PRINTF("halt: before min_offset=%llu\n", ci->min_offset); - continue; - } - if ((ci->max_offset != MAX_OFFSET) && (end > ci->max_offset)) { - DEBUG_PRINTF("halt: after max_offset=%llu\n", ci->max_offset); - continue; - } - - DEBUG_PRINTF("check ekey %u\n", ci->ekey); - if (ci->ekey != INVALID_EKEY) { - assert(ci->ekey < t->ekeyCount); - const char *evec = scratch->core_info.exhaustionVector; - if (isExhausted(t, evec, ci->ekey)) { - DEBUG_PRINTF("ekey %u already set, match is exhausted\n", - ci->ekey); - continue; - } - } - - DEBUG_PRINTF("check ckey %u\n", i); - char *lvec = scratch->core_info.logicalVector; - if (!isLogicalCombination(t, lvec, ci->start, ci->result)) { - DEBUG_PRINTF("Logical Combination Failed!\n"); - continue; - } - - DEBUG_PRINTF("Logical Combination Passed!\n"); - if (roseReport(t, scratch, end, ci->id, 0, - ci->ekey) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - clearCvec(t, (char *)cvec); - return HWLM_CONTINUE_MATCHING; -} - -#define PROGRAM_CASE(name) \ - case ROSE_INSTR_##name: { \ - DEBUG_PRINTF("instruction: " #name " (pc=%u)\n", \ - programOffset + (u32)(pc - pc_base)); \ - const struct ROSE_STRUCT_##name *ri = \ - (const struct ROSE_STRUCT_##name *)pc; - -#define PROGRAM_NEXT_INSTRUCTION \ - pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ - break; \ - } - -static rose_inline -hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, +hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, struct hs_scratch *scratch, u32 programOffset, - u64a som, u64a end, u8 prog_flags) { - DEBUG_PRINTF("program=%u, offsets [%llu,%llu], flags=%u\n", programOffset, - som, end, prog_flags); - - assert(programOffset != 
ROSE_INVALID_PROG_OFFSET); - assert(programOffset >= sizeof(struct RoseEngine)); - assert(programOffset < t->size); - - const char in_anchored = prog_flags & ROSE_PROG_FLAG_IN_ANCHORED; - const char in_catchup = prog_flags & ROSE_PROG_FLAG_IN_CATCHUP; - const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV; - const char skip_mpv_catchup = prog_flags & ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; - - const char *pc_base = getByOffset(t, programOffset); - const char *pc = pc_base; - - // Local sparse iterator state for programs that use the SPARSE_ITER_BEGIN - // and SPARSE_ITER_NEXT instructions. - struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; - - // If this program has an effect, work_done will be set to one (which may - // allow the program to squash groups). - int work_done = 0; - - struct RoseContext *tctxt = &scratch->tctxt; - - assert(*(const u8 *)pc != ROSE_INSTR_END); - - for (;;) { - assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN)); - assert(pc >= pc_base); - assert((size_t)(pc - pc_base) < t->size); - const u8 code = *(const u8 *)pc; - assert(code <= LAST_ROSE_INSTRUCTION); - - switch ((enum RoseInstructionCode)code) { - PROGRAM_CASE(END) { - DEBUG_PRINTF("finished\n"); - return HWLM_CONTINUE_MATCHING; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(ANCHORED_DELAY) { - if (in_anchored && end > t->floatingMinLiteralMatchOffset) { - DEBUG_PRINTF("delay until playback\n"); - tctxt->groups |= ri->groups; - work_done = 1; - recordAnchoredLiteralMatch(t, scratch, ri->anch_id, end); - - assert(ri->done_jump); // must progress - pc += ri->done_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_LIT_EARLY) { - if (end < ri->min_offset) { - DEBUG_PRINTF("halt: before min_offset=%u\n", - ri->min_offset); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_GROUPS) { - DEBUG_PRINTF("groups=0x%llx, checking instr groups=0x%llx\n", - tctxt->groups, ri->groups); - if (!(ri->groups & tctxt->groups)) { - DEBUG_PRINTF("halt: no groups are set\n"); - return HWLM_CONTINUE_MATCHING; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_ONLY_EOD) { - struct core_info *ci = &scratch->core_info; - if (end != ci->buf_offset + ci->len) { - DEBUG_PRINTF("should only match at end of data\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_BOUNDS) { - if (!roseCheckBounds(end, ri->min_bound, ri->max_bound)) { - DEBUG_PRINTF("failed bounds check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_NOT_HANDLED) { - struct fatbit *handled = scratch->handled_roles; - if (fatbit_set(handled, t->handledKeyCount, ri->key)) { - DEBUG_PRINTF("key %u already set\n", ri->key); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_SINGLE_LOOKAROUND) { - if (!roseCheckSingleLookaround(t, scratch, ri->offset, - ri->reach_index, end)) { - DEBUG_PRINTF("failed lookaround check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_LOOKAROUND) { - if (!roseCheckLookaround(t, scratch, ri->look_index, - ri->reach_index, ri->count, end)) { - DEBUG_PRINTF("failed lookaround check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - 
PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MASK) { - struct core_info *ci = &scratch->core_info; - if (!roseCheckMask(ci, ri->and_mask, ri->cmp_mask, - ri->neg_mask, ri->offset, end)) { - DEBUG_PRINTF("failed mask check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MASK_32) { - struct core_info *ci = &scratch->core_info; - if (!roseCheckMask32(ci, ri->and_mask, ri->cmp_mask, - ri->neg_mask, ri->offset, end)) { - assert(ri->fail_jump); - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_BYTE) { - const struct core_info *ci = &scratch->core_info; - if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask, - ri->negation, ri->offset, end)) { - DEBUG_PRINTF("failed byte check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_SHUFTI_16x8) { - const struct core_info *ci = &scratch->core_info; - if (!roseCheckShufti16x8(ci, ri->nib_mask, - ri->bucket_select_mask, - ri->neg_mask, ri->offset, end)) { - assert(ri->fail_jump); - pc += ri-> fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_SHUFTI_32x8) { - const struct core_info *ci = &scratch->core_info; - if (!roseCheckShufti32x8(ci, ri->hi_mask, ri->lo_mask, - ri->bucket_select_mask, - ri->neg_mask, ri->offset, end)) { - assert(ri->fail_jump); - pc += ri-> fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_SHUFTI_16x16) { - const struct core_info *ci = &scratch->core_info; - if (!roseCheckShufti16x16(ci, ri->hi_mask, ri->lo_mask, - ri->bucket_select_mask, - ri->neg_mask, ri->offset, end)) { - assert(ri->fail_jump); - pc += ri-> fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_SHUFTI_32x16) { - const struct core_info *ci = &scratch->core_info; - if (!roseCheckShufti32x16(ci, ri->hi_mask, ri->lo_mask, - ri->bucket_select_mask_hi, - ri->bucket_select_mask_lo, - ri->neg_mask, ri->offset, end)) { - assert(ri->fail_jump); - pc += ri-> fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_INFIX) { - if (!roseTestInfix(t, scratch, ri->queue, ri->lag, ri->report, - end)) { - DEBUG_PRINTF("failed infix check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_PREFIX) { - if (!roseTestPrefix(t, scratch, ri->queue, ri->lag, ri->report, - end)) { - DEBUG_PRINTF("failed prefix check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(PUSH_DELAYED) { - rosePushDelayedMatch(t, scratch, ri->delay, ri->index, end); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(DUMMY_NOP) { - assert(0); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CATCH_UP) { - if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CATCH_UP_MPV) { - if (from_mpv || skip_mpv_catchup) { - DEBUG_PRINTF("skipping mpv catchup\n"); - } else if (roseCatchUpMPV(t, - end - scratch->core_info.buf_offset, - scratch) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SOM_ADJUST) { - assert(ri->distance <= end); - som = end - ri->distance; - DEBUG_PRINTF("som is (end - %u) = %llu\n", ri->distance, som); - } - PROGRAM_NEXT_INSTRUCTION - - 
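Every case in this switch follows the same contract: the opcode is the first byte of the instruction struct at pc, a successful check falls through to PROGRAM_NEXT_INSTRUCTION (which advances pc by the struct size rounded up to ROSE_INSTR_MIN_ALIGN), and a failed check takes the relative fail_jump instead. A stripped-down model of that dispatch shape (toy opcodes and structs, not the real Rose instruction set):

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

enum Op : uint8_t { OP_CHECK, OP_END, OP_FAIL };

struct InstrCheck {
    uint8_t code;      // opcode is always the first byte, as in Rose
    uint8_t value;     // byte the check expects
    int16_t fail_jump; // relative branch taken when the check fails
};

static bool run(const uint8_t *pc_base, uint8_t input) {
    const uint8_t *pc = pc_base;
    for (;;) {
        switch (*pc) {
        case OP_CHECK: {
            InstrCheck ri;
            std::memcpy(&ri, pc, sizeof(ri)); // sidestep alignment issues
            if (input != ri.value) {
                pc += ri.fail_jump; // relative jump, like pc += ri->fail_jump
                continue;
            }
            pc += sizeof(InstrCheck); // the toy's PROGRAM_NEXT_INSTRUCTION
            break;
        }
        case OP_END:
            return true; // cf. PROGRAM_CASE(END)
        case OP_FAIL:
            return false;
        }
    }
}

int main() {
    uint8_t prog[8] = {};
    InstrCheck c = {OP_CHECK, 'a', 5}; // on failure, jump to OP_FAIL below
    std::memcpy(prog, &c, sizeof(c));  // bytes 0..3
    prog[4] = OP_END;                  // reached only if the check passes
    prog[5] = OP_FAIL;                 // target of fail_jump
    std::printf("'a' -> %d, 'b' -> %d\n", run(prog, 'a'), run(prog, 'b'));
    return 0;
}
```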
PROGRAM_CASE(SOM_LEFTFIX) { - som = roseGetHaigSom(t, scratch, ri->queue, ri->lag); - DEBUG_PRINTF("som from leftfix is %llu\n", som); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SOM_FROM_REPORT) { - som = handleSomExternal(scratch, &ri->som, end); - DEBUG_PRINTF("som from report %u is %llu\n", ri->som.onmatch, - som); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SOM_ZERO) { - DEBUG_PRINTF("setting SOM to zero\n"); - som = 0; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(TRIGGER_INFIX) { - roseTriggerInfix(t, scratch, som, end, ri->queue, ri->event, - ri->cancel); - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(TRIGGER_SUFFIX) { - if (roseTriggerSuffix(t, scratch, ri->queue, ri->event, som, - end) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(DEDUPE) { - updateSeqPoint(tctxt, end, from_mpv); - const char do_som = t->hasSom; // TODO: constant propagate - const char is_external_report = 1; - enum DedupeResult rv = - dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, - ri->dkey, ri->offset_adjust, - is_external_report, ri->quash_som, do_som); - switch (rv) { - case DEDUPE_HALT: - return HWLM_TERMINATE_MATCHING; - case DEDUPE_SKIP: - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - case DEDUPE_CONTINUE: - break; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(DEDUPE_SOM) { - updateSeqPoint(tctxt, end, from_mpv); - const char is_external_report = 0; - const char do_som = 1; - enum DedupeResult rv = - dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, - ri->dkey, ri->offset_adjust, - is_external_report, ri->quash_som, do_som); - switch (rv) { - case DEDUPE_HALT: - return HWLM_TERMINATE_MATCHING; - case DEDUPE_SKIP: - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - case DEDUPE_CONTINUE: - break; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_CHAIN) { - // Note: sequence points updated inside this function. 
- if (roseCatchUpAndHandleChainMatch( - t, scratch, ri->event, ri->top_squash_distance, end, - in_catchup) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_SOM_INT) { - updateSeqPoint(tctxt, end, from_mpv); - roseHandleSom(scratch, &ri->som, end); - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_SOM_AWARE) { - updateSeqPoint(tctxt, end, from_mpv); - roseHandleSomSom(scratch, &ri->som, som, end); - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT) { - updateSeqPoint(tctxt, end, from_mpv); - if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, - INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_EXHAUST) { - updateSeqPoint(tctxt, end, from_mpv); - if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, - ri->ekey) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_SOM) { - updateSeqPoint(tctxt, end, from_mpv); - if (roseReportSom(t, scratch, som, end, ri->onmatch, - ri->offset_adjust, - INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_SOM_EXHAUST) { - updateSeqPoint(tctxt, end, from_mpv); - if (roseReportSom(t, scratch, som, end, ri->onmatch, - ri->offset_adjust, - ri->ekey) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(DEDUPE_AND_REPORT) { - updateSeqPoint(tctxt, end, from_mpv); - const char do_som = t->hasSom; // TODO: constant propagate - const char is_external_report = 1; - enum DedupeResult rv = - dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, - ri->dkey, ri->offset_adjust, - is_external_report, ri->quash_som, do_som); - switch (rv) { - case DEDUPE_HALT: - return HWLM_TERMINATE_MATCHING; - case DEDUPE_SKIP: - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - case DEDUPE_CONTINUE: - break; - } - - const u32 ekey = INVALID_EKEY; - if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, - ekey) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(FINAL_REPORT) { - updateSeqPoint(tctxt, end, from_mpv); - if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, - INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - /* One-shot specialisation: this instruction always terminates - * execution of the program. 
*/ - return HWLM_CONTINUE_MATCHING; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_EXHAUSTED) { - DEBUG_PRINTF("check ekey %u\n", ri->ekey); - assert(ri->ekey != INVALID_EKEY); - assert(ri->ekey < t->ekeyCount); - const char *evec = scratch->core_info.exhaustionVector; - if (isExhausted(t, evec, ri->ekey)) { - DEBUG_PRINTF("ekey %u already set, match is exhausted\n", - ri->ekey); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MIN_LENGTH) { - DEBUG_PRINTF("check min length %llu (adj %d)\n", ri->min_length, - ri->end_adj); - assert(ri->min_length > 0); - assert(ri->end_adj == 0 || ri->end_adj == -1); - assert(som == HS_OFFSET_PAST_HORIZON || som <= end); - if (som != HS_OFFSET_PAST_HORIZON && - ((end + ri->end_adj) - som < ri->min_length)) { - DEBUG_PRINTF("failed check, match len %llu\n", - (u64a)((end + ri->end_adj) - som)); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SET_STATE) { - DEBUG_PRINTF("set state index %u\n", ri->index); - mmbit_set(getRoleState(scratch->core_info.state), - t->rolesWithStateCount, ri->index); - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SET_GROUPS) { - tctxt->groups |= ri->groups; - DEBUG_PRINTF("set groups 0x%llx -> 0x%llx\n", ri->groups, - tctxt->groups); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SQUASH_GROUPS) { - assert(popcount64(ri->groups) == 63); // Squash only one group. - if (work_done) { - tctxt->groups &= ri->groups; - DEBUG_PRINTF("squash groups 0x%llx -> 0x%llx\n", ri->groups, - tctxt->groups); - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_STATE) { - DEBUG_PRINTF("check state %u\n", ri->index); - const u8 *roles = getRoleState(scratch->core_info.state); - if (!mmbit_isset(roles, t->rolesWithStateCount, ri->index)) { - DEBUG_PRINTF("state not on\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SPARSE_ITER_BEGIN) { - DEBUG_PRINTF("iter_offset=%u\n", ri->iter_offset); - const struct mmbit_sparse_iter *it = - getByOffset(t, ri->iter_offset); - assert(ISALIGNED(it)); - - const u8 *roles = getRoleState(scratch->core_info.state); - - u32 idx = 0; - u32 i = mmbit_sparse_iter_begin(roles, t->rolesWithStateCount, - &idx, it, si_state); - if (i == MMB_INVALID) { - DEBUG_PRINTF("no states in sparse iter are on\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - - fatbit_clear(scratch->handled_roles); - - const u32 *jumps = getByOffset(t, ri->jump_table); - DEBUG_PRINTF("state %u (idx=%u) is on, jump to %u\n", i, idx, - jumps[idx]); - pc = pc_base + jumps[idx]; - continue; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SPARSE_ITER_NEXT) { - DEBUG_PRINTF("iter_offset=%u, state=%u\n", ri->iter_offset, - ri->state); - const struct mmbit_sparse_iter *it = - getByOffset(t, ri->iter_offset); - assert(ISALIGNED(it)); - - const u8 *roles = getRoleState(scratch->core_info.state); - - u32 idx = 0; - u32 i = mmbit_sparse_iter_next(roles, t->rolesWithStateCount, - ri->state, &idx, it, si_state); - if (i == MMB_INVALID) { - DEBUG_PRINTF("no more states in sparse iter are on\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - - const u32 *jumps = getByOffset(t, ri->jump_table); - DEBUG_PRINTF("state %u (idx=%u) is on, jump to %u\n", i, idx, - jumps[idx]); - pc = pc_base + jumps[idx]; - continue; - } - 
PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SPARSE_ITER_ANY) { - DEBUG_PRINTF("iter_offset=%u\n", ri->iter_offset); - const struct mmbit_sparse_iter *it = - getByOffset(t, ri->iter_offset); - assert(ISALIGNED(it)); - - const u8 *roles = getRoleState(scratch->core_info.state); - - u32 idx = 0; - u32 i = mmbit_sparse_iter_begin(roles, t->rolesWithStateCount, - &idx, it, si_state); - if (i == MMB_INVALID) { - DEBUG_PRINTF("no states in sparse iter are on\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - DEBUG_PRINTF("state %u (idx=%u) is on\n", i, idx); - fatbit_clear(scratch->handled_roles); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(ENGINES_EOD) { - if (roseEnginesEod(t, scratch, end, ri->iter_offset) == - HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SUFFIXES_EOD) { - if (roseSuffixesEod(t, scratch, end) == - HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(MATCHER_EOD) { - if (roseMatcherEod(t, scratch, end) == - HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_LONG_LIT) { - const char nocase = 0; - if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, - ri->lit_length, nocase)) { - DEBUG_PRINTF("failed long lit check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) { - const char nocase = 1; - if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, - ri->lit_length, nocase)) { - DEBUG_PRINTF("failed nocase long lit check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MED_LIT) { - const char nocase = 0; - if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset, - ri->lit_length, nocase)) { - DEBUG_PRINTF("failed lit check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MED_LIT_NOCASE) { - const char nocase = 1; - if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset, - ri->lit_length, nocase)) { - DEBUG_PRINTF("failed long lit check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CLEAR_WORK_DONE) { - DEBUG_PRINTF("clear work_done flag\n"); - work_done = 0; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(MULTIPATH_LOOKAROUND) { - if (!roseMultipathLookaround(t, scratch, ri->look_index, - ri->reach_index, ri->count, - ri->last_start, ri->start_mask, - end)) { - DEBUG_PRINTF("failed multi-path lookaround check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_16x8) { - if (!roseCheckMultipathShufti16x8(scratch, ri, end)) { - DEBUG_PRINTF("failed multi-path shufti 16x8 check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x8) { - if (!roseCheckMultipathShufti32x8(scratch, ri, end)) { - DEBUG_PRINTF("failed multi-path shufti 32x8 check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x16) { - if (!roseCheckMultipathShufti32x16(scratch, ri, end)) 
{ - DEBUG_PRINTF("failed multi-path shufti 32x16 check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_64) { - if (!roseCheckMultipathShufti64(scratch, ri, end)) { - DEBUG_PRINTF("failed multi-path shufti 64 check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(INCLUDED_JUMP) { - if (scratch->fdr_conf) { - // squash the bucket of included literal - u8 shift = scratch->fdr_conf_offset & ~7U; - u64a mask = ((~(u64a)ri->squash) << shift); - *(scratch->fdr_conf) &= mask; - - pc = getByOffset(t, ri->child_offset); - pc_base = pc; - programOffset = (const u8 *)pc_base -(const u8 *)t; - DEBUG_PRINTF("pc_base %p pc %p child_offset %u squash %u\n", - pc_base, pc, ri->child_offset, ri->squash); - work_done = 0; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SET_LOGICAL) { - DEBUG_PRINTF("set logical value of lkey %u, offset_adjust=%d\n", - ri->lkey, ri->offset_adjust); - assert(ri->lkey != INVALID_LKEY); - assert(ri->lkey < t->lkeyCount); - char *lvec = scratch->core_info.logicalVector; - setLogicalVal(t, lvec, ri->lkey, 1); - updateLastCombMatchOffset(tctxt, end + ri->offset_adjust); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SET_COMBINATION) { - DEBUG_PRINTF("set ckey %u as active\n", ri->ckey); - assert(ri->ckey != INVALID_CKEY); - assert(ri->ckey < t->ckeyCount); - char *cvec = scratch->core_info.combVector; - setCombinationActive(t, cvec, ri->ckey); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(FLUSH_COMBINATION) { - assert(end >= tctxt->lastCombMatchOffset); - if (end > tctxt->lastCombMatchOffset) { - if (flushActiveCombinations(t, scratch) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SET_EXHAUST) { - updateSeqPoint(tctxt, end, from_mpv); - if (roseSetExhaust(t, scratch, ri->ekey) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - } - } - - assert(0); // unreachable - return HWLM_CONTINUE_MATCHING; -} - -#undef PROGRAM_CASE -#undef PROGRAM_NEXT_INSTRUCTION + u64a som, u64a end, u8 prog_flags); #endif // PROGRAM_RUNTIME_H diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 2c0a9b28..0ef20f21 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -2843,9 +2843,34 @@ vector groupByFragment(const RoseBuildImpl &build) { DEBUG_PRINTF("fragment candidate: lit_id=%u %s\n", lit_id, dumpString(lit.s).c_str()); - auto &fi = frag_info[getFragment(lit)]; - fi.lit_ids.push_back(lit_id); - fi.groups |= groups; + + /** 0:/xxabcdefgh/ */ + /** 1:/yyabcdefgh/ */ + /** 2:/yyabcdefgh.+/ */ + // Above 3 patterns should firstly convert into RoseLiteralMap with + // 2 elements ("xxabcdefgh" and "yyabcdefgh"), then convert into + // LitFragment with 1 element ("abcdefgh"). Special care should be + // taken to handle the 'pure' flag during the conversion. 
+
+        rose_literal_id lit_frag = getFragment(lit);
+        auto it = frag_info.find(lit_frag);
+        if (it != frag_info.end()) {
+            if (!lit_frag.s.get_pure() && it->first.s.get_pure()) {
+                struct FragmentInfo f_info = it->second;
+                f_info.lit_ids.push_back(lit_id);
+                f_info.groups |= groups;
+                frag_info.erase(it->first);
+                frag_info.emplace(lit_frag, f_info);
+            } else {
+                it->second.lit_ids.push_back(lit_id);
+                it->second.groups |= groups;
+            }
+        } else {
+            struct FragmentInfo f_info;
+            f_info.lit_ids.push_back(lit_id);
+            f_info.groups |= groups;
+            frag_info.emplace(lit_frag, f_info);
+        }
     }
 
     for (auto &m : frag_info) {
diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp
index 0cc5b5c3..2eb7bb51 100644
--- a/src/rose/rose_build_dump.cpp
+++ b/src/rose/rose_build_dump.cpp
@@ -115,9 +115,9 @@ class RoseGraphWriter {
 public:
     RoseGraphWriter(const RoseBuildImpl &b_in, const map &frag_map_in,
                     const map &lqm_in,
-                    const map &sqm_in, const RoseEngine *t_in)
+                    const map &sqm_in)
         : frag_map(frag_map_in), leftfix_queue_map(lqm_in),
-          suffix_queue_map(sqm_in), build(b_in), t(t_in) {
+          suffix_queue_map(sqm_in), build(b_in) {
         for (const auto &m : build.ghost) {
             ghost.insert(m.second);
         }
@@ -273,7 +273,6 @@ private:
     const map &leftfix_queue_map;
     const map &suffix_queue_map;
     const RoseBuildImpl &build;
-    const RoseEngine *t;
 };
 
 } // namespace
@@ -313,8 +312,7 @@ void dumpRoseGraph(const RoseBuildImpl &build, const RoseEngine *t,
     ofstream os(ss.str());
 
     auto frag_map = makeFragMap(fragments);
-    RoseGraphWriter writer(build, frag_map, leftfix_queue_map, suffix_queue_map,
-                           t);
+    RoseGraphWriter writer(build, frag_map, leftfix_queue_map, suffix_queue_map);
     writeGraphviz(os, build.g, writer, get(boost::vertex_index, build.g));
 }
diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h
index 900aee6c..fe48da4c 100644
--- a/src/rose/rose_build_impl.h
+++ b/src/rose/rose_build_impl.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -340,7 +340,14 @@ public:
     std::pair<u32, bool> insert(const rose_literal_id &lit) {
         auto it = lits_index.find(lit);
         if (it != lits_index.end()) {
-            return {it->second, false};
+            u32 idx = it->second;
+            auto &l = lits.at(idx);
+            if (!lit.s.get_pure() && l.s.get_pure()) {
+                lits_index.erase(l);
+                l.s.unset_pure();
+                lits_index.emplace(l, idx);
+            }
+            return {idx, false};
         }
         u32 id = verify_u32(lits.size());
         lits.push_back(lit);
diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp
index 2c302a85..8c532cab 100644
--- a/src/rose/rose_build_matchers.cpp
+++ b/src/rose/rose_build_matchers.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2017, Intel Corporation
+ * Copyright (c) 2016-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -727,6 +727,7 @@ void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp,
     const auto &s_final = lit_final.get_string();
     bool nocase = lit_final.any_nocase();
+    bool pure = f.s.get_pure();
 
     DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, cmp=%s\n",
                  f.fragment_id, escapeString(s_final).c_str(), (int)nocase,
@@ -740,7 +741,7 @@
     const auto &groups = f.groups;
 
     mp.lits.emplace_back(move(s_final), nocase, noruns, f.fragment_id,
-                         groups, msk, cmp);
+                         groups, msk, cmp, pure);
 }
 
 static
diff --git a/src/rose/stream_long_lit.h b/src/rose/stream_long_lit.h
index 0736ec88..34867608 100644
--- a/src/rose/stream_long_lit.h
+++ b/src/rose/stream_long_lit.h
@@ -33,6 +33,7 @@
 #include "rose_common.h"
 #include "rose_internal.h"
 #include "stream_long_lit_hash.h"
+#include "util/compare.h"
 #include "util/copybytes.h"
 
 static really_inline
diff --git a/src/runtime.c b/src/runtime.c
index 052449f6..68f1f8a7 100644
--- a/src/runtime.c
+++ b/src/runtime.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -141,6 +141,7 @@ void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose,
     s->deduper.current_report_offset = ~0ULL;
     s->deduper.som_log_dirty = 1; /* som logs have not been cleared */
     s->fdr_conf = NULL;
+    s->pure = 0;
 
     // Rose program execution (used for some report paths) depends on these
     // values being initialised.
@@ -445,6 +446,7 @@ done_scan:
                             scratch);
     }
 
+set_retval:
     if (rose->flushCombProgramOffset) {
         if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) {
             unmarkScratchInUse(scratch);
@@ -452,7 +454,6 @@ done_scan:
         }
     }
 
-set_retval:
     DEBUG_PRINTF("done. told_to_stop_matching=%d\n",
                  told_to_stop_matching(scratch));
     hs_error_t rv = told_to_stop_matching(scratch) ? HS_SCAN_TERMINATED
@@ -934,12 +935,6 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
         }
     }
 
-    if (rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
-        if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) {
-            scratch->core_info.status |= STATUS_TERMINATED;
-        }
-    }
-
     setStreamStatus(state, scratch->core_info.status);
 
     if (likely(!can_stop_matching(scratch))) {
@@ -994,6 +989,13 @@ hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
         unmarkScratchInUse(scratch);
     }
 
+    if (id->rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
+        if (roseRunFlushCombProgram(id->rose, scratch, ~0ULL)
+            == MO_HALT_MATCHING) {
+            scratch->core_info.status |= STATUS_TERMINATED;
+        }
+    }
+
     hs_stream_free(id);
 
     return HS_SUCCESS;
@@ -1019,6 +1021,13 @@ hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags,
         unmarkScratchInUse(scratch);
     }
 
+    if (id->rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
+        if (roseRunFlushCombProgram(id->rose, scratch, ~0ULL)
+            == MO_HALT_MATCHING) {
+            scratch->core_info.status |= STATUS_TERMINATED;
+        }
+    }
+
     // history already initialised
     init_stream(id, id->rose, 0);
diff --git a/src/scratch.c b/src/scratch.c
index 8e082c77..c23b5b3c 100644
--- a/src/scratch.c
+++ b/src/scratch.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -137,6 +137,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
     s->scratchSize = alloc_size;
     s->scratch_alloc = (char *)s_tmp;
     s->fdr_conf = NULL;
+    s->pure = 0;
 
     // each of these is at an offset from the previous
     char *current = (char *)s + sizeof(*s);
diff --git a/src/scratch.h b/src/scratch.h
index 59aa02c6..dab7bab7 100644
--- a/src/scratch.h
+++ b/src/scratch.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -208,6 +208,7 @@ struct ALIGN_CL_DIRECTIVE hs_scratch {
     u64a *fdr_conf; /**< FDR confirm value */
     u8 fdr_conf_offset; /**< offset where FDR/Teddy front end matches
                          * in buffer */
+    u8 pure; /**< set when matching a pure literal rather than a cut literal */
 };
 
 /* array of fatbit ptr; TODO: why not an array of fatbits? */
diff --git a/src/util/ue2string.h b/src/util/ue2string.h
index 0fa76c3a..1ce51b2f 100644
--- a/src/util/ue2string.h
+++ b/src/util/ue2string.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -211,10 +211,17 @@ public:
 
     size_t hash() const;
 
+    void set_pure() { pure = true; }
+    void unset_pure() { pure = false; }
+    bool get_pure() const { return pure; }
+
+    /* TODO: consider existing member functions possibly related to pure. */
+
 private:
     friend const_iterator;
     std::string s;
     boost::dynamic_bitset<> nocase;
+    bool pure = false; /**< true if this literal is pure (not born from cutting). */
 };
 
 /// Return a reversed copy of this literal.
diff --git a/tools/hsbench/main.cpp b/tools/hsbench/main.cpp
index fecdd330..de9fde07 100644
--- a/tools/hsbench/main.cpp
+++ b/tools/hsbench/main.cpp
@@ -1065,6 +1065,9 @@ int HS_CDECL main(int argc, char *argv[]) {
     } catch (const SqlFailure &f) {
         cerr << f.message << '\n';
         return -1;
+    } catch (const std::runtime_error &e) {
+        cerr << "Internal error: " << e.what() << '\n';
+        return -1;
     }
 
     return 0;
diff --git a/tools/hscollider/CMakeLists.txt b/tools/hscollider/CMakeLists.txt
index 4684964f..a4d71b2f 100644
--- a/tools/hscollider/CMakeLists.txt
+++ b/tools/hscollider/CMakeLists.txt
@@ -64,7 +64,6 @@
 set_source_files_properties(${hscollider_SOURCES} PROPERTIES
     INCLUDE_DIRECTORIES ${CMAKE_CURRENT_SOURCE_DIR})
 add_executable(hscollider ${hscollider_SOURCES})
 add_dependencies(hscollider ragel_ColliderCorporaParser)
-add_dependencies(hscollider pcre)
 
 if(NOT WIN32)
     if (BUILD_CHIMERA)
diff --git a/tools/hscollider/main.cpp b/tools/hscollider/main.cpp
index ec7cd6be..18d7a016 100644
--- a/tools/hscollider/main.cpp
+++ b/tools/hscollider/main.cpp
@@ -61,6 +61,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/unit/chimera/arg_checks.cpp b/unit/chimera/arg_checks.cpp
index ea1cda15..b9132a31 100644
--- a/unit/chimera/arg_checks.cpp
+++ b/unit/chimera/arg_checks.cpp
@@ -477,6 +477,7 @@ TEST(HybridArgChecks, AllocScratchBogusScratch) {
     makeDatabase(&db);
 
     ch_scratch_t *blah = (ch_scratch_t *)malloc(100);
+    ASSERT_TRUE(blah != nullptr);
     memset(blah, 0xf0, 100);
     ch_error_t err = ch_alloc_scratch(db, &blah);
     ASSERT_EQ(CH_INVALID, err);
@@ -536,6 +537,7 @@ TEST(HybridArgChecks, DatabaseSizeNoDatabase) {
 TEST(HybridArgChecks, CloneBadScratch) {
     // Try cloning the scratch
     void *local_garbage = malloc(sizeof(garbage));
+    ASSERT_TRUE(local_garbage != nullptr);
     memcpy(local_garbage, garbage, sizeof(garbage));
     ch_scratch_t *cloned = nullptr;
     ch_scratch_t *scratch = (ch_scratch_t *)local_garbage;
@@ -550,6 +552,7 @@ TEST(HybridArgChecks, ScanBadScratch) {
     makeDatabase(&db);
 
     void *local_garbage = malloc(sizeof(garbage));
+    ASSERT_TRUE(local_garbage != nullptr);
     memcpy(local_garbage, garbage, sizeof(garbage));
     ch_scratch_t *scratch = (ch_scratch_t *)local_garbage;
diff --git a/unit/hyperscan/arg_checks.cpp b/unit/hyperscan/arg_checks.cpp
index 2cbd0842..21c8707f 100644
--- a/unit/hyperscan/arg_checks.cpp
+++ b/unit/hyperscan/arg_checks.cpp
@@ -1370,6 +1370,7 @@ TEST(HyperscanArgChecks, AllocScratchBogusScratch) {
     ASSERT_EQ(HS_SUCCESS, err);
     ASSERT_TRUE(db != nullptr);
     hs_scratch_t *blah = (hs_scratch_t *)malloc(100);
+    ASSERT_TRUE(blah != nullptr);
     memset(blah, 0xf0, 100);
     err = hs_alloc_scratch(db, &blah);
     ASSERT_EQ(HS_INVALID, err);
@@ -2034,6 +2035,7 @@ TEST(HyperscanArgChecks, ScratchSizeBadScratch) {
 TEST(HyperscanArgChecks, CloneBadScratch) {
     // Try cloning the scratch
     void *local_garbage = malloc(sizeof(garbage));
+    ASSERT_TRUE(local_garbage != nullptr);
     memcpy(local_garbage, garbage, sizeof(garbage));
     hs_scratch_t *cloned = nullptr;
     hs_scratch_t *scratch = (hs_scratch_t *)local_garbage;
@@ -2052,6 +2054,7 @@ TEST(HyperscanArgChecks, ScanBadScratch) {
     ASSERT_TRUE(db != nullptr);
 
     void *local_garbage = malloc(sizeof(garbage));
+    ASSERT_TRUE(local_garbage != nullptr);
     memcpy(local_garbage, garbage, sizeof(garbage));
     hs_scratch_t *scratch = (hs_scratch_t *)local_garbage;
@@ -2072,6 +2075,7 @@ TEST(HyperscanArgChecks, ScanStreamBadScratch) {
     ASSERT_EQ(HS_SUCCESS, err);
     ASSERT_TRUE(db != nullptr);
     void *local_garbage = malloc(sizeof(garbage));
+    ASSERT_TRUE(local_garbage != nullptr);
     memcpy(local_garbage, garbage, sizeof(garbage));
     hs_scratch_t *scratch = (hs_scratch_t *)local_garbage;
@@ -2107,6 +2111,7 @@ TEST(HyperscanArgChecks, ResetStreamBadScratch) {
     ASSERT_EQ(HS_SUCCESS, err);
     ASSERT_TRUE(db != nullptr);
     void *local_garbage = malloc(sizeof(garbage));
+    ASSERT_TRUE(local_garbage != nullptr);
     memcpy(local_garbage, garbage, sizeof(garbage));
     hs_scratch_t *scratch = (hs_scratch_t *)local_garbage;
@@ -2142,6 +2147,7 @@ TEST(HyperscanArgChecks, ScanVectorBadScratch) {
     ASSERT_EQ(HS_SUCCESS, err);
     ASSERT_TRUE(db != nullptr);
     void *local_garbage = malloc(sizeof(garbage));
+    ASSERT_TRUE(local_garbage != nullptr);
    memcpy(local_garbage, garbage, sizeof(garbage));
     hs_scratch_t *scratch = (hs_scratch_t *)local_garbage;
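The unit-test hunks above all make the same hardening fix: the result of a raw `malloc()` is checked before the buffer is written. A minimal sketch of the pattern, with plain `assert()` standing in for gtest's `ASSERT_TRUE`:

```cpp
// Defensive-allocation pattern added to the argument-check tests.
#include <cassert>
#include <cstdlib>
#include <cstring>

int main() {
    void *blah = std::malloc(100);
    assert(blah != nullptr); // the new check: never memset through null
    std::memset(blah, 0xf0, 100);
    std::free(blah);
    return 0;
}
```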
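Similarly, the hsbench change earlier in this section adds a `std::runtime_error` handler next to the existing `SqlFailure` one, so internal errors exit with a clean message instead of reaching `std::terminate`. A sketch of that boundary; `runBenchmark` is a hypothetical stand-in for the tool's real work:

```cpp
#include <iostream>
#include <stdexcept>

static void runBenchmark() {
    // Stand-in for benchmark execution; any internal failure surfaces
    // as a std::runtime_error.
    throw std::runtime_error("example failure");
}

int main() {
    try {
        runBenchmark();
    } catch (const std::runtime_error &e) {
        std::cerr << "Internal error: " << e.what() << '\n';
        return -1;
    }
    return 0;
}
```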
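Looking back at the start of this section: the `PROGRAM_CASE` bodies deleted from program_runtime.h all share one dispatch shape. Each case runs a check and, on failure, jumps forward by `ri->fail_jump` rather than falling through; the lone surviving `+` line is the tail of a function declaration retained in the header, so the bodies evidently move out of the header in this release. A simplified sketch of that pattern follows; `Instr` and `runCheck` are hypothetical, and the real engine dispatches variable-length packed instructions through the `PROGRAM_CASE`/`PROGRAM_NEXT_INSTRUCTION` macros:

```cpp
// Minimal fail_jump-style bytecode interpreter.
#include <cassert>
#include <cstdint>
#include <cstdio>

struct Instr {
    uint32_t size;      // byte length of this instruction
    uint32_t fail_jump; // forward offset taken when the check fails
};

static bool runCheck(const Instr *ri) {
    (void)ri;
    return false; // pretend the guarded check (lookaround, literal, ...) failed
}

static void runProgram(const uint8_t *prog, const uint8_t *end) {
    const uint8_t *pc = prog;
    while (pc < end) {
        const Instr *ri = reinterpret_cast<const Instr *>(pc);
        if (!runCheck(ri)) {
            assert(ri->fail_jump); // must progress, as the asserts above insist
            pc += ri->fail_jump;   // skip everything guarded by this check
            continue;
        }
        pc += ri->size; // check passed: fall through to the next instruction
    }
}

int main() {
    Instr program[2] = {{sizeof(Instr), sizeof(Instr)},
                        {sizeof(Instr), sizeof(Instr)}};
    runProgram(reinterpret_cast<const uint8_t *>(program),
               reinterpret_cast<const uint8_t *>(program + 2));
    std::puts("program ran to completion");
    return 0;
}
```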
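Finally, the runtime.c hunks relocate the combination-flush call: it leaves `hs_scan_stream_internal` and reappears in `hs_close_stream` and `hs_reset_stream`, so logical combinations are flushed once, when the stream genuinely ends, rather than after every write. A toy model of the change; `Stream`, `scanStream` and `closeStream` are hypothetical stand-ins for the hs_* runtime:

```cpp
#include <cstdio>

struct Stream {
    bool has_comb_program; // stands in for rose->flushCombProgramOffset
};

static void flushCombinations(Stream &) {
    std::puts("flush logical combinations");
}

static void scanStream(Stream &, const char *data) {
    // Before this release, combinations were flushed here, after every
    // write into the stream.
    std::printf("scanned %s\n", data);
}

static void closeStream(Stream &s) {
    // Now the flush runs here (and in reset), once no more data can
    // arrive for the logical stream.
    if (s.has_comb_program) {
        flushCombinations(s);
    }
}

int main() {
    Stream s{true};
    scanStream(s, "block1");
    scanStream(s, "block2");
    closeStream(s); // combinations flushed exactly once, at end of stream
    return 0;
}
```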