diff --git a/CMakeLists.txt b/CMakeLists.txt index 52d54955..9062c287 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -448,10 +448,6 @@ set (hs_exec_SRCS src/nfa/lbr.h src/nfa/lbr_common_impl.h src/nfa/lbr_internal.h - src/nfa/mcclellan.c - src/nfa/mcclellan.h - src/nfa/mcclellan_common_impl.h - src/nfa/mcclellan_internal.h src/nfa/limex_accel.c src/nfa/limex_accel.h src/nfa/limex_exceptional.h @@ -470,6 +466,14 @@ set (hs_exec_SRCS src/nfa/limex_runtime_impl.h src/nfa/limex_shuffle.h src/nfa/limex_state_impl.h + src/nfa/mcclellan.c + src/nfa/mcclellan.h + src/nfa/mcclellan_common_impl.h + src/nfa/mcclellan_internal.h + src/nfa/mcsheng.c + src/nfa/mcsheng_data.c + src/nfa/mcsheng.h + src/nfa/mcsheng_internal.h src/nfa/mpv.h src/nfa/mpv.c src/nfa/mpv_internal.h @@ -650,6 +654,8 @@ SET (hs_SRCS src/nfa/mcclellancompile.h src/nfa/mcclellancompile_util.cpp src/nfa/mcclellancompile_util.h + src/nfa/mcsheng_compile.cpp + src/nfa/mcsheng_compile.h src/nfa/limex_compile.cpp src/nfa/limex_compile.h src/nfa/limex_accel.h @@ -667,6 +673,8 @@ SET (hs_SRCS src/nfa/nfa_internal.h src/nfa/nfa_kind.h src/nfa/rdfa.h + src/nfa/rdfa_graph.cpp + src/nfa/rdfa_graph.h src/nfa/rdfa_merge.cpp src/nfa/rdfa_merge.h src/nfa/repeat_internal.h @@ -962,6 +970,8 @@ set(hs_dump_SRCS src/nfa/limex_dump.cpp src/nfa/mcclellandump.cpp src/nfa/mcclellandump.h + src/nfa/mcsheng_dump.cpp + src/nfa/mcsheng_dump.h src/nfa/mpv_dump.cpp src/nfa/nfa_dump_api.h src/nfa/nfa_dump_dispatch.cpp diff --git a/src/grey.cpp b/src/grey.cpp index bad56b56..340a34bf 100644 --- a/src/grey.cpp +++ b/src/grey.cpp @@ -51,6 +51,7 @@ Grey::Grey(void) : allowLbr(true), allowMcClellan(true), allowSheng(true), + allowMcSheng(true), allowPuff(true), allowLiteral(true), allowRose(true), @@ -217,6 +218,7 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(allowLbr); G_UPDATE(allowMcClellan); G_UPDATE(allowSheng); + G_UPDATE(allowMcSheng); G_UPDATE(allowPuff); G_UPDATE(allowLiteral); G_UPDATE(allowRose); diff --git 
a/src/grey.h b/src/grey.h index 90f5f826..4882af7d 100644 --- a/src/grey.h +++ b/src/grey.h @@ -51,6 +51,7 @@ struct Grey { bool allowLbr; bool allowMcClellan; bool allowSheng; + bool allowMcSheng; bool allowPuff; bool allowLiteral; bool allowRose; diff --git a/src/nfa/limex_accel.c b/src/nfa/limex_accel.c index f883973e..c74c7079 100644 --- a/src/nfa/limex_accel.c +++ b/src/nfa/limex_accel.c @@ -78,7 +78,7 @@ size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux, size_t doAccel32(u32 s, u32 accel, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, size_t end) { - u32 idx = packedExtract32(s, accel); + u32 idx = pext32(s, accel); return accelScanWrapper(accelTable, aux, input, idx, i, end); } @@ -86,14 +86,14 @@ size_t doAccel32(u32 s, u32 accel, const u8 *accelTable, size_t doAccel64(u64a s, u64a accel, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, size_t end) { - u32 idx = packedExtract64(s, accel); + u32 idx = pext64(s, accel); return accelScanWrapper(accelTable, aux, input, idx, i, end); } #else size_t doAccel64(m128 s, m128 accel, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, size_t end) { - u32 idx = packedExtract64(movq(s), movq(accel)); + u32 idx = pext64(movq(s), movq(accel)); return accelScanWrapper(accelTable, aux, input, idx, i, end); } #endif diff --git a/src/nfa/limex_shuffle.h b/src/nfa/limex_shuffle.h index e45e4331..5ca8fce0 100644 --- a/src/nfa/limex_shuffle.h +++ b/src/nfa/limex_shuffle.h @@ -41,52 +41,6 @@ #include "util/bitutils.h" #include "util/simd_utils.h" -#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__)) -#define HAVE_PEXT -#endif - -static really_inline -u32 packedExtract32(u32 x, u32 mask) { -#if defined(HAVE_PEXT) - // Intel BMI2 can do this operation in one instruction. 
- return _pext_u32(x, mask); -#else - - u32 result = 0, num = 1; - while (mask != 0) { - u32 bit = findAndClearLSB_32(&mask); - if (x & (1U << bit)) { - assert(num != 0); // more than 32 bits! - result |= num; - } - num <<= 1; - } - return result; -#endif -} - -static really_inline -u32 packedExtract64(u64a x, u64a mask) { -#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) - // Intel BMI2 can do this operation in one instruction. - return _pext_u64(x, mask); -#else - - u32 result = 0, num = 1; - while (mask != 0) { - u32 bit = findAndClearLSB_64(&mask); - if (x & (1ULL << bit)) { - assert(num != 0); // more than 32 bits! - result |= num; - } - num <<= 1; - } - return result; -#endif -} - -#undef HAVE_PEXT - static really_inline u32 packedExtract128(m128 s, const m128 permute, const m128 compare) { m128 shuffled = pshufb(s, permute); diff --git a/src/nfa/mcclellan.c b/src/nfa/mcclellan.c index 63f5f535..584670c2 100644 --- a/src/nfa/mcclellan.c +++ b/src/nfa/mcclellan.c @@ -175,7 +175,7 @@ char mcclellanExec16_i(const struct mcclellan *m, u32 *state, const u8 *buf, if (mode == STOP_AT_MATCH) { *c_final = buf; } - return MO_CONTINUE_MATCHING; + return MO_ALIVE; } u32 s = *state; @@ -213,7 +213,7 @@ without_accel: if (mode == STOP_AT_MATCH) { *state = s & STATE_MASK; *c_final = c - 1; - return MO_CONTINUE_MATCHING; + return MO_MATCHES_PENDING; } u64a loc = (c - 1) - buf + offAdj + 1; @@ -221,12 +221,12 @@ without_accel: if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ + return MO_DEAD; /* termination requested */ } } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, &cached_accept_state, &cached_accept_id) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + return MO_DEAD; } } @@ -265,7 +265,7 @@ with_accel: if (mode == STOP_AT_MATCH) { *state = s & STATE_MASK; *c_final = c - 1; - return MO_CONTINUE_MATCHING; + return 
MO_MATCHES_PENDING; } u64a loc = (c - 1) - buf + offAdj + 1; @@ -273,12 +273,12 @@ with_accel: if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ + return MO_DEAD; /* termination requested */ } } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, &cached_accept_state, &cached_accept_id) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + return MO_DEAD; } } @@ -293,7 +293,7 @@ exit: } *state = s; - return MO_CONTINUE_MATCHING; + return MO_ALIVE; } static never_inline @@ -376,7 +376,7 @@ char mcclellanExec8_i(const struct mcclellan *m, u32 *state, const u8 *buf, char single, const u8 **c_final, enum MatchMode mode) { if (!len) { *c_final = buf; - return MO_CONTINUE_MATCHING; + return MO_ALIVE; } u32 s = *state; const u8 *c = buf; @@ -390,8 +390,7 @@ char mcclellanExec8_i(const struct mcclellan *m, u32 *state, const u8 *buf, u32 cached_accept_id = 0; u32 cached_accept_state = 0; - DEBUG_PRINTF("accel %hu, accept %hu\n", - m->accel_limit_8, m->accept_limit_8); + DEBUG_PRINTF("accel %hu, accept %u\n", m->accel_limit_8, accept_limit); DEBUG_PRINTF("s: %u, len %zu\n", s, len); @@ -417,19 +416,19 @@ without_accel: DEBUG_PRINTF("match - pausing\n"); *state = s; *c_final = c - 1; - return MO_CONTINUE_MATCHING; + return MO_MATCHES_PENDING; } u64a loc = (c - 1) - buf + offAdj + 1; if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + return MO_DEAD; } } else if (doComplexReport(cb, ctxt, m, s, loc, 0, &cached_accept_state, &cached_accept_id) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + return MO_DEAD; } } @@ -464,19 +463,19 @@ with_accel: DEBUG_PRINTF("match - pausing\n"); *state = s; *c_final = c - 1; - return MO_CONTINUE_MATCHING; + return MO_MATCHES_PENDING; } u64a loc = (c - 1) - buf + offAdj + 1; if (single) { DEBUG_PRINTF("reporting 
%u\n", m->arb_report); if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + return MO_DEAD; } } else if (doComplexReport(cb, ctxt, m, s, loc, 0, &cached_accept_state, &cached_accept_id) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + return MO_DEAD; } } @@ -488,7 +487,7 @@ exit: if (mode == STOP_AT_MATCH) { *c_final = c_end; } - return MO_CONTINUE_MATCHING; + return MO_ALIVE; } static never_inline @@ -576,7 +575,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, q->report_current = 0; if (rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + return MO_DEAD; } } @@ -611,17 +610,20 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, /* do main buffer region */ const u8 *final_look; - if (mcclellanExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp, - offset + sp, cb, context, single, &final_look, - mode) - == MO_HALT_MATCHING) { + char rv = mcclellanExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp, + offset + sp, cb, context, single, + &final_look, mode); + if (rv == MO_DEAD) { *(u16 *)q->state = 0; - return 0; + return MO_DEAD; } - if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) { + if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { DEBUG_PRINTF("this is as far as we go\n"); - assert(q->cur); DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); + + assert(q->cur); + assert(final_look != cur_buf + local_ep); + q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = final_look - cur_buf + 1; /* due to @@ -630,6 +632,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, return MO_MATCHES_PENDING; } + assert(rv == MO_ALIVE); assert(q->cur); if (mode != NO_MATCHES && q->items[q->cur].location > end) { DEBUG_PRINTF("this is as far as we go\n"); @@ -662,7 +665,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, case MQE_END: *(u16 *)q->state = s; q->cur++; - 
return s ? MO_ALIVE : 0; + return s ? MO_ALIVE : MO_DEAD; default: assert(!"invalid queue event"); } @@ -681,8 +684,8 @@ char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, const u8 *buffer, if (mcclellanExec16_i(m, &s, buffer, length, offset, cb, context, single, NULL, CALLBACK_OUTPUT) - == MO_HALT_MATCHING) { - return 0; + == MO_DEAD) { + return MO_DEAD; /* termination requested by callback */ } const struct mstate_aux *aux = get_aux(m, s); @@ -691,7 +694,7 @@ char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, const u8 *buffer, doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL); } - return !!s; + return s ? MO_ALIVE : MO_DEAD; /* state 0 is the dead state */ } static really_inline @@ -724,7 +727,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, q->report_current = 0; if (rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + return MO_DEAD; } } @@ -760,16 +763,20 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, } const u8 *final_look; - if (mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, offset + sp, - cb, context, single, &final_look, mode) - == MO_HALT_MATCHING) { + char rv = mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, + offset + sp, cb, context, single, + &final_look, mode); + if (rv == MO_DEAD) { *(u8 *)q->state = 0; - return 0; + return MO_DEAD; } - if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) { - /* found a match */ - DEBUG_PRINTF("found a match\n"); + if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { + DEBUG_PRINTF("this is as far as we go\n"); + DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); + assert(q->cur); + assert(final_look != cur_buf + local_ep); + q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = final_look - cur_buf + 1; /* due to @@ -778,6 +785,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, return MO_MATCHES_PENDING; } + assert(rv == MO_ALIVE); assert(q->cur); if (mode != NO_MATCHES
&& q->items[q->cur].location > end) { DEBUG_PRINTF("this is as far as we go\n"); @@ -811,7 +819,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, case MQE_END: *(u8 *)q->state = s; q->cur++; - return s ? MO_ALIVE : 0; + return s ? MO_ALIVE : MO_DEAD; default: assert(!"invalid queue event"); } @@ -830,8 +838,8 @@ char nfaExecMcClellan8_Bi(const struct NFA *n, u64a offset, const u8 *buffer, if (mcclellanExec8_i(m, &s, buffer, length, offset, cb, context, single, NULL, CALLBACK_OUTPUT) - == MO_HALT_MATCHING) { - return 0; + == MO_DEAD) { + return MO_DEAD; } const struct mstate_aux *aux = get_aux(m, s); @@ -840,7 +848,7 @@ char nfaExecMcClellan8_Bi(const struct NFA *n, u64a offset, const u8 *buffer, doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL); } - return s; + return s ? MO_ALIVE : MO_DEAD; } char nfaExecMcClellan8_B(const struct NFA *n, u64a offset, const u8 *buffer, diff --git a/src/nfa/mcclellan_internal.h b/src/nfa/mcclellan_internal.h index 4a27aadb..549bccf5 100644 --- a/src/nfa/mcclellan_internal.h +++ b/src/nfa/mcclellan_internal.h @@ -71,17 +71,17 @@ struct mcclellan { u16 start_floating; /**< floating start state */ u32 aux_offset; /**< offset of the aux structures relative to the start of * the nfa structure */ - u32 sherman_offset; /**< offset of to array of sherman state offsets - * the state_info structures relative to the start of the - * nfa structure */ - u32 sherman_end; /**< offset of the end of the state_info structures relative - * to the start of the nfa structure */ + u32 sherman_offset; /**< offset of array of sherman state offsets the + * state_info structures relative to the start of the + * nfa structure */ + u32 sherman_end; /**< offset of the end of the state_info structures + * relative to the start of the nfa structure */ u16 accel_limit_8; /**< 8 bit, lowest accelerable state */ u16 accept_limit_8; /**< 8 bit, lowest accept state */ u16 sherman_limit; /**< lowest sherman state */ u8 
alphaShift; u8 flags; - u8 has_accel; /**< 1 iff there are any accel planes */ + u8 has_accel; /**< 1 iff there are any accel plans */ u8 remap[256]; /**< remaps characters to a smaller alphabet */ ReportID arb_report; /**< one of the accepts that this dfa may raise */ u32 accel_offset; /**< offset of the accel structures from start of NFA */ diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 09006d5b..7a73c9d4 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -415,9 +415,9 @@ void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info, : info.raw.start_floating); } -/* returns non-zero on error */ +/* returns false on error */ static -int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) { +bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) { info.states[0].impl_id = 0; /* dead is always 0 */ vector norm; @@ -426,7 +426,7 @@ int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) { if (info.size() > (1 << 16)) { DEBUG_PRINTF("too many states\n"); *sherman_base = 0; - return 1; + return false; } for (u32 i = 1; i < info.size(); i++) { @@ -452,7 +452,7 @@ int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) { /* Check to see if we haven't over allocated our states */ DEBUG_PRINTF("next sherman %u masked %u\n", next_sherman, (dstate_id_t)(next_sherman & STATE_MASK)); - return (next_sherman - 1) != ((next_sherman - 1) & STATE_MASK); + return (next_sherman - 1) == ((next_sherman - 1) & STATE_MASK); } static @@ -470,7 +470,7 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, assert(alphaShift <= 8); u16 count_real_states; - if (allocateFSN16(info, &count_real_states)) { + if (!allocateFSN16(info, &count_real_states)) { DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", info.size()); return nullptr; diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h index e6f548a7..8d8dfb19 100644 --- a/src/nfa/mcclellancompile.h +++ 
b/src/nfa/mcclellancompile.h @@ -32,9 +32,7 @@ #include "accel_dfa_build_strat.h" #include "rdfa.h" #include "ue2common.h" -#include "util/accel_scheme.h" #include "util/alloc.h" -#include "util/charreach.h" #include "util/ue2_containers.h" #include diff --git a/src/nfa/mcsheng.c b/src/nfa/mcsheng.c new file mode 100644 index 00000000..98db3f0a --- /dev/null +++ b/src/nfa/mcsheng.c @@ -0,0 +1,1406 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "mcsheng.h" + +#include "accel.h" +#include "mcsheng_internal.h" +#include "nfa_api.h" +#include "nfa_api_queue.h" +#include "nfa_internal.h" +#include "util/bitutils.h" +#include "util/compare.h" +#include "util/simd_utils.h" +#include "ue2common.h" + +enum MatchMode { + CALLBACK_OUTPUT, + STOP_AT_MATCH, + NO_MATCHES +}; + +static really_inline +const struct mstate_aux *get_aux(const struct mcsheng *m, u32 s) { + const char *nfa = (const char *)m - sizeof(struct NFA); + const struct mstate_aux *aux + = s + (const struct mstate_aux *)(nfa + m->aux_offset); + + assert(ISALIGNED(aux)); + return aux; +} + +static really_inline +u32 mcshengEnableStarts(const struct mcsheng *m, u32 s) { + const struct mstate_aux *aux = get_aux(m, s); + + DEBUG_PRINTF("enabling starts %u->%hu\n", s, aux->top); + return aux->top; +} + +static really_inline +u32 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table, + u32 as) { + assert(ISALIGNED_N(sherman_state, 16)); + + u8 len = *(const u8 *)(sherman_state + SHERMAN_LEN_OFFSET); + + if (len) { + m128 ss_char = load128(sherman_state); + m128 cur_char = set16x8(cprime); + + u32 z = movemask128(eq128(ss_char, cur_char)); + + /* remove header cruft: type 1, len 1, daddy 2*/ + z &= ~0xf; + z &= (1U << (len + 4)) - 1; + + if (z) { + u32 i = ctz32(z & ~0xf) - 4; + + u32 s_out = unaligned_load_u16((const u8 *)sherman_state + + SHERMAN_STATES_OFFSET(len) + + sizeof(u16) * i); + DEBUG_PRINTF("found sherman match at %u/%u for c'=%hhu s=%u\n", i, + len, cprime, s_out); + return s_out; + } + } + + u32 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET); + return succ_table[(daddy << as) + cprime]; +} + +static really_inline +char doComplexReport(NfaCallback cb, void *ctxt, const struct mcsheng *m, + u32 s, u64a loc, char eod, u32 *cached_accept_state, + u32 *cached_accept_id) { + DEBUG_PRINTF("reporting state = %u, loc=%llu, eod %hhu\n", + s & STATE_MASK, loc, eod); + + if (!eod && s == 
*cached_accept_state) { + if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + + const struct mstate_aux *aux = get_aux(m, s); + size_t offset = eod ? aux->accept_eod : aux->accept; + + assert(offset); + const struct report_list *rl + = (const void *)((const char *)m + offset - sizeof(struct NFA)); + assert(ISALIGNED(rl)); + + DEBUG_PRINTF("report list size %u\n", rl->count); + u32 count = rl->count; + + if (!eod && count == 1) { + *cached_accept_state = s; + *cached_accept_id = rl->report[0]; + + DEBUG_PRINTF("reporting %u\n", rl->report[0]); + if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + + for (u32 i = 0; i < count; i++) { + DEBUG_PRINTF("reporting %u\n", rl->report[i]); + if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + } + + return MO_CONTINUE_MATCHING; /* continue execution */ +} + +#define SHENG_CHUNK 8 + +static really_inline +u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, + const u8 *hard_c_end, u32 s_in, char do_accel) { + assert(s_in < m->sheng_end); + assert(s_in); /* should not already be dead */ + assert(soft_c_end <= hard_c_end); + DEBUG_PRINTF("s_in = %u (adjusted %u)\n", s_in, s_in - 1); + m128 s = set16x8(s_in - 1); + const u8 *c = *c_inout; + const u8 *c_end = hard_c_end - SHENG_CHUNK + 1; + if (!do_accel) { + c_end = MIN(soft_c_end, hard_c_end - SHENG_CHUNK + 1); + } + const m128 *masks = m->sheng_masks; + u8 sheng_limit = m->sheng_end - 1; /* - 1: no dead state */ + u8 sheng_stop_limit = do_accel ? m->sheng_accel_limit : sheng_limit; + + /* When we use movd to get a u32 containing our state, it will have 4 lanes + * all duplicating the state. 
We can create versions of our limits with 4 + * copies to directly compare against, this prevents us generating code to + * extract a single copy of the state from the u32 for checking. */ + u32 sheng_stop_limit_x4 = sheng_stop_limit * 0x01010101; + +#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) + u32 sheng_limit_x4 = sheng_limit * 0x01010101; + m128 simd_stop_limit = set4x32(sheng_stop_limit_x4); + m128 accel_delta = set16x8(sheng_limit - sheng_stop_limit); + DEBUG_PRINTF("end %hu, accel %hhu --> limit %hhu\n", sheng_limit, + m->sheng_accel_limit, sheng_stop_limit); +#endif + +#define SHENG_SINGLE_ITER do { \ + m128 shuffle_mask = masks[*(c++)]; \ + s = pshufb(shuffle_mask, s); \ + u32 s_gpr_x4 = movd(s); /* convert to u8 */ \ + DEBUG_PRINTF("c %hhu (%c) --> s %hhu\n", c[-1], c[-1], (u8)s_gpr_x4); \ + if (s_gpr_x4 >= sheng_stop_limit_x4) { \ + s_gpr = s_gpr_x4; \ + goto exit; \ + } \ + } while (0) + + u8 s_gpr; + while (c < c_end) { +#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) + /* This version uses pext for efficiently bitbashing out scaled + * versions of the bytes to process from a u64a */ + + u64a data_bytes = unaligned_load_u64a(c); + u64a cc0 = pdep64(data_bytes, 0xff0); /* extract scaled low byte */ + data_bytes &= ~0xffULL; /* clear low bits for scale space */ + m128 shuffle_mask0 = load128((const char *)masks + cc0); + s = pshufb(shuffle_mask0, s); + m128 s_max = s; + m128 s_max0 = s_max; + DEBUG_PRINTF("c %02llx --> s %hhu\n", cc0 >> 4, movd(s)); + +#define SHENG_SINGLE_UNROLL_ITER(iter) \ + assert(iter); \ + u64a cc##iter = pext64(data_bytes, mcsheng_pext_mask[iter]); \ + assert(cc##iter == (u64a)c[iter] << 4); \ + m128 shuffle_mask##iter = load128((const char *)masks + cc##iter); \ + s = pshufb(shuffle_mask##iter, s); \ + if (do_accel && iter == 7) { \ + /* in the final iteration we also have to check against accel */ \ + m128 s_temp = sadd_u8_m128(s, accel_delta); \ + s_max = max_u8_m128(s_max, s_temp); \ + } else { \ + s_max = max_u8_m128(s_max, s); \ +
} \ + m128 s_max##iter = s_max; \ + DEBUG_PRINTF("c %02llx --> s %hhu max %hhu\n", cc##iter >> 4, \ + movd(s), movd(s_max)); + + SHENG_SINGLE_UNROLL_ITER(1); + + SHENG_SINGLE_UNROLL_ITER(2); + SHENG_SINGLE_UNROLL_ITER(3); + + SHENG_SINGLE_UNROLL_ITER(4); + SHENG_SINGLE_UNROLL_ITER(5); + + SHENG_SINGLE_UNROLL_ITER(6); + SHENG_SINGLE_UNROLL_ITER(7); + + if (movd(s_max7) >= sheng_limit_x4) { + DEBUG_PRINTF("exit found\n"); + + /* Explicitly check the last byte as it is more likely as it also + * checks for acceleration. */ + if (movd(s_max6) < sheng_limit_x4) { + c += SHENG_CHUNK; + s_gpr = movq(s); + assert(s_gpr >= sheng_stop_limit); + goto exit; + } + + /* use shift-xor to create a register containing all of the max + * values */ + m128 blended = rshift64_m128(s_max0, 56); + blended = xor128(blended, rshift64_m128(s_max1, 48)); + blended = xor128(blended, rshift64_m128(s_max2, 40)); + blended = xor128(blended, rshift64_m128(s_max3, 32)); + blended = xor128(blended, rshift64_m128(s_max4, 24)); + blended = xor128(blended, rshift64_m128(s_max5, 16)); + blended = xor128(blended, rshift64_m128(s_max6, 8)); + blended = xor128(blended, s); + blended = xor128(blended, rshift64_m128(blended, 8)); + DEBUG_PRINTF("blended %016llx\n", movq(blended)); + + m128 final = min_u8_m128(blended, simd_stop_limit); + m128 cmp = sub_u8_m128(final, simd_stop_limit); + u64a stops = ~movemask128(cmp); + assert(stops); + u32 earliest = ctz32(stops); + DEBUG_PRINTF("stops %02llx, earliest %u\n", stops, earliest); + assert(earliest < 8); + c += earliest + 1; + s_gpr = movq(blended) >> (earliest * 8); + assert(s_gpr >= sheng_stop_limit); + goto exit; + } else { + c += SHENG_CHUNK; + } +#else + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; + + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; +#endif + } + + assert(c_end - c < SHENG_CHUNK); + if (c < soft_c_end) { + assert(soft_c_end - c < SHENG_CHUNK); + switch (soft_c_end - c) 
{ + case 7: + SHENG_SINGLE_ITER; + case 6: + SHENG_SINGLE_ITER; + case 5: + SHENG_SINGLE_ITER; + case 4: + SHENG_SINGLE_ITER; + case 3: + SHENG_SINGLE_ITER; + case 2: + SHENG_SINGLE_ITER; + case 1: + SHENG_SINGLE_ITER; + } + } + + assert(c >= soft_c_end); + + s_gpr = movd(s); +exit: + assert(c <= hard_c_end); + DEBUG_PRINTF("%zu from end; s %hhu\n", c_end - c, s_gpr); + assert(c >= soft_c_end || s_gpr >= sheng_stop_limit); + /* undo state adjustment to match mcclellan view */ + if (s_gpr == sheng_limit) { + s_gpr = 0; + } else if (s_gpr < sheng_limit) { + s_gpr++; + } + + *c_inout = c; + return s_gpr; +} + +static really_inline +const char *findShermanState(UNUSED const struct mcsheng *m, + const char *sherman_base_offset, u32 sherman_base, + u32 s) { + const char *rv + = sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base); + assert(rv < (const char *)m + m->length - sizeof(struct NFA)); + UNUSED u8 type = *(const u8 *)(rv + SHERMAN_TYPE_OFFSET); + assert(type == SHERMAN_STATE); + return rv; +} + +static really_inline +const u8 *run_mcsheng_accel(const struct mcsheng *m, + const struct mstate_aux *aux, u32 s, + const u8 **min_accel_offset, + const u8 *c, const u8 *c_end) { + DEBUG_PRINTF("skipping\n"); + u32 accel_offset = aux[s].accel_offset; + + assert(aux[s].accel_offset); + assert(accel_offset >= m->aux_offset); + assert(!m->sherman_offset || accel_offset < m->sherman_offset); + + const union AccelAux *aaux = (const void *)((const char *)m + accel_offset); + const u8 *c2 = run_accel(aaux, c, c_end); + + if (c2 < *min_accel_offset + BAD_ACCEL_DIST) { + *min_accel_offset = c2 + BIG_ACCEL_PENALTY; + } else { + *min_accel_offset = c2 + SMALL_ACCEL_PENALTY; + } + + if (*min_accel_offset >= c_end - ACCEL_MIN_LEN) { + *min_accel_offset = c_end; + } + + DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n", + c2 - c, *min_accel_offset - c2, c_end - c2); + + return c2; +} + +static really_inline +u32 doNormal16(const struct mcsheng *m, const u8 
**c_inout, const u8 *end, + u32 s, char do_accel, enum MatchMode mode) { + const u8 *c = *c_inout; + + const u16 *succ_table + = (const u16 *)((const char *)m + sizeof(struct mcsheng)); + assert(ISALIGNED_N(succ_table, 2)); + u32 sheng_end = m->sheng_end; + u32 sherman_base = m->sherman_limit; + const char *sherman_base_offset + = (const char *)m - sizeof(struct NFA) + m->sherman_offset; + u32 as = m->alphaShift; + + /* Adjust start of succ table so we can index into using state id (rather + * than adjust to normal id). As we will not be processing states with low + * state ids, we will not be accessing data before the succ table. Note: due + * to the size of the sheng tables, the succ_table pointer will still be + * inside the engine.*/ + succ_table -= sheng_end << as; + + s &= STATE_MASK; + + while (c < end && s >= sheng_end) { + u8 cprime = m->remap[*c]; + DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u)\n", *c, + ourisprint(*c) ? *c : '?', cprime, s); + if (s < sherman_base) { + DEBUG_PRINTF("doing normal\n"); + assert(s < m->state_count); + s = succ_table[(s << as) + cprime]; + } else { + const char *sherman_state + = findShermanState(m, sherman_base_offset, sherman_base, s); + DEBUG_PRINTF("doing sherman (%u)\n", s); + s = doSherman16(sherman_state, cprime, succ_table, as); + } + + DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK); + c++; + + if (do_accel && (s & ACCEL_FLAG)) { + break; + } + if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { + break; + } + + s &= STATE_MASK; + } + + *c_inout = c; + return s; +} + +static really_inline +char mcshengExec16_i(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **c_final, enum MatchMode mode) { + assert(ISALIGNED_N(state, 2)); + if (!len) { + if (mode == STOP_AT_MATCH) { + *c_final = buf; + } + return MO_ALIVE; + } + + u32 s = *state; + const u8 *c = buf; + const u8 *c_end = buf + len; + const u8 sheng_end = m->sheng_end; + const struct 
mstate_aux *aux + = (const struct mstate_aux *)((const char *)m + m->aux_offset + - sizeof(struct NFA)); + + s &= STATE_MASK; + + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + DEBUG_PRINTF("s: %u, len %zu\n", s, len); + + const u8 *min_accel_offset = c; + if (!m->has_accel || len < ACCEL_MIN_LEN) { + min_accel_offset = c_end; + goto without_accel; + } + + goto with_accel; + +without_accel: + do { + assert(c < min_accel_offset); + int do_accept; + if (!s) { + goto exit; + } else if (s < sheng_end) { + s = doSheng(m, &c, min_accel_offset, c_end, s, 0); + do_accept = mode != NO_MATCHES && get_aux(m, s)->accept; + } else { + s = doNormal16(m, &c, min_accel_offset, s, 0, mode); + + do_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG); + } + + if (do_accept) { + if (mode == STOP_AT_MATCH) { + *state = s & STATE_MASK; + *c_final = c - 1; + return MO_MATCHES_PENDING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; /* termination requested */ + } + } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + assert(c <= c_end); /* sheng is fuzzy for min_accel_offset */ + } while (c < min_accel_offset); + + if (c == c_end) { + goto exit; + } + +with_accel: + do { + assert(c < c_end); + int do_accept; + + if (!s) { + goto exit; + } else if (s < sheng_end) { + if (s > m->sheng_accel_limit) { + c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { + goto without_accel; + } + } + s = doSheng(m, &c, c_end, c_end, s, 1); + do_accept = mode != NO_MATCHES && get_aux(m, s)->accept; + } else { + if (s & ACCEL_FLAG) { + DEBUG_PRINTF("skipping\n"); + s &= STATE_MASK; + c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { + 
goto without_accel; + } + } + + s = doNormal16(m, &c, c_end, s, 1, mode); + do_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG); + } + + if (do_accept) { + if (mode == STOP_AT_MATCH) { + *state = s & STATE_MASK; + *c_final = c - 1; + return MO_MATCHES_PENDING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; /* termination requested */ + } + } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + assert(c <= c_end); + } while (c < c_end); + +exit: + s &= STATE_MASK; + + if (mode == STOP_AT_MATCH) { + *c_final = c_end; + } + *state = s; + + return MO_ALIVE; +} + +static never_inline +char mcshengExec16_i_cb(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, CALLBACK_OUTPUT); +} + +static never_inline +char mcshengExec16_i_sam(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, STOP_AT_MATCH); +} + +static never_inline +char mcshengExec16_i_nm(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, NO_MATCHES); +} + +static really_inline +char mcshengExec16_i_ni(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point, + enum MatchMode mode) { + if (mode == CALLBACK_OUTPUT) { + return 
mcshengExec16_i_cb(m, state, buf, len, offAdj, cb, ctxt, + single, final_point); + } else if (mode == STOP_AT_MATCH) { + return mcshengExec16_i_sam(m, state, buf, len, offAdj, cb, ctxt, + single, final_point); + } else { + assert (mode == NO_MATCHES); + return mcshengExec16_i_nm(m, state, buf, len, offAdj, cb, ctxt, + single, final_point); + } +} +/* Steps state s through the 8-bit successor table over [*c_inout, end); stops early when s drops below sheng_end, or reaches accel_limit_8 (do_accel) or accept_limit_8 (no do_accel, mode != NO_MATCHES). Stores the new position in *c_inout and returns the new state. */ +static really_inline +u32 doNormal8(const struct mcsheng *m, const u8 **c_inout, const u8 *end, u32 s, + char do_accel, enum MatchMode mode) { + const u8 *c = *c_inout; + u32 sheng_end = m->sheng_end; + u32 accel_limit = m->accel_limit_8; + u32 accept_limit = m->accept_limit_8; + + const u32 as = m->alphaShift; + const u8 *succ_table = (const u8 *)((const char *)m + + sizeof(struct mcsheng)); + /* Adjust the start of the succ table so we can index into it using the + * state id (rather than adjusting to a normal id). As we will not be processing states with low + * state ids, we will not be accessing data before the succ table. Note: due + * to the size of the sheng tables, the succ_table pointer will still be + * inside the engine. */ + succ_table -= sheng_end << as; + + assert(s >= sheng_end); + + while (c < end && s >= sheng_end) { + u8 cprime = m->remap[*c]; + DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *c, + ourisprint(*c) ? 
*c : '?', cprime); + s = succ_table[(s << as) + cprime]; + + DEBUG_PRINTF("s: %u\n", s); + c++; + if (do_accel) { + if (s >= accel_limit) { + break; + } + } else { + if (mode != NO_MATCHES && s >= accept_limit) { + break; + } + } + } + *c_inout = c; + return s; +} +/* Runs the 8-bit engine over buf[0, len) starting from *state. Returns MO_MATCHES_PENDING at the first accept in STOP_AT_MATCH mode (*c_final is set to c - 1), MO_DEAD when a report callback returns MO_HALT_MATCHING, and MO_ALIVE otherwise. */ +static really_inline +char mcshengExec8_i(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **c_final, enum MatchMode mode) { + if (!len) { + *c_final = buf; + return MO_ALIVE; + } + u32 s = *state; + const u8 *c = buf; + const u8 *c_end = buf + len; + const u8 sheng_end = m->sheng_end; + + const struct mstate_aux *aux + = (const struct mstate_aux *)((const char *)m + m->aux_offset + - sizeof(struct NFA)); + u32 accept_limit = m->accept_limit_8; + + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + DEBUG_PRINTF("accel %hu, accept %u\n", m->accel_limit_8, accept_limit); + + DEBUG_PRINTF("s: %u, len %zu\n", s, len); + + const u8 *min_accel_offset = c; + if (!m->has_accel || len < ACCEL_MIN_LEN) { /* no acceleration: run the whole buffer in the without_accel loop */ + min_accel_offset = c_end; + goto without_accel; + } + + goto with_accel; + +without_accel: + do { + assert(c < min_accel_offset); + if (!s) { + goto exit; + } else if (s < sheng_end) { + s = doSheng(m, &c, min_accel_offset, c_end, s, 0); + } else { + s = doNormal8(m, &c, min_accel_offset, s, 0, mode); + assert(c <= min_accel_offset); + } + + if (mode != NO_MATCHES && s >= accept_limit) { + if (mode == STOP_AT_MATCH) { + DEBUG_PRINTF("match - pausing\n"); + *state = s; + *c_final = c - 1; + return MO_MATCHES_PENDING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; + } + } else if (doComplexReport(cb, ctxt, m, s, loc, 0, + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + assert(c <= c_end); /* sheng is fuzzy for min_accel_offset */ + } 
while (c < min_accel_offset); + + if (c == c_end) { + goto exit; + } + +with_accel: + do { + u32 accel_limit = m->accel_limit_8; + + assert(c < c_end); + if (!s) { + goto exit; + } else if (s < sheng_end) { + if (s > m->sheng_accel_limit) { + c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { + goto without_accel; + } + } + s = doSheng(m, &c, c_end, c_end, s, 1); + } else { + if (s >= accel_limit && aux[s].accel_offset) { + c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { + goto without_accel; + } + } + s = doNormal8(m, &c, c_end, s, 1, mode); + } + + if (mode != NO_MATCHES && s >= accept_limit) { + if (mode == STOP_AT_MATCH) { + DEBUG_PRINTF("match - pausing\n"); + *state = s; + *c_final = c - 1; + return MO_MATCHES_PENDING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; + } + } else if (doComplexReport(cb, ctxt, m, s, loc, 0, + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + assert(c <= c_end); + } while (c < c_end); + +exit: + *state = s; + if (mode == STOP_AT_MATCH) { + *c_final = c_end; + } + return MO_ALIVE; +} +/* Instance of mcshengExec8_i with mode fixed to CALLBACK_OUTPUT. */ +static never_inline +char mcshengExec8_i_cb(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, CALLBACK_OUTPUT); +} +/* Instance of mcshengExec8_i with mode fixed to STOP_AT_MATCH. */ +static never_inline +char mcshengExec8_i_sam(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, STOP_AT_MATCH); +} +/* Instance of mcshengExec8_i with mode fixed to NO_MATCHES. */ +static never_inline +char mcshengExec8_i_nm(const struct 
mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, NO_MATCHES); +} +/* Dispatches on mode to the matching mcshengExec8_i_{cb,sam,nm} wrapper. */ +static really_inline +char mcshengExec8_i_ni(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point, + enum MatchMode mode) { + if (mode == CALLBACK_OUTPUT) { + return mcshengExec8_i_cb(m, state, buf, len, offAdj, cb, ctxt, single, + final_point); + } else if (mode == STOP_AT_MATCH) { + return mcshengExec8_i_sam(m, state, buf, len, offAdj, cb, ctxt, + single, final_point); + } else { + assert(mode == NO_MATCHES); + return mcshengExec8_i_nm(m, state, buf, len, offAdj, cb, ctxt, single, + final_point); + } +} +/* Returns MO_CONTINUE_MATCHING when state s has no accept_eod entry; otherwise returns the result of doComplexReport for s at offset. */ +static really_inline +char mcshengCheckEOD(const struct NFA *nfa, u32 s, u64a offset, + NfaCallback cb, void *ctxt) { + const struct mcsheng *m = getImplNfa(nfa); + const struct mstate_aux *aux = get_aux(m, s); + + if (!aux->accept_eod) { + return MO_CONTINUE_MATCHING; + } + return doComplexReport(cb, ctxt, m, s, offset, 1, NULL, NULL); +} +/* Queue driver for the 16-bit engine: optionally reports the current state, then runs mcshengExec16_i_ni between queue items, applying MQE_TOP events, until MQE_END, the end location (unless mode is NO_MATCHES), a pending match or callback termination. */ +static really_inline +char nfaExecMcSheng16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, + const u8 *hend, NfaCallback cb, void *context, + struct mq *q, char single, s64a end, + enum MatchMode mode) { + assert(n->type == MCSHENG_NFA_16); + const struct mcsheng *m = getImplNfa(n); + s64a sp; + + assert(ISALIGNED_N(q->state, 2)); + u32 s = *(u16 *)q->state; + + if (q->report_current) { + assert(s); + assert(get_aux(m, s)->accept); + + int rv; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + rv = cb(0, q_cur_offset(q), m->arb_report, context); + } else { + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0, + &cached_accept_state, &cached_accept_id); + } + + q->report_current = 0; + + if (rv == 
MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + sp = q_cur_loc(q); + q->cur++; + + const u8 *cur_buf = sp < 0 ? hend : buffer; /* negative locations are addressed relative to hend */ + + assert(q->cur); + if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u16 *)q->state = s; + return MO_ALIVE; + } + + while (1) { + assert(q->cur < q->end); + s64a ep = q->items[q->cur].location; + if (mode != NO_MATCHES) { + ep = MIN(ep, end); + } + + assert(ep >= sp); + + s64a local_ep = ep; + if (sp < 0) { + local_ep = MIN(0, ep); /* stop this pass at location 0, where cur_buf switches to buffer */ + } + + /* do main buffer region */ + const u8 *final_look; + char rv = mcshengExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp, + offset + sp, cb, context, single, + &final_look, mode); + if (rv == MO_DEAD) { + *(u16 *)q->state = 0; + return MO_DEAD; + } + if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { + DEBUG_PRINTF("this is as far as we go\n"); + DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); + + assert(q->cur); + assert(final_look != cur_buf + local_ep); + + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = final_look - cur_buf + 1; /* due to + * early -1 */ + *(u16 *)q->state = s; + return MO_MATCHES_PENDING; + } + + assert(rv == MO_ALIVE); + assert(q->cur); + if (mode != NO_MATCHES && q->items[q->cur].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u16 *)q->state = s; + return MO_ALIVE; + } + + sp = local_ep; + + if (sp == 0) { + cur_buf = buffer; + } + + if (sp != ep) { + continue; + } + + switch (q->items[q->cur].type) { + case MQE_TOP: + assert(sp + offset || !s); + if (sp + offset == 0) { + s = m->start_anchored; + break; + } + s = mcshengEnableStarts(m, s); + break; + case MQE_END: + *(u16 *)q->state = s; + q->cur++; + return s ? 
MO_ALIVE : MO_DEAD; + default: + assert(!"invalid queue event"); + } + + q->cur++; + } +} +/* Queue driver for the 8-bit engine: optionally reports the current state, then runs mcshengExec8_i_ni between queue items, applying MQE_TOP events, until MQE_END, the end location (unless mode is NO_MATCHES), a pending match (STOP_AT_MATCH) or callback termination. */ +static really_inline +char nfaExecMcSheng8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, + const u8 *hend, NfaCallback cb, void *context, + struct mq *q, char single, s64a end, + enum MatchMode mode) { + assert(n->type == MCSHENG_NFA_8); + const struct mcsheng *m = getImplNfa(n); + s64a sp; + + u32 s = *(u8 *)q->state; + + if (q->report_current) { + assert(s); + assert(s >= m->accept_limit_8); + + int rv; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + rv = cb(0, q_cur_offset(q), m->arb_report, context); + } else { + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0, + &cached_accept_state, &cached_accept_id); + } + + q->report_current = 0; + + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + sp = q_cur_loc(q); + q->cur++; + + const u8 *cur_buf = sp < 0 ? hend : buffer; /* negative locations are addressed relative to hend */ + + if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u8 *)q->state = s; + return MO_ALIVE; + } + + while (1) { + DEBUG_PRINTF("%s @ %llu\n", q->items[q->cur].type == MQE_TOP ? "TOP" : + q->items[q->cur].type == MQE_END ? 
"END" : "???", + q->items[q->cur].location + offset); + assert(q->cur < q->end); + s64a ep = q->items[q->cur].location; + if (mode != NO_MATCHES) { + ep = MIN(ep, end); + } + + assert(ep >= sp); + + s64a local_ep = ep; + if (sp < 0) { + local_ep = MIN(0, ep); /* stop this pass at location 0, where cur_buf switches to buffer */ + } + + const u8 *final_look; + char rv = mcshengExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, + offset + sp, cb, context, single, + &final_look, mode); + if (rv == MO_DEAD) { /* engine result, not a callback result: test MO_DEAD as the 16-bit path does */ + *(u8 *)q->state = 0; + return MO_DEAD; + } + if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { + DEBUG_PRINTF("this is as far as we go\n"); + DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); + + assert(q->cur); + assert(final_look != cur_buf + local_ep); + + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = final_look - cur_buf + 1; /* due to + * early -1 */ + *(u8 *)q->state = s; + return MO_MATCHES_PENDING; + } + + assert(rv == MO_ALIVE); + assert(q->cur); + if (mode != NO_MATCHES && q->items[q->cur].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + assert(q->cur); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u8 *)q->state = s; + return MO_ALIVE; + } + + sp = local_ep; + + if (sp == 0) { + cur_buf = buffer; + } + + if (sp != ep) { + continue; + } + + switch (q->items[q->cur].type) { + case MQE_TOP: + assert(sp + offset || !s); + if (sp + offset == 0) { + s = (u8)m->start_anchored; + break; + } + s = mcshengEnableStarts(m, s); + break; + case MQE_END: + *(u8 *)q->state = s; + q->cur++; + return s ? 
MO_ALIVE : MO_DEAD; + default: + assert(!"invalid queue event"); + } + + q->cur++; + } +} +/* Runs the 8-bit engine over queue q up to location end in CALLBACK_OUTPUT mode. */ +char nfaExecMcSheng8_Q(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_8); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcSheng8_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, end, + CALLBACK_OUTPUT); +} +/* Runs the 16-bit engine over queue q up to location end in CALLBACK_OUTPUT mode. */ +char nfaExecMcSheng16_Q(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_16); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcSheng16_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, end, + CALLBACK_OUTPUT); +} +/* Reports the 8-bit state in q->state at q_cur_offset(q) if it is >= accept_limit_8; callback results are discarded and 0 is always returned. */ +char nfaExecMcSheng8_reportCurrent(const struct NFA *n, struct mq *q) { + const struct mcsheng *m = getImplNfa(n); + NfaCallback cb = q->cb; + void *ctxt = q->context; + u32 s = *(u8 *)q->state; + u8 single = m->flags & MCSHENG_FLAG_SINGLE; + u64a offset = q_cur_offset(q); + assert(q_cur_type(q) == MQE_START); + assert(s); + + if (s >= m->accept_limit_8) { + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + cb(0, offset, m->arb_report, ctxt); + } else { + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state, + &cached_accept_id); + } + } + + return 0; +} +/* 16-bit variant: reports the state in q->state if its aux entry has a non-zero accept; always returns 0. */ +char nfaExecMcSheng16_reportCurrent(const struct NFA *n, struct mq *q) { + const struct mcsheng *m = getImplNfa(n); + NfaCallback cb = q->cb; + void *ctxt = q->context; + u32 s = *(u16 *)q->state; + const struct mstate_aux *aux = get_aux(m, s); + u8 single = m->flags & MCSHENG_FLAG_SINGLE; + u64a offset = q_cur_offset(q); + assert(q_cur_type(q) == MQE_START); 
+ DEBUG_PRINTF("state %u\n", s); + assert(s); + + if (aux->accept) { + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + cb(0, offset, m->arb_report, ctxt); + } else { + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state, + &cached_accept_id); + } + } + + return 0; +} +/* Returns 1 if the report list located via aux->accept contains report, otherwise 0 (including when aux->accept is 0). */ +static +char mcshengHasAccept(const struct mcsheng *m, const struct mstate_aux *aux, + ReportID report) { + assert(m && aux); + + if (!aux->accept) { + return 0; + } + + const struct report_list *rl = (const struct report_list *) + ((const char *)m + aux->accept - sizeof(struct NFA)); + assert(ISALIGNED_N(rl, 4)); + + DEBUG_PRINTF("report list has %u entries\n", rl->count); + + for (u32 i = 0; i < rl->count; i++) { + if (rl->report[i] == report) { + return 1; + } + } + + return 0; +} +/* Returns whether the 8-bit state in q->state has report in its accept list. */ +char nfaExecMcSheng8_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { + assert(n && q); + + const struct mcsheng *m = getImplNfa(n); + u8 s = *(u8 *)q->state; + DEBUG_PRINTF("checking accepts for %hhu\n", s); + + return mcshengHasAccept(m, get_aux(m, s), report); +} +/* Returns 1 if the 8-bit state in q->state has a non-zero accept entry, else 0. */ +char nfaExecMcSheng8_inAnyAccept(const struct NFA *n, struct mq *q) { + assert(n && q); + + const struct mcsheng *m = getImplNfa(n); + u8 s = *(u8 *)q->state; + DEBUG_PRINTF("checking accepts for %hhu\n", s); + + return !!get_aux(m, s)->accept; +} +/* Returns whether the 16-bit state in q->state has report in its accept list. */ +char nfaExecMcSheng16_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { + assert(n && q); + + const struct mcsheng *m = getImplNfa(n); + u16 s = *(u16 *)q->state; + DEBUG_PRINTF("checking accepts for %hu\n", s); + + return mcshengHasAccept(m, get_aux(m, s), report); +} +/* Returns 1 if the 16-bit state in q->state has a non-zero accept entry, else 0. */ +char nfaExecMcSheng16_inAnyAccept(const struct NFA *n, struct mq *q) { + assert(n && q); + + const struct mcsheng *m = getImplNfa(n); + u16 s = *(u16 *)q->state; + DEBUG_PRINTF("checking accepts for %hu\n", s); + + return !!get_aux(m, s)->accept; +} +/* Runs the 8-bit engine over queue q up to location end in STOP_AT_MATCH mode. */ +char nfaExecMcSheng8_Q2(const struct NFA *n, struct mq *q, s64a end) 
{ + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_8); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcSheng8_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, end, + STOP_AT_MATCH); +} +/* Runs the 16-bit engine over queue q up to location end in STOP_AT_MATCH mode. */ +char nfaExecMcSheng16_Q2(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_16); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcSheng16_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, end, + STOP_AT_MATCH); +} +/* Runs the 8-bit engine in NO_MATCHES mode; returns MO_MATCHES_PENDING if that run returns non-zero and the resulting state accepts report, otherwise the run's return value. */ +char nfaExecMcSheng8_QR(const struct NFA *n, struct mq *q, ReportID report) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_8); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + char rv = nfaExecMcSheng8_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, 0 /* end */, + NO_MATCHES); + if (rv && nfaExecMcSheng8_inAccept(n, report, q)) { + return MO_MATCHES_PENDING; + } else { + return rv; + } +} +/* 16-bit variant of the NO_MATCHES run followed by an accept check for report. */ +char nfaExecMcSheng16_QR(const struct NFA *n, struct mq *q, ReportID report) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_16); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + char rv = nfaExecMcSheng16_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, 0 /* end */, + NO_MATCHES); + + if (rv && nfaExecMcSheng16_inAccept(n, report, q)) { + return MO_MATCHES_PENDING; + } else { + return rv; + } +} + +char 
nfaExecMcSheng8_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, UNUSED u8 key) { /* stores the start state and returns 1, or returns 0 without writing when the start state is 0 */ + const struct mcsheng *m = getImplNfa(nfa); + u8 s = offset ? m->start_floating : m->start_anchored; /* anchored start only at offset 0 */ + if (s) { + *(u8 *)state = s; + return 1; + } + return 0; +} +/* 16-bit variant: stores the chosen start state with an unaligned store and returns 1, or returns 0 when it is 0. */ +char nfaExecMcSheng16_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, UNUSED u8 key) { + const struct mcsheng *m = getImplNfa(nfa); + u16 s = offset ? m->start_floating : m->start_anchored; + if (s) { + unaligned_store_u16(state, s); + return 1; + } + return 0; +} +/* Runs mcshengCheckEOD on the one-byte state read from state. */ +char nfaExecMcSheng8_testEOD(const struct NFA *nfa, const char *state, + UNUSED const char *streamState, u64a offset, + NfaCallback callback, void *context) { + return mcshengCheckEOD(nfa, *(const u8 *)state, offset, callback, + context); +} +/* Runs mcshengCheckEOD on the 2-byte-aligned u16 state read from state. */ +char nfaExecMcSheng16_testEOD(const struct NFA *nfa, const char *state, + UNUSED const char *streamState, u64a offset, + NfaCallback callback, void *context) { + assert(ISALIGNED_N(state, 2)); + return mcshengCheckEOD(nfa, *(const u16 *)state, offset, callback, + context); +} +/* Resets the one-byte queue state to 0. */ +char nfaExecMcSheng8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { + assert(nfa->scratchStateSize == 1); + *(u8 *)q->state = 0; + return 0; +} +/* Resets the two-byte queue state to 0. */ +char nfaExecMcSheng16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { + assert(nfa->scratchStateSize == 2); + assert(ISALIGNED_N(q->state, 2)); + *(u16 *)q->state = 0; + return 0; +} +/* Copies the one-byte state from q->state to q->streamState. */ +char nfaExecMcSheng8_queueCompressState(UNUSED const struct NFA *nfa, + const struct mq *q, UNUSED s64a loc) { + void *dest = q->streamState; + const void *src = q->state; + assert(nfa->scratchStateSize == 1); + assert(nfa->streamStateSize == 1); + *(u8 *)dest = *(const u8 *)src; + return 0; +} +/* Copies the one-byte state from src to dest. */ +char nfaExecMcSheng8_expandState(UNUSED const struct NFA *nfa, void *dest, + const void *src, UNUSED u64a offset, + UNUSED u8 key) { + assert(nfa->scratchStateSize == 1); + assert(nfa->streamStateSize == 1); + *(u8 *)dest = *(const u8 *)src; + return 0; 
+} +/* Copies the aligned u16 at q->state into q->streamState using an unaligned store. */ +char nfaExecMcSheng16_queueCompressState(UNUSED const struct NFA *nfa, + const struct mq *q, + UNUSED s64a loc) { + void *dest = q->streamState; + const void *src = q->state; + assert(nfa->scratchStateSize == 2); + assert(nfa->streamStateSize == 2); + assert(ISALIGNED_N(src, 2)); + unaligned_store_u16(dest, *(const u16 *)(src)); + return 0; +} +/* Loads a u16 from src with an unaligned load and stores it to the 2-byte-aligned dest. */ +char nfaExecMcSheng16_expandState(UNUSED const struct NFA *nfa, void *dest, + const void *src, UNUSED u64a offset, + UNUSED u8 key) { + assert(nfa->scratchStateSize == 2); + assert(nfa->streamStateSize == 2); + assert(ISALIGNED_N(dest, 2)); + *(u16 *)dest = unaligned_load_u16(src); + return 0; +} diff --git a/src/nfa/mcsheng.h b/src/nfa/mcsheng.h new file mode 100644 index 00000000..19fd6961 --- /dev/null +++ b/src/nfa/mcsheng.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MCSHENG_H +#define MCSHENG_H + +#include "callback.h" +#include "ue2common.h" + +struct mq; +struct NFA; + +/* 8-bit Sheng-McClellan hybrid */ + +char nfaExecMcSheng8_testEOD(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context); +char nfaExecMcSheng8_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcSheng8_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcSheng8_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecMcSheng8_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecMcSheng8_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecMcSheng8_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecMcSheng8_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecMcSheng8_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecMcSheng8_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecMcSheng8_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); + +#define nfaExecMcSheng8_B_Reverse NFA_API_NO_IMPL +#define nfaExecMcSheng8_zombie_status NFA_API_ZOMBIE_NO_IMPL + +/* 16-bit Sheng-McClellan hybrid */ + +char nfaExecMcSheng16_testEOD(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context); +char 
nfaExecMcSheng16_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcSheng16_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcSheng16_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecMcSheng16_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecMcSheng16_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecMcSheng16_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecMcSheng16_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecMcSheng16_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecMcSheng16_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecMcSheng16_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); + +#define nfaExecMcSheng16_B_Reverse NFA_API_NO_IMPL +#define nfaExecMcSheng16_zombie_status NFA_API_ZOMBIE_NO_IMPL + +#endif diff --git a/src/nfa/mcsheng_compile.cpp b/src/nfa/mcsheng_compile.cpp new file mode 100644 index 00000000..666c3b1d --- /dev/null +++ b/src/nfa/mcsheng_compile.cpp @@ -0,0 +1,1144 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "mcsheng_compile.h" + +#include "accel.h" +#include "accelcompile.h" +#include "grey.h" +#include "mcclellancompile.h" +#include "mcclellancompile_util.h" +#include "mcsheng_internal.h" +#include "nfa_internal.h" +#include "rdfa_graph.h" +#include "shufticompile.h" +#include "trufflecompile.h" +#include "ue2common.h" +#include "util/alloc.h" +#include "util/bitutils.h" +#include "util/charreach.h" +#include "util/compare.h" +#include "util/compile_context.h" +#include "util/container.h" +#include "util/graph.h" +#include "util/graph_range.h" +#include "util/make_unique.h" +#include "util/order_check.h" +#include "util/report_manager.h" +#include "util/ue2_containers.h" +#include "util/unaligned.h" +#include "util/verify_types.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using namespace std; +using boost::adaptors::map_keys; + +namespace ue2 { + +namespace /* anon */ { + +#define MIN_SHENG_SIZE 6 +#define INVALID_SHENG_ID 255 +/* Per-state build-time data; dfa_info::extra holds one entry per raw state. */ +struct dstate_extra { + u16 daddytaken = 0; + bool shermanState = false; + bool sheng_succ = false; + u8 sheng_id = INVALID_SHENG_ID; /* INVALID_SHENG_ID means the state is not a sheng state */ +}; +/* Bundles the raw DFA, its build strategy and the per-state extra data. NOTE(review): the angle-bracket include targets and template arguments in this file appear to have been lost in extraction. */ +struct dfa_info { + 
accel_dfa_build_strat &strat; + raw_dfa &raw; + vector &states; + vector extra; + const u16 alpha_size; /* including special symbols */ + const array &alpha_remap; + vector rev_alpha; + const u16 impl_alpha_size; + + u8 getAlphaShift() const; + + explicit dfa_info(accel_dfa_build_strat &s) + : strat(s), + raw(s.get_raw()), + states(raw.states), + extra(raw.states.size()), + alpha_size(raw.alpha_size), + alpha_remap(raw.alpha_remap), + impl_alpha_size(raw.getImplAlphaSize()) { + rev_alpha.resize(impl_alpha_size); + for (u32 i = 0; i < N_CHARS; i++) { /* record, per remapped symbol, which input characters map to it */ + rev_alpha[alpha_remap[i]].set(i); + } + } + + dstate_id_t implId(dstate_id_t raw_id) const { + return states[raw_id].impl_id; + } + + bool is_sherman(dstate_id_t raw_id) const { + return extra[raw_id].shermanState; + } + + bool is_sheng(dstate_id_t raw_id) const { + return extra[raw_id].sheng_id != INVALID_SHENG_ID; + } + + bool is_sheng_succ(dstate_id_t raw_id) const { + return extra[raw_id].sheng_succ; + } + + /* states which use the normal transition/successor table */ + bool is_normal(dstate_id_t raw_id) const { + return raw_id != DEAD_STATE && !is_sheng(raw_id) && !is_sherman(raw_id); + } + size_t size(void) const { return states.size(); } +}; +/* Returns ceil(log2(impl_alpha_size)), with a minimum of 1. */ +u8 dfa_info::getAlphaShift() const { + if (impl_alpha_size < 2) { + return 1; + } else { + /* log2 round up */ + return 32 - clz32(impl_alpha_size - 1); + } +} + +} // namespace +/* Returns the aux entry for impl state i, read from the table at byte offset m->aux_offset from n. */ +static +mstate_aux *getAux(NFA *n, dstate_id_t i) { + mcsheng *m = (mcsheng *)getMutableImplNfa(n); + mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset); + + mstate_aux *aux = aux_base + i; + assert((const char *)aux < (const char *)n + m->length); + return aux; +} +/* Fills m->sheng_masks, m->sheng_end and m->sheng_accel_limit from the states marked as sheng in info. */ +static +void createShuffleMasks(mcsheng *m, const dfa_info &info, + dstate_id_t sheng_end, + const map &accel_escape_info) { + DEBUG_PRINTF("using first %hu states for a sheng\n", sheng_end); + assert(sheng_end > DEAD_STATE + 1); + assert(sheng_end <= sizeof(m128) + 1); + vector> masks; + 
masks.resize(info.alpha_size); + /* -1 to avoid wasting a slot as we do not include dead state */ + vector raw_ids; + raw_ids.resize(sheng_end - 1); + for (dstate_id_t s = DEAD_STATE + 1; s < info.states.size(); s++) { + assert(info.implId(s)); /* should not map to DEAD_STATE */ + if (info.is_sheng(s)) { + raw_ids[info.extra[s].sheng_id] = s; + } + } + for (u32 i = 0; i < info.alpha_size; i++) { + if (i == info.alpha_remap[TOP]) { + continue; + } + auto &mask = masks[i]; + assert(sizeof(mask) == sizeof(m128)); + mask.fill(0); + + for (dstate_id_t sheng_id = 0; sheng_id < sheng_end - 1; sheng_id++) { + dstate_id_t raw_id = raw_ids[sheng_id]; + dstate_id_t next_id = info.implId(info.states[raw_id].next[i]); + if (next_id == DEAD_STATE) { /* dead successor is encoded as sheng_end - 1 */ + next_id = sheng_end - 1; + } else if (next_id < sheng_end) { /* sheng successors are stored as impl id - 1 */ + next_id--; + } + DEBUG_PRINTF("%hu: %u->next %hu\n", sheng_id, i, next_id); + mask[sheng_id] = verify_u8(next_id); + } + } + for (u32 i = 0; i < N_CHARS; i++) { + assert(info.alpha_remap[i] != info.alpha_remap[TOP]); + m->sheng_masks[i] = loadu128(masks[info.alpha_remap[i]].data()); + } + m->sheng_end = sheng_end; + m->sheng_accel_limit = sheng_end - 1; + + for (dstate_id_t s : raw_ids) { /* lower the limit to the smallest sheng id that has accel escape info */ + if (contains(accel_escape_info, s)) { + LIMIT_TO_AT_MOST(&m->sheng_accel_limit, info.extra[s].sheng_id); + } + } +} +/* Fills in the NFA header (length, nPositions, state sizes, type) and the basic mcsheng header fields (remap table, offsets, start states, has_accel, flags). */ +static +void populateBasicInfo(size_t state_size, const dfa_info &info, + u32 total_size, u32 aux_offset, u32 accel_offset, + u32 accel_count, ReportID arb, bool single, NFA *nfa) { + assert(state_size == sizeof(u16) || state_size == sizeof(u8)); + + nfa->length = total_size; + nfa->nPositions = info.states.size(); + + nfa->scratchStateSize = verify_u32(state_size); + nfa->streamStateSize = verify_u32(state_size); + + if (state_size == sizeof(u8)) { + nfa->type = MCSHENG_NFA_8; + } else { + nfa->type = MCSHENG_NFA_16; + } + + mcsheng *m = (mcsheng *)getMutableImplNfa(nfa); + for (u32 i = 0; i < 256; i++) { + m->remap[i] = verify_u8(info.alpha_remap[i]); + } + 
m->alphaShift = info.getAlphaShift(); + m->length = total_size; + m->aux_offset = aux_offset; + m->accel_offset = accel_offset; + m->arb_report = arb; + m->state_count = verify_u16(info.size()); + m->start_anchored = info.implId(info.raw.start_anchored); + m->start_floating = info.implId(info.raw.start_floating); + m->has_accel = accel_count ? 1 : 0; + + if (single) { + m->flags |= MCSHENG_FLAG_SINGLE; + } +} + +namespace { +/* A set of reports; when do_remap is set each id is replaced by rm.getProgramOffset(id) on construction. */ +struct raw_report_list { + flat_set reports; + + raw_report_list(const flat_set &reports_in, + const ReportManager &rm, bool do_remap) { + if (do_remap) { + for (auto &id : reports_in) { + reports.insert(rm.getProgramOffset(id)); + } + } else { + reports = reports_in; + } + } + + bool operator<(const raw_report_list &b) const { + return reports < b.reports; + } +}; +/* raw_report_info implementation backed by the vector of report lists rl. */ +struct raw_report_info_impl : public raw_report_info { + vector rl; + u32 getReportListSize() const override; + size_t size() const override; + void fillReportLists(NFA *n, size_t base_offset, + std::vector &ro /* out */) const override; +}; +} +/* Returns the total bytes needed for all lists: sizeof(report_list) plus one ReportID per report, per list. */ +u32 raw_report_info_impl::getReportListSize() const { + u32 rv = 0; + + for (const auto &reps : rl) { + rv += sizeof(report_list); + rv += sizeof(ReportID) * reps.reports.size(); + } + + return rv; +} +/* Returns the number of report lists. */ +size_t raw_report_info_impl::size() const { + return rl.size(); +} +/* Writes each report list into n starting at base_offset and appends each list's offset to ro. */ +void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset, + vector &ro) const { + for (const auto &reps : rl) { + ro.push_back(base_offset); + + report_list *p = (report_list *)((char *)n + base_offset); + + u32 i = 0; + for (const ReportID report : reps.reports) { + p->report[i++] = report; + } + p->count = verify_u32(reps.reports.size()); + + base_offset += sizeof(report_list); + base_offset += sizeof(ReportID) * reps.reports.size(); + } +} +/* Inserts every key of accel_escape_info into *accel_states. */ +static +void fillAccelOut(const map &accel_escape_info, + set *accel_states) { + for (dstate_id_t i : accel_escape_info | map_keys) { + accel_states->insert(i); + } +} + +static +size_t 
calcShermanRegionSize(const dfa_info &info) {
+    size_t rv = 0;
+
+    for (size_t i = 0; i < info.size(); i++) {
+        if (info.is_sherman(i)) {
+            rv += SHERMAN_FIXED_SIZE;
+        }
+    }
+
+    return ROUNDUP_16(rv);
+}
+/* Fills one mstate_aux record for raw state i: accept / accept_eod report-list offsets and the TOP successor. */
+static
+void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info,
+               const vector &reports, const vector &reports_eod,
+               const vector &reportOffsets) {
+    const dstate &raw_state = info.states[i];
+    aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]]; /* 0 means no reports */
+    aux->accept_eod = raw_state.reports_eod.empty() ? 0
+                                              : reportOffsets[reports_eod[i]];
+    aux->top = info.implId(i ? raw_state.next[info.alpha_remap[TOP]]
+                             : info.raw.start_floating); /* state 0 uses start_floating as its top target */
+}
+
+/* Assigns impl ids to all non-sheng states, starting at sheng_end; sets *sherman_base to the first sherman id; returns false on error */
+static
+bool allocateImplId16(dfa_info &info, dstate_id_t sheng_end,
+                      dstate_id_t *sherman_base) {
+    info.states[0].impl_id = 0; /* dead is always 0 */
+
+    vector norm;
+    vector sherm;
+    vector norm_sheng_succ;
+    vector sherm_sheng_succ;
+
+    if (info.size() > (1 << 16)) {
+        DEBUG_PRINTF("too many states\n");
+        *sherman_base = 0;
+        return false;
+    }
+
+    for (u32 i = 1; i < info.size(); i++) { /* bucket states by sherman-ness and sheng-successor-ness */
+        if (info.is_sheng(i)) {
+            continue; /* sheng impl ids have already been allocated */
+        } if (info.is_sherman(i)) {
+            if (info.is_sheng_succ(i)) {
+                sherm_sheng_succ.push_back(i);
+            } else {
+                sherm.push_back(i);
+            }
+        } else {
+            if (info.is_sheng_succ(i)) {
+                norm_sheng_succ.push_back(i);
+            } else {
+                norm.push_back(i);
+            }
+        }
+    }
+
+    dstate_id_t next_norm = sheng_end; /* normal sheng successors get the lowest ids after the sheng region */
+    for (dstate_id_t s : norm_sheng_succ) {
+        info.states[s].impl_id = next_norm++;
+    }
+    if (next_norm + norm.size() + sherm_sheng_succ.size() > UINT8_MAX) {
+        /* we need to give sheng_succs ids which fit into a u8 -- demote these
+         * to normal states */
+        for (dstate_id_t s : sherm_sheng_succ) {
+            info.states[s].impl_id = next_norm++;
+            info.extra[s].shermanState = false;
+        }
+        sherm_sheng_succ.clear();
+    }
+    for (dstate_id_t s : norm) {
+        info.states[s].impl_id = next_norm++;
+    }
+
+    *sherman_base = next_norm;
+
dstate_id_t next_sherman = next_norm;
+    /* sherman states take the ids from *sherman_base upwards, sheng successors first */
+    for (dstate_id_t s : sherm_sheng_succ) {
+        info.states[s].impl_id = next_sherman++;
+    }
+
+    for (dstate_id_t s : sherm) {
+        info.states[s].impl_id = next_sherman++;
+    }
+
+    /* Check to see if we haven't over allocated our states */
+    DEBUG_PRINTF("next sherman %u masked %u\n", next_sherman,
+                 (dstate_id_t)(next_sherman & STATE_MASK));
+    return (next_sherman - 1) == ((next_sherman - 1) & STATE_MASK); /* the last id handed out must survive STATE_MASK */
+}
+
+typedef RdfaGraph::vertex_descriptor RdfaVertex;
+/* Flags each non-dead, non-sheng successor of a sheng state as sheng_succ; false if exits + sheng states reach UINT8_MAX. */
+static
+bool mark_sheng_succs(const RdfaGraph &g, dfa_info &info,
+                      const flat_set &sheng_states) {
+    u32 exit_count = 0;
+
+    for (auto v : sheng_states) {
+        dstate_id_t s = g[v].index;
+        for (u32 i = 0; i != info.alpha_size; i++) {
+            if (i == info.alpha_remap[TOP]) {
+                continue;
+            }
+            dstate_id_t next = info.states[s].next[i];
+            if (!next || info.is_sheng(next) || info.is_sheng_succ(next)) {
+                continue; /* dead, inside the region, or already counted */
+            }
+            exit_count++;
+            info.extra[next].sheng_succ = true;
+        }
+    }
+
+    if (exit_count + sheng_states.size() < UINT8_MAX) {
+        return true;
+    } else {
+        DEBUG_PRINTF("fail: unable to fit %u exits in byte", exit_count);
+        return false;
+    }
+}
+/* Returns the union of info.rev_alpha[i] over every impl-alphabet symbol i on which raw state u moves to v. */
+static
+CharReach get_edge_reach(dstate_id_t u, dstate_id_t v, const dfa_info &info) {
+    CharReach rv;
+    for (u32 i = 0; i < info.impl_alpha_size; i++) {
+        if (info.raw.states[u].next[i] == v) {
+            assert(info.rev_alpha[i].any());
+            rv |= info.rev_alpha[i];
+        }
+    }
+    assert(rv.any()); /* callers are expected to pass a pair joined by at least one edge */
+    return rv;
+}
+
+#define MAX_SHENG_STATES 16 /* find_sheng_states stops growing the region at this many states */
+#define MAX_SHENG_LEAKINESS 0.05 /* find_sheng_states rejects regions leakier than this */
+
+/**
+ * Returns the proportion of strings of length 'depth' which will leave the
+ * sheng region when starting at state 'u'.
+ */
+static
+double leakiness(const RdfaGraph &g, dfa_info &info,
+                 const flat_set &sheng_states, RdfaVertex u,
+                 u32 depth,
+                 unordered_map, double> &cache) {
+    double rv = 0;
+    if (contains(cache, make_pair(u, depth))) { /* memoised on (vertex, depth) */
+        return cache[make_pair(u, depth)];
+    }
+    for (RdfaVertex v : adjacent_vertices_range(u, g)) {
+        if (g[v].index == DEAD_STATE) {
+            continue; /* edges to the dead state are not counted as leaks */
+        }
+        double width = get_edge_reach(g[u].index, g[v].index, info).count();
+        width /= N_CHARS; /* fraction of input bytes that take this edge */
+
+        double weight;
+        if (!contains(sheng_states, v)) {
+            weight = 1; /* leaves the region on this byte */
+        } else if (depth > 1) {
+            weight = leakiness(g, info, sheng_states, v, depth - 1, cache);
+        } else {
+            continue; /* weight = 0 */
+        }
+        rv += width * weight;
+    }
+
+    cache[make_pair(u, depth)] = rv;
+    DEBUG_PRINTF("%zu [%u] q = %g\n", g[u].index, depth, rv);
+    return rv;
+}
+
+/**
+ * Returns the proportion of 8 byte strings which will leave the sheng region
+ * when starting at state 'u'.
+ */
+static
+double leakiness(const RdfaGraph &g, dfa_info &info,
+                 const flat_set &sheng_states, RdfaVertex u) {
+    unordered_map, double> cache; /* fresh memo table for each top-level query */
+    double rv = leakiness(g, info, sheng_states, u, 8, cache);
+    return rv;
+}
+/* Picks the sheng region and assigns its impl ids; returns the resulting sheng_end, or DEAD_STATE if no region is suitable. */
+static
+dstate_id_t find_sheng_states(dfa_info &info,
+                              map &accel_escape_info) {
+    RdfaGraph g(info.raw);
+    auto cyclics = find_vertices_in_cycles(g);
+
+    auto base_cyclic = RdfaGraph::null_vertex();
+    for (const auto &v : cyclics) {
+        if (g[v].index == DEAD_STATE) {
+            continue;
+        }
+        DEBUG_PRINTF("considering cyclic %zu\n", g[v].index);
+        /* get an estimate of stickiness of the cyclic: assume any edges from
+         * states with larger state ids are back edges */
+        CharReach est_back_reach;
+        for (const auto &u : inv_adjacent_vertices_range(v, g)) {
+            if (g[u].index < g[v].index) {
+                continue;
+            }
+            est_back_reach |= get_edge_reach(g[u].index, g[v].index, info);
+        }
+
+        if (est_back_reach.count() < 30) {
+            continue; /* fewer than 30 characters lead back here: try the next cyclic */
+        }
+        base_cyclic = v; /* the first cyclic that passes the check becomes the seed */
+        break;
+    }
+    if (!base_cyclic) {
+        return DEAD_STATE;
+    }
+
+    flat_set sheng_states;
+    deque
to_consider = { base_cyclic };
+    flat_set considered = { DEAD_STATE };
+    bool seen_back_edge = false;
+    while (!to_consider.empty()
+           && sheng_states.size() < MAX_SHENG_STATES) { /* breadth-first growth from base_cyclic */
+        auto v = to_consider.front();
+        to_consider.pop_front();
+        if (!considered.insert(g[v].index).second) {
+            continue; /* already visited */
+        }
+
+        assert(!contains(sheng_states, v));
+
+        if (generates_callbacks(info.raw.kind)
+            && !info.states[g[v].index].reports.empty()) {
+            /* cannot raise callbacks from sheng region */
+            continue;
+        }
+
+        sheng_states.insert(v);
+        for (const auto &t : adjacent_vertices_range(v, g)) {
+            if (!contains(considered, g[t].index)) {
+                to_consider.push_back(t);
+            }
+            if (t == base_cyclic) {
+                seen_back_edge = true; /* some region state has an edge back to the seed */
+            }
+        }
+    }
+
+    /* allocate normal ids */
+    dstate_id_t sheng_end = DEAD_STATE + 1;
+    for (auto v : sheng_states) {
+        dstate_id_t s = g[v].index;
+        if (!contains(accel_escape_info, s)) {
+            info.states[s].impl_id = sheng_end++;
+            info.extra[s].sheng_id = info.states[s].impl_id - 1; /* sheng_id is always impl_id - 1 */
+        }
+    }
+
+    /* allocate accel ids */
+    for (auto v : sheng_states) {
+        dstate_id_t s = g[v].index;
+        if (contains(accel_escape_info, s)) {
+            assert(!info.states[s].impl_id);
+            info.states[s].impl_id = sheng_end++;
+            info.extra[s].sheng_id = info.states[s].impl_id - 1;
+        }
+    }
+    /* NOTE: the rejection checks below run after impl ids have already been written above */
+    if (sheng_states.size() < MIN_SHENG_SIZE) {
+        DEBUG_PRINTF("sheng region too small\n");
+        return DEAD_STATE;
+    }
+
+    if (!seen_back_edge) {
+        DEBUG_PRINTF("did not include cyclic\n");
+        return DEAD_STATE;
+    }
+
+    double leak = leakiness(g, info, sheng_states, base_cyclic);
+    if (leak > MAX_SHENG_LEAKINESS) {
+        DEBUG_PRINTF("too leaky (%g)\n", leak);
+        return DEAD_STATE;
+    }
+
+    if (!mark_sheng_succs(g, info, sheng_states)) {
+        return DEAD_STATE;
+    }
+
+    /* TODO: ensure sufficiently 'sticky' */
+    /* TODO: check not all states accel */
+    DEBUG_PRINTF("sheng_end = %hu\n", sheng_end);
+    return sheng_end;
+}
+/* Writes the report lists, then one mstate_aux per state, plus accel data for each state in accel_escape_info. */
+static
+void fill_in_aux_info(NFA *nfa, const dfa_info &info,
+                      const map &accel_escape_info,
+                      u32 accel_offset,
UNUSED u32 accel_end_offset,
+                      const vector &reports,
+                      const vector &reports_eod,
+                      u32 report_base_offset,
+                      const raw_report_info &ri) {
+    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);
+
+    vector reportOffsets;
+
+    ri.fillReportLists(nfa, report_base_offset, reportOffsets);
+
+    for (u32 i = 0; i < info.size(); i++) {
+        u16 impl_id = info.implId(i);
+        mstate_aux *this_aux = getAux(nfa, impl_id); /* aux records are indexed by impl id */
+
+        fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets);
+        if (contains(accel_escape_info, i)) {
+            this_aux->accel_offset = accel_offset;
+            accel_offset += info.strat.accelSize();
+            assert(accel_offset <= accel_end_offset); /* accel_end_offset is only used by this assert */
+            assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
+            info.strat.buildAccel(i, accel_escape_info.at(i),
+                                  (void *)((char *)m + this_aux->accel_offset)); /* offset is relative to m */
+        }
+    }
+}
+/* Returns ACCEPT_FLAG and/or ACCEL_FLAG according to the target state's aux record. */
+static
+u16 get_edge_flags(NFA *nfa, dstate_id_t target_impl_id) {
+    mstate_aux *aux = getAux(nfa, target_impl_id);
+    u16 flags = 0;
+
+    if (aux->accept) {
+        flags |= ACCEPT_FLAG;
+    }
+
+    if (aux->accel_offset) {
+        flags |= ACCEL_FLAG;
+    }
+
+    return flags;
+}
+/* Fills the u16 successor table that follows the mcsheng header; rows exist only for normal states. */
+static
+void fill_in_succ_table_16(NFA *nfa, const dfa_info &info,
+                           dstate_id_t sheng_end,
+                           UNUSED dstate_id_t sherman_base) {
+    u16 *succ_table = (u16 *)((char *)nfa + sizeof(NFA) + sizeof(mcsheng));
+
+    u8 alphaShift = info.getAlphaShift();
+    assert(alphaShift <= 8);
+
+    for (size_t i = 0; i < info.size(); i++) {
+        if (!info.is_normal(i)) {
+            assert(info.implId(i) < sheng_end || info.is_sherman(i));
+            continue;
+        }
+
+        assert(info.implId(i) < sherman_base);
+        u16 normal_id = verify_u16(info.implId(i) - sheng_end); /* row index is relative to sheng_end */
+
+        for (size_t s = 0; s < info.impl_alpha_size; s++) {
+            dstate_id_t raw_succ = info.states[i].next[s];
+            u16 &entry = succ_table[(normal_id << alphaShift) + s];
+
+            entry = info.implId(raw_succ);
+            entry |= get_edge_flags(nfa, entry); /* flag lookup uses the bare impl id just stored */
+        }
+    }
+}
+
+#define MAX_SHERMAN_LIST_LEN 8
+/* Inserts candidate into dest only if candidate < max. */
+static
+void addIfEarlier(set &dest, dstate_id_t candidate,
+                  dstate_id_t max) {
+    if (candidate < max) {
+
dest.insert(candidate);
+    }
+}
+/* Adds to dest every successor of source, over the first alphasize symbols, whose id is below curr_id. */
+static
+void addSuccessors(set &dest, const dstate &source,
+                   u16 alphasize, dstate_id_t curr_id) {
+    for (symbol_t s = 0; s < alphasize; s++) {
+        addIfEarlier(dest, source.next[s], curr_id);
+    }
+}
+
+#define MAX_SHERMAN_SELF_LOOP 20
+/* Chooses the best "daddy" for curr_id among earlier hinted states and may mark curr_id as a sherman state. */
+static
+void find_better_daddy(dfa_info &info, dstate_id_t curr_id,
+                       bool any_cyclic_near_anchored_state, const Grey &grey) {
+    if (!grey.allowShermanStates) {
+        return;
+    }
+
+    const u16 width = sizeof(u16);
+    const u16 alphasize = info.impl_alpha_size;
+
+    if (info.raw.start_anchored != DEAD_STATE
+        && any_cyclic_near_anchored_state
+        && curr_id < alphasize * 3) {
+        /* crude attempt to prevent frequent states from being sherman'ed
+         * depends on the fact that state numbers are currently in bfs
+         * order */
+        DEBUG_PRINTF("%hu is banned\n", curr_id);
+        return;
+    }
+
+    if (info.raw.start_floating != DEAD_STATE
+        && curr_id >= info.raw.start_floating
+        && curr_id < info.raw.start_floating + alphasize * 3) {
+        /* crude attempt to prevent frequent states from being sherman'ed
+         * depends on the fact that state numbers are currently in bfs
+         * order */
+        DEBUG_PRINTF("%hu is banned (%hu)\n", curr_id, info.raw.start_floating);
+        return;
+    }
+
+    const u16 full_state_size = width * alphasize; /* bytes in a full u16 successor row */
+    const u16 max_list_len = MIN(MAX_SHERMAN_LIST_LEN,
+                           (full_state_size - 2)/(width + 1));
+    u16 best_score = 0;
+    dstate_id_t best_daddy = 0;
+    dstate &currState = info.states[curr_id];
+
+    set hinted; /* set of states to search for a better daddy */
+    addIfEarlier(hinted, 0, curr_id);
+    addIfEarlier(hinted, info.raw.start_anchored, curr_id);
+    addIfEarlier(hinted, info.raw.start_floating, curr_id);
+
+    dstate_id_t mydaddy = currState.daddy; /* also consider the current daddy, granddaddy and their successors */
+    if (mydaddy) {
+        addIfEarlier(hinted, mydaddy, curr_id);
+        addSuccessors(hinted, info.states[mydaddy], alphasize, curr_id);
+        dstate_id_t mygranddaddy = info.states[mydaddy].daddy;
+        if (mygranddaddy) {
+            addIfEarlier(hinted, mygranddaddy, curr_id);
+
addSuccessors(hinted, info.states[mygranddaddy], alphasize, + curr_id); + } + } + + for (const dstate_id_t &donor : hinted) { + assert(donor < curr_id); + u32 score = 0; + + if (!info.is_normal(donor)) { + continue; + } + + const dstate &donorState = info.states[donor]; + for (symbol_t s = 0; s < alphasize; s++) { + if (currState.next[s] == donorState.next[s]) { + score++; + } + } + + /* prefer lower ids to provide some stability amongst potential + * siblings */ + if (score > best_score || (score == best_score && donor < best_daddy)) { + best_daddy = donor; + best_score = score; + + if (score == alphasize) { + break; + } + } + } + + currState.daddy = best_daddy; + info.extra[curr_id].daddytaken = best_score; + DEBUG_PRINTF("%hu -> daddy %hu: %u/%u BF\n", curr_id, best_daddy, + best_score, alphasize); + + if (best_daddy == DEAD_STATE) { + return; /* No good daddy */ + } + + if (best_score + max_list_len < alphasize) { + return; /* ??? */ + } + + assert(info.is_normal(currState.daddy)); + + u32 self_loop_width = 0; + const dstate curr_raw = info.states[curr_id]; + for (unsigned i = 0; i < N_CHARS; i++) { + if (curr_raw.next[info.alpha_remap[i]] == curr_id) { + self_loop_width++; + } + } + + if (self_loop_width > MAX_SHERMAN_SELF_LOOP) { + DEBUG_PRINTF("%hu is banned wide self loop (%u)\n", curr_id, + self_loop_width); + return; + } + + if (info.is_sheng(curr_id)) { + return; + } + + DEBUG_PRINTF("%hu is sherman\n", curr_id); + info.extra[curr_id].shermanState = true; +} + +static +bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { + symbol_t alphasize = raw.getImplAlphaSize(); + for (symbol_t s = 0; s < alphasize; s++) { + dstate_id_t succ_id = raw.states[root].next[s]; + if (succ_id == DEAD_STATE) { + continue; + } + + const dstate &succ = raw.states[succ_id]; + for (symbol_t t = 0; t < alphasize; t++) { + if (succ.next[t] == root || succ.next[t] == succ_id) { + return true; + } + } + } + return false; +} + +static +void fill_in_sherman(NFA *nfa, dfa_info 
&info, UNUSED u16 sherman_limit) {
+    char *nfa_base = (char *)nfa;
+    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);
+    char *sherman_table = nfa_base + m->sherman_offset;
+
+    assert(ISALIGNED_16(sherman_table));
+    for (size_t i = 0; i < info.size(); i++) {
+        if (!info.is_sherman(i)) {
+            continue;
+        }
+        u16 fs = verify_u16(info.implId(i));
+        DEBUG_PRINTF("building sherman %zu impl %hu\n", i, fs);
+
+        assert(fs >= sherman_limit);
+
+        char *curr_sherman_entry
+            = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE; /* one fixed-size slot per sherman id */
+        assert(curr_sherman_entry <= nfa_base + m->length);
+
+        u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken); /* symbols that differ from the daddy */
+        assert(len <= 9);
+        dstate_id_t d = info.states[i].daddy;
+
+        *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE;
+        *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len;
+        *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d);
+        u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET);
+
+        for (u16 s = 0; s < info.impl_alpha_size; s++) { /* first pass: record the differing symbols */
+            if (info.states[i].next[s] != info.states[d].next[s]) {
+                *(chars++) = (u8)s;
+            }
+        }
+
+        u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
+        for (u16 s = 0; s < info.impl_alpha_size; s++) { /* second pass: their successors, in the same order */
+            if (info.states[i].next[s] != info.states[d].next[s]) {
+                DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs,
+                             info.implId(d),
+                             info.implId(info.states[i].next[s]));
+                u16 entry_val = info.implId(info.states[i].next[s]);
+                entry_val |= get_edge_flags(nfa, entry_val);
+                unaligned_store_u16((u8 *)states++, entry_val);
+            }
+        }
+    }
+}
+/* Builds the 16-bit mcsheng NFA image; returns nullptr if impl id allocation fails. */
+static
+aligned_unique_ptr mcshengCompile16(dfa_info &info, dstate_id_t sheng_end,
+                                    const map &accel_escape_info,
+                                    const Grey &grey) {
+    DEBUG_PRINTF("building mcsheng 16\n");
+
+    vector reports; /* index in ri for the appropriate report list */
+    vector reports_eod; /* as above */
+    ReportID arb;
+    u8 single;
+
+    assert(info.getAlphaShift() <= 8);
+
+    u16 total_daddy = 0; /* only reported through DEBUG_PRINTF */
+    for (u32 i = 0; i <
info.size(); i++) {
+        find_better_daddy(info, i,
+                          is_cyclic_near(info.raw, info.raw.start_anchored),
+                          grey);
+        total_daddy += info.extra[i].daddytaken;
+    }
+
+    DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
+                 info.size() * info.impl_alpha_size, info.size(),
+                 info.impl_alpha_size);
+
+    u16 sherman_limit; /* first sherman impl id, set by allocateImplId16 */
+    if (!allocateImplId16(info, sheng_end, &sherman_limit)) {
+        DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
+                     info.size());
+        return nullptr;
+    }
+    u16 count_real_states = sherman_limit - sheng_end; /* states that own a full successor row */
+
+    auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
+
+    size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16)
+        * count_real_states;
+
+    size_t aux_size = sizeof(mstate_aux) * info.size();
+    /* layout: NFA | mcsheng | succ table | aux | report lists | accel | sherman */
+    size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng) + tran_size);
+    size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
+    size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
+                                    + ri->getReportListSize(), 32);
+    size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size);
+    size_t sherman_size = calcShermanRegionSize(info);
+
+    size_t total_size = sherman_offset + sherman_size;
+
+    accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
+    assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
+
+    aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size);
+    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get());
+
+    populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset,
+                      accel_escape_info.size(), arb, single, nfa.get());
+    createShuffleMasks(m, info, sheng_end, accel_escape_info);
+
+    /* copy in the mc header information */
+    m->sherman_offset = sherman_offset;
+    m->sherman_end = total_size;
+    m->sherman_limit = sherman_limit;
+
+    DEBUG_PRINTF("%hu sheng, %hu norm, %zu total\n", sheng_end,
+                 count_real_states, info.size());
+
+    fill_in_aux_info(nfa.get(), info, accel_escape_info, accel_offset,
+                     sherman_offset - sizeof(NFA), reports,
reports_eod,
+                     aux_offset + aux_size, *ri);
+    /* aux records must exist first: the fill-in steps below call get_edge_flags, which reads them */
+    fill_in_succ_table_16(nfa.get(), info, sheng_end, sherman_limit);
+
+    fill_in_sherman(nfa.get(), info, sherman_limit);
+
+    return nfa;
+}
+/* Fills the u8 successor table that follows the mcsheng header; rows exist only for normal states. */
+static
+void fill_in_succ_table_8(NFA *nfa, const dfa_info &info,
+                          dstate_id_t sheng_end) {
+    u8 *succ_table = (u8 *)nfa + sizeof(NFA) + sizeof(mcsheng);
+
+    u8 alphaShift = info.getAlphaShift();
+    assert(alphaShift <= 8);
+
+    for (size_t i = 0; i < info.size(); i++) {
+        assert(!info.is_sherman(i));
+        if (!info.is_normal(i)) {
+            assert(info.implId(i) < sheng_end);
+            continue;
+        }
+        u8 normal_id = verify_u8(info.implId(i) - sheng_end); /* row index is relative to sheng_end */
+
+        for (size_t s = 0; s < info.impl_alpha_size; s++) {
+            dstate_id_t raw_succ = info.states[i].next[s];
+            succ_table[(normal_id << alphaShift) + s] = info.implId(raw_succ); /* bare impl id, no edge flags */
+        }
+    }
+}
+/* Assigns 8-bit impl ids after the sheng region in the order normal, accel, accept; reports where each group starts. */
+static
+void allocateImplId8(dfa_info &info, dstate_id_t sheng_end,
+                     const map &accel_escape_info,
+                     u16 *accel_limit, u16 *accept_limit) {
+    info.states[0].impl_id = 0; /* dead is always 0 */
+
+    vector norm;
+    vector accel;
+    vector accept;
+
+    assert(info.size() <= (1 << 8));
+
+    for (u32 i = 1; i < info.size(); i++) {
+        if (info.is_sheng(i)) {
+            continue; /* already allocated */
+        } else if (!info.states[i].reports.empty()) {
+            accept.push_back(i); /* a reporting state goes here even if it is also accelerable */
+        } else if (contains(accel_escape_info, i)) {
+            accel.push_back(i);
+        } else {
+            norm.push_back(i);
+        }
+    }
+
+    u32 j = sheng_end;
+    for (const dstate_id_t &s : norm) {
+        assert(j <= 256);
+        DEBUG_PRINTF("mapping state %u to %u\n", s, j);
+        info.states[s].impl_id = j++;
+    }
+    *accel_limit = j; /* first id of the accel group */
+    for (const dstate_id_t &s : accel) {
+        assert(j <= 256);
+        DEBUG_PRINTF("mapping state %u to %u\n", s, j);
+        info.states[s].impl_id = j++;
+    }
+    *accept_limit = j; /* first id of the accept group */
+    for (const dstate_id_t &s : accept) {
+        assert(j <= 256);
+        DEBUG_PRINTF("mapping state %u to %u\n", s, j);
+        info.states[s].impl_id = j++;
+    }
+}
+/* Builds the 8-bit mcsheng NFA image. */
+static
+aligned_unique_ptr mcshengCompile8(dfa_info &info, dstate_id_t sheng_end,
+                                   const map
&accel_escape_info) {
+    DEBUG_PRINTF("building mcsheng 8\n");
+
+    vector reports;
+    vector reports_eod;
+    ReportID arb;
+    u8 single;
+
+    auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
+
+    size_t normal_count = info.size() - sheng_end; /* states that own a successor row */
+    /* layout: NFA | mcsheng | succ table | aux | report lists | accel */
+    size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * normal_count;
+    size_t aux_size = sizeof(mstate_aux) * info.size();
+    size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng) + tran_size);
+    size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
+    size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
+                                     + ri->getReportListSize(), 32);
+    size_t total_size = accel_offset + accel_size;
+
+    DEBUG_PRINTF("aux_size %zu\n", aux_size);
+    DEBUG_PRINTF("aux_offset %zu\n", aux_offset);
+    DEBUG_PRINTF("rl size %u\n", ri->getReportListSize());
+    DEBUG_PRINTF("accel_size %zu\n", accel_size);
+    DEBUG_PRINTF("accel_offset %zu\n", accel_offset);
+    DEBUG_PRINTF("total_size %zu\n", total_size);
+
+    accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
+    assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
+
+    aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size);
+    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get());
+    /* impl ids are assigned before anything is written that depends on them */
+    allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8,
+                    &m->accept_limit_8);
+
+    populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset,
+                      accel_escape_info.size(), arb, single, nfa.get());
+    createShuffleMasks(m, info, sheng_end, accel_escape_info);
+
+    fill_in_aux_info(nfa.get(), info, accel_escape_info, accel_offset,
+                     total_size - sizeof(NFA), reports, reports_eod,
+                     aux_offset + aux_size, *ri);
+
+    fill_in_succ_table_8(nfa.get(), info, sheng_end);
+
+    DEBUG_PRINTF("rl size %zu\n", ri->size());
+
+    return nfa;
+}
+/* Entry point: builds a mcsheng NFA for raw; returns nullptr if disabled by grey, no sheng region is found, or the build fails. */
+aligned_unique_ptr mcshengCompile(raw_dfa &raw, const CompileContext &cc,
+                                  const ReportManager &rm,
+                                  set *accel_states) {
+    if (!cc.grey.allowMcSheng) {
+        return nullptr;
+    }
+
+
mcclellan_build_strat mbs(raw, rm);
+    dfa_info info(mbs);
+    bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256; /* 8-bit form needs at most 256 states */
+
+    if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
+                          * mode with our semantics */
+        raw.stripExtraEodReports();
+    }
+
+    bool has_eod_reports = raw.hasEodReports();
+
+    map accel_escape_info
+        = info.strat.getAccelInfo(cc.grey);
+
+    dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info);
+    if (sheng_end <= DEAD_STATE + 1) {
+        return nullptr; /* no usable sheng region was found */
+    }
+
+    aligned_unique_ptr nfa;
+    if (!using8bit) {
+        nfa = mcshengCompile16(info, sheng_end, accel_escape_info, cc.grey);
+    } else {
+        nfa = mcshengCompile8(info, sheng_end, accel_escape_info);
+    }
+
+    if (!nfa) {
+        return nfa;
+    }
+
+    if (has_eod_reports) {
+        nfa->flags |= NFA_ACCEPTS_EOD;
+    }
+
+    if (accel_states) { /* optional out-parameter */
+        fillAccelOut(accel_escape_info, accel_states);
+    }
+
+    DEBUG_PRINTF("compile done\n");
+    return nfa;
+}
+/* Always returns true; the NFA argument is ignored. */
+bool has_accel_mcsheng(const NFA *) {
+    return true; /* consider the sheng region as accelerated */
+}
+
+} // namespace ue2 diff --git a/src/nfa/mcsheng_compile.h b/src/nfa/mcsheng_compile.h new file mode 100644 index 00000000..24cc66e9 --- /dev/null +++ b/src/nfa/mcsheng_compile.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MCSHENGCOMPILE_H +#define MCSHENGCOMPILE_H + +#include "accel_dfa_build_strat.h" +#include "rdfa.h" +#include "ue2common.h" +#include "util/alloc.h" +#include "util/ue2_containers.h" + +#include +#include + +struct NFA; + +namespace ue2 { + +class ReportManager; +struct CompileContext; + +/* accel_states: (optional) on success, is filled with the set of accelerable + * states */ +ue2::aligned_unique_ptr +mcshengCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm, + std::set *accel_states = nullptr); + +bool has_accel_mcsheng(const NFA *nfa); + +} // namespace ue2 + +#endif diff --git a/src/nfa/mcsheng_data.c b/src/nfa/mcsheng_data.c new file mode 100644 index 00000000..eaf3cbbb --- /dev/null +++ b/src/nfa/mcsheng_data.c @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "mcsheng_internal.h" + +/* This table is in a separate translation unit from mcsheng.c as we want to + * prevent the compiler from seeing these constants. We have the load resources + * free at runtime to load the masks with no problems. 
*/
+const u64a mcsheng_pext_mask[8] = {
+    0, /* dummy */
+    0x000000000000ff0f, /* entry k: low nibble of byte 0 plus all of byte k (k = 1) */
+    0x0000000000ff000f, /* k = 2 */
+    0x00000000ff00000f, /* k = 3 */
+    0x000000ff0000000f, /* k = 4 */
+    0x0000ff000000000f, /* k = 5 */
+    0x00ff00000000000f, /* k = 6 */
+    0xff0000000000000f, /* k = 7 */
+};
diff --git a/src/nfa/mcsheng_dump.cpp b/src/nfa/mcsheng_dump.cpp
new file mode 100644
index 00000000..f5c058af
--- /dev/null
+++ b/src/nfa/mcsheng_dump.cpp
@@ -0,0 +1,415 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "mcsheng_dump.h"
+
+#include "accel.h"
+#include "accel_dump.h"
+#include "nfa_dump_internal.h"
+#include "nfa_internal.h"
+#include "mcsheng_internal.h"
+#include "rdfa.h"
+#include "ue2common.h"
+#include "util/charreach.h"
+#include "util/dump_charclass.h"
+#include "util/dump_util.h"
+#include "util/unaligned.h"
+
+#include
+#include
+#include
+#include
+#include
+
+#ifndef DUMP_SUPPORT
+#error No dump support!
+#endif
+
+using namespace std;
+
+namespace ue2 {
+/* Returns the mstate_aux record for impl state i, located at n + aux_offset. */
+static
+const mstate_aux *getAux(const NFA *n, dstate_id_t i) {
+    auto *m = (const mcsheng *)getImplNfa(n);
+    auto *aux_base = (const mstate_aux *)((const char *)n + m->aux_offset);
+
+    const mstate_aux *aux = aux_base + i;
+
+    assert((const char *)aux < (const char *)n + m->length);
+    return aux;
+}
+/* Fills t[c] with the successor of impl state s for every input byte c, and t[TOP] with its top successor. */
+static
+void next_states(const NFA *n, u16 s, u16 *t) {
+    const mcsheng *m = (const mcsheng *)getImplNfa(n);
+    const mstate_aux *aux = getAux(n, s);
+    const u32 as = m->alphaShift;
+    assert(s != DEAD_STATE);
+
+    if (s < m->sheng_end) { /* sheng state: decode successors from the per-byte shuffle masks */
+        for (u16 c = 0; c < N_CHARS; c++) {
+            u8 sheng_s = s - 1;
+            auto trans_for_c = (const char *)&m->sheng_masks[c];
+            assert(sheng_s < sizeof(m128));
+            u8 raw_succ = trans_for_c[sheng_s];
+            if (raw_succ == m->sheng_end - 1) {
+                t[c] = DEAD_STATE; /* sheng_end - 1 encodes the dead state */
+            } else if (raw_succ < m->sheng_end) {
+                t[c] = raw_succ + 1; /* sheng targets are stored as impl id - 1 */
+            } else {
+                t[c] = raw_succ;
+            }
+        }
+    } else if (n->type == MCSHENG_NFA_8) { /* 8-bit normal state: u8 table row */
+        const u8 *succ_table = (const u8 *)((const char *)m + sizeof(mcsheng));
+        for (u16 c = 0; c < N_CHARS; c++) {
+            u32 normal_id = s - m->sheng_end;
+            t[c] = succ_table[(normal_id << as) + m->remap[c]];
+        }
+    } else { /* 16-bit state: a sherman state starts from its daddy's row */
+        u16 base_s = s;
+        const char *winfo_base = (const char *)n + m->sherman_offset;
+        const char *state_base
+            = winfo_base + SHERMAN_FIXED_SIZE * (s - m->sherman_limit);
+
+        if (s >= m->sherman_limit) {
+            base_s = unaligned_load_u16(state_base + SHERMAN_DADDY_OFFSET);
+            assert(base_s >= m->sheng_end);
+        }
+
+        const u16 *succ_table = (const
u16 *)((const char *)m + + sizeof(mcsheng)); + for (u16 c = 0; c < N_CHARS; c++) { + u32 normal_id = base_s - m->sheng_end; + t[c] = succ_table[(normal_id << as) + m->remap[c]]; + } + + if (s >= m->sherman_limit) { + UNUSED char type = *(state_base + SHERMAN_TYPE_OFFSET); + assert(type == SHERMAN_STATE); + u8 len = *(const u8 *)(SHERMAN_LEN_OFFSET + state_base); + const char *chars = state_base + SHERMAN_CHARS_OFFSET; + const u16 *states = (const u16 *)(state_base + + SHERMAN_STATES_OFFSET(len)); + + for (u8 i = 0; i < len; i++) { + for (u16 c = 0; c < N_CHARS; c++) { + if (m->remap[c] == chars[i]) { + t[c] = unaligned_load_u16((const u8*)&states[i]); + } + } + } + } + + for (u16 c = 0; c < N_CHARS; c++) { + t[c] &= STATE_MASK; + } + + } + + t[TOP] = aux->top & STATE_MASK; +} + +static +void describeEdge(FILE *f, const mcsheng *m, const u16 *t, u16 i) { + for (u16 s = 0; s < N_CHARS; s++) { + if (!t[s]) { + continue; + } + + u16 ss; + for (ss = 0; ss < s; ss++) { + if (t[s] == t[ss]) { + break; + } + } + + if (ss != s) { + continue; + } + + CharReach reach; + for (ss = s; ss < 256; ss++) { + if (t[s] == t[ss]) { + reach.set(ss); + } + } + + fprintf(f, "%u -> %u [ ", i, t[s]); + if (i < m->sheng_end && t[s] < m->sheng_end) { + fprintf(f, "color = red, fontcolor = red "); + } + fprintf(f, "label = \""); + describeClass(f, reach, 5, CC_OUT_DOT); + + fprintf(f, "\" ];\n"); + } +} + +static +void dumpAccelDot(FILE *f, u16 i, const union AccelAux *accel) { + switch(accel->accel_type) { + case ACCEL_NONE: + break; + case ACCEL_VERM: + case ACCEL_VERM_NOCASE: + case ACCEL_DVERM: + case ACCEL_DVERM_NOCASE: + fprintf(f, "%u [ color = forestgreen style=diagonals];\n", i); + break; + case ACCEL_SHUFTI: + case ACCEL_DSHUFTI: + case ACCEL_TRUFFLE: + fprintf(f, "%u [ color = darkgreen style=diagonals ];\n", i); + break; + default: + fprintf(f, "%u [ color = yellow style=diagonals ];\n", i); + break; + } +} + +static +void describeNode(const NFA *n, const mcsheng *m, u16 i, FILE 
*f) { + const mstate_aux *aux = getAux(n, i); + + bool isSherman = m->sherman_limit && i >= m->sherman_limit; + + fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, " + "label = \"%u%s\" ]; \n", i, i, isSherman ? "w":""); + + if (aux->accel_offset) { + dumpAccelDot(f, i, (const union AccelAux *) + ((const char *)m + aux->accel_offset)); + } + + if (i && i < m->sheng_end) { + fprintf(f, "%u [color = red, fontcolor = red]; \n", i); + } + + if (aux->accept_eod) { + fprintf(f, "%u [ color = darkorchid ];\n", i); + } + + if (aux->accept) { + fprintf(f, "%u [ shape = doublecircle ];\n", i); + } + + if (aux->top && aux->top != i) { + fprintf(f, "%u -> %u [color = darkgoldenrod weight=0.1 ]\n", i, + aux->top); + } + + if (i == m->start_anchored) { + fprintf(f, "STARTA -> %u [color = blue ]\n", i); + } + + if (i == m->start_floating) { + fprintf(f, "STARTF -> %u [color = red ]\n", i); + } + + if (isSherman) { + const char *winfo_base = (const char *)n + m->sherman_offset; + const char *state_base + = winfo_base + SHERMAN_FIXED_SIZE * (i - m->sherman_limit); + assert(state_base < (const char *)m + m->length - sizeof(NFA)); + UNUSED u8 type = *(const u8 *)(state_base + SHERMAN_TYPE_OFFSET); + assert(type == SHERMAN_STATE); + fprintf(f, "%u [ fillcolor = lightblue style=filled ];\n", i); + u16 daddy = *(const u16 *)(state_base + SHERMAN_DADDY_OFFSET); + if (daddy) { + fprintf(f, "%u -> %u [ color=royalblue style=dashed weight=0.1]\n", + i, daddy); + } + } + + if (i && i < m->sheng_end) { + fprintf(f, "subgraph cluster_sheng { %u } \n", i); + } + +} + +static +void dumpDotPreambleDfa(FILE *f) { + dumpDotPreamble(f); + + // DFA specific additions. 
+ fprintf(f, "STARTF [style=invis];\n"); + fprintf(f, "STARTA [style=invis];\n"); + fprintf(f, "0 [style=invis];\n"); + fprintf(f, "subgraph cluster_sheng { style = dashed }\n"); +} + +static +void dump_dot_16(const NFA *nfa, FILE *f) { + auto *m = (const mcsheng *)getImplNfa(nfa); + + dumpDotPreambleDfa(f); + + for (u16 i = 1; i < m->state_count; i++) { + describeNode(nfa, m, i, f); + + u16 t[ALPHABET_SIZE]; + + next_states(nfa, i, t); + + describeEdge(f, m, t, i); + } + + fprintf(f, "}\n"); +} + +static +void dump_dot_8(const NFA *nfa, FILE *f) { + auto m = (const mcsheng *)getImplNfa(nfa); + + dumpDotPreambleDfa(f); + + for (u16 i = 1; i < m->state_count; i++) { + describeNode(nfa, m, i, f); + + u16 t[ALPHABET_SIZE]; + + next_states(nfa, i, t); + + describeEdge(f, m, t, i); + } + + fprintf(f, "}\n"); +} + +static +void dumpAccelMasks(FILE *f, const mcsheng *m, const mstate_aux *aux) { + fprintf(f, "\n"); + fprintf(f, "Acceleration\n"); + fprintf(f, "------------\n"); + + for (u16 i = 0; i < m->state_count; i++) { + if (!aux[i].accel_offset) { + continue; + } + + auto accel = (const AccelAux *)((const char *)m + aux[i].accel_offset); + fprintf(f, "%05hu ", i); + dumpAccelInfo(f, *accel); + } +} + +static +void describeAlphabet(FILE *f, const mcsheng *m) { + map rev; + + for (u16 i = 0; i < N_CHARS; i++) { + rev[m->remap[i]].clear(); + } + + for (u16 i = 0; i < N_CHARS; i++) { + rev[m->remap[i]].set(i); + } + + map::const_iterator it; + fprintf(f, "\nAlphabet\n"); + for (it = rev.begin(); it != rev.end(); ++it) { + fprintf(f, "%3hhu: ", it->first); + describeClass(f, it->second, 10240, CC_OUT_TEXT); + fprintf(f, "\n"); + } + fprintf(f, "\n"); +} + +static +void dumpCommonHeader(FILE *f, const mcsheng *m) { + fprintf(f, "report: %u, states: %u, length: %u\n", m->arb_report, + m->state_count, m->length); + fprintf(f, "astart: %hu, fstart: %hu\n", m->start_anchored, + m->start_floating); + fprintf(f, "single accept: %d, has_accel: %d\n", + !!(int)m->flags & 
MCSHENG_FLAG_SINGLE, m->has_accel); + fprintf(f, "sheng_end: %hu\n", m->sheng_end); + fprintf(f, "sheng_accel_limit: %hu\n", m->sheng_accel_limit); +} + +static +void dump_text_16(const NFA *nfa, FILE *f) { + auto *m = (const mcsheng *)getImplNfa(nfa); + auto *aux = (const mstate_aux *)((const char *)nfa + m->aux_offset); + + fprintf(f, "mcsheng 16\n"); + dumpCommonHeader(f, m); + fprintf(f, "sherman_limit: %d, sherman_end: %d\n", (int)m->sherman_limit, + (int)m->sherman_end); + fprintf(f, "\n"); + + describeAlphabet(f, m); + dumpAccelMasks(f, m, aux); + + fprintf(f, "\n"); + dumpTextReverse(nfa, f); +} + +static +void dump_text_8(const NFA *nfa, FILE *f) { + auto m = (const mcsheng *)getImplNfa(nfa); + auto aux = (const mstate_aux *)((const char *)nfa + m->aux_offset); + + fprintf(f, "mcsheng 8\n"); + dumpCommonHeader(f, m); + fprintf(f, "accel_limit: %hu, accept_limit %hu\n", m->accel_limit_8, + m->accept_limit_8); + fprintf(f, "\n"); + + describeAlphabet(f, m); + dumpAccelMasks(f, m, aux); + + fprintf(f, "\n"); + dumpTextReverse(nfa, f); +} + +void nfaExecMcSheng16_dump(const NFA *nfa, const string &base) { + assert(nfa->type == MCSHENG_NFA_16); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + dump_text_16(nfa, f); + fclose(f); + f = fopen_or_throw((base + ".dot").c_str(), "w"); + dump_dot_16(nfa, f); + fclose(f); +} + +void nfaExecMcSheng8_dump(const NFA *nfa, const string &base) { + assert(nfa->type == MCSHENG_NFA_8); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + dump_text_8(nfa, f); + fclose(f); + f = fopen_or_throw((base + ".dot").c_str(), "w"); + dump_dot_8(nfa, f); + fclose(f); +} + +} // namespace ue2 diff --git a/src/nfa/mcsheng_dump.h b/src/nfa/mcsheng_dump.h new file mode 100644 index 00000000..1b699367 --- /dev/null +++ b/src/nfa/mcsheng_dump.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that 
the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef MCSHENG_DUMP_H +#define MCSHENG_DUMP_H + +#ifdef DUMP_SUPPORT + +#include "rdfa.h" + +#include +#include + +struct NFA; + +namespace ue2 { + +void nfaExecMcSheng8_dump(const struct NFA *nfa, const std::string &base); +void nfaExecMcSheng16_dump(const struct NFA *nfa, const std::string &base); + +} // namespace ue2 + +#endif // DUMP_SUPPORT + +#endif // MCSHENG_DUMP_H diff --git a/src/nfa/mcsheng_internal.h b/src/nfa/mcsheng_internal.h new file mode 100644 index 00000000..5ced6f76 --- /dev/null +++ b/src/nfa/mcsheng_internal.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MCSHENG_INTERNAL_H +#define MCSHENG_INTERNAL_H + +#include "nfa_internal.h" +#include "ue2common.h" +#include "util/simd_utils.h" + +#define ACCEPT_FLAG 0x8000 +#define ACCEL_FLAG 0x4000 +#define STATE_MASK 0x3fff + +#define SHERMAN_STATE 1 + +#define SHERMAN_TYPE_OFFSET 0 +#define SHERMAN_FIXED_SIZE 32 + +#define SHERMAN_LEN_OFFSET 1 +#define SHERMAN_DADDY_OFFSET 2 +#define SHERMAN_CHARS_OFFSET 4 +#define SHERMAN_STATES_OFFSET(sso_len) (4 + (sso_len)) + +struct report_list { + u32 count; + ReportID report[]; +}; + +struct mstate_aux { + u32 accept; + u32 accept_eod; + u16 top; + u32 accel_offset; /* relative to start of struct mcsheng; 0 if no accel */ +}; + +#define MCSHENG_FLAG_SINGLE 1 /**< we raise only single accept id */ + +struct mcsheng { + u16 state_count; /**< total number of states */ + u32 length; /**< length of dfa in bytes */ + u16 start_anchored; /**< anchored start state */ + u16 start_floating; /**< floating start state */ + u32 aux_offset; /**< offset of the aux structures relative to the start of + * the nfa structure */ + u32 sherman_offset; /**< offset of array of sherman state offsets the + * state_info structures relative to the start of the + * nfa structure */ + u32 sherman_end; /**< offset of the end of the state_info structures + * relative to the start of the nfa structure */ + u16 sheng_end; /**< first non-sheng state */ + u16 sheng_accel_limit; /**< first sheng accel state. 
state given in terms of + * internal sheng ids */ + u16 accel_limit_8; /**< 8 bit, lowest accelerable state */ + u16 accept_limit_8; /**< 8 bit, lowest accept state */ + u16 sherman_limit; /**< lowest sherman state */ + u8 alphaShift; + u8 flags; + u8 has_accel; /**< 1 iff there are any accel plans */ + u8 remap[256]; /**< remaps characters to a smaller alphabet */ + ReportID arb_report; /**< one of the accepts that this dfa may raise */ + u32 accel_offset; /**< offset of the accel structures from start of NFA */ + m128 sheng_masks[N_CHARS]; +}; + +/* pext masks for the runtime to access appropriately copies of bytes 1..7 + * representing the data from a u64a. */ +extern const u64a mcsheng_pext_mask[8]; + +#endif diff --git a/src/nfa/nfa_api_dispatch.c b/src/nfa/nfa_api_dispatch.c index d4e9eb78..f4b7552e 100644 --- a/src/nfa/nfa_api_dispatch.c +++ b/src/nfa/nfa_api_dispatch.c @@ -41,6 +41,7 @@ #include "lbr.h" #include "limex.h" #include "mcclellan.h" +#include "mcsheng.h" #include "mpv.h" #include "sheng.h" #include "tamarama.h" @@ -73,6 +74,8 @@ DISPATCH_CASE(CASTLE_NFA, Castle, dbnt_func); \ DISPATCH_CASE(SHENG_NFA, Sheng, dbnt_func); \ DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \ + DISPATCH_CASE(MCSHENG_NFA_8, McSheng8, dbnt_func); \ + DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \ default: \ assert(0); \ } diff --git a/src/nfa/nfa_build_util.cpp b/src/nfa/nfa_build_util.cpp index 3b235bf4..3103cd29 100644 --- a/src/nfa/nfa_build_util.cpp +++ b/src/nfa/nfa_build_util.cpp @@ -30,6 +30,7 @@ #include "limex_internal.h" #include "mcclellancompile.h" +#include "mcsheng_compile.h" #include "shengcompile.h" #include "nfa_internal.h" #include "repeat_internal.h" @@ -413,6 +414,38 @@ const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = d const char *NFATraits::name = "Tamarama"; #endif +template<> struct NFATraits { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 1; + static 
const bool fast = true; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; +}; +const nfa_dispatch_fn NFATraits::has_accel = has_accel_mcsheng; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; +#if defined(DUMP_SUPPORT) +const char *NFATraits::name = "Shengy McShengFace 8"; +#endif + +template<> struct NFATraits { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 2; + static const bool fast = true; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; +}; +const nfa_dispatch_fn NFATraits::has_accel = has_accel_mcsheng; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; +#if defined(DUMP_SUPPORT) +const char *NFATraits::name = "Shengy McShengFace 16"; +#endif + } // namespace #if defined(DUMP_SUPPORT) diff --git a/src/nfa/nfa_dump_dispatch.cpp b/src/nfa/nfa_dump_dispatch.cpp index 3dea5ef7..5607ed27 100644 --- a/src/nfa/nfa_dump_dispatch.cpp +++ b/src/nfa/nfa_dump_dispatch.cpp @@ -39,6 +39,7 @@ #include "lbr_dump.h" #include "limex.h" #include "mcclellandump.h" +#include "mcsheng_dump.h" #include "mpv_dump.h" #include "shengdump.h" #include "tamarama_dump.h" @@ -78,6 +79,8 @@ namespace ue2 { DISPATCH_CASE(CASTLE_NFA, Castle, dbnt_func); \ DISPATCH_CASE(SHENG_NFA, Sheng, dbnt_func); \ DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \ + DISPATCH_CASE(MCSHENG_NFA_8, McSheng8, dbnt_func); \ + DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \ default: \ assert(0); \ } diff --git a/src/nfa/nfa_internal.h b/src/nfa/nfa_internal.h index 1ce566ff..9d280822 100644 --- a/src/nfa/nfa_internal.h +++ b/src/nfa/nfa_internal.h @@ -70,6 
+70,8 @@ enum NFAEngineType { CASTLE_NFA, /**< magic pseudo nfa */ SHENG_NFA, /**< magic pseudo nfa */ TAMARAMA_NFA, /**< magic nfa container */ + MCSHENG_NFA_8, /**< magic pseudo nfa */ + MCSHENG_NFA_16, /**< magic pseudo nfa */ /** \brief bogus NFA - not used */ INVALID_NFA }; @@ -143,6 +145,12 @@ static really_inline int isMcClellanType(u8 t) { return t == MCCLELLAN_NFA_8 || t == MCCLELLAN_NFA_16; } +/** \brief True if the given type (from NFA::type) is a Sheng-McClellan hybrid + * DFA. */ +static really_inline int isShengMcClellanType(u8 t) { + return t == MCSHENG_NFA_8 || t == MCSHENG_NFA_16; +} + /** \brief True if the given type (from NFA::type) is a Gough DFA. */ static really_inline int isGoughType(u8 t) { return t == GOUGH_NFA_8 || t == GOUGH_NFA_16; @@ -158,7 +166,16 @@ static really_inline int isShengType(u8 t) { * Sheng DFA. */ static really_inline int isDfaType(u8 t) { - return isMcClellanType(t) || isGoughType(t) || isShengType(t); + return isMcClellanType(t) || isGoughType(t) || isShengType(t) + || isShengMcClellanType(t); +} + +static really_inline int isBigDfaType(u8 t) { + return t == MCCLELLAN_NFA_16 || t == MCSHENG_NFA_16 || t == GOUGH_NFA_16; +} + +static really_inline int isSmallDfaType(u8 t) { + return isDfaType(t) && !isBigDfaType(t); } /** \brief True if the given type (from NFA::type) is an NFA. */ diff --git a/src/nfa/rdfa_graph.cpp b/src/nfa/rdfa_graph.cpp new file mode 100644 index 00000000..2467748b --- /dev/null +++ b/src/nfa/rdfa_graph.cpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include "rdfa_graph.h" + +#include "rdfa.h" +#include "util/container.h" + +#include + +using namespace std; + +namespace ue2 { + +RdfaGraph::RdfaGraph(const raw_dfa &rdfa) { + RdfaGraph &g = *this; + + vector verts; + verts.reserve(rdfa.states.size()); + for (dstate_id_t i = 0; i < rdfa.states.size(); i++) { + verts.push_back(add_vertex(g)); + assert(g[verts.back()].index == i); + } + + symbol_t symbol_end = rdfa.alpha_size - 1; + + flat_set local_succs; + for (dstate_id_t i = 0; i < rdfa.states.size(); i++) { + local_succs.clear(); + for (symbol_t s = 0; s < symbol_end; s++) { + dstate_id_t next = rdfa.states[i].next[s]; + if (contains(local_succs, next)) { + continue; + } + DEBUG_PRINTF("%hu->%hu\n", i, next); + add_edge(verts[i], verts[next], g); + local_succs.insert(next); + } + } +} + +} diff --git a/src/nfa/rdfa_graph.h b/src/nfa/rdfa_graph.h new file mode 100644 index 00000000..6d166c2f --- /dev/null +++ b/src/nfa/rdfa_graph.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RDFA_GRAPH_H +#define RDFA_GRAPH_H + +#include "ue2common.h" +#include "util/ue2_graph.h" + +namespace ue2 { + +struct raw_dfa; + +struct RdfaVertexProps { + size_t index = 0; +}; + +struct RdfaEdgeProps { + size_t index = 0; +}; + +struct RdfaGraph : public ue2_graph { + RdfaGraph(const raw_dfa &rdfa); +}; + + +} + +#endif diff --git a/src/nfa/shengcompile.cpp b/src/nfa/shengcompile.cpp index 3902dbaf..a02a9b96 100644 --- a/src/nfa/shengcompile.cpp +++ b/src/nfa/shengcompile.cpp @@ -447,9 +447,8 @@ void createShuffleMasks(sheng *s, dfa_info &info, } } -bool has_accel_sheng(const NFA *nfa) { - const sheng *s = (const sheng *)getImplNfa(nfa); - return s->flags & SHENG_FLAG_HAS_ACCEL; +bool has_accel_sheng(const NFA *) { + return true; /* consider the sheng region as accelerated */ } aligned_unique_ptr shengCompile(raw_dfa &raw, diff --git a/src/nfagraph/ng_util.cpp b/src/nfagraph/ng_util.cpp index 948cd7f1..5252eb18 100644 --- a/src/nfagraph/ng_util.cpp +++ b/src/nfagraph/ng_util.cpp @@ -46,7 +46,6 @@ #include #include #include -#include #include #include @@ -54,7 +53,6 @@ using namespace std; using boost::default_color_type; using boost::make_filtered_graph; using boost::make_assoc_property_map; -using boost::adaptors::map_values; namespace ue2 { @@ -257,38 +255,6 @@ bool hasBigCycles(const NGHolder &g) { return false; } -set findVerticesInCycles(const NGHolder &g) { - map comp_map; - - strong_components(g, 
make_assoc_property_map(comp_map)); - - map > comps; - - for (const auto &e : comp_map) { - comps[e.second].insert(e.first); - } - - - set rv; - - for (const auto &comp : comps | map_values) { - /* every vertex in a strongly connected component is reachable from - * every other vertex in the component. A vertex is involved in a cycle - * therefore if it is in a strongly connected component with more than - * one vertex or if it is the only vertex and it has a self loop. */ - assert(!comp.empty()); - if (comp.size() > 1) { - insert(&rv, comp); - } - NFAVertex v = *comp.begin(); - if (hasSelfLoop(v, g)) { - rv.insert(v); - } - } - - return rv; -} - bool can_never_match(const NGHolder &g) { assert(edge(g.accept, g.acceptEod, g).second); if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) { diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index f074973d..ef74619d 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -52,6 +52,7 @@ #include "nfa/goughcompile.h" #include "nfa/mcclellancompile.h" #include "nfa/mcclellancompile_util.h" +#include "nfa/mcsheng_compile.h" #include "nfa/nfa_api_queue.h" #include "nfa/nfa_build_util.h" #include "nfa/nfa_internal.h" @@ -615,7 +616,7 @@ aligned_unique_ptr pickImpl(aligned_unique_ptr dfa_impl, bool d_accel = has_accel(*dfa_impl); bool n_accel = has_accel(*nfa_impl); - bool d_big = dfa_impl->type == MCCLELLAN_NFA_16; + bool d_big = isBigDfaType(dfa_impl->type); bool n_vsmall = nfa_impl->nPositions <= 32; bool n_br = has_bounded_repeats(*nfa_impl); DEBUG_PRINTF("da %d na %d db %d nvs %d nbr %d\n", (int)d_accel, @@ -666,10 +667,17 @@ buildRepeatEngine(const CastleProto &proto, } static -aligned_unique_ptr getDfa(raw_dfa &rdfa, const CompileContext &cc, +aligned_unique_ptr getDfa(raw_dfa &rdfa, bool is_transient, + const CompileContext &cc, const ReportManager &rm) { // Unleash the Sheng!! 
auto dfa = shengCompile(rdfa, cc, rm); + if (!dfa && !is_transient) { + // Sheng wasn't successful, so unleash McClellan! + /* We don't try the hybrid for transient prefixes due to the extra + * bytecode and that they are usually run on small blocks */ + dfa = mcshengCompile(rdfa, cc, rm); + } if (!dfa) { // Sheng wasn't successful, so unleash McClellan! dfa = mcclellanCompile(rdfa, cc, rm); @@ -697,7 +705,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, } if (suff.dfa()) { - auto d = getDfa(*suff.dfa(), cc, rm); + auto d = getDfa(*suff.dfa(), false, cc, rm); assert(d); return d; } @@ -726,7 +734,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, auto rdfa = buildMcClellan(holder, &rm, false, triggers.at(0), cc.grey); if (rdfa) { - auto d = getDfa(*rdfa, cc, rm); + auto d = getDfa(*rdfa, false, cc, rm); assert(d); if (cc.grey.roseMcClellanSuffix != 2) { n = pickImpl(move(d), move(n)); @@ -846,12 +854,12 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, } if (left.dfa()) { - n = getDfa(*left.dfa(), cc, rm); + n = getDfa(*left.dfa(), is_transient, cc, rm); } else if (left.graph() && cc.grey.roseMcClellanPrefix == 2 && is_prefix && !is_transient) { auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey); if (rdfa) { - n = getDfa(*rdfa, cc, rm); + n = getDfa(*rdfa, is_transient, cc, rm); assert(n); } } @@ -878,7 +886,7 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, && (!n || !has_bounded_repeats_other_than_firsts(*n) || !is_fast(*n))) { auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey); if (rdfa) { - auto d = getDfa(*rdfa, cc, rm); + auto d = getDfa(*rdfa, is_transient, cc, rm); assert(d); n = pickImpl(move(d), move(n)); } @@ -1614,7 +1622,7 @@ public: aligned_unique_ptr operator()(unique_ptr &rdfa) const { // Unleash the mighty DFA! 
- return getDfa(*rdfa, build.cc, build.rm); + return getDfa(*rdfa, false, build.cc, build.rm); } aligned_unique_ptr operator()(unique_ptr &haig) const { @@ -1642,7 +1650,7 @@ public: !has_bounded_repeats_other_than_firsts(*n)) { auto rdfa = buildMcClellan(h, &rm, cc.grey); if (rdfa) { - auto d = getDfa(*rdfa, cc, rm); + auto d = getDfa(*rdfa, false, cc, rm); if (d) { n = pickImpl(move(d), move(n)); } diff --git a/src/rose/rose_build_infix.cpp b/src/rose/rose_build_infix.cpp index f3e7680f..4bbb3525 100644 --- a/src/rose/rose_build_infix.cpp +++ b/src/rose/rose_build_infix.cpp @@ -278,7 +278,7 @@ void findCountingMiracleInfo(const left_id &left, const vector &stopTable, const NGHolder &g = *left.graph(); - auto cyclics = findVerticesInCycles(g); + auto cyclics = find_vertices_in_cycles(g); if (!proper_out_degree(g.startDs, g)) { cyclics.erase(g.startDs); diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 50ca1d9e..28b885bd 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -1206,7 +1206,7 @@ u32 roseQuality(const RoseEngine *t) { } const NFA *nfa = (const NFA *)((const char *)atable + sizeof(*atable)); - if (nfa->type != MCCLELLAN_NFA_8) { + if (!isSmallDfaType(nfa->type)) { DEBUG_PRINTF("m16 atable engine\n"); return 0; } diff --git a/src/util/bitutils.h b/src/util/bitutils.h index b7a09ca7..d144e879 100644 --- a/src/util/bitutils.h +++ b/src/util/bitutils.h @@ -471,4 +471,55 @@ u32 rank_in_mask64(u64a mask, u32 bit) { return popcount64(mask); } +#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__)) +#define HAVE_PEXT +#endif + +static really_inline +u32 pext32(u32 x, u32 mask) { +#if defined(HAVE_PEXT) + // Intel BMI2 can do this operation in one instruction. + return _pext_u32(x, mask); +#else + + u32 result = 0, num = 1; + while (mask != 0) { + u32 bit = findAndClearLSB_32(&mask); + if (x & (1U << bit)) { + assert(num != 0); // more than 32 bits! 
+ result |= num; + } + num <<= 1; + } + return result; +#endif +} + +static really_inline +u64a pext64(u64a x, u64a mask) { +#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) + // Intel BMI2 can do this operation in one instruction. + return _pext_u64(x, mask); +#else + + u32 result = 0, num = 1; + while (mask != 0) { + u32 bit = findAndClearLSB_64(&mask); + if (x & (1ULL << bit)) { + assert(num != 0); // more than 32 bits! + result |= num; + } + num <<= 1; + } + return result; +#endif +} + +#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) +static really_inline +u64a pdep64(u64a x, u64a mask) { + return _pdep_u64(x, mask); +} +#endif + #endif // BITUTILS_H diff --git a/src/util/graph.h b/src/util/graph.h index ae7c2c90..4c2876f1 100644 --- a/src/util/graph.h +++ b/src/util/graph.h @@ -39,8 +39,12 @@ #include "util/ue2_containers.h" #include +#include +#include #include +#include +#include #include #include @@ -140,6 +144,41 @@ void find_unreachable(const Graph &g, const SourceCont &sources, OutCont *out) { } } +template +ue2::flat_set +find_vertices_in_cycles(const Graph &g) { + using vertex_descriptor = typename Graph::vertex_descriptor; + + std::map comp_map; + + boost::strong_components(g, boost::make_assoc_property_map(comp_map)); + + std::map> comps; + + for (const auto &e : comp_map) { + comps[e.second].push_back(e.first); + } + + ue2::flat_set rv; + + for (const auto &comp : comps | boost::adaptors::map_values) { + /* every vertex in a strongly connected component is reachable from + * every other vertex in the component. A vertex is involved in a cycle + * therefore if it is in a strongly connected component with more than + * one vertex or if it is the only vertex and it has a self loop. 
*/ + assert(!comp.empty()); + if (comp.size() > 1) { + insert(&rv, comp); + } + vertex_descriptor v = *comp.begin(); + if (hasSelfLoop(v, g)) { + rv.insert(v); + } + } + + return rv; +} + template bool has_parallel_edge(const Graph &g) { using vertex_descriptor = typename Graph::vertex_descriptor; diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index 35e1a390..e8676249 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -159,6 +159,10 @@ static really_inline m128 set16x8(u8 c) { return _mm_set1_epi8(c); } +static really_inline m128 set4x32(u32 c) { + return _mm_set1_epi32(c); +} + static really_inline u32 movd(const m128 in) { return _mm_cvtsi128_si32(in); } @@ -328,6 +332,25 @@ m128 variable_byte_shift_m128(m128 in, s32 amount) { return pshufb(in, shift_mask); } +static really_inline +m128 max_u8_m128(m128 a, m128 b) { + return _mm_max_epu8(a, b); +} + +static really_inline +m128 min_u8_m128(m128 a, m128 b) { + return _mm_min_epu8(a, b); +} + +static really_inline +m128 sadd_u8_m128(m128 a, m128 b) { + return _mm_adds_epu8(a, b); +} + +static really_inline +m128 sub_u8_m128(m128 a, m128 b) { + return _mm_sub_epi8(a, b); +} /**** **** 256-bit Primitives diff --git a/unit/internal/bitutils.cpp b/unit/internal/bitutils.cpp index 4d476932..31aaf17f 100644 --- a/unit/internal/bitutils.cpp +++ b/unit/internal/bitutils.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -436,3 +436,16 @@ TEST(BitUtils, rank_in_mask64) { ASSERT_EQ(15, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 31)); ASSERT_EQ(31, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 63)); } + +#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) +TEST(BitUtils, pdep64) { + u64a data = 0xF123456789ABCDEF; + ASSERT_EQ(0xfULL, pdep64(data, 0xf)); + ASSERT_EQ(0xefULL, pdep64(data, 0xff)); + 
ASSERT_EQ(0xf0ULL, pdep64(data, 0xf0)); + ASSERT_EQ(0xfULL, pdep64(data, 0xf)); + ASSERT_EQ(0xef0ULL, pdep64(data, 0xff0)); + ASSERT_EQ(0xef00ULL, pdep64(data, 0xff00)); + ASSERT_EQ(0xd0e0f00ULL, pdep64(data, 0xf0f0f00)); +} +#endif diff --git a/unit/internal/nfagraph_util.cpp b/unit/internal/nfagraph_util.cpp index 135276dd..b6952f5a 100644 --- a/unit/internal/nfagraph_util.cpp +++ b/unit/internal/nfagraph_util.cpp @@ -320,9 +320,9 @@ TEST(NFAGraph, cyclicVerts1) { add_edge(a, b, g); add_edge(b, a, g); - auto cyclics = findVerticesInCycles(g); + auto cyclics = find_vertices_in_cycles(g); - ASSERT_EQ(set({g.startDs, a, b}), cyclics); + ASSERT_EQ(flat_set({g.startDs, a, b}), cyclics); } TEST(NFAGraph, cyclicVerts2) { @@ -341,9 +341,9 @@ TEST(NFAGraph, cyclicVerts2) { add_edge(c, d, g); add_edge(a, e, g); - auto cyclics = findVerticesInCycles(g); + auto cyclics = find_vertices_in_cycles(g); - ASSERT_EQ(set({g.startDs, a, b, c}), cyclics); + ASSERT_EQ(flat_set({g.startDs, a, b, c}), cyclics); } TEST(NFAGraph, cyclicVerts3) { @@ -369,9 +369,9 @@ TEST(NFAGraph, cyclicVerts3) { add_edge(f, h, g); add_edge(h, h, g); - auto cyclics = findVerticesInCycles(g); + auto cyclics = find_vertices_in_cycles(g); - ASSERT_EQ(set({g.startDs, a, b, c, d, e, h}), cyclics); + ASSERT_EQ(flat_set({g.startDs, a, b, c, d, e, h}), cyclics); } TEST(NFAGraph, cyclicVerts4) { @@ -396,9 +396,9 @@ TEST(NFAGraph, cyclicVerts4) { add_edge(e, f, g); add_edge(f, h, g); - auto cyclics = findVerticesInCycles(g); + auto cyclics = find_vertices_in_cycles(g); - ASSERT_EQ(set({g.startDs, a, b, c, d, e}), cyclics); + ASSERT_EQ(flat_set({g.startDs, a, b, c, d, e}), cyclics); } TEST(NFAGraph, cyclicVerts5) { @@ -418,7 +418,7 @@ TEST(NFAGraph, cyclicVerts5) { add_edge(c, d, g); add_edge(e, c, g); - auto cyclics = findVerticesInCycles(g); + auto cyclics = find_vertices_in_cycles(g); - ASSERT_EQ(set({g.startDs, b, c}), cyclics); + ASSERT_EQ(flat_set({g.startDs, b, c}), cyclics); } diff --git 
a/unit/internal/shuffle.cpp b/unit/internal/shuffle.cpp index 614b641d..a4632c36 100644 --- a/unit/internal/shuffle.cpp +++ b/unit/internal/shuffle.cpp @@ -54,14 +54,14 @@ TEST(Shuffle, PackedExtract32_1) { for (unsigned int i = 0; i < 32; i++) { // shuffle a single 1 bit to the front u32 mask = 1U << i; - EXPECT_EQ(1U, packedExtract32(mask, mask)); - EXPECT_EQ(1U, packedExtract32(~0U, mask)); + EXPECT_EQ(1U, pext32(mask, mask)); + EXPECT_EQ(1U, pext32(~0U, mask)); // we should get zero out of these cases - EXPECT_EQ(0U, packedExtract32(0, mask)); - EXPECT_EQ(0U, packedExtract32(~mask, mask)); + EXPECT_EQ(0U, pext32(0, mask)); + EXPECT_EQ(0U, pext32(~mask, mask)); // we should get zero out of all the other bit positions for (unsigned int j = 0; (j != i && j < 32); j++) { - EXPECT_EQ(0U, packedExtract32((1U << j), mask)); + EXPECT_EQ(0U, pext32((1U << j), mask)); } } } @@ -69,10 +69,10 @@ TEST(Shuffle, PackedExtract32_1) { TEST(Shuffle, PackedExtract32_2) { // All 32 bits in mask are on u32 mask = ~0U; - EXPECT_EQ(0U, packedExtract32(0, mask)); - EXPECT_EQ(mask, packedExtract32(mask, mask)); + EXPECT_EQ(0U, pext32(0, mask)); + EXPECT_EQ(mask, pext32(mask, mask)); for (unsigned int i = 0; i < 32; i++) { - EXPECT_EQ(1U << i, packedExtract32(1U << i, mask)); + EXPECT_EQ(1U << i, pext32(1U << i, mask)); } } @@ -84,16 +84,16 @@ TEST(Shuffle, PackedExtract32_3) { } // Test both cases (all even bits, all odd bits) - EXPECT_EQ((1U << 16) - 1, packedExtract32(mask, mask)); - EXPECT_EQ((1U << 16) - 1, packedExtract32(~mask, ~mask)); - EXPECT_EQ(0U, packedExtract32(~mask, mask)); - EXPECT_EQ(0U, packedExtract32(mask, ~mask)); + EXPECT_EQ((1U << 16) - 1, pext32(mask, mask)); + EXPECT_EQ((1U << 16) - 1, pext32(~mask, ~mask)); + EXPECT_EQ(0U, pext32(~mask, mask)); + EXPECT_EQ(0U, pext32(mask, ~mask)); for (unsigned int i = 0; i < 32; i += 2) { - EXPECT_EQ(1U << (i/2), packedExtract32(1U << i, mask)); - EXPECT_EQ(0U, packedExtract32(1U << i, ~mask)); - EXPECT_EQ(1U << (i/2), 
packedExtract32(1U << (i+1), ~mask)); - EXPECT_EQ(0U, packedExtract32(1U << (i+1), mask)); + EXPECT_EQ(1U << (i/2), pext32(1U << i, mask)); + EXPECT_EQ(0U, pext32(1U << i, ~mask)); + EXPECT_EQ(1U << (i/2), pext32(1U << (i+1), ~mask)); + EXPECT_EQ(0U, pext32(1U << (i+1), mask)); } } @@ -102,14 +102,14 @@ TEST(Shuffle, PackedExtract64_1) { for (unsigned int i = 0; i < 64; i++) { // shuffle a single 1 bit to the front u64a mask = 1ULL << i; - EXPECT_EQ(1U, packedExtract64(mask, mask)); - EXPECT_EQ(1U, packedExtract64(~0ULL, mask)); + EXPECT_EQ(1U, pext64(mask, mask)); + EXPECT_EQ(1U, pext64(~0ULL, mask)); // we should get zero out of these cases - EXPECT_EQ(0U, packedExtract64(0, mask)); - EXPECT_EQ(0U, packedExtract64(~mask, mask)); + EXPECT_EQ(0U, pext64(0, mask)); + EXPECT_EQ(0U, pext64(~mask, mask)); // we should get zero out of all the other bit positions for (unsigned int j = 0; (j != i && j < 64); j++) { - EXPECT_EQ(0U, packedExtract64((1ULL << j), mask)); + EXPECT_EQ(0U, pext64((1ULL << j), mask)); } } } @@ -117,26 +117,26 @@ TEST(Shuffle, PackedExtract64_1) { TEST(Shuffle, PackedExtract64_2) { // Fill first half of mask u64a mask = 0x00000000ffffffffULL; - EXPECT_EQ(0U, packedExtract64(0, mask)); - EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask)); + EXPECT_EQ(0U, pext64(0, mask)); + EXPECT_EQ(0xffffffffU, pext64(mask, mask)); for (unsigned int i = 0; i < 32; i++) { - EXPECT_EQ(1U << i, packedExtract64(1ULL << i, mask)); + EXPECT_EQ(1U << i, pext64(1ULL << i, mask)); } // Fill second half of mask mask = 0xffffffff00000000ULL; - EXPECT_EQ(0U, packedExtract64(0, mask)); - EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask)); + EXPECT_EQ(0U, pext64(0, mask)); + EXPECT_EQ(0xffffffffU, pext64(mask, mask)); for (unsigned int i = 32; i < 64; i++) { - EXPECT_EQ(1U << (i - 32), packedExtract64(1ULL << i, mask)); + EXPECT_EQ(1U << (i - 32), pext64(1ULL << i, mask)); } // Try one in the middle mask = 0x0000ffffffff0000ULL; - EXPECT_EQ(0U, packedExtract64(0, mask)); - 
EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask)); + EXPECT_EQ(0U, pext64(0, mask)); + EXPECT_EQ(0xffffffffU, pext64(mask, mask)); for (unsigned int i = 16; i < 48; i++) { - EXPECT_EQ(1U << (i - 16), packedExtract64(1ULL << i, mask)); + EXPECT_EQ(1U << (i - 16), pext64(1ULL << i, mask)); } } @@ -148,16 +148,16 @@ TEST(Shuffle, PackedExtract64_3) { } // Test both cases (all even bits, all odd bits) - EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask)); - EXPECT_EQ(0xffffffffU, packedExtract64(~mask, ~mask)); - EXPECT_EQ(0U, packedExtract64(~mask, mask)); - EXPECT_EQ(0U, packedExtract64(mask, ~mask)); + EXPECT_EQ(0xffffffffU, pext64(mask, mask)); + EXPECT_EQ(0xffffffffU, pext64(~mask, ~mask)); + EXPECT_EQ(0U, pext64(~mask, mask)); + EXPECT_EQ(0U, pext64(mask, ~mask)); for (unsigned int i = 0; i < 64; i += 2) { - EXPECT_EQ(1U << (i/2), packedExtract64(1ULL << i, mask)); - EXPECT_EQ(0U, packedExtract64(1ULL << i, ~mask)); - EXPECT_EQ(1U << (i/2), packedExtract64(1ULL << (i+1), ~mask)); - EXPECT_EQ(0U, packedExtract64(1ULL << (i+1), mask)); + EXPECT_EQ(1U << (i/2), pext64(1ULL << i, mask)); + EXPECT_EQ(0U, pext64(1ULL << i, ~mask)); + EXPECT_EQ(1U << (i/2), pext64(1ULL << (i+1), ~mask)); + EXPECT_EQ(0U, pext64(1ULL << (i+1), mask)); } } diff --git a/unit/internal/simd_utils.cpp b/unit/internal/simd_utils.cpp index 31d4b925..7b34d92e 100644 --- a/unit/internal/simd_utils.cpp +++ b/unit/internal/simd_utils.cpp @@ -614,6 +614,12 @@ TEST(SimdUtilsTest, set16x8) { } } +TEST(SimdUtilsTest, set4x32) { + u32 cmp[4] = { 0x12345678, 0x12345678, 0x12345678, 0x12345678 }; + m128 simd = set4x32(cmp[0]); + ASSERT_EQ(0, memcmp(cmp, &simd, sizeof(simd))); +} + #if defined(__AVX2__) TEST(SimdUtilsTest, set32x8) { char cmp[sizeof(m256)]; @@ -693,4 +699,50 @@ TEST(SimdUtilsTest, variableByteShift128) { EXPECT_TRUE(!diff128(zeroes128(), variable_byte_shift_m128(in, -16))); } +TEST(SimdUtilsTest, max_u8_m128) { + char base1[] = "0123456789ABCDE\xfe"; + char base2[] = 
"!!23455889aBCd\xff\xff"; + char expec[] = "0123456889aBCd\xff\xff"; + m128 in1 = loadu128(base1); + m128 in2 = loadu128(base2); + m128 result = max_u8_m128(in1, in2); + EXPECT_TRUE(!diff128(result, loadu128(expec))); +} + +TEST(SimdUtilsTest, min_u8_m128) { + char base1[] = "0123456789ABCDE\xfe"; + char base2[] = "!!23455889aBCd\xff\xff"; + char expec[] = "!!23455789ABCDE\xfe"; + m128 in1 = loadu128(base1); + m128 in2 = loadu128(base2); + m128 result = min_u8_m128(in1, in2); + EXPECT_TRUE(!diff128(result, loadu128(expec))); +} + +TEST(SimdUtilsTest, sadd_u8_m128) { + unsigned char base1[] = {0, 0x80, 0xff, 'A', '1', '2', '3', '4', + '1', '2', '3', '4', '1', '2', '3', '4'}; + unsigned char base2[] = {'a', 0x80, 'b', 'A', 0x10, 0x10, 0x10, 0x10, + 0x30, 0x30, 0x30, 0x30, 0, 0, 0, 0}; + unsigned char expec[] = {'a', 0xff, 0xff, 0x82, 'A', 'B', 'C', 'D', + 'a', 'b', 'c', 'd', '1', '2', '3', '4'}; + m128 in1 = loadu128(base1); + m128 in2 = loadu128(base2); + m128 result = sadd_u8_m128(in1, in2); + EXPECT_TRUE(!diff128(result, loadu128(expec))); +} + +TEST(SimdUtilsTest, sub_u8_m128) { + unsigned char base1[] = {'a', 0xff, 0xff, 0x82, 'A', 'B', 'C', 'D', + 'a', 'b', 'c', 'd', '1', '2', '3', '4'}; + unsigned char base2[] = {0, 0x80, 0xff, 'A', '1', '2', '3', '4', + '1', '2', '3', '4', '1', '2', '3', '4'}; + unsigned char expec[] = {'a', 0x7f, 0, 'A', 0x10, 0x10, 0x10, 0x10, + 0x30, 0x30, 0x30, 0x30, 0, 0, 0, 0}; + m128 in1 = loadu128(base1); + m128 in2 = loadu128(base2); + m128 result = sub_u8_m128(in1, in2); + EXPECT_TRUE(!diff128(result, loadu128(expec))); +} + } // namespace