diff --git a/CMakeLists.txt b/CMakeLists.txt index 1719ec2d..8834d4d6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -476,6 +476,12 @@ set (hs_exec_SRCS src/nfa/repeat.c src/nfa/repeat.h src/nfa/repeat_internal.h + src/nfa/sheng.c + src/nfa/sheng.h + src/nfa/sheng_defs.h + src/nfa/sheng_impl.h + src/nfa/sheng_impl4.h + src/nfa/sheng_internal.h src/nfa/shufti_common.h src/nfa/shufti.c src/nfa/shufti.h @@ -641,6 +647,9 @@ SET (hs_SRCS src/nfa/repeat_internal.h src/nfa/repeatcompile.cpp src/nfa/repeatcompile.h + src/nfa/sheng_internal.h + src/nfa/shengcompile.cpp + src/nfa/shengcompile.h src/nfa/shufticompile.cpp src/nfa/shufticompile.h src/nfa/tamaramacompile.cpp @@ -927,6 +936,8 @@ set(hs_dump_SRCS src/nfa/nfa_dump_dispatch.cpp src/nfa/nfa_dump_internal.cpp src/nfa/nfa_dump_internal.h + src/nfa/shengdump.cpp + src/nfa/shengdump.h src/nfa/tamarama_dump.cpp src/nfa/tamarama_dump.h src/parser/dump.cpp diff --git a/src/grey.cpp b/src/grey.cpp index bba5198a..bad56b56 100644 --- a/src/grey.cpp +++ b/src/grey.cpp @@ -50,6 +50,7 @@ Grey::Grey(void) : allowLitHaig(true), allowLbr(true), allowMcClellan(true), + allowSheng(true), allowPuff(true), allowLiteral(true), allowRose(true), @@ -127,6 +128,7 @@ Grey::Grey(void) : equivalenceEnable(true), allowSmallWrite(true), // McClellan dfas for small patterns + allowSmallWriteSheng(false), // allow use of Sheng for SMWR smallWriteLargestBuffer(70), // largest buffer that can be // considered a small write @@ -214,6 +216,7 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(allowLitHaig); G_UPDATE(allowLbr); G_UPDATE(allowMcClellan); + G_UPDATE(allowSheng); G_UPDATE(allowPuff); G_UPDATE(allowLiteral); G_UPDATE(allowRose); @@ -290,6 +293,7 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(miracleHistoryBonus); G_UPDATE(equivalenceEnable); G_UPDATE(allowSmallWrite); + G_UPDATE(allowSmallWriteSheng); G_UPDATE(smallWriteLargestBuffer); G_UPDATE(smallWriteLargestBufferBad); G_UPDATE(limitSmallWriteOutfixSize); diff --git a/src/grey.h b/src/grey.h index 1714a0eb..90f5f826 100644 --- a/src/grey.h +++ b/src/grey.h @@ -50,6 +50,7 @@ struct Grey { bool allowLitHaig; bool allowLbr; bool allowMcClellan; + bool allowSheng; bool allowPuff; bool allowLiteral; bool allowRose; @@ -149,6 +150,7 @@ struct Grey { // SmallWrite engine bool allowSmallWrite; + bool allowSmallWriteSheng; u32 smallWriteLargestBuffer; // largest buffer that can be small write u32 smallWriteLargestBufferBad;// largest buffer that can be small write u32 limitSmallWriteOutfixSize; //!< max total size of outfix DFAs diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index c26fb904..09006d5b 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -1009,7 +1009,7 @@ u32 mcclellanStartReachSize(const raw_dfa *raw) { return out.count(); } -bool has_accel_dfa(const NFA *nfa) { +bool has_accel_mcclellan(const NFA *nfa) { const mcclellan *m = (const mcclellan *)getImplNfa(nfa); return m->has_accel; } diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h index e5e56cc8..e6f548a7 100644 --- a/src/nfa/mcclellancompile.h +++ b/src/nfa/mcclellancompile.h @@ -87,7 +87,7 @@ u32 mcclellanStartReachSize(const raw_dfa *raw); std::set all_reports(const raw_dfa &rdfa); -bool has_accel_dfa(const NFA *nfa); +bool has_accel_mcclellan(const NFA *nfa); } // namespace ue2 diff --git a/src/nfa/nfa_api.h b/src/nfa/nfa_api.h index 9e0b6f89..e3f7f743 100644 --- a/src/nfa/nfa_api.h +++ b/src/nfa/nfa_api.h @@ -127,6 +127,9 @@ char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end); */ char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end); +/** Return value indicating that the engine is dead. */ +#define MO_DEAD 0 + /** Return value indicating that the engine is alive. */ #define MO_ALIVE 1 diff --git a/src/nfa/nfa_api_dispatch.c b/src/nfa/nfa_api_dispatch.c index 789c3014..c67103b3 100644 --- a/src/nfa/nfa_api_dispatch.c +++ b/src/nfa/nfa_api_dispatch.c @@ -42,6 +42,7 @@ #include "limex.h" #include "mcclellan.h" #include "mpv.h" +#include "sheng.h" #include "tamarama.h" #define DISPATCH_CASE(dc_ltype, dc_ftype, dc_subtype, dc_func_call) \ @@ -69,6 +70,7 @@ DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \ DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \ DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \ + DISPATCH_CASE(SHENG, Sheng, 0, dbnt_func); \ DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func); \ default: \ assert(0); \ diff --git a/src/nfa/nfa_build_util.cpp b/src/nfa/nfa_build_util.cpp index ce473196..93376b01 100644 --- a/src/nfa/nfa_build_util.cpp +++ b/src/nfa/nfa_build_util.cpp @@ -30,6 +30,7 @@ #include "limex_internal.h" #include "mcclellancompile.h" +#include "shengcompile.h" #include "nfa_internal.h" #include "repeat_internal.h" #include "ue2common.h" @@ -213,7 +214,7 @@ template<> struct NFATraits { static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits::has_accel = has_accel_dfa; +const nfa_dispatch_fn NFATraits::has_accel = has_accel_mcclellan; const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) @@ -229,7 +230,7 @@ template<> struct NFATraits { static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits::has_accel = has_accel_dfa; +const nfa_dispatch_fn NFATraits::has_accel = has_accel_mcclellan; const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) @@ -245,7 +246,7 @@ template<> struct NFATraits { static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits::has_accel = has_accel_dfa; +const nfa_dispatch_fn NFATraits::has_accel = has_accel_mcclellan; const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) @@ -261,7 +262,7 @@ template<> struct NFATraits { static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits::has_accel = has_accel_dfa; +const nfa_dispatch_fn NFATraits::has_accel = has_accel_mcclellan; const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) @@ -380,6 +381,22 @@ const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = d const char *NFATraits::name = "Lim Bounded Repeat (M)"; #endif +template<> struct NFATraits { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 1; + static const bool fast = true; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; +}; +const nfa_dispatch_fn NFATraits::has_accel = has_accel_sheng; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; +#if defined(DUMP_SUPPORT) +const char *NFATraits::name = "Sheng"; +#endif + template<> struct NFATraits { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; diff --git a/src/nfa/nfa_dump_dispatch.cpp b/src/nfa/nfa_dump_dispatch.cpp index cf2aa7f5..388ac003 100644 --- a/src/nfa/nfa_dump_dispatch.cpp +++ b/src/nfa/nfa_dump_dispatch.cpp @@ -40,6 +40,7 @@ #include "limex.h" #include "mcclellandump.h" #include "mpv_dump.h" +#include "shengdump.h" #include "tamarama_dump.h" #ifndef DUMP_SUPPORT @@ -74,6 +75,7 @@ namespace ue2 { DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \ DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \ DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \ + DISPATCH_CASE(SHENG, Sheng, 0, dbnt_func); \ DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func); \ default: \ assert(0); \ diff --git a/src/nfa/nfa_internal.h b/src/nfa/nfa_internal.h index a3703cb5..41fee73e 100644 --- a/src/nfa/nfa_internal.h +++ b/src/nfa/nfa_internal.h @@ -67,6 +67,7 @@ enum NFAEngineType { LBR_NFA_Shuf, /**< magic pseudo nfa */ LBR_NFA_Truf, /**< magic pseudo nfa */ CASTLE_NFA_0, /**< magic pseudo nfa */ + SHENG_NFA_0, /**< magic pseudo nfa */ TAMARAMA_NFA_0, /**< magic nfa container */ /** \brief bogus NFA - not used */ INVALID_NFA @@ -146,10 +147,17 @@ static really_inline int isGoughType(u8 t) { return t == GOUGH_NFA_8 || t == GOUGH_NFA_16; } -/** \brief True if the given type (from NFA::type) is a McClellan or Gough DFA. - * */ +/** \brief True if the given type (from NFA::type) is a Sheng DFA. */ +static really_inline int isShengType(u8 t) { + return t == SHENG_NFA_0; +} + +/** + * \brief True if the given type (from NFA::type) is a McClellan, Gough or + * Sheng DFA. + */ static really_inline int isDfaType(u8 t) { - return isMcClellanType(t) || isGoughType(t); + return isMcClellanType(t) || isGoughType(t) || isShengType(t); } /** \brief True if the given type (from NFA::type) is an NFA. */ diff --git a/src/nfa/sheng.c b/src/nfa/sheng.c new file mode 100644 index 00000000..bbbf1f20 --- /dev/null +++ b/src/nfa/sheng.c @@ -0,0 +1,676 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "sheng.h" + +#include "accel.h" +#include "sheng_internal.h" +#include "nfa_api.h" +#include "nfa_api_queue.h" +#include "nfa_internal.h" +#include "util/bitutils.h" +#include "util/compare.h" +#include "util/join.h" +#include "util/simd_utils.h" + +enum MatchMode { + CALLBACK_OUTPUT, + STOP_AT_MATCH, + NO_MATCHES +}; + +static really_inline +const struct sheng *get_sheng(const struct NFA *n) { + return (const struct sheng *)getImplNfa(n); +} + +static really_inline +const struct sstate_aux *get_aux(const struct sheng *sh, u8 id) { + u32 offset = sh->aux_offset - sizeof(struct NFA) + + (id & SHENG_STATE_MASK) * sizeof(struct sstate_aux); + DEBUG_PRINTF("Getting aux for state %u at offset %llu\n", + id & SHENG_STATE_MASK, (u64a)offset + sizeof(struct NFA)); + return (const struct sstate_aux *)((const char *) sh + offset); +} + +static really_inline +const union AccelAux *get_accel(const struct sheng *sh, u8 id) { + const struct sstate_aux *saux = get_aux(sh, id); + DEBUG_PRINTF("Getting accel aux at offset %u\n", saux->accel); + const union AccelAux *aux = (const union AccelAux *) + ((const char *)sh + saux->accel - sizeof(struct NFA)); + return aux; +} + +static really_inline +const struct report_list *get_rl(const struct sheng *sh, + const struct sstate_aux *aux) { + DEBUG_PRINTF("Getting report list at offset %u\n", aux->accept); + return (const struct report_list *) + ((const char *)sh + aux->accept - sizeof(struct NFA)); +} + +static really_inline +const struct report_list *get_eod_rl(const struct sheng *sh, + const struct sstate_aux *aux) { + DEBUG_PRINTF("Getting EOD report list at offset %u\n", aux->accept); + return (const struct report_list *) + ((const char *)sh + aux->accept_eod - sizeof(struct NFA)); +} + +static really_inline +char shengHasAccept(const struct sheng *sh, const struct sstate_aux *aux, + ReportID report) { + assert(sh && aux); + + const struct report_list *rl = get_rl(sh, aux); + assert(ISALIGNED_N(rl, 4)); + + DEBUG_PRINTF("report list has %u entries\n", rl->count); + + for (u32 i = 0; i < rl->count; i++) { + if (rl->report[i] == report) { + DEBUG_PRINTF("reporting %u\n", rl->report[i]); + return 1; + } + } + + return 0; +} + +static really_inline +char fireSingleReport(NfaCallback cb, void *ctxt, ReportID r, u64a loc) { + DEBUG_PRINTF("reporting %u\n", r); + if (cb(0, loc, r, ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + return MO_CONTINUE_MATCHING; /* continue execution */ +} + +static really_inline +char fireReports(const struct sheng *sh, NfaCallback cb, void *ctxt, + const u8 state, u64a loc, u8 *const cached_accept_state, + ReportID *const cached_accept_id, char eod) { + DEBUG_PRINTF("reporting matches @ %llu\n", loc); + + if (!eod && state == *cached_accept_state) { + DEBUG_PRINTF("reporting %u\n", *cached_accept_id); + if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + const struct sstate_aux *aux = get_aux(sh, state); + const struct report_list *rl = eod ? get_eod_rl(sh, aux) : get_rl(sh, aux); + assert(ISALIGNED(rl)); + + DEBUG_PRINTF("report list has %u entries\n", rl->count); + u32 count = rl->count; + + if (!eod && count == 1) { + *cached_accept_state = state; + *cached_accept_id = rl->report[0]; + + DEBUG_PRINTF("reporting %u\n", rl->report[0]); + if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + + for (u32 i = 0; i < count; i++) { + DEBUG_PRINTF("reporting %u\n", rl->report[i]); + if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + } + return MO_CONTINUE_MATCHING; /* continue execution */ +} + +/* include Sheng function definitions */ +#include "sheng_defs.h" + +static really_inline +char runShengCb(const struct sheng *sh, NfaCallback cb, void *ctxt, u64a offset, + u8 *const cached_accept_state, ReportID *const cached_accept_id, + const u8 *cur_buf, const u8 *start, const u8 *end, u8 can_die, + u8 has_accel, u8 single, const u8 **scanned, u8 *state) { + DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in callback mode\n", + (u64a)(end - start), offset); + DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), + (s64a)(end - cur_buf)); + DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die, + !!has_accel, !!single); + int rv; + /* scan and report all matches */ + if (can_die) { + if (has_accel) { + rv = sheng4_coda(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } else { + rv = sheng4_cod(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + rv = sheng_cod(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, *scanned, end, + scanned); + } else { + if (has_accel) { + rv = sheng4_coa(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } else { + rv = sheng4_co(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + rv = sheng_co(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, *scanned, end, + scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + return MO_ALIVE; +} + +static really_inline +void runShengNm(const struct sheng *sh, NfaCallback cb, void *ctxt, u64a offset, + u8 *const cached_accept_state, ReportID *const cached_accept_id, + const u8 *cur_buf, const u8 *start, const u8 *end, u8 can_die, + u8 has_accel, u8 single, const u8 **scanned, u8 *state) { + DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in nomatch mode\n", + (u64a)(end - start), offset); + DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), + (s64a)(end - cur_buf)); + DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die, + !!has_accel, !!single); + /* just scan the buffer */ + if (can_die) { + if (has_accel) { + sheng4_nmda(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, end, + scanned); + } else { + sheng4_nmd(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, end, + scanned); + } + sheng_nmd(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, + single, offset, cur_buf, *scanned, end, scanned); + } else { + sheng4_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, + single, offset, cur_buf, start, end, scanned); + sheng_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, + single, offset, cur_buf, *scanned, end, scanned); + } +} + +static really_inline +char runShengSam(const struct sheng *sh, NfaCallback cb, void *ctxt, + u64a offset, u8 *const cached_accept_state, + ReportID *const cached_accept_id, const u8 *cur_buf, + const u8 *start, const u8 *end, u8 can_die, u8 has_accel, + u8 single, const u8 **scanned, u8 *state) { + DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in stop at match mode\n", + (u64a)(end - start), offset); + DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), + (s64a)(end - cur_buf)); + DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die, + !!has_accel, !!single); + int rv; + /* scan until first match */ + if (can_die) { + if (has_accel) { + rv = sheng4_samda(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } else { + rv = sheng4_samd(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + /* if we stopped before we expected, we found a match */ + if (rv == MO_MATCHES_PENDING) { + return MO_MATCHES_PENDING; + } + + rv = sheng_samd(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, *scanned, + end, scanned); + } else { + if (has_accel) { + rv = sheng4_sama(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } else { + rv = sheng4_sam(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + /* if we stopped before we expected, we found a match */ + if (rv == MO_MATCHES_PENDING) { + return MO_MATCHES_PENDING; + } + + rv = sheng_sam(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, *scanned, end, + scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + /* if we stopped before we expected, we found a match */ + if (rv == MO_MATCHES_PENDING) { + return MO_MATCHES_PENDING; + } + return MO_ALIVE; +} + +static never_inline +char runSheng(const struct sheng *sh, struct mq *q, s64a b_end, + enum MatchMode mode) { + u8 state = *(u8 *)q->state; + u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE; + u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL; + u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT; + + u8 cached_accept_state = 0; + ReportID cached_accept_id = 0; + + DEBUG_PRINTF("starting Sheng execution in state %u\n", + state & SHENG_STATE_MASK); + + if (q->report_current) { + DEBUG_PRINTF("reporting current pending matches\n"); + assert(sh); + + q->report_current = 0; + + int rv; + if (single) { + rv = fireSingleReport(q->cb, q->context, sh->report, + q_cur_offset(q)); + } else { + rv = fireReports(sh, q->cb, q->context, state, q_cur_offset(q), + &cached_accept_state, &cached_accept_id, 0); + } + if (rv == MO_HALT_MATCHING) { + DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK); + return MO_DEAD; + } + + DEBUG_PRINTF("proceeding with matching\n"); + } + + assert(q_cur_type(q) == MQE_START); + s64a start = q_cur_loc(q); + + DEBUG_PRINTF("offset: %lli, location: %lli, mode: %s\n", q->offset, start, + mode == CALLBACK_OUTPUT ? "CALLBACK OUTPUT" : + mode == NO_MATCHES ? "NO MATCHES" : + mode == STOP_AT_MATCH ? "STOP AT MATCH" : "???"); + + DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q), + q_cur_type(q) == MQE_START ? "START" : + q_cur_type(q) == MQE_TOP ? "TOP" : + q_cur_type(q) == MQE_END ? "END" : "???"); + + const u8* cur_buf; + if (start < 0) { + DEBUG_PRINTF("negative location, scanning history\n"); + DEBUG_PRINTF("min location: %zd\n", -q->hlength); + cur_buf = q->history + q->hlength; + } else { + DEBUG_PRINTF("positive location, scanning buffer\n"); + DEBUG_PRINTF("max location: %lli\n", b_end); + cur_buf = q->buffer; + } + + /* if we our queue event is past our end */ + if (mode != NO_MATCHES && q_cur_loc(q) > b_end) { + DEBUG_PRINTF("current location past buffer end\n"); + DEBUG_PRINTF("setting q location to %llu\n", b_end); + DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK); + q->items[q->cur].location = b_end; + return MO_ALIVE; + } + + q->cur++; + + s64a cur_start = start; + + while (1) { + DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q), + q_cur_type(q) == MQE_START ? "START" : + q_cur_type(q) == MQE_TOP ? "TOP" : + q_cur_type(q) == MQE_END ? "END" : "???"); + s64a end = q_cur_loc(q); + if (mode != NO_MATCHES) { + end = MIN(end, b_end); + } + assert(end <= (s64a) q->length); + s64a cur_end = end; + + /* we may cross the border between history and current buffer */ + if (cur_start < 0) { + cur_end = MIN(0, cur_end); + } + + DEBUG_PRINTF("start: %lli end: %lli\n", start, end); + + /* don't scan zero length buffer */ + if (cur_start != cur_end) { + const u8 * scanned = cur_buf; + char rv; + + /* if we're in nomatch mode or if we're scanning history buffer */ + if (mode == NO_MATCHES || + (cur_start < 0 && mode == CALLBACK_OUTPUT)) { + runShengNm(sh, q->cb, q->context, q->offset, + &cached_accept_state, &cached_accept_id, cur_buf, + cur_buf + cur_start, cur_buf + cur_end, can_die, + has_accel, single, &scanned, &state); + } else if (mode == CALLBACK_OUTPUT) { + rv = runShengCb(sh, q->cb, q->context, q->offset, + &cached_accept_state, &cached_accept_id, + cur_buf, cur_buf + cur_start, cur_buf + cur_end, + can_die, has_accel, single, &scanned, &state); + if (rv == MO_DEAD) { + DEBUG_PRINTF("exiting in state %u\n", + state & SHENG_STATE_MASK); + return MO_DEAD; + } + } else if (mode == STOP_AT_MATCH) { + rv = runShengSam(sh, q->cb, q->context, q->offset, + &cached_accept_state, &cached_accept_id, + cur_buf, cur_buf + cur_start, + cur_buf + cur_end, can_die, has_accel, single, + &scanned, &state); + if (rv == MO_DEAD) { + DEBUG_PRINTF("exiting in state %u\n", + state & SHENG_STATE_MASK); + return rv; + } else if (rv == MO_MATCHES_PENDING) { + assert(q->cur); + DEBUG_PRINTF("found a match, setting q location to %zd\n", + scanned - cur_buf + 1); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = + scanned - cur_buf + 1; /* due to exiting early */ + *(u8 *)q->state = state; + DEBUG_PRINTF("exiting in state %u\n", + state & SHENG_STATE_MASK); + return rv; + } + } else { + assert(!"invalid scanning mode!"); + } + assert(scanned == cur_buf + cur_end); + + cur_start = cur_end; + } + + /* if we our queue event is past our end */ + if (mode != NO_MATCHES && q_cur_loc(q) > b_end) { + DEBUG_PRINTF("current location past buffer end\n"); + DEBUG_PRINTF("setting q location to %llu\n", b_end); + DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = b_end; + *(u8 *)q->state = state; + return MO_ALIVE; + } + + /* crossing over into actual buffer */ + if (cur_start == 0) { + DEBUG_PRINTF("positive location, scanning buffer\n"); + DEBUG_PRINTF("max offset: %lli\n", b_end); + cur_buf = q->buffer; + } + + /* continue scanning the same buffer */ + if (end != cur_end) { + continue; + } + + switch (q_cur_type(q)) { + case MQE_END: + *(u8 *)q->state = state; + q->cur++; + DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK); + if (can_die) { + return (state & SHENG_STATE_DEAD) ? MO_DEAD : MO_ALIVE; + } + return MO_ALIVE; + case MQE_TOP: + if (q->offset + cur_start == 0) { + DEBUG_PRINTF("Anchored start, going to state %u\n", + sh->anchored); + state = sh->anchored; + } else { + u8 new_state = get_aux(sh, state)->top; + DEBUG_PRINTF("Top event %u->%u\n", state & SHENG_STATE_MASK, + new_state & SHENG_STATE_MASK); + state = new_state; + } + break; + default: + assert(!"invalid queue event"); + break; + } + q->cur++; + } +} + +char nfaExecSheng0_B(const struct NFA *n, u64a offset, const u8 *buffer, + size_t length, NfaCallback cb, void *context) { + DEBUG_PRINTF("smallwrite Sheng\n"); + assert(n->type == SHENG_NFA_0); + const struct sheng *sh = getImplNfa(n); + u8 state = sh->anchored; + u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE; + u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL; + u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT; + u8 cached_accept_state = 0; + ReportID cached_accept_id = 0; + + /* scan and report all matches */ + int rv; + s64a end = length; + const u8 *scanned; + + rv = runShengCb(sh, cb, context, offset, &cached_accept_state, + &cached_accept_id, buffer, buffer, buffer + end, can_die, + has_accel, single, &scanned, &state); + if (rv == MO_DEAD) { + DEBUG_PRINTF("exiting in state %u\n", + state & SHENG_STATE_MASK); + return MO_DEAD; + } + + DEBUG_PRINTF("%u\n", state & SHENG_STATE_MASK); + + const struct sstate_aux *aux = get_aux(sh, state); + + if (aux->accept_eod) { + DEBUG_PRINTF("Reporting EOD matches\n"); + fireReports(sh, cb, context, state, end + offset, &cached_accept_state, + &cached_accept_id, 1); + } + + return state & SHENG_STATE_DEAD ? MO_DEAD : MO_ALIVE; +} + +char nfaExecSheng0_Q(const struct NFA *n, struct mq *q, s64a end) { + const struct sheng *sh = get_sheng(n); + char rv = runSheng(sh, q, end, CALLBACK_OUTPUT); + return rv; +} + +char nfaExecSheng0_Q2(const struct NFA *n, struct mq *q, s64a end) { + const struct sheng *sh = get_sheng(n); + char rv = runSheng(sh, q, end, STOP_AT_MATCH); + return rv; +} + +char nfaExecSheng0_QR(const struct NFA *n, struct mq *q, ReportID report) { + assert(q_cur_type(q) == MQE_START); + + const struct sheng *sh = get_sheng(n); + char rv = runSheng(sh, q, 0 /* end */, NO_MATCHES); + + if (rv && nfaExecSheng0_inAccept(n, report, q)) { + return MO_MATCHES_PENDING; + } + return rv; +} + +char nfaExecSheng0_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { + assert(n && q); + + const struct sheng *sh = get_sheng(n); + u8 s = *(const u8 *)q->state; + DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG_STATE_MASK)); + + const struct sstate_aux *aux = get_aux(sh, s); + + if (!aux->accept) { + return 0; + } + + return shengHasAccept(sh, aux, report); +} + +char nfaExecSheng0_inAnyAccept(const struct NFA *n, struct mq *q) { + assert(n && q); + + const struct sheng *sh = get_sheng(n); + u8 s = *(const u8 *)q->state; + DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG_STATE_MASK)); + + const struct sstate_aux *aux = get_aux(sh, s); + return !!aux->accept; +} + +char nfaExecSheng0_testEOD(const struct NFA *nfa, const char *state, + UNUSED const char *streamState, u64a offset, + NfaCallback cb, void *ctxt) { + assert(nfa); + + const struct sheng *sh = get_sheng(nfa); + u8 s = *(const u8 *)state; + DEBUG_PRINTF("checking EOD accepts for %u\n", (u8)(s & SHENG_STATE_MASK)); + + const struct sstate_aux *aux = get_aux(sh, s); + + if (!aux->accept_eod) { + return MO_CONTINUE_MATCHING; + } + + return fireReports(sh, cb, ctxt, s, offset, NULL, NULL, 1); +} + +char nfaExecSheng0_reportCurrent(const struct NFA *n, struct mq *q) { + const struct sheng *sh = (const struct sheng *)getImplNfa(n); + NfaCallback cb = q->cb; + void *ctxt = q->context; + u8 s = *(u8 *)q->state; + const struct sstate_aux *aux = get_aux(sh, s); + u64a offset = q_cur_offset(q); + u8 cached_state_id = 0; + ReportID cached_report_id = 0; + assert(q_cur_type(q) == MQE_START); + + if (aux->accept) { + if (sh->flags & SHENG_FLAG_SINGLE_REPORT) { + fireSingleReport(cb, ctxt, sh->report, offset); + } else { + fireReports(sh, cb, ctxt, s, offset, &cached_state_id, + &cached_report_id, 1); + } + } + + return 0; +} + +char nfaExecSheng0_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, UNUSED u8 key) { + const struct sheng *sh = get_sheng(nfa); + u8 *s = (u8 *)state; + *s = offset ? sh->floating: sh->anchored; + return !(*s & SHENG_STATE_DEAD); +} + +char nfaExecSheng0_queueInitState(const struct NFA *nfa, struct mq *q) { + assert(nfa->scratchStateSize == 1); + + /* starting in floating state */ + const struct sheng *sh = get_sheng(nfa); + *(u8 *)q->state = sh->floating; + DEBUG_PRINTF("starting in floating state\n"); + return 0; +} + +char nfaExecSheng0_queueCompressState(UNUSED const struct NFA *nfa, + const struct mq *q, UNUSED s64a loc) { + void *dest = q->streamState; + const void *src = q->state; + assert(nfa->scratchStateSize == 1); + assert(nfa->streamStateSize == 1); + *(u8 *)dest = *(const u8 *)src; + return 0; +} + +char nfaExecSheng0_expandState(UNUSED const struct NFA *nfa, void *dest, + const void *src, UNUSED u64a offset, + UNUSED u8 key) { + assert(nfa->scratchStateSize == 1); + assert(nfa->streamStateSize == 1); + *(u8 *)dest = *(const u8 *)src; + return 0; +} diff --git a/src/nfa/sheng.h b/src/nfa/sheng.h new file mode 100644 index 00000000..46ead180 --- /dev/null +++ b/src/nfa/sheng.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SHENG_H_ +#define SHENG_H_ + +#include "callback.h" +#include "ue2common.h" + +struct mq; +struct NFA; + +#define nfaExecSheng0_B_Reverse NFA_API_NO_IMPL +#define nfaExecSheng0_zombie_status NFA_API_ZOMBIE_NO_IMPL + +char nfaExecSheng0_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecSheng0_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecSheng0_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecSheng0_inAccept(const struct NFA *n, ReportID report, struct mq *q); +char nfaExecSheng0_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecSheng0_queueInitState(const struct NFA *nfa, struct mq *q); +char nfaExecSheng0_queueCompressState(const struct NFA *nfa, const struct mq *q, + s64a loc); +char nfaExecSheng0_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); +char nfaExecSheng0_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, u8 key); +char nfaExecSheng0_testEOD(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context); +char nfaExecSheng0_reportCurrent(const struct NFA *n, struct mq *q); + +char nfaExecSheng0_B(const struct NFA *n, u64a offset, const u8 *buffer, + size_t length, NfaCallback cb, void *context); + +#endif /* SHENG_H_ */ diff --git a/src/nfa/sheng_defs.h b/src/nfa/sheng_defs.h new file mode 100644 index 00000000..26bdbcee --- /dev/null +++ b/src/nfa/sheng_defs.h @@ -0,0 +1,353 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SHENG_DEFS_H +#define SHENG_DEFS_H + +/* + * Utility functions used by various versions of Sheng engine + */ +static really_inline +u8 isDeadState(const u8 a) { + return a & SHENG_STATE_DEAD; +} + +static really_inline +u8 isAcceptState(const u8 a) { + return a & SHENG_STATE_ACCEPT; +} + +static really_inline +u8 isAccelState(const u8 a) { + return a & SHENG_STATE_ACCEL; +} + +static really_inline +u8 hasInterestingStates(const u8 a, const u8 b, const u8 c, const u8 d) { + return (a | b | c | d) & (SHENG_STATE_FLAG_MASK); +} + +/* these functions should be optimized out, used by NO_MATCHES mode */ +static really_inline +u8 dummyFunc4(UNUSED const u8 a, UNUSED const u8 b, UNUSED const u8 c, + UNUSED const u8 d) { + return 0; +} + +static really_inline +u8 dummyFunc(UNUSED const u8 a) { + return 0; +} + +/* + * Sheng function definitions for single byte loops + */ +/* callback output, can die */ +#define SHENG_IMPL sheng_cod +#define DEAD_FUNC isDeadState +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 0 +#include "sheng_impl.h" +#undef SHENG_IMPL +#undef DEAD_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* callback output, can't die */ +#define SHENG_IMPL sheng_co +#define DEAD_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 0 +#include "sheng_impl.h" +#undef SHENG_IMPL +#undef DEAD_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* stop at match, can die */ +#define SHENG_IMPL sheng_samd +#define DEAD_FUNC isDeadState +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 1 +#include "sheng_impl.h" +#undef SHENG_IMPL +#undef DEAD_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* stop at match, can't die */ +#define SHENG_IMPL sheng_sam +#define DEAD_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 1 +#include "sheng_impl.h" +#undef SHENG_IMPL +#undef DEAD_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* no match, can die */ +#define SHENG_IMPL sheng_nmd +#define DEAD_FUNC isDeadState +#define ACCEPT_FUNC dummyFunc +#define STOP_AT_MATCH 0 +#include "sheng_impl.h" +#undef SHENG_IMPL +#undef DEAD_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* no match, can't die */ +#define SHENG_IMPL sheng_nm +#define DEAD_FUNC dummyFunc +#define ACCEPT_FUNC dummyFunc +#define STOP_AT_MATCH 0 +#include "sheng_impl.h" +#undef SHENG_IMPL +#undef DEAD_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* + * Sheng function definitions for 4-byte loops + */ +/* callback output, can die, accelerated */ +#define SHENG_IMPL sheng4_coda +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC isDeadState +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC isAccelState +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 0 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* callback output, can die, not accelerated */ +#define SHENG_IMPL sheng4_cod +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC isDeadState +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC dummyFunc +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 0 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* callback output, can't die, accelerated */ +#define SHENG_IMPL sheng4_coa +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC dummyFunc +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC isAccelState +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 0 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* callback output, can't die, not accelerated */ +#define SHENG_IMPL sheng4_co +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC dummyFunc +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC dummyFunc +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 0 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* stop at match, can die, accelerated */ +#define SHENG_IMPL sheng4_samda +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC isDeadState +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC isAccelState +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 1 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* stop at match, can die, not accelerated */ +#define SHENG_IMPL sheng4_samd +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC isDeadState +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC dummyFunc +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 1 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* stop at match, can't die, accelerated */ +#define SHENG_IMPL sheng4_sama +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC dummyFunc +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC isAccelState +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 1 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* stop at match, can't die, not accelerated */ +#define SHENG_IMPL sheng4_sam +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC dummyFunc +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC dummyFunc +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 1 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* no-match have interesting func as dummy, and die/accel checks are outer */ + +/* no match, can die, accelerated */ +#define SHENG_IMPL sheng4_nmda +#define INTERESTING_FUNC dummyFunc4 +#define INNER_DEAD_FUNC dummyFunc +#define OUTER_DEAD_FUNC isDeadState +#define INNER_ACCEL_FUNC dummyFunc +#define OUTER_ACCEL_FUNC isAccelState +#define ACCEPT_FUNC dummyFunc +#define STOP_AT_MATCH 0 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* no match, can die, not accelerated */ +#define SHENG_IMPL sheng4_nmd +#define INTERESTING_FUNC dummyFunc4 +#define INNER_DEAD_FUNC dummyFunc +#define OUTER_DEAD_FUNC isDeadState +#define INNER_ACCEL_FUNC dummyFunc +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC dummyFunc +#define STOP_AT_MATCH 0 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* there is no performance benefit in accelerating a no-match case that can't + * die */ + +/* no match, can't die */ +#define SHENG_IMPL sheng4_nm +#define INTERESTING_FUNC dummyFunc4 +#define INNER_DEAD_FUNC dummyFunc +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC dummyFunc +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC dummyFunc +#define STOP_AT_MATCH 0 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +#endif // SHENG_DEFS_H diff --git a/src/nfa/sheng_impl.h b/src/nfa/sheng_impl.h new file mode 100644 index 00000000..fc3e54aa --- /dev/null +++ b/src/nfa/sheng_impl.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * In order to use this macro, the following things need to be defined: + * + * - SHENG_IMPL (name of the Sheng implementation function) + * - DEAD_FUNC (name of the function checking for dead states) + * - ACCEPT_FUNC (name of the function checking for accept state) + * - STOP_AT_MATCH (can be 1 or 0, enable or disable stop at match) + */ + +/* byte-by-byte version. we don't do byte-by-byte death checking as it's + * pretty pointless to do it over a buffer that's at most 3 bytes long */ +static really_inline +char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s, + u8 *const cached_accept_state, ReportID *const cached_accept_id, + u8 single, u64a base_offset, const u8 *buf, const u8 *start, + const u8 *end, const u8 **scan_end) { + DEBUG_PRINTF("Starting DFA execution in state %u\n", + *state & SHENG_STATE_MASK); + const u8 *cur_buf = start; + if (DEAD_FUNC(*state)) { + DEBUG_PRINTF("Dead on arrival\n"); + *scan_end = end; + return MO_CONTINUE_MATCHING; + } + DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start)); + + m128 cur_state = set16x8(*state); + const m128 *masks = s->shuffle_masks; + + while (likely(cur_buf != end)) { + const u8 c = *cur_buf; + const m128 shuffle_mask = masks[c]; + cur_state = pshufb(shuffle_mask, cur_state); + const u8 tmp = movd(cur_state); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?'); + DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", tmp, (tmp & 0xF0) >> 4, + tmp & 0xF); + + if (unlikely(ACCEPT_FUNC(tmp))) { + DEBUG_PRINTF("Accept state %u reached\n", tmp & SHENG_STATE_MASK); + u64a match_offset = base_offset + (cur_buf - buf) + 1; + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (u64a)(cur_buf - start)); + *state = tmp; + *scan_end = cur_buf; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports(s, cb, ctxt, tmp, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + cur_buf++; + } + *state = movd(cur_state); + *scan_end = cur_buf; + return MO_CONTINUE_MATCHING; +} diff --git a/src/nfa/sheng_impl4.h b/src/nfa/sheng_impl4.h new file mode 100644 index 00000000..2561e52d --- /dev/null +++ b/src/nfa/sheng_impl4.h @@ -0,0 +1,284 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * In order to use this macro, the following things need to be defined: + * + * - SHENG_IMPL (name of the Sheng implementation function) + * - INTERESTING_FUNC (name of the function checking for accept, accel or dead + * states) + * - INNER_DEAD_FUNC (name of the inner function checking for dead states) + * - OUTER_DEAD_FUNC (name of the outer function checking for dead states) + * - INNER_ACCEL_FUNC (name of the inner function checking for accel states) + * - OUTER_ACCEL_FUNC (name of the outer function checking for accel states) + * - ACCEPT_FUNC (name of the function checking for accept state) + * - STOP_AT_MATCH (can be 1 or 0, enable or disable stop at match) + */ + +/* unrolled 4-byte-at-a-time version. + * + * we put innerDeadFunc inside interestingFunc() block so that we don't pay for + * dead states checking. however, if interestingFunc is dummy, innerDeadFunc + * gets lost with it, so we need an additional check outside the + * interestingFunc() branch - it's normally dummy so we don't pay for it, but + * when interestingFunc is dummy, outerDeadFunc should be set if we want to + * check for dead states. + * + * also, deadFunc only checks the last known state, but since we can't ever get + * out of the dead state and we don't really care where we died, it's not a + * problem. + */ +static really_inline +char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s, + u8 *const cached_accept_state, ReportID *const cached_accept_id, + u8 single, u64a base_offset, const u8 *buf, const u8 *start, + const u8 *end, const u8 **scan_end) { + DEBUG_PRINTF("Starting DFAx4 execution in state %u\n", + *state & SHENG_STATE_MASK); + const u8 *cur_buf = start; + const u8 *min_accel_dist = start; + base_offset++; + DEBUG_PRINTF("Scanning %llu bytes\n", (u64a)(end - start)); + + if (INNER_ACCEL_FUNC(*state) || OUTER_ACCEL_FUNC(*state)) { + DEBUG_PRINTF("Accel state reached @ 0\n"); + const union AccelAux *aaux = get_accel(s, *state & SHENG_STATE_MASK); + const u8 *new_offset = run_accel(aaux, cur_buf, end); + if (new_offset < cur_buf + BAD_ACCEL_DIST) { + min_accel_dist = new_offset + BIG_ACCEL_PENALTY; + } else { + min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; + } + DEBUG_PRINTF("Next accel chance: %llu\n", + (u64a)(min_accel_dist - start)); + DEBUG_PRINTF("Accel scanned %zu bytes\n", new_offset - cur_buf); + cur_buf = new_offset; + DEBUG_PRINTF("New offset: %lli\n", (s64a)(cur_buf - start)); + } + if (INNER_DEAD_FUNC(*state) || OUTER_DEAD_FUNC(*state)) { + DEBUG_PRINTF("Dead on arrival\n"); + *scan_end = end; + return MO_CONTINUE_MATCHING; + } + + m128 cur_state = set16x8(*state); + const m128 *masks = s->shuffle_masks; + + while (likely(end - cur_buf >= 4)) { + const u8 *b1 = cur_buf; + const u8 *b2 = cur_buf + 1; + const u8 *b3 = cur_buf + 2; + const u8 *b4 = cur_buf + 3; + const u8 c1 = *b1; + const u8 c2 = *b2; + const u8 c3 = *b3; + const u8 c4 = *b4; + + const m128 shuffle_mask1 = masks[c1]; + cur_state = pshufb(shuffle_mask1, cur_state); + const u8 a1 = movd(cur_state); + + const m128 shuffle_mask2 = masks[c2]; + cur_state = pshufb(shuffle_mask2, cur_state); + const u8 a2 = movd(cur_state); + + const m128 shuffle_mask3 = masks[c3]; + cur_state = pshufb(shuffle_mask3, cur_state); + const u8 a3 = movd(cur_state); + + const m128 shuffle_mask4 = masks[c4]; + cur_state = pshufb(shuffle_mask4, cur_state); + const u8 a4 = movd(cur_state); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?'); + DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a1, (a1 & 0xF0) >> 4, a1 & 0xF); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c2, ourisprint(c2) ? c2 : '?'); + DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a2, (a2 & 0xF0) >> 4, a2 & 0xF); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c3, ourisprint(c3) ? c3 : '?'); + DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a3, (a3 & 0xF0) >> 4, a3 & 0xF); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c4, ourisprint(c4) ? c4 : '?'); + DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a4, (a4 & 0xF0) >> 4, a4 & 0xF); + + if (unlikely(INTERESTING_FUNC(a1, a2, a3, a4))) { + if (ACCEPT_FUNC(a1)) { + u64a match_offset = base_offset + b1 - buf; + DEBUG_PRINTF("Accept state %u reached\n", + a1 & SHENG_STATE_MASK); + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (s64a)(b1 - start)); + *scan_end = b1; + *state = a1; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports(s, cb, ctxt, a1, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + if (ACCEPT_FUNC(a2)) { + u64a match_offset = base_offset + b2 - buf; + DEBUG_PRINTF("Accept state %u reached\n", + a2 & SHENG_STATE_MASK); + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (s64a)(b2 - start)); + *scan_end = b2; + *state = a2; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports(s, cb, ctxt, a2, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + if (ACCEPT_FUNC(a3)) { + u64a match_offset = base_offset + b3 - buf; + DEBUG_PRINTF("Accept state %u reached\n", + a3 & SHENG_STATE_MASK); + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (s64a)(b3 - start)); + *scan_end = b3; + *state = a3; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports(s, cb, ctxt, a3, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + if (ACCEPT_FUNC(a4)) { + u64a match_offset = base_offset + b4 - buf; + DEBUG_PRINTF("Accept state %u reached\n", + a4 & SHENG_STATE_MASK); + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (s64a)(b4 - start)); + *scan_end = b4; + *state = a4; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports(s, cb, ctxt, a4, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + if (INNER_DEAD_FUNC(a4)) { + DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(b4 - buf)); + *scan_end = end; + *state = a4; + return MO_CONTINUE_MATCHING; + } + if (cur_buf > min_accel_dist && INNER_ACCEL_FUNC(a4)) { + DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf)); + const union AccelAux *aaux = + get_accel(s, a4 & SHENG_STATE_MASK); + const u8 *new_offset = run_accel(aaux, cur_buf + 4, end); + if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) { + min_accel_dist = new_offset + BIG_ACCEL_PENALTY; + } else { + min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; + } + DEBUG_PRINTF("Next accel chance: %llu\n", + (u64a)(min_accel_dist - start)); + DEBUG_PRINTF("Accel scanned %llu bytes\n", + (u64a)(new_offset - cur_buf - 4)); + cur_buf = new_offset; + DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf)); + continue; + } + } + if (OUTER_DEAD_FUNC(a4)) { + DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(cur_buf - buf)); + *scan_end = end; + *state = a4; + return MO_CONTINUE_MATCHING; + }; + if (cur_buf > min_accel_dist && OUTER_ACCEL_FUNC(a4)) { + DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf)); + const union AccelAux *aaux = get_accel(s, a4 & SHENG_STATE_MASK); + const u8 *new_offset = run_accel(aaux, cur_buf + 4, end); + if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) { + min_accel_dist = new_offset + BIG_ACCEL_PENALTY; + } else { + min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; + } + DEBUG_PRINTF("Next accel chance: %llu\n", + (u64a)(min_accel_dist - start)); + DEBUG_PRINTF("Accel scanned %llu bytes\n", + (u64a)(new_offset - cur_buf - 4)); + cur_buf = new_offset; + DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf)); + continue; + }; + cur_buf += 4; + } + *state = movd(cur_state); + *scan_end = cur_buf; + return MO_CONTINUE_MATCHING; +} diff --git a/src/nfa/sheng_internal.h b/src/nfa/sheng_internal.h new file mode 100644 index 00000000..046eb759 --- /dev/null +++ b/src/nfa/sheng_internal.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SHENG_INTERNAL_H_ +#define SHENG_INTERNAL_H_ + +#include "ue2common.h" +#include "util/simd_utils.h" + +#define SHENG_STATE_ACCEPT 0x10 +#define SHENG_STATE_DEAD 0x20 +#define SHENG_STATE_ACCEL 0x40 +#define SHENG_STATE_MASK 0xF +#define SHENG_STATE_FLAG_MASK 0x70 + +#define SHENG_FLAG_SINGLE_REPORT 0x1 +#define SHENG_FLAG_CAN_DIE 0x2 +#define SHENG_FLAG_HAS_ACCEL 0x4 + +struct report_list { + u32 count; + ReportID report[]; +}; + +struct sstate_aux { + u32 accept; + u32 accept_eod; + u32 accel; + u32 top; +}; + +struct sheng { + m128 shuffle_masks[256]; + u32 length; + u32 aux_offset; + u32 report_offset; + u32 accel_offset; + u8 n_states; + u8 anchored; + u8 floating; + u8 flags; + ReportID report; +}; + +#endif /* SHENG_INTERNAL_H_ */ diff --git a/src/nfa/shengcompile.cpp b/src/nfa/shengcompile.cpp new file mode 100644 index 00000000..911f6d70 --- /dev/null +++ b/src/nfa/shengcompile.cpp @@ -0,0 +1,541 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "shengcompile.h" + +#include "accel.h" +#include "accelcompile.h" +#include "shufticompile.h" +#include "trufflecompile.h" +#include "util/alloc.h" +#include "util/bitutils.h" +#include "util/charreach.h" +#include "util/compare.h" +#include "util/container.h" +#include "util/order_check.h" +#include "util/report_manager.h" +#include "util/unaligned.h" + +#include "grey.h" +#include "nfa_internal.h" +#include "sheng_internal.h" +#include "ue2common.h" +#include "util/compile_context.h" +#include "util/make_unique.h" +#include "util/verify_types.h" +#include "util/simd_utils.h" + +#include +#include +#include + +#include + +using namespace std; +using boost::adaptors::map_keys; + +namespace ue2 { + +#define ACCEL_DFA_MAX_OFFSET_DEPTH 4 + +/** Maximum tolerated number of escape character from an accel state. + * This is larger than nfa, as we don't have a budget and the nfa cheats on stop + * characters for sets of states */ +#define ACCEL_DFA_MAX_STOP_CHAR 160 + +/** Maximum tolerated number of escape character from a sds accel state. Larger + * than normal states as accelerating sds is important. Matches NFA value */ +#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192 + +struct dfa_info { + accel_dfa_build_strat &strat; + raw_dfa &raw; + vector &states; + dstate &floating; + dstate &anchored; + bool can_die; + + explicit dfa_info(accel_dfa_build_strat &s) + : strat(s), raw(strat.get_raw()), states(raw.states), + floating(states[raw.start_floating]), + anchored(states[raw.start_anchored]), can_die(dfaCanDie(raw)) {} + + // returns adjusted size + size_t size() const { + return can_die ? states.size() : states.size() - 1; + } + // expects adjusted index + dstate &operator[](dstate_id_t idx) { + return states[raw_id(idx)]; + } + dstate &top(dstate_id_t idx) { + if (isDead(idx)) { + return floating; + } + return next(idx, TOP); + } + dstate &next(dstate_id_t idx, u16 chr) { + auto &src = (*this)[idx]; + auto next_id = src.next[raw.alpha_remap[chr]]; + return states[next_id]; + } + // get original idx from adjusted idx + dstate_id_t raw_id(dstate_id_t idx) { + assert(idx < size()); + // if DFA can't die, shift all indices left by 1 + return can_die ? idx : idx + 1; + } + bool isDead(dstate &state) { + return raw_id(state.impl_id) == DEAD_STATE; + } + bool isDead(dstate_id_t idx) { + return raw_id(idx) == DEAD_STATE; + } + +private: + static bool dfaCanDie(raw_dfa &rdfa) { + for (unsigned chr = 0; chr < 256; chr++) { + for (dstate_id_t state = 0; state < rdfa.states.size(); state++) { + auto succ = rdfa.states[state].next[rdfa.alpha_remap[chr]]; + if (succ == DEAD_STATE) { + return true; + } + } + } + return false; + } +}; + +namespace { + +struct raw_report_list { + flat_set reports; + + raw_report_list(const flat_set &reports_in, + const ReportManager &rm, bool do_remap) { + if (do_remap) { + for (auto &id : reports_in) { + reports.insert(rm.getProgramOffset(id)); + } + } else { + reports = reports_in; + } + } + + bool operator<(const raw_report_list &b) const { + return reports < b.reports; + } +}; + +struct raw_report_info_impl : public raw_report_info { + vector rl; + u32 getReportListSize() const override; + size_t size() const override; + void fillReportLists(NFA *n, size_t base_offset, + std::vector &ro /* out */) const override; +}; +} + +u32 raw_report_info_impl::getReportListSize() const { + u32 rv = 0; + + for (const auto &reps : rl) { + rv += sizeof(report_list); + rv += sizeof(ReportID) * reps.reports.size(); + } + + return rv; +} + +size_t raw_report_info_impl::size() const { + return rl.size(); +} + +void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset, + vector &ro) const { + for (const auto &reps : rl) { + ro.push_back(base_offset); + + report_list *p = (report_list *)((char *)n + base_offset); + + u32 i = 0; + for (const ReportID report : reps.reports) { + p->report[i++] = report; + } + p->count = verify_u32(reps.reports.size()); + + base_offset += sizeof(report_list); + base_offset += sizeof(ReportID) * reps.reports.size(); + } +} + +unique_ptr sheng_build_strat::gatherReports( + vector &reports, + vector &reports_eod, + u8 *isSingleReport, + ReportID *arbReport) const { + DEBUG_PRINTF("gathering reports\n"); + + const bool remap_reports = has_managed_reports(rdfa.kind); + + auto ri = ue2::make_unique(); + map rev; + + for (const dstate &s : rdfa.states) { + if (s.reports.empty()) { + reports.push_back(MO_INVALID_IDX); + continue; + } + + raw_report_list rrl(s.reports, rm, remap_reports); + DEBUG_PRINTF("non empty r\n"); + if (rev.find(rrl) != rev.end()) { + reports.push_back(rev[rrl]); + } else { + DEBUG_PRINTF("adding to rl %zu\n", ri->size()); + rev[rrl] = ri->size(); + reports.push_back(ri->size()); + ri->rl.push_back(rrl); + } + } + + for (const dstate &s : rdfa.states) { + if (s.reports_eod.empty()) { + reports_eod.push_back(MO_INVALID_IDX); + continue; + } + + DEBUG_PRINTF("non empty r eod\n"); + raw_report_list rrl(s.reports_eod, rm, remap_reports); + if (rev.find(rrl) != rev.end()) { + reports_eod.push_back(rev[rrl]); + continue; + } + + DEBUG_PRINTF("adding to rl eod %zu\n", s.reports_eod.size()); + rev[rrl] = ri->size(); + reports_eod.push_back(ri->size()); + ri->rl.push_back(rrl); + } + + assert(!ri->rl.empty()); /* all components should be able to generate + reports */ + if (!ri->rl.empty()) { + *arbReport = *ri->rl.begin()->reports.begin(); + } else { + *arbReport = 0; + } + + /* if we have only a single report id generated from all accepts (not eod) + * we can take some short cuts */ + set reps; + + for (u32 rl_index : reports) { + if (rl_index == MO_INVALID_IDX) { + continue; + } + assert(rl_index < ri->size()); + insert(&reps, ri->rl[rl_index].reports); + } + + if (reps.size() == 1) { + *isSingleReport = 1; + *arbReport = *reps.begin(); + DEBUG_PRINTF("single -- %u\n", *arbReport); + } else { + *isSingleReport = 0; + } + + return move(ri); +} + +u32 sheng_build_strat::max_allowed_offset_accel() const { + return ACCEL_DFA_MAX_OFFSET_DEPTH; +} + +u32 sheng_build_strat::max_stop_char() const { + return ACCEL_DFA_MAX_STOP_CHAR; +} + +u32 sheng_build_strat::max_floating_stop_char() const { + return ACCEL_DFA_MAX_FLOATING_STOP_CHAR; +} + +size_t sheng_build_strat::accelSize() const { + return sizeof(AccelAux); +} + +#ifdef DEBUG +static really_inline +void dumpShuffleMask(const u8 chr, const u8 *buf, unsigned sz) { + stringstream o; + + for (unsigned i = 0; i < sz; i++) { + o.width(2); + o << (buf[i] & SHENG_STATE_MASK) << " "; + } + DEBUG_PRINTF("chr %3u: %s\n", chr, o.str().c_str()); +} +#endif + +static +void fillAccelOut(const map &accel_escape_info, + set *accel_states) { + for (dstate_id_t i : accel_escape_info | map_keys) { + accel_states->insert(i); + } +} + +static +u8 getShengState(dstate &state, dfa_info &info, + map &accelInfo) { + u8 s = state.impl_id; + if (!state.reports.empty()) { + s |= SHENG_STATE_ACCEPT; + } + if (info.isDead(state)) { + s |= SHENG_STATE_DEAD; + } + if (accelInfo.find(info.raw_id(state.impl_id)) != accelInfo.end()) { + s |= SHENG_STATE_ACCEL; + } + return s; +} + +static +void fillAccelAux(struct NFA *n, dfa_info &info, + map &accelInfo) { + DEBUG_PRINTF("Filling accel aux structures\n"); + sheng *s = (sheng *)getMutableImplNfa(n); + u32 offset = s->accel_offset; + + for (dstate_id_t i = 0; i < info.size(); i++) { + dstate_id_t state_id = info.raw_id(i); + if (accelInfo.find(state_id) != accelInfo.end()) { + s->flags |= SHENG_FLAG_HAS_ACCEL; + AccelAux *aux = (AccelAux *)((char *)n + offset); + info.strat.buildAccel(state_id, accelInfo[state_id], aux); + sstate_aux *saux = + (sstate_aux *)((char *)n + s->aux_offset) + state_id; + saux->accel = offset; + DEBUG_PRINTF("Accel offset: %u\n", offset); + offset += ROUNDUP_N(sizeof(AccelAux), alignof(AccelAux)); + } + } +} + +static +void populateBasicInfo(struct NFA *n, dfa_info &info, + map &accelInfo, u32 aux_offset, + u32 report_offset, u32 accel_offset, u32 total_size, + u32 dfa_size) { + n->length = total_size; + n->scratchStateSize = 1; + n->streamStateSize = 1; + n->nPositions = info.size(); + n->type = SHENG_NFA_0; + n->flags |= info.raw.hasEodReports() ? NFA_ACCEPTS_EOD : 0; + + sheng *s = (sheng *)getMutableImplNfa(n); + s->aux_offset = aux_offset; + s->report_offset = report_offset; + s->accel_offset = accel_offset; + s->n_states = info.size(); + s->length = dfa_size; + s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0; + + s->anchored = getShengState(info.anchored, info, accelInfo); + s->floating = getShengState(info.floating, info, accelInfo); +} + +static +void fillTops(NFA *n, dfa_info &info, dstate_id_t id, + map &accelInfo) { + sheng *s = (sheng *)getMutableImplNfa(n); + u32 aux_base = s->aux_offset; + + DEBUG_PRINTF("Filling tops for state %u\n", id); + + sstate_aux *aux = (sstate_aux *)((char *)n + aux_base) + id; + + DEBUG_PRINTF("Aux structure for state %u, offset %zd\n", id, + (char *)aux - (char *)n); + + /* we could conceivably end up in an accept/dead state on a top event, + * so mark top as accept/dead state if it indeed is. + */ + auto &top_state = info.top(id); + + DEBUG_PRINTF("Top transition for state %u: %u\n", id, top_state.impl_id); + + aux->top = getShengState(top_state, info, accelInfo); +} + +static +void fillAux(NFA *n, dfa_info &info, dstate_id_t id, vector &reports, + vector &reports_eod, vector &report_offsets) { + sheng *s = (sheng *)getMutableImplNfa(n); + u32 aux_base = s->aux_offset; + auto raw_id = info.raw_id(id); + + auto &state = info[id]; + + sstate_aux *aux = (sstate_aux *)((char *)n + aux_base) + id; + + DEBUG_PRINTF("Filling aux and report structures for state %u\n", id); + DEBUG_PRINTF("Aux structure for state %u, offset %zd\n", id, + (char *)aux - (char *)n); + + aux->accept = state.reports.empty() ? 0 : report_offsets[reports[raw_id]]; + aux->accept_eod = + state.reports_eod.empty() ? 0 : report_offsets[reports_eod[raw_id]]; + + DEBUG_PRINTF("Report list offset: %u\n", aux->accept); + DEBUG_PRINTF("EOD report list offset: %u\n", aux->accept_eod); +} + +static +void fillSingleReport(NFA *n, ReportID r_id) { + sheng *s = (sheng *)getMutableImplNfa(n); + + DEBUG_PRINTF("Single report ID: %u\n", r_id); + s->report = r_id; + s->flags |= SHENG_FLAG_SINGLE_REPORT; +} + +static +void createShuffleMasks(sheng *s, dfa_info &info, + map &accelInfo) { + for (u16 chr = 0; chr < 256; chr++) { + u8 buf[16] = {0}; + + for (dstate_id_t idx = 0; idx < info.size(); idx++) { + auto &succ_state = info.next(idx, chr); + + buf[idx] = getShengState(succ_state, info, accelInfo); + } +#ifdef DEBUG + dumpShuffleMask(chr, buf, sizeof(buf)); +#endif + m128 mask = loadu128(buf); + s->shuffle_masks[chr] = mask; + } +} + +bool has_accel_sheng(const NFA *nfa) { + const sheng *s = (const sheng *)getImplNfa(nfa); + return s->flags & SHENG_FLAG_HAS_ACCEL; +} + +aligned_unique_ptr shengCompile(raw_dfa &raw, + const CompileContext &cc, + const ReportManager &rm, + set *accel_states) { + if (!cc.grey.allowSheng) { + DEBUG_PRINTF("Sheng is not allowed!\n"); + return nullptr; + } + + sheng_build_strat strat(raw, rm); + dfa_info info(strat); + + DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size()); + + DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n", + raw.start_anchored, raw.start_floating); + + DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n", + info.can_die ? "can" : "cannot", info.size()); + if (info.size() > 16) { + DEBUG_PRINTF("Too many states\n"); + return nullptr; + } + + if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming + * mode with our semantics */ + raw.stripExtraEodReports(); + } + auto accelInfo = strat.getAccelInfo(cc.grey); + + // set impl_id of each dfa state + for (dstate_id_t i = 0; i < info.size(); i++) { + info[i].impl_id = i; + } + + DEBUG_PRINTF("Anchored start state: %u, floating start state: %u\n", + info.anchored.impl_id, info.floating.impl_id); + + u32 nfa_size = ROUNDUP_16(sizeof(NFA) + sizeof(sheng)); + vector reports, eod_reports, report_offsets; + u8 isSingle = 0; + ReportID single_report = 0; + + auto ri = + strat.gatherReports(reports, eod_reports, &isSingle, &single_report); + + u32 total_aux = sizeof(sstate_aux) * info.size(); + u32 total_accel = strat.accelSize() * accelInfo.size(); + u32 total_reports = ri->getReportListSize(); + + u32 reports_offset = nfa_size + total_aux; + u32 accel_offset = + ROUNDUP_N(reports_offset + total_reports, alignof(AccelAux)); + u32 total_size = ROUNDUP_N(accel_offset + total_accel, 64); + + DEBUG_PRINTF("NFA: %u, aux: %u, reports: %u, accel: %u, total: %u\n", + nfa_size, total_aux, total_reports, total_accel, total_size); + + aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size); + + populateBasicInfo(nfa.get(), info, accelInfo, nfa_size, reports_offset, + accel_offset, total_size, total_size - sizeof(NFA)); + + DEBUG_PRINTF("Setting up aux and report structures\n"); + + ri->fillReportLists(nfa.get(), reports_offset, report_offsets); + + for (dstate_id_t idx = 0; idx < info.size(); idx++) { + fillTops(nfa.get(), info, idx, accelInfo); + fillAux(nfa.get(), info, idx, reports, eod_reports, report_offsets); + } + if (isSingle) { + fillSingleReport(nfa.get(), single_report); + } + + fillAccelAux(nfa.get(), info, accelInfo); + + if (accel_states) { + fillAccelOut(accelInfo, accel_states); + } + + createShuffleMasks((sheng *)getMutableImplNfa(nfa.get()), info, accelInfo); + + return nfa; +} + +} // namespace ue2 diff --git a/src/nfa/shengcompile.h b/src/nfa/shengcompile.h new file mode 100644 index 00000000..873b7c75 --- /dev/null +++ b/src/nfa/shengcompile.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SHENGCOMPILE_H_ +#define SHENGCOMPILE_H_ + +#include "accel_dfa_build_strat.h" +#include "rdfa.h" +#include "util/alloc.h" +#include "util/charreach.h" +#include "util/ue2_containers.h" + +struct NFA; + +namespace ue2 { + +class ReportManager; +struct CompileContext; +struct raw_dfa; + +class sheng_build_strat : public accel_dfa_build_strat { +public: + sheng_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in) + : accel_dfa_build_strat(rm_in), rdfa(rdfa_in) {} + raw_dfa &get_raw() const override { return rdfa; } + std::unique_ptr gatherReports( + std::vector &reports /* out */, + std::vector &reports_eod /* out */, + u8 *isSingleReport /* out */, + ReportID *arbReport /* out */) const override; + size_t accelSize(void) const override; + u32 max_allowed_offset_accel() const override; + u32 max_stop_char() const override; + u32 max_floating_stop_char() const override; + +private: + raw_dfa &rdfa; +}; + +aligned_unique_ptr +shengCompile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm, + std::set *accel_states = nullptr); + +struct sheng_escape_info { + CharReach outs; + CharReach outs2_single; + flat_set> outs2; + bool outs2_broken = false; +}; + +bool has_accel_sheng(const NFA *nfa); + +} // namespace ue2 + +#endif /* SHENGCOMPILE_H_ */ diff --git a/src/nfa/shengdump.cpp b/src/nfa/shengdump.cpp new file mode 100644 index 00000000..037dfb05 --- /dev/null +++ b/src/nfa/shengdump.cpp @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include "shengdump.h" + +#include "accel_dump.h" +#include "nfa_dump_internal.h" +#include "nfa_internal.h" +#include "sheng_internal.h" +#include "rdfa.h" +#include "ue2common.h" +#include "util/charreach.h" +#include "util/dump_charclass.h" +#include "util/simd_utils.h" + + +#ifndef DUMP_SUPPORT +#error No dump support! +#endif + +using namespace std; + +namespace ue2 { + +static +const sstate_aux *get_aux(const NFA *n, dstate_id_t i) { + assert(n && isShengType(n->type)); + + const sheng *s = (const sheng *)getImplNfa(n); + const sstate_aux *aux_base = + (const sstate_aux *)((const char *)n + s->aux_offset); + + const sstate_aux *aux = aux_base + i; + + assert((const char *)aux < (const char *)s + s->length); + + return aux; +} + +static +void dumpHeader(FILE *f, const sheng *s) { + fprintf(f, "number of states: %u, DFA engine size: %u\n", s->n_states, + s->length); + fprintf(f, "aux base offset: %u, reports base offset: %u, " + "accel offset: %u\n", + s->aux_offset, s->report_offset, s->accel_offset); + fprintf(f, "anchored start state: %u, floating start state: %u\n", + s->anchored & SHENG_STATE_MASK, s->floating & SHENG_STATE_MASK); + fprintf(f, "has accel: %u can die: %u single report: %u\n", + !!(s->flags & SHENG_FLAG_HAS_ACCEL), + !!(s->flags & SHENG_FLAG_CAN_DIE), + !!(s->flags & SHENG_FLAG_SINGLE_REPORT)); +} + +static +void dumpAux(FILE *f, u32 state, const sstate_aux *aux) { + fprintf(f, "state id: %u, reports offset: %u, EOD reports offset: %u, " + "accel offset: %u, top: %u\n", + state, aux->accept, aux->accept_eod, aux->accel, + aux->top & SHENG_STATE_MASK); +} + +static +void dumpReports(FILE *f, const report_list *rl) { + fprintf(f, "reports count: %u\n", rl->count); + for (u32 i = 0; i < rl->count; i++) { + fprintf(f, " report: %u, report ID: %u\n", i, rl->report[i]); + } +} + +static +void dumpMasks(FILE *f, const sheng *s) { + for (u32 chr = 0; chr < 256; chr++) { + u8 buf[16]; + m128 shuffle_mask = s->shuffle_masks[chr]; + store128(buf, shuffle_mask); + + fprintf(f, "%3u: ", chr); + for (u32 pos = 0; pos < 16; pos++) { + u8 c = buf[pos]; + if (c & SHENG_STATE_FLAG_MASK) { + fprintf(f, "%2u* ", c & SHENG_STATE_MASK); + } else { + fprintf(f, "%2u ", c & SHENG_STATE_MASK); + } + } + fprintf(f, "\n"); + } +} + +void nfaExecSheng0_dumpText(const NFA *nfa, FILE *f) { + assert(nfa->type == SHENG_NFA_0); + const sheng *s = (const sheng *)getImplNfa(nfa); + + fprintf(f, "sheng DFA\n"); + dumpHeader(f, s); + + for (u32 state = 0; state < s->n_states; state++) { + const sstate_aux *aux = get_aux(nfa, state); + dumpAux(f, state, aux); + if (aux->accept) { + fprintf(f, "report list:\n"); + const report_list *rl = + (const report_list *)((const char *)nfa + aux->accept); + dumpReports(f, rl); + } + if (aux->accept_eod) { + fprintf(f, "EOD report list:\n"); + const report_list *rl = + (const report_list *)((const char *)nfa + aux->accept_eod); + dumpReports(f, rl); + } + if (aux->accel) { + fprintf(f, "accel:\n"); + const AccelAux *accel = + (const AccelAux *)((const char *)nfa + aux->accel); + dumpAccelInfo(f, *accel); + } + } + + fprintf(f, "\n"); + + dumpMasks(f, s); + + fprintf(f, "\n"); +} + +static +void dumpDotPreambleDfa(FILE *f) { + dumpDotPreamble(f); + + // DFA specific additions. + fprintf(f, "STARTF [style=invis];\n"); + fprintf(f, "STARTA [style=invis];\n"); + fprintf(f, "0 [style=invis];\n"); +} + +static +void describeNode(const NFA *n, const sheng *s, u16 i, FILE *f) { + const sstate_aux *aux = get_aux(n, i); + + fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, " + "label = \"%u\" ]; \n", + i, i); + + if (aux->accept_eod) { + fprintf(f, "%u [ color = darkorchid ];\n", i); + } + + if (aux->accept) { + fprintf(f, "%u [ shape = doublecircle ];\n", i); + } + + if (aux->top && (aux->top & SHENG_STATE_MASK) != i) { + fprintf(f, "%u -> %u [color = darkgoldenrod weight=0.1 ]\n", i, + aux->top & SHENG_STATE_MASK); + } + + if (i == (s->anchored & SHENG_STATE_MASK)) { + fprintf(f, "STARTA -> %u [color = blue ]\n", i); + } + + if (i == (s->floating & SHENG_STATE_MASK)) { + fprintf(f, "STARTF -> %u [color = red ]\n", i); + } +} + +static +void describeEdge(FILE *f, const u16 *t, u16 i) { + for (u16 s = 0; s < N_CHARS; s++) { + if (!t[s]) { + continue; + } + + u16 ss; + for (ss = 0; ss < s; ss++) { + if (t[s] == t[ss]) { + break; + } + } + + if (ss != s) { + continue; + } + + CharReach reach; + for (ss = s; ss < 256; ss++) { + if (t[s] == t[ss]) { + reach.set(ss); + } + } + + fprintf(f, "%u -> %u [ label = \"", i, t[s]); + + describeClass(f, reach, 5, CC_OUT_DOT); + + fprintf(f, "\" ];\n"); + } +} + +static +void shengGetTransitions(const NFA *n, u16 state, u16 *t) { + assert(isShengType(n->type)); + const sheng *s = (const sheng *)getImplNfa(n); + const sstate_aux *aux = get_aux(n, state); + + for (unsigned i = 0; i < N_CHARS; i++) { + u8 buf[16]; + m128 shuffle_mask = s->shuffle_masks[i]; + + store128(buf, shuffle_mask); + + t[i] = buf[state] & SHENG_STATE_MASK; + } + + t[TOP] = aux->top & SHENG_STATE_MASK; +} + +void nfaExecSheng0_dumpDot(const NFA *nfa, FILE *f, const string &) { + assert(nfa->type == SHENG_NFA_0); + const sheng *s = (const sheng *)getImplNfa(nfa); + + dumpDotPreambleDfa(f); + + for (u16 i = 1; i < s->n_states; i++) { + describeNode(nfa, s, i, f); + + u16 t[ALPHABET_SIZE]; + + shengGetTransitions(nfa, i, t); + + describeEdge(f, t, i); + } + + fprintf(f, "}\n"); +} + +} // namespace ue2 diff --git a/src/nfa/shengdump.h b/src/nfa/shengdump.h new file mode 100644 index 00000000..5334894f --- /dev/null +++ b/src/nfa/shengdump.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SHENGDUMP_H_ +#define SHENGDUMP_H_ + +#ifdef DUMP_SUPPORT + +#include +#include + +struct NFA; + +namespace ue2 { + +void nfaExecSheng0_dumpDot(const struct NFA *nfa, FILE *file, + const std::string &base); +void nfaExecSheng0_dumpText(const struct NFA *nfa, FILE *file); + +} // namespace ue2 + +#endif // DUMP_SUPPORT + +#endif /* SHENGDUMP_H_ */ diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp index a8a5113d..72efa43a 100644 --- a/src/nfagraph/ng_limex.cpp +++ b/src/nfagraph/ng_limex.cpp @@ -505,6 +505,9 @@ aligned_unique_ptr constructReversedNFA(const NGHolder &h_in, u32 hint, u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm, const CompileContext &cc) { + if (!cc.grey.allowLimExNFA) { + return false; + } // Quick check: we can always implement an NFA with less than NFA_MAX_STATES // states. Note that top masks can generate extra states, so we account for // those here too. diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 085aca79..c39c3401 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -50,6 +50,7 @@ #include "nfa/nfa_api_queue.h" #include "nfa/nfa_build_util.h" #include "nfa/nfa_internal.h" +#include "nfa/shengcompile.h" #include "nfa/shufticompile.h" #include "nfa/tamaramacompile.h" #include "nfa/tamarama_internal.h" @@ -863,13 +864,18 @@ aligned_unique_ptr pickImpl(aligned_unique_ptr dfa_impl, aligned_unique_ptr nfa_impl) { assert(nfa_impl); assert(dfa_impl); - assert(isMcClellanType(dfa_impl->type)); + assert(isDfaType(dfa_impl->type)); // If our NFA is an LBR, it always wins. if (isLbrType(nfa_impl->type)) { return nfa_impl; } + // if our DFA is an accelerated Sheng, it always wins. + if (isShengType(dfa_impl->type) && has_accel(*dfa_impl)) { + return dfa_impl; + } + bool d_accel = has_accel(*dfa_impl); bool n_accel = has_accel(*nfa_impl); bool d_big = dfa_impl->type == MCCLELLAN_NFA_16; @@ -922,6 +928,18 @@ buildRepeatEngine(const CastleProto &proto, return castle_nfa; } +static +aligned_unique_ptr getDfa(raw_dfa &rdfa, const CompileContext &cc, + const ReportManager &rm) { + // Unleash the Sheng!! + auto dfa = shengCompile(rdfa, cc, rm); + if (!dfa) { + // Sheng wasn't successful, so unleash McClellan! + dfa = mcclellanCompile(rdfa, cc, rm); + } + return dfa; +} + /* builds suffix nfas */ static aligned_unique_ptr @@ -942,7 +960,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, } if (suff.dfa()) { - auto d = mcclellanCompile(*suff.dfa(), cc, rm); + auto d = getDfa(*suff.dfa(), cc, rm); assert(d); return d; } @@ -971,7 +989,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, auto rdfa = buildMcClellan(holder, &rm, false, triggers.at(0), cc.grey); if (rdfa) { - auto d = mcclellanCompile(*rdfa, cc, rm); + auto d = getDfa(*rdfa, cc, rm); assert(d); if (cc.grey.roseMcClellanSuffix != 2) { n = pickImpl(move(d), move(n)); @@ -1091,12 +1109,13 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, } if (left.dfa()) { - n = mcclellanCompile(*left.dfa(), cc, rm); + n = getDfa(*left.dfa(), cc, rm); } else if (left.graph() && cc.grey.roseMcClellanPrefix == 2 && is_prefix && !is_transient) { auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey); if (rdfa) { - n = mcclellanCompile(*rdfa, cc, rm); + n = getDfa(*rdfa, cc, rm); + assert(n); } } @@ -1122,7 +1141,7 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, && (!n || !has_bounded_repeats_other_than_firsts(*n) || !is_fast(*n))) { auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey); if (rdfa) { - auto d = mcclellanCompile(*rdfa, cc, rm); + auto d = getDfa(*rdfa, cc, rm); assert(d); n = pickImpl(move(d), move(n)); } @@ -1857,8 +1876,8 @@ public: }; aligned_unique_ptr operator()(unique_ptr &rdfa) const { - // Unleash the McClellan! - return mcclellanCompile(*rdfa, build.cc, build.rm); + // Unleash the mighty DFA! + return getDfa(*rdfa, build.cc, build.rm); } aligned_unique_ptr operator()(unique_ptr &haig) const { @@ -1886,7 +1905,7 @@ public: !has_bounded_repeats_other_than_firsts(*n)) { auto rdfa = buildMcClellan(h, &rm, cc.grey); if (rdfa) { - auto d = mcclellanCompile(*rdfa, cc, rm); + auto d = getDfa(*rdfa, cc, rm); if (d) { n = pickImpl(move(d), move(n)); } diff --git a/src/runtime.c b/src/runtime.c index fc867b8e..35a11634 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -43,6 +43,7 @@ #include "nfa/nfa_api_util.h" #include "nfa/nfa_internal.h" #include "nfa/nfa_rev_api.h" +#include "nfa/sheng.h" #include "smallwrite/smallwrite_internal.h" #include "rose/rose.h" #include "rose/runtime.h" @@ -286,13 +287,16 @@ void runSmallWriteEngine(const struct SmallWriteEngine *smwr, size_t local_alen = length - smwr->start_offset; const u8 *local_buffer = buffer + smwr->start_offset; - assert(isMcClellanType(nfa->type)); + assert(isDfaType(nfa->type)); if (nfa->type == MCCLELLAN_NFA_8) { nfaExecMcClellan8_B(nfa, smwr->start_offset, local_buffer, local_alen, roseReportAdaptor, scratch); - } else { + } else if (nfa->type == MCCLELLAN_NFA_16){ nfaExecMcClellan16_B(nfa, smwr->start_offset, local_buffer, local_alen, roseReportAdaptor, scratch); + } else { + nfaExecSheng0_B(nfa, smwr->start_offset, local_buffer, + local_alen, roseReportAdaptor, scratch); } } diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index 1cffe514..90770ba5 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -34,6 +34,7 @@ #include "nfa/mcclellancompile_util.h" #include "nfa/nfa_internal.h" #include "nfa/rdfa_merge.h" +#include "nfa/shengcompile.h" #include "nfagraph/ng.h" #include "nfagraph/ng_holder.h" #include "nfagraph/ng_mcclellan.h" @@ -312,6 +313,20 @@ bool is_slow(const raw_dfa &rdfa, const set &accel, return true; } +static +aligned_unique_ptr getDfa(raw_dfa &rdfa, const CompileContext &cc, + const ReportManager &rm, + set &accel_states) { + aligned_unique_ptr dfa = nullptr; + if (cc.grey.allowSmallWriteSheng) { + dfa = shengCompile(rdfa, cc, rm, &accel_states); + } + if (!dfa) { + dfa = mcclellanCompile(rdfa, cc, rm, &accel_states); + } + return dfa; +} + static aligned_unique_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, const CompileContext &cc, @@ -322,9 +337,9 @@ aligned_unique_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, // Unleash the McClellan! set accel_states; - auto nfa = mcclellanCompile(rdfa, cc, rm, &accel_states); + auto nfa = getDfa(rdfa, cc, rm, accel_states); if (!nfa) { - DEBUG_PRINTF("mcclellan compile failed for smallwrite NFA\n"); + DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n"); return nullptr; } @@ -340,9 +355,9 @@ aligned_unique_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, return nullptr; } - nfa = mcclellanCompile(rdfa, cc, rm, &accel_states); + nfa = getDfa(rdfa, cc, rm, accel_states); if (!nfa) { - DEBUG_PRINTF("mcclellan compile failed for smallwrite NFA\n"); + DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n"); assert(0); /* able to build orig dfa but not the trimmed? */ return nullptr; } @@ -351,7 +366,7 @@ aligned_unique_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, *small_region = cc.grey.smallWriteLargestBuffer; } - assert(isMcClellanType(nfa->type)); + assert(isDfaType(nfa->type)); if (nfa->length > cc.grey.limitSmallWriteOutfixSize || nfa->length > cc.grey.limitDFASize) { DEBUG_PRINTF("smallwrite outfix size too large\n");