mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
dfa: adding new Sheng engine
A new shuffle-based DFA engine, complete with acceleration and smallwrite.
This commit is contained in:
parent
56bf25b091
commit
6331da4e29
@ -476,6 +476,12 @@ set (hs_exec_SRCS
|
||||
src/nfa/repeat.c
|
||||
src/nfa/repeat.h
|
||||
src/nfa/repeat_internal.h
|
||||
src/nfa/sheng.c
|
||||
src/nfa/sheng.h
|
||||
src/nfa/sheng_defs.h
|
||||
src/nfa/sheng_impl.h
|
||||
src/nfa/sheng_impl4.h
|
||||
src/nfa/sheng_internal.h
|
||||
src/nfa/shufti_common.h
|
||||
src/nfa/shufti.c
|
||||
src/nfa/shufti.h
|
||||
@ -641,6 +647,9 @@ SET (hs_SRCS
|
||||
src/nfa/repeat_internal.h
|
||||
src/nfa/repeatcompile.cpp
|
||||
src/nfa/repeatcompile.h
|
||||
src/nfa/sheng_internal.h
|
||||
src/nfa/shengcompile.cpp
|
||||
src/nfa/shengcompile.h
|
||||
src/nfa/shufticompile.cpp
|
||||
src/nfa/shufticompile.h
|
||||
src/nfa/tamaramacompile.cpp
|
||||
@ -927,6 +936,8 @@ set(hs_dump_SRCS
|
||||
src/nfa/nfa_dump_dispatch.cpp
|
||||
src/nfa/nfa_dump_internal.cpp
|
||||
src/nfa/nfa_dump_internal.h
|
||||
src/nfa/shengdump.cpp
|
||||
src/nfa/shengdump.h
|
||||
src/nfa/tamarama_dump.cpp
|
||||
src/nfa/tamarama_dump.h
|
||||
src/parser/dump.cpp
|
||||
|
@ -50,6 +50,7 @@ Grey::Grey(void) :
|
||||
allowLitHaig(true),
|
||||
allowLbr(true),
|
||||
allowMcClellan(true),
|
||||
allowSheng(true),
|
||||
allowPuff(true),
|
||||
allowLiteral(true),
|
||||
allowRose(true),
|
||||
@ -127,6 +128,7 @@ Grey::Grey(void) :
|
||||
equivalenceEnable(true),
|
||||
|
||||
allowSmallWrite(true), // McClellan dfas for small patterns
|
||||
allowSmallWriteSheng(false), // allow use of Sheng for SMWR
|
||||
|
||||
smallWriteLargestBuffer(70), // largest buffer that can be
|
||||
// considered a small write
|
||||
@ -214,6 +216,7 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
||||
G_UPDATE(allowLitHaig);
|
||||
G_UPDATE(allowLbr);
|
||||
G_UPDATE(allowMcClellan);
|
||||
G_UPDATE(allowSheng);
|
||||
G_UPDATE(allowPuff);
|
||||
G_UPDATE(allowLiteral);
|
||||
G_UPDATE(allowRose);
|
||||
@ -290,6 +293,7 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
||||
G_UPDATE(miracleHistoryBonus);
|
||||
G_UPDATE(equivalenceEnable);
|
||||
G_UPDATE(allowSmallWrite);
|
||||
G_UPDATE(allowSmallWriteSheng);
|
||||
G_UPDATE(smallWriteLargestBuffer);
|
||||
G_UPDATE(smallWriteLargestBufferBad);
|
||||
G_UPDATE(limitSmallWriteOutfixSize);
|
||||
|
@ -50,6 +50,7 @@ struct Grey {
|
||||
bool allowLitHaig;
|
||||
bool allowLbr;
|
||||
bool allowMcClellan;
|
||||
bool allowSheng;
|
||||
bool allowPuff;
|
||||
bool allowLiteral;
|
||||
bool allowRose;
|
||||
@ -149,6 +150,7 @@ struct Grey {
|
||||
|
||||
// SmallWrite engine
|
||||
bool allowSmallWrite;
|
||||
bool allowSmallWriteSheng;
|
||||
u32 smallWriteLargestBuffer; // largest buffer that can be small write
|
||||
u32 smallWriteLargestBufferBad;// largest buffer that can be small write
|
||||
u32 limitSmallWriteOutfixSize; //!< max total size of outfix DFAs
|
||||
|
@ -1009,7 +1009,7 @@ u32 mcclellanStartReachSize(const raw_dfa *raw) {
|
||||
return out.count();
|
||||
}
|
||||
|
||||
bool has_accel_dfa(const NFA *nfa) {
|
||||
bool has_accel_mcclellan(const NFA *nfa) {
|
||||
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
|
||||
return m->has_accel;
|
||||
}
|
||||
|
@ -87,7 +87,7 @@ u32 mcclellanStartReachSize(const raw_dfa *raw);
|
||||
|
||||
std::set<ReportID> all_reports(const raw_dfa &rdfa);
|
||||
|
||||
bool has_accel_dfa(const NFA *nfa);
|
||||
bool has_accel_mcclellan(const NFA *nfa);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
|
@ -127,6 +127,9 @@ char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end);
|
||||
*/
|
||||
char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end);
|
||||
|
||||
/** Return value indicating that the engine is dead. */
|
||||
#define MO_DEAD 0
|
||||
|
||||
/** Return value indicating that the engine is alive. */
|
||||
#define MO_ALIVE 1
|
||||
|
||||
|
@ -42,6 +42,7 @@
|
||||
#include "limex.h"
|
||||
#include "mcclellan.h"
|
||||
#include "mpv.h"
|
||||
#include "sheng.h"
|
||||
#include "tamarama.h"
|
||||
|
||||
#define DISPATCH_CASE(dc_ltype, dc_ftype, dc_subtype, dc_func_call) \
|
||||
@ -69,6 +70,7 @@
|
||||
DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \
|
||||
DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \
|
||||
DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \
|
||||
DISPATCH_CASE(SHENG, Sheng, 0, dbnt_func); \
|
||||
DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func); \
|
||||
default: \
|
||||
assert(0); \
|
||||
|
@ -30,6 +30,7 @@
|
||||
|
||||
#include "limex_internal.h"
|
||||
#include "mcclellancompile.h"
|
||||
#include "shengcompile.h"
|
||||
#include "nfa_internal.h"
|
||||
#include "repeat_internal.h"
|
||||
#include "ue2common.h"
|
||||
@ -213,7 +214,7 @@ template<> struct NFATraits<MCCLELLAN_NFA_8> {
|
||||
static const nfa_dispatch_fn has_repeats;
|
||||
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||
};
|
||||
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_accel = has_accel_dfa;
|
||||
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_accel = has_accel_mcclellan;
|
||||
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_repeats = dispatch_false;
|
||||
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_repeats_other_than_firsts = dispatch_false;
|
||||
#if defined(DUMP_SUPPORT)
|
||||
@ -229,7 +230,7 @@ template<> struct NFATraits<MCCLELLAN_NFA_16> {
|
||||
static const nfa_dispatch_fn has_repeats;
|
||||
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||
};
|
||||
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_accel = has_accel_dfa;
|
||||
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_accel = has_accel_mcclellan;
|
||||
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_repeats = dispatch_false;
|
||||
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_repeats_other_than_firsts = dispatch_false;
|
||||
#if defined(DUMP_SUPPORT)
|
||||
@ -245,7 +246,7 @@ template<> struct NFATraits<GOUGH_NFA_8> {
|
||||
static const nfa_dispatch_fn has_repeats;
|
||||
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||
};
|
||||
const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_accel = has_accel_dfa;
|
||||
const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_accel = has_accel_mcclellan;
|
||||
const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_repeats = dispatch_false;
|
||||
const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_repeats_other_than_firsts = dispatch_false;
|
||||
#if defined(DUMP_SUPPORT)
|
||||
@ -261,7 +262,7 @@ template<> struct NFATraits<GOUGH_NFA_16> {
|
||||
static const nfa_dispatch_fn has_repeats;
|
||||
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||
};
|
||||
const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_accel = has_accel_dfa;
|
||||
const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_accel = has_accel_mcclellan;
|
||||
const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_repeats = dispatch_false;
|
||||
const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_repeats_other_than_firsts = dispatch_false;
|
||||
#if defined(DUMP_SUPPORT)
|
||||
@ -380,6 +381,22 @@ const nfa_dispatch_fn NFATraits<LBR_NFA_Truf>::has_repeats_other_than_firsts = d
|
||||
const char *NFATraits<LBR_NFA_Truf>::name = "Lim Bounded Repeat (M)";
|
||||
#endif
|
||||
|
||||
template<> struct NFATraits<SHENG_NFA_0> {
|
||||
UNUSED static const char *name;
|
||||
static const NFACategory category = NFA_OTHER;
|
||||
static const u32 stateAlign = 1;
|
||||
static const bool fast = true;
|
||||
static const nfa_dispatch_fn has_accel;
|
||||
static const nfa_dispatch_fn has_repeats;
|
||||
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||
};
|
||||
const nfa_dispatch_fn NFATraits<SHENG_NFA_0>::has_accel = has_accel_sheng;
|
||||
const nfa_dispatch_fn NFATraits<SHENG_NFA_0>::has_repeats = dispatch_false;
|
||||
const nfa_dispatch_fn NFATraits<SHENG_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
|
||||
#if defined(DUMP_SUPPORT)
|
||||
const char *NFATraits<SHENG_NFA_0>::name = "Sheng";
|
||||
#endif
|
||||
|
||||
template<> struct NFATraits<TAMARAMA_NFA_0> {
|
||||
UNUSED static const char *name;
|
||||
static const NFACategory category = NFA_OTHER;
|
||||
|
@ -40,6 +40,7 @@
|
||||
#include "limex.h"
|
||||
#include "mcclellandump.h"
|
||||
#include "mpv_dump.h"
|
||||
#include "shengdump.h"
|
||||
#include "tamarama_dump.h"
|
||||
|
||||
#ifndef DUMP_SUPPORT
|
||||
@ -74,6 +75,7 @@ namespace ue2 {
|
||||
DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \
|
||||
DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \
|
||||
DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \
|
||||
DISPATCH_CASE(SHENG, Sheng, 0, dbnt_func); \
|
||||
DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func); \
|
||||
default: \
|
||||
assert(0); \
|
||||
|
@ -67,6 +67,7 @@ enum NFAEngineType {
|
||||
LBR_NFA_Shuf, /**< magic pseudo nfa */
|
||||
LBR_NFA_Truf, /**< magic pseudo nfa */
|
||||
CASTLE_NFA_0, /**< magic pseudo nfa */
|
||||
SHENG_NFA_0, /**< magic pseudo nfa */
|
||||
TAMARAMA_NFA_0, /**< magic nfa container */
|
||||
/** \brief bogus NFA - not used */
|
||||
INVALID_NFA
|
||||
@ -146,10 +147,17 @@ static really_inline int isGoughType(u8 t) {
|
||||
return t == GOUGH_NFA_8 || t == GOUGH_NFA_16;
|
||||
}
|
||||
|
||||
/** \brief True if the given type (from NFA::type) is a McClellan or Gough DFA.
|
||||
* */
|
||||
/** \brief True if the given type (from NFA::type) is a Sheng DFA. */
|
||||
static really_inline int isShengType(u8 t) {
|
||||
return t == SHENG_NFA_0;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief True if the given type (from NFA::type) is a McClellan, Gough or
|
||||
* Sheng DFA.
|
||||
*/
|
||||
static really_inline int isDfaType(u8 t) {
|
||||
return isMcClellanType(t) || isGoughType(t);
|
||||
return isMcClellanType(t) || isGoughType(t) || isShengType(t);
|
||||
}
|
||||
|
||||
/** \brief True if the given type (from NFA::type) is an NFA. */
|
||||
|
676
src/nfa/sheng.c
Normal file
676
src/nfa/sheng.c
Normal file
@ -0,0 +1,676 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "sheng.h"
|
||||
|
||||
#include "accel.h"
|
||||
#include "sheng_internal.h"
|
||||
#include "nfa_api.h"
|
||||
#include "nfa_api_queue.h"
|
||||
#include "nfa_internal.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/join.h"
|
||||
#include "util/simd_utils.h"
|
||||
|
||||
/** \brief Selects how runSheng() treats accept states while scanning. */
enum MatchMode {
    CALLBACK_OUTPUT = 0, /**< scan and report every match via the callback */
    STOP_AT_MATCH   = 1, /**< scan until the first match, then return */
    NO_MATCHES      = 2  /**< just scan the buffer; nothing is reported */
};
|
||||
|
||||
static really_inline
|
||||
const struct sheng *get_sheng(const struct NFA *n) {
|
||||
return (const struct sheng *)getImplNfa(n);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct sstate_aux *get_aux(const struct sheng *sh, u8 id) {
|
||||
u32 offset = sh->aux_offset - sizeof(struct NFA) +
|
||||
(id & SHENG_STATE_MASK) * sizeof(struct sstate_aux);
|
||||
DEBUG_PRINTF("Getting aux for state %u at offset %llu\n",
|
||||
id & SHENG_STATE_MASK, (u64a)offset + sizeof(struct NFA));
|
||||
return (const struct sstate_aux *)((const char *) sh + offset);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const union AccelAux *get_accel(const struct sheng *sh, u8 id) {
|
||||
const struct sstate_aux *saux = get_aux(sh, id);
|
||||
DEBUG_PRINTF("Getting accel aux at offset %u\n", saux->accel);
|
||||
const union AccelAux *aux = (const union AccelAux *)
|
||||
((const char *)sh + saux->accel - sizeof(struct NFA));
|
||||
return aux;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct report_list *get_rl(const struct sheng *sh,
|
||||
const struct sstate_aux *aux) {
|
||||
DEBUG_PRINTF("Getting report list at offset %u\n", aux->accept);
|
||||
return (const struct report_list *)
|
||||
((const char *)sh + aux->accept - sizeof(struct NFA));
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct report_list *get_eod_rl(const struct sheng *sh,
|
||||
const struct sstate_aux *aux) {
|
||||
DEBUG_PRINTF("Getting EOD report list at offset %u\n", aux->accept);
|
||||
return (const struct report_list *)
|
||||
((const char *)sh + aux->accept_eod - sizeof(struct NFA));
|
||||
}
|
||||
|
||||
static really_inline
|
||||
char shengHasAccept(const struct sheng *sh, const struct sstate_aux *aux,
|
||||
ReportID report) {
|
||||
assert(sh && aux);
|
||||
|
||||
const struct report_list *rl = get_rl(sh, aux);
|
||||
assert(ISALIGNED_N(rl, 4));
|
||||
|
||||
DEBUG_PRINTF("report list has %u entries\n", rl->count);
|
||||
|
||||
for (u32 i = 0; i < rl->count; i++) {
|
||||
if (rl->report[i] == report) {
|
||||
DEBUG_PRINTF("reporting %u\n", rl->report[i]);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
char fireSingleReport(NfaCallback cb, void *ctxt, ReportID r, u64a loc) {
|
||||
DEBUG_PRINTF("reporting %u\n", r);
|
||||
if (cb(0, loc, r, ctxt) == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING; /* termination requested */
|
||||
}
|
||||
return MO_CONTINUE_MATCHING; /* continue execution */
|
||||
}
|
||||
|
||||
static really_inline
|
||||
char fireReports(const struct sheng *sh, NfaCallback cb, void *ctxt,
|
||||
const u8 state, u64a loc, u8 *const cached_accept_state,
|
||||
ReportID *const cached_accept_id, char eod) {
|
||||
DEBUG_PRINTF("reporting matches @ %llu\n", loc);
|
||||
|
||||
if (!eod && state == *cached_accept_state) {
|
||||
DEBUG_PRINTF("reporting %u\n", *cached_accept_id);
|
||||
if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING; /* termination requested */
|
||||
}
|
||||
|
||||
return MO_CONTINUE_MATCHING; /* continue execution */
|
||||
}
|
||||
const struct sstate_aux *aux = get_aux(sh, state);
|
||||
const struct report_list *rl = eod ? get_eod_rl(sh, aux) : get_rl(sh, aux);
|
||||
assert(ISALIGNED(rl));
|
||||
|
||||
DEBUG_PRINTF("report list has %u entries\n", rl->count);
|
||||
u32 count = rl->count;
|
||||
|
||||
if (!eod && count == 1) {
|
||||
*cached_accept_state = state;
|
||||
*cached_accept_id = rl->report[0];
|
||||
|
||||
DEBUG_PRINTF("reporting %u\n", rl->report[0]);
|
||||
if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING; /* termination requested */
|
||||
}
|
||||
|
||||
return MO_CONTINUE_MATCHING; /* continue execution */
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < count; i++) {
|
||||
DEBUG_PRINTF("reporting %u\n", rl->report[i]);
|
||||
if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING; /* termination requested */
|
||||
}
|
||||
}
|
||||
return MO_CONTINUE_MATCHING; /* continue execution */
|
||||
}
|
||||
|
||||
/* include Sheng function definitions */
|
||||
#include "sheng_defs.h"
|
||||
|
||||
static really_inline
|
||||
char runShengCb(const struct sheng *sh, NfaCallback cb, void *ctxt, u64a offset,
|
||||
u8 *const cached_accept_state, ReportID *const cached_accept_id,
|
||||
const u8 *cur_buf, const u8 *start, const u8 *end, u8 can_die,
|
||||
u8 has_accel, u8 single, const u8 **scanned, u8 *state) {
|
||||
DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in callback mode\n",
|
||||
(u64a)(end - start), offset);
|
||||
DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
|
||||
(s64a)(end - cur_buf));
|
||||
DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die,
|
||||
!!has_accel, !!single);
|
||||
int rv;
|
||||
/* scan and report all matches */
|
||||
if (can_die) {
|
||||
if (has_accel) {
|
||||
rv = sheng4_coda(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, start,
|
||||
end, scanned);
|
||||
} else {
|
||||
rv = sheng4_cod(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, start,
|
||||
end, scanned);
|
||||
}
|
||||
if (rv == MO_HALT_MATCHING) {
|
||||
return MO_DEAD;
|
||||
}
|
||||
rv = sheng_cod(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, *scanned, end,
|
||||
scanned);
|
||||
} else {
|
||||
if (has_accel) {
|
||||
rv = sheng4_coa(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, start,
|
||||
end, scanned);
|
||||
} else {
|
||||
rv = sheng4_co(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, start,
|
||||
end, scanned);
|
||||
}
|
||||
if (rv == MO_HALT_MATCHING) {
|
||||
return MO_DEAD;
|
||||
}
|
||||
rv = sheng_co(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, *scanned, end,
|
||||
scanned);
|
||||
}
|
||||
if (rv == MO_HALT_MATCHING) {
|
||||
return MO_DEAD;
|
||||
}
|
||||
return MO_ALIVE;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void runShengNm(const struct sheng *sh, NfaCallback cb, void *ctxt, u64a offset,
|
||||
u8 *const cached_accept_state, ReportID *const cached_accept_id,
|
||||
const u8 *cur_buf, const u8 *start, const u8 *end, u8 can_die,
|
||||
u8 has_accel, u8 single, const u8 **scanned, u8 *state) {
|
||||
DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in nomatch mode\n",
|
||||
(u64a)(end - start), offset);
|
||||
DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
|
||||
(s64a)(end - cur_buf));
|
||||
DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die,
|
||||
!!has_accel, !!single);
|
||||
/* just scan the buffer */
|
||||
if (can_die) {
|
||||
if (has_accel) {
|
||||
sheng4_nmda(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, start, end,
|
||||
scanned);
|
||||
} else {
|
||||
sheng4_nmd(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, start, end,
|
||||
scanned);
|
||||
}
|
||||
sheng_nmd(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
|
||||
single, offset, cur_buf, *scanned, end, scanned);
|
||||
} else {
|
||||
sheng4_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
|
||||
single, offset, cur_buf, start, end, scanned);
|
||||
sheng_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
|
||||
single, offset, cur_buf, *scanned, end, scanned);
|
||||
}
|
||||
}
|
||||
|
||||
static really_inline
|
||||
char runShengSam(const struct sheng *sh, NfaCallback cb, void *ctxt,
|
||||
u64a offset, u8 *const cached_accept_state,
|
||||
ReportID *const cached_accept_id, const u8 *cur_buf,
|
||||
const u8 *start, const u8 *end, u8 can_die, u8 has_accel,
|
||||
u8 single, const u8 **scanned, u8 *state) {
|
||||
DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in stop at match mode\n",
|
||||
(u64a)(end - start), offset);
|
||||
DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
|
||||
(s64a)(end - cur_buf));
|
||||
DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die,
|
||||
!!has_accel, !!single);
|
||||
int rv;
|
||||
/* scan until first match */
|
||||
if (can_die) {
|
||||
if (has_accel) {
|
||||
rv = sheng4_samda(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, start,
|
||||
end, scanned);
|
||||
} else {
|
||||
rv = sheng4_samd(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, start,
|
||||
end, scanned);
|
||||
}
|
||||
if (rv == MO_HALT_MATCHING) {
|
||||
return MO_DEAD;
|
||||
}
|
||||
/* if we stopped before we expected, we found a match */
|
||||
if (rv == MO_MATCHES_PENDING) {
|
||||
return MO_MATCHES_PENDING;
|
||||
}
|
||||
|
||||
rv = sheng_samd(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, *scanned,
|
||||
end, scanned);
|
||||
} else {
|
||||
if (has_accel) {
|
||||
rv = sheng4_sama(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, start,
|
||||
end, scanned);
|
||||
} else {
|
||||
rv = sheng4_sam(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, start,
|
||||
end, scanned);
|
||||
}
|
||||
if (rv == MO_HALT_MATCHING) {
|
||||
return MO_DEAD;
|
||||
}
|
||||
/* if we stopped before we expected, we found a match */
|
||||
if (rv == MO_MATCHES_PENDING) {
|
||||
return MO_MATCHES_PENDING;
|
||||
}
|
||||
|
||||
rv = sheng_sam(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, *scanned, end,
|
||||
scanned);
|
||||
}
|
||||
if (rv == MO_HALT_MATCHING) {
|
||||
return MO_DEAD;
|
||||
}
|
||||
/* if we stopped before we expected, we found a match */
|
||||
if (rv == MO_MATCHES_PENDING) {
|
||||
return MO_MATCHES_PENDING;
|
||||
}
|
||||
return MO_ALIVE;
|
||||
}
|
||||
|
||||
static never_inline
|
||||
char runSheng(const struct sheng *sh, struct mq *q, s64a b_end,
|
||||
enum MatchMode mode) {
|
||||
u8 state = *(u8 *)q->state;
|
||||
u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
|
||||
u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL;
|
||||
u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;
|
||||
|
||||
u8 cached_accept_state = 0;
|
||||
ReportID cached_accept_id = 0;
|
||||
|
||||
DEBUG_PRINTF("starting Sheng execution in state %u\n",
|
||||
state & SHENG_STATE_MASK);
|
||||
|
||||
if (q->report_current) {
|
||||
DEBUG_PRINTF("reporting current pending matches\n");
|
||||
assert(sh);
|
||||
|
||||
q->report_current = 0;
|
||||
|
||||
int rv;
|
||||
if (single) {
|
||||
rv = fireSingleReport(q->cb, q->context, sh->report,
|
||||
q_cur_offset(q));
|
||||
} else {
|
||||
rv = fireReports(sh, q->cb, q->context, state, q_cur_offset(q),
|
||||
&cached_accept_state, &cached_accept_id, 0);
|
||||
}
|
||||
if (rv == MO_HALT_MATCHING) {
|
||||
DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
|
||||
return MO_DEAD;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("proceeding with matching\n");
|
||||
}
|
||||
|
||||
assert(q_cur_type(q) == MQE_START);
|
||||
s64a start = q_cur_loc(q);
|
||||
|
||||
DEBUG_PRINTF("offset: %lli, location: %lli, mode: %s\n", q->offset, start,
|
||||
mode == CALLBACK_OUTPUT ? "CALLBACK OUTPUT" :
|
||||
mode == NO_MATCHES ? "NO MATCHES" :
|
||||
mode == STOP_AT_MATCH ? "STOP AT MATCH" : "???");
|
||||
|
||||
DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
|
||||
q_cur_type(q) == MQE_START ? "START" :
|
||||
q_cur_type(q) == MQE_TOP ? "TOP" :
|
||||
q_cur_type(q) == MQE_END ? "END" : "???");
|
||||
|
||||
const u8* cur_buf;
|
||||
if (start < 0) {
|
||||
DEBUG_PRINTF("negative location, scanning history\n");
|
||||
DEBUG_PRINTF("min location: %zd\n", -q->hlength);
|
||||
cur_buf = q->history + q->hlength;
|
||||
} else {
|
||||
DEBUG_PRINTF("positive location, scanning buffer\n");
|
||||
DEBUG_PRINTF("max location: %lli\n", b_end);
|
||||
cur_buf = q->buffer;
|
||||
}
|
||||
|
||||
/* if we our queue event is past our end */
|
||||
if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
|
||||
DEBUG_PRINTF("current location past buffer end\n");
|
||||
DEBUG_PRINTF("setting q location to %llu\n", b_end);
|
||||
DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
|
||||
q->items[q->cur].location = b_end;
|
||||
return MO_ALIVE;
|
||||
}
|
||||
|
||||
q->cur++;
|
||||
|
||||
s64a cur_start = start;
|
||||
|
||||
while (1) {
|
||||
DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
|
||||
q_cur_type(q) == MQE_START ? "START" :
|
||||
q_cur_type(q) == MQE_TOP ? "TOP" :
|
||||
q_cur_type(q) == MQE_END ? "END" : "???");
|
||||
s64a end = q_cur_loc(q);
|
||||
if (mode != NO_MATCHES) {
|
||||
end = MIN(end, b_end);
|
||||
}
|
||||
assert(end <= (s64a) q->length);
|
||||
s64a cur_end = end;
|
||||
|
||||
/* we may cross the border between history and current buffer */
|
||||
if (cur_start < 0) {
|
||||
cur_end = MIN(0, cur_end);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("start: %lli end: %lli\n", start, end);
|
||||
|
||||
/* don't scan zero length buffer */
|
||||
if (cur_start != cur_end) {
|
||||
const u8 * scanned = cur_buf;
|
||||
char rv;
|
||||
|
||||
/* if we're in nomatch mode or if we're scanning history buffer */
|
||||
if (mode == NO_MATCHES ||
|
||||
(cur_start < 0 && mode == CALLBACK_OUTPUT)) {
|
||||
runShengNm(sh, q->cb, q->context, q->offset,
|
||||
&cached_accept_state, &cached_accept_id, cur_buf,
|
||||
cur_buf + cur_start, cur_buf + cur_end, can_die,
|
||||
has_accel, single, &scanned, &state);
|
||||
} else if (mode == CALLBACK_OUTPUT) {
|
||||
rv = runShengCb(sh, q->cb, q->context, q->offset,
|
||||
&cached_accept_state, &cached_accept_id,
|
||||
cur_buf, cur_buf + cur_start, cur_buf + cur_end,
|
||||
can_die, has_accel, single, &scanned, &state);
|
||||
if (rv == MO_DEAD) {
|
||||
DEBUG_PRINTF("exiting in state %u\n",
|
||||
state & SHENG_STATE_MASK);
|
||||
return MO_DEAD;
|
||||
}
|
||||
} else if (mode == STOP_AT_MATCH) {
|
||||
rv = runShengSam(sh, q->cb, q->context, q->offset,
|
||||
&cached_accept_state, &cached_accept_id,
|
||||
cur_buf, cur_buf + cur_start,
|
||||
cur_buf + cur_end, can_die, has_accel, single,
|
||||
&scanned, &state);
|
||||
if (rv == MO_DEAD) {
|
||||
DEBUG_PRINTF("exiting in state %u\n",
|
||||
state & SHENG_STATE_MASK);
|
||||
return rv;
|
||||
} else if (rv == MO_MATCHES_PENDING) {
|
||||
assert(q->cur);
|
||||
DEBUG_PRINTF("found a match, setting q location to %zd\n",
|
||||
scanned - cur_buf + 1);
|
||||
q->cur--;
|
||||
q->items[q->cur].type = MQE_START;
|
||||
q->items[q->cur].location =
|
||||
scanned - cur_buf + 1; /* due to exiting early */
|
||||
*(u8 *)q->state = state;
|
||||
DEBUG_PRINTF("exiting in state %u\n",
|
||||
state & SHENG_STATE_MASK);
|
||||
return rv;
|
||||
}
|
||||
} else {
|
||||
assert(!"invalid scanning mode!");
|
||||
}
|
||||
assert(scanned == cur_buf + cur_end);
|
||||
|
||||
cur_start = cur_end;
|
||||
}
|
||||
|
||||
/* if we our queue event is past our end */
|
||||
if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
|
||||
DEBUG_PRINTF("current location past buffer end\n");
|
||||
DEBUG_PRINTF("setting q location to %llu\n", b_end);
|
||||
DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
|
||||
q->cur--;
|
||||
q->items[q->cur].type = MQE_START;
|
||||
q->items[q->cur].location = b_end;
|
||||
*(u8 *)q->state = state;
|
||||
return MO_ALIVE;
|
||||
}
|
||||
|
||||
/* crossing over into actual buffer */
|
||||
if (cur_start == 0) {
|
||||
DEBUG_PRINTF("positive location, scanning buffer\n");
|
||||
DEBUG_PRINTF("max offset: %lli\n", b_end);
|
||||
cur_buf = q->buffer;
|
||||
}
|
||||
|
||||
/* continue scanning the same buffer */
|
||||
if (end != cur_end) {
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (q_cur_type(q)) {
|
||||
case MQE_END:
|
||||
*(u8 *)q->state = state;
|
||||
q->cur++;
|
||||
DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
|
||||
if (can_die) {
|
||||
return (state & SHENG_STATE_DEAD) ? MO_DEAD : MO_ALIVE;
|
||||
}
|
||||
return MO_ALIVE;
|
||||
case MQE_TOP:
|
||||
if (q->offset + cur_start == 0) {
|
||||
DEBUG_PRINTF("Anchored start, going to state %u\n",
|
||||
sh->anchored);
|
||||
state = sh->anchored;
|
||||
} else {
|
||||
u8 new_state = get_aux(sh, state)->top;
|
||||
DEBUG_PRINTF("Top event %u->%u\n", state & SHENG_STATE_MASK,
|
||||
new_state & SHENG_STATE_MASK);
|
||||
state = new_state;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(!"invalid queue event");
|
||||
break;
|
||||
}
|
||||
q->cur++;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Block-mode ("smallwrite") scan of \p buffer with a Sheng engine.
 *
 * Starts in the anchored state, scans the whole buffer in callback mode and,
 * if the final state has an EOD report list, fires it at \p length +
 * \p offset.
 *
 * \return MO_DEAD if a callback halted matching or the final state carries
 * SHENG_STATE_DEAD, MO_ALIVE otherwise.
 */
char nfaExecSheng0_B(const struct NFA *n, u64a offset, const u8 *buffer,
                     size_t length, NfaCallback cb, void *context) {
    DEBUG_PRINTF("smallwrite Sheng\n");
    assert(n->type == SHENG_NFA_0);
    const struct sheng *sh = get_sheng(n);
    u8 state = sh->anchored;
    u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
    u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL;
    u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;
    u8 cached_accept_state = 0;
    ReportID cached_accept_id = 0;

    /* scan and report all matches */
    int rv;
    s64a end = length;
    /* initialised the same way runSheng() does before handing it out */
    const u8 *scanned = buffer;

    rv = runShengCb(sh, cb, context, offset, &cached_accept_state,
                    &cached_accept_id, buffer, buffer, buffer + end, can_die,
                    has_accel, single, &scanned, &state);
    if (rv == MO_DEAD) {
        DEBUG_PRINTF("exiting in state %u\n",
                     state & SHENG_STATE_MASK);
        return MO_DEAD;
    }

    DEBUG_PRINTF("%u\n", state & SHENG_STATE_MASK);

    const struct sstate_aux *aux = get_aux(sh, state);

    if (aux->accept_eod) {
        DEBUG_PRINTF("Reporting EOD matches\n");
        fireReports(sh, cb, context, state, end + offset, &cached_accept_state,
                    &cached_accept_id, 1);
    }

    return state & SHENG_STATE_DEAD ? MO_DEAD : MO_ALIVE;
}
|
||||
|
||||
/** \brief Process queue \p q up to \p end, reporting matches through the
 * queue callback (CALLBACK_OUTPUT mode). Returns runSheng()'s result. */
char nfaExecSheng0_Q(const struct NFA *n, struct mq *q, s64a end) {
    return runSheng(get_sheng(n), q, end, CALLBACK_OUTPUT);
}
|
||||
|
||||
/** \brief Process queue \p q up to \p end, stopping at the first match
 * (STOP_AT_MATCH mode). Returns runSheng()'s result. */
char nfaExecSheng0_Q2(const struct NFA *n, struct mq *q, s64a end) {
    return runSheng(get_sheng(n), q, end, STOP_AT_MATCH);
}
|
||||
|
||||
/**
 * \brief Run the whole queue without reporting (NO_MATCHES mode), then check
 * whether the resulting state accepts \p report.
 *
 * \return MO_MATCHES_PENDING if runSheng() returned non-zero and the final
 * state accepts \p report; otherwise runSheng()'s own result.
 */
char nfaExecSheng0_QR(const struct NFA *n, struct mq *q, ReportID report) {
    assert(q_cur_type(q) == MQE_START);

    const struct sheng *sh = get_sheng(n);
    const char alive = runSheng(sh, q, 0 /* end */, NO_MATCHES);

    if (!alive) {
        return alive;
    }
    if (nfaExecSheng0_inAccept(n, report, q)) {
        return MO_MATCHES_PENDING;
    }
    return alive;
}
|
||||
|
||||
/**
 * \brief Test whether the state stored in \p q->state accepts \p report.
 *
 * \return 0 if the state has no accept report list, otherwise the result of
 * shengHasAccept().
 */
char nfaExecSheng0_inAccept(const struct NFA *n, ReportID report,
                            struct mq *q) {
    assert(n && q);

    const struct sheng *sh = get_sheng(n);
    u8 s = *(const u8 *)q->state;
    DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG_STATE_MASK));

    const struct sstate_aux *aux = get_aux(sh, s);
    if (aux->accept) {
        return shengHasAccept(sh, aux, report);
    }
    return 0;
}
|
||||
|
||||
char nfaExecSheng0_inAnyAccept(const struct NFA *n, struct mq *q) {
|
||||
assert(n && q);
|
||||
|
||||
const struct sheng *sh = get_sheng(n);
|
||||
u8 s = *(const u8 *)q->state;
|
||||
DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG_STATE_MASK));
|
||||
|
||||
const struct sstate_aux *aux = get_aux(sh, s);
|
||||
return !!aux->accept;
|
||||
}
|
||||
|
||||
char nfaExecSheng0_testEOD(const struct NFA *nfa, const char *state,
|
||||
UNUSED const char *streamState, u64a offset,
|
||||
NfaCallback cb, void *ctxt) {
|
||||
assert(nfa);
|
||||
|
||||
const struct sheng *sh = get_sheng(nfa);
|
||||
u8 s = *(const u8 *)state;
|
||||
DEBUG_PRINTF("checking EOD accepts for %u\n", (u8)(s & SHENG_STATE_MASK));
|
||||
|
||||
const struct sstate_aux *aux = get_aux(sh, s);
|
||||
|
||||
if (!aux->accept_eod) {
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
return fireReports(sh, cb, ctxt, s, offset, NULL, NULL, 1);
|
||||
}
|
||||
|
||||
char nfaExecSheng0_reportCurrent(const struct NFA *n, struct mq *q) {
|
||||
const struct sheng *sh = (const struct sheng *)getImplNfa(n);
|
||||
NfaCallback cb = q->cb;
|
||||
void *ctxt = q->context;
|
||||
u8 s = *(u8 *)q->state;
|
||||
const struct sstate_aux *aux = get_aux(sh, s);
|
||||
u64a offset = q_cur_offset(q);
|
||||
u8 cached_state_id = 0;
|
||||
ReportID cached_report_id = 0;
|
||||
assert(q_cur_type(q) == MQE_START);
|
||||
|
||||
if (aux->accept) {
|
||||
if (sh->flags & SHENG_FLAG_SINGLE_REPORT) {
|
||||
fireSingleReport(cb, ctxt, sh->report, offset);
|
||||
} else {
|
||||
fireReports(sh, cb, ctxt, s, offset, &cached_state_id,
|
||||
&cached_report_id, 1);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Writes the initial state byte into \p state: the engine's floating start
 * state when \p offset is non-zero, its anchored start state when \p offset
 * is zero. \p key is not used.
 *
 * Returns non-zero when the chosen start state does not have the
 * SHENG_STATE_DEAD flag set, 0 when it does.
 */
char nfaExecSheng0_initCompressedState(const struct NFA *nfa, u64a offset,
                                       void *state, UNUSED u8 key) {
    const struct sheng *sh = get_sheng(nfa);
    const u8 start_state = offset ? sh->floating : sh->anchored;

    *(u8 *)state = start_state;
    return !(start_state & SHENG_STATE_DEAD);
}
|
||||
|
||||
char nfaExecSheng0_queueInitState(const struct NFA *nfa, struct mq *q) {
|
||||
assert(nfa->scratchStateSize == 1);
|
||||
|
||||
/* starting in floating state */
|
||||
const struct sheng *sh = get_sheng(nfa);
|
||||
*(u8 *)q->state = sh->floating;
|
||||
DEBUG_PRINTF("starting in floating state\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Saves the queue's scratch state into its stream state. Both are a single
 * byte for this engine (asserted), so "compression" is a plain byte copy.
 * \p loc is not used. Always returns 0.
 */
char nfaExecSheng0_queueCompressState(UNUSED const struct NFA *nfa,
                                      const struct mq *q, UNUSED s64a loc) {
    assert(nfa->scratchStateSize == 1);
    assert(nfa->streamStateSize == 1);

    *(u8 *)q->streamState = *(const u8 *)q->state;
    return 0;
}
|
||||
|
||||
/**
 * Restores scratch state from stream state: copies the single state byte at
 * \p src to \p dest (both sizes are asserted to be one byte). \p offset and
 * \p key are not used. Always returns 0.
 */
char nfaExecSheng0_expandState(UNUSED const struct NFA *nfa, void *dest,
                               const void *src, UNUSED u64a offset,
                               UNUSED u8 key) {
    assert(nfa->scratchStateSize == 1);
    assert(nfa->streamStateSize == 1);

    const u8 *stream_byte = (const u8 *)src;
    u8 *scratch_byte = (u8 *)dest;
    *scratch_byte = *stream_byte;
    return 0;
}
|
61
src/nfa/sheng.h
Normal file
61
src/nfa/sheng.h
Normal file
@ -0,0 +1,61 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef SHENG_H_
#define SHENG_H_

#include "callback.h"
#include "ue2common.h"

struct mq;
struct NFA;

/* API slots Sheng0 does not implement: they resolve to the generic
 * "no implementation" placeholders. */
#define nfaExecSheng0_B_Reverse NFA_API_NO_IMPL
#define nfaExecSheng0_zombie_status NFA_API_ZOMBIE_NO_IMPL

/*
 * Queue-driven scans: callback output (Q), stop at first match (Q2) and
 * "is this report accepted after the scan" (QR).
 */
char nfaExecSheng0_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecSheng0_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecSheng0_QR(const struct NFA *n, struct mq *q, ReportID report);

/* Buffer-driven scan entry point. */
char nfaExecSheng0_B(const struct NFA *n, u64a offset, const u8 *buffer,
                     size_t length, NfaCallback cb, void *context);

/* Accept queries on the state currently held by the queue. */
char nfaExecSheng0_inAccept(const struct NFA *n, ReportID report,
                            struct mq *q);
char nfaExecSheng0_inAnyAccept(const struct NFA *n, struct mq *q);

/* Reporting: end-of-data accepts and accepts of the current state. */
char nfaExecSheng0_testEOD(const struct NFA *nfa, const char *state,
                           const char *streamState, u64a offset,
                           NfaCallback callback, void *context);
char nfaExecSheng0_reportCurrent(const struct NFA *n, struct mq *q);

/* State management: initialise, save to and restore from stream state. */
char nfaExecSheng0_queueInitState(const struct NFA *nfa, struct mq *q);
char nfaExecSheng0_initCompressedState(const struct NFA *nfa, u64a offset,
                                       void *state, u8 key);
char nfaExecSheng0_queueCompressState(const struct NFA *nfa,
                                      const struct mq *q, s64a loc);
char nfaExecSheng0_expandState(const struct NFA *nfa, void *dest,
                               const void *src, u64a offset, u8 key);

#endif /* SHENG_H_ */
|
353
src/nfa/sheng_defs.h
Normal file
353
src/nfa/sheng_defs.h
Normal file
@ -0,0 +1,353 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef SHENG_DEFS_H
|
||||
#define SHENG_DEFS_H
|
||||
|
||||
/*
|
||||
* Utility functions used by various versions of Sheng engine
|
||||
*/
|
||||
static really_inline
|
||||
u8 isDeadState(const u8 a) {
|
||||
return a & SHENG_STATE_DEAD;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u8 isAcceptState(const u8 a) {
|
||||
return a & SHENG_STATE_ACCEPT;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u8 isAccelState(const u8 a) {
|
||||
return a & SHENG_STATE_ACCEL;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u8 hasInterestingStates(const u8 a, const u8 b, const u8 c, const u8 d) {
|
||||
return (a | b | c | d) & (SHENG_STATE_FLAG_MASK);
|
||||
}
|
||||
|
||||
/* these functions should be optimized out, used by NO_MATCHES mode */
|
||||
static really_inline
|
||||
u8 dummyFunc4(UNUSED const u8 a, UNUSED const u8 b, UNUSED const u8 c,
|
||||
UNUSED const u8 d) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u8 dummyFunc(UNUSED const u8 a) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sheng function definitions for single byte loops
|
||||
*/
|
||||
/*
 * Each stanza below configures sheng_impl.h through four macros, includes it
 * to stamp out one function, then removes the macros again (in reverse order
 * of definition) so the next stanza starts clean.
 */

/* sheng_cod: callback output, dead-on-arrival check enabled */
#define SHENG_IMPL sheng_cod
#define DEAD_FUNC isDeadState
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 0
#include "sheng_impl.h"
#undef STOP_AT_MATCH
#undef ACCEPT_FUNC
#undef DEAD_FUNC
#undef SHENG_IMPL

/* sheng_co: callback output, engine can't die (dead check is a dummy) */
#define SHENG_IMPL sheng_co
#define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 0
#include "sheng_impl.h"
#undef STOP_AT_MATCH
#undef ACCEPT_FUNC
#undef DEAD_FUNC
#undef SHENG_IMPL

/* sheng_samd: stop at match, dead-on-arrival check enabled */
#define SHENG_IMPL sheng_samd
#define DEAD_FUNC isDeadState
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 1
#include "sheng_impl.h"
#undef STOP_AT_MATCH
#undef ACCEPT_FUNC
#undef DEAD_FUNC
#undef SHENG_IMPL

/* sheng_sam: stop at match, engine can't die */
#define SHENG_IMPL sheng_sam
#define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 1
#include "sheng_impl.h"
#undef STOP_AT_MATCH
#undef ACCEPT_FUNC
#undef DEAD_FUNC
#undef SHENG_IMPL

/* sheng_nmd: no match reporting (accept check is a dummy), can die */
#define SHENG_IMPL sheng_nmd
#define DEAD_FUNC isDeadState
#define ACCEPT_FUNC dummyFunc
#define STOP_AT_MATCH 0
#include "sheng_impl.h"
#undef STOP_AT_MATCH
#undef ACCEPT_FUNC
#undef DEAD_FUNC
#undef SHENG_IMPL

/* sheng_nm: no match reporting, engine can't die */
#define SHENG_IMPL sheng_nm
#define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc
#define STOP_AT_MATCH 0
#include "sheng_impl.h"
#undef STOP_AT_MATCH
#undef ACCEPT_FUNC
#undef DEAD_FUNC
#undef SHENG_IMPL
|
||||
|
||||
/*
|
||||
* Sheng function definitions for 4-byte loops
|
||||
*/
|
||||
/*
 * Callback-output variants of the 4-byte loop. All of them use the real
 * hasInterestingStates() test, so the dead and accel checks live in the INNER
 * hooks and the OUTER hooks stay dummies. Macros are removed in reverse order
 * of definition after each include.
 */

/* sheng4_coda: callback output, can die, accelerated */
#define SHENG_IMPL sheng4_coda
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC isDeadState
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef STOP_AT_MATCH
#undef ACCEPT_FUNC
#undef OUTER_ACCEL_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_DEAD_FUNC
#undef INTERESTING_FUNC
#undef SHENG_IMPL

/* sheng4_cod: callback output, can die, not accelerated */
#define SHENG_IMPL sheng4_cod
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC isDeadState
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef STOP_AT_MATCH
#undef ACCEPT_FUNC
#undef OUTER_ACCEL_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_DEAD_FUNC
#undef INTERESTING_FUNC
#undef SHENG_IMPL

/* sheng4_coa: callback output, can't die, accelerated */
#define SHENG_IMPL sheng4_coa
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef STOP_AT_MATCH
#undef ACCEPT_FUNC
#undef OUTER_ACCEL_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_DEAD_FUNC
#undef INTERESTING_FUNC
#undef SHENG_IMPL

/* sheng4_co: callback output, can't die, not accelerated */
#define SHENG_IMPL sheng4_co
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef STOP_AT_MATCH
#undef ACCEPT_FUNC
#undef OUTER_ACCEL_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_DEAD_FUNC
#undef INTERESTING_FUNC
#undef SHENG_IMPL
|
||||
|
||||
/*
 * Stop-at-match variants of the 4-byte loop. Same hook layout as the
 * callback-output variants (INNER hooks active, OUTER hooks dummy); the only
 * difference is STOP_AT_MATCH 1. Macros are removed in reverse order of
 * definition after each include.
 */

/* sheng4_samda: stop at match, can die, accelerated */
#define SHENG_IMPL sheng4_samda
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC isDeadState
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 1
#include "sheng_impl4.h"
#undef STOP_AT_MATCH
#undef ACCEPT_FUNC
#undef OUTER_ACCEL_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_DEAD_FUNC
#undef INTERESTING_FUNC
#undef SHENG_IMPL

/* sheng4_samd: stop at match, can die, not accelerated */
#define SHENG_IMPL sheng4_samd
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC isDeadState
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 1
#include "sheng_impl4.h"
#undef STOP_AT_MATCH
#undef ACCEPT_FUNC
#undef OUTER_ACCEL_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_DEAD_FUNC
#undef INTERESTING_FUNC
#undef SHENG_IMPL

/* sheng4_sama: stop at match, can't die, accelerated */
#define SHENG_IMPL sheng4_sama
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 1
#include "sheng_impl4.h"
#undef STOP_AT_MATCH
#undef ACCEPT_FUNC
#undef OUTER_ACCEL_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_DEAD_FUNC
#undef INTERESTING_FUNC
#undef SHENG_IMPL

/* sheng4_sam: stop at match, can't die, not accelerated */
#define SHENG_IMPL sheng4_sam
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 1
#include "sheng_impl4.h"
#undef STOP_AT_MATCH
#undef ACCEPT_FUNC
#undef OUTER_ACCEL_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_DEAD_FUNC
#undef INTERESTING_FUNC
#undef SHENG_IMPL
|
||||
|
||||
/*
 * No-match variants of the 4-byte loop. Here INTERESTING_FUNC is the dummy
 * (there are no accepts to look for), which removes the whole "interesting"
 * branch together with the INNER hooks; dead and accel checks are therefore
 * wired into the OUTER hooks instead. Macros are removed in reverse order of
 * definition after each include.
 */

/* sheng4_nmda: no match, can die, accelerated */
#define SHENG_IMPL sheng4_nmda
#define INTERESTING_FUNC dummyFunc4
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC isDeadState
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC isAccelState
#define ACCEPT_FUNC dummyFunc
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef STOP_AT_MATCH
#undef ACCEPT_FUNC
#undef OUTER_ACCEL_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_DEAD_FUNC
#undef INTERESTING_FUNC
#undef SHENG_IMPL

/* sheng4_nmd: no match, can die, not accelerated */
#define SHENG_IMPL sheng4_nmd
#define INTERESTING_FUNC dummyFunc4
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC isDeadState
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef STOP_AT_MATCH
#undef ACCEPT_FUNC
#undef OUTER_ACCEL_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_DEAD_FUNC
#undef INTERESTING_FUNC
#undef SHENG_IMPL

/* there is no performance benefit in accelerating a no-match case that can't
 * die, so only the unaccelerated flavour exists */

/* sheng4_nm: no match, can't die */
#define SHENG_IMPL sheng4_nm
#define INTERESTING_FUNC dummyFunc4
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef STOP_AT_MATCH
#undef ACCEPT_FUNC
#undef OUTER_ACCEL_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_DEAD_FUNC
#undef INTERESTING_FUNC
#undef SHENG_IMPL
|
||||
|
||||
#endif // SHENG_DEFS_H
|
97
src/nfa/sheng_impl.h
Normal file
97
src/nfa/sheng_impl.h
Normal file
@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* In order to use this macro, the following things need to be defined:
|
||||
*
|
||||
* - SHENG_IMPL (name of the Sheng implementation function)
|
||||
* - DEAD_FUNC (name of the function checking for dead states)
|
||||
* - ACCEPT_FUNC (name of the function checking for accept state)
|
||||
* - STOP_AT_MATCH (can be 1 or 0, enable or disable stop at match)
|
||||
*/
|
||||
|
||||
/* byte-by-byte version. we don't do byte-by-byte death checking as it's
|
||||
* pretty pointless to do it over a buffer that's at most 3 bytes long */
|
||||
static really_inline
|
||||
char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
|
||||
u8 *const cached_accept_state, ReportID *const cached_accept_id,
|
||||
u8 single, u64a base_offset, const u8 *buf, const u8 *start,
|
||||
const u8 *end, const u8 **scan_end) {
|
||||
DEBUG_PRINTF("Starting DFA execution in state %u\n",
|
||||
*state & SHENG_STATE_MASK);
|
||||
const u8 *cur_buf = start;
|
||||
if (DEAD_FUNC(*state)) {
|
||||
DEBUG_PRINTF("Dead on arrival\n");
|
||||
*scan_end = end;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
|
||||
|
||||
m128 cur_state = set16x8(*state);
|
||||
const m128 *masks = s->shuffle_masks;
|
||||
|
||||
while (likely(cur_buf != end)) {
|
||||
const u8 c = *cur_buf;
|
||||
const m128 shuffle_mask = masks[c];
|
||||
cur_state = pshufb(shuffle_mask, cur_state);
|
||||
const u8 tmp = movd(cur_state);
|
||||
|
||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
|
||||
DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", tmp, (tmp & 0xF0) >> 4,
|
||||
tmp & 0xF);
|
||||
|
||||
if (unlikely(ACCEPT_FUNC(tmp))) {
|
||||
DEBUG_PRINTF("Accept state %u reached\n", tmp & SHENG_STATE_MASK);
|
||||
u64a match_offset = base_offset + (cur_buf - buf) + 1;
|
||||
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||
if (STOP_AT_MATCH) {
|
||||
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||
(u64a)(cur_buf - start));
|
||||
*state = tmp;
|
||||
*scan_end = cur_buf;
|
||||
return MO_MATCHES_PENDING;
|
||||
}
|
||||
if (single) {
|
||||
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||
MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
} else {
|
||||
if (fireReports(s, cb, ctxt, tmp, match_offset,
|
||||
cached_accept_state, cached_accept_id,
|
||||
0) == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
}
|
||||
}
|
||||
cur_buf++;
|
||||
}
|
||||
*state = movd(cur_state);
|
||||
*scan_end = cur_buf;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
284
src/nfa/sheng_impl4.h
Normal file
284
src/nfa/sheng_impl4.h
Normal file
@ -0,0 +1,284 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* In order to use this macro, the following things need to be defined:
|
||||
*
|
||||
* - SHENG_IMPL (name of the Sheng implementation function)
|
||||
* - INTERESTING_FUNC (name of the function checking for accept, accel or dead
|
||||
* states)
|
||||
* - INNER_DEAD_FUNC (name of the inner function checking for dead states)
|
||||
* - OUTER_DEAD_FUNC (name of the outer function checking for dead states)
|
||||
* - INNER_ACCEL_FUNC (name of the inner function checking for accel states)
|
||||
* - OUTER_ACCEL_FUNC (name of the outer function checking for accel states)
|
||||
* - ACCEPT_FUNC (name of the function checking for accept state)
|
||||
* - STOP_AT_MATCH (can be 1 or 0, enable or disable stop at match)
|
||||
*/
|
||||
|
||||
/* unrolled 4-byte-at-a-time version.
|
||||
*
|
||||
* we put innerDeadFunc inside interestingFunc() block so that we don't pay for
|
||||
* dead states checking. however, if interestingFunc is dummy, innerDeadFunc
|
||||
* gets lost with it, so we need an additional check outside the
|
||||
* interestingFunc() branch - it's normally dummy so we don't pay for it, but
|
||||
* when interestingFunc is dummy, outerDeadFunc should be set if we want to
|
||||
* check for dead states.
|
||||
*
|
||||
* also, deadFunc only checks the last known state, but since we can't ever get
|
||||
* out of the dead state and we don't really care where we died, it's not a
|
||||
* problem.
|
||||
*/
|
||||
static really_inline
|
||||
char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
|
||||
u8 *const cached_accept_state, ReportID *const cached_accept_id,
|
||||
u8 single, u64a base_offset, const u8 *buf, const u8 *start,
|
||||
const u8 *end, const u8 **scan_end) {
|
||||
DEBUG_PRINTF("Starting DFAx4 execution in state %u\n",
|
||||
*state & SHENG_STATE_MASK);
|
||||
const u8 *cur_buf = start;
|
||||
const u8 *min_accel_dist = start;
|
||||
base_offset++;
|
||||
DEBUG_PRINTF("Scanning %llu bytes\n", (u64a)(end - start));
|
||||
|
||||
if (INNER_ACCEL_FUNC(*state) || OUTER_ACCEL_FUNC(*state)) {
|
||||
DEBUG_PRINTF("Accel state reached @ 0\n");
|
||||
const union AccelAux *aaux = get_accel(s, *state & SHENG_STATE_MASK);
|
||||
const u8 *new_offset = run_accel(aaux, cur_buf, end);
|
||||
if (new_offset < cur_buf + BAD_ACCEL_DIST) {
|
||||
min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
|
||||
} else {
|
||||
min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
|
||||
}
|
||||
DEBUG_PRINTF("Next accel chance: %llu\n",
|
||||
(u64a)(min_accel_dist - start));
|
||||
DEBUG_PRINTF("Accel scanned %zu bytes\n", new_offset - cur_buf);
|
||||
cur_buf = new_offset;
|
||||
DEBUG_PRINTF("New offset: %lli\n", (s64a)(cur_buf - start));
|
||||
}
|
||||
if (INNER_DEAD_FUNC(*state) || OUTER_DEAD_FUNC(*state)) {
|
||||
DEBUG_PRINTF("Dead on arrival\n");
|
||||
*scan_end = end;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
m128 cur_state = set16x8(*state);
|
||||
const m128 *masks = s->shuffle_masks;
|
||||
|
||||
while (likely(end - cur_buf >= 4)) {
|
||||
const u8 *b1 = cur_buf;
|
||||
const u8 *b2 = cur_buf + 1;
|
||||
const u8 *b3 = cur_buf + 2;
|
||||
const u8 *b4 = cur_buf + 3;
|
||||
const u8 c1 = *b1;
|
||||
const u8 c2 = *b2;
|
||||
const u8 c3 = *b3;
|
||||
const u8 c4 = *b4;
|
||||
|
||||
const m128 shuffle_mask1 = masks[c1];
|
||||
cur_state = pshufb(shuffle_mask1, cur_state);
|
||||
const u8 a1 = movd(cur_state);
|
||||
|
||||
const m128 shuffle_mask2 = masks[c2];
|
||||
cur_state = pshufb(shuffle_mask2, cur_state);
|
||||
const u8 a2 = movd(cur_state);
|
||||
|
||||
const m128 shuffle_mask3 = masks[c3];
|
||||
cur_state = pshufb(shuffle_mask3, cur_state);
|
||||
const u8 a3 = movd(cur_state);
|
||||
|
||||
const m128 shuffle_mask4 = masks[c4];
|
||||
cur_state = pshufb(shuffle_mask4, cur_state);
|
||||
const u8 a4 = movd(cur_state);
|
||||
|
||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
|
||||
DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a1, (a1 & 0xF0) >> 4, a1 & 0xF);
|
||||
|
||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c2, ourisprint(c2) ? c2 : '?');
|
||||
DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a2, (a2 & 0xF0) >> 4, a2 & 0xF);
|
||||
|
||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c3, ourisprint(c3) ? c3 : '?');
|
||||
DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a3, (a3 & 0xF0) >> 4, a3 & 0xF);
|
||||
|
||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c4, ourisprint(c4) ? c4 : '?');
|
||||
DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a4, (a4 & 0xF0) >> 4, a4 & 0xF);
|
||||
|
||||
if (unlikely(INTERESTING_FUNC(a1, a2, a3, a4))) {
|
||||
if (ACCEPT_FUNC(a1)) {
|
||||
u64a match_offset = base_offset + b1 - buf;
|
||||
DEBUG_PRINTF("Accept state %u reached\n",
|
||||
a1 & SHENG_STATE_MASK);
|
||||
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||
if (STOP_AT_MATCH) {
|
||||
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||
(s64a)(b1 - start));
|
||||
*scan_end = b1;
|
||||
*state = a1;
|
||||
return MO_MATCHES_PENDING;
|
||||
}
|
||||
if (single) {
|
||||
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||
MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
} else {
|
||||
if (fireReports(s, cb, ctxt, a1, match_offset,
|
||||
cached_accept_state, cached_accept_id,
|
||||
0) == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ACCEPT_FUNC(a2)) {
|
||||
u64a match_offset = base_offset + b2 - buf;
|
||||
DEBUG_PRINTF("Accept state %u reached\n",
|
||||
a2 & SHENG_STATE_MASK);
|
||||
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||
if (STOP_AT_MATCH) {
|
||||
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||
(s64a)(b2 - start));
|
||||
*scan_end = b2;
|
||||
*state = a2;
|
||||
return MO_MATCHES_PENDING;
|
||||
}
|
||||
if (single) {
|
||||
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||
MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
} else {
|
||||
if (fireReports(s, cb, ctxt, a2, match_offset,
|
||||
cached_accept_state, cached_accept_id,
|
||||
0) == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ACCEPT_FUNC(a3)) {
|
||||
u64a match_offset = base_offset + b3 - buf;
|
||||
DEBUG_PRINTF("Accept state %u reached\n",
|
||||
a3 & SHENG_STATE_MASK);
|
||||
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||
if (STOP_AT_MATCH) {
|
||||
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||
(s64a)(b3 - start));
|
||||
*scan_end = b3;
|
||||
*state = a3;
|
||||
return MO_MATCHES_PENDING;
|
||||
}
|
||||
if (single) {
|
||||
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||
MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
} else {
|
||||
if (fireReports(s, cb, ctxt, a3, match_offset,
|
||||
cached_accept_state, cached_accept_id,
|
||||
0) == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ACCEPT_FUNC(a4)) {
|
||||
u64a match_offset = base_offset + b4 - buf;
|
||||
DEBUG_PRINTF("Accept state %u reached\n",
|
||||
a4 & SHENG_STATE_MASK);
|
||||
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||
if (STOP_AT_MATCH) {
|
||||
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||
(s64a)(b4 - start));
|
||||
*scan_end = b4;
|
||||
*state = a4;
|
||||
return MO_MATCHES_PENDING;
|
||||
}
|
||||
if (single) {
|
||||
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||
MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
} else {
|
||||
if (fireReports(s, cb, ctxt, a4, match_offset,
|
||||
cached_accept_state, cached_accept_id,
|
||||
0) == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (INNER_DEAD_FUNC(a4)) {
|
||||
DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(b4 - buf));
|
||||
*scan_end = end;
|
||||
*state = a4;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
if (cur_buf > min_accel_dist && INNER_ACCEL_FUNC(a4)) {
|
||||
DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf));
|
||||
const union AccelAux *aaux =
|
||||
get_accel(s, a4 & SHENG_STATE_MASK);
|
||||
const u8 *new_offset = run_accel(aaux, cur_buf + 4, end);
|
||||
if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) {
|
||||
min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
|
||||
} else {
|
||||
min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
|
||||
}
|
||||
DEBUG_PRINTF("Next accel chance: %llu\n",
|
||||
(u64a)(min_accel_dist - start));
|
||||
DEBUG_PRINTF("Accel scanned %llu bytes\n",
|
||||
(u64a)(new_offset - cur_buf - 4));
|
||||
cur_buf = new_offset;
|
||||
DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (OUTER_DEAD_FUNC(a4)) {
|
||||
DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(cur_buf - buf));
|
||||
*scan_end = end;
|
||||
*state = a4;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
};
|
||||
if (cur_buf > min_accel_dist && OUTER_ACCEL_FUNC(a4)) {
|
||||
DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf));
|
||||
const union AccelAux *aaux = get_accel(s, a4 & SHENG_STATE_MASK);
|
||||
const u8 *new_offset = run_accel(aaux, cur_buf + 4, end);
|
||||
if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) {
|
||||
min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
|
||||
} else {
|
||||
min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
|
||||
}
|
||||
DEBUG_PRINTF("Next accel chance: %llu\n",
|
||||
(u64a)(min_accel_dist - start));
|
||||
DEBUG_PRINTF("Accel scanned %llu bytes\n",
|
||||
(u64a)(new_offset - cur_buf - 4));
|
||||
cur_buf = new_offset;
|
||||
DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf));
|
||||
continue;
|
||||
};
|
||||
cur_buf += 4;
|
||||
}
|
||||
*state = movd(cur_state);
|
||||
*scan_end = cur_buf;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
70
src/nfa/sheng_internal.h
Normal file
70
src/nfa/sheng_internal.h
Normal file
@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef SHENG_INTERNAL_H_
|
||||
#define SHENG_INTERNAL_H_
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "util/simd_utils.h"
|
||||
|
||||
/*
 * A Sheng state is stored in a single byte: the low nibble holds the state
 * index and the bits above it carry per-state flags.
 */
#define SHENG_STATE_MASK 0xF       /* bits holding the state index */
#define SHENG_STATE_FLAG_MASK 0x70 /* all per-state flag bits */
#define SHENG_STATE_ACCEPT 0x10    /* state has a non-empty report set */
#define SHENG_STATE_DEAD 0x20      /* state is the dead state */
#define SHENG_STATE_ACCEL 0x40     /* state has an acceleration scheme */

/* Engine-wide flags kept in sheng::flags. */
#define SHENG_FLAG_SINGLE_REPORT 0x1 /* sheng::report holds the only report */
#define SHENG_FLAG_CAN_DIE 0x2       /* the DFA can reach the dead state */
#define SHENG_FLAG_HAS_ACCEL 0x4     /* at least one state is accelerated */
|
||||
|
||||
struct report_list {
|
||||
u32 count;
|
||||
ReportID report[];
|
||||
};
|
||||
|
||||
struct sstate_aux {
|
||||
u32 accept;
|
||||
u32 accept_eod;
|
||||
u32 accel;
|
||||
u32 top;
|
||||
};
|
||||
|
||||
struct sheng {
|
||||
m128 shuffle_masks[256];
|
||||
u32 length;
|
||||
u32 aux_offset;
|
||||
u32 report_offset;
|
||||
u32 accel_offset;
|
||||
u8 n_states;
|
||||
u8 anchored;
|
||||
u8 floating;
|
||||
u8 flags;
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
#endif /* SHENG_INTERNAL_H_ */
|
541
src/nfa/shengcompile.cpp
Normal file
541
src/nfa/shengcompile.cpp
Normal file
@ -0,0 +1,541 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "shengcompile.h"
|
||||
|
||||
#include "accel.h"
|
||||
#include "accelcompile.h"
|
||||
#include "shufticompile.h"
|
||||
#include "trufflecompile.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/container.h"
|
||||
#include "util/order_check.h"
|
||||
#include "util/report_manager.h"
|
||||
#include "util/unaligned.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "nfa_internal.h"
|
||||
#include "sheng_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/verify_types.h"
|
||||
#include "util/simd_utils.h"
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
|
||||
using namespace std;
|
||||
using boost::adaptors::map_keys;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** Value reported by sheng_build_strat::max_allowed_offset_accel(). */
#define ACCEL_DFA_MAX_OFFSET_DEPTH 4

/** Maximum tolerated number of escape characters from an accel state.
 * This is larger than for the NFA, as we don't have a budget and the NFA
 * cheats on stop characters for sets of states. */
#define ACCEL_DFA_MAX_STOP_CHAR 160

/** Maximum tolerated number of escape characters from an sds accel state.
 * Larger than for normal states, as accelerating sds is important. Matches the
 * NFA value. */
#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192
|
||||
|
||||
struct dfa_info {
|
||||
accel_dfa_build_strat &strat;
|
||||
raw_dfa &raw;
|
||||
vector<dstate> &states;
|
||||
dstate &floating;
|
||||
dstate &anchored;
|
||||
bool can_die;
|
||||
|
||||
explicit dfa_info(accel_dfa_build_strat &s)
|
||||
: strat(s), raw(strat.get_raw()), states(raw.states),
|
||||
floating(states[raw.start_floating]),
|
||||
anchored(states[raw.start_anchored]), can_die(dfaCanDie(raw)) {}
|
||||
|
||||
// returns adjusted size
|
||||
size_t size() const {
|
||||
return can_die ? states.size() : states.size() - 1;
|
||||
}
|
||||
// expects adjusted index
|
||||
dstate &operator[](dstate_id_t idx) {
|
||||
return states[raw_id(idx)];
|
||||
}
|
||||
dstate &top(dstate_id_t idx) {
|
||||
if (isDead(idx)) {
|
||||
return floating;
|
||||
}
|
||||
return next(idx, TOP);
|
||||
}
|
||||
dstate &next(dstate_id_t idx, u16 chr) {
|
||||
auto &src = (*this)[idx];
|
||||
auto next_id = src.next[raw.alpha_remap[chr]];
|
||||
return states[next_id];
|
||||
}
|
||||
// get original idx from adjusted idx
|
||||
dstate_id_t raw_id(dstate_id_t idx) {
|
||||
assert(idx < size());
|
||||
// if DFA can't die, shift all indices left by 1
|
||||
return can_die ? idx : idx + 1;
|
||||
}
|
||||
bool isDead(dstate &state) {
|
||||
return raw_id(state.impl_id) == DEAD_STATE;
|
||||
}
|
||||
bool isDead(dstate_id_t idx) {
|
||||
return raw_id(idx) == DEAD_STATE;
|
||||
}
|
||||
|
||||
private:
|
||||
static bool dfaCanDie(raw_dfa &rdfa) {
|
||||
for (unsigned chr = 0; chr < 256; chr++) {
|
||||
for (dstate_id_t state = 0; state < rdfa.states.size(); state++) {
|
||||
auto succ = rdfa.states[state].next[rdfa.alpha_remap[chr]];
|
||||
if (succ == DEAD_STATE) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
namespace {
|
||||
|
||||
struct raw_report_list {
|
||||
flat_set<ReportID> reports;
|
||||
|
||||
raw_report_list(const flat_set<ReportID> &reports_in,
|
||||
const ReportManager &rm, bool do_remap) {
|
||||
if (do_remap) {
|
||||
for (auto &id : reports_in) {
|
||||
reports.insert(rm.getProgramOffset(id));
|
||||
}
|
||||
} else {
|
||||
reports = reports_in;
|
||||
}
|
||||
}
|
||||
|
||||
bool operator<(const raw_report_list &b) const {
|
||||
return reports < b.reports;
|
||||
}
|
||||
};
|
||||
|
||||
struct raw_report_info_impl : public raw_report_info {
|
||||
vector<raw_report_list> rl;
|
||||
u32 getReportListSize() const override;
|
||||
size_t size() const override;
|
||||
void fillReportLists(NFA *n, size_t base_offset,
|
||||
std::vector<u32> &ro /* out */) const override;
|
||||
};
|
||||
}
|
||||
|
||||
/** Total bytes required for all report lists: one report_list header plus one
 * ReportID per entry, for each list. */
u32 raw_report_info_impl::getReportListSize() const {
    u32 total = 0;
    for (const auto &list : rl) {
        total += sizeof(report_list) + sizeof(ReportID) * list.reports.size();
    }
    return total;
}
|
||||
|
||||
size_t raw_report_info_impl::size() const {
|
||||
return rl.size();
|
||||
}
|
||||
|
||||
/**
 * Serialise every report list into the engine.
 *
 * \param n NFA being built; lists are written at byte offsets from its start
 * \param base_offset offset at which the first list is written
 * \param ro receives, per list and in order, the offset it was written at
 */
void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset,
                                           vector<u32> &ro) const {
    char *const nfa_base = (char *)n;

    for (const auto &list : rl) {
        ro.push_back(base_offset);

        report_list *out = (report_list *)(nfa_base + base_offset);
        out->count = verify_u32(list.reports.size());

        u32 slot = 0;
        for (const ReportID id : list.reports) {
            out->report[slot++] = id;
        }

        // step over the header and the entries just written
        base_offset +=
            sizeof(report_list) + sizeof(ReportID) * list.reports.size();
    }
}
|
||||
|
||||
/**
 * Collect the distinct report lists used by the DFA's states.
 *
 * \param reports out: per raw state, index of its accept report list in the
 *        returned info, or MO_INVALID_IDX if the state has no reports
 * \param reports_eod out: as \p reports, for EOD reports
 * \param isSingleReport out: 1 if all non-EOD accepts fire exactly one report
 *        ID in total, 0 otherwise
 * \param arbReport out: that single report when *isSingleReport is 1;
 *        otherwise the first report of the first list (0 if there is none)
 * \return the gathered report lists
 */
unique_ptr<raw_report_info> sheng_build_strat::gatherReports(
        vector<u32> &reports, vector<u32> &reports_eod, u8 *isSingleReport,
        ReportID *arbReport) const {
    DEBUG_PRINTF("gathering reports\n");

    const bool remap_reports = has_managed_reports(rdfa.kind);

    auto ri = ue2::make_unique<raw_report_info_impl>();
    map<raw_report_list, u32> rev; // report list -> its index in ri->rl

    // accept reports
    for (const dstate &s : rdfa.states) {
        if (s.reports.empty()) {
            reports.push_back(MO_INVALID_IDX);
            continue;
        }

        raw_report_list rrl(s.reports, rm, remap_reports);
        DEBUG_PRINTF("non empty r\n");
        auto known = rev.find(rrl);
        if (known != rev.end()) {
            reports.push_back(known->second);
            continue;
        }

        DEBUG_PRINTF("adding to rl %zu\n", ri->size());
        const u32 fresh = ri->size();
        rev.emplace(rrl, fresh);
        reports.push_back(fresh);
        ri->rl.push_back(rrl);
    }

    // EOD reports; lists are shared with the accept lists where identical
    for (const dstate &s : rdfa.states) {
        if (s.reports_eod.empty()) {
            reports_eod.push_back(MO_INVALID_IDX);
            continue;
        }

        DEBUG_PRINTF("non empty r eod\n");
        raw_report_list rrl(s.reports_eod, rm, remap_reports);
        auto known = rev.find(rrl);
        if (known != rev.end()) {
            reports_eod.push_back(known->second);
            continue;
        }

        DEBUG_PRINTF("adding to rl eod %zu\n", s.reports_eod.size());
        const u32 fresh = ri->size();
        rev.emplace(rrl, fresh);
        reports_eod.push_back(fresh);
        ri->rl.push_back(rrl);
    }

    assert(!ri->rl.empty()); /* all components should be able to generate
                                reports */
    *arbReport = ri->rl.empty() ? 0 : *ri->rl.begin()->reports.begin();

    /* if we have only a single report id generated from all accepts (not eod)
     * we can take some short cuts */
    set<ReportID> reps;
    for (u32 rl_index : reports) {
        if (rl_index != MO_INVALID_IDX) {
            assert(rl_index < ri->size());
            insert(&reps, ri->rl[rl_index].reports);
        }
    }

    const bool single = reps.size() == 1;
    *isSingleReport = single ? 1 : 0;
    if (single) {
        *arbReport = *reps.begin();
        DEBUG_PRINTF("single -- %u\n", *arbReport);
    }

    // explicit move: the return type is unique_ptr to the base class
    return move(ri);
}
|
||||
|
||||
/** Returns ACCEL_DFA_MAX_OFFSET_DEPTH. */
u32 sheng_build_strat::max_allowed_offset_accel() const {
    return ACCEL_DFA_MAX_OFFSET_DEPTH;
}
|
||||
|
||||
/** Returns ACCEL_DFA_MAX_STOP_CHAR. */
u32 sheng_build_strat::max_stop_char() const {
    return ACCEL_DFA_MAX_STOP_CHAR;
}
|
||||
|
||||
/** Returns ACCEL_DFA_MAX_FLOATING_STOP_CHAR. */
u32 sheng_build_strat::max_floating_stop_char() const {
    return ACCEL_DFA_MAX_FLOATING_STOP_CHAR;
}
|
||||
|
||||
size_t sheng_build_strat::accelSize() const {
|
||||
return sizeof(AccelAux);
|
||||
}
|
||||
|
||||
#ifdef DEBUG
/** Debug helper: print the state index (flag bits masked off) held in each of
 * the \p sz bytes of the shuffle mask built for input byte \p chr. */
static really_inline
void dumpShuffleMask(const u8 chr, const u8 *buf, unsigned sz) {
    stringstream line;
    const u8 *const buf_end = buf + sz;

    for (const u8 *p = buf; p != buf_end; ++p) {
        line.width(2);
        line << (*p & SHENG_STATE_MASK) << " ";
    }
    DEBUG_PRINTF("chr %3u: %s\n", chr, line.str().c_str());
}
#endif
|
||||
|
||||
static
|
||||
void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info,
|
||||
set<dstate_id_t> *accel_states) {
|
||||
for (dstate_id_t i : accel_escape_info | map_keys) {
|
||||
accel_states->insert(i);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
u8 getShengState(dstate &state, dfa_info &info,
|
||||
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
u8 s = state.impl_id;
|
||||
if (!state.reports.empty()) {
|
||||
s |= SHENG_STATE_ACCEPT;
|
||||
}
|
||||
if (info.isDead(state)) {
|
||||
s |= SHENG_STATE_DEAD;
|
||||
}
|
||||
if (accelInfo.find(info.raw_id(state.impl_id)) != accelInfo.end()) {
|
||||
s |= SHENG_STATE_ACCEL;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
static
|
||||
void fillAccelAux(struct NFA *n, dfa_info &info,
|
||||
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
DEBUG_PRINTF("Filling accel aux structures\n");
|
||||
sheng *s = (sheng *)getMutableImplNfa(n);
|
||||
u32 offset = s->accel_offset;
|
||||
|
||||
for (dstate_id_t i = 0; i < info.size(); i++) {
|
||||
dstate_id_t state_id = info.raw_id(i);
|
||||
if (accelInfo.find(state_id) != accelInfo.end()) {
|
||||
s->flags |= SHENG_FLAG_HAS_ACCEL;
|
||||
AccelAux *aux = (AccelAux *)((char *)n + offset);
|
||||
info.strat.buildAccel(state_id, accelInfo[state_id], aux);
|
||||
sstate_aux *saux =
|
||||
(sstate_aux *)((char *)n + s->aux_offset) + state_id;
|
||||
saux->accel = offset;
|
||||
DEBUG_PRINTF("Accel offset: %u\n", offset);
|
||||
offset += ROUNDUP_N(sizeof(AccelAux), alignof(AccelAux));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void populateBasicInfo(struct NFA *n, dfa_info &info,
|
||||
map<dstate_id_t, AccelScheme> &accelInfo, u32 aux_offset,
|
||||
u32 report_offset, u32 accel_offset, u32 total_size,
|
||||
u32 dfa_size) {
|
||||
n->length = total_size;
|
||||
n->scratchStateSize = 1;
|
||||
n->streamStateSize = 1;
|
||||
n->nPositions = info.size();
|
||||
n->type = SHENG_NFA_0;
|
||||
n->flags |= info.raw.hasEodReports() ? NFA_ACCEPTS_EOD : 0;
|
||||
|
||||
sheng *s = (sheng *)getMutableImplNfa(n);
|
||||
s->aux_offset = aux_offset;
|
||||
s->report_offset = report_offset;
|
||||
s->accel_offset = accel_offset;
|
||||
s->n_states = info.size();
|
||||
s->length = dfa_size;
|
||||
s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0;
|
||||
|
||||
s->anchored = getShengState(info.anchored, info, accelInfo);
|
||||
s->floating = getShengState(info.floating, info, accelInfo);
|
||||
}
|
||||
|
||||
static
|
||||
void fillTops(NFA *n, dfa_info &info, dstate_id_t id,
|
||||
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
sheng *s = (sheng *)getMutableImplNfa(n);
|
||||
u32 aux_base = s->aux_offset;
|
||||
|
||||
DEBUG_PRINTF("Filling tops for state %u\n", id);
|
||||
|
||||
sstate_aux *aux = (sstate_aux *)((char *)n + aux_base) + id;
|
||||
|
||||
DEBUG_PRINTF("Aux structure for state %u, offset %zd\n", id,
|
||||
(char *)aux - (char *)n);
|
||||
|
||||
/* we could conceivably end up in an accept/dead state on a top event,
|
||||
* so mark top as accept/dead state if it indeed is.
|
||||
*/
|
||||
auto &top_state = info.top(id);
|
||||
|
||||
DEBUG_PRINTF("Top transition for state %u: %u\n", id, top_state.impl_id);
|
||||
|
||||
aux->top = getShengState(top_state, info, accelInfo);
|
||||
}
|
||||
|
||||
static
|
||||
void fillAux(NFA *n, dfa_info &info, dstate_id_t id, vector<u32> &reports,
|
||||
vector<u32> &reports_eod, vector<u32> &report_offsets) {
|
||||
sheng *s = (sheng *)getMutableImplNfa(n);
|
||||
u32 aux_base = s->aux_offset;
|
||||
auto raw_id = info.raw_id(id);
|
||||
|
||||
auto &state = info[id];
|
||||
|
||||
sstate_aux *aux = (sstate_aux *)((char *)n + aux_base) + id;
|
||||
|
||||
DEBUG_PRINTF("Filling aux and report structures for state %u\n", id);
|
||||
DEBUG_PRINTF("Aux structure for state %u, offset %zd\n", id,
|
||||
(char *)aux - (char *)n);
|
||||
|
||||
aux->accept = state.reports.empty() ? 0 : report_offsets[reports[raw_id]];
|
||||
aux->accept_eod =
|
||||
state.reports_eod.empty() ? 0 : report_offsets[reports_eod[raw_id]];
|
||||
|
||||
DEBUG_PRINTF("Report list offset: %u\n", aux->accept);
|
||||
DEBUG_PRINTF("EOD report list offset: %u\n", aux->accept_eod);
|
||||
}
|
||||
|
||||
static
|
||||
void fillSingleReport(NFA *n, ReportID r_id) {
|
||||
sheng *s = (sheng *)getMutableImplNfa(n);
|
||||
|
||||
DEBUG_PRINTF("Single report ID: %u\n", r_id);
|
||||
s->report = r_id;
|
||||
s->flags |= SHENG_FLAG_SINGLE_REPORT;
|
||||
}
|
||||
|
||||
static
|
||||
void createShuffleMasks(sheng *s, dfa_info &info,
|
||||
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
for (u16 chr = 0; chr < 256; chr++) {
|
||||
u8 buf[16] = {0};
|
||||
|
||||
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
|
||||
auto &succ_state = info.next(idx, chr);
|
||||
|
||||
buf[idx] = getShengState(succ_state, info, accelInfo);
|
||||
}
|
||||
#ifdef DEBUG
|
||||
dumpShuffleMask(chr, buf, sizeof(buf));
|
||||
#endif
|
||||
m128 mask = loadu128(buf);
|
||||
s->shuffle_masks[chr] = mask;
|
||||
}
|
||||
}
|
||||
|
||||
bool has_accel_sheng(const NFA *nfa) {
|
||||
const sheng *s = (const sheng *)getImplNfa(nfa);
|
||||
return s->flags & SHENG_FLAG_HAS_ACCEL;
|
||||
}
|
||||
|
||||
/**
 * Compile \p raw into a Sheng engine.
 *
 * Engine layout, in order: NFA header and sheng structure (rounded up to 16
 * bytes), the sstate_aux table, the report lists, the AccelAux structures.
 *
 * \param raw DFA to compile; impl_id is assigned on its states, and extra EOD
 *        reports are stripped when not in streaming mode
 * \param cc compile context (grey box settings, streaming flag)
 * \param rm report manager
 * \param accel_states if non-null, receives the ids of accelerated states
 * \return the engine, or nullptr if Sheng is disabled in the grey box or the
 *         DFA has more than 16 states
 */
aligned_unique_ptr<NFA> shengCompile(raw_dfa &raw,
                                     const CompileContext &cc,
                                     const ReportManager &rm,
                                     set<dstate_id_t> *accel_states) {
    if (!cc.grey.allowSheng) {
        DEBUG_PRINTF("Sheng is not allowed!\n");
        return nullptr;
    }

    sheng_build_strat strat(raw, rm);
    dfa_info info(strat);

    DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size());

    DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n",
                 raw.start_anchored, raw.start_floating);

    DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n",
                 info.can_die ? "can" : "cannot", info.size());

    // a state index has to fit in one lane of a 16-byte shuffle mask
    if (info.size() > 16) {
        DEBUG_PRINTF("Too many states\n");
        return nullptr;
    }

    if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
                          * mode with our semantics */
        raw.stripExtraEodReports();
    }
    auto accelInfo = strat.getAccelInfo(cc.grey);

    // set impl_id of each dfa state
    const dstate_id_t n_states = info.size();
    for (dstate_id_t i = 0; i < n_states; i++) {
        info[i].impl_id = i;
    }

    DEBUG_PRINTF("Anchored start state: %u, floating start state: %u\n",
                 info.anchored.impl_id, info.floating.impl_id);

    vector<u32> reports, eod_reports, report_offsets;
    u8 isSingle = 0;
    ReportID single_report = 0;

    auto ri =
        strat.gatherReports(reports, eod_reports, &isSingle, &single_report);

    // region sizes
    u32 nfa_size = ROUNDUP_16(sizeof(NFA) + sizeof(sheng));
    u32 total_aux = sizeof(sstate_aux) * info.size();
    u32 total_reports = ri->getReportListSize();
    u32 total_accel = strat.accelSize() * accelInfo.size();

    // region offsets; the aux table starts right after the headers
    u32 reports_offset = nfa_size + total_aux;
    u32 accel_offset =
        ROUNDUP_N(reports_offset + total_reports, alignof(AccelAux));
    u32 total_size = ROUNDUP_N(accel_offset + total_accel, 64);

    DEBUG_PRINTF("NFA: %u, aux: %u, reports: %u, accel: %u, total: %u\n",
                 nfa_size, total_aux, total_reports, total_accel, total_size);

    aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);
    NFA *const n = nfa.get();

    populateBasicInfo(n, info, accelInfo, nfa_size, reports_offset,
                      accel_offset, total_size, total_size - sizeof(NFA));

    DEBUG_PRINTF("Setting up aux and report structures\n");

    ri->fillReportLists(n, reports_offset, report_offsets);

    for (dstate_id_t idx = 0; idx < info.size(); idx++) {
        fillTops(n, info, idx, accelInfo);
        fillAux(n, info, idx, reports, eod_reports, report_offsets);
    }

    if (isSingle) {
        fillSingleReport(n, single_report);
    }

    fillAccelAux(n, info, accelInfo);

    if (accel_states) {
        fillAccelOut(accelInfo, accel_states);
    }

    createShuffleMasks((sheng *)getMutableImplNfa(n), info, accelInfo);

    return nfa;
}
|
||||
|
||||
} // namespace ue2
|
80
src/nfa/shengcompile.h
Normal file
80
src/nfa/shengcompile.h
Normal file
@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef SHENGCOMPILE_H_
|
||||
#define SHENGCOMPILE_H_
|
||||
|
||||
#include "accel_dfa_build_strat.h"
|
||||
#include "rdfa.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
struct NFA;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class ReportManager;
|
||||
struct CompileContext;
|
||||
struct raw_dfa;
|
||||
|
||||
class sheng_build_strat : public accel_dfa_build_strat {
|
||||
public:
|
||||
sheng_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in)
|
||||
: accel_dfa_build_strat(rm_in), rdfa(rdfa_in) {}
|
||||
raw_dfa &get_raw() const override { return rdfa; }
|
||||
std::unique_ptr<raw_report_info> gatherReports(
|
||||
std::vector<u32> &reports /* out */,
|
||||
std::vector<u32> &reports_eod /* out */,
|
||||
u8 *isSingleReport /* out */,
|
||||
ReportID *arbReport /* out */) const override;
|
||||
size_t accelSize(void) const override;
|
||||
u32 max_allowed_offset_accel() const override;
|
||||
u32 max_stop_char() const override;
|
||||
u32 max_floating_stop_char() const override;
|
||||
|
||||
private:
|
||||
raw_dfa &rdfa;
|
||||
};
|
||||
|
||||
/**
 * Compile \p raw into a Sheng engine.
 *
 * \param accel_states if non-null, receives the ids of the accelerated states
 * \return the engine, or nullptr if it could not be built
 */
aligned_unique_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc,
                                     const ReportManager &rm,
                                     std::set<dstate_id_t> *accel_states = nullptr);
|
||||
|
||||
struct sheng_escape_info {
|
||||
CharReach outs;
|
||||
CharReach outs2_single;
|
||||
flat_set<std::pair<u8, u8>> outs2;
|
||||
bool outs2_broken = false;
|
||||
};
|
||||
|
||||
/** True if the Sheng engine in \p nfa contains at least one accelerated
 * state. */
bool has_accel_sheng(const NFA *nfa);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif /* SHENGCOMPILE_H_ */
|
265
src/nfa/shengdump.cpp
Normal file
265
src/nfa/shengdump.cpp
Normal file
@ -0,0 +1,265 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "shengdump.h"
|
||||
|
||||
#include "accel_dump.h"
|
||||
#include "nfa_dump_internal.h"
|
||||
#include "nfa_internal.h"
|
||||
#include "sheng_internal.h"
|
||||
#include "rdfa.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/simd_utils.h"
|
||||
|
||||
|
||||
#ifndef DUMP_SUPPORT
|
||||
#error No dump support!
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
const sstate_aux *get_aux(const NFA *n, dstate_id_t i) {
|
||||
assert(n && isShengType(n->type));
|
||||
|
||||
const sheng *s = (const sheng *)getImplNfa(n);
|
||||
const sstate_aux *aux_base =
|
||||
(const sstate_aux *)((const char *)n + s->aux_offset);
|
||||
|
||||
const sstate_aux *aux = aux_base + i;
|
||||
|
||||
assert((const char *)aux < (const char *)s + s->length);
|
||||
|
||||
return aux;
|
||||
}
|
||||
|
||||
static
|
||||
void dumpHeader(FILE *f, const sheng *s) {
|
||||
fprintf(f, "number of states: %u, DFA engine size: %u\n", s->n_states,
|
||||
s->length);
|
||||
fprintf(f, "aux base offset: %u, reports base offset: %u, "
|
||||
"accel offset: %u\n",
|
||||
s->aux_offset, s->report_offset, s->accel_offset);
|
||||
fprintf(f, "anchored start state: %u, floating start state: %u\n",
|
||||
s->anchored & SHENG_STATE_MASK, s->floating & SHENG_STATE_MASK);
|
||||
fprintf(f, "has accel: %u can die: %u single report: %u\n",
|
||||
!!(s->flags & SHENG_FLAG_HAS_ACCEL),
|
||||
!!(s->flags & SHENG_FLAG_CAN_DIE),
|
||||
!!(s->flags & SHENG_FLAG_SINGLE_REPORT));
|
||||
}
|
||||
|
||||
static
|
||||
void dumpAux(FILE *f, u32 state, const sstate_aux *aux) {
|
||||
fprintf(f, "state id: %u, reports offset: %u, EOD reports offset: %u, "
|
||||
"accel offset: %u, top: %u\n",
|
||||
state, aux->accept, aux->accept_eod, aux->accel,
|
||||
aux->top & SHENG_STATE_MASK);
|
||||
}
|
||||
|
||||
static
|
||||
void dumpReports(FILE *f, const report_list *rl) {
|
||||
fprintf(f, "reports count: %u\n", rl->count);
|
||||
for (u32 i = 0; i < rl->count; i++) {
|
||||
fprintf(f, " report: %u, report ID: %u\n", i, rl->report[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void dumpMasks(FILE *f, const sheng *s) {
|
||||
for (u32 chr = 0; chr < 256; chr++) {
|
||||
u8 buf[16];
|
||||
m128 shuffle_mask = s->shuffle_masks[chr];
|
||||
store128(buf, shuffle_mask);
|
||||
|
||||
fprintf(f, "%3u: ", chr);
|
||||
for (u32 pos = 0; pos < 16; pos++) {
|
||||
u8 c = buf[pos];
|
||||
if (c & SHENG_STATE_FLAG_MASK) {
|
||||
fprintf(f, "%2u* ", c & SHENG_STATE_MASK);
|
||||
} else {
|
||||
fprintf(f, "%2u ", c & SHENG_STATE_MASK);
|
||||
}
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
void nfaExecSheng0_dumpText(const NFA *nfa, FILE *f) {
|
||||
assert(nfa->type == SHENG_NFA_0);
|
||||
const sheng *s = (const sheng *)getImplNfa(nfa);
|
||||
|
||||
fprintf(f, "sheng DFA\n");
|
||||
dumpHeader(f, s);
|
||||
|
||||
for (u32 state = 0; state < s->n_states; state++) {
|
||||
const sstate_aux *aux = get_aux(nfa, state);
|
||||
dumpAux(f, state, aux);
|
||||
if (aux->accept) {
|
||||
fprintf(f, "report list:\n");
|
||||
const report_list *rl =
|
||||
(const report_list *)((const char *)nfa + aux->accept);
|
||||
dumpReports(f, rl);
|
||||
}
|
||||
if (aux->accept_eod) {
|
||||
fprintf(f, "EOD report list:\n");
|
||||
const report_list *rl =
|
||||
(const report_list *)((const char *)nfa + aux->accept_eod);
|
||||
dumpReports(f, rl);
|
||||
}
|
||||
if (aux->accel) {
|
||||
fprintf(f, "accel:\n");
|
||||
const AccelAux *accel =
|
||||
(const AccelAux *)((const char *)nfa + aux->accel);
|
||||
dumpAccelInfo(f, *accel);
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(f, "\n");
|
||||
|
||||
dumpMasks(f, s);
|
||||
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
|
||||
static
|
||||
void dumpDotPreambleDfa(FILE *f) {
|
||||
dumpDotPreamble(f);
|
||||
|
||||
// DFA specific additions.
|
||||
fprintf(f, "STARTF [style=invis];\n");
|
||||
fprintf(f, "STARTA [style=invis];\n");
|
||||
fprintf(f, "0 [style=invis];\n");
|
||||
}
|
||||
|
||||
static
|
||||
void describeNode(const NFA *n, const sheng *s, u16 i, FILE *f) {
|
||||
const sstate_aux *aux = get_aux(n, i);
|
||||
|
||||
fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, "
|
||||
"label = \"%u\" ]; \n",
|
||||
i, i);
|
||||
|
||||
if (aux->accept_eod) {
|
||||
fprintf(f, "%u [ color = darkorchid ];\n", i);
|
||||
}
|
||||
|
||||
if (aux->accept) {
|
||||
fprintf(f, "%u [ shape = doublecircle ];\n", i);
|
||||
}
|
||||
|
||||
if (aux->top && (aux->top & SHENG_STATE_MASK) != i) {
|
||||
fprintf(f, "%u -> %u [color = darkgoldenrod weight=0.1 ]\n", i,
|
||||
aux->top & SHENG_STATE_MASK);
|
||||
}
|
||||
|
||||
if (i == (s->anchored & SHENG_STATE_MASK)) {
|
||||
fprintf(f, "STARTA -> %u [color = blue ]\n", i);
|
||||
}
|
||||
|
||||
if (i == (s->floating & SHENG_STATE_MASK)) {
|
||||
fprintf(f, "STARTF -> %u [color = red ]\n", i);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void describeEdge(FILE *f, const u16 *t, u16 i) {
|
||||
for (u16 s = 0; s < N_CHARS; s++) {
|
||||
if (!t[s]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
u16 ss;
|
||||
for (ss = 0; ss < s; ss++) {
|
||||
if (t[s] == t[ss]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (ss != s) {
|
||||
continue;
|
||||
}
|
||||
|
||||
CharReach reach;
|
||||
for (ss = s; ss < 256; ss++) {
|
||||
if (t[s] == t[ss]) {
|
||||
reach.set(ss);
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(f, "%u -> %u [ label = \"", i, t[s]);
|
||||
|
||||
describeClass(f, reach, 5, CC_OUT_DOT);
|
||||
|
||||
fprintf(f, "\" ];\n");
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void shengGetTransitions(const NFA *n, u16 state, u16 *t) {
|
||||
assert(isShengType(n->type));
|
||||
const sheng *s = (const sheng *)getImplNfa(n);
|
||||
const sstate_aux *aux = get_aux(n, state);
|
||||
|
||||
for (unsigned i = 0; i < N_CHARS; i++) {
|
||||
u8 buf[16];
|
||||
m128 shuffle_mask = s->shuffle_masks[i];
|
||||
|
||||
store128(buf, shuffle_mask);
|
||||
|
||||
t[i] = buf[state] & SHENG_STATE_MASK;
|
||||
}
|
||||
|
||||
t[TOP] = aux->top & SHENG_STATE_MASK;
|
||||
}
|
||||
|
||||
void nfaExecSheng0_dumpDot(const NFA *nfa, FILE *f, const string &) {
|
||||
assert(nfa->type == SHENG_NFA_0);
|
||||
const sheng *s = (const sheng *)getImplNfa(nfa);
|
||||
|
||||
dumpDotPreambleDfa(f);
|
||||
|
||||
for (u16 i = 1; i < s->n_states; i++) {
|
||||
describeNode(nfa, s, i, f);
|
||||
|
||||
u16 t[ALPHABET_SIZE];
|
||||
|
||||
shengGetTransitions(nfa, i, t);
|
||||
|
||||
describeEdge(f, t, i);
|
||||
}
|
||||
|
||||
fprintf(f, "}\n");
|
||||
}
|
||||
|
||||
} // namespace ue2
|
49
src/nfa/shengdump.h
Normal file
49
src/nfa/shengdump.h
Normal file
@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef SHENGDUMP_H_
|
||||
#define SHENGDUMP_H_
|
||||
|
||||
#ifdef DUMP_SUPPORT
|
||||
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
|
||||
struct NFA;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/**
 * Write a Graphviz dot description of the Sheng engine \p nfa to \p file.
 * NOTE(review): the definition in shengdump.cpp leaves \p base unnamed and
 * does not use it.
 */
void nfaExecSheng0_dumpDot(const struct NFA *nfa, FILE *file,
|
||||
const std::string &base);
|
||||
/**
 * Write a text description of the Sheng engine \p nfa to \p file.
 */
void nfaExecSheng0_dumpText(const struct NFA *nfa, FILE *file);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // DUMP_SUPPORT
|
||||
|
||||
#endif /* SHENGDUMP_H_ */
|
@ -505,6 +505,9 @@ aligned_unique_ptr<NFA> constructReversedNFA(const NGHolder &h_in, u32 hint,
|
||||
|
||||
u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm,
|
||||
const CompileContext &cc) {
|
||||
if (!cc.grey.allowLimExNFA) {
|
||||
return false;
|
||||
}
|
||||
// Quick check: we can always implement an NFA with less than NFA_MAX_STATES
|
||||
// states. Note that top masks can generate extra states, so we account for
|
||||
// those here too.
|
||||
|
@ -50,6 +50,7 @@
|
||||
#include "nfa/nfa_api_queue.h"
|
||||
#include "nfa/nfa_build_util.h"
|
||||
#include "nfa/nfa_internal.h"
|
||||
#include "nfa/shengcompile.h"
|
||||
#include "nfa/shufticompile.h"
|
||||
#include "nfa/tamaramacompile.h"
|
||||
#include "nfa/tamarama_internal.h"
|
||||
@ -863,13 +864,18 @@ aligned_unique_ptr<NFA> pickImpl(aligned_unique_ptr<NFA> dfa_impl,
|
||||
aligned_unique_ptr<NFA> nfa_impl) {
|
||||
assert(nfa_impl);
|
||||
assert(dfa_impl);
|
||||
assert(isMcClellanType(dfa_impl->type));
|
||||
assert(isDfaType(dfa_impl->type));
|
||||
|
||||
// If our NFA is an LBR, it always wins.
|
||||
if (isLbrType(nfa_impl->type)) {
|
||||
return nfa_impl;
|
||||
}
|
||||
|
||||
// if our DFA is an accelerated Sheng, it always wins.
|
||||
if (isShengType(dfa_impl->type) && has_accel(*dfa_impl)) {
|
||||
return dfa_impl;
|
||||
}
|
||||
|
||||
bool d_accel = has_accel(*dfa_impl);
|
||||
bool n_accel = has_accel(*nfa_impl);
|
||||
bool d_big = dfa_impl->type == MCCLELLAN_NFA_16;
|
||||
@ -922,6 +928,18 @@ buildRepeatEngine(const CastleProto &proto,
|
||||
return castle_nfa;
|
||||
}
|
||||
|
||||
static
|
||||
aligned_unique_ptr<NFA> getDfa(raw_dfa &rdfa, const CompileContext &cc,
|
||||
const ReportManager &rm) {
|
||||
// Unleash the Sheng!!
|
||||
auto dfa = shengCompile(rdfa, cc, rm);
|
||||
if (!dfa) {
|
||||
// Sheng wasn't successful, so unleash McClellan!
|
||||
dfa = mcclellanCompile(rdfa, cc, rm);
|
||||
}
|
||||
return dfa;
|
||||
}
|
||||
|
||||
/* builds suffix nfas */
|
||||
static
|
||||
aligned_unique_ptr<NFA>
|
||||
@ -942,7 +960,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm,
|
||||
}
|
||||
|
||||
if (suff.dfa()) {
|
||||
auto d = mcclellanCompile(*suff.dfa(), cc, rm);
|
||||
auto d = getDfa(*suff.dfa(), cc, rm);
|
||||
assert(d);
|
||||
return d;
|
||||
}
|
||||
@ -971,7 +989,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm,
|
||||
auto rdfa = buildMcClellan(holder, &rm, false, triggers.at(0),
|
||||
cc.grey);
|
||||
if (rdfa) {
|
||||
auto d = mcclellanCompile(*rdfa, cc, rm);
|
||||
auto d = getDfa(*rdfa, cc, rm);
|
||||
assert(d);
|
||||
if (cc.grey.roseMcClellanSuffix != 2) {
|
||||
n = pickImpl(move(d), move(n));
|
||||
@ -1091,12 +1109,13 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left,
|
||||
}
|
||||
|
||||
if (left.dfa()) {
|
||||
n = mcclellanCompile(*left.dfa(), cc, rm);
|
||||
n = getDfa(*left.dfa(), cc, rm);
|
||||
} else if (left.graph() && cc.grey.roseMcClellanPrefix == 2 && is_prefix &&
|
||||
!is_transient) {
|
||||
auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey);
|
||||
if (rdfa) {
|
||||
n = mcclellanCompile(*rdfa, cc, rm);
|
||||
n = getDfa(*rdfa, cc, rm);
|
||||
assert(n);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1122,7 +1141,7 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left,
|
||||
&& (!n || !has_bounded_repeats_other_than_firsts(*n) || !is_fast(*n))) {
|
||||
auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey);
|
||||
if (rdfa) {
|
||||
auto d = mcclellanCompile(*rdfa, cc, rm);
|
||||
auto d = getDfa(*rdfa, cc, rm);
|
||||
assert(d);
|
||||
n = pickImpl(move(d), move(n));
|
||||
}
|
||||
@ -1857,8 +1876,8 @@ public:
|
||||
};
|
||||
|
||||
aligned_unique_ptr<NFA> operator()(unique_ptr<raw_dfa> &rdfa) const {
|
||||
// Unleash the McClellan!
|
||||
return mcclellanCompile(*rdfa, build.cc, build.rm);
|
||||
// Unleash the mighty DFA!
|
||||
return getDfa(*rdfa, build.cc, build.rm);
|
||||
}
|
||||
|
||||
aligned_unique_ptr<NFA> operator()(unique_ptr<raw_som_dfa> &haig) const {
|
||||
@ -1886,7 +1905,7 @@ public:
|
||||
!has_bounded_repeats_other_than_firsts(*n)) {
|
||||
auto rdfa = buildMcClellan(h, &rm, cc.grey);
|
||||
if (rdfa) {
|
||||
auto d = mcclellanCompile(*rdfa, cc, rm);
|
||||
auto d = getDfa(*rdfa, cc, rm);
|
||||
if (d) {
|
||||
n = pickImpl(move(d), move(n));
|
||||
}
|
||||
|
@ -43,6 +43,7 @@
|
||||
#include "nfa/nfa_api_util.h"
|
||||
#include "nfa/nfa_internal.h"
|
||||
#include "nfa/nfa_rev_api.h"
|
||||
#include "nfa/sheng.h"
|
||||
#include "smallwrite/smallwrite_internal.h"
|
||||
#include "rose/rose.h"
|
||||
#include "rose/runtime.h"
|
||||
@ -286,13 +287,16 @@ void runSmallWriteEngine(const struct SmallWriteEngine *smwr,
|
||||
size_t local_alen = length - smwr->start_offset;
|
||||
const u8 *local_buffer = buffer + smwr->start_offset;
|
||||
|
||||
assert(isMcClellanType(nfa->type));
|
||||
assert(isDfaType(nfa->type));
|
||||
if (nfa->type == MCCLELLAN_NFA_8) {
|
||||
nfaExecMcClellan8_B(nfa, smwr->start_offset, local_buffer,
|
||||
local_alen, roseReportAdaptor, scratch);
|
||||
} else {
|
||||
} else if (nfa->type == MCCLELLAN_NFA_16){
|
||||
nfaExecMcClellan16_B(nfa, smwr->start_offset, local_buffer,
|
||||
local_alen, roseReportAdaptor, scratch);
|
||||
} else {
|
||||
nfaExecSheng0_B(nfa, smwr->start_offset, local_buffer,
|
||||
local_alen, roseReportAdaptor, scratch);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include "nfa/mcclellancompile_util.h"
|
||||
#include "nfa/nfa_internal.h"
|
||||
#include "nfa/rdfa_merge.h"
|
||||
#include "nfa/shengcompile.h"
|
||||
#include "nfagraph/ng.h"
|
||||
#include "nfagraph/ng_holder.h"
|
||||
#include "nfagraph/ng_mcclellan.h"
|
||||
@ -312,6 +313,20 @@ bool is_slow(const raw_dfa &rdfa, const set<dstate_id_t> &accel,
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
aligned_unique_ptr<NFA> getDfa(raw_dfa &rdfa, const CompileContext &cc,
|
||||
const ReportManager &rm,
|
||||
set<dstate_id_t> &accel_states) {
|
||||
aligned_unique_ptr<NFA> dfa = nullptr;
|
||||
if (cc.grey.allowSmallWriteSheng) {
|
||||
dfa = shengCompile(rdfa, cc, rm, &accel_states);
|
||||
}
|
||||
if (!dfa) {
|
||||
dfa = mcclellanCompile(rdfa, cc, rm, &accel_states);
|
||||
}
|
||||
return dfa;
|
||||
}
|
||||
|
||||
static
|
||||
aligned_unique_ptr<NFA> prepEngine(raw_dfa &rdfa, u32 roseQuality,
|
||||
const CompileContext &cc,
|
||||
@ -322,9 +337,9 @@ aligned_unique_ptr<NFA> prepEngine(raw_dfa &rdfa, u32 roseQuality,
|
||||
// Unleash the McClellan!
|
||||
set<dstate_id_t> accel_states;
|
||||
|
||||
auto nfa = mcclellanCompile(rdfa, cc, rm, &accel_states);
|
||||
auto nfa = getDfa(rdfa, cc, rm, accel_states);
|
||||
if (!nfa) {
|
||||
DEBUG_PRINTF("mcclellan compile failed for smallwrite NFA\n");
|
||||
DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@ -340,9 +355,9 @@ aligned_unique_ptr<NFA> prepEngine(raw_dfa &rdfa, u32 roseQuality,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
nfa = mcclellanCompile(rdfa, cc, rm, &accel_states);
|
||||
nfa = getDfa(rdfa, cc, rm, accel_states);
|
||||
if (!nfa) {
|
||||
DEBUG_PRINTF("mcclellan compile failed for smallwrite NFA\n");
|
||||
DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n");
|
||||
assert(0); /* able to build orig dfa but not the trimmed? */
|
||||
return nullptr;
|
||||
}
|
||||
@ -351,7 +366,7 @@ aligned_unique_ptr<NFA> prepEngine(raw_dfa &rdfa, u32 roseQuality,
|
||||
*small_region = cc.grey.smallWriteLargestBuffer;
|
||||
}
|
||||
|
||||
assert(isMcClellanType(nfa->type));
|
||||
assert(isDfaType(nfa->type));
|
||||
if (nfa->length > cc.grey.limitSmallWriteOutfixSize
|
||||
|| nfa->length > cc.grey.limitDFASize) {
|
||||
DEBUG_PRINTF("smallwrite outfix size too large\n");
|
||||
|
Loading…
x
Reference in New Issue
Block a user