introduce Sheng-McClellan hybrid

This commit is contained in:
Alex Coyte
2016-12-01 14:32:47 +11:00
committed by Matthew Barr
parent f626276271
commit e51b6d23b9
35 changed files with 3804 additions and 206 deletions

View File

@@ -78,7 +78,7 @@ size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux,
size_t doAccel32(u32 s, u32 accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
size_t end) {
u32 idx = packedExtract32(s, accel);
u32 idx = pext32(s, accel);
return accelScanWrapper(accelTable, aux, input, idx, i, end);
}
@@ -86,14 +86,14 @@ size_t doAccel32(u32 s, u32 accel, const u8 *accelTable,
size_t doAccel64(u64a s, u64a accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
size_t end) {
u32 idx = packedExtract64(s, accel);
u32 idx = pext64(s, accel);
return accelScanWrapper(accelTable, aux, input, idx, i, end);
}
#else
size_t doAccel64(m128 s, m128 accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
size_t end) {
u32 idx = packedExtract64(movq(s), movq(accel));
u32 idx = pext64(movq(s), movq(accel));
return accelScanWrapper(accelTable, aux, input, idx, i, end);
}
#endif

View File

@@ -41,52 +41,6 @@
#include "util/bitutils.h"
#include "util/simd_utils.h"
#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__))
#define HAVE_PEXT
#endif
/**
 * Gathers the bits of \p x selected by \p mask and packs them, in order from
 * least to most significant, into the low bits of the result.
 */
static really_inline
u32 packedExtract32(u32 x, u32 mask) {
#if defined(HAVE_PEXT)
    // BMI2 provides this as a single instruction.
    return _pext_u32(x, mask);
#else
    // Portable fallback: consume the mask one set bit at a time, lowest
    // first; out_bit tracks the destination position for the next mask bit.
    u32 packed = 0;
    for (u32 out_bit = 1; mask != 0; out_bit <<= 1) {
        const u32 src_pos = findAndClearLSB_32(&mask);
        if ((x & (1U << src_pos)) != 0) {
            assert(out_bit != 0); // more than 32 bits!
            packed |= out_bit;
        }
    }
    return packed;
#endif
}
/**
 * 64-bit counterpart of packedExtract32: gathers the bits of \p x selected by
 * \p mask and packs them, lowest first, into a 32-bit result.
 */
static really_inline
u32 packedExtract64(u64a x, u64a mask) {
#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
    // BMI2 provides this as a single instruction.
    return _pext_u64(x, mask);
#else
    // Portable fallback: consume the mask one set bit at a time, lowest
    // first; out_bit tracks the destination position for the next mask bit.
    u32 packed = 0;
    for (u32 out_bit = 1; mask != 0; out_bit <<= 1) {
        const u32 src_pos = findAndClearLSB_64(&mask);
        if ((x & (1ULL << src_pos)) != 0) {
            assert(out_bit != 0); // more than 32 bits!
            packed |= out_bit;
        }
    }
    return packed;
#endif
}
#undef HAVE_PEXT
static really_inline
u32 packedExtract128(m128 s, const m128 permute, const m128 compare) {
m128 shuffled = pshufb(s, permute);

View File

@@ -175,7 +175,7 @@ char mcclellanExec16_i(const struct mcclellan *m, u32 *state, const u8 *buf,
if (mode == STOP_AT_MATCH) {
*c_final = buf;
}
return MO_CONTINUE_MATCHING;
return MO_ALIVE;
}
u32 s = *state;
@@ -213,7 +213,7 @@ without_accel:
if (mode == STOP_AT_MATCH) {
*state = s & STATE_MASK;
*c_final = c - 1;
return MO_CONTINUE_MATCHING;
return MO_MATCHES_PENDING;
}
u64a loc = (c - 1) - buf + offAdj + 1;
@@ -221,12 +221,12 @@ without_accel:
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */
return MO_DEAD; /* termination requested */
}
} else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
&cached_accept_state, &cached_accept_id)
== MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
return MO_DEAD;
}
}
@@ -265,7 +265,7 @@ with_accel:
if (mode == STOP_AT_MATCH) {
*state = s & STATE_MASK;
*c_final = c - 1;
return MO_CONTINUE_MATCHING;
return MO_MATCHES_PENDING;
}
u64a loc = (c - 1) - buf + offAdj + 1;
@@ -273,12 +273,12 @@ with_accel:
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */
return MO_DEAD; /* termination requested */
}
} else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
&cached_accept_state, &cached_accept_id)
== MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
return MO_DEAD;
}
}
@@ -293,7 +293,7 @@ exit:
}
*state = s;
return MO_CONTINUE_MATCHING;
return MO_ALIVE;
}
static never_inline
@@ -376,7 +376,7 @@ char mcclellanExec8_i(const struct mcclellan *m, u32 *state, const u8 *buf,
char single, const u8 **c_final, enum MatchMode mode) {
if (!len) {
*c_final = buf;
return MO_CONTINUE_MATCHING;
return MO_ALIVE;
}
u32 s = *state;
const u8 *c = buf;
@@ -390,8 +390,7 @@ char mcclellanExec8_i(const struct mcclellan *m, u32 *state, const u8 *buf,
u32 cached_accept_id = 0;
u32 cached_accept_state = 0;
DEBUG_PRINTF("accel %hu, accept %hu\n",
m->accel_limit_8, m->accept_limit_8);
DEBUG_PRINTF("accel %hu, accept %u\n", m->accel_limit_8, accept_limit);
DEBUG_PRINTF("s: %u, len %zu\n", s, len);
@@ -417,19 +416,19 @@ without_accel:
DEBUG_PRINTF("match - pausing\n");
*state = s;
*c_final = c - 1;
return MO_CONTINUE_MATCHING;
return MO_MATCHES_PENDING;
}
u64a loc = (c - 1) - buf + offAdj + 1;
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
return MO_DEAD;
}
} else if (doComplexReport(cb, ctxt, m, s, loc, 0,
&cached_accept_state, &cached_accept_id)
== MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
return MO_DEAD;
}
}
@@ -464,19 +463,19 @@ with_accel:
DEBUG_PRINTF("match - pausing\n");
*state = s;
*c_final = c - 1;
return MO_CONTINUE_MATCHING;
return MO_MATCHES_PENDING;
}
u64a loc = (c - 1) - buf + offAdj + 1;
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
return MO_DEAD;
}
} else if (doComplexReport(cb, ctxt, m, s, loc, 0,
&cached_accept_state, &cached_accept_id)
== MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
return MO_DEAD;
}
}
@@ -488,7 +487,7 @@ exit:
if (mode == STOP_AT_MATCH) {
*c_final = c_end;
}
return MO_CONTINUE_MATCHING;
return MO_ALIVE;
}
static never_inline
@@ -576,7 +575,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
q->report_current = 0;
if (rv == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
return MO_DEAD;
}
}
@@ -611,17 +610,20 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
/* do main buffer region */
const u8 *final_look;
if (mcclellanExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp,
offset + sp, cb, context, single, &final_look,
mode)
== MO_HALT_MATCHING) {
char rv = mcclellanExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp,
offset + sp, cb, context, single,
&final_look, mode);
if (rv == MO_DEAD) {
*(u16 *)q->state = 0;
return 0;
return MO_DEAD;
}
if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) {
if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) {
DEBUG_PRINTF("this is as far as we go\n");
assert(q->cur);
DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf);
assert(q->cur);
assert(final_look != cur_buf + local_ep);
q->cur--;
q->items[q->cur].type = MQE_START;
q->items[q->cur].location = final_look - cur_buf + 1; /* due to
@@ -630,6 +632,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
return MO_MATCHES_PENDING;
}
assert(rv == MO_ALIVE);
assert(q->cur);
if (mode != NO_MATCHES && q->items[q->cur].location > end) {
DEBUG_PRINTF("this is as far as we go\n");
@@ -662,7 +665,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
case MQE_END:
*(u16 *)q->state = s;
q->cur++;
return s ? MO_ALIVE : 0;
return s ? MO_ALIVE : MO_DEAD;
default:
assert(!"invalid queue event");
}
@@ -681,8 +684,8 @@ char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, const u8 *buffer,
if (mcclellanExec16_i(m, &s, buffer, length, offset, cb, context, single,
NULL, CALLBACK_OUTPUT)
== MO_HALT_MATCHING) {
return 0;
== MO_DEAD) {
return s ? MO_ALIVE : MO_DEAD;
}
const struct mstate_aux *aux = get_aux(m, s);
@@ -691,7 +694,7 @@ char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, const u8 *buffer,
doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL);
}
return !!s;
return MO_ALIVE;
}
static really_inline
@@ -724,7 +727,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
q->report_current = 0;
if (rv == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
return MO_DEAD;
}
}
@@ -760,16 +763,20 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
}
const u8 *final_look;
if (mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, offset + sp,
cb, context, single, &final_look, mode)
== MO_HALT_MATCHING) {
char rv = mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp,
offset + sp, cb, context, single,
&final_look, mode);
if (rv == MO_HALT_MATCHING) {
*(u8 *)q->state = 0;
return 0;
return MO_DEAD;
}
if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) {
/* found a match */
DEBUG_PRINTF("found a match\n");
if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) {
DEBUG_PRINTF("this is as far as we go\n");
DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf);
assert(q->cur);
assert(final_look != cur_buf + local_ep);
q->cur--;
q->items[q->cur].type = MQE_START;
q->items[q->cur].location = final_look - cur_buf + 1; /* due to
@@ -778,6 +785,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
return MO_MATCHES_PENDING;
}
assert(rv == MO_ALIVE);
assert(q->cur);
if (mode != NO_MATCHES && q->items[q->cur].location > end) {
DEBUG_PRINTF("this is as far as we go\n");
@@ -811,7 +819,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
case MQE_END:
*(u8 *)q->state = s;
q->cur++;
return s ? MO_ALIVE : 0;
return s ? MO_ALIVE : MO_DEAD;
default:
assert(!"invalid queue event");
}
@@ -830,8 +838,8 @@ char nfaExecMcClellan8_Bi(const struct NFA *n, u64a offset, const u8 *buffer,
if (mcclellanExec8_i(m, &s, buffer, length, offset, cb, context, single,
NULL, CALLBACK_OUTPUT)
== MO_HALT_MATCHING) {
return 0;
== MO_DEAD) {
return MO_DEAD;
}
const struct mstate_aux *aux = get_aux(m, s);
@@ -840,7 +848,7 @@ char nfaExecMcClellan8_Bi(const struct NFA *n, u64a offset, const u8 *buffer,
doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL);
}
return s;
return s ? MO_ALIVE : MO_DEAD;
}
char nfaExecMcClellan8_B(const struct NFA *n, u64a offset, const u8 *buffer,

View File

@@ -71,17 +71,17 @@ struct mcclellan {
u16 start_floating; /**< floating start state */
u32 aux_offset; /**< offset of the aux structures relative to the start of
* the nfa structure */
u32 sherman_offset; /**< offset of to array of sherman state offsets
* the state_info structures relative to the start of the
* nfa structure */
u32 sherman_end; /**< offset of the end of the state_info structures relative
* to the start of the nfa structure */
u32 sherman_offset; /**< offset of array of sherman state offsets the
* state_info structures relative to the start of the
* nfa structure */
u32 sherman_end; /**< offset of the end of the state_info structures
* relative to the start of the nfa structure */
u16 accel_limit_8; /**< 8 bit, lowest accelerable state */
u16 accept_limit_8; /**< 8 bit, lowest accept state */
u16 sherman_limit; /**< lowest sherman state */
u8 alphaShift;
u8 flags;
u8 has_accel; /**< 1 iff there are any accel planes */
u8 has_accel; /**< 1 iff there are any accel plans */
u8 remap[256]; /**< remaps characters to a smaller alphabet */
ReportID arb_report; /**< one of the accepts that this dfa may raise */
u32 accel_offset; /**< offset of the accel structures from start of NFA */

View File

@@ -415,9 +415,9 @@ void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info,
: info.raw.start_floating);
}
/* returns non-zero on error */
/* returns false on error */
static
int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) {
bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) {
info.states[0].impl_id = 0; /* dead is always 0 */
vector<dstate_id_t> norm;
@@ -426,7 +426,7 @@ int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) {
if (info.size() > (1 << 16)) {
DEBUG_PRINTF("too many states\n");
*sherman_base = 0;
return 1;
return false;
}
for (u32 i = 1; i < info.size(); i++) {
@@ -452,7 +452,7 @@ int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) {
/* Check to see if we haven't over allocated our states */
DEBUG_PRINTF("next sherman %u masked %u\n", next_sherman,
(dstate_id_t)(next_sherman & STATE_MASK));
return (next_sherman - 1) != ((next_sherman - 1) & STATE_MASK);
return (next_sherman - 1) == ((next_sherman - 1) & STATE_MASK);
}
static
@@ -470,7 +470,7 @@ aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,
assert(alphaShift <= 8);
u16 count_real_states;
if (allocateFSN16(info, &count_real_states)) {
if (!allocateFSN16(info, &count_real_states)) {
DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
info.size());
return nullptr;

View File

@@ -32,9 +32,7 @@
#include "accel_dfa_build_strat.h"
#include "rdfa.h"
#include "ue2common.h"
#include "util/accel_scheme.h"
#include "util/alloc.h"
#include "util/charreach.h"
#include "util/ue2_containers.h"
#include <memory>

1406
src/nfa/mcsheng.c Normal file

File diff suppressed because it is too large Load Diff

84
src/nfa/mcsheng.h Normal file
View File

@@ -0,0 +1,84 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MCSHENG_H
#define MCSHENG_H
#include "callback.h"
#include "ue2common.h"
struct mq;
struct NFA;
/* 8-bit Sheng-McClellan hybrid.
 *
 * Declarations of the nfaExecMcSheng8_* entry points. Every function returns
 * char; the meaning of the returned value is not visible from this header.
 * NOTE(review): presumably these follow the common nfaExec<Engine>_<op>
 * scheme used by the NFA API dispatch code - confirm against the dispatcher. */
char nfaExecMcSheng8_testEOD(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset,
NfaCallback callback, void *context);
char nfaExecMcSheng8_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcSheng8_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcSheng8_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecMcSheng8_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecMcSheng8_inAccept(const struct NFA *n, ReportID report,
struct mq *q);
char nfaExecMcSheng8_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecMcSheng8_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecMcSheng8_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);
char nfaExecMcSheng8_queueCompressState(const struct NFA *nfa,
const struct mq *q, s64a loc);
char nfaExecMcSheng8_expandState(const struct NFA *nfa, void *dest,
const void *src, u64a offset, u8 key);
/* These two operations are not declared as functions for this engine; the
 * names expand to the shared "no implementation" placeholder macros. */
#define nfaExecMcSheng8_B_Reverse NFA_API_NO_IMPL
#define nfaExecMcSheng8_zombie_status NFA_API_ZOMBIE_NO_IMPL
/* 16-bit Sheng-McClellan hybrid.
 *
 * Declarations of the nfaExecMcSheng16_* entry points; the set of operations
 * and their signatures mirror the 8-bit declarations in this header. Every
 * function returns char; the meaning of the returned value is not visible
 * from this header. */
char nfaExecMcSheng16_testEOD(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset,
NfaCallback callback, void *context);
char nfaExecMcSheng16_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcSheng16_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcSheng16_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecMcSheng16_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecMcSheng16_inAccept(const struct NFA *n, ReportID report,
struct mq *q);
char nfaExecMcSheng16_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecMcSheng16_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecMcSheng16_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);
char nfaExecMcSheng16_queueCompressState(const struct NFA *nfa,
const struct mq *q, s64a loc);
char nfaExecMcSheng16_expandState(const struct NFA *nfa, void *dest,
const void *src, u64a offset, u8 key);
/* These two operations are not declared as functions for this engine; the
 * names expand to the shared "no implementation" placeholder macros. */
#define nfaExecMcSheng16_B_Reverse NFA_API_NO_IMPL
#define nfaExecMcSheng16_zombie_status NFA_API_ZOMBIE_NO_IMPL
#endif

1144
src/nfa/mcsheng_compile.cpp Normal file

File diff suppressed because it is too large Load Diff

59
src/nfa/mcsheng_compile.h Normal file
View File

@@ -0,0 +1,59 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MCSHENGCOMPILE_H
#define MCSHENGCOMPILE_H
#include "accel_dfa_build_strat.h"
#include "rdfa.h"
#include "ue2common.h"
#include "util/alloc.h"
#include "util/ue2_containers.h"
#include <memory>
#include <set>
struct NFA;
namespace ue2 {
class ReportManager;
struct CompileContext;
/* Builds an NFA from the given raw DFA.
 *
 * accel_states: (optional) on success, is filled with the set of accelerable
 * states. Defaults to nullptr, in which case nothing is reported back.
 *
 * NOTE(review): presumably a null pointer is returned when the DFA cannot be
 * implemented as a McSheng - confirm in mcsheng_compile.cpp. */
ue2::aligned_unique_ptr<NFA>
mcshengCompile(raw_dfa &raw, const CompileContext &cc,
const ReportManager &rm,
std::set<dstate_id_t> *accel_states = nullptr);
/* NOTE(review): by its name, reports whether the compiled McSheng uses any
 * acceleration - confirm against the definition. */
bool has_accel_mcsheng(const NFA *nfa);
} // namespace ue2
#endif

43
src/nfa/mcsheng_data.c Normal file
View File

@@ -0,0 +1,43 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "mcsheng_internal.h"
/* This table is in a separate translation unit from mcsheng.c as we want to
 * prevent the compiler from seeing these constants. We have the load resources
 * free at runtime to load the masks with no problems.
 *
 * Layout: entry 0 is an unused placeholder. For k in 1..7, entry k selects the
 * low nibble (bits 0-3) together with all eight bits of byte k of a 64-bit
 * word.
 * NOTE(review): the name suggests these are the mask operands of a 64-bit
 * pext - confirm against the users in mcsheng.c. */
const u64a mcsheng_pext_mask[8] = {
0, /* dummy */
0x000000000000ff0f,
0x0000000000ff000f,
0x00000000ff00000f,
0x000000ff0000000f,
0x0000ff000000000f,
0x00ff00000000000f,
0xff0000000000000f,
};

415
src/nfa/mcsheng_dump.cpp Normal file
View File

@@ -0,0 +1,415 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "mcsheng_dump.h"
#include "accel.h"
#include "accel_dump.h"
#include "nfa_dump_internal.h"
#include "nfa_internal.h"
#include "mcsheng_internal.h"
#include "rdfa.h"
#include "ue2common.h"
#include "util/charreach.h"
#include "util/dump_charclass.h"
#include "util/dump_util.h"
#include "util/unaligned.h"
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <map>
#ifndef DUMP_SUPPORT
#error No dump support!
#endif
using namespace std;
namespace ue2 {
/** Returns the aux record for state \p i; the aux array is located
 * aux_offset bytes from the start of the NFA structure. */
static
const mstate_aux *getAux(const NFA *n, dstate_id_t i) {
    const char *nfa_bytes = (const char *)n;
    const mcsheng *impl = (const mcsheng *)getImplNfa(n);

    const mstate_aux *table = (const mstate_aux *)(nfa_bytes + impl->aux_offset);
    const mstate_aux *entry = &table[i];

    // The record must start inside the engine's bytecode.
    assert((const char *)entry < nfa_bytes + impl->length);
    return entry;
}
/**
 * Fills \p t with the successor of state \p s for each of the N_CHARS input
 * bytes, and stores the state's top successor in t[TOP].
 *
 * Three encodings are decoded, chosen by the state id and the engine type:
 *  - s < sheng_end: successors are read from the per-character sheng masks;
 *  - MCSHENG_NFA_8: successors are read from a u8 table following the header;
 *  - otherwise: successors are read from a u16 table following the header,
 *    with sherman states (s >= sherman_limit) starting from their "daddy"
 *    row and then applying their own per-character overrides.
 *
 * \p s must not be DEAD_STATE.
 */
static
void next_states(const NFA *n, u16 s, u16 *t) {
    const mcsheng *m = (const mcsheng *)getImplNfa(n);
    const mstate_aux *aux = getAux(n, s);
    const u32 as = m->alphaShift;
    assert(s != DEAD_STATE);

    if (s < m->sheng_end) {
        /* Sheng ids are stored off by one relative to the state ids used
         * here: lane (s - 1) of each mask holds the raw successor. */
        const u8 sheng_s = s - 1;
        assert(sheng_s < sizeof(m128));
        for (u16 c = 0; c < N_CHARS; c++) {
            auto trans_for_c = (const char *)&m->sheng_masks[c];
            u8 raw_succ = trans_for_c[sheng_s];
            if (raw_succ == m->sheng_end - 1) {
                /* the last raw sheng id encodes the dead state */
                t[c] = DEAD_STATE;
            } else if (raw_succ < m->sheng_end) {
                t[c] = raw_succ + 1;
            } else {
                /* ids at or above sheng_end are used unchanged */
                t[c] = raw_succ;
            }
        }
    } else if (n->type == MCSHENG_NFA_8) {
        const u8 *succ_table = (const u8 *)((const char *)m + sizeof(mcsheng));
        for (u16 c = 0; c < N_CHARS; c++) {
            u32 normal_id = s - m->sheng_end;
            t[c] = succ_table[(normal_id << as) + m->remap[c]];
        }
    } else {
        const bool is_sherman = s >= m->sherman_limit;
        const char *state_base = nullptr;
        u16 base_s = s;

        /* Only form the sherman record pointer for sherman states; for any
         * other state (s - sherman_limit) would be negative. */
        if (is_sherman) {
            const char *winfo_base = (const char *)n + m->sherman_offset;
            state_base
                = winfo_base + SHERMAN_FIXED_SIZE * (s - m->sherman_limit);
            base_s = unaligned_load_u16(state_base + SHERMAN_DADDY_OFFSET);
            assert(base_s >= m->sheng_end);
        }

        const u16 *succ_table = (const u16 *)((const char *)m
                                              + sizeof(mcsheng));
        for (u16 c = 0; c < N_CHARS; c++) {
            u32 normal_id = base_s - m->sheng_end;
            t[c] = succ_table[(normal_id << as) + m->remap[c]];
        }

        if (is_sherman) {
            UNUSED char type = *(state_base + SHERMAN_TYPE_OFFSET);
            assert(type == SHERMAN_STATE);
            u8 len = *(const u8 *)(SHERMAN_LEN_OFFSET + state_base);
            /* Read the stored symbols as unsigned bytes: remap[] is unsigned,
             * and a plain (possibly signed) char would never compare equal
             * for symbols >= 0x80. */
            const u8 *chars
                = (const u8 *)(state_base + SHERMAN_CHARS_OFFSET);
            const u16 *states = (const u16 *)(state_base
                                              + SHERMAN_STATES_OFFSET(len));

            for (u8 i = 0; i < len; i++) {
                for (u16 c = 0; c < N_CHARS; c++) {
                    if (m->remap[c] == chars[i]) {
                        t[c] = unaligned_load_u16((const u8*)&states[i]);
                    }
                }
            }
        }

        /* keep only the state id bits of each 16-bit table entry */
        for (u16 c = 0; c < N_CHARS; c++) {
            t[c] &= STATE_MASK;
        }
    }

    t[TOP] = aux->top & STATE_MASK;
}
static
void describeEdge(FILE *f, const mcsheng *m, const u16 *t, u16 i) {
for (u16 s = 0; s < N_CHARS; s++) {
if (!t[s]) {
continue;
}
u16 ss;
for (ss = 0; ss < s; ss++) {
if (t[s] == t[ss]) {
break;
}
}
if (ss != s) {
continue;
}
CharReach reach;
for (ss = s; ss < 256; ss++) {
if (t[s] == t[ss]) {
reach.set(ss);
}
}
fprintf(f, "%u -> %u [ ", i, t[s]);
if (i < m->sheng_end && t[s] < m->sheng_end) {
fprintf(f, "color = red, fontcolor = red ");
}
fprintf(f, "label = \"");
describeClass(f, reach, 5, CC_OUT_DOT);
fprintf(f, "\" ];\n");
}
}
/**
 * Decorates node \p i in the dot output according to its acceleration type:
 * nothing for ACCEL_NONE, forestgreen for the vermicelli types, darkgreen for
 * shufti/dshufti/truffle, and yellow for anything else.
 */
static
void dumpAccelDot(FILE *f, u16 i, const union AccelAux *accel) {
    const auto scheme = accel->accel_type;

    if (scheme == ACCEL_NONE) {
        return; // unaccelerated states get no extra styling
    }

    switch (scheme) {
    case ACCEL_VERM:
    case ACCEL_VERM_NOCASE:
    case ACCEL_DVERM:
    case ACCEL_DVERM_NOCASE:
        fprintf(f, "%u [ color = forestgreen style=diagonals];\n", i);
        return;
    case ACCEL_SHUFTI:
    case ACCEL_DSHUFTI:
    case ACCEL_TRUFFLE:
        fprintf(f, "%u [ color = darkgreen style=diagonals ];\n", i);
        return;
    default:
        fprintf(f, "%u [ color = yellow style=diagonals ];\n", i);
        return;
    }
}
/**
 * Emits the dot description of state \p i: its label, acceleration styling,
 * accept / accept-at-EOD markers, its top edge, start markers and, for
 * sherman states, the dashed edge to the "daddy" state.
 *
 * \param n the NFA being dumped (aux and sherman data are located from it)
 * \param m the mcsheng implementation header of \p n
 * \param i state id to describe
 * \param f output stream receiving dot syntax
 */
static
void describeNode(const NFA *n, const mcsheng *m, u16 i, FILE *f) {
    const mstate_aux *aux = getAux(n, i);

    /* A zero sherman_limit means no state is treated as sherman. */
    bool isSherman = m->sherman_limit && i >= m->sherman_limit;

    /* Sherman states are labelled with a trailing "w". */
    fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, "
            "label = \"%u%s\" ]; \n", i, i, isSherman ? "w":"");

    if (aux->accel_offset) {
        /* accel_offset is applied relative to the mcsheng header here */
        dumpAccelDot(f, i, (const union AccelAux *)
                     ((const char *)m + aux->accel_offset));
    }

    if (i && i < m->sheng_end) {
        fprintf(f, "%u [color = red, fontcolor = red]; \n", i);
    }

    if (aux->accept_eod) {
        fprintf(f, "%u [ color = darkorchid ];\n", i);
    }

    if (aux->accept) {
        fprintf(f, "%u [ shape = doublecircle ];\n", i);
    }

    if (aux->top && aux->top != i) {
        fprintf(f, "%u -> %u [color = darkgoldenrod weight=0.1 ]\n", i,
                aux->top);
    }

    if (i == m->start_anchored) {
        fprintf(f, "STARTA -> %u [color = blue ]\n", i);
    }

    if (i == m->start_floating) {
        fprintf(f, "STARTF -> %u [color = red ]\n", i);
    }

    if (isSherman) {
        const char *winfo_base = (const char *)n + m->sherman_offset;
        const char *state_base
            = winfo_base + SHERMAN_FIXED_SIZE * (i - m->sherman_limit);
        assert(state_base < (const char *)m + m->length - sizeof(NFA));
        UNUSED u8 type = *(const u8 *)(state_base + SHERMAN_TYPE_OFFSET);
        assert(type == SHERMAN_STATE);
        fprintf(f, "%u [ fillcolor = lightblue style=filled ];\n", i);
        /* Read the daddy field with the unaligned loader, as next_states()
         * does for the same field, rather than through a u16 pointer cast
         * that would require the record to be 2-byte aligned. */
        u16 daddy = unaligned_load_u16(state_base + SHERMAN_DADDY_OFFSET);
        if (daddy) {
            fprintf(f, "%u -> %u [ color=royalblue style=dashed weight=0.1]\n",
                    i, daddy);
        }
    }

    if (i && i < m->sheng_end) {
        /* group the sheng states into one cluster in the rendered graph */
        fprintf(f, "subgraph cluster_sheng { %u } \n", i);
    }
}
/** Writes the common dot preamble followed by the DFA-specific nodes:
 * invisible STARTF, STARTA and 0 nodes and the dashed sheng cluster. */
static
void dumpDotPreambleDfa(FILE *f) {
    dumpDotPreamble(f);

    // DFA specific additions, emitted as one write.
    fprintf(f,
            "STARTF [style=invis];\n"
            "STARTA [style=invis];\n"
            "0 [style=invis];\n"
            "subgraph cluster_sheng { style = dashed }\n");
}
static
void dump_dot_16(const NFA *nfa, FILE *f) {
auto *m = (const mcsheng *)getImplNfa(nfa);
dumpDotPreambleDfa(f);
for (u16 i = 1; i < m->state_count; i++) {
describeNode(nfa, m, i, f);
u16 t[ALPHABET_SIZE];
next_states(nfa, i, t);
describeEdge(f, m, t, i);
}
fprintf(f, "}\n");
}
static
void dump_dot_8(const NFA *nfa, FILE *f) {
auto m = (const mcsheng *)getImplNfa(nfa);
dumpDotPreambleDfa(f);
for (u16 i = 1; i < m->state_count; i++) {
describeNode(nfa, m, i, f);
u16 t[ALPHABET_SIZE];
next_states(nfa, i, t);
describeEdge(f, m, t, i);
}
fprintf(f, "}\n");
}
static
void dumpAccelMasks(FILE *f, const mcsheng *m, const mstate_aux *aux) {
fprintf(f, "\n");
fprintf(f, "Acceleration\n");
fprintf(f, "------------\n");
for (u16 i = 0; i < m->state_count; i++) {
if (!aux[i].accel_offset) {
continue;
}
auto accel = (const AccelAux *)((const char *)m + aux[i].accel_offset);
fprintf(f, "%05hu ", i);
dumpAccelInfo(f, *accel);
}
}
static
void describeAlphabet(FILE *f, const mcsheng *m) {
map<u8, CharReach> rev;
for (u16 i = 0; i < N_CHARS; i++) {
rev[m->remap[i]].clear();
}
for (u16 i = 0; i < N_CHARS; i++) {
rev[m->remap[i]].set(i);
}
map<u8, CharReach>::const_iterator it;
fprintf(f, "\nAlphabet\n");
for (it = rev.begin(); it != rev.end(); ++it) {
fprintf(f, "%3hhu: ", it->first);
describeClass(f, it->second, 10240, CC_OUT_TEXT);
fprintf(f, "\n");
}
fprintf(f, "\n");
}
/**
 * Prints the header fields shared by the 8-bit and 16-bit text dumps: the
 * arbitrary report id, state count, length, both start states, the
 * single-accept flag, has_accel, and the sheng limits.
 */
static
void dumpCommonHeader(FILE *f, const mcsheng *m) {
    fprintf(f, "report: %u, states: %u, length: %u\n", m->arb_report,
            m->state_count, m->length);
    fprintf(f, "astart: %hu, fstart: %hu\n", m->start_anchored,
            m->start_floating);
    /* Test the SINGLE bit itself. Unary ! binds tighter than &, so the
     * parentheses are required: without them the expression reduces to
     * "any flag set" masked with the flag constant. */
    fprintf(f, "single accept: %d, has_accel: %d\n",
            !!(m->flags & MCSHENG_FLAG_SINGLE), m->has_accel);
    fprintf(f, "sheng_end: %hu\n", m->sheng_end);
    fprintf(f, "sheng_accel_limit: %hu\n", m->sheng_accel_limit);
}
static
void dump_text_16(const NFA *nfa, FILE *f) {
auto *m = (const mcsheng *)getImplNfa(nfa);
auto *aux = (const mstate_aux *)((const char *)nfa + m->aux_offset);
fprintf(f, "mcsheng 16\n");
dumpCommonHeader(f, m);
fprintf(f, "sherman_limit: %d, sherman_end: %d\n", (int)m->sherman_limit,
(int)m->sherman_end);
fprintf(f, "\n");
describeAlphabet(f, m);
dumpAccelMasks(f, m, aux);
fprintf(f, "\n");
dumpTextReverse(nfa, f);
}
static
void dump_text_8(const NFA *nfa, FILE *f) {
auto m = (const mcsheng *)getImplNfa(nfa);
auto aux = (const mstate_aux *)((const char *)nfa + m->aux_offset);
fprintf(f, "mcsheng 8\n");
dumpCommonHeader(f, m);
fprintf(f, "accel_limit: %hu, accept_limit %hu\n", m->accel_limit_8,
m->accept_limit_8);
fprintf(f, "\n");
describeAlphabet(f, m);
dumpAccelMasks(f, m, aux);
fprintf(f, "\n");
dumpTextReverse(nfa, f);
}
void nfaExecMcSheng16_dump(const NFA *nfa, const string &base) {
assert(nfa->type == MCSHENG_NFA_16);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
dump_text_16(nfa, f);
fclose(f);
f = fopen_or_throw((base + ".dot").c_str(), "w");
dump_dot_16(nfa, f);
fclose(f);
}
void nfaExecMcSheng8_dump(const NFA *nfa, const string &base) {
assert(nfa->type == MCSHENG_NFA_8);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
dump_text_8(nfa, f);
fclose(f);
f = fopen_or_throw((base + ".dot").c_str(), "w");
dump_dot_8(nfa, f);
fclose(f);
}
} // namespace ue2

50
src/nfa/mcsheng_dump.h Normal file
View File

@@ -0,0 +1,50 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MCSHENG_DUMP_H
#define MCSHENG_DUMP_H
#ifdef DUMP_SUPPORT
#include "rdfa.h"
#include <cstdio>
#include <string>
struct NFA;
namespace ue2 {
/** \brief Dump an MCSHENG_NFA_8 engine: writes a text description to
 * base + ".txt" and dot output to base + ".dot". */
void nfaExecMcSheng8_dump(const struct NFA *nfa, const std::string &base);
/** \brief Dump an MCSHENG_NFA_16 engine: writes a text description to
 * base + ".txt" and dot output to base + ".dot". */
void nfaExecMcSheng16_dump(const struct NFA *nfa, const std::string &base);
} // namespace ue2
#endif // DUMP_SUPPORT
#endif // MCSHENG_DUMP_H

View File

@@ -0,0 +1,95 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MCSHENG_INTERNAL_H
#define MCSHENG_INTERNAL_H
#include "nfa_internal.h"
#include "ue2common.h"
#include "util/simd_utils.h"
/* Together these three constants partition a 16-bit value: the two flags take
 * the top two bits and STATE_MASK covers the remaining low 14 bits.
 * NOTE(review): presumably applied to 16-bit state ids - confirm at the users.
 */
#define ACCEPT_FLAG 0x8000
#define ACCEL_FLAG 0x4000
#define STATE_MASK 0x3fff
/* NOTE(review): the SHERMAN_* constants below look like the type tag, the
 * fixed record size and the byte offsets of the fields of a sherman state
 * record - confirm against the code that builds and reads those records. */
#define SHERMAN_STATE 1
#define SHERMAN_TYPE_OFFSET 0
#define SHERMAN_FIXED_SIZE 32
#define SHERMAN_LEN_OFFSET 1
#define SHERMAN_DADDY_OFFSET 2
#define SHERMAN_CHARS_OFFSET 4
/* Evaluates to 4 + sso_len, i.e. sso_len past the value of
 * SHERMAN_CHARS_OFFSET. */
#define SHERMAN_STATES_OFFSET(sso_len) (4 + (sso_len))
/** \brief A counted list of report ids, stored as a header followed by a
 * flexible array member. */
struct report_list {
u32 count; /**< presumably the number of entries in report[] - TODO confirm */
ReportID report[]; /**< report ids; storage follows the struct */
};
/** \brief Auxiliary per-state record. The dump code locates the array of
 * these at mcsheng::aux_offset bytes from the start of the NFA.
 *
 * NOTE(review): the meanings of accept, accept_eod and top are not visible
 * here; confirm against the compiler before relying on them. */
struct mstate_aux {
u32 accept;
u32 accept_eod;
u16 top;
u32 accel_offset; /* relative to start of struct mcsheng; 0 if no accel */
};
#define MCSHENG_FLAG_SINGLE 1 /**< we raise only single accept id */
/** \brief Implementation structure for the Sheng-McClellan hybrid (mcsheng)
 * DFA engines.
 *
 * Several fields are offsets relative to the start of the enclosing nfa
 * structure, as noted on each field. */
struct mcsheng {
u16 state_count; /**< total number of states */
u32 length; /**< length of dfa in bytes */
u16 start_anchored; /**< anchored start state */
u16 start_floating; /**< floating start state */
u32 aux_offset; /**< offset of the aux structures relative to the start of
* the nfa structure */
u32 sherman_offset; /**< offset of the sherman region relative to the start
* of the nfa structure. NOTE(review): the original
* comment mentioned both an "array of sherman state
* offsets" and "the state_info structures" - confirm
* which one this points at */
u32 sherman_end; /**< offset of the end of the state_info structures
* relative to the start of the nfa structure */
u16 sheng_end; /**< first non-sheng state */
u16 sheng_accel_limit; /**< first sheng accel state. state given in terms of
* internal sheng ids */
u16 accel_limit_8; /**< 8 bit, lowest accelerable state */
u16 accept_limit_8; /**< 8 bit, lowest accept state */
u16 sherman_limit; /**< lowest sherman state */
u8 alphaShift; /**< NOTE(review): presumably the shift used to index a state's
* transition row - confirm */
u8 flags; /**< bit flags; tested against MCSHENG_FLAG_SINGLE */
u8 has_accel; /**< 1 iff there are any accel plans */
u8 remap[256]; /**< remaps characters to a smaller alphabet */
ReportID arb_report; /**< one of the accepts that this dfa may raise */
u32 accel_offset; /**< offset of the accel structures from start of NFA */
m128 sheng_masks[N_CHARS]; /**< N_CHARS 128-bit masks; NOTE(review):
* presumably one sheng shuffle mask per input
* character - confirm */
};
/* pext masks that let the runtime access the appropriate copies of bytes 1..7
 * of the data held in a u64a.
 * NOTE(review): the array has 8 entries while the comment names only bytes
 * 1..7; entry 0 is presumably a placeholder - confirm against the definition.
 */
extern const u64a mcsheng_pext_mask[8];
#endif

View File

@@ -41,6 +41,7 @@
#include "lbr.h"
#include "limex.h"
#include "mcclellan.h"
#include "mcsheng.h"
#include "mpv.h"
#include "sheng.h"
#include "tamarama.h"
@@ -73,6 +74,8 @@
DISPATCH_CASE(CASTLE_NFA, Castle, dbnt_func); \
DISPATCH_CASE(SHENG_NFA, Sheng, dbnt_func); \
DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \
DISPATCH_CASE(MCSHENG_NFA_8, McSheng8, dbnt_func); \
DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \
default: \
assert(0); \
}

View File

@@ -30,6 +30,7 @@
#include "limex_internal.h"
#include "mcclellancompile.h"
#include "mcsheng_compile.h"
#include "shengcompile.h"
#include "nfa_internal.h"
#include "repeat_internal.h"
@@ -413,6 +414,38 @@ const nfa_dispatch_fn NFATraits<TAMARAMA_NFA>::has_repeats_other_than_firsts = d
const char *NFATraits<TAMARAMA_NFA>::name = "Tamarama";
#endif
/* Build-time traits for the MCSHENG_NFA_8 engine type. */
template<> struct NFATraits<MCSHENG_NFA_8> {
UNUSED static const char *name; /* only defined when DUMP_SUPPORT is set */
static const NFACategory category = NFA_OTHER;
/* NOTE(review): presumably the required alignment of the engine's state, in
 * bytes - confirm against the users of stateAlign. */
static const u32 stateAlign = 1;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
/* Acceleration is answered by has_accel_mcsheng; both repeat queries are wired
 * to dispatch_false. */
const nfa_dispatch_fn NFATraits<MCSHENG_NFA_8>::has_accel = has_accel_mcsheng;
const nfa_dispatch_fn NFATraits<MCSHENG_NFA_8>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<MCSHENG_NFA_8>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<MCSHENG_NFA_8>::name = "Shengy McShengFace 8";
#endif
/* Build-time traits for the MCSHENG_NFA_16 engine type. */
template<> struct NFATraits<MCSHENG_NFA_16> {
UNUSED static const char *name; /* only defined when DUMP_SUPPORT is set */
static const NFACategory category = NFA_OTHER;
/* NOTE(review): presumably the required alignment of the engine's state, in
 * bytes - confirm against the users of stateAlign. */
static const u32 stateAlign = 2;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
/* Acceleration is answered by has_accel_mcsheng; both repeat queries are wired
 * to dispatch_false. */
const nfa_dispatch_fn NFATraits<MCSHENG_NFA_16>::has_accel = has_accel_mcsheng;
const nfa_dispatch_fn NFATraits<MCSHENG_NFA_16>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<MCSHENG_NFA_16>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<MCSHENG_NFA_16>::name = "Shengy McShengFace 16";
#endif
} // namespace
#if defined(DUMP_SUPPORT)

View File

@@ -39,6 +39,7 @@
#include "lbr_dump.h"
#include "limex.h"
#include "mcclellandump.h"
#include "mcsheng_dump.h"
#include "mpv_dump.h"
#include "shengdump.h"
#include "tamarama_dump.h"
@@ -78,6 +79,8 @@ namespace ue2 {
DISPATCH_CASE(CASTLE_NFA, Castle, dbnt_func); \
DISPATCH_CASE(SHENG_NFA, Sheng, dbnt_func); \
DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \
DISPATCH_CASE(MCSHENG_NFA_8, McSheng8, dbnt_func); \
DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \
default: \
assert(0); \
}

View File

@@ -70,6 +70,8 @@ enum NFAEngineType {
CASTLE_NFA, /**< magic pseudo nfa */
SHENG_NFA, /**< magic pseudo nfa */
TAMARAMA_NFA, /**< magic nfa container */
MCSHENG_NFA_8, /**< magic pseudo nfa */
MCSHENG_NFA_16, /**< magic pseudo nfa */
/** \brief bogus NFA - not used */
INVALID_NFA
};
@@ -143,6 +145,12 @@ static really_inline int isMcClellanType(u8 t) {
return t == MCCLELLAN_NFA_8 || t == MCCLELLAN_NFA_16;
}
/** \brief True if the given type (from NFA::type) is one of the two
 * Sheng-McClellan hybrid DFA types (MCSHENG_NFA_8 or MCSHENG_NFA_16). */
static really_inline int isShengMcClellanType(u8 t) {
    switch (t) {
    case MCSHENG_NFA_8:
    case MCSHENG_NFA_16:
        return 1;
    default:
        return 0;
    }
}
/** \brief True if the given type (from NFA::type) is a Gough DFA. */
static really_inline int isGoughType(u8 t) {
return t == GOUGH_NFA_8 || t == GOUGH_NFA_16;
@@ -158,7 +166,16 @@ static really_inline int isShengType(u8 t) {
* Sheng DFA.
*/
static really_inline int isDfaType(u8 t) {
    /* Sheng-McClellan hybrids count as DFAs too. This must be the only return
     * statement: an earlier return without the hybrid clause would make the
     * hybrid check unreachable. */
    return isMcClellanType(t) || isGoughType(t) || isShengType(t)
        || isShengMcClellanType(t);
}
/** \brief True if the given type (from NFA::type) is one of the _16 DFA
 * types: MCCLELLAN_NFA_16, MCSHENG_NFA_16 or GOUGH_NFA_16. */
static really_inline int isBigDfaType(u8 t) {
    switch (t) {
    case MCCLELLAN_NFA_16:
    case MCSHENG_NFA_16:
    case GOUGH_NFA_16:
        return 1;
    default:
        return 0;
    }
}
/** \brief True if the given type (from NFA::type) is a DFA type that is not
 * one of the big DFA types. */
static really_inline int isSmallDfaType(u8 t) {
    if (isBigDfaType(t)) {
        return 0;
    }
    return isDfaType(t) ? 1 : 0;
}
/** \brief True if the given type (from NFA::type) is an NFA. */

68
src/nfa/rdfa_graph.cpp Normal file
View File

@@ -0,0 +1,68 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "rdfa_graph.h"
#include "rdfa.h"
#include "util/container.h"
#include <vector>
using namespace std;
namespace ue2 {
/**
 * Builds a graph from a raw DFA: one vertex per DFA state (vertex index equal
 * to the state id) and a single edge for each distinct (state, successor)
 * pair found while walking the symbols below alpha_size - 1.
 */
RdfaGraph::RdfaGraph(const raw_dfa &rdfa) {
    RdfaGraph &g = *this;
    const auto &states = rdfa.states;

    /* Create the vertices in state-id order so that indices line up. */
    vector<RdfaGraph::vertex_descriptor> verts;
    verts.reserve(states.size());
    for (dstate_id_t id = 0; id < states.size(); id++) {
        verts.push_back(add_vertex(g));
        assert(g[verts.back()].index == id);
    }

    /* The final symbol of the alphabet is deliberately not walked. */
    const symbol_t symbol_end = rdfa.alpha_size - 1;

    /* Successors already linked from the current state, so that parallel
     * edges are not added. */
    flat_set<dstate_id_t> seen;
    for (dstate_id_t from = 0; from < states.size(); from++) {
        seen.clear();
        for (symbol_t sym = 0; sym < symbol_end; sym++) {
            const dstate_id_t to = states[from].next[sym];
            if (!contains(seen, to)) {
                DEBUG_PRINTF("%hu->%hu\n", from, to);
                add_edge(verts[from], verts[to], g);
                seen.insert(to);
            }
        }
    }
}
}

54
src/nfa/rdfa_graph.h Normal file
View File

@@ -0,0 +1,54 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef RDFA_GRAPH_H
#define RDFA_GRAPH_H
#include "ue2common.h"
#include "util/ue2_graph.h"
namespace ue2 {
struct raw_dfa;
/** \brief Per-vertex properties of an RdfaGraph. */
struct RdfaVertexProps {
size_t index = 0; /**< vertex index; NOTE(review): presumably maintained by
                   * ue2_graph - confirm */
};
/** \brief Per-edge properties of an RdfaGraph. */
struct RdfaEdgeProps {
size_t index = 0; /**< edge index; NOTE(review): presumably maintained by
                   * ue2_graph - confirm */
};
/** \brief A ue2_graph that is constructed from a raw_dfa. */
struct RdfaGraph : public ue2_graph<RdfaGraph, RdfaVertexProps, RdfaEdgeProps> {
/** \brief Build the graph from \a rdfa. */
RdfaGraph(const raw_dfa &rdfa);
};
}
#endif

View File

@@ -447,9 +447,8 @@ void createShuffleMasks(sheng *s, dfa_info &info,
}
}
/**
 * \brief Reports whether a sheng engine is accelerated.
 *
 * Always returns true: the engine argument is ignored and the sheng region is
 * treated as accelerated regardless of its flags.
 */
bool has_accel_sheng(const NFA *) {
    return true; /* consider the sheng region as accelerated */
}
aligned_unique_ptr<NFA> shengCompile(raw_dfa &raw,