introduce Sheng-McClellan hybrid

This commit is contained in:
Alex Coyte 2016-12-01 14:32:47 +11:00 committed by Matthew Barr
parent f626276271
commit e51b6d23b9
35 changed files with 3804 additions and 206 deletions

View File

@ -448,10 +448,6 @@ set (hs_exec_SRCS
src/nfa/lbr.h
src/nfa/lbr_common_impl.h
src/nfa/lbr_internal.h
src/nfa/mcclellan.c
src/nfa/mcclellan.h
src/nfa/mcclellan_common_impl.h
src/nfa/mcclellan_internal.h
src/nfa/limex_accel.c
src/nfa/limex_accel.h
src/nfa/limex_exceptional.h
@ -470,6 +466,14 @@ set (hs_exec_SRCS
src/nfa/limex_runtime_impl.h
src/nfa/limex_shuffle.h
src/nfa/limex_state_impl.h
src/nfa/mcclellan.c
src/nfa/mcclellan.h
src/nfa/mcclellan_common_impl.h
src/nfa/mcclellan_internal.h
src/nfa/mcsheng.c
src/nfa/mcsheng_data.c
src/nfa/mcsheng.h
src/nfa/mcsheng_internal.h
src/nfa/mpv.h
src/nfa/mpv.c
src/nfa/mpv_internal.h
@ -650,6 +654,8 @@ SET (hs_SRCS
src/nfa/mcclellancompile.h
src/nfa/mcclellancompile_util.cpp
src/nfa/mcclellancompile_util.h
src/nfa/mcsheng_compile.cpp
src/nfa/mcsheng_compile.h
src/nfa/limex_compile.cpp
src/nfa/limex_compile.h
src/nfa/limex_accel.h
@ -667,6 +673,8 @@ SET (hs_SRCS
src/nfa/nfa_internal.h
src/nfa/nfa_kind.h
src/nfa/rdfa.h
src/nfa/rdfa_graph.cpp
src/nfa/rdfa_graph.h
src/nfa/rdfa_merge.cpp
src/nfa/rdfa_merge.h
src/nfa/repeat_internal.h
@ -962,6 +970,8 @@ set(hs_dump_SRCS
src/nfa/limex_dump.cpp
src/nfa/mcclellandump.cpp
src/nfa/mcclellandump.h
src/nfa/mcsheng_dump.cpp
src/nfa/mcsheng_dump.h
src/nfa/mpv_dump.cpp
src/nfa/nfa_dump_api.h
src/nfa/nfa_dump_dispatch.cpp

View File

@ -51,6 +51,7 @@ Grey::Grey(void) :
allowLbr(true),
allowMcClellan(true),
allowSheng(true),
allowMcSheng(true),
allowPuff(true),
allowLiteral(true),
allowRose(true),
@ -217,6 +218,7 @@ void applyGreyOverrides(Grey *g, const string &s) {
G_UPDATE(allowLbr);
G_UPDATE(allowMcClellan);
G_UPDATE(allowSheng);
G_UPDATE(allowMcSheng);
G_UPDATE(allowPuff);
G_UPDATE(allowLiteral);
G_UPDATE(allowRose);

View File

@ -51,6 +51,7 @@ struct Grey {
bool allowLbr;
bool allowMcClellan;
bool allowSheng;
bool allowMcSheng;
bool allowPuff;
bool allowLiteral;
bool allowRose;

View File

@ -78,7 +78,7 @@ size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux,
size_t doAccel32(u32 s, u32 accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
size_t end) {
u32 idx = packedExtract32(s, accel);
u32 idx = pext32(s, accel);
return accelScanWrapper(accelTable, aux, input, idx, i, end);
}
@ -86,14 +86,14 @@ size_t doAccel32(u32 s, u32 accel, const u8 *accelTable,
size_t doAccel64(u64a s, u64a accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
size_t end) {
u32 idx = packedExtract64(s, accel);
u32 idx = pext64(s, accel);
return accelScanWrapper(accelTable, aux, input, idx, i, end);
}
#else
size_t doAccel64(m128 s, m128 accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
size_t end) {
u32 idx = packedExtract64(movq(s), movq(accel));
u32 idx = pext64(movq(s), movq(accel));
return accelScanWrapper(accelTable, aux, input, idx, i, end);
}
#endif

View File

@ -41,52 +41,6 @@
#include "util/bitutils.h"
#include "util/simd_utils.h"
#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__))
#define HAVE_PEXT
#endif
static really_inline
u32 packedExtract32(u32 x, u32 mask) {
#if defined(HAVE_PEXT)
// Intel BMI2 can do this operation in one instruction.
return _pext_u32(x, mask);
#else
u32 result = 0, num = 1;
while (mask != 0) {
u32 bit = findAndClearLSB_32(&mask);
if (x & (1U << bit)) {
assert(num != 0); // more than 32 bits!
result |= num;
}
num <<= 1;
}
return result;
#endif
}
static really_inline
u32 packedExtract64(u64a x, u64a mask) {
#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
// Intel BMI2 can do this operation in one instruction.
return _pext_u64(x, mask);
#else
u32 result = 0, num = 1;
while (mask != 0) {
u32 bit = findAndClearLSB_64(&mask);
if (x & (1ULL << bit)) {
assert(num != 0); // more than 32 bits!
result |= num;
}
num <<= 1;
}
return result;
#endif
}
#undef HAVE_PEXT
static really_inline
u32 packedExtract128(m128 s, const m128 permute, const m128 compare) {
m128 shuffled = pshufb(s, permute);

View File

@ -175,7 +175,7 @@ char mcclellanExec16_i(const struct mcclellan *m, u32 *state, const u8 *buf,
if (mode == STOP_AT_MATCH) {
*c_final = buf;
}
return MO_CONTINUE_MATCHING;
return MO_ALIVE;
}
u32 s = *state;
@ -213,7 +213,7 @@ without_accel:
if (mode == STOP_AT_MATCH) {
*state = s & STATE_MASK;
*c_final = c - 1;
return MO_CONTINUE_MATCHING;
return MO_MATCHES_PENDING;
}
u64a loc = (c - 1) - buf + offAdj + 1;
@ -221,12 +221,12 @@ without_accel:
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */
return MO_DEAD; /* termination requested */
}
} else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
&cached_accept_state, &cached_accept_id)
== MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
return MO_DEAD;
}
}
@ -265,7 +265,7 @@ with_accel:
if (mode == STOP_AT_MATCH) {
*state = s & STATE_MASK;
*c_final = c - 1;
return MO_CONTINUE_MATCHING;
return MO_MATCHES_PENDING;
}
u64a loc = (c - 1) - buf + offAdj + 1;
@ -273,12 +273,12 @@ with_accel:
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */
return MO_DEAD; /* termination requested */
}
} else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
&cached_accept_state, &cached_accept_id)
== MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
return MO_DEAD;
}
}
@ -293,7 +293,7 @@ exit:
}
*state = s;
return MO_CONTINUE_MATCHING;
return MO_ALIVE;
}
static never_inline
@ -376,7 +376,7 @@ char mcclellanExec8_i(const struct mcclellan *m, u32 *state, const u8 *buf,
char single, const u8 **c_final, enum MatchMode mode) {
if (!len) {
*c_final = buf;
return MO_CONTINUE_MATCHING;
return MO_ALIVE;
}
u32 s = *state;
const u8 *c = buf;
@ -390,8 +390,7 @@ char mcclellanExec8_i(const struct mcclellan *m, u32 *state, const u8 *buf,
u32 cached_accept_id = 0;
u32 cached_accept_state = 0;
DEBUG_PRINTF("accel %hu, accept %hu\n",
m->accel_limit_8, m->accept_limit_8);
DEBUG_PRINTF("accel %hu, accept %u\n", m->accel_limit_8, accept_limit);
DEBUG_PRINTF("s: %u, len %zu\n", s, len);
@ -417,19 +416,19 @@ without_accel:
DEBUG_PRINTF("match - pausing\n");
*state = s;
*c_final = c - 1;
return MO_CONTINUE_MATCHING;
return MO_MATCHES_PENDING;
}
u64a loc = (c - 1) - buf + offAdj + 1;
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
return MO_DEAD;
}
} else if (doComplexReport(cb, ctxt, m, s, loc, 0,
&cached_accept_state, &cached_accept_id)
== MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
return MO_DEAD;
}
}
@ -464,19 +463,19 @@ with_accel:
DEBUG_PRINTF("match - pausing\n");
*state = s;
*c_final = c - 1;
return MO_CONTINUE_MATCHING;
return MO_MATCHES_PENDING;
}
u64a loc = (c - 1) - buf + offAdj + 1;
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
return MO_DEAD;
}
} else if (doComplexReport(cb, ctxt, m, s, loc, 0,
&cached_accept_state, &cached_accept_id)
== MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
return MO_DEAD;
}
}
@ -488,7 +487,7 @@ exit:
if (mode == STOP_AT_MATCH) {
*c_final = c_end;
}
return MO_CONTINUE_MATCHING;
return MO_ALIVE;
}
static never_inline
@ -576,7 +575,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
q->report_current = 0;
if (rv == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
return MO_DEAD;
}
}
@ -611,17 +610,20 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
/* do main buffer region */
const u8 *final_look;
if (mcclellanExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp,
offset + sp, cb, context, single, &final_look,
mode)
== MO_HALT_MATCHING) {
char rv = mcclellanExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp,
offset + sp, cb, context, single,
&final_look, mode);
if (rv == MO_DEAD) {
*(u16 *)q->state = 0;
return 0;
return MO_DEAD;
}
if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) {
if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) {
DEBUG_PRINTF("this is as far as we go\n");
assert(q->cur);
DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf);
assert(q->cur);
assert(final_look != cur_buf + local_ep);
q->cur--;
q->items[q->cur].type = MQE_START;
q->items[q->cur].location = final_look - cur_buf + 1; /* due to
@ -630,6 +632,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
return MO_MATCHES_PENDING;
}
assert(rv == MO_ALIVE);
assert(q->cur);
if (mode != NO_MATCHES && q->items[q->cur].location > end) {
DEBUG_PRINTF("this is as far as we go\n");
@ -662,7 +665,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
case MQE_END:
*(u16 *)q->state = s;
q->cur++;
return s ? MO_ALIVE : 0;
return s ? MO_ALIVE : MO_DEAD;
default:
assert(!"invalid queue event");
}
@ -681,8 +684,8 @@ char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, const u8 *buffer,
if (mcclellanExec16_i(m, &s, buffer, length, offset, cb, context, single,
NULL, CALLBACK_OUTPUT)
== MO_HALT_MATCHING) {
return 0;
== MO_DEAD) {
return s ? MO_ALIVE : MO_DEAD;
}
const struct mstate_aux *aux = get_aux(m, s);
@ -691,7 +694,7 @@ char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, const u8 *buffer,
doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL);
}
return !!s;
return MO_ALIVE;
}
static really_inline
@ -724,7 +727,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
q->report_current = 0;
if (rv == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
return MO_DEAD;
}
}
@ -760,16 +763,20 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
}
const u8 *final_look;
if (mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, offset + sp,
cb, context, single, &final_look, mode)
== MO_HALT_MATCHING) {
char rv = mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp,
offset + sp, cb, context, single,
&final_look, mode);
if (rv == MO_HALT_MATCHING) {
*(u8 *)q->state = 0;
return 0;
return MO_DEAD;
}
if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) {
/* found a match */
DEBUG_PRINTF("found a match\n");
if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) {
DEBUG_PRINTF("this is as far as we go\n");
DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf);
assert(q->cur);
assert(final_look != cur_buf + local_ep);
q->cur--;
q->items[q->cur].type = MQE_START;
q->items[q->cur].location = final_look - cur_buf + 1; /* due to
@ -778,6 +785,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
return MO_MATCHES_PENDING;
}
assert(rv == MO_ALIVE);
assert(q->cur);
if (mode != NO_MATCHES && q->items[q->cur].location > end) {
DEBUG_PRINTF("this is as far as we go\n");
@ -811,7 +819,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
case MQE_END:
*(u8 *)q->state = s;
q->cur++;
return s ? MO_ALIVE : 0;
return s ? MO_ALIVE : MO_DEAD;
default:
assert(!"invalid queue event");
}
@ -830,8 +838,8 @@ char nfaExecMcClellan8_Bi(const struct NFA *n, u64a offset, const u8 *buffer,
if (mcclellanExec8_i(m, &s, buffer, length, offset, cb, context, single,
NULL, CALLBACK_OUTPUT)
== MO_HALT_MATCHING) {
return 0;
== MO_DEAD) {
return MO_DEAD;
}
const struct mstate_aux *aux = get_aux(m, s);
@ -840,7 +848,7 @@ char nfaExecMcClellan8_Bi(const struct NFA *n, u64a offset, const u8 *buffer,
doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL);
}
return s;
return s ? MO_ALIVE : MO_DEAD;
}
char nfaExecMcClellan8_B(const struct NFA *n, u64a offset, const u8 *buffer,

View File

@ -71,17 +71,17 @@ struct mcclellan {
u16 start_floating; /**< floating start state */
u32 aux_offset; /**< offset of the aux structures relative to the start of
* the nfa structure */
u32 sherman_offset; /**< offset of to array of sherman state offsets
* the state_info structures relative to the start of the
* nfa structure */
u32 sherman_end; /**< offset of the end of the state_info structures relative
* to the start of the nfa structure */
u32 sherman_offset; /**< offset of array of sherman state offsets the
* state_info structures relative to the start of the
* nfa structure */
u32 sherman_end; /**< offset of the end of the state_info structures
* relative to the start of the nfa structure */
u16 accel_limit_8; /**< 8 bit, lowest accelerable state */
u16 accept_limit_8; /**< 8 bit, lowest accept state */
u16 sherman_limit; /**< lowest sherman state */
u8 alphaShift;
u8 flags;
u8 has_accel; /**< 1 iff there are any accel planes */
u8 has_accel; /**< 1 iff there are any accel plans */
u8 remap[256]; /**< remaps characters to a smaller alphabet */
ReportID arb_report; /**< one of the accepts that this dfa may raise */
u32 accel_offset; /**< offset of the accel structures from start of NFA */

View File

@ -415,9 +415,9 @@ void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info,
: info.raw.start_floating);
}
/* returns non-zero on error */
/* returns false on error */
static
int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) {
bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) {
info.states[0].impl_id = 0; /* dead is always 0 */
vector<dstate_id_t> norm;
@ -426,7 +426,7 @@ int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) {
if (info.size() > (1 << 16)) {
DEBUG_PRINTF("too many states\n");
*sherman_base = 0;
return 1;
return false;
}
for (u32 i = 1; i < info.size(); i++) {
@ -452,7 +452,7 @@ int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) {
/* Check to see if we haven't over allocated our states */
DEBUG_PRINTF("next sherman %u masked %u\n", next_sherman,
(dstate_id_t)(next_sherman & STATE_MASK));
return (next_sherman - 1) != ((next_sherman - 1) & STATE_MASK);
return (next_sherman - 1) == ((next_sherman - 1) & STATE_MASK);
}
static
@ -470,7 +470,7 @@ aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,
assert(alphaShift <= 8);
u16 count_real_states;
if (allocateFSN16(info, &count_real_states)) {
if (!allocateFSN16(info, &count_real_states)) {
DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
info.size());
return nullptr;

View File

@ -32,9 +32,7 @@
#include "accel_dfa_build_strat.h"
#include "rdfa.h"
#include "ue2common.h"
#include "util/accel_scheme.h"
#include "util/alloc.h"
#include "util/charreach.h"
#include "util/ue2_containers.h"
#include <memory>

1406
src/nfa/mcsheng.c Normal file

File diff suppressed because it is too large Load Diff

84
src/nfa/mcsheng.h Normal file
View File

@ -0,0 +1,84 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MCSHENG_H
#define MCSHENG_H
#include "callback.h"
#include "ue2common.h"
struct mq;
struct NFA;
/* 8-bit Sheng-McClellan hybrid.
 *
 * Entry points for the 8-bit variant of the engine. Each function is named
 * nfaExecMcSheng8_<operation>.
 * NOTE(review): the naming presumably lets the generic NFA API dispatch bind
 * to these by engine name - confirm against the dispatch code. */
char nfaExecMcSheng8_testEOD(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset,
NfaCallback callback, void *context);
char nfaExecMcSheng8_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcSheng8_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcSheng8_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecMcSheng8_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecMcSheng8_inAccept(const struct NFA *n, ReportID report,
struct mq *q);
char nfaExecMcSheng8_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecMcSheng8_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecMcSheng8_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);
char nfaExecMcSheng8_queueCompressState(const struct NFA *nfa,
const struct mq *q, s64a loc);
char nfaExecMcSheng8_expandState(const struct NFA *nfa, void *dest,
const void *src, u64a offset, u8 key);
/* These two operations have no dedicated function here: the names are mapped
 * onto NFA_API_NO_IMPL / NFA_API_ZOMBIE_NO_IMPL, which are defined outside
 * this header. */
#define nfaExecMcSheng8_B_Reverse NFA_API_NO_IMPL
#define nfaExecMcSheng8_zombie_status NFA_API_ZOMBIE_NO_IMPL
/* 16-bit Sheng-McClellan hybrid.
 *
 * Entry points for the 16-bit variant of the engine; the set of operations
 * and their signatures mirror the 8-bit declarations in this header. */
char nfaExecMcSheng16_testEOD(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset,
NfaCallback callback, void *context);
char nfaExecMcSheng16_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcSheng16_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcSheng16_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecMcSheng16_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecMcSheng16_inAccept(const struct NFA *n, ReportID report,
struct mq *q);
char nfaExecMcSheng16_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecMcSheng16_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecMcSheng16_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);
char nfaExecMcSheng16_queueCompressState(const struct NFA *nfa,
const struct mq *q, s64a loc);
char nfaExecMcSheng16_expandState(const struct NFA *nfa, void *dest,
const void *src, u64a offset, u8 key);
/* These two operations have no dedicated function here: the names are mapped
 * onto NFA_API_NO_IMPL / NFA_API_ZOMBIE_NO_IMPL, which are defined outside
 * this header. */
#define nfaExecMcSheng16_B_Reverse NFA_API_NO_IMPL
#define nfaExecMcSheng16_zombie_status NFA_API_ZOMBIE_NO_IMPL
#endif

1144
src/nfa/mcsheng_compile.cpp Normal file

File diff suppressed because it is too large Load Diff

59
src/nfa/mcsheng_compile.h Normal file
View File

@ -0,0 +1,59 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MCSHENGCOMPILE_H
#define MCSHENGCOMPILE_H
#include "accel_dfa_build_strat.h"
#include "rdfa.h"
#include "ue2common.h"
#include "util/alloc.h"
#include "util/ue2_containers.h"
#include <memory>
#include <set>
struct NFA;
namespace ue2 {
class ReportManager;
struct CompileContext;
/* Builds an mcsheng engine from the raw DFA \p raw, using the compile context
 * \p cc and report manager \p rm. The result is handed back as an owning
 * aligned_unique_ptr<NFA>.
 * NOTE(review): presumably returns a null pointer when the DFA cannot be
 * implemented as an mcsheng - confirm against mcsheng_compile.cpp.
 *
 * accel_states: (optional) on success, is filled with the set of accelerable
 * states */
ue2::aligned_unique_ptr<NFA>
mcshengCompile(raw_dfa &raw, const CompileContext &cc,
const ReportManager &rm,
std::set<dstate_id_t> *accel_states = nullptr);
/* NOTE(review): by its name, reports whether the given mcsheng engine uses
 * acceleration - confirm against the definition. */
bool has_accel_mcsheng(const NFA *nfa);
} // namespace ue2
#endif

43
src/nfa/mcsheng_data.c Normal file
View File

@ -0,0 +1,43 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "mcsheng_internal.h"
/* This table is in a separate translation unit from mcsheng.c as we want to
 * prevent the compiler from seeing these constants. We have the load resources
 * free at runtime to load the masks with no problems.
 *
 * Layout: entry i (for i in 1..7) has the low four bits set plus all eight
 * bits of byte i of the 64-bit word; entry 0 is an all-zero placeholder.
 * NOTE(review): the name suggests these are used as PEXT bit-extraction
 * masks - confirm against the users in mcsheng.c. */
const u64a mcsheng_pext_mask[8] = {
0, /* dummy: placeholder so that real masks start at index 1 */
0x000000000000ff0f,
0x0000000000ff000f,
0x00000000ff00000f,
0x000000ff0000000f,
0x0000ff000000000f,
0x00ff00000000000f,
0xff0000000000000f,
};

415
src/nfa/mcsheng_dump.cpp Normal file
View File

@ -0,0 +1,415 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "mcsheng_dump.h"
#include "accel.h"
#include "accel_dump.h"
#include "nfa_dump_internal.h"
#include "nfa_internal.h"
#include "mcsheng_internal.h"
#include "rdfa.h"
#include "ue2common.h"
#include "util/charreach.h"
#include "util/dump_charclass.h"
#include "util/dump_util.h"
#include "util/unaligned.h"
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <map>
#ifndef DUMP_SUPPORT
#error No dump support!
#endif
using namespace std;
namespace ue2 {
/* Returns the aux structure belonging to state i of the mcsheng engine held
 * in n. The aux array starts aux_offset bytes from the start of the NFA. */
static
const mstate_aux *getAux(const NFA *n, dstate_id_t i) {
    const char *nfa_bytes = (const char *)n;
    const mcsheng *impl = (const mcsheng *)getImplNfa(n);

    const mstate_aux *first = (const mstate_aux *)(nfa_bytes + impl->aux_offset);
    const mstate_aux *entry = first + i;

    /* the requested entry must begin inside the engine */
    assert((const char *)entry < nfa_bytes + impl->length);
    return entry;
}
static
void next_states(const NFA *n, u16 s, u16 *t) {
const mcsheng *m = (const mcsheng *)getImplNfa(n);
const mstate_aux *aux = getAux(n, s);
const u32 as = m->alphaShift;
assert(s != DEAD_STATE);
if (s < m->sheng_end) {
for (u16 c = 0; c < N_CHARS; c++) {
u8 sheng_s = s - 1;
auto trans_for_c = (const char *)&m->sheng_masks[c];
assert(sheng_s < sizeof(m128));
u8 raw_succ = trans_for_c[sheng_s];
if (raw_succ == m->sheng_end - 1) {
t[c] = DEAD_STATE;
} else if (raw_succ < m->sheng_end) {
t[c] = raw_succ + 1;
} else {
t[c] = raw_succ;
}
}
} else if (n->type == MCSHENG_NFA_8) {
const u8 *succ_table = (const u8 *)((const char *)m + sizeof(mcsheng));
for (u16 c = 0; c < N_CHARS; c++) {
u32 normal_id = s - m->sheng_end;
t[c] = succ_table[(normal_id << as) + m->remap[c]];
}
} else {
u16 base_s = s;
const char *winfo_base = (const char *)n + m->sherman_offset;
const char *state_base
= winfo_base + SHERMAN_FIXED_SIZE * (s - m->sherman_limit);
if (s >= m->sherman_limit) {
base_s = unaligned_load_u16(state_base + SHERMAN_DADDY_OFFSET);
assert(base_s >= m->sheng_end);
}
const u16 *succ_table = (const u16 *)((const char *)m
+ sizeof(mcsheng));
for (u16 c = 0; c < N_CHARS; c++) {
u32 normal_id = base_s - m->sheng_end;
t[c] = succ_table[(normal_id << as) + m->remap[c]];
}
if (s >= m->sherman_limit) {
UNUSED char type = *(state_base + SHERMAN_TYPE_OFFSET);
assert(type == SHERMAN_STATE);
u8 len = *(const u8 *)(SHERMAN_LEN_OFFSET + state_base);
const char *chars = state_base + SHERMAN_CHARS_OFFSET;
const u16 *states = (const u16 *)(state_base
+ SHERMAN_STATES_OFFSET(len));
for (u8 i = 0; i < len; i++) {
for (u16 c = 0; c < N_CHARS; c++) {
if (m->remap[c] == chars[i]) {
t[c] = unaligned_load_u16((const u8*)&states[i]);
}
}
}
}
for (u16 c = 0; c < N_CHARS; c++) {
t[c] &= STATE_MASK;
}
}
t[TOP] = aux->top & STATE_MASK;
}
static
void describeEdge(FILE *f, const mcsheng *m, const u16 *t, u16 i) {
for (u16 s = 0; s < N_CHARS; s++) {
if (!t[s]) {
continue;
}
u16 ss;
for (ss = 0; ss < s; ss++) {
if (t[s] == t[ss]) {
break;
}
}
if (ss != s) {
continue;
}
CharReach reach;
for (ss = s; ss < 256; ss++) {
if (t[s] == t[ss]) {
reach.set(ss);
}
}
fprintf(f, "%u -> %u [ ", i, t[s]);
if (i < m->sheng_end && t[s] < m->sheng_end) {
fprintf(f, "color = red, fontcolor = red ");
}
fprintf(f, "label = \"");
describeClass(f, reach, 5, CC_OUT_DOT);
fprintf(f, "\" ];\n");
}
}
/* Decorates node i in the dot output according to the acceleration scheme
 * recorded in accel. Nothing is written for ACCEL_NONE. */
static
void dumpAccelDot(FILE *f, u16 i, const union AccelAux *accel) {
    const auto scheme = accel->accel_type;

    if (scheme == ACCEL_NONE) {
        return;
    }

    switch (scheme) {
    case ACCEL_VERM:
    case ACCEL_VERM_NOCASE:
    case ACCEL_DVERM:
    case ACCEL_DVERM_NOCASE:
        fprintf(f, "%u [ color = forestgreen style=diagonals];\n", i);
        return;
    case ACCEL_SHUFTI:
    case ACCEL_DSHUFTI:
    case ACCEL_TRUFFLE:
        fprintf(f, "%u [ color = darkgreen style=diagonals ];\n", i);
        return;
    default:
        /* any other scheme */
        fprintf(f, "%u [ color = yellow style=diagonals ];\n", i);
        return;
    }
}
/* Writes the dot description of state i: its label, followed by attribute
 * lines for acceleration, sheng membership, accept / accept-at-EOD status,
 * its top transition, whether it is an anchored or floating start state and,
 * for sherman states, a dashed edge to its daddy state. */
static
void describeNode(const NFA *n, const mcsheng *m, u16 i, FILE *f) {
    const mstate_aux *aux = getAux(n, i);

    /* a sherman_limit of zero is treated as "no sherman states" */
    bool isSherman = m->sherman_limit && i >= m->sherman_limit;

    fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, "
               "label = \"%u%s\" ]; \n", i, i, isSherman ? "w":"");

    if (aux->accel_offset) {
        dumpAccelDot(f, i, (const union AccelAux *)
                            ((const char *)m + aux->accel_offset));
    }

    if (i && i < m->sheng_end) {
        fprintf(f, "%u [color = red, fontcolor = red]; \n", i);
    }

    if (aux->accept_eod) {
        fprintf(f, "%u [ color = darkorchid ];\n", i);
    }

    if (aux->accept) {
        fprintf(f, "%u [ shape = doublecircle ];\n", i);
    }

    if (aux->top && aux->top != i) {
        fprintf(f, "%u -> %u [color = darkgoldenrod weight=0.1 ]\n", i,
                aux->top);
    }

    if (i == m->start_anchored) {
        fprintf(f, "STARTA -> %u [color = blue ]\n", i);
    }

    if (i == m->start_floating) {
        fprintf(f, "STARTF -> %u [color = red ]\n", i);
    }

    if (isSherman) {
        const char *winfo_base = (const char *)n + m->sherman_offset;
        const char *state_base
            = winfo_base + SHERMAN_FIXED_SIZE * (i - m->sherman_limit);
        assert(state_base < (const char *)m + m->length - sizeof(NFA));
        UNUSED u8 type = *(const u8 *)(state_base + SHERMAN_TYPE_OFFSET);
        assert(type == SHERMAN_STATE);
        fprintf(f, "%u [ fillcolor = lightblue style=filled ];\n", i);

        /* Read the daddy id with the unaligned-safe helper, as next_states()
         * does for the same field, rather than dereferencing a cast u16
         * pointer into the byte-packed sherman record. */
        u16 daddy = unaligned_load_u16(state_base + SHERMAN_DADDY_OFFSET);
        if (daddy) {
            fprintf(f, "%u -> %u [ color=royalblue style=dashed weight=0.1]\n",
                    i, daddy);
        }
    }

    if (i && i < m->sheng_end) {
        fprintf(f, "subgraph cluster_sheng { %u } \n", i);
    }
}
/* Writes the common dot preamble followed by the DFA-specific additions:
 * invisible STARTF, STARTA and 0 nodes, and a dashed cluster_sheng
 * subgraph. */
static
void dumpDotPreambleDfa(FILE *f) {
    dumpDotPreamble(f);

    // DFA specific additions.
    fputs("STARTF [style=invis];\n", f);
    fputs("STARTA [style=invis];\n", f);
    fputs("0 [style=invis];\n", f);
    fputs("subgraph cluster_sheng { style = dashed }\n", f);
}
static
void dump_dot_16(const NFA *nfa, FILE *f) {
auto *m = (const mcsheng *)getImplNfa(nfa);
dumpDotPreambleDfa(f);
for (u16 i = 1; i < m->state_count; i++) {
describeNode(nfa, m, i, f);
u16 t[ALPHABET_SIZE];
next_states(nfa, i, t);
describeEdge(f, m, t, i);
}
fprintf(f, "}\n");
}
static
void dump_dot_8(const NFA *nfa, FILE *f) {
auto m = (const mcsheng *)getImplNfa(nfa);
dumpDotPreambleDfa(f);
for (u16 i = 1; i < m->state_count; i++) {
describeNode(nfa, m, i, f);
u16 t[ALPHABET_SIZE];
next_states(nfa, i, t);
describeEdge(f, m, t, i);
}
fprintf(f, "}\n");
}
static
void dumpAccelMasks(FILE *f, const mcsheng *m, const mstate_aux *aux) {
fprintf(f, "\n");
fprintf(f, "Acceleration\n");
fprintf(f, "------------\n");
for (u16 i = 0; i < m->state_count; i++) {
if (!aux[i].accel_offset) {
continue;
}
auto accel = (const AccelAux *)((const char *)m + aux[i].accel_offset);
fprintf(f, "%05hu ", i);
dumpAccelInfo(f, *accel);
}
}
static
void describeAlphabet(FILE *f, const mcsheng *m) {
map<u8, CharReach> rev;
for (u16 i = 0; i < N_CHARS; i++) {
rev[m->remap[i]].clear();
}
for (u16 i = 0; i < N_CHARS; i++) {
rev[m->remap[i]].set(i);
}
map<u8, CharReach>::const_iterator it;
fprintf(f, "\nAlphabet\n");
for (it = rev.begin(); it != rev.end(); ++it) {
fprintf(f, "%3hhu: ", it->first);
describeClass(f, it->second, 10240, CC_OUT_TEXT);
fprintf(f, "\n");
}
fprintf(f, "\n");
}
/* Prints the header fields shared by the 8-bit and 16-bit text dumps: the
 * arbitrary report id, state count, engine length, anchored and floating
 * start states, the single-accept flag, has_accel, and the sheng limits. */
static
void dumpCommonHeader(FILE *f, const mcsheng *m) {
    fprintf(f, "report: %u, states: %u, length: %u\n", m->arb_report,
            m->state_count, m->length);
    fprintf(f, "astart: %hu, fstart: %hu\n", m->start_anchored,
            m->start_floating);

    /* Test the SINGLE bit itself. Unary ! binds tighter than &, so the
     * unparenthesised form "!!flags & FLAG" would instead report whether any
     * flag at all is set. */
    const int single_accept = !!(m->flags & MCSHENG_FLAG_SINGLE);
    fprintf(f, "single accept: %d, has_accel: %d\n",
            single_accept, m->has_accel);

    fprintf(f, "sheng_end: %hu\n", m->sheng_end);
    fprintf(f, "sheng_accel_limit: %hu\n", m->sheng_accel_limit);
}
static
void dump_text_16(const NFA *nfa, FILE *f) {
auto *m = (const mcsheng *)getImplNfa(nfa);
auto *aux = (const mstate_aux *)((const char *)nfa + m->aux_offset);
fprintf(f, "mcsheng 16\n");
dumpCommonHeader(f, m);
fprintf(f, "sherman_limit: %d, sherman_end: %d\n", (int)m->sherman_limit,
(int)m->sherman_end);
fprintf(f, "\n");
describeAlphabet(f, m);
dumpAccelMasks(f, m, aux);
fprintf(f, "\n");
dumpTextReverse(nfa, f);
}
static
void dump_text_8(const NFA *nfa, FILE *f) {
auto m = (const mcsheng *)getImplNfa(nfa);
auto aux = (const mstate_aux *)((const char *)nfa + m->aux_offset);
fprintf(f, "mcsheng 8\n");
dumpCommonHeader(f, m);
fprintf(f, "accel_limit: %hu, accept_limit %hu\n", m->accel_limit_8,
m->accept_limit_8);
fprintf(f, "\n");
describeAlphabet(f, m);
dumpAccelMasks(f, m, aux);
fprintf(f, "\n");
dumpTextReverse(nfa, f);
}
void nfaExecMcSheng16_dump(const NFA *nfa, const string &base) {
assert(nfa->type == MCSHENG_NFA_16);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
dump_text_16(nfa, f);
fclose(f);
f = fopen_or_throw((base + ".dot").c_str(), "w");
dump_dot_16(nfa, f);
fclose(f);
}
void nfaExecMcSheng8_dump(const NFA *nfa, const string &base) {
assert(nfa->type == MCSHENG_NFA_8);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
dump_text_8(nfa, f);
fclose(f);
f = fopen_or_throw((base + ".dot").c_str(), "w");
dump_dot_8(nfa, f);
fclose(f);
}
} // namespace ue2

50
src/nfa/mcsheng_dump.h Normal file
View File

@ -0,0 +1,50 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MCSHENG_DUMP_H
#define MCSHENG_DUMP_H
#ifdef DUMP_SUPPORT
#include "rdfa.h"
#include <cstdio>
#include <string>
struct NFA;
namespace ue2 {
/** \brief Dump an 8-bit McSheng engine as text to "<base>.txt" and as a dot
 * graph to "<base>.dot". Only available when DUMP_SUPPORT is defined. */
void nfaExecMcSheng8_dump(const struct NFA *nfa, const std::string &base);
/** \brief Dump a 16-bit McSheng engine as text to "<base>.txt" and as a dot
 * graph to "<base>.dot". Only available when DUMP_SUPPORT is defined. */
void nfaExecMcSheng16_dump(const struct NFA *nfa, const std::string &base);
} // namespace ue2
#endif // DUMP_SUPPORT
#endif // MCSHENG_DUMP_H

View File

@ -0,0 +1,95 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MCSHENG_INTERNAL_H
#define MCSHENG_INTERNAL_H
#include "nfa_internal.h"
#include "ue2common.h"
#include "util/simd_utils.h"
/* Bit layout of a 16-bit value: the top two bits are flags and STATE_MASK
 * covers the remaining 14 bits (the three constants together make 0xffff).
 * NOTE(review): presumably applied to 16-bit state ids / transition entries -
 * confirm against the runtime. */
#define ACCEPT_FLAG 0x8000
#define ACCEL_FLAG 0x4000
#define STATE_MASK 0x3fff
/* Layout constants for a sherman state record.
 * NOTE(review): the *_OFFSET values look like byte offsets into a record of
 * SHERMAN_FIXED_SIZE bytes, with SHERMAN_STATE being the value stored in the
 * type field - confirm against the compiler/runtime. */
#define SHERMAN_STATE 1
#define SHERMAN_TYPE_OFFSET 0
#define SHERMAN_FIXED_SIZE 32
#define SHERMAN_LEN_OFFSET 1
#define SHERMAN_DADDY_OFFSET 2
#define SHERMAN_CHARS_OFFSET 4
/* The states array follows the chars array, whose length is sso_len. */
#define SHERMAN_STATES_OFFSET(sso_len) (4 + (sso_len))
/* A counted list of report ids, stored as a count followed by a flexible
 * array member (presumably holding `count` entries - confirm). */
struct report_list {
u32 count;
ReportID report[];
};
/* Per-state auxiliary data; the array of these is located via
 * mcsheng::aux_offset. */
struct mstate_aux {
u32 accept; /* NOTE(review): presumably offset of the report list raised on accept; 0 if none - confirm */
u32 accept_eod; /* NOTE(review): presumably as above but for accepts at end of data - confirm */
u16 top; /* NOTE(review): presumably the state moved to on a top event - confirm */
u32 accel_offset; /* relative to start of struct mcsheng; 0 if no accel */
};
#define MCSHENG_FLAG_SINGLE 1 /**< we raise only single accept id */
/* Header of a compiled Sheng-McClellan hybrid engine. Offsets are byte
 * offsets; each field's comment states what the offset is relative to. */
struct mcsheng {
u16 state_count; /**< total number of states */
u32 length; /**< length of dfa in bytes */
u16 start_anchored; /**< anchored start state */
u16 start_floating; /**< floating start state */
u32 aux_offset; /**< offset of the aux structures relative to the start of
* the nfa structure */
u32 sherman_offset; /**< offset of the sherman state_info structures
* relative to the start of the nfa structure
* (NOTE(review): reworded from a garbled original -
* confirm) */
u32 sherman_end; /**< offset of the end of the state_info structures
* relative to the start of the nfa structure */
u16 sheng_end; /**< first non-sheng state */
u16 sheng_accel_limit; /**< first sheng accel state. state given in terms of
* internal sheng ids */
u16 accel_limit_8; /**< 8 bit, lowest accelerable state */
u16 accept_limit_8; /**< 8 bit, lowest accept state */
u16 sherman_limit; /**< lowest sherman state */
u8 alphaShift; /**< NOTE(review): undocumented; presumably log2 of the
* (padded) alphabet size used to index transition rows -
* confirm */
u8 flags; /**< bitwise OR of MCSHENG_FLAG_* values */
u8 has_accel; /**< 1 iff there are any accel plans */
u8 remap[256]; /**< remaps characters to a smaller alphabet */
ReportID arb_report; /**< one of the accepts that this dfa may raise */
u32 accel_offset; /**< offset of the accel structures from start of NFA */
m128 sheng_masks[N_CHARS]; /**< one 128-bit mask per input character for the
* sheng region */
};
/* pext masks that let the runtime access the appropriate copies of bytes 1..7
 * of the data held in a u64a. */
extern const u64a mcsheng_pext_mask[8];
#endif

View File

@ -41,6 +41,7 @@
#include "lbr.h"
#include "limex.h"
#include "mcclellan.h"
#include "mcsheng.h"
#include "mpv.h"
#include "sheng.h"
#include "tamarama.h"
@ -73,6 +74,8 @@
DISPATCH_CASE(CASTLE_NFA, Castle, dbnt_func); \
DISPATCH_CASE(SHENG_NFA, Sheng, dbnt_func); \
DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \
DISPATCH_CASE(MCSHENG_NFA_8, McSheng8, dbnt_func); \
DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \
default: \
assert(0); \
}

View File

@ -30,6 +30,7 @@
#include "limex_internal.h"
#include "mcclellancompile.h"
#include "mcsheng_compile.h"
#include "shengcompile.h"
#include "nfa_internal.h"
#include "repeat_internal.h"
@ -413,6 +414,38 @@ const nfa_dispatch_fn NFATraits<TAMARAMA_NFA>::has_repeats_other_than_firsts = d
const char *NFATraits<TAMARAMA_NFA>::name = "Tamarama";
#endif
/* Build-time traits for the 8-bit Sheng-McClellan hybrid engine: categorised
 * as NFA_OTHER, state alignment of 1, considered fast, never reports bounded
 * repeats, and defers the accel query to has_accel_mcsheng. */
template<> struct NFATraits<MCSHENG_NFA_8> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 1;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const nfa_dispatch_fn NFATraits<MCSHENG_NFA_8>::has_accel = has_accel_mcsheng;
const nfa_dispatch_fn NFATraits<MCSHENG_NFA_8>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<MCSHENG_NFA_8>::has_repeats_other_than_firsts = dispatch_false;
/* the human-readable name is only defined in dump-enabled builds */
#if defined(DUMP_SUPPORT)
const char *NFATraits<MCSHENG_NFA_8>::name = "Shengy McShengFace 8";
#endif
/* Build-time traits for the 16-bit Sheng-McClellan hybrid engine: categorised
 * as NFA_OTHER, state alignment of 2, considered fast, never reports bounded
 * repeats, and defers the accel query to has_accel_mcsheng. */
template<> struct NFATraits<MCSHENG_NFA_16> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 2;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const nfa_dispatch_fn NFATraits<MCSHENG_NFA_16>::has_accel = has_accel_mcsheng;
const nfa_dispatch_fn NFATraits<MCSHENG_NFA_16>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<MCSHENG_NFA_16>::has_repeats_other_than_firsts = dispatch_false;
/* the human-readable name is only defined in dump-enabled builds */
#if defined(DUMP_SUPPORT)
const char *NFATraits<MCSHENG_NFA_16>::name = "Shengy McShengFace 16";
#endif
} // namespace
#if defined(DUMP_SUPPORT)

View File

@ -39,6 +39,7 @@
#include "lbr_dump.h"
#include "limex.h"
#include "mcclellandump.h"
#include "mcsheng_dump.h"
#include "mpv_dump.h"
#include "shengdump.h"
#include "tamarama_dump.h"
@ -78,6 +79,8 @@ namespace ue2 {
DISPATCH_CASE(CASTLE_NFA, Castle, dbnt_func); \
DISPATCH_CASE(SHENG_NFA, Sheng, dbnt_func); \
DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \
DISPATCH_CASE(MCSHENG_NFA_8, McSheng8, dbnt_func); \
DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \
default: \
assert(0); \
}

View File

@ -70,6 +70,8 @@ enum NFAEngineType {
CASTLE_NFA, /**< magic pseudo nfa */
SHENG_NFA, /**< magic pseudo nfa */
TAMARAMA_NFA, /**< magic nfa container */
MCSHENG_NFA_8, /**< magic pseudo nfa */
MCSHENG_NFA_16, /**< magic pseudo nfa */
/** \brief bogus NFA - not used */
INVALID_NFA
};
@ -143,6 +145,12 @@ static really_inline int isMcClellanType(u8 t) {
return t == MCCLELLAN_NFA_8 || t == MCCLELLAN_NFA_16;
}
/** \brief Returns 1 when \a t (an NFA::type value) names one of the
 * Sheng-McClellan hybrid DFA engines, 0 otherwise. */
static really_inline int isShengMcClellanType(u8 t) {
    switch (t) {
    case MCSHENG_NFA_8:
    case MCSHENG_NFA_16:
        return 1;
    default:
        return 0;
    }
}
/** \brief Returns 1 when \a t (an NFA::type value) names one of the Gough
 * DFA engines, 0 otherwise. */
static really_inline int isGoughType(u8 t) {
    switch (t) {
    case GOUGH_NFA_8:
    case GOUGH_NFA_16:
        return 1;
    default:
        return 0;
    }
}
@ -158,7 +166,16 @@ static really_inline int isShengType(u8 t) {
* Sheng DFA.
*/
static really_inline int isDfaType(u8 t) {
return isMcClellanType(t) || isGoughType(t) || isShengType(t);
return isMcClellanType(t) || isGoughType(t) || isShengType(t)
|| isShengMcClellanType(t);
}
/** \brief Returns 1 when \a t (an NFA::type value) is one of the 16-bit DFA
 * engine types (McClellan, McSheng or Gough), 0 otherwise. */
static really_inline int isBigDfaType(u8 t) {
    switch (t) {
    case MCCLELLAN_NFA_16:
    case MCSHENG_NFA_16:
    case GOUGH_NFA_16:
        return 1;
    default:
        return 0;
    }
}
/** \brief Returns 1 when \a t (an NFA::type value) is a DFA engine type that
 * is not one of the big (16-bit) DFA types, 0 otherwise. */
static really_inline int isSmallDfaType(u8 t) {
    if (!isDfaType(t)) {
        return 0;
    }
    return !isBigDfaType(t);
}
/** \brief True if the given type (from NFA::type) is an NFA. */

68
src/nfa/rdfa_graph.cpp Normal file
View File

@ -0,0 +1,68 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "rdfa_graph.h"
#include "rdfa.h"
#include "util/container.h"
#include <vector>
using namespace std;
namespace ue2 {
/* Builds a graph view of a raw dfa: one vertex per state (vertex index equal
 * to the state id) and one edge per distinct successor of each state. The
 * last symbol of the alphabet is not followed. */
RdfaGraph::RdfaGraph(const raw_dfa &rdfa) {
    RdfaGraph &g = *this;
    const size_t num_states = rdfa.states.size();

    vector<RdfaGraph::vertex_descriptor> verts;
    verts.reserve(num_states);
    for (dstate_id_t id = 0; id < num_states; id++) {
        auto v = add_vertex(g);
        assert(g[v].index == id);
        verts.push_back(v);
    }

    const symbol_t symbol_end = rdfa.alpha_size - 1;

    flat_set<dstate_id_t> seen; /* successors already linked from this state */
    for (dstate_id_t from = 0; from < num_states; from++) {
        seen.clear();
        const auto &state = rdfa.states[from];
        for (symbol_t sym = 0; sym < symbol_end; sym++) {
            const dstate_id_t to = state.next[sym];
            if (!contains(seen, to)) {
                DEBUG_PRINTF("%hu->%hu\n", from, to);
                add_edge(verts[from], verts[to], g);
                seen.insert(to);
            }
        }
    }
}
}

54
src/nfa/rdfa_graph.h Normal file
View File

@ -0,0 +1,54 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef RDFA_GRAPH_H
#define RDFA_GRAPH_H
#include "ue2common.h"
#include "util/ue2_graph.h"
namespace ue2 {
struct raw_dfa;
/** \brief Per-vertex properties of an RdfaGraph; the index defaults to 0. */
struct RdfaVertexProps {
size_t index = 0;
};
/** \brief Per-edge properties of an RdfaGraph; the index defaults to 0. */
struct RdfaEdgeProps {
size_t index = 0;
};
/** \brief Graph representation of a raw_dfa, built on ue2_graph and
 * constructed directly from the dfa. */
struct RdfaGraph : public ue2_graph<RdfaGraph, RdfaVertexProps, RdfaEdgeProps> {
RdfaGraph(const raw_dfa &rdfa);
};
}
#endif

View File

@ -447,9 +447,8 @@ void createShuffleMasks(sheng *s, dfa_info &info,
}
}
bool has_accel_sheng(const NFA *nfa) {
const sheng *s = (const sheng *)getImplNfa(nfa);
return s->flags & SHENG_FLAG_HAS_ACCEL;
bool has_accel_sheng(const NFA *) {
return true; /* consider the sheng region as accelerated */
}
aligned_unique_ptr<NFA> shengCompile(raw_dfa &raw,

View File

@ -46,7 +46,6 @@
#include <map>
#include <set>
#include <boost/graph/filtered_graph.hpp>
#include <boost/graph/strong_components.hpp>
#include <boost/graph/topological_sort.hpp>
#include <boost/range/adaptor/map.hpp>
@ -54,7 +53,6 @@ using namespace std;
using boost::default_color_type;
using boost::make_filtered_graph;
using boost::make_assoc_property_map;
using boost::adaptors::map_values;
namespace ue2 {
@ -257,38 +255,6 @@ bool hasBigCycles(const NGHolder &g) {
return false;
}
set<NFAVertex> findVerticesInCycles(const NGHolder &g) {
map<NFAVertex, size_t> comp_map;
strong_components(g, make_assoc_property_map(comp_map));
map<size_t, set<NFAVertex> > comps;
for (const auto &e : comp_map) {
comps[e.second].insert(e.first);
}
set<NFAVertex> rv;
for (const auto &comp : comps | map_values) {
/* every vertex in a strongly connected component is reachable from
* every other vertex in the component. A vertex is involved in a cycle
* therefore if it is in a strongly connected component with more than
* one vertex or if it is the only vertex and it has a self loop. */
assert(!comp.empty());
if (comp.size() > 1) {
insert(&rv, comp);
}
NFAVertex v = *comp.begin();
if (hasSelfLoop(v, g)) {
rv.insert(v);
}
}
return rv;
}
bool can_never_match(const NGHolder &g) {
assert(edge(g.accept, g.acceptEod, g).second);
if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) {

View File

@ -52,6 +52,7 @@
#include "nfa/goughcompile.h"
#include "nfa/mcclellancompile.h"
#include "nfa/mcclellancompile_util.h"
#include "nfa/mcsheng_compile.h"
#include "nfa/nfa_api_queue.h"
#include "nfa/nfa_build_util.h"
#include "nfa/nfa_internal.h"
@ -615,7 +616,7 @@ aligned_unique_ptr<NFA> pickImpl(aligned_unique_ptr<NFA> dfa_impl,
bool d_accel = has_accel(*dfa_impl);
bool n_accel = has_accel(*nfa_impl);
bool d_big = dfa_impl->type == MCCLELLAN_NFA_16;
bool d_big = isBigDfaType(dfa_impl->type);
bool n_vsmall = nfa_impl->nPositions <= 32;
bool n_br = has_bounded_repeats(*nfa_impl);
DEBUG_PRINTF("da %d na %d db %d nvs %d nbr %d\n", (int)d_accel,
@ -666,10 +667,17 @@ buildRepeatEngine(const CastleProto &proto,
}
static
aligned_unique_ptr<NFA> getDfa(raw_dfa &rdfa, const CompileContext &cc,
aligned_unique_ptr<NFA> getDfa(raw_dfa &rdfa, bool is_transient,
const CompileContext &cc,
const ReportManager &rm) {
// Unleash the Sheng!!
auto dfa = shengCompile(rdfa, cc, rm);
if (!dfa && !is_transient) {
// Sheng wasn't successful, so unleash McClellan!
/* We don't try the hybrid for transient prefixes due to the extra
* bytecode and that they are usually run on small blocks */
dfa = mcshengCompile(rdfa, cc, rm);
}
if (!dfa) {
// Sheng wasn't successful, so unleash McClellan!
dfa = mcclellanCompile(rdfa, cc, rm);
@ -697,7 +705,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm,
}
if (suff.dfa()) {
auto d = getDfa(*suff.dfa(), cc, rm);
auto d = getDfa(*suff.dfa(), false, cc, rm);
assert(d);
return d;
}
@ -726,7 +734,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm,
auto rdfa = buildMcClellan(holder, &rm, false, triggers.at(0),
cc.grey);
if (rdfa) {
auto d = getDfa(*rdfa, cc, rm);
auto d = getDfa(*rdfa, false, cc, rm);
assert(d);
if (cc.grey.roseMcClellanSuffix != 2) {
n = pickImpl(move(d), move(n));
@ -846,12 +854,12 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left,
}
if (left.dfa()) {
n = getDfa(*left.dfa(), cc, rm);
n = getDfa(*left.dfa(), is_transient, cc, rm);
} else if (left.graph() && cc.grey.roseMcClellanPrefix == 2 && is_prefix &&
!is_transient) {
auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey);
if (rdfa) {
n = getDfa(*rdfa, cc, rm);
n = getDfa(*rdfa, is_transient, cc, rm);
assert(n);
}
}
@ -878,7 +886,7 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left,
&& (!n || !has_bounded_repeats_other_than_firsts(*n) || !is_fast(*n))) {
auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey);
if (rdfa) {
auto d = getDfa(*rdfa, cc, rm);
auto d = getDfa(*rdfa, is_transient, cc, rm);
assert(d);
n = pickImpl(move(d), move(n));
}
@ -1614,7 +1622,7 @@ public:
aligned_unique_ptr<NFA> operator()(unique_ptr<raw_dfa> &rdfa) const {
// Unleash the mighty DFA!
return getDfa(*rdfa, build.cc, build.rm);
return getDfa(*rdfa, false, build.cc, build.rm);
}
aligned_unique_ptr<NFA> operator()(unique_ptr<raw_som_dfa> &haig) const {
@ -1642,7 +1650,7 @@ public:
!has_bounded_repeats_other_than_firsts(*n)) {
auto rdfa = buildMcClellan(h, &rm, cc.grey);
if (rdfa) {
auto d = getDfa(*rdfa, cc, rm);
auto d = getDfa(*rdfa, false, cc, rm);
if (d) {
n = pickImpl(move(d), move(n));
}

View File

@ -278,7 +278,7 @@ void findCountingMiracleInfo(const left_id &left, const vector<u8> &stopTable,
const NGHolder &g = *left.graph();
auto cyclics = findVerticesInCycles(g);
auto cyclics = find_vertices_in_cycles(g);
if (!proper_out_degree(g.startDs, g)) {
cyclics.erase(g.startDs);

View File

@ -1206,7 +1206,7 @@ u32 roseQuality(const RoseEngine *t) {
}
const NFA *nfa = (const NFA *)((const char *)atable + sizeof(*atable));
if (nfa->type != MCCLELLAN_NFA_8) {
if (!isSmallDfaType(nfa->type)) {
DEBUG_PRINTF("m16 atable engine\n");
return 0;
}

View File

@ -471,4 +471,55 @@ u32 rank_in_mask64(u64a mask, u32 bit) {
return popcount64(mask);
}
#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__))
#define HAVE_PEXT
#endif
/* Parallel bit extract: gathers the bits of x selected by mask and packs
 * them, in order, into the low bits of the result. */
static really_inline
u32 pext32(u32 x, u32 mask) {
#if defined(HAVE_PEXT)
    // Intel BMI2 can do this operation in one instruction.
    return _pext_u32(x, mask);
#else
    u32 packed = 0;
    /* out_bit walks up one position per mask bit consumed */
    for (u32 out_bit = 1; mask != 0; out_bit <<= 1) {
        u32 src = findAndClearLSB_32(&mask);
        if (x & (1U << src)) {
            assert(out_bit != 0); // more than 32 bits!
            packed |= out_bit;
        }
    }
    return packed;
#endif
}
/* Parallel bit extract (64-bit): gathers the bits of x selected by mask and
 * packs them, in order, into the low bits of the result.
 *
 * The portable fallback accumulates in 64 bits so that masks with more than
 * 32 set bits give the same answer as the BMI2 instruction instead of being
 * truncated. */
static really_inline
u64a pext64(u64a x, u64a mask) {
#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
    // Intel BMI2 can do this operation in one instruction.
    return _pext_u64(x, mask);
#else
    u64a result = 0, num = 1;
    while (mask != 0) {
        u32 bit = findAndClearLSB_64(&mask);
        if (x & (1ULL << bit)) {
            /* num is 1 << (mask bits consumed so far), so it cannot wrap to
             * zero while mask bits remain */
            assert(num != 0);
            result |= num;
        }
        num <<= 1;
    }
    return result;
#endif
}
/* Parallel bit deposit: scatters the low bits of x into the positions
 * selected by mask. Only provided when the BMI2 instruction is available on a
 * 64-bit build; there is no portable fallback here. */
#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
static really_inline
u64a pdep64(u64a x, u64a mask) {
return _pdep_u64(x, mask);
}
#endif
#endif // BITUTILS_H

View File

@ -39,8 +39,12 @@
#include "util/ue2_containers.h"
#include <boost/graph/depth_first_search.hpp>
#include <boost/graph/strong_components.hpp>
#include <boost/range/adaptor/map.hpp>
#include <algorithm>
#include <map>
#include <set>
#include <utility>
#include <vector>
@ -140,6 +144,41 @@ void find_unreachable(const Graph &g, const SourceCont &sources, OutCont *out) {
}
}
/* Returns the set of vertices of g that lie on at least one cycle, found via
 * the strongly connected components of the graph. */
template <class Graph>
ue2::flat_set<typename Graph::vertex_descriptor>
find_vertices_in_cycles(const Graph &g) {
    using vertex_descriptor = typename Graph::vertex_descriptor;

    // Label every vertex with the id of its strongly connected component.
    std::map<vertex_descriptor, size_t> scc_of;
    boost::strong_components(g, boost::make_assoc_property_map(scc_of));

    // Invert the labelling: component id -> member vertices.
    std::map<size_t, std::vector<vertex_descriptor>> members_of;
    for (const auto &labelled : scc_of) {
        members_of[labelled.second].push_back(labelled.first);
    }

    ue2::flat_set<vertex_descriptor> cyclic;

    for (const auto &entry : members_of) {
        const auto &members = entry.second;
        assert(!members.empty());

        /* Within a strongly connected component every vertex can reach every
         * other one, so a component of two or more vertices is made up
         * entirely of cycle members. A lone vertex is only on a cycle if it
         * has an edge to itself. */
        if (members.size() > 1) {
            insert(&cyclic, members);
        }

        vertex_descriptor first = members.front();
        if (hasSelfLoop(first, g)) {
            cyclic.insert(first);
        }
    }

    return cyclic;
}
template <class Graph>
bool has_parallel_edge(const Graph &g) {
using vertex_descriptor = typename Graph::vertex_descriptor;

View File

@ -159,6 +159,10 @@ static really_inline m128 set16x8(u8 c) {
return _mm_set1_epi8(c);
}
/* Broadcasts the 32-bit value c into all four 32-bit lanes of an m128. */
static really_inline m128 set4x32(u32 c) {
return _mm_set1_epi32(c);
}
static really_inline u32 movd(const m128 in) {
return _mm_cvtsi128_si32(in);
}
@ -328,6 +332,25 @@ m128 variable_byte_shift_m128(m128 in, s32 amount) {
return pshufb(in, shift_mask);
}
/* Per-byte maximum of a and b, comparing bytes as unsigned values. */
static really_inline
m128 max_u8_m128(m128 a, m128 b) {
return _mm_max_epu8(a, b);
}
/* Per-byte minimum of a and b, comparing bytes as unsigned values. */
static really_inline
m128 min_u8_m128(m128 a, m128 b) {
return _mm_min_epu8(a, b);
}
/* Per-byte unsigned saturating add: each byte of the result is a + b clamped
 * to 0xff. */
static really_inline
m128 sadd_u8_m128(m128 a, m128 b) {
return _mm_adds_epu8(a, b);
}
/* Per-byte subtraction a - b. Unlike sadd_u8_m128 this does not saturate:
 * each byte wraps modulo 256. */
static really_inline
m128 sub_u8_m128(m128 a, m128 b) {
return _mm_sub_epi8(a, b);
}
/****
**** 256-bit Primitives

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -436,3 +436,16 @@ TEST(BitUtils, rank_in_mask64) {
ASSERT_EQ(15, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 31));
ASSERT_EQ(31, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 63));
}
// pdep64 only exists on 64-bit builds with the BMI2 instruction available, so
// the test is compiled under the same condition.
#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
// Deposits the low bits of a fixed pattern into contiguous and split masks
// and checks that each nibble lands in the position its mask selects.
TEST(BitUtils, pdep64) {
u64a data = 0xF123456789ABCDEF;
ASSERT_EQ(0xfULL, pdep64(data, 0xf));
ASSERT_EQ(0xefULL, pdep64(data, 0xff));
ASSERT_EQ(0xf0ULL, pdep64(data, 0xf0));
ASSERT_EQ(0xfULL, pdep64(data, 0xf));
ASSERT_EQ(0xef0ULL, pdep64(data, 0xff0));
ASSERT_EQ(0xef00ULL, pdep64(data, 0xff00));
// split mask: the low three nibbles of data (F, E, D) fill the three holes
ASSERT_EQ(0xd0e0f00ULL, pdep64(data, 0xf0f0f00));
}
#endif

View File

@ -320,9 +320,9 @@ TEST(NFAGraph, cyclicVerts1) {
add_edge(a, b, g);
add_edge(b, a, g);
auto cyclics = findVerticesInCycles(g);
auto cyclics = find_vertices_in_cycles(g);
ASSERT_EQ(set<NFAVertex>({g.startDs, a, b}), cyclics);
ASSERT_EQ(flat_set<NFAVertex>({g.startDs, a, b}), cyclics);
}
TEST(NFAGraph, cyclicVerts2) {
@ -341,9 +341,9 @@ TEST(NFAGraph, cyclicVerts2) {
add_edge(c, d, g);
add_edge(a, e, g);
auto cyclics = findVerticesInCycles(g);
auto cyclics = find_vertices_in_cycles(g);
ASSERT_EQ(set<NFAVertex>({g.startDs, a, b, c}), cyclics);
ASSERT_EQ(flat_set<NFAVertex>({g.startDs, a, b, c}), cyclics);
}
TEST(NFAGraph, cyclicVerts3) {
@ -369,9 +369,9 @@ TEST(NFAGraph, cyclicVerts3) {
add_edge(f, h, g);
add_edge(h, h, g);
auto cyclics = findVerticesInCycles(g);
auto cyclics = find_vertices_in_cycles(g);
ASSERT_EQ(set<NFAVertex>({g.startDs, a, b, c, d, e, h}), cyclics);
ASSERT_EQ(flat_set<NFAVertex>({g.startDs, a, b, c, d, e, h}), cyclics);
}
TEST(NFAGraph, cyclicVerts4) {
@ -396,9 +396,9 @@ TEST(NFAGraph, cyclicVerts4) {
add_edge(e, f, g);
add_edge(f, h, g);
auto cyclics = findVerticesInCycles(g);
auto cyclics = find_vertices_in_cycles(g);
ASSERT_EQ(set<NFAVertex>({g.startDs, a, b, c, d, e}), cyclics);
ASSERT_EQ(flat_set<NFAVertex>({g.startDs, a, b, c, d, e}), cyclics);
}
TEST(NFAGraph, cyclicVerts5) {
@ -418,7 +418,7 @@ TEST(NFAGraph, cyclicVerts5) {
add_edge(c, d, g);
add_edge(e, c, g);
auto cyclics = findVerticesInCycles(g);
auto cyclics = find_vertices_in_cycles(g);
ASSERT_EQ(set<NFAVertex>({g.startDs, b, c}), cyclics);
ASSERT_EQ(flat_set<NFAVertex>({g.startDs, b, c}), cyclics);
}

View File

@ -54,14 +54,14 @@ TEST(Shuffle, PackedExtract32_1) {
for (unsigned int i = 0; i < 32; i++) {
// shuffle a single 1 bit to the front
u32 mask = 1U << i;
EXPECT_EQ(1U, packedExtract32(mask, mask));
EXPECT_EQ(1U, packedExtract32(~0U, mask));
EXPECT_EQ(1U, pext32(mask, mask));
EXPECT_EQ(1U, pext32(~0U, mask));
// we should get zero out of these cases
EXPECT_EQ(0U, packedExtract32(0, mask));
EXPECT_EQ(0U, packedExtract32(~mask, mask));
EXPECT_EQ(0U, pext32(0, mask));
EXPECT_EQ(0U, pext32(~mask, mask));
// we should get zero out of all the other bit positions
for (unsigned int j = 0; (j != i && j < 32); j++) {
EXPECT_EQ(0U, packedExtract32((1U << j), mask));
EXPECT_EQ(0U, pext32((1U << j), mask));
}
}
}
@ -69,10 +69,10 @@ TEST(Shuffle, PackedExtract32_1) {
TEST(Shuffle, PackedExtract32_2) {
// All 32 bits in mask are on
u32 mask = ~0U;
EXPECT_EQ(0U, packedExtract32(0, mask));
EXPECT_EQ(mask, packedExtract32(mask, mask));
EXPECT_EQ(0U, pext32(0, mask));
EXPECT_EQ(mask, pext32(mask, mask));
for (unsigned int i = 0; i < 32; i++) {
EXPECT_EQ(1U << i, packedExtract32(1U << i, mask));
EXPECT_EQ(1U << i, pext32(1U << i, mask));
}
}
@ -84,16 +84,16 @@ TEST(Shuffle, PackedExtract32_3) {
}
// Test both cases (all even bits, all odd bits)
EXPECT_EQ((1U << 16) - 1, packedExtract32(mask, mask));
EXPECT_EQ((1U << 16) - 1, packedExtract32(~mask, ~mask));
EXPECT_EQ(0U, packedExtract32(~mask, mask));
EXPECT_EQ(0U, packedExtract32(mask, ~mask));
EXPECT_EQ((1U << 16) - 1, pext32(mask, mask));
EXPECT_EQ((1U << 16) - 1, pext32(~mask, ~mask));
EXPECT_EQ(0U, pext32(~mask, mask));
EXPECT_EQ(0U, pext32(mask, ~mask));
for (unsigned int i = 0; i < 32; i += 2) {
EXPECT_EQ(1U << (i/2), packedExtract32(1U << i, mask));
EXPECT_EQ(0U, packedExtract32(1U << i, ~mask));
EXPECT_EQ(1U << (i/2), packedExtract32(1U << (i+1), ~mask));
EXPECT_EQ(0U, packedExtract32(1U << (i+1), mask));
EXPECT_EQ(1U << (i/2), pext32(1U << i, mask));
EXPECT_EQ(0U, pext32(1U << i, ~mask));
EXPECT_EQ(1U << (i/2), pext32(1U << (i+1), ~mask));
EXPECT_EQ(0U, pext32(1U << (i+1), mask));
}
}
@ -102,14 +102,14 @@ TEST(Shuffle, PackedExtract64_1) {
for (unsigned int i = 0; i < 64; i++) {
// shuffle a single 1 bit to the front
u64a mask = 1ULL << i;
EXPECT_EQ(1U, packedExtract64(mask, mask));
EXPECT_EQ(1U, packedExtract64(~0ULL, mask));
EXPECT_EQ(1U, pext64(mask, mask));
EXPECT_EQ(1U, pext64(~0ULL, mask));
// we should get zero out of these cases
EXPECT_EQ(0U, packedExtract64(0, mask));
EXPECT_EQ(0U, packedExtract64(~mask, mask));
EXPECT_EQ(0U, pext64(0, mask));
EXPECT_EQ(0U, pext64(~mask, mask));
// we should get zero out of all the other bit positions
for (unsigned int j = 0; (j != i && j < 64); j++) {
EXPECT_EQ(0U, packedExtract64((1ULL << j), mask));
EXPECT_EQ(0U, pext64((1ULL << j), mask));
}
}
}
@ -117,26 +117,26 @@ TEST(Shuffle, PackedExtract64_1) {
TEST(Shuffle, PackedExtract64_2) {
// Fill first half of mask
u64a mask = 0x00000000ffffffffULL;
EXPECT_EQ(0U, packedExtract64(0, mask));
EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask));
EXPECT_EQ(0U, pext64(0, mask));
EXPECT_EQ(0xffffffffU, pext64(mask, mask));
for (unsigned int i = 0; i < 32; i++) {
EXPECT_EQ(1U << i, packedExtract64(1ULL << i, mask));
EXPECT_EQ(1U << i, pext64(1ULL << i, mask));
}
// Fill second half of mask
mask = 0xffffffff00000000ULL;
EXPECT_EQ(0U, packedExtract64(0, mask));
EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask));
EXPECT_EQ(0U, pext64(0, mask));
EXPECT_EQ(0xffffffffU, pext64(mask, mask));
for (unsigned int i = 32; i < 64; i++) {
EXPECT_EQ(1U << (i - 32), packedExtract64(1ULL << i, mask));
EXPECT_EQ(1U << (i - 32), pext64(1ULL << i, mask));
}
// Try one in the middle
mask = 0x0000ffffffff0000ULL;
EXPECT_EQ(0U, packedExtract64(0, mask));
EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask));
EXPECT_EQ(0U, pext64(0, mask));
EXPECT_EQ(0xffffffffU, pext64(mask, mask));
for (unsigned int i = 16; i < 48; i++) {
EXPECT_EQ(1U << (i - 16), packedExtract64(1ULL << i, mask));
EXPECT_EQ(1U << (i - 16), pext64(1ULL << i, mask));
}
}
@ -148,16 +148,16 @@ TEST(Shuffle, PackedExtract64_3) {
}
// Test both cases (all even bits, all odd bits)
EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask));
EXPECT_EQ(0xffffffffU, packedExtract64(~mask, ~mask));
EXPECT_EQ(0U, packedExtract64(~mask, mask));
EXPECT_EQ(0U, packedExtract64(mask, ~mask));
EXPECT_EQ(0xffffffffU, pext64(mask, mask));
EXPECT_EQ(0xffffffffU, pext64(~mask, ~mask));
EXPECT_EQ(0U, pext64(~mask, mask));
EXPECT_EQ(0U, pext64(mask, ~mask));
for (unsigned int i = 0; i < 64; i += 2) {
EXPECT_EQ(1U << (i/2), packedExtract64(1ULL << i, mask));
EXPECT_EQ(0U, packedExtract64(1ULL << i, ~mask));
EXPECT_EQ(1U << (i/2), packedExtract64(1ULL << (i+1), ~mask));
EXPECT_EQ(0U, packedExtract64(1ULL << (i+1), mask));
EXPECT_EQ(1U << (i/2), pext64(1ULL << i, mask));
EXPECT_EQ(0U, pext64(1ULL << i, ~mask));
EXPECT_EQ(1U << (i/2), pext64(1ULL << (i+1), ~mask));
EXPECT_EQ(0U, pext64(1ULL << (i+1), mask));
}
}

View File

@ -614,6 +614,12 @@ TEST(SimdUtilsTest, set16x8) {
}
}
// set4x32 must produce the same bytes as an array holding four copies of the
// 32-bit value.
TEST(SimdUtilsTest, set4x32) {
u32 cmp[4] = { 0x12345678, 0x12345678, 0x12345678, 0x12345678 };
m128 simd = set4x32(cmp[0]);
ASSERT_EQ(0, memcmp(cmp, &simd, sizeof(simd)));
}
#if defined(__AVX2__)
TEST(SimdUtilsTest, set32x8) {
char cmp[sizeof(m256)];
@ -693,4 +699,50 @@ TEST(SimdUtilsTest, variableByteShift128) {
EXPECT_TRUE(!diff128(zeroes128(), variable_byte_shift_m128(in, -16)));
}
// Byte-wise unsigned max of two 16-byte inputs; the trailing 0xfe/0xff bytes
// check that the comparison is unsigned.
TEST(SimdUtilsTest, max_u8_m128) {
char base1[] = "0123456789ABCDE\xfe";
char base2[] = "!!23455889aBCd\xff\xff";
char expec[] = "0123456889aBCd\xff\xff";
m128 in1 = loadu128(base1);
m128 in2 = loadu128(base2);
m128 result = max_u8_m128(in1, in2);
EXPECT_TRUE(!diff128(result, loadu128(expec)));
}
// Byte-wise unsigned min of two 16-byte inputs; the trailing 0xfe/0xff bytes
// check that the comparison is unsigned.
TEST(SimdUtilsTest, min_u8_m128) {
char base1[] = "0123456789ABCDE\xfe";
char base2[] = "!!23455889aBCd\xff\xff";
char expec[] = "!!23455789ABCDE\xfe";
m128 in1 = loadu128(base1);
m128 in2 = loadu128(base2);
m128 result = min_u8_m128(in1, in2);
EXPECT_TRUE(!diff128(result, loadu128(expec)));
}
// Byte-wise saturating add: 0x80 + 0x80 and 0xff + 'b' must clamp to 0xff,
// while sums that fit in a byte are exact.
TEST(SimdUtilsTest, sadd_u8_m128) {
unsigned char base1[] = {0, 0x80, 0xff, 'A', '1', '2', '3', '4',
'1', '2', '3', '4', '1', '2', '3', '4'};
unsigned char base2[] = {'a', 0x80, 'b', 'A', 0x10, 0x10, 0x10, 0x10,
0x30, 0x30, 0x30, 0x30, 0, 0, 0, 0};
unsigned char expec[] = {'a', 0xff, 0xff, 0x82, 'A', 'B', 'C', 'D',
'a', 'b', 'c', 'd', '1', '2', '3', '4'};
m128 in1 = loadu128(base1);
m128 in2 = loadu128(base2);
m128 result = sadd_u8_m128(in1, in2);
EXPECT_TRUE(!diff128(result, loadu128(expec)));
}
// Byte-wise subtraction base1 - base2. None of these inputs underflow, so the
// test does not exercise wrap-around behaviour.
TEST(SimdUtilsTest, sub_u8_m128) {
unsigned char base1[] = {'a', 0xff, 0xff, 0x82, 'A', 'B', 'C', 'D',
'a', 'b', 'c', 'd', '1', '2', '3', '4'};
unsigned char base2[] = {0, 0x80, 0xff, 'A', '1', '2', '3', '4',
'1', '2', '3', '4', '1', '2', '3', '4'};
unsigned char expec[] = {'a', 0x7f, 0, 'A', 0x10, 0x10, 0x10, 0x10,
0x30, 0x30, 0x30, 0x30, 0, 0, 0, 0};
m128 in1 = loadu128(base1);
m128 in2 = loadu128(base2);
m128 result = sub_u8_m128(in1, in2);
EXPECT_TRUE(!diff128(result, loadu128(expec)));
}
} // namespace