diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f7e9bf0..bdb60b74 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -470,6 +470,7 @@ set (hs_exec_SRCS src/nfa/limex_exceptional.h src/nfa/limex_native.c src/nfa/limex_ring.h + src/nfa/limex_64.c src/nfa/limex_simd128.c src/nfa/limex_simd256.c src/nfa/limex_simd384.c diff --git a/src/nfa/limex.h b/src/nfa/limex.h index ad53503c..70bcdd1c 100644 --- a/src/nfa/limex.h +++ b/src/nfa/limex.h @@ -77,6 +77,7 @@ extern "C" GENERATE_NFA_DUMP_DECL(gf_name) GENERATE_NFA_DECL(nfaExecLimEx32) +GENERATE_NFA_DECL(nfaExecLimEx64) GENERATE_NFA_DECL(nfaExecLimEx128) GENERATE_NFA_DECL(nfaExecLimEx256) GENERATE_NFA_DECL(nfaExecLimEx384) diff --git a/src/nfa/limex_64.c b/src/nfa/limex_64.c new file mode 100644 index 00000000..e8f0880b --- /dev/null +++ b/src/nfa/limex_64.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2015-2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief LimEx NFA: 128-bit SIMD runtime implementations. + */ + +/* Limex64 is unusual on as on 32 bit platforms, at runtime it uses an m128 for + * state calculations. + */ + +//#define DEBUG_INPUT +//#define DEBUG_EXCEPTIONS + +#include "limex.h" + +#include "accel.h" +#include "limex_internal.h" +#include "nfa_internal.h" +#include "ue2common.h" +#include "util/bitutils.h" +#include "util/simd_utils.h" + +// Common code +#define STATE_ON_STACK +#define ESTATE_ON_STACK + +#include "limex_runtime.h" + +#define SIZE 64 +#define ENG_STATE_T u64a + +#ifdef ARCH_64_BIT +#define STATE_T u64a +#define LOAD_FROM_ENG load_u64a +#else +#define STATE_T m128 +#define LOAD_FROM_ENG load_m128_from_u64a +#endif + +#include "limex_exceptional.h" + +#include "limex_state_impl.h" + +#define INLINE_ATTR really_inline +#include "limex_common_impl.h" + +#include "limex_runtime_impl.h" diff --git a/src/nfa/limex_accel.c b/src/nfa/limex_accel.c index 28f37083..f883973e 100644 --- a/src/nfa/limex_accel.c +++ b/src/nfa/limex_accel.c @@ -82,6 +82,22 @@ size_t doAccel32(u32 s, u32 accel, const u8 *accelTable, return accelScanWrapper(accelTable, aux, input, idx, i, end); } +#ifdef ARCH_64_BIT +size_t doAccel64(u64a s, u64a accel, const u8 *accelTable, + const union AccelAux *aux, const u8 *input, size_t i, + size_t end) { + u32 idx = packedExtract64(s, accel); + return accelScanWrapper(accelTable, aux, input, idx, i, end); +} +#else +size_t doAccel64(m128 s, m128 accel, const u8 *accelTable, + const union AccelAux *aux, const u8 *input, size_t i, + size_t end) { + u32 idx = packedExtract64(movq(s), movq(accel)); + return accelScanWrapper(accelTable, aux, input, idx, i, end); +} +#endif + size_t doAccel128(const m128 *state, const struct LimExNFA128 *limex, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, size_t end) { diff --git a/src/nfa/limex_accel.h b/src/nfa/limex_accel.h index 173df759..e5c94e82 100644 --- a/src/nfa/limex_accel.h +++ b/src/nfa/limex_accel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,6 +40,7 @@ #include "util/simd_utils.h" // for m128 etc union AccelAux; +struct LimExNFA64; struct LimExNFA128; struct LimExNFA256; struct LimExNFA384; @@ -49,6 +50,16 @@ size_t doAccel32(u32 s, u32 accel, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, size_t end); +#ifdef ARCH_64_BIT +size_t doAccel64(u64a s, u64a accel, const u8 *accelTable, + const union AccelAux *aux, const u8 *input, size_t i, + size_t end); +#else +size_t doAccel64(m128 s, m128 accel, const u8 *accelTable, + const union AccelAux *aux, const u8 *input, size_t i, + size_t end); +#endif + size_t doAccel128(const m128 *s, const struct LimExNFA128 *limex, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, size_t end); diff --git a/src/nfa/limex_common_impl.h b/src/nfa/limex_common_impl.h index 9523b073..187a661b 100644 --- a/src/nfa/limex_common_impl.h +++ b/src/nfa/limex_common_impl.h @@ -31,8 +31,9 @@ /* impl of limex functions which depend only on state size */ -#if !defined(SIZE) || !defined(STATE_T) || !defined(INLINE_ATTR) -# error Must define SIZE and STATE_T and INLINE_ATTR in includer. +#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) \ + || !defined(INLINE_ATTR) +# error Must define SIZE, STATE_T, LOAD_FROM_ENG and INLINE_ATTR in includer. #endif #define IMPL_NFA_T JOIN(struct LimExNFA, SIZE) @@ -50,8 +51,6 @@ #define PROCESS_ACCEPTS_NOSQUASH_FN JOIN(moProcessAcceptsNoSquash, SIZE) #define CONTEXT_T JOIN(NFAContext, SIZE) #define ONES_STATE JOIN(ones_, STATE_T) -#define LOAD_STATE JOIN(load_, STATE_T) -#define STORE_STATE JOIN(store_, STATE_T) #define AND_STATE JOIN(and_, STATE_T) #define OR_STATE JOIN(or_, STATE_T) #define ANDNOT_STATE JOIN(andnot_, STATE_T) @@ -83,7 +82,7 @@ void SQUASH_UNTUG_BR_FN(const IMPL_NFA_T *limex, const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); u32 cyclicState = info->cyclicState; - if (!TESTBIT_STATE(accstate, cyclicState)) { + if (!TESTBIT_STATE(*accstate, cyclicState)) { continue; } @@ -111,12 +110,12 @@ char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s, // We have squash masks we might have to apply after firing reports. STATE_T squash = ONES_STATE; - const STATE_T *squashMasks = (const STATE_T *) + const ENG_STATE_T *squashMasks = (const ENG_STATE_T *) ((const char *)limex + limex->squashOffset); for (u32 i = 0; i < acceptCount; i++) { const struct NFAAccept *a = &acceptTable[i]; - if (TESTBIT_STATE(s, a->state)) { + if (TESTBIT_STATE(*s, a->state)) { DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n", a->state, a->externalId, offset); int rv = callback(0, offset, a->externalId, context); @@ -125,14 +124,14 @@ char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s, } if (a->squash != MO_INVALID_IDX) { assert(a->squash < limex->squashCount); - const STATE_T *sq = &squashMasks[a->squash]; + const ENG_STATE_T *sq = &squashMasks[a->squash]; DEBUG_PRINTF("squash mask %u @ %p\n", a->squash, sq); - squash = AND_STATE(squash, LOAD_STATE(sq)); + squash = AND_STATE(squash, LOAD_FROM_ENG(sq)); } } } - STORE_STATE(s, AND_STATE(LOAD_STATE(s), squash)); + *s = AND_STATE(*s, squash); return 0; } @@ -147,7 +146,7 @@ char PROCESS_ACCEPTS_NOSQUASH_FN(const STATE_T *s, for (u32 i = 0; i < acceptCount; i++) { const struct NFAAccept *a = &acceptTable[i]; - if (TESTBIT_STATE(s, a->state)) { + if (TESTBIT_STATE(*s, a->state)) { DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n", a->state, a->externalId, offset); int rv = callback(0, offset, a->externalId, context); @@ -172,8 +171,8 @@ char TESTEOD_FN(const IMPL_NFA_T *limex, const STATE_T *s, return MO_CONTINUE_MATCHING; } - const STATE_T acceptEodMask = LOAD_STATE(&limex->acceptAtEOD); - STATE_T foundAccepts = AND_STATE(LOAD_STATE(s), acceptEodMask); + const STATE_T acceptEodMask = LOAD_FROM_ENG(&limex->acceptAtEOD); + STATE_T foundAccepts = AND_STATE(*s, acceptEodMask); if (do_br) { SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, @@ -204,8 +203,8 @@ char TESTEOD_REV_FN(const IMPL_NFA_T *limex, const STATE_T *s, u64a offset, return MO_CONTINUE_MATCHING; } - STATE_T acceptEodMask = LOAD_STATE(&limex->acceptAtEOD); - STATE_T foundAccepts = AND_STATE(LOAD_STATE(s), acceptEodMask); + STATE_T acceptEodMask = LOAD_FROM_ENG(&limex->acceptAtEOD); + STATE_T foundAccepts = AND_STATE(*s, acceptEodMask); assert(!limex->repeatCount); @@ -228,8 +227,8 @@ char REPORTCURRENT_FN(const IMPL_NFA_T *limex, const struct mq *q) { assert(q->state); assert(q_cur_type(q) == MQE_START); - STATE_T s = LOAD_STATE(q->state); - STATE_T acceptMask = LOAD_STATE(&limex->accept); + STATE_T s = *(STATE_T *)q->state; + STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); STATE_T foundAccepts = AND_STATE(s, acceptMask); if (unlikely(ISNONZERO_STATE(foundAccepts))) { @@ -250,7 +249,7 @@ char REPORTCURRENT_FN(const IMPL_NFA_T *limex, const struct mq *q) { static really_inline STATE_T INITIAL_FN(const IMPL_NFA_T *impl, char onlyDs) { - return LOAD_STATE(onlyDs ? &impl->initDS : &impl->init); + return LOAD_FROM_ENG(onlyDs ? &impl->initDS : &impl->init); } static really_inline @@ -261,9 +260,9 @@ STATE_T TOP_FN(const IMPL_NFA_T *impl, char onlyDs, STATE_T state) { static really_inline STATE_T TOPN_FN(const IMPL_NFA_T *limex, STATE_T state, u32 n) { assert(n < limex->topCount); - const STATE_T *topsptr = - (const STATE_T *)((const char *)limex + limex->topOffset); - STATE_T top = LOAD_STATE(&topsptr[n]); + const ENG_STATE_T *topsptr = + (const ENG_STATE_T *)((const char *)limex + limex->topOffset); + STATE_T top = LOAD_FROM_ENG(&topsptr[n]); return OR_STATE(top, state); } @@ -279,8 +278,8 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx, DEBUG_PRINTF("expire estate at offset %llu\n", offset); - const STATE_T cyclics = - AND_STATE(LOAD_STATE(&ctx->s), LOAD_STATE(&limex->repeatCyclicMask)); + const STATE_T cyclics + = AND_STATE(ctx->s, LOAD_FROM_ENG(&limex->repeatCyclicMask)); if (ISZERO_STATE(cyclics)) { DEBUG_PRINTF("no cyclic states are on\n"); return; @@ -290,7 +289,7 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx, const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); u32 cyclicState = info->cyclicState; - if (!TESTBIT_STATE(&cyclics, cyclicState)) { + if (!TESTBIT_STATE(cyclics, cyclicState)) { continue; } @@ -310,14 +309,14 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx, last_top, repeat->repeatMax); u64a adj = 0; /* if the cycle's tugs are active at repeat max, it is still alive */ - if (TESTBIT_STATE((const STATE_T *)&limex->accept, cyclicState) || - TESTBIT_STATE((const STATE_T *)&limex->acceptAtEOD, cyclicState)) { + if (TESTBIT_STATE(LOAD_FROM_ENG(&limex->accept), cyclicState) || + TESTBIT_STATE(LOAD_FROM_ENG(&limex->acceptAtEOD), cyclicState)) { DEBUG_PRINTF("lazy tug possible - may still be inspected\n"); adj = 1; } else { - const STATE_T *tug_mask = - (const STATE_T *)((const char *)info + info->tugMaskOffset); - if (ISNONZERO_STATE(AND_STATE(ctx->s, LOAD_STATE(tug_mask)))) { + const ENG_STATE_T *tug_mask = + (const ENG_STATE_T *)((const char *)info + info->tugMaskOffset); + if (ISNONZERO_STATE(AND_STATE(ctx->s, LOAD_FROM_ENG(tug_mask)))) { DEBUG_PRINTF("tug possible - may still be inspected\n"); adj = 1; } @@ -339,7 +338,7 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, u64a offset, ReportID report) { assert(limex); - const STATE_T acceptMask = LOAD_STATE(&limex->accept); + const STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); STATE_T accstate = AND_STATE(state, acceptMask); // Are we in an accept state? @@ -355,7 +354,7 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, #ifdef DEBUG DEBUG_PRINTF("accept states that are on: "); for (u32 i = 0; i < sizeof(STATE_T) * 8; i++) { - if (TESTBIT_STATE(&accstate, i)) printf("%u ", i); + if (TESTBIT_STATE(accstate, i)) printf("%u ", i); } printf("\n"); #endif @@ -366,7 +365,7 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, const struct NFAAccept *a = &acceptTable[i]; DEBUG_PRINTF("checking idx=%u, externalId=%u\n", a->state, a->externalId); - if (a->externalId == report && TESTBIT_STATE(&accstate, a->state)) { + if (a->externalId == report && TESTBIT_STATE(accstate, a->state)) { DEBUG_PRINTF("report is on!\n"); return 1; } @@ -381,7 +380,7 @@ char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, u64a offset) { assert(limex); - const STATE_T acceptMask = LOAD_STATE(&limex->accept); + const STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); STATE_T accstate = AND_STATE(state, acceptMask); // Are we in an accept state? @@ -407,8 +406,6 @@ char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, #undef CONTEXT_T #undef IMPL_NFA_T #undef ONES_STATE -#undef LOAD_STATE -#undef STORE_STATE #undef AND_STATE #undef OR_STATE #undef ANDNOT_STATE @@ -420,7 +417,3 @@ char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, #undef PROCESS_ACCEPTS_NOSQUASH_FN #undef SQUASH_UNTUG_BR_FN #undef GET_NFA_REPEAT_INFO_FN - -#undef SIZE -#undef STATE_T -#undef INLINE_ATTR diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 77754e0b..8d7343e5 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -1438,7 +1438,8 @@ struct Factory { sizeof(limex->init), stateSize, repeatscratchStateSize, repeatStreamState); - size_t scratchStateSize = sizeof(limex->init); + size_t scratchStateSize = NFATraits::scratch_state_size; + if (repeatscratchStateSize) { scratchStateSize = ROUNDUP_N(scratchStateSize, alignof(RepeatControl)); @@ -2021,13 +2022,6 @@ struct Factory { sz = 32; } - // Special case: with SIMD available, we definitely prefer using - // 128-bit NFAs over 64-bit ones given the paucity of registers - // available. - if (sz == 64) { - sz = 128; - } - if (args.cc.grey.nfaForceSize) { sz = args.cc.grey.nfaForceSize; } @@ -2067,9 +2061,12 @@ struct scoreNfa { typedef u_##mlt_size tableRow_t; \ typedef NFAException##mlt_size exception_t; \ static const size_t maxStates = mlt_size; \ + static const size_t scratch_state_size = mlt_size == 64 ? sizeof(m128) \ + : sizeof(tableRow_t); \ }; MAKE_LIMEX_TRAITS(32) +MAKE_LIMEX_TRAITS(64) MAKE_LIMEX_TRAITS(128) MAKE_LIMEX_TRAITS(256) MAKE_LIMEX_TRAITS(384) diff --git a/src/nfa/limex_context.h b/src/nfa/limex_context.h index 74f22c32..60d20879 100644 --- a/src/nfa/limex_context.h +++ b/src/nfa/limex_context.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,6 +39,16 @@ // Runtime context structures. +/* Note: The size of the context structures may vary from platform to platform + * (notably, for the Limex64 structure). As a result, information based on the + * size and other detail of these structures should not be written into the + * bytecode -- really, the details of the structure should not be accessed by + * the ue2 compile side at all. + */ +#ifdef __cplusplus +#error ue2 runtime only file +#endif + /* cached_estate/esucc etc... * * If the exception state matches the cached_estate we will apply @@ -66,6 +76,11 @@ struct ALIGN_CL_DIRECTIVE NFAContext##nsize { \ }; GEN_CONTEXT_STRUCT(32, u32) +#ifdef ARCH_64_BIT +GEN_CONTEXT_STRUCT(64, u64a) +#else +GEN_CONTEXT_STRUCT(64, m128) +#endif GEN_CONTEXT_STRUCT(128, m128) GEN_CONTEXT_STRUCT(256, m256) GEN_CONTEXT_STRUCT(384, m384) diff --git a/src/nfa/limex_dump.cpp b/src/nfa/limex_dump.cpp index 8b6b7015..181951dc 100644 --- a/src/nfa/limex_dump.cpp +++ b/src/nfa/limex_dump.cpp @@ -70,6 +70,10 @@ template<> struct limex_traits { static const u32 size = 128; typedef NFAException128 exception_type; }; +template<> struct limex_traits { + static const u32 size = 64; + typedef NFAException64 exception_type; +}; template<> struct limex_traits { static const u32 size = 32; typedef NFAException32 exception_type; @@ -486,6 +490,7 @@ void dumpLimDotInfo(const limex_type *limex, u32 state, FILE *f) { DUMP_DOT_FN(size) LIMEX_DUMP_FNS(32) +LIMEX_DUMP_FNS(64) LIMEX_DUMP_FNS(128) LIMEX_DUMP_FNS(256) LIMEX_DUMP_FNS(384) diff --git a/src/nfa/limex_exceptional.h b/src/nfa/limex_exceptional.h index 175ca393..c8296f91 100644 --- a/src/nfa/limex_exceptional.h +++ b/src/nfa/limex_exceptional.h @@ -32,8 +32,8 @@ * X-macro generic impl, included into the various LimEx model implementations. */ -#if !defined(SIZE) || !defined(STATE_T) -# error Must define SIZE and STATE_T in includer. +#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) +# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer. #endif #include "config.h" @@ -44,8 +44,6 @@ #define PE_FN JOIN(processExceptional, SIZE) #define RUN_EXCEPTION_FN JOIN(runException, SIZE) #define ZERO_STATE JOIN(zero_, STATE_T) -#define LOAD_STATE JOIN(load_, STATE_T) -#define STORE_STATE JOIN(store_, STATE_T) #define AND_STATE JOIN(and_, STATE_T) #define EQ_STATE(a, b) (!JOIN(noteq_, STATE_T)((a), (b))) #define OR_STATE JOIN(or_, STATE_T) @@ -59,7 +57,7 @@ #define ESTATE_ARG STATE_T estate #else #define ESTATE_ARG const STATE_T *estatep -#define estate LOAD_STATE(estatep) +#define estate (*estatep) #endif #ifdef STATE_ON_STACK @@ -133,7 +131,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, char *repeat_state = ctx->repeat_state + info->stateOffset; if (e->trigger == LIMEX_TRIGGER_POS) { - char cyclic_on = TESTBIT_STATE(STATE_ARG_P, info->cyclicState); + char cyclic_on = TESTBIT_STATE(*STATE_ARG_P, info->cyclicState); processPosTrigger(repeat, repeat_ctrl, repeat_state, offset, cyclic_on); *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES; @@ -149,8 +147,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES; DEBUG_PRINTF("stale history, squashing cyclic state\n"); assert(e->hasSquash == LIMEX_SQUASH_TUG); - STORE_STATE(succ, AND_STATE(LOAD_STATE(succ), - LOAD_STATE(&e->squash))); + *succ = AND_STATE(*succ, LOAD_FROM_ENG(&e->squash)); return 1; // continue } else if (rv == TRIGGER_SUCCESS_CACHE) { new_cache->br = 1; @@ -188,18 +185,16 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, // Most exceptions have a set of successors to switch on. `local_succ' is // ORed into `succ' at the end of the caller's loop. #ifndef BIG_MODEL - *local_succ = OR_STATE(*local_succ, LOAD_STATE(&e->successors)); + *local_succ = OR_STATE(*local_succ, LOAD_FROM_ENG(&e->successors)); #else - STORE_STATE(&ctx->local_succ, OR_STATE(LOAD_STATE(&ctx->local_succ), - LOAD_STATE(&e->successors))); + ctx->local_succ = OR_STATE(ctx->local_succ, LOAD_FROM_ENG(&e->successors)); #endif // Some exceptions squash states behind them. Note that we squash states in // 'succ', not local_succ. - if (e->hasSquash == LIMEX_SQUASH_CYCLIC || - e->hasSquash == LIMEX_SQUASH_REPORT) { - STORE_STATE(succ, AND_STATE(LOAD_STATE(succ), - LOAD_STATE(&e->squash))); + if (e->hasSquash == LIMEX_SQUASH_CYCLIC + || e->hasSquash == LIMEX_SQUASH_REPORT) { + *succ = AND_STATE(*succ, LOAD_FROM_ENG(&e->squash)); if (*cacheable == CACHE_RESULT) { *cacheable = DO_NOT_CACHE_RESULT; } @@ -219,9 +214,9 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, char in_rev, char flags) { assert(diffmask > 0); // guaranteed by caller macro - if (EQ_STATE(estate, LOAD_STATE(&ctx->cached_estate))) { + if (EQ_STATE(estate, ctx->cached_estate)) { DEBUG_PRINTF("using cached succ from previous state\n"); - STORE_STATE(succ, OR_STATE(LOAD_STATE(succ), LOAD_STATE(&ctx->cached_esucc))); + *succ = OR_STATE(*succ, ctx->cached_esucc); if (ctx->cached_reports && (flags & CALLBACK_OUTPUT)) { DEBUG_PRINTF("firing cached reports from previous state\n"); if (unlikely(limexRunReports(ctx->cached_reports, ctx->callback, @@ -236,7 +231,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, #ifndef BIG_MODEL STATE_T local_succ = ZERO_STATE; #else - STORE_STATE(&ctx->local_succ, ZERO_STATE); + ctx->local_succ = ZERO_STATE; #endif // A copy of the estate as an array of GPR-sized chunks. @@ -254,7 +249,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)]; base_index[0] = 0; - for (u32 i = 0; i < ARRAY_LENGTH(base_index) - 1; i++) { + for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) { base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]); } @@ -284,23 +279,23 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, } while (diffmask); #ifndef BIG_MODEL - STORE_STATE(succ, OR_STATE(LOAD_STATE(succ), local_succ)); + *succ = OR_STATE(*succ, local_succ); #else - STORE_STATE(succ, OR_STATE(LOAD_STATE(succ), ctx->local_succ)); + *succ = OR_STATE(*succ, ctx->local_succ); #endif if (cacheable == CACHE_RESULT) { - STORE_STATE(&ctx->cached_estate, estate); + ctx->cached_estate = estate; #ifndef BIG_MODEL ctx->cached_esucc = local_succ; #else - STORE_STATE(&ctx->cached_esucc, LOAD_STATE(&ctx->local_succ)); + ctx->cached_esucc = ctx->local_succ; #endif ctx->cached_reports = new_cache.reports; ctx->cached_br = new_cache.br; } else if (cacheable == DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES) { if (ctx->cached_br) { - STORE_STATE(&ctx->cached_estate, ZERO_STATE); + ctx->cached_estate = ZERO_STATE; } } @@ -314,8 +309,6 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, #undef EQ_STATE #undef OR_STATE #undef TESTBIT_STATE -#undef LOAD_STATE -#undef STORE_STATE #undef PE_FN #undef RUN_EXCEPTION_FN #undef CONTEXT_T @@ -337,7 +330,3 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, #undef FIND_AND_CLEAR_FN #undef IMPL_NFA_T #undef GET_NFA_REPEAT_INFO_FN - -// Parameters. -#undef SIZE -#undef STATE_T diff --git a/src/nfa/limex_internal.h b/src/nfa/limex_internal.h index c37f5f40..03ebb384 100644 --- a/src/nfa/limex_internal.h +++ b/src/nfa/limex_internal.h @@ -160,6 +160,7 @@ struct LimExNFA##size { \ }; CREATE_NFA_LIMEX(32) +CREATE_NFA_LIMEX(64) CREATE_NFA_LIMEX(128) CREATE_NFA_LIMEX(256) CREATE_NFA_LIMEX(384) diff --git a/src/nfa/limex_native.c b/src/nfa/limex_native.c index 8a0a8acd..c9949836 100644 --- a/src/nfa/limex_native.c +++ b/src/nfa/limex_native.c @@ -49,12 +49,13 @@ #include "limex_runtime.h" // Other implementation code from X-Macro impl. -#define SIZE 32 -#define STATE_T u32 +#define SIZE 32 +#define STATE_T u32 +#define ENG_STATE_T u32 +#define LOAD_FROM_ENG load_u32 + #include "limex_state_impl.h" -#define SIZE 32 -#define STATE_T u32 #define INLINE_ATTR really_inline #include "limex_common_impl.h" @@ -64,8 +65,6 @@ // Process exceptional states -#define SIZE 32 -#define STATE_T u32 #define STATE_ON_STACK #define ESTATE_ON_STACK #define RUN_EXCEPTION_FN_ONLY @@ -128,7 +127,4 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ, } // 32-bit models. - -#define SIZE 32 -#define STATE_T u32 #include "limex_runtime_impl.h" diff --git a/src/nfa/limex_runtime.h b/src/nfa/limex_runtime.h index e0c182fc..75094ef6 100644 --- a/src/nfa/limex_runtime.h +++ b/src/nfa/limex_runtime.h @@ -30,8 +30,8 @@ \brief Limex Execution Engine Or: How I Learned To Stop Worrying And Love The Preprocessor - This file includes utility functions which do not depend on the state size or - shift masks directly. + This file includes utility functions which do not depend on the size of the + state or shift masks directly. */ #ifndef LIMEX_RUNTIME_H @@ -72,41 +72,6 @@ struct proto_cache { const ReportID *reports; }; -// Shift macros for Limited NFAs. Defined in terms of uniform ops. -// LimExNFAxxx ptr in 'limex' and the current state in 's' -#define NFA_EXEC_LIM_SHIFT(nels_type, nels_i) \ - (JOIN(lshift_, nels_type)( \ - JOIN(and_, nels_type)(s, \ - JOIN(load_, nels_type)(&limex->shift[nels_i])), \ - limex->shiftAmount[nels_i])) - -// Calculate the (limited model) successors for a number of variable shifts. -// Assumes current state in 's' and successors in 'succ'. - -#define NFA_EXEC_GET_LIM_SUCC(gls_type) \ - do { \ - succ = NFA_EXEC_LIM_SHIFT(gls_type, 0); \ - switch (limex->shiftCount) { \ - case 8: \ - succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 7)); \ - case 7: \ - succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 6)); \ - case 6: \ - succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 5)); \ - case 5: \ - succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 4)); \ - case 4: \ - succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 3)); \ - case 3: \ - succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 2)); \ - case 2: \ - succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 1)); \ - case 1: \ - case 0: \ - ; \ - } \ - } while (0) - #define PE_RV_HALT 1 #ifdef STATE_ON_STACK @@ -170,6 +135,7 @@ int limexRunReports(const ReportID *reports, NfaCallback callback, } MAKE_GET_NFA_REPEAT_INFO(32) +MAKE_GET_NFA_REPEAT_INFO(64) MAKE_GET_NFA_REPEAT_INFO(128) MAKE_GET_NFA_REPEAT_INFO(256) MAKE_GET_NFA_REPEAT_INFO(384) diff --git a/src/nfa/limex_runtime_impl.h b/src/nfa/limex_runtime_impl.h index 881e41fd..644ddd6a 100644 --- a/src/nfa/limex_runtime_impl.h +++ b/src/nfa/limex_runtime_impl.h @@ -29,7 +29,6 @@ #include "util/join.h" #include - /** \file * \brief Limex Execution Engine Or: * How I Learned To Stop Worrying And Love The Preprocessor @@ -37,8 +36,9 @@ * Version 2.0: now with X-Macros, so you get line numbers in your debugger. */ -#if !defined(SIZE) || !defined(STATE_T) -# error Must define SIZE and STATE_T in includer. + +#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) +# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer. #endif #define LIMEX_API_ROOT JOIN(nfaExecLimEx, SIZE) @@ -67,11 +67,10 @@ #define STREAMSILENT_FN JOIN(LIMEX_API_ROOT, _Stream_Silent) #define CONTEXT_T JOIN(NFAContext, SIZE) #define EXCEPTION_T JOIN(struct NFAException, SIZE) -#define LOAD_STATE JOIN(load_, STATE_T) -#define STORE_STATE JOIN(store_, STATE_T) #define AND_STATE JOIN(and_, STATE_T) #define ANDNOT_STATE JOIN(andnot_, STATE_T) #define OR_STATE JOIN(or_, STATE_T) +#define LSHIFT_STATE JOIN(lshift_, STATE_T) #define TESTBIT_STATE JOIN(testbit_, STATE_T) #define CLEARBIT_STATE JOIN(clearbit_, STATE_T) #define ZERO_STATE JOIN(zero_, STATE_T) @@ -96,9 +95,9 @@ #define ACCEL_AND_FRIENDS_MASK accel_and_friendsMask #define EXCEPTION_MASK exceptionMask #else -#define ACCEL_MASK LOAD_STATE(&limex->accel) -#define ACCEL_AND_FRIENDS_MASK LOAD_STATE(&limex->accel_and_friends) -#define EXCEPTION_MASK LOAD_STATE(&limex->exceptionMask) +#define ACCEL_MASK LOAD_FROM_ENG(&limex->accel) +#define ACCEL_AND_FRIENDS_MASK LOAD_FROM_ENG(&limex->accel_and_friends) +#define EXCEPTION_MASK LOAD_FROM_ENG(&limex->exceptionMask) #endif // Run exception processing, if necessary. Returns 0 if scanning should @@ -117,13 +116,13 @@ char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, } if (first_match && i) { - STATE_T acceptMask = LOAD_STATE(&limex->accept); + STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); STATE_T foundAccepts = AND_STATE(s, acceptMask); if (unlikely(ISNONZERO_STATE(foundAccepts))) { DEBUG_PRINTF("first match at %zu\n", i); DEBUG_PRINTF("for nfa %p\n", limex); assert(final_loc); - STORE_STATE(&ctx->s, s); + ctx->s = s; *final_loc = i; return 1; // Halt matching. } @@ -161,22 +160,56 @@ size_t RUN_ACCEL_FN(const STATE_T s, UNUSED const STATE_T accelMask, return j; } +// Shift macros for Limited NFAs. Defined in terms of uniform ops. +// LimExNFAxxx ptr in 'limex' and the current state in 's' +#define NFA_EXEC_LIM_SHIFT(limex_m, curr_m, shift_idx) \ + LSHIFT_STATE(AND_STATE(curr_m, LOAD_FROM_ENG(&limex_m->shift[shift_idx])), \ + limex_m->shiftAmount[shift_idx]) + +// Calculate the (limited model) successors for a number of variable shifts. +// Assumes current state in 'curr_m' and places the successors in 'succ_m'. +#define NFA_EXEC_GET_LIM_SUCC(limex_m, curr_m, succ_m) \ + do { \ + succ_m = NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 0); \ + switch (limex_m->shiftCount) { \ + case 8: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 7)); \ + case 7: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 6)); \ + case 6: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 5)); \ + case 5: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 4)); \ + case 4: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 3)); \ + case 3: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 2)); \ + case 2: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 1)); \ + case 1: \ + case 0: \ + ; \ + } \ + } while (0) + + static really_inline char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, struct CONTEXT_T *ctx, u64a offset, const char flags, u64a *final_loc, const char first_match) { - const STATE_T *reach = (const STATE_T *)((const char *)limex + sizeof(*limex)); + const ENG_STATE_T *reach = get_reach_table(limex); #if SIZE < 256 - const STATE_T accelMask = LOAD_STATE(&limex->accel); - const STATE_T accel_and_friendsMask = LOAD_STATE(&limex->accel_and_friends); - const STATE_T exceptionMask = LOAD_STATE(&limex->exceptionMask); + const STATE_T accelMask = LOAD_FROM_ENG(&limex->accel); + const STATE_T accel_and_friendsMask + = LOAD_FROM_ENG(&limex->accel_and_friends); + const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask); #endif const u8 *accelTable = (const u8 *)((const char *)limex + limex->accelTableOffset); const union AccelAux *accelAux = (const union AccelAux *)((const char *)limex + limex->accelAuxOffset); const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); const ReportID *exReports = getExReports(limex); - STATE_T s = LOAD_STATE(&ctx->s); + STATE_T s = ctx->s; /* assert(ISALIGNED_16(exceptions)); */ /* assert(ISALIGNED_16(reach)); */ @@ -195,13 +228,13 @@ without_accel: DUMP_INPUT(i); if (ISZERO_STATE(s)) { DEBUG_PRINTF("no states are switched on, early exit\n"); - STORE_STATE(&ctx->s, s); + ctx->s = s; return MO_CONTINUE_MATCHING; } u8 c = input[i]; STATE_T succ; - NFA_EXEC_GET_LIM_SUCC(STATE_T); + NFA_EXEC_GET_LIM_SUCC(limex, s, succ); if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK, i, offset, &succ, final_loc, ctx, flags, 0, @@ -209,7 +242,7 @@ without_accel: return MO_HALT_MATCHING; } - s = AND_STATE(succ, LOAD_STATE(&reach[limex->reachMap[c]])); + s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); } with_accel: @@ -252,7 +285,7 @@ with_accel: u8 c = input[i]; STATE_T succ; - NFA_EXEC_GET_LIM_SUCC(STATE_T); + NFA_EXEC_GET_LIM_SUCC(limex, s, succ); if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK, i, offset, &succ, final_loc, ctx, flags, 0, @@ -260,20 +293,20 @@ with_accel: return MO_HALT_MATCHING; } - s = AND_STATE(succ, LOAD_STATE(&reach[limex->reachMap[c]])); + s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); } - STORE_STATE(&ctx->s, s); + ctx->s = s; if ((first_match || (flags & CALLBACK_OUTPUT)) && limex->acceptCount) { - STATE_T acceptMask = LOAD_STATE(&limex->accept); + STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); const struct NFAAccept *acceptTable = getAcceptTable(limex); const u32 acceptCount = limex->acceptCount; STATE_T foundAccepts = AND_STATE(s, acceptMask); if (unlikely(ISNONZERO_STATE(foundAccepts))) { if (first_match) { - STORE_STATE(&ctx->s, s); + ctx->s = s; assert(final_loc); *final_loc = length; return MO_HALT_MATCHING; @@ -294,13 +327,13 @@ with_accel: static never_inline char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, struct CONTEXT_T *ctx, u64a offset) { - const STATE_T *reach = (const STATE_T *)((const char *)limex + sizeof(*limex)); + const ENG_STATE_T *reach = get_reach_table(limex); #if SIZE < 256 - const STATE_T exceptionMask = LOAD_STATE(&limex->exceptionMask); + const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask); #endif const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); const ReportID *exReports = getExReports(limex); - STATE_T s = LOAD_STATE(&ctx->s); + STATE_T s = ctx->s; /* assert(ISALIGNED_16(exceptions)); */ /* assert(ISALIGNED_16(reach)); */ @@ -311,13 +344,13 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, DUMP_INPUT(i-1); if (ISZERO_STATE(s)) { DEBUG_PRINTF("no states are switched on, early exit\n"); - STORE_STATE(&ctx->s, s); + ctx->s = s; return MO_CONTINUE_MATCHING; } u8 c = input[i-1]; STATE_T succ; - NFA_EXEC_GET_LIM_SUCC(STATE_T); + NFA_EXEC_GET_LIM_SUCC(limex, s, succ); if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK, i, offset, &succ, final_loc, ctx, @@ -325,12 +358,12 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, return MO_HALT_MATCHING; } - s = AND_STATE(succ, reach[limex->reachMap[c]]); + s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); } - STORE_STATE(&ctx->s, s); + ctx->s = s; - STATE_T acceptMask = LOAD_STATE(&limex->accept); + STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); const struct NFAAccept *acceptTable = getAcceptTable(limex); const u32 acceptCount = limex->acceptCount; assert(flags & CALLBACK_OUTPUT); @@ -354,9 +387,9 @@ void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src, return; } - STATE_T s = LOAD_STATE(src); + STATE_T s = *(STATE_T *)src; - if (ISZERO_STATE(AND_STATE(s, LOAD_STATE(&limex->repeatCyclicMask)))) { + if (ISZERO_STATE(AND_STATE(LOAD_FROM_ENG(&limex->repeatCyclicMask), s))) { DEBUG_PRINTF("no cyclics are on\n"); return; } @@ -369,7 +402,7 @@ void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src, DEBUG_PRINTF("repeat %u\n", i); const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); - if (!TESTBIT_STATE(&s, info->cyclicState)) { + if (!TESTBIT_STATE(s, info->cyclicState)) { DEBUG_PRINTF("is dead\n"); continue; } @@ -388,7 +421,7 @@ void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src, offset); } - STORE_STATE(src, s); + *(STATE_T *)src = s; } char JOIN(LIMEX_API_ROOT, _queueCompressState)(const struct NFA *n, @@ -411,7 +444,7 @@ void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src, // Note: state has already been expanded into 'dest'. const STATE_T cyclics = - AND_STATE(LOAD_STATE(dest), LOAD_STATE(&limex->repeatCyclicMask)); + AND_STATE(*(STATE_T *)dest, LOAD_FROM_ENG(&limex->repeatCyclicMask)); if (ISZERO_STATE(cyclics)) { DEBUG_PRINTF("no cyclics are on\n"); return; @@ -425,7 +458,7 @@ void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src, DEBUG_PRINTF("repeat %u\n", i); const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); - if (!TESTBIT_STATE(&cyclics, info->cyclicState)) { + if (!TESTBIT_STATE(cyclics, info->cyclicState)) { DEBUG_PRINTF("is dead\n"); continue; } @@ -447,9 +480,8 @@ char JOIN(LIMEX_API_ROOT, _expandState)(const struct NFA *n, void *dest, return 0; } -char JOIN(LIMEX_API_ROOT, _queueInitState)(const struct NFA *n, - struct mq *q) { - STORE_STATE(q->state, ZERO_STATE); +char JOIN(LIMEX_API_ROOT, _queueInitState)(const struct NFA *n, struct mq *q) { + *(STATE_T *)q->state = ZERO_STATE; // Zero every bounded repeat control block in state. const IMPL_NFA_T *limex = getImplNfa(n); @@ -529,7 +561,7 @@ void JOIN(LIMEX_API_ROOT, _HandleEvent)(const IMPL_NFA_T *limex, u32 e = q->items[q->cur].type; switch (e) { DEFINE_CASE(MQE_TOP) - STORE_STATE(&ctx->s, TOP_FN(limex, !!sp, LOAD_STATE(&ctx->s))); + ctx->s = TOP_FN(limex, !!sp, ctx->s); break; DEFINE_CASE(MQE_START) break; @@ -539,8 +571,7 @@ void JOIN(LIMEX_API_ROOT, _HandleEvent)(const IMPL_NFA_T *limex, assert(e >= MQE_TOP_FIRST); assert(e < MQE_INVALID); DEBUG_PRINTF("MQE_TOP + %d\n", ((int)e - MQE_TOP_FIRST)); - STORE_STATE(&ctx->s, - TOPN_FN(limex, LOAD_STATE(&ctx->s), e - MQE_TOP_FIRST)); + ctx->s = TOPN_FN(limex, ctx->s, e - MQE_TOP_FIRST); } #undef DEFINE_CASE } @@ -570,12 +601,12 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { ctx.repeat_state = q->streamState + limex->stateSize; ctx.callback = q->cb; ctx.context = q->context; - STORE_STATE(&ctx.cached_estate, ZERO_STATE); + ctx.cached_estate = ZERO_STATE; ctx.cached_br = 0; assert(q->items[q->cur].location >= 0); DEBUG_PRINTF("LOAD STATE\n"); - STORE_STATE(&ctx.s, LOAD_STATE(q->state)); + ctx.s = *(STATE_T *)q->state; assert(q->items[q->cur].type == MQE_START); u64a offset = q->offset; @@ -599,7 +630,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { assert(ep - offset <= q->length); if (STREAMCB_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp) == MO_HALT_MATCHING) { - STORE_STATE(q->state, ZERO_STATE); + *(STATE_T *)q->state = ZERO_STATE; return 0; } @@ -616,7 +647,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp - offset; DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end); - STORE_STATE(q->state, LOAD_STATE(&ctx.s)); + *(STATE_T *)q->state = ctx.s; return MO_ALIVE; } @@ -628,7 +659,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { EXPIRE_ESTATE_FN(limex, &ctx, sp); DEBUG_PRINTF("END\n"); - STORE_STATE(q->state, LOAD_STATE(&ctx.s)); + *(STATE_T *)q->state = ctx.s; if (q->cur != q->end) { q->cur--; @@ -637,7 +668,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { return MO_ALIVE; } - return ISNONZERO_STATE(LOAD_STATE(&ctx.s)); + return ISNONZERO_STATE(ctx.s); } /* used by suffix execution in Rose */ @@ -665,11 +696,11 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { ctx.repeat_state = q->streamState + limex->stateSize; ctx.callback = q->cb; ctx.context = q->context; - STORE_STATE(&ctx.cached_estate, ZERO_STATE); + ctx.cached_estate = ZERO_STATE; ctx.cached_br = 0; DEBUG_PRINTF("LOAD STATE\n"); - STORE_STATE(&ctx.s, LOAD_STATE(q->state)); + ctx.s = *(STATE_T *)q->state; assert(q->items[q->cur].type == MQE_START); u64a offset = q->offset; @@ -699,7 +730,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp + final_look - offset; - STORE_STATE(q->state, LOAD_STATE(&ctx.s)); + *(STATE_T *)q->state = ctx.s; return MO_MATCHES_PENDING; } @@ -721,7 +752,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp + final_look - offset; - STORE_STATE(q->state, LOAD_STATE(&ctx.s)); + *(STATE_T *)q->state = ctx.s; return MO_MATCHES_PENDING; } @@ -737,7 +768,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp - offset; DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end); - STORE_STATE(q->state, LOAD_STATE(&ctx.s)); + *(STATE_T *)q->state = ctx.s; return MO_ALIVE; } @@ -749,7 +780,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { EXPIRE_ESTATE_FN(limex, &ctx, sp); DEBUG_PRINTF("END\n"); - STORE_STATE(q->state, LOAD_STATE(&ctx.s)); + *(STATE_T *)q->state = ctx.s; if (q->cur != q->end) { q->cur--; @@ -758,7 +789,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { return MO_ALIVE; } - return ISNONZERO_STATE(LOAD_STATE(&ctx.s)); + return ISNONZERO_STATE(ctx.s); } // Used for execution Rose prefix/infixes. @@ -777,11 +808,11 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, ctx.repeat_state = q->streamState + limex->stateSize; ctx.callback = NULL; ctx.context = NULL; - STORE_STATE(&ctx.cached_estate, ZERO_STATE); + ctx.cached_estate = ZERO_STATE; ctx.cached_br = 0; DEBUG_PRINTF("LOAD STATE\n"); - STORE_STATE(&ctx.s, LOAD_STATE(q->state)); + ctx.s = *(STATE_T *)q->state; assert(q->items[q->cur].type == MQE_START); u64a offset = q->offset; @@ -793,7 +824,7 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, if (n->maxWidth) { if (ep - sp > n->maxWidth) { sp = ep - n->maxWidth; - STORE_STATE(&ctx.s, INITIAL_FN(limex, !!sp)); + ctx.s = INITIAL_FN(limex, !!sp); } } assert(ep >= sp); @@ -832,14 +863,14 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, DEBUG_PRINTF("END, nfa is %s\n", ISNONZERO_STATE(ctx.s) ? "still alive" : "dead"); - STORE_STATE(q->state, LOAD_STATE(&ctx.s)); + *(STATE_T *)q->state = ctx.s; - if (JOIN(limexInAccept, SIZE)(limex, LOAD_STATE(&ctx.s), ctx.repeat_ctrl, + if (JOIN(limexInAccept, SIZE)(limex, ctx.s, ctx.repeat_ctrl, ctx.repeat_state, sp + 1, report)) { return MO_MATCHES_PENDING; } - return ISNONZERO_STATE(LOAD_STATE(&ctx.s)); + return ISNONZERO_STATE(ctx.s); } char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state, @@ -875,11 +906,11 @@ char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset, ctx.repeat_state = NULL; ctx.callback = cb; ctx.context = context; - STORE_STATE(&ctx.cached_estate, ZERO_STATE); + ctx.cached_estate = ZERO_STATE; ctx.cached_br = 0; const IMPL_NFA_T *limex = getImplNfa(n); - STORE_STATE(&ctx.s, INITIAL_FN(limex, 0)); // always anchored + ctx.s = INITIAL_FN(limex, 0); // always anchored // 'buf' may be null, for example when we're scanning at EOD time. if (buflen) { @@ -896,7 +927,7 @@ char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset, REV_STREAM_FN(limex, hbuf, hlen, &ctx, offset); } - if (offset == 0 && ISNONZERO_STATE(LOAD_STATE(&ctx.s))) { + if (offset == 0 && ISNONZERO_STATE(ctx.s)) { TESTEOD_REV_FN(limex, &ctx.s, offset, cb, context); } @@ -913,7 +944,7 @@ char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa, union RepeatControl *repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); char *repeat_state = q->streamState + limex->stateSize; - STATE_T state = LOAD_STATE(q->state); + STATE_T state = *(STATE_T *)q->state; u64a offset = q->offset + q_last_loc(q) + 1; return JOIN(limexInAccept, SIZE)(limex, state, repeat_ctrl, repeat_state, @@ -928,7 +959,7 @@ char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) { union RepeatControl *repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); char *repeat_state = q->streamState + limex->stateSize; - STATE_T state = LOAD_STATE(q->state); + STATE_T state = *(STATE_T *)q->state; u64a offset = q->offset + q_last_loc(q) + 1; return JOIN(limexInAnyAccept, SIZE)(limex, state, repeat_ctrl, repeat_state, @@ -941,8 +972,8 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( s64a loc) { assert(nfa->flags & NFA_ZOMBIE); const IMPL_NFA_T *limex = getImplNfa(nfa); - STATE_T state = LOAD_STATE(q->state); - STATE_T zmask = LOAD_STATE(&limex->zombieMask); + STATE_T state = *(STATE_T *)q->state; + STATE_T zmask = LOAD_FROM_ENG(&limex->zombieMask); if (limex->repeatCount) { u64a offset = q->offset + loc + 1; @@ -981,11 +1012,10 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( #undef STREAMSILENT_FN #undef CONTEXT_T #undef EXCEPTION_T -#undef LOAD_STATE -#undef STORE_STATE #undef AND_STATE #undef ANDNOT_STATE #undef OR_STATE +#undef LSHIFT_STATE #undef TESTBIT_STATE #undef CLEARBIT_STATE #undef ZERO_STATE @@ -999,8 +1029,4 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( #undef ACCEL_MASK #undef ACCEL_AND_FRIENDS_MASK #undef EXCEPTION_MASK - -// Parameters. -#undef SIZE -#undef STATE_T #undef LIMEX_API_ROOT diff --git a/src/nfa/limex_simd128.c b/src/nfa/limex_simd128.c index f0fb1dd4..c5f2b33e 100644 --- a/src/nfa/limex_simd128.c +++ b/src/nfa/limex_simd128.c @@ -48,19 +48,16 @@ #include "limex_runtime.h" -#define SIZE 128 -#define STATE_T m128 +#define SIZE 128 +#define STATE_T m128 +#define ENG_STATE_T m128 +#define LOAD_FROM_ENG load_m128 + #include "limex_exceptional.h" -#define SIZE 128 -#define STATE_T m128 #include "limex_state_impl.h" -#define SIZE 128 -#define STATE_T m128 #define INLINE_ATTR really_inline #include "limex_common_impl.h" -#define SIZE 128 -#define STATE_T m128 #include "limex_runtime_impl.h" diff --git a/src/nfa/limex_simd256.c b/src/nfa/limex_simd256.c index 57648b69..cc232908 100644 --- a/src/nfa/limex_simd256.c +++ b/src/nfa/limex_simd256.c @@ -45,19 +45,16 @@ // Common code #include "limex_runtime.h" -#define SIZE 256 -#define STATE_T m256 +#define SIZE 256 +#define STATE_T m256 +#define ENG_STATE_T m256 +#define LOAD_FROM_ENG load_m256 + #include "limex_exceptional.h" -#define SIZE 256 -#define STATE_T m256 #include "limex_state_impl.h" -#define SIZE 256 -#define STATE_T m256 #define INLINE_ATTR really_inline #include "limex_common_impl.h" -#define SIZE 256 -#define STATE_T m256 #include "limex_runtime_impl.h" diff --git a/src/nfa/limex_simd384.c b/src/nfa/limex_simd384.c index 84061f61..7e596e48 100644 --- a/src/nfa/limex_simd384.c +++ b/src/nfa/limex_simd384.c @@ -45,19 +45,16 @@ // Common code #include "limex_runtime.h" -#define SIZE 384 -#define STATE_T m384 +#define SIZE 384 +#define STATE_T m384 +#define ENG_STATE_T m384 +#define LOAD_FROM_ENG load_m384 + #include "limex_exceptional.h" -#define SIZE 384 -#define STATE_T m384 #include "limex_state_impl.h" -#define SIZE 384 -#define STATE_T m384 #define INLINE_ATTR really_inline #include "limex_common_impl.h" -#define SIZE 384 -#define STATE_T m384 #include "limex_runtime_impl.h" diff --git a/src/nfa/limex_simd512.c b/src/nfa/limex_simd512.c index a6646d83..f779f335 100644 --- a/src/nfa/limex_simd512.c +++ b/src/nfa/limex_simd512.c @@ -45,19 +45,16 @@ // Common code #include "limex_runtime.h" -#define SIZE 512 -#define STATE_T m512 +#define SIZE 512 +#define STATE_T m512 +#define ENG_STATE_T m512 +#define LOAD_FROM_ENG load_m512 + #include "limex_exceptional.h" -#define SIZE 512 -#define STATE_T m512 #include "limex_state_impl.h" -#define SIZE 512 -#define STATE_T m512 #define INLINE_ATTR really_inline #include "limex_common_impl.h" -#define SIZE 512 -#define STATE_T m512 #include "limex_runtime_impl.h" diff --git a/src/nfa/limex_state_impl.h b/src/nfa/limex_state_impl.h index d6e89904..81153f71 100644 --- a/src/nfa/limex_state_impl.h +++ b/src/nfa/limex_state_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,8 +35,8 @@ #include "util/state_compress.h" #include -#if !defined(SIZE) || !defined(STATE_T) -# error Must define SIZE and STATE_T in includer. +#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) +# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer. #endif #define IMPL_NFA_T JOIN(struct LimExNFA, SIZE) @@ -44,29 +44,33 @@ #define REACHMASK_FN JOIN(moNfaReachMask, SIZE) #define COMPRESS_FN JOIN(moNfaCompressState, SIZE) #define EXPAND_FN JOIN(moNfaExpandState, SIZE) -#define COMPRESSED_STORE_FN JOIN(storecompressed, SIZE) -#define COMPRESSED_LOAD_FN JOIN(loadcompressed, SIZE) +#define COMPRESSED_STORE_FN JOIN(store_compressed_, STATE_T) +#define COMPRESSED_LOAD_FN JOIN(load_compressed_, STATE_T) #define PARTIAL_STORE_FN JOIN(partial_store_, STATE_T) #define PARTIAL_LOAD_FN JOIN(partial_load_, STATE_T) -#define LOAD_STATE JOIN(load_, STATE_T) -#define STORE_STATE JOIN(store_, STATE_T) #define OR_STATE JOIN(or_, STATE_T) #define AND_STATE JOIN(and_, STATE_T) #define ISZERO_STATE JOIN(isZero_, STATE_T) static really_inline -const STATE_T *REACHMASK_FN(const IMPL_NFA_T *limex, const u8 key) { - const STATE_T *reach - = (const STATE_T *)((const char *)limex + sizeof(*limex)); - assert(ISALIGNED_N(reach, alignof(STATE_T))); - return &reach[limex->reachMap[key]]; +const ENG_STATE_T *get_reach_table(const IMPL_NFA_T *limex) { + const ENG_STATE_T *reach + = (const ENG_STATE_T *)((const char *)limex + sizeof(*limex)); + assert(ISALIGNED_N(reach, alignof(ENG_STATE_T))); + return reach; +} + +static really_inline +STATE_T REACHMASK_FN(const IMPL_NFA_T *limex, const u8 key) { + const ENG_STATE_T *reach = get_reach_table(limex); + return LOAD_FROM_ENG(&reach[limex->reachMap[key]]); } static really_inline void COMPRESS_FN(const IMPL_NFA_T *limex, u8 *dest, const STATE_T *src, u8 key) { assert(ISALIGNED_N(src, alignof(STATE_T))); - STATE_T a_src = LOAD_STATE(src); + STATE_T a_src = *src; DEBUG_PRINTF("compress state: %p -> %p\n", src, dest); @@ -77,31 +81,30 @@ void COMPRESS_FN(const IMPL_NFA_T *limex, u8 *dest, const STATE_T *src, } else { DEBUG_PRINTF("compress state, key=%hhx\n", key); - const STATE_T *reachmask = REACHMASK_FN(limex, key); + STATE_T reachmask = REACHMASK_FN(limex, key); // Masked compression means that we mask off the initDs states and // provide a shortcut for the all-zeroes case. Note that these must be // switched on in the EXPAND call below. if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) { - STATE_T s = AND_STATE(LOAD_STATE(&limex->compressMask), a_src); + STATE_T s = AND_STATE(LOAD_FROM_ENG(&limex->compressMask), a_src); if (ISZERO_STATE(s)) { DEBUG_PRINTF("after compression mask, all states are zero\n"); memset(dest, 0, limex->stateSize); return; } - STATE_T mask = AND_STATE(LOAD_STATE(&limex->compressMask), - LOAD_STATE(reachmask)); + STATE_T mask = AND_STATE(LOAD_FROM_ENG(&limex->compressMask), + reachmask); COMPRESSED_STORE_FN(dest, &s, &mask, limex->stateSize); } else { - COMPRESSED_STORE_FN(dest, src, reachmask, limex->stateSize); + COMPRESSED_STORE_FN(dest, src, &reachmask, limex->stateSize); } } } static really_inline -void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src, - u8 key) { +void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src, u8 key) { assert(ISALIGNED_N(dest, alignof(STATE_T))); DEBUG_PRINTF("expand state: %p -> %p\n", src, dest); @@ -111,16 +114,15 @@ void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src, *dest = PARTIAL_LOAD_FN(src, limex->stateSize); } else { DEBUG_PRINTF("expand state, key=%hhx\n", key); - const STATE_T *reachmask = REACHMASK_FN(limex, key); + STATE_T reachmask = REACHMASK_FN(limex, key); if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) { - STATE_T mask = AND_STATE(LOAD_STATE(&limex->compressMask), - LOAD_STATE(reachmask)); + STATE_T mask = AND_STATE(LOAD_FROM_ENG(&limex->compressMask), + reachmask); COMPRESSED_LOAD_FN(dest, src, &mask, limex->stateSize); - STORE_STATE(dest, OR_STATE(LOAD_STATE(&limex->initDS), - LOAD_STATE(dest))); + *dest = OR_STATE(LOAD_FROM_ENG(&limex->initDS), *dest); } else { - COMPRESSED_LOAD_FN(dest, src, reachmask, limex->stateSize); + COMPRESSED_LOAD_FN(dest, src, &reachmask, limex->stateSize); } } } @@ -134,11 +136,6 @@ void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src, #undef COMPRESSED_LOAD_FN #undef PARTIAL_STORE_FN #undef PARTIAL_LOAD_FN -#undef LOAD_STATE -#undef STORE_STATE #undef OR_STATE #undef AND_STATE #undef ISZERO_STATE - -#undef SIZE -#undef STATE_T diff --git a/src/nfa/nfa_api_dispatch.c b/src/nfa/nfa_api_dispatch.c index c67103b3..2a213ed6 100644 --- a/src/nfa/nfa_api_dispatch.c +++ b/src/nfa/nfa_api_dispatch.c @@ -55,6 +55,7 @@ #define DISPATCH_BY_NFA_TYPE(dbnt_func) \ switch (nfa->type) { \ DISPATCH_CASE(LIMEX, LimEx, 32, dbnt_func); \ + DISPATCH_CASE(LIMEX, LimEx, 64, dbnt_func); \ DISPATCH_CASE(LIMEX, LimEx, 128, dbnt_func); \ DISPATCH_CASE(LIMEX, LimEx, 256, dbnt_func); \ DISPATCH_CASE(LIMEX, LimEx, 384, dbnt_func); \ diff --git a/src/nfa/nfa_build_util.cpp b/src/nfa/nfa_build_util.cpp index 93376b01..f3b5329d 100644 --- a/src/nfa/nfa_build_util.cpp +++ b/src/nfa/nfa_build_util.cpp @@ -170,17 +170,16 @@ enum NFACategory {NFA_LIMEX, NFA_OTHER}; #define DO_IF_DUMP_SUPPORT(a) #endif -#define MAKE_LIMEX_TRAITS(mlt_size) \ +#define MAKE_LIMEX_TRAITS(mlt_size, mlt_align) \ template<> struct NFATraits { \ static UNUSED const char *name; \ static const NFACategory category = NFA_LIMEX; \ typedef LimExNFA##mlt_size implNFA_t; \ - typedef u_##mlt_size tableRow_t; \ static const nfa_dispatch_fn has_accel; \ static const nfa_dispatch_fn has_repeats; \ static const nfa_dispatch_fn has_repeats_other_than_firsts; \ static const u32 stateAlign = \ - MAX(alignof(tableRow_t), alignof(RepeatControl)); \ + MAX(mlt_align, alignof(RepeatControl)); \ static const bool fast = mlt_size <= 64; \ }; \ const nfa_dispatch_fn NFATraits::has_accel \ @@ -194,16 +193,17 @@ enum NFACategory {NFA_LIMEX, NFA_OTHER}; const char *NFATraits::name \ = "LimEx "#mlt_size; \ template<> struct getDescription { \ - static string call(const void *ptr) { \ - return getDescriptionLimEx((const NFA *)ptr); \ - } \ + static string call(const void *p) { \ + return getDescriptionLimEx((const NFA *)p); \ + } \ };) -MAKE_LIMEX_TRAITS(32) -MAKE_LIMEX_TRAITS(128) -MAKE_LIMEX_TRAITS(256) -MAKE_LIMEX_TRAITS(384) -MAKE_LIMEX_TRAITS(512) +MAKE_LIMEX_TRAITS(32, alignof(u32)) +MAKE_LIMEX_TRAITS(64, alignof(m128)) /* special, 32bit arch uses m128 */ +MAKE_LIMEX_TRAITS(128, alignof(m128)) +MAKE_LIMEX_TRAITS(256, alignof(m256)) +MAKE_LIMEX_TRAITS(384, alignof(m384)) +MAKE_LIMEX_TRAITS(512, alignof(m512)) template<> struct NFATraits { UNUSED static const char *name; diff --git a/src/nfa/nfa_dump_dispatch.cpp b/src/nfa/nfa_dump_dispatch.cpp index 388ac003..f7a5e05d 100644 --- a/src/nfa/nfa_dump_dispatch.cpp +++ b/src/nfa/nfa_dump_dispatch.cpp @@ -60,6 +60,7 @@ namespace ue2 { DEBUG_PRINTF("dispatch for NFA type %u\n", nfa->type); \ switch (nfa->type) { \ DISPATCH_CASE(LIMEX, LimEx, 32, dbnt_func); \ + DISPATCH_CASE(LIMEX, LimEx, 64, dbnt_func); \ DISPATCH_CASE(LIMEX, LimEx, 128, dbnt_func); \ DISPATCH_CASE(LIMEX, LimEx, 256, dbnt_func); \ DISPATCH_CASE(LIMEX, LimEx, 384, dbnt_func); \ diff --git a/src/nfa/nfa_internal.h b/src/nfa/nfa_internal.h index 41fee73e..58c3da6c 100644 --- a/src/nfa/nfa_internal.h +++ b/src/nfa/nfa_internal.h @@ -52,6 +52,7 @@ extern "C" enum NFAEngineType { LIMEX_NFA_32, + LIMEX_NFA_64, LIMEX_NFA_128, LIMEX_NFA_256, LIMEX_NFA_384, @@ -164,6 +165,7 @@ static really_inline int isDfaType(u8 t) { static really_inline int isNfaType(u8 t) { switch (t) { case LIMEX_NFA_32: + case LIMEX_NFA_64: case LIMEX_NFA_128: case LIMEX_NFA_256: case LIMEX_NFA_384: diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index 87de0940..dc8922fd 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -173,6 +173,12 @@ static really_inline u64a movq(const m128 in) { #endif } +/* another form of movq */ +static really_inline +m128 load_m128_from_u64a(const u64a *p) { + return _mm_loadl_epi64((const m128 *)p); +} + #define rshiftbyte_m128(a, count_immed) _mm_srli_si128(a, count_immed) #define lshiftbyte_m128(a, count_immed) _mm_slli_si128(a, count_immed) @@ -270,12 +276,12 @@ void clearbit128(m128 *ptr, unsigned int n) { // tests bit N in the given vector. static really_inline -char testbit128(const m128 *ptr, unsigned int n) { +char testbit128(m128 val, unsigned int n) { const m128 mask = mask1bit128(n); #if defined(__SSE4_1__) - return !_mm_testz_si128(mask, *ptr); + return !_mm_testz_si128(mask, val); #else - return isnonzero128(and128(mask, *ptr)); + return isnonzero128(and128(mask, val)); #endif } @@ -606,13 +612,13 @@ void clearbit256(m256 *ptr, unsigned int n) { // tests bit N in the given vector. static really_inline -char testbit256(const m256 *ptr, unsigned int n) { - assert(n < sizeof(*ptr) * 8); - const m128 *sub; +char testbit256(m256 val, unsigned int n) { + assert(n < sizeof(val) * 8); + m128 sub; if (n < 128) { - sub = &ptr->lo; + sub = val.lo; } else { - sub = &ptr->hi; + sub = val.hi; n -= 128; } return testbit128(sub, n); @@ -633,9 +639,9 @@ void clearbit256(m256 *ptr, unsigned int n) { // tests bit N in the given vector. static really_inline -char testbit256(const m256 *ptr, unsigned int n) { +char testbit256(m256 val, unsigned int n) { const m256 mask = mask1bit256(n); - return !_mm256_testz_si256(mask, *ptr); + return !_mm256_testz_si256(mask, val); } static really_really_inline @@ -827,15 +833,15 @@ void clearbit384(m384 *ptr, unsigned int n) { // tests bit N in the given vector. static really_inline -char testbit384(const m384 *ptr, unsigned int n) { - assert(n < sizeof(*ptr) * 8); - const m128 *sub; +char testbit384(m384 val, unsigned int n) { + assert(n < sizeof(val) * 8); + m128 sub; if (n < 128) { - sub = &ptr->lo; + sub = val.lo; } else if (n < 256) { - sub = &ptr->mid; + sub = val.mid; } else { - sub = &ptr->hi; + sub = val.hi; } return testbit128(sub, n % 128); } @@ -1040,26 +1046,26 @@ void clearbit512(m512 *ptr, unsigned int n) { // tests bit N in the given vector. static really_inline -char testbit512(const m512 *ptr, unsigned int n) { - assert(n < sizeof(*ptr) * 8); +char testbit512(m512 val, unsigned int n) { + assert(n < sizeof(val) * 8); #if !defined(__AVX2__) - const m128 *sub; + m128 sub; if (n < 128) { - sub = &ptr->lo.lo; + sub = val.lo.lo; } else if (n < 256) { - sub = &ptr->lo.hi; + sub = val.lo.hi; } else if (n < 384) { - sub = &ptr->hi.lo; + sub = val.hi.lo; } else { - sub = &ptr->hi.hi; + sub = val.hi.hi; } return testbit128(sub, n % 128); #else - const m256 *sub; + m256 sub; if (n < 256) { - sub = &ptr->lo; + sub = val.lo; } else { - sub = &ptr->hi; + sub = val.hi; n -= 256; } return testbit256(sub, n); diff --git a/src/util/uniform_ops.h b/src/util/uniform_ops.h index 0619c7e4..3385e441 100644 --- a/src/util/uniform_ops.h +++ b/src/util/uniform_ops.h @@ -180,44 +180,52 @@ #define partial_load_m384(ptr, sz) loadbytes384(ptr, sz) #define partial_load_m512(ptr, sz) loadbytes512(ptr, sz) -#define store_compressed_u32(ptr, x, m) storecompressed32(ptr, x, m) -#define store_compressed_u64a(ptr, x, m) storecompressed64(ptr, x, m) -#define store_compressed_m128(ptr, x, m) storecompressed128(ptr, x, m) -#define store_compressed_m256(ptr, x, m) storecompressed256(ptr, x, m) -#define store_compressed_m384(ptr, x, m) storecompressed384(ptr, x, m) -#define store_compressed_m512(ptr, x, m) storecompressed512(ptr, x, m) +#define store_compressed_u32(ptr, x, m, len) storecompressed32(ptr, x, m, len) +#define store_compressed_u64a(ptr, x, m, len) storecompressed64(ptr, x, m, len) +#define store_compressed_m128(ptr, x, m, len) storecompressed128(ptr, x, m, len) +#define store_compressed_m256(ptr, x, m, len) storecompressed256(ptr, x, m, len) +#define store_compressed_m384(ptr, x, m, len) storecompressed384(ptr, x, m, len) +#define store_compressed_m512(ptr, x, m, len) storecompressed512(ptr, x, m, len) -#define load_compressed_u32(x, ptr, m) loadcompressed32(x, ptr, m) -#define load_compressed_u64a(x, ptr, m) loadcompressed64(x, ptr, m) -#define load_compressed_m128(x, ptr, m) loadcompressed128(x, ptr, m) -#define load_compressed_m256(x, ptr, m) loadcompressed256(x, ptr, m) -#define load_compressed_m384(x, ptr, m) loadcompressed384(x, ptr, m) -#define load_compressed_m512(x, ptr, m) loadcompressed512(x, ptr, m) +#define load_compressed_u32(x, ptr, m, len) loadcompressed32(x, ptr, m, len) +#define load_compressed_u64a(x, ptr, m, len) loadcompressed64(x, ptr, m, len) +#define load_compressed_m128(x, ptr, m, len) loadcompressed128(x, ptr, m, len) +#define load_compressed_m256(x, ptr, m, len) loadcompressed256(x, ptr, m, len) +#define load_compressed_m384(x, ptr, m, len) loadcompressed384(x, ptr, m, len) +#define load_compressed_m512(x, ptr, m, len) loadcompressed512(x, ptr, m, len) -static really_inline void clearbit_u32(u32 *p, u32 n) { +static really_inline +void clearbit_u32(u32 *p, u32 n) { assert(n < sizeof(*p) * 8); *p &= ~(1U << n); } -static really_inline void clearbit_u64a(u64a *p, u32 n) { + +static really_inline +void clearbit_u64a(u64a *p, u32 n) { assert(n < sizeof(*p) * 8); *p &= ~(1ULL << n); } + #define clearbit_m128(ptr, n) (clearbit128(ptr, n)) #define clearbit_m256(ptr, n) (clearbit256(ptr, n)) #define clearbit_m384(ptr, n) (clearbit384(ptr, n)) #define clearbit_m512(ptr, n) (clearbit512(ptr, n)) -static really_inline char testbit_u32(const u32 *p, u32 n) { - assert(n < sizeof(*p) * 8); - return !!(*p & (1U << n)); +static really_inline +char testbit_u32(u32 val, u32 n) { + assert(n < sizeof(val) * 8); + return !!(val & (1U << n)); } -static really_inline char testbit_u64a(const u64a *p, u32 n) { - assert(n < sizeof(*p) * 8); - return !!(*p & (1ULL << n)); + +static really_inline +char testbit_u64a(u64a val, u32 n) { + assert(n < sizeof(val) * 8); + return !!(val & (1ULL << n)); } -#define testbit_m128(ptr, n) (testbit128(ptr, n)) -#define testbit_m256(ptr, n) (testbit256(ptr, n)) -#define testbit_m384(ptr, n) (testbit384(ptr, n)) -#define testbit_m512(ptr, n) (testbit512(ptr, n)) + +#define testbit_m128(val, n) (testbit128(val, n)) +#define testbit_m256(val, n) (testbit256(val, n)) +#define testbit_m384(val, n) (testbit384(val, n)) +#define testbit_m512(val, n) (testbit512(val, n)) #endif diff --git a/unit/internal/limex_nfa.cpp b/unit/internal/limex_nfa.cpp index 6bb4fcb9..1c742793 100644 --- a/unit/internal/limex_nfa.cpp +++ b/unit/internal/limex_nfa.cpp @@ -31,7 +31,6 @@ #include "grey.h" #include "compiler/compiler.h" -#include "nfa/limex_context.h" #include "nfa/limex_internal.h" #include "nfa/nfa_api.h" #include "nfa/nfa_api_util.h" @@ -167,11 +166,10 @@ TEST_P(LimExModelTest, QueueExec) { TEST_P(LimExModelTest, CompressExpand) { ASSERT_TRUE(nfa != nullptr); - // 64-bit NFAs assume during compression that they have >= 5 bytes of - // compressed NFA state, which isn't true for our 8-state test pattern. We - // skip this test for just these models. - if (nfa->scratchStateSize == 8) { - return; + u32 real_state_size = nfa->scratchStateSize; + /* Only look at 8 bytes for limex 64 (rather than the padding) */ + if (nfa->type == LIMEX_NFA_64) { + real_state_size = sizeof(u64a); } initQueue(); @@ -195,8 +193,7 @@ TEST_P(LimExModelTest, CompressExpand) { memset(dest, 0xff, nfa->scratchStateSize); nfaExpandState(nfa.get(), dest, q.streamState, q.offset, queue_prev_byte(&q, end)); - ASSERT_TRUE(std::equal(dest, dest + nfa->scratchStateSize, - full_state.get())); + ASSERT_TRUE(std::equal(dest, dest + real_state_size, full_state.get())); } TEST_P(LimExModelTest, InitCompressedState0) { diff --git a/unit/internal/simd_utils.cpp b/unit/internal/simd_utils.cpp index 3c07b2b0..31d4b925 100644 --- a/unit/internal/simd_utils.cpp +++ b/unit/internal/simd_utils.cpp @@ -110,10 +110,10 @@ void simd_setbit(m128 *a, unsigned int i) { return setbit128(a, i); } void simd_setbit(m256 *a, unsigned int i) { return setbit256(a, i); } void simd_setbit(m384 *a, unsigned int i) { return setbit384(a, i); } void simd_setbit(m512 *a, unsigned int i) { return setbit512(a, i); } -bool simd_testbit(const m128 *a, unsigned int i) { return testbit128(a, i); } -bool simd_testbit(const m256 *a, unsigned int i) { return testbit256(a, i); } -bool simd_testbit(const m384 *a, unsigned int i) { return testbit384(a, i); } -bool simd_testbit(const m512 *a, unsigned int i) { return testbit512(a, i); } +bool simd_testbit(const m128 &a, unsigned int i) { return testbit128(a, i); } +bool simd_testbit(const m256 &a, unsigned int i) { return testbit256(a, i); } +bool simd_testbit(const m384 &a, unsigned int i) { return testbit384(a, i); } +bool simd_testbit(const m512 &a, unsigned int i) { return testbit512(a, i); } u32 simd_diffrich(const m128 &a, const m128 &b) { return diffrich128(a, b); } u32 simd_diffrich(const m256 &a, const m256 &b) { return diffrich256(a, b); } u32 simd_diffrich(const m384 &a, const m384 &b) { return diffrich384(a, b); } @@ -419,15 +419,15 @@ TYPED_TEST(SimdUtilsTest, testbit) { // First, all bits are on in 'ones'. for (unsigned int i = 0; i < total_bits; i++) { - ASSERT_EQ(1, simd_testbit(&ones, i)) << "bit " << i << " is on"; + ASSERT_EQ(1, simd_testbit(ones, i)) << "bit " << i << " is on"; } // Try individual bits; only 'i' should be on. for (unsigned int i = 0; i < total_bits; i++) { TypeParam a = setbit(i); for (unsigned int j = 0; j < total_bits; j++) { - ASSERT_EQ(i == j ? 1 : 0, simd_testbit(&a, j)) << "bit " << i - << " is wrong"; + ASSERT_EQ(i == j ? 1 : 0, simd_testbit(a, j)) << "bit " << i + << " is wrong"; } } } @@ -470,7 +470,7 @@ TYPED_TEST(SimdUtilsTest, diffrich) { // and nothing is on in zeroes for (unsigned int i = 0; i < total_bits; i++) { - ASSERT_EQ(0, simd_testbit(&zeroes, i)) << "bit " << i << " is off"; + ASSERT_EQ(0, simd_testbit(zeroes, i)) << "bit " << i << " is off"; } // All-zeroes and all-ones differ in all words diff --git a/unit/internal/uniform_ops.cpp b/unit/internal/uniform_ops.cpp index 33d7cd30..10defdbd 100644 --- a/unit/internal/uniform_ops.cpp +++ b/unit/internal/uniform_ops.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -156,26 +156,26 @@ TEST(Uniform, loadstore_m512) { TEST(Uniform, testbit_u32) { for (u32 i = 0; i < 32; i++) { u32 v = 0; - EXPECT_EQ((char)0, testbit_u32(&v, i)); + EXPECT_EQ((char)0, testbit_u32(v, i)); v |= 1ULL << i; - EXPECT_EQ((char)1, testbit_u32(&v, i)); + EXPECT_EQ((char)1, testbit_u32(v, i)); v = ~v; - EXPECT_EQ((char)0, testbit_u32(&v, i)); + EXPECT_EQ((char)0, testbit_u32(v, i)); v |= 1ULL << i; - EXPECT_EQ((char)1, testbit_u32(&v, i)); + EXPECT_EQ((char)1, testbit_u32(v, i)); } } TEST(Uniform, testbit_u64a) { for (u32 i = 0; i < 64; i++) { u64a v = 0; - EXPECT_EQ((char)0, testbit_u64a(&v, i)); + EXPECT_EQ((char)0, testbit_u64a(v, i)); v |= 1ULL << i; - EXPECT_EQ((char)1, testbit_u64a(&v, i)); + EXPECT_EQ((char)1, testbit_u64a(v, i)); v = ~v; - EXPECT_EQ((char)0, testbit_u64a(&v, i)); + EXPECT_EQ((char)0, testbit_u64a(v, i)); v |= 1ULL << i; - EXPECT_EQ((char)1, testbit_u64a(&v, i)); + EXPECT_EQ((char)1, testbit_u64a(v, i)); } } @@ -183,7 +183,7 @@ TEST(Uniform, clearbit_u32) { for (u32 i = 0; i < 32; i++) { u32 v = ~0U; clearbit_u32(&v, i); - EXPECT_EQ((char)0, testbit_u32(&v, i)); + EXPECT_EQ((char)0, testbit_u32(v, i)); v = ~v; clearbit_u32(&v, i); EXPECT_EQ(0U, v); @@ -194,7 +194,7 @@ TEST(Uniform, clearbit_u64a) { for (u32 i = 0; i < 64; i++) { u64a v = ~0ULL; clearbit_u64a(&v, i); - EXPECT_EQ((char)0, testbit_u64a(&v, i)); + EXPECT_EQ((char)0, testbit_u64a(v, i)); v = ~v; clearbit_u64a(&v, i); EXPECT_EQ(0ULL, v);