Introduce a 64-bit LimEx model.

On 64-bit platforms, the LimEx 64 model keeps its state in ordinary GPRs.
On 32-bit platforms, however, the runtime implementation uses 128-bit SSE
registers.
Alex Coyte 2016-08-26 14:17:41 +10:00 committed by Matthew Barr
parent 3cf4199879
commit a08e1dd690
28 changed files with 441 additions and 351 deletions
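As a rough standalone illustration of the trade described above (not part of the patch; x86 targets and the stand-in names are assumed): the uniform-ops naming used throughout this diff means the same X-macro source compiles the state AND down to one GPR instruction on 64-bit builds and one PAND on 32-bit builds.

#include <stdint.h>
#include <emmintrin.h>

/* Illustration only: stand-ins for ue2's u64a/m128 state types. */
#ifdef ARCH_64_BIT
typedef uint64_t state_t;
static inline state_t and_state(state_t a, state_t b) { return a & b; }
#else
typedef __m128i state_t;                  /* state lives in the low 64 bits */
static inline state_t and_state(state_t a, state_t b) {
    return _mm_and_si128(a, b);           /* PAND */
}
#endif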


@ -470,6 +470,7 @@ set (hs_exec_SRCS
src/nfa/limex_exceptional.h
src/nfa/limex_native.c
src/nfa/limex_ring.h
src/nfa/limex_64.c
src/nfa/limex_simd128.c
src/nfa/limex_simd256.c
src/nfa/limex_simd384.c


@ -77,6 +77,7 @@ extern "C"
GENERATE_NFA_DUMP_DECL(gf_name)
GENERATE_NFA_DECL(nfaExecLimEx32)
GENERATE_NFA_DECL(nfaExecLimEx64)
GENERATE_NFA_DECL(nfaExecLimEx128)
GENERATE_NFA_DECL(nfaExecLimEx256)
GENERATE_NFA_DECL(nfaExecLimEx384)

src/nfa/limex_64.c (new file, +73)

@ -0,0 +1,73 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief LimEx NFA: 64-bit runtime implementations.
*/
/* Limex64 is unusual in that, on 32-bit platforms, it uses an m128 at runtime
 * for state calculations.
 */
//#define DEBUG_INPUT
//#define DEBUG_EXCEPTIONS
#include "limex.h"
#include "accel.h"
#include "limex_internal.h"
#include "nfa_internal.h"
#include "ue2common.h"
#include "util/bitutils.h"
#include "util/simd_utils.h"
// Common code
#define STATE_ON_STACK
#define ESTATE_ON_STACK
#include "limex_runtime.h"
#define SIZE 64
#define ENG_STATE_T u64a
#ifdef ARCH_64_BIT
#define STATE_T u64a
#define LOAD_FROM_ENG load_u64a
#else
#define STATE_T m128
#define LOAD_FROM_ENG load_m128_from_u64a
#endif
#include "limex_exceptional.h"
#include "limex_state_impl.h"
#define INLINE_ATTR really_inline
#include "limex_common_impl.h"
#include "limex_runtime_impl.h"


@ -82,6 +82,22 @@ size_t doAccel32(u32 s, u32 accel, const u8 *accelTable,
return accelScanWrapper(accelTable, aux, input, idx, i, end);
}
#ifdef ARCH_64_BIT
size_t doAccel64(u64a s, u64a accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
size_t end) {
u32 idx = packedExtract64(s, accel);
return accelScanWrapper(accelTable, aux, input, idx, i, end);
}
#else
size_t doAccel64(m128 s, m128 accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
size_t end) {
u32 idx = packedExtract64(movq(s), movq(accel));
return accelScanWrapper(accelTable, aux, input, idx, i, end);
}
#endif
size_t doAccel128(const m128 *state, const struct LimExNFA128 *limex,
const u8 *accelTable, const union AccelAux *aux,
const u8 *input, size_t i, size_t end) {
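Both doAccel64 variants feed packedExtract64 the same live 64 bits, with the 32-bit build first pulling them out of the SSE register via movq. A standalone model of what I take the packed extraction to do (function and loop invented for illustration; this is not the library's implementation):

#include <stdint.h>
#include <assert.h>

/* Gather the bits of s selected by mask into a dense low-order index. */
static uint32_t packed_extract_sketch(uint64_t s, uint64_t mask) {
    uint32_t idx = 0, bit = 0;
    for (uint64_t m = mask; m; m &= m - 1) {  /* walk set bits of mask */
        uint64_t lowest = m & -m;
        if (s & lowest) {
            idx |= 1u << bit;
        }
        bit++;
    }
    return idx;
}

int main(void) {
    /* mask has bits 1,2,3; s has bits 1,3 -> index bits 0,2 -> 0b101 */
    assert(packed_extract_sketch(0xA, 0xE) == 0x5);
    return 0;
}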


@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -40,6 +40,7 @@
#include "util/simd_utils.h" // for m128 etc
union AccelAux;
struct LimExNFA64;
struct LimExNFA128;
struct LimExNFA256;
struct LimExNFA384;
@ -49,6 +50,16 @@ size_t doAccel32(u32 s, u32 accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
size_t end);
#ifdef ARCH_64_BIT
size_t doAccel64(u64a s, u64a accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
size_t end);
#else
size_t doAccel64(m128 s, m128 accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
size_t end);
#endif
size_t doAccel128(const m128 *s, const struct LimExNFA128 *limex,
const u8 *accelTable, const union AccelAux *aux,
const u8 *input, size_t i, size_t end);


@ -31,8 +31,9 @@
/* impl of limex functions which depend only on state size */
#if !defined(SIZE) || !defined(STATE_T) || !defined(INLINE_ATTR)
# error Must define SIZE and STATE_T and INLINE_ATTR in includer.
#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) \
|| !defined(INLINE_ATTR)
# error Must define SIZE, STATE_T, LOAD_FROM_ENG and INLINE_ATTR in includer.
#endif
#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE)
@ -50,8 +51,6 @@
#define PROCESS_ACCEPTS_NOSQUASH_FN JOIN(moProcessAcceptsNoSquash, SIZE)
#define CONTEXT_T JOIN(NFAContext, SIZE)
#define ONES_STATE JOIN(ones_, STATE_T)
#define LOAD_STATE JOIN(load_, STATE_T)
#define STORE_STATE JOIN(store_, STATE_T)
#define AND_STATE JOIN(and_, STATE_T)
#define OR_STATE JOIN(or_, STATE_T)
#define ANDNOT_STATE JOIN(andnot_, STATE_T)
@ -83,7 +82,7 @@ void SQUASH_UNTUG_BR_FN(const IMPL_NFA_T *limex,
const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
u32 cyclicState = info->cyclicState;
if (!TESTBIT_STATE(accstate, cyclicState)) {
if (!TESTBIT_STATE(*accstate, cyclicState)) {
continue;
}
@ -111,12 +110,12 @@ char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s,
// We have squash masks we might have to apply after firing reports.
STATE_T squash = ONES_STATE;
const STATE_T *squashMasks = (const STATE_T *)
const ENG_STATE_T *squashMasks = (const ENG_STATE_T *)
((const char *)limex + limex->squashOffset);
for (u32 i = 0; i < acceptCount; i++) {
const struct NFAAccept *a = &acceptTable[i];
if (TESTBIT_STATE(s, a->state)) {
if (TESTBIT_STATE(*s, a->state)) {
DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n",
a->state, a->externalId, offset);
int rv = callback(0, offset, a->externalId, context);
@ -125,14 +124,14 @@ char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s,
}
if (a->squash != MO_INVALID_IDX) {
assert(a->squash < limex->squashCount);
const STATE_T *sq = &squashMasks[a->squash];
const ENG_STATE_T *sq = &squashMasks[a->squash];
DEBUG_PRINTF("squash mask %u @ %p\n", a->squash, sq);
squash = AND_STATE(squash, LOAD_STATE(sq));
squash = AND_STATE(squash, LOAD_FROM_ENG(sq));
}
}
}
STORE_STATE(s, AND_STATE(LOAD_STATE(s), squash));
*s = AND_STATE(*s, squash);
return 0;
}
@ -147,7 +146,7 @@ char PROCESS_ACCEPTS_NOSQUASH_FN(const STATE_T *s,
for (u32 i = 0; i < acceptCount; i++) {
const struct NFAAccept *a = &acceptTable[i];
if (TESTBIT_STATE(s, a->state)) {
if (TESTBIT_STATE(*s, a->state)) {
DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n",
a->state, a->externalId, offset);
int rv = callback(0, offset, a->externalId, context);
@ -172,8 +171,8 @@ char TESTEOD_FN(const IMPL_NFA_T *limex, const STATE_T *s,
return MO_CONTINUE_MATCHING;
}
const STATE_T acceptEodMask = LOAD_STATE(&limex->acceptAtEOD);
STATE_T foundAccepts = AND_STATE(LOAD_STATE(s), acceptEodMask);
const STATE_T acceptEodMask = LOAD_FROM_ENG(&limex->acceptAtEOD);
STATE_T foundAccepts = AND_STATE(*s, acceptEodMask);
if (do_br) {
SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state,
@ -204,8 +203,8 @@ char TESTEOD_REV_FN(const IMPL_NFA_T *limex, const STATE_T *s, u64a offset,
return MO_CONTINUE_MATCHING;
}
STATE_T acceptEodMask = LOAD_STATE(&limex->acceptAtEOD);
STATE_T foundAccepts = AND_STATE(LOAD_STATE(s), acceptEodMask);
STATE_T acceptEodMask = LOAD_FROM_ENG(&limex->acceptAtEOD);
STATE_T foundAccepts = AND_STATE(*s, acceptEodMask);
assert(!limex->repeatCount);
@ -228,8 +227,8 @@ char REPORTCURRENT_FN(const IMPL_NFA_T *limex, const struct mq *q) {
assert(q->state);
assert(q_cur_type(q) == MQE_START);
STATE_T s = LOAD_STATE(q->state);
STATE_T acceptMask = LOAD_STATE(&limex->accept);
STATE_T s = *(STATE_T *)q->state;
STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept);
STATE_T foundAccepts = AND_STATE(s, acceptMask);
if (unlikely(ISNONZERO_STATE(foundAccepts))) {
@ -250,7 +249,7 @@ char REPORTCURRENT_FN(const IMPL_NFA_T *limex, const struct mq *q) {
static really_inline
STATE_T INITIAL_FN(const IMPL_NFA_T *impl, char onlyDs) {
return LOAD_STATE(onlyDs ? &impl->initDS : &impl->init);
return LOAD_FROM_ENG(onlyDs ? &impl->initDS : &impl->init);
}
static really_inline
@ -261,9 +260,9 @@ STATE_T TOP_FN(const IMPL_NFA_T *impl, char onlyDs, STATE_T state) {
static really_inline
STATE_T TOPN_FN(const IMPL_NFA_T *limex, STATE_T state, u32 n) {
assert(n < limex->topCount);
const STATE_T *topsptr =
(const STATE_T *)((const char *)limex + limex->topOffset);
STATE_T top = LOAD_STATE(&topsptr[n]);
const ENG_STATE_T *topsptr =
(const ENG_STATE_T *)((const char *)limex + limex->topOffset);
STATE_T top = LOAD_FROM_ENG(&topsptr[n]);
return OR_STATE(top, state);
}
@ -279,8 +278,8 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx,
DEBUG_PRINTF("expire estate at offset %llu\n", offset);
const STATE_T cyclics =
AND_STATE(LOAD_STATE(&ctx->s), LOAD_STATE(&limex->repeatCyclicMask));
const STATE_T cyclics
= AND_STATE(ctx->s, LOAD_FROM_ENG(&limex->repeatCyclicMask));
if (ISZERO_STATE(cyclics)) {
DEBUG_PRINTF("no cyclic states are on\n");
return;
@ -290,7 +289,7 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx,
const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
u32 cyclicState = info->cyclicState;
if (!TESTBIT_STATE(&cyclics, cyclicState)) {
if (!TESTBIT_STATE(cyclics, cyclicState)) {
continue;
}
@ -310,14 +309,14 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx,
last_top, repeat->repeatMax);
u64a adj = 0;
/* if the cycle's tugs are active at repeat max, it is still alive */
if (TESTBIT_STATE((const STATE_T *)&limex->accept, cyclicState) ||
TESTBIT_STATE((const STATE_T *)&limex->acceptAtEOD, cyclicState)) {
if (TESTBIT_STATE(LOAD_FROM_ENG(&limex->accept), cyclicState) ||
TESTBIT_STATE(LOAD_FROM_ENG(&limex->acceptAtEOD), cyclicState)) {
DEBUG_PRINTF("lazy tug possible - may still be inspected\n");
adj = 1;
} else {
const STATE_T *tug_mask =
(const STATE_T *)((const char *)info + info->tugMaskOffset);
if (ISNONZERO_STATE(AND_STATE(ctx->s, LOAD_STATE(tug_mask)))) {
const ENG_STATE_T *tug_mask =
(const ENG_STATE_T *)((const char *)info + info->tugMaskOffset);
if (ISNONZERO_STATE(AND_STATE(ctx->s, LOAD_FROM_ENG(tug_mask)))) {
DEBUG_PRINTF("tug possible - may still be inspected\n");
adj = 1;
}
@ -339,7 +338,7 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
u64a offset, ReportID report) {
assert(limex);
const STATE_T acceptMask = LOAD_STATE(&limex->accept);
const STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept);
STATE_T accstate = AND_STATE(state, acceptMask);
// Are we in an accept state?
@ -355,7 +354,7 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
#ifdef DEBUG
DEBUG_PRINTF("accept states that are on: ");
for (u32 i = 0; i < sizeof(STATE_T) * 8; i++) {
if (TESTBIT_STATE(&accstate, i)) printf("%u ", i);
if (TESTBIT_STATE(accstate, i)) printf("%u ", i);
}
printf("\n");
#endif
@ -366,7 +365,7 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
const struct NFAAccept *a = &acceptTable[i];
DEBUG_PRINTF("checking idx=%u, externalId=%u\n", a->state,
a->externalId);
if (a->externalId == report && TESTBIT_STATE(&accstate, a->state)) {
if (a->externalId == report && TESTBIT_STATE(accstate, a->state)) {
DEBUG_PRINTF("report is on!\n");
return 1;
}
@ -381,7 +380,7 @@ char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
u64a offset) {
assert(limex);
const STATE_T acceptMask = LOAD_STATE(&limex->accept);
const STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept);
STATE_T accstate = AND_STATE(state, acceptMask);
// Are we in an accept state?
@ -407,8 +406,6 @@ char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
#undef CONTEXT_T
#undef IMPL_NFA_T
#undef ONES_STATE
#undef LOAD_STATE
#undef STORE_STATE
#undef AND_STATE
#undef OR_STATE
#undef ANDNOT_STATE
@ -420,7 +417,3 @@ char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
#undef PROCESS_ACCEPTS_NOSQUASH_FN
#undef SQUASH_UNTUG_BR_FN
#undef GET_NFA_REPEAT_INFO_FN
#undef SIZE
#undef STATE_T
#undef INLINE_ATTR


@ -1438,7 +1438,8 @@ struct Factory {
sizeof(limex->init), stateSize, repeatscratchStateSize,
repeatStreamState);
size_t scratchStateSize = sizeof(limex->init);
size_t scratchStateSize = NFATraits<dtype>::scratch_state_size;
if (repeatscratchStateSize) {
scratchStateSize
= ROUNDUP_N(scratchStateSize, alignof(RepeatControl));
@ -2021,13 +2022,6 @@ struct Factory {
sz = 32;
}
// Special case: with SIMD available, we definitely prefer using
// 128-bit NFAs over 64-bit ones given the paucity of registers
// available.
if (sz == 64) {
sz = 128;
}
if (args.cc.grey.nfaForceSize) {
sz = args.cc.grey.nfaForceSize;
}
@ -2067,9 +2061,12 @@ struct scoreNfa {
typedef u_##mlt_size tableRow_t; \
typedef NFAException##mlt_size exception_t; \
static const size_t maxStates = mlt_size; \
static const size_t scratch_state_size = mlt_size == 64 ? sizeof(m128) \
: sizeof(tableRow_t); \
};
MAKE_LIMEX_TRAITS(32)
MAKE_LIMEX_TRAITS(64)
MAKE_LIMEX_TRAITS(128)
MAKE_LIMEX_TRAITS(256)
MAKE_LIMEX_TRAITS(384)
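The new scratch_state_size trait is why the compiler-side sizing above switched away from sizeof(limex->init): for the 64-bit model it reserves a full m128 slot, so a 32-bit runtime can park its SSE-register state in scratch. A toy version of the arithmetic (all sizes invented):

#include <stdio.h>
#include <stddef.h>

#define ROUNDUP_N(x, n) (((x) + (n) - 1) & ~((size_t)(n) - 1)) /* n: power of 2 */

int main(void) {
    size_t scratch = 16;          /* sizeof(m128), even though u64a is 8 bytes */
    size_t repeat_align = 8;      /* hypothetical alignof(RepeatControl) */
    size_t repeat_size = 24;      /* hypothetical bounded-repeat scratch */
    scratch = ROUNDUP_N(scratch, repeat_align) + repeat_size;
    printf("scratch bytes reserved: %zu\n", scratch); /* 40 */
    return 0;
}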


@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -39,6 +39,16 @@
// Runtime context structures.
/* Note: The size of the context structures may vary from platform to platform
* (notably, for the Limex64 structure). As a result, information based on the
* size and other details of these structures should not be written into the
* bytecode -- really, the details of the structure should not be accessed by
* the ue2 compile side at all.
*/
#ifdef __cplusplus
#error ue2 runtime only file
#endif
/* cached_estate/esucc etc...
*
* If the exception state matches the cached_estate we will apply
@ -66,6 +76,11 @@ struct ALIGN_CL_DIRECTIVE NFAContext##nsize { \
};
GEN_CONTEXT_STRUCT(32, u32)
#ifdef ARCH_64_BIT
GEN_CONTEXT_STRUCT(64, u64a)
#else
GEN_CONTEXT_STRUCT(64, m128)
#endif
GEN_CONTEXT_STRUCT(128, m128)
GEN_CONTEXT_STRUCT(256, m256)
GEN_CONTEXT_STRUCT(384, m384)
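A standalone sketch of the portability note above (types simplified, fields abbreviated): the same declaration produces different sizes per target, which is why nothing derived from the context layout may be baked into the bytecode.

#include <stdint.h>
#include <stdio.h>

#ifdef ARCH_64_BIT
typedef uint64_t state64_t;                     /* GPR-backed */
#else
typedef struct { uint64_t lo, hi; } state64_t;  /* stand-in for m128 */
#endif

struct toy_context64 {
    state64_t s;              /* current state */
    state64_t cached_estate;  /* exception cache, as in the real context */
};

int main(void) {
    /* 16 bytes on 64-bit builds, 32 on 32-bit builds */
    printf("sizeof(struct toy_context64) = %zu\n",
           sizeof(struct toy_context64));
    return 0;
}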


@ -70,6 +70,10 @@ template<> struct limex_traits<LimExNFA128> {
static const u32 size = 128;
typedef NFAException128 exception_type;
};
template<> struct limex_traits<LimExNFA64> {
static const u32 size = 64;
typedef NFAException64 exception_type;
};
template<> struct limex_traits<LimExNFA32> {
static const u32 size = 32;
typedef NFAException32 exception_type;
@ -486,6 +490,7 @@ void dumpLimDotInfo(const limex_type *limex, u32 state, FILE *f) {
DUMP_DOT_FN(size)
LIMEX_DUMP_FNS(32)
LIMEX_DUMP_FNS(64)
LIMEX_DUMP_FNS(128)
LIMEX_DUMP_FNS(256)
LIMEX_DUMP_FNS(384)


@ -32,8 +32,8 @@
* X-macro generic impl, included into the various LimEx model implementations.
*/
#if !defined(SIZE) || !defined(STATE_T)
# error Must define SIZE and STATE_T in includer.
#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG)
# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer.
#endif
#include "config.h"
@ -44,8 +44,6 @@
#define PE_FN JOIN(processExceptional, SIZE)
#define RUN_EXCEPTION_FN JOIN(runException, SIZE)
#define ZERO_STATE JOIN(zero_, STATE_T)
#define LOAD_STATE JOIN(load_, STATE_T)
#define STORE_STATE JOIN(store_, STATE_T)
#define AND_STATE JOIN(and_, STATE_T)
#define EQ_STATE(a, b) (!JOIN(noteq_, STATE_T)((a), (b)))
#define OR_STATE JOIN(or_, STATE_T)
@ -59,7 +57,7 @@
#define ESTATE_ARG STATE_T estate
#else
#define ESTATE_ARG const STATE_T *estatep
#define estate LOAD_STATE(estatep)
#define estate (*estatep)
#endif
#ifdef STATE_ON_STACK
@ -133,7 +131,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
char *repeat_state = ctx->repeat_state + info->stateOffset;
if (e->trigger == LIMEX_TRIGGER_POS) {
char cyclic_on = TESTBIT_STATE(STATE_ARG_P, info->cyclicState);
char cyclic_on = TESTBIT_STATE(*STATE_ARG_P, info->cyclicState);
processPosTrigger(repeat, repeat_ctrl, repeat_state, offset,
cyclic_on);
*cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES;
@ -149,8 +147,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
*cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES;
DEBUG_PRINTF("stale history, squashing cyclic state\n");
assert(e->hasSquash == LIMEX_SQUASH_TUG);
STORE_STATE(succ, AND_STATE(LOAD_STATE(succ),
LOAD_STATE(&e->squash)));
*succ = AND_STATE(*succ, LOAD_FROM_ENG(&e->squash));
return 1; // continue
} else if (rv == TRIGGER_SUCCESS_CACHE) {
new_cache->br = 1;
@ -188,18 +185,16 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
// Most exceptions have a set of successors to switch on. `local_succ' is
// ORed into `succ' at the end of the caller's loop.
#ifndef BIG_MODEL
*local_succ = OR_STATE(*local_succ, LOAD_STATE(&e->successors));
*local_succ = OR_STATE(*local_succ, LOAD_FROM_ENG(&e->successors));
#else
STORE_STATE(&ctx->local_succ, OR_STATE(LOAD_STATE(&ctx->local_succ),
LOAD_STATE(&e->successors)));
ctx->local_succ = OR_STATE(ctx->local_succ, LOAD_FROM_ENG(&e->successors));
#endif
// Some exceptions squash states behind them. Note that we squash states in
// 'succ', not local_succ.
if (e->hasSquash == LIMEX_SQUASH_CYCLIC ||
e->hasSquash == LIMEX_SQUASH_REPORT) {
STORE_STATE(succ, AND_STATE(LOAD_STATE(succ),
LOAD_STATE(&e->squash)));
if (e->hasSquash == LIMEX_SQUASH_CYCLIC
|| e->hasSquash == LIMEX_SQUASH_REPORT) {
*succ = AND_STATE(*succ, LOAD_FROM_ENG(&e->squash));
if (*cacheable == CACHE_RESULT) {
*cacheable = DO_NOT_CACHE_RESULT;
}
@ -219,9 +214,9 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
char in_rev, char flags) {
assert(diffmask > 0); // guaranteed by caller macro
if (EQ_STATE(estate, LOAD_STATE(&ctx->cached_estate))) {
if (EQ_STATE(estate, ctx->cached_estate)) {
DEBUG_PRINTF("using cached succ from previous state\n");
STORE_STATE(succ, OR_STATE(LOAD_STATE(succ), LOAD_STATE(&ctx->cached_esucc)));
*succ = OR_STATE(*succ, ctx->cached_esucc);
if (ctx->cached_reports && (flags & CALLBACK_OUTPUT)) {
DEBUG_PRINTF("firing cached reports from previous state\n");
if (unlikely(limexRunReports(ctx->cached_reports, ctx->callback,
@ -236,7 +231,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
#ifndef BIG_MODEL
STATE_T local_succ = ZERO_STATE;
#else
STORE_STATE(&ctx->local_succ, ZERO_STATE);
ctx->local_succ = ZERO_STATE;
#endif
// A copy of the estate as an array of GPR-sized chunks.
@ -254,7 +249,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)];
base_index[0] = 0;
for (u32 i = 0; i < ARRAY_LENGTH(base_index) - 1; i++) {
for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) {
base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]);
}
@ -284,23 +279,23 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
} while (diffmask);
#ifndef BIG_MODEL
STORE_STATE(succ, OR_STATE(LOAD_STATE(succ), local_succ));
*succ = OR_STATE(*succ, local_succ);
#else
STORE_STATE(succ, OR_STATE(LOAD_STATE(succ), ctx->local_succ));
*succ = OR_STATE(*succ, ctx->local_succ);
#endif
if (cacheable == CACHE_RESULT) {
STORE_STATE(&ctx->cached_estate, estate);
ctx->cached_estate = estate;
#ifndef BIG_MODEL
ctx->cached_esucc = local_succ;
#else
STORE_STATE(&ctx->cached_esucc, LOAD_STATE(&ctx->local_succ));
ctx->cached_esucc = ctx->local_succ;
#endif
ctx->cached_reports = new_cache.reports;
ctx->cached_br = new_cache.br;
} else if (cacheable == DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES) {
if (ctx->cached_br) {
STORE_STATE(&ctx->cached_estate, ZERO_STATE);
ctx->cached_estate = ZERO_STATE;
}
}
@ -314,8 +309,6 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
#undef EQ_STATE
#undef OR_STATE
#undef TESTBIT_STATE
#undef LOAD_STATE
#undef STORE_STATE
#undef PE_FN
#undef RUN_EXCEPTION_FN
#undef CONTEXT_T
@ -337,7 +330,3 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
#undef FIND_AND_CLEAR_FN
#undef IMPL_NFA_T
#undef GET_NFA_REPEAT_INFO_FN
// Parameters.
#undef SIZE
#undef STATE_T


@ -160,6 +160,7 @@ struct LimExNFA##size { \
};
CREATE_NFA_LIMEX(32)
CREATE_NFA_LIMEX(64)
CREATE_NFA_LIMEX(128)
CREATE_NFA_LIMEX(256)
CREATE_NFA_LIMEX(384)


@ -49,12 +49,13 @@
#include "limex_runtime.h"
// Other implementation code from X-Macro impl.
#define SIZE 32
#define STATE_T u32
#define SIZE 32
#define STATE_T u32
#define ENG_STATE_T u32
#define LOAD_FROM_ENG load_u32
#include "limex_state_impl.h"
#define SIZE 32
#define STATE_T u32
#define INLINE_ATTR really_inline
#include "limex_common_impl.h"
@ -64,8 +65,6 @@
// Process exceptional states
#define SIZE 32
#define STATE_T u32
#define STATE_ON_STACK
#define ESTATE_ON_STACK
#define RUN_EXCEPTION_FN_ONLY
@ -128,7 +127,4 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
}
// 32-bit models.
#define SIZE 32
#define STATE_T u32
#include "limex_runtime_impl.h"


@ -30,8 +30,8 @@
\brief Limex Execution Engine Or:
How I Learned To Stop Worrying And Love The Preprocessor
This file includes utility functions which do not depend on the state size or
shift masks directly.
This file includes utility functions which do not depend on the size of the
state or shift masks directly.
*/
#ifndef LIMEX_RUNTIME_H
@ -72,41 +72,6 @@ struct proto_cache {
const ReportID *reports;
};
// Shift macros for Limited NFAs. Defined in terms of uniform ops.
// LimExNFAxxx ptr in 'limex' and the current state in 's'
#define NFA_EXEC_LIM_SHIFT(nels_type, nels_i) \
(JOIN(lshift_, nels_type)( \
JOIN(and_, nels_type)(s, \
JOIN(load_, nels_type)(&limex->shift[nels_i])), \
limex->shiftAmount[nels_i]))
// Calculate the (limited model) successors for a number of variable shifts.
// Assumes current state in 's' and successors in 'succ'.
#define NFA_EXEC_GET_LIM_SUCC(gls_type) \
do { \
succ = NFA_EXEC_LIM_SHIFT(gls_type, 0); \
switch (limex->shiftCount) { \
case 8: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 7)); \
case 7: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 6)); \
case 6: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 5)); \
case 5: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 4)); \
case 4: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 3)); \
case 3: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 2)); \
case 2: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 1)); \
case 1: \
case 0: \
; \
} \
} while (0)
#define PE_RV_HALT 1
#ifdef STATE_ON_STACK
@ -170,6 +135,7 @@ int limexRunReports(const ReportID *reports, NfaCallback callback,
}
MAKE_GET_NFA_REPEAT_INFO(32)
MAKE_GET_NFA_REPEAT_INFO(64)
MAKE_GET_NFA_REPEAT_INFO(128)
MAKE_GET_NFA_REPEAT_INFO(256)
MAKE_GET_NFA_REPEAT_INFO(384)


@ -29,7 +29,6 @@
#include "util/join.h"
#include <string.h>
/** \file
* \brief Limex Execution Engine Or:
* How I Learned To Stop Worrying And Love The Preprocessor
@ -37,8 +36,9 @@
* Version 2.0: now with X-Macros, so you get line numbers in your debugger.
*/
#if !defined(SIZE) || !defined(STATE_T)
# error Must define SIZE and STATE_T in includer.
#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG)
# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer.
#endif
#define LIMEX_API_ROOT JOIN(nfaExecLimEx, SIZE)
@ -67,11 +67,10 @@
#define STREAMSILENT_FN JOIN(LIMEX_API_ROOT, _Stream_Silent)
#define CONTEXT_T JOIN(NFAContext, SIZE)
#define EXCEPTION_T JOIN(struct NFAException, SIZE)
#define LOAD_STATE JOIN(load_, STATE_T)
#define STORE_STATE JOIN(store_, STATE_T)
#define AND_STATE JOIN(and_, STATE_T)
#define ANDNOT_STATE JOIN(andnot_, STATE_T)
#define OR_STATE JOIN(or_, STATE_T)
#define LSHIFT_STATE JOIN(lshift_, STATE_T)
#define TESTBIT_STATE JOIN(testbit_, STATE_T)
#define CLEARBIT_STATE JOIN(clearbit_, STATE_T)
#define ZERO_STATE JOIN(zero_, STATE_T)
@ -96,9 +95,9 @@
#define ACCEL_AND_FRIENDS_MASK accel_and_friendsMask
#define EXCEPTION_MASK exceptionMask
#else
#define ACCEL_MASK LOAD_STATE(&limex->accel)
#define ACCEL_AND_FRIENDS_MASK LOAD_STATE(&limex->accel_and_friends)
#define EXCEPTION_MASK LOAD_STATE(&limex->exceptionMask)
#define ACCEL_MASK LOAD_FROM_ENG(&limex->accel)
#define ACCEL_AND_FRIENDS_MASK LOAD_FROM_ENG(&limex->accel_and_friends)
#define EXCEPTION_MASK LOAD_FROM_ENG(&limex->exceptionMask)
#endif
// Run exception processing, if necessary. Returns 0 if scanning should
@ -117,13 +116,13 @@ char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
}
if (first_match && i) {
STATE_T acceptMask = LOAD_STATE(&limex->accept);
STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept);
STATE_T foundAccepts = AND_STATE(s, acceptMask);
if (unlikely(ISNONZERO_STATE(foundAccepts))) {
DEBUG_PRINTF("first match at %zu\n", i);
DEBUG_PRINTF("for nfa %p\n", limex);
assert(final_loc);
STORE_STATE(&ctx->s, s);
ctx->s = s;
*final_loc = i;
return 1; // Halt matching.
}
@ -161,22 +160,56 @@ size_t RUN_ACCEL_FN(const STATE_T s, UNUSED const STATE_T accelMask,
return j;
}
// Shift macros for Limited NFAs. Defined in terms of uniform ops.
// LimExNFAxxx ptr in 'limex' and the current state in 's'
#define NFA_EXEC_LIM_SHIFT(limex_m, curr_m, shift_idx) \
LSHIFT_STATE(AND_STATE(curr_m, LOAD_FROM_ENG(&limex_m->shift[shift_idx])), \
limex_m->shiftAmount[shift_idx])
// Calculate the (limited model) successors for a number of variable shifts.
// Assumes current state in 'curr_m' and places the successors in 'succ_m'.
#define NFA_EXEC_GET_LIM_SUCC(limex_m, curr_m, succ_m) \
do { \
succ_m = NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 0); \
switch (limex_m->shiftCount) { \
case 8: \
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 7)); \
case 7: \
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 6)); \
case 6: \
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 5)); \
case 5: \
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 4)); \
case 4: \
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 3)); \
case 3: \
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 2)); \
case 2: \
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 1)); \
case 1: \
case 0: \
; \
} \
} while (0)
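The relocated macros now take the limex pointer, current state, and successor as explicit arguments rather than picking up names from the caller's scope; the case fall-through remains deliberate, ORing in one shift term per configured shift. What a single shift term computes, as a runnable toy (values invented):

#include <stdint.h>
#include <assert.h>

int main(void) {
    uint64_t s          = 0x5;  /* states 0 and 2 are on */
    uint64_t shift_mask = 0x7;  /* only states 0..2 participate in this shift */
    uint64_t amount     = 1;    /* this shift advances states by one */
    uint64_t succ = (s & shift_mask) << amount;
    assert(succ == 0xa);        /* successor states: 1 and 3 */
    return 0;
}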
static really_inline
char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
struct CONTEXT_T *ctx, u64a offset, const char flags,
u64a *final_loc, const char first_match) {
const STATE_T *reach = (const STATE_T *)((const char *)limex + sizeof(*limex));
const ENG_STATE_T *reach = get_reach_table(limex);
#if SIZE < 256
const STATE_T accelMask = LOAD_STATE(&limex->accel);
const STATE_T accel_and_friendsMask = LOAD_STATE(&limex->accel_and_friends);
const STATE_T exceptionMask = LOAD_STATE(&limex->exceptionMask);
const STATE_T accelMask = LOAD_FROM_ENG(&limex->accel);
const STATE_T accel_and_friendsMask
= LOAD_FROM_ENG(&limex->accel_and_friends);
const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask);
#endif
const u8 *accelTable = (const u8 *)((const char *)limex + limex->accelTableOffset);
const union AccelAux *accelAux =
(const union AccelAux *)((const char *)limex + limex->accelAuxOffset);
const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex);
const ReportID *exReports = getExReports(limex);
STATE_T s = LOAD_STATE(&ctx->s);
STATE_T s = ctx->s;
/* assert(ISALIGNED_16(exceptions)); */
/* assert(ISALIGNED_16(reach)); */
@ -195,13 +228,13 @@ without_accel:
DUMP_INPUT(i);
if (ISZERO_STATE(s)) {
DEBUG_PRINTF("no states are switched on, early exit\n");
STORE_STATE(&ctx->s, s);
ctx->s = s;
return MO_CONTINUE_MATCHING;
}
u8 c = input[i];
STATE_T succ;
NFA_EXEC_GET_LIM_SUCC(STATE_T);
NFA_EXEC_GET_LIM_SUCC(limex, s, succ);
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK,
i, offset, &succ, final_loc, ctx, flags, 0,
@ -209,7 +242,7 @@ without_accel:
return MO_HALT_MATCHING;
}
s = AND_STATE(succ, LOAD_STATE(&reach[limex->reachMap[c]]));
s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]]));
}
with_accel:
@ -252,7 +285,7 @@ with_accel:
u8 c = input[i];
STATE_T succ;
NFA_EXEC_GET_LIM_SUCC(STATE_T);
NFA_EXEC_GET_LIM_SUCC(limex, s, succ);
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK,
i, offset, &succ, final_loc, ctx, flags, 0,
@ -260,20 +293,20 @@ with_accel:
return MO_HALT_MATCHING;
}
s = AND_STATE(succ, LOAD_STATE(&reach[limex->reachMap[c]]));
s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]]));
}
STORE_STATE(&ctx->s, s);
ctx->s = s;
if ((first_match || (flags & CALLBACK_OUTPUT)) && limex->acceptCount) {
STATE_T acceptMask = LOAD_STATE(&limex->accept);
STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept);
const struct NFAAccept *acceptTable = getAcceptTable(limex);
const u32 acceptCount = limex->acceptCount;
STATE_T foundAccepts = AND_STATE(s, acceptMask);
if (unlikely(ISNONZERO_STATE(foundAccepts))) {
if (first_match) {
STORE_STATE(&ctx->s, s);
ctx->s = s;
assert(final_loc);
*final_loc = length;
return MO_HALT_MATCHING;
@ -294,13 +327,13 @@ with_accel:
static never_inline
char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
struct CONTEXT_T *ctx, u64a offset) {
const STATE_T *reach = (const STATE_T *)((const char *)limex + sizeof(*limex));
const ENG_STATE_T *reach = get_reach_table(limex);
#if SIZE < 256
const STATE_T exceptionMask = LOAD_STATE(&limex->exceptionMask);
const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask);
#endif
const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex);
const ReportID *exReports = getExReports(limex);
STATE_T s = LOAD_STATE(&ctx->s);
STATE_T s = ctx->s;
/* assert(ISALIGNED_16(exceptions)); */
/* assert(ISALIGNED_16(reach)); */
@ -311,13 +344,13 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
DUMP_INPUT(i-1);
if (ISZERO_STATE(s)) {
DEBUG_PRINTF("no states are switched on, early exit\n");
STORE_STATE(&ctx->s, s);
ctx->s = s;
return MO_CONTINUE_MATCHING;
}
u8 c = input[i-1];
STATE_T succ;
NFA_EXEC_GET_LIM_SUCC(STATE_T);
NFA_EXEC_GET_LIM_SUCC(limex, s, succ);
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s,
EXCEPTION_MASK, i, offset, &succ, final_loc, ctx,
@ -325,12 +358,12 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
return MO_HALT_MATCHING;
}
s = AND_STATE(succ, reach[limex->reachMap[c]]);
s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]]));
}
STORE_STATE(&ctx->s, s);
ctx->s = s;
STATE_T acceptMask = LOAD_STATE(&limex->accept);
STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept);
const struct NFAAccept *acceptTable = getAcceptTable(limex);
const u32 acceptCount = limex->acceptCount;
assert(flags & CALLBACK_OUTPUT);
@ -354,9 +387,9 @@ void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src,
return;
}
STATE_T s = LOAD_STATE(src);
STATE_T s = *(STATE_T *)src;
if (ISZERO_STATE(AND_STATE(s, LOAD_STATE(&limex->repeatCyclicMask)))) {
if (ISZERO_STATE(AND_STATE(LOAD_FROM_ENG(&limex->repeatCyclicMask), s))) {
DEBUG_PRINTF("no cyclics are on\n");
return;
}
@ -369,7 +402,7 @@ void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src,
DEBUG_PRINTF("repeat %u\n", i);
const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
if (!TESTBIT_STATE(&s, info->cyclicState)) {
if (!TESTBIT_STATE(s, info->cyclicState)) {
DEBUG_PRINTF("is dead\n");
continue;
}
@ -388,7 +421,7 @@ void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src,
offset);
}
STORE_STATE(src, s);
*(STATE_T *)src = s;
}
char JOIN(LIMEX_API_ROOT, _queueCompressState)(const struct NFA *n,
@ -411,7 +444,7 @@ void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src,
// Note: state has already been expanded into 'dest'.
const STATE_T cyclics =
AND_STATE(LOAD_STATE(dest), LOAD_STATE(&limex->repeatCyclicMask));
AND_STATE(*(STATE_T *)dest, LOAD_FROM_ENG(&limex->repeatCyclicMask));
if (ISZERO_STATE(cyclics)) {
DEBUG_PRINTF("no cyclics are on\n");
return;
@ -425,7 +458,7 @@ void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src,
DEBUG_PRINTF("repeat %u\n", i);
const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
if (!TESTBIT_STATE(&cyclics, info->cyclicState)) {
if (!TESTBIT_STATE(cyclics, info->cyclicState)) {
DEBUG_PRINTF("is dead\n");
continue;
}
@ -447,9 +480,8 @@ char JOIN(LIMEX_API_ROOT, _expandState)(const struct NFA *n, void *dest,
return 0;
}
char JOIN(LIMEX_API_ROOT, _queueInitState)(const struct NFA *n,
struct mq *q) {
STORE_STATE(q->state, ZERO_STATE);
char JOIN(LIMEX_API_ROOT, _queueInitState)(const struct NFA *n, struct mq *q) {
*(STATE_T *)q->state = ZERO_STATE;
// Zero every bounded repeat control block in state.
const IMPL_NFA_T *limex = getImplNfa(n);
@ -529,7 +561,7 @@ void JOIN(LIMEX_API_ROOT, _HandleEvent)(const IMPL_NFA_T *limex,
u32 e = q->items[q->cur].type;
switch (e) {
DEFINE_CASE(MQE_TOP)
STORE_STATE(&ctx->s, TOP_FN(limex, !!sp, LOAD_STATE(&ctx->s)));
ctx->s = TOP_FN(limex, !!sp, ctx->s);
break;
DEFINE_CASE(MQE_START)
break;
@ -539,8 +571,7 @@ void JOIN(LIMEX_API_ROOT, _HandleEvent)(const IMPL_NFA_T *limex,
assert(e >= MQE_TOP_FIRST);
assert(e < MQE_INVALID);
DEBUG_PRINTF("MQE_TOP + %d\n", ((int)e - MQE_TOP_FIRST));
STORE_STATE(&ctx->s,
TOPN_FN(limex, LOAD_STATE(&ctx->s), e - MQE_TOP_FIRST));
ctx->s = TOPN_FN(limex, ctx->s, e - MQE_TOP_FIRST);
}
#undef DEFINE_CASE
}
@ -570,12 +601,12 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) {
ctx.repeat_state = q->streamState + limex->stateSize;
ctx.callback = q->cb;
ctx.context = q->context;
STORE_STATE(&ctx.cached_estate, ZERO_STATE);
ctx.cached_estate = ZERO_STATE;
ctx.cached_br = 0;
assert(q->items[q->cur].location >= 0);
DEBUG_PRINTF("LOAD STATE\n");
STORE_STATE(&ctx.s, LOAD_STATE(q->state));
ctx.s = *(STATE_T *)q->state;
assert(q->items[q->cur].type == MQE_START);
u64a offset = q->offset;
@ -599,7 +630,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) {
assert(ep - offset <= q->length);
if (STREAMCB_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp)
== MO_HALT_MATCHING) {
STORE_STATE(q->state, ZERO_STATE);
*(STATE_T *)q->state = ZERO_STATE;
return 0;
}
@ -616,7 +647,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) {
q->items[q->cur].type = MQE_START;
q->items[q->cur].location = sp - offset;
DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end);
STORE_STATE(q->state, LOAD_STATE(&ctx.s));
*(STATE_T *)q->state = ctx.s;
return MO_ALIVE;
}
@ -628,7 +659,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) {
EXPIRE_ESTATE_FN(limex, &ctx, sp);
DEBUG_PRINTF("END\n");
STORE_STATE(q->state, LOAD_STATE(&ctx.s));
*(STATE_T *)q->state = ctx.s;
if (q->cur != q->end) {
q->cur--;
@ -637,7 +668,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) {
return MO_ALIVE;
}
return ISNONZERO_STATE(LOAD_STATE(&ctx.s));
return ISNONZERO_STATE(ctx.s);
}
/* used by suffix execution in Rose */
@ -665,11 +696,11 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
ctx.repeat_state = q->streamState + limex->stateSize;
ctx.callback = q->cb;
ctx.context = q->context;
STORE_STATE(&ctx.cached_estate, ZERO_STATE);
ctx.cached_estate = ZERO_STATE;
ctx.cached_br = 0;
DEBUG_PRINTF("LOAD STATE\n");
STORE_STATE(&ctx.s, LOAD_STATE(q->state));
ctx.s = *(STATE_T *)q->state;
assert(q->items[q->cur].type == MQE_START);
u64a offset = q->offset;
@ -699,7 +730,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
q->cur--;
q->items[q->cur].type = MQE_START;
q->items[q->cur].location = sp + final_look - offset;
STORE_STATE(q->state, LOAD_STATE(&ctx.s));
*(STATE_T *)q->state = ctx.s;
return MO_MATCHES_PENDING;
}
@ -721,7 +752,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
q->cur--;
q->items[q->cur].type = MQE_START;
q->items[q->cur].location = sp + final_look - offset;
STORE_STATE(q->state, LOAD_STATE(&ctx.s));
*(STATE_T *)q->state = ctx.s;
return MO_MATCHES_PENDING;
}
@ -737,7 +768,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
q->items[q->cur].type = MQE_START;
q->items[q->cur].location = sp - offset;
DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end);
STORE_STATE(q->state, LOAD_STATE(&ctx.s));
*(STATE_T *)q->state = ctx.s;
return MO_ALIVE;
}
@ -749,7 +780,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
EXPIRE_ESTATE_FN(limex, &ctx, sp);
DEBUG_PRINTF("END\n");
STORE_STATE(q->state, LOAD_STATE(&ctx.s));
*(STATE_T *)q->state = ctx.s;
if (q->cur != q->end) {
q->cur--;
@ -758,7 +789,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
return MO_ALIVE;
}
return ISNONZERO_STATE(LOAD_STATE(&ctx.s));
return ISNONZERO_STATE(ctx.s);
}
// Used for execution Rose prefix/infixes.
@ -777,11 +808,11 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q,
ctx.repeat_state = q->streamState + limex->stateSize;
ctx.callback = NULL;
ctx.context = NULL;
STORE_STATE(&ctx.cached_estate, ZERO_STATE);
ctx.cached_estate = ZERO_STATE;
ctx.cached_br = 0;
DEBUG_PRINTF("LOAD STATE\n");
STORE_STATE(&ctx.s, LOAD_STATE(q->state));
ctx.s = *(STATE_T *)q->state;
assert(q->items[q->cur].type == MQE_START);
u64a offset = q->offset;
@ -793,7 +824,7 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q,
if (n->maxWidth) {
if (ep - sp > n->maxWidth) {
sp = ep - n->maxWidth;
STORE_STATE(&ctx.s, INITIAL_FN(limex, !!sp));
ctx.s = INITIAL_FN(limex, !!sp);
}
}
assert(ep >= sp);
@ -832,14 +863,14 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q,
DEBUG_PRINTF("END, nfa is %s\n",
ISNONZERO_STATE(ctx.s) ? "still alive" : "dead");
STORE_STATE(q->state, LOAD_STATE(&ctx.s));
*(STATE_T *)q->state = ctx.s;
if (JOIN(limexInAccept, SIZE)(limex, LOAD_STATE(&ctx.s), ctx.repeat_ctrl,
if (JOIN(limexInAccept, SIZE)(limex, ctx.s, ctx.repeat_ctrl,
ctx.repeat_state, sp + 1, report)) {
return MO_MATCHES_PENDING;
}
return ISNONZERO_STATE(LOAD_STATE(&ctx.s));
return ISNONZERO_STATE(ctx.s);
}
char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state,
@ -875,11 +906,11 @@ char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset,
ctx.repeat_state = NULL;
ctx.callback = cb;
ctx.context = context;
STORE_STATE(&ctx.cached_estate, ZERO_STATE);
ctx.cached_estate = ZERO_STATE;
ctx.cached_br = 0;
const IMPL_NFA_T *limex = getImplNfa(n);
STORE_STATE(&ctx.s, INITIAL_FN(limex, 0)); // always anchored
ctx.s = INITIAL_FN(limex, 0); // always anchored
// 'buf' may be null, for example when we're scanning at EOD time.
if (buflen) {
@ -896,7 +927,7 @@ char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset,
REV_STREAM_FN(limex, hbuf, hlen, &ctx, offset);
}
if (offset == 0 && ISNONZERO_STATE(LOAD_STATE(&ctx.s))) {
if (offset == 0 && ISNONZERO_STATE(ctx.s)) {
TESTEOD_REV_FN(limex, &ctx.s, offset, cb, context);
}
@ -913,7 +944,7 @@ char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa,
union RepeatControl *repeat_ctrl =
getRepeatControlBase(q->state, sizeof(STATE_T));
char *repeat_state = q->streamState + limex->stateSize;
STATE_T state = LOAD_STATE(q->state);
STATE_T state = *(STATE_T *)q->state;
u64a offset = q->offset + q_last_loc(q) + 1;
return JOIN(limexInAccept, SIZE)(limex, state, repeat_ctrl, repeat_state,
@ -928,7 +959,7 @@ char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
union RepeatControl *repeat_ctrl =
getRepeatControlBase(q->state, sizeof(STATE_T));
char *repeat_state = q->streamState + limex->stateSize;
STATE_T state = LOAD_STATE(q->state);
STATE_T state = *(STATE_T *)q->state;
u64a offset = q->offset + q_last_loc(q) + 1;
return JOIN(limexInAnyAccept, SIZE)(limex, state, repeat_ctrl, repeat_state,
@ -941,8 +972,8 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
s64a loc) {
assert(nfa->flags & NFA_ZOMBIE);
const IMPL_NFA_T *limex = getImplNfa(nfa);
STATE_T state = LOAD_STATE(q->state);
STATE_T zmask = LOAD_STATE(&limex->zombieMask);
STATE_T state = *(STATE_T *)q->state;
STATE_T zmask = LOAD_FROM_ENG(&limex->zombieMask);
if (limex->repeatCount) {
u64a offset = q->offset + loc + 1;
@ -981,11 +1012,10 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
#undef STREAMSILENT_FN
#undef CONTEXT_T
#undef EXCEPTION_T
#undef LOAD_STATE
#undef STORE_STATE
#undef AND_STATE
#undef ANDNOT_STATE
#undef OR_STATE
#undef LSHIFT_STATE
#undef TESTBIT_STATE
#undef CLEARBIT_STATE
#undef ZERO_STATE
@ -999,8 +1029,4 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
#undef ACCEL_MASK
#undef ACCEL_AND_FRIENDS_MASK
#undef EXCEPTION_MASK
// Parameters.
#undef SIZE
#undef STATE_T
#undef LIMEX_API_ROOT


@ -48,19 +48,16 @@
#include "limex_runtime.h"
#define SIZE 128
#define STATE_T m128
#define SIZE 128
#define STATE_T m128
#define ENG_STATE_T m128
#define LOAD_FROM_ENG load_m128
#include "limex_exceptional.h"
#define SIZE 128
#define STATE_T m128
#include "limex_state_impl.h"
#define SIZE 128
#define STATE_T m128
#define INLINE_ATTR really_inline
#include "limex_common_impl.h"
#define SIZE 128
#define STATE_T m128
#include "limex_runtime_impl.h"


@ -45,19 +45,16 @@
// Common code
#include "limex_runtime.h"
#define SIZE 256
#define STATE_T m256
#define SIZE 256
#define STATE_T m256
#define ENG_STATE_T m256
#define LOAD_FROM_ENG load_m256
#include "limex_exceptional.h"
#define SIZE 256
#define STATE_T m256
#include "limex_state_impl.h"
#define SIZE 256
#define STATE_T m256
#define INLINE_ATTR really_inline
#include "limex_common_impl.h"
#define SIZE 256
#define STATE_T m256
#include "limex_runtime_impl.h"


@ -45,19 +45,16 @@
// Common code
#include "limex_runtime.h"
#define SIZE 384
#define STATE_T m384
#define SIZE 384
#define STATE_T m384
#define ENG_STATE_T m384
#define LOAD_FROM_ENG load_m384
#include "limex_exceptional.h"
#define SIZE 384
#define STATE_T m384
#include "limex_state_impl.h"
#define SIZE 384
#define STATE_T m384
#define INLINE_ATTR really_inline
#include "limex_common_impl.h"
#define SIZE 384
#define STATE_T m384
#include "limex_runtime_impl.h"


@ -45,19 +45,16 @@
// Common code
#include "limex_runtime.h"
#define SIZE 512
#define STATE_T m512
#define SIZE 512
#define STATE_T m512
#define ENG_STATE_T m512
#define LOAD_FROM_ENG load_m512
#include "limex_exceptional.h"
#define SIZE 512
#define STATE_T m512
#include "limex_state_impl.h"
#define SIZE 512
#define STATE_T m512
#define INLINE_ATTR really_inline
#include "limex_common_impl.h"
#define SIZE 512
#define STATE_T m512
#include "limex_runtime_impl.h"


@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -35,8 +35,8 @@
#include "util/state_compress.h"
#include <string.h>
#if !defined(SIZE) || !defined(STATE_T)
# error Must define SIZE and STATE_T in includer.
#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG)
# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer.
#endif
#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE)
@ -44,29 +44,33 @@
#define REACHMASK_FN JOIN(moNfaReachMask, SIZE)
#define COMPRESS_FN JOIN(moNfaCompressState, SIZE)
#define EXPAND_FN JOIN(moNfaExpandState, SIZE)
#define COMPRESSED_STORE_FN JOIN(storecompressed, SIZE)
#define COMPRESSED_LOAD_FN JOIN(loadcompressed, SIZE)
#define COMPRESSED_STORE_FN JOIN(store_compressed_, STATE_T)
#define COMPRESSED_LOAD_FN JOIN(load_compressed_, STATE_T)
#define PARTIAL_STORE_FN JOIN(partial_store_, STATE_T)
#define PARTIAL_LOAD_FN JOIN(partial_load_, STATE_T)
#define LOAD_STATE JOIN(load_, STATE_T)
#define STORE_STATE JOIN(store_, STATE_T)
#define OR_STATE JOIN(or_, STATE_T)
#define AND_STATE JOIN(and_, STATE_T)
#define ISZERO_STATE JOIN(isZero_, STATE_T)
static really_inline
const STATE_T *REACHMASK_FN(const IMPL_NFA_T *limex, const u8 key) {
const STATE_T *reach
= (const STATE_T *)((const char *)limex + sizeof(*limex));
assert(ISALIGNED_N(reach, alignof(STATE_T)));
return &reach[limex->reachMap[key]];
const ENG_STATE_T *get_reach_table(const IMPL_NFA_T *limex) {
const ENG_STATE_T *reach
= (const ENG_STATE_T *)((const char *)limex + sizeof(*limex));
assert(ISALIGNED_N(reach, alignof(ENG_STATE_T)));
return reach;
}
static really_inline
STATE_T REACHMASK_FN(const IMPL_NFA_T *limex, const u8 key) {
const ENG_STATE_T *reach = get_reach_table(limex);
return LOAD_FROM_ENG(&reach[limex->reachMap[key]]);
}
static really_inline
void COMPRESS_FN(const IMPL_NFA_T *limex, u8 *dest, const STATE_T *src,
u8 key) {
assert(ISALIGNED_N(src, alignof(STATE_T)));
STATE_T a_src = LOAD_STATE(src);
STATE_T a_src = *src;
DEBUG_PRINTF("compress state: %p -> %p\n", src, dest);
@ -77,31 +81,30 @@ void COMPRESS_FN(const IMPL_NFA_T *limex, u8 *dest, const STATE_T *src,
} else {
DEBUG_PRINTF("compress state, key=%hhx\n", key);
const STATE_T *reachmask = REACHMASK_FN(limex, key);
STATE_T reachmask = REACHMASK_FN(limex, key);
// Masked compression means that we mask off the initDs states and
// provide a shortcut for the all-zeroes case. Note that these must be
// switched on in the EXPAND call below.
if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) {
STATE_T s = AND_STATE(LOAD_STATE(&limex->compressMask), a_src);
STATE_T s = AND_STATE(LOAD_FROM_ENG(&limex->compressMask), a_src);
if (ISZERO_STATE(s)) {
DEBUG_PRINTF("after compression mask, all states are zero\n");
memset(dest, 0, limex->stateSize);
return;
}
STATE_T mask = AND_STATE(LOAD_STATE(&limex->compressMask),
LOAD_STATE(reachmask));
STATE_T mask = AND_STATE(LOAD_FROM_ENG(&limex->compressMask),
reachmask);
COMPRESSED_STORE_FN(dest, &s, &mask, limex->stateSize);
} else {
COMPRESSED_STORE_FN(dest, src, reachmask, limex->stateSize);
COMPRESSED_STORE_FN(dest, src, &reachmask, limex->stateSize);
}
}
}
static really_inline
void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src,
u8 key) {
void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src, u8 key) {
assert(ISALIGNED_N(dest, alignof(STATE_T)));
DEBUG_PRINTF("expand state: %p -> %p\n", src, dest);
@ -111,16 +114,15 @@ void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src,
*dest = PARTIAL_LOAD_FN(src, limex->stateSize);
} else {
DEBUG_PRINTF("expand state, key=%hhx\n", key);
const STATE_T *reachmask = REACHMASK_FN(limex, key);
STATE_T reachmask = REACHMASK_FN(limex, key);
if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) {
STATE_T mask = AND_STATE(LOAD_STATE(&limex->compressMask),
LOAD_STATE(reachmask));
STATE_T mask = AND_STATE(LOAD_FROM_ENG(&limex->compressMask),
reachmask);
COMPRESSED_LOAD_FN(dest, src, &mask, limex->stateSize);
STORE_STATE(dest, OR_STATE(LOAD_STATE(&limex->initDS),
LOAD_STATE(dest)));
*dest = OR_STATE(LOAD_FROM_ENG(&limex->initDS), *dest);
} else {
COMPRESSED_LOAD_FN(dest, src, reachmask, limex->stateSize);
COMPRESSED_LOAD_FN(dest, src, &reachmask, limex->stateSize);
}
}
}
@ -134,11 +136,6 @@ void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src,
#undef COMPRESSED_LOAD_FN
#undef PARTIAL_STORE_FN
#undef PARTIAL_LOAD_FN
#undef LOAD_STATE
#undef STORE_STATE
#undef OR_STATE
#undef AND_STATE
#undef ISZERO_STATE
#undef SIZE
#undef STATE_T


@ -55,6 +55,7 @@
#define DISPATCH_BY_NFA_TYPE(dbnt_func) \
switch (nfa->type) { \
DISPATCH_CASE(LIMEX, LimEx, 32, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 64, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384, dbnt_func); \


@ -170,17 +170,16 @@ enum NFACategory {NFA_LIMEX, NFA_OTHER};
#define DO_IF_DUMP_SUPPORT(a)
#endif
#define MAKE_LIMEX_TRAITS(mlt_size) \
#define MAKE_LIMEX_TRAITS(mlt_size, mlt_align) \
template<> struct NFATraits<LIMEX_NFA_##mlt_size> { \
static UNUSED const char *name; \
static const NFACategory category = NFA_LIMEX; \
typedef LimExNFA##mlt_size implNFA_t; \
typedef u_##mlt_size tableRow_t; \
static const nfa_dispatch_fn has_accel; \
static const nfa_dispatch_fn has_repeats; \
static const nfa_dispatch_fn has_repeats_other_than_firsts; \
static const u32 stateAlign = \
MAX(alignof(tableRow_t), alignof(RepeatControl)); \
MAX(mlt_align, alignof(RepeatControl)); \
static const bool fast = mlt_size <= 64; \
}; \
const nfa_dispatch_fn NFATraits<LIMEX_NFA_##mlt_size>::has_accel \
@ -194,16 +193,17 @@ enum NFACategory {NFA_LIMEX, NFA_OTHER};
const char *NFATraits<LIMEX_NFA_##mlt_size>::name \
= "LimEx "#mlt_size; \
template<> struct getDescription<LIMEX_NFA_##mlt_size> { \
static string call(const void *ptr) { \
return getDescriptionLimEx<LIMEX_NFA_##mlt_size>((const NFA *)ptr); \
} \
static string call(const void *p) { \
return getDescriptionLimEx<LIMEX_NFA_##mlt_size>((const NFA *)p); \
} \
};)
MAKE_LIMEX_TRAITS(32)
MAKE_LIMEX_TRAITS(128)
MAKE_LIMEX_TRAITS(256)
MAKE_LIMEX_TRAITS(384)
MAKE_LIMEX_TRAITS(512)
MAKE_LIMEX_TRAITS(32, alignof(u32))
MAKE_LIMEX_TRAITS(64, alignof(m128)) /* special, 32bit arch uses m128 */
MAKE_LIMEX_TRAITS(128, alignof(m128))
MAKE_LIMEX_TRAITS(256, alignof(m256))
MAKE_LIMEX_TRAITS(384, alignof(m384))
MAKE_LIMEX_TRAITS(512, alignof(m512))
template<> struct NFATraits<MCCLELLAN_NFA_8> {
UNUSED static const char *name;


@ -60,6 +60,7 @@ namespace ue2 {
DEBUG_PRINTF("dispatch for NFA type %u\n", nfa->type); \
switch (nfa->type) { \
DISPATCH_CASE(LIMEX, LimEx, 32, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 64, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384, dbnt_func); \


@ -52,6 +52,7 @@ extern "C"
enum NFAEngineType {
LIMEX_NFA_32,
LIMEX_NFA_64,
LIMEX_NFA_128,
LIMEX_NFA_256,
LIMEX_NFA_384,
@ -164,6 +165,7 @@ static really_inline int isDfaType(u8 t) {
static really_inline int isNfaType(u8 t) {
switch (t) {
case LIMEX_NFA_32:
case LIMEX_NFA_64:
case LIMEX_NFA_128:
case LIMEX_NFA_256:
case LIMEX_NFA_384:


@ -173,6 +173,12 @@ static really_inline u64a movq(const m128 in) {
#endif
}
/* another form of movq */
static really_inline
m128 load_m128_from_u64a(const u64a *p) {
return _mm_loadl_epi64((const m128 *)p);
}
#define rshiftbyte_m128(a, count_immed) _mm_srli_si128(a, count_immed)
#define lshiftbyte_m128(a, count_immed) _mm_slli_si128(a, count_immed)
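load_m128_from_u64a is the widening counterpart of movq just above it. A standalone round-trip check using the underlying intrinsics (compile on a 64-bit x86 host for simplicity; _mm_cvtsi128_si64 requires one):

#include <emmintrin.h>
#include <stdint.h>
#include <assert.h>

int main(void) {
    uint64_t v = 0x123456789abcdef0ULL;
    __m128i s = _mm_loadl_epi64((const __m128i *)&v); /* low 64 bits, high lane zeroed */
    uint64_t back = (uint64_t)_mm_cvtsi128_si64(s);   /* the movq direction */
    assert(back == v);
    return 0;
}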
@ -270,12 +276,12 @@ void clearbit128(m128 *ptr, unsigned int n) {
// tests bit N in the given vector.
static really_inline
char testbit128(const m128 *ptr, unsigned int n) {
char testbit128(m128 val, unsigned int n) {
const m128 mask = mask1bit128(n);
#if defined(__SSE4_1__)
return !_mm_testz_si128(mask, *ptr);
return !_mm_testz_si128(mask, val);
#else
return isnonzero128(and128(mask, *ptr));
return isnonzero128(and128(mask, val));
#endif
}
@ -606,13 +612,13 @@ void clearbit256(m256 *ptr, unsigned int n) {
// tests bit N in the given vector.
static really_inline
char testbit256(const m256 *ptr, unsigned int n) {
assert(n < sizeof(*ptr) * 8);
const m128 *sub;
char testbit256(m256 val, unsigned int n) {
assert(n < sizeof(val) * 8);
m128 sub;
if (n < 128) {
sub = &ptr->lo;
sub = val.lo;
} else {
sub = &ptr->hi;
sub = val.hi;
n -= 128;
}
return testbit128(sub, n);
@ -633,9 +639,9 @@ void clearbit256(m256 *ptr, unsigned int n) {
// tests bit N in the given vector.
static really_inline
char testbit256(const m256 *ptr, unsigned int n) {
char testbit256(m256 val, unsigned int n) {
const m256 mask = mask1bit256(n);
return !_mm256_testz_si256(mask, *ptr);
return !_mm256_testz_si256(mask, val);
}
static really_really_inline
@ -827,15 +833,15 @@ void clearbit384(m384 *ptr, unsigned int n) {
// tests bit N in the given vector.
static really_inline
char testbit384(const m384 *ptr, unsigned int n) {
assert(n < sizeof(*ptr) * 8);
const m128 *sub;
char testbit384(m384 val, unsigned int n) {
assert(n < sizeof(val) * 8);
m128 sub;
if (n < 128) {
sub = &ptr->lo;
sub = val.lo;
} else if (n < 256) {
sub = &ptr->mid;
sub = val.mid;
} else {
sub = &ptr->hi;
sub = val.hi;
}
return testbit128(sub, n % 128);
}
@ -1040,26 +1046,26 @@ void clearbit512(m512 *ptr, unsigned int n) {
// tests bit N in the given vector.
static really_inline
char testbit512(const m512 *ptr, unsigned int n) {
assert(n < sizeof(*ptr) * 8);
char testbit512(m512 val, unsigned int n) {
assert(n < sizeof(val) * 8);
#if !defined(__AVX2__)
const m128 *sub;
m128 sub;
if (n < 128) {
sub = &ptr->lo.lo;
sub = val.lo.lo;
} else if (n < 256) {
sub = &ptr->lo.hi;
sub = val.lo.hi;
} else if (n < 384) {
sub = &ptr->hi.lo;
sub = val.hi.lo;
} else {
sub = &ptr->hi.hi;
sub = val.hi.hi;
}
return testbit128(sub, n % 128);
#else
const m256 *sub;
m256 sub;
if (n < 256) {
sub = &ptr->lo;
sub = val.lo;
} else {
sub = &ptr->hi;
sub = val.hi;
n -= 256;
}
return testbit256(sub, n);


@ -180,44 +180,52 @@
#define partial_load_m384(ptr, sz) loadbytes384(ptr, sz)
#define partial_load_m512(ptr, sz) loadbytes512(ptr, sz)
#define store_compressed_u32(ptr, x, m) storecompressed32(ptr, x, m)
#define store_compressed_u64a(ptr, x, m) storecompressed64(ptr, x, m)
#define store_compressed_m128(ptr, x, m) storecompressed128(ptr, x, m)
#define store_compressed_m256(ptr, x, m) storecompressed256(ptr, x, m)
#define store_compressed_m384(ptr, x, m) storecompressed384(ptr, x, m)
#define store_compressed_m512(ptr, x, m) storecompressed512(ptr, x, m)
#define store_compressed_u32(ptr, x, m, len) storecompressed32(ptr, x, m, len)
#define store_compressed_u64a(ptr, x, m, len) storecompressed64(ptr, x, m, len)
#define store_compressed_m128(ptr, x, m, len) storecompressed128(ptr, x, m, len)
#define store_compressed_m256(ptr, x, m, len) storecompressed256(ptr, x, m, len)
#define store_compressed_m384(ptr, x, m, len) storecompressed384(ptr, x, m, len)
#define store_compressed_m512(ptr, x, m, len) storecompressed512(ptr, x, m, len)
#define load_compressed_u32(x, ptr, m) loadcompressed32(x, ptr, m)
#define load_compressed_u64a(x, ptr, m) loadcompressed64(x, ptr, m)
#define load_compressed_m128(x, ptr, m) loadcompressed128(x, ptr, m)
#define load_compressed_m256(x, ptr, m) loadcompressed256(x, ptr, m)
#define load_compressed_m384(x, ptr, m) loadcompressed384(x, ptr, m)
#define load_compressed_m512(x, ptr, m) loadcompressed512(x, ptr, m)
#define load_compressed_u32(x, ptr, m, len) loadcompressed32(x, ptr, m, len)
#define load_compressed_u64a(x, ptr, m, len) loadcompressed64(x, ptr, m, len)
#define load_compressed_m128(x, ptr, m, len) loadcompressed128(x, ptr, m, len)
#define load_compressed_m256(x, ptr, m, len) loadcompressed256(x, ptr, m, len)
#define load_compressed_m384(x, ptr, m, len) loadcompressed384(x, ptr, m, len)
#define load_compressed_m512(x, ptr, m, len) loadcompressed512(x, ptr, m, len)
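The store_compressed_*/load_compressed_* wrappers gain a trailing length argument; judging from the call sites in limex_state_impl.h, it is the engine's stateSize in bytes. A simplified standalone model of the store side (the real storecompressed64 packs the masked bits densely, which this sketch skips):

#include <string.h>
#include <stdint.h>
#include <assert.h>

static void store_compressed_sketch(uint8_t *ptr, const uint64_t *x,
                                    const uint64_t *m, size_t len) {
    uint64_t masked = *x & *m;   /* real code: pack only the bits set in *m */
    assert(len <= sizeof(masked));
    memcpy(ptr, &masked, len);   /* only len bytes reach the stream */
}

int main(void) {
    uint8_t stream[8] = {0};
    uint64_t s = 0x0102030405060708ULL, mask = ~0ULL;
    store_compressed_sketch(stream, &s, &mask, 5); /* e.g. stateSize == 5 */
    return 0;
}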
static really_inline void clearbit_u32(u32 *p, u32 n) {
static really_inline
void clearbit_u32(u32 *p, u32 n) {
assert(n < sizeof(*p) * 8);
*p &= ~(1U << n);
}
static really_inline void clearbit_u64a(u64a *p, u32 n) {
static really_inline
void clearbit_u64a(u64a *p, u32 n) {
assert(n < sizeof(*p) * 8);
*p &= ~(1ULL << n);
}
#define clearbit_m128(ptr, n) (clearbit128(ptr, n))
#define clearbit_m256(ptr, n) (clearbit256(ptr, n))
#define clearbit_m384(ptr, n) (clearbit384(ptr, n))
#define clearbit_m512(ptr, n) (clearbit512(ptr, n))
static really_inline char testbit_u32(const u32 *p, u32 n) {
assert(n < sizeof(*p) * 8);
return !!(*p & (1U << n));
static really_inline
char testbit_u32(u32 val, u32 n) {
assert(n < sizeof(val) * 8);
return !!(val & (1U << n));
}
static really_inline char testbit_u64a(const u64a *p, u32 n) {
assert(n < sizeof(*p) * 8);
return !!(*p & (1ULL << n));
static really_inline
char testbit_u64a(u64a val, u32 n) {
assert(n < sizeof(val) * 8);
return !!(val & (1ULL << n));
}
#define testbit_m128(ptr, n) (testbit128(ptr, n))
#define testbit_m256(ptr, n) (testbit256(ptr, n))
#define testbit_m384(ptr, n) (testbit384(ptr, n))
#define testbit_m512(ptr, n) (testbit512(ptr, n))
#define testbit_m128(val, n) (testbit128(val, n))
#define testbit_m256(val, n) (testbit256(val, n))
#define testbit_m384(val, n) (testbit384(val, n))
#define testbit_m512(val, n) (testbit512(val, n))
#endif
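The testbit_* helpers now take the state by value rather than through a pointer, which spares callers from spilling register-resident state to memory just to test one bit. Usage after the change, mirroring the updated unit tests below:

#include <stdint.h>
#include <assert.h>

static char testbit_u64a_sketch(uint64_t val, uint32_t n) {
    return !!(val & (1ULL << n));  /* same body as testbit_u64a above */
}

int main(void) {
    uint64_t v = 1ULL << 42;
    assert(testbit_u64a_sketch(v, 42) == 1);
    assert(testbit_u64a_sketch(~v, 42) == 0);
    return 0;
}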


@ -31,7 +31,6 @@
#include "grey.h"
#include "compiler/compiler.h"
#include "nfa/limex_context.h"
#include "nfa/limex_internal.h"
#include "nfa/nfa_api.h"
#include "nfa/nfa_api_util.h"
@ -167,11 +166,10 @@ TEST_P(LimExModelTest, QueueExec) {
TEST_P(LimExModelTest, CompressExpand) {
ASSERT_TRUE(nfa != nullptr);
// 64-bit NFAs assume during compression that they have >= 5 bytes of
// compressed NFA state, which isn't true for our 8-state test pattern. We
// skip this test for just these models.
if (nfa->scratchStateSize == 8) {
return;
u32 real_state_size = nfa->scratchStateSize;
/* Only look at 8 bytes for limex 64 (rather than the padding) */
if (nfa->type == LIMEX_NFA_64) {
real_state_size = sizeof(u64a);
}
initQueue();
@ -195,8 +193,7 @@ TEST_P(LimExModelTest, CompressExpand) {
memset(dest, 0xff, nfa->scratchStateSize);
nfaExpandState(nfa.get(), dest, q.streamState, q.offset,
queue_prev_byte(&q, end));
ASSERT_TRUE(std::equal(dest, dest + nfa->scratchStateSize,
full_state.get()));
ASSERT_TRUE(std::equal(dest, dest + real_state_size, full_state.get()));
}
TEST_P(LimExModelTest, InitCompressedState0) {


@ -110,10 +110,10 @@ void simd_setbit(m128 *a, unsigned int i) { return setbit128(a, i); }
void simd_setbit(m256 *a, unsigned int i) { return setbit256(a, i); }
void simd_setbit(m384 *a, unsigned int i) { return setbit384(a, i); }
void simd_setbit(m512 *a, unsigned int i) { return setbit512(a, i); }
bool simd_testbit(const m128 *a, unsigned int i) { return testbit128(a, i); }
bool simd_testbit(const m256 *a, unsigned int i) { return testbit256(a, i); }
bool simd_testbit(const m384 *a, unsigned int i) { return testbit384(a, i); }
bool simd_testbit(const m512 *a, unsigned int i) { return testbit512(a, i); }
bool simd_testbit(const m128 &a, unsigned int i) { return testbit128(a, i); }
bool simd_testbit(const m256 &a, unsigned int i) { return testbit256(a, i); }
bool simd_testbit(const m384 &a, unsigned int i) { return testbit384(a, i); }
bool simd_testbit(const m512 &a, unsigned int i) { return testbit512(a, i); }
u32 simd_diffrich(const m128 &a, const m128 &b) { return diffrich128(a, b); }
u32 simd_diffrich(const m256 &a, const m256 &b) { return diffrich256(a, b); }
u32 simd_diffrich(const m384 &a, const m384 &b) { return diffrich384(a, b); }
@ -419,15 +419,15 @@ TYPED_TEST(SimdUtilsTest, testbit) {
// First, all bits are on in 'ones'.
for (unsigned int i = 0; i < total_bits; i++) {
ASSERT_EQ(1, simd_testbit(&ones, i)) << "bit " << i << " is on";
ASSERT_EQ(1, simd_testbit(ones, i)) << "bit " << i << " is on";
}
// Try individual bits; only 'i' should be on.
for (unsigned int i = 0; i < total_bits; i++) {
TypeParam a = setbit<TypeParam>(i);
for (unsigned int j = 0; j < total_bits; j++) {
ASSERT_EQ(i == j ? 1 : 0, simd_testbit(&a, j)) << "bit " << i
<< " is wrong";
ASSERT_EQ(i == j ? 1 : 0, simd_testbit(a, j)) << "bit " << i
<< " is wrong";
}
}
}
@ -470,7 +470,7 @@ TYPED_TEST(SimdUtilsTest, diffrich) {
// and nothing is on in zeroes
for (unsigned int i = 0; i < total_bits; i++) {
ASSERT_EQ(0, simd_testbit(&zeroes, i)) << "bit " << i << " is off";
ASSERT_EQ(0, simd_testbit(zeroes, i)) << "bit " << i << " is off";
}
// All-zeroes and all-ones differ in all words


@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -156,26 +156,26 @@ TEST(Uniform, loadstore_m512) {
TEST(Uniform, testbit_u32) {
for (u32 i = 0; i < 32; i++) {
u32 v = 0;
EXPECT_EQ((char)0, testbit_u32(&v, i));
EXPECT_EQ((char)0, testbit_u32(v, i));
v |= 1ULL << i;
EXPECT_EQ((char)1, testbit_u32(&v, i));
EXPECT_EQ((char)1, testbit_u32(v, i));
v = ~v;
EXPECT_EQ((char)0, testbit_u32(&v, i));
EXPECT_EQ((char)0, testbit_u32(v, i));
v |= 1ULL << i;
EXPECT_EQ((char)1, testbit_u32(&v, i));
EXPECT_EQ((char)1, testbit_u32(v, i));
}
}
TEST(Uniform, testbit_u64a) {
for (u32 i = 0; i < 64; i++) {
u64a v = 0;
EXPECT_EQ((char)0, testbit_u64a(&v, i));
EXPECT_EQ((char)0, testbit_u64a(v, i));
v |= 1ULL << i;
EXPECT_EQ((char)1, testbit_u64a(&v, i));
EXPECT_EQ((char)1, testbit_u64a(v, i));
v = ~v;
EXPECT_EQ((char)0, testbit_u64a(&v, i));
EXPECT_EQ((char)0, testbit_u64a(v, i));
v |= 1ULL << i;
EXPECT_EQ((char)1, testbit_u64a(&v, i));
EXPECT_EQ((char)1, testbit_u64a(v, i));
}
}
@ -183,7 +183,7 @@ TEST(Uniform, clearbit_u32) {
for (u32 i = 0; i < 32; i++) {
u32 v = ~0U;
clearbit_u32(&v, i);
EXPECT_EQ((char)0, testbit_u32(&v, i));
EXPECT_EQ((char)0, testbit_u32(v, i));
v = ~v;
clearbit_u32(&v, i);
EXPECT_EQ(0U, v);
@ -194,7 +194,7 @@ TEST(Uniform, clearbit_u64a) {
for (u32 i = 0; i < 64; i++) {
u64a v = ~0ULL;
clearbit_u64a(&v, i);
EXPECT_EQ((char)0, testbit_u64a(&v, i));
EXPECT_EQ((char)0, testbit_u64a(v, i));
v = ~v;
clearbit_u64a(&v, i);
EXPECT_EQ(0ULL, v);