Limex: exception handling with AVX512

commit 5f930b267c, parent 001b7824d2
@@ -1922,7 +1922,8 @@ struct Factory {
     }

     static
-    void writeExceptions(const map<ExceptionProto, vector<u32>> &exceptionMap,
+    void writeExceptions(const build_info &args,
+                         const map<ExceptionProto, vector<u32>> &exceptionMap,
                          const vector<u32> &repeatOffsets, implNFA_t *limex,
                          const u32 exceptionsOffset,
                          const u32 reportListOffset) {
@@ -1974,6 +1975,59 @@ struct Factory {

         limex->exceptionOffset = exceptionsOffset;
         limex->exceptionCount = ecount;
+
+        if (args.num_states > 64 && args.cc.target_info.has_avx512vbmi()) {
+            const u8 *exceptionMask = (const u8 *)(&limex->exceptionMask);
+            u8 *shufMask = (u8 *)&limex->exceptionShufMask;
+            u8 *bitMask = (u8 *)&limex->exceptionBitMask;
+            u8 *andMask = (u8 *)&limex->exceptionAndMask;
+
+            u32 tot_cnt = 0;
+            u32 pos = 0;
+            bool valid = true;
+            size_t tot = sizeof(limex->exceptionMask);
+            size_t base = 0;
+
+            // We normally have up to 64 exceptions to handle,
+            // but treat 384 state Limex differently to simplify operations
+            size_t limit = 64;
+            if (args.num_states > 256 && args.num_states <= 384) {
+                limit = 48;
+            }
+
+            for (size_t i = 0; i < tot; i++) {
+                if (!exceptionMask[i]) {
+                    continue;
+                }
+                u32 bit_cnt = popcount32(exceptionMask[i]);
+
+                tot_cnt += bit_cnt;
+                if (tot_cnt > limit) {
+                    valid = false;
+                    break;
+                }
+
+                u32 emsk = exceptionMask[i];
+                while (emsk) {
+                    u32 t = findAndClearLSB_32(&emsk);
+                    bitMask[pos] = 1U << t;
+                    andMask[pos] = 1U << t;
+                    shufMask[pos++] = i + base;
+
+                    if (pos == 32 &&
+                        (args.num_states > 128 && args.num_states <= 256)) {
+                        base += 32;
+                    }
+                }
+            }
+            // Avoid matching unused bytes
+            for (u32 i = pos; i < 64; i++) {
+                bitMask[i] = 0xff;
+            }
+            if (valid) {
+                setLimexFlag(limex, LIMEX_FLAG_EXTRACT_EXP);
+            }
+        }
     }

     static
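In effect, the loop above encodes, per exception slot, which state byte to fetch and which bit inside it to test. A minimal scalar sketch of the test the three masks set up (illustrative only, not code from this commit; u8/u32 are the codebase's fixed-width typedefs):

    /* For slot p: gather the state byte named by shufMask[p], isolate the
     * bit in andMask[p], and the exception fires iff the result equals
     * bitMask[p]. Unused slots are padded with bitMask = 0xff while their
     * andMask stays zero, so the compare can never succeed for them. */
    static int exception_fires(const u8 *state_bytes, const u8 *shufMask,
                               const u8 *andMask, const u8 *bitMask, u32 p) {
        u8 b = state_bytes[shufMask[p]];       /* the VPERMB gather step */
        return (b & andMask[p]) == bitMask[p]; /* the AND + compare step */
    }

The runtime path added below performs exactly this test, 64 slots at a time.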
@@ -2299,7 +2353,7 @@ struct Factory {
         writeRepeats(repeats, repeatOffsets, limex, repeatOffsetsOffset,
                      repeatsOffset);

-        writeExceptions(exceptionMap, repeatOffsets, limex, exceptionsOffset,
+        writeExceptions(args, exceptionMap, repeatOffsets, limex, exceptionsOffset,
                         reportListOffset);

         writeLimexMasks(args, limex);
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2020, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -47,6 +47,8 @@
 #define AND_STATE JOIN(and_, STATE_T)
 #define EQ_STATE(a, b) (!JOIN(noteq_, STATE_T)((a), (b)))
 #define OR_STATE JOIN(or_, STATE_T)
+#define EXPAND_STATE JOIN(expand_, STATE_T)
+#define SHUFFLE_BYTE_STATE JOIN(shuffle_byte_, STATE_T)
 #define TESTBIT_STATE JOIN(testbit_, STATE_T)
 #define EXCEPTION_T JOIN(struct NFAException, SIZE)
 #define CONTEXT_T JOIN(NFAContext, SIZE)
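The two new defines follow the existing JOIN pattern: the operation prefix is token-pasted onto STATE_T, so each SIZE specialization resolves to the matching uniform-ops macro (expand_m128, shuffle_byte_m256, and so on). A sketch of the mechanism, with hypothetical MY_-prefixed names standing in for the codebase's join header:

    #define MY_JOIN_(x, y) x##y
    #define MY_JOIN(x, y) MY_JOIN_(x, y)  /* two levels so STATE_T expands first */
    #define MY_STATE_T m256
    #define MY_EXPAND_STATE MY_JOIN(expand_, MY_STATE_T)  /* -> expand_m256 */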
@@ -208,7 +210,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
 /** \brief Process all of the exceptions associated with the states in the \a
  *  estate. */
 static really_inline
-int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
+int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ,
           const struct IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
           u64a offset, struct CONTEXT_T *ctx, char in_rev, char flags) {
     assert(diffmask > 0); // guaranteed by caller macro
@@ -233,6 +235,72 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
     ctx->local_succ = ZERO_STATE;
 #endif

+    struct proto_cache new_cache = {0, NULL};
+    enum CacheResult cacheable = CACHE_RESULT;
+
+#if defined(HAVE_AVX512VBMI) && SIZE > 64
+    if (likely(limex->flags & LIMEX_FLAG_EXTRACT_EXP)) {
+        m512 emask = EXPAND_STATE(*STATE_ARG_P);
+        emask = SHUFFLE_BYTE_STATE(load_m512(&limex->exceptionShufMask), emask);
+        emask = and512(emask, load_m512(&limex->exceptionAndMask));
+        u64a word = eq512mask(emask, load_m512(&limex->exceptionBitMask));
+
+        do {
+            u32 bit = FIND_AND_CLEAR_FN(&word);
+            const EXCEPTION_T *e = &exceptions[bit];
+
+            if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ,
+#ifndef BIG_MODEL
+                                  &local_succ,
+#endif
+                                  limex, offset, ctx, &new_cache, &cacheable,
+                                  in_rev, flags)) {
+                return PE_RV_HALT;
+            }
+        } while (word);
+    } else {
+        // A copy of the estate as an array of GPR-sized chunks.
+        CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
+        CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
+#ifdef ESTATE_ON_STACK
+        memcpy(chunks, &estate, sizeof(STATE_T));
+#else
+        memcpy(chunks, estatep, sizeof(STATE_T));
+#endif
+        memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T));
+
+        u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)];
+        base_index[0] = 0;
+        for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) {
+            base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]);
+        }
+
+        do {
+            u32 t = findAndClearLSB_32(&diffmask);
+#ifdef ARCH_64_BIT
+            t >>= 1; // Due to diffmask64, which leaves holes in the bitmask.
+#endif
+            assert(t < ARRAY_LENGTH(chunks));
+            CHUNK_T word = chunks[t];
+            assert(word != 0);
+            do {
+                u32 bit = FIND_AND_CLEAR_FN(&word);
+                u32 local_index = RANK_IN_MASK_FN(emask_chunks[t], bit);
+                u32 idx = local_index + base_index[t];
+                const EXCEPTION_T *e = &exceptions[idx];
+
+                if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ,
+#ifndef BIG_MODEL
+                                      &local_succ,
+#endif
+                                      limex, offset, ctx, &new_cache, &cacheable,
+                                      in_rev, flags)) {
+                    return PE_RV_HALT;
+                }
+            } while (word);
+        } while (diffmask);
+    }
+#else
     // A copy of the estate as an array of GPR-sized chunks.
     CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
     CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
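The fast path above reduces exception detection to three vector operations plus a mask compare: broadcast the state into a zmm register, gather the byte holding each exception's bit with a byte permute, AND to isolate that bit, then compare bytewise against the expected bit pattern, yielding one result bit per exception. A standalone sketch in raw AVX-512 intrinsics (assumes an AVX512VBMI target; these are standard Intel intrinsics, not the codebase's wrappers):

    #include <immintrin.h>

    /* Returns a 64-bit mask with one bit set per active exception slot. */
    static inline unsigned long long
    extract_exceptions(__m512i state, __m512i shuf, __m512i andm, __m512i bitm) {
        __m512i bytes = _mm512_permutexvar_epi8(shuf, state); /* byte gather */
        __m512i bits = _mm512_and_si512(bytes, andm);         /* isolate bit */
        return _mm512_cmpeq_epi8_mask(bits, bitm);            /* 1 bit per hit */
    }

The resulting word feeds the same find-and-clear loop as the scalar path, so RUN_EXCEPTION_FN itself is untouched, and the exception index comes straight from the bit position instead of the rank computation used in the fallback.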
@@ -243,9 +311,6 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
 #endif
     memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T));

-    struct proto_cache new_cache = {0, NULL};
-    enum CacheResult cacheable = CACHE_RESULT;
-
     u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)];
     base_index[0] = 0;
     for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) {
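The scalar fallback keeps its rank-based indexing: an exception's slot in the packed exceptions array is the count of exception-mask bits below its own bit within the chunk, plus the per-chunk base accumulated above. A plain-C model of the rank step (illustrative; RANK_IN_MASK_FN is assumed to compute the equivalent, and __builtin_popcountll is the GCC/Clang builtin):

    static inline unsigned rank_in_mask64(unsigned long long mask, unsigned bit) {
        return (unsigned)__builtin_popcountll(mask & ((1ULL << bit) - 1));
    }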
@@ -276,6 +341,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
             }
         } while (word);
     } while (diffmask);
+#endif

 #ifndef BIG_MODEL
     *succ = OR_STATE(*succ, local_succ);
@@ -307,6 +373,8 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
 #undef AND_STATE
 #undef EQ_STATE
 #undef OR_STATE
+#undef EXPAND_STATE
+#undef SHUFFLE_BYTE_STATE
 #undef TESTBIT_STATE
 #undef PE_FN
 #undef RUN_EXCEPTION_FN
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2020, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -86,6 +86,7 @@
 #define LIMEX_FLAG_COMPRESS_STATE   1 /**< pack state into stream state */
 #define LIMEX_FLAG_COMPRESS_MASKED  2 /**< use reach mask-based compression */
 #define LIMEX_FLAG_CANNOT_DIE       4 /**< limex cannot have no states on */
+#define LIMEX_FLAG_EXTRACT_EXP      8 /**< use limex exception bit extraction */

 enum LimExTrigger {
     LIMEX_TRIGGER_NONE = 0,
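Since the flag values are distinct powers of two, enabling one is a bitwise OR into the flags word; a sketch with a hypothetical signature (the compiler's setLimexFlag is assumed to do the equivalent):

    static inline void set_limex_flag(u32 *flags, u32 flag) {
        *flags |= flag; /* flags holds an OR of LIMEX_FLAG_* values */
    }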
@@ -157,6 +158,9 @@ struct LimExNFA##size { \
     u_##size shift[MAX_SHIFT_COUNT]; \
     u32 shiftCount; /**< number of shift masks used */ \
     u8 shiftAmount[MAX_SHIFT_COUNT]; /**< shift amount for each mask */ \
+    m512 exceptionShufMask; /**< exception byte shuffle mask */ \
+    m512 exceptionBitMask; /**< exception bit mask */ \
+    m512 exceptionAndMask; /**< exception and mask */ \
 };

 CREATE_NFA_LIMEX(32)
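Note that the three masks are full m512 fields for every generated LimExNFA size: the extraction always operates on a single zmm register, so the compile-time writer fills 64-byte masks regardless of how many states the NFA actually has.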
@@ -187,6 +187,12 @@ static really_inline m128 or128(m128 a, m128 b) {
     return _mm_or_si128(a,b);
 }

+#if defined(HAVE_AVX512VBMI)
+static really_inline m512 expand128(m128 a) {
+    return _mm512_broadcast_i32x4(a);
+}
+#endif
+
 static really_inline m128 andnot128(m128 a, m128 b) {
     return _mm_andnot_si128(a, b);
 }
@@ -374,6 +380,12 @@ static really_inline m256 or256(m256 a, m256 b) {
     return _mm256_or_si256(a, b);
 }

+#if defined(HAVE_AVX512VBMI)
+static really_inline m512 expand256(m256 a) {
+    return _mm512_broadcast_i64x4(a);
+}
+#endif
+
 static really_inline m256 xor256(m256 a, m256 b) {
     return _mm256_xor_si256(a, b);
 }
@@ -684,6 +696,16 @@ m512 or512(m512 a, m512 b) {
     return _mm512_or_si512(a, b);
 }

+#if defined(HAVE_AVX512VBMI)
+static really_inline m512 expand384(m384 a) {
+    u64a *lo = (u64a*)&a.lo;
+    u64a *mid = (u64a*)&a.mid;
+    u64a *hi = (u64a*)&a.hi;
+    return _mm512_set_epi64(0ULL, 0ULL, hi[1], hi[0], mid[1], mid[0],
+                            lo[1], lo[0]);
+}
+#endif
+
 static really_inline
 m512 xor512(m512 a, m512 b) {
     return _mm512_xor_si512(a, b);
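The expand family widens a state of any size into a zmm register so that a single byte permute can reach all of its bytes: _mm512_broadcast_i32x4 replicates a 128-bit state into all four lanes, _mm512_broadcast_i64x4 duplicates a 256-bit state into both halves, and the 384-bit case is zero-extended via _mm512_set_epi64, whose arguments run from the highest 64-bit element down to the lowest. A trivial usage sketch (zeroes128 is assumed from this header):

    m128 s = zeroes128();     /* some 128-bit NFA state */
    m512 wide = expand128(s); /* zmm holding four copies of s */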
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2020, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -101,6 +101,18 @@
 #define or_m384(a, b) (or384(a, b))
 #define or_m512(a, b) (or512(a, b))

+#if defined(HAVE_AVX512VBMI)
+#define expand_m128(a) (expand128(a))
+#define expand_m256(a) (expand256(a))
+#define expand_m384(a) (expand384(a))
+#define expand_m512(a) (a)
+
+#define shuffle_byte_m128(a, b) (pshufb_m512(b, a))
+#define shuffle_byte_m256(a, b) (vpermb512(a, b))
+#define shuffle_byte_m384(a, b) (vpermb512(a, b))
+#define shuffle_byte_m512(a, b) (vpermb512(a, b))
+#endif
+
 #define and_u8(a, b) ((a) & (b))
 #define and_u32(a, b) ((a) & (b))
 #define and_u64a(a, b) ((a) & (b))
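Two asymmetries here appear deliberate: expand_m512 is the identity because a 512-bit state already fills the register, and shuffle_byte_m128 can get away with the in-lane pshufb_m512 (note the swapped argument order) because expand128 broadcast a full copy of the state into every 128-bit lane, so an in-lane byte shuffle reaches any state byte; the wider states need the cross-lane vpermb512.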