Mirror of https://github.com/VectorCamp/vectorscan.git
Limex: exception handling with AVX512
Commit: 2945c9bd20
Parent: 20e69f6ad8
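
In outline, this change adds a LIMEX_FLAG_EXTRACT_EXP fast path for LimEx NFAs with more than 64 states on AVX512VBMI targets: at build time writeExceptions() lays out one byte lane per exception (shuffle index, AND mask and expected bit), and at run time PE_FN() expands the current state into a 512-bit register, gathers the relevant state bytes with a byte shuffle (vpermb), masks them, and compares them against the expected bits to get a 64-bit word with one bit per triggered exception. The sketch below is illustrative only: it uses raw intrinsics and standard types instead of the project's m512/u64a wrappers and the EXPAND_STATE/SHUFFLE_BYTE_STATE macros; the real implementation is the PE_FN hunk further down.

    #include <immintrin.h>
    #include <stdint.h>

    /* Illustrative sketch (not the project's code): given the three masks
     * built by writeExceptions(), return a 64-bit word with one bit set per
     * exception triggered by the expanded state. Requires AVX512VBMI/BW. */
    static inline uint64_t extract_exceptions(__m512i state,     /* expanded NFA state       */
                                              __m512i shufMask,  /* limex->exceptionShufMask */
                                              __m512i andMask,   /* limex->exceptionAndMask  */
                                              __m512i bitMask) { /* limex->exceptionBitMask  */
        /* Move the state byte holding each exception's bit into that
         * exception's lane (up to 64 lanes, one per exception). */
        __m512i e = _mm512_permutexvar_epi8(shufMask, state);
        /* Keep only the bit this lane cares about ... */
        e = _mm512_and_si512(e, andMask);
        /* ... and test it: a lane equal to its expected bit pattern sets the
         * corresponding bit of the result mask. */
        return _mm512_cmpeq_epi8_mask(e, bitMask);
    }

The builder only emits these masks when the total exception count fits the 64-lane budget (48 for the 384-state model); unused bitMask lanes are padded with 0xff to avoid matching unused bytes, and if the budget is exceeded LIMEX_FLAG_EXTRACT_EXP stays unset and the existing chunk-based scan is used instead.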
@@ -1922,7 +1922,8 @@ struct Factory {
     }
 
     static
-    void writeExceptions(const map<ExceptionProto, vector<u32>> &exceptionMap,
+    void writeExceptions(const build_info &args,
+                         const map<ExceptionProto, vector<u32>> &exceptionMap,
                          const vector<u32> &repeatOffsets, implNFA_t *limex,
                          const u32 exceptionsOffset,
                          const u32 reportListOffset) {
@@ -1974,6 +1975,59 @@ struct Factory {
 
         limex->exceptionOffset = exceptionsOffset;
         limex->exceptionCount = ecount;
+
+        if (args.num_states > 64 && args.cc.target_info.has_avx512vbmi()) {
+            const u8 *exceptionMask = (const u8 *)(&limex->exceptionMask);
+            u8 *shufMask = (u8 *)&limex->exceptionShufMask;
+            u8 *bitMask = (u8 *)&limex->exceptionBitMask;
+            u8 *andMask = (u8 *)&limex->exceptionAndMask;
+
+            u32 tot_cnt = 0;
+            u32 pos = 0;
+            bool valid = true;
+            size_t tot = sizeof(limex->exceptionMask);
+            size_t base = 0;
+
+            // We normally have up to 64 exceptions to handle,
+            // but treat 384 state Limex differently to simplify operations
+            size_t limit = 64;
+            if (args.num_states > 256 && args.num_states <= 384) {
+                limit = 48;
+            }
+
+            for (size_t i = 0; i < tot; i++) {
+                if (!exceptionMask[i]) {
+                    continue;
+                }
+                u32 bit_cnt = popcount32(exceptionMask[i]);
+
+                tot_cnt += bit_cnt;
+                if (tot_cnt > limit) {
+                    valid = false;
+                    break;
+                }
+
+                u32 emsk = exceptionMask[i];
+                while (emsk) {
+                    u32 t = findAndClearLSB_32(&emsk);
+                    bitMask[pos] = 1U << t;
+                    andMask[pos] = 1U << t;
+                    shufMask[pos++] = i + base;
+
+                    if (pos == 32 &&
+                        (args.num_states > 128 && args.num_states <= 256)) {
+                        base += 32;
+                    }
+                }
+            }
+            // Avoid matching unused bytes
+            for (u32 i = pos; i < 64; i++) {
+                bitMask[i] = 0xff;
+            }
+            if (valid) {
+                setLimexFlag(limex, LIMEX_FLAG_EXTRACT_EXP);
+            }
+        }
     }
 
     static
@@ -2299,7 +2353,7 @@ struct Factory {
         writeRepeats(repeats, repeatOffsets, limex, repeatOffsetsOffset,
                      repeatsOffset);
 
-        writeExceptions(exceptionMap, repeatOffsets, limex, exceptionsOffset,
+        writeExceptions(args, exceptionMap, repeatOffsets, limex, exceptionsOffset,
                         reportListOffset);
 
         writeLimexMasks(args, limex);
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2020, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -47,6 +47,8 @@
 #define AND_STATE JOIN(and_, STATE_T)
 #define EQ_STATE(a, b) (!JOIN(noteq_, STATE_T)((a), (b)))
 #define OR_STATE JOIN(or_, STATE_T)
+#define EXPAND_STATE JOIN(expand_, STATE_T)
+#define SHUFFLE_BYTE_STATE JOIN(shuffle_byte_, STATE_T)
 #define TESTBIT_STATE JOIN(testbit_, STATE_T)
 #define EXCEPTION_T JOIN(struct NFAException, SIZE)
 #define CONTEXT_T JOIN(NFAContext, SIZE)
@@ -208,7 +210,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
 /** \brief Process all of the exceptions associated with the states in the \a
  * estate. */
 static really_inline
-int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
+int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ,
           const struct IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
           u64a offset, struct CONTEXT_T *ctx, char in_rev, char flags) {
     assert(diffmask > 0); // guaranteed by caller macro
@@ -233,6 +235,72 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
     ctx->local_succ = ZERO_STATE;
 #endif
 
+    struct proto_cache new_cache = {0, NULL};
+    enum CacheResult cacheable = CACHE_RESULT;
+
+#if defined(HAVE_AVX512VBMI) && SIZE > 64
+    if (likely(limex->flags & LIMEX_FLAG_EXTRACT_EXP)) {
+        m512 emask = EXPAND_STATE(*STATE_ARG_P);
+        emask = SHUFFLE_BYTE_STATE(load_m512(&limex->exceptionShufMask), emask);
+        emask = and512(emask, load_m512(&limex->exceptionAndMask));
+        u64a word = eq512mask(emask, load_m512(&limex->exceptionBitMask));
+
+        do {
+            u32 bit = FIND_AND_CLEAR_FN(&word);
+            const EXCEPTION_T *e = &exceptions[bit];
+
+            if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ,
+#ifndef BIG_MODEL
+                                  &local_succ,
+#endif
+                                  limex, offset, ctx, &new_cache, &cacheable,
+                                  in_rev, flags)) {
+                return PE_RV_HALT;
+            }
+        } while (word);
+    } else {
+        // A copy of the estate as an array of GPR-sized chunks.
+        CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
+        CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
+#ifdef ESTATE_ON_STACK
+        memcpy(chunks, &estate, sizeof(STATE_T));
+#else
+        memcpy(chunks, estatep, sizeof(STATE_T));
+#endif
+        memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T));
+
+        u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)];
+        base_index[0] = 0;
+        for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) {
+            base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]);
+        }
+
+        do {
+            u32 t = findAndClearLSB_32(&diffmask);
+#ifdef ARCH_64_BIT
+            t >>= 1; // Due to diffmask64, which leaves holes in the bitmask.
+#endif
+            assert(t < ARRAY_LENGTH(chunks));
+            CHUNK_T word = chunks[t];
+            assert(word != 0);
+            do {
+                u32 bit = FIND_AND_CLEAR_FN(&word);
+                u32 local_index = RANK_IN_MASK_FN(emask_chunks[t], bit);
+                u32 idx = local_index + base_index[t];
+                const EXCEPTION_T *e = &exceptions[idx];
+
+                if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ,
+#ifndef BIG_MODEL
+                                      &local_succ,
+#endif
+                                      limex, offset, ctx, &new_cache, &cacheable,
+                                      in_rev, flags)) {
+                    return PE_RV_HALT;
+                }
+            } while (word);
+        } while (diffmask);
+    }
+#else
     // A copy of the estate as an array of GPR-sized chunks.
     CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
     CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
@@ -243,9 +311,6 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
 #endif
     memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T));
 
-    struct proto_cache new_cache = {0, NULL};
-    enum CacheResult cacheable = CACHE_RESULT;
-
     u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)];
     base_index[0] = 0;
     for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) {
@@ -276,6 +341,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
             }
         } while (word);
     } while (diffmask);
+#endif
 
 #ifndef BIG_MODEL
     *succ = OR_STATE(*succ, local_succ);
@@ -307,6 +373,8 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
 #undef AND_STATE
 #undef EQ_STATE
 #undef OR_STATE
+#undef EXPAND_STATE
+#undef SHUFFLE_BYTE_STATE
 #undef TESTBIT_STATE
 #undef PE_FN
 #undef RUN_EXCEPTION_FN
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2020, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -86,6 +86,7 @@
 #define LIMEX_FLAG_COMPRESS_STATE   1 /**< pack state into stream state */
 #define LIMEX_FLAG_COMPRESS_MASKED  2 /**< use reach mask-based compression */
 #define LIMEX_FLAG_CANNOT_DIE       4 /**< limex cannot have no states on */
+#define LIMEX_FLAG_EXTRACT_EXP      8 /**< use limex exception bit extraction */
 
 enum LimExTrigger {
     LIMEX_TRIGGER_NONE = 0,
@@ -157,6 +158,9 @@ struct LimExNFA##size { \
     u_##size shift[MAX_SHIFT_COUNT]; \
     u32 shiftCount; /**< number of shift masks used */ \
     u8 shiftAmount[MAX_SHIFT_COUNT]; /**< shift amount for each mask */ \
+    m512 exceptionShufMask; /**< exception byte shuffle mask */ \
+    m512 exceptionBitMask; /**< exception bit mask */ \
+    m512 exceptionAndMask; /**< exception and mask */ \
 };
 
 CREATE_NFA_LIMEX(32)
@@ -223,6 +223,24 @@ static really_inline m128 or128(m128 a, m128 b) {
     return _mm_or_si128(a,b);
 }
 
+#if defined(HAVE_AVX512VBMI)
+static really_inline m512 expand128(m128 a) {
+    return _mm512_broadcast_i32x4(a);
+}
+
+static really_inline m512 expand256(m256 a) {
+    return _mm512_broadcast_i64x4(a);
+}
+
+static really_inline m512 expand384(m384 a) {
+    u64a *lo = (u64a*)&a.lo;
+    u64a *mid = (u64a*)&a.mid;
+    u64a *hi = (u64a*)&a.hi;
+    return _mm512_set_epi64(0ULL, 0ULL, hi[1], hi[0], mid[1], mid[0],
+                            lo[1], lo[0]);
+}
+#endif
+
 static really_inline m128 andnot128(m128 a, m128 b) {
     return _mm_andnot_si128(a, b);
 }
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2020, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -101,6 +101,18 @@
 #define or_m384(a, b) (or384(a, b))
 #define or_m512(a, b) (or512(a, b))
 
+#if defined(HAVE_AVX512VBMI)
+#define expand_m128(a) (expand128(a))
+#define expand_m256(a) (expand256(a))
+#define expand_m384(a) (expand384(a))
+#define expand_m512(a) (a)
+
+#define shuffle_byte_m128(a, b) (pshufb_m512(b, a))
+#define shuffle_byte_m256(a, b) (vpermb512(a, b))
+#define shuffle_byte_m384(a, b) (vpermb512(a, b))
+#define shuffle_byte_m512(a, b) (vpermb512(a, b))
+#endif
+
 #define and_u8(a, b) ((a) & (b))
 #define and_u32(a, b) ((a) & (b))
 #define and_u64a(a, b) ((a) & (b))