mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-30 19:47:43 +03:00
avx512: add basic functions to simd_utils
Extends the m512 type to use AVX-512 and makes the changes required for limex.
This commit is contained in:
@@ -151,18 +151,20 @@ size_t doAccel512(const m512 *state, const struct LimExNFA512 *limex,
|
||||
DEBUG_PRINTF("using PSHUFB for 512-bit shuffle\n");
|
||||
m512 accelPerm = limex->accelPermute;
|
||||
m512 accelComp = limex->accelCompare;
|
||||
#if !defined(HAVE_AVX2)
|
||||
#if defined(HAVE_AVX512)
|
||||
idx = packedExtract512(s, accelPerm, accelComp);
|
||||
#elif defined(HAVE_AVX2)
|
||||
u32 idx1 = packedExtract256(s.lo, accelPerm.lo, accelComp.lo);
|
||||
u32 idx2 = packedExtract256(s.hi, accelPerm.hi, accelComp.hi);
|
||||
assert((idx1 & idx2) == 0); // should be no shared bits
|
||||
idx = idx1 | idx2;
|
||||
#else
|
||||
u32 idx1 = packedExtract128(s.lo.lo, accelPerm.lo.lo, accelComp.lo.lo);
|
||||
u32 idx2 = packedExtract128(s.lo.hi, accelPerm.lo.hi, accelComp.lo.hi);
|
||||
u32 idx3 = packedExtract128(s.hi.lo, accelPerm.hi.lo, accelComp.hi.lo);
|
||||
u32 idx4 = packedExtract128(s.hi.hi, accelPerm.hi.hi, accelComp.hi.hi);
|
||||
assert((idx1 & idx2 & idx3 & idx4) == 0); // should be no shared bits
|
||||
idx = idx1 | idx2 | idx3 | idx4;
|
||||
#else
|
||||
u32 idx1 = packedExtract256(s.lo, accelPerm.lo, accelComp.lo);
|
||||
u32 idx2 = packedExtract256(s.hi, accelPerm.hi, accelComp.hi);
|
||||
assert((idx1 & idx2) == 0); // should be no shared bits
|
||||
idx = idx1 | idx2;
|
||||
#endif
|
||||
return accelScanWrapper(accelTable, aux, input, idx, i, end);
|
||||
}
|
||||
|
@@ -62,4 +62,17 @@ u32 packedExtract256(m256 s, const m256 permute, const m256 compare) {
|
||||
}
|
||||
#endif // AVX2
|
||||
|
||||
#if defined(HAVE_AVX512)
|
||||
static really_inline
|
||||
u32 packedExtract512(m512 s, const m512 permute, const m512 compare) {
|
||||
// vpshufb doesn't cross lanes, so this is a bit of a cheat
|
||||
m512 shuffled = pshufb_m512(s, permute);
|
||||
m512 compared = and512(shuffled, compare);
|
||||
u64a rv = ~eq512mask(compared, shuffled);
|
||||
// stitch the lane-wise results back together
|
||||
rv = rv >> 32 | rv;
|
||||
return (u32)(((rv >> 16) | rv) & 0xffffU);
|
||||
}
|
||||
#endif // AVX512
|
||||
|
||||
#endif // LIMEX_SHUFFLE_H
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -401,7 +401,7 @@ const char *NFATraits<SHENG_NFA>::name = "Sheng";
|
||||
template<> struct NFATraits<TAMARAMA_NFA> {
|
||||
UNUSED static const char *name;
|
||||
static const NFACategory category = NFA_OTHER;
|
||||
static const u32 stateAlign = 32;
|
||||
static const u32 stateAlign = 64;
|
||||
static const bool fast = true;
|
||||
static const nfa_dispatch_fn has_accel;
|
||||
static const nfa_dispatch_fn has_repeats;
|
||||
|
Reference in New Issue
Block a user