avx512: add basic functions to simd_utils

Extends the m512 type to use AVX-512 and also makes the changes required
for limex.
This commit is contained in:
Matthew Barr
2016-07-20 11:31:34 +10:00
parent fedd48489f
commit 8a56d16d57
11 changed files with 258 additions and 53 deletions

View File

@@ -151,18 +151,20 @@ size_t doAccel512(const m512 *state, const struct LimExNFA512 *limex,
DEBUG_PRINTF("using PSHUFB for 512-bit shuffle\n");
m512 accelPerm = limex->accelPermute;
m512 accelComp = limex->accelCompare;
#if !defined(HAVE_AVX2)
#if defined(HAVE_AVX512)
idx = packedExtract512(s, accelPerm, accelComp);
#elif defined(HAVE_AVX2)
u32 idx1 = packedExtract256(s.lo, accelPerm.lo, accelComp.lo);
u32 idx2 = packedExtract256(s.hi, accelPerm.hi, accelComp.hi);
assert((idx1 & idx2) == 0); // should be no shared bits
idx = idx1 | idx2;
#else
u32 idx1 = packedExtract128(s.lo.lo, accelPerm.lo.lo, accelComp.lo.lo);
u32 idx2 = packedExtract128(s.lo.hi, accelPerm.lo.hi, accelComp.lo.hi);
u32 idx3 = packedExtract128(s.hi.lo, accelPerm.hi.lo, accelComp.hi.lo);
u32 idx4 = packedExtract128(s.hi.hi, accelPerm.hi.hi, accelComp.hi.hi);
assert((idx1 & idx2 & idx3 & idx4) == 0); // should be no shared bits
idx = idx1 | idx2 | idx3 | idx4;
#else
u32 idx1 = packedExtract256(s.lo, accelPerm.lo, accelComp.lo);
u32 idx2 = packedExtract256(s.hi, accelPerm.hi, accelComp.hi);
assert((idx1 & idx2) == 0); // should be no shared bits
idx = idx1 | idx2;
#endif
return accelScanWrapper(accelTable, aux, input, idx, i, end);
}

View File

@@ -62,4 +62,17 @@ u32 packedExtract256(m256 s, const m256 permute, const m256 compare) {
}
#endif // AVX2
#if defined(HAVE_AVX512)
/**
 * 512-bit packed extract: shuffle \a s by \a permute, test the shuffled
 * bytes against \a compare, and collapse the resulting 64-bit mask into a
 * 16-bit index.
 *
 * \param s        state vector to extract from.
 * \param permute  shuffle control passed to pshufb_m512.
 * \param compare  mask ANDed with the shuffled vector before the comparison.
 * \return the low 16 bits of the folded, inverted comparison mask.
 */
static really_inline
u32 packedExtract512(m512 s, const m512 permute, const m512 compare) {
    // vpshufb works within lanes only, so the shuffle yields one partial
    // result per lane rather than a single cross-lane result.
    const m512 permuted = pshufb_m512(s, permute);
    const m512 masked = and512(permuted, compare);

    // Inverted equality mask of (permuted & compare) against permuted.
    // NOTE(review): presumably one mask bit per byte -- confirm against
    // eq512mask.
    u64a mask = ~eq512mask(masked, permuted);

    // OR the four 16-bit quarters of the mask together so the per-lane
    // results are stitched into the low 16 bits.
    mask |= mask >> 32;
    mask |= mask >> 16;

    return (u32)(mask & 0xffffU);
}
#endif // AVX512
#endif // LIMEX_SHUFFLE_H

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -401,7 +401,7 @@ const char *NFATraits<SHENG_NFA>::name = "Sheng";
template<> struct NFATraits<TAMARAMA_NFA> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 32;
static const u32 stateAlign = 64;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;