Limex: exception handling with AVX512

This commit is contained in:
Wang Xiang W
2020-04-24 11:51:34 -04:00
committed by Konstantinos Margaritis
parent 001b7824d2
commit 5f930b267c
5 changed files with 169 additions and 9 deletions

View File

@@ -187,6 +187,12 @@ static really_inline m128 or128(m128 a, m128 b) {
return _mm_or_si128(a,b);
}
#if defined(HAVE_AVX512VBMI)
/*
 * Widen a 128-bit vector to 512 bits by replicating it.
 *
 * _mm512_broadcast_i32x4 copies the four packed 32-bit elements of the
 * source into every 128-bit lane of the destination, so the result holds
 * four identical copies of a.
 */
static really_inline
m512 expand128(m128 a) {
    m512 replicated = _mm512_broadcast_i32x4(a);
    return replicated;
}
#endif
/*
 * Bitwise AND-NOT of two 128-bit vectors.
 *
 * Follows the operand order of _mm_andnot_si128: the FIRST operand is the
 * one that gets inverted, i.e. the result is (~a) & b.
 */
static really_inline
m128 andnot128(m128 a, m128 b) {
    m128 masked = _mm_andnot_si128(a, b);
    return masked;
}
@@ -374,6 +380,12 @@ static really_inline m256 or256(m256 a, m256 b) {
return _mm256_or_si256(a, b);
}
#if defined(HAVE_AVX512VBMI)
/*
 * Widen a 256-bit vector to 512 bits by replicating it.
 *
 * _mm512_broadcast_i64x4 copies the four packed 64-bit elements of the
 * source into both 256-bit halves of the destination, so the result holds
 * two identical copies of a.
 */
static really_inline
m512 expand256(m256 a) {
    m512 replicated = _mm512_broadcast_i64x4(a);
    return replicated;
}
#endif
/* Bitwise XOR of two 256-bit vectors. */
static really_inline
m256 xor256(m256 a, m256 b) {
    m256 toggled = _mm256_xor_si256(a, b);
    return toggled;
}
@@ -684,6 +696,16 @@ m512 or512(m512 a, m512 b) {
return _mm512_or_si512(a, b);
}
#if defined(HAVE_AVX512VBMI)
/*
 * Widen a 384-bit value to 512 bits, zero-filling the top.
 *
 * Each of the lo/mid/hi members of a is read as two 64-bit words. The
 * resulting vector holds, from least to most significant 64-bit element:
 * lo[0], lo[1], mid[0], mid[1], hi[0], hi[1], 0, 0.
 */
static really_inline
m512 expand384(m384 a) {
    u64a *lo_words = (u64a *)&a.lo;
    u64a *mid_words = (u64a *)&a.mid;
    u64a *hi_words = (u64a *)&a.hi;

    /* _mm512_set_epi64 takes its elements most-significant first. */
    return _mm512_set_epi64(0ULL, 0ULL,
                            hi_words[1], hi_words[0],
                            mid_words[1], mid_words[0],
                            lo_words[1], lo_words[0]);
}
#endif
static really_inline
m512 xor512(m512 a, m512 b) {
return _mm512_xor_si512(a, b);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -101,6 +101,18 @@
#define or_m384(a, b) (or384(a, b))
#define or_m512(a, b) (or512(a, b))
/*
 * Size-suffixed aliases that are only defined when the build has
 * AVX512VBMI support.
 */
#if defined(HAVE_AVX512VBMI)
/*
 * expand_m<N>: forward an <N>-bit value to the matching expand<N>()
 * routine. The 512-bit variant is the identity and yields its argument
 * unchanged.
 */
#define expand_m128(a) (expand128(a))
#define expand_m256(a) (expand256(a))
#define expand_m384(a) (expand384(a))
#define expand_m512(a) (a)
/*
 * shuffle_byte_m<N>: byte shuffle aliases.
 * NOTE: the 128-bit variant forwards to pshufb_m512 with its two arguments
 * SWAPPED (b, a); the 256/384/512-bit variants forward to vpermb512 with
 * the arguments in the order given (a, b). Presumably this reflects the
 * differing operand conventions of the two helpers -- confirm against
 * their definitions before changing either.
 */
#define shuffle_byte_m128(a, b) (pshufb_m512(b, a))
#define shuffle_byte_m256(a, b) (vpermb512(a, b))
#define shuffle_byte_m384(a, b) (vpermb512(a, b))
#define shuffle_byte_m512(a, b) (vpermb512(a, b))
#endif
#define and_u8(a, b) ((a) & (b))
#define and_u32(a, b) ((a) & (b))
#define and_u64a(a, b) ((a) & (b))