avx512: add basic functions to simd_utils

Extends the m512 type to use AVX-512 and also makes the changes
required for limex.
This commit is contained in:
Matthew Barr
2016-07-20 11:31:34 +10:00
parent fedd48489f
commit 8a56d16d57
11 changed files with 258 additions and 53 deletions

View File

@@ -165,14 +165,15 @@ TEST(Shuffle, PackedExtract64_3) {
// Fills a permute/compare mask pair for a single bit position, for use with
// the packedExtract* helpers exercised by the tests in this file.
//   bit     - index of the bit the masks should select
//   permute - out: every byte set to 0x80 except one, which receives bit/8
//   compare - out: every byte zero except the same one, which receives
//             ~(1 << (bit % 8))
// T must be the same size as one of the vector types named in the
// static_assert below.
// NOTE(review): this listing appears to be a diff rendered without +/-
// markers, so the two static_assert openings and the two `u8 off` lines look
// like before/after pairs rather than code meant to coexist -- confirm
// against the real file.
template<typename T>
static
void build_pshufb_masks_onebit(unsigned int bit, T *permute, T *compare) {
static_assert(sizeof(T) == sizeof(m128) || sizeof(T) == sizeof(m256),
static_assert(sizeof(T) == sizeof(m128) || sizeof(T) == sizeof(m256) ||
sizeof(T) == sizeof(m512),
"should be valid type");
// permute mask has 0x80 in all bytes except the one we care about
memset(permute, 0x80, sizeof(*permute));
memset(compare, 0, sizeof(*compare));
char *pmsk = (char *)permute;
char *cmsk = (char *)compare;
// byte offset of the one mask byte that is written: 0x10 once bit >= 128
u8 off = (bit >= 128) ? 0x10 : 0;
// nested form: 0x00, 0x10, 0x20 or 0x30 for bit in [0,128), [128,256),
// [256,384) and [384,...) respectively
u8 off = (bit >= 128) ? (bit >= 256) ? (bit >= 384) ? 0x30 : 0x20 : 0x10 : 0;
// source byte index for the selected position
pmsk[off] = bit/8;
// all bits set except the one at position bit % 8 within that byte
cmsk[off] = ~(1 << (bit % 8));
}
@@ -214,4 +215,24 @@ TEST(Shuffle, PackedExtract256_1) {
}
}
#endif
#if defined(HAVE_AVX512)
// Exhaustively exercises packedExtract512 with every one-bit permute/compare
// mask pair produced by build_pshufb_masks_onebit: the selected bit must
// extract as 1, and every input lacking that bit must extract as 0.
TEST(Shuffle, PackedExtract512_1) {
    // Try all possible one-bit masks
    for (unsigned int i = 0; i < 512; i++) {
        // shuffle a single 1 bit to the front
        m512 permute, compare;
        build_pshufb_masks_onebit(i, &permute, &compare);
        EXPECT_EQ(1U, packedExtract512(setbit<m512>(i), permute, compare));
        EXPECT_EQ(1U, packedExtract512(ones512(), permute, compare));
        // we should get zero out of these cases
        EXPECT_EQ(0U, packedExtract512(zeroes512(), permute, compare));
        EXPECT_EQ(0U, packedExtract512(not512(setbit<m512>(i)), permute, compare));
        // we should get zero out of all the other bit positions. Skip only
        // j == i instead of stopping the loop there; a `j != i` loop condition
        // would end the scan at i and leave every position above i (and, for
        // i == 0, every position) unchecked.
        for (unsigned int j = 0; j < 512; j++) {
            if (j == i) {
                continue;
            }
            EXPECT_EQ(0U, packedExtract512(setbit<m512>(j), permute, compare));
        }
    }
}
#endif
} // namespace

View File

@@ -590,7 +590,7 @@ TEST(SimdUtilsTest, alignment) {
ASSERT_EQ(16, alignof(m128));
ASSERT_EQ(32, alignof(m256));
ASSERT_EQ(16, alignof(m384));
ASSERT_EQ(32, alignof(m512));
ASSERT_EQ(64, alignof(m512));
}
TEST(SimdUtilsTest, movq) {