vectorscan/unit/internal/simd_utils.cpp
Konstantinos Margaritis 7cad514366 clang is more strict
2021-12-02 23:09:53 +02:00

945 lines
30 KiB
C++

/*
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"

#include <cstdlib>
#include <cstring>
#include <ctime>
#include <memory>

#include "gtest/gtest.h"
#include "util/arch.h"
#include "util/bytecode_ptr.h"
#include "util/simd_utils.h"
using namespace std;
using namespace ue2;
namespace {
// Build a value of type Mask that has exactly one bit switched on.
//
// The mask is treated as a flat array of sizeof(Mask) bytes: bit number `bit`
// lives in byte (bit / 8), at position (bit % 8) inside that byte. The caller
// must pass bit < sizeof(Mask) * 8; no range check is performed.
template<class Mask>
Mask setbit(unsigned int bit) {
    unsigned char bytes[sizeof(Mask)];
    memset(bytes, 0, sizeof(bytes));
    bytes[bit / 8] = static_cast<unsigned char>(1U << (bit % 8));

    // Copy the bytes into the result instead of reading them back through a
    // union: reading a union member other than the one last written is
    // undefined behaviour in C++.
    Mask mask;
    memcpy(&mask, bytes, sizeof(mask));
    return mask;
}
// Parameterized tests follow!
//
// Irritatingly we have to define a whole bunch of overrides here... because
// templates. One Admiration Unit for anyone able to build a better way of
// doing this.
// Placeholder that converts to an all-zeroes vector of whichever SIMD width
// the destination type asks for.
struct simd_zeroes {
    operator m128() {
        return zeroes128();
    }

    operator m256() {
        return zeroes256();
    }

    operator m384() {
        return zeroes384();
    }

    operator m512() {
        return zeroes512();
    }
};
// Placeholder that converts to an all-ones vector of whichever SIMD width
// the destination type asks for.
struct simd_ones {
    operator m128() {
        return ones128();
    }

    operator m256() {
        return ones256();
    }

    operator m384() {
        return ones384();
    }

    operator m512() {
        return ones512();
    }
};
// Width-dispatching wrappers around diffNNN(); the result is collapsed to a
// bool that is true when diffNNN() reports a non-zero value.
bool simd_diff(const m128 &a, const m128 &b) {
    return diff128(a, b) != 0;
}

bool simd_diff(const m256 &a, const m256 &b) {
    return diff256(a, b) != 0;
}

bool simd_diff(const m384 &a, const m384 &b) {
    return diff384(a, b) != 0;
}

bool simd_diff(const m512 &a, const m512 &b) {
    return diff512(a, b) != 0;
}
// Width-dispatching wrappers around isnonzeroNNN(); the result is collapsed
// to a bool.
bool simd_isnonzero(const m128 &a) {
    return isnonzero128(a) != 0;
}

bool simd_isnonzero(const m256 &a) {
    return isnonzero256(a) != 0;
}

bool simd_isnonzero(const m384 &a) {
    return isnonzero384(a) != 0;
}

bool simd_isnonzero(const m512 &a) {
    return isnonzero512(a) != 0;
}
// Width-dispatching wrappers around andNNN().
m128 simd_and(const m128 &a, const m128 &b) {
    return and128(a, b);
}

m256 simd_and(const m256 &a, const m256 &b) {
    return and256(a, b);
}

m384 simd_and(const m384 &a, const m384 &b) {
    return and384(a, b);
}

m512 simd_and(const m512 &a, const m512 &b) {
    return and512(a, b);
}
// Width-dispatching wrappers around orNNN().
m128 simd_or(const m128 &a, const m128 &b) {
    return or128(a, b);
}

m256 simd_or(const m256 &a, const m256 &b) {
    return or256(a, b);
}

m384 simd_or(const m384 &a, const m384 &b) {
    return or384(a, b);
}

m512 simd_or(const m512 &a, const m512 &b) {
    return or512(a, b);
}
// Width-dispatching wrappers around xorNNN().
m128 simd_xor(const m128 &a, const m128 &b) {
    return xor128(a, b);
}

m256 simd_xor(const m256 &a, const m256 &b) {
    return xor256(a, b);
}

m384 simd_xor(const m384 &a, const m384 &b) {
    return xor384(a, b);
}

m512 simd_xor(const m512 &a, const m512 &b) {
    return xor512(a, b);
}
// Width-dispatching wrappers around andnotNNN().
m128 simd_andnot(const m128 &a, const m128 &b) {
    return andnot128(a, b);
}

m256 simd_andnot(const m256 &a, const m256 &b) {
    return andnot256(a, b);
}

m384 simd_andnot(const m384 &a, const m384 &b) {
    return andnot384(a, b);
}

m512 simd_andnot(const m512 &a, const m512 &b) {
    return andnot512(a, b);
}
// Width-dispatching wrappers around notNNN().
m128 simd_not(const m128 &a) {
    return not128(a);
}

m256 simd_not(const m256 &a) {
    return not256(a);
}

m384 simd_not(const m384 &a) {
    return not384(a);
}

m512 simd_not(const m512 &a) {
    return not512(a);
}
// Width-dispatching wrappers around clearbitNNN(), which modifies *a in place.
void simd_clearbit(m128 *a, unsigned int i) {
    clearbit128(a, i);
}

void simd_clearbit(m256 *a, unsigned int i) {
    clearbit256(a, i);
}

void simd_clearbit(m384 *a, unsigned int i) {
    clearbit384(a, i);
}

void simd_clearbit(m512 *a, unsigned int i) {
    clearbit512(a, i);
}
// Width-dispatching wrappers around setbitNNN(), which modifies *a in place.
void simd_setbit(m128 *a, unsigned int i) {
    setbit128(a, i);
}

void simd_setbit(m256 *a, unsigned int i) {
    setbit256(a, i);
}

void simd_setbit(m384 *a, unsigned int i) {
    setbit384(a, i);
}

void simd_setbit(m512 *a, unsigned int i) {
    setbit512(a, i);
}
// Width-dispatching wrappers around testbitNNN(); the result is converted to
// bool.
bool simd_testbit(const m128 &a, unsigned int i) {
    return testbit128(a, i);
}

bool simd_testbit(const m256 &a, unsigned int i) {
    return testbit256(a, i);
}

bool simd_testbit(const m384 &a, unsigned int i) {
    return testbit384(a, i);
}

bool simd_testbit(const m512 &a, unsigned int i) {
    return testbit512(a, i);
}
// Width-dispatching wrappers around diffrichNNN().
u32 simd_diffrich(const m128 &a, const m128 &b) {
    return diffrich128(a, b);
}

u32 simd_diffrich(const m256 &a, const m256 &b) {
    return diffrich256(a, b);
}

u32 simd_diffrich(const m384 &a, const m384 &b) {
    return diffrich384(a, b);
}

u32 simd_diffrich(const m512 &a, const m512 &b) {
    return diffrich512(a, b);
}
// Width-dispatching wrappers around diffrich64_NNN().
u32 simd_diffrich64(const m128 &a, const m128 &b) {
    return diffrich64_128(a, b);
}

u32 simd_diffrich64(const m256 &a, const m256 &b) {
    return diffrich64_256(a, b);
}

u32 simd_diffrich64(const m384 &a, const m384 &b) {
    return diffrich64_384(a, b);
}

u32 simd_diffrich64(const m512 &a, const m512 &b) {
    return diffrich64_512(a, b);
}
// Width-dispatching wrappers around storeNNN(): write vector a to ptr.
void simd_store(void *ptr, const m128 &a) {
    store128(ptr, a);
}

void simd_store(void *ptr, const m256 &a) {
    store256(ptr, a);
}

void simd_store(void *ptr, const m384 &a) {
    store384(ptr, a);
}

void simd_store(void *ptr, const m512 &a) {
    store512(ptr, a);
}
// Width-dispatching wrappers around loadNNN(): the loaded vector is written
// to *a.
void simd_load(m128 *a, const void *ptr) {
    *a = load128(ptr);
}

void simd_load(m256 *a, const void *ptr) {
    *a = load256(ptr);
}

void simd_load(m384 *a, const void *ptr) {
    *a = load384(ptr);
}

void simd_load(m512 *a, const void *ptr) {
    *a = load512(ptr);
}
// Width-dispatching wrappers around loaduNNN(): the loaded vector is written
// to *a.
void simd_loadu(m128 *a, const void *ptr) {
    *a = loadu128(ptr);
}

void simd_loadu(m256 *a, const void *ptr) {
    *a = loadu256(ptr);
}

void simd_loadu(m384 *a, const void *ptr) {
    *a = loadu384(ptr);
}

void simd_loadu(m512 *a, const void *ptr) {
    *a = loadu512(ptr);
}
// Width-dispatching wrappers around storebytesNNN(ptr, a, i).
void simd_storebytes(void *ptr, const m128 &a, unsigned i) {
    storebytes128(ptr, a, i);
}

void simd_storebytes(void *ptr, const m256 &a, unsigned i) {
    storebytes256(ptr, a, i);
}

void simd_storebytes(void *ptr, const m384 &a, unsigned i) {
    storebytes384(ptr, a, i);
}

void simd_storebytes(void *ptr, const m512 &a, unsigned i) {
    storebytes512(ptr, a, i);
}
// Width-dispatching wrappers around loadbytesNNN(ptr, i): the loaded vector
// is written to *a.
void simd_loadbytes(m128 *a, const void *ptr, unsigned i) {
    *a = loadbytes128(ptr, i);
}

void simd_loadbytes(m256 *a, const void *ptr, unsigned i) {
    *a = loadbytes256(ptr, i);
}

void simd_loadbytes(m384 *a, const void *ptr, unsigned i) {
    *a = loadbytes384(ptr, i);
}

void simd_loadbytes(m512 *a, const void *ptr, unsigned i) {
    *a = loadbytes512(ptr, i);
}
// Width-dispatching wrappers around lshift64_mNNN(a, i).
m128 simd_lshift64(const m128 &a, unsigned i) {
    return lshift64_m128(a, i);
}

m256 simd_lshift64(const m256 &a, unsigned i) {
    return lshift64_m256(a, i);
}

m384 simd_lshift64(const m384 &a, unsigned i) {
    return lshift64_m384(a, i);
}

m512 simd_lshift64(const m512 &a, unsigned i) {
    return lshift64_m512(a, i);
}
// Stateless fixture for the typed tests below; each TYPED_TEST registered
// against it is instantiated for every type listed in SimdTypes.
template<typename T>
class SimdUtilsTest : public testing::Test {};

using SimdTypes = ::testing::Types<m128, m256, m384, m512>;
TYPED_TEST_CASE(SimdUtilsTest, SimdTypes);
//
// The tests themselves.
//
// simd_zeroes() must produce a vector in which every byte is 0x00.
TYPED_TEST(SimdUtilsTest, zero) {
    const TypeParam value = simd_zeroes();

    char reference[sizeof(TypeParam)];
    memset(reference, 0, sizeof(reference));

    ASSERT_EQ(0, memcmp(reference, &value, sizeof(value)));
}
// simd_ones() must produce a vector in which every byte is 0xff.
TYPED_TEST(SimdUtilsTest, ones) {
    const TypeParam value = simd_ones();

    char reference[sizeof(TypeParam)];
    memset(reference, 0xff, sizeof(reference));

    ASSERT_EQ(0, memcmp(reference, &value, sizeof(value)));
}
// Truth table for AND on the all-zeroes / all-ones vectors.
TYPED_TEST(SimdUtilsTest, and1) {
    const TypeParam zeroes = simd_zeroes();
    const TypeParam ones = simd_ones();

    EXPECT_FALSE(simd_diff(simd_and(zeroes, ones), zeroes));
    EXPECT_FALSE(simd_diff(simd_and(ones, zeroes), zeroes));
    EXPECT_FALSE(simd_diff(simd_and(zeroes, zeroes), zeroes));
    EXPECT_FALSE(simd_diff(simd_and(ones, ones), ones));
}
// AND of two byte-pattern vectors must match the scalar AND in every byte.
TYPED_TEST(SimdUtilsTest, and2) {
    TypeParam lhs, rhs;
    memset(&lhs, 0x33, sizeof(lhs));
    memset(&rhs, 0x55, sizeof(rhs));

    const TypeParam result = simd_and(lhs, rhs);
    char bytes[sizeof(TypeParam)];
    memcpy(bytes, &result, sizeof(bytes));

    const char expected = 0x33 & 0x55;
    for (const char byte : bytes) {
        EXPECT_EQ(expected, byte);
    }
}
// diff256() returns 0 for identical inputs and 1 when a single bit differs.
TEST(SimdUtils, diff256) {
    const unsigned total_bits = 256;

    // Identical operands never differ.
    ASSERT_EQ(0U, diff256(zeroes256(), zeroes256()));
    ASSERT_EQ(0U, diff256(ones256(), ones256()));
    for (unsigned bit = 0; bit < total_bits; bit++) {
        const m256 lhs = setbit<m256>(bit);
        const m256 rhs = setbit<m256>(bit);
        ASSERT_EQ(0U, diff256(lhs, rhs));
    }

    // A single set bit against all-zeroes is always reported as a difference.
    for (unsigned bit = 0; bit < total_bits; bit++) {
        const m256 one_bit = setbit<m256>(bit);
        const u32 rv = diff256(zeroes256(), one_bit);
        ASSERT_EQ(1U, rv);
    }
}
// Truth table for OR on the all-zeroes / all-ones vectors.
TYPED_TEST(SimdUtilsTest, or1) {
    const TypeParam zeroes = simd_zeroes();
    const TypeParam ones = simd_ones();

    EXPECT_FALSE(simd_diff(simd_or(zeroes, ones), ones));
    EXPECT_FALSE(simd_diff(simd_or(ones, zeroes), ones));
    EXPECT_FALSE(simd_diff(simd_or(zeroes, zeroes), zeroes));
    EXPECT_FALSE(simd_diff(simd_or(ones, ones), ones));
}
// OR of two byte-pattern vectors must match the scalar OR in every byte.
TYPED_TEST(SimdUtilsTest, or2) {
    TypeParam lhs, rhs;
    memset(&lhs, 0x33, sizeof(lhs));
    memset(&rhs, 0x55, sizeof(rhs));

    // NOTE(review): this check exercises diffrich256 on m256 only and does
    // not involve TypeParam or simd_or; it presumably belongs in a dedicated
    // diffrich256 test -- confirm before moving it.
    for (unsigned word = 0; word < 8; word++) {
        for (unsigned bit = 0; bit < 32; bit++) {
            const unsigned idx = word * 32 + bit;
            const m256 x = setbit<m256>(idx);
            const m256 y = zeroes256();
            ASSERT_EQ(1U << word, diffrich256(x, y))
                << "bit " << idx << " not happy";
        }
    }

    const TypeParam result = simd_or(lhs, rhs);
    char bytes[sizeof(TypeParam)];
    memcpy(bytes, &result, sizeof(bytes));

    const char expected = 0x33 | 0x55;
    for (const char byte : bytes) {
        EXPECT_EQ(expected, byte);
    }
}
// Truth table for XOR on the all-zeroes / all-ones vectors.
TYPED_TEST(SimdUtilsTest, xor1) {
    const TypeParam zeroes = simd_zeroes();
    const TypeParam ones = simd_ones();

    EXPECT_FALSE(simd_diff(simd_xor(zeroes, ones), ones));
    EXPECT_FALSE(simd_diff(simd_xor(ones, zeroes), ones));
    EXPECT_FALSE(simd_diff(simd_xor(zeroes, zeroes), zeroes));
    EXPECT_FALSE(simd_diff(simd_xor(ones, ones), zeroes));
}
// XOR of two byte-pattern vectors must match the scalar XOR in every byte.
TYPED_TEST(SimdUtilsTest, xor2) {
    TypeParam lhs, rhs;
    memset(&lhs, 0x33, sizeof(lhs));
    memset(&rhs, 0x55, sizeof(rhs));

    const TypeParam result = simd_xor(lhs, rhs);
    char bytes[sizeof(TypeParam)];
    memcpy(bytes, &result, sizeof(bytes));

    const char expected = 0x33 ^ 0x55;
    for (const char byte : bytes) {
        EXPECT_EQ(expected, byte);
    }
}
// Truth table for ANDNOT on the all-zeroes / all-ones vectors; only
// andnot(zeroes, ones) is expected to yield all-ones.
TYPED_TEST(SimdUtilsTest, andnot1) {
    const TypeParam zeroes = simd_zeroes();
    const TypeParam ones = simd_ones();

    EXPECT_FALSE(simd_diff(simd_andnot(zeroes, ones), ones));
    EXPECT_FALSE(simd_diff(simd_andnot(ones, zeroes), zeroes));
    EXPECT_FALSE(simd_diff(simd_andnot(zeroes, zeroes), zeroes));
    EXPECT_FALSE(simd_diff(simd_andnot(ones, ones), zeroes));
}
// ANDNOT of two byte-pattern vectors must equal (~first & second) in every
// byte.
TYPED_TEST(SimdUtilsTest, andnot2) {
    TypeParam lhs, rhs;
    memset(&lhs, 0x33, sizeof(lhs));
    memset(&rhs, 0x55, sizeof(rhs));

    const TypeParam result = simd_andnot(lhs, rhs);
    char bytes[sizeof(TypeParam)];
    memcpy(bytes, &result, sizeof(bytes));

    const char expected = ~0x33 & 0x55;
    for (const char byte : bytes) {
        EXPECT_EQ(expected, byte);
    }
}
// NOT maps all-zeroes to all-ones and back again.
TYPED_TEST(SimdUtilsTest, not1) {
    const TypeParam zeroes = simd_zeroes();
    const TypeParam ones = simd_ones();

    EXPECT_FALSE(simd_diff(simd_not(zeroes), ones));
    EXPECT_FALSE(simd_diff(simd_not(ones), zeroes));
}
// NOT of a byte-pattern vector must match the scalar complement in every
// byte.
TYPED_TEST(SimdUtilsTest, not2) {
    TypeParam input;
    memset(&input, 0x33, sizeof(input));

    const TypeParam result = simd_not(input);
    char bytes[sizeof(TypeParam)];
    memcpy(bytes, &result, sizeof(bytes));

    const char expected = ~0x33;
    for (const char byte : bytes) {
        EXPECT_EQ(expected, byte);
    }
}
// isnonzero is false only for the all-zeroes vector; any single set bit must
// be detected.
TYPED_TEST(SimdUtilsTest, isnonzero) {
    TypeParam value = simd_zeroes();
    EXPECT_FALSE(simd_isnonzero(value));

    value = simd_ones();
    EXPECT_TRUE(simd_isnonzero(value));

    // Try every 1-bit case.
    for (size_t byte = 0; byte < sizeof(TypeParam); byte++) {
        for (size_t bit = 0; bit < 8; bit++) {
            char raw[sizeof(TypeParam)];
            memset(raw, 0, sizeof(raw));
            raw[byte] = 1 << bit;

            TypeParam probe;
            memcpy(&probe, raw, sizeof(probe));
            EXPECT_TRUE(simd_isnonzero(probe));
        }
    }
}
// Clearing bit i of an all-ones vector must change the vector, and OR-ing
// bit i back in must restore all-ones (i.e. no other bit was touched).
TYPED_TEST(SimdUtilsTest, clearbit) {
    const unsigned int total_bits = sizeof(TypeParam) * 8;
    const TypeParam ones = simd_ones();

    for (unsigned int i = 0; i < total_bits; i++) {
        TypeParam cleared = simd_ones();
        simd_clearbit(&cleared, i);
        ASSERT_TRUE(simd_diff(cleared, ones))
            << "bit " << i << " wasn't cleared";

        const TypeParam mask = setbit<TypeParam>(i);
        ASSERT_FALSE(simd_diff(ones, simd_or(cleared, mask)))
            << "clearing bit " << i << " caused collateral damage";
    }
}
// testbit must report every bit of all-ones as set, and exactly one bit as
// set in each single-bit vector.
TYPED_TEST(SimdUtilsTest, testbit) {
    const unsigned int total_bits = sizeof(TypeParam) * 8;
    const TypeParam ones = simd_ones();

    // First, all bits are on in 'ones'.
    for (unsigned int i = 0; i < total_bits; i++) {
        ASSERT_TRUE(simd_testbit(ones, i)) << "bit " << i << " is on";
    }

    // Try individual bits; only 'i' should be on.
    for (unsigned int i = 0; i < total_bits; i++) {
        const TypeParam one_bit = setbit<TypeParam>(i);
        for (unsigned int j = 0; j < total_bits; j++) {
            const bool want = (i == j);
            ASSERT_EQ(want, simd_testbit(one_bit, j))
                << "bit " << i << " is wrong";
        }
    }
}
// simd_setbit must agree with the byte-level setbit<>() helper for every
// single bit, and setting every bit in turn must produce all-ones.
TYPED_TEST(SimdUtilsTest, setbit) {
    const unsigned int total_bits = sizeof(TypeParam) * 8;

    // Try individual bits; only 'i' should be on.
    for (unsigned int i = 0; i < total_bits; i++) {
        const TypeParam reference = setbit<TypeParam>(i);
        TypeParam built = simd_zeroes();
        simd_setbit(&built, i);
        ASSERT_FALSE(simd_diff(reference, built));
    }

    // Turn on all bits, one at a time.
    TypeParam accum = simd_zeroes();
    for (unsigned int i = 0; i < total_bits; i++) {
        simd_setbit(&accum, i);
    }
    const TypeParam ones = simd_ones();
    ASSERT_FALSE(simd_diff(ones, accum));
}
// diffrich returns a bitmask with one bit per 32-bit word that differs.
TYPED_TEST(SimdUtilsTest, diffrich) {
    const unsigned total_bits = sizeof(TypeParam) * 8;
    const TypeParam zeroes = simd_zeroes();
    const TypeParam ones = simd_ones();

    // Identical operands: no word differs.
    EXPECT_EQ(0U, simd_diffrich(zeroes, zeroes));
    EXPECT_EQ(0U, simd_diffrich(ones, ones));
    for (unsigned bit = 0; bit < total_bits; bit++) {
        const TypeParam lhs = setbit<TypeParam>(bit);
        const TypeParam rhs = setbit<TypeParam>(bit);
        EXPECT_EQ(0U, simd_diffrich(lhs, rhs));
    }

    // and nothing is on in zeroes
    for (unsigned int i = 0; i < total_bits; i++) {
        ASSERT_EQ(0, simd_testbit(zeroes, i)) << "bit " << i << " is off";
    }

    // All-zeroes and all-ones differ in all words.
    const u32 all_words = (1U << (total_bits / 32)) - 1;
    EXPECT_EQ(all_words, simd_diffrich(zeroes, ones));

    // A single set bit flags only the 32-bit word that contains it.
    for (unsigned bit = 0; bit < total_bits; bit++) {
        const TypeParam one_bit = setbit<TypeParam>(bit);
        const u32 rv = simd_diffrich(zeroes, one_bit);
        EXPECT_EQ(1U << (bit / 32), rv);
    }
}
// diffrich64 reports differences per 64-bit word, using every second bit of
// the result.
TYPED_TEST(SimdUtilsTest, diffrich64) {
    const unsigned total_bits = sizeof(TypeParam) * 8;
    const TypeParam zeroes = simd_zeroes();
    const TypeParam ones = simd_ones();

    // Identical operands: no word differs.
    EXPECT_EQ(0U, simd_diffrich64(zeroes, zeroes));
    EXPECT_EQ(0U, simd_diffrich64(ones, ones));
    for (unsigned bit = 0; bit < total_bits; bit++) {
        const TypeParam lhs = setbit<TypeParam>(bit);
        const TypeParam rhs = setbit<TypeParam>(bit);
        EXPECT_EQ(0U, simd_diffrich64(lhs, rhs));
    }

    // All-zeroes and all-ones differ in all words, which will result in every
    // second bit being on.
    const u32 all_words = ((1U << (total_bits / 32)) - 1) & 0x55555555u;
    EXPECT_EQ(all_words, simd_diffrich64(zeroes, ones));

    // A single set bit flags only the 64-bit word that contains it.
    for (unsigned bit = 0; bit < total_bits; bit++) {
        const TypeParam one_bit = setbit<TypeParam>(bit);
        const u32 rv = simd_diffrich64(zeroes, one_bit);
        EXPECT_EQ(1U << ((bit / 64) * 2), rv);
    }
}
// Unaligned load: plant an all-ones pattern at every byte offset inside a
// zeroed buffer and check that loadu reads it back intact.
TYPED_TEST(SimdUtilsTest, loadu) {
    const TypeParam ones = simd_ones();

    const size_t mem_len = sizeof(ones) * 2;
    const auto buffer = std::make_unique<char[]>(mem_len);
    char *mem = buffer.get();

    for (size_t offset = 1; offset < sizeof(ones); offset++) {
        memset(mem, 0, mem_len);
        memset(mem + offset, 0xff, sizeof(ones));

        TypeParam loaded;
        simd_loadu(&loaded, mem + offset);
        ASSERT_FALSE(simd_diff(loaded, ones));
    }
}
// Aligned load and store: write a vector holding a counting byte pattern to
// suitably aligned memory, check the bytes in memory, then load it back and
// check the round trip.
TYPED_TEST(SimdUtilsTest, load_store) {
    char pattern[sizeof(TypeParam)];
    for (size_t i = 0; i < sizeof(pattern); i++) {
        pattern[i] = static_cast<char>(i % 256);
    }
    TypeParam a;
    memcpy(&a, pattern, sizeof(a));

    auto mem_ptr = make_bytecode_ptr<char>(sizeof(a), alignof(TypeParam));
    char *mem = mem_ptr.get();

    // The buffer was requested with the vector type's own alignment, so check
    // exactly that: a fixed 16-byte check is too weak for the wider types.
    ASSERT_EQ(0U, reinterpret_cast<size_t>(mem) % alignof(TypeParam));

    memset(mem, 0, sizeof(a));
    simd_store(mem, a);
    ASSERT_EQ(0, memcmp(mem, pattern, sizeof(a)));

    TypeParam b;
    simd_load(&b, mem);
    ASSERT_FALSE(simd_diff(a, b));
}
// Packed load and store: store the first `len` bytes of a patterned vector,
// load `len` bytes back, and check the result byte by byte.
TYPED_TEST(SimdUtilsTest, loadbytes_storebytes) {
    char pattern[sizeof(TypeParam)];
    for (size_t i = 0; i < sizeof(pattern); i++) {
        pattern[i] = (char)(i % 256);
    }
    TypeParam source;
    memcpy(&source, pattern, sizeof(source));

    char mem[sizeof(TypeParam)];
    for (size_t len = 1; len < sizeof(TypeParam); len++) {
        memset(mem, 0xff, sizeof(TypeParam));
        simd_storebytes(mem, source, len);

        TypeParam loaded;
        simd_loadbytes(&loaded, mem, len);
        char got[sizeof(TypeParam)];
        memcpy(got, &loaded, sizeof(got));

        // First len bytes should match the source, remaining bytes are zero.
        for (size_t j = 0; j < sizeof(TypeParam); j++) {
            ASSERT_EQ(j < len ? pattern[j] : 0, got[j]);
        }
    }
}
// lshift64 must shift every 64-bit lane independently, for each shift count
// from 0 to 63 and for a handful of literal shift counts.
TYPED_TEST(SimdUtilsTest, lshift64) {
    TypeParam a;
    memset(&a, 0x5a, sizeof(a));

    static constexpr u64a exp_val = 0x5a5a5a5a5a5a5a5aULL;

    // Check that every 64-bit lane of `shifted` holds `expected`.
    const auto expect_all_lanes = [](const TypeParam &shifted, u64a expected) {
        u64a lanes[sizeof(TypeParam) / 8];
        memcpy(lanes, &shifted, sizeof(lanes));
        for (const u64a lane : lanes) {
            EXPECT_EQ(expected, lane);
        }
    };

    for (unsigned s = 0; s < 64; s++) {
        expect_all_lanes(simd_lshift64(a, s), exp_val << s);
    }

    /* Clang 3.4 on FreeBSD 10 crashes on the following - disable for now */
#if !(defined(__FreeBSD__) && defined(__clang__) && __clang_major__ == 3)
    // test immediates
    expect_all_lanes(simd_lshift64(a, 1), exp_val << 1);
    expect_all_lanes(simd_lshift64(a, 2), exp_val << 2);
    expect_all_lanes(simd_lshift64(a, 7), exp_val << 7);
    expect_all_lanes(simd_lshift64(a, 31), exp_val << 31);
#endif
}
// Pin the required alignment, in bytes, of each SIMD vector type.
TEST(SimdUtilsTest, alignment) {
    const size_t align128 = alignof(m128);
    const size_t align256 = alignof(m256);
    const size_t align384 = alignof(m384);
    const size_t align512 = alignof(m512);

    ASSERT_EQ(16U, align128);
    ASSERT_EQ(32U, align256);
    ASSERT_EQ(16U, align384);
    ASSERT_EQ(64U, align512);
}
// movq() on three inputs: all-ones, every byte 0x80, and a vector whose two
// 64-bit halves differ (only the 0x123456789abcdef half must come back).
TEST(SimdUtilsTest, movq) {
    // All-ones vector.
    m128 simd = ones128();
    u64a r = movq(simd);
    ASSERT_EQ(~static_cast<u64a>(0), r);

    // Every byte 0x80: check both the vector and the extracted qword.
    char cmp[sizeof(m128)];
    memset(cmp, 0x80, sizeof(cmp));
    simd = set1_16x8(0x80);
    r = movq(simd);
    ASSERT_EQ(0, memcmp(cmp, &simd, sizeof(simd)));
    ASSERT_EQ(0, memcmp(cmp, &r, sizeof(r)));

    // Build the vector with distinct halves in the way each architecture
    // requires.
#if defined(HAVE_SIMD_128_BITS)
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
    simd = _mm_set_epi64x(~0LL, 0x123456789abcdef);
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
    int64x2_t a = { 0x123456789abcdefLL, ~0LL };
    simd = vreinterpretq_s32_s64(a);
#elif defined(ARCH_PPC64EL)
    int64x2_t a = {0x123456789abcdefLL, ~0LL };
    simd = (m128) a;
#endif
#endif
    r = movq(simd);
    ASSERT_EQ(r, 0x123456789abcdef);
}
// set1_16x8(v) must fill all 16 bytes with v, for every byte value.
TEST(SimdUtilsTest, set1_16x8) {
    char reference[sizeof(m128)];

    for (unsigned value = 0; value < 256; value++) {
        const m128 filled = set1_16x8(value);
        memset(reference, value, sizeof(filled));
        ASSERT_EQ(0, memcmp(reference, &filled, sizeof(filled)));
    }
}
// set1_4x32(v) must replicate v into all four 32-bit lanes.
TEST(SimdUtilsTest, set1_4x32) {
    const u32 reference[4] = { 0x12345678, 0x12345678, 0x12345678, 0x12345678 };

    const m128 filled = set1_4x32(reference[0]);

    ASSERT_EQ(0, memcmp(reference, &filled, sizeof(filled)));
}
#if defined(HAVE_SIMD_256_BITS)
// set1_32x8(v) must fill all 32 bytes with v, for every byte value.
TEST(SimdUtilsTest, set32x8) {
    char reference[sizeof(m256)];

    for (unsigned value = 0; value < 256; value++) {
        const m256 filled = set1_32x8(value);
        memset(reference, value, sizeof(filled));
        ASSERT_EQ(0, memcmp(reference, &filled, sizeof(filled)));
    }
}
// set1_2x128(x) applied to a byte-filled 128-bit vector must equal the
// byte-filled 256-bit vector built directly with set1_32x8.
TEST(SimdUtilsTest, set2x128) {
    char reference[sizeof(m256)];

    for (unsigned value = 0; value < 256; value++) {
        const m128 half = set1_16x8(value);
        const m256 direct = set1_32x8(value);
        const m256 doubled = set1_2x128(half);

        memset(reference, value, sizeof(doubled));
        ASSERT_EQ(0, memcmp(reference, &doubled, sizeof(doubled)));
        ASSERT_EQ(0, memcmp(&direct, &doubled, sizeof(doubled)));
    }
}
#endif
// variable_byte_shift_m128(in, n) must match lshiftbyte_m128 for positive n
// and rshiftbyte_m128 for negative n; a shift of +/-16 yields all-zeroes.
// Shift counts are passed as literals to the fixed-shift helpers, as in the
// original test.
TEST(SimdUtilsTest, variableByteShift128) {
    char base[] = "0123456789ABCDEF";
    const m128 in = loadu128(base);

    const auto same = [](const m128 &lhs, const m128 &rhs) {
        return !diff128(lhs, rhs);
    };

    // Negative counts shift right.
    EXPECT_TRUE(same(rshiftbyte_m128(in, 0), variable_byte_shift_m128(in, 0)));
    EXPECT_TRUE(same(rshiftbyte_m128(in, 1), variable_byte_shift_m128(in, -1)));
    EXPECT_TRUE(same(rshiftbyte_m128(in, 2), variable_byte_shift_m128(in, -2)));
    EXPECT_TRUE(same(rshiftbyte_m128(in, 3), variable_byte_shift_m128(in, -3)));
    EXPECT_TRUE(same(rshiftbyte_m128(in, 4), variable_byte_shift_m128(in, -4)));
    EXPECT_TRUE(same(rshiftbyte_m128(in, 5), variable_byte_shift_m128(in, -5)));
    EXPECT_TRUE(same(rshiftbyte_m128(in, 6), variable_byte_shift_m128(in, -6)));
    EXPECT_TRUE(same(rshiftbyte_m128(in, 7), variable_byte_shift_m128(in, -7)));
    EXPECT_TRUE(same(rshiftbyte_m128(in, 8), variable_byte_shift_m128(in, -8)));
    EXPECT_TRUE(same(rshiftbyte_m128(in, 9), variable_byte_shift_m128(in, -9)));
    EXPECT_TRUE(same(rshiftbyte_m128(in, 10), variable_byte_shift_m128(in, -10)));

    // Positive counts shift left.
    EXPECT_TRUE(same(lshiftbyte_m128(in, 0), variable_byte_shift_m128(in, 0)));
    EXPECT_TRUE(same(lshiftbyte_m128(in, 1), variable_byte_shift_m128(in, 1)));
    EXPECT_TRUE(same(lshiftbyte_m128(in, 2), variable_byte_shift_m128(in, 2)));
    EXPECT_TRUE(same(lshiftbyte_m128(in, 3), variable_byte_shift_m128(in, 3)));
    EXPECT_TRUE(same(lshiftbyte_m128(in, 4), variable_byte_shift_m128(in, 4)));
    EXPECT_TRUE(same(lshiftbyte_m128(in, 5), variable_byte_shift_m128(in, 5)));
    EXPECT_TRUE(same(lshiftbyte_m128(in, 6), variable_byte_shift_m128(in, 6)));
    EXPECT_TRUE(same(lshiftbyte_m128(in, 7), variable_byte_shift_m128(in, 7)));
    EXPECT_TRUE(same(lshiftbyte_m128(in, 8), variable_byte_shift_m128(in, 8)));
    EXPECT_TRUE(same(lshiftbyte_m128(in, 9), variable_byte_shift_m128(in, 9)));
    EXPECT_TRUE(same(lshiftbyte_m128(in, 10), variable_byte_shift_m128(in, 10)));

    // Shifting by the full width in either direction leaves nothing behind.
    EXPECT_TRUE(same(zeroes128(), variable_byte_shift_m128(in, 16)));
    EXPECT_TRUE(same(zeroes128(), variable_byte_shift_m128(in, -16)));
}
// max_u8_m128 must pick the larger unsigned byte in each of the 16 lanes.
TEST(SimdUtilsTest, max_u8_m128) {
    char base1[] = "0123456789ABCDE\xfe";
    char base2[] = "!!23455889aBCd\xff\xff";
    char expec[] = "0123456889aBCd\xff\xff";

    const m128 lhs = loadu128(base1);
    const m128 rhs = loadu128(base2);
    const m128 want = loadu128(expec);

    const m128 got = max_u8_m128(lhs, rhs);
    EXPECT_TRUE(!diff128(got, want));
}
// min_u8_m128 must pick the smaller unsigned byte in each of the 16 lanes.
TEST(SimdUtilsTest, min_u8_m128) {
    char base1[] = "0123456789ABCDE\xfe";
    char base2[] = "!!23455889aBCd\xff\xff";
    char expec[] = "!!23455789ABCDE\xfe";

    const m128 lhs = loadu128(base1);
    const m128 rhs = loadu128(base2);
    const m128 want = loadu128(expec);

    const m128 got = min_u8_m128(lhs, rhs);
    EXPECT_TRUE(!diff128(got, want));
}
// sadd_u8_m128 adds unsigned bytes lane by lane; the expected vector has
// 0xff in the lanes whose sum exceeds 0xff.
TEST(SimdUtilsTest, sadd_u8_m128) {
    unsigned char base1[] = {0, 0x80, 0xff, 'A', '1', '2', '3', '4',
                             '1', '2', '3', '4', '1', '2', '3', '4'};
    unsigned char base2[] = {'a', 0x80, 'b', 'A', 0x10, 0x10, 0x10, 0x10,
                             0x30, 0x30, 0x30, 0x30, 0, 0, 0, 0};
    unsigned char expec[] = {'a', 0xff, 0xff, 0x82, 'A', 'B', 'C', 'D',
                             'a', 'b', 'c', 'd', '1', '2', '3', '4'};

    const m128 lhs = loadu128(base1);
    const m128 rhs = loadu128(base2);
    const m128 want = loadu128(expec);

    const m128 got = sadd_u8_m128(lhs, rhs);
    EXPECT_TRUE(!diff128(got, want));
}
// sub_u8_m128 subtracts the second operand from the first, byte by byte.
TEST(SimdUtilsTest, sub_u8_m128) {
    unsigned char base1[] = {'a', 0xff, 0xff, 0x82, 'A', 'B', 'C', 'D',
                             'a', 'b', 'c', 'd', '1', '2', '3', '4'};
    unsigned char base2[] = {0, 0x80, 0xff, 'A', '1', '2', '3', '4',
                             '1', '2', '3', '4', '1', '2', '3', '4'};
    unsigned char expec[] = {'a', 0x7f, 0, 'A', 0x10, 0x10, 0x10, 0x10,
                             0x30, 0x30, 0x30, 0x30, 0, 0, 0, 0};

    const m128 lhs = loadu128(base1);
    const m128 rhs = loadu128(base2);
    const m128 want = loadu128(expec);

    const m128 got = sub_u8_m128(lhs, rhs);
    EXPECT_TRUE(!diff128(got, want));
}
// load_m128_from_u64a must produce the same vector as set2x64(0, value),
// i.e. the 64-bit value paired with a zero second half.
TEST(SimdUtilsTest, load_m128_from_u64a) {
    srand (time(nullptr));
    // A single rand() result never reaches the upper 32 bits of a u64a, so
    // combine two draws to exercise both halves of the 64-bit input.
    const u64a high = static_cast<u64a>(rand());
    const u64a low = static_cast<u64a>(rand());
    u64a tmp = (high << 32) | low;

    m128 res = load_m128_from_u64a(&tmp);
    m128 cmp = set2x64(0LL, tmp);
    //print_m128_16x8("res",res);
    //print_m128_16x8("cmp",cmp);
    EXPECT_TRUE(!diff128(res, cmp));
}
// movemask128 must return one bit per byte lane, set exactly for the lanes
// filled with 0xff.
TEST(SimdUtilsTest, movemask_128) {
    srand (time(nullptr));

    // The random pattern is limited to 1..100 and therefore only ever sets
    // the low seven lanes; add the all-lanes pattern and every single-lane
    // pattern so that each of the 16 lanes is exercised.
    u16 patterns[18];
    size_t count = 0;
    patterns[count++] = static_cast<u16>(rand() % 100 + 1);
    patterns[count++] = 0xffff;
    for (int lane = 0; lane < 16; lane++) {
        patterns[count++] = static_cast<u16>(1U << lane);
    }

    for (const u16 pattern : patterns) {
        u8 vec[16] = {0};
        for (int lane = 0; lane < 16; lane++) {
            if (pattern & (1 << lane)) {
                vec[lane] = 0xff;
            }
        }

        const m128 v = loadu128(vec);
        const u16 mask = movemask128(v);
        ASSERT_EQ(pattern, mask) << "lane pattern " << pattern;
    }
}
// pshufb_m128(data, control): each result byte is either zero (control byte
// has bit 0x80 set) or the data byte selected by the low four control bits.
TEST(SimdUtilsTest, pshufb_m128) {
    srand (time(NULL));

    // Random data bytes.
    u8 vec[16];
    for (int i = 0; i < 16; i++) {
        vec[i] = rand() % 1000 + 1;
    }

    // Control bytes without the 0x80 bit (at most 15 + 99).
    u8 vec2[16];
    for (int i = 0; i < 16; i++) {
        vec2[i] = i + (rand() % 100 + 0);
    }

    // On Intel, a control byte with bit 0x80 set produces zero; otherwise the
    // lane selected is (control & 0xf).
    // On NEON or PPC, a control byte >= 16 produces zero; otherwise it
    // selects that lane.
    // Hence the special case below has to be checked for NEON and PPC.
    // Ensure that vec3 has at least one 0x80 element.
    u8 vec3[16] = {0};
    vec3[15] = 0x80;
    for (int i = 0; i < 15; i++) {
        const int l = rand() % 1000 + 0;
        vec3[i] = (l % 16 == 0) ? 0x80 : vec2[i];
    }
    /*
    printf("vec3: ");
    for(int i=15; i>=0; i--) { printf("%02x, ", vec3[i]); }
    printf("\n");
    */

    // Test special case: control bytes that include 0x80.
    m128 v1 = loadu128(vec);
    m128 v2 = loadu128(vec3);
    m128 vres = pshufb_m128(v1, v2);
    u8 res[16];
    storeu128(res, vres);
    for (int i = 0; i < 16; i++) {
        if (vec3[i] & 0x80) {
            ASSERT_EQ(res[i], 0);
        } else {
            ASSERT_EQ(vec[vec3[i] % 16], res[i]);
        }
    }

    // Test other cases: control bytes taken straight from vec2.
    v1 = loadu128(vec);
    v2 = loadu128(vec2);
    vres = pshufb_m128(v1, v2);
    storeu128(res, vres);
    for (int i = 0; i < 16; i++) {
        if (vec2[i] & 0x80) {
            ASSERT_EQ(res[i], 0);
        } else {
            ASSERT_EQ(vec[vec2[i] % 16], res[i]);
        }
    }
}
/*
 * TEST_ALIGNR128(v1, v2, buf, l): check palignr(v2, v1, l) against memory.
 *
 *   v1, v2 - the two 128-bit vectors handed to palignr
 *   buf    - byte buffer the result is compared with; result byte i must
 *            equal buf[i + l]
 *   l      - byte offset passed to palignr
 *
 * The macro stores the result into a 16-byte array named `res`, which must be
 * in scope at the point of use. It is wrapped in do { } while (0) so that it
 * behaves as a single statement, and it uses its `buf` argument rather than
 * reaching for a variable in the caller's scope.
 */
#define TEST_ALIGNR128(v1, v2, buf, l) do {                  \
        const m128 v_aligned = palignr((v2), (v1), (l));     \
        storeu128(res, v_aligned);                           \
        for (size_t i = 0; i < 16; i++) {                    \
            ASSERT_EQ(res[i], (buf)[i + (l)]);               \
        }                                                    \
    } while (0)
// palignr across two adjacent 16-byte vectors, for every byte offset 0..15.
// The names `vec` and `res` are referenced by TEST_ALIGNR128.
TEST(SimdUtilsTest, Alignr128) {
    // 32 consecutive byte values, so each result byte identifies its source.
    u8 vec[32];
    for (size_t i = 0; i < sizeof(vec); i++) {
        vec[i] = static_cast<u8>(i);
    }
    u8 res[16];

    const m128 v1 = loadu128(vec);
    const m128 v2 = loadu128(vec + 16);

    for (int offset = 0; offset < 16; offset++) {
        TEST_ALIGNR128(v1, v2, vec, offset);
    }
}
} // namespace