Fix names; use our own intrinsics instead of explicit _mm* ones

This commit is contained in:
Konstantinos Margaritis
2020-09-23 11:51:21 +03:00
parent f7a6b8934c
commit 5333467249
15 changed files with 137 additions and 137 deletions

View File

@@ -150,7 +150,7 @@ m128 loadcompressed128_32bit(const void *ptr, m128 mvec) {
u32 x[4] = { expand32(v[0], m[0]), expand32(v[1], m[1]),
expand32(v[2], m[2]), expand32(v[3], m[3]) };
-    return _mm_set_epi32(x[3], x[2], x[1], x[0]);
+    return set32x4(x[3], x[2], x[1], x[0]);
}
#endif
@@ -158,7 +158,7 @@ m128 loadcompressed128_32bit(const void *ptr, m128 mvec) {
static really_inline
m128 loadcompressed128_64bit(const void *ptr, m128 mvec) {
// First, decompose our vectors into 64-bit chunks.
-    u64a m[2] = { movq(mvec), movq(_mm_srli_si128(mvec, 8)) };
+    u64a m[2] = { movq(mvec), movq(rshiftbyte_m128(mvec, 8)) };
u32 bits[2] = { popcount64(m[0]), popcount64(m[1]) };
u64a v[2];
@@ -167,7 +167,7 @@ m128 loadcompressed128_64bit(const void *ptr, m128 mvec) {
u64a x[2] = { expand64(v[0], m[0]), expand64(v[1], m[1]) };
-    return _mm_set_epi64x(x[1], x[0]);
+    return set2x64(x[1], x[0]);
}
#endif
@@ -264,11 +264,11 @@ m256 loadcompressed256_32bit(const void *ptr, m256 mvec) {
expand32(v[6], m[6]), expand32(v[7], m[7]) };
#if !defined(HAVE_AVX2)
-    m256 xvec = { .lo = _mm_set_epi32(x[3], x[2], x[1], x[0]),
-                  .hi = _mm_set_epi32(x[7], x[6], x[5], x[4]) };
+    m256 xvec = { .lo = set32x4(x[3], x[2], x[1], x[0]),
+                  .hi = set32x4(x[7], x[6], x[5], x[4]) };
#else
-    m256 xvec = _mm256_set_epi32(x[7], x[6], x[5], x[4],
-                                 x[3], x[2], x[1], x[0]);
+    m256 xvec = set32x8(x[7], x[6], x[5], x[4],
+                        x[3], x[2], x[1], x[0]);
#endif
return xvec;
}
@@ -291,10 +291,10 @@ m256 loadcompressed256_64bit(const void *ptr, m256 mvec) {
expand64(v[2], m[2]), expand64(v[3], m[3]) };
#if !defined(HAVE_AVX2)
-    m256 xvec = { .lo = _mm_set_epi64x(x[1], x[0]),
-                  .hi = _mm_set_epi64x(x[3], x[2]) };
+    m256 xvec = { .lo = set2x64(x[1], x[0]),
+                  .hi = set2x64(x[3], x[2]) };
#else
-    m256 xvec = _mm256_set_epi64x(x[3], x[2], x[1], x[0]);
+    m256 xvec = set4x64(x[3], x[2], x[1], x[0]);
#endif
return xvec;
}
@@ -402,9 +402,9 @@ m384 loadcompressed384_32bit(const void *ptr, m384 mvec) {
expand32(v[8], m[8]), expand32(v[9], m[9]),
expand32(v[10], m[10]), expand32(v[11], m[11]) };
-    m384 xvec = { .lo = _mm_set_epi32(x[3], x[2], x[1], x[0]),
-                  .mid = _mm_set_epi32(x[7], x[6], x[5], x[4]),
-                  .hi = _mm_set_epi32(x[11], x[10], x[9], x[8]) };
+    m384 xvec = { .lo = set32x4(x[3], x[2], x[1], x[0]),
+                  .mid = set32x4(x[7], x[6], x[5], x[4]),
+                  .hi = set32x4(x[11], x[10], x[9], x[8]) };
return xvec;
}
#endif
@@ -427,9 +427,9 @@ m384 loadcompressed384_64bit(const void *ptr, m384 mvec) {
expand64(v[2], m[2]), expand64(v[3], m[3]),
expand64(v[4], m[4]), expand64(v[5], m[5]) };
-    m384 xvec = { .lo = _mm_set_epi64x(x[1], x[0]),
-                  .mid = _mm_set_epi64x(x[3], x[2]),
-                  .hi = _mm_set_epi64x(x[5], x[4]) };
+    m384 xvec = { .lo = set2x64(x[1], x[0]),
+                  .mid = set2x64(x[3], x[2]),
+                  .hi = set2x64(x[5], x[4]) };
return xvec;
}
#endif
@@ -548,20 +548,20 @@ m512 loadcompressed512_32bit(const void *ptr, m512 mvec) {
m512 xvec;
#if defined(HAVE_AVX512)
-    xvec = _mm512_set_epi32(x[15], x[14], x[13], x[12],
-                            x[11], x[10], x[9], x[8],
-                            x[7], x[6], x[5], x[4],
-                            x[3], x[2], x[1], x[0]);
+    xvec = set32x16(x[15], x[14], x[13], x[12],
+                    x[11], x[10], x[9], x[8],
+                    x[7], x[6], x[5], x[4],
+                    x[3], x[2], x[1], x[0]);
#elif defined(HAVE_AVX2)
-    xvec.lo = _mm256_set_epi32(x[7], x[6], x[5], x[4],
-                               x[3], x[2], x[1], x[0]);
-    xvec.hi = _mm256_set_epi32(x[15], x[14], x[13], x[12],
-                               x[11], x[10], x[9], x[8]);
+    xvec.lo = set32x8(x[7], x[6], x[5], x[4],
+                      x[3], x[2], x[1], x[0]);
+    xvec.hi = set32x8(x[15], x[14], x[13], x[12],
+                      x[11], x[10], x[9], x[8]);
#else
-    xvec.lo.lo = _mm_set_epi32(x[3], x[2], x[1], x[0]);
-    xvec.lo.hi = _mm_set_epi32(x[7], x[6], x[5], x[4]);
-    xvec.hi.lo = _mm_set_epi32(x[11], x[10], x[9], x[8]);
-    xvec.hi.hi = _mm_set_epi32(x[15], x[14], x[13], x[12]);
+    xvec.lo.lo = set32x4(x[3], x[2], x[1], x[0]);
+    xvec.lo.hi = set32x4(x[7], x[6], x[5], x[4]);
+    xvec.hi.lo = set32x4(x[11], x[10], x[9], x[8]);
+    xvec.hi.hi = set32x4(x[15], x[14], x[13], x[12]);
#endif
return xvec;
}
@@ -588,16 +588,16 @@ m512 loadcompressed512_64bit(const void *ptr, m512 mvec) {
expand64(v[6], m[6]), expand64(v[7], m[7]) };
#if defined(HAVE_AVX512)
-    m512 xvec = _mm512_set_epi64(x[7], x[6], x[5], x[4],
+    m512 xvec = set64x8(x[7], x[6], x[5], x[4],
                                  x[3], x[2], x[1], x[0]);
#elif defined(HAVE_AVX2)
-    m512 xvec = { .lo = _mm256_set_epi64x(x[3], x[2], x[1], x[0]),
-                  .hi = _mm256_set_epi64x(x[7], x[6], x[5], x[4])};
+    m512 xvec = { .lo = set4x64(x[3], x[2], x[1], x[0]),
+                  .hi = set4x64(x[7], x[6], x[5], x[4])};
#else
-    m512 xvec = { .lo = { _mm_set_epi64x(x[1], x[0]),
-                          _mm_set_epi64x(x[3], x[2]) },
-                  .hi = { _mm_set_epi64x(x[5], x[4]),
-                          _mm_set_epi64x(x[7], x[6]) } };
+    m512 xvec = { .lo = { set2x64(x[1], x[0]),
+                          set2x64(x[3], x[2]) },
+                  .hi = { set2x64(x[5], x[4]),
+                          set2x64(x[7], x[6]) } };
#endif
return xvec;
}