mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-29 19:24:25 +03:00
fix names, use own intrinsic instead of explicit _mm* ones
This commit is contained in:
@@ -150,7 +150,7 @@ m128 loadcompressed128_32bit(const void *ptr, m128 mvec) {
|
||||
u32 x[4] = { expand32(v[0], m[0]), expand32(v[1], m[1]),
|
||||
expand32(v[2], m[2]), expand32(v[3], m[3]) };
|
||||
|
||||
return _mm_set_epi32(x[3], x[2], x[1], x[0]);
|
||||
return set32x4(x[3], x[2], x[1], x[0]);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -158,7 +158,7 @@ m128 loadcompressed128_32bit(const void *ptr, m128 mvec) {
|
||||
static really_inline
|
||||
m128 loadcompressed128_64bit(const void *ptr, m128 mvec) {
|
||||
// First, decompose our vectors into 64-bit chunks.
|
||||
u64a m[2] = { movq(mvec), movq(_mm_srli_si128(mvec, 8)) };
|
||||
u64a m[2] = { movq(mvec), movq(rshiftbyte_m128(mvec, 8)) };
|
||||
|
||||
u32 bits[2] = { popcount64(m[0]), popcount64(m[1]) };
|
||||
u64a v[2];
|
||||
@@ -167,7 +167,7 @@ m128 loadcompressed128_64bit(const void *ptr, m128 mvec) {
|
||||
|
||||
u64a x[2] = { expand64(v[0], m[0]), expand64(v[1], m[1]) };
|
||||
|
||||
return _mm_set_epi64x(x[1], x[0]);
|
||||
return set2x64(x[1], x[0]);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -264,11 +264,11 @@ m256 loadcompressed256_32bit(const void *ptr, m256 mvec) {
|
||||
expand32(v[6], m[6]), expand32(v[7], m[7]) };
|
||||
|
||||
#if !defined(HAVE_AVX2)
|
||||
m256 xvec = { .lo = _mm_set_epi32(x[3], x[2], x[1], x[0]),
|
||||
.hi = _mm_set_epi32(x[7], x[6], x[5], x[4]) };
|
||||
m256 xvec = { .lo = set32x4(x[3], x[2], x[1], x[0]),
|
||||
.hi = set32x4(x[7], x[6], x[5], x[4]) };
|
||||
#else
|
||||
m256 xvec = _mm256_set_epi32(x[7], x[6], x[5], x[4],
|
||||
x[3], x[2], x[1], x[0]);
|
||||
m256 xvec = set32x8(x[7], x[6], x[5], x[4],
|
||||
x[3], x[2], x[1], x[0]);
|
||||
#endif
|
||||
return xvec;
|
||||
}
|
||||
@@ -291,10 +291,10 @@ m256 loadcompressed256_64bit(const void *ptr, m256 mvec) {
|
||||
expand64(v[2], m[2]), expand64(v[3], m[3]) };
|
||||
|
||||
#if !defined(HAVE_AVX2)
|
||||
m256 xvec = { .lo = _mm_set_epi64x(x[1], x[0]),
|
||||
.hi = _mm_set_epi64x(x[3], x[2]) };
|
||||
m256 xvec = { .lo = set2x64(x[1], x[0]),
|
||||
.hi = set2x64(x[3], x[2]) };
|
||||
#else
|
||||
m256 xvec = _mm256_set_epi64x(x[3], x[2], x[1], x[0]);
|
||||
m256 xvec = set4x64(x[3], x[2], x[1], x[0]);
|
||||
#endif
|
||||
return xvec;
|
||||
}
|
||||
@@ -402,9 +402,9 @@ m384 loadcompressed384_32bit(const void *ptr, m384 mvec) {
|
||||
expand32(v[8], m[8]), expand32(v[9], m[9]),
|
||||
expand32(v[10], m[10]), expand32(v[11], m[11]) };
|
||||
|
||||
m384 xvec = { .lo = _mm_set_epi32(x[3], x[2], x[1], x[0]),
|
||||
.mid = _mm_set_epi32(x[7], x[6], x[5], x[4]),
|
||||
.hi = _mm_set_epi32(x[11], x[10], x[9], x[8]) };
|
||||
m384 xvec = { .lo = set32x4(x[3], x[2], x[1], x[0]),
|
||||
.mid = set32x4(x[7], x[6], x[5], x[4]),
|
||||
.hi = set32x4(x[11], x[10], x[9], x[8]) };
|
||||
return xvec;
|
||||
}
|
||||
#endif
|
||||
@@ -427,9 +427,9 @@ m384 loadcompressed384_64bit(const void *ptr, m384 mvec) {
|
||||
expand64(v[2], m[2]), expand64(v[3], m[3]),
|
||||
expand64(v[4], m[4]), expand64(v[5], m[5]) };
|
||||
|
||||
m384 xvec = { .lo = _mm_set_epi64x(x[1], x[0]),
|
||||
.mid = _mm_set_epi64x(x[3], x[2]),
|
||||
.hi = _mm_set_epi64x(x[5], x[4]) };
|
||||
m384 xvec = { .lo = set2x64(x[1], x[0]),
|
||||
.mid = set2x64(x[3], x[2]),
|
||||
.hi = set2x64(x[5], x[4]) };
|
||||
return xvec;
|
||||
}
|
||||
#endif
|
||||
@@ -548,20 +548,20 @@ m512 loadcompressed512_32bit(const void *ptr, m512 mvec) {
|
||||
|
||||
m512 xvec;
|
||||
#if defined(HAVE_AVX512)
|
||||
xvec = _mm512_set_epi32(x[15], x[14], x[13], x[12],
|
||||
x[11], x[10], x[9], x[8],
|
||||
x[7], x[6], x[5], x[4],
|
||||
x[3], x[2], x[1], x[0]);
|
||||
xvec = set32x16(x[15], x[14], x[13], x[12],
|
||||
x[11], x[10], x[9], x[8],
|
||||
x[7], x[6], x[5], x[4],
|
||||
x[3], x[2], x[1], x[0]);
|
||||
#elif defined(HAVE_AVX2)
|
||||
xvec.lo = _mm256_set_epi32(x[7], x[6], x[5], x[4],
|
||||
x[3], x[2], x[1], x[0]);
|
||||
xvec.hi = _mm256_set_epi32(x[15], x[14], x[13], x[12],
|
||||
x[11], x[10], x[9], x[8]);
|
||||
xvec.lo = set32x8(x[7], x[6], x[5], x[4],
|
||||
x[3], x[2], x[1], x[0]);
|
||||
xvec.hi = set32x8(x[15], x[14], x[13], x[12],
|
||||
x[11], x[10], x[9], x[8]);
|
||||
#else
|
||||
xvec.lo.lo = _mm_set_epi32(x[3], x[2], x[1], x[0]);
|
||||
xvec.lo.hi = _mm_set_epi32(x[7], x[6], x[5], x[4]);
|
||||
xvec.hi.lo = _mm_set_epi32(x[11], x[10], x[9], x[8]);
|
||||
xvec.hi.hi = _mm_set_epi32(x[15], x[14], x[13], x[12]);
|
||||
xvec.lo.lo = set32x4(x[3], x[2], x[1], x[0]);
|
||||
xvec.lo.hi = set32x4(x[7], x[6], x[5], x[4]);
|
||||
xvec.hi.lo = set32x4(x[11], x[10], x[9], x[8]);
|
||||
xvec.hi.hi = set32x4(x[15], x[14], x[13], x[12]);
|
||||
#endif
|
||||
return xvec;
|
||||
}
|
||||
@@ -588,16 +588,16 @@ m512 loadcompressed512_64bit(const void *ptr, m512 mvec) {
|
||||
expand64(v[6], m[6]), expand64(v[7], m[7]) };
|
||||
|
||||
#if defined(HAVE_AVX512)
|
||||
m512 xvec = _mm512_set_epi64(x[7], x[6], x[5], x[4],
|
||||
m512 xvec = set64x8(x[7], x[6], x[5], x[4],
|
||||
x[3], x[2], x[1], x[0]);
|
||||
#elif defined(HAVE_AVX2)
|
||||
m512 xvec = { .lo = _mm256_set_epi64x(x[3], x[2], x[1], x[0]),
|
||||
.hi = _mm256_set_epi64x(x[7], x[6], x[5], x[4])};
|
||||
m512 xvec = { .lo = set4x64(x[3], x[2], x[1], x[0]),
|
||||
.hi = set4x64(x[7], x[6], x[5], x[4])};
|
||||
#else
|
||||
m512 xvec = { .lo = { _mm_set_epi64x(x[1], x[0]),
|
||||
_mm_set_epi64x(x[3], x[2]) },
|
||||
.hi = { _mm_set_epi64x(x[5], x[4]),
|
||||
_mm_set_epi64x(x[7], x[6]) } };
|
||||
m512 xvec = { .lo = { set2x64(x[1], x[0]),
|
||||
set2x64(x[3], x[2]) },
|
||||
.hi = { set2x64(x[5], x[4]),
|
||||
set2x64(x[7], x[6]) } };
|
||||
#endif
|
||||
return xvec;
|
||||
}
|
||||
|
Reference in New Issue
Block a user