mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
fix unit-internal release builds using __builtin_constant_p() as well
This commit is contained in:
parent
00384c9e37
commit
7ceca78db4
@ -520,16 +520,18 @@ really_inline SuperVector<16> SuperVector<16>::loadu_maskz(void const *ptr, uint
|
|||||||
return mask & v;
|
return mask & v;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HS_OPTIMIZE
|
|
||||||
template<>
|
template<>
|
||||||
really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> &other, int8_t offset)
|
really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> &other, int8_t offset)
|
||||||
{
|
{
|
||||||
|
#if defined(HAVE__BUILTIN_CONSTANT_P)
|
||||||
|
if (__builtin_constant_p(offset)) {
|
||||||
|
if (offset == 16) {
|
||||||
|
return *this;
|
||||||
|
} else {
|
||||||
return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], offset)};
|
return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], offset)};
|
||||||
}
|
}
|
||||||
#else
|
}
|
||||||
template<>
|
#endif
|
||||||
really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> &other, int8_t offset)
|
|
||||||
{
|
|
||||||
switch(offset) {
|
switch(offset) {
|
||||||
case 0: return other; break;
|
case 0: return other; break;
|
||||||
case 1: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 1)}; break;
|
case 1: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 1)}; break;
|
||||||
@ -551,7 +553,6 @@ really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> &other, in
|
|||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
template<>
|
template<>
|
||||||
@ -1037,10 +1038,11 @@ really_inline SuperVector<32> SuperVector<32>::vshr(uint8_t const N) const
|
|||||||
return vshr_256(N);
|
return vshr_256(N);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HS_OPTIMIZE
|
|
||||||
template <>
|
template <>
|
||||||
really_inline SuperVector<32> SuperVector<32>::operator>>(uint8_t const N) const
|
really_inline SuperVector<32> SuperVector<32>::operator>>(uint8_t const N) const
|
||||||
{
|
{
|
||||||
|
#if defined(HAVE__BUILTIN_CONSTANT_P)
|
||||||
|
if (__builtin_constant_p(N)) {
|
||||||
// As found here: https://stackoverflow.com/questions/25248766/emulating-shifts-on-32-bytes-with-avx
|
// As found here: https://stackoverflow.com/questions/25248766/emulating-shifts-on-32-bytes-with-avx
|
||||||
if (N < 16) {
|
if (N < 16) {
|
||||||
return {_mm256_alignr_epi8(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), u.v256[0], N)};
|
return {_mm256_alignr_epi8(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), u.v256[0], N)};
|
||||||
@ -1050,18 +1052,15 @@ really_inline SuperVector<32> SuperVector<32>::operator>>(uint8_t const N) const
|
|||||||
return {_mm256_srli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), N - 16)};
|
return {_mm256_srli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), N - 16)};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#endif
|
||||||
template <>
|
|
||||||
really_inline SuperVector<32> SuperVector<32>::operator>>(uint8_t const N) const
|
|
||||||
{
|
|
||||||
return vshr_256(N);
|
return vshr_256(N);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef HS_OPTIMIZE
|
|
||||||
template <>
|
template <>
|
||||||
really_inline SuperVector<32> SuperVector<32>::operator<<(uint8_t const N) const
|
really_inline SuperVector<32> SuperVector<32>::operator<<(uint8_t const N) const
|
||||||
{
|
{
|
||||||
|
#if defined(HAVE__BUILTIN_CONSTANT_P)
|
||||||
|
if (__builtin_constant_p(N)) {
|
||||||
// As found here: https://stackoverflow.com/questions/25248766/emulating-shifts-on-32-bytes-with-avx
|
// As found here: https://stackoverflow.com/questions/25248766/emulating-shifts-on-32-bytes-with-avx
|
||||||
if (N < 16) {
|
if (N < 16) {
|
||||||
return {_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - N)};
|
return {_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - N)};
|
||||||
@ -1071,13 +1070,9 @@ really_inline SuperVector<32> SuperVector<32>::operator<<(uint8_t const N) const
|
|||||||
return {_mm256_slli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), N - 16)};
|
return {_mm256_slli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), N - 16)};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#endif
|
||||||
template <>
|
|
||||||
really_inline SuperVector<32> SuperVector<32>::operator<<(uint8_t const N) const
|
|
||||||
{
|
|
||||||
return vshl_256(N);
|
return vshl_256(N);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
really_inline SuperVector<32> SuperVector<32>::Ones_vshr(uint8_t const N)
|
really_inline SuperVector<32> SuperVector<32>::Ones_vshr(uint8_t const N)
|
||||||
@ -1132,16 +1127,18 @@ really_inline SuperVector<32> SuperVector<32>::loadu_maskz(void const *ptr, uint
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HS_OPTIMIZE
|
|
||||||
template<>
|
template<>
|
||||||
really_inline SuperVector<32> SuperVector<32>::alignr(SuperVector<32> &other, int8_t offset)
|
really_inline SuperVector<32> SuperVector<32>::alignr(SuperVector<32> &other, int8_t offset)
|
||||||
{
|
{
|
||||||
|
#if defined(HAVE__BUILTIN_CONSTANT_P)
|
||||||
|
if (__builtin_constant_p(offset)) {
|
||||||
|
if (offset == 16) {
|
||||||
|
return *this;
|
||||||
|
} else {
|
||||||
return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], offset)};
|
return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], offset)};
|
||||||
}
|
}
|
||||||
#else
|
}
|
||||||
template<>
|
#endif
|
||||||
really_inline SuperVector<32> SuperVector<32>::alignr(SuperVector<32> &other, int8_t offset)
|
|
||||||
{
|
|
||||||
// As found here: https://stackoverflow.com/questions/8517970/mm-alignr-epi8-palignr-equivalent-in-avx2#8637458
|
// As found here: https://stackoverflow.com/questions/8517970/mm-alignr-epi8-palignr-equivalent-in-avx2#8637458
|
||||||
switch (offset){
|
switch (offset){
|
||||||
case 0 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 0), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 0)); break;
|
case 0 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 0), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 0)); break;
|
||||||
@ -1180,7 +1177,6 @@ really_inline SuperVector<32> SuperVector<32>::alignr(SuperVector<32> &other, in
|
|||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
template<>
|
template<>
|
||||||
@ -1772,16 +1768,18 @@ really_inline SuperVector<64> SuperVector<64>::pshufb_maskz(SuperVector<64> b, u
|
|||||||
return {_mm512_maskz_shuffle_epi8(mask, u.v512[0], b.u.v512[0])};
|
return {_mm512_maskz_shuffle_epi8(mask, u.v512[0], b.u.v512[0])};
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HS_OPTIMIZE
|
|
||||||
template<>
|
template<>
|
||||||
really_inline SuperVector<64> SuperVector<64>::alignr(SuperVector<64> &l, int8_t offset)
|
really_inline SuperVector<64> SuperVector<64>::alignr(SuperVector<64> &l, int8_t offset)
|
||||||
{
|
{
|
||||||
|
#if defined(HAVE__BUILTIN_CONSTANT_P)
|
||||||
|
if (__builtin_constant_p(offset)) {
|
||||||
|
if (offset == 16) {
|
||||||
|
return *this;
|
||||||
|
} else {
|
||||||
return {_mm512_alignr_epi8(u.v512[0], l.u.v512[0], offset)};
|
return {_mm512_alignr_epi8(u.v512[0], l.u.v512[0], offset)};
|
||||||
}
|
}
|
||||||
#else
|
}
|
||||||
template<>
|
#endif
|
||||||
really_inline SuperVector<64> SuperVector<64>::alignr(SuperVector<64> &l, int8_t offset)
|
|
||||||
{
|
|
||||||
if(offset == 0) {
|
if(offset == 0) {
|
||||||
return *this;
|
return *this;
|
||||||
} else if (offset < 32){
|
} else if (offset < 32){
|
||||||
@ -1802,7 +1800,6 @@ really_inline SuperVector<64> SuperVector<64>::alignr(SuperVector<64> &l, int8_t
|
|||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif // HAVE_AVX512
|
#endif // HAVE_AVX512
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user