mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
simd_utils: fix undefined instruction issue for 32-bit system
fixes github issue #292
This commit is contained in:
parent
62e35c910b
commit
1ecb3aef8b
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2020, Intel Corporation
|
* Copyright (c) 2015-2021, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -156,6 +156,16 @@ static really_inline u32 movd(const m128 in) {
|
|||||||
return _mm_cvtsi128_si32(in);
|
return _mm_cvtsi128_si32(in);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static really_inline u64a movq(const m128 in) {
|
||||||
|
#if defined(ARCH_X86_64)
|
||||||
|
return _mm_cvtsi128_si64(in);
|
||||||
|
#else // 32-bit - this is horrific
|
||||||
|
u32 lo = movd(in);
|
||||||
|
u32 hi = movd(_mm_srli_epi64(in, 32));
|
||||||
|
return (u64a)hi << 32 | lo;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
#if defined(HAVE_AVX512)
|
#if defined(HAVE_AVX512)
|
||||||
static really_inline u32 movd512(const m512 in) {
|
static really_inline u32 movd512(const m512 in) {
|
||||||
// NOTE: seems gcc doesn't support _mm512_cvtsi512_si32(in),
|
// NOTE: seems gcc doesn't support _mm512_cvtsi512_si32(in),
|
||||||
@ -166,20 +176,10 @@ static really_inline u32 movd512(const m512 in) {
|
|||||||
static really_inline u64a movq512(const m512 in) {
|
static really_inline u64a movq512(const m512 in) {
|
||||||
// NOTE: seems AVX512 doesn't support _mm512_cvtsi512_si64(in),
|
// NOTE: seems AVX512 doesn't support _mm512_cvtsi512_si64(in),
|
||||||
// so we use 2-step conversions to work around.
|
// so we use 2-step conversions to work around.
|
||||||
return _mm_cvtsi128_si64(_mm512_castsi512_si128(in));
|
return movq(_mm512_castsi512_si128(in));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static really_inline u64a movq(const m128 in) {
|
|
||||||
#if defined(ARCH_X86_64)
|
|
||||||
return _mm_cvtsi128_si64(in);
|
|
||||||
#else // 32-bit - this is horrific
|
|
||||||
u32 lo = movd(in);
|
|
||||||
u32 hi = movd(_mm_srli_epi64(in, 32));
|
|
||||||
return (u64a)hi << 32 | lo;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/* another form of movq */
|
/* another form of movq */
|
||||||
static really_inline
|
static really_inline
|
||||||
m128 load_m128_from_u64a(const u64a *p) {
|
m128 load_m128_from_u64a(const u64a *p) {
|
||||||
@ -791,7 +791,7 @@ m128 movdq_lo(m256 x) {
|
|||||||
#define lshift128_m256(a, count_immed) _mm256_slli_si256(a, count_immed)
|
#define lshift128_m256(a, count_immed) _mm256_slli_si256(a, count_immed)
|
||||||
#define extract64from256(a, imm) _mm_extract_epi64(_mm256_extracti128_si256(a, imm >> 1), imm % 2)
|
#define extract64from256(a, imm) _mm_extract_epi64(_mm256_extracti128_si256(a, imm >> 1), imm % 2)
|
||||||
#define extract32from256(a, imm) _mm_extract_epi32(_mm256_extracti128_si256(a, imm >> 2), imm % 4)
|
#define extract32from256(a, imm) _mm_extract_epi32(_mm256_extracti128_si256(a, imm >> 2), imm % 4)
|
||||||
#define extractlow64from256(a) _mm_cvtsi128_si64(cast256to128(a))
|
#define extractlow64from256(a) movq(cast256to128(a))
|
||||||
#define extractlow32from256(a) movd(cast256to128(a))
|
#define extractlow32from256(a) movd(cast256to128(a))
|
||||||
#define interleave256hi(a, b) _mm256_unpackhi_epi8(a, b)
|
#define interleave256hi(a, b) _mm256_unpackhi_epi8(a, b)
|
||||||
#define interleave256lo(a, b) _mm256_unpacklo_epi8(a, b)
|
#define interleave256lo(a, b) _mm256_unpacklo_epi8(a, b)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user