mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
If we can shift by an immediate, do it. Otherwise, don't.
This commit is contained in:
parent
0275869b3e
commit
3e345c2567
@ -313,6 +313,7 @@ endif ()
|
||||
# testing a builtin takes a little more work
|
||||
CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED)
|
||||
CHECK_CXX_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED)
|
||||
CHECK_C_SOURCE_COMPILES("int main(void) { __builtin_constant_p(0); }" HAVE__BUILTIN_CONSTANT_P)
|
||||
|
||||
if (NOT WIN32)
|
||||
set(C_FLAGS_TO_CHECK
|
||||
|
@ -81,6 +81,9 @@
|
||||
/* Define to 1 if you have the `_aligned_malloc' function. */
|
||||
#cmakedefine HAVE__ALIGNED_MALLOC
|
||||
|
||||
/* Define if compiler has __builtin_constant_p */
|
||||
#cmakedefine HAVE__BUILTIN_CONSTANT_P
|
||||
|
||||
/* Optimize, inline critical functions */
|
||||
#cmakedefine HS_OPTIMIZE
|
||||
|
||||
|
@ -123,7 +123,17 @@ static really_inline u32 diffrich64_128(m128 a, m128 b) {
|
||||
#endif
|
||||
}
|
||||
|
||||
#define lshift64_m128(a, b) _mm_slli_epi64((a), (b))
|
||||
/*
 * Left-shift a by b bits using the 64-bit-lane SSE shift intrinsics.
 *
 * Two paths are used:
 *  - if HAVE__BUILTIN_CONSTANT_P is defined and the compiler reports b as a
 *    compile-time constant, b is handed directly to _mm_slli_epi64;
 *  - otherwise b is first moved into an m128 with _mm_cvtsi32_si128 and the
 *    shift is done with _mm_sll_epi64, which takes its count as a vector.
 */
static really_really_inline
|
||||
m128 lshift64_m128(m128 a, unsigned b) {
|
||||
#if defined(HAVE__BUILTIN_CONSTANT_P)
|
||||
/* Only take the immediate-count form when the count is known at compile time. */
if (__builtin_constant_p(b)) {
|
||||
return _mm_slli_epi64(a, b);
|
||||
}
|
||||
#endif
|
||||
/* Variable count: place it in a vector register for _mm_sll_epi64. */
m128 x = _mm_cvtsi32_si128(b);
|
||||
return _mm_sll_epi64(a, x);
|
||||
}
|
||||
|
||||
#define rshift64_m128(a, b) _mm_srli_epi64((a), (b))
|
||||
#define eq128(a, b) _mm_cmpeq_epi8((a), (b))
|
||||
#define movemask128(a) ((u32)_mm_movemask_epi8((a)))
|
||||
@ -339,7 +349,18 @@ m128 set64x2(u64a hi, u64a lo) {
|
||||
****/
|
||||
|
||||
#if defined(HAVE_AVX2)
|
||||
#define lshift64_m256(a, b) _mm256_slli_epi64((a), (b))
|
||||
|
||||
/*
 * Left-shift a by b bits using the 64-bit-lane AVX2 shift intrinsics
 * (this definition sits under #if defined(HAVE_AVX2)).
 *
 * Two paths are used:
 *  - if HAVE__BUILTIN_CONSTANT_P is defined and the compiler reports b as a
 *    compile-time constant, b is handed directly to _mm256_slli_epi64;
 *  - otherwise b is first moved into an m128 with _mm_cvtsi32_si128 and the
 *    shift is done with _mm256_sll_epi64, which takes its count as a vector.
 */
static really_really_inline
|
||||
m256 lshift64_m256(m256 a, unsigned b) {
|
||||
#if defined(HAVE__BUILTIN_CONSTANT_P)
|
||||
/* Only take the immediate-count form when the count is known at compile time. */
if (__builtin_constant_p(b)) {
|
||||
return _mm256_slli_epi64(a, b);
|
||||
}
|
||||
#endif
|
||||
/* Variable count: the 256-bit shift still takes its count in a 128-bit vector. */
m128 x = _mm_cvtsi32_si128(b);
|
||||
return _mm256_sll_epi64(a, x);
|
||||
}
|
||||
|
||||
#define rshift64_m256(a, b) _mm256_srli_epi64((a), (b))
|
||||
|
||||
static really_inline
|
||||
@ -357,7 +378,7 @@ m256 set2x128(m128 a) {
|
||||
|
||||
#else
|
||||
|
||||
static really_inline
|
||||
static really_really_inline
|
||||
m256 lshift64_m256(m256 a, int b) {
|
||||
m256 rv = a;
|
||||
rv.lo = lshift64_m128(rv.lo, b);
|
||||
@ -776,7 +797,6 @@ static really_inline m384 andnot384(m384 a, m384 b) {
|
||||
return rv;
|
||||
}
|
||||
|
||||
// The shift amount is an immediate
|
||||
static really_really_inline
|
||||
m384 lshift64_m384(m384 a, unsigned b) {
|
||||
m384 rv;
|
||||
@ -1016,9 +1036,17 @@ m512 andnot512(m512 a, m512 b) {
|
||||
}
|
||||
|
||||
#if defined(HAVE_AVX512)
|
||||
#define lshift64_m512(a, b) _mm512_slli_epi64((a), b)
|
||||
/*
 * Left-shift a by b bits using the 64-bit-lane AVX-512 shift intrinsics
 * (this definition sits under #if defined(HAVE_AVX512)).
 *
 * Two paths are used:
 *  - if HAVE__BUILTIN_CONSTANT_P is defined and the compiler reports b as a
 *    compile-time constant, b is handed directly to _mm512_slli_epi64;
 *  - otherwise b is first moved into an m128 with _mm_cvtsi32_si128 and the
 *    shift is done with _mm512_sll_epi64, which takes its count as a vector.
 */
static really_really_inline
|
||||
m512 lshift64_m512(m512 a, unsigned b) {
|
||||
#if defined(HAVE__BUILTIN_CONSTANT_P)
|
||||
/* Only take the immediate-count form when the count is known at compile time. */
if (__builtin_constant_p(b)) {
|
||||
return _mm512_slli_epi64(a, b);
|
||||
}
|
||||
#endif
|
||||
/* Variable count: the 512-bit shift still takes its count in a 128-bit vector. */
m128 x = _mm_cvtsi32_si128(b);
|
||||
return _mm512_sll_epi64(a, x);
|
||||
}
|
||||
#else
|
||||
// The shift amount is an immediate
|
||||
static really_really_inline
|
||||
m512 lshift64_m512(m512 a, unsigned b) {
|
||||
m512 rv;
|
||||
|
@ -143,6 +143,10 @@ void simd_loadbytes(m128 *a, const void *ptr, unsigned i) { *a = loadbytes128(pt
|
||||
// Overloads of simd_loadbytes for the wider vector types. Each one forwards
// (ptr, i) to the matching width-specific loadbytesNNN helper and stores the
// returned vector through a.
void simd_loadbytes(m256 *a, const void *ptr, unsigned i) { *a = loadbytes256(ptr, i); }
|
||||
void simd_loadbytes(m384 *a, const void *ptr, unsigned i) { *a = loadbytes384(ptr, i); }
|
||||
void simd_loadbytes(m512 *a, const void *ptr, unsigned i) { *a = loadbytes512(ptr, i); }
|
||||
// Overloads of simd_lshift64, one per vector width. Each forwards (a, i) to
// the matching lshift64_mNNN helper so that callers can use a single name
// regardless of the vector type they hold.
m128 simd_lshift64(const m128 &a, unsigned i) { return lshift64_m128(a, i); }
|
||||
m256 simd_lshift64(const m256 &a, unsigned i) { return lshift64_m256(a, i); }
|
||||
m384 simd_lshift64(const m384 &a, unsigned i) { return lshift64_m384(a, i); }
|
||||
m512 simd_lshift64(const m512 &a, unsigned i) { return lshift64_m512(a, i); }
|
||||
|
||||
template<typename T>
|
||||
class SimdUtilsTest : public testing::Test {
|
||||
@ -586,6 +590,56 @@ TYPED_TEST(SimdUtilsTest, loadbytes_storebytes) {
|
||||
}
|
||||
}
|
||||
|
||||
// Typed test for simd_lshift64: for each TypeParam, a vector filled with the
// byte 0x5a is shifted and every 64-bit word of the result is compared with
// the scalar value 0x5a5a5a5a5a5a5a5a shifted by the same count.
TYPED_TEST(SimdUtilsTest, lshift64) {
|
||||
TypeParam a;
|
||||
// Fill every byte of the input with 0x5a, so each 64-bit word equals exp_val.
memset(&a, 0x5a, sizeof(a));
|
||||
|
||||
static constexpr u64a exp_val = 0x5a5a5a5a5a5a5a5aULL;
|
||||
|
||||
// Overlay the vector result with an array of u64a so each 64-bit word can
// be read back and checked individually.
union {
|
||||
TypeParam simd;
|
||||
u64a qword[sizeof(TypeParam) / 8];
|
||||
} c;
|
||||
cout << "non-const for size " << sizeof(a) << '\n';
|
||||
// Shift counts 0..63 supplied through a loop variable.
for (unsigned s = 0; s < 64; s++) {
|
||||
c.simd = simd_lshift64(a, s);
|
||||
|
||||
const u64a expected = exp_val << s;
|
||||
for (size_t i = 0; i < sizeof(c) / 8; i++) {
|
||||
EXPECT_EQ(expected, c.qword[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// test immediates
// The calls below pass literal shift counts (1, 2, 7, 31).
// NOTE(review): the literals go through the simd_lshift64 wrappers, so the
// __builtin_constant_p path in lshift64_mNNN is presumably only reached if
// the compiler inlines those wrappers -- confirm if that path must be covered.
|
||||
u64a expected;
|
||||
|
||||
cout << "imm for size " << sizeof(a) << '\n';
|
||||
c.simd = simd_lshift64(a, 1);
|
||||
expected = exp_val << 1;
|
||||
for (size_t i = 0; i < sizeof(c) / 8; i++) {
|
||||
EXPECT_EQ(expected, c.qword[i]);
|
||||
}
|
||||
|
||||
c.simd = simd_lshift64(a, 2);
|
||||
expected = exp_val << 2;
|
||||
for (size_t i = 0; i < sizeof(c) / 8; i++) {
|
||||
EXPECT_EQ(expected, c.qword[i]);
|
||||
}
|
||||
|
||||
c.simd = simd_lshift64(a, 7);
|
||||
expected = exp_val << 7;
|
||||
for (size_t i = 0; i < sizeof(c) / 8; i++) {
|
||||
EXPECT_EQ(expected, c.qword[i]);
|
||||
}
|
||||
|
||||
c.simd = simd_lshift64(a, 31);
|
||||
expected = exp_val << 31;
|
||||
for (size_t i = 0; i < sizeof(c) / 8; i++) {
|
||||
EXPECT_EQ(expected, c.qword[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
TEST(SimdUtilsTest, alignment) {
|
||||
ASSERT_EQ(16, alignof(m128));
|
||||
ASSERT_EQ(32, alignof(m256));
|
||||
|
Loading…
x
Reference in New Issue
Block a user