From d2416736cb586d380ffb9b1ff3b63194247d7e81 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 7 Mar 2017 09:58:24 +1100 Subject: [PATCH] Use intrinsic to get correct movq everywhere The real trick here is that _mm_set_epi64x() (note the 'x') takes a 64-bit value - not a pointer to a 128-bit value like the non-x variant - so compilers don't twist themselves in knots with alignment or whatever confuses them. --- src/util/simd_utils.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index c6d43f57..484b47c0 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -180,9 +180,7 @@ static really_inline u64a movq(const m128 in) { /* another form of movq */ static really_inline m128 load_m128_from_u64a(const u64a *p) { - m128 out; - __asm__ ("vmovq\t%1,%0" : "=x"(out) :"m"(*p)); - return out; + return _mm_set_epi64x(0LL, *p); } #define rshiftbyte_m128(a, count_immed) _mm_srli_si128(a, count_immed)