Patch summary (free text ahead of the first file header):

  * src/ue2common.h: move the ALIGN_DIRECTIVE, ALIGN_AVX_DIRECTIVE and
    ALIGN_CL_DIRECTIVE macros up so they sit directly after ALIGN_ATTR.
  * src/util/simd_types.h: extend the copyright year range and, in the
    non-AVX2 m256 typedef, place ALIGN_AVX_DIRECTIVE before `struct`
    instead of after it.
  * src/util/simd_utils.h: hoist the alignment asserts in load256/store256
    and store512 out of the __AVX2__ conditionals, and make load256,
    store256, load512 and store512 all assert alignof(m256) alignment.

diff --git a/src/ue2common.h b/src/ue2common.h
index 2de60753..e1f03f72 100644
--- a/src/ue2common.h
+++ b/src/ue2common.h
@@ -52,6 +52,9 @@
 #define ALIGN_ATTR(x) __attribute__((aligned((x))))
 #endif
 
+#define ALIGN_DIRECTIVE ALIGN_ATTR(16)
+#define ALIGN_AVX_DIRECTIVE ALIGN_ATTR(32)
+#define ALIGN_CL_DIRECTIVE ALIGN_ATTR(64)
 
 typedef signed char s8;
 typedef unsigned char u8;
@@ -82,10 +85,6 @@ typedef u32 ReportID;
 #define HS_PUBLIC_API
 #endif
 
-#define ALIGN_DIRECTIVE ALIGN_ATTR(16)
-#define ALIGN_AVX_DIRECTIVE ALIGN_ATTR(32)
-#define ALIGN_CL_DIRECTIVE ALIGN_ATTR(64)
-
 #define ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0]))
 
 /** \brief Shorthand for the attribute to shut gcc about unused parameters */
diff --git a/src/util/simd_types.h b/src/util/simd_types.h
index 63311b10..e4541411 100644
--- a/src/util/simd_types.h
+++ b/src/util/simd_types.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -65,7 +65,7 @@ typedef __m128i m128;
 #if defined(__AVX2__)
 typedef __m256i m256;
 #else
-typedef struct ALIGN_AVX_DIRECTIVE {m128 lo; m128 hi;} m256;
+typedef ALIGN_AVX_DIRECTIVE struct {m128 lo; m128 hi;} m256;
 #endif
 
 // these should align to 16 and 32 respectively
diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h
index 5f557ba5..8cea458e 100644
--- a/src/util/simd_utils.h
+++ b/src/util/simd_utils.h
@@ -493,11 +493,10 @@ static really_inline u32 diffrich64_256(m256 a, m256 b) {
 
 // aligned load
 static really_inline m256 load256(const void *ptr) {
-#if defined(__AVX2__)
     assert(ISALIGNED_N(ptr, alignof(m256)));
+#if defined(__AVX2__)
     return _mm256_load_si256((const m256 *)ptr);
 #else
-    assert(ISALIGNED_N(ptr, alignof(m128)));
     m256 rv = { load128(ptr), load128((const char *)ptr + 16) };
     return rv;
 #endif
@@ -517,11 +516,10 @@ static really_inline m256 load2x128(const void *ptr) {
 
 // aligned store
 static really_inline void store256(void *ptr, m256 a) {
-#if defined(__AVX2__)
     assert(ISALIGNED_N(ptr, alignof(m256)));
+#if defined(__AVX2__)
     _mm256_store_si256((m256 *)ptr, a);
 #else
-    assert(ISALIGNED_16(ptr));
     ptr = assume_aligned(ptr, 16);
     *(m256 *)ptr = a;
 #endif
@@ -943,19 +941,19 @@ static really_inline u32 diffrich64_512(m512 a, m512 b) {
 
 // aligned load
 static really_inline m512 load512(const void *ptr) {
-    assert(ISALIGNED_16(ptr));
+    assert(ISALIGNED_N(ptr, alignof(m256)));
     m512 rv = { load256(ptr), load256((const char *)ptr + 32) };
     return rv;
 }
 
 // aligned store
 static really_inline void store512(void *ptr, m512 a) {
+    assert(ISALIGNED_N(ptr, alignof(m256)));
 #if defined(__AVX2__)
     m512 *x = (m512 *)ptr;
     store256(&x->lo, a.lo);
     store256(&x->hi, a.hi);
 #else
-    assert(ISALIGNED_16(ptr));
     ptr = assume_aligned(ptr, 16);
     *(m512 *)ptr = a;
 #endif