Ensure that m256 is 32-byte aligned on non-AVX2 builds

This commit is contained in:
Matthew Barr 2016-07-07 14:00:11 +10:00
parent d497a1259a
commit 22b451b59b
3 changed files with 9 additions and 12 deletions

View File

@ -52,6 +52,9 @@
#define ALIGN_ATTR(x) __attribute__((aligned((x)))) #define ALIGN_ATTR(x) __attribute__((aligned((x))))
#endif #endif
#define ALIGN_DIRECTIVE ALIGN_ATTR(16)
#define ALIGN_AVX_DIRECTIVE ALIGN_ATTR(32)
#define ALIGN_CL_DIRECTIVE ALIGN_ATTR(64)
typedef signed char s8; typedef signed char s8;
typedef unsigned char u8; typedef unsigned char u8;
@ -82,10 +85,6 @@ typedef u32 ReportID;
#define HS_PUBLIC_API #define HS_PUBLIC_API
#endif #endif
#define ALIGN_DIRECTIVE ALIGN_ATTR(16)
#define ALIGN_AVX_DIRECTIVE ALIGN_ATTR(32)
#define ALIGN_CL_DIRECTIVE ALIGN_ATTR(64)
#define ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0])) #define ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0]))
/** \brief Shorthand for the attribute to shut gcc up about unused parameters */ /** \brief Shorthand for the attribute to shut gcc up about unused parameters */

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -65,7 +65,7 @@ typedef __m128i m128;
#if defined(__AVX2__) #if defined(__AVX2__)
typedef __m256i m256; typedef __m256i m256;
#else #else
typedef struct ALIGN_AVX_DIRECTIVE {m128 lo; m128 hi;} m256; typedef ALIGN_AVX_DIRECTIVE struct {m128 lo; m128 hi;} m256;
#endif #endif
// these should align to 16 and 32 respectively // these should align to 16 and 32 respectively

View File

@ -493,11 +493,10 @@ static really_inline u32 diffrich64_256(m256 a, m256 b) {
// aligned load // aligned load
static really_inline m256 load256(const void *ptr) { static really_inline m256 load256(const void *ptr) {
#if defined(__AVX2__)
assert(ISALIGNED_N(ptr, alignof(m256))); assert(ISALIGNED_N(ptr, alignof(m256)));
#if defined(__AVX2__)
return _mm256_load_si256((const m256 *)ptr); return _mm256_load_si256((const m256 *)ptr);
#else #else
assert(ISALIGNED_N(ptr, alignof(m128)));
m256 rv = { load128(ptr), load128((const char *)ptr + 16) }; m256 rv = { load128(ptr), load128((const char *)ptr + 16) };
return rv; return rv;
#endif #endif
@ -517,11 +516,10 @@ static really_inline m256 load2x128(const void *ptr) {
// aligned store // aligned store
static really_inline void store256(void *ptr, m256 a) { static really_inline void store256(void *ptr, m256 a) {
#if defined(__AVX2__)
assert(ISALIGNED_N(ptr, alignof(m256))); assert(ISALIGNED_N(ptr, alignof(m256)));
#if defined(__AVX2__)
_mm256_store_si256((m256 *)ptr, a); _mm256_store_si256((m256 *)ptr, a);
#else #else
assert(ISALIGNED_16(ptr));
ptr = assume_aligned(ptr, 16); ptr = assume_aligned(ptr, 16);
*(m256 *)ptr = a; *(m256 *)ptr = a;
#endif #endif
@ -943,19 +941,19 @@ static really_inline u32 diffrich64_512(m512 a, m512 b) {
// aligned load // aligned load
static really_inline m512 load512(const void *ptr) { static really_inline m512 load512(const void *ptr) {
assert(ISALIGNED_16(ptr)); assert(ISALIGNED_N(ptr, alignof(m256)));
m512 rv = { load256(ptr), load256((const char *)ptr + 32) }; m512 rv = { load256(ptr), load256((const char *)ptr + 32) };
return rv; return rv;
} }
// aligned store // aligned store
static really_inline void store512(void *ptr, m512 a) { static really_inline void store512(void *ptr, m512 a) {
assert(ISALIGNED_N(ptr, alignof(m256)));
#if defined(__AVX2__) #if defined(__AVX2__)
m512 *x = (m512 *)ptr; m512 *x = (m512 *)ptr;
store256(&x->lo, a.lo); store256(&x->lo, a.lo);
store256(&x->hi, a.hi); store256(&x->hi, a.hi);
#else #else
assert(ISALIGNED_16(ptr));
ptr = assume_aligned(ptr, 16); ptr = assume_aligned(ptr, 16);
*(m512 *)ptr = a; *(m512 *)ptr = a;
#endif #endif