Ensure that m256 is 32-aligned on non-avx2 builds

This commit is contained in:
Matthew Barr 2016-07-07 14:00:11 +10:00
parent d497a1259a
commit 22b451b59b
3 changed files with 9 additions and 12 deletions

View File

@ -52,6 +52,9 @@
#define ALIGN_ATTR(x) __attribute__((aligned((x))))
#endif
#define ALIGN_DIRECTIVE ALIGN_ATTR(16)
#define ALIGN_AVX_DIRECTIVE ALIGN_ATTR(32)
#define ALIGN_CL_DIRECTIVE ALIGN_ATTR(64)
typedef signed char s8;
typedef unsigned char u8;
@ -82,10 +85,6 @@ typedef u32 ReportID;
#define HS_PUBLIC_API
#endif
#define ALIGN_DIRECTIVE ALIGN_ATTR(16)
#define ALIGN_AVX_DIRECTIVE ALIGN_ATTR(32)
#define ALIGN_CL_DIRECTIVE ALIGN_ATTR(64)
#define ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0]))
/** \brief Shorthand for the attribute that silences gcc warnings about unused parameters */

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -65,7 +65,7 @@ typedef __m128i m128;
#if defined(__AVX2__)
typedef __m256i m256;
#else
typedef struct ALIGN_AVX_DIRECTIVE {m128 lo; m128 hi;} m256;
typedef ALIGN_AVX_DIRECTIVE struct {m128 lo; m128 hi;} m256;
#endif
// these should align to 16 and 32 respectively

View File

@ -493,11 +493,10 @@ static really_inline u32 diffrich64_256(m256 a, m256 b) {
// aligned load
static really_inline m256 load256(const void *ptr) {
#if defined(__AVX2__)
assert(ISALIGNED_N(ptr, alignof(m256)));
#if defined(__AVX2__)
return _mm256_load_si256((const m256 *)ptr);
#else
assert(ISALIGNED_N(ptr, alignof(m128)));
m256 rv = { load128(ptr), load128((const char *)ptr + 16) };
return rv;
#endif
@ -517,11 +516,10 @@ static really_inline m256 load2x128(const void *ptr) {
// aligned store
static really_inline void store256(void *ptr, m256 a) {
#if defined(__AVX2__)
assert(ISALIGNED_N(ptr, alignof(m256)));
#if defined(__AVX2__)
_mm256_store_si256((m256 *)ptr, a);
#else
assert(ISALIGNED_16(ptr));
ptr = assume_aligned(ptr, 16);
*(m256 *)ptr = a;
#endif
@ -943,19 +941,19 @@ static really_inline u32 diffrich64_512(m512 a, m512 b) {
// aligned load
static really_inline m512 load512(const void *ptr) {
assert(ISALIGNED_16(ptr));
assert(ISALIGNED_N(ptr, alignof(m256)));
m512 rv = { load256(ptr), load256((const char *)ptr + 32) };
return rv;
}
// aligned store
static really_inline void store512(void *ptr, m512 a) {
assert(ISALIGNED_N(ptr, alignof(m256)));
#if defined(__AVX2__)
m512 *x = (m512 *)ptr;
store256(&x->lo, a.lo);
store256(&x->hi, a.hi);
#else
assert(ISALIGNED_16(ptr));
ptr = assume_aligned(ptr, 16);
*(m512 *)ptr = a;
#endif