Ensure that m256 is 32-byte aligned on non-AVX2 builds

2026-01-17 16:00:26 +03:00 · 2016-07-07 14:00:11 +10:00
parent d497a1259a
commit 22b451b59b
3 changed files with 9 additions and 12 deletions
--- a/src/ue2common.h
+++ b/src/ue2common.h
@@ -52,6 +52,9 @@
 #define ALIGN_ATTR(x) __attribute__((aligned((x))))
 #endif

+#define ALIGN_DIRECTIVE ALIGN_ATTR(16)
+#define ALIGN_AVX_DIRECTIVE ALIGN_ATTR(32)
+#define ALIGN_CL_DIRECTIVE ALIGN_ATTR(64)

 typedef signed char s8;
 typedef unsigned char u8;
@@ -82,10 +85,6 @@ typedef u32 ReportID;
 #define HS_PUBLIC_API
 #endif

-#define ALIGN_DIRECTIVE ALIGN_ATTR(16)
-#define ALIGN_AVX_DIRECTIVE ALIGN_ATTR(32)
-#define ALIGN_CL_DIRECTIVE ALIGN_ATTR(64)
-
 #define ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0]))

 /** \brief Shorthand for the attribute to shut gcc about unused parameters */
--- a/src/util/simd_types.h
+++ b/src/util/simd_types.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -65,7 +65,7 @@ typedef __m128i m128;
 #if defined(__AVX2__)
 typedef __m256i m256;
 #else
-typedef struct ALIGN_AVX_DIRECTIVE {m128 lo; m128 hi;} m256;
+typedef ALIGN_AVX_DIRECTIVE struct {m128 lo; m128 hi;} m256;
 #endif

 // these should align to 16 and 32 respectively
--- a/src/util/simd_utils.h
+++ b/src/util/simd_utils.h
@@ -493,11 +493,10 @@ static really_inline u32 diffrich64_256(m256 a, m256 b) {

 // aligned load
 static really_inline m256 load256(const void *ptr) {
-#if defined(__AVX2__)
    assert(ISALIGNED_N(ptr, alignof(m256)));
+#if defined(__AVX2__)
    return _mm256_load_si256((const m256 *)ptr);
 #else
-    assert(ISALIGNED_N(ptr, alignof(m128)));
    m256 rv = { load128(ptr), load128((const char *)ptr + 16) };
    return rv;
 #endif
@@ -517,11 +516,10 @@ static really_inline m256 load2x128(const void *ptr) {

 // aligned store
 static really_inline void store256(void *ptr, m256 a) {
-#if defined(__AVX2__)
    assert(ISALIGNED_N(ptr, alignof(m256)));
+#if defined(__AVX2__)
    _mm256_store_si256((m256 *)ptr, a);
 #else
-    assert(ISALIGNED_16(ptr));
    ptr = assume_aligned(ptr, 16);
    *(m256 *)ptr = a;
 #endif
@@ -943,19 +941,19 @@ static really_inline u32 diffrich64_512(m512 a, m512 b) {

 // aligned load
 static really_inline m512 load512(const void *ptr) {
-    assert(ISALIGNED_16(ptr));
+    assert(ISALIGNED_N(ptr, alignof(m256)));
    m512 rv = { load256(ptr), load256((const char *)ptr + 32) };
    return rv;
 }

 // aligned store
 static really_inline void store512(void *ptr, m512 a) {
+    assert(ISALIGNED_N(ptr, alignof(m256)));
 #if defined(__AVX2__)
    m512 *x = (m512 *)ptr;
    store256(&x->lo, a.lo);
    store256(&x->hi, a.hi);
 #else
-    assert(ISALIGNED_16(ptr));
    ptr = assume_aligned(ptr, 16);
    *(m512 *)ptr = a;
 #endif