mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Ensure that m256 is 32-aligned on non-avx2 builds
This commit is contained in:
parent
d497a1259a
commit
22b451b59b
@ -52,6 +52,9 @@
|
|||||||
#define ALIGN_ATTR(x) __attribute__((aligned((x))))
|
#define ALIGN_ATTR(x) __attribute__((aligned((x))))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define ALIGN_DIRECTIVE ALIGN_ATTR(16)
|
||||||
|
#define ALIGN_AVX_DIRECTIVE ALIGN_ATTR(32)
|
||||||
|
#define ALIGN_CL_DIRECTIVE ALIGN_ATTR(64)
|
||||||
|
|
||||||
typedef signed char s8;
|
typedef signed char s8;
|
||||||
typedef unsigned char u8;
|
typedef unsigned char u8;
|
||||||
@ -82,10 +85,6 @@ typedef u32 ReportID;
|
|||||||
#define HS_PUBLIC_API
|
#define HS_PUBLIC_API
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define ALIGN_DIRECTIVE ALIGN_ATTR(16)
|
|
||||||
#define ALIGN_AVX_DIRECTIVE ALIGN_ATTR(32)
|
|
||||||
#define ALIGN_CL_DIRECTIVE ALIGN_ATTR(64)
|
|
||||||
|
|
||||||
#define ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0]))
|
#define ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0]))
|
||||||
|
|
||||||
/** \brief Shorthand for the attribute to shut gcc about unused parameters */
|
/** \brief Shorthand for the attribute to shut gcc about unused parameters */
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -65,7 +65,7 @@ typedef __m128i m128;
|
|||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
typedef __m256i m256;
|
typedef __m256i m256;
|
||||||
#else
|
#else
|
||||||
typedef struct ALIGN_AVX_DIRECTIVE {m128 lo; m128 hi;} m256;
|
typedef ALIGN_AVX_DIRECTIVE struct {m128 lo; m128 hi;} m256;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// these should align to 16 and 32 respectively
|
// these should align to 16 and 32 respectively
|
||||||
|
@ -493,11 +493,10 @@ static really_inline u32 diffrich64_256(m256 a, m256 b) {
|
|||||||
|
|
||||||
// aligned load
|
// aligned load
|
||||||
static really_inline m256 load256(const void *ptr) {
|
static really_inline m256 load256(const void *ptr) {
|
||||||
#if defined(__AVX2__)
|
|
||||||
assert(ISALIGNED_N(ptr, alignof(m256)));
|
assert(ISALIGNED_N(ptr, alignof(m256)));
|
||||||
|
#if defined(__AVX2__)
|
||||||
return _mm256_load_si256((const m256 *)ptr);
|
return _mm256_load_si256((const m256 *)ptr);
|
||||||
#else
|
#else
|
||||||
assert(ISALIGNED_N(ptr, alignof(m128)));
|
|
||||||
m256 rv = { load128(ptr), load128((const char *)ptr + 16) };
|
m256 rv = { load128(ptr), load128((const char *)ptr + 16) };
|
||||||
return rv;
|
return rv;
|
||||||
#endif
|
#endif
|
||||||
@ -517,11 +516,10 @@ static really_inline m256 load2x128(const void *ptr) {
|
|||||||
|
|
||||||
// aligned store
|
// aligned store
|
||||||
static really_inline void store256(void *ptr, m256 a) {
|
static really_inline void store256(void *ptr, m256 a) {
|
||||||
#if defined(__AVX2__)
|
|
||||||
assert(ISALIGNED_N(ptr, alignof(m256)));
|
assert(ISALIGNED_N(ptr, alignof(m256)));
|
||||||
|
#if defined(__AVX2__)
|
||||||
_mm256_store_si256((m256 *)ptr, a);
|
_mm256_store_si256((m256 *)ptr, a);
|
||||||
#else
|
#else
|
||||||
assert(ISALIGNED_16(ptr));
|
|
||||||
ptr = assume_aligned(ptr, 16);
|
ptr = assume_aligned(ptr, 16);
|
||||||
*(m256 *)ptr = a;
|
*(m256 *)ptr = a;
|
||||||
#endif
|
#endif
|
||||||
@ -943,19 +941,19 @@ static really_inline u32 diffrich64_512(m512 a, m512 b) {
|
|||||||
|
|
||||||
// aligned load
|
// aligned load
|
||||||
static really_inline m512 load512(const void *ptr) {
|
static really_inline m512 load512(const void *ptr) {
|
||||||
assert(ISALIGNED_16(ptr));
|
assert(ISALIGNED_N(ptr, alignof(m256)));
|
||||||
m512 rv = { load256(ptr), load256((const char *)ptr + 32) };
|
m512 rv = { load256(ptr), load256((const char *)ptr + 32) };
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
// aligned store
|
// aligned store
|
||||||
static really_inline void store512(void *ptr, m512 a) {
|
static really_inline void store512(void *ptr, m512 a) {
|
||||||
|
assert(ISALIGNED_N(ptr, alignof(m256)));
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
m512 *x = (m512 *)ptr;
|
m512 *x = (m512 *)ptr;
|
||||||
store256(&x->lo, a.lo);
|
store256(&x->lo, a.lo);
|
||||||
store256(&x->hi, a.hi);
|
store256(&x->hi, a.hi);
|
||||||
#else
|
#else
|
||||||
assert(ISALIGNED_16(ptr));
|
|
||||||
ptr = assume_aligned(ptr, 16);
|
ptr = assume_aligned(ptr, 16);
|
||||||
*(m512 *)ptr = a;
|
*(m512 *)ptr = a;
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user