mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Ensure that m256 is 32-byte aligned on non-AVX2 builds
This commit is contained in:
parent
d497a1259a
commit
22b451b59b
@ -52,6 +52,9 @@
|
||||
#define ALIGN_ATTR(x) __attribute__((aligned((x))))
|
||||
#endif
|
||||
|
||||
#define ALIGN_DIRECTIVE ALIGN_ATTR(16)
|
||||
#define ALIGN_AVX_DIRECTIVE ALIGN_ATTR(32)
|
||||
#define ALIGN_CL_DIRECTIVE ALIGN_ATTR(64)
|
||||
|
||||
typedef signed char s8;
|
||||
typedef unsigned char u8;
|
||||
@ -82,10 +85,6 @@ typedef u32 ReportID;
|
||||
#define HS_PUBLIC_API
|
||||
#endif
|
||||
|
||||
#define ALIGN_DIRECTIVE ALIGN_ATTR(16)
|
||||
#define ALIGN_AVX_DIRECTIVE ALIGN_ATTR(32)
|
||||
#define ALIGN_CL_DIRECTIVE ALIGN_ATTR(64)
|
||||
|
||||
#define ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0]))
|
||||
|
||||
/** \brief Shorthand for the attribute to shut gcc up about unused parameters */
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -65,7 +65,7 @@ typedef __m128i m128;
|
||||
#if defined(__AVX2__)
|
||||
typedef __m256i m256;
|
||||
#else
|
||||
typedef struct ALIGN_AVX_DIRECTIVE {m128 lo; m128 hi;} m256;
|
||||
typedef ALIGN_AVX_DIRECTIVE struct {m128 lo; m128 hi;} m256;
|
||||
#endif
|
||||
|
||||
// these should align to 16 and 32 respectively
|
||||
|
@ -493,11 +493,10 @@ static really_inline u32 diffrich64_256(m256 a, m256 b) {
|
||||
|
||||
// aligned load
|
||||
static really_inline m256 load256(const void *ptr) {
|
||||
#if defined(__AVX2__)
|
||||
assert(ISALIGNED_N(ptr, alignof(m256)));
|
||||
#if defined(__AVX2__)
|
||||
return _mm256_load_si256((const m256 *)ptr);
|
||||
#else
|
||||
assert(ISALIGNED_N(ptr, alignof(m128)));
|
||||
m256 rv = { load128(ptr), load128((const char *)ptr + 16) };
|
||||
return rv;
|
||||
#endif
|
||||
@ -517,11 +516,10 @@ static really_inline m256 load2x128(const void *ptr) {
|
||||
|
||||
// aligned store
|
||||
static really_inline void store256(void *ptr, m256 a) {
|
||||
#if defined(__AVX2__)
|
||||
assert(ISALIGNED_N(ptr, alignof(m256)));
|
||||
#if defined(__AVX2__)
|
||||
_mm256_store_si256((m256 *)ptr, a);
|
||||
#else
|
||||
assert(ISALIGNED_16(ptr));
|
||||
ptr = assume_aligned(ptr, 16);
|
||||
*(m256 *)ptr = a;
|
||||
#endif
|
||||
@ -943,19 +941,19 @@ static really_inline u32 diffrich64_512(m512 a, m512 b) {
|
||||
|
||||
// aligned load
|
||||
static really_inline m512 load512(const void *ptr) {
|
||||
assert(ISALIGNED_16(ptr));
|
||||
assert(ISALIGNED_N(ptr, alignof(m256)));
|
||||
m512 rv = { load256(ptr), load256((const char *)ptr + 32) };
|
||||
return rv;
|
||||
}
|
||||
|
||||
// aligned store
|
||||
static really_inline void store512(void *ptr, m512 a) {
|
||||
assert(ISALIGNED_N(ptr, alignof(m256)));
|
||||
#if defined(__AVX2__)
|
||||
m512 *x = (m512 *)ptr;
|
||||
store256(&x->lo, a.lo);
|
||||
store256(&x->hi, a.hi);
|
||||
#else
|
||||
assert(ISALIGNED_16(ptr));
|
||||
ptr = assume_aligned(ptr, 16);
|
||||
*(m512 *)ptr = a;
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user