diff --git a/src/util/arch.h b/src/util/arch.h index 985fec6a..57e39c07 100644 --- a/src/util/arch.h +++ b/src/util/arch.h @@ -33,58 +33,9 @@ #ifndef UTIL_ARCH_H_ #define UTIL_ARCH_H_ -#if defined(__SSE2__) || defined(_M_X64) || (_M_IX86_FP >= 2) -#define HAVE_SSE2 +#if defined(__i386__) || defined(__x86_64__) +#include "util/arch/x86/x86.h" #endif -#if defined(__SSE4_1__) || (defined(_WIN32) && defined(__AVX__)) -#define HAVE_SSE41 -#endif +#endif // UTIL_ARCH_X86_H_ -#if defined(__SSE4_2__) || (defined(_WIN32) && defined(__AVX__)) -#define HAVE_SSE42 -#endif - -#if defined(__AVX__) -#define HAVE_AVX -#endif - -#if defined(__AVX2__) -#define HAVE_AVX2 -#endif - -#if defined(__AVX512BW__) -#define HAVE_AVX512 -#endif - -#if defined(__AVX512VBMI__) -#define HAVE_AVX512VBMI -#endif - -/* - * ICC and MSVC don't break out POPCNT or BMI/2 as separate pre-def macros - */ -#if defined(__POPCNT__) || \ - (defined(__INTEL_COMPILER) && defined(__SSE4_2__)) || \ - (defined(_WIN32) && defined(__AVX__)) -#define HAVE_POPCOUNT_INSTR -#endif - -#if defined(__BMI__) || (defined(_WIN32) && defined(__AVX2__)) || \ - (defined(__INTEL_COMPILER) && defined(__AVX2__)) -#define HAVE_BMI -#endif - -#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__)) || \ - (defined(__INTEL_COMPILER) && defined(__AVX2__)) -#define HAVE_BMI2 -#endif - -/* - * MSVC uses a different form of inline asm - */ -#if defined(_WIN32) && defined(_MSC_VER) -#define NO_ASM -#endif - -#endif // UTIL_ARCH_H_ diff --git a/src/util/arch/x86/simd_types.h b/src/util/arch/x86/simd_types.h new file mode 100644 index 00000000..a582abd5 --- /dev/null +++ b/src/util/arch/x86/simd_types.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2015-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SIMD_TYPES_X86_H +#define SIMD_TYPES_X86_H + +#if !defined(m128) && defined(HAVE_SSE2) +typedef __m128i m128; +#endif + +#if !defined(m128) && defined(HAVE_AVX2) +typedef __m256i m256; +#endif + +#if !defined(m512) && defined(HAVE_AVX512) +typedef __m512i m512; +#endif + +#endif /* SIMD_TYPES_H */ + diff --git a/src/util/arch/x86/x86.h b/src/util/arch/x86/x86.h new file mode 100644 index 00000000..8126f14a --- /dev/null +++ b/src/util/arch/x86/x86.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2017-2020, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Per-platform architecture definitions + */ + +#ifndef UTIL_ARCH_X86_H_ +#define UTIL_ARCH_X86_H_ + +#if defined(__SSE2__) || defined(_M_X64) || (_M_IX86_FP >= 2) +#define HAVE_SSE2 +#define HAVE_SIMD_128_BITS +#endif + +#if defined(__SSE4_1__) || (defined(_WIN32) && defined(__AVX__)) +#define HAVE_SSE41 +#define HAVE_SIMD_128_BITS +#endif + +#if defined(__SSE4_2__) || (defined(_WIN32) && defined(__AVX__)) +#define HAVE_SSE42 +#define HAVE_SIMD_128_BITS +#endif + +#if defined(__AVX__) +#define HAVE_AVX +#define HAVE_SIMD_256_BITS +#endif + +#if defined(__AVX2__) +#define HAVE_AVX2 +#define HAVE_SIMD_256_BITS +#endif + +#if defined(__AVX512BW__) +#define HAVE_AVX512 +#define HAVE_SIMD_512_BITS +#endif + +#if defined(__AVX512VBMI__) +#define HAVE_AVX512VBMI +#endif + +/* + * ICC and MSVC don't break out POPCNT or BMI/2 as separate pre-def macros + */ +#if defined(__POPCNT__) || \ + (defined(__INTEL_COMPILER) && defined(__SSE4_2__)) || \ + (defined(_WIN32) && defined(__AVX__)) +#define HAVE_POPCOUNT_INSTR +#endif + +#if defined(__BMI__) || (defined(_WIN32) && defined(__AVX2__)) || \ + (defined(__INTEL_COMPILER) && defined(__AVX2__)) +#define HAVE_BMI +#endif + +#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__)) || \ + (defined(__INTEL_COMPILER) && defined(__AVX2__)) +#define HAVE_BMI2 +#endif + +/* + * MSVC uses a different form of inline asm + */ +#if defined(_WIN32) && defined(_MSC_VER) +#define NO_ASM +#endif + +#endif // UTIL_ARCH_X86_H_ diff --git a/src/util/simd_types.h b/src/util/simd_types.h index 962cad6c..a58ede4d 100644 --- a/src/util/simd_types.h +++ b/src/util/simd_types.h @@ -34,22 +34,20 @@ #include "util/intrinsics.h" #include "ue2common.h" -#if defined(HAVE_SSE2) -typedef __m128i m128; -#else +#if defined(__i386__) || defined(__x86_64__) +#include "util/arch/x86/simd_types.h" +#endif + +#if !defined(m128) && !defined(HAVE_SIMD_128_BITS) typedef struct ALIGN_DIRECTIVE {u64a hi; u64a lo;} m128; #endif -#if defined(HAVE_AVX2) -typedef __m256i m256; -#else +#if !defined(m256) && !defined(HAVE_SIMD_256_BITS) typedef struct ALIGN_AVX_DIRECTIVE {m128 lo; m128 hi;} m256; #endif typedef struct {m128 lo; m128 mid; m128 hi;} m384; -#if defined(HAVE_AVX512) -typedef __m512i m512; -#else +#if !defined(m512) && !defined(HAVE_SIMD_512_BITS) typedef struct ALIGN_ATTR(64) {m256 lo; m256 hi;} m512; #endif diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index 42223133..671a5bab 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -38,10 +38,10 @@ #endif #include "config.h" +#include "util/arch.h" #include "ue2common.h" #include "simd_types.h" #include "unaligned.h" -#include "util/arch.h" #include "util/intrinsics.h" #include // for memcpy