Add ARM (NEON) detection to the build alongside the existing x86 checks.

What this patch does, file by file:
  * CMakeLists.txt: the gcc-specific block and the intrin.h / x86intrin.h
    header probes only run when ARCH_IA32 or ARCH_X86_64 is set; on
    ARCH_ARM32 / ARCH_AARCH64 the build probes for arm_neon.h instead.
  * cmake/arch.cmake: arm_neon.h becomes a valid INTRIN_INC_H (and turns
    FAT_RUNTIME off); the SSSE3/AVX2/AVX512 compile probes are limited to
    x86, a NEON compile probe is added for ARM, and any other architecture
    is rejected with a FATAL_ERROR.
  * cmake/config.h.in: exports ARCH_ARM32, ARCH_AARCH64, HAVE_C_ARM_NEON_H.
  * cmake/platform.cmake: renames ARCH_ARM64 to ARCH_AARCH64.
  * src/util/intrinsics.h: includes <arm_neon.h> when HAVE_C_ARM_NEON_H is
    defined.

Review notes (changes relative to the first revision of this patch):
  * The gcc guard is written as (ARCH_IA32 OR ARCH_X86_64) AND ... so the
    grouping is explicit and matches every other guard in this patch,
    instead of relying on how CMake orders mixed AND/OR operators.
  * The 64-bit test in platform.cmake checks the *value* of the probe
    results. CHECK_C_SOURCE_COMPILES caches its result variable whether the
    probe passes or fails, so a definedness test cannot tell the two apart.
  * HAVE_C_ARM_NEON_H is filled by the C header probe (CHECK_INCLUDE_FILES),
    matching the other HAVE_C_* variables.
  * The NEON probe voids its local so the test program does not trip
    unused-variable diagnostics when the required flags treat warnings as
    errors.
  * Indentation of context lines was reconstructed; apply with
    whitespace-tolerant options if the target tree differs.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e5078848..f4d1cc9f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -175,7 +175,7 @@ else()
         string(REGEX REPLACE "-O[^ ]*" "" CMAKE_CXX_FLAGS_${CONFIG} "${CMAKE_CXX_FLAGS_${CONFIG}}")
     endforeach ()
 
-    if (CMAKE_COMPILER_IS_GNUCC)
+    if ((ARCH_IA32 OR ARCH_X86_64) AND CMAKE_COMPILER_IS_GNUCC)
         message(STATUS "gcc version ${CMAKE_C_COMPILER_VERSION}")
         # If gcc doesn't recognise the host cpu, then mtune=native becomes
         # generic, which isn't very good in some cases. march=native looks at
@@ -281,10 +281,14 @@ else()
 endif()
 
 CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H)
-CHECK_INCLUDE_FILES(intrin.h HAVE_C_INTRIN_H)
-CHECK_INCLUDE_FILE_CXX(intrin.h HAVE_CXX_INTRIN_H)
-CHECK_INCLUDE_FILES(x86intrin.h HAVE_C_X86INTRIN_H)
-CHECK_INCLUDE_FILE_CXX(x86intrin.h HAVE_CXX_X86INTRIN_H)
+if (ARCH_IA32 OR ARCH_X86_64)
+    CHECK_INCLUDE_FILES(intrin.h HAVE_C_INTRIN_H)
+    CHECK_INCLUDE_FILE_CXX(intrin.h HAVE_CXX_INTRIN_H)
+    CHECK_INCLUDE_FILES(x86intrin.h HAVE_C_X86INTRIN_H)
+    CHECK_INCLUDE_FILE_CXX(x86intrin.h HAVE_CXX_X86INTRIN_H)
+elseif (ARCH_ARM32 OR ARCH_AARCH64)
+    CHECK_INCLUDE_FILES(arm_neon.h HAVE_C_ARM_NEON_H)
+endif()
 
 CHECK_FUNCTION_EXISTS(posix_memalign HAVE_POSIX_MEMALIGN)
 CHECK_FUNCTION_EXISTS(_aligned_malloc HAVE__ALIGNED_MALLOC)
diff --git a/cmake/arch.cmake b/cmake/arch.cmake
index cced49c6..e3cc9f44 100644
--- a/cmake/arch.cmake
+++ b/cmake/arch.cmake
@@ -6,7 +6,10 @@ if (HAVE_C_X86INTRIN_H)
     set (INTRIN_INC_H "x86intrin.h")
 elseif (HAVE_C_INTRIN_H)
     set (INTRIN_INC_H "intrin.h")
-else ()
+elseif (HAVE_C_ARM_NEON_H)
+    set (INTRIN_INC_H "arm_neon.h")
+    set (FAT_RUNTIME OFF)
+else()
     message (FATAL_ERROR "No intrinsics header found")
 endif ()
 
@@ -29,15 +32,16 @@ else (NOT FAT_RUNTIME)
     set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${ARCH_C_FLAGS}")
 endif ()
 
-# ensure we have the minimum of SSSE3 - call a SSSE3 intrinsic
-CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
+if (ARCH_IA32 OR ARCH_X86_64)
+    # ensure we have the minimum of SSSE3 - call a SSSE3 intrinsic
+    CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
 int main() {
     __m128i a = _mm_set1_epi8(1);
     (void)_mm_shuffle_epi8(a, a);
 }" HAVE_SSSE3)
 
-# now look for AVX2
-CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
+    # now look for AVX2
+    CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
 #if !defined(__AVX2__)
 #error no avx2
 #endif
@@ -47,8 +51,8 @@ int main(){
     (void)_mm256_xor_si256(z, z);
 }" HAVE_AVX2)
 
-# and now for AVX512
-CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
+    # and now for AVX512
+    CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
 #if !defined(__AVX512BW__)
 #error no avx512bw
 #endif
@@ -58,8 +62,8 @@ int main(){
     (void)_mm512_abs_epi8(z);
 }" HAVE_AVX512)
 
-# and now for AVX512VBMI
-CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
+    # and now for AVX512VBMI
+    CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
 #if !defined(__AVX512VBMI__)
 #error no avx512vbmi
 #endif
@@ -70,26 +74,39 @@ int main(){
     (void)_mm512_permutexvar_epi8(idx, a);
 }" HAVE_AVX512VBMI)
 
+elseif (ARCH_ARM32 OR ARCH_AARCH64)
+    CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
+int main() {
+    int32x4_t a = vdupq_n_s32(1);
+    (void)a;
+}" HAVE_NEON)
+else ()
+    message (FATAL_ERROR "Unsupported architecture")
+endif ()
+
 if (FAT_RUNTIME)
-    if (NOT HAVE_SSSE3)
+    if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_SSSE3)
         message(FATAL_ERROR "SSSE3 support required to build fat runtime")
     endif ()
-    if (NOT HAVE_AVX2)
+    if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_AVX2)
         message(FATAL_ERROR "AVX2 support required to build fat runtime")
     endif ()
-    if (BUILD_AVX512 AND NOT HAVE_AVX512)
+    if ((ARCH_IA32 OR ARCH_X86_64) AND BUILD_AVX512 AND NOT HAVE_AVX512)
         message(FATAL_ERROR "AVX512 support requested but not supported")
     endif ()
 else (NOT FAT_RUNTIME)
-    if (NOT HAVE_AVX2)
+    if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_AVX2)
         message(STATUS "Building without AVX2 support")
     endif ()
-    if (NOT HAVE_AVX512)
+    if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_AVX512)
         message(STATUS "Building without AVX512 support")
     endif ()
-    if (NOT HAVE_SSSE3)
+    if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_SSSE3)
         message(FATAL_ERROR "A minimum of SSSE3 compiler support is required")
     endif ()
+    if ((ARCH_ARM32 OR ARCH_AARCH64) AND NOT HAVE_NEON)
+        message(FATAL_ERROR "NEON support required for ARM support")
+    endif ()
 endif ()
 
 unset (CMAKE_REQUIRED_FLAGS)
diff --git a/cmake/config.h.in b/cmake/config.h.in
index 203f0afd..2d2c78ce 100644
--- a/cmake/config.h.in
+++ b/cmake/config.h.in
@@ -15,6 +15,12 @@
 /* "Define if building for EM64T" */
 #cmakedefine ARCH_X86_64
 
+/* "Define if building for ARM32" */
+#cmakedefine ARCH_ARM32
+
+/* "Define if building for AARCH64" */
+#cmakedefine ARCH_AARCH64
+
 /* internal build, switch on dump support. */
 #cmakedefine DUMP_SUPPORT
 
@@ -45,6 +51,9 @@
 /* C compiler has intrin.h */
 #cmakedefine HAVE_C_INTRIN_H
 
+/* C compiler has arm_neon.h */
+#cmakedefine HAVE_C_ARM_NEON_H
+
 /* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to
    0 if you don't. */
 #cmakedefine HAVE_DECL_PTHREAD_SETAFFINITY_NP
diff --git a/cmake/platform.cmake b/cmake/platform.cmake
index 8c82da2b..4591bf93 100644
--- a/cmake/platform.cmake
+++ b/cmake/platform.cmake
@@ -5,10 +5,10 @@ CHECK_C_SOURCE_COMPILES("#if !(defined(__x86_64__) || defined(_M_X64))\n#error n
 
 CHECK_C_SOURCE_COMPILES("#if !(defined(__i386__) || defined(_M_IX86))\n#error not 32bit\n#endif\nint main(void) { return 0; }" ARCH_IA32)
 
-CHECK_C_SOURCE_COMPILES("#if !defined(__aarch64__)\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_ARM64)
+CHECK_C_SOURCE_COMPILES("#if !defined(__aarch64__)\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_AARCH64)
 CHECK_C_SOURCE_COMPILES("#if !(defined(__arm__) && !defined(__aarch64__))\n#error not 32bit\n#endif\nint main(void) { return 0; }" ARCH_ARM32)
 
-if (DEFINED(ARCH_X86_64) OR DEFINED(ARCH_ARM64))
+if (ARCH_X86_64 OR ARCH_AARCH64)
   set(ARCH_64_BIT TRUE)
 else()
   set(ARCH_32_BIT TRUE)
diff --git a/src/util/intrinsics.h b/src/util/intrinsics.h
index edc4f6ef..3e2afc22 100644
--- a/src/util/intrinsics.h
+++ b/src/util/intrinsics.h
@@ -45,6 +45,10 @@
 # endif
 #endif
 
+#if defined(HAVE_C_ARM_NEON_H)
+# define USE_ARM_NEON_H
+#endif
+
 #ifdef __cplusplus
 # if defined(HAVE_CXX_INTRIN_H)
 #  define USE_INTRIN_H
@@ -59,6 +63,8 @@
 #include <x86intrin.h>
 #elif defined(USE_INTRIN_H)
 #include <intrin.h>
+#elif defined(USE_ARM_NEON_H)
+#include <arm_neon.h>
 #else
 #error no intrinsics file
 #endif