From e15ad9308aa552311333a9f18ee29f43d1e6c570 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Tue, 19 Dec 2023 17:31:43 +0200 Subject: [PATCH 01/11] SIMDe on Clang needs SIMDE_NO_CHECK_IMMEDIATE_CONSTANT defined --- cmake/simde.cmake | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cmake/simde.cmake b/cmake/simde.cmake index 8cac2bdd..5a7335bb 100644 --- a/cmake/simde.cmake +++ b/cmake/simde.cmake @@ -6,6 +6,11 @@ if (SIMDE_SSE42_H_FOUND) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DVS_SIMDE_BACKEND") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DVS_SIMDE_BACKEND") + if (CMAKE_COMPILER_IS_CLANG) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DSIMDE_NO_CHECK_IMMEDIATE_CONSTANT") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSIMDE_NO_CHECK_IMMEDIATE_CONSTANT") + endif() + if (SIMDE_NATIVE) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DVS_SIMDE_NATIVE -DSIMDE_ENABLE_OPENMP -fopenmp-simd") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DVS_SIMDE_NATIVE -DSIMDE_ENABLE_OPENMP -fopenmp-simd") From c8ba7fa1d30abc175d828d31b1ec8b46fc853ce4 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Tue, 19 Dec 2023 23:09:03 +0200 Subject: [PATCH 02/11] add missing pdep64 for common bitutils --- src/util/bitutils.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/util/bitutils.h b/src/util/bitutils.h index c67d5a85..8e9aae9c 100644 --- a/src/util/bitutils.h +++ b/src/util/bitutils.h @@ -78,6 +78,7 @@ #define rank_in_mask64_impl rank_in_mask64_impl_c #define pext32_impl pext32_impl_c #define pext64_impl pext64_impl_c +#define pdep64_impl pdep64_impl_c #endif static really_inline @@ -207,6 +208,11 @@ u64a pext64(u64a x, u64a mask) { return pext64_impl(x, mask); } +static really_inline +u64a pdep64(u64a x, u64a mask) { + return pdep64_impl(x, mask); +} + /* compilers don't reliably synthesize the 32-bit ANDN instruction here, * so we force its generation. */ From 8cba258e7f10c75e373cb213551e494b33012fbc Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Tue, 19 Dec 2023 23:15:27 +0200 Subject: [PATCH 03/11] add missing pdep64 for arm and ppc64le --- src/util/arch/arm/bitutils.h | 5 +++++ src/util/arch/ppc64el/bitutils.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/util/arch/arm/bitutils.h b/src/util/arch/arm/bitutils.h index 5ef5fbf4..04d001d3 100644 --- a/src/util/arch/arm/bitutils.h +++ b/src/util/arch/arm/bitutils.h @@ -203,6 +203,11 @@ u64a pext64_impl(u64a x, u64a mask) { return pext64_impl_c(x, mask); } +static really_inline +u64a pdep64_impl(u64a x, u64a mask) { + return pdep64_impl_c(x, mask); +} + /* compilers don't reliably synthesize the 32-bit ANDN instruction here, * so we force its generation. */ diff --git a/src/util/arch/ppc64el/bitutils.h b/src/util/arch/ppc64el/bitutils.h index 10c4869b..1741b09d 100644 --- a/src/util/arch/ppc64el/bitutils.h +++ b/src/util/arch/ppc64el/bitutils.h @@ -201,7 +201,7 @@ u64a pext64_impl(u64a x, u64a mask) { } static really_inline -u64a pdep64(u64a x, u64a mask) { +u64a pdep64_impl(u64a x, u64a mask) { return pdep64_impl_c(x, mask); } From 49e6fe15a281b8ba2ea16ab9bfcefc9c4c77c086 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Wed, 20 Dec 2023 00:12:15 +0200 Subject: [PATCH 04/11] add missing pdep64 for x86 bitutils --- src/util/arch/x86/bitutils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/arch/x86/bitutils.h b/src/util/arch/x86/bitutils.h index 5c15ee91..4141119a 100644 --- a/src/util/arch/x86/bitutils.h +++ b/src/util/arch/x86/bitutils.h @@ -282,7 +282,7 @@ u64a pext64_impl(u64a x, u64a mask) { #if defined(HAVE_BMI2) && defined(ARCH_64_BIT) static really_inline -u64a pdep64(u64a x, u64a mask) { +u64a pdep64_impl(u64a x, u64a mask) { return _pdep_u64(x, mask); } #endif From 1b915cfb938a7a86d3bb26244fd20abf2031c4df Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Wed, 20 Dec 2023 08:25:30 +0200 Subject: [PATCH 05/11] add fallback pdep64 for x86 if no HAVE_BMI2 --- src/util/arch/x86/bitutils.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/util/arch/x86/bitutils.h b/src/util/arch/x86/bitutils.h index 4141119a..485b6512 100644 --- a/src/util/arch/x86/bitutils.h +++ b/src/util/arch/x86/bitutils.h @@ -285,6 +285,11 @@ static really_inline u64a pdep64_impl(u64a x, u64a mask) { return _pdep_u64(x, mask); } +#else +static really_inline +u64a pdep64_impl(u64a x, u64a mask) { + return pdep64_impl_c(x, mask); +} #endif /* compilers don't reliably synthesize the 32-bit ANDN instruction here, From 2aa5e1c71026699e9057cd6c0398f0fe14840711 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Wed, 20 Dec 2023 15:15:38 +0000 Subject: [PATCH 06/11] fix arch=native on arm+clang --- cmake/archdetect.cmake | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/cmake/archdetect.cmake b/cmake/archdetect.cmake index 494269c2..2d64e5cf 100644 --- a/cmake/archdetect.cmake +++ b/cmake/archdetect.cmake @@ -68,8 +68,23 @@ if (USE_CPU_NATIVE) endif() else() if (SIMDE_BACKEND) - set(GNUCC_ARCH native) - set(TUNE_FLAG native) + if (CMAKE_COMPILER_IS_CLANG) + if(ARCH_AARCH64) + if (CMAKE_C_COMPILER_VERSION VERSION_LESS "15.0") + set(GNUCC_ARCH native) + set(TUNE_FLAG native) + else() + set(GNUCC_ARCH armv8-a) + set(TUNE_FLAG generic) + endif() + else() + set(GNUCC_ARCH native) + set(TUNE_FLAG native) + endif() + else() + set(GNUCC_ARCH native) + set(TUNE_FLAG native) + endif() elseif (ARCH_IA32 OR ARCH_X86_64) set(GNUCC_ARCH native) set(TUNE_FLAG generic) From 44f19c10065bafc1d2bbdfb1e3da76cce3dd592a Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Wed, 20 Dec 2023 17:16:23 +0200 Subject: [PATCH 07/11] fix submodule headers detection --- cmake/simde.cmake | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cmake/simde.cmake b/cmake/simde.cmake index 5a7335bb..0ac52832 100644 --- a/cmake/simde.cmake +++ b/cmake/simde.cmake @@ -1,10 +1,11 @@ -include_directories(${PROJECT_SOURCE_DIR}/simde/simde) +LIST(APPEND CMAKE_REQUIRED_INCLUDES ${PROJECT_SOURCE_DIR}/simde) -CHECK_INCLUDE_FILES("simde/x86/sse4.2.h" SIMDE_SSE42_H_FOUND) +CHECK_INCLUDE_FILES(simde/x86/sse4.2.h SIMDE_SSE42_H_FOUND) if (SIMDE_SSE42_H_FOUND) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DVS_SIMDE_BACKEND") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DVS_SIMDE_BACKEND") + include_directories(${PROJECT_SOURCE_DIR}/simde) if (CMAKE_COMPILER_IS_CLANG) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DSIMDE_NO_CHECK_IMMEDIATE_CONSTANT") From a7a12844e751d92648d2bc988b650b2806732722 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Wed, 20 Dec 2023 17:16:45 +0200 Subject: [PATCH 08/11] reorganize OS detection --- cmake/osdetection.cmake | 42 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/cmake/osdetection.cmake b/cmake/osdetection.cmake index 235487a9..343e16b5 100644 --- a/cmake/osdetection.cmake +++ b/cmake/osdetection.cmake @@ -7,28 +7,28 @@ if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") option(FAT_RUNTIME "Build a library that supports multiple microarchitectures" OFF) -message("Checking Fat Runtime Requirements...") -if (FAT_RUNTIME AND NOT LINUX) - message(FATAL_ERROR "Fat runtime is only supported on Linux OS") -endif() - -if (USE_CPU_NATIVE AND FAT_RUNTIME) - message(FATAL_ERROR "Fat runtime is not compatible with Native CPU detection") -endif() - -if (FAT_RUNTIME AND LINUX) - if (NOT (ARCH_IA32 OR ARCH_X86_64 OR ARCH_AARCH64)) - message(FATAL_ERROR "Fat runtime is only supported on Intel and Aarch64 architectures") +if (FAT_RUNTIME) + message("Checking Fat Runtime Requirements...") + if (NOT LINUX) + message(FATAL_ERROR "Fat runtime is only supported on Linux OS") else() - message(STATUS "Building Fat runtime for multiple microarchitectures") - message(STATUS "generator is ${CMAKE_GENERATOR}") - if (NOT (CMAKE_GENERATOR MATCHES "Unix Makefiles" OR - (CMAKE_VERSION VERSION_GREATER "3.0" AND CMAKE_GENERATOR MATCHES "Ninja"))) - message (FATAL_ERROR "Building the fat runtime requires the Unix Makefiles generator, or Ninja with CMake v3.0 or higher") + if (USE_CPU_NATIVE AND FAT_RUNTIME) + message(FATAL_ERROR "Fat runtime is not compatible with Native CPU detection") + endif() + + if (NOT (ARCH_IA32 OR ARCH_X86_64 OR ARCH_AARCH64)) + message(FATAL_ERROR "Fat runtime is only supported on Intel and Aarch64 architectures") else() - include (${CMAKE_MODULE_PATH}/attrib.cmake) - if (NOT HAS_C_ATTR_IFUNC) - message(FATAL_ERROR "Compiler does not support ifunc attribute, cannot build fat runtime") + message(STATUS "Building Fat runtime for multiple microarchitectures") + message(STATUS "generator is ${CMAKE_GENERATOR}") + if (NOT (CMAKE_GENERATOR MATCHES "Unix Makefiles" OR + (CMAKE_VERSION VERSION_GREATER "3.0" AND CMAKE_GENERATOR MATCHES "Ninja"))) + message (FATAL_ERROR "Building the fat runtime requires the Unix Makefiles generator, or Ninja with CMake v3.0 or higher") + else() + include (${CMAKE_MODULE_PATH}/attrib.cmake) + if (NOT HAS_C_ATTR_IFUNC) + message(FATAL_ERROR "Compiler does not support ifunc attribute, cannot build fat runtime") + endif() endif() endif() endif() @@ -36,5 +36,3 @@ if (FAT_RUNTIME AND LINUX) message(FATAL_ERROR "Fat runtime is only built on Release builds") endif() endif () - - From 306e8612be25a2a7634986d6a98ae69cc359359e Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Wed, 20 Dec 2023 15:27:56 +0000 Subject: [PATCH 09/11] GREATER_EQUAL --- cmake/archdetect.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/archdetect.cmake b/cmake/archdetect.cmake index 2d64e5cf..9dd5962a 100644 --- a/cmake/archdetect.cmake +++ b/cmake/archdetect.cmake @@ -70,7 +70,7 @@ else() if (SIMDE_BACKEND) if (CMAKE_COMPILER_IS_CLANG) if(ARCH_AARCH64) - if (CMAKE_C_COMPILER_VERSION VERSION_LESS "15.0") + if (CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL "15.0") set(GNUCC_ARCH native) set(TUNE_FLAG native) else() From ef37e6015ada07310b485ac59e0a33a5800006ec Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Wed, 20 Dec 2023 16:43:38 +0000 Subject: [PATCH 10/11] native CPU on SIMDe will enable all sorts of features in an unpredicted manner, set sane defaults --- cmake/archdetect.cmake | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/cmake/archdetect.cmake b/cmake/archdetect.cmake index 9dd5962a..b988064a 100644 --- a/cmake/archdetect.cmake +++ b/cmake/archdetect.cmake @@ -68,22 +68,21 @@ if (USE_CPU_NATIVE) endif() else() if (SIMDE_BACKEND) - if (CMAKE_COMPILER_IS_CLANG) - if(ARCH_AARCH64) - if (CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL "15.0") - set(GNUCC_ARCH native) - set(TUNE_FLAG native) - else() - set(GNUCC_ARCH armv8-a) - set(TUNE_FLAG generic) - endif() - else() - set(GNUCC_ARCH native) - set(TUNE_FLAG native) - endif() + if (ARCH_IA32 OR ARCH_X86_64) + set(GNUCC_ARCH x86_64_v2) + set(TUNE_FLAG generic) + elseif(ARCH_AARCH64) + set(GNUCC_ARCH armv8-a) + set(TUNE_FLAG generic) + elseif(ARCH_ARM32) + set(GNUCC_ARCH armv7a) + set(TUNE_FLAG generic) + elseif(ARCH_PPC64EL) + set(GNUCC_ARCH power8) + set(TUNE_FLAG power8) else() set(GNUCC_ARCH native) - set(TUNE_FLAG native) + set(TUNE_FLAG generic) endif() elseif (ARCH_IA32 OR ARCH_X86_64) set(GNUCC_ARCH native) From 10d957477a94a00e51f878deca0b0a3adb58ef0d Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Wed, 20 Dec 2023 22:21:00 +0200 Subject: [PATCH 11/11] fix typo in baseline x86 arch definition --- cmake/archdetect.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/archdetect.cmake b/cmake/archdetect.cmake index b988064a..bd0d088c 100644 --- a/cmake/archdetect.cmake +++ b/cmake/archdetect.cmake @@ -44,7 +44,7 @@ if (USE_CPU_NATIVE) endif() elseif (CMAKE_COMPILER_IS_CLANG) if (ARCH_IA32 OR ARCH_X86_64) - set(GNUCC_ARCH x86_64_v2) + set(GNUCC_ARCH x86-64-v2) set(TUNE_FLAG generic) elseif(ARCH_AARCH64) if (BUILD_SVE2_BITPERM) @@ -69,7 +69,7 @@ if (USE_CPU_NATIVE) else() if (SIMDE_BACKEND) if (ARCH_IA32 OR ARCH_X86_64) - set(GNUCC_ARCH x86_64_v2) + set(GNUCC_ARCH x86-64-v2) set(TUNE_FLAG generic) elseif(ARCH_AARCH64) set(GNUCC_ARCH armv8-a)