Merge pull request #212 from VectorCamp/bugfix/fix-simde-build

SIMDe on Clang needs SIMDE_NO_CHECK_IMMEDIATE_CONSTANT to be defined. This PR also includes other SIMDe-related fixes, now that SIMDe is part of the CI pipeline.

An issue with SIMDe on x86 still remains because of an upstream bug:

https://github.com/simd-everywhere/simde/issues/1119

Similarly, SIMDe native with Clang on Arm also has a build failure, which is not high priority:

https://buildbot-ci.vectorcamp.gr/#/builders/129/builds/11

This is possibly a SIMDe issue as well and needs investigation, but I will merge this PR since these are non-blockers.
This commit is contained in:
Konstantinos Margaritis 2023-12-21 11:04:32 +02:00 committed by GitHub
commit 3113d1ca30
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 63 additions and 29 deletions

View File

@ -44,7 +44,7 @@ if (USE_CPU_NATIVE)
endif() endif()
elseif (CMAKE_COMPILER_IS_CLANG) elseif (CMAKE_COMPILER_IS_CLANG)
if (ARCH_IA32 OR ARCH_X86_64) if (ARCH_IA32 OR ARCH_X86_64)
set(GNUCC_ARCH x86_64_v2) set(GNUCC_ARCH x86-64-v2)
set(TUNE_FLAG generic) set(TUNE_FLAG generic)
elseif(ARCH_AARCH64) elseif(ARCH_AARCH64)
if (BUILD_SVE2_BITPERM) if (BUILD_SVE2_BITPERM)
@ -68,8 +68,22 @@ if (USE_CPU_NATIVE)
endif() endif()
else() else()
if (SIMDE_BACKEND) if (SIMDE_BACKEND)
if (ARCH_IA32 OR ARCH_X86_64)
set(GNUCC_ARCH x86-64-v2)
set(TUNE_FLAG generic)
elseif(ARCH_AARCH64)
set(GNUCC_ARCH armv8-a)
set(TUNE_FLAG generic)
elseif(ARCH_ARM32)
set(GNUCC_ARCH armv7a)
set(TUNE_FLAG generic)
elseif(ARCH_PPC64EL)
set(GNUCC_ARCH power8)
set(TUNE_FLAG power8)
else()
set(GNUCC_ARCH native) set(GNUCC_ARCH native)
set(TUNE_FLAG native) set(TUNE_FLAG generic)
endif()
elseif (ARCH_IA32 OR ARCH_X86_64) elseif (ARCH_IA32 OR ARCH_X86_64)
set(GNUCC_ARCH native) set(GNUCC_ARCH native)
set(TUNE_FLAG generic) set(TUNE_FLAG generic)

View File

@ -7,16 +7,15 @@ if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
option(FAT_RUNTIME "Build a library that supports multiple microarchitectures" OFF) option(FAT_RUNTIME "Build a library that supports multiple microarchitectures" OFF)
message("Checking Fat Runtime Requirements...") if (FAT_RUNTIME)
if (FAT_RUNTIME AND NOT LINUX) message("Checking Fat Runtime Requirements...")
if (NOT LINUX)
message(FATAL_ERROR "Fat runtime is only supported on Linux OS") message(FATAL_ERROR "Fat runtime is only supported on Linux OS")
endif() else()
if (USE_CPU_NATIVE AND FAT_RUNTIME)
if (USE_CPU_NATIVE AND FAT_RUNTIME)
message(FATAL_ERROR "Fat runtime is not compatible with Native CPU detection") message(FATAL_ERROR "Fat runtime is not compatible with Native CPU detection")
endif() endif()
if (FAT_RUNTIME AND LINUX)
if (NOT (ARCH_IA32 OR ARCH_X86_64 OR ARCH_AARCH64)) if (NOT (ARCH_IA32 OR ARCH_X86_64 OR ARCH_AARCH64))
message(FATAL_ERROR "Fat runtime is only supported on Intel and Aarch64 architectures") message(FATAL_ERROR "Fat runtime is only supported on Intel and Aarch64 architectures")
else() else()
@ -32,9 +31,8 @@ if (FAT_RUNTIME AND LINUX)
endif() endif()
endif() endif()
endif() endif()
endif()
if (NOT RELEASE_BUILD) if (NOT RELEASE_BUILD)
message(FATAL_ERROR "Fat runtime is only built on Release builds") message(FATAL_ERROR "Fat runtime is only built on Release builds")
endif() endif()
endif () endif ()

View File

@ -1,10 +1,16 @@
include_directories(${PROJECT_SOURCE_DIR}/simde/simde) LIST(APPEND CMAKE_REQUIRED_INCLUDES ${PROJECT_SOURCE_DIR}/simde)
CHECK_INCLUDE_FILES("simde/x86/sse4.2.h" SIMDE_SSE42_H_FOUND) CHECK_INCLUDE_FILES(simde/x86/sse4.2.h SIMDE_SSE42_H_FOUND)
if (SIMDE_SSE42_H_FOUND) if (SIMDE_SSE42_H_FOUND)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DVS_SIMDE_BACKEND") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DVS_SIMDE_BACKEND")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DVS_SIMDE_BACKEND") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DVS_SIMDE_BACKEND")
include_directories(${PROJECT_SOURCE_DIR}/simde)
if (CMAKE_COMPILER_IS_CLANG)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DSIMDE_NO_CHECK_IMMEDIATE_CONSTANT")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSIMDE_NO_CHECK_IMMEDIATE_CONSTANT")
endif()
if (SIMDE_NATIVE) if (SIMDE_NATIVE)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DVS_SIMDE_NATIVE -DSIMDE_ENABLE_OPENMP -fopenmp-simd") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DVS_SIMDE_NATIVE -DSIMDE_ENABLE_OPENMP -fopenmp-simd")

View File

@ -203,6 +203,11 @@ u64a pext64_impl(u64a x, u64a mask) {
return pext64_impl_c(x, mask); return pext64_impl_c(x, mask);
} }
static really_inline
u64a pdep64_impl(u64a x, u64a mask) {
return pdep64_impl_c(x, mask);
}
/* compilers don't reliably synthesize the 32-bit ANDN instruction here, /* compilers don't reliably synthesize the 32-bit ANDN instruction here,
* so we force its generation. * so we force its generation.
*/ */

View File

@ -201,7 +201,7 @@ u64a pext64_impl(u64a x, u64a mask) {
} }
static really_inline static really_inline
u64a pdep64(u64a x, u64a mask) { u64a pdep64_impl(u64a x, u64a mask) {
return pdep64_impl_c(x, mask); return pdep64_impl_c(x, mask);
} }

View File

@ -282,9 +282,14 @@ u64a pext64_impl(u64a x, u64a mask) {
#if defined(HAVE_BMI2) && defined(ARCH_64_BIT) #if defined(HAVE_BMI2) && defined(ARCH_64_BIT)
static really_inline static really_inline
u64a pdep64(u64a x, u64a mask) { u64a pdep64_impl(u64a x, u64a mask) {
return _pdep_u64(x, mask); return _pdep_u64(x, mask);
} }
#else
static really_inline
u64a pdep64_impl(u64a x, u64a mask) {
return pdep64_impl_c(x, mask);
}
#endif #endif
/* compilers don't reliably synthesize the 32-bit ANDN instruction here, /* compilers don't reliably synthesize the 32-bit ANDN instruction here,

View File

@ -78,6 +78,7 @@
#define rank_in_mask64_impl rank_in_mask64_impl_c #define rank_in_mask64_impl rank_in_mask64_impl_c
#define pext32_impl pext32_impl_c #define pext32_impl pext32_impl_c
#define pext64_impl pext64_impl_c #define pext64_impl pext64_impl_c
#define pdep64_impl pdep64_impl_c
#endif #endif
static really_inline static really_inline
@ -207,6 +208,11 @@ u64a pext64(u64a x, u64a mask) {
return pext64_impl(x, mask); return pext64_impl(x, mask);
} }
static really_inline
u64a pdep64(u64a x, u64a mask) {
return pdep64_impl(x, mask);
}
/* compilers don't reliably synthesize the 32-bit ANDN instruction here, /* compilers don't reliably synthesize the 32-bit ANDN instruction here,
* so we force its generation. * so we force its generation.
*/ */