# Patch summary (git-format unified diff, three files).
#
# CMakeLists.txt
#   * ARCH_FLAG selection (mcpu when ARCH_PPC64EL, march otherwise) is hoisted
#     out of the gcc-only branch so it is set before the compiler-specific
#     if/elseif chain.
#   * The gcc status message now prints both GNUCC_ARCH and TUNE_FLAG; the
#     clang status message is moved to the end of the clang branch, after
#     GNUCC_ARCH/TUNE_FLAG have been assigned, and prints both as well.
#   * The clang-only code that appended SKYLAKE_FLAG / -mavx2 / -msse4.2 to
#     CMAKE_C_FLAGS and CMAKE_CXX_FLAGS is removed. A new x86 block
#     (ARCH_IA32 OR ARCH_X86_64) sets ARCH_C_FLAGS / ARCH_CXX_FLAGS instead:
#     without FAT_RUNTIME it picks SKYLAKE_FLAG, -mavx2 or -msse4.2 from
#     BUILD_AVX512 / BUILD_AVX2; with FAT_RUNTIME it uses -msse4.2.
#   * ARCH_C_FLAGS / ARCH_CXX_FLAGS are then rebuilt unconditionally as
#     "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} <previous value>".
#     The former per-architecture blocks, which skipped this when
#     CMAKE_C_FLAGS / CMAKE_CXX_FLAGS already matched march or mtune, are
#     left in the file as commented-out code.
#   * ARCH_C_FLAGS / ARCH_CXX_FLAGS are appended to CMAKE_C_FLAGS /
#     CMAKE_CXX_FLAGS after the FAT_RUNTIME if/else, i.e. for both fat and
#     non-fat builds; the no-op self-assignments in the fat branch are dropped.
#   * Per-target COMPILE_FLAGS "-msse4.2" on hs_compile (and hs_exec_common in
#     the fat-runtime path) are removed; hs_exec_core2 and
#     hs_exec_shared_core2 now use "-march=core2 -msse4.2".
#   * Two hunks only add or remove a blank line.
#
# src/util/simd_types.h
#   * Adds one blank line before the m512 fallback typedef; no code change.
#
# util/CMakeLists.txt
#   * Removes the x86-only COMPILE_FLAGS "-mssse3" from corpusomatic.
#
# NOTE(review): the patch text below appears to have had its newlines collapsed
#   into spaces, so hunk line structure and context indentation cannot be
#   recovered from it; it likely will not apply as-is. Regenerate it from the
#   original commit before use.
# NOTE(review): "list(APPEND RUNTIME_LIBS $)" and "list(APPEND RUNTIME_SHLIBS $)"
#   look like generator expressions whose "<...>" part was stripped; confirm
#   against the original commit.
# NOTE(review): with the march/mtune guard commented out, flags already present
#   in CMAKE_C_FLAGS / CMAKE_CXX_FLAGS are no longer checked before the arch
#   flags are appended. The commented-out blocks should either be restored or
#   deleted rather than kept as dead code.
# NOTE(review): the if/elseif chain assigns GNUCC_ARCH and TUNE_FLAG only for
#   gcc, clang or CROSS_COMPILE; unless they are set elsewhere, any other
#   configuration would now produce "-march= -mtune=". TODO confirm.
#
diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c58fd46..3485e5f8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -151,15 +151,16 @@ else () set(ICELAKE_FLAG "-march=icelake-server") endif () +if(ARCH_PPC64EL) + set(ARCH_FLAG mcpu) +else() + set(ARCH_FLAG march) +endif() + # Detect best GNUCC_ARCH to tune for if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) message(STATUS "gcc version ${CMAKE_C_COMPILER_VERSION}") - if(ARCH_PPC64EL) - set(ARCH_FLAG mcpu) - else() - set(ARCH_FLAG march) - endif() # If gcc doesn't recognise the host cpu, then mtune=native becomes # generic, which isn't very good in some cases. march=native looks at # cpuid info and then chooses the best microarch it can (and replaces @@ -185,23 +186,12 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) set(TUNE_FLAG native) else() set(TUNE_FLAG ${GNUCC_ARCH}) - message(STATUS "gcc will tune for ${GNUCC_ARCH}") + message(STATUS "gcc will tune for ${GNUCC_ARCH}, ${TUNE_FLAG}") endif() elseif (CMAKE_COMPILER_IS_CLANG AND NOT CROSS_COMPILE) - message(STATUS "clang will tune for ${TUNE_FLAG}") if (ARCH_IA32 OR ARCH_X86_64) set(GNUCC_ARCH native) set(TUNE_FLAG generic) - if (BUILD_AVX512) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SKYLAKE_FLAG}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SKYLAKE_FLAG}") - elseif (BUILD_AVX2) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2") - else() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.2") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2") - endif() elseif(ARCH_AARCH64) set(GNUCC_ARCH armv8) set(TUNE_FLAG generic) @@ -212,11 +202,30 @@ elseif (CMAKE_COMPILER_IS_CLANG AND NOT CROSS_COMPILE) set(GNUCC_ARCH native) set(TUNE_FLAG generic) endif() + message(STATUS "clang will tune for ${GNUCC_ARCH}, ${TUNE_FLAG}") elseif (CROSS_COMPILE) set(GNUCC_ARCH generic) set(TUNE_FLAG generic) endif() +if (ARCH_IA32 OR ARCH_X86_64) + if (NOT FAT_RUNTIME) + if (BUILD_AVX512) + set(ARCH_C_FLAGS "${SKYLAKE_FLAG}") + 
set(ARCH_CXX_FLAGS "${SKYLAKE_FLAG}") + elseif (BUILD_AVX2) + set(ARCH_C_FLAGS "-mavx2") + set(ARCH_CXX_FLAGS "-mavx2") + else() + set(ARCH_C_FLAGS "-msse4.2") + set(ARCH_CXX_FLAGS "-msse4.2") + endif() + else() + set(ARCH_C_FLAGS "-msse4.2") + set(ARCH_CXX_FLAGS "-msse4.2") + endif() +endif() + if (ARCH_AARCH64) if (BUILD_SVE2_BITPERM) set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm") @@ -227,23 +236,26 @@ if (ARCH_AARCH64) endif () endif(ARCH_AARCH64) -if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64) - if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*) - set(ARCH_C_FLAGS "-march=${GNUCC_ARCH} -mtune=${TUNE_FLAG}") - endif() - if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*) - set(ARCH_CXX_FLAGS "-march=${GNUCC_ARCH} -mtune=${TUNE_FLAG}") - endif() -endif() +set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_C_FLAGS}") +set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_CXX_FLAGS}") + +#if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64) +# if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*) +# set(ARCH_C_FLAGS "-march=${GNUCC_ARCH} -mtune=${TUNE_FLAG}") +# endif() +# if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*) +# set(ARCH_CXX_FLAGS "-march=${GNUCC_ARCH} -mtune=${TUNE_FLAG}") +# endif() +#endif() -if(ARCH_PPC64EL) - if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*) - set(ARCH_C_FLAGS "-mtune=${TUNE_FLAG}") - endif() - if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*) - set(ARCH_CXX_FLAGS "-mtune=${TUNE_FLAG}") - endif() -endif() +#if(ARCH_PPC64EL) +# if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*) +# set(ARCH_C_FLAGS "-mtune=${TUNE_FLAG}") +# endif() +# if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*) +# set(ARCH_CXX_FLAGS 
"-mtune=${TUNE_FLAG}") +# endif() +#endif() # compiler version checks TODO: test more compilers if (CMAKE_COMPILER_IS_GNUCXX) @@ -306,7 +318,6 @@ if (NOT(ARCH_IA32 AND RELEASE_BUILD)) set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer") endif() - CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H) if (ARCH_IA32 OR ARCH_X86_64) CHECK_INCLUDE_FILES(intrin.h HAVE_C_INTRIN_H) @@ -474,13 +485,12 @@ if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") set(FREEBSD true) endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") + if (FAT_RUNTIME) if (NOT (ARCH_IA32 OR ARCH_X86_64)) message(FATAL_ERROR "Fat runtime is not supported on non-Intel architectures") else() message(STATUS "Building runtime for multiple microarchitectures") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") endif() else() if (CROSS_COMPILE) @@ -488,9 +498,9 @@ else() else() message(STATUS "Building for current host CPU: ${ARCH_C_FLAGS}") endif() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS}") endif() +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS}") add_subdirectory(util) add_subdirectory(doc/dev-reference) @@ -1207,10 +1217,6 @@ if (NOT FAT_RUNTIME) set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) add_library(hs_compile OBJECT ${hs_compile_SRCS}) - if (ARCH_IA32) - set_target_properties(hs_compile PROPERTIES COMPILE_FLAGS "-msse4.2") - endif (ARCH_IA32) - add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c @@ -1241,7 +1247,7 @@ else (FAT_RUNTIME) add_library(hs_exec_core2 OBJECT ${hs_exec_SRCS}) list(APPEND RUNTIME_LIBS $) set_target_properties(hs_exec_core2 PROPERTIES - COMPILE_FLAGS "-march=core2" + COMPILE_FLAGS "-march=core2 -msse4.2" RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" ) @@ -1290,10 +1296,6 @@ else (FAT_RUNTIME) ${RUNTIME_LIBS}) set_target_properties(hs_runtime PROPERTIES 
LINKER_LANGUAGE C) add_library(hs_compile OBJECT ${hs_compile_SRCS}) - if (ARCH_IA32 OR ARCH_X86_64) - set_target_properties(hs_exec_common PROPERTIES COMPILE_FLAGS "-msse4.2") - set_target_properties(hs_compile PROPERTIES COMPILE_FLAGS "-msse4.2") - endif () # we want the static lib for testing add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c @@ -1310,7 +1312,7 @@ else (FAT_RUNTIME) add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS}) list(APPEND RUNTIME_SHLIBS $) set_target_properties(hs_exec_shared_core2 PROPERTIES - COMPILE_FLAGS "-march=core2" + COMPILE_FLAGS "-march=core2 -msse4.2" POSITION_INDEPENDENT_CODE TRUE RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" ) diff --git a/src/util/simd_types.h b/src/util/simd_types.h index 0deff7e5..4f0fd1a9 100644 --- a/src/util/simd_types.h +++ b/src/util/simd_types.h @@ -51,6 +51,7 @@ typedef struct ALIGN_AVX_DIRECTIVE {m128 lo; m128 hi;} m256; #endif typedef struct {m128 lo; m128 mid; m128 hi;} m384; + #if !defined(m512) && !defined(HAVE_SIMD_512_BITS) typedef struct ALIGN_ATTR(64) {m256 lo; m256 hi;} m512; #endif diff --git a/util/CMakeLists.txt b/util/CMakeLists.txt index 82cee0ff..ea942ef1 100644 --- a/util/CMakeLists.txt +++ b/util/CMakeLists.txt @@ -33,9 +33,6 @@ SET(corpusomatic_SRCS ng_find_matches.cpp ) add_library(corpusomatic STATIC ${corpusomatic_SRCS}) -if (ARCH_IA32 OR ARCH_X86_64) - set_target_properties(corpusomatic PROPERTIES COMPILE_FLAGS "-mssse3") -endif () set(databaseutil_SRCS database_util.cpp