From ee0c8f763fe7a7c7bc2f73e01630f55eadb997cc Mon Sep 17 00:00:00 2001 From: Alex Bondarev Date: Tue, 13 Sep 2022 18:21:10 +0300 Subject: [PATCH] fix to correctly place the autodetected flags and to activate SVE options --- CMakeLists.txt | 52 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 57e986c9..1283dc88 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -156,6 +156,12 @@ else() set(ARCH_FLAG march) endif() +set(TUNE_FLAG "mtune") +set(CPU_FLAG "mcpu") +set(GNUCC_CPU "") +set(GNUCC_TUNE "") +message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' , CPU_FLAG '${CPU_FLAG}' '${GNUCC_CPU}'") + # Detect best GNUCC_ARCH to tune for if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) message(STATUS "gcc version ${CMAKE_C_COMPILER_VERSION}") @@ -167,24 +173,47 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) # arg1 might exist if using ccache string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1) - set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native) + set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native -mcpu=native) execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} OUTPUT_VARIABLE _GCC_OUTPUT) + set(_GCC_OUTPUT_TUNE ${_GCC_OUTPUT}) + set(_GCC_OUTPUT_CPU ${_GCC_OUTPUT}) string(FIND "${_GCC_OUTPUT}" "${ARCH_FLAG}" POS) string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT) string(REGEX REPLACE "${ARCH_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_ARCH "${_GCC_OUTPUT}") + string(FIND "${_GCC_OUTPUT_TUNE}" "${TUNE_FLAG}" POS_TUNE) + string(SUBSTRING "${_GCC_OUTPUT_TUNE}" ${POS_TUNE} -1 _GCC_OUTPUT_TUNE) + string(REGEX REPLACE "${TUNE_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_TUNE "${_GCC_OUTPUT_TUNE}") + + string(FIND "${_GCC_OUTPUT_CPU}" "${CPU_FLAG}" POS_CPU) + string(SUBSTRING "${_GCC_OUTPUT_CPU}" ${POS_CPU} -1 _GCC_OUTPUT_CPU) + string(REGEX REPLACE "${CPU_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_CPU "${_GCC_OUTPUT_CPU}") + + string(FIND "${GNUCC_ARCH}" "sve" POS_SVE) + string(FIND "${GNUCC_ARCH}" "sve2" POS_SVE2) + string(FIND "${GNUCC_ARCH}" "sve2-bitperm" POS_SVE2_BITPERM) + if (NOT POS_SVE EQUAL 0) + set(BUILD_SVE 1) + elseif(NOT POS_SVE2 EQUAL 0) + set(BUILD_SVE2 1) + elseif(NOT POS_SVE2_BITPERM EQUAL 0) + set(BUILD_SVE2_BITPERM 1) + endif() + + message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' , CPU_FLAG '${CPU_FLAG}' '${GNUCC_CPU}'") + # test the parsed flag - set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH}) + set (EXEC_ARGS ${CC_ARG1} -E - -${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU}) execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} OUTPUT_QUIET ERROR_QUIET INPUT_FILE /dev/null RESULT_VARIABLE GNUCC_TUNE_TEST) if (NOT GNUCC_TUNE_TEST EQUAL 0) message(WARNING "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid, falling back to -mtune=native") - set(TUNE_FLAG native) + set(GNUCC_TUNE native) else() - set(TUNE_FLAG ${GNUCC_ARCH}) + set(GNUCC_TUNE ${GNUCC_TUNE}) message(STATUS "gcc will tune for ${GNUCC_ARCH}, ${TUNE_FLAG}") endif() elseif (CMAKE_COMPILER_IS_CLANG AND NOT CROSS_COMPILE) @@ -225,23 +254,12 @@ if (ARCH_IA32 OR ARCH_X86_64) endif() endif() -if (ARCH_AARCH64) - if (BUILD_SVE2_BITPERM) - set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm") - elseif (BUILD_SVE2) - set(GNUCC_ARCH "${GNUCC_ARCH}+sve2") - elseif (BUILD_SVE) - set(GNUCC_ARCH "${GNUCC_ARCH}+sve") - endif () -endif(ARCH_AARCH64) - - message(STATUS "ARCH_C_FLAGS : ${ARCH_C_FLAGS}") message(STATUS "ARCH_CXX_FLAGS : ${ARCH_CXX_FLAGS}") if (NOT FAT_RUNTIME) - set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_C_FLAGS}") - set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_CXX_FLAGS}") + set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU} ${ARCH_C_FLAGS}") + set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU} ${ARCH_CXX_FLAGS}") endif() #if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64)