Enable SVE based on user input; update README

The tune and arch flags from autodetection are applied only if the detection process actually produced them; otherwise the previous logic for setting the flags is kept.
This commit is contained in:
Alex Bondarev 2022-09-18 12:04:05 +03:00
parent 4ab0730dbe
commit 90ac746303
2 changed files with 23 additions and 10 deletions

View File

@ -171,7 +171,7 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE)
# arg1 might exist if using ccache
string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1)
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native)
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -${TUNE_FLAG}=native)
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
OUTPUT_VARIABLE _GCC_OUTPUT)
set(_GCC_OUTPUT_TUNE ${_GCC_OUTPUT})
@ -187,11 +187,11 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE)
string(FIND "${GNUCC_ARCH}" "sve2" POS_SVE2)
string(FIND "${GNUCC_ARCH}" "sve2-bitperm" POS_SVE2_BITPERM)
if (NOT POS_SVE EQUAL 0)
set(BUILD_SVE 1)
set(SVE_FOUND 1)
elseif(NOT POS_SVE2 EQUAL 0)
set(BUILD_SVE2 1)
set(SVE2_FOUND 1)
elseif(NOT POS_SVE2_BITPERM EQUAL 0)
set(BUILD_SVE2_BITPERM 1)
set(SVE2_BITPERM_FOUND 1)
endif()
message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' ")
@ -203,11 +203,11 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE)
INPUT_FILE /dev/null
RESULT_VARIABLE GNUCC_TUNE_TEST)
if (NOT GNUCC_TUNE_TEST EQUAL 0)
message(WARNING "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid, falling back to -mtune=native")
message(WARNING "Something went wrong determining gcc tune: -mtune=${GNUCC_TUNE} not valid, falling back to -mtune=native")
set(GNUCC_TUNE native)
else()
set(GNUCC_TUNE ${GNUCC_TUNE})
message(STATUS "gcc will tune for ${GNUCC_ARCH}, ${TUNE_FLAG}")
message(STATUS "gcc will tune for ${GNUCC_ARCH}, ${GNUCC_TUNE}")
endif()
elseif (CMAKE_COMPILER_IS_CLANG AND NOT CROSS_COMPILE)
if (ARCH_IA32 OR ARCH_X86_64)
@ -247,12 +247,27 @@ if (ARCH_IA32 OR ARCH_X86_64)
endif()
endif()
# On AArch64, append one SVE feature suffix to GNUCC_ARCH when the matching
# BUILD_* variable is set but the corresponding *_FOUND variable is not.
# The branches are checked from sve2-bitperm down to sve and only the first
# match applies, so at most one suffix is appended.
# NOTE(review): presumably BUILD_* reflect the user's request and *_FOUND
# reflect what compiler autodetection already reported — confirm against the
# places where these variables are set.
if (ARCH_AARCH64)
if (BUILD_SVE2_BITPERM AND NOT SVE2_BITPERM_FOUND)
set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm")
elseif (BUILD_SVE2 AND NOT SVE2_FOUND)
set(GNUCC_ARCH "${GNUCC_ARCH}+sve2")
elseif (BUILD_SVE AND NOT SVE_FOUND)
set(GNUCC_ARCH "${GNUCC_ARCH}+sve")
endif ()
endif(ARCH_AARCH64)
# Report the current ARCH_C_FLAGS / ARCH_CXX_FLAGS. These messages run before
# the assignments below, so they show the values as they were prior to this
# section rewriting them.
message(STATUS "ARCH_C_FLAGS : ${ARCH_C_FLAGS}")
message(STATUS "ARCH_CXX_FLAGS : ${ARCH_CXX_FLAGS}")
# Compose the architecture/tuning compiler flags; skipped entirely when
# FAT_RUNTIME is set.
if (NOT FAT_RUNTIME)
if (GNUCC_TUNE)
# A tune value is available: overwrite both variables with
# "-<ARCH_FLAG>=<GNUCC_ARCH> -<TUNE_FLAG>=<GNUCC_TUNE>". The previous contents
# of ARCH_C_FLAGS / ARCH_CXX_FLAGS are discarded in this branch.
set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}")
set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}")
else()
# No tune value: prepend the arch/tune options and keep the existing flags.
# NOTE(review): TUNE_FLAG is used here as the *value* of -mtune, whereas the
# branch above uses it as the option *name* — confirm which meaning TUNE_FLAG
# has where it is defined.
set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_C_FLAGS}")
set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_CXX_FLAGS}")
endif()
endif()
#if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64)

View File

@ -47,8 +47,6 @@ Vectorscan is typically used in a DPI library stack, just like Hyperscan.
# Compiling for SVE
When compiling on an AArch64 machine that supports any of the SVE flags, the supported flag is detected and applied automatically.
The following cmake variables can be set in order to target Arm's Scalable
Vector Extension. They are listed in ascending order of strength, with cmake
detecting whether the feature is available in the compiler and falling back to