From ee0c8f763fe7a7c7bc2f73e01630f55eadb997cc Mon Sep 17 00:00:00 2001 From: Alex Bondarev Date: Tue, 13 Sep 2022 18:21:10 +0300 Subject: [PATCH 1/6] fix to correctly place the autodetected flags and to activate SVE options --- CMakeLists.txt | 52 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 57e986c9..1283dc88 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -156,6 +156,12 @@ else() set(ARCH_FLAG march) endif() +set(TUNE_FLAG "mtune") +set(CPU_FLAG "mcpu") +set(GNUCC_CPU "") +set(GNUCC_TUNE "") +message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' , CPU_FLAG '${CPU_FLAG}' '${GNUCC_CPU}'") + # Detect best GNUCC_ARCH to tune for if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) message(STATUS "gcc version ${CMAKE_C_COMPILER_VERSION}") @@ -167,24 +173,47 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) # arg1 might exist if using ccache string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1) - set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native) + set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native -mcpu=native) execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} OUTPUT_VARIABLE _GCC_OUTPUT) + set(_GCC_OUTPUT_TUNE ${_GCC_OUTPUT}) + set(_GCC_OUTPUT_CPU ${_GCC_OUTPUT}) string(FIND "${_GCC_OUTPUT}" "${ARCH_FLAG}" POS) string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT) string(REGEX REPLACE "${ARCH_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_ARCH "${_GCC_OUTPUT}") + string(FIND "${_GCC_OUTPUT_TUNE}" "${TUNE_FLAG}" POS_TUNE) + string(SUBSTRING "${_GCC_OUTPUT_TUNE}" ${POS_TUNE} -1 _GCC_OUTPUT_TUNE) + string(REGEX REPLACE "${TUNE_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_TUNE "${_GCC_OUTPUT_TUNE}") + + string(FIND "${_GCC_OUTPUT_CPU}" "${CPU_FLAG}" POS_CPU) + string(SUBSTRING "${_GCC_OUTPUT_CPU}" ${POS_CPU} -1 _GCC_OUTPUT_CPU) + string(REGEX REPLACE "${CPU_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_CPU "${_GCC_OUTPUT_CPU}") + + string(FIND "${GNUCC_ARCH}" "sve" POS_SVE) + string(FIND "${GNUCC_ARCH}" "sve2" POS_SVE2) + string(FIND "${GNUCC_ARCH}" "sve2-bitperm" POS_SVE2_BITPERM) + if (NOT POS_SVE EQUAL 0) + set(BUILD_SVE 1) + elseif(NOT POS_SVE2 EQUAL 0) + set(BUILD_SVE2 1) + elseif(NOT POS_SVE2_BITPERM EQUAL 0) + set(BUILD_SVE2_BITPERM 1) + endif() + + message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' , CPU_FLAG '${CPU_FLAG}' '${GNUCC_CPU}'") + # test the parsed flag - set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH}) + set (EXEC_ARGS ${CC_ARG1} -E - -${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU}) execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} OUTPUT_QUIET ERROR_QUIET INPUT_FILE /dev/null RESULT_VARIABLE GNUCC_TUNE_TEST) if (NOT GNUCC_TUNE_TEST EQUAL 0) message(WARNING "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid, falling back to -mtune=native") - set(TUNE_FLAG native) + set(GNUCC_TUNE native) else() - set(TUNE_FLAG ${GNUCC_ARCH}) + set(GNUCC_TUNE ${GNUCC_TUNE}) message(STATUS "gcc will tune for ${GNUCC_ARCH}, ${TUNE_FLAG}") endif() elseif (CMAKE_COMPILER_IS_CLANG AND NOT CROSS_COMPILE) @@ -225,23 +254,12 @@ if (ARCH_IA32 OR ARCH_X86_64) endif() endif() -if (ARCH_AARCH64) - if (BUILD_SVE2_BITPERM) - set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm") - elseif (BUILD_SVE2) - set(GNUCC_ARCH "${GNUCC_ARCH}+sve2") - elseif (BUILD_SVE) - set(GNUCC_ARCH "${GNUCC_ARCH}+sve") - endif () -endif(ARCH_AARCH64) - - message(STATUS "ARCH_C_FLAGS : ${ARCH_C_FLAGS}") message(STATUS "ARCH_CXX_FLAGS : ${ARCH_CXX_FLAGS}") if (NOT FAT_RUNTIME) - set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_C_FLAGS}") - set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_CXX_FLAGS}") + set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU} ${ARCH_C_FLAGS}") + set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU} ${ARCH_CXX_FLAGS}") endif() #if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64) From 69e6176e0923fef57f97da91268c6bd83ae11120 Mon Sep 17 00:00:00 2001 From: Alex Bondarev Date: Tue, 13 Sep 2022 18:29:06 +0300 Subject: [PATCH 2/6] updated README to reflect CMake changes --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 8bc7aff6..f9d2708f 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,8 @@ Vectorscan is typically used in a DPI library stack, just like Hyperscan. # Compiling for SVE +When compiling on AARCH64 machine with support for either of the SVE flags, it will be detected and applied automatically. + The following cmake variables can be set in order to target Arm's Scalable Vector Extension. They are listed in ascending order of strength, with cmake detecting whether the feature is available in the compiler and falling back to From d0a017da99947723d78d83576efbdd5ed2bb77f3 Mon Sep 17 00:00:00 2001 From: Alex Bondarev Date: Thu, 15 Sep 2022 18:38:01 +0300 Subject: [PATCH 3/6] removed cpu reference flags and fixed tune flag --- CMakeLists.txt | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1283dc88..86fd3b58 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -156,12 +156,6 @@ else() set(ARCH_FLAG march) endif() -set(TUNE_FLAG "mtune") -set(CPU_FLAG "mcpu") -set(GNUCC_CPU "") -set(GNUCC_TUNE "") -message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' , CPU_FLAG '${CPU_FLAG}' '${GNUCC_CPU}'") - # Detect best GNUCC_ARCH to tune for if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) message(STATUS "gcc version ${CMAKE_C_COMPILER_VERSION}") @@ -171,25 +165,24 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) # cpuid info and then chooses the best microarch it can (and replaces # the flag), so use that for tune. + set(TUNE_FLAG "mtune") + set(GNUCC_TUNE "") + message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' ") + # arg1 might exist if using ccache string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1) - set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native -mcpu=native) + set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native) execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} OUTPUT_VARIABLE _GCC_OUTPUT) set(_GCC_OUTPUT_TUNE ${_GCC_OUTPUT}) - set(_GCC_OUTPUT_CPU ${_GCC_OUTPUT}) - string(FIND "${_GCC_OUTPUT}" "${ARCH_FLAG}" POS) + string(FIND "${_GCC_OUTPUT}" "${ARCH_FLAG}=" POS) string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT) string(REGEX REPLACE "${ARCH_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_ARCH "${_GCC_OUTPUT}") - string(FIND "${_GCC_OUTPUT_TUNE}" "${TUNE_FLAG}" POS_TUNE) + string(FIND "${_GCC_OUTPUT_TUNE}" "${TUNE_FLAG}=" POS_TUNE) string(SUBSTRING "${_GCC_OUTPUT_TUNE}" ${POS_TUNE} -1 _GCC_OUTPUT_TUNE) string(REGEX REPLACE "${TUNE_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_TUNE "${_GCC_OUTPUT_TUNE}") - string(FIND "${_GCC_OUTPUT_CPU}" "${CPU_FLAG}" POS_CPU) - string(SUBSTRING "${_GCC_OUTPUT_CPU}" ${POS_CPU} -1 _GCC_OUTPUT_CPU) - string(REGEX REPLACE "${CPU_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_CPU "${_GCC_OUTPUT_CPU}") - string(FIND "${GNUCC_ARCH}" "sve" POS_SVE) string(FIND "${GNUCC_ARCH}" "sve2" POS_SVE2) string(FIND "${GNUCC_ARCH}" "sve2-bitperm" POS_SVE2_BITPERM) @@ -201,7 +194,7 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) set(BUILD_SVE2_BITPERM 1) endif() - message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' , CPU_FLAG '${CPU_FLAG}' '${GNUCC_CPU}'") + message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' ") # test the parsed flag set (EXEC_ARGS ${CC_ARG1} -E - -${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU}) @@ -258,8 +251,8 @@ message(STATUS "ARCH_C_FLAGS : ${ARCH_C_FLAGS}") message(STATUS "ARCH_CXX_FLAGS : ${ARCH_CXX_FLAGS}") if (NOT FAT_RUNTIME) - set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU} ${ARCH_C_FLAGS}") - set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU} ${ARCH_CXX_FLAGS}") + set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}") + set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}") endif() #if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64) From 4ab0730dbe0950bbb51b8df2795d96701b735af1 Mon Sep 17 00:00:00 2001 From: Alex Bondarev Date: Fri, 16 Sep 2022 00:03:08 +0300 Subject: [PATCH 4/6] additional mcpu flag cleanup --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 86fd3b58..011bfec5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -197,7 +197,7 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' ") # test the parsed flag - set (EXEC_ARGS ${CC_ARG1} -E - -${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU}) + set (EXEC_ARGS ${CC_ARG1} -E - -${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}) execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} OUTPUT_QUIET ERROR_QUIET INPUT_FILE /dev/null From 90ac7463035fb1a19c78f7466651ae9fc8939c5a Mon Sep 17 00:00:00 2001 From: Alex Bondarev Date: Sun, 18 Sep 2022 12:04:05 +0300 Subject: [PATCH 5/6] SVE enabled on user input. updated README tune and arch flags will be applied from autodetect only if they have been created by the process, otherwise the old logical flow remains wrt the flags --- CMakeLists.txt | 31 +++++++++++++++++++++++-------- README.md | 2 -- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 011bfec5..b26fcc40 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -171,7 +171,7 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) # arg1 might exist if using ccache string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1) - set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native) + set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -${TUNE_FLAG}=native) execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} OUTPUT_VARIABLE _GCC_OUTPUT) set(_GCC_OUTPUT_TUNE ${_GCC_OUTPUT}) @@ -187,11 +187,11 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) string(FIND "${GNUCC_ARCH}" "sve2" POS_SVE2) string(FIND "${GNUCC_ARCH}" "sve2-bitperm" POS_SVE2_BITPERM) if (NOT POS_SVE EQUAL 0) - set(BUILD_SVE 1) + set(SVE_FOUND 1) elseif(NOT POS_SVE2 EQUAL 0) - set(BUILD_SVE2 1) + set(SVE2_FOUND 1) elseif(NOT POS_SVE2_BITPERM EQUAL 0) - set(BUILD_SVE2_BITPERM 1) + set(SVE2_BITPERM_FOUND 1) endif() message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' ") @@ -203,11 +203,11 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) INPUT_FILE /dev/null RESULT_VARIABLE GNUCC_TUNE_TEST) if (NOT GNUCC_TUNE_TEST EQUAL 0) - message(WARNING "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid, falling back to -mtune=native") + message(WARNING "Something went wrong determining gcc tune: -mtune=${GNUCC_TUNE} not valid, falling back to -mtune=native") set(GNUCC_TUNE native) else() set(GNUCC_TUNE ${GNUCC_TUNE}) - message(STATUS "gcc will tune for ${GNUCC_ARCH}, ${TUNE_FLAG}") + message(STATUS "gcc will tune for ${GNUCC_ARCH}, ${GNUCC_TUNE}") endif() elseif (CMAKE_COMPILER_IS_CLANG AND NOT CROSS_COMPILE) if (ARCH_IA32 OR ARCH_X86_64) @@ -247,12 +247,27 @@ if (ARCH_IA32 OR ARCH_X86_64) endif() endif() +if (ARCH_AARCH64) + if (BUILD_SVE2_BITPERM AND NOT SVE2_BITPERM_FOUND) + set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm") + elseif (BUILD_SVE2 AND NOT SVE2_FOUND) + set(GNUCC_ARCH "${GNUCC_ARCH}+sve2") + elseif (BUILD_SVE AND NOT SVE_FOUND) + set(GNUCC_ARCH "${GNUCC_ARCH}+sve") + endif () +endif(ARCH_AARCH64) + message(STATUS "ARCH_C_FLAGS : ${ARCH_C_FLAGS}") message(STATUS "ARCH_CXX_FLAGS : ${ARCH_CXX_FLAGS}") if (NOT FAT_RUNTIME) - set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}") - set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}") + if (GNUCC_TUNE) + set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}") + set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}") + else() + set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_C_FLAGS}") + set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_CXX_FLAGS}") + endif() endif() #if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64) diff --git a/README.md b/README.md index f9d2708f..8bc7aff6 100644 --- a/README.md +++ b/README.md @@ -47,8 +47,6 @@ Vectorscan is typically used in a DPI library stack, just like Hyperscan. # Compiling for SVE -When compiling on AARCH64 machine with support for either of the SVE flags, it will be detected and applied automatically. - The following cmake variables can be set in order to target Arm's Scalable Vector Extension. They are listed in ascending order of strength, with cmake detecting whether the feature is available in the compiler and falling back to From 7133ac5be1ea013857e6e3a50fe5e722ac14fff0 Mon Sep 17 00:00:00 2001 From: Alex Bondarev Date: Sun, 18 Sep 2022 19:42:45 +0300 Subject: [PATCH 6/6] clang SVE build fix --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index b26fcc40..66f96a07 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -358,6 +358,7 @@ if (ARCH_IA32 OR ARCH_X86_64) elseif (ARCH_ARM32 OR ARCH_AARCH64) CHECK_INCLUDE_FILE_CXX(arm_neon.h HAVE_C_ARM_NEON_H) if (BUILD_SVE OR BUILD_SVE2 OR BUILD_SVE2_BITPERM) + set(CMAKE_REQUIRED_FLAGS ${ARCH_CXX_FLAGS}) CHECK_INCLUDE_FILE_CXX(arm_sve.h HAVE_C_ARM_SVE_H) if (NOT HAVE_C_ARM_SVE_H) message(FATAL_ERROR "arm_sve.h is required to build for SVE.")