From ee0c8f763fe7a7c7bc2f73e01630f55eadb997cc Mon Sep 17 00:00:00 2001 From: Alex Bondarev Date: Tue, 13 Sep 2022 18:21:10 +0300 Subject: [PATCH 01/13] fix to correctly place the autodetected flags and to activate SVE options --- CMakeLists.txt | 52 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 57e986c9..1283dc88 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -156,6 +156,12 @@ else() set(ARCH_FLAG march) endif() +set(TUNE_FLAG "mtune") +set(CPU_FLAG "mcpu") +set(GNUCC_CPU "") +set(GNUCC_TUNE "") +message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' , CPU_FLAG '${CPU_FLAG}' '${GNUCC_CPU}'") + # Detect best GNUCC_ARCH to tune for if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) message(STATUS "gcc version ${CMAKE_C_COMPILER_VERSION}") @@ -167,24 +173,47 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) # arg1 might exist if using ccache string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1) - set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native) + set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native -mcpu=native) execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} OUTPUT_VARIABLE _GCC_OUTPUT) + set(_GCC_OUTPUT_TUNE ${_GCC_OUTPUT}) + set(_GCC_OUTPUT_CPU ${_GCC_OUTPUT}) string(FIND "${_GCC_OUTPUT}" "${ARCH_FLAG}" POS) string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT) string(REGEX REPLACE "${ARCH_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_ARCH "${_GCC_OUTPUT}") + string(FIND "${_GCC_OUTPUT_TUNE}" "${TUNE_FLAG}" POS_TUNE) + string(SUBSTRING "${_GCC_OUTPUT_TUNE}" ${POS_TUNE} -1 _GCC_OUTPUT_TUNE) + string(REGEX REPLACE "${TUNE_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_TUNE "${_GCC_OUTPUT_TUNE}") + + string(FIND "${_GCC_OUTPUT_CPU}" "${CPU_FLAG}" POS_CPU) + string(SUBSTRING "${_GCC_OUTPUT_CPU}" ${POS_CPU} -1 _GCC_OUTPUT_CPU) + string(REGEX REPLACE "${CPU_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_CPU "${_GCC_OUTPUT_CPU}") + + string(FIND "${GNUCC_ARCH}" "sve" POS_SVE) + string(FIND "${GNUCC_ARCH}" "sve2" POS_SVE2) + string(FIND "${GNUCC_ARCH}" "sve2-bitperm" POS_SVE2_BITPERM) + if (NOT POS_SVE EQUAL 0) + set(BUILD_SVE 1) + elseif(NOT POS_SVE2 EQUAL 0) + set(BUILD_SVE2 1) + elseif(NOT POS_SVE2_BITPERM EQUAL 0) + set(BUILD_SVE2_BITPERM 1) + endif() + + message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' , CPU_FLAG '${CPU_FLAG}' '${GNUCC_CPU}'") + # test the parsed flag - set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH}) + set (EXEC_ARGS ${CC_ARG1} -E - -${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU}) execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} OUTPUT_QUIET ERROR_QUIET INPUT_FILE /dev/null RESULT_VARIABLE GNUCC_TUNE_TEST) if (NOT GNUCC_TUNE_TEST EQUAL 0) message(WARNING "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid, falling back to -mtune=native") - set(TUNE_FLAG native) + set(GNUCC_TUNE native) else() - set(TUNE_FLAG ${GNUCC_ARCH}) + set(GNUCC_TUNE ${GNUCC_TUNE}) message(STATUS "gcc will tune for ${GNUCC_ARCH}, ${TUNE_FLAG}") endif() elseif (CMAKE_COMPILER_IS_CLANG AND NOT CROSS_COMPILE) @@ -225,23 +254,12 @@ if (ARCH_IA32 OR ARCH_X86_64) endif() endif() -if (ARCH_AARCH64) - if (BUILD_SVE2_BITPERM) - set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm") - elseif (BUILD_SVE2) - set(GNUCC_ARCH "${GNUCC_ARCH}+sve2") - elseif (BUILD_SVE) - set(GNUCC_ARCH "${GNUCC_ARCH}+sve") - endif () -endif(ARCH_AARCH64) - - message(STATUS "ARCH_C_FLAGS : ${ARCH_C_FLAGS}") message(STATUS "ARCH_CXX_FLAGS : ${ARCH_CXX_FLAGS}") if (NOT FAT_RUNTIME) - set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_C_FLAGS}") - set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_CXX_FLAGS}") + set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU} ${ARCH_C_FLAGS}") + set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU} ${ARCH_CXX_FLAGS}") endif() #if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64) From 69e6176e0923fef57f97da91268c6bd83ae11120 Mon Sep 17 00:00:00 2001 From: Alex Bondarev Date: Tue, 13 Sep 2022 18:29:06 +0300 Subject: [PATCH 02/13] updated README to reflect CMake changes --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 8bc7aff6..f9d2708f 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,8 @@ Vectorscan is typically used in a DPI library stack, just like Hyperscan. # Compiling for SVE +When compiling on AARCH64 machine with support for either of the SVE flags, it will be detected and applied automatically. + The following cmake variables can be set in order to target Arm's Scalable Vector Extension. They are listed in ascending order of strength, with cmake detecting whether the feature is available in the compiler and falling back to From d0a017da99947723d78d83576efbdd5ed2bb77f3 Mon Sep 17 00:00:00 2001 From: Alex Bondarev Date: Thu, 15 Sep 2022 18:38:01 +0300 Subject: [PATCH 03/13] removed cpu reference flags and fixed tune flag --- CMakeLists.txt | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1283dc88..86fd3b58 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -156,12 +156,6 @@ else() set(ARCH_FLAG march) endif() -set(TUNE_FLAG "mtune") -set(CPU_FLAG "mcpu") -set(GNUCC_CPU "") -set(GNUCC_TUNE "") -message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' , CPU_FLAG '${CPU_FLAG}' '${GNUCC_CPU}'") - # Detect best GNUCC_ARCH to tune for if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) message(STATUS "gcc version ${CMAKE_C_COMPILER_VERSION}") @@ -171,25 +165,24 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) # cpuid info and then chooses the best microarch it can (and replaces # the flag), so use that for tune. + set(TUNE_FLAG "mtune") + set(GNUCC_TUNE "") + message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' ") + # arg1 might exist if using ccache string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1) - set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native -mcpu=native) + set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native) execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} OUTPUT_VARIABLE _GCC_OUTPUT) set(_GCC_OUTPUT_TUNE ${_GCC_OUTPUT}) - set(_GCC_OUTPUT_CPU ${_GCC_OUTPUT}) - string(FIND "${_GCC_OUTPUT}" "${ARCH_FLAG}" POS) + string(FIND "${_GCC_OUTPUT}" "${ARCH_FLAG}=" POS) string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT) string(REGEX REPLACE "${ARCH_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_ARCH "${_GCC_OUTPUT}") - string(FIND "${_GCC_OUTPUT_TUNE}" "${TUNE_FLAG}" POS_TUNE) + string(FIND "${_GCC_OUTPUT_TUNE}" "${TUNE_FLAG}=" POS_TUNE) string(SUBSTRING "${_GCC_OUTPUT_TUNE}" ${POS_TUNE} -1 _GCC_OUTPUT_TUNE) string(REGEX REPLACE "${TUNE_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_TUNE "${_GCC_OUTPUT_TUNE}") - string(FIND "${_GCC_OUTPUT_CPU}" "${CPU_FLAG}" POS_CPU) - string(SUBSTRING "${_GCC_OUTPUT_CPU}" ${POS_CPU} -1 _GCC_OUTPUT_CPU) - string(REGEX REPLACE "${CPU_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_CPU "${_GCC_OUTPUT_CPU}") - string(FIND "${GNUCC_ARCH}" "sve" POS_SVE) string(FIND "${GNUCC_ARCH}" "sve2" POS_SVE2) string(FIND "${GNUCC_ARCH}" "sve2-bitperm" POS_SVE2_BITPERM) @@ -201,7 +194,7 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) set(BUILD_SVE2_BITPERM 1) endif() - message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' , CPU_FLAG '${CPU_FLAG}' '${GNUCC_CPU}'") + message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' ") # test the parsed flag set (EXEC_ARGS ${CC_ARG1} -E - -${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU}) @@ -258,8 +251,8 @@ message(STATUS "ARCH_C_FLAGS : ${ARCH_C_FLAGS}") message(STATUS "ARCH_CXX_FLAGS : ${ARCH_CXX_FLAGS}") if (NOT FAT_RUNTIME) - set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU} ${ARCH_C_FLAGS}") - set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU} ${ARCH_CXX_FLAGS}") + set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}") + set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}") endif() #if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64) From 4ab0730dbe0950bbb51b8df2795d96701b735af1 Mon Sep 17 00:00:00 2001 From: Alex Bondarev Date: Fri, 16 Sep 2022 00:03:08 +0300 Subject: [PATCH 04/13] additional mcpu flag cleanup --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 86fd3b58..011bfec5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -197,7 +197,7 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' ") # test the parsed flag - set (EXEC_ARGS ${CC_ARG1} -E - -${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU}) + set (EXEC_ARGS ${CC_ARG1} -E - -${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}) execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} OUTPUT_QUIET ERROR_QUIET INPUT_FILE /dev/null From ef66877e9e6db22cf273230e6b07840cf9373857 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Fri, 16 Sep 2022 12:41:08 +0300 Subject: [PATCH 05/13] [VSX] clang complains about the order of __vector --- src/util/supervector/arch/ppc64el/impl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/supervector/arch/ppc64el/impl.cpp b/src/util/supervector/arch/ppc64el/impl.cpp index 2eba69b2..295cd128 100644 --- a/src/util/supervector/arch/ppc64el/impl.cpp +++ b/src/util/supervector/arch/ppc64el/impl.cpp @@ -49,7 +49,7 @@ really_inline SuperVector<16>::SuperVector(SuperVector const &other) template<> template<> -really_inline SuperVector<16>::SuperVector(char __bool __vector v) +really_inline SuperVector<16>::SuperVector(__vector __bool char v) { u.u8x16[0] = (uint8x16_t) v; }; From 3fc6c8a53273f29a01bdcd2e7d5e3d441371dbbc Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Fri, 16 Sep 2022 12:50:33 +0300 Subject: [PATCH 06/13] [VSX] movemask needs to be explicitly aligned on clang for vec_ste --- src/util/arch/ppc64el/simd_utils.h | 34 +++++++++++----------- src/util/supervector/arch/ppc64el/impl.cpp | 4 +-- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/util/arch/ppc64el/simd_utils.h b/src/util/arch/ppc64el/simd_utils.h index ea1766b2..119d0946 100644 --- a/src/util/arch/ppc64el/simd_utils.h +++ b/src/util/arch/ppc64el/simd_utils.h @@ -152,7 +152,7 @@ static really_inline u32 movemask128(m128 a) { static uint8x16_t perm = { 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; uint8x16_t bitmask = vec_gb((uint8x16_t) a); bitmask = (uint8x16_t) vec_perm(vec_splat_u8(0), bitmask, perm); - u32 movemask; + u32 ALIGN_ATTR(16) movemask; vec_ste((uint32x4_t) bitmask, 0, &movemask); return movemask; } @@ -285,27 +285,27 @@ m128 loadbytes128(const void *ptr, unsigned int n) { return a; } -#define CASE_ALIGN_VECTORS(a, b, offset) case offset: return (m128)vec_sld((int8x16_t)(b), (int8x16_t)(a), (16 - offset)); break; +#define CASE_ALIGN_VECTORS(a, b, offset) case offset: return (m128)vec_sld((int8x16_t)(a), (int8x16_t)(b), (16 - offset)); break; static really_really_inline m128 palignr_imm(m128 r, m128 l, int offset) { switch (offset) { case 0: return l; break; - CASE_ALIGN_VECTORS(l, r, 1); - CASE_ALIGN_VECTORS(l, r, 2); - CASE_ALIGN_VECTORS(l, r, 3); - CASE_ALIGN_VECTORS(l, r, 4); - CASE_ALIGN_VECTORS(l, r, 5); - CASE_ALIGN_VECTORS(l, r, 6); - CASE_ALIGN_VECTORS(l, r, 7); - CASE_ALIGN_VECTORS(l, r, 8); - CASE_ALIGN_VECTORS(l, r, 9); - CASE_ALIGN_VECTORS(l, r, 10); - CASE_ALIGN_VECTORS(l, r, 11); - CASE_ALIGN_VECTORS(l, r, 12); - CASE_ALIGN_VECTORS(l, r, 13); - CASE_ALIGN_VECTORS(l, r, 14); - CASE_ALIGN_VECTORS(l, r, 15); + CASE_ALIGN_VECTORS(r, l, 1); + CASE_ALIGN_VECTORS(r, l, 2); + CASE_ALIGN_VECTORS(r, l, 3); + CASE_ALIGN_VECTORS(r, l, 4); + CASE_ALIGN_VECTORS(r, l, 5); + CASE_ALIGN_VECTORS(r, l, 6); + CASE_ALIGN_VECTORS(r, l, 7); + CASE_ALIGN_VECTORS(r, l, 8); + CASE_ALIGN_VECTORS(r, l, 9); + CASE_ALIGN_VECTORS(r, l, 10); + CASE_ALIGN_VECTORS(r, l, 11); + CASE_ALIGN_VECTORS(r, l, 12); + CASE_ALIGN_VECTORS(r, l, 13); + CASE_ALIGN_VECTORS(r, l, 14); + CASE_ALIGN_VECTORS(r, l, 15); case 16: return r; break; default: return zeroes128(); break; } diff --git a/src/util/supervector/arch/ppc64el/impl.cpp b/src/util/supervector/arch/ppc64el/impl.cpp index 295cd128..494bcbd6 100644 --- a/src/util/supervector/arch/ppc64el/impl.cpp +++ b/src/util/supervector/arch/ppc64el/impl.cpp @@ -269,10 +269,10 @@ really_inline SuperVector<16> SuperVector<16>::eq(SuperVector<16> const &b) cons template <> really_inline typename SuperVector<16>::comparemask_type SuperVector<16>::comparemask(void) const { - uint8x16_t bitmask = vec_gb( u.u8x16[0]); static uint8x16_t perm = { 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + uint8x16_t bitmask = vec_gb(u.u8x16[0]); bitmask = (uint8x16_t) vec_perm(vec_splat_u8(0), bitmask, perm); - u32 movemask; + u32 ALIGN_ATTR(16) movemask; vec_ste((uint32x4_t) bitmask, 0, &movemask); return movemask; } From 6de45b464879b8126f98d42526e0fabb870e7e91 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Fri, 16 Sep 2022 14:02:26 +0300 Subject: [PATCH 07/13] clang 14 complains about this, needs investigation --- src/rose/rose_build_add.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index dc9ee308..82f0e2e0 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -216,9 +216,9 @@ RoseRoleHistory selectHistory(const RoseBuildImpl &tbi, const RoseBuildData &bd, const bool fixed_offset_src = g[u].fixedOffset(); const bool has_bounds = g[e].minBound || (g[e].maxBound != ROSE_BOUND_INF); - DEBUG_PRINTF("edge %zu->%zu, bounds=[%u,%u], fixed_u=%d, prefix=%d\n", + /*DEBUG_PRINTF("edge %zu->%zu, bounds=[%u,%u], fixed_u=%d, prefix=%d\n", g[u].index, g[v].index, g[e].minBound, g[e].maxBound, - (int)g[u].fixedOffset(), (int)g[v].left); + (int)g[u].fixedOffset(), (int)g[v].left);*/ if (g[v].left) { // Roles with prefix engines have their history handled by that prefix. From 0e0147ec5c138c51673c7ddbfe3af88d852bbc33 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Fri, 16 Sep 2022 14:02:53 +0300 Subject: [PATCH 08/13] clang 14 does not allow bitwise OR for bools --- src/nfagraph/ng_misc_opt.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/nfagraph/ng_misc_opt.cpp b/src/nfagraph/ng_misc_opt.cpp index d0f1f029..2b898cf7 100644 --- a/src/nfagraph/ng_misc_opt.cpp +++ b/src/nfagraph/ng_misc_opt.cpp @@ -385,8 +385,7 @@ bool improveGraph(NGHolder &g, som_type som) { const vector ordering = getTopoOrdering(g); - return enlargeCyclicCR(g, som, ordering) - | enlargeCyclicCR_rev(g, ordering); + return enlargeCyclicCR(g, som, ordering) || enlargeCyclicCR_rev(g, ordering); } /** finds a smaller reachability for a state by the reverse transformation of From a4972aa191ed8664c39e4fcc626e3ee66cbea4ca Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Fri, 16 Sep 2022 14:03:17 +0300 Subject: [PATCH 09/13] remove leftover debug print --- src/util/supervector/arch/x86/impl.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/util/supervector/arch/x86/impl.cpp b/src/util/supervector/arch/x86/impl.cpp index c9daf0cf..49fbee99 100644 --- a/src/util/supervector/arch/x86/impl.cpp +++ b/src/util/supervector/arch/x86/impl.cpp @@ -523,9 +523,7 @@ template <> really_inline SuperVector<16> SuperVector<16>::loadu_maskz(void const *ptr, uint8_t const len) { SuperVector mask = Ones_vshr(16 -len); - mask.print8("mask"); SuperVector v = _mm_loadu_si128((const m128 *)ptr); - v.print8("v"); return mask & v; } From 911a98d54f974fc9e80879b6859a3748df4efc86 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Fri, 16 Sep 2022 14:04:59 +0300 Subject: [PATCH 10/13] clang 13+ gives wrong -Wunused-but-set-variable error on nfa/mcclellancompile.cpp about total_daddy variable, disabling --- CMakeLists.txt | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 57e986c9..5076f0a9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,5 @@ -cmake_minimum_required (VERSION 2.8.11) +cmake_minimum_required (VERSION 2.8.12) + project (vectorscan C CXX) set (HS_MAJOR_VERSION 5) @@ -296,6 +297,12 @@ if (NOT RELEASE_BUILD) # release builds set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror") set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror") + if (CMAKE_COMPILER_IS_CLANG) + if (CMAKE_C_COMPILER_VERSION VERSION_GREATER "13.0") + set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-unused-but-set-variable") + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-but-set-variable") + endif() + endif() endif() if (DISABLE_ASSERTS) From 48105cdd1de8b596f2c83dac6ad68741d3f6e7a4 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Fri, 16 Sep 2022 14:05:31 +0300 Subject: [PATCH 11/13] move variable --- src/nfa/mcclellancompile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 055920b2..d1afcbcc 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -1484,12 +1484,12 @@ bytecode_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, find_wide_state(info); } - u16 total_daddy = 0; bool any_cyclic_near_anchored_state = is_cyclic_near(raw, raw.start_anchored); // Sherman optimization if (info.impl_alpha_size > 16) { + u16 total_daddy = 0; for (u32 i = 0; i < info.size(); i++) { if (info.is_widestate(i)) { continue; From 90ac7463035fb1a19c78f7466651ae9fc8939c5a Mon Sep 17 00:00:00 2001 From: Alex Bondarev Date: Sun, 18 Sep 2022 12:04:05 +0300 Subject: [PATCH 12/13] SVE enabled on user input. updated README tune and arch flags will be applied from autodetect only if they have been created by the process, otherwise the old logical flow remains wrt the flags --- CMakeLists.txt | 31 +++++++++++++++++++++++-------- README.md | 2 -- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 011bfec5..b26fcc40 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -171,7 +171,7 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) # arg1 might exist if using ccache string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1) - set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native) + set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -${TUNE_FLAG}=native) execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} OUTPUT_VARIABLE _GCC_OUTPUT) set(_GCC_OUTPUT_TUNE ${_GCC_OUTPUT}) @@ -187,11 +187,11 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) string(FIND "${GNUCC_ARCH}" "sve2" POS_SVE2) string(FIND "${GNUCC_ARCH}" "sve2-bitperm" POS_SVE2_BITPERM) if (NOT POS_SVE EQUAL 0) - set(BUILD_SVE 1) + set(SVE_FOUND 1) elseif(NOT POS_SVE2 EQUAL 0) - set(BUILD_SVE2 1) + set(SVE2_FOUND 1) elseif(NOT POS_SVE2_BITPERM EQUAL 0) - set(BUILD_SVE2_BITPERM 1) + set(SVE2_BITPERM_FOUND 1) endif() message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' ") @@ -203,11 +203,11 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) INPUT_FILE /dev/null RESULT_VARIABLE GNUCC_TUNE_TEST) if (NOT GNUCC_TUNE_TEST EQUAL 0) - message(WARNING "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid, falling back to -mtune=native") + message(WARNING "Something went wrong determining gcc tune: -mtune=${GNUCC_TUNE} not valid, falling back to -mtune=native") set(GNUCC_TUNE native) else() set(GNUCC_TUNE ${GNUCC_TUNE}) - message(STATUS "gcc will tune for ${GNUCC_ARCH}, ${TUNE_FLAG}") + message(STATUS "gcc will tune for ${GNUCC_ARCH}, ${GNUCC_TUNE}") endif() elseif (CMAKE_COMPILER_IS_CLANG AND NOT CROSS_COMPILE) if (ARCH_IA32 OR ARCH_X86_64) @@ -247,12 +247,27 @@ if (ARCH_IA32 OR ARCH_X86_64) endif() endif() +if (ARCH_AARCH64) + if (BUILD_SVE2_BITPERM AND NOT SVE2_BITPERM_FOUND) + set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm") + elseif (BUILD_SVE2 AND NOT SVE2_FOUND) + set(GNUCC_ARCH "${GNUCC_ARCH}+sve2") + elseif (BUILD_SVE AND NOT SVE_FOUND) + set(GNUCC_ARCH "${GNUCC_ARCH}+sve") + endif () +endif(ARCH_AARCH64) + message(STATUS "ARCH_C_FLAGS : ${ARCH_C_FLAGS}") message(STATUS "ARCH_CXX_FLAGS : ${ARCH_CXX_FLAGS}") if (NOT FAT_RUNTIME) - set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}") - set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}") + if (GNUCC_TUNE) + set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}") + set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}") + else() + set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_C_FLAGS}") + set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_CXX_FLAGS}") + endif() endif() #if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64) diff --git a/README.md b/README.md index f9d2708f..8bc7aff6 100644 --- a/README.md +++ b/README.md @@ -47,8 +47,6 @@ Vectorscan is typically used in a DPI library stack, just like Hyperscan. # Compiling for SVE -When compiling on AARCH64 machine with support for either of the SVE flags, it will be detected and applied automatically. - The following cmake variables can be set in order to target Arm's Scalable Vector Extension. They are listed in ascending order of strength, with cmake detecting whether the feature is available in the compiler and falling back to From 7133ac5be1ea013857e6e3a50fe5e722ac14fff0 Mon Sep 17 00:00:00 2001 From: Alex Bondarev Date: Sun, 18 Sep 2022 19:42:45 +0300 Subject: [PATCH 13/13] clang SVE build fix --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index b26fcc40..66f96a07 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -358,6 +358,7 @@ if (ARCH_IA32 OR ARCH_X86_64) elseif (ARCH_ARM32 OR ARCH_AARCH64) CHECK_INCLUDE_FILE_CXX(arm_neon.h HAVE_C_ARM_NEON_H) if (BUILD_SVE OR BUILD_SVE2 OR BUILD_SVE2_BITPERM) + set(CMAKE_REQUIRED_FLAGS ${ARCH_CXX_FLAGS}) CHECK_INCLUDE_FILE_CXX(arm_sve.h HAVE_C_ARM_SVE_H) if (NOT HAVE_C_ARM_SVE_H) message(FATAL_ERROR "arm_sve.h is required to build for SVE.")