From 779cbbc1c1976df64c65bf8c58217b0b6b54fe48 Mon Sep 17 00:00:00 2001 From: Alex Bondarev Date: Tue, 13 Sep 2022 18:21:10 +0300 Subject: [PATCH 01/13] fix to correctly place the autodetected flags and to activate SVE options --- CMakeLists.txt | 52 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 57e986c9..1283dc88 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -156,6 +156,12 @@ else() set(ARCH_FLAG march) endif() +set(TUNE_FLAG "mtune") +set(CPU_FLAG "mcpu") +set(GNUCC_CPU "") +set(GNUCC_TUNE "") +message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' , CPU_FLAG '${CPU_FLAG}' '${GNUCC_CPU}'") + # Detect best GNUCC_ARCH to tune for if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) message(STATUS "gcc version ${CMAKE_C_COMPILER_VERSION}") @@ -167,24 +173,47 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) # arg1 might exist if using ccache string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1) - set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native) + set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native -mcpu=native) execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} OUTPUT_VARIABLE _GCC_OUTPUT) + set(_GCC_OUTPUT_TUNE ${_GCC_OUTPUT}) + set(_GCC_OUTPUT_CPU ${_GCC_OUTPUT}) string(FIND "${_GCC_OUTPUT}" "${ARCH_FLAG}" POS) string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT) string(REGEX REPLACE "${ARCH_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_ARCH "${_GCC_OUTPUT}") + string(FIND "${_GCC_OUTPUT_TUNE}" "${TUNE_FLAG}" POS_TUNE) + string(SUBSTRING "${_GCC_OUTPUT_TUNE}" ${POS_TUNE} -1 _GCC_OUTPUT_TUNE) + string(REGEX REPLACE "${TUNE_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_TUNE "${_GCC_OUTPUT_TUNE}") + + string(FIND "${_GCC_OUTPUT_CPU}" "${CPU_FLAG}" POS_CPU) + string(SUBSTRING "${_GCC_OUTPUT_CPU}" ${POS_CPU} -1 _GCC_OUTPUT_CPU) + string(REGEX REPLACE "${CPU_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_CPU "${_GCC_OUTPUT_CPU}") + + string(FIND "${GNUCC_ARCH}" "sve" POS_SVE) + string(FIND "${GNUCC_ARCH}" "sve2" POS_SVE2) + string(FIND "${GNUCC_ARCH}" "sve2-bitperm" POS_SVE2_BITPERM) + if (NOT POS_SVE EQUAL 0) + set(BUILD_SVE 1) + elseif(NOT POS_SVE2 EQUAL 0) + set(BUILD_SVE2 1) + elseif(NOT POS_SVE2_BITPERM EQUAL 0) + set(BUILD_SVE2_BITPERM 1) + endif() + + message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' , CPU_FLAG '${CPU_FLAG}' '${GNUCC_CPU}'") + # test the parsed flag - set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH}) + set (EXEC_ARGS ${CC_ARG1} -E - -${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU}) execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} OUTPUT_QUIET ERROR_QUIET INPUT_FILE /dev/null RESULT_VARIABLE GNUCC_TUNE_TEST) if (NOT GNUCC_TUNE_TEST EQUAL 0) message(WARNING "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid, falling back to -mtune=native") - set(TUNE_FLAG native) + set(GNUCC_TUNE native) else() - set(TUNE_FLAG ${GNUCC_ARCH}) + set(GNUCC_TUNE ${GNUCC_TUNE}) message(STATUS "gcc will tune for ${GNUCC_ARCH}, ${TUNE_FLAG}") endif() elseif (CMAKE_COMPILER_IS_CLANG AND NOT CROSS_COMPILE) @@ -225,23 +254,12 @@ if (ARCH_IA32 OR ARCH_X86_64) endif() endif() -if (ARCH_AARCH64) - if (BUILD_SVE2_BITPERM) - set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm") - elseif (BUILD_SVE2) - set(GNUCC_ARCH "${GNUCC_ARCH}+sve2") - elseif (BUILD_SVE) - set(GNUCC_ARCH "${GNUCC_ARCH}+sve") - endif () -endif(ARCH_AARCH64) - - message(STATUS "ARCH_C_FLAGS : ${ARCH_C_FLAGS}") message(STATUS "ARCH_CXX_FLAGS : ${ARCH_CXX_FLAGS}") if (NOT FAT_RUNTIME) - set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_C_FLAGS}") - set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_CXX_FLAGS}") + set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU} ${ARCH_C_FLAGS}") + set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU} ${ARCH_CXX_FLAGS}") endif() #if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64) From 1368a72d55ee1ed6ce9d5f8d0dc1d56d7a5e54a6 Mon Sep 17 00:00:00 2001 From: Alex Bondarev Date: Tue, 13 Sep 2022 18:29:06 +0300 Subject: [PATCH 02/13] updated README to reflect CMake changes --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 8bc7aff6..f9d2708f 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,8 @@ Vectorscan is typically used in a DPI library stack, just like Hyperscan. # Compiling for SVE +When compiling on AARCH64 machine with support for either of the SVE flags, it will be detected and applied automatically. + The following cmake variables can be set in order to target Arm's Scalable Vector Extension. They are listed in ascending order of strength, with cmake detecting whether the feature is available in the compiler and falling back to From e9530145c32cc99dda4be9ce6bdd34560740a8cc Mon Sep 17 00:00:00 2001 From: Alex Bondarev Date: Thu, 15 Sep 2022 18:38:01 +0300 Subject: [PATCH 03/13] removed cpu reference flags and fixed tune flag --- CMakeLists.txt | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1283dc88..86fd3b58 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -156,12 +156,6 @@ else() set(ARCH_FLAG march) endif() -set(TUNE_FLAG "mtune") -set(CPU_FLAG "mcpu") -set(GNUCC_CPU "") -set(GNUCC_TUNE "") -message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' , CPU_FLAG '${CPU_FLAG}' '${GNUCC_CPU}'") - # Detect best GNUCC_ARCH to tune for if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) message(STATUS "gcc version ${CMAKE_C_COMPILER_VERSION}") @@ -171,25 +165,24 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) # cpuid info and then chooses the best microarch it can (and replaces # the flag), so use that for tune. + set(TUNE_FLAG "mtune") + set(GNUCC_TUNE "") + message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' ") + # arg1 might exist if using ccache string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1) - set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native -mcpu=native) + set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native) execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} OUTPUT_VARIABLE _GCC_OUTPUT) set(_GCC_OUTPUT_TUNE ${_GCC_OUTPUT}) - set(_GCC_OUTPUT_CPU ${_GCC_OUTPUT}) - string(FIND "${_GCC_OUTPUT}" "${ARCH_FLAG}" POS) + string(FIND "${_GCC_OUTPUT}" "${ARCH_FLAG}=" POS) string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT) string(REGEX REPLACE "${ARCH_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_ARCH "${_GCC_OUTPUT}") - string(FIND "${_GCC_OUTPUT_TUNE}" "${TUNE_FLAG}" POS_TUNE) + string(FIND "${_GCC_OUTPUT_TUNE}" "${TUNE_FLAG}=" POS_TUNE) string(SUBSTRING "${_GCC_OUTPUT_TUNE}" ${POS_TUNE} -1 _GCC_OUTPUT_TUNE) string(REGEX REPLACE "${TUNE_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_TUNE "${_GCC_OUTPUT_TUNE}") - string(FIND "${_GCC_OUTPUT_CPU}" "${CPU_FLAG}" POS_CPU) - string(SUBSTRING "${_GCC_OUTPUT_CPU}" ${POS_CPU} -1 _GCC_OUTPUT_CPU) - string(REGEX REPLACE "${CPU_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_CPU "${_GCC_OUTPUT_CPU}") - string(FIND "${GNUCC_ARCH}" "sve" POS_SVE) string(FIND "${GNUCC_ARCH}" "sve2" POS_SVE2) string(FIND "${GNUCC_ARCH}" "sve2-bitperm" POS_SVE2_BITPERM) @@ -201,7 +194,7 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) set(BUILD_SVE2_BITPERM 1) endif() - message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' , CPU_FLAG '${CPU_FLAG}' '${GNUCC_CPU}'") + message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' ") # test the parsed flag set (EXEC_ARGS ${CC_ARG1} -E - -${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU}) @@ -258,8 +251,8 @@ message(STATUS "ARCH_C_FLAGS : ${ARCH_C_FLAGS}") message(STATUS "ARCH_CXX_FLAGS : ${ARCH_CXX_FLAGS}") if (NOT FAT_RUNTIME) - set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU} ${ARCH_C_FLAGS}") - set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU} ${ARCH_CXX_FLAGS}") + set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}") + set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}") endif() #if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64) From 17858d4da3424cb667c6438fb6b0798f390c3e9f Mon Sep 17 00:00:00 2001 From: Alex Bondarev Date: Fri, 16 Sep 2022 00:03:08 +0300 Subject: [PATCH 04/13] additional mcpu flag cleanup --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 86fd3b58..011bfec5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -197,7 +197,7 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' ") # test the parsed flag - set (EXEC_ARGS ${CC_ARG1} -E - -${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE} -${CPU_FLAG}=${GNUCC_CPU}) + set (EXEC_ARGS ${CC_ARG1} -E - -${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}) execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} OUTPUT_QUIET ERROR_QUIET INPUT_FILE /dev/null From dfdb44a924c94b6c889d09cc61281717ce6c7fd0 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Fri, 16 Sep 2022 12:41:08 +0300 Subject: [PATCH 05/13] [VSX] clang complains about the order of __vector --- src/util/supervector/arch/ppc64el/impl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/supervector/arch/ppc64el/impl.cpp b/src/util/supervector/arch/ppc64el/impl.cpp index 2eba69b2..295cd128 100644 --- a/src/util/supervector/arch/ppc64el/impl.cpp +++ b/src/util/supervector/arch/ppc64el/impl.cpp @@ -49,7 +49,7 @@ really_inline SuperVector<16>::SuperVector(SuperVector const &other) template<> template<> -really_inline SuperVector<16>::SuperVector(char __bool __vector v) +really_inline SuperVector<16>::SuperVector(__vector __bool char v) { u.u8x16[0] = (uint8x16_t) v; }; From 8a6add2fb62fb31435632b67da4c525b7b8f3469 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Fri, 16 Sep 2022 12:50:33 +0300 Subject: [PATCH 06/13] [VSX] movemask needs to be explicitly aligned on clang for vec_ste --- src/util/arch/ppc64el/simd_utils.h | 34 +++++++++++----------- src/util/supervector/arch/ppc64el/impl.cpp | 4 +-- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/util/arch/ppc64el/simd_utils.h b/src/util/arch/ppc64el/simd_utils.h index ea1766b2..119d0946 100644 --- a/src/util/arch/ppc64el/simd_utils.h +++ b/src/util/arch/ppc64el/simd_utils.h @@ -152,7 +152,7 @@ static really_inline u32 movemask128(m128 a) { static uint8x16_t perm = { 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; uint8x16_t bitmask = vec_gb((uint8x16_t) a); bitmask = (uint8x16_t) vec_perm(vec_splat_u8(0), bitmask, perm); - u32 movemask; + u32 ALIGN_ATTR(16) movemask; vec_ste((uint32x4_t) bitmask, 0, &movemask); return movemask; } @@ -285,27 +285,27 @@ m128 loadbytes128(const void *ptr, unsigned int n) { return a; } -#define CASE_ALIGN_VECTORS(a, b, offset) case offset: return (m128)vec_sld((int8x16_t)(b), (int8x16_t)(a), (16 - offset)); break; +#define CASE_ALIGN_VECTORS(a, b, offset) case offset: return (m128)vec_sld((int8x16_t)(a), (int8x16_t)(b), (16 - offset)); break; static really_really_inline m128 palignr_imm(m128 r, m128 l, int offset) { switch (offset) { case 0: return l; break; - CASE_ALIGN_VECTORS(l, r, 1); - CASE_ALIGN_VECTORS(l, r, 2); - CASE_ALIGN_VECTORS(l, r, 3); - CASE_ALIGN_VECTORS(l, r, 4); - CASE_ALIGN_VECTORS(l, r, 5); - CASE_ALIGN_VECTORS(l, r, 6); - CASE_ALIGN_VECTORS(l, r, 7); - CASE_ALIGN_VECTORS(l, r, 8); - CASE_ALIGN_VECTORS(l, r, 9); - CASE_ALIGN_VECTORS(l, r, 10); - CASE_ALIGN_VECTORS(l, r, 11); - CASE_ALIGN_VECTORS(l, r, 12); - CASE_ALIGN_VECTORS(l, r, 13); - CASE_ALIGN_VECTORS(l, r, 14); - CASE_ALIGN_VECTORS(l, r, 15); + CASE_ALIGN_VECTORS(r, l, 1); + CASE_ALIGN_VECTORS(r, l, 2); + CASE_ALIGN_VECTORS(r, l, 3); + CASE_ALIGN_VECTORS(r, l, 4); + CASE_ALIGN_VECTORS(r, l, 5); + CASE_ALIGN_VECTORS(r, l, 6); + CASE_ALIGN_VECTORS(r, l, 7); + CASE_ALIGN_VECTORS(r, l, 8); + CASE_ALIGN_VECTORS(r, l, 9); + CASE_ALIGN_VECTORS(r, l, 10); + CASE_ALIGN_VECTORS(r, l, 11); + CASE_ALIGN_VECTORS(r, l, 12); + CASE_ALIGN_VECTORS(r, l, 13); + CASE_ALIGN_VECTORS(r, l, 14); + CASE_ALIGN_VECTORS(r, l, 15); case 16: return r; break; default: return zeroes128(); break; } diff --git a/src/util/supervector/arch/ppc64el/impl.cpp b/src/util/supervector/arch/ppc64el/impl.cpp index 295cd128..494bcbd6 100644 --- a/src/util/supervector/arch/ppc64el/impl.cpp +++ b/src/util/supervector/arch/ppc64el/impl.cpp @@ -269,10 +269,10 @@ really_inline SuperVector<16> SuperVector<16>::eq(SuperVector<16> const &b) cons template <> really_inline typename SuperVector<16>::comparemask_type SuperVector<16>::comparemask(void) const { - uint8x16_t bitmask = vec_gb( u.u8x16[0]); static uint8x16_t perm = { 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + uint8x16_t bitmask = vec_gb(u.u8x16[0]); bitmask = (uint8x16_t) vec_perm(vec_splat_u8(0), bitmask, perm); - u32 movemask; + u32 ALIGN_ATTR(16) movemask; vec_ste((uint32x4_t) bitmask, 0, &movemask); return movemask; } From 7a21c218ee7680f4a068c7fccc17f3ac03a7af80 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Fri, 16 Sep 2022 14:02:26 +0300 Subject: [PATCH 07/13] clang 14 complains about this, needs investigation --- src/rose/rose_build_add.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index dc9ee308..82f0e2e0 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -216,9 +216,9 @@ RoseRoleHistory selectHistory(const RoseBuildImpl &tbi, const RoseBuildData &bd, const bool fixed_offset_src = g[u].fixedOffset(); const bool has_bounds = g[e].minBound || (g[e].maxBound != ROSE_BOUND_INF); - DEBUG_PRINTF("edge %zu->%zu, bounds=[%u,%u], fixed_u=%d, prefix=%d\n", + /*DEBUG_PRINTF("edge %zu->%zu, bounds=[%u,%u], fixed_u=%d, prefix=%d\n", g[u].index, g[v].index, g[e].minBound, g[e].maxBound, - (int)g[u].fixedOffset(), (int)g[v].left); + (int)g[u].fixedOffset(), (int)g[v].left);*/ if (g[v].left) { // Roles with prefix engines have their history handled by that prefix. From 03c53cc065f427dcea2288543b4cfbcba2b063c9 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Fri, 16 Sep 2022 14:02:53 +0300 Subject: [PATCH 08/13] clang 14 does not allow bitwise OR for bools --- src/nfagraph/ng_misc_opt.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/nfagraph/ng_misc_opt.cpp b/src/nfagraph/ng_misc_opt.cpp index d0f1f029..2b898cf7 100644 --- a/src/nfagraph/ng_misc_opt.cpp +++ b/src/nfagraph/ng_misc_opt.cpp @@ -385,8 +385,7 @@ bool improveGraph(NGHolder &g, som_type som) { const vector ordering = getTopoOrdering(g); - return enlargeCyclicCR(g, som, ordering) - | enlargeCyclicCR_rev(g, ordering); + return enlargeCyclicCR(g, som, ordering) || enlargeCyclicCR_rev(g, ordering); } /** finds a smaller reachability for a state by the reverse transformation of From 700ef1a26516ebc185a79de2941db9d304998601 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Fri, 16 Sep 2022 14:03:17 +0300 Subject: [PATCH 09/13] remove leftover debug print --- src/util/supervector/arch/x86/impl.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/util/supervector/arch/x86/impl.cpp b/src/util/supervector/arch/x86/impl.cpp index c9daf0cf..49fbee99 100644 --- a/src/util/supervector/arch/x86/impl.cpp +++ b/src/util/supervector/arch/x86/impl.cpp @@ -523,9 +523,7 @@ template <> really_inline SuperVector<16> SuperVector<16>::loadu_maskz(void const *ptr, uint8_t const len) { SuperVector mask = Ones_vshr(16 -len); - mask.print8("mask"); SuperVector v = _mm_loadu_si128((const m128 *)ptr); - v.print8("v"); return mask & v; } From 257cf1e31143df149eddd434c00eca6e9d8d5e25 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Fri, 16 Sep 2022 14:04:59 +0300 Subject: [PATCH 10/13] clang 13+ gives wrong -Wunused-but-set-variable error on nfa/mcclellancompile.cpp about total_daddy variable, disabling --- CMakeLists.txt | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 57e986c9..5076f0a9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,5 @@ -cmake_minimum_required (VERSION 2.8.11) +cmake_minimum_required (VERSION 2.8.12) + project (vectorscan C CXX) set (HS_MAJOR_VERSION 5) @@ -296,6 +297,12 @@ if (NOT RELEASE_BUILD) # release builds set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror") set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror") + if (CMAKE_COMPILER_IS_CLANG) + if (CMAKE_C_COMPILER_VERSION VERSION_GREATER "13.0") + set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-unused-but-set-variable") + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-but-set-variable") + endif() + endif() endif() if (DISABLE_ASSERTS) From 4b24525f271ffbf134eb75ac0c45b4d47a21d5ec Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Fri, 16 Sep 2022 14:05:31 +0300 Subject: [PATCH 11/13] move variable --- src/nfa/mcclellancompile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 055920b2..d1afcbcc 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -1484,12 +1484,12 @@ bytecode_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, find_wide_state(info); } - u16 total_daddy = 0; bool any_cyclic_near_anchored_state = is_cyclic_near(raw, raw.start_anchored); // Sherman optimization if (info.impl_alpha_size > 16) { + u16 total_daddy = 0; for (u32 i = 0; i < info.size(); i++) { if (info.is_widestate(i)) { continue; From d144762b4a9fa182851e3b8fa680e99c94edcdda Mon Sep 17 00:00:00 2001 From: Alex Bondarev Date: Sun, 18 Sep 2022 12:04:05 +0300 Subject: [PATCH 12/13] SVE enabled on user input. updated README tune and arch flags will be applied from autodetect only if they have been created by the process, otherwise the old logical flow remains wrt the flags --- CMakeLists.txt | 31 +++++++++++++++++++++++-------- README.md | 2 -- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 011bfec5..b26fcc40 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -171,7 +171,7 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) # arg1 might exist if using ccache string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1) - set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native) + set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -${TUNE_FLAG}=native) execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} OUTPUT_VARIABLE _GCC_OUTPUT) set(_GCC_OUTPUT_TUNE ${_GCC_OUTPUT}) @@ -187,11 +187,11 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) string(FIND "${GNUCC_ARCH}" "sve2" POS_SVE2) string(FIND "${GNUCC_ARCH}" "sve2-bitperm" POS_SVE2_BITPERM) if (NOT POS_SVE EQUAL 0) - set(BUILD_SVE 1) + set(SVE_FOUND 1) elseif(NOT POS_SVE2 EQUAL 0) - set(BUILD_SVE2 1) + set(SVE2_FOUND 1) elseif(NOT POS_SVE2_BITPERM EQUAL 0) - set(BUILD_SVE2_BITPERM 1) + set(SVE2_BITPERM_FOUND 1) endif() message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' ") @@ -203,11 +203,11 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) INPUT_FILE /dev/null RESULT_VARIABLE GNUCC_TUNE_TEST) if (NOT GNUCC_TUNE_TEST EQUAL 0) - message(WARNING "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid, falling back to -mtune=native") + message(WARNING "Something went wrong determining gcc tune: -mtune=${GNUCC_TUNE} not valid, falling back to -mtune=native") set(GNUCC_TUNE native) else() set(GNUCC_TUNE ${GNUCC_TUNE}) - message(STATUS "gcc will tune for ${GNUCC_ARCH}, ${TUNE_FLAG}") + message(STATUS "gcc will tune for ${GNUCC_ARCH}, ${GNUCC_TUNE}") endif() elseif (CMAKE_COMPILER_IS_CLANG AND NOT CROSS_COMPILE) if (ARCH_IA32 OR ARCH_X86_64) @@ -247,12 +247,27 @@ if (ARCH_IA32 OR ARCH_X86_64) endif() endif() +if (ARCH_AARCH64) + if (BUILD_SVE2_BITPERM AND NOT SVE2_BITPERM_FOUND) + set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm") + elseif (BUILD_SVE2 AND NOT SVE2_FOUND) + set(GNUCC_ARCH "${GNUCC_ARCH}+sve2") + elseif (BUILD_SVE AND NOT SVE_FOUND) + set(GNUCC_ARCH "${GNUCC_ARCH}+sve") + endif () +endif(ARCH_AARCH64) + message(STATUS "ARCH_C_FLAGS : ${ARCH_C_FLAGS}") message(STATUS "ARCH_CXX_FLAGS : ${ARCH_CXX_FLAGS}") if (NOT FAT_RUNTIME) - set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}") - set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}") + if (GNUCC_TUNE) + set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}") + set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}") + else() + set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_C_FLAGS}") + set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_CXX_FLAGS}") + endif() endif() #if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64) diff --git a/README.md b/README.md index f9d2708f..8bc7aff6 100644 --- a/README.md +++ b/README.md @@ -47,8 +47,6 @@ Vectorscan is typically used in a DPI library stack, just like Hyperscan. # Compiling for SVE -When compiling on AARCH64 machine with support for either of the SVE flags, it will be detected and applied automatically. - The following cmake variables can be set in order to target Arm's Scalable Vector Extension. They are listed in ascending order of strength, with cmake detecting whether the feature is available in the compiler and falling back to From 49a8775fbd941dce5aa7bae685d7eb614a59e3fd Mon Sep 17 00:00:00 2001 From: Alex Bondarev Date: Sun, 18 Sep 2022 19:42:45 +0300 Subject: [PATCH 13/13] clang SVE build fix --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index b26fcc40..66f96a07 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -358,6 +358,7 @@ if (ARCH_IA32 OR ARCH_X86_64) elseif (ARCH_ARM32 OR ARCH_AARCH64) CHECK_INCLUDE_FILE_CXX(arm_neon.h HAVE_C_ARM_NEON_H) if (BUILD_SVE OR BUILD_SVE2 OR BUILD_SVE2_BITPERM) + set(CMAKE_REQUIRED_FLAGS ${ARCH_CXX_FLAGS}) CHECK_INCLUDE_FILE_CXX(arm_sve.h HAVE_C_ARM_SVE_H) if (NOT HAVE_C_ARM_SVE_H) message(FATAL_ERROR "arm_sve.h is required to build for SVE.")