From 3ee7b75ee02bdf83fc86c4a209d31873376d2e74 Mon Sep 17 00:00:00 2001 From: George Wort Date: Mon, 17 May 2021 17:13:14 +0100 Subject: [PATCH] Add SVE, SVE2, and SVE2_BITPERM as targets Change-Id: I5231e2eb0a31708a16c853dc83ea48db32e0b0a5 --- CMakeLists.txt | 11 +++++++++ README.md | 12 ++++++++++ cmake/arch.cmake | 55 +++++++++++++++++++++++++++++++++++++------ cmake/config.h.in | 12 ++++++++++ src/util/intrinsics.h | 12 ++++++++++ 5 files changed, 95 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e32be7b3..fa9648f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -213,6 +213,14 @@ else() set(TUNE_FLAG native) endif() + if (BUILD_SVE2_BITPERM) + set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm") + elseif (BUILD_SVE2) + set(GNUCC_ARCH "${GNUCC_ARCH}+sve2") + elseif (BUILD_SVE) + set(GNUCC_ARCH "${GNUCC_ARCH}+sve") + endif () + # compiler version checks TODO: test more compilers if (CMAKE_COMPILER_IS_GNUCXX) set(GNUCXX_MINVER "4.8.1") @@ -296,6 +304,9 @@ if (ARCH_IA32 OR ARCH_X86_64) CHECK_INCLUDE_FILE_CXX(x86intrin.h HAVE_CXX_X86INTRIN_H) elseif (ARCH_ARM32 OR ARCH_AARCH64) CHECK_INCLUDE_FILE_CXX(arm_neon.h HAVE_C_ARM_NEON_H) + if (BUILD_SVE OR BUILD_SVE2 OR BUILD_SVE2_BITPERM) + CHECK_INCLUDE_FILE_CXX(arm_sve.h HAVE_C_ARM_SVE_H) + endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -flax-vector-conversions") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flax-vector-conversions") endif() diff --git a/README.md b/README.md index e780238f..8bc7aff6 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,18 @@ Vectorscan is typically used in a DPI library stack, just like Hyperscan. - `make -jT` where T is the number of threads used to compile. - `cmake --build . -- -j T` can also be used instead of make. +# Compiling for SVE + +The following cmake variables can be set in order to target Arm's Scalable +Vector Extension. They are listed in ascending order of strength, with cmake +detecting whether the feature is available in the compiler and falling back to +a weaker version if not. Only one of these variables needs to be set as weaker +variables will be implied as set. + +- `BUILD_SVE` +- `BUILD_SVE2` +- `BUILD_SVE2_BITPERM` + # Documentation Information on building the Hyperscan library and using its API is available in diff --git a/cmake/arch.cmake b/cmake/arch.cmake index 691861d6..c757e91c 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -13,6 +13,52 @@ else() message (FATAL_ERROR "No intrinsics header found") endif () +if (ARCH_ARM32 OR ARCH_AARCH64) + CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}> +int main() { + int32x4_t a = vdupq_n_s32(1); + (void)a; +}" HAVE_NEON) +endif () + +if (ARCH_AARCH64) + set(PREV_FLAGS "${CMAKE_C_FLAGS}") + if (BUILD_SVE2_BITPERM) + set(CMAKE_C_FLAGS "-march=${GNUCC_ARCH} ${CMAKE_C_FLAGS}") + CHECK_C_SOURCE_COMPILES("#include + int main() { + svuint8_t a = svbext(svdup_u8(1), svdup_u8(2)); + (void)a; + }" HAVE_SVE2_BITPERM) + if (HAVE_SVE2_BITPERM) + add_definitions(-DHAVE_SVE2_BITPERM) + endif () + endif() + if (BUILD_SVE2) + set(CMAKE_C_FLAGS "-march=${GNUCC_ARCH} ${CMAKE_C_FLAGS}") + CHECK_C_SOURCE_COMPILES("#include + int main() { + svuint8_t a = svbsl(svdup_u8(1), svdup_u8(2), svdup_u8(3)); + (void)a; + }" HAVE_SVE2) + if (HAVE_SVE2) + add_definitions(-DHAVE_SVE2) + endif () + endif() + if (BUILD_SVE) + set(CMAKE_C_FLAGS "-march=${GNUCC_ARCH} ${CMAKE_C_FLAGS}") + CHECK_C_SOURCE_COMPILES("#include + int main() { + svuint8_t a = svdup_u8(1); + (void)a; + }" HAVE_SVE) + if (HAVE_SVE) + add_definitions(-DHAVE_SVE) + endif () + endif () + set(CMAKE_C_FLAGS "${PREV_FLAGS}") +endif() + if (BUILD_AVX512) CHECK_C_COMPILER_FLAG(${SKYLAKE_FLAG} HAS_ARCH_SKYLAKE) if (NOT HAS_ARCH_SKYLAKE) @@ -90,13 +136,7 @@ int main(){ (void)_mm512_permutexvar_epi8(idx, a); }" HAVE_AVX512VBMI) -elseif (ARCH_ARM32 OR ARCH_AARCH64) - CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}> -int main() { - int32x4_t a = vdupq_n_s32(1); - (void)a; -}" HAVE_NEON) -else () +elseif (!ARCH_ARM32 AND !ARCH_AARCH64) message (FATAL_ERROR "Unsupported architecture") endif () @@ -131,5 +171,6 @@ else (NOT FAT_RUNTIME) endif () endif () +unset (PREV_FLAGS) unset (CMAKE_REQUIRED_FLAGS) unset (INTRIN_INC_H) diff --git a/cmake/config.h.in b/cmake/config.h.in index 17c1e729..0afd6998 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -24,6 +24,15 @@ /* "Define if cross compiling for AARCH64" */ #cmakedefine CROSS_COMPILE_AARCH64 +/* Define if building SVE for AARCH64. */ +#cmakedefine BUILD_SVE + +/* Define if building SVE2 for AARCH64. */ +#cmakedefine BUILD_SVE2 + +/* Define if building SVE2+BITPERM for AARCH64. */ +#cmakedefine BUILD_SVE2_BITPERM + /* internal build, switch on dump support. */ #cmakedefine DUMP_SUPPORT @@ -63,6 +72,9 @@ /* C compiler has arm_neon.h */ #cmakedefine HAVE_C_ARM_NEON_H +/* C compiler has arm_sve.h */ +#cmakedefine HAVE_C_ARM_SVE_H + /* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to 0 if you don't. */ #cmakedefine HAVE_DECL_PTHREAD_SETAFFINITY_NP diff --git a/src/util/intrinsics.h b/src/util/intrinsics.h index 3e2afc22..33beb497 100644 --- a/src/util/intrinsics.h +++ b/src/util/intrinsics.h @@ -47,6 +47,15 @@ #if defined(HAVE_C_ARM_NEON_H) # define USE_ARM_NEON_H +# if defined(HAVE_C_ARM_SVE_H) +# define USE_ARM_SVE +# if defined(BUILD_SVE2) +# define USE_ARM_SVE2 +# if defined(BUILD_SVE2_BITPERM) +# define USE_ARM_SVE2_BITPERM +# endif +# endif +# endif #endif #ifdef __cplusplus @@ -65,6 +74,9 @@ #include #elif defined(USE_ARM_NEON_H) #include +# if defined(USE_ARM_SVE) +# include +# endif #else #error no intrinsics file #endif