From b19a41528a4bc9778d67f3490a20093467dc918f Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Wed, 21 Oct 2020 05:14:53 +0000 Subject: [PATCH] Add cpu feature / target info "AVX512VBMI". --- CMakeLists.txt | 53 +++++++++++++++------------ cmake/arch.cmake | 7 ++++ cmake/config.h.in | 3 ++ doc/dev-reference/getting_started.rst | 34 +++++++++++------ src/compiler/compiler.cpp | 3 ++ src/database.c | 13 ++++--- src/database.h | 14 ++++++- src/dispatcher.c | 11 +++++- src/hs.cpp | 7 ++-- src/hs_compile.h | 25 +++++++++++++ src/util/arch/x86/cpuid_flags.c | 19 +++++++++- src/util/arch/x86/cpuid_inline.h | 52 ++++++++++++++++++++++++-- src/util/target_info.cpp | 10 ++++- src/util/target_info.h | 4 +- 14 files changed, 204 insertions(+), 51 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d08bd014..04b7de23 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1225,6 +1225,9 @@ else (FAT_RUNTIME) if (NOT BUILD_AVX512) set (DISPATCHER_DEFINE "-DDISABLE_AVX512_DISPATCH") endif (NOT BUILD_AVX512) + if (NOT BUILD_AVX512VBMI) + set (DISPATCHER_DEFINE "${DISPATCHER_DEFINE} -DDISABLE_AVX512VBMI_DISPATCH") + endif (NOT BUILD_AVX512VBMI) set_source_files_properties(src/dispatcher.c PROPERTIES COMPILE_FLAGS "-Wno-unused-parameter -Wno-unused-function ${DISPATCHER_DEFINE}") @@ -1252,18 +1255,19 @@ else (FAT_RUNTIME) if (BUILD_AVX512) add_library(hs_exec_avx512 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) list(APPEND RUNTIME_LIBS $) - if (BUILD_AVX512VBMI) - set_target_properties(hs_exec_avx512 PROPERTIES - COMPILE_FLAGS "${ICELAKE_FLAG}" - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in" - ) - else () - set_target_properties(hs_exec_avx512 PROPERTIES - COMPILE_FLAGS "${SKYLAKE_FLAG}" - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in" - ) - endif (BUILD_AVX512VBMI) + set_target_properties(hs_exec_avx512 PROPERTIES + COMPILE_FLAGS "${SKYLAKE_FLAG}" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) endif (BUILD_AVX512) + if (BUILD_AVX512VBMI) + add_library(hs_exec_avx512vbmi OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_avx512vbmi PROPERTIES + COMPILE_FLAGS "${ICELAKE_FLAG}" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512vbmi ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_AVX512VBMI) add_library(hs_exec_common OBJECT ${hs_exec_common_SRCS} @@ -1320,20 +1324,21 @@ else (FAT_RUNTIME) if (BUILD_AVX512) add_library(hs_exec_shared_avx512 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) list(APPEND RUNTIME_SHLIBS $) - if (BUILD_AVX512VBMI) - set_target_properties(hs_exec_shared_avx512 PROPERTIES - COMPILE_FLAGS "${ICELAKE_FLAG}" - POSITION_INDEPENDENT_CODE TRUE - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in" - ) - else () - set_target_properties(hs_exec_shared_avx512 PROPERTIES - COMPILE_FLAGS "${SKYLAKE_FLAG}" - POSITION_INDEPENDENT_CODE TRUE - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in" - ) - endif (BUILD_AVX512VBMI) + set_target_properties(hs_exec_shared_avx512 PROPERTIES + COMPILE_FLAGS "${SKYLAKE_FLAG}" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) endif (BUILD_AVX512) + if (BUILD_AVX512VBMI) + add_library(hs_exec_shared_avx512vbmi OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_SHLIBS $) + set_target_properties(hs_exec_shared_avx512vbmi PROPERTIES + COMPILE_FLAGS "${ICELAKE_FLAG}" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512vbmi ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_AVX512VBMI) add_library(hs_exec_common_shared OBJECT ${hs_exec_common_SRCS} src/dispatcher.c diff --git a/cmake/arch.cmake b/cmake/arch.cmake index 56851354..b0930728 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -20,6 +20,13 @@ if (BUILD_AVX512) endif () endif () +if (BUILD_AVX512VBMI) + CHECK_C_COMPILER_FLAG(${ICELAKE_FLAG} HAS_ARCH_ICELAKE) + if (NOT HAS_ARCH_ICELAKE) + message (FATAL_ERROR "AVX512VBMI not supported by compiler") + endif () +endif () + if (FAT_RUNTIME) # test the highest level microarch to make sure everything works if (BUILD_AVX512) diff --git a/cmake/config.h.in b/cmake/config.h.in index 2d2c78ce..f974c0ad 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -30,6 +30,9 @@ /* Define if building AVX-512 in the fat runtime. */ #cmakedefine BUILD_AVX512 +/* Define if building AVX512VBMI in the fat runtime. */ +#cmakedefine BUILD_AVX512VBMI + /* Define to 1 if `backtrace' works. */ #cmakedefine HAVE_BACKTRACE diff --git a/doc/dev-reference/getting_started.rst b/doc/dev-reference/getting_started.rst index b3812873..aaff15ba 100644 --- a/doc/dev-reference/getting_started.rst +++ b/doc/dev-reference/getting_started.rst @@ -263,17 +263,19 @@ the current platform is supported by Hyperscan. As of this release, the variants of the runtime that are built, and the CPU capability that is required, are the following: -+----------+-------------------------------+---------------------------+ -| Variant | CPU Feature Flag(s) Required | gcc arch flag | -+==========+===============================+===========================+ -| Core 2 | ``SSSE3`` | ``-march=core2`` | -+----------+-------------------------------+---------------------------+ -| Core i7 | ``SSE4_2`` and ``POPCNT`` | ``-march=corei7`` | -+----------+-------------------------------+---------------------------+ -| AVX 2 | ``AVX2`` | ``-march=core-avx2`` | -+----------+-------------------------------+---------------------------+ -| AVX 512 | ``AVX512BW`` (see note below) | ``-march=skylake-avx512`` | -+----------+-------------------------------+---------------------------+ ++--------------+---------------------------------+---------------------------+ +| Variant | CPU Feature Flag(s) Required | gcc arch flag | ++==============+=================================+===========================+ +| Core 2 | ``SSSE3`` | ``-march=core2`` | ++--------------+---------------------------------+---------------------------+ +| Core i7 | ``SSE4_2`` and ``POPCNT`` | ``-march=corei7`` | ++--------------+---------------------------------+---------------------------+ +| AVX 2 | ``AVX2`` | ``-march=core-avx2`` | ++--------------+---------------------------------+---------------------------+ +| AVX 512 | ``AVX512BW`` (see note below) | ``-march=skylake-avx512`` | ++--------------+---------------------------------+---------------------------+ +| AVX 512 VBMI | ``AVX512VBMI`` (see note below) | ``-march=icelake-server`` | ++--------------+---------------------------------+---------------------------+ .. note:: @@ -287,6 +289,16 @@ capability that is required, are the following: cmake -DBUILD_AVX512=on <...> + Hyperscan v5.3 adds support for AVX512VBMI instructions - in particular the + ``AVX512VBMI`` instruction set that was introduced on Intel "Icelake" Xeon + processors - however the AVX512VBMI runtime variant is **not** enabled by + default in fat runtime builds as not all toolchains support AVX512VBMI + instruction sets. To build an AVX512VBMI runtime, the CMake variable + ``BUILD_AVX512VBMI`` must be enabled manually during configuration. For + example: :: + + cmake -DBUILD_AVX512VBMI=on <...> + As the fat runtime requires compiler, libc, and binutils support, at this time it will only be enabled for Linux builds where the compiler supports the `indirect function "ifunc" function attribute diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 666eefc9..5751bd64 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -458,6 +458,9 @@ platform_t target_to_platform(const target_t &target_info) { if (!target_info.has_avx512()) { p |= HS_PLATFORM_NOAVX512; } + if (!target_info.has_avx512vbmi()) { + p |= HS_PLATFORM_NOAVX512VBMI; + } return p; } diff --git a/src/database.c b/src/database.c index 1a79800e..6adf1419 100644 --- a/src/database.c +++ b/src/database.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -115,7 +115,8 @@ static hs_error_t db_check_platform(const u64a p) { if (p != hs_current_platform && p != (hs_current_platform | hs_current_platform_no_avx2) - && p != (hs_current_platform | hs_current_platform_no_avx512)) { + && p != (hs_current_platform | hs_current_platform_no_avx512) + && p != (hs_current_platform | hs_current_platform_no_avx512vbmi)) { return HS_DB_PLATFORM_ERROR; } // passed all checks @@ -370,9 +371,11 @@ hs_error_t print_database_string(char **s, u32 version, const platform_t plat, u8 minor = (version >> 16) & 0xff; u8 major = (version >> 24) & 0xff; - const char *features = (plat & HS_PLATFORM_NOAVX512) - ? (plat & HS_PLATFORM_NOAVX2) ? "" : "AVX2" - : "AVX512"; + const char *features = (plat & HS_PLATFORM_NOAVX512VBMI) + ? (plat & HS_PLATFORM_NOAVX512) + ? (plat & HS_PLATFORM_NOAVX2) ? "" : "AVX2" + : "AVX512" + : "AVX512VBMI"; const char *mode = NULL; diff --git a/src/database.h b/src/database.h index 7789b9ab..a4d6e4dc 100644 --- a/src/database.h +++ b/src/database.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -56,6 +56,7 @@ extern "C" #define HS_PLATFORM_NOAVX2 (4<<13) #define HS_PLATFORM_NOAVX512 (8<<13) +#define HS_PLATFORM_NOAVX512VBMI (0x10<<13) /** \brief Platform features bitmask. */ typedef u64a platform_t; @@ -67,6 +68,9 @@ const platform_t hs_current_platform = { #endif #if !defined(HAVE_AVX512) HS_PLATFORM_NOAVX512 | +#endif +#if !defined(HAVE_AVX512VBMI) + HS_PLATFORM_NOAVX512VBMI | #endif 0, }; @@ -75,12 +79,20 @@ static UNUSED const platform_t hs_current_platform_no_avx2 = { HS_PLATFORM_NOAVX2 | HS_PLATFORM_NOAVX512 | + HS_PLATFORM_NOAVX512VBMI | 0, }; static UNUSED const platform_t hs_current_platform_no_avx512 = { HS_PLATFORM_NOAVX512 | + HS_PLATFORM_NOAVX512VBMI | + 0, +}; + +static UNUSED +const platform_t hs_current_platform_no_avx512vbmi = { + HS_PLATFORM_NOAVX512VBMI | 0, }; diff --git a/src/dispatcher.c b/src/dispatcher.c index 46fdb7d5..f5f2d2c6 100644 --- a/src/dispatcher.c +++ b/src/dispatcher.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017, Intel Corporation + * Copyright (c) 2016-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,8 +40,14 @@ #define check_avx512() (0) #endif +#if defined(DISABLE_AVX512VBMI_DISPATCH) +#define avx512vbmi_ disabled_ +#define check_avx512vbmi() (0) +#endif + #define CREATE_DISPATCH(RTYPE, NAME, ...) \ /* create defns */ \ + RTYPE JOIN(avx512vbmi_, NAME)(__VA_ARGS__); \ RTYPE JOIN(avx512_, NAME)(__VA_ARGS__); \ RTYPE JOIN(avx2_, NAME)(__VA_ARGS__); \ RTYPE JOIN(corei7_, NAME)(__VA_ARGS__); \ @@ -54,6 +60,9 @@ \ /* resolver */ \ static RTYPE (*JOIN(resolve_, NAME)(void))(__VA_ARGS__) { \ + if (check_avx512vbmi()) { \ + return JOIN(avx512vbmi_, NAME); \ + } \ if (check_avx512()) { \ return JOIN(avx512_, NAME); \ } \ diff --git a/src/hs.cpp b/src/hs.cpp index b128572a..303e7838 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2019, Intel Corporation + * Copyright (c) 2015-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -123,9 +123,10 @@ bool checkMode(unsigned int mode, hs_compile_error **comp_error) { static bool checkPlatform(const hs_platform_info *p, hs_compile_error **comp_error) { - static constexpr u32 HS_TUNE_LAST = HS_TUNE_FAMILY_GLM; + static constexpr u32 HS_TUNE_LAST = HS_TUNE_FAMILY_ICX; static constexpr u32 HS_CPU_FEATURES_ALL = - HS_CPU_FEATURES_AVX2 | HS_CPU_FEATURES_AVX512; + HS_CPU_FEATURES_AVX2 | HS_CPU_FEATURES_AVX512 | + HS_CPU_FEATURES_AVX512VBMI; if (!p) { return true; diff --git a/src/hs_compile.h b/src/hs_compile.h index 081d4638..b318c29d 100644 --- a/src/hs_compile.h +++ b/src/hs_compile.h @@ -1034,6 +1034,15 @@ hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform); */ #define HS_CPU_FEATURES_AVX512 (1ULL << 3) +/** + * CPU features flag - Intel(R) Advanced Vector Extensions 512 + * Vector Byte Manipulation Instructions (Intel(R) AVX512VBMI) + * + * Setting this flag indicates that the target platform supports AVX512VBMI + * instructions. Using AVX512VBMI implies the use of AVX512. + */ +#define HS_CPU_FEATURES_AVX512VBMI (1ULL << 4) + /** @} */ /** @@ -1114,6 +1123,22 @@ hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform); */ #define HS_TUNE_FAMILY_GLM 8 +/** + * Tuning Parameter - Intel(R) microarchitecture code name Icelake + * + * This indicates that the compiled database should be tuned for the + * Icelake microarchitecture. + */ +#define HS_TUNE_FAMILY_ICL 9 + +/** + * Tuning Parameter - Intel(R) microarchitecture code name Icelake Server + * + * This indicates that the compiled database should be tuned for the + * Icelake Server microarchitecture. + */ +#define HS_TUNE_FAMILY_ICX 10 + /** @} */ /** diff --git a/src/util/arch/x86/cpuid_flags.c b/src/util/arch/x86/cpuid_flags.c index 81c7e456..9b8901fd 100644 --- a/src/util/arch/x86/cpuid_flags.c +++ b/src/util/arch/x86/cpuid_flags.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -50,6 +50,11 @@ u64a cpuid_flags(void) { cap |= HS_CPU_FEATURES_AVX512; } + if (check_avx512vbmi()) { + DEBUG_PRINTF("AVX512VBMI enabled\n"); + cap |= HS_CPU_FEATURES_AVX512VBMI; + } + #if !defined(FAT_RUNTIME) && !defined(HAVE_AVX2) cap &= ~HS_CPU_FEATURES_AVX2; #endif @@ -59,6 +64,11 @@ u64a cpuid_flags(void) { cap &= ~HS_CPU_FEATURES_AVX512; #endif +#if (!defined(FAT_RUNTIME) && !defined(HAVE_AVX512VBMI)) || \ + (defined(FAT_RUNTIME) && !defined(BUILD_AVX512VBMI)) + cap &= ~HS_CPU_FEATURES_AVX512VBMI; +#endif + return cap; } @@ -105,6 +115,11 @@ static const struct family_id known_microarch[] = { { 0x6, 0x8E, HS_TUNE_FAMILY_SKL }, /* Kabylake Mobile */ { 0x6, 0x9E, HS_TUNE_FAMILY_SKL }, /* Kabylake desktop */ + { 0x6, 0x7D, HS_TUNE_FAMILY_ICL }, /* Icelake */ + { 0x6, 0x7E, HS_TUNE_FAMILY_ICL }, /* Icelake */ + { 0x6, 0x6A, HS_TUNE_FAMILY_ICX }, /* Icelake Xeon-D */ + { 0x6, 0x6C, HS_TUNE_FAMILY_ICX }, /* Icelake Xeon */ + }; #ifdef DUMP_SUPPORT @@ -120,6 +135,8 @@ const char *dumpTune(u32 tune) { T_CASE(HS_TUNE_FAMILY_BDW); T_CASE(HS_TUNE_FAMILY_SKL); T_CASE(HS_TUNE_FAMILY_SKX); + T_CASE(HS_TUNE_FAMILY_ICL); + T_CASE(HS_TUNE_FAMILY_ICX); } #undef T_CASE return "unknown"; diff --git a/src/util/arch/x86/cpuid_inline.h b/src/util/arch/x86/cpuid_inline.h index 97f19aed..50fa858b 100644 --- a/src/util/arch/x86/cpuid_inline.h +++ b/src/util/arch/x86/cpuid_inline.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Intel Corporation + * Copyright (c) 2017-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -74,11 +74,12 @@ void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax, #define CPUID_HTT (1 << 28) // Structured Extended Feature Flags Enumeration Leaf ECX values +#define CPUID_AVX512VBMI (1 << 1) + +// Structured Extended Feature Flags Enumeration Leaf EBX values #define CPUID_BMI (1 << 3) #define CPUID_AVX2 (1 << 5) #define CPUID_BMI2 (1 << 8) - -// Structured Extended Feature Flags Enumeration Leaf EBX values #define CPUID_AVX512F (1 << 16) #define CPUID_AVX512BW (1 << 30) @@ -186,6 +187,51 @@ int check_avx512(void) { #endif } +static inline +int check_avx512vbmi(void) { +#if defined(__INTEL_COMPILER) + return _may_i_use_cpu_feature(_FEATURE_AVX512VBMI); +#else + unsigned int eax, ebx, ecx, edx; + + cpuid(1, 0, &eax, &ebx, &ecx, &edx); + + /* check XSAVE is enabled by OS */ + if (!(ecx & CPUID_XSAVE)) { + DEBUG_PRINTF("AVX and XSAVE not supported\n"); + return 0; + } + + /* check that AVX 512 registers are enabled by OS */ + u64a xcr0 = xgetbv(0); + if ((xcr0 & CPUID_XCR0_AVX512) != CPUID_XCR0_AVX512) { + DEBUG_PRINTF("AVX512 registers not enabled\n"); + return 0; + } + + /* ECX and EDX contain capability flags */ + ecx = 0; + cpuid(7, 0, &eax, &ebx, &ecx, &edx); + + if (!(ebx & CPUID_AVX512F)) { + DEBUG_PRINTF("AVX512F (AVX512 Foundation) instructions not enabled\n"); + return 0; + } + + if (!(ebx & CPUID_AVX512BW)) { + DEBUG_PRINTF("AVX512BW instructions not enabled\n"); + return 0; + } + + if (ecx & CPUID_AVX512VBMI) { + DEBUG_PRINTF("AVX512VBMI instructions enabled\n"); + return 1; + } + + return 0; +#endif +} + static inline int check_ssse3(void) { unsigned int eax, ebx, ecx, edx; diff --git a/src/util/target_info.cpp b/src/util/target_info.cpp index 5253755b..9bd34342 100644 --- a/src/util/target_info.cpp +++ b/src/util/target_info.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -53,6 +53,10 @@ bool target_t::can_run_on_code_built_for(const target_t &code_target) const { return false; } + if (!has_avx512vbmi() && code_target.has_avx512vbmi()) { + return false; + } + return true; } @@ -67,6 +71,10 @@ bool target_t::has_avx512(void) const { return cpu_features & HS_CPU_FEATURES_AVX512; } +bool target_t::has_avx512vbmi(void) const { + return cpu_features & HS_CPU_FEATURES_AVX512VBMI; +} + bool target_t::is_atom_class(void) const { return tune == HS_TUNE_FAMILY_SLM || tune == HS_TUNE_FAMILY_GLM; } diff --git a/src/util/target_info.h b/src/util/target_info.h index 794b2985..f64573ae 100644 --- a/src/util/target_info.h +++ b/src/util/target_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,6 +42,8 @@ struct target_t { bool has_avx512(void) const; + bool has_avx512vbmi(void) const; + bool is_atom_class(void) const; // This asks: can this target (the object) run on code that was built for