diff --git a/CMakeLists.txt b/CMakeLists.txt index 93f3c152..60959cb5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -125,6 +125,9 @@ CMAKE_DEPENDENT_OPTION(DUMP_SUPPORT "Dump code support; normally on, except in r CMAKE_DEPENDENT_OPTION(DISABLE_ASSERTS "Disable assert(); Asserts are enabled in debug builds, disabled in release builds" OFF "NOT RELEASE_BUILD" ON) +option(BUILD_AVX512 "Experimental: support avx512 in the fat runtime" + OFF) + option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC to be set in project" OFF) # TODO: per platform config files? @@ -456,6 +459,11 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") endif() +if(CMAKE_C_COMPILER_ID MATCHES "Intel") + set(SKYLAKE_FLAG "-xCORE-AVX512") +else() + set(SKYLAKE_FLAG "-march=skylake-avx512") +endif() if(NOT WIN32) set(RAGEL_C_FLAGS "-Wno-unused") @@ -1079,6 +1087,7 @@ if (NOT FAT_RUNTIME) if (HAVE_AVX2) set(hs_exec_SRCS ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) endif() + if (BUILD_STATIC_LIBS) add_library(hs_exec OBJECT ${hs_exec_SRCS}) @@ -1096,27 +1105,41 @@ if (NOT FAT_RUNTIME) else (FAT_RUNTIME) set(BUILD_WRAPPER "${PROJECT_SOURCE_DIR}/cmake/build_wrapper.sh") + if (NOT BUILD_AVX512) + set (DISPATCHER_DEFINE "-DDISABLE_AVX512_DISPATCH") + endif (NOT BUILD_AVX512) set_source_files_properties(src/dispatcher.c PROPERTIES - COMPILE_FLAGS "-Wno-unused-parameter -Wno-unused-function") + COMPILE_FLAGS "-Wno-unused-parameter -Wno-unused-function ${DISPATCHER_DEFINE}") if (BUILD_STATIC_LIBS) add_library(hs_exec_core2 OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_LIBS $) set_target_properties(hs_exec_core2 PROPERTIES COMPILE_FLAGS "-march=core2" RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" ) add_library(hs_exec_corei7 OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_LIBS $) set_target_properties(hs_exec_corei7 PROPERTIES COMPILE_FLAGS "-march=corei7" RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" ) add_library(hs_exec_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_LIBS $) set_target_properties(hs_exec_avx2 PROPERTIES COMPILE_FLAGS "-march=core-avx2" RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in" ) + if (BUILD_AVX512) + add_library(hs_exec_avx512 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_avx512 PROPERTIES + COMPILE_FLAGS "${SKYLAKE_FLAG}" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_AVX512) add_library(hs_exec_common OBJECT ${hs_exec_common_SRCS} @@ -1127,37 +1150,51 @@ else (FAT_RUNTIME) # create a lib without any src (I'm looking at you Xcode) add_library(hs_runtime STATIC src/hs_version.c - $ $ - $ $) + $ + ${RUNTIME_LIBS}) set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) # we want the static lib for testing add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c - ${hs_SRCS} $ $ - $ $) + ${hs_SRCS} + $ + ${RUNTIME_LIBS}) endif (BUILD_STATIC_LIBS) if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) # build shared libs add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_SHLIBS $) set_target_properties(hs_exec_shared_core2 PROPERTIES COMPILE_FLAGS "-march=core2" POSITION_INDEPENDENT_CODE TRUE RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" ) add_library(hs_exec_shared_corei7 OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_SHLIBS $) set_target_properties(hs_exec_shared_corei7 PROPERTIES COMPILE_FLAGS "-march=corei7" POSITION_INDEPENDENT_CODE TRUE RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" ) add_library(hs_exec_shared_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_SHLIBS $) set_target_properties(hs_exec_shared_avx2 PROPERTIES COMPILE_FLAGS "-march=core-avx2" POSITION_INDEPENDENT_CODE TRUE RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in" ) + + if (BUILD_AVX512) + add_library(hs_exec_shared_avx512 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_SHLIBS $) + set_target_properties(hs_exec_shared_avx512 PROPERTIES + COMPILE_FLAGS "${SKYLAKE_FLAG}" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_AVX512) add_library(hs_exec_common_shared OBJECT ${hs_exec_common_SRCS} src/dispatcher.c @@ -1176,15 +1213,13 @@ endif() if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) if (NOT FAT_RUNTIME) - add_library(hs_runtime_shared SHARED src/hs_version.c src/hs_valid_platform.c -$) - else() + add_library(hs_runtime_shared SHARED src/hs_version.c + src/hs_valid_platform.c $) + else() add_library(hs_runtime_shared SHARED src/hs_version.c src/hs_valid_platform.c $ - $ - $ - $) + ${RUNTIME_SHLIBS}) endif() set_target_properties(hs_runtime_shared PROPERTIES VERSION ${LIB_VERSION} @@ -1213,9 +1248,7 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) else() add_library(hs_shared SHARED src/hs_version.c src/hs_valid_platform.c ${hs_SRCS} $ - $ - $ - $) + ${RUNTIME_SHLIBS}) endif() add_dependencies(hs_shared ragel_Parser) diff --git a/cmake/config.h.in b/cmake/config.h.in index 6e23f493..5434668e 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -15,9 +15,6 @@ /* "Define if building for EM64T" */ #cmakedefine ARCH_X86_64 -/* Define if AVX-512BW available */ -#cmakedefine HAVE_AVX512 - /* internal build, switch on dump support. */ #cmakedefine DUMP_SUPPORT diff --git a/src/dispatcher.c b/src/dispatcher.c index fb2f4f02..5ae46b56 100644 --- a/src/dispatcher.c +++ b/src/dispatcher.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,8 +33,14 @@ #include "util/cpuid_flags.h" #include "util/join.h" +#if defined(DISABLE_AVX512_DISPATCH) +#define avx512_ disabled_ +#define check_avx512() (0) +#endif + #define CREATE_DISPATCH(RTYPE, NAME, ...) \ /* create defns */ \ + RTYPE JOIN(avx512_, NAME)(__VA_ARGS__); \ RTYPE JOIN(avx2_, NAME)(__VA_ARGS__); \ RTYPE JOIN(corei7_, NAME)(__VA_ARGS__); \ RTYPE JOIN(core2_, NAME)(__VA_ARGS__); \ @@ -46,6 +52,9 @@ \ /* resolver */ \ static void(*JOIN(resolve_, NAME)(void)) { \ + if (check_avx512()) { \ + return JOIN(avx512_, NAME); \ + } \ if (check_avx2()) { \ return JOIN(avx2_, NAME); \ } \ diff --git a/src/util/cpuid_flags.c b/src/util/cpuid_flags.c index d4eaa319..c0ab09af 100644 --- a/src/util/cpuid_flags.c +++ b/src/util/cpuid_flags.c @@ -133,13 +133,12 @@ int check_avx2(void) { #endif } -static int check_avx512(void) { /* * For our purposes, having avx512 really means "can we use AVX512BW?" */ #if defined(__INTEL_COMPILER) - return _may_i_use_cpu_feature(_FEATURE_AVX512BW); + return _may_i_use_cpu_feature(_FEATURE_AVX512BW | _FEATURE_AVX512VL); #else unsigned int eax, ebx, ecx, edx; diff --git a/src/util/cpuid_flags.h b/src/util/cpuid_flags.h index c39038a1..d79c3832 100644 --- a/src/util/cpuid_flags.h +++ b/src/util/cpuid_flags.h @@ -41,6 +41,7 @@ u64a cpuid_flags(void); u32 cpuid_tune(void); +int check_avx512(void); int check_avx2(void); int check_ssse3(void); int check_sse42(void);