Merge pull request #81 from VectorCamp/feature/add-clang-support

Feature/add clang support
This commit is contained in:
Konstantinos Margaritis 2021-12-07 22:16:38 +02:00 committed by GitHub
commit 1718e33544
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 1144 additions and 302 deletions

View File

@ -3,7 +3,7 @@ project (vectorscan C CXX)
set (HS_MAJOR_VERSION 5) set (HS_MAJOR_VERSION 5)
set (HS_MINOR_VERSION 4) set (HS_MINOR_VERSION 4)
set (HS_PATCH_VERSION 3) set (HS_PATCH_VERSION 5)
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION}) set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
@ -128,11 +128,9 @@ CMAKE_DEPENDENT_OPTION(DUMP_SUPPORT "Dump code support; normally on, except in r
CMAKE_DEPENDENT_OPTION(DISABLE_ASSERTS "Disable assert(); Asserts are enabled in debug builds, disabled in release builds" OFF "NOT RELEASE_BUILD" ON) CMAKE_DEPENDENT_OPTION(DISABLE_ASSERTS "Disable assert(); Asserts are enabled in debug builds, disabled in release builds" OFF "NOT RELEASE_BUILD" ON)
option(BUILD_AVX512 "Experimental: support avx512 in the fat runtime" option(BUILD_AVX512 "Experimental: support avx512 in the fat runtime" OFF)
OFF)
option(BUILD_AVX512VBMI "Experimental: support avx512vbmi in the fat runtime" option(BUILD_AVX512VBMI "Experimental: support avx512vbmi in the fat runtime" OFF)
OFF)
if (BUILD_AVX512VBMI) if (BUILD_AVX512VBMI)
set(BUILD_AVX512 ON) set(BUILD_AVX512 ON)
@ -140,14 +138,29 @@ endif ()
# TODO: per platform config files? # TODO: per platform config files?
# remove CMake's idea of optimisation # remove CMake's idea of optimisation
foreach (CONFIG ${CMAKE_BUILD_TYPE} ${CMAKE_CONFIGURATION_TYPES}) foreach (CONFIG ${CMAKE_BUILD_TYPE} ${CMAKE_CONFIGURATION_TYPES})
string(REGEX REPLACE "-O[^ ]*" "" CMAKE_C_FLAGS_${CONFIG} "${CMAKE_C_FLAGS_${CONFIG}}") string(REGEX REPLACE "-O[^ ]*" "" CMAKE_C_FLAGS_${CONFIG} "${CMAKE_C_FLAGS_${CONFIG}}")
string(REGEX REPLACE "-O[^ ]*" "" CMAKE_CXX_FLAGS_${CONFIG} "${CMAKE_CXX_FLAGS_${CONFIG}}") string(REGEX REPLACE "-O[^ ]*" "" CMAKE_CXX_FLAGS_${CONFIG} "${CMAKE_CXX_FLAGS_${CONFIG}}")
endforeach () endforeach ()
if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE_AARCH64 AND NOT ARCH_PPC64EL) if (CMAKE_C_COMPILER_ID MATCHES "Intel")
set(SKYLAKE_FLAG "-xCORE-AVX512")
else ()
set(SKYLAKE_FLAG "-march=skylake-avx512")
set(ICELAKE_FLAG "-march=icelake-server")
endif ()
if(ARCH_PPC64EL)
set(ARCH_FLAG mcpu)
else()
set(ARCH_FLAG march)
endif()
# Detect best GNUCC_ARCH to tune for
if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE)
message(STATUS "gcc version ${CMAKE_C_COMPILER_VERSION}") message(STATUS "gcc version ${CMAKE_C_COMPILER_VERSION}")
# If gcc doesn't recognise the host cpu, then mtune=native becomes # If gcc doesn't recognise the host cpu, then mtune=native becomes
# generic, which isn't very good in some cases. march=native looks at # generic, which isn't very good in some cases. march=native looks at
# cpuid info and then chooses the best microarch it can (and replaces # cpuid info and then chooses the best microarch it can (and replaces
@ -155,15 +168,13 @@ endif ()
# arg1 might exist if using ccache # arg1 might exist if using ccache
string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1) string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1)
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=native -mtune=native) set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native)
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
OUTPUT_VARIABLE _GCC_OUTPUT) OUTPUT_VARIABLE _GCC_OUTPUT)
string(FIND "${_GCC_OUTPUT}" "march" POS) string(FIND "${_GCC_OUTPUT}" "${ARCH_FLAG}" POS)
string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT) string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT)
string(REGEX REPLACE "march=[ \t]*([^ \n]*)[ \n].*" "\\1" string(REGEX REPLACE "${ARCH_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_ARCH "${_GCC_OUTPUT}")
GNUCC_ARCH "${_GCC_OUTPUT}")
if (ARCH_IA32 OR ARCH_X86_64)
# test the parsed flag # test the parsed flag
set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH}) set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH})
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
@ -171,16 +182,51 @@ endif ()
INPUT_FILE /dev/null INPUT_FILE /dev/null
RESULT_VARIABLE GNUCC_TUNE_TEST) RESULT_VARIABLE GNUCC_TUNE_TEST)
if (NOT GNUCC_TUNE_TEST EQUAL 0) if (NOT GNUCC_TUNE_TEST EQUAL 0)
message(SEND_ERROR "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid") message(WARNING "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid, falling back to -mtune=native")
endif() set(TUNE_FLAG native)
set(TUNE_FLAG ${GNUCC_ARCH})
else() else()
set(TUNE_FLAG native) set(TUNE_FLAG ${GNUCC_ARCH})
message(STATUS "gcc will tune for ${GNUCC_ARCH}, ${TUNE_FLAG}")
endif() endif()
elseif (NOT TUNE_FLAG) elseif (CMAKE_COMPILER_IS_CLANG AND NOT CROSS_COMPILE)
set(TUNE_FLAG native) if (ARCH_IA32 OR ARCH_X86_64)
set(GNUCC_ARCH native)
set(TUNE_FLAG generic)
elseif(ARCH_AARCH64)
set(GNUCC_ARCH armv8)
set(TUNE_FLAG generic)
elseif(ARCH_ARM32)
set(GNUCC_ARCH armv7a)
set(TUNE_FLAG generic)
else()
set(GNUCC_ARCH native)
set(TUNE_FLAG generic)
endif() endif()
message(STATUS "clang will tune for ${GNUCC_ARCH}, ${TUNE_FLAG}")
elseif (CROSS_COMPILE)
set(GNUCC_ARCH generic)
set(TUNE_FLAG generic)
endif()
if (ARCH_IA32 OR ARCH_X86_64)
if (NOT FAT_RUNTIME)
if (BUILD_AVX512)
set(ARCH_C_FLAGS "${SKYLAKE_FLAG}")
set(ARCH_CXX_FLAGS "${SKYLAKE_FLAG}")
elseif (BUILD_AVX2)
set(ARCH_C_FLAGS "-mavx2")
set(ARCH_CXX_FLAGS "-mavx2")
else()
set(ARCH_C_FLAGS "-msse4.2")
set(ARCH_CXX_FLAGS "-msse4.2")
endif()
else()
set(ARCH_C_FLAGS "-msse4.2")
set(ARCH_CXX_FLAGS "-msse4.2")
endif()
endif()
if (ARCH_AARCH64)
if (BUILD_SVE2_BITPERM) if (BUILD_SVE2_BITPERM)
set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm") set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm")
elseif (BUILD_SVE2) elseif (BUILD_SVE2)
@ -188,17 +234,39 @@ endif ()
elseif (BUILD_SVE) elseif (BUILD_SVE)
set(GNUCC_ARCH "${GNUCC_ARCH}+sve") set(GNUCC_ARCH "${GNUCC_ARCH}+sve")
endif () endif ()
endif(ARCH_AARCH64)
# compiler version checks TODO: test more compilers set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_C_FLAGS}")
if (CMAKE_COMPILER_IS_GNUCXX) set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_CXX_FLAGS}")
set(GNUCXX_MINVER "4.8.1")
#if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64)
# if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*)
# set(ARCH_C_FLAGS "-march=${GNUCC_ARCH} -mtune=${TUNE_FLAG}")
# endif()
# if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*)
# set(ARCH_CXX_FLAGS "-march=${GNUCC_ARCH} -mtune=${TUNE_FLAG}")
# endif()
#endif()
#if(ARCH_PPC64EL)
# if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*)
# set(ARCH_C_FLAGS "-mtune=${TUNE_FLAG}")
# endif()
# if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*)
# set(ARCH_CXX_FLAGS "-mtune=${TUNE_FLAG}")
# endif()
#endif()
# compiler version checks TODO: test more compilers
if (CMAKE_COMPILER_IS_GNUCXX)
set(GNUCXX_MINVER "9")
message(STATUS "g++ version ${CMAKE_CXX_COMPILER_VERSION}") message(STATUS "g++ version ${CMAKE_CXX_COMPILER_VERSION}")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS GNUCXX_MINVER) if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS GNUCXX_MINVER)
message(FATAL_ERROR "A minimum of g++ ${GNUCXX_MINVER} is required for C++11 support") message(FATAL_ERROR "A minimum of g++ ${GNUCXX_MINVER} is required for C++17 support")
endif()
endif() endif()
endif()
if(RELEASE_BUILD) if(RELEASE_BUILD)
if (NOT CMAKE_BUILD_TYPE MATCHES MINSIZEREL) if (NOT CMAKE_BUILD_TYPE MATCHES MINSIZEREL)
set(OPT_C_FLAG "-O3") set(OPT_C_FLAG "-O3")
set(OPT_CXX_FLAG "-O3") set(OPT_CXX_FLAG "-O3")
@ -206,53 +274,36 @@ endif ()
set(OPT_C_FLAG "-Os") set(OPT_C_FLAG "-Os")
set(OPT_CXX_FLAG "-Os") set(OPT_CXX_FLAG "-Os")
endif () endif ()
else() else()
set(OPT_C_FLAG "-O0") set(OPT_C_FLAG "-O0")
set(OPT_CXX_FLAG "-O0") set(OPT_CXX_FLAG "-O0")
endif(RELEASE_BUILD) endif(RELEASE_BUILD)
# set compiler flags - more are tested and added later # set compiler flags - more are tested and added later
set(EXTRA_C_FLAGS "${OPT_C_FLAG} -std=c17 -Wall -Wextra -Wshadow -Wcast-qual -fno-strict-aliasing") set(EXTRA_C_FLAGS "${OPT_C_FLAG} -std=c17 -Wall -Wextra -Wshadow -Wcast-qual -fno-strict-aliasing")
set(EXTRA_CXX_FLAGS "${OPT_CXX_FLAG} -std=c++17 -Wall -Wextra -Wshadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor -fno-strict-aliasing -fno-new-ttp-matching") set(EXTRA_CXX_FLAGS "${OPT_CXX_FLAG} -std=c++17 -Wall -Wextra -Wshadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor -fno-strict-aliasing")
if (NOT CMAKE_COMPILER_IS_CLANG)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-new-ttp-matching")
endif()
if (NOT RELEASE_BUILD) if (NOT RELEASE_BUILD)
# -Werror is most useful during development, don't potentially break # -Werror is most useful during development, don't potentially break
# release builds # release builds
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror") set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror") set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror")
endif() endif()
if (DISABLE_ASSERTS) if (DISABLE_ASSERTS)
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -DNDEBUG") set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -DNDEBUG")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -DNDEBUG") set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -DNDEBUG")
endif() endif()
if(CMAKE_COMPILER_IS_GNUCC)
if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64)
if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*)
set(ARCH_C_FLAGS "-march=${GNUCC_ARCH} -mtune=${TUNE_FLAG}")
endif()
if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*)
set(ARCH_CXX_FLAGS "-march=${GNUCC_ARCH} -mtune=${TUNE_FLAG}")
endif()
endif()
if(ARCH_PPC64EL)
if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*)
set(ARCH_C_FLAGS "-mtune=${TUNE_FLAG}")
endif()
if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*)
set(ARCH_CXX_FLAGS "-mtune=${TUNE_FLAG}")
endif()
endif()
if(CMAKE_COMPILER_IS_GNUCC)
# spurious warnings? # spurious warnings?
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-array-bounds -Wno-maybe-uninitialized") set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-array-bounds -Wno-maybe-uninitialized")
endif() endif()
if(CMAKE_COMPILER_IS_GNUCXX) if(CMAKE_COMPILER_IS_GNUCXX)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-maybe-uninitialized") set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-maybe-uninitialized")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0) if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fabi-version=0") set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fabi-version=0")
@ -260,20 +311,12 @@ endif ()
# don't complain about abi # don't complain about abi
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-abi") set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-abi")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-abi") set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-abi")
endif() endif()
if (NOT(ARCH_IA32 AND RELEASE_BUILD)) if (NOT(ARCH_IA32 AND RELEASE_BUILD))
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fno-omit-frame-pointer") set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fno-omit-frame-pointer")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer") set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer")
endif() endif()
if (CMAKE_C_COMPILER_ID MATCHES "Intel")
set(SKYLAKE_FLAG "-xCORE-AVX512")
else ()
set(SKYLAKE_FLAG "-march=skylake-avx512")
set(ICELAKE_FLAG "-march=icelake-server")
endif ()
CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H) CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H)
if (ARCH_IA32 OR ARCH_X86_64) if (ARCH_IA32 OR ARCH_X86_64)
@ -289,8 +332,6 @@ elseif (ARCH_ARM32 OR ARCH_AARCH64)
message(FATAL_ERROR "arm_sve.h is required to build for SVE.") message(FATAL_ERROR "arm_sve.h is required to build for SVE.")
endif() endif()
endif() endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -flax-vector-conversions")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flax-vector-conversions")
elseif (ARCH_PPC64EL) elseif (ARCH_PPC64EL)
CHECK_INCLUDE_FILE_CXX(altivec.h HAVE_C_PPC64EL_ALTIVEC_H) CHECK_INCLUDE_FILE_CXX(altivec.h HAVE_C_PPC64EL_ALTIVEC_H)
endif() endif()
@ -318,8 +359,7 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
# This is a Linux-only feature for now - requires platform support # This is a Linux-only feature for now - requires platform support
# elsewhere # elsewhere
message(STATUS "generator is ${CMAKE_GENERATOR}") message(STATUS "generator is ${CMAKE_GENERATOR}")
if (CMAKE_C_COMPILER_ID MATCHES "Clang" AND if (CMAKE_C_COMPILER_IS_CLANG AND CMAKE_C_COMPILER_VERSION VERSION_LESS "3.9")
CMAKE_C_COMPILER_VERSION VERSION_LESS "3.9")
message (STATUS "Clang v3.9 or higher required for fat runtime, cannot build fat runtime") message (STATUS "Clang v3.9 or higher required for fat runtime, cannot build fat runtime")
set (FAT_RUNTIME_REQUISITES FALSE) set (FAT_RUNTIME_REQUISITES FALSE)
elseif (NOT (CMAKE_GENERATOR MATCHES "Unix Makefiles" OR elseif (NOT (CMAKE_GENERATOR MATCHES "Unix Makefiles" OR
@ -343,7 +383,10 @@ include (${CMAKE_MODULE_PATH}/arch.cmake)
# testing a builtin takes a little more work # testing a builtin takes a little more work
CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED) CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED)
CHECK_CXX_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED) CHECK_CXX_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED)
CHECK_C_SOURCE_COMPILES("int main(void) { __builtin_constant_p(0); }" HAVE__BUILTIN_CONSTANT_P) # Clang does not use __builtin_constant_p() the same way as gcc
if (NOT CMAKE_COMPILER_IS_CLANG)
CHECK_C_SOURCE_COMPILES("int main(void) { __builtin_constant_p(0); }" HAVE__BUILTIN_CONSTANT_P)
endif()
set(C_FLAGS_TO_CHECK set(C_FLAGS_TO_CHECK
# Variable length arrays are way bad, most especially at run time # Variable length arrays are way bad, most especially at run time
@ -442,19 +485,22 @@ if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
set(FREEBSD true) set(FREEBSD true)
endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
if (NOT FAT_RUNTIME)
if (CROSS_COMPILE_AARCH64) if (FAT_RUNTIME)
if (NOT (ARCH_IA32 OR ARCH_X86_64))
message(FATAL_ERROR "Fat runtime is not supported on non-Intel architectures")
else()
message(STATUS "Building runtime for multiple microarchitectures")
endif()
else()
if (CROSS_COMPILE)
message(STATUS "Building for target CPU: ${ARCH_C_FLAGS}") message(STATUS "Building for target CPU: ${ARCH_C_FLAGS}")
else() else()
message(STATUS "Building for current host CPU: ${ARCH_C_FLAGS}") message(STATUS "Building for current host CPU: ${ARCH_C_FLAGS}")
endif() endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS}")
else()
message(STATUS "Building runtime for multiple microarchitectures")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif() endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS}")
add_subdirectory(util) add_subdirectory(util)
add_subdirectory(doc/dev-reference) add_subdirectory(doc/dev-reference)
@ -1171,10 +1217,6 @@ if (NOT FAT_RUNTIME)
set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C)
add_library(hs_compile OBJECT ${hs_compile_SRCS}) add_library(hs_compile OBJECT ${hs_compile_SRCS})
if (ARCH_IA32)
set_target_properties(hs_compile PROPERTIES COMPILE_FLAGS "-mssse3")
endif (ARCH_IA32)
add_library(hs STATIC add_library(hs STATIC
src/hs_version.c src/hs_version.c
src/hs_valid_platform.c src/hs_valid_platform.c
@ -1205,14 +1247,14 @@ else (FAT_RUNTIME)
add_library(hs_exec_core2 OBJECT ${hs_exec_SRCS}) add_library(hs_exec_core2 OBJECT ${hs_exec_SRCS})
list(APPEND RUNTIME_LIBS $<TARGET_OBJECTS:hs_exec_core2>) list(APPEND RUNTIME_LIBS $<TARGET_OBJECTS:hs_exec_core2>)
set_target_properties(hs_exec_core2 PROPERTIES set_target_properties(hs_exec_core2 PROPERTIES
COMPILE_FLAGS "-march=core2" COMPILE_FLAGS "-march=core2 -msse4.2"
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in"
) )
add_library(hs_exec_corei7 OBJECT ${hs_exec_SRCS}) add_library(hs_exec_corei7 OBJECT ${hs_exec_SRCS})
list(APPEND RUNTIME_LIBS $<TARGET_OBJECTS:hs_exec_corei7>) list(APPEND RUNTIME_LIBS $<TARGET_OBJECTS:hs_exec_corei7>)
set_target_properties(hs_exec_corei7 PROPERTIES set_target_properties(hs_exec_corei7 PROPERTIES
COMPILE_FLAGS "-march=corei7 -mssse3" COMPILE_FLAGS "-march=corei7 -msse4.2"
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in"
) )
@ -1254,10 +1296,6 @@ else (FAT_RUNTIME)
${RUNTIME_LIBS}) ${RUNTIME_LIBS})
set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C)
add_library(hs_compile OBJECT ${hs_compile_SRCS}) add_library(hs_compile OBJECT ${hs_compile_SRCS})
if (ARCH_IA32 OR ARCH_X86_64)
set_target_properties(hs_exec_common PROPERTIES COMPILE_FLAGS "-mssse3")
set_target_properties(hs_compile PROPERTIES COMPILE_FLAGS "-mssse3")
endif ()
# we want the static lib for testing # we want the static lib for testing
add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c
@ -1274,14 +1312,14 @@ else (FAT_RUNTIME)
add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS}) add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS})
list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_shared_core2>) list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_shared_core2>)
set_target_properties(hs_exec_shared_core2 PROPERTIES set_target_properties(hs_exec_shared_core2 PROPERTIES
COMPILE_FLAGS "-march=core2" COMPILE_FLAGS "-march=core2 -msse4.2"
POSITION_INDEPENDENT_CODE TRUE POSITION_INDEPENDENT_CODE TRUE
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in"
) )
add_library(hs_exec_shared_corei7 OBJECT ${hs_exec_SRCS}) add_library(hs_exec_shared_corei7 OBJECT ${hs_exec_SRCS})
list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_shared_corei7>) list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_shared_corei7>)
set_target_properties(hs_exec_shared_corei7 PROPERTIES set_target_properties(hs_exec_shared_corei7 PROPERTIES
COMPILE_FLAGS "-march=corei7 -mssse3" COMPILE_FLAGS "-march=corei7 -msse4.2"
POSITION_INDEPENDENT_CODE TRUE POSITION_INDEPENDENT_CODE TRUE
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in"
) )

598
Jenkinsfile vendored
View File

@ -1,22 +1,590 @@
pipeline { pipeline {
agent { agent none
node {
label 'x86'
}
}
stages { stages {
stage('Release, SSE') { stage("Build") {
agent { failFast true
node { parallel {
label 'x86' stage("Release/SSE") {
} agent { label "x86" }
stages {
} stage("Git checkout") {
steps { steps {
sh 'mkdir build-release-SSE && cmake -DCMAKE_BUILD_TYPE=Release -C build-release-SSE' checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-release-SSE', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=no -DBUILD_AVX512=no -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-release-SSE/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-release-SSE/bin/unit-hyperscan'
}
}
}
}
stage("Release/AVX2") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-release-AVX2', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=no -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-release-AVX2/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-release-AVX2/bin/unit-hyperscan'
}
}
}
}
stage("Release/AVX512") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-release-AVX512', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-release-AVX512/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-release-AVX512/bin/unit-hyperscan'
}
}
}
}
stage("Release/FAT") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-release-fat', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=yes', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Test") {
steps {
sh 'build-release-fat/bin/unit-hyperscan'
}
}
}
}
stage("Debug/SSE") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-debug-SSE', buildType: 'Debug', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=no -DBUILD_AVX512=no -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-debug-SSE/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-debug-SSE/bin/unit-hyperscan'
}
}
}
}
stage("Debug/AVX2") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-debug-AVX2', buildType: 'Debug', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=no -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-debug-AVX2/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-debug-AVX2/bin/unit-hyperscan'
}
}
}
}
stage("Debug/AVX512") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-debug-AVX512', buildType: 'Debug', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-debug-AVX512/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-debug-AVX512/bin/unit-hyperscan'
}
}
}
}
stage("Debug/FAT") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-debug-fat', buildType: 'Debug', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=yes', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Test") {
steps {
sh 'build-debug-fat/bin/unit-hyperscan'
}
}
}
}
stage("Release/ARM") {
agent { label "arm" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-release-arm', buildType: 'Release', cleanBuild: true, cmakeArgs: '', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-release-arm/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-release-arm/bin/unit-hyperscan'
}
}
}
}
stage("Debug/ARM") {
agent { label "arm" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-debug-arm', buildType: 'Debug', cleanBuild: true, cmakeArgs: '', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-debug-arm/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-debug-arm/bin/unit-hyperscan'
}
}
}
}
stage("Release/Power") {
agent { label "power" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-release-power', buildType: 'Release', cleanBuild: true, cmakeArgs: '', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-release-power/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-release-power/bin/unit-hyperscan'
}
}
}
}
stage("Debug/Power") {
agent { label "power" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-debug-power', buildType: 'Debug', cleanBuild: true, cmakeArgs: '', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-debug-power/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-debug-power/bin/unit-hyperscan'
}
}
}
}
stage("Clang-Release/SSE") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-clang-release-SSE', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=no -DBUILD_AVX512=no -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-clang-release-SSE/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-clang-release-SSE/bin/unit-hyperscan'
}
}
}
}
stage("Clang-Release/AVX2") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-clang-release-AVX2', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=no -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-clang-release-AVX2/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-clang-release-AVX2/bin/unit-hyperscan'
}
}
}
}
stage("Clang-Release/AVX512") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-clang-release-AVX512', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-clang-release-AVX512/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-clang-release-AVX512/bin/unit-hyperscan'
}
}
}
}
stage("Clang-Release/FAT") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-clang-release-fat', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=yes', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
}
}
stage("Test") {
steps {
sh 'build-clang-release-fat/bin/unit-hyperscan'
}
}
}
}
// Clang Debug build with AVX2, AVX512 and the fat runtime all switched off,
// on an agent labelled "x86"; runs unit-internal and then unit-hyperscan.
stage("Clang-Debug/SSE") {
    agent {
        label "x86"
    }
    stages {
        stage("Git checkout") {
            steps {
                // The '${sha1}' / '${ghprbPullId}' placeholders are single-quoted, so
                // Groovy passes them through literally (presumably expanded later by
                // the Git plugin from the job's pull-request variables; confirm).
                checkout([
                    $class: 'GitSCM',
                    branches: [[name: '${sha1}']],
                    extensions: [],
                    userRemoteConfigs: [[
                        refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*',
                        url: 'https://github.com/VectorCamp/vectorscan.git'
                    ]]
                ])
            }
        }
        stage("Build") {
            steps {
                // Clean configure + build into its own directory; the build step is
                // given CC/CXX pointing at clang and runs with --parallel 4.
                cmakeBuild(
                    buildDir: 'build-clang-debug-SSE',
                    buildType: 'Debug',
                    cleanBuild: true,
                    cmakeArgs: '-DBUILD_AVX2=no -DBUILD_AVX512=no -DFAT_RUNTIME=no',
                    installation: 'InSearchPath',
                    steps: [[
                        envVars: 'CC=clang CXX=clang++',
                        args: '--parallel 4',
                        withCmake: true
                    ]]
                )
            }
        }
        stage("Unit Test") {
            steps {
                sh 'build-clang-debug-SSE/bin/unit-internal'
            }
        }
        stage("Test") {
            steps {
                sh 'build-clang-debug-SSE/bin/unit-hyperscan'
            }
        }
    }
}
// Clang Debug build with AVX2 enabled (AVX512 and fat runtime off), on an
// agent labelled "x86"; runs unit-internal and then unit-hyperscan.
stage("Clang-Debug/AVX2") {
    agent {
        label "x86"
    }
    stages {
        stage("Git checkout") {
            steps {
                // The '${sha1}' / '${ghprbPullId}' placeholders are single-quoted, so
                // Groovy passes them through literally (presumably expanded later by
                // the Git plugin from the job's pull-request variables; confirm).
                checkout([
                    $class: 'GitSCM',
                    branches: [[name: '${sha1}']],
                    extensions: [],
                    userRemoteConfigs: [[
                        refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*',
                        url: 'https://github.com/VectorCamp/vectorscan.git'
                    ]]
                ])
            }
        }
        stage("Build") {
            steps {
                // Clean configure + build into its own directory; the build step is
                // given CC/CXX pointing at clang and runs with --parallel 4.
                cmakeBuild(
                    buildDir: 'build-clang-debug-AVX2',
                    buildType: 'Debug',
                    cleanBuild: true,
                    cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=no -DFAT_RUNTIME=no',
                    installation: 'InSearchPath',
                    steps: [[
                        envVars: 'CC=clang CXX=clang++',
                        args: '--parallel 4',
                        withCmake: true
                    ]]
                )
            }
        }
        stage("Unit Test") {
            steps {
                sh 'build-clang-debug-AVX2/bin/unit-internal'
            }
        }
        stage("Test") {
            steps {
                sh 'build-clang-debug-AVX2/bin/unit-hyperscan'
            }
        }
    }
}
// Clang Debug build with AVX2 and AVX512 enabled (fat runtime off), on an
// agent labelled "x86"; runs unit-internal and then unit-hyperscan.
stage("Clang-Debug/AVX512") {
    agent {
        label "x86"
    }
    stages {
        stage("Git checkout") {
            steps {
                // The '${sha1}' / '${ghprbPullId}' placeholders are single-quoted, so
                // Groovy passes them through literally (presumably expanded later by
                // the Git plugin from the job's pull-request variables; confirm).
                checkout([
                    $class: 'GitSCM',
                    branches: [[name: '${sha1}']],
                    extensions: [],
                    userRemoteConfigs: [[
                        refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*',
                        url: 'https://github.com/VectorCamp/vectorscan.git'
                    ]]
                ])
            }
        }
        stage("Build") {
            steps {
                // Clean configure + build into its own directory; the build step is
                // given CC/CXX pointing at clang and runs with --parallel 4.
                cmakeBuild(
                    buildDir: 'build-clang-debug-AVX512',
                    buildType: 'Debug',
                    cleanBuild: true,
                    cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=no',
                    installation: 'InSearchPath',
                    steps: [[
                        envVars: 'CC=clang CXX=clang++',
                        args: '--parallel 4',
                        withCmake: true
                    ]]
                )
            }
        }
        stage("Unit Test") {
            steps {
                sh 'build-clang-debug-AVX512/bin/unit-internal'
            }
        }
        stage("Test") {
            steps {
                sh 'build-clang-debug-AVX512/bin/unit-hyperscan'
            }
        }
    }
}
// Clang Debug build of the fat runtime (AVX2 and AVX512 enabled) on an
// agent labelled "x86", followed by the unit-hyperscan test binary.
stage("Clang-Debug/FAT") {
    agent {
        label "x86"
    }
    stages {
        stage("Git checkout") {
            steps {
                // The '${sha1}' / '${ghprbPullId}' placeholders are single-quoted, so
                // Groovy passes them through literally (presumably expanded later by
                // the Git plugin from the job's pull-request variables; confirm).
                checkout([
                    $class: 'GitSCM',
                    branches: [[name: '${sha1}']],
                    extensions: [],
                    userRemoteConfigs: [[
                        refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*',
                        url: 'https://github.com/VectorCamp/vectorscan.git'
                    ]]
                ])
            }
        }
        stage("Build") {
            steps {
                // Clean configure + build into its own directory; the build step is
                // given CC/CXX pointing at clang and runs with --parallel 4.
                cmakeBuild(
                    buildDir: 'build-clang-debug-fat',
                    buildType: 'Debug',
                    cleanBuild: true,
                    cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=yes',
                    installation: 'InSearchPath',
                    steps: [[
                        envVars: 'CC=clang CXX=clang++',
                        args: '--parallel 4',
                        withCmake: true
                    ]]
                )
            }
        }
        stage("Test") {
            steps {
                sh 'build-clang-debug-fat/bin/unit-hyperscan'
            }
        }
    }
}
// Clang Release build with no extra CMake arguments on an agent labelled
// "arm"; runs unit-internal and then unit-hyperscan.
stage("Clang-Release/ARM") {
    agent {
        label "arm"
    }
    stages {
        stage("Git checkout") {
            steps {
                // The '${sha1}' / '${ghprbPullId}' placeholders are single-quoted, so
                // Groovy passes them through literally (presumably expanded later by
                // the Git plugin from the job's pull-request variables; confirm).
                checkout([
                    $class: 'GitSCM',
                    branches: [[name: '${sha1}']],
                    extensions: [],
                    userRemoteConfigs: [[
                        refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*',
                        url: 'https://github.com/VectorCamp/vectorscan.git'
                    ]]
                ])
            }
        }
        stage("Build") {
            steps {
                // Clean configure + build into its own directory; the build step is
                // given CC/CXX pointing at clang and runs with --parallel 4.
                cmakeBuild(
                    buildDir: 'build-clang-release-arm',
                    buildType: 'Release',
                    cleanBuild: true,
                    cmakeArgs: '',
                    installation: 'InSearchPath',
                    steps: [[
                        envVars: 'CC=clang CXX=clang++',
                        args: '--parallel 4',
                        withCmake: true
                    ]]
                )
            }
        }
        stage("Unit Test") {
            steps {
                sh 'build-clang-release-arm/bin/unit-internal'
            }
        }
        stage("Test") {
            steps {
                sh 'build-clang-release-arm/bin/unit-hyperscan'
            }
        }
    }
}
// Clang Debug build with no extra CMake arguments on an agent labelled
// "arm"; runs unit-internal and then unit-hyperscan.
stage("Clang-Debug/ARM") {
    agent {
        label "arm"
    }
    stages {
        stage("Git checkout") {
            steps {
                // The '${sha1}' / '${ghprbPullId}' placeholders are single-quoted, so
                // Groovy passes them through literally (presumably expanded later by
                // the Git plugin from the job's pull-request variables; confirm).
                checkout([
                    $class: 'GitSCM',
                    branches: [[name: '${sha1}']],
                    extensions: [],
                    userRemoteConfigs: [[
                        refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*',
                        url: 'https://github.com/VectorCamp/vectorscan.git'
                    ]]
                ])
            }
        }
        stage("Build") {
            steps {
                // buildDir must match the directory the test stages below execute
                // from; it was previously 'build-debug-arm', so the tests looked
                // for binaries in a directory this build never produced.
                cmakeBuild(
                    buildDir: 'build-clang-debug-arm',
                    buildType: 'Debug',
                    cleanBuild: true,
                    cmakeArgs: '',
                    installation: 'InSearchPath',
                    steps: [[
                        envVars: 'CC=clang CXX=clang++',
                        args: '--parallel 4',
                        withCmake: true
                    ]]
                )
            }
        }
        stage("Unit Test") {
            steps {
                sh 'build-clang-debug-arm/bin/unit-internal'
            }
        }
        stage("Test") {
            steps {
                sh 'build-clang-debug-arm/bin/unit-hyperscan'
            }
        }
    }
}
// Clang Release build with no extra CMake arguments on an agent labelled
// "power"; runs unit-internal and then unit-hyperscan.
stage("Clang-Release/Power") {
    agent {
        label "power"
    }
    stages {
        stage("Git checkout") {
            steps {
                // The '${sha1}' / '${ghprbPullId}' placeholders are single-quoted, so
                // Groovy passes them through literally (presumably expanded later by
                // the Git plugin from the job's pull-request variables; confirm).
                checkout([
                    $class: 'GitSCM',
                    branches: [[name: '${sha1}']],
                    extensions: [],
                    userRemoteConfigs: [[
                        refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*',
                        url: 'https://github.com/VectorCamp/vectorscan.git'
                    ]]
                ])
            }
        }
        stage("Build") {
            steps {
                // Clean configure + build into its own directory; the build step is
                // given CC/CXX pointing at clang and runs with --parallel 4.
                cmakeBuild(
                    buildDir: 'build-clang-release-power',
                    buildType: 'Release',
                    cleanBuild: true,
                    cmakeArgs: '',
                    installation: 'InSearchPath',
                    steps: [[
                        envVars: 'CC=clang CXX=clang++',
                        args: '--parallel 4',
                        withCmake: true
                    ]]
                )
            }
        }
        stage("Unit Test") {
            steps {
                sh 'build-clang-release-power/bin/unit-internal'
            }
        }
        stage("Test") {
            steps {
                sh 'build-clang-release-power/bin/unit-hyperscan'
            }
        }
    }
}
// Clang Debug build with no extra CMake arguments on an agent labelled
// "power"; runs unit-internal and then unit-hyperscan.
stage("Clang-Debug/Power") {
    agent {
        label "power"
    }
    stages {
        stage("Git checkout") {
            steps {
                // The '${sha1}' / '${ghprbPullId}' placeholders are single-quoted, so
                // Groovy passes them through literally (presumably expanded later by
                // the Git plugin from the job's pull-request variables; confirm).
                checkout([
                    $class: 'GitSCM',
                    branches: [[name: '${sha1}']],
                    extensions: [],
                    userRemoteConfigs: [[
                        refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*',
                        url: 'https://github.com/VectorCamp/vectorscan.git'
                    ]]
                ])
            }
        }
        stage("Build") {
            steps {
                // Clean configure + build into its own directory; the build step is
                // given CC/CXX pointing at clang and runs with --parallel 4.
                cmakeBuild(
                    buildDir: 'build-clang-debug-power',
                    buildType: 'Debug',
                    cleanBuild: true,
                    cmakeArgs: '',
                    installation: 'InSearchPath',
                    steps: [[
                        envVars: 'CC=clang CXX=clang++',
                        args: '--parallel 4',
                        withCmake: true
                    ]]
                )
            }
        }
        stage("Unit Test") {
            steps {
                sh 'build-clang-debug-power/bin/unit-internal'
            }
        }
        stage("Test") {
            steps {
                sh 'build-clang-debug-power/bin/unit-hyperscan'
            }
        }
    }
}
} }
} }
} }
} }

View File

@ -88,7 +88,7 @@ if (FAT_RUNTIME)
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${SKYLAKE_FLAG}") set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${SKYLAKE_FLAG}")
endif (BUILD_AVX512VBMI) endif (BUILD_AVX512VBMI)
elseif (BUILD_AVX2) elseif (BUILD_AVX2)
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-avx2 -mavx") set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-avx2 -mavx2")
elseif () elseif ()
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-i7 -mssse3") set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-i7 -mssse3")
endif () endif ()
@ -98,12 +98,12 @@ else (NOT FAT_RUNTIME)
endif () endif ()
if (ARCH_IA32 OR ARCH_X86_64) if (ARCH_IA32 OR ARCH_X86_64)
# ensure we have the minimum of SSSE3 - call a SSSE3 intrinsic # ensure we have the minimum of SSE4.2 - call a SSE4.2 intrinsic
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}> CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
int main() { int main() {
__m128i a = _mm_set1_epi8(1); __m128i a = _mm_set1_epi8(1);
(void)_mm_shuffle_epi8(a, a); (void)_mm_shuffle_epi8(a, a);
}" HAVE_SSSE3) }" HAVE_SSE42)
# now look for AVX2 # now look for AVX2
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}> CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
@ -157,8 +157,8 @@ else ()
endif () endif ()
if (FAT_RUNTIME) if (FAT_RUNTIME)
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_SSSE3) if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_SSE42)
message(FATAL_ERROR "SSSE3 support required to build fat runtime") message(FATAL_ERROR "SSE4.2 support required to build fat runtime")
endif () endif ()
if ((ARCH_IA32 OR ARCH_X86_64) AND BUILD_AVX2 AND NOT HAVE_AVX2) if ((ARCH_IA32 OR ARCH_X86_64) AND BUILD_AVX2 AND NOT HAVE_AVX2)
message(FATAL_ERROR "AVX2 support required to build fat runtime") message(FATAL_ERROR "AVX2 support required to build fat runtime")
@ -179,8 +179,8 @@ else (NOT FAT_RUNTIME)
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_AVX512VBMI) if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_AVX512VBMI)
message(STATUS "Building without AVX512VBMI support") message(STATUS "Building without AVX512VBMI support")
endif () endif ()
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_SSSE3) if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_SSE42)
message(FATAL_ERROR "A minimum of SSSE3 compiler support is required") message(FATAL_ERROR "A minimum of SSE4.2 compiler support is required")
endif () endif ()
if ((ARCH_ARM32 OR ARCH_AARCH64) AND NOT HAVE_NEON) if ((ARCH_ARM32 OR ARCH_AARCH64) AND NOT HAVE_NEON)
message(FATAL_ERROR "NEON support required for ARM support") message(FATAL_ERROR "NEON support required for ARM support")

View File

@ -1,3 +1,8 @@
# determine compiler
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(CMAKE_COMPILER_IS_CLANG TRUE)
endif()
# determine the target arch # determine the target arch
if (CROSS_COMPILE_AARCH64) if (CROSS_COMPILE_AARCH64)
@ -10,7 +15,7 @@ else()
CHECK_C_SOURCE_COMPILES("#if !(defined(__i386__) || defined(_M_IX86))\n#error not 32bit\n#endif\nint main(void) { return 0; }" ARCH_IA32) CHECK_C_SOURCE_COMPILES("#if !(defined(__i386__) || defined(_M_IX86))\n#error not 32bit\n#endif\nint main(void) { return 0; }" ARCH_IA32)
CHECK_C_SOURCE_COMPILES("#if !defined(__ARM_ARCH_ISA_A64)\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_AARCH64) CHECK_C_SOURCE_COMPILES("#if !defined(__ARM_ARCH_ISA_A64)\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_AARCH64)
CHECK_C_SOURCE_COMPILES("#if !defined(__ARM_ARCH_ISA_ARM)\n#error not 32bit\n#endif\nint main(void) { return 0; }" ARCH_ARM32) CHECK_C_SOURCE_COMPILES("#if !defined(__ARM_ARCH_ISA_ARM)\n#error not 32bit\n#endif\nint main(void) { return 0; }" ARCH_ARM32)
CHECK_C_SOURCE_COMPILES("#if !defined(__PPC64__) && !defined(__LITTLE_ENDIAN__) && !defined(__VSX__)\n#error not ppc64el\n#endif\nint main(void) { return 0; }" ARCH_PPC64EL) CHECK_C_SOURCE_COMPILES("#if !defined(__PPC64__) && !(defined(__LITTLE_ENDIAN__) && defined(__VSX__))\n#error not ppc64el\n#endif\nint main(void) { return 0; }" ARCH_PPC64EL)
if (ARCH_X86_64 OR ARCH_AARCH64 OR ARCH_PPC64EL) if (ARCH_X86_64 OR ARCH_AARCH64 OR ARCH_PPC64EL)
set(ARCH_64_BIT TRUE) set(ARCH_64_BIT TRUE)
else() else()

View File

@ -122,24 +122,252 @@ m128 sub_2x64(m128 a, m128 b) {
return (m128) vsubq_u64((uint64x2_t)a, (uint64x2_t)b); return (m128) vsubq_u64((uint64x2_t)a, (uint64x2_t)b);
} }
static really_really_inline static really_inline
m128 lshift_m128(m128 a, unsigned b) { m128 lshift_m128(m128 a, unsigned b) {
#if defined(HAVE__BUILTIN_CONSTANT_P)
if (__builtin_constant_p(b)) {
return (m128) vshlq_n_u32((uint32x4_t)a, b); return (m128) vshlq_n_u32((uint32x4_t)a, b);
}
#endif
#define CASE_LSHIFT_m128(a, offset) case offset: return (m128)vshlq_n_u32((uint32x4_t)(a), (offset)); break;
switch (b) {
case 0: return a; break;
CASE_LSHIFT_m128(a, 1);
CASE_LSHIFT_m128(a, 2);
CASE_LSHIFT_m128(a, 3);
CASE_LSHIFT_m128(a, 4);
CASE_LSHIFT_m128(a, 5);
CASE_LSHIFT_m128(a, 6);
CASE_LSHIFT_m128(a, 7);
CASE_LSHIFT_m128(a, 8);
CASE_LSHIFT_m128(a, 9);
CASE_LSHIFT_m128(a, 10);
CASE_LSHIFT_m128(a, 11);
CASE_LSHIFT_m128(a, 12);
CASE_LSHIFT_m128(a, 13);
CASE_LSHIFT_m128(a, 14);
CASE_LSHIFT_m128(a, 15);
CASE_LSHIFT_m128(a, 16);
CASE_LSHIFT_m128(a, 17);
CASE_LSHIFT_m128(a, 18);
CASE_LSHIFT_m128(a, 19);
CASE_LSHIFT_m128(a, 20);
CASE_LSHIFT_m128(a, 21);
CASE_LSHIFT_m128(a, 22);
CASE_LSHIFT_m128(a, 23);
CASE_LSHIFT_m128(a, 24);
CASE_LSHIFT_m128(a, 25);
CASE_LSHIFT_m128(a, 26);
CASE_LSHIFT_m128(a, 27);
CASE_LSHIFT_m128(a, 28);
CASE_LSHIFT_m128(a, 29);
CASE_LSHIFT_m128(a, 30);
CASE_LSHIFT_m128(a, 31);
default: return zeroes128(); break;
}
#undef CASE_LSHIFT_m128
} }
static really_really_inline static really_really_inline
m128 rshift_m128(m128 a, unsigned b) { m128 rshift_m128(m128 a, unsigned b) {
#if defined(HAVE__BUILTIN_CONSTANT_P)
if (__builtin_constant_p(b)) {
return (m128) vshrq_n_u32((uint32x4_t)a, b); return (m128) vshrq_n_u32((uint32x4_t)a, b);
}
#endif
#define CASE_RSHIFT_m128(a, offset) case offset: return (m128)vshrq_n_u32((uint32x4_t)(a), (offset)); break;
switch (b) {
case 0: return a; break;
CASE_RSHIFT_m128(a, 1);
CASE_RSHIFT_m128(a, 2);
CASE_RSHIFT_m128(a, 3);
CASE_RSHIFT_m128(a, 4);
CASE_RSHIFT_m128(a, 5);
CASE_RSHIFT_m128(a, 6);
CASE_RSHIFT_m128(a, 7);
CASE_RSHIFT_m128(a, 8);
CASE_RSHIFT_m128(a, 9);
CASE_RSHIFT_m128(a, 10);
CASE_RSHIFT_m128(a, 11);
CASE_RSHIFT_m128(a, 12);
CASE_RSHIFT_m128(a, 13);
CASE_RSHIFT_m128(a, 14);
CASE_RSHIFT_m128(a, 15);
CASE_RSHIFT_m128(a, 16);
CASE_RSHIFT_m128(a, 17);
CASE_RSHIFT_m128(a, 18);
CASE_RSHIFT_m128(a, 19);
CASE_RSHIFT_m128(a, 20);
CASE_RSHIFT_m128(a, 21);
CASE_RSHIFT_m128(a, 22);
CASE_RSHIFT_m128(a, 23);
CASE_RSHIFT_m128(a, 24);
CASE_RSHIFT_m128(a, 25);
CASE_RSHIFT_m128(a, 26);
CASE_RSHIFT_m128(a, 27);
CASE_RSHIFT_m128(a, 28);
CASE_RSHIFT_m128(a, 29);
CASE_RSHIFT_m128(a, 30);
CASE_RSHIFT_m128(a, 31);
default: return zeroes128(); break;
}
#undef CASE_RSHIFT_m128
} }
static really_really_inline static really_really_inline
m128 lshift64_m128(m128 a, unsigned b) { m128 lshift64_m128(m128 a, unsigned b) {
#if defined(HAVE__BUILTIN_CONSTANT_P)
if (__builtin_constant_p(b)) {
return (m128) vshlq_n_u64((uint64x2_t)a, b); return (m128) vshlq_n_u64((uint64x2_t)a, b);
}
#endif
#define CASE_LSHIFT64_m128(a, offset) case offset: return (m128)vshlq_n_u64((uint64x2_t)(a), (offset)); break;
switch (b) {
case 0: return a; break;
CASE_LSHIFT64_m128(a, 1);
CASE_LSHIFT64_m128(a, 2);
CASE_LSHIFT64_m128(a, 3);
CASE_LSHIFT64_m128(a, 4);
CASE_LSHIFT64_m128(a, 5);
CASE_LSHIFT64_m128(a, 6);
CASE_LSHIFT64_m128(a, 7);
CASE_LSHIFT64_m128(a, 8);
CASE_LSHIFT64_m128(a, 9);
CASE_LSHIFT64_m128(a, 10);
CASE_LSHIFT64_m128(a, 11);
CASE_LSHIFT64_m128(a, 12);
CASE_LSHIFT64_m128(a, 13);
CASE_LSHIFT64_m128(a, 14);
CASE_LSHIFT64_m128(a, 15);
CASE_LSHIFT64_m128(a, 16);
CASE_LSHIFT64_m128(a, 17);
CASE_LSHIFT64_m128(a, 18);
CASE_LSHIFT64_m128(a, 19);
CASE_LSHIFT64_m128(a, 20);
CASE_LSHIFT64_m128(a, 21);
CASE_LSHIFT64_m128(a, 22);
CASE_LSHIFT64_m128(a, 23);
CASE_LSHIFT64_m128(a, 24);
CASE_LSHIFT64_m128(a, 25);
CASE_LSHIFT64_m128(a, 26);
CASE_LSHIFT64_m128(a, 27);
CASE_LSHIFT64_m128(a, 28);
CASE_LSHIFT64_m128(a, 29);
CASE_LSHIFT64_m128(a, 30);
CASE_LSHIFT64_m128(a, 31);
CASE_LSHIFT64_m128(a, 32);
CASE_LSHIFT64_m128(a, 33);
CASE_LSHIFT64_m128(a, 34);
CASE_LSHIFT64_m128(a, 35);
CASE_LSHIFT64_m128(a, 36);
CASE_LSHIFT64_m128(a, 37);
CASE_LSHIFT64_m128(a, 38);
CASE_LSHIFT64_m128(a, 39);
CASE_LSHIFT64_m128(a, 40);
CASE_LSHIFT64_m128(a, 41);
CASE_LSHIFT64_m128(a, 42);
CASE_LSHIFT64_m128(a, 43);
CASE_LSHIFT64_m128(a, 44);
CASE_LSHIFT64_m128(a, 45);
CASE_LSHIFT64_m128(a, 46);
CASE_LSHIFT64_m128(a, 47);
CASE_LSHIFT64_m128(a, 48);
CASE_LSHIFT64_m128(a, 49);
CASE_LSHIFT64_m128(a, 50);
CASE_LSHIFT64_m128(a, 51);
CASE_LSHIFT64_m128(a, 52);
CASE_LSHIFT64_m128(a, 53);
CASE_LSHIFT64_m128(a, 54);
CASE_LSHIFT64_m128(a, 55);
CASE_LSHIFT64_m128(a, 56);
CASE_LSHIFT64_m128(a, 57);
CASE_LSHIFT64_m128(a, 58);
CASE_LSHIFT64_m128(a, 59);
CASE_LSHIFT64_m128(a, 60);
CASE_LSHIFT64_m128(a, 61);
CASE_LSHIFT64_m128(a, 62);
CASE_LSHIFT64_m128(a, 63);
default: return zeroes128(); break;
}
#undef CASE_LSHIFT64_m128
} }
static really_really_inline static really_really_inline
m128 rshift64_m128(m128 a, unsigned b) { m128 rshift64_m128(m128 a, unsigned b) {
#if defined(HAVE__BUILTIN_CONSTANT_P)
if (__builtin_constant_p(b)) {
return (m128) vshrq_n_u64((uint64x2_t)a, b); return (m128) vshrq_n_u64((uint64x2_t)a, b);
}
#endif
#define CASE_RSHIFT64_m128(a, offset) case offset: return (m128)vshrq_n_u64((uint64x2_t)(a), (offset)); break;
switch (b) {
case 0: return a; break;
CASE_RSHIFT64_m128(a, 1);
CASE_RSHIFT64_m128(a, 2);
CASE_RSHIFT64_m128(a, 3);
CASE_RSHIFT64_m128(a, 4);
CASE_RSHIFT64_m128(a, 5);
CASE_RSHIFT64_m128(a, 6);
CASE_RSHIFT64_m128(a, 7);
CASE_RSHIFT64_m128(a, 8);
CASE_RSHIFT64_m128(a, 9);
CASE_RSHIFT64_m128(a, 10);
CASE_RSHIFT64_m128(a, 11);
CASE_RSHIFT64_m128(a, 12);
CASE_RSHIFT64_m128(a, 13);
CASE_RSHIFT64_m128(a, 14);
CASE_RSHIFT64_m128(a, 15);
CASE_RSHIFT64_m128(a, 16);
CASE_RSHIFT64_m128(a, 17);
CASE_RSHIFT64_m128(a, 18);
CASE_RSHIFT64_m128(a, 19);
CASE_RSHIFT64_m128(a, 20);
CASE_RSHIFT64_m128(a, 21);
CASE_RSHIFT64_m128(a, 22);
CASE_RSHIFT64_m128(a, 23);
CASE_RSHIFT64_m128(a, 24);
CASE_RSHIFT64_m128(a, 25);
CASE_RSHIFT64_m128(a, 26);
CASE_RSHIFT64_m128(a, 27);
CASE_RSHIFT64_m128(a, 28);
CASE_RSHIFT64_m128(a, 29);
CASE_RSHIFT64_m128(a, 30);
CASE_RSHIFT64_m128(a, 31);
CASE_RSHIFT64_m128(a, 32);
CASE_RSHIFT64_m128(a, 33);
CASE_RSHIFT64_m128(a, 34);
CASE_RSHIFT64_m128(a, 35);
CASE_RSHIFT64_m128(a, 36);
CASE_RSHIFT64_m128(a, 37);
CASE_RSHIFT64_m128(a, 38);
CASE_RSHIFT64_m128(a, 39);
CASE_RSHIFT64_m128(a, 40);
CASE_RSHIFT64_m128(a, 41);
CASE_RSHIFT64_m128(a, 42);
CASE_RSHIFT64_m128(a, 43);
CASE_RSHIFT64_m128(a, 44);
CASE_RSHIFT64_m128(a, 45);
CASE_RSHIFT64_m128(a, 46);
CASE_RSHIFT64_m128(a, 47);
CASE_RSHIFT64_m128(a, 48);
CASE_RSHIFT64_m128(a, 49);
CASE_RSHIFT64_m128(a, 50);
CASE_RSHIFT64_m128(a, 51);
CASE_RSHIFT64_m128(a, 52);
CASE_RSHIFT64_m128(a, 53);
CASE_RSHIFT64_m128(a, 54);
CASE_RSHIFT64_m128(a, 55);
CASE_RSHIFT64_m128(a, 56);
CASE_RSHIFT64_m128(a, 57);
CASE_RSHIFT64_m128(a, 58);
CASE_RSHIFT64_m128(a, 59);
CASE_RSHIFT64_m128(a, 60);
CASE_RSHIFT64_m128(a, 61);
CASE_RSHIFT64_m128(a, 62);
CASE_RSHIFT64_m128(a, 63);
default: return zeroes128(); break;
}
#undef CASE_RSHIFT64_m128
} }
static really_inline m128 eq128(m128 a, m128 b) { static really_inline m128 eq128(m128 a, m128 b) {
@ -191,9 +419,11 @@ m128 load_m128_from_u64a(const u64a *p) {
} }
static really_inline u32 extract32from128(const m128 in, unsigned imm) { static really_inline u32 extract32from128(const m128 in, unsigned imm) {
#if defined(HS_OPTIMIZE) #if defined(HAVE__BUILTIN_CONSTANT_P)
if (__builtin_constant_p(imm)) {
return vgetq_lane_u32((uint32x4_t) in, imm); return vgetq_lane_u32((uint32x4_t) in, imm);
#else }
#endif
switch (imm) { switch (imm) {
case 0: case 0:
return vgetq_lane_u32((uint32x4_t) in, 0); return vgetq_lane_u32((uint32x4_t) in, 0);
@ -211,13 +441,14 @@ static really_inline u32 extract32from128(const m128 in, unsigned imm) {
return 0; return 0;
break; break;
} }
#endif
} }
static really_inline u64a extract64from128(const m128 in, unsigned imm) { static really_inline u64a extract64from128(const m128 in, unsigned imm) {
#if defined(HS_OPTIMIZE) #if defined(HAVE__BUILTIN_CONSTANT_P)
if (__builtin_constant_p(imm)) {
return vgetq_lane_u64((uint64x2_t) in, imm); return vgetq_lane_u64((uint64x2_t) in, imm);
#else }
#endif
switch (imm) { switch (imm) {
case 0: case 0:
return vgetq_lane_u64((uint64x2_t) in, 0); return vgetq_lane_u64((uint64x2_t) in, 0);
@ -229,7 +460,6 @@ static really_inline u64a extract64from128(const m128 in, unsigned imm) {
return 0; return 0;
break; break;
} }
#endif
} }
static really_inline m128 low64from128(const m128 in) { static really_inline m128 low64from128(const m128 in) {

View File

@ -30,7 +30,7 @@
#define ARCH_PPC64EL_SIMD_TYPES_H #define ARCH_PPC64EL_SIMD_TYPES_H
#if !defined(m128) && defined(HAVE_VSX) #if !defined(m128) && defined(HAVE_VSX)
typedef __vector int32_t m128; typedef __vector int m128;
#endif #endif
#endif /* ARCH_PPC64EL_SIMD_TYPES_H */ #endif /* ARCH_PPC64EL_SIMD_TYPES_H */

View File

@ -43,6 +43,18 @@
#include <string.h> // for memcpy #include <string.h> // for memcpy
typedef __vector unsigned long long int uint64x2_t;
typedef __vector signed long long int int64x2_t;
typedef __vector unsigned int uint32x4_t;
typedef __vector signed int int32x4_t;
typedef __vector unsigned short int uint16x8_t;
typedef __vector signed short int int16x8_t;
typedef __vector unsigned char uint8x16_t;
typedef __vector signed char int8x16_t;
typedef unsigned long long int ulong64_t;
typedef signed long long int long64_t;
/*
typedef __vector uint64_t uint64x2_t; typedef __vector uint64_t uint64x2_t;
typedef __vector int64_t int64x2_t; typedef __vector int64_t int64x2_t;
typedef __vector uint32_t uint32x4_t; typedef __vector uint32_t uint32x4_t;
@ -50,7 +62,7 @@ typedef __vector int32_t int32x4_t;
typedef __vector uint16_t uint16x8_t; typedef __vector uint16_t uint16x8_t;
typedef __vector int16_t int16x8_t; typedef __vector int16_t int16x8_t;
typedef __vector uint8_t uint8x16_t; typedef __vector uint8_t uint8x16_t;
typedef __vector int8_t int8x16_t; typedef __vector int8_t int8x16_t;*/
#define ZEROES_8 0, 0, 0, 0, 0, 0, 0, 0 #define ZEROES_8 0, 0, 0, 0, 0, 0, 0, 0
@ -182,13 +194,13 @@ m128 rshift_m128(m128 a, unsigned b) {
static really_really_inline static really_really_inline
m128 lshift64_m128(m128 a, unsigned b) { m128 lshift64_m128(m128 a, unsigned b) {
uint64x2_t shift_indices = vec_splats((uint64_t)b); uint64x2_t shift_indices = vec_splats((ulong64_t)b);
return (m128) vec_sl((int64x2_t)a, shift_indices); return (m128) vec_sl((int64x2_t)a, shift_indices);
} }
static really_really_inline static really_really_inline
m128 rshift64_m128(m128 a, unsigned b) { m128 rshift64_m128(m128 a, unsigned b) {
uint64x2_t shift_indices = vec_splats((uint64_t)b); uint64x2_t shift_indices = vec_splats((ulong64_t)b);
return (m128) vec_sr((int64x2_t)a, shift_indices); return (m128) vec_sr((int64x2_t)a, shift_indices);
} }
@ -213,11 +225,11 @@ static really_inline u32 movemask128(m128 a) {
uint32x4_t s3 = vec_or((uint32x4_t)ss2, res_and2); uint32x4_t s3 = vec_or((uint32x4_t)ss2, res_and2);
uint64x2_t ss3 = vec_sr((uint64x2_t)s3, (uint64x2_t)vec_splats(28)); uint64x2_t ss3 = vec_sr((uint64x2_t)s3, (uint64x2_t)vec_splats(28));
uint64x2_t res_and3 = vec_and((uint64x2_t)s3, vec_splats((uint64_t)0xff)); uint64x2_t res_and3 = vec_and((uint64x2_t)s3, vec_splats((ulong64_t)0xff));
uint64x2_t s4 = vec_or((uint64x2_t)ss3, res_and3); uint64x2_t s4 = vec_or((uint64x2_t)ss3, res_and3);
uint64x2_t ss4 = vec_sld((uint64x2_t)vec_splats(0), s4, 9); uint64x2_t ss4 = vec_sld((uint64x2_t)vec_splats(0), s4, 9);
uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((uint64_t)0xff)); uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((ulong64_t)0xff));
uint64x2_t s5 = vec_or((uint64x2_t)ss4, res_and4); uint64x2_t s5 = vec_or((uint64x2_t)ss4, res_and4);
return s5[0]; return s5[0];

View File

@ -30,7 +30,7 @@
#ifndef SIMD_TYPES_X86_H #ifndef SIMD_TYPES_X86_H
#define SIMD_TYPES_X86_H #define SIMD_TYPES_X86_H
#if !defined(m128) && defined(HAVE_SSE2) #if !defined(m128) && defined(HAVE_SSE42)
typedef __m128i m128; typedef __m128i m128;
#endif #endif

View File

@ -51,6 +51,7 @@ typedef struct ALIGN_AVX_DIRECTIVE {m128 lo; m128 hi;} m256;
#endif #endif
typedef struct {m128 lo; m128 mid; m128 hi;} m384; typedef struct {m128 lo; m128 mid; m128 hi;} m384;
#if !defined(m512) && !defined(HAVE_SIMD_512_BITS) #if !defined(m512) && !defined(HAVE_SIMD_512_BITS)
typedef struct ALIGN_ATTR(64) {m256 lo; m256 hi;} m512; typedef struct ALIGN_ATTR(64) {m256 lo; m256 hi;} m512;
#endif #endif

View File

@ -45,112 +45,112 @@ really_inline SuperVector<16>::SuperVector(typename base_type::type const v)
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<int8x16_t>(int8x16_t other) really_inline SuperVector<16>::SuperVector(int8x16_t other)
{ {
u.s8x16[0] = other; u.s8x16[0] = other;
} }
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<uint8x16_t>(uint8x16_t other) really_inline SuperVector<16>::SuperVector(uint8x16_t other)
{ {
u.u8x16[0] = other; u.u8x16[0] = other;
} }
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<int16x8_t>(int16x8_t other) really_inline SuperVector<16>::SuperVector(int16x8_t other)
{ {
u.s16x8[0] = other; u.s16x8[0] = other;
} }
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<uint16x8_t>(uint16x8_t other) really_inline SuperVector<16>::SuperVector(uint16x8_t other)
{ {
u.u16x8[0] = other; u.u16x8[0] = other;
} }
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<int32x4_t>(int32x4_t other) really_inline SuperVector<16>::SuperVector(int32x4_t other)
{ {
u.s32x4[0] = other; u.s32x4[0] = other;
} }
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<uint32x4_t>(uint32x4_t other) really_inline SuperVector<16>::SuperVector(uint32x4_t other)
{ {
u.u32x4[0] = other; u.u32x4[0] = other;
} }
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<int64x2_t>(int64x2_t other) really_inline SuperVector<16>::SuperVector(int64x2_t other)
{ {
u.s64x2[0] = other; u.s64x2[0] = other;
} }
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<uint64x2_t>(uint64x2_t other) really_inline SuperVector<16>::SuperVector(uint64x2_t other)
{ {
u.u64x2[0] = other; u.u64x2[0] = other;
} }
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<int8_t>(int8_t const other) really_inline SuperVector<16>::SuperVector(int8_t const other)
{ {
u.s8x16[0] = vdupq_n_s8(other); u.s8x16[0] = vdupq_n_s8(other);
} }
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<uint8_t>(uint8_t const other) really_inline SuperVector<16>::SuperVector(uint8_t const other)
{ {
u.u8x16[0] = vdupq_n_u8(other); u.u8x16[0] = vdupq_n_u8(other);
} }
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<int16_t>(int16_t const other) really_inline SuperVector<16>::SuperVector(int16_t const other)
{ {
u.s16x8[0] = vdupq_n_s16(other); u.s16x8[0] = vdupq_n_s16(other);
} }
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<uint16_t>(uint16_t const other) really_inline SuperVector<16>::SuperVector(uint16_t const other)
{ {
u.u16x8[0] = vdupq_n_u16(other); u.u16x8[0] = vdupq_n_u16(other);
} }
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<int32_t>(int32_t const other) really_inline SuperVector<16>::SuperVector(int32_t const other)
{ {
u.s32x4[0] = vdupq_n_s32(other); u.s32x4[0] = vdupq_n_s32(other);
} }
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<uint32_t>(uint32_t const other) really_inline SuperVector<16>::SuperVector(uint32_t const other)
{ {
u.u32x4[0] = vdupq_n_u32(other); u.u32x4[0] = vdupq_n_u32(other);
} }
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<int64_t>(int64_t const other) really_inline SuperVector<16>::SuperVector(int64_t const other)
{ {
u.s64x2[0] = vdupq_n_s64(other); u.s64x2[0] = vdupq_n_s64(other);
} }
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<uint64_t>(uint64_t const other) really_inline SuperVector<16>::SuperVector(uint64_t const other)
{ {
u.u64x2[0] = vdupq_n_u64(other); u.u64x2[0] = vdupq_n_u64(other);
} }
@ -376,7 +376,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_8 (uint8_t const N) const
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return Zeroes(); if (N == 16) return Zeroes();
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u8(u.u8x16[0], n)}; }); Unroller<1, 8>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u8(v->u.u8x16[0], n)}; });
return result; return result;
} }
@ -386,7 +386,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_16 (uint8_t const N) const
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return Zeroes(); if (N == 16) return Zeroes();
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u16(u.u16x8[0], n)}; }); Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u16(v->u.u16x8[0], n)}; });
return result; return result;
} }
@ -394,9 +394,9 @@ template <>
really_inline SuperVector<16> SuperVector<16>::vshl_32 (uint8_t const N) const really_inline SuperVector<16> SuperVector<16>::vshl_32 (uint8_t const N) const
{ {
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return Zeroes(); if (N == 32) return Zeroes();
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u32(u.u32x4[0], n)}; }); Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u32(v->u.u32x4[0], n)}; });
return result; return result;
} }
@ -404,9 +404,9 @@ template <>
really_inline SuperVector<16> SuperVector<16>::vshl_64 (uint8_t const N) const really_inline SuperVector<16> SuperVector<16>::vshl_64 (uint8_t const N) const
{ {
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return Zeroes(); if (N == 64) return Zeroes();
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u64(u.u64x2[0], n)}; }); Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u64(v->u.u64x2[0], n)}; });
return result; return result;
} }
@ -416,7 +416,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_128(uint8_t const N) const
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return Zeroes(); if (N == 16) return Zeroes();
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vextq_u8(vdupq_n_u8(0), u.u8x16[0], 16 - n)}; }); Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vextq_u8(vdupq_n_u8(0), v->u.u8x16[0], 16 - n)}; });
return result; return result;
} }
@ -430,9 +430,9 @@ template <>
really_inline SuperVector<16> SuperVector<16>::vshr_8 (uint8_t const N) const really_inline SuperVector<16> SuperVector<16>::vshr_8 (uint8_t const N) const
{ {
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return Zeroes(); if (N == 8) return Zeroes();
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u8(u.u8x16[0], n)}; }); Unroller<1, 8>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u8(v->u.u8x16[0], n)}; });
return result; return result;
} }
@ -442,7 +442,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const N) const
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return Zeroes(); if (N == 16) return Zeroes();
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u16(u.u16x8[0], n)}; }); Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u16(v->u.u16x8[0], n)}; });
return result; return result;
} }
@ -450,9 +450,9 @@ template <>
really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const N) const really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const N) const
{ {
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return Zeroes(); if (N == 32) return Zeroes();
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u32(u.u32x4[0], n)}; }); Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u32(v->u.u32x4[0], n)}; });
return result; return result;
} }
@ -460,9 +460,9 @@ template <>
really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const N) const really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const N) const
{ {
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return Zeroes(); if (N == 64) return Zeroes();
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u64(u.u64x2[0], n)}; }); Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u64(v->u.u64x2[0], n)}; });
return result; return result;
} }
@ -472,7 +472,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const N) const
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return Zeroes(); if (N == 16) return Zeroes();
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vextq_u8(u.u8x16[0], vdupq_n_u8(0), n)}; }); Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vextq_u8(v->u.u8x16[0], vdupq_n_u8(0), n)}; });
return result; return result;
} }

View File

@ -39,16 +39,6 @@
#include "util/supervector/supervector.hpp" #include "util/supervector/supervector.hpp"
#include <iostream> #include <iostream>
typedef __vector uint64_t uint64x2_t;
typedef __vector int64_t int64x2_t;
typedef __vector uint32_t uint32x4_t;
typedef __vector int32_t int32x4_t;
typedef __vector uint16_t uint16x8_t;
typedef __vector int16_t int16x8_t;
typedef __vector uint8_t uint8x16_t;
typedef __vector int8_t int8x16_t;
// 128-bit Powerpc64le implementation // 128-bit Powerpc64le implementation
template<> template<>
@ -65,58 +55,58 @@ really_inline SuperVector<16>::SuperVector(typename base_type::type const v)
// Broadcast constructors for the 128-bit Power (VSX) SuperVector.
// Each specialization passes the scalar `other` to vec_splats and stores
// the result, cast to m128, in u.v128[0]. The specializations differ only in
// the scalar type of `other`.
// int8_t broadcast: `other` is passed to vec_splats unchanged.
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<int8_t>(int8_t const other) really_inline SuperVector<16>::SuperVector(int8_t const other)
{ {
u.v128[0] = (m128) vec_splats(other); u.v128[0] = (m128) vec_splats(other);
} }
// uint8_t broadcast: `other` is explicitly static_cast to uint8_t before the splat.
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<uint8_t>(uint8_t const other) really_inline SuperVector<16>::SuperVector(uint8_t const other)
{ {
u.v128[0] = (m128) vec_splats(static_cast<uint8_t>(other)); u.v128[0] = (m128) vec_splats(static_cast<uint8_t>(other));
} }
// int16_t broadcast: `other` is passed to vec_splats unchanged.
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<int16_t>(int16_t const other) really_inline SuperVector<16>::SuperVector(int16_t const other)
{ {
u.v128[0] = (m128) vec_splats(other); u.v128[0] = (m128) vec_splats(other);
} }
// uint16_t broadcast: `other` is explicitly static_cast to uint16_t before the splat.
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<uint16_t>(uint16_t const other) really_inline SuperVector<16>::SuperVector(uint16_t const other)
{ {
u.v128[0] = (m128) vec_splats(static_cast<uint16_t>(other)); u.v128[0] = (m128) vec_splats(static_cast<uint16_t>(other));
} }
// int32_t broadcast: `other` is passed to vec_splats unchanged.
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<int32_t>(int32_t const other) really_inline SuperVector<16>::SuperVector(int32_t const other)
{ {
u.v128[0] = (m128) vec_splats(other); u.v128[0] = (m128) vec_splats(other);
} }
// uint32_t broadcast: `other` is explicitly static_cast to uint32_t before the splat.
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<uint32_t>(uint32_t const other) really_inline SuperVector<16>::SuperVector(uint32_t const other)
{ {
u.v128[0] = (m128) vec_splats(other); u.v128[0] = (m128) vec_splats(other);
} }
// int64_t broadcast. The updated form casts `other` to ulong64_t before the
// splat instead of passing it through unchanged.
// NOTE(review): presumably this selects a vec_splats overload that clang accepts — confirm.
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<int64_t>(int64_t const other) really_inline SuperVector<16>::SuperVector(int64_t const other)
{ {
u.v128[0] = (m128) vec_splats(other); u.v128[0] = (m128) vec_splats(static_cast<ulong64_t>(other));
} }
// uint64_t broadcast. The updated form casts to ulong64_t where the previous
// form cast to uint64_t.
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<uint64_t>(uint64_t const other) really_inline SuperVector<16>::SuperVector(uint64_t const other)
{ {
u.v128[0] = (m128) vec_splats(static_cast<uint64_t>(other)); u.v128[0] = (m128) vec_splats(static_cast<ulong64_t>(other));
} }
// Constants // Constants
@ -229,11 +219,11 @@ really_inline typename SuperVector<16>::movemask_type SuperVector<16>::movemask(
uint32x4_t s3 = vec_or((uint32x4_t)ss2, res_and2); uint32x4_t s3 = vec_or((uint32x4_t)ss2, res_and2);
uint64x2_t ss3 = vec_sr((uint64x2_t)s3, (uint64x2_t)vec_splats(28)); uint64x2_t ss3 = vec_sr((uint64x2_t)s3, (uint64x2_t)vec_splats(28));
uint64x2_t res_and3 = vec_and((uint64x2_t)s3, vec_splats((uint64_t)0xff)); uint64x2_t res_and3 = vec_and((uint64x2_t)s3, vec_splats((ulong64_t)0xff));
uint64x2_t s4 = vec_or((uint64x2_t)ss3, res_and3); uint64x2_t s4 = vec_or((uint64x2_t)ss3, res_and3);
uint64x2_t ss4 = vec_sld((uint64x2_t) vec_splats(0), s4, 9); uint64x2_t ss4 = vec_sld((uint64x2_t) vec_splats(0), s4, 9);
uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((uint64_t)0xff)); uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((ulong64_t)0xff));
uint64x2_t s5 = vec_or((uint64x2_t)ss4, res_and4); uint64x2_t s5 = vec_or((uint64x2_t)ss4, res_and4);
return s5[0]; return s5[0];
@ -271,7 +261,7 @@ template <>
template<uint8_t N> template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshl_64_imm() const really_inline SuperVector<16> SuperVector<16>::vshl_64_imm() const
{ {
return { (m128) vec_sl(u.s64x2[0], vec_splats((uint64_t)N)) }; return { (m128) vec_sl(u.s64x2[0], vec_splats((ulong64_t)N)) };
} }
template <> template <>
@ -313,7 +303,7 @@ template <>
template<uint8_t N> template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshr_64_imm() const really_inline SuperVector<16> SuperVector<16>::vshr_64_imm() const
{ {
return { (m128) vec_sr(u.s64x2[0], vec_splats((uint64_t)N)) }; return { (m128) vec_sr(u.s64x2[0], vec_splats((ulong64_t)N)) };
} }
template <> template <>
@ -352,7 +342,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_8 (uint8_t const N) const
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return Zeroes(); if (N == 16) return Zeroes();
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(u.s8x16[0], vec_splats((uint8_t)n))}; }); Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(v->u.s8x16[0], vec_splats((uint8_t)n))}; });
return result; return result;
} }
@ -362,7 +352,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_16 (uint8_t const UNUSED N)
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return Zeroes(); if (N == 16) return Zeroes();
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(u.s16x8[0], vec_splats((uint16_t)n))}; }); Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(v->u.s16x8[0], vec_splats((uint16_t)n))}; });
return result; return result;
} }
@ -372,7 +362,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_32 (uint8_t const N) const
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return Zeroes(); if (N == 16) return Zeroes();
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(u.s32x4[0], vec_splats((uint32_t)n))}; }); Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(v->u.s32x4[0], vec_splats((uint32_t)n))}; });
return result; return result;
} }
@ -382,7 +372,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_64 (uint8_t const N) const
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return Zeroes(); if (N == 16) return Zeroes();
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(u.s64x2[0], vec_splats((uint64_t)n))}; }); Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(v->u.s64x2[0], vec_splats((ulong64_t)n))}; });
return result; return result;
} }
@ -392,7 +382,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_128(uint8_t const N) const
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return Zeroes(); if (N == 16) return Zeroes();
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sld(u.s8x16[0], (int8x16_t)vec_splat_s8(0), n)}; }); Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sld(v->u.s8x16[0], (int8x16_t)vec_splat_s8(0), n)}; });
return result; return result;
} }
@ -408,7 +398,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_8 (uint8_t const N) const
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return Zeroes(); if (N == 16) return Zeroes();
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(u.s8x16[0], vec_splats((uint8_t)n))}; }); Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(v->u.s8x16[0], vec_splats((uint8_t)n))}; });
return result; return result;
} }
@ -418,7 +408,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const N) const
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return Zeroes(); if (N == 16) return Zeroes();
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(u.s16x8[0], vec_splats((uint16_t)n))}; }); Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(v->u.s16x8[0], vec_splats((uint16_t)n))}; });
return result; return result;
} }
@ -428,7 +418,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const N) const
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return Zeroes(); if (N == 16) return Zeroes();
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(u.s32x4[0], vec_splats((uint32_t)n))}; }); Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(v->u.s32x4[0], vec_splats((uint32_t)n))}; });
return result; return result;
} }
@ -438,7 +428,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const N) const
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return Zeroes(); if (N == 16) return Zeroes();
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(u.s64x2[0], vec_splats((uint64_t)n))}; }); Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(v->u.s64x2[0], vec_splats((ulong64_t)n))}; });
return result; return result;
} }
@ -448,7 +438,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const UNUSED N)
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return Zeroes(); if (N == 16) return Zeroes();
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sld((int8x16_t)vec_splat_u8(0), u.s8x16[0], 16 - n)}; }); Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sld((int8x16_t)vec_splat_u8(0), v->u.s8x16[0], 16 - n)}; });
return result; return result;
} }
@ -523,14 +513,14 @@ really_inline SuperVector<16> SuperVector<16>::Ones_vshl(uint8_t const N)
// loadu: builds a SuperVector<16> from the memory at `ptr` by calling vec_xl
// with offset 0 and casting the result to m128. No alignment check is made.
template <> template <>
really_inline SuperVector<16> SuperVector<16>::loadu(void const *ptr) really_inline SuperVector<16> SuperVector<16>::loadu(void const *ptr)
{ {
return (m128) vec_xl(0, (const int64_t*)ptr); return (m128) vec_xl(0, (const long64_t*)ptr);
} }
// load: same vec_xl load as loadu, preceded by an assert on the alignment of `ptr`.
template <> template <>
really_inline SuperVector<16> SuperVector<16>::load(void const *ptr) really_inline SuperVector<16> SuperVector<16>::load(void const *ptr)
{ {
// NOTE(review): alignof is applied to SuperVector::size rather than to a vector type;
// confirm this yields the alignment the assert is meant to enforce.
assert(ISALIGNED_N(ptr, alignof(SuperVector::size))); assert(ISALIGNED_N(ptr, alignof(SuperVector::size)));
return (m128) vec_xl(0, (const int64_t*)ptr); return (m128) vec_xl(0, (const long64_t*)ptr);
} }
template <> template <>

View File

@ -27,6 +27,18 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
*/ */
/* Vector type aliases declared with the __vector keyword, spelled with
 * fundamental C integer types rather than the <stdint.h> names. */
typedef __vector unsigned long long int uint64x2_t;
typedef __vector signed long long int int64x2_t;
typedef __vector unsigned int uint32x4_t;
typedef __vector signed int int32x4_t;
typedef __vector unsigned short int uint16x8_t;
typedef __vector signed short int int16x8_t;
typedef __vector unsigned char uint8x16_t;
typedef __vector signed char int8x16_t;
/* Scalar 64-bit aliases spelled as long long. */
typedef unsigned long long int ulong64_t;
typedef signed long long int long64_t;
/* m128 is defined here only when it is not already defined and HAVE_VSX is set. */
#if !defined(m128) && defined(HAVE_VSX) #if !defined(m128) && defined(HAVE_VSX)
typedef __vector int32_t m128; typedef __vector int m128;
#endif #endif

View File

@ -55,56 +55,56 @@ really_inline SuperVector<16>::SuperVector(typename base_type::type const v)
// Broadcast constructors for the 128-bit x86 SuperVector.
// Each specialization stores the result of the _mm_set1_* intrinsic matching
// the scalar's width in u.v128[0]. Unsigned scalars are first static_cast to
// the signed type of the same width.
// int8_t broadcast via _mm_set1_epi8.
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<int8_t>(int8_t const other) really_inline SuperVector<16>::SuperVector(int8_t const other)
{ {
u.v128[0] = _mm_set1_epi8(other); u.v128[0] = _mm_set1_epi8(other);
} }
// uint8_t broadcast: cast to int8_t, then _mm_set1_epi8.
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<uint8_t>(uint8_t const other) really_inline SuperVector<16>::SuperVector(uint8_t const other)
{ {
u.v128[0] = _mm_set1_epi8(static_cast<int8_t>(other)); u.v128[0] = _mm_set1_epi8(static_cast<int8_t>(other));
} }
// int16_t broadcast via _mm_set1_epi16.
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<int16_t>(int16_t const other) really_inline SuperVector<16>::SuperVector(int16_t const other)
{ {
u.v128[0] = _mm_set1_epi16(other); u.v128[0] = _mm_set1_epi16(other);
} }
// uint16_t broadcast: cast to int16_t, then _mm_set1_epi16.
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<uint16_t>(uint16_t const other) really_inline SuperVector<16>::SuperVector(uint16_t const other)
{ {
u.v128[0] = _mm_set1_epi16(static_cast<int16_t>(other)); u.v128[0] = _mm_set1_epi16(static_cast<int16_t>(other));
} }
// int32_t broadcast via _mm_set1_epi32.
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<int32_t>(int32_t const other) really_inline SuperVector<16>::SuperVector(int32_t const other)
{ {
u.v128[0] = _mm_set1_epi32(other); u.v128[0] = _mm_set1_epi32(other);
} }
// uint32_t broadcast: cast to int32_t, then _mm_set1_epi32.
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<uint32_t>(uint32_t const other) really_inline SuperVector<16>::SuperVector(uint32_t const other)
{ {
u.v128[0] = _mm_set1_epi32(static_cast<int32_t>(other)); u.v128[0] = _mm_set1_epi32(static_cast<int32_t>(other));
} }
// int64_t broadcast via _mm_set1_epi64x.
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<int64_t>(int64_t const other) really_inline SuperVector<16>::SuperVector(int64_t const other)
{ {
u.v128[0] = _mm_set1_epi64x(other); u.v128[0] = _mm_set1_epi64x(other);
} }
// uint64_t broadcast: cast to int64_t, then _mm_set1_epi64x.
template<> template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<uint64_t>(uint64_t const other) really_inline SuperVector<16>::SuperVector(uint64_t const other)
{ {
u.v128[0] = _mm_set1_epi64x(static_cast<int64_t>(other)); u.v128[0] = _mm_set1_epi64x(static_cast<int64_t>(other));
} }
@ -608,56 +608,56 @@ really_inline SuperVector<32>::SuperVector(SuperVector<16> const lo, SuperVector
// Broadcast constructors for the 256-bit x86 SuperVector.
// Each specialization stores the result of the _mm256_set1_* intrinsic
// matching the scalar's width in u.v256[0]. Unsigned scalars are first
// static_cast to the signed type of the same width.
// int8_t broadcast via _mm256_set1_epi8.
template<> template<>
template<> template<>
really_inline SuperVector<32>::SuperVector<int8_t>(int8_t const other) really_inline SuperVector<32>::SuperVector(int8_t const other)
{ {
u.v256[0] = _mm256_set1_epi8(other); u.v256[0] = _mm256_set1_epi8(other);
} }
// uint8_t broadcast: cast to int8_t, then _mm256_set1_epi8.
template<> template<>
template<> template<>
really_inline SuperVector<32>::SuperVector<uint8_t>(uint8_t const other) really_inline SuperVector<32>::SuperVector(uint8_t const other)
{ {
u.v256[0] = _mm256_set1_epi8(static_cast<int8_t>(other)); u.v256[0] = _mm256_set1_epi8(static_cast<int8_t>(other));
} }
// int16_t broadcast via _mm256_set1_epi16.
template<> template<>
template<> template<>
really_inline SuperVector<32>::SuperVector<int16_t>(int16_t const other) really_inline SuperVector<32>::SuperVector(int16_t const other)
{ {
u.v256[0] = _mm256_set1_epi16(other); u.v256[0] = _mm256_set1_epi16(other);
} }
// uint16_t broadcast: cast to int16_t, then _mm256_set1_epi16.
template<> template<>
template<> template<>
really_inline SuperVector<32>::SuperVector<uint16_t>(uint16_t const other) really_inline SuperVector<32>::SuperVector(uint16_t const other)
{ {
u.v256[0] = _mm256_set1_epi16(static_cast<int16_t>(other)); u.v256[0] = _mm256_set1_epi16(static_cast<int16_t>(other));
} }
// int32_t broadcast via _mm256_set1_epi32.
template<> template<>
template<> template<>
really_inline SuperVector<32>::SuperVector<int32_t>(int32_t const other) really_inline SuperVector<32>::SuperVector(int32_t const other)
{ {
u.v256[0] = _mm256_set1_epi32(other); u.v256[0] = _mm256_set1_epi32(other);
} }
// uint32_t broadcast: cast to int32_t, then _mm256_set1_epi32.
template<> template<>
template<> template<>
really_inline SuperVector<32>::SuperVector<uint32_t>(uint32_t const other) really_inline SuperVector<32>::SuperVector(uint32_t const other)
{ {
u.v256[0] = _mm256_set1_epi32(static_cast<int32_t>(other)); u.v256[0] = _mm256_set1_epi32(static_cast<int32_t>(other));
} }
// int64_t broadcast via _mm256_set1_epi64x.
template<> template<>
template<> template<>
really_inline SuperVector<32>::SuperVector<int64_t>(int64_t const other) really_inline SuperVector<32>::SuperVector(int64_t const other)
{ {
u.v256[0] = _mm256_set1_epi64x(other); u.v256[0] = _mm256_set1_epi64x(other);
} }
// uint64_t broadcast: cast to int64_t, then _mm256_set1_epi64x.
template<> template<>
template<> template<>
really_inline SuperVector<32>::SuperVector<uint64_t>(uint64_t const other) really_inline SuperVector<32>::SuperVector(uint64_t const other)
{ {
u.v256[0] = _mm256_set1_epi64x(static_cast<int64_t>(other)); u.v256[0] = _mm256_set1_epi64x(static_cast<int64_t>(other));
} }
@ -804,7 +804,7 @@ really_inline SuperVector<32> SuperVector<32>::vshl_128_imm() const
template <> template <>
template<uint8_t N> template<uint8_t N>
really_inline SuperVector<16> SuperVector<32>::vshl_256_imm() const really_inline SuperVector<32> SuperVector<32>::vshl_256_imm() const
{ {
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return {_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0))}; if (N == 16) return {_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0))};
@ -950,11 +950,11 @@ really_inline SuperVector<32> SuperVector<32>::vshl_256(uint8_t const N) const
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { Unroller<1, 16>::iterator([&,v=this](auto const i) {
constexpr uint8_t n = i.value; constexpr uint8_t n = i.value;
if (N == n) result = {_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - n)};; if (N == n) result = {_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - n)};;
}); });
Unroller<17, 32>::iterator([&,v=this](auto const i) { Unroller<17, 32>::iterator([&,v=this](auto const i) {
constexpr uint8_t n = i.value; constexpr uint8_t n = i.value;
if (N == n) result = {_mm256_slli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), n - 16)}; if (N == n) result = {_mm256_slli_si256(_mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), n - 16)};
}); });
return result; return result;
} }
@ -1240,56 +1240,56 @@ really_inline SuperVector<64>::SuperVector(m128 const v)
// Broadcast constructors for the 512-bit x86 SuperVector.
// Each specialization stores the result of the _mm512_set1_* intrinsic
// matching the scalar's width in u.v512[0]. Unsigned scalars are first
// static_cast to the signed type of the same width.
// int8_t broadcast via _mm512_set1_epi8.
template<> template<>
template<> template<>
really_inline SuperVector<64>::SuperVector<int8_t>(int8_t const o) really_inline SuperVector<64>::SuperVector(int8_t const o)
{ {
u.v512[0] = _mm512_set1_epi8(o); u.v512[0] = _mm512_set1_epi8(o);
} }
// uint8_t broadcast: cast to int8_t, then _mm512_set1_epi8.
template<> template<>
template<> template<>
really_inline SuperVector<64>::SuperVector<uint8_t>(uint8_t const o) really_inline SuperVector<64>::SuperVector(uint8_t const o)
{ {
u.v512[0] = _mm512_set1_epi8(static_cast<int8_t>(o)); u.v512[0] = _mm512_set1_epi8(static_cast<int8_t>(o));
} }
// int16_t broadcast via _mm512_set1_epi16.
template<> template<>
template<> template<>
really_inline SuperVector<64>::SuperVector<int16_t>(int16_t const o) really_inline SuperVector<64>::SuperVector(int16_t const o)
{ {
u.v512[0] = _mm512_set1_epi16(o); u.v512[0] = _mm512_set1_epi16(o);
} }
// uint16_t broadcast: cast to int16_t, then _mm512_set1_epi16.
template<> template<>
template<> template<>
really_inline SuperVector<64>::SuperVector<uint16_t>(uint16_t const o) really_inline SuperVector<64>::SuperVector(uint16_t const o)
{ {
u.v512[0] = _mm512_set1_epi16(static_cast<int16_t>(o)); u.v512[0] = _mm512_set1_epi16(static_cast<int16_t>(o));
} }
// int32_t broadcast via _mm512_set1_epi32.
template<> template<>
template<> template<>
really_inline SuperVector<64>::SuperVector<int32_t>(int32_t const o) really_inline SuperVector<64>::SuperVector(int32_t const o)
{ {
u.v512[0] = _mm512_set1_epi32(o); u.v512[0] = _mm512_set1_epi32(o);
} }
// uint32_t broadcast: cast to int32_t, then _mm512_set1_epi32.
template<> template<>
template<> template<>
really_inline SuperVector<64>::SuperVector<uint32_t>(uint32_t const o) really_inline SuperVector<64>::SuperVector(uint32_t const o)
{ {
u.v512[0] = _mm512_set1_epi32(static_cast<int32_t>(o)); u.v512[0] = _mm512_set1_epi32(static_cast<int32_t>(o));
} }
// int64_t broadcast via _mm512_set1_epi64.
template<> template<>
template<> template<>
really_inline SuperVector<64>::SuperVector<int64_t>(int64_t const o) really_inline SuperVector<64>::SuperVector(int64_t const o)
{ {
u.v512[0] = _mm512_set1_epi64(o); u.v512[0] = _mm512_set1_epi64(o);
} }
// uint64_t broadcast: cast to int64_t, then _mm512_set1_epi64.
template<> template<>
template<> template<>
really_inline SuperVector<64>::SuperVector<uint64_t>(uint64_t const o) really_inline SuperVector<64>::SuperVector(uint64_t const o)
{ {
u.v512[0] = _mm512_set1_epi64(static_cast<int64_t>(o)); u.v512[0] = _mm512_set1_epi64(static_cast<int64_t>(o));
} }

View File

@ -165,7 +165,7 @@ public:
typename BaseVector<32>::type ALIGN_ATTR(BaseVector<32>::size) v256[SIZE / BaseVector<32>::size]; typename BaseVector<32>::type ALIGN_ATTR(BaseVector<32>::size) v256[SIZE / BaseVector<32>::size];
typename BaseVector<64>::type ALIGN_ATTR(BaseVector<64>::size) v512[SIZE / BaseVector<64>::size]; typename BaseVector<64>::type ALIGN_ATTR(BaseVector<64>::size) v512[SIZE / BaseVector<64>::size];
#if defined(ARCH_ARM32) || defined(ARCH_AARCH64) #if defined(ARCH_ARM32) || defined(ARCH_AARCH64) || defined(ARCH_PPC64EL)
uint64x2_t ALIGN_ATTR(BaseVector<16>::size) u64x2[SIZE / BaseVector<16>::size]; uint64x2_t ALIGN_ATTR(BaseVector<16>::size) u64x2[SIZE / BaseVector<16>::size];
int64x2_t ALIGN_ATTR(BaseVector<16>::size) s64x2[SIZE / BaseVector<16>::size]; int64x2_t ALIGN_ATTR(BaseVector<16>::size) s64x2[SIZE / BaseVector<16>::size];
uint32x4_t ALIGN_ATTR(BaseVector<16>::size) u32x4[SIZE / BaseVector<16>::size]; uint32x4_t ALIGN_ATTR(BaseVector<16>::size) u32x4[SIZE / BaseVector<16>::size];
@ -176,17 +176,6 @@ public:
int8x16_t ALIGN_ATTR(BaseVector<16>::size) s8x16[SIZE / BaseVector<16>::size]; int8x16_t ALIGN_ATTR(BaseVector<16>::size) s8x16[SIZE / BaseVector<16>::size];
#endif #endif
#if defined(ARCH_PPC64EL)
__vector uint64_t ALIGN_ATTR(BaseVector<16>::size) u64x2[SIZE / BaseVector<16>::size];
__vector int64_t ALIGN_ATTR(BaseVector<16>::size) s64x2[SIZE / BaseVector<16>::size];
__vector uint32_t ALIGN_ATTR(BaseVector<16>::size) u32x4[SIZE / BaseVector<16>::size];
__vector int32_t ALIGN_ATTR(BaseVector<16>::size) s32x4[SIZE / BaseVector<16>::size];
__vector uint16_t ALIGN_ATTR(BaseVector<16>::size) u16x8[SIZE / BaseVector<16>::size];
__vector int16_t ALIGN_ATTR(BaseVector<16>::size) s16x8[SIZE / BaseVector<16>::size];
__vector uint8_t ALIGN_ATTR(BaseVector<16>::size) u8x16[SIZE / BaseVector<16>::size];
__vector int8_t ALIGN_ATTR(BaseVector<16>::size) s8x16[SIZE / BaseVector<16>::size];
#endif
uint64_t u64[SIZE / sizeof(uint64_t)]; uint64_t u64[SIZE / sizeof(uint64_t)];
int64_t s64[SIZE / sizeof(int64_t)]; int64_t s64[SIZE / sizeof(int64_t)];
uint32_t u32[SIZE / sizeof(uint32_t)]; uint32_t u32[SIZE / sizeof(uint32_t)];
@ -200,7 +189,7 @@ public:
} u; } u;
constexpr SuperVector() {}; constexpr SuperVector() {};
constexpr SuperVector(SuperVector const &other) SuperVector(SuperVector const &other)
:u(other.u) {}; :u(other.u) {};
SuperVector(typename base_type::type const v); SuperVector(typename base_type::type const v);

View File

@ -667,7 +667,7 @@ TEST(SimdUtilsTest, movq) {
simd = _mm_set_epi64x(~0LL, 0x123456789abcdef); simd = _mm_set_epi64x(~0LL, 0x123456789abcdef);
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64) #elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
int64x2_t a = { 0x123456789abcdefLL, ~0LL }; int64x2_t a = { 0x123456789abcdefLL, ~0LL };
simd = vreinterpretq_s64_s8(a); simd = vreinterpretq_s32_s64(a);
#elif defined(ARCH_PPC64EL) #elif defined(ARCH_PPC64EL)
int64x2_t a = {0x123456789abcdefLL, ~0LL }; int64x2_t a = {0x123456789abcdefLL, ~0LL };
simd = (m128) a; simd = (m128) a;

View File

@ -33,9 +33,6 @@ SET(corpusomatic_SRCS
ng_find_matches.cpp ng_find_matches.cpp
) )
# Build corpusomatic as a static library from the files listed in corpusomatic_SRCS.
add_library(corpusomatic STATIC ${corpusomatic_SRCS})
# On IA32 / x86-64 targets only, compile corpusomatic with -mssse3.
if (ARCH_IA32 OR ARCH_X86_64)
set_target_properties(corpusomatic PROPERTIES COMPILE_FLAGS "-mssse3")
endif ()
set(databaseutil_SRCS set(databaseutil_SRCS
database_util.cpp database_util.cpp