mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Merge pull request #81 from VectorCamp/feature/add-clang-support
Feature/add clang support
This commit is contained in:
commit
1718e33544
324
CMakeLists.txt
324
CMakeLists.txt
@ -3,7 +3,7 @@ project (vectorscan C CXX)
|
||||
|
||||
set (HS_MAJOR_VERSION 5)
|
||||
set (HS_MINOR_VERSION 4)
|
||||
set (HS_PATCH_VERSION 3)
|
||||
set (HS_PATCH_VERSION 5)
|
||||
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
|
||||
|
||||
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||
@ -128,11 +128,9 @@ CMAKE_DEPENDENT_OPTION(DUMP_SUPPORT "Dump code support; normally on, except in r
|
||||
|
||||
CMAKE_DEPENDENT_OPTION(DISABLE_ASSERTS "Disable assert(); Asserts are enabled in debug builds, disabled in release builds" OFF "NOT RELEASE_BUILD" ON)
|
||||
|
||||
option(BUILD_AVX512 "Experimental: support avx512 in the fat runtime"
|
||||
OFF)
|
||||
option(BUILD_AVX512 "Experimental: support avx512 in the fat runtime" OFF)
|
||||
|
||||
option(BUILD_AVX512VBMI "Experimental: support avx512vbmi in the fat runtime"
|
||||
OFF)
|
||||
option(BUILD_AVX512VBMI "Experimental: support avx512vbmi in the fat runtime" OFF)
|
||||
|
||||
if (BUILD_AVX512VBMI)
|
||||
set(BUILD_AVX512 ON)
|
||||
@ -140,47 +138,95 @@ endif ()
|
||||
|
||||
# TODO: per platform config files?
|
||||
|
||||
# remove CMake's idea of optimisation
|
||||
foreach (CONFIG ${CMAKE_BUILD_TYPE} ${CMAKE_CONFIGURATION_TYPES})
|
||||
string(REGEX REPLACE "-O[^ ]*" "" CMAKE_C_FLAGS_${CONFIG} "${CMAKE_C_FLAGS_${CONFIG}}")
|
||||
string(REGEX REPLACE "-O[^ ]*" "" CMAKE_CXX_FLAGS_${CONFIG} "${CMAKE_CXX_FLAGS_${CONFIG}}")
|
||||
endforeach ()
|
||||
# remove CMake's idea of optimisation
|
||||
foreach (CONFIG ${CMAKE_BUILD_TYPE} ${CMAKE_CONFIGURATION_TYPES})
|
||||
string(REGEX REPLACE "-O[^ ]*" "" CMAKE_C_FLAGS_${CONFIG} "${CMAKE_C_FLAGS_${CONFIG}}")
|
||||
string(REGEX REPLACE "-O[^ ]*" "" CMAKE_CXX_FLAGS_${CONFIG} "${CMAKE_CXX_FLAGS_${CONFIG}}")
|
||||
endforeach ()
|
||||
|
||||
if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE_AARCH64 AND NOT ARCH_PPC64EL)
|
||||
message(STATUS "gcc version ${CMAKE_C_COMPILER_VERSION}")
|
||||
# If gcc doesn't recognise the host cpu, then mtune=native becomes
|
||||
# generic, which isn't very good in some cases. march=native looks at
|
||||
# cpuid info and then chooses the best microarch it can (and replaces
|
||||
# the flag), so use that for tune.
|
||||
if (CMAKE_C_COMPILER_ID MATCHES "Intel")
|
||||
set(SKYLAKE_FLAG "-xCORE-AVX512")
|
||||
else ()
|
||||
set(SKYLAKE_FLAG "-march=skylake-avx512")
|
||||
set(ICELAKE_FLAG "-march=icelake-server")
|
||||
endif ()
|
||||
|
||||
# arg1 might exist if using ccache
|
||||
string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1)
|
||||
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=native -mtune=native)
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
|
||||
OUTPUT_VARIABLE _GCC_OUTPUT)
|
||||
string(FIND "${_GCC_OUTPUT}" "march" POS)
|
||||
string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT)
|
||||
string(REGEX REPLACE "march=[ \t]*([^ \n]*)[ \n].*" "\\1"
|
||||
GNUCC_ARCH "${_GCC_OUTPUT}")
|
||||
if(ARCH_PPC64EL)
|
||||
set(ARCH_FLAG mcpu)
|
||||
else()
|
||||
set(ARCH_FLAG march)
|
||||
endif()
|
||||
|
||||
if (ARCH_IA32 OR ARCH_X86_64)
|
||||
# test the parsed flag
|
||||
set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH})
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
|
||||
OUTPUT_QUIET ERROR_QUIET
|
||||
INPUT_FILE /dev/null
|
||||
RESULT_VARIABLE GNUCC_TUNE_TEST)
|
||||
if (NOT GNUCC_TUNE_TEST EQUAL 0)
|
||||
message(SEND_ERROR "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid")
|
||||
endif()
|
||||
set(TUNE_FLAG ${GNUCC_ARCH})
|
||||
else()
|
||||
set(TUNE_FLAG native)
|
||||
endif()
|
||||
elseif (NOT TUNE_FLAG)
|
||||
# Detect best GNUCC_ARCH to tune for
|
||||
if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE)
|
||||
message(STATUS "gcc version ${CMAKE_C_COMPILER_VERSION}")
|
||||
|
||||
# If gcc doesn't recognise the host cpu, then mtune=native becomes
|
||||
# generic, which isn't very good in some cases. march=native looks at
|
||||
# cpuid info and then chooses the best microarch it can (and replaces
|
||||
# the flag), so use that for tune.
|
||||
|
||||
# arg1 might exist if using ccache
|
||||
string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1)
|
||||
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native)
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
|
||||
OUTPUT_VARIABLE _GCC_OUTPUT)
|
||||
string(FIND "${_GCC_OUTPUT}" "${ARCH_FLAG}" POS)
|
||||
string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT)
|
||||
string(REGEX REPLACE "${ARCH_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_ARCH "${_GCC_OUTPUT}")
|
||||
|
||||
# test the parsed flag
|
||||
set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH})
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
|
||||
OUTPUT_QUIET ERROR_QUIET
|
||||
INPUT_FILE /dev/null
|
||||
RESULT_VARIABLE GNUCC_TUNE_TEST)
|
||||
if (NOT GNUCC_TUNE_TEST EQUAL 0)
|
||||
message(WARNING "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid, falling back to -mtune=native")
|
||||
set(TUNE_FLAG native)
|
||||
else()
|
||||
set(TUNE_FLAG ${GNUCC_ARCH})
|
||||
message(STATUS "gcc will tune for ${GNUCC_ARCH}, ${TUNE_FLAG}")
|
||||
endif()
|
||||
elseif (CMAKE_COMPILER_IS_CLANG AND NOT CROSS_COMPILE)
|
||||
if (ARCH_IA32 OR ARCH_X86_64)
|
||||
set(GNUCC_ARCH native)
|
||||
set(TUNE_FLAG generic)
|
||||
elseif(ARCH_AARCH64)
|
||||
set(GNUCC_ARCH armv8)
|
||||
set(TUNE_FLAG generic)
|
||||
elseif(ARCH_ARM32)
|
||||
set(GNUCC_ARCH armv7a)
|
||||
set(TUNE_FLAG generic)
|
||||
else()
|
||||
set(GNUCC_ARCH native)
|
||||
set(TUNE_FLAG generic)
|
||||
endif()
|
||||
message(STATUS "clang will tune for ${GNUCC_ARCH}, ${TUNE_FLAG}")
|
||||
elseif (CROSS_COMPILE)
|
||||
set(GNUCC_ARCH generic)
|
||||
set(TUNE_FLAG generic)
|
||||
endif()
|
||||
|
||||
if (ARCH_IA32 OR ARCH_X86_64)
|
||||
if (NOT FAT_RUNTIME)
|
||||
if (BUILD_AVX512)
|
||||
set(ARCH_C_FLAGS "${SKYLAKE_FLAG}")
|
||||
set(ARCH_CXX_FLAGS "${SKYLAKE_FLAG}")
|
||||
elseif (BUILD_AVX2)
|
||||
set(ARCH_C_FLAGS "-mavx2")
|
||||
set(ARCH_CXX_FLAGS "-mavx2")
|
||||
else()
|
||||
set(ARCH_C_FLAGS "-msse4.2")
|
||||
set(ARCH_CXX_FLAGS "-msse4.2")
|
||||
endif()
|
||||
else()
|
||||
set(ARCH_C_FLAGS "-msse4.2")
|
||||
set(ARCH_CXX_FLAGS "-msse4.2")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (ARCH_AARCH64)
|
||||
if (BUILD_SVE2_BITPERM)
|
||||
set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm")
|
||||
elseif (BUILD_SVE2)
|
||||
@ -188,92 +234,89 @@ endif ()
|
||||
elseif (BUILD_SVE)
|
||||
set(GNUCC_ARCH "${GNUCC_ARCH}+sve")
|
||||
endif ()
|
||||
endif(ARCH_AARCH64)
|
||||
|
||||
# compiler version checks TODO: test more compilers
|
||||
if (CMAKE_COMPILER_IS_GNUCXX)
|
||||
set(GNUCXX_MINVER "4.8.1")
|
||||
message(STATUS "g++ version ${CMAKE_CXX_COMPILER_VERSION}")
|
||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS GNUCXX_MINVER)
|
||||
message(FATAL_ERROR "A minimum of g++ ${GNUCXX_MINVER} is required for C++11 support")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(RELEASE_BUILD)
|
||||
if (NOT CMAKE_BUILD_TYPE MATCHES MINSIZEREL)
|
||||
set(OPT_C_FLAG "-O3")
|
||||
set(OPT_CXX_FLAG "-O3")
|
||||
else ()
|
||||
set(OPT_C_FLAG "-Os")
|
||||
set(OPT_CXX_FLAG "-Os")
|
||||
endif ()
|
||||
else()
|
||||
set(OPT_C_FLAG "-O0")
|
||||
set(OPT_CXX_FLAG "-O0")
|
||||
endif(RELEASE_BUILD)
|
||||
|
||||
# set compiler flags - more are tested and added later
|
||||
set(EXTRA_C_FLAGS "${OPT_C_FLAG} -std=c17 -Wall -Wextra -Wshadow -Wcast-qual -fno-strict-aliasing")
|
||||
set(EXTRA_CXX_FLAGS "${OPT_CXX_FLAG} -std=c++17 -Wall -Wextra -Wshadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor -fno-strict-aliasing -fno-new-ttp-matching")
|
||||
|
||||
if (NOT RELEASE_BUILD)
|
||||
# -Werror is most useful during development, don't potentially break
|
||||
# release builds
|
||||
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror")
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror")
|
||||
endif()
|
||||
|
||||
if (DISABLE_ASSERTS)
|
||||
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -DNDEBUG")
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -DNDEBUG")
|
||||
endif()
|
||||
set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_C_FLAGS}")
|
||||
set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_CXX_FLAGS}")
|
||||
|
||||
#if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64)
|
||||
# if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*)
|
||||
# set(ARCH_C_FLAGS "-march=${GNUCC_ARCH} -mtune=${TUNE_FLAG}")
|
||||
# endif()
|
||||
# if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*)
|
||||
# set(ARCH_CXX_FLAGS "-march=${GNUCC_ARCH} -mtune=${TUNE_FLAG}")
|
||||
# endif()
|
||||
#endif()
|
||||
|
||||
if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64)
|
||||
if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*)
|
||||
set(ARCH_C_FLAGS "-march=${GNUCC_ARCH} -mtune=${TUNE_FLAG}")
|
||||
endif()
|
||||
|
||||
if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*)
|
||||
set(ARCH_CXX_FLAGS "-march=${GNUCC_ARCH} -mtune=${TUNE_FLAG}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(ARCH_PPC64EL)
|
||||
if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*)
|
||||
set(ARCH_C_FLAGS "-mtune=${TUNE_FLAG}")
|
||||
endif()
|
||||
if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*)
|
||||
set(ARCH_CXX_FLAGS "-mtune=${TUNE_FLAG}")
|
||||
endif()
|
||||
endif()
|
||||
#if(ARCH_PPC64EL)
|
||||
# if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*)
|
||||
# set(ARCH_C_FLAGS "-mtune=${TUNE_FLAG}")
|
||||
# endif()
|
||||
# if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*)
|
||||
# set(ARCH_CXX_FLAGS "-mtune=${TUNE_FLAG}")
|
||||
# endif()
|
||||
#endif()
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCC)
|
||||
# spurious warnings?
|
||||
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-array-bounds -Wno-maybe-uninitialized")
|
||||
# compiler version checks TODO: test more compilers
|
||||
if (CMAKE_COMPILER_IS_GNUCXX)
|
||||
set(GNUCXX_MINVER "9")
|
||||
message(STATUS "g++ version ${CMAKE_CXX_COMPILER_VERSION}")
|
||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS GNUCXX_MINVER)
|
||||
message(FATAL_ERROR "A minimum of g++ ${GNUCXX_MINVER} is required for C++17 support")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCXX)
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-maybe-uninitialized")
|
||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0)
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fabi-version=0")
|
||||
endif ()
|
||||
# don't complain about abi
|
||||
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-abi")
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-abi")
|
||||
endif()
|
||||
|
||||
if (NOT(ARCH_IA32 AND RELEASE_BUILD))
|
||||
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fno-omit-frame-pointer")
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer")
|
||||
endif()
|
||||
|
||||
|
||||
if (CMAKE_C_COMPILER_ID MATCHES "Intel")
|
||||
set(SKYLAKE_FLAG "-xCORE-AVX512")
|
||||
if(RELEASE_BUILD)
|
||||
if (NOT CMAKE_BUILD_TYPE MATCHES MINSIZEREL)
|
||||
set(OPT_C_FLAG "-O3")
|
||||
set(OPT_CXX_FLAG "-O3")
|
||||
else ()
|
||||
set(SKYLAKE_FLAG "-march=skylake-avx512")
|
||||
set(ICELAKE_FLAG "-march=icelake-server")
|
||||
set(OPT_C_FLAG "-Os")
|
||||
set(OPT_CXX_FLAG "-Os")
|
||||
endif ()
|
||||
else()
|
||||
set(OPT_C_FLAG "-O0")
|
||||
set(OPT_CXX_FLAG "-O0")
|
||||
endif(RELEASE_BUILD)
|
||||
|
||||
# set compiler flags - more are tested and added later
|
||||
set(EXTRA_C_FLAGS "${OPT_C_FLAG} -std=c17 -Wall -Wextra -Wshadow -Wcast-qual -fno-strict-aliasing")
|
||||
set(EXTRA_CXX_FLAGS "${OPT_CXX_FLAG} -std=c++17 -Wall -Wextra -Wshadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor -fno-strict-aliasing")
|
||||
if (NOT CMAKE_COMPILER_IS_CLANG)
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-new-ttp-matching")
|
||||
endif()
|
||||
|
||||
if (NOT RELEASE_BUILD)
|
||||
# -Werror is most useful during development, don't potentially break
|
||||
# release builds
|
||||
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror")
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror")
|
||||
endif()
|
||||
|
||||
if (DISABLE_ASSERTS)
|
||||
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -DNDEBUG")
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -DNDEBUG")
|
||||
endif()
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCC)
|
||||
# spurious warnings?
|
||||
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-array-bounds -Wno-maybe-uninitialized")
|
||||
endif()
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCXX)
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-maybe-uninitialized")
|
||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0)
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fabi-version=0")
|
||||
endif ()
|
||||
# don't complain about abi
|
||||
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-abi")
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-abi")
|
||||
endif()
|
||||
|
||||
if (NOT(ARCH_IA32 AND RELEASE_BUILD))
|
||||
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fno-omit-frame-pointer")
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer")
|
||||
endif()
|
||||
|
||||
CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H)
|
||||
if (ARCH_IA32 OR ARCH_X86_64)
|
||||
@ -289,8 +332,6 @@ elseif (ARCH_ARM32 OR ARCH_AARCH64)
|
||||
message(FATAL_ERROR "arm_sve.h is required to build for SVE.")
|
||||
endif()
|
||||
endif()
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -flax-vector-conversions")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flax-vector-conversions")
|
||||
elseif (ARCH_PPC64EL)
|
||||
CHECK_INCLUDE_FILE_CXX(altivec.h HAVE_C_PPC64EL_ALTIVEC_H)
|
||||
endif()
|
||||
@ -318,8 +359,7 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
# This is a Linux-only feature for now - requires platform support
|
||||
# elsewhere
|
||||
message(STATUS "generator is ${CMAKE_GENERATOR}")
|
||||
if (CMAKE_C_COMPILER_ID MATCHES "Clang" AND
|
||||
CMAKE_C_COMPILER_VERSION VERSION_LESS "3.9")
|
||||
if (CMAKE_C_COMPILER_IS_CLANG AND CMAKE_C_COMPILER_VERSION VERSION_LESS "3.9")
|
||||
message (STATUS "Clang v3.9 or higher required for fat runtime, cannot build fat runtime")
|
||||
set (FAT_RUNTIME_REQUISITES FALSE)
|
||||
elseif (NOT (CMAKE_GENERATOR MATCHES "Unix Makefiles" OR
|
||||
@ -343,7 +383,10 @@ include (${CMAKE_MODULE_PATH}/arch.cmake)
|
||||
# testing a builtin takes a little more work
|
||||
CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED)
|
||||
CHECK_CXX_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED)
|
||||
CHECK_C_SOURCE_COMPILES("int main(void) { __builtin_constant_p(0); }" HAVE__BUILTIN_CONSTANT_P)
|
||||
# Clang does not use __builtin_constant_p() the same way as gcc
|
||||
if (NOT CMAKE_COMPILER_IS_CLANG)
|
||||
CHECK_C_SOURCE_COMPILES("int main(void) { __builtin_constant_p(0); }" HAVE__BUILTIN_CONSTANT_P)
|
||||
endif()
|
||||
|
||||
set(C_FLAGS_TO_CHECK
|
||||
# Variable length arrays are way bad, most especially at run time
|
||||
@ -442,19 +485,22 @@ if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
|
||||
set(FREEBSD true)
|
||||
endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
|
||||
|
||||
if (NOT FAT_RUNTIME)
|
||||
if (CROSS_COMPILE_AARCH64)
|
||||
|
||||
if (FAT_RUNTIME)
|
||||
if (NOT (ARCH_IA32 OR ARCH_X86_64))
|
||||
message(FATAL_ERROR "Fat runtime is not supported on non-Intel architectures")
|
||||
else()
|
||||
message(STATUS "Building runtime for multiple microarchitectures")
|
||||
endif()
|
||||
else()
|
||||
if (CROSS_COMPILE)
|
||||
message(STATUS "Building for target CPU: ${ARCH_C_FLAGS}")
|
||||
else()
|
||||
message(STATUS "Building for current host CPU: ${ARCH_C_FLAGS}")
|
||||
endif()
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS}")
|
||||
else()
|
||||
message(STATUS "Building runtime for multiple microarchitectures")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
endif()
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS}")
|
||||
|
||||
add_subdirectory(util)
|
||||
add_subdirectory(doc/dev-reference)
|
||||
@ -1171,10 +1217,6 @@ if (NOT FAT_RUNTIME)
|
||||
set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C)
|
||||
|
||||
add_library(hs_compile OBJECT ${hs_compile_SRCS})
|
||||
if (ARCH_IA32)
|
||||
set_target_properties(hs_compile PROPERTIES COMPILE_FLAGS "-mssse3")
|
||||
endif (ARCH_IA32)
|
||||
|
||||
add_library(hs STATIC
|
||||
src/hs_version.c
|
||||
src/hs_valid_platform.c
|
||||
@ -1205,14 +1247,14 @@ else (FAT_RUNTIME)
|
||||
add_library(hs_exec_core2 OBJECT ${hs_exec_SRCS})
|
||||
list(APPEND RUNTIME_LIBS $<TARGET_OBJECTS:hs_exec_core2>)
|
||||
set_target_properties(hs_exec_core2 PROPERTIES
|
||||
COMPILE_FLAGS "-march=core2"
|
||||
COMPILE_FLAGS "-march=core2 -msse4.2"
|
||||
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in"
|
||||
)
|
||||
|
||||
add_library(hs_exec_corei7 OBJECT ${hs_exec_SRCS})
|
||||
list(APPEND RUNTIME_LIBS $<TARGET_OBJECTS:hs_exec_corei7>)
|
||||
set_target_properties(hs_exec_corei7 PROPERTIES
|
||||
COMPILE_FLAGS "-march=corei7 -mssse3"
|
||||
COMPILE_FLAGS "-march=corei7 -msse4.2"
|
||||
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in"
|
||||
)
|
||||
|
||||
@ -1254,10 +1296,6 @@ else (FAT_RUNTIME)
|
||||
${RUNTIME_LIBS})
|
||||
set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C)
|
||||
add_library(hs_compile OBJECT ${hs_compile_SRCS})
|
||||
if (ARCH_IA32 OR ARCH_X86_64)
|
||||
set_target_properties(hs_exec_common PROPERTIES COMPILE_FLAGS "-mssse3")
|
||||
set_target_properties(hs_compile PROPERTIES COMPILE_FLAGS "-mssse3")
|
||||
endif ()
|
||||
|
||||
# we want the static lib for testing
|
||||
add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c
|
||||
@ -1274,14 +1312,14 @@ else (FAT_RUNTIME)
|
||||
add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS})
|
||||
list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_shared_core2>)
|
||||
set_target_properties(hs_exec_shared_core2 PROPERTIES
|
||||
COMPILE_FLAGS "-march=core2"
|
||||
COMPILE_FLAGS "-march=core2 -msse4.2"
|
||||
POSITION_INDEPENDENT_CODE TRUE
|
||||
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in"
|
||||
)
|
||||
add_library(hs_exec_shared_corei7 OBJECT ${hs_exec_SRCS})
|
||||
list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_shared_corei7>)
|
||||
set_target_properties(hs_exec_shared_corei7 PROPERTIES
|
||||
COMPILE_FLAGS "-march=corei7 -mssse3"
|
||||
COMPILE_FLAGS "-march=corei7 -msse4.2"
|
||||
POSITION_INDEPENDENT_CODE TRUE
|
||||
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in"
|
||||
)
|
||||
|
606
Jenkinsfile
vendored
606
Jenkinsfile
vendored
@ -1,22 +1,590 @@
|
||||
pipeline {
|
||||
agent {
|
||||
node {
|
||||
label 'x86'
|
||||
}
|
||||
|
||||
}
|
||||
stages {
|
||||
stage('Release, SSE') {
|
||||
agent {
|
||||
node {
|
||||
label 'x86'
|
||||
agent none
|
||||
stages {
|
||||
stage("Build") {
|
||||
failFast true
|
||||
parallel {
|
||||
stage("Release/SSE") {
|
||||
agent { label "x86" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-release-SSE', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=no -DBUILD_AVX512=no -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Unit Test") {
|
||||
steps {
|
||||
sh 'build-release-SSE/bin/unit-internal'
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-release-SSE/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Release/AVX2") {
|
||||
agent { label "x86" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-release-AVX2', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=no -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Unit Test") {
|
||||
steps {
|
||||
sh 'build-release-AVX2/bin/unit-internal'
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-release-AVX2/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Release/AVX512") {
|
||||
agent { label "x86" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-release-AVX512', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Unit Test") {
|
||||
steps {
|
||||
sh 'build-release-AVX512/bin/unit-internal'
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-release-AVX512/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Release/FAT") {
|
||||
agent { label "x86" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-release-fat', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=yes', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-release-fat/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Debug/SSE") {
|
||||
agent { label "x86" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-debug-SSE', buildType: 'Debug', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=no -DBUILD_AVX512=no -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Unit Test") {
|
||||
steps {
|
||||
sh 'build-debug-SSE/bin/unit-internal'
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-debug-SSE/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Debug/AVX2") {
|
||||
agent { label "x86" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-debug-AVX2', buildType: 'Debug', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=no -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Unit Test") {
|
||||
steps {
|
||||
sh 'build-debug-AVX2/bin/unit-internal'
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-debug-AVX2/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Debug/AVX512") {
|
||||
agent { label "x86" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-debug-AVX512', buildType: 'Debug', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Unit Test") {
|
||||
steps {
|
||||
sh 'build-debug-AVX512/bin/unit-internal'
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-debug-AVX512/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Debug/FAT") {
|
||||
agent { label "x86" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-debug-fat', buildType: 'Debug', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=yes', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-debug-fat/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Release/ARM") {
|
||||
agent { label "arm" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-release-arm', buildType: 'Release', cleanBuild: true, cmakeArgs: '', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Unit Test") {
|
||||
steps {
|
||||
sh 'build-release-arm/bin/unit-internal'
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-release-arm/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Debug/ARM") {
|
||||
agent { label "arm" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-debug-arm', buildType: 'Debug', cleanBuild: true, cmakeArgs: '', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Unit Test") {
|
||||
steps {
|
||||
sh 'build-debug-arm/bin/unit-internal'
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-debug-arm/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Release/Power") {
|
||||
agent { label "power" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-release-power', buildType: 'Release', cleanBuild: true, cmakeArgs: '', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Unit Test") {
|
||||
steps {
|
||||
sh 'build-release-power/bin/unit-internal'
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-release-power/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Debug/Power") {
|
||||
agent { label "power" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-debug-power', buildType: 'Debug', cleanBuild: true, cmakeArgs: '', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Unit Test") {
|
||||
steps {
|
||||
sh 'build-debug-power/bin/unit-internal'
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-debug-power/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Clang-Release/SSE") {
|
||||
agent { label "x86" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-clang-release-SSE', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=no -DBUILD_AVX512=no -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Unit Test") {
|
||||
steps {
|
||||
sh 'build-clang-release-SSE/bin/unit-internal'
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-clang-release-SSE/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Clang-Release/AVX2") {
|
||||
agent { label "x86" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-clang-release-AVX2', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=no -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Unit Test") {
|
||||
steps {
|
||||
sh 'build-clang-release-AVX2/bin/unit-internal'
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-clang-release-AVX2/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Clang-Release/AVX512") {
|
||||
agent { label "x86" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-clang-release-AVX512', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Unit Test") {
|
||||
steps {
|
||||
sh 'build-clang-release-AVX512/bin/unit-internal'
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-clang-release-AVX512/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Clang-Release/FAT") {
|
||||
agent { label "x86" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-clang-release-fat', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=yes', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-clang-release-fat/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Clang-Debug/SSE") {
|
||||
agent { label "x86" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-clang-debug-SSE', buildType: 'Debug', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=no -DBUILD_AVX512=no -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Unit Test") {
|
||||
steps {
|
||||
sh 'build-clang-debug-SSE/bin/unit-internal'
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-clang-debug-SSE/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Clang-Debug/AVX2") {
|
||||
agent { label "x86" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-clang-debug-AVX2', buildType: 'Debug', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=no -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Unit Test") {
|
||||
steps {
|
||||
sh 'build-clang-debug-AVX2/bin/unit-internal'
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-clang-debug-AVX2/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Clang-Debug/AVX512") {
|
||||
agent { label "x86" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-clang-debug-AVX512', buildType: 'Debug', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Unit Test") {
|
||||
steps {
|
||||
sh 'build-clang-debug-AVX512/bin/unit-internal'
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-clang-debug-AVX512/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Clang-Debug/FAT") {
|
||||
agent { label "x86" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-clang-debug-fat', buildType: 'Debug', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=yes', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-clang-debug-fat/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Clang-Release/ARM") {
|
||||
agent { label "arm" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-clang-release-arm', buildType: 'Release', cleanBuild: true, cmakeArgs: '', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Unit Test") {
|
||||
steps {
|
||||
sh 'build-clang-release-arm/bin/unit-internal'
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-clang-release-arm/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Clang-Debug/ARM") {
|
||||
agent { label "arm" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
// buildDir must be the same directory the "Unit Test" and "Test" stages of this stage run their binaries from
cmakeBuild buildDir: 'build-clang-debug-arm', buildType: 'Debug', cleanBuild: true, cmakeArgs: '', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Unit Test") {
|
||||
steps {
|
||||
sh 'build-clang-debug-arm/bin/unit-internal'
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-clang-debug-arm/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Clang-Release/Power") {
|
||||
agent { label "power" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-clang-release-power', buildType: 'Release', cleanBuild: true, cmakeArgs: '', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Unit Test") {
|
||||
steps {
|
||||
sh 'build-clang-release-power/bin/unit-internal'
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-clang-release-power/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Clang-Debug/Power") {
|
||||
agent { label "power" }
|
||||
stages {
|
||||
stage("Git checkout") {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
|
||||
}
|
||||
}
|
||||
stage("Build") {
|
||||
steps {
|
||||
cmakeBuild buildDir: 'build-clang-debug-power', buildType: 'Debug', cleanBuild: true, cmakeArgs: '', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
|
||||
}
|
||||
}
|
||||
stage("Unit Test") {
|
||||
steps {
|
||||
sh 'build-clang-debug-power/bin/unit-internal'
|
||||
}
|
||||
}
|
||||
stage("Test") {
|
||||
steps {
|
||||
sh 'build-clang-debug-power/bin/unit-hyperscan'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
steps {
|
||||
sh 'mkdir build-release-SSE && cmake -DCMAKE_BUILD_TYPE=Release -C build-release-SSE'
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -88,7 +88,7 @@ if (FAT_RUNTIME)
|
||||
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${SKYLAKE_FLAG}")
|
||||
endif (BUILD_AVX512VBMI)
|
||||
elseif (BUILD_AVX2)
|
||||
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-avx2 -mavx")
|
||||
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-avx2 -mavx2")
|
||||
# plain else: SSE baseline flags apply whenever neither AVX512 nor AVX2 was requested
else ()
|
||||
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-i7 -mssse3")
|
||||
endif ()
|
||||
@ -98,12 +98,12 @@ else (NOT FAT_RUNTIME)
|
||||
endif ()
|
||||
|
||||
if (ARCH_IA32 OR ARCH_X86_64)
|
||||
# ensure we have the minimum of SSSE3 - call a SSSE3 intrinsic
|
||||
# ensure we have the minimum of SSE4.2 - call a SSE4.2 intrinsic
|
||||
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
|
||||
int main() {
|
||||
__m128i a = _mm_set1_epi8(1);
|
||||
(void)_mm_shuffle_epi8(a, a);
|
||||
}" HAVE_SSSE3)
|
||||
}" HAVE_SSE42)
|
||||
|
||||
# now look for AVX2
|
||||
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
|
||||
@ -157,8 +157,8 @@ else ()
|
||||
endif ()
|
||||
|
||||
if (FAT_RUNTIME)
|
||||
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_SSSE3)
|
||||
message(FATAL_ERROR "SSSE3 support required to build fat runtime")
|
||||
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_SSE42)
|
||||
message(FATAL_ERROR "SSE4.2 support required to build fat runtime")
|
||||
endif ()
|
||||
if ((ARCH_IA32 OR ARCH_X86_64) AND BUILD_AVX2 AND NOT HAVE_AVX2)
|
||||
message(FATAL_ERROR "AVX2 support required to build fat runtime")
|
||||
@ -179,8 +179,8 @@ else (NOT FAT_RUNTIME)
|
||||
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_AVX512VBMI)
|
||||
message(STATUS "Building without AVX512VBMI support")
|
||||
endif ()
|
||||
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_SSSE3)
|
||||
message(FATAL_ERROR "A minimum of SSSE3 compiler support is required")
|
||||
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_SSE42)
|
||||
message(FATAL_ERROR "A minimum of SSE4.2 compiler support is required")
|
||||
endif ()
|
||||
if ((ARCH_ARM32 OR ARCH_AARCH64) AND NOT HAVE_NEON)
|
||||
message(FATAL_ERROR "NEON support required for ARM support")
|
||||
|
@ -1,3 +1,8 @@
|
||||
# determine compiler
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
set(CMAKE_COMPILER_IS_CLANG TRUE)
|
||||
endif()
|
||||
|
||||
# determine the target arch
|
||||
|
||||
if (CROSS_COMPILE_AARCH64)
|
||||
@ -10,7 +15,7 @@ else()
|
||||
CHECK_C_SOURCE_COMPILES("#if !(defined(__i386__) || defined(_M_IX86))\n#error not 32bit\n#endif\nint main(void) { return 0; }" ARCH_IA32)
|
||||
CHECK_C_SOURCE_COMPILES("#if !defined(__ARM_ARCH_ISA_A64)\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_AARCH64)
|
||||
CHECK_C_SOURCE_COMPILES("#if !defined(__ARM_ARCH_ISA_ARM)\n#error not 32bit\n#endif\nint main(void) { return 0; }" ARCH_ARM32)
|
||||
CHECK_C_SOURCE_COMPILES("#if !defined(__PPC64__) && !defined(__LITTLE_ENDIAN__) && !defined(__VSX__)\n#error not ppc64el\n#endif\nint main(void) { return 0; }" ARCH_PPC64EL)
|
||||
CHECK_C_SOURCE_COMPILES("#if !defined(__PPC64__) && !(defined(__LITTLE_ENDIAN__) && defined(__VSX__))\n#error not ppc64el\n#endif\nint main(void) { return 0; }" ARCH_PPC64EL)
|
||||
if (ARCH_X86_64 OR ARCH_AARCH64 OR ARCH_PPC64EL)
|
||||
set(ARCH_64_BIT TRUE)
|
||||
else()
|
||||
|
@ -122,24 +122,252 @@ m128 sub_2x64(m128 a, m128 b) {
|
||||
return (m128) vsubq_u64((uint64x2_t)a, (uint64x2_t)b);
|
||||
}
|
||||
|
||||
static really_really_inline
|
||||
static really_inline
|
||||
m128 lshift_m128(m128 a, unsigned b) {
|
||||
return (m128) vshlq_n_u32((uint32x4_t)a, b);
|
||||
#if defined(HAVE__BUILTIN_CONSTANT_P)
|
||||
if (__builtin_constant_p(b)) {
|
||||
return (m128) vshlq_n_u32((uint32x4_t)a, b);
|
||||
}
|
||||
#endif
|
||||
#define CASE_LSHIFT_m128(a, offset) case offset: return (m128)vshlq_n_u32((uint32x4_t)(a), (offset)); break;
|
||||
switch (b) {
|
||||
case 0: return a; break;
|
||||
CASE_LSHIFT_m128(a, 1);
|
||||
CASE_LSHIFT_m128(a, 2);
|
||||
CASE_LSHIFT_m128(a, 3);
|
||||
CASE_LSHIFT_m128(a, 4);
|
||||
CASE_LSHIFT_m128(a, 5);
|
||||
CASE_LSHIFT_m128(a, 6);
|
||||
CASE_LSHIFT_m128(a, 7);
|
||||
CASE_LSHIFT_m128(a, 8);
|
||||
CASE_LSHIFT_m128(a, 9);
|
||||
CASE_LSHIFT_m128(a, 10);
|
||||
CASE_LSHIFT_m128(a, 11);
|
||||
CASE_LSHIFT_m128(a, 12);
|
||||
CASE_LSHIFT_m128(a, 13);
|
||||
CASE_LSHIFT_m128(a, 14);
|
||||
CASE_LSHIFT_m128(a, 15);
|
||||
CASE_LSHIFT_m128(a, 16);
|
||||
CASE_LSHIFT_m128(a, 17);
|
||||
CASE_LSHIFT_m128(a, 18);
|
||||
CASE_LSHIFT_m128(a, 19);
|
||||
CASE_LSHIFT_m128(a, 20);
|
||||
CASE_LSHIFT_m128(a, 21);
|
||||
CASE_LSHIFT_m128(a, 22);
|
||||
CASE_LSHIFT_m128(a, 23);
|
||||
CASE_LSHIFT_m128(a, 24);
|
||||
CASE_LSHIFT_m128(a, 25);
|
||||
CASE_LSHIFT_m128(a, 26);
|
||||
CASE_LSHIFT_m128(a, 27);
|
||||
CASE_LSHIFT_m128(a, 28);
|
||||
CASE_LSHIFT_m128(a, 29);
|
||||
CASE_LSHIFT_m128(a, 30);
|
||||
CASE_LSHIFT_m128(a, 31);
|
||||
default: return zeroes128(); break;
|
||||
}
|
||||
#undef CASE_LSHIFT_m128
|
||||
}
|
||||
|
||||
static really_really_inline
|
||||
m128 rshift_m128(m128 a, unsigned b) {
|
||||
return (m128) vshrq_n_u32((uint32x4_t)a, b);
|
||||
#if defined(HAVE__BUILTIN_CONSTANT_P)
|
||||
if (__builtin_constant_p(b)) {
|
||||
return (m128) vshrq_n_u32((uint32x4_t)a, b);
|
||||
}
|
||||
#endif
|
||||
#define CASE_RSHIFT_m128(a, offset) case offset: return (m128)vshrq_n_u32((uint32x4_t)(a), (offset)); break;
|
||||
switch (b) {
|
||||
case 0: return a; break;
|
||||
CASE_RSHIFT_m128(a, 1);
|
||||
CASE_RSHIFT_m128(a, 2);
|
||||
CASE_RSHIFT_m128(a, 3);
|
||||
CASE_RSHIFT_m128(a, 4);
|
||||
CASE_RSHIFT_m128(a, 5);
|
||||
CASE_RSHIFT_m128(a, 6);
|
||||
CASE_RSHIFT_m128(a, 7);
|
||||
CASE_RSHIFT_m128(a, 8);
|
||||
CASE_RSHIFT_m128(a, 9);
|
||||
CASE_RSHIFT_m128(a, 10);
|
||||
CASE_RSHIFT_m128(a, 11);
|
||||
CASE_RSHIFT_m128(a, 12);
|
||||
CASE_RSHIFT_m128(a, 13);
|
||||
CASE_RSHIFT_m128(a, 14);
|
||||
CASE_RSHIFT_m128(a, 15);
|
||||
CASE_RSHIFT_m128(a, 16);
|
||||
CASE_RSHIFT_m128(a, 17);
|
||||
CASE_RSHIFT_m128(a, 18);
|
||||
CASE_RSHIFT_m128(a, 19);
|
||||
CASE_RSHIFT_m128(a, 20);
|
||||
CASE_RSHIFT_m128(a, 21);
|
||||
CASE_RSHIFT_m128(a, 22);
|
||||
CASE_RSHIFT_m128(a, 23);
|
||||
CASE_RSHIFT_m128(a, 24);
|
||||
CASE_RSHIFT_m128(a, 25);
|
||||
CASE_RSHIFT_m128(a, 26);
|
||||
CASE_RSHIFT_m128(a, 27);
|
||||
CASE_RSHIFT_m128(a, 28);
|
||||
CASE_RSHIFT_m128(a, 29);
|
||||
CASE_RSHIFT_m128(a, 30);
|
||||
CASE_RSHIFT_m128(a, 31);
|
||||
default: return zeroes128(); break;
|
||||
}
|
||||
#undef CASE_RSHIFT_m128
|
||||
}
|
||||
|
||||
static really_really_inline
|
||||
m128 lshift64_m128(m128 a, unsigned b) {
|
||||
return (m128) vshlq_n_u64((uint64x2_t)a, b);
|
||||
#if defined(HAVE__BUILTIN_CONSTANT_P)
|
||||
if (__builtin_constant_p(b)) {
|
||||
return (m128) vshlq_n_u64((uint64x2_t)a, b);
|
||||
}
|
||||
#endif
|
||||
#define CASE_LSHIFT64_m128(a, offset) case offset: return (m128)vshlq_n_u64((uint64x2_t)(a), (offset)); break;
|
||||
switch (b) {
|
||||
case 0: return a; break;
|
||||
CASE_LSHIFT64_m128(a, 1);
|
||||
CASE_LSHIFT64_m128(a, 2);
|
||||
CASE_LSHIFT64_m128(a, 3);
|
||||
CASE_LSHIFT64_m128(a, 4);
|
||||
CASE_LSHIFT64_m128(a, 5);
|
||||
CASE_LSHIFT64_m128(a, 6);
|
||||
CASE_LSHIFT64_m128(a, 7);
|
||||
CASE_LSHIFT64_m128(a, 8);
|
||||
CASE_LSHIFT64_m128(a, 9);
|
||||
CASE_LSHIFT64_m128(a, 10);
|
||||
CASE_LSHIFT64_m128(a, 11);
|
||||
CASE_LSHIFT64_m128(a, 12);
|
||||
CASE_LSHIFT64_m128(a, 13);
|
||||
CASE_LSHIFT64_m128(a, 14);
|
||||
CASE_LSHIFT64_m128(a, 15);
|
||||
CASE_LSHIFT64_m128(a, 16);
|
||||
CASE_LSHIFT64_m128(a, 17);
|
||||
CASE_LSHIFT64_m128(a, 18);
|
||||
CASE_LSHIFT64_m128(a, 19);
|
||||
CASE_LSHIFT64_m128(a, 20);
|
||||
CASE_LSHIFT64_m128(a, 21);
|
||||
CASE_LSHIFT64_m128(a, 22);
|
||||
CASE_LSHIFT64_m128(a, 23);
|
||||
CASE_LSHIFT64_m128(a, 24);
|
||||
CASE_LSHIFT64_m128(a, 25);
|
||||
CASE_LSHIFT64_m128(a, 26);
|
||||
CASE_LSHIFT64_m128(a, 27);
|
||||
CASE_LSHIFT64_m128(a, 28);
|
||||
CASE_LSHIFT64_m128(a, 29);
|
||||
CASE_LSHIFT64_m128(a, 30);
|
||||
CASE_LSHIFT64_m128(a, 31);
|
||||
CASE_LSHIFT64_m128(a, 32);
|
||||
CASE_LSHIFT64_m128(a, 33);
|
||||
CASE_LSHIFT64_m128(a, 34);
|
||||
CASE_LSHIFT64_m128(a, 35);
|
||||
CASE_LSHIFT64_m128(a, 36);
|
||||
CASE_LSHIFT64_m128(a, 37);
|
||||
CASE_LSHIFT64_m128(a, 38);
|
||||
CASE_LSHIFT64_m128(a, 39);
|
||||
CASE_LSHIFT64_m128(a, 40);
|
||||
CASE_LSHIFT64_m128(a, 41);
|
||||
CASE_LSHIFT64_m128(a, 42);
|
||||
CASE_LSHIFT64_m128(a, 43);
|
||||
CASE_LSHIFT64_m128(a, 44);
|
||||
CASE_LSHIFT64_m128(a, 45);
|
||||
CASE_LSHIFT64_m128(a, 46);
|
||||
CASE_LSHIFT64_m128(a, 47);
|
||||
CASE_LSHIFT64_m128(a, 48);
|
||||
CASE_LSHIFT64_m128(a, 49);
|
||||
CASE_LSHIFT64_m128(a, 50);
|
||||
CASE_LSHIFT64_m128(a, 51);
|
||||
CASE_LSHIFT64_m128(a, 52);
|
||||
CASE_LSHIFT64_m128(a, 53);
|
||||
CASE_LSHIFT64_m128(a, 54);
|
||||
CASE_LSHIFT64_m128(a, 55);
|
||||
CASE_LSHIFT64_m128(a, 56);
|
||||
CASE_LSHIFT64_m128(a, 57);
|
||||
CASE_LSHIFT64_m128(a, 58);
|
||||
CASE_LSHIFT64_m128(a, 59);
|
||||
CASE_LSHIFT64_m128(a, 60);
|
||||
CASE_LSHIFT64_m128(a, 61);
|
||||
CASE_LSHIFT64_m128(a, 62);
|
||||
CASE_LSHIFT64_m128(a, 63);
|
||||
default: return zeroes128(); break;
|
||||
}
|
||||
#undef CASE_LSHIFT64_m128
|
||||
}
|
||||
|
||||
static really_really_inline
|
||||
m128 rshift64_m128(m128 a, unsigned b) {
|
||||
return (m128) vshrq_n_u64((uint64x2_t)a, b);
|
||||
#if defined(HAVE__BUILTIN_CONSTANT_P)
|
||||
if (__builtin_constant_p(b)) {
|
||||
return (m128) vshrq_n_u64((uint64x2_t)a, b);
|
||||
}
|
||||
#endif
|
||||
#define CASE_RSHIFT64_m128(a, offset) case offset: return (m128)vshrq_n_u64((uint64x2_t)(a), (offset)); break;
|
||||
switch (b) {
|
||||
case 0: return a; break;
|
||||
CASE_RSHIFT64_m128(a, 1);
|
||||
CASE_RSHIFT64_m128(a, 2);
|
||||
CASE_RSHIFT64_m128(a, 3);
|
||||
CASE_RSHIFT64_m128(a, 4);
|
||||
CASE_RSHIFT64_m128(a, 5);
|
||||
CASE_RSHIFT64_m128(a, 6);
|
||||
CASE_RSHIFT64_m128(a, 7);
|
||||
CASE_RSHIFT64_m128(a, 8);
|
||||
CASE_RSHIFT64_m128(a, 9);
|
||||
CASE_RSHIFT64_m128(a, 10);
|
||||
CASE_RSHIFT64_m128(a, 11);
|
||||
CASE_RSHIFT64_m128(a, 12);
|
||||
CASE_RSHIFT64_m128(a, 13);
|
||||
CASE_RSHIFT64_m128(a, 14);
|
||||
CASE_RSHIFT64_m128(a, 15);
|
||||
CASE_RSHIFT64_m128(a, 16);
|
||||
CASE_RSHIFT64_m128(a, 17);
|
||||
CASE_RSHIFT64_m128(a, 18);
|
||||
CASE_RSHIFT64_m128(a, 19);
|
||||
CASE_RSHIFT64_m128(a, 20);
|
||||
CASE_RSHIFT64_m128(a, 21);
|
||||
CASE_RSHIFT64_m128(a, 22);
|
||||
CASE_RSHIFT64_m128(a, 23);
|
||||
CASE_RSHIFT64_m128(a, 24);
|
||||
CASE_RSHIFT64_m128(a, 25);
|
||||
CASE_RSHIFT64_m128(a, 26);
|
||||
CASE_RSHIFT64_m128(a, 27);
|
||||
CASE_RSHIFT64_m128(a, 28);
|
||||
CASE_RSHIFT64_m128(a, 29);
|
||||
CASE_RSHIFT64_m128(a, 30);
|
||||
CASE_RSHIFT64_m128(a, 31);
|
||||
CASE_RSHIFT64_m128(a, 32);
|
||||
CASE_RSHIFT64_m128(a, 33);
|
||||
CASE_RSHIFT64_m128(a, 34);
|
||||
CASE_RSHIFT64_m128(a, 35);
|
||||
CASE_RSHIFT64_m128(a, 36);
|
||||
CASE_RSHIFT64_m128(a, 37);
|
||||
CASE_RSHIFT64_m128(a, 38);
|
||||
CASE_RSHIFT64_m128(a, 39);
|
||||
CASE_RSHIFT64_m128(a, 40);
|
||||
CASE_RSHIFT64_m128(a, 41);
|
||||
CASE_RSHIFT64_m128(a, 42);
|
||||
CASE_RSHIFT64_m128(a, 43);
|
||||
CASE_RSHIFT64_m128(a, 44);
|
||||
CASE_RSHIFT64_m128(a, 45);
|
||||
CASE_RSHIFT64_m128(a, 46);
|
||||
CASE_RSHIFT64_m128(a, 47);
|
||||
CASE_RSHIFT64_m128(a, 48);
|
||||
CASE_RSHIFT64_m128(a, 49);
|
||||
CASE_RSHIFT64_m128(a, 50);
|
||||
CASE_RSHIFT64_m128(a, 51);
|
||||
CASE_RSHIFT64_m128(a, 52);
|
||||
CASE_RSHIFT64_m128(a, 53);
|
||||
CASE_RSHIFT64_m128(a, 54);
|
||||
CASE_RSHIFT64_m128(a, 55);
|
||||
CASE_RSHIFT64_m128(a, 56);
|
||||
CASE_RSHIFT64_m128(a, 57);
|
||||
CASE_RSHIFT64_m128(a, 58);
|
||||
CASE_RSHIFT64_m128(a, 59);
|
||||
CASE_RSHIFT64_m128(a, 60);
|
||||
CASE_RSHIFT64_m128(a, 61);
|
||||
CASE_RSHIFT64_m128(a, 62);
|
||||
CASE_RSHIFT64_m128(a, 63);
|
||||
default: return zeroes128(); break;
|
||||
}
|
||||
#undef CASE_RSHIFT64_m128
|
||||
}
|
||||
|
||||
static really_inline m128 eq128(m128 a, m128 b) {
|
||||
@ -191,9 +419,11 @@ m128 load_m128_from_u64a(const u64a *p) {
|
||||
}
|
||||
|
||||
static really_inline u32 extract32from128(const m128 in, unsigned imm) {
|
||||
#if defined(HS_OPTIMIZE)
|
||||
return vgetq_lane_u32((uint32x4_t) in, imm);
|
||||
#else
|
||||
#if defined(HAVE__BUILTIN_CONSTANT_P)
|
||||
if (__builtin_constant_p(imm)) {
|
||||
return vgetq_lane_u32((uint32x4_t) in, imm);
|
||||
}
|
||||
#endif
|
||||
switch (imm) {
|
||||
case 0:
|
||||
return vgetq_lane_u32((uint32x4_t) in, 0);
|
||||
@ -211,13 +441,14 @@ static really_inline u32 extract32from128(const m128 in, unsigned imm) {
|
||||
return 0;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static really_inline u64a extract64from128(const m128 in, unsigned imm) {
|
||||
#if defined(HS_OPTIMIZE)
|
||||
return vgetq_lane_u64((uint64x2_t) in, imm);
|
||||
#else
|
||||
#if defined(HAVE__BUILTIN_CONSTANT_P)
|
||||
if (__builtin_constant_p(imm)) {
|
||||
return vgetq_lane_u64((uint64x2_t) in, imm);
|
||||
}
|
||||
#endif
|
||||
switch (imm) {
|
||||
case 0:
|
||||
return vgetq_lane_u64((uint64x2_t) in, 0);
|
||||
@ -229,7 +460,6 @@ static really_inline u64a extract64from128(const m128 in, unsigned imm) {
|
||||
return 0;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static really_inline m128 low64from128(const m128 in) {
|
||||
|
@ -30,7 +30,7 @@
|
||||
#define ARCH_PPC64EL_SIMD_TYPES_H
|
||||
|
||||
#if !defined(m128) && defined(HAVE_VSX)
|
||||
typedef __vector int32_t m128;
|
||||
typedef __vector int m128;
|
||||
#endif
|
||||
|
||||
#endif /* ARCH_PPC64EL_SIMD_TYPES_H */
|
||||
|
@ -43,6 +43,18 @@
|
||||
|
||||
#include <string.h> // for memcpy
|
||||
|
||||
typedef __vector unsigned long long int uint64x2_t;
|
||||
typedef __vector signed long long int int64x2_t;
|
||||
typedef __vector unsigned int uint32x4_t;
|
||||
typedef __vector signed int int32x4_t;
|
||||
typedef __vector unsigned short int uint16x8_t;
|
||||
typedef __vector signed short int int16x8_t;
|
||||
typedef __vector unsigned char uint8x16_t;
|
||||
typedef __vector signed char int8x16_t;
|
||||
|
||||
typedef unsigned long long int ulong64_t;
|
||||
typedef signed long long int long64_t;
|
||||
/*
|
||||
typedef __vector uint64_t uint64x2_t;
|
||||
typedef __vector int64_t int64x2_t;
|
||||
typedef __vector uint32_t uint32x4_t;
|
||||
@ -50,7 +62,7 @@ typedef __vector int32_t int32x4_t;
|
||||
typedef __vector uint16_t uint16x8_t;
|
||||
typedef __vector int16_t int16x8_t;
|
||||
typedef __vector uint8_t uint8x16_t;
|
||||
typedef __vector int8_t int8x16_t;
|
||||
typedef __vector int8_t int8x16_t;*/
|
||||
|
||||
|
||||
#define ZEROES_8 0, 0, 0, 0, 0, 0, 0, 0
|
||||
@ -182,13 +194,13 @@ m128 rshift_m128(m128 a, unsigned b) {
|
||||
|
||||
static really_really_inline
|
||||
m128 lshift64_m128(m128 a, unsigned b) {
|
||||
uint64x2_t shift_indices = vec_splats((uint64_t)b);
|
||||
uint64x2_t shift_indices = vec_splats((ulong64_t)b);
|
||||
return (m128) vec_sl((int64x2_t)a, shift_indices);
|
||||
}
|
||||
|
||||
static really_really_inline
|
||||
m128 rshift64_m128(m128 a, unsigned b) {
|
||||
uint64x2_t shift_indices = vec_splats((uint64_t)b);
|
||||
uint64x2_t shift_indices = vec_splats((ulong64_t)b);
|
||||
return (m128) vec_sr((int64x2_t)a, shift_indices);
|
||||
}
|
||||
|
||||
@ -213,11 +225,11 @@ static really_inline u32 movemask128(m128 a) {
|
||||
uint32x4_t s3 = vec_or((uint32x4_t)ss2, res_and2);
|
||||
|
||||
uint64x2_t ss3 = vec_sr((uint64x2_t)s3, (uint64x2_t)vec_splats(28));
|
||||
uint64x2_t res_and3 = vec_and((uint64x2_t)s3, vec_splats((uint64_t)0xff));
|
||||
uint64x2_t res_and3 = vec_and((uint64x2_t)s3, vec_splats((ulong64_t)0xff));
|
||||
uint64x2_t s4 = vec_or((uint64x2_t)ss3, res_and3);
|
||||
|
||||
uint64x2_t ss4 = vec_sld((uint64x2_t)vec_splats(0), s4, 9);
|
||||
uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((uint64_t)0xff));
|
||||
uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((ulong64_t)0xff));
|
||||
uint64x2_t s5 = vec_or((uint64x2_t)ss4, res_and4);
|
||||
|
||||
return s5[0];
|
||||
|
@ -30,7 +30,7 @@
|
||||
#ifndef SIMD_TYPES_X86_H
|
||||
#define SIMD_TYPES_X86_H
|
||||
|
||||
#if !defined(m128) && defined(HAVE_SSE2)
|
||||
#if !defined(m128) && defined(HAVE_SSE42)
|
||||
typedef __m128i m128;
|
||||
#endif
|
||||
|
||||
|
@ -51,6 +51,7 @@ typedef struct ALIGN_AVX_DIRECTIVE {m128 lo; m128 hi;} m256;
|
||||
#endif
|
||||
|
||||
typedef struct {m128 lo; m128 mid; m128 hi;} m384;
|
||||
|
||||
#if !defined(m512) && !defined(HAVE_SIMD_512_BITS)
|
||||
typedef struct ALIGN_ATTR(64) {m256 lo; m256 hi;} m512;
|
||||
#endif
|
||||
|
@ -45,112 +45,112 @@ really_inline SuperVector<16>::SuperVector(typename base_type::type const v)
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<int8x16_t>(int8x16_t other)
|
||||
really_inline SuperVector<16>::SuperVector(int8x16_t other)
|
||||
{
|
||||
u.s8x16[0] = other;
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<uint8x16_t>(uint8x16_t other)
|
||||
really_inline SuperVector<16>::SuperVector(uint8x16_t other)
|
||||
{
|
||||
u.u8x16[0] = other;
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<int16x8_t>(int16x8_t other)
|
||||
really_inline SuperVector<16>::SuperVector(int16x8_t other)
|
||||
{
|
||||
u.s16x8[0] = other;
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<uint16x8_t>(uint16x8_t other)
|
||||
really_inline SuperVector<16>::SuperVector(uint16x8_t other)
|
||||
{
|
||||
u.u16x8[0] = other;
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<int32x4_t>(int32x4_t other)
|
||||
really_inline SuperVector<16>::SuperVector(int32x4_t other)
|
||||
{
|
||||
u.s32x4[0] = other;
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<uint32x4_t>(uint32x4_t other)
|
||||
really_inline SuperVector<16>::SuperVector(uint32x4_t other)
|
||||
{
|
||||
u.u32x4[0] = other;
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<int64x2_t>(int64x2_t other)
|
||||
really_inline SuperVector<16>::SuperVector(int64x2_t other)
|
||||
{
|
||||
u.s64x2[0] = other;
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<uint64x2_t>(uint64x2_t other)
|
||||
really_inline SuperVector<16>::SuperVector(uint64x2_t other)
|
||||
{
|
||||
u.u64x2[0] = other;
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<int8_t>(int8_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(int8_t const other)
|
||||
{
|
||||
u.s8x16[0] = vdupq_n_s8(other);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<uint8_t>(uint8_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(uint8_t const other)
|
||||
{
|
||||
u.u8x16[0] = vdupq_n_u8(other);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<int16_t>(int16_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(int16_t const other)
|
||||
{
|
||||
u.s16x8[0] = vdupq_n_s16(other);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<uint16_t>(uint16_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(uint16_t const other)
|
||||
{
|
||||
u.u16x8[0] = vdupq_n_u16(other);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<int32_t>(int32_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(int32_t const other)
|
||||
{
|
||||
u.s32x4[0] = vdupq_n_s32(other);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<uint32_t>(uint32_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(uint32_t const other)
|
||||
{
|
||||
u.u32x4[0] = vdupq_n_u32(other);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<int64_t>(int64_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(int64_t const other)
|
||||
{
|
||||
u.s64x2[0] = vdupq_n_s64(other);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<uint64_t>(uint64_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(uint64_t const other)
|
||||
{
|
||||
u.u64x2[0] = vdupq_n_u64(other);
|
||||
}
|
||||
@ -376,7 +376,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_8 (uint8_t const N) const
|
||||
if (N == 0) return *this;
|
||||
if (N >= 8) return Zeroes(); // shifting 8-bit lanes by 8 or more clears every lane
|
||||
SuperVector result;
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u8(u.u8x16[0], n)}; });
|
||||
Unroller<1, 8>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u8(v->u.u8x16[0], n)}; });
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -386,7 +386,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_16 (uint8_t const N) const
|
||||
if (N == 0) return *this;
|
||||
if (N == 16) return Zeroes();
|
||||
SuperVector result;
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u16(u.u16x8[0], n)}; });
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u16(v->u.u16x8[0], n)}; });
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -394,9 +394,9 @@ template <>
|
||||
really_inline SuperVector<16> SuperVector<16>::vshl_32 (uint8_t const N) const
|
||||
{
|
||||
if (N == 0) return *this;
|
||||
if (N == 16) return Zeroes();
|
||||
if (N == 32) return Zeroes();
|
||||
SuperVector result;
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u32(u.u32x4[0], n)}; });
|
||||
Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u32(v->u.u32x4[0], n)}; });
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -404,9 +404,9 @@ template <>
|
||||
really_inline SuperVector<16> SuperVector<16>::vshl_64 (uint8_t const N) const
|
||||
{
|
||||
if (N == 0) return *this;
|
||||
if (N == 16) return Zeroes();
|
||||
if (N == 64) return Zeroes();
|
||||
SuperVector result;
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u64(u.u64x2[0], n)}; });
|
||||
Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u64(v->u.u64x2[0], n)}; });
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -416,7 +416,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_128(uint8_t const N) const
|
||||
if (N == 0) return *this;
|
||||
if (N == 16) return Zeroes();
|
||||
SuperVector result;
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vextq_u8(vdupq_n_u8(0), u.u8x16[0], 16 - n)}; });
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vextq_u8(vdupq_n_u8(0), v->u.u8x16[0], 16 - n)}; });
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -430,9 +430,9 @@ template <>
|
||||
really_inline SuperVector<16> SuperVector<16>::vshr_8 (uint8_t const N) const
|
||||
{
|
||||
if (N == 0) return *this;
|
||||
if (N == 16) return Zeroes();
|
||||
if (N == 8) return Zeroes();
|
||||
SuperVector result;
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u8(u.u8x16[0], n)}; });
|
||||
Unroller<1, 8>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u8(v->u.u8x16[0], n)}; });
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -442,7 +442,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const N) const
|
||||
if (N == 0) return *this;
|
||||
if (N == 16) return Zeroes();
|
||||
SuperVector result;
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u16(u.u16x8[0], n)}; });
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u16(v->u.u16x8[0], n)}; });
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -450,9 +450,9 @@ template <>
|
||||
really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const N) const
|
||||
{
|
||||
if (N == 0) return *this;
|
||||
if (N == 16) return Zeroes();
|
||||
if (N == 32) return Zeroes();
|
||||
SuperVector result;
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u32(u.u32x4[0], n)}; });
|
||||
Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u32(v->u.u32x4[0], n)}; });
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -460,9 +460,9 @@ template <>
|
||||
really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const N) const
|
||||
{
|
||||
if (N == 0) return *this;
|
||||
if (N == 16) return Zeroes();
|
||||
if (N == 64) return Zeroes();
|
||||
SuperVector result;
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u64(u.u64x2[0], n)}; });
|
||||
Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u64(v->u.u64x2[0], n)}; });
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -472,7 +472,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const N) const
|
||||
if (N == 0) return *this;
|
||||
if (N == 16) return Zeroes();
|
||||
SuperVector result;
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vextq_u8(u.u8x16[0], vdupq_n_u8(0), n)}; });
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vextq_u8(v->u.u8x16[0], vdupq_n_u8(0), n)}; });
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -39,16 +39,6 @@
|
||||
#include "util/supervector/supervector.hpp"
|
||||
#include <iostream>
|
||||
|
||||
|
||||
typedef __vector uint64_t uint64x2_t;
|
||||
typedef __vector int64_t int64x2_t;
|
||||
typedef __vector uint32_t uint32x4_t;
|
||||
typedef __vector int32_t int32x4_t;
|
||||
typedef __vector uint16_t uint16x8_t;
|
||||
typedef __vector int16_t int16x8_t;
|
||||
typedef __vector uint8_t uint8x16_t;
|
||||
typedef __vector int8_t int8x16_t;
|
||||
|
||||
// 128-bit Powerpc64le implementation
|
||||
|
||||
template<>
|
||||
@ -65,58 +55,58 @@ really_inline SuperVector<16>::SuperVector(typename base_type::type const v)
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<int8_t>(int8_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(int8_t const other)
|
||||
{
|
||||
u.v128[0] = (m128) vec_splats(other);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<uint8_t>(uint8_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(uint8_t const other)
|
||||
{
|
||||
u.v128[0] = (m128) vec_splats(static_cast<uint8_t>(other));
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<int16_t>(int16_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(int16_t const other)
|
||||
{
|
||||
u.v128[0] = (m128) vec_splats(other);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<uint16_t>(uint16_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(uint16_t const other)
|
||||
{
|
||||
u.v128[0] = (m128) vec_splats(static_cast<uint16_t>(other));
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<int32_t>(int32_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(int32_t const other)
|
||||
{
|
||||
u.v128[0] = (m128) vec_splats(other);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<uint32_t>(uint32_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(uint32_t const other)
|
||||
{
|
||||
u.v128[0] = (m128) vec_splats(static_cast<uint32_t>(other));
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<int64_t>(int64_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(int64_t const other)
|
||||
{
|
||||
u.v128[0] = (m128) vec_splats(other);
|
||||
u.v128[0] = (m128) vec_splats(static_cast<ulong64_t>(other));
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<uint64_t>(uint64_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(uint64_t const other)
|
||||
{
|
||||
u.v128[0] = (m128) vec_splats(static_cast<uint64_t>(other));
|
||||
u.v128[0] = (m128) vec_splats(static_cast<ulong64_t>(other));
|
||||
}
|
||||
|
||||
// Constants
|
||||
@ -229,11 +219,11 @@ really_inline typename SuperVector<16>::movemask_type SuperVector<16>::movemask(
|
||||
uint32x4_t s3 = vec_or((uint32x4_t)ss2, res_and2);
|
||||
|
||||
uint64x2_t ss3 = vec_sr((uint64x2_t)s3, (uint64x2_t)vec_splats(28));
|
||||
uint64x2_t res_and3 = vec_and((uint64x2_t)s3, vec_splats((uint64_t)0xff));
|
||||
uint64x2_t res_and3 = vec_and((uint64x2_t)s3, vec_splats((ulong64_t)0xff));
|
||||
uint64x2_t s4 = vec_or((uint64x2_t)ss3, res_and3);
|
||||
|
||||
uint64x2_t ss4 = vec_sld((uint64x2_t) vec_splats(0), s4, 9);
|
||||
uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((uint64_t)0xff));
|
||||
uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((ulong64_t)0xff));
|
||||
uint64x2_t s5 = vec_or((uint64x2_t)ss4, res_and4);
|
||||
|
||||
return s5[0];
|
||||
@ -271,7 +261,7 @@ template <>
|
||||
template<uint8_t N>
|
||||
really_inline SuperVector<16> SuperVector<16>::vshl_64_imm() const
|
||||
{
|
||||
return { (m128) vec_sl(u.s64x2[0], vec_splats((uint64_t)N)) };
|
||||
return { (m128) vec_sl(u.s64x2[0], vec_splats((ulong64_t)N)) };
|
||||
}
|
||||
|
||||
template <>
|
||||
@ -313,7 +303,7 @@ template <>
|
||||
template<uint8_t N>
|
||||
really_inline SuperVector<16> SuperVector<16>::vshr_64_imm() const
|
||||
{
|
||||
return { (m128) vec_sr(u.s64x2[0], vec_splats((uint64_t)N)) };
|
||||
return { (m128) vec_sr(u.s64x2[0], vec_splats((ulong64_t)N)) };
|
||||
}
|
||||
|
||||
template <>
|
||||
@ -352,7 +342,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_8 (uint8_t const N) const
|
||||
if (N == 0) return *this;
|
||||
if (N == 16) return Zeroes();
|
||||
SuperVector result;
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(u.s8x16[0], vec_splats((uint8_t)n))}; });
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(v->u.s8x16[0], vec_splats((uint8_t)n))}; });
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -362,7 +352,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_16 (uint8_t const UNUSED N)
|
||||
if (N == 0) return *this;
|
||||
if (N == 16) return Zeroes();
|
||||
SuperVector result;
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(u.s16x8[0], vec_splats((uint16_t)n))}; });
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(v->u.s16x8[0], vec_splats((uint16_t)n))}; });
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -372,7 +362,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_32 (uint8_t const N) const
|
||||
if (N == 0) return *this;
|
||||
if (N == 16) return Zeroes();
|
||||
SuperVector result;
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(u.s32x4[0], vec_splats((uint32_t)n))}; });
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(v->u.s32x4[0], vec_splats((uint32_t)n))}; });
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -382,7 +372,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_64 (uint8_t const N) const
|
||||
if (N == 0) return *this;
|
||||
if (N == 16) return Zeroes();
|
||||
SuperVector result;
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(u.s64x2[0], vec_splats((uint64_t)n))}; });
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(v->u.s64x2[0], vec_splats((ulong64_t)n))}; });
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -392,7 +382,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_128(uint8_t const N) const
|
||||
if (N == 0) return *this;
|
||||
if (N == 16) return Zeroes();
|
||||
SuperVector result;
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sld(u.s8x16[0], (int8x16_t)vec_splat_s8(0), n)}; });
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sld(v->u.s8x16[0], (int8x16_t)vec_splat_s8(0), n)}; });
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -408,7 +398,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_8 (uint8_t const N) const
|
||||
if (N == 0) return *this;
|
||||
if (N == 16) return Zeroes();
|
||||
SuperVector result;
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(u.s8x16[0], vec_splats((uint8_t)n))}; });
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(v->u.s8x16[0], vec_splats((uint8_t)n))}; });
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -418,7 +408,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const N) const
|
||||
if (N == 0) return *this;
|
||||
if (N == 16) return Zeroes();
|
||||
SuperVector result;
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(u.s16x8[0], vec_splats((uint16_t)n))}; });
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(v->u.s16x8[0], vec_splats((uint16_t)n))}; });
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -428,7 +418,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const N) const
|
||||
if (N == 0) return *this;
|
||||
if (N == 16) return Zeroes();
|
||||
SuperVector result;
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(u.s32x4[0], vec_splats((uint32_t)n))}; });
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(v->u.s32x4[0], vec_splats((uint32_t)n))}; });
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -438,7 +428,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const N) const
|
||||
if (N == 0) return *this;
|
||||
if (N == 16) return Zeroes();
|
||||
SuperVector result;
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(u.s64x2[0], vec_splats((uint64_t)n))}; });
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(v->u.s64x2[0], vec_splats((ulong64_t)n))}; });
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -448,7 +438,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const UNUSED N)
|
||||
if (N == 0) return *this;
|
||||
if (N == 16) return Zeroes();
|
||||
SuperVector result;
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sld((int8x16_t)vec_splat_u8(0), u.s8x16[0], 16 - n)}; });
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sld((int8x16_t)vec_splat_u8(0), v->u.s8x16[0], 16 - n)}; });
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -523,14 +513,14 @@ really_inline SuperVector<16> SuperVector<16>::Ones_vshl(uint8_t const N)
|
||||
template <>
|
||||
really_inline SuperVector<16> SuperVector<16>::loadu(void const *ptr)
|
||||
{
|
||||
return (m128) vec_xl(0, (const int64_t*)ptr);
|
||||
return (m128) vec_xl(0, (const long64_t*)ptr);
|
||||
}
|
||||
|
||||
template <>
|
||||
really_inline SuperVector<16> SuperVector<16>::load(void const *ptr)
|
||||
{
|
||||
assert(ISALIGNED_N(ptr, alignof(SuperVector::size)));
|
||||
return (m128) vec_xl(0, (const int64_t*)ptr);
|
||||
return (m128) vec_xl(0, (const long64_t*)ptr);
|
||||
}
|
||||
|
||||
template <>
|
||||
|
@ -27,6 +27,18 @@
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
typedef __vector unsigned long long int uint64x2_t;
|
||||
typedef __vector signed long long int int64x2_t;
|
||||
typedef __vector unsigned int uint32x4_t;
|
||||
typedef __vector signed int int32x4_t;
|
||||
typedef __vector unsigned short int uint16x8_t;
|
||||
typedef __vector signed short int int16x8_t;
|
||||
typedef __vector unsigned char uint8x16_t;
|
||||
typedef __vector signed char int8x16_t;
|
||||
|
||||
typedef unsigned long long int ulong64_t;
|
||||
typedef signed long long int long64_t;
|
||||
|
||||
#if !defined(m128) && defined(HAVE_VSX)
|
||||
typedef __vector int32_t m128;
|
||||
typedef __vector int m128;
|
||||
#endif
|
||||
|
@ -55,56 +55,56 @@ really_inline SuperVector<16>::SuperVector(typename base_type::type const v)
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<int8_t>(int8_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(int8_t const other)
|
||||
{
|
||||
u.v128[0] = _mm_set1_epi8(other);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<uint8_t>(uint8_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(uint8_t const other)
|
||||
{
|
||||
u.v128[0] = _mm_set1_epi8(static_cast<int8_t>(other));
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<int16_t>(int16_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(int16_t const other)
|
||||
{
|
||||
u.v128[0] = _mm_set1_epi16(other);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<uint16_t>(uint16_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(uint16_t const other)
|
||||
{
|
||||
u.v128[0] = _mm_set1_epi16(static_cast<int16_t>(other));
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<int32_t>(int32_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(int32_t const other)
|
||||
{
|
||||
u.v128[0] = _mm_set1_epi32(other);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<uint32_t>(uint32_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(uint32_t const other)
|
||||
{
|
||||
u.v128[0] = _mm_set1_epi32(static_cast<int32_t>(other));
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<int64_t>(int64_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(int64_t const other)
|
||||
{
|
||||
u.v128[0] = _mm_set1_epi64x(other);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<16>::SuperVector<uint64_t>(uint64_t const other)
|
||||
really_inline SuperVector<16>::SuperVector(uint64_t const other)
|
||||
{
|
||||
u.v128[0] = _mm_set1_epi64x(static_cast<int64_t>(other));
|
||||
}
|
||||
@ -608,56 +608,56 @@ really_inline SuperVector<32>::SuperVector(SuperVector<16> const lo, SuperVector
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<32>::SuperVector<int8_t>(int8_t const other)
|
||||
really_inline SuperVector<32>::SuperVector(int8_t const other)
|
||||
{
|
||||
u.v256[0] = _mm256_set1_epi8(other);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<32>::SuperVector<uint8_t>(uint8_t const other)
|
||||
really_inline SuperVector<32>::SuperVector(uint8_t const other)
|
||||
{
|
||||
u.v256[0] = _mm256_set1_epi8(static_cast<int8_t>(other));
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<32>::SuperVector<int16_t>(int16_t const other)
|
||||
really_inline SuperVector<32>::SuperVector(int16_t const other)
|
||||
{
|
||||
u.v256[0] = _mm256_set1_epi16(other);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<32>::SuperVector<uint16_t>(uint16_t const other)
|
||||
really_inline SuperVector<32>::SuperVector(uint16_t const other)
|
||||
{
|
||||
u.v256[0] = _mm256_set1_epi16(static_cast<int16_t>(other));
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<32>::SuperVector<int32_t>(int32_t const other)
|
||||
really_inline SuperVector<32>::SuperVector(int32_t const other)
|
||||
{
|
||||
u.v256[0] = _mm256_set1_epi32(other);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<32>::SuperVector<uint32_t>(uint32_t const other)
|
||||
really_inline SuperVector<32>::SuperVector(uint32_t const other)
|
||||
{
|
||||
u.v256[0] = _mm256_set1_epi32(static_cast<int32_t>(other));
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<32>::SuperVector<int64_t>(int64_t const other)
|
||||
really_inline SuperVector<32>::SuperVector(int64_t const other)
|
||||
{
|
||||
u.v256[0] = _mm256_set1_epi64x(other);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<32>::SuperVector<uint64_t>(uint64_t const other)
|
||||
really_inline SuperVector<32>::SuperVector(uint64_t const other)
|
||||
{
|
||||
u.v256[0] = _mm256_set1_epi64x(static_cast<int64_t>(other));
|
||||
}
|
||||
@ -804,7 +804,7 @@ really_inline SuperVector<32> SuperVector<32>::vshl_128_imm() const
|
||||
|
||||
template <>
|
||||
template<uint8_t N>
|
||||
really_inline SuperVector<16> SuperVector<32>::vshl_256_imm() const
|
||||
really_inline SuperVector<32> SuperVector<32>::vshl_256_imm() const
|
||||
{
|
||||
if (N == 0) return *this;
|
||||
if (N == 16) return {_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0))};
|
||||
@ -950,11 +950,11 @@ really_inline SuperVector<32> SuperVector<32>::vshl_256(uint8_t const N) const
|
||||
SuperVector result;
|
||||
Unroller<1, 16>::iterator([&,v=this](auto const i) {
|
||||
constexpr uint8_t n = i.value;
|
||||
if (N == n) result = {_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - n)};;
|
||||
if (N == n) result = {_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - n)};;
|
||||
});
|
||||
Unroller<17, 32>::iterator([&,v=this](auto const i) {
|
||||
constexpr uint8_t n = i.value;
|
||||
if (N == n) result = {_mm256_slli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), n - 16)};
|
||||
if (N == n) result = {_mm256_slli_si256(_mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), n - 16)};
|
||||
});
|
||||
return result;
|
||||
}
|
||||
@ -1240,56 +1240,56 @@ really_inline SuperVector<64>::SuperVector(m128 const v)
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<64>::SuperVector<int8_t>(int8_t const o)
|
||||
really_inline SuperVector<64>::SuperVector(int8_t const o)
|
||||
{
|
||||
u.v512[0] = _mm512_set1_epi8(o);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<64>::SuperVector<uint8_t>(uint8_t const o)
|
||||
really_inline SuperVector<64>::SuperVector(uint8_t const o)
|
||||
{
|
||||
u.v512[0] = _mm512_set1_epi8(static_cast<int8_t>(o));
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<64>::SuperVector<int16_t>(int16_t const o)
|
||||
really_inline SuperVector<64>::SuperVector(int16_t const o)
|
||||
{
|
||||
u.v512[0] = _mm512_set1_epi16(o);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<64>::SuperVector<uint16_t>(uint16_t const o)
|
||||
really_inline SuperVector<64>::SuperVector(uint16_t const o)
|
||||
{
|
||||
u.v512[0] = _mm512_set1_epi16(static_cast<int16_t>(o));
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<64>::SuperVector<int32_t>(int32_t const o)
|
||||
really_inline SuperVector<64>::SuperVector(int32_t const o)
|
||||
{
|
||||
u.v512[0] = _mm512_set1_epi32(o);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<64>::SuperVector<uint32_t>(uint32_t const o)
|
||||
really_inline SuperVector<64>::SuperVector(uint32_t const o)
|
||||
{
|
||||
u.v512[0] = _mm512_set1_epi32(static_cast<int32_t>(o));
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<64>::SuperVector<int64_t>(int64_t const o)
|
||||
really_inline SuperVector<64>::SuperVector(int64_t const o)
|
||||
{
|
||||
u.v512[0] = _mm512_set1_epi64(o);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
really_inline SuperVector<64>::SuperVector<uint64_t>(uint64_t const o)
|
||||
really_inline SuperVector<64>::SuperVector(uint64_t const o)
|
||||
{
|
||||
u.v512[0] = _mm512_set1_epi64(static_cast<int64_t>(o));
|
||||
}
|
||||
|
@ -165,7 +165,7 @@ public:
|
||||
typename BaseVector<32>::type ALIGN_ATTR(BaseVector<32>::size) v256[SIZE / BaseVector<32>::size];
|
||||
typename BaseVector<64>::type ALIGN_ATTR(BaseVector<64>::size) v512[SIZE / BaseVector<64>::size];
|
||||
|
||||
#if defined(ARCH_ARM32) || defined(ARCH_AARCH64)
|
||||
#if defined(ARCH_ARM32) || defined(ARCH_AARCH64) || defined(ARCH_PPC64EL)
|
||||
uint64x2_t ALIGN_ATTR(BaseVector<16>::size) u64x2[SIZE / BaseVector<16>::size];
|
||||
int64x2_t ALIGN_ATTR(BaseVector<16>::size) s64x2[SIZE / BaseVector<16>::size];
|
||||
uint32x4_t ALIGN_ATTR(BaseVector<16>::size) u32x4[SIZE / BaseVector<16>::size];
|
||||
@ -176,17 +176,6 @@ public:
|
||||
int8x16_t ALIGN_ATTR(BaseVector<16>::size) s8x16[SIZE / BaseVector<16>::size];
|
||||
#endif
|
||||
|
||||
#if defined(ARCH_PPC64EL)
|
||||
__vector uint64_t ALIGN_ATTR(BaseVector<16>::size) u64x2[SIZE / BaseVector<16>::size];
|
||||
__vector int64_t ALIGN_ATTR(BaseVector<16>::size) s64x2[SIZE / BaseVector<16>::size];
|
||||
__vector uint32_t ALIGN_ATTR(BaseVector<16>::size) u32x4[SIZE / BaseVector<16>::size];
|
||||
__vector int32_t ALIGN_ATTR(BaseVector<16>::size) s32x4[SIZE / BaseVector<16>::size];
|
||||
__vector uint16_t ALIGN_ATTR(BaseVector<16>::size) u16x8[SIZE / BaseVector<16>::size];
|
||||
__vector int16_t ALIGN_ATTR(BaseVector<16>::size) s16x8[SIZE / BaseVector<16>::size];
|
||||
__vector uint8_t ALIGN_ATTR(BaseVector<16>::size) u8x16[SIZE / BaseVector<16>::size];
|
||||
__vector int8_t ALIGN_ATTR(BaseVector<16>::size) s8x16[SIZE / BaseVector<16>::size];
|
||||
#endif
|
||||
|
||||
uint64_t u64[SIZE / sizeof(uint64_t)];
|
||||
int64_t s64[SIZE / sizeof(int64_t)];
|
||||
uint32_t u32[SIZE / sizeof(uint32_t)];
|
||||
@ -200,7 +189,7 @@ public:
|
||||
} u;
|
||||
|
||||
constexpr SuperVector() {};
|
||||
constexpr SuperVector(SuperVector const &other)
|
||||
SuperVector(SuperVector const &other)
|
||||
:u(other.u) {};
|
||||
SuperVector(typename base_type::type const v);
|
||||
|
||||
|
@ -667,7 +667,7 @@ TEST(SimdUtilsTest, movq) {
|
||||
simd = _mm_set_epi64x(~0LL, 0x123456789abcdef);
|
||||
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
|
||||
int64x2_t a = { 0x123456789abcdefLL, ~0LL };
|
||||
simd = vreinterpretq_s64_s8(a);
|
||||
simd = vreinterpretq_s32_s64(a);
|
||||
#elif defined(ARCH_PPC64EL)
|
||||
int64x2_t a = {0x123456789abcdefLL, ~0LL };
|
||||
simd = (m128) a;
|
||||
|
@ -33,9 +33,6 @@ SET(corpusomatic_SRCS
|
||||
ng_find_matches.cpp
|
||||
)
|
||||
add_library(corpusomatic STATIC ${corpusomatic_SRCS})
|
||||
if (ARCH_IA32 OR ARCH_X86_64)
|
||||
set_target_properties(corpusomatic PROPERTIES COMPILE_FLAGS "-mssse3")
|
||||
endif ()
|
||||
|
||||
set(databaseutil_SRCS
|
||||
database_util.cpp
|
||||
|
Loading…
x
Reference in New Issue
Block a user