Merge pull request #81 from VectorCamp/feature/add-clang-support

Feature/add clang support
This commit is contained in:
Konstantinos Margaritis 2021-12-07 22:16:38 +02:00 committed by GitHub
commit 1718e33544
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 1144 additions and 302 deletions

View File

@ -3,7 +3,7 @@ project (vectorscan C CXX)
set (HS_MAJOR_VERSION 5)
set (HS_MINOR_VERSION 4)
set (HS_PATCH_VERSION 3)
set (HS_PATCH_VERSION 5)
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
@ -128,11 +128,9 @@ CMAKE_DEPENDENT_OPTION(DUMP_SUPPORT "Dump code support; normally on, except in r
CMAKE_DEPENDENT_OPTION(DISABLE_ASSERTS "Disable assert(); Asserts are enabled in debug builds, disabled in release builds" OFF "NOT RELEASE_BUILD" ON)
option(BUILD_AVX512 "Experimental: support avx512 in the fat runtime"
OFF)
option(BUILD_AVX512 "Experimental: support avx512 in the fat runtime" OFF)
option(BUILD_AVX512VBMI "Experimental: support avx512vbmi in the fat runtime"
OFF)
option(BUILD_AVX512VBMI "Experimental: support avx512vbmi in the fat runtime" OFF)
if (BUILD_AVX512VBMI)
set(BUILD_AVX512 ON)
@ -140,47 +138,95 @@ endif ()
# TODO: per platform config files?
# remove CMake's idea of optimisation
foreach (CONFIG ${CMAKE_BUILD_TYPE} ${CMAKE_CONFIGURATION_TYPES})
string(REGEX REPLACE "-O[^ ]*" "" CMAKE_C_FLAGS_${CONFIG} "${CMAKE_C_FLAGS_${CONFIG}}")
string(REGEX REPLACE "-O[^ ]*" "" CMAKE_CXX_FLAGS_${CONFIG} "${CMAKE_CXX_FLAGS_${CONFIG}}")
endforeach ()
# remove CMake's idea of optimisation
foreach (CONFIG ${CMAKE_BUILD_TYPE} ${CMAKE_CONFIGURATION_TYPES})
string(REGEX REPLACE "-O[^ ]*" "" CMAKE_C_FLAGS_${CONFIG} "${CMAKE_C_FLAGS_${CONFIG}}")
string(REGEX REPLACE "-O[^ ]*" "" CMAKE_CXX_FLAGS_${CONFIG} "${CMAKE_CXX_FLAGS_${CONFIG}}")
endforeach ()
if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE_AARCH64 AND NOT ARCH_PPC64EL)
message(STATUS "gcc version ${CMAKE_C_COMPILER_VERSION}")
# If gcc doesn't recognise the host cpu, then mtune=native becomes
# generic, which isn't very good in some cases. march=native looks at
# cpuid info and then chooses the best microarch it can (and replaces
# the flag), so use that for tune.
if (CMAKE_C_COMPILER_ID MATCHES "Intel")
set(SKYLAKE_FLAG "-xCORE-AVX512")
else ()
set(SKYLAKE_FLAG "-march=skylake-avx512")
set(ICELAKE_FLAG "-march=icelake-server")
endif ()
# arg1 might exist if using ccache
string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1)
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=native -mtune=native)
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
OUTPUT_VARIABLE _GCC_OUTPUT)
string(FIND "${_GCC_OUTPUT}" "march" POS)
string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT)
string(REGEX REPLACE "march=[ \t]*([^ \n]*)[ \n].*" "\\1"
GNUCC_ARCH "${_GCC_OUTPUT}")
if(ARCH_PPC64EL)
set(ARCH_FLAG mcpu)
else()
set(ARCH_FLAG march)
endif()
if (ARCH_IA32 OR ARCH_X86_64)
# test the parsed flag
set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH})
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
OUTPUT_QUIET ERROR_QUIET
INPUT_FILE /dev/null
RESULT_VARIABLE GNUCC_TUNE_TEST)
if (NOT GNUCC_TUNE_TEST EQUAL 0)
message(SEND_ERROR "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid")
endif()
set(TUNE_FLAG ${GNUCC_ARCH})
else()
set(TUNE_FLAG native)
endif()
elseif (NOT TUNE_FLAG)
# Detect best GNUCC_ARCH to tune for
if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE)
message(STATUS "gcc version ${CMAKE_C_COMPILER_VERSION}")
# If gcc doesn't recognise the host cpu, then mtune=native becomes
# generic, which isn't very good in some cases. march=native looks at
# cpuid info and then chooses the best microarch it can (and replaces
# the flag), so use that for tune.
# arg1 might exist if using ccache
string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1)
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native)
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
OUTPUT_VARIABLE _GCC_OUTPUT)
string(FIND "${_GCC_OUTPUT}" "${ARCH_FLAG}" POS)
string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT)
string(REGEX REPLACE "${ARCH_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_ARCH "${_GCC_OUTPUT}")
# test the parsed flag
set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH})
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
OUTPUT_QUIET ERROR_QUIET
INPUT_FILE /dev/null
RESULT_VARIABLE GNUCC_TUNE_TEST)
if (NOT GNUCC_TUNE_TEST EQUAL 0)
message(WARNING "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid, falling back to -mtune=native")
set(TUNE_FLAG native)
else()
set(TUNE_FLAG ${GNUCC_ARCH})
message(STATUS "gcc will tune for ${GNUCC_ARCH}, ${TUNE_FLAG}")
endif()
elseif (CMAKE_COMPILER_IS_CLANG AND NOT CROSS_COMPILE)
if (ARCH_IA32 OR ARCH_X86_64)
set(GNUCC_ARCH native)
set(TUNE_FLAG generic)
elseif(ARCH_AARCH64)
set(GNUCC_ARCH armv8)
set(TUNE_FLAG generic)
elseif(ARCH_ARM32)
set(GNUCC_ARCH armv7a)
set(TUNE_FLAG generic)
else()
set(GNUCC_ARCH native)
set(TUNE_FLAG generic)
endif()
message(STATUS "clang will tune for ${GNUCC_ARCH}, ${TUNE_FLAG}")
elseif (CROSS_COMPILE)
set(GNUCC_ARCH generic)
set(TUNE_FLAG generic)
endif()
if (ARCH_IA32 OR ARCH_X86_64)
if (NOT FAT_RUNTIME)
if (BUILD_AVX512)
set(ARCH_C_FLAGS "${SKYLAKE_FLAG}")
set(ARCH_CXX_FLAGS "${SKYLAKE_FLAG}")
elseif (BUILD_AVX2)
set(ARCH_C_FLAGS "-mavx2")
set(ARCH_CXX_FLAGS "-mavx2")
else()
set(ARCH_C_FLAGS "-msse4.2")
set(ARCH_CXX_FLAGS "-msse4.2")
endif()
else()
set(ARCH_C_FLAGS "-msse4.2")
set(ARCH_CXX_FLAGS "-msse4.2")
endif()
endif()
if (ARCH_AARCH64)
if (BUILD_SVE2_BITPERM)
set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm")
elseif (BUILD_SVE2)
@ -188,92 +234,89 @@ endif ()
elseif (BUILD_SVE)
set(GNUCC_ARCH "${GNUCC_ARCH}+sve")
endif ()
endif(ARCH_AARCH64)
# compiler version checks TODO: test more compilers
if (CMAKE_COMPILER_IS_GNUCXX)
set(GNUCXX_MINVER "4.8.1")
message(STATUS "g++ version ${CMAKE_CXX_COMPILER_VERSION}")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS GNUCXX_MINVER)
message(FATAL_ERROR "A minimum of g++ ${GNUCXX_MINVER} is required for C++11 support")
endif()
endif()
if(RELEASE_BUILD)
if (NOT CMAKE_BUILD_TYPE MATCHES MINSIZEREL)
set(OPT_C_FLAG "-O3")
set(OPT_CXX_FLAG "-O3")
else ()
set(OPT_C_FLAG "-Os")
set(OPT_CXX_FLAG "-Os")
endif ()
else()
set(OPT_C_FLAG "-O0")
set(OPT_CXX_FLAG "-O0")
endif(RELEASE_BUILD)
# set compiler flags - more are tested and added later
set(EXTRA_C_FLAGS "${OPT_C_FLAG} -std=c17 -Wall -Wextra -Wshadow -Wcast-qual -fno-strict-aliasing")
set(EXTRA_CXX_FLAGS "${OPT_CXX_FLAG} -std=c++17 -Wall -Wextra -Wshadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor -fno-strict-aliasing -fno-new-ttp-matching")
if (NOT RELEASE_BUILD)
# -Werror is most useful during development, don't potentially break
# release builds
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror")
endif()
if (DISABLE_ASSERTS)
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -DNDEBUG")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -DNDEBUG")
endif()
set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_C_FLAGS}")
set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_CXX_FLAGS}")
#if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64)
# if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*)
# set(ARCH_C_FLAGS "-march=${GNUCC_ARCH} -mtune=${TUNE_FLAG}")
# endif()
# if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*)
# set(ARCH_CXX_FLAGS "-march=${GNUCC_ARCH} -mtune=${TUNE_FLAG}")
# endif()
#endif()
if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64)
if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*)
set(ARCH_C_FLAGS "-march=${GNUCC_ARCH} -mtune=${TUNE_FLAG}")
endif()
if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*)
set(ARCH_CXX_FLAGS "-march=${GNUCC_ARCH} -mtune=${TUNE_FLAG}")
endif()
endif()
if(ARCH_PPC64EL)
if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*)
set(ARCH_C_FLAGS "-mtune=${TUNE_FLAG}")
endif()
if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*)
set(ARCH_CXX_FLAGS "-mtune=${TUNE_FLAG}")
endif()
endif()
#if(ARCH_PPC64EL)
# if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*)
# set(ARCH_C_FLAGS "-mtune=${TUNE_FLAG}")
# endif()
# if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*)
# set(ARCH_CXX_FLAGS "-mtune=${TUNE_FLAG}")
# endif()
#endif()
if(CMAKE_COMPILER_IS_GNUCC)
# spurious warnings?
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-array-bounds -Wno-maybe-uninitialized")
# compiler version checks TODO: test more compilers
if (CMAKE_COMPILER_IS_GNUCXX)
set(GNUCXX_MINVER "9")
message(STATUS "g++ version ${CMAKE_CXX_COMPILER_VERSION}")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS GNUCXX_MINVER)
message(FATAL_ERROR "A minimum of g++ ${GNUCXX_MINVER} is required for C++17 support")
endif()
endif()
if(CMAKE_COMPILER_IS_GNUCXX)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-maybe-uninitialized")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fabi-version=0")
endif ()
# don't complain about abi
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-abi")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-abi")
endif()
if (NOT(ARCH_IA32 AND RELEASE_BUILD))
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fno-omit-frame-pointer")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer")
endif()
if (CMAKE_C_COMPILER_ID MATCHES "Intel")
set(SKYLAKE_FLAG "-xCORE-AVX512")
if(RELEASE_BUILD)
if (NOT CMAKE_BUILD_TYPE MATCHES MINSIZEREL)
set(OPT_C_FLAG "-O3")
set(OPT_CXX_FLAG "-O3")
else ()
set(SKYLAKE_FLAG "-march=skylake-avx512")
set(ICELAKE_FLAG "-march=icelake-server")
set(OPT_C_FLAG "-Os")
set(OPT_CXX_FLAG "-Os")
endif ()
else()
set(OPT_C_FLAG "-O0")
set(OPT_CXX_FLAG "-O0")
endif(RELEASE_BUILD)
# set compiler flags - more are tested and added later
set(EXTRA_C_FLAGS "${OPT_C_FLAG} -std=c17 -Wall -Wextra -Wshadow -Wcast-qual -fno-strict-aliasing")
set(EXTRA_CXX_FLAGS "${OPT_CXX_FLAG} -std=c++17 -Wall -Wextra -Wshadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor -fno-strict-aliasing")
if (NOT CMAKE_COMPILER_IS_CLANG)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-new-ttp-matching")
endif()
if (NOT RELEASE_BUILD)
# -Werror is most useful during development, don't potentially break
# release builds
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror")
endif()
if (DISABLE_ASSERTS)
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -DNDEBUG")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -DNDEBUG")
endif()
if(CMAKE_COMPILER_IS_GNUCC)
# spurious warnings?
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-array-bounds -Wno-maybe-uninitialized")
endif()
if(CMAKE_COMPILER_IS_GNUCXX)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-maybe-uninitialized")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fabi-version=0")
endif ()
# don't complain about abi
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-abi")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-abi")
endif()
if (NOT(ARCH_IA32 AND RELEASE_BUILD))
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fno-omit-frame-pointer")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer")
endif()
CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H)
if (ARCH_IA32 OR ARCH_X86_64)
@ -289,8 +332,6 @@ elseif (ARCH_ARM32 OR ARCH_AARCH64)
message(FATAL_ERROR "arm_sve.h is required to build for SVE.")
endif()
endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -flax-vector-conversions")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flax-vector-conversions")
elseif (ARCH_PPC64EL)
CHECK_INCLUDE_FILE_CXX(altivec.h HAVE_C_PPC64EL_ALTIVEC_H)
endif()
@ -318,8 +359,7 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
# This is a Linux-only feature for now - requires platform support
# elsewhere
message(STATUS "generator is ${CMAKE_GENERATOR}")
if (CMAKE_C_COMPILER_ID MATCHES "Clang" AND
CMAKE_C_COMPILER_VERSION VERSION_LESS "3.9")
if (CMAKE_C_COMPILER_IS_CLANG AND CMAKE_C_COMPILER_VERSION VERSION_LESS "3.9")
message (STATUS "Clang v3.9 or higher required for fat runtime, cannot build fat runtime")
set (FAT_RUNTIME_REQUISITES FALSE)
elseif (NOT (CMAKE_GENERATOR MATCHES "Unix Makefiles" OR
@ -343,7 +383,10 @@ include (${CMAKE_MODULE_PATH}/arch.cmake)
# testing a builtin takes a little more work
CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED)
CHECK_CXX_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED)
CHECK_C_SOURCE_COMPILES("int main(void) { __builtin_constant_p(0); }" HAVE__BUILTIN_CONSTANT_P)
# Clang does not use __builtin_constant_p() the same way as gcc
if (NOT CMAKE_COMPILER_IS_CLANG)
CHECK_C_SOURCE_COMPILES("int main(void) { __builtin_constant_p(0); }" HAVE__BUILTIN_CONSTANT_P)
endif()
set(C_FLAGS_TO_CHECK
# Variable length arrays are way bad, most especially at run time
@ -442,19 +485,22 @@ if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
set(FREEBSD true)
endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
if (NOT FAT_RUNTIME)
if (CROSS_COMPILE_AARCH64)
if (FAT_RUNTIME)
if (NOT (ARCH_IA32 OR ARCH_X86_64))
message(FATAL_ERROR "Fat runtime is not supported on non-Intel architectures")
else()
message(STATUS "Building runtime for multiple microarchitectures")
endif()
else()
if (CROSS_COMPILE)
message(STATUS "Building for target CPU: ${ARCH_C_FLAGS}")
else()
message(STATUS "Building for current host CPU: ${ARCH_C_FLAGS}")
endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS}")
else()
message(STATUS "Building runtime for multiple microarchitectures")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS}")
add_subdirectory(util)
add_subdirectory(doc/dev-reference)
@ -1171,10 +1217,6 @@ if (NOT FAT_RUNTIME)
set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C)
add_library(hs_compile OBJECT ${hs_compile_SRCS})
if (ARCH_IA32)
set_target_properties(hs_compile PROPERTIES COMPILE_FLAGS "-mssse3")
endif (ARCH_IA32)
add_library(hs STATIC
src/hs_version.c
src/hs_valid_platform.c
@ -1205,14 +1247,14 @@ else (FAT_RUNTIME)
add_library(hs_exec_core2 OBJECT ${hs_exec_SRCS})
list(APPEND RUNTIME_LIBS $<TARGET_OBJECTS:hs_exec_core2>)
set_target_properties(hs_exec_core2 PROPERTIES
COMPILE_FLAGS "-march=core2"
COMPILE_FLAGS "-march=core2 -msse4.2"
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in"
)
add_library(hs_exec_corei7 OBJECT ${hs_exec_SRCS})
list(APPEND RUNTIME_LIBS $<TARGET_OBJECTS:hs_exec_corei7>)
set_target_properties(hs_exec_corei7 PROPERTIES
COMPILE_FLAGS "-march=corei7 -mssse3"
COMPILE_FLAGS "-march=corei7 -msse4.2"
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in"
)
@ -1254,10 +1296,6 @@ else (FAT_RUNTIME)
${RUNTIME_LIBS})
set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C)
add_library(hs_compile OBJECT ${hs_compile_SRCS})
if (ARCH_IA32 OR ARCH_X86_64)
set_target_properties(hs_exec_common PROPERTIES COMPILE_FLAGS "-mssse3")
set_target_properties(hs_compile PROPERTIES COMPILE_FLAGS "-mssse3")
endif ()
# we want the static lib for testing
add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c
@ -1274,14 +1312,14 @@ else (FAT_RUNTIME)
add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS})
list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_shared_core2>)
set_target_properties(hs_exec_shared_core2 PROPERTIES
COMPILE_FLAGS "-march=core2"
COMPILE_FLAGS "-march=core2 -msse4.2"
POSITION_INDEPENDENT_CODE TRUE
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in"
)
add_library(hs_exec_shared_corei7 OBJECT ${hs_exec_SRCS})
list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_shared_corei7>)
set_target_properties(hs_exec_shared_corei7 PROPERTIES
COMPILE_FLAGS "-march=corei7 -mssse3"
COMPILE_FLAGS "-march=corei7 -msse4.2"
POSITION_INDEPENDENT_CODE TRUE
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in"
)

606
Jenkinsfile vendored
View File

@ -1,22 +1,590 @@
pipeline {
agent {
node {
label 'x86'
}
}
stages {
stage('Release, SSE') {
agent {
node {
label 'x86'
agent none
stages {
stage("Build") {
failFast true
parallel {
stage("Release/SSE") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-release-SSE', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=no -DBUILD_AVX512=no -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-release-SSE/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-release-SSE/bin/unit-hyperscan'
}
}
}
}
stage("Release/AVX2") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-release-AVX2', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=no -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-release-AVX2/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-release-AVX2/bin/unit-hyperscan'
}
}
}
}
stage("Release/AVX512") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-release-AVX512', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-release-AVX512/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-release-AVX512/bin/unit-hyperscan'
}
}
}
}
stage("Release/FAT") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-release-fat', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=yes', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Test") {
steps {
sh 'build-release-fat/bin/unit-hyperscan'
}
}
}
}
stage("Debug/SSE") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-debug-SSE', buildType: 'Debug', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=no -DBUILD_AVX512=no -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-debug-SSE/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-debug-SSE/bin/unit-hyperscan'
}
}
}
}
stage("Debug/AVX2") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-debug-AVX2', buildType: 'Debug', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=no -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-debug-AVX2/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-debug-AVX2/bin/unit-hyperscan'
}
}
}
}
stage("Debug/AVX512") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-debug-AVX512', buildType: 'Debug', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-debug-AVX512/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-debug-AVX512/bin/unit-hyperscan'
}
}
}
}
stage("Debug/FAT") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-debug-fat', buildType: 'Debug', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=yes', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Test") {
steps {
sh 'build-debug-fat/bin/unit-hyperscan'
}
}
}
}
stage("Release/ARM") {
agent { label "arm" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-release-arm', buildType: 'Release', cleanBuild: true, cmakeArgs: '', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-release-arm/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-release-arm/bin/unit-hyperscan'
}
}
}
}
stage("Debug/ARM") {
agent { label "arm" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-debug-arm', buildType: 'Debug', cleanBuild: true, cmakeArgs: '', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-debug-arm/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-debug-arm/bin/unit-hyperscan'
}
}
}
}
stage("Release/Power") {
agent { label "power" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-release-power', buildType: 'Release', cleanBuild: true, cmakeArgs: '', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-release-power/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-release-power/bin/unit-hyperscan'
}
}
}
}
stage("Debug/Power") {
agent { label "power" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-debug-power', buildType: 'Debug', cleanBuild: true, cmakeArgs: '', installation: 'InSearchPath', steps: [[args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-debug-power/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-debug-power/bin/unit-hyperscan'
}
}
}
}
stage("Clang-Release/SSE") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-clang-release-SSE', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=no -DBUILD_AVX512=no -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-clang-release-SSE/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-clang-release-SSE/bin/unit-hyperscan'
}
}
}
}
stage("Clang-Release/AVX2") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-clang-release-AVX2', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=no -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-clang-release-AVX2/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-clang-release-AVX2/bin/unit-hyperscan'
}
}
}
}
stage("Clang-Release/AVX512") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-clang-release-AVX512', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-clang-release-AVX512/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-clang-release-AVX512/bin/unit-hyperscan'
}
}
}
}
stage("Clang-Release/FAT") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-clang-release-fat', buildType: 'Release', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=yes', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
}
}
stage("Test") {
steps {
sh 'build-clang-release-fat/bin/unit-hyperscan'
}
}
}
}
stage("Clang-Debug/SSE") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-clang-debug-SSE', buildType: 'Debug', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=no -DBUILD_AVX512=no -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-clang-debug-SSE/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-clang-debug-SSE/bin/unit-hyperscan'
}
}
}
}
stage("Clang-Debug/AVX2") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-clang-debug-AVX2', buildType: 'Debug', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=no -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-clang-debug-AVX2/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-clang-debug-AVX2/bin/unit-hyperscan'
}
}
}
}
stage("Clang-Debug/AVX512") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-clang-debug-AVX512', buildType: 'Debug', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=no', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-clang-debug-AVX512/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-clang-debug-AVX512/bin/unit-hyperscan'
}
}
}
}
stage("Clang-Debug/FAT") {
agent { label "x86" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-clang-debug-fat', buildType: 'Debug', cleanBuild: true, cmakeArgs: '-DBUILD_AVX2=yes -DBUILD_AVX512=yes -DFAT_RUNTIME=yes', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
}
}
stage("Test") {
steps {
sh 'build-clang-debug-fat/bin/unit-hyperscan'
}
}
}
}
stage("Clang-Release/ARM") {
agent { label "arm" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-clang-release-arm', buildType: 'Release', cleanBuild: true, cmakeArgs: '', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-clang-release-arm/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-clang-release-arm/bin/unit-hyperscan'
}
}
}
}
stage("Clang-Debug/ARM") {
agent { label "arm" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-debug-arm', buildType: 'Debug', cleanBuild: true, cmakeArgs: '', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-clang-debug-arm/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-clang-debug-arm/bin/unit-hyperscan'
}
}
}
}
stage("Clang-Release/Power") {
agent { label "power" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-clang-release-power', buildType: 'Release', cleanBuild: true, cmakeArgs: '', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-clang-release-power/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-clang-release-power/bin/unit-hyperscan'
}
}
}
}
stage("Clang-Debug/Power") {
agent { label "power" }
stages {
stage("Git checkout") {
steps {
checkout([$class: 'GitSCM', branches: [[name: '${sha1}']], extensions: [], userRemoteConfigs: [[refspec: '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*', url: 'https://github.com/VectorCamp/vectorscan.git']]])
}
}
stage("Build") {
steps {
cmakeBuild buildDir: 'build-clang-debug-power', buildType: 'Debug', cleanBuild: true, cmakeArgs: '', installation: 'InSearchPath', steps: [[envVars: 'CC=clang CXX=clang++', args: '--parallel 4', withCmake: true]]
}
}
stage("Unit Test") {
steps {
sh 'build-clang-debug-power/bin/unit-internal'
}
}
stage("Test") {
steps {
sh 'build-clang-debug-power/bin/unit-hyperscan'
}
}
}
}
}
}
}
steps {
sh 'mkdir build-release-SSE && cmake -DCMAKE_BUILD_TYPE=Release -C build-release-SSE'
}
}
}
}
}

View File

@ -88,7 +88,7 @@ if (FAT_RUNTIME)
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${SKYLAKE_FLAG}")
endif (BUILD_AVX512VBMI)
elseif (BUILD_AVX2)
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-avx2 -mavx")
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-avx2 -mavx2")
elseif ()
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-i7 -mssse3")
endif ()
@ -98,12 +98,12 @@ else (NOT FAT_RUNTIME)
endif ()
if (ARCH_IA32 OR ARCH_X86_64)
# ensure we have the minimum of SSSE3 - call a SSSE3 intrinsic
# ensure we have the minimum of SSE4.2 - call a SSE4.2 intrinsic
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
int main() {
__m128i a = _mm_set1_epi8(1);
(void)_mm_shuffle_epi8(a, a);
}" HAVE_SSSE3)
}" HAVE_SSE42)
# now look for AVX2
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
@ -157,8 +157,8 @@ else ()
endif ()
if (FAT_RUNTIME)
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_SSSE3)
message(FATAL_ERROR "SSSE3 support required to build fat runtime")
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_SSE42)
message(FATAL_ERROR "SSE4.2 support required to build fat runtime")
endif ()
if ((ARCH_IA32 OR ARCH_X86_64) AND BUILD_AVX2 AND NOT HAVE_AVX2)
message(FATAL_ERROR "AVX2 support required to build fat runtime")
@ -179,8 +179,8 @@ else (NOT FAT_RUNTIME)
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_AVX512VBMI)
message(STATUS "Building without AVX512VBMI support")
endif ()
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_SSSE3)
message(FATAL_ERROR "A minimum of SSSE3 compiler support is required")
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_SSE42)
message(FATAL_ERROR "A minimum of SSE4.2 compiler support is required")
endif ()
if ((ARCH_ARM32 OR ARCH_AARCH64) AND NOT HAVE_NEON)
message(FATAL_ERROR "NEON support required for ARM support")

View File

@ -1,3 +1,8 @@
# determine compiler
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(CMAKE_COMPILER_IS_CLANG TRUE)
endif()
# determine the target arch
if (CROSS_COMPILE_AARCH64)
@ -10,7 +15,7 @@ else()
CHECK_C_SOURCE_COMPILES("#if !(defined(__i386__) || defined(_M_IX86))\n#error not 32bit\n#endif\nint main(void) { return 0; }" ARCH_IA32)
CHECK_C_SOURCE_COMPILES("#if !defined(__ARM_ARCH_ISA_A64)\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_AARCH64)
CHECK_C_SOURCE_COMPILES("#if !defined(__ARM_ARCH_ISA_ARM)\n#error not 32bit\n#endif\nint main(void) { return 0; }" ARCH_ARM32)
CHECK_C_SOURCE_COMPILES("#if !defined(__PPC64__) && !defined(__LITTLE_ENDIAN__) && !defined(__VSX__)\n#error not ppc64el\n#endif\nint main(void) { return 0; }" ARCH_PPC64EL)
CHECK_C_SOURCE_COMPILES("#if !defined(__PPC64__) && !(defined(__LITTLE_ENDIAN__) && defined(__VSX__))\n#error not ppc64el\n#endif\nint main(void) { return 0; }" ARCH_PPC64EL)
if (ARCH_X86_64 OR ARCH_AARCH64 OR ARCH_PPC64EL)
set(ARCH_64_BIT TRUE)
else()

View File

@ -122,24 +122,252 @@ m128 sub_2x64(m128 a, m128 b) {
return (m128) vsubq_u64((uint64x2_t)a, (uint64x2_t)b);
}
static really_really_inline
static really_inline
m128 lshift_m128(m128 a, unsigned b) {
return (m128) vshlq_n_u32((uint32x4_t)a, b);
#if defined(HAVE__BUILTIN_CONSTANT_P)
if (__builtin_constant_p(b)) {
return (m128) vshlq_n_u32((uint32x4_t)a, b);
}
#endif
#define CASE_LSHIFT_m128(a, offset) case offset: return (m128)vshlq_n_u32((uint32x4_t)(a), (offset)); break;
switch (b) {
case 0: return a; break;
CASE_LSHIFT_m128(a, 1);
CASE_LSHIFT_m128(a, 2);
CASE_LSHIFT_m128(a, 3);
CASE_LSHIFT_m128(a, 4);
CASE_LSHIFT_m128(a, 5);
CASE_LSHIFT_m128(a, 6);
CASE_LSHIFT_m128(a, 7);
CASE_LSHIFT_m128(a, 8);
CASE_LSHIFT_m128(a, 9);
CASE_LSHIFT_m128(a, 10);
CASE_LSHIFT_m128(a, 11);
CASE_LSHIFT_m128(a, 12);
CASE_LSHIFT_m128(a, 13);
CASE_LSHIFT_m128(a, 14);
CASE_LSHIFT_m128(a, 15);
CASE_LSHIFT_m128(a, 16);
CASE_LSHIFT_m128(a, 17);
CASE_LSHIFT_m128(a, 18);
CASE_LSHIFT_m128(a, 19);
CASE_LSHIFT_m128(a, 20);
CASE_LSHIFT_m128(a, 21);
CASE_LSHIFT_m128(a, 22);
CASE_LSHIFT_m128(a, 23);
CASE_LSHIFT_m128(a, 24);
CASE_LSHIFT_m128(a, 25);
CASE_LSHIFT_m128(a, 26);
CASE_LSHIFT_m128(a, 27);
CASE_LSHIFT_m128(a, 28);
CASE_LSHIFT_m128(a, 29);
CASE_LSHIFT_m128(a, 30);
CASE_LSHIFT_m128(a, 31);
default: return zeroes128(); break;
}
#undef CASE_LSHIFT_m128
}
static really_really_inline
m128 rshift_m128(m128 a, unsigned b) {
return (m128) vshrq_n_u32((uint32x4_t)a, b);
#if defined(HAVE__BUILTIN_CONSTANT_P)
if (__builtin_constant_p(b)) {
return (m128) vshrq_n_u32((uint32x4_t)a, b);
}
#endif
#define CASE_RSHIFT_m128(a, offset) case offset: return (m128)vshrq_n_u32((uint32x4_t)(a), (offset)); break;
switch (b) {
case 0: return a; break;
CASE_RSHIFT_m128(a, 1);
CASE_RSHIFT_m128(a, 2);
CASE_RSHIFT_m128(a, 3);
CASE_RSHIFT_m128(a, 4);
CASE_RSHIFT_m128(a, 5);
CASE_RSHIFT_m128(a, 6);
CASE_RSHIFT_m128(a, 7);
CASE_RSHIFT_m128(a, 8);
CASE_RSHIFT_m128(a, 9);
CASE_RSHIFT_m128(a, 10);
CASE_RSHIFT_m128(a, 11);
CASE_RSHIFT_m128(a, 12);
CASE_RSHIFT_m128(a, 13);
CASE_RSHIFT_m128(a, 14);
CASE_RSHIFT_m128(a, 15);
CASE_RSHIFT_m128(a, 16);
CASE_RSHIFT_m128(a, 17);
CASE_RSHIFT_m128(a, 18);
CASE_RSHIFT_m128(a, 19);
CASE_RSHIFT_m128(a, 20);
CASE_RSHIFT_m128(a, 21);
CASE_RSHIFT_m128(a, 22);
CASE_RSHIFT_m128(a, 23);
CASE_RSHIFT_m128(a, 24);
CASE_RSHIFT_m128(a, 25);
CASE_RSHIFT_m128(a, 26);
CASE_RSHIFT_m128(a, 27);
CASE_RSHIFT_m128(a, 28);
CASE_RSHIFT_m128(a, 29);
CASE_RSHIFT_m128(a, 30);
CASE_RSHIFT_m128(a, 31);
default: return zeroes128(); break;
}
#undef CASE_RSHIFT_m128
}
static really_really_inline
m128 lshift64_m128(m128 a, unsigned b) {
return (m128) vshlq_n_u64((uint64x2_t)a, b);
#if defined(HAVE__BUILTIN_CONSTANT_P)
if (__builtin_constant_p(b)) {
return (m128) vshlq_n_u64((uint64x2_t)a, b);
}
#endif
#define CASE_LSHIFT64_m128(a, offset) case offset: return (m128)vshlq_n_u64((uint64x2_t)(a), (offset)); break;
switch (b) {
case 0: return a; break;
CASE_LSHIFT64_m128(a, 1);
CASE_LSHIFT64_m128(a, 2);
CASE_LSHIFT64_m128(a, 3);
CASE_LSHIFT64_m128(a, 4);
CASE_LSHIFT64_m128(a, 5);
CASE_LSHIFT64_m128(a, 6);
CASE_LSHIFT64_m128(a, 7);
CASE_LSHIFT64_m128(a, 8);
CASE_LSHIFT64_m128(a, 9);
CASE_LSHIFT64_m128(a, 10);
CASE_LSHIFT64_m128(a, 11);
CASE_LSHIFT64_m128(a, 12);
CASE_LSHIFT64_m128(a, 13);
CASE_LSHIFT64_m128(a, 14);
CASE_LSHIFT64_m128(a, 15);
CASE_LSHIFT64_m128(a, 16);
CASE_LSHIFT64_m128(a, 17);
CASE_LSHIFT64_m128(a, 18);
CASE_LSHIFT64_m128(a, 19);
CASE_LSHIFT64_m128(a, 20);
CASE_LSHIFT64_m128(a, 21);
CASE_LSHIFT64_m128(a, 22);
CASE_LSHIFT64_m128(a, 23);
CASE_LSHIFT64_m128(a, 24);
CASE_LSHIFT64_m128(a, 25);
CASE_LSHIFT64_m128(a, 26);
CASE_LSHIFT64_m128(a, 27);
CASE_LSHIFT64_m128(a, 28);
CASE_LSHIFT64_m128(a, 29);
CASE_LSHIFT64_m128(a, 30);
CASE_LSHIFT64_m128(a, 31);
CASE_LSHIFT64_m128(a, 32);
CASE_LSHIFT64_m128(a, 33);
CASE_LSHIFT64_m128(a, 34);
CASE_LSHIFT64_m128(a, 35);
CASE_LSHIFT64_m128(a, 36);
CASE_LSHIFT64_m128(a, 37);
CASE_LSHIFT64_m128(a, 38);
CASE_LSHIFT64_m128(a, 39);
CASE_LSHIFT64_m128(a, 40);
CASE_LSHIFT64_m128(a, 41);
CASE_LSHIFT64_m128(a, 42);
CASE_LSHIFT64_m128(a, 43);
CASE_LSHIFT64_m128(a, 44);
CASE_LSHIFT64_m128(a, 45);
CASE_LSHIFT64_m128(a, 46);
CASE_LSHIFT64_m128(a, 47);
CASE_LSHIFT64_m128(a, 48);
CASE_LSHIFT64_m128(a, 49);
CASE_LSHIFT64_m128(a, 50);
CASE_LSHIFT64_m128(a, 51);
CASE_LSHIFT64_m128(a, 52);
CASE_LSHIFT64_m128(a, 53);
CASE_LSHIFT64_m128(a, 54);
CASE_LSHIFT64_m128(a, 55);
CASE_LSHIFT64_m128(a, 56);
CASE_LSHIFT64_m128(a, 57);
CASE_LSHIFT64_m128(a, 58);
CASE_LSHIFT64_m128(a, 59);
CASE_LSHIFT64_m128(a, 60);
CASE_LSHIFT64_m128(a, 61);
CASE_LSHIFT64_m128(a, 62);
CASE_LSHIFT64_m128(a, 63);
default: return zeroes128(); break;
}
#undef CASE_LSHIFT64_m128
}
static really_really_inline
m128 rshift64_m128(m128 a, unsigned b) {
return (m128) vshrq_n_u64((uint64x2_t)a, b);
#if defined(HAVE__BUILTIN_CONSTANT_P)
if (__builtin_constant_p(b)) {
return (m128) vshrq_n_u64((uint64x2_t)a, b);
}
#endif
#define CASE_RSHIFT64_m128(a, offset) case offset: return (m128)vshrq_n_u64((uint64x2_t)(a), (offset)); break;
switch (b) {
case 0: return a; break;
CASE_RSHIFT64_m128(a, 1);
CASE_RSHIFT64_m128(a, 2);
CASE_RSHIFT64_m128(a, 3);
CASE_RSHIFT64_m128(a, 4);
CASE_RSHIFT64_m128(a, 5);
CASE_RSHIFT64_m128(a, 6);
CASE_RSHIFT64_m128(a, 7);
CASE_RSHIFT64_m128(a, 8);
CASE_RSHIFT64_m128(a, 9);
CASE_RSHIFT64_m128(a, 10);
CASE_RSHIFT64_m128(a, 11);
CASE_RSHIFT64_m128(a, 12);
CASE_RSHIFT64_m128(a, 13);
CASE_RSHIFT64_m128(a, 14);
CASE_RSHIFT64_m128(a, 15);
CASE_RSHIFT64_m128(a, 16);
CASE_RSHIFT64_m128(a, 17);
CASE_RSHIFT64_m128(a, 18);
CASE_RSHIFT64_m128(a, 19);
CASE_RSHIFT64_m128(a, 20);
CASE_RSHIFT64_m128(a, 21);
CASE_RSHIFT64_m128(a, 22);
CASE_RSHIFT64_m128(a, 23);
CASE_RSHIFT64_m128(a, 24);
CASE_RSHIFT64_m128(a, 25);
CASE_RSHIFT64_m128(a, 26);
CASE_RSHIFT64_m128(a, 27);
CASE_RSHIFT64_m128(a, 28);
CASE_RSHIFT64_m128(a, 29);
CASE_RSHIFT64_m128(a, 30);
CASE_RSHIFT64_m128(a, 31);
CASE_RSHIFT64_m128(a, 32);
CASE_RSHIFT64_m128(a, 33);
CASE_RSHIFT64_m128(a, 34);
CASE_RSHIFT64_m128(a, 35);
CASE_RSHIFT64_m128(a, 36);
CASE_RSHIFT64_m128(a, 37);
CASE_RSHIFT64_m128(a, 38);
CASE_RSHIFT64_m128(a, 39);
CASE_RSHIFT64_m128(a, 40);
CASE_RSHIFT64_m128(a, 41);
CASE_RSHIFT64_m128(a, 42);
CASE_RSHIFT64_m128(a, 43);
CASE_RSHIFT64_m128(a, 44);
CASE_RSHIFT64_m128(a, 45);
CASE_RSHIFT64_m128(a, 46);
CASE_RSHIFT64_m128(a, 47);
CASE_RSHIFT64_m128(a, 48);
CASE_RSHIFT64_m128(a, 49);
CASE_RSHIFT64_m128(a, 50);
CASE_RSHIFT64_m128(a, 51);
CASE_RSHIFT64_m128(a, 52);
CASE_RSHIFT64_m128(a, 53);
CASE_RSHIFT64_m128(a, 54);
CASE_RSHIFT64_m128(a, 55);
CASE_RSHIFT64_m128(a, 56);
CASE_RSHIFT64_m128(a, 57);
CASE_RSHIFT64_m128(a, 58);
CASE_RSHIFT64_m128(a, 59);
CASE_RSHIFT64_m128(a, 60);
CASE_RSHIFT64_m128(a, 61);
CASE_RSHIFT64_m128(a, 62);
CASE_RSHIFT64_m128(a, 63);
default: return zeroes128(); break;
}
#undef CASE_RSHIFT64_m128
}
static really_inline m128 eq128(m128 a, m128 b) {
@ -191,9 +419,11 @@ m128 load_m128_from_u64a(const u64a *p) {
}
static really_inline u32 extract32from128(const m128 in, unsigned imm) {
#if defined(HS_OPTIMIZE)
return vgetq_lane_u32((uint32x4_t) in, imm);
#else
#if defined(HAVE__BUILTIN_CONSTANT_P)
if (__builtin_constant_p(imm)) {
return vgetq_lane_u32((uint32x4_t) in, imm);
}
#endif
switch (imm) {
case 0:
return vgetq_lane_u32((uint32x4_t) in, 0);
@ -211,13 +441,14 @@ static really_inline u32 extract32from128(const m128 in, unsigned imm) {
return 0;
break;
}
#endif
}
static really_inline u64a extract64from128(const m128 in, unsigned imm) {
#if defined(HS_OPTIMIZE)
return vgetq_lane_u64((uint64x2_t) in, imm);
#else
#if defined(HAVE__BUILTIN_CONSTANT_P)
if (__builtin_constant_p(imm)) {
return vgetq_lane_u64((uint64x2_t) in, imm);
}
#endif
switch (imm) {
case 0:
return vgetq_lane_u64((uint64x2_t) in, 0);
@ -229,7 +460,6 @@ static really_inline u64a extract64from128(const m128 in, unsigned imm) {
return 0;
break;
}
#endif
}
static really_inline m128 low64from128(const m128 in) {

View File

@ -30,7 +30,7 @@
#define ARCH_PPC64EL_SIMD_TYPES_H
#if !defined(m128) && defined(HAVE_VSX)
typedef __vector int32_t m128;
typedef __vector int m128;
#endif
#endif /* ARCH_PPC64EL_SIMD_TYPES_H */

View File

@ -43,6 +43,18 @@
#include <string.h> // for memcpy
typedef __vector unsigned long long int uint64x2_t;
typedef __vector signed long long int int64x2_t;
typedef __vector unsigned int uint32x4_t;
typedef __vector signed int int32x4_t;
typedef __vector unsigned short int uint16x8_t;
typedef __vector signed short int int16x8_t;
typedef __vector unsigned char uint8x16_t;
typedef __vector signed char int8x16_t;
typedef unsigned long long int ulong64_t;
typedef signed long long int long64_t;
/*
typedef __vector uint64_t uint64x2_t;
typedef __vector int64_t int64x2_t;
typedef __vector uint32_t uint32x4_t;
@ -50,7 +62,7 @@ typedef __vector int32_t int32x4_t;
typedef __vector uint16_t uint16x8_t;
typedef __vector int16_t int16x8_t;
typedef __vector uint8_t uint8x16_t;
typedef __vector int8_t int8x16_t;
typedef __vector int8_t int8x16_t;*/
#define ZEROES_8 0, 0, 0, 0, 0, 0, 0, 0
@ -182,13 +194,13 @@ m128 rshift_m128(m128 a, unsigned b) {
static really_really_inline
m128 lshift64_m128(m128 a, unsigned b) {
uint64x2_t shift_indices = vec_splats((uint64_t)b);
uint64x2_t shift_indices = vec_splats((ulong64_t)b);
return (m128) vec_sl((int64x2_t)a, shift_indices);
}
static really_really_inline
m128 rshift64_m128(m128 a, unsigned b) {
uint64x2_t shift_indices = vec_splats((uint64_t)b);
uint64x2_t shift_indices = vec_splats((ulong64_t)b);
return (m128) vec_sr((int64x2_t)a, shift_indices);
}
@ -213,11 +225,11 @@ static really_inline u32 movemask128(m128 a) {
uint32x4_t s3 = vec_or((uint32x4_t)ss2, res_and2);
uint64x2_t ss3 = vec_sr((uint64x2_t)s3, (uint64x2_t)vec_splats(28));
uint64x2_t res_and3 = vec_and((uint64x2_t)s3, vec_splats((uint64_t)0xff));
uint64x2_t res_and3 = vec_and((uint64x2_t)s3, vec_splats((ulong64_t)0xff));
uint64x2_t s4 = vec_or((uint64x2_t)ss3, res_and3);
uint64x2_t ss4 = vec_sld((uint64x2_t)vec_splats(0), s4, 9);
uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((uint64_t)0xff));
uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((ulong64_t)0xff));
uint64x2_t s5 = vec_or((uint64x2_t)ss4, res_and4);
return s5[0];

View File

@ -30,7 +30,7 @@
#ifndef SIMD_TYPES_X86_H
#define SIMD_TYPES_X86_H
#if !defined(m128) && defined(HAVE_SSE2)
#if !defined(m128) && defined(HAVE_SSE42)
typedef __m128i m128;
#endif

View File

@ -51,6 +51,7 @@ typedef struct ALIGN_AVX_DIRECTIVE {m128 lo; m128 hi;} m256;
#endif
typedef struct {m128 lo; m128 mid; m128 hi;} m384;
#if !defined(m512) && !defined(HAVE_SIMD_512_BITS)
typedef struct ALIGN_ATTR(64) {m256 lo; m256 hi;} m512;
#endif

View File

@ -45,112 +45,112 @@ really_inline SuperVector<16>::SuperVector(typename base_type::type const v)
template<>
template<>
really_inline SuperVector<16>::SuperVector<int8x16_t>(int8x16_t other)
really_inline SuperVector<16>::SuperVector(int8x16_t other)
{
u.s8x16[0] = other;
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<uint8x16_t>(uint8x16_t other)
really_inline SuperVector<16>::SuperVector(uint8x16_t other)
{
u.u8x16[0] = other;
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<int16x8_t>(int16x8_t other)
really_inline SuperVector<16>::SuperVector(int16x8_t other)
{
u.s16x8[0] = other;
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<uint16x8_t>(uint16x8_t other)
really_inline SuperVector<16>::SuperVector(uint16x8_t other)
{
u.u16x8[0] = other;
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<int32x4_t>(int32x4_t other)
really_inline SuperVector<16>::SuperVector(int32x4_t other)
{
u.s32x4[0] = other;
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<uint32x4_t>(uint32x4_t other)
really_inline SuperVector<16>::SuperVector(uint32x4_t other)
{
u.u32x4[0] = other;
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<int64x2_t>(int64x2_t other)
really_inline SuperVector<16>::SuperVector(int64x2_t other)
{
u.s64x2[0] = other;
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<uint64x2_t>(uint64x2_t other)
really_inline SuperVector<16>::SuperVector(uint64x2_t other)
{
u.u64x2[0] = other;
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<int8_t>(int8_t const other)
really_inline SuperVector<16>::SuperVector(int8_t const other)
{
u.s8x16[0] = vdupq_n_s8(other);
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<uint8_t>(uint8_t const other)
really_inline SuperVector<16>::SuperVector(uint8_t const other)
{
u.u8x16[0] = vdupq_n_u8(other);
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<int16_t>(int16_t const other)
really_inline SuperVector<16>::SuperVector(int16_t const other)
{
u.s16x8[0] = vdupq_n_s16(other);
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<uint16_t>(uint16_t const other)
really_inline SuperVector<16>::SuperVector(uint16_t const other)
{
u.u16x8[0] = vdupq_n_u16(other);
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<int32_t>(int32_t const other)
really_inline SuperVector<16>::SuperVector(int32_t const other)
{
u.s32x4[0] = vdupq_n_s32(other);
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<uint32_t>(uint32_t const other)
really_inline SuperVector<16>::SuperVector(uint32_t const other)
{
u.u32x4[0] = vdupq_n_u32(other);
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<int64_t>(int64_t const other)
really_inline SuperVector<16>::SuperVector(int64_t const other)
{
u.s64x2[0] = vdupq_n_s64(other);
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<uint64_t>(uint64_t const other)
really_inline SuperVector<16>::SuperVector(uint64_t const other)
{
u.u64x2[0] = vdupq_n_u64(other);
}
@ -376,7 +376,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_8 (uint8_t const N) const
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u8(u.u8x16[0], n)}; });
Unroller<1, 8>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u8(v->u.u8x16[0], n)}; });
return result;
}
@ -386,7 +386,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_16 (uint8_t const N) const
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u16(u.u16x8[0], n)}; });
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u16(v->u.u16x8[0], n)}; });
return result;
}
@ -394,9 +394,9 @@ template <>
really_inline SuperVector<16> SuperVector<16>::vshl_32 (uint8_t const N) const
{
if (N == 0) return *this;
if (N == 16) return Zeroes();
if (N == 32) return Zeroes();
SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u32(u.u32x4[0], n)}; });
Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u32(v->u.u32x4[0], n)}; });
return result;
}
@ -404,9 +404,9 @@ template <>
really_inline SuperVector<16> SuperVector<16>::vshl_64 (uint8_t const N) const
{
if (N == 0) return *this;
if (N == 16) return Zeroes();
if (N == 64) return Zeroes();
SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u64(u.u64x2[0], n)}; });
Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshlq_n_u64(v->u.u64x2[0], n)}; });
return result;
}
@ -416,7 +416,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_128(uint8_t const N) const
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vextq_u8(vdupq_n_u8(0), u.u8x16[0], 16 - n)}; });
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vextq_u8(vdupq_n_u8(0), v->u.u8x16[0], 16 - n)}; });
return result;
}
@ -430,9 +430,9 @@ template <>
really_inline SuperVector<16> SuperVector<16>::vshr_8 (uint8_t const N) const
{
if (N == 0) return *this;
if (N == 16) return Zeroes();
if (N == 8) return Zeroes();
SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u8(u.u8x16[0], n)}; });
Unroller<1, 8>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u8(v->u.u8x16[0], n)}; });
return result;
}
@ -442,7 +442,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const N) const
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u16(u.u16x8[0], n)}; });
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u16(v->u.u16x8[0], n)}; });
return result;
}
@ -450,9 +450,9 @@ template <>
really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const N) const
{
if (N == 0) return *this;
if (N == 16) return Zeroes();
if (N == 32) return Zeroes();
SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u32(u.u32x4[0], n)}; });
Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u32(v->u.u32x4[0], n)}; });
return result;
}
@ -460,9 +460,9 @@ template <>
really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const N) const
{
if (N == 0) return *this;
if (N == 16) return Zeroes();
if (N == 64) return Zeroes();
SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u64(u.u64x2[0], n)}; });
Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vshrq_n_u64(v->u.u64x2[0], n)}; });
return result;
}
@ -472,7 +472,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const N) const
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vextq_u8(u.u8x16[0], vdupq_n_u8(0), n)}; });
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vextq_u8(v->u.u8x16[0], vdupq_n_u8(0), n)}; });
return result;
}

View File

@ -39,16 +39,6 @@
#include "util/supervector/supervector.hpp"
#include <iostream>
typedef __vector uint64_t uint64x2_t;
typedef __vector int64_t int64x2_t;
typedef __vector uint32_t uint32x4_t;
typedef __vector int32_t int32x4_t;
typedef __vector uint16_t uint16x8_t;
typedef __vector int16_t int16x8_t;
typedef __vector uint8_t uint8x16_t;
typedef __vector int8_t int8x16_t;
// 128-bit Powerpc64le implementation
template<>
@ -65,58 +55,58 @@ really_inline SuperVector<16>::SuperVector(typename base_type::type const v)
template<>
template<>
really_inline SuperVector<16>::SuperVector<int8_t>(int8_t const other)
really_inline SuperVector<16>::SuperVector(int8_t const other)
{
u.v128[0] = (m128) vec_splats(other);
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<uint8_t>(uint8_t const other)
really_inline SuperVector<16>::SuperVector(uint8_t const other)
{
u.v128[0] = (m128) vec_splats(static_cast<uint8_t>(other));
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<int16_t>(int16_t const other)
really_inline SuperVector<16>::SuperVector(int16_t const other)
{
u.v128[0] = (m128) vec_splats(other);
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<uint16_t>(uint16_t const other)
really_inline SuperVector<16>::SuperVector(uint16_t const other)
{
u.v128[0] = (m128) vec_splats(static_cast<uint16_t>(other));
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<int32_t>(int32_t const other)
really_inline SuperVector<16>::SuperVector(int32_t const other)
{
u.v128[0] = (m128) vec_splats(other);
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<uint32_t>(uint32_t const other)
really_inline SuperVector<16>::SuperVector(uint32_t const other)
{
u.v128[0] = (m128) vec_splats(static_cast<uint32_t>(other));
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<int64_t>(int64_t const other)
really_inline SuperVector<16>::SuperVector(int64_t const other)
{
u.v128[0] = (m128) vec_splats(other);
u.v128[0] = (m128) vec_splats(static_cast<ulong64_t>(other));
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<uint64_t>(uint64_t const other)
really_inline SuperVector<16>::SuperVector(uint64_t const other)
{
u.v128[0] = (m128) vec_splats(static_cast<uint64_t>(other));
u.v128[0] = (m128) vec_splats(static_cast<ulong64_t>(other));
}
// Constants
@ -229,11 +219,11 @@ really_inline typename SuperVector<16>::movemask_type SuperVector<16>::movemask(
uint32x4_t s3 = vec_or((uint32x4_t)ss2, res_and2);
uint64x2_t ss3 = vec_sr((uint64x2_t)s3, (uint64x2_t)vec_splats(28));
uint64x2_t res_and3 = vec_and((uint64x2_t)s3, vec_splats((uint64_t)0xff));
uint64x2_t res_and3 = vec_and((uint64x2_t)s3, vec_splats((ulong64_t)0xff));
uint64x2_t s4 = vec_or((uint64x2_t)ss3, res_and3);
uint64x2_t ss4 = vec_sld((uint64x2_t) vec_splats(0), s4, 9);
uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((uint64_t)0xff));
uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((ulong64_t)0xff));
uint64x2_t s5 = vec_or((uint64x2_t)ss4, res_and4);
return s5[0];
@ -271,7 +261,7 @@ template <>
template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshl_64_imm() const
{
return { (m128) vec_sl(u.s64x2[0], vec_splats((uint64_t)N)) };
return { (m128) vec_sl(u.s64x2[0], vec_splats((ulong64_t)N)) };
}
template <>
@ -313,7 +303,7 @@ template <>
template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshr_64_imm() const
{
return { (m128) vec_sr(u.s64x2[0], vec_splats((uint64_t)N)) };
return { (m128) vec_sr(u.s64x2[0], vec_splats((ulong64_t)N)) };
}
template <>
@ -352,7 +342,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_8 (uint8_t const N) const
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(u.s8x16[0], vec_splats((uint8_t)n))}; });
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(v->u.s8x16[0], vec_splats((uint8_t)n))}; });
return result;
}
@ -362,7 +352,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_16 (uint8_t const UNUSED N)
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(u.s16x8[0], vec_splats((uint16_t)n))}; });
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(v->u.s16x8[0], vec_splats((uint16_t)n))}; });
return result;
}
@ -372,7 +362,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_32 (uint8_t const N) const
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(u.s32x4[0], vec_splats((uint32_t)n))}; });
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(v->u.s32x4[0], vec_splats((uint32_t)n))}; });
return result;
}
@ -382,7 +372,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_64 (uint8_t const N) const
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(u.s64x2[0], vec_splats((uint64_t)n))}; });
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(v->u.s64x2[0], vec_splats((ulong64_t)n))}; });
return result;
}
@ -392,7 +382,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_128(uint8_t const N) const
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sld(u.s8x16[0], (int8x16_t)vec_splat_s8(0), n)}; });
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sld(v->u.s8x16[0], (int8x16_t)vec_splat_s8(0), n)}; });
return result;
}
@ -408,7 +398,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_8 (uint8_t const N) const
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(u.s8x16[0], vec_splats((uint8_t)n))}; });
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(v->u.s8x16[0], vec_splats((uint8_t)n))}; });
return result;
}
@ -418,7 +408,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const N) const
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(u.s16x8[0], vec_splats((uint16_t)n))}; });
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(v->u.s16x8[0], vec_splats((uint16_t)n))}; });
return result;
}
@ -428,7 +418,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const N) const
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(u.s32x4[0], vec_splats((uint32_t)n))}; });
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(v->u.s32x4[0], vec_splats((uint32_t)n))}; });
return result;
}
@ -438,7 +428,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const N) const
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(u.s64x2[0], vec_splats((uint64_t)n))}; });
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(v->u.s64x2[0], vec_splats((ulong64_t)n))}; });
return result;
}
@ -448,7 +438,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const UNUSED N)
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sld((int8x16_t)vec_splat_u8(0), u.s8x16[0], 16 - n)}; });
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sld((int8x16_t)vec_splat_u8(0), v->u.s8x16[0], 16 - n)}; });
return result;
}
@ -523,14 +513,14 @@ really_inline SuperVector<16> SuperVector<16>::Ones_vshl(uint8_t const N)
template <>
really_inline SuperVector<16> SuperVector<16>::loadu(void const *ptr)
{
return (m128) vec_xl(0, (const int64_t*)ptr);
return (m128) vec_xl(0, (const long64_t*)ptr);
}
template <>
really_inline SuperVector<16> SuperVector<16>::load(void const *ptr)
{
assert(ISALIGNED_N(ptr, alignof(SuperVector::size)));
return (m128) vec_xl(0, (const int64_t*)ptr);
return (m128) vec_xl(0, (const long64_t*)ptr);
}
template <>

View File

@ -27,6 +27,18 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
typedef __vector unsigned long long int uint64x2_t;
typedef __vector signed long long int int64x2_t;
typedef __vector unsigned int uint32x4_t;
typedef __vector signed int int32x4_t;
typedef __vector unsigned short int uint16x8_t;
typedef __vector signed short int int16x8_t;
typedef __vector unsigned char uint8x16_t;
typedef __vector signed char int8x16_t;
typedef unsigned long long int ulong64_t;
typedef signed long long int long64_t;
#if !defined(m128) && defined(HAVE_VSX)
typedef __vector int32_t m128;
typedef __vector int m128;
#endif

View File

@ -55,56 +55,56 @@ really_inline SuperVector<16>::SuperVector(typename base_type::type const v)
template<>
template<>
really_inline SuperVector<16>::SuperVector<int8_t>(int8_t const other)
really_inline SuperVector<16>::SuperVector(int8_t const other)
{
u.v128[0] = _mm_set1_epi8(other);
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<uint8_t>(uint8_t const other)
really_inline SuperVector<16>::SuperVector(uint8_t const other)
{
u.v128[0] = _mm_set1_epi8(static_cast<int8_t>(other));
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<int16_t>(int16_t const other)
really_inline SuperVector<16>::SuperVector(int16_t const other)
{
u.v128[0] = _mm_set1_epi16(other);
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<uint16_t>(uint16_t const other)
really_inline SuperVector<16>::SuperVector(uint16_t const other)
{
u.v128[0] = _mm_set1_epi16(static_cast<int16_t>(other));
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<int32_t>(int32_t const other)
really_inline SuperVector<16>::SuperVector(int32_t const other)
{
u.v128[0] = _mm_set1_epi32(other);
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<uint32_t>(uint32_t const other)
really_inline SuperVector<16>::SuperVector(uint32_t const other)
{
u.v128[0] = _mm_set1_epi32(static_cast<int32_t>(other));
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<int64_t>(int64_t const other)
really_inline SuperVector<16>::SuperVector(int64_t const other)
{
u.v128[0] = _mm_set1_epi64x(other);
}
template<>
template<>
really_inline SuperVector<16>::SuperVector<uint64_t>(uint64_t const other)
really_inline SuperVector<16>::SuperVector(uint64_t const other)
{
u.v128[0] = _mm_set1_epi64x(static_cast<int64_t>(other));
}
@ -608,56 +608,56 @@ really_inline SuperVector<32>::SuperVector(SuperVector<16> const lo, SuperVector
template<>
template<>
really_inline SuperVector<32>::SuperVector<int8_t>(int8_t const other)
really_inline SuperVector<32>::SuperVector(int8_t const other)
{
u.v256[0] = _mm256_set1_epi8(other);
}
template<>
template<>
really_inline SuperVector<32>::SuperVector<uint8_t>(uint8_t const other)
really_inline SuperVector<32>::SuperVector(uint8_t const other)
{
u.v256[0] = _mm256_set1_epi8(static_cast<int8_t>(other));
}
template<>
template<>
really_inline SuperVector<32>::SuperVector<int16_t>(int16_t const other)
really_inline SuperVector<32>::SuperVector(int16_t const other)
{
u.v256[0] = _mm256_set1_epi16(other);
}
template<>
template<>
really_inline SuperVector<32>::SuperVector<uint16_t>(uint16_t const other)
really_inline SuperVector<32>::SuperVector(uint16_t const other)
{
u.v256[0] = _mm256_set1_epi16(static_cast<int16_t>(other));
}
template<>
template<>
really_inline SuperVector<32>::SuperVector<int32_t>(int32_t const other)
really_inline SuperVector<32>::SuperVector(int32_t const other)
{
u.v256[0] = _mm256_set1_epi32(other);
}
template<>
template<>
really_inline SuperVector<32>::SuperVector<uint32_t>(uint32_t const other)
really_inline SuperVector<32>::SuperVector(uint32_t const other)
{
u.v256[0] = _mm256_set1_epi32(static_cast<int32_t>(other));
}
template<>
template<>
really_inline SuperVector<32>::SuperVector<int64_t>(int64_t const other)
really_inline SuperVector<32>::SuperVector(int64_t const other)
{
u.v256[0] = _mm256_set1_epi64x(other);
}
template<>
template<>
really_inline SuperVector<32>::SuperVector<uint64_t>(uint64_t const other)
really_inline SuperVector<32>::SuperVector(uint64_t const other)
{
u.v256[0] = _mm256_set1_epi64x(static_cast<int64_t>(other));
}
@ -804,7 +804,7 @@ really_inline SuperVector<32> SuperVector<32>::vshl_128_imm() const
template <>
template<uint8_t N>
really_inline SuperVector<16> SuperVector<32>::vshl_256_imm() const
really_inline SuperVector<32> SuperVector<32>::vshl_256_imm() const
{
if (N == 0) return *this;
if (N == 16) return {_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0))};
@ -950,11 +950,11 @@ really_inline SuperVector<32> SuperVector<32>::vshl_256(uint8_t const N) const
SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) {
constexpr uint8_t n = i.value;
if (N == n) result = {_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - n)};;
if (N == n) result = {_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - n)};;
});
Unroller<17, 32>::iterator([&,v=this](auto const i) {
constexpr uint8_t n = i.value;
if (N == n) result = {_mm256_slli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), n - 16)};
if (N == n) result = {_mm256_slli_si256(_mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), n - 16)};
});
return result;
}
@ -1240,56 +1240,56 @@ really_inline SuperVector<64>::SuperVector(m128 const v)
template<>
template<>
really_inline SuperVector<64>::SuperVector<int8_t>(int8_t const o)
really_inline SuperVector<64>::SuperVector(int8_t const o)
{
u.v512[0] = _mm512_set1_epi8(o);
}
template<>
template<>
really_inline SuperVector<64>::SuperVector<uint8_t>(uint8_t const o)
really_inline SuperVector<64>::SuperVector(uint8_t const o)
{
u.v512[0] = _mm512_set1_epi8(static_cast<int8_t>(o));
}
template<>
template<>
really_inline SuperVector<64>::SuperVector<int16_t>(int16_t const o)
really_inline SuperVector<64>::SuperVector(int16_t const o)
{
u.v512[0] = _mm512_set1_epi16(o);
}
template<>
template<>
really_inline SuperVector<64>::SuperVector<uint16_t>(uint16_t const o)
really_inline SuperVector<64>::SuperVector(uint16_t const o)
{
u.v512[0] = _mm512_set1_epi16(static_cast<int16_t>(o));
}
template<>
template<>
really_inline SuperVector<64>::SuperVector<int32_t>(int32_t const o)
really_inline SuperVector<64>::SuperVector(int32_t const o)
{
u.v512[0] = _mm512_set1_epi32(o);
}
template<>
template<>
really_inline SuperVector<64>::SuperVector<uint32_t>(uint32_t const o)
really_inline SuperVector<64>::SuperVector(uint32_t const o)
{
u.v512[0] = _mm512_set1_epi32(static_cast<int32_t>(o));
}
template<>
template<>
really_inline SuperVector<64>::SuperVector<int64_t>(int64_t const o)
really_inline SuperVector<64>::SuperVector(int64_t const o)
{
u.v512[0] = _mm512_set1_epi64(o);
}
template<>
template<>
really_inline SuperVector<64>::SuperVector<uint64_t>(uint64_t const o)
really_inline SuperVector<64>::SuperVector(uint64_t const o)
{
u.v512[0] = _mm512_set1_epi64(static_cast<int64_t>(o));
}

View File

@ -165,7 +165,7 @@ public:
typename BaseVector<32>::type ALIGN_ATTR(BaseVector<32>::size) v256[SIZE / BaseVector<32>::size];
typename BaseVector<64>::type ALIGN_ATTR(BaseVector<64>::size) v512[SIZE / BaseVector<64>::size];
#if defined(ARCH_ARM32) || defined(ARCH_AARCH64)
#if defined(ARCH_ARM32) || defined(ARCH_AARCH64) || defined(ARCH_PPC64EL)
uint64x2_t ALIGN_ATTR(BaseVector<16>::size) u64x2[SIZE / BaseVector<16>::size];
int64x2_t ALIGN_ATTR(BaseVector<16>::size) s64x2[SIZE / BaseVector<16>::size];
uint32x4_t ALIGN_ATTR(BaseVector<16>::size) u32x4[SIZE / BaseVector<16>::size];
@ -176,17 +176,6 @@ public:
int8x16_t ALIGN_ATTR(BaseVector<16>::size) s8x16[SIZE / BaseVector<16>::size];
#endif
#if defined(ARCH_PPC64EL)
__vector uint64_t ALIGN_ATTR(BaseVector<16>::size) u64x2[SIZE / BaseVector<16>::size];
__vector int64_t ALIGN_ATTR(BaseVector<16>::size) s64x2[SIZE / BaseVector<16>::size];
__vector uint32_t ALIGN_ATTR(BaseVector<16>::size) u32x4[SIZE / BaseVector<16>::size];
__vector int32_t ALIGN_ATTR(BaseVector<16>::size) s32x4[SIZE / BaseVector<16>::size];
__vector uint16_t ALIGN_ATTR(BaseVector<16>::size) u16x8[SIZE / BaseVector<16>::size];
__vector int16_t ALIGN_ATTR(BaseVector<16>::size) s16x8[SIZE / BaseVector<16>::size];
__vector uint8_t ALIGN_ATTR(BaseVector<16>::size) u8x16[SIZE / BaseVector<16>::size];
__vector int8_t ALIGN_ATTR(BaseVector<16>::size) s8x16[SIZE / BaseVector<16>::size];
#endif
uint64_t u64[SIZE / sizeof(uint64_t)];
int64_t s64[SIZE / sizeof(int64_t)];
uint32_t u32[SIZE / sizeof(uint32_t)];
@ -200,7 +189,7 @@ public:
} u;
constexpr SuperVector() {};
constexpr SuperVector(SuperVector const &other)
SuperVector(SuperVector const &other)
:u(other.u) {};
SuperVector(typename base_type::type const v);

View File

@ -667,7 +667,7 @@ TEST(SimdUtilsTest, movq) {
simd = _mm_set_epi64x(~0LL, 0x123456789abcdef);
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
int64x2_t a = { 0x123456789abcdefLL, ~0LL };
simd = vreinterpretq_s64_s8(a);
simd = vreinterpretq_s32_s64(a);
#elif defined(ARCH_PPC64EL)
int64x2_t a = {0x123456789abcdefLL, ~0LL };
simd = (m128) a;

View File

@ -33,9 +33,6 @@ SET(corpusomatic_SRCS
ng_find_matches.cpp
)
add_library(corpusomatic STATIC ${corpusomatic_SRCS})
if (ARCH_IA32 OR ARCH_X86_64)
set_target_properties(corpusomatic PROPERTIES COMPILE_FLAGS "-mssse3")
endif ()
set(databaseutil_SRCS
database_util.cpp