mirror of https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00

Merge pull request #203 from VectorCamp/feature/enable-simde-backend

Feature/enable simde backend

commit a26bed96bc
.gitmodules (vendored, new file, +3)

@@ -0,0 +1,3 @@
+[submodule "simde"]
+	path = simde
+	url = https://github.com/simd-everywhere/simde.git
@@ -119,15 +119,22 @@ endif()
# Detect OS and if Fat Runtime is available
include (${CMAKE_MODULE_PATH}/osdetection.cmake)

-if (ARCH_IA32 OR ARCH_X86_64)
+if(SIMDE_BACKEND)
+    include (${CMAKE_MODULE_PATH}/simde.cmake)
+elseif (ARCH_IA32 OR ARCH_X86_64)
    include (${CMAKE_MODULE_PATH}/cflags-x86.cmake)
    set(ARCH_FLAG march)
elseif (ARCH_ARM32 OR ARCH_AARCH64)
    include (${CMAKE_MODULE_PATH}/cflags-arm.cmake)
    set(ARCH_FLAG march)
elseif (ARCH_PPC64EL)
    include (${CMAKE_MODULE_PATH}/cflags-ppc64le.cmake)
else ()
    message(FATAL_ERROR "Unsupported platform")
endif ()

if (ARCH_PPC64EL)
    set(ARCH_FLAG mcpu)
else ()
    set(ARCH_FLAG march)
endif ()

# Detect Native arch flags if requested
@@ -239,8 +246,11 @@ set (hs_exec_common_SRCS
    src/util/arch/common/cpuid_flags.h
    src/util/multibit.c
)

-if (ARCH_IA32 OR ARCH_X86_64)
+if (SIMDE_BACKEND)
+    set (hs_exec_common_SRCS
+        ${hs_exec_common_SRCS}
+        src/util/arch/simde/cpuid_flags.c)
+elseif (ARCH_IA32 OR ARCH_X86_64)
    set (hs_exec_common_SRCS
        ${hs_exec_common_SRCS}
        src/util/arch/x86/cpuid_flags.c
@@ -398,7 +408,12 @@ set (hs_exec_SRCS
    src/database.h
)

-if (ARCH_IA32 OR ARCH_X86_64)
+if (SIMDE_BACKEND)
+    set (hs_exec_SRCS
+        ${hs_exec_SRCS}
+        src/nfa/vermicelli_simd.cpp
+        src/util/supervector/arch/x86/impl.cpp)
+elseif (ARCH_IA32 OR ARCH_X86_64)
    set (hs_exec_SRCS
        ${hs_exec_SRCS}
        src/nfa/vermicelli_simd.cpp
README.md (13 changed lines)

@@ -1,8 +1,12 @@
# About Vectorscan

A fork of Intel's Hyperscan, modified to run on more platforms. Currently ARM NEON/ASIMD
-is 100% functional, and Power VSX are in development. ARM SVE2 support is ongoing, with
+and Power VSX are 100% functional. ARM SVE2 support is ongoing, with
access to hardware now. More platforms will follow in the future.
+Furthermore, starting with 5.4.12 there is now a [SIMDe](https://github.com/simd-everywhere/simde)
+port, which can either be used for platforms without official SIMD support,
+since SIMDe can emulate SIMD instructions, or as an alternative backend for existing architectures,
+for reference and comparison purposes.

Vectorscan will follow Intel's API and internal algorithms where possible, but will not
hesitate to make code changes where they are thought to give better performance or better
@@ -148,6 +152,11 @@ Common options for Cmake are:

* `SANITIZE=[address|memory|undefined]` (experimental) Use the `libasan` sanitizer to detect possible bugs. For now only `address` is tested. This will eventually be integrated into the CI.

+## SIMDe options
+
+* `SIMDE_BACKEND=[On|Off]` Enable the SIMDe backend. If this is chosen, all native (SSE/AVX/AVX512/Neon/SVE/VSX) backends are disabled and a SIMDe SSE4.2 emulation backend is enabled instead; a usage sketch follows this list. This enables Vectorscan to build and run on architectures without SIMD.
+* `SIMDE_NATIVE=[On|Off]` Enable SIMDe native emulation of x86 SSE4.2 intrinsics on the build platform. That is, SSE4.2 intrinsics will be emulated using Neon on an Arm platform, VSX on a Power platform, etc.
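To make the emulation concrete, here is a minimal, hedged sketch of what the SIMDe backend provides (illustrative only, not part of this commit; it assumes the bundled SIMDe headers are on the include path). x86 intrinsics are spelled `simde_mm_*` and compile on any supported architecture:

```cpp
// Minimal sketch, assuming SIMDe headers are available. The simde_mm_*
// calls mirror SSE intrinsics and are lowered to native instructions
// where possible, or to portable C otherwise.
#include <simde/x86/sse4.2.h>
#include <cstdio>

int main() {
    simde__m128i a  = simde_mm_set1_epi8(0x20);
    simde__m128i b  = simde_mm_set1_epi8(0x20);
    simde__m128i eq = simde_mm_cmpeq_epi8(a, b);
    // Collapse the per-byte comparison into a 16-bit mask: 0xffff here.
    std::printf("%04x\n", simde_mm_movemask_epi8(eq));
    return 0;
}
```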
## Build

If `cmake` has completed successfully, you can run `make` in the same directory; if you have a multi-core system with `N` cores, running

@@ -177,4 +186,4 @@ the [Developer Reference Guide](http://intel.github.io/hyperscan/dev-reference/)

And you can find the source code [on Github](https://github.com/intel/hyperscan).

For Intel Hyperscan related issues and questions, please follow the relevant links there.
@@ -1,4 +1,7 @@
-if (NOT FAT_RUNTIME AND (BUILD_STATIC_AND_SHARED OR BUILD_STATIC_LIBS))
+include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR})
+include_directories(${PROJECT_SOURCE_DIR})
+
+if (NOT FAT_RUNTIME AND (BUILD_SHARED_LIBS OR BUILD_STATIC_LIBS))
    add_executable(benchmarks benchmarks.cpp)
    set_source_files_properties(benchmarks.cpp PROPERTIES COMPILE_FLAGS
        "-Wall -Wno-unused-variable")
@@ -67,7 +67,10 @@ if (USE_CPU_NATIVE)
        message(STATUS "clang will tune for ${GNUCC_ARCH}, ${TUNE_FLAG}")
    endif()
else()
-    if (ARCH_IA32 OR ARCH_X86_64)
+    if (SIMDE_BACKEND)
+        set(GNUCC_ARCH native)
+        set(TUNE_FLAG native)
+    elseif (ARCH_IA32 OR ARCH_X86_64)
        set(GNUCC_ARCH native)
        set(TUNE_FLAG generic)
    elseif(ARCH_AARCH64)
@@ -84,8 +87,11 @@ else()
    elseif(ARCH_ARM32)
        set(GNUCC_ARCH armv7a)
        set(TUNE_FLAG generic)
    elseif(ARCH_PPC64EL)
-        set(GNUCC_ARCH power8)
-        set(TUNE_FLAG power8)
+        set(GNUCC_ARCH power9)
+        set(TUNE_FLAG power9)
+    else()
+        set(GNUCC_ARCH native)
+        set(TUNE_FLAG native)
    endif()
endif()
cmake/simde.cmake (new file, +15)

@@ -0,0 +1,15 @@
+include_directories(${PROJECT_SOURCE_DIR}/simde/simde)
+
+CHECK_INCLUDE_FILES("simde/x86/sse4.2.h" SIMDE_SSE42_H_FOUND)
+
+if (SIMDE_SSE42_H_FOUND)
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DVS_SIMDE_BACKEND")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DVS_SIMDE_BACKEND")
+
+    if (SIMDE_NATIVE)
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DVS_SIMDE_NATIVE -DSIMDE_ENABLE_OPENMP -fopenmp-simd")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DVS_SIMDE_NATIVE -DSIMDE_ENABLE_OPENMP -fopenmp-simd")
+    endif()
+else()
+    message(FATAL_ERROR "SIMDe backend requested but SIMDe is not available on the system")
+endif()
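A note on the `SIMDE_NATIVE` flags above: `SIMDE_ENABLE_OPENMP` makes SIMDe annotate its portable fallback loops with OpenMP SIMD pragmas, and `-fopenmp-simd` turns those pragmas on without pulling in the OpenMP runtime. A hedged sketch of the idiom (illustrative only, not code from this commit):

```cpp
// Illustrative only: loops in this style vectorize under -fopenmp-simd
// with no OpenMP runtime dependency; SIMDe's C fallbacks use this idiom.
#include <cstddef>

void add_bytes(unsigned char *dst, const unsigned char *src, size_t n) {
#pragma omp simd
    for (size_t i = 0; i < n; i++) {
        dst[i] = static_cast<unsigned char>(dst[i] + src[i]);
    }
}
```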
simde (new submodule, +1)

@@ -0,0 +1 @@
+Subproject commit aae22459fa284e9fc2b7d4b8e4571afa0418125f
@@ -1,5 +1,6 @@
/*
 * Copyright (c) 2016-2017, Intel Corporation
+ * Copyright (c) 2020-2023, VectorCamp PC
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:

@@ -29,28 +30,30 @@
#include "config.h"
#include "hs_common.h"
#include "ue2common.h"
+#if !defined(VS_SIMDE_BACKEND)
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "util/arch/x86/cpuid_inline.h"
#elif defined(ARCH_AARCH64)
#include "util/arch/arm/cpuid_inline.h"
#endif
+#endif

HS_PUBLIC_API
hs_error_t HS_CDECL hs_valid_platform(void) {
    /* Hyperscan requires SSSE3, anything else is a bonus */
-#if defined(ARCH_IA32) || defined(ARCH_X86_64)
+#if !defined(VS_SIMDE_BACKEND) && (defined(ARCH_IA32) || defined(ARCH_X86_64))
    if (check_ssse3()) {
        return HS_SUCCESS;
    } else {
        return HS_ARCH_ERROR;
    }
-#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
+#elif !defined(VS_SIMDE_BACKEND) && (defined(ARCH_ARM32) || defined(ARCH_AARCH64))
    if (check_neon()) {
        return HS_SUCCESS;
    } else {
        return HS_ARCH_ERROR;
    }
-#elif defined(ARCH_PPC64EL)
+#elif defined(ARCH_PPC64EL) || defined(VS_SIMDE_BACKEND)
    return HS_SUCCESS;
#endif
}
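A hedged usage sketch (not from this commit; the `<hs/hs.h>` include path is an assumption about the installed layout). Callers are expected to gate on `hs_valid_platform()` before using the library; with the SIMDe backend the check now succeeds on any CPU, since no particular ISA is required:

```cpp
// Hedged usage sketch; assumes the API header is installed as <hs/hs.h>.
#include <hs/hs.h>
#include <cstdio>

int main() {
    // With VS_SIMDE_BACKEND defined at build time this returns HS_SUCCESS
    // on any CPU; native builds still probe for SSSE3/NEON as before.
    if (hs_valid_platform() != HS_SUCCESS) {
        std::fprintf(stderr, "CPU lacks the required SIMD support\n");
        return 1;
    }
    std::puts("platform OK");
    return 0;
}
```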
@@ -1,6 +1,6 @@
/*
 * Copyright (c) 2015-2017, Intel Corporation
- * Copyright (c) 2020-2021, VectorCamp PC
+ * Copyright (c) 2020-2023, VectorCamp PC
 * Copyright (c) 2021, Arm Limited
 *
 * Redistribution and use in source and binary forms, with or without

@@ -52,13 +52,17 @@ template <uint16_t S>
static really_inline
SuperVector<S> blockDoubleMask(SuperVector<S> mask1_lo, SuperVector<S> mask1_hi, SuperVector<S> mask2_lo, SuperVector<S> mask2_hi, SuperVector<S> chars);

+#if defined(VS_SIMDE_BACKEND)
+#include "x86/shufti.hpp"
+#else
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "x86/shufti.hpp"
-#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
+#elif (defined(ARCH_ARM32) || defined(ARCH_AARCH64))
#include "arm/shufti.hpp"
#elif defined(ARCH_PPC64EL)
#include "ppc64el/shufti.hpp"
#endif
+#endif
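The dispatch pattern above is worth spelling out: under the SIMDe backend the x86 implementation is reused verbatim on every architecture, because (as the simd_types.h hunk further down shows) `SIMDE_ENABLE_NATIVE_ALIASES` lets the `_mm_*` names that the x86 code uses resolve to SIMDe functions. A hedged toy example of the same trick (`nibble_lookup` is hypothetical, not from the tree):

```cpp
// Hypothetical sketch: code written against x86 intrinsics compiles on any
// architecture once SIMDe provides the _mm_* aliases.
#define SIMDE_ENABLE_NATIVE_ALIASES
#include <simde/x86/sse4.2.h>

// Per-byte table lookup in the PSHUFB style that shufti relies on.
static simde__m128i nibble_lookup(simde__m128i table, simde__m128i idx) {
    return _mm_shuffle_epi8(table, idx);
}
```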
template <uint16_t S>
static really_inline

@@ -1,6 +1,6 @@
/*
 * Copyright (c) 2015-2017, Intel Corporation
- * Copyright (c) 2020-2021, VectorCamp PC
+ * Copyright (c) 2020-2023, VectorCamp PC
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:

@@ -45,6 +45,9 @@ template <uint16_t S>
static really_inline
const SuperVector<S> blockSingleMask(SuperVector<S> shuf_mask_lo_highclear, SuperVector<S> shuf_mask_lo_highset, SuperVector<S> chars);

+#if defined(VS_SIMDE_BACKEND)
+#include "x86/truffle.hpp"
+#else
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "x86/truffle.hpp"
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)

@@ -52,6 +55,7 @@ const SuperVector<S> blockSingleMask(SuperVector<S> shuf_mask_lo_highclear, Supe
#elif defined(ARCH_PPC64EL)
#include "ppc64el/truffle.hpp"
#endif
+#endif

template <uint16_t S>
static really_inline
@@ -1,6 +1,6 @@
/*
 * Copyright (c) 2015-2020, Intel Corporation
- * Copyright (c) 2020-2021, VectorCamp PC
+ * Copyright (c) 2020-2023, VectorCamp PC
 * Copyright (c) 2021, Arm Limited
 *
 * Redistribution and use in source and binary forms, with or without

@@ -71,6 +71,9 @@ const u8 *vermicelliDoubleMaskedBlock(SuperVector<S> const data, SuperVector<S>
                                      SuperVector<S> const mask1, SuperVector<S> const mask2,
                                      u8 const c1, u8 const c2, u8 const m1, u8 const m2, u8 const *buf, u16 const len);

+#if defined(VS_SIMDE_BACKEND)
+#include "x86/vermicelli.hpp"
+#else
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "x86/vermicelli.hpp"
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)

@@ -78,6 +81,7 @@ const u8 *vermicelliDoubleMaskedBlock(SuperVector<S> const data, SuperVector<S>
#elif defined(ARCH_PPC64EL)
#include "ppc64el/vermicelli.hpp"
#endif
+#endif

template <uint16_t S>
static const u8 *vermicelliExecReal(SuperVector<S> const chars, SuperVector<S> const casemask, u8 const *buf, u8 const *buf_end) {
@@ -214,16 +214,22 @@ u64a compress64_impl_c(u64a x, u64a m) {
}

static really_inline
-m128 compress128_impl_c(m128 xvec, m128 mvec) {
-    u64a ALIGN_ATTR(16) x[2];
-    u64a ALIGN_ATTR(16) m[2];
-    store128(x, xvec);
-    store128(m, mvec);
-
-    compress64_impl_c(x[0], m[0]);
-    compress64_impl_c(x[1], m[1]);
-
-    return xvec;
+m128 compress128_impl_c(m128 x, m128 m) {
+    m128 one = set1_2x64(1);
+    m128 bitset = one;
+    m128 vres = zeroes128();
+    while (isnonzero128(m)) {
+        m128 mm = sub_2x64(zeroes128(), m);
+        m128 tv = and128(x, m);
+        tv = and128(tv, mm);
+
+        m128 mask = not128(eq64_m128(tv, zeroes128()));
+        mask = and128(bitset, mask);
+        vres = or128(vres, mask);
+        m = and128(m, sub_2x64(m, one));
+        bitset = lshift64_m128(bitset, 1);
+    }
+    return vres;
}
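The removed version called `compress64_impl_c` and discarded the results, returning `xvec` unchanged; the new loop computes a per-64-bit-lane bit-compress (the operation x86 exposes as PEXT), peeling one set bit of `m` per iteration via `m = and128(m, sub_2x64(m, one))`. A hedged scalar model of a single lane (illustrative, not from the commit):

```cpp
#include <cstdint>
#include <cassert>

// Scalar model of one 64-bit lane of compress128_impl_c (PEXT semantics):
// each set bit of m gathers the corresponding bit of x, packed toward bit 0.
static uint64_t compress64_model(uint64_t x, uint64_t m) {
    uint64_t res = 0, bit = 1;
    while (m) {
        uint64_t lowest = m & -m;   // isolate the lowest set bit of m
        if (x & lowest) {
            res |= bit;             // x had a 1 under this mask bit
        }
        m &= m - 1;                 // clear that mask bit
        bit <<= 1;                  // next packed output position
    }
    return res;
}

int main() {
    // bits of x at mask positions 2,3,4,5 are 1,1,0,1 -> packed 0b1011
    assert(compress64_model(0b101100, 0b111100) == 0b1011);
    return 0;
}
```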
static really_inline

@@ -303,16 +309,20 @@ u64a expand64_impl_c(u64a x, u64a m) {
}

static really_inline
-m128 expand128_impl_c(m128 xvec, m128 mvec) {
-    u64a ALIGN_ATTR(16) x[2];
-    u64a ALIGN_ATTR(16) m[2];
-    store128(x, xvec);
-    store128(m, mvec);
-
-    expand64_impl_c(x[0], m[0]);
-    expand64_impl_c(x[1], m[1]);
-
-    return xvec;
+m128 expand128_impl_c(m128 x, m128 m) {
+    m128 one = set1_2x64(1);
+    m128 bb = one;
+    m128 res = zeroes128();
+    while (isnonzero128(m)) {
+        m128 xm = and128(x, bb);
+        m128 mm = sub_2x64(zeroes128(), m);
+        m128 mask = not128(eq64_m128(xm, zeroes128()));
+        mask = and128(mask, and128(m, mm));
+        res = or128(res, mask);
+        m = and128(m, sub_2x64(m, one));
+        bb = lshift64_m128(bb, 1);
+    }
+    return res;
}
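`expand128_impl_c` is the inverse operation (x86 PDEP): consecutive low bits of `x` are scattered out to the positions of set bits in `m`. The matching hedged scalar model for one lane (again illustrative, not from the commit):

```cpp
#include <cstdint>
#include <cassert>

// Scalar model of one 64-bit lane of expand128_impl_c (PDEP semantics).
static uint64_t expand64_model(uint64_t x, uint64_t m) {
    uint64_t res = 0, bit = 1;
    while (m) {
        uint64_t lowest = m & -m;   // next mask position, lowest first
        if (x & bit) {
            res |= lowest;          // scatter this source bit to it
        }
        m &= m - 1;
        bit <<= 1;
    }
    return res;
}

int main() {
    // low bits 1,1,0,1 of x scattered to mask positions 2,3,4,5 -> 0b101100
    assert(expand64_model(0b1011, 0b111100) == 0b101100);
    return 0;
}
```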
/* returns the first set bit after begin (if not ~0U). If no bit is set after

@@ -1,6 +1,6 @@
/*
 * Copyright (c) 2015-2020, Intel Corporation
- * Copyright (c) 2020-2021, VectorCamp PC
+ * Copyright (c) 2020-2023, VectorCamp PC
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:

@@ -41,7 +41,7 @@

#include <string.h> // for memcpy

-#if !defined(HAVE_SIMD_128_BITS)
+#if !defined(HAVE_SIMD_128_BITS) && !defined(VS_SIMDE_BACKEND)
#error "You need at least a 128-bit capable SIMD engine!"
#endif // HAVE_SIMD_128_BITS

@@ -88,7 +88,7 @@ static inline void print_m128_2x64(const char *label, m128 vec) {
#define print_m128_2x64(label, vec) ;
#endif

-#if !defined(ARCH_IA32) && !defined(ARCH_X86_64)
+#if !defined(ARCH_IA32) && !defined(ARCH_X86_64) && !defined(VS_SIMDE_BACKEND)
#define ZEROES_8 0, 0, 0, 0, 0, 0, 0, 0
#define ZEROES_31 ZEROES_8, ZEROES_8, ZEROES_8, 0, 0, 0, 0, 0, 0, 0
#define ZEROES_32 ZEROES_8, ZEROES_8, ZEROES_8, ZEROES_8

@@ -455,7 +455,6 @@ static really_inline int isnonzero384(m384 a) {
    return isnonzero128(or128(or128(a.lo, a.mid), a.hi));
}

-#if defined(HAVE_SIMD_128_BITS) && !defined(ARCH_IA32) && !defined(ARCH_X86_64)
/**
 * "Rich" version of diff384(). Takes two vectors a and b and returns a 12-bit
 * mask indicating which 32-bit words contain differences.

@@ -464,7 +463,6 @@ static really_inline
u32 diffrich384(m384 a, m384 b) {
    return diffrich128(a.lo, b.lo) | (diffrich128(a.mid, b.mid) << 4) | (diffrich128(a.hi, b.hi) << 8);
}
-#endif

/**
 * "Rich" version of diff384(), 64-bit variant. Takes two vectors a and b and
src/util/arch/simde/cpuid_flags.c (new file, +41)

@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2020-2023, VectorCamp PC
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "util/arch/common/cpuid_flags.h"
+#include "ue2common.h"
+#include "hs_compile.h" // for HS_MODE_ flags
+#include "util/arch.h"
+
+u64a cpuid_flags(void) {
+    return 0;
+}
+
+u32 cpuid_tune(void) {
+    return HS_TUNE_FAMILY_GENERIC;
+}
@@ -112,6 +112,16 @@ static really_inline u32 diffrich64_128(m128 a, m128 b) {
#endif
}

+static really_really_inline
+m128 add_2x64(m128 a, m128 b) {
+    return (m128) _mm_add_epi64(a, b);
+}
+
+static really_really_inline
+m128 sub_2x64(m128 a, m128 b) {
+    return (m128) _mm_sub_epi64(a, b);
+}
+
static really_really_inline
m128 lshift64_m128(m128 a, unsigned b) {
#if defined(HAVE__BUILTIN_CONSTANT_P)

@@ -124,8 +134,9 @@ m128 lshift64_m128(m128 a, unsigned b) {
}

#define rshift64_m128(a, b) _mm_srli_epi64((a), (b))
#define eq128(a, b) _mm_cmpeq_epi8((a), (b))
+#define eq64_m128(a, b) _mm_cmpeq_epi64((a), (b))
#define movemask128(a) ((u32)_mm_movemask_epi8((a)))

#if defined(HAVE_AVX512)
static really_inline m128 cast512to128(const m512 in) {

@@ -169,7 +180,7 @@ m128 load_m128_from_u64a(const u64a *p) {

static really_inline
m128 rshiftbyte_m128(const m128 a, int count_immed) {
-#if defined(HAVE__BUILTIN_CONSTANT_P)
+#if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND)
    if (__builtin_constant_p(count_immed)) {
        return _mm_srli_si128(a, count_immed);
    }

@@ -200,7 +211,7 @@ m128 rshiftbyte_m128(const m128 a, int count_immed) {

static really_inline
m128 lshiftbyte_m128(const m128 a, int count_immed) {
-#if defined(HAVE__BUILTIN_CONSTANT_P)
+#if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND)
    if (__builtin_constant_p(count_immed)) {
        return _mm_slli_si128(a, count_immed);
    }

@@ -668,24 +679,6 @@ m256 combine2x128(m128 hi, m128 lo) {
}
#endif //AVX2

-#if defined(HAVE_SIMD_128_BITS)
-/**
- * "Rich" version of diff384(). Takes two vectors a and b and returns a 12-bit
- * mask indicating which 32-bit words contain differences.
- */
-
-static really_inline u32 diffrich384(m384 a, m384 b) {
-    m128 z = zeroes128();
-    a.lo = _mm_cmpeq_epi32(a.lo, b.lo);
-    a.mid = _mm_cmpeq_epi32(a.mid, b.mid);
-    a.hi = _mm_cmpeq_epi32(a.hi, b.hi);
-    m128 packed = _mm_packs_epi16(_mm_packs_epi32(a.lo, a.mid),
-                                  _mm_packs_epi32(a.hi, z));
-    return ~(_mm_movemask_epi8(packed)) & 0xfff;
-}
-
-#endif // HAVE_SIMD_128_BITS

/****
 **** 512-bit Primitives
 ****/
@@ -138,8 +138,8 @@ public:

    /// Flip all bits.
    void flip() {
-        for (auto &e : bits) {
-            e = ~e;
+        for (size_t i = 0; i < size(); i++) {
+            flip(i);
        }
        clear_trailer();
    }
@@ -1,5 +1,6 @@
/*
 * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2020-2023, VectorCamp PC
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:

@@ -44,7 +45,7 @@
#define DOUBLE_CASE_CLEAR 0xdfdf
#define OCTO_CASE_CLEAR 0xdfdfdfdfdfdfdfdfULL

+#if !defined(VS_SIMDE_BACKEND)
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "util/arch/x86/bitutils.h"
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)

@@ -52,6 +53,32 @@
#elif defined(ARCH_PPC64EL)
#include "util/arch/ppc64el/bitutils.h"
#endif
+#else
+#include "util/arch/common/bitutils.h"
+#define clz32_impl clz32_impl_c
+#define clz64_impl clz64_impl_c
+#define ctz32_impl ctz32_impl_c
+#define ctz64_impl ctz64_impl_c
+#define lg2_impl lg2_impl_c
+#define lg2_64_impl lg2_64_impl_c
+#define findAndClearLSB_32_impl findAndClearLSB_32_impl_c
+#define findAndClearLSB_64_impl findAndClearLSB_64_impl_c
+#define findAndClearMSB_32_impl findAndClearMSB_32_impl_c
+#define findAndClearMSB_64_impl findAndClearMSB_64_impl_c
+#define compress32_impl compress32_impl_c
+#define compress64_impl compress64_impl_c
+#define compress128_impl compress128_impl_c
+#define expand32_impl expand32_impl_c
+#define expand64_impl expand64_impl_c
+#define expand128_impl expand128_impl_c
+#define bf64_iterate_impl bf64_iterate_impl_c
+#define bf64_set_impl bf64_set_impl_c
+#define bf64_unset_impl bf64_unset_impl_c
+#define rank_in_mask32_impl rank_in_mask32_impl_c
+#define rank_in_mask64_impl rank_in_mask64_impl_c
+#define pext32_impl pext32_impl_c
+#define pext64_impl pext64_impl_c
+#endif
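The `#else` branch above remaps every per-architecture bit-manipulation helper to the portable `_c` implementations from `util/arch/common/bitutils.h`. As a reminder of the contract one of these helpers satisfies, here is a hedged scalar model (illustrative, not from the tree; `__builtin_ctz` is a GCC/Clang builtin):

```cpp
#include <cstdint>
#include <cassert>

// Hedged model of findAndClearLSB_32: return the index of the lowest set
// bit and clear it in place (assumes *v != 0, as the real helper does).
static uint32_t findAndClearLSB_32_model(uint32_t *v) {
    uint32_t idx = static_cast<uint32_t>(__builtin_ctz(*v));
    *v &= *v - 1;   // drop the lowest set bit
    return idx;
}

int main() {
    uint32_t v = 0x28;  // bits 3 and 5 set
    assert(findAndClearLSB_32_model(&v) == 3 && v == 0x20);
    return 0;
}
```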
static really_inline
u32 clz32(u32 x) {

@@ -74,8 +74,6 @@
# endif
#elif defined(USE_PPC64EL_ALTIVEC_H)
#include <altivec.h>
-#else
-#error no intrinsics file
#endif

#endif // INTRINSICS_H
@@ -1,6 +1,6 @@
/*
 * Copyright (c) 2015-2017, Intel Corporation
- * Copyright (c) 2020-2021, VectorCamp PC
+ * Copyright (c) 2020-2023, VectorCamp PC
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:

@@ -49,6 +49,9 @@ const u8 *first_zero_match_inverted(const u8 *buf, SuperVector<S> v, u16 const l
template <u16 S>
const u8 *last_zero_match_inverted(const u8 *buf, SuperVector<S> v, u16 len = S);

+#if defined(VS_SIMDE_BACKEND)
+#include "util/arch/x86/match.hpp"
+#else
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "util/arch/x86/match.hpp"
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)

@@ -56,5 +59,6 @@ const u8 *last_zero_match_inverted(const u8 *buf, SuperVector<S> v, u16 len = S)
#elif defined(ARCH_PPC64EL)
#include "util/arch/ppc64el/match.hpp"
#endif
+#endif

#endif // MATCH_HPP
@@ -1,5 +1,6 @@
/*
 * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2023, VectorCamp PC
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:

@@ -34,7 +35,16 @@
#include "util/intrinsics.h"
#include "ue2common.h"

-#if defined(ARCH_IA32) || defined(ARCH_X86_64)
+#if defined(VS_SIMDE_BACKEND)
+#define VECTORSIZE 16
+#define SIMDE_ENABLE_NATIVE_ALIASES
+#if !defined(VS_SIMDE_NATIVE)
+#define SIMDE_NO_NATIVE
+#endif
+#include <simde/x86/sse4.2.h>
+typedef simde__m128i m128;
+#define HAVE_SIMD_128_BITS
+#elif defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "util/arch/x86/simd_types.h"
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
#include "util/arch/arm/simd_types.h"

@@ -42,9 +52,6 @@
#include "util/arch/ppc64el/simd_types.h"
#endif

-#if !defined(m128) && !defined(HAVE_SIMD_128_BITS)
-typedef struct ALIGN_DIRECTIVE {u64a hi; u64a lo;} m128;
-#endif

#if !defined(m256) && !defined(HAVE_SIMD_256_BITS)
typedef struct ALIGN_AVX_DIRECTIVE {m128 lo; m128 hi;} m256;
@@ -1,5 +1,6 @@
/*
 * Copyright (c) 2015-2020, Intel Corporation
+ * Copyright (c) 2023, VectorCamp PC
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:

@@ -61,6 +62,9 @@ extern const char vbs_mask_data[];
}
#endif

+#if defined(VS_SIMDE_BACKEND)
+#include "util/arch/x86/simd_utils.h"
+#else
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "util/arch/x86/simd_utils.h"
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)

@@ -68,6 +72,7 @@ extern const char vbs_mask_data[];
#elif defined(ARCH_PPC64EL)
#include "util/arch/ppc64el/simd_utils.h"
#endif
+#endif

#include "util/arch/common/simd_utils.h"
@@ -376,7 +376,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_64 (uint8_t const N) const
template <>
really_inline SuperVector<16> SuperVector<16>::vshl_128(uint8_t const N) const
{
-#if defined(HAVE__BUILTIN_CONSTANT_P)
+#if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND)
    if (__builtin_constant_p(N)) {
        return {_mm_slli_si128(u.v128[0], N)};
    }

@@ -451,7 +451,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const N) const
template <>
really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const N) const
{
-#if defined(HAVE__BUILTIN_CONSTANT_P)
+#if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND)
    if (__builtin_constant_p(N)) {
        return {_mm_srli_si128(u.v128[0], N)};
    }

@@ -472,7 +472,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr(uint8_t const N) const
template <>
really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const
{
-#if defined(HAVE__BUILTIN_CONSTANT_P)
+#if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND)
    if (__builtin_constant_p(N)) {
        return {_mm_srli_si128(u.v128[0], N)};
    }

@@ -483,7 +483,7 @@ really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const
template <>
really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const
{
-#if defined(HAVE__BUILTIN_CONSTANT_P)
+#if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND)
    if (__builtin_constant_p(N)) {
        return {_mm_slli_si128(u.v128[0], N)};
    }
@@ -34,6 +34,9 @@
#include <cstdio>
#include <type_traits>

+#if defined(VS_SIMDE_BACKEND)
+#include "util/supervector/arch/x86/types.hpp"
+#else
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "util/supervector/arch/x86/types.hpp"
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)

@@ -41,6 +44,7 @@
#elif defined(ARCH_PPC64EL)
#include "util/supervector/arch/ppc64el/types.hpp"
#endif
+#endif // VS_SIMDE_BACKEND

#if defined(HAVE_SIMD_512_BITS)
using Z_TYPE = u64a;

@@ -57,7 +61,7 @@ using Z_TYPE = u32;
#define DOUBLE_LOAD_MASK(l) (((1ULL) << (l)) - 1ULL)
#define SINGLE_LOAD_MASK(l) (((1ULL) << (l)) - 1ULL)
#elif defined(HAVE_SIMD_128_BITS)
-#if defined(ARCH_ARM32) || defined(ARCH_AARCH64)
+#if !defined(VS_SIMDE_BACKEND) && (defined(ARCH_ARM32) || defined(ARCH_AARCH64))
using Z_TYPE = u64a;
#define Z_BITS 64
#define Z_POSSHIFT 2

@@ -175,7 +179,7 @@ public:
    typename BaseVector<32>::type ALIGN_ATTR(BaseVector<32>::size) v256[SIZE / BaseVector<32>::size];
    typename BaseVector<64>::type ALIGN_ATTR(BaseVector<64>::size) v512[SIZE / BaseVector<64>::size];

-#if defined(ARCH_ARM32) || defined(ARCH_AARCH64) || defined(ARCH_PPC64EL)
+#if !defined(VS_SIMDE_BACKEND) && (defined(ARCH_ARM32) || defined(ARCH_AARCH64) || defined(ARCH_PPC64EL))
    uint64x2_t ALIGN_ATTR(BaseVector<16>::size) u64x2[SIZE / BaseVector<16>::size];
    int64x2_t ALIGN_ATTR(BaseVector<16>::size) s64x2[SIZE / BaseVector<16>::size];
    uint32x4_t ALIGN_ATTR(BaseVector<16>::size) u32x4[SIZE / BaseVector<16>::size];

@@ -382,6 +386,9 @@ struct Unroller<End, End>
};

#if defined(HS_OPTIMIZE)
+#if defined(VS_SIMDE_BACKEND)
+#include "util/supervector/arch/x86/impl.cpp"
+#else
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "util/supervector/arch/x86/impl.cpp"
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)

@@ -390,6 +397,7 @@ struct Unroller<End, End>
#include "util/supervector/arch/ppc64el/impl.cpp"
#endif
#endif
+#endif

#endif /* SUPERVECTOR_H */
@@ -1,5 +1,6 @@
/*
 * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2023, VectorCamp PC
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:

@@ -667,7 +668,7 @@ TEST(SimdUtilsTest, movq) {
    ASSERT_EQ(0, memcmp(cmp, &r, sizeof(r)));

#if defined(HAVE_SIMD_128_BITS)
-#if defined(ARCH_IA32) || defined(ARCH_X86_64)
+#if defined(ARCH_IA32) || defined(ARCH_X86_64) || defined(VS_SIMDE_BACKEND)
    simd = _mm_set_epi64x(~0LL, 0x123456789abcdef);
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
    int64x2_t a = { 0x123456789abcdefLL, ~0LL };