Merge pull request #203 from VectorCamp/feature/enable-simde-backend

Feature/enable simde backend
This commit is contained in:
Konstantinos Margaritis 2023-11-29 11:22:08 +02:00 committed by GitHub
commit a26bed96bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 242 additions and 83 deletions

3
.gitmodules vendored Normal file
View File

@ -0,0 +1,3 @@
[submodule "simde"]
path = simde
url = https://github.com/simd-everywhere/simde.git

View File

@ -119,15 +119,22 @@ endif()
# Detect OS and if Fat Runtime is available
include (${CMAKE_MODULE_PATH}/osdetection.cmake)
if (ARCH_IA32 OR ARCH_X86_64)
if(SIMDE_BACKEND)
include (${CMAKE_MODULE_PATH}/simde.cmake)
elseif (ARCH_IA32 OR ARCH_X86_64)
include (${CMAKE_MODULE_PATH}/cflags-x86.cmake)
set(ARCH_FLAG march)
elseif (ARCH_ARM32 OR ARCH_AARCH64)
include (${CMAKE_MODULE_PATH}/cflags-arm.cmake)
set(ARCH_FLAG march)
elseif (ARCH_PPC64EL)
include (${CMAKE_MODULE_PATH}/cflags-ppc64le.cmake)
else ()
message(FATAL_ERROR "Unsupported platform")
endif ()
if (ARCH_PPC64EL)
set(ARCH_FLAG mcpu)
else ()
set(ARCH_FLAG march)
endif ()
# Detect Native arch flags if requested
@ -239,8 +246,11 @@ set (hs_exec_common_SRCS
src/util/arch/common/cpuid_flags.h
src/util/multibit.c
)
if (ARCH_IA32 OR ARCH_X86_64)
if (SIMDE_BACKEND)
set (hs_exec_common_SRCS
${hs_exec_common_SRCS}
src/util/arch/simde/cpuid_flags.c)
elseif (ARCH_IA32 OR ARCH_X86_64)
set (hs_exec_common_SRCS
${hs_exec_common_SRCS}
src/util/arch/x86/cpuid_flags.c
@ -398,7 +408,12 @@ set (hs_exec_SRCS
src/database.h
)
if (ARCH_IA32 OR ARCH_X86_64)
if (SIMDE_BACKEND)
set (hs_exec_SRCS
${hs_exec_SRCS}
src/nfa/vermicelli_simd.cpp
src/util/supervector/arch/x86/impl.cpp)
elseif (ARCH_IA32 OR ARCH_X86_64)
set (hs_exec_SRCS
${hs_exec_SRCS}
src/nfa/vermicelli_simd.cpp

View File

@ -1,8 +1,12 @@
# About Vectorscan
A fork of Intel's Hyperscan, modified to run on more platforms. Currently ARM NEON/ASIMD
is 100% functional, and Power VSX are in development. ARM SVE2 support is in ongoing with
and Power VSX are 100% functional. ARM SVE2 support is ongoing, with
access to hardware now. More platforms will follow in the future.
Furthermore, starting with 5.4.12 there is now a [SIMDe](https://github.com/simd-everywhere/simde)
port, which can either be used for platforms without official SIMD support,
as SIMDe can emulate SIMD instructions, or as an alternative backend for existing architectures,
for reference and comparison purposes.
Vectorscan will follow Intel's API and internal algorithms where possible, but will not
hesitate to make code changes where they are thought to give better performance or better
@ -148,6 +152,11 @@ Common options for Cmake are:
* `SANITIZE=[address|memory|undefined]` (experimental) Use `libasan` sanitizer to detect possible bugs. For now only `address` is tested. This will eventually be integrated in the CI.
## SIMDe options
* `SIMDE_BACKEND=[On|Off]` Enable the SIMDe backend. If this is chosen, all native (SSE/AVX/AVX512/Neon/SVE/VSX) backends will be disabled and a SIMDe SSE4.2 emulation backend will be enabled instead. This enables Vectorscan to build and run on architectures without SIMD.
* `SIMDE_NATIVE=[On|Off]` Enable SIMDe native emulation of x86 SSE4.2 intrinsics on the build platform. That is, SSE4.2 intrinsics will be emulated using Neon on an Arm platform, VSX on a Power platform, etc.
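As an illustration of what the SSE4.2 emulation backend provides, here is a minimal standalone C sketch, not part of this PR: it calls SIMDe's explicit `simde_mm_*` forms of the SSE intrinsics, which compile to native SSE on x86 and to portable (or Neon/VSX) code everywhere else. The helper name, buffer contents and compile command are assumptions made for the example.

```c
/* Hypothetical example: find a byte in a 16-byte block using SIMDe.
 * Builds on any architecture, e.g.:  cc -I<path-to-simde> example.c  */
#include <simde/x86/sse4.2.h>
#include <stdio.h>

/* Return the offset of the first occurrence of 'c' in the 16-byte block 'p',
 * or -1 if it is not present. */
static int find_byte_in_block(const unsigned char *p, unsigned char c) {
    simde__m128i data  = simde_mm_loadu_si128((const simde__m128i *)p);
    simde__m128i match = simde_mm_cmpeq_epi8(data, simde_mm_set1_epi8((char)c));
    int mask = simde_mm_movemask_epi8(match);  /* one bit per byte lane */
    if (mask == 0) {
        return -1;
    }
    return __builtin_ctz(mask);                /* GCC/Clang builtin: first set bit */
}

int main(void) {
    const unsigned char buf[16] = "vectorscan";
    printf("'s' found at offset %d\n", find_byte_in_block(buf, 's'));
    return 0;
}
```

With `SIMDE_NATIVE=On`, SIMDe is allowed to implement such calls with the host's own SIMD instructions; with it off, the portable scalar fallbacks are used, which is what makes the backend viable on architectures without any SIMD at all.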
## Build
If `cmake` has completed successfully you can run `make` in the same directory. If you have a multi-core system with `N` cores, running
@ -177,4 +186,4 @@ the [Developer Reference Guide](http://intel.github.io/hyperscan/dev-reference/)
And you can find the source code [on Github](https://github.com/intel/hyperscan).
For Intel Hyperscan related issues and questions, please follow the relevant links there.

View File

@ -1,4 +1,7 @@
if (NOT FAT_RUNTIME AND (BUILD_STATIC_AND_SHARED OR BUILD_STATIC_LIBS))
include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${PROJECT_SOURCE_DIR})
if (NOT FAT_RUNTIME AND (BUILD_SHARED_LIBS OR BUILD_STATIC_LIBS))
add_executable(benchmarks benchmarks.cpp)
set_source_files_properties(benchmarks.cpp PROPERTIES COMPILE_FLAGS
"-Wall -Wno-unused-variable")

View File

@ -67,7 +67,10 @@ if (USE_CPU_NATIVE)
message(STATUS "clang will tune for ${GNUCC_ARCH}, ${TUNE_FLAG}")
endif()
else()
if (ARCH_IA32 OR ARCH_X86_64)
if (SIMDE_BACKEND)
set(GNUCC_ARCH native)
set(TUNE_FLAG native)
elseif (ARCH_IA32 OR ARCH_X86_64)
set(GNUCC_ARCH native)
set(TUNE_FLAG generic)
elseif(ARCH_AARCH64)
@ -84,8 +87,11 @@ else()
elseif(ARCH_ARM32)
set(GNUCC_ARCH armv7a)
set(TUNE_FLAG generic)
elseif(ARCH_PPC64EL)
set(GNUCC_ARCH power8)
set(TUNE_FLAG power8)
else()
set(GNUCC_ARCH power9)
set(TUNE_FLAG power9)
set(GNUCC_ARCH native)
set(TUNE_FLAG native)
endif()
endif()

15
cmake/simde.cmake Normal file
View File

@ -0,0 +1,15 @@
include_directories(${PROJECT_SOURCE_DIR}/simde/simde)
CHECK_INCLUDE_FILES("simde/x86/sse4.2.h" SIMDE_SSE42_H_FOUND)
if (SIMDE_SSE42_H_FOUND)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DVS_SIMDE_BACKEND")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DVS_SIMDE_BACKEND")
if (SIMDE_NATIVE)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DVS_SIMDE_NATIVE -DSIMDE_ENABLE_OPENMP -fopenmp-simd")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DVS_SIMDE_NATIVE -DSIMDE_ENABLE_OPENMP -fopenmp-simd")
endif()
else()
message(FATAL_ERROR "SIMDe backend requested but SIMDe is not available on the system")
endif()

1
simde Submodule

@ -0,0 +1 @@
Subproject commit aae22459fa284e9fc2b7d4b8e4571afa0418125f

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2016-2017, Intel Corporation
* Copyright (c) 2020-2023, VectorCamp PC
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -29,28 +30,30 @@
#include "config.h"
#include "hs_common.h"
#include "ue2common.h"
#if !defined(VS_SIMDE_BACKEND)
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "util/arch/x86/cpuid_inline.h"
#elif defined(ARCH_AARCH64)
#include "util/arch/arm/cpuid_inline.h"
#endif
#endif
HS_PUBLIC_API
hs_error_t HS_CDECL hs_valid_platform(void) {
/* Hyperscan requires SSSE3, anything else is a bonus */
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#if !defined(VS_SIMDE_BACKEND) && (defined(ARCH_IA32) || defined(ARCH_X86_64))
if (check_ssse3()) {
return HS_SUCCESS;
} else {
return HS_ARCH_ERROR;
}
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
#elif !defined(VS_SIMDE_BACKEND) && (defined(ARCH_ARM32) || defined(ARCH_AARCH64))
if (check_neon()) {
return HS_SUCCESS;
} else {
return HS_ARCH_ERROR;
}
#elif defined(ARCH_PPC64EL)
return HS_SUCCESS;
#elif defined(ARCH_PPC64EL) || defined(VS_SIMDE_BACKEND)
return HS_SUCCESS;
#endif
}
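Since the SIMDe backend has no hardware feature to probe, `hs_valid_platform()` now simply returns `HS_SUCCESS` in that configuration, while native x86 and Arm builds keep their SSSE3/Neon checks. A minimal, hypothetical caller-side sketch follows; the include path and messages are assumptions, not part of this PR.

```c
#include <stdio.h>
#include <hs.h>   /* Vectorscan/Hyperscan public API */

int main(void) {
    /* With the SIMDe backend this always succeeds; native builds verify
     * SSSE3 (x86) or Neon (Arm) before any database work is attempted. */
    if (hs_valid_platform() != HS_SUCCESS) {
        fprintf(stderr, "CPU lacks the SIMD features required by this build\n");
        return 1;
    }
    printf("platform OK\n");
    return 0;
}
```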

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
* Copyright (c) 2020-2021, VectorCamp PC
* Copyright (c) 2020-2023, VectorCamp PC
* Copyright (c) 2021, Arm Limited
*
* Redistribution and use in source and binary forms, with or without
@ -52,13 +52,17 @@ template <uint16_t S>
static really_inline
SuperVector<S> blockDoubleMask(SuperVector<S> mask1_lo, SuperVector<S> mask1_hi, SuperVector<S> mask2_lo, SuperVector<S> mask2_hi, SuperVector<S> chars);
#if defined(VS_SIMDE_BACKEND)
#include "x86/shufti.hpp"
#else
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "x86/shufti.hpp"
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
#elif (defined(ARCH_ARM32) || defined(ARCH_AARCH64))
#include "arm/shufti.hpp"
#elif defined(ARCH_PPC64EL)
#include "ppc64el/shufti.hpp"
#endif
#endif
template <uint16_t S>
static really_inline

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
* Copyright (c) 2020-2021, VectorCamp PC
* Copyright (c) 2020-2023, VectorCamp PC
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -45,6 +45,9 @@ template <uint16_t S>
static really_inline
const SuperVector<S> blockSingleMask(SuperVector<S> shuf_mask_lo_highclear, SuperVector<S> shuf_mask_lo_highset, SuperVector<S> chars);
#if defined(VS_SIMDE_BACKEND)
#include "x86/truffle.hpp"
#else
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "x86/truffle.hpp"
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
@ -52,6 +55,7 @@ const SuperVector<S> blockSingleMask(SuperVector<S> shuf_mask_lo_highclear, Supe
#elif defined(ARCH_PPC64EL)
#include "ppc64el/truffle.hpp"
#endif
#endif
template <uint16_t S>
static really_inline

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2015-2020, Intel Corporation
* Copyright (c) 2020-2021, VectorCamp PC
* Copyright (c) 2020-2023, VectorCamp PC
* Copyright (c) 2021, Arm Limited
*
* Redistribution and use in source and binary forms, with or without
@ -71,6 +71,9 @@ const u8 *vermicelliDoubleMaskedBlock(SuperVector<S> const data, SuperVector<S>
SuperVector<S> const mask1, SuperVector<S> const mask2,
u8 const c1, u8 const c2, u8 const m1, u8 const m2, u8 const *buf, u16 const len);
#if defined(VS_SIMDE_BACKEND)
#include "x86/vermicelli.hpp"
#else
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "x86/vermicelli.hpp"
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
@ -78,6 +81,7 @@ const u8 *vermicelliDoubleMaskedBlock(SuperVector<S> const data, SuperVector<S>
#elif defined(ARCH_PPC64EL)
#include "ppc64el/vermicelli.hpp"
#endif
#endif
template <uint16_t S>
static const u8 *vermicelliExecReal(SuperVector<S> const chars, SuperVector<S> const casemask, u8 const *buf, u8 const *buf_end) {

View File

@ -214,16 +214,22 @@ u64a compress64_impl_c(u64a x, u64a m) {
}
static really_inline
m128 compress128_impl_c(m128 xvec, m128 mvec) {
u64a ALIGN_ATTR(16) x[2];
u64a ALIGN_ATTR(16) m[2];
store128(x, xvec);
store128(m, mvec);
m128 compress128_impl_c(m128 x, m128 m) {
m128 one = set1_2x64(1);
m128 bitset = one;
m128 vres = zeroes128();
while (isnonzero128(m)) {
m128 mm = sub_2x64(zeroes128(), m);
m128 tv = and128(x, m);
tv = and128(tv, mm);
compress64_impl_c(x[0], m[0]);
compress64_impl_c(x[1], m[1]);
return xvec;
m128 mask = not128(eq64_m128(tv, zeroes128()));
mask = and128(bitset, mask);
vres = or128(vres, mask);
m = and128(m, sub_2x64(m, one));
bitset = lshift64_m128(bitset, 1);
}
return vres;
}
static really_inline
@ -303,16 +309,20 @@ u64a expand64_impl_c(u64a x, u64a m) {
}
static really_inline
m128 expand128_impl_c(m128 xvec, m128 mvec) {
u64a ALIGN_ATTR(16) x[2];
u64a ALIGN_ATTR(16) m[2];
store128(x, xvec);
store128(m, mvec);
expand64_impl_c(x[0], m[0]);
expand64_impl_c(x[1], m[1]);
return xvec;
m128 expand128_impl_c(m128 x, m128 m) {
m128 one = set1_2x64(1);
m128 bb = one;
m128 res = zeroes128();
while (isnonzero128(m)) {
m128 xm = and128(x, bb);
m128 mm = sub_2x64(zeroes128(), m);
m128 mask = not128(eq64_m128(xm, zeroes128()));
mask = and128(mask, and128(m,mm));
res = or128(res, mask);
m = and128(m, sub_2x64(m, one));
bb = lshift64_m128(bb, 1);
}
return res;
}
/* returns the first set bit after begin (if not ~0U). If no bit is set after
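The new `compress128_impl_c`/`expand128_impl_c` bodies above replace the earlier helpers, which discarded the results of the per-half `compress64_impl_c`/`expand64_impl_c` calls, with a bit-serial loop over the mask, i.e. the classic software fallback for PEXT/PDEP. Below is a minimal standalone sketch of the same algorithm on 64-bit scalars; the function names and test values are invented for illustration and are not code from the tree.

```c
/* Bit-serial compress (PEXT-like) and expand (PDEP-like) on 64-bit scalars.
 * The new compress128/expand128 loops apply the same idea lane-wise on m128. */
#include <stdint.h>
#include <stdio.h>

static uint64_t compress64_ref(uint64_t x, uint64_t m) {
    uint64_t res = 0;
    for (uint64_t bb = 1; m != 0; m &= m - 1, bb <<= 1) {
        if (x & m & -m) {   /* is x set at the lowest set bit of m? */
            res |= bb;      /* ...then emit the next packed bit */
        }
    }
    return res;
}

static uint64_t expand64_ref(uint64_t x, uint64_t m) {
    uint64_t res = 0;
    for (uint64_t bb = 1; m != 0; m &= m - 1, bb <<= 1) {
        if (x & bb) {       /* take the packed bits of x in order... */
            res |= m & -m;  /* ...and scatter them to the set bits of m */
        }
    }
    return res;
}

int main(void) {
    uint64_t m = 0xF0F0;                        /* gather/scatter mask */
    uint64_t packed = compress64_ref(0xABCD, m);
    printf("compress: %#llx\n", (unsigned long long)packed);                  /* 0xac */
    printf("expand:   %#llx\n", (unsigned long long)expand64_ref(packed, m)); /* 0xa0c0 */
    return 0;
}
```

Each iteration peels off the lowest set bit of the mask with `m &= m - 1` while `bb` walks the packed-bit positions; the 128-bit versions above do the same thing per 64-bit lane using `sub_2x64`, `eq64_m128`, `not128` and `lshift64_m128`.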

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2015-2020, Intel Corporation
* Copyright (c) 2020-2021, VectorCamp PC
* Copyright (c) 2020-2023, VectorCamp PC
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -41,7 +41,7 @@
#include <string.h> // for memcpy
#if !defined(HAVE_SIMD_128_BITS)
#if !defined(HAVE_SIMD_128_BITS) && !defined(VS_SIMDE_BACKEND)
#error "You need at least a 128-bit capable SIMD engine!"
#endif // HAVE_SIMD_128_BITS
@ -88,7 +88,7 @@ static inline void print_m128_2x64(const char *label, m128 vec) {
#define print_m128_2x64(label, vec) ;
#endif
#if !defined(ARCH_IA32) && !defined(ARCH_X86_64)
#if !defined(ARCH_IA32) && !defined(ARCH_X86_64) && !defined(VS_SIMDE_BACKEND)
#define ZEROES_8 0, 0, 0, 0, 0, 0, 0, 0
#define ZEROES_31 ZEROES_8, ZEROES_8, ZEROES_8, 0, 0, 0, 0, 0, 0, 0
#define ZEROES_32 ZEROES_8, ZEROES_8, ZEROES_8, ZEROES_8
@ -455,7 +455,6 @@ static really_inline int isnonzero384(m384 a) {
return isnonzero128(or128(or128(a.lo, a.mid), a.hi));
}
#if defined(HAVE_SIMD_128_BITS) && !defined(ARCH_IA32) && !defined(ARCH_X86_64)
/**
* "Rich" version of diff384(). Takes two vectors a and b and returns a 12-bit
* mask indicating which 32-bit words contain differences.
@ -464,7 +463,6 @@ static really_inline
u32 diffrich384(m384 a, m384 b) {
return diffrich128(a.lo, b.lo) | (diffrich128(a.mid, b.mid) << 4) | (diffrich128(a.hi, b.hi) << 8);
}
#endif
/**
* "Rich" version of diff384(), 64-bit variant. Takes two vectors a and b and

View File

@ -0,0 +1,41 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
* Copyright (c) 2020-2023, VectorCamp PC
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "util/arch/common/cpuid_flags.h"
#include "ue2common.h"
#include "hs_compile.h" // for HS_MODE_ flags
#include "util/arch.h"
u64a cpuid_flags(void) {
return 0;
}
u32 cpuid_tune(void) {
return HS_TUNE_FAMILY_GENERIC;
}

View File

@ -112,6 +112,16 @@ static really_inline u32 diffrich64_128(m128 a, m128 b) {
#endif
}
static really_really_inline
m128 add_2x64(m128 a, m128 b) {
return (m128) _mm_add_epi64(a, b);
}
static really_really_inline
m128 sub_2x64(m128 a, m128 b) {
return (m128) _mm_sub_epi64(a, b);
}
static really_really_inline
m128 lshift64_m128(m128 a, unsigned b) {
#if defined(HAVE__BUILTIN_CONSTANT_P)
@ -124,8 +134,9 @@ m128 lshift64_m128(m128 a, unsigned b) {
}
#define rshift64_m128(a, b) _mm_srli_epi64((a), (b))
#define eq128(a, b) _mm_cmpeq_epi8((a), (b))
#define movemask128(a) ((u32)_mm_movemask_epi8((a)))
#define eq128(a, b) _mm_cmpeq_epi8((a), (b))
#define eq64_m128(a, b) _mm_cmpeq_epi64((a), (b))
#define movemask128(a) ((u32)_mm_movemask_epi8((a)))
#if defined(HAVE_AVX512)
static really_inline m128 cast512to128(const m512 in) {
@ -169,7 +180,7 @@ m128 load_m128_from_u64a(const u64a *p) {
static really_inline
m128 rshiftbyte_m128(const m128 a, int count_immed) {
#if defined(HAVE__BUILTIN_CONSTANT_P)
#if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND)
if (__builtin_constant_p(count_immed)) {
return _mm_srli_si128(a, count_immed);
}
@ -200,7 +211,7 @@ m128 rshiftbyte_m128(const m128 a, int count_immed) {
static really_inline
m128 lshiftbyte_m128(const m128 a, int count_immed) {
#if defined(HAVE__BUILTIN_CONSTANT_P)
#if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND)
if (__builtin_constant_p(count_immed)) {
return _mm_slli_si128(a, count_immed);
}
@ -668,24 +679,6 @@ m256 combine2x128(m128 hi, m128 lo) {
}
#endif //AVX2
#if defined(HAVE_SIMD_128_BITS)
/**
* "Rich" version of diff384(). Takes two vectors a and b and returns a 12-bit
* mask indicating which 32-bit words contain differences.
*/
static really_inline u32 diffrich384(m384 a, m384 b) {
m128 z = zeroes128();
a.lo = _mm_cmpeq_epi32(a.lo, b.lo);
a.mid = _mm_cmpeq_epi32(a.mid, b.mid);
a.hi = _mm_cmpeq_epi32(a.hi, b.hi);
m128 packed = _mm_packs_epi16(_mm_packs_epi32(a.lo, a.mid),
_mm_packs_epi32(a.hi, z));
return ~(_mm_movemask_epi8(packed)) & 0xfff;
}
#endif // HAVE_SIMD_128_BITS
/****
**** 512-bit Primitives
****/

View File

@ -138,8 +138,8 @@ public:
/// Flip all bits.
void flip() {
for (auto &e : bits) {
e = ~e;
for (size_t i = 0; i < size(); i++) {
flip(i);
}
clear_trailer();
}

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
* Copyright (c) 2020-2023, VectorCamp PC
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -44,7 +45,7 @@
#define DOUBLE_CASE_CLEAR 0xdfdf
#define OCTO_CASE_CLEAR 0xdfdfdfdfdfdfdfdfULL
#if !defined(VS_SIMDE_BACKEND)
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "util/arch/x86/bitutils.h"
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
@ -52,6 +53,32 @@
#elif defined(ARCH_PPC64EL)
#include "util/arch/ppc64el/bitutils.h"
#endif
#else
#include "util/arch/common/bitutils.h"
#define clz32_impl clz32_impl_c
#define clz64_impl clz64_impl_c
#define ctz32_impl ctz32_impl_c
#define ctz64_impl ctz64_impl_c
#define lg2_impl lg2_impl_c
#define lg2_64_impl lg2_64_impl_c
#define findAndClearLSB_32_impl findAndClearLSB_32_impl_c
#define findAndClearLSB_64_impl findAndClearLSB_64_impl_c
#define findAndClearMSB_32_impl findAndClearMSB_32_impl_c
#define findAndClearMSB_64_impl findAndClearMSB_64_impl_c
#define compress32_impl compress32_impl_c
#define compress64_impl compress64_impl_c
#define compress128_impl compress128_impl_c
#define expand32_impl expand32_impl_c
#define expand64_impl expand64_impl_c
#define expand128_impl expand128_impl_c
#define bf64_iterate_impl bf64_iterate_impl_c
#define bf64_set_impl bf64_set_impl_c
#define bf64_unset_impl bf64_unset_impl_c
#define rank_in_mask32_impl rank_in_mask32_impl_c
#define rank_in_mask64_impl rank_in_mask64_impl_c
#define pext32_impl pext32_impl_c
#define pext64_impl pext64_impl_c
#endif
static really_inline
u32 clz32(u32 x) {

View File

@ -74,8 +74,6 @@
# endif
#elif defined(USE_PPC64EL_ALTIVEC_H)
#include <altivec.h>
#else
#error no intrinsics file
#endif
#endif // INTRINSICS_H

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
* Copyright (c) 2020-2021, VectorCamp PC
* Copyright (c) 2020-2023, VectorCamp PC
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -49,6 +49,9 @@ const u8 *first_zero_match_inverted(const u8 *buf, SuperVector<S> v, u16 const l
template <u16 S>
const u8 *last_zero_match_inverted(const u8 *buf, SuperVector<S> v, u16 len = S);
#if defined(VS_SIMDE_BACKEND)
#include "util/arch/x86/match.hpp"
#else
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "util/arch/x86/match.hpp"
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
@ -56,5 +59,6 @@ const u8 *last_zero_match_inverted(const u8 *buf, SuperVector<S> v, u16 len = S)
#elif defined(ARCH_PPC64EL)
#include "util/arch/ppc64el/match.hpp"
#endif
#endif
#endif // MATCH_HPP

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
* Copyright (c) 2023, VectorCamp PC
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -34,7 +35,16 @@
#include "util/intrinsics.h"
#include "ue2common.h"
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#if defined(VS_SIMDE_BACKEND)
#define VECTORSIZE 16
#define SIMDE_ENABLE_NATIVE_ALIASES
#if !defined(VS_SIMDE_NATIVE)
#define SIMDE_NO_NATIVE
#endif
#include <simde/x86/sse4.2.h>
typedef simde__m128i m128;
#define HAVE_SIMD_128_BITS
#elif defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "util/arch/x86/simd_types.h"
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
#include "util/arch/arm/simd_types.h"
@ -42,9 +52,6 @@
#include "util/arch/ppc64el/simd_types.h"
#endif
#if !defined(m128) && !defined(HAVE_SIMD_128_BITS)
typedef struct ALIGN_DIRECTIVE {u64a hi; u64a lo;} m128;
#endif
#if !defined(m256) && !defined(HAVE_SIMD_256_BITS)
typedef struct ALIGN_AVX_DIRECTIVE {m128 lo; m128 hi;} m256;
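This hunk is the core of the backend wiring: defining `SIMDE_ENABLE_NATIVE_ALIASES` before including `<simde/x86/sse4.2.h>` and typedef'ing `simde__m128i` as `m128` lets the existing x86 `_mm_*` code paths be reused unchanged on any host. Below is a hypothetical standalone sketch of that mechanism; the helper name, lane values and the commented-out define are assumptions for the example, not project code.

```c
/* With SIMDE_ENABLE_NATIVE_ALIASES defined before the include, unmodified
 * x86-intrinsic code compiles against SIMDe's portable types, even on
 * non-x86 hosts. Defining SIMDE_NO_NATIVE additionally forces the portable
 * implementations (mirroring the !defined(VS_SIMDE_NATIVE) case above). */
#define SIMDE_ENABLE_NATIVE_ALIASES
/* #define SIMDE_NO_NATIVE */
#include <simde/x86/sse4.2.h>

typedef simde__m128i m128;          /* mirrors the typedef in this hunk */

/* Unchanged "x86-style" code: add four 32-bit lanes. */
static m128 add4x32(m128 a, m128 b) {
    return _mm_add_epi32(a, b);     /* resolves to simde_mm_add_epi32 off-x86 */
}

int main(void) {
    m128 a = _mm_set_epi32(3, 2, 1, 0);
    m128 b = _mm_set1_epi32(10);
    m128 r = add4x32(a, b);
    return _mm_extract_epi32(r, 0) == 10 ? 0 : 1;  /* lane 0: 0 + 10 */
}
```

On an x86 host the same source resolves to the real intrinsics, which is what allows the SIMDe backend to also serve as a reference/comparison backend on architectures that already have native support.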

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2015-2020, Intel Corporation
* Copyright (c) 2023, VectorCamp PC
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -61,6 +62,9 @@ extern const char vbs_mask_data[];
}
#endif
#if defined(VS_SIMDE_BACKEND)
#include "util/arch/x86/simd_utils.h"
#else
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "util/arch/x86/simd_utils.h"
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
@ -68,6 +72,7 @@ extern const char vbs_mask_data[];
#elif defined(ARCH_PPC64EL)
#include "util/arch/ppc64el/simd_utils.h"
#endif
#endif
#include "util/arch/common/simd_utils.h"

View File

@ -376,7 +376,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_64 (uint8_t const N) const
template <>
really_inline SuperVector<16> SuperVector<16>::vshl_128(uint8_t const N) const
{
#if defined(HAVE__BUILTIN_CONSTANT_P)
#if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND)
if (__builtin_constant_p(N)) {
return {_mm_slli_si128(u.v128[0], N)};
}
@ -451,7 +451,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const N) const
template <>
really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const N) const
{
#if defined(HAVE__BUILTIN_CONSTANT_P)
#if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND)
if (__builtin_constant_p(N)) {
return {_mm_srli_si128(u.v128[0], N)};
}
@ -472,7 +472,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr(uint8_t const N) const
template <>
really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const
{
#if defined(HAVE__BUILTIN_CONSTANT_P)
#if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND)
if (__builtin_constant_p(N)) {
return {_mm_srli_si128(u.v128[0], N)};
}
@ -483,7 +483,7 @@ really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const
template <>
really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const
{
#if defined(HAVE__BUILTIN_CONSTANT_P)
#if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND)
if (__builtin_constant_p(N)) {
return {_mm_slli_si128(u.v128[0], N)};
}

View File

@ -34,6 +34,9 @@
#include <cstdio>
#include <type_traits>
#if defined(VS_SIMDE_BACKEND)
#include "util/supervector/arch/x86/types.hpp"
#else
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "util/supervector/arch/x86/types.hpp"
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
@ -41,6 +44,7 @@
#elif defined(ARCH_PPC64EL)
#include "util/supervector/arch/ppc64el/types.hpp"
#endif
#endif // VS_SIMDE_BACKEND
#if defined(HAVE_SIMD_512_BITS)
using Z_TYPE = u64a;
@ -57,7 +61,7 @@ using Z_TYPE = u32;
#define DOUBLE_LOAD_MASK(l) (((1ULL) << (l)) - 1ULL)
#define SINGLE_LOAD_MASK(l) (((1ULL) << (l)) - 1ULL)
#elif defined(HAVE_SIMD_128_BITS)
#if defined(ARCH_ARM32) || defined(ARCH_AARCH64)
#if !defined(VS_SIMDE_BACKEND) && (defined(ARCH_ARM32) || defined(ARCH_AARCH64))
using Z_TYPE = u64a;
#define Z_BITS 64
#define Z_POSSHIFT 2
@ -175,7 +179,7 @@ public:
typename BaseVector<32>::type ALIGN_ATTR(BaseVector<32>::size) v256[SIZE / BaseVector<32>::size];
typename BaseVector<64>::type ALIGN_ATTR(BaseVector<64>::size) v512[SIZE / BaseVector<64>::size];
#if defined(ARCH_ARM32) || defined(ARCH_AARCH64) || defined(ARCH_PPC64EL)
#if !defined(VS_SIMDE_BACKEND) && (defined(ARCH_ARM32) || defined(ARCH_AARCH64) || defined(ARCH_PPC64EL))
uint64x2_t ALIGN_ATTR(BaseVector<16>::size) u64x2[SIZE / BaseVector<16>::size];
int64x2_t ALIGN_ATTR(BaseVector<16>::size) s64x2[SIZE / BaseVector<16>::size];
uint32x4_t ALIGN_ATTR(BaseVector<16>::size) u32x4[SIZE / BaseVector<16>::size];
@ -382,6 +386,9 @@ struct Unroller<End, End>
};
#if defined(HS_OPTIMIZE)
#if defined(VS_SIMDE_BACKEND)
#include "util/supervector/arch/x86/impl.cpp"
#else
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "util/supervector/arch/x86/impl.cpp"
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
@ -390,6 +397,7 @@ struct Unroller<End, End>
#include "util/supervector/arch/ppc64el/impl.cpp"
#endif
#endif
#endif
#endif /* SUPERVECTOR_H */

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
* Copyright (c) 2023, VectorCamp PC
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -667,7 +668,7 @@ TEST(SimdUtilsTest, movq) {
ASSERT_EQ(0, memcmp(cmp, &r, sizeof(r)));
#if defined(HAVE_SIMD_128_BITS)
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#if defined(ARCH_IA32) || defined(ARCH_X86_64) || defined(VS_SIMDE_BACKEND)
simd = _mm_set_epi64x(~0LL, 0x123456789abcdef);
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
int64x2_t a = { 0x123456789abcdefLL, ~0LL };