vectorscan/src/util/supervector/supervector.hpp
2021-12-02 18:00:02 +02:00

377 lines
13 KiB
C++

/*
* Copyright (c) 2015-2017, Intel Corporation
* Copyright (c) 2020-2021, VectorCamp PC
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SUPERVECTOR_HPP
#define SUPERVECTOR_HPP
#include <cstdint>
#include <cstdio>
#include <type_traits>
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "util/supervector/arch/x86/types.hpp"
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
#include "util/supervector/arch/arm/types.hpp"
#elif defined(ARCH_PPC64EL)
#include "util/supervector/arch/ppc64el/types.hpp"
#endif
#if defined(HAVE_SIMD_512_BITS)
using Z_TYPE = u64a;
#define Z_BITS 64
#define Z_SHIFT 63
#define DOUBLE_LOAD_MASK(l) ((~0ULL) >> (Z_BITS -(l)))
#define SINGLE_LOAD_MASK(l) (((1ULL) << (l)) - 1ULL)
#elif defined(HAVE_SIMD_256_BITS)
using Z_TYPE = u32;
#define Z_BITS 32
#define Z_SHIFT 31
#define DOUBLE_LOAD_MASK(l) (((1ULL) << (l)) - 1ULL)
#define SINGLE_LOAD_MASK(l) (((1ULL) << (l)) - 1ULL)
#elif defined(HAVE_SIMD_128_BITS)
using Z_TYPE = u32;
#define Z_BITS 32
#define Z_SHIFT 15
#define DOUBLE_LOAD_MASK(l) (((1ULL) << (l)) - 1ULL)
#define SINGLE_LOAD_MASK(l) (((1ULL) << (l)) - 1ULL)
#endif
// Define a common assume_aligned using an appropriate compiler built-in, if
// it's available. Note that we need to handle C or C++ compilation.
#ifdef __cplusplus
# ifdef HAVE_CXX_BUILTIN_ASSUME_ALIGNED
# define assume_aligned(x, y) __builtin_assume_aligned((x), (y))
# endif
#else
# ifdef HAVE_CC_BUILTIN_ASSUME_ALIGNED
# define assume_aligned(x, y) __builtin_assume_aligned((x), (y))
# endif
#endif
// Fallback to identity case.
#ifndef assume_aligned
#define assume_aligned(x, y) (x)
#endif
template <uint16_t SIZE>
class SuperVector;
using m128_t = SuperVector<16>;
using m256_t = SuperVector<32>;
using m512_t = SuperVector<64>;
using m1024_t = SuperVector<128>;
// struct for inferring what underlying types to use
template <int T>
struct BaseVector
{
static constexpr bool is_valid = false;
static constexpr u16 size = 8;
using type = void;
using movemask_type = void;
static constexpr bool has_previous = false;
using previous_type = void;
static constexpr u16 previous_size = 4;
};
template <>
struct BaseVector<128>
{
static constexpr bool is_valid = true;
static constexpr u16 size = 128;
using type = void;
using movemask_type = u64a;
static constexpr bool has_previous = true;
using previous_type = m512;
static constexpr u16 previous_size = 64;
};
template <>
struct BaseVector<64>
{
static constexpr bool is_valid = true;
static constexpr u16 size = 64;
using type = m512;
using movemask_type = u64a;
static constexpr bool has_previous = true;
using previous_type = m256;
static constexpr u16 previous_size = 32;
};
// 128 bit implementation
template <>
struct BaseVector<32>
{
static constexpr bool is_valid = true;
static constexpr u16 size = 32;
using type = m256;
using movemask_type = u32;
static constexpr bool has_previous = true;
using previous_type = m128;
static constexpr u16 previous_size = 16;
};
// 128 bit implementation
template <>
struct BaseVector<16>
{
static constexpr bool is_valid = true;
static constexpr u16 size = 16;
using type = m128;
using movemask_type = u32;
static constexpr bool has_previous = false;
using previous_type = u64a;
static constexpr u16 previous_size = 8;
};
template <uint16_t SIZE>
class SuperVector : public BaseVector<SIZE>
{
static_assert(BaseVector<SIZE>::is_valid, "invalid SuperVector size");
public:
using base_type = BaseVector<SIZE>;
using previous_type = typename BaseVector<SIZE>::previous_type;
union {
typename BaseVector<16>::type ALIGN_ATTR(BaseVector<16>::size) v128[SIZE / BaseVector<16>::size];
typename BaseVector<32>::type ALIGN_ATTR(BaseVector<32>::size) v256[SIZE / BaseVector<32>::size];
typename BaseVector<64>::type ALIGN_ATTR(BaseVector<64>::size) v512[SIZE / BaseVector<64>::size];
#if defined(ARCH_ARM32) || defined(ARCH_AARCH64) || defined(ARCH_PPC64EL)
uint64x2_t ALIGN_ATTR(BaseVector<16>::size) u64x2[SIZE / BaseVector<16>::size];
int64x2_t ALIGN_ATTR(BaseVector<16>::size) s64x2[SIZE / BaseVector<16>::size];
uint32x4_t ALIGN_ATTR(BaseVector<16>::size) u32x4[SIZE / BaseVector<16>::size];
int32x4_t ALIGN_ATTR(BaseVector<16>::size) s32x4[SIZE / BaseVector<16>::size];
uint16x8_t ALIGN_ATTR(BaseVector<16>::size) u16x8[SIZE / BaseVector<16>::size];
int16x8_t ALIGN_ATTR(BaseVector<16>::size) s16x8[SIZE / BaseVector<16>::size];
uint8x16_t ALIGN_ATTR(BaseVector<16>::size) u8x16[SIZE / BaseVector<16>::size];
int8x16_t ALIGN_ATTR(BaseVector<16>::size) s8x16[SIZE / BaseVector<16>::size];
#endif
uint64_t u64[SIZE / sizeof(uint64_t)];
int64_t s64[SIZE / sizeof(int64_t)];
uint32_t u32[SIZE / sizeof(uint32_t)];
int32_t s32[SIZE / sizeof(int32_t)];
uint16_t u16[SIZE / sizeof(uint16_t)];
int16_t s16[SIZE / sizeof(int16_t)];
uint8_t u8[SIZE / sizeof(uint8_t)];
int8_t s8[SIZE / sizeof(int8_t)];
float f32[SIZE / sizeof(float)];
double f64[SIZE / sizeof(double)];
} u;
constexpr SuperVector() {};
SuperVector(SuperVector const &other)
:u(other.u) {};
SuperVector(typename base_type::type const v);
template<typename T>
SuperVector(T other);
SuperVector(SuperVector<SIZE/2> const lo, SuperVector<SIZE/2> const hi);
SuperVector(previous_type const lo, previous_type const hi);
static SuperVector dup_u8 (uint8_t other) { return {other}; };
static SuperVector dup_s8 (int8_t other) { return {other}; };
static SuperVector dup_u16(uint16_t other) { return {other}; };
static SuperVector dup_s16(int16_t other) { return {other}; };
static SuperVector dup_u32(uint32_t other) { return {other}; };
static SuperVector dup_s32(int32_t other) { return {other}; };
static SuperVector dup_u64(uint64_t other) { return {other}; };
static SuperVector dup_s64(int64_t other) { return {other}; };
void operator=(SuperVector const &other);
SuperVector operator&(SuperVector const &b) const;
SuperVector operator|(SuperVector const &b) const;
SuperVector operator^(SuperVector const &b) const;
SuperVector operator!() const;
SuperVector operator==(SuperVector const &b) const;
SuperVector operator!=(SuperVector const &b) const;
SuperVector operator>(SuperVector const &b) const;
SuperVector operator>=(SuperVector const &b) const;
SuperVector operator<(SuperVector const &b) const;
SuperVector operator<=(SuperVector const &b) const;
SuperVector opand(SuperVector const &b) const { return *this & b; }
SuperVector opor (SuperVector const &b) const { return *this | b; }
SuperVector opxor(SuperVector const &b) const { return *this ^ b; }
SuperVector opandnot(SuperVector const &b) const;
SuperVector opnot() const { return !(*this); }
SuperVector eq(SuperVector const &b) const;
SuperVector operator<<(uint8_t const N) const;
SuperVector operator>>(uint8_t const N) const;
typename base_type::movemask_type movemask(void) const;
typename base_type::movemask_type eqmask(SuperVector const b) const;
static SuperVector loadu(void const *ptr);
static SuperVector load(void const *ptr);
static SuperVector loadu_maskz(void const *ptr, uint8_t const len);
SuperVector alignr(SuperVector &other, int8_t offset);
template<bool emulateIntel=true>
SuperVector pshufb(SuperVector b);
SuperVector pshufb_maskz(SuperVector b, uint8_t const len);
// Shift instructions
template<uint8_t N>
SuperVector vshl_8_imm() const;
template<uint8_t N>
SuperVector vshr_8_imm() const;
template<uint8_t N>
SuperVector vshl_16_imm() const;
template<uint8_t N>
SuperVector vshr_16_imm() const;
template<uint8_t N>
SuperVector vshl_32_imm() const;
template<uint8_t N>
SuperVector vshr_32_imm() const;
template<uint8_t N>
SuperVector vshl_64_imm() const;
template<uint8_t N>
SuperVector vshr_64_imm() const;
template<uint8_t N>
SuperVector vshl_128_imm() const;
template<uint8_t N>
SuperVector vshr_128_imm() const;
#if defined(HAVE_SIMD_256_BITS)
template<uint8_t N>
SuperVector vshl_256_imm() const;
template<uint8_t N>
SuperVector vshr_256_imm() const;
#endif
#if defined(HAVE_SIMD_512_BITS)
template<uint8_t N>
SuperVector vshl_512_imm() const;
template<uint8_t N>
SuperVector vshr_512_imm() const;
#endif
template<uint8_t N>
SuperVector vshl_imm() const;
template<uint8_t N>
SuperVector vshr_imm() const;
SuperVector vshl_8 (uint8_t const N) const;
SuperVector vshr_8 (uint8_t const N) const;
SuperVector vshl_16 (uint8_t const N) const;
SuperVector vshr_16 (uint8_t const N) const;
SuperVector vshl_32 (uint8_t const N) const;
SuperVector vshr_32 (uint8_t const N) const;
SuperVector vshl_64 (uint8_t const N) const;
SuperVector vshr_64 (uint8_t const N) const;
SuperVector vshl_128(uint8_t const N) const;
SuperVector vshr_128(uint8_t const N) const;
#if defined(HAVE_SIMD_256_BITS)
SuperVector vshl_256(uint8_t const N) const;
SuperVector vshr_256(uint8_t const N) const;
#endif
#if defined(HAVE_SIMD_512_BITS)
SuperVector vshl_512(uint8_t const N) const;
SuperVector vshr_512(uint8_t const N) const;
#endif
SuperVector vshl (uint8_t const N) const;
SuperVector vshr (uint8_t const N) const;
// Constants
static SuperVector Ones();
static SuperVector Ones_vshr(uint8_t const N);
static SuperVector Ones_vshl(uint8_t const N);
static SuperVector Zeroes();
#if defined(DEBUG)
void print8(const char *label) const {
printf("%12s: ", label);
for(s16 i=SIZE-1; i >= 0; i--)
printf("%02x ", u.u8[i]);
printf("\n");
}
void print16(const char *label) const {
printf("%12s: ", label);
for(s16 i=SIZE/sizeof(u16)-1; i >= 0; i--)
printf("%04x ", u.u16[i]);
printf("\n");
}
void print32(const char *label) const {
printf("%12s: ", label);
for(s16 i=SIZE/sizeof(u32)-1; i >= 0; i--)
printf("%08x ", u.u32[i]);
printf("\n");
}
void print64(const char *label) const {
printf("%12s: ", label);
for(s16 i=SIZE/sizeof(u64a)-1; i >= 0; i--)
printf("%016lx ", u.u64[i]);
printf("\n");
}
#else
void print8(const char *label UNUSED) const {};
void print16(const char *label UNUSED) const {};
void print32(const char *label UNUSED) const {};
void print64(const char *label UNUSED) const {};
#endif
};
template <std::size_t Begin, std::size_t End>
struct Unroller
{
template<typename Action>
static void iterator(Action &&action)
{
action(std::integral_constant<int, Begin>());
Unroller<Begin + 1, End>::iterator(action);
}
};
template <std::size_t End>
struct Unroller<End, End>
{
template<typename Action>
static void iterator(Action &&action UNUSED)
{}
};
#if defined(HS_OPTIMIZE)
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "util/supervector/arch/x86/impl.cpp"
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
#include "util/supervector/arch/arm/impl.cpp"
#elif defined(ARCH_PPC64EL)
#include "util/supervector/arch/ppc64el/impl.cpp"
#endif
#endif
#endif /* SUPERVECTOR_H */