/*
 * Copyright (c) 2015-2017, Intel Corporation
 * Copyright (c) 2020-2021, VectorCamp PC
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef SUPERVECTOR_HPP
#define SUPERVECTOR_HPP

#include <cstdint>
#include <cstdio>

#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "util/supervector/arch/x86/types.hpp"
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
#include "util/supervector/arch/arm/types.hpp"
#endif

#if defined(HAVE_SIMD_512_BITS)
using Z_TYPE = u64a;
#define Z_BITS 64
#define Z_SHIFT 63
#define DOUBLE_LOAD_MASK(l) ((~0ULL) >> (Z_BITS - (l)))
#define SINGLE_LOAD_MASK(l) (((1ULL) << (l)) - 1ULL)
#elif defined(HAVE_SIMD_256_BITS)
using Z_TYPE = u32;
#define Z_BITS 32
#define Z_SHIFT 31
#define DOUBLE_LOAD_MASK(l) (((1ULL) << (l)) - 1ULL)
#define SINGLE_LOAD_MASK(l) (((1ULL) << (l)) - 1ULL)
#elif defined(HAVE_SIMD_128_BITS)
using Z_TYPE = u32;
#define Z_BITS 32
#define Z_SHIFT 15
#define DOUBLE_LOAD_MASK(l) (((1ULL) << (l)) - 1ULL)
#define SINGLE_LOAD_MASK(l) (((1ULL) << (l)) - 1ULL)
#endif

// Define a common assume_aligned using an appropriate compiler built-in, if
// it's available. Note that we need to handle C or C++ compilation.
#ifdef __cplusplus
# ifdef HAVE_CXX_BUILTIN_ASSUME_ALIGNED
#  define assume_aligned(x, y) __builtin_assume_aligned((x), (y))
# endif
#else
# ifdef HAVE_CC_BUILTIN_ASSUME_ALIGNED
#  define assume_aligned(x, y) __builtin_assume_aligned((x), (y))
# endif
#endif

// Fallback to identity case.
#ifndef assume_aligned
#define assume_aligned(x, y) (x)
#endif

template <uint16_t SIZE>
class SuperVector;

using m128_t  = SuperVector<16>;
using m256_t  = SuperVector<32>;
using m512_t  = SuperVector<64>;
using m1024_t = SuperVector<128>;

// struct for inferring what underlying types to use
template <uint16_t SIZE>
struct BaseVector
{
    static const bool is_valid = false; // for template matches specialisation
    using type = void;
    using movemask_type = uint32_t;
    using previous_type = void;
};

template <>
struct BaseVector<128>
{
    static constexpr bool is_valid = true;
    static constexpr uint16_t size = 128;
    using type = void;
    using movemask_type = u64a;
};

template <>
struct BaseVector<64>
{
    static constexpr bool is_valid = true;
    static constexpr uint16_t size = 64;
    using type = m512;
    using movemask_type = u64a;
};

// 256 bit implementation
template <>
struct BaseVector<32>
{
    static constexpr bool is_valid = true;
    static constexpr uint16_t size = 32;
    using type = m256;
    using movemask_type = u32;
};

// 128 bit implementation
template <>
struct BaseVector<16>
{
    static constexpr bool is_valid = true;
    static constexpr uint16_t size = 16;
    using type = m128;
    using movemask_type = u32;
};

template <uint16_t SIZE>
class SuperVector : public BaseVector<SIZE>
{
    static_assert(BaseVector<SIZE>::is_valid, "invalid SuperVector size");

public:
    using base_type = BaseVector<SIZE>;

    union {
        typename BaseVector<16>::type ALIGN_ATTR(BaseVector<16>::size) v128[SIZE / BaseVector<16>::size];
        typename BaseVector<32>::type ALIGN_ATTR(BaseVector<32>::size) v256[SIZE / BaseVector<32>::size];
        typename BaseVector<64>::type ALIGN_ATTR(BaseVector<64>::size) v512[SIZE / BaseVector<64>::size];

        uint64_t u64[SIZE / sizeof(uint64_t)];
        int64_t  s64[SIZE / sizeof(int64_t)];
        uint32_t u32[SIZE / sizeof(uint32_t)];
        int32_t  s32[SIZE / sizeof(int32_t)];
        uint16_t u16[SIZE / sizeof(uint16_t)];
        int16_t  s16[SIZE / sizeof(int16_t)];
        uint8_t  u8[SIZE / sizeof(uint8_t)];
        int8_t   s8[SIZE / sizeof(int8_t)];
        float    f32[SIZE / sizeof(float)];
        double   f64[SIZE / sizeof(double)];
    } u;

    SuperVector() {};
    SuperVector(SuperVector const &other);
    SuperVector(typename base_type::type const v);

    template <typename T>
    SuperVector(T const other);

    static SuperVector dup_u8 (uint8_t other)  { return {other}; };
    static SuperVector dup_s8 (int8_t other)   { return {other}; };
    static SuperVector dup_u16(uint16_t other) { return {other}; };
    static SuperVector dup_s16(int16_t other)  { return {other}; };
    static SuperVector dup_u32(uint32_t other) { return {other}; };
    static SuperVector dup_s32(int32_t other)  { return {other}; };
    static SuperVector dup_u64(uint64_t other) { return {other}; };
    static SuperVector dup_s64(int64_t other)  { return {other}; };

    void operator=(SuperVector const &other);

    SuperVector operator&(SuperVector const &b) const;
    SuperVector operator|(SuperVector const &b) const;
    SuperVector operator^(SuperVector const &b) const;

    SuperVector opand(SuperVector const &b) const { return *this & b; }
    SuperVector opor (SuperVector const &b) const { return *this | b; }
    SuperVector opxor(SuperVector const &b) const { return *this ^ b; }
    SuperVector opandnot(SuperVector const &b) const;

    SuperVector eq(SuperVector const &b) const;
    SuperVector operator<<(uint8_t const N) const;
    SuperVector operator>>(uint8_t const N) const;
    typename base_type::movemask_type movemask(void) const;
    typename base_type::movemask_type eqmask(SuperVector const b) const;

    static SuperVector loadu(void const *ptr);
    static SuperVector load(void const *ptr);
    static SuperVector loadu_maskz(void const *ptr, uint8_t const len);

    SuperVector alignr(SuperVector &other, int8_t offset);
    SuperVector pshufb(SuperVector b);
    SuperVector lshift64(uint8_t const N);
    SuperVector rshift64(uint8_t const N);
    SuperVector lshift128(uint8_t const N);
    SuperVector rshift128(uint8_t const N);
    SuperVector lshift128_var(uint8_t const N) const;
    SuperVector rshift128_var(uint8_t const N) const;

    // Constants
    static SuperVector Ones();
    static SuperVector Zeroes();

#if defined(DEBUG)
    void print8(const char *label) {
        printf("%12s: ", label);
        for (s16 i = SIZE - 1; i >= 0; i--)
            printf("%02x ", u.u8[i]);
        printf("\n");
    }

    void print16(const char *label) {
        printf("%12s: ", label);
        for (s16 i = SIZE / sizeof(u16) - 1; i >= 0; i--)
            printf("%04x ", u.u16[i]);
        printf("\n");
    }

    void print32(const char *label) {
        printf("%12s: ", label);
        for (s16 i = SIZE / sizeof(u32) - 1; i >= 0; i--)
            printf("%08x ", u.u32[i]);
        printf("\n");
    }

    void print64(const char *label) {
        printf("%12s: ", label);
        for (s16 i = SIZE / sizeof(u64a) - 1; i >= 0; i--)
            printf("%016lx ", u.u64[i]);
        printf("\n");
    }
#else
    void print8(const char *label UNUSED) {};
    void print16(const char *label UNUSED) {};
    void print32(const char *label UNUSED) {};
    void print64(const char *label UNUSED) {};
#endif
};

#if defined(HS_OPTIMIZE)
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "util/supervector/arch/x86/impl.cpp"
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
#include "util/supervector/arch/arm/impl.cpp"
#endif
#endif

#endif /* SUPERVECTOR_HPP */
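
/*
 * Usage sketch (illustrative comment only, not part of the build): a minimal
 * example of how the declarations above fit together when scanning a 16-byte
 * chunk for a literal byte. The per-arch semantics of loadu()/eqmask() come
 * from the types.hpp/impl.cpp files included above; `buf`, `handle_match` and
 * `find_and_clear_lsb` are hypothetical names used only for illustration.
 *
 *     const u8 *buf = ...;  // at least 16 readable bytes
 *     SuperVector<16> chars = SuperVector<16>::dup_u8('a');
 *     SuperVector<16> data  = SuperVector<16>::loadu(buf);
 *     SuperVector<16>::movemask_type z = data.eqmask(chars);
 *     while (z) {
 *         u32 pos = find_and_clear_lsb(&z); // hypothetical bit-scan helper
 *         handle_match(buf + pos);          // hypothetical per-match callback
 *     }
 */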