mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
add missing ARM SuperVector methods, some tests still fail, WIP
This commit is contained in:
parent
5d9d958e74
commit
acca824dea
@ -602,7 +602,7 @@ set (hs_exec_common_SRCS
|
||||
${hs_exec_common_SRCS}
|
||||
src/util/arch/x86/cpuid_flags.c
|
||||
)
|
||||
else (ARCH_ARM32 OR ARCH_AARCH64)
|
||||
elseif (ARCH_ARM32 OR ARCH_AARCH64)
|
||||
set (hs_exec_common_SRCS
|
||||
${hs_exec_common_SRCS}
|
||||
src/util/arch/arm/cpuid_flags.c
|
||||
@ -758,7 +758,7 @@ if (ARCH_IA32 OR ARCH_X86_64)
|
||||
set (hs_exec_SRCS
|
||||
${hs_exec_SRCS}
|
||||
src/util/simd/arch/x86/impl.cpp)
|
||||
else (ARCH_ARM32 OR ARCH_AARCH64)
|
||||
elseif (ARCH_ARM32 OR ARCH_AARCH64)
|
||||
set (hs_exec_SRCS
|
||||
${hs_exec_SRCS}
|
||||
src/util/simd/arch/arm/impl.cpp)
|
||||
|
@ -131,6 +131,8 @@ really_inline SuperVector<16> SuperVector<16>::Zeroes(void)
|
||||
return {vdupq_n_u8(0)};
|
||||
}
|
||||
|
||||
// Methods
|
||||
|
||||
template <>
|
||||
really_inline void SuperVector<16>::operator=(SuperVector<16> const &o)
|
||||
{
|
||||
@ -143,6 +145,24 @@ really_inline SuperVector<16> SuperVector<16>::operator&(SuperVector<16> const b
|
||||
return {vandq_s8(u.v128[0], b.u.v128[0])};
|
||||
}
|
||||
|
||||
template <>
really_inline SuperVector<16> SuperVector<16>::operator|(SuperVector<16> const b) const
{
    // Bitwise OR of the two 128-bit vectors.
    // BUG FIX: the original returned vandq_s8 (copy-paste from operator&),
    // so operator| computed an AND instead of an OR.
    return {vorrq_s8(u.v128[0], b.u.v128[0])};
}
|
||||
|
||||
template <>
really_inline SuperVector<16> SuperVector<16>::opand(SuperVector<16> const b) const
{
    // Named alias for bitwise AND; delegate to operator&, which performs the
    // same vandq_s8 on the underlying 128-bit lanes.
    return *this & b;
}
|
||||
|
||||
template <>
really_inline SuperVector<16> SuperVector<16>::opandnot(SuperVector<16> const b) const
{
    // "and-not" must compute (~*this) & b, matching the x86 counterpart
    // _mm_andnot_si128(u.v128[0], b.u.v128[0]).
    // BUG FIX: the original returned a plain AND. NEON vbicq_s8(a, b)
    // computes a & ~b, so the operands are swapped to get b & ~*this.
    return {vbicq_s8(b.u.v128[0], u.v128[0])};
}
|
||||
|
||||
template <>
|
||||
really_inline SuperVector<16> SuperVector<16>::eq(SuperVector<16> const b) const
|
||||
{
|
||||
@ -171,7 +191,7 @@ really_inline typename SuperVector<16>::movemask_type SuperVector<16>::eqmask(Su
|
||||
return eq(b).movemask();
|
||||
}
|
||||
|
||||
#ifndef DEBUG
|
||||
#ifndef HS_OPTIMIZE
|
||||
template <>
|
||||
really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const
|
||||
{
|
||||
@ -205,6 +225,38 @@ really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HS_OPTIMIZE
|
||||
template <>
really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const
{
    // Whole-vector byte shift right (x86 _mm_srli_si128 semantics — this is
    // how loadu_maskz uses it to build byte masks, and the 0..15 range of the
    // non-optimized switch confirms byte granularity).
    // BUG FIX: vshrq_n_s32 shifted bits *within each 32-bit lane* — different
    // semantics — and takes an immediate, so a runtime N did not compile
    // reliably. A table lookup with indices N..N+15 shifts bytes toward lane
    // 0; vqtbl1q returns zero for indices >= 16, which provides the zero fill
    // (and correctly yields an all-zero vector for N >= 16).
    static const uint8_t iota[16] = {0, 1, 2, 3, 4, 5, 6, 7,
                                     8, 9, 10, 11, 12, 13, 14, 15};
    uint8x16_t shifted_idx = vaddq_u8(vld1q_u8(iota), vdupq_n_u8(N));
    return {vqtbl1q_s8((int8x16_t)u.v128[0], shifted_idx)};
}
|
||||
#else
|
||||
template <>
really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const
{
    // Whole-vector byte shift right (x86 _mm_srli_si128 semantics, as used by
    // loadu_maskz to build byte masks).
    // BUG FIX: the original vshrq_n_s32 shifted bits inside each 32-bit lane
    // instead of shifting bytes across the whole vector, and case 0 used an
    // invalid immediate (vshrq_n_s32 accepts 1..32 only). vextq_s8 extracts
    // bytes N..N+15 from (this : zero), i.e. a byte shift with zero fill; it
    // needs an immediate index, hence the switch over the 16 byte counts.
    switch (N) {
    case 0:  return *this;
    case 1:  return {vextq_s8((int8x16_t)u.v128[0], vdupq_n_s8(0), 1)};
    case 2:  return {vextq_s8((int8x16_t)u.v128[0], vdupq_n_s8(0), 2)};
    case 3:  return {vextq_s8((int8x16_t)u.v128[0], vdupq_n_s8(0), 3)};
    case 4:  return {vextq_s8((int8x16_t)u.v128[0], vdupq_n_s8(0), 4)};
    case 5:  return {vextq_s8((int8x16_t)u.v128[0], vdupq_n_s8(0), 5)};
    case 6:  return {vextq_s8((int8x16_t)u.v128[0], vdupq_n_s8(0), 6)};
    case 7:  return {vextq_s8((int8x16_t)u.v128[0], vdupq_n_s8(0), 7)};
    case 8:  return {vextq_s8((int8x16_t)u.v128[0], vdupq_n_s8(0), 8)};
    case 9:  return {vextq_s8((int8x16_t)u.v128[0], vdupq_n_s8(0), 9)};
    case 10: return {vextq_s8((int8x16_t)u.v128[0], vdupq_n_s8(0), 10)};
    case 11: return {vextq_s8((int8x16_t)u.v128[0], vdupq_n_s8(0), 11)};
    case 12: return {vextq_s8((int8x16_t)u.v128[0], vdupq_n_s8(0), 12)};
    case 13: return {vextq_s8((int8x16_t)u.v128[0], vdupq_n_s8(0), 13)};
    case 14: return {vextq_s8((int8x16_t)u.v128[0], vdupq_n_s8(0), 14)};
    case 15: return {vextq_s8((int8x16_t)u.v128[0], vdupq_n_s8(0), 15)};
    default: break;
    }
    // Shifting by 16 or more bytes empties the vector.
    return Zeroes();
}
|
||||
#endif
|
||||
|
||||
template <>
|
||||
really_inline SuperVector<16> SuperVector<16>::loadu(void const *ptr)
|
||||
@ -217,10 +269,20 @@ really_inline SuperVector<16> SuperVector<16>::load(void const *ptr)
|
||||
{
|
||||
assert(ISALIGNED_N(ptr, alignof(SuperVector::size)));
|
||||
ptr = assume_aligned(ptr, SuperVector::size);
|
||||
return vld1q_s32((const int32_t *)ptr);
|
||||
return {vld1q_s32((const int32_t *)ptr)};
|
||||
}
|
||||
|
||||
#ifndef DEBUG
|
||||
template <>
really_inline SuperVector<16> SuperVector<16>::loadu_maskz(void const *ptr, uint8_t const len)
{
    // Unaligned load of 16 bytes with the bytes at positions >= len zeroed
    // (masked-zero load, matching the x86 maskz semantics implied by the
    // name and the `len` parameter).
    // BUG FIX: the original derived its masks from the pointer's alignment
    // (`(uintptr_t)ptr & 15`), which is unrelated to how many bytes should
    // survive, and it relied on the (broken, per-lane) shift operators.
    // Build the mask directly: byte i stays iff i < len.
    static const uint8_t iota[16] = {0, 1, 2, 3, 4, 5, 6, 7,
                                     8, 9, 10, 11, 12, 13, 14, 15};
    uint8x16_t mask = vcltq_u8(vld1q_u8(iota), vdupq_n_u8(len));
    SuperVector<16> v = SuperVector<16>::loadu(ptr);
    // NOTE(review): like the original, this still reads a full 16 bytes from
    // ptr — callers must guarantee the load itself is safe.
    return {vandq_s8((int8x16_t)mask, (int8x16_t)v.u.v128[0])};
}
|
||||
|
||||
#ifndef HS_OPTIMIZE
|
||||
template<>
|
||||
really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> r, int8_t offset)
|
||||
{
|
||||
@ -254,6 +316,81 @@ really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> l, int8_t
|
||||
}
|
||||
#endif
|
||||
|
||||
template<>
really_inline SuperVector<16> SuperVector<16>::pshufb(SuperVector<16> b)
{
    /* Intel PSHUFB zeroes a result byte when bit 0x80 of its control byte is
       set, and otherwise selects source lane (control & 0xf). NEON vqtbl1q
       zeroes a result byte when its index is >= 16. Masking each control
       byte with 0x8f keeps the low lane-select bits and turns any byte with
       0x80 set into an out-of-range index, reproducing the Intel rule. */
    int8x16_t neon_ctrl = vandq_s8((int8x16_t)b.u.v128[0], vdupq_n_s8(0x8f));
    return {vqtbl1q_s8((int8x16_t)u.v128[0], (uint8x16_t)neon_ctrl)};
}
|
||||
|
||||
#ifdef HS_OPTIMIZE
|
||||
template<>
really_inline SuperVector<16> SuperVector<16>::lshift64(uint8_t const l)
{
    // Shift each 64-bit lane left by l bits.
    // BUG FIX: vshlq_n_s64 requires a compile-time immediate, so passing the
    // runtime parameter `l` is not valid. The register form vshlq_s64 takes a
    // runtime count (compilers fold a constant vdup back into an immediate
    // shift under optimization) and handles the full 0..63 range.
    return {(m128)vshlq_s64((int64x2_t)u.v128[0], vdupq_n_s64((int64_t)l))};
}
|
||||
#else
|
||||
template<>
really_inline SuperVector<16> SuperVector<16>::lshift64(uint8_t const l)
{
    // Shift each 64-bit lane left by l bits.
    // BUG FIX: the original switch only covered l = 0..15 and silently
    // returned *this (no shift) for l = 16..63, although 64-bit lanes admit
    // shifts up to 63. The register form vshlq_s64 takes a runtime count, so
    // no switch over immediates is needed and the whole range is handled.
    return {(m128)vshlq_s64((int64x2_t)u.v128[0], vdupq_n_s64((int64_t)l))};
}
|
||||
#endif
|
||||
|
||||
#ifdef HS_OPTIMIZE
|
||||
template<>
really_inline SuperVector<16> SuperVector<16>::rshift64(uint8_t const l)
{
    // Shift each 64-bit lane right by l bits.
    // BUG FIX: vshrq_n_s64 requires a compile-time immediate in 1..64, so a
    // runtime `l` is invalid and l == 0 is an illegal immediate outright.
    // NEON has no right-shift-by-register; vshlq_s64 with a negated count
    // performs the right shift for any l in 0..63.
    // NOTE(review): this keeps the original's signed (arithmetic) shift
    // semantics from vshrq_n_s64; if the x86 counterpart is the logical
    // _mm_srli_epi64, the unsigned vshlq_u64 form should be used instead —
    // confirm against the x86 impl.
    return {(m128)vshlq_s64((int64x2_t)u.v128[0], vdupq_n_s64(-(int64_t)l))};
}
|
||||
#else
|
||||
template<>
really_inline SuperVector<16> SuperVector<16>::rshift64(uint8_t const l)
{
    // Shift each 64-bit lane right by l bits.
    // BUG FIX: the original switch used vshrq_n_s64(x, 0) — an invalid
    // immediate (the range is 1..64) — and only covered l = 0..15, silently
    // returning *this for l = 16..63. vshlq_s64 with a negated runtime count
    // performs the right shift for the whole 0..63 range without a switch.
    // NOTE(review): this keeps the original's signed (arithmetic) shift
    // semantics from vshrq_n_s64; if the x86 counterpart is the logical
    // _mm_srli_epi64, the unsigned vshlq_u64 form should be used instead —
    // confirm against the x86 impl.
    return {(m128)vshlq_s64((int64x2_t)u.v128[0], vdupq_n_s64(-(int64_t)l))};
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif // SIMD_IMPL_HPP
|
||||
|
@ -165,13 +165,13 @@ really_inline SuperVector<16> SuperVector<16>::operator|(SuperVector<16> const b
|
||||
}
|
||||
|
||||
template <>
|
||||
really_inline SuperVector<16> SuperVector<16>::mand(SuperVector<16> const b) const
|
||||
really_inline SuperVector<16> SuperVector<16>::opand(SuperVector<16> const b) const
|
||||
{
|
||||
return *this & b;
|
||||
}
|
||||
|
||||
template <>
|
||||
really_inline SuperVector<16> SuperVector<16>::mandnot(SuperVector<16> const b) const
|
||||
really_inline SuperVector<16> SuperVector<16>::opandnot(SuperVector<16> const b) const
|
||||
{
|
||||
return {_mm_andnot_si128(u.v128[0], b.u.v128[0])};
|
||||
}
|
||||
|
@ -175,8 +175,9 @@ public:
|
||||
SuperVector operator&(SuperVector const b) const;
|
||||
SuperVector operator|(SuperVector const b) const;
|
||||
|
||||
SuperVector mand(SuperVector const b) const;
|
||||
SuperVector mandnot(SuperVector const b) const;
|
||||
SuperVector opand(SuperVector const b) const;
|
||||
SuperVector opor(SuperVector const b) const;
|
||||
SuperVector opandnot(SuperVector const b) const;
|
||||
|
||||
SuperVector eq(SuperVector const b) const;
|
||||
SuperVector operator<<(uint8_t const N) const;
|
||||
|
Loading…
x
Reference in New Issue
Block a user