mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
[VSX] optimize alignr method
This commit is contained in:
parent
a837cf3bee
commit
305a041c73
@ -523,14 +523,14 @@ really_inline SuperVector<16> SuperVector<16>::Ones_vshl(uint8_t const N)
|
|||||||
template <>
|
template <>
|
||||||
really_inline SuperVector<16> SuperVector<16>::loadu(void const *ptr)
|
really_inline SuperVector<16> SuperVector<16>::loadu(void const *ptr)
|
||||||
{
|
{
|
||||||
return (m128) vec_xl(0, (const long64_t*)ptr);
|
return { vec_xl(0, (const long64_t*)ptr) };
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
really_inline SuperVector<16> SuperVector<16>::load(void const *ptr)
|
really_inline SuperVector<16> SuperVector<16>::load(void const *ptr)
|
||||||
{
|
{
|
||||||
assert(ISALIGNED_N(ptr, alignof(SuperVector::size)));
|
assert(ISALIGNED_N(ptr, alignof(SuperVector::size)));
|
||||||
return (m128) vec_xl(0, (const long64_t*)ptr);
|
return { vec_xl(0, (const long64_t*)ptr) };
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
@ -544,27 +544,18 @@ really_inline SuperVector<16> SuperVector<16>::loadu_maskz(void const *ptr, uint
|
|||||||
template<>
|
template<>
|
||||||
really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> &other, int8_t offset)
|
really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> &other, int8_t offset)
|
||||||
{
|
{
|
||||||
|
if (offset == 0) return other;
|
||||||
switch(offset) {
|
if (offset == 16) return *this;
|
||||||
case 0: return other; break;
|
#if defined(HAVE__BUILTIN_CONSTANT_P)
|
||||||
case 1: return {(m128) vec_sld(u.s8x16[0], other.u.s8x16[0], 15)}; break;
|
if (__builtin_constant_p(offset)) {
|
||||||
case 2: return {(m128) vec_sld(u.s8x16[0], other.u.s8x16[0], 14)}; break;
|
return { vec_sld(u.s8x16[0], other.u.s8x16[0], offset) };
|
||||||
case 3: return {(m128) vec_sld(u.s8x16[0], other.u.s8x16[0], 13)}; break;
|
|
||||||
case 4: return {(m128) vec_sld(u.s8x16[0], other.u.s8x16[0], 12)}; break;
|
|
||||||
case 5: return {(m128) vec_sld(u.s8x16[0], other.u.s8x16[0], 11)}; break;
|
|
||||||
case 6: return {(m128) vec_sld(u.s8x16[0], other.u.s8x16[0], 10)}; break;
|
|
||||||
case 7: return {(m128) vec_sld(u.s8x16[0], other.u.s8x16[0], 9)}; break;
|
|
||||||
case 8: return {(m128) vec_sld(u.s8x16[0], other.u.s8x16[0], 8)}; break;
|
|
||||||
case 9: return {(m128) vec_sld(u.s8x16[0], other.u.s8x16[0], 7)}; break;
|
|
||||||
case 10: return {(m128) vec_sld(u.s8x16[0], other.u.s8x16[0], 6)}; break;
|
|
||||||
case 11: return {(m128) vec_sld(u.s8x16[0], other.u.s8x16[0], 5)}; break;
|
|
||||||
case 12: return {(m128) vec_sld(u.s8x16[0], other.u.s8x16[0], 4)}; break;
|
|
||||||
case 13: return {(m128) vec_sld(u.s8x16[0], other.u.s8x16[0], 3)}; break;
|
|
||||||
case 14: return {(m128) vec_sld(u.s8x16[0], other.u.s8x16[0], 2)}; break;
|
|
||||||
case 15: return {(m128) vec_sld(u.s8x16[0], other.u.s8x16[0], 1)}; break;
|
|
||||||
default: break;
|
|
||||||
}
|
}
|
||||||
return *this;
|
#endif
|
||||||
|
uint8x16_t sl = vec_splats((uint8_t) (offset << 3));
|
||||||
|
uint8x16_t sr = vec_splats((uint8_t) ((16 - offset) << 3));
|
||||||
|
uint8x16_t rhs = vec_slo(u.u8x16[0], sr);
|
||||||
|
uint8x16_t lhs = vec_sro(other.u.u8x16[0], sl);
|
||||||
|
return { vec_or(lhs, rhs) };
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user