mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-29 19:24:25 +03:00
Special case for Shuffle test added as well as comments for respectives implementations
This commit is contained in:
@@ -462,6 +462,9 @@ char testbit128(m128 val, unsigned int n) {
|
||||
|
||||
static really_inline
|
||||
m128 pshufb_m128(m128 a, m128 b) {
|
||||
/* On Intel, if bit 0x80 is set, then result is zero, otherwise which the lane it is &0xf.
|
||||
In NEON or PPC, if >=16, then the result is zero, otherwise it is that lane.
|
||||
below is the version that is converted from Intel to PPC. */
|
||||
uint8x16_t mask =(uint8x16_t)vec_cmpge((uint8x16_t)b, (uint8x16_t)vec_splats((uint8_t)0x80));
|
||||
uint8x16_t res = vec_perm ((uint8x16_t)a, (uint8x16_t)a, (uint8x16_t)b);
|
||||
return (m128) vec_sel((uint8x16_t)res, (uint8x16_t)zeroes128(), (uint8x16_t)mask);
|
||||
|
@@ -603,6 +603,9 @@ template<>
|
||||
template<>
|
||||
really_inline SuperVector<16> SuperVector<16>::pshufb<false>(SuperVector<16> b)
|
||||
{
|
||||
/* On Intel, if bit 0x80 is set, then result is zero, otherwise which the lane it is &0xf.
|
||||
In NEON or PPC, if >=16, then the result is zero, otherwise it is that lane.
|
||||
below is the version that is converted from Intel to PPC. */
|
||||
uint8x16_t mask =(uint8x16_t)vec_cmpge((uint8x16_t)b.u.v128[0], (uint8x16_t)vec_splats((uint8_t)0x80));
|
||||
uint8x16_t res = vec_perm ((uint8x16_t)u.v128[0], (uint8x16_t)u.v128[0], (uint8x16_t)b.u.v128[0]);
|
||||
return (m128) vec_sel((uint8x16_t)res, (uint8x16_t)vec_splat_s8(0), (uint8x16_t)mask);
|
||||
|
Reference in New Issue
Block a user