mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Special case for Shuffle test added, as well as comments for the respective implementations
This commit is contained in:
parent
1eb3b19f63
commit
bf54aae779
@ -462,6 +462,9 @@ char testbit128(m128 val, unsigned int n) {
|
||||
|
||||
static really_inline
|
||||
m128 pshufb_m128(m128 a, m128 b) {
|
||||
/* On Intel, if bit 0x80 is set, then the result is zero; otherwise the lane is selected by the index & 0xf.
|
||||
In NEON or PPC, if >=16, then the result is zero, otherwise it is that lane.
|
||||
below is the version that is converted from Intel to PPC. */
|
||||
uint8x16_t mask =(uint8x16_t)vec_cmpge((uint8x16_t)b, (uint8x16_t)vec_splats((uint8_t)0x80));
|
||||
uint8x16_t res = vec_perm ((uint8x16_t)a, (uint8x16_t)a, (uint8x16_t)b);
|
||||
return (m128) vec_sel((uint8x16_t)res, (uint8x16_t)zeroes128(), (uint8x16_t)mask);
|
||||
|
@ -603,6 +603,9 @@ template<>
|
||||
template<>
|
||||
really_inline SuperVector<16> SuperVector<16>::pshufb<false>(SuperVector<16> b)
|
||||
{
|
||||
/* On Intel, if bit 0x80 is set, then the result is zero; otherwise the lane is selected by the index & 0xf.
|
||||
In NEON or PPC, if >=16, then the result is zero, otherwise it is that lane.
|
||||
below is the version that is converted from Intel to PPC. */
|
||||
uint8x16_t mask =(uint8x16_t)vec_cmpge((uint8x16_t)b.u.v128[0], (uint8x16_t)vec_splats((uint8_t)0x80));
|
||||
uint8x16_t res = vec_perm ((uint8x16_t)u.v128[0], (uint8x16_t)u.v128[0], (uint8x16_t)b.u.v128[0]);
|
||||
return (m128) vec_sel((uint8x16_t)res, (uint8x16_t)vec_splat_s8(0), (uint8x16_t)mask);
|
||||
|
@ -845,26 +845,63 @@ TEST(SimdUtilsTest, pshufb_m128) {
|
||||
srand (time(NULL));
|
||||
u8 vec[16];
|
||||
for (int i=0; i<16; i++) {
|
||||
vec[i] = rand() % 100 + 1;
|
||||
vec[i] = rand() % 1000 + 1;
|
||||
}
|
||||
u8 vec2[16];
|
||||
for (int i=0; i<16; i++) {
|
||||
vec2[i]=i + (rand() % 15 + 0);
|
||||
vec2[i]=i + (rand() % 100 + 0);
|
||||
}
|
||||
|
||||
/* On Intel, if bit 0x80 is set, then the result is zero; otherwise the lane is selected by the index & 0xf.
|
||||
In NEON or PPC, if >=16, then the result is zero, otherwise it is that lane.
|
||||
Thus below we have to check that case for NEON or PPC. */
|
||||
|
||||
/* Ensure that vec3 contains at least one 0x80 */
|
||||
u8 vec3[16] = {0};
|
||||
vec3[15] = 0x80;
|
||||
|
||||
for (int i=0; i<15; i++) {
|
||||
int l = rand() % 1000 + 0;
|
||||
if (l % 16 ==0){
|
||||
vec3[i]= 0x80;
|
||||
} else{
|
||||
vec3[i]= vec2[i];
|
||||
}
|
||||
}
|
||||
/*
|
||||
printf("vec3: ");
|
||||
for(int i=15; i>=0; i--) { printf("%02x, ", vec3[i]); }
|
||||
printf("\n");
|
||||
*/
|
||||
|
||||
/*Test Special Case*/
|
||||
m128 v1 = loadu128(vec);
|
||||
m128 v2 = loadu128(vec2);
|
||||
m128 v2 = loadu128(vec3);
|
||||
m128 vres = pshufb_m128(v1, v2);
|
||||
|
||||
u8 res[16];
|
||||
storeu128(res, vres);
|
||||
|
||||
for (int i=0; i<16; i++) {
|
||||
if(vec3[i] & 0x80){
|
||||
ASSERT_EQ(res[i], 0);
|
||||
}else{
|
||||
ASSERT_EQ(vec[vec3[i] % 16 ], res[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/*Test Other Cases*/
|
||||
v1 = loadu128(vec);
|
||||
v2 = loadu128(vec2);
|
||||
vres = pshufb_m128(v1, v2);
|
||||
storeu128(res, vres);
|
||||
|
||||
for (int i=0; i<16; i++) {
|
||||
if(vec2[i] & 0x80){
|
||||
ASSERT_EQ(res[i], 0);
|
||||
}else{
|
||||
ASSERT_EQ(vec[vec2[i] % 16 ], res[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user