mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
SuperVector shuffle implementation and test function optimized
This commit is contained in:
parent
57301721f1
commit
d43d6733b6
@ -45,10 +45,7 @@
|
|||||||
static really_inline
|
static really_inline
|
||||||
u32 packedExtract128(m128 s, const m128 permute, const m128 compare) {
|
u32 packedExtract128(m128 s, const m128 permute, const m128 compare) {
|
||||||
m128 shuffled = pshufb_m128(s, permute);
|
m128 shuffled = pshufb_m128(s, permute);
|
||||||
//int8x16_t res = (int8x16_t) pshufb_m128(s, permute);
|
print_m128_16x8("shufled", shuffled);
|
||||||
//printf("shufled:");
|
|
||||||
//for(int i=15; i>=0; i--) {printf("%02x ", res[i]);}
|
|
||||||
//printf("\n");
|
|
||||||
m128 compared = and128(shuffled, compare);
|
m128 compared = and128(shuffled, compare);
|
||||||
u16 rv = ~movemask128(eq128(compared, shuffled));
|
u16 rv = ~movemask128(eq128(compared, shuffled));
|
||||||
return (u32)rv;
|
return (u32)rv;
|
||||||
|
@ -463,14 +463,6 @@ char testbit128(m128 val, unsigned int n) {
|
|||||||
static really_inline
|
static really_inline
|
||||||
m128 pshufb_m128(m128 a, m128 b) {
|
m128 pshufb_m128(m128 a, m128 b) {
|
||||||
return (m128) vec_perm((uint8x16_t)a, (uint8x16_t)a, (uint8x16_t)b);
|
return (m128) vec_perm((uint8x16_t)a, (uint8x16_t)a, (uint8x16_t)b);
|
||||||
//return (m128) vec_perm((int8x16_t)vec_splat_s8(0), (int8x16_t)a, (uint8x16_t)b);;
|
|
||||||
//uint8x16_t btransparent = vec_and((uint8x16_t)b, (uint8x16_t)vec_splats(0x8f));
|
|
||||||
//return (m128) vec_perm(a, a, btransparent);
|
|
||||||
//return (m128) vec_perm((int8x16_t)vec_splat_s8(0), (int8x16_t)b, (uint8x16_t)a);
|
|
||||||
|
|
||||||
//return (m128) vec_perm((int8x16_t)a, (int8x16_t)b, (uint8x16_t)vec_splat_s8(0));
|
|
||||||
//return (m128) vec_perm((int8x16_t)b, (int8x16_t)a, (uint8x16_t)vec_splat_s8(0));
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
|
@ -603,7 +603,7 @@ template<>
|
|||||||
template<>
|
template<>
|
||||||
really_inline SuperVector<16> SuperVector<16>::pshufb<false>(SuperVector<16> b)
|
really_inline SuperVector<16> SuperVector<16>::pshufb<false>(SuperVector<16> b)
|
||||||
{
|
{
|
||||||
return (m128) vec_permxor((int8x16_t)vec_splat_s8(0), (int8x16_t)u.v128[0], (int8x16_t) b.u.v128[0]);
|
return (m128) vec_perm((uint8x16_t)u.v128[0], (uint8x16_t)u.v128[0], (uint8x16_t)b.u.v128[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
|
@ -849,7 +849,7 @@ TEST(SimdUtilsTest, pshufb_m128) {
|
|||||||
}
|
}
|
||||||
u8 vec2[16];
|
u8 vec2[16];
|
||||||
for (int i=0; i<16; i++) {
|
for (int i=0; i<16; i++) {
|
||||||
vec2[i]=i;
|
vec2[i]=i + (rand() % 16 + 0);
|
||||||
}
|
}
|
||||||
m128 v1 = loadu128(vec);
|
m128 v1 = loadu128(vec);
|
||||||
m128 v2 = loadu128(vec2);
|
m128 v2 = loadu128(vec2);
|
||||||
@ -857,7 +857,7 @@ TEST(SimdUtilsTest, pshufb_m128) {
|
|||||||
u8 res[16];
|
u8 res[16];
|
||||||
store128(res, vres);
|
store128(res, vres);
|
||||||
for (int i=0; i<16; i++) {
|
for (int i=0; i<16; i++) {
|
||||||
ASSERT_EQ(vec[vec2[i]], res[i]);
|
ASSERT_EQ(vec[vec2[i] % 16 ], res[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -280,13 +280,13 @@ TEST(SuperVectorUtilsTest,pshufb128c) {
|
|||||||
}
|
}
|
||||||
u8 vec2[16];
|
u8 vec2[16];
|
||||||
for (int i=0; i<16; i++) {
|
for (int i=0; i<16; i++) {
|
||||||
vec2[i]=i;
|
vec2[i]=i + (rand() % 15 + 0);
|
||||||
}
|
}
|
||||||
auto SP1 = SuperVector<16>::loadu(vec);
|
auto SP1 = SuperVector<16>::loadu(vec);
|
||||||
auto SP2 = SuperVector<16>::loadu(vec2);
|
auto SP2 = SuperVector<16>::loadu(vec2);
|
||||||
auto SResult = SP1.template pshufb<true>(SP2);
|
auto SResult = SP1.template pshufb<true>(SP2);
|
||||||
for (int i=0; i<16; i++) {
|
for (int i=0; i<16; i++) {
|
||||||
ASSERT_EQ(vec[vec2[i]],SResult.u.u8[i]);
|
ASSERT_EQ(vec[vec2[i] % 16 ],SResult.u.u8[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user