mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Fix SuperVector comparison operators and add implementations of the simd_utils low64from128/high64from128 functions
This commit is contained in:
parent
e084c2d6e4
commit
558313a2c2
@ -49,8 +49,8 @@
|
|||||||
/*
 * Debug helper: dump a 128-bit vector as sixteen 8-bit lanes.
 *
 * label  - caption handed to DEBUG_PRINTF, right-aligned in a field of at
 *          least 12 characters.
 * vector - value to dump; it is written to an aligned scratch array with
 *          store128() and then printed lane by lane.
 *
 * Lanes are emitted from index 15 down to index 0, each as two hex digits
 * followed by a space; the line is finished with a newline.
 */
static inline void print_m128_16x8(const char *label, m128 vector) {
    uint8_t ALIGN_ATTR(16) lanes[16];
    int idx = 16;

    store128(lanes, vector);
    DEBUG_PRINTF("%12s: ", label);
    /* Walk the scratch array backwards so the highest index comes first. */
    while (idx-- > 0) {
        printf("%02x ", lanes[idx]);
    }
    printf("\n");
}
|
||||||
@ -58,8 +58,8 @@ static inline void print_m128_16x8(const char *label, m128 vector) {
|
|||||||
/*
 * Debug helper: dump a 128-bit vector as eight 16-bit lanes.
 *
 * label  - caption handed to DEBUG_PRINTF, right-aligned in a field of at
 *          least 12 characters.
 * vector - value to dump; it is written to an aligned scratch array with
 *          store128() and then printed lane by lane.
 *
 * Lanes are emitted from index 7 down to index 0, each as four hex digits
 * followed by a space; the line is finished with a newline.
 */
static inline void print_m128_8x16(const char *label, m128 vector) {
    uint16_t ALIGN_ATTR(16) lanes[8];
    int idx = 8;

    store128(lanes, vector);
    DEBUG_PRINTF("%12s: ", label);
    /* Walk the scratch array backwards so the highest index comes first. */
    while (idx-- > 0) {
        printf("%04x ", lanes[idx]);
    }
    printf("\n");
}
|
||||||
@ -67,8 +67,8 @@ static inline void print_m128_8x16(const char *label, m128 vector) {
|
|||||||
/*
 * Debug helper: dump a 128-bit vector as four 32-bit lanes.
 *
 * label  - caption handed to DEBUG_PRINTF, right-aligned in a field of at
 *          least 12 characters.
 * vector - value to dump; it is written to an aligned scratch array with
 *          store128() and then printed lane by lane.
 *
 * Lanes are emitted from index 3 down to index 0, each as eight hex digits
 * followed by a space; the line is finished with a newline.
 */
static inline void print_m128_4x32(const char *label, m128 vector) {
    uint32_t ALIGN_ATTR(16) lanes[4];
    int idx = 4;

    store128(lanes, vector);
    DEBUG_PRINTF("%12s: ", label);
    /* Walk the scratch array backwards so the highest index comes first. */
    while (idx-- > 0) {
        printf("%08x ", lanes[idx]);
    }
    printf("\n");
}
|
||||||
@ -76,8 +76,8 @@ static inline void print_m128_4x32(const char *label, m128 vector) {
|
|||||||
/*
 * Debug helper: dump a 128-bit vector as two 64-bit lanes.
 *
 * label  - caption handed to DEBUG_PRINTF, right-aligned in a field of at
 *          least 12 characters.
 * vector - value to dump; it is written to an aligned scratch array with
 *          store128() and then printed lane by lane.
 *
 * Lanes are emitted from index 1 down to index 0, each as sixteen hex digits
 * followed by a space; the line is finished with a newline.
 */
static inline void print_m128_2x64(const char *label, m128 vector) {
    uint64_t ALIGN_ATTR(16) data[2];
    store128(data, vector);
    DEBUG_PRINTF("%12s: ", label);
    for (int i = 1; i >= 0; i--) {
        /*
         * uint64_t is not `unsigned long` on every ABI, so a bare %lx can
         * mismatch its argument. Cast explicitly so the argument type always
         * agrees with the %llx conversion.
         */
        printf("%016llx ", (unsigned long long)data[i]);
    }
    printf("\n");
}
|
||||||
|
@ -270,7 +270,7 @@ switch (imm) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline u64a extract64from128(const m128 in, unsigned UNUSED imm) {
|
static really_inline u64a extract64from128(const m128 in, unsigned imm) {
|
||||||
u64a ALIGN_ATTR(16) a[2];
|
u64a ALIGN_ATTR(16) a[2];
|
||||||
vec_xst((uint64x2_t) in, 0, a);
|
vec_xst((uint64x2_t) in, 0, a);
|
||||||
switch (imm) {
|
switch (imm) {
|
||||||
@ -285,19 +285,11 @@ switch (imm) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
 * Return a vector whose 64-bit element 0 is element 0 of `in` and whose
 * 64-bit element 1 is zero.
 *
 * A vec_perm whose control vector is a single splatted byte value copies one
 * byte of the input into every byte of the result, so it cannot pick out a
 * 64-bit half; the lanes are therefore handled explicitly here.
 *
 * NOTE(review): treats 64-bit element 0 as the "low" half (little-endian
 * lane order) -- confirm against the other architecture back ends.
 */
static really_inline m128 low64from128(const m128 in) {
    uint64x2_t lanes = (uint64x2_t) in;
    lanes[1] = 0; /* clear the other half, element 0 stays untouched */
    return (m128) lanes;
}
|
||||||
|
|
||||||
/*
 * Return a vector whose 64-bit element 0 is element 1 of `in` and whose
 * 64-bit element 1 is zero.
 *
 * A vec_perm whose control vector is a single splatted byte value copies one
 * byte of the input into every byte of the result, so it cannot pick out a
 * 64-bit half; the lanes are therefore handled explicitly here.
 *
 * NOTE(review): treats 64-bit element 1 as the "high" half (little-endian
 * lane order) -- confirm against the other architecture back ends.
 */
static really_inline m128 high64from128(const m128 in) {
    const uint64x2_t lanes = (uint64x2_t) in;
    /* Move the upper element down and zero-fill the vacated position. */
    const uint64x2_t out = { lanes[1], 0 };
    return (m128) out;
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -186,29 +186,25 @@ really_inline SuperVector<16> SuperVector<16>::operator!=(SuperVector<16> const
|
|||||||
template <>
|
template <>
|
||||||
really_inline SuperVector<16> SuperVector<16>::operator>(SuperVector<16> const &b) const
|
really_inline SuperVector<16> SuperVector<16>::operator>(SuperVector<16> const &b) const
|
||||||
{
|
{
|
||||||
int32x4_t v = {u.s32[0] > b.u.s32[0], u.s32[1] > b.u.s32[1], u.s32[2] > b.u.s32[2], u.s32[3] > b.u.s32[3]};
|
return {(m128) vec_cmpgt(u.v128[0], b.u.v128[0])};
|
||||||
return (m128) v;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
really_inline SuperVector<16> SuperVector<16>::operator>=(SuperVector<16> const &b) const
|
really_inline SuperVector<16> SuperVector<16>::operator>=(SuperVector<16> const &b) const
|
||||||
{
|
{
|
||||||
int32x4_t v = {u.s32[0] >= b.u.s32[0], u.s32[1] >= b.u.s32[1], u.s32[2] >= b.u.s32[2], u.s32[3] >= b.u.s32[3]};
|
return {(m128) vec_cmpge(u.v128[0], b.u.v128[0])};
|
||||||
return (m128) v;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
really_inline SuperVector<16> SuperVector<16>::operator<(SuperVector<16> const &b) const
|
really_inline SuperVector<16> SuperVector<16>::operator<(SuperVector<16> const &b) const
|
||||||
{
|
{
|
||||||
int32x4_t v = {u.s32[0] < b.u.s32[0], u.s32[1] < b.u.s32[1], u.s32[2] < b.u.s32[2], u.s32[3] < b.u.s32[3]};
|
return {(m128) vec_cmpgt(b.u.v128[0], u.v128[0])};
|
||||||
return (m128) v;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
really_inline SuperVector<16> SuperVector<16>::operator<=(SuperVector<16> const &b) const
|
really_inline SuperVector<16> SuperVector<16>::operator<=(SuperVector<16> const &b) const
|
||||||
{
|
{
|
||||||
int32x4_t v = {u.s32[0] <= b.u.s32[0], u.s32[1] <= b.u.s32[1], u.s32[2] <= b.u.s32[2], u.s32[3] <= b.u.s32[3]};
|
return {(m128) vec_cmpge(b.u.v128[0], u.v128[0])};
|
||||||
return (m128) v;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -222,9 +218,21 @@ template <>
|
|||||||
really_inline typename SuperVector<16>::movemask_type SuperVector<16>::movemask(void)const
|
really_inline typename SuperVector<16>::movemask_type SuperVector<16>::movemask(void)const
|
||||||
{
|
{
|
||||||
uint8x16_t s1 = vec_sr((uint8x16_t)u.v128[0], vec_splat_u8(7));
|
uint8x16_t s1 = vec_sr((uint8x16_t)u.v128[0], vec_splat_u8(7));
|
||||||
|
//printf("s1:");
|
||||||
|
//for(int i=15; i>=0; i--) {printf("%02x, ",s1[i]);}
|
||||||
|
//printf("\n");
|
||||||
uint16x8_t ss = vec_sr((uint16x8_t)s1, vec_splat_u16(7));
|
uint16x8_t ss = vec_sr((uint16x8_t)s1, vec_splat_u16(7));
|
||||||
|
//printf("ss:");
|
||||||
|
//for(int i=7; i>=0; i--) {printf("%04x, ",ss[i]);}
|
||||||
|
//printf("\n");
|
||||||
uint16x8_t res_and = vec_and((uint16x8_t)s1, vec_splats((uint16_t)0xff));
|
uint16x8_t res_and = vec_and((uint16x8_t)s1, vec_splats((uint16_t)0xff));
|
||||||
|
//printf("res_and:");
|
||||||
|
//for(int i=7; i>=0; i--) {printf("%04x, ",res_and[i]);}
|
||||||
|
//printf("\n");
|
||||||
uint16x8_t s2 = vec_or((uint16x8_t)ss, res_and);
|
uint16x8_t s2 = vec_or((uint16x8_t)ss, res_and);
|
||||||
|
//printf("s2:");
|
||||||
|
//for(int i=7; i>=0; i--) {printf("%04x, ",s2[i]);}
|
||||||
|
//printf("\n");
|
||||||
|
|
||||||
uint32x4_t ss2 = vec_sr((uint32x4_t)s2 , vec_splat_u32(14));
|
uint32x4_t ss2 = vec_sr((uint32x4_t)s2 , vec_splat_u32(14));
|
||||||
uint32x4_t res_and2 = vec_and((uint32x4_t)s2, vec_splats((uint32_t)0xff));
|
uint32x4_t res_and2 = vec_and((uint32x4_t)s2, vec_splats((uint32_t)0xff));
|
||||||
@ -238,6 +246,9 @@ really_inline typename SuperVector<16>::movemask_type SuperVector<16>::movemask(
|
|||||||
uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((uint64_t)0xff));
|
uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((uint64_t)0xff));
|
||||||
uint64x2_t s5 = vec_or((uint64x2_t)ss4, res_and4);
|
uint64x2_t s5 = vec_or((uint64x2_t)ss4, res_and4);
|
||||||
|
|
||||||
|
//printf("s5:");
|
||||||
|
//for(int i=1; i>=0; i--) {printf("%016llx, ",s5[i]);}
|
||||||
|
//printf("\n");
|
||||||
return s5[0];
|
return s5[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user