add some useful intrinsics

This commit is contained in:
Konstantinos Margaritis 2021-01-15 17:35:01 +02:00 committed by Konstantinos Margaritis
parent 1c581e45e9
commit 5b85589274
2 changed files with 32 additions and 6 deletions

View File

@ -202,6 +202,18 @@ static really_inline u64a extract64from128(const m128 in, unsigned imm) {
#endif
}
/*
 * Return a vector whose low 64-bit lane is the low 64-bit lane of `in` and
 * whose high 64-bit lane is zero.
 */
static really_inline m128 low64from128(const m128 in) {
    /*
     * Cast explicitly to and from the 64-bit lane view, as the other helpers
     * in this file do, rather than relying on the compiler accepting implicit
     * conversions between distinct NEON vector types.
     */
    const uint64x2_t lanes = (uint64x2_t)in;
    return (m128) vcombine_u64(vget_low_u64(lanes), vdup_n_u64(0));
}
/*
 * Return a vector whose low 64-bit lane is the HIGH 64-bit lane of `in` and
 * whose high 64-bit lane is zero (i.e. the upper half is moved down).
 */
static really_inline m128 high64from128(const m128 in) {
    /*
     * Cast explicitly to and from the 64-bit lane view, as the other helpers
     * in this file do, rather than relying on the compiler accepting implicit
     * conversions between distinct NEON vector types.
     */
    const uint64x2_t lanes = (uint64x2_t)in;
    return (m128) vcombine_u64(vget_high_u64(lanes), vdup_n_u64(0));
}
/*
 * Add a and b lane by lane, viewing each operand as two unsigned 64-bit
 * lanes. The two lanes are summed independently by vaddq_u64.
 */
static really_inline m128 add128(m128 a, m128 b) {
    const uint64x2_t lhs = (uint64x2_t)a;
    const uint64x2_t rhs = (uint64x2_t)b;
    const uint64x2_t sum = vaddq_u64(lhs, rhs);
    return (m128)sum;
}
/*
 * Bitwise AND of a and b. The operands are viewed as sixteen 8-bit lanes
 * only to select the intrinsic; the result is cast back to m128.
 */
static really_inline m128 and128(m128 a, m128 b) {
    const int8x16_t lhs = (int8x16_t)a;
    const int8x16_t rhs = (int8x16_t)b;
    const int8x16_t masked = vandq_s8(lhs, rhs);
    return (m128)masked;
}
@ -381,13 +393,13 @@ m128 sub_u8_m128(m128 a, m128 b) {
/*
 * Build a 128-bit vector from four 32-bit values.
 *
 * The arguments are given most-significant first: x0 is stored at index 0 of
 * the staging array (loaded first by vld1q_u32) and x3 at index 3.
 */
static really_inline
m128 set4x32(u32 x3, u32 x2, u32 x1, u32 x0) {
    /*
     * Single declaration of the staging buffer; declaring `data` twice in the
     * same scope is a redefinition error. The array already decays to
     * uint32_t *, so no pointer cast is needed for the load.
     */
    uint32_t ALIGN_ATTR(16) data[4] = { x0, x1, x2, x3 };
    return (m128) vld1q_u32(data);
}
/*
 * Build a 128-bit vector from two 64-bit values.
 *
 * `lo` is stored at index 0 of the staging array (loaded first by vld1q_u64)
 * and `hi` at index 1.
 */
static really_inline
m128 set2x64(u64a hi, u64a lo) {
    /*
     * Single declaration of the staging buffer; declaring `data` twice in the
     * same scope is a redefinition error. The array already decays to
     * uint64_t *, so no pointer cast is needed for the load.
     */
    uint64_t ALIGN_ATTR(16) data[2] = { lo, hi };
    return (m128) vld1q_u64(data);
}

View File

@ -46,7 +46,7 @@
#ifdef DEBUG
static inline void print_m128_16x8(char *label, m128 vector) {
uint8_t __attribute__((aligned(16))) data[16];
uint8_t ALIGN_ATTR(16) data[16];
store128(data, vector);
DEBUG_PRINTF("%s: ", label);
for(int i=0; i < 16; i++)
@ -55,7 +55,7 @@ static inline void print_m128_16x8(char *label, m128 vector) {
}
static inline void print_m128_8x16(char *label, m128 vector) {
uint16_t __attribute__((aligned(16))) data[8];
uint16_t ALIGN_ATTR(16) data[8];
store128(data, vector);
DEBUG_PRINTF("%s: ", label);
for(int i=0; i < 8; i++)
@ -64,7 +64,7 @@ static inline void print_m128_8x16(char *label, m128 vector) {
}
static inline void print_m128_4x32(char *label, m128 vector) {
uint32_t __attribute__((aligned(16))) data[4];
uint32_t ALIGN_ATTR(16) data[4];
store128(data, vector);
DEBUG_PRINTF("%s: ", label);
for(int i=0; i < 4; i++)
@ -73,7 +73,7 @@ static inline void print_m128_4x32(char *label, m128 vector) {
}
static inline void print_m128_2x64(char *label, m128 vector) {
uint64_t __attribute__((aligned(16))) data[2];
uint64_t ALIGN_ATTR(16) data[2];
store128(data, vector);
DEBUG_PRINTF("%s: ", label);
for(int i=0; i < 2; i++)
@ -146,6 +146,13 @@ static really_inline m256 ones256(void) {
return rv;
}
/*
 * Add two 256-bit values by applying add128 to the hi halves and to the lo
 * halves separately; the two 128-bit halves are computed independently.
 */
static really_inline m256 add256(m256 a, m256 b) {
    m256 sum;
    sum.hi = add128(a.hi, b.hi);
    sum.lo = add128(a.lo, b.lo);
    return sum;
}
static really_inline m256 and256(m256 a, m256 b) {
m256 rv;
rv.lo = and128(a.lo, b.lo);
@ -585,6 +592,13 @@ m512 set1_4x128(m128 a) {
return rv;
}
/*
 * Add two 512-bit values by applying add256 to the hi halves and to the lo
 * halves separately; the two 256-bit halves are computed independently.
 */
static really_inline
m512 add512(m512 a, m512 b) {
    m512 sum;
    sum.hi = add256(a.hi, b.hi);
    sum.lo = add256(a.lo, b.lo);
    return sum;
}
static really_inline
m512 and512(m512 a, m512 b) {