mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
add some useful intrinsics
This commit is contained in:
parent
6a11c83630
commit
ef9bf02d00
@ -202,6 +202,18 @@ static really_inline u64a extract64from128(const m128 in, unsigned imm) {
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Keep the low 64-bit lane of `in` and clear the high lane.
 *
 * Returns a vector whose lane 0 is lane 0 of `in` and whose lane 1 is zero.
 * The casts to and from uint64x2_t are explicit so the function does not
 * depend on the compiler accepting implicit conversions between vector types.
 */
static really_inline m128 low64from128(const m128 in) {
    return (m128) vcombine_u64(vget_low_u64((uint64x2_t)in), vdup_n_u64(0));
}
|
||||
|
||||
/*
 * Move the high 64-bit lane of `in` into the low lane and clear the high lane.
 *
 * Returns a vector whose lane 0 is lane 1 of `in` and whose lane 1 is zero.
 * The casts to and from uint64x2_t are explicit so the function does not
 * depend on the compiler accepting implicit conversions between vector types.
 */
static really_inline m128 high64from128(const m128 in) {
    return (m128) vcombine_u64(vget_high_u64((uint64x2_t)in), vdup_n_u64(0));
}
|
||||
|
||||
/*
 * Add `a` and `b` as two independent unsigned 64-bit lanes (vaddq_u64).
 * Each lane wraps on overflow; no carry propagates from the low lane into
 * the high lane, so this is not a single 128-bit integer addition.
 */
static really_inline m128 add128(m128 a, m128 b) {
    const uint64x2_t lhs = (uint64x2_t)a;
    const uint64x2_t rhs = (uint64x2_t)b;
    return (m128) vaddq_u64(lhs, rhs);
}
|
||||
|
||||
/*
 * Bitwise AND of `a` and `b`. The operands are viewed as sixteen signed
 * bytes only to select the intrinsic; the result is the same for any lane
 * interpretation.
 */
static really_inline m128 and128(m128 a, m128 b) {
    const int8x16_t lhs = (int8x16_t)a;
    const int8x16_t rhs = (int8x16_t)b;
    return (m128) vandq_s8(lhs, rhs);
}
|
||||
@ -381,13 +393,13 @@ m128 sub_u8_m128(m128 a, m128 b) {
|
||||
|
||||
/*
 * Build a 128-bit vector from four 32-bit values.
 *
 * Arguments are given from the highest lane (x3) to the lowest (x0); the
 * values are staged in a 16-byte aligned array in memory order x0..x3 and
 * loaded with vld1q_u32, so x0 ends up in lane 0.
 */
static really_inline
m128 set4x32(u32 x3, u32 x2, u32 x1, u32 x0) {
    uint32_t ALIGN_ATTR(16) data[4] = { x0, x1, x2, x3 };
    return (m128) vld1q_u32(data);
}
|
||||
|
||||
/*
 * Build a 128-bit vector from two 64-bit values.
 *
 * `lo` is stored first in a 16-byte aligned staging array and `hi` second,
 * then both are loaded with vld1q_u64, so `lo` ends up in lane 0 and `hi`
 * in lane 1.
 */
static really_inline
m128 set2x64(u64a hi, u64a lo) {
    uint64_t ALIGN_ATTR(16) data[2] = { lo, hi };
    return (m128) vld1q_u64(data);
}
|
||||
|
||||
|
@ -46,7 +46,7 @@
|
||||
|
||||
#ifdef DEBUG
|
||||
static inline void print_m128_16x8(char *label, m128 vector) {
|
||||
uint8_t __attribute__((aligned(16))) data[16];
|
||||
uint8_t ALIGN_ATTR(16) data[16];
|
||||
store128(data, vector);
|
||||
DEBUG_PRINTF("%s: ", label);
|
||||
for(int i=0; i < 16; i++)
|
||||
@ -55,7 +55,7 @@ static inline void print_m128_16x8(char *label, m128 vector) {
|
||||
}
|
||||
|
||||
static inline void print_m128_8x16(char *label, m128 vector) {
|
||||
uint16_t __attribute__((aligned(16))) data[8];
|
||||
uint16_t ALIGN_ATTR(16) data[8];
|
||||
store128(data, vector);
|
||||
DEBUG_PRINTF("%s: ", label);
|
||||
for(int i=0; i < 8; i++)
|
||||
@ -64,7 +64,7 @@ static inline void print_m128_8x16(char *label, m128 vector) {
|
||||
}
|
||||
|
||||
static inline void print_m128_4x32(char *label, m128 vector) {
|
||||
uint32_t __attribute__((aligned(16))) data[4];
|
||||
uint32_t ALIGN_ATTR(16) data[4];
|
||||
store128(data, vector);
|
||||
DEBUG_PRINTF("%s: ", label);
|
||||
for(int i=0; i < 4; i++)
|
||||
@ -73,7 +73,7 @@ static inline void print_m128_4x32(char *label, m128 vector) {
|
||||
}
|
||||
|
||||
static inline void print_m128_2x64(char *label, m128 vector) {
|
||||
uint64_t __attribute__((aligned(16))) data[2];
|
||||
uint64_t ALIGN_ATTR(16) data[2];
|
||||
store128(data, vector);
|
||||
DEBUG_PRINTF("%s: ", label);
|
||||
for(int i=0; i < 2; i++)
|
||||
@ -146,6 +146,13 @@ static really_inline m256 ones256(void) {
|
||||
return rv;
|
||||
}
|
||||
|
||||
/*
 * Add two 256-bit values by applying add128 to each 128-bit half.
 * The halves are processed independently; nothing is carried from `lo`
 * into `hi`.
 */
static really_inline m256 add256(m256 a, m256 b) {
    m256 sum;
    sum.hi = add128(a.hi, b.hi);
    sum.lo = add128(a.lo, b.lo);
    return sum;
}
|
||||
|
||||
static really_inline m256 and256(m256 a, m256 b) {
|
||||
m256 rv;
|
||||
rv.lo = and128(a.lo, b.lo);
|
||||
@ -585,6 +592,13 @@ m512 set1_4x128(m128 a) {
|
||||
return rv;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m512 add512(m512 a, m512 b) {
|
||||
m512 rv;
|
||||
rv.lo = add256(a.lo, b.lo);
|
||||
rv.hi = add256(a.hi, b.hi);
|
||||
return rv;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m512 and512(m512 a, m512 b) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user