mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
add extra instructions (currently arm-only), fix order of elements in set4x32/set2x64
This commit is contained in:
parent
18296eee47
commit
7b8cf97546
@ -83,6 +83,26 @@ static really_inline u32 diffrich64_128(m128 a, m128 b) {
|
|||||||
return vaddvq_u64(vandq_u64(vmvnq_s32(vceqq_s64((int64x2_t)a, (int64x2_t)b)), movemask));
|
return vaddvq_u64(vandq_u64(vmvnq_s32(vceqq_s64((int64x2_t)a, (int64x2_t)b)), movemask));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Lane-wise addition of the two 64-bit lanes of a and b.
 *
 * The operands are reinterpreted as uint64x2_t, which is the type
 * vaddq_u64 is declared to take, so the call does not depend on the
 * compiler accepting implicit signed/unsigned vector conversions.
 * The result is cast back to m128.
 */
static really_really_inline
m128 add_2x64(m128 a, m128 b) {
    return (m128) vaddq_u64((uint64x2_t)a, (uint64x2_t)b);
}
|
||||||
|
|
||||||
|
/*
 * Lane-wise subtraction (a - b) over the two 64-bit lanes.
 *
 * The operands are reinterpreted as uint64x2_t, which is the type
 * vsubq_u64 is declared to take, so the call does not depend on the
 * compiler accepting implicit signed/unsigned vector conversions.
 * The result is cast back to m128.
 */
static really_really_inline
m128 sub_2x64(m128 a, m128 b) {
    return (m128) vsubq_u64((uint64x2_t)a, (uint64x2_t)b);
}
|
||||||
|
|
||||||
|
/*
 * Shift every 32-bit lane of a left by b bits.
 *
 * b is a runtime parameter, so the register-shift intrinsic is used rather
 * than the immediate (_n) form, which requires a compile-time constant.
 * The operand is reinterpreted as four 32-bit lanes to match the lane
 * width of the shift.
 *
 * NOTE(review): callers are presumed to pass b in [0, 31]; counts of 32 or
 * more are not meaningful for a 32-bit lane shift -- confirm against callers.
 */
static really_really_inline
m128 lshift_m128(m128 a, unsigned b) {
    /* Broadcast the count so each lane is shifted by the same amount. */
    const int32x4_t count = vdupq_n_s32((int32_t)b);
    return (m128) vshlq_u32((uint32x4_t)a, count);
}
|
||||||
|
|
||||||
|
/*
 * Logically shift every 32-bit lane of a right by b bits (zero fill).
 *
 * NEON has no right-shift-by-register instruction; a right shift is
 * expressed as a left shift by a negative count.  Using the unsigned
 * variant keeps the shift logical, so the top bit of a lane is not
 * replicated into the vacated positions.  The register form also accepts
 * a runtime count and a count of 0, neither of which the immediate (_n)
 * right-shift form allows.
 *
 * NOTE(review): callers are presumed to pass b in [0, 31] and to want a
 * logical (not arithmetic) shift -- confirm against callers.
 */
static really_really_inline
m128 rshift_m128(m128 a, unsigned b) {
    /* Negative per-lane count == shift right by b. */
    const int32x4_t count = vdupq_n_s32(-(int32_t)b);
    return (m128) vshlq_u32((uint32x4_t)a, count);
}
|
||||||
|
|
||||||
static really_really_inline
|
static really_really_inline
|
||||||
m128 lshift64_m128(m128 a, unsigned b) {
|
m128 lshift64_m128(m128 a, unsigned b) {
|
||||||
return (m128) vshlq_n_s64((int64x2_t)a, b);
|
return (m128) vshlq_n_s64((int64x2_t)a, b);
|
||||||
@ -97,6 +117,10 @@ static really_inline m128 eq128(m128 a, m128 b) {
|
|||||||
return (m128) vceqq_s8((int8x16_t)a, (int8x16_t)b);
|
return (m128) vceqq_s8((int8x16_t)a, (int8x16_t)b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Compare a and b as two 64-bit lanes.
 *
 * Returns the lane mask produced by vceqq_u64: a lane is all ones where
 * the corresponding lanes of a and b are equal and all zeros otherwise.
 * The operands are reinterpreted as uint64x2_t, the type the intrinsic is
 * declared to take.
 */
static really_inline m128 eq64_m128(m128 a, m128 b) {
    return (m128) vceqq_u64((uint64x2_t)a, (uint64x2_t)b);
}
|
||||||
|
|
||||||
static really_inline u32 movemask128(m128 a) {
|
static really_inline u32 movemask128(m128 a) {
|
||||||
static const uint8x16_t powers = { 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128 };
|
static const uint8x16_t powers = { 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128 };
|
||||||
|
|
||||||
@ -290,13 +314,13 @@ m128 sub_u8_m128(m128 a, m128 b) {
|
|||||||
|
|
||||||
/*
 * set4x32, rendered as a two-column diff: each line appears first in its
 * pre-commit form and then in its post-commit form.
 *
 * The only difference between the two forms is the initializer of data[]:
 * { x3, x2, x1, x0 } before and { x0, x1, x2, x3 } after, so the last
 * argument (x0) now occupies data[0], the first element handed to
 * vld1q_u32.  The commit message describes this as fixing the order of
 * elements.
 */
static really_inline
|
static really_inline
|
||||||
m128 set4x32(u32 x3, u32 x2, u32 x1, u32 x0) {
|
m128 set4x32(u32 x3, u32 x2, u32 x1, u32 x0) {
|
||||||
uint32_t __attribute__((aligned(16))) data[4] = { x3, x2, x1, x0 };
|
uint32_t __attribute__((aligned(16))) data[4] = { x0, x1, x2, x3 };
|
||||||
return (m128) vld1q_u32((uint32_t *) data);
|
return (m128) vld1q_u32((uint32_t *) data);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
 * set2x64, rendered as a two-column diff: each line appears first in its
 * pre-commit form and then in its post-commit form.
 *
 * The only difference between the two forms is the initializer of data[]:
 * { hi, lo } before and { lo, hi } after, so lo now occupies data[0], the
 * first element handed to vld1q_u64.  The commit message describes this as
 * fixing the order of elements.
 */
static really_inline
|
static really_inline
|
||||||
m128 set2x64(u64a hi, u64a lo) {
|
m128 set2x64(u64a hi, u64a lo) {
|
||||||
uint64_t __attribute__((aligned(16))) data[2] = { hi, lo };
|
uint64_t __attribute__((aligned(16))) data[2] = { lo, hi };
|
||||||
return (m128) vld1q_u64((uint64_t *) data);
|
return (m128) vld1q_u64((uint64_t *) data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user