mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-30 03:34:25 +03:00
MCSHENG64: extend to 64-state based on mcsheng
This commit is contained in:
committed by
Konstantinos Margaritis
parent
dea7c4dc2e
commit
d96f1ab505
@@ -108,6 +108,12 @@ m128 lshift64_m128(m128 a, unsigned b) {
|
||||
/* Byte-wise equality compare of two 128-bit vectors via _mm_cmpeq_epi8. */
#define eq128(a, b) _mm_cmpeq_epi8((a), (b))
|
||||
/* Result of _mm_movemask_epi8 on a 128-bit vector, cast to u32. */
#define movemask128(a) ((u32)_mm_movemask_epi8((a)))
|
||||
|
||||
#if defined(HAVE_AVX512)
|
||||
/**
 * Reinterpret a 512-bit vector as a 128-bit vector, keeping its low 128 bits
 * (wrapper around _mm512_castsi512_si128).
 */
static really_inline
m128 cast512to128(const m512 in) {
    const m128 low = _mm512_castsi512_si128(in);
    return low;
}
|
||||
#endif
|
||||
|
||||
/**
 * Build a 128-bit vector with the byte c replicated into every 8-bit lane
 * (wrapper around _mm_set1_epi8).
 */
static really_inline
m128 set1_16x8(u8 c) {
    const m128 splat = _mm_set1_epi8(c);
    return splat;
}
|
||||
@@ -165,6 +171,10 @@ m128 load_m128_from_u64a(const u64a *p) {
|
||||
|
||||
#endif // !AVX2
|
||||
|
||||
/**
 * Lane-wise addition of two 128-bit vectors using _mm_add_epi64.
 *
 * The vectors are added as packed 64-bit integers, so no carry crosses
 * from one 64-bit lane into the next.
 */
static really_inline
m128 add128(m128 a, m128 b) {
    const m128 sum = _mm_add_epi64(a, b);
    return sum;
}
|
||||
|
||||
/** Bitwise AND of two 128-bit vectors (wrapper around _mm_and_si128). */
static really_inline
m128 and128(m128 a, m128 b) {
    const m128 masked = _mm_and_si128(a, b);
    return masked;
}
|
||||
@@ -352,6 +362,10 @@ static really_inline m256 ones256(void) {
|
||||
return rv;
|
||||
}
|
||||
|
||||
/**
 * Lane-wise addition of two 256-bit vectors using _mm256_add_epi64.
 *
 * The vectors are added as packed 64-bit integers, so no carry crosses
 * from one 64-bit lane into the next.
 */
static really_inline
m256 add256(m256 a, m256 b) {
    const m256 sum = _mm256_add_epi64(a, b);
    return sum;
}
|
||||
|
||||
/** Bitwise AND of two 256-bit vectors (wrapper around _mm256_and_si256). */
static really_inline
m256 and256(m256 a, m256 b) {
    const m256 masked = _mm256_and_si256(a, b);
    return masked;
}
|
||||
@@ -562,6 +576,12 @@ static really_inline u32 movd512(const m512 in) {
|
||||
return _mm_cvtsi128_si32(_mm512_castsi512_si128(in));
|
||||
}
|
||||
|
||||
/**
 * Extract the low 64 bits of a 512-bit vector as a u64a.
 *
 * NOTE: AVX512 does not seem to support _mm512_cvtsi512_si64(in), so the
 * conversion is done in two steps: cast down to the low 128 bits, then move
 * the low 64 bits of that into a scalar.
 */
static really_inline
u64a movq512(const m512 in) {
    const m128 low128 = _mm512_castsi512_si128(in);
    return _mm_cvtsi128_si64(low128);
}
|
||||
|
||||
static really_inline
|
||||
m512 pshufb_m512(m512 a, m512 b) {
|
||||
return _mm512_shuffle_epi8(a, b);
|
||||
@@ -606,6 +626,11 @@ m512 set1_8x64(u64a a) {
|
||||
return _mm512_set1_epi64(a);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m512 set16x32(u32 a) {
|
||||
return _mm512_set1_epi32(a);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m512 set8x64(u64a hi_3, u64a hi_2, u64a hi_1, u64a hi_0,
|
||||
u64a lo_3, u64a lo_2, u64a lo_1, u64a lo_0) {
|
||||
@@ -624,6 +649,31 @@ m512 set1_4x128(m128 a) {
|
||||
return _mm512_broadcast_i32x4(a);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m512 sadd_u8_m512(m512 a, m512 b) {
|
||||
return _mm512_adds_epu8(a, b);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m512 max_u8_m512(m512 a, m512 b) {
|
||||
return _mm512_max_epu8(a, b);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m512 min_u8_m512(m512 a, m512 b) {
|
||||
return _mm512_min_epu8(a, b);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m512 sub_u8_m512(m512 a, m512 b) {
|
||||
return _mm512_sub_epi8(a, b);
|
||||
}
|
||||
|
||||
static really_inline m512
|
||||
add512(m512 a, m512 b) {
|
||||
return _mm512_add_epu64(a, b);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m512 and512(m512 a, m512 b) {
|
||||
return _mm512_and_si512(a, b);
|
||||
|
Reference in New Issue
Block a user