mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-11-18 18:20:35 +03:00
Use SVE2 Bitperm's bdep instruction in bitutils and state_compress
Specifically for pdep64, expand32, and expand64 in bitutils, as well as all of the loadcompressed functions used in state_compress.

Change-Id: I92851bd12481dbee6a7e344df0890c4901b56d01
This commit is contained in:
committed by
Konstantinos Margaritis
parent
7e5138b78f
commit
ace6cd15f2
@@ -109,7 +109,7 @@ m128 compress128_impl(m128 x, m128 m) {
|
||||
m128 mm = sub_2x64(zeroes128(), m);
|
||||
m128 xm = and128(x, m);
|
||||
xm = and128(xm, mm);
|
||||
|
||||
|
||||
m128 mask = not128(eq64_m128(xm, zeroes128()));
|
||||
res = or128(res, and128(bb, mask));
|
||||
m = and128(m, sub_2x64(m, one));
|
||||
@@ -120,12 +120,20 @@ m128 compress128_impl(m128 x, m128 m) {
|
||||
|
||||
static really_inline
|
||||
u32 expand32_impl(u32 x, u32 m) {
|
||||
#if defined(HAVE_SVE2_BITPERM)
|
||||
return svlasta(svpfalse(), svbdep(svdup_u32(x), m));
|
||||
#else
|
||||
return expand32_impl_c(x, m);
|
||||
#endif
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u64a expand64_impl(u64a x, u64a m) {
|
||||
#if defined(HAVE_SVE2_BITPERM)
|
||||
return svlasta(svpfalse(), svbdep(svdup_u64(x), m));
|
||||
#else
|
||||
return expand64_impl_c(x, m);
|
||||
#endif
|
||||
}
|
||||
|
||||
static really_inline
|
||||
@@ -194,11 +202,6 @@ u64a pext64_impl(u64a x, u64a mask) {
|
||||
return pext64_impl_c(x, mask);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u64a pdep64(u64a x, u64a mask) {
|
||||
return pdep64_impl_c(x, mask);
|
||||
}
|
||||
|
||||
/* compilers don't reliably synthesize the 32-bit ANDN instruction here,
|
||||
* so we force its generation.
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user