move andn helper function to bitutils.h

Commit: 9f3ad89ed6
Parent: 6581aae90e
Repository: mirror of https://github.com/VectorCamp/vectorscan.git
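For context, the helper being moved computes a 32-bit unaligned load from b masked with the complement of a (an ANDN), and the diff below splits it into three layers: a portable andn_impl_c in the common bitutils header, an x86 andn_impl that can force the BMI ANDN instruction via inline asm, and an andn wrapper in util/bitutils.h for callers. A minimal standalone sketch of the portable semantics, assuming nothing from the vectorscan tree (load_u32 is a hypothetical stand-in for util/unaligned.h's unaligned_load_u32):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical stand-in for unaligned_load_u32(): memcpy-based safe 32-bit load. */
static uint32_t load_u32(const uint8_t *p) {
    uint32_t v;
    memcpy(&v, p, sizeof(v));
    return v;
}

/* Portable ANDN, equivalent to andn_impl_c() in the diff: ~a & load32(b). */
static uint64_t andn_ref(uint32_t a, const uint8_t *b) {
    return load_u32(b) & ~a;
}

int main(void) {
    const uint8_t buf[4] = { 0xff, 0x0f, 0x00, 0xf0 };
    /* On a little-endian host the load is 0xf0000fff; clearing the low byte
     * with a = 0xff yields 0xf0000f00. */
    printf("%08llx\n", (unsigned long long)andn_ref(0x000000ffu, buf));
    return 0;
}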
@@ -36,6 +36,7 @@
 #include "teddy.h"
 #include "teddy_internal.h"
 #include "util/arch.h"
+#include "util/bitutils.h"
 #include "util/simd_utils.h"
 #include "util/uniform_ops.h"
 
@@ -119,20 +120,6 @@ const ALIGN_CL_DIRECTIVE u8 zone_or_mask[ITER_BYTES+1][ITER_BYTES] = {
       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }
 };
 
-/* compilers don't reliably synthesize the 32-bit ANDN instruction here,
- * so we force its generation.
- */
-static really_inline
-u64a andn(const u32 a, const u8 *b) {
-    u64a r;
-#if defined(HAVE_BMI) && !defined(NO_ASM)
-    __asm__ ("andn\t%2,%1,%k0" : "=r"(r) : "r"(a), "m"(*(const u32 *)b));
-#else
-    r = unaligned_load_u32(b) & ~a;
-#endif
-    return r;
-}
-
 /* generates an initial state mask based on the last byte-ish of history rather
  * than being all accepting. If there is no history to consider, the state is
  * generated based on the minimum length of each bucket in order to prevent
@@ -34,6 +34,7 @@
 #define BITUTILS_ARCH_COMMON_H
 
 #include "util/popcount.h"
+#include "util/unaligned.h"
 
 static really_inline
 u32 clz32_impl_c(u32 x) {
@@ -350,4 +351,12 @@ u64a pext64_impl_c(u64a x, u64a mask) {
     return result;
 }
 
+/* compilers don't reliably synthesize the 32-bit ANDN instruction here,
+ * so we force its generation.
+ */
+static really_inline
+u64a andn_impl_c(const u32 a, const u8 *b) {
+    return unaligned_load_u32(b) & ~a;
+}
+
 #endif // BITUTILS_ARCH_COMMON_H
@@ -301,4 +301,18 @@ u64a pdep64(u64a x, u64a mask) {
 }
 #endif
 
+/* compilers don't reliably synthesize the 32-bit ANDN instruction here,
+ * so we force its generation.
+ */
+static really_inline
+u64a andn_impl(const u32 a, const u8 *b) {
+#if defined(HAVE_BMI) && !defined(NO_ASM)
+    u64a r;
+    __asm__ ("andn\t%2,%1,%k0" : "=r"(r) : "r"(a), "m"(*(const u32 *)b));
+    return r;
+#else
+    return andn_impl_c(a, b);
+#endif
+}
+
 #endif // BITUTILS_ARCH_X86_H
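An aside on the HAVE_BMI branch above: the same forced ANDN could also be expressed with the BMI intrinsic _andn_u32 from <immintrin.h>, which computes ~a & b. The commit keeps the inline asm, which also pins the memory operand so the 32-bit load is folded into the instruction; the sketch below only illustrates the intrinsic alternative, guarded by the GCC/Clang __BMI__ macro rather than vectorscan's HAVE_BMI:

#include <stdint.h>
#include <string.h>
#if defined(__BMI__)
#include <immintrin.h>
#endif

/* ANDN via the BMI intrinsic when compiled with -mbmi, portable C otherwise. */
static uint64_t andn_intrinsic(uint32_t a, const uint8_t *b) {
    uint32_t v;
    memcpy(&v, b, sizeof(v));          /* unaligned 32-bit load */
#if defined(__BMI__)
    return _andn_u32(a, v);            /* ~a & v, a single ANDN instruction */
#else
    return v & ~a;
#endif
}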
@@ -167,4 +167,12 @@ u64a pext64(u64a x, u64a mask) {
     return pext64_impl(x, mask);
 }
 
+/* compilers don't reliably synthesize the 32-bit ANDN instruction here,
+ * so we force its generation.
+ */
+static really_inline
+u64a andn(const u32 a, const u8 *b) {
+    return andn_impl_c(a, b);
+}
+
 #endif // BITUTILS_H
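A quick, self-contained way to sanity-check that the forced-asm path and the portable path agree (a hypothetical test harness, not part of the commit; it uses simplified local copies of both implementations instead of including the vectorscan headers):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Simplified copy of the portable path (andn_impl_c). */
static uint64_t andn_c(uint32_t a, const uint8_t *b) {
    uint32_t v;
    memcpy(&v, b, sizeof(v));
    return v & ~a;
}

/* Simplified copy of the x86 asm path; falls back to the C path elsewhere. */
static uint64_t andn_asm(uint32_t a, const uint8_t *b) {
#if defined(__BMI__) && defined(__x86_64__)
    uint64_t r;
    __asm__ ("andn\t%2,%1,%k0" : "=r"(r) : "r"(a), "m"(*(const uint32_t *)b));
    return r;
#else
    return andn_c(a, b);
#endif
}

int main(void) {
    uint8_t buf[4];
    for (uint32_t i = 0; i < 1000; i++) {
        uint32_t word = i * 2654435761u;    /* cheap input mixing */
        uint32_t mask = ~(i * 40503u);
        memcpy(buf, &word, sizeof(word));
        assert(andn_c(mask, buf) == andn_asm(mask, buf));
    }
    printf("andn paths agree\n");
    return 0;
}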