From 9f3ad89ed63dc56f8fe84b88a5ed81a7c5c6b11b Mon Sep 17 00:00:00 2001
From: Konstantinos Margaritis
Date: Tue, 22 Sep 2020 12:17:27 +0300
Subject: [PATCH] move andn helper function to bitutils.h

---
 src/fdr/fdr.c                   | 15 +--------------
 src/util/arch/common/bitutils.h |  9 +++++++++
 src/util/arch/x86/bitutils.h    | 14 ++++++++++++++
 src/util/bitutils.h             |  8 ++++++++
 4 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c
index d33756d3..b0f90b52 100644
--- a/src/fdr/fdr.c
+++ b/src/fdr/fdr.c
@@ -36,6 +36,7 @@
 #include "teddy.h"
 #include "teddy_internal.h"
 #include "util/arch.h"
+#include "util/bitutils.h"
 #include "util/simd_utils.h"
 #include "util/uniform_ops.h"
 
@@ -119,20 +120,6 @@ const ALIGN_CL_DIRECTIVE u8 zone_or_mask[ITER_BYTES+1][ITER_BYTES] = {
       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }
 };
 
-/* compilers don't reliably synthesize the 32-bit ANDN instruction here,
- * so we force its generation.
- */
-static really_inline
-u64a andn(const u32 a, const u8 *b) {
-    u64a r;
-#if defined(HAVE_BMI) && !defined(NO_ASM)
-    __asm__ ("andn\t%2,%1,%k0" : "=r"(r) : "r"(a), "m"(*(const u32 *)b));
-#else
-    r = unaligned_load_u32(b) & ~a;
-#endif
-    return r;
-}
-
 /* generates an initial state mask based on the last byte-ish of history rather
  * than being all accepting. If there is no history to consider, the state is
  * generated based on the minimum length of each bucket in order to prevent
diff --git a/src/util/arch/common/bitutils.h b/src/util/arch/common/bitutils.h
index 85d5dc49..f2706d70 100644
--- a/src/util/arch/common/bitutils.h
+++ b/src/util/arch/common/bitutils.h
@@ -34,6 +34,7 @@
 #define BITUTILS_ARCH_COMMON_H
 
 #include "util/popcount.h"
+#include "util/unaligned.h"
 
 static really_inline
 u32 clz32_impl_c(u32 x) {
@@ -350,4 +351,12 @@ u64a pext64_impl_c(u64a x, u64a mask) {
     return result;
 }
 
+/* portable fallback for andn: an unaligned 32-bit load from b, ANDed with ~a.
+ * Arch-specific headers may instead force generation of the ANDN instruction.
+ */
+static really_inline
+u64a andn_impl_c(const u32 a, const u8 *b) {
+    return unaligned_load_u32(b) & ~a;
+}
+
 #endif // BITUTILS_ARCH_COMMON_H
diff --git a/src/util/arch/x86/bitutils.h b/src/util/arch/x86/bitutils.h
index da7c747e..ec4c95ad 100644
--- a/src/util/arch/x86/bitutils.h
+++ b/src/util/arch/x86/bitutils.h
@@ -301,4 +301,18 @@ u64a pdep64(u64a x, u64a mask) {
 }
 #endif
 
+/* compilers don't reliably synthesize the 32-bit ANDN instruction here,
+ * so we force its generation.
+ */
+static really_inline
+u64a andn_impl(const u32 a, const u8 *b) {
+#if defined(HAVE_BMI) && !defined(NO_ASM)
+    u64a r;
+    __asm__ ("andn\t%2,%1,%k0" : "=r"(r) : "r"(a), "m"(*(const u32 *)b));
+    return r;
+#else
+    return andn_impl_c(a, b);
+#endif
+}
+
 #endif // BITUTILS_ARCH_X86_H
diff --git a/src/util/bitutils.h b/src/util/bitutils.h
index 651e5f93..b9f312cb 100644
--- a/src/util/bitutils.h
+++ b/src/util/bitutils.h
@@ -167,4 +167,12 @@ u64a pext64(u64a x, u64a mask) {
     return pext64_impl(x, mask);
 }
 
+/* andn: load 32 bits (unaligned) from b and AND them with ~a. Dispatches to
+ * the arch-specific implementation.
+ */
+static really_inline
+u64a andn(const u32 a, const u8 *b) {
+    return andn_impl(a, b);
+}
+
 #endif // BITUTILS_H
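
Note (editor's sketch, not part of the patch): after this change, andn() in
src/util/bitutils.h dispatches to the per-arch andn_impl(), which on x86 with
BMI emits the ANDN instruction and otherwise falls back to the portable
andn_impl_c() from src/util/arch/common/bitutils.h. The standalone sketch below
mirrors that three-layer chain so it compiles outside the tree; it substitutes
standard C types and memcpy for the project's u32/u64a and unaligned_load_u32,
and plain static inline for really_inline, so those stand-ins (and treating
HAVE_BMI/NO_ASM as externally defined) are assumptions for illustration only.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* portable fallback: unaligned 32-bit load from b, ANDed with ~a
     * (plays the role of andn_impl_c in arch/common/bitutils.h) */
    static inline uint64_t andn_impl_c(uint32_t a, const uint8_t *b) {
        uint32_t v;
        memcpy(&v, b, sizeof(v)); /* stands in for unaligned_load_u32() */
        return v & ~a;
    }

    /* arch layer: force the BMI ANDN instruction when available, else fall
     * back (mirrors andn_impl in arch/x86/bitutils.h) */
    static inline uint64_t andn_impl(uint32_t a, const uint8_t *b) {
    #if defined(HAVE_BMI) && !defined(NO_ASM)
        uint64_t r;
        __asm__ ("andn\t%2,%1,%k0" : "=r"(r) : "r"(a), "m"(*(const uint32_t *)b));
        return r;
    #else
        return andn_impl_c(a, b);
    #endif
    }

    /* public helper, as exposed from util/bitutils.h */
    static inline uint64_t andn(uint32_t a, const uint8_t *b) {
        return andn_impl(a, b);
    }

    int main(void) {
        /* bytes of 0x00ff00ff on a little-endian host */
        const uint8_t buf[4] = { 0xff, 0x00, 0xff, 0x00 };
        /* little-endian: 0x00ff00ff & ~0x0000ffff == 0x00ff0000 */
        printf("%#llx\n", (unsigned long long)andn(0x0000ffffu, buf));
        return 0;
    }

The wrapper/impl/impl_c split mirrors how the other helpers in these headers
are organized (e.g. pext64 -> pext64_impl -> pext64_impl_c), which is
presumably why andn is moved out of fdr.c rather than kept as a local helper.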