move andn helper function to bitutils.h

Konstantinos Margaritis 2020-09-22 12:17:27 +03:00
parent 6581aae90e
commit 9f3ad89ed6
4 changed files with 32 additions and 14 deletions

View File

@@ -36,6 +36,7 @@
#include "teddy.h"
#include "teddy_internal.h"
#include "util/arch.h"
#include "util/bitutils.h"
#include "util/simd_utils.h"
#include "util/uniform_ops.h"
@@ -119,20 +120,6 @@ const ALIGN_CL_DIRECTIVE u8 zone_or_mask[ITER_BYTES+1][ITER_BYTES] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }
};
-/* compilers don't reliably synthesize the 32-bit ANDN instruction here,
- * so we force its generation.
- */
-static really_inline
-u64a andn(const u32 a, const u8 *b) {
-    u64a r;
-#if defined(HAVE_BMI) && !defined(NO_ASM)
-    __asm__ ("andn\t%2,%1,%k0" : "=r"(r) : "r"(a), "m"(*(const u32 *)b));
-#else
-    r = unaligned_load_u32(b) & ~a;
-#endif
-    return r;
-}
/* generates an initial state mask based on the last byte-ish of history rather
* than being all accepting. If there is no history to consider, the state is
* generated based on the minimum length of each bucket in order to prevent
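
A reading aid for the extended asm in the function removed above (the same statement reappears in the x86 bitutils header below): the copy here is identical apart from the added comments, the hypothetical name andn_annotated, and standalone typedefs standing in for the library's ue2common.h types.

#include <stdint.h>
typedef uint64_t u64a;   /* as in the library's ue2common.h */
typedef uint32_t u32;
typedef uint8_t  u8;

static inline u64a andn_annotated(const u32 a, const u8 *b) {
    u64a r;
    __asm__ ("andn\t%2,%1,%k0"          /* AT&T order: destination last */
             : "=r"(r)                  /* %0: result, any general register */
             : "r"(a),                  /* %1: the mask a, in a register */
               "m"(*(const u32 *)b));   /* %2: 32-bit memory operand, so the
                                         * load folds into the instruction */
    return r;
}

ANDN computes src2 & ~src1, here the 4-byte load ANDed with the complement of a. The "k" modifier prints the 32-bit name of the output register, and on x86-64 a 32-bit register write zero-extends, filling the whole u64a result.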

View File

@@ -34,6 +34,7 @@
#define BITUTILS_ARCH_COMMON_H
#include "util/popcount.h"
#include "util/unaligned.h"
static really_inline
u32 clz32_impl_c(u32 x) {
@@ -350,4 +351,12 @@ u64a pext64_impl_c(u64a x, u64a mask) {
    return result;
}
+/* compilers don't reliably synthesize the 32-bit ANDN instruction here,
+ * so we force its generation.
+ */
+static really_inline
+u64a andn_impl_c(const u32 a, const u8 *b) {
+    return unaligned_load_u32(b) & ~a;
+}
#endif // BITUTILS_ARCH_COMMON_H
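
The portable implementation depends on unaligned_load_u32() from util/unaligned.h, which is why that include is added above. As a self-contained sketch of what it boils down to (andn_sketch and the memcpy idiom are illustrative, not the library's actual definitions):

#include <stdint.h>
#include <string.h>

/* Hypothetical standalone equivalent of andn_impl_c(): a safe unaligned
 * 32-bit load, which mainstream compilers reduce to a single load
 * instruction, ANDed with the complement of the mask. */
static inline uint64_t andn_sketch(uint32_t a, const uint8_t *b) {
    uint32_t v;
    memcpy(&v, b, sizeof(v));   /* unaligned load without UB */
    return v & ~a;
}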

View File

@@ -301,4 +301,18 @@ u64a pdep64(u64a x, u64a mask) {
}
#endif
+/* compilers don't reliably synthesize the 32-bit ANDN instruction here,
+ * so we force its generation.
+ */
+static really_inline
+u64a andn_impl(const u32 a, const u8 *b) {
+#if defined(HAVE_BMI) && !defined(NO_ASM)
+    u64a r;
+    __asm__ ("andn\t%2,%1,%k0" : "=r"(r) : "r"(a), "m"(*(const u32 *)b));
+    return r;
+#else
+    return andn_impl_c(a, b);
+#endif
+}
#endif // BITUTILS_ARCH_X86_H
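
Where BMI1 is known to be available, an intrinsic would express the same operation without raw asm. A sketch, assuming GCC or Clang with -mbmi (the intrinsic is spelled __andn_u32 in their <immintrin.h>; some toolchains also expose _andn_u32); the asm form above is presumably kept because its "m" constraint guarantees the load folds into the instruction even when a compiler would not synthesize ANDN on its own:

#include <immintrin.h>
#include <stdint.h>
#include <string.h>

/* Sketch only: BMI1 ANDN via intrinsic. __andn_u32(a, v) computes
 * ~a & v, matching the asm version above. Build with -mbmi. */
static inline uint64_t andn_intrin(uint32_t a, const uint8_t *b) {
    uint32_t v;
    memcpy(&v, b, sizeof(v));   /* unaligned 32-bit load */
    return __andn_u32(a, v);
}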

View File

@@ -167,4 +167,12 @@ u64a pext64(u64a x, u64a mask) {
    return pext64_impl(x, mask);
}
+/* compilers don't reliably synthesize the 32-bit ANDN instruction here,
+ * so we force its generation.
+ */
+static really_inline
+u64a andn(const u32 a, const u8 *b) {
+    return andn_impl(a, b);
+}
#endif // BITUTILS_H
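
Finally, a usage sketch of the public wrapper with the arithmetic worked out. It assumes a little-endian host, since the expected constant depends on the byte order of the 32-bit load; andn_demo is illustrative only, not part of the library:

#include <assert.h>

/* bytes loads (little-endian) as 0x04030201; a = 0xff00ff00 gives
 * ~a = 0x00ff00ff, so the result is
 * 0x04030201 & 0x00ff00ff == 0x00030001. */
static void andn_demo(void) {
    static const u8 bytes[4] = { 0x01, 0x02, 0x03, 0x04 };
    assert(andn(0xff00ff00, bytes) == 0x00030001);
}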