diff --git a/src/util/arch/common/simd_utils.h b/src/util/arch/common/simd_utils.h
index 2f2dcf7c..90ae80b0 100644
--- a/src/util/arch/common/simd_utils.h
+++ b/src/util/arch/common/simd_utils.h
@@ -88,6 +88,7 @@ static inline void print_m128_2x64(const char *label, m128 vec) {
 #define print_m128_2x64(label, vec) ;
 #endif
 
+#if !defined(ARCH_IA32) && !defined(ARCH_X86_64)
 #define ZEROES_8 0, 0, 0, 0, 0, 0, 0, 0
 #define ZEROES_31 ZEROES_8, ZEROES_8, ZEROES_8, 0, 0, 0, 0, 0, 0, 0
 #define ZEROES_32 ZEROES_8, ZEROES_8, ZEROES_8, ZEROES_8
@@ -105,6 +106,7 @@ ALIGN_CL_DIRECTIVE static const u8 simd_onebit_masks[] = {
     ZEROES_31, 0x80, ZEROES_32,
     ZEROES_32, ZEROES_32,
 };
+#endif // !defined(ARCH_IA32) && !defined(ARCH_X86_64)
 
 /****
  **** 256-bit Primitives
diff --git a/src/util/arch/x86/simd_utils.h b/src/util/arch/x86/simd_utils.h
index d432251f..f732e3b8 100644
--- a/src/util/arch/x86/simd_utils.h
+++ b/src/util/arch/x86/simd_utils.h
@@ -42,6 +42,24 @@
 
 #include <string.h> // for memcpy
 
+#define ZEROES_8 0, 0, 0, 0, 0, 0, 0, 0
+#define ZEROES_31 ZEROES_8, ZEROES_8, ZEROES_8, 0, 0, 0, 0, 0, 0, 0
+#define ZEROES_32 ZEROES_8, ZEROES_8, ZEROES_8, ZEROES_8
+
+/** \brief LUT for the mask1bit functions. */
+ALIGN_CL_DIRECTIVE static const u8 simd_onebit_masks[] = {
+    ZEROES_32, ZEROES_32,
+    ZEROES_31, 0x01, ZEROES_32,
+    ZEROES_31, 0x02, ZEROES_32,
+    ZEROES_31, 0x04, ZEROES_32,
+    ZEROES_31, 0x08, ZEROES_32,
+    ZEROES_31, 0x10, ZEROES_32,
+    ZEROES_31, 0x20, ZEROES_32,
+    ZEROES_31, 0x40, ZEROES_32,
+    ZEROES_31, 0x80, ZEROES_32,
+    ZEROES_32, ZEROES_32,
+};
+
 static really_inline m128 ones128(void) {
 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
     /* gcc gets this right */
@@ -237,14 +255,6 @@ m128 loadbytes128(const void *ptr, unsigned int n) {
     memcpy(&a, ptr, n);
     return a;
 }
-/*
-#ifdef __cplusplus
-extern "C" {
-#endif
-extern const u8 simd_onebit_masks[];
-#ifdef __cplusplus
-}
-#endif*/
 
 static really_inline
 m128 mask1bit128(unsigned int n) {