diff --git a/src/util/arch/arm/simd_utils.h b/src/util/arch/arm/simd_utils.h index a2f79774..052319f6 100644 --- a/src/util/arch/arm/simd_utils.h +++ b/src/util/arch/arm/simd_utils.h @@ -52,6 +52,24 @@ #include // for memcpy +#define ZEROES_8 0, 0, 0, 0, 0, 0, 0, 0 +#define ZEROES_31 ZEROES_8, ZEROES_8, ZEROES_8, 0, 0, 0, 0, 0, 0, 0 +#define ZEROES_32 ZEROES_8, ZEROES_8, ZEROES_8, ZEROES_8 + +/** \brief LUT for the mask1bit functions. */ +ALIGN_CL_DIRECTIVE static const u8 simd_onebit_masks[] = { + ZEROES_32, ZEROES_32, + ZEROES_31, 0x01, ZEROES_32, + ZEROES_31, 0x02, ZEROES_32, + ZEROES_31, 0x04, ZEROES_32, + ZEROES_31, 0x08, ZEROES_32, + ZEROES_31, 0x10, ZEROES_32, + ZEROES_31, 0x20, ZEROES_32, + ZEROES_31, 0x40, ZEROES_32, + ZEROES_31, 0x80, ZEROES_32, + ZEROES_32, ZEROES_32, +}; + static really_inline m128 ones128(void) { return (m128) vdupq_n_s8(0xFF); } @@ -343,14 +361,6 @@ m128 variable_byte_shift_m128(m128 in, s32 amount) { } } -#ifdef __cplusplus -extern "C" { -#endif -extern const u8 simd_onebit_masks[]; -#ifdef __cplusplus -} -#endif - static really_inline m128 mask1bit128(unsigned int n) { assert(n < sizeof(m128) * 8); diff --git a/src/util/arch/x86/simd_utils.h b/src/util/arch/x86/simd_utils.h index 24c1abe0..b36d5a38 100644 --- a/src/util/arch/x86/simd_utils.h +++ b/src/util/arch/x86/simd_utils.h @@ -58,6 +58,7 @@ ALIGN_CL_DIRECTIVE static const u8 simd_onebit_masks[] = { ZEROES_31, 0x80, ZEROES_32, ZEROES_32, ZEROES_32, }; + static really_inline m128 ones128(void) { #if defined(__GNUC__) || defined(__INTEL_COMPILER) /* gcc gets this right */