remove Windows/ICC support

This commit is contained in:
Konstantinos Margaritis
2021-07-30 12:49:38 +03:00
committed by Konstantinos Margaritis
parent 8cff876962
commit 08357a096c
41 changed files with 94 additions and 892 deletions

View File

@@ -61,20 +61,12 @@ namespace ue2 {
void *aligned_malloc_internal(size_t size, size_t align) {
void *mem;
#if !defined(_WIN32)
int rv = posix_memalign(&mem, align, size);
if (rv != 0) {
DEBUG_PRINTF("posix_memalign returned %d when asked for %zu bytes\n",
rv, size);
return nullptr;
}
#else
if (nullptr == (mem = _aligned_malloc(size, align))) {
DEBUG_PRINTF("_aligned_malloc failed when asked for %zu bytes\n",
size);
return nullptr;
}
#endif
assert(mem);
return mem;
@@ -85,11 +77,7 @@ void aligned_free_internal(void *ptr) {
return;
}
#if defined(_WIN32)
_aligned_free(ptr);
#else
free(ptr);
#endif
}
/** \brief 64-byte aligned, zeroed malloc.

View File

@@ -31,7 +31,7 @@
#include "ue2common.h"
#if (defined(ARCH_IA32) || defined(ARCH_X86_64)) && !defined(_WIN32) && !defined(CPUID_H_)
#if (defined(ARCH_IA32) || defined(ARCH_X86_64)) && !defined(CPUID_H_)
#include <cpuid.h>
/* system header doesn't have a header guard */
#define CPUID_H_

View File

@@ -42,64 +42,23 @@
/** \brief Count leading zero bits of a 32-bit value.
 *
 * When _WIN32 is defined, the _BitScanReverse intrinsic writes a bit index
 * into r and the result is computed as 31 - r. Otherwise the call is
 * forwarded to clz32_impl_c().
 *
 * NOTE(review): r has no initialiser on the _WIN32 path; presumably callers
 * never pass x == 0 -- confirm.
 */
static really_inline
u32 clz32_impl(u32 x) {
#if defined(_WIN32)
unsigned long r;
_BitScanReverse(&r, x);
// Convert the bit index reported by the intrinsic into a leading-zero count.
return 31 - r;
#else
return clz32_impl_c(x);
#endif
}
/** \brief Count leading zero bits of a 64-bit value.
 *
 * Three compile-time variants:
 *  - _WIN64: a single _BitScanReverse64 call, result 63 - r.
 *  - _WIN32 (without _WIN64): the value is split into two 32-bit halves and
 *    scanned with _BitScanReverse, upper half first.
 *  - otherwise: forwarded to clz64_impl_c().
 *
 * NOTE(review): r has no initialiser on the Windows paths; presumably callers
 * never pass x == 0 -- confirm.
 */
static really_inline
u32 clz64_impl(u64a x) {
#if defined(_WIN64)
unsigned long r;
_BitScanReverse64(&r, x);
return 63 - r;
#elif defined(_WIN32)
unsigned long x1 = (u32)x;          // low 32 bits
unsigned long x2 = (u32)(x >> 32);  // high 32 bits
unsigned long r;
if (x2) {
// A bit is set in the upper half, so only that half determines the count.
_BitScanReverse(&r, x2);
return (u32)(31 - r);
}
// Upper half is zero: scan the lower half; 63 - r accounts for the 32
// zero bits of the upper half.
_BitScanReverse(&r, (u32)x1);
return (u32)(63 - r);
#else
return clz64_impl_c(x);
#endif
}
// CTZ (count trailing zero) implementations.
/** \brief Count trailing zero bits of a 32-bit value.
 *
 * When _WIN32 is defined, the index written by _BitScanForward is returned
 * directly. Otherwise the call is forwarded to ctz32_impl_c().
 *
 * NOTE(review): r has no initialiser on the _WIN32 path; presumably callers
 * never pass x == 0 -- confirm.
 */
static really_inline
u32 ctz32_impl(u32 x) {
#if defined(_WIN32)
unsigned long r;
_BitScanForward(&r, x);
return r;
#else
return ctz32_impl_c(x);
#endif
}
/** \brief Count trailing zero bits of a 64-bit value.
 *
 * Three compile-time variants:
 *  - _WIN64: a single _BitScanForward64 call, index returned directly.
 *  - _WIN32 (without _WIN64): the low 32 bits are scanned first; if that
 *    scan reports nothing, the high 32 bits are scanned and 32 is added.
 *  - otherwise: forwarded to ctz64_impl_c().
 */
static really_inline
u32 ctz64_impl(u64a x) {
#if defined(_WIN64)
unsigned long r;
_BitScanForward64(&r, x);
return r;
#elif defined(_WIN32)
unsigned long r;
// A non-zero return from the intrinsic is treated as "bit found in the
// low half".
if (_BitScanForward(&r, (u32)x)) {
return (u32)r;
}
// Otherwise scan the high half and offset the index by the 32 low bits.
_BitScanForward(&r, x >> 32);
return (u32)(r + 32);
#else
return ctz64_impl_c(x);
#endif
}
static really_inline

View File

@@ -33,7 +33,7 @@
#include "hs_internal.h"
#include "util/arch.h"
#if !defined(_WIN32) && !defined(CPUID_H_)
#if !defined(CPUID_H_)
#include <cpuid.h>
#endif

View File

@@ -32,7 +32,7 @@
#include "ue2common.h"
#include "util/arch/common/cpuid_flags.h"
#if !defined(_WIN32) && !defined(CPUID_H_)
#if !defined(CPUID_H_)
#include <cpuid.h>
/* system header doesn't have a header guard */
#define CPUID_H_
@@ -46,16 +46,7 @@ extern "C"
/** \brief Thin wrapper around the compiler's CPUID primitive.
 *
 * \param op    Passed as the first argument to the underlying primitive.
 * \param leaf  Passed as the second argument to the underlying primitive.
 * \param eax,ebx,ecx,edx  Output locations for the four result registers;
 *                         all four are written through unconditionally.
 *
 * Without _WIN32 this expands to __cpuid_count(); with _WIN32 it calls
 * __cpuidex() into a temporary array and copies the four entries out.
 */
static inline
void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax,
unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
#ifndef _WIN32
__cpuid_count(op, leaf, *eax, *ebx, *ecx, *edx);
#else
// __cpuidex fills an int[4]; entries 0..3 are copied to eax..edx in order.
int a[4];
__cpuidex(a, op, leaf);
*eax = a[0];
*ebx = a[1];
*ecx = a[2];
*edx = a[3];
#endif
}
// ECX
@@ -95,9 +86,6 @@ void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax,
static inline
u64a xgetbv(u32 op) {
#if defined(_WIN32) || defined(__INTEL_COMPILER)
return _xgetbv(op);
#else
u32 a, d;
__asm__ volatile (
"xgetbv\n"
@@ -105,14 +93,10 @@ u64a xgetbv(u32 op) {
"=d"(d)
: "c"(op));
return ((u64a)d << 32) + a;
#endif
}
static inline
int check_avx2(void) {
#if defined(__INTEL_COMPILER)
return _may_i_use_cpu_feature(_FEATURE_AVX2);
#else
unsigned int eax, ebx, ecx, edx;
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
@@ -141,7 +125,6 @@ int check_avx2(void) {
}
return 0;
#endif
}
static inline
@@ -149,9 +132,6 @@ int check_avx512(void) {
/*
* For our purposes, having avx512 really means "can we use AVX512BW?"
*/
#if defined(__INTEL_COMPILER)
return _may_i_use_cpu_feature(_FEATURE_AVX512BW | _FEATURE_AVX512VL);
#else
unsigned int eax, ebx, ecx, edx;
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
@@ -184,14 +164,10 @@ int check_avx512(void) {
}
return 0;
#endif
}
static inline
int check_avx512vbmi(void) {
#if defined(__INTEL_COMPILER)
return _may_i_use_cpu_feature(_FEATURE_AVX512VBMI);
#else
unsigned int eax, ebx, ecx, edx;
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
@@ -229,7 +205,6 @@ int check_avx512vbmi(void) {
}
return 0;
#endif
}
static inline

View File

@@ -38,12 +38,12 @@
#define HAVE_SIMD_128_BITS
#endif
#if defined(__SSE4_1__) || (defined(_WIN32) && defined(__AVX__))
#if defined(__SSE4_1__) || defined(__AVX__)
#define HAVE_SSE41
#define HAVE_SIMD_128_BITS
#endif
#if defined(__SSE4_2__) || (defined(_WIN32) && defined(__AVX__))
#if defined(__SSE4_2__) || defined(__AVX__)
#define HAVE_SSE42
#define HAVE_SIMD_128_BITS
#endif
@@ -78,30 +78,16 @@
#define VECTORSIZE 16
#endif
/*
* ICC and MSVC don't break out POPCNT or BMI/2 as separate pre-def macros
*/
#if defined(__POPCNT__) || \
(defined(__INTEL_COMPILER) && defined(__SSE4_2__)) || \
(defined(_WIN32) && defined(__AVX__))
#if defined(__POPCNT__)
#define HAVE_POPCOUNT_INSTR
#endif
#if defined(__BMI__) || (defined(_WIN32) && defined(__AVX2__)) || \
(defined(__INTEL_COMPILER) && defined(__AVX2__))
#if defined(__BMI__)
#define HAVE_BMI
#endif
#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__)) || \
(defined(__INTEL_COMPILER) && defined(__AVX2__))
#if defined(__BMI2__)
#define HAVE_BMI2
#endif
/*
* MSVC uses a different form of inline asm
*/
#if defined(_WIN32) && defined(_MSC_VER)
#define NO_ASM
#endif
#endif // UTIL_ARCH_X86_H_

View File

@@ -56,11 +56,7 @@ void describeChar(ostream &os, char c, enum cc_output_t out_type) {
const string backslash((out_type == CC_OUT_DOT ? 2 : 1), '\\');
#ifdef _WIN32
if (c >= 0x21 && c < 0x7F && c != '\\') {
#else
if (isgraph(c) && c != '\\') {
#endif
if (escaped.find(c) != string::npos) {
os << backslash << c;
} else if (out_type == CC_OUT_DOT

View File

@@ -1197,11 +1197,7 @@ u32 mmbit_sparse_iter_begin(const u8 *bits, u32 total_bits, u32 *idx,
assert(ISALIGNED_N(it_root, alignof(struct mmbit_sparse_iter)));
// Our state _may_ be on the stack
#ifndef _WIN32
assert(ISALIGNED_N(s, alignof(struct mmbit_sparse_state)));
#else
assert(ISALIGNED_N(s, 4));
#endif
MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
// iterator should have _something_ at the root level
@@ -1309,11 +1305,7 @@ u32 mmbit_sparse_iter_next(const u8 *bits, u32 total_bits, u32 last_key,
assert(ISALIGNED_N(it_root, alignof(struct mmbit_sparse_iter)));
// Our state _may_ be on the stack
#ifndef _WIN32
assert(ISALIGNED_N(s, alignof(struct mmbit_sparse_state)));
#else
assert(ISALIGNED_N(s, 4));
#endif
MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
MDEBUG_PRINTF("NEXT (total_bits=%u, last_key=%u)\n", total_bits, last_key);
@@ -1466,11 +1458,7 @@ void mmbit_sparse_iter_unset(u8 *bits, u32 total_bits,
assert(ISALIGNED_N(it, alignof(struct mmbit_sparse_iter)));
// Our state _may_ be on the stack
#ifndef _WIN32
assert(ISALIGNED_N(s, alignof(struct mmbit_sparse_state)));
#else
assert(ISALIGNED_N(s, 4));
#endif
MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);

View File

@@ -38,36 +38,38 @@
/** \brief Count the number of set bits in a 32-bit value.
 *
 * NOTE(review): as this text stands, both branches of the
 * HAVE_POPCOUNT_INSTR conditional return before the __builtin_popcount()
 * line is reached, and a commented-out copy of the same conditional
 * follows it. This looks like old and new revisions of the body shown
 * together -- confirm which one is meant to be live.
 */
static really_inline
u32 popcount32(u32 x) {
#if defined(HAVE_POPCOUNT_INSTR)
// Single-instruction builtin.
return _mm_popcnt_u32(x);
#else
// Fast branch-free version from bit-twiddling hacks as older Intel
// processors do not have a POPCNT instruction.
x -= (x >> 1) & 0x55555555;
x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
return (((x + (x >> 4)) & 0xf0f0f0f) * 0x1010101) >> 24;
#endif
// Compiler builtin variant.
return __builtin_popcount(x);
// #if defined(HAVE_POPCOUNT_INSTR)
// // Single-instruction builtin.
// return _mm_popcnt_u32(x);
// #else
// // Fast branch-free version from bit-twiddling hacks as older Intel
// // processors do not have a POPCNT instruction.
// x -= (x >> 1) & 0x55555555;
// x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
// return (((x + (x >> 4)) & 0xf0f0f0f) * 0x1010101) >> 24;
// #endif
}
/** \brief Count the number of set bits in a 64-bit value; result is a u32.
 *
 * NOTE(review): as this text stands, every branch of the ARCH_X86_64 /
 * HAVE_POPCOUNT_INSTR conditional returns before the
 * __builtin_popcountll() line is reached, and a commented-out copy of the
 * same conditional follows it. This looks like old and new revisions of
 * the body shown together -- confirm which one is meant to be live.
 */
static really_inline
u32 popcount64(u64a x) {
#if defined(ARCH_X86_64)
# if defined(HAVE_POPCOUNT_INSTR)
// Single-instruction builtin.
return (u32)_mm_popcnt_u64(x);
# else
// Fast branch-free version from bit-twiddling hacks as older Intel
// processors do not have a POPCNT instruction.
x -= (x >> 1) & 0x5555555555555555;
x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f;
return (x * 0x0101010101010101) >> 56;
# endif
#else
// Synthesise from two 32-bit cases.
return popcount32(x >> 32) + popcount32(x);
#endif
// Compiler builtin variant (long long width).
return __builtin_popcountll(x);
// #if defined(ARCH_X86_64)
// # if defined(HAVE_POPCOUNT_INSTR)
// // Single-instruction builtin.
// return (u32)_mm_popcnt_u64(x);
// # else
// // Fast branch-free version from bit-twiddling hacks as older Intel
// // processors do not have a POPCNT instruction.
// x -= (x >> 1) & 0x5555555555555555;
// x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
// x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f;
// return (x * 0x0101010101010101) >> 56;
// # endif
// #else
// // Synthesise from two 32-bit cases.
// return popcount32(x >> 32) + popcount32(x);
// #endif
}
#endif /* UTIL_POPCOUNT_H_ */

View File

@@ -35,12 +35,7 @@
#include "ue2common.h"
#if !defined(_WIN32)
#define PACKED__MAY_ALIAS __attribute__((packed, may_alias))
#else
#define PACKED__MAY_ALIAS
#pragma pack(push, 1) // pack everything until told otherwise
#endif
/// Perform an unaligned 16-bit load
static really_inline
@@ -89,9 +84,6 @@ void unaligned_store_u64a(void *ptr, u64a val) {
struct unaligned *uptr = (struct unaligned *)ptr;
uptr->u = val;
}
#if defined(_WIN32)
#pragma pack(pop)
#endif // win32
#undef PACKED__MAY_ALIAS