mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Improved test for AVX2 support
Test for xsave and XMM/YMM registers. Use ICC dynamic feature detection where available.
This commit is contained in:
parent
6e8f394d8d
commit
d77ee0839d
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -40,6 +40,8 @@
|
|||||||
#define SSSE3 (1 << 9)
|
#define SSSE3 (1 << 9)
|
||||||
#define SSE4_1 (1 << 19)
|
#define SSE4_1 (1 << 19)
|
||||||
#define SSE4_2 (1 << 20)
|
#define SSE4_2 (1 << 20)
|
||||||
|
#define XSAVE (1 << 27)
|
||||||
|
#define AVX (1 << 28)
|
||||||
|
|
||||||
// EDX
|
// EDX
|
||||||
#define SSE (1 << 25)
|
#define SSE (1 << 25)
|
||||||
@ -51,6 +53,10 @@
|
|||||||
#define AVX2 (1 << 5)
|
#define AVX2 (1 << 5)
|
||||||
#define BMI2 (1 << 8)
|
#define BMI2 (1 << 8)
|
||||||
|
|
||||||
|
// Extended Control Register 0 (XCR0) values
|
||||||
|
#define XCR0_SSE (1 << 1)
|
||||||
|
#define XCR0_AVX (1 << 2)
|
||||||
|
|
||||||
static __inline
|
static __inline
|
||||||
void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax,
|
void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax,
|
||||||
unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
|
unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
|
||||||
@ -66,19 +72,61 @@ void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax,
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
u64a cpuid_flags(void) {
|
static inline
|
||||||
unsigned int eax, ebx, ecx, edx;
|
u64a xgetbv(u32 op) {
|
||||||
u64a cap = 0;
|
#if defined(_WIN32) || defined(__INTEL_COMPILER)
|
||||||
|
return _xgetbv(op);
|
||||||
|
#else
|
||||||
|
u32 a, d;
|
||||||
|
__asm__ volatile (
|
||||||
|
"xgetbv\n"
|
||||||
|
: "=a"(a),
|
||||||
|
"=d"(d)
|
||||||
|
: "c"(op));
|
||||||
|
return ((u64a)d << 32) + a;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
int check_avx2(void) {
|
||||||
|
#if defined(__INTEL_COMPILER)
|
||||||
|
return _may_i_use_cpu_feature(_FEATURE_AVX2);
|
||||||
|
#else
|
||||||
|
unsigned int eax, ebx, ecx, edx;
|
||||||
|
|
||||||
// version info
|
|
||||||
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
|
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
|
||||||
|
|
||||||
/* ECX and EDX contain capability flags */
|
/* check AVX is supported and XGETBV is enabled by OS */
|
||||||
|
if ((ecx & (AVX | XSAVE)) != (AVX | XSAVE)) {
|
||||||
|
DEBUG_PRINTF("AVX and XSAVE not supported\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check that SSE and AVX registers are enabled by OS */
|
||||||
|
u64a xcr0 = xgetbv(0);
|
||||||
|
if ((xcr0 & (XCR0_SSE | XCR0_AVX)) != (XCR0_SSE | XCR0_AVX)) {
|
||||||
|
DEBUG_PRINTF("SSE and AVX registers not enabled\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ECX and EDX contain capability flags */
|
||||||
ecx = 0;
|
ecx = 0;
|
||||||
cpuid(7, 0, &eax, &ebx, &ecx, &edx);
|
cpuid(7, 0, &eax, &ebx, &ecx, &edx);
|
||||||
|
|
||||||
if (ebx & AVX2) {
|
if (ebx & AVX2) {
|
||||||
|
DEBUG_PRINTF("AVX2 enabled\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
u64a cpuid_flags(void) {
|
||||||
|
u64a cap = 0;
|
||||||
|
|
||||||
|
if (check_avx2()) {
|
||||||
|
DEBUG_PRINTF("AVX2 enabled\n");
|
||||||
cap |= HS_CPU_FEATURES_AVX2;
|
cap |= HS_CPU_FEATURES_AVX2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user