mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Move cpuid checks inline.
During fat runtime function resolution there was a chance that the PLT might not be initialised in time for us to call the cpuid check functions. Moving them inline means there is no PLT to worry about. Fixes #78
This commit is contained in:
parent
3da5fc7cf3
commit
50ea83cb26
@ -30,7 +30,7 @@
|
||||
#include "hs_common.h"
|
||||
#include "hs_runtime.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/cpuid_flags.h"
|
||||
#include "util/cpuid_inline.h"
|
||||
#include "util/join.h"
|
||||
|
||||
#if defined(DISABLE_AVX512_DISPATCH)
|
||||
|
@ -45,6 +45,7 @@
|
||||
#include "parser/unsupported.h"
|
||||
#include "util/compile_error.h"
|
||||
#include "util/cpuid_flags.h"
|
||||
#include "util/cpuid_inline.h"
|
||||
#include "util/depth.h"
|
||||
#include "util/popcount.h"
|
||||
#include "util/target_info.h"
|
||||
|
@ -28,6 +28,7 @@
|
||||
|
||||
#include "hs_common.h"
|
||||
#include "util/cpuid_flags.h"
|
||||
#include "util/cpuid_inline.h"
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t HS_CDECL hs_valid_platform(void) {
|
||||
|
@ -27,154 +27,16 @@
|
||||
*/
|
||||
|
||||
#include "cpuid_flags.h"
|
||||
#include "cpuid_inline.h"
|
||||
#include "ue2common.h"
|
||||
#include "hs_compile.h" // for HS_MODE_ flags
|
||||
#include "hs_internal.h"
|
||||
#include "util/arch.h"
|
||||
|
||||
#ifndef _WIN32
|
||||
#if !defined(_WIN32) && !defined(CPUID_H_)
|
||||
#include <cpuid.h>
|
||||
#endif
|
||||
|
||||
// ECX
|
||||
#define SSE3 (1 << 0)
|
||||
#define SSSE3 (1 << 9)
|
||||
#define SSE4_1 (1 << 19)
|
||||
#define SSE4_2 (1 << 20)
|
||||
#define POPCNT (1 << 23)
|
||||
#define XSAVE (1 << 27)
|
||||
#define AVX (1 << 28)
|
||||
|
||||
// EDX
|
||||
#define FXSAVE (1 << 24)
|
||||
#define SSE (1 << 25)
|
||||
#define SSE2 (1 << 26)
|
||||
#define HTT (1 << 28)
|
||||
|
||||
// Structured Extended Feature Flags Enumeration Leaf EBX values
|
||||
#define BMI (1 << 3)
|
||||
#define AVX2 (1 << 5)
|
||||
#define BMI2 (1 << 8)
|
||||
|
||||
// Structured Extended Feature Flags Enumeration Leaf EBX values
|
||||
#define AVX512F (1 << 16)
|
||||
#define AVX512BW (1 << 30)
|
||||
|
||||
// Extended Control Register 0 (XCR0) values
|
||||
#define XCR0_SSE (1 << 1)
|
||||
#define XCR0_AVX (1 << 2)
|
||||
#define XCR0_OPMASK (1 << 5) // k-regs
|
||||
#define XCR0_ZMM_Hi256 (1 << 6) // upper 256 bits of ZMM0-ZMM15
|
||||
#define XCR0_Hi16_ZMM (1 << 7) // ZMM16-ZMM31
|
||||
|
||||
#define XCR0_AVX512 (XCR0_OPMASK | XCR0_ZMM_Hi256 | XCR0_Hi16_ZMM)
|
||||
|
||||
/*
 * Execute the CPUID instruction and return the four result registers.
 *
 * op    value placed in EAX before the instruction (the CPUID leaf).
 * leaf  value placed in ECX before the instruction (the sub-leaf index,
 *       despite the parameter name).
 * eax, ebx, ecx, edx
 *       out-parameters that receive the result registers; all four must be
 *       valid pointers.
 */
static __inline
void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax,
           unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
#ifndef _WIN32
    __cpuid_count(op, leaf, *eax, *ebx, *ecx, *edx);
#else
    /* __cpuidex() is declared with an int[4] output buffer, so use a signed
     * array here rather than passing a mismatched unsigned pointer. */
    int regs[4];
    __cpuidex(regs, op, leaf);
    *eax = regs[0];
    *ebx = regs[1];
    *ecx = regs[2];
    *edx = regs[3];
#endif
}
|
||||
|
||||
static inline
|
||||
u64a xgetbv(u32 op) {
|
||||
#if defined(_WIN32) || defined(__INTEL_COMPILER)
|
||||
return _xgetbv(op);
|
||||
#else
|
||||
u32 a, d;
|
||||
__asm__ volatile (
|
||||
"xgetbv\n"
|
||||
: "=a"(a),
|
||||
"=d"(d)
|
||||
: "c"(op));
|
||||
return ((u64a)d << 32) + a;
|
||||
#endif
|
||||
}
|
||||
|
||||
int check_avx2(void) {
|
||||
#if defined(__INTEL_COMPILER)
|
||||
return _may_i_use_cpu_feature(_FEATURE_AVX2);
|
||||
#else
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
|
||||
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
/* check AVX is supported and XGETBV is enabled by OS */
|
||||
if ((ecx & (AVX | XSAVE)) != (AVX | XSAVE)) {
|
||||
DEBUG_PRINTF("AVX and XSAVE not supported\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* check that SSE and AVX registers are enabled by OS */
|
||||
u64a xcr0 = xgetbv(0);
|
||||
if ((xcr0 & (XCR0_SSE | XCR0_AVX)) != (XCR0_SSE | XCR0_AVX)) {
|
||||
DEBUG_PRINTF("SSE and AVX registers not enabled\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ECX and EDX contain capability flags */
|
||||
ecx = 0;
|
||||
cpuid(7, 0, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
if (ebx & AVX2) {
|
||||
DEBUG_PRINTF("AVX2 enabled\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
int check_avx512(void) {
|
||||
/*
|
||||
* For our purposes, having avx512 really means "can we use AVX512BW?"
|
||||
*/
|
||||
#if defined(__INTEL_COMPILER)
|
||||
return _may_i_use_cpu_feature(_FEATURE_AVX512BW | _FEATURE_AVX512VL);
|
||||
#else
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
|
||||
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
/* check XSAVE is enabled by OS */
|
||||
if (!(ecx & XSAVE)) {
|
||||
DEBUG_PRINTF("AVX and XSAVE not supported\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* check that AVX 512 registers are enabled by OS */
|
||||
u64a xcr0 = xgetbv(0);
|
||||
if ((xcr0 & XCR0_AVX512) != XCR0_AVX512) {
|
||||
DEBUG_PRINTF("AVX512 registers not enabled\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ECX and EDX contain capability flags */
|
||||
ecx = 0;
|
||||
cpuid(7, 0, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
if (!(ebx & AVX512F)) {
|
||||
DEBUG_PRINTF("AVX512F (AVX512 Foundation) instructions not enabled\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (ebx & AVX512BW) {
|
||||
DEBUG_PRINTF("AVX512BW instructions enabled\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
u64a cpuid_flags(void) {
|
||||
u64a cap = 0;
|
||||
|
||||
@ -200,24 +62,6 @@ u64a cpuid_flags(void) {
|
||||
return cap;
|
||||
}
|
||||
|
||||
int check_ssse3(void) {
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
|
||||
return !!(ecx & SSSE3);
|
||||
}
|
||||
|
||||
int check_sse42(void) {
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
|
||||
return !!(ecx & SSE4_2);
|
||||
}
|
||||
|
||||
int check_popcnt(void) {
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
|
||||
return !!(ecx & POPCNT);
|
||||
}
|
||||
|
||||
struct family_id {
|
||||
u32 full_family;
|
||||
u32 full_model;
|
||||
|
@ -31,6 +31,12 @@
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
#if !defined(_WIN32) && !defined(CPUID_H_)
|
||||
#include <cpuid.h>
|
||||
/* system header doesn't have a header guard */
|
||||
#define CPUID_H_
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
@ -41,12 +47,6 @@ u64a cpuid_flags(void);
|
||||
|
||||
u32 cpuid_tune(void);
|
||||
|
||||
int check_avx512(void);
|
||||
int check_avx2(void);
|
||||
int check_ssse3(void);
|
||||
int check_sse42(void);
|
||||
int check_popcnt(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
214
src/util/cpuid_inline.h
Normal file
214
src/util/cpuid_inline.h
Normal file
@ -0,0 +1,214 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CPUID_INLINE_H_
|
||||
#define CPUID_INLINE_H_
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "cpuid_flags.h"
|
||||
|
||||
#if !defined(_WIN32) && !defined(CPUID_H_)
|
||||
#include <cpuid.h>
|
||||
/* system header doesn't have a header guard */
|
||||
#define CPUID_H_
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
/*
 * Portable wrapper around the CPUID instruction.
 *
 * op    value placed in EAX before the instruction (the CPUID leaf).
 * leaf  value placed in ECX before the instruction (the sub-leaf index,
 *       despite the parameter name).
 * eax, ebx, ecx, edx
 *       out-parameters that receive the result registers; all four must be
 *       valid pointers.
 *
 * Defined static inline so that callers never go through the PLT.
 */
static inline
void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax,
           unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
#ifdef _WIN32
    /* the MSVC intrinsic fills a four-element int buffer: EAX, EBX, ECX, EDX */
    int regs[4];
    __cpuidex(regs, op, leaf);
    *eax = regs[0];
    *ebx = regs[1];
    *ecx = regs[2];
    *edx = regs[3];
#else
    __cpuid_count(op, leaf, *eax, *ebx, *ecx, *edx);
#endif
}
|
||||
|
||||
// ECX
|
||||
#define CPUID_SSE3 (1 << 0)
|
||||
#define CPUID_SSSE3 (1 << 9)
|
||||
#define CPUID_SSE4_1 (1 << 19)
|
||||
#define CPUID_SSE4_2 (1 << 20)
|
||||
#define CPUID_POPCNT (1 << 23)
|
||||
#define CPUID_XSAVE (1 << 27)
|
||||
#define CPUID_AVX (1 << 28)
|
||||
|
||||
// EDX
|
||||
#define CPUID_FXSAVE (1 << 24)
|
||||
#define CPUID_SSE (1 << 25)
|
||||
#define CPUID_SSE2 (1 << 26)
|
||||
#define CPUID_HTT (1 << 28)
|
||||
|
||||
// Structured Extended Feature Flags Enumeration Leaf EBX values
|
||||
#define CPUID_BMI (1 << 3)
|
||||
#define CPUID_AVX2 (1 << 5)
|
||||
#define CPUID_BMI2 (1 << 8)
|
||||
|
||||
// Structured Extended Feature Flags Enumeration Leaf EBX values
|
||||
#define CPUID_AVX512F (1 << 16)
|
||||
#define CPUID_AVX512BW (1 << 30)
|
||||
|
||||
// Extended Control Register 0 (XCR0) values
|
||||
#define CPUID_XCR0_SSE (1 << 1)
|
||||
#define CPUID_XCR0_AVX (1 << 2)
|
||||
#define CPUID_XCR0_OPMASK (1 << 5) // k-regs
|
||||
#define CPUID_XCR0_ZMM_Hi256 (1 << 6) // upper 256 bits of ZMM0-ZMM15
|
||||
#define CPUID_XCR0_Hi16_ZMM (1 << 7) // ZMM16-ZMM31
|
||||
|
||||
#define CPUID_XCR0_AVX512 \
|
||||
(CPUID_XCR0_OPMASK | CPUID_XCR0_ZMM_Hi256 | CPUID_XCR0_Hi16_ZMM)
|
||||
|
||||
static inline
|
||||
u64a xgetbv(u32 op) {
|
||||
#if defined(_WIN32) || defined(__INTEL_COMPILER)
|
||||
return _xgetbv(op);
|
||||
#else
|
||||
u32 a, d;
|
||||
__asm__ volatile (
|
||||
"xgetbv\n"
|
||||
: "=a"(a),
|
||||
"=d"(d)
|
||||
: "c"(op));
|
||||
return ((u64a)d << 32) + a;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline
|
||||
int check_avx2(void) {
|
||||
#if defined(__INTEL_COMPILER)
|
||||
return _may_i_use_cpu_feature(_FEATURE_AVX2);
|
||||
#else
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
|
||||
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
/* check AVX is supported and XGETBV is enabled by OS */
|
||||
if ((ecx & (CPUID_AVX | CPUID_XSAVE)) != (CPUID_AVX | CPUID_XSAVE)) {
|
||||
DEBUG_PRINTF("AVX and XSAVE not supported\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* check that SSE and AVX registers are enabled by OS */
|
||||
u64a xcr0 = xgetbv(0);
|
||||
if ((xcr0 & (CPUID_XCR0_SSE | CPUID_XCR0_AVX)) !=
|
||||
(CPUID_XCR0_SSE | CPUID_XCR0_AVX)) {
|
||||
DEBUG_PRINTF("SSE and AVX registers not enabled\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ECX and EDX contain capability flags */
|
||||
ecx = 0;
|
||||
cpuid(7, 0, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
if (ebx & CPUID_AVX2) {
|
||||
DEBUG_PRINTF("AVX2 enabled\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline
|
||||
int check_avx512(void) {
|
||||
/*
|
||||
* For our purposes, having avx512 really means "can we use AVX512BW?"
|
||||
*/
|
||||
#if defined(__INTEL_COMPILER)
|
||||
return _may_i_use_cpu_feature(_FEATURE_AVX512BW | _FEATURE_AVX512VL);
|
||||
#else
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
|
||||
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
/* check XSAVE is enabled by OS */
|
||||
if (!(ecx & CPUID_XSAVE)) {
|
||||
DEBUG_PRINTF("AVX and XSAVE not supported\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* check that AVX 512 registers are enabled by OS */
|
||||
u64a xcr0 = xgetbv(0);
|
||||
if ((xcr0 & CPUID_XCR0_AVX512) != CPUID_XCR0_AVX512) {
|
||||
DEBUG_PRINTF("AVX512 registers not enabled\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ECX and EDX contain capability flags */
|
||||
ecx = 0;
|
||||
cpuid(7, 0, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
if (!(ebx & CPUID_AVX512F)) {
|
||||
DEBUG_PRINTF("AVX512F (AVX512 Foundation) instructions not enabled\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (ebx & CPUID_AVX512BW) {
|
||||
DEBUG_PRINTF("AVX512BW instructions enabled\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline
|
||||
int check_ssse3(void) {
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
|
||||
return !!(ecx & CPUID_SSSE3);
|
||||
}
|
||||
|
||||
static inline
|
||||
int check_sse42(void) {
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
|
||||
return !!(ecx & CPUID_SSE4_2);
|
||||
}
|
||||
|
||||
static inline
|
||||
int check_popcnt(void) {
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
|
||||
return !!(ecx & CPUID_POPCNT);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* CPUID_INLINE_H_ */
|
Loading…
x
Reference in New Issue
Block a user