mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
247 lines
6.8 KiB
C
247 lines
6.8 KiB
C
/*
|
|
* Copyright (c) 2015-2016, Intel Corporation
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "cpuid_flags.h"
|
|
#include "ue2common.h"
|
|
#include "hs_compile.h" // for HS_MODE_ flags
|
|
#include "hs_internal.h"
|
|
|
|
#ifndef _WIN32
|
|
#include <cpuid.h>
|
|
#endif
|
|
|
|
// Feature bits reported in ECX by CPUID leaf 1.
// Note: bit 27, named XSAVE here, is the flag Intel documents as OSXSAVE
// (the OS has enabled XSAVE, so XGETBV may be executed).
#define SSE3      (1 << 0)    // bit 0
#define SSSE3     (1 << 9)    // bit 9
#define SSE4_1    (1 << 19)   // bit 19
#define SSE4_2    (1 << 20)   // bit 20
#define POPCNT    (1 << 23)   // bit 23
#define XSAVE     (1 << 27)   // bit 27 (OSXSAVE)
#define AVX       (1 << 28)   // bit 28
|
|
|
|
// Feature bits reported in EDX by CPUID leaf 1.
#define FXSAVE    (1 << 24)   // bit 24: FXSAVE/FXRSTOR
#define SSE       (1 << 25)   // bit 25
#define SSE2      (1 << 26)   // bit 26
#define HTT       (1 << 28)   // bit 28
|
|
|
|
// Structured Extended Feature Flags Enumeration Leaf (CPUID leaf 7,
// sub-leaf 0) EBX values. These bits are reported in EBX, not ECX.
#define BMI       (1 << 3)    // bit 3
#define AVX2      (1 << 5)    // bit 5
#define BMI2      (1 << 8)    // bit 8
|
|
|
|
// Bits of Extended Control Register 0 (XCR0), as returned by xgetbv(0).
// A set bit means the OS has enabled that register state.
#define XCR0_SSE  (1 << 1)    // bit 1: SSE register state
#define XCR0_AVX  (1 << 2)    // bit 2: AVX register state
|
|
|
|
/**
 * Execute the CPUID instruction and store the four result registers through
 * the output pointers.
 *
 * \param op    CPUID leaf to query.
 * \param leaf  Sub-leaf index passed alongside \p op (despite its name).
 * \param eax   Receives the resulting EAX value.
 * \param ebx   Receives the resulting EBX value.
 * \param ecx   Receives the resulting ECX value.
 * \param edx   Receives the resulting EDX value.
 *
 * All four outputs are always written; any previous contents are discarded.
 */
static inline
void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax,
           unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
#ifndef _WIN32
    __cpuid_count(op, leaf, *eax, *ebx, *ecx, *edx);
#else
    /* __cpuidex() is declared as taking an int[4] buffer and int arguments,
     * so use a signed buffer and convert explicitly rather than handing it
     * an unsigned array. */
    int regs[4];
    __cpuidex(regs, (int)op, (int)leaf);
    *eax = (unsigned int)regs[0];
    *ebx = (unsigned int)regs[1];
    *ecx = (unsigned int)regs[2];
    *edx = (unsigned int)regs[3];
#endif
}
|
|
|
|
static inline
|
|
u64a xgetbv(u32 op) {
|
|
#if defined(_WIN32) || defined(__INTEL_COMPILER)
|
|
return _xgetbv(op);
|
|
#else
|
|
u32 a, d;
|
|
__asm__ volatile (
|
|
"xgetbv\n"
|
|
: "=a"(a),
|
|
"=d"(d)
|
|
: "c"(op));
|
|
return ((u64a)d << 32) + a;
|
|
#endif
|
|
}
|
|
|
|
int check_avx2(void) {
|
|
#if defined(__INTEL_COMPILER)
|
|
return _may_i_use_cpu_feature(_FEATURE_AVX2);
|
|
#else
|
|
unsigned int eax, ebx, ecx, edx;
|
|
|
|
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
|
|
|
|
/* check AVX is supported and XGETBV is enabled by OS */
|
|
if ((ecx & (AVX | XSAVE)) != (AVX | XSAVE)) {
|
|
DEBUG_PRINTF("AVX and XSAVE not supported\n");
|
|
return 0;
|
|
}
|
|
|
|
/* check that SSE and AVX registers are enabled by OS */
|
|
u64a xcr0 = xgetbv(0);
|
|
if ((xcr0 & (XCR0_SSE | XCR0_AVX)) != (XCR0_SSE | XCR0_AVX)) {
|
|
DEBUG_PRINTF("SSE and AVX registers not enabled\n");
|
|
return 0;
|
|
}
|
|
|
|
/* ECX and EDX contain capability flags */
|
|
ecx = 0;
|
|
cpuid(7, 0, &eax, &ebx, &ecx, &edx);
|
|
|
|
if (ebx & AVX2) {
|
|
DEBUG_PRINTF("AVX2 enabled\n");
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
#endif
|
|
}
|
|
|
|
u64a cpuid_flags(void) {
|
|
u64a cap = 0;
|
|
|
|
if (check_avx2()) {
|
|
DEBUG_PRINTF("AVX2 enabled\n");
|
|
cap |= HS_CPU_FEATURES_AVX2;
|
|
}
|
|
|
|
#if !defined(__AVX2__)
|
|
cap &= ~HS_CPU_FEATURES_AVX2;
|
|
#endif
|
|
|
|
return cap;
|
|
}
|
|
|
|
int check_ssse3(void) {
|
|
unsigned int eax, ebx, ecx, edx;
|
|
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
|
|
return !!(ecx & SSSE3);
|
|
}
|
|
|
|
int check_sse42(void) {
|
|
unsigned int eax, ebx, ecx, edx;
|
|
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
|
|
return !!(ecx & SSE4_2);
|
|
}
|
|
|
|
int check_popcnt(void) {
|
|
unsigned int eax, ebx, ecx, edx;
|
|
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
|
|
return !!(ecx & POPCNT);
|
|
}
|
|
|
|
struct family_id {
|
|
u32 full_family;
|
|
u32 full_model;
|
|
u32 tune;
|
|
};
|
|
|
|
/* from table 35-1 of the Intel 64 and IA32 Arch. Software Developer's Manual
|
|
* and "Intel Architecture and Processor Identification With CPUID Model and
|
|
* Family Numbers" */
|
|
static const struct family_id known_microarch[] = {
|
|
{ 0x6, 0x37, HS_TUNE_FAMILY_SLM }, /* baytrail */
|
|
{ 0x6, 0x4D, HS_TUNE_FAMILY_SLM }, /* avoton, rangley */
|
|
|
|
{ 0x6, 0x3C, HS_TUNE_FAMILY_HSW }, /* haswell */
|
|
{ 0x6, 0x45, HS_TUNE_FAMILY_HSW }, /* haswell */
|
|
{ 0x6, 0x46, HS_TUNE_FAMILY_HSW }, /* haswell */
|
|
{ 0x6, 0x3F, HS_TUNE_FAMILY_HSW }, /* haswell */
|
|
|
|
{ 0x6, 0x3E, HS_TUNE_FAMILY_IVB }, /* ivybridge */
|
|
{ 0x6, 0x3A, HS_TUNE_FAMILY_IVB }, /* ivybridge */
|
|
|
|
{ 0x6, 0x2A, HS_TUNE_FAMILY_SNB }, /* sandybridge */
|
|
{ 0x6, 0x2D, HS_TUNE_FAMILY_SNB }, /* sandybridge */
|
|
|
|
{ 0x6, 0x3D, HS_TUNE_FAMILY_BDW }, /* broadwell Core-M */
|
|
{ 0x6, 0x4F, HS_TUNE_FAMILY_BDW }, /* broadwell xeon */
|
|
{ 0x6, 0x56, HS_TUNE_FAMILY_BDW }, /* broadwell xeon-d */
|
|
|
|
// { 0x6, 0x25, HS_TUNE_FAMILY_GENERIC }, /* westmere */
|
|
// { 0x6, 0x2C, HS_TUNE_FAMILY_GENERIC }, /* westmere */
|
|
// { 0x6, 0x2F, HS_TUNE_FAMILY_GENERIC }, /* westmere */
|
|
|
|
// { 0x6, 0x1E, HS_TUNE_FAMILY_GENERIC }, /* nehalem */
|
|
// { 0x6, 0x1A, HS_TUNE_FAMILY_GENERIC }, /* nehalem */
|
|
// { 0x6, 0x2E, HS_TUNE_FAMILY_GENERIC }, /* nehalem */
|
|
|
|
// { 0x6, 0x17, HS_TUNE_FAMILY_GENERIC }, /* penryn */
|
|
// { 0x6, 0x1D, HS_TUNE_FAMILY_GENERIC }, /* penryn */
|
|
|
|
};
|
|
|
|
#ifdef DUMP_SUPPORT
/**
 * Return the name of a HS_TUNE_FAMILY_* constant as a string, for debug
 * output. Values without a case below yield "unknown".
 */
static UNUSED
const char *dumpTune(u32 tune) {
    switch (tune) {
    case HS_TUNE_FAMILY_SLM:
        return "HS_TUNE_FAMILY_SLM";
    case HS_TUNE_FAMILY_HSW:
        return "HS_TUNE_FAMILY_HSW";
    case HS_TUNE_FAMILY_SNB:
        return "HS_TUNE_FAMILY_SNB";
    case HS_TUNE_FAMILY_IVB:
        return "HS_TUNE_FAMILY_IVB";
    case HS_TUNE_FAMILY_BDW:
        return "HS_TUNE_FAMILY_BDW";
    default:
        break;
    }
    return "unknown";
}
#endif
|
|
|
|
u32 cpuid_tune(void) {
|
|
unsigned int eax, ebx, ecx, edx;
|
|
|
|
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
|
|
|
|
u32 family = (eax >> 8) & 0xf;
|
|
u32 model = 0;
|
|
|
|
if (family == 0x6 || family == 0xf) {
|
|
model = ((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0);
|
|
} else {
|
|
model = (eax >> 4) & 0xf;
|
|
}
|
|
|
|
DEBUG_PRINTF("family = %xh model = %xh\n", family, model);
|
|
for (u32 i = 0; i < ARRAY_LENGTH(known_microarch); i++) {
|
|
if (family != known_microarch[i].full_family) {
|
|
continue;
|
|
}
|
|
|
|
if (model != known_microarch[i].full_model) {
|
|
continue;
|
|
}
|
|
|
|
u32 tune = known_microarch[i].tune;
|
|
DEBUG_PRINTF("found tune flag %s\n", dumpTune(tune) );
|
|
return tune;
|
|
}
|
|
|
|
return HS_TUNE_FAMILY_GENERIC;
|
|
}
|