mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Clear upper half of AVX register before libm call
The upper halves of the AVX (YMM) registers should be cleared before calling into SSE code such as libm, in order to avoid AVX-to-SSE transition penalties.
This commit is contained in:
parent
1245156f44
commit
9363ae7486
@ -991,6 +991,7 @@ SET (hs_SRCS
|
|||||||
src/util/fatbit_build.h
|
src/util/fatbit_build.h
|
||||||
src/util/graph.h
|
src/util/graph.h
|
||||||
src/util/hash.h
|
src/util/hash.h
|
||||||
|
src/util/math.h
|
||||||
src/util/multibit_build.cpp
|
src/util/multibit_build.cpp
|
||||||
src/util/multibit_build.h
|
src/util/multibit_build.h
|
||||||
src/util/order_check.h
|
src/util/order_check.h
|
||||||
|
@ -43,6 +43,7 @@
|
|||||||
#include "util/alloc.h"
|
#include "util/alloc.h"
|
||||||
#include "util/compare.h"
|
#include "util/compare.h"
|
||||||
#include "util/dump_mask.h"
|
#include "util/dump_mask.h"
|
||||||
|
#include "util/math.h"
|
||||||
#include "util/target_info.h"
|
#include "util/target_info.h"
|
||||||
#include "util/ue2string.h"
|
#include "util/ue2string.h"
|
||||||
#include "util/verify_types.h"
|
#include "util/verify_types.h"
|
||||||
@ -195,7 +196,7 @@ aligned_unique_ptr<FDR> FDRCompiler::setupFDR() {
|
|||||||
static
|
static
|
||||||
double getScoreUtil(u32 len, u32 count) {
|
double getScoreUtil(u32 len, u32 count) {
|
||||||
return len == 0 ? numeric_limits<double>::max()
|
return len == 0 ? numeric_limits<double>::max()
|
||||||
: pow(count, 1.05) * pow(len, -3.0);
|
: our_pow(count, 1.05) * our_pow(len, -3.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
73
src/util/math.h
Normal file
73
src/util/math.h
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2017, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef UTIL_MATH_H_
|
||||||
|
#define UTIL_MATH_H_
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
# if defined(HAVE_CXX_X86INTRIN_H)
|
||||||
|
# define USE_X86INTRIN_H
|
||||||
|
# endif
|
||||||
|
#else // C
|
||||||
|
# if defined(HAVE_C_X86INTRIN_H)
|
||||||
|
# define USE_X86INTRIN_H
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
# if defined(HAVE_CXX_INTRIN_H)
|
||||||
|
# define USE_INTRIN_H
|
||||||
|
# endif
|
||||||
|
#else // C
|
||||||
|
# if defined(HAVE_C_INTRIN_H)
|
||||||
|
# define USE_INTRIN_H
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(USE_X86INTRIN_H)
|
||||||
|
#include <x86intrin.h>
|
||||||
|
#elif defined(USE_INTRIN_H)
|
||||||
|
#include <intrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
 * \brief Thin wrapper around the math library's pow().
 *
 * Returns pow(x, y). In builds where __AVX__ is defined, the upper halves of
 * the AVX registers are zeroed before the call is made.
 */
static really_inline
double our_pow(double x, double y) {
#if defined(__AVX__)
    /*
     * Zero the upper halves of the AVX registers ahead of the libm call.
     * With some glibc versions this avoids thousands of AVX-to-SSE
     * transitions.
     */
    _mm256_zeroupper();
#endif
    const double result = pow(x, y);
    return result;
}
|
||||||
|
|
||||||
|
#endif // UTIL_MATH_H_
|
Loading…
x
Reference in New Issue
Block a user