Clear upper half of AVX register before libm call

Clearing the upper half of the AVX register is required before calling SSE
code to avoid AVX-to-SSE transition penalties.
This commit is contained in:
Matthew Barr
2017-02-10 11:29:42 +11:00
parent 1245156f44
commit 9363ae7486
3 changed files with 76 additions and 1 deletions

73
src/util/math.h Normal file
View File

@@ -0,0 +1,73 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UTIL_MATH_H_
#define UTIL_MATH_H_
#include <math.h>
/*
 * Select the compiler intrinsics header that declares _mm256_zeroupper().
 *
 * The HAVE_C_* / HAVE_CXX_* feature macros are tested separately for C and
 * C++ translation units. They are not defined in this header; presumably
 * they come from the build configuration (TODO confirm against the build).
 */
#ifdef __cplusplus
# if defined(HAVE_CXX_X86INTRIN_H)
#  define USE_X86INTRIN_H
# endif
# if defined(HAVE_CXX_INTRIN_H)
#  define USE_INTRIN_H
# endif
#else // C
# if defined(HAVE_C_X86INTRIN_H)
#  define USE_X86INTRIN_H
# endif
# if defined(HAVE_C_INTRIN_H)
#  define USE_INTRIN_H
# endif
#endif

/* x86intrin.h takes precedence over intrin.h when both are available. */
#if defined(USE_X86INTRIN_H)
# include <x86intrin.h>
#elif defined(USE_INTRIN_H)
# include <intrin.h>
#elif defined(__AVX__)
/*
 * Neither configured header was selected, yet this header calls
 * _mm256_zeroupper() whenever __AVX__ is defined. Fall back to
 * <immintrin.h> so the intrinsic is always declared in AVX builds.
 */
# include <immintrin.h>
#endif
/*
 * Wrapper around pow() from <math.h>.
 *
 * In builds where __AVX__ is defined, the upper halves of the AVX registers
 * are zeroed first, so that the call into the math library does not incur
 * AVX-to-SSE transitions (on some glibc versions this avoids thousands of
 * them).
 *
 * Returns pow(x, y).
 *
 * NOTE(review): _mm256_zeroupper() is expected to be declared by the
 * intrinsics header included earlier in this file -- confirm one is always
 * selected when __AVX__ is defined.
 */
static really_inline
double our_pow(double x, double y) {
#if defined(__AVX__)
    /* Must happen before entering libm. */
    _mm256_zeroupper();
#endif
    const double result = pow(x, y);
    return result;
}
#endif // UTIL_MATH_H_