/*
 * Copyright (c) 2015, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief SIMD primitives specifically for Intel SSSE3 platforms.
 */

#ifndef SIMD_UTILS_SSSE3_H_E27DF795C9AA02
#define SIMD_UTILS_SSSE3_H_E27DF795C9AA02

#if !defined(_WIN32) && !defined(__SSSE3__)
#error SSSE3 instructions must be enabled
#endif

#include "simd_utils.h"
#include "ue2common.h"

// we may already have x86intrin.h
#if !defined(USE_X86INTRIN_H)
#if defined(HAVE_C_INTRIN_H)
#include <intrin.h>
#elif defined(HAVE_TMMINTRIN_H)
#include <tmmintrin.h> // SSSE3 intrinsics
#else
#define I_HAVE_BROKEN_INTRINSICS
#endif
#endif

#if !defined(I_HAVE_BROKEN_INTRINSICS)
// newish compilers get this right
#define palignr(r, l, offset) _mm_alignr_epi8(r, l, offset)
#else
// must be inline, even in weak-sauce debug builds.
// oldish compilers either don't have the intrinsic, or force one arg through
// memory
static really_really_inline
m128 palignr(m128 r, m128 l, const int offset) {
    __asm__ ("palignr %2,%1,%0" : "+x"(r) : "x"(l), "i"(offset));
    return r;
}
#endif

static really_inline
m128 pshufb(m128 a, m128 b) {
    m128 result;
#if !defined(I_HAVE_BROKEN_INTRINSICS)
    result = _mm_shuffle_epi8(a, b);
#else
    __asm__("pshufb\t%1,%0" : "=x"(result) : "xm"(b), "0"(a));
#endif
    return result;
}

#if defined(__AVX2__)

static really_inline
m256 vpshufb(m256 a, m256 b) {
    return _mm256_shuffle_epi8(a, b);
}

#if defined(USE_GCC_COMPOUND_STATEMENTS)
#define vpalignr(r, l, offset) ({                       \
    m256 res = _mm256_alignr_epi8(r, l, offset);        \
    res;                                                \
})
#else
static really_inline
m256 vpalignr(m256 r, m256 l, const int offset) {
    return _mm256_alignr_epi8(r, l, offset);
}
#endif

#else // not __AVX2__

static really_inline
m256 vpshufb(m256 a, m256 b) {
    m256 rv;
    rv.lo = pshufb(a.lo, b.lo);
    rv.hi = pshufb(a.hi, b.hi);
    return rv;
}

/* palignr requires the offset to be an immediate, which we can do with a
 * compound macro, otherwise we have to enumerate the offsets and hope the
 * compiler can throw the rest away. */
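/* Illustrative sketch (not from the original source): with the
 * compound-statement macro below, the offset reaches palignr as a
 * compile-time constant, e.g.
 *
 *     m256 window = vpalignr(cur, prev, 13); // 13 stays an immediate
 *
 * (cur/prev are hypothetical names). Each 128-bit lane of the result is
 * bytes l.lane[offset..15] followed by r.lane[0..offset-1]. The switch-based
 * fallback instead dispatches a possibly-runtime offset to one of the
 * sixteen immediate cases enumerated below. */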
#if defined(USE_GCC_COMPOUND_STATEMENTS)
#define vpalignr(r, l, offset) ({                       \
    m256 res;                                           \
    res.lo = palignr(r.lo, l.lo, offset);               \
    res.hi = palignr(r.hi, l.hi, offset);               \
    res;                                                \
})
#else
#define VPALIGN_CASE(N) case N:                         \
        res.lo = palignr(r.lo, l.lo, N);                \
        res.hi = palignr(r.hi, l.hi, N);                \
        return res;
static really_inline
m256 vpalignr(m256 r, m256 l, const int offset) {
    m256 res;
    switch (offset) {
        VPALIGN_CASE(0)
        VPALIGN_CASE(1)
        VPALIGN_CASE(2)
        VPALIGN_CASE(3)
        VPALIGN_CASE(4)
        VPALIGN_CASE(5)
        VPALIGN_CASE(6)
        VPALIGN_CASE(7)
        VPALIGN_CASE(8)
        VPALIGN_CASE(9)
        VPALIGN_CASE(10)
        VPALIGN_CASE(11)
        VPALIGN_CASE(12)
        VPALIGN_CASE(13)
        VPALIGN_CASE(14)
        VPALIGN_CASE(15)
        default:
            assert(0);
            return zeroes256();
    }
}
#undef VPALIGN_CASE
#endif

#endif // __AVX2__

#endif /* SIMD_UTILS_SSSE3_H_E27DF795C9AA02 */
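
/*
 * Usage sketch (illustrative, not part of the original header): pshufb as a
 * 16-entry byte table lookup. For each byte i, the result is
 * table[index[i] & 0x0f], or zero if the high bit of index[i] is set. A
 * minimal standalone version using the raw intrinsics; the function name
 * below is hypothetical:
 *
 *     #include <tmmintrin.h>
 *
 *     // Map the low nibble of every byte in 'in' to its hex digit.
 *     static __m128i hex_lo_nibbles(__m128i in) {
 *         const __m128i table = _mm_setr_epi8('0', '1', '2', '3', '4', '5',
 *                                             '6', '7', '8', '9', 'a', 'b',
 *                                             'c', 'd', 'e', 'f');
 *         const __m128i idx = _mm_and_si128(in, _mm_set1_epi8(0x0f));
 *         return _mm_shuffle_epi8(table, idx);
 *     }
 */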