From 0722b5db5b1e1ce51a8ff9c690d7e83f63586b8d Mon Sep 17 00:00:00 2001
From: Matthew Barr
Date: Tue, 7 Jun 2016 15:45:53 +1000
Subject: [PATCH] Remove GCC-style compound statements

These do not appear to give us benefits over inlining on recent compilers.
---
 src/util/simd_utils.h | 177 ++----------------------------------------
 1 file changed, 6 insertions(+), 171 deletions(-)

diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h
index 90f3893d..90a8aba4 100644
--- a/src/util/simd_utils.h
+++ b/src/util/simd_utils.h
@@ -72,10 +72,6 @@
 #include "ue2common.h"
 #include "simd_types.h"
 
-#if defined(__GNUC__)
-#define USE_GCC_COMPOUND_STATEMENTS
-#endif
-
 // Define a common assume_aligned using an appropriate compiler built-in, if
 // it's available. Note that we need to handle C or C++ compilation.
 #ifdef __cplusplus
@@ -417,13 +413,6 @@ static really_inline m256 ones256(void) {
 static really_inline m256 and256(m256 a, m256 b) {
     return _mm256_and_si256(a, b);
 }
-#elif defined(USE_GCC_COMPOUND_STATEMENTS)
-#define and256(a, b) ({ \
-    m256 rv_and256; \
-    rv_and256.lo = and128((a).lo, (b).lo); \
-    rv_and256.hi = and128((a).hi, (b).hi); \
-    rv_and256; \
-})
 #else
 static really_inline m256 and256(m256 a, m256 b) {
     m256 rv;
@@ -437,13 +426,6 @@ static really_inline m256 and256(m256 a, m256 b) {
 static really_inline m256 or256(m256 a, m256 b) {
     return _mm256_or_si256(a, b);
 }
-#elif defined(USE_GCC_COMPOUND_STATEMENTS)
-#define or256(a, b) ({ \
-    m256 rv_or256; \
-    rv_or256.lo = or128((a).lo, (b).lo); \
-    rv_or256.hi = or128((a).hi, (b).hi); \
-    rv_or256; \
-})
 #else
 static really_inline m256 or256(m256 a, m256 b) {
     m256 rv;
@@ -457,13 +439,6 @@ static really_inline m256 or256(m256 a, m256 b) {
 static really_inline m256 xor256(m256 a, m256 b) {
     return _mm256_xor_si256(a, b);
 }
-#elif defined(USE_GCC_COMPOUND_STATEMENTS)
-#define xor256(a, b) ({ \
-    m256 rv_xor256; \
-    rv_xor256.lo = xor128((a).lo, (b).lo); \
-    rv_xor256.hi = xor128((a).hi, (b).hi); \
-    rv_xor256; \
-})
 #else
 static really_inline m256 xor256(m256 a, m256 b) {
     m256 rv;
@@ -477,13 +452,6 @@ static really_inline m256 xor256(m256 a, m256 b) {
 static really_inline m256 not256(m256 a) {
     return _mm256_xor_si256(a, ones256());
 }
-#elif defined(USE_GCC_COMPOUND_STATEMENTS)
-#define not256(a) ({ \
-    m256 rv_not256; \
-    rv_not256.lo = not128((a).lo); \
-    rv_not256.hi = not128((a).hi); \
-    rv_not256; \
-})
 #else
 static really_inline m256 not256(m256 a) {
     m256 rv;
@@ -497,13 +465,6 @@ static really_inline m256 not256(m256 a) {
 static really_inline m256 andnot256(m256 a, m256 b) {
     return _mm256_andnot_si256(a, b);
 }
-#elif defined(USE_GCC_COMPOUND_STATEMENTS)
-#define andnot256(a, b) ({ \
-    m256 rv_andnot256; \
-    rv_andnot256.lo = andnot128((a).lo, (b).lo); \
-    rv_andnot256.hi = andnot128((a).hi, (b).hi); \
-    rv_andnot256; \
-})
 #else
 static really_inline m256 andnot256(m256 a, m256 b) {
     m256 rv;
@@ -513,19 +474,11 @@ static really_inline m256 andnot256(m256 a, m256 b) {
 }
 #endif
 
-// The shift amount is an immediate, so we define these operations as macros on
-// Intel SIMD (using a GNU C extension).
+// The shift amount is an immediate
 #if defined(__AVX2__)
 #define shift256(a, b) _mm256_slli_epi64((a), (b))
-#elif defined(__GNUC__)
-#define shift256(a, b) ({ \
-    m256 rv_shift256; \
-    rv_shift256.lo = shift128(a.lo, b); \
-    rv_shift256.hi = shift128(a.hi, b); \
-    rv_shift256; \
-})
 #else
-static really_inline m256 shift256(m256 a, unsigned b) {
+static really_really_inline m256 shift256(m256 a, unsigned b) {
     m256 rv;
     rv.lo = shift128(a.lo, b);
     rv.hi = shift128(a.hi, b);
@@ -762,15 +715,6 @@ m256 shift256Left8Bits(m256 a) {
  **** 384-bit Primitives
  ****/
 
-#if defined(USE_GCC_COMPOUND_STATEMENTS)
-#define and384(a, b) ({ \
-    m384 rv_and384; \
-    rv_and384.lo = and128((a).lo, (b).lo); \
-    rv_and384.mid = and128((a).mid, (b).mid); \
-    rv_and384.hi = and128((a).hi, (b).hi); \
-    rv_and384; \
-})
-#else
 static really_inline m384 and384(m384 a, m384 b) {
     m384 rv;
     rv.lo = and128(a.lo, b.lo);
@@ -778,17 +722,7 @@ static really_inline m384 and384(m384 a, m384 b) {
     rv.hi = and128(a.hi, b.hi);
     return rv;
 }
-#endif
 
-#if defined(USE_GCC_COMPOUND_STATEMENTS)
-#define or384(a, b) ({ \
-    m384 rv_or384; \
-    rv_or384.lo = or128((a).lo, (b).lo); \
-    rv_or384.mid = or128((a).mid, (b).mid); \
-    rv_or384.hi = or128((a).hi, (b).hi); \
-    rv_or384; \
-})
-#else
 static really_inline m384 or384(m384 a, m384 b) {
     m384 rv;
     rv.lo = or128(a.lo, b.lo);
@@ -796,17 +730,7 @@ static really_inline m384 or384(m384 a, m384 b) {
     rv.hi = or128(a.hi, b.hi);
     return rv;
 }
-#endif
 
-#if defined(USE_GCC_COMPOUND_STATEMENTS)
-#define xor384(a, b) ({ \
-    m384 rv_xor384; \
-    rv_xor384.lo = xor128((a).lo, (b).lo); \
-    rv_xor384.mid = xor128((a).mid, (b).mid); \
-    rv_xor384.hi = xor128((a).hi, (b).hi); \
-    rv_xor384; \
-})
-#else
 static really_inline m384 xor384(m384 a, m384 b) {
     m384 rv;
     rv.lo = xor128(a.lo, b.lo);
@@ -814,17 +738,6 @@ static really_inline m384 xor384(m384 a, m384 b) {
     rv.hi = xor128(a.hi, b.hi);
     return rv;
 }
-#endif
-
-#if defined(USE_GCC_COMPOUND_STATEMENTS)
-#define not384(a) ({ \
-    m384 rv_not384; \
-    rv_not384.lo = not128((a).lo); \
-    rv_not384.mid = not128((a).mid); \
-    rv_not384.hi = not128((a).hi); \
-    rv_not384; \
-})
-#else
 static really_inline m384 not384(m384 a) {
     m384 rv;
     rv.lo = not128(a.lo);
@@ -832,17 +745,6 @@ static really_inline m384 not384(m384 a) {
     rv.hi = not128(a.hi);
     return rv;
 }
-#endif
-
-#if defined(USE_GCC_COMPOUND_STATEMENTS)
-#define andnot384(a, b) ({ \
-    m384 rv_andnot384; \
-    rv_andnot384.lo = andnot128((a).lo, (b).lo); \
-    rv_andnot384.mid = andnot128((a).mid, (b).mid); \
-    rv_andnot384.hi = andnot128((a).hi, (b).hi); \
-    rv_andnot384; \
-})
-#else
 static really_inline m384 andnot384(m384 a, m384 b) {
     m384 rv;
     rv.lo = andnot128(a.lo, b.lo);
@@ -850,27 +752,15 @@ static really_inline m384 andnot384(m384 a, m384 b) {
     rv.hi = andnot128(a.hi, b.hi);
     return rv;
 }
-#endif
 
-// The shift amount is an immediate, so we define these operations as macros on
-// Intel SIMD (using a GNU C extension).
-#if defined(__GNUC__)
-#define shift384(a, b) ({ \
-    m384 rv; \
-    rv.lo = shift128(a.lo, b); \
-    rv.mid = shift128(a.mid, b); \
-    rv.hi = shift128(a.hi, b); \
-    rv; \
-})
-#else
-static really_inline m384 shift384(m384 a, unsigned b) {
+// The shift amount is an immediate
+static really_really_inline m384 shift384(m384 a, unsigned b) {
     m384 rv;
     rv.lo = shift128(a.lo, b);
     rv.mid = shift128(a.mid, b);
     rv.hi = shift128(a.hi, b);
     return rv;
 }
-#endif
 
 static really_inline m384 zeroes384(void) {
     m384 rv = {zeroes128(), zeroes128(), zeroes128()};
@@ -1000,103 +890,48 @@ char testbit384(const m384 *ptr, unsigned int n) {
  **** 512-bit Primitives
  ****/
 
-#if defined(USE_GCC_COMPOUND_STATEMENTS)
-#define and512(a, b) ({ \
-    m512 rv_and512; \
-    rv_and512.lo = and256((a).lo, (b).lo); \
-    rv_and512.hi = and256((a).hi, (b).hi); \
-    rv_and512; \
-})
-#else
 static really_inline m512 and512(m512 a, m512 b) {
     m512 rv;
     rv.lo = and256(a.lo, b.lo);
     rv.hi = and256(a.hi, b.hi);
     return rv;
 }
-#endif
 
-#if defined(USE_GCC_COMPOUND_STATEMENTS)
-#define or512(a, b) ({ \
-    m512 rv_or512; \
-    rv_or512.lo = or256((a).lo, (b).lo); \
-    rv_or512.hi = or256((a).hi, (b).hi); \
-    rv_or512; \
-})
-#else
 static really_inline m512 or512(m512 a, m512 b) {
     m512 rv;
     rv.lo = or256(a.lo, b.lo);
     rv.hi = or256(a.hi, b.hi);
     return rv;
 }
-#endif
 
-#if defined(USE_GCC_COMPOUND_STATEMENTS)
-#define xor512(a, b) ({ \
-    m512 rv_xor512; \
-    rv_xor512.lo = xor256((a).lo, (b).lo); \
-    rv_xor512.hi = xor256((a).hi, (b).hi); \
-    rv_xor512; \
-})
-#else
 static really_inline m512 xor512(m512 a, m512 b) {
     m512 rv;
     rv.lo = xor256(a.lo, b.lo);
     rv.hi = xor256(a.hi, b.hi);
     return rv;
 }
-#endif
 
-#if defined(USE_GCC_COMPOUND_STATEMENTS)
-#define not512(a) ({ \
-    m512 rv_not512; \
-    rv_not512.lo = not256((a).lo); \
-    rv_not512.hi = not256((a).hi); \
-    rv_not512; \
-})
-#else
 static really_inline m512 not512(m512 a) {
     m512 rv;
     rv.lo = not256(a.lo);
     rv.hi = not256(a.hi);
     return rv;
 }
-#endif
 
-#if defined(USE_GCC_COMPOUND_STATEMENTS)
-#define andnot512(a, b) ({ \
-    m512 rv_andnot512; \
-    rv_andnot512.lo = andnot256((a).lo, (b).lo); \
-    rv_andnot512.hi = andnot256((a).hi, (b).hi); \
-    rv_andnot512; \
-})
-#else
 static really_inline m512 andnot512(m512 a, m512 b) {
     m512 rv;
     rv.lo = andnot256(a.lo, b.lo);
     rv.hi = andnot256(a.hi, b.hi);
     return rv;
 }
-#endif
 
-// The shift amount is an immediate, so we define these operations as macros on
-// Intel SIMD (using a GNU C extension).
-#if defined(USE_GCC_COMPOUND_STATEMENTS)
-#define shift512(a, b) ({ \
-    m512 rv_shift512; \
-    rv_shift512.lo = shift256(a.lo, b); \
-    rv_shift512.hi = shift256(a.hi, b); \
-    rv_shift512; \
-})
-#else
-static really_inline m512 shift512(m512 a, unsigned b) {
+// The shift amount is an immediate
+static really_really_inline m512 shift512(m512 a, unsigned b) {
     m512 rv;
     rv.lo = shift256(a.lo, b);
     rv.hi = shift256(a.hi, b);
     return rv;
 }
-#endif
 
 static really_inline m512 zeroes512(void) {
     m512 rv = {zeroes256(), zeroes256()};
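
For reference, the two idioms this patch trades between are shown below in isolation. This is a minimal sketch, not code from simd_utils.h: pair128 is a hypothetical two-word stand-in for the real m256/m384/m512 types, and the and_pair_* names are invented for illustration. A GNU C statement expression ({ ... }) evaluates to the value of its final expression, so the macro form can be used like a function call; the static inline function is the portable equivalent that the patch keeps, on the assumption that recent compilers inline it just as well. The shift helpers also move from really_inline to really_really_inline, which appears to be a stronger inlining request in this codebase, presumably so the shift count stays a compile-time constant once the macros are gone; the patch's own comment only notes that the shift amount is an immediate.

#include <stdint.h>

/* Hypothetical two-word vector stand-in; the real m256 in simd_types.h is a
 * pair of 128-bit SSE values, not two uint64_t. */
typedef struct {
    uint64_t lo;
    uint64_t hi;
} pair128;

/* Removed style: GNU C statement expression. The ({ ... }) block is an
 * expression whose value is its last statement (rv_and), so the macro can
 * appear anywhere a function call could. GCC/Clang extension only. */
#define and_pair_macro(a, b) ({ \
    pair128 rv_and; \
    rv_and.lo = (a).lo & (b).lo; \
    rv_and.hi = (a).hi & (b).hi; \
    rv_and; \
})

/* Kept style: a static inline function computes the same value in standard C;
 * recent compilers inline it to equivalent code. */
static inline pair128 and_pair_inline(pair128 a, pair128 b) {
    pair128 rv;
    rv.lo = a.lo & b.lo;
    rv.hi = a.hi & b.hi;
    return rv;
}

/* Both forms are drop-in replacements for one another at the call site:
 *     pair128 r1 = and_pair_macro(x, y);
 *     pair128 r2 = and_pair_inline(x, y);
 */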