Fix names; use our own intrinsics instead of explicit _mm* ones

This commit is contained in:
Konstantinos Margaritis
2020-09-23 11:51:21 +03:00
committed by Konstantinos Margaritis
parent 4fd0723532
commit 8ef26f19fc
15 changed files with 137 additions and 137 deletions

View File

@@ -159,7 +159,7 @@ const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
}
const m128 zeroes = zeroes128();
const m128 low4bits = _mm_set1_epi8(0xf);
const m128 low4bits = set1_16x8(0xf);
const u8 *rv;
size_t min = (size_t)buf % 16;
@@ -246,7 +246,7 @@ const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
}
const m128 zeroes = zeroes128();
const m128 low4bits = _mm_set1_epi8(0xf);
const m128 low4bits = set1_16x8(0xf);
const u8 *rv;
assert(buf_end - buf >= 16);
@@ -320,7 +320,7 @@ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi,
m128 mask2_lo, m128 mask2_hi,
const u8 *buf, const u8 *buf_end) {
const m128 ones = ones128();
const m128 low4bits = _mm_set1_epi8(0xf);
const m128 low4bits = set1_16x8(0xf);
const u8 *rv;
size_t min = (size_t)buf % 16;
@@ -455,15 +455,15 @@ const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
buf, buf_end);
}
const m256 low4bits = set32x8(0xf);
const m256 low4bits = set1_32x8(0xf);
if (buf_end - buf <= 32) {
return shuftiFwdShort(mask_lo, mask_hi, buf, buf_end, low4bits);
}
const m256 zeroes = zeroes256();
const m256 wide_mask_lo = set2x128(mask_lo);
const m256 wide_mask_hi = set2x128(mask_hi);
const m256 wide_mask_lo = set1_2x128(mask_lo);
const m256 wide_mask_hi = set1_2x128(mask_hi);
const u8 *rv;
size_t min = (size_t)buf % 32;
@@ -579,15 +579,15 @@ const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
buf, buf_end);
}
const m256 low4bits = set32x8(0xf);
const m256 low4bits = set1_32x8(0xf);
if (buf_end - buf <= 32) {
return shuftiRevShort(mask_lo, mask_hi, buf, buf_end, low4bits);
}
const m256 zeroes = zeroes256();
const m256 wide_mask_lo = set2x128(mask_lo);
const m256 wide_mask_hi = set2x128(mask_hi);
const m256 wide_mask_lo = set1_2x128(mask_lo);
const m256 wide_mask_hi = set1_2x128(mask_hi);
const u8 *rv;
assert(buf_end - buf >= 32);
@@ -676,7 +676,7 @@ static really_inline
const u8 *shuftiDoubleShort(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo,
m128 mask2_hi, const u8 *buf, const u8 *buf_end) {
DEBUG_PRINTF("buf %p len %zu\n", buf, buf_end - buf);
const m256 low4bits = set32x8(0xf);
const m256 low4bits = set1_32x8(0xf);
// run shufti over two overlapping 16-byte unaligned reads
const m256 mask1 = combine2x128(mask1_hi, mask1_lo);
const m256 mask2 = combine2x128(mask2_hi, mask2_lo);
@@ -708,11 +708,11 @@ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi,
}
const m256 ones = ones256();
const m256 low4bits = set32x8(0xf);
const m256 wide_mask1_lo = set2x128(mask1_lo);
const m256 wide_mask1_hi = set2x128(mask1_hi);
const m256 wide_mask2_lo = set2x128(mask2_lo);
const m256 wide_mask2_hi = set2x128(mask2_hi);
const m256 low4bits = set1_32x8(0xf);
const m256 wide_mask1_lo = set1_2x128(mask1_lo);
const m256 wide_mask1_hi = set1_2x128(mask1_hi);
const m256 wide_mask2_lo = set1_2x128(mask2_lo);
const m256 wide_mask2_hi = set1_2x128(mask2_hi);
const u8 *rv;
size_t min = (size_t)buf % 32;