Mirror of https://github.com/VectorCamp/vectorscan.git (synced 2025-11-19 10:34:25 +03:00)
Fix names; use our own intrinsic wrappers instead of explicit _mm* intrinsics
This commit is contained in:
Committed by Konstantinos Margaritis
Parent: 4fd0723532
Commit: 8ef26f19fc
@@ -159,7 +159,7 @@ const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
|
||||
}
|
||||
|
||||
const m128 zeroes = zeroes128();
|
||||
const m128 low4bits = _mm_set1_epi8(0xf);
|
||||
const m128 low4bits = set1_16x8(0xf);
|
||||
const u8 *rv;
|
||||
|
||||
size_t min = (size_t)buf % 16;
|
||||
@@ -246,7 +246,7 @@ const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
|
||||
}
|
||||
|
||||
const m128 zeroes = zeroes128();
|
||||
const m128 low4bits = _mm_set1_epi8(0xf);
|
||||
const m128 low4bits = set1_16x8(0xf);
|
||||
const u8 *rv;
|
||||
|
||||
assert(buf_end - buf >= 16);
|
||||
@@ -320,7 +320,7 @@ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi,
|
||||
m128 mask2_lo, m128 mask2_hi,
|
||||
const u8 *buf, const u8 *buf_end) {
|
||||
const m128 ones = ones128();
|
||||
const m128 low4bits = _mm_set1_epi8(0xf);
|
||||
const m128 low4bits = set1_16x8(0xf);
|
||||
const u8 *rv;
|
||||
|
||||
size_t min = (size_t)buf % 16;
|
||||
@@ -455,15 +455,15 @@ const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
|
||||
buf, buf_end);
|
||||
}
|
||||
|
||||
const m256 low4bits = set32x8(0xf);
|
||||
const m256 low4bits = set1_32x8(0xf);
|
||||
|
||||
if (buf_end - buf <= 32) {
|
||||
return shuftiFwdShort(mask_lo, mask_hi, buf, buf_end, low4bits);
|
||||
}
|
||||
|
||||
const m256 zeroes = zeroes256();
|
||||
const m256 wide_mask_lo = set2x128(mask_lo);
|
||||
const m256 wide_mask_hi = set2x128(mask_hi);
|
||||
const m256 wide_mask_lo = set1_2x128(mask_lo);
|
||||
const m256 wide_mask_hi = set1_2x128(mask_hi);
|
||||
const u8 *rv;
|
||||
|
||||
size_t min = (size_t)buf % 32;
|
||||
@@ -579,15 +579,15 @@ const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
|
||||
buf, buf_end);
|
||||
}
|
||||
|
||||
const m256 low4bits = set32x8(0xf);
|
||||
const m256 low4bits = set1_32x8(0xf);
|
||||
|
||||
if (buf_end - buf <= 32) {
|
||||
return shuftiRevShort(mask_lo, mask_hi, buf, buf_end, low4bits);
|
||||
}
|
||||
|
||||
const m256 zeroes = zeroes256();
|
||||
const m256 wide_mask_lo = set2x128(mask_lo);
|
||||
const m256 wide_mask_hi = set2x128(mask_hi);
|
||||
const m256 wide_mask_lo = set1_2x128(mask_lo);
|
||||
const m256 wide_mask_hi = set1_2x128(mask_hi);
|
||||
const u8 *rv;
|
||||
|
||||
assert(buf_end - buf >= 32);
|
||||
@@ -676,7 +676,7 @@ static really_inline
|
||||
const u8 *shuftiDoubleShort(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo,
|
||||
m128 mask2_hi, const u8 *buf, const u8 *buf_end) {
|
||||
DEBUG_PRINTF("buf %p len %zu\n", buf, buf_end - buf);
|
||||
const m256 low4bits = set32x8(0xf);
|
||||
const m256 low4bits = set1_32x8(0xf);
|
||||
// run shufti over two overlapping 16-byte unaligned reads
|
||||
const m256 mask1 = combine2x128(mask1_hi, mask1_lo);
|
||||
const m256 mask2 = combine2x128(mask2_hi, mask2_lo);
|
||||
@@ -708,11 +708,11 @@ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi,
|
||||
}
|
||||
|
||||
const m256 ones = ones256();
|
||||
const m256 low4bits = set32x8(0xf);
|
||||
const m256 wide_mask1_lo = set2x128(mask1_lo);
|
||||
const m256 wide_mask1_hi = set2x128(mask1_hi);
|
||||
const m256 wide_mask2_lo = set2x128(mask2_lo);
|
||||
const m256 wide_mask2_hi = set2x128(mask2_hi);
|
||||
const m256 low4bits = set1_32x8(0xf);
|
||||
const m256 wide_mask1_lo = set1_2x128(mask1_lo);
|
||||
const m256 wide_mask1_hi = set1_2x128(mask1_hi);
|
||||
const m256 wide_mask2_lo = set1_2x128(mask2_lo);
|
||||
const m256 wide_mask2_hi = set1_2x128(mask2_hi);
|
||||
const u8 *rv;
|
||||
|
||||
size_t min = (size_t)buf % 32;
|
||||
|
||||
Reference in New Issue
Block a user