mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Fix combine2x128: put `lo` in the low 128 bits and `hi` in the high 128 bits
This commit is contained in:
parent
8ff7a3cdbb
commit
99e14df117
@ -308,7 +308,7 @@ const u8 *fwdBlockShort(m256 mask, m128 chars, const u8 *buf,
|
||||
const m256 low4bits) {
|
||||
// do the hi and lo shuffles in the one avx register
|
||||
m256 c = set2x128(chars);
|
||||
c = _mm256_srlv_epi64(c, _mm256_set_epi64x(0, 0, 4, 4));
|
||||
c = _mm256_srlv_epi64(c, _mm256_set_epi64x(4, 4, 0, 0));
|
||||
c = and256(c, low4bits);
|
||||
m256 c_shuf = vpshufb(mask, c);
|
||||
m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf));
|
||||
@ -440,7 +440,7 @@ const u8 *revBlockShort(m256 mask, m128 chars, const u8 *buf,
|
||||
const m256 low4bits) {
|
||||
// do the hi and lo shuffles in the one avx register
|
||||
m256 c = set2x128(chars);
|
||||
c = _mm256_srlv_epi64(c, _mm256_set_epi64x(0, 0, 4, 4));
|
||||
c = _mm256_srlv_epi64(c, _mm256_set_epi64x(4, 4, 0, 0));
|
||||
c = and256(c, low4bits);
|
||||
m256 c_shuf = vpshufb(mask, c);
|
||||
m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf));
|
||||
@ -565,7 +565,7 @@ const u8 *fwdBlockShort2(m256 mask1, m256 mask2, m128 chars, const u8 *buf,
|
||||
const m256 low4bits) {
|
||||
// do the hi and lo shuffles in the one avx register
|
||||
m256 c = set2x128(chars);
|
||||
c = _mm256_srlv_epi64(c, _mm256_set_epi64x(0, 0, 4, 4));
|
||||
c = _mm256_srlv_epi64(c, _mm256_set_epi64x(4, 4, 0, 0));
|
||||
c = and256(c, low4bits);
|
||||
m256 c_shuf1 = vpshufb(mask1, c);
|
||||
m256 c_shuf2 = rshift128_m256(vpshufb(mask2, c), 1);
|
||||
|
@ -3054,8 +3054,8 @@ bool makeRoleShufti(const vector<LookEntry> &look,
|
||||
neg_mask &= 0xffff;
|
||||
array<u8, 32> nib_mask;
|
||||
array<u8, 16> bucket_select_mask_16;
|
||||
copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin());
|
||||
copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin() + 16);
|
||||
copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin());
|
||||
copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16);
|
||||
copy(bucket_select_lo.begin(), bucket_select_lo.begin() + 16,
|
||||
bucket_select_mask_16.begin());
|
||||
auto ri = make_unique<RoseInstrCheckShufti16x8>
|
||||
|
@ -658,8 +658,8 @@ m128 movdq_lo(m256 x) {
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m256 combine2x128(m128 a, m128 b) {
|
||||
m256 rv = {a, b};
|
||||
m256 combine2x128(m128 hi, m128 lo) {
|
||||
m256 rv = {lo, hi};
|
||||
return rv;
|
||||
}
|
||||
|
||||
@ -712,7 +712,7 @@ m256 combine2x128(m128 hi, m128 lo) {
|
||||
#if defined(_mm256_set_m128i)
|
||||
return _mm256_set_m128i(hi, lo);
|
||||
#else
|
||||
return insert128to256(cast128to256(hi), lo, 1);
|
||||
return insert128to256(cast128to256(lo), hi, 1);
|
||||
#endif
|
||||
}
|
||||
#endif //AVX2
|
||||
|
Loading…
x
Reference in New Issue
Block a user