mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
implement missing vshr_512_imm(), which simplifies the x86 caller code
This commit is contained in:
parent
7d9a543100
commit
c95e66144a
@ -69,36 +69,11 @@ SuperVector<S> blockDoubleMask(SuperVector<S> mask1_lo, SuperVector<S> mask1_hi,
|
|||||||
// c2 is the match mask for the second char of the patterns
|
// c2 is the match mask for the second char of the patterns
|
||||||
c2.print8("c2");
|
c2.print8("c2");
|
||||||
|
|
||||||
// We want to shift the whole vector left by 1 and insert the last element of inout_c1.
|
|
||||||
// Due to lack of direct instructions to insert, extract and concatenate vectors
|
|
||||||
// we need to store and load the vector.
|
|
||||||
uint8_t tmp_buf[2*S];
|
|
||||||
SuperVector<S> offset_c1;
|
|
||||||
if constexpr (S == 16) {
|
|
||||||
_mm_storeu_si128(reinterpret_cast<m128 *>(&tmp_buf[0]), inout_c1->u.v128[0]);
|
|
||||||
_mm_storeu_si128(reinterpret_cast<m128 *>(&tmp_buf[S]), new_c1.u.v128[0]);
|
|
||||||
offset_c1 = SuperVector<S>(_mm_loadu_si128(reinterpret_cast<const m128 *>(&tmp_buf[S-1])));
|
|
||||||
}
|
|
||||||
#ifdef HAVE_AVX2
|
|
||||||
else if constexpr (S == 32) {
|
|
||||||
_mm256_storeu_si256(reinterpret_cast<m256 *>(&tmp_buf[0]), inout_c1->u.v256[0]);
|
|
||||||
_mm256_storeu_si256(reinterpret_cast<m256 *>(&tmp_buf[S]), new_c1.u.v256[0]);
|
|
||||||
offset_c1 = SuperVector<S>(_mm256_loadu_si256(reinterpret_cast<const m256 *>(&tmp_buf[S-1])));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#ifdef HAVE_AVX512
|
|
||||||
else if constexpr (S == 64) {
|
|
||||||
_mm512_storeu_si512(reinterpret_cast<m512 *>(&tmp_buf[0]), inout_c1->u.v512[0]);
|
|
||||||
_mm512_storeu_si512(reinterpret_cast<m512 *>(&tmp_buf[S]), new_c1.u.v512[0]);
|
|
||||||
offset_c1 = SuperVector<S>(_mm512_load_si512(reinterpret_cast<const m512 *>(&tmp_buf[S-1])));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
offset_c1.print8("offset c1");
|
|
||||||
|
|
||||||
// offset c1 so it aligns with c2. The hole created by the offset is filled
|
// offset c1 so it aligns with c2. The hole created by the offset is filled
|
||||||
// with the last elements of the previous c1 so no info is lost.
|
// with the last elements of the previous c1 so no info is lost.
|
||||||
// If bits with value 0 line up, it indicates a match.
|
// If bits with value 0 line up, it indicates a match.
|
||||||
SuperVector<S> c = offset_c1 | c2;
|
c2.template vshr_imm<1>().print8("c2.vshr_128(1)");
|
||||||
|
SuperVector<S> c = new_c1 | (c2.template vshr_imm<1>());
|
||||||
c.print8("c");
|
c.print8("c");
|
||||||
|
|
||||||
*inout_c1 = new_c1;
|
*inout_c1 = new_c1;
|
||||||
|
@ -877,10 +877,10 @@ template <>
|
|||||||
template<uint8_t N>
|
template<uint8_t N>
|
||||||
really_inline SuperVector<32> SuperVector<32>::vshr_256_imm() const
|
really_inline SuperVector<32> SuperVector<32>::vshr_256_imm() const
|
||||||
{
|
{
|
||||||
if (N == 0) return *this;
|
if constexpr (N == 0) return *this;
|
||||||
if (N == 16) return {SuperVector<32>(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)))};
|
if constexpr (N == 16) return {SuperVector<32>(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)))};
|
||||||
if (N == 32) return Zeroes();
|
if constexpr (N == 32) return Zeroes();
|
||||||
if (N < 16) {
|
if constexpr (N < 16) {
|
||||||
return {SuperVector<32>(_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - N))};
|
return {SuperVector<32>(_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - N))};
|
||||||
} else {
|
} else {
|
||||||
return {SuperVector<32>(_mm256_srli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), N - 16))};
|
return {SuperVector<32>(_mm256_srli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), N - 16))};
|
||||||
@ -1532,14 +1532,39 @@ template <>
|
|||||||
template<uint8_t N>
|
template<uint8_t N>
|
||||||
really_inline SuperVector<64> SuperVector<64>::vshr_256_imm() const
|
really_inline SuperVector<64> SuperVector<64>::vshr_256_imm() const
|
||||||
{
|
{
|
||||||
return {};
|
if constexpr (N == 0) return *this;
|
||||||
|
if constexpr (N == 16) return {SuperVector<64>(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)))};
|
||||||
|
if constexpr (N == 32) return Zeroes();
|
||||||
|
if constexpr (N < 16) {
|
||||||
|
return {SuperVector<64>(_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - N))};
|
||||||
|
} else {
|
||||||
|
return {SuperVector<64>(_mm256_srli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), N - 16))};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
template<uint8_t N>
|
template<uint8_t N>
|
||||||
really_inline SuperVector<64> SuperVector<64>::vshr_512_imm() const
|
really_inline SuperVector<64> SuperVector<64>::vshr_512_imm() const
|
||||||
{
|
{
|
||||||
return {};
|
if constexpr (N == 0) return *this;
|
||||||
|
if constexpr (N < 32) {
|
||||||
|
SuperVector<32> lo256 = SuperVector<32>(u.v256[0]);
|
||||||
|
SuperVector<32> hi256 = SuperVector<32>(u.v256[1]);
|
||||||
|
SuperVector<32> carry = hi256 << (32 - N);
|
||||||
|
hi256 = hi256 >> N;
|
||||||
|
lo256 = (lo256 >> N) | carry;
|
||||||
|
return SuperVector<64>(lo256, hi256);
|
||||||
|
}
|
||||||
|
if constexpr (N == 32) {
|
||||||
|
SuperVector<32> hi256 = SuperVector<32>(u.v256[1]);
|
||||||
|
return SuperVector<64>(hi256, SuperVector<32>::Zeroes());
|
||||||
|
}
|
||||||
|
if constexpr (N < 64) {
|
||||||
|
SuperVector<32> hi256 = SuperVector<32>(u.v256[1]);
|
||||||
|
return SuperVector<64>(hi256 >> (N - 32), SuperVector<32>::Zeroes());
|
||||||
|
} else {
|
||||||
|
return Zeroes();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
@ -1560,6 +1585,7 @@ template SuperVector<64> SuperVector<64>::vshr_64_imm<1>() const;
|
|||||||
template SuperVector<64> SuperVector<64>::vshr_64_imm<4>() const;
|
template SuperVector<64> SuperVector<64>::vshr_64_imm<4>() const;
|
||||||
template SuperVector<64> SuperVector<64>::vshr_128_imm<1>() const;
|
template SuperVector<64> SuperVector<64>::vshr_128_imm<1>() const;
|
||||||
template SuperVector<64> SuperVector<64>::vshr_128_imm<4>() const;
|
template SuperVector<64> SuperVector<64>::vshr_128_imm<4>() const;
|
||||||
|
template SuperVector<64> SuperVector<64>::vshr_imm<1>() const;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// template <>
|
// template <>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user