SuperVector constructors as well as andnot implementation fixed

This commit is contained in:
Apostolos Tapsas 2021-11-05 13:34:48 +00:00
parent d9d39d48c5
commit ba90cdeb5a
3 changed files with 18 additions and 17 deletions

View File

@ -43,7 +43,7 @@ const SuperVector<S> blockSingleMask(SuperVector<S> mask_lo, SuperVector<S> mask
c_lo = mask_lo.template pshufb<false>(c_lo); c_lo = mask_lo.template pshufb<false>(c_lo);
c_hi = mask_hi.template pshufb<false>(c_hi); c_hi = mask_hi.template pshufb<false>(c_hi);
return (c_lo & c_hi) > (SuperVector<S>::Zeroes()); return (c_lo & c_hi).eq(SuperVector<S>::Zeroes());
} }
template <uint16_t S> template <uint16_t S>
@ -72,5 +72,5 @@ SuperVector<S> blockDoubleMask(SuperVector<S> mask1_lo, SuperVector<S> mask1_hi,
SuperVector<S> t = t1 | (t2.template vshr_128_imm<1>()); SuperVector<S> t = t1 | (t2.template vshr_128_imm<1>());
t.print8("t"); t.print8("t");
return !t.eq(SuperVector<S>::Ones()); return t.eq(SuperVector<S>::Ones());
} }

View File

@ -30,10 +30,10 @@
template <> template <>
really_really_inline really_really_inline
const u8 *firstMatch<16>(const u8 *buf, SuperVector<16> v) { const u8 *firstMatch<16>(const u8 *buf, SuperVector<16> v) {
SuperVector<16>::movemask_type z = v.movemask(); if (unlikely(vec_any_ne(v.u.v128[0], SuperVector<16>::Ones().u.v128[0]))) {
DEBUG_PRINTF("buf %p z %08x \n", buf, z); SuperVector<16>::movemask_type z = v.movemask();
DEBUG_PRINTF("z %08x\n", z); DEBUG_PRINTF("buf %p z %08x \n", buf, z);
if (unlikely(z != 0xffff)) { DEBUG_PRINTF("z %08x\n", z);
u32 pos = ctz32(~z & 0xffff); u32 pos = ctz32(~z & 0xffff);
DEBUG_PRINTF("~z %08x\n", ~z); DEBUG_PRINTF("~z %08x\n", ~z);
DEBUG_PRINTF("match @ pos %u\n", pos); DEBUG_PRINTF("match @ pos %u\n", pos);
@ -47,10 +47,10 @@ const u8 *firstMatch<16>(const u8 *buf, SuperVector<16> v) {
template <> template <>
really_really_inline really_really_inline
const u8 *lastMatch<16>(const u8 *buf, SuperVector<16> v) { const u8 *lastMatch<16>(const u8 *buf, SuperVector<16> v) {
SuperVector<16>::movemask_type z = v.movemask(); if (unlikely(vec_any_ne(v.u.v128[0], SuperVector<16>::Ones().u.v128[0]))) {
DEBUG_PRINTF("buf %p z %08x \n", buf, z); SuperVector<16>::movemask_type z = v.movemask();
DEBUG_PRINTF("z %08x\n", z); DEBUG_PRINTF("buf %p z %08x \n", buf, z);
if (unlikely(z != 0xffff)) { DEBUG_PRINTF("z %08x\n", z);
u32 pos = clz32(~z & 0xffff); u32 pos = clz32(~z & 0xffff);
DEBUG_PRINTF("~z %08x\n", ~z); DEBUG_PRINTF("~z %08x\n", ~z);
DEBUG_PRINTF("match @ pos %u\n", pos); DEBUG_PRINTF("match @ pos %u\n", pos);

View File

@ -74,7 +74,7 @@ template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<uint8_t>(uint8_t const other) really_inline SuperVector<16>::SuperVector<uint8_t>(uint8_t const other)
{ {
u.v128[0] = (m128) vec_splats(static_cast<int8_t>(other)); u.v128[0] = (m128) vec_splats(static_cast<uint8_t>(other));
} }
template<> template<>
@ -88,7 +88,7 @@ template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<uint16_t>(uint16_t const other) really_inline SuperVector<16>::SuperVector<uint16_t>(uint16_t const other)
{ {
u.v128[0] = (m128) vec_splats(static_cast<int8_t>(other)); u.v128[0] = (m128) vec_splats(static_cast<uint16_t>(other));
} }
template<> template<>
@ -102,7 +102,7 @@ template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<uint32_t>(uint32_t const other) really_inline SuperVector<16>::SuperVector<uint32_t>(uint32_t const other)
{ {
u.v128[0] = (m128) vec_splats(static_cast<int8_t>(other)); u.v128[0] = (m128) vec_splats(static_cast<uint32_t>(other));
} }
template<> template<>
@ -116,7 +116,7 @@ template<>
template<> template<>
really_inline SuperVector<16>::SuperVector<uint64_t>(uint64_t const other) really_inline SuperVector<16>::SuperVector<uint64_t>(uint64_t const other)
{ {
u.v128[0] = (m128) vec_splats(static_cast<int8_t>(other)); u.v128[0] = (m128) vec_splats(static_cast<uint64_t>(other));
} }
// Constants // Constants
@ -167,7 +167,8 @@ really_inline SuperVector<16> SuperVector<16>::operator!() const
template <> template <>
really_inline SuperVector<16> SuperVector<16>::opandnot(SuperVector<16> const &b) const really_inline SuperVector<16> SuperVector<16>::opandnot(SuperVector<16> const &b) const
{ {
return vec_xor(vec_and(u.v128[0], b.u.v128[0]), vec_and(u.v128[0], b.u.v128[0])); m128 not_res = vec_xor(u.v128[0], (m128)vec_splat_s8(-1));
return {(m128) vec_and(not_res, (m128)b.u.v128[0]) };
} }
@ -311,8 +312,8 @@ really_inline SuperVector<16> SuperVector<16>::vshr_32_imm() const
template <> template <>
template<uint8_t N> template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshr_64_imm() const really_inline SuperVector<16> SuperVector<16>::vshr_64_imm() const
{ {
return { (m128) vec_sr((int64x2_t)u.v128[0], vec_splats((uint64_t)N)) }; return { (m128) vec_sr((int64x2_t)u.v128[0], vec_splats((uint64_t)N)) };
} }
template <> template <>