From fa3d509fad0bc3104ff672657b1da1fa49565eae Mon Sep 17 00:00:00 2001
From: Konstantinos Margaritis
Date: Sun, 3 Oct 2021 10:47:53 +0000
Subject: [PATCH] firstMatch/lastMatch are now arch-dependent, emulating
 movemask on non-Intel is very costly, the alternative is almost twice as fast
 on Arm

---
Review notes (free text, ignored when the patch is applied):
 * The x86 specialisations now derive z from v.movemask(); z was left
   undeclared when the parameter changed from a movemask value to a
   SuperVector.
 * The blob-id lines of the two new files were dropped because the ids no
   longer match the amended contents.

 src/util/arch/arm/match.hpp |  84 +++++++++++++++++++++++
 src/util/arch/x86/match.hpp | 137 ++++++++++++++++++++++++++++++++++++
 src/util/match.hpp          |  94 ++----------------------
 3 files changed, 228 insertions(+), 87 deletions(-)
 create mode 100644 src/util/arch/arm/match.hpp
 create mode 100644 src/util/arch/x86/match.hpp

diff --git a/src/util/arch/arm/match.hpp b/src/util/arch/arm/match.hpp
new file mode 100644
--- /dev/null
+++ b/src/util/arch/arm/match.hpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2020-2021, VectorCamp PC
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Returns a pointer to the first byte of buf whose lane in mask has its
+ * movemask bit set, or NULL when the whole vector is zero.
+ *
+ * The pairwise-max reduction is a cheap "is anything set?" test, so the
+ * costly movemask emulation only runs when a match may exist.
+ * NOTE(review): assumes each lane of mask is either 0x00 or 0xff, so a
+ * nonzero vector implies a nonzero movemask - confirm against callers.
+ */
+template <>
+really_really_inline
+const u8 *firstMatch<16>(const u8 *buf, SuperVector<16> mask) {
+    uint32x4_t res_t = vreinterpretq_u32_u8(mask.u.v128[0]);
+    uint64_t vmax = vgetq_lane_u64 (vreinterpretq_u64_u32 (vpmaxq_u32(res_t, res_t)), 0);
+    if (vmax != 0) {
+        typename SuperVector<16>::movemask_type z = mask.movemask();
+        DEBUG_PRINTF("z %08x\n", z);
+        DEBUG_PRINTF("buf %p z %08x \n", buf, z);
+        u32 pos = ctz32(z & 0xffff);
+        DEBUG_PRINTF("match @ pos %u\n", pos);
+        assert(pos < 16);
+        DEBUG_PRINTF("buf + pos %p\n", buf + pos);
+        return buf + pos;
+    } else {
+        return NULL; // no match
+    }
+}
+
+/*
+ * Returns a pointer to the last byte of buf whose lane in mask has its
+ * movemask bit set, or NULL when the whole vector is zero.
+ *
+ * clz32() counts from bit 31 while only the low 16 movemask bits are kept,
+ * hence pos lies in [16, 32) and the byte offset is 31 - pos.
+ * NOTE(review): assumes each lane of mask is either 0x00 or 0xff, so a
+ * nonzero vector implies a nonzero movemask - confirm against callers.
+ */
+template <>
+really_really_inline
+const u8 *lastMatch<16>(const u8 *buf, SuperVector<16> mask) {
+    uint32x4_t res_t = vreinterpretq_u32_u8(mask.u.v128[0]);
+    uint64_t vmax = vgetq_lane_u64 (vreinterpretq_u64_u32 (vpmaxq_u32(res_t, res_t)), 0);
+    if (vmax != 0) {
+        typename SuperVector<16>::movemask_type z = mask.movemask();
+        DEBUG_PRINTF("buf %p z %08x \n", buf, z);
+        DEBUG_PRINTF("z %08x\n", z);
+        u32 pos = clz32(z & 0xffff);
+        DEBUG_PRINTF("match @ pos %u\n", pos);
+        assert(pos >= 16 && pos < 32);
+        return buf + (31 - pos);
+    } else {
+        return NULL; // no match
+    }
+}
+
diff --git a/src/util/arch/x86/match.hpp b/src/util/arch/x86/match.hpp
new file mode 100644
--- /dev/null
+++ b/src/util/arch/x86/match.hpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2020-2021, VectorCamp PC
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * x86 specialisations of firstMatch()/lastMatch().
+ *
+ * Each one takes the movemask of v and treats a CLEAR bit as a match: it
+ * returns a pointer to the first/last byte of buf whose bit is clear, or NULL
+ * when every bit is set.
+ * NOTE(review): the Arm specialisations report SET bits instead - confirm
+ * that callers build the vector accordingly for each architecture.
+ */
+
+/* First of the 16 bytes at buf whose movemask bit in v is clear, else NULL. */
+template <>
+really_really_inline
+const u8 *firstMatch<16>(const u8 *buf, SuperVector<16> v) {
+    typename SuperVector<16>::movemask_type z = v.movemask();
+    DEBUG_PRINTF("buf %p z %08x \n", buf, z);
+    DEBUG_PRINTF("z %08x\n", z);
+    if (unlikely(z != 0xffff)) {
+        u32 pos = ctz32(~z & 0xffff);
+        DEBUG_PRINTF("~z %08x\n", ~z);
+        DEBUG_PRINTF("match @ pos %u\n", pos);
+        assert(pos < 16);
+        return buf + pos;
+    } else {
+        return NULL; // no match
+    }
+}
+
+/* First of the 32 bytes at buf whose movemask bit in v is clear, else NULL. */
+template <>
+really_really_inline
+const u8 *firstMatch<32>(const u8 *buf, SuperVector<32> v) {
+    typename SuperVector<32>::movemask_type z = v.movemask();
+    DEBUG_PRINTF("z 0x%08x\n", z);
+    if (unlikely(z != 0xffffffff)) {
+        u32 pos = ctz32(~z);
+        assert(pos < 32);
+        DEBUG_PRINTF("match @ pos %u\n", pos);
+        return buf + pos;
+    } else {
+        return NULL; // no match
+    }
+}
+/* First of the 64 bytes at buf whose movemask bit in v is clear, else NULL. */
+template <>
+really_really_inline
+const u8 *firstMatch<64>(const u8 *buf, SuperVector<64> v) {
+    typename SuperVector<64>::movemask_type z = v.movemask();
+    DEBUG_PRINTF("z 0x%016llx\n", z);
+    if (unlikely(z != ~0ULL)) {
+        u32 pos = ctz64(~z);
+        DEBUG_PRINTF("match @ pos %u\n", pos);
+        assert(pos < 64);
+        return buf + pos;
+    } else {
+        return NULL; // no match
+    }
+}
+
+/* Last of the 16 bytes at buf whose movemask bit in v is clear, else NULL. */
+template <>
+really_really_inline
+const u8 *lastMatch<16>(const u8 *buf, SuperVector<16> v) {
+    typename SuperVector<16>::movemask_type z = v.movemask();
+    DEBUG_PRINTF("buf %p z %08x \n", buf, z);
+    DEBUG_PRINTF("z %08x\n", z);
+    if (unlikely(z != 0xffff)) {
+        u32 pos = clz32(~z & 0xffff);
+        DEBUG_PRINTF("~z %08x\n", ~z);
+        DEBUG_PRINTF("match @ pos %u\n", pos);
+        assert(pos >= 16 && pos < 32);
+        return buf + (31 - pos);
+    } else {
+        return NULL; // no match
+    }
+}
+
+/* Last of the 32 bytes at buf whose movemask bit in v is clear, else NULL. */
+template<>
+really_really_inline
+const u8 *lastMatch<32>(const u8 *buf, SuperVector<32> v) {
+    typename SuperVector<32>::movemask_type z = v.movemask();
+    if (unlikely(z != 0xffffffff)) {
+        u32 pos = clz32(~z);
+        DEBUG_PRINTF("buf=%p, pos=%u\n", buf, pos);
+        assert(pos < 32);
+        return buf + (31 - pos);
+    } else {
+        return NULL; // no match
+    }
+}
+
+/* Last of the 64 bytes at buf whose movemask bit in v is clear, else NULL. */
+template <>
+really_really_inline
+const u8 *lastMatch<64>(const u8 *buf, SuperVector<64> v) {
+    typename SuperVector<64>::movemask_type z = v.movemask();
+    DEBUG_PRINTF("z 0x%016llx\n", z);
+    if (unlikely(z != ~0ULL)) {
+        u32 pos = clz64(~z);
+        DEBUG_PRINTF("match @ pos %u\n", pos);
+        assert(pos < 64);
+        return buf + (63 - pos);
+    } else {
+        return NULL; // no match
+    }
+}
+
diff --git a/src/util/match.hpp b/src/util/match.hpp
index b321f757..994dd9f8 100644
--- a/src/util/match.hpp
+++ b/src/util/match.hpp
@@ -35,94 +35,14 @@
 #include "util/supervector/supervector.hpp"
 
 template <u16 S>
-const u8 *firstMatch(const u8 *buf, typename SuperVector<S>::movemask_type z);
+const u8 *firstMatch(const u8 *buf, SuperVector<S> v);
 
 template <u16 S>
-const u8 *lastMatch(const u8 *buf, typename SuperVector<S>::movemask_type z);
+const u8 *lastMatch(const u8 *buf, SuperVector<S> v);
 
-template <>
-really_really_inline
-const u8 *firstMatch<16>(const u8 *buf, typename SuperVector<16>::movemask_type z) {
-    DEBUG_PRINTF("buf %p z %08x \n", buf, z);
-    DEBUG_PRINTF("z %08x\n", z);
-    if (unlikely(z != 0xffff)) {
-        u32 pos = ctz32(~z & 0xffff);
-        DEBUG_PRINTF("~z %08x\n", ~z);
-        DEBUG_PRINTF("match @ pos %u\n", pos);
-        assert(pos < 16);
-        return buf + pos;
-    } else {
-        return NULL; // no match
-    }
-}
-
-template <>
-really_really_inline
-const u8 *firstMatch<32>(const u8 *buf, typename SuperVector<32>::movemask_type z) {
-    DEBUG_PRINTF("z 0x%08x\n", z);
-    if (unlikely(z != 0xffffffff)) {
-        u32 pos = ctz32(~z);
-        assert(pos < 32);
-        DEBUG_PRINTF("match @ pos %u\n", pos);
-        return buf + pos;
-    } else {
-        return NULL; // no match
-    }
-}
-template <>
-really_really_inline
-const u8 *firstMatch<64>(const u8 *buf, typename SuperVector<64>::movemask_type z) {
-    DEBUG_PRINTF("z 0x%016llx\n", z);
-    if (unlikely(z != ~0ULL)) {
-        u32 pos = ctz64(~z);
-        DEBUG_PRINTF("match @ pos %u\n", pos);
-        assert(pos < 64);
-        return buf + pos;
-    } else {
-        return NULL; // no match
-    }
-}
-
-template <>
-really_really_inline
-const u8 *lastMatch<16>(const u8 *buf, typename SuperVector<16>::movemask_type z) {
-    DEBUG_PRINTF("buf %p z %08x \n", buf, z);
-    DEBUG_PRINTF("z %08x\n", z);
-    if (unlikely(z != 0xffff)) {
-        u32 pos = clz32(~z & 0xffff);
-        DEBUG_PRINTF("~z %08x\n", ~z);
-        DEBUG_PRINTF("match @ pos %u\n", pos);
-        assert(pos >= 16 && pos < 32);
-        return buf + (31 - pos);
-    } else {
-        return NULL; // no match
-    }
-}
-
-template<>
-really_really_inline
-const u8 *lastMatch<32>(const u8 *buf, typename SuperVector<32>::movemask_type z) {
-    if (unlikely(z != 0xffffffff)) {
-        u32 pos = clz32(~z);
-        DEBUG_PRINTF("buf=%p, pos=%u\n", buf, pos);
-        assert(pos < 32);
-        return buf + (31 - pos);
-    } else {
-        return NULL; // no match
-    }
-}
-
-template <>
-really_really_inline
-const u8 *lastMatch<64>(const u8 *buf, typename SuperVector<64>::movemask_type z) {
-    DEBUG_PRINTF("z 0x%016llx\n", z);
-    if (unlikely(z != ~0ULL)) {
-        u32 pos = clz64(~z);
-        DEBUG_PRINTF("match @ pos %u\n", pos);
-        assert(pos < 64);
-        return buf + (63 - pos);
-    } else {
-        return NULL; // no match
-    }
-}
+#if defined(ARCH_IA32) || defined(ARCH_X86_64)
+#include "util/arch/x86/match.hpp"
+#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
+#include "util/arch/arm/match.hpp"
+#endif
 