From 54158a174651736cf9524aba09e3e06133652b4b Mon Sep 17 00:00:00 2001
From: Apostolos Tapsas
Date: Sat, 13 Nov 2021 19:36:46 +0000
Subject: [PATCH] Add vermicelli and match implementations for ppc64el

---
 src/nfa/ppc64el/vermicelli.hpp  | 126 ++++++++++++++++++++++++++++++++
 src/nfa/vermicelli_simd.cpp     |   2 +
 src/util/arch/ppc64el/match.hpp |  56 +++++++++++---
 unit/internal/simd_utils.cpp    |   1 -
 4 files changed, 173 insertions(+), 12 deletions(-)
 create mode 100644 src/nfa/ppc64el/vermicelli.hpp

diff --git a/src/nfa/ppc64el/vermicelli.hpp b/src/nfa/ppc64el/vermicelli.hpp
new file mode 100644
index 00000000..eeaad6a1
--- /dev/null
+++ b/src/nfa/ppc64el/vermicelli.hpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2015-2020, Intel Corporation
+ * Copyright (c) 2020-2021, VectorCamp PC
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Vermicelli: single-byte and double-byte acceleration.
+ */
+
+template <uint16_t S>
+static really_inline
+const u8 *vermicelliBlock(SuperVector<S> const data, SuperVector<S> const chars, SuperVector<S> const casemask, u8 const *buf, u16 const len) {
+
+    SuperVector<S> mask = chars.eq(casemask & data);
+    return first_non_zero_match<S>(buf, mask, len);
+}
+
+template <uint16_t S>
+static really_inline
+const u8 *vermicelliBlockNeg(SuperVector<S> const data, SuperVector<S> const chars, SuperVector<S> const casemask, u8 const *buf, u16 const len) {
+
+    SuperVector<S> mask = chars.eq(casemask & data);
+    return first_zero_match_inverted<S>(buf, mask, len);
+}
+
+template <uint16_t S>
+static really_inline
+const u8 *rvermicelliBlock(SuperVector<S> const data, SuperVector<S> const chars, SuperVector<S> const casemask, u8 const *buf, u16 const len) {
+
+    SuperVector<S> mask = chars.eq(casemask & data);
+    return last_non_zero_match<S>(buf, mask, len);
+}
+
+template <uint16_t S>
+static really_inline
+const u8 *rvermicelliBlockNeg(SuperVector<S> const data, SuperVector<S> const chars, SuperVector<S> const casemask, const u8 *buf, u16 const len) {
+
+    data.print8("data");
+    chars.print8("chars");
+    casemask.print8("casemask");
+    SuperVector<S> mask = chars.eq(casemask & data);
+    mask.print8("mask");
+    return last_zero_match_inverted<S>(buf, mask, len);
+}
+
+template <uint16_t S>
+static really_inline
+const u8 *vermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2, SuperVector<S> const casemask,
+                                u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) {
+
+    SuperVector<S> v = casemask & data;
+    SuperVector<S> mask1 = chars1.eq(v);
+    SuperVector<S> mask2 = chars2.eq(v);
+    SuperVector<S> mask = mask1 & (mask2 >> 1);
+
+    DEBUG_PRINTF("buf[0] = %02hhx, buf[-1] = %02hhx\n", buf[0], buf[-1]);
+    bool partial_match = (((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
+    DEBUG_PRINTF("partial = %d\n", partial_match);
+    if (partial_match) return buf - 1;
+
+    return first_non_zero_match<S>(buf, mask, len);
+}
+
+template <uint16_t S>
+static really_inline
+const u8 *rvermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2, SuperVector<S> const casemask,
+                                 u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) {
+
+    SuperVector<S> v = casemask & data;
+    SuperVector<S> mask1 = chars1.eq(v);
+    SuperVector<S> mask2 = chars2.eq(v);
+    SuperVector<S> mask = (mask1 << 1) & mask2;
+
+    DEBUG_PRINTF("buf[0] = %02hhx, buf[-1] = %02hhx\n", buf[0], buf[-1]);
+    bool partial_match = (((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
+    DEBUG_PRINTF("partial = %d\n", partial_match);
+    if (partial_match) {
+        mask = mask | (SuperVector<S>::Ones() >> (S - 1));
+    }
+
+    return last_non_zero_match<S>(buf, mask, len);
+}
+
+template <uint16_t S>
+static really_inline
+const u8 *vermicelliDoubleMaskedBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2,
+                                      SuperVector<S> const mask1, SuperVector<S> const mask2,
+                                      u8 const c1, u8 const c2, u8 const m1, u8 const m2, u8 const *buf, u16 const len) {
+
+    SuperVector<S> v1 = chars1.eq(data & mask1);
+    SuperVector<S> v2 = chars2.eq(data & mask2);
+    SuperVector<S> mask = v1 & (v2 >> 1);
+
+    DEBUG_PRINTF("buf[0] = %02hhx, buf[-1] = %02hhx\n", buf[0], buf[-1]);
+    bool partial_match = (((buf[0] & m1) == c2) && ((buf[-1] & m2) == c1));
+    DEBUG_PRINTF("partial = %d\n", partial_match);
+    if (partial_match) return buf - 1;
+
+    return first_non_zero_match<S>(buf, mask, len);
+}
+
diff --git a/src/nfa/vermicelli_simd.cpp b/src/nfa/vermicelli_simd.cpp
index dbce6dc4..d790d137 100644
--- a/src/nfa/vermicelli_simd.cpp
+++ b/src/nfa/vermicelli_simd.cpp
@@ -75,6 +75,8 @@ const u8 *vermicelliDoubleMaskedBlock(SuperVector<S> const data, SuperVector
 #include "x86/vermicelli.hpp"
 #elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
 #include "arm/vermicelli.hpp"
+#elif defined(ARCH_PPC64EL)
+#include "ppc64el/vermicelli.hpp"
 #endif
 
 template <uint16_t S>
diff --git a/src/util/arch/ppc64el/match.hpp b/src/util/arch/ppc64el/match.hpp
index 3f24ce7f..a3f52e41 100644
--- a/src/util/arch/ppc64el/match.hpp
+++ b/src/util/arch/ppc64el/match.hpp
@@ -29,12 +29,12 @@
 template <>
 really_really_inline
-const u8 *firstMatch<16>(const u8 *buf, SuperVector<16> v) {
-    if (unlikely(vec_any_ne(v.u.v128[0], SuperVector<16>::Ones().u.v128[0]))) {
-        SuperVector<16>::movemask_type z = v.movemask();
-        DEBUG_PRINTF("buf %p z %08x \n", buf, z);
-        DEBUG_PRINTF("z %08x\n", z);
-        u32 pos = ctz32(~z & 0xffff);
+const u8 *first_non_zero_match<16>(const u8 *buf, SuperVector<16> v, u16 const UNUSED len) {
+    SuperVector<16>::movemask_type z = v.movemask();
+    DEBUG_PRINTF("buf %p z %08x \n", buf, z);
+    DEBUG_PRINTF("z %08x\n", z);
+    if (unlikely(z)) {
+        u32 pos = ctz32(z);
         DEBUG_PRINTF("~z %08x\n", ~z);
         DEBUG_PRINTF("match @ pos %u\n", pos);
         assert(pos < 16);
@@ -46,11 +46,45 @@ const u8 *firstMatch<16>(const u8 *buf, SuperVector<16> v) {
 template <>
 really_really_inline
-const u8 *lastMatch<16>(const u8 *buf, SuperVector<16> v) {
-    if (unlikely(vec_any_ne(v.u.v128[0], SuperVector<16>::Ones().u.v128[0]))) {
-        SuperVector<16>::movemask_type z = v.movemask();
-        DEBUG_PRINTF("buf %p z %08x \n", buf, z);
-        DEBUG_PRINTF("z %08x\n", z);
+const u8 *last_non_zero_match<16>(const u8 *buf, SuperVector<16> v, u16 const UNUSED len) {
+    SuperVector<16>::movemask_type z = v.movemask();
+    DEBUG_PRINTF("buf %p z %08x \n", buf, z);
+    DEBUG_PRINTF("z %08x\n", z);
+    if (unlikely(z)) {
+        u32 pos = clz32(z);
+        DEBUG_PRINTF("match @ pos %u\n", pos);
+        assert(pos >= 16 && pos < 32);
+        return buf + (31 - pos);
+    } else {
+        return NULL; // no match
+    }
+}
+
+template <>
+really_really_inline
+const u8 *first_zero_match_inverted<16>(const u8 *buf, SuperVector<16> v, u16 const UNUSED len) {
+    SuperVector<16>::movemask_type z = v.movemask();
+    DEBUG_PRINTF("buf %p z %08x \n", buf, z);
+    DEBUG_PRINTF("z %08x\n", z);
+    if (unlikely(z != 0xffff)) {
+        u32 pos = ctz32(~z & 0xffff);
+        DEBUG_PRINTF("~z %08x\n", ~z);
+        DEBUG_PRINTF("match @ pos %u\n", pos);
+        assert(pos < 16);
+        return buf + pos;
+    } else {
+        return NULL; // no match
+    }
+}
+
+template <>
+really_really_inline
+const u8 *last_zero_match_inverted<16>(const u8 *buf, SuperVector<16> v, u16 const UNUSED len) {
+    SuperVector<16>::movemask_type z = v.movemask();
+    DEBUG_PRINTF("buf %p z %08x \n", buf, z);
+    DEBUG_PRINTF("z %08x\n", z);
+    if (unlikely(z != 0xffff)) {
         u32 pos = clz32(~z & 0xffff);
         DEBUG_PRINTF("~z %08x\n", ~z);
         DEBUG_PRINTF("match @ pos %u\n", pos);
diff --git a/unit/internal/simd_utils.cpp b/unit/internal/simd_utils.cpp
index 23640034..b1b9bfb1 100644
--- a/unit/internal/simd_utils.cpp
+++ b/unit/internal/simd_utils.cpp
@@ -671,7 +671,6 @@ TEST(SimdUtilsTest, movq) {
 #elif defined(ARCH_PPC64EL)
     int64x2_t a = {0x123456789abcdefLL, ~0LL };
     simd = (m128) a;
-    simd = vreinterpretq_s32_s64(a);
 #endif
 #endif
     r = movq(simd);
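
Note (sketch, not part of the diff): VSX has no direct movemask, so
SuperVector<16>::movemask() synthesizes a 16-bit mask with one bit per byte
lane, and the match helpers above reduce that mask to a pointer with
ctz32/clz32. A minimal scalar sketch of that reduction, with hypothetical
names (first_match_sketch/last_match_sketch) standing in for the helpers:

#include <cstddef>
#include <cstdint>

// 'z' stands in for SuperVector<16>::movemask(): bit i is set iff byte
// lane i of the comparison vector is non-zero.
static const uint8_t *first_match_sketch(const uint8_t *buf, uint32_t z) {
    if (!z) return NULL;            // no lane set: no match in this block
    return buf + __builtin_ctz(z);  // lowest set bit = first matching byte
}

static const uint8_t *last_match_sketch(const uint8_t *buf, uint32_t z) {
    if (!z) return NULL;
    // For a non-zero 16-bit mask, clz of the 32-bit value is in [16, 31],
    // so 31 - clz is the index of the highest set bit, i.e. the last match.
    return buf + (31 - __builtin_clz(z));
}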
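
Note (sketch, not part of the diff): each block helper computes one S-byte
step of the scan loop in vermicelli_simd.cpp, with casemask folding case per
byte (0xff for exact match, 0xdf for caseless ASCII). A scalar reference of
what vermicelliBlock computes lane-wise; vermicelli_scalar_sketch is a
hypothetical name:

#include <cstddef>
#include <cstdint>

// Report the first byte in [buf, buf + len) equal to 'c' after masking
// with 'casemask': the per-byte meaning of chars.eq(casemask & data).
static const uint8_t *vermicelli_scalar_sketch(const uint8_t *buf, size_t len,
                                               uint8_t c, uint8_t casemask) {
    for (size_t i = 0; i < len; i++) {
        if ((buf[i] & casemask) == c) {
            return buf + i;
        }
    }
    return NULL;   // no match in this block
}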
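
Note (sketch, not part of the diff): the double-byte variants must also catch
a c1,c2 pair that straddles the block boundary, which is why they peek at
buf[-1]. In the reverse direction, rvermicelliDoubleBlock records the straddle
by OR-ing the lane-0 bit (SuperVector<S>::Ones() >> (S - 1)) into the mask so
the reverse scan still reports it. A scalar sketch of the forward straddle
check, under the same buf[-1]-is-readable assumption; double_straddle_sketch
is a hypothetical name:

#include <cstddef>
#include <cstdint>

// 'casechar' folds case exactly like casemask in the SIMD version.
static const uint8_t *double_straddle_sketch(const uint8_t *buf,
                                             uint8_t c1, uint8_t c2,
                                             uint8_t casechar) {
    if (((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1)) {
        return buf - 1;   // pair starts one byte before this block
    }
    return NULL;          // fall through to the in-block scan
}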