From dcf6b59e8d05a5f9647ea90352b64a4c8840043f Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Mon, 8 Nov 2021 19:45:21 +0000 Subject: [PATCH] split vermicelli block implementations per arch --- src/nfa/arm/vermicelli.hpp | 125 ++++++++++++++++++++++++++++++++++++ src/nfa/vermicelli_simd.cpp | 80 ++++------------------- src/nfa/x86/vermicelli.hpp | 125 ++++++++++++++++++++++++++++++++++++ 3 files changed, 262 insertions(+), 68 deletions(-) create mode 100644 src/nfa/arm/vermicelli.hpp create mode 100644 src/nfa/x86/vermicelli.hpp diff --git a/src/nfa/arm/vermicelli.hpp b/src/nfa/arm/vermicelli.hpp new file mode 100644 index 00000000..d790fa1f --- /dev/null +++ b/src/nfa/arm/vermicelli.hpp @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2020-2021, VectorCamp PC + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Vermicelli: single-byte and double-byte acceleration. + */ + +template +static really_inline +const u8 *vermicelliBlock(SuperVector const data, SuperVector const chars, SuperVector const casemask, u8 const *buf, u16 const len) { + + SuperVector mask = chars.eq(casemask & data); + return first_non_zero_match(buf, mask, len); +} + +template +static really_inline +const u8 *vermicelliBlockNeg(SuperVector const data, SuperVector const chars, SuperVector const casemask, u8 const *buf, u16 const len) { + + SuperVector mask = !chars.eq(casemask & data); + return first_zero_match_inverted(buf, mask, len); +} + +template +static really_inline +const u8 *rvermicelliBlock(SuperVector const data, SuperVector const chars, SuperVector const casemask, u8 const *buf, u16 const len) { + + SuperVector mask = chars.eq(casemask & data); + return last_non_zero_match(buf, mask, len); +} + +template +static really_inline +const u8 *rvermicelliBlockNeg(SuperVector const data, SuperVector const chars, SuperVector const casemask, const u8 *buf, u16 const len) { + + data.print8("data"); + chars.print8("chars"); + casemask.print8("casemask"); + SuperVector mask = !chars.eq(casemask & data); + mask.print8("mask"); + return last_zero_match_inverted(buf, mask, len); +} + +template +static really_inline +const u8 *vermicelliDoubleBlock(SuperVector const data, SuperVector const chars1, SuperVector const chars2, SuperVector const casemask, + u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) { + + SuperVector v = casemask & data; + SuperVector mask1 = chars1.eq(v); + SuperVector mask2 = chars2.eq(v); + SuperVector mask = mask1 & (mask2 >> 1); + + DEBUG_PRINTF("rv[0] = %02hhx, rv[-1] = %02hhx\n", buf[0], buf[-1]); + bool partial_match = (((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1)); + DEBUG_PRINTF("partial = %d\n", partial_match); + if (partial_match) return buf - 1; + + return first_non_zero_match(buf, mask, len); +} + +template +static really_inline +const u8 *rvermicelliDoubleBlock(SuperVector const data, SuperVector const chars1, SuperVector const chars2, SuperVector const casemask, + u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) { + + SuperVector v = casemask & data; + SuperVector mask1 = chars1.eq(v); + SuperVector mask2 = chars2.eq(v); + SuperVector mask = (mask1 << 1)& mask2; + + DEBUG_PRINTF("buf[0] = %02hhx, buf[-1] = %02hhx\n", buf[0], buf[-1]); + bool partial_match = (((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1)); + DEBUG_PRINTF("partial = %d\n", partial_match); + if (partial_match) { + mask = mask | (SuperVector::Ones() >> (S-1)); + } + + return last_non_zero_match(buf, mask, len); +} + +template +static really_inline +const u8 *vermicelliDoubleMaskedBlock(SuperVector const data, SuperVector const chars1, SuperVector const chars2, + SuperVector const mask1, SuperVector const mask2, + u8 const c1, u8 const c2, u8 const m1, u8 const m2, u8 const *buf, u16 const len) { + + SuperVector v1 = chars1.eq(data & mask1); + SuperVector v2 = chars2.eq(data & mask2); + SuperVector mask = v1 & (v2 >> 1); + + DEBUG_PRINTF("rv[0] = %02hhx, rv[-1] = %02hhx\n", buf[0], buf[-1]); + bool partial_match = (((buf[0] & m1) == c2) && ((buf[-1] & m2) == c1)); + DEBUG_PRINTF("partial = %d\n", partial_match); + if (partial_match) return buf - 1; + + return first_non_zero_match(buf, mask, len); +} + diff --git a/src/nfa/vermicelli_simd.cpp b/src/nfa/vermicelli_simd.cpp index e8b7caaf..dbce6dc4 100644 --- a/src/nfa/vermicelli_simd.cpp +++ b/src/nfa/vermicelli_simd.cpp @@ -41,97 +41,41 @@ template static really_inline -const u8 *vermicelliBlock(SuperVector const data, SuperVector const chars, SuperVector const casemask, u8 const *buf, u16 const len) { - - SuperVector mask = chars.eq(casemask & data); - return first_non_zero_match(buf, mask, len); -} - +const u8 *vermicelliBlock(SuperVector const data, SuperVector const chars, SuperVector const casemask, u8 const *buf, u16 const len); template static really_inline -const u8 *vermicelliBlockNeg(SuperVector const data, SuperVector const chars, SuperVector const casemask, u8 const *buf, u16 const len) { - - SuperVector mask = chars.eq(casemask & data); - return first_zero_match_inverted(buf, mask, len); -} +const u8 *vermicelliBlockNeg(SuperVector const data, SuperVector const chars, SuperVector const casemask, u8 const *buf, u16 const len); template static really_inline -const u8 *rvermicelliBlock(SuperVector const data, SuperVector const chars, SuperVector const casemask, u8 const *buf, u16 const len) { - - SuperVector mask = chars.eq(casemask & data); - return last_non_zero_match(buf, mask, len); -} - +const u8 *rvermicelliBlock(SuperVector const data, SuperVector const chars, SuperVector const casemask, u8 const *buf, u16 const len); template static really_inline -const u8 *rvermicelliBlockNeg(SuperVector const data, SuperVector const chars, SuperVector const casemask, const u8 *buf, u16 const len) { - - data.print8("data"); - chars.print8("chars"); - casemask.print8("casemask"); - SuperVector mask = chars.eq(casemask & data); - mask.print8("mask"); - return last_zero_match_inverted(buf, mask, len); -} +const u8 *rvermicelliBlockNeg(SuperVector const data, SuperVector const chars, SuperVector const casemask, const u8 *buf, u16 const len); template static really_inline const u8 *vermicelliDoubleBlock(SuperVector const data, SuperVector const chars1, SuperVector const chars2, SuperVector const casemask, - u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) { - - SuperVector v = casemask & data; - SuperVector mask1 = chars1.eq(v); - SuperVector mask2 = chars2.eq(v); - SuperVector mask = mask1 & (mask2 >> 1); - - DEBUG_PRINTF("rv[0] = %02hhx, rv[-1] = %02hhx\n", buf[0], buf[-1]); - bool partial_match = (((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1)); - DEBUG_PRINTF("partial = %d\n", partial_match); - if (partial_match) return buf - 1; - - return first_non_zero_match(buf, mask, len); -} + u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len); template static really_inline const u8 *rvermicelliDoubleBlock(SuperVector const data, SuperVector const chars1, SuperVector const chars2, SuperVector const casemask, - u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) { - - SuperVector v = casemask & data; - SuperVector mask1 = chars1.eq(v); - SuperVector mask2 = chars2.eq(v); - SuperVector mask = (mask1 << 1)& mask2; - - DEBUG_PRINTF("buf[0] = %02hhx, buf[-1] = %02hhx\n", buf[0], buf[-1]); - bool partial_match = (((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1)); - DEBUG_PRINTF("partial = %d\n", partial_match); - if (partial_match) { - mask = mask | (SuperVector::Ones() >> (S-1)); - } - - return last_non_zero_match(buf, mask, len); -} + u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len); template static really_inline const u8 *vermicelliDoubleMaskedBlock(SuperVector const data, SuperVector const chars1, SuperVector const chars2, SuperVector const mask1, SuperVector const mask2, - u8 const c1, u8 const c2, u8 const m1, u8 const m2, u8 const *buf, u16 const len) { + u8 const c1, u8 const c2, u8 const m1, u8 const m2, u8 const *buf, u16 const len); - SuperVector v1 = chars1.eq(data & mask1); - SuperVector v2 = chars2.eq(data & mask2); - SuperVector mask = v1 & (v2 >> 1); - - DEBUG_PRINTF("rv[0] = %02hhx, rv[-1] = %02hhx\n", buf[0], buf[-1]); - bool partial_match = (((buf[0] & m1) == c2) && ((buf[-1] & m2) == c1)); - DEBUG_PRINTF("partial = %d\n", partial_match); - if (partial_match) return buf - 1; - - return first_non_zero_match(buf, mask, len); -} +#if defined(ARCH_IA32) || defined(ARCH_X86_64) +#include "x86/vermicelli.hpp" +#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64) +#include "arm/vermicelli.hpp" +#endif template static const u8 *vermicelliExecReal(SuperVector const chars, SuperVector const casemask, u8 const *buf, u8 const *buf_end) { diff --git a/src/nfa/x86/vermicelli.hpp b/src/nfa/x86/vermicelli.hpp new file mode 100644 index 00000000..8b461dfe --- /dev/null +++ b/src/nfa/x86/vermicelli.hpp @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2020-2021, VectorCamp PC + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Vermicelli: single-byte and double-byte acceleration. + */ + +template +static really_inline +const u8 *vermicelliBlock(SuperVector const data, SuperVector const chars, SuperVector const casemask, u8 const *buf, u16 const len) { + + SuperVector mask = chars.eq(casemask & data); + return first_non_zero_match(buf, mask, len); +} + +template +static really_inline +const u8 *vermicelliBlockNeg(SuperVector const data, SuperVector const chars, SuperVector const casemask, u8 const *buf, u16 const len) { + + SuperVector mask = chars.eq(casemask & data); + return first_zero_match_inverted(buf, mask, len); +} + +template +static really_inline +const u8 *rvermicelliBlock(SuperVector const data, SuperVector const chars, SuperVector const casemask, u8 const *buf, u16 const len) { + + SuperVector mask = chars.eq(casemask & data); + return last_non_zero_match(buf, mask, len); +} + +template +static really_inline +const u8 *rvermicelliBlockNeg(SuperVector const data, SuperVector const chars, SuperVector const casemask, const u8 *buf, u16 const len) { + + data.print8("data"); + chars.print8("chars"); + casemask.print8("casemask"); + SuperVector mask = chars.eq(casemask & data); + mask.print8("mask"); + return last_zero_match_inverted(buf, mask, len); +} + +template +static really_inline +const u8 *vermicelliDoubleBlock(SuperVector const data, SuperVector const chars1, SuperVector const chars2, SuperVector const casemask, + u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) { + + SuperVector v = casemask & data; + SuperVector mask1 = chars1.eq(v); + SuperVector mask2 = chars2.eq(v); + SuperVector mask = mask1 & (mask2 >> 1); + + DEBUG_PRINTF("rv[0] = %02hhx, rv[-1] = %02hhx\n", buf[0], buf[-1]); + bool partial_match = (((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1)); + DEBUG_PRINTF("partial = %d\n", partial_match); + if (partial_match) return buf - 1; + + return first_non_zero_match(buf, mask, len); +} + +template +static really_inline +const u8 *rvermicelliDoubleBlock(SuperVector const data, SuperVector const chars1, SuperVector const chars2, SuperVector const casemask, + u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) { + + SuperVector v = casemask & data; + SuperVector mask1 = chars1.eq(v); + SuperVector mask2 = chars2.eq(v); + SuperVector mask = (mask1 << 1)& mask2; + + DEBUG_PRINTF("buf[0] = %02hhx, buf[-1] = %02hhx\n", buf[0], buf[-1]); + bool partial_match = (((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1)); + DEBUG_PRINTF("partial = %d\n", partial_match); + if (partial_match) { + mask = mask | (SuperVector::Ones() >> (S-1)); + } + + return last_non_zero_match(buf, mask, len); +} + +template +static really_inline +const u8 *vermicelliDoubleMaskedBlock(SuperVector const data, SuperVector const chars1, SuperVector const chars2, + SuperVector const mask1, SuperVector const mask2, + u8 const c1, u8 const c2, u8 const m1, u8 const m2, u8 const *buf, u16 const len) { + + SuperVector v1 = chars1.eq(data & mask1); + SuperVector v2 = chars2.eq(data & mask2); + SuperVector mask = v1 & (v2 >> 1); + + DEBUG_PRINTF("rv[0] = %02hhx, rv[-1] = %02hhx\n", buf[0], buf[-1]); + bool partial_match = (((buf[0] & m1) == c2) && ((buf[-1] & m2) == c1)); + DEBUG_PRINTF("partial = %d\n", partial_match); + if (partial_match) return buf - 1; + + return first_non_zero_match(buf, mask, len); +} +