diff --git a/src/nfa/arm/shufti.hpp b/src/nfa/arm/shufti.hpp new file mode 100644 index 00000000..76461175 --- /dev/null +++ b/src/nfa/arm/shufti.hpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2020-2021, VectorCamp PC + * Copyright (c) 2021, Arm Limited + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Shufti: character class acceleration. 
+ * + */ + +/** + * \brief Arm single-mask shufti step over one vector of input bytes. + * + * ANDs chars with a vector of 0x0f bytes to obtain the low-nibble indices and + * applies vshr_8_imm<4> to chars to obtain the second set of indices + * (presumably the high nibble of each byte - confirm in SuperVector). Each + * index set is looked up with pshufb in mask_lo and mask_hi respectively, the + * two lookups are ANDed, and the AND is compared against + * SuperVector::Zeroes() with operator>. + * + * \param mask_lo table indexed by the low-nibble values of chars. + * \param mask_hi table indexed by the shifted values of chars. + * \param chars input bytes to classify. + * \return (c_lo & c_hi) > Zeroes(); presumably a per-byte match mask - + * confirm against the SuperVector comparison operators. + */ +template +static really_inline +const SuperVector blockSingleMask(SuperVector mask_lo, SuperVector mask_hi, SuperVector chars) { + const SuperVector low4bits = SuperVector::dup_u8(0xf); + + SuperVector c_lo = chars & low4bits; + SuperVector c_hi = chars.template vshr_8_imm<4>(); + c_lo = mask_lo.template pshufb(c_lo); + c_hi = mask_hi.template pshufb(c_hi); + + return (c_lo & c_hi) > (SuperVector::Zeroes()); +} + +/** + * \brief Arm double-mask shufti step over one vector of input bytes. + * + * Builds two index vectors from chars: chars & 0x0f, and + * vshr_64_imm<4> followed by & 0x0f (the second AND presumably discards bits + * that the wider shift carries in from the neighbouring byte - confirm). + * For each mask pair the lo and hi pshufb lookups are ORed together: t1 comes + * from mask1_lo / mask1_hi and t2 from mask2_lo / mask2_hi. t2 is passed + * through vshr_128_imm<1> before being ORed with t1 (presumably to line the + * second-mask results up with the first-mask positions - confirm), and the + * function returns the negation of t.eq(SuperVector::Ones()). + * + * The print8() calls label every intermediate value; presumably debug-only + * output - confirm in SuperVector. + * + * \param mask1_lo low-nibble table of the first mask pair. + * \param mask1_hi high-nibble table of the first mask pair. + * \param mask2_lo low-nibble table of the second mask pair. + * \param mask2_hi high-nibble table of the second mask pair. + * \param chars input bytes to classify. + * \return !t.eq(Ones()), where t = t1 | t2.vshr_128_imm<1>(). + */ +template +static really_inline +SuperVector blockDoubleMask(SuperVector mask1_lo, SuperVector mask1_hi, SuperVector mask2_lo, SuperVector mask2_hi, SuperVector chars) { + + const SuperVector low4bits = SuperVector::dup_u8(0xf); + SuperVector chars_lo = chars & low4bits; + chars_lo.print8("chars_lo"); + SuperVector chars_hi = chars.template vshr_64_imm<4>() & low4bits; + chars_hi.print8("chars_hi"); + SuperVector c1_lo = mask1_lo.template pshufb(chars_lo); + c1_lo.print8("c1_lo"); + SuperVector c1_hi = mask1_hi.template pshufb(chars_hi); + c1_hi.print8("c1_hi"); + SuperVector t1 = c1_lo | c1_hi; + t1.print8("t1"); + + SuperVector c2_lo = mask2_lo.template pshufb(chars_lo); + c2_lo.print8("c2_lo"); + SuperVector c2_hi = mask2_hi.template pshufb(chars_hi); + c2_hi.print8("c2_hi"); + SuperVector t2 = c2_lo | c2_hi; + t2.print8("t2"); + t2.template vshr_128_imm<1>().print8("t2.vshr_128(1)"); + SuperVector t = t1 | (t2.template vshr_128_imm<1>()); + t.print8("t"); + + return !t.eq(SuperVector::Ones()); +} diff --git a/src/nfa/shufti_simd.hpp b/src/nfa/shufti_simd.hpp index f8621afe..e7f3f6c9 100644 --- a/src/nfa/shufti_simd.hpp +++ b/src/nfa/shufti_simd.hpp @@ -34,6 +34,8 @@ * Utilises the SSSE3 pshufb shuffle instruction */ +#include + #include "shufti.h" #include "ue2common.h" #include "util/arch.h" @@ -43,58 +45,18 @@ #include "util/supervector/supervector.hpp" #include "util/match.hpp" -#include -#include -#include -#include -#include -#include -#include - -#include -#include - template static really_inline -const SuperVector 
blockSingleMask(SuperVector mask_lo, SuperVector mask_hi, SuperVector chars) { - const SuperVector low4bits = SuperVector::dup_u8(0xf); - - SuperVector c_lo = chars & low4bits; - SuperVector c_hi = chars.template vshr_8_imm<4>(); - c_lo = mask_lo.template pshufb(c_lo); - c_hi = mask_hi.template pshufb(c_hi); - - return (c_lo & c_hi) > (SuperVector::Zeroes()); -} - +const SuperVector blockSingleMask(SuperVector mask_lo, SuperVector mask_hi, SuperVector chars); template static really_inline -SuperVector blockDoubleMask(SuperVector mask1_lo, SuperVector mask1_hi, SuperVector mask2_lo, SuperVector mask2_hi, SuperVector chars) { +SuperVector blockDoubleMask(SuperVector mask1_lo, SuperVector mask1_hi, SuperVector mask2_lo, SuperVector mask2_hi, SuperVector chars); - const SuperVector low4bits = SuperVector::dup_u8(0xf); - SuperVector chars_lo = chars & low4bits; - chars_lo.print8("chars_lo"); - SuperVector chars_hi = chars.template vshr_64_imm<4>() & low4bits; - chars_hi.print8("chars_hi"); - SuperVector c1_lo = mask1_lo.template pshufb(chars_lo); - c1_lo.print8("c1_lo"); - SuperVector c1_hi = mask1_hi.template pshufb(chars_hi); - c1_hi.print8("c1_hi"); - SuperVector t1 = c1_lo | c1_hi; - t1.print8("t1"); - - SuperVector c2_lo = mask2_lo.template pshufb(chars_lo); - c2_lo.print8("c2_lo"); - SuperVector c2_hi = mask2_hi.template pshufb(chars_hi); - c2_hi.print8("c2_hi"); - SuperVector t2 = c2_lo | c2_hi; - t2.print8("t2"); - t2.template vshr_128_imm<1>().print8("t2.vshr_128(1)"); - SuperVector t = t1 | (t2.template vshr_128_imm<1>()); - t.print8("t"); - - return !t.eq(SuperVector::Ones()); -} +#if defined(ARCH_IA32) || defined(ARCH_X86_64) +#include "x86/shufti.hpp" +#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64) +#include "arm/shufti.hpp" +#endif template static really_inline @@ -150,13 +112,13 @@ const u8 *shuftiExecReal(m128 mask_lo, m128 mask_hi, const u8 *buf, const u8 *bu d = ROUNDUP_PTR(d, S); } - while(d + S <= buf_end) { + while(d + S <= buf_end) { 
__builtin_prefetch(d + 64); DEBUG_PRINTF("d %p \n", d); SuperVector chars = SuperVector::load(d); rv = fwdBlock(wide_mask_lo, wide_mask_hi, chars, d); if (rv) return rv; - d += S; + d += S; } } @@ -164,10 +126,10 @@ const u8 *shuftiExecReal(m128 mask_lo, m128 mask_hi, const u8 *buf, const u8 *bu // finish off tail if (d != buf_end) { - SuperVector chars = SuperVector::loadu(d); + SuperVector chars = SuperVector::loadu_maskz(d, buf_end - d); rv = fwdBlock(wide_mask_lo, wide_mask_hi, chars, d); DEBUG_PRINTF("rv %p \n", rv); - if (rv) return rv; + if (rv && rv < buf_end) return rv; } return buf_end; @@ -222,7 +184,7 @@ const u8 *rshuftiExecReal(m128 mask_lo, m128 mask_hi, const u8 *buf, const u8 *b SuperVector chars = SuperVector::loadu(buf); rv = revBlock(wide_mask_lo, wide_mask_hi, chars, buf); DEBUG_PRINTF("rv %p \n", rv); - if (rv) return rv; + if (rv && rv < buf_end) return rv; } return buf - 1; @@ -261,14 +223,14 @@ const u8 *shuftiDoubleExecReal(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 d = ROUNDUP_PTR(d, S); } - while(d + S <= buf_end) { + while(d + S <= buf_end) { __builtin_prefetch(d + 64); DEBUG_PRINTF("d %p \n", d); SuperVector chars = SuperVector::load(d); rv = fwdBlockDouble(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi, chars, d); if (rv) return rv; - d += S; + d += S; } } @@ -276,10 +238,10 @@ const u8 *shuftiDoubleExecReal(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 // finish off tail if (d != buf_end) { - SuperVector chars = SuperVector::loadu(buf_end - S); - rv = fwdBlockDouble(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi, chars, buf_end - S); + SuperVector chars = SuperVector::loadu(d); + rv = fwdBlockDouble(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi, chars, d); DEBUG_PRINTF("rv %p \n", rv); - if (rv) return rv; + if (rv && rv < buf_end) return rv; } return buf_end; diff --git a/src/nfa/x86/shufti.hpp b/src/nfa/x86/shufti.hpp new file mode 100644 index 00000000..fa18cc2a --- /dev/null +++ 
b/src/nfa/x86/shufti.hpp @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2020-2021, VectorCamp PC + * Copyright (c) 2021, Arm Limited + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Shufti: character class acceleration. 
+ * + */ + +#ifndef SHUFTI_SIMD_X86_HPP +#define SHUFTI_SIMD_X86_HPP + +#include "util/supervector/supervector.hpp" +#include "util/match.hpp" + +/** + * \brief x86 single-mask shufti step over one vector of input bytes. + * + * ANDs chars with a vector of 0x0f bytes to obtain the low-nibble indices, + * and applies vshr_64_imm<4> followed by the same AND to obtain the second + * set of indices (the AND presumably discards bits the wider shift carries in + * from the neighbouring byte - confirm). Each index set is looked up with + * pshufb in mask_lo and mask_hi respectively and the two lookups are ANDed. + * + * NOTE(review): the result is c.eq(Zeroes()), whereas the Arm + * blockSingleMask added by this same patch returns (c_lo & c_hi) > Zeroes(); + * confirm that the callers on each architecture expect this polarity. + * + * \param mask_lo table indexed by the low-nibble values of chars. + * \param mask_hi table indexed by the shifted values of chars. + * \param chars input bytes to classify. + * \return (c_lo & c_hi).eq(Zeroes()). + */ +template +static really_inline +const SuperVector blockSingleMask(SuperVector mask_lo, SuperVector mask_hi, SuperVector chars) { + const SuperVector low4bits = SuperVector::dup_u8(0xf); + + SuperVector c_lo = chars & low4bits; + SuperVector c_hi = chars.template vshr_64_imm<4>() & low4bits; + c_lo = mask_lo.template pshufb(c_lo); + c_hi = mask_hi.template pshufb(c_hi); + + SuperVector c = c_lo & c_hi; + + return c.eq(SuperVector::Zeroes()); +} + +/** + * \brief x86 double-mask shufti step over one vector of input bytes. + * + * Builds two index vectors from chars: chars & 0x0f, and + * low4bits.opandnot(chars) passed through vshr_64_imm<4> (opandnot presumably + * clears the low nibbles before the shift - confirm in SuperVector). For each + * mask pair the lo and hi pshufb lookups are ORed together: c1 comes from + * mask1_lo / mask1_hi and c2 from mask2_lo / mask2_hi. c2 is passed through + * vshr_128_imm<1> before being ORed with c1. + * + * The print8() calls label every intermediate value; presumably debug-only + * output - confirm in SuperVector. + * + * NOTE(review): the result is c.eq(Ones()), whereas the Arm blockDoubleMask + * added by this same patch returns the negation of that comparison; confirm + * that the callers on each architecture expect this polarity. + * + * \param mask1_lo low-nibble table of the first mask pair. + * \param mask1_hi high-nibble table of the first mask pair. + * \param mask2_lo low-nibble table of the second mask pair. + * \param mask2_hi high-nibble table of the second mask pair. + * \param chars input bytes to classify. + * \return c.eq(Ones()), where c = c1 | c2.vshr_128_imm<1>(). + */ +template +static really_inline +SuperVector blockDoubleMask(SuperVector mask1_lo, SuperVector mask1_hi, SuperVector mask2_lo, SuperVector mask2_hi, SuperVector chars) { + + const SuperVector low4bits = SuperVector::dup_u8(0xf); + SuperVector chars_lo = chars & low4bits; + chars_lo.print8("chars_lo"); + SuperVector chars_hi = low4bits.opandnot(chars).template vshr_64_imm<4>(); + chars_hi.print8("chars_hi"); + SuperVector c1_lo = mask1_lo.pshufb(chars_lo); + c1_lo.print8("c1_lo"); + SuperVector c1_hi = mask1_hi.pshufb(chars_hi); + c1_hi.print8("c1_hi"); + SuperVector c1 = c1_lo | c1_hi; + c1.print8("c1"); + + SuperVector c2_lo = mask2_lo.pshufb(chars_lo); + c2_lo.print8("c2_lo"); + SuperVector c2_hi = mask2_hi.pshufb(chars_hi); + c2_hi.print8("c2_hi"); + SuperVector c2 = c2_lo | c2_hi; + c2.print8("c2"); + c2.template vshr_128_imm<1>().print8("c2.vshr_128(1)"); + SuperVector c = c1 | (c2.template vshr_128_imm<1>()); + c.print8("c"); + + return c.eq(SuperVector::Ones()); +} + +#endif // SHUFTI_SIMD_X86_HPP