diff --git a/src/nfa/castle.c b/src/nfa/castle.c index dc6ec8f9..c7dd6d50 100644 --- a/src/nfa/castle.c +++ b/src/nfa/castle.c @@ -46,6 +46,10 @@ #include "util/partial_store.h" #include "ue2common.h" +#ifdef HAVE_SVE2 +#include "castle_sve.h" +#endif + static really_inline const struct SubCastle *getSubCastle(const struct Castle *c, u32 num) { assert(num < c->numRepeats); @@ -553,42 +557,6 @@ char castleScanNVerm(const struct Castle *c, const u8 *buf, const size_t begin, return 1; } -#ifdef HAVE_SVE2 - -static really_inline -char castleScanVerm16(const struct Castle *c, const u8 *buf, const size_t begin, - const size_t end, size_t *loc) { - const u8 *ptr = vermicelli16Exec(c->u.verm16.mask, buf + begin, buf + end); - if (ptr == buf + end) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - assert(ptr >= buf && ptr < buf + end); - *loc = ptr - buf; - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - return 1; -} - -static really_inline -char castleScanNVerm16(const struct Castle *c, const u8 *buf, const size_t begin, - const size_t end, size_t *loc) { - const u8 *ptr = nvermicelli16Exec(c->u.verm16.mask, buf + begin, buf + end); - if (ptr == buf + end) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - assert(ptr >= buf && ptr < buf + end); - *loc = ptr - buf; - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - return 1; -} - -#endif // HAVE_SVE2 - static really_inline char castleScanShufti(const struct Castle *c, const u8 *buf, const size_t begin, const size_t end, size_t *loc) { @@ -690,42 +658,6 @@ char castleRevScanNVerm(const struct Castle *c, const u8 *buf, return 1; } -#ifdef HAVE_SVE2 - -static really_inline -char castleRevScanVerm16(const struct Castle *c, const u8 *buf, - const size_t begin, const size_t end, size_t *loc) { - const u8 *ptr = rvermicelli16Exec(c->u.verm16.mask, buf + begin, buf + end); - if (ptr == buf + begin - 1) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - assert(ptr >= buf && ptr < buf + end); - *loc = ptr - buf; - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - return 1; -} - -static really_inline -char castleRevScanNVerm16(const struct Castle *c, const u8 *buf, - const size_t begin, const size_t end, size_t *loc) { - const u8 *ptr = rnvermicelli16Exec(c->u.verm16.mask, buf + begin, buf + end); - if (ptr == buf + begin - 1) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - assert(ptr >= buf && ptr < buf + end); - *loc = ptr - buf; - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - return 1; -} - -#endif // HAVE_SVE2 - static really_inline char castleRevScanShufti(const struct Castle *c, const u8 *buf, const size_t begin, const size_t end, size_t *loc) { diff --git a/src/nfa/castle_sve.h b/src/nfa/castle_sve.h new file mode 100644 index 00000000..a8f6452d --- /dev/null +++ b/src/nfa/castle_sve.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2021, Arm Limited + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Castle for SVE: multi-tenant repeat engine, runtime code. + */ + +static really_inline +char castleScanVerm16(const struct Castle *c, const u8 *buf, const size_t begin, + const size_t end, size_t *loc) { + const u8 *ptr = vermicelli16Exec(c->u.verm16.mask, buf + begin, buf + end); + if (ptr == buf + end) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char castleScanNVerm16(const struct Castle *c, const u8 *buf, const size_t begin, + const size_t end, size_t *loc) { + const u8 *ptr = nvermicelli16Exec(c->u.verm16.mask, buf + begin, buf + end); + if (ptr == buf + end) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char castleRevScanVerm16(const struct Castle *c, const u8 *buf, + const size_t begin, const size_t end, size_t *loc) { + const u8 *ptr = rvermicelli16Exec(c->u.verm16.mask, buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char castleRevScanNVerm16(const struct Castle *c, const u8 *buf, + const size_t begin, const size_t end, size_t *loc) { + const u8 *ptr = rnvermicelli16Exec(c->u.verm16.mask, buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} \ No newline at end of file diff --git a/src/nfa/lbr.c b/src/nfa/lbr.c index 2c6ea163..68e8e3f4 100644 --- a/src/nfa/lbr.c +++ b/src/nfa/lbr.c @@ -362,56 +362,6 @@ char lbrRevScanNVerm(const struct NFA *nfa, const u8 *buf, return 1; } -#ifdef HAVE_SVE2 - -static really_inline -char lbrRevScanVerm16(const struct NFA *nfa, const u8 *buf, - size_t begin, size_t end, size_t *loc) { - assert(begin <= end); - assert(nfa->type == LBR_NFA_VERM16); - const struct lbr_verm16 *l = getImplNfa(nfa); - - if (begin == end) { - return 0; - } - - const u8 *ptr = rvermicelli16Exec(l->mask, buf + begin, buf + end); - if (ptr == buf + begin - 1) { - DEBUG_PRINTF("no 
escape found\n"); - return 0; - } - - assert(loc); - *loc = ptr - buf; - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - return 1; -} - -static really_inline -char lbrRevScanNVerm16(const struct NFA *nfa, const u8 *buf, - size_t begin, size_t end, size_t *loc) { - assert(begin <= end); - assert(nfa->type == LBR_NFA_NVERM16); - const struct lbr_verm16 *l = getImplNfa(nfa); - - if (begin == end) { - return 0; - } - - const u8 *ptr = rnvermicelli16Exec(l->mask, buf + begin, buf + end); - if (ptr == buf + begin - 1) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - *loc = ptr - buf; - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - return 1; -} - -#endif // HAVE_SVE2 - static really_inline char lbrRevScanShuf(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, @@ -518,56 +468,6 @@ char lbrFwdScanNVerm(const struct NFA *nfa, const u8 *buf, return 1; } -#ifdef HAVE_SVE2 - -static really_inline -char lbrFwdScanVerm16(const struct NFA *nfa, const u8 *buf, - size_t begin, size_t end, size_t *loc) { - assert(begin <= end); - assert(nfa->type == LBR_NFA_VERM16); - const struct lbr_verm16 *l = getImplNfa(nfa); - - if (begin == end) { - return 0; - } - - const u8 *ptr = vermicelli16Exec(l->mask, buf + begin, buf + end); - if (ptr == buf + end) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - *loc = ptr - buf; - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - return 1; -} - -static really_inline -char lbrFwdScanNVerm16(const struct NFA *nfa, const u8 *buf, - size_t begin, size_t end, size_t *loc) { - assert(begin <= end); - assert(nfa->type == LBR_NFA_NVERM16); - const struct lbr_verm16 *l = getImplNfa(nfa); - - if (begin == end) { - return 0; - } - - const u8 *ptr = nvermicelli16Exec(l->mask, buf + begin, buf + end); - if (ptr == buf + end) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - *loc = ptr - buf; - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - return 1; -} - -#endif // HAVE_SVE2 - static really_inline char lbrFwdScanShuf(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, @@ -625,18 +525,12 @@ char lbrFwdScanTruf(const struct NFA *nfa, const u8 *buf, #define ENGINE_ROOT_NAME NVerm #include "lbr_common_impl.h" -#ifdef HAVE_SVE2 - -#define ENGINE_ROOT_NAME Verm16 -#include "lbr_common_impl.h" - -#define ENGINE_ROOT_NAME NVerm16 -#include "lbr_common_impl.h" - -#endif // HAVE_SVE2 - #define ENGINE_ROOT_NAME Shuf #include "lbr_common_impl.h" #define ENGINE_ROOT_NAME Truf #include "lbr_common_impl.h" + +#ifdef HAVE_SVE2 +#include "lbr_sve.h" +#endif \ No newline at end of file diff --git a/src/nfa/lbr_sve.h b/src/nfa/lbr_sve.h new file mode 100644 index 00000000..8f5948b5 --- /dev/null +++ b/src/nfa/lbr_sve.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2021, Arm Limited + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Large Bounded Repeat (LBR) engine for SVE: runtime code. + */ + +static really_inline +char lbrRevScanVerm16(const struct NFA *nfa, const u8 *buf, + size_t begin, size_t end, size_t *loc) { + assert(begin <= end); + assert(nfa->type == LBR_NFA_VERM16); + const struct lbr_verm16 *l = getImplNfa(nfa); + + if (begin == end) { + return 0; + } + + const u8 *ptr = rvermicelli16Exec(l->mask, buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char lbrRevScanNVerm16(const struct NFA *nfa, const u8 *buf, + size_t begin, size_t end, size_t *loc) { + assert(begin <= end); + assert(nfa->type == LBR_NFA_NVERM16); + const struct lbr_verm16 *l = getImplNfa(nfa); + + if (begin == end) { + return 0; + } + + const u8 *ptr = rnvermicelli16Exec(l->mask, buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char lbrFwdScanVerm16(const struct NFA *nfa, const u8 *buf, + size_t begin, size_t end, size_t *loc) { + assert(begin <= end); + assert(nfa->type == LBR_NFA_VERM16); + const struct lbr_verm16 *l = getImplNfa(nfa); + + if (begin == end) { + return 0; + } + + const u8 *ptr = vermicelli16Exec(l->mask, buf + begin, buf + end); + if (ptr == buf + end) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char lbrFwdScanNVerm16(const struct NFA *nfa, const u8 *buf, + size_t begin, size_t end, size_t *loc) { + assert(begin <= end); + assert(nfa->type == LBR_NFA_NVERM16); + const struct lbr_verm16 *l = getImplNfa(nfa); + + if (begin == end) { + return 0; + } + + const u8 *ptr = nvermicelli16Exec(l->mask, buf + begin, buf + end); + if (ptr == buf + end) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +#define ENGINE_ROOT_NAME Verm16 +#include "lbr_common_impl.h" + +#define ENGINE_ROOT_NAME NVerm16 +#include "lbr_common_impl.h" \ No newline at end of file diff --git a/src/nfa/shufti_simd.hpp b/src/nfa/shufti_simd.hpp index 3dbeeebb..668b253d 100644 --- a/src/nfa/shufti_simd.hpp +++ 
b/src/nfa/shufti_simd.hpp
@@ -270,7 +270,7 @@ static really_inline const u8 *shuftiDoubleMini(SuperVector<S> mask1_lo, SuperVector<S> mask2_lo,
     t.print8("t");
 
     typename SuperVector<S>::movemask_type z = t.eqmask(SuperVector<S>::Ones());
-    DEBUG_PRINTF(" z: 0x%08x\n", z);
+    DEBUG_PRINTF(" z: 0x%016llx\n", (u64a)z);
 
     return firstMatch<S>(buf, z);
 }
diff --git a/src/nfagraph/ng_lbr.cpp b/src/nfagraph/ng_lbr.cpp
index ca3a1a2e..039eeb3b 100644
--- a/src/nfagraph/ng_lbr.cpp
+++ b/src/nfagraph/ng_lbr.cpp
@@ -145,6 +145,10 @@ bytecode_ptr<NFA> makeLbrNfa(NFAEngineType nfa_type, enum RepeatType rtype,
     return nfa;
 }
 
+#ifdef HAVE_SVE2
+#include "ng_lbr_sve.hpp"
+#endif
+
 static
 bytecode_ptr<NFA> buildLbrDot(const CharReach &cr, const depth &repeatMin,
                               const depth &repeatMax, u32 minPeriod,
@@ -211,56 +215,6 @@ bytecode_ptr<NFA> buildLbrNVerm(const CharReach &cr, const depth &repeatMin,
     return nfa;
 }
 
-#ifdef HAVE_SVE2
-
-static
-bytecode_ptr<NFA> buildLbrVerm16(const CharReach &cr, const depth &repeatMin,
-                                 const depth &repeatMax, u32 minPeriod,
-                                 bool is_reset, ReportID report) {
-    const CharReach escapes(~cr);
-
-    if (escapes.count() > 16) {
-        return nullptr;
-    }
-
-    enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
-                                             is_reset);
-    auto nfa = makeLbrNfa<lbr_verm16>(LBR_NFA_VERM16, rtype, repeatMax);
-    struct lbr_verm16 *lv = (struct lbr_verm16 *)getMutableImplNfa(nfa.get());
-    vermicelli16Build(escapes, (u8 *)&lv->mask);
-
-    fillNfa<lbr_verm16>(nfa.get(), &lv->common, report, repeatMin, repeatMax,
-                        minPeriod, rtype);
-
-    DEBUG_PRINTF("built verm16 lbr\n");
-    return nfa;
-}
-
-static
-bytecode_ptr<NFA> buildLbrNVerm16(const CharReach &cr, const depth &repeatMin,
-                                  const depth &repeatMax, u32 minPeriod,
-                                  bool is_reset, ReportID report) {
-    const CharReach escapes(cr);
-
-    if (escapes.count() > 16) {
-        return nullptr;
-    }
-
-    enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
-                                             is_reset);
-    auto nfa = makeLbrNfa<lbr_verm16>(LBR_NFA_NVERM16, rtype, repeatMax);
-    struct lbr_verm16 *lv = (struct lbr_verm16 *)getMutableImplNfa(nfa.get());
-    vermicelli16Build(escapes, (u8 *)&lv->mask);
-
-    fillNfa<lbr_verm16>(nfa.get(), &lv->common, report, repeatMin, repeatMax,
-                        minPeriod, rtype);
-
-    DEBUG_PRINTF("built negated verm16 lbr\n");
-    return nfa;
-}
-
-#endif // HAVE_SVE2
-
 static
 bytecode_ptr<NFA> buildLbrShuf(const CharReach &cr, const depth &repeatMin,
                                const depth &repeatMax, u32 minPeriod,
diff --git a/src/nfagraph/ng_lbr_sve.hpp b/src/nfagraph/ng_lbr_sve.hpp
new file mode 100644
index 00000000..82df3ea1
--- /dev/null
+++ b/src/nfagraph/ng_lbr_sve.hpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2021, Arm Limited
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * \brief Large Bounded Repeat (LBR) engine build code for SVE.
+ */
+
+static
+bytecode_ptr<NFA> buildLbrVerm16(const CharReach &cr, const depth &repeatMin,
+                                 const depth &repeatMax, u32 minPeriod,
+                                 bool is_reset, ReportID report) {
+    const CharReach escapes(~cr);
+
+    if (escapes.count() > 16) {
+        return nullptr;
+    }
+
+    enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
+                                             is_reset);
+    auto nfa = makeLbrNfa<lbr_verm16>(LBR_NFA_VERM16, rtype, repeatMax);
+    struct lbr_verm16 *lv = (struct lbr_verm16 *)getMutableImplNfa(nfa.get());
+    vermicelli16Build(escapes, (u8 *)&lv->mask);
+
+    fillNfa<lbr_verm16>(nfa.get(), &lv->common, report, repeatMin, repeatMax,
+                        minPeriod, rtype);
+
+    DEBUG_PRINTF("built verm16 lbr\n");
+    return nfa;
+}
+
+static
+bytecode_ptr<NFA> buildLbrNVerm16(const CharReach &cr, const depth &repeatMin,
+                                  const depth &repeatMax, u32 minPeriod,
+                                  bool is_reset, ReportID report) {
+    const CharReach escapes(cr);
+
+    if (escapes.count() > 16) {
+        return nullptr;
+    }
+
+    enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
+                                             is_reset);
+    auto nfa = makeLbrNfa<lbr_verm16>(LBR_NFA_NVERM16, rtype, repeatMax);
+    struct lbr_verm16 *lv = (struct lbr_verm16 *)getMutableImplNfa(nfa.get());
+    vermicelli16Build(escapes, (u8 *)&lv->mask);
+
+    fillNfa<lbr_verm16>(nfa.get(), &lv->common, report, repeatMin, repeatMax,
+                        minPeriod, rtype);
+
+    DEBUG_PRINTF("built negated verm16 lbr\n");
+    return nfa;
+}
\ No newline at end of file
diff --git a/src/rose/counting_miracle.h b/src/rose/counting_miracle.h
index d61cc12c..602907cb 100644
--- a/src/rose/counting_miracle.h
+++ b/src/rose/counting_miracle.h
@@ -41,64 +41,7 @@
 #define COUNTING_MIRACLE_LEN_MAX 256
 
 #ifdef HAVE_SVE2
-
-static really_inline
-size_t countMatches(svuint8_t chars, svbool_t pg, const u8 *buf) {
-    svuint8_t vec = svld1_u8(pg, buf);
-    return svcntp_b8(svptrue_b8(), svmatch(pg, vec, chars));
-}
-
-static really_inline
-bool countLoopBody(svuint8_t chars, svbool_t pg, const u8 *d,
-                   u32 target_count, u32 *count_inout, const u8 **d_out) {
-    *count_inout += countMatches(chars, pg, d);
-    if (*count_inout >= target_count) {
-        *d_out = d;
-        return true;
-    }
-    return false;
-}
-
-static really_inline
-bool countOnce(svuint8_t chars, const u8 *d, const u8 *d_end,
-               u32 target_count, u32 *count_inout, const u8 **d_out) {
-    assert(d <= d_end);
-    svbool_t pg = svwhilelt_b8_s64(0, d_end - d);
-    return countLoopBody(chars, pg, d, target_count, count_inout, d_out);
-}
-
-static really_inline
-bool roseCountingMiracleScan(u8 c, const u8 *d, const u8 *d_end,
-                             u32 target_count, u32 *count_inout,
-                             const u8 **d_out) {
-    assert(d <= d_end);
-    svuint8_t chars = svdup_u8(c);
-    size_t len = d_end - d;
-    if (len <= svcntb()) {
-        bool rv = countOnce(chars, 
d, d_end, target_count, count_inout, d_out); - return rv; - } - // peel off first part to align to the vector size - const u8 *aligned_d_end = ROUNDDOWN_PTR(d_end, svcntb_pat(SV_POW2)); - assert(d < aligned_d_end); - if (d_end != aligned_d_end) { - if (countOnce(chars, aligned_d_end, d_end, - target_count, count_inout, d_out)) return true; - d_end = aligned_d_end; - } - size_t loops = (d_end - d) / svcntb(); - for (size_t i = 0; i < loops; i++) { - d_end -= svcntb(); - if (countLoopBody(chars, svptrue_b8(), d_end, - target_count, count_inout, d_out)) return true; - } - if (d != d_end) { - if (countOnce(chars, d, d_end, - target_count, count_inout, d_out)) return true; - } - return false; -} - +#include "counting_miracle_sve.h" #else static really_inline @@ -146,71 +89,7 @@ char roseCountingMiracleScan(u8 c, const u8 *d, const u8 *d_end, #endif #ifdef HAVE_SVE - -static really_inline -size_t countShuftiMatches(svuint8_t mask_lo, svuint8_t mask_hi, - const svbool_t pg, const u8 *buf) { - svuint8_t vec = svld1_u8(pg, buf); - svuint8_t c_lo = svtbl(mask_lo, svand_z(svptrue_b8(), vec, (uint8_t)0xf)); - svuint8_t c_hi = svtbl(mask_hi, svlsr_z(svptrue_b8(), vec, 4)); - svuint8_t t = svand_z(svptrue_b8(), c_lo, c_hi); - return svcntp_b8(svptrue_b8(), svcmpne(pg, t, (uint8_t)0)); -} - -static really_inline -bool countShuftiLoopBody(svuint8_t mask_lo, svuint8_t mask_hi, - const svbool_t pg, const u8 *d, u32 target_count, - u32 *count_inout, const u8 **d_out) { - *count_inout += countShuftiMatches(mask_lo, mask_hi, pg, d); - if (*count_inout >= target_count) { - *d_out = d; - return true; - } - return false; -} - -static really_inline -bool countShuftiOnce(svuint8_t mask_lo, svuint8_t mask_hi, - const u8 *d, const u8 *d_end, u32 target_count, - u32 *count_inout, const u8 **d_out) { - svbool_t pg = svwhilelt_b8_s64(0, d_end - d); - return countShuftiLoopBody(mask_lo, mask_hi, pg, d, target_count, - count_inout, d_out); -} - -static really_inline -bool roseCountingMiracleScanShufti(svuint8_t mask_lo, svuint8_t mask_hi, - UNUSED u8 poison, const u8 *d, - const u8 *d_end, u32 target_count, - u32 *count_inout, const u8 **d_out) { - assert(d <= d_end); - size_t len = d_end - d; - if (len <= svcntb()) { - char rv = countShuftiOnce(mask_lo, mask_hi, d, d_end, target_count, - count_inout, d_out); - return rv; - } - // peel off first part to align to the vector size - const u8 *aligned_d_end = ROUNDDOWN_PTR(d_end, svcntb_pat(SV_POW2)); - assert(d < aligned_d_end); - if (d_end != aligned_d_end) { - if (countShuftiOnce(mask_lo, mask_hi, aligned_d_end, d_end, - target_count, count_inout, d_out)) return true; - d_end = aligned_d_end; - } - size_t loops = (d_end - d) / svcntb(); - for (size_t i = 0; i < loops; i++) { - d_end -= svcntb(); - if (countShuftiLoopBody(mask_lo, mask_hi, svptrue_b8(), d_end, - target_count, count_inout, d_out)) return true; - } - if (d != d_end) { - if (countShuftiOnce(mask_lo, mask_hi, d, d_end, - target_count, count_inout, d_out)) return true; - } - return false; -} - +#include "counting_miracle_shufti_sve.h" #else #define GET_LO_4(chars) and128(chars, low4bits) diff --git a/src/rose/counting_miracle_shufti_sve.h b/src/rose/counting_miracle_shufti_sve.h new file mode 100644 index 00000000..26991a82 --- /dev/null +++ b/src/rose/counting_miracle_shufti_sve.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2021, Arm Limited + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the 
following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +static really_inline +size_t countShuftiMatches(svuint8_t mask_lo, svuint8_t mask_hi, + const svbool_t pg, const u8 *buf) { + svuint8_t vec = svld1_u8(pg, buf); + svuint8_t c_lo = svtbl(mask_lo, svand_z(svptrue_b8(), vec, (uint8_t)0xf)); + svuint8_t c_hi = svtbl(mask_hi, svlsr_z(svptrue_b8(), vec, 4)); + svuint8_t t = svand_z(svptrue_b8(), c_lo, c_hi); + return svcntp_b8(svptrue_b8(), svcmpne(pg, t, (uint8_t)0)); +} + +static really_inline +bool countShuftiLoopBody(svuint8_t mask_lo, svuint8_t mask_hi, + const svbool_t pg, const u8 *d, u32 target_count, + u32 *count_inout, const u8 **d_out) { + *count_inout += countShuftiMatches(mask_lo, mask_hi, pg, d); + if (*count_inout >= target_count) { + *d_out = d; + return true; + } + return false; +} + +static really_inline +bool countShuftiOnce(svuint8_t mask_lo, svuint8_t mask_hi, + const u8 *d, const u8 *d_end, u32 target_count, + u32 *count_inout, const u8 **d_out) { + svbool_t pg = svwhilelt_b8_s64(0, d_end - d); + return countShuftiLoopBody(mask_lo, mask_hi, pg, d, target_count, + count_inout, d_out); +} + +static really_inline +bool roseCountingMiracleScanShufti(svuint8_t mask_lo, svuint8_t mask_hi, + UNUSED u8 poison, const u8 *d, + const u8 *d_end, u32 target_count, + u32 *count_inout, const u8 **d_out) { + assert(d <= d_end); + size_t len = d_end - d; + if (len <= svcntb()) { + char rv = countShuftiOnce(mask_lo, mask_hi, d, d_end, target_count, + count_inout, d_out); + return rv; + } + // peel off first part to align to the vector size + const u8 *aligned_d_end = ROUNDDOWN_PTR(d_end, svcntb_pat(SV_POW2)); + assert(d < aligned_d_end); + if (d_end != aligned_d_end) { + if (countShuftiOnce(mask_lo, mask_hi, aligned_d_end, d_end, + target_count, count_inout, d_out)) return true; + d_end = aligned_d_end; + } + size_t loops = (d_end - d) / svcntb(); + for (size_t i = 0; i < loops; i++) { + d_end -= svcntb(); + if (countShuftiLoopBody(mask_lo, mask_hi, svptrue_b8(), d_end, + target_count, count_inout, d_out)) return true; + } + if (d != d_end) { + if (countShuftiOnce(mask_lo, mask_hi, d, d_end, + target_count, count_inout, d_out)) return true; + } + return false; +} \ No 
newline at end of file diff --git a/src/rose/counting_miracle_sve.h b/src/rose/counting_miracle_sve.h new file mode 100644 index 00000000..8a7114f2 --- /dev/null +++ b/src/rose/counting_miracle_sve.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2021, Arm Limited + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +static really_inline +size_t countMatches(svuint8_t chars, svbool_t pg, const u8 *buf) { + svuint8_t vec = svld1_u8(pg, buf); + return svcntp_b8(svptrue_b8(), svmatch(pg, vec, chars)); +} + +static really_inline +bool countLoopBody(svuint8_t chars, svbool_t pg, const u8 *d, + u32 target_count, u32 *count_inout, const u8 **d_out) { + *count_inout += countMatches(chars, pg, d); + if (*count_inout >= target_count) { + *d_out = d; + return true; + } + return false; +} + +static really_inline +bool countOnce(svuint8_t chars, const u8 *d, const u8 *d_end, + u32 target_count, u32 *count_inout, const u8 **d_out) { + assert(d <= d_end); + svbool_t pg = svwhilelt_b8_s64(0, d_end - d); + return countLoopBody(chars, pg, d, target_count, count_inout, d_out); +} + +static really_inline +bool roseCountingMiracleScan(u8 c, const u8 *d, const u8 *d_end, + u32 target_count, u32 *count_inout, + const u8 **d_out) { + assert(d <= d_end); + svuint8_t chars = svdup_u8(c); + size_t len = d_end - d; + if (len <= svcntb()) { + bool rv = countOnce(chars, d, d_end, target_count, count_inout, d_out); + return rv; + } + // peel off first part to align to the vector size + const u8 *aligned_d_end = ROUNDDOWN_PTR(d_end, svcntb_pat(SV_POW2)); + assert(d < aligned_d_end); + if (d_end != aligned_d_end) { + if (countOnce(chars, aligned_d_end, d_end, + target_count, count_inout, d_out)) return true; + d_end = aligned_d_end; + } + size_t loops = (d_end - d) / svcntb(); + for (size_t i = 0; i < loops; i++) { + d_end -= svcntb(); + if (countLoopBody(chars, svptrue_b8(), d_end, + target_count, count_inout, d_out)) return true; + } + if (d != d_end) { + if (countOnce(chars, d, d_end, + target_count, count_inout, d_out)) return true; + } + return false; +} \ No newline at end of file diff --git a/src/util/arch/arm/bitutils.h b/src/util/arch/arm/bitutils.h index c73e623c..a2f98c99 100644 --- a/src/util/arch/arm/bitutils.h +++ b/src/util/arch/arm/bitutils.h @@ -119,24 +119,23 @@ m128 compress128_impl(m128 x, m128 m) { return res; } + +#if defined(HAVE_SVE2_BITPERM) +#include "bitutils_sve.h" +#else + static really_inline u32 expand32_impl(u32 x, u32 m) { -#if defined(HAVE_SVE2_BITPERM) - return svlasta(svpfalse(), svbdep(svdup_u32(x), m)); -#else return expand32_impl_c(x, m); -#endif } static really_inline u64a expand64_impl(u64a x, u64a m) { -#if defined(HAVE_SVE2_BITPERM) - return svlasta(svpfalse(), svbdep(svdup_u64(x), m)); -#else return expand64_impl_c(x, m); -#endif } +#endif // HAVE_SVE2_BITPERM + static really_inline m128 expand128_impl(m128 x, m128 m) { m128 one = set1_2x64(1); diff --git a/src/util/arch/arm/bitutils_sve.h b/src/util/arch/arm/bitutils_sve.h new file mode 100644 index 00000000..1cd503d5 --- /dev/null +++ b/src/util/arch/arm/bitutils_sve.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2021, Arm Limited + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Bit-twiddling primitives for SVE2 BITPERM (bdep-based expand etc)
+ */
+
+static really_inline
+u32 expand32_impl(u32 x, u32 m) {
+    return svlasta(svpfalse(), svbdep(svdup_u32(x), m));
+}
+
+static really_inline
+u64a expand64_impl(u64a x, u64a m) {
+    return svlasta(svpfalse(), svbdep(svdup_u64(x), m));
+}
+
+static really_inline
+void bdep64x2(u64a *d, const u64a *x, const m128 *m) {
+    svbool_t pg = svptrue_pat_b64(SV_VL2);
+    svst1(pg, (uint64_t *)d, svbdep(svld1_u64(pg, (const uint64_t *)x),
+                                    svld1_u64(pg, (const uint64_t *)m)));
+}
diff --git a/src/util/arch/arm/simd_utils.h b/src/util/arch/arm/simd_utils.h
index e5bc2948..917a6ad4 100644
--- a/src/util/arch/arm/simd_utils.h
+++ b/src/util/arch/arm/simd_utils.h
@@ -43,70 +43,11 @@
 #include "util/intrinsics.h"
 
 #ifdef HAVE_SVE
-
-really_really_inline
-uint64_t accelSearchGetOffset(svbool_t matched) {
-    return svcntp_b8(svptrue_b8(), svbrkb_z(svptrue_b8(), matched));
-}
-
-really_really_inline
-const u8 *accelSearchCheckMatched(const u8 *buf, svbool_t matched) {
-    if (unlikely(svptest_any(svptrue_b8(), matched))) {
-        const u8 *matchPos = buf + accelSearchGetOffset(matched);
-        DEBUG_PRINTF("match pos %p\n", matchPos);
-        return matchPos;
-    }
-    return NULL;
-}
-
-really_really_inline
-const u8 *accelRevSearchCheckMatched(const u8 *buf, svbool_t matched) {
-    if (unlikely(svptest_any(svptrue_b8(), matched))) {
-        const u8 *matchPos = buf + (svcntb() -
-            svcntp_b8(svptrue_b8(), svbrka_z(svptrue_b8(), svrev_b8(matched))));
-        DEBUG_PRINTF("match pos %p\n", matchPos);
-        return matchPos;
-    }
-    return NULL;
-}
-
-static really_inline
-svuint8_t getSVEMaskFrom128(m128 mask) {
-    return svld1_u8(svptrue_pat_b8(SV_VL16), (const uint8_t *)&mask);
-}
-
+#include "simd_utils_sve.h"
 #endif
 
 #ifdef HAVE_SVE2
-
-static really_inline
-svuint8_t getCharMaskSingle(const u8 c, bool noCase) {
-    if (noCase) {
-        uint16_t chars_u16 = (c & 0xdf) | ((c | 0x20) << 8);
-        return svreinterpret_u8(svdup_u16(chars_u16));
-    } else {
-        return svdup_u8(c);
-    }
-}
-
-static really_inline
-svuint16_t getCharMaskDouble(const u8 c0, const u8 c1, bool noCase) {
-    if (noCase) {
-        const uint64_t lowerFirst = c0 & 0xdf;
-        const uint64_t upperFirst = c0 | 0x20;
-        const uint64_t lowerSecond = c1 & 0xdf;
-        const uint64_t upperSecond = c1 | 0x20;
-        const uint64_t chars = lowerFirst | (lowerSecond << 8)
-                             | (lowerFirst << 16) | (upperSecond) << 24
-                             | (upperFirst << 32) | (lowerSecond) << 40
-                             | (upperFirst << 48) | (upperSecond) << 56;
-        return svreinterpret_u16(svdup_u64(chars));
-    } else {
-        uint16_t chars_u16 = c0 | (c1 << 8);
-        return svdup_u16(chars_u16);
-    }
-}
-
+#include "simd_utils_sve2.h"
 #endif
 
 #include <string.h>  // for memcpy
diff --git a/src/util/arch/arm/simd_utils_sve.h b/src/util/arch/arm/simd_utils_sve.h
new file mode 100644
index 00000000..48a4a933
--- /dev/null
+++ b/src/util/arch/arm/simd_utils_sve.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021, Arm Limited
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief SVE primitive operations.
+ */
+
+really_really_inline
+uint64_t accelSearchGetOffset(svbool_t matched) {
+    return svcntp_b8(svptrue_b8(), svbrkb_z(svptrue_b8(), matched));
+}
+
+really_really_inline
+const u8 *accelSearchCheckMatched(const u8 *buf, svbool_t matched) {
+    if (unlikely(svptest_any(svptrue_b8(), matched))) {
+        const u8 *matchPos = buf + accelSearchGetOffset(matched);
+        DEBUG_PRINTF("match pos %p\n", matchPos);
+        return matchPos;
+    }
+    return NULL;
+}
+
+really_really_inline
+const u8 *accelRevSearchCheckMatched(const u8 *buf, svbool_t matched) {
+    if (unlikely(svptest_any(svptrue_b8(), matched))) {
+        const u8 *matchPos = buf + (svcntb() -
+            svcntp_b8(svptrue_b8(), svbrka_z(svptrue_b8(), svrev_b8(matched))));
+        DEBUG_PRINTF("match pos %p\n", matchPos);
+        return matchPos;
+    }
+    return NULL;
+}
+
+static really_inline
+svuint8_t getSVEMaskFrom128(m128 mask) {
+    return svld1_u8(svptrue_pat_b8(SV_VL16), (const uint8_t *)&mask);
+}
\ No newline at end of file
diff --git a/src/util/arch/arm/simd_utils_sve2.h b/src/util/arch/arm/simd_utils_sve2.h
new file mode 100644
index 00000000..188ef3ff
--- /dev/null
+++ b/src/util/arch/arm/simd_utils_sve2.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2021, Arm Limited
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief SVE2 primitive operations.
+ */
+
+static really_inline
+svuint8_t getCharMaskSingle(const u8 c, bool noCase) {
+    if (noCase) {
+        uint16_t chars_u16 = (c & 0xdf) | ((c | 0x20) << 8);
+        return svreinterpret_u8(svdup_u16(chars_u16));
+    } else {
+        return svdup_u8(c);
+    }
+}
+
+static really_inline
+svuint16_t getCharMaskDouble(const u8 c0, const u8 c1, bool noCase) {
+    if (noCase) {
+        const uint64_t lowerFirst = c0 & 0xdf;
+        const uint64_t upperFirst = c0 | 0x20;
+        const uint64_t lowerSecond = c1 & 0xdf;
+        const uint64_t upperSecond = c1 | 0x20;
+        const uint64_t chars = lowerFirst | (lowerSecond << 8)
+                             | (lowerFirst << 16) | (upperSecond) << 24
+                             | (upperFirst << 32) | (lowerSecond) << 40
+                             | (upperFirst << 48) | (upperSecond) << 56;
+        return svreinterpret_u16(svdup_u64(chars));
+    } else {
+        uint16_t chars_u16 = c0 | (c1 << 8);
+        return svdup_u16(chars_u16);
+    }
+}
\ No newline at end of file
diff --git a/src/util/state_compress.c b/src/util/state_compress.c
index e3f50949..729eedb3 100644
--- a/src/util/state_compress.c
+++ b/src/util/state_compress.c
@@ -83,17 +83,6 @@ void loadcompressed64(u64a *x, const void *ptr, const u64a *m, u32 bytes) {
 #endif
 }
 
-#if defined(HAVE_SVE2_BITPERM)
-
-static really_inline
-void bdep64x2(u64a *d, const u64a *x, const m128 *m) {
-    svbool_t pg = svptrue_pat_b64(SV_VL2);
-    svst1(pg, (uint64_t *)d, svbdep(svld1_u64(pg, (const uint64_t *)x),
-                                    svld1_u64(pg, (const uint64_t *)m)));
-}
-
-#endif // HAVE_SVE2_BITPERM
-
 /*
  * 128-bit store/load.
  */
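
Background on the HAVE_SVE2_BITPERM path above: expand32_impl/expand64_impl broadcast x into a vector, run svbdep against the mask m, and take lane 0, so each is just a bit-deposit (PDEP-style) expand. A minimal scalar model of that deposit operation, for reference only; it assumes the standard architectural BDEP/PDEP definition and the helper name is illustrative, not part of the patch:

#include <stdint.h>

/* Scalar model of a 64-bit bit deposit (bdep/PDEP): consecutive low-order
 * bits of x are placed, LSB first, at the positions of the set bits of m.
 * e.g. scalar_bdep64(0x5, 0xf0) == 0x50. */
uint64_t scalar_bdep64(uint64_t x, uint64_t m) {
    uint64_t result = 0;
    uint64_t src_bit = 1;
    for (uint64_t mask = m; mask; mask &= mask - 1) {
        uint64_t dest_bit = mask & -mask; /* lowest remaining mask bit */
        if (x & src_bit) {
            result |= dest_bit;           /* deposit the next source bit */
        }
        src_bit <<= 1;
    }
    return result;
}

Under this model, expand64_impl(x, m) computes scalar_bdep64(x, m), which is exactly what the portable expand64_impl_c fallback produces on builds without SVE2 BITPERM.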