diff --git a/src/hwlm/noodle_engine.cpp b/src/hwlm/noodle_engine.cpp index dfda1ce9..f898c7b7 100644 --- a/src/hwlm/noodle_engine.cpp +++ b/src/hwlm/noodle_engine.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -66,21 +67,15 @@ struct cb_info { } \ } -#if !defined(HAVE_SVE) -#include "noodle_engine_simd.hpp" -#endif - // Make sure the rest of the string is there. The single character scanner // is used only for single chars with case insensitivity used correctly, // so it can go straight to the callback if we get this far. static really_inline hwlm_error_t final(const struct noodTable *n, const u8 *buf, UNUSED size_t len, - char single, const struct cb_info *cbi, size_t pos) { + bool needsConfirm, const struct cb_info *cbi, size_t pos) { u64a v{0}; - if (single) { - if (n->msk_len == 1) { - goto match; - } + if (!needsConfirm) { + goto match; } assert(len >= n->msk_len); v = partial_load_u64a(buf + pos + n->key_offset - n->msk_len, n->msk_len); @@ -100,31 +95,11 @@ match: return HWLM_SUCCESS; } -static really_really_inline -hwlm_error_t single_zscan(const struct noodTable *n,const u8 *d, const u8 *buf, - Z_TYPE z, size_t len, const struct cb_info *cbi) { - while (unlikely(z)) { - Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); - size_t matchPos = d - buf + pos; - DEBUG_PRINTF("match pos %zu\n", matchPos); - hwlmcb_rv_t rv = final(n, buf, len, 1, cbi, matchPos); - RETURN_IF_TERMINATED(rv); - } - return HWLM_SUCCESS; -} - -static really_really_inline -hwlm_error_t double_zscan(const struct noodTable *n,const u8 *d, const u8 *buf, - Z_TYPE z, size_t len, const struct cb_info *cbi) { - while (unlikely(z)) { - Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); - size_t matchPos = d - buf + pos - 1; \ - DEBUG_PRINTF("match pos %zu\n", matchPos); - hwlmcb_rv_t rv = 
final(n, buf, len, 0, cbi, matchPos); - RETURN_IF_TERMINATED(rv); - } - return HWLM_SUCCESS; -} +#ifdef HAVE_SVE2 +#include "noodle_engine_sve.hpp" +#else +#include "noodle_engine_simd.hpp" +#endif // main entry point for the scan code static really_inline diff --git a/src/hwlm/noodle_engine_simd.hpp b/src/hwlm/noodle_engine_simd.hpp index ac5f10cd..510f179e 100644 --- a/src/hwlm/noodle_engine_simd.hpp +++ b/src/hwlm/noodle_engine_simd.hpp @@ -50,6 +50,33 @@ static really_inline SuperVector getCaseMask(void) { return SuperVector(CASEMASK[1]); } + +static really_really_inline +hwlm_error_t single_zscan(const struct noodTable *n,const u8 *d, const u8 *buf, + Z_TYPE z, size_t len, const struct cb_info *cbi) { + while (unlikely(z)) { + Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); + size_t matchPos = d - buf + pos; + DEBUG_PRINTF("match pos %zu\n", matchPos); + hwlmcb_rv_t rv = final(n, buf, len, n->msk_len != 1, cbi, matchPos); + RETURN_IF_TERMINATED(rv); + } + return HWLM_SUCCESS; +} + +static really_really_inline +hwlm_error_t double_zscan(const struct noodTable *n,const u8 *d, const u8 *buf, + Z_TYPE z, size_t len, const struct cb_info *cbi) { + while (unlikely(z)) { + Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); + size_t matchPos = d - buf + pos - 1; + DEBUG_PRINTF("match pos %zu\n", matchPos); + hwlmcb_rv_t rv = final(n, buf, len, true, cbi, matchPos); + RETURN_IF_TERMINATED(rv); + } + return HWLM_SUCCESS; +} + // The short scan routine. 
It is used both to scan data up to an // alignment boundary if needed and to finish off data that the aligned scan // function can't handle (due to small/unaligned chunk at end) @@ -146,7 +173,7 @@ hwlm_error_t scanSingleMain(const struct noodTable *n, const u8 *buf, template static really_inline hwlm_error_t scanDoubleMain(const struct noodTable *n, const u8 *buf, - size_t len, size_t offset, + size_t len, size_t offset, SuperVector caseMask, SuperVector mask1, SuperVector mask2, const struct cb_info *cbi) { // we stop scanning for the key-fragment when the rest of the key can't diff --git a/src/hwlm/noodle_engine_sve.hpp b/src/hwlm/noodle_engine_sve.hpp new file mode 100644 index 00000000..41329737 --- /dev/null +++ b/src/hwlm/noodle_engine_sve.hpp @@ -0,0 +1,308 @@ +/* + * Copyright (c) 2021, Arm Limited + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// Build the needle vector for the single-character scan. With noCase the
+// 16-bit pattern {key0 & 0xdf, key0 | 0x20} is broadcast, so both case
+// variants of key0 are present; otherwise key0 alone fills every byte lane.
+static really_inline
+svuint8_t getCharMaskSingle(const struct noodTable *n, bool noCase) {
+    if (noCase) {
+        uint16_t chars_u16 = (n->key0 & 0xdf) | ((n->key0 | 0x20) << 8);
+        return svreinterpret_u8(svdup_u16(chars_u16));
+    } else {
+        return svdup_u8(n->key0);
+    }
+}
+
+// Walk every active lane of `matched` in ascending order and hand the
+// corresponding buffer position (lane index relative to d) to final().
+// Callers must pass a predicate with at least one active lane: the do/while
+// body runs once before the predicate is tested. RETURN_IF_TERMINATED
+// (defined elsewhere) may leave the loop early; otherwise HWLM_SUCCESS.
+static really_inline
+hwlm_error_t checkMatched(const struct noodTable *n, const u8 *buf, size_t len,
+                          const struct cb_info *cbi, const u8 *d,
+                          svbool_t matched, bool needsConfirm) {
+    assert(d >= buf);
+    size_t basePos = d - buf;
+    svbool_t next_match = svpnext_b8(matched, svpfalse());
+    do {
+        svbool_t brk = svbrkb_z(svptrue_b8(), next_match);
+        size_t matchPos = basePos + svcntp_b8(svptrue_b8(), brk);
+        DEBUG_PRINTF("match pos %zu\n", matchPos);
+        assert(matchPos < len);
+        hwlmcb_rv_t rv = final(n, buf, len, needsConfirm, cbi, matchPos);
+        RETURN_IF_TERMINATED(rv);
+        next_match = svpnext_b8(matched, next_match);
+    } while (unlikely(svptest_any(svptrue_b8(), next_match)));
+    return HWLM_SUCCESS;
+}
+
+// Report the hits of one single-character vector, if there are any. Hits are
+// confirmed in final() unless the literal mask is exactly one byte long.
+static really_inline
+hwlm_error_t singleCheckMatched(const struct noodTable *n, const u8 *buf,
+                                size_t len, const struct cb_info *cbi,
+                                const u8 *d, svbool_t matched) {
+    if (unlikely(svptest_any(svptrue_b8(), matched))) {
+        hwlmcb_rv_t rv = checkMatched(n, buf, len, cbi, d, matched,
+                                      n->msk_len != 1);
+        RETURN_IF_TERMINATED(rv);
+    }
+    return HWLM_SUCCESS;
+}
+
+// Load the lanes selected by pg from d and flag each byte that equals one of
+// the needle bytes in chars.
+static really_inline
+svbool_t singleMatched(svuint8_t chars, const u8 *d, svbool_t pg) {
+    return svmatch(pg, svld1_u8(pg, d), chars);
+}
+
+// Scan the partial vector [d, e): only the first e - d lanes are made active,
+// so the predicated load does not touch bytes at or beyond e.
+static really_inline
+hwlm_error_t scanSingleOnce(const struct noodTable *n, const u8 *buf,
+                            size_t len, const struct cb_info *cbi,
+                            svuint8_t chars, const u8 *d, const u8 *e) {
+    DEBUG_PRINTF("start %p end %p\n", d, e);
+    assert(d < e);
+    assert(d >= buf);
+    DEBUG_PRINTF("l = %td\n", e - d);
+    svbool_t pg = svwhilelt_b8_s64(0, e - d);
+    svbool_t matched = singleMatched(chars, d, pg);
+    return singleCheckMatched(n, buf, len, cbi, d, matched);
+}
+
+// Scan [d, e) one full vector at a time, then finish any remainder shorter
+// than a vector with scanSingleOnce().
+static really_inline
+hwlm_error_t scanSingleLoop(const struct noodTable *n, const u8 *buf,
+                            size_t len, const struct cb_info *cbi,
+                            svuint8_t chars, const u8 *d, const u8 *e) {
+    assert(d < e);
+    assert(d >= buf);
+    size_t loops = (e - d) / svcntb();
+    DEBUG_PRINTF("loops %zu \n", loops);
+    assert(d + (loops * svcntb()) <= e);
+
+    for (size_t i = 0; i < loops; i++, d += svcntb()) {
+        DEBUG_PRINTF("d %p \n", d);
+        svbool_t matched = singleMatched(chars, d, svptrue_b8());
+        hwlmcb_rv_t rv = singleCheckMatched(n, buf, len, cbi, d, matched);
+        RETURN_IF_TERMINATED(rv);
+    }
+    DEBUG_PRINTF("d %p e %p \n", d, e);
+    return d == e ? HWLM_SUCCESS
+                  : scanSingleOnce(n, buf, len, cbi, chars, d, e);
+}
+
+// Entry point of the single-character scan over buf[offset + msk_len - 1,
+// len). The unaligned head is scanned on its own so that the main loop
+// starts on a vector-size boundary.
+static really_inline
+hwlm_error_t scanSingle(const struct noodTable *n, const u8 *buf, size_t len,
+                        size_t offset, bool noCase, const struct cb_info *cbi) {
+    if (!ourisalpha(n->key0)) {
+        noCase = false; // force noCase off if we don't have an alphabetic char
+    }
+
+    size_t start = offset + n->msk_len - 1;
+    const u8 *d = buf + start;
+    const u8 *e = buf + len;
+    DEBUG_PRINTF("start %p end %p \n", d, e);
+    assert(d < e);
+    assert(d >= buf);
+
+    svuint8_t chars = getCharMaskSingle(n, noCase);
+
+    // peel off first part to align to the vector size
+    const u8 *d1 = ROUNDUP_PTR(d, svcntb_pat(SV_POW2));
+    if (d != d1) {
+        if (d1 >= e) {
+            return scanSingleOnce(n, buf, len, cbi, chars, d, e);
+        } else {
+            DEBUG_PRINTF("until aligned %p \n", d1);
+            hwlmcb_rv_t rv = scanSingleOnce(n, buf, len, cbi, chars, d, d1);
+            RETURN_IF_TERMINATED(rv);
+        }
+    }
+    return scanSingleLoop(n, buf, len, cbi, chars, d1, e);
+}
+
+// Build the needle vector for the two-character scan: each 16-bit lane holds
+// key0 in its low byte and key1 in its high byte. With noCase the four
+// combinations of the two case variants of each key byte are packed into
+// one 64-bit pattern and broadcast.
+static really_inline
+svuint16_t getCharMaskDouble(const struct noodTable *n, bool noCase) {
+    if (noCase) {
+        const uint64_t upperFirst = n->key0 & 0xdf;  // bit 0x20 cleared
+        const uint64_t lowerFirst = n->key0 | 0x20;  // bit 0x20 set
+        const uint64_t upperSecond = n->key1 & 0xdf; // bit 0x20 cleared
+        const uint64_t lowerSecond = n->key1 | 0x20; // bit 0x20 set
+        const uint64_t chars = upperFirst | (upperSecond << 8)
+                          | (upperFirst << 16) | (lowerSecond << 24)
+                          | (lowerFirst << 32) | (upperSecond << 40)
+                          | (lowerFirst << 48) | (lowerSecond << 56);
+        return svreinterpret_u16(svdup_u64(chars));
+    } else {
+        uint16_t chars_u16 = n->key0 | (n->key1 << 8);
+        return svdup_u16(chars_u16);
+    }
+}
+
+// Merge the hits of the two loads performed by doubleMatched() into a single
+// predicate relative to d - 1 and report them: `matched` (load at d) is
+// shifted by one lane, `matched_rot` (load at d - 1) is used as is. Hits are
+// confirmed in final() unless the literal mask is exactly two bytes long.
+static really_inline
+hwlm_error_t doubleCheckMatched(const struct noodTable *n, const u8 *buf,
+                                size_t len, const struct cb_info *cbi,
+                                const u8 *d, svbool_t matched,
+                                svbool_t matched_rot, svbool_t any) {
+    if (unlikely(svptest_any(svptrue_b8(), any))) {
+        // Project predicate onto vector.
+        svuint8_t matched_vec = svdup_u8_z(matched, 1);
+        // Shift vector to right by one and project back to the predicate.
+        matched = svcmpeq_n_u8(svptrue_b8(), svinsr_n_u8(matched_vec, 0), 1);
+        matched = svorr_z(svptrue_b8(), matched, matched_rot);
+        // d - 1 won't underflow as the first position in buf has been dealt
+        // with meaning that d > buf
+        assert(d > buf);
+        hwlmcb_rv_t rv = checkMatched(n, buf, len, cbi, d - 1, matched,
+                                      n->msk_len != 2);
+        RETURN_IF_TERMINATED(rv);
+    }
+    return HWLM_SUCCESS;
+}
+
+// Compare the 16-bit pairs starting at d and, through a second load at d - 1,
+// the pairs straddling them, so that pairs at both even and odd offsets are
+// covered. Stores the two hit predicates through matched / matched_rot and
+// returns their union.
+static really_inline
+svbool_t doubleMatched(svuint16_t chars, const u8 *d,
+                       svbool_t pg, svbool_t pg_rot,
+                       svbool_t * const matched, svbool_t * const matched_rot) {
+    svuint16_t vec = svreinterpret_u16(svld1_u8(pg, d));
+    // d - 1 won't underflow as the first position in buf has been dealt
+    // with meaning that d > buf
+    svuint16_t vec_rot = svreinterpret_u16(svld1_u8(pg_rot, d - 1));
+    *matched = svmatch(pg, vec, chars);
+    *matched_rot = svmatch(pg_rot, vec_rot, chars);
+    return svorr_z(svptrue_b8(), *matched, *matched_rot);
+}
+
+// Scan the partial vector [d, e) for the key pair; the load at d - 1 gets one
+// extra active lane so that it also ends at e.
+static really_inline
+hwlm_error_t scanDoubleOnce(const struct noodTable *n, const u8 *buf,
+                            size_t len, const struct cb_info *cbi,
+                            svuint16_t chars, const u8 *d, const u8 *e) {
+    DEBUG_PRINTF("start %p end %p\n", d, e);
+    assert(d < e);
+    assert(d > buf);
+    svbool_t pg = svwhilelt_b8_s64(0, e - d);
+    svbool_t pg_rot = svwhilelt_b8_s64(0, e - d + 1);
+    svbool_t matched, matched_rot;
+    svbool_t any = doubleMatched(chars, d, pg, pg_rot, &matched, &matched_rot);
+    return doubleCheckMatched(n, buf, len, cbi, d, matched, matched_rot, any);
+}
+
+// Scan [d, e) one full vector at a time, then finish any remainder shorter
+// than a vector with scanDoubleOnce().
+static really_inline
+hwlm_error_t scanDoubleLoop(const struct noodTable *n, const u8 *buf,
+                            size_t len, const struct cb_info *cbi,
+                            svuint16_t chars, const u8 *d, const u8 *e) {
+    assert(d < e);
+    assert(d > buf);
+    size_t loops = (e - d) / svcntb();
+    DEBUG_PRINTF("loops %zu \n", loops);
+    assert(d + (loops * svcntb()) <= e);
+
+    for (size_t i = 0; i < loops; i++, d += svcntb()) {
+        DEBUG_PRINTF("d %p \n", d);
+        svbool_t matched, matched_rot;
+        svbool_t any = doubleMatched(chars, d, svptrue_b8(), svptrue_b8(),
+                                     &matched, &matched_rot);
+        hwlm_error_t rv = doubleCheckMatched(n, buf, len, cbi, d,
+                                             matched, matched_rot, any);
+        RETURN_IF_TERMINATED(rv);
+    }
+    DEBUG_PRINTF("d %p e %p \n", d, e);
+
+    return d == e ? HWLM_SUCCESS
+                  : scanDoubleOnce(n, buf, len, cbi, chars, d, e);
+}
+
+// Entry point of the two-character scan. The first candidate position is
+// confirmed in scalar code so that the vector code, which also loads from
+// d - 1, never reads in front of buf.
+static really_inline
+hwlm_error_t scanDouble(const struct noodTable *n, const u8 *buf, size_t len,
+                        size_t offset, bool noCase, const struct cb_info *cbi) {
+    // we stop scanning for the key-fragment when the rest of the key can't
+    // possibly fit in the remaining buffer
+    size_t end = len - n->key_offset + 2;
+
+    size_t start = offset + n->msk_len - n->key_offset;
+
+    const u8 *d = buf + start;
+    const u8 *e = buf + end;
+    DEBUG_PRINTF("start %p end %p \n", d, e);
+    assert(d < e);
+    assert(d >= buf);
+
+    // Check first position in scalar so as to remove underflow possibilities.
+    size_t matchPos = d - buf;
+    DEBUG_PRINTF("Test match pos %zu\n", matchPos);
+    // final() addresses the data as buf + pos, so it takes buf rather than d.
+    RETURN_IF_TERMINATED(final(n, buf, len, true, cbi, matchPos));
+    d += 2;
+    if (d >= e) {
+        return HWLM_SUCCESS;
+    }
+
+    svuint16_t chars = getCharMaskDouble(n, noCase);
+
+    // peel off first part to align to the vector size
+    const u8 *d1 = ROUNDUP_PTR(d, svcntb_pat(SV_POW2));
+    if (d != d1) {
+        if (d1 >= e) {
+            return scanDoubleOnce(n, buf, len, cbi, chars, d, e);
+        } else {
+            DEBUG_PRINTF("until aligned %p \n", d1);
+            hwlmcb_rv_t rv = scanDoubleOnce(n, buf, len, cbi, chars,
+                                            d, d1);
+            RETURN_IF_TERMINATED(rv);
+        }
+    }
+    return scanDoubleLoop(n, buf, len, cbi, chars, d1, e);
+}
\ No newline at end of file