From 56ef2d5f725f774e84a299686054a4f3bcbdb1ca Mon Sep 17 00:00:00 2001
From: George Wort
Date: Fri, 2 Jul 2021 15:53:43 +0100
Subject: [PATCH] Use SVE2 for counting miracles.

Change-Id: I048dc182e5f4e726b847b3285ffafef4f538e550
---
 src/rose/counting_miracle.h | 94 +++++++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)

diff --git a/src/rose/counting_miracle.h b/src/rose/counting_miracle.h
index 6210fca5..668de996 100644
--- a/src/rose/counting_miracle.h
+++ b/src/rose/counting_miracle.h
@@ -39,6 +39,98 @@
  * stop character. */
 #define COUNTING_MIRACLE_LEN_MAX 256
 
+#ifdef HAVE_SVE2
+
+/*
+ * Load the bytes at buf in the lanes enabled by pg and return how many of
+ * those lanes svmatch() flags as matching chars.
+ */
+static really_inline
+size_t countMatches(const svuint8_t chars, const svbool_t pg, const u8 *buf) {
+    const svuint8_t data = svld1_u8(pg, buf);
+    const svbool_t hits = svmatch(pg, data, chars);
+    return svcntp_b8(svptrue_b8(), hits);
+}
+
+/*
+ * Add the matches found in the pg-enabled lanes at d to *count_inout.
+ *
+ * Returns true, with *d_out set to d, once the running total is at least
+ * target_count. Otherwise returns false and leaves *d_out untouched.
+ */
+static really_inline
+bool countLoopBody(const svuint8_t chars, const svbool_t pg, const u8 *d,
+                   u32 target_count, u32 *count_inout, const u8 **d_out) {
+    *count_inout += countMatches(chars, pg, d);
+    if (*count_inout < target_count) {
+        return false;
+    }
+    *d_out = d;
+    return true;
+}
+
+/*
+ * One counting step over [d, d_end): the predicate comes from svwhilelt on
+ * the byte count d_end - d, so only lanes inside the range are enabled.
+ * Return value and outputs are those of countLoopBody().
+ */
+static really_inline
+bool countOnce(const svuint8_t chars, const u8 *d, const u8 *d_end,
+               u32 target_count, u32 *count_inout, const u8 **d_out) {
+    assert(d <= d_end);
+    const svbool_t in_range = svwhilelt_b8_s64(0, d_end - d);
+    return countLoopBody(chars, in_range, d, target_count, count_inout, d_out);
+}
+
+/*
+ * Count occurrences of byte c in [d, d_end), working backwards from d_end
+ * one vector-sized chunk at a time and accumulating into *count_inout.
+ *
+ * Returns true as soon as the running total is at least target_count, with
+ * *d_out set to the start of the chunk that got it there. Returns false if
+ * the whole range has been counted without reaching target_count.
+ */
+static really_inline
+bool roseCountingMiracleScan(u8 c, const u8 *d, const u8 *d_end,
+                             u32 target_count, u32 *count_inout,
+                             const u8 **d_out) {
+    assert(d <= d_end);
+    const svuint8_t chars = svdup_u8(c);
+    const size_t len = d_end - d;
+    if (len <= svcntb()) {
+        // no more than one vector of data: a single predicated step covers it
+        return countOnce(chars, d, d_end, target_count, count_inout, d_out);
+    }
+
+    // peel off the bytes behind the rounded-down end pointer first, so the
+    // main loop starts from an aligned d_end
+    const u8 *aligned_d_end = ROUNDDOWN_PTR(d_end, svcntb_pat(SV_POW2));
+    assert(d < aligned_d_end);
+    if (d_end != aligned_d_end &&
+        countOnce(chars, aligned_d_end, d_end,
+                  target_count, count_inout, d_out)) {
+        return true;
+    }
+    d_end = aligned_d_end;
+
+    // whole vectors, walking d_end down towards d
+    for (size_t left = (d_end - d) / svcntb(); left != 0; left--) {
+        d_end -= svcntb();
+        if (countLoopBody(chars, svptrue_b8(), d_end,
+                          target_count, count_inout, d_out)) {
+            return true;
+        }
+    }
+
+    // partial vector left between d and the lowest whole vector, if any
+    if (d == d_end) {
+        return false;
+    }
+    return countOnce(chars, d, d_end, target_count, count_inout, d_out);
+}
+
+#else
+
 static really_inline
 char roseCountingMiracleScan(u8 c, const u8 *d, const u8 *d_end,
                              u32 target_count, u32 *count_inout,
@@ -81,6 +173,8 @@ char roseCountingMiracleScan(u8 c, const u8 *d, const u8 *d_end,
     return 0;
 }
 
+#endif
+
 #define GET_LO_4(chars) and128(chars, low4bits)
 #define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4)
 