From df926ef62fd12ab332ad1c7ea55a1f865d42e3bc Mon Sep 17 00:00:00 2001 From: George Wort Date: Mon, 28 Jun 2021 16:29:43 +0100 Subject: [PATCH] Implement new Vermicelli16 acceleration functions using SVE2. The scheme utilises the MATCH and NMATCH instructions to scan for 16 characters at the same rate as vermicelli scans for one. Change-Id: Ie2cef904c56651e6108593c668e9b65bc001a886 --- CMakeLists.txt | 2 + src/hwlm/hwlm.c | 6 + src/nfa/accel.c | 12 ++ src/nfa/accel.h | 7 + src/nfa/accel_dfa_build_strat.cpp | 11 ++ src/nfa/accelcompile.cpp | 12 ++ src/nfa/castle.c | 85 ++++++++++ src/nfa/castle_internal.h | 6 + src/nfa/castlecompile.cpp | 15 ++ src/nfa/lbr.c | 111 +++++++++++++ src/nfa/lbr.h | 47 ++++++ src/nfa/lbr_internal.h | 6 + src/nfa/mpv.c | 8 + src/nfa/mpv_internal.h | 6 + src/nfa/mpvcompile.cpp | 10 ++ src/nfa/nfa_api_dispatch.c | 10 ++ src/nfa/nfa_build_util.cpp | 37 +++++ src/nfa/nfa_internal.h | 8 + src/nfa/vermicelli_sve.h | 59 ++++++- src/nfa/vermicellicompile.cpp | 53 ++++++ src/nfa/vermicellicompile.h | 48 ++++++ src/nfagraph/ng_lbr.cpp | 62 +++++++ src/rose/rose_build_lit_accel.cpp | 13 ++ unit/internal/rvermicelli.cpp | 265 ++++++++++++++++++++++++++++++ unit/internal/vermicelli.cpp | 262 +++++++++++++++++++++++++++++ 25 files changed, 1153 insertions(+), 8 deletions(-) create mode 100644 src/nfa/vermicellicompile.cpp create mode 100644 src/nfa/vermicellicompile.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 8bfb78dc..f246932c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -879,6 +879,8 @@ SET (hs_compile_SRCS src/nfa/tamaramacompile.h src/nfa/trufflecompile.cpp src/nfa/trufflecompile.h + src/nfa/vermicellicompile.cpp + src/nfa/vermicellicompile.h src/nfagraph/ng.cpp src/nfagraph/ng.h src/nfagraph/ng_anchored_acyclic.cpp diff --git a/src/hwlm/hwlm.c b/src/hwlm/hwlm.c index 8cf585a9..c1c2837f 100644 --- a/src/hwlm/hwlm.c +++ b/src/hwlm/hwlm.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2021, Arm 
Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -62,6 +63,11 @@ const u8 *run_hwlm_accel(const union AccelAux *aux, const u8 *ptr, DEBUG_PRINTF("double vermicelli-nocase for 0x%02hhx%02hhx\n", aux->dverm.c1, aux->dverm.c2); return vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 1, ptr, end); +#ifdef HAVE_SVE2 + case ACCEL_VERM16: + DEBUG_PRINTF("single vermicelli16\n"); + return vermicelli16Exec(aux->verm16.mask, ptr, end); +#endif // HAVE_SVE2 case ACCEL_SHUFTI: DEBUG_PRINTF("single shufti\n"); return shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end); diff --git a/src/nfa/accel.c b/src/nfa/accel.c index 2bc60945..8c9b6e72 100644 --- a/src/nfa/accel.c +++ b/src/nfa/accel.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -81,6 +82,17 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) { c_end - 1); break; +#ifdef HAVE_SVE2 + case ACCEL_VERM16: + DEBUG_PRINTF("accel verm16 %p %p\n", c, c_end); + if (c_end - c < 16) { + return c; + } + + rv = vermicelli16Exec(accel->verm16.mask, c, c_end); + break; +#endif // HAVE_SVE2 + case ACCEL_DVERM_MASKED: DEBUG_PRINTF("accel dverm masked %p %p\n", c, c_end); if (c + 16 + 1 >= c_end) { diff --git a/src/nfa/accel.h b/src/nfa/accel.h index 3a03d059..0676239a 100644 --- a/src/nfa/accel.h +++ b/src/nfa/accel.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -62,6 +63,7 @@ enum AccelType { ACCEL_TRUFFLE, ACCEL_RED_TAPE, ACCEL_DVERM_MASKED, + ACCEL_VERM16 }; /** \brief Structure for accel 
framework. */ @@ -97,6 +99,11 @@ union AccelAux { u8 len1; u8 len2; } mdverm; + struct { + u8 accel_type; + u8 offset; + m128 mask; + } verm16; struct { u8 accel_type; u8 offset; diff --git a/src/nfa/accel_dfa_build_strat.cpp b/src/nfa/accel_dfa_build_strat.cpp index 16a19f80..cfca9397 100644 --- a/src/nfa/accel_dfa_build_strat.cpp +++ b/src/nfa/accel_dfa_build_strat.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +34,7 @@ #include "nfagraph/ng_limex_accel.h" #include "shufticompile.h" #include "trufflecompile.h" +#include "vermicellicompile.h" #include "util/accel_scheme.h" #include "util/charreach.h" #include "util/container.h" @@ -514,6 +516,15 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx, return; } +#ifdef HAVE_SVE2 + if (info.cr.count() <= 16) { + accel->accel_type = ACCEL_VERM16; + vermicelli16Build(info.cr, (u8 *)&accel->verm16.mask); + DEBUG_PRINTF("state %hu is vermicelli16\n", this_idx); + return; + } +#endif // HAVE_SVE2 + if (info.cr.count() > max_floating_stop_char()) { accel->accel_type = ACCEL_NONE; DEBUG_PRINTF("state %hu is too broad\n", this_idx); diff --git a/src/nfa/accelcompile.cpp b/src/nfa/accelcompile.cpp index a224410d..f68ed1b9 100644 --- a/src/nfa/accelcompile.cpp +++ b/src/nfa/accelcompile.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,6 +30,7 @@ #include "accel.h" #include "accelcompile.h" #include "shufticompile.h" +#include "vermicellicompile.h" #include "trufflecompile.h" #include "nfagraph/ng_limex_accel.h" /* for constants */ #include "util/bitutils.h" @@ -71,6 +73,16 @@ void 
buildAccelSingle(const AccelInfo &info, AccelAux *aux) { return; } +#ifdef HAVE_SVE2 + if (outs <= 16) { + aux->accel_type = ACCEL_VERM16; + aux->verm16.offset = offset; + vermicelli16Build(info.single_stops, (u8 *)&aux->verm16.mask); + DEBUG_PRINTF("building vermicelli16\n"); + return; + } +#endif + DEBUG_PRINTF("attempting shufti for %zu chars\n", outs); if (-1 != shuftiBuildMasks(info.single_stops, (u8 *)&aux->shufti.lo, (u8 *)&aux->shufti.hi)) { diff --git a/src/nfa/castle.c b/src/nfa/castle.c index 7c158b31..dc6ec8f9 100644 --- a/src/nfa/castle.c +++ b/src/nfa/castle.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -552,6 +553,42 @@ char castleScanNVerm(const struct Castle *c, const u8 *buf, const size_t begin, return 1; } +#ifdef HAVE_SVE2 + +static really_inline +char castleScanVerm16(const struct Castle *c, const u8 *buf, const size_t begin, + const size_t end, size_t *loc) { + const u8 *ptr = vermicelli16Exec(c->u.verm16.mask, buf + begin, buf + end); + if (ptr == buf + end) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char castleScanNVerm16(const struct Castle *c, const u8 *buf, const size_t begin, + const size_t end, size_t *loc) { + const u8 *ptr = nvermicelli16Exec(c->u.verm16.mask, buf + begin, buf + end); + if (ptr == buf + end) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +#endif // HAVE_SVE2 + static really_inline char castleScanShufti(const struct Castle *c, const u8 *buf, const size_t begin, const size_t end, 
size_t *loc) { @@ -604,6 +641,12 @@ char castleScan(const struct Castle *c, const u8 *buf, const size_t begin, return castleScanVerm(c, buf, begin, end, loc); case CASTLE_NVERM: return castleScanNVerm(c, buf, begin, end, loc); +#ifdef HAVE_SVE2 + case CASTLE_VERM16: + return castleScanVerm16(c, buf, begin, end, loc); + case CASTLE_NVERM16: + return castleScanNVerm16(c, buf, begin, end, loc); +#endif // HAVE_SVE2 case CASTLE_SHUFTI: return castleScanShufti(c, buf, begin, end, loc); case CASTLE_TRUFFLE: @@ -647,6 +690,42 @@ char castleRevScanNVerm(const struct Castle *c, const u8 *buf, return 1; } +#ifdef HAVE_SVE2 + +static really_inline +char castleRevScanVerm16(const struct Castle *c, const u8 *buf, + const size_t begin, const size_t end, size_t *loc) { + const u8 *ptr = rvermicelli16Exec(c->u.verm16.mask, buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char castleRevScanNVerm16(const struct Castle *c, const u8 *buf, + const size_t begin, const size_t end, size_t *loc) { + const u8 *ptr = rnvermicelli16Exec(c->u.verm16.mask, buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +#endif // HAVE_SVE2 + static really_inline char castleRevScanShufti(const struct Castle *c, const u8 *buf, const size_t begin, const size_t end, size_t *loc) { @@ -699,6 +778,12 @@ char castleRevScan(const struct Castle *c, const u8 *buf, const size_t begin, return castleRevScanVerm(c, buf, begin, end, loc); case CASTLE_NVERM: return castleRevScanNVerm(c, buf, begin, end, loc); +#ifdef HAVE_SVE2 + case CASTLE_VERM16: + return castleRevScanVerm16(c, buf, 
begin, end, loc); + case CASTLE_NVERM16: + return castleRevScanNVerm16(c, buf, begin, end, loc); +#endif // HAVE_SVE2 case CASTLE_SHUFTI: return castleRevScanShufti(c, buf, begin, end, loc); case CASTLE_TRUFFLE: diff --git a/src/nfa/castle_internal.h b/src/nfa/castle_internal.h index 429c232f..ea135f8d 100644 --- a/src/nfa/castle_internal.h +++ b/src/nfa/castle_internal.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -52,6 +53,8 @@ struct SubCastle { #define CASTLE_NVERM 2 #define CASTLE_SHUFTI 3 #define CASTLE_TRUFFLE 4 +#define CASTLE_VERM16 5 +#define CASTLE_NVERM16 6 enum ExclusiveType { NOT_EXCLUSIVE, //!< no subcastles are exclusive @@ -129,6 +132,9 @@ struct ALIGN_AVX_DIRECTIVE Castle { struct { char c; } verm; + struct { + m128 mask; + } verm16; struct { m128 mask_lo; m128 mask_hi; diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 20bc2925..56b12700 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,6 +40,7 @@ #include "repeatcompile.h" #include "shufticompile.h" #include "trufflecompile.h" +#include "vermicellicompile.h" #include "nfagraph/ng_dump.h" #include "nfagraph/ng_equivalence.h" #include "nfagraph/ng_repeat.h" @@ -101,6 +103,19 @@ void writeCastleScanEngine(const CharReach &cr, Castle *c) { return; } +#ifdef HAVE_SVE2 + if (cr.count() <= 16) { + c->type = CASTLE_NVERM16; + vermicelli16Build(cr, (u8 *)&c->u.verm16.mask); + return; + } + if (negated.count() <= 16) { + c->type = CASTLE_VERM16; + vermicelli16Build(negated, (u8 *)&c->u.verm16.mask); + 
return; + } +#endif // HAVE_SVE2 + if (shuftiBuildMasks(negated, (u8 *)&c->u.shuf.mask_lo, (u8 *)&c->u.shuf.mask_hi) != -1) { c->type = CASTLE_SHUFTI; diff --git a/src/nfa/lbr.c b/src/nfa/lbr.c index d403733a..2c6ea163 100644 --- a/src/nfa/lbr.c +++ b/src/nfa/lbr.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -361,6 +362,56 @@ char lbrRevScanNVerm(const struct NFA *nfa, const u8 *buf, return 1; } +#ifdef HAVE_SVE2 + +static really_inline +char lbrRevScanVerm16(const struct NFA *nfa, const u8 *buf, + size_t begin, size_t end, size_t *loc) { + assert(begin <= end); + assert(nfa->type == LBR_NFA_VERM16); + const struct lbr_verm16 *l = getImplNfa(nfa); + + if (begin == end) { + return 0; + } + + const u8 *ptr = rvermicelli16Exec(l->mask, buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char lbrRevScanNVerm16(const struct NFA *nfa, const u8 *buf, + size_t begin, size_t end, size_t *loc) { + assert(begin <= end); + assert(nfa->type == LBR_NFA_NVERM16); + const struct lbr_verm16 *l = getImplNfa(nfa); + + if (begin == end) { + return 0; + } + + const u8 *ptr = rnvermicelli16Exec(l->mask, buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +#endif // HAVE_SVE2 + static really_inline char lbrRevScanShuf(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, @@ -467,6 +518,56 @@ char lbrFwdScanNVerm(const struct NFA *nfa, const u8 *buf, return 1; } +#ifdef HAVE_SVE2 + +static really_inline +char 
lbrFwdScanVerm16(const struct NFA *nfa, const u8 *buf, + size_t begin, size_t end, size_t *loc) { + assert(begin <= end); + assert(nfa->type == LBR_NFA_VERM16); + const struct lbr_verm16 *l = getImplNfa(nfa); + + if (begin == end) { + return 0; + } + + const u8 *ptr = vermicelli16Exec(l->mask, buf + begin, buf + end); + if (ptr == buf + end) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char lbrFwdScanNVerm16(const struct NFA *nfa, const u8 *buf, + size_t begin, size_t end, size_t *loc) { + assert(begin <= end); + assert(nfa->type == LBR_NFA_NVERM16); + const struct lbr_verm16 *l = getImplNfa(nfa); + + if (begin == end) { + return 0; + } + + const u8 *ptr = nvermicelli16Exec(l->mask, buf + begin, buf + end); + if (ptr == buf + end) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +#endif // HAVE_SVE2 + static really_inline char lbrFwdScanShuf(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, @@ -524,6 +625,16 @@ char lbrFwdScanTruf(const struct NFA *nfa, const u8 *buf, #define ENGINE_ROOT_NAME NVerm #include "lbr_common_impl.h" +#ifdef HAVE_SVE2 + +#define ENGINE_ROOT_NAME Verm16 +#include "lbr_common_impl.h" + +#define ENGINE_ROOT_NAME NVerm16 +#include "lbr_common_impl.h" + +#endif // HAVE_SVE2 + #define ENGINE_ROOT_NAME Shuf #include "lbr_common_impl.h" diff --git a/src/nfa/lbr.h b/src/nfa/lbr.h index a9e42046..b6718c05 100644 --- a/src/nfa/lbr.h +++ b/src/nfa/lbr.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -101,6 +102,52 @@ char nfaExecLbrNVerm_expandState(const struct NFA *nfa, void *dest, 
#define nfaExecLbrNVerm_B_Reverse NFA_API_NO_IMPL #define nfaExecLbrNVerm_zombie_status NFA_API_ZOMBIE_NO_IMPL +#ifdef HAVE_SVE2 + +// LBR Verm16 + +char nfaExecLbrVerm16_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecLbrVerm16_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecLbrVerm16_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecLbrVerm16_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecLbrVerm16_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecLbrVerm16_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecLbrVerm16_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecLbrVerm16_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecLbrVerm16_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecLbrVerm16_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); + +#define nfaExecLbrVerm16_testEOD NFA_API_NO_IMPL +#define nfaExecLbrVerm16_B_Reverse NFA_API_NO_IMPL +#define nfaExecLbrVerm16_zombie_status NFA_API_ZOMBIE_NO_IMPL + +// LBR Negated Verm16 + +char nfaExecLbrNVerm16_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecLbrNVerm16_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecLbrNVerm16_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecLbrNVerm16_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecLbrNVerm16_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecLbrNVerm16_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecLbrNVerm16_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecLbrNVerm16_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecLbrNVerm16_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecLbrNVerm16_expandState(const struct NFA *nfa, void *dest, + const void 
*src, u64a offset, u8 key); + +#define nfaExecLbrNVerm16_testEOD NFA_API_NO_IMPL +#define nfaExecLbrNVerm16_B_Reverse NFA_API_NO_IMPL +#define nfaExecLbrNVerm16_zombie_status NFA_API_ZOMBIE_NO_IMPL + +#endif // HAVE_SVE2 + // LBR Shuf char nfaExecLbrShuf_Q(const struct NFA *n, struct mq *q, s64a end); diff --git a/src/nfa/lbr_internal.h b/src/nfa/lbr_internal.h index 8ba11dd4..beb1a50b 100644 --- a/src/nfa/lbr_internal.h +++ b/src/nfa/lbr_internal.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -56,6 +57,11 @@ struct lbr_verm { char c; //!< escape char }; +struct lbr_verm16 { + struct lbr_common common; + m128 mask; +}; + struct lbr_shuf { struct lbr_common common; m128 mask_lo; //!< shufti lo mask for escape chars diff --git a/src/nfa/mpv.c b/src/nfa/mpv.c index 552754d6..5829d43d 100644 --- a/src/nfa/mpv.c +++ b/src/nfa/mpv.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -260,6 +261,13 @@ size_t limitByReach(const struct mpv_kilopuff *kp, const u8 *buf, } else if (kp->type == MPV_NVERM) { return nvermicelliExec(kp->u.verm.c, 0, buf, buf + length) - buf; } +#ifdef HAVE_SVE2 + else if (kp->type == MPV_VERM16) { + return vermicelli16Exec(kp->u.verm16.mask, buf, buf + length) - buf; + } else if (kp->type == MPV_NVERM16) { + return nvermicelli16Exec(kp->u.verm16.mask, buf, buf + length) - buf; + } +#endif // HAVE_SVE2 assert(kp->type == MPV_DOT); return length; diff --git a/src/nfa/mpv_internal.h b/src/nfa/mpv_internal.h index a52853dc..b6b92504 100644 --- a/src/nfa/mpv_internal.h +++ b/src/nfa/mpv_internal.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2016, Intel 
Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,6 +37,8 @@ #define MPV_SHUFTI 2 #define MPV_TRUFFLE 3 #define MPV_NVERM 4 +#define MPV_VERM16 5 +#define MPV_NVERM16 6 struct mpv_puffette { u32 repeats; @@ -65,6 +68,9 @@ struct mpv_kilopuff { struct { char c; } verm; + struct { + m128 mask; + } verm16; struct { m128 mask_lo; m128 mask_hi; diff --git a/src/nfa/mpvcompile.cpp b/src/nfa/mpvcompile.cpp index 5e59c04e..d85c90b0 100644 --- a/src/nfa/mpvcompile.cpp +++ b/src/nfa/mpvcompile.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +34,7 @@ #include "nfa_internal.h" #include "shufticompile.h" #include "trufflecompile.h" +#include "vermicellicompile.h" #include "util/alloc.h" #include "util/multibit_build.h" #include "util/order_check.h" @@ -175,6 +177,14 @@ void writeKiloPuff(const map>::const_iterator &it, size_t set = reach.find_first(); assert(set != CharReach::npos); kp->u.verm.c = (char)set; +#ifdef HAVE_SVE2 + } else if (reach.count() >= 240) { + kp->type = MPV_VERM16; + vermicelli16Build(~reach, (u8 *)&kp->u.verm16.mask); + } else if (reach.count() <= 16) { + kp->type = MPV_NVERM16; + vermicelli16Build(reach, (u8 *)&kp->u.verm16.mask); +#endif // HAVE_SVE2 } else if (shuftiBuildMasks(~reach, (u8 *)&kp->u.shuf.mask_lo, (u8 *)&kp->u.shuf.mask_hi) != -1) { kp->type = MPV_SHUFTI; diff --git a/src/nfa/nfa_api_dispatch.c b/src/nfa/nfa_api_dispatch.c index 75cac4b4..6785e939 100644 --- a/src/nfa/nfa_api_dispatch.c +++ b/src/nfa/nfa_api_dispatch.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with 
or without * modification, are permitted provided that the following conditions are met: @@ -53,6 +54,14 @@ // general framework calls +#ifdef HAVE_SVE2 +#define VERM16_CASES(dbnt_func) \ + DISPATCH_CASE(LBR_NFA_VERM16, LbrVerm16, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_NVERM16, LbrNVerm16, dbnt_func); +#else +#define VERM16_CASES(dbnt_func) +#endif + #define DISPATCH_BY_NFA_TYPE(dbnt_func) \ switch (nfa->type) { \ DISPATCH_CASE(LIMEX_NFA_32, LimEx32, dbnt_func); \ @@ -80,6 +89,7 @@ DISPATCH_CASE(SHENG_NFA_64, Sheng64, dbnt_func); \ DISPATCH_CASE(MCSHENG_64_NFA_8, McSheng64_8, dbnt_func); \ DISPATCH_CASE(MCSHENG_64_NFA_16, McSheng64_16, dbnt_func); \ + VERM16_CASES(dbnt_func) \ default: \ assert(0); \ } diff --git a/src/nfa/nfa_build_util.cpp b/src/nfa/nfa_build_util.cpp index 47153163..ed0e2f01 100644 --- a/src/nfa/nfa_build_util.cpp +++ b/src/nfa/nfa_build_util.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -340,6 +341,42 @@ const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = const char *NFATraits::name = "Lim Bounded Repeat (NV)"; #endif +#ifdef HAVE_SVE2 + +template<> struct NFATraits { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 8; + static const bool fast = true; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; +}; +const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; +#if defined(DUMP_SUPPORT) +const char *NFATraits::name = "Lim Bounded Repeat (V16)"; +#endif + +template<> struct NFATraits { + UNUSED static const char *name; 
+ static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 8; + static const bool fast = true; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; +}; +const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; +#if defined(DUMP_SUPPORT) +const char *NFATraits::name = "Lim Bounded Repeat (NV16)"; +#endif + +#endif // HAVE_SVE2 + template<> struct NFATraits { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; diff --git a/src/nfa/nfa_internal.h b/src/nfa/nfa_internal.h index ad27e28b..f7155aef 100644 --- a/src/nfa/nfa_internal.h +++ b/src/nfa/nfa_internal.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -65,6 +66,10 @@ enum NFAEngineType { LBR_NFA_DOT, /**< magic pseudo nfa */ LBR_NFA_VERM, /**< magic pseudo nfa */ LBR_NFA_NVERM, /**< magic pseudo nfa */ +#ifdef HAVE_SVE2 + LBR_NFA_VERM16, /**< magic pseudo nfa */ + LBR_NFA_NVERM16, /**< magic pseudo nfa */ +#endif // HAVE_SVE2 LBR_NFA_SHUF, /**< magic pseudo nfa */ LBR_NFA_TRUF, /**< magic pseudo nfa */ CASTLE_NFA, /**< magic pseudo nfa */ @@ -218,6 +223,9 @@ static really_inline int isNfaType(u8 t) { static really_inline int isLbrType(u8 t) { return t == LBR_NFA_DOT || t == LBR_NFA_VERM || t == LBR_NFA_NVERM || +#ifdef HAVE_SVE2 + t == LBR_NFA_VERM16 || t == LBR_NFA_NVERM16 || +#endif // HAVE_SVE2 t == LBR_NFA_SHUF || t == LBR_NFA_TRUF; } diff --git a/src/nfa/vermicelli_sve.h b/src/nfa/vermicelli_sve.h index 6a76f671..cadaac8e 100644 --- a/src/nfa/vermicelli_sve.h +++ b/src/nfa/vermicelli_sve.h @@ -232,10 +232,9 @@ const u8 
*rdvermSearchLoopBody(svuint16_t chars, const u8 *buf) { } static really_inline -const u8 *vermSearch(char c, bool nocase, const u8 *buf, const u8 *buf_end, +const u8 *vermSearch(svuint8_t chars, const u8 *buf, const u8 *buf_end, bool negate) { assert(buf < buf_end); - svuint8_t chars = getCharMaskSingle(c, nocase); size_t len = buf_end - buf; if (len <= svcntb()) { return vermSearchOnce(chars, buf, buf_end, negate); @@ -267,10 +266,9 @@ const u8 *vermSearch(char c, bool nocase, const u8 *buf, const u8 *buf_end, } static really_inline -const u8 *rvermSearch(char c, bool nocase, const u8 *buf, const u8 *buf_end, +const u8 *rvermSearch(svuint8_t chars, const u8 *buf, const u8 *buf_end, bool negate) { assert(buf < buf_end); - svuint8_t chars = getCharMaskSingle(c, nocase); size_t len = buf_end - buf; if (len <= svcntb()) { return rvermSearchOnce(chars, buf, buf_end, negate); @@ -353,7 +351,8 @@ const u8 *vermicelliExec(char c, bool nocase, const u8 *buf, const u8 *buf_end) { DEBUG_PRINTF("verm scan %s\\x%02hhx over %td bytes\n", nocase ? "nocase " : "", c, buf_end - buf); - const u8 *ptr = vermSearch(c, nocase, buf, buf_end, false); + svuint8_t chars = getCharMaskSingle(c, nocase); + const u8 *ptr = vermSearch(chars, buf, buf_end, false); return ptr ? ptr : buf_end; } @@ -364,7 +363,8 @@ const u8 *nvermicelliExec(char c, bool nocase, const u8 *buf, const u8 *buf_end) { DEBUG_PRINTF("nverm scan %s\\x%02hhx over %td bytes\n", nocase ? "nocase " : "", c, buf_end - buf); - const u8 *ptr = vermSearch(c, nocase, buf, buf_end, true); + svuint8_t chars = getCharMaskSingle(c, nocase); + const u8 *ptr = vermSearch(chars, buf, buf_end, true); return ptr ? ptr : buf_end; } @@ -375,7 +375,8 @@ const u8 *rvermicelliExec(char c, bool nocase, const u8 *buf, const u8 *buf_end) { DEBUG_PRINTF("rev verm scan %s\\x%02hhx over %td bytes\n", nocase ? 
"nocase " : "", c, buf_end - buf); - const u8 *ptr = rvermSearch(c, nocase, buf, buf_end, false); + svuint8_t chars = getCharMaskSingle(c, nocase); + const u8 *ptr = rvermSearch(chars, buf, buf_end, false); return ptr ? ptr : buf - 1; } @@ -386,7 +387,8 @@ const u8 *rnvermicelliExec(char c, bool nocase, const u8 *buf, const u8 *buf_end) { DEBUG_PRINTF("rev verm scan %s\\x%02hhx over %td bytes\n", nocase ? "nocase " : "", c, buf_end - buf); - const u8 *ptr = rvermSearch(c, nocase, buf, buf_end, true); + svuint8_t chars = getCharMaskSingle(c, nocase); + const u8 *ptr = rvermSearch(chars, buf, buf_end, true); return ptr ? ptr : buf - 1; } @@ -427,4 +429,45 @@ const u8 *rvermicelliDoubleExec(char c1, char c2, bool nocase, const u8 *buf, } } return buf - 1; +} + +static really_inline +svuint8_t getDupSVEMaskFrom128(m128 _mask) { + return svld1rq_u8(svptrue_b8(), (const uint8_t *)&_mask); +} + +static really_inline +const u8 *vermicelli16Exec(const m128 _chars, const u8 *buf, + const u8 *buf_end) { + DEBUG_PRINTF("verm16 scan over %td bytes\n", buf_end - buf); + svuint8_t chars = getDupSVEMaskFrom128(_chars); + const u8 *ptr = vermSearch(chars, buf, buf_end, false); + return ptr ? ptr : buf_end; +} + +static really_inline +const u8 *nvermicelli16Exec(const m128 _chars, const u8 *buf, + const u8 *buf_end) { + DEBUG_PRINTF("nverm16 scan over %td bytes\n", buf_end - buf); + svuint8_t chars = getDupSVEMaskFrom128(_chars); + const u8 *ptr = vermSearch(chars, buf, buf_end, true); + return ptr ? ptr : buf_end; +} + +static really_inline +const u8 *rvermicelli16Exec(const m128 _chars, const u8 *buf, + const u8 *buf_end) { + DEBUG_PRINTF("rverm16 scan over %td bytes\n", buf_end - buf); + svuint8_t chars = getDupSVEMaskFrom128(_chars); + const u8 *ptr = rvermSearch(chars, buf, buf_end, false); + return ptr ? 
ptr : buf - 1; +} + +static really_inline +const u8 *rnvermicelli16Exec(const m128 _chars, const u8 *buf, + const u8 *buf_end) { + DEBUG_PRINTF("rnverm16 scan over %td bytes\n", buf_end - buf); + svuint8_t chars = getDupSVEMaskFrom128(_chars); + const u8 *ptr = rvermSearch(chars, buf, buf_end, true); + return ptr ? ptr : buf - 1; } \ No newline at end of file diff --git a/src/nfa/vermicellicompile.cpp b/src/nfa/vermicellicompile.cpp new file mode 100644 index 00000000..5b6ca036 --- /dev/null +++ b/src/nfa/vermicellicompile.cpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2021, Arm Limited + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Vermicelli acceleration: compile code.
+ */
+#include "vermicellicompile.h"
+#include "util/charreach.h"
+
+#include <cstring>
+
+namespace ue2 {
+
+// Packs chars into rv[16], padded with its first member; false if 0 or >16.
+bool vermicelli16Build(const CharReach &chars, u8 *rv) {
+    size_t i = chars.find_first();
+    if (i == CharReach::npos) return false; // empty set: nothing to pad with
+    u8 arr[16];
+    std::memset(arr, i, sizeof(arr));
+    for (size_t n = 1; (i = chars.find_next(i)) != CharReach::npos; ++n) {
+        if (n == sizeof(arr)) return false; // 17th member cannot be encoded
+        arr[n] = i;
+    }
+    std::memcpy(rv, arr, sizeof(arr)); // rv is only written on success
+    return true;
+}
+
+} // namespace ue2
diff --git a/src/nfa/vermicellicompile.h b/src/nfa/vermicellicompile.h
new file mode 100644
index 00000000..5c70100a
--- /dev/null
+++ b/src/nfa/vermicellicompile.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021, Arm Limited
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Vermicelli acceleration: compile code. 
+ */ + +#ifndef VERM_COMPILE_H +#define VERM_COMPILE_H + +#include "ue2common.h" +#include "util/charreach.h" +#include "util/flat_containers.h" + +#include + +namespace ue2 { + +bool vermicelli16Build(const CharReach &chars, u8 *rv); + +} // namespace ue2 + +#endif // VERM_COMPILE_H diff --git a/src/nfagraph/ng_lbr.cpp b/src/nfagraph/ng_lbr.cpp index d8ba503c..ca3a1a2e 100644 --- a/src/nfagraph/ng_lbr.cpp +++ b/src/nfagraph/ng_lbr.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -43,6 +44,7 @@ #include "nfa/repeatcompile.h" #include "nfa/shufticompile.h" #include "nfa/trufflecompile.h" +#include "nfa/vermicellicompile.h" #include "util/alloc.h" #include "util/bitutils.h" // for lg2 #include "util/compile_context.h"
@@ -209,6 +211,76 @@ bytecode_ptr<NFA> buildLbrNVerm(const CharReach &cr, const depth &repeatMin,
     return nfa;
 }
 
+#ifdef HAVE_SVE2
+
+/**
+ * \brief Build an LBR_NFA_VERM16 engine for a repeat over \p cr.
+ *
+ * The 16-byte mask is filled with the escapes, i.e. every character that is
+ * not in \p cr. Returns nullptr when the escapes do not fit the mask, which
+ * lets constructLBR() move on to the next candidate engine.
+ */
+static
+bytecode_ptr<NFA> buildLbrVerm16(const CharReach &cr, const depth &repeatMin,
+                                 const depth &repeatMax, u32 minPeriod,
+                                 bool is_reset, ReportID report) {
+    const CharReach escapes(~cr);
+
+    if (escapes.count() > 16) {
+        return nullptr;
+    }
+
+    enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
+                                             is_reset);
+    auto nfa = makeLbrNfa<lbr_verm16>(LBR_NFA_VERM16, rtype, repeatMax);
+    struct lbr_verm16 *lv = (struct lbr_verm16 *)getMutableImplNfa(nfa.get());
+    // Do not hand back an engine whose mask was never filled in.
+    if (!vermicelli16Build(escapes, (u8 *)&lv->mask)) {
+        return nullptr;
+    }
+
+    fillNfa<lbr_verm16>(nfa.get(), &lv->common, report, repeatMin, repeatMax,
+                        minPeriod, rtype);
+
+    DEBUG_PRINTF("built verm16 lbr\n");
+    return nfa;
+}
+
+/**
+ * \brief Build an LBR_NFA_NVERM16 engine for a repeat over \p cr.
+ *
+ * Negated counterpart of buildLbrVerm16(): the 16-byte mask holds \p cr
+ * itself. Returns nullptr when \p cr does not fit the mask, which lets
+ * constructLBR() move on to the next candidate engine.
+ */
+static
+bytecode_ptr<NFA> buildLbrNVerm16(const CharReach &cr, const depth &repeatMin,
+                                  const depth &repeatMax, u32 minPeriod,
+                                  bool is_reset, ReportID report) {
+    const CharReach escapes(cr);
+
+    if (escapes.count() > 16) {
+        return nullptr;
+    }
+
+    enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
+                                             is_reset);
+    auto nfa = makeLbrNfa<lbr_verm16>(LBR_NFA_NVERM16, rtype, repeatMax);
+    struct lbr_verm16 *lv = (struct lbr_verm16 *)getMutableImplNfa(nfa.get());
+    // Do not hand back an engine whose mask was never filled in.
+    if (!vermicelli16Build(escapes, (u8 *)&lv->mask)) {
+        return nullptr;
+    }
+
+    fillNfa<lbr_verm16>(nfa.get(), &lv->common, report, repeatMin, repeatMax,
+                        minPeriod, rtype);
+
+    DEBUG_PRINTF("built negated verm16 lbr\n");
+    return nfa;
+}
+
+#endif // HAVE_SVE2
+
 static
 bytecode_ptr<NFA> buildLbrShuf(const CharReach &cr, const depth &repeatMin,
                                const depth &repeatMax, u32 minPeriod,
@@ -269,6 +341,16 @@ bytecode_ptr<NFA> constructLBR(const CharReach &cr, const depth &repeatMin,
         nfa = buildLbrNVerm(cr, repeatMin, repeatMax, minPeriod, is_reset,
                             report);
     }
+#ifdef HAVE_SVE2
+    if (!nfa) {
+        nfa = buildLbrVerm16(cr, repeatMin, repeatMax, minPeriod, is_reset,
+                             report);
+    }
+    if (!nfa) {
+        nfa = buildLbrNVerm16(cr, repeatMin, repeatMax, minPeriod, is_reset,
+                              report);
+    }
+#endif // HAVE_SVE2
     if (!nfa) {
         nfa = buildLbrShuf(cr, repeatMin, repeatMax, minPeriod, is_reset,
                            report);
diff --git a/src/rose/rose_build_lit_accel.cpp b/src/rose/rose_build_lit_accel.cpp index 62f660fb..7286fddb 100644 --- a/src/rose/rose_build_lit_accel.cpp +++ b/src/rose/rose_build_lit_accel.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2017, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,6 +37,7 @@ #include "nfa/accel.h" #include "nfa/shufticompile.h" #include "nfa/trufflecompile.h" +#include "nfa/vermicellicompile.h" #include "util/compare.h" #include "util/dump_charclass.h" #include "util/ue2string.h" @@ -440,6 +442,17 @@ void findForwardAccelScheme(const vector &lits, } const CharReach &cr = reach[min_offset]; +#ifdef HAVE_SVE2 + if (min_count <= 16) { + vermicelli16Build(cr, (u8 *)&aux->verm16.mask); + DEBUG_PRINTF("built verm16 for %s (%zu chars, offset %u)\n", + describeClass(cr).c_str(), 
cr.count(), min_offset); + aux->verm16.accel_type = ACCEL_VERM16; + aux->verm16.offset = verify_u8(min_offset); + return; + } +#endif // HAVE_SVE2 + if (-1 != shuftiBuildMasks(cr, (u8 *)&aux->shufti.lo, (u8 *)&aux->shufti.hi)) { DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n", diff --git a/unit/internal/rvermicelli.cpp b/unit/internal/rvermicelli.cpp index 497ffe07..2806c5d8 100644 --- a/unit/internal/rvermicelli.cpp +++ b/unit/internal/rvermicelli.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -304,3 +305,267 @@ TEST(RDoubleVermicelli, Exec5) { } } } + +#ifdef HAVE_SVE2 + +#include "nfa/vermicellicompile.h" +using namespace ue2; + +union Matches { + u8 val8[16]; + m128 val128; +}; + +TEST(RVermicelli16, ExecNoMatch1) { + char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; + + CharReach chars; + chars.set('a'); + chars.set('B'); + chars.set('A'); + Matches matches; + bool ret = vermicelli16Build(chars, matches.val8); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + for (size_t j = 0; j < 16; j++) { + const u8 *begin = (const u8 *)t1 + i; + const u8 *end = (const u8 *)t1 + strlen(t1) - j; + + const u8 *rv = rvermicelli16Exec(matches.val128, begin, end); + ASSERT_EQ(begin - 1, rv); + } + } +} + +TEST(RVermicelli16, Exec1) { + char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbb"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + chars.set('a'); + chars.set('A'); + Matches matches; + bool ret = vermicelli16Build(chars, matches.val8); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = rvermicelli16Exec(matches.val128, buf, buf + strlen(t1) - i); + ASSERT_EQ(buf + 48, rv); + } +} + +TEST(RVermicelli16, Exec2) { + char t1[] = 
"bbbbbbbbbbbbbbbbbabbbbbbbbaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbb"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + chars.set('a'); + chars.set('A'); + Matches matches; + bool ret = vermicelli16Build(chars, matches.val8); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = rvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1)); + ASSERT_EQ(buf + 48, rv); + } +} + +TEST(RVermicelli16, Exec3) { + char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbaaaaaaaaaaaaaaaaaaaaaaAbbbbbbbbbbbbbbbbbbbbbb"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + chars.set('a'); + Matches matches_a; + bool ret = vermicelli16Build(chars, matches_a.val8); + ASSERT_TRUE(ret); + + chars.set('A'); + Matches matches_A; + ret = vermicelli16Build(chars, matches_A.val8); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = rvermicelli16Exec(matches_a.val128, buf, buf + strlen(t1) - i); + ASSERT_EQ(buf + 47, rv); + + rv = rvermicelli16Exec(matches_A.val128, buf, buf + strlen(t1) - i); + ASSERT_EQ(buf + 48, rv); + } +} + +TEST(RVermicelli16, Exec4) { + char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + chars.set('a'); + Matches matches_a; + bool ret = vermicelli16Build(chars, matches_a.val8); + ASSERT_TRUE(ret); + + chars.set('A'); + Matches matches_A; + ret = vermicelli16Build(chars, matches_A.val8); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 31; i++) { + t1[16 + i] = 'a'; + const u8 *rv = rvermicelli16Exec(matches_a.val128, buf, buf + strlen(t1)); + ASSERT_EQ(buf + 16 + i, rv); + + rv = rvermicelli16Exec(matches_A.val128, buf, buf + strlen(t1)); + ASSERT_EQ(buf + 16 + i, rv); + } +} + +TEST(RVermicelli16, Exec5) { + char t1[] = "qqqqqqqqqqqqqqqqqabcdefghijklmnopqqqqqqqqqqqqqqqqqqqqq"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + Matches matches[16]; + bool ret; + + for (int i = 0; i < 16; ++i) { + chars.set('a' + i); + ret = 
vermicelli16Build(chars, matches[i].val8); + ASSERT_TRUE(ret); + } + + for (int j = 0; j < 16; ++j) { + for (size_t i = 0; i < 16; i++) { + const u8 *rv = rvermicelli16Exec(matches[j].val128, buf, buf + strlen(t1) - i); + ASSERT_EQ(buf + j + 17, rv); + } + } +} + +TEST(RNVermicelli16, ExecNoMatch1) { + char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + chars.set('b'); + chars.set('B'); + chars.set('A'); + Matches matches; + bool ret = vermicelli16Build(chars, matches.val8); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + for (size_t j = 0; j < 16; j++) { + const u8 *rv = rnvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1) - j); + ASSERT_EQ(buf + i - 1, rv); + } + } +} + +TEST(RNVermicelli16, Exec1) { + char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbb"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + chars.set('b'); + chars.set('A'); + Matches matches; + bool ret = vermicelli16Build(chars, matches.val8); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = rnvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1) - i); + ASSERT_EQ(buf + 48, rv); + } +} + +TEST(RNVermicelli16, Exec2) { + char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbb"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + chars.set('b'); + chars.set('A'); + Matches matches; + bool ret = vermicelli16Build(chars, matches.val8); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = rnvermicelli16Exec(matches.val128, buf, buf + strlen(t1) - i); + ASSERT_EQ(buf + 48, rv); + } +} + +TEST(RNVermicelli16, Exec3) { + char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbaaaaaaaaaaaaaaaaaaaaaaAbbbbbbbbbbbbbbbbbbbbbb"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + chars.set('b'); + Matches matches_b; + bool ret = vermicelli16Build(chars, matches_b.val8); + ASSERT_TRUE(ret); + + 
chars.set('A'); + Matches matches_A; + ret = vermicelli16Build(chars, matches_A.val8); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = rnvermicelli16Exec(matches_b.val128, buf + i, buf + strlen(t1)); + ASSERT_EQ(buf + 48, rv); + + rv = rnvermicelli16Exec(matches_A.val128, buf + i, buf + strlen(t1)); + ASSERT_EQ(buf + 47, rv); + } +} + +TEST(RNVermicelli16, Exec4) { + char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + chars.set('b'); + Matches matches_b; + bool ret = vermicelli16Build(chars, matches_b.val8); + ASSERT_TRUE(ret); + + chars.set('A'); + Matches matches_A; + ret = vermicelli16Build(chars, matches_A.val8); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 31; i++) { + t1[16 + i] = 'a'; + const u8 *rv = rnvermicelli16Exec(matches_b.val128, buf, buf + strlen(t1)); + ASSERT_EQ(buf + 16 + i, rv); + + rv = rnvermicelli16Exec(matches_A.val128, buf, buf + strlen(t1)); + ASSERT_EQ(buf + 16 + i, rv); + } +} + +TEST(RNVermicelli16, Exec5) { + char t1[] = "aaaaaaaaaaaaaaaaaabcdefghijklmnopqqqqqqqqqqqqqqqqqqqqqqqq"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + Matches matches[16]; + bool ret; + + for (int i = 0; i < 16; ++i) { + chars.set('q' - i); + ret = vermicelli16Build(chars, matches[i].val8); + ASSERT_TRUE(ret); + } + + for (int j = 0; j < 16; ++j) { + for (size_t i = 0; i < 16; i++) { + const u8 *rv = rnvermicelli16Exec(matches[j].val128, buf, buf + strlen(t1) - i); + ASSERT_EQ(buf - j + 32, rv); + } + } +} + +#endif // HAVE_SVE2 \ No newline at end of file diff --git a/unit/internal/vermicelli.cpp b/unit/internal/vermicelli.cpp index 5e4a8253..bc007e1a 100644 --- a/unit/internal/vermicelli.cpp +++ b/unit/internal/vermicelli.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted 
provided that the following conditions are met: @@ -522,3 +523,264 @@ TEST(DoubleVermicelliMasked, Exec4) { } } +#ifdef HAVE_SVE2 + +#include "nfa/vermicellicompile.h" +using namespace ue2; + +union Matches { + u8 val8[16]; + m128 val128; +}; + +TEST(Vermicelli16, ExecNoMatch1) { + char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + chars.set('a'); + chars.set('B'); + chars.set('A'); + Matches matches; + bool ret = vermicelli16Build(chars, matches.val8); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + for (size_t j = 0; j < 16; j++) { + const u8 *rv = vermicelli16Exec(matches.val128, buf + i, buf + strlen(t1) - j); + ASSERT_EQ(buf + strlen(t1) - j, rv); + } + } +} + +TEST(Vermicelli16, Exec1) { + char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbbb"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + chars.set('a'); + chars.set('A'); + Matches matches; + bool ret = vermicelli16Build(chars, matches.val8); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = vermicelli16Exec(matches.val128, buf + i, buf + strlen(t1)); + ASSERT_EQ(buf + 17, rv); + } +} + +TEST(Vermicelli16, Exec2) { + char t1[] = "bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbabbbbbbbbbbbb"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + chars.set('a'); + chars.set('A'); + Matches matches; + bool ret = vermicelli16Build(chars, matches.val8); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = vermicelli16Exec(matches.val128, buf + i, buf + strlen(t1)); + ASSERT_EQ(buf + 17, rv); + } +} + +TEST(Vermicelli16, Exec3) { + char t1[] = "bbbbbbbbbbbbbbbbbAaaaaaaaaaaaaaaaaaaaaaabbbbbbbbabbbbbbbbbbbb"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + chars.set('a'); + Matches matches_a; + bool ret = vermicelli16Build(chars, matches_a.val8); + ASSERT_TRUE(ret); + + chars.set('A'); + Matches matches_A; + ret = 
vermicelli16Build(chars, matches_A.val8); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = vermicelli16Exec(matches_a.val128, buf + i, buf + strlen(t1)); + ASSERT_EQ(buf + 18, rv); + + rv = vermicelli16Exec(matches_A.val128, buf + i, buf + strlen(t1)); + ASSERT_EQ(buf + 17, rv); + } +} + +TEST(Vermicelli16, Exec4) { + char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + chars.set('a'); + Matches matches_a; + bool ret = vermicelli16Build(chars, matches_a.val8); + ASSERT_TRUE(ret); + + chars.set('A'); + Matches matches_A; + ret = vermicelli16Build(chars, matches_A.val8); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 31; i++) { + t1[48 - i] = 'a'; + const u8 *rv = vermicelli16Exec(matches_a.val128, buf, buf + strlen(t1)); + ASSERT_EQ(buf + 48 - i, rv); + + rv = vermicelli16Exec(matches_A.val128, buf, buf + strlen(t1)); + ASSERT_EQ(buf + 48 - i, rv); + } +} + +TEST(Vermicelli16, Exec5) { + char t1[] = "qqqqqqqqqqqqqqqqqabcdefghijklmnopqqqqqqqqqqqqqqqqqqqqq"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + Matches matches[16]; + bool ret; + + for (int i = 0; i < 16; ++i) { + chars.set('p' - i); + ret = vermicelli16Build(chars, matches[i].val8); + ASSERT_TRUE(ret); + } + + for (int j = 0; j < 16; ++j) { + for (size_t i = 0; i < 16; i++) { + const u8 *rv = vermicelli16Exec(matches[j].val128, buf + i,buf + strlen(t1)); + ASSERT_EQ(buf - j + 32, rv); + } + } +} + +TEST(NVermicelli16, ExecNoMatch1) { + char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + chars.set('b'); + chars.set('B'); + chars.set('A'); + Matches matches; + bool ret = vermicelli16Build(chars, matches.val8); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + for (size_t j = 0; j < 16; j++) { + const u8 *rv = nvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1) - j); + ASSERT_EQ((buf + 
strlen(t1) - j), rv); + } + } +} + +TEST(NVermicelli16, Exec1) { + char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbbb"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + chars.set('b'); + chars.set('A'); + Matches matches; + bool ret = vermicelli16Build(chars, matches.val8); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = nvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1)); + ASSERT_EQ(buf + 17, rv); + } +} + +TEST(NVermicelli16, Exec2) { + char t1[] = "bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbabbbbbbbbbbbb"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + chars.set('b'); + chars.set('A'); + Matches matches; + bool ret = vermicelli16Build(chars, matches.val8); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = nvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1)); + ASSERT_EQ(buf + 17, rv); + } +} + +TEST(NVermicelli16, Exec3) { + char t1[] = "bbbbbbbbbbbbbbbbbAaaaaaaaaaaaaaaaaaaaaaabbbbbbbbabbbbbbbbbbbb"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + chars.set('b'); + Matches matches_b; + bool ret = vermicelli16Build(chars, matches_b.val8); + ASSERT_TRUE(ret); + + chars.set('A'); + Matches matches_A; + ret = vermicelli16Build(chars, matches_A.val8); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = nvermicelli16Exec(matches_b.val128, buf + i, buf + strlen(t1)); + ASSERT_EQ(buf + 17, rv); + + rv = nvermicelli16Exec(matches_A.val128, buf + i, buf + strlen(t1)); + ASSERT_EQ(buf + 18, rv); + } +} + +TEST(NVermicelli16, Exec4) { + char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + chars.set('b'); + Matches matches_b; + bool ret = vermicelli16Build(chars, matches_b.val8); + ASSERT_TRUE(ret); + + chars.set('A'); + Matches matches_A; + ret = vermicelli16Build(chars, matches_A.val8); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 
31; i++) { + t1[48 - i] = 'a'; + const u8 *rv = nvermicelli16Exec(matches_b.val128, buf, buf + strlen(t1)); + ASSERT_EQ(buf + 48 - i, rv); + + rv = nvermicelli16Exec(matches_A.val128, buf, buf + strlen(t1)); + ASSERT_EQ(buf + 48 - i, rv); + } +} + +TEST(NVermicelli16, Exec5) { + char t1[] = "aaaaaaaaaaaaaaaaaabcdefghijklmnopqaaaaaaaaaaaaaaaaaaaaa"; + const u8 *buf = (const u8 *)t1; + + CharReach chars; + Matches matches[16]; + bool ret; + + for (int i = 0; i < 16; ++i) { + chars.set('a' + i); + ret = vermicelli16Build(chars, matches[i].val8); + ASSERT_TRUE(ret); + } + + for (int j = 0; j < 16; ++j) { + for (size_t i = 0; i < 16; i++) { + const u8 *rv = nvermicelli16Exec(matches[j].val128, buf + i, buf + strlen(t1)); + ASSERT_EQ(buf + j + 18, rv); + } + } +} + +#endif // HAVE_SVE2 \ No newline at end of file