Enable sheng32 and sheng64 on Arm

Signed-off-by: Yoan Picchi <yoan.picchi@arm.com>
This commit is contained in:
Yoan Picchi 2024-04-04 09:46:23 +00:00
parent 131672d175
commit 49fd4f0047
6 changed files with 153 additions and 46 deletions

View File

@ -154,7 +154,7 @@ char fireReports(const struct sheng *sh, NfaCallback cb, void *ctxt,
return MO_CONTINUE_MATCHING; /* continue execution */ return MO_CONTINUE_MATCHING; /* continue execution */
} }
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
// Sheng32 // Sheng32
static really_inline static really_inline
const struct sheng32 *get_sheng32(const struct NFA *n) { const struct sheng32 *get_sheng32(const struct NFA *n) {
@ -351,7 +351,7 @@ char fireReports64(const struct sheng64 *sh, NfaCallback cb, void *ctxt,
} }
return MO_CONTINUE_MATCHING; /* continue execution */ return MO_CONTINUE_MATCHING; /* continue execution */
} }
#endif // end of HAVE_AVX512VBMI #endif // end of HAVE_AVX512VBMI || HAVE_SVE
/* include Sheng function definitions */ /* include Sheng function definitions */
#include "sheng_defs.h" #include "sheng_defs.h"
@ -871,7 +871,7 @@ char nfaExecSheng_expandState(UNUSED const struct NFA *nfa, void *dest,
return 0; return 0;
} }
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
// Sheng32 // Sheng32
static really_inline static really_inline
char runSheng32Cb(const struct sheng32 *sh, NfaCallback cb, void *ctxt, char runSheng32Cb(const struct sheng32 *sh, NfaCallback cb, void *ctxt,
@ -1874,4 +1874,4 @@ char nfaExecSheng64_expandState(UNUSED const struct NFA *nfa, void *dest,
*(u8 *)dest = *(const u8 *)src; *(u8 *)dest = *(const u8 *)src;
return 0; return 0;
} }
#endif // end of HAVE_AVX512VBMI #endif // end of HAVE_AVX512VBMI || HAVE_SVE

View File

@ -58,7 +58,7 @@ char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer, char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer,
size_t length, NfaCallback cb, void *context); size_t length, NfaCallback cb, void *context);
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL #define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL #define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
@ -106,8 +106,7 @@ char nfaExecSheng64_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer, char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer,
size_t length, NfaCallback cb, void *context); size_t length, NfaCallback cb, void *context);
#else // !HAVE_AVX512VBMI && !HAVE_SVE
#else // !HAVE_AVX512VBMI
#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL #define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL #define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
@ -138,6 +137,7 @@ char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer,
#define nfaExecSheng64_testEOD NFA_API_NO_IMPL #define nfaExecSheng64_testEOD NFA_API_NO_IMPL
#define nfaExecSheng64_reportCurrent NFA_API_NO_IMPL #define nfaExecSheng64_reportCurrent NFA_API_NO_IMPL
#define nfaExecSheng64_B NFA_API_NO_IMPL #define nfaExecSheng64_B NFA_API_NO_IMPL
#endif // end of HAVE_AVX512VBMI #endif // end of HAVE_AVX512VBMI || HAVE_SVE
#endif /* SHENG_H_ */ #endif /* SHENG_H_ */

View File

@ -52,7 +52,7 @@ u8 hasInterestingStates(const u8 a, const u8 b, const u8 c, const u8 d) {
return (a | b | c | d) & (SHENG_STATE_FLAG_MASK); return (a | b | c | d) & (SHENG_STATE_FLAG_MASK);
} }
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
static really_inline static really_inline
u8 isDeadState32(const u8 a) { u8 isDeadState32(const u8 a) {
return a & SHENG32_STATE_DEAD; return a & SHENG32_STATE_DEAD;
@ -108,7 +108,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define SHENG_IMPL sheng_cod #define SHENG_IMPL sheng_cod
#define DEAD_FUNC isDeadState #define DEAD_FUNC isDeadState
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_cod #define SHENG32_IMPL sheng32_cod
#define DEAD_FUNC32 isDeadState32 #define DEAD_FUNC32 isDeadState32
#define ACCEPT_FUNC32 isAcceptState32 #define ACCEPT_FUNC32 isAcceptState32
@ -121,7 +121,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef SHENG_IMPL #undef SHENG_IMPL
#undef DEAD_FUNC #undef DEAD_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef DEAD_FUNC32 #undef DEAD_FUNC32
#undef ACCEPT_FUNC32 #undef ACCEPT_FUNC32
@ -135,7 +135,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define SHENG_IMPL sheng_co #define SHENG_IMPL sheng_co
#define DEAD_FUNC dummyFunc #define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_co #define SHENG32_IMPL sheng32_co
#define DEAD_FUNC32 dummyFunc #define DEAD_FUNC32 dummyFunc
#define ACCEPT_FUNC32 isAcceptState32 #define ACCEPT_FUNC32 isAcceptState32
@ -148,7 +148,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef SHENG_IMPL #undef SHENG_IMPL
#undef DEAD_FUNC #undef DEAD_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef DEAD_FUNC32 #undef DEAD_FUNC32
#undef ACCEPT_FUNC32 #undef ACCEPT_FUNC32
@ -162,7 +162,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define SHENG_IMPL sheng_samd #define SHENG_IMPL sheng_samd
#define DEAD_FUNC isDeadState #define DEAD_FUNC isDeadState
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_samd #define SHENG32_IMPL sheng32_samd
#define DEAD_FUNC32 isDeadState32 #define DEAD_FUNC32 isDeadState32
#define ACCEPT_FUNC32 isAcceptState32 #define ACCEPT_FUNC32 isAcceptState32
@ -175,7 +175,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef SHENG_IMPL #undef SHENG_IMPL
#undef DEAD_FUNC #undef DEAD_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef DEAD_FUNC32 #undef DEAD_FUNC32
#undef ACCEPT_FUNC32 #undef ACCEPT_FUNC32
@ -189,7 +189,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define SHENG_IMPL sheng_sam #define SHENG_IMPL sheng_sam
#define DEAD_FUNC dummyFunc #define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_sam #define SHENG32_IMPL sheng32_sam
#define DEAD_FUNC32 dummyFunc #define DEAD_FUNC32 dummyFunc
#define ACCEPT_FUNC32 isAcceptState32 #define ACCEPT_FUNC32 isAcceptState32
@ -202,7 +202,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef SHENG_IMPL #undef SHENG_IMPL
#undef DEAD_FUNC #undef DEAD_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef DEAD_FUNC32 #undef DEAD_FUNC32
#undef ACCEPT_FUNC32 #undef ACCEPT_FUNC32
@ -216,7 +216,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define SHENG_IMPL sheng_nmd #define SHENG_IMPL sheng_nmd
#define DEAD_FUNC isDeadState #define DEAD_FUNC isDeadState
#define ACCEPT_FUNC dummyFunc #define ACCEPT_FUNC dummyFunc
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_nmd #define SHENG32_IMPL sheng32_nmd
#define DEAD_FUNC32 isDeadState32 #define DEAD_FUNC32 isDeadState32
#define ACCEPT_FUNC32 dummyFunc #define ACCEPT_FUNC32 dummyFunc
@ -229,7 +229,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef SHENG_IMPL #undef SHENG_IMPL
#undef DEAD_FUNC #undef DEAD_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef DEAD_FUNC32 #undef DEAD_FUNC32
#undef ACCEPT_FUNC32 #undef ACCEPT_FUNC32
@ -243,7 +243,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define SHENG_IMPL sheng_nm #define SHENG_IMPL sheng_nm
#define DEAD_FUNC dummyFunc #define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc #define ACCEPT_FUNC dummyFunc
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_nm #define SHENG32_IMPL sheng32_nm
#define DEAD_FUNC32 dummyFunc #define DEAD_FUNC32 dummyFunc
#define ACCEPT_FUNC32 dummyFunc #define ACCEPT_FUNC32 dummyFunc
@ -256,7 +256,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef SHENG_IMPL #undef SHENG_IMPL
#undef DEAD_FUNC #undef DEAD_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef DEAD_FUNC32 #undef DEAD_FUNC32
#undef ACCEPT_FUNC32 #undef ACCEPT_FUNC32
@ -277,7 +277,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC isAccelState #define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_coda #define SHENG32_IMPL sheng32_4_coda
#define INTERESTING_FUNC32 hasInterestingStates32 #define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 isDeadState32 #define INNER_DEAD_FUNC32 isDeadState32
@ -296,7 +296,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC #undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef INTERESTING_FUNC32 #undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32 #undef INNER_DEAD_FUNC32
@ -316,7 +316,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc #define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_cod #define SHENG32_IMPL sheng32_4_cod
#define INTERESTING_FUNC32 hasInterestingStates32 #define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 isDeadState32 #define INNER_DEAD_FUNC32 isDeadState32
@ -339,7 +339,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC #undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef INTERESTING_FUNC32 #undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32 #undef INNER_DEAD_FUNC32
@ -363,7 +363,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC isAccelState #define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_coa #define SHENG32_IMPL sheng32_4_coa
#define INTERESTING_FUNC32 hasInterestingStates32 #define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 dummyFunc #define INNER_DEAD_FUNC32 dummyFunc
@ -382,7 +382,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC #undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef INTERESTING_FUNC32 #undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32 #undef INNER_DEAD_FUNC32
@ -402,7 +402,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc #define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_co #define SHENG32_IMPL sheng32_4_co
#define INTERESTING_FUNC32 hasInterestingStates32 #define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 dummyFunc #define INNER_DEAD_FUNC32 dummyFunc
@ -425,7 +425,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC #undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef INTERESTING_FUNC32 #undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32 #undef INNER_DEAD_FUNC32
@ -449,7 +449,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC isAccelState #define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_samda #define SHENG32_IMPL sheng32_4_samda
#define INTERESTING_FUNC32 hasInterestingStates32 #define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 isDeadState32 #define INNER_DEAD_FUNC32 isDeadState32
@ -468,7 +468,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC #undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef INTERESTING_FUNC32 #undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32 #undef INNER_DEAD_FUNC32
@ -488,7 +488,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc #define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_samd #define SHENG32_IMPL sheng32_4_samd
#define INTERESTING_FUNC32 hasInterestingStates32 #define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 isDeadState32 #define INNER_DEAD_FUNC32 isDeadState32
@ -511,7 +511,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC #undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef INTERESTING_FUNC32 #undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32 #undef INNER_DEAD_FUNC32
@ -535,7 +535,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC isAccelState #define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_sama #define SHENG32_IMPL sheng32_4_sama
#define INTERESTING_FUNC32 hasInterestingStates32 #define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 dummyFunc #define INNER_DEAD_FUNC32 dummyFunc
@ -554,7 +554,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC #undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef INTERESTING_FUNC32 #undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32 #undef INNER_DEAD_FUNC32
@ -574,7 +574,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc #define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_sam #define SHENG32_IMPL sheng32_4_sam
#define INTERESTING_FUNC32 hasInterestingStates32 #define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 dummyFunc #define INNER_DEAD_FUNC32 dummyFunc
@ -597,7 +597,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC #undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef INTERESTING_FUNC32 #undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32 #undef INNER_DEAD_FUNC32
@ -623,7 +623,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc #define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC isAccelState #define OUTER_ACCEL_FUNC isAccelState
#define ACCEPT_FUNC dummyFunc #define ACCEPT_FUNC dummyFunc
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_nmda #define SHENG32_IMPL sheng32_4_nmda
#define INTERESTING_FUNC32 dummyFunc4 #define INTERESTING_FUNC32 dummyFunc4
#define INNER_DEAD_FUNC32 dummyFunc #define INNER_DEAD_FUNC32 dummyFunc
@ -642,7 +642,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC #undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef INTERESTING_FUNC32 #undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32 #undef INNER_DEAD_FUNC32
@ -662,7 +662,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc #define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc #define ACCEPT_FUNC dummyFunc
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_nmd #define SHENG32_IMPL sheng32_4_nmd
#define INTERESTING_FUNC32 dummyFunc4 #define INTERESTING_FUNC32 dummyFunc4
#define INNER_DEAD_FUNC32 dummyFunc #define INNER_DEAD_FUNC32 dummyFunc
@ -685,7 +685,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC #undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef INTERESTING_FUNC32 #undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32 #undef INNER_DEAD_FUNC32
@ -712,7 +712,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc #define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc #define ACCEPT_FUNC dummyFunc
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_nm #define SHENG32_IMPL sheng32_4_nm
#define INTERESTING_FUNC32 dummyFunc4 #define INTERESTING_FUNC32 dummyFunc4
#define INNER_DEAD_FUNC32 dummyFunc #define INNER_DEAD_FUNC32 dummyFunc
@ -735,7 +735,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC #undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef INTERESTING_FUNC32 #undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32 #undef INNER_DEAD_FUNC32

View File

@ -96,7 +96,7 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
return MO_CONTINUE_MATCHING; return MO_CONTINUE_MATCHING;
} }
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
static really_inline static really_inline
char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt, char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
const struct sheng32 *s, const struct sheng32 *s,
@ -114,14 +114,28 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
} }
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start)); DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
#if defined(HAVE_SVE)
const svbool_t lane_pred_32 = svwhilelt_b8(0, 32);
svuint8_t cur_state = svdup_u8(*state);
svuint8_t tbl_mask = svdup_u8((unsigned char)0x1F);
const m512 *masks = s->succ_masks;
#else
m512 cur_state = set1_64x8(*state); m512 cur_state = set1_64x8(*state);
const m512 *masks = s->succ_masks; const m512 *masks = s->succ_masks;
#endif
while (likely(cur_buf != end)) { while (likely(cur_buf != end)) {
const u8 c = *cur_buf; const u8 c = *cur_buf;
#if defined(HAVE_SVE)
svuint8_t succ_mask = svld1(lane_pred_32, (const u8*)(masks + c));
cur_state = svtbl(succ_mask, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 tmp = svlastb(lane_pred_32, cur_state);
#else
const m512 succ_mask = masks[c]; const m512 succ_mask = masks[c];
cur_state = vpermb512(cur_state, succ_mask); cur_state = vpermb512(cur_state, succ_mask);
const u8 tmp = movd512(cur_state); const u8 tmp = movd512(cur_state);
#endif
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?'); DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG32_STATE_MASK, DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG32_STATE_MASK,
@ -153,7 +167,11 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
} }
cur_buf++; cur_buf++;
} }
#if defined(HAVE_SVE)
*state = svlastb(lane_pred_32, cur_state);
#else
*state = movd512(cur_state); *state = movd512(cur_state);
#endif
*scan_end = cur_buf; *scan_end = cur_buf;
return MO_CONTINUE_MATCHING; return MO_CONTINUE_MATCHING;
} }
@ -175,14 +193,28 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
} }
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start)); DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
#if defined(HAVE_SVE)
const svbool_t lane_pred_64 = svwhilelt_b8(0, 64);
svuint8_t cur_state = svdup_u8(*state);
svuint8_t tbl_mask = svdup_u8((unsigned char)0x3F);
const m512 *masks = s->succ_masks;
#else
m512 cur_state = set1_64x8(*state); m512 cur_state = set1_64x8(*state);
const m512 *masks = s->succ_masks; const m512 *masks = s->succ_masks;
#endif
while (likely(cur_buf != end)) { while (likely(cur_buf != end)) {
const u8 c = *cur_buf; const u8 c = *cur_buf;
#if defined(HAVE_SVE)
svuint8_t succ_mask = svld1(lane_pred_64, (const u8*)(masks + c));
cur_state = svtbl(succ_mask, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 tmp = svlastb(lane_pred_64, cur_state);
#else
const m512 succ_mask = masks[c]; const m512 succ_mask = masks[c];
cur_state = vpermb512(cur_state, succ_mask); cur_state = vpermb512(cur_state, succ_mask);
const u8 tmp = movd512(cur_state); const u8 tmp = movd512(cur_state);
#endif
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?'); DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG64_STATE_MASK, DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG64_STATE_MASK,
@ -214,7 +246,11 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
} }
cur_buf++; cur_buf++;
} }
#if defined(HAVE_SVE)
*state = svlastb(lane_pred_64, cur_state);
#else
*state = movd512(cur_state); *state = movd512(cur_state);
#endif
*scan_end = cur_buf; *scan_end = cur_buf;
return MO_CONTINUE_MATCHING; return MO_CONTINUE_MATCHING;
} }

View File

@ -283,7 +283,7 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
return MO_CONTINUE_MATCHING; return MO_CONTINUE_MATCHING;
} }
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
static really_inline static really_inline
char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt, char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
const struct sheng32 *s, const struct sheng32 *s,
@ -320,8 +320,15 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
return MO_CONTINUE_MATCHING; return MO_CONTINUE_MATCHING;
} }
#if defined(HAVE_SVE)
const svbool_t lane_pred_32 = svwhilelt_b8(0, 32);
svuint8_t cur_state = svdup_u8(*state);
svuint8_t tbl_mask = svdup_u8((unsigned char)0x1F);
const m512 *masks = s->succ_masks;
#else
m512 cur_state = set1_64x8(*state); m512 cur_state = set1_64x8(*state);
const m512 *masks = s->succ_masks; const m512 *masks = s->succ_masks;
#endif
while (likely(end - cur_buf >= 4)) { while (likely(end - cur_buf >= 4)) {
const u8 *b1 = cur_buf; const u8 *b1 = cur_buf;
@ -333,6 +340,23 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
const u8 c3 = *b3; const u8 c3 = *b3;
const u8 c4 = *b4; const u8 c4 = *b4;
#if defined(HAVE_SVE)
svuint8_t succ_mask1 = svld1(lane_pred_32, (const u8*)(masks+c1));
cur_state = svtbl(succ_mask1, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a1 = svlastb(lane_pred_32, cur_state);
svuint8_t succ_mask2 = svld1(lane_pred_32, (const u8*)(masks+c2));
cur_state = svtbl(succ_mask2, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a2 = svlastb(lane_pred_32, cur_state);
svuint8_t succ_mask3 = svld1(lane_pred_32, (const u8*)(masks+c3));
cur_state = svtbl(succ_mask3, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a3 = svlastb(lane_pred_32, cur_state);
svuint8_t succ_mask4 = svld1(lane_pred_32, (const u8*)(masks+c4));
cur_state = svtbl(succ_mask4, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a4 = svlastb(lane_pred_32, cur_state);
#else
const m512 succ_mask1 = masks[c1]; const m512 succ_mask1 = masks[c1];
cur_state = vpermb512(cur_state, succ_mask1); cur_state = vpermb512(cur_state, succ_mask1);
const u8 a1 = movd512(cur_state); const u8 a1 = movd512(cur_state);
@ -348,6 +372,7 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
const m512 succ_mask4 = masks[c4]; const m512 succ_mask4 = masks[c4];
cur_state = vpermb512(cur_state, succ_mask4); cur_state = vpermb512(cur_state, succ_mask4);
const u8 a4 = movd512(cur_state); const u8 a4 = movd512(cur_state);
#endif
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?'); DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG32_STATE_MASK, DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG32_STATE_MASK,
@ -517,7 +542,11 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
}; };
cur_buf += 4; cur_buf += 4;
} }
#if defined(HAVE_SVE)
*state = svlastb(lane_pred_32, cur_state);
#else
*state = movd512(cur_state); *state = movd512(cur_state);
#endif
*scan_end = cur_buf; *scan_end = cur_buf;
return MO_CONTINUE_MATCHING; return MO_CONTINUE_MATCHING;
} }
@ -541,9 +570,15 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
*scan_end = end; *scan_end = end;
return MO_CONTINUE_MATCHING; return MO_CONTINUE_MATCHING;
} }
#if defined(HAVE_SVE)
const svbool_t lane_pred_64 = svwhilelt_b8(0, 64);
svuint8_t cur_state = svdup_u8(*state);
svuint8_t tbl_mask = svdup_u8((unsigned char)0x3F);
const m512 *masks = s->succ_masks;
#else
m512 cur_state = set1_64x8(*state); m512 cur_state = set1_64x8(*state);
const m512 *masks = s->succ_masks; const m512 *masks = s->succ_masks;
#endif
while (likely(end - cur_buf >= 4)) { while (likely(end - cur_buf >= 4)) {
const u8 *b1 = cur_buf; const u8 *b1 = cur_buf;
@ -555,6 +590,23 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
const u8 c3 = *b3; const u8 c3 = *b3;
const u8 c4 = *b4; const u8 c4 = *b4;
#if defined(HAVE_SVE)
svuint8_t succ_mask1 = svld1(lane_pred_64, (const u8*)(masks+c1));
cur_state = svtbl(succ_mask1, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a1 = svlastb(lane_pred_64, cur_state);
svuint8_t succ_mask2 = svld1(lane_pred_64, (const u8*)(masks+c2));
cur_state = svtbl(succ_mask2, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a2 = svlastb(lane_pred_64, cur_state);
svuint8_t succ_mask3 = svld1(lane_pred_64, (const u8*)(masks+c3));
cur_state = svtbl(succ_mask3, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a3 = svlastb(lane_pred_64, cur_state);
svuint8_t succ_mask4 = svld1(lane_pred_64, (const u8*)(masks+c4));
cur_state = svtbl(succ_mask4, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a4 = svlastb(lane_pred_64, cur_state);
#else
const m512 succ_mask1 = masks[c1]; const m512 succ_mask1 = masks[c1];
cur_state = vpermb512(cur_state, succ_mask1); cur_state = vpermb512(cur_state, succ_mask1);
const u8 a1 = movd512(cur_state); const u8 a1 = movd512(cur_state);
@ -570,6 +622,7 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
const m512 succ_mask4 = masks[c4]; const m512 succ_mask4 = masks[c4];
cur_state = vpermb512(cur_state, succ_mask4); cur_state = vpermb512(cur_state, succ_mask4);
const u8 a4 = movd512(cur_state); const u8 a4 = movd512(cur_state);
#endif
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?'); DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG64_STATE_MASK, DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG64_STATE_MASK,
@ -703,7 +756,11 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
} }
cur_buf += 4; cur_buf += 4;
} }
#if defined(HAVE_SVE)
*state = svlastb(lane_pred_64, cur_state);
#else
*state = movd512(cur_state); *state = movd512(cur_state);
#endif
*scan_end = cur_buf; *scan_end = cur_buf;
return MO_CONTINUE_MATCHING; return MO_CONTINUE_MATCHING;
} }

View File

@ -730,10 +730,17 @@ bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc,
return nullptr; return nullptr;
} }
#ifdef HAVE_SVE
if (svcntb()<32) {
DEBUG_PRINTF("Sheng32 failed, SVE width is too small!\n");
return nullptr;
}
#else
if (!cc.target_info.has_avx512vbmi()) { if (!cc.target_info.has_avx512vbmi()) {
DEBUG_PRINTF("Sheng32 failed, no HS_CPU_FEATURES_AVX512VBMI!\n"); DEBUG_PRINTF("Sheng32 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
return nullptr; return nullptr;
} }
#endif
sheng_build_strat strat(raw, rm, only_accel_init); sheng_build_strat strat(raw, rm, only_accel_init);
dfa_info info(strat); dfa_info info(strat);
@ -762,10 +769,17 @@ bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc,
return nullptr; return nullptr;
} }
#ifdef HAVE_SVE
if (svcntb()<64) {
DEBUG_PRINTF("Sheng64 failed, SVE width is too small!\n");
return nullptr;
}
#else
if (!cc.target_info.has_avx512vbmi()) { if (!cc.target_info.has_avx512vbmi()) {
DEBUG_PRINTF("Sheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n"); DEBUG_PRINTF("Sheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
return nullptr; return nullptr;
} }
#endif
sheng_build_strat strat(raw, rm, only_accel_init); sheng_build_strat strat(raw, rm, only_accel_init);
dfa_info info(strat); dfa_info info(strat);