From f5412b3509082a3278fd95a3bb0247916d4c0823 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Tue, 19 Mar 2024 11:40:23 +0200 Subject: [PATCH] Revert "RFC Enable sheng32/64 for SVE" --- src/nfa/sheng.c | 8 +- src/nfa/sheng.h | 8 +- src/nfa/sheng_defs.h | 70 +++---- src/nfa/sheng_impl.h | 127 ------------ src/nfa/sheng_impl4.h | 428 --------------------------------------- src/nfa/shengcompile.cpp | 14 -- 6 files changed, 43 insertions(+), 612 deletions(-) diff --git a/src/nfa/sheng.c b/src/nfa/sheng.c index 922e8f80..3f36e218 100644 --- a/src/nfa/sheng.c +++ b/src/nfa/sheng.c @@ -154,7 +154,7 @@ char fireReports(const struct sheng *sh, NfaCallback cb, void *ctxt, return MO_CONTINUE_MATCHING; /* continue execution */ } -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) // Sheng32 static really_inline const struct sheng32 *get_sheng32(const struct NFA *n) { @@ -351,7 +351,7 @@ char fireReports64(const struct sheng64 *sh, NfaCallback cb, void *ctxt, } return MO_CONTINUE_MATCHING; /* continue execution */ } -#endif // end of HAVE_AVX512VBMI || HAVE_SVE +#endif // end of HAVE_AVX512VBMI /* include Sheng function definitions */ #include "sheng_defs.h" @@ -871,7 +871,7 @@ char nfaExecSheng_expandState(UNUSED const struct NFA *nfa, void *dest, return 0; } -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) // Sheng32 static really_inline char runSheng32Cb(const struct sheng32 *sh, NfaCallback cb, void *ctxt, @@ -1874,4 +1874,4 @@ char nfaExecSheng64_expandState(UNUSED const struct NFA *nfa, void *dest, *(u8 *)dest = *(const u8 *)src; return 0; } -#endif // end of HAVE_AVX512VBMI || HAVE_SVE +#endif // end of HAVE_AVX512VBMI diff --git a/src/nfa/sheng.h b/src/nfa/sheng.h index 212bd3a4..7b90e303 100644 --- a/src/nfa/sheng.h +++ b/src/nfa/sheng.h @@ -58,7 +58,7 @@ char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer, size_t length, NfaCallback cb, void *context); -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL #define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL @@ -106,7 +106,8 @@ char nfaExecSheng64_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer, size_t length, NfaCallback cb, void *context); -#else // !HAVE_AVX512VBMI && !HAVE_SVE + +#else // !HAVE_AVX512VBMI #define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL #define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL @@ -137,7 +138,6 @@ char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer, #define nfaExecSheng64_testEOD NFA_API_NO_IMPL #define nfaExecSheng64_reportCurrent NFA_API_NO_IMPL #define nfaExecSheng64_B NFA_API_NO_IMPL -#endif // end of HAVE_AVX512VBMI || defined(HAVE_SVE) - +#endif // end of HAVE_AVX512VBMI #endif /* SHENG_H_ */ diff --git a/src/nfa/sheng_defs.h b/src/nfa/sheng_defs.h index 886af28e..390af752 100644 --- a/src/nfa/sheng_defs.h +++ b/src/nfa/sheng_defs.h @@ -52,7 +52,7 @@ u8 hasInterestingStates(const u8 a, const u8 b, const u8 c, const u8 d) { return (a | b | c | d) & (SHENG_STATE_FLAG_MASK); } -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) static really_inline u8 isDeadState32(const u8 a) { return a & SHENG32_STATE_DEAD; @@ -108,7 +108,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define SHENG_IMPL sheng_cod #define DEAD_FUNC isDeadState #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_cod #define DEAD_FUNC32 isDeadState32 #define ACCEPT_FUNC32 isAcceptState32 @@ -121,7 +121,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef SHENG_IMPL #undef DEAD_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef DEAD_FUNC32 #undef ACCEPT_FUNC32 @@ -135,7 +135,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define SHENG_IMPL sheng_co #define DEAD_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_co #define DEAD_FUNC32 dummyFunc #define ACCEPT_FUNC32 isAcceptState32 @@ -148,7 +148,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef SHENG_IMPL #undef DEAD_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef DEAD_FUNC32 #undef ACCEPT_FUNC32 @@ -162,7 +162,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define SHENG_IMPL sheng_samd #define DEAD_FUNC isDeadState #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_samd #define DEAD_FUNC32 isDeadState32 #define ACCEPT_FUNC32 isAcceptState32 @@ -175,7 +175,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef SHENG_IMPL #undef DEAD_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef DEAD_FUNC32 #undef ACCEPT_FUNC32 @@ -189,7 +189,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define SHENG_IMPL sheng_sam #define DEAD_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_sam #define DEAD_FUNC32 dummyFunc #define ACCEPT_FUNC32 isAcceptState32 @@ -202,7 +202,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef SHENG_IMPL #undef DEAD_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef DEAD_FUNC32 #undef ACCEPT_FUNC32 @@ -216,7 +216,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define SHENG_IMPL sheng_nmd #define DEAD_FUNC isDeadState #define ACCEPT_FUNC dummyFunc -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_nmd #define DEAD_FUNC32 isDeadState32 #define ACCEPT_FUNC32 dummyFunc @@ -229,7 +229,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef SHENG_IMPL #undef DEAD_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef DEAD_FUNC32 #undef ACCEPT_FUNC32 @@ -243,7 +243,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define SHENG_IMPL sheng_nm #define DEAD_FUNC dummyFunc #define ACCEPT_FUNC dummyFunc -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_nm #define DEAD_FUNC32 dummyFunc #define ACCEPT_FUNC32 dummyFunc @@ -256,7 +256,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef SHENG_IMPL #undef DEAD_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef DEAD_FUNC32 #undef ACCEPT_FUNC32 @@ -277,7 +277,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC isAccelState #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_4_coda #define INTERESTING_FUNC32 hasInterestingStates32 #define INNER_DEAD_FUNC32 isDeadState32 @@ -296,7 +296,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 #undef INNER_DEAD_FUNC32 @@ -316,7 +316,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_4_cod #define INTERESTING_FUNC32 hasInterestingStates32 #define INNER_DEAD_FUNC32 isDeadState32 @@ -339,7 +339,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 #undef INNER_DEAD_FUNC32 @@ -363,7 +363,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC isAccelState #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_4_coa #define INTERESTING_FUNC32 hasInterestingStates32 #define INNER_DEAD_FUNC32 dummyFunc @@ -382,7 +382,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 #undef INNER_DEAD_FUNC32 @@ -402,7 +402,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_4_co #define INTERESTING_FUNC32 hasInterestingStates32 #define INNER_DEAD_FUNC32 dummyFunc @@ -425,7 +425,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 #undef INNER_DEAD_FUNC32 @@ -449,7 +449,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC isAccelState #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_4_samda #define INTERESTING_FUNC32 hasInterestingStates32 #define INNER_DEAD_FUNC32 isDeadState32 @@ -468,7 +468,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 #undef INNER_DEAD_FUNC32 @@ -488,7 +488,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_4_samd #define INTERESTING_FUNC32 hasInterestingStates32 #define INNER_DEAD_FUNC32 isDeadState32 @@ -511,7 +511,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 #undef INNER_DEAD_FUNC32 @@ -535,7 +535,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC isAccelState #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_4_sama #define INTERESTING_FUNC32 hasInterestingStates32 #define INNER_DEAD_FUNC32 dummyFunc @@ -554,7 +554,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 #undef INNER_DEAD_FUNC32 @@ -574,7 +574,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_4_sam #define INTERESTING_FUNC32 hasInterestingStates32 #define INNER_DEAD_FUNC32 dummyFunc @@ -597,7 +597,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 #undef INNER_DEAD_FUNC32 @@ -623,7 +623,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC isAccelState #define ACCEPT_FUNC dummyFunc -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_4_nmda #define INTERESTING_FUNC32 dummyFunc4 #define INNER_DEAD_FUNC32 dummyFunc @@ -642,7 +642,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 #undef INNER_DEAD_FUNC32 @@ -662,7 +662,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC dummyFunc -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_4_nmd #define INTERESTING_FUNC32 dummyFunc4 #define INNER_DEAD_FUNC32 dummyFunc @@ -685,7 +685,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 #undef INNER_DEAD_FUNC32 @@ -712,7 +712,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC dummyFunc -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_4_nm #define INTERESTING_FUNC32 dummyFunc4 #define INNER_DEAD_FUNC32 dummyFunc @@ -735,7 +735,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +#if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 #undef INNER_DEAD_FUNC32 diff --git a/src/nfa/sheng_impl.h b/src/nfa/sheng_impl.h index 9634fa65..1fa5c831 100644 --- a/src/nfa/sheng_impl.h +++ b/src/nfa/sheng_impl.h @@ -96,133 +96,6 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s, return MO_CONTINUE_MATCHING; } -#if defined(HAVE_SVE) - -static really_inline -char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt, - const struct sheng32 *s, - u8 *const cached_accept_state, - ReportID *const cached_accept_id, - u8 single, u64a base_offset, const u8 *buf, const u8 *start, - const u8 *end, const u8 **scan_end) { - DEBUG_PRINTF("Starting DFA execution in state %u\n", - *state & SHENG32_STATE_MASK); - const u8 *cur_buf = start; - if (DEAD_FUNC32(*state)) { - DEBUG_PRINTF("Dead on arrival\n"); - *scan_end = end; - return MO_CONTINUE_MATCHING; - } - DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start)); - - const svbool_t lane_pred_32 = svwhilelt_b8(0, 32); - svuint8_t cur_state = svld1(lane_pred_32, state); - const m512 *masks = s->succ_masks; - - while (likely(cur_buf != end)) { - const u8 c = *cur_buf; - svuint8_t succ_mask = svld1(lane_pred_32, (const u8*)(masks + c)); - cur_state = svtbl(cur_state, succ_mask); - const u8 tmp = svlastb(lane_pred_32, cur_state); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?'); - DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG32_STATE_MASK, - tmp & SHENG32_STATE_FLAG_MASK); - - if (unlikely(ACCEPT_FUNC32(tmp))) { - DEBUG_PRINTF("Accept state %u reached\n", tmp & SHENG32_STATE_MASK); - u64a match_offset = base_offset + (cur_buf - buf) + 1; - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (u64a)(cur_buf - start)); - *state = tmp; - *scan_end = cur_buf; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports32(s, cb, ctxt, tmp, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - cur_buf++; - } - *state = svlastb(lane_pred_32, cur_state); - *scan_end = cur_buf; - return MO_CONTINUE_MATCHING; -} - -static really_inline -char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt, - const struct sheng64 *s, - u8 *const cached_accept_state, - ReportID *const cached_accept_id, - u8 single, u64a base_offset, const u8 *buf, const u8 *start, - const u8 *end, const u8 **scan_end) { - DEBUG_PRINTF("Starting DFA execution in state %u\n", - *state & SHENG64_STATE_MASK); - const u8 *cur_buf = start; - if (DEAD_FUNC64(*state)) { - DEBUG_PRINTF("Dead on arrival\n"); - *scan_end = end; - return MO_CONTINUE_MATCHING; - } - DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start)); - - const svbool_t lane_pred_64 = svwhilelt_b8(0, 64); - svuint8_t cur_state = svld1(lane_pred_64, state); - const m512 *masks = s->succ_masks; - - while (likely(cur_buf != end)) { - const u8 c = *cur_buf; - svuint8_t succ_mask = svld1(lane_pred_64, (const u8*)(masks + c)); - cur_state = svtbl(cur_state, succ_mask); - const u8 tmp = svlastb(lane_pred_64, cur_state); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?'); - DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG64_STATE_MASK, - tmp & SHENG64_STATE_FLAG_MASK); - - if (unlikely(ACCEPT_FUNC64(tmp))) { - DEBUG_PRINTF("Accept state %u reached\n", tmp & SHENG64_STATE_MASK); - u64a match_offset = base_offset + (cur_buf - buf) + 1; - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (u64a)(cur_buf - start)); - *state = tmp; - *scan_end = cur_buf; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports64(s, cb, ctxt, tmp, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - cur_buf++; - } - *state = svlastb(lane_pred_64, cur_state); - *scan_end = cur_buf; - return MO_CONTINUE_MATCHING; -} -#endif - #if defined(HAVE_AVX512VBMI) static really_inline char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt, diff --git a/src/nfa/sheng_impl4.h b/src/nfa/sheng_impl4.h index 10ad4ea0..e5d3468f 100644 --- a/src/nfa/sheng_impl4.h +++ b/src/nfa/sheng_impl4.h @@ -283,434 +283,6 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s, return MO_CONTINUE_MATCHING; } -#if defined(HAVE_SVE) -static really_inline -char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt, - const struct sheng32 *s, - u8 *const cached_accept_state, - ReportID *const cached_accept_id, - u8 single, u64a base_offset, const u8 *buf, const u8 *start, - const u8 *end, const u8 **scan_end) { - DEBUG_PRINTF("Starting DFAx4 execution in state %u\n", - *state & SHENG32_STATE_MASK); - const u8 *cur_buf = start; - const u8 *min_accel_dist = start; - base_offset++; - DEBUG_PRINTF("Scanning %llu bytes\n", (u64a)(end - start)); - - if (INNER_ACCEL_FUNC32(*state) || OUTER_ACCEL_FUNC32(*state)) { - DEBUG_PRINTF("Accel state reached @ 0\n"); - const union AccelAux *aaux = - get_accel32(s, *state & SHENG32_STATE_MASK); - const u8 *new_offset = run_accel(aaux, cur_buf, end); - if (new_offset < cur_buf + BAD_ACCEL_DIST) { - min_accel_dist = new_offset + BIG_ACCEL_PENALTY; - } else { - min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; - } - DEBUG_PRINTF("Next accel chance: %llu\n", - (u64a)(min_accel_dist - start)); - DEBUG_PRINTF("Accel scanned %zu bytes\n", new_offset - cur_buf); - cur_buf = new_offset; - DEBUG_PRINTF("New offset: %lli\n", (s64a)(cur_buf - start)); - } - if (INNER_DEAD_FUNC32(*state) || OUTER_DEAD_FUNC32(*state)) { - DEBUG_PRINTF("Dead on arrival\n"); - *scan_end = end; - return MO_CONTINUE_MATCHING; - } - - const svbool_t lane_pred_32 = svwhilelt_b8(0, 32); - svuint8_t cur_state = svld1(lane_pred_32, state); - const m512 *masks = s->succ_masks; - - while (likely(end - cur_buf >= 4)) { - const u8 *b1 = cur_buf; - const u8 *b2 = cur_buf + 1; - const u8 *b3 = cur_buf + 2; - const u8 *b4 = cur_buf + 3; - const u8 c1 = *b1; - const u8 c2 = *b2; - const u8 c3 = *b3; - const u8 c4 = *b4; - svuint8_t succ_mask1 = svld1(lane_pred_32, (const u8*)(masks+c1)); - cur_state = svtbl(cur_state, succ_mask1); - const u8 a1 = svlastb(lane_pred_32, cur_state); - - svuint8_t succ_mask2 = svld1(lane_pred_32, (const u8*)(masks+c2)); - cur_state = svtbl(cur_state, succ_mask2); - const u8 a2 = svlastb(lane_pred_32, cur_state); - - svuint8_t succ_mask3 = svld1(lane_pred_32, (const u8*)(masks+c3)); - cur_state = svtbl(cur_state, succ_mask3); - const u8 a3 = svlastb(lane_pred_32, cur_state); - - svuint8_t succ_mask4 = svld1(lane_pred_32, (const u8*)(masks+c4)); - cur_state = svtbl(cur_state, succ_mask4); - const u8 a4 = svlastb(lane_pred_32, cur_state); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?'); - DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG32_STATE_MASK, - a1 & SHENG32_STATE_FLAG_MASK); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c2, ourisprint(c2) ? c2 : '?'); - DEBUG_PRINTF("s: %u (flag: %u)\n", a2 & SHENG32_STATE_MASK, - a2 & SHENG32_STATE_FLAG_MASK); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c3, ourisprint(c3) ? c3 : '?'); - DEBUG_PRINTF("s: %u (flag: %u)\n", a3 & SHENG32_STATE_MASK, - a3 & SHENG32_STATE_FLAG_MASK); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c4, ourisprint(c4) ? c4 : '?'); - DEBUG_PRINTF("s: %u (flag: %u)\n", a4 & SHENG32_STATE_MASK, - a4 & SHENG32_STATE_FLAG_MASK); - - if (unlikely(INTERESTING_FUNC32(a1, a2, a3, a4))) { - if (ACCEPT_FUNC32(a1)) { - u64a match_offset = base_offset + b1 - buf; - DEBUG_PRINTF("Accept state %u reached\n", - a1 & SHENG32_STATE_MASK); - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (s64a)(b1 - start)); - *scan_end = b1; - *state = a1; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports32(s, cb, ctxt, a1, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - if (ACCEPT_FUNC32(a2)) { - u64a match_offset = base_offset + b2 - buf; - DEBUG_PRINTF("Accept state %u reached\n", - a2 & SHENG32_STATE_MASK); - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (s64a)(b2 - start)); - *scan_end = b2; - *state = a2; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports32(s, cb, ctxt, a2, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - if (ACCEPT_FUNC32(a3)) { - u64a match_offset = base_offset + b3 - buf; - DEBUG_PRINTF("Accept state %u reached\n", - a3 & SHENG32_STATE_MASK); - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (s64a)(b3 - start)); - *scan_end = b3; - *state = a3; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports32(s, cb, ctxt, a3, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - if (ACCEPT_FUNC32(a4)) { - u64a match_offset = base_offset + b4 - buf; - DEBUG_PRINTF("Accept state %u reached\n", - a4 & SHENG32_STATE_MASK); - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (s64a)(b4 - start)); - *scan_end = b4; - *state = a4; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports32(s, cb, ctxt, a4, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - if (INNER_DEAD_FUNC32(a4)) { - DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(b4 - buf)); - *scan_end = end; - *state = a4; - return MO_CONTINUE_MATCHING; - } - if (cur_buf > min_accel_dist && INNER_ACCEL_FUNC32(a4)) { - DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf)); - const union AccelAux *aaux = - get_accel32(s, a4 & SHENG32_STATE_MASK); - const u8 *new_offset = run_accel(aaux, cur_buf + 4, end); - if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) { - min_accel_dist = new_offset + BIG_ACCEL_PENALTY; - } else { - min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; - } - DEBUG_PRINTF("Next accel chance: %llu\n", - (u64a)(min_accel_dist - start)); - DEBUG_PRINTF("Accel scanned %llu bytes\n", - (u64a)(new_offset - cur_buf - 4)); - cur_buf = new_offset; - DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf)); - continue; - } - } - if (OUTER_DEAD_FUNC32(a4)) { - DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(cur_buf - buf)); - *scan_end = end; - *state = a4; - return MO_CONTINUE_MATCHING; - }; - if (cur_buf > min_accel_dist && OUTER_ACCEL_FUNC32(a4)) { - DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf)); - const union AccelAux *aaux = - get_accel32(s, a4 & SHENG32_STATE_MASK); - const u8 *new_offset = run_accel(aaux, cur_buf + 4, end); - if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) { - min_accel_dist = new_offset + BIG_ACCEL_PENALTY; - } else { - min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; - } - DEBUG_PRINTF("Next accel chance: %llu\n", - (u64a)(min_accel_dist - start)); - DEBUG_PRINTF("Accel scanned %llu bytes\n", - (u64a)(new_offset - cur_buf - 4)); - cur_buf = new_offset; - DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf)); - continue; - }; - cur_buf += 4; - } - *state = svlastb(lane_pred_32, cur_state); - *scan_end = cur_buf; - return MO_CONTINUE_MATCHING; -} - -#if !defined(NO_SHENG64_IMPL) -static really_inline -char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt, - const struct sheng64 *s, - u8 *const cached_accept_state, - ReportID *const cached_accept_id, - u8 single, u64a base_offset, const u8 *buf, const u8 *start, - const u8 *end, const u8 **scan_end) { - DEBUG_PRINTF("Starting DFAx4 execution in state %u\n", - *state & SHENG64_STATE_MASK); - const u8 *cur_buf = start; - base_offset++; - DEBUG_PRINTF("Scanning %llu bytes\n", (u64a)(end - start)); - - if (INNER_DEAD_FUNC64(*state) || OUTER_DEAD_FUNC64(*state)) { - DEBUG_PRINTF("Dead on arrival\n"); - *scan_end = end; - return MO_CONTINUE_MATCHING; - } - - const svbool_t lane_pred_64 = svwhilelt_b8(0, 64); - svuint8_t cur_state = svld1(lane_pred_64, state); - const m512 *masks = s->succ_masks; - - while (likely(end - cur_buf >= 4)) { - const u8 *b1 = cur_buf; - const u8 *b2 = cur_buf + 1; - const u8 *b3 = cur_buf + 2; - const u8 *b4 = cur_buf + 3; - const u8 c1 = *b1; - const u8 c2 = *b2; - const u8 c3 = *b3; - const u8 c4 = *b4; - - svuint8_t succ_mask1 = svld1(lane_pred_64, (const u8*)(masks+c1)); - cur_state = svtbl(cur_state, succ_mask1); - const u8 a1 = svlastb(lane_pred_64, cur_state); - - svuint8_t succ_mask2 = svld1(lane_pred_64, (const u8*)(masks+c2)); - cur_state = svtbl(cur_state, succ_mask2); - const u8 a2 = svlastb(lane_pred_64, cur_state); - - svuint8_t succ_mask3 = svld1(lane_pred_64, (const u8*)(masks+c3)); - cur_state = svtbl(cur_state, succ_mask3); - const u8 a3 = svlastb(lane_pred_64, cur_state); - - svuint8_t succ_mask4 = svld1(lane_pred_64, (const u8*)(masks+c4)); - cur_state = svtbl(cur_state, succ_mask4); - const u8 a4 = svlastb(lane_pred_64, cur_state); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?'); - DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG64_STATE_MASK, - a1 & SHENG64_STATE_FLAG_MASK); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c2, ourisprint(c2) ? c2 : '?'); - DEBUG_PRINTF("s: %u (flag: %u)\n", a2 & SHENG64_STATE_MASK, - a2 & SHENG64_STATE_FLAG_MASK); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c3, ourisprint(c3) ? c3 : '?'); - DEBUG_PRINTF("s: %u (flag: %u)\n", a3 & SHENG64_STATE_MASK, - a3 & SHENG64_STATE_FLAG_MASK); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c4, ourisprint(c4) ? c4 : '?'); - DEBUG_PRINTF("s: %u (flag: %u)\n", a4 & SHENG64_STATE_MASK, - a4 & SHENG64_STATE_FLAG_MASK); - - if (unlikely(INTERESTING_FUNC64(a1, a2, a3, a4))) { - if (ACCEPT_FUNC64(a1)) { - u64a match_offset = base_offset + b1 - buf; - DEBUG_PRINTF("Accept state %u reached\n", - a1 & SHENG64_STATE_MASK); - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (s64a)(b1 - start)); - *scan_end = b1; - *state = a1; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports64(s, cb, ctxt, a1, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - if (ACCEPT_FUNC64(a2)) { - u64a match_offset = base_offset + b2 - buf; - DEBUG_PRINTF("Accept state %u reached\n", - a2 & SHENG64_STATE_MASK); - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (s64a)(b2 - start)); - *scan_end = b2; - *state = a2; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports64(s, cb, ctxt, a2, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - if (ACCEPT_FUNC64(a3)) { - u64a match_offset = base_offset + b3 - buf; - DEBUG_PRINTF("Accept state %u reached\n", - a3 & SHENG64_STATE_MASK); - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (s64a)(b3 - start)); - *scan_end = b3; - *state = a3; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports64(s, cb, ctxt, a3, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - if (ACCEPT_FUNC64(a4)) { - u64a match_offset = base_offset + b4 - buf; - DEBUG_PRINTF("Accept state %u reached\n", - a4 & SHENG64_STATE_MASK); - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (s64a)(b4 - start)); - *scan_end = b4; - *state = a4; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports64(s, cb, ctxt, a4, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - if (INNER_DEAD_FUNC64(a4)) { - DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(b4 - buf)); - *scan_end = end; - *state = a4; - return MO_CONTINUE_MATCHING; - } - } - if (OUTER_DEAD_FUNC64(a4)) { - DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(cur_buf - buf)); - *scan_end = end; - *state = a4; - return MO_CONTINUE_MATCHING; - } - cur_buf += 4; - } - *state = svlastb(lane_pred_64, cur_state); - *scan_end = cur_buf; - return MO_CONTINUE_MATCHING; -} -#endif -#endif - #if defined(HAVE_AVX512VBMI) static really_inline char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt, diff --git a/src/nfa/shengcompile.cpp b/src/nfa/shengcompile.cpp index 0f93e139..055e1971 100644 --- a/src/nfa/shengcompile.cpp +++ b/src/nfa/shengcompile.cpp @@ -730,17 +730,10 @@ bytecode_ptr sheng32Compile(raw_dfa &raw, const CompileContext &cc, return nullptr; } -#ifdef HAVE_SVE - if (svcntb()<32) { - DEBUG_PRINTF("Sheng32 failed, SVE width is too small!\n"); - return nullptr; - } -#else if (!cc.target_info.has_avx512vbmi()) { DEBUG_PRINTF("Sheng32 failed, no HS_CPU_FEATURES_AVX512VBMI!\n"); return nullptr; } -#endif sheng_build_strat strat(raw, rm, only_accel_init); dfa_info info(strat); @@ -769,17 +762,10 @@ bytecode_ptr sheng64Compile(raw_dfa &raw, const CompileContext &cc, return nullptr; } -#ifdef HAVE_SVE - if (svcntb()<64) { - DEBUG_PRINTF("Sheng64 failed, SVE width is too small!\n"); - return nullptr; - } -#else if (!cc.target_info.has_avx512vbmi()) { DEBUG_PRINTF("Sheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n"); return nullptr; } -#endif sheng_build_strat strat(raw, rm, only_accel_init); dfa_info info(strat);