From f7d5546fe5f24e12b3b0978f203486e951eb53b1 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Fri, 30 May 2025 21:08:55 +0300 Subject: [PATCH] Bugfix/fix avx512vbmi regressions (#335) Multiple AVX512VBMI-related fixes: src/nfa/mcsheng_compile.cpp: No need for an assert here, impl_id can be set to 0 src/nfa/nfa_api_queue.h: Make sure this compiles on both C++ and C src/nfagraph/ng_fuzzy.cpp: Fix compilation error when DEBUG_OUTPUT=on src/runtime.c: Fix crash when data == NULL unit/internal/sheng.cpp: Unit test has to enable AVX512VBMI manually as autodetection does not get triggered, this causes test to fail src/fdr/teddy_fat.cpp: AVX512 loads need to be 64-byte aligned, caused a crash on clang-18 --- src/fdr/teddy_fat.cpp | 2 +- src/nfa/mcsheng_compile.cpp | 4 +++- src/nfa/nfa_api_queue.h | 4 ++++ src/nfagraph/ng_fuzzy.cpp | 4 ++-- src/rose/rose_build_long_lit.cpp | 3 +-- src/runtime.c | 1 + unit/internal/sheng.cpp | 8 ++++++-- 7 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/fdr/teddy_fat.cpp b/src/fdr/teddy_fat.cpp index f9ff4119..f351008c 100644 --- a/src/fdr/teddy_fat.cpp +++ b/src/fdr/teddy_fat.cpp @@ -100,7 +100,7 @@ const m512 *getDupMaskBase(const struct Teddy *teddy, u8 numMask) { } -const u8 ALIGN_AVX_DIRECTIVE p_mask_interleave[64] = { +const u8 ALIGN_CL_DIRECTIVE p_mask_interleave[64] = { 0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39, 8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47, 16, 48, 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55, diff --git a/src/nfa/mcsheng_compile.cpp b/src/nfa/mcsheng_compile.cpp index 0ca31c99..a8400278 100644 --- a/src/nfa/mcsheng_compile.cpp +++ b/src/nfa/mcsheng_compile.cpp @@ -610,7 +610,9 @@ dstate_id_t find_sheng_states(dfa_info &info, for (auto v : sheng_states) { dstate_id_t s = g[v].index; if (contains(accel_escape_info, s)) { - assert(!info.states[s].impl_id); + if (info.states[s].impl_id == 0) { + DEBUG_PRINTF("impl_id == 0!\n"); + } info.states[s].impl_id 
= sheng_end++; info.extra[s].sheng_id = info.states[s].impl_id - 1; } diff --git a/src/nfa/nfa_api_queue.h b/src/nfa/nfa_api_queue.h index 3f8bca2c..c1274bd1 100644 --- a/src/nfa/nfa_api_queue.h +++ b/src/nfa/nfa_api_queue.h @@ -251,7 +251,11 @@ void q_skip_forward_to(struct mq *q, s64a min_loc) { // Dump the contents of the given queue. static never_inline UNUSED void debugQueue(const struct mq *q) { +#ifdef __cplusplus if (q == nullptr) { +#else + if (q == NULL) { +#endif DEBUG_PRINTF("q=NULL!\n"); return; } diff --git a/src/nfagraph/ng_fuzzy.cpp b/src/nfagraph/ng_fuzzy.cpp index 61a3ee12..95bfbdcc 100644 --- a/src/nfagraph/ng_fuzzy.cpp +++ b/src/nfagraph/ng_fuzzy.cpp @@ -76,7 +76,7 @@ vector> gatherSuccessorsByDepth(const NGHolder &g, if (v == succr) { continue; } - DEBUG_PRINTF("Node %zu depth %u\n", g[succ].index, d + 1); + DEBUG_PRINTF("Node %zu depth %u\n", g[succr].index, d + 1); next.insert(succr); } } @@ -118,7 +118,7 @@ vector> gatherPredecessorsByDepth(const NGHolder &g, if (v == predc) { continue; } - DEBUG_PRINTF("Node %zu depth %u\n", g[pred].index, d + 1); + DEBUG_PRINTF("Node %zu depth %u\n", g[predc].index, d + 1); next.insert(predc); } } diff --git a/src/rose/rose_build_long_lit.cpp b/src/rose/rose_build_long_lit.cpp index 2329a7c2..a88063f9 100644 --- a/src/rose/rose_build_long_lit.cpp +++ b/src/rose/rose_build_long_lit.cpp @@ -207,8 +207,7 @@ vector buildHashTable( u32 lit_id = lit_offset.first; u32 offset = lit_offset.second; - DEBUG_PRINTF("hash 0x%08x lit_id %u offset %u bucket %u\n", hash, - lit_id, offset, bucket); + DEBUG_PRINTF("hash 0x%08x lit_id %u offset %u bucket %u\n", m.first, lit_id, offset, bucket); auto &entry = tab[bucket]; entry.str_offset = verify_u32(litToOffsetVal.at(lit_id)); diff --git a/src/runtime.c b/src/runtime.c index d91b5672..ad16bf57 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -1089,6 +1089,7 @@ hs_error_t HS_CDECL hs_stream_size(const hs_database_t *db, // perusal. 
static UNUSED void dumpData(const char *data, size_t len) { + if (!data) return; DEBUG_PRINTF("BUFFER:"); for (size_t i = 0; i < len; i++) { u8 c = data[i]; diff --git a/unit/internal/sheng.cpp b/unit/internal/sheng.cpp index 342757d4..d475308f 100644 --- a/unit/internal/sheng.cpp +++ b/unit/internal/sheng.cpp @@ -198,7 +198,11 @@ typedef void (*init_raw_dfa_ptr)(struct ue2::raw_dfa*, const ReportID); static inline void init_nfa(struct NFA **out_nfa, sheng_compile_ptr compile_function, init_raw_dfa_ptr init_dfa_function) { ue2::Grey *g = new ue2::Grey(); +#if defined(HAVE_AVX512VBMI) + hs_platform_info plat_info = {0, HS_CPU_FEATURES_AVX512VBMI, 0, 0}; +#else hs_platform_info plat_info = {0, 0, 0, 0}; +#endif ue2::CompileContext *cc = new ue2::CompileContext(false, false, ue2::target_t(plat_info), *g); ue2::ReportManager *rm = new ue2::ReportManager(*g); ue2::Report *report = new ue2::Report(ue2::EXTERNAL_CALLBACK, 0); @@ -522,7 +526,7 @@ TEST(Sheng32, std_compile_header) { } #endif ue2::Grey *g = new ue2::Grey(); - hs_platform_info plat_info = {0, 0, 0, 0}; + hs_platform_info plat_info = {0, HS_CPU_FEATURES_AVX512VBMI, 0, 0}; ue2::CompileContext *cc = new ue2::CompileContext(false, false, ue2::target_t(plat_info), *g); ue2::ReportManager *rm = new ue2::ReportManager(*g); ue2::Report *report = new ue2::Report(ue2::EXTERNAL_CALLBACK, 0); @@ -713,4 +717,4 @@ TEST(Sheng32, history_run_Q2) { } #endif /* defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) */ -} /* namespace */ \ No newline at end of file +} /* namespace */