diff --git a/src/nfa/shengcompile.cpp b/src/nfa/shengcompile.cpp
index 8b939973..f968bf59 100644
--- a/src/nfa/shengcompile.cpp
+++ b/src/nfa/shengcompile.cpp
@@ -643,19 +643,40 @@ bytecode_ptr shengCompile(raw_dfa &raw, const CompileContext &cc,
     DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n",
                  info.can_die ? "can" : "cannot", info.size());
     if (info.size() > 16) {
-#if defined(HAVE_AVX512VBMI)
-        if (info.size() > 32) {
-            DEBUG_PRINTF("Too many states\n");
-            return nullptr;
-        }
-        return shengCompile_int(raw, cc, accel_states, strat, info);
-#else
         DEBUG_PRINTF("Too many states\n");
         return nullptr;
-#endif
     }
 
     return shengCompile_int(raw, cc, accel_states, strat, info);
 }
 
+#if defined(HAVE_AVX512VBMI) // Sheng builder for DFAs of 17..32 effective states; only compiled when HAVE_AVX512VBMI is defined
+bytecode_ptr sheng32Compile(raw_dfa &raw, const CompileContext &cc,
+                            const ReportManager &rm, bool only_accel_init,
+                            set *accel_states) {
+    if (!cc.grey.allowSheng) { // returns nullptr without building anything when the allowSheng grey setting is off
+        DEBUG_PRINTF("Sheng is not allowed!\n");
+        return nullptr;
+    }
+
+    sheng_build_strat strat(raw, rm, only_accel_init);
+    dfa_info info(strat);
+
+    DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size());
+
+    DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n",
+                 raw.start_anchored, raw.start_floating);
+
+    DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n",
+                 info.can_die ? "can" : "cannot", info.size());
+    assert(info.size() > 16); // NOTE(review): presumes shengCompile() already took every DFA of <= 16 states - confirm it has no other failure path
+    if (info.size() > 32) { // upper bound for this variant: more than 32 effective states is rejected with nullptr
+        DEBUG_PRINTF("Too many states\n");
+        return nullptr;
+    }
+
+    return shengCompile_int(raw, cc, accel_states, strat, info); // same build routine that shengCompile() calls
+}
+#endif // HAVE_AVX512VBMI
+
 } // namespace ue2
diff --git a/src/nfa/shengcompile.h b/src/nfa/shengcompile.h
index d795b362..b36e27be 100644
--- a/src/nfa/shengcompile.h
+++ b/src/nfa/shengcompile.h
@@ -71,6 +71,12 @@ bytecode_ptr shengCompile(raw_dfa &raw, const CompileContext &cc,
                           const ReportManager &rm, bool only_accel_init,
                           std::set *accel_states = nullptr);
 
+#if defined(HAVE_AVX512VBMI) // declared under the same guard as the definition in shengcompile.cpp
+bytecode_ptr sheng32Compile(raw_dfa &raw, const CompileContext &cc,
+                            const ReportManager &rm, bool only_accel_init,
+                            std::set *accel_states = nullptr);
+#endif // HAVE_AVX512VBMI
+
 struct sheng_escape_info {
     CharReach outs;
     CharReach outs2_single;
diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index 5cbb5c84..8e1d7095 100644
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@@ -632,6 +632,11 @@ bytecode_ptr getDfa(raw_dfa &rdfa, bool is_transient,
          * bytecode and that they are usually run on small blocks */
         dfa = mcshengCompile(rdfa, cc, rm);
     }
+#if defined(HAVE_AVX512VBMI)
+    if (!dfa) { // nothing built so far: try the 32-state Sheng before the McClellan fallback below
+        dfa = sheng32Compile(rdfa, cc, rm, false);
+    }
+#endif // HAVE_AVX512VBMI
     if (!dfa) {
         // Sheng wasn't successful, so unleash McClellan!
         dfa = mcclellanCompile(rdfa, cc, rm, false);
diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp
index 345edfe9..909fdcb3 100644
--- a/src/smallwrite/smallwrite_build.cpp
+++ b/src/smallwrite/smallwrite_build.cpp
@@ -793,6 +793,11 @@ bytecode_ptr getDfa(raw_dfa &rdfa, const CompileContext &cc,
     bytecode_ptr dfa = nullptr;
     if (cc.grey.allowSmallWriteSheng) {
         dfa = shengCompile(rdfa, cc, rm, only_accel_init, &accel_states);
+#if defined(HAVE_AVX512VBMI)
+        if (!dfa) { // shengCompile() returned null: retry with the 32-state variant, passing the same accel_states set
+            dfa = sheng32Compile(rdfa, cc, rm, only_accel_init, &accel_states);
+        }
+#endif // HAVE_AVX512VBMI
     }
     if (!dfa) {
         dfa = mcclellanCompile(rdfa, cc, rm, only_accel_init,