mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-07-13 05:54:43 +03:00
dfa: allow smwr to avoid lengthy daddy recalc
This commit is contained in:
parent
1538d90a9e
commit
beac58fcb4
@ -802,9 +802,9 @@ flat_set<dstate_id_t> find_daddy_candidates(const dfa_info &info,
|
|||||||
#define MAX_SHERMAN_SELF_LOOP 20
|
#define MAX_SHERMAN_SELF_LOOP 20
|
||||||
|
|
||||||
static
|
static
|
||||||
void find_better_daddy(dfa_info &info, dstate_id_t curr_id,
|
void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit,
|
||||||
bool using8bit, bool any_cyclic_near_anchored_state,
|
bool any_cyclic_near_anchored_state,
|
||||||
const Grey &grey) {
|
bool trust_daddy_states, const Grey &grey) {
|
||||||
if (!grey.allowShermanStates) {
|
if (!grey.allowShermanStates) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -839,7 +839,12 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id,
|
|||||||
dstate_id_t best_daddy = 0;
|
dstate_id_t best_daddy = 0;
|
||||||
dstate &currState = info.states[curr_id];
|
dstate &currState = info.states[curr_id];
|
||||||
|
|
||||||
const auto hinted = find_daddy_candidates(info, curr_id);
|
flat_set<dstate_id_t> hinted;
|
||||||
|
if (trust_daddy_states) {
|
||||||
|
hinted.insert(currState.daddy);
|
||||||
|
} else {
|
||||||
|
hinted = find_daddy_candidates(info, curr_id);
|
||||||
|
}
|
||||||
|
|
||||||
for (const dstate_id_t &donor : hinted) {
|
for (const dstate_id_t &donor : hinted) {
|
||||||
assert(donor < curr_id);
|
assert(donor < curr_id);
|
||||||
@ -947,6 +952,7 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
|
|||||||
|
|
||||||
bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
|
bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
|
||||||
const CompileContext &cc,
|
const CompileContext &cc,
|
||||||
|
bool trust_daddy_states,
|
||||||
set<dstate_id_t> *accel_states) {
|
set<dstate_id_t> *accel_states) {
|
||||||
u16 total_daddy = 0;
|
u16 total_daddy = 0;
|
||||||
dfa_info info(strat);
|
dfa_info info(strat);
|
||||||
@ -963,7 +969,7 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
|
|||||||
|
|
||||||
for (u32 i = 0; i < info.size(); i++) {
|
for (u32 i = 0; i < info.size(); i++) {
|
||||||
find_better_daddy(info, i, using8bit, any_cyclic_near_anchored_state,
|
find_better_daddy(info, i, using8bit, any_cyclic_near_anchored_state,
|
||||||
cc.grey);
|
trust_daddy_states, cc.grey);
|
||||||
total_daddy += info.extra[i].daddytaken;
|
total_daddy += info.extra[i].daddytaken;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -989,9 +995,10 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
|
|||||||
bytecode_ptr<NFA> mcclellanCompile(raw_dfa &raw, const CompileContext &cc,
|
bytecode_ptr<NFA> mcclellanCompile(raw_dfa &raw, const CompileContext &cc,
|
||||||
const ReportManager &rm,
|
const ReportManager &rm,
|
||||||
bool only_accel_init,
|
bool only_accel_init,
|
||||||
|
bool trust_daddy_states,
|
||||||
set<dstate_id_t> *accel_states) {
|
set<dstate_id_t> *accel_states) {
|
||||||
mcclellan_build_strat mbs(raw, rm, only_accel_init);
|
mcclellan_build_strat mbs(raw, rm, only_accel_init);
|
||||||
return mcclellanCompile_i(raw, mbs, cc, accel_states);
|
return mcclellanCompile_i(raw, mbs, cc, trust_daddy_states, accel_states);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t mcclellan_build_strat::accelSize(void) const {
|
size_t mcclellan_build_strat::accelSize(void) const {
|
||||||
|
@ -71,12 +71,13 @@ private:
|
|||||||
bytecode_ptr<NFA>
|
bytecode_ptr<NFA>
|
||||||
mcclellanCompile(raw_dfa &raw, const CompileContext &cc,
|
mcclellanCompile(raw_dfa &raw, const CompileContext &cc,
|
||||||
const ReportManager &rm, bool only_accel_init,
|
const ReportManager &rm, bool only_accel_init,
|
||||||
|
bool trust_daddy_states = false,
|
||||||
std::set<dstate_id_t> *accel_states = nullptr);
|
std::set<dstate_id_t> *accel_states = nullptr);
|
||||||
|
|
||||||
/* used internally by mcclellan/haig/gough compile process */
|
/* used internally by mcclellan/haig/gough compile process */
|
||||||
bytecode_ptr<NFA>
|
bytecode_ptr<NFA>
|
||||||
mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
|
mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
|
||||||
const CompileContext &cc,
|
const CompileContext &cc, bool trust_daddy_states = false,
|
||||||
std::set<dstate_id_t> *accel_states = nullptr);
|
std::set<dstate_id_t> *accel_states = nullptr);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -692,18 +692,20 @@ bool is_slow(const raw_dfa &rdfa, const set<dstate_id_t> &accel,
|
|||||||
|
|
||||||
static
|
static
|
||||||
bytecode_ptr<NFA> getDfa(raw_dfa &rdfa, const CompileContext &cc,
|
bytecode_ptr<NFA> getDfa(raw_dfa &rdfa, const CompileContext &cc,
|
||||||
const ReportManager &rm, bool has_literals,
|
const ReportManager &rm, bool has_non_literals,
|
||||||
set<dstate_id_t> &accel_states) {
|
set<dstate_id_t> &accel_states) {
|
||||||
// If we determinised literals, then we only need to consider the init
|
// If we determinised only literals, then we only need to consider the init
|
||||||
// states for acceleration.
|
// states for acceleration.
|
||||||
bool only_accel_init = has_literals;
|
bool only_accel_init = !has_non_literals;
|
||||||
|
bool trust_daddy_states = !has_non_literals;
|
||||||
|
|
||||||
bytecode_ptr<NFA> dfa = nullptr;
|
bytecode_ptr<NFA> dfa = nullptr;
|
||||||
if (cc.grey.allowSmallWriteSheng) {
|
if (cc.grey.allowSmallWriteSheng) {
|
||||||
dfa = shengCompile(rdfa, cc, rm, only_accel_init, &accel_states);
|
dfa = shengCompile(rdfa, cc, rm, only_accel_init, &accel_states);
|
||||||
}
|
}
|
||||||
if (!dfa) {
|
if (!dfa) {
|
||||||
dfa = mcclellanCompile(rdfa, cc, rm, only_accel_init, &accel_states);
|
dfa = mcclellanCompile(rdfa, cc, rm, only_accel_init,
|
||||||
|
trust_daddy_states, &accel_states);
|
||||||
}
|
}
|
||||||
return dfa;
|
return dfa;
|
||||||
}
|
}
|
||||||
@ -711,14 +713,14 @@ bytecode_ptr<NFA> getDfa(raw_dfa &rdfa, const CompileContext &cc,
|
|||||||
static
|
static
|
||||||
bytecode_ptr<NFA> prepEngine(raw_dfa &rdfa, u32 roseQuality,
|
bytecode_ptr<NFA> prepEngine(raw_dfa &rdfa, u32 roseQuality,
|
||||||
const CompileContext &cc, const ReportManager &rm,
|
const CompileContext &cc, const ReportManager &rm,
|
||||||
bool has_literals, u32 *start_offset,
|
bool has_non_literals, u32 *start_offset,
|
||||||
u32 *small_region) {
|
u32 *small_region) {
|
||||||
*start_offset = remove_leading_dots(rdfa);
|
*start_offset = remove_leading_dots(rdfa);
|
||||||
|
|
||||||
// Unleash the McClellan!
|
// Unleash the McClellan!
|
||||||
set<dstate_id_t> accel_states;
|
set<dstate_id_t> accel_states;
|
||||||
|
|
||||||
auto nfa = getDfa(rdfa, cc, rm, has_literals, accel_states);
|
auto nfa = getDfa(rdfa, cc, rm, has_non_literals, accel_states);
|
||||||
if (!nfa) {
|
if (!nfa) {
|
||||||
DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n");
|
DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n");
|
||||||
return nullptr;
|
return nullptr;
|
||||||
@ -737,7 +739,7 @@ bytecode_ptr<NFA> prepEngine(raw_dfa &rdfa, u32 roseQuality,
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
nfa = getDfa(rdfa, cc, rm, has_literals, accel_states);
|
nfa = getDfa(rdfa, cc, rm, has_non_literals, accel_states);
|
||||||
if (!nfa) {
|
if (!nfa) {
|
||||||
DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n");
|
DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n");
|
||||||
assert(0); /* able to build orig dfa but not the trimmed? */
|
assert(0); /* able to build orig dfa but not the trimmed? */
|
||||||
@ -768,6 +770,7 @@ unique_ptr<SmallWriteBuild> makeSmallWriteBuilder(size_t num_patterns,
|
|||||||
|
|
||||||
bytecode_ptr<SmallWriteEngine> SmallWriteBuildImpl::build(u32 roseQuality) {
|
bytecode_ptr<SmallWriteEngine> SmallWriteBuildImpl::build(u32 roseQuality) {
|
||||||
const bool has_literals = !is_empty(lit_trie) || !is_empty(lit_trie_nocase);
|
const bool has_literals = !is_empty(lit_trie) || !is_empty(lit_trie_nocase);
|
||||||
|
const bool has_non_literals = rdfa != nullptr;
|
||||||
if (!rdfa && !has_literals) {
|
if (!rdfa && !has_literals) {
|
||||||
DEBUG_PRINTF("no smallwrite engine\n");
|
DEBUG_PRINTF("no smallwrite engine\n");
|
||||||
poisoned = true;
|
poisoned = true;
|
||||||
@ -788,7 +791,7 @@ bytecode_ptr<SmallWriteEngine> SmallWriteBuildImpl::build(u32 roseQuality) {
|
|||||||
|
|
||||||
u32 start_offset;
|
u32 start_offset;
|
||||||
u32 small_region;
|
u32 small_region;
|
||||||
auto nfa = prepEngine(*rdfa, roseQuality, cc, rm, has_literals,
|
auto nfa = prepEngine(*rdfa, roseQuality, cc, rm, has_non_literals,
|
||||||
&start_offset, &small_region);
|
&start_offset, &small_region);
|
||||||
if (!nfa) {
|
if (!nfa) {
|
||||||
DEBUG_PRINTF("some smallwrite outfix could not be prepped\n");
|
DEBUG_PRINTF("some smallwrite outfix could not be prepped\n");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user