DFA: use sherman economically

This commit is contained in:
Hong, Yang A 2020-06-18 09:48:52 +00:00 committed by Konstantinos Margaritis
parent 7d21fc157c
commit d71515be04
2 changed files with 30 additions and 23 deletions

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2018, Intel Corporation * Copyright (c) 2015-2020, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -1477,6 +1477,7 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
bytecode_ptr<NFA> nfa; bytecode_ptr<NFA> nfa;
if (!using8bit) { if (!using8bit) {
// Wide state optimization
if (cc.grey.allowWideStates && strat.getType() == McClellan if (cc.grey.allowWideStates && strat.getType() == McClellan
&& !is_triggered(raw.kind)) { && !is_triggered(raw.kind)) {
find_wide_state(info); find_wide_state(info);
@ -1486,19 +1487,22 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
bool any_cyclic_near_anchored_state bool any_cyclic_near_anchored_state
= is_cyclic_near(raw, raw.start_anchored); = is_cyclic_near(raw, raw.start_anchored);
for (u32 i = 0; i < info.size(); i++) { // Sherman optimization
if (info.is_widestate(i)) { if (info.impl_alpha_size > 16) {
continue; for (u32 i = 0; i < info.size(); i++) {
if (info.is_widestate(i)) {
continue;
}
find_better_daddy(info, i, using8bit,
any_cyclic_near_anchored_state,
trust_daddy_states, cc.grey);
total_daddy += info.extra[i].daddytaken;
} }
find_better_daddy(info, i, using8bit,
any_cyclic_near_anchored_state,
trust_daddy_states, cc.grey);
total_daddy += info.extra[i].daddytaken;
}
DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy, DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
info.size() * info.impl_alpha_size, info.size(), info.size() * info.impl_alpha_size, info.size(),
info.impl_alpha_size); info.impl_alpha_size);
}
nfa = mcclellanCompile16(info, cc, accel_states); nfa = mcclellanCompile16(info, cc, accel_states);
} else { } else {

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016-2017, Intel Corporation * Copyright (c) 2016-2020, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -842,17 +842,20 @@ bytecode_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end,
assert(info.getAlphaShift() <= 8); assert(info.getAlphaShift() <= 8);
u16 total_daddy = 0; // Sherman optimization
for (u32 i = 0; i < info.size(); i++) { if (info.impl_alpha_size > 16) {
find_better_daddy(info, i, u16 total_daddy = 0;
is_cyclic_near(info.raw, info.raw.start_anchored), for (u32 i = 0; i < info.size(); i++) {
grey); find_better_daddy(info, i,
total_daddy += info.extra[i].daddytaken; is_cyclic_near(info.raw, info.raw.start_anchored),
} grey);
total_daddy += info.extra[i].daddytaken;
}
DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy, DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
info.size() * info.impl_alpha_size, info.size(), info.size() * info.impl_alpha_size, info.size(),
info.impl_alpha_size); info.impl_alpha_size);
}
u16 sherman_limit; u16 sherman_limit;
if (!allocateImplId16(info, sheng_end, &sherman_limit)) { if (!allocateImplId16(info, sheng_end, &sherman_limit)) {