From ed3ef5b997a411d8b60e574992e7189cd6f24be5 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 4 Apr 2016 15:54:09 +1000 Subject: [PATCH] raise the limit of strings in double shufti --- src/nfa/accelcompile.cpp | 19 ++++++++----------- src/nfa/mcclellancompile.cpp | 11 ++++------- src/nfa/mcclellancompile_accel.cpp | 1 - src/nfagraph/ng_limex_accel.cpp | 7 ++++--- src/nfagraph/ng_limex_accel.h | 10 ++++++---- 5 files changed, 22 insertions(+), 26 deletions(-) diff --git a/src/nfa/accelcompile.cpp b/src/nfa/accelcompile.cpp index a9281c13..75960dda 100644 --- a/src/nfa/accelcompile.cpp +++ b/src/nfa/accelcompile.cpp @@ -207,17 +207,14 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) { } } - if (outs1 + outs2 <= 8) { - if (outs1 < outs2 && outs1 <= 2) { // Heuristic from UE-438. - DEBUG_PRINTF("building double-shufti for %zu one-byte and %zu" - " two-byte literals\n", outs1, outs2); - aux->accel_type = ACCEL_DSHUFTI; - aux->dshufti.offset = offset; - shuftiBuildDoubleMasks(info.double_stop1, info.double_stop2, - &aux->dshufti.lo1, - &aux->dshufti.hi1, - &aux->dshufti.lo2, - &aux->dshufti.hi2); + if (outs1 < outs2 && outs1 <= 2) { // Heuristic from UE-438. + DEBUG_PRINTF("building double-shufti for %zu one-byte and %zu" + " two-byte literals\n", outs1, outs2); + aux->accel_type = ACCEL_DSHUFTI; + aux->dshufti.offset = offset; + if (shuftiBuildDoubleMasks(info.double_stop1, info.double_stop2, + &aux->dshufti.lo1, &aux->dshufti.hi1, + &aux->dshufti.lo2, &aux->dshufti.hi2)) { return; } } diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 87eed250..b4418730 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -130,7 +130,6 @@ mstate_aux *getAux(NFA *n, dstate_id_t i) { static bool double_byte_ok(const escape_info &info) { return !info.outs2_broken - && info.outs2_single.count() + info.outs2.size() <= 8 && info.outs2_single.count() < info.outs2.size() && info.outs2_single.count() <= 2 && !info.outs2.empty(); } @@ -256,14 +255,12 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, } } - if (double_byte_ok(info)) { + if (double_byte_ok(info) + && shuftiBuildDoubleMasks(info.outs2_single, info.outs2, + &accel->dshufti.lo1, &accel->dshufti.hi1, + &accel->dshufti.lo2, &accel->dshufti.hi2)) { accel->accel_type = ACCEL_DSHUFTI; accel->dshufti.offset = verify_u8(info.outs2_offset); - shuftiBuildDoubleMasks(info.outs2_single, info.outs2, - &accel->dshufti.lo1, - &accel->dshufti.hi1, - &accel->dshufti.lo2, - &accel->dshufti.hi2); DEBUG_PRINTF("state %hu is double shufti\n", this_idx); return; } diff --git a/src/nfa/mcclellancompile_accel.cpp b/src/nfa/mcclellancompile_accel.cpp index 471d0d53..3e73d31d 100644 --- a/src/nfa/mcclellancompile_accel.cpp +++ b/src/nfa/mcclellancompile_accel.cpp @@ -334,7 +334,6 @@ map populateAccelerationInfo(const raw_dfa &rdfa, static bool double_byte_ok(const escape_info &info) { return !info.outs2_broken - && info.outs2_single.count() + info.outs2.size() <= 8 && info.outs2_single.count() < info.outs2.size() && info.outs2_single.count() <= 2 && !info.outs2.empty(); } diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp index 63ec546b..8509b36f 100644 --- a/src/nfagraph/ng_limex_accel.cpp +++ b/src/nfagraph/ng_limex_accel.cpp @@ -284,7 +284,7 @@ AccelScheme make_double_accel(AccelScheme as, CharReach cr_1, return as; } - if (two_count > 8) { + if (two_count > DOUBLE_SHUFTI_LIMIT) { if (cr_2.count() < cr_1.count()) { as.double_cr |= cr_2; offset = offset_in + 1; @@ -513,7 +513,7 @@ AccelScheme findBestAccelScheme(vector > paths, best.offset = offset; /* merge best single and best double */ - if (!da.double_byte.empty() && da.double_byte.size() <= 8 + if (!da.double_byte.empty() && da.double_byte.size() <= DOUBLE_SHUFTI_LIMIT && da.double_cr.count() < best.cr.count()) { best.double_byte = da.double_byte; best.double_cr = da.double_cr; @@ -857,7 +857,8 @@ depth_done: // literals) if (depth > 1) { for (unsigned int i = 0; i < (depth - 1); i++) { - if (depthReach[i].count()*depthReach[i+1].count() <= 8) { + if (depthReach[i].count() * depthReach[i+1].count() + <= DOUBLE_SHUFTI_LIMIT) { DEBUG_PRINTF("two-byte shufti, depth %u\n", i); *as = AccelScheme(CharReach::dot(), i); return true; diff --git a/src/nfagraph/ng_limex_accel.h b/src/nfagraph/ng_limex_accel.h index 9c77dc67..16a6b770 100644 --- a/src/nfagraph/ng_limex_accel.h +++ b/src/nfagraph/ng_limex_accel.h @@ -63,6 +63,8 @@ void findAccelFriends(const NGHolder &g, NFAVertex v, u32 offset, ue2::flat_set *friends); +#define DOUBLE_SHUFTI_LIMIT 20 + struct AccelScheme { AccelScheme(const CharReach &cr_in, u32 offset_in) : cr(cr_in), offset(offset_in) { @@ -78,10 +80,10 @@ struct AccelScheme { size_t a_dcount = double_cr.count(); size_t b_dcount = b.double_cr.count(); - bool feasible_double_a - = !a.double_byte.empty() && a.double_byte.size() <= 8; - bool feasible_double_b - = !b.double_byte.empty() && b.double_byte.size() <= 8; + bool feasible_double_a = !a.double_byte.empty() + && a.double_byte.size() <= DOUBLE_SHUFTI_LIMIT; + bool feasible_double_b = !b.double_byte.empty() + && b.double_byte.size() <= DOUBLE_SHUFTI_LIMIT; if (feasible_double_a != feasible_double_b) { return feasible_double_a > feasible_double_b;