diff --git a/CMakeLists.txt b/CMakeLists.txt index 36267fc6..e1bd2794 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -596,6 +596,8 @@ SET (hs_SRCS src/nfa/mcclellan_internal.h src/nfa/mcclellancompile.cpp src/nfa/mcclellancompile.h + src/nfa/mcclellancompile_accel.cpp + src/nfa/mcclellancompile_accel.h src/nfa/mcclellancompile_util.cpp src/nfa/mcclellancompile_util.h src/nfa/limex_compile.cpp diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp index d735c80a..2ad3c6dd 100644 --- a/src/nfa/goughcompile.cpp +++ b/src/nfa/goughcompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -85,10 +85,11 @@ public: vector &reports_eod /* out */, u8 *isSingleReport /* out */, ReportID *arbReport /* out */) const override; - void find_escape_strings(dstate_id_t this_idx, - escape_info *out) const override; + escape_info find_escape_strings(dstate_id_t this_idx) const override; size_t accelSize(void) const override { return sizeof(gough_accel); } - void buildAccel(dstate_id_t this_idx, void *accel_out) override; + void buildAccel(dstate_id_t this_idx, const escape_info &info, + void *accel_out) override; + u32 max_allowed_offset_accel() const override { return 0; } raw_som_dfa &rdfa; const GoughGraph ≫ @@ -1145,32 +1146,43 @@ aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, return gough_dfa; } -void gough_build_strat::find_escape_strings(dstate_id_t this_idx, - escape_info *out) const { +escape_info gough_build_strat::find_escape_strings(dstate_id_t this_idx) const { + escape_info rv; if (!contains(accel_gough_info, this_idx)) { - out->outs = CharReach::dot(); - out->outs2_broken = true; - return; + rv.outs = CharReach::dot(); + rv.outs2_broken = true; + return rv; } - mcclellan_build_strat::find_escape_strings(this_idx, out); + rv 
= mcclellan_build_strat::find_escape_strings(this_idx); + + assert(!rv.offset); /* should have been limited by strat */ + if (rv.offset) { + rv.outs = CharReach::dot(); + rv.outs2_broken = true; + return rv; + } if (!accel_gough_info.at(this_idx).two_byte) { - out->outs2_broken = true; + rv.outs2_broken = true; } + + return rv; } -void gough_build_strat::buildAccel(dstate_id_t this_idx, void *accel_out) { +void gough_build_strat::buildAccel(dstate_id_t this_idx, const escape_info &info, + void *accel_out) { assert(mcclellan_build_strat::accelSize() == sizeof(AccelAux)); gough_accel *accel = (gough_accel *)accel_out; /* build a plain accelaux so we can work out where we can get to */ - mcclellan_build_strat::buildAccel(this_idx, &accel->accel); + mcclellan_build_strat::buildAccel(this_idx, info, &accel->accel); DEBUG_PRINTF("state %hu is accel with type %hhu\n", this_idx, accel->accel.accel_type); if (accel->accel.accel_type == ACCEL_NONE) { return; } + assert(!accel->accel.generic.offset); assert(contains(accel_gough_info, this_idx)); accel->margin_dist = verify_u8(accel_gough_info.at(this_idx).margin); built_accel[accel] = this_idx; diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index f75d08b5..9b21b8c4 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,6 +31,8 @@ #include "accel.h" #include "grey.h" #include "mcclellan_internal.h" +#include "mcclellancompile_accel.h" +#include "mcclellancompile_util.h" #include "nfa_internal.h" #include "shufticompile.h" #include "trufflecompile.h" @@ -56,25 +58,18 @@ #include #include +#include + using namespace std; +using boost::adaptors::map_keys; namespace ue2 { -/* compile time accel defs */ -#define 
ACCEL_MAX_STOP_CHAR 160 /* larger than nfa, as we don't have a budget - and the nfa cheats on stop characters for - sets of states */ -#define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */ - - namespace /* anon */ { struct dstate_extra { - u16 daddytaken; - bool shermanState; - bool accelerable; - dstate_extra(void) : daddytaken(0), shermanState(false), - accelerable(false) {} + u16 daddytaken = 0; + bool shermanState = false; }; struct dfa_info { @@ -105,10 +100,6 @@ struct dfa_info { return extra[raw_id].shermanState; } - bool is_accel(dstate_id_t raw_id) const { - return extra[raw_id].accelerable; - } - size_t size(void) const { return states.size(); } }; @@ -135,6 +126,14 @@ mstate_aux *getAux(NFA *n, dstate_id_t i) { return aux; } +static +bool double_byte_ok(const escape_info &info) { + return !info.outs2_broken + && info.outs2_single.count() + info.outs2.size() <= 8 + && info.outs2_single.count() < info.outs2.size() + && info.outs2_single.count() <= 2 && !info.outs2.empty(); +} + static void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { assert((size_t)succ_table % 2 == 0); @@ -186,75 +185,43 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { } } -void mcclellan_build_strat::find_escape_strings(dstate_id_t this_idx, - escape_info *out) const { - const dstate &raw = rdfa.states[this_idx]; - const auto &alpha_remap = rdfa.alpha_remap; +u32 mcclellan_build_strat::max_allowed_offset_accel() const { + return ACCEL_DFA_MAX_OFFSET_DEPTH; +} - flat_set> outs2_local; - for (unsigned i = 0; i < N_CHARS; i++) { - outs2_local.clear(); - - if (raw.next[alpha_remap[i]] != this_idx) { - out->outs.set(i); - - DEBUG_PRINTF("next is %hu\n", raw.next[alpha_remap[i]]); - const dstate &raw_next = rdfa.states[raw.next[alpha_remap[i]]]; - - if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) { - DEBUG_PRINTF("leads to report\n"); - out->outs2_broken = true; /* cannot accelerate over reports */ - } - - for (unsigned j 
= 0; !out->outs2_broken && j < N_CHARS; j++) { - if (raw_next.next[alpha_remap[j]] == raw.next[alpha_remap[j]]) { - continue; - } - - DEBUG_PRINTF("adding %02x %02x -> %hu to 2 \n", i, j, - raw_next.next[alpha_remap[j]]); - outs2_local.emplace((u8)i, (u8)j); - } - - if (outs2_local.size() > 8) { - DEBUG_PRINTF("adding %02x to outs2_single\n", i); - out->outs2_single.set(i); - } else { - insert(&out->outs2, outs2_local); - } - if (out->outs2.size() > 8) { - DEBUG_PRINTF("outs2 too big\n"); - out->outs2_broken = true; - } - } - } +escape_info mcclellan_build_strat::find_escape_strings(dstate_id_t this_idx) + const { + return find_mcclellan_escape_info(rdfa, this_idx, + max_allowed_offset_accel()); } /** builds acceleration schemes for states */ -void mcclellan_build_strat::buildAccel(dstate_id_t this_idx, void *accel_out) { +void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, + const escape_info &info, + void *accel_out) { AccelAux *accel = (AccelAux *)accel_out; - escape_info out; - find_escape_strings(this_idx, &out); + DEBUG_PRINTF("accelerations scheme has offset %u\n", info.offset); + accel->generic.offset = verify_u8(info.offset); - if (!out.outs2_broken && out.outs2_single.none() - && out.outs2.size() == 1) { + if (double_byte_ok(info) && info.outs2_single.none() + && info.outs2.size() == 1) { accel->accel_type = ACCEL_DVERM; - accel->dverm.c1 = out.outs2.begin()->first; - accel->dverm.c2 = out.outs2.begin()->second; + accel->dverm.c1 = info.outs2.begin()->first; + accel->dverm.c2 = info.outs2.begin()->second; DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx); return; } - if (!out.outs2_broken && out.outs2_single.none() - && (out.outs2.size() == 2 || out.outs2.size() == 4)) { + if (double_byte_ok(info) && info.outs2_single.none() + && (info.outs2.size() == 2 || info.outs2.size() == 4)) { bool ok = true; - assert(!out.outs2.empty()); - u8 firstC = out.outs2.begin()->first & CASE_CLEAR; - u8 secondC = out.outs2.begin()->second & 
CASE_CLEAR; + assert(!info.outs2.empty()); + u8 firstC = info.outs2.begin()->first & CASE_CLEAR; + u8 secondC = info.outs2.begin()->second & CASE_CLEAR; - for (const pair &p : out.outs2) { + for (const pair &p : info.outs2) { if ((p.first & CASE_CLEAR) != firstC || (p.second & CASE_CLEAR) != secondC) { ok = false; @@ -271,12 +238,9 @@ void mcclellan_build_strat::buildAccel(dstate_id_t this_idx, void *accel_out) { } } - if (!out.outs2_broken && - (out.outs2_single.count() + out.outs2.size()) <= 8 && - out.outs2_single.count() < out.outs2.size() && - out.outs2_single.count() <= 2 && !out.outs2.empty()) { + if (double_byte_ok(info)) { accel->accel_type = ACCEL_DSHUFTI; - shuftiBuildDoubleMasks(out.outs2_single, out.outs2, + shuftiBuildDoubleMasks(info.outs2_single, info.outs2, &accel->dshufti.lo1, &accel->dshufti.hi1, &accel->dshufti.lo2, @@ -285,166 +249,46 @@ void mcclellan_build_strat::buildAccel(dstate_id_t this_idx, void *accel_out) { return; } - if (out.outs.none()) { + if (info.outs.none()) { accel->accel_type = ACCEL_RED_TAPE; DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape" " from which there is no escape\n", this_idx); return; } - if (out.outs.count() == 1) { + if (info.outs.count() == 1) { accel->accel_type = ACCEL_VERM; - accel->verm.c = out.outs.find_first(); + accel->verm.c = info.outs.find_first(); DEBUG_PRINTF("state %hu is vermicelli\n", this_idx); return; } - if (out.outs.count() == 2 && out.outs.isCaselessChar()) { + if (info.outs.count() == 2 && info.outs.isCaselessChar()) { accel->accel_type = ACCEL_VERM_NOCASE; - accel->verm.c = out.outs.find_first() & CASE_CLEAR; + accel->verm.c = info.outs.find_first() & CASE_CLEAR; DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx); return; } - if (out.outs.count() > ACCEL_MAX_FLOATING_STOP_CHAR) { + if (info.outs.count() > ACCEL_DFA_MAX_FLOATING_STOP_CHAR) { accel->accel_type = ACCEL_NONE; DEBUG_PRINTF("state %hu is too broad\n", this_idx); return; } accel->accel_type = 
ACCEL_SHUFTI; - if (-1 != shuftiBuildMasks(out.outs, &accel->shufti.lo, + if (-1 != shuftiBuildMasks(info.outs, &accel->shufti.lo, &accel->shufti.hi)) { DEBUG_PRINTF("state %hu is shufti\n", this_idx); return; } - assert(!out.outs.none()); + assert(!info.outs.none()); accel->accel_type = ACCEL_TRUFFLE; - truffleBuildMasks(out.outs, &accel->truffle.mask1, &accel->truffle.mask2); + truffleBuildMasks(info.outs, &accel->truffle.mask1, &accel->truffle.mask2); DEBUG_PRINTF("state %hu is truffle\n", this_idx); } -static -bool is_accel(const raw_dfa &raw, dstate_id_t sds_or_proxy, - dstate_id_t this_idx) { - if (!this_idx /* dead state is not accelerable */) { - return false; - } - - /* Note on report acceleration states: While we can't accelerate while we - * are spamming out callbacks, the QR code paths don't raise reports - * during scanning so they can accelerate report states. */ - - if (generates_callbacks(raw.kind) - && !raw.states[this_idx].reports.empty()) { - return false; - } - - size_t single_limit = this_idx == sds_or_proxy ? 
- ACCEL_MAX_FLOATING_STOP_CHAR : ACCEL_MAX_STOP_CHAR; - DEBUG_PRINTF("inspecting %hu/%hu: %zu\n", this_idx, sds_or_proxy, - single_limit); - - CharReach out; - for (u32 i = 0; i < N_CHARS; i++) { - if (raw.states[this_idx].next[raw.alpha_remap[i]] != this_idx) { - out.set(i); - } - } - - if (out.count() <= single_limit) { - DEBUG_PRINTF("state %hu should be accelerable %zu\n", this_idx, - out.count()); - return true; - } - - DEBUG_PRINTF("state %hu is not accelerable has %zu\n", this_idx, - out.count()); - - return false; -} - -static -bool has_self_loop(dstate_id_t s, const raw_dfa &raw) { - u16 top_remap = raw.alpha_remap[TOP]; - for (u32 i = 0; i < raw.states[s].next.size(); i++) { - if (i != top_remap && raw.states[s].next[i] == s) { - return true; - } - } - return false; -} - -static -dstate_id_t get_sds_or_proxy(const raw_dfa &raw) { - if (raw.start_floating != DEAD_STATE) { - DEBUG_PRINTF("has floating start\n"); - return raw.start_floating; - } - - DEBUG_PRINTF("looking for SDS proxy\n"); - - dstate_id_t s = raw.start_anchored; - - if (has_self_loop(s, raw)) { - return s; - } - - u16 top_remap = raw.alpha_remap[TOP]; - - ue2::unordered_set seen; - while (true) { - seen.insert(s); - DEBUG_PRINTF("basis %hu\n", s); - - /* check if we are connected to a state with a self loop */ - for (u32 i = 0; i < raw.states[s].next.size(); i++) { - dstate_id_t t = raw.states[s].next[i]; - if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) { - return t; - } - } - - /* find a neighbour to use as a basis for looking for the sds proxy */ - dstate_id_t t = DEAD_STATE; - for (u32 i = 0; i < raw.states[s].next.size(); i++) { - dstate_id_t tt = raw.states[s].next[i]; - if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) { - t = tt; - break; - } - } - - if (t == DEAD_STATE) { - /* we were unable to find a state to use as a SDS proxy */ - return DEAD_STATE; - } - - s = t; - seen.insert(t); - } -} - -static -void populateAccelerationInfo(dfa_info &info, u32 
*ac, const Grey &grey) { - *ac = 0; /* number of accelerable states */ - - if (!grey.accelerateDFA) { - return; - } - - dstate_id_t sds_proxy = get_sds_or_proxy(info.raw); - DEBUG_PRINTF("sds %hu\n", sds_proxy); - - for (size_t i = 0; i < info.size(); i++) { - if (is_accel(info.raw, sds_proxy, i)) { - ++*ac; - info.extra[i].accelerable = true; - } - } -} - static void populateBasicInfo(size_t state_size, const dfa_info &info, u32 total_size, u32 aux_offset, u32 accel_offset, @@ -625,6 +469,14 @@ void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset, } } +static +void fillAccelOut(const map &accel_escape_info, + set *accel_states) { + for (dstate_id_t i : accel_escape_info | map_keys) { + accel_states->insert(i); + } +} + static size_t calcShermanRegionSize(const dfa_info &info) { size_t rv = 0; @@ -692,14 +544,14 @@ int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) { static aligned_unique_ptr mcclellanCompile16(dfa_info &info, - const CompileContext &cc) { + const CompileContext &cc, + set *accel_states) { DEBUG_PRINTF("building mcclellan 16\n"); vector reports; /* index in ri for the appropriate report list */ vector reports_eod; /* as above */ ReportID arb; u8 single; - u32 accelCount; u8 alphaShift = info.getAlphaShift(); assert(alphaShift <= 8); @@ -713,7 +565,8 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, unique_ptr ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); - populateAccelerationInfo(info, &accelCount, cc.grey); + map accel_escape_info + = populateAccelerationInfo(info.raw, info.strat, cc.grey); size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16) * count_real_states; @@ -721,7 +574,7 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, size_t aux_size = sizeof(mstate_aux) * info.size(); size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size); - size_t accel_size = info.strat.accelSize() * accelCount; + size_t accel_size = info.strat.accelSize() * 
accel_escape_info.size(); size_t accel_offset = ROUNDUP_N(aux_offset + aux_size + ri->getReportListSize(), 32); size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size); @@ -736,7 +589,7 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, char *nfa_base = (char *)nfa.get(); populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset, - accelCount, arb, single, nfa.get()); + accel_escape_info.size(), arb, single, nfa.get()); vector reportOffsets; @@ -769,12 +622,12 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, fillInAux(&aux[fs], i, info, reports, reports_eod, reportOffsets); - if (info.is_accel(i)) { + if (contains(accel_escape_info, i)) { this_aux->accel_offset = accel_offset; accel_offset += info.strat.accelSize(); assert(accel_offset + sizeof(NFA) <= sherman_offset); assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - info.strat.buildAccel(i, + info.strat.buildAccel(i, accel_escape_info.at(i), (void *)((char *)m + this_aux->accel_offset)); } } @@ -798,12 +651,12 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets); - if (info.is_accel(i)) { + if (contains(accel_escape_info, i)) { this_aux->accel_offset = accel_offset; accel_offset += info.strat.accelSize(); assert(accel_offset + sizeof(NFA) <= sherman_offset); assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - info.strat.buildAccel(i, + info.strat.buildAccel(i, accel_escape_info.at(i), (void *)((char *)m + this_aux->accel_offset)); } @@ -836,6 +689,10 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, markEdges(nfa.get(), succ_table, info); + if (accel_states && nfa) { + fillAccelOut(accel_escape_info, accel_states); + } + return nfa; } @@ -874,7 +731,9 @@ void fillInBasicState8(const dfa_info &info, mstate_aux *aux, u8 *succ_table, } static -void allocateFSN8(dfa_info &info, u16 *accel_limit, u16 *accept_limit) { +void allocateFSN8(dfa_info &info, + const map &accel_escape_info, + 
u16 *accel_limit, u16 *accept_limit) { info.states[0].impl_id = 0; /* dead is always 0 */ vector norm; @@ -886,7 +745,7 @@ void allocateFSN8(dfa_info &info, u16 *accel_limit, u16 *accept_limit) { for (u32 i = 1; i < info.size(); i++) { if (!info.states[i].reports.empty()) { accept.push_back(i); - } else if (info.is_accel(i)) { + } else if (contains(accel_escape_info, i)) { accel.push_back(i); } else { norm.push_back(i); @@ -915,23 +774,24 @@ void allocateFSN8(dfa_info &info, u16 *accel_limit, u16 *accept_limit) { static aligned_unique_ptr mcclellanCompile8(dfa_info &info, - const CompileContext &cc) { + const CompileContext &cc, + set *accel_states) { DEBUG_PRINTF("building mcclellan 8\n"); vector reports; vector reports_eod; ReportID arb; u8 single; - u32 accelCount; unique_ptr ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); - populateAccelerationInfo(info, &accelCount, cc.grey); + map accel_escape_info + = populateAccelerationInfo(info.raw, info.strat, cc.grey); size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size(); size_t aux_size = sizeof(mstate_aux) * info.size(); size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size); - size_t accel_size = info.strat.accelSize() * accelCount; + size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); size_t accel_offset = ROUNDUP_N(aux_offset + aux_size + ri->getReportListSize(), 32); size_t total_size = accel_offset + accel_size; @@ -951,9 +811,9 @@ aligned_unique_ptr mcclellanCompile8(dfa_info &info, mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get()); - allocateFSN8(info, &m->accel_limit_8, &m->accept_limit_8); + allocateFSN8(info, accel_escape_info, &m->accel_limit_8, &m->accept_limit_8); populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset, - accelCount, arb, single, nfa.get()); + accel_escape_info.size(), arb, single, nfa.get()); vector reportOffsets; @@ -964,13 +824,14 @@ aligned_unique_ptr mcclellanCompile8(dfa_info 
&info, mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset); for (size_t i = 0; i < info.size(); i++) { - if (info.is_accel(i)) { + if (contains(accel_escape_info, i)) { u32 j = info.implId(i); aux[j].accel_offset = accel_offset; accel_offset += info.strat.accelSize(); - info.strat.buildAccel(i, (void *)((char *)m + aux[j].accel_offset)); + info.strat.buildAccel(i, accel_escape_info.at(i), + (void *)((char *)m + aux[j].accel_offset)); } fillInBasicState8(info, aux, succ_table, reportOffsets, reports, @@ -981,6 +842,10 @@ aligned_unique_ptr mcclellanCompile8(dfa_info &info, DEBUG_PRINTF("rl size %zu\n", ri->size()); + if (accel_states && nfa) { + fillAccelOut(accel_escape_info, accel_states); + } + return nfa; } @@ -1163,15 +1028,6 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { return false; } -static -void fillAccelOut(const dfa_info &info, set *accel_states) { - for (size_t i = 0; i < info.size(); i++) { - if (info.is_accel(i)) { - accel_states->insert(i); - } - } -} - aligned_unique_ptr mcclellanCompile_i(raw_dfa &raw, dfa_build_strat &strat, const CompileContext &cc, set *accel_states) { @@ -1200,19 +1056,15 @@ aligned_unique_ptr mcclellanCompile_i(raw_dfa &raw, dfa_build_strat &strat, aligned_unique_ptr nfa; if (!using8bit) { - nfa = mcclellanCompile16(info, cc); + nfa = mcclellanCompile16(info, cc, accel_states); } else { - nfa = mcclellanCompile8(info, cc); + nfa = mcclellanCompile8(info, cc, accel_states); } if (has_eod_reports) { nfa->flags |= NFA_ACCEPTS_EOD; } - if (accel_states && nfa) { - fillAccelOut(info, accel_states); - } - DEBUG_PRINTF("compile done\n"); return nfa; } diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h index 78126bc8..d4b4325d 100644 --- a/src/nfa/mcclellancompile.h +++ b/src/nfa/mcclellancompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are 
permitted provided that the following conditions are met: @@ -59,6 +59,7 @@ struct escape_info { CharReach outs2_single; flat_set> outs2; bool outs2_broken = false; + u32 offset = 0; }; class dfa_build_strat { @@ -70,10 +71,10 @@ public: std::vector &reports_eod /* out */, u8 *isSingleReport /* out */, ReportID *arbReport /* out */) const = 0; - virtual void find_escape_strings(dstate_id_t this_idx, - escape_info *out) const = 0; + virtual escape_info find_escape_strings(dstate_id_t this_idx) const = 0; virtual size_t accelSize(void) const = 0; - virtual void buildAccel(dstate_id_t this_idx, void *accel_out) = 0; + virtual void buildAccel(dstate_id_t this_idx, const escape_info &info, + void *accel_out) = 0; }; class mcclellan_build_strat : public dfa_build_strat { @@ -81,14 +82,15 @@ public: explicit mcclellan_build_strat(raw_dfa &r) : rdfa(r) {} raw_dfa &get_raw() const override { return rdfa; } std::unique_ptr gatherReports( - std::vector &reports /* out */, - std::vector &reports_eod /* out */, - u8 *isSingleReport /* out */, - ReportID *arbReport /* out */) const override; - void find_escape_strings(dstate_id_t this_idx, - escape_info *out) const override; + std::vector &reports /* out */, + std::vector &reports_eod /* out */, + u8 *isSingleReport /* out */, + ReportID *arbReport /* out */) const override; + escape_info find_escape_strings(dstate_id_t this_idx) const override; size_t accelSize(void) const override; - void buildAccel(dstate_id_t this_idx, void *accel_out) override; + void buildAccel(dstate_id_t this_idx,const escape_info &info, + void *accel_out) override; + virtual u32 max_allowed_offset_accel() const; private: raw_dfa &rdfa; diff --git a/src/nfa/mcclellancompile_accel.cpp b/src/nfa/mcclellancompile_accel.cpp new file mode 100644 index 00000000..12a05aaa --- /dev/null +++ b/src/nfa/mcclellancompile_accel.cpp @@ -0,0 +1,383 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or 
without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "mcclellancompile_accel.h" + +#include "mcclellancompile_util.h" + +#include "grey.h" +#include "nfagraph/ng_limex_accel.h" +#include "util/charreach.h" +#include "util/container.h" +#include "util/dump_charclass.h" + +#include +#include + +#define PATHS_LIMIT 500 + +using namespace std; + +namespace ue2 { + +namespace { + +struct path { + vector reach; + dstate_id_t dest = DEAD_STATE; + explicit path(dstate_id_t base) : dest(base) {} +}; + +} + +static UNUSED +string describeClasses(const vector &v) { + std::ostringstream oss; + for (const auto &cr : v) { + describeClass(oss, cr); + } + return oss.str(); +} + +static +void dump_paths(const vector &paths) { + for (UNUSED const auto &p : paths) { + DEBUG_PRINTF("[%s] -> %u\n", describeClasses(p.reach).c_str(), p.dest); + } + DEBUG_PRINTF("%zu paths\n", paths.size()); +} + +static +bool is_useful_path(const vector &good, const path &p) { + for (const auto &g : good) { + assert(g.dest == p.dest); + assert(g.reach.size() <= p.reach.size()); + auto git = g.reach.rbegin(); + auto pit = p.reach.rbegin(); + + for (; git != g.reach.rend(); ++git, ++pit) { + if (!pit->isSubsetOf(*git)) { + goto next; + } + } + DEBUG_PRINTF("better: [%s] -> %u\n", + describeClasses(g.reach).c_str(), g.dest); + + return false; + next:; + } + + return true; +} + +static +path append(const path &orig, const CharReach &cr, u32 new_dest) { + path p(new_dest); + p.reach = orig.reach; + p.reach.push_back(cr); + + return p; +} + +static +void extend(const raw_dfa &rdfa, const path &p, + map > &all, + vector &out) { + dstate s = rdfa.states[p.dest]; + + if (!p.reach.empty() && p.reach.back().none()) { + out.push_back(p); + return; + } + + if (!s.reports.empty()) { + if (generates_callbacks(rdfa.kind)) { + out.push_back(p); + return; + } else { + path pp = append(p, CharReach(), p.dest); + all[p.dest].push_back(pp); + out.push_back(pp); + } + } + + if (!s.reports_eod.empty()) { + path pp = append(p, CharReach(), p.dest); + 
all[p.dest].push_back(pp); + out.push_back(pp); + } + + map dest; + for (unsigned i = 0; i < N_CHARS; i++) { + u32 succ = s.next[rdfa.alpha_remap[i]]; + dest[succ].set(i); + } + + for (const auto &e : dest) { + path pp = append(p, e.second, e.first); + if (!is_useful_path(all[e.first], pp)) { + DEBUG_PRINTF("not useful: [%s] -> %u\n", + describeClasses(pp.reach).c_str(), pp.dest); + continue; + } + + DEBUG_PRINTF("----good: [%s] -> %u\n", + describeClasses(pp.reach).c_str(), pp.dest); + all[e.first].push_back(pp); + out.push_back(pp); + } +} + +static +vector > generate_paths(const raw_dfa &rdfa, dstate_id_t base, + u32 len) { + vector paths{ path(base) }; + map > all; + all[base].push_back(path(base)); + for (u32 i = 0; i < len && paths.size() < PATHS_LIMIT; i++) { + vector next_gen; + for (const auto &p : paths) { + extend(rdfa, p, all, next_gen); + } + + paths = move(next_gen); + } + + dump_paths(paths); + + vector > rv; + for (auto &p : paths) { + rv.push_back(move(p.reach)); + } + return rv; +} + +escape_info look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base, + u32 max_allowed_accel_offset) { + DEBUG_PRINTF("looking for accel for %hu\n", base); + vector > paths = generate_paths(rdfa, base, + max_allowed_accel_offset + 1); + AccelScheme as = findBestAccelScheme(paths, CharReach()); + escape_info rv; + rv.outs2_broken = true; + rv.offset = as.offset; + rv.outs = as.cr; + DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset); + return rv; +} + + +static +vector find_nonexit_symbols(const raw_dfa &rdfa, + const CharReach &escape) { + set rv; + CharReach nonexit = ~escape; + for (auto i = nonexit.find_first(); i != CharReach::npos; + i = nonexit.find_next(i)) { + rv.insert(rdfa.alpha_remap[i]); + } + + return vector(rv.begin(), rv.end()); +} + +static +set find_region(const raw_dfa &rdfa, dstate_id_t base, + const escape_info &ei) { + DEBUG_PRINTF("looking for region around %hu\n", base); + + set region = {base}; + + if 
(!ei.outs2_broken) { + return region; + } + + DEBUG_PRINTF("accel %s+%u\n", describeClass(ei.outs).c_str(), ei.offset); + + const CharReach &escape = ei.outs; + auto nonexit_symbols = find_nonexit_symbols(rdfa, escape); + + vector pending = {base}; + while (!pending.empty()) { + dstate_id_t curr = pending.back(); + pending.pop_back(); + for (auto s : nonexit_symbols) { + dstate_id_t t = rdfa.states[curr].next[s]; + if (contains(region, t)) { + continue; + } + + DEBUG_PRINTF(" %hu is in region\n", t); + region.insert(t); + pending.push_back(t); + } + } + + return region; +} + +static +bool better(const escape_info &a, const escape_info &b) { + if (!a.outs2_broken && b.outs2_broken) { + return true; + } + + if (!b.outs2_broken) { + return false; + } + + return a.outs.count() < b.outs.count(); +} + +map populateAccelerationInfo(const raw_dfa &rdfa, + const dfa_build_strat &strat, + const Grey &grey) { + map rv; + if (!grey.accelerateDFA) { + return rv; + } + + dstate_id_t sds_proxy = get_sds_or_proxy(rdfa); + DEBUG_PRINTF("sds %hu\n", sds_proxy); + + for (size_t i = 0; i < rdfa.states.size(); i++) { + escape_info ei = strat.find_escape_strings(i); + + if (i == DEAD_STATE) { + continue; + } + + /* Note on report acceleration states: While we can't accelerate while we + * are spamming out callbacks, the QR code paths don't raise reports + * during scanning so they can accelerate report states. */ + if (generates_callbacks(rdfa.kind) + && !rdfa.states[i].reports.empty()) { + continue; + } + + size_t single_limit = i == sds_proxy ? 
ACCEL_DFA_MAX_FLOATING_STOP_CHAR + : ACCEL_DFA_MAX_STOP_CHAR; + DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit); + + if (ei.outs.count() > single_limit) { + DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i, + ei.outs.count()); + continue; + } + + DEBUG_PRINTF("state %zu should be accelerable %zu\n", + i, ei.outs.count()); + + rv[i] = ei; + } + + /* provide acceleration states to states in the region of sds */ + if (contains(rv, sds_proxy)) { + auto sds_region = find_region(rdfa, sds_proxy, rv[sds_proxy]); + for (auto s : sds_region) { + if (!contains(rv, s) || better(rv[sds_proxy], rv[s])) { + rv[s] = rv[sds_proxy]; + } + } + } + + return rv; +} + +static +bool double_byte_ok(const escape_info &info) { + return !info.outs2_broken + && info.outs2_single.count() + info.outs2.size() <= 8 + && info.outs2_single.count() < info.outs2.size() + && info.outs2_single.count() <= 2 && !info.outs2.empty(); +} + +escape_info find_mcclellan_escape_info(const raw_dfa &rdfa, + dstate_id_t this_idx, + u32 max_allowed_accel_offset) { + escape_info rv; + const dstate &raw = rdfa.states[this_idx]; + const auto &alpha_remap = rdfa.alpha_remap; + + flat_set> outs2_local; + for (unsigned i = 0; i < N_CHARS; i++) { + outs2_local.clear(); + + if (raw.next[alpha_remap[i]] != this_idx) { + rv.outs.set(i); + + DEBUG_PRINTF("next is %hu\n", raw.next[alpha_remap[i]]); + const dstate &raw_next = rdfa.states[raw.next[alpha_remap[i]]]; + + if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) { + DEBUG_PRINTF("leads to report\n"); + rv.outs2_broken = true; /* cannot accelerate over reports */ + } + + for (unsigned j = 0; !rv.outs2_broken && j < N_CHARS; j++) { + if (raw_next.next[alpha_remap[j]] == raw.next[alpha_remap[j]]) { + continue; + } + + DEBUG_PRINTF("adding %02x %02x -> %hu to 2 \n", i, j, + raw_next.next[alpha_remap[j]]); + outs2_local.emplace((u8)i, (u8)j); + } + + if (outs2_local.size() > 8) { + DEBUG_PRINTF("adding %02x to outs2_single\n", i); + 
rv.outs2_single.set(i); + } else { + insert(&rv.outs2, outs2_local); + } + if (rv.outs2.size() > 8) { + DEBUG_PRINTF("outs2 too big\n"); + rv.outs2_broken = true; + } + } + } + + DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa)); + DEBUG_PRINTF("broken %d\n", rv.outs2_broken); + if (!double_byte_ok(rv) && !is_triggered(rdfa.kind) + && this_idx == rdfa.start_floating + && this_idx != DEAD_STATE) { + DEBUG_PRINTF("looking for offset accel at %u\n", this_idx); + auto offset = look_for_offset_accel(rdfa, this_idx, + max_allowed_accel_offset); + DEBUG_PRINTF("width %zu vs %zu\n", offset.outs.count(), + rv.outs.count()); + if (offset.outs.count() < rv.outs.count()) { + DEBUG_PRINTF("using offset accel\n"); + rv = offset; + } + } + + return rv; +} + +} diff --git a/src/nfa/mcclellancompile_accel.h b/src/nfa/mcclellancompile_accel.h new file mode 100644 index 00000000..1e14c2cd --- /dev/null +++ b/src/nfa/mcclellancompile_accel.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MCCLELLANCOMPILE_ACCEL_H +#define MCCLELLANCOMPILE_ACCEL_H + +#include "mcclellancompile.h" + +#include + +namespace ue2 { + +struct Grey; + +#define ACCEL_DFA_MAX_OFFSET_DEPTH 4 + +/** Maximum tolerated number of escape character from an accel state. + * This is larger than nfa, as we don't have a budget and the nfa cheats on stop + * characters for sets of states */ +#define ACCEL_DFA_MAX_STOP_CHAR 160 + +/** Maximum tolerated number of escape character from a sds accel state. Larger + * than normal states as accelerating sds is important. 
Matches NFA value */ +#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192 + +escape_info look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base, + u32 max_allowed_accel_offset); + +std::map populateAccelerationInfo(const raw_dfa &rdfa, + const dfa_build_strat &strat, + const Grey &grey); + +escape_info find_mcclellan_escape_info(const raw_dfa &rdfa, + dstate_id_t this_idx, + u32 max_allowed_accel_offset); + +} + +#endif diff --git a/src/nfa/mcclellancompile_util.cpp b/src/nfa/mcclellancompile_util.cpp index cd85ef36..2c946520 100644 --- a/src/nfa/mcclellancompile_util.cpp +++ b/src/nfa/mcclellancompile_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -334,4 +334,63 @@ size_t hash_dfa(const raw_dfa &rdfa) { return v; } +static +bool has_self_loop(dstate_id_t s, const raw_dfa &raw) { + u16 top_remap = raw.alpha_remap[TOP]; + for (u32 i = 0; i < raw.states[s].next.size(); i++) { + if (i != top_remap && raw.states[s].next[i] == s) { + return true; + } + } + return false; +} + +dstate_id_t get_sds_or_proxy(const raw_dfa &raw) { + if (raw.start_floating != DEAD_STATE) { + DEBUG_PRINTF("has floating start\n"); + return raw.start_floating; + } + + DEBUG_PRINTF("looking for SDS proxy\n"); + + dstate_id_t s = raw.start_anchored; + + if (has_self_loop(s, raw)) { + return s; + } + + u16 top_remap = raw.alpha_remap[TOP]; + + ue2::unordered_set seen; + while (true) { + seen.insert(s); + DEBUG_PRINTF("basis %hu\n", s); + + /* check if we are connected to a state with a self loop */ + for (u32 i = 0; i < raw.states[s].next.size(); i++) { + dstate_id_t t = raw.states[s].next[i]; + if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) { + return t; + } + } + + /* find a neighbour to use as a basis for looking for the sds proxy */ + dstate_id_t t = DEAD_STATE; 
+ for (u32 i = 0; i < raw.states[s].next.size(); i++) { + dstate_id_t tt = raw.states[s].next[i]; + if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) { + t = tt; + break; + } + } + + if (t == DEAD_STATE) { + /* we were unable to find a state to use as a SDS proxy */ + return DEAD_STATE; + } + + s = t; + } +} + } // namespace ue2 diff --git a/src/nfa/mcclellancompile_util.h b/src/nfa/mcclellancompile_util.h index 183abcaa..7015893b 100644 --- a/src/nfa/mcclellancompile_util.h +++ b/src/nfa/mcclellancompile_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,14 +29,13 @@ #ifndef MCCLELLAN_COMPILE_UTIL_H #define MCCLELLAN_COMPILE_UTIL_H +#include "rdfa.h" #include "ue2common.h" #include namespace ue2 { -struct raw_dfa; - u32 remove_leading_dots(raw_dfa &raw); void prune_overlong(raw_dfa &raw, u32 max_offset); std::set all_reports(const raw_dfa &rdfa); @@ -50,6 +49,8 @@ size_t hash_dfa_no_reports(const raw_dfa &rdfa); /** \brief Compute a simple hash of this raw_dfa, including its reports. 
*/ size_t hash_dfa(const raw_dfa &rdfa); +dstate_id_t get_sds_or_proxy(const raw_dfa &raw); + } // namespace ue2 #endif diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp index ed9f5bfe..41eda35d 100644 --- a/src/nfagraph/ng_limex_accel.cpp +++ b/src/nfagraph/ng_limex_accel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -464,16 +464,13 @@ void dumpPaths(const vector > &paths) { #endif static -void blowoutPathsLessStrictSegment(vector > *paths) { +void blowoutPathsLessStrictSegment(vector > &paths) { /* paths segments which are a superset of an earlier segment should never be * picked as an acceleration segment -> to improve processing just replace * with dot */ - for (vector >::iterator p = paths->begin(); - p != paths->end(); ++p) { - for (vector::iterator it = p->begin(); it != p->end(); - ++it) { - vector::iterator jt = it; - for (++jt; jt != p->end(); ++jt) { + for (auto &p : paths) { + for (auto it = p.begin(); it != p.end(); ++it) { + for (auto jt = next(it); jt != p.end(); ++jt) { if (it->isSubsetOf(*jt)) { *jt = CharReach::dot(); } @@ -483,10 +480,10 @@ void blowoutPathsLessStrictSegment(vector > *paths) { } static -void unifyPathsLastSegment(vector > *paths) { +void unifyPathsLastSegment(vector > &paths) { /* try to unify paths which only differ in the last segment */ - for (vector >::iterator p = paths->begin(); - p != paths->end() && p + 1 != paths->end();) { + for (vector >::iterator p = paths.begin(); + p != paths.end() && p + 1 != paths.end();) { vector &a = *p; vector &b = *(p + 1); @@ -504,7 +501,7 @@ void unifyPathsLastSegment(vector > *paths) { if (i == a.size() - 1) { /* we can unify these paths */ a[i] |= b[i]; - paths->erase(p + 1); + paths.erase(p + 1); } else { ++p; } @@ -512,23 +509,59 @@ void 
unifyPathsLastSegment(vector > *paths) { } static -void improvePaths(vector > *paths) { +void improvePaths(vector > &paths) { #ifdef DEBUG DEBUG_PRINTF("orig paths\n"); - dumpPaths(*paths); + dumpPaths(paths); #endif blowoutPathsLessStrictSegment(paths); - sort(paths->begin(), paths->end()); + sort(paths.begin(), paths.end()); unifyPathsLastSegment(paths); #ifdef DEBUG DEBUG_PRINTF("opt paths\n"); - dumpPaths(*paths); + dumpPaths(paths); #endif } +AccelScheme findBestAccelScheme(vector > paths, + const CharReach &terminating) { + improvePaths(paths); + + DEBUG_PRINTF("we have %zu paths\n", paths.size()); + if (paths.size() > 40) { + return AccelScheme(); /* too many paths to explore */ + } + + /* if we were smart we would do something netflowy on the paths to find the + * best cut. But we aren't, so we will just brute force it. + */ + AccelScheme curr(terminating, 0U); + AccelScheme best; + findBest(paths.begin(), paths.end(), curr, &best); + + /* find best is a bit lazy in terms of minimising the offset, see if we can + * make it better. need to find the min max offset that we need.*/ + u32 offset = 0; + for (vector >::iterator p = paths.begin(); + p != paths.end(); ++p) { + u32 i = 0; + for (vector::iterator it = p->begin(); it != p->end(); + ++it, i++) { + if (it->isSubsetOf(best.cr)) { + break; + } + } + offset = MAX(offset, i); + } + assert(offset <= best.offset); + best.offset = offset; + + return best; +} + AccelScheme nfaFindAccel(const NGHolder &g, const vector &verts, const vector &refined_cr, const map &br_cyclic, @@ -579,36 +612,7 @@ AccelScheme nfaFindAccel(const NGHolder &g, const vector &verts, reverse(it->begin(), it->end()); } - improvePaths(&paths); - DEBUG_PRINTF("we have %zu paths\n", paths.size()); - if (paths.size() > 40) { - return AccelScheme(); /* too many paths to explore */ - } - - /* if we were smart we would do something netflowy on the paths to find the - * best cut. But we aren't, so we will just brute force it. 
- */ - AccelScheme curr(terminating, 0U); - AccelScheme best; - findBest(paths.begin(), paths.end(), curr, &best); - - /* find best is a bit lazy in terms of minimising the offset, see if we can - * make it better. need to find the min max offset that we need.*/ - u32 offset = 0; - for (vector >::iterator p = paths.begin(); - p != paths.end(); ++p) { - u32 i = 0; - for (vector::iterator it = p->begin(); it != p->end(); - ++it, i++) { - if (it->isSubsetOf(best.cr)) { - break; - } - } - offset = MAX(offset, i); - } - assert(offset <= best.offset); - best.offset = offset; - return best; + return findBestAccelScheme(std::move(paths), terminating); } NFAVertex get_sds_or_proxy(const NGHolder &g) { diff --git a/src/nfagraph/ng_limex_accel.h b/src/nfagraph/ng_limex_accel.h index b9dba2e1..113b216c 100644 --- a/src/nfagraph/ng_limex_accel.h +++ b/src/nfagraph/ng_limex_accel.h @@ -110,6 +110,9 @@ AccelScheme nfaFindAccel(const NGHolder &g, const std::vector &verts, const std::map &br_cyclic, bool allow_wide); +AccelScheme findBestAccelScheme(std::vector > paths, + const CharReach &terminating); + /** \brief Check if vertex \a v is an accelerable state (for a limex NFA). 
*/ bool nfaCheckAccel(const NGHolder &g, NFAVertex v, const std::vector &refined_cr, diff --git a/src/util/dump_charclass.h b/src/util/dump_charclass.h index d2a71880..9c3362bc 100644 --- a/src/util/dump_charclass.h +++ b/src/util/dump_charclass.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,8 +48,8 @@ enum cc_output_t { class CharReach; -void describeClass(std::ostream &os, const CharReach &cr, size_t maxLength, - enum cc_output_t out_type); +void describeClass(std::ostream &os, const CharReach &cr, size_t maxLength = 16, + enum cc_output_t out_type = CC_OUT_TEXT); std::string describeClass(const CharReach &cr, size_t maxLength = 16, enum cc_output_t out_type = CC_OUT_TEXT);