diff --git a/CMakeLists.txt b/CMakeLists.txt
index 36267fc6..e1bd2794 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -596,6 +596,8 @@ SET (hs_SRCS
     src/nfa/mcclellan_internal.h
     src/nfa/mcclellancompile.cpp
     src/nfa/mcclellancompile.h
+    src/nfa/mcclellancompile_accel.cpp
+    src/nfa/mcclellancompile_accel.h
     src/nfa/mcclellancompile_util.cpp
     src/nfa/mcclellancompile_util.h
     src/nfa/limex_compile.cpp
diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp
index d735c80a..2ad3c6dd 100644
--- a/src/nfa/goughcompile.cpp
+++ b/src/nfa/goughcompile.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -85,10 +85,11 @@ public:
                             vector<u32> &reports_eod /* out */,
                             u8 *isSingleReport /* out */,
                             ReportID *arbReport  /* out */) const override;
-    void find_escape_strings(dstate_id_t this_idx,
-                             escape_info *out) const override;
+    escape_info find_escape_strings(dstate_id_t this_idx) const override;
     size_t accelSize(void) const override { return sizeof(gough_accel); }
-    void buildAccel(dstate_id_t this_idx, void *accel_out) override;
+    void buildAccel(dstate_id_t this_idx, const escape_info &info,
+                    void *accel_out) override;
+    u32 max_allowed_offset_accel() const override { return 0; }
 
     raw_som_dfa &rdfa;
     const GoughGraph &gg;
@@ -1145,32 +1146,43 @@ aligned_unique_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
     return gough_dfa;
 }
 
-void gough_build_strat::find_escape_strings(dstate_id_t this_idx,
-                                            escape_info *out) const {
+escape_info gough_build_strat::find_escape_strings(dstate_id_t this_idx) const {
+    escape_info rv;
     if (!contains(accel_gough_info, this_idx)) {
-        out->outs = CharReach::dot();
-        out->outs2_broken = true;
-        return;
+        rv.outs = CharReach::dot();
+        rv.outs2_broken = true;
+        return rv;
     }
 
-    mcclellan_build_strat::find_escape_strings(this_idx, out);
+    rv = mcclellan_build_strat::find_escape_strings(this_idx);
+    assert(!rv.offset); /* should have been limited by strat */
+    if (rv.offset) {
+        rv = escape_info(); /* discard everything, incl. the unusable offset */
+        rv.outs = CharReach::dot();
+        rv.outs2_broken = true;
+        return rv;
+    }
 
     if (!accel_gough_info.at(this_idx).two_byte) {
-        out->outs2_broken = true;
+        rv.outs2_broken = true;
     }
+
+    return rv;
 }
 
-void gough_build_strat::buildAccel(dstate_id_t this_idx, void *accel_out) {
+void gough_build_strat::buildAccel(dstate_id_t this_idx, const escape_info &info,
+                                   void *accel_out) {
     assert(mcclellan_build_strat::accelSize() == sizeof(AccelAux));
     gough_accel *accel = (gough_accel *)accel_out;
     /* build a plain accelaux so we can work out where we can get to */
-    mcclellan_build_strat::buildAccel(this_idx, &accel->accel);
+    mcclellan_build_strat::buildAccel(this_idx, info, &accel->accel);
     DEBUG_PRINTF("state %hu is accel with type %hhu\n", this_idx,
                  accel->accel.accel_type);
     if (accel->accel.accel_type == ACCEL_NONE) {
         return;
     }
 
+    assert(!accel->accel.generic.offset);
     assert(contains(accel_gough_info, this_idx));
     accel->margin_dist = verify_u8(accel_gough_info.at(this_idx).margin);
     built_accel[accel] = this_idx;
diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp
index f75d08b5..9b21b8c4 100644
--- a/src/nfa/mcclellancompile.cpp
+++ b/src/nfa/mcclellancompile.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -31,6 +31,8 @@
 #include "accel.h"
 #include "grey.h"
 #include "mcclellan_internal.h"
+#include "mcclellancompile_accel.h"
+#include "mcclellancompile_util.h"
 #include "nfa_internal.h"
 #include "shufticompile.h"
 #include "trufflecompile.h"
@@ -56,25 +58,18 @@
 #include <set>
 #include <vector>
 
+#include <boost/range/adaptor/map.hpp>
+
 using namespace std;
+using boost::adaptors::map_keys;
 
 namespace ue2 {
 
-/* compile time accel defs */
-#define ACCEL_MAX_STOP_CHAR 160 /* larger than nfa, as we don't have a budget
-                                   and the nfa cheats on stop characters for
-                                   sets of states */
-#define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */
-
-
 namespace /* anon */ {
 
 struct dstate_extra {
-    u16 daddytaken;
-    bool shermanState;
-    bool accelerable;
-    dstate_extra(void) : daddytaken(0), shermanState(false),
-                         accelerable(false) {}
+    u16 daddytaken = 0;
+    bool shermanState = false;
 };
 
 struct dfa_info {
@@ -105,10 +100,6 @@ struct dfa_info {
         return extra[raw_id].shermanState;
     }
 
-    bool is_accel(dstate_id_t raw_id) const {
-        return extra[raw_id].accelerable;
-    }
-
     size_t size(void) const { return states.size(); }
 };
 
@@ -135,6 +126,14 @@ mstate_aux *getAux(NFA *n, dstate_id_t i) {
     return aux;
 }
 
+static
+bool double_byte_ok(const escape_info &info) {
+    const size_t singles = info.outs2_single.count();
+    const size_t pairs = info.outs2.size(); /* singles < pairs => non-empty */
+    return !info.outs2_broken && singles <= 2 && singles < pairs
+        && singles + pairs <= 8;
+}
+
 static
 void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
     assert((size_t)succ_table % 2 == 0);
@@ -186,75 +185,43 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
     }
 }
 
-void mcclellan_build_strat::find_escape_strings(dstate_id_t this_idx,
-                                                escape_info *out) const {
-    const dstate &raw = rdfa.states[this_idx];
-    const auto &alpha_remap = rdfa.alpha_remap;
+u32 mcclellan_build_strat::max_allowed_offset_accel() const {
+    return ACCEL_DFA_MAX_OFFSET_DEPTH; /* default; gough overrides to 0 */
+}
 
-    flat_set<pair<u8, u8>> outs2_local;
-    for (unsigned i = 0; i < N_CHARS; i++) {
-        outs2_local.clear();
-
-        if (raw.next[alpha_remap[i]] != this_idx) {
-            out->outs.set(i);
-
-            DEBUG_PRINTF("next is %hu\n", raw.next[alpha_remap[i]]);
-            const dstate &raw_next = rdfa.states[raw.next[alpha_remap[i]]];
-
-            if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) {
-                DEBUG_PRINTF("leads to report\n");
-                out->outs2_broken = true;  /* cannot accelerate over reports */
-            }
-
-            for (unsigned j = 0; !out->outs2_broken && j < N_CHARS; j++) {
-                if (raw_next.next[alpha_remap[j]] == raw.next[alpha_remap[j]]) {
-                    continue;
-                }
-
-                DEBUG_PRINTF("adding %02x %02x -> %hu to 2 \n", i, j,
-                             raw_next.next[alpha_remap[j]]);
-                outs2_local.emplace((u8)i, (u8)j);
-            }
-
-            if (outs2_local.size() > 8) {
-                DEBUG_PRINTF("adding %02x to outs2_single\n", i);
-                out->outs2_single.set(i);
-            } else {
-                insert(&out->outs2, outs2_local);
-            }
-            if (out->outs2.size() > 8) {
-                DEBUG_PRINTF("outs2 too big\n");
-                out->outs2_broken = true;
-            }
-        }
-    }
+escape_info
+mcclellan_build_strat::find_escape_strings(dstate_id_t this_idx) const {
+    const u32 max_offset = max_allowed_offset_accel(); /* per-strat limit */
+    return find_mcclellan_escape_info(rdfa, this_idx, max_offset);
+}
 
 /** builds acceleration schemes for states */
-void mcclellan_build_strat::buildAccel(dstate_id_t this_idx, void *accel_out) {
+void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
+                                       const escape_info &info,
+                                       void *accel_out) {
     AccelAux *accel = (AccelAux *)accel_out;
-    escape_info out;
 
-    find_escape_strings(this_idx, &out);
+    DEBUG_PRINTF("accelerations scheme has offset %u\n", info.offset);
+    accel->generic.offset = verify_u8(info.offset);
 
-    if (!out.outs2_broken && out.outs2_single.none()
-        && out.outs2.size() == 1) {
+    if (double_byte_ok(info) && info.outs2_single.none()
+        && info.outs2.size() == 1) {
         accel->accel_type = ACCEL_DVERM;
-        accel->dverm.c1 = out.outs2.begin()->first;
-        accel->dverm.c2 = out.outs2.begin()->second;
+        accel->dverm.c1 = info.outs2.begin()->first;
+        accel->dverm.c2 = info.outs2.begin()->second;
         DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx);
         return;
     }
 
-    if (!out.outs2_broken && out.outs2_single.none()
-        && (out.outs2.size() == 2 || out.outs2.size() == 4)) {
+    if (double_byte_ok(info) && info.outs2_single.none()
+        && (info.outs2.size() == 2 || info.outs2.size() == 4)) {
         bool ok = true;
 
-        assert(!out.outs2.empty());
-        u8 firstC = out.outs2.begin()->first & CASE_CLEAR;
-        u8 secondC = out.outs2.begin()->second & CASE_CLEAR;
+        assert(!info.outs2.empty());
+        u8 firstC = info.outs2.begin()->first & CASE_CLEAR;
+        u8 secondC = info.outs2.begin()->second & CASE_CLEAR;
 
-        for (const pair<u8, u8> &p : out.outs2) {
+        for (const pair<u8, u8> &p : info.outs2) {
             if ((p.first & CASE_CLEAR) != firstC
              || (p.second & CASE_CLEAR) != secondC) {
                 ok = false;
@@ -271,12 +238,9 @@ void mcclellan_build_strat::buildAccel(dstate_id_t this_idx, void *accel_out) {
         }
     }
 
-    if (!out.outs2_broken &&
-        (out.outs2_single.count() + out.outs2.size()) <= 8 &&
-        out.outs2_single.count() < out.outs2.size() &&
-        out.outs2_single.count() <= 2 && !out.outs2.empty()) {
+    if (double_byte_ok(info)) {
         accel->accel_type = ACCEL_DSHUFTI;
-        shuftiBuildDoubleMasks(out.outs2_single, out.outs2,
+        shuftiBuildDoubleMasks(info.outs2_single, info.outs2,
                                &accel->dshufti.lo1,
                                &accel->dshufti.hi1,
                                &accel->dshufti.lo2,
@@ -285,166 +249,46 @@ void mcclellan_build_strat::buildAccel(dstate_id_t this_idx, void *accel_out) {
         return;
     }
 
-    if (out.outs.none()) {
+    if (info.outs.none()) {
         accel->accel_type = ACCEL_RED_TAPE;
         DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape"
                      " from which there is no escape\n", this_idx);
         return;
     }
 
-    if (out.outs.count() == 1) {
+    if (info.outs.count() == 1) {
         accel->accel_type = ACCEL_VERM;
-        accel->verm.c = out.outs.find_first();
+        accel->verm.c = info.outs.find_first();
         DEBUG_PRINTF("state %hu is vermicelli\n", this_idx);
         return;
     }
 
-    if (out.outs.count() == 2 && out.outs.isCaselessChar()) {
+    if (info.outs.count() == 2 && info.outs.isCaselessChar()) {
         accel->accel_type = ACCEL_VERM_NOCASE;
-        accel->verm.c = out.outs.find_first() & CASE_CLEAR;
+        accel->verm.c = info.outs.find_first() & CASE_CLEAR;
         DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx);
         return;
     }
 
-    if (out.outs.count() > ACCEL_MAX_FLOATING_STOP_CHAR) {
+    if (info.outs.count() > ACCEL_DFA_MAX_FLOATING_STOP_CHAR) {
         accel->accel_type = ACCEL_NONE;
         DEBUG_PRINTF("state %hu is too broad\n", this_idx);
         return;
     }
 
     accel->accel_type = ACCEL_SHUFTI;
-    if (-1 != shuftiBuildMasks(out.outs, &accel->shufti.lo,
+    if (-1 != shuftiBuildMasks(info.outs, &accel->shufti.lo,
                                &accel->shufti.hi)) {
         DEBUG_PRINTF("state %hu is shufti\n", this_idx);
         return;
     }
 
-    assert(!out.outs.none());
+    assert(!info.outs.none());
     accel->accel_type = ACCEL_TRUFFLE;
-    truffleBuildMasks(out.outs, &accel->truffle.mask1, &accel->truffle.mask2);
+    truffleBuildMasks(info.outs, &accel->truffle.mask1, &accel->truffle.mask2);
     DEBUG_PRINTF("state %hu is truffle\n", this_idx);
 }
 
-static
-bool is_accel(const raw_dfa &raw, dstate_id_t sds_or_proxy,
-              dstate_id_t this_idx) {
-    if (!this_idx /* dead state is not accelerable */) {
-        return false;
-    }
-
-    /* Note on report acceleration states: While we can't accelerate while we
-     * are spamming out callbacks, the QR code paths don't raise reports
-     * during scanning so they can accelerate report states. */
-
-    if (generates_callbacks(raw.kind)
-        && !raw.states[this_idx].reports.empty()) {
-        return false;
-    }
-
-    size_t single_limit = this_idx == sds_or_proxy ?
-                             ACCEL_MAX_FLOATING_STOP_CHAR : ACCEL_MAX_STOP_CHAR;
-    DEBUG_PRINTF("inspecting %hu/%hu: %zu\n", this_idx, sds_or_proxy,
-                  single_limit);
-
-    CharReach out;
-    for (u32 i = 0; i < N_CHARS; i++) {
-        if (raw.states[this_idx].next[raw.alpha_remap[i]] != this_idx) {
-            out.set(i);
-        }
-    }
-
-    if (out.count() <= single_limit) {
-        DEBUG_PRINTF("state %hu should be accelerable %zu\n", this_idx,
-                     out.count());
-        return true;
-    }
-
-    DEBUG_PRINTF("state %hu is not accelerable has %zu\n", this_idx,
-                  out.count());
-
-    return false;
-}
-
-static
-bool has_self_loop(dstate_id_t s, const raw_dfa &raw) {
-    u16 top_remap = raw.alpha_remap[TOP];
-    for (u32 i = 0; i < raw.states[s].next.size(); i++) {
-        if (i != top_remap && raw.states[s].next[i] == s) {
-            return true;
-        }
-    }
-    return false;
-}
-
-static
-dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
-    if (raw.start_floating != DEAD_STATE) {
-        DEBUG_PRINTF("has floating start\n");
-        return raw.start_floating;
-    }
-
-    DEBUG_PRINTF("looking for SDS proxy\n");
-
-    dstate_id_t s = raw.start_anchored;
-
-    if (has_self_loop(s, raw)) {
-        return s;
-    }
-
-    u16 top_remap = raw.alpha_remap[TOP];
-
-    ue2::unordered_set<dstate_id_t> seen;
-    while (true) {
-        seen.insert(s);
-        DEBUG_PRINTF("basis %hu\n", s);
-
-        /* check if we are connected to a state with a self loop */
-        for (u32 i = 0; i < raw.states[s].next.size(); i++) {
-            dstate_id_t t = raw.states[s].next[i];
-            if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) {
-                return t;
-            }
-        }
-
-        /* find a neighbour to use as a basis for looking for the sds proxy */
-        dstate_id_t t = DEAD_STATE;
-        for (u32 i = 0; i < raw.states[s].next.size(); i++) {
-            dstate_id_t tt = raw.states[s].next[i];
-            if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) {
-                t = tt;
-                break;
-            }
-        }
-
-        if (t == DEAD_STATE) {
-            /* we were unable to find a state to use as a SDS proxy */
-            return DEAD_STATE;
-        }
-
-        s = t;
-        seen.insert(t);
-    }
-}
-
-static
-void populateAccelerationInfo(dfa_info &info, u32 *ac, const Grey &grey) {
-    *ac = 0; /* number of accelerable states */
-
-    if (!grey.accelerateDFA) {
-        return;
-    }
-
-    dstate_id_t sds_proxy = get_sds_or_proxy(info.raw);
-    DEBUG_PRINTF("sds %hu\n", sds_proxy);
-
-    for (size_t i = 0; i < info.size(); i++) {
-        if (is_accel(info.raw, sds_proxy, i)) {
-            ++*ac;
-            info.extra[i].accelerable = true;
-        }
-    }
-}
-
 static
 void populateBasicInfo(size_t state_size, const dfa_info &info,
                        u32 total_size, u32 aux_offset, u32 accel_offset,
@@ -625,6 +469,14 @@ void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset,
     }
 }
 
+static
+void fillAccelOut(const map<dstate_id_t, escape_info> &accel_escape_info,
+                  set<dstate_id_t> *accel_states) {
+    /* the keys of accel_escape_info are the states given accel info */
+    const auto accel_ids = accel_escape_info | map_keys;
+    accel_states->insert(accel_ids.begin(), accel_ids.end());
+}
+
 static
 size_t calcShermanRegionSize(const dfa_info &info) {
     size_t rv = 0;
@@ -692,14 +544,14 @@ int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) {
 
 static
 aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,
-                                           const CompileContext &cc) {
+                                           const CompileContext &cc,
+                                           set<dstate_id_t> *accel_states) {
     DEBUG_PRINTF("building mcclellan 16\n");
 
     vector<u32> reports; /* index in ri for the appropriate report list */
     vector<u32> reports_eod; /* as above */
     ReportID arb;
     u8 single;
-    u32 accelCount;
 
     u8 alphaShift = info.getAlphaShift();
     assert(alphaShift <= 8);
@@ -713,7 +565,8 @@ aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,
 
     unique_ptr<raw_report_info> ri
         = info.strat.gatherReports(reports, reports_eod, &single, &arb);
-    populateAccelerationInfo(info, &accelCount, cc.grey);
+    map<dstate_id_t, escape_info> accel_escape_info
+        = populateAccelerationInfo(info.raw, info.strat, cc.grey);
 
     size_t tran_size = (1 << info.getAlphaShift())
         * sizeof(u16) * count_real_states;
@@ -721,7 +574,7 @@ aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,
     size_t aux_size = sizeof(mstate_aux) * info.size();
 
     size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size);
-    size_t accel_size = info.strat.accelSize() * accelCount;
+    size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
     size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
                                     + ri->getReportListSize(), 32);
     size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size);
@@ -736,7 +589,7 @@ aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,
     char *nfa_base = (char *)nfa.get();
 
     populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset,
-                      accelCount, arb, single, nfa.get());
+                      accel_escape_info.size(), arb, single, nfa.get());
 
     vector<u32> reportOffsets;
 
@@ -769,12 +622,12 @@ aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,
 
         fillInAux(&aux[fs], i, info, reports, reports_eod, reportOffsets);
 
-        if (info.is_accel(i)) {
+        if (contains(accel_escape_info, i)) {
             this_aux->accel_offset = accel_offset;
             accel_offset += info.strat.accelSize();
             assert(accel_offset + sizeof(NFA) <= sherman_offset);
             assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
-            info.strat.buildAccel(i,
+            info.strat.buildAccel(i, accel_escape_info.at(i),
                                   (void *)((char *)m + this_aux->accel_offset));
         }
     }
@@ -798,12 +651,12 @@ aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,
 
         fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets);
 
-        if (info.is_accel(i)) {
+        if (contains(accel_escape_info, i)) {
             this_aux->accel_offset = accel_offset;
             accel_offset += info.strat.accelSize();
             assert(accel_offset + sizeof(NFA) <= sherman_offset);
             assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
-            info.strat.buildAccel(i,
+            info.strat.buildAccel(i, accel_escape_info.at(i),
                                   (void *)((char *)m + this_aux->accel_offset));
         }
 
@@ -836,6 +689,10 @@ aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,
 
     markEdges(nfa.get(), succ_table, info);
 
+    if (accel_states && nfa) {
+        fillAccelOut(accel_escape_info, accel_states);
+    }
+
     return nfa;
 }
 
@@ -874,7 +731,9 @@ void fillInBasicState8(const dfa_info &info, mstate_aux *aux, u8 *succ_table,
 }
 
 static
-void allocateFSN8(dfa_info &info, u16 *accel_limit, u16 *accept_limit) {
+void allocateFSN8(dfa_info &info,
+                  const map<dstate_id_t, escape_info> &accel_escape_info,
+                  u16 *accel_limit, u16 *accept_limit) {
     info.states[0].impl_id = 0; /* dead is always 0 */
 
     vector<dstate_id_t> norm;
@@ -886,7 +745,7 @@ void allocateFSN8(dfa_info &info, u16 *accel_limit, u16 *accept_limit) {
     for (u32 i = 1; i < info.size(); i++) {
         if (!info.states[i].reports.empty()) {
             accept.push_back(i);
-        } else if (info.is_accel(i)) {
+        } else if (contains(accel_escape_info, i)) {
             accel.push_back(i);
         } else {
             norm.push_back(i);
@@ -915,23 +774,24 @@ void allocateFSN8(dfa_info &info, u16 *accel_limit, u16 *accept_limit) {
 
 static
 aligned_unique_ptr<NFA> mcclellanCompile8(dfa_info &info,
-                                          const CompileContext &cc) {
+                                          const CompileContext &cc,
+                                          set<dstate_id_t> *accel_states) {
     DEBUG_PRINTF("building mcclellan 8\n");
 
     vector<u32> reports;
     vector<u32> reports_eod;
     ReportID arb;
     u8 single;
-    u32 accelCount;
 
     unique_ptr<raw_report_info> ri
         = info.strat.gatherReports(reports, reports_eod, &single, &arb);
-    populateAccelerationInfo(info, &accelCount, cc.grey);
+    map<dstate_id_t, escape_info> accel_escape_info
+        = populateAccelerationInfo(info.raw, info.strat, cc.grey);
 
     size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size();
     size_t aux_size = sizeof(mstate_aux) * info.size();
     size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size);
-    size_t accel_size = info.strat.accelSize() * accelCount;
+    size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
     size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
                                      + ri->getReportListSize(), 32);
     size_t total_size = accel_offset + accel_size;
@@ -951,9 +811,9 @@ aligned_unique_ptr<NFA> mcclellanCompile8(dfa_info &info,
 
     mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get());
 
-    allocateFSN8(info, &m->accel_limit_8, &m->accept_limit_8);
+    allocateFSN8(info, accel_escape_info, &m->accel_limit_8, &m->accept_limit_8);
     populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset,
-                      accelCount, arb, single, nfa.get());
+                      accel_escape_info.size(), arb, single, nfa.get());
 
     vector<u32> reportOffsets;
 
@@ -964,13 +824,14 @@ aligned_unique_ptr<NFA> mcclellanCompile8(dfa_info &info,
     mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset);
 
     for (size_t i = 0; i < info.size(); i++) {
-        if (info.is_accel(i)) {
+        if (contains(accel_escape_info, i)) {
             u32 j = info.implId(i);
 
             aux[j].accel_offset = accel_offset;
             accel_offset += info.strat.accelSize();
 
-            info.strat.buildAccel(i, (void *)((char *)m + aux[j].accel_offset));
+            info.strat.buildAccel(i, accel_escape_info.at(i),
+                                  (void *)((char *)m + aux[j].accel_offset));
         }
 
         fillInBasicState8(info, aux, succ_table, reportOffsets, reports,
@@ -981,6 +842,10 @@ aligned_unique_ptr<NFA> mcclellanCompile8(dfa_info &info,
 
     DEBUG_PRINTF("rl size %zu\n", ri->size());
 
+    if (accel_states && nfa) {
+        fillAccelOut(accel_escape_info, accel_states);
+    }
+
     return nfa;
 }
 
@@ -1163,15 +1028,6 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
     return false;
 }
 
-static
-void fillAccelOut(const dfa_info &info, set<dstate_id_t> *accel_states) {
-    for (size_t i = 0; i < info.size(); i++) {
-        if (info.is_accel(i)) {
-            accel_states->insert(i);
-        }
-    }
-}
-
 aligned_unique_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, dfa_build_strat &strat,
                                            const CompileContext &cc,
                                            set<dstate_id_t> *accel_states) {
@@ -1200,19 +1056,15 @@ aligned_unique_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, dfa_build_strat &strat,
 
     aligned_unique_ptr<NFA> nfa;
     if (!using8bit) {
-        nfa = mcclellanCompile16(info, cc);
+        nfa = mcclellanCompile16(info, cc, accel_states);
     } else {
-        nfa = mcclellanCompile8(info, cc);
+        nfa = mcclellanCompile8(info, cc, accel_states);
     }
 
     if (has_eod_reports) {
         nfa->flags |= NFA_ACCEPTS_EOD;
     }
 
-    if (accel_states && nfa) {
-        fillAccelOut(info, accel_states);
-    }
-
     DEBUG_PRINTF("compile done\n");
     return nfa;
 }
diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h
index 78126bc8..d4b4325d 100644
--- a/src/nfa/mcclellancompile.h
+++ b/src/nfa/mcclellancompile.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -59,6 +59,7 @@ struct escape_info {
     CharReach outs2_single;
     flat_set<std::pair<u8, u8>> outs2;
     bool outs2_broken = false;
+    u32 offset = 0;
 };
 
 class dfa_build_strat {
@@ -70,10 +71,10 @@ public:
                                std::vector<u32> &reports_eod /* out */,
                                u8 *isSingleReport /* out */,
                                ReportID *arbReport  /* out */) const = 0;
-    virtual void find_escape_strings(dstate_id_t this_idx,
-                                     escape_info *out) const = 0;
+    virtual escape_info find_escape_strings(dstate_id_t this_idx) const = 0;
     virtual size_t accelSize(void) const = 0;
-    virtual void buildAccel(dstate_id_t this_idx, void *accel_out) = 0;
+    virtual void buildAccel(dstate_id_t this_idx, const escape_info &info,
+                            void *accel_out) = 0;
 };
 
 class mcclellan_build_strat : public dfa_build_strat {
@@ -81,14 +82,15 @@ public:
     explicit mcclellan_build_strat(raw_dfa &r) : rdfa(r) {}
     raw_dfa &get_raw() const override { return rdfa; }
     std::unique_ptr<raw_report_info> gatherReports(
-                                   std::vector<u32> &reports /* out */,
-                                   std::vector<u32> &reports_eod /* out */,
-                                   u8 *isSingleReport /* out */,
-                                   ReportID *arbReport  /* out */) const override;
-    void find_escape_strings(dstate_id_t this_idx,
-                             escape_info *out) const override;
+                                  std::vector<u32> &reports /* out */,
+                                  std::vector<u32> &reports_eod /* out */,
+                                  u8 *isSingleReport /* out */,
+                                  ReportID *arbReport  /* out */) const override;
+    escape_info find_escape_strings(dstate_id_t this_idx) const override;
     size_t accelSize(void) const override;
-    void buildAccel(dstate_id_t this_idx, void *accel_out) override;
+    void buildAccel(dstate_id_t this_idx, const escape_info &info,
+                    void *accel_out /* out: AccelAux */) override;
+    virtual u32 max_allowed_offset_accel() const;
 
 private:
     raw_dfa &rdfa;
diff --git a/src/nfa/mcclellancompile_accel.cpp b/src/nfa/mcclellancompile_accel.cpp
new file mode 100644
index 00000000..12a05aaa
--- /dev/null
+++ b/src/nfa/mcclellancompile_accel.cpp
@@ -0,0 +1,383 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "mcclellancompile_accel.h"
+
+#include "mcclellancompile_util.h"
+
+#include "grey.h"
+#include "nfagraph/ng_limex_accel.h"
+#include "util/charreach.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+
+#include <vector>
+#include <sstream>
+
+#define PATHS_LIMIT 500
+
+using namespace std;
+
+namespace ue2 {
+
+namespace {
+
+struct path {
+    vector<CharReach> reach;
+    dstate_id_t dest = DEAD_STATE;
+    explicit path(dstate_id_t base) : dest(base) {}
+};
+
+}
+
+static UNUSED
+string describeClasses(const vector<CharReach> &v) {
+    std::ostringstream oss;
+    for (const auto &cr : v) {
+        describeClass(oss, cr);
+    }
+    return oss.str();
+}
+
+static
+void dump_paths(const vector<path> &paths) {
+    for (UNUSED const auto &p : paths) {
+        DEBUG_PRINTF("[%s] -> %u\n", describeClasses(p.reach).c_str(), p.dest);
+    }
+    DEBUG_PRINTF("%zu paths\n", paths.size());
+}
+
+static
+bool is_useful_path(const vector<path> &good, const path &p) {
+    /* p adds nothing if an already-kept path to the same state covers it;
+     * segments are compared pairwise from the end of both paths */
+    for (const auto &g : good) {
+        assert(g.dest == p.dest);
+        assert(g.reach.size() <= p.reach.size());
+        bool covered = true;
+        auto pit = p.reach.rbegin();
+        for (auto git = g.reach.rbegin(); covered && git != g.reach.rend();
+             ++git, ++pit) {
+            covered = pit->isSubsetOf(*git);
+        }
+        if (covered) {
+            DEBUG_PRINTF("better: [%s] -> %u\n",
+                         describeClasses(g.reach).c_str(), g.dest);
+            return false;
+        }
+    }
+
+    return true;
+}
+
+static
+path append(const path &orig, const CharReach &cr, u32 new_dest) {
+    path p(new_dest);
+    p.reach = orig.reach;
+    p.reach.push_back(cr);
+
+    return p;
+}
+
+/* Extend path p by one input byte, appending the resulting paths to out. */
+static
+void extend(const raw_dfa &rdfa, const path &p,
+            map<u32, vector<path> > &all, vector<path> &out) {
+    const dstate &s = rdfa.states[p.dest]; /* by reference: no state copy */
+    /* a trailing empty class marks a path that already ended on a report */
+    if (!p.reach.empty() && p.reach.back().none()) {
+        out.push_back(p);
+        return;
+    }
+
+    if (!s.reports.empty()) {
+        if (generates_callbacks(rdfa.kind)) {
+            out.push_back(p);
+            return;
+        } else {
+            path pp = append(p, CharReach(), p.dest);
+            all[p.dest].push_back(pp);
+            out.push_back(std::move(pp));
+        }
+    }
+
+    if (!s.reports_eod.empty()) {
+        path pp = append(p, CharReach(), p.dest);
+        all[p.dest].push_back(pp);
+        out.push_back(std::move(pp));
+    }
+    /* group the input bytes by the successor state they lead to */
+    map<u32, CharReach> dest;
+    for (unsigned i = 0; i < N_CHARS; i++) {
+        u32 succ = s.next[rdfa.alpha_remap[i]];
+        dest[succ].set(i);
+    }
+    /* all holds the paths kept so far per state; skip extensions it covers */
+    for (const auto &e : dest) {
+        path pp = append(p, e.second, e.first);
+        if (!is_useful_path(all[e.first], pp)) {
+            DEBUG_PRINTF("not useful: [%s] -> %u\n",
+                         describeClasses(pp.reach).c_str(), pp.dest);
+            continue;
+        }
+
+        DEBUG_PRINTF("----good: [%s] -> %u\n",
+                         describeClasses(pp.reach).c_str(), pp.dest);
+        all[e.first].push_back(pp);
+        out.push_back(std::move(pp));
+    }
+}
+
+static
+vector<vector<CharReach> > generate_paths(const raw_dfa &rdfa, dstate_id_t base,
+                                          u32 len) {
+    vector<path> paths{ path(base) };
+    map<u32, vector<path> > all;
+    all[base].push_back(path(base));
+    for (u32 i = 0; i < len && paths.size() < PATHS_LIMIT; i++) {
+        vector<path> next_gen;
+        for (const auto &p : paths) {
+            extend(rdfa, p, all, next_gen);
+        }
+
+        paths = move(next_gen);
+    }
+
+    dump_paths(paths);
+
+    vector<vector<CharReach> > rv;
+    for (auto &p : paths) {
+        rv.push_back(move(p.reach));
+    }
+    return rv;
+}
+
+escape_info look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base,
+                                  u32 max_allowed_accel_offset) {
+    DEBUG_PRINTF("looking for accel for %hu\n", base);
+    vector<vector<CharReach> > paths = generate_paths(rdfa, base,
+                                                   max_allowed_accel_offset + 1);
+    AccelScheme as = findBestAccelScheme(paths, CharReach());
+    escape_info rv;
+    rv.outs2_broken = true;
+    rv.offset = as.offset;
+    rv.outs = as.cr;
+    DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset);
+    return rv;
+}
+
+
+static
+vector<u16> find_nonexit_symbols(const raw_dfa &rdfa,
+                                 const CharReach &escape) {
+    set<u16> rv;
+    CharReach nonexit = ~escape;
+    for (auto i = nonexit.find_first(); i != CharReach::npos;
+         i = nonexit.find_next(i)) {
+        rv.insert(rdfa.alpha_remap[i]);
+    }
+
+    return vector<u16>(rv.begin(), rv.end());
+}
+
+static
+set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base,
+                             const escape_info &ei) {
+    DEBUG_PRINTF("looking for region around %hu\n", base);
+
+    set<dstate_id_t> region = {base};
+    /* the region is only grown beyond base when ei is marked outs2_broken */
+    if (!ei.outs2_broken) {
+        return region;
+    }
+
+    DEBUG_PRINTF("accel %s+%u\n", describeClass(ei.outs).c_str(), ei.offset);
+
+    const CharReach &escape = ei.outs;
+    auto nonexit_symbols = find_nonexit_symbols(rdfa, escape);
+    /* flood fill from base, following only symbols outside the escape class */
+    vector<dstate_id_t> pending = {base};
+    while (!pending.empty()) {
+        dstate_id_t curr = pending.back();
+        pending.pop_back();
+        for (auto s : nonexit_symbols) {
+            dstate_id_t t = rdfa.states[curr].next[s];
+            if (contains(region, t)) {
+                continue;
+            }
+
+            DEBUG_PRINTF("    %hu is in region\n", t);
+            region.insert(t);
+            pending.push_back(t);
+        }
+    }
+
+    return region;
+}
+
+/* Returns true if scheme a should be preferred over scheme b. */
+static
+bool better(const escape_info &a, const escape_info &b) {
+    if (!b.outs2_broken) {
+        return false; /* an unbroken b is never displaced */
+    }
+
+    /* b is broken from here on: an unbroken a wins outright; between two
+     * broken schemes the one with the smaller outs class (fewer escape
+     * characters) wins */
+    return !a.outs2_broken || a.outs.count() < b.outs.count();
+}
+
+/* Map each accelerable DFA state to its escape info (empty when disabled). */
+map<dstate_id_t, escape_info> populateAccelerationInfo(const raw_dfa &rdfa,
+                                                   const dfa_build_strat &strat,
+                                                   const Grey &grey) {
+    map<dstate_id_t, escape_info> rv;
+    if (!grey.accelerateDFA) {
+        return rv;
+    }
+
+    dstate_id_t sds_proxy = get_sds_or_proxy(rdfa);
+    DEBUG_PRINTF("sds %hu\n", sds_proxy);
+
+    for (size_t i = 0; i < rdfa.states.size(); i++) {
+        if (i == DEAD_STATE) {
+            continue;
+        }
+
+        /* Note on report acceleration states: While we can't accelerate while we
+         * are spamming out callbacks, the QR code paths don't raise reports
+         * during scanning so they can accelerate report states. */
+        if (generates_callbacks(rdfa.kind)
+            && !rdfa.states[i].reports.empty()) {
+            continue;
+        }
+
+        size_t single_limit = i == sds_proxy ? ACCEL_DFA_MAX_FLOATING_STOP_CHAR
+                                             : ACCEL_DFA_MAX_STOP_CHAR;
+        DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit);
+
+        /* escape analysis is deferred until the cheap rejections have passed */
+        escape_info ei = strat.find_escape_strings(i);
+        if (ei.outs.count() > single_limit) {
+            DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i,
+                         ei.outs.count());
+            continue;
+        }
+
+        DEBUG_PRINTF("state %zu should be accelerable %zu\n",
+                     i, ei.outs.count());
+        rv[i] = ei;
+    }
+
+    /* provide acceleration info to states in the region of sds */
+    if (contains(rv, sds_proxy)) {
+        auto sds_region = find_region(rdfa, sds_proxy, rv[sds_proxy]);
+        for (auto s : sds_region) {
+            if (!contains(rv, s) || better(rv[sds_proxy], rv[s])) {
+                rv[s] = rv[sds_proxy];
+            }
+        }
+    }
+
+    return rv;
+}
+
+static
+bool double_byte_ok(const escape_info &info) {
+    return !info.outs2_broken
+        && info.outs2_single.count() + info.outs2.size() <= 8
+        && info.outs2_single.count() < info.outs2.size()
+        && info.outs2_single.count() <= 2 && !info.outs2.empty();
+}
+
+escape_info find_mcclellan_escape_info(const raw_dfa &rdfa,
+                                       dstate_id_t this_idx,
+                                       u32 max_allowed_accel_offset) {
+    escape_info rv;
+    const dstate &raw = rdfa.states[this_idx];
+    const auto &alpha_remap = rdfa.alpha_remap;
+
+    flat_set<pair<u8, u8>> outs2_local;
+    for (unsigned i = 0; i < N_CHARS; i++) {
+        outs2_local.clear();
+
+        if (raw.next[alpha_remap[i]] != this_idx) {
+            rv.outs.set(i);
+
+            DEBUG_PRINTF("next is %hu\n", raw.next[alpha_remap[i]]);
+            const dstate &raw_next = rdfa.states[raw.next[alpha_remap[i]]];
+
+            if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) {
+                DEBUG_PRINTF("leads to report\n");
+                rv.outs2_broken = true;  /* cannot accelerate over reports */
+            }
+
+            for (unsigned j = 0; !rv.outs2_broken && j < N_CHARS; j++) {
+                if (raw_next.next[alpha_remap[j]] == raw.next[alpha_remap[j]]) {
+                    continue;
+                }
+
+                DEBUG_PRINTF("adding %02x %02x -> %hu to 2 \n", i, j,
+                             raw_next.next[alpha_remap[j]]);
+                outs2_local.emplace((u8)i, (u8)j);
+            }
+
+            if (outs2_local.size() > 8) {
+                DEBUG_PRINTF("adding %02x to outs2_single\n", i);
+                rv.outs2_single.set(i);
+            } else {
+                insert(&rv.outs2, outs2_local);
+            }
+            if (rv.outs2.size() > 8) {
+                DEBUG_PRINTF("outs2 too big\n");
+                rv.outs2_broken = true;
+            }
+        }
+    }
+
+    DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa));
+    DEBUG_PRINTF("broken %d\n", rv.outs2_broken);
+    if (!double_byte_ok(rv) && !is_triggered(rdfa.kind)
+        && this_idx == rdfa.start_floating
+        && this_idx != DEAD_STATE) {
+        DEBUG_PRINTF("looking for offset accel at %u\n", this_idx);
+        auto offset = look_for_offset_accel(rdfa, this_idx,
+                                            max_allowed_accel_offset);
+        DEBUG_PRINTF("width %zu vs %zu\n", offset.outs.count(),
+                      rv.outs.count());
+        if (offset.outs.count() < rv.outs.count()) {
+            DEBUG_PRINTF("using offset accel\n");
+            rv = offset;
+        }
+    }
+
+    return rv;
+}
+
+}
diff --git a/src/nfa/mcclellancompile_accel.h b/src/nfa/mcclellancompile_accel.h
new file mode 100644
index 00000000..1e14c2cd
--- /dev/null
+++ b/src/nfa/mcclellancompile_accel.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MCCLELLANCOMPILE_ACCEL_H
+#define MCCLELLANCOMPILE_ACCEL_H
+
+#include "mcclellancompile.h"
+
+#include <map>
+
+namespace ue2 {
+
+struct Grey;
+
+#define ACCEL_DFA_MAX_OFFSET_DEPTH 4
+
+/** Maximum tolerated number of escape characters from an accel state.
+ * This is larger than nfa, as we don't have a budget and the nfa cheats on stop
+ * characters for sets of states */
+#define ACCEL_DFA_MAX_STOP_CHAR 160
+
+/** Maximum tolerated number of escape characters from a sds accel state. Larger
+ * than normal states as accelerating sds is important. Matches NFA value */
+#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192
+
+escape_info look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base,
+                                  u32 max_allowed_accel_offset);
+
+std::map<dstate_id_t, escape_info> populateAccelerationInfo(const raw_dfa &rdfa,
+                                                   const dfa_build_strat &strat,
+                                                   const Grey &grey);
+
+escape_info find_mcclellan_escape_info(const raw_dfa &rdfa,
+                                       dstate_id_t this_idx,
+                                       u32 max_allowed_accel_offset);
+
+}
+
+#endif
diff --git a/src/nfa/mcclellancompile_util.cpp b/src/nfa/mcclellancompile_util.cpp
index cd85ef36..2c946520 100644
--- a/src/nfa/mcclellancompile_util.cpp
+++ b/src/nfa/mcclellancompile_util.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -334,4 +334,63 @@ size_t hash_dfa(const raw_dfa &rdfa) {
     return v;
 }
 
+static
+bool has_self_loop(dstate_id_t s, const raw_dfa &raw) {
+    u16 top_remap = raw.alpha_remap[TOP];
+    for (u32 i = 0; i < raw.states[s].next.size(); i++) {
+        if (i != top_remap && raw.states[s].next[i] == s) {
+            return true;
+        }
+    }
+    return false;
+}
+
+dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
+    if (raw.start_floating != DEAD_STATE) {
+        DEBUG_PRINTF("has floating start\n");
+        return raw.start_floating;
+    }
+
+    DEBUG_PRINTF("looking for SDS proxy\n");
+
+    dstate_id_t s = raw.start_anchored;
+    /* the anchored start is its own proxy if it loops on a non-TOP symbol */
+    if (has_self_loop(s, raw)) {
+        return s;
+    }
+
+    u16 top_remap = raw.alpha_remap[TOP];
+    /* otherwise walk on via the first unseen, non-TOP, non-dead successor */
+    ue2::unordered_set<dstate_id_t> seen;
+    while (true) {
+        seen.insert(s);
+        DEBUG_PRINTF("basis %hu\n", s);
+
+        /* check if we are connected to a state with a self loop */
+        for (u32 i = 0; i < raw.states[s].next.size(); i++) {
+            dstate_id_t t = raw.states[s].next[i];
+            if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) {
+                return t;
+            }
+        }
+
+        /* find a neighbour to use as a basis for looking for the sds proxy */
+        dstate_id_t t = DEAD_STATE;
+        for (u32 i = 0; i < raw.states[s].next.size(); i++) {
+            dstate_id_t tt = raw.states[s].next[i];
+            if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) {
+                t = tt;
+                break;
+            }
+        }
+
+        if (t == DEAD_STATE) {
+            /* we were unable to find a state to use as a SDS proxy */
+            return DEAD_STATE;
+        }
+
+        s = t;
+    }
+}
+
 } // namespace ue2
diff --git a/src/nfa/mcclellancompile_util.h b/src/nfa/mcclellancompile_util.h
index 183abcaa..7015893b 100644
--- a/src/nfa/mcclellancompile_util.h
+++ b/src/nfa/mcclellancompile_util.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -29,14 +29,13 @@
 #ifndef MCCLELLAN_COMPILE_UTIL_H
 #define MCCLELLAN_COMPILE_UTIL_H
 
+#include "rdfa.h"
 #include "ue2common.h"
 
 #include <set>
 
 namespace ue2 {
 
-struct raw_dfa;
-
 u32 remove_leading_dots(raw_dfa &raw);
 void prune_overlong(raw_dfa &raw, u32 max_offset);
 std::set<ReportID> all_reports(const raw_dfa &rdfa);
@@ -50,6 +49,8 @@ size_t hash_dfa_no_reports(const raw_dfa &rdfa);
 /** \brief Compute a simple hash of this raw_dfa, including its reports. */
 size_t hash_dfa(const raw_dfa &rdfa);
 
+dstate_id_t get_sds_or_proxy(const raw_dfa &raw);
+
 } // namespace ue2
 
 #endif
diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp
index ed9f5bfe..41eda35d 100644
--- a/src/nfagraph/ng_limex_accel.cpp
+++ b/src/nfagraph/ng_limex_accel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -464,16 +464,13 @@ void dumpPaths(const vector<vector<CharReach> > &paths) {
 #endif
 
 static
-void blowoutPathsLessStrictSegment(vector<vector<CharReach> > *paths) {
+void blowoutPathsLessStrictSegment(vector<vector<CharReach> > &paths) {
     /* paths segments which are a superset of an earlier segment should never be
      * picked as an acceleration segment -> to improve processing just replace
      * with dot */
-    for (vector<vector<CharReach> >::iterator p = paths->begin();
-         p != paths->end(); ++p) {
-        for (vector<CharReach>::iterator it = p->begin(); it != p->end();
-             ++it) {
-            vector<CharReach>::iterator jt = it;
-            for (++jt; jt != p->end(); ++jt) {
+    for (auto &p : paths) {
+        for (auto it = p.begin(); it != p.end();  ++it) {
+            for (auto jt = next(it); jt != p.end(); ++jt) {
                 if (it->isSubsetOf(*jt)) {
                     *jt = CharReach::dot();
                 }
@@ -483,10 +480,10 @@ void blowoutPathsLessStrictSegment(vector<vector<CharReach> > *paths) {
 }
 
 static
-void unifyPathsLastSegment(vector<vector<CharReach> > *paths) {
+void unifyPathsLastSegment(vector<vector<CharReach> > &paths) {
     /* try to unify paths which only differ in the last segment */
-    for (vector<vector<CharReach> >::iterator p = paths->begin();
-         p != paths->end() && p + 1 != paths->end();) {
+    for (vector<vector<CharReach> >::iterator p = paths.begin();
+         p != paths.end() && p + 1 != paths.end();) {
         vector<CharReach> &a = *p;
         vector<CharReach> &b = *(p + 1);
 
@@ -504,7 +501,7 @@ void unifyPathsLastSegment(vector<vector<CharReach> > *paths) {
         if (i == a.size() - 1) {
             /* we can unify these paths */
             a[i] |= b[i];
-            paths->erase(p + 1);
+            paths.erase(p + 1);
         } else {
             ++p;
         }
@@ -512,23 +509,59 @@ void unifyPathsLastSegment(vector<vector<CharReach> > *paths) {
 }
 
 static
-void improvePaths(vector<vector<CharReach> > *paths) {
+void improvePaths(vector<vector<CharReach> > &paths) {
 #ifdef DEBUG
     DEBUG_PRINTF("orig paths\n");
-    dumpPaths(*paths);
+    dumpPaths(paths);
 #endif
     blowoutPathsLessStrictSegment(paths);
 
-    sort(paths->begin(), paths->end());
+    sort(paths.begin(), paths.end());
 
     unifyPathsLastSegment(paths);
 
 #ifdef DEBUG
     DEBUG_PRINTF("opt paths\n");
-    dumpPaths(*paths);
+    dumpPaths(paths);
 #endif
 }
 
+/* Choose the best acceleration scheme for paths, starting from terminating. */
+AccelScheme findBestAccelScheme(vector<vector<CharReach> > paths,
+                                const CharReach &terminating) {
+    improvePaths(paths);
+
+    DEBUG_PRINTF("we have %zu paths\n", paths.size());
+    if (paths.size() > 40) {
+        /* the search below is brute force, so give up on large path sets */
+        return AccelScheme();
+    }
+
+    /* nothing clever (e.g. a flow-based min cut) is attempted here: the
+     * candidate cuts through the paths are simply searched exhaustively */
+    AccelScheme seed(terminating, 0U);
+    AccelScheme best;
+    findBest(paths.begin(), paths.end(), seed, &best);
+
+    /* findBest does not minimise the offset: tighten it to the largest index,
+     * over all paths, of the first segment covered by the chosen class */
+    u32 min_offset = 0;
+    for (const auto &p : paths) {
+        u32 depth = 0;
+        for (const auto &cr : p) {
+            if (cr.isSubsetOf(best.cr)) {
+                break;
+            }
+            depth++;
+        }
+        min_offset = MAX(min_offset, depth);
+    }
+    assert(min_offset <= best.offset);
+    best.offset = min_offset;
+
+    return best;
+}
+
 AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
                          const vector<CharReach> &refined_cr,
                          const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
@@ -579,36 +612,7 @@ AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
         reverse(it->begin(), it->end());
     }
 
-    improvePaths(&paths);
-    DEBUG_PRINTF("we have %zu paths\n", paths.size());
-    if (paths.size() > 40) {
-        return AccelScheme(); /* too many paths to explore */
-    }
-
-    /* if we were smart we would do something netflowy on the paths to find the
-     * best cut. But we aren't, so we will just brute force it.
-     */
-    AccelScheme curr(terminating, 0U);
-    AccelScheme best;
-    findBest(paths.begin(), paths.end(), curr, &best);
-
-    /* find best is a bit lazy in terms of minimising the offset, see if we can
-     * make it better. need to find the min max offset that we need.*/
-    u32 offset = 0;
-    for (vector<vector<CharReach> >::iterator p = paths.begin();
-         p != paths.end(); ++p) {
-        u32 i = 0;
-        for (vector<CharReach>::iterator it = p->begin(); it != p->end();
-             ++it, i++) {
-            if (it->isSubsetOf(best.cr)) {
-                break;
-            }
-        }
-        offset = MAX(offset, i);
-    }
-    assert(offset <= best.offset);
-    best.offset = offset;
-    return best;
+    return findBestAccelScheme(std::move(paths), terminating);
 }
 
 NFAVertex get_sds_or_proxy(const NGHolder &g) {
diff --git a/src/nfagraph/ng_limex_accel.h b/src/nfagraph/ng_limex_accel.h
index b9dba2e1..113b216c 100644
--- a/src/nfagraph/ng_limex_accel.h
+++ b/src/nfagraph/ng_limex_accel.h
@@ -110,6 +110,9 @@ AccelScheme nfaFindAccel(const NGHolder &g, const std::vector<NFAVertex> &verts,
                     const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
                     bool allow_wide);
 
+AccelScheme findBestAccelScheme(std::vector<std::vector<CharReach> > paths,
+                                const CharReach &terminating);
+
 /** \brief Check if vertex \a v is an accelerable state (for a limex NFA). */
 bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
                    const std::vector<CharReach> &refined_cr,
diff --git a/src/util/dump_charclass.h b/src/util/dump_charclass.h
index d2a71880..9c3362bc 100644
--- a/src/util/dump_charclass.h
+++ b/src/util/dump_charclass.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -48,8 +48,8 @@ enum cc_output_t {
 
 class CharReach;
 
-void describeClass(std::ostream &os, const CharReach &cr, size_t maxLength,
-                   enum cc_output_t out_type);
+void describeClass(std::ostream &os, const CharReach &cr, size_t maxLength = 16,
+                   enum cc_output_t out_type  = CC_OUT_TEXT);
 
 std::string describeClass(const CharReach &cr, size_t maxLength = 16,
                           enum cc_output_t out_type = CC_OUT_TEXT);