diff --git a/util/ng_corpus_generator.cpp b/util/ng_corpus_generator.cpp
index ca7c413a..6d0de539 100644
--- a/util/ng_corpus_generator.cpp
+++ b/util/ng_corpus_generator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -218,7 +218,7 @@ namespace {
 /** \brief Concrete implementation */
 class CorpusGeneratorImpl : public CorpusGenerator {
 public:
-    CorpusGeneratorImpl(const NGHolder &graph_in, CorpusProperties &props);
+    CorpusGeneratorImpl(const NGWrapper &graph_in, CorpusProperties &props);
     ~CorpusGeneratorImpl() {}
 
     void generateCorpus(vector<string> &data);
@@ -244,10 +244,13 @@ private:
     CorpusProperties &cProps;
 };
 
-CorpusGeneratorImpl::CorpusGeneratorImpl(const NGHolder &graph_in,
+CorpusGeneratorImpl::CorpusGeneratorImpl(const NGWrapper &graph_in,
                                          CorpusProperties &props)
     : graph(graph_in), cProps(props) {
-    // empty
+    // if this pattern is to be matched approximately
+    if (graph_in.edit_distance && !props.editDistance) {
+        props.editDistance = props.rand(0, graph_in.edit_distance + 1);
+    }
 }
 
 void CorpusGeneratorImpl::generateCorpus(vector<string> &data) {
@@ -388,7 +391,7 @@ hit_limit:
 /** \brief Concrete implementation for UTF-8 */
 class CorpusGeneratorUtf8 : public CorpusGenerator {
 public:
-    CorpusGeneratorUtf8(const NGHolder &graph_in, CorpusProperties &props);
+    CorpusGeneratorUtf8(const NGWrapper &graph_in, CorpusProperties &props);
     ~CorpusGeneratorUtf8() {}
 
     void generateCorpus(vector<string> &data);
@@ -407,17 +410,21 @@ private:
     void addRandom(const min_max &mm, vector<unichar> *out);
 
     /** \brief The NFA graph we operate over. */
-    const NGHolder &graph;
+    const NGWrapper &graph;
 
     /** \brief Reference to our corpus generator properties object (stores some
      * state) */
     CorpusProperties &cProps;
 };
 
-CorpusGeneratorUtf8::CorpusGeneratorUtf8(const NGHolder &graph_in,
+CorpusGeneratorUtf8::CorpusGeneratorUtf8(const NGWrapper &graph_in,
                                          CorpusProperties &props)
     : graph(graph_in), cProps(props) {
-    // empty
+    // we do not support Utf8 for approximate matching
+    if (graph.edit_distance) {
+        throw CorpusGenerationFailure("UTF-8 for edited patterns is not "
+                                      "supported.");
+    }
 }
 
 void CorpusGeneratorUtf8::generateCorpus(vector<string> &data) {
diff --git a/util/ng_corpus_generator.h b/util/ng_corpus_generator.h
index a7445ab6..a02721bd 100644
--- a/util/ng_corpus_generator.h
+++ b/util/ng_corpus_generator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -45,6 +45,12 @@ class NGWrapper;
 
 } // namespace ue2
 
+struct CorpusGenerationFailure {
+    explicit CorpusGenerationFailure(const std::string s) :
+        message(std::move(s)) {}
+    std::string message;
+};
+
 /** \brief Abstract interface to corpus generator tool. */
 class CorpusGenerator {
 public:
diff --git a/util/ng_find_matches.cpp b/util/ng_find_matches.cpp
index 2b337365..b3c81574 100644
--- a/util/ng_find_matches.cpp
+++ b/util/ng_find_matches.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -47,55 +47,732 @@
 using namespace std;
 using namespace ue2;
 
+using MatchSet = set<pair<size_t, size_t>>;
+using StateBitSet = boost::dynamic_bitset<>;
+
 namespace {
 
-struct StateSet {
-    explicit StateSet(size_t sz) : s(sz), som(sz, 0) {}
-    boost::dynamic_bitset<> s; // bitset of states that are on
-    vector<size_t> som; // som value for each state
+// returns all successors up to a given depth in a vector of sets, indexed by
+// zero-based depth from source vertex
+static
+vector<flat_set<NFAVertex>>
+gatherSuccessorsByDepth(const NGHolder &g, const NFAVertex &src, u32 depth) {
+    vector<flat_set<NFAVertex>> result(depth);
+    flat_set<NFAVertex> cur, next;
+
+    // populate current set of successors
+    for (auto v : adjacent_vertices_range(src, g)) {
+        // ignore self-loops
+        if (src == v) {
+            continue;
+        }
+        DEBUG_PRINTF("Node %zu depth 1\n", g[v].index);
+
+        cur.insert(v);
+    }
+    result[0] = cur;
+
+    for (u32 d = 1; d < depth; d++) {
+        // collect all successors for all current level vertices
+        for (auto v : cur) {
+            // don't go past special nodes
+            if (is_special(v, g)) {
+                continue;
+            }
+
+            for (auto succ : adjacent_vertices_range(v, g)) {
+                // ignore self-loops
+                if (v == succ) {
+                    continue;
+                }
+                DEBUG_PRINTF("Node %zu depth %u\n", g[succ].index, d + 1);
+                next.insert(succ);
+            }
+        }
+        result[d] = next;
+        next.swap(cur);
+        next.clear();
+    }
+
+    return result;
+}
+
+// returns all predecessors up to a given depth in a vector of sets, indexed by
+// zero-based depth from source vertex
+static
+vector<flat_set<NFAVertex>> gatherPredecessorsByDepth(const NGHolder &g,
+                                                      NFAVertex src, u32 depth) {
+    vector<flat_set<NFAVertex>> result(depth);
+    flat_set<NFAVertex> cur, next;
+
+    assert(depth > 0);
+
+    // populate current set of successors
+    for (auto v : inv_adjacent_vertices_range(src, g)) {
+        // ignore self-loops
+        if (src == v) {
+            continue;
+        }
+        DEBUG_PRINTF("Node %zu depth 1\n", g[v].index);
+        cur.insert(v);
+    }
+    result[0] = cur;
+
+    for (u32 d = 1; d < depth; d++) {
+        // collect all successors for all current level vertices
+        for (auto v : cur) {
+            for (auto pred : inv_adjacent_vertices_range(v, g)) {
+                // ignore self-loops
+                if (v == pred) {
+                    continue;
+                }
+                DEBUG_PRINTF("Node %zu depth %u\n", g[pred].index, d + 1);
+                next.insert(pred);
+            }
+        }
+        result[d] = next;
+        next.swap(cur);
+        next.clear();
+    }
+
+    return result;
+}
+
+// this is a per-vertex, per-shadow level state transition table
+struct GraphCache {
+    GraphCache(u32 dist_in, const NGHolder &g) :
+        size(num_vertices(g)), edit_distance(dist_in)
+    {
+        auto dist_max = edit_distance + 1;
+
+        allocateStateTransitionTable(dist_max);
+        populateTransitionCache(g, dist_max);
+        populateAcceptCache(g, dist_max);
+    }
+
+    void allocateStateTransitionTable(u32 dist_max) {
+        // resize level 1 - per vertex
+        shadow_transitions.resize(size);
+        helper_transitions.resize(size);
+
+        // resize level 2 - per shadow level
+        for (u32 i = 0; i < size; i++) {
+            shadow_transitions[i].resize(dist_max);
+            helper_transitions[i].resize(dist_max);
+
+            // resize level 3 - per vertex
+            for (u32 d = 0; d < dist_max; d++) {
+                shadow_transitions[i][d].resize(size);
+                helper_transitions[i][d].resize(size);
+            }
+        }
+
+        // accept states are indexed by edit distance
+        accept_states.resize(dist_max);
+        accept_eod_states.resize(dist_max);
+
+        // vertex report maps are indexed by edit distance
+        vertex_reports_by_level.resize(dist_max);
+        vertex_eod_reports_by_level.resize(dist_max);
+    }
+
+    /*
+     * certain transitions to helpers are disallowed:
+     *  1. transitions from accept/acceptEod
+     *  2. transitions to accept/acceptEod
+     *  3. from start to startDs
+     *  4. to a virtual/multiline start
+     *
+     * everything else is allowed.
+     */
+    bool canTransitionToHelper(NFAVertex u, NFAVertex v, const NGHolder &g) const {
+        if (is_any_accept(u, g)) {
+            return false;
+        }
+        if (is_any_accept(v, g)) {
+            return false;
+        }
+        if (u == g.start && v == g.startDs) {
+            return false;
+        }
+        if (is_virtual_start(v, g)) {
+            return false;
+        }
+        return true;
+    }
+
+    void populateTransitionCache(const NGHolder &g, u32 dist_max) {
+        // populate mapping of vertex index to vertex
+        vector<NFAVertex> idx_to_v(size);
+        for (auto v : vertices_range(g)) {
+            idx_to_v[g[v].index] = v;
+        }
+
+        for (u32 i = 0; i < size; i++) {
+            auto cur_v = idx_to_v[i];
+
+            // set up transition tables
+            auto succs = gatherSuccessorsByDepth(g, cur_v, dist_max);
+
+            assert(succs.size() == dist_max);
+
+            for (u32 d = 0; d < dist_max; d++) {
+                auto &v_shadows = shadow_transitions[i][d];
+                auto cur_v_bit = i;
+
+                // enable transition to next level helper (this handles insertion)
+                if (d < edit_distance && !is_any_accept(cur_v, g)) {
+                    auto &next_v_helpers = helper_transitions[i][d + 1];
+
+                    next_v_helpers.set(cur_v_bit);
+                }
+
+                // if vertex has a self-loop, we can also transition to it,
+                // but only if we're at shadow level 0
+                if (edge(cur_v, cur_v, g).second && d == 0) {
+                    v_shadows.set(cur_v_bit);
+                }
+
+                // populate state transition tables
+                for (auto v : succs[d]) {
+                    auto v_bit = g[v].index;
+
+                    // we cannot transition to startDs on any level other than
+                    // level 0
+                    if (v != g.startDs || d == 0) {
+                        // this handles direct transitions as well as removals
+                        v_shadows.set(v_bit);
+                    }
+
+                    // we can also transition to next-level helper (handles
+                    // replace), provided we meet the criteria
+                    if (d < edit_distance && canTransitionToHelper(cur_v, v, g)) {
+                        auto &next_v_helpers = helper_transitions[i][d + 1];
+
+                        next_v_helpers.set(v_bit);
+                    }
+                }
+            }
+        }
+    }
+
+    void populateAcceptCache(const NGHolder &g, u32 dist_max) {
+        // set up accept states masks
+        StateBitSet accept(size);
+        accept.set(g[g.accept].index);
+        StateBitSet accept_eod(size);
+        accept_eod.set(g[g.acceptEod].index);
+
+        // gather accept and acceptEod states
+        for (u32 base_dist = 0; base_dist < dist_max; base_dist++) {
+            auto &states = accept_states[base_dist];
+            auto &eod_states = accept_eod_states[base_dist];
+
+            states.resize(size);
+            eod_states.resize(size);
+
+            // inspect each vertex
+            for (u32 i = 0; i < size; i++) {
+                // inspect all shadow levels from base_dist to dist_max
+                for (u32 d = 0; d < dist_max - base_dist; d++) {
+                    auto &shadows = shadow_transitions[i][d];
+
+                    // if this state transitions to accept, set its bit
+                    if ((shadows & accept).any()) {
+                        states.set(i);
+                    }
+                    if ((shadows & accept_eod).any()) {
+                        eod_states.set(i);
+                    }
+                }
+            }
+        }
+
+        // populate accepts cache
+        for (auto  v : inv_adjacent_vertices_range(g.accept, g)) {
+            const auto &rs = g[v].reports;
+
+            for (u32 d = 0; d <= edit_distance; d++) {
+                // add self to report list at all levels
+                vertex_reports_by_level[d][v].insert(rs.begin(), rs.end());
+            }
+            if (edit_distance == 0) {
+                // if edit distance is 0, no predecessors will have reports
+                continue;
+            }
+
+            auto preds_by_depth = gatherPredecessorsByDepth(g, v, edit_distance);
+            for (u32 pd = 0; pd < preds_by_depth.size(); pd++) {
+                const auto &preds = preds_by_depth[pd];
+                // for each predecessor, add reports up to maximum edit distance
+                // for current depth from source vertex
+                for (auto pred : preds) {
+                    for (u32 d = 0; d < edit_distance - pd; d++) {
+                        vertex_reports_by_level[d][pred].insert(rs.begin(), rs.end());
+                    }
+                }
+            }
+        }
+        for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
+            const auto &rs = g[v].reports;
+
+            if (v == g.accept) {
+                continue;
+            }
+
+            for (u32 d = 0; d <= edit_distance; d++) {
+                // add self to report list at all levels
+                vertex_eod_reports_by_level[d][v].insert(rs.begin(), rs.end());
+            }
+            if (edit_distance == 0) {
+                // if edit distance is 0, no predecessors will have reports
+                continue;
+            }
+
+            auto preds_by_depth = gatherPredecessorsByDepth(g, v, edit_distance);
+            for (u32 pd = 0; pd < preds_by_depth.size(); pd++) {
+                const auto &preds = preds_by_depth[pd];
+                // for each predecessor, add reports up to maximum edit distance
+                // for current depth from source vertex
+                for (auto pred : preds) {
+                    for (u32 d = 0; d < edit_distance - pd; d++) {
+                        vertex_eod_reports_by_level[d][pred].insert(rs.begin(), rs.end());
+                    }
+                }
+            }
+        }
+    }
+
+#ifdef DEBUG
+    void dumpStateTransitionTable(const NGHolder &g) {
+        StateBitSet accept(size);
+        accept.set(g[g.accept].index);
+        StateBitSet accept_eod(size);
+        accept_eod.set(g[g.acceptEod].index);
+
+        DEBUG_PRINTF("Dumping state transition tables\n");
+        DEBUG_PRINTF("Shadows:\n");
+        for (u32 i = 0; i < num_vertices(g); i++) {
+            DEBUG_PRINTF("%-7s %3u:", "Vertex", i);
+            for (u32 j = 0; j < num_vertices(g); j++) {
+                printf("%3i", j);
+            }
+            printf("\n");
+            for (u32 d = 0; d <= edit_distance; d++) {
+                DEBUG_PRINTF("%-7s %3u:", "Level", d);
+                const auto &s = getShadowTransitions(i, d);
+                for (u32 j = 0; j < num_vertices(g); j++) {
+                    printf("%3i", s.test(j));
+                }
+                printf("\n");
+            }
+            DEBUG_PRINTF("\n");
+        }
+
+        DEBUG_PRINTF("Helpers:\n");
+        for (u32 i = 0; i < num_vertices(g); i++) {
+            DEBUG_PRINTF("%-7s %3u:", "Vertex", i);
+            for (u32 j = 0; j < num_vertices(g); j++) {
+                printf("%3i", j);
+            }
+            printf("\n");
+            for (u32 d = 0; d <= edit_distance; d++) {
+                DEBUG_PRINTF("%-7s %3u:", "Level", d);
+                const auto &s = getHelperTransitions(i, d);
+                for (u32 j = 0; j < num_vertices(g); j++) {
+                    printf("%3i", s.test(j));
+                }
+                printf("\n");
+            }
+            DEBUG_PRINTF("\n");
+        }
+
+        DEBUG_PRINTF("Accept transitions:\n");
+        DEBUG_PRINTF("%-12s", "Vertex idx:");
+        for (u32 j = 0; j < num_vertices(g); j++) {
+            printf("%3i", j);
+        }
+        printf("\n");
+        for (u32 d = 0; d <= edit_distance; d++) {
+            DEBUG_PRINTF("%-7s %3u:", "Level", d);
+            const auto &s = getAcceptTransitions(d);
+            for (u32 j = 0; j < num_vertices(g); j++) {
+                printf("%3i", s.test(j));
+            }
+            printf("\n");
+        }
+        DEBUG_PRINTF("\n");
+
+        DEBUG_PRINTF("Accept EOD transitions:\n");
+        DEBUG_PRINTF("%-12s", "Vertex idx:");
+        for (u32 j = 0; j < num_vertices(g); j++) {
+            printf("%3i", j);
+        }
+        printf("\n");
+        for (u32 d = 0; d <= edit_distance; d++) {
+            DEBUG_PRINTF("%-7s %3u:", "Level", d);
+            const auto &s = getAcceptEodTransitions(d);
+            for (u32 j = 0; j < num_vertices(g); j++) {
+                printf("%3i", s.test(j));
+            }
+            printf("\n");
+        }
+        DEBUG_PRINTF("\n");
+
+        DEBUG_PRINTF("%-12s ", "Accepts:");
+        for (u32 i = 0; i < num_vertices(g); i++) {
+            printf("%3i", accept.test(i));
+        }
+        printf("\n");
+
+        DEBUG_PRINTF("%-12s ", "EOD Accepts:");
+        for (u32 i = 0; i < num_vertices(g); i++) {
+            printf("%3i", accept_eod.test(i));
+        }
+        printf("\n");
+
+        DEBUG_PRINTF("Reports\n");
+        for (auto v : vertices_range(g)) {
+            for (u32 d = 0; d <= edit_distance; d++) {
+                const auto &r = vertex_reports_by_level[d][v];
+                const auto &e = vertex_eod_reports_by_level[d][v];
+                DEBUG_PRINTF("%-7s %3zu %-8s %3zu %-8s %3zu\n",
+                             "Vertex", g[v].index, "rs:", r.size(), "eod:", e.size());
+            }
+        }
+        printf("\n");
+    }
+#endif
+
+    const StateBitSet& getShadowTransitions(u32 idx, u32 level) const {
+        assert(idx < size);
+        assert(level <= edit_distance);
+        return shadow_transitions[idx][level];
+    }
+    const StateBitSet& getHelperTransitions(u32 idx, u32 level) const {
+        assert(idx < size);
+        assert(level <= edit_distance);
+        return helper_transitions[idx][level];
+    }
+    const StateBitSet& getAcceptTransitions(u32 level) const {
+        assert(level <= edit_distance);
+        return accept_states[level];
+    }
+    const StateBitSet& getAcceptEodTransitions(u32 level) const {
+        assert(level <= edit_distance);
+        return accept_eod_states[level];
+    }
+
+    /*
+     * the bitsets are indexed by vertex and shadow level. the bitset's length is
+     * equal to the total number of vertices in the graph.
+     *
+     * for convenience, helper functions are provided.
+     */
+    vector<vector<StateBitSet>> shadow_transitions;
+    vector<vector<StateBitSet>> helper_transitions;
+
+    // accept states masks, indexed by shadow level
+    vector<StateBitSet> accept_states;
+    vector<StateBitSet> accept_eod_states;
+
+    // map of all reports associated with any vertex, indexed by shadow level
+    vector<map<NFAVertex, flat_set<ReportID>>> vertex_reports_by_level;
+    vector<map<NFAVertex, flat_set<ReportID>>> vertex_eod_reports_by_level;
+
+    u32 size;
+    u32 edit_distance;
 };
 
-using MatchSet = set<pair<size_t, size_t>>;
+
+/*
+ * SOM workflow is expected to be the following:
+ * - Caller calls getActiveStates, which reports SOM for each active states
+ * - Caller calls getSuccessorStates on each of the active states, which *doesn't*
+ *   report SOM
+ * - Caller decides if the successor state should be activated, and calls
+ *   activateState with SOM set to that of previous active state (not successor!)
+ * - activateState then resolves any conflicts between SOMs that may arise from
+ *   multiple active states progressing to the same successor
+ */
+struct StateSet {
+    struct State {
+        enum node_type {
+            NODE_SHADOW = 0,
+            NODE_HELPER
+        };
+        State(size_t idx_in, u32 level_in, size_t som_in, node_type type_in) :
+            idx(idx_in), level(level_in), som(som_in), type(type_in) {}
+        size_t idx;
+        u32 level;
+        size_t som;
+        node_type type;
+    };
+
+    StateSet(size_t sz, u32 dist_in) :
+            shadows(dist_in + 1), helpers(dist_in + 1),
+            shadows_som(dist_in + 1), helpers_som(dist_in + 1),
+            edit_distance(dist_in) {
+        for (u32 dist = 0; dist <= dist_in; dist++) {
+            shadows[dist].resize(sz, false);
+            helpers[dist].resize(sz, false);
+            shadows_som[dist].resize(sz, 0);
+            helpers_som[dist].resize(sz, 0);
+        }
+    }
+
+    void reset() {
+        for (u32 dist = 0; dist <= edit_distance; dist++) {
+            shadows[dist].reset();
+            helpers[dist].reset();
+            fill(shadows_som[dist].begin(), shadows_som[dist].end(), 0);
+            fill(helpers_som[dist].begin(), helpers_som[dist].end(), 0);
+        }
+    }
+
+    bool empty() const {
+        for (u32 dist = 0; dist <= edit_distance; dist++) {
+            if (shadows[dist].any()) {
+                return false;
+            }
+            if (helpers[dist].any()) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    size_t count() const {
+        size_t result = 0;
+
+        for (u32 dist = 0; dist <= edit_distance; dist++) {
+            result += shadows[dist].count();
+            result += helpers[dist].count();
+        }
+
+        return result;
+    }
+
+    bool setActive(const State &s) {
+        switch (s.type) {
+        case State::NODE_HELPER:
+            return helpers[s.level].test_set(s.idx);
+        case State::NODE_SHADOW:
+            return shadows[s.level].test_set(s.idx);
+        }
+        assert(0);
+        return false;
+    }
+
+    size_t getCachedSom(const State &s) const {
+        switch (s.type) {
+        case State::NODE_HELPER:
+            return helpers_som[s.level][s.idx];
+        case State::NODE_SHADOW:
+            return shadows_som[s.level][s.idx];
+        }
+        assert(0);
+        return 0;
+    }
+
+    void setCachedSom(const State &s, const size_t som_val) {
+        switch (s.type) {
+        case State::NODE_HELPER:
+            helpers_som[s.level][s.idx] = som_val;
+            break;
+        case State::NODE_SHADOW:
+            shadows_som[s.level][s.idx] = som_val;
+            break;
+        default:
+            assert(0);
+        }
+    }
+
+#ifdef DEBUG
+    void dumpActiveStates() const {
+        const auto states = getActiveStates();
+
+        DEBUG_PRINTF("Dumping active states\n");
+
+        for (const auto &state : states) {
+            DEBUG_PRINTF("type: %s idx: %zu level: %u som: %zu\n",
+                         state.type == State::NODE_HELPER ? "HELPER" : "SHADOW",
+                         state.idx, state.level, state.som);
+        }
+    }
+#endif
+
+    flat_set<State> getActiveStates() const {
+        flat_set<State> result;
+
+        for (u32 dist = 0; dist <= edit_distance; dist++) {
+            // get all shadow vertices (including original graph)
+            const auto &cur_shadow_vertices = shadows[dist];
+            for (size_t id = cur_shadow_vertices.find_first();
+                 id != cur_shadow_vertices.npos;
+                 id = cur_shadow_vertices.find_next(id)) {
+                result.emplace(id, dist, shadows_som[dist][id],
+                               State::NODE_SHADOW);
+            }
+
+            // the rest is only valid for edited graphs
+            if (dist == 0) {
+                continue;
+            }
+
+            // get all helper vertices
+            const auto &cur_helper_vertices = helpers[dist];
+            for (size_t id = cur_helper_vertices.find_first();
+                 id != cur_helper_vertices.npos;
+                 id = cur_helper_vertices.find_next(id)) {
+                result.emplace(id, dist, helpers_som[dist][id],
+                               State::NODE_HELPER);
+            }
+        }
+
+        return result;
+    }
+
+    // does not return SOM
+    flat_set<State> getSuccessors(const State &state, const GraphCache &gc) const {
+        flat_set<State> result;
+
+        // maximum shadow depth that we can go from current level
+        u32 max_depth = edit_distance - state.level + 1;
+
+        for (u32 d = 0; d < max_depth; d++) {
+            const auto &shadow_succ = gc.getShadowTransitions(state.idx, d);
+            for (size_t id = shadow_succ.find_first();
+                 id != shadow_succ.npos;
+                 id = shadow_succ.find_next(id)) {
+                auto new_level = state.level + d;
+                result.emplace(id, new_level, 0, State::NODE_SHADOW);
+            }
+
+            const auto &helper_succ = gc.getHelperTransitions(state.idx, d);
+            for (size_t id = helper_succ.find_first();
+                 id != helper_succ.npos;
+                 id = helper_succ.find_next(id)) {
+                auto new_level = state.level + d;
+                result.emplace(id, new_level, 0, State::NODE_HELPER);
+            }
+        }
+
+        return result;
+    }
+
+    flat_set<State> getAcceptStates(const GraphCache &gc) const {
+        flat_set<State> result;
+
+        for (u32 dist = 0; dist <= edit_distance; dist++) {
+            // get all shadow vertices (including original graph)
+            auto cur_shadow_vertices = shadows[dist];
+            cur_shadow_vertices &= gc.getAcceptTransitions(dist);
+            for (size_t id = cur_shadow_vertices.find_first();
+                 id != cur_shadow_vertices.npos;
+                 id = cur_shadow_vertices.find_next(id)) {
+                result.emplace(id, dist, shadows_som[dist][id],
+                               State::NODE_SHADOW);
+            }
+            auto cur_helper_vertices = helpers[dist];
+            cur_helper_vertices &= gc.getAcceptTransitions(dist);
+            for (size_t id = cur_helper_vertices.find_first();
+                 id != cur_helper_vertices.npos;
+                 id = cur_helper_vertices.find_next(id)) {
+                result.emplace(id, dist, helpers_som[dist][id],
+                               State::NODE_HELPER);
+            }
+        }
+
+        return result;
+    }
+
+    flat_set<State> getAcceptEodStates(const GraphCache &gc) const {
+        flat_set<State> result;
+
+        for (u32 dist = 0; dist <= edit_distance; dist++) {
+            // get all shadow vertices (including original graph)
+            auto cur_shadow_vertices = shadows[dist];
+            cur_shadow_vertices &= gc.getAcceptEodTransitions(dist);
+            for (size_t id = cur_shadow_vertices.find_first();
+                 id != cur_shadow_vertices.npos;
+                 id = cur_shadow_vertices.find_next(id)) {
+                result.emplace(id, dist, shadows_som[dist][id],
+                               State::NODE_SHADOW);
+            }
+            auto cur_helper_vertices = helpers[dist];
+            cur_helper_vertices &= gc.getAcceptEodTransitions(dist);
+            for (size_t id = cur_helper_vertices.find_first();
+                 id != cur_helper_vertices.npos;
+                 id = cur_helper_vertices.find_next(id)) {
+                result.emplace(id, dist, helpers_som[dist][id],
+                               State::NODE_HELPER);
+            }
+        }
+
+        return result;
+    }
+
+    // the caller must specify SOM at current offset, and must not attempt to
+    // resolve SOM inheritance conflicts
+    void activateState(const State &state) {
+        size_t cur_som = state.som;
+        if (setActive(state)) {
+            size_t cached_som = getCachedSom(state);
+            cur_som = min(cur_som, cached_som);
+        }
+        setCachedSom(state, cur_som);
+    }
+
+    vector<StateBitSet> shadows;
+    vector<StateBitSet> helpers;
+    vector<vector<size_t>> shadows_som;
+    vector<vector<size_t>> helpers_som;
+    u32 edit_distance;
+};
+
+// for flat_set
+bool operator<(const StateSet::State &a, const StateSet::State &b) {
+    ORDER_CHECK(idx);
+    ORDER_CHECK(level);
+    ORDER_CHECK(type);
+    ORDER_CHECK(som);
+    return false;
+}
 
 struct fmstate {
     const size_t num_states; // number of vertices in graph
     StateSet states; // currently active states
     StateSet next; // states on after this iteration
+    GraphCache &gc;
     vector<NFAVertex> vertices; // mapping from index to vertex
     size_t offset = 0;
     unsigned char cur = 0;
     unsigned char prev = 0;
-    const bool som;
     const bool utf8;
     const bool allowStartDs;
     const ReportManager &rm;
 
-    boost::dynamic_bitset<> accept; // states leading to accept
-    boost::dynamic_bitset<> accept_with_eod; // states leading to accept or eod
-
-    fmstate(const NGHolder &g, bool som_in, bool utf8_in, bool aSD_in,
-            const ReportManager &rm_in)
-        : num_states(num_vertices(g)), states(num_states), next(num_states),
-          vertices(num_vertices(g), NGHolder::null_vertex()), som(som_in),
-          utf8(utf8_in), allowStartDs(aSD_in), rm(rm_in), accept(num_states),
-          accept_with_eod(num_states) {
+    fmstate(const NGHolder &g, GraphCache &gc_in, bool utf8_in, bool aSD_in,
+            const u32 edit_distance, const ReportManager &rm_in)
+        : num_states(num_vertices(g)),
+          states(num_states, edit_distance),
+          next(num_states, edit_distance),
+          gc(gc_in), vertices(num_vertices(g), NGHolder::null_vertex()),
+          utf8(utf8_in), allowStartDs(aSD_in), rm(rm_in) {
         // init states
-        states.s.set(g[g.start].index);
+        states.activateState(
+                    StateSet::State {g[g.start].index, 0, 0,
+                                     StateSet::State::NODE_SHADOW});
         if (allowStartDs) {
-            states.s.set(g[g.startDs].index);
+            states.activateState(
+                        StateSet::State {g[g.startDs].index, 0, 0,
+                                         StateSet::State::NODE_SHADOW});
         }
         // fill vertex mapping
-        for (const auto &v : vertices_range(g)) {
+        for (auto v : vertices_range(g)) {
             vertices[g[v].index] = v;
         }
-        // init accept states
-        for (const auto &u : inv_adjacent_vertices_range(g.accept, g)) {
-            accept.set(g[u].index);
-        }
-        accept_with_eod = accept;
-        for (const auto &u : inv_adjacent_vertices_range(g.acceptEod, g)) {
-            accept_with_eod.set(g[u].index);
-        }
     }
 };
 
@@ -140,8 +817,7 @@ bool isUtf8CodePoint(const char c) {
 }
 
 static
-bool canReach(const NGHolder &g, const NFAEdge &e,
-              struct fmstate &state) {
+bool canReach(const NGHolder &g, const NFAEdge &e, struct fmstate &state) {
     auto flags = g[e].assert_flags;
     if (!flags) {
         return true;
@@ -177,36 +853,52 @@ bool canReach(const NGHolder &g, const NFAEdge &e,
 static
 void getMatches(const NGHolder &g, MatchSet &matches, struct fmstate &state,
                 bool allowEodMatches) {
-    auto acc_states = state.states.s;
-    acc_states &= allowEodMatches ? state.accept_with_eod : state.accept;
+    flat_set<NFAVertex> accepts {g.accept, g.acceptEod};
 
-    for (size_t i = acc_states.find_first(); i != acc_states.npos;
-         i = acc_states.find_next(i)) {
-        const NFAVertex u = state.vertices[i];
-        const size_t &som_offset = state.states.som[i];
-
-        // we can't accept anything from startDs in between UTF-8 codepoints
-        if (state.utf8 && u == g.startDs && !isUtf8CodePoint(state.cur)) {
+    for (auto v : accepts) {
+        bool eod = v == g.acceptEod;
+        if (eod && !allowEodMatches) {
             continue;
         }
 
-        for (const auto &e : out_edges_range(u, g)) {
-            NFAVertex v = target(e, g);
-            if (v == g.accept || (v == g.acceptEod && allowEodMatches)) {
-                // check edge assertions if we are allowed to reach accept
-                if (!canReach(g, e, state)) {
-                    continue;
-                }
-                DEBUG_PRINTF("match found at %zu\n", state.offset);
+        auto active_states = eod ? state.states.getAcceptEodStates(state.gc) :
+                                   state.states.getAcceptStates(state.gc);
 
-                assert(!g[u].reports.empty());
-                for (const auto &report_id : g[u].reports) {
-                    const Report &ri = state.rm.getReport(report_id);
+        DEBUG_PRINTF("Number of active states: %zu\n", active_states.size());
 
-                    DEBUG_PRINTF("report %u has offset adjustment %d\n",
-                                 report_id, ri.offsetAdjust);
-                    matches.emplace(som_offset, state.offset + ri.offsetAdjust);
-                }
+        for (const auto &cur : active_states) {
+            auto u = state.vertices[cur.idx];
+
+            // we can't accept anything from startDs in between UTF-8 codepoints
+            if (state.utf8 && u == g.startDs && !isUtf8CodePoint(state.cur)) {
+                continue;
+            }
+
+            const auto &reports =
+                    eod ?
+                        state.gc.vertex_eod_reports_by_level[cur.level][u] :
+                    state.gc.vertex_reports_by_level[cur.level][u];
+
+            NFAEdge e = edge(u, v, g);
+
+            // we assume edge assertions only exist at level 0
+            if (e && !canReach(g, e, state)) {
+                continue;
+            }
+
+            DEBUG_PRINTF("%smatch found at %zu\n",
+                         eod ? "eod " : "", state.offset);
+
+            assert(!reports.empty());
+            for (const auto &report_id : reports) {
+                const Report &ri = state.rm.getReport(report_id);
+
+                DEBUG_PRINTF("report %u has offset adjustment %d\n",
+                             report_id, ri.offsetAdjust);
+                DEBUG_PRINTF("match from (i:%zu,l:%u,t:%u): (%zu,%zu)\n",
+                             cur.idx, cur.level, cur.type, cur.som,
+                             state.offset + ri.offsetAdjust);
+                matches.emplace(cur.som, state.offset + ri.offsetAdjust);
             }
         }
     }
@@ -214,20 +906,18 @@ void getMatches(const NGHolder &g, MatchSet &matches, struct fmstate &state,
 
 static
 void step(const NGHolder &g, struct fmstate &state) {
-    state.next.s.reset();
+    state.next.reset();
 
-    for (size_t i = state.states.s.find_first(); i != state.states.s.npos;
-         i = state.states.s.find_next(i)) {
-        const NFAVertex &u = state.vertices[i];
-        const size_t &u_som_offset = state.states.som[i];
+    const auto active = state.states.getActiveStates();
 
-        for (const auto &e : out_edges_range(u, g)) {
-            NFAVertex v = target(e, g);
-            if (v == g.acceptEod) {
-                // can't know the future: we don't know if we're at EOD.
-                continue;
-            }
-            if (v == g.accept) {
+    for (const auto &cur : active) {
+        auto u = state.vertices[cur.idx];
+        auto succ_list = state.states.getSuccessors(cur, state.gc);
+
+        for (auto succ : succ_list) {
+            auto v = state.vertices[succ.idx];
+
+            if (is_any_accept(v, g)) {
                 continue;
             }
 
@@ -235,37 +925,70 @@ void step(const NGHolder &g, struct fmstate &state) {
                 continue;
             }
 
-            const CharReach &cr = g[v].char_reach;
-            const size_t v_idx = g[v].index;
+            // GraphCache doesn't differentiate between successors for shadows
+            // and helpers, and StateSet does not know anything about the graph,
+            // so the only place we can do it is here. we can't self-loop on a
+            // startDs if we're startDs's helper, so disallow it.
+            if (u == g.startDs && v == g.startDs &&
+                succ.level != 0 && succ.level == cur.level) {
+                continue;
+            }
 
-            // check reachability and edge assertions
-            if (cr.test(state.cur) && canReach(g, e, state)) {
-                // if we aren't in SOM mode, just set every SOM to 0
-                if (!state.som) {
-                    state.next.s.set(v_idx);
-                    state.next.som[v_idx] = 0;
-                    continue;
+            // for the reasons outlined above, also putting this here.
+            // disallow transitions from start to startDs on levels other than zero
+            if (u == g.start && v == g.startDs &&
+                cur.level != 0 && succ.level != 0) {
+                continue;
+            }
+
+            bool can_reach = false;
+
+            if (succ.type == StateSet::State::NODE_HELPER) {
+                can_reach = true;
+            } else {
+                // we assume edge assertions only exist on level 0
+                const CharReach &cr = g[v].char_reach;
+                NFAEdge e = edge(u, v, g);
+
+                if (cr.test(state.cur) &&
+                    (!e || canReach(g, e, state))) {
+                    can_reach = true;
                 }
+            }
 
-                // if this is first vertex since start, use current offset as SOM
+            // check edge assertions if we are allowed to reach accept
+            DEBUG_PRINTF("reaching %zu->%zu ('%c'->'%c'): %s\n",
+                         g[u].index, g[v].index,
+                         ourisprint(state.prev) ? state.prev : '?',
+                         ourisprint(state.cur) ? state.cur : '?',
+                         can_reach ? "yes" : "no");
+
+            if (can_reach) {
+                // we should use current offset as SOM if:
+                //  - we're at level 0 and we're a start vertex
+                //  - we're a fake start shadow
                 size_t next_som;
-                if (u == g.start || u == g.startDs || is_virtual_start(u, g)) {
+                bool reset = is_any_start(u, g) && cur.level == 0;
+                reset |= is_virtual_start(u, g) &&
+                         cur.type == StateSet::State::NODE_SHADOW;
+
+                if (reset) {
                     next_som = state.offset;
                 } else {
                     // else, inherit SOM from predecessor
-                    next_som = u_som_offset;
+                    next_som = cur.som;
                 }
+                succ.som = next_som;
 
-                // check if the vertex is already active
-                // if this vertex is not yet active, use current SOM
-                if (!state.next.s.test(v_idx)) {
-                    state.next.s.set(v_idx);
-                    state.next.som[v_idx] = next_som;
-                } else {
-                    // else, work out leftmost SOM
-                    state.next.som[v_idx] =
-                        min(next_som, state.next.som[v_idx]);
-                }
+                DEBUG_PRINTF("src: idx %zu level: %u som: %zu type: %s\n",
+                             cur.idx, cur.level, cur.som,
+                             cur.type == StateSet::State::NODE_HELPER ? "H" : "S");
+                DEBUG_PRINTF("dst: idx %zu level: %u som: %zu type: %s\n",
+                             succ.idx, succ.level, succ.som,
+                             succ.type == StateSet::State::NODE_HELPER ? "H" : "S");
+
+                // activate successor (SOM will be handled by activateState)
+                state.next.activateState(succ);
             }
         }
     }
@@ -312,15 +1035,28 @@ void filterMatches(MatchSet &matches) {
  *  Fills \a matches with offsets into the data stream where a match is found.
  */
 void findMatches(const NGHolder &g, const ReportManager &rm,
-                 const string &input, MatchSet &matches, const bool notEod,
-                 const bool som, const bool utf8) {
+                 const string &input, MatchSet &matches,
+                 const u32 edit_distance, const bool notEod, const bool utf8) {
     assert(hasCorrectlyNumberedVertices(g));
+    // cannot match fuzzy utf8 patterns, this should've been filtered out at
+    // compile time, so make it an assert
+    assert(!edit_distance || !utf8);
+
+    DEBUG_PRINTF("Finding matches\n");
+
+    GraphCache gc(edit_distance, g);
+#ifdef DEBUG
+    gc.dumpStateTransitionTable(g);
+#endif
 
     const bool allowStartDs = (proper_out_degree(g.startDs, g) > 0);
 
-    struct fmstate state(g, som, utf8, allowStartDs, rm);
+    struct fmstate state(g, gc, utf8, allowStartDs, edit_distance, rm);
 
     for (auto it = input.begin(), ite = input.end(); it != ite; ++it) {
+#ifdef DEBUG
+        state.states.dumpActiveStates();
+#endif
         state.offset = distance(input.begin(), it);
         state.cur = *it;
 
@@ -328,26 +1064,26 @@ void findMatches(const NGHolder &g, const ReportManager &rm,
 
         getMatches(g, matches, state, false);
 
-        DEBUG_PRINTF("index %zu, %zu states on\n", state.offset,
-                     state.next.s.count());
-        if (state.next.s.empty()) {
-            if (state.som) {
-                filterMatches(matches);
-            }
+        DEBUG_PRINTF("offset %zu, %zu states on\n", state.offset,
+                     state.next.count());
+        if (state.next.empty()) {
+            filterMatches(matches);
             return;
         }
         state.states = state.next;
         state.prev = state.cur;
     }
+#ifdef DEBUG
+    state.states.dumpActiveStates();
+#endif
     state.offset = input.size();
     state.cur = 0;
 
     // do additional step to get matches after stream end, this time count eod
     // matches also (or not, if we're in notEod mode)
 
+    DEBUG_PRINTF("Looking for EOD matches\n");
     getMatches(g, matches, state, !notEod);
 
-    if (state.som) {
-        filterMatches(matches);
-    }
+    filterMatches(matches);
 }
diff --git a/util/ng_find_matches.h b/util/ng_find_matches.h
index e9e47010..39ee3f68 100644
--- a/util/ng_find_matches.h
+++ b/util/ng_find_matches.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -51,6 +51,7 @@ struct BoundaryReports;
 void findMatches(const ue2::NGHolder &g, const ue2::ReportManager &rm,
                  const std::string &input,
                  std::set<std::pair<size_t, size_t>> &matches,
-                 const bool notEod, const bool som, const bool utf8);
+                 const unsigned int max_edit_distance, const bool notEod,
+                 const bool utf8);
 
 #endif // NG_FIND_MATCHES_H