ng_equivalence: reduce compile time on large cases

2025-07-10 04:24:43 +03:00 · 2016-06-02 16:25:36 +10:00 · 2016-06-02 16:25:36 +10:00 · 9dddb4efc3
commit 9dddb4efc3
parent 0548a6d995
1 changed files with 95 additions and 96 deletions
--- a/src/nfagraph/ng_equivalence.cpp
+++ b/src/nfagraph/ng_equivalence.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -38,7 +38,7 @@
 #include "ng_util.h"
 #include "util/compile_context.h"
 #include "util/graph_range.h"
-#include "util/order_check.h"
+#include "util/ue2_containers.h"

 #include <algorithm>
 #include <set>
@ -53,9 +53,8 @@ using boost::ptr_vector;
 namespace ue2 {

 enum EquivalenceType {
-    LEFT_EQUIVALENCE = 0,
+    LEFT_EQUIVALENCE,
    RIGHT_EQUIVALENCE,
-    MAX_EQUIVALENCE
 };

 namespace {
@ -91,7 +90,6 @@ public:
 }

 typedef ue2::unordered_set<VertexInfo *, VertexInfoPtrCmp> VertexInfoSet;
-typedef ue2::unordered_map<unsigned, VertexInfoSet> ClassMap;

 // compare two vertex info pointers on their vertex index
 bool VertexInfoPtrCmp::operator()(const VertexInfo *a,
@ -118,27 +116,34 @@ public:
        DepthMinMax d1;
        DepthMinMax d2;
    };
-    ClassInfo(const NGHolder &g, VertexInfo &vi, ClassDepth &d_in,
+    ClassInfo(const NGHolder &g, const VertexInfo &vi, const ClassDepth &d_in,
              EquivalenceType eq)
-        : vertex_flags(vi.vertex_flags), edge_top(vi.edge_top), cr(vi.cr),
-          depth(d_in) {
+        : /* reports only matter for right-equiv */
+          rs(eq == RIGHT_EQUIVALENCE ? g[vi.v].reports : flat_set<ReportID>()),
+          vertex_flags(vi.vertex_flags), edge_top(vi.edge_top), cr(vi.cr),
+          adjacent_cr(eq == LEFT_EQUIVALENCE ? vi.pred_cr : vi.succ_cr),
+          /* treat non-special vertices the same */
+          node_type(min(g[vi.v].index, u32{N_SPECIALS})), depth(d_in) {}

-        // hackety-hack!
-        node_type = g[vi.v].index;
-        if (node_type > N_SPECIALS) {
-            // we treat all regular vertices the same
-            node_type = N_SPECIALS;
-        }
-
-        // get all the adjacent vertices' CharReach
-        adjacent_cr = eq == LEFT_EQUIVALENCE ? vi.pred_cr : vi.succ_cr;
-
-        if (eq == RIGHT_EQUIVALENCE) {
-            rs = g[vi.v].reports;
-        }
+    bool operator==(const ClassInfo &b) const {
+        return node_type == b.node_type && depth.d1 == b.depth.d1 &&
+               depth.d2 == b.depth.d2 && cr == b.cr &&
+               adjacent_cr == b.adjacent_cr && edge_top == b.edge_top &&
+               vertex_flags == b.vertex_flags && rs == b.rs;
    }

-    bool operator<(const ClassInfo &b) const;
+    friend size_t hash_value(const ClassInfo &c) {
+        size_t val = 0;
+        boost::hash_combine(val, boost::hash_range(begin(c.rs), end(c.rs)));
+        boost::hash_combine(val, c.vertex_flags);
+        boost::hash_combine(val, c.edge_top);
+        boost::hash_combine(val, c.cr);
+        boost::hash_combine(val, c.adjacent_cr);
+        boost::hash_combine(val, c.node_type);
+        boost::hash_combine(val, c.depth.d1);
+        boost::hash_combine(val, c.depth.d2);
+        return val;
+    }

 private:
    flat_set<ReportID> rs; /* for right equiv only */
@ -200,26 +205,12 @@ public:
        return q.capacity();
    }
 private:
-    set<unsigned> ids; //!< stores id's, for uniqueness
+    unordered_set<unsigned> ids; //!< stores id's, for uniqueness
    vector<unsigned> q; //!< vector of id's that we use as FILO.
 };

 }

-bool ClassInfo::operator<(const ClassInfo &b) const {
-    const ClassInfo &a = *this;
-
-    ORDER_CHECK(node_type);
-    ORDER_CHECK(depth.d1);
-    ORDER_CHECK(depth.d2);
-    ORDER_CHECK(cr);
-    ORDER_CHECK(adjacent_cr);
-    ORDER_CHECK(edge_top);
-    ORDER_CHECK(vertex_flags);
-    ORDER_CHECK(rs);
-    return false;
-}
-
 static
 bool outIsIrreducible(NFAVertex &v, const NGHolder &g) {
    unsigned nonSpecialVertices = 0;
@ -286,9 +277,14 @@ bool hasEdgeAsserts(NFAVertex v, const NGHolder &g) {

 // populate VertexInfo table
 static
-void getVertexInfos(const NGHolder &g, ptr_vector<VertexInfo> &infos) {
+ptr_vector<VertexInfo> getVertexInfos(const NGHolder &g) {
+    const size_t num_verts = num_vertices(g);
+
+    ptr_vector<VertexInfo> infos;
+    infos.reserve(num_verts * 2);
+
    vector<VertexInfo *> vertex_map; // indexed by vertex_index property
-    vertex_map.resize(num_vertices(g));
+    vertex_map.resize(num_verts);

    for (auto v : vertices_range(g)) {
        VertexInfo *vi = new VertexInfo(v, g);
@ -323,14 +319,24 @@ void getVertexInfos(const NGHolder &g, ptr_vector<VertexInfo> &infos) {
        }
        assert(!hasEdgeAsserts(cur_vi.v, g));
    }
+
+    return infos;
 }

 // store equivalence class in VertexInfo for each vertex
 static
-void partitionGraph(ptr_vector<VertexInfo> &infos, ClassMap &classes,
-                    WorkQueue &work_queue, const NGHolder &g,
-                    EquivalenceType eq) {
-    map<ClassInfo, unsigned> classinfomap;
+vector<VertexInfoSet> partitionGraph(ptr_vector<VertexInfo> &infos,
+                                     WorkQueue &work_queue, const NGHolder &g,
+                                     EquivalenceType eq) {
+    const size_t num_verts = infos.size();
+
+    vector<VertexInfoSet> classes;
+    unordered_map<ClassInfo, unsigned> classinfomap;
+
+    // assume we will have lots of classes, so we don't waste time resizing
+    // these structures.
+    classes.reserve(num_verts);
+    classinfomap.reserve(num_verts);

    // get distances from start (or accept) for all vertices
    // only one of them is used at a time, never both
@ -356,28 +362,25 @@ void partitionGraph(ptr_vector<VertexInfo> &infos, ClassMap &classes,

        auto ii = classinfomap.find(ci);
        if (ii == classinfomap.end()) {
-            unsigned new_class = classinfomap.size();
-            vi.equivalence_class = new_class;
-
-            classinfomap[ci] = new_class;
-
-            // insert this vertex into the class map
-            VertexInfoSet &vertices = classes[new_class];
-            vertices.insert(&vi);
+            // vertex is in a new equivalence class by itself.
+            unsigned eq_class = classes.size();
+            vi.equivalence_class = eq_class;
+            classes.push_back({&vi});
+            classinfomap.emplace(move(ci), eq_class);
        } else {
+            // vertex is added to an existing class.
            unsigned eq_class = ii->second;
            vi.equivalence_class = eq_class;
-
-            // insert this vertex into the class map
-            VertexInfoSet &vertices = classes[eq_class];
-            vertices.insert(&vi);
+            classes.at(eq_class).insert(&vi);

            // we now know that this particular class has more than one
            // vertex, so we add it to the work queue
            work_queue.push(eq_class);
        }
    }
-    DEBUG_PRINTF("partitioned, %zu equivalence classes\n", classinfomap.size());
+
+    DEBUG_PRINTF("partitioned, %zu equivalence classes\n", classes.size());
+    return classes;
 }

 // generalized equivalence processing (left and right)
@ -388,7 +391,7 @@ void partitionGraph(ptr_vector<VertexInfo> &infos, ClassMap &classes,
 // equivalence, predecessors for right equivalence) classes get revalidated in
 // case of a split.
 static
-void equivalence(ClassMap &classmap, WorkQueue &work_queue,
+void equivalence(vector<VertexInfoSet> &classes, WorkQueue &work_queue,
                 EquivalenceType eq_type) {
    // now, go through the work queue until it's empty
    map<flat_set<unsigned>, VertexInfoSet> tentative_classmap;
@ -397,12 +400,11 @@ void equivalence(ClassMap &classmap, WorkQueue &work_queue,
    WorkQueue reval_queue(work_queue.capacity());

    while (!work_queue.empty()) {
-
        // dequeue our class from the work queue
        unsigned cur_class = work_queue.pop();

        // get all vertices in current equivalence class
-        VertexInfoSet &cur_class_vertices = classmap[cur_class];
+        VertexInfoSet &cur_class_vertices = classes.at(cur_class);

        if (cur_class_vertices.size() < 2) {
            continue;
@ -445,16 +447,20 @@ void equivalence(ClassMap &classmap, WorkQueue &work_queue,

            // start from the second class
            for (++tmi; tmi != tentative_classmap.end(); ++tmi) {
-                unsigned new_class = classmap.size();
                const VertexInfoSet &vertices_to_split = tmi->second;
-                VertexInfoSet &new_class_vertices = classmap[new_class];
+                unsigned new_class = classes.size();
+                VertexInfoSet new_class_vertices;

                for (VertexInfo *vi : vertices_to_split) {
                    vi->equivalence_class = new_class;
-                    cur_class_vertices.erase(vi);
+                    // note: we cannot use the cur_class_vertices ref, as it is
+                    // invalidated by modifications to the classes vector.
+                    classes[cur_class].erase(vi);
                    new_class_vertices.insert(vi);
                }
-                if (tmi->first.find(cur_class) != tmi->first.end()) {
+                classes.push_back(move(new_class_vertices));
+
+                if (contains(tmi->first, cur_class)) {
                    reval_queue.push(new_class);
                }
            }
@ -619,16 +625,15 @@ void mergeClass(ptr_vector<VertexInfo> &infos, NGHolder &g, unsigned eq_class,
 // vertex (or, in rare cases for left equiv, a pair if we cannot satisfy the
 // report behaviour with a single vertex).
 static
-bool mergeEquivalentClasses(ClassMap &classmap, ptr_vector<VertexInfo> &infos,
-                            NGHolder &g) {
+bool mergeEquivalentClasses(vector<VertexInfoSet> &classes,
+                            ptr_vector<VertexInfo> &infos, NGHolder &g) {
    bool merged = false;
    set<NFAVertex> toRemove;

    // go through all classes and merge classes with more than one vertex
-    for (auto &cm : classmap) {
+    for (unsigned eq_class = 0; eq_class < classes.size(); eq_class++) {
        // get all vertices in current equivalence class
-        unsigned eq_class = cm.first;
-        VertexInfoSet &cur_class_vertices = cm.second;
+        VertexInfoSet &cur_class_vertices = classes[eq_class];

        // we don't care for single-vertex classes
        if (cur_class_vertices.size() > 1) {
@ -644,6 +649,26 @@ bool mergeEquivalentClasses(ClassMap &classmap, ptr_vector<VertexInfo> &infos,
    return merged;
 }

+static
+bool reduceGraphEquivalences(NGHolder &g, EquivalenceType eq_type) {
+    // create a list of equivalence classes to check
+    WorkQueue work_queue(num_vertices(g));
+
+    // get information on every vertex in the graph
+    // new vertices are allocated here, and stored in infos
+    ptr_vector<VertexInfo> infos = getVertexInfos(g);
+
+    // partition the graph
+    auto classes = partitionGraph(infos, work_queue, g, eq_type);
+
+    // do equivalence processing
+    equivalence(classes, work_queue, eq_type);
+
+    // replace equivalent classes with single vertices
+    // new vertices are (possibly) allocated here, and stored in infos
+    return mergeEquivalentClasses(classes, infos, g);
+}
+
 bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc) {
    if (!cc.grey.equivalenceEnable) {
        DEBUG_PRINTF("equivalence processing disabled in grey box\n");
@ -661,34 +686,8 @@ bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc) {

    // take note if we have merged any vertices
    bool merge = false;
-
-    for (int eqi = 0; eqi < MAX_EQUIVALENCE; ++eqi) {
-        // map of all information pertaining a vertex
-        ptr_vector<VertexInfo> infos;
-        ClassMap classes;
-
-        // create a list of equivalence classes to check
-        WorkQueue work_queue(num_vertices(g));
-        EquivalenceType eq_type = (EquivalenceType) eqi;
-
-        // resize the vector, make room for twice the vertices we have
-        infos.reserve(num_vertices(g) * 2);
-
-        // get information on every vertex in the graph
-        // new vertices are allocated here, and stored in infos
-        getVertexInfos(g, infos);
-
-        // partition the graph
-        partitionGraph(infos, classes, work_queue, g, eq_type);
-
-        // do equivalence processing
-        equivalence(classes, work_queue, eq_type);
-
-        // replace equivalent classes with single vertices
-        // new vertices are (possibly) allocated here, and stored in infos
-        merge |= mergeEquivalentClasses(classes, infos, g);
-    }
-
+    merge |= reduceGraphEquivalences(g, LEFT_EQUIVALENCE);
+    merge |= reduceGraphEquivalences(g, RIGHT_EQUIVALENCE);
    return merge;
 }