/*
 * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "goughcompile.h"
#include "goughcompile_dump.h"
#include "goughcompile_internal.h"
#include "gough_internal.h"
#include "grey.h"
#include "util/container.h"
#include "util/flat_containers.h"
#include "util/graph.h"
#include "util/graph_range.h"
#include "util/order_check.h"

#include "ue2common.h"

#include <algorithm>
#include <boost/graph/depth_first_search.hpp>
#include <boost/range/adaptor/map.hpp>

using namespace std;
using boost::adaptors::map_values;

namespace ue2 {

/**
 * Appends the raw pointer (as returned by .get()) of every element of \p in
 * to \p out. The elements of \p in are only required to provide .get().
 */
template<typename VarP, typename VarQ>
void emplace_back_all_raw(vector<VarP> *out, const vector<VarQ> &in) {
    for (const auto &var : in) {
        out->emplace_back(var.get());
    }
}

/**
 * Collects raw pointers to every variable in the graph into \p out: first the
 * variables held on vertices, then the variables held on edges.
 */
static
void all_vars(const GoughGraph &g, vector<GoughSSAVar *> *out) {
    for (auto v : vertices_range(g)) {
        emplace_back_all_raw(out, g[v].vars);
    }
    for (const auto &e : edges_range(g)) {
        emplace_back_all_raw(out, g[e].vars);
    }
}

namespace {

/** Reverse lookup tables from a variable to the graph elements that hold or
 * report it; populated by fill_aux(). */
struct GoughGraphAux {
    /** variable -> vertex whose vars list contains it */
    map<const GoughSSAVar *, GoughVertex> containing_v;
    /** variable -> edge whose vars list contains it */
    map<const GoughSSAVar *, GoughEdge> containing_e;
    /** variable -> vertices that reference it from reports or reports_eod */
    map<const GoughSSAVar *, set<GoughVertex> > reporters;
};

} // namespace

/**
 * Populates \p aux from \p g: records the containing vertex or edge of each
 * variable and, for each variable referenced by a vertex's reports or
 * reports_eod, the set of vertices referencing it.
 */
static never_inline
void fill_aux(const GoughGraph &g, GoughGraphAux *aux) {
    for (auto v : vertices_range(g)) {
        for (const auto &var : g[v].vars) {
            aux->containing_v[var.get()] = v;
            DEBUG_PRINTF("%u is on vertex %u\n", var->slot, g[v].state_id);
        }

        for (GoughSSAVar *var : g[v].reports | map_values) {
            aux->reporters[var].insert(v);
        }

        for (GoughSSAVar *var : g[v].reports_eod | map_values) {
            aux->reporters[var].insert(v);
        }
    }
    for (const auto &e : edges_range(g)) {
        for (const auto &var : g[e].vars) {
            aux->containing_e[var.get()] = e;
            DEBUG_PRINTF("%u is on edge %u->%u\n", var->slot,
                         g[source(e, g)].state_id, g[target(e, g)].state_id);
        }
    }
}

/**
 * Returns true if every use of \p var is confined to the edge that defines it
 * and that edge's target vertex, so that the variable never has to outlive
 * its block.
 */
static
bool is_block_local(const GoughGraph &cfg, GoughSSAVar *var,
                    const GoughGraphAux &aux) {
    /* if var used as a report, it cannot be considered block local */
    if (contains(aux.reporters, var)) {
        return false;
    }

    /* (useful) vertex/join vars never local - they are terminal in blocks
     * and so should be read by another block. */
    if (!contains(aux.containing_e, var)) {
        return false;
    }

    /* for other cases, require that all uses of var are later in the same edge
     * or on the target AND if on target it is sole on flow coming from the
     * edge in question. */
    const GoughEdge &e = aux.containing_e.at(var);
    GoughVertex t = target(e, cfg);

    size_t seen_outputs = 0;
    const flat_set<GoughSSAVarWithInputs *> &out = var->get_outputs();
    bool seen_var = false;
    for (const auto &e_var : cfg[e].vars) {
        if (seen_var) {
            /* only vars positioned after var on the edge can count as uses;
             * a failed cast yields nullptr, which is simply looked up in
             * out like any other pointer */
            GoughSSAVarWithInputs *w
                = dynamic_cast<GoughSSAVarWithInputs *>(e_var.get());
            if (contains(out, w)) {
                seen_outputs++;
            }
        } else {
            seen_var = var == e_var.get();
        }
    }
    assert(seen_var);

    for (const auto &t_var : cfg[t].vars) {
        if (contains(out, t_var.get())) {
            seen_outputs++;
            const flat_set<GoughEdge> &flow = t_var->get_edges_for_input(var);
            if (flow.size() != 1 || *flow.begin() != e) {
                /* this var is used by the target join var BUT on a different
                 * flow, so this is not a block local variable */
                return false;
            }
        }
    }

    /* local only if every consumer of var was accounted for above */
    assert(seen_outputs <= out.size());
    return seen_outputs == out.size();
}

/**
 * Walks the variables on edge \p e from last to first, adding them to \p rv.
 *
 * If \p start is null every variable on the edge is added ("full"); otherwise
 * only the variables positioned before \p start on the edge are added
 * ("partial"). The walk is abandoned as soon as a variable already present in
 * \p rv is met. If the walk completes, all variables on the source vertex of
 * \p e are added to \p rv and that vertex is queued in \p pending_vertex.
 */
static
void handle_pending_edge(const GoughGraph &g, const GoughEdge &e,
                         GoughSSAVar *start,
                         set<GoughVertex> &pending_vertex,
                         set<const GoughSSAVar *> &rv) {
    const auto &vars = g[e].vars;
    bool marking = !start;
    DEBUG_PRINTF(" ---checking edge %u->%u %s %zu\n",
                 g[source(e, g)].state_id, g[target(e, g)].state_id,
                 marking ? "full" : "partial", vars.size());
    for (auto it = vars.rbegin(); it != vars.rend(); ++it) {
        GoughSSAVar *var = it->get();
        if (contains(rv, var)) {
            DEBUG_PRINTF("somebody has already processed this vertex [%u]\n",
                         var->slot);
            return;
        }
        if (var == start) {
            assert(!marking);
            marking = true;
            continue;
        }
        if (marking) {
            rv.insert(var);
        }
    }
    assert(marking);

    GoughVertex s = source(e, g);
    for (const auto &var : g[s].vars) {
        DEBUG_PRINTF("interferes %u\n", var->slot);
        rv.insert(var.get());
    }
    pending_vertex.insert(s);
}

/**
 * Seeds the walk for \p def from each of its consumers in \p pending_var.
 *
 * A consumer held on a vertex has each edge returned by
 * get_edges_for_input(def) processed in full; a consumer held on an edge has
 * that edge processed partially, starting from the consumer.
 */
static
void handle_pending_vars(GoughSSAVar *def, const GoughGraph &g,
                         const GoughGraphAux &aux,
                         const flat_set<GoughSSAVarWithInputs *> &pending_var,
                         set<GoughVertex> &pending_vertex,
                         set<const GoughSSAVar *> &rv) {
    for (GoughSSAVarWithInputs *var : pending_var) {
        if (contains(aux.containing_v, var)) {
            /* def is used by join vertex, value only needs to be live on some
             * incoming edges */
            GoughSSAVarJoin *vj = static_cast<GoughSSAVarJoin *>(var);
            const flat_set<GoughEdge> &live_edges
                = vj->get_edges_for_input(def);
            for (const auto &e : live_edges) {
                handle_pending_edge(g, e, nullptr, pending_vertex, rv);
            }
            continue;
        }
        const GoughEdge &e = aux.containing_e.at(var);
        handle_pending_edge(g, e, var, pending_vertex, rv);
    }
}

/**
 * Processes one queued vertex: unless \p current is the vertex holding the
 * definition (\p def_v), every in-edge of \p current is processed in full.
 */
static
void handle_pending_vertex(GoughVertex def_v, const GoughGraph &g,
                           GoughVertex current,
                           set<GoughVertex> &pending_vertex,
                           set<const GoughSSAVar *> &rv) {
    DEBUG_PRINTF("---checking vertex %u\n", g[current].state_id);
    if (def_v == current) {
        DEBUG_PRINTF("contains target vertex\n");
        return; /* we have reached def */
    }
    for (const auto &e : in_edges_range(current, g)) {
        handle_pending_edge(g, e, nullptr, pending_vertex, rv);
    }
}

/**
 * Drains \p pending_vertex as a worklist, processing each vertex at most once
 * via handle_pending_vertex(). Processing a vertex may queue further
 * vertices.
 */
static
void handle_pending_vertices(GoughSSAVar *def, const GoughGraph &g,
                             const GoughGraphAux &aux,
                             set<GoughVertex> &pending_vertex,
                             set<const GoughSSAVar *> &rv) {
    if (pending_vertex.empty()) {
        return;
    }

    /* stays null_vertex() when def lives on an edge rather than a vertex */
    GoughVertex def_v = GoughGraph::null_vertex();
    if (contains(aux.containing_v, def)) {
        def_v = aux.containing_v.at(def);
    }

    unordered_set<GoughVertex> done;
    while (!pending_vertex.empty()) {
        GoughVertex current = *pending_vertex.begin();
        pending_vertex.erase(pending_vertex.begin());
        if (contains(done, current)) {
            continue;
        }
        done.insert(current);
        handle_pending_vertex(def_v, g, current, pending_vertex, rv);
    }
}

/* returns set of labels that the given def is live at */
static never_inline
set<const GoughSSAVar *> live_during(GoughSSAVar *def, const GoughGraph &g,
                                     const GoughGraphAux &aux) {
    DEBUG_PRINTF("checking who is defined during %u lifetime\n", def->slot);
    set<GoughVertex> pending_vertex;

    set<const GoughSSAVar *> rv;
    /* def is seeded into rv so that edge walks stop when they reach it; it is
     * removed again before returning */
    rv.insert(def);

    if (contains(aux.reporters, def)) {
        DEBUG_PRINTF("--> gets reported\n");
        const set<GoughVertex> &reporters = aux.reporters.at(def);
        for (auto v : reporters) {
            pending_vertex.insert(v);
            for (const auto &var : g[v].vars) {
                DEBUG_PRINTF("interferes %u\n", var->slot);
                rv.insert(var.get());
            }
        }
    }

    handle_pending_vars(def, g, aux, def->get_outputs(), pending_vertex, rv);
    handle_pending_vertices(def, g, aux, pending_vertex, rv);

    rv.erase(def);
    return rv;
}

/**
 * Gives each variable in \p vars the next unused slot number, advancing
 * \p next_slot. Every variable must still have slot == INVALID_SLOT.
 */
template<typename VarP>
void set_initial_slots(const vector<VarP> &vars, u32 *next_slot) {
    for (auto &var : vars) {
        assert(var->slot == INVALID_SLOT);
        var->slot = (*next_slot)++;
    }
}

/* crude, deterministic assignment of symbolic register slots.
 * returns number of slots given out */
static
u32 initial_slots(const GoughGraph &g) {
    u32 next_slot = 0;
    for (auto v : vertices_range(g)) {
        set_initial_slots(g[v].vars, &next_slot);
    }
    for (const auto &e : edges_range(g)) {
        set_initial_slots(g[e].vars, &next_slot);
    }

    return next_slot;
}

/** Marker in a colour map for a variable that has not been given a colour. */
#define NO_COLOUR (~0U)

/**
 * Returns the smallest colour not present in \p bad_colours. Relies on the
 * set being iterated in ascending order.
 */
static
u32 available_colour(const flat_set<u32> &bad_colours) {
    u32 rv = 0;
    for (const u32 &colour : bad_colours) {
        if (colour != rv) {
            /* gap found: rv is free */
            assert(colour > rv);
            break;
        }
        rv = colour + 1;
    }

    assert(rv != NO_COLOUR);
    return rv;
}

/**
 * Records colour \p c as unusable for every variable in \p live that has not
 * been coloured yet. Variables that already have a colour are only checked
 * (by assertion) not to share \p c.
 */
static
void poison_colours(const set<const GoughSSAVar *> &live, u32 c,
                    const vector<u32> &colour_map,
                    vector<flat_set<u32> > *bad_colour) {
    for (const GoughSSAVar *var : live) {
        u32 var_index = var->slot;
        if (colour_map[var_index] != NO_COLOUR) {
            assert(c != colour_map[var_index]);
        } else {
            (*bad_colour)[var_index].insert(c);
        }
    }
}

/**
 * Adds to \p out the colours already held by the variables in \p live.
 */
static
void find_bad_due_to_live(const set<const GoughSSAVar *> &live,
                          const vector<u32> &colour_map,
                          flat_set<u32> *out) {
    for (const GoughSSAVar *var : live) {
        u32 var_index = var->slot;
        if (colour_map[var_index] != NO_COLOUR) {
            out->insert(colour_map[var_index]);
        }
    }
}

/**
 * Colours the variables in \p order one at a time, in the given sequence.
 *
 * \p colour_map is reset and then indexed by each variable's current slot;
 * on return it holds the colour chosen for each variable, or NO_COLOUR for
 * variables found to be block local.
 */
static
void sequential_vertex_colouring(const GoughGraph &g, const GoughGraphAux &aux,
                                 const vector<GoughSSAVar *> &order,
                                 vector<u32> &colour_map) {
    assert(order.size() < NO_COLOUR);
    colour_map.clear();
    colour_map.resize(order.size(), NO_COLOUR);
    /* colours ruled out for each not-yet-coloured variable, by slot */
    vector<flat_set<u32> > bad_colour(order.size());

    for (GoughSSAVar *var : order) {
        u32 var_index = var->slot;
        if (is_block_local(g, var, aux)) {
            DEBUG_PRINTF("%u is block local\n", var_index);
            /* ignore variable whose lifetime is limited to their local block
             * there is no need to assign stream state to these variables */
            continue;
        }
        assert(colour_map[var_index] == NO_COLOUR);

        set<const GoughSSAVar *> live = live_during(var, g, aux);
        flat_set<u32> &local_bad = bad_colour[var_index];
        find_bad_due_to_live(live, colour_map, &local_bad);
        DEBUG_PRINTF("colouring %u\n", var_index);
        u32 c = available_colour(local_bad);
        colour_map[var_index] = c;
        assert(!contains(bad_colour[var_index], c));
        poison_colours(live, c, colour_map, &bad_colour);

        /* this variable's bad-colour set is not consulted again; swap it
         * with an empty set so its contents are dropped now */
        flat_set<u32> temp_set;
        local_bad.swap(temp_set);
        DEBUG_PRINTF(" %u coloured %u\n", var_index, c);
    }
}

/**
 * Appends the raw pointer of each variable in \p vars to \p out.
 */
template<typename VarP>
void add_to_dom_ordering(const vector<VarP> &vars,
                         vector<GoughSSAVar *> *out) {
    for (const auto &var : vars) {
        out->emplace_back(var.get());
    }
}

namespace {

/** DFS visitor that appends each vertex to \p out as it is finished. */
class FinishVisitor : public boost::default_dfs_visitor {
public:
    explicit FinishVisitor(vector<GoughVertex> *o) : out(o) {}
    void finish_vertex(const GoughVertex v, const GoughGraph &) {
        out->emplace_back(v);
    }
    vector<GoughVertex> *out;
};

} // namespace

/**
 * Builds the order in which variables are coloured: vertices are visited in
 * reverse DFS finishing order starting from the graph's initial vertex, and
 * for each vertex its own variables are emitted followed by the variables on
 * each of its out-edges.
 */
static
void find_dom_ordering(const GoughGraph &cfg, vector<GoughSSAVar *> *out) {
    vector<GoughVertex> g_order;

    /* due to construction quirks, default vertex order provides entry points */
    depth_first_search(cfg, visitor(FinishVisitor(&g_order))
                         .root_vertex(cfg[boost::graph_bundle].initial_vertex));

    for (auto it = g_order.rbegin(); it != g_order.rend(); ++it) {
        add_to_dom_ordering(cfg[*it].vars, out);
        for (const auto &e : out_edges_range(*it, cfg)) {
            add_to_dom_ordering(cfg[e].vars, out);
        }
    }
}

/**
 * Computes, for every initial slot, the colour (final slot) to use instead.
 * \p old_new is indexed by initial slot; entries left as NO_COLOUR belong to
 * block-local variables.
 */
static
void create_slot_mapping(const GoughGraph &cfg, UNUSED u32 old_slot_count,
                         vector<u32> *old_new) {
    /* Interference graphs from SSA form are chordal -> optimally colourable in
     * poly time.
     *
     * Chordal graphs can be coloured by walking in perfect elimination order.
     * If the SSA CFG is iterated over in a way that respects dominance
     * relationship, the interference graph will be iterated in a perfect
     * elimination order.
     *
     * We can avoid creating the full interference graph and use liveness
     * information as we iterate over the definitions to perform the colouring.
     *
     * See S Hack various 2006-
     */
    vector<GoughSSAVar *> dom_order;

    GoughGraphAux aux;
    fill_aux(cfg, &aux);

    find_dom_ordering(cfg, &dom_order);
    assert(dom_order.size() == old_slot_count);
    sequential_vertex_colouring(cfg, aux, dom_order, *old_new);
}

/**
 * Assigns slots to the block-local variables in \p locals. Numbering restarts
 * at \p local_base on every edge, so locals on different edges receive the
 * same slot numbers.
 */
static
void update_local_slots(GoughGraph &g, const set<GoughSSAVar *> &locals,
                        u32 local_base) {
    DEBUG_PRINTF("%zu local variables\n", locals.size());
    /* local variables only occur on edges (joins are never local) */

    u32 allocated_count = 0;
    for (const auto &e : edges_range(g)) {
        u32 next_slot = local_base;
        for (auto &var : g[e].vars) {
            if (contains(locals, var.get())) {
                DEBUG_PRINTF("updating slot %u using local %u\n", var->slot,
                             next_slot);
                var->slot = next_slot++;
                allocated_count++;
            }
        }
    }

    assert(allocated_count == locals.size());
}

/**
 * Rewrites the slot of every variable in \p g using the mapping \p old_new
 * (indexed by the variable's current slot). Variables mapped to NO_COLOUR are
 * treated as block local and are numbered from the returned count upwards.
 *
 * \return one more than the highest slot given to a non-local variable, or 0
 * if there are none.
 */
static never_inline
u32 update_slots(GoughGraph &g, const vector<u32> &old_new,
                 UNUSED u32 old_slot_count) {
    vector<GoughSSAVar *> vars;
    set<GoughSSAVar *> locals;
    all_vars(g, &vars);
    u32 slot_count = 0;
    for (GoughSSAVar *v : vars) {
        assert(v->slot < old_new.size());
        DEBUG_PRINTF("updating slot %u to %u\n", v->slot, old_new[v->slot]);
        if (old_new[v->slot] != NO_COLOUR) { /* not local, assign final slot */
            v->slot = old_new[v->slot];
            ENSURE_AT_LEAST(&slot_count, v->slot + 1);
        } else {
            locals.insert(v);
        }
    }
    assert(slot_count <= old_slot_count);
    DEBUG_PRINTF("reduce stream slots from %u to %u\n", old_slot_count,
                 slot_count);
    update_local_slots(g, locals, slot_count);

    return slot_count;
}

/**
 * Assigns a slot to every variable in \p cfg.
 *
 * Each variable first receives its own sequential slot. If
 * grey.goughRegisterAllocate is set, the slots are then recomputed by
 * colouring so that variables can share slots.
 *
 * \return the number of slots handed out by the initial assignment when
 * register allocation is disabled, otherwise the count returned by
 * update_slots().
 */
u32 assign_slots(GoughGraph &cfg, const Grey &grey) {
    u32 slot_count = initial_slots(cfg);

    if (!grey.goughRegisterAllocate) {
        return slot_count;
    }
    dump(cfg, "slots_pre", grey);

    vector<u32> old_new;
    create_slot_mapping(cfg, slot_count, &old_new);
    slot_count = update_slots(cfg, old_new, slot_count);

    return slot_count;
}

} // namespace ue2