util: switch from Boost to std::unordered_{set,map}

This commit replaces the ue2::unordered_{set,map} types with their STL
versions and adds some new hashing utilities in util/hash.h. The new types
ue2_unordered_set<T> and ue2_unordered_map<Key, T> default to using the
ue2_hasher.

The header util/ue2_containers.h has been removed, and the flat_set/flat_map
containers have been moved to util/flat_containers.h.
This commit is contained in:
Justin Viiret
2017-07-14 14:59:52 +10:00
committed by Matthew Barr
parent a425bb9b7c
commit 9cf66b6ac9
123 changed files with 1048 additions and 772 deletions

View File

@@ -41,6 +41,7 @@
#include "util/verify_types.h"
#include <sstream>
#include <unordered_set>
#include <vector>
#define PATHS_LIMIT 500
@@ -254,7 +255,7 @@ dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
u16 top_remap = raw.alpha_remap[TOP];
ue2::unordered_set<dstate_id_t> seen;
std::unordered_set<dstate_id_t> seen;
while (true) {
seen.insert(s);
DEBUG_PRINTF("basis %hu\n", s);

View File

@@ -44,6 +44,8 @@
#include "util/simd_types.h"
#include <cstdio>
#include <map>
#include <set>
#include <vector>
#ifndef DUMP_SUPPORT

View File

@@ -31,7 +31,7 @@
#include "ue2common.h"
#include "util/charreach.h"
#include "util/ue2_containers.h"
#include "util/flat_containers.h"
union AccelAux;

View File

@@ -48,11 +48,11 @@
#include "util/compile_context.h"
#include "util/container.h"
#include "util/dump_charclass.h"
#include "util/flat_containers.h"
#include "util/graph.h"
#include "util/make_unique.h"
#include "util/multibit_build.h"
#include "util/report_manager.h"
#include "util/ue2_containers.h"
#include "util/verify_types.h"
#include "grey.h"
@@ -153,7 +153,7 @@ static
void getNeighborInfo(const CliqueGraph &g, vector<u32> &neighbor,
const CliqueVertex &cv, const set<u32> &group) {
u32 id = g[cv].stateId;
ue2::unordered_set<u32> neighborId;
unordered_set<u32> neighborId;
// find neighbors for cv
for (const auto &v : adjacent_vertices_range(cv, g)) {

View File

@@ -39,11 +39,12 @@
#include "nfagraph/ng_repeat.h"
#include "util/bytecode_ptr.h"
#include "util/depth.h"
#include "util/ue2_containers.h"
#include "util/flat_containers.h"
#include <map>
#include <memory>
#include <set>
#include <unordered_map>
#include <vector>
struct NFA;
@@ -89,7 +90,7 @@ struct CastleProto {
std::map<u32, PureRepeat> repeats;
/** \brief Mapping from report to associated tops. */
ue2::unordered_map<ReportID, flat_set<u32>> report_map;
std::unordered_map<ReportID, flat_set<u32>> report_map;
/**
* \brief Next top id to use. Repeats may be removed without top remapping,
@@ -155,7 +156,7 @@ bool is_equal(const CastleProto &c1, const CastleProto &c2);
* of the reports in the given set.
*/
bool requiresDedupe(const CastleProto &proto,
const ue2::flat_set<ReportID> &reports);
const flat_set<ReportID> &reports);
/**
* \brief Build an NGHolder from a CastleProto.

View File

@@ -63,9 +63,9 @@
#include "rdfa.h"
#include "ue2common.h"
#include "util/container.h"
#include "util/flat_containers.h"
#include "util/noncopyable.h"
#include "util/partitioned_set.h"
#include "util/ue2_containers.h"
#include <algorithm>
#include <functional>

View File

@@ -37,11 +37,11 @@
#include "nfa_internal.h"
#include "util/compile_context.h"
#include "util/container.h"
#include "util/flat_containers.h"
#include "util/graph_range.h"
#include "util/make_unique.h"
#include "util/order_check.h"
#include "util/report_manager.h"
#include "util/ue2_containers.h"
#include "util/verify_types.h"
#include "ue2common.h"

View File

@@ -33,7 +33,7 @@
#include "nfa_kind.h"
#include "ue2common.h"
#include "util/bytecode_ptr.h"
#include "util/ue2_containers.h"
#include "util/flat_containers.h"
#include "util/order_check.h"
#include <map>

View File

@@ -33,9 +33,9 @@
#include "mcclellancompile.h"
#include "ue2common.h"
#include "util/charreach.h"
#include "util/flat_containers.h"
#include "util/noncopyable.h"
#include "util/order_check.h"
#include "util/ue2_containers.h"
#include <map>
#include <memory>

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,10 +32,10 @@
#include "gough_internal.h"
#include "grey.h"
#include "util/container.h"
#include "util/flat_containers.h"
#include "util/graph.h"
#include "util/graph_range.h"
#include "util/order_check.h"
#include "util/ue2_containers.h"
#include "ue2common.h"
@@ -235,7 +235,7 @@ void handle_pending_vertices(GoughSSAVar *def, const GoughGraph &g,
if (contains(aux.containing_v, def)) {
def_v = aux.containing_v.at(def);
}
ue2::unordered_set<GoughVertex> done;
unordered_set<GoughVertex> done;
while (!pending_vertex.empty()) {
GoughVertex current = *pending_vertex.begin();
pending_vertex.erase(current);

View File

@@ -53,12 +53,13 @@
#include "util/charreach.h"
#include "util/compile_context.h"
#include "util/container.h"
#include "util/flat_containers.h"
#include "util/graph.h"
#include "util/graph_range.h"
#include "util/graph_small_color_map.h"
#include "util/order_check.h"
#include "util/unordered.h"
#include "util/verify_types.h"
#include "util/ue2_containers.h"
#include <algorithm>
#include <cassert>
@@ -97,16 +98,16 @@ struct precalcAccel {
};
struct limex_accel_info {
ue2::unordered_set<NFAVertex> accelerable;
unordered_set<NFAVertex> accelerable;
map<NFAStateSet, precalcAccel> precalc;
ue2::unordered_map<NFAVertex, flat_set<NFAVertex>> friends;
ue2::unordered_map<NFAVertex, AccelScheme> accel_map;
unordered_map<NFAVertex, flat_set<NFAVertex>> friends;
unordered_map<NFAVertex, AccelScheme> accel_map;
};
static
map<NFAVertex, NFAStateSet>
reindexByStateId(const map<NFAVertex, NFAStateSet> &in, const NGHolder &g,
const ue2::unordered_map<NFAVertex, u32> &state_ids,
const unordered_map<NFAVertex, u32> &state_ids,
const u32 num_states) {
map<NFAVertex, NFAStateSet> out;
@@ -138,7 +139,7 @@ reindexByStateId(const map<NFAVertex, NFAStateSet> &in, const NGHolder &g,
struct build_info {
build_info(NGHolder &hi,
const ue2::unordered_map<NFAVertex, u32> &states_in,
const unordered_map<NFAVertex, u32> &states_in,
const vector<BoundedRepeatData> &ri,
const map<NFAVertex, NFAStateSet> &rsmi,
const map<NFAVertex, NFAStateSet> &smi,
@@ -161,7 +162,7 @@ struct build_info {
}
NGHolder &h;
const ue2::unordered_map<NFAVertex, u32> &state_ids;
const unordered_map<NFAVertex, u32> &state_ids;
const vector<BoundedRepeatData> &repeats;
// Squash maps; state sets are indexed by state_id.
@@ -169,7 +170,7 @@ struct build_info {
map<NFAVertex, NFAStateSet> squashMap;
const map<u32, set<NFAVertex>> &tops;
ue2::unordered_set<NFAVertex> tugs;
unordered_set<NFAVertex> tugs;
map<NFAVertex, BoundedRepeatSummary> br_cyclic;
const set<NFAVertex> &zombies;
bool do_accel;
@@ -479,7 +480,7 @@ bool allow_wide_accel(const vector<NFAVertex> &vv, const NGHolder &g,
static
void nfaFindAccelSchemes(const NGHolder &g,
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
ue2::unordered_map<NFAVertex, AccelScheme> *out) {
unordered_map<NFAVertex, AccelScheme> *out) {
vector<CharReach> refined_cr = reduced_cr(g, br_cyclic);
NFAVertex sds_or_proxy = get_sds_or_proxy(g);
@@ -504,8 +505,8 @@ void nfaFindAccelSchemes(const NGHolder &g,
}
struct fas_visitor : public boost::default_bfs_visitor {
fas_visitor(const ue2::unordered_map<NFAVertex, AccelScheme> &am_in,
ue2::unordered_map<NFAVertex, AccelScheme> *out_in)
fas_visitor(const unordered_map<NFAVertex, AccelScheme> &am_in,
unordered_map<NFAVertex, AccelScheme> *out_in)
: accel_map(am_in), out(out_in) {}
void discover_vertex(NFAVertex v, const NGHolder &) {
@@ -516,13 +517,13 @@ struct fas_visitor : public boost::default_bfs_visitor {
throw this; /* done */
}
}
const ue2::unordered_map<NFAVertex, AccelScheme> &accel_map;
ue2::unordered_map<NFAVertex, AccelScheme> *out;
const unordered_map<NFAVertex, AccelScheme> &accel_map;
unordered_map<NFAVertex, AccelScheme> *out;
};
static
void filterAccelStates(NGHolder &g, const map<u32, set<NFAVertex>> &tops,
ue2::unordered_map<NFAVertex, AccelScheme> *accel_map) {
unordered_map<NFAVertex, AccelScheme> *accel_map) {
/* We want the NFA_MAX_ACCEL_STATES best acceleration states, everything
* else should be ditched. We use a simple BFS to choose accel states near
* the start. */
@@ -542,7 +543,7 @@ void filterAccelStates(NGHolder &g, const map<u32, set<NFAVertex>> &tops,
tempEdges.push_back(e); // Remove edge later.
}
ue2::unordered_map<NFAVertex, AccelScheme> out;
unordered_map<NFAVertex, AccelScheme> out;
try {
boost::breadth_first_search(g, g.start,
@@ -982,16 +983,18 @@ u32 addSquashMask(const build_info &args, const NFAVertex &v,
return idx;
}
using ReportListCache = ue2_unordered_map<vector<ReportID>, u32>;
static
u32 addReports(const flat_set<ReportID> &r, vector<ReportID> &reports,
unordered_map<vector<ReportID>, u32> &reportListCache) {
ReportListCache &reports_cache) {
assert(!r.empty());
vector<ReportID> my_reports(begin(r), end(r));
my_reports.push_back(MO_INVALID_IDX); // sentinel
auto cache_it = reportListCache.find(my_reports);
if (cache_it != end(reportListCache)) {
auto cache_it = reports_cache.find(my_reports);
if (cache_it != end(reports_cache)) {
u32 offset = cache_it->second;
DEBUG_PRINTF("reusing cached report list at %u\n", offset);
return offset;
@@ -1007,13 +1010,12 @@ u32 addReports(const flat_set<ReportID> &r, vector<ReportID> &reports,
u32 offset = verify_u32(reports.size());
insert(&reports, reports.end(), my_reports);
reportListCache.emplace(move(my_reports), offset);
reports_cache.emplace(move(my_reports), offset);
return offset;
}
static
void buildAcceptsList(const build_info &args,
unordered_map<vector<ReportID>, u32> &reports_cache,
void buildAcceptsList(const build_info &args, ReportListCache &reports_cache,
vector<NFAVertex> &verts, vector<NFAAccept> &accepts,
vector<ReportID> &reports, vector<NFAStateSet> &squash) {
if (verts.empty()) {
@@ -1051,8 +1053,7 @@ void buildAcceptsList(const build_info &args,
}
static
void buildAccepts(const build_info &args,
unordered_map<vector<ReportID>, u32> &reports_cache,
void buildAccepts(const build_info &args, ReportListCache &reports_cache,
NFAStateSet &acceptMask, NFAStateSet &acceptEodMask,
vector<NFAAccept> &accepts, vector<NFAAccept> &acceptsEod,
vector<ReportID> &reports, vector<NFAStateSet> &squash) {
@@ -1119,7 +1120,7 @@ u32 uncompressedStateSize(u32 num_states) {
static
u32 compressedStateSize(const NGHolder &h, const NFAStateSet &maskedStates,
const ue2::unordered_map<NFAVertex, u32> &state_ids) {
const unordered_map<NFAVertex, u32> &state_ids) {
// Shrink state requirement to enough to fit the compressed largest reach.
vector<u32> allreach(N_CHARS, 0);
@@ -1190,7 +1191,7 @@ bool hasSquashableInitDs(const build_info &args) {
static
bool hasInitDsStates(const NGHolder &h,
const ue2::unordered_map<NFAVertex, u32> &state_ids) {
const unordered_map<NFAVertex, u32> &state_ids) {
if (state_ids.at(h.startDs) != NO_STATE) {
return true;
}
@@ -1358,17 +1359,16 @@ struct ExceptionProto {
};
static
u32 buildExceptionMap(const build_info &args,
unordered_map<vector<ReportID>, u32> &reports_cache,
const ue2::unordered_set<NFAEdge> &exceptional,
u32 buildExceptionMap(const build_info &args, ReportListCache &reports_cache,
const unordered_set<NFAEdge> &exceptional,
map<ExceptionProto, vector<u32>> &exceptionMap,
vector<ReportID> &reportList) {
const NGHolder &h = args.h;
const u32 num_states = args.num_states;
u32 exceptionCount = 0;
ue2::unordered_map<NFAVertex, u32> pos_trigger;
ue2::unordered_map<NFAVertex, u32> tug_trigger;
unordered_map<NFAVertex, u32> pos_trigger;
unordered_map<NFAVertex, u32> tug_trigger;
for (u32 i = 0; i < args.repeats.size(); i++) {
const BoundedRepeatData &br = args.repeats[i];
@@ -1893,7 +1893,7 @@ struct Factory {
static
void findExceptionalTransitions(const build_info &args,
ue2::unordered_set<NFAEdge> &exceptional,
unordered_set<NFAEdge> &exceptional,
u32 maxShift) {
const NGHolder &h = args.h;
@@ -2168,9 +2168,9 @@ struct Factory {
// We track report lists that have already been written into the global
// list in case we can reuse them.
unordered_map<vector<ReportID>, u32> reports_cache;
ReportListCache reports_cache;
ue2::unordered_set<NFAEdge> exceptional;
unordered_set<NFAEdge> exceptional;
u32 shiftCount = findBestNumOfVarShifts(args);
assert(shiftCount);
u32 maxShift = findMaxVarShift(args, shiftCount);
@@ -2374,10 +2374,10 @@ MAKE_LIMEX_TRAITS(512)
// Some sanity tests, called by an assertion in generate().
static UNUSED
bool isSane(const NGHolder &h, const map<u32, set<NFAVertex>> &tops,
const ue2::unordered_map<NFAVertex, u32> &state_ids,
const unordered_map<NFAVertex, u32> &state_ids,
u32 num_states) {
ue2::unordered_set<u32> seen;
ue2::unordered_set<NFAVertex> top_starts;
unordered_set<u32> seen;
unordered_set<NFAVertex> top_starts;
for (const auto &vv : tops | map_values) {
insert(&top_starts, vv);
}
@@ -2424,7 +2424,7 @@ bool isSane(const NGHolder &h, const map<u32, set<NFAVertex>> &tops,
#endif // NDEBUG
static
u32 max_state(const ue2::unordered_map<NFAVertex, u32> &state_ids) {
u32 max_state(const unordered_map<NFAVertex, u32> &state_ids) {
u32 rv = 0;
for (const auto &m : state_ids) {
DEBUG_PRINTF("state %u\n", m.second);
@@ -2437,7 +2437,7 @@ u32 max_state(const ue2::unordered_map<NFAVertex, u32> &state_ids) {
}
bytecode_ptr<NFA> generate(NGHolder &h,
const ue2::unordered_map<NFAVertex, u32> &states,
const unordered_map<NFAVertex, u32> &states,
const vector<BoundedRepeatData> &repeats,
const map<NFAVertex, NFAStateSet> &reportSquashMap,
const map<NFAVertex, NFAStateSet> &squashMap,
@@ -2507,7 +2507,7 @@ bytecode_ptr<NFA> generate(NGHolder &h,
}
u32 countAccelStates(NGHolder &h,
const ue2::unordered_map<NFAVertex, u32> &states,
const unordered_map<NFAVertex, u32> &states,
const vector<BoundedRepeatData> &repeats,
const map<NFAVertex, NFAStateSet> &reportSquashMap,
const map<NFAVertex, NFAStateSet> &squashMap,

View File

@@ -34,15 +34,16 @@
#ifndef LIMEX_COMPILE_H
#define LIMEX_COMPILE_H
#include <map>
#include <memory>
#include <vector>
#include "nfagraph/ng_holder.h"
#include "nfagraph/ng_squash.h" // for NFAStateSet
#include "ue2common.h"
#include "util/bytecode_ptr.h"
#include "util/ue2_containers.h"
#include <set>
#include <map>
#include <memory>
#include <unordered_map>
#include <vector>
struct NFA;
@@ -69,7 +70,7 @@ struct CompileContext;
* graph.
*/
bytecode_ptr<NFA> generate(NGHolder &g,
const ue2::unordered_map<NFAVertex, u32> &states,
const std::unordered_map<NFAVertex, u32> &states,
const std::vector<BoundedRepeatData> &repeats,
const std::map<NFAVertex, NFAStateSet> &reportSquashMap,
const std::map<NFAVertex, NFAStateSet> &squashMap,
@@ -87,7 +88,7 @@ bytecode_ptr<NFA> generate(NGHolder &g,
* implementable.
*/
u32 countAccelStates(NGHolder &h,
const ue2::unordered_map<NFAVertex, u32> &states,
const std::unordered_map<NFAVertex, u32> &states,
const std::vector<BoundedRepeatData> &repeats,
const std::map<NFAVertex, NFAStateSet> &reportSquashMap,
const std::map<NFAVertex, NFAStateSet> &squashMap,

View File

@@ -46,7 +46,7 @@
#include "util/make_unique.h"
#include "util/order_check.h"
#include "util/report_manager.h"
#include "util/ue2_containers.h"
#include "util/flat_containers.h"
#include "util/unaligned.h"
#include "util/verify_types.h"

View File

@@ -33,7 +33,6 @@
#include "rdfa.h"
#include "ue2common.h"
#include "util/bytecode_ptr.h"
#include "util/ue2_containers.h"
#include <memory>
#include <vector>

View File

@@ -30,12 +30,11 @@
#include "rdfa.h"
#include "util/container.h"
#include "util/ue2_containers.h"
#include "util/hash.h"
#include "ue2common.h"
#include <deque>
#include <boost/functional/hash/hash.hpp>
#include <map>
using namespace std;
@@ -232,22 +231,18 @@ bool has_non_eod_accepts(const raw_dfa &rdfa) {
}
size_t hash_dfa_no_reports(const raw_dfa &rdfa) {
using boost::hash_combine;
using boost::hash_range;
size_t v = 0;
hash_combine(v, rdfa.alpha_size);
hash_combine(v, hash_range(begin(rdfa.alpha_remap), end(rdfa.alpha_remap)));
hash_combine(v, rdfa.alpha_remap);
for (const auto &ds : rdfa.states) {
hash_combine(v, hash_range(begin(ds.next), end(ds.next)));
hash_combine(v, ds.next);
}
return v;
}
size_t hash_dfa(const raw_dfa &rdfa) {
using boost::hash_combine;
size_t v = 0;
hash_combine(v, hash_dfa_no_reports(rdfa));
hash_combine(v, all_reports(rdfa));

View File

@@ -45,13 +45,14 @@
#include "util/compare.h"
#include "util/compile_context.h"
#include "util/container.h"
#include "util/flat_containers.h"
#include "util/graph.h"
#include "util/graph_range.h"
#include "util/make_unique.h"
#include "util/order_check.h"
#include "util/report_manager.h"
#include "util/ue2_containers.h"
#include "util/unaligned.h"
#include "util/unordered.h"
#include "util/verify_types.h"
#include <algorithm>
@@ -383,6 +384,8 @@ CharReach get_edge_reach(dstate_id_t u, dstate_id_t v, const dfa_info &info) {
#define MAX_SHENG_STATES 16
#define MAX_SHENG_LEAKINESS 0.05
using LeakinessCache = ue2_unordered_map<pair<RdfaVertex, u32>, double>;
/**
* Returns the proportion of strings of length 'depth' which will leave the
* sheng region when starting at state 'u'.
@@ -390,8 +393,7 @@ CharReach get_edge_reach(dstate_id_t u, dstate_id_t v, const dfa_info &info) {
static
double leakiness(const RdfaGraph &g, dfa_info &info,
const flat_set<RdfaVertex> &sheng_states, RdfaVertex u,
u32 depth,
unordered_map<pair<RdfaVertex, u32>, double> &cache) {
u32 depth, LeakinessCache &cache) {
double rv = 0;
if (contains(cache, make_pair(u, depth))) {
return cache[make_pair(u, depth)];
@@ -426,7 +428,7 @@ double leakiness(const RdfaGraph &g, dfa_info &info,
static
double leakiness(const RdfaGraph &g, dfa_info &info,
const flat_set<RdfaVertex> &sheng_states, RdfaVertex u) {
unordered_map<pair<RdfaVertex, u32>, double> cache;
LeakinessCache cache;
double rv = leakiness(g, info, sheng_states, u, 8, cache);
return rv;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,7 +32,7 @@
#include "nfa_kind.h"
#include "ue2common.h"
#include "util/ue2_containers.h"
#include "util/flat_containers.h"
#include <array>
#include <vector>

View File

@@ -36,9 +36,10 @@
#include "nfagraph/ng_mcclellan_internal.h"
#include "util/container.h"
#include "util/determinise.h"
#include "util/flat_containers.h"
#include "util/make_unique.h"
#include "util/report_manager.h"
#include "util/ue2_containers.h"
#include "util/unordered.h"
#include <algorithm>
#include <queue>
@@ -54,7 +55,7 @@ namespace {
class Automaton_Merge {
public:
using StateSet = vector<u16>;
using StateMap = unordered_map<StateSet, dstate_id_t>;
using StateMap = ue2_unordered_map<StateSet, dstate_id_t>;
Automaton_Merge(const raw_dfa *rdfa1, const raw_dfa *rdfa2,
const ReportManager *rm_in, const Grey &grey_in)

View File

@@ -33,7 +33,10 @@
#include "rdfa.h"
#include "util/bytecode_ptr.h"
#include "util/charreach.h"
#include "util/ue2_containers.h"
#include "util/flat_containers.h"
#include <memory>
#include <set>
struct NFA;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -33,7 +33,7 @@
#include "ue2common.h"
#include "util/charreach.h"
#include "util/container.h"
#include "util/ue2_containers.h"
#include "util/flat_containers.h"
#include <array>
#include <cassert>

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -35,7 +35,7 @@
#include "ue2common.h"
#include "util/charreach.h"
#include "util/ue2_containers.h"
#include "util/flat_containers.h"
#include <utility>

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,12 +32,15 @@
* truffle is always able to represent an entire character class, providing a
* backstop to other acceleration engines.
*/
#include "trufflecompile.h"
#include "ue2common.h"
#include "util/charreach.h"
#include "util/dump_mask.h"
#include "util/simd_types.h"
#include "util/dump_mask.h"
#include <cstring>
using namespace std;