util: switch from Boost to std::unordered_set/unordered_map

This commit replaces the ue2::unordered_{set,map} types with their STL
versions, with some new hashing utilities in util/hash.h. The new types
ue2_unordered_set<T> and ue2_unordered_map<Key, T> default to using the
ue2_hasher.

The header util/ue2_containers.h has been removed, and the flat_set/map
containers moved to util/flat_containers.h.
This commit is contained in:
Justin Viiret
2017-07-14 14:59:52 +10:00
committed by Matthew Barr
parent a425bb9b7c
commit 9cf66b6ac9
123 changed files with 1048 additions and 772 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, Intel Corporation
* Copyright (c) 2016-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,7 +30,7 @@
#define ACCEL_SCHEME_H
#include "util/charreach.h"
#include "util/ue2_containers.h"
#include "util/flat_containers.h"
#include <utility>
@@ -39,7 +39,7 @@ namespace ue2 {
#define MAX_ACCEL_DEPTH 4
struct AccelScheme {
flat_set<std::pair<u8, u8> > double_byte;
flat_set<std::pair<u8, u8>> double_byte;
CharReach cr = CharReach::dot();
CharReach double_cr;
u32 offset = MAX_ACCEL_DEPTH + 1;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -36,12 +36,12 @@
#include "ue2common.h"
#include "popcount.h"
#include "util/bitutils.h"
#include "util/hash.h"
#include <array>
#include <cassert>
#include <boost/dynamic_bitset.hpp>
#include <boost/functional/hash/hash.hpp>
namespace ue2 {
@@ -373,7 +373,7 @@ public:
/// Simple hash of this bitfield, computed over the underlying block array
/// (the `bits` member).
size_t hash() const {
return boost::hash_range(std::begin(bits), std::end(bits));
return ue2_hasher()(bits);
}
/// Sentinel value meaning "no more bits", used by find_first and
@@ -420,12 +420,17 @@ private:
std::array<block_type, num_blocks> bits;
};
/**
 * \brief Boost-style free hash function for bitfield.
 *
 * Forwards to the member function bitfield::hash().
 */
template<size_t requested_size>
size_t hash_value(const bitfield<requested_size> &b) {
    const size_t digest = b.hash();
    return digest;
}
} // namespace ue2
namespace std {
/** \brief std::hash specialization for ue2::bitfield; forwards to
 * bitfield::hash(). */
template<size_t requested_size>
struct hash<ue2::bitfield<requested_size>> {
    size_t operator()(const ue2::bitfield<requested_size> &field) const {
        const size_t digest = field.hash();
        return digest;
    }
};
} // namespace std
#endif // BITFIELD_H

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -196,12 +196,17 @@ bool isSubsetOf(const CharReach &small, const CharReach &big);
bool isutf8ascii(const CharReach &cr);
bool isutf8start(const CharReach &cr);
/**
 * \brief Boost-style free hash function for CharReach.
 *
 * Forwards to the member function CharReach::hash().
 */
static really_inline
size_t hash_value(const CharReach &cr) {
    const size_t digest = cr.hash();
    return digest;
}
} // namespace ue2
namespace std {
template<>
struct hash<ue2::CharReach> {
size_t operator()(const ue2::CharReach &cr) const {
return cr.hash();
}
};
} // namespace std
#endif // NG_CHARREACH_H

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, Intel Corporation
* Copyright (c) 2016-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -34,7 +34,6 @@
#include "container.h"
#include "graph_range.h"
#include "make_unique.h"
#include "ue2_containers.h"
#include <map>
#include <set>

View File

@@ -221,8 +221,8 @@ public:
std::string str() const;
#endif
friend size_t hash_value(const depth &d) {
return d.val;
size_t hash() const {
return val;
}
private:
@@ -260,10 +260,6 @@ struct DepthMinMax : totally_ordered<DepthMinMax> {
};
/** \brief Free hash function for DepthMinMax: combines the hashes of its
 * min and max members, in that order. */
inline size_t hash_value(const DepthMinMax &d) {
    const size_t digest = hash_all(d.min, d.max);
    return digest;
}
/**
* \brief Merge two DepthMinMax values together to produce their union.
*/
@@ -271,4 +267,22 @@ DepthMinMax unionDepthMinMax(const DepthMinMax &a, const DepthMinMax &b);
} // namespace ue2
namespace std {

/** \brief std::hash specialization for ue2::depth; forwards to
 * depth::hash(). */
template<>
struct hash<ue2::depth> {
    size_t operator()(const ue2::depth &d) const {
        return d.hash();
    }
};

/** \brief std::hash specialization for ue2::DepthMinMax; combines the hashes
 * of the min and max members, in that order. */
template<>
struct hash<ue2::DepthMinMax> {
    size_t operator()(const ue2::DepthMinMax &d) const {
        // Qualified explicitly rather than relying on argument-dependent
        // lookup from inside namespace std.
        return ue2::hash_all(d.min, d.max);
    }
};

} // namespace std
#endif // DEPTH_H

View File

@@ -26,10 +26,11 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UTIL_UE2_CONTAINERS_H_
#define UTIL_UE2_CONTAINERS_H_
#ifndef UTIL_FLAT_CONTAINERS_H
#define UTIL_FLAT_CONTAINERS_H
#include "ue2common.h"
#include "util/hash.h"
#include "util/operators.h"
#include "util/small_vector.h"
@@ -38,19 +39,10 @@
#include <type_traits>
#include <utility>
#include <boost/functional/hash/hash_fwd.hpp>
#include <boost/iterator/iterator_facade.hpp>
#include <boost/unordered/unordered_map.hpp>
#include <boost/unordered/unordered_set.hpp>
namespace ue2 {
/** \brief Unordered set container implemented internally as a hash table. */
using boost::unordered_set;
/** \brief Unordered map container implemented internally as a hash table. */
using boost::unordered_map;
namespace flat_detail {
// Iterator facade that wraps an underlying iterator, so that we get our
@@ -363,11 +355,6 @@ public:
friend void swap(flat_set &a, flat_set &b) {
a.swap(b);
}
// Free hash function.
friend size_t hash_value(const flat_set &a) {
return boost::hash_range(a.begin(), a.end());
}
};
/**
@@ -652,13 +639,26 @@ public:
friend void swap(flat_map &a, flat_map &b) {
a.swap(b);
}
};
// Free hash function.
friend size_t hash_value(const flat_map &a) {
return boost::hash_range(a.begin(), a.end());
} // namespace ue2
namespace std {
/**
 * \brief std::hash specialization for ue2::flat_set; delegates to
 * ue2::ue2_hasher.
 *
 * The call operator is const-qualified, as the standard Hash requirements
 * expect, so that it can be invoked through the const hasher object held by
 * the std::unordered_* containers.
 */
template<typename T, typename Compare, typename Allocator>
struct hash<ue2::flat_set<T, Compare, Allocator>> {
    size_t operator()(const ue2::flat_set<T, Compare, Allocator> &f) const {
        return ue2::ue2_hasher()(f);
    }
};
} // namespace
/**
 * \brief std::hash specialization for ue2::flat_map; delegates to
 * ue2::ue2_hasher.
 *
 * The call operator is const-qualified, as the standard Hash requirements
 * expect, so that it can be invoked through the const hasher object held by
 * the std::unordered_* containers.
 */
template<typename Key, typename T, typename Compare, typename Allocator>
struct hash<ue2::flat_map<Key, T, Compare, Allocator>> {
    size_t operator()(const ue2::flat_map<Key, T, Compare, Allocator> &f) const {
        return ue2::ue2_hasher()(f);
    }
};
#endif // UTIL_UE2_CONTAINERS_H_
} // namespace std
#endif // UTIL_FLAT_CONTAINERS_H

View File

@@ -35,8 +35,9 @@
#include "container.h"
#include "ue2common.h"
#include "util/flat_containers.h"
#include "util/graph_range.h"
#include "util/ue2_containers.h"
#include "util/unordered.h"
#include <boost/graph/depth_first_search.hpp>
#include <boost/graph/strong_components.hpp>
@@ -115,7 +116,7 @@ bool has_proper_successor(const typename Graph::vertex_descriptor &v,
template<class Graph, class SourceCont, class OutCont>
void find_reachable(const Graph &g, const SourceCont &sources, OutCont *out) {
using vertex_descriptor = typename Graph::vertex_descriptor;
ue2::unordered_map<vertex_descriptor, boost::default_color_type> colours;
std::unordered_map<vertex_descriptor, boost::default_color_type> colours;
for (auto v : sources) {
boost::depth_first_visit(g, v,
@@ -133,7 +134,7 @@ void find_reachable(const Graph &g, const SourceCont &sources, OutCont *out) {
template<class Graph, class SourceCont, class OutCont>
void find_unreachable(const Graph &g, const SourceCont &sources, OutCont *out) {
using vertex_descriptor = typename Graph::vertex_descriptor;
ue2::unordered_set<vertex_descriptor> reachable;
std::unordered_set<vertex_descriptor> reachable;
find_reachable(g, sources, &reachable);
@@ -182,7 +183,8 @@ find_vertices_in_cycles(const Graph &g) {
template <class Graph>
bool has_parallel_edge(const Graph &g) {
using vertex_descriptor = typename Graph::vertex_descriptor;
ue2::unordered_set<std::pair<vertex_descriptor, vertex_descriptor>> seen;
ue2_unordered_set<std::pair<vertex_descriptor, vertex_descriptor>> seen;
for (const auto &e : edges_range(g)) {
auto u = source(e, g);
auto v = target(e, g);

View File

@@ -34,16 +34,126 @@
#ifndef UTIL_HASH_H
#define UTIL_HASH_H
#include <iterator>
#include <boost/functional/hash/hash_fwd.hpp>
#include <functional>
#include <type_traits>
#include <utility>
namespace ue2 {
namespace hash_detail {
/**
 * \brief Mixes \p value into the running hash \p seed, in place.
 *
 * The value is multiplied by one fixed constant and XORed into the seed,
 * after which a second fixed constant is added.
 */
inline
void hash_combine_impl(size_t &seed, size_t value) {
    // Both constants are written as 64-bit literals; the conversion to
    // size_t deliberately truncates them on 32-bit platforms.
    const size_t multiplier = static_cast<size_t>(0x0b4e0ef37bc32127ULL);
    const size_t increment = static_cast<size_t>(0x318f07b0c8eb9be9ULL);
    seed = (seed ^ (value * multiplier)) + increment;
}
/**
 * \brief Detection helper: \c value is true when std::begin() can be called
 * on a const reference to T, and false otherwise.
 */
template<typename T>
struct is_container_check {
private:
    // Selected by overload resolution whenever std::begin(c) is well-formed;
    // its return type is whatever std::begin() yields.
    template<typename Candidate>
    static auto probe_begin(const Candidate &c) -> decltype(std::begin(c)) {
        return std::begin(c);
    }

    // Catch-all fallback, used when the overload above drops out via SFINAE.
    static void probe_begin(...) {
        return;
    }

    // A void result means only the fallback was viable.
    using probe_result = decltype(probe_begin(std::declval<T>()));

public:
    static const bool value = !std::is_void<probe_result>::value;
};

/** \brief Type trait wrapping is_container_check as an integral_constant, for
 * use with enable_if and friends. */
template<typename T>
struct is_container
    : public ::std::integral_constant<bool, is_container_check<T>::value> {};
/**
 * \brief Detection helper: \c value is true when \c hash() can be called on
 * a const reference to T, and false otherwise.
 */
template<typename T>
struct has_hash_member_check {
private:
    // Selected by overload resolution whenever c.hash() is well-formed for a
    // const object; its return type is whatever hash() yields.
    template<typename Candidate>
    static auto probe_hash(const Candidate &c) -> decltype(c.hash()) {
        return c.hash();
    }

    // Catch-all fallback, used when the overload above drops out via SFINAE.
    static void probe_hash(...) {
        return;
    }

    // A void result means only the fallback was viable.
    using probe_result = decltype(probe_hash(std::declval<T>()));

public:
    static const bool value = !std::is_void<probe_result>::value;
};

/** \brief Type trait wrapping has_hash_member_check as an integral_constant,
 * for use with enable_if and friends. */
template<typename T>
struct has_hash_member
    : public ::std::integral_constant<bool, has_hash_member_check<T>::value> {};
/**
 * \brief Primary ue2_hash template: used when no more specific
 * specialization applies, and defers to std::hash of the decayed type.
 */
template<typename T, typename Enable = void>
struct ue2_hash {
    using decayed_type = typename std::decay<T>::type;

    size_t operator()(const T &obj) const {
        std::hash<decayed_type> fallback;
        return fallback(obj);
    }
};
/**
 * \brief ue2_hash specialization for std::pair: hashes first and then second
 * with ue2_hash, folding each into a zero-initialized accumulator.
 */
template<typename A, typename B>
struct ue2_hash<std::pair<A, B>, void> {
    size_t operator()(const std::pair<A, B> &p) const {
        size_t acc = 0;
        const size_t first_hash = ue2_hash<A>()(p.first);
        hash_combine_impl(acc, first_hash);
        const size_t second_hash = ue2_hash<B>()(p.second);
        hash_combine_impl(acc, second_hash);
        return acc;
    }
};
/**
 * \brief ue2_hash specialization enabled when has_hash_member<T> is true:
 * returns the result of the object's own hash() member function.
 */
template<typename T>
struct ue2_hash<T, typename std::enable_if<has_hash_member<T>::value>::type> {
    size_t operator()(const T &obj) const {
        const size_t digest = obj.hash();
        return digest;
    }
};
/**
 * \brief ue2_hash specialization for container types, i.e. those for which
 * is_container<T> is true and has_hash_member<T> is false.
 *
 * Elements are visited in iteration order; each is hashed with ue2_hash for
 * its decayed type and folded into a zero-initialized accumulator.
 */
template<typename T>
struct ue2_hash<T, typename std::enable_if<is_container<T>::value &&
                                           !has_hash_member<T>::value>::type> {
    size_t operator()(const T &obj) const {
        size_t acc = 0;
        for (const auto &item : obj) {
            using item_type = typename std::decay<decltype(item)>::type;
            const size_t item_hash = ue2_hash<item_type>()(item);
            hash_combine_impl(acc, item_hash);
        }
        return acc;
    }
};
/**
 * \brief ue2_hash specialization for enum types: converts the value to the
 * enum's underlying integer type and hashes that with ue2_hash.
 */
template<typename T>
struct ue2_hash<T, typename std::enable_if<std::is_enum<T>::value>::type> {
    size_t operator()(const T &obj) const {
        using underlying = typename std::underlying_type<T>::type;
        const underlying raw = static_cast<underlying>(obj);
        return ue2_hash<underlying>()(raw);
    }
};
/**
 * \brief Hashes \p obj with ue2_hash and mixes the result into \p seed via
 * hash_combine_impl.
 */
template<typename T>
void hash_combine(size_t &seed, const T &obj) {
    const size_t obj_hash = ue2_hash<T>()(obj);
    hash_combine_impl(seed, obj_hash);
}
/**
 * \brief Single-object overload of hash_build: folds \p obj into the running
 * hash \p v.
 */
template<typename T>
void hash_build(size_t &v, const T &obj) {
boost::hash_combine(v, obj);
hash_combine(v, obj);
}
template<typename T, typename... Args>
@@ -54,6 +164,21 @@ void hash_build(size_t &v, const T &obj, Args&&... args) {
} // namespace hash_detail
using hash_detail::hash_combine;
/**
* \brief Hasher for general use.
*
* Provides operators for most standard containers and falls back to
* std::hash<T>.
*/
struct ue2_hasher {
template<typename T>
size_t operator()(const T &obj) const {
return hash_detail::ue2_hash<T>()(obj);
}
};
/**
* \brief Computes the combined hash of all its arguments.
*
@@ -70,15 +195,6 @@ size_t hash_all(Args&&... args) {
return v;
}
/**
 * \brief Hash every element of a range.
 *
 * Accepts any \p r on which std::begin() and std::end() can be called and
 * passes the resulting iterator pair to boost::hash_range.
 */
template<typename Range>
size_t hash_range(const Range &r) {
    const size_t digest = boost::hash_range(std::begin(r), std::end(r));
    return digest;
}
} // namespace ue2
#endif // UTIL_HASH_H

View File

@@ -34,8 +34,9 @@
#ifndef UTIL_HASH_DYNAMIC_BITSET_H
#define UTIL_HASH_DYNAMIC_BITSET_H
#include "hash.h"
#include <boost/dynamic_bitset.hpp>
#include <boost/functional/hash/hash.hpp>
#include <iterator>
@@ -68,7 +69,7 @@ struct hash_output_it {
/** \brief Assigning a value through this output iterator folds \p val into
 * the hash accumulator that \c out points to, rather than storing it. */
template<typename T>
void operator=(const T &val) const {
boost::hash_combine(*out, val);
hash_combine(*out, val);
}
private:

View File

@@ -43,10 +43,16 @@ bool operator==(const mmbit_sparse_iter &a, const mmbit_sparse_iter &b) {
return a.mask == b.mask && a.val == b.val;
}
/** \brief Free hash function for mmbit_sparse_iter: combines the hashes of
 * its mask and val members, in that order. */
inline
size_t hash_value(const mmbit_sparse_iter &iter) {
    const size_t digest = ue2::hash_all(iter.mask, iter.val);
    return digest;
}
namespace std {
template<>
struct hash<mmbit_sparse_iter> {
size_t operator()(const mmbit_sparse_iter &iter) const {
return ue2::hash_all(iter.mask, iter.val);
}
};
} // namespace std
namespace ue2 {

View File

@@ -31,7 +31,7 @@
#include "container.h"
#include "noncopyable.h"
#include "ue2_containers.h"
#include "flat_containers.h"
#include "ue2common.h"
#include <algorithm>

View File

@@ -206,13 +206,6 @@ bool operator==(const Report &a, const Report &b) {
a.topSquashDistance == b.topSquashDistance;
}
/**
 * \brief Free hash function for Report.
 *
 * Combines, in the order listed, the hashes of the fields passed to hash_all
 * below.
 */
inline
size_t hash_value(const Report &r) {
    const size_t digest = hash_all(
        r.type, r.quashSom, r.minOffset, r.maxOffset, r.minLength, r.ekey,
        r.offsetAdjust, r.onmatch, r.revNfaIndex, r.somDistance,
        r.topSquashDistance);
    return digest;
}
static inline
Report makeECallback(u32 report, s32 offsetAdjust, u32 ekey) {
Report ir(EXTERNAL_CALLBACK, report);
@@ -262,6 +255,19 @@ bool isSimpleExhaustible(const Report &ir) {
return true;
}
} // namespace
} // namespace ue2
namespace std {
template<>
struct hash<ue2::Report> {
std::size_t operator()(const ue2::Report &r) const {
return ue2::hash_all(r.type, r.quashSom, r.minOffset, r.maxOffset,
r.minLength, r.ekey, r.offsetAdjust, r.onmatch,
r.revNfaIndex, r.somDistance, r.topSquashDistance);
}
};
} // namespace std
#endif // UTIL_REPORT_H

View File

@@ -38,10 +38,10 @@
#include "util/compile_error.h"
#include "util/noncopyable.h"
#include "util/report.h"
#include "util/ue2_containers.h"
#include <map>
#include <set>
#include <unordered_map>
#include <vector>
namespace ue2 {
@@ -131,17 +131,17 @@ private:
/** \brief Mapping from Report to ID (inverse of \ref reportIds
* vector). */
unordered_map<Report, size_t> reportIdToInternalMap;
std::unordered_map<Report, size_t> reportIdToInternalMap;
/** \brief Mapping from ReportID to dedupe key. */
unordered_map<ReportID, u32> reportIdToDedupeKey;
std::unordered_map<ReportID, u32> reportIdToDedupeKey;
/** \brief Mapping from ReportID to Rose program offset in bytecode. */
unordered_map<ReportID, u32> reportIdToProgramOffset;
std::unordered_map<ReportID, u32> reportIdToProgramOffset;
/** \brief Mapping from external match ids to information about that
* id. */
unordered_map<ReportID, external_report_info> externalIdMap;
std::unordered_map<ReportID, external_report_info> externalIdMap;
/** \brief Mapping from expression index to exhaustion key. */
std::map<s64a, u32> toExhaustibleKeyMap;

View File

@@ -34,7 +34,6 @@
#include "util/noncopyable.h"
#include "util/operators.h"
#include <boost/functional/hash.hpp>
#include <boost/graph/properties.hpp> /* vertex_index_t, ... */
#include <boost/pending/property.hpp> /* no_property */
#include <boost/property_map/property_map.hpp>
@@ -42,7 +41,9 @@
#include <boost/iterator/iterator_adaptor.hpp>
#include <boost/iterator/iterator_facade.hpp>
#include <functional> /* hash */
#include <tuple> /* tie */
#include <type_traits> /* is_same, etc */
#include <utility> /* pair, declval */
/*
@@ -187,9 +188,8 @@ public:
}
bool operator==(const vertex_descriptor b) const { return p == b.p; }
friend size_t hash_value(vertex_descriptor v) {
using boost::hash_value;
return hash_value(v.serial);
size_t hash() const {
return std::hash<u64a>()(serial);
}
private:
@@ -227,9 +227,8 @@ public:
}
bool operator==(const edge_descriptor b) const { return p == b.p; }
friend size_t hash_value(edge_descriptor e) {
using boost::hash_value;
return hash_value(e.serial);
size_t hash() const {
return std::hash<u64a>()(serial);
}
private:
@@ -1288,7 +1287,7 @@ edge_index_upper_bound(const Graph &g) {
using boost::vertex_index;
using boost::edge_index;
}
} // namespace ue2
namespace boost {
@@ -1305,5 +1304,29 @@ struct property_map<Graph, Prop,
std::declval<const Graph &>())) const_type;
};
}
} // namespace boost
namespace std {
/* Specialization of std::hash so that vertex_descriptor can be used in
* unordered containers. */
template<typename Graph>
struct hash<ue2::graph_detail::vertex_descriptor<Graph>> {
using vertex_descriptor = ue2::graph_detail::vertex_descriptor<Graph>;
std::size_t operator()(const vertex_descriptor &v) const {
return v.hash();
}
};
/* Specialization of std::hash so that edge_descriptor can be used in
* unordered containers. */
template<typename Graph>
struct hash<ue2::graph_detail::edge_descriptor<Graph>> {
using edge_descriptor = ue2::graph_detail::edge_descriptor<Graph>;
std::size_t operator()(const edge_descriptor &e) const {
return e.hash();
}
};
} // namespace std
#endif

View File

@@ -29,11 +29,14 @@
/** \file
* \brief Tools for string manipulation, ue2_literal definition.
*/
#include "charreach.h"
#include "compare.h"
#include "ue2string.h"
#include "charreach.h"
#include "compare.h"
#include <algorithm>
#include <cstring>
#include <iomanip>
#include <sstream>
#include <string>

View File

@@ -208,14 +208,6 @@ private:
std::vector<bool> nocase; /* for trolling value */
};
/** \brief Free hash function for a literal element: combines the hashes of
 * its c and nocase members, in that order. */
inline
size_t hash_value(const ue2_literal::elem &elem) {
    const size_t digest = hash_all(elem.c, elem.nocase);
    return digest;
}
/** \brief Free hash function for ue2_literal: hashes the literal as a range
 * via hash_range. */
inline
size_t hash_value(const ue2_literal &lit) {
    const size_t digest = hash_range(lit);
    return digest;
}
/// Return a reversed copy of this literal.
ue2_literal reverse_literal(const ue2_literal &in);
@@ -314,4 +306,22 @@ std::string escapeString(const ue2_literal &lit);
} // namespace ue2
namespace std {
/** \brief std::hash specialization for ue2::ue2_literal::elem: combines the
 * hashes of its c and nocase members, in that order. */
template<>
struct hash<ue2::ue2_literal::elem> {
    size_t operator()(const ue2::ue2_literal::elem &element) const {
        const size_t digest = ue2::hash_all(element.c, element.nocase);
        return digest;
    }
};
template<>
struct hash<ue2::ue2_literal> {
size_t operator()(const ue2::ue2_literal &lit) const {
return ue2::ue2_hasher()(lit);
}
};
} // namespace std
#endif

53
src/util/unordered.h Normal file
View File

@@ -0,0 +1,53 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UTIL_UNORDERED_H
#define UTIL_UNORDERED_H
/**
* \file
* \brief Unordered set and map containers that default to using our own hasher.
*/
#include "hash.h"
#include <unordered_set>
#include <unordered_map>
namespace ue2 {
/**
 * \brief Alias for std::unordered_set whose hash functor defaults to
 * ue2_hasher instead of std::hash<Key>. A different hasher can still be
 * supplied through the \c Hash parameter.
 */
template<class Key, class Hash = ue2_hasher>
using ue2_unordered_set = std::unordered_set<Key, Hash>;
/**
 * \brief Alias for std::unordered_map whose hash functor defaults to
 * ue2_hasher instead of std::hash<Key>. A different hasher can still be
 * supplied through the \c Hash parameter.
 */
template<class Key, class T, class Hash = ue2_hasher>
using ue2_unordered_map = std::unordered_map<Key, T, Hash>;
} // namespace ue2
#endif // UTIL_UNORDERED_H