introduce Sheng-McClellan hybrid

This commit is contained in:
Alex Coyte
2016-12-01 14:32:47 +11:00
committed by Matthew Barr
parent f626276271
commit e51b6d23b9
35 changed files with 3804 additions and 206 deletions

View File

@@ -471,4 +471,55 @@ u32 rank_in_mask64(u64a mask, u32 bit) {
return popcount64(mask);
}
// HAVE_PEXT is defined when the BMI2 bit extract/deposit intrinsics
// (_pext_u32, _pext_u64, _pdep_u64) may be used instead of the portable
// bit-by-bit fallbacks.
// NOTE(review): the _WIN32 && __AVX2__ arm presumably exists because MSVC
// does not define __BMI2__ itself -- confirm against the supported toolchains.
#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__))
#define HAVE_PEXT
#endif
/**
 * Parallel bit extract, 32-bit: gathers the bits of x selected by the set
 * bits of mask and packs them contiguously into the low bits of the result,
 * preserving their relative order.
 */
static really_inline
u32 pext32(u32 x, u32 mask) {
#if defined(HAVE_PEXT)
    // BMI2 provides this as a single instruction.
    return _pext_u32(x, mask);
#else
    u32 packed = 0;
    u32 out_bit = 1; // destination bit for the next selected source bit

    // Visit mask bits from least to most significant; every mask bit consumes
    // one output position whether or not the matching bit of x is set.
    for (; mask != 0; out_bit <<= 1) {
        const u32 src = findAndClearLSB_32(&mask);
        if (!(x & (1U << src))) {
            continue;
        }
        assert(out_bit != 0); // more than 32 bits!
        packed |= out_bit;
    }

    return packed;
#endif
}
/**
 * Parallel bit extract, 64-bit: gathers the bits of x selected by the set
 * bits of mask and packs them contiguously into the low bits of the result,
 * preserving their relative order.
 *
 * Both the BMI2 path and the portable fallback produce a full 64-bit result,
 * so masks with more than 32 set bits are handled identically on every
 * platform.
 */
static really_inline
u64a pext64(u64a x, u64a mask) {
#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
    // Intel BMI2 can do this operation in one instruction.
    return _pext_u64(x, mask);
#else
    // The accumulator and the output-bit cursor must be 64 bits wide: a mask
    // may select up to 64 bits, and a 32-bit accumulator would silently drop
    // every selected bit past the 32nd.
    u64a result = 0, num = 1;
    while (mask != 0) {
        // Index of the lowest remaining mask bit; it is cleared from mask so
        // the loop terminates once every selected bit has been visited.
        u32 bit = findAndClearLSB_64(&mask);
        if (x & (1ULL << bit)) {
            assert(num != 0); // more than 64 bits!
            result |= num;
        }
        num <<= 1; // each mask bit consumes one output position
    }
    return result;
#endif
}
#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
/**
 * Parallel bit deposit, 64-bit: thin wrapper over the BMI2 _pdep_u64
 * intrinsic, which scatters the low bits of x to the positions of the set
 * bits in mask. Only available when BMI2 is usable on a 64-bit build; there
 * is no portable fallback here.
 */
static really_inline
u64a pdep64(u64a x, u64a mask) {
    const u64a scattered = _pdep_u64(x, mask);
    return scattered;
}
#endif
#endif // BITUTILS_H

View File

@@ -39,8 +39,12 @@
#include "util/ue2_containers.h"
#include <boost/graph/depth_first_search.hpp>
#include <boost/graph/strong_components.hpp>
#include <boost/range/adaptor/map.hpp>
#include <algorithm>
#include <map>
#include <set>
#include <utility>
#include <vector>
@@ -140,6 +144,41 @@ void find_unreachable(const Graph &g, const SourceCont &sources, OutCont *out) {
}
}
/**
 * Returns the set of vertices of g that take part in at least one cycle.
 *
 * The graph is split into strongly connected components. Every vertex of a
 * component with more than one member is on a cycle; a vertex that forms a
 * component by itself is on a cycle only if it has a self loop.
 */
template <class Graph>
ue2::flat_set<typename Graph::vertex_descriptor>
find_vertices_in_cycles(const Graph &g) {
    using vertex_descriptor = typename Graph::vertex_descriptor;

    // Vertex -> index of the strongly connected component containing it.
    std::map<vertex_descriptor, size_t> comp_map;
    boost::strong_components(g, boost::make_assoc_property_map(comp_map));

    // Invert the mapping: component index -> member vertices.
    std::map<size_t, std::vector<vertex_descriptor>> comps;
    for (const auto &e : comp_map) {
        comps[e.second].push_back(e.first);
    }

    ue2::flat_set<vertex_descriptor> rv;

    for (const auto &comp : comps | boost::adaptors::map_values) {
        /* every vertex in a strongly connected component is reachable from
         * every other vertex in the component. A vertex is involved in a cycle
         * therefore if it is in a strongly connected component with more than
         * one vertex or if it is the only vertex and it has a self loop. */
        assert(!comp.empty());
        if (comp.size() > 1) {
            insert(&rv, comp);
            // Every member is already recorded, so a self-loop check on the
            // first vertex could not add anything.
            continue;
        }

        // Singleton component: only a self loop puts the vertex on a cycle.
        vertex_descriptor v = *comp.begin();
        if (hasSelfLoop(v, g)) {
            rv.insert(v);
        }
    }

    return rv;
}
template <class Graph>
bool has_parallel_edge(const Graph &g) {
using vertex_descriptor = typename Graph::vertex_descriptor;

View File

@@ -159,6 +159,10 @@ static really_inline m128 set16x8(u8 c) {
return _mm_set1_epi8(c);
}
/** Builds a 128-bit vector whose four 32-bit lanes all hold the value c. */
static really_inline m128 set4x32(u32 c) {
    const m128 splat = _mm_set1_epi32(c);
    return splat;
}
/** Extracts the lowest 32-bit lane of the 128-bit vector in as a u32. */
static really_inline u32 movd(const m128 in) {
    const u32 low_lane = _mm_cvtsi128_si32(in);
    return low_lane;
}
@@ -328,6 +332,25 @@ m128 variable_byte_shift_m128(m128 in, s32 amount) {
return pshufb(in, shift_mask);
}
/** Lane-wise maximum of a and b, treating each of the 16 bytes as unsigned. */
static really_inline
m128 max_u8_m128(m128 a, m128 b) {
    const m128 larger = _mm_max_epu8(a, b);
    return larger;
}
/** Lane-wise minimum of a and b, treating each of the 16 bytes as unsigned. */
static really_inline
m128 min_u8_m128(m128 a, m128 b) {
    const m128 smaller = _mm_min_epu8(a, b);
    return smaller;
}
/**
 * Lane-wise saturating add of unsigned bytes: each of the 16 byte sums is
 * clamped at 255 instead of wrapping.
 */
static really_inline
m128 sadd_u8_m128(m128 a, m128 b) {
    const m128 clamped_sum = _mm_adds_epu8(a, b);
    return clamped_sum;
}
/**
 * Lane-wise byte subtraction a - b. Uses the plain (non-saturating)
 * _mm_sub_epi8, so each byte difference wraps modulo 256 on underflow.
 */
static really_inline
m128 sub_u8_m128(m128 a, m128 b) {
    const m128 difference = _mm_sub_epi8(a, b);
    return difference;
}
/****
**** 256-bit Primitives