mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-30 11:44:28 +03:00
introduce Sheng-McClellan hybrid
This commit is contained in:
@@ -471,4 +471,55 @@ u32 rank_in_mask64(u64a mask, u32 bit) {
|
||||
return popcount64(mask);
|
||||
}
|
||||
|
||||
// HAVE_PEXT selects the BMI2 intrinsic implementations of the pext/pdep
// helpers that follow. It is defined when the compiler advertises BMI2
// (__BMI2__), or on Windows builds that have AVX2 enabled.
// NOTE(review): presumably AVX2 is used as a stand-in on Windows because the
// compiler there does not define __BMI2__ -- confirm that every AVX2 target
// this is built for also supports BMI2.
#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__))
|
||||
#define HAVE_PEXT
|
||||
#endif
|
||||
|
||||
/**
 * \brief Parallel bit extract, 32-bit.
 *
 * Gathers the bits of \a x that sit at the positions set in \a mask and packs
 * them, lowest mask position first, into the low bits of the result. All
 * remaining result bits are zero.
 */
static really_inline
u32 pext32(u32 x, u32 mask) {
#if defined(HAVE_PEXT)
    // BMI2 is available: the whole operation is a single instruction.
    return _pext_u32(x, mask);
#else
    // Portable path: consume the mask one set bit at a time, lowest first.
    // out_bit is the destination bit that the current mask position maps to.
    u32 packed = 0;
    u32 out_bit = 1;
    for (; mask != 0; out_bit <<= 1) {
        // The helper's return value is used as the index of the mask bit it
        // has just removed from mask.
        const u32 src = findAndClearLSB_32(&mask);
        if (!(x & (1U << src))) {
            continue; // source bit clear: destination bit stays zero
        }
        assert(out_bit != 0); // more than 32 bits!
        packed |= out_bit;
    }
    return packed;
#endif
}
|
||||
|
||||
/**
 * \brief Parallel bit extract, 64-bit.
 *
 * Gathers the bits of \a x that sit at the positions set in \a mask and packs
 * them, lowest mask position first, into the low bits of the result. All
 * remaining result bits are zero.
 *
 * The portable fallback accumulates in 64 bits so that masks with more than
 * 32 set bits produce the same answer as the BMI2 instruction; a 32-bit
 * accumulator would silently drop every extracted bit from position 32 up.
 */
static really_inline
u64a pext64(u64a x, u64a mask) {
#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
    // Intel BMI2 can do this operation in one instruction.
    return _pext_u64(x, mask);
#else
    // result collects the packed bits; num is the destination bit that the
    // mask position currently being examined maps to. Both must be as wide as
    // the return type.
    u64a result = 0, num = 1;
    while (mask != 0) {
        // The helper's return value is used as the index of the mask bit it
        // has just removed from mask.
        u32 bit = findAndClearLSB_64(&mask);
        if (x & (1ULL << bit)) {
            // A 64-bit mask has at most 64 set bits, so num can only have
            // shifted out to zero after the final mask bit was consumed.
            assert(num != 0); // more than 64 bits!
            result |= num;
        }
        num <<= 1;
    }
    return result;
#endif
}
|
||||
|
||||
#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
/**
 * \brief Parallel bit deposit, 64-bit.
 *
 * Thin wrapper over the BMI2 _pdep_u64 intrinsic, which scatters the low bits
 * of \a x to the positions set in \a mask. It is only declared when the
 * intrinsic is usable on a 64-bit build; no portable fallback is provided
 * here.
 */
static really_inline
u64a pdep64(u64a x, u64a mask) {
    const u64a deposited = _pdep_u64(x, mask);
    return deposited;
}
#endif
|
||||
|
||||
#endif // BITUTILS_H
|
||||
|
@@ -39,8 +39,12 @@
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <boost/graph/depth_first_search.hpp>
|
||||
#include <boost/graph/strong_components.hpp>
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
@@ -140,6 +144,41 @@ void find_unreachable(const Graph &g, const SourceCont &sources, OutCont *out) {
|
||||
}
|
||||
}
|
||||
|
||||
template <class Graph>
|
||||
ue2::flat_set<typename Graph::vertex_descriptor>
|
||||
find_vertices_in_cycles(const Graph &g) {
|
||||
using vertex_descriptor = typename Graph::vertex_descriptor;
|
||||
|
||||
std::map<vertex_descriptor, size_t> comp_map;
|
||||
|
||||
boost::strong_components(g, boost::make_assoc_property_map(comp_map));
|
||||
|
||||
std::map<size_t, std::vector<vertex_descriptor>> comps;
|
||||
|
||||
for (const auto &e : comp_map) {
|
||||
comps[e.second].push_back(e.first);
|
||||
}
|
||||
|
||||
ue2::flat_set<vertex_descriptor> rv;
|
||||
|
||||
for (const auto &comp : comps | boost::adaptors::map_values) {
|
||||
/* every vertex in a strongly connected component is reachable from
|
||||
* every other vertex in the component. A vertex is involved in a cycle
|
||||
* therefore if it is in a strongly connected component with more than
|
||||
* one vertex or if it is the only vertex and it has a self loop. */
|
||||
assert(!comp.empty());
|
||||
if (comp.size() > 1) {
|
||||
insert(&rv, comp);
|
||||
}
|
||||
vertex_descriptor v = *comp.begin();
|
||||
if (hasSelfLoop(v, g)) {
|
||||
rv.insert(v);
|
||||
}
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
template <class Graph>
|
||||
bool has_parallel_edge(const Graph &g) {
|
||||
using vertex_descriptor = typename Graph::vertex_descriptor;
|
||||
|
@@ -159,6 +159,10 @@ static really_inline m128 set16x8(u8 c) {
|
||||
return _mm_set1_epi8(c);
|
||||
}
|
||||
|
||||
/** Builds a 128-bit vector whose four 32-bit lanes all hold \a c. */
static really_inline
m128 set4x32(u32 c) {
    const m128 splat = _mm_set1_epi32(c);
    return splat;
}
|
||||
|
||||
/** Returns the lowest 32-bit lane of \a in as a scalar. */
static really_inline
u32 movd(const m128 in) {
    const u32 low_lane = _mm_cvtsi128_si32(in);
    return low_lane;
}
|
||||
@@ -328,6 +332,25 @@ m128 variable_byte_shift_m128(m128 in, s32 amount) {
|
||||
return pshufb(in, shift_mask);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m128 max_u8_m128(m128 a, m128 b) {
|
||||
return _mm_max_epu8(a, b);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m128 min_u8_m128(m128 a, m128 b) {
|
||||
return _mm_min_epu8(a, b);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m128 sadd_u8_m128(m128 a, m128 b) {
|
||||
return _mm_adds_epu8(a, b);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m128 sub_u8_m128(m128 a, m128 b) {
|
||||
return _mm_sub_epi8(a, b);
|
||||
}
|
||||
|
||||
/****
|
||||
**** 256-bit Primitives
|
||||
|
Reference in New Issue
Block a user