mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
simd_utils: setbit/clearbit by loading 1-bit mask
This commit is contained in:
parent
790683b641
commit
49bb3b5c82
@ -26,6 +26,10 @@
|
|||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/** \file
|
||||||
|
* \brief Lookup tables to support SIMD operations.
|
||||||
|
*/
|
||||||
|
|
||||||
#include "simd_utils.h"
|
#include "simd_utils.h"
|
||||||
|
|
||||||
const char vbs_mask_data[] ALIGN_CL_DIRECTIVE = {
|
const char vbs_mask_data[] ALIGN_CL_DIRECTIVE = {
|
||||||
@ -38,3 +42,19 @@ const char vbs_mask_data[] ALIGN_CL_DIRECTIVE = {
|
|||||||
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
|
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
|
||||||
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
|
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define ZEROES_8 0, 0, 0, 0, 0, 0, 0, 0
|
||||||
|
#define ZEROES_31 ZEROES_8, ZEROES_8, ZEROES_8, 0, 0, 0, 0, 0, 0, 0
|
||||||
|
#define ZEROES_32 ZEROES_8, ZEROES_8, ZEROES_8, ZEROES_8
|
||||||
|
|
||||||
|
/** \brief LUT for the mask1bit functions. */
|
||||||
|
const u8 simd_onebit_masks[] ALIGN_CL_DIRECTIVE = {
|
||||||
|
ZEROES_31, 0x01, ZEROES_32,
|
||||||
|
ZEROES_31, 0x02, ZEROES_32,
|
||||||
|
ZEROES_31, 0x04, ZEROES_32,
|
||||||
|
ZEROES_31, 0x08, ZEROES_32,
|
||||||
|
ZEROES_31, 0x10, ZEROES_32,
|
||||||
|
ZEROES_31, 0x20, ZEROES_32,
|
||||||
|
ZEROES_31, 0x40, ZEROES_32,
|
||||||
|
ZEROES_31, 0x80, ZEROES_32,
|
||||||
|
};
|
||||||
|
@ -245,47 +245,37 @@ m128 loadbytes128(const void *ptr, unsigned int n) {
|
|||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern const u8 simd_onebit_masks[];
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m128 mask1bit128(unsigned int n) {
|
||||||
|
assert(n < sizeof(m128) * 8);
|
||||||
|
u32 mask_idx = ((n % 8) * 64) + 31;
|
||||||
|
mask_idx -= n / 8;
|
||||||
|
return loadu128(&simd_onebit_masks[mask_idx]);
|
||||||
|
}
|
||||||
|
|
||||||
// switches on bit N in the given vector.
|
// switches on bit N in the given vector.
|
||||||
static really_inline
|
static really_inline
|
||||||
void setbit128(m128 *ptr, unsigned int n) {
|
void setbit128(m128 *ptr, unsigned int n) {
|
||||||
assert(n < sizeof(*ptr) * 8);
|
*ptr = or128(mask1bit128(n), *ptr);
|
||||||
// We should be able to figure out a better way than this.
|
|
||||||
union {
|
|
||||||
m128 simd;
|
|
||||||
u8 bytes[sizeof(m128)];
|
|
||||||
} x;
|
|
||||||
x.simd = *ptr;
|
|
||||||
|
|
||||||
u8 *b = &x.bytes[n / 8];
|
|
||||||
*b |= 1U << (n % 8);
|
|
||||||
|
|
||||||
*ptr = x.simd;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// switches off bit N in the given vector.
|
// switches off bit N in the given vector.
|
||||||
static really_inline
|
static really_inline
|
||||||
void clearbit128(m128 *ptr, unsigned int n) {
|
void clearbit128(m128 *ptr, unsigned int n) {
|
||||||
assert(n < sizeof(*ptr) * 8);
|
*ptr = andnot128(mask1bit128(n), *ptr);
|
||||||
// We should be able to figure out a better way than this.
|
|
||||||
union {
|
|
||||||
m128 simd;
|
|
||||||
u8 bytes[sizeof(m128)];
|
|
||||||
} x;
|
|
||||||
x.simd = *ptr;
|
|
||||||
|
|
||||||
u8 *b = &x.bytes[n / 8];
|
|
||||||
*b &= ~(1U << (n % 8));
|
|
||||||
|
|
||||||
*ptr = x.simd;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// tests bit N in the given vector.
|
// tests bit N in the given vector.
|
||||||
static really_inline
|
static really_inline
|
||||||
char testbit128(const m128 *ptr, unsigned int n) {
|
char testbit128(const m128 *ptr, unsigned int n) {
|
||||||
assert(n < sizeof(*ptr) * 8);
|
const m128 mask = mask1bit128(n);
|
||||||
// We should be able to figure out a better way than this.
|
#if defined(__SSE4_1__)
|
||||||
const char *bytes = (const char *)ptr;
|
return !_mm_testz_si128(mask, *ptr);
|
||||||
return !!(bytes[n / 8] & (1 << (n % 8)));
|
#else
|
||||||
|
return isnonzero128(and128(mask, *ptr));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// offset must be an immediate
|
// offset must be an immediate
|
||||||
@ -551,6 +541,14 @@ m256 loadbytes256(const void *ptr, unsigned int n) {
|
|||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m256 mask1bit256(unsigned int n) {
|
||||||
|
assert(n < sizeof(m256) * 8);
|
||||||
|
u32 mask_idx = ((n % 8) * 64) + 31;
|
||||||
|
mask_idx -= n / 8;
|
||||||
|
return loadu256(&simd_onebit_masks[mask_idx]);
|
||||||
|
}
|
||||||
|
|
||||||
#if !defined(__AVX2__)
|
#if !defined(__AVX2__)
|
||||||
// switches on bit N in the given vector.
|
// switches on bit N in the given vector.
|
||||||
static really_inline
|
static really_inline
|
||||||
@ -599,42 +597,19 @@ char testbit256(const m256 *ptr, unsigned int n) {
|
|||||||
// switches on bit N in the given vector.
|
// switches on bit N in the given vector.
|
||||||
static really_inline
|
static really_inline
|
||||||
void setbit256(m256 *ptr, unsigned int n) {
|
void setbit256(m256 *ptr, unsigned int n) {
|
||||||
assert(n < sizeof(*ptr) * 8);
|
*ptr = or256(mask1bit256(n), *ptr);
|
||||||
// We should be able to figure out a better way than this.
|
|
||||||
union {
|
|
||||||
m256 simd;
|
|
||||||
u8 bytes[sizeof(m256)];
|
|
||||||
} x;
|
|
||||||
x.simd = *ptr;
|
|
||||||
|
|
||||||
u8 *b = &x.bytes[n / 8];
|
|
||||||
*b |= 1U << (n % 8);
|
|
||||||
|
|
||||||
*ptr = x.simd;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: can we do this better in avx-land?
|
|
||||||
static really_inline
|
static really_inline
|
||||||
void clearbit256(m256 *ptr, unsigned int n) {
|
void clearbit256(m256 *ptr, unsigned int n) {
|
||||||
assert(n < sizeof(*ptr) * 8);
|
*ptr = andnot256(mask1bit256(n), *ptr);
|
||||||
union {
|
|
||||||
m256 simd;
|
|
||||||
u8 bytes[sizeof(m256)];
|
|
||||||
} x;
|
|
||||||
x.simd = *ptr;
|
|
||||||
|
|
||||||
u8 *b = &x.bytes[n / 8];
|
|
||||||
*b &= ~(1U << (n % 8));
|
|
||||||
|
|
||||||
*ptr = x.simd;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// tests bit N in the given vector.
|
// tests bit N in the given vector.
|
||||||
static really_inline
|
static really_inline
|
||||||
char testbit256(const m256 *ptr, unsigned int n) {
|
char testbit256(const m256 *ptr, unsigned int n) {
|
||||||
assert(n < sizeof(*ptr) * 8);
|
const m256 mask = mask1bit256(n);
|
||||||
const char *bytes = (const char *)ptr;
|
return !_mm256_testz_si256(mask, *ptr);
|
||||||
return !!(bytes[n / 8] & (1 << (n % 8)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_really_inline
|
static really_really_inline
|
||||||
|
Loading…
x
Reference in New Issue
Block a user