mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-29 19:24:25 +03:00
Fix names; use our own intrinsics instead of the explicit _mm* ones
This commit is contained in:
@@ -47,7 +47,7 @@ char roseCountingMiracleScan(u8 c, const u8 *d, const u8 *d_end,
|
||||
|
||||
u32 count = *count_inout;
|
||||
|
||||
m128 chars = set16x8(c);
|
||||
m128 chars = set1_16x8(c);
|
||||
|
||||
for (; d + 16 <= d_end; d_end -= 16) {
|
||||
m128 data = loadu128(d_end - 16);
|
||||
@@ -94,7 +94,7 @@ u32 roseCountingMiracleScanShufti(m128 mask_lo, m128 mask_hi, u8 poison,
|
||||
u32 count = *count_inout;
|
||||
|
||||
const m128 zeroes = zeroes128();
|
||||
const m128 low4bits = _mm_set1_epi8(0xf);
|
||||
const m128 low4bits = set1_16x8(0xf);
|
||||
|
||||
for (; d + 16 <= d_end; d_end -= 16) {
|
||||
m128 data = loadu128(d_end - 16);
|
||||
|
@@ -938,7 +938,7 @@ int roseCheckShufti16x16(const struct core_info *ci, const u8 *hi_mask,
|
||||
return 1;
|
||||
}
|
||||
|
||||
m256 data_m256 = set2x128(data);
|
||||
m256 data_m256 = set1_2x128(data);
|
||||
m256 hi_mask_m256 = loadu256(hi_mask);
|
||||
m256 lo_mask_m256 = loadu256(lo_mask);
|
||||
m256 bucket_select_mask_m256 = loadu256(bucket_select_mask);
|
||||
@@ -974,8 +974,8 @@ int roseCheckShufti32x8(const struct core_info *ci, const u8 *hi_mask,
|
||||
|
||||
m128 hi_mask_m128 = loadu128(hi_mask);
|
||||
m128 lo_mask_m128 = loadu128(lo_mask);
|
||||
m256 hi_mask_m256 = set2x128(hi_mask_m128);
|
||||
m256 lo_mask_m256 = set2x128(lo_mask_m128);
|
||||
m256 hi_mask_m256 = set1_2x128(hi_mask_m128);
|
||||
m256 lo_mask_m256 = set1_2x128(lo_mask_m128);
|
||||
m256 bucket_select_mask_m256 = loadu256(bucket_select_mask);
|
||||
if (validateShuftiMask32x8(data, hi_mask_m256, lo_mask_m256,
|
||||
bucket_select_mask_m256,
|
||||
@@ -1287,7 +1287,7 @@ int roseCheckMultipathShufti16x8(const struct hs_scratch *scratch,
|
||||
u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask);
|
||||
DEBUG_PRINTF("expand_hi %llx\n", valid_hi);
|
||||
DEBUG_PRINTF("expand_lo %llx\n", valid_lo);
|
||||
expand_valid = set64x2(valid_hi, valid_lo);
|
||||
expand_valid = set2x64(valid_hi, valid_lo);
|
||||
valid_path_mask = ~movemask128(pshufb_m128(expand_valid,
|
||||
data_select_mask));
|
||||
}
|
||||
@@ -1332,7 +1332,7 @@ int roseCheckMultipathShufti32x8(const struct hs_scratch *scratch,
|
||||
|
||||
u32 valid_data_mask;
|
||||
m128 data_m128 = getData128(ci, offset, &valid_data_mask);
|
||||
m256 data_double = set2x128(data_m128);
|
||||
m256 data_double = set1_2x128(data_m128);
|
||||
m256 data_select_mask = loadu256(ri->data_select_mask);
|
||||
|
||||
u32 valid_path_mask = 0;
|
||||
@@ -1346,7 +1346,7 @@ int roseCheckMultipathShufti32x8(const struct hs_scratch *scratch,
|
||||
u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask);
|
||||
DEBUG_PRINTF("expand_hi %llx\n", valid_hi);
|
||||
DEBUG_PRINTF("expand_lo %llx\n", valid_lo);
|
||||
expand_valid = set64x4(valid_hi, valid_lo, valid_hi,
|
||||
expand_valid = set4x64(valid_hi, valid_lo, valid_hi,
|
||||
valid_lo);
|
||||
valid_path_mask = ~movemask256(pshufb_m256(expand_valid,
|
||||
data_select_mask));
|
||||
@@ -1393,7 +1393,7 @@ int roseCheckMultipathShufti32x16(const struct hs_scratch *scratch,
|
||||
|
||||
u32 valid_data_mask;
|
||||
m128 data_m128 = getData128(ci, offset, &valid_data_mask);
|
||||
m256 data_double = set2x128(data_m128);
|
||||
m256 data_double = set1_2x128(data_m128);
|
||||
m256 data_select_mask = loadu256(ri->data_select_mask);
|
||||
|
||||
u32 valid_path_mask = 0;
|
||||
@@ -1407,7 +1407,7 @@ int roseCheckMultipathShufti32x16(const struct hs_scratch *scratch,
|
||||
u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask);
|
||||
DEBUG_PRINTF("expand_hi %llx\n", valid_hi);
|
||||
DEBUG_PRINTF("expand_lo %llx\n", valid_lo);
|
||||
expand_valid = set64x4(valid_hi, valid_lo, valid_hi,
|
||||
expand_valid = set4x64(valid_hi, valid_lo, valid_hi,
|
||||
valid_lo);
|
||||
valid_path_mask = ~movemask256(pshufb_m256(expand_valid,
|
||||
data_select_mask));
|
||||
@@ -1460,7 +1460,7 @@ int roseCheckMultipathShufti64(const struct hs_scratch *scratch,
|
||||
|
||||
u32 valid_data_mask;
|
||||
m128 data_m128 = getData128(ci, offset, &valid_data_mask);
|
||||
m256 data_m256 = set2x128(data_m128);
|
||||
m256 data_m256 = set1_2x128(data_m128);
|
||||
m256 data_select_mask_1 = loadu256(ri->data_select_mask);
|
||||
m256 data_select_mask_2 = loadu256(ri->data_select_mask + 32);
|
||||
|
||||
@@ -1475,7 +1475,7 @@ int roseCheckMultipathShufti64(const struct hs_scratch *scratch,
|
||||
u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask);
|
||||
DEBUG_PRINTF("expand_hi %llx\n", valid_hi);
|
||||
DEBUG_PRINTF("expand_lo %llx\n", valid_lo);
|
||||
expand_valid = set64x4(valid_hi, valid_lo, valid_hi,
|
||||
expand_valid = set4x64(valid_hi, valid_lo, valid_hi,
|
||||
valid_lo);
|
||||
u32 valid_path_1 = movemask256(pshufb_m256(expand_valid,
|
||||
data_select_mask_1));
|
||||
|
@@ -47,7 +47,7 @@ static really_inline
|
||||
int validateShuftiMask16x16(const m256 data, const m256 hi_mask,
|
||||
const m256 lo_mask, const m256 and_mask,
|
||||
const u32 neg_mask, const u32 valid_data_mask) {
|
||||
m256 low4bits = set32x8(0xf);
|
||||
m256 low4bits = set1_32x8(0xf);
|
||||
m256 c_lo = pshufb_m256(lo_mask, and256(data, low4bits));
|
||||
m256 c_hi = pshufb_m256(hi_mask,
|
||||
rshift64_m256(andnot256(low4bits, data), 4));
|
||||
@@ -78,7 +78,7 @@ int validateShuftiMask16x8(const m128 data, const m256 nib_mask,
|
||||
const m128 and_mask, const u32 neg_mask,
|
||||
const u32 valid_data_mask) {
|
||||
m256 data_m256 = combine2x128(rshift64_m128(data, 4), data);
|
||||
m256 low4bits = set32x8(0xf);
|
||||
m256 low4bits = set1_32x8(0xf);
|
||||
m256 c_nib = pshufb_m256(nib_mask, and256(data_m256, low4bits));
|
||||
m128 t = and128(movdq_hi(c_nib), movdq_lo(c_nib));
|
||||
m128 nresult = eq128(and128(t, and_mask), zeroes128());
|
||||
@@ -101,7 +101,7 @@ static really_inline
|
||||
int validateShuftiMask32x8(const m256 data, const m256 hi_mask,
|
||||
const m256 lo_mask, const m256 and_mask,
|
||||
const u32 neg_mask, const u32 valid_data_mask) {
|
||||
m256 low4bits = set32x8(0xf);
|
||||
m256 low4bits = set1_32x8(0xf);
|
||||
m256 c_lo = pshufb_m256(lo_mask, and256(data, low4bits));
|
||||
m256 c_hi = pshufb_m256(hi_mask,
|
||||
rshift64_m256(andnot256(low4bits, data), 4));
|
||||
@@ -133,7 +133,7 @@ int validateShuftiMask32x16(const m256 data,
|
||||
const m256 bucket_mask_hi,
|
||||
const m256 bucket_mask_lo, const u32 neg_mask,
|
||||
const u32 valid_data_mask) {
|
||||
m256 low4bits = set32x8(0xf);
|
||||
m256 low4bits = set1_32x8(0xf);
|
||||
m256 data_lo = and256(data, low4bits);
|
||||
m256 data_hi = and256(rshift64_m256(data, 4), low4bits);
|
||||
m256 c_lo_1 = pshufb_m256(lo_mask_1, data_lo);
|
||||
@@ -201,7 +201,7 @@ int validateMultipathShuftiMask16x8(const m128 data,
|
||||
const u32 neg_mask,
|
||||
const u32 valid_path_mask) {
|
||||
m256 data_256 = combine2x128(rshift64_m128(data, 4), data);
|
||||
m256 low4bits = set32x8(0xf);
|
||||
m256 low4bits = set1_32x8(0xf);
|
||||
m256 c_nib = pshufb_m256(nib_mask, and256(data_256, low4bits));
|
||||
m128 t = and128(movdq_hi(c_nib), movdq_lo(c_nib));
|
||||
m128 result = and128(t, bucket_select_mask);
|
||||
@@ -220,7 +220,7 @@ int validateMultipathShuftiMask32x8(const m256 data,
|
||||
const u32 hi_bits, const u32 lo_bits,
|
||||
const u32 neg_mask,
|
||||
const u32 valid_path_mask) {
|
||||
m256 low4bits = set32x8(0xf);
|
||||
m256 low4bits = set1_32x8(0xf);
|
||||
m256 data_lo = and256(data, low4bits);
|
||||
m256 data_hi = and256(rshift64_m256(data, 4), low4bits);
|
||||
m256 c_lo = pshufb_m256(lo_mask, data_lo);
|
||||
@@ -244,7 +244,7 @@ int validateMultipathShuftiMask32x16(const m256 data,
|
||||
const u32 hi_bits, const u32 lo_bits,
|
||||
const u32 neg_mask,
|
||||
const u32 valid_path_mask) {
|
||||
m256 low4bits = set32x8(0xf);
|
||||
m256 low4bits = set1_32x8(0xf);
|
||||
m256 data_lo = and256(data, low4bits);
|
||||
m256 data_hi = and256(rshift64_m256(data, 4), low4bits);
|
||||
m256 c_lo_1 = pshufb_m256(lo_mask_1, data_lo);
|
||||
@@ -271,7 +271,7 @@ int validateMultipathShuftiMask64(const m256 data_1, const m256 data_2,
|
||||
const u64a hi_bits, const u64a lo_bits,
|
||||
const u64a neg_mask,
|
||||
const u64a valid_path_mask) {
|
||||
m256 low4bits = set32x8(0xf);
|
||||
m256 low4bits = set1_32x8(0xf);
|
||||
m256 c_lo_1 = pshufb_m256(lo_mask, and256(data_1, low4bits));
|
||||
m256 c_lo_2 = pshufb_m256(lo_mask, and256(data_2, low4bits));
|
||||
m256 c_hi_1 = pshufb_m256(hi_mask,
|
||||
|
Reference in New Issue
Block a user