Fix names; use our own intrinsics instead of explicit _mm* ones

This commit is contained in:
Konstantinos Margaritis
2020-09-23 11:51:21 +03:00
parent f7a6b8934c
commit 5333467249
15 changed files with 137 additions and 137 deletions

View File

@@ -47,7 +47,7 @@ char roseCountingMiracleScan(u8 c, const u8 *d, const u8 *d_end,
u32 count = *count_inout;
m128 chars = set16x8(c);
m128 chars = set1_16x8(c);
for (; d + 16 <= d_end; d_end -= 16) {
m128 data = loadu128(d_end - 16);
@@ -94,7 +94,7 @@ u32 roseCountingMiracleScanShufti(m128 mask_lo, m128 mask_hi, u8 poison,
u32 count = *count_inout;
const m128 zeroes = zeroes128();
const m128 low4bits = _mm_set1_epi8(0xf);
const m128 low4bits = set1_16x8(0xf);
for (; d + 16 <= d_end; d_end -= 16) {
m128 data = loadu128(d_end - 16);

View File

@@ -938,7 +938,7 @@ int roseCheckShufti16x16(const struct core_info *ci, const u8 *hi_mask,
return 1;
}
m256 data_m256 = set2x128(data);
m256 data_m256 = set1_2x128(data);
m256 hi_mask_m256 = loadu256(hi_mask);
m256 lo_mask_m256 = loadu256(lo_mask);
m256 bucket_select_mask_m256 = loadu256(bucket_select_mask);
@@ -974,8 +974,8 @@ int roseCheckShufti32x8(const struct core_info *ci, const u8 *hi_mask,
m128 hi_mask_m128 = loadu128(hi_mask);
m128 lo_mask_m128 = loadu128(lo_mask);
m256 hi_mask_m256 = set2x128(hi_mask_m128);
m256 lo_mask_m256 = set2x128(lo_mask_m128);
m256 hi_mask_m256 = set1_2x128(hi_mask_m128);
m256 lo_mask_m256 = set1_2x128(lo_mask_m128);
m256 bucket_select_mask_m256 = loadu256(bucket_select_mask);
if (validateShuftiMask32x8(data, hi_mask_m256, lo_mask_m256,
bucket_select_mask_m256,
@@ -1287,7 +1287,7 @@ int roseCheckMultipathShufti16x8(const struct hs_scratch *scratch,
u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask);
DEBUG_PRINTF("expand_hi %llx\n", valid_hi);
DEBUG_PRINTF("expand_lo %llx\n", valid_lo);
expand_valid = set64x2(valid_hi, valid_lo);
expand_valid = set2x64(valid_hi, valid_lo);
valid_path_mask = ~movemask128(pshufb_m128(expand_valid,
data_select_mask));
}
@@ -1332,7 +1332,7 @@ int roseCheckMultipathShufti32x8(const struct hs_scratch *scratch,
u32 valid_data_mask;
m128 data_m128 = getData128(ci, offset, &valid_data_mask);
m256 data_double = set2x128(data_m128);
m256 data_double = set1_2x128(data_m128);
m256 data_select_mask = loadu256(ri->data_select_mask);
u32 valid_path_mask = 0;
@@ -1346,7 +1346,7 @@ int roseCheckMultipathShufti32x8(const struct hs_scratch *scratch,
u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask);
DEBUG_PRINTF("expand_hi %llx\n", valid_hi);
DEBUG_PRINTF("expand_lo %llx\n", valid_lo);
expand_valid = set64x4(valid_hi, valid_lo, valid_hi,
expand_valid = set4x64(valid_hi, valid_lo, valid_hi,
valid_lo);
valid_path_mask = ~movemask256(pshufb_m256(expand_valid,
data_select_mask));
@@ -1393,7 +1393,7 @@ int roseCheckMultipathShufti32x16(const struct hs_scratch *scratch,
u32 valid_data_mask;
m128 data_m128 = getData128(ci, offset, &valid_data_mask);
m256 data_double = set2x128(data_m128);
m256 data_double = set1_2x128(data_m128);
m256 data_select_mask = loadu256(ri->data_select_mask);
u32 valid_path_mask = 0;
@@ -1407,7 +1407,7 @@ int roseCheckMultipathShufti32x16(const struct hs_scratch *scratch,
u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask);
DEBUG_PRINTF("expand_hi %llx\n", valid_hi);
DEBUG_PRINTF("expand_lo %llx\n", valid_lo);
expand_valid = set64x4(valid_hi, valid_lo, valid_hi,
expand_valid = set4x64(valid_hi, valid_lo, valid_hi,
valid_lo);
valid_path_mask = ~movemask256(pshufb_m256(expand_valid,
data_select_mask));
@@ -1460,7 +1460,7 @@ int roseCheckMultipathShufti64(const struct hs_scratch *scratch,
u32 valid_data_mask;
m128 data_m128 = getData128(ci, offset, &valid_data_mask);
m256 data_m256 = set2x128(data_m128);
m256 data_m256 = set1_2x128(data_m128);
m256 data_select_mask_1 = loadu256(ri->data_select_mask);
m256 data_select_mask_2 = loadu256(ri->data_select_mask + 32);
@@ -1475,7 +1475,7 @@ int roseCheckMultipathShufti64(const struct hs_scratch *scratch,
u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask);
DEBUG_PRINTF("expand_hi %llx\n", valid_hi);
DEBUG_PRINTF("expand_lo %llx\n", valid_lo);
expand_valid = set64x4(valid_hi, valid_lo, valid_hi,
expand_valid = set4x64(valid_hi, valid_lo, valid_hi,
valid_lo);
u32 valid_path_1 = movemask256(pshufb_m256(expand_valid,
data_select_mask_1));

View File

@@ -47,7 +47,7 @@ static really_inline
int validateShuftiMask16x16(const m256 data, const m256 hi_mask,
const m256 lo_mask, const m256 and_mask,
const u32 neg_mask, const u32 valid_data_mask) {
m256 low4bits = set32x8(0xf);
m256 low4bits = set1_32x8(0xf);
m256 c_lo = pshufb_m256(lo_mask, and256(data, low4bits));
m256 c_hi = pshufb_m256(hi_mask,
rshift64_m256(andnot256(low4bits, data), 4));
@@ -78,7 +78,7 @@ int validateShuftiMask16x8(const m128 data, const m256 nib_mask,
const m128 and_mask, const u32 neg_mask,
const u32 valid_data_mask) {
m256 data_m256 = combine2x128(rshift64_m128(data, 4), data);
m256 low4bits = set32x8(0xf);
m256 low4bits = set1_32x8(0xf);
m256 c_nib = pshufb_m256(nib_mask, and256(data_m256, low4bits));
m128 t = and128(movdq_hi(c_nib), movdq_lo(c_nib));
m128 nresult = eq128(and128(t, and_mask), zeroes128());
@@ -101,7 +101,7 @@ static really_inline
int validateShuftiMask32x8(const m256 data, const m256 hi_mask,
const m256 lo_mask, const m256 and_mask,
const u32 neg_mask, const u32 valid_data_mask) {
m256 low4bits = set32x8(0xf);
m256 low4bits = set1_32x8(0xf);
m256 c_lo = pshufb_m256(lo_mask, and256(data, low4bits));
m256 c_hi = pshufb_m256(hi_mask,
rshift64_m256(andnot256(low4bits, data), 4));
@@ -133,7 +133,7 @@ int validateShuftiMask32x16(const m256 data,
const m256 bucket_mask_hi,
const m256 bucket_mask_lo, const u32 neg_mask,
const u32 valid_data_mask) {
m256 low4bits = set32x8(0xf);
m256 low4bits = set1_32x8(0xf);
m256 data_lo = and256(data, low4bits);
m256 data_hi = and256(rshift64_m256(data, 4), low4bits);
m256 c_lo_1 = pshufb_m256(lo_mask_1, data_lo);
@@ -201,7 +201,7 @@ int validateMultipathShuftiMask16x8(const m128 data,
const u32 neg_mask,
const u32 valid_path_mask) {
m256 data_256 = combine2x128(rshift64_m128(data, 4), data);
m256 low4bits = set32x8(0xf);
m256 low4bits = set1_32x8(0xf);
m256 c_nib = pshufb_m256(nib_mask, and256(data_256, low4bits));
m128 t = and128(movdq_hi(c_nib), movdq_lo(c_nib));
m128 result = and128(t, bucket_select_mask);
@@ -220,7 +220,7 @@ int validateMultipathShuftiMask32x8(const m256 data,
const u32 hi_bits, const u32 lo_bits,
const u32 neg_mask,
const u32 valid_path_mask) {
m256 low4bits = set32x8(0xf);
m256 low4bits = set1_32x8(0xf);
m256 data_lo = and256(data, low4bits);
m256 data_hi = and256(rshift64_m256(data, 4), low4bits);
m256 c_lo = pshufb_m256(lo_mask, data_lo);
@@ -244,7 +244,7 @@ int validateMultipathShuftiMask32x16(const m256 data,
const u32 hi_bits, const u32 lo_bits,
const u32 neg_mask,
const u32 valid_path_mask) {
m256 low4bits = set32x8(0xf);
m256 low4bits = set1_32x8(0xf);
m256 data_lo = and256(data, low4bits);
m256 data_hi = and256(rshift64_m256(data, 4), low4bits);
m256 c_lo_1 = pshufb_m256(lo_mask_1, data_lo);
@@ -271,7 +271,7 @@ int validateMultipathShuftiMask64(const m256 data_1, const m256 data_2,
const u64a hi_bits, const u64a lo_bits,
const u64a neg_mask,
const u64a valid_path_mask) {
m256 low4bits = set32x8(0xf);
m256 low4bits = set1_32x8(0xf);
m256 c_lo_1 = pshufb_m256(lo_mask, and256(data_1, low4bits));
m256 c_lo_2 = pshufb_m256(lo_mask, and256(data_2, low4bits));
m256 c_hi_1 = pshufb_m256(hi_mask,