From f6b688fc064d30d154aabe4a73c3274e639458bd Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Fri, 5 May 2017 10:43:37 +1000 Subject: [PATCH] rename pshufb to pshufb_m128 --- src/fdr/teddy.c | 17 +++++++++-------- src/nfa/limex_shuffle.h | 2 +- src/nfa/mcsheng.c | 6 +++--- src/nfa/sheng_impl.h | 4 ++-- src/nfa/sheng_impl4.h | 10 +++++----- src/nfa/shufti.c | 16 ++++++++-------- src/nfa/truffle.c | 6 +++--- src/rose/counting_miracle.h | 10 +++++----- src/rose/program_runtime.h | 4 ++-- src/util/simd_utils.h | 8 ++++---- 10 files changed, 42 insertions(+), 41 deletions(-) diff --git a/src/fdr/teddy.c b/src/fdr/teddy.c index e7a0fccd..a3f7cfaf 100644 --- a/src/fdr/teddy.c +++ b/src/fdr/teddy.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -129,7 +129,8 @@ m128 prep_conf_teddy_m1(const m128 *maskBase, m128 val) { m128 mask = set16x8(0xf); m128 lo = and128(val, mask); m128 hi = and128(rshift64_m128(val, 4), mask); - return and128(pshufb(maskBase[0*2], lo), pshufb(maskBase[0*2+1], hi)); + return and128(pshufb_m128(maskBase[0 * 2], lo), + pshufb_m128(maskBase[0 * 2 + 1], hi)); } static really_inline @@ -139,8 +140,8 @@ m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 val) { m128 hi = and128(rshift64_m128(val, 4), mask); m128 r = prep_conf_teddy_m1(maskBase, val); - m128 res_1 = and128(pshufb(maskBase[1*2], lo), - pshufb(maskBase[1*2+1], hi)); + m128 res_1 = and128(pshufb_m128(maskBase[1*2], lo), + pshufb_m128(maskBase[1*2+1], hi)); m128 res_shifted_1 = palignr(res_1, *old_1, 16-1); *old_1 = res_1; return and128(r, res_shifted_1); @@ -154,8 +155,8 @@ m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2, m128 hi = and128(rshift64_m128(val, 4), mask); m128 r = prep_conf_teddy_m2(maskBase, old_1, val); - m128 res_2 = and128(pshufb(maskBase[2*2], lo), - pshufb(maskBase[2*2+1], hi)); + m128 res_2 = and128(pshufb_m128(maskBase[2*2], lo), + pshufb_m128(maskBase[2*2+1], hi)); m128 res_shifted_2 = palignr(res_2, *old_2, 16-2); *old_2 = res_2; return and128(r, res_shifted_2); @@ -169,8 +170,8 @@ m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2, m128 hi = and128(rshift64_m128(val, 4), mask); m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, val); - m128 res_3 = and128(pshufb(maskBase[3*2], lo), - pshufb(maskBase[3*2+1], hi)); + m128 res_3 = and128(pshufb_m128(maskBase[3*2], lo), + pshufb_m128(maskBase[3*2+1], hi)); m128 res_shifted_3 = palignr(res_3, *old_3, 16-3); *old_3 = res_3; return and128(r, res_shifted_3); diff --git a/src/nfa/limex_shuffle.h b/src/nfa/limex_shuffle.h index cedca333..365d4729 100644 --- a/src/nfa/limex_shuffle.h +++ b/src/nfa/limex_shuffle.h @@ -44,7 +44,7 @@ static really_inline u32 packedExtract128(m128 s, const m128 permute, const m128 compare) { - m128 shuffled = pshufb(s, permute); + m128 shuffled = pshufb_m128(s, permute); m128 compared = and128(shuffled, compare); u16 rv = ~movemask128(eq128(compared, shuffled)); return (u32)rv; diff --git a/src/nfa/mcsheng.c b/src/nfa/mcsheng.c index a5ba2151..8130173d 100644 --- a/src/nfa/mcsheng.c +++ b/src/nfa/mcsheng.c @@ -179,7 +179,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, #define SHENG_SINGLE_ITER do { \ m128 shuffle_mask = masks[*(c++)]; \ - s = pshufb(shuffle_mask, s); \ + s = pshufb_m128(shuffle_mask, s); \ u32 s_gpr_x4 = movd(s); /* convert to u8 */ \ DEBUG_PRINTF("c %hhu (%c) --> s %hhu\n", c[-1], c[-1], s_gpr_x4); \ if (s_gpr_x4 >= sheng_stop_limit_x4) { \ @@ -198,7 +198,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, u64a cc0 = pdep64(data_bytes, 0xff0); /* extract scaled low byte */ data_bytes &= ~0xffULL; /* clear low bits for scale space */ m128 shuffle_mask0 = load128((const char *)masks + cc0); - s = pshufb(shuffle_mask0, s); + s = pshufb_m128(shuffle_mask0, s); m128 s_max = s; m128 s_max0 = s_max; DEBUG_PRINTF("c %02llx --> s %hhu\n", cc0 >> 4, movd(s)); @@ -208,7 +208,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, u64a cc##iter = pext64(data_bytes, mcsheng_pext_mask[iter]); \ assert(cc##iter == (u64a)c[iter] << 4); \ m128 shuffle_mask##iter = load128((const char *)masks + cc##iter); \ - s = pshufb(shuffle_mask##iter, s); \ + s = pshufb_m128(shuffle_mask##iter, s); \ if (do_accel && iter == 7) { \ /* in the final iteration we also have to check against accel */ \ m128 s_temp = sadd_u8_m128(s, accel_delta); \ diff --git a/src/nfa/sheng_impl.h b/src/nfa/sheng_impl.h index fc3e54aa..9552fe15 100644 --- a/src/nfa/sheng_impl.h +++ b/src/nfa/sheng_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -58,7 +58,7 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s, while (likely(cur_buf != end)) { const u8 c = *cur_buf; const m128 shuffle_mask = masks[c]; - cur_state = pshufb(shuffle_mask, cur_state); + cur_state = pshufb_m128(shuffle_mask, cur_state); const u8 tmp = movd(cur_state); DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?'); diff --git a/src/nfa/sheng_impl4.h b/src/nfa/sheng_impl4.h index 2561e52d..74032201 100644 --- a/src/nfa/sheng_impl4.h +++ b/src/nfa/sheng_impl4.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -100,19 +100,19 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s, const u8 c4 = *b4; const m128 shuffle_mask1 = masks[c1]; - cur_state = pshufb(shuffle_mask1, cur_state); + cur_state = pshufb_m128(shuffle_mask1, cur_state); const u8 a1 = movd(cur_state); const m128 shuffle_mask2 = masks[c2]; - cur_state = pshufb(shuffle_mask2, cur_state); + cur_state = pshufb_m128(shuffle_mask2, cur_state); const u8 a2 = movd(cur_state); const m128 shuffle_mask3 = masks[c3]; - cur_state = pshufb(shuffle_mask3, cur_state); + cur_state = pshufb_m128(shuffle_mask3, cur_state); const u8 a3 = movd(cur_state); const m128 shuffle_mask4 = masks[c4]; - cur_state = pshufb(shuffle_mask4, cur_state); + cur_state = pshufb_m128(shuffle_mask4, cur_state); const u8 a4 = movd(cur_state); DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?'); diff --git a/src/nfa/shufti.c b/src/nfa/shufti.c index ebe5015d..09ffc0cf 100644 --- a/src/nfa/shufti.c +++ b/src/nfa/shufti.c @@ -114,8 +114,8 @@ DUMP_MSK(128) static really_inline u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits, const m128 compare) { - m128 c_lo = pshufb(mask_lo, GET_LO_4(chars)); - m128 c_hi = pshufb(mask_hi, GET_HI_4(chars)); + m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(chars)); + m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(chars)); m128 t = and128(c_lo, c_hi); #ifdef DEBUG @@ -219,8 +219,8 @@ const u8 *lastMatch(const u8 *buf, m128 t, m128 compare) { static really_inline const u8 *revBlock(m128 mask_lo, m128 mask_hi, m128 chars, const u8 *buf, const m128 low4bits, const m128 zeroes) { - m128 c_lo = pshufb(mask_lo, GET_LO_4(chars)); - m128 c_hi = pshufb(mask_hi, GET_HI_4(chars)); + m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(chars)); + m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(chars)); m128 t = and128(c_lo, c_hi); #ifdef DEBUG @@ -289,8 +289,8 @@ const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi, const m128 ones) { m128 chars_lo = GET_LO_4(chars); m128 chars_hi = GET_HI_4(chars); - m128 c_lo = pshufb(mask1_lo, chars_lo); - m128 c_hi = pshufb(mask1_hi, chars_hi); + m128 c_lo = pshufb_m128(mask1_lo, chars_lo); + m128 c_hi = pshufb_m128(mask1_hi, chars_hi); m128 t = or128(c_lo, c_hi); #ifdef DEBUG @@ -301,8 +301,8 @@ const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi, DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n"); #endif - m128 c2_lo = pshufb(mask2_lo, chars_lo); - m128 c2_hi = pshufb(mask2_hi, chars_hi); + m128 c2_lo = pshufb_m128(mask2_lo, chars_lo); + m128 c2_hi = pshufb_m128(mask2_hi, chars_hi); m128 t2 = or128(t, rshiftbyte_m128(or128(c2_lo, c2_hi), 1)); #ifdef DEBUG diff --git a/src/nfa/truffle.c b/src/nfa/truffle.c index 335edd5b..be6b312c 100644 --- a/src/nfa/truffle.c +++ b/src/nfa/truffle.c @@ -68,11 +68,11 @@ u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) { m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201); // and now do the real work - m128 shuf1 = pshufb(shuf_mask_lo_highclear, v); + m128 shuf1 = pshufb_m128(shuf_mask_lo_highclear, v); m128 t1 = xor128(v, highconst); - m128 shuf2 = pshufb(shuf_mask_lo_highset, t1); + m128 shuf2 = pshufb_m128(shuf_mask_lo_highset, t1); m128 t2 = andnot128(highconst, rshift64_m128(v, 4)); - m128 shuf3 = pshufb(shuf_mask_hi, t2); + m128 shuf3 = pshufb_m128(shuf_mask_hi, t2); m128 tmp = and128(or128(shuf1, shuf2), shuf3); m128 tmp2 = eq128(tmp, zeroes128()); u32 z = movemask128(tmp2); diff --git a/src/rose/counting_miracle.h b/src/rose/counting_miracle.h index 76db5a77..976208b7 100644 --- a/src/rose/counting_miracle.h +++ b/src/rose/counting_miracle.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -98,8 +98,8 @@ u32 roseCountingMiracleScanShufti(m128 mask_lo, m128 mask_hi, u8 poison, for (; d + 16 <= d_end; d_end -= 16) { m128 data = loadu128(d_end - 16); - m128 c_lo = pshufb(mask_lo, GET_LO_4(data)); - m128 c_hi = pshufb(mask_hi, GET_HI_4(data)); + m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(data)); + m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(data)); m128 t = and128(c_lo, c_hi); u32 z1 = movemask128(eq128(t, zeroes)); count += popcount32(z1 ^ 0xffff); @@ -117,8 +117,8 @@ u32 roseCountingMiracleScanShufti(m128 mask_lo, m128 mask_hi, u8 poison, memset(temp, poison, sizeof(temp)); memcpy(temp, d, d_end - d); m128 data = loadu128(temp); - m128 c_lo = pshufb(mask_lo, GET_LO_4(data)); - m128 c_hi = pshufb(mask_hi, GET_HI_4(data)); + m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(data)); + m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(data)); m128 t = and128(c_lo, c_hi); u32 z1 = movemask128(eq128(t, zeroes)); count += popcount32(z1 ^ 0xffff); diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 38700fbd..b140a2bc 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -1274,11 +1274,11 @@ int roseCheckMultipathShufti16x8(const struct hs_scratch *scratch, DEBUG_PRINTF("expand_hi %llx\n", valid_hi); DEBUG_PRINTF("expand_lo %llx\n", valid_lo); expand_valid = set64x2(valid_hi, valid_lo); - valid_path_mask = ~movemask128(pshufb(expand_valid, + valid_path_mask = ~movemask128(pshufb_m128(expand_valid, data_select_mask)); } - m128 data = pshufb(data_init, data_select_mask); + m128 data = pshufb_m128(data_init, data_select_mask); m256 nib_mask = loadu256(ri->nib_mask); m128 bucket_select_mask = loadu128(ri->bucket_select_mask); diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index 6eafe488..b4c0f7c8 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -272,7 +272,7 @@ char testbit128(m128 val, unsigned int n) { #define palignr(r, l, offset) _mm_alignr_epi8(r, l, offset) static really_inline -m128 pshufb(m128 a, m128 b) { +m128 pshufb_m128(m128 a, m128 b) { m128 result; result = _mm_shuffle_epi8(a, b); return result; @@ -284,8 +284,8 @@ m256 pshufb_m256(m256 a, m256 b) { return _mm256_shuffle_epi8(a, b); #else m256 rv; - rv.lo = pshufb(a.lo, b.lo); - rv.hi = pshufb(a.hi, b.hi); + rv.lo = pshufb_m128(a.lo, b.lo); + rv.hi = pshufb_m128(a.hi, b.hi); return rv; #endif } @@ -306,7 +306,7 @@ static really_inline m128 variable_byte_shift_m128(m128 in, s32 amount) { assert(amount >= -16 && amount <= 16); m128 shift_mask = loadu128(vbs_mask_data + 16 - amount); - return pshufb(in, shift_mask); + return pshufb_m128(in, shift_mask); } static really_inline