rename pshufb to pshufb_m128

This commit is contained in:
Matthew Barr
2017-05-05 10:43:37 +10:00
parent a295c96198
commit f6b688fc06
10 changed files with 42 additions and 41 deletions

View File

@@ -44,7 +44,7 @@
static really_inline
u32 packedExtract128(m128 s, const m128 permute, const m128 compare) {
m128 shuffled = pshufb(s, permute);
m128 shuffled = pshufb_m128(s, permute);
m128 compared = and128(shuffled, compare);
u16 rv = ~movemask128(eq128(compared, shuffled));
return (u32)rv;

View File

@@ -179,7 +179,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
#define SHENG_SINGLE_ITER do { \
m128 shuffle_mask = masks[*(c++)]; \
s = pshufb(shuffle_mask, s); \
s = pshufb_m128(shuffle_mask, s); \
u32 s_gpr_x4 = movd(s); /* convert to u8 */ \
DEBUG_PRINTF("c %hhu (%c) --> s %hhu\n", c[-1], c[-1], s_gpr_x4); \
if (s_gpr_x4 >= sheng_stop_limit_x4) { \
@@ -198,7 +198,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
u64a cc0 = pdep64(data_bytes, 0xff0); /* extract scaled low byte */
data_bytes &= ~0xffULL; /* clear low bits for scale space */
m128 shuffle_mask0 = load128((const char *)masks + cc0);
s = pshufb(shuffle_mask0, s);
s = pshufb_m128(shuffle_mask0, s);
m128 s_max = s;
m128 s_max0 = s_max;
DEBUG_PRINTF("c %02llx --> s %hhu\n", cc0 >> 4, movd(s));
@@ -208,7 +208,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
u64a cc##iter = pext64(data_bytes, mcsheng_pext_mask[iter]); \
assert(cc##iter == (u64a)c[iter] << 4); \
m128 shuffle_mask##iter = load128((const char *)masks + cc##iter); \
s = pshufb(shuffle_mask##iter, s); \
s = pshufb_m128(shuffle_mask##iter, s); \
if (do_accel && iter == 7) { \
/* in the final iteration we also have to check against accel */ \
m128 s_temp = sadd_u8_m128(s, accel_delta); \

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, Intel Corporation
* Copyright (c) 2016-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -58,7 +58,7 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
while (likely(cur_buf != end)) {
const u8 c = *cur_buf;
const m128 shuffle_mask = masks[c];
cur_state = pshufb(shuffle_mask, cur_state);
cur_state = pshufb_m128(shuffle_mask, cur_state);
const u8 tmp = movd(cur_state);
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, Intel Corporation
* Copyright (c) 2016-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -100,19 +100,19 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
const u8 c4 = *b4;
const m128 shuffle_mask1 = masks[c1];
cur_state = pshufb(shuffle_mask1, cur_state);
cur_state = pshufb_m128(shuffle_mask1, cur_state);
const u8 a1 = movd(cur_state);
const m128 shuffle_mask2 = masks[c2];
cur_state = pshufb(shuffle_mask2, cur_state);
cur_state = pshufb_m128(shuffle_mask2, cur_state);
const u8 a2 = movd(cur_state);
const m128 shuffle_mask3 = masks[c3];
cur_state = pshufb(shuffle_mask3, cur_state);
cur_state = pshufb_m128(shuffle_mask3, cur_state);
const u8 a3 = movd(cur_state);
const m128 shuffle_mask4 = masks[c4];
cur_state = pshufb(shuffle_mask4, cur_state);
cur_state = pshufb_m128(shuffle_mask4, cur_state);
const u8 a4 = movd(cur_state);
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');

View File

@@ -114,8 +114,8 @@ DUMP_MSK(128)
static really_inline
u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits,
const m128 compare) {
m128 c_lo = pshufb(mask_lo, GET_LO_4(chars));
m128 c_hi = pshufb(mask_hi, GET_HI_4(chars));
m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(chars));
m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(chars));
m128 t = and128(c_lo, c_hi);
#ifdef DEBUG
@@ -219,8 +219,8 @@ const u8 *lastMatch(const u8 *buf, m128 t, m128 compare) {
static really_inline
const u8 *revBlock(m128 mask_lo, m128 mask_hi, m128 chars, const u8 *buf,
const m128 low4bits, const m128 zeroes) {
m128 c_lo = pshufb(mask_lo, GET_LO_4(chars));
m128 c_hi = pshufb(mask_hi, GET_HI_4(chars));
m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(chars));
m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(chars));
m128 t = and128(c_lo, c_hi);
#ifdef DEBUG
@@ -289,8 +289,8 @@ const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi,
const m128 ones) {
m128 chars_lo = GET_LO_4(chars);
m128 chars_hi = GET_HI_4(chars);
m128 c_lo = pshufb(mask1_lo, chars_lo);
m128 c_hi = pshufb(mask1_hi, chars_hi);
m128 c_lo = pshufb_m128(mask1_lo, chars_lo);
m128 c_hi = pshufb_m128(mask1_hi, chars_hi);
m128 t = or128(c_lo, c_hi);
#ifdef DEBUG
@@ -301,8 +301,8 @@ const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi,
DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n");
#endif
m128 c2_lo = pshufb(mask2_lo, chars_lo);
m128 c2_hi = pshufb(mask2_hi, chars_hi);
m128 c2_lo = pshufb_m128(mask2_lo, chars_lo);
m128 c2_hi = pshufb_m128(mask2_hi, chars_hi);
m128 t2 = or128(t, rshiftbyte_m128(or128(c2_lo, c2_hi), 1));
#ifdef DEBUG

View File

@@ -68,11 +68,11 @@ u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) {
m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201);
// and now do the real work
m128 shuf1 = pshufb(shuf_mask_lo_highclear, v);
m128 shuf1 = pshufb_m128(shuf_mask_lo_highclear, v);
m128 t1 = xor128(v, highconst);
m128 shuf2 = pshufb(shuf_mask_lo_highset, t1);
m128 shuf2 = pshufb_m128(shuf_mask_lo_highset, t1);
m128 t2 = andnot128(highconst, rshift64_m128(v, 4));
m128 shuf3 = pshufb(shuf_mask_hi, t2);
m128 shuf3 = pshufb_m128(shuf_mask_hi, t2);
m128 tmp = and128(or128(shuf1, shuf2), shuf3);
m128 tmp2 = eq128(tmp, zeroes128());
u32 z = movemask128(tmp2);