mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-30 19:47:43 +03:00
rename vpshufb to pshufb_m256
This commit is contained in:
@@ -54,7 +54,7 @@ u32 packedExtract128(m128 s, const m128 permute, const m128 compare) {
|
||||
static really_inline
|
||||
u32 packedExtract256(m256 s, const m256 permute, const m256 compare) {
|
||||
// vpshufb doesn't cross lanes, so this is a bit of a cheat
|
||||
m256 shuffled = vpshufb(s, permute);
|
||||
m256 shuffled = pshufb_m256(s, permute);
|
||||
m256 compared = and256(shuffled, compare);
|
||||
u32 rv = ~movemask256(eq256(compared, shuffled));
|
||||
// stitch the lane-wise results back together
|
||||
|
@@ -373,8 +373,8 @@ DUMP_MSK(256)
|
||||
static really_inline
|
||||
u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits,
|
||||
const m256 compare) {
|
||||
m256 c_lo = vpshufb(mask_lo, GET_LO_4(chars));
|
||||
m256 c_hi = vpshufb(mask_hi, GET_HI_4(chars));
|
||||
m256 c_lo = pshufb_m256(mask_lo, GET_LO_4(chars));
|
||||
m256 c_hi = pshufb_m256(mask_hi, GET_HI_4(chars));
|
||||
m256 t = and256(c_lo, c_hi);
|
||||
|
||||
#ifdef DEBUG
|
||||
@@ -407,7 +407,7 @@ const u8 *fwdBlockShort(m256 mask, m128 chars, const u8 *buf,
|
||||
// do the hi and lo shuffles in the one avx register
|
||||
m256 c = combine2x128(rshift64_m128(chars, 4), chars);
|
||||
c = and256(c, low4bits);
|
||||
m256 c_shuf = vpshufb(mask, c);
|
||||
m256 c_shuf = pshufb_m256(mask, c);
|
||||
m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf));
|
||||
// the upper 16 bits can't match
|
||||
u32 z = 0xffff0000U | movemask128(eq128(t, zeroes128()));
|
||||
@@ -516,8 +516,8 @@ const u8 *lastMatch(const u8 *buf, u32 z) {
|
||||
static really_inline
|
||||
const u8 *revBlock(m256 mask_lo, m256 mask_hi, m256 chars, const u8 *buf,
|
||||
const m256 low4bits, const m256 zeroes) {
|
||||
m256 c_lo = vpshufb(mask_lo, GET_LO_4(chars));
|
||||
m256 c_hi = vpshufb(mask_hi, GET_HI_4(chars));
|
||||
m256 c_lo = pshufb_m256(mask_lo, GET_LO_4(chars));
|
||||
m256 c_hi = pshufb_m256(mask_hi, GET_HI_4(chars));
|
||||
m256 t = and256(c_lo, c_hi);
|
||||
|
||||
#ifdef DEBUG
|
||||
@@ -538,7 +538,7 @@ const u8 *revBlockShort(m256 mask, m128 chars, const u8 *buf,
|
||||
// do the hi and lo shuffles in the one avx register
|
||||
m256 c = combine2x128(rshift64_m128(chars, 4), chars);
|
||||
c = and256(c, low4bits);
|
||||
m256 c_shuf = vpshufb(mask, c);
|
||||
m256 c_shuf = pshufb_m256(mask, c);
|
||||
m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf));
|
||||
// the upper 16 bits can't match
|
||||
u32 z = 0xffff0000U | movemask128(eq128(t, zeroes128()));
|
||||
@@ -630,8 +630,8 @@ const u8 *fwdBlock2(m256 mask1_lo, m256 mask1_hi, m256 mask2_lo, m256 mask2_hi,
|
||||
DEBUG_PRINTF("buf %p\n", buf);
|
||||
m256 chars_lo = GET_LO_4(chars);
|
||||
m256 chars_hi = GET_HI_4(chars);
|
||||
m256 c_lo = vpshufb(mask1_lo, chars_lo);
|
||||
m256 c_hi = vpshufb(mask1_hi, chars_hi);
|
||||
m256 c_lo = pshufb_m256(mask1_lo, chars_lo);
|
||||
m256 c_hi = pshufb_m256(mask1_hi, chars_hi);
|
||||
m256 t = or256(c_lo, c_hi);
|
||||
|
||||
#ifdef DEBUG
|
||||
@@ -642,8 +642,8 @@ const u8 *fwdBlock2(m256 mask1_lo, m256 mask1_hi, m256 mask2_lo, m256 mask2_hi,
|
||||
DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n");
|
||||
#endif
|
||||
|
||||
m256 c2_lo = vpshufb(mask2_lo, chars_lo);
|
||||
m256 c2_hi = vpshufb(mask2_hi, chars_hi);
|
||||
m256 c2_lo = pshufb_m256(mask2_lo, chars_lo);
|
||||
m256 c2_hi = pshufb_m256(mask2_hi, chars_hi);
|
||||
m256 t2 = or256(t, rshift128_m256(or256(c2_lo, c2_hi), 1));
|
||||
|
||||
#ifdef DEBUG
|
||||
@@ -662,8 +662,8 @@ const u8 *fwdBlockShort2(m256 mask1, m256 mask2, m128 chars, const u8 *buf,
|
||||
// do the hi and lo shuffles in the one avx register
|
||||
m256 c = combine2x128(rshift64_m128(chars, 4), chars);
|
||||
c = and256(c, low4bits);
|
||||
m256 c_shuf1 = vpshufb(mask1, c);
|
||||
m256 c_shuf2 = rshift128_m256(vpshufb(mask2, c), 1);
|
||||
m256 c_shuf1 = pshufb_m256(mask1, c);
|
||||
m256 c_shuf2 = rshift128_m256(pshufb_m256(mask2, c), 1);
|
||||
m256 t0 = or256(c_shuf1, c_shuf2);
|
||||
m128 t = or128(movdq_hi(t0), cast256to128(t0));
|
||||
// the upper 32-bits can't match
|
||||
|
@@ -264,11 +264,11 @@ u32 block(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v) {
|
||||
m256 shuf_mask_hi = _mm256_set1_epi64x(0x8040201008040201);
|
||||
|
||||
// and now do the real work
|
||||
m256 shuf1 = vpshufb(shuf_mask_lo_highclear, v);
|
||||
m256 shuf1 = pshufb_m256(shuf_mask_lo_highclear, v);
|
||||
m256 t1 = xor256(v, highconst);
|
||||
m256 shuf2 = vpshufb(shuf_mask_lo_highset, t1);
|
||||
m256 shuf2 = pshufb_m256(shuf_mask_lo_highset, t1);
|
||||
m256 t2 = andnot256(highconst, rshift64_m256(v, 4));
|
||||
m256 shuf3 = vpshufb(shuf_mask_hi, t2);
|
||||
m256 shuf3 = pshufb_m256(shuf_mask_hi, t2);
|
||||
m256 tmp = and256(or256(shuf1, shuf2), shuf3);
|
||||
m256 tmp2 = eq256(tmp, zeroes256());
|
||||
u32 z = movemask256(tmp2);
|
||||
|
Reference in New Issue
Block a user