Apply some consistency to the names we give shifts

Matthew Barr
2016-06-15 11:02:42 +10:00
parent c76ff285e7
commit e3d416a6ea
13 changed files with 127 additions and 156 deletions

View File

@@ -75,7 +75,7 @@ struct proto_cache {
// Shift macros for Limited NFAs. Defined in terms of uniform ops.
// LimExNFAxxx ptr in 'limex' and the current state in 's'
#define NFA_EXEC_LIM_SHIFT(nels_type, nels_i) \
-(JOIN(shift_, nels_type)( \
+(JOIN(lshift_, nels_type)( \
JOIN(and_, nels_type)(s, \
JOIN(load_, nels_type)(&limex->shift[nels_i])), \
limex->shiftAmount[nels_i]))
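
The only change here is the prefix: shift_ becomes lshift_, making the direction explicit and matching the lshiftbyte/rshiftbyte names introduced elsewhere in this commit. A rough scalar sketch of the uniform-op convention the macro leans on (types, names, and signatures below are illustrative assumptions, not the real limex definitions):

    /* Illustrative only: scalar stand-ins for the uniform ops used by
     * NFA_EXEC_LIM_SHIFT. The real code provides u32/u64/m128/m256/...
     * variants of each op, selected via the JOIN token-pasting helper. */
    #include <stdint.h>

    #define JOIN_(a, b) a##b
    #define JOIN(a, b)  JOIN_(a, b)

    typedef uint64_t u64a;
    static inline u64a load_u64a(const u64a *p)        { return *p; }
    static inline u64a and_u64a(u64a a, u64a b)        { return a & b; }
    static inline u64a lshift_u64a(u64a a, uint32_t n) { return a << n; } /* was shift_u64a */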

View File

@@ -40,7 +40,6 @@
#include "shufti_common.h"
/** \brief Naive byte-by-byte implementation. */
static really_inline
const u8 *shuftiRevSlow(const u8 *lo, const u8 *hi, const u8 *buf,
@@ -234,7 +233,7 @@ const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi,
m128 c2_lo = pshufb(mask2_lo, chars_lo);
m128 c2_hi = pshufb(mask2_hi, chars_hi);
-m128 t2 = or128(t, shiftRight8Bits(or128(c2_lo, c2_hi)));
+m128 t2 = or128(t, rshiftbyte_m128(or128(c2_lo, c2_hi), 1));
#ifdef DEBUG
DEBUG_PRINTF(" c2_lo: "); dumpMsk128(c2_lo); printf("\n");
@@ -471,7 +470,7 @@ const u8 *fwdBlock2(m256 mask1_lo, m256 mask1_hi, m256 mask2_lo, m256 mask2_hi,
m256 c2_lo = vpshufb(mask2_lo, chars_lo);
m256 c2_hi = vpshufb(mask2_hi, chars_hi);
-m256 t2 = or256(t, shift256Right8Bits(or256(c2_lo, c2_hi)));
+m256 t2 = or256(t, rshift128_m256(or256(c2_lo, c2_hi), 1));
#ifdef DEBUG
DEBUG_PRINTF(" c2_lo: "); dumpMsk256(c2_lo); printf("\n");

View File

@@ -93,7 +93,7 @@ DUMP_MSK(128)
#endif
#define GET_LO_4(chars) and128(chars, low4bits)
-#define GET_HI_4(chars) rshift2x64(andnot128(low4bits, chars), 4)
+#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4)
static really_inline
u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits,
@@ -119,7 +119,7 @@ DUMP_MSK(256)
#endif
#define GET_LO_4(chars) and256(chars, low4bits)
-#define GET_HI_4(chars) rshift4x64(andnot256(low4bits, chars), 4)
+#define GET_HI_4(chars) rshift64_m256(andnot256(low4bits, chars), 4)
static really_inline
u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits,
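
GET_HI_4 extracts the high nibble of each byte: clear the low nibbles, then shift everything right by four bits. The renamed helpers make the lane width explicit; judging by the shift128r/shift256r defines removed later in this commit, they are per-64-bit-lane shifts, which is safe here because the low nibble of every byte has already been cleared, so only zero bits cross byte boundaries. A hedged sketch of the assumed mapping:

    #include <emmintrin.h>
    #include <immintrin.h>

    /* Assumed to shift each 64-bit lane right by b bits, matching the
     * _mm_srli_epi64/_mm256_srli_epi64 defines this commit removes. */
    #define rshift64_m128(a, b) _mm_srli_epi64((a), (b))
    #define rshift64_m256(a, b) _mm256_srli_epi64((a), (b))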

View File

@@ -48,7 +48,6 @@ const u8 *firstMatch(const u8 *buf, u32 z) {
return NULL; // no match
}
-#define shift128r(a, b) _mm_srli_epi64((a), (b))
static really_inline
u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) {
@@ -59,7 +58,7 @@ u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) {
m128 shuf1 = pshufb(shuf_mask_lo_highclear, v);
m128 t1 = xor128(v, highconst);
m128 shuf2 = pshufb(shuf_mask_lo_highset, t1);
-m128 t2 = andnot128(highconst, shift128r(v, 4));
+m128 t2 = andnot128(highconst, rshift64_m128(v, 4));
m128 shuf3 = pshufb(shuf_mask_hi, t2);
m128 tmp = and128(or128(shuf1, shuf2), shuf3);
m128 tmp2 = eq128(tmp, zeroes128());
@@ -102,7 +101,6 @@ const u8 *firstMatch(const u8 *buf, u32 z) {
return NULL; // no match
}
-#define shift256r(a, b) _mm256_srli_epi64((a), (b))
static really_inline
u32 block(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v) {
@@ -113,7 +111,7 @@ u32 block(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v) {
m256 shuf1 = vpshufb(shuf_mask_lo_highclear, v);
m256 t1 = xor256(v, highconst);
m256 shuf2 = vpshufb(shuf_mask_lo_highset, t1);
-m256 t2 = andnot256(highconst, shift256r(v, 4));
+m256 t2 = andnot256(highconst, rshift64_m256(v, 4));
m256 shuf3 = vpshufb(shuf_mask_hi, t2);
m256 tmp = and256(or256(shuf1, shuf2), shuf3);
m256 tmp2 = eq256(tmp, zeroes256());
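
The same per-64-bit-lane shift feeds the high-nibble lookup in the truffle block: shift right by four, then clear the top bit of each byte so pshufb does not zero the lane. A self-contained sketch of that index computation under the new name (the 0x80 value of highconst and the intrinsic mapping are assumptions based on this hunk):

    #include <emmintrin.h>

    /* Build the byte indices used for pshufb(shuf_mask_hi, t2):
     * low 4 bits of each byte = that byte's high nibble in the input,
     * bit 7 cleared so pshufb keeps the lane instead of zeroing it. */
    static inline __m128i truffle_hi_index(__m128i v) {
        const __m128i highconst = _mm_set1_epi8((char)0x80); /* assumed */
        return _mm_andnot_si128(highconst, _mm_srli_epi64(v, 4)); /* rshift64_m128(v, 4) */
    }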

View File

@@ -138,7 +138,7 @@ const u8 *dvermSearchAligned(m128 chars1, m128 chars2, u8 c1, u8 c2,
for (; buf + 16 < buf_end; buf += 16) {
m128 data = load128(buf);
u32 z = movemask128(and128(eq128(chars1, data),
-shiftRight8Bits(eq128(chars2, data))));
+rshiftbyte_m128(eq128(chars2, data), 1)));
if (buf[15] == c1 && buf[16] == c2) {
z |= (1 << 15);
}
@@ -161,7 +161,7 @@ const u8 *dvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2,
m128 data = load128(buf);
m128 v = and128(casemask, data);
u32 z = movemask128(and128(eq128(chars1, v),
-shiftRight8Bits(eq128(chars2, v))));
+rshiftbyte_m128(eq128(chars2, v), 1)));
if ((buf[15] & CASE_CLEAR) == c1 && (buf[16] & CASE_CLEAR) == c2) {
z |= (1 << 15);
}
@@ -182,8 +182,10 @@ const u8 *dvermSearchAlignedMasked(m128 chars1, m128 chars2,
for (; buf + 16 < buf_end; buf += 16) {
m128 data = load128(buf);
-u32 z = movemask128(and128(eq128(chars1, and128(data, mask1)),
-shiftRight8Bits(eq128(chars2, and128(data, mask2)))));
+m128 v1 = eq128(chars1, and128(data, mask1));
+m128 v2 = eq128(chars2, and128(data, mask2));
+u32 z = movemask128(and128(v1, rshiftbyte_m128(v2, 1)));
if ((buf[15] & m1) == c1 && (buf[16] & m2) == c2) {
z |= (1 << 15);
}
@@ -201,7 +203,7 @@ static really_inline
const u8 *dvermPrecondition(m128 chars1, m128 chars2, const u8 *buf) {
m128 data = loadu128(buf); // unaligned
u32 z = movemask128(and128(eq128(chars1, data),
-shiftRight8Bits(eq128(chars2, data))));
+rshiftbyte_m128(eq128(chars2, data), 1)));
/* no fixup of the boundary required - the aligned run will pick it up */
if (unlikely(z)) {
@@ -219,7 +221,7 @@ const u8 *dvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) {
m128 data = loadu128(buf); // unaligned
m128 v = and128(casemask, data);
u32 z = movemask128(and128(eq128(chars1, v),
-shiftRight8Bits(eq128(chars2, v))));
+rshiftbyte_m128(eq128(chars2, v), 1)));
/* no fixup of the boundary required - the aligned run will pick it up */
if (unlikely(z)) {
@@ -234,8 +236,9 @@ static really_inline
const u8 *dvermPreconditionMasked(m128 chars1, m128 chars2,
m128 mask1, m128 mask2, const u8 *buf) {
m128 data = loadu128(buf); // unaligned
-u32 z = movemask128(and128(eq128(chars1, and128(data, mask1)),
-shiftRight8Bits(eq128(chars2, and128(data, mask2)))));
+m128 v1 = eq128(chars1, and128(data, mask1));
+m128 v2 = eq128(chars2, and128(data, mask2));
+u32 z = movemask128(and128(v1, rshiftbyte_m128(v2, 1)));
/* no fixup of the boundary required - the aligned run will pick it up */
if (unlikely(z)) {
@@ -324,7 +327,7 @@ const u8 *rdvermSearchAligned(m128 chars1, m128 chars2, u8 c1, u8 c2,
for (; buf + 16 < buf_end; buf_end -= 16) {
m128 data = load128(buf_end - 16);
u32 z = movemask128(and128(eq128(chars2, data),
-shiftLeft8Bits(eq128(chars1, data))));
+lshiftbyte_m128(eq128(chars1, data), 1)));
if (buf_end[-17] == c1 && buf_end[-16] == c2) {
z |= 1;
}
@@ -345,7 +348,7 @@ const u8 *rdvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2,
m128 data = load128(buf_end - 16);
m128 v = and128(casemask, data);
u32 z = movemask128(and128(eq128(chars2, v),
-shiftLeft8Bits(eq128(chars1, v))));
+lshiftbyte_m128(eq128(chars1, v), 1)));
if ((buf_end[-17] & CASE_CLEAR) == c1
&& (buf_end[-16] & CASE_CLEAR) == c2) {
z |= 1;
@@ -362,7 +365,7 @@ static really_inline
const u8 *rdvermPrecondition(m128 chars1, m128 chars2, const u8 *buf) {
m128 data = loadu128(buf);
u32 z = movemask128(and128(eq128(chars2, data),
-shiftLeft8Bits(eq128(chars1, data))));
+lshiftbyte_m128(eq128(chars1, data), 1)));
/* no fixup of the boundary required - the aligned run will pick it up */
if (unlikely(z)) {
@@ -380,7 +383,7 @@ const u8 *rdvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) {
m128 data = loadu128(buf);
m128 v = and128(casemask, data);
u32 z = movemask128(and128(eq128(chars2, v),
-shiftLeft8Bits(eq128(chars1, v))));
+lshiftbyte_m128(eq128(chars1, v), 1)));
/* no fixup of the boundary required - the aligned run will pick it up */
if (unlikely(z)) {
return lastMatchOffset(buf + 16, z);
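
Every double-vermicelli variant in this file follows the same pattern: build an equality mask per character, shift the second mask back one byte so adjacent-character matches line up, AND the two, and movemask the result (with a fixup for the pair that straddles the block boundary). A self-contained sketch of the forward case under the new names; the intrinsic mappings for eq128, rshiftbyte_m128, and movemask128 are assumptions:

    #include <emmintrin.h>

    /* Return the offset of the first i in a 16-byte block where
     * buf[i] == c1 and buf[i + 1] == c2, or -1 if there is none.
     * The pair starting at buf[15] needs the caller's boundary fixup,
     * as in dvermSearchAligned above. */
    static int dverm_block(const unsigned char *buf, unsigned char c1,
                           unsigned char c2) {
        __m128i data = _mm_loadu_si128((const __m128i *)buf);          /* loadu128 */
        __m128i m1   = _mm_cmpeq_epi8(_mm_set1_epi8((char)c1), data);  /* eq128 */
        __m128i m2   = _mm_cmpeq_epi8(_mm_set1_epi8((char)c2), data);
        /* rshiftbyte_m128(m2, 1): move each "c2 at i+1" lane down to i so it
         * coincides with "c1 at i". */
        __m128i pair = _mm_and_si128(m1, _mm_srli_si128(m2, 1));
        unsigned z   = (unsigned)_mm_movemask_epi8(pair);              /* movemask128 */
        return z ? __builtin_ctz(z) : -1;                              /* GCC/Clang builtin */
    }

The reverse (rdverm) searches mirror this with lshiftbyte_m128, assumed to wrap _mm_slli_si128.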