Apply some consistency to the names we give shifts

This commit is contained in:
Matthew Barr
2016-06-15 11:02:42 +10:00
parent c76ff285e7
commit e3d416a6ea
13 changed files with 127 additions and 156 deletions

View File

@@ -131,7 +131,7 @@ m128 getInitState(const struct FDR *fdr, u8 len_history, const u8 *ft,
u32 tmp = lv_u16(z->start + z->shift - 1, z->buf, z->end + 1);
tmp &= fdr->domainMask;
s = *((const m128 *)ft + tmp);
s = shiftRight8Bits(s);
s = rshiftbyte_m128(s, 1);
} else {
s = fdr->start;
}
@@ -185,20 +185,20 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
m128 st14 = *(const m128 *)(ft + v14*8);
m128 st15 = *(const m128 *)(ft + v15*8);
st1 = byteShiftLeft128(st1, 1);
st2 = byteShiftLeft128(st2, 2);
st3 = byteShiftLeft128(st3, 3);
st4 = byteShiftLeft128(st4, 4);
st5 = byteShiftLeft128(st5, 5);
st6 = byteShiftLeft128(st6, 6);
st7 = byteShiftLeft128(st7, 7);
st9 = byteShiftLeft128(st9, 1);
st10 = byteShiftLeft128(st10, 2);
st11 = byteShiftLeft128(st11, 3);
st12 = byteShiftLeft128(st12, 4);
st13 = byteShiftLeft128(st13, 5);
st14 = byteShiftLeft128(st14, 6);
st15 = byteShiftLeft128(st15, 7);
st1 = lshiftbyte_m128(st1, 1);
st2 = lshiftbyte_m128(st2, 2);
st3 = lshiftbyte_m128(st3, 3);
st4 = lshiftbyte_m128(st4, 4);
st5 = lshiftbyte_m128(st5, 5);
st6 = lshiftbyte_m128(st6, 6);
st7 = lshiftbyte_m128(st7, 7);
st9 = lshiftbyte_m128(st9, 1);
st10 = lshiftbyte_m128(st10, 2);
st11 = lshiftbyte_m128(st11, 3);
st12 = lshiftbyte_m128(st12, 4);
st13 = lshiftbyte_m128(st13, 5);
st14 = lshiftbyte_m128(st14, 6);
st15 = lshiftbyte_m128(st15, 7);
*s = or128(*s, st0);
*s = or128(*s, st1);
@@ -209,7 +209,7 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
*s = or128(*s, st6);
*s = or128(*s, st7);
*conf0 = movq(*s);
*s = byteShiftRight128(*s, 8);
*s = rshiftbyte_m128(*s, 8);
*conf0 ^= ~0ULL;
*s = or128(*s, st8);
@@ -221,7 +221,7 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
*s = or128(*s, st14);
*s = or128(*s, st15);
*conf8 = movq(*s);
*s = byteShiftRight128(*s, 8);
*s = rshiftbyte_m128(*s, 8);
*conf8 ^= ~0ULL;
}
@@ -252,19 +252,19 @@ void get_conf_stride_2(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
m128 st12 = *(const m128 *)(ft + v12*8);
m128 st14 = *(const m128 *)(ft + v14*8);
st2 = byteShiftLeft128(st2, 2);
st4 = byteShiftLeft128(st4, 4);
st6 = byteShiftLeft128(st6, 6);
st10 = byteShiftLeft128(st10, 2);
st12 = byteShiftLeft128(st12, 4);
st14 = byteShiftLeft128(st14, 6);
st2 = lshiftbyte_m128(st2, 2);
st4 = lshiftbyte_m128(st4, 4);
st6 = lshiftbyte_m128(st6, 6);
st10 = lshiftbyte_m128(st10, 2);
st12 = lshiftbyte_m128(st12, 4);
st14 = lshiftbyte_m128(st14, 6);
*s = or128(*s, st0);
*s = or128(*s, st2);
*s = or128(*s, st4);
*s = or128(*s, st6);
*conf0 = movq(*s);
*s = byteShiftRight128(*s, 8);
*s = rshiftbyte_m128(*s, 8);
*conf0 ^= ~0ULL;
*s = or128(*s, st8);
@@ -272,7 +272,7 @@ void get_conf_stride_2(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
*s = or128(*s, st12);
*s = or128(*s, st14);
*conf8 = movq(*s);
*s = byteShiftRight128(*s, 8);
*s = rshiftbyte_m128(*s, 8);
*conf8 ^= ~0ULL;
}
@@ -295,19 +295,19 @@ void get_conf_stride_4(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
m128 st8 = *(const m128 *)(ft + v8*8);
m128 st12 = *(const m128 *)(ft + v12*8);
st4 = byteShiftLeft128(st4, 4);
st12 = byteShiftLeft128(st12, 4);
st4 = lshiftbyte_m128(st4, 4);
st12 = lshiftbyte_m128(st12, 4);
*s = or128(*s, st0);
*s = or128(*s, st4);
*conf0 = movq(*s);
*s = byteShiftRight128(*s, 8);
*s = rshiftbyte_m128(*s, 8);
*conf0 ^= ~0ULL;
*s = or128(*s, st8);
*s = or128(*s, st12);
*conf8 = movq(*s);
*s = byteShiftRight128(*s, 8);
*s = rshiftbyte_m128(*s, 8);
*conf8 ^= ~0ULL;
}

View File

@@ -79,7 +79,7 @@ const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
do { \
if (unlikely(isnonzero128(var))) { \
u64a lo = movq(var); \
u64a hi = movq(byteShiftRight128(var, 8)); \
u64a hi = movq(rshiftbyte_m128(var, 8)); \
if (unlikely(lo)) { \
conf_fn(&lo, bucket, offset, confBase, reason, a, ptr, \
control, &last_match); \
@@ -97,9 +97,9 @@ do { \
do { \
if (unlikely(isnonzero128(var))) { \
u32 part1 = movd(var); \
u32 part2 = movd(byteShiftRight128(var, 4)); \
u32 part3 = movd(byteShiftRight128(var, 8)); \
u32 part4 = movd(byteShiftRight128(var, 12)); \
u32 part2 = movd(rshiftbyte_m128(var, 4)); \
u32 part3 = movd(rshiftbyte_m128(var, 8)); \
u32 part4 = movd(rshiftbyte_m128(var, 12)); \
if (unlikely(part1)) { \
conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \
control, &last_match); \
@@ -128,7 +128,7 @@ static really_inline
/*
 * Combines two pshufb results for one m128 input value with p_mask.
 *
 *   maskBase  array of m128 values; entries [0] and [1] are read here.
 *   p_mask    m128 value ANDed into the final result.
 *   val       m128 input value from which both pshufb operands are derived.
 *
 * Returns pshufb(maskBase[0], lo) & pshufb(maskBase[1], hi) & p_mask, where
 * lo = val & mask and hi = (val shifted right by 4) & mask.
 */
m128 prep_conf_teddy_m1(const m128 *maskBase, m128 p_mask, m128 val) {
/* presumably 0x0f replicated into each of 16 byte lanes - TODO confirm
 * against the set16x8 definition */
m128 mask = set16x8(0xf);
m128 lo = and128(val, mask);
/* NOTE(review): this listing is a diff whose +/- markers were stripped, so
 * the next two lines are two spellings of ONE statement (the commit renames
 * rshift2x64 to rshift64_m128); presumably the first is the pre-commit form
 * and only the second exists in the resulting source. */
m128 hi = and128(rshift2x64(val, 4), mask);
m128 hi = and128(rshift64_m128(val, 4), mask);
/* maskBase[0*2] is paired with lo and maskBase[0*2+1] with hi */
return and128(and128(pshufb(maskBase[0*2], lo),
pshufb(maskBase[0*2+1], hi)), p_mask);
}
@@ -138,7 +138,7 @@ m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 p_mask,
m128 val) {
m128 mask = set16x8(0xf);
m128 lo = and128(val, mask);
m128 hi = and128(rshift2x64(val, 4), mask);
m128 hi = and128(rshift64_m128(val, 4), mask);
m128 r = prep_conf_teddy_m1(maskBase, p_mask, val);
m128 res_1 = and128(pshufb(maskBase[1*2], lo),
@@ -153,7 +153,7 @@ m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2,
m128 p_mask, m128 val) {
m128 mask = set16x8(0xf);
m128 lo = and128(val, mask);
m128 hi = and128(rshift2x64(val, 4), mask);
m128 hi = and128(rshift64_m128(val, 4), mask);
m128 r = prep_conf_teddy_m2(maskBase, old_1, p_mask, val);
m128 res_2 = and128(pshufb(maskBase[2*2], lo),
@@ -168,7 +168,7 @@ m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2,
m128 *old_3, m128 p_mask, m128 val) {
m128 mask = set16x8(0xf);
m128 lo = and128(val, mask);
m128 hi = and128(rshift2x64(val, 4), mask);
m128 hi = and128(rshift64_m128(val, 4), mask);
m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, p_mask, val);
m128 res_3 = and128(pshufb(maskBase[3*2], lo),

View File

@@ -371,7 +371,7 @@ void bit_array_fast_teddy(m128 var, u16 *bitArr, u32 *arrCnt, u32 offset) {
64 * (offset);
*arrCnt += 1;
}
u64a part_1 = movq(byteShiftRight128(var, 8));
u64a part_1 = movq(rshiftbyte_m128(var, 8));
while (unlikely(part_1)) {
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) +
64 * (offset + 1);
@@ -384,19 +384,19 @@ void bit_array_fast_teddy(m128 var, u16 *bitArr, u32 *arrCnt, u32 offset) {
32 * (offset * 2);
*arrCnt += 1;
}
u32 part_1 = movd(byteShiftRight128(var, 4));
u32 part_1 = movd(rshiftbyte_m128(var, 4));
while (unlikely(part_1)) {
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) +
32 * (offset * 2 + 1);
*arrCnt += 1;
}
u32 part_2 = movd(byteShiftRight128(var, 8));
u32 part_2 = movd(rshiftbyte_m128(var, 8));
while (unlikely(part_2)) {
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_2) +
32 * (offset * 2 + 2);
*arrCnt += 1;
}
u32 part_3 = movd(byteShiftRight128(var, 12));
u32 part_3 = movd(rshiftbyte_m128(var, 12));
while (unlikely(part_3)) {
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_3) +
32 * (offset * 2 + 3);
@@ -410,7 +410,7 @@ static really_inline
/*
 * m256 counterpart of the one-mask prep step: combines two vpshufb results
 * for one m256 input value with p_mask.
 *
 *   maskBase  array of m256 values; entries [0] and [1] are read here.
 *   p_mask    m256 value ANDed into the final result.
 *   val       m256 input value from which both vpshufb operands are derived.
 *
 * Returns vpshufb(maskBase[0], lo) & vpshufb(maskBase[1], hi) & p_mask, where
 * lo = val & mask and hi = (val shifted right by 4) & mask.
 */
m256 prep_conf_fat_teddy_m1(const m256 *maskBase, m256 p_mask, m256 val) {
/* presumably 0x0f replicated into each of 32 byte lanes - TODO confirm
 * against the set32x8 definition */
m256 mask = set32x8(0xf);
m256 lo = and256(val, mask);
/* NOTE(review): this listing is a diff whose +/- markers were stripped, so
 * the next two lines are two spellings of ONE statement (the commit renames
 * rshift4x64 to rshift64_m256); presumably the first is the pre-commit form
 * and only the second exists in the resulting source. */
m256 hi = and256(rshift4x64(val, 4), mask);
m256 hi = and256(rshift64_m256(val, 4), mask);
/* maskBase[0*2] is paired with lo and maskBase[0*2+1] with hi */
return and256(and256(vpshufb(maskBase[0*2], lo),
vpshufb(maskBase[0*2+1], hi)), p_mask);
}
@@ -420,7 +420,7 @@ m256 prep_conf_fat_teddy_m2(const m256 *maskBase, m256 *old_1, m256 p_mask,
m256 val) {
m256 mask = set32x8(0xf);
m256 lo = and256(val, mask);
m256 hi = and256(rshift4x64(val, 4), mask);
m256 hi = and256(rshift64_m256(val, 4), mask);
m256 r = prep_conf_fat_teddy_m1(maskBase, p_mask, val);
m256 res_1 = and256(vpshufb(maskBase[1*2], lo),
@@ -435,7 +435,7 @@ m256 prep_conf_fat_teddy_m3(const m256 *maskBase, m256 *old_1, m256 *old_2,
m256 p_mask, m256 val) {
m256 mask = set32x8(0xf);
m256 lo = and256(val, mask);
m256 hi = and256(rshift4x64(val, 4), mask);
m256 hi = and256(rshift64_m256(val, 4), mask);
m256 r = prep_conf_fat_teddy_m2(maskBase, old_1, p_mask, val);
m256 res_2 = and256(vpshufb(maskBase[2*2], lo),
@@ -450,7 +450,7 @@ m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2,
m256 *old_3, m256 p_mask, m256 val) {
m256 mask = set32x8(0xf);
m256 lo = and256(val, mask);
m256 hi = and256(rshift4x64(val, 4), mask);
m256 hi = and256(rshift64_m256(val, 4), mask);
m256 r = prep_conf_fat_teddy_m3(maskBase, old_1, old_2, p_mask, val);
m256 res_3 = and256(vpshufb(maskBase[3*2], lo),
@@ -464,7 +464,7 @@ static really_inline
/*
 * Combines two vpshufb results for one m256 input value with p_mask, using
 * caller-supplied mask vectors rather than building them locally.
 *
 *   val      m256 input value.
 *   mask     m256 value ANDed with val (and with the shifted val) to form
 *            the two vpshufb operands.
 *   maskLo   first argument to vpshufb for the lo operand.
 *   maskHi   first argument to vpshufb for the hi operand.
 *   p_mask   m256 value ANDed into the final result.
 *
 * Returns vpshufb(maskLo, lo) & vpshufb(maskHi, hi) & p_mask.
 */
m256 prep_conf_fast_teddy_m1(m256 val, m256 mask, m256 maskLo, m256 maskHi,
m256 p_mask) {
m256 lo = and256(val, mask);
/* NOTE(review): this listing is a diff whose +/- markers were stripped, so
 * the next two lines are two spellings of ONE statement (the commit renames
 * rshift4x64 to rshift64_m256); presumably the first is the pre-commit form
 * and only the second exists in the resulting source. */
m256 hi = and256(rshift4x64(val, 4), mask);
m256 hi = and256(rshift64_m256(val, 4), mask);
m256 res = and256(vpshufb(maskLo, lo), vpshufb(maskHi, hi));
return and256(res, p_mask);
}