mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2026-01-02 14:44:41 +03:00
teddy: apply poison mask after prep_conf_ work
This simplifies the code and removes every use of an all-ones p_mask, which we were otherwise trusting the optimizer to eliminate.
This commit is contained in:
committed by
Matthew Barr
parent
d574557200
commit
3d9a60d023
124
src/fdr/teddy.c
124
src/fdr/teddy.c
@@ -125,36 +125,34 @@ do { \
|
||||
#endif
|
||||
|
||||
static really_inline
|
||||
m128 prep_conf_teddy_m1(const m128 *maskBase, m128 p_mask, m128 val) {
|
||||
m128 prep_conf_teddy_m1(const m128 *maskBase, m128 val) {
|
||||
m128 mask = set16x8(0xf);
|
||||
m128 lo = and128(val, mask);
|
||||
m128 hi = and128(rshift64_m128(val, 4), mask);
|
||||
return and128(and128(pshufb(maskBase[0*2], lo),
|
||||
pshufb(maskBase[0*2+1], hi)), p_mask);
|
||||
return and128(pshufb(maskBase[0*2], lo), pshufb(maskBase[0*2+1], hi));
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 p_mask,
|
||||
m128 val) {
|
||||
m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 val) {
|
||||
m128 mask = set16x8(0xf);
|
||||
m128 lo = and128(val, mask);
|
||||
m128 hi = and128(rshift64_m128(val, 4), mask);
|
||||
m128 r = prep_conf_teddy_m1(maskBase, p_mask, val);
|
||||
m128 r = prep_conf_teddy_m1(maskBase, val);
|
||||
|
||||
m128 res_1 = and128(pshufb(maskBase[1*2], lo),
|
||||
pshufb(maskBase[1*2+1], hi));
|
||||
m128 res_shifted_1 = palignr(res_1, *old_1, 16-1);
|
||||
*old_1 = res_1;
|
||||
return and128(and128(r, p_mask), res_shifted_1);
|
||||
return and128(r, res_shifted_1);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2,
|
||||
m128 p_mask, m128 val) {
|
||||
m128 val) {
|
||||
m128 mask = set16x8(0xf);
|
||||
m128 lo = and128(val, mask);
|
||||
m128 hi = and128(rshift64_m128(val, 4), mask);
|
||||
m128 r = prep_conf_teddy_m2(maskBase, old_1, p_mask, val);
|
||||
m128 r = prep_conf_teddy_m2(maskBase, old_1, val);
|
||||
|
||||
m128 res_2 = and128(pshufb(maskBase[2*2], lo),
|
||||
pshufb(maskBase[2*2+1], hi));
|
||||
@@ -165,11 +163,11 @@ m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2,
|
||||
|
||||
static really_inline
|
||||
m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2,
|
||||
m128 *old_3, m128 p_mask, m128 val) {
|
||||
m128 *old_3, m128 val) {
|
||||
m128 mask = set16x8(0xf);
|
||||
m128 lo = and128(val, mask);
|
||||
m128 hi = and128(rshift64_m128(val, 4), mask);
|
||||
m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, p_mask, val);
|
||||
m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, val);
|
||||
|
||||
m128 res_3 = and128(pshufb(maskBase[3*2], lo),
|
||||
pshufb(maskBase[3*2+1], hi));
|
||||
@@ -201,13 +199,14 @@ hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 1);
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@@ -215,9 +214,9 @@ hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
|
||||
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit1_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr + 16));
|
||||
m128 r_1 = prep_conf_teddy_m1(maskBase, load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit1_teddy);
|
||||
}
|
||||
|
||||
@@ -225,7 +224,8 @@ hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 1);
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
|
||||
}
|
||||
|
||||
@@ -255,13 +255,14 @@ hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 1);
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@@ -269,9 +270,9 @@ hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
|
||||
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr + 16));
|
||||
m128 r_1 = prep_conf_teddy_m1(maskBase, load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
}
|
||||
|
||||
@@ -279,7 +280,8 @@ hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 1);
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
}
|
||||
|
||||
@@ -310,14 +312,14 @@ hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 2);
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
||||
load128(ptr));
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@@ -325,11 +327,9 @@ hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
|
||||
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
||||
load128(ptr));
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
||||
load128(ptr + 16));
|
||||
m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
}
|
||||
|
||||
@@ -337,7 +337,8 @@ hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 2);
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
}
|
||||
|
||||
@@ -368,14 +369,14 @@ hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 2);
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
||||
load128(ptr));
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@@ -383,11 +384,9 @@ hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
|
||||
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
||||
load128(ptr));
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
||||
load128(ptr + 16));
|
||||
m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
}
|
||||
|
||||
@@ -395,7 +394,8 @@ hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 2);
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
}
|
||||
|
||||
@@ -428,14 +428,15 @@ hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 3);
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
p_mask, val_0);
|
||||
val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones128(), load128(ptr));
|
||||
load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@@ -444,10 +445,10 @@ hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones128(), load128(ptr));
|
||||
load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones128(), load128(ptr + 16));
|
||||
load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
}
|
||||
|
||||
@@ -455,8 +456,8 @@ hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 3);
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
p_mask, val_0);
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
}
|
||||
|
||||
@@ -489,14 +490,15 @@ hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 3);
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
p_mask, val_0);
|
||||
val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones128(), load128(ptr));
|
||||
load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@@ -505,10 +507,10 @@ hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones128(), load128(ptr));
|
||||
load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones128(), load128(ptr + 16));
|
||||
load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
}
|
||||
|
||||
@@ -516,8 +518,8 @@ hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 3);
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
p_mask, val_0);
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
}
|
||||
|
||||
@@ -551,14 +553,15 @@ hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 4);
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, p_mask, val_0);
|
||||
&res_old_3, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr));
|
||||
&res_old_3, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@@ -567,10 +570,10 @@ hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr));
|
||||
&res_old_3, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr + 16));
|
||||
&res_old_3, load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
}
|
||||
|
||||
@@ -579,7 +582,8 @@ hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 4);
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, p_mask, val_0);
|
||||
&res_old_3, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
}
|
||||
|
||||
@@ -613,14 +617,15 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 4);
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, p_mask, val_0);
|
||||
&res_old_3, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr));
|
||||
&res_old_3, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@@ -629,10 +634,10 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr));
|
||||
&res_old_3, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr + 16));
|
||||
&res_old_3, load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
}
|
||||
|
||||
@@ -641,7 +646,8 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 4);
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, p_mask, val_0);
|
||||
&res_old_3, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user