Prefetch works best when addresses are 64-byte aligned, so round the prefetch base addresses down to a 64-byte boundary before issuing the prefetches

This commit is contained in:
Konstantinos Margaritis
2021-03-12 10:10:53 +02:00
parent 521f233cfd
commit d3ff893871
6 changed files with 26 additions and 22 deletions

View File

@@ -634,10 +634,11 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
assert(ISALIGNED_N(q->state, 2));
u32 s = *(u16 *)q->state;
__builtin_prefetch(&m->remap[0]);
__builtin_prefetch(&m->remap[64]);
__builtin_prefetch(&m->remap[128]);
__builtin_prefetch(&m->remap[192]);
const u8 *base = ROUNDDOWN_PTR(&m->remap[0], 64);
__builtin_prefetch(base);
__builtin_prefetch(base + 64);
__builtin_prefetch(base + 128);
__builtin_prefetch(base + 192);
if (q->report_current) {
assert(s);
@@ -795,10 +796,11 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
u32 s = *(u8 *)q->state;
__builtin_prefetch(&m->remap[0]);
__builtin_prefetch(&m->remap[64]);
__builtin_prefetch(&m->remap[128]);
__builtin_prefetch(&m->remap[192]);
const u8 *base = ROUNDDOWN_PTR(&m->remap[0], 64);
__builtin_prefetch(base);
__builtin_prefetch(base + 64);
__builtin_prefetch(base + 128);
__builtin_prefetch(base + 192);
if (q->report_current) {
assert(s);

View File

@@ -889,10 +889,11 @@ char nfaExecMcSheng16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
return MO_ALIVE;
}
__builtin_prefetch(&m->remap[0]);
__builtin_prefetch(&m->remap[64]);
__builtin_prefetch(&m->remap[128]);
__builtin_prefetch(&m->remap[192]);
const u8 *base = ROUNDDOWN_PTR(&m->remap[0], 64);
__builtin_prefetch(base);
__builtin_prefetch(base + 64);
__builtin_prefetch(base + 128);
__builtin_prefetch(base + 192);
while (1) {
assert(q->cur < q->end);
@@ -1022,10 +1023,11 @@ char nfaExecMcSheng8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
return MO_ALIVE;
}
__builtin_prefetch(&m->remap[0]);
__builtin_prefetch(&m->remap[64]);
__builtin_prefetch(&m->remap[128]);
__builtin_prefetch(&m->remap[192]);
const u8 *base = ROUNDDOWN_PTR(&m->remap[0], 64);
__builtin_prefetch(base);
__builtin_prefetch(base + 64);
__builtin_prefetch(base + 128);
__builtin_prefetch(base + 192);
while (1) {
DEBUG_PRINTF("%s @ %llu\n", q->items[q->cur].type == MQE_TOP ? "TOP" :

View File

@@ -179,7 +179,7 @@ const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
const u8 *last_block = buf_end - 16;
for (const u8 *itPtr = buf; itPtr + 4*16 <= last_block; itPtr += 4*16) {
for (const u8 *itPtr = ROUNDDOWN_PTR(buf, 64); itPtr + 4*16 <= last_block; itPtr += 4*16) {
__builtin_prefetch(itPtr);
}
while (buf < last_block) {