Borrow cache prefetching tricks from the Marvell port; they seem to improve performance by 5-28%.

This commit is contained in:
Konstantinos Margaritis
2021-01-15 17:42:11 +02:00
committed by Konstantinos Margaritis
parent 700a0a093c
commit e830470028
4 changed files with 37 additions and 7 deletions

View File

@@ -634,6 +634,11 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
assert(ISALIGNED_N(q->state, 2));
u32 s = *(u16 *)q->state;
__builtin_prefetch(&m->remap[0]);
__builtin_prefetch(&m->remap[64]);
__builtin_prefetch(&m->remap[128]);
__builtin_prefetch(&m->remap[192]);
if (q->report_current) {
assert(s);
assert(get_aux(m, s)->accept);
@@ -790,6 +795,11 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
u32 s = *(u8 *)q->state;
__builtin_prefetch(&m->remap[0]);
__builtin_prefetch(&m->remap[64]);
__builtin_prefetch(&m->remap[128]);
__builtin_prefetch(&m->remap[192]);
if (q->report_current) {
assert(s);
assert(s >= m->accept_limit_8);