mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-11-18 18:20:35 +03:00
Speed up truffle with 256b TBL instructions
256b wide SVE vectors allow some simplification of truffle. Up to 40% speedup on Graviton3, going from 12500 MB/s to 17000 MB/s on the microbenchmark. SVE2 also offers this capability for 128b vectors, with a speedup of around 25% compared to normal SVE. Add unit tests and a benchmark for this wide variant. Signed-off-by: Yoan Picchi <yoan.picchi@arm.com>
This commit is contained in:
@@ -73,7 +73,12 @@ const u8 *run_hwlm_accel(const union AccelAux *aux, const u8 *ptr,
|
||||
return shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end);
|
||||
case ACCEL_TRUFFLE:
|
||||
DEBUG_PRINTF("truffle\n");
|
||||
return truffleExec(aux->truffle.mask1, aux->truffle.mask2, ptr, end);
|
||||
return truffleExec(aux->truffle.mask_lo, aux->truffle.mask_hi, ptr, end);
|
||||
#ifdef CAN_USE_WIDE_TRUFFLE
|
||||
case ACCEL_TRUFFLE_WIDE:
|
||||
DEBUG_PRINTF("truffle wide\n");
|
||||
return truffleExecWide(aux->truffle.mask, ptr, end);
|
||||
#endif // CAN_USE_WIDE_TRUFFLE
|
||||
default:
|
||||
/* no acceleration, fall through and return current ptr */
|
||||
DEBUG_PRINTF("no accel; %u\n", (int)aux->accel_type);
|
||||
|
||||
Reference in New Issue
Block a user