mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-11-17 01:41:51 +03:00
Speed up truffle with 256b TBL instructions
256b wide SVE vectors allow some simplification of truffle. Up to 40% speedup on Graviton3, going from 12500 MB/s to 17000 MB/s on the microbenchmark. SVE2 also offers this capability for 128b vectors, with a speedup of around 25% compared to normal SVE. Add unit tests and a benchmark for this wide variant. Signed-off-by: Yoan Picchi <yoan.picchi@arm.com>
This commit is contained in:
@@ -97,11 +97,20 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) {
|
||||
|
||||
if (outs <= ACCEL_MAX_STOP_CHAR) {
|
||||
DEBUG_PRINTF("building Truffle for %zu chars\n", outs);
|
||||
aux->accel_type = ACCEL_TRUFFLE;
|
||||
aux->truffle.offset = offset;
|
||||
truffleBuildMasks(info.single_stops,
|
||||
reinterpret_cast<u8 *>(&aux->truffle.mask1),
|
||||
reinterpret_cast<u8 *>(&aux->truffle.mask2));
|
||||
#if defined(CAN_USE_WIDE_TRUFFLE)
|
||||
if(CAN_USE_WIDE_TRUFFLE) {
|
||||
aux->accel_type = ACCEL_TRUFFLE_WIDE;
|
||||
truffleBuildMasksWide(info.single_stops,
|
||||
reinterpret_cast<u8 *>(&aux->truffle.mask));
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
aux->accel_type = ACCEL_TRUFFLE;
|
||||
truffleBuildMasks(info.single_stops,
|
||||
reinterpret_cast<u8 *>(&aux->truffle.mask_lo),
|
||||
reinterpret_cast<u8 *>(&aux->truffle.mask_hi));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user