mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-11-19 02:30:35 +03:00
Speed up truffle with 256b TBL instructions
256b wide SVE vectors allow some simplification of truffle. Up to 40% speedup on graviton3, going from 12500 MB/s to 17000 MB/s on the microbenchmark. SVE2 also offers this capability for 128b vectors, with a speedup of around 25% compared to normal SVE. Add unit tests and a benchmark for this wide variant. Signed-off-by: Yoan Picchi <yoan.picchi@arm.com>
This commit is contained in:
@@ -181,6 +181,9 @@ void dumpAccelText(FILE *f, const union AccelAux *accel) {
|
||||
case ACCEL_TRUFFLE:
|
||||
fprintf(f, ":M");
|
||||
break;
|
||||
case ACCEL_TRUFFLE_WIDE:
|
||||
fprintf(f, ":MM");
|
||||
break;
|
||||
default:
|
||||
fprintf(f, ":??");
|
||||
break;
|
||||
@@ -200,6 +203,7 @@ void dumpAccelDot(FILE *f, u16 i, const union AccelAux *accel) {
|
||||
case ACCEL_SHUFTI:
|
||||
case ACCEL_DSHUFTI:
|
||||
case ACCEL_TRUFFLE:
|
||||
case ACCEL_TRUFFLE_WIDE:
|
||||
fprintf(f, "%u [ color = darkgreen style=diagonals ];\n", i);
|
||||
break;
|
||||
default:
|
||||
|
||||
Reference in New Issue
Block a user