mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-11-19 10:34:25 +03:00
Speed up truffle with 256b TBL instructions
256b-wide SVE vectors allow some simplification of truffle, giving up to a 40% speedup on Graviton3 (from 12500 MB/s to 17000 MB/s on the microbenchmark). SVE2 also offers this capability for 128b vectors, with a speedup of around 25% compared to normal SVE. Add unit tests and a benchmark for this wide variant. Signed-off-by: Yoan Picchi <yoan.picchi@arm.com>
This commit is contained in:
@@ -53,5 +53,11 @@
|
||||
#define HAVE_SVE2_BITPERM
|
||||
#endif
|
||||
|
||||
// CAN_USE_WIDE_TRUFFLE: presumably gates the "wide" truffle variant -- confirm
// against the users of this macro.
//  - With HAVE_SVE2 defined it is the constant 1.
//  - Otherwise, with HAVE_SVE defined, it expands to a call to svcntb()
//    compared against 32, so it is an expression evaluated wherever the macro
//    is used rather than a preprocessor constant.
// NOTE(review): the macro is left undefined when neither HAVE_SVE2 nor
// HAVE_SVE is defined -- confirm that every use is guarded accordingly.
#if defined(HAVE_SVE2)
|
||||
#define CAN_USE_WIDE_TRUFFLE 1
|
||||
#elif defined(HAVE_SVE)
|
||||
// NOTE(review): presumably svcntb() is the SVE vector length in bytes, making
// this "vector is at least 256 bits wide" -- confirm against the Arm ACLE.
#define CAN_USE_WIDE_TRUFFLE (svcntb() >= 32)
|
||||
#endif
|
||||
|
||||
#endif // UTIL_ARCH_ARM_H_
|
||||
|
||||
|
||||
@@ -34,5 +34,9 @@
|
||||
typedef int32x4_t m128;
|
||||
#endif
|
||||
|
||||
/*
 * Declares m256 as a struct of two m128 halves (lo, hi) when the guard below
 * holds.
 * NOTE(review): defined() only tests preprocessor macros, while the visible
 * code introduces m128 through a typedef. Unless m128 is also #define'd
 * somewhere outside this view, defined(m128) evaluates to false and this
 * typedef is never compiled -- confirm the intended guard.
 */
#if !defined(m256) && defined(m128)
|
||||
typedef struct {m128 lo; m128 hi;} m256;
|
||||
#endif
|
||||
|
||||
#endif /* SIMD_TYPES_ARM_H */
|
||||
|
||||
|
||||
@@ -31,3 +31,6 @@
|
||||
typedef int32x4_t m128;
|
||||
#endif
|
||||
|
||||
/*
 * Declares m256 as a struct of two m128 halves (lo, hi) when the guard below
 * holds.
 * NOTE(review): defined() only tests preprocessor macros, while the visible
 * code introduces m128 through a typedef. Unless m128 is also #define'd
 * somewhere outside this view, defined(m128) evaluates to false and this
 * typedef is never compiled -- confirm the intended guard.
 */
#if !defined(m256) && defined(m128)
|
||||
typedef struct {m128 lo; m128 hi;} m256;
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user