mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-11-15 17:02:14 +03:00
Speed up truffle with 256b TBL instructions
256b wide SVE vectors allow some simplification of truffle. Up to 40% speedup on graviton3, going from 12500 MB/s to 17000 MB/s on the microbenchmark. SVE2 also offers this capability for 128b vectors, with a speedup of around 25% compared to normal SVE. Add unit tests and a benchmark for this wide variant. Signed-off-by: Yoan Picchi <yoan.picchi@arm.com>
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2020, 2021, VectorCamp PC
|
||||
* Copyright (c) 2024, Arm Limited
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -44,7 +45,18 @@ public:
|
||||
size_t size;
|
||||
|
||||
// Shufti/Truffle
|
||||
m128 lo, hi;
|
||||
// Truffle mask storage. The union overlays one m256 value (truffle_mask)
// with an anonymous struct holding two m128 members (truffle_mask_lo and
// truffle_mask_hi), so the same bytes can be read either as a single wide
// mask or as two separate halves.
// The declaration order of the two m128 members is selected by
// SIMDE_ENDIAN_ORDER: "lo" comes first on little-endian builds, "hi" comes
// first otherwise.
// NOTE(review): presumably the swap keeps truffle_mask_lo aligned with the
// low half of truffle_mask on either endianness -- confirm against the code
// that fills and reads these masks.
union {
|
||||
m256 truffle_mask;
|
||||
struct {
|
||||
#if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)
|
||||
m128 truffle_mask_lo;
|
||||
m128 truffle_mask_hi;
|
||||
#else
|
||||
m128 truffle_mask_hi;
|
||||
m128 truffle_mask_lo;
|
||||
#endif
|
||||
};
|
||||
};
|
||||
ue2::CharReach chars;
|
||||
std::vector<u8> buf;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user