Mirror of https://github.com/VectorCamp/vectorscan.git, synced 2025-09-29 19:24:25 +03:00
Add truffle SVE implementation
Signed-off-by: Yoan Picchi <yoan.picchi@arm.com>
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2015-2017, Intel Corporation
  * Copyright (c) 2020-2021, VectorCamp PC
+ * Copyright (c) 2023, Arm Limited
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -99,3 +100,64 @@ const u8 *last_zero_match_inverted<16>(const u8 *buf, SuperVector<16> mask, u16
     }
 }
 
+#ifdef HAVE_SVE
+
+
+static really_inline
+uint64_t last_non_zero_real(svuint8_t mask) {
+    const svuint64_t leading_zeros = svclz_x(svptrue_b64(), svreinterpret_u64(mask));
+
+    uint64_t last_active_lane;
+
+    svbool_t remaining_mask = svptrue_b64();
+    uint64_t i = 0;
+    while(svptest_any(svptrue_b64(), remaining_mask)) {
+        svbool_t single_lane_mask = svpnext_b64(remaining_mask, svpfalse());
+        remaining_mask = sveor_z(svptrue_b64(), remaining_mask, single_lane_mask);
+        uint64_t active_element = svlastb(single_lane_mask, leading_zeros);
+        if(active_element<64) {
+            uint64_t lane_index = (i+1)*8 - (active_element/8) - 1;
+            last_active_lane = lane_index;
+        }
+        i++;
+    }
+    return last_active_lane;
+}
+
+/*
+ * It is assumed the mask has the value 0 for all inactive lanes, if any.
+ */
+static really_inline
+uint64_t last_non_zero(const size_t vector_size_int_8, svuint8_t mask) {
+    const svbool_t result_pred = svcmpne(svptrue_b8(), mask, 0);
+
+    if (svptest_any(svptrue_b8(), result_pred)) {
+        return last_non_zero_real(mask);
+    } else {
+        return vector_size_int_8;
+    }
+}
+
+/*
+ * It is assumed the mask has the value 0 for all inactive lanes, if any.
+ */
+static really_inline
+uint64_t first_non_zero(const size_t vector_size_int_8, svuint8_t mask) {
+    const svbool_t result_pred = svcmpne(svptrue_b8(), mask, 0);
+
+    if (svptest_any(svptrue_b8(), result_pred)) {
+
+        // We don't have a CTZ instruction, but we can work around it by reversing the lane order.
+        const svuint64_t rev_large_res = svreinterpret_u64(svrev(mask));
+        // Now each group of 8 leading zeros means one empty lane. So if we have 18 leading zeros,
+        // that means the third lane has a matching character.
+        uint64_t first_active_lane = last_non_zero_real(svreinterpret_u8(rev_large_res));
+        // We reversed the lanes, so reverse the index back.
+        first_active_lane = (vector_size_int_8-1) - first_active_lane;
+        return first_active_lane;
+    } else {
+        return vector_size_int_8;
+    }
+}
+
+#endif //HAVE_SVE
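
For reference, the contract these helpers implement can be restated in plain scalar C. The sketch below is not part of the patch; it only mirrors the apparent semantics: return the index of the last (or first) non-zero byte lane, or the vector length when no lane is set. It also motivates the arithmetic in last_non_zero_real: on a little-endian target each 64-bit element packs 8 byte lanes, svclz counts zero bits from the most significant end, so active_element/8 is the number of empty byte lanes at the top of chunk i, and (i+1)*8 - (active_element/8) - 1 is the global index of the highest non-zero lane in that chunk.

#include <stddef.h>
#include <stdint.h>

/* Hypothetical scalar reference, for illustration only: mirrors the contract
 * of last_non_zero()/first_non_zero() in the patch above. */
static uint64_t last_non_zero_ref(const uint8_t *lanes, size_t vec_len) {
    uint64_t last = vec_len;              /* vec_len means "no non-zero lane" */
    for (size_t i = 0; i < vec_len; i++) {
        if (lanes[i] != 0) {
            last = i;                     /* keeps overwriting; ends at the last hit */
        }
    }
    return last;
}

static uint64_t first_non_zero_ref(const uint8_t *lanes, size_t vec_len) {
    for (size_t i = 0; i < vec_len; i++) {
        if (lanes[i] != 0) {
            return i;                     /* first hit wins */
        }
    }
    return vec_len;
}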
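A hypothetical caller, also not taken from this patch, shows how first_non_zero might be used to turn an SVE match mask into a pointer into the scanned block. The name first_match_from_mask is invented, and using svcntb() for the lane count is an assumption; the actual truffle callers in this commit may look different. Returning the vector length as the "no match" sentinel lets the caller test a single value regardless of the runtime SVE vector length.

// Hypothetical usage sketch (not from this patch). Assumes the mask already
// has zeros in all inactive lanes, as the helper comments require, and relies
// on the codebase's u8 and really_inline definitions.
static really_inline
const u8 *first_match_from_mask(const u8 *buf, svuint8_t mask) {
    const size_t vec_len = svcntb();                // byte lanes in one SVE vector
    uint64_t lane = first_non_zero(vec_len, mask);  // vec_len means "no match"
    return (lane == vec_len) ? NULL : buf + lane;
}
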
@@ -138,7 +138,7 @@ struct BaseVector<64>
     static constexpr u16 previous_size = 32;
 };
 
-// 128 bit implementation
+// 256 bit implementation
 template <>
 struct BaseVector<32>
 {