From 8f26c5e65f6c34fd8fa3aa80705b507904d77b09 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 28 Mar 2023 21:34:35 +0000 Subject: [PATCH 01/25] Fix compilation with libcxx 16 After upgrading our (ClickHouse's) libcxx from 15 to 16, the compiler started to complain about usage of an incomplete type "RoseInstruction" in this (header) function: void RoseProgram::replace(Iter it, std::unique_ptr ri) { ... The reason is that libcxx 16 is the first version which implements C++23 constexpr std::unique_ptr (P2273R3, see (*)). RoseProgram::replace() happens to be be const-evaluatable and the compiler tries to run std::unique_ptr's ctor + dtor. This fails because at this point RoseInstruction isn't defined yet. There are two ways of fixing this: 1. Include rose_build_instruction.h (which contains RoseInstruction) into rose_build_program.h. Disadvantage: The new include will propagate transitively into all callers. 2. Move the function implementation into the source file which sees RoseInstruction's definition already. Disadvantage: Template instantiation is no longer automatic, instead there must be either a) explicit template instantiation (e.g. in rose_build_program.cpp) or b) all callers which instantiate the function must live in the same source file and do the instantiations by themselves. Fortunately, the latter is the case here, but potential future code outside rose_build_program.cpp will require ugly explicit instantiation. (*) https://en.cppreference.com/w/cpp/23 --- src/rose/rose_build_program.cpp | 9 +++++++++ src/rose/rose_build_program.h | 8 +------- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index 3ddf2fcd..8e179e36 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -204,6 +204,15 @@ void RoseProgram::add_block(RoseProgram &&block) { make_move_iterator(block.prog.end())); } +template +void RoseProgram::replace(Iter it, std::unique_ptr ri) { + assert(!prog.empty()); + + const RoseInstruction *old_ptr = it->get(); + *it = move(ri); + update_targets(prog.begin(), prog.end(), old_ptr, it->get()); +} + bytecode_ptr writeProgram(RoseEngineBlob &blob, const RoseProgram &program) { u32 total_len = 0; diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h index 6ad5529c..1882279d 100644 --- a/src/rose/rose_build_program.h +++ b/src/rose/rose_build_program.h @@ -124,13 +124,7 @@ public: * \brief Replace the instruction pointed to by the given iterator. */ template - void replace(Iter it, std::unique_ptr ri) { - assert(!prog.empty()); - - const RoseInstruction *old_ptr = it->get(); - *it = move(ri); - update_targets(prog.begin(), prog.end(), old_ptr, it->get()); - } + void replace(Iter it, std::unique_ptr ri); }; bytecode_ptr writeProgram(RoseEngineBlob &blob, From 8a54576861c7006e4b06abb8c04da6c8facf213d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 10 May 2023 15:38:27 +0200 Subject: [PATCH 02/25] Use std::vector instead of boost::container::small_vector under MSan There are some issues with dtors in boost::container::small_vector and/or vector, which is reported by MSan as an error. The suppression __attribute__((no_sanitize_memory)) works until clang-15, but since clang-16 it does not. It looks like before clang-16 this no_sanitize_memory works for all child functions, while since clang-16 only for this function. I've tried to add few others, but a) it looks icky b) I haven't managed to finish this process. Also I've measured the performance and it hadn't been changed. Though boost::small_vector should be faster then std::vector, but apparently my particular case hadn't affected too much. And one more thing, MSan reports this only with -O0, with -O3 - it is not reproduced.
MSan report: _Note: it was slightly trimmed_ ``` ==11364==WARNING: MemorySanitizer: use-of-uninitialized-value 2023.05.10 15:40:53.000233 [ 11620 ] {} AsynchronousMetrics: MemoryTracking: was 1012.32 MiB, peak 1012.32 MiB, free memory in arenas 0.00 B, will set to 1015.82 MiB (RSS), difference: 3.50 MiB 0 0x55558d13289f in boost::container::vector_alloc_holder, std::__1::allocator, void>, unsigned long, boost::move_detail::integral_constant>::deallocate(std::__1::pair* const&, unsigned long) .cmake-llvm16-msan/./contrib/boost/boost/container/vector.hpp:455:7 1 0x55558d139e8e in boost::container::vector_alloc_holder, std::__1::allocator, void>, unsigned long, boost::move_detail::integral_constant>::~vector_alloc_holder() .cmake-llvm16-msan/./contrib/boost/boost/container/vector.hpp:420:16 2 0x55558d139e0b in boost::container::vector, boost::container::small_vector_allocator, std::__1::allocator, void>, void>::~vector() .cmake-llvm16-msan/./contrib/boost/boost/container/vector.hpp:1141:4 3 0x55558d12a4fa in boost::container::small_vector_base, std::__1::allocator>, void>::~small_vector_base() .cmake-llvm16-msan/./contrib/boost/boost/container/small_vector.hpp:445:80 4 0x55558d12a4fa in boost::container::small_vector, 1ul, std::__1::allocator>, void>::~small_vector() .cmake-llvm16-msan/./contrib/boost/boost/container/small_vector.hpp:564:7 5 0x55558d13a21b in std::__1::__tuple_leaf<0ul, boost::container::small_vector, 1ul, std::__1::allocator>, void>, false>::~__tuple_leaf() .cmake-llvm16-msan/./contrib/llvm-project/libcxx/include/tuple:265:7 6 0x55558d13a13a in std::__1::__tuple_impl<>::~__tuple_impl() .cmake-llvm16-msan/./contrib/llvm-project/libcxx/include/tuple:451:37 7 0x55558d13a05b in std::__1::tuple<>::~tuple() .cmake-llvm16-msan/./contrib/llvm-project/libcxx/include/tuple:538:28 8 0x55558d139f7b in ue2::flat_detail::flat_base<>::~flat_base() .cmake-llvm16-msan/./contrib/vectorscan/src/util/flat_containers.h:89:7 9 0x55558d1299da in ue2::flat_set<>::~flat_set() .cmake-llvm16-msan/./contrib/vectorscan/src/util/flat_containers.h:152:7 10 0x55558d4e4dda in ue2::(anonymous namespace)::DAccelScheme::~DAccelScheme() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:301:8 11 0x55558d4ff6cf in void boost::container::allocator_traits<>::priv_destroy(boost::move_detail::integral_constant, boost::container::small_vector_allocator, void>&, ue2::(anonymous namespace)::DAccelScheme*) .cmake-llvm16-msan/./contrib/boost/boost/container/allocator_traits.hpp:403:11 12 0x55558d4fefde in void boost::container::allocator_traits<>::destroy(boost::container::small_vector_allocator, void>&, ue2::(anonymous namespace)::DAccelScheme*) .cmake-llvm16-msan/./contrib/boost/boost/container/allocator_traits.hpp:331:7 13 0x55558d4fc364 in boost::container::dtl::disable_if_trivially_destructible<>::type boost::container::destroy_alloc_n<>(boost::container::small_vector_allocator, void>&, ue2::(anonymous namespace)::DAccelScheme*, unsigned long) .cmake-llvm16-msan/./contrib/boost/boost/container/detail/copy_move_algo.hpp:988:7 14 0x55558d517962 in boost::container::vector<>::~vector() .cmake-llvm16-msan/./contrib/boost/boost/container/vector.hpp:1138:7 15 0x55558d4f724d in boost::container::small_vector_base<>::~small_vector_base() .cmake-llvm16-msan/./contrib/boost/boost/container/small_vector.hpp:445:80 16 0x55558d4f724d in boost::container::small_vector<>::~small_vector() .cmake-llvm16-msan/./contrib/boost/boost/container/small_vector.hpp:564:7 17 0x55558d4f2ff3 in ue2::findDoubleBest() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:444:1 18 0x55558d4f2f41 in ue2::findDoubleBest() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:442:9 19 0x55558d4f2f41 in ue2::findDoubleBest() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:442:9 20 0x55558d4f2f41 in ue2::findDoubleBest() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:442:9 21 0x55558d4f2f41 in ue2::findDoubleBest() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:442:9 22 0x55558d4f2f41 in ue2::findDoubleBest() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:442:9 23 0x55558d4f2f41 in ue2::findDoubleBest() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:442:9 24 0x55558d4f2f41 in ue2::findDoubleBest() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:442:9 25 0x55558d4e4af5 in ue2::findBestDoubleAccelScheme() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:556:5 26 0x55558d4e2659 in ue2::findBestAccelScheme() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:569:27 27 0x55558d3aa8ff in ue2::look_for_offset_accel(ue2::raw_dfa const&, unsigned short, unsigned int) .cmake-llvm16-msan/./contrib/vectorscan/src/nfa/accel_dfa_build_strat.cpp:197:22 28 0x55558d3a9727 in ue2::accel_dfa_build_strat::find_escape_strings(unsigned short) const .cmake-llvm16-msan/./contrib/vectorscan/src/nfa/accel_dfa_build_strat.cpp:414:13 29 0x55558d3b2119 in ue2::accel_dfa_build_strat::getAccelInfo(ue2::Grey const&)::$_0::operator()(unsigned long) const .cmake-llvm16-msan/./contrib/vectorscan/src/nfa/accel_dfa_build_strat.cpp:606:26 30 0x55558d3aefd4 in ue2::accel_dfa_build_strat::getAccelInfo(ue2::Grey const&) .cmake-llvm16-msan/./contrib/vectorscan/src/nfa/accel_dfa_build_strat.cpp:627:13 31 0x55558d2fc61f in ue2::mcclellanCompile8() .cmake-llvm16-msan/./contrib/vectorscan/src/nfa/mcclellancompile.cpp:935:22 32 0x55558d2e89ec in ue2::mcclellanCompile_i() .cmake-llvm16-msan/./contrib/vectorscan/src/nfa/mcclellancompile.cpp:1510:15 33 0x55558d2ff502 in ue2::mcclellanCompile() .cmake-llvm16-msan/./contrib/vectorscan/src/nfa/mcclellancompile.cpp:1527:12 34 0x55558fb13b52 in ue2::getDfa() .cmake-llvm16-msan/./contrib/vectorscan/src/rose/rose_build_bytecode.cpp:646:15 35 0x55558fb7e8c8 in ue2::makeLeftNfa() .cmake-llvm16-msan/./contrib/vectorscan/src/rose/rose_build_bytecode.cpp:854:22 36 0x55558fb6bd36 in ue2::buildLeftfix() .cmake-llvm16-msan/./contrib/vectorscan/src/rose/rose_build_bytecode.cpp:1123:15 37 0x55558fb21020 in ue2::buildLeftfixes() .cmake-llvm16-msan/./contrib/vectorscan/src/rose/rose_build_bytecode.cpp:1579:9 38 0x55558fad972c in ue2::buildNfas() .cmake-llvm16-msan/./contrib/vectorscan/src/rose/rose_build_bytecode.cpp:2063:10 39 0x55558fac9843 in ue2::RoseBuildImpl::buildFinalEngine(unsigned int) .cmake-llvm16-msan/./contrib/vectorscan/src/rose/rose_build_bytecode.cpp:3660:10 40 0x55558f2b2d86 in ue2::RoseBuildImpl::buildRose(unsigned int) .cmake-llvm16-msan/./contrib/vectorscan/src/rose/rose_build_compile.cpp:1796:12 Uninitialized value was stored to memory at 0 0x55558d132898 in boost::container::vector_alloc_holder, std::__1::allocator, void>, unsigned long, boost::move_detail::integral_constant>::deallocate(std::__1::pair* const&, unsigned long) .cmake-llvm16-msan/./contrib/boost/boost/container/vector.hpp:455:56 1 0x55558d139e8e in boost::container::vector_alloc_holder<>::~vector_alloc_holder() .cmake-llvm16-msan/./contrib/boost/boost/container/vector.hpp:420:16 2 0x55558d139e0b in boost::container::vector<>::~vector() .cmake-llvm16-msan/./contrib/boost/boost/container/vector.hpp:1141:4 3 0x55558d12a4fa in boost::container::small_vector_base<>::~small_vector_base() .cmake-llvm16-msan/./contrib/boost/boost/container/small_vector.hpp:445:80 4 0x55558d12a4fa in boost::container::small_vector, 1ul, std::__1::allocator>, void>::~small_vector() .cmake-llvm16-msan/./contrib/boost/boost/container/small_vector.hpp:564:7 5 0x55558d13a21b in std::__1::__tuple_leaf<>::~__tuple_leaf() .cmake-llvm16-msan/./contrib/llvm-project/libcxx/include/tuple:265:7 6 0x55558d13a13a in std::__1::__tuple_impl<>::~__tuple_impl .cmake-llvm16-msan/./contrib/llvm-project/libcxx/include/tuple:451:37 7 0x55558d13a05b in std::__1::tuple<>::~tuple() .cmake-llvm16-msan/./contrib/llvm-project/libcxx/include/tuple:538:28 8 0x55558d139f7b in ue2::flat_detail::flat_base<>::~flat_base() .cmake-llvm16-msan/./contrib/vectorscan/src/util/flat_containers.h:89:7 9 0x55558d1299da in ue2::flat_set<>::~flat_set() .cmake-llvm16-msan/./contrib/vectorscan/src/util/flat_containers.h:1 52:7 10 0x55558d4e4dda in ue2::(anonymous namespace)::DAccelScheme::~DAccelScheme() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:301:8 11 0x55558d4ff6cf in void boost::container::allocator_traits<>::priv_destroy() .cmake-llvm16-msan/./contrib/boost/boost/container/allocator_traits.hpp:403:11 12 0x55558d4fefde in void boost::container::allocator_traits<>::destroy(boost::container::small_vector_allocator<>&, ue2::(anonymous namespace)::DAccelScheme*) .cmake-llvm16-msan/./contrib/boost/boost/container/allocator_traits.hpp:331:7 13 0x55558d4fc364 in boost::container::dtl::disable_if_trivially_destructible<>::type boost::container::destroy_alloc_n, void>, ue2::(anonymous namespace)::DAccelScheme*, unsigned long>(boost::container::small_vector_allocator, void>&, ue2::(anonymous namespace)::DAccelScheme*, unsigned long) .cmake-llvm16-msan/./contrib/boost/boost/container/detail/copy_move_algo.hpp:988:7 14 0x55558d517962 in boost::container::vector, void>, void>::~vector() .cmake-llvm16-msan/./contrib/boost/boost/container/vector.hpp:1138:7 15 0x55558d4f724d in boost::container::small_vector_base<>::~small_vector_base() .cmake-llvm16-msan/./contrib/boost/boost/container/small_vector.hpp:445:80 16 0x55558d4f724d in boost::container::small_vector<>::~small_vector() .cmake-llvm16-msan/./contrib/boost/boost/container/small_vector.hpp:564:7 17 0x55558d4f2ff3 in ue2::findDoubleBest() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:444:1 18 0x55558d4f2f41 in ue2::findDoubleBest() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:442:9 19 0x55558d4f2f41 in ue2::findDoubleBest() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:442:9 20 0x55558d4f2f41 in ue2::findDoubleBest() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:442:9 21 0x55558d4f2f41 in ue2::findDoubleBest() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:442:9 Member fields were destroyed 0 0x5555652e08dd in __sanitizer_dtor_callback_fields /src/llvm/worktrees/llvm-16/compiler-rt/lib/msan/msan_interceptors.cpp:961:5 1 0x55558d4f71a6 in boost::container::small_vector<>::~small_vector() .cmake-llvm16-msan/./contrib/boost/boost/container/small_vector.hpp:528:8 2 0x55558d4f71a6 in boost::container::small_vector<>::~small_vector() .cmake-llvm16-msan/./contrib/boost/boost/container/small_vector.hpp:564:7 3 0x55558d4f2ff3 in ue2::findDoubleBest() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:444:1 4 0x55558d4f2f41 in ue2::findDoubleBest() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:442:9 5 0x55558d4f2f41 in ue2::findDoubleBest() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:442:9 6 0x55558d4f2f41 in ue2::findDoubleBest() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:442:9 7 0x55558d4f2f41 in ue2::findDoubleBest() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:442:9 8 0x55558d4f2f41 in ue2::findDoubleBest() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:442:9 9 0x55558d4f2f41 in ue2::findDoubleBest() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:442:9 10 0x55558d4f2f41 in ue2::findDoubleBest() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:442:9 11 0x55558d4e4af5 in ue2::findBestDoubleAccelScheme() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:556:5 12 0x55558d4e2659 in ue2::findBestAccelScheme() .cmake-llvm16-msan/./contrib/vectorscan/src/nfagraph/ng_limex_accel.cpp:569:27 13 0x55558d3aa8ff in ue2::look_for_offset_accel(ue2::raw_dfa const&, unsigned short, unsigned int) .cmake-llvm16-msan/./contrib/vectorscan/src/nfa/accel_dfa_build_strat.cpp:197:22 14 0x55558d3a9727 in ue2::accel_dfa_build_strat::find_escape_strings(unsigned short) const .cmake-llvm16-msan/./contrib/vectorscan/src/nfa/accel_dfa_build_strat.cpp:414:13 15 0x55558d3b2119 in ue2::accel_dfa_build_strat::getAccelInfo(ue2::Grey const&)::$_0::operator()(unsigned long) const .cmake-llvm16-msan/./contrib/vectorscan/src/nfa/accel_dfa_build_strat.cpp:606:26 16 0x55558d3aefd4 in ue2::accel_dfa_build_strat::getAccelInfo(ue2::Grey const&) .cmake-llvm16-msan/./contrib/vectorscan/src/nfa/accel_dfa_build_strat.cpp:627:13 17 0x55558d2fc61f in ue2::mcclellanCompile8(ue2::(anonymous namespace)::dfa_info&, ue2::CompileContext const&, std::__1::set, std::__1::allocator>*) .cmake-llvm16-msan/./contrib/vectorscan/src/nfa/mcclellancompile.cpp:935:22 18 0x55558d2e89ec in ue2::mcclellanCompile_i(ue2::raw_dfa&, ue2::accel_dfa_build_strat&, ue2::CompileContext const&, bool, std::__1::set, std::__1::allocator>*) .cmake-llvm16-msan/./contrib/vectorscan/src/nfa/mcclellancompile.cpp:1510:15 19 0x55558d2ff502 in ue2::mcclellanCompile(ue2::raw_dfa&, ue2::CompileContext const&, ue2::ReportManager const&, bool, bool, std::__1::set, std::__1::allocator>*) .cmake-llvm16-msan/./contrib/vectorscan/src/nfa/mcclellancompile.cpp:1527:12 20 0x55558fb13b52 in ue2::getDfa(ue2::raw_dfa&, bool, ue2::CompileContext const&, ue2::ReportManager const&) .cmake-llvm16-msan/./contrib/vectorscan/src/rose/rose_build_bytecode.cpp:646:15 ```
Signed-off-by: Azat Khuzhin --- src/util/small_vector.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/util/small_vector.h b/src/util/small_vector.h index 0f54bbf6..5bad7df9 100644 --- a/src/util/small_vector.h +++ b/src/util/small_vector.h @@ -29,7 +29,11 @@ #ifndef UTIL_SMALL_VECTOR_H #define UTIL_SMALL_VECTOR_H -#include +#if defined(__has_feature) +# if __has_feature(memory_sanitizer) +#define BUILD_WITH_MSAN +# endif +#endif #include @@ -37,8 +41,16 @@ * We use the small_vector constructors introduced in Boost 1.61 (trac bug * #11866, github commit b436c91). If the Boost version is too old, we fall * back to using std::vector. + * + * Also with MSan boost::container::small_vector cannot be used because MSan + * reports some issues there, it looks similar to [1], but even adding + * __attribute__((no_sanitize_memory)) for ~small_vector_base() [2] is not + * enough since clang-16, so let's simply use std::vector under MSan. + * + * [1]: https://github.com/google/sanitizers/issues/854 + * [2]: https://github.com/ClickHouse/boost/commit/229354100 */ -#if BOOST_VERSION >= 106100 +#if !defined(BUILD_WITH_MSAN) && BOOST_VERSION >= 106100 # define HAVE_BOOST_CONTAINER_SMALL_VECTOR #endif @@ -56,6 +68,8 @@ using small_vector = boost::container::small_vector; #else +#include + // Boost version isn't new enough, fall back to just using std::vector. template > using small_vector = std::vector; From 07305d18aec870bfa94a96c7a274ed99b0452615 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 10 May 2023 15:29:23 +0200 Subject: [PATCH 03/25] Fix use-of-uninitialized-value due to getData128() When temporary buffer is used in getData128(), then it may return uninitialized data. Signed-off-by: Azat Khuzhin --- src/rose/program_runtime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index f607e8f2..87dc0c4d 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -959,7 +959,7 @@ m128 getData128(const struct core_info *ci, s64a offset, u32 *valid_data_mask) { *valid_data_mask = 0xffff; return loadu128(ci->buf + offset); } - ALIGN_DIRECTIVE u8 data[sizeof(m128)]; + ALIGN_DIRECTIVE u8 data[sizeof(m128)] = { 0 }; *valid_data_mask = getBufferDataComplex(ci, offset, data, 16); return *(m128 *)data; } From 68db36f4c471a1336b379a11b3f280b417551cb1 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Wed, 23 Aug 2023 09:42:00 +0000 Subject: [PATCH 04/25] initial attempt for fat binary on Aarch64 --- CMakeLists.txt | 367 ++++++++++++++++++------------- cmake/arch.cmake | 40 ++-- src/dispatcher.c | 36 ++- src/hs.cpp | 9 +- src/hs_valid_platform.c | 8 +- src/util/arch/arm/cpuid_inline.h | 61 +++++ 6 files changed, 355 insertions(+), 166 deletions(-) create mode 100644 src/util/arch/arm/cpuid_inline.h diff --git a/CMakeLists.txt b/CMakeLists.txt index e90d8c98..bc4c9846 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -249,13 +249,18 @@ if (ARCH_IA32 OR ARCH_X86_64) endif() if (ARCH_AARCH64) - if (BUILD_SVE2_BITPERM AND NOT SVE2_BITPERM_FOUND) - set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm") - elseif (BUILD_SVE2 AND NOT SVE2_FOUND) - set(GNUCC_ARCH "${GNUCC_ARCH}+sve2") - elseif (BUILD_SVE AND NOT SVE_FOUND) - set(GNUCC_ARCH "${GNUCC_ARCH}+sve") - endif () + if (NOT FAT_RUNTIME) + if (BUILD_SVE2_BITPERM AND NOT SVE2_BITPERM_FOUND) + set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm") + elseif (BUILD_SVE2 AND NOT SVE2_FOUND) + set(GNUCC_ARCH "${GNUCC_ARCH}+sve2") + elseif (BUILD_SVE AND NOT SVE_FOUND) + set(GNUCC_ARCH "${GNUCC_ARCH}+sve") + endif () + else() + set(ARCH_C_FLAGS "") + set(ARCH_CXX_FLAGS "") + endif() endif(ARCH_AARCH64) message(STATUS "ARCH_C_FLAGS : ${ARCH_C_FLAGS}") @@ -271,24 +276,6 @@ if (NOT FAT_RUNTIME) endif() endif() -#if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64) -# if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*) -# set(ARCH_C_FLAGS "-march=${GNUCC_ARCH} -mtune=${TUNE_FLAG}") -# endif() -# if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*) -# set(ARCH_CXX_FLAGS "-march=${GNUCC_ARCH} -mtune=${TUNE_FLAG}") -# endif() -#endif() - -#if(ARCH_PPC64EL) -# if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*) -# set(ARCH_C_FLAGS "-mtune=${TUNE_FLAG}") -# endif() -# if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*) -# set(ARCH_CXX_FLAGS "-mtune=${TUNE_FLAG}") -# endif() -#endif() - # compiler version checks TODO: test more compilers if (CMAKE_COMPILER_IS_GNUCXX) set(GNUCXX_MINVER "9") @@ -396,6 +383,7 @@ endif() option(FAT_RUNTIME "Build a library that supports multiple microarchitectures" ON) if (CMAKE_SYSTEM_NAME MATCHES "Linux" AND FAT_RUNTIME MATCHES "ON") + message("Fat Runtime for ${GNUCC_ARCH}") # This is a Linux-only feature for now - requires platform support # elsewhere message(STATUS "generator is ${CMAKE_GENERATOR}") @@ -529,8 +517,8 @@ endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") if (FAT_RUNTIME) - if (NOT (ARCH_IA32 OR ARCH_X86_64)) - message(FATAL_ERROR "Fat runtime is not supported on non-Intel architectures") + if (NOT (ARCH_IA32 OR ARCH_X86_64 OR ARCH_AARCH64)) + message(FATAL_ERROR "Fat runtime is only supported on Intel and Aarch64 architectures") else() message(STATUS "Building runtime for multiple microarchitectures") endif() @@ -790,7 +778,7 @@ set (hs_exec_SRCS endif () endif() -if (NOT BUILD_SVE2) +if (FAT_RUNTIME OR (NOT FAT_RUNTIME AND NOT BUILD_SVE2)) set (hs_exec_SRCS ${hs_exec_SRCS} src/nfa/vermicelli_simd.cpp) @@ -1273,137 +1261,222 @@ if (NOT FAT_RUNTIME) add_library(hs_compile_shared OBJECT ${hs_compile_SRCS}) set_target_properties(hs_compile_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE) endif() +else () + if (ARCH_IA32 OR ARCH_X86_64) + set(BUILD_WRAPPER "${PROJECT_SOURCE_DIR}/cmake/build_wrapper.sh") + if (NOT BUILD_AVX512) + set (DISPATCHER_DEFINE "-DDISABLE_AVX512_DISPATCH") + endif (NOT BUILD_AVX512) + if (NOT BUILD_AVX512VBMI) + set (DISPATCHER_DEFINE "${DISPATCHER_DEFINE} -DDISABLE_AVX512VBMI_DISPATCH") + endif (NOT BUILD_AVX512VBMI) + set_source_files_properties(src/dispatcher.c PROPERTIES + COMPILE_FLAGS "-Wno-unused-parameter -Wno-unused-function ${DISPATCHER_DEFINE}") -else (FAT_RUNTIME) - - set(BUILD_WRAPPER "${PROJECT_SOURCE_DIR}/cmake/build_wrapper.sh") - if (NOT BUILD_AVX512) - set (DISPATCHER_DEFINE "-DDISABLE_AVX512_DISPATCH") - endif (NOT BUILD_AVX512) - if (NOT BUILD_AVX512VBMI) - set (DISPATCHER_DEFINE "${DISPATCHER_DEFINE} -DDISABLE_AVX512VBMI_DISPATCH") - endif (NOT BUILD_AVX512VBMI) - set_source_files_properties(src/dispatcher.c PROPERTIES - COMPILE_FLAGS "-Wno-unused-parameter -Wno-unused-function ${DISPATCHER_DEFINE}") - - if (BUILD_STATIC_LIBS) - add_library(hs_exec_core2 OBJECT ${hs_exec_SRCS}) - list(APPEND RUNTIME_LIBS $) - set_target_properties(hs_exec_core2 PROPERTIES - COMPILE_FLAGS "-march=core2 -msse4.2" - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" - ) - - add_library(hs_exec_corei7 OBJECT ${hs_exec_SRCS}) - list(APPEND RUNTIME_LIBS $) - set_target_properties(hs_exec_corei7 PROPERTIES - COMPILE_FLAGS "-march=corei7 -msse4.2" - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" - ) - - if (BUILD_AVX2) - add_library(hs_exec_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) - list(APPEND RUNTIME_LIBS $) - set_target_properties(hs_exec_avx2 PROPERTIES - COMPILE_FLAGS "-march=core-avx2 -mavx2" - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in" + if (BUILD_STATIC_LIBS) + add_library(hs_exec_core2 OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_core2 PROPERTIES + COMPILE_FLAGS "-march=core2 -msse4.2" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" ) - endif (BUILD_AVX2) - if (BUILD_AVX512) - add_library(hs_exec_avx512 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) - list(APPEND RUNTIME_LIBS $) - set_target_properties(hs_exec_avx512 PROPERTIES - COMPILE_FLAGS "${SKYLAKE_FLAG}" - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in" - ) - endif (BUILD_AVX512) - if (BUILD_AVX512VBMI) - add_library(hs_exec_avx512vbmi OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) - list(APPEND RUNTIME_LIBS $) - set_target_properties(hs_exec_avx512vbmi PROPERTIES - COMPILE_FLAGS "${ICELAKE_FLAG}" - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512vbmi ${CMAKE_MODULE_PATH}/keep.syms.in" - ) - endif (BUILD_AVX512VBMI) - add_library(hs_exec_common OBJECT + add_library(hs_exec_corei7 OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_corei7 PROPERTIES + COMPILE_FLAGS "-march=corei7 -msse4.2" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + + if (BUILD_AVX2) + add_library(hs_exec_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_avx2 PROPERTIES + COMPILE_FLAGS "-march=core-avx2 -mavx2" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_AVX2) + if (BUILD_AVX512) + add_library(hs_exec_avx512 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_avx512 PROPERTIES + COMPILE_FLAGS "${SKYLAKE_FLAG}" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_AVX512) + if (BUILD_AVX512VBMI) + add_library(hs_exec_avx512vbmi OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_avx512vbmi PROPERTIES + COMPILE_FLAGS "${ICELAKE_FLAG}" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512vbmi ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_AVX512VBMI) + + add_library(hs_exec_common OBJECT + ${hs_exec_common_SRCS} + src/dispatcher.c + ) + + # hs_version.c is added explicitly to avoid some build systems that refuse to + # create a lib without any src (I'm looking at you Xcode) + + add_library(hs_runtime STATIC src/hs_version.c + $ + ${RUNTIME_LIBS}) + set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) + add_library(hs_compile OBJECT ${hs_compile_SRCS}) + + # we want the static lib for testing + add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c + $ + $ + ${RUNTIME_LIBS}) + endif (BUILD_STATIC_LIBS) + + if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) + # build shared libs + add_library(hs_compile_shared OBJECT ${hs_compile_SRCS}) + set_target_properties(hs_compile_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE) + add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_SHLIBS $) + set_target_properties(hs_exec_shared_core2 PROPERTIES + COMPILE_FLAGS "-march=core2 -msse4.2" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + add_library(hs_exec_shared_corei7 OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_SHLIBS $) + set_target_properties(hs_exec_shared_corei7 PROPERTIES + COMPILE_FLAGS "-march=corei7 -msse4.2" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + + if (BUILD_AVX2) + add_library(hs_exec_shared_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_SHLIBS $) + set_target_properties(hs_exec_shared_avx2 PROPERTIES + COMPILE_FLAGS "-march=core-avx2 -mavx2" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_AVX2) + if (BUILD_AVX512) + add_library(hs_exec_shared_avx512 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_SHLIBS $) + set_target_properties(hs_exec_shared_avx512 PROPERTIES + COMPILE_FLAGS "${SKYLAKE_FLAG}" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_AVX512) + if (BUILD_AVX512VBMI) + add_library(hs_exec_shared_avx512vbmi OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_SHLIBS $) + set_target_properties(hs_exec_shared_avx512vbmi PROPERTIES + COMPILE_FLAGS "${ICELAKE_FLAG}" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512vbmi ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_AVX512VBMI) + add_library(hs_exec_common_shared OBJECT ${hs_exec_common_SRCS} src/dispatcher.c ) + set_target_properties(hs_exec_common_shared PROPERTIES + OUTPUT_NAME hs_exec_common + POSITION_INDEPENDENT_CODE TRUE) - # hs_version.c is added explicitly to avoid some build systems that refuse to - # create a lib without any src (I'm looking at you Xcode) + endif() # SHARED + endif (ARCH_IA32 OR ARCH_X86_64) + if (ARCH_AARCH64) + set(BUILD_WRAPPER "${PROJECT_SOURCE_DIR}/cmake/build_wrapper.sh") + if (BUILD_STATIC_LIBS) + add_library(hs_exec_neon OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_neon PROPERTIES + COMPILE_FLAGS "-march=armv8-a" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} neon ${CMAKE_MODULE_PATH}/keep.syms.in" + ) - add_library(hs_runtime STATIC src/hs_version.c - $ - ${RUNTIME_LIBS}) - set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) - add_library(hs_compile OBJECT ${hs_compile_SRCS}) + if (BUILD_SVE) + add_library(hs_exec_sve OBJECT ${hs_exec_SRCS} ${hs_exec_sve_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_sve PROPERTIES + COMPILE_FLAGS "-march=armv8-a+sve" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} sve ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_SVE) + if (BUILD_SVE2) + add_library(hs_exec_sve2 OBJECT ${hs_exec_SRCS} ${hs_exec_sve2_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_sve2 PROPERTIES + COMPILE_FLAGS "-march=armv8-a+sve2" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} sve2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_SVE2) - # we want the static lib for testing - add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c - $ - $ - ${RUNTIME_LIBS}) + add_library(hs_exec_common OBJECT + ${hs_exec_common_SRCS} + src/dispatcher.c + ) - endif (BUILD_STATIC_LIBS) + # hs_version.c is added explicitly to avoid some build systems that refuse to + # create a lib without any src (I'm looking at you Xcode) - if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) - # build shared libs - add_library(hs_compile_shared OBJECT ${hs_compile_SRCS}) - set_target_properties(hs_compile_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE) - add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS}) - list(APPEND RUNTIME_SHLIBS $) - set_target_properties(hs_exec_shared_core2 PROPERTIES - COMPILE_FLAGS "-march=core2 -msse4.2" - POSITION_INDEPENDENT_CODE TRUE - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" + add_library(hs_runtime STATIC src/hs_version.c + $ + ${RUNTIME_LIBS}) + set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) + add_library(hs_compile OBJECT ${hs_compile_SRCS}) + + # we want the static lib for testing + add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c + $ + $ + ${RUNTIME_LIBS}) + endif (BUILD_STATIC_LIBS) + + if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) + # build shared libs + add_library(hs_compile_shared OBJECT ${hs_compile_SRCS}) + set_target_properties(hs_compile_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE) + add_library(hs_exec_shared_neon OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_SHLIBS $) + set_target_properties(hs_exec_shared_neon PROPERTIES + COMPILE_FLAGS "-march=armv8-a" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} neon ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + + if (BUILD_SVE) + add_library(hs_exec_shared_sve OBJECT ${hs_exec_SRCS} ${hs_exec_sve_SRCS}) + list(APPEND RUNTIME_SHLIBS $) + set_target_properties(hs_exec_shared_sve PROPERTIES + COMPILE_FLAGS "-march=armv8-a+sve" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} sve ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_SVE) + if (BUILD_SVE2) + add_library(hs_exec_shared_sve2 OBJECT ${hs_exec_SRCS} ${hs_exec_sve2_SRCS}) + list(APPEND RUNTIME_SHLIBS $) + set_target_properties(hs_exec_shared_sve2 PROPERTIES + COMPILE_FLAGS "-march=armv8-a+sve2" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} sve2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_SVE2) + add_library(hs_exec_common_shared OBJECT + ${hs_exec_common_SRCS} + src/dispatcher.c ) - add_library(hs_exec_shared_corei7 OBJECT ${hs_exec_SRCS}) - list(APPEND RUNTIME_SHLIBS $) - set_target_properties(hs_exec_shared_corei7 PROPERTIES - COMPILE_FLAGS "-march=corei7 -msse4.2" - POSITION_INDEPENDENT_CODE TRUE - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" - ) - - if (BUILD_AVX2) - add_library(hs_exec_shared_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) - list(APPEND RUNTIME_SHLIBS $) - set_target_properties(hs_exec_shared_avx2 PROPERTIES - COMPILE_FLAGS "-march=core-avx2 -mavx2" - POSITION_INDEPENDENT_CODE TRUE - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in" - ) - endif (BUILD_AVX2) - if (BUILD_AVX512) - add_library(hs_exec_shared_avx512 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) - list(APPEND RUNTIME_SHLIBS $) - set_target_properties(hs_exec_shared_avx512 PROPERTIES - COMPILE_FLAGS "${SKYLAKE_FLAG}" - POSITION_INDEPENDENT_CODE TRUE - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in" - ) - endif (BUILD_AVX512) - if (BUILD_AVX512VBMI) - add_library(hs_exec_shared_avx512vbmi OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) - list(APPEND RUNTIME_SHLIBS $) - set_target_properties(hs_exec_shared_avx512vbmi PROPERTIES - COMPILE_FLAGS "${ICELAKE_FLAG}" - POSITION_INDEPENDENT_CODE TRUE - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512vbmi ${CMAKE_MODULE_PATH}/keep.syms.in" - ) - endif (BUILD_AVX512VBMI) - add_library(hs_exec_common_shared OBJECT - ${hs_exec_common_SRCS} - src/dispatcher.c - ) - set_target_properties(hs_exec_common_shared PROPERTIES - OUTPUT_NAME hs_exec_common - POSITION_INDEPENDENT_CODE TRUE) - endif() # SHARED - - + set_target_properties(hs_exec_common_shared PROPERTIES + OUTPUT_NAME hs_exec_common + POSITION_INDEPENDENT_CODE TRUE) + endif() # SHARED + endif (ARCH_AARCH64) endif (NOT FAT_RUNTIME) if (NOT BUILD_SHARED_LIBS) diff --git a/cmake/arch.cmake b/cmake/arch.cmake index 29c39b49..6dd183b9 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -8,7 +8,6 @@ elseif (HAVE_C_INTRIN_H) set (INTRIN_INC_H "intrin.h") elseif (HAVE_C_ARM_NEON_H) set (INTRIN_INC_H "arm_neon.h") - set (FAT_RUNTIME OFF) elseif (HAVE_C_PPC64EL_ALTIVEC_H) set (INTRIN_INC_H "altivec.h") set (FAT_RUNTIME OFF) @@ -77,21 +76,30 @@ if (BUILD_AVX512VBMI) endif () if (FAT_RUNTIME) - if (NOT DEFINED(BUILD_AVX2)) - set(BUILD_AVX2 TRUE) - endif () - # test the highest level microarch to make sure everything works - if (BUILD_AVX512) - if (BUILD_AVX512VBMI) - set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${ICELAKE_FLAG}") - else () - set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${SKYLAKE_FLAG}") - endif (BUILD_AVX512VBMI) - elseif (BUILD_AVX2) - set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-avx2 -mavx2") - elseif () - set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-i7 -mssse3") - endif () + if (ARCH_IA32 OR ARCH_X86_64) + if (NOT DEFINED(BUILD_AVX2)) + set(BUILD_AVX2 TRUE) + endif () + # test the highest level microarch to make sure everything works + if (BUILD_AVX512) + if (BUILD_AVX512VBMI) + set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${ICELAKE_FLAG}") + else () + set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${SKYLAKE_FLAG}") + endif (BUILD_AVX512VBMI) + elseif (BUILD_AVX2) + set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-avx2 -mavx2") + elseif () + set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-i7 -mssse3") + endif () + elseif(ARCH_AARCH64) + if (NOT DEFINED(BUILD_SVE)) + set(BUILD_SVE TRUE) + endif () + if (NOT DEFINED(BUILD_SVE2)) + set(BUILD_SVE2 TRUE) + endif () + endif() else (NOT FAT_RUNTIME) # if not fat runtime, then test given cflags set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${ARCH_C_FLAGS}") diff --git a/src/dispatcher.c b/src/dispatcher.c index f5f2d2c6..775002f6 100644 --- a/src/dispatcher.c +++ b/src/dispatcher.c @@ -32,7 +32,6 @@ #include "ue2common.h" #if defined(ARCH_IA32) || defined(ARCH_X86_64) #include "util/arch/x86/cpuid_inline.h" -#endif #include "util/join.h" #if defined(DISABLE_AVX512_DISPATCH) @@ -83,6 +82,41 @@ HS_PUBLIC_API \ RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME))) +#elif defined(ARCH_AARCH64) +#include "util/arch/arm/cpuid_inline.h" +#include "util/join.h" + +#define CREATE_DISPATCH(RTYPE, NAME, ...) \ + /* create defns */ \ + RTYPE JOIN(sve2_, NAME)(__VA_ARGS__); \ + RTYPE JOIN(sve_, NAME)(__VA_ARGS__); \ + RTYPE JOIN(neon_, NAME)(__VA_ARGS__); \ + \ + /* error func */ \ + static inline RTYPE JOIN(error_, NAME)(__VA_ARGS__) { \ + return (RTYPE)HS_ARCH_ERROR; \ + } \ + \ + /* resolver */ \ + static RTYPE (*JOIN(resolve_, NAME)(void))(__VA_ARGS__) { \ + if (check_sve2()) { \ + return JOIN(sve2_, NAME); \ + } \ + if (check_sve()) { \ + return JOIN(sve_, NAME); \ + } \ + if (check_neon()) { \ + return JOIN(neon_, NAME); \ + } \ + /* anything else is fail */ \ + return JOIN(error_, NAME); \ + } \ + \ + /* function */ \ + HS_PUBLIC_API \ + RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME))) + +#endif CREATE_DISPATCH(hs_error_t, hs_scan, const hs_database_t *db, const char *data, unsigned length, unsigned flags, hs_scratch_t *scratch, match_event_handler onEvent, void *userCtx); diff --git a/src/hs.cpp b/src/hs.cpp index 73cc032f..61e46148 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -199,11 +199,13 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags, } #if defined(FAT_RUNTIME) +#if defined(ARCH_IA32) || defined(ARCH_X86_64) if (!check_ssse3()) { *db = nullptr; *comp_error = generateCompileError("Unsupported architecture", -1); return HS_ARCH_ERROR; } +#endif #endif if (!checkMode(mode, comp_error)) { @@ -320,13 +322,14 @@ hs_compile_lit_multi_int(const char *const *expressions, const unsigned *flags, *comp_error = generateCompileError("Invalid parameter: elements is zero", -1); return HS_COMPILER_ERROR; } - #if defined(FAT_RUNTIME) +#if defined(ARCH_IA32) || defined(ARCH_X86_64) if (!check_ssse3()) { *db = nullptr; *comp_error = generateCompileError("Unsupported architecture", -1); return HS_ARCH_ERROR; } +#endif #endif if (!checkMode(mode, comp_error)) { @@ -500,10 +503,12 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, } #if defined(FAT_RUNTIME) +#if defined(ARCH_IA32) || defined(ARCH_X86_64) if (!check_ssse3()) { *error = generateCompileError("Unsupported architecture", -1); return HS_ARCH_ERROR; } +#endif #endif if (!info) { @@ -631,9 +636,11 @@ hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform) { extern "C" HS_PUBLIC_API hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error) { #if defined(FAT_RUNTIME) +#if defined(ARCH_IA32) || defined(ARCH_X86_64) if (!check_ssse3()) { return HS_ARCH_ERROR; } +#endif #endif freeCompileError(error); return HS_SUCCESS; diff --git a/src/hs_valid_platform.c b/src/hs_valid_platform.c index 809deee1..0af36b6c 100644 --- a/src/hs_valid_platform.c +++ b/src/hs_valid_platform.c @@ -31,6 +31,8 @@ #include "ue2common.h" #if defined(ARCH_IA32) || defined(ARCH_X86_64) #include "util/arch/x86/cpuid_inline.h" +#elif defined(ARCH_AARCH64) +#include "util/arch/arm/cpuid_inline.h" #endif HS_PUBLIC_API @@ -43,7 +45,11 @@ hs_error_t HS_CDECL hs_valid_platform(void) { return HS_ARCH_ERROR; } #elif defined(ARCH_ARM32) || defined(ARCH_AARCH64) - return HS_SUCCESS; + if (check_neon()) { + return HS_SUCCESS; + } else { + return HS_ARCH_ERROR; + } #elif defined(ARCH_PPC64EL) return HS_SUCCESS; #endif diff --git a/src/util/arch/arm/cpuid_inline.h b/src/util/arch/arm/cpuid_inline.h new file mode 100644 index 00000000..1173b42c --- /dev/null +++ b/src/util/arch/arm/cpuid_inline.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2017-2020, Intel Corporation + * Copyright (c) 2023, VectorCamp PC + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef AARCH64_CPUID_INLINE_H_ +#define AARCH64_CPUID_INLINE_H_ + +#include + +#include "ue2common.h" +#include "util/arch/common/cpuid_flags.h" + +static inline +int check_neon(void) { + return 1; +} + +static inline +int check_sve(void) { + unsigned long hwcap = getauxval(AT_HWCAP); + if (hwcap & HWCAP_SVE) { + return 1; + } + return 0; +} + +static inline +int check_sve2(void) { + unsigned long hwcap2 = getauxval(AT_HWCAP2); + if (hwcap2 & HWCAP2_SVE2) { + return 1; + } + return 0; +} + +#endif // AARCH64_CPUID_INLINE_H_ From 0ec7b4e77b7a2273099e3dffcf6eaa71de25dbbc Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Wed, 23 Aug 2023 10:21:02 +0000 Subject: [PATCH 05/25] fix SVE flags detection order #145 --- CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bc4c9846..43ce320b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -187,11 +187,15 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) string(FIND "${GNUCC_ARCH}" "sve" POS_SVE) string(FIND "${GNUCC_ARCH}" "sve2" POS_SVE2) string(FIND "${GNUCC_ARCH}" "sve2-bitperm" POS_SVE2_BITPERM) - if (NOT POS_SVE EQUAL 0) + if(NOT POS_SVE2_BITPERM EQUAL 0) + set(SVE2_BITPERM_FOUND 1) + set(SVE2_FOUND 1) set(SVE_FOUND 1) elseif(NOT POS_SVE2 EQUAL 0) set(SVE2_FOUND 1) - elseif(NOT POS_SVE2_BITPERM EQUAL 0) + set(SVE_FOUND 1) + elseif (NOT POS_SVE EQUAL 0) + set(SVE_FOUND 1) set(SVE2_BITPERM_FOUND 1) endif() From 4bc70b37a72c1302dbd5344a4d4ebfb9b36bf0e2 Mon Sep 17 00:00:00 2001 From: jplaisance Date: Tue, 27 Jun 2023 09:47:14 -0500 Subject: [PATCH 06/25] adding ifndef around HS_PUBLIC_API definition so that vectorscan can be statically linked into another shared library without exporting symbols --- src/ue2common.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ue2common.h b/src/ue2common.h index 6fe2d060..b8300dc7 100644 --- a/src/ue2common.h +++ b/src/ue2common.h @@ -73,7 +73,9 @@ typedef u32 ReportID; /* Shorthand for attribute to mark a function as part of our public API. * Functions without this attribute will be hidden. */ +#ifndef HS_PUBLIC_API #define HS_PUBLIC_API __attribute__((visibility("default"))) +#endif #define ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0])) From 978105a4c0852d3983782cb2dba0220e13275f08 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Tue, 31 May 2022 06:30:18 +0000 Subject: [PATCH 07/25] klocwork: fix risk issues --- src/nfa/goughcompile.cpp | 8 ++++++++ src/nfa/repeatcompile.cpp | 4 ++++ src/nfagraph/ng_som.cpp | 9 +++++++++ src/nfagraph/ng_violet.cpp | 17 +++++++++++++++++ src/parser/logical_combination.cpp | 3 ++- src/rose/rose_build_convert.cpp | 4 ++++ src/smallwrite/smallwrite_build.cpp | 2 +- src/util/graph_undirected.h | 4 ++-- src/util/ue2string.h | 2 +- tools/hsbench/data_corpus.cpp | 5 ++++- tools/hsbench/main.cpp | 5 +++++ tools/hscollider/DatabaseProxy.h | 2 +- tools/hscollider/NfaGeneratedCorpora.cpp | 2 +- tools/hscollider/Thread.cpp | 2 +- util/ng_corpus_properties.cpp | 2 +- 15 files changed, 61 insertions(+), 10 deletions(-) diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp index 3bf729b3..5d5c37df 100644 --- a/src/nfa/goughcompile.cpp +++ b/src/nfa/goughcompile.cpp @@ -206,6 +206,10 @@ void makeCFG_top_edge(GoughGraph &cfg, const vector &vertices, assert(contains(src_slots, slot_id)); shared_ptr vmin = make_shared(); + if (!vmin) { + assert(0); + throw std::bad_alloc(); + } cfg[e].vars.emplace_back(vmin); final_var = vmin.get(); @@ -317,6 +321,10 @@ void makeCFG_edge(GoughGraph &cfg, const map &som_creators, DEBUG_PRINTF("bypassing min on join %u\n", slot_id); } else { shared_ptr vmin = make_shared(); + if (!vmin) { + assert(0); + throw std::bad_alloc(); + } cfg[e].vars.emplace_back(vmin); final_var = vmin.get(); diff --git a/src/nfa/repeatcompile.cpp b/src/nfa/repeatcompile.cpp index 73763001..60b51352 100644 --- a/src/nfa/repeatcompile.cpp +++ b/src/nfa/repeatcompile.cpp @@ -124,6 +124,10 @@ RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin, const depth &repeatMax, u32 minPeriod) : stateSize(0), packedCtrlSize(0), horizon(0), patchCount(0), patchSize(0), encodingSize(0), patchesOffset(0) { + if (type == REPEAT_SPARSE_OPTIMAL_P && minPeriod == 0) { + assert(0); + throw std::domain_error("SPARSE_OPTIMAL_P must have non-zero minPeriod."); + } assert(repeatMin <= repeatMax); assert(repeatMax.is_reachable()); assert(minPeriod || type != REPEAT_SPARSE_OPTIMAL_P); diff --git a/src/nfagraph/ng_som.cpp b/src/nfagraph/ng_som.cpp index 10d93fb8..3077ee9d 100644 --- a/src/nfagraph/ng_som.cpp +++ b/src/nfagraph/ng_som.cpp @@ -2445,6 +2445,10 @@ static bool doLitHaigSom(NG &ng, NGHolder &g, som_type som) { ue2_literal lit; shared_ptr rhs = make_shared(); + if (!rhs) { + assert(0); + throw std::bad_alloc(); + } if (!ng.cc.grey.allowLitHaig) { return false; } @@ -2509,6 +2513,11 @@ bool doHaigLitHaigSom(NG &ng, NGHolder &g, ue2_literal lit; shared_ptr rhs = make_shared(); shared_ptr lhs = make_shared(); + if (!rhs || !lhs) { + assert(0); + throw std::bad_alloc(); + } + if (!splitOffBestLiteral(g, regions, &lit, &*lhs, &*rhs, ng.cc)) { return false; } diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 4a5b492c..3e644460 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -1036,6 +1036,11 @@ bool splitRoseEdge(const NGHolder &base_graph, RoseInGraph &vg, shared_ptr lhs = make_shared(); shared_ptr rhs = make_shared(); + if (!lhs || !rhs) { + assert(0); + throw std::bad_alloc(); + } + unordered_map lhs_map; unordered_map rhs_map; @@ -1229,6 +1234,10 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg, DEBUG_PRINTF("splitting on pivot %zu\n", h[pivot].index); unordered_map temp_map; shared_ptr new_lhs = make_shared(); + if (!new_lhs) { + assert(0); + throw std::bad_alloc(); + } splitLHS(h, pivot, new_lhs.get(), &temp_map); /* want to cut off paths to pivot from things other than the pivot - @@ -1310,6 +1319,10 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg, if (!contains(done_rhs, adj)) { unordered_map temp_map; shared_ptr new_rhs = make_shared(); + if (!new_rhs) { + assert(0); + throw std::bad_alloc(); + } splitRHS(h, adj, new_rhs.get(), &temp_map); remove_edge(new_rhs->start, new_rhs->accept, *new_rhs); remove_edge(new_rhs->start, new_rhs->acceptEod, *new_rhs); @@ -2281,6 +2294,10 @@ void splitEdgesForSuffix(const NGHolder &base_graph, RoseInGraph &vg, assert(!splitters.empty()); shared_ptr lhs = make_shared(); + if (!lhs) { + assert(0); + throw bad_alloc(); + } unordered_map v_map; cloneHolder(*lhs, base_graph, &v_map); lhs->kind = NFA_INFIX; diff --git a/src/parser/logical_combination.cpp b/src/parser/logical_combination.cpp index adf06bc4..b75ca34f 100644 --- a/src/parser/logical_combination.cpp +++ b/src/parser/logical_combination.cpp @@ -140,7 +140,8 @@ void ParsedLogical::validateSubIDs(const unsigned *ids, } hs_compile_error_t *compile_err = NULL; hs_expr_info_t *info = NULL; - hs_error_t err = hs_expression_info(expressions[i], flags[i], &info, + hs_error_t err = hs_expression_info(expressions[i], + flags ? flags[i] : 0, &info, &compile_err); if (err != HS_SUCCESS) { hs_free_compile_error(compile_err); diff --git a/src/rose/rose_build_convert.cpp b/src/rose/rose_build_convert.cpp index b8d0a09b..992311da 100644 --- a/src/rose/rose_build_convert.cpp +++ b/src/rose/rose_build_convert.cpp @@ -561,6 +561,10 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, DEBUG_PRINTF("woot?\n"); shared_ptr h_new = make_shared(); + if (!h_new) { + assert(0); + throw std::bad_alloc(); + } unordered_map rhs_map; vector exits_vec; insert(&exits_vec, exits_vec.end(), exits); diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index 5dad4704..e1d2f1f3 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -77,7 +77,7 @@ namespace ue2 { struct LitTrieVertexProps { LitTrieVertexProps() = default; explicit LitTrieVertexProps(u8 c_in) : c(c_in) {} - size_t index; // managed by ue2_graph + size_t index = 0; // managed by ue2_graph u8 c = 0; //!< character reached on this vertex flat_set reports; //!< managed reports fired on this vertex }; diff --git a/src/util/graph_undirected.h b/src/util/graph_undirected.h index 049964ab..50717284 100644 --- a/src/util/graph_undirected.h +++ b/src/util/graph_undirected.h @@ -70,8 +70,8 @@ class undirected_graph_edge_descriptor using base_vertex_type = typename base_graph_traits::vertex_descriptor; base_edge_type underlying_edge; - const base_graph_type *g; - bool reverse; // if true, reverse vertices in source() and target() + const base_graph_type *g = nullptr; + bool reverse = false; // if true, reverse vertices in source() and target() inline std::pair canonical_edge() const { diff --git a/src/util/ue2string.h b/src/util/ue2string.h index 0aa84689..f436936d 100644 --- a/src/util/ue2string.h +++ b/src/util/ue2string.h @@ -133,7 +133,7 @@ public: : lit(&lit_in), idx(idx_in) {} const ue2_literal *lit = nullptr; - size_t idx; + size_t idx = 0; }; using const_reverse_iterator = std::reverse_iterator; diff --git a/tools/hsbench/data_corpus.cpp b/tools/hsbench/data_corpus.cpp index 8e761ec3..b23da1fb 100644 --- a/tools/hsbench/data_corpus.cpp +++ b/tools/hsbench/data_corpus.cpp @@ -58,7 +58,10 @@ void readRow(sqlite3_stmt *statement, vector &blocks, } auto internal_stream_index = stream_indices[stream_id]; - assert(blob || bytes > 0); + if (!(blob && bytes > 0)) { + assert(0); + throw std::domain_error("Invalid blob or bytes from sqlite3."); + } blocks.emplace_back(id, stream_id, internal_stream_index, string(blob, blob + bytes)); } diff --git a/tools/hsbench/main.cpp b/tools/hsbench/main.cpp index c5a6221b..6d091d38 100644 --- a/tools/hsbench/main.cpp +++ b/tools/hsbench/main.cpp @@ -740,6 +740,11 @@ u64a byte_size(const vector &corpus_blocks) { total += block.payload.size(); } + if (total == 0) { + assert(0); + throw std::invalid_argument("Empty corpus."); + } + return total; } diff --git a/tools/hscollider/DatabaseProxy.h b/tools/hscollider/DatabaseProxy.h index 831ab148..f6957d29 100644 --- a/tools/hscollider/DatabaseProxy.h +++ b/tools/hscollider/DatabaseProxy.h @@ -61,7 +61,7 @@ public: std::lock_guard lock(mutex); if (failed) { // We have previously failed to compile this database. - return nullptr; + throw CompileFailed("Unable to compile db previously."); } if (db) { return db; diff --git a/tools/hscollider/NfaGeneratedCorpora.cpp b/tools/hscollider/NfaGeneratedCorpora.cpp index 66ae270b..4de320e1 100644 --- a/tools/hscollider/NfaGeneratedCorpora.cpp +++ b/tools/hscollider/NfaGeneratedCorpora.cpp @@ -101,7 +101,7 @@ void NfaGeneratedCorpora::generate(unsigned id, vector &data) { pl.logicalKeyRenumber(); const auto &m_lkey = pl.getLkeyMap(); assert(!m_lkey.empty()); - u32 a_subid; // arbitrary sub id + u32 a_subid = 0; // arbitrary sub id unordered_map> m_data; for (const auto &it : m_lkey) { a_subid = it.first; diff --git a/tools/hscollider/Thread.cpp b/tools/hscollider/Thread.cpp index 5fff8239..c63793d9 100644 --- a/tools/hscollider/Thread.cpp +++ b/tools/hscollider/Thread.cpp @@ -98,6 +98,6 @@ void *Thread::runThread(void *thr) { } -Thread::Thread(size_t num) : thread_id(num) {} +Thread::Thread(size_t num) : thread_id(num), thread() {} Thread::~Thread() {} diff --git a/util/ng_corpus_properties.cpp b/util/ng_corpus_properties.cpp index e784e058..511ad60a 100644 --- a/util/ng_corpus_properties.cpp +++ b/util/ng_corpus_properties.cpp @@ -42,7 +42,7 @@ CorpusProperties::CorpusProperties() : matchness(100), unmatchness(0), randomness(0), prefixRange(0, 0), suffixRange(0, 0), cycleMin(1), cycleMax(1), corpusLimit(DEFAULT_CORPUS_GENERATOR_LIMIT), editDistance(0), - alphabetSize(~0) { + alphabetSize(~0), rngSeed(0) { // empty } From 762f4050a0f6897b7b7f4337eb4354dd4fd9ed85 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Tue, 5 Jul 2022 17:11:18 +0000 Subject: [PATCH 08/25] gcc-10(and above): fix compile issue caused by stringop-overflow --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 43ce320b..e08ae48d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -504,9 +504,9 @@ CHECK_CXX_COMPILER_FLAG("-Wunused-local-typedefs" CXX_UNUSED_LOCAL_TYPEDEFS) CHECK_CXX_COMPILER_FLAG("-Wunused-variable" CXX_WUNUSED_VARIABLE) # gcc 10 complains about this -CHECK_C_COMPILER_FLAG("-Wstringop-overflow" CC_STRINGOP_OVERFLOW) -if(CC_STRINGOP_OVERFLOW) +if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10) set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-stringop-overflow") + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-stringop-overflow") endif() include_directories(SYSTEM ${Boost_INCLUDE_DIRS}) From 7c1835c0e7c74272b3794afb3fad1eb40c49c98d Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Tue, 12 Jul 2022 08:42:05 +0000 Subject: [PATCH 09/25] stringop-overflow compatible fix --- CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e08ae48d..d53a7778 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -504,7 +504,9 @@ CHECK_CXX_COMPILER_FLAG("-Wunused-local-typedefs" CXX_UNUSED_LOCAL_TYPEDEFS) CHECK_CXX_COMPILER_FLAG("-Wunused-variable" CXX_WUNUSED_VARIABLE) # gcc 10 complains about this -if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10) +CHECK_C_COMPILER_FLAG("-Wstringop-overflow" CC_STRINGOP_OVERFLOW) +CHECK_CXX_COMPILER_FLAG("-Wstringop-overflow" CXX_STRINGOP_OVERFLOW) +if(CC_STRINGOP_OVERFLOW OR CXX_STRINGOP_OVERFLOW) set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-stringop-overflow") set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-stringop-overflow") endif() From 684f0ce2cba0f0440457efddbd05dc17bc603dae Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 28 Jul 2022 21:24:31 +0000 Subject: [PATCH 10/25] UTF-8 validation: fix one cotec check corner issue fix github issue #362 --- src/parser/utf8_validate.cpp | 2 +- unit/internal/utf8_validate.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parser/utf8_validate.cpp b/src/parser/utf8_validate.cpp index 50aa06d8..a4b74796 100644 --- a/src/parser/utf8_validate.cpp +++ b/src/parser/utf8_validate.cpp @@ -72,7 +72,7 @@ bool isValidUtf8(const char *expression, const size_t len) { while (i < len) { DEBUG_PRINTF("byte %zu: 0x%02x\n", i, s[i]); // One octet. - if (s[i] < 0x7f) { + if (s[i] <= 0x7f) { DEBUG_PRINTF("one octet\n"); i++; continue; diff --git a/unit/internal/utf8_validate.cpp b/unit/internal/utf8_validate.cpp index 03357942..f69ee857 100644 --- a/unit/internal/utf8_validate.cpp +++ b/unit/internal/utf8_validate.cpp @@ -64,8 +64,8 @@ static ValidUtf8TestInfo valid_utf8_tests[] = { {"공동경비구역", true}, {"জলসাঘর", true}, - // Invalid one-byte caseS. - {"\x7f", false}, + // Valid one-byte caseS. + {"\x7f", true}, // \x7f is valid // These bytes should never appear in a UTF-8 stream. {"\xc0", false}, From b7ee9102ee915d0cfa5b606884b8d0da63edf49d Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Mon, 1 Aug 2022 17:13:25 +0000 Subject: [PATCH 11/25] update year 2022 --- chimera/ch_runtime.c | 2 +- src/parser/utf8_validate.cpp | 2 +- unit/internal/utf8_validate.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/chimera/ch_runtime.c b/chimera/ch_runtime.c index 1009036b..af7d1f08 100644 --- a/chimera/ch_runtime.c +++ b/chimera/ch_runtime.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, Intel Corporation + * Copyright (c) 2018-2022, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: diff --git a/src/parser/utf8_validate.cpp b/src/parser/utf8_validate.cpp index a4b74796..54c9755e 100644 --- a/src/parser/utf8_validate.cpp +++ b/src/parser/utf8_validate.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2022, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: diff --git a/unit/internal/utf8_validate.cpp b/unit/internal/utf8_validate.cpp index f69ee857..03f52903 100644 --- a/unit/internal/utf8_validate.cpp +++ b/unit/internal/utf8_validate.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2022, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: From fc5a423c7e547765f06d4c10f8a23cd773d6c033 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Tue, 2 Aug 2022 19:25:27 +0000 Subject: [PATCH 12/25] Fix cmake CMP0115 warning for CMake 3.20 and above --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d53a7778..577cab2e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -628,7 +628,7 @@ endif () set (hs_exec_SRCS ${hs_HEADERS} - src/hs_version.h + src/hs_version.h.in src/ue2common.h src/allocator.h src/crc32.c @@ -804,7 +804,7 @@ SET (hs_compile_SRCS src/grey.h src/hs.cpp src/hs_internal.h - src/hs_version.h + src/hs_version.h.in src/scratch.h src/state.h src/ue2common.h From 941cc7144bc37315407a8ba556bab46e2df1b73a Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Mon, 15 Aug 2022 03:00:22 +0000 Subject: [PATCH 13/25] Silence clang-14 warnings --- CMakeLists.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 577cab2e..096b609c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -474,6 +474,18 @@ if (CXX_UNUSED_CONST_VAR) set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-const-variable") endif() +# clang-14 complains about unused-but-set variable. +CHECK_CXX_COMPILER_FLAG("-Wunused-but-set-variable" CXX_UNUSED_BUT_SET_VAR) +if (CXX_UNUSED_BUT_SET_VAR) + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-but-set-variable") +endif() + +# clang-14 complains about using bitwise operator instead of logical ones. +CHECK_CXX_COMPILER_FLAG("-Wbitwise-instead-of-logical" CXX_BITWISE_INSTEAD_OF_LOGICAL) +if (CXX_BITWISE_INSTEAD_OF_LOGICAL) + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-bitwise-instead-of-logical") +endif() + # gcc 6 complains about type attributes that get ignored, like alignment CHECK_CXX_COMPILER_FLAG("-Wignored-attributes" CXX_IGNORED_ATTR) if (CXX_IGNORED_ATTR) From 659525480c1093f217e1a3055705caf391c50e7c Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Wed, 19 Oct 2022 16:50:02 +0000 Subject: [PATCH 14/25] stream close: free stream to avoid memory leak fix github issue #303 --- src/runtime.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/runtime.c b/src/runtime.c index a3659348..ab46db1a 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -1013,6 +1013,7 @@ hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch, report_eod_matches(id, scratch, onEvent, context); if (unlikely(internal_matching_error(scratch))) { unmarkScratchInUse(scratch); + hs_stream_free(id); return HS_UNKNOWN_ERROR; } unmarkScratchInUse(scratch); From 7f2f7d2a1ee83586e86e6715b8e78f0b34635d8b Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 20 Oct 2022 08:48:46 +0000 Subject: [PATCH 15/25] scratch: add quick validity check fix github issue #350 --- src/runtime.c | 39 +++++++++++++++++--------------------- src/scratch.c | 4 +++- src/scratch.h | 3 ++- src/state.h | 5 ++++- src/stream_compress_impl.h | 3 ++- 5 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/runtime.c b/src/runtime.c index ab46db1a..3c2d6533 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2019, Intel Corporation + * Copyright (c) 2015-2022, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -90,7 +90,7 @@ u8 *getHistory(char *state, const struct RoseEngine *t, u64a offset) { * callers. */ static really_inline -char validScratch(const struct RoseEngine *t, const struct hs_scratch *s) { +char validScratch(const struct hs_scratch *s, u32 crc) { if (!ISALIGNED_CL(s)) { DEBUG_PRINTF("bad alignment %p\n", s); return 0; @@ -101,18 +101,12 @@ char validScratch(const struct RoseEngine *t, const struct hs_scratch *s) { return 0; } - if (t->mode == HS_MODE_BLOCK && t->stateOffsets.end > s->bStateSize) { - DEBUG_PRINTF("bad state size\n"); + /* add quick rose sanity checks by db crc*/ + if (s->db_crc != crc) { + DEBUG_PRINTF("Improper scratch for current db\n"); return 0; } - if (t->queueCount > s->queueCount) { - DEBUG_PRINTF("bad queue count\n"); - return 0; - } - - /* TODO: add quick rose sanity checks */ - return 1; } @@ -335,7 +329,7 @@ hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data, return HS_DB_MODE_ERROR; } - if (unlikely(!validScratch(rose, scratch))) { + if (unlikely(!validScratch(scratch, db->crc32))) { return HS_INVALID; } @@ -509,7 +503,7 @@ void maintainHistoryBuffer(const struct RoseEngine *rose, char *state, static really_inline void init_stream(struct hs_stream *s, const struct RoseEngine *rose, - char init_history) { + char init_history, u32 crc) { char *state = getMultiState(s); if (init_history) { @@ -524,6 +518,7 @@ void init_stream(struct hs_stream *s, const struct RoseEngine *rose, s->rose = rose; s->offset = 0; + s->crc32 = crc; setStreamStatus(state, 0); roseInitState(rose, state); @@ -568,7 +563,7 @@ hs_error_t HS_CDECL hs_open_stream(const hs_database_t *db, return HS_NOMEM; } - init_stream(s, rose, 1); + init_stream(s, rose, 1, db->crc32); *stream = s; return HS_SUCCESS; @@ -756,7 +751,7 @@ hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id, } if (onEvent) { - if (!scratch || !validScratch(to_id->rose, scratch)) { + if (!scratch || !validScratch(scratch, to_id->crc32)) { return HS_INVALID; } if (unlikely(markScratchInUse(scratch))) { @@ -982,7 +977,7 @@ hs_error_t HS_CDECL hs_scan_stream(hs_stream_t *id, const char *data, hs_scratch_t *scratch, match_event_handler onEvent, void *context) { if (unlikely(!id || !scratch || !data || - !validScratch(id->rose, scratch))) { + !validScratch(scratch, id->crc32))) { return HS_INVALID; } @@ -1004,7 +999,7 @@ hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch, } if (onEvent) { - if (!scratch || !validScratch(id->rose, scratch)) { + if (!scratch || !validScratch(scratch, id->crc32)) { return HS_INVALID; } if (unlikely(markScratchInUse(scratch))) { @@ -1034,7 +1029,7 @@ hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags, } if (onEvent) { - if (!scratch || !validScratch(id->rose, scratch)) { + if (!scratch || !validScratch(scratch, id->crc32)) { return HS_INVALID; } if (unlikely(markScratchInUse(scratch))) { @@ -1049,7 +1044,7 @@ hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags, } // history already initialised - init_stream(id, id->rose, 0); + init_stream(id, id->rose, 0, id->crc32); return HS_SUCCESS; } @@ -1128,7 +1123,7 @@ hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db, return HS_DB_MODE_ERROR; } - if (unlikely(!validScratch(rose, scratch))) { + if (unlikely(!validScratch(scratch, db->crc32))) { return HS_INVALID; } @@ -1138,7 +1133,7 @@ hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db, hs_stream_t *id = (hs_stream_t *)(scratch->bstate); - init_stream(id, rose, 1); /* open stream */ + init_stream(id, rose, 1, db->crc32); /* open stream */ for (u32 i = 0; i < count; i++) { DEBUG_PRINTF("block %u/%u offset=%llu len=%u\n", i, count, id->offset, @@ -1253,7 +1248,7 @@ hs_error_t HS_CDECL hs_reset_and_expand_stream(hs_stream_t *to_stream, const struct RoseEngine *rose = to_stream->rose; if (onEvent) { - if (!scratch || !validScratch(to_stream->rose, scratch)) { + if (!scratch || !validScratch(scratch, to_stream->crc32)) { return HS_INVALID; } if (unlikely(markScratchInUse(scratch))) { diff --git a/src/scratch.c b/src/scratch.c index 25991e2b..5849380d 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2019, Intel Corporation + * Copyright (c) 2015-2022, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -373,6 +373,7 @@ hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db, hs_scratch_free((*scratch)->scratch_alloc); } + proto->db_crc = db->crc32; hs_error_t alloc_ret = alloc_scratch(proto, scratch); hs_scratch_free(proto_tmp); /* kill off temp used for sizing */ if (alloc_ret != HS_SUCCESS) { @@ -380,6 +381,7 @@ hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db, return alloc_ret; } } else { + (*scratch)->db_crc = db->crc32; hs_scratch_free(proto_tmp); /* kill off temp used for sizing */ unmarkScratchInUse(*scratch); } diff --git a/src/scratch.h b/src/scratch.h index 1256f7ab..efaa6884 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2019, Intel Corporation + * Copyright (c) 2015-2022, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -171,6 +171,7 @@ struct match_deduper { */ struct ALIGN_CL_DIRECTIVE hs_scratch { u32 magic; + u32 db_crc; /**< identity of a scratch space, for validity check */ u8 in_use; /**< non-zero when being used by an API call. */ u32 queueCount; u32 activeQueueArraySize; /**< size of active queue array fatbit in bytes */ diff --git a/src/state.h b/src/state.h index 9ade59db..567001ea 100644 --- a/src/state.h +++ b/src/state.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2022, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -57,6 +57,9 @@ struct hs_stream { /** \brief The current stream offset. */ u64a offset; + + /** \brief Identity of hs_stream, for scratch validity check. */ + u32 crc32; }; #define getMultiState(hs_s) ((char *)(hs_s) + sizeof(*(hs_s))) diff --git a/src/stream_compress_impl.h b/src/stream_compress_impl.h index d1ccf5e6..ceea14a6 100644 --- a/src/stream_compress_impl.h +++ b/src/stream_compress_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, Intel Corporation + * Copyright (c) 2017-2022, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -116,6 +116,7 @@ size_t JOIN(sc_, FN_SUFFIX)(const struct RoseEngine *rose, = ((STREAM_QUAL char *)stream) + sizeof(struct hs_stream); COPY_FIELD(stream->offset); + COPY_FIELD(stream->crc32); ASSIGN(stream->rose, rose); COPY(stream_body + ROSE_STATE_OFFSET_STATUS_FLAGS, 1); From 91f0cb6ceab7dbd714fcab7be726657b4a45d910 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 20 Oct 2022 08:47:03 +0000 Subject: [PATCH 16/25] fix nfa dump error --- src/nfa/nfa_dump_dispatch.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/nfa/nfa_dump_dispatch.cpp b/src/nfa/nfa_dump_dispatch.cpp index bc8c175d..b498fd95 100644 --- a/src/nfa/nfa_dump_dispatch.cpp +++ b/src/nfa/nfa_dump_dispatch.cpp @@ -75,6 +75,7 @@ namespace ue2 { DISPATCH_CASE(LBR_NFA_VERM, LbrVerm, dbnt_func); \ DISPATCH_CASE(LBR_NFA_NVERM, LbrNVerm, dbnt_func); \ DISPATCH_CASE(LBR_NFA_SHUF, LbrShuf, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_VSHUF, LbrVShuf, dbnt_func); \ DISPATCH_CASE(LBR_NFA_TRUF, LbrTruf, dbnt_func); \ DISPATCH_CASE(CASTLE_NFA, Castle, dbnt_func); \ DISPATCH_CASE(SHENG_NFA, Sheng, dbnt_func); \ From 6765b35d48d5134365a4dd25010d048eeb91d3cf Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Mon, 7 Jun 2021 15:35:57 +0800 Subject: [PATCH 17/25] bugfix: add vbmi platform parameter for tests in single.cpp --- unit/hyperscan/single.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/unit/hyperscan/single.cpp b/unit/hyperscan/single.cpp index 01fbfeab..07269cf0 100644 --- a/unit/hyperscan/single.cpp +++ b/unit/hyperscan/single.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -363,8 +363,9 @@ static const unsigned validModes[] = { // Mode bits for switching off various architecture features static const unsigned long long featureMask[] = { ~0ULL, /* native */ - ~(HS_CPU_FEATURES_AVX2 | HS_CPU_FEATURES_AVX512), /* no avx2 */ - ~HS_CPU_FEATURES_AVX512, /* no avx512 */ + ~(HS_CPU_FEATURES_AVX2 | HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX512VBMI), /* no avx2 */ + ~(HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX512VBMI), /* no avx512 */ + ~HS_CPU_FEATURES_AVX512VBMI, /* no avx512vbmi */ }; INSTANTIATE_TEST_CASE_P(Single, From 4fb3a48dfdb085426c0a3bb89fedcef1aa46d17e Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Mon, 7 Jun 2021 16:24:51 +0800 Subject: [PATCH 18/25] bugfix: add vbmi case for test in database.cpp --- unit/internal/database.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/unit/internal/database.cpp b/unit/internal/database.cpp index 8f0c1a69..0070fbc9 100644 --- a/unit/internal/database.cpp +++ b/unit/internal/database.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -56,6 +56,10 @@ TEST(DB, flagsToPlatform) { p.cpu_features |= HS_CPU_FEATURES_AVX512; #endif +#if defined(HAVE_AVX512VBMI) + p.cpu_features |= HS_CPU_FEATURES_AVX512VBMI; +#endif + platform_t pp = target_to_platform(target_t(p)); ASSERT_EQ(pp, hs_current_platform); } From dc78dc1633aaa087beb5885023180d90665e6caf Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Wed, 29 Dec 2021 22:30:18 +0000 Subject: [PATCH 19/25] sanitiser bugfix --- tools/hscollider/args.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/hscollider/args.cpp b/tools/hscollider/args.cpp index 54cea276..3fe48f93 100644 --- a/tools/hscollider/args.cpp +++ b/tools/hscollider/args.cpp @@ -499,8 +499,8 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop, } else if (in_corpora) { corpora->push_back(optarg); in_corpora = 2; - break; } + break; case 0: break; default: From ab4f837607af1ae3c771fbcea31fe0a65c0dda33 Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Tue, 21 Feb 2023 22:52:57 +0000 Subject: [PATCH 20/25] changelog: updates for 5.4.1 release --- CHANGELOG.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8de3a8d6..481f8fcf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,23 @@ This is a list of notable changes to Hyperscan, in reverse chronological order. +## [5.4.1] 2023-02-20 +- The Intel Hyperscan team is pleased to provide a bug fix release to our open source library. + Intel also maintains an upgraded version available through your Intel sales representative. +- Bugfix for issue #184: fix random char value of UTF-8. +- Bugfix for issue #291: bypass logical combination flag in hs_expression_info(). +- Bugfix for issue #292: fix build error due to libc symbol parsing. +- Bugfix for issue #302/304: add empty string check for pure literal API. +- Bugfix for issue #303: fix unknown instruction error in pure literal API. +- Bugfix for issue #303: avoid memory leak in stream close stage. +- Bugfix for issue #305: fix assertion failure in DFA construction. +- Bugfix for issue #317: fix aligned allocator segment faults. +- Bugfix for issue #350: add quick validity check for scratch. +- Bugfix for issue #359: fix glibc-2.34 stack size issue. +- Bugfix for issue #360: fix SKIP flag issue in chimera. +- Bugfix for issue #362: fix one cotec check corner issue in UTF-8 validation. +- Fix other compile issues. + ## [5.4.0] 2020-12-31 - Improvement on literal matcher "Fat Teddy" performance, including support for Intel(R) AVX-512 Vector Byte Manipulation Instructions (Intel(R) From c6523453d721b67c84b43504fd5eae5ee8ff8c6e Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Wed, 1 Mar 2023 14:42:27 +0000 Subject: [PATCH 21/25] scratch: remove quick validity check Roll back fix for github issue #350 About Scratch Usage: For compile time, scratch space is strongly recommended to be allocated immediately after database generation. For runtime, besides using scratch for corresponding database, Hyperscan also allows user to use larger scratch space allocated for another database. When multiple concurrent threads need to use the same databases and a new scratch space is required, cloning the largest one is always safe. This is realized based on API hs_scratch_size() and hs_clone_scratch(). Behaviors beyond above are discouraged and results are undefined. --- src/runtime.c | 37 +++++++++++++++++++++---------------- src/scratch.c | 4 +--- src/scratch.h | 3 +-- src/state.h | 5 +---- src/stream_compress_impl.h | 3 +-- 5 files changed, 25 insertions(+), 27 deletions(-) diff --git a/src/runtime.c b/src/runtime.c index 3c2d6533..a055e5f4 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -90,7 +90,7 @@ u8 *getHistory(char *state, const struct RoseEngine *t, u64a offset) { * callers. */ static really_inline -char validScratch(const struct hs_scratch *s, u32 crc) { +char validScratch(const struct RoseEngine *t, const struct hs_scratch *s) { if (!ISALIGNED_CL(s)) { DEBUG_PRINTF("bad alignment %p\n", s); return 0; @@ -101,12 +101,18 @@ char validScratch(const struct hs_scratch *s, u32 crc) { return 0; } - /* add quick rose sanity checks by db crc*/ - if (s->db_crc != crc) { - DEBUG_PRINTF("Improper scratch for current db\n"); + if (t->mode == HS_MODE_BLOCK && t->stateOffsets.end > s->bStateSize) { + DEBUG_PRINTF("bad state size\n"); return 0; } + if (t->queueCount > s->queueCount) { + DEBUG_PRINTF("bad queue count\n"); + return 0; + } + + /* TODO: add quick rose sanity checks */ + return 1; } @@ -329,7 +335,7 @@ hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data, return HS_DB_MODE_ERROR; } - if (unlikely(!validScratch(scratch, db->crc32))) { + if (unlikely(!validScratch(rose, scratch))) { return HS_INVALID; } @@ -503,7 +509,7 @@ void maintainHistoryBuffer(const struct RoseEngine *rose, char *state, static really_inline void init_stream(struct hs_stream *s, const struct RoseEngine *rose, - char init_history, u32 crc) { + char init_history) { char *state = getMultiState(s); if (init_history) { @@ -518,7 +524,6 @@ void init_stream(struct hs_stream *s, const struct RoseEngine *rose, s->rose = rose; s->offset = 0; - s->crc32 = crc; setStreamStatus(state, 0); roseInitState(rose, state); @@ -563,7 +568,7 @@ hs_error_t HS_CDECL hs_open_stream(const hs_database_t *db, return HS_NOMEM; } - init_stream(s, rose, 1, db->crc32); + init_stream(s, rose, 1); *stream = s; return HS_SUCCESS; @@ -751,7 +756,7 @@ hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id, } if (onEvent) { - if (!scratch || !validScratch(scratch, to_id->crc32)) { + if (!scratch || !validScratch(to_id->rose, scratch)) { return HS_INVALID; } if (unlikely(markScratchInUse(scratch))) { @@ -977,7 +982,7 @@ hs_error_t HS_CDECL hs_scan_stream(hs_stream_t *id, const char *data, hs_scratch_t *scratch, match_event_handler onEvent, void *context) { if (unlikely(!id || !scratch || !data || - !validScratch(scratch, id->crc32))) { + !validScratch(id->rose, scratch))) { return HS_INVALID; } @@ -999,7 +1004,7 @@ hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch, } if (onEvent) { - if (!scratch || !validScratch(scratch, id->crc32)) { + if (!scratch || !validScratch(id->rose, scratch)) { return HS_INVALID; } if (unlikely(markScratchInUse(scratch))) { @@ -1029,7 +1034,7 @@ hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags, } if (onEvent) { - if (!scratch || !validScratch(scratch, id->crc32)) { + if (!scratch || !validScratch(id->rose, scratch)) { return HS_INVALID; } if (unlikely(markScratchInUse(scratch))) { @@ -1044,7 +1049,7 @@ hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags, } // history already initialised - init_stream(id, id->rose, 0, id->crc32); + init_stream(id, id->rose, 0); return HS_SUCCESS; } @@ -1123,7 +1128,7 @@ hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db, return HS_DB_MODE_ERROR; } - if (unlikely(!validScratch(scratch, db->crc32))) { + if (unlikely(!validScratch(rose, scratch))) { return HS_INVALID; } @@ -1133,7 +1138,7 @@ hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db, hs_stream_t *id = (hs_stream_t *)(scratch->bstate); - init_stream(id, rose, 1, db->crc32); /* open stream */ + init_stream(id, rose, 1); /* open stream */ for (u32 i = 0; i < count; i++) { DEBUG_PRINTF("block %u/%u offset=%llu len=%u\n", i, count, id->offset, @@ -1248,7 +1253,7 @@ hs_error_t HS_CDECL hs_reset_and_expand_stream(hs_stream_t *to_stream, const struct RoseEngine *rose = to_stream->rose; if (onEvent) { - if (!scratch || !validScratch(scratch, to_stream->crc32)) { + if (!scratch || !validScratch(to_stream->rose, scratch)) { return HS_INVALID; } if (unlikely(markScratchInUse(scratch))) { diff --git a/src/scratch.c b/src/scratch.c index 5849380d..9f6d77cd 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2022, Intel Corporation + * Copyright (c) 2015-2023, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -373,7 +373,6 @@ hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db, hs_scratch_free((*scratch)->scratch_alloc); } - proto->db_crc = db->crc32; hs_error_t alloc_ret = alloc_scratch(proto, scratch); hs_scratch_free(proto_tmp); /* kill off temp used for sizing */ if (alloc_ret != HS_SUCCESS) { @@ -381,7 +380,6 @@ hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db, return alloc_ret; } } else { - (*scratch)->db_crc = db->crc32; hs_scratch_free(proto_tmp); /* kill off temp used for sizing */ unmarkScratchInUse(*scratch); } diff --git a/src/scratch.h b/src/scratch.h index efaa6884..e3cd9245 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2022, Intel Corporation + * Copyright (c) 2015-2023, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -171,7 +171,6 @@ struct match_deduper { */ struct ALIGN_CL_DIRECTIVE hs_scratch { u32 magic; - u32 db_crc; /**< identity of a scratch space, for validity check */ u8 in_use; /**< non-zero when being used by an API call. */ u32 queueCount; u32 activeQueueArraySize; /**< size of active queue array fatbit in bytes */ diff --git a/src/state.h b/src/state.h index 567001ea..68600a91 100644 --- a/src/state.h +++ b/src/state.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2022, Intel Corporation + * Copyright (c) 2015-2023, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -57,9 +57,6 @@ struct hs_stream { /** \brief The current stream offset. */ u64a offset; - - /** \brief Identity of hs_stream, for scratch validity check. */ - u32 crc32; }; #define getMultiState(hs_s) ((char *)(hs_s) + sizeof(*(hs_s))) diff --git a/src/stream_compress_impl.h b/src/stream_compress_impl.h index ceea14a6..f02543ef 100644 --- a/src/stream_compress_impl.h +++ b/src/stream_compress_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022, Intel Corporation + * Copyright (c) 2017-2023, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -116,7 +116,6 @@ size_t JOIN(sc_, FN_SUFFIX)(const struct RoseEngine *rose, = ((STREAM_QUAL char *)stream) + sizeof(struct hs_stream); COPY_FIELD(stream->offset); - COPY_FIELD(stream->crc32); ASSIGN(stream->rose, rose); COPY(stream_body + ROSE_STATE_OFFSET_STATUS_FLAGS, 1); From 5209c7978a81da812f7befaa5366e001e73dc64e Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 23 Mar 2023 06:43:46 +0000 Subject: [PATCH 22/25] remove invalid nfa dump info --- src/nfa/nfa_dump_dispatch.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/nfa/nfa_dump_dispatch.cpp b/src/nfa/nfa_dump_dispatch.cpp index b498fd95..bc8c175d 100644 --- a/src/nfa/nfa_dump_dispatch.cpp +++ b/src/nfa/nfa_dump_dispatch.cpp @@ -75,7 +75,6 @@ namespace ue2 { DISPATCH_CASE(LBR_NFA_VERM, LbrVerm, dbnt_func); \ DISPATCH_CASE(LBR_NFA_NVERM, LbrNVerm, dbnt_func); \ DISPATCH_CASE(LBR_NFA_SHUF, LbrShuf, dbnt_func); \ - DISPATCH_CASE(LBR_NFA_VSHUF, LbrVShuf, dbnt_func); \ DISPATCH_CASE(LBR_NFA_TRUF, LbrTruf, dbnt_func); \ DISPATCH_CASE(CASTLE_NFA, Castle, dbnt_func); \ DISPATCH_CASE(SHENG_NFA, Sheng, dbnt_func); \ From 4344d2fce7574cc65235d012f3c5f6cfc7561f9f Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Wed, 19 Apr 2023 09:18:45 +0000 Subject: [PATCH 23/25] changelog: updates for 5.4.2 release --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 481f8fcf..09b4a95c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,14 @@ This is a list of notable changes to Hyperscan, in reverse chronological order. +## [5.4.2] 2023-04-19 +- Roll back bugfix for github issue #350: Besides using scratch for + corresponding database, Hyperscan also allows user to use larger scratch + allocated for another database. Users can leverage this property to achieve + safe scratch usage in multi-database scenarios. Behaviors beyond these are + discouraged and results are undefined. +- Fix hsdump issue due to invalid nfa type. + ## [5.4.1] 2023-02-20 - The Intel Hyperscan team is pleased to provide a bug fix release to our open source library. Intel also maintains an upgraded version available through your Intel sales representative. From 68346631c74ef0dc1b6616506b71163807f2f8b8 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Thu, 7 Sep 2023 17:51:07 +0300 Subject: [PATCH 24/25] bump version, add Vectorscan Changelog --- CHANGELOG-vectorscan.md | 44 +++++++++++++++++++++++++++++++++++++++++ CMakeLists.txt | 2 +- 2 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 CHANGELOG-vectorscan.md diff --git a/CHANGELOG-vectorscan.md b/CHANGELOG-vectorscan.md new file mode 100644 index 00000000..1771e410 --- /dev/null +++ b/CHANGELOG-vectorscan.md @@ -0,0 +1,44 @@ +# Hyperscan Change Log + +This is a list of notable changes to Hyperscan, in reverse chronological order. + +## [5.4.10] 2023-09-23 + + +## [5.4.9] 2023-03-23 +- Major change: Enable SVE & SVE2 builds and make it a supported architecture! (thanks to @abondarev84) +- Fix various clang-related bugs +- Fix Aarch64 bug in Parser.rl because of char signedness. Make unsigned char the default in the Parser for all architectures. +- Fix Power bug, multiple tests were failing. +- C++20 related change, use prefixed assume_aligned to avoid conflict with C++20 std::assume_aligned. + +## [5.4.8] 2022-09-13 +- CMake: Use non-deprecated method for finding python by @jth in #108 +- Optimize vectorscan for aarch64 by using shrn instruction by @danlark1 in #113 +- Fixed the PCRE download location by @pareenaverma in #116 +- Bugfix/hyperscan backport 202208 by @markos in #118 +- VSX optimizations by @markos in #119 +- when compiling with mingw64, use __mingw_aligned_malloc() and __mingw_aligned_free() by @liquidaty in #121 +- [NEON] simplify/optimize shift/align primitives by @markos in #123 +- Merge develop to master by @markos in #124 + +## [5.4.7] 2022-05-05 +- Fix word boundary assertions under C++20 by @BigRedEye in #90 +- Fix all ASAN issues in vectorscan by @danlark1 in #93 +- change FAT_RUNTIME to a normal option so it can be set to off by @a16bitsysop in #94 +- Optimized and correct version of movemask128 for ARM by @danlark1 in #102 + +## [5.4.6] 2022-01-21 +- Major refactoring of many engines to use internal SuperVector C++ templates library. Code size reduced to 1/3rd with no loss of performance in most cases. +- Microbenchmarking tool added for performance finetuning +- Arm Advanced SIMD/NEON fully ported. Initial work on SVE2 for a couple of engines. +- Power9 VSX ppc64le fully ported. Initial port needs some optimization. +- Clang compiler support added. +- Apple M1 support added. +- CI added, the following configurations are tested on every PR: + gcc-debug, gcc-release, clang-debug, clang-release: + Linux Intel: SSE4.2, AVX2, AVX512, FAT + Linux Arm + Linux Power9 + clang-debug, clang-release: + MacOS Apple M1 diff --git a/CMakeLists.txt b/CMakeLists.txt index 096b609c..6a54233a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ project (vectorscan C CXX) set (HS_MAJOR_VERSION 5) set (HS_MINOR_VERSION 4) -set (HS_PATCH_VERSION 9) +set (HS_PATCH_VERSION 10) set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION}) set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) From a344cd30f7f9b58e417aeb49a41c95ae948f5b5c Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Thu, 7 Sep 2023 17:53:25 +0300 Subject: [PATCH 25/25] minor fix --- CHANGELOG-vectorscan.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG-vectorscan.md b/CHANGELOG-vectorscan.md index 1771e410..26188583 100644 --- a/CHANGELOG-vectorscan.md +++ b/CHANGELOG-vectorscan.md @@ -1,6 +1,6 @@ -# Hyperscan Change Log +# Vectorscan Change Log -This is a list of notable changes to Hyperscan, in reverse chronological order. +This is a list of notable changes to Vectorscan, in reverse chronological order. For Hyperscan Changelog, check CHANGELOG.md ## [5.4.10] 2023-09-23