diff --git a/CMakeLists.txt b/CMakeLists.txt index 74997466..85f96b53 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required (VERSION 3.13.4) +cmake_minimum_required (VERSION 3.18.4) project (vectorscan C CXX) @@ -162,11 +162,6 @@ foreach (CONFIG ${CMAKE_BUILD_TYPE} ${CMAKE_CONFIGURATION_TYPES}) string(REGEX REPLACE "-O[^ ]*" "" CMAKE_CXX_FLAGS_${CONFIG} "${CMAKE_CXX_FLAGS_${CONFIG}}") endforeach () -if (ARCH_LOONGARCH64) - set(ARCH_C_FLAGS "-mlsx") - set(ARCH_CXX_FLAGS "-mlsx") -endif(ARCH_LOONGARCH64) - message(STATUS "ARCH_C_FLAGS : ${ARCH_C_FLAGS}") message(STATUS "ARCH_CXX_FLAGS : ${ARCH_CXX_FLAGS}") @@ -193,7 +188,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS}") # PCRE check, we have a fixed requirement for PCRE to use Chimera # and hscollider set(PCRE_REQUIRED_MAJOR_VERSION 8) -set(PCRE_REQUIRED_MINOR_VERSION 39) +set(PCRE_REQUIRED_MINOR_VERSION 41) set(PCRE_REQUIRED_VERSION ${PCRE_REQUIRED_MAJOR_VERSION}.${PCRE_REQUIRED_MINOR_VERSION}) include (${CMAKE_MODULE_PATH}/pcre.cmake) if (NOT CORRECT_PCRE_VERSION) diff --git a/cmake/cflags-loongarch64.cmake b/cmake/cflags-loongarch64.cmake index 1af7312f..9c26c312 100644 --- a/cmake/cflags-loongarch64.cmake +++ b/cmake/cflags-loongarch64.cmake @@ -1,4 +1,3 @@ - CHECK_INCLUDE_FILE_CXX(lsxintrin.h HAVE_C_LOONGARCH64_LSXINTRIN_H) if (HAVE_C_LOONGARCH64_LSXINTRIN_H) @@ -7,7 +6,10 @@ else() message (FATAL_ERROR "No intrinsics header found for LSX") endif () -set(CMAKE_REQUIRED_FLAGS "-mlsx") +set(ARCH_C_FLAGS "-mlsx") +set(ARCH_CXX_FLAGS "-mlsx") + +set(CMAKE_REQUIRED_FLAGS "${ARCH_C_FLAGS}") CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}> int main() { __m128i a = __lsx_vreplgr2vr_w(1); diff --git a/src/util/arch/loongarch64/match.hpp b/src/util/arch/loongarch64/match.hpp index 78651edc..2b10c1a2 100644 --- a/src/util/arch/loongarch64/match.hpp +++ b/src/util/arch/loongarch64/match.hpp @@ -28,24 +28,6 @@ * POSSIBILITY OF SUCH DAMAGE. */ -static really_inline m128 vpmax_loongarch(v4u32 a, v4u32 b) { - u32 result[4]; - u32 tmp1 = __lsx_vpickve2gr_wu(a, 0); - u32 tmp2 = __lsx_vpickve2gr_wu(a, 1); - result[0] = (tmp1 >= tmp2) ? tmp1 : tmp2; - tmp1 = __lsx_vpickve2gr_wu(a, 2); - tmp2 = __lsx_vpickve2gr_wu(a, 3); - result[1] = (tmp1 >= tmp2) ? tmp1 : tmp2; - tmp1 = __lsx_vpickve2gr_wu(b, 0); - tmp2 = __lsx_vpickve2gr_wu(b, 1); - result[2] = (tmp1 >= tmp2) ? tmp1 : tmp2; - tmp1 = __lsx_vpickve2gr_wu(b, 2); - tmp2 = __lsx_vpickve2gr_wu(b, 3); - result[3] = (tmp1 >= tmp2) ? tmp1 : tmp2; - v4u32 res = __lsx_vld((uint32_t *)result, 0); - return res; -} - template <> really_really_inline const u8 *first_non_zero_match<16>(const u8 *buf, SuperVector<16> mask, u16 const UNUSED len) { diff --git a/src/util/arch/loongarch64/simd_utils.h b/src/util/arch/loongarch64/simd_utils.h index 9a207d36..58f5b387 100644 --- a/src/util/arch/loongarch64/simd_utils.h +++ b/src/util/arch/loongarch64/simd_utils.h @@ -45,6 +45,24 @@ #include // for memcpy +static really_inline m128 vpmax_loongarch(v4u32 a, v4u32 b) { + u32 result[4]; + u32 tmp1 = __lsx_vpickve2gr_wu(a, 0); + u32 tmp2 = __lsx_vpickve2gr_wu(a, 1); + result[0] = (tmp1 >= tmp2) ? tmp1 : tmp2; + tmp1 = __lsx_vpickve2gr_wu(a, 2); + tmp2 = __lsx_vpickve2gr_wu(a, 3); + result[1] = (tmp1 >= tmp2) ? tmp1 : tmp2; + tmp1 = __lsx_vpickve2gr_wu(b, 0); + tmp2 = __lsx_vpickve2gr_wu(b, 1); + result[2] = (tmp1 >= tmp2) ? tmp1 : tmp2; + tmp1 = __lsx_vpickve2gr_wu(b, 2); + tmp2 = __lsx_vpickve2gr_wu(b, 3); + result[3] = (tmp1 >= tmp2) ? tmp1 : tmp2; + v4u32 res = __lsx_vld((uint32_t *)result, 0); + return res; +} + static really_inline m128 ones128(void) { return __lsx_vreplgr2vr_b(0xFF); }