mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Merge pull request #131 from VectorCamp/develop
Prepare for new release 5.4.9
This commit is contained in:
commit
6d8599eece
@ -1,4 +1,5 @@
|
|||||||
cmake_minimum_required (VERSION 2.8.11)
|
cmake_minimum_required (VERSION 2.8.12)
|
||||||
|
|
||||||
project (vectorscan C CXX)
|
project (vectorscan C CXX)
|
||||||
|
|
||||||
set (HS_MAJOR_VERSION 5)
|
set (HS_MAJOR_VERSION 5)
|
||||||
@ -165,27 +166,49 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE)
|
|||||||
# cpuid info and then chooses the best microarch it can (and replaces
|
# cpuid info and then chooses the best microarch it can (and replaces
|
||||||
# the flag), so use that for tune.
|
# the flag), so use that for tune.
|
||||||
|
|
||||||
|
set(TUNE_FLAG "mtune")
|
||||||
|
set(GNUCC_TUNE "")
|
||||||
|
message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' ")
|
||||||
|
|
||||||
# arg1 might exist if using ccache
|
# arg1 might exist if using ccache
|
||||||
string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1)
|
string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1)
|
||||||
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native)
|
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -${TUNE_FLAG}=native)
|
||||||
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
|
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
|
||||||
OUTPUT_VARIABLE _GCC_OUTPUT)
|
OUTPUT_VARIABLE _GCC_OUTPUT)
|
||||||
string(FIND "${_GCC_OUTPUT}" "${ARCH_FLAG}" POS)
|
set(_GCC_OUTPUT_TUNE ${_GCC_OUTPUT})
|
||||||
|
string(FIND "${_GCC_OUTPUT}" "${ARCH_FLAG}=" POS)
|
||||||
string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT)
|
string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT)
|
||||||
string(REGEX REPLACE "${ARCH_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_ARCH "${_GCC_OUTPUT}")
|
string(REGEX REPLACE "${ARCH_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_ARCH "${_GCC_OUTPUT}")
|
||||||
|
|
||||||
|
string(FIND "${_GCC_OUTPUT_TUNE}" "${TUNE_FLAG}=" POS_TUNE)
|
||||||
|
string(SUBSTRING "${_GCC_OUTPUT_TUNE}" ${POS_TUNE} -1 _GCC_OUTPUT_TUNE)
|
||||||
|
string(REGEX REPLACE "${TUNE_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_TUNE "${_GCC_OUTPUT_TUNE}")
|
||||||
|
|
||||||
|
string(FIND "${GNUCC_ARCH}" "sve" POS_SVE)
|
||||||
|
string(FIND "${GNUCC_ARCH}" "sve2" POS_SVE2)
|
||||||
|
string(FIND "${GNUCC_ARCH}" "sve2-bitperm" POS_SVE2_BITPERM)
|
||||||
|
if (NOT POS_SVE EQUAL 0)
|
||||||
|
set(SVE_FOUND 1)
|
||||||
|
elseif(NOT POS_SVE2 EQUAL 0)
|
||||||
|
set(SVE2_FOUND 1)
|
||||||
|
elseif(NOT POS_SVE2_BITPERM EQUAL 0)
|
||||||
|
set(SVE2_BITPERM_FOUND 1)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' ")
|
||||||
|
|
||||||
# test the parsed flag
|
# test the parsed flag
|
||||||
set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH})
|
set (EXEC_ARGS ${CC_ARG1} -E - -${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE})
|
||||||
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
|
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
|
||||||
OUTPUT_QUIET ERROR_QUIET
|
OUTPUT_QUIET ERROR_QUIET
|
||||||
INPUT_FILE /dev/null
|
INPUT_FILE /dev/null
|
||||||
RESULT_VARIABLE GNUCC_TUNE_TEST)
|
RESULT_VARIABLE GNUCC_TUNE_TEST)
|
||||||
if (NOT GNUCC_TUNE_TEST EQUAL 0)
|
if (NOT GNUCC_TUNE_TEST EQUAL 0)
|
||||||
message(WARNING "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid, falling back to -mtune=native")
|
message(WARNING "Something went wrong determining gcc tune: -mtune=${GNUCC_TUNE} not valid, falling back to -mtune=native")
|
||||||
set(TUNE_FLAG native)
|
set(GNUCC_TUNE native)
|
||||||
else()
|
else()
|
||||||
set(TUNE_FLAG ${GNUCC_ARCH})
|
set(GNUCC_TUNE ${GNUCC_TUNE})
|
||||||
message(STATUS "gcc will tune for ${GNUCC_ARCH}, ${TUNE_FLAG}")
|
message(STATUS "gcc will tune for ${GNUCC_ARCH}, ${GNUCC_TUNE}")
|
||||||
endif()
|
endif()
|
||||||
elseif (CMAKE_COMPILER_IS_CLANG AND NOT CROSS_COMPILE)
|
elseif (CMAKE_COMPILER_IS_CLANG AND NOT CROSS_COMPILE)
|
||||||
if (ARCH_IA32 OR ARCH_X86_64)
|
if (ARCH_IA32 OR ARCH_X86_64)
|
||||||
@ -226,22 +249,26 @@ if (ARCH_IA32 OR ARCH_X86_64)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (ARCH_AARCH64)
|
if (ARCH_AARCH64)
|
||||||
if (BUILD_SVE2_BITPERM)
|
if (BUILD_SVE2_BITPERM AND NOT SVE2_BITPERM_FOUND)
|
||||||
set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm")
|
set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm")
|
||||||
elseif (BUILD_SVE2)
|
elseif (BUILD_SVE2 AND NOT SVE2_FOUND)
|
||||||
set(GNUCC_ARCH "${GNUCC_ARCH}+sve2")
|
set(GNUCC_ARCH "${GNUCC_ARCH}+sve2")
|
||||||
elseif (BUILD_SVE)
|
elseif (BUILD_SVE AND NOT SVE_FOUND)
|
||||||
set(GNUCC_ARCH "${GNUCC_ARCH}+sve")
|
set(GNUCC_ARCH "${GNUCC_ARCH}+sve")
|
||||||
endif ()
|
endif ()
|
||||||
endif(ARCH_AARCH64)
|
endif(ARCH_AARCH64)
|
||||||
|
|
||||||
|
|
||||||
message(STATUS "ARCH_C_FLAGS : ${ARCH_C_FLAGS}")
|
message(STATUS "ARCH_C_FLAGS : ${ARCH_C_FLAGS}")
|
||||||
message(STATUS "ARCH_CXX_FLAGS : ${ARCH_CXX_FLAGS}")
|
message(STATUS "ARCH_CXX_FLAGS : ${ARCH_CXX_FLAGS}")
|
||||||
|
|
||||||
if (NOT FAT_RUNTIME)
|
if (NOT FAT_RUNTIME)
|
||||||
|
if (GNUCC_TUNE)
|
||||||
|
set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}")
|
||||||
|
set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}")
|
||||||
|
else()
|
||||||
set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_C_FLAGS}")
|
set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_C_FLAGS}")
|
||||||
set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_CXX_FLAGS}")
|
set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_CXX_FLAGS}")
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
#if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64)
|
#if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64)
|
||||||
@ -296,6 +323,12 @@ if (NOT RELEASE_BUILD)
|
|||||||
# release builds
|
# release builds
|
||||||
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror")
|
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror")
|
||||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror")
|
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror")
|
||||||
|
if (CMAKE_COMPILER_IS_CLANG)
|
||||||
|
if (CMAKE_C_COMPILER_VERSION VERSION_GREATER "13.0")
|
||||||
|
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-unused-but-set-variable")
|
||||||
|
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-but-set-variable")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (DISABLE_ASSERTS)
|
if (DISABLE_ASSERTS)
|
||||||
@ -332,6 +365,7 @@ if (ARCH_IA32 OR ARCH_X86_64)
|
|||||||
elseif (ARCH_ARM32 OR ARCH_AARCH64)
|
elseif (ARCH_ARM32 OR ARCH_AARCH64)
|
||||||
CHECK_INCLUDE_FILE_CXX(arm_neon.h HAVE_C_ARM_NEON_H)
|
CHECK_INCLUDE_FILE_CXX(arm_neon.h HAVE_C_ARM_NEON_H)
|
||||||
if (BUILD_SVE OR BUILD_SVE2 OR BUILD_SVE2_BITPERM)
|
if (BUILD_SVE OR BUILD_SVE2 OR BUILD_SVE2_BITPERM)
|
||||||
|
set(CMAKE_REQUIRED_FLAGS ${ARCH_CXX_FLAGS})
|
||||||
CHECK_INCLUDE_FILE_CXX(arm_sve.h HAVE_C_ARM_SVE_H)
|
CHECK_INCLUDE_FILE_CXX(arm_sve.h HAVE_C_ARM_SVE_H)
|
||||||
if (NOT HAVE_C_ARM_SVE_H)
|
if (NOT HAVE_C_ARM_SVE_H)
|
||||||
message(FATAL_ERROR "arm_sve.h is required to build for SVE.")
|
message(FATAL_ERROR "arm_sve.h is required to build for SVE.")
|
||||||
|
@ -1484,12 +1484,12 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
|
|||||||
find_wide_state(info);
|
find_wide_state(info);
|
||||||
}
|
}
|
||||||
|
|
||||||
u16 total_daddy = 0;
|
|
||||||
bool any_cyclic_near_anchored_state
|
bool any_cyclic_near_anchored_state
|
||||||
= is_cyclic_near(raw, raw.start_anchored);
|
= is_cyclic_near(raw, raw.start_anchored);
|
||||||
|
|
||||||
// Sherman optimization
|
// Sherman optimization
|
||||||
if (info.impl_alpha_size > 16) {
|
if (info.impl_alpha_size > 16) {
|
||||||
|
u16 total_daddy = 0;
|
||||||
for (u32 i = 0; i < info.size(); i++) {
|
for (u32 i = 0; i < info.size(); i++) {
|
||||||
if (info.is_widestate(i)) {
|
if (info.is_widestate(i)) {
|
||||||
continue;
|
continue;
|
||||||
|
@ -385,8 +385,7 @@ bool improveGraph(NGHolder &g, som_type som) {
|
|||||||
|
|
||||||
const vector<NFAVertex> ordering = getTopoOrdering(g);
|
const vector<NFAVertex> ordering = getTopoOrdering(g);
|
||||||
|
|
||||||
return enlargeCyclicCR(g, som, ordering)
|
return enlargeCyclicCR(g, som, ordering) || enlargeCyclicCR_rev(g, ordering);
|
||||||
| enlargeCyclicCR_rev(g, ordering);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** finds a smaller reachability for a state by the reverse transformation of
|
/** finds a smaller reachability for a state by the reverse transformation of
|
||||||
|
@ -216,9 +216,9 @@ RoseRoleHistory selectHistory(const RoseBuildImpl &tbi, const RoseBuildData &bd,
|
|||||||
const bool fixed_offset_src = g[u].fixedOffset();
|
const bool fixed_offset_src = g[u].fixedOffset();
|
||||||
const bool has_bounds = g[e].minBound || (g[e].maxBound != ROSE_BOUND_INF);
|
const bool has_bounds = g[e].minBound || (g[e].maxBound != ROSE_BOUND_INF);
|
||||||
|
|
||||||
DEBUG_PRINTF("edge %zu->%zu, bounds=[%u,%u], fixed_u=%d, prefix=%d\n",
|
/*DEBUG_PRINTF("edge %zu->%zu, bounds=[%u,%u], fixed_u=%d, prefix=%d\n",
|
||||||
g[u].index, g[v].index, g[e].minBound, g[e].maxBound,
|
g[u].index, g[v].index, g[e].minBound, g[e].maxBound,
|
||||||
(int)g[u].fixedOffset(), (int)g[v].left);
|
(int)g[u].fixedOffset(), (int)g[v].left);*/
|
||||||
|
|
||||||
if (g[v].left) {
|
if (g[v].left) {
|
||||||
// Roles with prefix engines have their history handled by that prefix.
|
// Roles with prefix engines have their history handled by that prefix.
|
||||||
|
@ -152,7 +152,7 @@ static really_inline u32 movemask128(m128 a) {
|
|||||||
static uint8x16_t perm = { 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
static uint8x16_t perm = { 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||||
uint8x16_t bitmask = vec_gb((uint8x16_t) a);
|
uint8x16_t bitmask = vec_gb((uint8x16_t) a);
|
||||||
bitmask = (uint8x16_t) vec_perm(vec_splat_u8(0), bitmask, perm);
|
bitmask = (uint8x16_t) vec_perm(vec_splat_u8(0), bitmask, perm);
|
||||||
u32 movemask;
|
u32 ALIGN_ATTR(16) movemask;
|
||||||
vec_ste((uint32x4_t) bitmask, 0, &movemask);
|
vec_ste((uint32x4_t) bitmask, 0, &movemask);
|
||||||
return movemask;
|
return movemask;
|
||||||
}
|
}
|
||||||
@ -285,27 +285,27 @@ m128 loadbytes128(const void *ptr, unsigned int n) {
|
|||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define CASE_ALIGN_VECTORS(a, b, offset) case offset: return (m128)vec_sld((int8x16_t)(b), (int8x16_t)(a), (16 - offset)); break;
|
#define CASE_ALIGN_VECTORS(a, b, offset) case offset: return (m128)vec_sld((int8x16_t)(a), (int8x16_t)(b), (16 - offset)); break;
|
||||||
|
|
||||||
static really_really_inline
|
static really_really_inline
|
||||||
m128 palignr_imm(m128 r, m128 l, int offset) {
|
m128 palignr_imm(m128 r, m128 l, int offset) {
|
||||||
switch (offset) {
|
switch (offset) {
|
||||||
case 0: return l; break;
|
case 0: return l; break;
|
||||||
CASE_ALIGN_VECTORS(l, r, 1);
|
CASE_ALIGN_VECTORS(r, l, 1);
|
||||||
CASE_ALIGN_VECTORS(l, r, 2);
|
CASE_ALIGN_VECTORS(r, l, 2);
|
||||||
CASE_ALIGN_VECTORS(l, r, 3);
|
CASE_ALIGN_VECTORS(r, l, 3);
|
||||||
CASE_ALIGN_VECTORS(l, r, 4);
|
CASE_ALIGN_VECTORS(r, l, 4);
|
||||||
CASE_ALIGN_VECTORS(l, r, 5);
|
CASE_ALIGN_VECTORS(r, l, 5);
|
||||||
CASE_ALIGN_VECTORS(l, r, 6);
|
CASE_ALIGN_VECTORS(r, l, 6);
|
||||||
CASE_ALIGN_VECTORS(l, r, 7);
|
CASE_ALIGN_VECTORS(r, l, 7);
|
||||||
CASE_ALIGN_VECTORS(l, r, 8);
|
CASE_ALIGN_VECTORS(r, l, 8);
|
||||||
CASE_ALIGN_VECTORS(l, r, 9);
|
CASE_ALIGN_VECTORS(r, l, 9);
|
||||||
CASE_ALIGN_VECTORS(l, r, 10);
|
CASE_ALIGN_VECTORS(r, l, 10);
|
||||||
CASE_ALIGN_VECTORS(l, r, 11);
|
CASE_ALIGN_VECTORS(r, l, 11);
|
||||||
CASE_ALIGN_VECTORS(l, r, 12);
|
CASE_ALIGN_VECTORS(r, l, 12);
|
||||||
CASE_ALIGN_VECTORS(l, r, 13);
|
CASE_ALIGN_VECTORS(r, l, 13);
|
||||||
CASE_ALIGN_VECTORS(l, r, 14);
|
CASE_ALIGN_VECTORS(r, l, 14);
|
||||||
CASE_ALIGN_VECTORS(l, r, 15);
|
CASE_ALIGN_VECTORS(r, l, 15);
|
||||||
case 16: return r; break;
|
case 16: return r; break;
|
||||||
default: return zeroes128(); break;
|
default: return zeroes128(); break;
|
||||||
}
|
}
|
||||||
|
@ -49,7 +49,7 @@ really_inline SuperVector<16>::SuperVector(SuperVector const &other)
|
|||||||
|
|
||||||
template<>
|
template<>
|
||||||
template<>
|
template<>
|
||||||
really_inline SuperVector<16>::SuperVector(char __bool __vector v)
|
really_inline SuperVector<16>::SuperVector(__vector __bool char v)
|
||||||
{
|
{
|
||||||
u.u8x16[0] = (uint8x16_t) v;
|
u.u8x16[0] = (uint8x16_t) v;
|
||||||
};
|
};
|
||||||
@ -269,10 +269,10 @@ really_inline SuperVector<16> SuperVector<16>::eq(SuperVector<16> const &b) cons
|
|||||||
template <>
|
template <>
|
||||||
really_inline typename SuperVector<16>::comparemask_type
|
really_inline typename SuperVector<16>::comparemask_type
|
||||||
SuperVector<16>::comparemask(void) const {
|
SuperVector<16>::comparemask(void) const {
|
||||||
uint8x16_t bitmask = vec_gb( u.u8x16[0]);
|
|
||||||
static uint8x16_t perm = { 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
static uint8x16_t perm = { 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||||
|
uint8x16_t bitmask = vec_gb(u.u8x16[0]);
|
||||||
bitmask = (uint8x16_t) vec_perm(vec_splat_u8(0), bitmask, perm);
|
bitmask = (uint8x16_t) vec_perm(vec_splat_u8(0), bitmask, perm);
|
||||||
u32 movemask;
|
u32 ALIGN_ATTR(16) movemask;
|
||||||
vec_ste((uint32x4_t) bitmask, 0, &movemask);
|
vec_ste((uint32x4_t) bitmask, 0, &movemask);
|
||||||
return movemask;
|
return movemask;
|
||||||
}
|
}
|
||||||
|
@ -523,9 +523,7 @@ template <>
|
|||||||
really_inline SuperVector<16> SuperVector<16>::loadu_maskz(void const *ptr, uint8_t const len)
|
really_inline SuperVector<16> SuperVector<16>::loadu_maskz(void const *ptr, uint8_t const len)
|
||||||
{
|
{
|
||||||
SuperVector mask = Ones_vshr(16 -len);
|
SuperVector mask = Ones_vshr(16 -len);
|
||||||
mask.print8("mask");
|
|
||||||
SuperVector v = _mm_loadu_si128((const m128 *)ptr);
|
SuperVector v = _mm_loadu_si128((const m128 *)ptr);
|
||||||
v.print8("v");
|
|
||||||
return mask & v;
|
return mask & v;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user