diff --git a/README.md b/README.md index f6a10fca..2e68d2e6 100644 --- a/README.md +++ b/README.md @@ -173,9 +173,14 @@ export CC="/usr/local/bin/gcc12" export CXX="/usr/local/bin/g++12" ``` - Then continue with the build as below. +A note about running in FreeBSD: if you built a dynamically linked binary +with an alternative compiler, the libraries specific to the compiler that +built the binary will probably not be found and the base distro libraries +in /lib will be found instead. Adjust LD_LIBRARY_PATH appropriately. For +example, with gcc12 installed from pkg, one would want to use +```export LD_LIBRARY_PATH=/usr/local/lib/gcc12/``` ## Configure & build diff --git a/cmake/build_wrapper.sh b/cmake/build_wrapper.sh index 895610c0..d0a7e923 100755 --- a/cmake/build_wrapper.sh +++ b/cmake/build_wrapper.sh @@ -15,13 +15,21 @@ SYMSFILE=$(mktemp -p /tmp ${PREFIX}_rename.syms.XXXXX) KEEPSYMS=$(mktemp -p /tmp keep.syms.XXXXX) # find the libc used by gcc LIBC_SO=$("$@" --print-file-name=libc.so.6) +NM_FLAG="-f" +if [ `uname` = "FreeBSD" ]; then + # for freebsd, we will specify the name, + # we will leave it work as is in linux + LIBC_SO=/lib/libc.so.7 + # also, in BSD, the nm flag -F corresponds to the -f flag in linux. + NM_FLAG="-F" +fi cp ${KEEPSYMS_IN} ${KEEPSYMS} # get all symbols from libc and turn them into patterns -nm -f p -g -D ${LIBC_SO} | sed -s 's/\([^ @]*\).*/^\1$/' >> ${KEEPSYMS} +nm ${NM_FLAG} p -g -D ${LIBC_SO} | sed 's/\([^ @]*\).*/^\1$/' >> ${KEEPSYMS} # build the object "$@" # rename the symbols in the object -nm -f p -g ${OUT} | cut -f1 -d' ' | grep -v -f ${KEEPSYMS} | sed -e "s/\(.*\)/\1\ ${PREFIX}_\1/" >> ${SYMSFILE} +nm ${NM_FLAG} p -g ${OUT} | cut -f1 -d' ' | grep -v -f ${KEEPSYMS} | sed -e "s/\(.*\)/\1\ ${PREFIX}_\1/" >> ${SYMSFILE} if test -s ${SYMSFILE} then objcopy --redefine-syms=${SYMSFILE} ${OUT} diff --git a/cmake/osdetection.cmake b/cmake/osdetection.cmake index 1f35f1d3..6e7178cb 100644 --- a/cmake/osdetection.cmake +++ b/cmake/osdetection.cmake @@ -4,14 +4,13 @@ endif(CMAKE_SYSTEM_NAME MATCHES "Linux") if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") set(FREEBSD true) - set(CMAKE_INSTALL_RPATH_USE_LINK_PATH true) endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") if(CMAKE_SYSTEM_NAME MATCHES "NetBSD") set(NETBSD true) endif(CMAKE_SYSTEM_NAME MATCHES "NetBSD") -if (LINUX AND (ARCH_IA32 OR ARCH_X86_64)) +if (ARCH_IA32 OR ARCH_X86_64) option(FAT_RUNTIME "Build a library that supports multiple microarchitectures" ON) else() option(FAT_RUNTIME "Build a library that supports multiple microarchitectures" OFF) @@ -19,26 +18,22 @@ endif() if (FAT_RUNTIME) message("Checking Fat Runtime Requirements...") - if (NOT LINUX) - message(FATAL_ERROR "Fat runtime is only supported on Linux OS") - else() - if (USE_CPU_NATIVE AND FAT_RUNTIME) - message(FATAL_ERROR "Fat runtime is not compatible with Native CPU detection") - endif() + if (USE_CPU_NATIVE AND FAT_RUNTIME) + message(FATAL_ERROR "Fat runtime is not compatible with Native CPU detection") + endif() - if (NOT (ARCH_IA32 OR ARCH_X86_64 OR ARCH_AARCH64)) - message(FATAL_ERROR "Fat runtime is only supported on Intel and Aarch64 architectures") + if (NOT (ARCH_IA32 OR ARCH_X86_64 OR ARCH_AARCH64)) + message(FATAL_ERROR "Fat runtime is only supported on Intel and Aarch64 architectures") + else() + message(STATUS "Building Fat runtime for multiple microarchitectures") + message(STATUS "generator is ${CMAKE_GENERATOR}") + if (NOT (CMAKE_GENERATOR MATCHES "Unix Makefiles" OR + (CMAKE_VERSION VERSION_GREATER "3.0" AND CMAKE_GENERATOR MATCHES "Ninja"))) + message (FATAL_ERROR "Building the fat runtime requires the Unix Makefiles generator, or Ninja with CMake v3.0 or higher") else() - message(STATUS "Building Fat runtime for multiple microarchitectures") - message(STATUS "generator is ${CMAKE_GENERATOR}") - if (NOT (CMAKE_GENERATOR MATCHES "Unix Makefiles" OR - (CMAKE_VERSION VERSION_GREATER "3.0" AND CMAKE_GENERATOR MATCHES "Ninja"))) - message (FATAL_ERROR "Building the fat runtime requires the Unix Makefiles generator, or Ninja with CMake v3.0 or higher") - else() - include (${CMAKE_MODULE_PATH}/attrib.cmake) - if (NOT HAS_C_ATTR_IFUNC) - message(FATAL_ERROR "Compiler does not support ifunc attribute, cannot build fat runtime") - endif() + include (${CMAKE_MODULE_PATH}/attrib.cmake) + if (NOT HAS_C_ATTR_IFUNC) + message(FATAL_ERROR "Compiler does not support ifunc attribute, cannot build fat runtime") endif() endif() endif() diff --git a/src/dispatcher.c b/src/dispatcher.c index a817e744..b704b0a5 100644 --- a/src/dispatcher.c +++ b/src/dispatcher.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2016-2020, Intel Corporation + * Copyright (c) 2024, VectorCamp PC * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,6 +31,39 @@ #include "hs_common.h" #include "hs_runtime.h" #include "ue2common.h" + +/* streamlining the dispatch to eliminate runtime checking/branching: + * what we want to do is, first call to the function will run the resolve + * code and set the static resolved/dispatch pointer to point to the + * correct function. Subsequent calls to the function will go directly to + * the resolved ptr. The simplest way to accomplish this is, to + * initially set the pointer to the resolve function. + * To accomplish this in a manner invisible to the user, + * we do involve some rather ugly/confusing macros in here. + * There are four macros that assemble the code for each function + * we want to dispatch in this manner: + * CREATE_DISPATCH + * this generates the declarations for the candidate target functions, + * for the fat_dispatch function pointer, for the resolve_ function, + * points the function pointer to the resolve function, and contains + * most of the definition of the resolve function. The very end of the + * resolve function is completed by the next macro, because in the + * CREATE_DISPATCH macro we have the argument list with the arg declarations, + * which is needed to generate correct function signatures, but we + * can't generate from this, in a macro, a _call_ to one of those functions. + * CONNECT_ARGS_1 + * this macro fills in the actual call at the end of the resolve function, + * with the correct arg list. hence the name connect args. + * CONNECT_DISPATCH_2 + * this macro likewise gives up the beginning of the definition of the + * actual entry point function (the 'real name' that's called by the user) + * but again in the pass-through call, cannot invoke the target without + * getting the arg list , which is supplied by the final macro, + * CONNECT_ARGS_3 + * + */ + + #if defined(ARCH_IA32) || defined(ARCH_X86_64) #include "util/arch/x86/cpuid_inline.h" #include "util/join.h" @@ -57,30 +91,38 @@ return (RTYPE)HS_ARCH_ERROR; \ } \ \ - /* resolver */ \ - static RTYPE (*JOIN(resolve_, NAME)(void))(__VA_ARGS__) { \ - if (check_avx512vbmi()) { \ - return JOIN(avx512vbmi_, NAME); \ - } \ - if (check_avx512()) { \ - return JOIN(avx512_, NAME); \ - } \ - if (check_avx2()) { \ - return JOIN(avx2_, NAME); \ - } \ - if (check_sse42() && check_popcnt()) { \ - return JOIN(corei7_, NAME); \ - } \ - if (check_ssse3()) { \ - return JOIN(core2_, NAME); \ - } \ - /* anything else is fail */ \ - return JOIN(error_, NAME); \ - } \ + /* dispatch routing pointer for this function */ \ + /* initially point it at the resolve function */ \ + static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__); \ + static RTYPE (* JOIN(fat_dispatch_, NAME))(__VA_ARGS__) = \ + &JOIN(resolve_, NAME); \ \ - /* function */ \ - HS_PUBLIC_API \ - RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME))) + /* resolver */ \ + static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__) { \ + if (check_avx512vbmi()) { \ + fat_dispatch_ ## NAME = &JOIN(avx512vbmi_, NAME); \ + } \ + else if (check_avx512()) { \ + fat_dispatch_ ## NAME = &JOIN(avx512_, NAME); \ + } \ + else if (check_avx2()) { \ + fat_dispatch_ ## NAME = &JOIN(avx2_, NAME); \ + } \ + else if (check_sse42() && check_popcnt()) { \ + fat_dispatch_ ## NAME = &JOIN(corei7_, NAME); \ + } \ + else if (check_ssse3()) { \ + fat_dispatch_ ## NAME = &JOIN(core2_, NAME); \ + } else { \ + /* anything else is fail */ \ + fat_dispatch_ ## NAME = &JOIN(error_, NAME); \ + } \ + + + +/* the rest of the function is completed in the CONNECT_ARGS_1 macro. */ + + #elif defined(ARCH_AARCH64) #include "util/arch/arm/cpuid_inline.h" @@ -97,99 +139,226 @@ return (RTYPE)HS_ARCH_ERROR; \ } \ \ - /* resolver */ \ - static RTYPE (*JOIN(resolve_, NAME)(void))(__VA_ARGS__) { \ - if (check_sve2()) { \ - return JOIN(sve2_, NAME); \ - } \ - if (check_sve()) { \ - return JOIN(sve_, NAME); \ - } \ - if (check_neon()) { \ - return JOIN(neon_, NAME); \ - } \ - /* anything else is fail */ \ - return JOIN(error_, NAME); \ - } \ + /* dispatch routing pointer for this function */ \ + /* initially point it at the resolve function */ \ + static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__); \ + static RTYPE (* JOIN(fat_dispatch_, NAME))(__VA_ARGS__) = \ + &JOIN(resolve_, NAME); \ \ - /* function */ \ - HS_PUBLIC_API \ - RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME))) + /* resolver */ \ + static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__) { \ + if (check_sve2()) { \ + fat_dispatch_ ## NAME = &JOIN(sve2_, NAME); \ + } \ + else if (check_sve()) { \ + fat_dispatch_ ## NAME = &JOIN(sve_, NAME); \ + } \ + else if (check_neon()) { \ + fat_dispatch_ ## NAME = &JOIN(neon_, NAME); \ + } else { \ + /* anything else is fail */ \ + fat_dispatch_ ## NAME = &JOIN(error_, NAME); \ + } \ + + +/* the rest of the function is completed in the CONNECT_ARGS_1 macro. */ + #endif + +#define CONNECT_ARGS_1(RTYPE, NAME, ...) \ + return (*fat_dispatch_ ## NAME)(__VA_ARGS__); \ + } \ + + +#define CONNECT_DISPATCH_2(RTYPE, NAME, ...) \ + /* new function */ \ + HS_PUBLIC_API \ + RTYPE NAME(__VA_ARGS__) { \ + + +#define CONNECT_ARGS_3(RTYPE, NAME, ...) \ + return (*fat_dispatch_ ## NAME)(__VA_ARGS__); \ + } \ + + #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-function" + +/* this gets a bit ugly to compose the static redirect functions, + * as we necessarily need first the typed arg list and then just the arg + * names, twice in a row, to define the redirect function and the + * dispatch function call */ + CREATE_DISPATCH(hs_error_t, hs_scan, const hs_database_t *db, const char *data, unsigned length, unsigned flags, hs_scratch_t *scratch, match_event_handler onEvent, void *userCtx); +CONNECT_ARGS_1(hs_error_t, hs_scan, db, data, length, flags, scratch, onEvent, userCtx); +CONNECT_DISPATCH_2(hs_error_t, hs_scan, const hs_database_t *db, const char *data, + unsigned length, unsigned flags, hs_scratch_t *scratch, + match_event_handler onEvent, void *userCtx); +CONNECT_ARGS_3(hs_error_t, hs_scan, db, data, length, flags, scratch, onEvent, userCtx); CREATE_DISPATCH(hs_error_t, hs_stream_size, const hs_database_t *database, size_t *stream_size); +CONNECT_ARGS_1(hs_error_t, hs_stream_size, database, stream_size); +CONNECT_DISPATCH_2(hs_error_t, hs_stream_size, const hs_database_t *database, + size_t *stream_size); +CONNECT_ARGS_3(hs_error_t, hs_stream_size, database, stream_size); CREATE_DISPATCH(hs_error_t, hs_database_size, const hs_database_t *db, size_t *size); +CONNECT_ARGS_1(hs_error_t, hs_database_size, db, size); +CONNECT_DISPATCH_2(hs_error_t, hs_database_size, const hs_database_t *db, + size_t *size); +CONNECT_ARGS_3(hs_error_t, hs_database_size, db, size); + CREATE_DISPATCH(hs_error_t, dbIsValid, const hs_database_t *db); +CONNECT_ARGS_1(hs_error_t, dbIsValid, db); +CONNECT_DISPATCH_2(hs_error_t, dbIsValid, const hs_database_t *db); +CONNECT_ARGS_3(hs_error_t, dbIsValid, db); + CREATE_DISPATCH(hs_error_t, hs_free_database, hs_database_t *db); +CONNECT_ARGS_1(hs_error_t, hs_free_database, db); +CONNECT_DISPATCH_2(hs_error_t, hs_free_database, hs_database_t *db); +CONNECT_ARGS_3(hs_error_t, hs_free_database, db); CREATE_DISPATCH(hs_error_t, hs_open_stream, const hs_database_t *db, unsigned int flags, hs_stream_t **stream); +CONNECT_ARGS_1(hs_error_t, hs_open_stream, db, flags, stream); +CONNECT_DISPATCH_2(hs_error_t, hs_open_stream, const hs_database_t *db, + unsigned int flags, hs_stream_t **stream); +CONNECT_ARGS_3(hs_error_t, hs_open_stream, db, flags, stream); CREATE_DISPATCH(hs_error_t, hs_scan_stream, hs_stream_t *id, const char *data, unsigned int length, unsigned int flags, hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt); +CONNECT_ARGS_1(hs_error_t, hs_scan_stream, id, data, length, flags, scratch, onEvent, ctxt); +CONNECT_DISPATCH_2(hs_error_t, hs_scan_stream, hs_stream_t *id, const char *data, + unsigned int length, unsigned int flags, hs_scratch_t *scratch, + match_event_handler onEvent, void *ctxt); +CONNECT_ARGS_3(hs_error_t, hs_scan_stream, id, data, length, flags, scratch, onEvent, ctxt); CREATE_DISPATCH(hs_error_t, hs_close_stream, hs_stream_t *id, hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt); +CONNECT_ARGS_1(hs_error_t, hs_close_stream, id, scratch, onEvent, ctxt); +CONNECT_DISPATCH_2(hs_error_t, hs_close_stream, hs_stream_t *id, + hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt); +CONNECT_ARGS_3(hs_error_t, hs_close_stream, id, scratch, onEvent, ctxt); CREATE_DISPATCH(hs_error_t, hs_scan_vector, const hs_database_t *db, const char *const *data, const unsigned int *length, unsigned int count, unsigned int flags, hs_scratch_t *scratch, match_event_handler onevent, void *context); +CONNECT_ARGS_1(hs_error_t, hs_scan_vector, db, data, length, count, flags, scratch, onevent, context); +CONNECT_DISPATCH_2(hs_error_t, hs_scan_vector, const hs_database_t *db, + const char *const *data, const unsigned int *length, + unsigned int count, unsigned int flags, hs_scratch_t *scratch, + match_event_handler onevent, void *context); +CONNECT_ARGS_3(hs_error_t, hs_scan_vector, db, data, length, count, flags, scratch, onevent, context); CREATE_DISPATCH(hs_error_t, hs_database_info, const hs_database_t *db, char **info); +CONNECT_ARGS_1(hs_error_t, hs_database_info, db, info); +CONNECT_DISPATCH_2(hs_error_t, hs_database_info, const hs_database_t *db, char **info); +CONNECT_ARGS_3(hs_error_t, hs_database_info, db, info); CREATE_DISPATCH(hs_error_t, hs_copy_stream, hs_stream_t **to_id, const hs_stream_t *from_id); +CONNECT_ARGS_1(hs_error_t, hs_copy_stream, to_id, from_id); +CONNECT_DISPATCH_2(hs_error_t, hs_copy_stream, hs_stream_t **to_id, + const hs_stream_t *from_id); +CONNECT_ARGS_3(hs_error_t, hs_copy_stream, to_id, from_id); CREATE_DISPATCH(hs_error_t, hs_reset_stream, hs_stream_t *id, unsigned int flags, hs_scratch_t *scratch, match_event_handler onEvent, void *context); +CONNECT_ARGS_1(hs_error_t, hs_reset_stream, id, flags, scratch, onEvent, context); +CONNECT_DISPATCH_2(hs_error_t, hs_reset_stream, hs_stream_t *id, + unsigned int flags, hs_scratch_t *scratch, + match_event_handler onEvent, void *context); +CONNECT_ARGS_3(hs_error_t, hs_reset_stream, id, flags, scratch, onEvent, context); CREATE_DISPATCH(hs_error_t, hs_reset_and_copy_stream, hs_stream_t *to_id, const hs_stream_t *from_id, hs_scratch_t *scratch, match_event_handler onEvent, void *context); +CONNECT_ARGS_1(hs_error_t, hs_reset_and_copy_stream, to_id, from_id, scratch, onEvent, context); +CONNECT_DISPATCH_2(hs_error_t, hs_reset_and_copy_stream, hs_stream_t *to_id, + const hs_stream_t *from_id, hs_scratch_t *scratch, + match_event_handler onEvent, void *context); +CONNECT_ARGS_3(hs_error_t, hs_reset_and_copy_stream, to_id, from_id, scratch, onEvent, context); CREATE_DISPATCH(hs_error_t, hs_serialize_database, const hs_database_t *db, char **bytes, size_t *length); +CONNECT_ARGS_1(hs_error_t, hs_serialize_database, db, bytes, length); +CONNECT_DISPATCH_2(hs_error_t, hs_serialize_database, const hs_database_t *db, + char **bytes, size_t *length); +CONNECT_ARGS_3(hs_error_t, hs_serialize_database, db, bytes, length); CREATE_DISPATCH(hs_error_t, hs_deserialize_database, const char *bytes, const size_t length, hs_database_t **db); +CONNECT_ARGS_1(hs_error_t, hs_deserialize_database, bytes, length, db); +CONNECT_DISPATCH_2(hs_error_t, hs_deserialize_database, const char *bytes, + const size_t length, hs_database_t **db); +CONNECT_ARGS_3(hs_error_t, hs_deserialize_database, bytes, length, db); CREATE_DISPATCH(hs_error_t, hs_deserialize_database_at, const char *bytes, const size_t length, hs_database_t *db); +CONNECT_ARGS_1(hs_error_t, hs_deserialize_database_at, bytes, length, db); +CONNECT_DISPATCH_2(hs_error_t, hs_deserialize_database_at, const char *bytes, + const size_t length, hs_database_t *db); +CONNECT_ARGS_3(hs_error_t, hs_deserialize_database_at, bytes, length, db); CREATE_DISPATCH(hs_error_t, hs_serialized_database_info, const char *bytes, size_t length, char **info); +CONNECT_ARGS_1(hs_error_t, hs_serialized_database_info, bytes, length, info); +CONNECT_DISPATCH_2(hs_error_t, hs_serialized_database_info, const char *bytes, + size_t length, char **info); +CONNECT_ARGS_3(hs_error_t, hs_serialized_database_info, bytes, length, info); CREATE_DISPATCH(hs_error_t, hs_serialized_database_size, const char *bytes, const size_t length, size_t *deserialized_size); +CONNECT_ARGS_1(hs_error_t, hs_serialized_database_size, bytes, length, deserialized_size); +CONNECT_DISPATCH_2(hs_error_t, hs_serialized_database_size, const char *bytes, + const size_t length, size_t *deserialized_size); +CONNECT_ARGS_3(hs_error_t, hs_serialized_database_size, bytes, length, deserialized_size); CREATE_DISPATCH(hs_error_t, hs_compress_stream, const hs_stream_t *stream, char *buf, size_t buf_space, size_t *used_space); +CONNECT_ARGS_1(hs_error_t, hs_compress_stream, stream, + buf, buf_space, used_space); +CONNECT_DISPATCH_2(hs_error_t, hs_compress_stream, const hs_stream_t *stream, + char *buf, size_t buf_space, size_t *used_space); +CONNECT_ARGS_3(hs_error_t, hs_compress_stream, stream, + buf, buf_space, used_space); CREATE_DISPATCH(hs_error_t, hs_expand_stream, const hs_database_t *db, hs_stream_t **stream, const char *buf,size_t buf_size); +CONNECT_ARGS_1(hs_error_t, hs_expand_stream, db, stream, buf,buf_size); +CONNECT_DISPATCH_2(hs_error_t, hs_expand_stream, const hs_database_t *db, + hs_stream_t **stream, const char *buf,size_t buf_size); +CONNECT_ARGS_3(hs_error_t, hs_expand_stream, db, stream, buf,buf_size); CREATE_DISPATCH(hs_error_t, hs_reset_and_expand_stream, hs_stream_t *to_stream, const char *buf, size_t buf_size, hs_scratch_t *scratch, match_event_handler onEvent, void *context); +CONNECT_ARGS_1(hs_error_t, hs_reset_and_expand_stream, to_stream, + buf, buf_size, scratch, onEvent, context); +CONNECT_DISPATCH_2(hs_error_t, hs_reset_and_expand_stream, hs_stream_t *to_stream, + const char *buf, size_t buf_size, hs_scratch_t *scratch, + match_event_handler onEvent, void *context); +CONNECT_ARGS_3(hs_error_t, hs_reset_and_expand_stream, to_stream, + buf, buf_size, scratch, onEvent, context); /** INTERNALS **/ CREATE_DISPATCH(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen); +CONNECT_ARGS_1(u32, Crc32c_ComputeBuf, inCrc32, buf, bufLen); +CONNECT_DISPATCH_2(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen); +CONNECT_ARGS_3(u32, Crc32c_ComputeBuf, inCrc32, buf, bufLen); #pragma GCC diagnostic pop #pragma GCC diagnostic pop +