From c3a73446ee0c1f534e84ae31b085ab48f1d01686 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 2 Nov 2016 11:01:28 +1100 Subject: [PATCH] Fat runtime --- CMakeLists.txt | 165 ++++++++++++++++++++++++++++++------ cmake/arch.cmake | 11 +-- cmake/attrib.cmake | 3 + cmake/build_wrapper.sh | 27 ++++++ cmake/config.h.in | 3 + cmake/keep.syms.in | 11 +++ src/compiler/compiler.cpp | 41 +++++++++ src/database.c | 39 +-------- src/database.h | 3 +- src/dispatcher.c | 122 ++++++++++++++++++++++++++ src/hs.cpp | 20 +++++ src/hs_common.h | 11 +++ src/nfa/mcsheng_compile.cpp | 3 +- src/util/cpuid_flags.c | 23 ++++- src/util/cpuid_flags.h | 5 ++ src/util/simd_types.h | 1 + unit/CMakeLists.txt | 6 +- 17 files changed, 411 insertions(+), 83 deletions(-) create mode 100644 cmake/attrib.cmake create mode 100755 cmake/build_wrapper.sh create mode 100644 cmake/keep.syms.in create mode 100644 src/dispatcher.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 0559932d..9f953c6e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -213,7 +213,6 @@ else() set(ARCH_C_FLAGS "${ARCH_C_FLAGS} -march=native -mtune=native") endif() - # we don't use these for the lib, but other tools/unit tests if (NOT CMAKE_CXX_FLAGS MATCHES .*march.*) set(ARCH_CXX_FLAGS "${ARCH_CXX_FLAGS} -march=native -mtune=native") endif() @@ -257,9 +256,24 @@ if (RELEASE_BUILD) endif() endif() -# ensure we are building for the right target arch +if (CMAKE_SYSTEM_NAME MATCHES "Linux") + # This is a Linux-only feature for now - requires platform support + # elsewhere + option(FAT_RUNTIME "Build a library that supports multiple microarchitecures" RELEASE_BUILD) + if (FAT_RUNTIME) + include (${CMAKE_MODULE_PATH}/attrib.cmake) + if (NOT HAS_C_ATTR_IFUNC) + message(FATAL_ERROR "Compiler does not support ifunc attribute, cannot build fat runtime") + endif() + endif() +endif () + include (${CMAKE_MODULE_PATH}/arch.cmake) +if (NOT FAT_RUNTIME AND NOT HAVE_SSSE3) + message(FATAL_ERROR "A minimum of SSSE3 compiler support is required") +endif () + # testing a builtin takes a little more work CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED) CHECK_CXX_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED) @@ -365,6 +379,14 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Intel") endif() endif() +if (NOT FAT_RUNTIME) +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS}") +else() +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") +endif() + add_subdirectory(util) add_subdirectory(unit) add_subdirectory(doc/dev-reference) @@ -391,8 +413,13 @@ if (NOT WIN32) endif() # only set these after all tests are done -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS} ${EXTRA_C_FLAGS}") +if (NOT FAT_RUNTIME) +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") +else() +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") +endif() if(NOT WIN32) @@ -414,12 +441,19 @@ SET(hs_HEADERS ) install(FILES ${hs_HEADERS} DESTINATION include/hs) +set (hs_exec_common_SRCS + src/alloc.c + src/scratch.c + src/util/multibit.c + ) + set (hs_exec_SRCS ${hs_HEADERS} src/hs_version.h src/ue2common.h - src/alloc.c src/allocator.h + src/crc32.c + src/crc32.h src/report.h src/runtime.c src/fdr/fdr.c @@ -558,8 +592,8 @@ set (hs_exec_SRCS src/util/join.h src/util/masked_move.h src/util/multibit.h - src/util/multibit_internal.h src/util/multibit.c + src/util/multibit_internal.h src/util/pack_bits.h src/util/popcount.h src/util/pqueue.h @@ -571,21 +605,14 @@ set (hs_exec_SRCS src/util/state_compress.c src/util/unaligned.h src/util/uniform_ops.h - src/scratch.h - src/scratch.c - src/crc32.c - src/crc32.h src/database.c src/database.h ) -if (HAVE_AVX2) - set (hs_exec_SRCS - ${hs_exec_SRCS} - src/fdr/teddy_avx2.c - src/util/masked_move.c - ) -endif () +set (hs_exec_avx2_SRCS + src/fdr/teddy_avx2.c + src/util/masked_move.c +) SET (hs_SRCS @@ -1013,27 +1040,101 @@ endif() set (LIB_VERSION ${HS_VERSION}) set (LIB_SOVERSION ${HS_MAJOR_VERSION}) -add_library(hs_exec OBJECT ${hs_exec_SRCS}) +if (NOT FAT_RUNTIME) + + if (HAVE_AVX2) + add_library(hs_exec OBJECT ${hs_exec_common_SRCS} ${hs_exec_SRCS} + ${hs_exec_avx2_SRCS}) + else() + add_library(hs_exec OBJECT ${hs_exec_common_SRCS} ${hs_exec_SRCS}) + endif() + + add_library(hs_runtime STATIC src/hs_version.c $) + set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) + + if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) + add_library(hs_exec_shared OBJECT ${hs_exec_SRCS}) + set_target_properties(hs_exec_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE) + endif() + +else (FAT_RUNTIME) + set(BUILD_WRAPPER "${PROJECT_SOURCE_DIR}/cmake/build_wrapper.sh") + add_library(hs_exec_core2 OBJECT ${hs_exec_SRCS}) + set_target_properties(hs_exec_core2 PROPERTIES + COMPILE_FLAGS "-march=core2" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + + add_library(hs_exec_corei7 OBJECT ${hs_exec_SRCS}) + set_target_properties(hs_exec_corei7 PROPERTIES + COMPILE_FLAGS "-march=corei7" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + + add_library(hs_exec_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + set_target_properties(hs_exec_avx2 PROPERTIES + COMPILE_FLAGS "-march=core-avx2" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + + add_library(hs_exec_common OBJECT + ${hs_exec_common_SRCS} + src/dispatcher.c + ) + set_source_files_properties(src/dispatcher.c PROPERTIES + COMPILE_FLAGS "-Wno-unused-parameter -Wno-unused-function") + set_source_files_properties(${hs_exec_common_SRCS} PROPERTIES + COMPILE_FLAGS "-march=core-avx2") + + if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) + add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS}) + set_target_properties(hs_exec_shared_core2 PROPERTIES + COMPILE_FLAGS "-march=core2" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + add_library(hs_exec_shared_corei7 OBJECT ${hs_exec_SRCS}) + set_target_properties(hs_exec_shared_corei7 PROPERTIES + COMPILE_FLAGS "-march=corei7" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + add_library(hs_exec_shared_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + set_target_properties(hs_exec_shared_avx2 PROPERTIES + COMPILE_FLAGS "-march=core-avx2" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + add_library(hs_exec_common_shared OBJECT + ${hs_exec_common_SRCS} + src/dispatcher.c + ) + set_target_properties(hs_exec_common_shared PROPERTIES + OUTPUT_NAME hs_exec_common + POSITION_INDEPENDENT_CODE TRUE) + endif() # SHARED -if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) -add_library(hs_exec_shared OBJECT ${hs_exec_SRCS}) -set_target_properties(hs_exec_shared PROPERTIES - POSITION_INDEPENDENT_CODE TRUE) -endif() # hs_version.c is added explicitly to avoid some build systems that refuse to # create a lib without any src (I'm looking at you Xcode) -add_library(hs_runtime STATIC src/hs_version.c $) + add_library(hs_runtime STATIC src/hs_version.c + $ $ + $ $) +endif (NOT FAT_RUNTIME) -set_target_properties(hs_runtime PROPERTIES - LINKER_LANGUAGE C) + +set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) if (NOT BUILD_SHARED_LIBS) install(TARGETS hs_runtime DESTINATION lib) endif() if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) - add_library(hs_runtime_shared SHARED src/hs_version.c $) + if (NOT FAT_RUNTIME) + add_library(hs_runtime_shared SHARED src/hs_version.c $) + else() + add_library(hs_runtime_shared SHARED src/hs_version.c $ $ $ $) + endif() set_target_properties(hs_runtime_shared PROPERTIES VERSION ${LIB_VERSION} SOVERSION ${LIB_SOVERSION} @@ -1046,8 +1147,12 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) LIBRARY DESTINATION lib) endif() +if (NOT FAT_RUNTIME) + add_library(hs STATIC ${hs_SRCS} $) +else() # we want the static lib for testing -add_library(hs STATIC ${hs_SRCS} $) +add_library(hs STATIC src/hs_version.c ${hs_SRCS} $ $ $ $) +endif() add_dependencies(hs ragel_Parser) @@ -1056,7 +1161,11 @@ install(TARGETS hs DESTINATION lib) endif() if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) - add_library(hs_shared SHARED ${hs_SRCS} $) + if (NOT FAT_RUNTIME) + add_library(hs_shared SHARED src/hs_version.c ${hs_SRCS} $) + else() + add_library(hs_shared SHARED src/hs_version.c ${hs_SRCS} $ $ $ $) + endif() add_dependencies(hs_shared ragel_Parser) set_target_properties(hs_shared PROPERTIES OUTPUT_NAME hs diff --git a/cmake/arch.cmake b/cmake/arch.cmake index c00401dd..e98fbf22 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -11,7 +11,8 @@ else () endif () -set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") +set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${ARCH_C_FLAGS}") + # ensure we have the minimum of SSSE3 - call a SSSE3 intrinsic CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}> int main() { @@ -19,10 +20,6 @@ int main() { (void)_mm_shuffle_epi8(a, a); }" HAVE_SSSE3) -if (NOT HAVE_SSSE3) - message(FATAL_ERROR "A minimum of SSSE3 compiler support is required") -endif () - # now look for AVX2 CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}> #if !defined(__AVX2__) @@ -34,9 +31,5 @@ int main(){ (void)_mm256_xor_si256(z, z); }" HAVE_AVX2) -if (NOT HAVE_AVX2) - message(STATUS "Building without AVX2 support") -endif () - unset (CMAKE_REQUIRED_FLAGS) unset (INTRIN_INC_H) diff --git a/cmake/attrib.cmake b/cmake/attrib.cmake new file mode 100644 index 00000000..6ce3f2a7 --- /dev/null +++ b/cmake/attrib.cmake @@ -0,0 +1,3 @@ +# tests for compiler properties + +CHECK_C_SOURCE_COMPILES("int foo(int) __attribute__ ((ifunc(\"foo_i\"))); int f1(int i) { return i; } void (*foo_i()) { return f1; } int main(void) { return 0; }" HAS_C_ATTR_IFUNC) diff --git a/cmake/build_wrapper.sh b/cmake/build_wrapper.sh new file mode 100755 index 00000000..5baf209b --- /dev/null +++ b/cmake/build_wrapper.sh @@ -0,0 +1,27 @@ +#!/bin/sh -e +# This is used for renaming symbols for the fat runtime, don't call directly +# TODO: make this a lot less fragile! +PREFIX=$1 +KEEPSYMS_IN=$2 +shift 2 +BUILD=$@ +OUT=$(echo $BUILD | sed 's/.* -o \(.*\.o\).*/\1/') +SYMSFILE=/tmp/${PREFIX}_rename.syms.$$ +KEEPSYMS=/tmp/keep.syms.$$ +# grab the command without the target obj or src file flags +# we don't just call gcc directly as there may be flags modifying the arch +CC_CMD=$(echo $BUILD | sed 's/ -o .*\.o//;s/ -c //;s/ .[^ ]*\.c//;') +# find me a libc +LIBC_SO=$(${CC_CMD} --print-file-name=libc.so.6) +cp ${KEEPSYMS_IN} ${KEEPSYMS} +# get all symbols from libc and turn them into patterns +nm -f p -g -D ${LIBC_SO} | sed -s 's/\([^ ]*\).*/^\1$/' >> ${KEEPSYMS} +# build the object +${BUILD} +# rename the symbols in the object +nm -f p -g ${OUT} | cut -f1 -d' ' | grep -v -f ${KEEPSYMS} | sed -e "s/\(.*\)/\1\ ${PREFIX}_\1/" >> ${SYMSFILE} +if test -s ${SYMSFILE} +then + objcopy --redefine-syms=${SYMSFILE} ${OUT} +fi +rm -f ${SYMSFILE} ${KEEPSYMS} diff --git a/cmake/config.h.in b/cmake/config.h.in index 75c27b3e..198d96c5 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -15,6 +15,9 @@ /* internal build, switch on dump support. */ #cmakedefine DUMP_SUPPORT +/* Define if building "fat" runtime. */ +#cmakedefine FAT_RUNTIME + /* Define to 1 if `backtrace' works. */ #cmakedefine HAVE_BACKTRACE diff --git a/cmake/keep.syms.in b/cmake/keep.syms.in new file mode 100644 index 00000000..ab6f82a5 --- /dev/null +++ b/cmake/keep.syms.in @@ -0,0 +1,11 @@ +# names to exclude +hs_misc_alloc +hs_misc_free +hs_free_scratch +hs_stream_alloc +hs_stream_free +hs_scratch_alloc +hs_scratch_free +hs_database_alloc +hs_database_free +^_ diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index d56aff88..4a4afc64 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -29,8 +29,10 @@ /** \file * \brief Compiler front-end interface. */ +#include "allocator.h" #include "asserts.h" #include "compiler.h" +#include "crc32.h" #include "database.h" #include "grey.h" #include "hs_internal.h" @@ -321,6 +323,45 @@ platform_t target_to_platform(const target_t &target_info) { return p; } +/** \brief Encapsulate the given bytecode (RoseEngine) in a newly-allocated + * \ref hs_database, ensuring that it is padded correctly to give cacheline + * alignment. */ +static +hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) { + size_t db_len = sizeof(struct hs_database) + len; + DEBUG_PRINTF("db size %zu\n", db_len); + DEBUG_PRINTF("db platform %llx\n", platform); + + struct hs_database *db = (struct hs_database *)hs_database_alloc(db_len); + if (hs_check_alloc(db) != HS_SUCCESS) { + hs_database_free(db); + return nullptr; + } + + // So that none of our database is uninitialized + memset(db, 0, db_len); + + // we need to align things manually + size_t shift = (uintptr_t)db->bytes & 0x3f; + DEBUG_PRINTF("shift is %zu\n", shift); + + db->bytecode = offsetof(struct hs_database, bytes) - shift; + char *bytecode = (char *)db + db->bytecode; + assert(ISALIGNED_CL(bytecode)); + + db->magic = HS_DB_MAGIC; + db->version = HS_DB_VERSION; + db->length = len; + db->platform = platform; + + // Copy bytecode + memcpy(bytecode, in_bytecode, len); + + db->crc32 = Crc32c_ComputeBuf(0, bytecode, db->length); + return db; +} + + struct hs_database *build(NG &ng, unsigned int *length) { assert(length); diff --git a/src/database.c b/src/database.c index a4e10c22..61eb021f 100644 --- a/src/database.c +++ b/src/database.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -348,43 +348,6 @@ hs_error_t dbIsValid(const hs_database_t *db) { return HS_SUCCESS; } -/** \brief Encapsulate the given bytecode (RoseEngine) in a newly-allocated - * \ref hs_database, ensuring that it is padded correctly to give cacheline - * alignment. */ -hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) { - size_t db_len = sizeof(struct hs_database) + len; - DEBUG_PRINTF("db size %zu\n", db_len); - DEBUG_PRINTF("db platform %llx\n", platform); - - struct hs_database *db = (struct hs_database *)hs_database_alloc(db_len); - if (hs_check_alloc(db) != HS_SUCCESS) { - hs_database_free(db); - return NULL; - } - - // So that none of our database is uninitialized - memset(db, 0, db_len); - - // we need to align things manually - size_t shift = (uintptr_t)db->bytes & 0x3f; - DEBUG_PRINTF("shift is %zu\n", shift); - - db->bytecode = offsetof(struct hs_database, bytes) - shift; - char *bytecode = (char *)db + db->bytecode; - assert(ISALIGNED_CL(bytecode)); - - db->magic = HS_DB_MAGIC; - db->version = HS_DB_VERSION; - db->length = len; - db->platform = platform; - - // Copy bytecode - memcpy(bytecode, in_bytecode, len); - - db->crc32 = Crc32c_ComputeBuf(0, bytecode, db->length); - return db; -} - #if defined(_WIN32) #define SNPRINTF_COMPAT _snprintf #else diff --git a/src/database.h b/src/database.h index 5488c93d..399513fc 100644 --- a/src/database.h +++ b/src/database.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -110,7 +110,6 @@ hs_error_t validDatabase(const hs_database_t *db) { } hs_error_t dbIsValid(const struct hs_database *db); -struct hs_database *dbCreate(const char *bytecode, size_t len, u64a platform); #ifdef __cplusplus } /* extern "C" */ diff --git a/src/dispatcher.c b/src/dispatcher.c new file mode 100644 index 00000000..810a5299 --- /dev/null +++ b/src/dispatcher.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "hs_common.h" +#include "hs_runtime.h" +#include "ue2common.h" +#include "util/cpuid_flags.h" +#include "util/join.h" + +#define CREATE_DISPATCH(RTYPE, NAME, ...) \ + /* create defns */ \ + RTYPE JOIN(avx2_, NAME)(__VA_ARGS__); \ + RTYPE JOIN(corei7_, NAME)(__VA_ARGS__); \ + RTYPE JOIN(core2_, NAME)(__VA_ARGS__); \ + \ + /* error func */ \ + static inline RTYPE JOIN(error_, NAME)(__VA_ARGS__) { \ + return (RTYPE)HS_ARCH_ERROR; \ + } \ + \ + /* resolver */ \ + static void(*JOIN(resolve_, NAME)(void)) { \ + if (check_avx2()) { \ + return JOIN(avx2_, NAME); \ + } \ + if (check_sse42() && check_popcnt()) { \ + return JOIN(corei7_, NAME); \ + } \ + if (check_ssse3()) { \ + return JOIN(core2_, NAME); \ + } \ + /* anything else is fail */ \ + return JOIN(error_, NAME); \ + } \ + \ + /* function */ \ + RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME))) + +CREATE_DISPATCH(hs_error_t, hs_scan, const hs_database_t *db, const char *data, + unsigned length, unsigned flags, hs_scratch_t *scratch, + match_event_handler onEvent, void *userCtx); + +CREATE_DISPATCH(hs_error_t, hs_stream_size, const hs_database_t *database, + size_t *stream_size); + +CREATE_DISPATCH(hs_error_t, hs_database_size, const hs_database_t *db, + size_t *size); +CREATE_DISPATCH(hs_error_t, dbIsValid, const hs_database_t *db); +CREATE_DISPATCH(hs_error_t, hs_free_database, hs_database_t *db); + +CREATE_DISPATCH(hs_error_t, hs_open_stream, const hs_database_t *db, + unsigned int flags, hs_stream_t **stream); + +CREATE_DISPATCH(hs_error_t, hs_scan_stream, hs_stream_t *id, const char *data, + unsigned int length, unsigned int flags, hs_scratch_t *scratch, + match_event_handler onEvent, void *ctxt); + +CREATE_DISPATCH(hs_error_t, hs_close_stream, hs_stream_t *id, + hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt); + +CREATE_DISPATCH(hs_error_t, hs_scan_vector, const hs_database_t *db, + const char *const *data, const unsigned int *length, + unsigned int count, unsigned int flags, hs_scratch_t *scratch, + match_event_handler onevent, void *context); + +CREATE_DISPATCH(hs_error_t, hs_database_info, const hs_database_t *db, char **info); + +CREATE_DISPATCH(hs_error_t, hs_copy_stream, hs_stream_t **to_id, + const hs_stream_t *from_id); + +CREATE_DISPATCH(hs_error_t, hs_reset_stream, hs_stream_t *id, + unsigned int flags, hs_scratch_t *scratch, + match_event_handler onEvent, void *context); + +CREATE_DISPATCH(hs_error_t, hs_reset_and_copy_stream, hs_stream_t *to_id, + const hs_stream_t *from_id, hs_scratch_t *scratch, + match_event_handler onEvent, void *context); + +CREATE_DISPATCH(hs_error_t, hs_serialize_database, const hs_database_t *db, + char **bytes, size_t *length); + +CREATE_DISPATCH(hs_error_t, hs_deserialize_database, const char *bytes, + const size_t length, hs_database_t **db); + +CREATE_DISPATCH(hs_error_t, hs_deserialize_database_at, const char *bytes, + const size_t length, hs_database_t *db); + +CREATE_DISPATCH(hs_error_t, hs_serialized_database_info, const char *bytes, + size_t length, char **info); + +CREATE_DISPATCH(hs_error_t, hs_serialized_database_size, const char *bytes, + const size_t length, size_t *deserialized_size); + +/** INTERNALS **/ + +CREATE_DISPATCH(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen); diff --git a/src/hs.cpp b/src/hs.cpp index 07f6d2c1..f64e867a 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -192,6 +192,14 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags, return HS_COMPILER_ERROR; } +#if defined(FAT_RUNTIME) + if (!check_ssse3()) { + *db = nullptr; + *comp_error = generateCompileError("Unsupported architecture", -1); + return HS_ARCH_ERROR; + } +#endif + if (!checkMode(mode, comp_error)) { *db = nullptr; assert(*comp_error); // set by checkMode. @@ -319,6 +327,13 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, return HS_COMPILER_ERROR; } +#if defined(FAT_RUNTIME) + if (!check_ssse3()) { + *error = generateCompileError("Unsupported architecture", -1); + return HS_ARCH_ERROR; + } +#endif + if (!info) { *error = generateCompileError("Invalid parameter: info is NULL", -1); return HS_COMPILER_ERROR; @@ -426,6 +441,11 @@ hs_error_t hs_populate_platform(hs_platform_info_t *platform) { extern "C" HS_PUBLIC_API hs_error_t hs_free_compile_error(hs_compile_error_t *error) { +#if defined(FAT_RUNTIME) + if (!check_ssse3()) { + return HS_ARCH_ERROR; + } +#endif freeCompileError(error); return HS_SUCCESS; } diff --git a/src/hs_common.h b/src/hs_common.h index 4bf31146..ad8d9880 100644 --- a/src/hs_common.h +++ b/src/hs_common.h @@ -519,6 +519,17 @@ const char *hs_version(void); */ #define HS_SCRATCH_IN_USE (-10) +/** + * Unsupported CPU architecture. + * + * This error is returned when Hyperscan is able to detect that the current + * system does not support the required instruction set. + * + * At a minimum, Hyperscan requires Supplemental Streaming SIMD Extensions 3 + * (SSSE3). + */ +#define HS_ARCH_ERROR (-11) + /** @} */ #ifdef __cplusplus diff --git a/src/nfa/mcsheng_compile.cpp b/src/nfa/mcsheng_compile.cpp index b7570af4..a7713bb0 100644 --- a/src/nfa/mcsheng_compile.cpp +++ b/src/nfa/mcsheng_compile.cpp @@ -193,7 +193,8 @@ void createShuffleMasks(mcsheng *m, const dfa_info &info, } for (u32 i = 0; i < N_CHARS; i++) { assert(info.alpha_remap[i] != info.alpha_remap[TOP]); - memcpy((u8*)&m->sheng_masks[i], (u8*)masks[info.alpha_remap[i]].data(), sizeof(m128)); + memcpy((u8 *)&m->sheng_masks[i], + (u8 *)masks[info.alpha_remap[i]].data(), sizeof(m128)); } m->sheng_end = sheng_end; m->sheng_accel_limit = sheng_end - 1; diff --git a/src/util/cpuid_flags.c b/src/util/cpuid_flags.c index 9a8bd922..dba147ee 100644 --- a/src/util/cpuid_flags.c +++ b/src/util/cpuid_flags.c @@ -40,12 +40,14 @@ #define SSSE3 (1 << 9) #define SSE4_1 (1 << 19) #define SSE4_2 (1 << 20) +#define POPCNT (1 << 23) #define XSAVE (1 << 27) #define AVX (1 << 28) // EDX +#define FXSAVE (1 << 24) #define SSE (1 << 25) -#define SSE2 (1 << 25) +#define SSE2 (1 << 26) #define HTT (1 << 28) // Structured Extended Feature Flags Enumeration Leaf ECX values @@ -87,7 +89,6 @@ u64a xgetbv(u32 op) { #endif } -static int check_avx2(void) { #if defined(__INTEL_COMPILER) return _may_i_use_cpu_feature(_FEATURE_AVX2); @@ -137,6 +138,24 @@ u64a cpuid_flags(void) { return cap; } +int check_ssse3(void) { + unsigned int eax, ebx, ecx, edx; + cpuid(1, 0, &eax, &ebx, &ecx, &edx); + return !!(ecx & SSSE3); +} + +int check_sse42(void) { + unsigned int eax, ebx, ecx, edx; + cpuid(1, 0, &eax, &ebx, &ecx, &edx); + return !!(ecx & SSE4_2); +} + +int check_popcnt(void) { + unsigned int eax, ebx, ecx, edx; + cpuid(1, 0, &eax, &ebx, &ecx, &edx); + return !!(ecx & POPCNT); +} + struct family_id { u32 full_family; u32 full_model; diff --git a/src/util/cpuid_flags.h b/src/util/cpuid_flags.h index 2df97ab5..8b23d495 100644 --- a/src/util/cpuid_flags.h +++ b/src/util/cpuid_flags.h @@ -41,6 +41,11 @@ u64a cpuid_flags(void); u32 cpuid_tune(void); +int check_avx2(void); +int check_ssse3(void); +int check_sse42(void); +int check_popcnt(void); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/util/simd_types.h b/src/util/simd_types.h index 35f27e66..74e2abec 100644 --- a/src/util/simd_types.h +++ b/src/util/simd_types.h @@ -61,6 +61,7 @@ #error no intrinsics! #endif +#if defined(__SSE2__) typedef __m128i m128; #else typedef struct ALIGN_DIRECTIVE {u64a hi; u64a lo;} m128; diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index 77f3ac3b..8b494444 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -1,5 +1,5 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") set(gtest_SOURCES gtest/gtest-all.cc gtest/gtest.h) if(NOT XCODE) @@ -34,7 +34,7 @@ add_library(gtest STATIC ${gtest_SOURCES}) add_definitions(-DGTEST_HAS_PTHREAD=0 -DSRCDIR=${PROJECT_SOURCE_DIR}) -if (NOT RELEASE_BUILD) +if (NOT (RELEASE_BUILD OR FAT_RUNTIME)) set(unit_internal_SOURCES internal/bitfield.cpp internal/bitutils.cpp @@ -89,7 +89,7 @@ set(unit_internal_SOURCES add_executable(unit-internal ${unit_internal_SOURCES}) target_link_libraries(unit-internal hs gtest corpusomatic) -endif(NOT RELEASE_BUILD) +endif(NOT (RELEASE_BUILD OR FAT_RUNTIME)) set(unit_hyperscan_SOURCES hyperscan/allocators.cpp