diff --git a/CHANGELOG.md b/CHANGELOG.md index 93336b50..2de58a7b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,14 @@ This is a list of notable changes to Hyperscan, in reverse chronological order. +## [5.1.1] 2019-04-03 +- Add extra detection and handling when invalid rose programs are triggered. +- Bugfix for issue #136: fix CMake parsing of CPU architecture for GCC-9. +- Bugfix for issue #137: avoid file path impact on fat runtime build. +- Bugfix for issue #141: fix rose literal programs for multi-pattern + matching when no pattern ids are provided. +- Bugfix for issue #144: fix library install path in pkg-config files. + ## [5.1.0] 2019-01-17 - Improve DFA state compression by wide-state optimization to reduce bytecode size. diff --git a/CMakeLists.txt b/CMakeLists.txt index cac4fab7..d3995362 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ project (hyperscan C CXX) set (HS_MAJOR_VERSION 5) set (HS_MINOR_VERSION 1) -set (HS_PATCH_VERSION 0) +set (HS_PATCH_VERSION 1) set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION}) set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) @@ -191,6 +191,8 @@ else() set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=native -mtune=native) execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} OUTPUT_VARIABLE _GCC_OUTPUT) + string(FIND "${_GCC_OUTPUT}" "Known" POS) + string(SUBSTRING "${_GCC_OUTPUT}" 0 ${POS} _GCC_OUTPUT) string(REGEX REPLACE ".*march=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_ARCH "${_GCC_OUTPUT}") diff --git a/chimera/libch.pc.in b/chimera/libch.pc.in index 7f266009..1eaf5bce 100644 --- a/chimera/libch.pc.in +++ b/chimera/libch.pc.in @@ -1,7 +1,7 @@ prefix=@CMAKE_INSTALL_PREFIX@ exec_prefix=@CMAKE_INSTALL_PREFIX@ -libdir=@CMAKE_INSTALL_PREFIX@/lib -includedir=@CMAKE_INSTALL_PREFIX@/include +libdir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@ +includedir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_INCLUDEDIR@ Name: libch Description: Intel(R) Chimera Library diff --git 
a/cmake/build_wrapper.sh b/cmake/build_wrapper.sh index a6ee3b26..1962813f 100755 --- a/cmake/build_wrapper.sh +++ b/cmake/build_wrapper.sh @@ -9,7 +9,7 @@ PREFIX=$1 KEEPSYMS_IN=$2 shift 2 # $@ contains the actual build command -OUT=$(echo "$@" | sed 's/.* -o \(.*\.o\).*/\1/') +OUT=$(echo "$@" | rev | cut -d ' ' -f 2- | rev | sed 's/.* -o \(.*\.o\).*/\1/') trap cleanup INT QUIT EXIT SYMSFILE=$(mktemp -p /tmp ${PREFIX}_rename.syms.XXXXX) KEEPSYMS=$(mktemp -p /tmp keep.syms.XXXXX) diff --git a/libhs.pc.in b/libhs.pc.in index e16460e8..fed4db45 100644 --- a/libhs.pc.in +++ b/libhs.pc.in @@ -1,7 +1,7 @@ prefix=@CMAKE_INSTALL_PREFIX@ exec_prefix=@CMAKE_INSTALL_PREFIX@ -libdir=@CMAKE_INSTALL_PREFIX@/lib -includedir=@CMAKE_INSTALL_PREFIX@/include +libdir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@ +includedir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_INCLUDEDIR@ Name: libhs Description: Intel(R) Hyperscan Library diff --git a/src/hs_common.h b/src/hs_common.h index 67aedb80..93dc1fe8 100644 --- a/src/hs_common.h +++ b/src/hs_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -577,6 +577,16 @@ hs_error_t HS_CDECL hs_valid_platform(void); */ #define HS_INSUFFICIENT_SPACE (-12) +/** + * Unexpected internal error. + * + * This error indicates that unexpected matching behavior occurred. This + * could be related to invalid usage of stream and scratch space or invalid memory + * operations by users. 
+ * + */ +#define HS_UNKNOWN_ERROR (-13) + /** @} */ #ifdef __cplusplus diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index 5a7f786e..7f5150e0 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -2771,6 +2771,12 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, work_done = 1; } PROGRAM_NEXT_INSTRUCTION + + default: { + assert(0); // unreachable + scratch->core_info.status |= STATUS_ERROR; + return HWLM_TERMINATE_MATCHING; + } } } @@ -2807,6 +2813,10 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, const char *pc_base = getByOffset(t, programOffset); const char *pc = pc_base; + // If this program has an effect, work_done will be set to one (which may + // allow the program to squash groups). + int work_done = 0; + struct RoseContext *tctxt = &scratch->tctxt; assert(*(const u8 *)pc != ROSE_INSTR_END); @@ -2887,6 +2897,7 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } + work_done = 1; } L_PROGRAM_NEXT_INSTRUCTION @@ -2896,6 +2907,7 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, ri->ekey) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } + work_done = 1; } L_PROGRAM_NEXT_INSTRUCTION @@ -2906,6 +2918,7 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } + work_done = 1; } L_PROGRAM_NEXT_INSTRUCTION @@ -2933,6 +2946,7 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, ekey) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } + work_done = 1; } L_PROGRAM_NEXT_INSTRUCTION @@ -2963,6 +2977,16 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, } L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(SQUASH_GROUPS) { + assert(popcount64(ri->groups) == 63); // Squash only one group. 
+ if (work_done) { + tctxt->groups &= ri->groups; + DEBUG_PRINTF("squash groups 0x%llx -> 0x%llx\n", ri->groups, + tctxt->groups); + } + } + L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(CHECK_LONG_LIT) { const char nocase = 0; if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, @@ -3011,6 +3035,12 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, } L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(CLEAR_WORK_DONE) { + DEBUG_PRINTF("clear work_done flag\n"); + work_done = 0; + } + L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(SET_LOGICAL) { DEBUG_PRINTF("set logical value of lkey %u, offset_adjust=%d\n", ri->lkey, ri->offset_adjust); @@ -3048,11 +3078,14 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } + work_done = 1; } L_PROGRAM_NEXT_INSTRUCTION default: { assert(0); // unreachable + scratch->core_info.status |= STATUS_ERROR; + return HWLM_TERMINATE_MATCHING; } } } diff --git a/src/runtime.c b/src/runtime.c index 68f1f8a7..cfcd0f7c 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -151,7 +151,7 @@ void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose, } #define STATUS_VALID_BITS \ - (STATUS_TERMINATED | STATUS_EXHAUSTED | STATUS_DELAY_DIRTY) + (STATUS_TERMINATED | STATUS_EXHAUSTED | STATUS_DELAY_DIRTY | STATUS_ERROR) /** \brief Retrieve status bitmask from stream state. 
*/ static really_inline @@ -428,7 +428,10 @@ hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data, } done_scan: - if (told_to_stop_matching(scratch)) { + if (unlikely(internal_matching_error(scratch))) { + unmarkScratchInUse(scratch); + return HS_UNKNOWN_ERROR; + } else if (told_to_stop_matching(scratch)) { unmarkScratchInUse(scratch); return HS_SCAN_TERMINATED; } @@ -447,8 +450,17 @@ done_scan: } set_retval: + if (unlikely(internal_matching_error(scratch))) { + unmarkScratchInUse(scratch); + return HS_UNKNOWN_ERROR; + } + if (rose->flushCombProgramOffset) { if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) { + if (unlikely(internal_matching_error(scratch))) { + unmarkScratchInUse(scratch); + return HS_UNKNOWN_ERROR; + } unmarkScratchInUse(scratch); return HS_SCAN_TERMINATED; } @@ -626,7 +638,7 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch, char *state = getMultiState(id); u8 status = getStreamStatus(state); - if (status & (STATUS_TERMINATED | STATUS_EXHAUSTED)) { + if (status & (STATUS_TERMINATED | STATUS_EXHAUSTED | STATUS_ERROR)) { DEBUG_PRINTF("stream is broken, just freeing storage\n"); return; } @@ -748,6 +760,10 @@ hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id, return HS_SCRATCH_IN_USE; } report_eod_matches(to_id, scratch, onEvent, context); + if (unlikely(internal_matching_error(scratch))) { + unmarkScratchInUse(scratch); + return HS_UNKNOWN_ERROR; + } unmarkScratchInUse(scratch); } @@ -863,9 +879,11 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, char *state = getMultiState(id); u8 status = getStreamStatus(state); - if (status & (STATUS_TERMINATED | STATUS_EXHAUSTED)) { + if (status & (STATUS_TERMINATED | STATUS_EXHAUSTED | STATUS_ERROR)) { DEBUG_PRINTF("stream is broken, halting scan\n"); - if (status & STATUS_TERMINATED) { + if (status & STATUS_ERROR) { + return HS_UNKNOWN_ERROR; + } else if (status & STATUS_TERMINATED) { return HS_SCAN_TERMINATED; } else 
{ return HS_SUCCESS; @@ -937,7 +955,9 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, setStreamStatus(state, scratch->core_info.status); - if (likely(!can_stop_matching(scratch))) { + if (unlikely(internal_matching_error(scratch))) { + return HS_UNKNOWN_ERROR; + } else if (likely(!can_stop_matching(scratch))) { maintainHistoryBuffer(rose, state, data, length); id->offset += length; /* maintain offset */ @@ -986,6 +1006,10 @@ hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch, return HS_SCRATCH_IN_USE; } report_eod_matches(id, scratch, onEvent, context); + if (unlikely(internal_matching_error(scratch))) { + unmarkScratchInUse(scratch); + return HS_UNKNOWN_ERROR; + } unmarkScratchInUse(scratch); } @@ -993,6 +1017,11 @@ hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch, if (roseRunFlushCombProgram(id->rose, scratch, ~0ULL) == MO_HALT_MATCHING) { scratch->core_info.status |= STATUS_TERMINATED; + if (unlikely(internal_matching_error(scratch))) { + unmarkScratchInUse(scratch); + return HS_UNKNOWN_ERROR; + } + unmarkScratchInUse(scratch); } } @@ -1018,6 +1047,10 @@ hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags, return HS_SCRATCH_IN_USE; } report_eod_matches(id, scratch, onEvent, context); + if (unlikely(internal_matching_error(scratch))) { + unmarkScratchInUse(scratch); + return HS_UNKNOWN_ERROR; + } unmarkScratchInUse(scratch); } @@ -1025,6 +1058,11 @@ hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags, if (roseRunFlushCombProgram(id->rose, scratch, ~0ULL) == MO_HALT_MATCHING) { scratch->core_info.status |= STATUS_TERMINATED; + if (unlikely(internal_matching_error(scratch))) { + unmarkScratchInUse(scratch); + return HS_UNKNOWN_ERROR; + } + unmarkScratchInUse(scratch); } } @@ -1139,7 +1177,10 @@ hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db, if (onEvent) { report_eod_matches(id, scratch, onEvent, context); - if 
(told_to_stop_matching(scratch)) { + if (unlikely(internal_matching_error(scratch))) { + unmarkScratchInUse(scratch); + return HS_UNKNOWN_ERROR; + } else if (told_to_stop_matching(scratch)) { unmarkScratchInUse(scratch); return HS_SCAN_TERMINATED; } @@ -1237,6 +1278,10 @@ hs_error_t HS_CDECL hs_reset_and_expand_stream(hs_stream_t *to_stream, return HS_SCRATCH_IN_USE; } report_eod_matches(to_stream, scratch, onEvent, context); + if (unlikely(internal_matching_error(scratch))) { + unmarkScratchInUse(scratch); + return HS_UNKNOWN_ERROR; + } unmarkScratchInUse(scratch); } diff --git a/src/scratch.h b/src/scratch.h index dab7bab7..e2e8039a 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -84,6 +84,9 @@ struct catchup_pq { * history. */ #define STATUS_DELAY_DIRTY (1U << 2) +/** \brief Status flag: Unexpected Rose program error. */ +#define STATUS_ERROR (1U << 3) + /** \brief Core information about the current scan, used everywhere. */ struct core_info { void *userContext; /**< user-supplied context */ @@ -229,7 +232,13 @@ char told_to_stop_matching(const struct hs_scratch *scratch) { static really_inline char can_stop_matching(const struct hs_scratch *scratch) { - return scratch->core_info.status & (STATUS_TERMINATED | STATUS_EXHAUSTED); + return scratch->core_info.status & + (STATUS_TERMINATED | STATUS_EXHAUSTED | STATUS_ERROR); +} + +static really_inline +char internal_matching_error(const struct hs_scratch *scratch) { + return scratch->core_info.status & STATUS_ERROR; } /** diff --git a/unit/hyperscan/multi.cpp b/unit/hyperscan/multi.cpp index d43ce327..85d8cd25 100644 --- a/unit/hyperscan/multi.cpp +++ b/unit/hyperscan/multi.cpp @@ -333,3 +333,37 @@ TEST(MPV, UE_2395) { err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); } + +TEST(MMRoseLiteralPath, issue_141) { + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + CallBackContext c; + string data = "/odezhda-dlya-bega/"; + const char *expr[] = {"/odezhda-dlya-bega/", + 
"kurtki-i-vetrovki-dlya-bega", + "futbolki-i-mayki-dlya-bega"}; + unsigned flags[] = {HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, + HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, + HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH}; + hs_error_t err = hs_compile_multi(expr, flags, nullptr, 3, HS_MODE_BLOCK, + nullptr, &db, &compile_err); + + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + c.halt = 0; + err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb, + (void *)&c); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(1U, c.matches.size()); + ASSERT_EQ(MatchRecord(19, 0), c.matches[0]); + + hs_free_database(db); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +}