From e0c489f98fe92efed402354a86c16014e3627691 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Fri, 15 Jan 2021 13:55:04 +0000 Subject: [PATCH 01/32] Example code: bugfix of KW scan. --- examples/patbench.cc | 5 +++++ examples/pcapscan.cc | 5 +++++ examples/simplegrep.c | 10 ++++++++++ 3 files changed, 20 insertions(+) diff --git a/examples/patbench.cc b/examples/patbench.cc index 20de5745..7362095a 100644 --- a/examples/patbench.cc +++ b/examples/patbench.cc @@ -123,6 +123,7 @@ #include #include +#include #include // We use the BSD primitives throughout as they exist on both BSD and Linux. @@ -657,6 +658,10 @@ int main(int argc, char **argv) { break; case 'n': repeatCount = atoi(optarg); + if (repeatCount <= 0 || repeatCount > UINT_MAX) { + cerr << "Invalid repeatCount." << endl; + exit(-1); + } break; default: usage(argv[0]); diff --git a/examples/pcapscan.cc b/examples/pcapscan.cc index 12b94438..913fe607 100644 --- a/examples/pcapscan.cc +++ b/examples/pcapscan.cc @@ -58,6 +58,7 @@ #include #include +#include #include // We use the BSD primitives throughout as they exist on both BSD and Linux. @@ -489,6 +490,10 @@ int main(int argc, char **argv) { // Streaming mode scans. double secsStreamingScan = 0.0, secsStreamingOpenClose = 0.0; + if (repeatCount <= 0 || repeatCount > UINT_MAX) { + cerr << "Invalid repeatCount." << endl; + exit(-1); + } for (unsigned int i = 0; i < repeatCount; i++) { // Open streams. 
clock.start(); diff --git a/examples/simplegrep.c b/examples/simplegrep.c index d6bd4b39..b2c64f31 100644 --- a/examples/simplegrep.c +++ b/examples/simplegrep.c @@ -57,6 +57,7 @@ #include #include #include +#include #include @@ -152,6 +153,15 @@ int main(int argc, char *argv[]) { char *pattern = argv[1]; char *inputFN = argv[2]; + if (access(inputFN, F_OK) != 0) { + fprintf(stderr, "ERROR: file doesn't exist.\n"); + return -1; + } + if (access(inputFN, R_OK) != 0) { + fprintf(stderr, "ERROR: can't be read.\n"); + return -1; + } + /* First, we attempt to compile the pattern provided on the command line. * We assume 'DOTALL' semantics, meaning that the '.' meta-character will * match newline characters. The compiler will analyse the given pattern and From 98daf283b145ddae4106b768af00ec88737a4503 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 11 Mar 2021 15:17:36 +0000 Subject: [PATCH 02/32] Example code: update year --- examples/patbench.cc | 2 +- examples/pcapscan.cc | 2 +- examples/simplegrep.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/patbench.cc b/examples/patbench.cc index 7362095a..23fff568 100644 --- a/examples/patbench.cc +++ b/examples/patbench.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: diff --git a/examples/pcapscan.cc b/examples/pcapscan.cc index 913fe607..95996a2a 100644 --- a/examples/pcapscan.cc +++ b/examples/pcapscan.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: diff --git a/examples/simplegrep.c b/examples/simplegrep.c index b2c64f31..30a97b0f 100644 --- 
a/examples/simplegrep.c +++ b/examples/simplegrep.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: From 95cd19c6f073fad3c9934562d1e5952830beeb80 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Sun, 14 Mar 2021 18:24:03 +0000 Subject: [PATCH 03/32] Example code: update header position --- examples/patbench.cc | 2 +- examples/pcapscan.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/patbench.cc b/examples/patbench.cc index 23fff568..dac58db9 100644 --- a/examples/patbench.cc +++ b/examples/patbench.cc @@ -115,6 +115,7 @@ #include #include #include +#include #include #include #include @@ -123,7 +124,6 @@ #include #include -#include #include // We use the BSD primitives throughout as they exist on both BSD and Linux. diff --git a/examples/pcapscan.cc b/examples/pcapscan.cc index 95996a2a..2fd13e5b 100644 --- a/examples/pcapscan.cc +++ b/examples/pcapscan.cc @@ -51,6 +51,7 @@ #include #include +#include #include #include #include @@ -58,7 +59,6 @@ #include #include -#include #include // We use the BSD primitives throughout as they exist on both BSD and Linux. 
From 62e35c910b96d6619585c783d3061c635af8f965 Mon Sep 17 00:00:00 2001 From: Wang Xiang W Date: Tue, 19 Jan 2021 12:31:52 +0000 Subject: [PATCH 04/32] fat runtime: fix libc symbol parsing fixes github issue #292 --- cmake/build_wrapper.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/build_wrapper.sh b/cmake/build_wrapper.sh index 1962813f..895610c0 100755 --- a/cmake/build_wrapper.sh +++ b/cmake/build_wrapper.sh @@ -17,7 +17,7 @@ KEEPSYMS=$(mktemp -p /tmp keep.syms.XXXXX) LIBC_SO=$("$@" --print-file-name=libc.so.6) cp ${KEEPSYMS_IN} ${KEEPSYMS} # get all symbols from libc and turn them into patterns -nm -f p -g -D ${LIBC_SO} | sed -s 's/\([^ ]*\).*/^\1$/' >> ${KEEPSYMS} +nm -f p -g -D ${LIBC_SO} | sed -s 's/\([^ @]*\).*/^\1$/' >> ${KEEPSYMS} # build the object "$@" # rename the symbols in the object From 1ecb3aef8b09fc034555601bbd03b57be524ff23 Mon Sep 17 00:00:00 2001 From: Wang Xiang W Date: Wed, 27 Jan 2021 11:57:51 +0000 Subject: [PATCH 05/32] simd_utils: fix undefined instruction issue for 32-bit system fixes github issue #292 --- src/util/simd_utils.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index d1f060b0..5fa727e5 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -156,6 +156,16 @@ static really_inline u32 movd(const m128 in) { return _mm_cvtsi128_si32(in); } +static really_inline u64a movq(const m128 in) { +#if defined(ARCH_X86_64) + return _mm_cvtsi128_si64(in); +#else // 32-bit - this is horrific + u32 lo = movd(in); + u32 hi = movd(_mm_srli_epi64(in, 32)); + return (u64a)hi << 32 | lo; +#endif +} + #if defined(HAVE_AVX512) static really_inline u32 
movd512(const m512 in) { // NOTE: seems gcc doesn't support _mm512_cvtsi512_si32(in), @@ -166,20 +176,10 @@ static really_inline u32 movd512(const m512 in) { static really_inline u64a movq512(const m512 in) { // NOTE: seems AVX512 doesn't support _mm512_cvtsi512_si64(in), // so we use 2-step convertions to work around. - return _mm_cvtsi128_si64(_mm512_castsi512_si128(in)); + return movq(_mm512_castsi512_si128(in)); } #endif -static really_inline u64a movq(const m128 in) { -#if defined(ARCH_X86_64) - return _mm_cvtsi128_si64(in); -#else // 32-bit - this is horrific - u32 lo = movd(in); - u32 hi = movd(_mm_srli_epi64(in, 32)); - return (u64a)hi << 32 | lo; -#endif -} - /* another form of movq */ static really_inline m128 load_m128_from_u64a(const u64a *p) { @@ -791,7 +791,7 @@ m128 movdq_lo(m256 x) { #define lshift128_m256(a, count_immed) _mm256_slli_si256(a, count_immed) #define extract64from256(a, imm) _mm_extract_epi64(_mm256_extracti128_si256(a, imm >> 1), imm % 2) #define extract32from256(a, imm) _mm_extract_epi32(_mm256_extracti128_si256(a, imm >> 2), imm % 4) -#define extractlow64from256(a) _mm_cvtsi128_si64(cast256to128(a)) +#define extractlow64from256(a) movq(cast256to128(a)) #define extractlow32from256(a) movd(cast256to128(a)) #define interleave256hi(a, b) _mm256_unpackhi_epi8(a, b) #define interleave256lo(a, b) _mm256_unpacklo_epi8(a, b) From 9e17e8520f182ac81fb4f1092281a6969bf93083 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 4 Mar 2021 16:13:46 +0000 Subject: [PATCH 06/32] literal API: add empty string check. 
fixes github issue #302, #304 --- src/compiler/compiler.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 5751bd64..15a130dd 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -416,6 +416,10 @@ void addLitExpression(NG &ng, unsigned index, const char *expression, "HS_FLAG_SOM_LEFTMOST are supported in literal API."); } + if (!strcmp(expression, "")) { + throw CompileError("Pure literal API doesn't support empty string."); + } + // This expression must be a pure literal, we can build ue2_literal // directly based on expression text. ParsedLitExpression ple(index, expression, expLength, flags, id); From 0b246c801af25acf083d317a95705e2ce0b5b0da Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 4 Mar 2021 16:50:14 +0000 Subject: [PATCH 07/32] literal API: add instruction support fixes github issue #303 --- src/rose/program_runtime.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index ff5a5099..24a6cbea 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -3110,6 +3110,7 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, const char in_catchup = prog_flags & ROSE_PROG_FLAG_IN_CATCHUP; const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV; + const char skip_mpv_catchup = prog_flags & ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; const char *pc_base = getByOffset(t, programOffset); const char *pc = pc_base; @@ -3206,6 +3207,17 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, } L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(CATCH_UP_MPV) { + if (from_mpv || skip_mpv_catchup) { + DEBUG_PRINTF("skipping mpv catchup\n"); + } else if (roseCatchUpMPV(t, + end - scratch->core_info.buf_offset, + scratch) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(SOM_FROM_REPORT) { som = handleSomExternal(scratch, 
&ri->som, end); DEBUG_PRINTF("som from report %u is %llu\n", ri->som.onmatch, @@ -3213,6 +3225,15 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, } L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(TRIGGER_SUFFIX) { + if (roseTriggerSuffix(t, scratch, ri->queue, ri->event, som, + end) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + work_done = 1; + } + L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(DEDUPE) { updateSeqPoint(tctxt, end, from_mpv); const char do_som = t->hasSom; // TODO: constant propagate From 7f4a806118f0c5089d425437bed5d75df1038dd6 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 4 Mar 2021 17:00:34 +0000 Subject: [PATCH 08/32] mcclellan: improve wide-state checking in Sherman optimization fixes github issue #305 --- src/nfa/mcclellancompile.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 27ec1716..432c7fd3 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -1082,7 +1082,9 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, // Use the daddy already set for this state so long as it isn't already // a Sherman state. 
dstate_id_t daddy = currState.daddy; - if (!info.is_sherman(daddy) && !info.is_widestate(daddy)) { + if (info.is_widestate(daddy)) { + return; + } else if (!info.is_sherman(daddy)) { hinted.insert(currState.daddy); } else { // Fall back to granddaddy, which has already been processed (due From 819da8df1773d138b534db09f55a07bbda4a7f92 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 11 Mar 2021 15:20:55 +0000 Subject: [PATCH 09/32] update year for bugfix #302-#305 --- src/compiler/compiler.cpp | 2 +- src/nfa/mcclellancompile.cpp | 2 +- src/rose/program_runtime.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 15a130dd..6f993ffe 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 432c7fd3..6ae9558c 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index 24a6cbea..579ce278 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: From b254a88c436f70258789c6011ae95c700697d483 Mon Sep 17 00:00:00 2001 From: 
"Chang, Harry" Date: Wed, 10 Mar 2021 07:20:01 +0000 Subject: [PATCH 10/32] Logical Combination: bypass combination flag in hs_expression_info. Fixes github issue #291 --- src/hs.cpp | 8 +++++++- src/hs_compile.h | 12 +++--------- src/hs_internal.h | 6 ++++-- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/hs.cpp b/src/hs.cpp index eac58889..ae9cdf14 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -514,6 +514,12 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, return HS_COMPILER_ERROR; } + if (flags & HS_FLAG_COMBINATION) { + *error = generateCompileError("Invalid parameter: unsupported " + "logical combination expression", -1); + return HS_COMPILER_ERROR; + } + *info = nullptr; *error = nullptr; diff --git a/src/hs_compile.h b/src/hs_compile.h index b318c29d..5aa24188 100644 --- a/src/hs_compile.h +++ b/src/hs_compile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -748,10 +748,7 @@ hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error); * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode. * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset * when a match is found. - * - HS_FLAG_COMBINATION - Parse the expression in logical combination - * syntax. - * - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for - * the sub-expressions in logical combinations. + * - HS_FLAG_QUIET - This flag will be ignored. 
* * @param info * On success, a pointer to the pattern information will be returned in @@ -814,10 +811,7 @@ hs_error_t HS_CDECL hs_expression_info(const char *expression, * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode. * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset * when a match is found. - * - HS_FLAG_COMBINATION - Parse the expression in logical combination - * syntax. - * - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for - * the sub-expressions in logical combinations. + * - HS_FLAG_QUIET - This flag will be ignored. * * @param ext * A pointer to a filled @ref hs_expr_ext_t structure that defines diff --git a/src/hs_internal.h b/src/hs_internal.h index adf07b22..4eb5e157 100644 --- a/src/hs_internal.h +++ b/src/hs_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, Intel Corporation + * Copyright (c) 2019-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -80,7 +80,9 @@ extern "C" | HS_FLAG_PREFILTER \ | HS_FLAG_SINGLEMATCH \ | HS_FLAG_ALLOWEMPTY \ - | HS_FLAG_SOM_LEFTMOST) + | HS_FLAG_SOM_LEFTMOST \ + | HS_FLAG_COMBINATION \ + | HS_FLAG_QUIET) #ifdef __cplusplus } /* extern "C" */ From b386cbd20d2e326a4b9102081cd323ccaebea6aa Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Mon, 7 Jun 2021 15:35:57 +0800 Subject: [PATCH 11/32] bugfix: add vbmi platform parameter for tests in single.cpp --- unit/hyperscan/single.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/unit/hyperscan/single.cpp b/unit/hyperscan/single.cpp index 01fbfeab..07269cf0 100644 --- a/unit/hyperscan/single.cpp +++ b/unit/hyperscan/single.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following 
conditions are met: @@ -363,8 +363,9 @@ static const unsigned validModes[] = { // Mode bits for switching off various architecture features static const unsigned long long featureMask[] = { ~0ULL, /* native */ - ~(HS_CPU_FEATURES_AVX2 | HS_CPU_FEATURES_AVX512), /* no avx2 */ - ~HS_CPU_FEATURES_AVX512, /* no avx512 */ + ~(HS_CPU_FEATURES_AVX2 | HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX512VBMI), /* no avx2 */ + ~(HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX512VBMI), /* no avx512 */ + ~HS_CPU_FEATURES_AVX512VBMI, /* no avx512vbmi */ }; INSTANTIATE_TEST_CASE_P(Single, From 85019432f4c09ad31c1c6115eb3e2102401f4a13 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Mon, 7 Jun 2021 16:24:51 +0800 Subject: [PATCH 12/32] bugfix: add vbmi case for test in database.cpp --- unit/internal/database.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/unit/internal/database.cpp b/unit/internal/database.cpp index 8f0c1a69..0070fbc9 100644 --- a/unit/internal/database.cpp +++ b/unit/internal/database.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -56,6 +56,10 @@ TEST(DB, flagsToPlatform) { p.cpu_features |= HS_CPU_FEATURES_AVX512; #endif +#if defined(HAVE_AVX512VBMI) + p.cpu_features |= HS_CPU_FEATURES_AVX512VBMI; +#endif + platform_t pp = target_to_platform(target_t(p)); ASSERT_EQ(pp, hs_current_platform); } From 9b4ba34c68475b567b2857b5573de3b089d37f47 Mon Sep 17 00:00:00 2001 From: hongyang7 Date: Thu, 16 Dec 2021 19:02:17 +0800 Subject: [PATCH 13/32] Fix segfaults on allocation failure (#4) Throw std::bad_alloc instead of returning nullptr from ue2::AlignedAllocator. Allocators for STL containers are expected never to return with an invalid pointer, and instead must throw on failure. 
Violating this expectation can lead to invalid pointer dereferences. Co-authored-by: johanngan --- src/util/alloc.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/util/alloc.h b/src/util/alloc.h index de20c8d0..49b4a824 100644 --- a/src/util/alloc.h +++ b/src/util/alloc.h @@ -76,7 +76,11 @@ public: T *allocate(std::size_t size) const { size_t alloc_size = size * sizeof(T); - return static_cast(aligned_malloc_internal(alloc_size, N)); + T *ptr = static_cast(aligned_malloc_internal(alloc_size, N)); + if (!ptr) { + throw std::bad_alloc(); + } + return ptr; } void deallocate(T *x, std::size_t) const noexcept { From 1baf340d1c3567a5bc2cc560510fdf3793e3dc8c Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Wed, 29 Dec 2021 22:30:18 +0000 Subject: [PATCH 14/32] sanitiser bugfix --- tools/hscollider/args.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/hscollider/args.cpp b/tools/hscollider/args.cpp index 2eb510e0..8a52d99f 100644 --- a/tools/hscollider/args.cpp +++ b/tools/hscollider/args.cpp @@ -503,8 +503,8 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop, } else if (in_corpora) { corpora->push_back(optarg); in_corpora = 2; - break; } + break; case 0: break; default: From 47bc68339ffe091e3ad7aaea95e6a7149261e9e6 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 28 Apr 2022 10:11:32 +0000 Subject: [PATCH 15/32] bugfix: fix overflow risk of strlen function --- src/compiler/compiler.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 6f993ffe..35f46b3f 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -323,7 +323,8 @@ void addExpression(NG &ng, unsigned index, const char *expression, } // Ensure that our pattern isn't too long (in characters). 
- if (strlen(expression) > cc.grey.limitPatternLength) { + size_t maxlen = cc.grey.limitPatternLength + 1; + if (strnlen(expression, maxlen) >= maxlen) { throw CompileError("Pattern length exceeds limit."); } From 811f909d41e13916bdd66bf417731b7cd9e44bc6 Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Thu, 12 May 2022 02:15:07 +0000 Subject: [PATCH 16/32] Corpus generator: fix random char value of UTF-8. fixes github issue #184 --- util/ng_corpus_generator.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/ng_corpus_generator.cpp b/util/ng_corpus_generator.cpp index e5e8e06c..f796cd45 100644 --- a/util/ng_corpus_generator.cpp +++ b/util/ng_corpus_generator.cpp @@ -477,14 +477,14 @@ void CorpusGeneratorUtf8::generateCorpus(vector &data) { * that we've been asked for. */ unichar CorpusGeneratorUtf8::getRandomChar() { u32 range = MAX_UNICODE + 1 - - (UNICODE_SURROGATE_MAX + UNICODE_SURROGATE_MIN + 1); + - (UNICODE_SURROGATE_MAX - UNICODE_SURROGATE_MIN + 1); range = min(cProps.alphabetSize, range); assert(range); unichar c = 'a' + cProps.rand(0, range - 1); if (c >= UNICODE_SURROGATE_MIN) { - c =+ UNICODE_SURROGATE_MAX + 1; + c += UNICODE_SURROGATE_MAX - UNICODE_SURROGATE_MIN + 1; } return c % (MAX_UNICODE + 1); From 7bf5a9f5cd81d2135887a84efa139e2868e6989c Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Thu, 12 May 2022 08:20:29 +0000 Subject: [PATCH 17/32] Corpus editor: fix random char value of UTF-8. 
--- util/ng_corpus_editor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/ng_corpus_editor.cpp b/util/ng_corpus_editor.cpp index ac4f8b65..c1149216 100644 --- a/util/ng_corpus_editor.cpp +++ b/util/ng_corpus_editor.cpp @@ -268,12 +268,12 @@ void CorpusEditorUtf8::flip_case(vector &corpus) { unichar CorpusEditorUtf8::chooseCodePoint(void) { /* We need to ensure that we don't pick a surrogate cp */ const u32 range = - MAX_UNICODE + 1 - (UNICODE_SURROGATE_MAX + UNICODE_SURROGATE_MIN + 1); + MAX_UNICODE + 1 - (UNICODE_SURROGATE_MAX - UNICODE_SURROGATE_MIN + 1); unichar raw = props.rand(0, range - 1); if (raw < UNICODE_SURROGATE_MIN) { return raw; } else { - return raw + UNICODE_SURROGATE_MAX + 1; + return raw + UNICODE_SURROGATE_MAX - UNICODE_SURROGATE_MIN + 1; } } From f194a85d510b5fc5ccf49f37d27ff8d7361817d6 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Tue, 31 May 2022 06:30:18 +0000 Subject: [PATCH 18/32] klocwork: fix risk issues --- src/hwlm/noodle_engine_sse.c | 2 +- src/nfa/goughcompile.cpp | 8 ++++++++ src/nfa/repeatcompile.cpp | 4 ++++ src/nfagraph/ng_som.cpp | 9 +++++++++ src/nfagraph/ng_violet.cpp | 17 +++++++++++++++++ src/parser/logical_combination.cpp | 3 ++- src/rose/rose_build_convert.cpp | 4 ++++ src/smallwrite/smallwrite_build.cpp | 2 +- src/util/graph_undirected.h | 4 ++-- src/util/ue2string.h | 2 +- tools/hsbench/data_corpus.cpp | 5 ++++- tools/hsbench/main.cpp | 5 +++++ tools/hscollider/DatabaseProxy.h | 2 +- tools/hscollider/NfaGeneratedCorpora.cpp | 2 +- tools/hscollider/Thread.cpp | 2 +- util/ng_corpus_properties.cpp | 2 +- 16 files changed, 62 insertions(+), 11 deletions(-) diff --git a/src/hwlm/noodle_engine_sse.c b/src/hwlm/noodle_engine_sse.c index 7cd53d7c..58ace3b6 100644 --- a/src/hwlm/noodle_engine_sse.c +++ b/src/hwlm/noodle_engine_sse.c @@ -106,7 +106,7 @@ hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf, if (!l) { return HWLM_SUCCESS; } - assert(l <= 32); + assert(l <= 16); 
DEBUG_PRINTF("d %zu\n", d - buf); m128 v = zeroes128(); diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp index d41c6f42..47594f2e 100644 --- a/src/nfa/goughcompile.cpp +++ b/src/nfa/goughcompile.cpp @@ -207,6 +207,10 @@ void makeCFG_top_edge(GoughGraph &cfg, const vector &vertices, assert(contains(src_slots, slot_id)); shared_ptr vmin = make_shared(); + if (!vmin) { + assert(0); + throw std::bad_alloc(); + } cfg[e].vars.push_back(vmin); final_var = vmin.get(); @@ -318,6 +322,10 @@ void makeCFG_edge(GoughGraph &cfg, const map &som_creators, DEBUG_PRINTF("bypassing min on join %u\n", slot_id); } else { shared_ptr vmin = make_shared(); + if (!vmin) { + assert(0); + throw std::bad_alloc(); + } cfg[e].vars.push_back(vmin); final_var = vmin.get(); diff --git a/src/nfa/repeatcompile.cpp b/src/nfa/repeatcompile.cpp index 934dd29e..d15ae89b 100644 --- a/src/nfa/repeatcompile.cpp +++ b/src/nfa/repeatcompile.cpp @@ -124,6 +124,10 @@ RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin, const depth &repeatMax, u32 minPeriod) : stateSize(0), packedCtrlSize(0), horizon(0), patchCount(0), patchSize(0), encodingSize(0), patchesOffset(0) { + if (type == REPEAT_SPARSE_OPTIMAL_P && minPeriod == 0) { + assert(0); + throw std::domain_error("SPARSE_OPTIMAL_P must have non-zero minPeriod."); + } assert(repeatMin <= repeatMax); assert(repeatMax.is_reachable()); assert(minPeriod || type != REPEAT_SPARSE_OPTIMAL_P); diff --git a/src/nfagraph/ng_som.cpp b/src/nfagraph/ng_som.cpp index d23ac408..47cc82da 100644 --- a/src/nfagraph/ng_som.cpp +++ b/src/nfagraph/ng_som.cpp @@ -2446,6 +2446,10 @@ static bool doLitHaigSom(NG &ng, NGHolder &g, som_type som) { ue2_literal lit; shared_ptr rhs = make_shared(); + if (!rhs) { + assert(0); + throw std::bad_alloc(); + } if (!ng.cc.grey.allowLitHaig) { return false; } @@ -2510,6 +2514,11 @@ bool doHaigLitHaigSom(NG &ng, NGHolder &g, ue2_literal lit; shared_ptr rhs = make_shared(); shared_ptr lhs = make_shared(); + 
if (!rhs || !lhs) { + assert(0); + throw std::bad_alloc(); + } + if (!splitOffBestLiteral(g, regions, &lit, &*lhs, &*rhs, ng.cc)) { return false; } diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 78d73082..ba6b3501 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -1036,6 +1036,11 @@ bool splitRoseEdge(const NGHolder &base_graph, RoseInGraph &vg, shared_ptr lhs = make_shared(); shared_ptr rhs = make_shared(); + if (!lhs || !rhs) { + assert(0); + throw std::bad_alloc(); + } + unordered_map lhs_map; unordered_map rhs_map; @@ -1229,6 +1234,10 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg, DEBUG_PRINTF("splitting on pivot %zu\n", h[pivot].index); unordered_map temp_map; shared_ptr new_lhs = make_shared(); + if (!new_lhs) { + assert(0); + throw std::bad_alloc(); + } splitLHS(h, pivot, new_lhs.get(), &temp_map); /* want to cut off paths to pivot from things other than the pivot - @@ -1310,6 +1319,10 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg, if (!contains(done_rhs, adj)) { unordered_map temp_map; shared_ptr new_rhs = make_shared(); + if (!new_rhs) { + assert(0); + throw std::bad_alloc(); + } splitRHS(h, adj, new_rhs.get(), &temp_map); remove_edge(new_rhs->start, new_rhs->accept, *new_rhs); remove_edge(new_rhs->start, new_rhs->acceptEod, *new_rhs); @@ -2281,6 +2294,10 @@ void splitEdgesForSuffix(const NGHolder &base_graph, RoseInGraph &vg, assert(!splitters.empty()); shared_ptr lhs = make_shared(); + if (!lhs) { + assert(0); + throw bad_alloc(); + } unordered_map v_map; cloneHolder(*lhs, base_graph, &v_map); lhs->kind = NFA_INFIX; diff --git a/src/parser/logical_combination.cpp b/src/parser/logical_combination.cpp index de017a11..96c3bd89 100644 --- a/src/parser/logical_combination.cpp +++ b/src/parser/logical_combination.cpp @@ -140,7 +140,8 @@ void ParsedLogical::validateSubIDs(const unsigned *ids, } hs_compile_error_t *compile_err = NULL; hs_expr_info_t *info = NULL; - hs_error_t err = 
hs_expression_info(expressions[i], flags[i], &info, + hs_error_t err = hs_expression_info(expressions[i], + flags ? flags[i] : 0, &info, &compile_err); if (err != HS_SUCCESS) { hs_free_compile_error(compile_err); diff --git a/src/rose/rose_build_convert.cpp b/src/rose/rose_build_convert.cpp index 33351099..d5b73cad 100644 --- a/src/rose/rose_build_convert.cpp +++ b/src/rose/rose_build_convert.cpp @@ -562,6 +562,10 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, DEBUG_PRINTF("woot?\n"); shared_ptr h_new = make_shared(); + if (!h_new) { + assert(0); + throw std::bad_alloc(); + } unordered_map rhs_map; vector exits_vec; insert(&exits_vec, exits_vec.end(), exits); diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index 4eb4801d..ea89669a 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -78,7 +78,7 @@ namespace ue2 { struct LitTrieVertexProps { LitTrieVertexProps() = default; explicit LitTrieVertexProps(u8 c_in) : c(c_in) {} - size_t index; // managed by ue2_graph + size_t index = 0; // managed by ue2_graph u8 c = 0; //!< character reached on this vertex flat_set reports; //!< managed reports fired on this vertex }; diff --git a/src/util/graph_undirected.h b/src/util/graph_undirected.h index 049964ab..50717284 100644 --- a/src/util/graph_undirected.h +++ b/src/util/graph_undirected.h @@ -70,8 +70,8 @@ class undirected_graph_edge_descriptor using base_vertex_type = typename base_graph_traits::vertex_descriptor; base_edge_type underlying_edge; - const base_graph_type *g; - bool reverse; // if true, reverse vertices in source() and target() + const base_graph_type *g = nullptr; + bool reverse = false; // if true, reverse vertices in source() and target() inline std::pair canonical_edge() const { diff --git a/src/util/ue2string.h b/src/util/ue2string.h index 0aa84689..f436936d 100644 --- a/src/util/ue2string.h +++ b/src/util/ue2string.h @@ -133,7 +133,7 @@ 
public: : lit(&lit_in), idx(idx_in) {} const ue2_literal *lit = nullptr; - size_t idx; + size_t idx = 0; }; using const_reverse_iterator = std::reverse_iterator; diff --git a/tools/hsbench/data_corpus.cpp b/tools/hsbench/data_corpus.cpp index 8e761ec3..b23da1fb 100644 --- a/tools/hsbench/data_corpus.cpp +++ b/tools/hsbench/data_corpus.cpp @@ -58,7 +58,10 @@ void readRow(sqlite3_stmt *statement, vector &blocks, } auto internal_stream_index = stream_indices[stream_id]; - assert(blob || bytes > 0); + if (!(blob && bytes > 0)) { + assert(0); + throw std::domain_error("Invalid blob or bytes from sqlite3."); + } blocks.emplace_back(id, stream_id, internal_stream_index, string(blob, blob + bytes)); } diff --git a/tools/hsbench/main.cpp b/tools/hsbench/main.cpp index 1c91813b..22becbd1 100644 --- a/tools/hsbench/main.cpp +++ b/tools/hsbench/main.cpp @@ -760,6 +760,11 @@ u64a byte_size(const vector &corpus_blocks) { total += block.payload.size(); } + if (total == 0) { + assert(0); + throw std::invalid_argument("Empty corpus."); + } + return total; } diff --git a/tools/hscollider/DatabaseProxy.h b/tools/hscollider/DatabaseProxy.h index 831ab148..f6957d29 100644 --- a/tools/hscollider/DatabaseProxy.h +++ b/tools/hscollider/DatabaseProxy.h @@ -61,7 +61,7 @@ public: std::lock_guard lock(mutex); if (failed) { // We have previously failed to compile this database. 
- return nullptr; + throw CompileFailed("Unable to compile db previously."); } if (db) { return db; diff --git a/tools/hscollider/NfaGeneratedCorpora.cpp b/tools/hscollider/NfaGeneratedCorpora.cpp index 66ae270b..4de320e1 100644 --- a/tools/hscollider/NfaGeneratedCorpora.cpp +++ b/tools/hscollider/NfaGeneratedCorpora.cpp @@ -101,7 +101,7 @@ void NfaGeneratedCorpora::generate(unsigned id, vector &data) { pl.logicalKeyRenumber(); const auto &m_lkey = pl.getLkeyMap(); assert(!m_lkey.empty()); - u32 a_subid; // arbitrary sub id + u32 a_subid = 0; // arbitrary sub id unordered_map> m_data; for (const auto &it : m_lkey) { a_subid = it.first; diff --git a/tools/hscollider/Thread.cpp b/tools/hscollider/Thread.cpp index 5fff8239..c63793d9 100644 --- a/tools/hscollider/Thread.cpp +++ b/tools/hscollider/Thread.cpp @@ -98,6 +98,6 @@ void *Thread::runThread(void *thr) { } -Thread::Thread(size_t num) : thread_id(num) {} +Thread::Thread(size_t num) : thread_id(num), thread() {} Thread::~Thread() {} diff --git a/util/ng_corpus_properties.cpp b/util/ng_corpus_properties.cpp index e784e058..511ad60a 100644 --- a/util/ng_corpus_properties.cpp +++ b/util/ng_corpus_properties.cpp @@ -42,7 +42,7 @@ CorpusProperties::CorpusProperties() : matchness(100), unmatchness(0), randomness(0), prefixRange(0, 0), suffixRange(0, 0), cycleMin(1), cycleMax(1), corpusLimit(DEFAULT_CORPUS_GENERATOR_LIMIT), editDistance(0), - alphabetSize(~0) { + alphabetSize(~0), rngSeed(0) { // empty } From 838a04e66ff2803886440b6635bafe9d7a06991e Mon Sep 17 00:00:00 2001 From: Liu Zixian Date: Mon, 27 Jun 2022 16:07:16 +0800 Subject: [PATCH 19/32] fix build with glibc-2.34 SIGSTKSZ is no longer a constant after glibc 2.34 https://sourceware.org/pipermail/libc-alpha/2021-August/129718.html --- tools/hscollider/sig.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/hscollider/sig.cpp b/tools/hscollider/sig.cpp index 7d580e41..fba748a5 100644 --- a/tools/hscollider/sig.cpp +++
b/tools/hscollider/sig.cpp @@ -38,6 +38,7 @@ #if defined(HAVE_SIGACTION) || defined(_WIN32) #include +#define STACK_SIZE 8192 #endif #ifdef HAVE_BACKTRACE @@ -175,7 +176,7 @@ void installSignalHandler(void) { } #ifdef HAVE_SIGALTSTACK -static TLS_VARIABLE char alt_stack_loc[SIGSTKSZ]; +static TLS_VARIABLE char alt_stack_loc[STACK_SIZE]; #endif void setSignalStack(void) { @@ -187,7 +188,7 @@ void setSignalStack(void) { stack_t alt_stack; memset(&alt_stack, 0, sizeof(alt_stack)); alt_stack.ss_flags = 0; - alt_stack.ss_size = SIGSTKSZ; + alt_stack.ss_size = STACK_SIZE; alt_stack.ss_sp = alt_stack_loc; if (!sigaltstack(&alt_stack, nullptr)) { act.sa_flags |= SA_ONSTACK; From 676490427cc0156ca683de0cebe08a7c8e75fc28 Mon Sep 17 00:00:00 2001 From: Liu Zixian Date: Thu, 30 Jun 2022 19:27:27 +0800 Subject: [PATCH 20/32] Add comment for stack size Linux kernel default stack size should be enough for hscollider. https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/include/uapi/asm/signal.h --- tools/hscollider/sig.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/hscollider/sig.cpp b/tools/hscollider/sig.cpp index fba748a5..5f4fb567 100644 --- a/tools/hscollider/sig.cpp +++ b/tools/hscollider/sig.cpp @@ -38,7 +38,7 @@ #if defined(HAVE_SIGACTION) || defined(_WIN32) #include -#define STACK_SIZE 8192 +#define STACK_SIZE 8192 // linux kernel default stack size for x86 #endif #ifdef HAVE_BACKTRACE From a3ba1ad369bef4eb57b2dd48b8d632eb495e2610 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Tue, 5 Jul 2022 17:11:18 +0000 Subject: [PATCH 21/32] gcc-10(and above): fix compile issue caused by stringop-overflow --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8bc6077f..5ff2c9ad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -427,9 +427,9 @@ CHECK_CXX_COMPILER_FLAG("-Wunused-local-typedefs" CXX_UNUSED_LOCAL_TYPEDEFS) 
CHECK_CXX_COMPILER_FLAG("-Wunused-variable" CXX_WUNUSED_VARIABLE) # gcc 10 complains about this -CHECK_C_COMPILER_FLAG("-Wstringop-overflow" CC_STRINGOP_OVERFLOW) -if(CC_STRINGOP_OVERFLOW) +if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10) set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-stringop-overflow") + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-stringop-overflow") endif() endif() From e1f4542e6597e4beff34ea263b8f9e79adcef69c Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Tue, 12 Jul 2022 08:42:05 +0000 Subject: [PATCH 22/32] stringop-overflow compatible fix --- CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5ff2c9ad..9ea56b23 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -427,7 +427,9 @@ CHECK_CXX_COMPILER_FLAG("-Wunused-local-typedefs" CXX_UNUSED_LOCAL_TYPEDEFS) CHECK_CXX_COMPILER_FLAG("-Wunused-variable" CXX_WUNUSED_VARIABLE) # gcc 10 complains about this -if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10) +CHECK_C_COMPILER_FLAG("-Wstringop-overflow" CC_STRINGOP_OVERFLOW) +CHECK_CXX_COMPILER_FLAG("-Wstringop-overflow" CXX_STRINGOP_OVERFLOW) +if(CC_STRINGOP_OVERFLOW OR CXX_STRINGOP_OVERFLOW) set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-stringop-overflow") set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-stringop-overflow") endif() From 44b5955ecdc08c1030d226c34452d6e708d7de59 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 28 Jul 2022 04:59:34 +0000 Subject: [PATCH 23/32] chimera: fix SKIP flag issue fix github issue #360 --- chimera/ch_runtime.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/chimera/ch_runtime.c b/chimera/ch_runtime.c index fdb5b992..1009036b 100644 --- a/chimera/ch_runtime.c +++ b/chimera/ch_runtime.c @@ -326,6 +326,10 @@ ch_error_t catchupPcre(struct HybridContext *hyctx, unsigned int id, } else if (cbrv == CH_CALLBACK_SKIP_PATTERN) { DEBUG_PRINTF("user callback told us to skip this pattern\n"); pd->scanStart = hyctx->length; + if (top_id 
== id) { + break; + } + continue; } if (top_id == id) { From c1539d32df8166767e17d697eb1c21513274b42c Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 28 Jul 2022 21:24:31 +0000 Subject: [PATCH 24/32] UTF-8 validation: fix one octet check corner issue fix github issue #362 --- src/parser/utf8_validate.cpp | 2 +- unit/internal/utf8_validate.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parser/utf8_validate.cpp b/src/parser/utf8_validate.cpp index 50aa06d8..a4b74796 100644 --- a/src/parser/utf8_validate.cpp +++ b/src/parser/utf8_validate.cpp @@ -72,7 +72,7 @@ bool isValidUtf8(const char *expression, const size_t len) { while (i < len) { DEBUG_PRINTF("byte %zu: 0x%02x\n", i, s[i]); // One octet. - if (s[i] < 0x7f) { + if (s[i] <= 0x7f) { DEBUG_PRINTF("one octet\n"); i++; continue; diff --git a/unit/internal/utf8_validate.cpp b/unit/internal/utf8_validate.cpp index 03357942..f69ee857 100644 --- a/unit/internal/utf8_validate.cpp +++ b/unit/internal/utf8_validate.cpp @@ -64,8 +64,8 @@ static ValidUtf8TestInfo valid_utf8_tests[] = { {"공동경비구역", true}, {"জলসাঘর", true}, - // Invalid one-byte caseS. - {"\x7f", false}, + // Valid one-byte caseS. + {"\x7f", true}, // \x7f is valid // These bytes should never appear in a UTF-8 stream.
{"\xc0", false}, From c81293c696380f0a6fd053f9a2ab8a5c325e9ebe Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Mon, 1 Aug 2022 17:13:25 +0000 Subject: [PATCH 25/32] update year 2022 --- chimera/ch_runtime.c | 2 +- src/parser/utf8_validate.cpp | 2 +- unit/internal/utf8_validate.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/chimera/ch_runtime.c b/chimera/ch_runtime.c index 1009036b..af7d1f08 100644 --- a/chimera/ch_runtime.c +++ b/chimera/ch_runtime.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, Intel Corporation + * Copyright (c) 2018-2022, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: diff --git a/src/parser/utf8_validate.cpp b/src/parser/utf8_validate.cpp index a4b74796..54c9755e 100644 --- a/src/parser/utf8_validate.cpp +++ b/src/parser/utf8_validate.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2022, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: diff --git a/unit/internal/utf8_validate.cpp b/unit/internal/utf8_validate.cpp index f69ee857..03f52903 100644 --- a/unit/internal/utf8_validate.cpp +++ b/unit/internal/utf8_validate.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2022, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: From 9e254af71fcd044ff6d1bfce0915cd2b4e3ac12e Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Tue, 2 Aug 2022 19:25:27 +0000 Subject: [PATCH 26/32] Fix cmake CMP0115 warning for CMake 3.20 and above --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9ea56b23..b099c97d 100644 --- 
a/CMakeLists.txt +++ b/CMakeLists.txt @@ -581,7 +581,7 @@ set (hs_exec_common_SRCS set (hs_exec_SRCS ${hs_HEADERS} - src/hs_version.h + src/hs_version.h.in src/ue2common.h src/allocator.h src/crc32.c @@ -738,7 +738,7 @@ SET (hs_compile_SRCS src/grey.h src/hs.cpp src/hs_internal.h - src/hs_version.h + src/hs_version.h.in src/scratch.h src/state.h src/ue2common.h From f47b69a01dca93452c1f6f43796f8b6b08a9088a Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Mon, 15 Aug 2022 03:00:22 +0000 Subject: [PATCH 27/32] Silence clang-14 warnings --- CMakeLists.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index b099c97d..41babe59 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -397,6 +397,18 @@ if (CXX_UNUSED_CONST_VAR) set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-const-variable") endif() +# clang-14 complains about unused-but-set variable. +CHECK_CXX_COMPILER_FLAG("-Wunused-but-set-variable" CXX_UNUSED_BUT_SET_VAR) +if (CXX_UNUSED_BUT_SET_VAR) + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-but-set-variable") +endif() + +# clang-14 complains about using bitwise operator instead of logical ones. 
+CHECK_CXX_COMPILER_FLAG("-Wbitwise-instead-of-logical" CXX_BITWISE_INSTEAD_OF_LOGICAL) +if (CXX_BITWISE_INSTEAD_OF_LOGICAL) + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-bitwise-instead-of-logical") +endif() + # gcc 6 complains about type attributes that get ignored, like alignment CHECK_CXX_COMPILER_FLAG("-Wignored-attributes" CXX_IGNORED_ATTR) if (CXX_IGNORED_ATTR) From 5aa4bd565fb1eef6c0ef1b04f6c823e6503bd5b4 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Wed, 19 Oct 2022 16:50:02 +0000 Subject: [PATCH 28/32] stream close: free stream to avoid memory leak fix github issue #303 --- src/runtime.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/runtime.c b/src/runtime.c index a3659348..ab46db1a 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -1013,6 +1013,7 @@ hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch, report_eod_matches(id, scratch, onEvent, context); if (unlikely(internal_matching_error(scratch))) { unmarkScratchInUse(scratch); + hs_stream_free(id); return HS_UNKNOWN_ERROR; } unmarkScratchInUse(scratch); From 277fc400892ba57ad5d9eda9f5bcbc6cc6a1b8ca Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 20 Oct 2022 08:48:46 +0000 Subject: [PATCH 29/32] scratch: add quick validity check fix github issue #350 --- src/runtime.c | 39 +++++++++++++++++--------------------- src/scratch.c | 4 +++- src/scratch.h | 3 ++- src/state.h | 5 ++++- src/stream_compress_impl.h | 3 ++- 5 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/runtime.c b/src/runtime.c index ab46db1a..3c2d6533 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2019, Intel Corporation + * Copyright (c) 2015-2022, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -90,7 +90,7 @@ u8 *getHistory(char *state, const struct RoseEngine *t, u64a offset) { * callers. 
*/ static really_inline -char validScratch(const struct RoseEngine *t, const struct hs_scratch *s) { +char validScratch(const struct hs_scratch *s, u32 crc) { if (!ISALIGNED_CL(s)) { DEBUG_PRINTF("bad alignment %p\n", s); return 0; @@ -101,18 +101,12 @@ char validScratch(const struct RoseEngine *t, const struct hs_scratch *s) { return 0; } - if (t->mode == HS_MODE_BLOCK && t->stateOffsets.end > s->bStateSize) { - DEBUG_PRINTF("bad state size\n"); + /* add quick rose sanity checks by db crc*/ + if (s->db_crc != crc) { + DEBUG_PRINTF("Improper scratch for current db\n"); return 0; } - if (t->queueCount > s->queueCount) { - DEBUG_PRINTF("bad queue count\n"); - return 0; - } - - /* TODO: add quick rose sanity checks */ - return 1; } @@ -335,7 +329,7 @@ hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data, return HS_DB_MODE_ERROR; } - if (unlikely(!validScratch(rose, scratch))) { + if (unlikely(!validScratch(scratch, db->crc32))) { return HS_INVALID; } @@ -509,7 +503,7 @@ void maintainHistoryBuffer(const struct RoseEngine *rose, char *state, static really_inline void init_stream(struct hs_stream *s, const struct RoseEngine *rose, - char init_history) { + char init_history, u32 crc) { char *state = getMultiState(s); if (init_history) { @@ -524,6 +518,7 @@ void init_stream(struct hs_stream *s, const struct RoseEngine *rose, s->rose = rose; s->offset = 0; + s->crc32 = crc; setStreamStatus(state, 0); roseInitState(rose, state); @@ -568,7 +563,7 @@ hs_error_t HS_CDECL hs_open_stream(const hs_database_t *db, return HS_NOMEM; } - init_stream(s, rose, 1); + init_stream(s, rose, 1, db->crc32); *stream = s; return HS_SUCCESS; @@ -756,7 +751,7 @@ hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id, } if (onEvent) { - if (!scratch || !validScratch(to_id->rose, scratch)) { + if (!scratch || !validScratch(scratch, to_id->crc32)) { return HS_INVALID; } if (unlikely(markScratchInUse(scratch))) { @@ -982,7 +977,7 @@ hs_error_t HS_CDECL 
hs_scan_stream(hs_stream_t *id, const char *data, hs_scratch_t *scratch, match_event_handler onEvent, void *context) { if (unlikely(!id || !scratch || !data || - !validScratch(id->rose, scratch))) { + !validScratch(scratch, id->crc32))) { return HS_INVALID; } @@ -1004,7 +999,7 @@ hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch, } if (onEvent) { - if (!scratch || !validScratch(id->rose, scratch)) { + if (!scratch || !validScratch(scratch, id->crc32)) { return HS_INVALID; } if (unlikely(markScratchInUse(scratch))) { @@ -1034,7 +1029,7 @@ hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags, } if (onEvent) { - if (!scratch || !validScratch(id->rose, scratch)) { + if (!scratch || !validScratch(scratch, id->crc32)) { return HS_INVALID; } if (unlikely(markScratchInUse(scratch))) { @@ -1049,7 +1044,7 @@ hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags, } // history already initialised - init_stream(id, id->rose, 0); + init_stream(id, id->rose, 0, id->crc32); return HS_SUCCESS; } @@ -1128,7 +1123,7 @@ hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db, return HS_DB_MODE_ERROR; } - if (unlikely(!validScratch(rose, scratch))) { + if (unlikely(!validScratch(scratch, db->crc32))) { return HS_INVALID; } @@ -1138,7 +1133,7 @@ hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db, hs_stream_t *id = (hs_stream_t *)(scratch->bstate); - init_stream(id, rose, 1); /* open stream */ + init_stream(id, rose, 1, db->crc32); /* open stream */ for (u32 i = 0; i < count; i++) { DEBUG_PRINTF("block %u/%u offset=%llu len=%u\n", i, count, id->offset, @@ -1253,7 +1248,7 @@ hs_error_t HS_CDECL hs_reset_and_expand_stream(hs_stream_t *to_stream, const struct RoseEngine *rose = to_stream->rose; if (onEvent) { - if (!scratch || !validScratch(to_stream->rose, scratch)) { + if (!scratch || !validScratch(scratch, to_stream->crc32)) { return HS_INVALID; } if (unlikely(markScratchInUse(scratch))) { diff --git 
a/src/scratch.c b/src/scratch.c index 25991e2b..5849380d 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2019, Intel Corporation + * Copyright (c) 2015-2022, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -373,6 +373,7 @@ hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db, hs_scratch_free((*scratch)->scratch_alloc); } + proto->db_crc = db->crc32; hs_error_t alloc_ret = alloc_scratch(proto, scratch); hs_scratch_free(proto_tmp); /* kill off temp used for sizing */ if (alloc_ret != HS_SUCCESS) { @@ -380,6 +381,7 @@ hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db, return alloc_ret; } } else { + (*scratch)->db_crc = db->crc32; hs_scratch_free(proto_tmp); /* kill off temp used for sizing */ unmarkScratchInUse(*scratch); } diff --git a/src/scratch.h b/src/scratch.h index 1256f7ab..efaa6884 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2019, Intel Corporation + * Copyright (c) 2015-2022, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -171,6 +171,7 @@ struct match_deduper { */ struct ALIGN_CL_DIRECTIVE hs_scratch { u32 magic; + u32 db_crc; /**< identity of a scratch space, for validity check */ u8 in_use; /**< non-zero when being used by an API call. 
*/ u32 queueCount; u32 activeQueueArraySize; /**< size of active queue array fatbit in bytes */ diff --git a/src/state.h b/src/state.h index 9ade59db..567001ea 100644 --- a/src/state.h +++ b/src/state.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2022, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -57,6 +57,9 @@ struct hs_stream { /** \brief The current stream offset. */ u64a offset; + + /** \brief Identity of hs_stream, for scratch validity check. */ + u32 crc32; }; #define getMultiState(hs_s) ((char *)(hs_s) + sizeof(*(hs_s))) diff --git a/src/stream_compress_impl.h b/src/stream_compress_impl.h index d1ccf5e6..ceea14a6 100644 --- a/src/stream_compress_impl.h +++ b/src/stream_compress_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, Intel Corporation + * Copyright (c) 2017-2022, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -116,6 +116,7 @@ size_t JOIN(sc_, FN_SUFFIX)(const struct RoseEngine *rose, = ((STREAM_QUAL char *)stream) + sizeof(struct hs_stream); COPY_FIELD(stream->offset); + COPY_FIELD(stream->crc32); ASSIGN(stream->rose, rose); COPY(stream_body + ROSE_STATE_OFFSET_STATUS_FLAGS, 1); From 2fbef659053f51cbccf0cee975bce88089b643b7 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 20 Oct 2022 08:47:03 +0000 Subject: [PATCH 30/32] fix nfa dump error --- src/nfa/nfa_dump_dispatch.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/nfa/nfa_dump_dispatch.cpp b/src/nfa/nfa_dump_dispatch.cpp index bc8c175d..b498fd95 100644 --- a/src/nfa/nfa_dump_dispatch.cpp +++ b/src/nfa/nfa_dump_dispatch.cpp @@ -75,6 +75,7 @@ namespace ue2 { DISPATCH_CASE(LBR_NFA_VERM, LbrVerm, dbnt_func); \ DISPATCH_CASE(LBR_NFA_NVERM, LbrNVerm, dbnt_func); \ DISPATCH_CASE(LBR_NFA_SHUF, 
LbrShuf, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_VSHUF, LbrVShuf, dbnt_func); \ DISPATCH_CASE(LBR_NFA_TRUF, LbrTruf, dbnt_func); \ DISPATCH_CASE(CASTLE_NFA, Castle, dbnt_func); \ DISPATCH_CASE(SHENG_NFA, Sheng, dbnt_func); \ From a775768988fae7b9717422261f78bdb009d75aca Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Tue, 21 Feb 2023 22:52:57 +0000 Subject: [PATCH 31/32] changelog: updates for 5.4.1 release --- CHANGELOG.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8de3a8d6..481f8fcf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,23 @@ This is a list of notable changes to Hyperscan, in reverse chronological order. +## [5.4.1] 2023-02-20 +- The Intel Hyperscan team is pleased to provide a bug fix release to our open source library. + Intel also maintains an upgraded version available through your Intel sales representative. +- Bugfix for issue #184: fix random char value of UTF-8. +- Bugfix for issue #291: bypass logical combination flag in hs_expression_info(). +- Bugfix for issue #292: fix build error due to libc symbol parsing. +- Bugfix for issue #302/304: add empty string check for pure literal API. +- Bugfix for issue #303: fix unknown instruction error in pure literal API. +- Bugfix for issue #303: avoid memory leak in stream close stage. +- Bugfix for issue #305: fix assertion failure in DFA construction. +- Bugfix for issue #317: fix aligned allocator segment faults. +- Bugfix for issue #350: add quick validity check for scratch. +- Bugfix for issue #359: fix glibc-2.34 stack size issue. +- Bugfix for issue #360: fix SKIP flag issue in chimera. +- Bugfix for issue #362: fix one cotec check corner issue in UTF-8 validation. +- Fix other compile issues. 
+ ## [5.4.0] 2020-12-31 - Improvement on literal matcher "Fat Teddy" performance, including support for Intel(R) AVX-512 Vector Byte Manipulation Instructions (Intel(R) From f8156398305f5ec8d2e69b288848c92391e76e37 Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Tue, 21 Feb 2023 22:57:45 +0000 Subject: [PATCH 32/32] Bump version number for release --- CMakeLists.txt | 2 +- src/hs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 41babe59..bd6d2def 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ project (hyperscan C CXX) set (HS_MAJOR_VERSION 5) set (HS_MINOR_VERSION 4) -set (HS_PATCH_VERSION 0) +set (HS_PATCH_VERSION 1) set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION}) set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) diff --git a/src/hs.h b/src/hs.h index 2fe5d248..ca3d6dec 100644 --- a/src/hs.h +++ b/src/hs.h @@ -43,7 +43,7 @@ #define HS_MAJOR 5 #define HS_MINOR 4 -#define HS_PATCH 0 +#define HS_PATCH 1 #include "hs_compile.h" #include "hs_runtime.h"