From cafd5248b11cbd98035286d64475b2c371aa4c87 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 4 Mar 2021 16:50:14 +0000 Subject: [PATCH 01/12] literal API: add instruction support fixes github issue #303 --- src/rose/program_runtime.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index 7d4da45a..2bba5bbf 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -3092,6 +3092,7 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, const char in_catchup = prog_flags & ROSE_PROG_FLAG_IN_CATCHUP; const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV; + const char skip_mpv_catchup = prog_flags & ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; const char *pc_base = getByOffset(t, programOffset); const char *pc = pc_base; @@ -3188,6 +3189,17 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, } L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(CATCH_UP_MPV) { + if (from_mpv || skip_mpv_catchup) { + DEBUG_PRINTF("skipping mpv catchup\n"); + } else if (roseCatchUpMPV(t, + end - scratch->core_info.buf_offset, + scratch) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(SOM_FROM_REPORT) { som = handleSomExternal(scratch, &ri->som, end); DEBUG_PRINTF("som from report %u is %llu\n", ri->som.onmatch, @@ -3195,6 +3207,15 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, } L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(TRIGGER_SUFFIX) { + if (roseTriggerSuffix(t, scratch, ri->queue, ri->event, som, + end) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + work_done = 1; + } + L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(DEDUPE) { updateSeqPoint(tctxt, end, from_mpv); const char do_som = t->hasSom; // TODO: constant propagate From a119693a66504e671b73b6e96ef2bd9760647536 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 4 Mar 2021 17:00:34 +0000 Subject: [PATCH 02/12] 
mcclellan: improve wide-state checking in Sherman optimization fixes github issue #305 --- src/nfa/mcclellancompile.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index b5c3a8ac..aa04e470 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -1081,7 +1081,9 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, // Use the daddy already set for this state so long as it isn't already // a Sherman state. dstate_id_t daddy = currState.daddy; - if (!info.is_sherman(daddy) && !info.is_widestate(daddy)) { + if (info.is_widestate(daddy)) { + return; + } else if (!info.is_sherman(daddy)) { hinted.insert(currState.daddy); } else { // Fall back to granddaddy, which has already been processed (due From decabdfede6a3d3d846964795b8a45fbe63025ff Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 11 Mar 2021 15:20:55 +0000 Subject: [PATCH 03/12] update year for bugfix #302-#305 --- src/compiler/compiler.cpp | 2 +- src/nfa/mcclellancompile.cpp | 2 +- src/rose/program_runtime.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 5751bd64..ae5927bc 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index aa04e470..055920b2 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided 
that the following conditions are met: diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index 2bba5bbf..f607e8f2 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: From c1659b854437c4fa92cc2693b6c854cc2c4a4277 Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Wed, 10 Mar 2021 07:20:01 +0000 Subject: [PATCH 04/12] Logical Combination: bypass combination flag in hs_expression_info. Fixes github issue #291 --- src/hs.cpp | 8 +++++++- src/hs_compile.h | 12 +++--------- src/hs_internal.h | 6 ++++-- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/hs.cpp b/src/hs.cpp index 303e7838..73cc032f 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -517,6 +517,12 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, return HS_COMPILER_ERROR; } + if (flags & HS_FLAG_COMBINATION) { + *error = generateCompileError("Invalid parameter: unsupported " + "logical combination expression", -1); + return HS_COMPILER_ERROR; + } + *info = nullptr; *error = nullptr; diff --git a/src/hs_compile.h b/src/hs_compile.h index b318c29d..5aa24188 100644 --- a/src/hs_compile.h +++ b/src/hs_compile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -748,10 +748,7 @@ hs_error_t HS_CDECL 
hs_free_compile_error(hs_compile_error_t *error); * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode. * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset * when a match is found. - * - HS_FLAG_COMBINATION - Parse the expression in logical combination - * syntax. - * - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for - * the sub-expressions in logical combinations. + * - HS_FLAG_QUIET - This flag will be ignored. * * @param info * On success, a pointer to the pattern information will be returned in @@ -814,10 +811,7 @@ hs_error_t HS_CDECL hs_expression_info(const char *expression, * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode. * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset * when a match is found. - * - HS_FLAG_COMBINATION - Parse the expression in logical combination - * syntax. - * - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for - * the sub-expressions in logical combinations. + * - HS_FLAG_QUIET - This flag will be ignored. 
* * @param ext * A pointer to a filled @ref hs_expr_ext_t structure that defines diff --git a/src/hs_internal.h b/src/hs_internal.h index adf07b22..4eb5e157 100644 --- a/src/hs_internal.h +++ b/src/hs_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, Intel Corporation + * Copyright (c) 2019-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -80,7 +80,9 @@ extern "C" | HS_FLAG_PREFILTER \ | HS_FLAG_SINGLEMATCH \ | HS_FLAG_ALLOWEMPTY \ - | HS_FLAG_SOM_LEFTMOST) + | HS_FLAG_SOM_LEFTMOST \ + | HS_FLAG_COMBINATION \ + | HS_FLAG_QUIET) #ifdef __cplusplus } /* extern "C" */ From 2731a3384bbd7ffc4933f6d43478ef2762e5b4d8 Mon Sep 17 00:00:00 2001 From: hongyang7 Date: Thu, 16 Dec 2021 19:02:17 +0800 Subject: [PATCH 05/12] Fix segfaults on allocation failure (#4) Throw std::bad_alloc instead of returning nullptr from ue2::AlignedAllocator. Allocators for STL containers are expected never to return with an invalid pointer, and instead must throw on failure. Violating this expectation can lead to invalid pointer dereferences. 
Co-authored-by: johanngan fixes github issue #317 (PR #320) --- src/util/alloc.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/util/alloc.h b/src/util/alloc.h index de20c8d0..49b4a824 100644 --- a/src/util/alloc.h +++ b/src/util/alloc.h @@ -76,7 +76,11 @@ public: T *allocate(std::size_t size) const { size_t alloc_size = size * sizeof(T); - return static_cast<T *>(aligned_malloc_internal(alloc_size, N)); + T *ptr = static_cast<T *>(aligned_malloc_internal(alloc_size, N)); + if (!ptr) { + throw std::bad_alloc(); + } + return ptr; } void deallocate(T *x, std::size_t) const noexcept { From 4d4940dfbe523589e4ea90033bda4c574c73d627 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 28 Apr 2022 10:11:32 +0000 Subject: [PATCH 06/12] bugfix: fix overflow risk of strlen function --- src/compiler/compiler.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index ae5927bc..32836834 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -323,7 +323,8 @@ void addExpression(NG &ng, unsigned index, const char *expression, } // Ensure that our pattern isn't too long (in characters). - if (strlen(expression) > cc.grey.limitPatternLength) { + size_t maxlen = cc.grey.limitPatternLength + 1; + if (strnlen(expression, maxlen) >= maxlen) { throw CompileError("Pattern length exceeds limit."); } From a9ca0e4de36ff32fb4a28f1bdc74ef08dc3f1ca4 Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Thu, 12 May 2022 02:15:07 +0000 Subject: [PATCH 07/12] Corpus generator: fix random char value of UTF-8. 
fixes github issue #184 --- util/ng_corpus_generator.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/ng_corpus_generator.cpp b/util/ng_corpus_generator.cpp index 145a0ab8..6c3f613d 100644 --- a/util/ng_corpus_generator.cpp +++ b/util/ng_corpus_generator.cpp @@ -476,14 +476,14 @@ void CorpusGeneratorUtf8::generateCorpus(vector &data) { * that we've been asked for. */ unichar CorpusGeneratorUtf8::getRandomChar() { u32 range = MAX_UNICODE + 1 - - (UNICODE_SURROGATE_MAX + UNICODE_SURROGATE_MIN + 1); + - (UNICODE_SURROGATE_MAX - UNICODE_SURROGATE_MIN + 1); range = min(cProps.alphabetSize, range); assert(range); unichar c = 'a' + cProps.rand(0, range - 1); if (c >= UNICODE_SURROGATE_MIN) { - c =+ UNICODE_SURROGATE_MAX + 1; + c += UNICODE_SURROGATE_MAX - UNICODE_SURROGATE_MIN + 1; } return c % (MAX_UNICODE + 1); From 31afacc7be282ac591e71564bfee794303a244fa Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Thu, 12 May 2022 08:20:29 +0000 Subject: [PATCH 08/12] Corpus editor: fix random char value of UTF-8. 
--- util/ng_corpus_editor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/ng_corpus_editor.cpp b/util/ng_corpus_editor.cpp index ac4f8b65..c1149216 100644 --- a/util/ng_corpus_editor.cpp +++ b/util/ng_corpus_editor.cpp @@ -268,12 +268,12 @@ void CorpusEditorUtf8::flip_case(vector &corpus) { unichar CorpusEditorUtf8::chooseCodePoint(void) { /* We need to ensure that we don't pick a surrogate cp */ const u32 range = - MAX_UNICODE + 1 - (UNICODE_SURROGATE_MAX + UNICODE_SURROGATE_MIN + 1); + MAX_UNICODE + 1 - (UNICODE_SURROGATE_MAX - UNICODE_SURROGATE_MIN + 1); unichar raw = props.rand(0, range - 1); if (raw < UNICODE_SURROGATE_MIN) { return raw; } else { - return raw + UNICODE_SURROGATE_MAX + 1; + return raw + UNICODE_SURROGATE_MAX - UNICODE_SURROGATE_MIN + 1; } } From 4f27a70dd7c4c48d259a77bf22bfd7dfa51b1d7e Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 28 Jul 2022 04:59:34 +0000 Subject: [PATCH 09/12] chimera: fix SKIP flag issue fix github issue #360 --- chimera/ch_runtime.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/chimera/ch_runtime.c b/chimera/ch_runtime.c index fdb5b992..1009036b 100644 --- a/chimera/ch_runtime.c +++ b/chimera/ch_runtime.c @@ -326,6 +326,10 @@ ch_error_t catchupPcre(struct HybridContext *hyctx, unsigned int id, } else if (cbrv == CH_CALLBACK_SKIP_PATTERN) { DEBUG_PRINTF("user callback told us to skip this pattern\n"); pd->scanStart = hyctx->length; + if (top_id == id) { + break; + } + continue; } if (top_id == id) { From 70b2a28386f6a4be7903d9d61836c5918d219652 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 4 Mar 2021 16:13:46 +0000 Subject: [PATCH 10/12] literal API: add empty string check. 
fixes github issue #302, #304 --- src/compiler/compiler.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 32836834..35f46b3f 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -417,6 +417,10 @@ void addLitExpression(NG &ng, unsigned index, const char *expression, "HS_FLAG_SOM_LEFTMOST are supported in literal API."); } + if (!strcmp(expression, "")) { + throw CompileError("Pure literal API doesn't support empty string."); + } + // This expression must be a pure literal, we can build ue2_literal // directly based on expression text. ParsedLitExpression ple(index, expression, expLength, flags, id); From c597f69c5910db5042cf1942de64416ed41cd5f4 Mon Sep 17 00:00:00 2001 From: Liu Zixian Date: Mon, 27 Jun 2022 16:07:16 +0800 Subject: [PATCH 11/12] fix build with glibc-2.34 SIGSTKSZ is no longer a constant after glibc 2.34 https://sourceware.org/pipermail/libc-alpha/2021-August/129718.html --- tools/hscollider/sig.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/hscollider/sig.cpp b/tools/hscollider/sig.cpp index bb00185d..d2e221b5 100644 --- a/tools/hscollider/sig.cpp +++ b/tools/hscollider/sig.cpp @@ -38,6 +38,7 @@ #if defined(HAVE_SIGACTION) #include <signal.h> +#define STACK_SIZE 8192 #endif #ifdef HAVE_BACKTRACE @@ -166,7 +167,7 @@ void installSignalHandler(void) { } #ifdef HAVE_SIGALTSTACK -static TLS_VARIABLE char alt_stack_loc[SIGSTKSZ]; +static TLS_VARIABLE char alt_stack_loc[STACK_SIZE]; #endif void setSignalStack(void) { @@ -178,7 +179,7 @@ void setSignalStack(void) { stack_t alt_stack; memset(&alt_stack, 0, sizeof(alt_stack)); alt_stack.ss_flags = 0; - alt_stack.ss_size = SIGSTKSZ; + alt_stack.ss_size = STACK_SIZE; alt_stack.ss_sp = alt_stack_loc; if (!sigaltstack(&alt_stack, nullptr)) { act.sa_flags |= SA_ONSTACK; From 74ab41897cc1d4f03555e5adde679fe21c60ee0a Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Tue, 30 Aug 2022 20:40:23 +0300 
Subject: [PATCH 12/12] Add missing header --- unit/internal/multi_bit_compress.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unit/internal/multi_bit_compress.cpp b/unit/internal/multi_bit_compress.cpp index 2d59ea14..40078f81 100644 --- a/unit/internal/multi_bit_compress.cpp +++ b/unit/internal/multi_bit_compress.cpp @@ -28,6 +28,8 @@ #include "config.h" +#include + #include "gtest/gtest.h" #include "ue2common.h" #include "util/compile_error.h"