Merge pull request #118 from VectorCamp/bugfix/hyperscan-backport-202208

Bugfix/hyperscan backport 202208
This commit is contained in:
Konstantinos Margaritis 2022-09-03 09:32:43 +03:00 committed by GitHub
commit c043730675
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 65 additions and 24 deletions

View File

@ -326,6 +326,10 @@ ch_error_t catchupPcre(struct HybridContext *hyctx, unsigned int id,
} else if (cbrv == CH_CALLBACK_SKIP_PATTERN) {
DEBUG_PRINTF("user callback told us to skip this pattern\n");
pd->scanStart = hyctx->length;
if (top_id == id) {
break;
}
continue;
}
if (top_id == id) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2020, Intel Corporation
* Copyright (c) 2015-2021, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -323,7 +323,8 @@ void addExpression(NG &ng, unsigned index, const char *expression,
}
// Ensure that our pattern isn't too long (in characters).
if (strlen(expression) > cc.grey.limitPatternLength) {
size_t maxlen = cc.grey.limitPatternLength + 1;
if (strnlen(expression, maxlen) >= maxlen) {
throw CompileError("Pattern length exceeds limit.");
}
@ -416,6 +417,10 @@ void addLitExpression(NG &ng, unsigned index, const char *expression,
"HS_FLAG_SOM_LEFTMOST are supported in literal API.");
}
if (!strcmp(expression, "")) {
throw CompileError("Pure literal API doesn't support empty string.");
}
// This expression must be a pure literal, we can build ue2_literal
// directly based on expression text.
ParsedLitExpression ple(index, expression, expLength, flags, id);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2020, Intel Corporation
* Copyright (c) 2015-2021, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -517,6 +517,12 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
return HS_COMPILER_ERROR;
}
if (flags & HS_FLAG_COMBINATION) {
*error = generateCompileError("Invalid parameter: unsupported "
"logical combination expression", -1);
return HS_COMPILER_ERROR;
}
*info = nullptr;
*error = nullptr;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2020, Intel Corporation
* Copyright (c) 2015-2021, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -748,10 +748,7 @@ hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error);
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
* syntax.
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
* the sub-expressions in logical combinations.
* - HS_FLAG_QUIET - This flag will be ignored.
*
* @param info
* On success, a pointer to the pattern information will be returned in
@ -814,10 +811,7 @@ hs_error_t HS_CDECL hs_expression_info(const char *expression,
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
* syntax.
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
* the sub-expressions in logical combinations.
* - HS_FLAG_QUIET - This flag will be ignored.
*
* @param ext
* A pointer to a filled @ref hs_expr_ext_t structure that defines

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, Intel Corporation
* Copyright (c) 2019-2021, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -80,7 +80,9 @@ extern "C"
| HS_FLAG_PREFILTER \
| HS_FLAG_SINGLEMATCH \
| HS_FLAG_ALLOWEMPTY \
| HS_FLAG_SOM_LEFTMOST)
| HS_FLAG_SOM_LEFTMOST \
| HS_FLAG_COMBINATION \
| HS_FLAG_QUIET)
#ifdef __cplusplus
} /* extern "C" */

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2020, Intel Corporation
* Copyright (c) 2015-2021, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -1081,7 +1081,9 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit,
// Use the daddy already set for this state so long as it isn't already
// a Sherman state.
dstate_id_t daddy = currState.daddy;
if (!info.is_sherman(daddy) && !info.is_widestate(daddy)) {
if (info.is_widestate(daddy)) {
return;
} else if (!info.is_sherman(daddy)) {
hinted.insert(currState.daddy);
} else {
// Fall back to granddaddy, which has already been processed (due

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2020, Intel Corporation
* Copyright (c) 2015-2021, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -3092,6 +3092,7 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
const char in_catchup = prog_flags & ROSE_PROG_FLAG_IN_CATCHUP;
const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV;
const char skip_mpv_catchup = prog_flags & ROSE_PROG_FLAG_SKIP_MPV_CATCHUP;
const char *pc_base = getByOffset(t, programOffset);
const char *pc = pc_base;
@ -3188,6 +3189,17 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(CATCH_UP_MPV) {
if (from_mpv || skip_mpv_catchup) {
DEBUG_PRINTF("skipping mpv catchup\n");
} else if (roseCatchUpMPV(t,
end - scratch->core_info.buf_offset,
scratch) == HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(SOM_FROM_REPORT) {
som = handleSomExternal(scratch, &ri->som, end);
DEBUG_PRINTF("som from report %u is %llu\n", ri->som.onmatch,
@ -3195,6 +3207,15 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(TRIGGER_SUFFIX) {
if (roseTriggerSuffix(t, scratch, ri->queue, ri->event, som,
end) == HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
work_done = 1;
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(DEDUPE) {
updateSeqPoint(tctxt, end, from_mpv);
const char do_som = t->hasSom; // TODO: constant propagate

View File

@ -76,7 +76,11 @@ public:
T *allocate(std::size_t size) const {
size_t alloc_size = size * sizeof(T);
return static_cast<T *>(aligned_malloc_internal(alloc_size, N));
T *ptr = static_cast<T *>(aligned_malloc_internal(alloc_size, N));
if (!ptr) {
throw std::bad_alloc();
}
return ptr;
}
void deallocate(T *x, std::size_t) const noexcept {

View File

@ -38,6 +38,7 @@
#if defined(HAVE_SIGACTION)
#include <signal.h>
#define STACK_SIZE 8192
#endif
#ifdef HAVE_BACKTRACE
@ -166,7 +167,7 @@ void installSignalHandler(void) {
}
#ifdef HAVE_SIGALTSTACK
static TLS_VARIABLE char alt_stack_loc[SIGSTKSZ];
static TLS_VARIABLE char alt_stack_loc[STACK_SIZE];
#endif
void setSignalStack(void) {
@ -178,7 +179,7 @@ void setSignalStack(void) {
stack_t alt_stack;
memset(&alt_stack, 0, sizeof(alt_stack));
alt_stack.ss_flags = 0;
alt_stack.ss_size = SIGSTKSZ;
alt_stack.ss_size = STACK_SIZE;
alt_stack.ss_sp = alt_stack_loc;
if (!sigaltstack(&alt_stack, nullptr)) {
act.sa_flags |= SA_ONSTACK;

View File

@ -28,6 +28,8 @@
#include "config.h"
#include <memory>
#include "gtest/gtest.h"
#include "ue2common.h"
#include "util/compile_error.h"

View File

@ -268,12 +268,12 @@ void CorpusEditorUtf8::flip_case(vector<unichar> &corpus) {
unichar CorpusEditorUtf8::chooseCodePoint(void) {
/* We need to ensure that we don't pick a surrogate cp */
const u32 range =
MAX_UNICODE + 1 - (UNICODE_SURROGATE_MAX + UNICODE_SURROGATE_MIN + 1);
MAX_UNICODE + 1 - (UNICODE_SURROGATE_MAX - UNICODE_SURROGATE_MIN + 1);
unichar raw = props.rand(0, range - 1);
if (raw < UNICODE_SURROGATE_MIN) {
return raw;
} else {
return raw + UNICODE_SURROGATE_MAX + 1;
return raw + UNICODE_SURROGATE_MAX - UNICODE_SURROGATE_MIN + 1;
}
}

View File

@ -476,14 +476,14 @@ void CorpusGeneratorUtf8::generateCorpus(vector<string> &data) {
* that we've been asked for. */
unichar CorpusGeneratorUtf8::getRandomChar() {
u32 range = MAX_UNICODE + 1
- (UNICODE_SURROGATE_MAX + UNICODE_SURROGATE_MIN + 1);
- (UNICODE_SURROGATE_MAX - UNICODE_SURROGATE_MIN + 1);
range = min(cProps.alphabetSize, range);
assert(range);
unichar c = 'a' + cProps.rand(0, range - 1);
if (c >= UNICODE_SURROGATE_MIN) {
c =+ UNICODE_SURROGATE_MAX + 1;
c += UNICODE_SURROGATE_MAX - UNICODE_SURROGATE_MIN + 1;
}
return c % (MAX_UNICODE + 1);