diff --git a/CHANGELOG.md b/CHANGELOG.md index 73cc2f3d..2e28e3b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ This is a list of notable changes to Hyperscan, in reverse chronological order. +## [4.5.2] 2017-07-26 +- Bugfix for issue #57: Treat characters between `\Q.\E` as codepoints in + UTF8 mode. +- Bugfix for issue #60: Use a portable flag for mktemp for fat runtime builds. +- Bugfix for fat runtime builds on AVX-512 capable machines with Hyperscan's + AVX-512 support disabled. + ## [4.5.1] 2017-06-16 - Bugfix for issue #56: workaround for gcc-4.8 C++11 defect. - Bugfix for literal matching table generation, reversing a regression in diff --git a/CMakeLists.txt b/CMakeLists.txt index a02584de..2c2e298a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ project (hyperscan C CXX) set (HS_MAJOR_VERSION 4) set (HS_MINOR_VERSION 5) -set (HS_PATCH_VERSION 1) +set (HS_PATCH_VERSION 2) set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION}) set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) diff --git a/cmake/build_wrapper.sh b/cmake/build_wrapper.sh index 70392229..a6ee3b26 100755 --- a/cmake/build_wrapper.sh +++ b/cmake/build_wrapper.sh @@ -11,8 +11,8 @@ shift 2 # $@ contains the actual build command OUT=$(echo "$@" | sed 's/.* -o \(.*\.o\).*/\1/') trap cleanup INT QUIT EXIT -SYMSFILE=$(mktemp --tmpdir ${PREFIX}_rename.syms.XXXXX) -KEEPSYMS=$(mktemp --tmpdir keep.syms.XXXXX) +SYMSFILE=$(mktemp -p /tmp ${PREFIX}_rename.syms.XXXXX) +KEEPSYMS=$(mktemp -p /tmp keep.syms.XXXXX) # find the libc used by gcc LIBC_SO=$("$@" --print-file-name=libc.so.6) cp ${KEEPSYMS_IN} ${KEEPSYMS} diff --git a/cmake/config.h.in b/cmake/config.h.in index 9c250b4c..203f0afd 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -21,6 +21,9 @@ /* Define if building "fat" runtime. */ #cmakedefine FAT_RUNTIME +/* Define if building AVX-512 in the fat runtime. */ +#cmakedefine BUILD_AVX512 + /* Define to 1 if `backtrace' works. */ #cmakedefine HAVE_BACKTRACE diff --git a/src/parser/Parser.rl b/src/parser/Parser.rl index 52b3340c..ce9ca865 100644 --- a/src/parser/Parser.rl +++ b/src/parser/Parser.rl @@ -1155,6 +1155,40 @@ unichar readUtf8CodePoint4c(const char *s) { '\\E' => { fgoto main; }; + + #unicode chars + utf8_2c when is_utf8 => { + assert(mode.utf8); + /* leverage ComponentClass to generate the vertices */ + auto cc = getComponentClass(mode); + cc->add(readUtf8CodePoint2c(ts)); + cc->finalize(); + currentSeq->addComponent(move(cc)); + }; + + utf8_3c when is_utf8 => { + assert(mode.utf8); + /* leverage ComponentClass to generate the vertices */ + auto cc = getComponentClass(mode); + cc->add(readUtf8CodePoint3c(ts)); + cc->finalize(); + currentSeq->addComponent(move(cc)); + }; + + utf8_4c when is_utf8 => { + assert(mode.utf8); + /* leverage ComponentClass to generate the vertices */ + auto cc = getComponentClass(mode); + cc->add(readUtf8CodePoint4c(ts)); + cc->finalize(); + currentSeq->addComponent(move(cc)); + }; + + hi_byte when is_utf8 => { + assert(mode.utf8); + throwInvalidUtf8(); + }; + # Literal character any => { addLiteral(currentSeq, *ts, mode); @@ -1169,6 +1203,31 @@ unichar readUtf8CodePoint4c(const char *s) { '\\E' => { fret; }; + + #unicode chars + utf8_2c when is_utf8 => { + assert(mode.utf8); + currentCls->add(readUtf8CodePoint2c(ts)); + inCharClassEarly = false; + }; + + utf8_3c when is_utf8 => { + assert(mode.utf8); + currentCls->add(readUtf8CodePoint3c(ts)); + inCharClassEarly = false; + }; + + utf8_4c when is_utf8 => { + assert(mode.utf8); + currentCls->add(readUtf8CodePoint4c(ts)); + inCharClassEarly = false; + }; + + hi_byte when is_utf8 => { + assert(mode.utf8); + throwInvalidUtf8(); + }; + # Literal character any => { currentCls->add(*ts); diff --git a/src/util/cpuid_flags.c b/src/util/cpuid_flags.c index c0ab09af..3c62c07b 100644 --- a/src/util/cpuid_flags.c +++ b/src/util/cpuid_flags.c @@ -192,7 +192,8 @@ u64a cpuid_flags(void) { cap &= ~HS_CPU_FEATURES_AVX2; #endif -#if !defined(FAT_RUNTIME) && !defined(HAVE_AVX512) +#if (!defined(FAT_RUNTIME) && !defined(HAVE_AVX512)) || \ + (defined(FAT_RUNTIME) && !defined(BUILD_AVX512)) cap &= ~HS_CPU_FEATURES_AVX512; #endif diff --git a/unit/hyperscan/bad_patterns.txt b/unit/hyperscan/bad_patterns.txt index 3d6d9db9..3042dc82 100644 --- a/unit/hyperscan/bad_patterns.txt +++ b/unit/hyperscan/bad_patterns.txt @@ -142,3 +142,5 @@ 145:/abc/8{edit_distance=1} #UTF-8 is disallowed for approximate matching. 146:/(*UTF8)abc/{edit_distance=1} #UTF-8 is disallowed for approximate matching. 147:/\b\BMYBt/s{edit_distance=1} #Pattern can never match. +148:/\QÀ\Eaaaa/8 #Expression is not valid UTF-8. +149:/[\QÀ\Eaaaa]/8 #Expression is not valid UTF-8.