Merge branch develop into master

This commit is contained in:
Matthew Barr 2017-07-26 16:11:00 +10:00
commit 7097ff3e63
7 changed files with 76 additions and 4 deletions

View File

@ -2,6 +2,13 @@
This is a list of notable changes to Hyperscan, in reverse chronological order.
## [4.5.2] 2017-07-26
- Bugfix for issue #57: Treat characters between `\Q...\E` as codepoints in
UTF8 mode.
- Bugfix for issue #60: Use a portable flag for mktemp for fat runtime builds.
- Bugfix for fat runtime builds on AVX-512 capable machines with Hyperscan's
AVX-512 support disabled.
## [4.5.1] 2017-06-16
- Bugfix for issue #56: workaround for gcc-4.8 C++11 defect.
- Bugfix for literal matching table generation, reversing a regression in

View File

@ -3,7 +3,7 @@ project (hyperscan C CXX)
set (HS_MAJOR_VERSION 4)
set (HS_MINOR_VERSION 5)
set (HS_PATCH_VERSION 1)
set (HS_PATCH_VERSION 2)
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)

View File

@ -11,8 +11,8 @@ shift 2
# $@ contains the actual build command
OUT=$(echo "$@" | sed 's/.* -o \(.*\.o\).*/\1/')
trap cleanup INT QUIT EXIT
SYMSFILE=$(mktemp --tmpdir ${PREFIX}_rename.syms.XXXXX)
KEEPSYMS=$(mktemp --tmpdir keep.syms.XXXXX)
SYMSFILE=$(mktemp -p /tmp ${PREFIX}_rename.syms.XXXXX)
KEEPSYMS=$(mktemp -p /tmp keep.syms.XXXXX)
# find the libc used by gcc
LIBC_SO=$("$@" --print-file-name=libc.so.6)
cp ${KEEPSYMS_IN} ${KEEPSYMS}

View File

@ -21,6 +21,9 @@
/* Define if building "fat" runtime. */
#cmakedefine FAT_RUNTIME
/* Define if building AVX-512 in the fat runtime. */
#cmakedefine BUILD_AVX512
/* Define to 1 if `backtrace' works. */
#cmakedefine HAVE_BACKTRACE

View File

@ -1155,6 +1155,40 @@ unichar readUtf8CodePoint4c(const char *s) {
'\\E' => {
fgoto main;
};
#unicode chars
utf8_2c when is_utf8 => {
assert(mode.utf8);
/* leverage ComponentClass to generate the vertices */
auto cc = getComponentClass(mode);
cc->add(readUtf8CodePoint2c(ts));
cc->finalize();
currentSeq->addComponent(move(cc));
};
utf8_3c when is_utf8 => {
assert(mode.utf8);
/* leverage ComponentClass to generate the vertices */
auto cc = getComponentClass(mode);
cc->add(readUtf8CodePoint3c(ts));
cc->finalize();
currentSeq->addComponent(move(cc));
};
utf8_4c when is_utf8 => {
assert(mode.utf8);
/* leverage ComponentClass to generate the vertices */
auto cc = getComponentClass(mode);
cc->add(readUtf8CodePoint4c(ts));
cc->finalize();
currentSeq->addComponent(move(cc));
};
hi_byte when is_utf8 => {
assert(mode.utf8);
throwInvalidUtf8();
};
# Literal character
any => {
addLiteral(currentSeq, *ts, mode);
@ -1169,6 +1203,31 @@ unichar readUtf8CodePoint4c(const char *s) {
'\\E' => {
fret;
};
#unicode chars
utf8_2c when is_utf8 => {
assert(mode.utf8);
currentCls->add(readUtf8CodePoint2c(ts));
inCharClassEarly = false;
};
utf8_3c when is_utf8 => {
assert(mode.utf8);
currentCls->add(readUtf8CodePoint3c(ts));
inCharClassEarly = false;
};
utf8_4c when is_utf8 => {
assert(mode.utf8);
currentCls->add(readUtf8CodePoint4c(ts));
inCharClassEarly = false;
};
hi_byte when is_utf8 => {
assert(mode.utf8);
throwInvalidUtf8();
};
# Literal character
any => {
currentCls->add(*ts);

View File

@ -192,7 +192,8 @@ u64a cpuid_flags(void) {
cap &= ~HS_CPU_FEATURES_AVX2;
#endif
#if !defined(FAT_RUNTIME) && !defined(HAVE_AVX512)
#if (!defined(FAT_RUNTIME) && !defined(HAVE_AVX512)) || \
(defined(FAT_RUNTIME) && !defined(BUILD_AVX512))
cap &= ~HS_CPU_FEATURES_AVX512;
#endif

View File

@ -142,3 +142,5 @@
145:/abc/8{edit_distance=1} #UTF-8 is disallowed for approximate matching.
146:/(*UTF8)abc/{edit_distance=1} #UTF-8 is disallowed for approximate matching.
147:/\b\BMYBt/s{edit_distance=1} #Pattern can never match.
148:/\QÀ\Eaaaa/8 #Expression is not valid UTF-8.
149:/[\QÀ\Eaaaa]/8 #Expression is not valid UTF-8.