mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Merge branch develop into master
This commit is contained in:
commit
7097ff3e63
@ -2,6 +2,13 @@
|
|||||||
|
|
||||||
This is a list of notable changes to Hyperscan, in reverse chronological order.
|
This is a list of notable changes to Hyperscan, in reverse chronological order.
|
||||||
|
|
||||||
|
## [4.5.2] 2017-07-26
|
||||||
|
- Bugfix for issue #57: Treat characters between `\Q.\E` as codepoints in
|
||||||
|
UTF8 mode.
|
||||||
|
- Bugfix for issue #60: Use a portable flag for mktemp for fat runtime builds.
|
||||||
|
- Bugfix for fat runtime builds on AVX-512 capable machines with Hyperscan's
|
||||||
|
AVX-512 support disabled.
|
||||||
|
|
||||||
## [4.5.1] 2017-06-16
|
## [4.5.1] 2017-06-16
|
||||||
- Bugfix for issue #56: workaround for gcc-4.8 C++11 defect.
|
- Bugfix for issue #56: workaround for gcc-4.8 C++11 defect.
|
||||||
- Bugfix for literal matching table generation, reversing a regression in
|
- Bugfix for literal matching table generation, reversing a regression in
|
||||||
|
@ -3,7 +3,7 @@ project (hyperscan C CXX)
|
|||||||
|
|
||||||
set (HS_MAJOR_VERSION 4)
|
set (HS_MAJOR_VERSION 4)
|
||||||
set (HS_MINOR_VERSION 5)
|
set (HS_MINOR_VERSION 5)
|
||||||
set (HS_PATCH_VERSION 1)
|
set (HS_PATCH_VERSION 2)
|
||||||
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
|
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
|
||||||
|
|
||||||
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||||
|
@ -11,8 +11,8 @@ shift 2
|
|||||||
# $@ contains the actual build command
|
# $@ contains the actual build command
|
||||||
OUT=$(echo "$@" | sed 's/.* -o \(.*\.o\).*/\1/')
|
OUT=$(echo "$@" | sed 's/.* -o \(.*\.o\).*/\1/')
|
||||||
trap cleanup INT QUIT EXIT
|
trap cleanup INT QUIT EXIT
|
||||||
SYMSFILE=$(mktemp --tmpdir ${PREFIX}_rename.syms.XXXXX)
|
SYMSFILE=$(mktemp -p /tmp ${PREFIX}_rename.syms.XXXXX)
|
||||||
KEEPSYMS=$(mktemp --tmpdir keep.syms.XXXXX)
|
KEEPSYMS=$(mktemp -p /tmp keep.syms.XXXXX)
|
||||||
# find the libc used by gcc
|
# find the libc used by gcc
|
||||||
LIBC_SO=$("$@" --print-file-name=libc.so.6)
|
LIBC_SO=$("$@" --print-file-name=libc.so.6)
|
||||||
cp ${KEEPSYMS_IN} ${KEEPSYMS}
|
cp ${KEEPSYMS_IN} ${KEEPSYMS}
|
||||||
|
@ -21,6 +21,9 @@
|
|||||||
/* Define if building "fat" runtime. */
|
/* Define if building "fat" runtime. */
|
||||||
#cmakedefine FAT_RUNTIME
|
#cmakedefine FAT_RUNTIME
|
||||||
|
|
||||||
|
/* Define if building AVX-512 in the fat runtime. */
|
||||||
|
#cmakedefine BUILD_AVX512
|
||||||
|
|
||||||
/* Define to 1 if `backtrace' works. */
|
/* Define to 1 if `backtrace' works. */
|
||||||
#cmakedefine HAVE_BACKTRACE
|
#cmakedefine HAVE_BACKTRACE
|
||||||
|
|
||||||
|
@ -1155,6 +1155,40 @@ unichar readUtf8CodePoint4c(const char *s) {
|
|||||||
'\\E' => {
|
'\\E' => {
|
||||||
fgoto main;
|
fgoto main;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#unicode chars
|
||||||
|
utf8_2c when is_utf8 => {
|
||||||
|
assert(mode.utf8);
|
||||||
|
/* leverage ComponentClass to generate the vertices */
|
||||||
|
auto cc = getComponentClass(mode);
|
||||||
|
cc->add(readUtf8CodePoint2c(ts));
|
||||||
|
cc->finalize();
|
||||||
|
currentSeq->addComponent(move(cc));
|
||||||
|
};
|
||||||
|
|
||||||
|
utf8_3c when is_utf8 => {
|
||||||
|
assert(mode.utf8);
|
||||||
|
/* leverage ComponentClass to generate the vertices */
|
||||||
|
auto cc = getComponentClass(mode);
|
||||||
|
cc->add(readUtf8CodePoint3c(ts));
|
||||||
|
cc->finalize();
|
||||||
|
currentSeq->addComponent(move(cc));
|
||||||
|
};
|
||||||
|
|
||||||
|
utf8_4c when is_utf8 => {
|
||||||
|
assert(mode.utf8);
|
||||||
|
/* leverage ComponentClass to generate the vertices */
|
||||||
|
auto cc = getComponentClass(mode);
|
||||||
|
cc->add(readUtf8CodePoint4c(ts));
|
||||||
|
cc->finalize();
|
||||||
|
currentSeq->addComponent(move(cc));
|
||||||
|
};
|
||||||
|
|
||||||
|
hi_byte when is_utf8 => {
|
||||||
|
assert(mode.utf8);
|
||||||
|
throwInvalidUtf8();
|
||||||
|
};
|
||||||
|
|
||||||
# Literal character
|
# Literal character
|
||||||
any => {
|
any => {
|
||||||
addLiteral(currentSeq, *ts, mode);
|
addLiteral(currentSeq, *ts, mode);
|
||||||
@ -1169,6 +1203,31 @@ unichar readUtf8CodePoint4c(const char *s) {
|
|||||||
'\\E' => {
|
'\\E' => {
|
||||||
fret;
|
fret;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#unicode chars
|
||||||
|
utf8_2c when is_utf8 => {
|
||||||
|
assert(mode.utf8);
|
||||||
|
currentCls->add(readUtf8CodePoint2c(ts));
|
||||||
|
inCharClassEarly = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
utf8_3c when is_utf8 => {
|
||||||
|
assert(mode.utf8);
|
||||||
|
currentCls->add(readUtf8CodePoint3c(ts));
|
||||||
|
inCharClassEarly = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
utf8_4c when is_utf8 => {
|
||||||
|
assert(mode.utf8);
|
||||||
|
currentCls->add(readUtf8CodePoint4c(ts));
|
||||||
|
inCharClassEarly = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
hi_byte when is_utf8 => {
|
||||||
|
assert(mode.utf8);
|
||||||
|
throwInvalidUtf8();
|
||||||
|
};
|
||||||
|
|
||||||
# Literal character
|
# Literal character
|
||||||
any => {
|
any => {
|
||||||
currentCls->add(*ts);
|
currentCls->add(*ts);
|
||||||
|
@ -192,7 +192,8 @@ u64a cpuid_flags(void) {
|
|||||||
cap &= ~HS_CPU_FEATURES_AVX2;
|
cap &= ~HS_CPU_FEATURES_AVX2;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !defined(FAT_RUNTIME) && !defined(HAVE_AVX512)
|
#if (!defined(FAT_RUNTIME) && !defined(HAVE_AVX512)) || \
|
||||||
|
(defined(FAT_RUNTIME) && !defined(BUILD_AVX512))
|
||||||
cap &= ~HS_CPU_FEATURES_AVX512;
|
cap &= ~HS_CPU_FEATURES_AVX512;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -142,3 +142,5 @@
|
|||||||
145:/abc/8{edit_distance=1} #UTF-8 is disallowed for approximate matching.
|
145:/abc/8{edit_distance=1} #UTF-8 is disallowed for approximate matching.
|
||||||
146:/(*UTF8)abc/{edit_distance=1} #UTF-8 is disallowed for approximate matching.
|
146:/(*UTF8)abc/{edit_distance=1} #UTF-8 is disallowed for approximate matching.
|
||||||
147:/\b\BMYBt/s{edit_distance=1} #Pattern can never match.
|
147:/\b\BMYBt/s{edit_distance=1} #Pattern can never match.
|
||||||
|
148:/\QÀ\Eaaaa/8 #Expression is not valid UTF-8.
|
||||||
|
149:/[\QÀ\Eaaaa]/8 #Expression is not valid UTF-8.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user