character classes: handle \Q\E and utf8

This commit is contained in:
Alex Coyte 2017-06-20 10:19:32 +10:00 committed by Matthew Barr
parent a185be5a4f
commit d317d75615
2 changed files with 32 additions and 0 deletions

View File

@ -1184,6 +1184,11 @@ unichar readUtf8CodePoint4c(const char *s) {
currentSeq->addComponent(move(cc));
};
# A byte matching hi_byte while the is_utf8 condition holds is reported via
# throwInvalidUtf8() instead of being treated as ordinary input.
# NOTE(review): presumably this must stay ahead of the catch-all `any` rule so
# that it wins for such bytes -- confirm against the full scanner.
hi_byte when is_utf8 => {
// The is_utf8 guard is expected to agree with the parse mode.
assert(mode.utf8);
// Presumably raises the "Expression is not valid UTF-8." parse error -- confirm.
throwInvalidUtf8();
};
# Literal character
any => {
addLiteral(currentSeq, *ts, mode);
@ -1198,6 +1203,31 @@ unichar readUtf8CodePoint4c(const char *s) {
# A literal backslash followed by 'E' ends the quoted section: the action
# only executes Ragel's fret, returning to the state that called this scanner.
'\\E' => {
fret;
};
# Unicode chars: a utf8_2c match while is_utf8 holds is passed through
# readUtf8CodePoint2c() and the result is added to the current class.
utf8_2c when is_utf8 => {
// The is_utf8 guard is expected to agree with the parse mode.
assert(mode.utf8);
// ts is the start of the matched token; the reader is handed that pointer.
currentCls->add(readUtf8CodePoint2c(ts));
// NOTE(review): presumably marks that we are past the opening position of
// the character class -- confirm where inCharClassEarly is consumed.
inCharClassEarly = false;
};
# A utf8_3c match while is_utf8 holds is passed through readUtf8CodePoint3c()
# and the result is added to the current class.
utf8_3c when is_utf8 => {
// The is_utf8 guard is expected to agree with the parse mode.
assert(mode.utf8);
// ts is the start of the matched token; the reader is handed that pointer.
currentCls->add(readUtf8CodePoint3c(ts));
// NOTE(review): presumably marks that we are past the opening position of
// the character class -- confirm where inCharClassEarly is consumed.
inCharClassEarly = false;
};
# A utf8_4c match while is_utf8 holds is passed through readUtf8CodePoint4c()
# and the result is added to the current class.
utf8_4c when is_utf8 => {
// The is_utf8 guard is expected to agree with the parse mode.
assert(mode.utf8);
// ts is the start of the matched token; the reader is handed that pointer.
currentCls->add(readUtf8CodePoint4c(ts));
// NOTE(review): presumably marks that we are past the opening position of
// the character class -- confirm where inCharClassEarly is consumed.
inCharClassEarly = false;
};
# A byte matching hi_byte that reaches this rule while is_utf8 holds is
# reported via throwInvalidUtf8() instead of being added to the class.
# NOTE(review): presumably only bytes not consumed by the utf8_2c/3c/4c rules
# above end up here, and this must precede the catch-all `any` rule -- confirm.
hi_byte when is_utf8 => {
// The is_utf8 guard is expected to agree with the parse mode.
assert(mode.utf8);
// Presumably raises the "Expression is not valid UTF-8." parse error -- confirm.
throwInvalidUtf8();
};
# Literal character
any => {
currentCls->add(*ts);

View File

@ -142,3 +142,5 @@
145:/abc/8{edit_distance=1} #UTF-8 is disallowed for approximate matching.
146:/(*UTF8)abc/{edit_distance=1} #UTF-8 is disallowed for approximate matching.
147:/\b\BMYBt/s{edit_distance=1} #Pattern can never match.
148:/\QÀ\Eaaaa/8 #Expression is not valid UTF-8.
149:/[\QÀ\Eaaaa]/8 #Expression is not valid UTF-8.