character classes: handle \Q\E and utf8

This commit is contained in:
Alex Coyte 2017-06-20 10:19:32 +10:00 committed by Matthew Barr
parent a185be5a4f
commit d317d75615
2 changed files with 32 additions and 0 deletions

View File

@ -1184,6 +1184,11 @@ unichar readUtf8CodePoint4c(const char *s) {
currentSeq->addComponent(move(cc)); currentSeq->addComponent(move(cc));
}; };
hi_byte when is_utf8 => {
// Rule is guarded by the is_utf8 condition and is listed ahead of the
// catch-all `any` literal rule that follows it. A byte matching hi_byte
// here is reported through throwInvalidUtf8() rather than being added
// as a literal.
// NOTE(review): presumably this is what yields "Expression is not valid
// UTF-8." for a pattern such as /\Q<bad byte>\Eaaaa/8 -- confirm.
assert(mode.utf8);
throwInvalidUtf8();
};
# Literal character # Literal character
any => { any => {
addLiteral(currentSeq, *ts, mode); addLiteral(currentSeq, *ts, mode);
@ -1198,6 +1203,31 @@ unichar readUtf8CodePoint4c(const char *s) {
'\\E' => { '\\E' => {
fret; fret;
}; };
# Unicode characters
utf8_2c when is_utf8 => {
    // Only taken when the is_utf8 condition holds; the parse mode must agree.
    assert(mode.utf8);
    // Decode the code point that begins at the token start (ts) and add it
    // to the character class under construction.
    // NOTE(review): the `2c` suffix presumably means a two-byte encoding.
    const auto codePoint = readUtf8CodePoint2c(ts);
    currentCls->add(codePoint);
    // Same bookkeeping as the other class-member rules in this scanner.
    inCharClassEarly = false;
};
utf8_3c when is_utf8 => {
    // Only taken when the is_utf8 condition holds; the parse mode must agree.
    assert(mode.utf8);
    // Decode the code point that begins at the token start (ts) and add it
    // to the character class under construction.
    // NOTE(review): the `3c` suffix presumably means a three-byte encoding.
    const auto codePoint = readUtf8CodePoint3c(ts);
    currentCls->add(codePoint);
    // Same bookkeeping as the other class-member rules in this scanner.
    inCharClassEarly = false;
};
utf8_4c when is_utf8 => {
    // Only taken when the is_utf8 condition holds; the parse mode must agree.
    assert(mode.utf8);
    // Decode the code point that begins at the token start (ts) and add it
    // to the character class under construction.
    // NOTE(review): the `4c` suffix presumably means a four-byte encoding.
    const auto codePoint = readUtf8CodePoint4c(ts);
    currentCls->add(codePoint);
    // Same bookkeeping as the other class-member rules in this scanner.
    inCharClassEarly = false;
};
hi_byte when is_utf8 => {
// Rule is guarded by the is_utf8 condition and sits after the
// utf8_2c/utf8_3c/utf8_4c rules and ahead of the catch-all `any` literal
// rule. A byte matching hi_byte here is reported through
// throwInvalidUtf8() rather than being added to the class as a raw byte.
// NOTE(review): presumably only reached when none of the multi-byte
// rules above matched a well-formed sequence at this position -- confirm
// against the scanner's match-priority rules.
assert(mode.utf8);
throwInvalidUtf8();
};
# Literal character # Literal character
any => { any => {
currentCls->add(*ts); currentCls->add(*ts);

View File

@ -142,3 +142,5 @@
145:/abc/8{edit_distance=1} #UTF-8 is disallowed for approximate matching. 145:/abc/8{edit_distance=1} #UTF-8 is disallowed for approximate matching.
146:/(*UTF8)abc/{edit_distance=1} #UTF-8 is disallowed for approximate matching. 146:/(*UTF8)abc/{edit_distance=1} #UTF-8 is disallowed for approximate matching.
147:/\b\BMYBt/s{edit_distance=1} #Pattern can never match. 147:/\b\BMYBt/s{edit_distance=1} #Pattern can never match.
148:/\QÀ\Eaaaa/8 #Expression is not valid UTF-8.
149:/[\QÀ\Eaaaa]/8 #Expression is not valid UTF-8.