Treat characters between \Q and \E as code points in UTF-8 mode.

Fixes GitHub issue #57.
This commit is contained in:
Alex Coyte 2017-06-19 11:03:05 +10:00 committed by Matthew Barr
parent 2b788f1234
commit a185be5a4f

View File

@ -1155,6 +1155,35 @@ unichar readUtf8CodePoint4c(const char *s) {
'\\E' => { '\\E' => {
fgoto main; fgoto main;
}; };
# Unicode characters: a two-byte UTF-8 sequence seen while the is_utf8
# condition holds is added to the sequence as one code point, rather than
# falling through to the single-character literal rule.
utf8_2c when is_utf8 => {
    assert(mode.utf8);
    /* Leverage ComponentClass to generate the vertices: build a class
     * containing only this code point and append it to the sequence. */
    auto cc = getComponentClass(mode);
    cc->add(readUtf8CodePoint2c(ts)); /* ts: start of the matched token */
    cc->finalize();
    /* Qualified std::move: an unqualified call only resolves through a
     * using-directive or ADL and trips -Wunqualified-std-cast-call. */
    currentSeq->addComponent(std::move(cc));
};
# A three-byte UTF-8 sequence seen while the is_utf8 condition holds is
# added to the sequence as one code point.
utf8_3c when is_utf8 => {
    assert(mode.utf8);
    /* Leverage ComponentClass to generate the vertices: build a class
     * containing only this code point and append it to the sequence. */
    auto cc = getComponentClass(mode);
    cc->add(readUtf8CodePoint3c(ts)); /* ts: start of the matched token */
    cc->finalize();
    /* Qualified std::move: an unqualified call only resolves through a
     * using-directive or ADL and trips -Wunqualified-std-cast-call. */
    currentSeq->addComponent(std::move(cc));
};
# A four-byte UTF-8 sequence seen while the is_utf8 condition holds is
# added to the sequence as one code point.
utf8_4c when is_utf8 => {
    assert(mode.utf8);
    /* Leverage ComponentClass to generate the vertices: build a class
     * containing only this code point and append it to the sequence. */
    auto cc = getComponentClass(mode);
    cc->add(readUtf8CodePoint4c(ts)); /* ts: start of the matched token */
    cc->finalize();
    /* Qualified std::move: an unqualified call only resolves through a
     * using-directive or ADL and trips -Wunqualified-std-cast-call. */
    currentSeq->addComponent(std::move(cc));
};
# Literal character # Literal character
any => { any => {
addLiteral(currentSeq, *ts, mode); addLiteral(currentSeq, *ts, mode);