mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Treat characters between \Q and \E as code points in UTF-8 mode.
Fixes GitHub issue #57.
This commit is contained in:
parent
2b788f1234
commit
a185be5a4f
@ -1155,6 +1155,35 @@ unichar readUtf8CodePoint4c(const char *s) {
|
||||
# \E terminates the \Q...\E quoted section: jump back to the main machine.
'\\E' => { fgoto main; };
|
||||
|
||||
#unicode chars
|
||||
# utf8_2c token, matched only when the is_utf8 condition holds: add the
# token to the current sequence as a single code point.
utf8_2c when is_utf8 => {
    assert(mode.utf8);
    /* Leverage ComponentClass to generate the vertices: build a class
     * that contains only the code point decoded from the token start. */
    auto cc = getComponentClass(mode);
    cc->add(readUtf8CodePoint2c(ts));
    cc->finalize();
    /* Qualify std::move explicitly so the call does not depend on a
     * using-directive or ADL (Clang: -Wunqualified-std-cast-call). */
    currentSeq->addComponent(std::move(cc));
};
|
||||
|
||||
# utf8_3c token, matched only when the is_utf8 condition holds: add the
# token to the current sequence as a single code point.
utf8_3c when is_utf8 => {
    assert(mode.utf8);
    /* Leverage ComponentClass to generate the vertices: build a class
     * that contains only the code point decoded from the token start. */
    auto cc = getComponentClass(mode);
    cc->add(readUtf8CodePoint3c(ts));
    cc->finalize();
    /* Qualify std::move explicitly so the call does not depend on a
     * using-directive or ADL (Clang: -Wunqualified-std-cast-call). */
    currentSeq->addComponent(std::move(cc));
};
|
||||
|
||||
# utf8_4c token, matched only when the is_utf8 condition holds: add the
# token to the current sequence as a single code point.
utf8_4c when is_utf8 => {
    assert(mode.utf8);
    /* Leverage ComponentClass to generate the vertices: build a class
     * that contains only the code point decoded from the token start. */
    auto cc = getComponentClass(mode);
    cc->add(readUtf8CodePoint4c(ts));
    cc->finalize();
    /* Qualify std::move explicitly so the call does not depend on a
     * using-directive or ADL (Clang: -Wunqualified-std-cast-call). */
    currentSeq->addComponent(std::move(cc));
};
|
||||
|
||||
# Literal character
|
||||
any => {
|
||||
addLiteral(currentSeq, *ts, mode);
|
||||
|
Loading…
x
Reference in New Issue
Block a user