mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Treat characters between \Q and \E as code points in UTF-8 mode.
Fixes GitHub issue #57.
This commit is contained in:
parent
2b788f1234
commit
a185be5a4f
@ -1155,6 +1155,35 @@ unichar readUtf8CodePoint4c(const char *s) {
|
|||||||
# A literal \E ends the quoted span: jump back to the main machine.
# NOTE(review): this action appears twice because it is unchanged context
# rendered once per diff column - confirm against the real file.
'\\E' => {
|
'\\E' => {
|
||||||
fgoto main;
|
fgoto main;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#unicode chars
|
||||||
|
# Multi-byte character, matched only while the is_utf8 condition holds
# (presumably a two-byte UTF-8 sequence, going by the utf8_2c and
# readUtf8CodePoint2c names). It is added to the current sequence as a
# single code point instead of falling through to the per-byte literal rule.
utf8_2c when is_utf8 => {
    assert(mode.utf8);
    /* leverage ComponentClass to generate the vertices */
    auto cc = getComponentClass(mode);
    /* ts points at the first byte of the matched token. */
    cc->add(readUtf8CodePoint2c(ts));
    cc->finalize();
    /* Qualified std::move: an unqualified move() only resolves through a
     * using-directive or ADL and trips clang's
     * -Wunqualified-std-cast-call. */
    currentSeq->addComponent(std::move(cc));
};
|
||||||
|
|
||||||
|
# Multi-byte character, matched only while the is_utf8 condition holds
# (presumably a three-byte UTF-8 sequence, going by the utf8_3c and
# readUtf8CodePoint3c names). It is added to the current sequence as a
# single code point instead of falling through to the per-byte literal rule.
utf8_3c when is_utf8 => {
    assert(mode.utf8);
    /* leverage ComponentClass to generate the vertices */
    auto cc = getComponentClass(mode);
    /* ts points at the first byte of the matched token. */
    cc->add(readUtf8CodePoint3c(ts));
    cc->finalize();
    /* Qualified std::move: an unqualified move() only resolves through a
     * using-directive or ADL and trips clang's
     * -Wunqualified-std-cast-call. */
    currentSeq->addComponent(std::move(cc));
};
|
||||||
|
|
||||||
|
# Multi-byte character, matched only while the is_utf8 condition holds
# (presumably a four-byte UTF-8 sequence, going by the utf8_4c and
# readUtf8CodePoint4c names). It is added to the current sequence as a
# single code point instead of falling through to the per-byte literal rule.
utf8_4c when is_utf8 => {
    assert(mode.utf8);
    /* leverage ComponentClass to generate the vertices */
    auto cc = getComponentClass(mode);
    /* ts points at the first byte of the matched token. */
    cc->add(readUtf8CodePoint4c(ts));
    cc->finalize();
    /* Qualified std::move: an unqualified move() only resolves through a
     * using-directive or ADL and trips clang's
     * -Wunqualified-std-cast-call. */
    currentSeq->addComponent(std::move(cc));
};
|
||||||
|
|
||||||
# Literal character
|
# Literal character
|
||||||
any => {
|
any => {
|
||||||
addLiteral(currentSeq, *ts, mode);
|
addLiteral(currentSeq, *ts, mode);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user