Treat characters between \Q \E as codepoints in UTF8 mode.

Fixes GitHub issue #57.
2025-06-28 16:41:01 +03:00 · 2017-06-19 11:03:05 +10:00 · 2017-06-19 11:03:05 +10:00 · a185be5a4f
commit a185be5a4f
parent 2b788f1234
1 changed files with 29 additions and 0 deletions
--- a/src/parser/Parser.rl
+++ b/src/parser/Parser.rl
@ -1155,6 +1155,35 @@ unichar readUtf8CodePoint4c(const char *s) {
              # A literal "\E" in the input; presumably this terminates the
              # \Q...\E quoted span handled by this scanner.
              '\\E' => {
                  /* jump to the entry point of the machine named main */
                  fgoto main;
              };
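              # Multi-byte UTF-8 sequences (only matched when is_utf8 holds):
              # each one is decoded and added as a single code point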
              utf8_2c when is_utf8 => {
                  /* This alternative is guarded by is_utf8, so the parse mode
                   * is expected to agree. */
                  assert(mode.utf8);
                  /* Rather than emitting bytes one at a time, add the decoded
                   * code point to a fresh ComponentClass and let that class
                   * generate the vertices. */
                  auto codePointClass = getComponentClass(mode);
                  const auto codePoint = readUtf8CodePoint2c(ts);
                  codePointClass->add(codePoint);
                  codePointClass->finalize();
                  currentSeq->addComponent(move(codePointClass));
              };
              utf8_3c when is_utf8 => {
                  /* This alternative is guarded by is_utf8, so the parse mode
                   * is expected to agree. */
                  assert(mode.utf8);
                  /* Rather than emitting bytes one at a time, add the decoded
                   * code point to a fresh ComponentClass and let that class
                   * generate the vertices. */
                  auto codePointClass = getComponentClass(mode);
                  const auto codePoint = readUtf8CodePoint3c(ts);
                  codePointClass->add(codePoint);
                  codePointClass->finalize();
                  currentSeq->addComponent(move(codePointClass));
              };
              utf8_4c when is_utf8 => {
                  /* This alternative is guarded by is_utf8, so the parse mode
                   * is expected to agree. */
                  assert(mode.utf8);
                  /* Rather than emitting bytes one at a time, add the decoded
                   * code point to a fresh ComponentClass and let that class
                   * generate the vertices. */
                  auto codePointClass = getComponentClass(mode);
                  const auto codePoint = readUtf8CodePoint4c(ts);
                  codePointClass->add(codePoint);
                  codePointClass->finalize();
                  currentSeq->addComponent(move(codePointClass));
              };
              # Literal character
              any => {
                  addLiteral(currentSeq, *ts, mode);