Unify handling of caseless flag in class parser

Apply caselessness to each element added to a class, rather than all at
finalize time (which required separate ucp and non-ucp working data).

Unifies the behaviour of AsciiComponentClass and Utf8ComponentClass in
this respect.
This commit is contained in:
Justin Viiret
2015-11-17 17:23:52 +11:00
committed by Matthew Barr
parent bdb7a10034
commit 25a01e1c3c
4 changed files with 27 additions and 62 deletions

View File

@@ -61,11 +61,15 @@ void AsciiComponentClass::createRange(unichar to) {
unsigned char from = (u8)range_start;
if (from > to) {
throw LocatedParseError("Range out of order in character class");
} else {
in_cand_range = false;
cr.setRange(from, to);
range_start = INVALID_UNICODE;
}
in_cand_range = false;
CharReach ncr(from, to);
if (mode.caseless) {
make_caseless(&ncr);
}
cr |= ncr;
range_start = INVALID_UNICODE;
}
void AsciiComponentClass::notePositions(GlushkovBuildState &bs) {
@@ -95,16 +99,13 @@ void AsciiComponentClass::add(PredefinedClass c, bool negative) {
c = translateForUcpMode(c, mode);
}
// Note: caselessness is handled by getPredefinedCharReach.
CharReach pcr = getPredefinedCharReach(c, mode);
if (negative) {
pcr.flip();
}
if (isUcp(c)) {
cr_ucp |= pcr;
} else {
cr |= pcr;
}
cr |= pcr;
range_start = INVALID_UNICODE;
in_cand_range = false;
}
@@ -120,7 +121,12 @@ void AsciiComponentClass::add(unichar c) {
return;
}
cr.set(c);
CharReach ncr(c, c);
if (mode.caseless) {
make_caseless(&ncr);
}
cr |= ncr;
range_start = c;
}
@@ -136,12 +142,6 @@ void AsciiComponentClass::finalize() {
in_cand_range = false;
}
if (mode.caseless) {
make_caseless(&cr);
}
cr |= cr_ucp; /* characters from ucp props don't participate in caseless */
if (m_negate) {
cr.flip();
}