diff --git a/src/parser/AsciiComponentClass.cpp b/src/parser/AsciiComponentClass.cpp index 44ecb5bb..7cfa6e11 100644 --- a/src/parser/AsciiComponentClass.cpp +++ b/src/parser/AsciiComponentClass.cpp @@ -61,11 +61,15 @@ void AsciiComponentClass::createRange(unichar to) { unsigned char from = (u8)range_start; if (from > to) { throw LocatedParseError("Range out of order in character class"); - } else { - in_cand_range = false; - cr.setRange(from, to); - range_start = INVALID_UNICODE; } + + in_cand_range = false; + CharReach ncr(from, to); + if (mode.caseless) { + make_caseless(&ncr); + } + cr |= ncr; + range_start = INVALID_UNICODE; } void AsciiComponentClass::notePositions(GlushkovBuildState &bs) { @@ -95,16 +99,13 @@ void AsciiComponentClass::add(PredefinedClass c, bool negative) { c = translateForUcpMode(c, mode); } + // Note: caselessness is handled by getPredefinedCharReach. CharReach pcr = getPredefinedCharReach(c, mode); if (negative) { pcr.flip(); } - if (isUcp(c)) { - cr_ucp |= pcr; - } else { - cr |= pcr; - } + cr |= pcr; range_start = INVALID_UNICODE; in_cand_range = false; } @@ -120,7 +121,12 @@ void AsciiComponentClass::add(unichar c) { return; } - cr.set(c); + CharReach ncr(c, c); + if (mode.caseless) { + make_caseless(&ncr); + } + + cr |= ncr; range_start = c; } @@ -136,12 +142,6 @@ void AsciiComponentClass::finalize() { in_cand_range = false; } - if (mode.caseless) { - make_caseless(&cr); - } - - cr |= cr_ucp; /* characters from ucp props don't participate in caseless */ - if (m_negate) { cr.flip(); } diff --git a/src/parser/AsciiComponentClass.h b/src/parser/AsciiComponentClass.h index 2d5ef843..925fa9bf 100644 --- a/src/parser/AsciiComponentClass.h +++ b/src/parser/AsciiComponentClass.h @@ -78,12 +78,10 @@ protected: private: Position position; CharReach cr; - CharReach cr_ucp; // Private copy ctor. Use clone instead. AsciiComponentClass(const AsciiComponentClass &other) - : ComponentClass(other), position(other.position), cr(other.cr), - cr_ucp(other.cr_ucp) {} + : ComponentClass(other), position(other.position), cr(other.cr) {} }; } // namespace ue2 diff --git a/src/parser/Utf8ComponentClass.cpp b/src/parser/Utf8ComponentClass.cpp index 54f9edb9..21707902 100644 --- a/src/parser/Utf8ComponentClass.cpp +++ b/src/parser/Utf8ComponentClass.cpp @@ -515,16 +515,16 @@ void UTF8ComponentClass::createRange(unichar to) { unichar from = range_start; if (from > to) { throw LocatedParseError("Range out of order in character class"); - } else { - in_cand_range = false; - CodePointSet ncps; - ncps.setRange(from, to); - if (mode.caseless) { - make_caseless(&ncps); - } - cps |= ncps; - range_start = INVALID_UNICODE; } + + in_cand_range = false; + CodePointSet ncps; + ncps.setRange(from, to); + if (mode.caseless) { + make_caseless(&ncps); + } + cps |= ncps; + range_start = INVALID_UNICODE; } void UTF8ComponentClass::add(PredefinedClass c, bool negative) { @@ -543,11 +543,7 @@ void UTF8ComponentClass::add(PredefinedClass c, bool negative) { pcps.flip(); } - if (isUcp(c)) { - cps_ucp |= pcps; - } else { - cps |= pcps; - } + cps |= pcps; range_start = INVALID_UNICODE; in_cand_range = false; @@ -585,8 +581,6 @@ void UTF8ComponentClass::finalize() { in_cand_range = false; } - cps |= cps_ucp; /* characters from ucp props always case sensitive */ - if (m_negate) { cps.flip(); } @@ -594,31 +588,6 @@ void UTF8ComponentClass::finalize() { finalized = true; } -bool isUcp(PredefinedClass c) { - switch (c) { - case CLASS_ALNUM: - case CLASS_ALPHA: - case CLASS_ANY: - case CLASS_ASCII: - case CLASS_BLANK: - case CLASS_CNTRL: - case CLASS_DIGIT: - case CLASS_GRAPH: - case CLASS_HORZ: - case CLASS_LOWER: - case CLASS_PRINT: - case CLASS_PUNCT: - case CLASS_SPACE: - case CLASS_UPPER: - case CLASS_VERT: - case CLASS_WORD: - case CLASS_XDIGIT: - return false; - default: - return true; - } -} - Position UTF8ComponentClass::getHead(NFABuilder &builder, u8 first_byte) { map::const_iterator it = heads.find(first_byte); if (it != heads.end()) { diff --git a/src/parser/Utf8ComponentClass.h b/src/parser/Utf8ComponentClass.h index 3d21a278..f4e7ea32 100644 --- a/src/parser/Utf8ComponentClass.h +++ b/src/parser/Utf8ComponentClass.h @@ -93,7 +93,6 @@ private: void buildFourByte(GlushkovBuildState &bs); CodePointSet cps; - CodePointSet cps_ucp; std::map heads; Position single_pos; @@ -108,7 +107,6 @@ private: }; PredefinedClass translateForUcpMode(PredefinedClass in, const ParseMode &mode); -bool isUcp(PredefinedClass c); CodePointSet getPredefinedCodePointSet(PredefinedClass c, const ParseMode &mode);