diff --git a/src/parser/Parser.rl b/src/parser/Parser.rl index 1481b7d8..37beb765 100644 --- a/src/parser/Parser.rl +++ b/src/parser/Parser.rl @@ -790,10 +790,12 @@ unichar readUtf8CodePoint4c(const u8 *ts) { any => { throw LocatedParseError("Unknown property"); }; *|; charClassGuts := |* - # We don't like POSIX collating elements (neither does PCRE or Perl). - '\[\.' [^\]]* '\.\]' | - '\[=' [^\]]* '=\]' => { - throw LocatedParseError("Unsupported POSIX collating element"); + # We don't support POSIX collating elements (neither does PCRE + # or Perl). These look like [.ch.] or [=ch=]. + '\[\.' ( '\\]' | [^\]] )* '\.\]' | + '\[=' ( '\\]' | [^\]] )* '=\]' => { + throw LocatedParseError("Unsupported POSIX collating " + "element"); }; # Named sets # Adding these may cause the charclass to close, hence the @@ -1090,23 +1092,6 @@ unichar readUtf8CodePoint4c(const u8 *ts) { throwInvalidUtf8(); }; - # dot or equals at the end of a character class could be the end - # of a collating element, like [.blah.] or [=blah=]. - [.=] ']' => { - if (currentCls->getFirstChar() == *ts) { - assert(currentClsBegin); - ostringstream oss; - oss << "Unsupported POSIX collating element at index " - << currentClsBegin - ptr << "."; - throw ParseError(oss.str()); - } - currentCls->add(*ts); - currentCls->finalize(); - currentSeq->addComponent(move(currentCls)); - inCharClass = false; - fgoto main; - }; - # Literal character (any - ']') => { if (currentCls->class_empty()) { @@ -1232,6 +1217,13 @@ unichar readUtf8CodePoint4c(const u8 *ts) { throw LocatedParseError("POSIX named classes are only " "supported inside a class"); }; + # We don't support POSIX collating elements (neither does PCRE + # or Perl). These look like [.ch.] or [=ch=]. + '\[\.' ( '\\]' | [^\]] )* '\.\]' | + '\[=' ( '\\]' | [^\]] )* '=\]' => { + throw LocatedParseError("Unsupported POSIX collating " + "element"); + }; # Begin eating characters for class '\[' => eatClass; # Begin quoted literal diff --git a/unit/hyperscan/bad_patterns.txt b/unit/hyperscan/bad_patterns.txt index 1ad445b3..837ba871 100644 --- a/unit/hyperscan/bad_patterns.txt +++ b/unit/hyperscan/bad_patterns.txt @@ -128,3 +128,7 @@ 128:/(*UTF8)^fo?ob{ro|nax_off\Qt=10omnax+8Wnah/ρρρρρρρρρρρρρρρρρρρρρρρρρρρ0}l.{1,60}Car*k|npanomnax+8Wnah/ #Expression is not valid UTF-8. 129:/bignum \1111111111111111111/ #Number is too big at index 7. 130:/foo|&{5555555,}/ #Bounded repeat is too large. +131:/[a[..]]/ #Unsupported POSIX collating element at index 2. +132:/[a[==]]/ #Unsupported POSIX collating element at index 2. +133:/[a[.\].]]/ #Unsupported POSIX collating element at index 2. +134:/[a[=\]=]]/ #Unsupported POSIX collating element at index 2.