2018-01-19 06:48:56 -05:00

534 lines
11 KiB
Plaintext

85000:/\p{C}/8W
85001:/\p{Cc}/8W
85002:/\p{Cf}/8W
85003:/\p{Cn}/8W
85004:/\p{Co}/8W
85005:/\p{Cs}/8W
85006:/\p{L}/8W
85007:/\p{L&}/8W
85008:/\p{Ll}/8W
85009:/\p{Lm}/8W
85010:/\p{Lo}/8W
85011:/\p{Lt}/8W
85012:/\p{Lu}/8W
85013:/\p{M}/8W
85014:/\p{Mc}/8W
85015:/\p{Me}/8W
85016:/\p{Mn}/8W
85017:/\p{N}/8W
85018:/\p{Nd}/8W
85019:/\p{Nl}/8W
85020:/\p{No}/8W
85021:/\p{P}/8W
85022:/\p{Pc}/8W
85023:/\p{Pd}/8W
85024:/\p{Pe}/8W
85025:/\p{Pf}/8W
85026:/\p{Pi}/8W
85027:/\p{Po}/8W
85028:/\p{Ps}/8W
85029:/\p{S}/8W
85030:/\p{Sc}/8W
85031:/\p{Sk}/8W
85032:/\p{Sm}/8W
85033:/\p{So}/8W
85034:/\p{Xan}/8W
85035:/\p{Xps}/8W
85036:/\p{Xsp}/8W
85037:/\p{Xwd}/8W
85038:/\p{Z}/8W
85039:/\p{Zl}/8W
85040:/\p{Zp}/8W
85041:/\p{Zs}/8W
85042:/\p{Arabic}/8W
85043:/\p{Armenian}/8W
85044:/\p{Avestan}/8W
85045:/\p{Balinese}/8W
85046:/\p{Bamum}/8W
85047:/\p{Batak}/8W
85048:/\p{Bengali}/8W
85049:/\p{Bopomofo}/8W
85050:/\p{Brahmi}/8W
85051:/\p{Braille}/8W
85052:/\p{Buginese}/8W
85053:/\p{Buhid}/8W
85054:/\p{Canadian_Aboriginal}/8W
85055:/\p{Carian}/8W
85056:/\p{Cham}/8W
85057:/\p{Cherokee}/8W
85058:/\p{Common}/8W
85059:/\p{Coptic}/8W
85060:/\p{Cuneiform}/8W
85061:/\p{Cypriot}/8W
85062:/\p{Cyrillic}/8W
85063:/\p{Deseret}/8W
85064:/\p{Devanagari}/8W
85065:/\p{Egyptian_Hieroglyphs}/8W
85066:/\p{Ethiopic}/8W
85067:/\p{Georgian}/8W
85068:/\p{Glagolitic}/8W
85069:/\p{Gothic}/8W
85070:/\p{Greek}/8W
85071:/\p{Gujarati}/8W
85072:/\p{Gurmukhi}/8W
85073:/\p{Han}/8W
85074:/\p{Hangul}/8W
85075:/\p{Hanunoo}/8W
85076:/\p{Hebrew}/8W
85077:/\p{Hiragana}/8W
85078:/\p{Imperial_Aramaic}/8W
85079:/\p{Inherited}/8W
85080:/\p{Inscriptional_Pahlavi}/8W
85081:/\p{Inscriptional_Parthian}/8W
85082:/\p{Javanese}/8W
85083:/\p{Kaithi}/8W
85084:/\p{Kannada}/8W
85085:/\p{Katakana}/8W
85086:/\p{Kayah_Li}/8W
85087:/\p{Kharoshthi}/8W
85088:/\p{Khmer}/8W
85089:/\p{Lao}/8W
85090:/\p{Latin}/8W
85091:/\p{Lepcha}/8W
85092:/\p{Limbu}/8W
85093:/\p{Linear_B}/8W
85094:/\p{Lisu}/8W
85095:/\p{Lycian}/8W
85096:/\p{Lydian}/8W
85097:/\p{Malayalam}/8W
85098:/\p{Mandaic}/8W
85099:/\p{Meetei_Mayek}/8W
85100:/\p{Mongolian}/8W
85101:/\p{Myanmar}/8W
85102:/\p{New_Tai_Lue}/8W
85103:/\p{Nko}/8W
85104:/\p{Ogham}/8W
85105:/\p{Ol_Chiki}/8W
85106:/\p{Old_Italic}/8W
85107:/\p{Old_Persian}/8W
85108:/\p{Old_South_Arabian}/8W
85109:/\p{Old_Turkic}/8W
85110:/\p{Oriya}/8W
85111:/\p{Osmanya}/8W
85112:/\p{Phags_Pa}/8W
85113:/\p{Phoenician}/8W
85114:/\p{Rejang}/8W
85115:/\p{Runic}/8W
85116:/\p{Samaritan}/8W
85117:/\p{Saurashtra}/8W
85118:/\p{Shavian}/8W
85119:/\p{Sinhala}/8W
85120:/\p{Sundanese}/8W
85121:/\p{Syloti_Nagri}/8W
85122:/\p{Syriac}/8W
85123:/\p{Tagalog}/8W
85124:/\p{Tagbanwa}/8W
85125:/\p{Tai_Le}/8W
85126:/\p{Tai_Tham}/8W
85127:/\p{Tai_Viet}/8W
85128:/\p{Tamil}/8W
85129:/\p{Telugu}/8W
85130:/\p{Thaana}/8W
85131:/\p{Thai}/8W
85132:/\p{Tibetan}/8W
85133:/\p{Tifinagh}/8W
85134:/\p{Ugaritic}/8W
85135:/\p{Vai}/8W
85136:/\p{Yi}/8W
85500:/\P{C}/8W
85501:/\P{Cc}/8W
85502:/\P{Cf}/8W
85503:/\P{Cn}/8W
85504:/\P{Co}/8W
85505:/\P{Cs}/8W
85506:/\P{L}/8W
85507:/\P{L&}/8W
85508:/\P{Ll}/8W
85509:/\P{Lm}/8W
85510:/\P{Lo}/8W
85511:/\P{Lt}/8W
85512:/\P{Lu}/8W
85513:/\P{M}/8W
85514:/\P{Mc}/8W
85515:/\P{Me}/8W
85516:/\P{Mn}/8W
85517:/\P{N}/8W
85518:/\P{Nd}/8W
85519:/\P{Nl}/8W
85520:/\P{No}/8W
85521:/\P{P}/8W
85522:/\P{Pc}/8W
85523:/\P{Pd}/8W
85524:/\P{Pe}/8W
85525:/\P{Pf}/8W
85526:/\P{Pi}/8W
85527:/\P{Po}/8W
85528:/\P{Ps}/8W
85529:/\P{S}/8W
85530:/\P{Sc}/8W
85531:/\P{Sk}/8W
85532:/\P{Sm}/8W
85533:/\P{So}/8W
85534:/\P{Xan}/8W
85535:/\P{Xps}/8W
85536:/\P{Xsp}/8W
85537:/\P{Xwd}/8W
85538:/\P{Z}/8W
85539:/\P{Zl}/8W
85540:/\P{Zp}/8W
85541:/\P{Zs}/8W
85542:/\P{Arabic}/8W
85543:/\P{Armenian}/8W
85544:/\P{Avestan}/8W
85545:/\P{Balinese}/8W
85546:/\P{Bamum}/8W
85547:/\P{Batak}/8W
85548:/\P{Bengali}/8W
85549:/\P{Bopomofo}/8W
85550:/\P{Brahmi}/8W
85551:/\P{Braille}/8W
85552:/\P{Buginese}/8W
85553:/\P{Buhid}/8W
85554:/\P{Canadian_Aboriginal}/8W
85555:/\P{Carian}/8W
85556:/\P{Cham}/8W
85557:/\P{Cherokee}/8W
85558:/\P{Common}/8W
85559:/\P{Coptic}/8W
85560:/\P{Cuneiform}/8W
85561:/\P{Cypriot}/8W
85562:/\P{Cyrillic}/8W
85563:/\P{Deseret}/8W
85564:/\P{Devanagari}/8W
85565:/\P{Egyptian_Hieroglyphs}/8W
85566:/\P{Ethiopic}/8W
85567:/\P{Georgian}/8W
85568:/\P{Glagolitic}/8W
85569:/\P{Gothic}/8W
85570:/\P{Greek}/8W
85571:/\P{Gujarati}/8W
85572:/\P{Gurmukhi}/8W
85573:/\P{Han}/8W
85574:/\P{Hangul}/8W
85575:/\P{Hanunoo}/8W
85576:/\P{Hebrew}/8W
85577:/\P{Hiragana}/8W
85578:/\P{Imperial_Aramaic}/8W
85579:/\P{Inherited}/8W
85580:/\P{Inscriptional_Pahlavi}/8W
85581:/\P{Inscriptional_Parthian}/8W
85582:/\P{Javanese}/8W
85583:/\P{Kaithi}/8W
85584:/\P{Kannada}/8W
85585:/\P{Katakana}/8W
85586:/\P{Kayah_Li}/8W
85587:/\P{Kharoshthi}/8W
85588:/\P{Khmer}/8W
85589:/\P{Lao}/8W
85590:/\P{Latin}/8W
85591:/\P{Lepcha}/8W
85592:/\P{Limbu}/8W
85593:/\P{Linear_B}/8W
85594:/\P{Lisu}/8W
85595:/\P{Lycian}/8W
85596:/\P{Lydian}/8W
85597:/\P{Malayalam}/8W
85598:/\P{Mandaic}/8W
85599:/\P{Meetei_Mayek}/8W
85600:/\P{Mongolian}/8W
85601:/\P{Myanmar}/8W
85602:/\P{New_Tai_Lue}/8W
85603:/\P{Nko}/8W
85604:/\P{Ogham}/8W
85605:/\P{Ol_Chiki}/8W
85606:/\P{Old_Italic}/8W
85607:/\P{Old_Persian}/8W
85608:/\P{Old_South_Arabian}/8W
85609:/\P{Old_Turkic}/8W
85610:/\P{Oriya}/8W
85611:/\P{Osmanya}/8W
85612:/\P{Phags_Pa}/8W
85613:/\P{Phoenician}/8W
85614:/\P{Rejang}/8W
85615:/\P{Runic}/8W
85616:/\P{Samaritan}/8W
85617:/\P{Saurashtra}/8W
85618:/\P{Shavian}/8W
85619:/\P{Sinhala}/8W
85620:/\P{Sundanese}/8W
85621:/\P{Syloti_Nagri}/8W
85622:/\P{Syriac}/8W
85623:/\P{Tagalog}/8W
85624:/\P{Tagbanwa}/8W
85625:/\P{Tai_Le}/8W
85626:/\P{Tai_Tham}/8W
85627:/\P{Tai_Viet}/8W
85628:/\P{Tamil}/8W
85629:/\P{Telugu}/8W
85630:/\P{Thaana}/8W
85631:/\P{Thai}/8W
85632:/\P{Tibetan}/8W
85633:/\P{Tifinagh}/8W
85634:/\P{Ugaritic}/8W
85635:/\P{Vai}/8W
85636:/\P{Yi}/8W
# Non-UTF-8 cases. For \p{...}, only the properties that can actually match
# in this mode are listed; every \P{...} negation is kept.
85200:/\p{C}/W
85201:/\p{Cc}/W
85202:/\p{Cf}/W
85206:/\p{L}/W
85207:/\p{L&}/W
85208:/\p{Ll}/W
85212:/\p{Lu}/W
85217:/\p{N}/W
85218:/\p{Nd}/W
85220:/\p{No}/W
85221:/\p{P}/W
85222:/\p{Pc}/W
85223:/\p{Pd}/W
85224:/\p{Pe}/W
85225:/\p{Pf}/W
85226:/\p{Pi}/W
85227:/\p{Po}/W
85228:/\p{Ps}/W
85229:/\p{S}/W
85230:/\p{Sc}/W
85231:/\p{Sk}/W
85232:/\p{Sm}/W
85233:/\p{So}/W
85234:/\p{Xan}/W
85235:/\p{Xps}/W
85236:/\p{Xsp}/W
85237:/\p{Xwd}/W
85238:/\p{Z}/W
85241:/\p{Zs}/W
85258:/\p{Common}/W
85290:/\p{Latin}/W
85700:/\P{C}/W
85701:/\P{Cc}/W
85702:/\P{Cf}/W
85703:/\P{Cn}/W
85704:/\P{Co}/W
85705:/\P{Cs}/W
85706:/\P{L}/W
85707:/\P{L&}/W
85708:/\P{Ll}/W
85709:/\P{Lm}/W
85710:/\P{Lo}/W
85711:/\P{Lt}/W
85712:/\P{Lu}/W
85713:/\P{M}/W
85714:/\P{Mc}/W
85715:/\P{Me}/W
85716:/\P{Mn}/W
85717:/\P{N}/W
85718:/\P{Nd}/W
85719:/\P{Nl}/W
85720:/\P{No}/W
85721:/\P{P}/W
85722:/\P{Pc}/W
85723:/\P{Pd}/W
85724:/\P{Pe}/W
85725:/\P{Pf}/W
85726:/\P{Pi}/W
85727:/\P{Po}/W
85728:/\P{Ps}/W
85729:/\P{S}/W
85730:/\P{Sc}/W
85731:/\P{Sk}/W
85732:/\P{Sm}/W
85733:/\P{So}/W
85734:/\P{Xan}/W
85735:/\P{Xps}/W
85736:/\P{Xsp}/W
85737:/\P{Xwd}/W
85738:/\P{Z}/W
85739:/\P{Zl}/W
85740:/\P{Zp}/W
85741:/\P{Zs}/W
85742:/\P{Arabic}/W
85743:/\P{Armenian}/W
85744:/\P{Avestan}/W
85745:/\P{Balinese}/W
85746:/\P{Bamum}/W
85747:/\P{Batak}/W
85748:/\P{Bengali}/W
85749:/\P{Bopomofo}/W
85750:/\P{Brahmi}/W
85751:/\P{Braille}/W
85752:/\P{Buginese}/W
85753:/\P{Buhid}/W
85754:/\P{Canadian_Aboriginal}/W
85755:/\P{Carian}/W
85756:/\P{Cham}/W
85757:/\P{Cherokee}/W
85758:/\P{Common}/W
85759:/\P{Coptic}/W
85760:/\P{Cuneiform}/W
85761:/\P{Cypriot}/W
85762:/\P{Cyrillic}/W
85763:/\P{Deseret}/W
85764:/\P{Devanagari}/W
85765:/\P{Egyptian_Hieroglyphs}/W
85766:/\P{Ethiopic}/W
85767:/\P{Georgian}/W
85768:/\P{Glagolitic}/W
85769:/\P{Gothic}/W
85770:/\P{Greek}/W
85771:/\P{Gujarati}/W
85772:/\P{Gurmukhi}/W
85773:/\P{Han}/W
85774:/\P{Hangul}/W
85775:/\P{Hanunoo}/W
85776:/\P{Hebrew}/W
85777:/\P{Hiragana}/W
85778:/\P{Imperial_Aramaic}/W
85779:/\P{Inherited}/W
85780:/\P{Inscriptional_Pahlavi}/W
85781:/\P{Inscriptional_Parthian}/W
85782:/\P{Javanese}/W
85783:/\P{Kaithi}/W
85784:/\P{Kannada}/W
85785:/\P{Katakana}/W
85786:/\P{Kayah_Li}/W
85787:/\P{Kharoshthi}/W
85788:/\P{Khmer}/W
85789:/\P{Lao}/W
85790:/\P{Latin}/W
85791:/\P{Lepcha}/W
85792:/\P{Limbu}/W
85793:/\P{Linear_B}/W
85794:/\P{Lisu}/W
85795:/\P{Lycian}/W
85796:/\P{Lydian}/W
85797:/\P{Malayalam}/W
85798:/\P{Mandaic}/W
85799:/\P{Meetei_Mayek}/W
85800:/\P{Mongolian}/W
85801:/\P{Myanmar}/W
85802:/\P{New_Tai_Lue}/W
85803:/\P{Nko}/W
85804:/\P{Ogham}/W
85805:/\P{Ol_Chiki}/W
85806:/\P{Old_Italic}/W
85807:/\P{Old_Persian}/W
85808:/\P{Old_South_Arabian}/W
85809:/\P{Old_Turkic}/W
85810:/\P{Oriya}/W
85811:/\P{Osmanya}/W
85812:/\P{Phags_Pa}/W
85813:/\P{Phoenician}/W
85814:/\P{Rejang}/W
85815:/\P{Runic}/W
85816:/\P{Samaritan}/W
85817:/\P{Saurashtra}/W
85818:/\P{Shavian}/W
85819:/\P{Sinhala}/W
85820:/\P{Sundanese}/W
85821:/\P{Syloti_Nagri}/W
85822:/\P{Syriac}/W
85823:/\P{Tagalog}/W
85824:/\P{Tagbanwa}/W
85825:/\P{Tai_Le}/W
85826:/\P{Tai_Tham}/W
85827:/\P{Tai_Viet}/W
85828:/\P{Tamil}/W
85829:/\P{Telugu}/W
85830:/\P{Thaana}/W
85831:/\P{Thai}/W
85832:/\P{Tibetan}/W
85833:/\P{Tifinagh}/W
85834:/\P{Ugaritic}/W
85835:/\P{Vai}/W
85836:/\P{Yi}/W
# Don't forget \p{Any}
86000:/^\p{Any}/W
86001:/^\p{Any}/8W
# Braceless variants for {C, L, M, N, P, S, Z}
86010:/\pC/8W
86011:/\pL/8W
86012:/\pM/8W
86013:/\pN/8W
86014:/\pP/8W
86015:/\pS/8W
86016:/\pZ/8W
86020:/\PC/8W
86021:/\PL/8W
86022:/\PM/8W
86023:/\PN/8W
86024:/\PP/8W
86025:/\PS/8W
86026:/\PZ/8W
86030:/\pC/W
86031:/\pL/W
86033:/\pN/W
86034:/\pP/W
86035:/\pS/W
86036:/\pZ/W
86040:/\PC/W
86041:/\PL/W
86042:/\PM/W
86043:/\PN/W
86044:/\PP/W
86045:/\PS/W
86046:/\PZ/W
# UCP properties are immune to the nocase flag -- they are always
# case-sensitive.
86060:/^case \p{Ll}/i
86061:/^case \p{Ll}/i8W
86062:/^case \p{Lu}/i
86063:/^case \p{Lu}/i8W
86064:/^case \P{Ll}/i
86065:/^case \P{Ll}/i8W
86066:/^case \P{Lu}/i
86067:/^case \P{Lu}/i8W
# Some POSIX classes behave differently in UCP mode.
86200:/^[[:alnum:]]/8W
86201:/^[[:alpha:]]/8W
86202:/^[[:blank:]]/8W
86203:/^[[:digit:]]/8W
86204:/^[[:lower:]]/8W
86205:/^[[:space:]]/8W
86206:/^[[:upper:]]/8W
86207:/^[[:word:]]/8W
86208:/^[[:graph:]]/8W
86209:/^[[:print:]]/8W
86210:/^[[:punct:]]/8W
86211:/^[[:alnum:]]/W
86212:/^[[:alpha:]]/W
86213:/^[[:blank:]]/W
86214:/^[[:digit:]]/W
86215:/^[[:lower:]]/W
86216:/^[[:space:]]/W
86217:/^[[:upper:]]/W
86218:/^[[:word:]]/W
86219:/^[[:graph:]]/W
86220:/^[[:print:]]/W
86221:/^[[:punct:]]/W
# Negated variants
86230:/^[[:^alnum:]][[:^alpha:]][[:^blank:]][[:^digit:]][[:^lower:]][[:^space:]][[:^upper:]][[:^word:]][[:^graph:]][[:^print:]][[:^punct:]]/8W
86231:/^[[:^alnum:]][[:^alpha:]][[:^blank:]][[:^digit:]][[:^lower:]][[:^space:]][[:^upper:]][[:^word:]][[:^graph:]][[:^print:]][[:^punct:]]/W
# Classes where the POSIX class component disappears (fixed in PCRE 8.38)
86240:/^a[[:punct:]bc]/W
86241:/^a[[:punct:]bc]/8W
# Check [:print:] in UCP mode against U+180E, MONGOLIAN VOWEL
# SEPARATOR.
86242:/^[[:print:]]/8W
86243:/^[^[:print:]]/8W
# PCRE bug fixed in PCRE 8.40.
86244:/[\D\P{Nd}]+/8