mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-29 11:16:29 +03:00
hscollider: example test cases
This commit is contained in:
221
tools/hscollider/test_cases/pcre/utf8.txt
Normal file
221
tools/hscollider/test_cases/pcre/utf8.txt
Normal file
@@ -0,0 +1,221 @@
|
||||
80000:/空/8
|
||||
80001:/حرية/8
|
||||
80002:/[空]/8
|
||||
80003:/\x{7a7a}/8
|
||||
80004:/[\x{7a7a}]/8
|
||||
80005:/[空]/
|
||||
80006:/^./s8
|
||||
80007:/^./8
|
||||
80008:/æ/8
|
||||
80009:/\346/8
|
||||
80010:/Ā/8
|
||||
80011:/\700/8
|
||||
80012:/[ἀ-ῼ]/8
|
||||
80013:/[\x{2e18}-⸮]/8
|
||||
80014:/^空{2}/8
|
||||
80015:/^空{2}/
|
||||
# 2 char all
|
||||
80016:/^[\x{80}-\x{7ff}]/8
|
||||
80017:/\x80/8
|
||||
80018:/\x{80}/8
|
||||
80019:/[\x80]/8
|
||||
80020:/[\x{80}]/8
|
||||
80021:/\xff/8
|
||||
80022:/\x{ff}/8
|
||||
80023:/[\xff]/8
|
||||
80024:/[\x{ff}]/8
|
||||
# 80025:/\X/
|
||||
80026:/foo\Xbar/8P
|
||||
80100:/foo.bar/8
|
||||
80101:/foo.*bar/s8
|
||||
80102:/foo[a]*bar/8
|
||||
80103:/foo[^a]*bar/8
|
||||
80104:/foo.+bar/s8
|
||||
80105:/foo[a]+bar/8
|
||||
80106:/foo[^a]+bar/8
|
||||
80107:/foo.*/s8
|
||||
80108:/foo.+/s8
|
||||
80109:/foo.*bar/8
|
||||
80110:/[]a]/8
|
||||
84000:/I/8i
|
||||
84001:/k/8i
|
||||
84002:/S/8i
|
||||
84003:/i/8i
|
||||
84004:/K/8i
|
||||
84005:/s/8i
|
||||
84006:/µ/8i
|
||||
84007:/Å/8i
|
||||
84008:/ß/8i
|
||||
84009:/å/8i
|
||||
84010:/İ/8i
|
||||
84011:/ı/8i
|
||||
84012:/ſ/8i
|
||||
84013:/ͅ/8i
|
||||
84014:/Β/8i
|
||||
84015:/Ε/8i
|
||||
84016:/Θ/8i
|
||||
84017:/Ι/8i
|
||||
84018:/Κ/8i
|
||||
84019:/Μ/8i
|
||||
84020:/Π/8i
|
||||
84021:/Ρ/8i
|
||||
84022:/Σ/8i
|
||||
84023:/Φ/8i
|
||||
84024:/Ω/8i
|
||||
84025:/β/8i
|
||||
84026:/ε/8i
|
||||
84027:/θ/8i
|
||||
84028:/ι/8i
|
||||
84029:/κ/8i
|
||||
84030:/μ/8i
|
||||
84031:/π/8i
|
||||
84032:/ρ/8i
|
||||
84033:/ς/8i
|
||||
84034:/σ/8i
|
||||
84035:/φ/8i
|
||||
84036:/ω/8i
|
||||
84037:/ϐ/8i
|
||||
84038:/ϑ/8i
|
||||
84039:/ϕ/8i
|
||||
84040:/ϖ/8i
|
||||
84041:/ϰ/8i
|
||||
84042:/ϱ/8i
|
||||
84043:/ϴ/8i
|
||||
84044:/ϵ/8i
|
||||
84045:/Ṡ/8i
|
||||
84046:/ṡ/8i
|
||||
84047:/ẛ/8i
|
||||
84048:/ẞ/8i
|
||||
84049:/ι/8i
|
||||
84050:/Ω/8i
|
||||
84051:/K/8i
|
||||
84052:/Å/8i
|
||||
84053:/SS/8i
|
||||
84054:/SZ/8i
|
||||
84055:/ss/8i
|
||||
84056:/sz/8i
|
||||
84057:/DŽ/8i
|
||||
84058:/Dž/8i
|
||||
84059:/dž/8i
|
||||
84060:/LJ/8i
|
||||
84061:/Lj/8i
|
||||
84062:/lj/8i
|
||||
84063:/NJ/8i
|
||||
84064:/Nj/8i
|
||||
84065:/nj/8i
|
||||
84066:/Dz/8i
|
||||
84067:/dz/8i
|
||||
88000:/\p{Vai}/8
|
||||
88001:/\p{^Vai}/8
|
||||
88002:/\P{Vai}/8
|
||||
88003:/\P{^Vai}/8
|
||||
#test for some things which are in multiple scripts
|
||||
88004:/\p{Common}/8
|
||||
88005:/\p{Mongolian}/8
|
||||
88006:/\p{Phags_Pa}/8
|
||||
88007:/\P{Common}/8
|
||||
88008:/\P{Mongolian}/8
|
||||
88009:/\P{Phags_Pa}/8
|
||||
#unallocated and common with the pcre twist
|
||||
88010:/\p{Cn}/8
|
||||
88011:/\P{Cn}/8
|
||||
88012:/\p{Common}/8
|
||||
88013:/\p{^Common}/8
|
||||
#'caseless' properties
|
||||
88014:/\p{Ll}/8i
|
||||
88015:/\p{Lu}/8i
|
||||
88016:/\p{Lt}/8i
|
||||
88017:/\p{Lo}/8i
|
||||
#although not letters, some elements have cased variants like COMBINING GREEK
|
||||
# YPOGEGRAMMENI
|
||||
88018:/\p{Mn}/8i
|
||||
#check Han extensions and private areas
|
||||
88019:/\p{Co}/8
|
||||
88020:/\p{Lo}/8
|
||||
88021:/\p{Han}/8
|
||||
88022:/\P{Co}/8
|
||||
88023:/\P{Lo}/8
|
||||
88024:/\P{Han}/8
|
||||
#ucp mode
|
||||
88100:/\d/8
|
||||
88101:/\d/8W
|
||||
88102:/[\w]/8
|
||||
88103:/[\w]/8W
|
||||
88104:/\s/8
|
||||
88105:/\s/8W
|
||||
88106:/\S/8
|
||||
88107:/\S/8W
|
||||
88108:/\s/
|
||||
88109:/\s/W
|
||||
88110:/[\w]/
|
||||
88111:/[\w]/W
|
||||
88112:/[[:alnum:]]/8
|
||||
88113:/[[:alnum:]]/8W
|
||||
88114:/[[:alpha:]]/8
|
||||
88115:/[[:alpha:]]/8W
|
||||
88116:/[[:blank:]]/8
|
||||
88117:/[[:blank:]]/8W
|
||||
88118:/[[:digit:]]/8
|
||||
88119:/[[:digit:]]/8W
|
||||
88120:/[[:lower:]]/8
|
||||
88121:/[[:lower:]]/8W
|
||||
88122:/[[:lower:]]/8i
|
||||
88123:/[[:lower:]]/8Wi
|
||||
88124:/[[:upper:]]/8
|
||||
88125:/[[:upper:]]/8W
|
||||
88126:/[[:upper:]]/8i
|
||||
88127:/[[:upper:]]/8Wi
|
||||
88128:/[[:space:]]/8
|
||||
88129:/[[:space:]]/8W
|
||||
88130:/[[:word:]]/8
|
||||
88131:/[[:word:]]/8W
|
||||
88132:/[[:^word:]]/8
|
||||
88133:/[[:^word:]]/8W
|
||||
88134:/[[:graph:]]/8
|
||||
88135:/[[:graph:]]/8W
|
||||
88136:/[[:cntrl:]]/8
|
||||
88137:/[[:cntrl:]]/8W
|
||||
88138:/\h/8
|
||||
88139:/\h/8W
|
||||
88140:/\H/8
|
||||
88141:/\H/8W
|
||||
88142:/\v/8
|
||||
88143:/\v/8W
|
||||
88144:/\V/8
|
||||
88145:/\V/8W
|
||||
#boundaries
|
||||
89000:/foo\B.*\bbar/8WP
|
||||
89001:/\b/8
|
||||
89002:/\B/8
|
||||
89003:/\bfoo/8
|
||||
89004:/\bfoo/8WP
|
||||
89005:/foo\b/8
|
||||
89006:/foo\b/8WP
|
||||
89007:/\Bfoo/8
|
||||
89008:/\Bfoo/8WP
|
||||
89009:/foo\B/8
|
||||
89010:/foo\B/8WP
|
||||
89011:/\bô/8
|
||||
89012:/\bô/8WP
|
||||
89013:/ô\b/8
|
||||
89014:/ô\b/8WP
|
||||
89015:/\b﹎/8
|
||||
89016:/\b﹎/8WP
|
||||
89017:/﹎\b/8
|
||||
89018:/﹎\b/8WP
|
||||
89019:/K/8
|
||||
|
||||
# (*VERB) syntax
|
||||
89100:/(*UTF8)\x{7a7a}/
|
||||
89101:/(*UTF8)(*UCP)\w/
|
||||
89102:/(*UCP)\w/8
|
||||
|
||||
# Octal encoding!
|
||||
89103:/\o{75172}/8
|
||||
|
||||
# 4byte utf8
|
||||
89104:/𒀀/8
|
||||
|
||||
# more (*VERB) syntax
|
||||
89105:/(*UCP)(*UTF8)\w/
|
||||
89106:/(*UTF)(*UCP)\w/
|
Reference in New Issue
Block a user