mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-30 03:34:25 +03:00
hscollider: example test cases
This commit is contained in:
3
tools/hscollider/test_cases/pcre/accel.txt
Normal file
3
tools/hscollider/test_cases/pcre/accel.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
2000:/abcdef[^X].*[^X](A|B(CDE)?F)Y?foobar/s
|
||||
2001:/[^abab]+\Z/si
|
||||
2002:/mj(?:[pajl]|o)[tplh]thd.hq.b[frt]pk(\b)hb/i8L
|
174
tools/hscollider/test_cases/pcre/anchors.txt
Normal file
174
tools/hscollider/test_cases/pcre/anchors.txt
Normal file
@@ -0,0 +1,174 @@
|
||||
# A set of patterns that use anchors in unusual ways, also makes some use of
|
||||
# capturing groups.
|
||||
|
||||
13000:/(^)?foo/O
|
||||
13001:/(^)??foo/O
|
||||
13002:/(^|)foo/O
|
||||
13003:/(|^)foo/O
|
||||
13004:/(^|^)?foo/O
|
||||
13005:/(^||^)foo/O
|
||||
|
||||
13006:/foo($)?/
|
||||
13007:/foo($)??/
|
||||
13008:/foo($|)/
|
||||
13009:/foo(|$)/
|
||||
13010:/foo($||$)/
|
||||
|
||||
13011:/foo(\z)?/
|
||||
13012:/foo(\z)??/
|
||||
13013:/foo(\z|)/
|
||||
13014:/foo(|\z)/
|
||||
13015:/foo(\z||\z)/
|
||||
|
||||
13016:/(?:^|foo|)foo/O
|
||||
|
||||
13017:/(^)*foo/O
|
||||
13018:/(^)*?foo/O
|
||||
13019:/(^)+foo/O
|
||||
13020:/(^)+?foo/O
|
||||
13021:/(^){1}foo/O
|
||||
|
||||
# More anchoring tests from the rose anchoring work
|
||||
13100:/a\Z/siH
|
||||
13101:/a\Z/
|
||||
13102:/\Aa\Z/iH
|
||||
13103:/\A(\Ab\Z|a)/smi
|
||||
13104:/(a|b|b$)/
|
||||
13105:/(b.|cbd|[dbd]$)/si
|
||||
13106:/(^h|[qj]|m|a)$/
|
||||
|
||||
# Various trailing .* cases
|
||||
13107:/foobar.*\z/
|
||||
13108:/foobar.*\z/sO
|
||||
13109:/foobar.*\z/m
|
||||
13110:/foobar.*\z/smO
|
||||
|
||||
13111:/foobar.*\Z/
|
||||
13112:/foobar.*\Z/s
|
||||
13113:/foobar.*\Z/m
|
||||
13114:/foobar.*\Z/sm
|
||||
|
||||
13115:/foobar.*$/
|
||||
13116:/foobar.*$/s
|
||||
13117:/foobar.*$/m
|
||||
13118:/foobar.*$/sm
|
||||
|
||||
13119:/foobar.*($|)/
|
||||
13120:/foobar.*($|)/s
|
||||
13121:/foobar.*($|)/m
|
||||
13122:/foobar.*($|)/sm
|
||||
|
||||
13123:/foobar.*(\z)?/
|
||||
13124:/foobar.*(\z)?/s
|
||||
13125:/foobar.*(\z)?/m
|
||||
13126:/foobar.*(\z)?/sm
|
||||
|
||||
# Some alternation variants of the above
|
||||
13127:/(foo|bar).*\z/
|
||||
13128:/(foo|bar).*\z/sO
|
||||
13129:/(foo|bar).*\z/m
|
||||
13130:/(foo|bar).*\z/smO
|
||||
|
||||
13131:/(foo|bar).*\Z/
|
||||
13132:/(foo|bar).*\Z/s
|
||||
13133:/(foo|bar).*\Z/m
|
||||
13134:/(foo|bar).*\Z/sm
|
||||
|
||||
13135:/(foo|bar).*$/
|
||||
13136:/(foo|bar).*$/s
|
||||
13137:/(foo|bar).*$/m
|
||||
13138:/(foo|bar).*$/sm
|
||||
|
||||
13139:/(foo|bar).*($|)/
|
||||
13140:/(foo|bar).*($|)/s
|
||||
13141:/(foo|bar).*($|)/m
|
||||
13142:/(foo|bar).*($|)/sm
|
||||
|
||||
13143:/(foo|bar).*(\z)?/
|
||||
13144:/(foo|bar).*(\z)?/s
|
||||
13145:/(foo|bar).*(\z)?/m
|
||||
13146:/(foo|bar).*(\z)?/sm
|
||||
|
||||
# Rose support for .{N,}$
|
||||
13147:/abc.+$/s
|
||||
13148:/abc.{2,}$/s
|
||||
13149:/abc.{3,}$/s
|
||||
13150:/abc.{10,}$/s
|
||||
13151:/abc.{150,}$/s
|
||||
|
||||
# More bounded and escaped EOD fun
|
||||
13152:/abc.{2,}\z/s
|
||||
13153:/abc.{2,5}\z/s
|
||||
13154:/abc.{2,50}\z/s
|
||||
13155:/abc.{2,5}$/s
|
||||
13156:/abc.{2,50}$/s
|
||||
13157:/abc[^xyz]{2,}\z/s
|
||||
13158:/abc[^xyz]{2,5}\z/s
|
||||
13159:/abc[^xyz]{2,50}\z/s
|
||||
13160:/abc[^xyz]{2,}$/s
|
||||
13161:/abc[^xyz]{2,5}$/s
|
||||
13162:/abc[^xyz]{2,50}$/s
|
||||
|
||||
# ... fun
|
||||
13163:/.\z/s
|
||||
13164:/.$/s
|
||||
13165:/^.+\z/s
|
||||
13166:/..\z/s
|
||||
|
||||
13167:/^.{50}foo.bar/s
|
||||
|
||||
# Anchored literals
|
||||
13168:/^(.*\.)?trailer\z/s
|
||||
13169:/^trailer\z/m
|
||||
13170:/^(.*\.)?trailer$/s
|
||||
13171:/^trailer$/m
|
||||
|
||||
13172:/\Ap\z|s|\A|n/
|
||||
13173:/\Ap\z|s|\A|\Ab|n/
|
||||
|
||||
# Bi-anchored, non cyclic outfix
|
||||
13174:/^\w{200}$/
|
||||
|
||||
# Leading anchored dots
|
||||
13200:/^.foo/sO
|
||||
13201:/^.{1}foo/sO
|
||||
13202:/^.{2}foo/sO
|
||||
13203:/^.{30}foo/sO
|
||||
13204:/^.{1,30}foo/sO
|
||||
13205:/^.{20,}foo/sO
|
||||
13206:/^.{20,30}foo/sO
|
||||
13207:/^.{0,20}foo/sO
|
||||
13208:/^.{1,}foo/sO
|
||||
13209:/^.?foo/sO
|
||||
13210:/^.+foo/sO
|
||||
13211:/^.*foo/sO
|
||||
13212:/^.{4,20}foo|bar/sO
|
||||
13213:/^(.{4,20}foo)|bar/sO
|
||||
13214:/^(.{4,20}foo)|^bar/sO
|
||||
13215:/^.{20,30}[^f][^o][^o]/sO
|
||||
13216:/^.{0,}foo/sO
|
||||
13217:/^.{20,30}a/sO
|
||||
13218:/^[\x00-\xff]+/s
|
||||
13219:/^..foo/sO
|
||||
13220:/^..*foo/sO
|
||||
13221:/^..*.foo/sO
|
||||
13222:/^.*.foo/sO
|
||||
13223:/^..+foo/sO
|
||||
13224:/^..+.foo/sO
|
||||
13225:/^.+.foo/sO
|
||||
13226:/..foo/sO
|
||||
13227:/..*foo/sO
|
||||
13228:/..*.foo/sO
|
||||
13229:/.*.foo/sO
|
||||
13230:/..+foo/sO
|
||||
13231:/..+.foo/sO
|
||||
13232:/.+.foo/sO
|
||||
13233:/(^.*.bar)?foo/sO
|
||||
13234:/(^...bar)?foo/sO
|
||||
13235:/((^....*bar)|(.._))foo/s
|
||||
13236:/^.{0,20}foo[a-z]/sO
|
||||
13237:/^.{0,20}fo[oO]/sO
|
||||
13238:/^.(.{0,2}|.{0,2})foo/sO
|
||||
13240:/(a|aaaa{4,}|^(a|[^a]|a)?.a[^a]*a)$/
|
||||
13241:/^.{600,}/s
|
||||
13242:/.{600,}/s
|
25
tools/hscollider/test_cases/pcre/approximate_matching.txt
Normal file
25
tools/hscollider/test_cases/pcre/approximate_matching.txt
Normal file
@@ -0,0 +1,25 @@
|
||||
# patterns that should produce matches
|
||||
40000:/^(012)*test$/ms{edit_distance=1}
|
||||
40001:/[^k]{3}$/ms{edit_distance=2}
|
||||
40002:/^test$/ms{edit_distance=1}
|
||||
40003:/^test/s{edit_distance=2}
|
||||
40004:/test/s{edit_distance=2}
|
||||
40005:/test|^lit/s{edit_distance=2}
|
||||
40006:/tta+tt/s{edit_distance=2}
|
||||
40007:/^ab(..)+bc/s{edit_distance=2}
|
||||
40008:/a( bc|d)e/s{edit_distance=1}
|
||||
40009:/abc|def$/s{edit_distance=2}
|
||||
40010:/abc|def$/ms{edit_distance=2}
|
||||
|
||||
40050:/^(012)*test$/ms{hamming_distance=1}
|
||||
40051:/[^k]{3}$/ms{hamming_distance=2}
|
||||
40052:/^test$/ms{hamming_distance=1}
|
||||
40053:/^test/s{hamming_distance=2}
|
||||
40054:/test/s{hamming_distance=2}
|
||||
40055:/test|^lit/s{hamming_distance=2}
|
||||
40056:/tta+tt/s{hamming_distance=2}
|
||||
40057:/^ab(..)+bc/s{hamming_distance=2}
|
||||
40058:/a( bc|d)e/s{hamming_distance=1}
|
||||
40059:/abc|def$/s{hamming_distance=2}
|
||||
40060:/abc|def$/ms{hamming_distance=2}
|
||||
40061:/^a?/m{hamming_distance=1}
|
86
tools/hscollider/test_cases/pcre/asserts.txt
Normal file
86
tools/hscollider/test_cases/pcre/asserts.txt
Normal file
@@ -0,0 +1,86 @@
|
||||
23000:/foo.*\bbar/s
|
||||
23001:/foo\b.*bar/s
|
||||
23002:/\bfoo/
|
||||
23003:/\Bfoo/O
|
||||
23004:/(word|nonword~).*\balpha/s
|
||||
23005:/\B~/
|
||||
23006:/foo\b/
|
||||
23007:/foo\B/
|
||||
23008:/foo\b$/
|
||||
23009:/foo\b\z/O
|
||||
23010:/foo.*\b\bbar/s
|
||||
23011:/foo.*\b\b\bbar/s
|
||||
23012:/foo\b.*\bbar/s
|
||||
23013:/foo\B.*\Bbar/s
|
||||
23014:/foo\b.*\Bbar/s
|
||||
23015:/\b\bfoo/
|
||||
23016:/\B\Bfoo/O
|
||||
23017:/\B\B~/
|
||||
23018:/foo\b\b/
|
||||
23019:/foo\B\B/
|
||||
23020:/foo\b\b$/
|
||||
23021:/foo\b\b\z/O
|
||||
23022:/a[b~]+\b/
|
||||
23023:/\b[b~]+a/
|
||||
23024:/\ba/
|
||||
23025:/\b/
|
||||
23026:/\B/
|
||||
23027:/\b\z/
|
||||
23028:/\B\z/
|
||||
23029:/^\b/
|
||||
23030:/^\B/
|
||||
23031:/(a|\A\b)/
|
||||
23032:/(^a|\b)/
|
||||
23033:/^\bfoo/O
|
||||
23034:/^\Bfoo/O
|
||||
23035:/^\b\bfoo/O
|
||||
23036:/^\B\Bfoo/O
|
||||
23037:/foo.*(\b|\B)bar/O
|
||||
23038:/three.*\b\b\basserts/
|
||||
23039:/three.*\B\B\Basserts/
|
||||
23040:/can't_match\b\B/O
|
||||
23041:/\b\Bcan't_match/O
|
||||
23042:/\b(.*)\b/s
|
||||
23043:/\b(foo|bar|baz)\b/
|
||||
|
||||
# More tests: repeats
|
||||
23044:/((\b){2,})+/
|
||||
23045:/((\b){10,})+/
|
||||
23046:/((\b|a){2,})+/
|
||||
23047:/(\b[a-f]\b)+/
|
||||
23048:/(((\b[a-f]+\b) ?))+/
|
||||
|
||||
# multiline bi-anchored boundaries
|
||||
23049:/^(\B)/m
|
||||
23050:/^(\b)/m
|
||||
23051:/^\b$/m
|
||||
23052:/^\b\Z/m
|
||||
23053:/^\b\z/mO
|
||||
23054:/\A\b$/m
|
||||
23055:/\A\b\Z/m
|
||||
23056:/\A\b\z/m
|
||||
23057:/^\B$/m
|
||||
23058:/^\B\Z/m
|
||||
23059:/^\B\z/m
|
||||
23060:/\A\B$/m
|
||||
23061:/\A\B\Z/m
|
||||
23062:/\A\B\z/m
|
||||
23063:/\b.*\b/s
|
||||
23064:/\B.*\B/s
|
||||
23065:/\b.*\B/s
|
||||
23066:/\B.*\b/s
|
||||
23067:/\b.+\b/s
|
||||
23068:/\B.+\B/s
|
||||
23069:/\b.+\B/s
|
||||
23070:/\B.+\b/s
|
||||
23071:/l(\B.)*/i
|
||||
23072:/(a?.\b){4,}bbabb/
|
||||
23073:/\A\B/
|
||||
23074:/\A\b/
|
||||
|
||||
23075:/\b[a-f]+\b/
|
||||
|
||||
# Asserts near repeats.
|
||||
23076:/godzilla\b.{0,10}mothra/s
|
||||
23077:/godzilla.{0,10}\bmothra/s
|
||||
23078:/godzilla\b.{0,10}\bmothra/s
|
34
tools/hscollider/test_cases/pcre/benefits.txt
Normal file
34
tools/hscollider/test_cases/pcre/benefits.txt
Normal file
@@ -0,0 +1,34 @@
|
||||
19501:/a..b/sO
|
||||
19502:/a..bc/sO
|
||||
19503:/ab..c/sO
|
||||
19504:/ab..c../sO
|
||||
19505:/ab..c../siO
|
||||
19506:/.ab..c../siO
|
||||
19507:/abcdefgh....abcd....efgh/sO
|
||||
19508:/foo..ba/sO
|
||||
19509:/foo..ba\z/sO
|
||||
|
||||
# Longer masks
|
||||
19510:/[a-f]{3}-[a-f]{3}-[a-f]{4}/
|
||||
|
||||
# Mixed-sensitivity literals can use masks.
|
||||
19511:/ab(?i)cdef(?-i)ghi/
|
||||
|
||||
# Fixed width pattern that's too long to use a benefits mask.
|
||||
19512:/a{10}b{10}[Cc]{10}d{10}/
|
||||
|
||||
# Mixed sensitivity and dot mask
|
||||
19513:/f[Yy][mPiU].W/
|
||||
|
||||
# Short mixed-case literals to stress literal matchers
|
||||
19514:/(?i)godzill(?-i)a/
|
||||
19515:/g(?i)odzilla/
|
||||
19516:/G(?i)odzilla/
|
||||
19517:/(?i)god(?-i)z(?i)illa/
|
||||
19518:/g(?i)od(?-i)z(?i)illa/
|
||||
|
||||
# Masks and mixed-case
|
||||
19519:/^nqt(?-i)qkf{14}bdr+k.t(?i)r[cp]q{3}\z/is
|
||||
|
||||
# More mixed-case
|
||||
19520:/[Bb][Cc][Aa][CDc]abaaEbcd/
|
30
tools/hscollider/test_cases/pcre/capturing.txt
Normal file
30
tools/hscollider/test_cases/pcre/capturing.txt
Normal file
@@ -0,0 +1,30 @@
|
||||
25000:/((e[dcb]ce(\B)a|.*b(d.|d|c|.|ad)ad[acd]b|a{4,10}|d)[cd]|ba[bcbc].b.ebcd{0,})/m
|
||||
25001:/(a[^aaaa]aaaaa..aaa{0,}|.aaa)/
|
||||
25002:/(\A[aab]{1}|^a.|[aca](\B)baacccc(\b).((b|a)){1,}|aa[bc]cc.\z|\A[aa].cabc.|a(b|[bca]|c|c))/s
|
||||
25003:/aaaaaa(a?|aaaa|a|.?)a+(.a.a.(\b)([aaa]|[aa]|.)aaaaa{4,}[aaa]aaa(a|[aa]))?/mi
|
||||
25004:/(aaa{14,16}aa.a.a|[aaaa]aaa[aa]|(aa|a|a|[aaaa]|a)a[^aaaa]aaaaa)/smi
|
||||
25005:/((aaaa[^\n]*aaaaa)|(aa)|(aa))/s
|
||||
25006:/((abaaa.+a)|(aa))/s
|
||||
25007:/[^p]b{3,20}/
|
||||
25008:/((acbddbdcccc)|(cc))/s
|
||||
25009:/(aa(a|a|a|a|a|[aa])){1,7}/s
|
||||
25010:/aa(a|a+)/s
|
||||
25011:/bba*/i
|
||||
25012:/hg[hkjs]*/s
|
||||
25013:/ke+/i
|
||||
25014:/.(a|.)a?/m
|
||||
25015:/(\b)../i
|
||||
25016:/../
|
||||
25017:/a/s
|
||||
25018:/(a{5}$|(a|[aaa]|[aaaa])aaa[^aaa])/sm
|
||||
25019:/(\B)..a*/i
|
||||
25020:/(.bb|bbaababb|\Aba.*|b?(b|[ab]|a)[aab]b([ba]b?|bb)|\Abaaaaaa(a|a|.))/smi
|
||||
25021:/foobar/
|
||||
25022:/foobar/i
|
||||
25023:/foo_bar/
|
||||
25024:/foo_bar/i
|
||||
25025:/[fg]oobar/
|
||||
25026:/[fg]oobar/i
|
||||
25027:/f[0o]obar/i
|
||||
25028:/foo(?i)bar/
|
||||
25029:/[Ff]oobar/
|
174
tools/hscollider/test_cases/pcre/charclass.txt
Normal file
174
tools/hscollider/test_cases/pcre/charclass.txt
Normal file
@@ -0,0 +1,174 @@
|
||||
# All the generic character types
|
||||
11000:/\d\D\h\H\s\S\v\V\w\W/
|
||||
|
||||
# All the following should be equivalent to a dot (type plus non-type)
|
||||
11001:/[\d\D]/
|
||||
11002:/[\h\H]/
|
||||
11003:/[\s\S]/
|
||||
11004:/[\v\V]/
|
||||
11005:/[\w\W]/
|
||||
|
||||
# POSIX classes
|
||||
11006:/[[:alnum:]][[:alpha:]][[:ascii:]][[:blank:]][[:cntrl:]][[:digit:]][[:graph:]]/
|
||||
11007:/[[:lower:]][[:print:]][[:punct:]][[:space:]][[:upper:]][[:word:]][[:xdigit:]]/
|
||||
11008:/[01[:alpha:]%]/
|
||||
# Perl extension: negation of posix classes. This matches 1, 2 or any non-digit.
|
||||
11009:/[12[:^digit:]]/
|
||||
|
||||
# Ranges
|
||||
11010:/[A-F][a-f][0-9][a-f5-7][^t-z][^a-f0-9]/
|
||||
11011:/[^^][^\n]/
|
||||
11012:/[\000-\037]/
|
||||
11013:/[W-c]/i
|
||||
11014:/[^\W_]/
|
||||
|
||||
# Literal 'dash' characters
|
||||
11015:/[\s-\w]/
|
||||
11016:/[A-]/O
|
||||
11017:/[A-Z-]/
|
||||
11018:/[a-f0-]/O
|
||||
|
||||
# \b is a backspace in a character class (hex 08)
|
||||
11020:/[\b]/O
|
||||
|
||||
# Dollar and dot have no special meaning inside a class
|
||||
11021:/[.][$]/O
|
||||
|
||||
# Closing square brackets can be unescaped if they are the first element in a
|
||||
# class (scary!)
|
||||
11022:/[]a]/O
|
||||
11023:/[^]a]/
|
||||
11024:/[]-_]/O
|
||||
|
||||
# \R and \X are the literal chars R and X (unlike outside a class)
|
||||
# 11025:/[\R\X]/O
|
||||
|
||||
# Others
|
||||
11100:/[\]]/O
|
||||
11101:/[]]/O
|
||||
11102:/[\[\]]/O
|
||||
11103:/[\^]/O
|
||||
11104:/[+--]/O
|
||||
11105:/[--A]/
|
||||
11106:/[A-C-E-G]/O
|
||||
11107:/[A-C-E]/O
|
||||
11108:/[\0]/O
|
||||
11109:/[\40]/O
|
||||
11110:/[\040]/O
|
||||
11111:/[\0400]/O
|
||||
11112:/[\0401]/O
|
||||
#hint: equiv to [18\x00]
|
||||
11113:/[\81]/O
|
||||
11114:/[\xg]/O
|
||||
11115:/[\x31]/O
|
||||
11116:/[\x{31}]/O
|
||||
11117:/[\x{0000000000000031}]/O
|
||||
11118:/[\x{31g}]/O
|
||||
11119:/[\x{foo}-~]/O
|
||||
11120:/[\x00-\x{31g}]/O
|
||||
11121:/[[:foo]/O
|
||||
11122:/[\Q^\Ea]/O
|
||||
11123:/[\Qa]\E]/O
|
||||
11124:/[\Q\Q\E]/O
|
||||
11125:/[\E]]/O
|
||||
11126:/[\x{31]/O
|
||||
11127:/[^e]/i
|
||||
11128:/[(?i)a]/O
|
||||
|
||||
# These classes with escaped '-' chars in them aren't ranges
|
||||
11200:/[\x20\x2d\x5f]/O
|
||||
11201:/[A\x2dZ]/O
|
||||
11202:/[A\055Z]/O
|
||||
11203:/[-\x2da]/O
|
||||
11204:/[\x20\x2d-]/O
|
||||
|
||||
# Is a range (dash-to-\x5f)
|
||||
11205:/[\x2d-\x5f]/s
|
||||
|
||||
# void character classes
|
||||
11300:/[^\x00-\xff]/O
|
||||
11301:/[^\x00-\xff]foo/O
|
||||
11302:/[^\x00-\xff]foo|bar/O
|
||||
11303:/^[^\x00-\xff]foo/O
|
||||
11304:/foo[^\x00-\xff]/O
|
||||
11305:/foo[^\x00-\xff]$/O
|
||||
11306:/baz.*([^\x00-\xff]foo|bar).*baz/O
|
||||
|
||||
# we had a buggy definition of xdigit (as '[0-9a-fA-Z]')
|
||||
11307:/[[:xdigit:]]/
|
||||
|
||||
# test negation of all the POSIX classes
|
||||
11308:/[[:^alnum:]][[:^alpha:]][[:^ascii:]][[:^blank:]][[:^cntrl:]][[:^digit:]][[:^graph:]]/
|
||||
11309:/[[:^lower:]][[:^print:]][[:^punct:]][[:^space:]][[:^upper:]][[:^word:]][[:^xdigit:]]/
|
||||
|
||||
# test cntrl against DEL
|
||||
11310:/[[:cntrl:]]/
|
||||
|
||||
# \C should just be the literal char C
|
||||
#11311:/[\C]/O
|
||||
|
||||
11312:/[[:alnum:]]/
|
||||
11313:/[[:^alnum:]]/
|
||||
11314:/[[:alpha:]]/
|
||||
11315:/[[:^alpha:]]/
|
||||
11316:/[[:ascii:]]/
|
||||
11317:/[[:^ascii:]]/
|
||||
11318:/[[:blank:]]/
|
||||
11319:/[[:^blank:]]/
|
||||
11320:/[[:cntrl:]]/
|
||||
11321:/[[:^cntrl:]]/
|
||||
11322:/[[:digit:]]/
|
||||
11323:/[[:^digit:]]/
|
||||
11324:/[[:graph:]]/
|
||||
11325:/[[:^graph:]]/
|
||||
11326:/[[:lower:]]/
|
||||
11327:/[[:^lower:]]/
|
||||
11328:/[[:print:]]/
|
||||
11329:/[[:^print:]]/
|
||||
11330:/[[:space:]]/
|
||||
11331:/[[:^space:]]/
|
||||
11332:/[[:upper:]]/
|
||||
11333:/[[:^upper:]]/
|
||||
11334:/[[:xdigit:]]/
|
||||
11335:/[[:^xdigit:]]/
|
||||
11339:/[[:punct:]]/
|
||||
11340:/[[:^punct:]]/
|
||||
|
||||
# Oddities
|
||||
11336:/[\f]/
|
||||
11337:/[\a]/
|
||||
11338:/[\e]/
|
||||
|
||||
# Unterminated classes.
|
||||
11341:/[ab-]/
|
||||
11342:/[ab-\Q\E]/
|
||||
|
||||
# More negated POSIX classes and interaction with mode bits.
|
||||
11343:/[[:^upper:]]/i
|
||||
11344:/[[:^lower:]]/i
|
||||
11345:/[^A-Z]/i
|
||||
11346:/[^a-z]/i
|
||||
|
||||
# More unterminated classes.
|
||||
11347:/[ab-]/8
|
||||
11348:/[ab-\Q\E]/8
|
||||
|
||||
# Some classes that look a little similar to POSIX collating classes, but
|
||||
# aren't.
|
||||
11349:/[.abc=] [=abc.] [^=abc=] [^.abc.]/
|
||||
|
||||
# Bug in PCRE versions before 8.38 (see PCRE issue #1697).
|
||||
11350:/[\W\p{Any}]/
|
||||
|
||||
# Bug in PCRE versions before 8.38 (see PCRE issue #1717).
|
||||
11351:/a[[:punct:]b]/W
|
||||
11352:/[[:^graph:]a]/W
|
||||
|
||||
# Bug in PCRE versions before 8.38 (see PCRE issue #1732).
|
||||
11353:/[^[:alpha:][:^cntrl:]]/8W
|
||||
|
||||
# Bug in PCRE versions before 8.38 (see PCRE issue #1719).
|
||||
11354:/[[:^ascii:][:alnum:]a]/8W
|
||||
|
||||
# Bug in PCRE < 8.38 to do with [:punct:] in UCP mode (see PCRE issue #1718).
|
||||
11355:/[[:punct:]]/8W
|
9
tools/hscollider/test_cases/pcre/comp.txt
Normal file
9
tools/hscollider/test_cases/pcre/comp.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
15800:/^.*(foo.*bar|baz.*baz)/sO
|
||||
15801:/^.*(foo.*bar|baz.*baz)a/s
|
||||
15802:/^.{1,6}(foo.*bar|baz.*baz)/sO
|
||||
15803:/.{1,6}(foo.*bar|baz.*baz)/sO
|
||||
15804:/.{1,6}(foo.*bar|........baz.*baz)/sO
|
||||
15805:/^.{1,}(foo.*bar|........baz.*baz)/sO
|
||||
15806:/anorak.*trainspotter|ANORAK.*trainspotter/iO
|
||||
15807:/anorak.*trainspotter/iO
|
||||
15808:/anorak.*trainspotter\z|ANORAK.*trainspotter/iO
|
10
tools/hscollider/test_cases/pcre/comptree.txt
Normal file
10
tools/hscollider/test_cases/pcre/comptree.txt
Normal file
@@ -0,0 +1,10 @@
|
||||
1000:/(foo){2,}bar/
|
||||
1001:/(foo){2,3}bar/
|
||||
1002:/^(foo){2,}bar/
|
||||
1003:/^(foo){2,3}bar/
|
||||
1004:/x?(foo){2,}bar/
|
||||
1005:/x?(foo){2,3}bar/
|
||||
1006:/(foo){2,}bar/{min_length=10}
|
||||
1007:/(foo){2,3}bar/{min_length=10}
|
||||
1008:/(foo){2,}bar/L
|
||||
1009:/(foo){2,3}bar/L
|
119
tools/hscollider/test_cases/pcre/extparams.txt
Normal file
119
tools/hscollider/test_cases/pcre/extparams.txt
Normal file
@@ -0,0 +1,119 @@
|
||||
16200:/foo.*bar/{min_offset=1}
|
||||
16201:/foo.*bar/{min_offset=10}
|
||||
16202:/foo.*bar/{max_offset=10}
|
||||
16203:/foo.*bar/{min_offset=10,max_offset=10}
|
||||
16204:/foo.*bar/{min_offset=10,max_offset=15}
|
||||
16205:/a.*b/{min_length=5}
|
||||
16206:/a.*b/{min_offset=10,min_length=3}
|
||||
16207:/preamble.*q/{max_offset=30}
|
||||
16208:/preamble.*q/{min_length=12}
|
||||
16209:/aa[^a]+aa/{max_offset=10}
|
||||
16210:/[0-9]{32,}/{max_offset=48}
|
||||
16211:/\x04\S+/{max_offset=17,min_length=17}
|
||||
16212:/[^x]+/{min_offset=10,max_offset=10}
|
||||
16213:/[^x]+x/{min_length=20}
|
||||
16214:/foo.*/{min_length=10}
|
||||
16215:/foo.+/{min_length=10}
|
||||
16216:/^hatstand.*teakettle/{min_length=25}
|
||||
16217:/hatstand/{min_offset=10,max_offset=15}
|
||||
16218:/[abcdef]{3}/{min_offset=5,max_offset=10}
|
||||
|
||||
# Test min_length with one cyclic -> bounded repeat transform.
|
||||
16219:/foo.*bar/{min_length=10}
|
||||
16220:/foo.+bar/{min_length=10}
|
||||
16221:/.*./{min_length=4}
|
||||
16222:/.+/{min_length=4}
|
||||
16223:/^.*hatstand/{min_length=20}
|
||||
16224:/^.+hatstand/{min_length=20}
|
||||
16225:/hatstand.*/{min_length=20}
|
||||
|
||||
# Unnecessary min_length
|
||||
16226:/long cat is lo+ng/{min_length=8}
|
||||
|
||||
# Be wary of assertions.
|
||||
16227:/(\B|\Al)/smiV{min_offset=1,max_offset=10}
|
||||
|
||||
# Anchoring via max_offset.
|
||||
16228:/rascal/{max_offset=6}
|
||||
|
||||
# Be wary of vacuous patterns.
|
||||
16229:/(..v[xmdf]wn\b)*/V{max_offset=27}
|
||||
|
||||
16230:/abc([^a]|ab|a[^b]c)d+ef/{max_offset=30,min_length=10}
|
||||
|
||||
# Some alternations are disallowed by min_length. The next two also have $
|
||||
# metachars, which invoke -1 offset adjustment.
|
||||
16231:/(^g$|k\z|egs$|t)/{min_length=1}
|
||||
16232:/(h|.ab$)/{min_length=2}
|
||||
16233:/(abc|abcd|abcde|abcdef)/{min_length=5}
|
||||
|
||||
# Some highlander optimisations play merry hell with min_length.
|
||||
16234:/kn[er]{2,10}/sH{min_length=5}
|
||||
|
||||
# Mixed anchored/unanchored pattern with offsets.
|
||||
16235:/(\As|^c|z|[ycld]|^.)/s{min_offset=4,max_offset=22}
|
||||
|
||||
# More highlander shenanigans.
|
||||
16236:/h|z|w|efp./H{min_offset=4}
|
||||
|
||||
# \b or \B offset adjustments can cause trouble.
|
||||
16237:/\A(g|l|(\b)|[wfse]|^[wc])/s{min_length=1,max_offset=4}
|
||||
16238:/g(\B)/sV{max_offset=2}
|
||||
|
||||
# min_length with virtual starts (multiline)
|
||||
16239:/^p{1,}/m{min_length=5,max_offset=15}
|
||||
|
||||
# vacuous edges that need to go away.
|
||||
16240:/c?/V{min_length=1,min_offset=4}
|
||||
|
||||
# min_length -> bounded repeat for a trailing cyclic.
|
||||
16241:/ykmy[^kaib]g*/{min_length=8}
|
||||
|
||||
# Alternation that stresses our min_length/max offset transforms.
|
||||
16242:/\Aq[rgm]h+|z/i{min_length=8,max_offset=20}
|
||||
|
||||
16243:/cj.wjn*v?/{min_length=9,min_offset=4}
|
||||
|
||||
# min_length transformation with an offset-adjusted report.
|
||||
16244:/qye.+ys(\B)/si{min_length=7}
|
||||
|
||||
# More word-boundary tests.
|
||||
16245:/\bfoo/{min_offset=2,max_offset=10}
|
||||
16246:/foo\b/{min_offset=2,max_offset=10}
|
||||
16247:/\bfoo\b/{min_offset=2,max_offset=10}
|
||||
16248:/\bfoo\b.*\bbar\b/{min_offset=2,max_offset=15}
|
||||
16249:/\bfoo\b.*\bbar\b/{min_length=10}
|
||||
16250:/\bfoo\b.*\bbar\b/{max_offset=15}
|
||||
|
||||
# highlander + min_offset puff/lbr tests.
|
||||
16251:/.{50}/H{min_offset=51}
|
||||
16252:/.{50}/H{min_offset=52}
|
||||
16253:/.{50}/H{min_offset=53}
|
||||
16254:/.{50,}/H{min_offset=51}
|
||||
16255:/.{50,}/H{min_offset=52}
|
||||
16256:/.{50,}/H{min_offset=53}
|
||||
16257:/aaa.{50}/H{min_offset=54}
|
||||
16258:/aaa.{50}/H{min_offset=55}
|
||||
16259:/aaa.{50}/H{min_offset=56}
|
||||
16260:/aaa.{50,}/H{min_offset=54}
|
||||
16261:/aaa.{50,}/H{min_offset=55}
|
||||
16262:/aaa.{50,}/H{min_offset=56}
|
||||
|
||||
# unnecessary min_length
|
||||
16263:/unambiguous/{min_length=11}
|
||||
|
||||
# cases with prunable paths
|
||||
16264:/^a|g/m{min_offset=10,max_offset=16}
|
||||
16265:/^foo|jabberwocky|apple2|foo.*bar/{max_offset=6}
|
||||
16266:/^(a{8}|b{9}|c{10})|floating/{min_offset=10}
|
||||
|
||||
# some more cases to stress small block analyses
|
||||
16267:/abcdef.{5,}/s{min_offset=20}
|
||||
16268:/abcdef./s{min_offset=20}
|
||||
16269:/abcdef../s{min_offset=20}
|
||||
16270:/abcdef/sH{min_offset=10,max_offset=20}
|
||||
16271:/abcdef/s{min_offset=10,max_offset=10}
|
||||
16272:/abcdef../s{min_offset=10,max_offset=10}
|
||||
|
||||
# several things at once
|
||||
16273:/^[^dj].b$/sH8{min_length=9,max_offset=19}
|
37
tools/hscollider/test_cases/pcre/highlander.txt
Normal file
37
tools/hscollider/test_cases/pcre/highlander.txt
Normal file
@@ -0,0 +1,37 @@
|
||||
15300:/foo/HO
|
||||
15301:/foo.*bar/sHO
|
||||
15302:/foo.*bar/HO
|
||||
15303:/foo[^X]{15}/sHO
|
||||
15304:/foo[^X]{16}/sHO
|
||||
15305:/foo[^X]{17}/sHO
|
||||
15306:/foo.*[^X]{17}/sH
|
||||
15307:/foo[^X]{17}blah/sHO
|
||||
15308:/foo[^XY]{17}/sHO
|
||||
15309:/foo[^X]{17}$/sH
|
||||
15310:/[^X]{17}/sH
|
||||
15311:/^[^X]{17}/sHO
|
||||
15312:/fo.*o[^X]{15}/sH
|
||||
15313:/fo.*o[^X]{16}/sH
|
||||
15314:/fo.*o[^X]{17}/sH
|
||||
15315:/[fb][oa][or][^X]{15}/sH
|
||||
15316:/[fb][oa][or][^X]{16}/sH
|
||||
15317:/[fb][oa][or][^X]{17}/sH
|
||||
15318:/foo|bar/HO
|
||||
# for people who are unsure of highlander semantics
|
||||
15319:/foo.|bar./HO
|
||||
15320:/^(a[^aa]..aa|a*)/mHV
|
||||
|
||||
# highlander pruning
|
||||
15321:/(foo.*bar)|(foo.*bar.*baz)/H
|
||||
15322:/foo.*bar(.*baz)?/H
|
||||
15323:/foo.*bar+/H
|
||||
15324:/(foo.*bar)|(foo.*bar.*baz)|(foo.*bar.*eod\z)/H
|
||||
15325:/foo.*eod+\z/H
|
||||
15326:/foo.*eod+$/H
|
||||
|
||||
# SEP patterns
|
||||
15327:/[a-f]/iH
|
||||
15328:/a|b|c/H
|
||||
|
||||
# Longer, run-prone literals
|
||||
15329:/(p{100})|(q{100})/H
|
7
tools/hscollider/test_cases/pcre/longlits.txt
Normal file
7
tools/hscollider/test_cases/pcre/longlits.txt
Normal file
@@ -0,0 +1,7 @@
|
||||
# Alternation of ten long literals.
|
||||
50000:/ukdnsybecoqlszhxiwfcpvmnoqobdfuoovmotdeefiwdoxukyxldjxthcmnxqebsiyvwwtadafmibpxnwuxqtpcndwzaiwurnvbkgzpfutisauyagfwacajrcmjlgmomzdakzjgpgnlepnjcynzhptporgjcrjkrnhvnucgvjjgfboisxjfaywypljihrstqvmwsqdvq|ihcntajbgquaruyfimiabusvmmqcpaxpowhhucnzlpfxzmmbcqahdmposiymqscqugtmictrnomnccfcdxzlksyuqkbjvgekaebwmcmzydebtltpcfbmckvwoqtinlplzopauzkcyiinbcjrfjkncggcowuifvwvoavxrkuaxuwjhnnyotkgbrkggwkafzvzmkgijnsr|vtymzjxeeyazemvcwsvcacdzihfbgiaqwjxmcncgdzafmhtvbvnmjrpfudnflcvfbkwcfsmdfaqtawqqcbigfrnjzwrdvndstesayfgjsiofshzvtabtgblrgbksqechctlngykpladacvwiffqwjktuosdjvdonjixekrlvvxeeqenylwjgqicdpgjhojsyyhuhtphc|oxqvlanpjsnxnoodzpjhsnmgkwjfyqxmqlbqmteabqdbnwtvnpeodvvkukcxqfczyrftogophbmkeuzcxmpyqfigkynftdstdudfynmfxhssbrpdhebywvnpltqxtvdthpppdllyofyktnrwzhbiklpfrclizidkkqbgimzmdrznwkmbledtmsazljcvmfzdlpzgwymm|jtaanwnxkvwesndylwqcnsqlccnikybengkimrxauuvytagyasdomvupykaagpcthrrnkzjnysqywwyqpfbyqclabrcftwgvfcxdrhkgxlngnsxmkrvguvfugbgqruspkzojxrhqkrgjrybyoqktjrexxodbcdlfiyhclsvhsaysihhoycnpksoivyxxbglvyzhkbajh|ntafhsdlngeeeqphxqspxswcmubcwycbzusxunxrmqgmdktwblavcrdgjhecpfnqsyckxuxaboljmrvetofhgbeydypoeyydrxhordcgqafbnoylylqnvxynfcoygtiwiniwlctwmwornplgfpjbretneadneemlzzodtkkdmcyqrggrmjzlkzzjxoirfilosenpjexy|ckalndydcrodvvmyyuqbihprzzgnqympoeinwewgfqpzuhyygivfdhdxnnatccuaghjrddogabtgmcvpspptpicpftxdfdfsiilngteqvqjjsoevnqfiztgcvolmpqkemqeizzmlingcuyxyidvrlczmiifutjljifxiramtoxvtbkwzsrczyzdgbtkboudipjonydtt|fbbeioibbkbiiupjzcqrwjuvqjmbavnkvtogebhltedefahasbnvvvspugdtecfpstxsbtfluycxzfxgcvzhfyhgbgzyfcwltvyyoofolnolasemxqqywlrikjocwvhpqofufqyuhcisckvoveaeectwodmmcodisfwynzcctloqyheedjfpwcuwrixkdznnefgizrap|eosktuskzdokmshljlcazpmahliwzzmhmpzmsiymtvpctaqwdpmffcnkmkypkcrclmlcxmnysqhslegqetflncttxqiprjddowzkhyjlzytudxqnvcctpebufelzmxnzsfwqbahrgwbjrpbobfdwjfsbfjrhjsbqdlsurllezccluashcrywxhnbqqclikrnefkyutdo|gvoiwjevplfxkeempnspkgljnqdckunshelsuogizffvbplhbyhxnjfabmjiigideullxtxbnjxczvaoveafcechrilvdkyzehhuhlohtjxiocfvjzdrjosuawxqmlbcwsnfnpxusoqldoumsedxbbwummwtbqrwkcjxkvyukcxekpjacjlezesa
ihhpqdatiosxgbbb/
|
||||
|
||||
# Long flood literals.
|
||||
50001:/a{300}/
|
||||
50002:/A{300}/
|
||||
50003:/a{300}/i
|
8
tools/hscollider/test_cases/pcre/lookaround.txt
Normal file
8
tools/hscollider/test_cases/pcre/lookaround.txt
Normal file
@@ -0,0 +1,8 @@
|
||||
27000:/a[A-L]?[B-M]?[C-N]?[D-O]?foobar/
|
||||
27001:/ar[A-Z]{1,9}foobar/
|
||||
27002:/a{5}\w{1,8}foobar/
|
||||
27003:/(ab|bc|cd|de|ef|fg|gh|hi)[\w]{12}foobar/
|
||||
27004:/(abcdef|bcdefg|cdefgh|defghi|efghij|fghijk|ghijkl|hijklm)[\w]{8}foobar/
|
||||
27005:/[\x20\x31\x42\x53\x64][\x20\x25\x31\x35\x42\x45\x53\x55\x64\x65][\w]{1,7}foobar/
|
||||
27006:/[\dABcd][\dABCd][\dAbcd][\dabcd][\dabCD][\dEFGH][\dEFGh][\dEfgh][0-5B-Iw-z][\dA-Z]{1,6}foobar/
|
||||
27007:/ec.((c|C|d){3}|.BEe|...|dE.[bdC]A.Ac[Aa]|ca)CC.cdCA.D{1,}[Dd]..b(dbe{16,18}|E|b)aaC(c|d*|e|[Da]|C|a)/s
|
132
tools/hscollider/test_cases/pcre/mangle.txt
Normal file
132
tools/hscollider/test_cases/pcre/mangle.txt
Normal file
@@ -0,0 +1,132 @@
|
||||
18000:/^foo.{64}bar/sO
|
||||
18001:/^foo.{32}bar/sO
|
||||
18002:/^foo.{600}bar/sO
|
||||
18003:/^hoo.{64}bar|^foo.{64}bar.{178}bar/sO
|
||||
18004:/foo.{64}bar/sO
|
||||
18005:/^foo.{64}b(a?)r/sO
|
||||
18006:/^f(o?)o.{64}bar/sO
|
||||
18007:/^foo.{64}bar\z/sO
|
||||
18008:/^foo.{64}bar(|t\z)/s
|
||||
18009:/^foo.{63,64}bar/sO
|
||||
18010:/^foo.{1,64}bar/sO
|
||||
18011:/^foo.{599,600}bar/sO
|
||||
18012:/^foo.{1,600}bar/sO
|
||||
18013:/^(foo|fab).{400,600}(bar|baz)/sO
|
||||
18014:/^foo.{63,64}.bar/sO
|
||||
18015:/^foo.{1,600}.bar/sO
|
||||
18016:/^foo.{63}(|.?bar)/s
|
||||
18017:/^foo.{63}(|.{0,4}bar)/s
|
||||
18018:/^foo.{32}.?.{32}bar/sO
|
||||
18019:/^foo(..){1,32}bar/s
|
||||
18020:/^(foo.{32}|).{32}bar/s
|
||||
18021:/^(foo.{32}|).{0,32}bar/s
|
||||
18022:/^(foo.{32}|).{1,32}bar/s
|
||||
18023:/^foo.{64}bar/sHO
|
||||
18024:/^foo.{1,64}bar/sHO
|
||||
18025:/^foo.{63,64}bar/sHO
|
||||
18026:/^foo(.{64})?bar/sH
|
||||
18027:/^foo(.{1,64})?bar/sH
|
||||
18028:/^foo(.{63,64})?bar/sH
|
||||
18029:/^foo.{64}bar/O
|
||||
18030:/^foo.{32}bar/O
|
||||
18031:/^foo.{600}bar/O
|
||||
18032:/^hoo.{64}bar|^foo.{64}bar.{178}bar/O
|
||||
18033:/foo.{64}bar/O
|
||||
18034:/^foo.{64}b(a?)r/O
|
||||
18035:/^f(o?)o.{64}bar/O
|
||||
18036:/^foo.{64}bar\z/O
|
||||
18037:/^foo.{64}bar(|t\z)/
|
||||
18038:/^foo.{63,64}bar/O
|
||||
18039:/^foo.{1,64}bar/O
|
||||
18040:/^foo.{599,600}bar/O
|
||||
18041:/^foo.{1,600}bar/O
|
||||
18042:/^(foo|fab).{400,600}(bar|baz)/O
|
||||
18043:/^foo.{63,64}.bar/O
|
||||
18044:/^foo.{1,1000}.bar/O
|
||||
18045:/^foo.{63}(|.?bar)/
|
||||
18046:/^foo.{63}(|.{0,4}bar)/
|
||||
18047:/^foo.{32}.?.{32}bar/O
|
||||
18048:/^foo(..){1,32}bar/
|
||||
18049:/^(foo.{32}|).{32}bar/
|
||||
18050:/^(foo.{32}|).{0,32}bar/
|
||||
18051:/^(foo.{32}|).{1,32}bar/
|
||||
18052:/^foo.{64}bar/HO
|
||||
18053:/^foo.{1,64}bar/HO
|
||||
18054:/^foo.{63,64}bar/HO
|
||||
18055:/^foo(.{64})?bar/H
|
||||
18056:/^foo(.{1,64})?bar/H
|
||||
18057:/^foo(.{63,64})?bar/H
|
||||
18058:/^fooa{599,600}bar/O
|
||||
18059:/^fooa{1,600}bar/
|
||||
18060:/^fooa{600}bar/O
|
||||
18061:/^foo.{0,64}bar/sO
|
||||
18062:/^foo.{0,600}bar/sO
|
||||
18063:/^.{70}(aaaa|.{12})/s
|
||||
18100:/^[^X]foo.{64}bar/sO
|
||||
18101:/^[^X]foo.{32}bar/sO
|
||||
18102:/^[^X]foo.{600}bar/s
|
||||
18103:/^[^X]hoo.{64}bar|^[^X]foo.{130}bar.{178}bar/s
|
||||
18105:/^[^X]foo.{64}b(a?)r/s
|
||||
18106:/^[^X]f(o?)o.{64}bar/s
|
||||
18107:/^[^X]foo.{64}bar\z/s
|
||||
18108:/^[^X]foo.{64}bar(|t\z)/s
|
||||
18109:/^[^X]foo.{63,64}bar/s
|
||||
18110:/^[^X]foo.{1,64}bar/s
|
||||
18111:/^[^X]foo.{599,600}bar/s
|
||||
18112:/^[^X]foo.{1,600}bar/s
|
||||
18113:/^[^X](foo|fab).{400,600}(bar|baz)/s
|
||||
18114:/^[^X]foo.{63,64}.bar/s
|
||||
18115:/^[^X]foo.{1,600}.bar/s
|
||||
18116:/^[^X]foo.{63}(|.?bar)/s
|
||||
18117:/^[^X]foo.{63}(|.{0,4}bar)/s
|
||||
18118:/^[^X]foo.{32}.?.{32}bar/s
|
||||
18119:/^[^X]foo(..){1,32}bar/s
|
||||
18120:/^[^X](foo.{32}|).{32}bar/s
|
||||
18121:/^[^X](foo.{32}|).{0,32}bar/s
|
||||
18122:/^[^X](foo.{32}|).{1,32}bar/s
|
||||
18123:/^[^X]foo.{64}bar/sHO
|
||||
18124:/^[^X]foo.{1,64}bar/sH
|
||||
18125:/^[^X]foo.{63,64}bar/sH
|
||||
18126:/^[^X]foo(.{64})?bar/sH
|
||||
18127:/^[^X]foo(.{1,64})?bar/sH
|
||||
18128:/^[^X]foo(.{63,64})?bar/sH
|
||||
18129:/^[^X]foo.{64}bar/O
|
||||
18130:/^[^X]foo.{32}bar/O
|
||||
18131:/^[^X]foo.{600}bar/
|
||||
18132:/^[^X]hoo.{64}bar|^[^X]foo.{64}bar.{178}bar/O
|
||||
18134:/^[^X]foo.{64}b(a?)r/
|
||||
18135:/^[^X]f(o?)o.{64}bar/
|
||||
18136:/^[^X]foo.{64}bar\z/
|
||||
18137:/^[^X]foo.{64}bar(|t\z)/
|
||||
18138:/^[^X]foo.{63,64}bar/
|
||||
18139:/^[^X]foo.{1,64}bar/
|
||||
18140:/^[^X]foo.{599,600}bar/
|
||||
18141:/^[^X]foo.{1,600}bar/
|
||||
18142:/^[^X](foo|fab).{400,600}(bar|baz)/
|
||||
18143:/^[^X]foo.{63,64}.bar/
|
||||
18144:/^[^X]foo.{1,1000}.bar/
|
||||
18145:/^[^X]foo.{63}(|.?bar)/
|
||||
18146:/^[^X]foo.{63}(|.{0,4}bar)/
|
||||
18147:/^[^X]foo.{32}.?.{32}bar/
|
||||
18148:/^[^X]foo(..){1,32}bar/
|
||||
18149:/^[^X](foo.{32}|).{32}bar/
|
||||
18150:/^[^X](foo.{32}|).{0,32}bar/
|
||||
18151:/^[^X](foo.{32}|).{1,32}bar/
|
||||
18152:/^[^X]foo.{64}bar/HO
|
||||
18153:/^[^X]foo.{1,64}bar/H
|
||||
18154:/^[^X]foo.{63,64}bar/H
|
||||
18155:/^[^X]foo(.{64})?bar/H
|
||||
18156:/^[^X]foo(.{1,64})?bar/H
|
||||
18157:/^[^X]foo(.{63,64})?bar/H
|
||||
18158:/^[^X]fooa{599,600}bar/
|
||||
18159:/^[^X]fooa{1,600}bar/
|
||||
18160:/^[^X]fooa{600}bar/
|
||||
18161:/^[^X]foo.{0,64}bar/s
|
||||
18162:/^[^X]foo.{0,600}bar/s
|
||||
18163:/^[^X].{70}(aaaa|.{12})/s
|
||||
18164:/^[^X].{70}(aaaa|.{0,11}.b)/s
|
||||
18165:/^[^X].{70}(aaaa|.{1,11}.b)/s
|
||||
18166:/^[^X]foo.{0,100}.bar/s
|
||||
18167:/^[^X]foo.{100}.?bar/s
|
||||
18168:/^[^X]foo.{50}.?.{50}bar/s
|
||||
18169:/^[^X]foo.{50}(..)?.{50}bar/s
|
29
tools/hscollider/test_cases/pcre/mcclellan.txt
Normal file
29
tools/hscollider/test_cases/pcre/mcclellan.txt
Normal file
@@ -0,0 +1,29 @@
|
||||
15900:/foo.+ba[rR]/sO
|
||||
15901:/foo.*ba[rR]/sO
|
||||
15902:/foo.*ba[rR].*tea/sO
|
||||
15903:/fo(|o.*ba[rR].*tea)/s
|
||||
15904:/fo($|o.*ba[rR].*tea)/s
|
||||
15905:/foo.*ba[rR]|tea.*cof[^f]ee/s
|
||||
15906:/(foo|bar).*gaz/sO
|
||||
15907:/foo.*(bar|gaz)/sO
|
||||
15908:/foo.*(bar|gaz).*tea/sO
|
||||
15909:/foo.*a(bar|gaz).*tea/sO
|
||||
15910:/foo.*a(bar|gaz).*(tea|aet)/sO
|
||||
15911:/foo(b[^a]r|g[^a]z).*tea/s
|
||||
15912:/foo(b[^a]r|g[^a]z).*(tea|aet)/s
|
||||
15913:/^p;;.*[_;]*.*:/sO
|
||||
15914:/p;;.*[_;]*.*:/sO
|
||||
15915:/p;;.*[_;]*.*:/O
|
||||
15916:/^AA.*(a.*(Z|XX)|b.*(Z|YY))/s
|
||||
15917:/^AA.*(a.*(Z|XX)|b.*Z)/s
|
||||
15918:/^pppp;;.*[_;]*.*:/sO
|
||||
15919:/pppp;;.*[_;]*.*:/sO
|
||||
15920:/pppp;;.*[_;]*.*:/O
|
||||
15921:/^AAAAA.*(a.*(Z|XX)|b.*(Z|YY))/s
|
||||
15922:/^AAAAAA.*(a.*(Z|XX)|b.*Z)/s
|
||||
15923:/(bdcd{26}|([kjrs]|b|b)+){2,4}/s
|
||||
15924:/literal\w{0,7}\d+/s
|
||||
15925:/literal\w{0,10}\d+/s
|
||||
15926:/literal\w{0,10}/s
|
||||
15927:/literal\w{5,}/s
|
||||
15928:/[ab][^ab]a.a/s8
|
66
tools/hscollider/test_cases/pcre/metacharacters.txt
Normal file
66
tools/hscollider/test_cases/pcre/metacharacters.txt
Normal file
@@ -0,0 +1,66 @@
|
||||
# Some tests for specific special metacharacters
|
||||
|
||||
# matches any byte
|
||||
24000:/\C/
|
||||
|
||||
# alarm, that is, the BEL character (hex 07)
|
||||
24001:/\a/O
|
||||
|
||||
# "control-x", where x is any character
|
||||
24002:/\cz/O
|
||||
24003:/\c{/O
|
||||
24004:/\c;/O
|
||||
24005:/\ca/O
|
||||
24006:/\c0/O
|
||||
|
||||
# escape (hex 1B)
|
||||
24007:/\e/O
|
||||
|
||||
# formfeed (hex 0C)
|
||||
24008:/\f/O
|
||||
|
||||
# linefeed (hex 0A)
|
||||
24009:/\n/O
|
||||
|
||||
# carriage return (hex 0D)
|
||||
24010:/\r/O
|
||||
|
||||
# tab (hex 09)
|
||||
24011:/\t/O
|
||||
|
||||
# real hex escapes
|
||||
24012:/\xdc\x{dc}\x00\x{ff}/O
|
||||
|
||||
# broken hex escape, interpreted as a null followed by some chars
|
||||
24013:/\x{dc/O
|
||||
|
||||
# more control
|
||||
24015:/\cA/O
|
||||
24016:/[\cz]/O
|
||||
24017:/[\c{]/O
|
||||
24018:/[\c;]/O
|
||||
24019:/[\ca]/O
|
||||
24020:/[\c0]/O
|
||||
24021:/[\cA]/O
|
||||
24022:/\c\n/O
|
||||
|
||||
# \8 and \9 are not back-references, they are simply the literals 8 and 9.
|
||||
24023:/\8 literal \9/
|
||||
|
||||
# Big numbers
|
||||
24024:/bignum \1111111111/
|
||||
24025:/bignum \2147483639/
|
||||
24026:/bignum \18888/
|
||||
24027:/bignum \128888/
|
||||
24028:/bignum \1238888/
|
||||
24029:/bignum \3778888/
|
||||
|
||||
# Octal escapes
|
||||
24030:/\060/
|
||||
24031:/\60/
|
||||
24032:/\12/
|
||||
24033:/\012/
|
||||
24034:/\0120/
|
||||
24035:/\120/
|
||||
24036:/\377/
|
||||
24037:/\80/
|
182
tools/hscollider/test_cases/pcre/notbob.txt
Normal file
182
tools/hscollider/test_cases/pcre/notbob.txt
Normal file
@@ -0,0 +1,182 @@
|
||||
15500:/notbob/sO
|
||||
15501:/^notbob/sO
|
||||
15502:/\Anotbob/sO
|
||||
15503:/.notbob/sO
|
||||
15504:/^.notbob/sO
|
||||
15505:/\A.notbob/sO
|
||||
15506:/.{6}notbob/sO
|
||||
15507:/^.{6}notbob/sO
|
||||
15508:/\A.{6}notbob/sO
|
||||
15509:/.*notbob/sO
|
||||
15510:/^.*notbob/sO
|
||||
15511:/\A.*notbob/sO
|
||||
15512:/.+notbob/sO
|
||||
15513:/^.+notbob/sO
|
||||
15514:/\A.+notbob/sO
|
||||
15515:/.{4,7}notbob/sO
|
||||
15516:/^.{4,7}notbob/sO
|
||||
15517:/\A.{4,7}notbob/sO
|
||||
15518:/(\A|)notbob/sO
|
||||
15519:/(^|.)notbob/s
|
||||
15520:/not.*bob/sO
|
||||
15521:/^not.*bob/sO
|
||||
15522:/\Anot.*bob/sO
|
||||
15523:/.not.*bob/sO
|
||||
15524:/^.not.*bob/sO
|
||||
15525:/\A.not.*bob/sO
|
||||
15526:/.{6}not.*bob/sO
|
||||
15527:/^.{6}not.*bob/sO
|
||||
15528:/\A.{6}not.*bob/sO
|
||||
15529:/.*not.*bob/sO
|
||||
15530:/^.*not.*bob/sO
|
||||
15531:/\A.*not.*bob/sO
|
||||
15532:/.+not.*bob/sO
|
||||
15533:/^.+not.*bob/sO
|
||||
15534:/\A.+not.*bob/sO
|
||||
15535:/.{4,7}not.*bob/sO
|
||||
15536:/^.{4,7}not.*bob/sO
|
||||
15537:/\A.{4,7}not.*bob/sO
|
||||
15538:/(\A|)not.*bob/sO
|
||||
15539:/(^|.)not.*bob/s
|
||||
15540:/no[A-Z].*bar/s
|
||||
15541:/^no[A-Z].*bar/sO
|
||||
15542:/\Ano[A-Z].*bar/sO
|
||||
15543:/.no[A-Z].*bar/s
|
||||
15544:/^.no[A-Z].*bar/s
|
||||
15545:/\A.no[A-Z].*bar/s
|
||||
15546:/.{6}no[A-Z].*bar/s
|
||||
15547:/^.{6}no[A-Z].*bar/s
|
||||
15548:/\A.{6}no[A-Z].*bar/s
|
||||
15549:/.*no[A-Z].*bar/s
|
||||
15550:/^.*no[A-Z].*bar/s
|
||||
15551:/\A.*no[A-Z].*bar/s
|
||||
15552:/.+no[A-Z].*bar/s
|
||||
15553:/^.+no[A-Z].*bar/s
|
||||
15554:/\A.+no[A-Z].*bar/s
|
||||
15555:/.{4,7}no[A-Z].*bar/s
|
||||
15556:/^.{4,7}no[A-Z].*bar/s
|
||||
15557:/\A.{4,7}no[A-Z].*bar/s
|
||||
15558:/(\A|)no[A-Z].*bar/s
|
||||
15559:/(^|.)no[A-Z].*bar/s
|
||||
15560:/notbobno[A-Z].*bar/s
|
||||
15561:/^notbobno[A-Z].*bar/sO
|
||||
15562:/\Anotbobno[A-Z].*bar/sO
|
||||
15563:/.notbobno[A-Z].*bar/s
|
||||
15564:/^.notbobno[A-Z].*bar/s
|
||||
15565:/\A.notbobno[A-Z].*bar/s
|
||||
15566:/.{6}notbobno[A-Z].*bar/s
|
||||
15567:/^.{6}notbobno[A-Z].*bar/s
|
||||
15568:/\A.{6}notbobno[A-Z].*bar/s
|
||||
15569:/.*notbobno[A-Z].*bar/s
|
||||
15570:/^.*notbobno[A-Z].*bar/s
|
||||
15571:/\A.*notbobno[A-Z].*bar/s
|
||||
15572:/.+notbobno[A-Z].*bar/s
|
||||
15573:/^.+notbobno[A-Z].*bar/s
|
||||
15574:/\A.+notbobno[A-Z].*bar/s
|
||||
15575:/.{4,7}notbobno[A-Z].*bar/s
|
||||
15576:/^.{4,7}notbobno[A-Z].*bar/s
|
||||
15577:/\A.{4,7}notbobno[A-Z].*bar/s
|
||||
15578:/(\A|)notbobno[A-Z].*bar/s
|
||||
15579:/(^|.)notbobno[A-Z].*bar/s
|
||||
15580:/^.{40}/sO
|
||||
15581:/^[^X]{40}/sO
|
||||
15582:/\A[^X]{40}/sO
|
||||
15583:/[^X]{40}/s
|
||||
15584:/.{4,}notbobno[A-Z].*bar/s
|
||||
15585:/^.{4,}no[A-Z].*bar/s
|
||||
15586:/\A.{4,}notbobno[A-Z].*bar/s
|
||||
15587:/.{4,}notbobno[A-Z].*bar/s
|
||||
15588:/^.{4,}no[A-Z].*bar/s
|
||||
15589:/\A.{4,}notbobno[A-Z].*bar/s
|
||||
15590:/(^.*bar)|bob/sO
|
||||
15600:/notbob/smO
|
||||
15601:/^notbob/smO
|
||||
15602:/\Anotbob/smO
|
||||
15603:/.notbob/smO
|
||||
15604:/^.notbob/smO
|
||||
15605:/\A.notbob/smO
|
||||
15606:/.{6}notbob/smO
|
||||
15607:/^.{6}notbob/smO
|
||||
15608:/\A.{6}notbob/smO
|
||||
15609:/.*notbob/smO
|
||||
15610:/^.*notbob/smO
|
||||
15611:/\A.*notbob/smO
|
||||
15612:/.+notbob/smO
|
||||
15613:/^.+notbob/smO
|
||||
15614:/\A.+notbob/smO
|
||||
15615:/.{4,7}notbob/smO
|
||||
15616:/^.{4,7}notbob/smO
|
||||
15617:/\A.{4,7}notbob/smO
|
||||
15618:/(\A|)notbob/smO
|
||||
15619:/(^|.)notbob/sm
|
||||
15620:/not.*bob/smO
|
||||
15621:/^not.*bob/smO
|
||||
15622:/\Anot.*bob/smO
|
||||
15623:/.not.*bob/smO
|
||||
15624:/^.not.*bob/smO
|
||||
15625:/\A.not.*bob/smO
|
||||
15626:/.{6}not.*bob/smO
|
||||
15627:/^.{6}not.*bob/smO
|
||||
15628:/\A.{6}not.*bob/smO
|
||||
15629:/.*not.*bob/smO
|
||||
15630:/^.*not.*bob/smO
|
||||
15631:/\A.*not.*bob/smO
|
||||
15632:/.+not.*bob/smO
|
||||
15633:/^.+not.*bob/smO
|
||||
15634:/\A.+not.*bob/smO
|
||||
15635:/.{4,7}not.*bob/smO
|
||||
15636:/^.{4,7}not.*bob/smO
|
||||
15637:/\A.{4,7}not.*bob/smO
|
||||
15638:/(\A|)not.*bob/smO
|
||||
15639:/(^|.)not.*bob/sm
|
||||
15640:/no[A-Z].*bar/sm
|
||||
15641:/^no[A-Z].*bar/sm
|
||||
15642:/\Ano[A-Z].*bar/smO
|
||||
15643:/.no[A-Z].*bar/sm
|
||||
15644:/^.no[A-Z].*bar/sm
|
||||
15645:/\A.no[A-Z].*bar/sm
|
||||
15646:/.{6}no[A-Z].*bar/sm
|
||||
15647:/^.{6}no[A-Z].*bar/sm
|
||||
15648:/\A.{6}no[A-Z].*bar/sm
|
||||
15649:/.*no[A-Z].*bar/sm
|
||||
15650:/^.*no[A-Z].*bar/sm
|
||||
15651:/\A.*no[A-Z].*bar/sm
|
||||
15652:/.+no[A-Z].*bar/sm
|
||||
15653:/^.+no[A-Z].*bar/sm
|
||||
15654:/\A.+no[A-Z].*bar/sm
|
||||
15655:/.{4,7}no[A-Z].*bar/sm
|
||||
15656:/^.{4,7}no[A-Z].*bar/sm
|
||||
15657:/\A.{4,7}no[A-Z].*bar/sm
|
||||
15658:/(\A|)no[A-Z].*bar/sm
|
||||
15659:/(^|.)no[A-Z].*bar/sm
|
||||
15660:/notbobno[A-Z].*bar/sm
|
||||
15661:/^notbobno[A-Z].*bar/sm
|
||||
15662:/\Anotbobno[A-Z].*bar/smO
|
||||
15663:/.notbobno[A-Z].*bar/sm
|
||||
15664:/^.notbobno[A-Z].*bar/sm
|
||||
15665:/\A.notbobno[A-Z].*bar/sm
|
||||
15666:/.{6}notbobno[A-Z].*bar/sm
|
||||
15667:/^.{6}notbobno[A-Z].*bar/sm
|
||||
15668:/\A.{6}notbobno[A-Z].*bar/sm
|
||||
15669:/.*notbobno[A-Z].*bar/sm
|
||||
15670:/^.*notbobno[A-Z].*bar/sm
|
||||
15671:/\A.*notbobno[A-Z].*bar/sm
|
||||
15672:/.+notbobno[A-Z].*bar/sm
|
||||
15673:/^.+notbobno[A-Z].*bar/sm
|
||||
15674:/\A.+notbobno[A-Z].*bar/sm
|
||||
15675:/.{4,7}notbobno[A-Z].*bar/sm
|
||||
15676:/^.{4,7}notbobno[A-Z].*bar/sm
|
||||
15677:/\A.{4,7}notbobno[A-Z].*bar/sm
|
||||
15678:/(\A|)notbobno[A-Z].*bar/sm
|
||||
15679:/(^|.)notbobno[A-Z].*bar/sm
|
||||
15680:/^.{40}/sm
|
||||
15681:/^[^X]{40}/sm
|
||||
15682:/\A[^X]{40}/smO
|
||||
15683:/[^X]{40}/sm
|
||||
15684:/.{4,}notbobno[A-Z].*bar/sm
|
||||
15685:/^.{4,}no[A-Z].*bar/sm
|
||||
15686:/\A.{4,}notbobno[A-Z].*bar/sm
|
||||
15687:/.{4,}notbobno[A-Z].*bar/sm
|
||||
15688:/^.{4,}no[A-Z].*bar/sm
|
||||
15689:/\A.{4,}notbobno[A-Z].*bar/sm
|
||||
15690:/(^.*bar)|bob/smO
|
37
tools/hscollider/test_cases/pcre/options.txt
Normal file
37
tools/hscollider/test_cases/pcre/options.txt
Normal file
@@ -0,0 +1,37 @@
|
||||
# Patterns that set/unset various options
|
||||
|
||||
# DOTALL
|
||||
24500:/(?s)foo.*bar/O
|
||||
24501:/foo.*bar(?s).baz/O
|
||||
24502:/(?-s)foo.*bar/sO
|
||||
24503:/(?-s)foo.*bar/O
|
||||
24504:/(?-s)foo.*bar(?s).*baz/sO
|
||||
|
||||
# CASELESS
|
||||
24600:/(?i)foobar/O
|
||||
24601:/(?i)foobar/iO
|
||||
24602:/(?-i)foobar/iO
|
||||
24603:/foo(?-i)bar/iO
|
||||
24604:/(?-i)foo(?i)bar/iO
|
||||
24605:/(?i)foo(?-i)bar/O
|
||||
|
||||
# MULTILINE
|
||||
24700:/(?m)foobar$/
|
||||
24701:/(?-m)foobar$/m
|
||||
|
||||
# EXTENDED
|
||||
24800:/(?x)foo bar/O
|
||||
24801:/(?x)foo bar(?-x) baz/O
|
||||
|
||||
# EPIC COMBOS
|
||||
24900:/(?imsx)^ foo .* bar .* baz/O
|
||||
|
||||
# MORE COMPLEX CASES
|
||||
24901:/^(?i:(?:abbr(?:ev(?:iation)?)))/
|
||||
24902:/(?s)foo(?i).bar/
|
||||
24903:/(?s)foo(?i-s).bar/
|
||||
24904:/foo(?i:bar)baz/
|
||||
24905:/nested(?i:caseless(?-i:caseful)caseless)literal/
|
||||
24906:/(a(?i)b|c)/
|
||||
24907:/(?i:hatstand|teakettle)/
|
||||
24908:/foo.*(?i-s:bar.*baz).*bing/s
|
12
tools/hscollider/test_cases/pcre/passthrough.txt
Normal file
12
tools/hscollider/test_cases/pcre/passthrough.txt
Normal file
@@ -0,0 +1,12 @@
|
||||
9000:/(abc|def)(xxx|yyyy)/O
|
||||
9001:/^(abc|def)(xxx|yyyy)/O
|
||||
9002:/(^anchored|floating)(XXX|YYY)/O
|
||||
|
||||
# Some long literals.
|
||||
9005:/a{40}/
|
||||
9006:/a{48}/
|
||||
9007:/a{49}/
|
||||
9008:/a{50}/
|
||||
9009:/q{1000}/
|
||||
9010:/coagulateshyperinnervationagitationreassuranceexchangeability/
|
||||
9011:/nonplasticerythematoussnakebitesjubilatedworklessnesses/
|
72
tools/hscollider/test_cases/pcre/prefilter.txt
Normal file
72
tools/hscollider/test_cases/pcre/prefilter.txt
Normal file
@@ -0,0 +1,72 @@
|
||||
# Prefiltering ("/P" flag) patterns.
|
||||
|
||||
# Assertions.
|
||||
90000:/foo(?!bar)/P
|
||||
90001:/foo(?!bar).*baz/sP
|
||||
90002:/(?<!foo)bar/P
|
||||
90003:/foo.*(?=bar)/P
|
||||
|
||||
# Backreferences
|
||||
90200:/(sens|respons)e and \1ibility/P
|
||||
90201:/((?i)rah)\s+\1/P
|
||||
90202:/(\d{3}).*\1/P
|
||||
90203:/this is a (["'])quoted string\1/P
|
||||
90204:/<([^>+i])>.*?</\1>/sP
|
||||
|
||||
# Relative backreferences.
|
||||
90205:/(foo|bar).*\g{-1}/P
|
||||
90206:/((foo|bar).*)\g{-2}/P
|
||||
|
||||
# More simple back-references.
|
||||
90207:/([abc])teakettle\1/P
|
||||
90208:/([abc]+)teakettle\1/P
|
||||
90209:/([a-f])_\1\1\1/P
|
||||
|
||||
# Named back-references, in a variety of syntaxes.
|
||||
90210:/(?<label>[0-9])backref\k<label>/P
|
||||
90211:/(?'label'[0-9])backref\k'label'/P
|
||||
90212:/(?<label>[0-9])backref\g{label}/P
|
||||
90213:/(?P<label>[0-9])backref(?P=label)/P
|
||||
|
||||
# Atomic groups
|
||||
90300:/a(?>bc|b)c/P
|
||||
|
||||
90320:/possessive \d*+/P
|
||||
90321:/possessive \d++/P
|
||||
90322:/possessive \d?+/P
|
||||
90323:/possessive \d{4}+/P
|
||||
90324:/possessive \d{6,}+/P
|
||||
90325:/possessive \d{10,20}+/P
|
||||
|
||||
# Conditional references
|
||||
90400:/a (pine)?(?(1)apple)/P
|
||||
90401:/a (water)?(?(1)melon|orange)/P
|
||||
90402:/((?<animal>hamster)|guinea pig) (?(<animal>)party)/P
|
||||
90403:/((?<animal>hamster)|guinea pig) (?(<animal>)party|nothing)/P
|
||||
90404:/((?'animal'hamster)|guinea pig) (?('animal')party|nothing)/P
|
||||
90405:/((?P<animal>hamster)|guinea pig) (?(animal)party|nothing)/P
|
||||
|
||||
# Empty conditional reference group
|
||||
90406:/water(-buffalo)?(?(1)) field/P
|
||||
|
||||
# DEFINE group, which can only have one branch and is always false
|
||||
90407:/jabber(?(DEFINE)wocky)/P
|
||||
|
||||
# Conditional references with assertions
|
||||
90408:/^(?(?=hello)[a-z]+|[a-z]{3})/P
|
||||
90409:/^(?(?!hello)[a-z]+|[a-z]{3})/P
|
||||
90410:/[a-z]+(?(?<=hatstand)teakettle|badgerbrush)/P
|
||||
90411:/[a-z]+(?(?<!hatstand)teakettle|badgerbrush)/P
|
||||
|
||||
# This is a case that stresses some of the rose merging code.
|
||||
90413:/([Cc]C|[ab])(d|Ba[aD]De.{14})((EdeDa|d|CC|[ab]){2,8}|.bE|(\b)*|b|Cd|[ED]EE[adeD]){7}[EA]ea+/sP
|
||||
|
||||
# Repeats of small regions.
|
||||
90414:/qq(a.|.a){300}/sP
|
||||
90415:/prefix(\d{2}|\D){300}/P
|
||||
|
||||
# Repeats of modifiers
|
||||
90416:/((?i)a)foo\1/P
|
||||
|
||||
# Very large case that stresses prefilter region replacement
|
||||
90417:/dDcB.Ac(((\x65CA|D){0,19}){7}){1,26}/s8P
|
230
tools/hscollider/test_cases/pcre/priority.txt
Normal file
230
tools/hscollider/test_cases/pcre/priority.txt
Normal file
@@ -0,0 +1,230 @@
|
||||
14001:/a([a-z]+)??b+/
|
||||
14002:/a([a-z]{3,7}?)b+/
|
||||
14003:/abc|abcd|abcde/O
|
||||
14004:/a([a-z]+)?b+/
|
||||
14005:/foo(abc||abcd|abcde)bar/O
|
||||
14006:/a(?:fg||hi)z/O
|
||||
14007:/a(foo)??b+/
|
||||
14008:/ab{2,3}?c+/
|
||||
14009:/ab{2,3}c+/
|
||||
14010:/a(ttt)+?z/
|
||||
14011:/a(ttt)+z/
|
||||
14012:/a(ttt)*?z/
|
||||
14013:/a[a-z]{0,4}?b+/
|
||||
14014:/^a[a-z]{0,4}?b+/
|
||||
14015:/a+?b/O
|
||||
14016:/a*?b/O
|
||||
14017:/q(a||b){3}q/O
|
||||
14018:/q(a|){2}q/O
|
||||
14019:/q(a|){2}?q/O
|
||||
14020:/a?bb/O
|
||||
14021:/a??bb/O
|
||||
14022:/q(a||b)+q/
|
||||
14023:/q(a||b)(a||b)(a||b)q/O
|
||||
14024:/q(a??){3}/O
|
||||
14025:/(foo:.*?){3}/
|
||||
14026:/foo\w{0,10}bar/
|
||||
14027:/foo\w{0,10}?bar/
|
||||
14028:/foo\w{1,10}bar/
|
||||
14029:/foo\w{1,10}?bar/
|
||||
14030:/foo\w{10}bar/
|
||||
14031:/foo\w{10}?bar/
|
||||
14032:/foo(?:\w?){0,10}bar/
|
||||
14033:/foo(?:\w??){0,10}bar/
|
||||
14034:/foo(?:\w?){10}bar/
|
||||
14035:/foo(?:\w??){10}bar/
|
||||
14036:/foo\w{10,}bar/
|
||||
14037:/foo\w{10,}?bar/
|
||||
14038:/foo()+?/
|
||||
14039:/foo()+/
|
||||
14040:/foo()*?/
|
||||
14041:/foo()*/
|
||||
14042:/foo(()+?)bar/
|
||||
14043:/foo(()+)bar/
|
||||
14044:/foo(()*?)bar/
|
||||
14045:/foo(()*)bar/
|
||||
14046:/foo()+?bar/
|
||||
14047:/foo(())+?bar/
|
||||
14048:/foo(()+?)+?bar/
|
||||
14049:/foo(()())+?bar/
|
||||
14050:/^((c*?)|b){0,3}c/s
|
||||
14051:/^((c*)|b){3}c/s
|
||||
14052:/^((c*)|b){2,3}?c/s
|
||||
14053:/^((c*)|b){0,3}?c/s
|
||||
14054:/^((c*)|b){2,3}c/s
|
||||
14055:/^((c*)|b){0,3}c/s
|
||||
14056:/^((c*?)|b){3}c/s
|
||||
14057:/^((c*?)|b){2,3}?c/s
|
||||
14058:/^((c*?)|b){0,3}?c/s
|
||||
14059:/^((c*?)|b){2,3}c/s
|
||||
14060:/^S((a?)(\B))*T/s
|
||||
14061:/^S((a??)(\B))*T/s
|
||||
14062:/^S((a?)(\B))*?T/s
|
||||
14063:/^S((a??)(\B))*?T/s
|
||||
14064:/^[S;]((a?)(\B))*[T;]/s
|
||||
14065:/^[S;]((a??)(\B))*[T;]/s
|
||||
14066:/^[S;]((a?)(\B))*?[T;]/s
|
||||
14067:/^[S;]((a??)(\B))*?[T;]/s
|
||||
14068:/^[S;]((a?)(\b))*[T;]/s
|
||||
14069:/^[S;]((a??)(\b))*[T;]/s
|
||||
14070:/^[S;]((a?)(\b))*?[T;]/s
|
||||
14071:/^[S;]((a??)(\b))*?[T;]/s
|
||||
14072:/^S((a?)(\B))*/s
|
||||
14073:/^S((a??)(\B))*/s
|
||||
14074:/^S((a?)(\B))*?/s
|
||||
14075:/^S((a??)(\B))*?/s
|
||||
14076:/^S(((a?)(\B))*)*T/s
|
||||
14077:/^S(((a??)(\B))*)*T/s
|
||||
14078:/^S(((a?)(\B))*?)*T/s
|
||||
14079:/^S(((a??)(\B))*?)*T/s
|
||||
14080:/^[S;](((a?)(\B))*)*[T;]/s
|
||||
14081:/^[S;](((a??)(\B))*)*[T;]/s
|
||||
14082:/^[S;](((a?)(\B))*?)*[T;]/s
|
||||
14083:/^[S;](((a??)(\B))*?)*[T;]/s
|
||||
14084:/^[S;](((a?)(\b))*)*[T;]/s
|
||||
14085:/^[S;](((a??)(\b))*)*[T;]/s
|
||||
14086:/^[S;](((a?)(\b))*?)*[T;]/s
|
||||
14087:/^[S;](((a??)(\b))*?)*[T;]/s
|
||||
14088:/^S(((a?)(\B))*)*/s
|
||||
14089:/^S(((a?)(\B))*)*?/s
|
||||
14090:/^S(((a?)(\B))*?)*/s
|
||||
14091:/^S(((a?)(\B))*?)*?/s
|
||||
14092:/^[S;](((a?)(\B))*)*?[T;]/s
|
||||
14093:/^[S;](((a??)(\B))*)*?[T;]/s
|
||||
14094:/^[S;](((a?)(\B))*?)*?[T;]/s
|
||||
14095:/^[S;](((a??)(\B))*?)*?[T;]/s
|
||||
14096:/^[S;](((a?)(\b))*)*?[T;]/s
|
||||
14097:/^[S;](((a??)(\b))*)*?[T;]/s
|
||||
14098:/^[S;](((a?)(\b))*?)*?[T;]/s
|
||||
14099:/^[S;](((a??)(\b))*?)*?[T;]/s
|
||||
14100:/^(a|(c)*|b){1,3}a/s
|
||||
14101:/^(c??|b){2}c/s
|
||||
14102:/^((c??)|b){0,2}?c/s
|
||||
14103:/^((c?)|b){0,2}?c/s
|
||||
14104:/X(y||u){2,}/
|
||||
14105:/X(y||u){2,}?/
|
||||
14106:/X((\B)|e)+/s
|
||||
14107:/X((\B)|e)*/s
|
||||
14108:/X((\B)|e)+?/s
|
||||
14109:/X((\B)|e)*?/s
|
||||
14110:/^b(z?|c|a)*c/s
|
||||
14111:/^b(z||c|a)*c/s
|
||||
14112:/^b(|c|a)*c/s
|
||||
14113:/^b((c|()|a)*)c/s
|
||||
14114:/^b((c|)+)+X/s
|
||||
14115:/^b((z??)(c|)+)+X/s
|
||||
14116:/^b((c|)+(z??))+X/s
|
||||
14117:/^ab((x|)(b(c)*|))*d/
|
||||
14118:/^ab((x|)(bc)*())*d/
|
||||
14119:/^b(()(c|)*)+X/s
|
||||
14120:/^b(()(c|)+)+X/s
|
||||
14121:/^X((c*|a|z)*)*Y/s
|
||||
14122:/^b((c|()|a)*)*c/s
|
||||
14123:/^b((c|()|a)*?)*c/s
|
||||
14124:/^b((c|()|a)*)*?c/s
|
||||
14125:/^b((c|()|a)*?)*?c/s
|
||||
14126:/^b(c|()|a)*c/s
|
||||
14127:/^b(c?|a)*c/s
|
||||
14128:/^b(z|c??|a)*c/s
|
||||
14129:/^b(c??|a)*c/s
|
||||
14130:/^b((c|(\B\B)|a)*)*c/s
|
||||
14131:/^b((c|(\B\B)|()|a)*)*c/s
|
||||
14132:/^b((c|(\B\B)|(\b\b)|a)*)*c/s
|
||||
14133:/^b(((c|(\B\B)|a)(z?))*)*c/s
|
||||
14134:/^b(((c|(\B\B)|()|a)(z?))*)*c/s
|
||||
14135:/^b(((c|(\B\B)|(\b\b)|a)(z?))*)*c/s
|
||||
14136:/^b((c|(\b\b)|a)*)*c/s
|
||||
14137:/^b((c|(\b\b)|()|a)*)*c/s
|
||||
14138:/^b((c|(\b\b)|(\b\b)|a)*)*c/s
|
||||
14139:/^b((c|(\b)|()|a)*)*c/s
|
||||
14140:/^b((c|(\b)|()|a)*)*?c/s
|
||||
14141:/^b((c|(\b)|()|a)*?)*c/s
|
||||
14142:/^b((c|(\b)|()|a)*?)*?c/s
|
||||
14143:/^b((c|(\B)|()|a)*)*c/s
|
||||
14144:/^b((c|(\B)|()|a)*)*?c/s
|
||||
14145:/^b((c|(\B)|()|a)*?)*c/s
|
||||
14146:/^b((c|(\B)|()|a)*?)*?c/s
|
||||
14147:/^b((c|()|(\B)|a)*?)*c/s
|
||||
14148:/^b((c|()|(\B)|a)*)*?c/s
|
||||
14149:/^b((c|()|(\B)|a)*)*c/s
|
||||
14150:/^XYZ(((a*)*)*)*/
|
||||
14151:/^XYZ((a*)*)*/
|
||||
14152:/^XYZ(((a*)*)*)*b/
|
||||
14153:/^XYZ((a*)*)*b/
|
||||
14154:/^XYZ(((a?)*)*)*b/
|
||||
14155:/^XYZ(((a*)?)*)*b/
|
||||
14156:/^XYZ((((a*)*)*)*)*b/
|
||||
14157:/^XYZ(((((a*)*)*)*)*)*b/
|
||||
14158:/^XYZ(((a?)*)*)+/
|
||||
14159:/^XYZ(((((a*())*())*())*())*())*/
|
||||
14160:/(\b)*baa/
|
||||
14161:/;(\b)*baa/
|
||||
14162:/^x((a)|()|(b)){0,2}y/s
|
||||
14163:/^x((a)|()|(b)){0,2}?y/s
|
||||
14164:/^x((a)|()|(b)){0,}y/s
|
||||
14165:/^x((a)|()|(b)){0,}?y/s
|
||||
14166:/^x((a)|()|(b)){1,}y/s
|
||||
14167:/^x((a)|()|(b)){1,}?y/s
|
||||
14168:/^x((a)|()|(b)){5,8}y/s
|
||||
14169:/^x((a)|()|(b)){5,8}?y/s
|
||||
14170:/^x((a)|()|(b)){0,5}y/s
|
||||
14171:/^x((a)|()|(b)){0,5}?y/s
|
||||
14172:/^x((a)|()|(b)){1,5}y/s
|
||||
14173:/^x((a)|()|(b)){1,5}?y/s
|
||||
14174:/^X(_(X||A.)|.)+?/s
|
||||
14175:/^X(_(X||A.)|.)+/s
|
||||
14176:/^X(_(A.||X)|.)+?/s
|
||||
14177:/^X(_(A.||X)|.)+/s
|
||||
14178:/^X(_(X||A.)|.)+?/s
|
||||
14179:/^X(_(X||A.)|.)+/s
|
||||
14180:/^X(_(A.||X)|.){3,}?/s
|
||||
14181:/^X(_(A.||X)|.){3,}/s
|
||||
14182:/^X(_(A.||X)|.){3,5}?/s
|
||||
14183:/^X(_(A.||X)|.){3,5}/s
|
||||
14184:/^b((()|c|a)*)*c/s
|
||||
14185:/^b((()|c|a)*?)*c/s
|
||||
14186:/^b((()|c|a)*)*?c/s
|
||||
14187:/^b((()|c|a)*?)*?c/s
|
||||
14188:/^b(()|(c|()|a)*)*c/s
|
||||
14189:/^b(()|(c|()|a)*?)*c/s
|
||||
14190:/^b(()|(c|()|a)*)*?c/s
|
||||
14191:/^b(()|(c|()|a)*?)*?c/s
|
||||
14192:/^b(()|(|a)*)*c/s
|
||||
14193:/^b(()|(|a)*?)*c/s
|
||||
14194:/^b(()|(|a)*)*?c/s
|
||||
14195:/^b(()|(|a)*?)*?c/s
|
||||
14196:/^b(()|(|a)*|[cC])*c/s
|
||||
14197:/^b(()|(|a)*?|[cC])*c/s
|
||||
14198:/^b(()|(|a)*|[cC])*?c/s
|
||||
14199:/^b(()|(|a)*?|[cC])*?c/s
|
||||
14200:/^b(()|(c|()|a)*)*c/s
|
||||
14201:/^b(()|(c|()|a)*?)*c/s
|
||||
14202:/^b(()|(c|()|a)*)*?c/s
|
||||
14203:/^b(()|(c|()|a)*?)*?c/s
|
||||
14204:/^b(()|(|a)*)*c/s
|
||||
14205:/^b(()|(|a)*?)*c/s
|
||||
14206:/^b(()|(|a)*)*?c/s
|
||||
14207:/^b(()|(|a)*?)*?c/s
|
||||
14208:/^b(()|(|a)*|[cC])*c/s
|
||||
14209:/^b(()|(|a)*?|[cC])*c/s
|
||||
14210:/^b(()|(|a)*|[cC])*?c/s
|
||||
14211:/^b(()|(|a)*?|[cC])*?c/s
|
||||
14212:/^b(d|()|((a|()|b)*))+c/s
|
||||
14213:/^b(d|()|((a|())*))+c/s
|
||||
14214:/^b(d|()|((()|a)*))+c/s
|
||||
14215:/^b(d|((a|())*)|())+c/s
|
||||
14216:/^b(d|()|((a|())+))+c/s
|
||||
14217:/^b(()|(|a)*|d)*c/s
|
||||
14218:/^([ba]|){1,3}?a/si
|
||||
14219:/^([ba]){1,3}?a/si
|
||||
14220:/^(c|){2,3}?X/s
|
||||
14221:/^(c|){2,4}?X/s
|
||||
14222:/^b((|a)*?)*?c/s
|
||||
14223:/^b((|a)*?){0,10}?c/s
|
||||
14224:/^b((|a){0,10}?)*?c/s
|
||||
14225:/^b((|a){0,10}?){0,10}?c/s
|
||||
14226:/^b((|a)+?)+?c/s
|
||||
14227:/^b((|a)*?){10,}?c/s
|
||||
14228:/^b((|a){10,}?)*?c/s
|
||||
14229:/^b((|a){10,}?){10,}?c/s
|
||||
14230:/^x(a|()){1,3}?y/s
|
94
tools/hscollider/test_cases/pcre/puff.txt
Normal file
94
tools/hscollider/test_cases/pcre/puff.txt
Normal file
@@ -0,0 +1,94 @@
|
||||
15200:/foo[^X]{15}/sO
|
||||
15201:/foo[^X]{16}/sO
|
||||
15202:/foo[^X]{17}/sO
|
||||
15203:/foo.*[^X]{17}/s
|
||||
15204:/foo[^X]{17}blah/sO
|
||||
15205:/foo[^XY]{17}/sO
|
||||
15206:/foo[^X]{17}$/s
|
||||
15207:/[^X]{17}/s
|
||||
15208:/^[^X]{17}/sO
|
||||
15209:/fo.*o[^X]{15}/s
|
||||
15210:/fo.*o[^X]{16}/s
|
||||
15211:/fo.*o[^X]{17}/s
|
||||
15212:/[fb][oa][or][^X]{15}/s
|
||||
15213:/[fb][oa][or][^X]{16}/s
|
||||
15214:/[fb][oa][or][^X]{17}/s
|
||||
15215:/f[^X]{255}/H
|
||||
15216:/f[^X]{256}/H
|
||||
15217:/f[^X]{257}/H
|
||||
15218:/^[fb][oa][or][^X]{17}/sO
|
||||
15219:/f.{255}/H
|
||||
15220:/f.{256}/H
|
||||
15221:/f.{257}/H
|
||||
15222:/^[fb][oa][or].{17}/sO
|
||||
15223:/foo.{15}/sHO
|
||||
15224:/foo.{16}/sHO
|
||||
15225:/foo.{17}/sHO
|
||||
15226:/foo.*.{17}/sH
|
||||
15227:/foo.{17}blah/sHO
|
||||
15228:/foo.{17}/HO
|
||||
15229:/foo.{17}$/sH
|
||||
15230:/.{17}/sH
|
||||
15231:/^.{17}/sH
|
||||
15232:/fo.*o.{15}/sH
|
||||
15233:/fo.*o.{16}/sH
|
||||
15234:/fo.*o.{17}/sH
|
||||
15235:/[fb][oa][or].{15}/sH
|
||||
15236:/[fb][oa][or].{16}/sH
|
||||
15237:/[fb][oa][or].{17}/sH
|
||||
15238:/foo.{30,}/s
|
||||
15239:/foo.*bar.{30}/s
|
||||
15240:/foo.*bar.{30,}/s
|
||||
|
||||
15241:/^hhechbd(je[wn][oqc]bo|.{18}|l|.|r)/O
|
||||
15242:/^ii.m.x(.{22}|p|y|n*)/
|
||||
|
||||
15243:/f[^XY]{255}/H
|
||||
15244:/f[^XY]{256}/H
|
||||
15245:/f[^XY]{257}/H
|
||||
|
||||
15246:/bar.{16,}/s
|
||||
15247:/bar[^X]{16,}/s
|
||||
15248:/bar[^r]{16,}/s
|
||||
15249:/bar[^XY]{16,}/s
|
||||
15250:/bar[^rY]{16,}/s
|
||||
|
||||
15251:/foo.*bar[^r]{16}/s
|
||||
15252:/f.{254,}/s
|
||||
15253:/f.{255,}/s
|
||||
15254:/f.{256,}/s
|
||||
|
||||
15255:/a[fg]{35,}/s
|
||||
15256:/f[fg]{35,}/s
|
||||
15257:/[af][fg]{35,}/s
|
||||
|
||||
15258:/^[^X]{50}/
|
||||
15259:/^[^X]{50,}/
|
||||
15260:/^[^XY]{50}/
|
||||
15261:/^[^XY]{50,}/
|
||||
15262:/^[X]{50}/
|
||||
15263:/^[X]{50,}/
|
||||
15264:/^[XY]{50}/
|
||||
15265:/^[XY]{50,}/
|
||||
|
||||
15266:/x/s
|
||||
15267:/.{52}/s
|
||||
15268:/.{100}/s
|
||||
15269:/.{51}/s
|
||||
15270:/^.{100}/s
|
||||
15271:/^.{62,}/s
|
||||
15272:/^.{60,}/s
|
||||
15273:/^.{60,}|.{65}/s
|
||||
15274:/^.{60}|.{65}/s
|
||||
15275:/^.{60}|.{55}/s
|
||||
15276:/^[^a]{51}/s
|
||||
15277:/^[^a]{72}/s
|
||||
15278:/^[^a]{100,}/s
|
||||
15279:/[^X]{40}/s
|
||||
15280:/^.*[^X]{20}|[^Y]{21}|.{60}|^.{50,}/s
|
||||
15281:/[^X]{18}/s
|
||||
15282:/[^XY]{18}/s
|
||||
15283:/[^X\x00\x11\x22\x33\x44\x55\x66\x77\x88]{18}/s
|
||||
|
||||
15284:/^[A-Z]*\d\.\d{60}/s
|
||||
15285:/[A-Z]*\d.\d{60}/sH
|
53
tools/hscollider/test_cases/pcre/pug.txt
Normal file
53
tools/hscollider/test_cases/pcre/pug.txt
Normal file
@@ -0,0 +1,53 @@
|
||||
5000:/[^X]:.{111,}pug/s
|
||||
5001:/[^X]:.{111,}[pugPUGxyz]{3}/s
|
||||
5002:/aaa:.{111,}pug/s
|
||||
5003:/aaa:.{111,}[pugPUGxyz]{3}/s
|
||||
5004:/[^X]:.{112,}pug/s
|
||||
5005:/[^X]:.{112,}[pugPUGxyz]{3}/s
|
||||
5006:/aaa:.{112,}pug/s
|
||||
5007:/aaa:.{112,}[pugPUGxyz]{3}/s
|
||||
5008:/[^X]:.{113,}pug/s
|
||||
5009:/[^X]:.{113,}[pugPUGxyz]{3}/s
|
||||
5010:/aaa:.{113,}pug/s
|
||||
5011:/aaa:.{113,}[pugPUGxyz]{3}/s
|
||||
5012:/[^X]:.{127,}pug/s
|
||||
5013:/[^X]:.{127,}[pugPUGxyz]{3}/s
|
||||
5014:/aaa:.{127,}pug/s
|
||||
5015:/aaa:.{127,}[pugPUGxyz]{3}/s
|
||||
5016:/[^X]:.{128,}pug/s
|
||||
5017:/[^X]:.{128,}[pugPUGxyz]{3}/s
|
||||
5018:/aaa:.{128,}pug/s
|
||||
5019:/aaa:.{128,}[pugPUGxyz]{3}/s
|
||||
5020:/[^X]:.{129,}pug/s
|
||||
5021:/[^X]:.{129,}[pugPUGxyz]{3}/s
|
||||
5022:/aaa:.{129,}pug/s
|
||||
5023:/aaa:.{129,}[pugPUGxyz]{3}/s
|
||||
5024:/[^X]:[a-z]{111,}pug/s
|
||||
5025:/[^X]:[a-z]{111,}[pugPUGxyz]{3}/s
|
||||
5026:/aaa:[a-z]{111,}pug/s
|
||||
5027:/aaa:[a-z]{111,}[pugPUGxyz]{3}/s
|
||||
5028:/[^X]:[a-z]{112,}pug/s
|
||||
5029:/[^X]:[a-z]{112,}[pugPUGxyz]{3}/s
|
||||
5030:/aaa:[a-z]{112,}pug/s
|
||||
5031:/aaa:[a-z]{112,}[pugPUGxyz]{3}/s
|
||||
5032:/[^X]:[a-z]{113,}pug/s
|
||||
5033:/[^X]:[a-z]{113,}[pugPUGxyz]{3}/s
|
||||
5034:/aaa:[a-z]{113,}pug/s
|
||||
5035:/aaa:[a-z]{113,}[pugPUGxyz]{3}/s
|
||||
5036:/[^X]:[a-z]{127,}pug/s
|
||||
5037:/[^X]:[a-z]{127,}[pugPUGxyz]{3}/s
|
||||
5038:/aaa:[a-z]{127,}pug/s
|
||||
5039:/aaa:[a-z]{127,}[pugPUGxyz]{3}/s
|
||||
5040:/[^X]:[a-z]{128,}pug/s
|
||||
5041:/[^X]:[a-z]{128,}[pugPUGxyz]{3}/s
|
||||
5042:/aaa:[a-z]{128,}pug/s
|
||||
5043:/aaa:[a-z]{128,}[pugPUGxyz]{3}/s
|
||||
5044:/[^X]:[a-z]{129,}pug/s
|
||||
5045:/[^X]:[a-z]{129,}[pugPUGxyz]{3}/s
|
||||
5046:/aaa:[a-z]{129,}pug/s
|
||||
5047:/aaa:[a-z]{129,}[pugPUGxyz]{3}/s
|
||||
5048:/([^X]:|:[^Y]).{120,}[pugPUGxyz]{3}/s
|
||||
5049:/([^X]:|:[^Y])[a-z]{120,}[pugPUGxyz]{3}/s
|
||||
5900:/[^X][^Xx][defg][^:*][deDE][^bcC][deAE]/
|
||||
5901:/(^|[x\- B])(a|[bBbB]|cc)[^A-Z0-9]{112,}[deDE][^bcC][geAE]/s
|
||||
5902:/(^|[x\- B])(a|bb|cc).{113,}[deDE][^bcC][geAE]/s
|
47
tools/hscollider/test_cases/pcre/redundancy.txt
Normal file
47
tools/hscollider/test_cases/pcre/redundancy.txt
Normal file
@@ -0,0 +1,47 @@
|
||||
15100:/hatstand.*a?teakettle/sO
|
||||
15101:/foo(A|A)bar/O
|
||||
15102:/foo(1|2|3|4)bar/O
|
||||
15103:/^.*a+bc/
|
||||
15104:/foo[Aa]+a+bar/
|
||||
15105:/foo[Aa]+[aB]+bar/
|
||||
15106:/foo[AaB]+[aB]+bar/
|
||||
15107:/foo[Aa]*[aB]+bar/
|
||||
15108:/foo[AaB]*[aB]+bar/
|
||||
15109:/a(b|c.+)d+e/
|
||||
15110:/a(b|c.*)d+e/
|
||||
|
||||
# cases specifically aimed at the cyclic-dom redundancy code
|
||||
15120:/foo.*a*bar/O
|
||||
15121:/fooa*.*bar/O
|
||||
15122:/fooa?.*bar/O
|
||||
15123:/foo.*a?bar/O
|
||||
15124:/foo.*a+bar/O
|
||||
15125:/fooa+.*bar/O
|
||||
15126:/foo.+a*bar/O
|
||||
15127:/fooa*.+bar/O
|
||||
15128:/fooa?.+bar/O
|
||||
15129:/foo.+a?bar/O
|
||||
15130:/foo.+a+bar/O
|
||||
15131:/fooa+.+bar/O
|
||||
|
||||
# edge redun
|
||||
15132:/abc(a|\B){6}def/sO
|
||||
15133:/AAA([A-Z]_?)+(([\x00-\x3d]|[\x3f-\xff])\x00?)+ZZZ/s
|
||||
|
||||
# misc opt
|
||||
15134:/(Y.|X.+)a[^a]*foo/s
|
||||
15135:/oof[^a]*a(.+X|.Y)/s
|
||||
15136:/foo.*<[^<]*bar/s
|
||||
15137:/foo[^<]*<.*bar/s
|
||||
15138:/foo\b.*bar/s
|
||||
15139:/foo.*\bbar/s
|
||||
15140:/foo[^<]*<[^>]*bar/s
|
||||
15141:/foo[^<]*>[^>]*bar/s
|
||||
15142:/foo[^<=]*[<=][^>=]*bar/s
|
||||
15143:/foo[^<=]*[>=][^>=]*bar/s
|
||||
|
||||
# rev misc opt
|
||||
15144:/aa+.aaaa/si
|
||||
|
||||
# needs cyclic path redundancy followed by other redundancy passes.
|
||||
15145:/^abc.(.*|foo)\Sa..a...\S./s
|
533
tools/hscollider/test_cases/pcre/ucp.txt
Normal file
533
tools/hscollider/test_cases/pcre/ucp.txt
Normal file
@@ -0,0 +1,533 @@
|
||||
85000:/\p{C}/8W
|
||||
85001:/\p{Cc}/8W
|
||||
85002:/\p{Cf}/8W
|
||||
85003:/\p{Cn}/8W
|
||||
85004:/\p{Co}/8W
|
||||
85005:/\p{Cs}/8W
|
||||
85006:/\p{L}/8W
|
||||
85007:/\p{L&}/8W
|
||||
85008:/\p{Ll}/8W
|
||||
85009:/\p{Lm}/8W
|
||||
85010:/\p{Lo}/8W
|
||||
85011:/\p{Lt}/8W
|
||||
85012:/\p{Lu}/8W
|
||||
85013:/\p{M}/8W
|
||||
85014:/\p{Mc}/8W
|
||||
85015:/\p{Me}/8W
|
||||
85016:/\p{Mn}/8W
|
||||
85017:/\p{N}/8W
|
||||
85018:/\p{Nd}/8W
|
||||
85019:/\p{Nl}/8W
|
||||
85020:/\p{No}/8W
|
||||
85021:/\p{P}/8W
|
||||
85022:/\p{Pc}/8W
|
||||
85023:/\p{Pd}/8W
|
||||
85024:/\p{Pe}/8W
|
||||
85025:/\p{Pf}/8W
|
||||
85026:/\p{Pi}/8W
|
||||
85027:/\p{Po}/8W
|
||||
85028:/\p{Ps}/8W
|
||||
85029:/\p{S}/8W
|
||||
85030:/\p{Sc}/8W
|
||||
85031:/\p{Sk}/8W
|
||||
85032:/\p{Sm}/8W
|
||||
85033:/\p{So}/8W
|
||||
85034:/\p{Xan}/8W
|
||||
85035:/\p{Xps}/8W
|
||||
85036:/\p{Xsp}/8W
|
||||
85037:/\p{Xwd}/8W
|
||||
85038:/\p{Z}/8W
|
||||
85039:/\p{Zl}/8W
|
||||
85040:/\p{Zp}/8W
|
||||
85041:/\p{Zs}/8W
|
||||
85042:/\p{Arabic}/8W
|
||||
85043:/\p{Armenian}/8W
|
||||
85044:/\p{Avestan}/8W
|
||||
85045:/\p{Balinese}/8W
|
||||
85046:/\p{Bamum}/8W
|
||||
85047:/\p{Batak}/8W
|
||||
85048:/\p{Bengali}/8W
|
||||
85049:/\p{Bopomofo}/8W
|
||||
85050:/\p{Brahmi}/8W
|
||||
85051:/\p{Braille}/8W
|
||||
85052:/\p{Buginese}/8W
|
||||
85053:/\p{Buhid}/8W
|
||||
85054:/\p{Canadian_Aboriginal}/8W
|
||||
85055:/\p{Carian}/8W
|
||||
85056:/\p{Cham}/8W
|
||||
85057:/\p{Cherokee}/8W
|
||||
85058:/\p{Common}/8W
|
||||
85059:/\p{Coptic}/8W
|
||||
85060:/\p{Cuneiform}/8W
|
||||
85061:/\p{Cypriot}/8W
|
||||
85062:/\p{Cyrillic}/8W
|
||||
85063:/\p{Deseret}/8W
|
||||
85064:/\p{Devanagari}/8W
|
||||
85065:/\p{Egyptian_Hieroglyphs}/8W
|
||||
85066:/\p{Ethiopic}/8W
|
||||
85067:/\p{Georgian}/8W
|
||||
85068:/\p{Glagolitic}/8W
|
||||
85069:/\p{Gothic}/8W
|
||||
85070:/\p{Greek}/8W
|
||||
85071:/\p{Gujarati}/8W
|
||||
85072:/\p{Gurmukhi}/8W
|
||||
85073:/\p{Han}/8W
|
||||
85074:/\p{Hangul}/8W
|
||||
85075:/\p{Hanunoo}/8W
|
||||
85076:/\p{Hebrew}/8W
|
||||
85077:/\p{Hiragana}/8W
|
||||
85078:/\p{Imperial_Aramaic}/8W
|
||||
85079:/\p{Inherited}/8W
|
||||
85080:/\p{Inscriptional_Pahlavi}/8W
|
||||
85081:/\p{Inscriptional_Parthian}/8W
|
||||
85082:/\p{Javanese}/8W
|
||||
85083:/\p{Kaithi}/8W
|
||||
85084:/\p{Kannada}/8W
|
||||
85085:/\p{Katakana}/8W
|
||||
85086:/\p{Kayah_Li}/8W
|
||||
85087:/\p{Kharoshthi}/8W
|
||||
85088:/\p{Khmer}/8W
|
||||
85089:/\p{Lao}/8W
|
||||
85090:/\p{Latin}/8W
|
||||
85091:/\p{Lepcha}/8W
|
||||
85092:/\p{Limbu}/8W
|
||||
85093:/\p{Linear_B}/8W
|
||||
85094:/\p{Lisu}/8W
|
||||
85095:/\p{Lycian}/8W
|
||||
85096:/\p{Lydian}/8W
|
||||
85097:/\p{Malayalam}/8W
|
||||
85098:/\p{Mandaic}/8W
|
||||
85099:/\p{Meetei_Mayek}/8W
|
||||
85100:/\p{Mongolian}/8W
|
||||
85101:/\p{Myanmar}/8W
|
||||
85102:/\p{New_Tai_Lue}/8W
|
||||
85103:/\p{Nko}/8W
|
||||
85104:/\p{Ogham}/8W
|
||||
85105:/\p{Ol_Chiki}/8W
|
||||
85106:/\p{Old_Italic}/8W
|
||||
85107:/\p{Old_Persian}/8W
|
||||
85108:/\p{Old_South_Arabian}/8W
|
||||
85109:/\p{Old_Turkic}/8W
|
||||
85110:/\p{Oriya}/8W
|
||||
85111:/\p{Osmanya}/8W
|
||||
85112:/\p{Phags_Pa}/8W
|
||||
85113:/\p{Phoenician}/8W
|
||||
85114:/\p{Rejang}/8W
|
||||
85115:/\p{Runic}/8W
|
||||
85116:/\p{Samaritan}/8W
|
||||
85117:/\p{Saurashtra}/8W
|
||||
85118:/\p{Shavian}/8W
|
||||
85119:/\p{Sinhala}/8W
|
||||
85120:/\p{Sundanese}/8W
|
||||
85121:/\p{Syloti_Nagri}/8W
|
||||
85122:/\p{Syriac}/8W
|
||||
85123:/\p{Tagalog}/8W
|
||||
85124:/\p{Tagbanwa}/8W
|
||||
85125:/\p{Tai_Le}/8W
|
||||
85126:/\p{Tai_Tham}/8W
|
||||
85127:/\p{Tai_Viet}/8W
|
||||
85128:/\p{Tamil}/8W
|
||||
85129:/\p{Telugu}/8W
|
||||
85130:/\p{Thaana}/8W
|
||||
85131:/\p{Thai}/8W
|
||||
85132:/\p{Tibetan}/8W
|
||||
85133:/\p{Tifinagh}/8W
|
||||
85134:/\p{Ugaritic}/8W
|
||||
85135:/\p{Vai}/8W
|
||||
85136:/\p{Yi}/8W
|
||||
|
||||
85500:/\P{C}/8W
|
||||
85501:/\P{Cc}/8W
|
||||
85502:/\P{Cf}/8W
|
||||
85503:/\P{Cn}/8W
|
||||
85504:/\P{Co}/8W
|
||||
85505:/\P{Cs}/8W
|
||||
85506:/\P{L}/8W
|
||||
85507:/\P{L&}/8W
|
||||
85508:/\P{Ll}/8W
|
||||
85509:/\P{Lm}/8W
|
||||
85510:/\P{Lo}/8W
|
||||
85511:/\P{Lt}/8W
|
||||
85512:/\P{Lu}/8W
|
||||
85513:/\P{M}/8W
|
||||
85514:/\P{Mc}/8W
|
||||
85515:/\P{Me}/8W
|
||||
85516:/\P{Mn}/8W
|
||||
85517:/\P{N}/8W
|
||||
85518:/\P{Nd}/8W
|
||||
85519:/\P{Nl}/8W
|
||||
85520:/\P{No}/8W
|
||||
85521:/\P{P}/8W
|
||||
85522:/\P{Pc}/8W
|
||||
85523:/\P{Pd}/8W
|
||||
85524:/\P{Pe}/8W
|
||||
85525:/\P{Pf}/8W
|
||||
85526:/\P{Pi}/8W
|
||||
85527:/\P{Po}/8W
|
||||
85528:/\P{Ps}/8W
|
||||
85529:/\P{S}/8W
|
||||
85530:/\P{Sc}/8W
|
||||
85531:/\P{Sk}/8W
|
||||
85532:/\P{Sm}/8W
|
||||
85533:/\P{So}/8W
|
||||
85534:/\P{Xan}/8W
|
||||
85535:/\P{Xps}/8W
|
||||
85536:/\P{Xsp}/8W
|
||||
85537:/\P{Xwd}/8W
|
||||
85538:/\P{Z}/8W
|
||||
85539:/\P{Zl}/8W
|
||||
85540:/\P{Zp}/8W
|
||||
85541:/\P{Zs}/8W
|
||||
85542:/\P{Arabic}/8W
|
||||
85543:/\P{Armenian}/8W
|
||||
85544:/\P{Avestan}/8W
|
||||
85545:/\P{Balinese}/8W
|
||||
85546:/\P{Bamum}/8W
|
||||
85547:/\P{Batak}/8W
|
||||
85548:/\P{Bengali}/8W
|
||||
85549:/\P{Bopomofo}/8W
|
||||
85550:/\P{Brahmi}/8W
|
||||
85551:/\P{Braille}/8W
|
||||
85552:/\P{Buginese}/8W
|
||||
85553:/\P{Buhid}/8W
|
||||
85554:/\P{Canadian_Aboriginal}/8W
|
||||
85555:/\P{Carian}/8W
|
||||
85556:/\P{Cham}/8W
|
||||
85557:/\P{Cherokee}/8W
|
||||
85558:/\P{Common}/8W
|
||||
85559:/\P{Coptic}/8W
|
||||
85560:/\P{Cuneiform}/8W
|
||||
85561:/\P{Cypriot}/8W
|
||||
85562:/\P{Cyrillic}/8W
|
||||
85563:/\P{Deseret}/8W
|
||||
85564:/\P{Devanagari}/8W
|
||||
85565:/\P{Egyptian_Hieroglyphs}/8W
|
||||
85566:/\P{Ethiopic}/8W
|
||||
85567:/\P{Georgian}/8W
|
||||
85568:/\P{Glagolitic}/8W
|
||||
85569:/\P{Gothic}/8W
|
||||
85570:/\P{Greek}/8W
|
||||
85571:/\P{Gujarati}/8W
|
||||
85572:/\P{Gurmukhi}/8W
|
||||
85573:/\P{Han}/8W
|
||||
85574:/\P{Hangul}/8W
|
||||
85575:/\P{Hanunoo}/8W
|
||||
85576:/\P{Hebrew}/8W
|
||||
85577:/\P{Hiragana}/8W
|
||||
85578:/\P{Imperial_Aramaic}/8W
|
||||
85579:/\P{Inherited}/8W
|
||||
85580:/\P{Inscriptional_Pahlavi}/8W
|
||||
85581:/\P{Inscriptional_Parthian}/8W
|
||||
85582:/\P{Javanese}/8W
|
||||
85583:/\P{Kaithi}/8W
|
||||
85584:/\P{Kannada}/8W
|
||||
85585:/\P{Katakana}/8W
|
||||
85586:/\P{Kayah_Li}/8W
|
||||
85587:/\P{Kharoshthi}/8W
|
||||
85588:/\P{Khmer}/8W
|
||||
85589:/\P{Lao}/8W
|
||||
85590:/\P{Latin}/8W
|
||||
85591:/\P{Lepcha}/8W
|
||||
85592:/\P{Limbu}/8W
|
||||
85593:/\P{Linear_B}/8W
|
||||
85594:/\P{Lisu}/8W
|
||||
85595:/\P{Lycian}/8W
|
||||
85596:/\P{Lydian}/8W
|
||||
85597:/\P{Malayalam}/8W
|
||||
85598:/\P{Mandaic}/8W
|
||||
85599:/\P{Meetei_Mayek}/8W
|
||||
85600:/\P{Mongolian}/8W
|
||||
85601:/\P{Myanmar}/8W
|
||||
85602:/\P{New_Tai_Lue}/8W
|
||||
85603:/\P{Nko}/8W
|
||||
85604:/\P{Ogham}/8W
|
||||
85605:/\P{Ol_Chiki}/8W
|
||||
85606:/\P{Old_Italic}/8W
|
||||
85607:/\P{Old_Persian}/8W
|
||||
85608:/\P{Old_South_Arabian}/8W
|
||||
85609:/\P{Old_Turkic}/8W
|
||||
85610:/\P{Oriya}/8W
|
||||
85611:/\P{Osmanya}/8W
|
||||
85612:/\P{Phags_Pa}/8W
|
||||
85613:/\P{Phoenician}/8W
|
||||
85614:/\P{Rejang}/8W
|
||||
85615:/\P{Runic}/8W
|
||||
85616:/\P{Samaritan}/8W
|
||||
85617:/\P{Saurashtra}/8W
|
||||
85618:/\P{Shavian}/8W
|
||||
85619:/\P{Sinhala}/8W
|
||||
85620:/\P{Sundanese}/8W
|
||||
85621:/\P{Syloti_Nagri}/8W
|
||||
85622:/\P{Syriac}/8W
|
||||
85623:/\P{Tagalog}/8W
|
||||
85624:/\P{Tagbanwa}/8W
|
||||
85625:/\P{Tai_Le}/8W
|
||||
85626:/\P{Tai_Tham}/8W
|
||||
85627:/\P{Tai_Viet}/8W
|
||||
85628:/\P{Tamil}/8W
|
||||
85629:/\P{Telugu}/8W
|
||||
85630:/\P{Thaana}/8W
|
||||
85631:/\P{Thai}/8W
|
||||
85632:/\P{Tibetan}/8W
|
||||
85633:/\P{Tifinagh}/8W
|
||||
85634:/\P{Ugaritic}/8W
|
||||
85635:/\P{Vai}/8W
|
||||
85636:/\P{Yi}/8W
|
||||
|
||||
# Non-UTF-8 cases (the ones that can actually match)
|
||||
85200:/\p{C}/W
|
||||
85201:/\p{Cc}/W
|
||||
85202:/\p{Cf}/W
|
||||
85206:/\p{L}/W
|
||||
85207:/\p{L&}/W
|
||||
85208:/\p{Ll}/W
|
||||
85212:/\p{Lu}/W
|
||||
85217:/\p{N}/W
|
||||
85218:/\p{Nd}/W
|
||||
85220:/\p{No}/W
|
||||
85221:/\p{P}/W
|
||||
85222:/\p{Pc}/W
|
||||
85223:/\p{Pd}/W
|
||||
85224:/\p{Pe}/W
|
||||
85225:/\p{Pf}/W
|
||||
85226:/\p{Pi}/W
|
||||
85227:/\p{Po}/W
|
||||
85228:/\p{Ps}/W
|
||||
85229:/\p{S}/W
|
||||
85230:/\p{Sc}/W
|
||||
85231:/\p{Sk}/W
|
||||
85232:/\p{Sm}/W
|
||||
85233:/\p{So}/W
|
||||
85234:/\p{Xan}/W
|
||||
85235:/\p{Xps}/W
|
||||
85236:/\p{Xsp}/W
|
||||
85237:/\p{Xwd}/W
|
||||
85238:/\p{Z}/W
|
||||
85241:/\p{Zs}/W
|
||||
85258:/\p{Common}/W
|
||||
85290:/\p{Latin}/W
|
||||
|
||||
85700:/\P{C}/W
|
||||
85701:/\P{Cc}/W
|
||||
85702:/\P{Cf}/W
|
||||
85703:/\P{Cn}/W
|
||||
85704:/\P{Co}/W
|
||||
85705:/\P{Cs}/W
|
||||
85706:/\P{L}/W
|
||||
85707:/\P{L&}/W
|
||||
85708:/\P{Ll}/W
|
||||
85709:/\P{Lm}/W
|
||||
85710:/\P{Lo}/W
|
||||
85711:/\P{Lt}/W
|
||||
85712:/\P{Lu}/W
|
||||
85713:/\P{M}/W
|
||||
85714:/\P{Mc}/W
|
||||
85715:/\P{Me}/W
|
||||
85716:/\P{Mn}/W
|
||||
85717:/\P{N}/W
|
||||
85718:/\P{Nd}/W
|
||||
85719:/\P{Nl}/W
|
||||
85720:/\P{No}/W
|
||||
85721:/\P{P}/W
|
||||
85722:/\P{Pc}/W
|
||||
85723:/\P{Pd}/W
|
||||
85724:/\P{Pe}/W
|
||||
85725:/\P{Pf}/W
|
||||
85726:/\P{Pi}/W
|
||||
85727:/\P{Po}/W
|
||||
85728:/\P{Ps}/W
|
||||
85729:/\P{S}/W
|
||||
85730:/\P{Sc}/W
|
||||
85731:/\P{Sk}/W
|
||||
85732:/\P{Sm}/W
|
||||
85733:/\P{So}/W
|
||||
85734:/\P{Xan}/W
|
||||
85735:/\P{Xps}/W
|
||||
85736:/\P{Xsp}/W
|
||||
85737:/\P{Xwd}/W
|
||||
85738:/\P{Z}/W
|
||||
85739:/\P{Zl}/W
|
||||
85740:/\P{Zp}/W
|
||||
85741:/\P{Zs}/W
|
||||
85742:/\P{Arabic}/W
|
||||
85743:/\P{Armenian}/W
|
||||
85744:/\P{Avestan}/W
|
||||
85745:/\P{Balinese}/W
|
||||
85746:/\P{Bamum}/W
|
||||
85747:/\P{Batak}/W
|
||||
85748:/\P{Bengali}/W
|
||||
85749:/\P{Bopomofo}/W
|
||||
85750:/\P{Brahmi}/W
|
||||
85751:/\P{Braille}/W
|
||||
85752:/\P{Buginese}/W
|
||||
85753:/\P{Buhid}/W
|
||||
85754:/\P{Canadian_Aboriginal}/W
|
||||
85755:/\P{Carian}/W
|
||||
85756:/\P{Cham}/W
|
||||
85757:/\P{Cherokee}/W
|
||||
85758:/\P{Common}/W
|
||||
85759:/\P{Coptic}/W
|
||||
85760:/\P{Cuneiform}/W
|
||||
85761:/\P{Cypriot}/W
|
||||
85762:/\P{Cyrillic}/W
|
||||
85763:/\P{Deseret}/W
|
||||
85764:/\P{Devanagari}/W
|
||||
85765:/\P{Egyptian_Hieroglyphs}/W
|
||||
85766:/\P{Ethiopic}/W
|
||||
85767:/\P{Georgian}/W
|
||||
85768:/\P{Glagolitic}/W
|
||||
85769:/\P{Gothic}/W
|
||||
85770:/\P{Greek}/W
|
||||
85771:/\P{Gujarati}/W
|
||||
85772:/\P{Gurmukhi}/W
|
||||
85773:/\P{Han}/W
|
||||
85774:/\P{Hangul}/W
|
||||
85775:/\P{Hanunoo}/W
|
||||
85776:/\P{Hebrew}/W
|
||||
85777:/\P{Hiragana}/W
|
||||
85778:/\P{Imperial_Aramaic}/W
|
||||
85779:/\P{Inherited}/W
|
||||
85780:/\P{Inscriptional_Pahlavi}/W
|
||||
85781:/\P{Inscriptional_Parthian}/W
|
||||
85782:/\P{Javanese}/W
|
||||
85783:/\P{Kaithi}/W
|
||||
85784:/\P{Kannada}/W
|
||||
85785:/\P{Katakana}/W
|
||||
85786:/\P{Kayah_Li}/W
|
||||
85787:/\P{Kharoshthi}/W
|
||||
85788:/\P{Khmer}/W
|
||||
85789:/\P{Lao}/W
|
||||
85790:/\P{Latin}/W
|
||||
85791:/\P{Lepcha}/W
|
||||
85792:/\P{Limbu}/W
|
||||
85793:/\P{Linear_B}/W
|
||||
85794:/\P{Lisu}/W
|
||||
85795:/\P{Lycian}/W
|
||||
85796:/\P{Lydian}/W
|
||||
85797:/\P{Malayalam}/W
|
||||
85798:/\P{Mandaic}/W
|
||||
85799:/\P{Meetei_Mayek}/W
|
||||
85800:/\P{Mongolian}/W
|
||||
85801:/\P{Myanmar}/W
|
||||
85802:/\P{New_Tai_Lue}/W
|
||||
85803:/\P{Nko}/W
|
||||
85804:/\P{Ogham}/W
|
||||
85805:/\P{Ol_Chiki}/W
|
||||
85806:/\P{Old_Italic}/W
|
||||
85807:/\P{Old_Persian}/W
|
||||
85808:/\P{Old_South_Arabian}/W
|
||||
85809:/\P{Old_Turkic}/W
|
||||
85810:/\P{Oriya}/W
|
||||
85811:/\P{Osmanya}/W
|
||||
85812:/\P{Phags_Pa}/W
|
||||
85813:/\P{Phoenician}/W
|
||||
85814:/\P{Rejang}/W
|
||||
85815:/\P{Runic}/W
|
||||
85816:/\P{Samaritan}/W
|
||||
85817:/\P{Saurashtra}/W
|
||||
85818:/\P{Shavian}/W
|
||||
85819:/\P{Sinhala}/W
|
||||
85820:/\P{Sundanese}/W
|
||||
85821:/\P{Syloti_Nagri}/W
|
||||
85822:/\P{Syriac}/W
|
||||
85823:/\P{Tagalog}/W
|
||||
85824:/\P{Tagbanwa}/W
|
||||
85825:/\P{Tai_Le}/W
|
||||
85826:/\P{Tai_Tham}/W
|
||||
85827:/\P{Tai_Viet}/W
|
||||
85828:/\P{Tamil}/W
|
||||
85829:/\P{Telugu}/W
|
||||
85830:/\P{Thaana}/W
|
||||
85831:/\P{Thai}/W
|
||||
85832:/\P{Tibetan}/W
|
||||
85833:/\P{Tifinagh}/W
|
||||
85834:/\P{Ugaritic}/W
|
||||
85835:/\P{Vai}/W
|
||||
85836:/\P{Yi}/W
|
||||
|
||||
# Don't forget \p{Any}
|
||||
86000:/^\p{Any}/W
|
||||
86001:/^\p{Any}/8W
|
||||
|
||||
# Braceless variants for {C, L, M, N, P, S, Z}
|
||||
86010:/\pC/8W
|
||||
86011:/\pL/8W
|
||||
86012:/\pM/8W
|
||||
86013:/\pN/8W
|
||||
86014:/\pP/8W
|
||||
86015:/\pS/8W
|
||||
86016:/\pZ/8W
|
||||
|
||||
86020:/\PC/8W
|
||||
86021:/\PL/8W
|
||||
86022:/\PM/8W
|
||||
86023:/\PN/8W
|
||||
86024:/\PP/8W
|
||||
86025:/\PS/8W
|
||||
86026:/\PZ/8W
|
||||
|
||||
86030:/\pC/W
|
||||
86031:/\pL/W
|
||||
86033:/\pN/W
|
||||
86034:/\pP/W
|
||||
86035:/\pS/W
|
||||
86036:/\pZ/W
|
||||
|
||||
86040:/\PC/W
|
||||
86041:/\PL/W
|
||||
86042:/\PM/W
|
||||
86043:/\PN/W
|
||||
86044:/\PP/W
|
||||
86045:/\PS/W
|
||||
86046:/\PZ/W
|
||||
|
||||
# UCP properties are immune to the nocase flag -- they are always
|
||||
# case-sensitive.
|
||||
86060:/^case \p{Ll}/i
|
||||
86061:/^case \p{Ll}/i8W
|
||||
86062:/^case \p{Lu}/i
|
||||
86063:/^case \p{Lu}/i8W
|
||||
86064:/^case \P{Ll}/i
|
||||
86065:/^case \P{Ll}/i8W
|
||||
86066:/^case \P{Lu}/i
|
||||
86067:/^case \P{Lu}/i8W
|
||||
|
||||
# Some POSIX classes behave differently in UCP mode.
|
||||
86200:/^[[:alnum:]]/8W
|
||||
86201:/^[[:alpha:]]/8W
|
||||
86202:/^[[:blank:]]/8W
|
||||
86203:/^[[:digit:]]/8W
|
||||
86204:/^[[:lower:]]/8W
|
||||
86205:/^[[:space:]]/8W
|
||||
86206:/^[[:upper:]]/8W
|
||||
86207:/^[[:word:]]/8W
|
||||
86208:/^[[:graph:]]/8W
|
||||
86209:/^[[:print:]]/8W
|
||||
86210:/^[[:punct:]]/8W
|
||||
86211:/^[[:alnum:]]/W
|
||||
86212:/^[[:alpha:]]/W
|
||||
86213:/^[[:blank:]]/W
|
||||
86214:/^[[:digit:]]/W
|
||||
86215:/^[[:lower:]]/W
|
||||
86216:/^[[:space:]]/W
|
||||
86217:/^[[:upper:]]/W
|
||||
86218:/^[[:word:]]/W
|
||||
86219:/^[[:graph:]]/W
|
||||
86220:/^[[:print:]]/W
|
||||
86221:/^[[:punct:]]/W
|
||||
|
||||
# Negated variants
|
||||
86230:/^[[:^alnum]][[:^alpha]][[:^blank]][[:^digit]][[:^lower]][[:^space]][[:^upper]][[:^word]][[:^graph]][[:^print]][[:^punct]]/8W
|
||||
86231:/^[[:^alnum]][[:^alpha]][[:^blank]][[:^digit]][[:^lower]][[:^space]][[:^upper]][[:^word]][[:^graph]][[:^print]][[:^punct]]/W
|
||||
|
||||
# Classes where the POSIX class component disappears (fixed in PCRE 8.38)
|
||||
86240:/^a[[:punct:]bc]/W
|
||||
86241:/^a[[:punct:]bc]/8W
|
||||
|
||||
# Check [:print:] in UCP mode against U+180E, MONGOLIAN VOWEL
|
||||
# SEPARATOR.
|
||||
86242:/^[[:print:]]/8W
|
||||
86243:/^[^[:print:]]/8W
|
||||
|
||||
# PCRE bug fixed in PCRE 8.40.
|
||||
86244:/[\D\P{Nd}]+/8
|
221
tools/hscollider/test_cases/pcre/utf8.txt
Normal file
221
tools/hscollider/test_cases/pcre/utf8.txt
Normal file
@@ -0,0 +1,221 @@
|
||||
80000:/空/8
|
||||
80001:/حرية/8
|
||||
80002:/[空]/8
|
||||
80003:/\x{7a7a}/8
|
||||
80004:/[\x{7a7a}]/8
|
||||
80005:/[空]/
|
||||
80006:/^./s8
|
||||
80007:/^./8
|
||||
80008:/æ/8
|
||||
80009:/\346/8
|
||||
80010:/Ā/8
|
||||
80011:/\700/8
|
||||
80012:/[ἀ-ῼ]/8
|
||||
80013:/[\x{2e18}-⸮]/8
|
||||
80014:/^空{2}/8
|
||||
80015:/^空{2}/
|
||||
# 2 char all
|
||||
80016:/^[\x{80}-\x{7ff}]/8
|
||||
80017:/\x80/8
|
||||
80018:/\x{80}/8
|
||||
80019:/[\x80]/8
|
||||
80020:/[\x{80}]/8
|
||||
80021:/\xff/8
|
||||
80022:/\x{ff}/8
|
||||
80023:/[\xff]/8
|
||||
80024:/[\x{ff}]/8
|
||||
# 80025:/\X/
|
||||
80026:/foo\Xbar/8P
|
||||
80100:/foo.bar/8
|
||||
80101:/foo.*bar/s8
|
||||
80102:/foo[a]*bar/8
|
||||
80103:/foo[^a]*bar/8
|
||||
80104:/foo.+bar/s8
|
||||
80105:/foo[a]+bar/8
|
||||
80106:/foo[^a]_bar/8
|
||||
80107:/foo.*/s8
|
||||
80108:/foo.+/s8
|
||||
80109:/foo.*bar/8
|
||||
80110:/[]a]/8
|
||||
84000:/I/8i
|
||||
84001:/k/8i
|
||||
84002:/S/8i
|
||||
84003:/i/8i
|
||||
84004:/K/8i
|
||||
84005:/s/8i
|
||||
84006:/µ/8i
|
||||
84007:/Å/8i
|
||||
84008:/ß/8i
|
||||
84009:/å/8i
|
||||
84010:/İ/8i
|
||||
84011:/ı/8i
|
||||
84012:/ſ/8i
|
||||
84013:/ͅ/8i
|
||||
84014:/Β/8i
|
||||
84015:/Ε/8i
|
||||
84016:/Θ/8i
|
||||
84017:/Ι/8i
|
||||
84018:/Κ/8i
|
||||
84019:/Μ/8i
|
||||
84020:/Π/8i
|
||||
84021:/Ρ/8i
|
||||
84022:/Σ/8i
|
||||
84023:/Φ/8i
|
||||
84024:/Ω/8i
|
||||
84025:/β/8i
|
||||
84026:/ε/8i
|
||||
84027:/θ/8i
|
||||
84028:/ι/8i
|
||||
84029:/κ/8i
|
||||
84030:/μ/8i
|
||||
84031:/π/8i
|
||||
84032:/ρ/8i
|
||||
84033:/ς/8i
|
||||
84034:/σ/8i
|
||||
84035:/φ/8i
|
||||
84036:/ω/8i
|
||||
84037:/ϐ/8i
|
||||
84038:/ϑ/8i
|
||||
84039:/ϕ/8i
|
||||
84040:/ϖ/8i
|
||||
84041:/ϰ/8i
|
||||
84042:/ϱ/8i
|
||||
84043:/ϴ/8i
|
||||
84044:/ϵ/8i
|
||||
84045:/Ṡ/8i
|
||||
84046:/ṡ/8i
|
||||
84047:/ẛ/8i
|
||||
84048:/ẞ/8i
|
||||
84049:/ι/8i
|
||||
84050:/Ω/8i
|
||||
84051:/K/8i
|
||||
84052:/Å/8i
|
||||
84053:/SS/8i
|
||||
84054:/SZ/8i
|
||||
84055:/ss/8i
|
||||
84056:/sz/8i
|
||||
84057:/DŽ/8i
|
||||
84058:/Dž/8i
|
||||
84059:/dž/8i
|
||||
84060:/LJ/8i
|
||||
84061:/Lj/8i
|
||||
84062:/lj/8i
|
||||
84063:/NJ/8i
|
||||
84064:/Nj/8i
|
||||
84065:/nj/8i
|
||||
84066:/Dz/8i
|
||||
84067:/dz/8i
|
||||
88000:/\p{Vai}/8
|
||||
88001:/\p{^Vai}/8
|
||||
88002:/\P{Vai}/8
|
||||
88003:/\P{^Vai}/8
|
||||
# test for some things which are in multiple scripts
|
||||
88004:/\p{Common}/8
|
||||
88005:/\p{Mongolian}/8
|
||||
88006:/\p{Phags_Pa}/8
|
||||
88007:/\P{Common}/8
|
||||
88008:/\P{Mongolian}/8
|
||||
88009:/\P{Phags_Pa}/8
|
||||
# unallocated and common, with the PCRE twist
|
||||
88010:/\p{Cn}/8
|
||||
88011:/\P{Cn}/8
|
||||
88012:/\p{Common}/8
|
||||
88013:/\p{^Common}/8
|
||||
#'caseless' properties
|
||||
88014:/\p{Ll}/8i
|
||||
88015:/\p{Lu}/8i
|
||||
88016:/\p{Lt}/8i
|
||||
88017:/\p{Lo}/8i
|
||||
#although not letters, some elements have cased variants like COMBINING GREEK
|
||||
# YPOGEGRAMMENI
|
||||
88018:/\p{Mn}/8i
|
||||
#check Han extensions and private areas
|
||||
88019:/\p{Co}/8
|
||||
88020:/\p{Lo}/8
|
||||
88021:/\p{Han}/8
|
||||
88022:/\P{Co}/8
|
||||
88023:/\P{Lo}/8
|
||||
88024:/\P{Han}/8
|
||||
#ucp mode
|
||||
88100:/\d/8
|
||||
88101:/\d/8W
|
||||
88102:/[\w]/8
|
||||
88103:/[\w]/8W
|
||||
88104:/\s/8
|
||||
88105:/\s/8W
|
||||
88106:/\S/8
|
||||
88107:/\S/8W
|
||||
88108:/\s/
|
||||
88109:/\s/W
|
||||
88110:/[\w]/
|
||||
88111:/[\w]/W
|
||||
88112:/[[:alnum:]]/8
|
||||
88113:/[[:alnum:]]/8W
|
||||
88114:/[[:alpha:]]/8
|
||||
88115:/[[:alpha:]]/8W
|
||||
88116:/[[:blank:]]/8
|
||||
88117:/[[:blank:]]/8W
|
||||
88118:/[[:digit:]]/8
|
||||
88119:/[[:digit:]]/8W
|
||||
88120:/[[:lower:]]/8
|
||||
88121:/[[:lower:]]/8W
|
||||
88122:/[[:lower:]]/8i
|
||||
88123:/[[:lower:]]/8Wi
|
||||
88124:/[[:upper:]]/8
|
||||
88125:/[[:upper:]]/8W
|
||||
88126:/[[:upper:]]/8i
|
||||
88127:/[[:upper:]]/8Wi
|
||||
88128:/[[:space:]]/8
|
||||
88129:/[[:space:]]/8W
|
||||
88130:/[[:word:]]/8
|
||||
88131:/[[:word:]]/8W
|
||||
88132:/[[:^word:]]/8
|
||||
88133:/[[:^word:]]/8W
|
||||
88134:/[[:graph:]]/8
|
||||
88135:/[[:graph:]]/8W
|
||||
88136:/[[:cntrl:]]/8
|
||||
88137:/[[:cntrl:]]/8W
|
||||
88138:/\h/8
|
||||
88139:/\h/8W
|
||||
88140:/\H/8
|
||||
88141:/\H/8W
|
||||
88142:/\v/8
|
||||
88143:/\v/8W
|
||||
88144:/\V/8
|
||||
88145:/\V/8W
|
||||
#boundaries
|
||||
89000:/foo\B.*\bbar/8WP
|
||||
89001:/\b/8
|
||||
89002:/\B/8
|
||||
89003:/\bfoo/8
|
||||
89004:/\bfoo/8WP
|
||||
89005:/foo\b/8
|
||||
89006:/foo\b/8WP
|
||||
89007:/\Bfoo/8
|
||||
89008:/\Bfoo/8WP
|
||||
89009:/foo\B/8
|
||||
89010:/foo\B/8WP
|
||||
89011:/\bô/8
|
||||
89012:/\bô/8WP
|
||||
89013:/ô\b/8
|
||||
89014:/ô\b/8WP
|
||||
89015:/\b﹎/8
|
||||
89016:/\b﹎/8WP
|
||||
89017:/﹎\b/8
|
||||
89018:/﹎\b/8WP
|
||||
89019:/K/8
|
||||
|
||||
# (*VERB) syntax
|
||||
89100:/(*UTF8)\x{7a7a}/
|
||||
89101:/(*UTF8)(*UCP)\w/
|
||||
89102:/(*UCP)\w/8
|
||||
|
||||
# Octal encoding!
|
||||
89103:/\o{75172}/8
|
||||
|
||||
# 4byte utf8
|
||||
89104:/𒀀/8
|
||||
|
||||
# more (*VERB) syntax
|
||||
89105:/(*UCP)(*UTF8)\w/
|
||||
89106:/(*UTF)(*UCP)\w/
|
52
tools/hscollider/test_cases/pcre/vacuous.txt
Normal file
52
tools/hscollider/test_cases/pcre/vacuous.txt
Normal file
@@ -0,0 +1,52 @@
|
||||
# A selection of fine vacuous patterns (patterns that match the empty string).
|
||||
# Only true FIREHOSE patterns need the /V flag.
|
||||
|
||||
22000:/.*/V
|
||||
22001:/.?/V
|
||||
22002:/.{0,16}/V
|
||||
22003:/.*/sV
|
||||
22004:/.?/sV
|
||||
22005:/.{0,16}/sV
|
||||
22006:/(foo|)/V
|
||||
22007:/(foo|.?)/sV
|
||||
22008:/(foo|.*)/sV
|
||||
22009:/(foo|.{0,16})/sV
|
||||
22010:/^.*/
|
||||
22011:/^.?/
|
||||
22012:/^.{0,16}/
|
||||
22013:/^.*/s
|
||||
22014:/^.?/s
|
||||
22015:/^.{0,16}/s
|
||||
22016:/^(foo|)/
|
||||
22017:/^(foo|.?)/s
|
||||
22018:/^(foo|.*)/s
|
||||
22019:/^(foo|.{0,16})/s
|
||||
|
||||
22020:/^$/
|
||||
22021:/^\z/
|
||||
|
||||
22022:/(foo|$)/
|
||||
22023:/(foo|\z)/
|
||||
22024:/(.?|$)/sV
|
||||
22025:/(.?|\z)/sV
|
||||
22026:/(^|\z)/
|
||||
|
||||
# Tricky mix of vacuous and multiline
|
||||
22027:/^a?/m
|
||||
22028:/^$/m
|
||||
|
||||
# We handle /./s as a firehose with a min start offset of zero, now.
|
||||
22029:/./s
|
||||
|
||||
# For completeness, some other cases (especially multiline)
|
||||
22030:/^/m
|
||||
22031:/$/m
|
||||
22032:/\z/m
|
||||
22033:/\Z/m
|
||||
22034:/^\z/m
|
||||
22035:/^./sm
|
||||
22036:/^.?/sm
|
||||
22037:/(foo|$)/m
|
||||
|
||||
# Messy cases
|
||||
22038:/((?:d|C|B|D|B*|.)){1,4}/VP
|
Reference in New Issue
Block a user