mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
tools/fuzz: add test scripts for synthetic pattern generation.
This commit is contained in:
parent
4d33736a5c
commit
f1db5b71d8
45
tools/fuzz/aristocrats.py
Executable file
45
tools/fuzz/aristocrats.py
Executable file
@ -0,0 +1,45 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from random import choice,randint
|
||||
from optparse import OptionParser
|
||||
|
||||
def generateRandomOptions():
    """Return a random subset of expression flags as a string.

    Reads the module-level ``options`` produced by the option parser: in
    hybrid mode the candidate flags are "smiH8W", otherwise "smiHV8WLP".
    Every candidate is independently kept or dropped with ``choice``, and
    kept flags appear in the same order as in the candidate string.
    """
    # The candidate string doubles as the canonical ordering of the flags.
    candidates = "smiH8W" if options.hybrid else "smiHV8WLP"
    return "".join([choice(['', flag]) for flag in candidates])
|
||||
|
||||
# Command-line interface.  All options are optional; positional arguments
# are rejected.
parser = OptionParser()
parser.add_option("-d", "--depth",
                  action="store", type="int", dest="depth", default=200,
                  help="Depth of generation (akin to maximum length)")
parser.add_option("-c", "--count",
                  action="store", type="int", dest="count", default=1000,
                  help="Number of expressions to generate")
parser.add_option("-f", "--full",
                  action="store_true", dest="full", default=False,
                  help="Use a full character set including unprintables")
parser.add_option("-H", "--hybrid",
                  action="store_true", dest="hybrid",
                  help="Generate random flags for hybrid mode")

(options, args) = parser.parse_args()
if len(args) != 0:
    parser.error("incorrect number of arguments")

# Character codes the random patterns are drawn from.
if options.full:
    # Every byte value except newline (output is written one expression
    # per line, so presumably a raw newline would split an entry).
    crange = [c for c in range(0, 256) if c != ord('\n')]
else:
    # Printable ASCII only.
    crange = list(range(32, 127))

# Emit "<id>:/<random characters>/<flags>", one expression per line.
for i in range(0, options.count):
    # Named `length` rather than `len` so the builtin used above is not
    # shadowed at module level.
    length = randint(1, options.depth)
    s = [chr(choice(crange)) for x in range(length)]
    line = str(i) + ":/" + "".join(s) + "/" + generateRandomOptions()
    # Single-argument print(...) behaves the same as the print statement
    # under Python 2.
    print(line)
|
39
tools/fuzz/completocrats.py
Executable file
39
tools/fuzz/completocrats.py
Executable file
@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from itertools import *
|
||||
from optparse import OptionParser
|
||||
|
||||
# Small alphabet of literals and regex metacharacters used by --limited.
LIMITED_ALPHABET = "abc[](){}*?+^$|:=.\\-"

# Command-line interface.  Positional arguments are rejected.
parser = OptionParser()
parser.add_option("-d", "--depth",
                  action="store", type="int", dest="depth", default=200,
                  help="Depth of generation (akin to maximum length)")
parser.add_option("-f", "--full",
                  action="store_true", dest="full", default=False,
                  help="Use a full character set including unprintables")
parser.add_option("-l", "--limited",
                  action="store_true", dest="limited", default=False,
                  help="Use a very limited character set: just " + LIMITED_ALPHABET)

(options, args) = parser.parse_args()
if len(args) != 0:
    parser.error("incorrect number of arguments")

# Pick the character codes to enumerate over; --full wins over --limited.
if options.full:
    # All byte values except newline.
    crange = [c for c in range(0, 256) if c != ord('\n')]
elif options.limited:
    crange = [ord(c) for c in LIMITED_ALPHABET]
else:
    # Printable ASCII.
    crange = list(range(32, 127))

srange = [chr(c) for c in crange]

# Exhaustively enumerate every string of exactly `depth` characters over the
# chosen alphabet and emit it as "<id>:/<string>/".  product() is lazy, so
# output starts immediately even though the full enumeration is enormous.
for i, x in enumerate(product(srange, repeat=options.depth)):
    line = str(i) + ":/" + "".join(x) + "/"
    print(line)
|
259
tools/fuzz/heuristocrats.py
Executable file
259
tools/fuzz/heuristocrats.py
Executable file
@ -0,0 +1,259 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from optparse import OptionParser
|
||||
from random import *
|
||||
import string
|
||||
import sys
|
||||
|
||||
# return a random non-degenerate (ie not [10]) partition of nChildren
|
||||
def chooseLeafWidth(nChildren):
    """Randomly split nChildren into an ordered list of positive widths.

    Between one and five distinct cut points (fewer when nChildren is
    small) are sampled from 1..nChildren-1.  The gaps between consecutive
    cut points, including the two ends 0 and nChildren, are the widths, so
    they always sum to nChildren.  For nChildren == 1 no cut is possible
    and the result is [1].
    """
    nCuts = min(randint(1, 5), nChildren - 1)
    cuts = sorted(sample(range(1, nChildren), nCuts))
    bounds = [0] + cuts + [nChildren]
    # Pair each bound with its successor; zero-width gaps are skipped.
    return [hi - lo for lo, hi in zip(bounds, bounds[1:]) if hi != lo]
|
||||
|
||||
def generateConcat(nChildren, atTopIgnored):
    """Generate a concatenation of randomly sized sub-expressions.

    nChildren is split into widths by chooseLeafWidth() and each width is
    expanded with generateRE().  The second argument is accepted only so
    that all tree generators share one call signature; children are always
    generated with atTop=False.

    Returns the sub-expressions joined with no separator.
    """
    pieces = [generateRE(w, atTop=False) for w in chooseLeafWidth(nChildren)]
    # str.join replaces the deprecated string.join(); with an empty
    # separator, empty pieces vanish on their own, so no filtering is needed.
    return "".join(pieces)
|
||||
|
||||
def makeGroup(s):
    """Wrap s in parentheses, randomly capturing "(...)" or non-capturing "(?:...)"."""
    opener = "(" if randint(0, 1) == 0 else "(?:"
    return opener + s + ")"
|
||||
|
||||
def generateAlt(nChildren, atTop):
    """Generate an alternation "a|b|..." of randomly sized sub-expressions.

    nChildren is split into widths by chooseLeafWidth(); each branch is
    generated with generateRE(), with generateAlt itself suppressed so a
    branch is never directly another alternation.  atTop is passed through
    to every branch.

    Returns the lone branch unchanged when there is exactly one, otherwise
    the "|"-joined branches wrapped by makeGroup().
    """
    branches = [generateRE(w, [generateAlt], atTop)
                for w in chooseLeafWidth(nChildren)]
    # Drop empty branches so no "||" or dangling "|" is produced.
    branches = [b for b in branches if b != '']
    # str.join replaces the deprecated string.join().
    s = "|".join(branches)
    if len(branches) == 1:
        return s
    return makeGroup(s)
|
||||
|
||||
def generateQuant(nChildren, atTopIgnored):
    """Generate a sub-expression followed by a random quantifier.

    The quantifier is one of "*", "?", "+", "{lo}", "{lo,}" or "{lo,hi}",
    where lo and hi - lo are rounded exponential variates.  The operand
    comes from generateRE() with generateQuant suppressed, so the operand
    is never directly another quantified node.  The second argument is
    accepted only for signature compatibility with the other generators.
    """
    lo = int(round(expovariate(0.2)))
    hi = lo + int(round(expovariate(0.2)))
    quantifiers = ["*", "?", "+", "{%d}"%lo, "{%d,}"%lo, "{%d,%d}"%(lo,hi)]
    q = choice(quantifiers)
    r = generateRE(nChildren, [generateQuant], atTop = False)
    # The operand is wrapped in a group only when it is longer than one
    # character and starts with "(" or ends with ")"; otherwise the
    # quantifier is appended directly (binding to the last atom).
    # NOTE(review): this may be the inverse of what was intended -- confirm.
    needsGroup = len(r) != 1 and (r[0] == '(' or r[-1] == ")")
    if needsGroup:
        return makeGroup(r) + q
    return r + q
|
||||
|
||||
def generateChar(nChildren, atTop = False):
    """Return one random character from the module-level ``alphabet`` of character codes.

    Both arguments are unused; they exist so every leaf generator can be
    called the same way.
    """
    code = choice(alphabet)
    return chr(code)
|
||||
|
||||
def generateNocaseChar(nChildren, atTop = False):
    """Return a random alphabet character, upper-cased or as a two-case class.

    With equal probability the result is the upper-case form of the
    character (e.g. "A") or a character class holding both cases
    (e.g. "[Aa]").
    """
    c = generateChar(nChildren, atTop)
    upper = c.upper()
    if random() >= 0.5:
        return '[' + upper + c.lower() + ']'
    return upper
|
||||
|
||||
def generateDot(nChildren, atTop = False):
    """Return the regex wildcard "."; both arguments are ignored."""
    wildcard = "."
    return wildcard
|
||||
|
||||
def generateBoundary(nChildren, atTop = False):
    """Return a grouped word-boundary assertion, "\\b" or "\\B" chosen at random.

    Both arguments are ignored.
    """
    assertion = '\\' + choice('bB')
    # Wrapped in parens so that a following quantifier can repeat it and the
    # result is still accepted by libpcre.
    return makeGroup(assertion)
|
||||
|
||||
def generateCharClass(nChildren, atTop = False):
    """Return a random character class such as "[abc]" or "[^ab]".

    With probability 0.2 the class is negated and holds 1-4 characters;
    otherwise it is a positive class of 2-4 characters.  Members come from
    generateChar() and may repeat.  Both arguments are ignored.
    """
    if random() < 0.2:
        negation = "^"
        nChars = randint(1,4)
    else:
        negation = ""
        nChars = randint(2,4)

    # range() instead of the Python-2-only xrange(), and one join instead of
    # repeated string +=; the members are still generated in order.
    members = [generateChar(1) for _ in range(nChars)]
    return "[" + negation + "".join(members) + "]"
|
||||
|
||||
def generateOptionsFlags(nChildren, atTop = False):
    """Return a random inline option group of the form "(?<set>-<unset>)".

    Both flag lists are independent non-empty random samples of "smix", so
    the same flag may appear on both sides of the "-".  Both arguments are
    ignored.
    """
    allflags = "smix"
    # The positive flags are drawn before the negative ones.
    turnedOn = ''.join(sample(allflags, randint(1, len(allflags))))
    turnedOff = ''.join(sample(allflags, randint(1, len(allflags))))
    return '(?' + turnedOn + '-' + turnedOff + ')'
|
||||
|
||||
def generateLogicalId(nChildren, atTop = False):
    """Return a random expression ID, as a decimal string, for use in a logical combination.

    The ID is drawn from 0..options.count inclusive; both arguments are
    ignored.
    """
    # NOTE(review): the upper bound is inclusive, so the value can equal
    # options.count itself -- confirm that this is intended.
    ident = randint(0, options.count)
    return str(ident)
|
||||
|
||||
def makeLogicalGroup(s):
    """Return s wrapped in plain parentheses."""
    return "".join(("(", s, ")"))
|
||||
|
||||
def generateLogicalNot(nChildren, atTop):
    """Generate a negated logical sub-expression of the form "!(...)".

    The operand comes from generateCombination() with generateLogicalNot
    suppressed, so the operand is never directly another negation.  atTop
    is not forwarded; the operand is always generated with atTop=False.
    """
    operand = generateCombination(nChildren, [generateLogicalNot], atTop = False)
    grouped = makeLogicalGroup(operand)
    return "!" + grouped
|
||||
|
||||
def generateLogicalAnd(nChildren, atTop):
    """Generate a logical conjunction "a&b&..." of sub-combinations.

    nChildren is split into widths by chooseLeafWidth(); each operand is
    produced by generateCombination() with generateLogicalAnd suppressed
    and atTop=False.

    Returns the lone operand unchanged when there is exactly one, otherwise
    the "&"-joined operands wrapped by makeLogicalGroup().
    """
    operands = [generateCombination(w, [generateLogicalAnd], atTop = False)
                for w in chooseLeafWidth(nChildren)]
    # Drop empty operands so no dangling "&" is produced.
    operands = [r for r in operands if r != '']
    # str.join replaces the deprecated string.join().
    s = "&".join(operands)
    if len(operands) == 1:
        return s
    return makeLogicalGroup(s)
|
||||
|
||||
def generateLogicalOr(nChildren, atTop):
    """Generate a logical disjunction "a|b|..." of sub-combinations.

    nChildren is split into widths by chooseLeafWidth(); each operand is
    produced by generateCombination() with generateLogicalOr suppressed
    and atTop=False.

    Returns the lone operand unchanged when there is exactly one, otherwise
    the "|"-joined operands wrapped by makeLogicalGroup().
    """
    operands = [generateCombination(w, [generateLogicalOr], atTop = False)
                for w in chooseLeafWidth(nChildren)]
    # Drop empty operands so no dangling "|" is produced.
    operands = [r for r in operands if r != '']
    # str.join replaces the deprecated string.join().
    s = "|".join(operands)
    if len(operands) == 1:
        return s
    return makeLogicalGroup(s)
|
||||
|
||||
# Each table pairs a generator function with an integer weight; a larger
# weight makes that generator proportionally more likely to be picked.

# Interior (non-leaf) nodes of a regular expression.
weightsTree = [
    (generateConcat, 10),
    (generateAlt, 3),
    (generateQuant, 2),
]

# Leaf nodes of a regular expression.
weightsLeaf = [
    (generateChar, 30),
    (generateCharClass, 5),
    (generateDot, 5),
    (generateNocaseChar, 2),
    (generateBoundary, 1),
    (generateOptionsFlags, 1),
]

# Interior nodes of a logical combination.
weightsLogicalTree = [
    (generateLogicalNot, 1),
    (generateLogicalAnd, 5),
    (generateLogicalOr, 5),
]

# Leaf nodes of a logical combination.
weightsLogicalLeaf = [
    (generateLogicalId, 1),
]
|
||||
|
||||
def genChoices(weighted):
    """Expand (item, weight) pairs into a flat list with each item repeated weight times.

    Picking uniformly from the returned list is then a weighted pick over
    the original items.  Input order is preserved.
    """
    return [item for (item, weight) in weighted for _ in range(weight)]
|
||||
|
||||
# Anchor templates applied to a top-level expression; "%s" stands for the
# generated expression body, and the plain "%s" entry means no anchoring.
weightsAnchor = [
    ("\\A%s\\Z", 1),
    ("\\A%s\\z", 1),
    ("\\A%s", 4),
    ("%s\\Z", 2),
    ("%s\\z", 2),
    ("^%s$", 1),
    ("^%s", 4),
    ("%s$", 2),
    ("%s", 25),
]

# Flattened weight tables: a uniform choice() over one of these lists is a
# weighted pick over the corresponding table.
choicesTree = genChoices(weightsTree)
choicesLeaf = genChoices(weightsLeaf)
choicesLogicalTree = genChoices(weightsLogicalTree)
choicesLogicalLeaf = genChoices(weightsLogicalLeaf)
choicesAnchor = genChoices(weightsAnchor)
|
||||
|
||||
def generateRE(nChildren, suppressList = [], atTop = False):
    """Generate a random regular expression with a budget of nChildren nodes.

    One node is spent on this call.  When nothing is left a leaf generator
    is picked from choicesLeaf; otherwise a tree generator is picked from
    choicesTree, skipping any generator listed in suppressList.  When atTop
    is true the result is substituted into a random anchor template from
    choicesAnchor; otherwise it is returned as is.

    suppressList is only read, never modified, so the shared default list
    is harmless.
    """
    # The anchor template is drawn before the node generator.
    template = choice(choicesAnchor) if atTop else "%s"

    remaining = nChildren - 1
    if remaining == 0:
        generator = choice(choicesLeaf)
    else:
        generator = choice([ch for ch in choicesTree if ch not in suppressList])

    return template % generator(remaining, atTop)
|
||||
|
||||
def generateCombination(nChildren, suppressList = [], atTop = False):
    """Generate a random logical combination with a budget of nChildren nodes.

    One node is spent on this call.  When nothing is left a leaf generator
    is picked from choicesLogicalLeaf; otherwise a tree generator is picked
    from choicesLogicalTree, skipping any generator listed in suppressList.

    suppressList is only read, never modified, so the shared default list
    is harmless.
    """
    remaining = nChildren - 1
    if remaining == 0:
        generator = choice(choicesLogicalLeaf)
    else:
        generator = choice([ch for ch in choicesLogicalTree
                            if ch not in suppressList])
    return generator(remaining, atTop)
|
||||
|
||||
def generateRandomOptions():
    """Return a random subset of expression flags as a string.

    Reads the module-level ``options``: in hybrid mode the candidate flags
    are "smiH8W", otherwise "smiHV8WLP".  Every candidate is independently
    kept or dropped, preserving the candidate order.  When logical
    combinations are enabled, a "Q" flag is additionally appended at random.
    """
    # The candidate string doubles as the canonical ordering of the flags.
    candidates = "smiH8W" if options.hybrid else "smiHV8WLP"
    picked = [choice(['', flag]) for flag in candidates]
    if options.logical:
        picked.append(choice(['', 'Q']))
    return "".join(picked)
|
||||
|
||||
def generateRandomExtParam(depth, extparam):
    """Return a random extended-parameter suffix such as "{min_length=3,max_offset=9}".

    Returns "" when extparam is false, or when none of the parameters is
    selected.  Otherwise each of min_length (1..depth), min_offset
    (1..depth) and max_offset (1..3*depth) is included on its own coin
    flip, and on a further coin flip a distance of 1..3 is added as either
    edit_distance or hamming_distance.
    """
    if not extparam:
        return ""

    params = []
    # (format, largest value) for the independently selected parameters,
    # in output order.
    bounded = (
        ("min_length=%u", depth),
        ("min_offset=%u", depth),
        ("max_offset=%u", depth*3),
    )
    for fmt, largest in bounded:
        if choice((False, True)):
            params.append(fmt % randint(1, largest))

    # At most one distance parameter; its value is drawn before its kind.
    if choice((False, True)):
        dist = randint(1, 3)
        fmt = "edit_distance=%u" if choice((False, True)) else "hamming_distance=%u"
        params.append(fmt % dist)

    if not params:
        return ""
    return "{" + ",".join(params) + "}"
|
||||
|
||||
# Command-line interface.  Positional arguments are rejected.
parser = OptionParser()
parser.add_option("-d", "--depth",
                  action="store", type="int", dest="depth", default=200,
                  help="Depth of generation (akin to maximum length)")
parser.add_option("-c", "--count",
                  action="store", type="int", dest="count", default=1000,
                  help="Number of expressions to generate")
parser.add_option("-a", "--alphabet",
                  action="store", type="int", dest="alphabet", default=26,
                  help="Size of alphabet to generate character expressions over (starting with lowercase 'a')")
parser.add_option("-i", "--nocase",
                  action="store_true", dest="nocase",
                  help="Use a caseless alphabet for character generation")
parser.add_option("-x", "--extparam",
                  action="store_true", dest="extparam",
                  help="Generate random extended parameters")
parser.add_option("-l", "--logical",
                  action="store_true", dest="logical",
                  help="Generate logical combination expressions")
parser.add_option("-H", "--hybrid",
                  action="store_true", dest="hybrid",
                  help="Generate random flags for hybrid mode")

(options, args) = parser.parse_args()
if len(args) != 0:
    parser.error("incorrect number of arguments")

# Character codes used by generateChar(): the first `options.alphabet` codes
# starting at 'a', plus the same number starting at 'A' when --nocase is set.
alphabet = list(range(ord('a'), ord('a') + options.alphabet))
if options.nocase:
    alphabet += list(range(ord('A'), ord('A') + options.alphabet))

# Regular expressions: "<8-digit id>:/<regex>/<flags><extended params>".
for i in range(0, options.count):
    # Generated in this order: expression, then flags, then ext params.
    expr = generateRE(randint(1, options.depth), atTop = True)
    exprFlags = generateRandomOptions()
    extParams = generateRandomExtParam(options.depth, options.extparam)
    print("%08d:/%s/%s%s" % (i, expr, exprFlags, extParams))

# Logical combinations: a fixed 3000 entries, numbered on from the regular
# expressions and flagged "C".
if options.logical:
    for i in range(options.count, options.count + 3000):
        combo = generateCombination(randint(1, options.depth), atTop = True)
        print("%08d:/%s/C" % (i, combo))
|
9
tools/fuzz/limited_dict.txt
Normal file
9
tools/fuzz/limited_dict.txt
Normal file
@ -0,0 +1,9 @@
|
||||
hatstand
|
||||
teakettle
|
||||
badgerbrush
|
||||
mnemosyne
|
||||
rapscallion
|
||||
acerbic
|
||||
blackhat
|
||||
rufous
|
||||
echolalia
|
Loading…
x
Reference in New Issue
Block a user