diff --git a/tools/fuzz/aristocrats.py b/tools/fuzz/aristocrats.py
new file mode 100755
index 00000000..7b6ff2bf
--- /dev/null
+++ b/tools/fuzz/aristocrats.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+"""Emit purely random fuzz expressions, one per line, as ID:/PATTERN/FLAGS."""
+
+import sys
+from optparse import OptionParser
+from random import choice, randint
+
+
+def generateRandomOptions():
+    """Return a random subset of the flag letters, in a fixed order.
+
+    Reads the module-level ``options``: hybrid mode draws from a shorter
+    flag set.
+    """
+    if options.hybrid:
+        allflags = "smiH8W"
+    else:
+        # Maintain an ordering for consistency.
+        allflags = "smiHV8WLP"
+    # One coin flip per flag keeps the surviving flags in allflags order.
+    return "".join(choice(['', f]) for f in allflags)
+
+
+parser = OptionParser()
+parser.add_option("-d", "--depth",
+                  action="store", type="int", dest="depth", default=200,
+                  help="Depth of generation (akin to maximum length)")
+parser.add_option("-c", "--count",
+                  action="store", type="int", dest="count", default=1000,
+                  help="Number of expressions to generate")
+parser.add_option("-f", "--full",
+                  action="store_true", dest="full", default=False,
+                  help="Use a full character set including unprintables")
+parser.add_option("-H", "--hybrid",
+                  action="store_true", dest="hybrid",
+                  help="Generate random flags for hybrid mode")
+
+(options, args) = parser.parse_args()
+if len(args) != 0:
+    parser.error("incorrect number of arguments")
+if options.depth < 1:
+    parser.error("depth must be at least 1")
+
+if options.full:
+    # Every byte value except newline, which would end the output line early.
+    crange = [c for c in range(256) if c != ord('\n')]
+else:
+    crange = list(range(32, 127))
+
+# Write raw bytes so that values above 127 are emitted as single bytes on
+# Python 3 as well, instead of being re-encoded by a text stream.
+out = getattr(sys.stdout, "buffer", sys.stdout)
+
+for i in range(options.count):
+    length = randint(1, options.depth)
+    body = bytes(bytearray(choice(crange) for _ in range(length)))
+    flags = generateRandomOptions()
+    out.write(("%d:/" % i).encode("ascii") + body + b"/"
+              + flags.encode("ascii") + b"\n")
diff --git a/tools/fuzz/completocrats.py b/tools/fuzz/completocrats.py
new file mode 100755
index 00000000..60ac4d7e
--- /dev/null
+++ b/tools/fuzz/completocrats.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+"""Enumerate every fixed-length expression over an alphabet, one per line."""
+
+import sys
+from itertools import product
+from optparse import OptionParser
+
+LIMITED_ALPHABET = "abc[](){}*?+^$|:=.\\-"
+
+parser = OptionParser()
+parser.add_option("-d", "--depth",
+                  action="store", type="int", dest="depth", default=200,
+                  help="Depth of generation (akin to maximum length)")
+
+parser.add_option("-f", "--full",
+                  action="store_true", dest="full", default=False,
+                  help="Use a full character set including unprintables")
+
+parser.add_option("-l", "--limited",
+                  action="store_true", dest="limited", default=False,
+                  help="Use a very limited character set: just " + LIMITED_ALPHABET)
+
+(options, args) = parser.parse_args()
+if len(args) != 0:
+    parser.error("incorrect number of arguments")
+
+if options.full:
+    # Every byte value except newline, which would end the output line early.
+    crange = [c for c in range(256) if c != ord('\n')]
+elif options.limited:
+    crange = [ord(c) for c in LIMITED_ALPHABET]
+else:
+    crange = list(range(32, 127))
+
+# Write raw bytes so that values above 127 are emitted as single bytes on
+# Python 3 as well, instead of being re-encoded by a text stream.
+out = getattr(sys.stdout, "buffer", sys.stdout)
+
+# product() is lazy, so the expression space is streamed, not materialised.
+for i, x in enumerate(product(crange, repeat=options.depth)):
+    out.write(("%d:/" % i).encode("ascii") + bytes(bytearray(x)) + b"/\n")
diff --git a/tools/fuzz/heuristocrats.py b/tools/fuzz/heuristocrats.py
new file mode 100755
index 00000000..49c7acb4
--- /dev/null
+++ b/tools/fuzz/heuristocrats.py
@@ -0,0 +1,310 @@
+#!/usr/bin/env python
+"""Generate random structured regular expressions, one per line."""
+
+from __future__ import print_function
+
+from optparse import OptionParser
+from random import choice, expovariate, randint, random, sample
+
+# Number of logical combination expressions emitted when --logical is given.
+LOGICAL_COUNT = 3000
+
+
+# return a random non-degenerate (ie not [10]) partition of nChildren
+def chooseLeafWidth(nChildren):
+    """Split nChildren into a list of positive widths summing to nChildren."""
+    width = min(randint(1, 5), nChildren - 1)
+    # Distinct cut points strictly between 0 and nChildren.
+    cuts = [0] + sorted(sample(range(1, nChildren), width)) + [nChildren]
+    return [hi - lo for lo, hi in zip(cuts, cuts[1:]) if hi != lo]
+
+
+def generateConcat(nChildren, atTopIgnored):
+    """Concatenate sub-expressions whose sizes partition nChildren."""
+    parts = [generateRE(w, atTop=False) for w in chooseLeafWidth(nChildren)]
+    return "".join(p for p in parts if p != '')
+
+
+def makeGroup(s):
+    """Parenthesise s in either normal parens or a non-capturing group."""
+    if randint(0, 1) == 0:
+        return "(" + s + ")"
+    return "(?:" + s + ")"
+
+
+def generateAlt(nChildren, atTop):
+    """Alternate sub-expressions, never nesting an alternation directly."""
+    branches = [generateRE(w, [generateAlt], atTop)
+                for w in chooseLeafWidth(nChildren)]
+    branches = [b for b in branches if b != '']
+    s = "|".join(branches)
+    if len(branches) == 1:
+        return s
+    return makeGroup(s)
+
+
+def generateQuant(nChildren, atTopIgnored):
+    """Append a random quantifier to a sub-expression."""
+    lo = int(round(expovariate(0.2)))
+    hi = lo + int(round(expovariate(0.2)))
+    q = choice(["*", "?", "+",
+                "{%d}" % lo, "{%d,}" % lo, "{%d,%d}" % (lo, hi)])
+    r = generateRE(nChildren, [generateQuant], atTop=False)
+    if (len(r) == 1) or (r[0] != '(' and r[-1] != ")"):
+        return r + q
+    return makeGroup(r) + q
+
+
+def generateChar(nChildren, atTop=False):
+    """Return one random character from the module-level alphabet."""
+    return chr(choice(alphabet))
+
+
+def generateNocaseChar(nChildren, atTop=False):
+    """Return an uppercase alphabet char or a nocase class such as [Aa]."""
+    c = generateChar(nChildren, atTop)
+    if random() < 0.5:
+        return c.upper()
+    return '[' + c.upper() + c.lower() + ']'
+
+
+def generateDot(nChildren, atTop=False):
+    """Return the dot metacharacter."""
+    return "."
+
+
+def generateBoundary(nChildren, atTop=False):
+    """Return a backslash-b or backslash-B escape wrapped in a group."""
+    # \b, \B in parens so that we can repeat them and still be accepted by
+    # libpcre
+    return makeGroup('\\' + choice('bB'))
+
+
+def generateCharClass(nChildren, atTop=False):
+    """Return a bracketed class of random chars, sometimes negated."""
+    if random() < 0.2:
+        s = "^"
+        nChars = randint(1, 4)
+    else:
+        s = ""
+        nChars = randint(2, 4)
+    s += "".join(generateChar(1) for _ in range(nChars))
+    return "[" + s + "]"
+
+
+def generateOptionsFlags(nChildren, atTop=False):
+    """Return an inline option group such as (?si-mx) with random flags."""
+    allflags = "smix"
+    pos_flags = sample(allflags, randint(1, len(allflags)))
+    neg_flags = sample(allflags, randint(1, len(allflags)))
+    return '(?' + ''.join(pos_flags) + '-' + ''.join(neg_flags) + ')'
+
+
+def generateLogicalId(nChildren, atTop=False):
+    """Return a random expression id as a string."""
+    # NOTE(review): randint is inclusive, so this can yield options.count,
+    # which is the id of the first combination line rather than a pattern;
+    # confirm whether that is intended.
+    return str(randint(0, options.count))
+
+
+def makeLogicalGroup(s):
+    """Parenthesise a logical sub-expression."""
+    return "(" + s + ")"
+
+
+def generateLogicalNot(nChildren, atTop):
+    """Negate a parenthesised logical sub-expression."""
+    r = generateCombination(nChildren, [generateLogicalNot], atTop=False)
+    return "!" + makeLogicalGroup(r)
+
+
+def generateLogicalAnd(nChildren, atTop):
+    """Join logical sub-expressions with '&'."""
+    terms = [generateCombination(w, [generateLogicalAnd], atTop=False)
+             for w in chooseLeafWidth(nChildren)]
+    terms = [t for t in terms if t != '']
+    s = "&".join(terms)
+    if len(terms) == 1:
+        return s
+    return makeLogicalGroup(s)
+
+
+def generateLogicalOr(nChildren, atTop):
+    """Join logical sub-expressions with '|'."""
+    terms = [generateCombination(w, [generateLogicalOr], atTop=False)
+             for w in chooseLeafWidth(nChildren)]
+    terms = [t for t in terms if t != '']
+    s = "|".join(terms)
+    if len(terms) == 1:
+        return s
+    return makeLogicalGroup(s)
+
+
+weightsTree = [
+    (generateConcat, 10),
+    (generateAlt, 3),
+    (generateQuant, 2),
+]
+
+weightsLeaf = [
+    (generateChar, 30),
+    (generateCharClass, 5),
+    (generateDot, 5),
+    (generateNocaseChar, 2),
+    (generateBoundary, 1),
+    (generateOptionsFlags, 1),
+]
+
+weightsLogicalTree = [
+    (generateLogicalNot, 1),
+    (generateLogicalAnd, 5),
+    (generateLogicalOr, 5),
+]
+
+weightsLogicalLeaf = [
+    (generateLogicalId, 1),
+]
+
+
+def genChoices(weighted):
+    """Expand (item, weight) pairs into a flat list with repeats by weight."""
+    r = []
+    for (f, w) in weighted:
+        r.extend([f] * w)
+    return r
+
+
+choicesTree = genChoices(weightsTree)
+choicesLeaf = genChoices(weightsLeaf)
+choicesLogicalTree = genChoices(weightsLogicalTree)
+choicesLogicalLeaf = genChoices(weightsLogicalLeaf)
+
+weightsAnchor = [
+    ("\\A%s\\Z", 1),
+    ("\\A%s\\z", 1),
+    ("\\A%s", 4),
+    ("%s\\Z", 2),
+    ("%s\\z", 2),
+    ("^%s$", 1),
+    ("^%s", 4),
+    ("%s$", 2),
+    ("%s", 25),
+]
+choicesAnchor = genChoices(weightsAnchor)
+
+
+def generateRE(nChildren, suppressList=(), atTop=False):
+    """Generate an expression of size nChildren.
+
+    Generators in suppressList are excluded from the interior-node choice;
+    when atTop is true the result is wrapped in a randomly chosen anchor
+    template.
+    """
+    if atTop:
+        anchorSubstituteString = choice(choicesAnchor)
+    else:
+        anchorSubstituteString = "%s"
+
+    nChildren -= 1
+    if nChildren == 0:
+        res = choice(choicesLeaf)(nChildren, atTop)
+    else:
+        c = [ch for ch in choicesTree if ch not in suppressList]
+        res = choice(c)(nChildren, atTop)
+
+    return anchorSubstituteString % res
+
+
+def generateCombination(nChildren, suppressList=(), atTop=False):
+    """Generate a logical combination of size nChildren.
+
+    Generators in suppressList are excluded from the interior-node choice.
+    """
+    nChildren -= 1
+    if nChildren == 0:
+        return choice(choicesLogicalLeaf)(nChildren, atTop)
+    c = [ch for ch in choicesLogicalTree if ch not in suppressList]
+    return choice(c)(nChildren, atTop)
+
+
+def generateRandomOptions():
+    """Return a random subset of the flag letters, in a fixed order."""
+    if options.hybrid:
+        allflags = "smiH8W"
+    else:
+        # Maintain an ordering for consistency.
+        allflags = "smiHV8WLP"
+    flags = "".join(choice(['', f]) for f in allflags)
+    if options.logical:
+        flags += choice(['', 'Q'])
+    return flags
+
+
+def generateRandomExtParam(depth, extparam):
+    """Return a random {key=value,...} suffix, or an empty string."""
+    if not extparam:
+        return ""
+    params = []
+    if choice((False, True)):
+        params.append("min_length=%u" % randint(1, depth))
+    if choice((False, True)):
+        params.append("min_offset=%u" % randint(1, depth))
+    if choice((False, True)):
+        params.append("max_offset=%u" % randint(1, depth*3))
+    if choice((False, True)):
+        dist = randint(1, 3)
+        if choice((False, True)):
+            params.append("edit_distance=%u" % dist)
+        else:
+            params.append("hamming_distance=%u" % dist)
+    if params:
+        return "{" + ",".join(params) + "}"
+    return ""
+
+
+parser = OptionParser()
+parser.add_option("-d", "--depth",
+                  action="store", type="int", dest="depth", default=200,
+                  help="Depth of generation (akin to maximum length)")
+parser.add_option("-c", "--count",
+                  action="store", type="int", dest="count", default=1000,
+                  help="Number of expressions to generate")
+parser.add_option("-a", "--alphabet",
+                  action="store", type="int", dest="alphabet", default=26,
+                  help="Size of alphabet to generate character expressions over (starting with lowercase 'a')")
+parser.add_option("-i", "--nocase",
+                  action="store_true", dest="nocase",
+                  help="Use a caseless alphabet for character generation")
+parser.add_option("-x", "--extparam",
+                  action="store_true", dest="extparam",
+                  help="Generate random extended parameters")
+parser.add_option("-l", "--logical",
+                  action="store_true", dest="logical",
+                  help="Generate logical combination expressions")
+parser.add_option("-H", "--hybrid",
+                  action="store_true", dest="hybrid",
+                  help="Generate random flags for hybrid mode")
+
+(options, args) = parser.parse_args()
+if len(args) != 0:
+    parser.error("incorrect number of arguments")
+if options.depth < 1:
+    parser.error("depth must be at least 1")
+if options.alphabet < 1:
+    parser.error("alphabet must contain at least one character")
+
+alphabet = list(range(ord('a'), ord('a') + options.alphabet))
+if options.nocase:
+    alphabet += range(ord('A'), ord('A') + options.alphabet)
+
+for i in range(options.count):
+    regex = generateRE(randint(1, options.depth), atTop=True)
+    flags = generateRandomOptions()
+    extparam = generateRandomExtParam(options.depth, options.extparam)
+    print("%08d:/%s/%s%s" % (i, regex, flags, extparam))
+
+if options.logical:
+    for i in range(options.count, options.count + LOGICAL_COUNT):
+        combo = generateCombination(randint(1, options.depth), atTop=True)
+        print("%08d:/%s/C" % (i, combo))
diff --git a/tools/fuzz/limited_dict.txt b/tools/fuzz/limited_dict.txt
new file mode 100644
index 00000000..7c3daf4b
--- /dev/null
+++ b/tools/fuzz/limited_dict.txt
@@ -0,0 +1,9 @@
+hatstand
+teakettle
+badgerbrush
+mnemosyne
+rapscallion
+acerbic
+blackhat
+rufous
+echolalia