mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
260 lines
7.8 KiB
Python
Executable File
260 lines
7.8 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
from optparse import OptionParser
|
|
from random import *
|
|
import string
|
|
import sys
|
|
|
|
# return a random non-degenerate (ie not [10]) partition of nChildren
|
|
def chooseLeafWidth(nChildren):
|
|
width = randint(1, 5)
|
|
width = min(width, nChildren-1)
|
|
s = sample(range(1, nChildren), width)
|
|
s.sort()
|
|
s = [0] + s + [nChildren]
|
|
v = [ s[i+1] - s[i] for i in range(0, len(s)-1) if s[i+1] != s[i] ]
|
|
return v
|
|
|
|
def generateConcat(nChildren, atTopIgnored):
|
|
v = [ generateRE(w, atTop = False) for w in chooseLeafWidth(nChildren) ]
|
|
v = [ r for r in v if r != '' ]
|
|
return string.join(v, "")
|
|
|
|
def makeGroup(s):
|
|
# Parenthesise either in normal parens or a non-capturing group.
|
|
if randint(0, 1) == 0:
|
|
return "(" + s + ")"
|
|
else:
|
|
return "(?:" + s + ")"
|
|
|
|
def generateAlt(nChildren, atTop):
|
|
v = [ generateRE(w, [generateAlt], atTop) for w in chooseLeafWidth(nChildren) ]
|
|
v = [ r for r in v if r != '' ]
|
|
s = string.join(v, "|")
|
|
if len(v) == 1:
|
|
return s
|
|
else:
|
|
return makeGroup(s)
|
|
|
|
def generateQuant(nChildren, atTopIgnored):
|
|
lo = int(round(expovariate(0.2)))
|
|
hi = lo + int(round(expovariate(0.2)))
|
|
q = choice(["*", "?", "+", "{%d}"%lo, "{%d,}"%lo, "{%d,%d}"%(lo,hi)])
|
|
r = generateRE(nChildren, [generateQuant], atTop = False)
|
|
if (len(r) == 1) or (r[0] != '(' and r[-1] != ")"):
|
|
return r + q
|
|
else:
|
|
return makeGroup(r) + q
|
|
|
|
def generateChar(nChildren, atTop = False):
|
|
return chr(choice(alphabet))
|
|
|
|
def generateNocaseChar(nChildren, atTop = False):
|
|
'Either generate an uppercase char from the alphabet or a nocase class [Aa]'
|
|
c = generateChar(nChildren, atTop)
|
|
if random() < 0.5:
|
|
return c.upper()
|
|
else:
|
|
return '[' + c.upper() + c.lower() + ']'
|
|
|
|
def generateDot(nChildren, atTop = False):
|
|
return "."
|
|
|
|
def generateBoundary(nChildren, atTop = False):
|
|
# \b, \B in parens so that we can repeat them and still be accepted by
|
|
# libpcre
|
|
return makeGroup('\\' + choice('bB'))
|
|
|
|
def generateCharClass(nChildren, atTop = False):
|
|
s = ""
|
|
if random() < 0.2:
|
|
s = "^"
|
|
nChars = randint(1,4)
|
|
else:
|
|
nChars = randint(2,4)
|
|
|
|
for i in xrange(nChars):
|
|
s += generateChar(1)
|
|
return "[" + s + "]"
|
|
|
|
def generateOptionsFlags(nChildren, atTop = False):
|
|
allflags = "smix"
|
|
pos_flags = sample(allflags, randint(1, len(allflags)))
|
|
neg_flags = sample(allflags, randint(1, len(allflags)))
|
|
s = '(?' + ''.join(pos_flags) + '-' + ''.join(neg_flags) + ')'
|
|
return s
|
|
|
|
def generateLogicalId(nChildren, atTop = False):
|
|
return str(randint(0, options.count))
|
|
|
|
def makeLogicalGroup(s):
|
|
return "(" + s + ")"
|
|
|
|
def generateLogicalNot(nChildren, atTop):
|
|
r = generateCombination(nChildren, [generateLogicalNot], atTop = False)
|
|
return "!" + makeLogicalGroup(r)
|
|
|
|
def generateLogicalAnd(nChildren, atTop):
|
|
v = [ generateCombination(w, [generateLogicalAnd], atTop = False) for w in chooseLeafWidth(nChildren) ]
|
|
v = [ r for r in v if r != '' ]
|
|
s = string.join(v, "&")
|
|
if len(v) == 1:
|
|
return s
|
|
else:
|
|
return makeLogicalGroup(s)
|
|
|
|
def generateLogicalOr(nChildren, atTop):
|
|
v = [ generateCombination(w, [generateLogicalOr], atTop = False) for w in chooseLeafWidth(nChildren) ]
|
|
v = [ r for r in v if r != '' ]
|
|
s = string.join(v, "|")
|
|
if len(v) == 1:
|
|
return s
|
|
else:
|
|
return makeLogicalGroup(s)
|
|
|
|
weightsTree = [
|
|
(generateConcat, 10),
|
|
(generateAlt, 3),
|
|
(generateQuant, 2),
|
|
]
|
|
|
|
weightsLeaf = [
|
|
(generateChar, 30),
|
|
(generateCharClass, 5),
|
|
(generateDot, 5),
|
|
(generateNocaseChar, 2),
|
|
(generateBoundary, 1),
|
|
(generateOptionsFlags, 1)
|
|
]
|
|
|
|
weightsLogicalTree = [
|
|
(generateLogicalNot, 1),
|
|
(generateLogicalAnd, 5),
|
|
(generateLogicalOr, 5),
|
|
]
|
|
|
|
weightsLogicalLeaf = [
|
|
(generateLogicalId, 1),
|
|
]
|
|
|
|
def genChoices(weighted):
|
|
r = []
|
|
for (f, w) in weighted:
|
|
r = r + [f] * w
|
|
return r
|
|
|
|
choicesTree = genChoices(weightsTree)
|
|
choicesLeaf = genChoices(weightsLeaf)
|
|
choicesLogicalTree = genChoices(weightsLogicalTree)
|
|
choicesLogicalLeaf = genChoices(weightsLogicalLeaf)
|
|
|
|
weightsAnchor = [
|
|
("\\A%s\\Z", 1),
|
|
("\\A%s\\z", 1),
|
|
("\\A%s", 4),
|
|
("%s\\Z", 2),
|
|
("%s\\z", 2),
|
|
("^%s$", 1),
|
|
("^%s", 4),
|
|
("%s$", 2),
|
|
("%s", 25)
|
|
]
|
|
choicesAnchor = genChoices(weightsAnchor)
|
|
|
|
def generateRE(nChildren, suppressList = [], atTop = False):
|
|
if atTop:
|
|
anchorSubstituteString = choice(choicesAnchor)
|
|
else:
|
|
anchorSubstituteString = "%s"
|
|
|
|
nChildren -= 1
|
|
if nChildren == 0:
|
|
res = choice(choicesLeaf)(nChildren, atTop)
|
|
else:
|
|
c = [ ch for ch in choicesTree if ch not in suppressList ]
|
|
res = choice(c)(nChildren, atTop)
|
|
|
|
return anchorSubstituteString % res
|
|
|
|
def generateCombination(nChildren, suppressList = [], atTop = False):
|
|
nChildren -= 1
|
|
if nChildren == 0:
|
|
res = choice(choicesLogicalLeaf)(nChildren, atTop)
|
|
else:
|
|
c = [ ch for ch in choicesLogicalTree if ch not in suppressList ]
|
|
res = choice(c)(nChildren, atTop)
|
|
|
|
return res
|
|
|
|
def generateRandomOptions():
|
|
if options.hybrid:
|
|
allflags = "smiH8W"
|
|
else:
|
|
# Maintain an ordering for consistency.
|
|
allflags = "smiHV8WLP"
|
|
flags = ""
|
|
for f in allflags:
|
|
flags += choice(['', f])
|
|
if options.logical:
|
|
flags += choice(['', 'Q'])
|
|
return flags
|
|
|
|
def generateRandomExtParam(depth, extparam):
|
|
if not extparam:
|
|
return ""
|
|
params = []
|
|
if choice((False, True)):
|
|
params.append("min_length=%u" % randint(1, depth))
|
|
if choice((False, True)):
|
|
params.append("min_offset=%u" % randint(1, depth))
|
|
if choice((False, True)):
|
|
params.append("max_offset=%u" % randint(1, depth*3))
|
|
if choice((False, True)):
|
|
dist = randint(1, 3)
|
|
if choice((False, True)):
|
|
params.append("edit_distance=%u" % dist)
|
|
else:
|
|
params.append("hamming_distance=%u" % dist)
|
|
if params:
|
|
return "{" + ",".join(params) + "}"
|
|
else:
|
|
return ""
|
|
|
|
parser = OptionParser()
|
|
parser.add_option("-d", "--depth",
|
|
action="store", type="int", dest="depth", default=200,
|
|
help="Depth of generation (akin to maximum length)")
|
|
parser.add_option("-c", "--count",
|
|
action="store", type="int", dest="count", default=1000,
|
|
help="Number of expressions to generate")
|
|
parser.add_option("-a", "--alphabet",
|
|
action="store", type="int", dest="alphabet", default=26,
|
|
help="Size of alphabet to generate character expressions over (starting with lowercase 'a')")
|
|
parser.add_option("-i", "--nocase",
|
|
action="store_true", dest="nocase",
|
|
help="Use a caseless alphabet for character generation")
|
|
parser.add_option("-x", "--extparam",
|
|
action="store_true", dest="extparam",
|
|
help="Generate random extended parameters")
|
|
parser.add_option("-l", "--logical",
|
|
action="store_true", dest="logical",
|
|
help="Generate logical combination expressions")
|
|
parser.add_option("-H", "--hybrid",
|
|
action="store_true", dest="hybrid",
|
|
help="Generate random flags for hybrid mode")
|
|
|
|
(options, args) = parser.parse_args()
|
|
if len(args) != 0:
|
|
parser.error("incorrect number of arguments")
|
|
|
|
alphabet = range(ord('a'), ord('a') + options.alphabet)
|
|
if options.nocase:
|
|
alphabet += range(ord('A'), ord('A') + options.alphabet)
|
|
|
|
for i in xrange(0, options.count):
|
|
print "%08d:/%s/%s%s" % (i, generateRE(randint(1, options.depth), atTop = True), generateRandomOptions(), generateRandomExtParam(options.depth, options.extparam))
|
|
|
|
if options.logical:
|
|
for i in xrange(options.count, options.count + 3000):
|
|
print "%08d:/%s/C" % (i, generateCombination(randint(1, options.depth), atTop = True))
|