#!/usr/bin/python

# Copyright (c) 2015, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#  * Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of Intel Corporation nor the names of its contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

from autogen_utils import *
from base_autogen import *
from string import Template

class OrStep(Step):
    def __init__(self, context, offset, width):
        Step.__init__(self, context, offset)
        s_var = self.gv("st%d" % offset)
        if width < 128:
            self.val = "s |= %s;" % s_var.name
        else:
            self.val = "s = or%d(s, %s);" % (width, s_var.name)

class ShiftStateStep(Step):
    def __init__(self, context, offset = 0, stride_used = 1):
        Step.__init__(self, context, offset)
        m = self.matcher
        state = m.state_variable
        shift_distance = -1 * stride_used * m.num_buckets
        self.val = "%s = %s;" % (state.name, state.type.shift_expr(state.name, shift_distance))

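# Illustrative note (assuming shift_expr renders a negative distance as a
# right shift): with the default num_buckets = 8 and stride_used = 1,
# shift_distance is -8, so each ShiftStateStep retires one byte's worth of
# bucket bits from the low end of the state register.
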
class BulkLoadStep(Step):
    def __init__(self, context, offset, size, define_var = True, aligned = True):
        Step.__init__(self, context, offset)
        m = self.matcher
        self.latency = 4
        blt = m.bulk_load_type
        if aligned:
            init_string = blt.load_expr_data(self.offset, code = "aligned")
        else:
            init_string = blt.load_expr_data(self.offset)

        var_name = "current_data_%d" % offset
        if define_var:
            lb_var = self.nv(blt, var_name)
            self.val = lb_var.gen_initializer_stmt(init_string)
        else:
            lb_var = self.gv(var_name, reader = False, writer = True)
            self.val = "%s = %s;" % (var_name, init_string)

class ValueExtractStep(Step):
    def __init__(self, context, offset, sub_load_cautious = False):
        Step.__init__(self, context, offset)
        m = self.matcher
        self.latency = 2
        dsb = m.datasize_bytes
        modval = offset % dsb

        if modval == dsb - 1:
            # Case 1: the value at this offset runs past the end of the
            # current bulk load, so do a separate single load (cautiously
            # if requested)

            self.latency = 4
            if sub_load_cautious:
                code_string = "cautious_forward"
            else:
                code_string = "normal"
            load_string = m.single_load_type.load_expr_data(self.offset, code_string)
            temp_string = "(%s << %d)" % (load_string, m.reach_shift_adjust)
        else:
            # Case 2: reading a value that can be found entirely in the current register
            if m.fdr2_force_naive_load:
                load_string = m.single_load_type.load_expr_data(self.offset, "normal")
                temp_string = "(%s << %d)" % (load_string, m.reach_shift_adjust)
            else:
                lb_var = self.gv("current_data_%d" % (offset - modval))
                if modval == 0:
                    # Case 2a: value is at LSB end of the register and must be left-
                    # shifted into place if there is a "reach_shift_adjust" required
                    temp_string = "(%s << %d)" % (lb_var.name, m.reach_shift_adjust)
                else:
                    # Case 2b: value is in the middle of the register and will be
                    # right-shifted into place (adjusted by "reach_shift_adjust")
                    temp_string = "(%s >> %d)" % (lb_var.name, modval*8 - m.reach_shift_adjust)

        init_string = "(%s) & (domain_mask << %d)" % (temp_string, m.reach_shift_adjust)
        v_var = self.nv(m.value_extract_type, "v%d" % offset)
        self.val = v_var.gen_initializer_stmt(init_string)

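# Worked example for Case 2b above: with the default 8-byte bulk load,
# offset = 3 gives modval = 3, so the generated expression is roughly
# (current_data_0 >> (24 - reach_shift_adjust)) & (domain_mask << reach_shift_adjust);
# the adjust bits left in place are consumed by the scaled table index in
# TableLookupStep below.
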
class TableLookupStep(Step):
    def __init__(self, context, reach_multiplier, offset = 0):
        Step.__init__(self, context, offset)
        m = self.matcher
        self.latency = 4
        v_var = self.gv("v%d" % offset)
        s_var = self.nv(m.state_type, "st%d" % offset)
        init_string = "*(const %s *)(ft + %s*%dU)" % (m.state_type.get_name(),
                                                      v_var.name, reach_multiplier)
        self.val = s_var.gen_initializer_stmt(init_string)

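# For instance, with a 128-bit state and reach_mult = 8 this generates a
# lookup along the lines of "st0 = *(const m128 *)(ft + v0*8U);" (type name
# per this codebase); the *8U scaling combined with the adjust bit kept in
# v0 strides through the reach table in full 16-byte entries.
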
class ShiftReachMaskStep(Step):
    def __init__(self, context, offset):
        Step.__init__(self, context, offset)
        m = self.matcher
        extr = m.extract_frequency
        modval = offset % extr
        s_var = self.gv("st%d" % offset, writer = True)
        self.val = "%s = %s;" % (s_var.name, s_var.type.shift_expr(s_var.name, modval * m.num_buckets))

class ConfExtractStep(Step):
    def __init__(self, context, offset):
        Step.__init__(self, context, offset)
        m = self.matcher
        if m.state_type.isSIMDOnIntel():
            self.latency = 2
        init_string = m.state_type.lowbit_extract_expr("s", m.extract_size)
        extr_var = self.nv(m.extr_type, "extr%d" % offset)
        self.val = extr_var.gen_initializer_stmt(init_string)

class ConfAccumulateStep(Step):
    def __init__(self, context, extract_offset, conf_offset, define_var = True):
        Step.__init__(self, context, extract_offset)
        m = self.matcher
        extr_var = self.gv("extr%d" % extract_offset)
        extr_var_cast = "((%s)%s)" % (m.conf_type.get_name(), extr_var.name)
        if extract_offset == conf_offset:
            # create conf_var as a straight copy of extr
            if define_var:
                conf_var = self.nv(m.conf_type, "conf%d" % conf_offset)
                self.val = conf_var.gen_initializer_stmt(extr_var_cast)
            else:
                conf_var = self.gv("conf%d" % conf_offset, writer = True, reader = True)
                self.val = "%s = %s;" % (conf_var.name, extr_var_cast)
        else:
            # shift extr_var and insert/OR it in conf_var
            conf_var = self.gv("conf%d" % conf_offset, writer = True, reader = True)
            shift_dist = (extract_offset - conf_offset) * m.num_buckets
            self.val = "%s |= %s;" % (conf_var.name, m.conf_type.shift_expr(extr_var_cast, shift_dist))
        self.latency = 2

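# Accumulation example for the shifted path: with num_buckets = 8,
# extract_frequency = 4 and confirm_frequency = 8, the extract at offset 4
# accumulates into conf0 with shift_dist = (4 - 0) * 8 = 32, landing the
# second 32-bit extract in the upper half of the 64-bit conf register.
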
class ConfirmFlipStep(Step):
    def __init__(self, context, offset):
        Step.__init__(self, context, offset)
        m = self.matcher
        conf_var = self.gv("conf%d" % self.offset, writer = True)
        self.val = "%s = %s;" % (conf_var.name,
                conf_var.type.flip_lowbits_expr(conf_var.name, self.matcher.confirm_frequency * m.num_buckets))

class ConfirmStep(Step):
    def __init__(self, context, offset, cautious = False):
        Step.__init__(self, context, offset)
        m = self.matcher
        conf_var = self.gv("conf%d" % offset, writer = True)
        self.val = m.produce_confirm_base(conf_var.name, conf_var.type.size, offset, cautious,
                                          enable_confirmless = m.stride == 1, do_bailout = False)

class M3(MatcherBase):

    def produce_compile_call(self):
        print " { %d, %d, %d, %d, %s, %d, %d }," % (
                self.id, self.state_width, self.num_buckets,
                self.stride,
                self.arch.target, self.conf_pull_back, self.conf_top_level_split)

    def produce_main_loop(self, switch_variant = False):
        stride_offsets = xrange(0, self.loop_bytes, self.stride)
        stride_offsetSet = set(stride_offsets)
        so_steps_last_block = []
        sh = None
        last_confirm = None
        ctxt = CodeGenContext(self)

        if switch_variant:
            print " ptr -= (iterBytes - dist);"
            print " { " # need an extra scope around switch variant to stop its globals escaping
        else:
            print " if (doMainLoop) {"
            print " for (; ptr + LOOP_READ_AHEAD < buf + len; ptr += iterBytes) {"
            print self.produce_flood_check()
            print " __builtin_prefetch(ptr + (iterBytes*4));"
            print " assert(((size_t)ptr % START_MOD) == 0);"

        # just do globally for now
        if switch_variant:
            subsidiary_load_cautious = True
            confirm_cautious = True
        else:
            subsidiary_load_cautious = False
            confirm_cautious = False

        if not self.fdr2_force_naive_load:
            bulk_load_steps = [ off for off in range(self.loop_bytes)
                                if off % self.datasize_bytes == 0 and
                                   (set(range(off, off + self.datasize_bytes - 1)) & stride_offsetSet) ]
        else:
            bulk_load_steps = []

        confirm_steps = [ off for off in range(self.loop_bytes) if off % self.confirm_frequency == 0 ]

        for off in bulk_load_steps:
            lb_var = ctxt.new_var(None, self.bulk_load_type, "current_data_%d" % off)
            print " " + lb_var.gen_initializer_stmt()

        for off in confirm_steps:
            var_name = "conf%d" % off
            conf_def_var = ctxt.new_var(None, self.conf_type, var_name)
            if switch_variant:
                init_string = "(%s)-1" % self.conf_type.get_name()
            else:
                init_string = ""
            print " " + conf_def_var.gen_initializer_stmt(init_string)

        if switch_variant:
            print " switch(iterBytes - dist) {"
            for i in range(0, self.loop_bytes):
                print " case %d:" % i

                # init and poison conf; over-precise but harmless
                conf_id = (i / self.confirm_frequency) * self.confirm_frequency
                if i % self.confirm_frequency:
                    conf_fixup_bits = self.conf_type.size - (self.num_buckets * (i % self.confirm_frequency))
                    print " conf%d >>= %d;" % (conf_id, conf_fixup_bits)
                else:
                    print " conf%d = 0;" % conf_id

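                # Worked example of the fixup above: with a 64-bit conf type,
                # num_buckets = 8 and confirm_frequency = 8, entering at i = 3
                # gives conf_fixup_bits = 64 - 8*3 = 40; shifting the all-ones
                # conf right by 40 leaves the low 24 bits (the three skipped
                # byte positions) poisoned while clearing the rest for
                # accumulation.
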
                # init state
                state_fixup = i % self.extract_frequency
                state = self.state_variable
                shift_distance = self.num_buckets * state_fixup
                if state_fixup:
                    print " %s = %s;" % (state.name, state.type.shift_expr(state.name, shift_distance))
                    if self.state_width < 128:
                        print " %s |= %s;" % (state.name, state.type.lowbit_mask(shift_distance))
                    else:
                        print " %s = or%d(%s, %s);" % (state.name, self.state_width, state.name, state.type.lowbit_mask(shift_distance))

                if not self.fdr2_force_naive_load:
                    # init current_data (could poison it in some cases)
                    load_mod = i % self.datasize_bytes
                    load_offset = i - load_mod
                    if load_mod:
                        # not coming in on an even boundary means having to do a load var
                        # actually, there are a bunch of things we can do on this bulk load
                        # to avoid having to be 'cautious_backward' but I'm not completely
                        # sure they are good ideas
                        init_string = self.bulk_load_type.load_expr_data(load_offset,
                                                                         code = "cautious_backward")
                        var_name = "current_data_%d" % load_offset
                        lb_var = ctxt.get_var(None, var_name, reader = False, writer = True)
                        print " %s = %s;" % (lb_var.name, init_string)

                print " goto off%d;" % i
            print " case %d: goto skipSwitch;" % self.loop_bytes
            print " }"
            print " {"

        for off in range(self.loop_bytes):

            # X_mod is the offset we're up to relative to the last X operation
            # X_offset is which of the last X operations matches this iteration

            if switch_variant:
                LabelStep(ctxt, off)

            if off in bulk_load_steps:
                if not self.fdr2_force_naive_load:
                    BulkLoadStep(ctxt, off, self.datasize, define_var = False, aligned = not switch_variant)

            if off in stride_offsets:
                if switch_variant:
                    OpenScopeStep(ctxt, off)
                ValueExtractStep(ctxt, off, sub_load_cautious = subsidiary_load_cautious)
                TableLookupStep(ctxt, self.reach_mult, off)
                if off % self.extract_frequency:
                    ShiftReachMaskStep(ctxt, off)
                so = OrStep(ctxt, off, self.state_width)
                if switch_variant:
                    CloseScopeStep(ctxt, off)
                if sh != None:
                    so.add_dependency(sh)
                so_steps_last_block += [ so ]

            extract_mod = off % self.extract_frequency
            extract_offset = off - extract_mod
            extract_ready = extract_mod == self.extract_frequency - 1
            if extract_ready:
                if switch_variant:
                    OpenScopeStep(ctxt, off)
                ex = ConfExtractStep(ctxt, extract_offset)
                # note: confirm_offset carries over from the previous loop iteration
                ConfAccumulateStep(ctxt, extract_offset, confirm_offset, define_var = False)
                for so_step in so_steps_last_block:
                    ex.add_dependency(so_step)
                if switch_variant:
                    CloseScopeStep(ctxt, off)
                so_steps_last_block = []
                sh = ShiftStateStep(ctxt, extract_offset, stride_used = self.extract_frequency)
                sh.add_dependency(ex)

            confirm_mod = off % self.confirm_frequency
            confirm_offset = off - confirm_mod
            confirm_ready = confirm_mod == self.confirm_frequency - 1
            if confirm_ready:
                cflip = ConfirmFlipStep(ctxt, confirm_offset)
                cf = ConfirmStep(ctxt, confirm_offset, cautious = confirm_cautious)
                if last_confirm:
                    cf.add_dependency(last_confirm)
                last_confirm = cf

        if not switch_variant:
            print ctxt.schedule([ last_confirm, sh ])
        else:
            print ctxt.dontschedule([ last_confirm, sh ])

        if switch_variant:
            print "skipSwitch:;"
            print " ptr += iterBytes;"
        print " }" # close extra scope around switch variant
        print " }"

    def produce_init_state(self):
        state = self.state_variable
        s_type = self.state_type
        shift_distance = -1 * self.num_buckets
        shift_expr = "%s = %s" % (state.name, state.type.shift_expr(state.name, shift_distance))

        s = Template("""
    $TYPENAME s;
    if (a->len_history) {
        u32 tmp = 0;
        if (a->start_offset == 0) {
            tmp = a->buf_history[a->len_history - 1];
            tmp |= (a->buf[0] << 8);
        } else {
            tmp = lv_u16(a->buf + a->start_offset - 1, a->buf, a->buf + a->len);
        }
        tmp &= fdr->domainMask;
        s = *((const $TYPENAME *)ft + tmp);
        $SHIFT_EXPR;
    } else {
        s = *(const $TYPENAME *)&fdr->start;
    }
""").substitute(TYPENAME = s_type.get_name(),
                ZERO_EXPR = s_type.zero_expression(),
                SHIFT_EXPR = shift_expr)
        return s

    def produce_code(self):

        loop_read_behind = 0
        loop_read_ahead = self.loop_bytes + 1

        # we set up mask and shift stuff for extracting our masks from registers
        #
        # we have a choice as to whether to mask out the value early or
        # extract the value (shift first) then mask it
        #
        # Intel has a free scaling factor from 1/2/4/8 so we want to combine
        # the extra needed shift for SSE registers with the mask operation

        ssb = self.state_type.size / 8 # state size in bytes

        # Intel path
        if ssb == 16:
            # obscure corner - we don't have the room in the register to
            # do this for all values so we don't. domain==16 is pretty
            # bad anyhow, of course
            self.reach_mult = 8
        else:
            self.reach_mult = ssb

        shift_amts = { 1 : 0, 2 : 1, 4 : 2, 8 : 3, 16 : 4 }
        self.reach_shift_adjust = shift_amts[ ssb/self.reach_mult ]

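        # Worked example (the only supported case today): a 128-bit state
        # gives ssb = 16 and reach_mult = 8, so reach_shift_adjust =
        # shift_amts[2] = 1; extracted values keep one extra low bit so the
        # v*8U addressing in TableLookupStep scales to full 16-byte table
        # entries without a separate shift.
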
        print self.produce_header(visible = False)

        print "// ",
        print " Arch: " + self.arch.name,
        print " State type: " + self.state_type.get_name(),
        print " Num buckets: %d" % self.num_buckets,
        print " Stride: %d" % self.stride

        print self.produce_common_declarations()

        print " assert(fdr->domain > 8 && fdr->domain < 16);"
        print
        print " u64a domain_mask = fdr->domainMask;"
        print " const u8 * ft = (const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR));"
        print " const u32 * confBase = (const u32 *)(ft + fdr->tabSize);"
        print self.produce_init_state()
        print " const size_t iterBytes = %d;" % self.loop_bytes
        print " const size_t START_MOD = %d;" % self.datasize_bytes
        print " const size_t LOOP_READ_AHEAD = %d;" % loop_read_ahead

        print """
    while (ptr < buf + len) {

        u8 doMainLoop = 1;
        size_t remaining = len - (ptr - buf);
        size_t dist;
        if (remaining <= iterBytes) {
            dist = remaining; // once through the switch and we're done
        } else if (remaining < 2 * iterBytes) {
            // nibble some stuff off the front, skip the main loop,
            // then come back here
            dist = iterBytes; // maybe could be cleverer
        } else {
            // now, we need to see if we can make it to a main loop iteration
            // if so, we need to ensure that the main loop iteration is aligned
            // to a START_MOD boundary and i >= 8 so we can read ptr + i - 8

            // see if we can do it - if not, just switch the main loop off,
            // eat iterBytes in cautious mode, and come back to this loop

            const u8 * target = MAX(buf + 8, ptr);
            target = ROUNDUP_PTR(target, START_MOD);
            dist = target - ptr;
            if (dist > iterBytes) {
                doMainLoop = 0;
                dist = iterBytes;
            }
        }
"""

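        # Rough walk-through of the dist cases above, with iterBytes == 16:
        # remaining == 10 -> dist = 10, one cautious switch pass finishes the
        # buffer; remaining == 20 -> dist = 16, eat a full block cautiously
        # and come back; otherwise dist only covers what is needed to align
        # ptr to a START_MOD boundary (at least 8 bytes into the buffer)
        # before the unrolled main loop takes over.
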
        self.produce_main_loop(switch_variant = True)
        self.produce_main_loop(switch_variant = False)
        print """
    }
"""
        print self.produce_footer()

    def get_name(self):
        return "fdr_exec_%s_s%d_w%d" % (self.arch.name, self.stride, self.state_width)

    def __init__(self, state_width, stride,
                 arch,
                 table_state_width = None,
                 num_buckets = 8,
                 extract_frequency = None,
                 confirm_frequency = None):

        # First - set up the values that are fundamental to how this matcher will operate
        self.arch = arch

        # get the width of the state on which we operate internally
        if state_width not in [ 128 ]:
            fail_out("Unknown state width: %d" % state_width)
        self.state_width = state_width
        self.state_type = getRequiredType(self.state_width)
        self.state_variable = IntegerVariable("s", self.state_type)

        table_state_width = state_width
        self.table_state_width = state_width
        self.table_state_type = getRequiredType(self.table_state_width)

        # this is the load type required for domain [9:15] if we want to
        # load it one at a time
        self.single_load_type = IntegerType(16)

        # stride is the frequency with which we make data-driven
        # accesses to our reach table
        if stride not in [ 1, 2, 4, 8 ]:
            fail_out("Unsupported stride: %d" % stride)
        if stride * num_buckets > state_width:
            fail_out("Stride %d is too big for the number of buckets %d given state width %d\n" % (stride, num_buckets, state_width))
        self.stride = stride

        if num_buckets != 8:
            fail_out("Unsupported number of buckets: %d" % num_buckets)
        if state_width % num_buckets and state_width == 128:
            fail_out("Bucket scheme requires bit-shifts on m128 (failing)")
        self.num_buckets = num_buckets

        # Second - set up derived or optimization values - these can be
        # overridden by arguments that are passed in

        self.datasize = 64
        self.bulk_load_type = IntegerType(self.datasize)
        self.datasize_bytes = self.datasize/8

        self.value_extract_type = IntegerType(self.datasize)

        self.fdr2_force_naive_load = False # disable everywhere for trunk

        # extract frequency is how frequently (in bytes) we destructively shift
        # our state value after having pulled out that many bytes into a
        # confirm register (of one sort or another).
        # None means a default value - datasize, our biggest easily available GPR
        if extract_frequency is None:
            extract_frequency = self.datasize_bytes
        self.extract_frequency = extract_frequency
        self.extract_size = self.extract_frequency*self.num_buckets
        if extract_frequency < stride:
            fail_out("Can't extract at extract frequency %d with stride %d" % (extract_frequency, stride))
        if extract_frequency not in [ None, 1, 2, 4, 8, 16 ]:
            fail_out("Weird extract frequency: %d" % extract_frequency)

        if self.extract_size <= 32:
            self.extr_type = IntegerType(32)
        elif self.extract_size <= 64:
            self.extr_type = IntegerType(64)
        else:
            fail_out("Implausible size %d required for confirm extract step" % self.extract_size)

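        # With the defaults (datasize = 64 so extract_frequency = 8, and
        # num_buckets = 8), extract_size is 64 and the extract register is a
        # 64-bit GPR; extract_frequency = 4 would fit the 32-bit path instead.
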
        # extract_frequency is how often we pull out our state and place
        # it somewhere in a lossless fashion
        # confirm_frequency, on the other hand, is how frequently we
        # take the state extracted by extract_frequency and cobble it
        # together into a matching loop
        # confirm_frequency must be a multiple of extract_frequency
        # and must fit into a fast register; for now, we're going to
        # stay in the GPR domain
        if confirm_frequency is None:
            confirm_frequency = self.extract_frequency
        self.confirm_frequency = confirm_frequency
        if confirm_frequency % self.extract_frequency:
            fail_out("Confirm frequency %d must be evenly divisible by extract_frequency %d" % (confirm_frequency, self.extract_frequency))

        self.conf_size = self.confirm_frequency * self.num_buckets
        if self.conf_size <= 32:
            self.conf_type = IntegerType(32)
        elif self.conf_size <= 64:
            self.conf_type = IntegerType(64)
        else:
            fail_out("Implausible size %d required for confirm accumulate step" % self.conf_size)

        # how many bytes in flight at once
        self.loop_bytes = 16

        # confirm configuration

        # how many entries in the top-level confirm table - 256 means
        # complete split on the last character
        self.conf_top_level_split = 256

        # how much we 'pull back' in confirm - this is obviously related
        # to the first level conf but we will keep two separate parameters
        # for this to avoid the risk of conflating them
        self.conf_pull_back = 1

        if self.conf_pull_back > 0 and self.conf_top_level_split < 256:
            fail_out("Pull back distance %d not supported by top level split %d" % (self.conf_pull_back, self.conf_top_level_split))

        # minor stuff
        self.default_body_indent = 8