mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-29 19:24:25 +03:00
Initial commit of Hyperscan
This commit is contained in:
39
src/fdr/CMakeLists.txt
Normal file
39
src/fdr/CMakeLists.txt
Normal file
@@ -0,0 +1,39 @@
|
||||
# Build rules that generate the FDR/Teddy sources from the Python scripts
# in this directory.

# Every script taking part in the generation is listed so it can be passed
# as an explicit dependency of the generation rules.
set(AUTOGEN_PY_FILES
    arch.py autogen.py autogen_utils.py
    base_autogen.py fdr_autogen.py teddy_autogen.py
)
|
||||
|
||||
# fdr_autogen(<type> <out>)
#
# Adds a rule that runs autogen.py with <type> as its single argument and
# redirects its standard output into <out> in the current binary directory.
# Also adds the custom target "autogen_<type>", which depends on that file.
# The rule is re-run when any file in AUTOGEN_PY_FILES changes.
#
# Example: fdr_autogen(runtime fdr_autogen.c)
function(fdr_autogen type out)
    # VERBATIM makes argument escaping identical on every platform; a
    # stand-alone ">" argument is still treated as a shell redirection.
    add_custom_command(
        COMMENT "AUTOGEN ${out}"
        OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${out}"
        COMMAND ${PYTHON} "${CMAKE_CURRENT_SOURCE_DIR}/autogen.py" ${type}
                > "${CMAKE_CURRENT_BINARY_DIR}/${out}"
        DEPENDS ${AUTOGEN_PY_FILES}
        VERBATIM
    )
    add_custom_target(autogen_${type}
        DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/${out}")
endfunction()
|
||||
|
||||
# Instantiate one generation rule per generated source file.
fdr_autogen(runtime fdr_autogen.c)
fdr_autogen(compiler fdr_autogen_compiler.cpp)
fdr_autogen(teddy_runtime teddy_autogen.c)
fdr_autogen(teddy_compiler teddy_autogen_compiler.cpp)

# The generated files, at the same location the rules above write to.
# The list is first set in this scope: set(... PARENT_SCOPE) alone leaves
# the variable undefined here, which would make the property call below
# operate on an empty file list.
set(fdr_GENERATED_SRC
    "${CMAKE_CURRENT_BINARY_DIR}/fdr_autogen.c"
    "${CMAKE_CURRENT_BINARY_DIR}/fdr_autogen_compiler.cpp"
    "${CMAKE_CURRENT_BINARY_DIR}/teddy_autogen.c"
    "${CMAKE_CURRENT_BINARY_DIR}/teddy_autogen_compiler.cpp"
)

set_source_files_properties(${fdr_GENERATED_SRC} PROPERTIES GENERATED TRUE)

# Export the list to the directory that added this one.
set(fdr_GENERATED_SRC "${fdr_GENERATED_SRC}" PARENT_SCOPE)

include_directories(${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
|
58
src/fdr/arch.py
Executable file
58
src/fdr/arch.py
Executable file
@@ -0,0 +1,58 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Copyright (c) 2015, Intel Corporation
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import autogen_utils
|
||||
|
||||
# wrapper for architectures
|
||||
|
||||
class Arch:
    """Wrapper describing one target architecture for the generator.

    Attributes: name, extensions (list of extension names), target (None
    here; subclasses assign it) and guard_list (preprocessor conditions
    joined by get_guard()).
    """

    def __init__(self, name, extensions = None):
        self.name = name
        # A literal [] default would be one list object shared by every
        # instance built without extensions; create a fresh one instead.
        self.extensions = [] if extensions is None else extensions
        self.target = None
        # Set here so that get_guard() also works on a plain Arch;
        # subclasses may replace it after calling this initialiser.
        self.guard_list = []

    def get_guard(self):
        """Return the '#if ...' line opening this architecture's guard."""
        # these defines definitely fall into the "belt-and-suspenders"
        # category of paranoia
        if not self.guard_list:
            return "#if 1"

        return "#if " + " && ".join(self.guard_list)
|
||||
|
||||
class X86Arch(Arch):
    """x86 architecture; builds the feature mask and guard conditions."""

    def __init__(self, name, extensions = None):
        # Avoid a shared mutable default: each call gets its own list.
        if extensions is None:
            extensions = []
        Arch.__init__(self, name, extensions)
        self.guard_list = [ ]
        # C expression for the CPU feature flags; features are OR-ed on.
        self.target = "0"

        if "AVX2" in extensions:
            self.target += " | HS_CPU_FEATURES_AVX2"
            self.guard_list += [ "defined(__AVX2__)" ]
|
||||
|
||||
|
||||
# Architecture instances used by the generator scripts.
arch_x86_64 = X86Arch("x86_64", [])
arch_x86_64_avx2 = X86Arch("x86_64_avx2", ["AVX2"])
|
159
src/fdr/autogen.py
Executable file
159
src/fdr/autogen.py
Executable file
@@ -0,0 +1,159 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Copyright (c) 2015, Intel Corporation
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import sys
|
||||
from autogen_utils import *
|
||||
from fdr_autogen import *
|
||||
from teddy_autogen import *
|
||||
from arch import *
|
||||
|
||||
# FDR setup
|
||||
|
||||
# these are either produced - if the guard succeeds, or #defined to zeroes.
|
||||
# either the function or the zero is fine in our array of function pointers
|
||||
|
||||
def produce_fdr_runtimes(l):
    """Call produce_code() on every matcher in l, in order."""
    for matcher in l:
        matcher.produce_code()
|
||||
|
||||
def produce_fdr_compiles(l):
    """Print the C++ function getFdrDescriptions() to standard output.

    Each matcher in l contributes to the 'defns' array through its own
    produce_compile_call() method.  print() is called with a single string
    so the code is valid under both Python 2 and Python 3.
    """
    print("void getFdrDescriptions(vector<FDREngineDescription> *out) {")
    print(" static const FDREngineDef defns[] = {")
    for m in l:
        m.produce_compile_call()
    for text in (" };",
                 " out->clear();",
                 " for (size_t i = 0; i < ARRAY_LENGTH(defns); i++) {",
                 " out->push_back(FDREngineDescription(defns[i]));",
                 " }",
                 "}"):
        print(text)
|
||||
|
||||
def build_fdr_matchers():
    """Return the list of M3 matchers, in the order ids are later assigned.

    Domains 8 and 10-13 each get strides 1, 2 and 4; the big domains 14
    and 15 get stride 1 only.
    """
    shared = { "state_width" : 128, "num_buckets" : 8, "extract_frequency" : 8, "arch" : arch_x86_64 }
    matchers = []

    for dom in (8, 10, 11, 12, 13):
        for step in (1, 2, 4):
            matchers.append(M3(stride = step, domain = dom, **shared))

    for dom in (14, 15):
        matchers.append(M3(stride = 1, domain = dom, **shared))

    return matchers
|
||||
|
||||
# teddy setup
|
||||
|
||||
def build_teddy_matchers():
    """Return the list of Teddy matchers, in the order ids are assigned."""
    matchers = []
    packing = (False, True)

    # AVX2
    for is_packed in packing:
        matchers.append(MTFast(arch = arch_x86_64_avx2, packed = is_packed))
    for mask_count in range(1, 5):
        for is_packed in packing:
            matchers.append(MTFat(arch = arch_x86_64_avx2, packed = is_packed, num_masks = mask_count, num_buckets = 16))

    # SSE/SSE2/SSSE3
    for mask_count in range(1, 5):
        for is_packed in packing:
            matchers.append(MT(arch = arch_x86_64, packed = is_packed, num_masks = mask_count, num_buckets = 8))

    return matchers
|
||||
|
||||
def produce_teddy_compiles(l):
    """Print the C++ function getTeddyDescriptions() to standard output.

    Each matcher in l contributes to the 'defns' array through its own
    produce_compile_call() method.  print() is called with a single string
    so the code is valid under both Python 2 and Python 3.
    """
    print("void getTeddyDescriptions(vector<TeddyEngineDescription> *out) {")
    print(" static const TeddyEngineDef defns[] = {")
    for m in l:
        m.produce_compile_call()
    for text in (" };",
                 " out->clear();",
                 " for (size_t i = 0; i < ARRAY_LENGTH(defns); i++) {",
                 " out->push_back(TeddyEngineDescription(defns[i]));",
                 " }",
                 "}"):
        print(text)
|
||||
|
||||
# see below - we don't produce our 'zeros' at the point of the teddy runtimes as they
|
||||
# are linked. So we either generate the function or we don't - then at the point of the
|
||||
# header in fdr_autogen.c we either generate the header or we #define the zero.
|
||||
|
||||
def produce_teddy_runtimes(l):
    """Print guarded prototypes for every matcher, then the guarded code.

    print() is used in its single-argument form so the function runs
    under both Python 2 and Python 3.
    """
    # Since we're using -Wmissing-prototypes, we need headers first.
    for m in l:
        m.produce_guard()
        print(m.produce_header(visible = True, header_only = True))
        m.close_guard()

    for m in l:
        m.produce_guard()
        m.produce_code()
        m.close_guard()
|
||||
|
||||
# see produce_teddy_runtimes() comment for the rationale
|
||||
|
||||
def produce_teddy_headers(l):
    """Print, per matcher: its guard, its prototype and its zero alternative.

    print() is used in its single-argument form so the function runs
    under both Python 2 and Python 3.
    """
    for m in l:
        m.produce_guard()
        print(m.produce_header(visible = True, header_only = True))
        m.produce_zero_alternative()
|
||||
|
||||
# general utilities
|
||||
|
||||
def make_fdr_function_pointers(matcher_list):
    """Print the FDRFUNCTYPE typedef and the 'funcs' array of matcher names.

    The array entries are the get_name() results of matcher_list, in order.
    print() is used in its single-argument form so the function runs
    under both Python 2 and Python 3.
    """
    print("""
typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr, const struct FDR_Runtime_Args *a);
static FDRFUNCTYPE funcs[] = {
""")
    all_funcs = ",\n".join([ " %s" % m.get_name() for m in matcher_list ])
    print(all_funcs)
    print("""
};
""")
|
||||
|
||||
def assign_ids(matcher_list, next_id):
    """Give each matcher a consecutive 'id' starting at next_id.

    Returns the first id that was not handed out.
    """
    last_used = next_id - 1
    for last_used, matcher in enumerate(matcher_list, next_id):
        matcher.id = last_used
    return last_used + 1
|
||||
|
||||
# Main entry point
|
||||
|
||||
# Usage: autogen.py <compiler|runtime|teddy_runtime|teddy_compiler>
# The selected source is printed to standard output.

m = build_fdr_matchers()
next_id = assign_ids(m, 0)
tm = build_teddy_matchers()
next_id = assign_ids(tm, next_id)

# A missing or misspelt output type is reported as an error instead of
# raising IndexError or silently producing an empty file.
if len(sys.argv) < 2:
    fail_out("missing output type argument")
elif sys.argv[1] == "compiler":
    produce_fdr_compiles(m)
elif sys.argv[1] == "runtime":
    produce_fdr_runtimes(m)
    produce_teddy_headers(tm)
    make_fdr_function_pointers(m+tm)
elif sys.argv[1] == "teddy_runtime":
    produce_teddy_runtimes(tm)
elif sys.argv[1] == "teddy_compiler":
    produce_teddy_compiles(tm)
else:
    fail_out("unknown output type '%s'" % sys.argv[1])
|
285
src/fdr/autogen_utils.py
Executable file
285
src/fdr/autogen_utils.py
Executable file
@@ -0,0 +1,285 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Copyright (c) 2015, Intel Corporation
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import sys
|
||||
|
||||
def fail_out(msg = ""):
    """Write an internal-failure message to stderr and exit with status 1."""
    # sys.stderr.write() replaces the Python 2-only 'print >>' statement;
    # the emitted text, including the trailing newline, is the same.
    sys.stderr.write("Internal failure in autogen.py: " + msg + "\n")
    sys.exit(1)
|
||||
|
||||
class IntegerType:
    """An integer type of 'size' bits; builds C expression strings for it."""

    # Suffix appended to "lv_<type>" for each load flavour:
    #   "normal" (always between buf and len) - the default
    #   "aligned" (means normal + aligned to a natural boundary)
    #   "cautious_forward" (means may go off the end of buf+len)
    #   "cautious_backward" (means may go off the start of buf)
    #   "cautious_everywhere" (means may go off both)
    _LOAD_SUFFIX = {
        "normal" : "",
        "aligned" : "_a",
        "cautious_forward" : "_cf",
        "cautious_backward" : "_cb",
        "cautious_everywhere" : "_ce",
    }

    def __init__(self, size):
        self.size = size

    def get_name(self):
        """Return the C type name for this width (KeyError if unsupported)."""
        return { 256: "m256", 128 : "m128", 64 : "u64a", 32 : "u32" , 16 : "u16", 8 : "u8"}[self.size]

    def size_in_bytes(self):
        # Floor division keeps the result an integer on Python 3 as well.
        return self.size // 8

    def isSIMDOnIntel(self):
        return False

    def zero_expression(self):
        return "0"

    def constant_to_string(self, n):
        """Format n, truncated to this type's width, as a C hex literal."""
        if self.size == 64:
            suffix = "ULL"
        else:
            suffix = ""
        return "0x%x%s" % (n & ((1 << self.size) - 1), suffix)

    def lowbits(self, n):
        return (1 << n) - 1

    def highbits(self, n):
        return ~(self.lowbits(self.size - n))

    def lowbit_mask(self, n):
        return self.constant_to_string(self.lowbits(n))

    def highbit_mask(self, n):
        return self.constant_to_string(self.highbits(n))

    def lowbit_extract_expr(self, expr_string, n):
        return "(%s & %s)" % ( expr_string, self.lowbit_mask(n))

    def highbit_extract_expr(self, expr_string, n):
        return "(%s >> %d)" % (expr_string, self.size - n)

    def flip_lowbits_expr(self, expr_string, n):
        return "(%s ^ %s)" % ( expr_string, self.lowbit_mask(n))

    def bit_extract_expr(self, expr_string, low, high):
        lbm = self.lowbit_mask(high - low)
        return "((%s >> %d) & %s)" % (expr_string, low, lbm)

    # shifts are +ve if left and -ve if right
    def shift_expr(self, expr_string, n):
        if n <= -self.size or n >= self.size:
            return self.zero_expression()
        elif (n > 0):
            return "(%s << %d)" % (expr_string, n)
        elif (n < 0):
            return "(%s >> %d)" % (expr_string, -n)
        else:
            return "(%s)" % (expr_string)

    def load_expr_data(self, offset = 0, code = "normal",
                       base_string = "ptr", bounds_lo = "buf", bounds_hi = "buf + len"):
        """Return a call to the lv_<type>[_suffix] load helper.

        'code' selects the flavour (see _LOAD_SUFFIX).  It is compared by
        value: identity tests ('is') against string or integer literals
        only work when the interpreter happens to share the objects.
        An unknown code is reported through fail_out() rather than
        silently returning None.
        """
        if code not in self._LOAD_SUFFIX:
            fail_out("unknown load code '%s'" % code)
        if code == "aligned" and self.size == 8:
            fail_out("no aligned byte loads")
        return "lv_%s%s(%s + %d, %s, %s)" % (self.get_name(), self._LOAD_SUFFIX[code],
                                             base_string, offset, bounds_lo, bounds_hi)
|
||||
|
||||
|
||||
class SIMDIntegerType(IntegerType):
    """IntegerType variant whose expressions use m128 helper calls."""

    def __init__(self, size):
        IntegerType.__init__(self, size)

    def isSIMDOnIntel(self):
        return True

    def zero_expression(self):
        return "zeroes128()"

    def lowbit_extract_expr(self, expr_string, n):
        """Extract the low n bits through movd (n <= 32) or movq (n <= 64)."""
        if (n <= 32):
            tmpType = IntegerType(32)
            tmpExpr = "movd(%s)" % expr_string
        elif (32 < n <= 64):
            tmpType = IntegerType(64)
            tmpExpr = "movq(%s)" % expr_string
        else:
            # Previously this fell through to an unbound local variable.
            fail_out("Unimplemented low bit extract of more than 64 bits on m128")
        return tmpType.lowbit_extract_expr(tmpExpr, n)

    def highbit_extract_expr(self, expr_string, n):
        fail_out("Unimplemented high bit extract on m128")

    # 'flip' is defaulted so the method can also be called with the
    # three-argument signature of IntegerType.bit_extract_expr.
    def bit_extract_expr(self, expr_string, low, high, flip = False):
        fail_out("Unimplemented bit extract on m128")

    def shift_expr(self, expr_string, n):
        """Shift by n bits (+ve left, -ve right); n must be a multiple of 8."""
        if n % 8 != 0:
            fail_out("Trying to shift a m128 by a bit granular value")

        # n is a whole number of bytes here, so floor division is exact and
        # keeps the emitted byte count an integer on Python 3 too.
        if n <= -self.size or n >= self.size:
            return self.zero_expression()
        elif (n > 0):
            return "_mm_slli_si128(%s, %s)" % (expr_string, n // 8)
        elif (n < 0):
            return "_mm_srli_si128(%s, %s)" % (expr_string, (-n) // 8)
        else:
            return "(%s)" % (expr_string)

    def lowbit_mask(self, n):
        if n % 8 != 0:
            fail_out("Trying to make a lowbit mask in a m128 by a bit granular value")
        return self.shift_expr("ones128()", -(128 - n))
|
||||
|
||||
def getRequiredType(bits):
    """Return a type object able to hold 'bits' bits.

    Exactly 128 bits gives a SIMDIntegerType; otherwise the narrowest of
    the 8/16/32/64-bit IntegerTypes that fits, or None when none does.
    """
    if bits == 128:
        return SIMDIntegerType(bits)
    fitting = [ width for width in (8, 16, 32, 64) if bits <= width ]
    if not fitting:
        return None
    return IntegerType(fitting[0])
|
||||
|
||||
class IntegerVariable:
    """A named variable paired with the type object that describes it."""

    def __init__(self, name, type):
        self.name = name
        self.type = type

    def gen_initializer_stmt(self, initialization_string = None):
        """Return a C declaration, with an initialiser if one is given."""
        declaration = "%s %s" % (self.type.get_name(), self.name)
        if not initialization_string:
            return declaration + ";"
        return "%s = %s;" % (declaration, initialization_string)
|
||||
|
||||
|
||||
class Step:
    """One unit of generated code, registered with a CodeGenContext.

    Subclasses are expected to set self.val, the text that emit() prints.
    """

    def __init__(self, context, offset = 0):
        self.context = context
        self.matcher = context.matcher
        self.offset = offset
        self.latency = 1
        self.dependency_list = []
        self.latest = None
        self.context.add_step(self)

    # return a string, complete with indentation
    def emit(self):
        pad = " " * (self.offset*2 + self.matcher.default_body_indent)
        text = "\n".join([ pad + row for row in self.val.split("\n") ])
        if not self.latest:
            return text
        # Scheduling details are appended as a trailing C++-style comment.
        # NOTE(review): the dependency suffix is taken to belong to this
        # comment (nesting rebuilt from an unindented listing) - confirm.
        text += " // " + str(self.debug_step) + " L" + str(self.latency) + " LTST:%d" % self.latest
        if self.dependency_list:
            text += " Derps: " + "".join([ "%d/%d " % (dep.debug_step, lat)
                                           for (dep, lat) in self.dependency_list ])
        return text

    def add_dependency(self, step, anti_dependency = False, output_dependency = False):
        """Record that this step depends on 'step'.

        Anti- and output dependencies are stored with a latency of 1, any
        other dependency with the latency of 'step'.
        """
        lat = 1 if (anti_dependency or output_dependency) else step.latency
        self.dependency_list += [ (step, lat) ]

    def nv(self, type, var_name):
        """Shorthand for context.new_var() on behalf of this step."""
        return self.context.new_var(self, type, var_name)

    def gv(self, var_name, reader = True, writer = False):
        """Shorthand for context.get_var() on behalf of this step."""
        return self.context.get_var(self, var_name, reader = reader, writer = writer)
|
||||
|
||||
# utility steps, generic
|
||||
|
||||
class LabelStep(Step):
    """Step whose text is the label '<label_prefix><offset>: UNUSED;'."""

    def __init__(self, context, offset = 0, label_prefix = "off"):
        Step.__init__(self, context, offset)
        label_parts = (label_prefix, offset)
        self.val = "%s%d: UNUSED;" % label_parts
|
||||
|
||||
class OpenScopeStep(Step):
    """Step that emits an opening brace."""

    def __init__(self, context, offset = 0):
        Step.__init__(self, context, offset = offset)
        self.val = "{"
|
||||
|
||||
class CloseScopeStep(Step):
    """Step that emits a closing brace."""

    def __init__(self, context, offset = 0):
        Step.__init__(self, context, offset = offset)
        self.val = "}"
|
||||
|
||||
|
||||
class CodeGenContext:
    """Collects the steps of one matcher and tracks variable dependencies."""

    def __init__(self, matcher):
        self.vars = {}
        self.steps = []
        self.ctr = 0
        self.matcher = matcher
        self.var_writer = {} # var to a single writer
        self.var_readers = {} # var to a list of all the readers that read the last value

    def new_var(self, step, type, var_name):
        """Create variable var_name and record 'step' as its writer."""
        var = IntegerVariable(var_name, type)
        self.vars[var_name] = var
        self.var_writer[var_name] = step
        return var

    def get_var(self, step, var_name, reader = True, writer = False):
        """Return variable var_name, adding the dependencies 'step' incurs.

        A reader depends on the current writer; a pure writer has an output
        dependency on the previous writer; any writer has anti-dependencies
        on the other readers of the previous value and becomes the writer.
        NOTE(review): statement nesting was rebuilt from an unindented
        listing - confirm against the original file.
        """
        if reader:
            writer_step = self.var_writer[var_name]
            if writer_step:
                step.add_dependency(writer_step)
            self.var_readers.setdefault(var_name, []).append(step)
        if writer and not reader:
            if self.var_writer[var_name]:
                step.add_dependency(self.var_writer[var_name], output_dependency = True)
        if writer:
            # 'in' replaces dict.has_key(), which Python 3 no longer has.
            if var_name in self.var_readers:
                for earlier in [ r for r in self.var_readers[var_name] if r is not step ]:
                    step.add_dependency(earlier, anti_dependency = True)
                self.var_readers[var_name] = []
            self.var_writer[var_name] = step
        return self.vars[var_name]

    def add_step(self, step):
        """Append 'step' and number it through its debug_step attribute."""
        self.steps += [ step ]
        step.debug_step = self.ctr
        self.ctr += 1

    def dontschedule(self, finals):
        """Emit the steps in insertion order; 'finals' is not used."""
        return "\n".join( [ s.emit() for s in self.steps ] )

    def schedule(self, finals):
        """Order the steps by their 'latest' value and emit them.

        'latest' is propagated backwards from the steps in 'finals' along
        the dependency lists; steps are then emitted highest value first.
        """
        for f in finals:
            f.latest = f.latency
        worklist = finals
        while worklist:
            current = worklist[0]
            worklist = worklist[1:]
            for (dep, lat) in current.dependency_list:
                # Test against the value that would be stored.  Testing
                # against dep.latency instead could replace a larger
                # 'latest' with a smaller one when lat < dep.latency.
                candidate = current.latest + lat
                if dep.latest is None or dep.latest < candidate:
                    dep.latest = candidate
                    if dep not in worklist:
                        worklist += [ dep ]
        # Steps never reached keep latest None.  They sort last, as None
        # does on Python 2, without comparing None to a number on Python 3.
        lowest = -float("inf")
        self.steps.sort(reverse = True,
                        key = lambda s : lowest if s.latest is None else s.latest)
        return "\n".join( [ s.emit() for s in self.steps ] )
|
167
src/fdr/base_autogen.py
Normal file
167
src/fdr/base_autogen.py
Normal file
@@ -0,0 +1,167 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Copyright (c) 2015, Intel Corporation
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import sys
|
||||
from autogen_utils import *
|
||||
from base_autogen import *
|
||||
from string import Template
|
||||
|
||||
class MatcherBase:
    """Shared code-generation helpers for the matcher classes.

    The methods read attributes that this class does not set itself:
    id, arch, num_buckets, conf_top_level_split and conf_pull_back.
    Output is either returned as a string or printed to standard output;
    print() is always called with a single string so the code is valid
    under both Python 2 and Python 3.
    """

    def __init__(self):
        pass

    def get_name(self):
        """Return the C function name derived from self.id."""
        return "fdr_exec_%03d" % self.id

    def produce_header(self, visible, header_only = False):
        """Return the function prototype (header_only) or definition opener.

        When 'visible' is false the text starts with 'static never_inline'.
        """
        s = ""
        if not visible:
            s += "static never_inline"
        s += """
hwlm_error_t %s(UNUSED const struct FDR *fdr,
UNUSED const struct FDR_Runtime_Args * a)""" % self.get_name()
        if header_only:
            s += ";"
        else:
            s += "{"
        s += "\n"
        return s

    def produce_guard(self):
        """Print the '#if' guard line of this matcher's architecture."""
        print(self.arch.get_guard())

    def produce_zero_alternative(self):
        """Print the '#else' branch defining the function name as 0."""
        print("""
#else
#define %s 0
#endif
""" % self.get_name())

    # trivial function for documentation/modularity
    def close_guard(self):
        print("#endif")

    def produce_common_declarations(self):
        """Return the local declarations placed at the top of a function."""
        return """
const u8 * buf = a->buf;
const size_t len = a->len;
const u8 * ptr = buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t * control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 * tryFloodDetect = a->firstFloodDetect;
UNUSED u32 bit, bitRem, confSplit, idx;
u32 byte, cf;
const struct FDRConfirm *fdrc;
u32 last_match = (u32)-1;
"""

    def produce_continue_check(self):
        """Return the C check that exits once matching was terminated."""
        return """if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {
*a->groups = controlVal;
return HWLM_TERMINATED;
}
"""

    def produce_flood_check(self):
        """Return the C block that calls floodDetect() past tryFloodDetect."""
        return """
if (P0(ptr > tryFloodDetect)) {
tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, &floodBackoff, &controlVal, iterBytes);
if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {
*a->groups = controlVal;
return HWLM_TERMINATED;
}
}
"""

    def produce_footer(self):
        """Return the C epilogue that stores controlVal and returns."""
        return """
*a->groups = controlVal;
return HWLM_SUCCESS;
}
"""

    def produce_confirm_base(self, conf_var_name, conf_var_size, offset, cautious, enable_confirmless, do_bailout = False):
        """Return the C confirm loop over the set bits of conf_var_name.

        cautious selects VECTORING over NOT_CAUTIOUS in the confWithBit()
        call; enable_confirmless adds the shortcut taken when fdrc->mult
        is zero; do_bailout adds a bounds check on ptr + byte.
        """
        if cautious:
            caution_string = "VECTORING"
        else:
            caution_string = "NOT_CAUTIOUS"
        conf_split_mask = IntegerType(32).constant_to_string(
            self.conf_top_level_split - 1)
        if enable_confirmless:
            quick_check_string = """
if (!fdrc->mult) {
u32 id = fdrc->nBitsOrSoleID;
if ((last_match == id) && (fdrc->flags & NoRepeat))
continue;
last_match = id;
controlVal = a->cb(ptr+byte-buf, ptr+byte-buf, id, a->ctxt);
continue;
} """
        else:
            quick_check_string = ""
        if do_bailout:
            bailout_string = """
if ((ptr + byte < buf + a->start_offset) || (ptr + byte >= buf + len)) continue;"""
        else:
            bailout_string = ""

        return Template("""
if (P0(!!$CONFVAR)) {
do {
bit = findAndClearLSB_$CONFVAR_SIZE(&$CONFVAR);
byte = bit / $NUM_BUCKETS + $OFFSET;
bitRem = bit % $NUM_BUCKETS;
$BAILOUT_STRING
confSplit = *(ptr+byte) & $SPLIT_MASK;
idx = confSplit * $NUM_BUCKETS + bitRem;
cf = confBase[idx];
if (!cf)
continue;
fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);
if (!(fdrc->groups & *control))
continue;
$QUICK_CHECK_STRING
confWithBit(fdrc, a, ptr - buf + byte, $CAUTION_STRING, $CONF_PULL_BACK, control, &last_match);
} while(P0(!!$CONFVAR));
if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {
*a->groups = controlVal;
return HWLM_TERMINATED;
}
}""").substitute(CONFVAR = conf_var_name,
                 CONFVAR_SIZE = conf_var_size,
                 NUM_BUCKETS = self.num_buckets,
                 OFFSET = offset,
                 SPLIT_MASK = conf_split_mask,
                 QUICK_CHECK_STRING = quick_check_string,
                 BAILOUT_STRING = bailout_string,
                 CAUTION_STRING = caution_string,
                 CONF_PULL_BACK = self.conf_pull_back)
|
||||
|
||||
|
||||
def indent(block, depth):
    """Prefix every line of 'block' with 4*depth copies of the pad string."""
    pad = " " * (4*depth)
    return "\n".join(pad + text for text in block.splitlines())
|
49
src/fdr/engine_description.cpp
Normal file
49
src/fdr/engine_description.cpp
Normal file
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "engine_description.h"
|
||||
#include "hs_compile.h" // for hs_platform_info
|
||||
#include "util/target_info.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
EngineDescription::~EngineDescription() {}
|
||||
|
||||
bool EngineDescription::isValidOnTarget(const target_t &target_in) const {
|
||||
return target_in.can_run_on_code_built_for(code_target);
|
||||
}
|
||||
|
||||
target_t targetByArchFeatures(u64a cpu_features) {
|
||||
hs_platform_info p;
|
||||
p.tune = HS_TUNE_FAMILY_GENERIC;
|
||||
p.cpu_features = cpu_features;
|
||||
|
||||
return target_t(p);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
70
src/fdr/engine_description.h
Normal file
70
src/fdr/engine_description.h
Normal file
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ENGINE_DESCRIPTION_H
|
||||
#define ENGINE_DESCRIPTION_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "util/target_info.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class EngineDescription {
|
||||
u32 id;
|
||||
target_t code_target; // the target that we built this code for
|
||||
u32 numBuckets;
|
||||
u32 confirmPullBackDistance;
|
||||
u32 confirmTopLevelSplit;
|
||||
|
||||
public:
|
||||
EngineDescription(u32 id_in, const target_t &code_target_in,
|
||||
u32 numBuckets_in, u32 confirmPullBackDistance_in,
|
||||
u32 confirmTopLevelSplit_in)
|
||||
: id(id_in), code_target(code_target_in), numBuckets(numBuckets_in),
|
||||
confirmPullBackDistance(confirmPullBackDistance_in),
|
||||
confirmTopLevelSplit(confirmTopLevelSplit_in) {}
|
||||
|
||||
virtual ~EngineDescription();
|
||||
|
||||
u32 getID() const { return id; }
|
||||
u32 getNumBuckets() const { return numBuckets; }
|
||||
u32 getConfirmPullBackDistance() const { return confirmPullBackDistance; }
|
||||
u32 getConfirmTopLevelSplit() const { return confirmTopLevelSplit; }
|
||||
|
||||
bool isValidOnTarget(const target_t &target_in) const;
|
||||
virtual u32 getDefaultFloodSuffixLength() const = 0;
|
||||
|
||||
virtual bool typicallyHoldsOneCharLits() const { return true; }
|
||||
};
|
||||
|
||||
/** Returns a target given a CPU feature set value. */
|
||||
target_t targetByArchFeatures(u64a cpu_features);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
126
src/fdr/fdr.c
Normal file
126
src/fdr/fdr.c
Normal file
@@ -0,0 +1,126 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "util/simd_utils.h"
|
||||
|
||||
#define P0(cnd) unlikely(cnd)
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "teddy_internal.h"
|
||||
|
||||
#include "flood_runtime.h"
|
||||
|
||||
#include "fdr_confirm.h"
|
||||
#include "fdr_confirm_runtime.h"
|
||||
#include "fdr_streaming_runtime.h"
|
||||
#include "fdr_loadval.h"
|
||||
|
||||
static really_inline UNUSED
|
||||
u32 getPreStartVal(const struct FDR_Runtime_Args *a, u32 numBits) {
|
||||
u32 r = 0;
|
||||
if (a->start_offset == 0) {
|
||||
if (numBits <= 8) {
|
||||
r = a->buf_history[a->len_history - 1];
|
||||
} else {
|
||||
r = a->buf_history[a->len_history - 1];
|
||||
r |= (a->buf[0] << 8);
|
||||
}
|
||||
} else {
|
||||
if (numBits <= 8) {
|
||||
r = a->buf[a->start_offset - 1];
|
||||
} else {
|
||||
r = lv_u16(a->buf + a->start_offset - 1, a->buf, a->buf + a->len);
|
||||
}
|
||||
}
|
||||
return r & ((1 << numBits) - 1);
|
||||
}
|
||||
|
||||
#include "fdr_autogen.c"
|
||||
|
||||
#define FAKE_HISTORY_SIZE 16
|
||||
static const u8 fake_history[FAKE_HISTORY_SIZE];
|
||||
|
||||
hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, size_t start,
|
||||
HWLMCallback cb, void *ctxt, hwlm_group_t groups) {
|
||||
|
||||
const struct FDR_Runtime_Args a = {
|
||||
buf,
|
||||
len,
|
||||
fake_history,
|
||||
0,
|
||||
fake_history, // nocase
|
||||
0,
|
||||
start,
|
||||
cb,
|
||||
ctxt,
|
||||
&groups,
|
||||
nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
|
||||
0
|
||||
};
|
||||
if (unlikely(a.start_offset >= a.len)) {
|
||||
return HWLM_SUCCESS;
|
||||
} else {
|
||||
assert(funcs[fdr->engineID]);
|
||||
return funcs[fdr->engineID](fdr, &a);
|
||||
}
|
||||
}
|
||||
|
||||
hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
|
||||
size_t hlen, const u8 *buf, size_t len,
|
||||
size_t start, HWLMCallback cb, void *ctxt,
|
||||
hwlm_group_t groups, u8 * stream_state) {
|
||||
struct FDR_Runtime_Args a = {
|
||||
buf,
|
||||
len,
|
||||
hbuf,
|
||||
hlen,
|
||||
hbuf, // nocase - start same as caseful, override later if needed
|
||||
hlen, // nocase
|
||||
start,
|
||||
cb,
|
||||
ctxt,
|
||||
&groups,
|
||||
nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
|
||||
hbuf ? CONF_LOADVAL_CALL_CAUTIOUS(hbuf + hlen - 8, hbuf, hbuf + hlen)
|
||||
: (u64a)0
|
||||
|
||||
};
|
||||
fdrUnpackState(fdr, &a, stream_state);
|
||||
|
||||
hwlm_error_t ret;
|
||||
if (unlikely(a.start_offset >= a.len)) {
|
||||
ret = HWLM_SUCCESS;
|
||||
} else {
|
||||
assert(funcs[fdr->engineID]);
|
||||
ret = funcs[fdr->engineID](fdr, &a);
|
||||
}
|
||||
|
||||
fdrPackState(fdr, &a, stream_state);
|
||||
return ret;
|
||||
}
|
91
src/fdr/fdr.h
Normal file
91
src/fdr/fdr.h
Normal file
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief FDR literal matcher: runtime API.
|
||||
*/
|
||||
|
||||
#ifndef FDR_H
|
||||
#define FDR_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "hwlm/hwlm.h"
|
||||
|
||||
// C linkage in the API
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct FDR;
|
||||
|
||||
/** \brief Returns size in bytes of the given FDR engine. */
|
||||
size_t fdrSize(const struct FDR *fdr);
|
||||
|
||||
/** \brief Returns non-zero if the contents of the stream state indicate that
|
||||
* there is active FDR history beyond the regularly used history. */
|
||||
u32 fdrStreamStateActive(const struct FDR *fdr, const u8 *stream_state);
|
||||
|
||||
/**
|
||||
* \brief Block-mode scan.
|
||||
*
|
||||
* \param fdr FDR matcher engine.
|
||||
* \param buf Buffer to scan.
|
||||
* \param len Length of buffer to scan.
|
||||
* \param start First offset in buf at which a match may end.
|
||||
* \param cb Callback to call when a match is found.
|
||||
* \param ctxt Caller-provided context pointer supplied to callback on match.
|
||||
* \param groups Initial groups mask.
|
||||
*/
|
||||
hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
|
||||
size_t start, HWLMCallback cb, void *ctxt,
|
||||
hwlm_group_t groups);
|
||||
|
||||
/**
|
||||
* \brief Streaming-mode scan.
|
||||
*
|
||||
* \param fdr FDR matcher engine.
|
||||
* \param hbuf History buffer.
|
||||
* \param hlen Length of history buffer (hbuf).
|
||||
* \param buf Buffer to scan.
|
||||
* \param len Length of buffer to scan (buf).
|
||||
* \param start First offset in buf at which a match may end.
|
||||
* \param cb Callback to call when a match is found.
|
||||
* \param ctxt Caller-provided context pointer supplied to callback on match.
|
||||
* \param groups Initial groups mask.
|
||||
* \param stream_state Persistent stream state for use by FDR.
|
||||
*/
|
||||
hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
|
||||
size_t hlen, const u8 *buf, size_t len,
|
||||
size_t start, HWLMCallback cb, void *ctxt,
|
||||
hwlm_group_t groups, u8 *stream_state);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // FDR_H
|
574
src/fdr/fdr_autogen.py
Executable file
574
src/fdr/fdr_autogen.py
Executable file
@@ -0,0 +1,574 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Copyright (c) 2015, Intel Corporation
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import sys
|
||||
from autogen_utils import *
|
||||
from base_autogen import *
|
||||
from string import Template
|
||||
|
||||
class OrStep(Step):
    # Emits the statement that ORs the lookup result "st<offset>" into the
    # running state variable "s".
    def __init__(self, context, offset, width):
        Step.__init__(self, context, offset)
        lookup_name = self.gv("st%d" % offset).name
        if width >= 128:
            # wide state: use the or<width>() helper in the generated code
            self.val = "s = or%d(s, %s);" % (width, lookup_name)
        else:
            # narrower state: a plain C |= is emitted
            self.val = "s |= %s;" % lookup_name
|
||||
|
||||
class ShiftStateStep(Step):
    # Emits the statement that shifts the matcher's state variable by
    # -(stride_used * num_buckets), using the state type's shift_expr().
    def __init__(self, context, offset = 0, stride_used = 1):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        state_var = matcher.state_variable
        distance = -1 * stride_used * matcher.num_buckets
        shifted = state_var.type.shift_expr(state_var.name, distance)
        self.val = "%s = %s;" % (state_var.name, shifted)
|
||||
|
||||
class BulkLoadStep(Step):
    # Emits a load of the matcher's bulk_load_type into "current_data_<offset>".
    # With define_var the variable is declared and initialised; otherwise an
    # already-declared variable is assigned. The "size" argument is accepted
    # but not used here.
    def __init__(self, context, offset, size, define_var = True, aligned = True):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        self.latency = 4
        load_type = matcher.bulk_load_type

        # pick the aligned or default flavour of the load expression
        if not aligned:
            load_expr = load_type.load_expr_data(self.offset)
        else:
            load_expr = load_type.load_expr_data(self.offset, code = "aligned")

        target_name = "current_data_%d" % offset
        if not define_var:
            # register this step as a writer of the existing variable
            self.gv(target_name, reader = False, writer = True)
            self.val = "%s = %s;" % (target_name, load_expr)
        else:
            new_var = self.nv(load_type, target_name)
            self.val = new_var.gen_initializer_stmt(load_expr)
|
||||
|
||||
class ValueExtractStep(Step):
    # Emits the definition of "v<offset>": the table index for this offset,
    # taken either from a dedicated single load or from the enclosing bulk
    # load register, then masked with the matcher's reach_mask.
    def __init__(self, context, offset, sub_load_cautious = False):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        self.latency = 2
        bulk_bytes = matcher.datasize_bytes
        byte_in_load = offset % bulk_bytes

        if matcher.domain > 8 and byte_in_load == bulk_bytes - 1:
            # Case 1: the value runs past the end of the bulk load, so it is
            # fetched with its own (optionally cautious) load
            self.latency = 4
            load_mode = "cautious_forward" if sub_load_cautious else "normal"
            single_load = matcher.single_load_type.load_expr_data(self.offset, load_mode)
            positioned = "(%s << %d)" % (single_load, matcher.reach_shift_adjust)
        elif matcher.fdr2_force_naive_load:
            # Case 2 (naive): value lies within the bulk load, but a separate
            # load is forced anyway
            single_load = matcher.single_load_type.load_expr_data(self.offset, "normal")
            positioned = "(%s << %d)" % (single_load, matcher.reach_shift_adjust)
        else:
            # Case 2: value is taken out of the already-loaded register
            bulk_var = self.gv("current_data_%d" % (offset - byte_in_load))
            if byte_in_load != 0:
                # Case 2b: value sits mid-register; shift right into place,
                # less the "reach_shift_adjust"
                positioned = "(%s >> %d)" % (bulk_var.name,
                                             byte_in_load*8 - matcher.reach_shift_adjust)
            else:
                # Case 2a: value is at the LSB end; shift left by the
                # "reach_shift_adjust" only
                positioned = "(%s << %d)" % (bulk_var.name, matcher.reach_shift_adjust)

        masked = "(%s) & 0x%x" % (positioned, matcher.reach_mask)
        value_var = self.nv(matcher.value_extract_type, "v%d" % offset)
        self.val = value_var.gen_initializer_stmt(masked)
|
||||
|
||||
class TableLookupStep(Step):
    # Emits the definition of "st<offset>": a state-typed read from the table
    # "ft" at byte offset v<offset> * reach_multiplier.
    def __init__(self, context, reach_multiplier, offset = 0):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        self.latency = 4
        index_var = self.gv("v%d" % offset)
        result_var = self.nv(matcher.state_type, "st%d" % offset)
        type_name = matcher.state_type.get_name()
        lookup_expr = "*(const %s *)(ft + %s*%dU)" % (type_name, index_var.name,
                                                      reach_multiplier)
        self.val = result_var.gen_initializer_stmt(lookup_expr)
|
||||
|
||||
class ShiftReachMaskStep(Step):
    # Emits an in-place shift of "st<offset>" by
    # (offset % extract_frequency) * num_buckets.
    def __init__(self, context, offset):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        position = offset % matcher.extract_frequency
        lookup_var = self.gv("st%d" % offset, writer = True)
        shifted = lookup_var.type.shift_expr(lookup_var.name,
                                             position * matcher.num_buckets)
        self.val = "%s = %s;" % (lookup_var.name, shifted)
|
||||
|
||||
class ConfExtractStep(Step):
    # Emits the definition of "extr<offset>": the low extract_size bits of the
    # state variable "s", as produced by the state type's lowbit_extract_expr().
    def __init__(self, context, offset):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        state_type = matcher.state_type
        # the latency is only overridden for state types that report SIMD
        if state_type.isSIMDOnIntel():
            self.latency = 2
        low_bits = state_type.lowbit_extract_expr("s", matcher.extract_size)
        extract_var = self.nv(matcher.extr_type, "extr%d" % offset)
        self.val = extract_var.gen_initializer_stmt(low_bits)
|
||||
|
||||
class ConfAccumulateStep(Step):
# Step that moves the extracted bits "extr<extract_offset>" into the confirm
# variable "conf<conf_offset>". When both offsets are equal the confirm
# variable is a straight (cast) copy of the extract variable, either newly
# defined (define_var) or assigned; otherwise the cast value is shifted by
# (extract_offset - conf_offset) * num_buckets and ORed in.
# NOTE(review): indentation was lost in this copy, so it is not visible whether
# the final "self.latency = 2" belongs to the else branch only or to the whole
# constructor - confirm against the upstream file before restructuring.
|
||||
def __init__(self, context, extract_offset, conf_offset, define_var = True):
|
||||
Step.__init__(self, context, extract_offset)
|
||||
m = self.matcher
|
||||
extr_var = self.gv("extr%d" % extract_offset)
|
||||
# the extract variable is always cast to the confirm type before use
extr_var_cast = "((%s)%s)" % (m.conf_type.get_name(), extr_var.name)
|
||||
if extract_offset == conf_offset:
|
||||
# create conf_var as a straight copy of extr
|
||||
if define_var:
|
||||
conf_var = self.nv(m.conf_type, "conf%d" % conf_offset)
|
||||
self.val = conf_var.gen_initializer_stmt(extr_var_cast)
|
||||
else:
|
||||
conf_var = self.gv("conf%d" % conf_offset, writer = True, reader = True)
|
||||
self.val = "%s = %s;" % (conf_var.name, extr_var_cast)
|
||||
else:
|
||||
# shift extr_var and insert/OR it in conf_var
|
||||
conf_var = self.gv("conf%d" % conf_offset, writer = True, reader = True)
|
||||
shift_dist = (extract_offset - conf_offset) * m.num_buckets
|
||||
self.val = "%s |= %s;" % (conf_var.name, m.conf_type.shift_expr(extr_var_cast, shift_dist))
|
||||
self.latency = 2
|
||||
|
||||
class ConfirmFlipStep(Step):
    # Emits an in-place update of "conf<offset>" using the confirm type's
    # flip_lowbits_expr() over confirm_frequency * num_buckets bits.
    def __init__(self, context, offset):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        confirm_var = self.gv("conf%d" % self.offset, writer = True)
        bit_count = self.matcher.confirm_frequency * matcher.num_buckets
        flipped = confirm_var.type.flip_lowbits_expr(confirm_var.name, bit_count)
        self.val = "%s = %s;" % (confirm_var.name, flipped)
|
||||
|
||||
class ConfirmStep(Step):
    # Emits the confirm code for "conf<offset>", as generated by the matcher's
    # produce_confirm_base(). Confirmless handling is enabled only when the
    # matcher stride is 1; bailout is always disabled here.
    def __init__(self, context, offset, cautious = False):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        confirm_var = self.gv("conf%d" % offset, writer = True)
        stride_is_one = matcher.stride == 1
        self.val = matcher.produce_confirm_base(confirm_var.name,
                                                confirm_var.type.size,
                                                offset, cautious,
                                                enable_confirmless = stride_is_one,
                                                do_bailout = False)
|
||||
|
||||
class M3(MatcherBase):
|
||||
def get_hash_safety_parameters(self):
    # Returns (behind, ahead): zero bytes behind, and one less than the byte
    # size of the single-value load type ahead.
    bytes_ahead = self.single_load_type.size_in_bytes() - 1
    return (0, bytes_ahead)
|
||||
|
||||
def produce_compile_call(self):
|
||||
print " { %d, %d, %d, %d, %d, %s, %d, %d }," % (
|
||||
self.id, self.state_width, self.num_buckets,
|
||||
self.stride, self.domain,
|
||||
self.arch.target, self.conf_pull_back, self.conf_top_level_split)
|
||||
|
||||
def produce_main_loop(self, switch_variant = False):
# Prints the generated C for one variant of the scan loop body.
#  - switch_variant = True: prints a "switch(iterBytes - dist)" preamble whose
#    cases fix up conf/state/current_data and "goto off<i>", followed by the
#    per-offset steps emitted unscheduled (ctxt.dontschedule) with cautious
#    loads and cautious confirms.
#  - switch_variant = False: prints the "if (doMainLoop)" / for-loop form with
#    flood check and prefetch, and the per-offset steps are passed through
#    ctxt.schedule.
# Both variants build the same sequence of Step objects per offset: bulk load,
# value extract + table lookup + OR into state on stride offsets, conf
# extract/accumulate + state shift every extract_frequency bytes, and
# flip + confirm every confirm_frequency bytes.
# NOTE(review): indentation was lost in this copy; nesting must be taken from
# the upstream file, not inferred from this text.
|
||||
stride_offsets = xrange(0, self.loop_bytes, self.stride)
|
||||
stride_offsetSet = set(stride_offsets)
|
||||
so_steps_last_block = []
|
||||
sh = None
|
||||
last_confirm = None
|
||||
ctxt = CodeGenContext(self)
|
||||
|
||||
if switch_variant:
|
||||
print " ptr -= (iterBytes - dist);"
|
||||
print " { " # need an extra scope around switch variant to stop its globals escaping
|
||||
else:
|
||||
print " if (doMainLoop) {"
|
||||
print " for (; ptr + LOOP_READ_AHEAD < buf + len; ptr += iterBytes) {"
|
||||
print self.produce_flood_check()
|
||||
print " __builtin_prefetch(ptr + (iterBytes*4));"
|
||||
print " assert(((size_t)ptr % START_MOD) == 0);"
|
||||
|
||||
|
||||
# just do globally for now
|
||||
if switch_variant:
|
||||
subsidiary_load_cautious = True
|
||||
confirm_cautious = True
|
||||
else:
|
||||
subsidiary_load_cautious = False
|
||||
confirm_cautious = False
|
||||
|
||||
# bulk loads are needed at each datasize-aligned offset whose window contains
# at least one stride offset (unless naive loads are forced)
if not self.fdr2_force_naive_load:
|
||||
bulk_load_steps = [ off for off in range(self.loop_bytes)
|
||||
if off % self.datasize_bytes == 0 and
|
||||
(set(range(off, off + self.datasize_bytes - 1)) & stride_offsetSet)]
|
||||
else:
|
||||
bulk_load_steps = []
|
||||
|
||||
confirm_steps = [ off for off in range(self.loop_bytes) if off % self.confirm_frequency == 0 ]
|
||||
|
||||
# declare the current_data_<off> variables up front
for off in bulk_load_steps:
|
||||
lb_var = ctxt.new_var(None, self.bulk_load_type, "current_data_%d" % off)
|
||||
print " " + lb_var.gen_initializer_stmt()
|
||||
|
||||
|
||||
# declare the conf<off> variables up front; the switch variant starts them
# as all-ones ("(type)-1"), the main-loop variant leaves them uninitialised
for off in confirm_steps:
|
||||
var_name = "conf%d" % off
|
||||
conf_def_var = ctxt.new_var(None, self.conf_type, var_name)
|
||||
if switch_variant:
|
||||
init_string = "(%s)-1" % self.conf_type.get_name()
|
||||
else:
|
||||
init_string = ""
|
||||
print " " + conf_def_var.gen_initializer_stmt(init_string)
|
||||
|
||||
if switch_variant:
|
||||
print " switch(iterBytes - dist) {"
|
||||
for i in range(0, self.loop_bytes):
|
||||
print " case %d:" % i
|
||||
|
||||
# init and poison conf; over-precise but harmless
|
||||
# NOTE(review): relies on Python 2 integer "/" (floor division for ints)
conf_id = (i / self.confirm_frequency) * self.confirm_frequency
|
||||
if i % self.confirm_frequency:
|
||||
conf_fixup_bits = self.conf_type.size - (self.num_buckets * (i % self.confirm_frequency))
|
||||
print " conf%d >>= %d;" % (conf_id, conf_fixup_bits)
|
||||
else:
|
||||
print " conf%d = 0;" % conf_id
|
||||
|
||||
# init state
|
||||
state_fixup = i % self.extract_frequency
|
||||
state = self.state_variable
|
||||
shift_distance = self.num_buckets * state_fixup
|
||||
if state_fixup:
|
||||
print " %s = %s;" % (state.name, state.type.shift_expr(state.name, shift_distance))
|
||||
if self.state_width < 128:
|
||||
print " %s |= %s;" % (state.name, state.type.lowbit_mask(shift_distance))
|
||||
else:
|
||||
print " %s = or%d(%s, %s);" % (state.name, self.state_width, state.name, state.type.lowbit_mask(shift_distance))
|
||||
|
||||
if not self.fdr2_force_naive_load:
|
||||
# init current_data (could poison it in some cases)
|
||||
load_mod = i % self.datasize_bytes
|
||||
load_offset = i - load_mod
|
||||
if load_mod:
|
||||
# not coming in on an even boundary means having to do a load var
|
||||
# actually, there are a bunch of things we can do on this bulk load
|
||||
# to avoid having to be 'cautious_backwards' but I'm not completely
|
||||
# sure they are good ideas
|
||||
init_string = self.bulk_load_type.load_expr_data(load_offset,
|
||||
code = "cautious_backward")
|
||||
var_name = "current_data_%d" % load_offset
|
||||
lb_var = ctxt.get_var(None, var_name, reader = False, writer = True)
|
||||
print " %s = %s;" % (lb_var.name, init_string)
|
||||
|
||||
print " goto off%d;" % i
|
||||
print " case %d: goto skipSwitch;" % self.loop_bytes
|
||||
print " }"
|
||||
print " {"
|
||||
|
||||
|
||||
for off in range(self.loop_bytes):
|
||||
# X_mod is the offset we're up to relative to the last X operation
|
||||
# X_offset is which of the last X operations matches this iteration
|
||||
|
||||
if (switch_variant):
|
||||
LabelStep(ctxt, off)
|
||||
|
||||
if off in bulk_load_steps:
|
||||
if not self.fdr2_force_naive_load:
|
||||
BulkLoadStep(ctxt, off, self.datasize, define_var = False, aligned = not switch_variant)
|
||||
|
||||
if off in stride_offsets:
|
||||
if switch_variant:
|
||||
OpenScopeStep(ctxt, off)
|
||||
ValueExtractStep(ctxt, off, sub_load_cautious = subsidiary_load_cautious)
|
||||
TableLookupStep(ctxt, self.reach_mult, off)
|
||||
if off % self.extract_frequency:
|
||||
ShiftReachMaskStep(ctxt, off)
|
||||
so = OrStep(ctxt, off, self.state_width)
|
||||
if switch_variant:
|
||||
CloseScopeStep(ctxt, off)
|
||||
if sh != None:
|
||||
so.add_dependency(sh)
|
||||
so_steps_last_block += [ so ]
|
||||
|
||||
extract_mod = off % self.extract_frequency
|
||||
extract_offset = off - extract_mod
|
||||
extract_ready = extract_mod == self.extract_frequency - 1
|
||||
if extract_ready:
|
||||
if switch_variant:
|
||||
OpenScopeStep(ctxt, off)
|
||||
ex = ConfExtractStep(ctxt, extract_offset)
|
||||
# NOTE(review): confirm_offset is only assigned further down in this loop
# body, so the value used here is the one left by the previous iteration
# (and would be unassigned if this ran at off == 0, i.e. with
# extract_frequency == 1) - confirm this is intended
ConfAccumulateStep(ctxt, extract_offset, confirm_offset, define_var = False)
|
||||
for so_step in so_steps_last_block:
|
||||
ex.add_dependency(so_step)
|
||||
if switch_variant:
|
||||
CloseScopeStep(ctxt, off)
|
||||
so_steps_last_block = []
|
||||
sh = ShiftStateStep(ctxt, extract_offset, stride_used = self.extract_frequency)
|
||||
sh.add_dependency(ex)
|
||||
|
||||
confirm_mod = off % self.confirm_frequency
|
||||
confirm_offset = off - confirm_mod
|
||||
confirm_ready = confirm_mod == self.confirm_frequency - 1
|
||||
if confirm_ready:
|
||||
cflip = ConfirmFlipStep(ctxt, confirm_offset)
|
||||
cf = ConfirmStep(ctxt, confirm_offset, cautious = confirm_cautious )
|
||||
if last_confirm:
|
||||
cf.add_dependency(last_confirm)
|
||||
last_confirm = cf
|
||||
|
||||
|
||||
# the main-loop variant lets the context schedule the steps; the switch
# variant emits them via dontschedule instead
if not switch_variant:
|
||||
print ctxt.schedule([ last_confirm, sh ])
|
||||
else:
|
||||
print ctxt.dontschedule([ last_confirm, sh ])
|
||||
|
||||
if switch_variant:
|
||||
print "skipSwitch:;"
|
||||
print " ptr += iterBytes;"
|
||||
print " }" # close extra scope around switch variant
|
||||
print " }"
|
||||
|
||||
|
||||
def produce_init_state(self):
    # Returns the generated C that declares and initialises the state "s":
    # with history, the table entry for the pre-start value is loaded and
    # shifted by -num_buckets; without history, the FDR start value is used.
    state_var = self.state_variable
    state_type = self.state_type
    back_shift = state_var.type.shift_expr(state_var.name, -1 * self.num_buckets)
    shift_stmt = "%s = %s" % (state_var.name, back_shift)

    init_template = Template("""
$TYPENAME s;
if (a->len_history) {
u32 tmp = getPreStartVal(a, $DOMAIN);
s = *((const $TYPENAME *)ft + tmp);
$SHIFT_EXPR;
} else {
s = *(const $TYPENAME *)&fdr->start;
}
""")
    # ZERO_EXPR is still supplied although the template does not reference it
    return init_template.substitute(TYPENAME = state_type.get_name(),
                                    ZERO_EXPR = state_type.zero_expression(),
                                    DOMAIN = self.domain,
                                    SHIFT_EXPR = shift_stmt)
|
||||
|
||||
def produce_code(self):
|
||||
|
||||
(behind, ahead) = self.get_hash_safety_parameters()
|
||||
loop_read_behind = behind
|
||||
loop_read_ahead = self.loop_bytes + ahead
|
||||
|
||||
# we set up mask and shift stuff for extracting our masks from registers
|
||||
#
|
||||
# we have a choice as to whether to mask out the value early or
|
||||
# extract the value (shift first) then mask it
|
||||
#
|
||||
# Intel has a free scaling factor from 1/2/4/8 so we want to combine
|
||||
# the extra needed shift for SSE registers with the mask operation
|
||||
|
||||
ssb = self.state_type.size / 8 # state size in bytes
|
||||
|
||||
# Intel path
|
||||
if ssb == 16 and self.domain == 16:
|
||||
# obscure corner - we don't have the room in the register to
|
||||
# do this for all values so we don't. domain==16 is pretty
|
||||
# bad anyhow, of course
|
||||
self.reach_mult = 8
|
||||
else:
|
||||
self.reach_mult = ssb
|
||||
|
||||
shift_amts = { 1 : 0, 2 : 1, 4 : 2, 8 : 3, 16: 4 }
|
||||
self.reach_shift_adjust = shift_amts[ ssb/self.reach_mult ]
|
||||
self.reach_mask = ((1 << self.domain) - 1) << self.reach_shift_adjust
|
||||
|
||||
print self.produce_header(visible = False)
|
||||
|
||||
print "// ",
|
||||
print " Arch: " + self.arch.name,
|
||||
print " State type: " + self.state_type.get_name(),
|
||||
print " Num buckets: %d" % self.num_buckets,
|
||||
print " Domain: %d" % self.domain,
|
||||
print " Stride: %d" % self.stride
|
||||
|
||||
print self.produce_common_declarations()
|
||||
print
|
||||
|
||||
print "\tconst size_t tabSize = %d;" % self.table_size
|
||||
print """
|
||||
const u8 * ft = (const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR));
|
||||
const u32 * confBase = (const u32 *)(ft + tabSize);
|
||||
"""
|
||||
print self.produce_init_state()
|
||||
print "\tconst size_t iterBytes = %d;" % self.loop_bytes
|
||||
print "\tconst size_t START_MOD = %d;" % self.datasize_bytes
|
||||
print "\tconst size_t LOOP_READ_AHEAD = %d;" % loop_read_ahead
|
||||
|
||||
print """
|
||||
while (ptr < buf + len) {
|
||||
|
||||
u8 doMainLoop = 1;
|
||||
size_t remaining = len - (ptr - buf);
|
||||
size_t dist;
|
||||
if (remaining <= iterBytes) {
|
||||
dist = remaining; // once through the switch and we're done
|
||||
} else if (remaining < 2 * iterBytes) {
|
||||
// nibble some stuff off the front, skip the main loop,
|
||||
// then come back here
|
||||
dist = iterBytes; // maybe could be cleverer
|
||||
} else {
|
||||
// now, we need to see if we can make it to a main loop iteration
|
||||
// if so, we need to ensure that the main loop iteration is aligned
|
||||
// to a START_MOD boundary and i >= 8 so we can read ptr + i - 8
|
||||
|
||||
// see if we can do it - if not, just switch the main loop off,
|
||||
// eat iterBytes in cautious mode, and come back to this loop
|
||||
|
||||
const u8 * target = MAX(buf + 8, ptr);
|
||||
target = ROUNDUP_PTR(target, START_MOD);
|
||||
dist = target - ptr;
|
||||
if (dist > iterBytes) {
|
||||
doMainLoop = 0;
|
||||
dist = iterBytes;
|
||||
}
|
||||
}
|
||||
"""
|
||||
self.produce_main_loop(switch_variant = True)
|
||||
self.produce_main_loop(switch_variant = False)
|
||||
print """
|
||||
}
|
||||
"""
|
||||
print self.produce_footer()
|
||||
|
||||
def get_name(self):
    # Name of the generated function, built from the arch name, domain,
    # stride and state width.
    name_parts = (self.arch.name, self.domain, self.stride, self.state_width)
    return "fdr_exec_%s_d%d_s%d_w%d" % name_parts
|
||||
|
||||
def __init__(self, state_width, domain, stride,
             arch,
             table_state_width = None,
             num_buckets = 8,
             extract_frequency = None,
             confirm_frequency = None):
    # Validates the matcher parameters and derives every value the code
    # generator needs (types, table size, extract/confirm configuration).
    #   state_width        - internal state width in bits; only 128 is accepted
    #   domain             - number of input bits used to index the table (8..16)
    #   stride             - bytes between table accesses; one of 1, 2, 4, 8
    #   arch               - architecture description; stored as-is
    #   table_state_width  - accepted but currently overridden by state_width
    #   num_buckets        - only 8 is accepted
    #   extract_frequency  - bytes between state extracts; None means datasize_bytes
    #   confirm_frequency  - bytes between confirms; None means extract_frequency
    # Invalid values are reported through fail_out().
    # NOTE(review): fail_out() is presumably fatal; the code after each call
    # assumes it does not return - confirm in autogen_utils.

    # First - set up the values that are fundamental to how this matcher will operate
    self.arch = arch

    # get the width of the state width on which we operate internally
    if state_width not in [ 128 ]:
        fail_out("Unknown state width: %d" % state_width)
    self.state_width = state_width
    self.state_type = getRequiredType(self.state_width)
    self.state_variable = IntegerVariable("s", self.state_type)

    # the table always uses the same width as the state; the
    # table_state_width argument is deliberately left ignored, as before
    table_state_width = state_width
    self.table_state_width = state_width
    self.table_state_type = getRequiredType(self.table_state_width)

    # domain is the number of bits that we draw from our input to
    # index our 'reach' table
    if not 8 <= domain <= 16:
        fail_out("Unsupported domain: %d" % domain)
    self.domain = domain
    # this is the load type required for this domain if we want to
    # load it one at a time
    self.single_load_type = getRequiredType(self.domain)

    # table size
    self.table_size = 2**domain * table_state_width // 8

    # stride is the frequency with which we make data-driven
    # accesses to our reach table
    if stride not in [ 1, 2, 4, 8]:
        fail_out("Unsupported stride: %d" % stride)
    if stride * num_buckets > state_width:
        fail_out("Stride %d is too big for the number of buckets %d given state width %d\n" % (stride, num_buckets, state_width))
    self.stride = stride

    if num_buckets != 8:
        fail_out("Unsupported number of buckets: %d" % num_buckets)
    if state_width % num_buckets and state_width == 128:
        fail_out("Bucket scheme requires bit-shifts on m128 (failing)")
    self.num_buckets = num_buckets

    # Second - set up derived or optimization values - these can be
    # overridden by arguments that are passed in

    self.datasize = 64
    self.bulk_load_type = IntegerType(self.datasize)
    self.datasize_bytes = self.datasize/8

    self.value_extract_type = IntegerType(self.datasize)

    self.fdr2_force_naive_load = False # disable everywhere for trunk

    # extract frequency is how frequently (in bytes) we destructively shift
    # our state value after having pulled out that many bytes into a
    # confirm register (of one sort or another).
    # none means a default value - datasize, our biggest easily available GPR
    if extract_frequency is None:
        extract_frequency = self.datasize_bytes
    self.extract_frequency = extract_frequency
    self.extract_size = self.extract_frequency*self.num_buckets
    if extract_frequency < stride:
        fail_out("Can't extract at extract frequency %d with stride %d" % (extract_frequency, stride))
    if extract_frequency not in [ None, 1, 2, 4, 8, 16]:
        fail_out("Weird extract frequency: %d" % extract_frequency)

    if self.extract_size <= 32:
        self.extr_type = IntegerType(32)
    elif self.extract_size <= 64:
        self.extr_type = IntegerType(64)
    else:
        # report the offending size (this path previously referenced an
        # undefined name and raised NameError instead of the message)
        fail_out("Implausible size %d required for confirm extract step" % self.extract_size)

    # extract_frequency is how often we pull out our state and place
    # it somewhere in a lossless fashion
    # confirm_frequency, on the other hand, is how frequently we
    # take the state extracted by extract_frequency and cobble it
    # together into a matching loop
    # confirm_frequency must be a multiple of extract_frequency
    # and must fit into a fast register; for now; we're going to
    # stay in the GPR domain
    if confirm_frequency is None:
        confirm_frequency = self.extract_frequency
    self.confirm_frequency = confirm_frequency
    if confirm_frequency % self.extract_frequency:
        fail_out("Confirm frequency %d must be evenly divisible by extract_frequency %d" % (confirm_frequency, self.extract_frequency))

    self.conf_size = self.confirm_frequency * self.num_buckets
    if self.conf_size <= 32:
        self.conf_type = IntegerType(32)
    elif self.conf_size <= 64:
        self.conf_type = IntegerType(64)
    else:
        fail_out("Implausible size %d required for confirm accumulate step" % self.conf_size)

    # how many bytes in flight at once
    self.loop_bytes = 16

    # confirm configuration

    # how many entries in the top-level confirm table - 256 means
    # complete split on the last character
    self.conf_top_level_split = 256

    # how much we 'pull back' in confirm - this is obviously related
    # to the first level conf but we will keep two separate parameters
    # for this to avoid the risk of conflating these
    self.conf_pull_back = 1

    if self.conf_pull_back > 0 and self.conf_top_level_split < 256:
        fail_out("Pull back distance %d not supported by top level split %d" % (self.conf_pull_back, self.conf_top_level_split))

    # minor stuff
    self.default_body_indent = 8
|
562
src/fdr/fdr_compile.cpp
Normal file
562
src/fdr/fdr_compile.cpp
Normal file
@@ -0,0 +1,562 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief FDR literal matcher: build API.
|
||||
*/
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_compile.h"
|
||||
#include "fdr_confirm.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "fdr_engine_description.h"
|
||||
#include "teddy_compile.h"
|
||||
#include "teddy_engine_description.h"
|
||||
#include "grey.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/dump_mask.h"
|
||||
#include "util/target_info.h"
|
||||
#include "util/ue2string.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
namespace {
|
||||
|
||||
class FDRCompiler : boost::noncopyable {
|
||||
private:
|
||||
const FDREngineDescription ŋ
|
||||
vector<u8> tab;
|
||||
const vector<hwlmLiteral> &lits;
|
||||
map<BucketIndex, std::vector<LiteralIndex> > bucketToLits;
|
||||
bool make_small;
|
||||
|
||||
u8 *tabIndexToMask(u32 indexInTable);
|
||||
void assignStringToBucket(LiteralIndex l, BucketIndex b);
|
||||
void assignStringsToBuckets();
|
||||
#ifdef DEBUG
|
||||
void dumpMasks(const u8 *defaultMask);
|
||||
#endif
|
||||
void setupTab();
|
||||
aligned_unique_ptr<FDR> setupFDR(pair<u8 *, size_t> link);
|
||||
void createInitialState(FDR *fdr);
|
||||
|
||||
public:
|
||||
FDRCompiler(const vector<hwlmLiteral> &lits_in,
|
||||
const FDREngineDescription &eng_in, bool make_small_in)
|
||||
: eng(eng_in), tab(eng_in.getTabSizeBytes()), lits(lits_in),
|
||||
make_small(make_small_in) {}
|
||||
|
||||
aligned_unique_ptr<FDR> build(pair<u8 *, size_t> link);
|
||||
};
|
||||
|
||||
/**
 * \brief Return a pointer to the mask stored for table entry \p indexInTable.
 *
 * Each entry occupies (scheme width / 8) bytes of \c tab. The assertion
 * checks the byte range that is actually addressed rather than comparing an
 * entry index against the table's size in bytes.
 */
u8 *FDRCompiler::tabIndexToMask(u32 indexInTable) {
    const size_t stride = eng.getSchemeWidth() / 8;
    assert(((size_t)indexInTable + 1) * stride <= tab.size());
    return tab.data() + (size_t)indexInTable * stride;
}
|
||||
|
||||
static
|
||||
void setbit(u8 *msk, u32 bit) {
|
||||
msk[bit / 8] |= 1U << (bit % 8);
|
||||
}
|
||||
|
||||
static
|
||||
void clearbit(u8 *msk, u32 bit) {
|
||||
msk[bit / 8] &= ~(1U << (bit % 8));
|
||||
}
|
||||
|
||||
static
|
||||
void andMask(u8 *dest, const u8 *a, const u8 *b, u32 num_bytes) {
|
||||
for (u32 i = 0; i < num_bytes; i++) {
|
||||
dest[i] = a[i] & b[i];
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Fill in the start state stored in \p fdr.
 *
 * For every bucket, the first (min_len - 1) positions are set to 1, where
 * min_len is the length of the shortest literal assigned to that bucket; the
 * remaining positions are left untouched. A bucket with no literals keeps
 * min_len at ~0U, so every one of its positions is set.
 */
void FDRCompiler::createInitialState(FDR *fdr) {
    u8 *state = (u8 *)&fdr->start;

    for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
        // Shortest literal length among this bucket's members.
        const vector<LiteralIndex> &members = bucketToLits[b];
        u32 min_len = ~0U;
        for (const LiteralIndex lit_idx : members) {
            const u32 len = verify_u32(lits[lit_idx].s.length());
            if (len < min_len) {
                min_len = len;
            }
        }

        DEBUG_PRINTF("bucket %u has min_len=%u\n", b, min_len);
        assert(min_len);

        // Positions at or beyond this limit are not set.
        const u32 set_limit = min_len - 1;
        for (PositionInBucket i = 0; i < eng.getBucketWidth(b); i++) {
            if (i >= set_limit) {
                break;
            }
            setbit(state, eng.getSchemeBit(b, i));
        }
    }
}
|
||||
|
||||
/**
 * \brief Allocate the FDR structure and copy every section into place.
 *
 * The layout written here is: header (rounded up to 16 bytes), table,
 * confirm data, flood control data and, when supplied, the \p link buffer.
 * The temporary confirm and flood buffers, and \p link.first if non-null,
 * are released with aligned_free() once copied.
 */
aligned_unique_ptr<FDR> FDRCompiler::setupFDR(pair<u8 *, size_t> link) {
    const size_t tabSize = eng.getTabSizeBytes();

    // Build the variable-sized sections first so that the total is known.
    pair<u8 *, size_t> floodControlTmp = setupFDRFloodControl(lits, eng);
    pair<u8 *, size_t> confirmTmp =
        setupFullMultiConfs(lits, eng, bucketToLits, make_small);

    const size_t confirmSize = confirmTmp.second;
    const size_t floodSize = floodControlTmp.second;

    assert(ISALIGNED_16(tabSize));
    assert(ISALIGNED_16(confirmSize));
    assert(ISALIGNED_16(floodSize));
    assert(ISALIGNED_16(link.second));

    const size_t headerSize = ROUNDUP_16(sizeof(FDR));
    const size_t size = ROUNDUP_16(headerSize + tabSize + confirmSize +
                                   floodSize + link.second);

    DEBUG_PRINTF("sizes base=%zu tabSize=%zu confirm=%zu floodControl=%zu "
                 "total=%zu\n",
                 headerSize, tabSize, confirmSize, floodSize, size);

    aligned_unique_ptr<FDR> fdr = aligned_zmalloc_unique<FDR>(size);
    assert(fdr); // otherwise would have thrown std::bad_alloc

    fdr->size = size;
    fdr->engineID = eng.getID();
    fdr->maxStringLen = verify_u32(maxLen(lits));
    createInitialState(fdr.get());

    u8 *const base = (u8 *)fdr.get();
    u8 *cursor = base + headerSize;

    // table
    copy(tab.begin(), tab.end(), cursor);
    cursor += tabSize;

    // confirm structures
    memcpy(cursor, confirmTmp.first, confirmSize);
    cursor += confirmSize;
    aligned_free(confirmTmp.first);

    // flood control
    fdr->floodOffset = verify_u32(cursor - base);
    memcpy(cursor, floodControlTmp.first, floodSize);
    cursor += floodSize;
    aligned_free(floodControlTmp.first);

    // optional linked buffer; an offset of zero records "no link"
    if (!link.first) {
        fdr->link = 0;
    } else {
        fdr->link = verify_u32(cursor - base);
        memcpy(cursor, link.first, link.second);
        aligned_free(link.first);
    }

    return fdr;
}
|
||||
|
||||
// Record literal 'l' as a member of bucket 'b' (the bucket's list is created
// on first use).
void FDRCompiler::assignStringToBucket(LiteralIndex l, BucketIndex b) {
    vector<LiteralIndex> &members = bucketToLits[b];
    members.push_back(l);
}
|
||||
|
||||
struct LitOrder {
|
||||
explicit LitOrder(const vector<hwlmLiteral> &vl_) : vl(vl_) {}
|
||||
bool operator()(const u32 &i1, const u32 &i2) const {
|
||||
const string &i1s = vl[i1].s;
|
||||
const string &i2s = vl[i2].s;
|
||||
|
||||
size_t len1 = i1s.size(), len2 = i2s.size();
|
||||
|
||||
if (len1 != len2) {
|
||||
return len1 < len2;
|
||||
} else {
|
||||
string::const_reverse_iterator it1, it2;
|
||||
tie(it1, it2) =
|
||||
std::mismatch(i1s.rbegin(), i1s.rend(), i2s.rbegin());
|
||||
if (it1 == i1s.rend()) {
|
||||
return false;
|
||||
}
|
||||
return *it1 < *it2;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
const vector<hwlmLiteral> &vl;
|
||||
};
|
||||
|
||||
// Score for a group of 'count' literals whose length is 'len'; lower is
// better. The length is capped at LEN_THRESH and contributes
// LEN_THRESH^3 / len^3 (integer division), which is then multiplied by the
// count. A zero length yields the maximum possible score.
static u64a getScoreUtil(u32 len, u32 count) {
    if (!len) {
        return (u64a)-1;
    }

    const u32 LEN_THRESH = 128;
    u32 elen = len;
    if (elen > LEN_THRESH) {
        elen = LEN_THRESH;
    }

    const u32 threshCubed = LEN_THRESH * LEN_THRESH * LEN_THRESH;
    const u32 lenCubed = elen * elen * elen;
    const u64a lenScore = threshCubed / lenCubed;

    // deemphasize count - possibly more than needed
    // this might be overkill in the other direction
    return count * lenScore;
}
|
||||
|
||||
//#define DEBUG_ASSIGNMENT
|
||||
void FDRCompiler::assignStringsToBuckets() {
|
||||
typedef u64a SCORE; // 'Score' type
|
||||
const SCORE MAX_SCORE = (SCORE)-1;
|
||||
const u32 CHUNK_MAX = 512;
|
||||
const u32 BUCKET_MAX = 16;
|
||||
typedef pair<SCORE, u32> SCORE_INDEX_PAIR;
|
||||
|
||||
u32 ls = verify_u32(lits.size());
|
||||
// make a vector that contains our literals as pointers or u32 LiteralIndex values
|
||||
vector<LiteralIndex> vli;
|
||||
vli.resize(ls);
|
||||
map<u32, u32> lenCounts;
|
||||
for (LiteralIndex l = 0; l < ls; l++) {
|
||||
vli[l] = l;
|
||||
lenCounts[lits[l].s.size()]++;
|
||||
}
|
||||
// sort vector by literal length + if tied on length, 'magic' criteria of some kind (tbd)
|
||||
stable_sort(vli.begin(), vli.end(), LitOrder(lits));
|
||||
|
||||
#ifdef DEBUG_ASSIGNMENT
|
||||
for (map<u32, u32>::iterator i = lenCounts.begin(), e = lenCounts.end();
|
||||
i != e; ++i) {
|
||||
printf("l<%d>:%d ", i->first, i->second);
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
|
||||
// TODO: detailed early stage literal analysis for v. small cases (actually look at lits)
|
||||
// yes - after we factor this out and merge in the Teddy style of building we can look
|
||||
// at this, although the teddy merge modelling is quite different. It's still probably
|
||||
// adaptable to some extent for this class of problem
|
||||
|
||||
u32 firstIds[CHUNK_MAX]; // how many are in this chunk (CHUNK_MAX - 1 contains 'last' bound)
|
||||
u32 count[CHUNK_MAX]; // how many are in this chunk
|
||||
u32 length[CHUNK_MAX]; // how long things in the chunk are
|
||||
|
||||
const u32 MAX_CONSIDERED_LENGTH = 16;
|
||||
u32 currentChunk = 0;
|
||||
u32 currentSize = 0;
|
||||
u32 chunkStartID = 0;
|
||||
u32 maxPerChunk = ls/(CHUNK_MAX - MIN(MAX_CONSIDERED_LENGTH, lenCounts.size())) + 1;
|
||||
|
||||
for (u32 i = 0; i < ls && currentChunk < CHUNK_MAX - 1; i++) {
|
||||
LiteralIndex l = vli[i];
|
||||
if ((currentSize < MAX_CONSIDERED_LENGTH && (lits[l].s.size() != currentSize)) ||
|
||||
(currentSize != 1 && ((i - chunkStartID) >= maxPerChunk))) {
|
||||
currentSize = lits[l].s.size();
|
||||
if (currentChunk) {
|
||||
count[currentChunk - 1 ] = i - chunkStartID;
|
||||
}
|
||||
chunkStartID = firstIds[currentChunk] = i;
|
||||
length[currentChunk] = currentSize;
|
||||
currentChunk++;
|
||||
}
|
||||
}
|
||||
count[currentChunk - 1] = ls - chunkStartID;
|
||||
// close off chunks with an empty row
|
||||
firstIds[currentChunk] = ls;
|
||||
length[currentChunk] = 0;
|
||||
count[currentChunk] = 0;
|
||||
u32 nChunks = currentChunk + 1;
|
||||
|
||||
#ifdef DEBUG_ASSIGNMENT
|
||||
for (u32 j = 0; j < nChunks; j++) {
|
||||
printf("%d %d %d %d\n", j, firstIds[j], count[j], length[j]);
|
||||
}
|
||||
#endif
|
||||
|
||||
SCORE_INDEX_PAIR t[CHUNK_MAX][BUCKET_MAX]; // pair of score, index
|
||||
u32 nb = eng.getNumBuckets();
|
||||
|
||||
for (u32 j = 0; j < nChunks; j++) {
|
||||
u32 cnt = 0;
|
||||
for (u32 k = j; k < nChunks; ++k) {
|
||||
cnt += count[k];
|
||||
}
|
||||
t[j][0] = make_pair(getScoreUtil(length[j], cnt), 0);
|
||||
}
|
||||
|
||||
for (u32 i = 1; i < nb; i++) {
|
||||
for (u32 j = 0; j < nChunks - 1; j++) { // don't process last, empty row
|
||||
SCORE_INDEX_PAIR best = make_pair(MAX_SCORE, 0);
|
||||
u32 cnt = count[j];
|
||||
for (u32 k = j + 1; k < nChunks - 1; k++, cnt += count[k]) {
|
||||
SCORE score = getScoreUtil(length[j], cnt);
|
||||
if (score > best.first) {
|
||||
break; // if we're now worse locally than our best score, give up
|
||||
}
|
||||
score += t[k][i-1].first;
|
||||
if (score < best.first) {
|
||||
best = make_pair(score, k);
|
||||
}
|
||||
}
|
||||
t[j][i] = best;
|
||||
}
|
||||
t[nChunks - 1][i] = make_pair(0,0); // fill in empty final row for next iteration
|
||||
}
|
||||
|
||||
#ifdef DEBUG_ASSIGNMENT
|
||||
for (u32 j = 0; j < nChunks; j++) {
|
||||
for (u32 i = 0; i < nb; i++) {
|
||||
SCORE_INDEX_PAIR v = t[j][i];
|
||||
printf("<%7lld,%3d>", v.first, v.second);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
// our best score is in best[0][N_BUCKETS-1] and we can follow the links
|
||||
// to find where our buckets should start and what goes into them
|
||||
for (u32 i = 0, n = nb; n && (i != nChunks - 1); n--) {
|
||||
u32 j = t[i][n - 1].second;
|
||||
if (j == 0) {
|
||||
j = nChunks - 1;
|
||||
}
|
||||
// put chunks between i - j into bucket (NBUCKETS-1) - n
|
||||
#ifdef DEBUG_ASSIGNMENT
|
||||
printf("placing from %d to %d in bucket %d\n", firstIds[i], firstIds[j],
|
||||
nb - n);
|
||||
#endif
|
||||
for (u32 k = firstIds[i]; k < firstIds[j]; k++) {
|
||||
assignStringToBucket((LiteralIndex)vli[k], nb - n);
|
||||
}
|
||||
i = j;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
void FDRCompiler::dumpMasks(const u8 *defaultMask) {
|
||||
const size_t width = eng.getSchemeWidth();
|
||||
printf("default mask: %s\n", dumpMask(defaultMask, width).c_str());
|
||||
for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
|
||||
u8 *m = tabIndexToMask(i);
|
||||
if (memcmp(m, defaultMask, width / 8)) {
|
||||
printf("tab %04x: %s\n", i, dumpMask(m, width).c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static
|
||||
bool getMultiEntriesAtPosition(const FDREngineDescription &eng,
|
||||
const vector<LiteralIndex> &vl,
|
||||
const vector<hwlmLiteral> &lits,
|
||||
SuffixPositionInString pos,
|
||||
std::map<u32, ue2::unordered_set<u32> > &m2) {
|
||||
u32 distance = 0;
|
||||
if (eng.bits <= 8) {
|
||||
distance = 1;
|
||||
} else if (eng.bits <= 16) {
|
||||
distance = 2;
|
||||
} else if (eng.bits <= 32) {
|
||||
distance = 4;
|
||||
}
|
||||
|
||||
for (vector<LiteralIndex>::const_iterator i = vl.begin(), e = vl.end();
|
||||
i != e; ++i) {
|
||||
if (e - i > 5) {
|
||||
__builtin_prefetch(&lits[*(i + 5)]);
|
||||
}
|
||||
const hwlmLiteral &lit = lits[*i];
|
||||
const size_t sz = lit.s.size();
|
||||
u32 mask = 0;
|
||||
u32 dontCares = 0;
|
||||
for (u32 cnt = 0; cnt < distance; cnt++) {
|
||||
int newPos = pos - cnt;
|
||||
u8 dontCareByte = 0x0;
|
||||
u8 maskByte = 0x0;
|
||||
if (newPos < 0 || ((u32)newPos >= sz)) {
|
||||
dontCareByte = 0xff;
|
||||
} else {
|
||||
u8 c = lit.s[sz - newPos - 1];
|
||||
maskByte = c;
|
||||
u32 remainder = eng.bits - cnt * 8;
|
||||
assert(remainder != 0);
|
||||
if (remainder < 8) {
|
||||
u8 cmask = (1U << remainder) - 1;
|
||||
maskByte &= cmask;
|
||||
dontCareByte |= ~cmask;
|
||||
}
|
||||
if (lit.nocase && ourisalpha(c)) {
|
||||
maskByte &= 0xdf;
|
||||
dontCareByte |= 0x20;
|
||||
}
|
||||
}
|
||||
u32 loc = cnt * 8;
|
||||
mask |= maskByte << loc;
|
||||
dontCares |= dontCareByte << loc;
|
||||
}
|
||||
|
||||
// truncate m and dc down to nBits
|
||||
mask &= (1U << eng.bits) - 1;
|
||||
dontCares &= (1U << eng.bits) - 1;
|
||||
if (dontCares == ((1U << eng.bits) - 1)) {
|
||||
return true;
|
||||
}
|
||||
m2[dontCares].insert(mask);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void FDRCompiler::setupTab() {
|
||||
const size_t mask_size = eng.getSchemeWidth() / 8;
|
||||
assert(mask_size);
|
||||
|
||||
vector<u8> defaultMask(mask_size, 0xff);
|
||||
for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
|
||||
memcpy(tabIndexToMask(i), &defaultMask[0], mask_size);
|
||||
}
|
||||
|
||||
typedef std::map<u32, ue2::unordered_set<u32> > M2SET;
|
||||
|
||||
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
|
||||
const vector<LiteralIndex> &vl = bucketToLits[b];
|
||||
SuffixPositionInString pLimit = eng.getBucketWidth(b);
|
||||
for (SuffixPositionInString pos = 0; pos < pLimit; pos++) {
|
||||
u32 bit = eng.getSchemeBit(b, pos);
|
||||
M2SET m2;
|
||||
bool done = getMultiEntriesAtPosition(eng, vl, lits, pos, m2);
|
||||
if (done) {
|
||||
clearbit(&defaultMask[0], bit);
|
||||
continue;
|
||||
}
|
||||
for (M2SET::const_iterator i = m2.begin(), e = m2.end(); i != e;
|
||||
++i) {
|
||||
u32 dc = i->first;
|
||||
const ue2::unordered_set<u32> &mskSet = i->second;
|
||||
u32 v = ~dc;
|
||||
do {
|
||||
u32 b2 = v & dc;
|
||||
for (ue2::unordered_set<u32>::const_iterator
|
||||
i2 = mskSet.begin(),
|
||||
e2 = mskSet.end();
|
||||
i2 != e2; ++i2) {
|
||||
u32 val = (*i2 & ~dc) | b2;
|
||||
clearbit(tabIndexToMask(val), bit);
|
||||
}
|
||||
v = (v + (dc & -dc)) | ~dc;
|
||||
} while (v != ~dc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
|
||||
u8 *m = tabIndexToMask(i);
|
||||
andMask(m, m, &defaultMask[0], mask_size);
|
||||
}
|
||||
#ifdef DEBUG
|
||||
dumpMasks(&defaultMask[0]);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Run the three build stages in order: bucket assignment, table setup, and
// final layout of the FDR structure (which consumes 'link').
aligned_unique_ptr<FDR> FDRCompiler::build(pair<u8 *, size_t> link) {
    assignStringsToBuckets();
    setupTab();

    aligned_unique_ptr<FDR> fdr = setupFDR(link);
    return fdr;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
static
|
||||
aligned_unique_ptr<FDR>
|
||||
fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
|
||||
const target_t &target, const Grey &grey, u32 hint,
|
||||
hwlmStreamingControl *stream_control) {
|
||||
pair<u8 *, size_t> link(nullptr, 0);
|
||||
if (stream_control) {
|
||||
link = fdrBuildTableStreaming(lits, stream_control);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");
|
||||
|
||||
if (grey.fdrAllowTeddy) {
|
||||
aligned_unique_ptr<FDR> fdr
|
||||
= teddyBuildTableHinted(lits, make_small, hint, target, link);
|
||||
if (fdr) {
|
||||
DEBUG_PRINTF("build with teddy succeeded\n");
|
||||
return fdr;
|
||||
} else {
|
||||
DEBUG_PRINTF("build with teddy failed, will try with FDR\n");
|
||||
}
|
||||
}
|
||||
|
||||
const unique_ptr<FDREngineDescription> des =
|
||||
(hint == HINT_INVALID) ? chooseEngine(target, lits, make_small)
|
||||
: getFdrDescription(hint);
|
||||
|
||||
if (!des) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
FDRCompiler fc(lits, *des, make_small);
|
||||
return fc.build(link);
|
||||
}
|
||||
|
||||
// Public build entry point: builds a matcher for 'lits' with no engine hint,
// leaving the engine choice to fdrBuildTableInternal().
aligned_unique_ptr<FDR> fdrBuildTable(const vector<hwlmLiteral> &lits,
                                      bool make_small, const target_t &target,
                                      const Grey &grey,
                                      hwlmStreamingControl *stream_control) {
    const u32 no_hint = HINT_INVALID;
    return fdrBuildTableInternal(lits, make_small, target, grey, no_hint,
                                 stream_control);
}
|
||||
|
||||
#if !defined(RELEASE_BUILD)
|
||||
|
||||
// Non-release build entry point: as fdrBuildTable(), but 'hint' is passed
// through to fdrBuildTableInternal() to select the engine.
aligned_unique_ptr<FDR>
fdrBuildTableHinted(const vector<hwlmLiteral> &lits, bool make_small, u32 hint,
                    const target_t &target, const Grey &grey,
                    hwlmStreamingControl *stream_control) {
    return fdrBuildTableInternal(lits, make_small, target, grey, hint,
                                 stream_control);
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
// FIXME: should be compile-time only
|
||||
size_t fdrSize(const FDR *fdr) {
|
||||
assert(fdr);
|
||||
return fdr->size;
|
||||
}
|
66
src/fdr/fdr_compile.h
Normal file
66
src/fdr/fdr_compile.h
Normal file
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief FDR literal matcher: build API.
|
||||
*/
|
||||
|
||||
#ifndef FDR_COMPILE_H
|
||||
#define FDR_COMPILE_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
struct FDR;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct hwlmLiteral;
|
||||
struct hwlmStreamingControl;
|
||||
struct Grey;
|
||||
struct target_t;
|
||||
|
||||
ue2::aligned_unique_ptr<FDR>
|
||||
fdrBuildTable(const std::vector<hwlmLiteral> &lits, bool make_small,
|
||||
const target_t &target, const Grey &grey,
|
||||
hwlmStreamingControl *stream_control = nullptr);
|
||||
|
||||
#if !defined(RELEASE_BUILD)
|
||||
|
||||
ue2::aligned_unique_ptr<FDR>
|
||||
fdrBuildTableHinted(const std::vector<hwlmLiteral> &lits, bool make_small,
|
||||
u32 hint, const target_t &target, const Grey &grey,
|
||||
hwlmStreamingControl *stream_control = nullptr);
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
88
src/fdr/fdr_compile_internal.h
Normal file
88
src/fdr/fdr_compile_internal.h
Normal file
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FDR_COMPILE_INTERNAL_H
|
||||
#define FDR_COMPILE_INTERNAL_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "hwlm/hwlm_literal.h"
|
||||
|
||||
#include <map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
struct FDRConfirm;
|
||||
struct LitInfo;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
// a pile of decorative typedefs
|
||||
// good for documentation purposes more than anything else
|
||||
typedef u32 LiteralIndex;
|
||||
typedef u32 ConfirmIndex;
|
||||
typedef u32 SuffixPositionInString; // zero is last byte, counting back
|
||||
// into the string
|
||||
typedef u32 BucketIndex;
|
||||
typedef u32 SchemeBitIndex;
|
||||
typedef u32 PositionInBucket; // zero is 'we are matching right now!',
|
||||
// counting towards future matches
|
||||
|
||||
class EngineDescription;
|
||||
class FDREngineDescription;
|
||||
struct hwlmStreamingControl;
|
||||
|
||||
size_t getFDRConfirm(const std::vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
|
||||
bool make_small);
|
||||
|
||||
std::pair<u8 *, size_t> setupFullMultiConfs(
|
||||
const std::vector<hwlmLiteral> &lits, const EngineDescription &eng,
|
||||
std::map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits,
|
||||
bool make_small);
|
||||
|
||||
// all suffixes include an implicit max_bucket_width suffix to ensure that
|
||||
// we always read a full-scale flood "behind" us in terms of what's in our
|
||||
// state; if we don't have a flood that's long enough we won't be in the
|
||||
// right state yet to allow blindly advancing
|
||||
std::pair<u8 *, size_t>
|
||||
setupFDRFloodControl(const std::vector<hwlmLiteral> &lits,
|
||||
const EngineDescription &eng);
|
||||
|
||||
std::pair<u8 *, size_t>
|
||||
fdrBuildTableStreaming(const std::vector<hwlmLiteral> &lits,
|
||||
hwlmStreamingControl *stream_control);
|
||||
|
||||
static constexpr u32 HINT_INVALID = 0xffffffff;
|
||||
|
||||
// fdr_compile_util.cpp utilities
|
||||
size_t maxLen(const std::vector<hwlmLiteral> &lits);
|
||||
size_t minLenCount(const std::vector<hwlmLiteral> &lits, size_t *count);
|
||||
u32 absdiff(u32 i, u32 j);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
65
src/fdr/fdr_compile_util.cpp
Normal file
65
src/fdr/fdr_compile_util.cpp
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "hwlm/hwlm_literal.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
size_t maxLen(const vector<hwlmLiteral> &lits) {
|
||||
size_t rv = 0;
|
||||
for (const auto &lit : lits) {
|
||||
rv = max(rv, lit.s.size());
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
size_t minLenCount(const vector<hwlmLiteral> &lits, size_t *count) {
|
||||
size_t rv = (size_t)-1;
|
||||
*count = 0;
|
||||
for (const auto &lit : lits) {
|
||||
if (lit.s.size() < rv) {
|
||||
rv = lit.s.size();
|
||||
*count = 1;
|
||||
} else if (lit.s.size() == rv) {
|
||||
(*count)++;
|
||||
}
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
// Absolute difference of two unsigned values, computed without wrapping.
u32 absdiff(u32 i, u32 j) {
    if (i > j) {
        return i - j;
    }
    return j - i;
}
|
||||
|
||||
} // namespace ue2
|
100
src/fdr/fdr_confirm.h
Normal file
100
src/fdr/fdr_confirm.h
Normal file
@@ -0,0 +1,100 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FDR_CONFIRM_H
|
||||
#define FDR_CONFIRM_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "hwlm/hwlm.h"
|
||||
|
||||
static really_inline
|
||||
u32 mul_hash_64(u64a lv, u64a andmsk, u64a mult, u32 nBits) {
|
||||
return ((lv & andmsk) * mult) >> (sizeof(u64a)*8 - nBits);
|
||||
}
|
||||
|
||||
// data structures
|
||||
// TODO: fix this hard-coding
|
||||
#define CONF_TYPE u64a
|
||||
#define CONF_HASH_CALL mul_hash_64
|
||||
|
||||
typedef enum LitInfoFlags {
|
||||
NoFlags = 0,
|
||||
Caseless = 1,
|
||||
NoRepeat = 2,
|
||||
ComplexConfirm = 4
|
||||
} LitInfoFlags;
|
||||
|
||||
/**
|
||||
* \brief Structure describing a literal, linked to by FDRConfirm.
|
||||
*
|
||||
* This structure is followed in memory by a variable-sized string prefix at
|
||||
* LitInfo::s, for strings that are longer than CONF_TYPE.
|
||||
*/
|
||||
struct LitInfo {
|
||||
CONF_TYPE v;
|
||||
CONF_TYPE msk;
|
||||
hwlm_group_t groups;
|
||||
u32 size;
|
||||
u32 id; // literal ID as passed in
|
||||
u8 flags; /* LitInfoFlags */
|
||||
u8 next;
|
||||
u8 extended_size;
|
||||
u8 s[1]; // literal prefix, which continues "beyond" this struct.
|
||||
};
|
||||
|
||||
#define FDRC_FLAG_NO_CONFIRM 1
|
||||
|
||||
/**
|
||||
* \brief FDR confirm header.
|
||||
*
|
||||
* This structure is followed in memory by:
|
||||
*
|
||||
* -# lit index mapping (array of u32)
|
||||
* -# list of LitInfo structures
|
||||
*/
|
||||
struct FDRConfirm {
|
||||
CONF_TYPE andmsk;
|
||||
CONF_TYPE mult;
|
||||
u32 nBitsOrSoleID; // if flags is NO_CONFIRM then this is soleID
|
||||
u32 flags; // sole meaning is 'non-zero means no-confirm' (that is all)
|
||||
hwlm_group_t groups;
|
||||
u32 soleLitSize;
|
||||
u32 soleLitCmp;
|
||||
u32 soleLitMsk;
|
||||
};
|
||||
|
||||
static really_inline
|
||||
const u32 *getConfirmLitIndex(const struct FDRConfirm *fdrc) {
|
||||
const u8 *base = (const u8 *)fdrc;
|
||||
const u32 *litIndex =
|
||||
(const u32 *)(base + ROUNDUP_N(sizeof(*fdrc), alignof(u32)));
|
||||
assert(ISALIGNED(litIndex));
|
||||
return litIndex;
|
||||
}
|
||||
|
||||
#endif // FDR_CONFIRM_H
|
479
src/fdr/fdr_confirm_compile.cpp
Normal file
479
src/fdr/fdr_confirm_compile.cpp
Normal file
@@ -0,0 +1,479 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "fdr_confirm.h"
|
||||
#include "engine_description.h"
|
||||
#include "teddy_engine_description.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <set>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
typedef u8 ConfSplitType;
|
||||
typedef pair<BucketIndex, ConfSplitType> BucketSplitPair;
|
||||
typedef map<BucketSplitPair, pair<FDRConfirm *, size_t> > BC2CONF;
|
||||
|
||||
// return the number of bytes beyond a length threshold in all strings in lits
|
||||
static
|
||||
size_t thresholdedSize(const vector<hwlmLiteral> &lits, size_t threshold) {
|
||||
size_t tot = 0;
|
||||
for (const auto &lit : lits) {
|
||||
size_t sz = lit.s.size();
|
||||
if (sz > threshold) {
|
||||
tot += ROUNDUP_N(sz - threshold, 8);
|
||||
}
|
||||
}
|
||||
return tot;
|
||||
}
|
||||
|
||||
static
|
||||
u64a make_u64a_mask(const vector<u8> &v) {
|
||||
assert(v.size() <= sizeof(u64a));
|
||||
if (v.size() > sizeof(u64a)) {
|
||||
throw std::exception();
|
||||
}
|
||||
|
||||
u64a mask = 0;
|
||||
size_t vlen = v.size();
|
||||
size_t len = std::min(vlen, sizeof(mask));
|
||||
unsigned char *m = (unsigned char *)&mask;
|
||||
memcpy(m + sizeof(mask) - len, &v[vlen - len], len);
|
||||
return mask;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a temporary vector of LitInfo structures (without the corresponding
|
||||
* pointers to the actual strings; these cannot be laid out yet). These
|
||||
* stay in 1:1 correspondence with the lits[] vector as that's the only
|
||||
* place we have to obtain our full strings.
|
||||
*/
|
||||
static
|
||||
void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
|
||||
CONF_TYPE &andmsk) {
|
||||
const CONF_TYPE all_ones = ~(u64a)0;
|
||||
andmsk = all_ones; // fill in with 'and' of all literal masks
|
||||
|
||||
for (LiteralIndex i = 0; i < lits.size(); i++) {
|
||||
const hwlmLiteral &lit = lits[i];
|
||||
LitInfo &info = tmpLitInfo[i];
|
||||
memset(&info, 0, sizeof(info));
|
||||
info.id = lit.id;
|
||||
u8 flags = NoFlags;
|
||||
if (lit.nocase) {
|
||||
flags |= Caseless;
|
||||
}
|
||||
if (lit.noruns) {
|
||||
flags |= NoRepeat;
|
||||
}
|
||||
if (lit.msk.size() > lit.s.size()) {
|
||||
flags |= ComplexConfirm;
|
||||
info.extended_size = verify_u8(lit.msk.size());
|
||||
}
|
||||
info.flags = flags;
|
||||
info.size = verify_u32(lit.s.size());
|
||||
info.groups = lit.groups;
|
||||
|
||||
// these are built up assuming a LE machine
|
||||
CONF_TYPE msk = all_ones;
|
||||
CONF_TYPE val = 0;
|
||||
for (u32 j = 0; j < sizeof(CONF_TYPE); j++) {
|
||||
u32 shiftLoc = (sizeof(CONF_TYPE) - j - 1) * 8;
|
||||
if (j >= lit.s.size()) {
|
||||
msk &= ~((CONF_TYPE)0xff << shiftLoc);
|
||||
} else {
|
||||
u8 c = lit.s[lit.s.size() - j - 1];
|
||||
if (lit.nocase && ourisalpha(c)) {
|
||||
msk &= ~((CONF_TYPE)CASE_BIT << shiftLoc);
|
||||
val |= (CONF_TYPE)(c & CASE_CLEAR) << shiftLoc;
|
||||
} else {
|
||||
val |= (CONF_TYPE)c << shiftLoc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info.v = val;
|
||||
info.msk = msk;
|
||||
if (!lit.msk.empty()) {
|
||||
u64a l_msk = make_u64a_mask(lit.msk);
|
||||
u64a l_cmp = make_u64a_mask(lit.cmp);
|
||||
|
||||
// test for consistency - if there's intersection, then v and msk
|
||||
// values must line up
|
||||
UNUSED u64a intersection = l_msk & info.msk;
|
||||
assert((info.v & intersection) == (l_cmp & intersection));
|
||||
|
||||
// incorporate lit.msk, lit.cmp into v and msk
|
||||
info.msk |= l_msk;
|
||||
info.v |= l_cmp;
|
||||
}
|
||||
|
||||
andmsk &= info.msk;
|
||||
}
|
||||
}
|
||||
|
||||
//#define FDR_CONFIRM_DUMP 1
|
||||
|
||||
static
|
||||
size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
|
||||
bool applyOneCharOpt, bool make_small, bool make_confirm) {
|
||||
vector<LitInfo> tmpLitInfo(lits.size());
|
||||
CONF_TYPE andmsk;
|
||||
fillLitInfo(lits, tmpLitInfo, andmsk);
|
||||
|
||||
#ifdef FDR_CONFIRM_DUMP
|
||||
printf("-------------------\n");
|
||||
#endif
|
||||
|
||||
// just magic numbers and crude measures for now
|
||||
u32 nBits;
|
||||
if (make_small) {
|
||||
nBits = min(10U, lg2(lits.size()) + 1);
|
||||
} else {
|
||||
nBits = min(13U, lg2(lits.size()) + 4);
|
||||
}
|
||||
|
||||
CONF_TYPE mult = (CONF_TYPE)0x0b4e0ef37bc32127ULL;
|
||||
u32 flags = 0;
|
||||
// we use next three variables for 'confirmless' case to speed-up
|
||||
// confirmation process
|
||||
u32 soleLitSize = 0;
|
||||
u32 soleLitCmp = 0;
|
||||
u32 soleLitMsk = 0;
|
||||
|
||||
if ((applyOneCharOpt && lits.size() == 1 && lits[0].s.size() == 0 &&
|
||||
lits[0].msk.empty()) || make_confirm == false) {
|
||||
flags = FDRC_FLAG_NO_CONFIRM;
|
||||
if (lits[0].noruns) {
|
||||
flags |= NoRepeat; // messy - need to clean this up later as flags is sorta kinda obsoleted
|
||||
}
|
||||
mult = 0;
|
||||
soleLitSize = lits[0].s.size() - 1;
|
||||
// we can get to this point only in confirmless case;
|
||||
// it means that we have only one literal per FDRConfirm (no packing),
|
||||
// with no literal mask and size of literal is less or equal
|
||||
// to the number of masks of Teddy engine;
|
||||
// maximum number of masks for Teddy is 4, so the size of
|
||||
// literal is definitely less or equal to size of u32
|
||||
assert(lits[0].s.size() <= sizeof(u32));
|
||||
for (u32 i = 0; i < lits[0].s.size(); i++) {
|
||||
u32 shiftLoc = (sizeof(u32) - i - 1) * 8;
|
||||
u8 c = lits[0].s[lits[0].s.size() - i - 1];
|
||||
if (lits[0].nocase && ourisalpha(c)) {
|
||||
soleLitCmp |= (u32)(c & CASE_CLEAR) << shiftLoc;
|
||||
soleLitMsk |= (u32)CASE_CLEAR << shiftLoc;
|
||||
}
|
||||
else {
|
||||
soleLitCmp |= (u32)c << shiftLoc;
|
||||
soleLitMsk |= (u32)0xff << shiftLoc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// we can walk the vector and assign elements from the vectors to a
|
||||
// map by hash value
|
||||
map<u32, vector<LiteralIndex> > res2lits;
|
||||
hwlm_group_t gm = 0;
|
||||
for (LiteralIndex i = 0; i < lits.size(); i++) {
|
||||
LitInfo & li = tmpLitInfo[i];
|
||||
u32 hash = CONF_HASH_CALL(li.v, andmsk, mult, nBits);
|
||||
DEBUG_PRINTF("%016llx --> %u\n", li.v, hash);
|
||||
res2lits[hash].push_back(i);
|
||||
gm |= li.groups;
|
||||
}
|
||||
|
||||
#ifdef FDR_CONFIRM_DUMP
|
||||
// print out the literals reversed - makes it easier to line up analyses
|
||||
// that are end-offset based
|
||||
for (map<u32, vector<LiteralIndex> >::iterator i = res2lits.begin(),
|
||||
e = res2lits.end(); i != e; ++i) {
|
||||
u32 hash = i->first;
|
||||
vector<LiteralIndex> & vlidx = i->second;
|
||||
if (vlidx.size() > 1) {
|
||||
printf("%x -> %zu literals\n", hash, vlidx.size());
|
||||
u32 min_len = lits[vlidx.front()].s.size();
|
||||
vector<set<u8> > vsl; // contains the set of chars at each location
|
||||
// reversed from the end
|
||||
vsl.resize(1024);
|
||||
u32 total_string_size = 0;
|
||||
for (vector<LiteralIndex>::iterator i2 = vlidx.begin(),
|
||||
e2 = vlidx.end(); i2 != e2; ++i2) {
|
||||
LiteralIndex litIdx = *i2;
|
||||
total_string_size += lits[litIdx].s.size();
|
||||
for (u32 j = lits[litIdx].s.size(); j != 0 ; j--) {
|
||||
vsl[lits[litIdx].s.size()-j].insert(lits[litIdx].s.c_str()[j - 1]);
|
||||
}
|
||||
min_len = MIN(min_len, lits[litIdx].s.size());
|
||||
}
|
||||
printf("common ");
|
||||
for (u32 j = 0; j < min_len; j++) {
|
||||
if (vsl[j].size() == 1) {
|
||||
printf("%02x", (u32)*vsl[j].begin());
|
||||
} else {
|
||||
printf("__");
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
for (vector<LiteralIndex>::iterator i2 = vlidx.begin(),
|
||||
e2 = vlidx.end(); i2 != e2; ++i2) {
|
||||
LiteralIndex litIdx = *i2;
|
||||
printf("%8x %c", lits[litIdx].id, lits[litIdx].nocase ? '!' : ' ');
|
||||
for (u32 j = lits[litIdx].s.size(); j != 0 ; j--) {
|
||||
u32 dist_from_end = lits[litIdx].s.size() - j;
|
||||
if (dist_from_end < min_len && vsl[dist_from_end].size() == 1) {
|
||||
printf("__");
|
||||
} else {
|
||||
printf("%02x", (u32)lits[litIdx].s.c_str()[j-1]);
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
u32 total_compares = 0;
|
||||
for (u32 j = 0; j < 1024; j++) { // naughty
|
||||
total_compares += vsl[j].size();
|
||||
}
|
||||
printf("Total compare load: %d Total string size: %d\n\n", total_compares, total_string_size);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
const size_t bitsToLitIndexSize = (1U << nBits) * sizeof(u32);
|
||||
const size_t totalLitSize = thresholdedSize(lits, sizeof(CONF_TYPE));
|
||||
|
||||
// this size can now be a worst-case as we can always be a bit smaller
|
||||
size_t size = ROUNDUP_N(sizeof(FDRConfirm), alignof(u32)) +
|
||||
ROUNDUP_N(bitsToLitIndexSize, alignof(LitInfo)) +
|
||||
sizeof(LitInfo) * lits.size() + totalLitSize;
|
||||
size = ROUNDUP_N(size, alignof(FDRConfirm));
|
||||
|
||||
FDRConfirm *fdrc = (FDRConfirm *)aligned_zmalloc(size);
|
||||
assert(fdrc); // otherwise would have thrown std::bad_alloc
|
||||
|
||||
fdrc->andmsk = andmsk;
|
||||
fdrc->mult = mult;
|
||||
fdrc->nBitsOrSoleID = (flags & FDRC_FLAG_NO_CONFIRM) ? lits[0].id : nBits;
|
||||
fdrc->flags = flags;
|
||||
fdrc->soleLitSize = soleLitSize;
|
||||
fdrc->soleLitCmp = soleLitCmp;
|
||||
fdrc->soleLitMsk = soleLitMsk;
|
||||
|
||||
fdrc->groups = gm;
|
||||
|
||||
// After the FDRConfirm, we have the lit index array.
|
||||
u8 *fdrc_base = (u8 *)fdrc;
|
||||
u8 *ptr = fdrc_base + sizeof(*fdrc);
|
||||
ptr = ROUNDUP_PTR(ptr, alignof(u32));
|
||||
u32 *bitsToLitIndex = (u32 *)ptr;
|
||||
ptr += bitsToLitIndexSize;
|
||||
|
||||
// After the lit index array, we have the LitInfo structures themselves,
|
||||
// which vary in size (as each may have a variable-length string after it).
|
||||
ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));
|
||||
|
||||
// Walk the map by hash value assigning indexes and laying out the
|
||||
// elements (and their associated string confirm material) in memory.
|
||||
for (std::map<u32, vector<LiteralIndex> >::const_iterator
|
||||
i = res2lits.begin(), e = res2lits.end(); i != e; ++i) {
|
||||
const u32 hash = i->first;
|
||||
const vector<LiteralIndex> &vlidx = i->second;
|
||||
bitsToLitIndex[hash] = verify_u32(ptr - (u8 *)fdrc);
|
||||
for (vector<LiteralIndex>::const_iterator i2 = vlidx.begin(),
|
||||
e2 = vlidx.end(); i2 != e2; ++i2) {
|
||||
LiteralIndex litIdx = *i2;
|
||||
|
||||
// Write LitInfo header.
|
||||
u8 *oldPtr = ptr;
|
||||
LitInfo &finalLI = *(LitInfo *)ptr;
|
||||
finalLI = tmpLitInfo[litIdx];
|
||||
|
||||
ptr += sizeof(LitInfo); // String starts directly after LitInfo.
|
||||
|
||||
// Write literal prefix (everything before the last N characters,
|
||||
// as the last N are already confirmed).
|
||||
const string &t = lits[litIdx].s;
|
||||
if (t.size() > sizeof(CONF_TYPE)) {
|
||||
size_t prefix_len = t.size() - sizeof(CONF_TYPE);
|
||||
memcpy(&finalLI.s[0], t.c_str(), prefix_len);
|
||||
ptr = &finalLI.s[0] + prefix_len;
|
||||
}
|
||||
|
||||
ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));
|
||||
if (i2 + 1 == e2) {
|
||||
finalLI.next = 0x0;
|
||||
} else {
|
||||
// our next field represents an adjustment on top of
|
||||
// current address + the actual size of the literal
|
||||
// so we track any rounding up done for alignment and
|
||||
// add this in - that way we don't have to use bigger
|
||||
// than a u8 (for now)
|
||||
assert((size_t)(ptr - oldPtr) > t.size());
|
||||
finalLI.next = verify_u8(ptr - oldPtr - t.size());
|
||||
}
|
||||
}
|
||||
assert((size_t)(ptr - fdrc_base) <= size);
|
||||
}
|
||||
|
||||
*fdrc_p = fdrc;
|
||||
|
||||
// Return actual used size, not worst-case size. Must be rounded up to
|
||||
// FDRConfirm alignment so that the caller can lay out a sequence of these.
|
||||
size_t actual_size = ROUNDUP_N((size_t)(ptr - fdrc_base),
|
||||
alignof(FDRConfirm));
|
||||
assert(actual_size <= size);
|
||||
return actual_size;
|
||||
}
|
||||
|
||||
static
|
||||
u32 setupMultiConfirms(const vector<hwlmLiteral> &lits,
|
||||
const EngineDescription &eng, BC2CONF &bc2Conf,
|
||||
map<BucketIndex, vector<LiteralIndex> > &bucketToLits,
|
||||
bool make_small) {
|
||||
u32 pullBack = eng.getConfirmPullBackDistance();
|
||||
u32 splitMask = eng.getConfirmTopLevelSplit() - 1;
|
||||
bool splitHasCase = splitMask & 0x20;
|
||||
|
||||
bool makeConfirm = true;
|
||||
unique_ptr<TeddyEngineDescription> teddyDescr =
|
||||
getTeddyDescription(eng.getID());
|
||||
if (teddyDescr) {
|
||||
makeConfirm = teddyDescr->needConfirm(lits);
|
||||
}
|
||||
|
||||
u32 totalConfirmSize = 0;
|
||||
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
|
||||
if (!bucketToLits[b].empty()) {
|
||||
vector<vector<hwlmLiteral> > vl(eng.getConfirmTopLevelSplit());
|
||||
for (vector<LiteralIndex>::const_iterator
|
||||
i = bucketToLits[b].begin(),
|
||||
e = bucketToLits[b].end();
|
||||
i != e; ++i) {
|
||||
hwlmLiteral lit = lits[*i]; // copy
|
||||
// c is last char of this literal
|
||||
u8 c = *(lit.s.rbegin());
|
||||
|
||||
bool suppressSplit = false;
|
||||
if (pullBack) {
|
||||
// make a shorter string to work over if we're pulling back
|
||||
// getFDRConfirm doesn't know about that stuff
|
||||
assert(lit.s.size() >= pullBack);
|
||||
lit.s.resize(lit.s.size() - pullBack);
|
||||
|
||||
u8 c_sub, c_sub_msk;
|
||||
if (lit.msk.empty()) {
|
||||
c_sub = 0;
|
||||
c_sub_msk = 0;
|
||||
} else {
|
||||
c_sub = *(lit.cmp.rbegin());
|
||||
c_sub_msk = *(lit.msk.rbegin());
|
||||
size_t len = lit.msk.size() -
|
||||
min(lit.msk.size(), (size_t)pullBack);
|
||||
lit.msk.resize(len);
|
||||
lit.cmp.resize(len);
|
||||
}
|
||||
|
||||
// if c_sub_msk is 0xff and lit.nocase
|
||||
// resteer 'c' to an exact value and set suppressSplit
|
||||
if ((c_sub_msk == 0xff) && (lit.nocase)) {
|
||||
suppressSplit = true;
|
||||
c = c_sub;
|
||||
}
|
||||
}
|
||||
|
||||
if (!suppressSplit && splitHasCase && lit.nocase &&
|
||||
ourisalpha(c)) {
|
||||
vl[(u8)(mytoupper(c) & splitMask)].push_back(lit);
|
||||
vl[(u8)(mytolower(c) & splitMask)].push_back(lit);
|
||||
} else {
|
||||
vl[c & splitMask].push_back(lit);
|
||||
}
|
||||
}
|
||||
|
||||
for (u32 c = 0; c < eng.getConfirmTopLevelSplit(); c++) {
|
||||
if (!vl[c].empty()) {
|
||||
DEBUG_PRINTF("b %d c %02x sz %zu\n", b, c, vl[c].size());
|
||||
FDRConfirm *fdrc;
|
||||
size_t size = getFDRConfirm(vl[c], &fdrc,
|
||||
eng.typicallyHoldsOneCharLits(),
|
||||
make_small, makeConfirm);
|
||||
BucketSplitPair p = make_pair(b, c);
|
||||
bc2Conf[p] = make_pair(fdrc, size);
|
||||
totalConfirmSize += size;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return totalConfirmSize;
|
||||
}
|
||||
|
||||
/**
 * Build the complete confirm region for an engine.
 *
 * The returned buffer (allocated with aligned_zmalloc; the caller owns it)
 * starts with a table of primarySwitch * nBuckets u32 offsets, indexed by
 * (split value * nBuckets + bucket), followed by the FDRConfirm structures
 * themselves. Each offset is relative to the start of the buffer; entries
 * for (bucket, split) pairs without literals stay zero.
 *
 * Returns the buffer and its total size in bytes.
 *
 * If building the per-bucket structures or allocating the buffer throws,
 * every FDRConfirm built so far is freed before the exception propagates.
 */
pair<u8 *, size_t> setupFullMultiConfs(const vector<hwlmLiteral> &lits,
        const EngineDescription &eng,
        map<BucketIndex, vector<LiteralIndex> > &bucketToLits,
        bool make_small) {
    BC2CONF bc2Conf;
    u32 nBuckets = 0;
    u32 totalConfSwitchSize = 0;
    u32 totalSize = 0;
    u8 *buf = nullptr;

    try {
        u32 totalConfirmSize = setupMultiConfirms(lits, eng, bc2Conf,
                                                  bucketToLits, make_small);

        u32 primarySwitch = eng.getConfirmTopLevelSplit();
        nBuckets = eng.getNumBuckets();
        totalConfSwitchSize = primarySwitch * nBuckets * sizeof(u32);
        totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize);

        buf = (u8 *)aligned_zmalloc(totalSize);
    } catch (...) {
        // bc2Conf holds raw pointers; release them so that a failed build
        // does not leak the structures created before the failure.
        for (const auto &entry : bc2Conf) {
            aligned_free(entry.second.first);
        }
        throw;
    }
    assert(buf); // otherwise would have thrown std::bad_alloc

    u32 *confBase = (u32 *)buf;
    u8 *ptr = buf + totalConfSwitchSize;

    // Copy each FDRConfirm into place (in map key order), free the temporary
    // and record its offset in the switch table.
    for (BC2CONF::const_iterator i = bc2Conf.begin(), e = bc2Conf.end(); i != e;
         ++i) {
        const pair<FDRConfirm *, size_t> &p = i->second;
        // confirm offset is relative to the base of this structure, now
        u32 confirm_offset = verify_u32(ptr - (u8 *)buf);
        memcpy(ptr, p.first, p.second);
        ptr += p.second;
        aligned_free(p.first);
        BucketIndex b = i->first.first;
        u8 c = i->first.second;
        u32 idx = c * nBuckets + b;
        confBase[idx] = confirm_offset;
    }
    return make_pair(buf, totalSize);
}
|
||||
|
||||
} // namespace ue2
|
244
src/fdr/fdr_confirm_runtime.h
Normal file
244
src/fdr/fdr_confirm_runtime.h
Normal file
@@ -0,0 +1,244 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FDR_CONFIRM_RUNTIME_H
|
||||
#define FDR_CONFIRM_RUNTIME_H
|
||||
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_loadval.h"
|
||||
#include "hwlm/hwlm.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/compare.h"
|
||||
|
||||
// Routines used to load the 64-bit confirm word: the regular one, and the
// one used when the load would start before the beginning of the buffer.
#define CONF_LOADVAL_CALL lv_u64a
#define CONF_LOADVAL_CALL_CAUTIOUS lv_u64a_ce
|
||||
|
||||
// this is ordinary confirmation function which runs through
|
||||
// the whole confirmation procedure
|
||||
static really_inline
|
||||
void confWithBit(const struct FDRConfirm * fdrc,
|
||||
const struct FDR_Runtime_Args * a,
|
||||
size_t i,
|
||||
CautionReason r,
|
||||
u32 pullBackAmount,
|
||||
hwlmcb_rv_t *control,
|
||||
u32 * last_match) {
|
||||
assert(i < a->len);
|
||||
assert(ISALIGNED(fdrc));
|
||||
|
||||
const u8 * buf = a->buf;
|
||||
const size_t len = a->len;
|
||||
|
||||
CONF_TYPE v;
|
||||
const u8 * confirm_loc = buf + i - pullBackAmount - 7;
|
||||
if (likely(r == NOT_CAUTIOUS || confirm_loc >= buf)) {
|
||||
v = CONF_LOADVAL_CALL(confirm_loc, buf, buf + len);
|
||||
} else { // r == VECTORING, confirm_loc < buf
|
||||
u64a histBytes = a->histBytes;
|
||||
v = CONF_LOADVAL_CALL_CAUTIOUS(confirm_loc, buf, buf + len);
|
||||
// stitch together v (which doesn't move) and history (which does)
|
||||
u32 overhang = buf - confirm_loc;
|
||||
histBytes >>= 64 - (overhang * 8);
|
||||
v |= histBytes;
|
||||
}
|
||||
|
||||
u32 c = CONF_HASH_CALL(v, fdrc->andmsk, fdrc->mult, fdrc->nBitsOrSoleID);
|
||||
u32 start = getConfirmLitIndex(fdrc)[c];
|
||||
if (P0(start)) {
|
||||
const struct LitInfo *l =
|
||||
(const struct LitInfo *)((const u8 *)fdrc + start);
|
||||
|
||||
u8 oldNext; // initialized in loop
|
||||
do {
|
||||
assert(ISALIGNED(l));
|
||||
|
||||
if (P0( (v & l->msk) != l->v)) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((*last_match == l->id) && (l->flags & NoRepeat)) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
const u8 * loc = buf + i - l->size + 1 - pullBackAmount;
|
||||
|
||||
u8 caseless = l->flags & Caseless;
|
||||
if (loc < buf) {
|
||||
u32 full_overhang = buf - loc;
|
||||
|
||||
const u8 * history = (caseless) ?
|
||||
a->buf_history_nocase : a->buf_history;
|
||||
size_t len_history = (caseless) ?
|
||||
a->len_history_nocase : a->len_history;
|
||||
|
||||
// can't do a vectored confirm either if we don't have
|
||||
// the bytes
|
||||
if (full_overhang > len_history) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
// as for the regular case, no need to do a full confirm if
|
||||
// we're a short literal
|
||||
if (unlikely(l->size > sizeof(CONF_TYPE))) {
|
||||
const u8 * s1 = l->s;
|
||||
const u8 * s2 = s1 + full_overhang;
|
||||
const u8 * loc1 = history + len_history - full_overhang;
|
||||
const u8 * loc2 = buf;
|
||||
size_t size1 = MIN(full_overhang,
|
||||
l->size - sizeof(CONF_TYPE));
|
||||
size_t wind_size2_back = sizeof(CONF_TYPE) +
|
||||
full_overhang;
|
||||
size_t size2 = wind_size2_back > l->size ?
|
||||
0 : l->size - wind_size2_back;
|
||||
|
||||
if (cmpForward(loc1, s1, size1, caseless)) {
|
||||
goto out;
|
||||
}
|
||||
if (cmpForward(loc2, s2, size2, caseless)) {
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
} else { // NON-VECTORING PATH
|
||||
|
||||
// if string < conf_type we don't need regular string cmp
|
||||
if (unlikely(l->size > sizeof(CONF_TYPE))) {
|
||||
if (cmpForward(loc, l->s, l->size - sizeof(CONF_TYPE), caseless)) {
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (P0(!(l->groups & *control))) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (unlikely(l->flags & ComplexConfirm)) {
|
||||
const u8 * loc2 = buf + i - l->extended_size + 1 - pullBackAmount;
|
||||
if (loc2 < buf) {
|
||||
u32 full_overhang = buf - loc2;
|
||||
size_t len_history = (caseless) ?
|
||||
a->len_history_nocase : a->len_history;
|
||||
if (full_overhang > len_history) {
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*last_match = l->id;
|
||||
*control = a->cb(loc - buf, i, l->id, a->ctxt);
|
||||
out:
|
||||
oldNext = l->next; // oldNext is either 0 or an 'adjust' value
|
||||
l = (const struct LitInfo*)((const u8 *)l + oldNext + l->size);
|
||||
} while (oldNext);
|
||||
}
|
||||
}
|
||||
|
||||
// 'light-weight' confirmation function which is used by 1-mask Teddy;
|
||||
// in the 'confirmless' case it simply calls callback function,
|
||||
// otherwise it calls 'confWithBit' function for the full confirmation procedure
|
||||
static really_inline
|
||||
void confWithBit1(const struct FDRConfirm * fdrc,
|
||||
const struct FDR_Runtime_Args * a,
|
||||
size_t i,
|
||||
CautionReason r,
|
||||
hwlmcb_rv_t *control,
|
||||
u32 * last_match) {
|
||||
assert(i < a->len);
|
||||
assert(ISALIGNED(fdrc));
|
||||
|
||||
if (unlikely(fdrc->mult)) {
|
||||
confWithBit(fdrc, a, i, r, 0, control, last_match);
|
||||
return;
|
||||
} else {
|
||||
u32 id = fdrc->nBitsOrSoleID;
|
||||
|
||||
if ((*last_match == id) && (fdrc->flags & NoRepeat)) {
|
||||
return;
|
||||
}
|
||||
*last_match = id;
|
||||
*control = a->cb(i, i, id, a->ctxt);
|
||||
}
|
||||
}
|
||||
|
||||
// This is 'light-weight' confirmation function which is used by 2-3-4-mask Teddy
|
||||
// In the 'confirmless' case it makes fast 32-bit comparison,
|
||||
// otherwise it calls 'confWithBit' function for the full confirmation procedure
|
||||
static really_inline
|
||||
void confWithBitMany(const struct FDRConfirm * fdrc,
|
||||
const struct FDR_Runtime_Args * a,
|
||||
size_t i,
|
||||
CautionReason r,
|
||||
hwlmcb_rv_t *control,
|
||||
u32 * last_match) {
|
||||
assert(i < a->len);
|
||||
assert(ISALIGNED(fdrc));
|
||||
|
||||
if (i < a->start_offset) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (unlikely(fdrc->mult)) {
|
||||
confWithBit(fdrc, a, i, r, 0, control, last_match);
|
||||
return;
|
||||
} else {
|
||||
const u32 id = fdrc->nBitsOrSoleID;
|
||||
const u32 len = fdrc->soleLitSize;
|
||||
|
||||
if ((*last_match == id) && (fdrc->flags & NoRepeat)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (r == VECTORING && len > i - a->start_offset) {
|
||||
if (len > (i + a->len_history)) {
|
||||
return;
|
||||
}
|
||||
|
||||
u32 cmp = (u32)a->buf[i] << 24;
|
||||
|
||||
if (len <= i) {
|
||||
for (u32 j = 1; j <= len; j++) {
|
||||
cmp |= (u32)a->buf[i - j] << (24 - (j * 8));
|
||||
}
|
||||
} else {
|
||||
for (u32 j = 1; j <= i; j++) {
|
||||
cmp |= (u32)a->buf[i - j] << (24 - (j * 8));
|
||||
}
|
||||
cmp |= (u32)(a->histBytes >> (40 + i * 8));
|
||||
}
|
||||
|
||||
if ((fdrc->soleLitMsk & cmp) != fdrc->soleLitCmp) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
*last_match = id;
|
||||
*control = a->cb(i - len, i, id, a->ctxt);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
98
src/fdr/fdr_dump.cpp
Normal file
98
src/fdr/fdr_dump.cpp
Normal file
@@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "fdr_dump.h"
|
||||
#include "fdr_engine_description.h"
|
||||
#include "teddy_engine_description.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <memory>
|
||||
|
||||
#ifndef DUMP_SUPPORT
|
||||
#error No dump support!
|
||||
#endif
|
||||
|
||||
using std::unique_ptr;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
bool fdrIsTeddy(const FDR *fdr) {
|
||||
assert(fdr);
|
||||
u32 engine = fdr->engineID;
|
||||
|
||||
/* teddys don't have an fdr engine description (which is why the dump code
|
||||
* is so broken). */
|
||||
|
||||
return !getFdrDescription(engine);
|
||||
}
|
||||
|
||||
void fdrPrintStats(const FDR *fdr, FILE *f) {
|
||||
const bool isTeddy = fdrIsTeddy(fdr);
|
||||
|
||||
if (isTeddy) {
|
||||
fprintf(f, "TEDDY: %u\n", fdr->engineID);
|
||||
} else {
|
||||
fprintf(f, "FDR: %u\n", fdr->engineID);
|
||||
}
|
||||
|
||||
if (isTeddy) {
|
||||
unique_ptr<TeddyEngineDescription> des =
|
||||
getTeddyDescription(fdr->engineID);
|
||||
if (des) {
|
||||
fprintf(f, " masks %u\n", des->numMasks);
|
||||
fprintf(f, " buckets %u\n", des->getNumBuckets());
|
||||
fprintf(f, " packed %s\n", des->packed ? "true" : "false");
|
||||
} else {
|
||||
fprintf(f, " <unknown engine>\n");
|
||||
}
|
||||
} else {
|
||||
unique_ptr<FDREngineDescription> des =
|
||||
getFdrDescription(fdr->engineID);
|
||||
if (des) {
|
||||
fprintf(f, " stride %u\n", des->stride);
|
||||
fprintf(f, " buckets %u\n", des->getNumBuckets());
|
||||
fprintf(f, " width %u\n", des->schemeWidth);
|
||||
} else {
|
||||
fprintf(f, " <unknown engine>\n");
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(f, " strings ???\n");
|
||||
fprintf(f, " size %zu bytes\n", fdrSize(fdr));
|
||||
fprintf(f, " max length %u\n", fdr->maxStringLen);
|
||||
fprintf(f, " floodoff %u (%x)\n", fdr->floodOffset, fdr->floodOffset);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
49
src/fdr/fdr_dump.h
Normal file
49
src/fdr/fdr_dump.h
Normal file
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief FDR literal matcher: dump API.
|
||||
*/
|
||||
|
||||
#ifndef FDR_DUMP_H
|
||||
#define FDR_DUMP_H
|
||||
|
||||
#if defined(DUMP_SUPPORT)
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
struct FDR;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
void fdrPrintStats(const struct FDR *fdr, FILE *f);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // DUMP_SUPPORT
|
||||
#endif // FDR_DUMP_H
|
216
src/fdr/fdr_engine_description.cpp
Normal file
216
src/fdr/fdr_engine_description.cpp
Normal file
@@ -0,0 +1,216 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "fdr_engine_description.h"
|
||||
#include "hs_compile.h"
|
||||
#include "util/target_info.h"
|
||||
#include "util/compare.h" // for ourisalpha()
|
||||
#include "util/make_unique.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
#include "fdr_autogen_compiler.cpp"
|
||||
|
||||
/**
 * \brief Builds an engine description from a static engine definition.
 *
 * The id, target (derived from the definition's cpu_features), bucket count
 * and confirm parameters are forwarded to the EngineDescription base; the
 * scheme width, stride and bits are stored on this class.
 */
FDREngineDescription::FDREngineDescription(const FDREngineDef &def)
    : EngineDescription(def.id,
                        targetByArchFeatures(def.cpu_features),
                        def.numBuckets,
                        def.confirmPullBackDistance,
                        def.confirmTopLevelSplit),
      schemeWidth(def.schemeWidth),
      stride(def.stride),
      bits(def.bits) {
}
|
||||
|
||||
/**
 * \brief Default flood suffix length: ceil(schemeWidth / numBuckets) + 1.
 */
u32 FDREngineDescription::getDefaultFloodSuffixLength() const {
    const auto width = getSchemeWidth();
    const auto buckets = getNumBuckets();

    // rounding up, so that scheme width 32 and 6 buckets is 6 not 5!
    const auto rounded_up = (width + buckets - 1) / buckets;

    // the +1 avoids pain due to various reach choices
    return rounded_up + 1;
}
|
||||
|
||||
static
|
||||
u32 findDesiredStride(size_t num_lits, size_t min_len, size_t min_len_count) {
|
||||
u32 desiredStride = 1; // always our safe fallback
|
||||
if (min_len > 1) {
|
||||
if (num_lits < 250) {
|
||||
// small cases we just go for it
|
||||
desiredStride = min_len;
|
||||
} else if (num_lits < 800) {
|
||||
// intermediate cases
|
||||
desiredStride = min_len - 1;
|
||||
} else if (num_lits < 5000) {
|
||||
// for larger but not huge sizes, go to stride 2 only if we have at
|
||||
// least minlen 3
|
||||
desiredStride = MIN(min_len - 1, 2);
|
||||
}
|
||||
}
|
||||
|
||||
// patch if count is quite large - a ton of length 2 literals can
|
||||
// break things
|
||||
#ifdef TRY_THIS_LATER
|
||||
if ((min_len == 2) && (desiredStride == 2) && (min_len_count > 20)) {
|
||||
desiredStride = 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
// patch stuff just for the stride 4 case; don't let min_len=4,
|
||||
// desiredStride=4 through as even a few length 4 literals can break things
|
||||
// (far more fragile)
|
||||
if ((min_len == 4) && (desiredStride == 4) && (min_len_count > 2)) {
|
||||
desiredStride = 2;
|
||||
}
|
||||
|
||||
return desiredStride;
|
||||
}
|
||||
|
||||
/**
 * \brief Selects the best-scoring FDR engine for a literal set on a target.
 *
 * Every known engine description that is valid on \p target and whose stride
 * does not exceed the shortest literal length is scored. The score starts at
 * 100, is penalised by the distance between the engine's stride and the
 * desired stride and by the distance between the engine's table bits and an
 * "ideal" bit count derived from the number of literals, and is rewarded for
 * strides that do not exceed the desired stride.
 *
 * \param target      target the engine must be valid on
 * \param vl          literal set to be matched
 * \param make_small  if true, the ideal table bit count is lowered by two
 * \return a copy of the winning description, or nullptr if no engine is
 *         usable
 */
unique_ptr<FDREngineDescription> chooseEngine(const target_t &target,
                                              const vector<hwlmLiteral> &vl,
                                              bool make_small) {
    vector<FDREngineDescription> allDescs;
    getFdrDescriptions(&allDescs);

    // find desired stride
    size_t count;
    size_t msl = minLenCount(vl, &count);
    u32 desiredStride = findDesiredStride(vl.size(), msl, count);

    DEBUG_PRINTF("%zu lits, msl=%zu, desiredStride=%u\n", vl.size(), msl,
                 desiredStride);

    // Points into allDescs, which outlives every use of 'best' below.
    const FDREngineDescription *best = nullptr;
    u32 best_score = 0;

    for (size_t engineID = 0; engineID < allDescs.size(); engineID++) {
        const FDREngineDescription &eng = allDescs[engineID];
        if (!eng.isValidOnTarget(target)) {
            continue;
        }
        if (msl < eng.stride) {
            continue;
        }

        u32 score = 100;

        score -= absdiff(desiredStride, eng.stride);

        if (eng.stride <= desiredStride) {
            score += eng.stride;
        }

        u32 effLits = vl.size(); /* * desiredStride;*/
        u32 ideal;
        if (effLits < eng.getNumBuckets()) {
            if (eng.stride == 1) {
                ideal = 8;
            } else {
                ideal = 10;
            }
        } else if (effLits < 20) {
            ideal = 10;
        } else if (effLits < 100) {
            ideal = 11;
        } else if (effLits < 1000) {
            ideal = 12;
        } else if (effLits < 10000) {
            ideal = 13;
        } else {
            ideal = 15;
        }

        if (ideal != 8 && eng.schemeWidth == 32) {
            ideal += 1;
        }

        if (make_small) {
            ideal -= 2;
        }

        if (eng.stride > 1) {
            ideal++;
        }

        DEBUG_PRINTF("effLits %u\n", effLits);

        if (target.is_atom_class() && !make_small && effLits < 4000) {
            /* Unless it is a very heavy case, we want to build smaller tables
             * on lightweight machines due to their small caches. */
            ideal -= 2;
        }

        score -= absdiff(ideal, eng.bits);

        DEBUG_PRINTF("fdr %u: width=%u, bits=%u, buckets=%u, stride=%u "
                     "-> score=%u\n",
                     eng.getID(), eng.schemeWidth, eng.bits,
                     eng.getNumBuckets(), eng.stride, score);

        // Strict '>' keeps the earliest engine on ties.
        if (!best || score > best_score) {
            best = &eng;
            best_score = score;
        }
    }

    if (!best) {
        DEBUG_PRINTF("failed to find engine\n");
        return nullptr;
    }

    DEBUG_PRINTF("using engine %u\n", best->getID());
    return ue2::make_unique<FDREngineDescription>(*best);
}
|
||||
|
||||
/**
 * \brief Maps (bucket, position-in-bucket) to a bit index in the scheme.
 *
 * The index is computed as p * numBuckets + b; both the position and the
 * resulting index are range-checked with asserts.
 */
SchemeBitIndex FDREngineDescription::getSchemeBit(BucketIndex b,
                                                  PositionInBucket p) const {
    assert(p < getBucketWidth(b));

    const SchemeBitIndex bit = p * getNumBuckets() + b;
    assert(bit < getSchemeWidth());

    return bit;
}
|
||||
|
||||
/**
 * \brief Width of a bucket: schemeWidth / numBuckets.
 *
 * The bucket index argument is ignored; the division is asserted to be exact.
 */
u32 FDREngineDescription::getBucketWidth(BucketIndex) const {
    const u32 width = getSchemeWidth();
    const u32 buckets = getNumBuckets();

    assert(width % buckets == 0);
    return width / buckets;
}
|
||||
|
||||
/**
 * \brief Returns a copy of the FDR engine description at index \p engineID.
 *
 * \return nullptr if \p engineID is not a valid index into the list produced
 *         by getFdrDescriptions().
 */
unique_ptr<FDREngineDescription> getFdrDescription(u32 engineID) {
    vector<FDREngineDescription> descs;
    getFdrDescriptions(&descs);

    if (engineID < descs.size()) {
        return ue2::make_unique<FDREngineDescription>(descs[engineID]);
    }

    return nullptr;
}
|
||||
|
||||
} // namespace ue2
|
80
src/fdr/fdr_engine_description.h
Normal file
80
src/fdr/fdr_engine_description.h
Normal file
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FDR_ENGINE_DESCRIPTION_H
|
||||
#define FDR_ENGINE_DESCRIPTION_H
|
||||
|
||||
#include "engine_description.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct FDREngineDef {
|
||||
u32 id;
|
||||
u32 schemeWidth;
|
||||
u32 numBuckets;
|
||||
u32 stride;
|
||||
u32 bits;
|
||||
u64a cpu_features;
|
||||
u32 confirmPullBackDistance;
|
||||
u32 confirmTopLevelSplit;
|
||||
};
|
||||
|
||||
class FDREngineDescription : public EngineDescription {
|
||||
public:
|
||||
u32 schemeWidth;
|
||||
u32 stride;
|
||||
u32 bits;
|
||||
|
||||
u32 getSchemeWidth() const { return schemeWidth; }
|
||||
u32 getBucketWidth(BucketIndex b) const;
|
||||
SchemeBitIndex getSchemeBit(BucketIndex b, PositionInBucket p) const;
|
||||
u32 getNumTableEntries() const { return 1 << bits; }
|
||||
u32 getTabSizeBytes() const {
|
||||
return schemeWidth / 8 * getNumTableEntries();
|
||||
}
|
||||
|
||||
explicit FDREngineDescription(const FDREngineDef &def);
|
||||
|
||||
u32 getDefaultFloodSuffixLength() const override;
|
||||
bool typicallyHoldsOneCharLits() const override { return stride == 1; }
|
||||
};
|
||||
|
||||
std::unique_ptr<FDREngineDescription>
|
||||
chooseEngine(const target_t &target, const std::vector<hwlmLiteral> &vl,
|
||||
bool make_small);
|
||||
std::unique_ptr<FDREngineDescription> getFdrDescription(u32 engineID);
|
||||
void getFdrDescriptions(std::vector<FDREngineDescription> *out);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
111
src/fdr/fdr_internal.h
Normal file
111
src/fdr/fdr_internal.h
Normal file
@@ -0,0 +1,111 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief FDR literal matcher: data structures.
|
||||
*/
|
||||
|
||||
#ifndef FDR_INTERNAL_H
|
||||
#define FDR_INTERNAL_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "hwlm/hwlm.h" // for hwlm_group_t, HWLMCallback
|
||||
|
||||
/** \brief Whether a scan position needs cautious (bounds-checked) handling. */
typedef enum {
    NOT_CAUTIOUS, //!< not near a boundary (quantify?)
    VECTORING     //!< potentially vectoring
} CautionReason;
|
||||
|
||||
/** \brief number of different ids that can be triggered by floods of any given
|
||||
* character. */
|
||||
#define FDR_FLOOD_MAX_IDS 16
|
||||
|
||||
struct FDRFlood {
|
||||
hwlm_group_t allGroups; //!< all the groups or'd together
|
||||
u32 suffix;
|
||||
|
||||
/** \brief 0 to FDR_FLOOD_MAX_IDS-1 ids that are generated once per char on
|
||||
* a flood.
|
||||
* If larger we won't handle this through the flood path at all. */
|
||||
u16 idCount;
|
||||
|
||||
u32 ids[FDR_FLOOD_MAX_IDS]; //!< the ids
|
||||
hwlm_group_t groups[FDR_FLOOD_MAX_IDS]; //!< group ids to go with string ids
|
||||
u32 len[FDR_FLOOD_MAX_IDS]; //!< lengths to go with the string ids
|
||||
};
|
||||
|
||||
/** \brief FDR structure.
|
||||
*
|
||||
* 1. struct as-is
|
||||
* 2. primary matching table
|
||||
* 3. confirm stuff
|
||||
*/
|
||||
struct FDR {
|
||||
u32 engineID;
|
||||
u32 size;
|
||||
u32 maxStringLen;
|
||||
u32 floodOffset;
|
||||
|
||||
/** link is the relative offset of a secondary included FDR table for
|
||||
* stream handling if we're a primary FDR table or the subsidiary tertiary
|
||||
* structures (spillover strings and hash table) if we're a secondary
|
||||
* structure. */
|
||||
u32 link;
|
||||
u32 pad1;
|
||||
u32 pad2;
|
||||
u32 pad3;
|
||||
|
||||
union {
|
||||
u32 s_u32;
|
||||
u64a s_u64a;
|
||||
m128 s_m128;
|
||||
} start;
|
||||
};
|
||||
|
||||
/** \brief FDR runtime arguments.
|
||||
*
|
||||
* This structure handles read-only things that are passed extensively around
|
||||
* the FDR run-time functions. They are set by the API, passed by value into
|
||||
* the main function, then a pointer is passed around to all the various
|
||||
* sub-functions (confirm & flood). */
|
||||
struct FDR_Runtime_Args {
|
||||
const u8 *buf;
|
||||
size_t len;
|
||||
const u8 *buf_history;
|
||||
size_t len_history;
|
||||
const u8 *buf_history_nocase;
|
||||
size_t len_history_nocase;
|
||||
size_t start_offset;
|
||||
HWLMCallback cb;
|
||||
void *ctxt;
|
||||
hwlm_group_t *groups;
|
||||
const u8 *firstFloodDetect;
|
||||
const u64a histBytes;
|
||||
};
|
||||
|
||||
#endif
|
216
src/fdr/fdr_loadval.h
Normal file
216
src/fdr/fdr_loadval.h
Normal file
@@ -0,0 +1,216 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FDR_LOADVAL_H
|
||||
#define FDR_LOADVAL_H
|
||||
|
||||
#include "fdr_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/unaligned.h"
|
||||
#include "util/simd_utils.h"
|
||||
|
||||
// Emits the signature of a loadval function: every loadval takes the address
// to read from (ptr) plus the bounds [lo, hi) of the readable buffer.
#define MAKE_LOADVAL(type, name) \
    static really_inline type name (const u8 * ptr, UNUSED const u8 * lo, UNUSED const u8 * hi)

// Assertions for the non-cautious variants: the whole read lies in [lo, hi).
#define NORMAL_SAFE(type) assert(ptr >= lo && (ptr + sizeof(type) - 1) < hi)
// Note: expands to two statements and already ends in a semicolon.
#define ALIGNED_SAFE(type) NORMAL_SAFE(type); assert(((size_t)ptr % sizeof(type)) == 0);
// these ones need asserts to test the property that we're not handling dynamically
#define CAUTIOUS_FORWARD_SAFE(type) assert(ptr >= lo)
#define CAUTIOUS_BACKWARD_SAFE(type) assert((ptr + sizeof(type) - 1) < hi)

// Per-byte bounds checks used inside MAKE_LOOP; 'i' is the loop index there.
// Each expansion is fully parenthesized so it stays a single operand wherever
// it is substituted.
#define CF_INDEX_CHECK (ptr + i < hi)
#define CB_INDEX_CHECK (lo <= ptr + i)
#define CE_INDEX_CHECK ((lo <= ptr + i) && (ptr + i < hi))

// Body of a cautious loadval: assembles a TYPE byte by byte, skipping (i.e.
// leaving as zero) any byte for which COND is false. SHIFT_FIDDLE selects the
// byte position that ptr[i] lands in.
#define MAKE_LOOP(TYPE, COND, SHIFT_FIDDLE)                 \
    TYPE v = 0;                                             \
    for (TYPE i = 0; i < sizeof(TYPE); i++) {               \
        if (COND) {                                         \
            v += (TYPE)ptr[i] << ((SHIFT_FIDDLE)*8);        \
        }                                                   \
    }                                                       \
    return v;

// ptr[0] goes to the most significant byte.
#define MAKE_LOOP_BE(TYPE, COND) \
    MAKE_LOOP(TYPE, COND, sizeof(TYPE)-i-1)

// ptr[0] goes to the least significant byte.
#define MAKE_LOOP_LE(TYPE, COND) \
    MAKE_LOOP(TYPE, COND, i)


#define MAKE_LOOP_BE_CF(TYPE) CAUTIOUS_FORWARD_SAFE(TYPE); MAKE_LOOP_BE(TYPE, CF_INDEX_CHECK)
#define MAKE_LOOP_BE_CB(TYPE) CAUTIOUS_BACKWARD_SAFE(TYPE); MAKE_LOOP_BE(TYPE, CB_INDEX_CHECK)
#define MAKE_LOOP_BE_CE(TYPE) MAKE_LOOP_BE(TYPE, CE_INDEX_CHECK)
#define MAKE_LOOP_LE_CF(TYPE) CAUTIOUS_FORWARD_SAFE(TYPE); MAKE_LOOP_LE(TYPE, CF_INDEX_CHECK)
#define MAKE_LOOP_LE_CB(TYPE) CAUTIOUS_BACKWARD_SAFE(TYPE); MAKE_LOOP_LE(TYPE, CB_INDEX_CHECK)
#define MAKE_LOOP_LE_CE(TYPE) MAKE_LOOP_LE(TYPE, CE_INDEX_CHECK)
|
||||
|
||||
// no suffix = normal (unaligned)
|
||||
// _a = aligned
|
||||
// _cf = cautious forwards, base is always in bounds, but may read over the end of the buffer (test against hi)
|
||||
// _cb = cautious backwards, final byte is always in bounds, but may read over the start of the buffer (test against lo)
|
||||
// _ce = cautious everywhere (in both directions); test against hi and lo
|
||||
|
||||
// u8 loadvals
|
||||
MAKE_LOADVAL(u8, lv_u8) {
|
||||
NORMAL_SAFE(u8);
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u8, lv_u8_cf) {
|
||||
CAUTIOUS_FORWARD_SAFE(u8);
|
||||
if (ptr < hi) {
|
||||
return *ptr;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u8, lv_u8_cb) {
|
||||
CAUTIOUS_BACKWARD_SAFE(u8);
|
||||
if (lo <= ptr) {
|
||||
return *ptr;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u8, lv_u8_ce) {
|
||||
if ((lo <= ptr) && (ptr < hi)) {
|
||||
return *ptr;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u16, lv_u16) {
|
||||
NORMAL_SAFE(u16);
|
||||
return unaligned_load_u16(ptr);
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u16, lv_u16_a) {
|
||||
ALIGNED_SAFE(u16);
|
||||
return *(const u16 *)ptr;
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u32, lv_u32) {
|
||||
NORMAL_SAFE(u32);
|
||||
return unaligned_load_u32(ptr);
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u32, lv_u32_a) {
|
||||
ALIGNED_SAFE(u32);
|
||||
return *(const u32 *)ptr;
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u64a, lv_u64a) {
|
||||
NORMAL_SAFE(u32);
|
||||
return unaligned_load_u64a(ptr);
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u64a, lv_u64a_a) {
|
||||
ALIGNED_SAFE(u64a);
|
||||
return *(const u64a *)ptr;
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u16, lv_u16_cf) { MAKE_LOOP_LE_CF(u16); }
|
||||
MAKE_LOADVAL(u16, lv_u16_cb) { MAKE_LOOP_LE_CB(u16); }
|
||||
MAKE_LOADVAL(u16, lv_u16_ce) { MAKE_LOOP_LE_CE(u16); }
|
||||
|
||||
MAKE_LOADVAL(u32, lv_u32_cf) { MAKE_LOOP_LE_CF(u32); }
|
||||
MAKE_LOADVAL(u32, lv_u32_cb) { MAKE_LOOP_LE_CB(u32); }
|
||||
MAKE_LOADVAL(u32, lv_u32_ce) { MAKE_LOOP_LE_CE(u32); }
|
||||
|
||||
MAKE_LOADVAL(u64a, lv_u64a_cf) { MAKE_LOOP_LE_CF(u64a); }
|
||||
MAKE_LOADVAL(u64a, lv_u64a_cb) { MAKE_LOOP_LE_CB(u64a); }
|
||||
MAKE_LOADVAL(u64a, lv_u64a_ce) { MAKE_LOOP_LE_CE(u64a); }
|
||||
|
||||
MAKE_LOADVAL(m128, lv_m128) {
|
||||
NORMAL_SAFE(m128);
|
||||
return loadu128(ptr);
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(m128, lv_m128_a) {
|
||||
ALIGNED_SAFE(m128);
|
||||
assert((size_t)ptr % sizeof(m128) == 0);
|
||||
return *(const m128 *)ptr;
|
||||
}
|
||||
|
||||
// m128 cases need to be manually created
|
||||
|
||||
MAKE_LOADVAL(m128, lv_m128_cf) {
|
||||
CAUTIOUS_FORWARD_SAFE(m128);
|
||||
union {
|
||||
u8 val8[16];
|
||||
m128 val128;
|
||||
} u;
|
||||
|
||||
for (u32 i = 0; i < 16; i++) {
|
||||
if (ptr + i < hi) {
|
||||
u.val8[i] = ptr[i];
|
||||
} else {
|
||||
u.val8[i] = 0;
|
||||
}
|
||||
}
|
||||
return u.val128;
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(m128, lv_m128_cb) {
|
||||
CAUTIOUS_BACKWARD_SAFE(m128);
|
||||
union {
|
||||
u8 val8[16];
|
||||
m128 val128;
|
||||
} u;
|
||||
|
||||
for (u32 i = 0; i < 16; i++) {
|
||||
if (lo <= ptr + i) {
|
||||
u.val8[i] = ptr[i];
|
||||
} else {
|
||||
u.val8[i] = 0;
|
||||
}
|
||||
}
|
||||
return u.val128;
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(m128, lv_m128_ce) {
|
||||
union {
|
||||
u8 val8[16];
|
||||
m128 val128;
|
||||
} u;
|
||||
|
||||
for (u32 i = 0; i < 16; i++) {
|
||||
if ((lo <= ptr + i) && (ptr + i < hi)) {
|
||||
u.val8[i] = ptr[i];
|
||||
} else {
|
||||
u.val8[i] = 0;
|
||||
}
|
||||
}
|
||||
return u.val128;
|
||||
}
|
||||
|
||||
#endif
|
445
src/fdr/fdr_streaming_compile.cpp
Normal file
445
src/fdr/fdr_streaming_compile.cpp
Normal file
@@ -0,0 +1,445 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_streaming_internal.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "hwlm/hwlm_build.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/target_info.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <deque>
|
||||
#include <set>
|
||||
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
|
||||
using namespace std;
|
||||
using boost::dynamic_bitset;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
namespace {
|
||||
struct LongLitOrder {
|
||||
bool operator()(const hwlmLiteral &i1, const hwlmLiteral &i2) const {
|
||||
if (i1.nocase != i2.nocase) {
|
||||
return i1.nocase < i2.nocase;
|
||||
} else {
|
||||
return i1.s < i2.s;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
static
|
||||
bool hwlmLitEqual(const hwlmLiteral &l1, const hwlmLiteral &l2) {
|
||||
return l1.s == l2.s && l1.nocase == l2.nocase;
|
||||
}
|
||||
|
||||
/**
 * \brief Rounds \p x up to the nearest power of two.
 *
 * Powers of two are returned unchanged. An input of 0 yields 0, because the
 * initial decrement wraps around.
 */
static
u32 roundUpToPowerOfTwo(u32 x) {
    u32 v = x - 1;

    // Smear the highest set bit into every lower bit position.
    for (u32 shift = 1; shift <= 16; shift *= 2) {
        v |= (v >> shift);
    }

    return v + 1;
}
|
||||
|
||||
/**
|
||||
* \brief Creates a long literals vector containing all literals of length > max_len.
|
||||
*
|
||||
* The last char of each literal is trimmed as we're not interested in full
|
||||
* matches, only partial matches.
|
||||
*
|
||||
* Literals are sorted (by caseful/caseless, then lexicographical order) and
|
||||
* made unique.
|
||||
*
|
||||
* The ID of each literal is set to its position in the vector.
|
||||
*
|
||||
* \return False if there aren't any long literals.
|
||||
*/
|
||||
static
|
||||
bool setupLongLits(const vector<hwlmLiteral> &lits,
|
||||
vector<hwlmLiteral> &long_lits, size_t max_len) {
|
||||
long_lits.reserve(lits.size());
|
||||
for (vector<hwlmLiteral>::const_iterator it = lits.begin();
|
||||
it != lits.end(); ++it) {
|
||||
if (it->s.length() > max_len) {
|
||||
hwlmLiteral tmp = *it; // copy
|
||||
tmp.s.erase(tmp.s.size() - 1, 1); // erase last char
|
||||
tmp.id = 0; // recalc later
|
||||
tmp.groups = 0; // filled in later by hash bucket(s)
|
||||
long_lits.push_back(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
if (long_lits.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// sort long_literals by caseful/caseless and in lexicographical order,
|
||||
// remove duplicates
|
||||
stable_sort(long_lits.begin(), long_lits.end(), LongLitOrder());
|
||||
vector<hwlmLiteral>::iterator new_end =
|
||||
unique(long_lits.begin(), long_lits.end(), hwlmLitEqual);
|
||||
long_lits.erase(new_end, long_lits.end());
|
||||
|
||||
// fill in ids; not currently used
|
||||
for (vector<hwlmLiteral>::iterator i = long_lits.begin(),
|
||||
e = long_lits.end();
|
||||
i != e; ++i) {
|
||||
i->id = i - long_lits.begin();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// boundaries are the 'start' boundaries for each 'mode'
|
||||
// so boundary[CASEFUL] is the index one above the largest caseful index
|
||||
// positions[CASEFUL] is the # of positions in caseful strings (stream)
|
||||
// hashedPositions[CASEFUL] is the # of positions in caseful strings
|
||||
// (not returned - a temporary)
|
||||
// hashEntries[CASEFUL] is the # of positions hashed for caseful strings
|
||||
// (rounded up to the nearest power of two)
|
||||
static
|
||||
void analyzeLits(const vector<hwlmLiteral> &long_lits, size_t max_len,
|
||||
u32 *boundaries, u32 *positions, u32 *hashEntries) {
|
||||
u32 hashedPositions[MAX_MODES];
|
||||
|
||||
for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
|
||||
boundaries[m] = verify_u32(long_lits.size());
|
||||
positions[m] = 0;
|
||||
hashedPositions[m] = 0;
|
||||
}
|
||||
|
||||
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
|
||||
e = long_lits.end();
|
||||
i != e; ++i) {
|
||||
if (i->nocase) {
|
||||
boundaries[CASEFUL] = verify_u32(i - long_lits.begin());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
|
||||
e = long_lits.end();
|
||||
i != e; ++i) {
|
||||
MODES m = i->nocase ? CASELESS : CASEFUL;
|
||||
for (u32 j = 1; j < i->s.size() - max_len + 1; j++) {
|
||||
hashedPositions[m]++;
|
||||
}
|
||||
positions[m] += i->s.size();
|
||||
}
|
||||
|
||||
for (u32 m = CASEFUL; m < MAX_MODES; m++) {
|
||||
hashEntries[m] = hashedPositions[m]
|
||||
? roundUpToPowerOfTwo(MAX(4096, hashedPositions[m]))
|
||||
: 0;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_COMPILE
|
||||
printf("analyzeLits:\n");
|
||||
for (MODES m = CASEFUL; m < MAX_MODES; m++) {
|
||||
printf("mode %s boundary %d positions %d hashedPositions %d "
|
||||
"hashEntries %d\n",
|
||||
(m == CASEFUL) ? "caseful" : "caseless", boundaries[m],
|
||||
positions[m], hashedPositions[m], hashEntries[m]);
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
static
|
||||
u32 hashLit(const hwlmLiteral &l, u32 offset, size_t max_len, MODES m) {
|
||||
return streaming_hash((const u8 *)l.s.c_str() + offset, max_len, m);
|
||||
}
|
||||
|
||||
// sort by 'distance from start'
|
||||
namespace {
|
||||
struct OffsetIDFromEndOrder {
|
||||
const vector<hwlmLiteral> &lits; // not currently used
|
||||
explicit OffsetIDFromEndOrder(const vector<hwlmLiteral> &lits_in)
|
||||
: lits(lits_in) {}
|
||||
bool operator()(const pair<u32, u32> &i1, const pair<u32, u32> &i2) const {
|
||||
if (i1.second != i2.second) {
|
||||
// longest is 'first', so > not <
|
||||
return i1.second > i2.second;
|
||||
}
|
||||
return i1.first < i2.first;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
static
|
||||
void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
|
||||
FDRSHashEntry *tab, size_t numEntries, MODES m,
|
||||
map<u32, u32> &litToOffsetVal) {
|
||||
const u32 nbits = lg2(numEntries);
|
||||
map<u32, deque<pair<u32, u32> > > bucketToLitOffPairs;
|
||||
map<u32, u64a> bucketToBitfield;
|
||||
|
||||
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
|
||||
e = long_lits.end();
|
||||
i != e; ++i) {
|
||||
const hwlmLiteral &l = *i;
|
||||
if ((m == CASELESS) != i->nocase) {
|
||||
continue;
|
||||
}
|
||||
for (u32 j = 1; j < i->s.size() - max_len + 1; j++) {
|
||||
u32 h = hashLit(l, j, max_len, m);
|
||||
u32 h_ent = h & ((1U << nbits) - 1);
|
||||
u32 h_low = (h >> nbits) & 63;
|
||||
bucketToLitOffPairs[h_ent].push_back(make_pair(i->id, j));
|
||||
bucketToBitfield[h_ent] |= (1ULL << h_low);
|
||||
}
|
||||
}
|
||||
|
||||
// this used to be a set<u32>, but a bitset is much much faster given that
|
||||
// we're using it only for membership testing.
|
||||
dynamic_bitset<> filledBuckets(numEntries); // all bits zero by default.
|
||||
|
||||
// sweep out bitfield entries and save the results swapped accordingly
|
||||
// also, anything with bitfield entries is put in filledBuckets
|
||||
for (map<u32, u64a>::const_iterator i = bucketToBitfield.begin(),
|
||||
e = bucketToBitfield.end();
|
||||
i != e; ++i) {
|
||||
u32 bucket = i->first;
|
||||
u64a contents = i->second;
|
||||
tab[bucket].bitfield = contents;
|
||||
filledBuckets.set(bucket);
|
||||
}
|
||||
|
||||
// store out all our chains based on free values in our hash table.
|
||||
// find nearest free locations that are empty (there will always be more
|
||||
// entries than strings, at present)
|
||||
for (map<u32, deque<pair<u32, u32> > >::iterator
|
||||
i = bucketToLitOffPairs.begin(),
|
||||
e = bucketToLitOffPairs.end();
|
||||
i != e; ++i) {
|
||||
u32 bucket = i->first;
|
||||
deque<pair<u32, u32> > &d = i->second;
|
||||
|
||||
// sort d by distance of the residual string (len minus our depth into
|
||||
// the string). We need to put the 'furthest back' string first...
|
||||
stable_sort(d.begin(), d.end(), OffsetIDFromEndOrder(long_lits));
|
||||
|
||||
while (1) {
|
||||
// first time through is always at bucket, then we fill in links
|
||||
filledBuckets.set(bucket);
|
||||
FDRSHashEntry *ent = &tab[bucket];
|
||||
u32 lit_id = d.front().first;
|
||||
u32 offset = d.front().second;
|
||||
|
||||
ent->state = verify_u32(litToOffsetVal[lit_id] + offset + max_len);
|
||||
ent->link = (u32)LINK_INVALID;
|
||||
|
||||
d.pop_front();
|
||||
if (d.empty()) {
|
||||
break;
|
||||
}
|
||||
// now, if there is another value
|
||||
// find a bucket for it and put in 'bucket' and repeat
|
||||
// all we really need to do is find something not in filledBuckets,
|
||||
// ideally something close to bucket
|
||||
// we search backward and forward from bucket, trying to stay as
|
||||
// close as possible.
|
||||
UNUSED bool found = false;
|
||||
int bucket_candidate = 0;
|
||||
for (u32 k = 1; k < numEntries * 2; k++) {
|
||||
bucket_candidate = bucket + (((k & 1) == 0)
|
||||
? (-(int)k / 2) : (k / 2));
|
||||
if (bucket_candidate < 0 ||
|
||||
(size_t)bucket_candidate >= numEntries) {
|
||||
continue;
|
||||
}
|
||||
if (!filledBuckets.test(bucket_candidate)) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assert(found);
|
||||
bucket = bucket_candidate;
|
||||
ent->link = bucket;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
size_t maxMaskLen(const vector<hwlmLiteral> &lits) {
|
||||
size_t rv = 0;
|
||||
vector<hwlmLiteral>::const_iterator it, ite;
|
||||
for (it = lits.begin(), ite = lits.end(); it != ite; ++it) {
|
||||
rv = max(rv, it->msk.size());
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
/**
 * Builds the streaming long-literal table.
 *
 * Layout of the returned buffer, in order: FDRSTableHeader, the literal
 * offset table (long_lits.size() + 1 FDRSLiteral entries), the packed literal
 * strings, the caseful hash table and the caseless hash table.
 *
 * On return, stream_control->literal_history_required and
 * stream_control->literal_stream_state_required are filled in.
 *
 * \return (table, size in bytes), or (nullptr, 0) when setupLongLits reports
 *         that no table is needed.
 * \throws std::logic_error if history_max is below the 32 bytes this path
 *         requires.
 */
pair<u8 *, size_t>
fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
                       hwlmStreamingControl *stream_control) {
    // Refuse to compile if we cannot have the minimum history needed for
    // long-literal support. Otherwise use the larger of that minimum and the
    // history already in use, capped at stream_control->history_max.
    const size_t MIN_HISTORY_REQUIRED = 32;

    if (MIN_HISTORY_REQUIRED > stream_control->history_max) {
        throw std::logic_error("Cannot set history to minimum history required");
    }

    size_t max_len =
        MIN(stream_control->history_max,
            MAX(MIN_HISTORY_REQUIRED, stream_control->history_min));
    assert(max_len >= MIN_HISTORY_REQUIRED);
    const size_t max_mask_len = maxMaskLen(lits);

    vector<hwlmLiteral> long_lits;
    const bool need_table = setupLongLits(lits, long_lits, max_len);
    if (!need_table) {
        // "Don't need to do anything" path, not really a fail
        DEBUG_PRINTF("Streaming literal path produces no table\n");

        // we want enough history to manage the longest literal and the longest
        // mask.
        stream_control->literal_history_required =
            max(maxLen(lits), max_mask_len) - 1;
        stream_control->literal_stream_state_required = 0;
        return make_pair(nullptr, size_t{0});
    }

    // Ensure that we have enough room for the longest mask.
    if (max_mask_len) {
        max_len = max(max_len, max_mask_len - 1);
    }

    u32 boundary[MAX_MODES];
    u32 positions[MAX_MODES];
    u32 hashEntries[MAX_MODES];

    analyzeLits(long_lits, max_len, boundary, positions, hashEntries);

    // Work out where each section of the table lives.
    const size_t headerSize = ROUNDUP_16(sizeof(FDRSTableHeader));
    const size_t litTabNumEntries = long_lits.size() + 1;
    const size_t litTabSize =
        ROUNDUP_16(litTabNumEntries * sizeof(FDRSLiteral));
    const size_t wholeLitTabOffset = headerSize + litTabSize;
    const size_t totalWholeLitTabSize = ROUNDUP_16(positions[CASEFUL] +
                                                   positions[CASELESS]);

    size_t htOffset[MAX_MODES];
    size_t htSize[MAX_MODES];
    size_t sectionEnd = wholeLitTabOffset + totalWholeLitTabSize;
    for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
        htOffset[m] = sectionEnd;
        htSize[m] = hashEntries[m] * sizeof(FDRSHashEntry);
        sectionEnd += htSize[m];
    }

    const size_t tabSize = ROUNDUP_16(sectionEnd);

    // need to add +2 to both of these to allow space for the actual largest
    // value as well as handling the fact that we add one to the space when
    // storing out a position to allow zero to mean "no stream state value"
    u8 streamBits[MAX_MODES];
    for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
        streamBits[m] = lg2(roundUpToPowerOfTwo(positions[m] + 2));
    }
    const u32 tot_state_bytes =
        (streamBits[CASEFUL] + streamBits[CASELESS] + 7) / 8;

    u8 *secondaryTable = (u8 *)aligned_zmalloc(tabSize);
    assert(secondaryTable); // otherwise would have thrown std::bad_alloc

    // Header.
    FDRSTableHeader *header = (FDRSTableHeader *)secondaryTable;
    header->pseudoEngineID = (u32)0xffffffff;
    header->N = verify_u8(max_len); // u8 so doesn't matter; won't go > 255
    for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
        header->boundary[m] = boundary[m];
        header->hashOffset[m] = verify_u32(htOffset[m]);
        header->hashNBits[m] = lg2(hashEntries[m]);
        header->streamStateBits[m] = streamBits[m];
    }
    assert(tot_state_bytes < sizeof(u64a));
    header->streamStateBytes = verify_u8(tot_state_bytes); // u8

    // Literal offset table, followed by the packed literal strings.
    FDRSLiteral *litTabPtr = (FDRSLiteral *)(secondaryTable + headerSize);
    u8 *ptr = secondaryTable + headerSize + litTabSize;

    map<u32, u32> litToOffsetVal;
    for (size_t idx = 0; idx < long_lits.size(); ++idx) {
        const auto &str = long_lits[idx].s;
        const u32 entry = verify_u32(idx);
        const u32 offset = verify_u32(ptr - secondaryTable);

        // point the table entry to the string location
        litTabPtr[entry].offset = offset;
        litToOffsetVal[entry] = offset;

        // copy the string into place and advance past it
        memcpy(ptr, str.c_str(), str.size());
        ptr += str.size();
    }

    // fill in final lit table entry with current ptr (serves as end value)
    litTabPtr[long_lits.size()].offset = verify_u32(ptr - secondaryTable);

    // Hash tables, one per mode, laid out back to back.
    for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
        FDRSHashEntry *hashTab =
            (FDRSHashEntry *)(secondaryTable + htOffset[m]);
        fillHashes(long_lits, max_len, hashTab, hashEntries[m], (MODES)m,
                   litToOffsetVal);
    }

    // tell the world what we did
    stream_control->literal_history_required = max_len;
    stream_control->literal_stream_state_required = tot_state_bytes;
    return make_pair(secondaryTable, tabSize);
}
|
||||
|
||||
} // namespace ue2
|
152
src/fdr/fdr_streaming_internal.h
Normal file
152
src/fdr/fdr_streaming_internal.h
Normal file
@@ -0,0 +1,152 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FDR_STREAMING_INTERNAL_H
|
||||
#define FDR_STREAMING_INTERNAL_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "util/unaligned.h"
|
||||
|
||||
// tertiary table:
|
||||
// a header (FDRSTableHeader)
|
||||
// long_lits.size()+1 entries holding an offset to the string in the
|
||||
// 'whole literal table' (FDRSLiteral structure)
|
||||
// the whole literal table - every string packed in (freeform)
|
||||
// hash table (caseful) (FDRSHashEntry)
|
||||
// hash table (caseless) (FDRSHashEntry)
|
||||
|
||||
// Literal case-sensitivity modes; the values are used as array indices and
// MAX_MODES is the number of modes.
typedef enum {
    CASEFUL = 0,
    CASELESS = 1,
    MAX_MODES = 2
} MODES;
|
||||
|
||||
// We have one of these structures hanging off the 'link' of our secondary
|
||||
// FDR table that handles streaming strings
|
||||
struct FDRSTableHeader {
|
||||
u32 pseudoEngineID; // set to 0xffffffff to indicate this isn't an FDR
|
||||
|
||||
// string id one beyond the maximum entry for this type of literal
|
||||
// boundary[CASEFUL] is the end of the caseful literals
|
||||
// boundary[CASELESS] is the end of the caseless literals and one beyond
|
||||
// the largest literal id (the size of the littab)
|
||||
u32 boundary[MAX_MODES];
|
||||
|
||||
// offsets are 0 if no such table exists
|
||||
// offset from the base of the tertiary structure to the hash table
|
||||
u32 hashOffset[MAX_MODES];
|
||||
u32 hashNBits[MAX_MODES]; // lg2 of the size of the hash table
|
||||
|
||||
u8 streamStateBits[MAX_MODES];
|
||||
u8 streamStateBytes; // total size of packed stream state in bytes
|
||||
u8 N; // prefix lengths
|
||||
u16 pad;
|
||||
};
|
||||
|
||||
// One of these structures per literal entry in our secondary FDR table.
|
||||
struct FDRSLiteral {
|
||||
u32 offset;
|
||||
// potentially - another u32 to point to the 'next lesser included literal'
|
||||
// which would be a literal that overlaps this one in such a way that a
|
||||
// failure to match _this_ literal can leave us in a state that we might
|
||||
// still match that literal. Offset information might also be called for,
|
||||
// in which case we might be wanting to use a FDRSLiteralOffset
|
||||
};
|
||||
|
||||
typedef u32 FDRSLiteralOffset;
|
||||
|
||||
#define LINK_INVALID 0xffffffff
|
||||
|
||||
// One of these structures per hash table entry in our secondary FDR table
|
||||
struct FDRSHashEntry {
|
||||
u64a bitfield;
|
||||
FDRSLiteralOffset state;
|
||||
u32 link;
|
||||
};
|
||||
|
||||
static really_inline
|
||||
u32 get_start_lit_idx(const struct FDRSTableHeader * h, MODES m) {
|
||||
return m == CASEFUL ? 0 : h->boundary[m-1];
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 get_end_lit_idx(const struct FDRSTableHeader * h, MODES m) {
|
||||
return h->boundary[m];
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct FDRSLiteral * getLitTab(const struct FDRSTableHeader * h) {
|
||||
return (const struct FDRSLiteral *) (((const u8 *)h) +
|
||||
ROUNDUP_16(sizeof(struct FDRSTableHeader)));
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 getBaseOffsetOfLits(const struct FDRSTableHeader * h, MODES m) {
|
||||
return getLitTab(h)[get_start_lit_idx(h, m)].offset;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 packStateVal(const struct FDRSTableHeader * h, MODES m, u32 v) {
|
||||
return v - getBaseOffsetOfLits(h, m) + 1;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 unpackStateVal(const struct FDRSTableHeader * h, MODES m, u32 v) {
|
||||
return v + getBaseOffsetOfLits(h, m) - 1;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 has_bit(const struct FDRSHashEntry * ent, u32 bit) {
|
||||
return (ent->bitfield >> bit) & 0x1;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 streaming_hash(const u8 *ptr, UNUSED size_t len, MODES mode) {
|
||||
const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL;
|
||||
const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL;
|
||||
assert(len >= 32);
|
||||
|
||||
u64a v1 = unaligned_load_u64a(ptr);
|
||||
u64a v2 = unaligned_load_u64a(ptr + 8);
|
||||
u64a v3 = unaligned_load_u64a(ptr + 16);
|
||||
if (mode == CASELESS) {
|
||||
v1 &= CASEMASK;
|
||||
v2 &= CASEMASK;
|
||||
v3 &= CASEMASK;
|
||||
}
|
||||
v1 *= MULTIPLIER;
|
||||
v2 *= (MULTIPLIER*MULTIPLIER);
|
||||
v3 *= (MULTIPLIER*MULTIPLIER*MULTIPLIER);
|
||||
v1 >>= 32;
|
||||
v2 >>= 32;
|
||||
v3 >>= 32;
|
||||
return v1 ^ v2 ^ v3;
|
||||
}
|
||||
|
||||
#endif
|
365
src/fdr/fdr_streaming_runtime.h
Normal file
365
src/fdr/fdr_streaming_runtime.h
Normal file
@@ -0,0 +1,365 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FDR_STREAMING_RUNTIME_H
|
||||
#define FDR_STREAMING_RUNTIME_H
|
||||
|
||||
#include "fdr_streaming_internal.h"
|
||||
#include "util/partial_store.h"
|
||||
|
||||
static really_inline
|
||||
const struct FDRSTableHeader * getSHDR(const struct FDR * fdr) {
|
||||
const u8 * linkPtr = ((const u8 *)fdr) + fdr->link;
|
||||
// test if it's not really a engineID, but a 'pseudo engine id'
|
||||
assert(*(const u32 *)linkPtr == 0xffffffff);
|
||||
assert(linkPtr);
|
||||
return (const struct FDRSTableHeader *)linkPtr;
|
||||
}
|
||||
|
||||
// Reads from stream state and unpacks values into stream state table.
|
||||
static really_inline
|
||||
void getStreamStates(const struct FDRSTableHeader * streamingTable,
|
||||
const u8 * stream_state, u32 * table) {
|
||||
assert(streamingTable);
|
||||
assert(stream_state);
|
||||
assert(table);
|
||||
|
||||
u8 ss_bytes = streamingTable->streamStateBytes;
|
||||
u8 ssb = streamingTable->streamStateBits[CASEFUL];
|
||||
UNUSED u8 ssb_nc = streamingTable->streamStateBits[CASELESS];
|
||||
assert(ss_bytes == (ssb + ssb_nc + 7) / 8);
|
||||
|
||||
#if defined(ARCH_32_BIT)
|
||||
// On 32-bit hosts, we may be able to avoid having to do any u64a
|
||||
// manipulation at all.
|
||||
if (ss_bytes <= 4) {
|
||||
u32 ssb_mask = (1U << ssb) - 1;
|
||||
u32 streamVal = partial_load_u32(stream_state, ss_bytes);
|
||||
table[CASEFUL] = (u32)(streamVal & ssb_mask);
|
||||
table[CASELESS] = (u32)(streamVal >> ssb);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
u64a ssb_mask = (1ULL << ssb) - 1;
|
||||
u64a streamVal = partial_load_u64a(stream_state, ss_bytes);
|
||||
table[CASEFUL] = (u32)(streamVal & ssb_mask);
|
||||
table[CASELESS] = (u32)(streamVal >> (u64a)ssb);
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
// Defensive checking (used in assert) that these table values don't overflow
|
||||
// outside the range available.
|
||||
static really_inline UNUSED
|
||||
u32 streamingTableOverflow(u32 * table, u8 ssb, u8 ssb_nc) {
|
||||
u32 ssb_mask = (1ULL << (ssb)) - 1;
|
||||
if (table[CASEFUL] & ~ssb_mask) {
|
||||
return 1;
|
||||
}
|
||||
u32 ssb_nc_mask = (1ULL << (ssb_nc)) - 1;
|
||||
if (table[CASELESS] & ~ssb_nc_mask) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Reads from stream state table and packs values into stream state.
|
||||
static really_inline
|
||||
void setStreamStates(const struct FDRSTableHeader * streamingTable,
|
||||
u8 * stream_state, u32 * table) {
|
||||
assert(streamingTable);
|
||||
assert(stream_state);
|
||||
assert(table);
|
||||
|
||||
u8 ss_bytes = streamingTable->streamStateBytes;
|
||||
u8 ssb = streamingTable->streamStateBits[CASEFUL];
|
||||
UNUSED u8 ssb_nc = streamingTable->streamStateBits[CASELESS];
|
||||
assert(ss_bytes == (ssb + ssb_nc + 7) / 8);
|
||||
assert(!streamingTableOverflow(table, ssb, ssb_nc));
|
||||
|
||||
#if defined(ARCH_32_BIT)
|
||||
// On 32-bit hosts, we may be able to avoid having to do any u64a
|
||||
// manipulation at all.
|
||||
if (ss_bytes <= 4) {
|
||||
u32 stagingStreamState = table[CASEFUL];
|
||||
stagingStreamState |= (table[CASELESS] << ssb);
|
||||
|
||||
partial_store_u32(stream_state, stagingStreamState, ss_bytes);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
u64a stagingStreamState = (u64a)table[CASEFUL];
|
||||
stagingStreamState |= (u64a)table[CASELESS] << ((u64a)ssb);
|
||||
partial_store_u64a(stream_state, stagingStreamState, ss_bytes);
|
||||
}
|
||||
|
||||
u32 fdrStreamStateActive(const struct FDR * fdr, const u8 * stream_state) {
|
||||
if (!stream_state) {
|
||||
return 0;
|
||||
}
|
||||
const struct FDRSTableHeader * streamingTable = getSHDR(fdr);
|
||||
u8 ss_bytes = streamingTable->streamStateBytes;
|
||||
|
||||
// We just care if there are any bits set, and the test below is faster
|
||||
// than a partial_load_u64a (especially on 32-bit hosts).
|
||||
for (u32 i = 0; i < ss_bytes; i++) {
|
||||
if (*stream_state) {
|
||||
return 1;
|
||||
}
|
||||
++stream_state;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// binary search for the literal index that contains the current state
|
||||
static really_inline
|
||||
u32 findLitTabEntry(const struct FDRSTableHeader * streamingTable,
|
||||
u32 stateValue, MODES m) {
|
||||
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
|
||||
u32 lo = get_start_lit_idx(streamingTable, m);
|
||||
u32 hi = get_end_lit_idx(streamingTable, m);
|
||||
|
||||
// Now move stateValue back by one so that we're looking for the
|
||||
// litTab entry that includes it the string, not the one 'one past' it
|
||||
stateValue -= 1;
|
||||
assert(lo != hi);
|
||||
assert(litTab[lo].offset <= stateValue);
|
||||
assert(litTab[hi].offset > stateValue);
|
||||
|
||||
// binary search to find the entry e such that:
|
||||
// litTab[e].offsetToLiteral <= stateValue < litTab[e+1].offsetToLiteral
|
||||
while (lo + 1 < hi) {
|
||||
u32 mid = (lo + hi) / 2;
|
||||
if (litTab[mid].offset <= stateValue) {
|
||||
lo = mid;
|
||||
} else { //(litTab[mid].offset > stateValue) {
|
||||
hi = mid;
|
||||
}
|
||||
}
|
||||
assert(litTab[lo].offset <= stateValue);
|
||||
assert(litTab[hi].offset > stateValue);
|
||||
return lo;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void fdrUnpackStateMode(struct FDR_Runtime_Args *a,
|
||||
const struct FDRSTableHeader *streamingTable,
|
||||
const struct FDRSLiteral * litTab,
|
||||
const u32 *state_table,
|
||||
const MODES m) {
|
||||
if (!state_table[m]) {
|
||||
return;
|
||||
}
|
||||
|
||||
u32 stateValue = unpackStateVal(streamingTable, m, state_table[m]);
|
||||
u32 idx = findLitTabEntry(streamingTable, stateValue, m);
|
||||
size_t found_offset = litTab[idx].offset;
|
||||
const u8 * found_buf = found_offset + (const u8 *)streamingTable;
|
||||
size_t found_sz = stateValue - found_offset;
|
||||
if (m == CASEFUL) {
|
||||
a->buf_history = found_buf;
|
||||
a->len_history = found_sz;
|
||||
} else {
|
||||
a->buf_history_nocase = found_buf;
|
||||
a->len_history_nocase = found_sz;
|
||||
}
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void fdrUnpackState(const struct FDR * fdr, struct FDR_Runtime_Args * a,
|
||||
const u8 * stream_state) {
|
||||
// nothing to do if there's no stream state for the case
|
||||
if (!stream_state) {
|
||||
return;
|
||||
}
|
||||
|
||||
const struct FDRSTableHeader * streamingTable = getSHDR(fdr);
|
||||
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
|
||||
|
||||
u32 state_table[MAX_MODES];
|
||||
getStreamStates(streamingTable, stream_state, state_table);
|
||||
|
||||
fdrUnpackStateMode(a, streamingTable, litTab, state_table, CASEFUL);
|
||||
fdrUnpackStateMode(a, streamingTable, litTab, state_table, CASELESS);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 do_single_confirm(const struct FDRSTableHeader * streamingTable,
|
||||
const struct FDR_Runtime_Args * a, u32 hashState, MODES m) {
|
||||
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
|
||||
u32 idx = findLitTabEntry(streamingTable, hashState, m);
|
||||
size_t found_offset = litTab[idx].offset;
|
||||
const u8 * s1 = found_offset + (const u8 *)streamingTable;
|
||||
assert(hashState > found_offset);
|
||||
size_t l1 = hashState - found_offset;
|
||||
const u8 * buf = a->buf;
|
||||
size_t len = a->len;
|
||||
const char nocase = m != CASEFUL;
|
||||
|
||||
if (l1 > len) {
|
||||
const u8 * hist = nocase ? a->buf_history_nocase : a->buf_history;
|
||||
size_t hist_len = nocase ? a->len_history_nocase : a->len_history;
|
||||
|
||||
if (l1 > len+hist_len) {
|
||||
return 0; // Break out - not enough total history
|
||||
}
|
||||
|
||||
size_t overhang = l1 - len;
|
||||
assert(overhang <= hist_len);
|
||||
|
||||
if (cmpForward(hist + hist_len - overhang, s1, overhang, nocase)) {
|
||||
return 0;
|
||||
}
|
||||
s1 += overhang;
|
||||
l1 -= overhang;
|
||||
}
|
||||
// if we got here, we don't need history or we compared ok out of history
|
||||
assert(l1 <= len);
|
||||
|
||||
if (cmpForward(buf + len - l1, s1, l1, nocase)) {
|
||||
return 0;
|
||||
}
|
||||
return hashState; // our new state
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void fdrFindStreamingHash(const struct FDR_Runtime_Args *a,
|
||||
const struct FDRSTableHeader *streamingTable,
|
||||
u8 hash_len, u32 *hashes) {
|
||||
u8 tempbuf[128];
|
||||
const u8 *base;
|
||||
if (hash_len > a->len) {
|
||||
assert(hash_len <= 128);
|
||||
size_t overhang = hash_len - a->len;
|
||||
assert(overhang <= a->len_history);
|
||||
memcpy(tempbuf, a->buf_history + a->len_history - overhang, overhang);
|
||||
memcpy(tempbuf + overhang, a->buf, a->len);
|
||||
base = tempbuf;
|
||||
} else {
|
||||
assert(hash_len <= a->len);
|
||||
base = a->buf + a->len - hash_len;
|
||||
}
|
||||
|
||||
if (streamingTable->hashNBits[CASEFUL]) {
|
||||
hashes[CASEFUL] = streaming_hash(base, hash_len, CASEFUL);
|
||||
}
|
||||
if (streamingTable->hashNBits[CASELESS]) {
|
||||
hashes[CASELESS] = streaming_hash(base, hash_len, CASELESS);
|
||||
}
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable,
|
||||
u32 h, const MODES m) {
|
||||
u32 nbits = streamingTable->hashNBits[m];
|
||||
if (!nbits) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
u32 h_ent = h & ((1 << nbits) - 1);
|
||||
u32 h_low = (h >> nbits) & 63;
|
||||
|
||||
const struct FDRSHashEntry *tab =
|
||||
(const struct FDRSHashEntry *)((const u8 *)streamingTable
|
||||
+ streamingTable->hashOffset[m]);
|
||||
const struct FDRSHashEntry *ent = tab + h_ent;
|
||||
|
||||
if (!has_bit(ent, h_low)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return ent;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void fdrPackStateMode(u32 *state_table, const struct FDR_Runtime_Args *a,
|
||||
const struct FDRSTableHeader *streamingTable,
|
||||
const struct FDRSHashEntry *ent, const MODES m) {
|
||||
assert(ent);
|
||||
assert(streamingTable->hashNBits[m]);
|
||||
|
||||
const struct FDRSHashEntry *tab =
|
||||
(const struct FDRSHashEntry *)((const u8 *)streamingTable
|
||||
+ streamingTable->hashOffset[m]);
|
||||
|
||||
while (1) {
|
||||
u32 tmp = 0;
|
||||
if ((tmp = do_single_confirm(streamingTable, a, ent->state, m))) {
|
||||
state_table[m] = packStateVal(streamingTable, m, tmp);
|
||||
break;
|
||||
}
|
||||
if (ent->link == LINK_INVALID) {
|
||||
break;
|
||||
}
|
||||
ent = tab + ent->link;
|
||||
}
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void fdrPackState(const struct FDR *fdr, const struct FDR_Runtime_Args *a,
|
||||
u8 *stream_state) {
|
||||
// nothing to do if there's no stream state for the case
|
||||
if (!stream_state) {
|
||||
return;
|
||||
}
|
||||
|
||||
// get pointers to the streamer FDR and the tertiary structure
|
||||
const struct FDRSTableHeader *streamingTable = getSHDR(fdr);
|
||||
|
||||
assert(streamingTable->N);
|
||||
|
||||
u32 state_table[MAX_MODES] = {0, 0};
|
||||
|
||||
// if we don't have enough history, we don't need to do anything
|
||||
if (streamingTable->N <= a->len + a->len_history) {
|
||||
u32 hashes[MAX_MODES] = {0, 0};
|
||||
|
||||
fdrFindStreamingHash(a, streamingTable, streamingTable->N, hashes);
|
||||
|
||||
const struct FDRSHashEntry *ent_ful = getEnt(streamingTable,
|
||||
hashes[CASEFUL], CASEFUL);
|
||||
const struct FDRSHashEntry *ent_less = getEnt(streamingTable,
|
||||
hashes[CASELESS], CASELESS);
|
||||
|
||||
if (ent_ful) {
|
||||
fdrPackStateMode(state_table, a, streamingTable, ent_ful,
|
||||
CASEFUL);
|
||||
}
|
||||
|
||||
if (ent_less) {
|
||||
fdrPackStateMode(state_table, a, streamingTable, ent_less,
|
||||
CASELESS);
|
||||
}
|
||||
}
|
||||
|
||||
setStreamStates(streamingTable, stream_state, state_table);
|
||||
}
|
||||
|
||||
#endif
|
222
src/fdr/flood_compile.cpp
Normal file
222
src/fdr/flood_compile.cpp
Normal file
@@ -0,0 +1,222 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_confirm.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "fdr_engine_description.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/ue2string.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
namespace {
|
||||
struct FloodComparator {
|
||||
bool operator()(const FDRFlood &f1, const FDRFlood &f2) const {
|
||||
return std::memcmp(&f1, &f2, sizeof(f1)) < 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
static
|
||||
bool isDifferent(u8 oldC, u8 c, bool caseless) {
|
||||
if (caseless) {
|
||||
return mytolower(oldC) != mytolower(c);
|
||||
} else {
|
||||
return oldC != c;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void updateFloodSuffix(vector<FDRFlood> &tmpFlood, u8 c, u32 suffix) {
|
||||
FDRFlood &fl = tmpFlood[c];
|
||||
fl.suffix = MAX(fl.suffix, suffix + 1);
|
||||
DEBUG_PRINTF("Updated Flood Suffix for char '%c' to %u\n", c, fl.suffix);
|
||||
}
|
||||
|
||||
static
|
||||
void addFlood(vector<FDRFlood> &tmpFlood, u8 c, const hwlmLiteral &lit,
|
||||
u32 suffix) {
|
||||
FDRFlood &fl = tmpFlood[c];
|
||||
fl.suffix = MAX(fl.suffix, suffix + 1);
|
||||
if (fl.idCount < FDR_FLOOD_MAX_IDS) {
|
||||
fl.ids[fl.idCount] = lit.id;
|
||||
fl.allGroups |= lit.groups;
|
||||
fl.groups[fl.idCount] = lit.groups;
|
||||
fl.len[fl.idCount] = suffix;
|
||||
// when idCount gets to max_ids this flood no longer happens
|
||||
// only incremented one more time to avoid arithmetic overflow
|
||||
DEBUG_PRINTF("Added Flood for char '%c' suffix=%u len[%hu]=%u\n",
|
||||
c, fl.suffix, fl.idCount, suffix);
|
||||
fl.idCount++;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Builds the flood-control structure for a set of literals.
 *
 * For every literal, the trailing byte is taken as the candidate flood byte
 * and the literal (and its mask, if any) is walked backwards from the end:
 *  - if every position is compatible with that byte, the literal is added to
 *    the byte's flood entry via addFlood();
 *  - otherwise only the entry's suffix length is raised via
 *    updateFloodSuffix(), using the index of the first incompatible position.
 * For caseless alphabetic trailing bytes the upper- and lower-case variants
 * are tracked separately.
 *
 * Identical per-byte entries are then de-duplicated and laid out in a single
 * allocation: N_CHARS u32 indices followed by the distinct FDRFlood records.
 *
 * \param lits literals to analyse
 * \param eng  engine description; supplies the default flood suffix length
 * \return pair of (buffer obtained from aligned_zmalloc, its size in bytes)
 */
pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
                                        const EngineDescription &eng) {
    vector<FDRFlood> tmpFlood(N_CHARS);
    const u32 default_suffix = eng.getDefaultFloodSuffixLength();

    // Clear every byte of every record first: records are later compared
    // with memcmp, so stray bytes must not make equal records look different.
    memset(&tmpFlood[0], 0, N_CHARS * sizeof(FDRFlood));

    for (auto &entry : tmpFlood) {
        entry.suffix = default_suffix;
    }

    for (const auto &lit : lits) {
        DEBUG_PRINTF("lit: '%s'%s\n", escapeString(lit.s).c_str(),
                     lit.nocase ? " (nocase)" : "");
        const u32 litSize = verify_u32(lit.s.size());
        const u32 maskSize = (u32)lit.msk.size();
        u8 c = lit.s[litSize - 1];
        bool nocase = ourisalpha(c) ? lit.nocase : false;

        // If the mask pins the case bit of the final byte, the literal is
        // effectively case-sensitive there: pick the case the mask demands.
        if (nocase && maskSize && (lit.msk[maskSize - 1] & CASE_BIT)) {
            c = (lit.cmp[maskSize - 1] & CASE_BIT) ? mytolower(c) : mytoupper(c);
            nocase = false;
        }

        const u32 iEnd = MAX(litSize, maskSize);
        // upSuffix: suffix length for the upper-case byte when caseless, or
        // for the byte itself when case-sensitive.
        u32 upSuffix = iEnd;
        // loSuffix: suffix length for the lower-case byte; only meaningful
        // when caseless.
        u32 loSuffix = iEnd;

        for (u32 i = 0; i < iEnd; i++) {
            if (i < litSize) {
                const auto litChar = lit.s[litSize - i - 1];
                if (isDifferent(c, litChar, lit.nocase)) {
                    DEBUG_PRINTF("non-flood char in literal[%u] %c != %c\n",
                                 i, c, litChar);
                    upSuffix = MIN(upSuffix, i);
                    loSuffix = MIN(loSuffix, i); // makes sense only for case-less
                    break;
                }
            }
            if (i >= maskSize) {
                continue;
            }

            const u8 m = lit.msk[maskSize - i - 1];
            const u8 cm = lit.cmp[maskSize - i - 1] & m;
            if (!nocase) {
                if ((c & m) != cm) {
                    DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n", i, c, cm);
                    upSuffix = MIN(upSuffix, i);
                    break;
                }
                continue;
            }

            if ((mytoupper(c) & m) != cm) {
                DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n",
                             i, mytoupper(c), cm);
                upSuffix = MIN(upSuffix, i);
            }
            if ((mytolower(c) & m) != cm) {
                DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n",
                             i, mytolower(c), cm);
                loSuffix = MIN(loSuffix, i);
            }
            // Stop only once both case variants have been ruled out.
            if (loSuffix != iEnd && upSuffix != iEnd) {
                break;
            }
        }

        const u8 upChar = nocase ? mytoupper(c) : c;
        if (upSuffix == iEnd) {
            addFlood(tmpFlood, upChar, lit, upSuffix);
        } else {
            updateFloodSuffix(tmpFlood, upChar, upSuffix);
        }

        if (nocase) {
            const u8 loChar = mytolower(c);
            if (loSuffix == iEnd) {
                addFlood(tmpFlood, loChar, lit, loSuffix);
            } else {
                updateFloodSuffix(tmpFlood, loChar, loSuffix);
            }
        }
    }

#ifdef DEBUG
    for (u32 i = 0; i < N_CHARS; i++) {
        FDRFlood &fl = tmpFlood[i];
        if (!fl.idCount) {
            continue;
        }

        printf("i is %02x fl->idCount is %hd fl->suffix is %d fl->allGroups is "
               "%016llx\n", i, fl.idCount, fl.suffix, fl.allGroups);
        for (u32 j = 0; j < fl.idCount; j++) {
            printf("j is %d fl.groups[j] %016llx fl.len[j] %d \n", j,
                   fl.groups[j], fl.len[j]);
        }
    }
#endif

    // Group byte values that share an identical flood record.
    map<FDRFlood, CharReach, FloodComparator> flood2chars;
    for (u32 i = 0; i < N_CHARS; i++) {
        flood2chars[tmpFlood[i]].set(i);
    }

    const u32 nDistinctFloods = flood2chars.size();
    const size_t floodHeaderSize = sizeof(u32) * N_CHARS;
    const size_t floodStructSize = sizeof(FDRFlood) * nDistinctFloods;
    const size_t totalSize = ROUNDUP_16(floodHeaderSize + floodStructSize);
    u8 *buf = (u8 *)aligned_zmalloc(totalSize);
    assert(buf); // otherwise would have thrown std::bad_alloc

    // Layout: [u32 record index per byte value][distinct FDRFlood records]
    u32 *floodHeader = (u32 *)buf;
    FDRFlood *layoutFlood = (FDRFlood *)(buf + floodHeaderSize);

    u32 currentFloodIndex = 0;
    for (const auto &entry : flood2chars) {
        const CharReach &cr = entry.second;
        layoutFlood[currentFloodIndex] = entry.first;
        for (size_t c = cr.find_first(); c != cr.npos; c = cr.find_next(c)) {
            floodHeader[c] = currentFloodIndex;
        }
        currentFloodIndex++;
    }

    DEBUG_PRINTF("made a flood structure with %zu + %zu = %zu\n",
                 floodHeaderSize, floodStructSize, totalSize);

    return make_pair((u8 *)buf, totalSize);
}
|
||||
|
||||
} // namespace ue2
|
347
src/fdr/flood_runtime.h
Normal file
347
src/fdr/flood_runtime.h
Normal file
@@ -0,0 +1,347 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FLOOD_RUNTIME
|
||||
#define FLOOD_RUNTIME
|
||||
|
||||
#if defined(ARCH_64_BIT)
|
||||
#define FLOOD_64
|
||||
#else
|
||||
#define FLOOD_32
|
||||
#endif
|
||||
#define FLOOD_MINIMUM_SIZE 256
|
||||
#define FLOOD_BACKOFF_START 32
|
||||
|
||||
static really_inline
|
||||
const u8 * nextFloodDetect(const u8 * buf, size_t len, u32 floodBackoff) {
|
||||
// if we don't have a flood at either the start or end,
|
||||
// or have a very small buffer, don't bother with flood detection
|
||||
if (len < FLOOD_MINIMUM_SIZE) {
|
||||
return buf + len;
|
||||
}
|
||||
|
||||
/* entry points in runtime.c prefetch relevant data */
|
||||
#ifndef FLOOD_32
|
||||
u64a x11 = *(const u64a *)ROUNDUP_PTR(buf, 8);
|
||||
u64a x12 = *(const u64a *)ROUNDUP_PTR(buf+8, 8);
|
||||
if (x11 == x12) {
|
||||
return buf + floodBackoff;
|
||||
}
|
||||
u64a x21 = *(const u64a *)ROUNDUP_PTR(buf + len/2, 8);
|
||||
u64a x22 = *(const u64a *)ROUNDUP_PTR(buf + len/2 + 8, 8);
|
||||
if (x21 == x22) {
|
||||
return buf + floodBackoff;
|
||||
}
|
||||
u64a x31 = *(const u64a *)ROUNDUP_PTR(buf + len - 24, 8);
|
||||
u64a x32 = *(const u64a *)ROUNDUP_PTR(buf + len - 16, 8);
|
||||
if (x31 == x32) {
|
||||
return buf + floodBackoff;
|
||||
}
|
||||
#else
|
||||
u32 x11 = *(const u32 *)ROUNDUP_PTR(buf, 4);
|
||||
u32 x12 = *(const u32 *)ROUNDUP_PTR(buf+4, 4);
|
||||
if (x11 == x12) {
|
||||
return buf + floodBackoff;
|
||||
}
|
||||
u32 x21 = *(const u32 *)ROUNDUP_PTR(buf + len/2, 4);
|
||||
u32 x22 = *(const u32 *)ROUNDUP_PTR(buf + len/2 + 4, 4);
|
||||
if (x21 == x22) {
|
||||
return buf + floodBackoff;
|
||||
}
|
||||
u32 x31 = *(const u32 *)ROUNDUP_PTR(buf + len - 12, 4);
|
||||
u32 x32 = *(const u32 *)ROUNDUP_PTR(buf + len - 8, 4);
|
||||
if (x31 == x32) {
|
||||
return buf + floodBackoff;
|
||||
}
|
||||
#endif
|
||||
return buf + len;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 * floodDetect(const struct FDR * fdr,
|
||||
const struct FDR_Runtime_Args * a,
|
||||
const u8 ** ptrPtr,
|
||||
const u8 * tryFloodDetect,
|
||||
u32 * floodBackoffPtr,
|
||||
hwlmcb_rv_t * control,
|
||||
u32 iterBytes) {
|
||||
DEBUG_PRINTF("attempting flood detection at %p\n", tryFloodDetect);
|
||||
const u8 * buf = a->buf;
|
||||
const size_t len = a->len;
|
||||
HWLMCallback cb = a->cb;
|
||||
void * ctxt = a->ctxt;
|
||||
|
||||
const u8 * ptr = *ptrPtr;
|
||||
// tryFloodDetect is never put in places where unconditional
|
||||
// reads a short distance forward or backward here
|
||||
// TODO: rationale for this line needs to be rediscovered!!
|
||||
size_t mainLoopLen = len > iterBytes ? len - iterBytes : 0;
|
||||
const u32 i = ptr - buf;
|
||||
u32 j = i;
|
||||
|
||||
// go from c to our FDRFlood structure
|
||||
u8 c = buf[i];
|
||||
const u8 * fBase = ((const u8 *)fdr) + fdr->floodOffset;
|
||||
u32 fIdx = ((const u32 *)fBase)[c];
|
||||
const struct FDRFlood * fsb = (const struct FDRFlood *)(fBase + sizeof(u32) * 256);
|
||||
const struct FDRFlood * fl = &fsb[fIdx];
|
||||
|
||||
#ifndef FLOOD_32
|
||||
u64a cmpVal = c;
|
||||
cmpVal |= cmpVal << 8;
|
||||
cmpVal |= cmpVal << 16;
|
||||
cmpVal |= cmpVal << 32;
|
||||
u64a probe = *(const u64a *)ROUNDUP_PTR(buf+i, 8);
|
||||
#else
|
||||
u32 cmpVal = c;
|
||||
cmpVal |= cmpVal << 8;
|
||||
cmpVal |= cmpVal << 16;
|
||||
u32 probe = *(const u32 *)ROUNDUP_PTR(buf+i, 4);
|
||||
#endif
|
||||
|
||||
if ((probe != cmpVal) || (fl->idCount >= FDR_FLOOD_MAX_IDS)) {
|
||||
*floodBackoffPtr *= 2;
|
||||
goto floodout;
|
||||
}
|
||||
|
||||
if (i < fl->suffix + 7) {
|
||||
*floodBackoffPtr *= 2;
|
||||
goto floodout;
|
||||
}
|
||||
|
||||
j = i - fl->suffix;
|
||||
|
||||
#ifndef FLOOD_32
|
||||
j -= (u32)((uintptr_t)buf + j) & 0x7; // push j back to yield 8-aligned addrs
|
||||
for (; j + 32 < mainLoopLen; j += 32) {
|
||||
u64a v = *(const u64a *)(buf + j);
|
||||
u64a v2 = *(const u64a *)(buf + j + 8);
|
||||
u64a v3 = *(const u64a *)(buf + j + 16);
|
||||
u64a v4 = *(const u64a *)(buf + j + 24);
|
||||
if ((v4 != cmpVal) || (v3 != cmpVal) || (v2 != cmpVal) || (v != cmpVal)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (; j + 8 < mainLoopLen; j += 8) {
|
||||
u64a v = *(const u64a *)(buf + j);
|
||||
if (v != cmpVal) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
#else
|
||||
j -= (u32)((size_t)buf + j) & 0x3; // push j back to yield 4-aligned addrs
|
||||
for (; j + 16 < mainLoopLen; j += 16) {
|
||||
u32 v = *(const u32 *)(buf + j);
|
||||
u32 v2 = *(const u32 *)(buf + j + 4);
|
||||
u32 v3 = *(const u32 *)(buf + j + 8);
|
||||
u32 v4 = *(const u32 *)(buf + j + 12);
|
||||
if ((v4 != cmpVal) || (v3 != cmpVal) || (v2 != cmpVal) || (v != cmpVal)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (; j + 4 < mainLoopLen; j += 4) {
|
||||
u32 v = *(const u32 *)(buf + j);
|
||||
if (v != cmpVal) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for (; j < mainLoopLen; j++) {
|
||||
u8 v = *(const u8 *)(buf + j);
|
||||
if (v != c) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (j > i ) {
|
||||
j--; // needed for some reaches
|
||||
u32 itersAhead = (j-i)/iterBytes;
|
||||
u32 floodSize = itersAhead*iterBytes;
|
||||
|
||||
DEBUG_PRINTF("flooding %u size j %u i %u fl->idCount %hu "
|
||||
"*control %016llx fl->allGroups %016llx\n",
|
||||
floodSize, j, i, fl->idCount, *control, fl->allGroups);
|
||||
DEBUG_PRINTF("mainloopLen %zu mainStart ??? mainEnd ??? len %zu\n",
|
||||
mainLoopLen, len);
|
||||
|
||||
if (fl->idCount && (*control & fl->allGroups)) {
|
||||
switch (fl->idCount) {
|
||||
#if !defined(FLOOD_DEBUG)
|
||||
// Carefully unrolled code
|
||||
case 1:
|
||||
for (u32 t = 0; t < floodSize && (*control & fl->allGroups);
|
||||
t += 4) {
|
||||
DEBUG_PRINTF("aaa %u %llx\n", t, fl->groups[0]);
|
||||
u32 len0 = fl->len[0] - 1;
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 0 - len0, i + t + 0, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 2 - len0, i + t + 2, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 3 - len0, i + t + 3, fl->ids[0], ctxt);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 4) {
|
||||
u32 len0 = fl->len[0] - 1;
|
||||
u32 len1 = fl->len[1] - 1;
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t - len0, i + t, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t - len1, i + t, fl->ids[1], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control =
|
||||
cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 2 - len0, i + t + 2, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t + 2 - len1, i + t + 2, fl->ids[1], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 3 - len0, i + t + 3, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t + 3 - len1, i + t + 3, fl->ids[1], ctxt);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) {
|
||||
u32 len0 = fl->len[0] - 1;
|
||||
u32 len1 = fl->len[1] - 1;
|
||||
u32 len2 = fl->len[2] - 1;
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t - len0, i + t, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t - len1, i + t, fl->ids[1], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[2]) {
|
||||
*control = cb(i + t - len2, i + t, fl->ids[2], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[2]) {
|
||||
*control = cb(i + t + 1 - len2, i + t + 1, fl->ids[2], ctxt);
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
// slow generalized loop
|
||||
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) {
|
||||
u32 len0 = fl->len[0] - 1;
|
||||
u32 len1 = fl->len[1] - 1;
|
||||
u32 len2 = fl->len[2] - 1;
|
||||
u32 len3 = fl->len[3] - 1;
|
||||
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t - len0, i + t, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t - len1, i + t, fl->ids[1], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[2]) {
|
||||
*control = cb(i + t - len2, i + t, fl->ids[2], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[3]) {
|
||||
*control = cb(i + t - len3, i + t, fl->ids[3], ctxt);
|
||||
}
|
||||
|
||||
for (u32 t2 = 4; t2 < fl->idCount; t2++) {
|
||||
if (*control & fl->groups[t2]) {
|
||||
*control = cb(i + t - (fl->len[t2] - 1), i + t, fl->ids[t2], ctxt);
|
||||
}
|
||||
}
|
||||
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[2]) {
|
||||
*control = cb(i + t + 1 - len2, i + t + 1, fl->ids[2], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[3]) {
|
||||
*control = cb(i + t + 1 - len3, i + t + 1, fl->ids[3], ctxt);
|
||||
}
|
||||
|
||||
for (u32 t2 = 4; t2 < fl->idCount; t2++) {
|
||||
if (*control & fl->groups[t2]) {
|
||||
*control = cb(i + t + 1 - (fl->len[t2] - 1), i + t + 1, fl->ids[t2], ctxt);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
#else
|
||||
// Fallback for debugging
|
||||
default:
|
||||
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t++) {
|
||||
for (u32 t2 = 0; t2 < fl->idCount; t2++) {
|
||||
if (*control & fl->groups[t2]) {
|
||||
*control = cb(i + t - (fl->len[t2] - 1), i + t, fl->ids[t2], ctxt);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
ptr += floodSize;
|
||||
} else {
|
||||
*floodBackoffPtr *= 2;
|
||||
}
|
||||
|
||||
floodout:
|
||||
if (j + *floodBackoffPtr < mainLoopLen - 128) {
|
||||
tryFloodDetect = buf + MAX(i,j) + *floodBackoffPtr;
|
||||
} else {
|
||||
tryFloodDetect = buf + mainLoopLen; // set so we never do another flood detect
|
||||
}
|
||||
*ptrPtr = ptr;
|
||||
DEBUG_PRINTF("finished flood detection at %p (next check %p)\n",
|
||||
ptr, tryFloodDetect);
|
||||
return tryFloodDetect;
|
||||
}
|
||||
|
||||
#endif
|
244
src/fdr/teddy.c
Normal file
244
src/fdr/teddy.c
Normal file
@@ -0,0 +1,244 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "util/simd_utils.h"
|
||||
#include "util/simd_utils_ssse3.h"
|
||||
|
||||
/*
 * Mask source table used by vectoredLoad128().
 *
 * Row k (0 <= k <= 16) consists of 16 zero bytes followed by k bytes of 0xff
 * and then (16 - k) zero bytes. Loading 16 bytes from (row + 16) therefore
 * yields a mask whose first k bytes are 0xff; loading from (row + 16 - s)
 * yields the same mask shifted up by s zero bytes.
 */
static const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
    /* row 0 */
    {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    /* row 1 */
    {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    /* row 2 */
    {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    /* row 3 */
    {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    /* row 4 */
    {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    /* row 5 */
    {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    /* row 6 */
    {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    /* row 7 */
    {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    /* row 8 */
    {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    /* row 9 */
    {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    /* row 10 */
    {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    /* row 11 */
    {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00},
    /* row 12 */
    {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00},
    /* row 13 */
    {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00},
    /* row 14 */
    {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00},
    /* row 15 */
    {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00},
    /* row 16 */
    {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
};
|
||||
|
||||
// Note: p_mask is an output param that initialises a poison mask.
|
||||
UNUSED static really_inline
|
||||
m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
|
||||
const u8 *buf_history, size_t len_history,
|
||||
const u32 nMasks) {
|
||||
union {
|
||||
u8 val8[16];
|
||||
m128 val128;
|
||||
} u;
|
||||
u.val128 = zeroes128();
|
||||
|
||||
if (ptr >= lo) {
|
||||
u32 avail = (u32)(hi - ptr);
|
||||
if (avail >= 16) {
|
||||
*p_mask = load128((const void*)(p_mask_arr[16] + 16));
|
||||
return loadu128(ptr);
|
||||
}
|
||||
*p_mask = load128((const void*)(p_mask_arr[avail] + 16));
|
||||
for (u32 i = 0; i < avail; i++) {
|
||||
u.val8[i] = ptr[i];
|
||||
}
|
||||
} else {
|
||||
u32 need = MIN((u32)(lo - ptr), MIN(len_history, nMasks - 1));
|
||||
u32 start = (u32)(lo - ptr);
|
||||
u32 i;
|
||||
for (i = start - need; ptr + i < lo; i++) {
|
||||
u.val8[i] = buf_history[len_history - (lo - (ptr + i))];
|
||||
}
|
||||
u32 end = MIN(16, (u32)(hi - ptr));
|
||||
*p_mask = loadu128((const void*)(p_mask_arr[end - start] + 16 - start));
|
||||
for (; i < end; i++) {
|
||||
u.val8[i] = ptr[i];
|
||||
}
|
||||
}
|
||||
|
||||
return u.val128;
|
||||
}
|
||||
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
UNUSED static really_inline
|
||||
m256 vectoredLoad2x128(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
|
||||
const u8 *buf_history, size_t len_history,
|
||||
const u32 nMasks) {
|
||||
m128 p_mask128;
|
||||
m256 ret = set2x128(vectoredLoad128(&p_mask128, ptr, lo, hi, buf_history, len_history, nMasks));
|
||||
*p_mask = set2x128(p_mask128);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const u8 ALIGN_AVX_DIRECTIVE p_mask_arr256[33][64] = {
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
|
||||
};
|
||||
|
||||
|
||||
UNUSED static really_inline
|
||||
m256 vectoredLoad256(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
|
||||
const u8 *buf_history, size_t len_history) {
|
||||
union {
|
||||
u8 val8[32];
|
||||
m256 val256;
|
||||
} u;
|
||||
|
||||
if (ptr >= lo) {
|
||||
u32 avail = (u32)(hi - ptr);
|
||||
if (avail >= 32) {
|
||||
*p_mask = load256((const void*)(p_mask_arr256[32] + 32));
|
||||
return loadu256(ptr);
|
||||
}
|
||||
*p_mask = load256((const void*)(p_mask_arr256[avail] + 32));
|
||||
for (u32 i = 0; i < avail; i++) {
|
||||
u.val8[i] = ptr[i];
|
||||
}
|
||||
} else {
|
||||
// need contains "how many chars to pull from history"
|
||||
// calculate based on what we need, what we have in the buffer
|
||||
// and only what we need to make primary confirm work
|
||||
u32 start = (u32)(lo - ptr);
|
||||
u32 i;
|
||||
for (i = start; ptr + i < lo; i++) {
|
||||
u.val8[i] = buf_history[len_history - (lo - (ptr + i))];
|
||||
}
|
||||
u32 end = MIN(32, (u32)(hi - ptr));
|
||||
*p_mask = loadu256((const void*)(p_mask_arr256[end - start] + 32 - start));
|
||||
for (; i < end; i++) {
|
||||
u.val8[i] = ptr[i];
|
||||
}
|
||||
}
|
||||
|
||||
return u.val256;
|
||||
}
|
||||
|
||||
|
||||
#endif // __AVX2__
|
||||
|
||||
// P0(cnd) wraps a condition in unlikely(); the generated Teddy code uses it on
// its confirm and early-termination tests.
#define P0(cnd) unlikely(cnd)
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "flood_runtime.h"
|
||||
|
||||
#include "fdr_confirm.h"
|
||||
#include "fdr_confirm_runtime.h"
|
||||
|
||||
#include "fdr_loadval.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "teddy_internal.h"
|
||||
|
||||
#include "teddy_autogen.c"
|
545
src/fdr/teddy_autogen.py
Executable file
545
src/fdr/teddy_autogen.py
Executable file
@@ -0,0 +1,545 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Copyright (c) 2015, Intel Corporation
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import sys
|
||||
from autogen_utils import *
|
||||
from base_autogen import *
|
||||
from string import Template
|
||||
|
||||
class MT(MatcherBase):
|
||||
def produce_confirm(self, iter, var_name, offset, bits, cautious = True):
|
||||
if self.packed:
|
||||
print self.produce_confirm_base(var_name, bits, iter*16 + offset, cautious, enable_confirmless = False, do_bailout = False)
|
||||
else:
|
||||
if self.num_masks == 1:
|
||||
conf_func = "confWithBit1"
|
||||
else:
|
||||
conf_func = "confWithBitMany"
|
||||
|
||||
if cautious:
|
||||
caution_string = "VECTORING"
|
||||
else:
|
||||
caution_string = "NOT_CAUTIOUS"
|
||||
|
||||
print " if (P0(!!%s)) {" % var_name
|
||||
print " do {"
|
||||
if bits == 64:
|
||||
print " bit = findAndClearLSB_64(&%s);" % (var_name)
|
||||
else:
|
||||
print " bit = findAndClearLSB_32(&%s);" % (var_name)
|
||||
print " byte = bit / %d + %d;" % (self.num_buckets, iter*16 + offset)
|
||||
print " idx = bit %% %d;" % self.num_buckets
|
||||
print " cf = confBase[idx];"
|
||||
print " fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);"
|
||||
print " if (!(fdrc->groups & *control))"
|
||||
print " continue;"
|
||||
print " %s(fdrc, a, ptr - buf + byte, %s, control, &last_match);" % (conf_func, caution_string)
|
||||
print " } while(P0(!!%s));" % var_name
|
||||
print " if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {"
|
||||
print " *a->groups = controlVal;"
|
||||
print " return HWLM_TERMINATED;"
|
||||
print " }"
|
||||
print " }"
|
||||
|
||||
def produce_needed_temporaries(self, max_iterations):
|
||||
print " m128 p_mask;"
|
||||
for iter in range(0, max_iterations):
|
||||
print " m128 val_%d;" % iter
|
||||
print " m128 val_%d_lo;" % iter
|
||||
print " m128 val_%d_hi;" % iter
|
||||
for x in range(self.num_masks):
|
||||
print " m128 res_%d_%d;" % (iter, x)
|
||||
if x != 0:
|
||||
print " m128 res_shifted_%d_%d;" % (iter, x)
|
||||
print " m128 r_%d;" % iter
|
||||
print "#ifdef ARCH_64_BIT"
|
||||
print " u64a r_%d_lopart;" % iter
|
||||
print " u64a r_%d_hipart;" % iter
|
||||
print "#else"
|
||||
print " u32 r_%d_part1;" % iter
|
||||
print " u32 r_%d_part2;" % iter
|
||||
print " u32 r_%d_part3;" % iter
|
||||
print " u32 r_%d_part4;" % iter
|
||||
print "#endif"
|
||||
|
||||
def produce_one_iteration_state_calc(self, iter, effective_num_iterations,
|
||||
cautious, save_old):
|
||||
if cautious:
|
||||
print " val_%d = vectoredLoad128(&p_mask, ptr + %d, buf, buf+len, a->buf_history, a->len_history, %d);" % (iter, iter*16, self.num_masks)
|
||||
else:
|
||||
print " val_%d = load128(ptr + %d);" % (iter, iter*16)
|
||||
print " val_%d_lo = and128(val_%d, lomask);" % (iter, iter)
|
||||
print " val_%d_hi = rshift2x64(val_%d, 4);" % (iter, iter)
|
||||
print " val_%d_hi = and128(val_%d_hi, lomask);" % (iter, iter)
|
||||
print
|
||||
for x in range(self.num_masks):
|
||||
print Template("""
|
||||
res_${ITER}_${X} = and128(pshufb(maskBase[${X}*2] , val_${ITER}_lo),
|
||||
pshufb(maskBase[${X}*2+1], val_${ITER}_hi));""").substitute(ITER = iter, X = x)
|
||||
if x != 0:
|
||||
if iter == 0:
|
||||
print " res_shifted_%d_%d = palignr(res_%d_%d, res_old_%d, 16-%d);" % (iter, x, iter, x, x, x)
|
||||
else:
|
||||
print " res_shifted_%d_%d = palignr(res_%d_%d, res_%d_%d, 16-%d);" % (iter, x, iter, x, iter-1, x, x)
|
||||
if x != 0 and iter == effective_num_iterations - 1 and save_old:
|
||||
print " res_old_%d = res_%d_%d;" % (x, iter, x)
|
||||
print
|
||||
if cautious:
|
||||
print " r_%d = and128(res_%d_0, p_mask);" % (iter, iter)
|
||||
else:
|
||||
print " r_%d = res_%d_0;" % (iter, iter)
|
||||
for x in range(1, self.num_masks):
|
||||
print " r_%d = and128(r_%d, res_shifted_%d_%d);" % (iter, iter, iter, x)
|
||||
print
|
||||
|
||||
def produce_one_iteration_confirm(self, iter, confirmCautious):
|
||||
setup64 = [ (0, "r_%d_lopart" % iter, "movq(r_%d)" % iter),
|
||||
(8, "r_%d_hipart" % iter, "movq(byteShiftRight128(r_%d, 8))" % iter) ]
|
||||
|
||||
setup32 = [ (0, "r_%d_part1" % iter, "movd(r_%d)" % iter),
|
||||
(4, "r_%d_part2" % iter, "movd(byteShiftRight128(r_%d, 4))" % iter),
|
||||
(8, "r_%d_part3" % iter, "movd(byteShiftRight128(r_%d, 8))" % iter),
|
||||
(12, "r_%d_part4" % iter, "movd(byteShiftRight128(r_%d, 12))" % iter) ]
|
||||
|
||||
print " if (P0(isnonzero128(r_%d))) {" % (iter)
|
||||
print "#ifdef ARCH_64_BIT"
|
||||
for (off, val, init) in setup64:
|
||||
print " %s = %s;" % (val, init)
|
||||
for (off, val, init) in setup64:
|
||||
self.produce_confirm(iter, val, off, 64, cautious = confirmCautious)
|
||||
print "#else"
|
||||
for (off, val, init) in setup32:
|
||||
print " %s = %s;" % (val, init)
|
||||
for (off, val, init) in setup32:
|
||||
self.produce_confirm(iter, val, off, 32, cautious = confirmCautious)
|
||||
print "#endif"
|
||||
print " }"
|
||||
|
||||
def produce_one_iteration(self, iter, effective_num_iterations, cautious = False,
|
||||
confirmCautious = True, save_old = True):
|
||||
self.produce_one_iteration_state_calc(iter, effective_num_iterations, cautious, save_old)
|
||||
self.produce_one_iteration_confirm(iter, confirmCautious)
|
||||
|
||||
def produce_code(self):
|
||||
print self.produce_header(visible = True, header_only = False)
|
||||
print self.produce_common_declarations()
|
||||
print
|
||||
|
||||
self.produce_needed_temporaries(self.num_iterations)
|
||||
print
|
||||
|
||||
print " const struct Teddy * teddy = (const struct Teddy *)fdr;"
|
||||
print " const m128 * maskBase = (const m128 *)((const u8 *)fdr + sizeof(struct Teddy));"
|
||||
print " const u32 * confBase = (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + (%d*32));" % self.num_masks
|
||||
print " const u8 * mainStart = ROUNDUP_PTR(ptr, 16);"
|
||||
print " const size_t iterBytes = %d;" % (self.num_iterations * 16)
|
||||
|
||||
print ' DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\\n",' \
|
||||
' buf, len, a->start_offset);'
|
||||
print ' DEBUG_PRINTF("derive: ptr: %p mainstart %p\\n", ptr,' \
|
||||
' mainStart);'
|
||||
|
||||
for x in range(self.num_masks):
|
||||
if (x != 0):
|
||||
print " m128 res_old_%d = ones128();" % x
|
||||
print " m128 lomask = set16x8(0xf);"
|
||||
|
||||
print " if (ptr < mainStart) {"
|
||||
print " ptr = mainStart - 16;"
|
||||
self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
|
||||
print " ptr += 16;"
|
||||
print " }"
|
||||
|
||||
print " if (ptr + 16 < buf + len) {"
|
||||
self.produce_one_iteration(0, 1, cautious = False, confirmCautious = True, save_old = True)
|
||||
print " ptr += 16;"
|
||||
print " }"
|
||||
|
||||
print " for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) {"
|
||||
print " __builtin_prefetch(ptr + (iterBytes*4));"
|
||||
print self.produce_flood_check()
|
||||
|
||||
for iter in range(self.num_iterations):
|
||||
self.produce_one_iteration(iter, self.num_iterations, cautious = False, confirmCautious = False)
|
||||
|
||||
print " }"
|
||||
|
||||
print " for (; ptr < buf + len; ptr += 16) {"
|
||||
self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
|
||||
print " }"
|
||||
|
||||
print self.produce_footer()
|
||||
|
||||
def produce_compile_call(self):
|
||||
packed_str = { False : "false", True : "true"}[self.packed]
|
||||
print " { %d, %s, %d, %d, %s, %d, %d }," % (
|
||||
self.id, self.arch.target, self.num_masks, self.num_buckets, packed_str,
|
||||
self.conf_pull_back, self.conf_top_level_split)
|
||||
|
||||
def get_name(self):
|
||||
if self.packed:
|
||||
pck_string = "_pck"
|
||||
else:
|
||||
pck_string = ""
|
||||
|
||||
if self.num_buckets == 16:
|
||||
type_string = "_fat"
|
||||
else:
|
||||
type_string = ""
|
||||
|
||||
return "fdr_exec_teddy_%s_msks%d%s%s" % (self.arch.name, self.num_masks, pck_string, type_string)
|
||||
|
||||
def __init__(self, arch, packed = False, num_masks = 1, num_buckets = 8):
|
||||
self.arch = arch
|
||||
self.packed = packed
|
||||
self.num_masks = num_masks
|
||||
self.num_buckets = num_buckets
|
||||
self.num_iterations = 2
|
||||
|
||||
if packed:
|
||||
self.conf_top_level_split = 32
|
||||
else:
|
||||
self.conf_top_level_split = 1
|
||||
self.conf_pull_back = 0
|
||||
|
||||
class MTFat(MT):
|
||||
def produce_needed_temporaries(self, max_iterations):
|
||||
print " m256 p_mask;"
|
||||
for iter in range(0, max_iterations):
|
||||
print " m256 val_%d;" % iter
|
||||
print " m256 val_%d_lo;" % iter
|
||||
print " m256 val_%d_hi;" % iter
|
||||
for x in range(self.num_masks):
|
||||
print " m256 res_%d_%d;" % (iter, x)
|
||||
if x != 0:
|
||||
print " m256 res_shifted_%d_%d;" % (iter, x)
|
||||
print " m256 r_%d;" % iter
|
||||
print "#ifdef ARCH_64_BIT"
|
||||
print " u64a r_%d_part1;" % iter
|
||||
print " u64a r_%d_part2;" % iter
|
||||
print " u64a r_%d_part3;" % iter
|
||||
print " u64a r_%d_part4;" % iter
|
||||
print "#else"
|
||||
print " u32 r_%d_part1;" % iter
|
||||
print " u32 r_%d_part2;" % iter
|
||||
print " u32 r_%d_part3;" % iter
|
||||
print " u32 r_%d_part4;" % iter
|
||||
print " u32 r_%d_part5;" % iter
|
||||
print " u32 r_%d_part6;" % iter
|
||||
print " u32 r_%d_part7;" % iter
|
||||
print " u32 r_%d_part8;" % iter
|
||||
print "#endif"
|
||||
|
||||
def produce_code(self):
|
||||
print self.produce_header(visible = True, header_only = False)
|
||||
print self.produce_common_declarations()
|
||||
print
|
||||
|
||||
self.produce_needed_temporaries(self.num_iterations)
|
||||
print
|
||||
|
||||
print " const struct Teddy * teddy = (const struct Teddy *)fdr;"
|
||||
print " const m256 * maskBase = (const m256 *)((const u8 *)fdr + sizeof(struct Teddy));"
|
||||
print " const u32 * confBase = (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + (%d*32*2));" % self.num_masks
|
||||
print " const u8 * mainStart = ROUNDUP_PTR(ptr, 16);"
|
||||
print " const size_t iterBytes = %d;" % (self.num_iterations * 16)
|
||||
|
||||
print ' DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\\n",' \
|
||||
' buf, len, a->start_offset);'
|
||||
print ' DEBUG_PRINTF("derive: ptr: %p mainstart %p\\n", ptr,' \
|
||||
' mainStart);'
|
||||
|
||||
for x in range(self.num_masks):
|
||||
if (x != 0):
|
||||
print " m256 res_old_%d = ones256();" % x
|
||||
print " m256 lomask = set32x8(0xf);"
|
||||
|
||||
print " if (ptr < mainStart) {"
|
||||
print " ptr = mainStart - 16;"
|
||||
self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
|
||||
print " ptr += 16;"
|
||||
print " }"
|
||||
|
||||
print " if (ptr + 16 < buf + len) {"
|
||||
self.produce_one_iteration(0, 1, cautious = False, confirmCautious = True, save_old = True)
|
||||
print " ptr += 16;"
|
||||
print " }"
|
||||
|
||||
print " for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) {"
|
||||
print " __builtin_prefetch(ptr + (iterBytes*4));"
|
||||
print self.produce_flood_check()
|
||||
|
||||
for iter in range(self.num_iterations):
|
||||
self.produce_one_iteration(iter, self.num_iterations, False, confirmCautious = False)
|
||||
|
||||
print " }"
|
||||
|
||||
print " for (; ptr < buf + len; ptr += 16) {"
|
||||
self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
|
||||
print " }"
|
||||
|
||||
print self.produce_footer()
|
||||
|
||||
def produce_one_iteration_state_calc(self, iter, effective_num_iterations,
|
||||
cautious, save_old):
|
||||
if cautious:
|
||||
print " val_%d = vectoredLoad2x128(&p_mask, ptr + %d, buf, buf+len, a->buf_history, a->len_history, %d);" % (iter, iter*16, self.num_masks)
|
||||
else:
|
||||
print " val_%d = load2x128(ptr + %d);" % (iter, iter*16)
|
||||
print " val_%d_lo = and256(val_%d, lomask);" % (iter, iter)
|
||||
print " val_%d_hi = rshift4x64(val_%d, 4);" % (iter, iter)
|
||||
print " val_%d_hi = and256(val_%d_hi, lomask);" % (iter, iter)
|
||||
print
|
||||
for x in range(self.num_masks):
|
||||
print Template("""
|
||||
res_${ITER}_${X} = and256(vpshufb(maskBase[${X}*2] , val_${ITER}_lo),
|
||||
vpshufb(maskBase[${X}*2+1], val_${ITER}_hi));""").substitute(ITER = iter, X = x)
|
||||
if x != 0:
|
||||
if iter == 0:
|
||||
print " res_shifted_%d_%d = vpalignr(res_%d_%d, res_old_%d, 16-%d);" % (iter, x, iter, x, x, x)
|
||||
else:
|
||||
print " res_shifted_%d_%d = vpalignr(res_%d_%d, res_%d_%d, 16-%d);" % (iter, x, iter, x, iter-1, x, x)
|
||||
if x != 0 and iter == effective_num_iterations - 1 and save_old:
|
||||
print " res_old_%d = res_%d_%d;" % (x, iter, x)
|
||||
print
|
||||
if cautious:
|
||||
print " r_%d = and256(res_%d_0, p_mask);" % (iter, iter)
|
||||
else:
|
||||
print " r_%d = res_%d_0;" % (iter, iter)
|
||||
for x in range(1, self.num_masks):
|
||||
print " r_%d = and256(r_%d, res_shifted_%d_%d);" % (iter, iter, iter, x)
|
||||
print
|
||||
|
||||
def produce_one_iteration_confirm(self, iter, confirmCautious):
|
||||
setup64 = [ (0, "r_%d_part1" % iter, "extractlow64from256(r)"),
|
||||
(4, "r_%d_part2" % iter, "extract64from256(r, 1);\n r = interleave256hi(r_%d, r_swap)" % (iter)),
|
||||
(8, "r_%d_part3" % iter, "extractlow64from256(r)"),
|
||||
(12, "r_%d_part4" % iter, "extract64from256(r, 1)") ]
|
||||
|
||||
setup32 = [ (0, "r_%d_part1" % iter, "extractlow32from256(r)"),
|
||||
(2, "r_%d_part2" % iter, "extract32from256(r, 1)"),
|
||||
(4, "r_%d_part3" % iter, "extract32from256(r, 2)"),
|
||||
(6, "r_%d_part4" % iter, "extract32from256(r, 3);\n r = interleave256hi(r_%d, r_swap)" % (iter)),
|
||||
(8, "r_%d_part5" % iter, "extractlow32from256(r)"),
|
||||
(10, "r_%d_part6" % iter, "extract32from256(r, 1)"),
|
||||
(12, "r_%d_part7" % iter, "extract32from256(r, 2)"),
|
||||
(14, "r_%d_part8" % iter, "extract32from256(r, 3)") ]
|
||||
|
||||
print " if (P0(isnonzero256(r_%d))) {" % (iter)
|
||||
print " m256 r_swap = swap128in256(r_%d);" % (iter)
|
||||
print " m256 r = interleave256lo(r_%d, r_swap);" % (iter)
|
||||
print "#ifdef ARCH_64_BIT"
|
||||
for (off, val, init) in setup64:
|
||||
print " %s = %s;" % (val, init)
|
||||
|
||||
for (off, val, init) in setup64:
|
||||
self.produce_confirm(iter, val, off, 64, cautious = confirmCautious)
|
||||
print "#else"
|
||||
for (off, val, init) in setup32:
|
||||
print " %s = %s;" % (val, init)
|
||||
|
||||
for (off, val, init) in setup32:
|
||||
self.produce_confirm(iter, val, off, 32, cautious = confirmCautious)
|
||||
print "#endif"
|
||||
print " }"
|
||||
|
||||
class MTFast(MatcherBase):
|
||||
|
||||
def produce_confirm(self, cautious):
|
||||
if cautious:
|
||||
cautious_str = "VECTORING"
|
||||
else:
|
||||
cautious_str = "NOT_CAUTIOUS"
|
||||
|
||||
print " for (u32 i = 0; i < arrCnt; i++) {"
|
||||
print " byte = bitArr[i] / 8;"
|
||||
if self.packed:
|
||||
conf_split_mask = IntegerType(32).constant_to_string(
|
||||
self.conf_top_level_split - 1)
|
||||
print " bitRem = bitArr[i] % 8;"
|
||||
print " confSplit = *(ptr+byte) & 0x1f;"
|
||||
print " idx = confSplit * %d + bitRem;" % self.num_buckets
|
||||
print " cf = confBase[idx];"
|
||||
print " if (!cf)"
|
||||
print " continue;"
|
||||
print " fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);"
|
||||
print " if (!(fdrc->groups & *control))"
|
||||
print " continue;"
|
||||
print " confWithBit(fdrc, a, ptr - buf + byte, %s, 0, control, &last_match);" % cautious_str
|
||||
else:
|
||||
print " cf = confBase[bitArr[i] % 8];"
|
||||
print " fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);"
|
||||
print " confWithBit1(fdrc, a, ptr - buf + byte, %s, control, &last_match);" % cautious_str
|
||||
print " if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {"
|
||||
print " *a->groups = controlVal;"
|
||||
print " return HWLM_TERMINATED;"
|
||||
print " }"
|
||||
print " }"
|
||||
|
||||
def produce_needed_temporaries(self, max_iterations):
|
||||
print " u32 arrCnt;"
|
||||
print " u16 bitArr[512];"
|
||||
print " m256 p_mask;"
|
||||
print " m256 val_0;"
|
||||
print " m256 val_0_lo;"
|
||||
print " m256 val_0_hi;"
|
||||
print " m256 res_0;"
|
||||
print " m256 res_1;"
|
||||
print " m128 lo_part;"
|
||||
print " m128 hi_part;"
|
||||
print "#ifdef ARCH_64_BIT"
|
||||
print " u64a r_0_part;"
|
||||
print "#else"
|
||||
print " u32 r_0_part;"
|
||||
print "#endif"
|
||||
|
||||
def produce_bit_scan(self, offset, bits):
|
||||
print " while (P0(!!r_0_part)) {"
|
||||
if bits == 64:
|
||||
print " bitArr[arrCnt++] = (u16)findAndClearLSB_64(&r_0_part) + 64 * %d;" % (offset)
|
||||
else:
|
||||
print " bitArr[arrCnt++] = (u16)findAndClearLSB_32(&r_0_part) + 32 * %d;" % (offset)
|
||||
print " }"
|
||||
|
||||
def produce_bit_check_128(self, var_name, offset):
|
||||
print " if (P0(isnonzero128(%s))) {" % (var_name)
|
||||
print "#ifdef ARCH_64_BIT"
|
||||
print " r_0_part = movq(%s);" % (var_name)
|
||||
self.produce_bit_scan(offset, 64)
|
||||
print " r_0_part = movq(byteShiftRight128(%s, 8));" % (var_name)
|
||||
self.produce_bit_scan(offset + 1, 64)
|
||||
print "#else"
|
||||
print " r_0_part = movd(%s);" % (var_name)
|
||||
self.produce_bit_scan(offset * 2, 32)
|
||||
for step in range(1, 4):
|
||||
print " r_0_part = movd(byteShiftRight128(%s, %d));" % (var_name, step * 4)
|
||||
self.produce_bit_scan(offset * 2 + step, 32)
|
||||
print "#endif"
|
||||
print " }"
|
||||
|
||||
def produce_bit_check_256(self, iter, single_iter, cautious):
|
||||
print " if (P0(isnonzero256(res_%d))) {" % (iter)
|
||||
if single_iter:
|
||||
print " arrCnt = 0;"
|
||||
print " lo_part = cast256to128(res_%d);" % (iter)
|
||||
print " hi_part = cast256to128(swap128in256(res_%d));" % (iter)
|
||||
self.produce_bit_check_128("lo_part", iter * 4)
|
||||
self.produce_bit_check_128("hi_part", iter * 4 + 2)
|
||||
if single_iter:
|
||||
self.produce_confirm(cautious)
|
||||
print " }"
|
||||
|
||||
def produce_one_iteration_state_calc(self, iter, cautious):
|
||||
if cautious:
|
||||
print " val_0 = vectoredLoad256(&p_mask, ptr + %d, buf+a->start_offset, buf+len, a->buf_history, a->len_history);" % (iter * 32)
|
||||
else:
|
||||
print " val_0 = load256(ptr + %d);" % (iter * 32)
|
||||
print " val_0_lo = and256(val_0, lomask);"
|
||||
print " val_0_hi = rshift4x64(val_0, 4);"
|
||||
print " val_0_hi = and256(val_0_hi, lomask);"
|
||||
print " res_%d = and256(vpshufb(maskLo , val_0_lo), vpshufb(maskHi, val_0_hi));" % (iter)
|
||||
if cautious:
|
||||
print " res_%d = and256(res_%d, p_mask);" % (iter, iter)
|
||||
|
||||
def produce_code(self):
|
||||
print self.produce_header(visible = True, header_only = False)
|
||||
print self.produce_common_declarations()
|
||||
print
|
||||
|
||||
self.produce_needed_temporaries(self.num_iterations)
|
||||
|
||||
print " const struct Teddy * teddy = (const struct Teddy *)fdr;"
|
||||
print " const m128 * maskBase = (const m128 *)((const u8 *)fdr + sizeof(struct Teddy));"
|
||||
print " const m256 maskLo = set2x128(maskBase[0]);"
|
||||
print " const m256 maskHi = set2x128(maskBase[1]);"
|
||||
print " const u32 * confBase = (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + 32);"
|
||||
print " const u8 * mainStart = ROUNDUP_PTR(ptr, 32);"
|
||||
print " const size_t iterBytes = %d;" % (self.num_iterations * 32)
|
||||
|
||||
print ' DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\\n",' \
|
||||
' buf, len, a->start_offset);'
|
||||
print ' DEBUG_PRINTF("derive: ptr: %p mainstart %p\\n", ptr,' \
|
||||
' mainStart);'
|
||||
print " const m256 lomask = set32x8(0xf);"
|
||||
|
||||
print " if (ptr < mainStart) {"
|
||||
print " ptr = mainStart - 32;"
|
||||
self.produce_one_iteration_state_calc(iter = 0, cautious = True)
|
||||
self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True)
|
||||
print " ptr += 32;"
|
||||
print " }"
|
||||
|
||||
print " if (ptr + 32 < buf + len) {"
|
||||
self.produce_one_iteration_state_calc(iter = 0, cautious = False)
|
||||
self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True)
|
||||
print " ptr += 32;"
|
||||
print " }"
|
||||
print " for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) {"
|
||||
print " __builtin_prefetch(ptr + (iterBytes*4));"
|
||||
print self.produce_flood_check()
|
||||
for iter in range (0, self.num_iterations):
|
||||
self.produce_one_iteration_state_calc(iter = iter, cautious = False)
|
||||
print " arrCnt = 0;"
|
||||
for iter in range (0, self.num_iterations):
|
||||
self.produce_bit_check_256(iter = iter, single_iter = False, cautious = False)
|
||||
self.produce_confirm(cautious = False)
|
||||
print " }"
|
||||
|
||||
print " for (; ptr < buf + len; ptr += 32) {"
|
||||
self.produce_one_iteration_state_calc(iter = 0, cautious = True)
|
||||
self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True)
|
||||
print " }"
|
||||
|
||||
print self.produce_footer()
|
||||
|
||||
def get_name(self):
|
||||
if self.packed:
|
||||
pck_string = "_pck"
|
||||
else:
|
||||
pck_string = ""
|
||||
return "fdr_exec_teddy_%s_msks%d%s_fast" % (self.arch.name, self.num_masks, pck_string)
|
||||
|
||||
def produce_compile_call(self):
|
||||
packed_str = { False : "false", True : "true"}[self.packed]
|
||||
print " { %d, %s, %d, %d, %s, %d, %d }," % (
|
||||
self.id, self.arch.target, self.num_masks, self.num_buckets, packed_str,
|
||||
self.conf_pull_back, self.conf_top_level_split)
|
||||
|
||||
def __init__(self, arch, packed = False):
|
||||
self.arch = arch
|
||||
self.packed = packed
|
||||
self.num_masks = 1
|
||||
self.num_buckets = 8
|
||||
self.num_iterations = 2
|
||||
|
||||
self.conf_top_level_split = 1
|
||||
self.conf_pull_back = 0
|
||||
if packed:
|
||||
self.conf_top_level_split = 32
|
||||
else:
|
||||
self.conf_top_level_split = 1
|
||||
self.conf_pull_back = 0
|
459
src/fdr/teddy_compile.cpp
Normal file
459
src/fdr/teddy_compile.cpp
Normal file
@@ -0,0 +1,459 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "fdr_confirm.h"
|
||||
#include "fdr_engine_description.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/popcount.h"
|
||||
#include "util/target_info.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include "teddy_compile.h"
|
||||
#include "teddy_internal.h"
|
||||
#include "teddy_engine_description.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
namespace {
|
||||
|
||||
//#define TEDDY_DEBUG
|
||||
|
||||
class TeddyCompiler : boost::noncopyable {
|
||||
const TeddyEngineDescription ŋ
|
||||
const vector<hwlmLiteral> &lits;
|
||||
bool make_small;
|
||||
|
||||
public:
|
||||
TeddyCompiler(const vector<hwlmLiteral> &lits_in,
|
||||
const TeddyEngineDescription &eng_in, bool make_small_in)
|
||||
: eng(eng_in), lits(lits_in), make_small(make_small_in) {}
|
||||
|
||||
aligned_unique_ptr<FDR> build(pair<u8 *, size_t> link);
|
||||
bool pack(map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits);
|
||||
};
|
||||
|
||||
class TeddySet {
|
||||
const vector<hwlmLiteral> &lits;
|
||||
u32 len;
|
||||
// nibbleSets is a series of bitfields over 16 predicates
|
||||
// that represent the whether shufti nibble set
|
||||
// so for num_masks = 4 we will represent our strings by
|
||||
// 8 u16s in the vector that indicate what a shufti bucket
|
||||
// would have to look like
|
||||
vector<u16> nibbleSets;
|
||||
set<u32> litIds;
|
||||
public:
|
||||
TeddySet(const vector<hwlmLiteral> &lits_in, u32 len_in)
|
||||
: lits(lits_in), len(len_in), nibbleSets(len_in * 2, 0) {}
|
||||
const set<u32> & getLits() const { return litIds; }
|
||||
size_t litCount() const { return litIds.size(); }
|
||||
|
||||
bool operator<(const TeddySet & s) const {
|
||||
return litIds < s.litIds;
|
||||
}
|
||||
|
||||
#ifdef TEDDY_DEBUG
|
||||
void dump() const {
|
||||
printf("TS: ");
|
||||
for (u32 i = 0; i < nibbleSets.size(); i++) {
|
||||
printf("%04x ", (u32)nibbleSets[i]);
|
||||
}
|
||||
printf("\nnlits: %zu\nLit ids: ", litCount());
|
||||
printf("Prob: %llu\n", probability());
|
||||
for (set<u32>::iterator i = litIds.begin(), e = litIds.end(); i != e; ++i) {
|
||||
printf("%u ", *i);
|
||||
}
|
||||
printf("\n");
|
||||
printf("Flood prone : %s\n", isRunProne()?"yes":"no");
|
||||
}
|
||||
#endif
|
||||
|
||||
bool identicalTail(const TeddySet & ts) const {
|
||||
return nibbleSets == ts.nibbleSets;
|
||||
}
|
||||
|
||||
void addLiteral(u32 lit_id) {
|
||||
const string &s = lits[lit_id].s;
|
||||
for (u32 i = 0; i < len; i++) {
|
||||
if (i < s.size()) {
|
||||
u8 c = s[s.size() - i - 1];
|
||||
u8 c_hi = (c >> 4) & 0xf;
|
||||
u8 c_lo = c & 0xf;
|
||||
nibbleSets[i*2] = 1 << c_lo;
|
||||
if (lits[lit_id].nocase && ourisalpha(c)) {
|
||||
nibbleSets[i*2+1] = (1 << (c_hi&0xd)) | (1 << (c_hi|0x2));
|
||||
} else {
|
||||
nibbleSets[i*2+1] = 1 << c_hi;
|
||||
}
|
||||
} else {
|
||||
nibbleSets[i*2] = nibbleSets[i*2+1] = 0xffff;
|
||||
}
|
||||
}
|
||||
litIds.insert(lit_id);
|
||||
}
|
||||
|
||||
void merge(const TeddySet &ts) {
|
||||
for (u32 i = 0; i < nibbleSets.size(); i++) {
|
||||
nibbleSets[i] |= ts.nibbleSets[i];
|
||||
}
|
||||
litIds.insert(ts.litIds.begin(), ts.litIds.end());
|
||||
}
|
||||
|
||||
// return a value p from 0 .. MAXINT64 that gives p/MAXINT64
|
||||
// likelihood of this TeddySet firing a first-stage accept
|
||||
// if it was given a bucket of its own and random data were
|
||||
// to be passed in
|
||||
u64a probability() const {
|
||||
u64a val = 1;
|
||||
for (size_t i = 0; i < nibbleSets.size(); i++) {
|
||||
val *= popcount32((u32)nibbleSets[i]);
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
// return a score based around the chance of this hitting times
|
||||
// a small fixed cost + the cost of traversing some sort of followup
|
||||
// (assumption is that the followup is linear)
|
||||
u64a heuristic() const {
|
||||
return probability() * (2+litCount());
|
||||
}
|
||||
|
||||
bool isRunProne() const {
|
||||
u16 lo_and = 0xffff;
|
||||
u16 hi_and = 0xffff;
|
||||
for (u32 i = 0; i < len; i++) {
|
||||
lo_and &= nibbleSets[i*2];
|
||||
hi_and &= nibbleSets[i*2+1];
|
||||
}
|
||||
// we're not flood-prone if there's no way to get
|
||||
// through with a flood
|
||||
if (!lo_and || !hi_and) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Greedily merges per-literal TeddySets until no acceptable merge remains,
 * then writes one bucket per surviving set into bucketToLits.
 *
 * Returns false (leaving bucketToLits untouched) when more sets remain than
 * the engine has buckets.
 */
bool TeddyCompiler::pack(map<BucketIndex,
                             std::vector<LiteralIndex> > &bucketToLits) {
    set<TeddySet> sts;

    // Start with one set per literal.
    for (u32 lit = 0; lit < lits.size(); lit++) {
        TeddySet single(lits, eng.numMasks);
        single.addLiteral(lit);
        sts.insert(single);
    }

    for (;;) {
#ifdef TEDDY_DEBUG
        printf("Size %zu\n", sts.size());
        for (const TeddySet &ts : sts) {
            printf("\n"); ts.dump();
        }
        printf("\n===============================================\n");
#endif

        auto m1 = sts.end();
        auto m2 = sts.end();
        u64a best = 0xffffffffffffffffULL;

        for (auto i1 = sts.begin(); i1 != sts.end(); ++i1) {
            const TeddySet &s1 = *i1;
            auto i2 = i1;
            for (++i2; i2 != sts.end(); ++i2) {
                const TeddySet &s2 = *i2;

                // be more conservative if we don't absolutely need to
                // keep packing
                const bool fits = sts.size() <= eng.getNumBuckets();
                if (fits && !s1.identicalTail(s2)) {
                    continue;
                }

                TeddySet merged(lits, eng.numMasks);
                merged.merge(s1);
                merged.merge(s2);
                const u64a newScore = merged.heuristic();
                const u64a oldScore = s1.heuristic() + s2.heuristic();

                if (newScore < oldScore) {
                    // NOTE(review): this leaves only the inner loop and does
                    // not update `best`, so a later pair may still replace
                    // this candidate - confirm that is intended.
                    m1 = i1;
                    m2 = i2;
                    break;
                }

                // Never create a run-prone set out of a pair that was not
                // already run-prone on both sides.
                const bool oldRunProne = s1.isRunProne() && s2.isRunProne();
                const bool newRunProne = merged.isRunProne();
                if (newRunProne && !oldRunProne) {
                    continue;
                }

                const u64a score = newScore - oldScore;
                if (score < best) {
                    best = score;
                    m1 = i1;
                    m2 = i2;
                }
            }
        }

        // if we didn't find a merge candidate, bail out
        if (m1 == sts.end() || m2 == sts.end()) {
            break;
        }

        // do the merge
        TeddySet nts(lits, eng.numMasks);
        nts.merge(*m1);
        nts.merge(*m2);
#ifdef TEDDY_DEBUG
        printf("Merging\n");
        printf("m1 = \n");
        m1->dump();
        printf("m2 = \n");
        m2->dump();
        printf("nts = \n");
        nts.dump();
        printf("\n===============================================\n");
#endif
        sts.erase(m1);
        sts.erase(m2);
        sts.insert(nts);
    }

    if (sts.size() > eng.getNumBuckets()) {
        return false;
    }

    // Surviving sets become buckets 0..n-1 in set order.
    u32 bucket = 0;
    for (const TeddySet &ts : sts) {
        for (u32 lit_id : ts.getLits()) {
            bucketToLits[bucket].push_back(lit_id);
        }
        bucket++;
    }
    return true;
}
|
||||
|
||||
/**
 * Builds the Teddy bytecode: header, nibble masks, confirm structures, flood
 * control and (optionally) the linked buffer, in that order.
 *
 * Returns nullptr without touching `link` when there are too many literals
 * or they cannot be packed into the engine's buckets. On success the
 * contents of link.first are copied into the table and link.first is freed.
 */
aligned_unique_ptr<FDR> TeddyCompiler::build(pair<u8 *, size_t> link) {
    if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
        DEBUG_PRINTF("too many literals: %zu\n", lits.size());
        return nullptr;
    }

#ifdef TEDDY_DEBUG
    for (size_t i = 0; i < lits.size(); i++) {
        printf("lit %zu (len = %zu, %s) is ", i, lits[i].s.size(),
               lits[i].nocase ? "caseless" : "caseful");
        for (size_t j = 0; j < lits[i].s.size(); j++) {
            printf("%02x", ((u32)lits[i].s[j])&0xff);
        }
        printf("\n");
    }
#endif

    // Without confirm every literal gets its own bucket; otherwise literals
    // are packed together.
    map<BucketIndex, std::vector<LiteralIndex> > bucketToLits;
    if (!eng.needConfirm(lits)) {
        for (u32 i = 0; i < lits.size(); i++) {
            bucketToLits[i].push_back(i);
        }
    } else if (!pack(bucketToLits)) {
        DEBUG_PRINTF("more lits (%zu) than buckets (%u), can't pack.\n",
                     lits.size(), eng.getNumBuckets());
        return nullptr;
    }

    // Each mask row holds one byte per group of eight buckets; there is a
    // low-nibble and a high-nibble table of 16 rows for every mask.
    const u32 maskWidth = eng.getNumBuckets() / 8;
    const size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;

    pair<u8 *, size_t> floodControlTmp = setupFDRFloodControl(lits, eng);
    pair<u8 *, size_t> confirmTmp
        = setupFullMultiConfs(lits, eng, bucketToLits, make_small);

    const size_t size = ROUNDUP_N(sizeof(Teddy) +
                                      maskLen +
                                      confirmTmp.second +
                                      floodControlTmp.second +
                                      link.second, 16 * maskWidth);

    aligned_unique_ptr<FDR> fdr = aligned_zmalloc_unique<FDR>(size);
    assert(fdr); // otherwise would have thrown std::bad_alloc
    Teddy *teddy = (Teddy *)fdr.get(); // ugly
    u8 *teddy_base = (u8 *)teddy;

    teddy->size = size;
    teddy->engineID = eng.getID();
    teddy->maxStringLen = verify_u32(maxLen(lits));

    // Confirm structures follow the masks directly.
    u8 *ptr = teddy_base + sizeof(Teddy) + maskLen;
    memcpy(ptr, confirmTmp.first, confirmTmp.second);
    ptr += confirmTmp.second;
    aligned_free(confirmTmp.first);

    teddy->floodOffset = verify_u32(ptr - teddy_base);
    memcpy(ptr, floodControlTmp.first, floodControlTmp.second);
    ptr += floodControlTmp.second;
    aligned_free(floodControlTmp.first);

    if (link.first) {
        teddy->link = verify_u32(ptr - teddy_base);
        memcpy(ptr, link.first, link.second);
        aligned_free(link.first);
    } else {
        teddy->link = 0;
    }

    u8 *baseMsk = teddy_base + sizeof(Teddy);

    for (const auto &bucket : bucketToLits) {
        const u32 bucket_id = bucket.first;
        const vector<LiteralIndex> &ids = bucket.second;
        const u8 bmsk = 1U << (bucket_id % 8);

        for (const LiteralIndex lit_id : ids) {
            const hwlmLiteral & l = lits[lit_id];
            DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id);
            const u32 sz = verify_u32(l.s.size());

            // fill in masks
            for (u32 j = 0; j < eng.numMasks; j++) {
                const u32 msk_id_lo = j * 2 * maskWidth + (bucket_id / 8);
                const u32 msk_id_hi = (j * 2 + 1) * maskWidth + (bucket_id / 8);
                u8 *const loRow = baseMsk + msk_id_lo * 16;
                u8 *const hiRow = baseMsk + msk_id_hi * 16;

                // if we don't have a char at this position, set this
                // bucket's bit in all 16 entries of both masks
                if (j >= sz) {
                    for (u32 n = 0; n < 16; n++) {
                        loRow[n] |= bmsk;
                        hiRow[n] |= bmsk;
                    }
                    continue;
                }

                // if we do have a char at this position
                const u8 c = l.s[sz - 1 - j];
                const u32 hiShift = 4;
                const u32 n_hi = (c >> hiShift) & 0xf;
                const u32 n_lo = c & 0xf;

                if (j < l.msk.size() && l.msk[l.msk.size() - 1 - j]) {
                    // A non-zero msk byte: accept every nibble that agrees
                    // with cmp under the mask.
                    const size_t mskPos = l.msk.size() - 1 - j;
                    const u8 m = l.msk[mskPos];
                    const u8 m_hi = (m >> hiShift) & 0xf;
                    const u8 m_lo = m & 0xf;
                    const u8 cmp = l.cmp[mskPos];
                    const u8 cmp_lo = cmp & 0xf;
                    const u8 cmp_hi = (cmp >> hiShift) & 0xf;

                    for (u8 cm = 0; cm < 0x10; cm++) {
                        if ((cm & m_lo) == (cmp_lo & m_lo)) {
                            loRow[cm] |= bmsk;
                        }
                        if ((cm & m_hi) == (cmp_hi & m_hi)) {
                            hiRow[cm] |= bmsk;
                        }
                    }
                    continue;
                }

                if (l.nocase && ourisalpha(c)) {
                    // Caseless letter: accept the high nibble with the 0x20
                    // case bit both cleared and set.
                    const u32 cmHalfClear = (0xdf >> hiShift) & 0xf;
                    const u32 cmHalfSet = (0x20 >> hiShift) & 0xf;
                    hiRow[n_hi & cmHalfClear] |= bmsk;
                    hiRow[n_hi | cmHalfSet] |= bmsk;
                } else {
                    hiRow[n_hi] |= bmsk;
                }
                loRow[n_lo] |= bmsk;
            }
        }
    }

#ifdef TEDDY_DEBUG
    for (u32 i = 0; i < eng.numMasks * 2; i++) {
        for (u32 j = 0; j < 16; j++) {
            u8 val = baseMsk[i * 16 + j];
            for (u32 k = 0; k < 8; k++) {
                printf("%s", ((val >> k) & 0x1) ? "1" : "0");
            }
            printf(" ");
        }
        printf("\n");
    }
#endif

    return fdr;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
 * Picks a Teddy engine - automatically when hint is HINT_INVALID, otherwise
 * the engine whose ID equals hint - and builds a table with it.
 *
 * Returns nullptr when no engine is available or the build fails.
 */
aligned_unique_ptr<FDR> teddyBuildTableHinted(const vector<hwlmLiteral> &lits,
                                              bool make_small, u32 hint,
                                              const target_t &target,
                                              pair<u8 *, size_t> link) {
    unique_ptr<TeddyEngineDescription> des =
        (hint == HINT_INVALID) ? chooseTeddyEngine(target, lits)
                               : getTeddyDescription(hint);
    if (!des) {
        return nullptr;
    }

    TeddyCompiler compiler(lits, *des, make_small);
    return compiler.build(link);
}
|
||||
|
||||
} // namespace ue2
|
56
src/fdr/teddy_compile.h
Normal file
56
src/fdr/teddy_compile.h
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief FDR literal matcher: Teddy build API.
|
||||
*/
|
||||
|
||||
#ifndef TEDDY_COMPILE_H
|
||||
#define TEDDY_COMPILE_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
|
||||
#include <vector>
|
||||
#include <utility> // std::pair
|
||||
|
||||
struct FDR;
|
||||
struct target_t;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct hwlmLiteral;
|
||||
|
||||
/**
 * \brief Build a Teddy table for the given literals.
 *
 * \param lits       literals to match
 * \param make_small forwarded to the confirm-structure builder
 * \param hint       engine ID to use, or HINT_INVALID to have an engine
 *                   chosen for \p target
 * \param target     target the engine is selected for
 * \param link       optional (buffer, size) pair appended to the table
 * \return the built table, or a null pointer on failure
 */
ue2::aligned_unique_ptr<FDR>
teddyBuildTableHinted(const std::vector<hwlmLiteral> &lits, bool make_small,
                      u32 hint, const target_t &target,
                      std::pair<u8 *, size_t> link);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // TEDDY_COMPILE_H
|
207
src/fdr/teddy_engine_description.cpp
Normal file
207
src/fdr/teddy_engine_description.cpp
Normal file
@@ -0,0 +1,207 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "fdr_confirm.h"
|
||||
#include "ue2common.h"
|
||||
#include "hs_internal.h"
|
||||
#include "fdr_engine_description.h"
|
||||
#include "teddy_internal.h"
|
||||
#include "teddy_engine_description.h"
|
||||
#include "util/make_unique.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
// Builds a description from a static engine definition: id, bucket count and
// confirm parameters go to the EngineDescription base, the CPU feature mask
// is converted with targetByArchFeatures(), and numMasks/packed are copied.
TeddyEngineDescription::TeddyEngineDescription(const TeddyEngineDef &def)
    : EngineDescription(def.id, targetByArchFeatures(def.cpu_features),
                        def.numBuckets, def.confirmPullBackDistance,
                        def.confirmTopLevelSplit),
      numMasks(def.numMasks), packed(def.packed) {}
|
||||
|
||||
// The default flood suffix length of a Teddy engine is its number of masks.
u32 TeddyEngineDescription::getDefaultFloodSuffixLength() const {
    return numMasks;
}
|
||||
|
||||
bool TeddyEngineDescription::needConfirm(const vector<hwlmLiteral> &lits) const {
|
||||
if (packed || lits.size() > getNumBuckets()) {
|
||||
return true;
|
||||
}
|
||||
for (const auto &lit : lits) {
|
||||
if (lit.s.size() > numMasks || !lit.msk.empty()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
#include "teddy_autogen_compiler.cpp"
|
||||
|
||||
static
|
||||
size_t maxFloodTailLen(const vector<hwlmLiteral> &vl) {
|
||||
size_t max_flood_tail = 0;
|
||||
for (const auto &lit : vl) {
|
||||
const string &s = lit.s;
|
||||
assert(!s.empty());
|
||||
size_t j;
|
||||
for (j = 1; j < s.length(); j++) {
|
||||
if (s[s.length() - j - 1] != s[s.length() - 1]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
max_flood_tail = max(max_flood_tail, j);
|
||||
}
|
||||
return max_flood_tail;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief True if this Teddy engine is qualified to handle this set of literals
|
||||
* on this target.
|
||||
*/
|
||||
static
|
||||
bool isAllowed(const vector<hwlmLiteral> &vl, const TeddyEngineDescription &eng,
|
||||
const size_t max_lit_len, const target_t &target) {
|
||||
if (!eng.isValidOnTarget(target)) {
|
||||
DEBUG_PRINTF("%u disallowed: not valid on target\n", eng.getID());
|
||||
return false;
|
||||
}
|
||||
if (eng.getNumBuckets() < vl.size() && !eng.packed) {
|
||||
DEBUG_PRINTF("%u disallowed: num buckets < num lits and not packed\n",
|
||||
eng.getID());
|
||||
return false;
|
||||
}
|
||||
if (eng.getNumBuckets() * TEDDY_BUCKET_LOAD < vl.size()) {
|
||||
DEBUG_PRINTF("%u disallowed: too many lits for num buckets\n",
|
||||
eng.getID());
|
||||
return false;
|
||||
}
|
||||
if (eng.numMasks > max_lit_len) {
|
||||
DEBUG_PRINTF("%u disallowed: more masks than max lit len (%zu)\n",
|
||||
eng.getID(), max_lit_len);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (vl.size() > 40) {
|
||||
u32 n_small_lits = 0;
|
||||
for (const auto &lit : vl) {
|
||||
if (lit.s.length() < eng.numMasks) {
|
||||
n_small_lits++;
|
||||
}
|
||||
}
|
||||
if (n_small_lits * 5 > vl.size()) {
|
||||
DEBUG_PRINTF("too many short literals (%u)\n", n_small_lits);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Scores every Teddy engine allowed for this literal set on this target and
 * returns a copy of the highest-scoring description, or nullptr when no
 * engine is allowed. On equal scores the engine listed first wins.
 */
unique_ptr<TeddyEngineDescription>
chooseTeddyEngine(const target_t &target, const vector<hwlmLiteral> &vl) {
    vector<TeddyEngineDescription> descs;
    getTeddyDescriptions(&descs);
    const TeddyEngineDescription *best = nullptr;

    const size_t max_lit_len = maxLen(vl);
    const size_t max_flood_tail = maxFloodTailLen(vl);
    DEBUG_PRINTF("%zu lits, max_lit_len=%zu, max_flood_tail=%zu\n", vl.size(),
                 max_lit_len, max_flood_tail);

    u32 best_score = 0;
    for (size_t engineID = 0; engineID < descs.size(); engineID++) {
        const TeddyEngineDescription &eng = descs[engineID];
        if (!isAllowed(vl, eng, max_lit_len, target)) {
            continue;
        }

        u32 score = 0;

        // We prefer unpacked Teddy models.
        if (!eng.packed) {
            score += 100;
        }

        // If we're heavily loaded, we prefer to have more masks.
        if (vl.size() > 4 * eng.getNumBuckets()) {
            score += eng.numMasks * 4;
        } else {
            // Lightly loaded cases are great.
            score += 100;
        }

        // We want enough masks to avoid becoming flood-prone.
        if (eng.numMasks > max_flood_tail) {
            score += 50;
        }

        // We prefer having 3 masks. 3 is just right.
        score += 6 / (abs(3 - (int)eng.numMasks) + 1);

        // We prefer cheaper, smaller Teddy models.
        score += 16 / eng.getNumBuckets();

        DEBUG_PRINTF("teddy %u: masks=%u, buckets=%u, packed=%u "
                     "-> score=%u\n",
                     eng.getID(), eng.numMasks, eng.getNumBuckets(),
                     eng.packed ? 1U : 0U, score);

        if (!best || score > best_score) {
            // Address of the candidate inside descs; the text "&eng;" had
            // been decoded as an HTML character entity here.
            best = &eng;
            best_score = score;
        }
    }

    if (!best) {
        DEBUG_PRINTF("failed to find engine\n");
        return nullptr;
    }

    DEBUG_PRINTF("using engine %u\n", best->getID());
    // Copy out of descs, which is destroyed on return.
    return ue2::make_unique<TeddyEngineDescription>(*best);
}
|
||||
|
||||
// Returns a copy of the Teddy engine description whose ID equals engineID,
// or nullptr when no such engine is listed.
unique_ptr<TeddyEngineDescription> getTeddyDescription(u32 engineID) {
    vector<TeddyEngineDescription> all_engines;
    getTeddyDescriptions(&all_engines);

    for (size_t idx = 0; idx < all_engines.size(); idx++) {
        const TeddyEngineDescription &candidate = all_engines[idx];
        if (candidate.getID() == engineID) {
            return ue2::make_unique<TeddyEngineDescription>(candidate);
        }
    }

    return nullptr;
}
|
||||
|
||||
} // namespace ue2
|
70
src/fdr/teddy_engine_description.h
Normal file
70
src/fdr/teddy_engine_description.h
Normal file
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef TEDDY_ENGINE_DESCRIPTION_H
|
||||
#define TEDDY_ENGINE_DESCRIPTION_H
|
||||
|
||||
#include "engine_description.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
// Tuning constant for Teddy. It is not referenced anywhere else in this header.
// NOTE(review): presumably a per-bucket literal load figure consumed by the
// Teddy compile code -- confirm at the use sites before changing the value.
#define TEDDY_BUCKET_LOAD 6
|
||||
|
||||
struct TeddyEngineDef {
|
||||
u32 id;
|
||||
u64a cpu_features;
|
||||
u32 numMasks;
|
||||
u32 numBuckets;
|
||||
bool packed;
|
||||
u32 confirmPullBackDistance;
|
||||
u32 confirmTopLevelSplit;
|
||||
};
|
||||
|
||||
class TeddyEngineDescription : public EngineDescription {
|
||||
public:
|
||||
u32 numMasks;
|
||||
bool packed;
|
||||
|
||||
explicit TeddyEngineDescription(const TeddyEngineDef &def);
|
||||
|
||||
u32 getDefaultFloodSuffixLength() const override;
|
||||
bool needConfirm(const std::vector<hwlmLiteral> &lits) const;
|
||||
};
|
||||
|
||||
std::unique_ptr<TeddyEngineDescription>
|
||||
chooseTeddyEngine(const target_t &target, const std::vector<hwlmLiteral> &vl);
|
||||
std::unique_ptr<TeddyEngineDescription> getTeddyDescription(u32 engineID);
|
||||
void getTeddyDescriptions(std::vector<TeddyEngineDescription> *out);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
46
src/fdr/teddy_internal.h
Normal file
46
src/fdr/teddy_internal.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef TEDDY_INTERNAL_H
|
||||
#define TEDDY_INTERNAL_H
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
// first part is compatible with an FDR
|
||||
struct Teddy {
|
||||
u32 engineID;
|
||||
u32 size;
|
||||
u32 maxStringLen;
|
||||
u32 floodOffset;
|
||||
u32 link;
|
||||
u32 pad1;
|
||||
u32 pad2;
|
||||
u32 pad3;
|
||||
};
|
||||
|
||||
#endif
|
Reference in New Issue
Block a user