mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2026-01-02 14:44:41 +03:00
teddy: remove python codegen, refactor code
Major cleanup of the Teddy runtime code. Removes python code generation, splits AVX2 models into their own file, improves readability.
This commit is contained in:
committed by
Matthew Barr
parent
6899cab370
commit
ed772380c0
744
src/fdr/teddy.c
744
src/fdr/teddy.c
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -26,11 +26,19 @@
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
/** \file
|
||||
* \brief Teddy literal matcher: SSSE3 engine runtime.
|
||||
*/
|
||||
|
||||
#include "fdr_internal.h"
|
||||
#include "flood_runtime.h"
|
||||
#include "teddy.h"
|
||||
#include "teddy_internal.h"
|
||||
#include "teddy_runtime_common.h"
|
||||
#include "util/simd_utils.h"
|
||||
#include "util/simd_utils_ssse3.h"
|
||||
|
||||
static const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
|
||||
const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
@@ -67,178 +75,584 @@ static const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
|
||||
};
|
||||
|
||||
// Note: p_mask is an output param that initialises a poison mask.
|
||||
UNUSED static really_inline
|
||||
m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
|
||||
const u8 *buf_history, size_t len_history,
|
||||
const u32 nMasks) {
|
||||
union {
|
||||
u8 val8[16];
|
||||
m128 val128;
|
||||
} u;
|
||||
u.val128 = zeroes128();
|
||||
/*
 * CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn)
 *
 * If the 128-bit candidate vector `var` is non-zero, split it into
 * machine-word sized pieces and hand every non-zero piece to `conf_fn`,
 * together with the byte offset of that piece inside the 16-byte block
 * (`offset` plus 0/8 on 64-bit builds, plus 0/4/8/12 on 32-bit builds).
 * CHECK_HWLM_TERMINATE_MATCHING (defined elsewhere) is evaluated after each
 * confirm call.
 *
 * The expansion refers to the following names from the enclosing function:
 * confBase, a, ptr, control and last_match.
 *
 * The definition deliberately does NOT end in a semicolon, so that
 * `CONFIRM_TEDDY(...);` expands to exactly one statement and stays safe
 * inside an unbraced if/else. `var` is evaluated more than once; pass a
 * plain variable.
 */
#ifdef ARCH_64_BIT
#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn)                 \
do {                                                                        \
    if (unlikely(isnonzero128(var))) {                                      \
        u64a lo = movq(var);                                                \
        u64a hi = movq(byteShiftRight128((var), 8));                        \
        if (unlikely(lo)) {                                                 \
            conf_fn(&lo, (bucket), (offset), confBase, (reason), a, ptr,    \
                    control, &last_match);                                  \
            CHECK_HWLM_TERMINATE_MATCHING;                                  \
        }                                                                   \
        if (unlikely(hi)) {                                                 \
            conf_fn(&hi, (bucket), (offset) + 8, confBase, (reason), a,     \
                    ptr, control, &last_match);                             \
            CHECK_HWLM_TERMINATE_MATCHING;                                  \
        }                                                                   \
    }                                                                       \
} while (0)
#else
#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn)                 \
do {                                                                        \
    if (unlikely(isnonzero128(var))) {                                      \
        u32 part1 = movd(var);                                              \
        u32 part2 = movd(byteShiftRight128((var), 4));                      \
        u32 part3 = movd(byteShiftRight128((var), 8));                      \
        u32 part4 = movd(byteShiftRight128((var), 12));                     \
        if (unlikely(part1)) {                                              \
            conf_fn(&part1, (bucket), (offset), confBase, (reason), a,      \
                    ptr, control, &last_match);                             \
            CHECK_HWLM_TERMINATE_MATCHING;                                  \
        }                                                                   \
        if (unlikely(part2)) {                                              \
            conf_fn(&part2, (bucket), (offset) + 4, confBase, (reason), a,  \
                    ptr, control, &last_match);                             \
            CHECK_HWLM_TERMINATE_MATCHING;                                  \
        }                                                                   \
        if (unlikely(part3)) {                                              \
            conf_fn(&part3, (bucket), (offset) + 8, confBase, (reason), a,  \
                    ptr, control, &last_match);                             \
            CHECK_HWLM_TERMINATE_MATCHING;                                  \
        }                                                                   \
        if (unlikely(part4)) {                                              \
            conf_fn(&part4, (bucket), (offset) + 12, confBase, (reason), a, \
                    ptr, control, &last_match);                             \
            CHECK_HWLM_TERMINATE_MATCHING;                                  \
        }                                                                   \
    }                                                                       \
} while (0)
#endif
|
||||
|
||||
if (ptr >= lo) {
|
||||
u32 avail = (u32)(hi - ptr);
|
||||
if (avail >= 16) {
|
||||
*p_mask = load128((const void*)(p_mask_arr[16] + 16));
|
||||
return loadu128(ptr);
|
||||
}
|
||||
*p_mask = load128((const void*)(p_mask_arr[avail] + 16));
|
||||
for (u32 i = 0; i < avail; i++) {
|
||||
u.val8[i] = ptr[i];
|
||||
}
|
||||
} else {
|
||||
u32 need = MIN((u32)(lo - ptr), MIN(len_history, nMasks - 1));
|
||||
u32 start = (u32)(lo - ptr);
|
||||
u32 i;
|
||||
for (i = start - need; ptr + i < lo; i++) {
|
||||
u.val8[i] = buf_history[len_history - (lo - (ptr + i))];
|
||||
}
|
||||
u32 end = MIN(16, (u32)(hi - ptr));
|
||||
*p_mask = loadu128((const void*)(p_mask_arr[end - start] + 16 - start));
|
||||
for (; i < end; i++) {
|
||||
u.val8[i] = ptr[i];
|
||||
}
|
||||
static really_inline
|
||||
m128 prep_conf_teddy_m1(const m128 *maskBase, m128 p_mask, m128 val) {
|
||||
m128 mask = set16x8(0xf);
|
||||
m128 lo = and128(val, mask);
|
||||
m128 hi = and128(rshift2x64(val, 4), mask);
|
||||
return and128(and128(pshufb(maskBase[0*2], lo),
|
||||
pshufb(maskBase[0*2+1], hi)), p_mask);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 p_mask,
|
||||
m128 val) {
|
||||
m128 mask = set16x8(0xf);
|
||||
m128 lo = and128(val, mask);
|
||||
m128 hi = and128(rshift2x64(val, 4), mask);
|
||||
m128 r = prep_conf_teddy_m1(maskBase, p_mask, val);
|
||||
|
||||
m128 res_1 = and128(pshufb(maskBase[1*2], lo),
|
||||
pshufb(maskBase[1*2+1], hi));
|
||||
m128 res_shifted_1 = palignr(res_1, *old_1, 16-1);
|
||||
*old_1 = res_1;
|
||||
return and128(and128(r, p_mask), res_shifted_1);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2,
|
||||
m128 p_mask, m128 val) {
|
||||
m128 mask = set16x8(0xf);
|
||||
m128 lo = and128(val, mask);
|
||||
m128 hi = and128(rshift2x64(val, 4), mask);
|
||||
m128 r = prep_conf_teddy_m2(maskBase, old_1, p_mask, val);
|
||||
|
||||
m128 res_2 = and128(pshufb(maskBase[2*2], lo),
|
||||
pshufb(maskBase[2*2+1], hi));
|
||||
m128 res_shifted_2 = palignr(res_2, *old_2, 16-2);
|
||||
*old_2 = res_2;
|
||||
return and128(r, res_shifted_2);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2,
|
||||
m128 *old_3, m128 p_mask, m128 val) {
|
||||
m128 mask = set16x8(0xf);
|
||||
m128 lo = and128(val, mask);
|
||||
m128 hi = and128(rshift2x64(val, 4), mask);
|
||||
m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, p_mask, val);
|
||||
|
||||
m128 res_3 = and128(pshufb(maskBase[3*2], lo),
|
||||
pshufb(maskBase[3*2+1], hi));
|
||||
m128 res_shifted_3 = palignr(res_3, *old_3, 16-3);
|
||||
*old_3 = res_3;
|
||||
return and128(r, res_shifted_3);
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
const struct Teddy *teddy = (const struct Teddy *)fdr;
|
||||
const size_t iterBytes = 32;
|
||||
DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",
|
||||
a->buf, a->len, a->start_offset);
|
||||
|
||||
const m128 *maskBase = getMaskBase(teddy);
|
||||
const u32 *confBase = getConfBase(teddy, 1);
|
||||
|
||||
const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
|
||||
DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);
|
||||
if (ptr < mainStart) {
|
||||
ptr = mainStart - 16;
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 1);
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
return u.val128;
|
||||
}
|
||||
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
UNUSED static really_inline
|
||||
m256 vectoredLoad2x128(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
|
||||
const u8 *buf_history, size_t len_history,
|
||||
const u32 nMasks) {
|
||||
m128 p_mask128;
|
||||
m256 ret = set2x128(vectoredLoad128(&p_mask128, ptr, lo, hi, buf_history, len_history, nMasks));
|
||||
*p_mask = set2x128(p_mask128);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Poison-mask source table for 256-bit vectored loads: 33 rows of 64 bytes.
 * Row n consists of 32 zero bytes, followed by n bytes of 0xff, followed by
 * zero bytes up to the row length. vectoredLoad256() reads a 32-byte window
 * from a row, starting at byte 32 or at byte (32 - start).
 */
static const u8 ALIGN_AVX_DIRECTIVE p_mask_arr256[33][64] = {
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
|
||||
};
|
||||
|
||||
|
||||
UNUSED static really_inline
|
||||
m256 vectoredLoad256(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
|
||||
const u8 *buf_history, size_t len_history) {
|
||||
union {
|
||||
u8 val8[32];
|
||||
m256 val256;
|
||||
} u;
|
||||
|
||||
if (ptr >= lo) {
|
||||
u32 avail = (u32)(hi - ptr);
|
||||
if (avail >= 32) {
|
||||
*p_mask = load256((const void*)(p_mask_arr256[32] + 32));
|
||||
return loadu256(ptr);
|
||||
}
|
||||
*p_mask = load256((const void*)(p_mask_arr256[avail] + 32));
|
||||
for (u32 i = 0; i < avail; i++) {
|
||||
u.val8[i] = ptr[i];
|
||||
}
|
||||
} else {
|
||||
// need contains "how many chars to pull from history"
|
||||
// calculate based on what we need, what we have in the buffer
|
||||
// and only what we need to make primary confirm work
|
||||
u32 start = (u32)(lo - ptr);
|
||||
u32 i;
|
||||
for (i = start; ptr + i < lo; i++) {
|
||||
u.val8[i] = buf_history[len_history - (lo - (ptr + i))];
|
||||
}
|
||||
u32 end = MIN(32, (u32)(hi - ptr));
|
||||
*p_mask = loadu256((const void*)(p_mask_arr256[end - start] + 32 - start));
|
||||
for (; i < end; i++) {
|
||||
u.val8[i] = ptr[i];
|
||||
}
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
return u.val256;
|
||||
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit1_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit1_teddy);
|
||||
}
|
||||
|
||||
for (; ptr < buf_end; ptr += 16) {
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 1);
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
const struct Teddy *teddy = (const struct Teddy *)fdr;
|
||||
const size_t iterBytes = 32;
|
||||
DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",
|
||||
a->buf, a->len, a->start_offset);
|
||||
|
||||
#endif // __AVX2__
|
||||
const m128 *maskBase = getMaskBase(teddy);
|
||||
const u32 *confBase = getConfBase(teddy, 1);
|
||||
|
||||
#define P0(cnd) unlikely(cnd)
|
||||
const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
|
||||
DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);
|
||||
if (ptr < mainStart) {
|
||||
ptr = mainStart - 16;
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 1);
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "flood_runtime.h"
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
#include "fdr_confirm.h"
|
||||
#include "fdr_confirm_runtime.h"
|
||||
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
}
|
||||
|
||||
#include "fdr_loadval.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "teddy_internal.h"
|
||||
for (; ptr < buf_end; ptr += 16) {
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 1);
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
#include "teddy_autogen.c"
|
||||
hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
const struct Teddy *teddy = (const struct Teddy *)fdr;
|
||||
const size_t iterBytes = 32;
|
||||
DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",
|
||||
a->buf, a->len, a->start_offset);
|
||||
|
||||
const m128 *maskBase = getMaskBase(teddy);
|
||||
const u32 *confBase = getConfBase(teddy, 2);
|
||||
|
||||
m128 res_old_1 = ones128();
|
||||
const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
|
||||
DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);
|
||||
if (ptr < mainStart) {
|
||||
ptr = mainStart - 16;
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 2);
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
||||
load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
||||
load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
||||
load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
}
|
||||
|
||||
for (; ptr < buf_end; ptr += 16) {
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 2);
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
const struct Teddy *teddy = (const struct Teddy *)fdr;
|
||||
const size_t iterBytes = 32;
|
||||
DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",
|
||||
a->buf, a->len, a->start_offset);
|
||||
|
||||
const m128 *maskBase = getMaskBase(teddy);
|
||||
const u32 *confBase = getConfBase(teddy, 2);
|
||||
|
||||
m128 res_old_1 = ones128();
|
||||
const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
|
||||
DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);
|
||||
if (ptr < mainStart) {
|
||||
ptr = mainStart - 16;
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 2);
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
||||
load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
||||
load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
||||
load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
}
|
||||
|
||||
for (; ptr < buf_end; ptr += 16) {
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 2);
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * \brief Teddy scan of a->buf built on prep_conf_teddy_m3, confirming
 * candidates through do_confWithBitMany_teddy.
 *
 * Walks from a->buf + a->start_offset to a->buf + a->len in 16-byte blocks:
 * a masked head block when the start is not on a 16-byte boundary, a main
 * loop handling 32 bytes per iteration, and a masked tail. The value of
 * *a->groups is copied on entry and stored back before the normal return.
 *
 * \param fdr Engine structure; cast to const struct Teddy * to obtain the
 *            mask base and confirm base.
 * \param a   Runtime arguments (buffer, length, start offset, history,
 *            groups, first flood-detect position).
 * \return HWLM_SUCCESS once the end of the buffer is reached.
 *         NOTE(review): CHECK_FLOOD and CONFIRM_TEDDY are macros defined
 *         outside this function and may return early - confirm.
 */
hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
// Local copy of the groups value; written back to *a->groups at the end.
hwlmcb_rv_t controlVal = *a->groups;
|
||||
// NOTE(review): control, floodBackoff, tryFloodDetect, last_match and
// confBase are never referenced directly below; presumably the CHECK_FLOOD
// and CONFIRM_TEDDY macros pick them up by name - do not rename without
// checking the macro definitions.
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
const struct Teddy *teddy = (const struct Teddy *)fdr;
|
||||
// Bytes consumed per main-loop iteration (two 16-byte blocks).
const size_t iterBytes = 32;
|
||||
DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",
|
||||
a->buf, a->len, a->start_offset);
|
||||
|
|
||||
const m128 *maskBase = getMaskBase(teddy);
|
||||
const u32 *confBase = getConfBase(teddy, 3);
|
||||
|
|
||||
// Handed by pointer to every prep_conf_teddy_m3 call, starting as all-ones.
// NOTE(review): presumably carries results across 16-byte block
// boundaries - confirm against prep_conf_teddy_m3.
m128 res_old_1 = ones128();
|
||||
m128 res_old_2 = ones128();
|
||||
const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
|
||||
DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);
|
||||
// Unaligned start: back up to mainStart - 16 and load through
// vectoredLoad128, which also fills in the poison mask p_mask.
if (ptr < mainStart) {
|
||||
ptr = mainStart - 16;
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 3);
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
|
||||
// With more than 16 bytes left, handle one block via load128 and an
// all-ones mask before entering the 32-byte main loop.
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones128(), load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
|
||||
// Main loop: two full 16-byte blocks per iteration (offsets 0 and 16),
// prefetching iterBytes*4 bytes ahead of ptr.
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones128(), load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones128(), load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
}
|
||||
|
|
||||
// Tail: whatever remains, 16 bytes at a time, again through
// vectoredLoad128 and its poison mask.
for (; ptr < buf_end; ptr += 16) {
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 3);
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
}
|
||||
// Publish the (possibly updated) groups value back to the caller.
*a->groups = controlVal;
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * \brief Teddy scan of a->buf built on prep_conf_teddy_m3, confirming
 * candidates through do_confWithBit_teddy.
 *
 * Walks from a->buf + a->start_offset to a->buf + a->len in 16-byte blocks:
 * a masked head block when the start is not on a 16-byte boundary, a main
 * loop handling 32 bytes per iteration, and a masked tail. The value of
 * *a->groups is copied on entry and stored back before the normal return.
 *
 * \param fdr Engine structure; cast to const struct Teddy * to obtain the
 *            mask base and confirm base.
 * \param a   Runtime arguments (buffer, length, start offset, history,
 *            groups, first flood-detect position).
 * \return HWLM_SUCCESS once the end of the buffer is reached.
 *         NOTE(review): CHECK_FLOOD and CONFIRM_TEDDY are macros defined
 *         outside this function and may return early - confirm.
 */
hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
// Local copy of the groups value; written back to *a->groups at the end.
hwlmcb_rv_t controlVal = *a->groups;
|
||||
// NOTE(review): control, floodBackoff, tryFloodDetect, last_match and
// confBase are never referenced directly below; presumably the CHECK_FLOOD
// and CONFIRM_TEDDY macros pick them up by name - do not rename without
// checking the macro definitions.
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
const struct Teddy *teddy = (const struct Teddy *)fdr;
|
||||
// Bytes consumed per main-loop iteration (two 16-byte blocks).
const size_t iterBytes = 32;
|
||||
DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",
|
||||
a->buf, a->len, a->start_offset);
|
||||
|
|
||||
const m128 *maskBase = getMaskBase(teddy);
|
||||
const u32 *confBase = getConfBase(teddy, 3);
|
||||
|
|
||||
// Handed by pointer to every prep_conf_teddy_m3 call, starting as all-ones.
// NOTE(review): presumably carries results across 16-byte block
// boundaries - confirm against prep_conf_teddy_m3.
m128 res_old_1 = ones128();
|
||||
m128 res_old_2 = ones128();
|
||||
const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
|
||||
DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);
|
||||
// Unaligned start: back up to mainStart - 16 and load through
// vectoredLoad128, which also fills in the poison mask p_mask.
if (ptr < mainStart) {
|
||||
ptr = mainStart - 16;
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 3);
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
|
||||
// With more than 16 bytes left, handle one block via load128 and an
// all-ones mask before entering the 32-byte main loop.
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones128(), load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
|
||||
// Main loop: two full 16-byte blocks per iteration (offsets 0 and 16),
// prefetching iterBytes*4 bytes ahead of ptr.
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones128(), load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones128(), load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
}
|
||||
|
|
||||
// Tail: whatever remains, 16 bytes at a time, again through
// vectoredLoad128 and its poison mask.
for (; ptr < buf_end; ptr += 16) {
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 3);
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
}
|
||||
// Publish the (possibly updated) groups value back to the caller.
*a->groups = controlVal;
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * \brief Teddy scan of a->buf built on prep_conf_teddy_m4, confirming
 * candidates through do_confWithBitMany_teddy.
 *
 * Walks from a->buf + a->start_offset to a->buf + a->len in 16-byte blocks:
 * a masked head block when the start is not on a 16-byte boundary, a main
 * loop handling 32 bytes per iteration, and a masked tail. The value of
 * *a->groups is copied on entry and stored back before the normal return.
 *
 * \param fdr Engine structure; cast to const struct Teddy * to obtain the
 *            mask base and confirm base.
 * \param a   Runtime arguments (buffer, length, start offset, history,
 *            groups, first flood-detect position).
 * \return HWLM_SUCCESS once the end of the buffer is reached.
 *         NOTE(review): CHECK_FLOOD and CONFIRM_TEDDY are macros defined
 *         outside this function and may return early - confirm.
 */
hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
// Local copy of the groups value; written back to *a->groups at the end.
hwlmcb_rv_t controlVal = *a->groups;
|
||||
// NOTE(review): control, floodBackoff, tryFloodDetect, last_match and
// confBase are never referenced directly below; presumably the CHECK_FLOOD
// and CONFIRM_TEDDY macros pick them up by name - do not rename without
// checking the macro definitions.
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
const struct Teddy *teddy = (const struct Teddy *)fdr;
|
||||
// Bytes consumed per main-loop iteration (two 16-byte blocks).
const size_t iterBytes = 32;
|
||||
DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",
|
||||
a->buf, a->len, a->start_offset);
|
||||
|
|
||||
const m128 *maskBase = getMaskBase(teddy);
|
||||
const u32 *confBase = getConfBase(teddy, 4);
|
||||
|
|
||||
// Handed by pointer to every prep_conf_teddy_m4 call, starting as all-ones.
// NOTE(review): presumably carries results across 16-byte block
// boundaries - confirm against prep_conf_teddy_m4.
m128 res_old_1 = ones128();
|
||||
m128 res_old_2 = ones128();
|
||||
m128 res_old_3 = ones128();
|
||||
const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
|
||||
DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);
|
||||
// Unaligned start: back up to mainStart - 16 and load through
// vectoredLoad128, which also fills in the poison mask p_mask.
if (ptr < mainStart) {
|
||||
ptr = mainStart - 16;
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 4);
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
|
||||
// With more than 16 bytes left, handle one block via load128 and an
// all-ones mask before entering the 32-byte main loop.
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
|
||||
// Main loop: two full 16-byte blocks per iteration (offsets 0 and 16),
// prefetching iterBytes*4 bytes ahead of ptr.
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
}
|
||||
|
|
||||
// Tail: whatever remains, 16 bytes at a time, again through
// vectoredLoad128 and its poison mask.
for (; ptr < buf_end; ptr += 16) {
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 4);
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
}
|
||||
// Publish the (possibly updated) groups value back to the caller.
*a->groups = controlVal;
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * \brief Teddy scan of a->buf built on prep_conf_teddy_m4, confirming
 * candidates through do_confWithBit_teddy.
 *
 * Walks from a->buf + a->start_offset to a->buf + a->len in 16-byte blocks:
 * a masked head block when the start is not on a 16-byte boundary, a main
 * loop handling 32 bytes per iteration, and a masked tail. The value of
 * *a->groups is copied on entry and stored back before the normal return.
 *
 * \param fdr Engine structure; cast to const struct Teddy * to obtain the
 *            mask base and confirm base.
 * \param a   Runtime arguments (buffer, length, start offset, history,
 *            groups, first flood-detect position).
 * \return HWLM_SUCCESS once the end of the buffer is reached.
 *         NOTE(review): CHECK_FLOOD and CONFIRM_TEDDY are macros defined
 *         outside this function and may return early - confirm.
 */
hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
// Local copy of the groups value; written back to *a->groups at the end.
hwlmcb_rv_t controlVal = *a->groups;
|
||||
// NOTE(review): control, floodBackoff, tryFloodDetect, last_match and
// confBase are never referenced directly below; presumably the CHECK_FLOOD
// and CONFIRM_TEDDY macros pick them up by name - do not rename without
// checking the macro definitions.
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
const struct Teddy *teddy = (const struct Teddy *)fdr;
|
||||
// Bytes consumed per main-loop iteration (two 16-byte blocks).
const size_t iterBytes = 32;
|
||||
DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",
|
||||
a->buf, a->len, a->start_offset);
|
||||
|
|
||||
const m128 *maskBase = getMaskBase(teddy);
|
||||
const u32 *confBase = getConfBase(teddy, 4);
|
||||
|
|
||||
// Handed by pointer to every prep_conf_teddy_m4 call, starting as all-ones.
// NOTE(review): presumably carries results across 16-byte block
// boundaries - confirm against prep_conf_teddy_m4.
m128 res_old_1 = ones128();
|
||||
m128 res_old_2 = ones128();
|
||||
m128 res_old_3 = ones128();
|
||||
const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
|
||||
DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);
|
||||
// Unaligned start: back up to mainStart - 16 and load through
// vectoredLoad128, which also fills in the poison mask p_mask.
if (ptr < mainStart) {
|
||||
ptr = mainStart - 16;
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 4);
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
|
||||
// With more than 16 bytes left, handle one block via load128 and an
// all-ones mask before entering the 32-byte main loop.
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
|
||||
// Main loop: two full 16-byte blocks per iteration (offsets 0 and 16),
// prefetching iterBytes*4 bytes ahead of ptr.
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
}
|
||||
|
|
||||
// Tail: whatever remains, 16 bytes at a time, again through
// vectoredLoad128 and its poison mask.
for (; ptr < buf_end; ptr += 16) {
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 4);
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
}
|
||||
// Publish the (possibly updated) groups value back to the caller.
*a->groups = controlVal;
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user