fdr/teddy: simplify computing of confirm base

This commit is contained in:
Justin Viiret 2017-05-24 11:10:39 +10:00 committed by Matthew Barr
parent 06bafae81d
commit b126cbf556
8 changed files with 24 additions and 28 deletions

View File

@ -734,8 +734,7 @@ hwlm_error_t fdr_engine_exec(const struct FDR *fdr,
const u64a *ft = const u64a *ft =
(const u64a *)((const u8 *)fdr + ROUNDUP_CL(sizeof(struct FDR))); (const u64a *)((const u8 *)fdr + ROUNDUP_CL(sizeof(struct FDR)));
assert(ISALIGNED_CL(ft)); assert(ISALIGNED_CL(ft));
const u32 *confBase = const u32 *confBase = (const u32 *)((const u8 *)fdr + fdr->confOffset);
(const u32 *)((const u8 *)ft + ROUNDUP_CL(fdr->tabSize));
assert(ISALIGNED_CL(confBase)); assert(ISALIGNED_CL(confBase));
struct zone zones[ZONE_MAX]; struct zone zones[ZONE_MAX];
assert(fdr->domain > 8 && fdr->domain < 16); assert(fdr->domain > 8 && fdr->domain < 16);

View File

@ -190,6 +190,7 @@ bytecode_ptr<FDR> FDRCompiler::setupFDR() {
// Write confirm structures. // Write confirm structures.
assert(ISALIGNED_CL(ptr)); assert(ISALIGNED_CL(ptr));
fdr->confOffset = verify_u32(ptr - fdr_base);
memcpy(ptr, confirmTable.get(), confirmTable.size()); memcpy(ptr, confirmTable.get(), confirmTable.size());
ptr += ROUNDUP_CL(confirmTable.size()); ptr += ROUNDUP_CL(confirmTable.size());

View File

@ -69,6 +69,7 @@ struct FDR {
u32 engineID; u32 engineID;
u32 size; u32 size;
u32 maxStringLen; u32 maxStringLen;
u32 confOffset;
u32 floodOffset; u32 floodOffset;
u8 stride; /* stride - how frequently the data is consulted by the first u8 stride; /* stride - how frequently the data is consulted by the first

View File

@ -191,7 +191,7 @@ hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
a->buf, a->len, a->start_offset); a->buf, a->len, a->start_offset);
const m128 *maskBase = getMaskBase(teddy); const m128 *maskBase = getMaskBase(teddy);
const u32 *confBase = getConfBase(teddy, 1); const u32 *confBase = getConfBase(teddy);
const u8 *mainStart = ROUNDUP_PTR(ptr, 16); const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);
@ -247,7 +247,7 @@ hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
a->buf, a->len, a->start_offset); a->buf, a->len, a->start_offset);
const m128 *maskBase = getMaskBase(teddy); const m128 *maskBase = getMaskBase(teddy);
const u32 *confBase = getConfBase(teddy, 1); const u32 *confBase = getConfBase(teddy);
const u8 *mainStart = ROUNDUP_PTR(ptr, 16); const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);
@ -303,7 +303,7 @@ hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
a->buf, a->len, a->start_offset); a->buf, a->len, a->start_offset);
const m128 *maskBase = getMaskBase(teddy); const m128 *maskBase = getMaskBase(teddy);
const u32 *confBase = getConfBase(teddy, 2); const u32 *confBase = getConfBase(teddy);
m128 res_old_1 = ones128(); m128 res_old_1 = ones128();
const u8 *mainStart = ROUNDUP_PTR(ptr, 16); const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
@ -360,7 +360,7 @@ hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
a->buf, a->len, a->start_offset); a->buf, a->len, a->start_offset);
const m128 *maskBase = getMaskBase(teddy); const m128 *maskBase = getMaskBase(teddy);
const u32 *confBase = getConfBase(teddy, 2); const u32 *confBase = getConfBase(teddy);
m128 res_old_1 = ones128(); m128 res_old_1 = ones128();
const u8 *mainStart = ROUNDUP_PTR(ptr, 16); const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
@ -417,7 +417,7 @@ hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
a->buf, a->len, a->start_offset); a->buf, a->len, a->start_offset);
const m128 *maskBase = getMaskBase(teddy); const m128 *maskBase = getMaskBase(teddy);
const u32 *confBase = getConfBase(teddy, 3); const u32 *confBase = getConfBase(teddy);
m128 res_old_1 = ones128(); m128 res_old_1 = ones128();
m128 res_old_2 = ones128(); m128 res_old_2 = ones128();
@ -479,7 +479,7 @@ hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
a->buf, a->len, a->start_offset); a->buf, a->len, a->start_offset);
const m128 *maskBase = getMaskBase(teddy); const m128 *maskBase = getMaskBase(teddy);
const u32 *confBase = getConfBase(teddy, 3); const u32 *confBase = getConfBase(teddy);
m128 res_old_1 = ones128(); m128 res_old_1 = ones128();
m128 res_old_2 = ones128(); m128 res_old_2 = ones128();
@ -541,7 +541,7 @@ hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
a->buf, a->len, a->start_offset); a->buf, a->len, a->start_offset);
const m128 *maskBase = getMaskBase(teddy); const m128 *maskBase = getMaskBase(teddy);
const u32 *confBase = getConfBase(teddy, 4); const u32 *confBase = getConfBase(teddy);
m128 res_old_1 = ones128(); m128 res_old_1 = ones128();
m128 res_old_2 = ones128(); m128 res_old_2 = ones128();
@ -605,7 +605,7 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
a->buf, a->len, a->start_offset); a->buf, a->len, a->start_offset);
const m128 *maskBase = getMaskBase(teddy); const m128 *maskBase = getMaskBase(teddy);
const u32 *confBase = getConfBase(teddy, 4); const u32 *confBase = getConfBase(teddy);
m128 res_old_1 = ones128(); m128 res_old_1 = ones128();
m128 res_old_2 = ones128(); m128 res_old_2 = ones128();

View File

@ -200,12 +200,6 @@ const m256 *getMaskBase_avx2(const struct Teddy *teddy) {
return (const m256 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy))); return (const m256 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)));
} }
/* Compute the pointer that the AVX2 exec functions use as confBase: start at
 * the Teddy header, skip the header size rounded up by ROUNDUP_CL, then skip a
 * further numMask * 32 * 2 bytes (also rounded up by ROUNDUP_CL).
 * NOTE(review): the skipped region is presumably the mask table -- confirm
 * against the compiler-side layout. */
static really_inline
const u32 *getConfBase_avx2(const struct Teddy *teddy, u8 numMask) {
    const u8 *headerStart = (const u8 *)teddy;
    /* Keep the additions in pointer-then-offset order, one term at a time. */
    const u8 *confStart = headerStart + ROUNDUP_CL(sizeof(struct Teddy)) +
                          ROUNDUP_CL((numMask * 32 * 2));
    return (const u32 *)confStart;
}
hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a, const struct FDR_Runtime_Args *a,
hwlm_group_t control) { hwlm_group_t control) {
@ -220,7 +214,7 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
a->buf, a->len, a->start_offset); a->buf, a->len, a->start_offset);
const m256 *maskBase = getMaskBase_avx2(teddy); const m256 *maskBase = getMaskBase_avx2(teddy);
const u32 *confBase = getConfBase_avx2(teddy, 1); const u32 *confBase = getConfBase(teddy);
const u8 *mainStart = ROUNDUP_PTR(ptr, 16); const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);
@ -276,7 +270,7 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
a->buf, a->len, a->start_offset); a->buf, a->len, a->start_offset);
const m256 *maskBase = getMaskBase_avx2(teddy); const m256 *maskBase = getMaskBase_avx2(teddy);
const u32 *confBase = getConfBase_avx2(teddy, 1); const u32 *confBase = getConfBase(teddy);
const u8 *mainStart = ROUNDUP_PTR(ptr, 16); const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);
@ -332,7 +326,7 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
a->buf, a->len, a->start_offset); a->buf, a->len, a->start_offset);
const m256 *maskBase = getMaskBase_avx2(teddy); const m256 *maskBase = getMaskBase_avx2(teddy);
const u32 *confBase = getConfBase_avx2(teddy, 2); const u32 *confBase = getConfBase(teddy);
m256 res_old_1 = ones256(); m256 res_old_1 = ones256();
const u8 *mainStart = ROUNDUP_PTR(ptr, 16); const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
@ -390,7 +384,7 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
a->buf, a->len, a->start_offset); a->buf, a->len, a->start_offset);
const m256 *maskBase = getMaskBase_avx2(teddy); const m256 *maskBase = getMaskBase_avx2(teddy);
const u32 *confBase = getConfBase_avx2(teddy, 2); const u32 *confBase = getConfBase(teddy);
m256 res_old_1 = ones256(); m256 res_old_1 = ones256();
const u8 *mainStart = ROUNDUP_PTR(ptr, 16); const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
@ -448,7 +442,7 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
a->buf, a->len, a->start_offset); a->buf, a->len, a->start_offset);
const m256 *maskBase = getMaskBase_avx2(teddy); const m256 *maskBase = getMaskBase_avx2(teddy);
const u32 *confBase = getConfBase_avx2(teddy, 3); const u32 *confBase = getConfBase(teddy);
m256 res_old_1 = ones256(); m256 res_old_1 = ones256();
m256 res_old_2 = ones256(); m256 res_old_2 = ones256();
@ -511,7 +505,7 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
a->buf, a->len, a->start_offset); a->buf, a->len, a->start_offset);
const m256 *maskBase = getMaskBase_avx2(teddy); const m256 *maskBase = getMaskBase_avx2(teddy);
const u32 *confBase = getConfBase_avx2(teddy, 3); const u32 *confBase = getConfBase(teddy);
m256 res_old_1 = ones256(); m256 res_old_1 = ones256();
m256 res_old_2 = ones256(); m256 res_old_2 = ones256();
@ -574,7 +568,7 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
a->buf, a->len, a->start_offset); a->buf, a->len, a->start_offset);
const m256 *maskBase = getMaskBase_avx2(teddy); const m256 *maskBase = getMaskBase_avx2(teddy);
const u32 *confBase = getConfBase_avx2(teddy, 4); const u32 *confBase = getConfBase(teddy);
m256 res_old_1 = ones256(); m256 res_old_1 = ones256();
m256 res_old_2 = ones256(); m256 res_old_2 = ones256();
@ -638,7 +632,7 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
a->buf, a->len, a->start_offset); a->buf, a->len, a->start_offset);
const m256 *maskBase = getMaskBase_avx2(teddy); const m256 *maskBase = getMaskBase_avx2(teddy);
const u32 *confBase = getConfBase_avx2(teddy, 4); const u32 *confBase = getConfBase(teddy);
m256 res_old_1 = ones256(); m256 res_old_1 = ones256();
m256 res_old_2 = ones256(); m256 res_old_2 = ones256();

View File

@ -335,6 +335,7 @@ bytecode_ptr<FDR> TeddyCompiler::build() {
// Write confirm structures. // Write confirm structures.
u8 *ptr = teddy_base + headerSize + maskLen; u8 *ptr = teddy_base + headerSize + maskLen;
assert(ISALIGNED_CL(ptr)); assert(ISALIGNED_CL(ptr));
teddy->confOffset = verify_u32(ptr - teddy_base);
memcpy(ptr, confirmTable.get(), confirmTable.size()); memcpy(ptr, confirmTable.get(), confirmTable.size());
ptr += ROUNDUP_CL(confirmTable.size()); ptr += ROUNDUP_CL(confirmTable.size());

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -36,6 +36,7 @@ struct Teddy {
u32 engineID; u32 engineID;
u32 size; u32 size;
u32 maxStringLen; u32 maxStringLen;
u32 confOffset;
u32 floodOffset; u32 floodOffset;
u32 link; u32 link;
u32 pad1; u32 pad1;

View File

@ -244,9 +244,8 @@ const m128 *getMaskBase(const struct Teddy *teddy) {
} }
static really_inline static really_inline
const u32 *getConfBase(const struct Teddy *teddy, u8 numMask) { const u32 *getConfBase(const struct Teddy *teddy) {
return (const u32 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)) + return (const u32 *)((const u8 *)teddy + teddy->confOffset);
ROUNDUP_CL(numMask * 32));
} }
#endif /* TEDDY_RUNTIME_COMMON_H_ */ #endif /* TEDDY_RUNTIME_COMMON_H_ */