From 4f32a167d53f7758c2b2a1befb06d7f504f161e0 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 23 May 2017 14:40:04 +1000 Subject: [PATCH] teddy: align major structures to cachelines --- src/fdr/teddy_avx2.c | 10 +++++----- src/fdr/teddy_compile.cpp | 32 ++++++++++++++++++-------------- src/fdr/teddy_runtime_common.h | 10 +++++----- 3 files changed, 28 insertions(+), 24 deletions(-) diff --git a/src/fdr/teddy_avx2.c b/src/fdr/teddy_avx2.c index 299825cc..38ac3f72 100644 --- a/src/fdr/teddy_avx2.c +++ b/src/fdr/teddy_avx2.c @@ -196,14 +196,14 @@ m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2, } static really_inline -const m256 * getMaskBase_avx2(const struct Teddy *teddy) { - return (const m256 *)((const u8 *)teddy + sizeof(struct Teddy)); +const m256 *getMaskBase_avx2(const struct Teddy *teddy) { + return (const m256 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy))); } static really_inline -const u32 * getConfBase_avx2(const struct Teddy *teddy, u8 numMask) { - return (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + - (numMask*32*2)); +const u32 *getConfBase_avx2(const struct Teddy *teddy, u8 numMask) { + return (const u32 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)) + + ROUNDUP_CL((numMask * 32 * 2))); } hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr, diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index 6f956e8c..19e595fb 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -313,35 +313,39 @@ bytecode_ptr<FDR> TeddyCompiler::build() { } u32 maskWidth = eng.getNumBuckets() / 8; - size_t maskLen = eng.numMasks * 16 * 2 * maskWidth; + size_t headerSize = ROUNDUP_CL(sizeof(Teddy)); + size_t maskLen = ROUNDUP_CL(eng.numMasks * 16 * 2 * maskWidth); - auto floodControlTmp = setupFDRFloodControl(lits, eng, grey); - auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small); + auto floodTable = setupFDRFloodControl(lits, eng, grey); + auto 
confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small); - size_t size = ROUNDUP_N(sizeof(Teddy) + - maskLen + - confirmTmp.size() + - floodControlTmp.size(), - 16 * maskWidth); + size_t size = headerSize + maskLen + ROUNDUP_CL(confirmTable.size()) + + floodTable.size(); auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64); assert(fdr); // otherwise would have thrown std::bad_alloc Teddy *teddy = (Teddy *)fdr.get(); // ugly u8 *teddy_base = (u8 *)teddy; + // Write header. teddy->size = size; teddy->engineID = eng.getID(); teddy->maxStringLen = verify_u32(maxLen(lits)); - u8 *ptr = teddy_base + sizeof(Teddy) + maskLen; - memcpy(ptr, confirmTmp.get(), confirmTmp.size()); - ptr += confirmTmp.size(); + // Write confirm structures. + u8 *ptr = teddy_base + headerSize + maskLen; + assert(ISALIGNED_CL(ptr)); + memcpy(ptr, confirmTable.get(), confirmTable.size()); + ptr += ROUNDUP_CL(confirmTable.size()); + // Write flood control structures. + assert(ISALIGNED_CL(ptr)); teddy->floodOffset = verify_u32(ptr - teddy_base); - memcpy(ptr, floodControlTmp.get(), floodControlTmp.size()); - ptr += floodControlTmp.size(); + memcpy(ptr, floodTable.get(), floodTable.size()); + ptr += floodTable.size(); - u8 *baseMsk = teddy_base + sizeof(Teddy); + // Write teddy masks. 
+ u8 *baseMsk = teddy_base + headerSize; for (const auto &b2l : bucketToLits) { const u32 &bucket_id = b2l.first; diff --git a/src/fdr/teddy_runtime_common.h b/src/fdr/teddy_runtime_common.h index c5f0885f..883a68fc 100644 --- a/src/fdr/teddy_runtime_common.h +++ b/src/fdr/teddy_runtime_common.h @@ -239,14 +239,14 @@ void do_confWithBitMany_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset, } static really_inline -const m128 * getMaskBase(const struct Teddy *teddy) { - return (const m128 *)((const u8 *)teddy + sizeof(struct Teddy)); +const m128 *getMaskBase(const struct Teddy *teddy) { + return (const m128 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy))); } static really_inline -const u32 * getConfBase(const struct Teddy *teddy, u8 numMask) { - return (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + - (numMask*32)); +const u32 *getConfBase(const struct Teddy *teddy, u8 numMask) { + return (const u32 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)) + + ROUNDUP_CL(numMask * 32)); } #endif /* TEDDY_RUNTIME_COMMON_H_ */