teddy: align major structures to cachelines

This commit is contained in:
Justin Viiret 2017-05-23 14:40:04 +10:00 committed by Matthew Barr
parent 9bdd370163
commit 4f32a167d5
3 changed files with 28 additions and 24 deletions

View File

@@ -196,14 +196,14 @@ m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2,
} }
static really_inline static really_inline
const m256 * getMaskBase_avx2(const struct Teddy *teddy) { const m256 *getMaskBase_avx2(const struct Teddy *teddy) {
return (const m256 *)((const u8 *)teddy + sizeof(struct Teddy)); return (const m256 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)));
} }
static really_inline static really_inline
const u32 * getConfBase_avx2(const struct Teddy *teddy, u8 numMask) { const u32 *getConfBase_avx2(const struct Teddy *teddy, u8 numMask) {
return (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + return (const u32 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)) +
(numMask*32*2)); ROUNDUP_CL((numMask * 32 * 2)));
} }
hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,

View File

@@ -313,35 +313,39 @@ bytecode_ptr<FDR> TeddyCompiler::build() {
} }
u32 maskWidth = eng.getNumBuckets() / 8; u32 maskWidth = eng.getNumBuckets() / 8;
size_t maskLen = eng.numMasks * 16 * 2 * maskWidth; size_t headerSize = ROUNDUP_CL(sizeof(Teddy));
size_t maskLen = ROUNDUP_CL(eng.numMasks * 16 * 2 * maskWidth);
auto floodControlTmp = setupFDRFloodControl(lits, eng, grey); auto floodTable = setupFDRFloodControl(lits, eng, grey);
auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small); auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small);
size_t size = ROUNDUP_N(sizeof(Teddy) + size_t size = headerSize + maskLen + ROUNDUP_CL(confirmTable.size()) +
maskLen + floodTable.size();
confirmTmp.size() +
floodControlTmp.size(),
16 * maskWidth);
auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64); auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64);
assert(fdr); // otherwise would have thrown std::bad_alloc assert(fdr); // otherwise would have thrown std::bad_alloc
Teddy *teddy = (Teddy *)fdr.get(); // ugly Teddy *teddy = (Teddy *)fdr.get(); // ugly
u8 *teddy_base = (u8 *)teddy; u8 *teddy_base = (u8 *)teddy;
// Write header.
teddy->size = size; teddy->size = size;
teddy->engineID = eng.getID(); teddy->engineID = eng.getID();
teddy->maxStringLen = verify_u32(maxLen(lits)); teddy->maxStringLen = verify_u32(maxLen(lits));
u8 *ptr = teddy_base + sizeof(Teddy) + maskLen; // Write confirm structures.
memcpy(ptr, confirmTmp.get(), confirmTmp.size()); u8 *ptr = teddy_base + headerSize + maskLen;
ptr += confirmTmp.size(); assert(ISALIGNED_CL(ptr));
memcpy(ptr, confirmTable.get(), confirmTable.size());
ptr += ROUNDUP_CL(confirmTable.size());
// Write flood control structures.
assert(ISALIGNED_CL(ptr));
teddy->floodOffset = verify_u32(ptr - teddy_base); teddy->floodOffset = verify_u32(ptr - teddy_base);
memcpy(ptr, floodControlTmp.get(), floodControlTmp.size()); memcpy(ptr, floodTable.get(), floodTable.size());
ptr += floodControlTmp.size(); ptr += floodTable.size();
u8 *baseMsk = teddy_base + sizeof(Teddy); // Write teddy masks.
u8 *baseMsk = teddy_base + headerSize;
for (const auto &b2l : bucketToLits) { for (const auto &b2l : bucketToLits) {
const u32 &bucket_id = b2l.first; const u32 &bucket_id = b2l.first;

View File

@@ -239,14 +239,14 @@ void do_confWithBitMany_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset,
} }
static really_inline static really_inline
const m128 * getMaskBase(const struct Teddy *teddy) { const m128 *getMaskBase(const struct Teddy *teddy) {
return (const m128 *)((const u8 *)teddy + sizeof(struct Teddy)); return (const m128 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)));
} }
static really_inline static really_inline
const u32 * getConfBase(const struct Teddy *teddy, u8 numMask) { const u32 *getConfBase(const struct Teddy *teddy, u8 numMask) {
return (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + return (const u32 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)) +
(numMask*32)); ROUNDUP_CL(numMask * 32));
} }
#endif /* TEDDY_RUNTIME_COMMON_H_ */ #endif /* TEDDY_RUNTIME_COMMON_H_ */