teddy: align major structures to cachelines

This commit is contained in:
Justin Viiret 2017-05-23 14:40:04 +10:00 committed by Matthew Barr
parent 9bdd370163
commit 4f32a167d5
3 changed files with 28 additions and 24 deletions

View File

@ -197,13 +197,13 @@ m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2,
/*
 * Returns a const m256 pointer located at a byte offset past the start of the
 * given Teddy structure (presumably the start of the AVX2 mask table, going by
 * the function name -- confirm against the bytecode layout).
 *
 * NOTE(review): two return statements appear below and only the first is
 * reachable as written. This looks like a diff rendering with the -/+ markers
 * stripped (old line followed by its replacement) -- confirm against the real
 * file before relying on either offset.
 */
static really_inline
const m256 *getMaskBase_avx2(const struct Teddy *teddy) {
/* Offset is exactly sizeof(struct Teddy). */
return (const m256 *)((const u8 *)teddy + sizeof(struct Teddy));
/* Offset is ROUNDUP_CL(sizeof(struct Teddy)). ROUNDUP_CL is defined elsewhere;
 * presumably it rounds up to a cache-line multiple -- TODO confirm. */
return (const m256 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)));
}
/*
 * Returns a const u32 pointer located past both the Teddy header and a region
 * of numMask * 32 * 2 bytes (presumably the AVX2 mask table, with the confirm
 * structures starting at the returned address -- confirm against the bytecode
 * layout).
 *
 * NOTE(review): two return statements appear below and only the first is
 * reachable as written. This looks like a diff rendering with the -/+ markers
 * stripped (old line followed by its replacement) -- confirm against the real
 * file before relying on either offset.
 */
static really_inline
const u32 *getConfBase_avx2(const struct Teddy *teddy, u8 numMask) {
/* Unrounded form: header size plus numMask * 32 * 2 bytes. */
return (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) +
(numMask*32*2));
/* Rounded form: each of the two terms is passed through ROUNDUP_CL
 * separately before being added. ROUNDUP_CL is defined elsewhere; presumably
 * it rounds up to a cache-line multiple -- TODO confirm. */
return (const u32 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)) +
ROUNDUP_CL((numMask * 32 * 2)));
}
hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,

View File

@ -313,35 +313,39 @@ bytecode_ptr<FDR> TeddyCompiler::build() {
}
u32 maskWidth = eng.getNumBuckets() / 8;
size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;
size_t headerSize = ROUNDUP_CL(sizeof(Teddy));
size_t maskLen = ROUNDUP_CL(eng.numMasks * 16 * 2 * maskWidth);
auto floodControlTmp = setupFDRFloodControl(lits, eng, grey);
auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small);
auto floodTable = setupFDRFloodControl(lits, eng, grey);
auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small);
size_t size = ROUNDUP_N(sizeof(Teddy) +
maskLen +
confirmTmp.size() +
floodControlTmp.size(),
16 * maskWidth);
size_t size = headerSize + maskLen + ROUNDUP_CL(confirmTable.size()) +
floodTable.size();
auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64);
assert(fdr); // otherwise would have thrown std::bad_alloc
Teddy *teddy = (Teddy *)fdr.get(); // ugly
u8 *teddy_base = (u8 *)teddy;
// Write header.
teddy->size = size;
teddy->engineID = eng.getID();
teddy->maxStringLen = verify_u32(maxLen(lits));
u8 *ptr = teddy_base + sizeof(Teddy) + maskLen;
memcpy(ptr, confirmTmp.get(), confirmTmp.size());
ptr += confirmTmp.size();
// Write confirm structures.
u8 *ptr = teddy_base + headerSize + maskLen;
assert(ISALIGNED_CL(ptr));
memcpy(ptr, confirmTable.get(), confirmTable.size());
ptr += ROUNDUP_CL(confirmTable.size());
// Write flood control structures.
assert(ISALIGNED_CL(ptr));
teddy->floodOffset = verify_u32(ptr - teddy_base);
memcpy(ptr, floodControlTmp.get(), floodControlTmp.size());
ptr += floodControlTmp.size();
memcpy(ptr, floodTable.get(), floodTable.size());
ptr += floodTable.size();
u8 *baseMsk = teddy_base + sizeof(Teddy);
// Write teddy masks.
u8 *baseMsk = teddy_base + headerSize;
for (const auto &b2l : bucketToLits) {
const u32 &bucket_id = b2l.first;

View File

@ -240,13 +240,13 @@ void do_confWithBitMany_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset,
/*
 * Returns a const m128 pointer located at a byte offset past the start of the
 * given Teddy structure (presumably the start of the 128-bit mask table, going
 * by the function name -- confirm against the bytecode layout).
 *
 * NOTE(review): two return statements appear below and only the first is
 * reachable as written. This looks like a diff rendering with the -/+ markers
 * stripped (old line followed by its replacement) -- confirm against the real
 * file before relying on either offset.
 */
static really_inline
const m128 *getMaskBase(const struct Teddy *teddy) {
/* Offset is exactly sizeof(struct Teddy). */
return (const m128 *)((const u8 *)teddy + sizeof(struct Teddy));
/* Offset is ROUNDUP_CL(sizeof(struct Teddy)). ROUNDUP_CL is defined elsewhere;
 * presumably it rounds up to a cache-line multiple -- TODO confirm. */
return (const m128 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)));
}
/*
 * Returns a const u32 pointer located past both the Teddy header and a region
 * of numMask * 32 bytes (presumably the 128-bit mask table, with the confirm
 * structures starting at the returned address -- confirm against the bytecode
 * layout).
 *
 * NOTE(review): two return statements appear below and only the first is
 * reachable as written. This looks like a diff rendering with the -/+ markers
 * stripped (old line followed by its replacement) -- confirm against the real
 * file before relying on either offset.
 */
static really_inline
const u32 *getConfBase(const struct Teddy *teddy, u8 numMask) {
/* Unrounded form: header size plus numMask * 32 bytes. */
return (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) +
(numMask*32));
/* Rounded form: each of the two terms is passed through ROUNDUP_CL
 * separately before being added. ROUNDUP_CL is defined elsewhere; presumably
 * it rounds up to a cache-line multiple -- TODO confirm. */
return (const u32 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)) +
ROUNDUP_CL(numMask * 32));
}
#endif /* TEDDY_RUNTIME_COMMON_H_ */