fdr: align major structures to cachelines

This commit is contained in:
Justin Viiret 2017-05-23 14:28:12 +10:00 committed by Matthew Barr
parent c36c071564
commit 9bdd370163
3 changed files with 43 additions and 29 deletions

View File

@@ -725,13 +725,18 @@ static never_inline
hwlm_error_t fdr_engine_exec(const struct FDR *fdr, hwlm_error_t fdr_engine_exec(const struct FDR *fdr,
const struct FDR_Runtime_Args *a, const struct FDR_Runtime_Args *a,
hwlm_group_t control) { hwlm_group_t control) {
assert(ISALIGNED_CL(fdr));
u32 floodBackoff = FLOOD_BACKOFF_START; u32 floodBackoff = FLOOD_BACKOFF_START;
u32 last_match_id = INVALID_MATCH_ID; u32 last_match_id = INVALID_MATCH_ID;
u32 domain_mask_flipped = ~fdr->domainMask; u32 domain_mask_flipped = ~fdr->domainMask;
u8 stride = fdr->stride; u8 stride = fdr->stride;
const u64a *ft = const u64a *ft =
(const u64a *)((const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR))); (const u64a *)((const u8 *)fdr + ROUNDUP_CL(sizeof(struct FDR)));
const u32 *confBase = (const u32 *)((const u8 *)ft + fdr->tabSize); assert(ISALIGNED_CL(ft));
const u32 *confBase =
(const u32 *)((const u8 *)ft + ROUNDUP_CL(fdr->tabSize));
assert(ISALIGNED_CL(confBase));
struct zone zones[ZONE_MAX]; struct zone zones[ZONE_MAX];
assert(fdr->domain > 8 && fdr->domain < 16); assert(fdr->domain > 8 && fdr->domain < 16);

View File

@@ -144,50 +144,59 @@ void FDRCompiler::createInitialState(FDR *fdr) {
} }
} }
/**
* \brief Lay out FDR structures in bytecode.
*
* Note that each major structure (header, table, confirm, flood control) is
* cacheline-aligned.
*/
bytecode_ptr<FDR> FDRCompiler::setupFDR() { bytecode_ptr<FDR> FDRCompiler::setupFDR() {
size_t tabSize = eng.getTabSizeBytes(); size_t tabSize = ROUNDUP_CL(eng.getTabSizeBytes());
auto floodControlTmp = setupFDRFloodControl(lits, eng, grey); auto floodTable = setupFDRFloodControl(lits, eng, grey);
auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small); auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small);
assert(ISALIGNED_16(tabSize)); size_t headerSize = ROUNDUP_CL(sizeof(FDR));
assert(ISALIGNED_16(confirmTmp.size())); size_t size = headerSize + tabSize + ROUNDUP_CL(confirmTable.size()) +
assert(ISALIGNED_16(floodControlTmp.size())); floodTable.size();
size_t headerSize = ROUNDUP_16(sizeof(FDR));
size_t size = ROUNDUP_16(headerSize + tabSize + confirmTmp.size() +
floodControlTmp.size());
DEBUG_PRINTF("sizes base=%zu tabSize=%zu confirm=%zu floodControl=%zu " DEBUG_PRINTF("sizes base=%zu tabSize=%zu confirm=%zu floodControl=%zu "
"total=%zu\n", "total=%zu\n",
headerSize, tabSize, confirmTmp.size(), floodControlTmp.size(), headerSize, tabSize, confirmTable.size(), floodTable.size(),
size); size);
auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64); auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64);
assert(fdr); // otherwise would have thrown std::bad_alloc assert(fdr); // otherwise would have thrown std::bad_alloc
u8 *fdr_base = (u8 *)fdr.get();
// Write header.
fdr->size = size; fdr->size = size;
fdr->engineID = eng.getID(); fdr->engineID = eng.getID();
fdr->maxStringLen = verify_u32(maxLen(lits)); fdr->maxStringLen = verify_u32(maxLen(lits));
createInitialState(fdr.get()); assert(eng.bits > 8 && eng.bits < 16); // we allow domains 9 to 15 only
u8 *fdr_base = (u8 *)fdr.get();
u8 *ptr = fdr_base + ROUNDUP_16(sizeof(FDR));
copy(tab.begin(), tab.end(), ptr);
ptr += tabSize;
memcpy(ptr, confirmTmp.get(), confirmTmp.size());
ptr += confirmTmp.size();
fdr->floodOffset = verify_u32(ptr - fdr_base);
memcpy(ptr, floodControlTmp.get(), floodControlTmp.size());
ptr += floodControlTmp.size();
/* we are allowing domains 9 to 15 only */
assert(eng.bits > 8 && eng.bits < 16);
fdr->domain = eng.bits; fdr->domain = eng.bits;
fdr->domainMask = (1 << eng.bits) - 1; fdr->domainMask = (1 << eng.bits) - 1;
fdr->tabSize = (1 << eng.bits) * (eng.schemeWidth / 8); fdr->tabSize = (1 << eng.bits) * (eng.schemeWidth / 8);
fdr->stride = eng.stride; fdr->stride = eng.stride;
createInitialState(fdr.get());
// Write table.
u8 *ptr = fdr_base + ROUNDUP_CL(sizeof(FDR));
assert(ISALIGNED_CL(ptr));
copy(tab.begin(), tab.end(), ptr);
ptr += tabSize;
// Write confirm structures.
assert(ISALIGNED_CL(ptr));
memcpy(ptr, confirmTable.get(), confirmTable.size());
ptr += ROUNDUP_CL(confirmTable.size());
// Write flood control structures.
assert(ISALIGNED_CL(ptr));
fdr->floodOffset = verify_u32(ptr - fdr_base);
memcpy(ptr, floodTable.get(), floodTable.size());
ptr += floodTable.size(); // last write, no need to round up
return fdr; return fdr;
} }

View File

@@ -367,7 +367,7 @@ setupFullConfs(const vector<hwlmLiteral> &lits,
u32 totalConfSwitchSize = nBuckets * sizeof(u32); u32 totalConfSwitchSize = nBuckets * sizeof(u32);
u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize); u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize);
auto buf = make_zeroed_bytecode_ptr<u8>(totalSize, 16); auto buf = make_zeroed_bytecode_ptr<u8>(totalSize, 64);
assert(buf); // otherwise would have thrown std::bad_alloc assert(buf); // otherwise would have thrown std::bad_alloc
u32 *confBase = (u32 *)buf.get(); u32 *confBase = (u32 *)buf.get();