diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp index d2de7b95..d735c80a 100644 --- a/src/nfa/goughcompile.cpp +++ b/src/nfa/goughcompile.cpp @@ -1136,16 +1136,11 @@ aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, gough_dfa->length = gough_size; /* copy in blocks */ - memcpy((u8 *)gough_dfa.get() + edge_prog_offset, &edge_blocks[0], - byte_length(edge_blocks)); + copy_bytes((u8 *)gough_dfa.get() + edge_prog_offset, edge_blocks); if (top_prog_offset) { - memcpy((u8 *)gough_dfa.get() + top_prog_offset, &top_blocks[0], - byte_length(top_blocks)); - } - if (!temp_blocks.empty()) { - memcpy((u8 *)gough_dfa.get() + prog_base_offset, &temp_blocks[0], - byte_length(temp_blocks)); + copy_bytes((u8 *)gough_dfa.get() + top_prog_offset, top_blocks); } + copy_bytes((u8 *)gough_dfa.get() + prog_base_offset, temp_blocks); return gough_dfa; } diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 5cf46334..a6c34cb6 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -1397,8 +1397,7 @@ struct Factory { repeat->horizon = rsi.horizon; repeat->packedCtrlSize = rsi.packedCtrlSize; repeat->stateSize = rsi.stateSize; - memcpy(repeat->packedFieldSizes, rsi.packedFieldSizes.data(), - byte_length(rsi.packedFieldSizes)); + copy_bytes(repeat->packedFieldSizes, rsi.packedFieldSizes); repeat->patchCount = rsi.patchCount; repeat->patchSize = rsi.patchSize; repeat->encodingSize = rsi.encodingSize; @@ -1413,8 +1412,7 @@ struct Factory { // Copy in the sparse lookup table. if (br.type == REPEAT_SPARSE_OPTIMAL_P) { assert(!rsi.table.empty()); - memcpy(info_ptr + tableOffset, rsi.table.data(), - byte_length(rsi.table)); + copy_bytes(info_ptr + tableOffset, rsi.table); } // Fill the tug mask. @@ -1702,6 +1700,7 @@ struct Factory { for (u32 i = 0; i < num_repeats; i++) { repeatOffsets[i] = offset; + assert(repeats[i].first); memcpy((char *)limex + offset, repeats[i].first.get(), repeats[i].second); offset += repeats[i].second; @@ -1709,8 +1708,7 @@ struct Factory { // Write repeat offset lookup table. assert(ISALIGNED_N((char *)limex + repeatOffsetsOffset, alignof(u32))); - memcpy((char *)limex + repeatOffsetsOffset, repeatOffsets.data(), - byte_length(repeatOffsets)); + copy_bytes((char *)limex + repeatOffsetsOffset, repeatOffsets); limex->repeatOffset = repeatOffsetsOffset; limex->repeatCount = num_repeats; @@ -1725,8 +1723,7 @@ struct Factory { limex->exReportOffset = exceptionReportsOffset; assert(ISALIGNED_N((char *)limex + exceptionReportsOffset, alignof(ReportID))); - memcpy((char *)limex + exceptionReportsOffset, reports.data(), - byte_length(reports)); + copy_bytes((char *)limex + exceptionReportsOffset, reports); } static diff --git a/src/nfagraph/ng_lbr.cpp b/src/nfagraph/ng_lbr.cpp index 11eded69..b9cacaa7 100644 --- a/src/nfagraph/ng_lbr.cpp +++ b/src/nfagraph/ng_lbr.cpp @@ -98,8 +98,7 @@ void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin, info->packedCtrlSize = rsi.packedCtrlSize; info->horizon = rsi.horizon; info->minPeriod = minPeriod; - memcpy(&info->packedFieldSizes, rsi.packedFieldSizes.data(), - byte_length(rsi.packedFieldSizes)); + copy_bytes(&info->packedFieldSizes, rsi.packedFieldSizes); info->patchCount = rsi.patchCount; info->patchSize = rsi.patchSize; info->encodingSize = rsi.encodingSize; @@ -122,7 +121,7 @@ void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin, nfa->length = verify_u32(len); info->length = verify_u32(sizeof(RepeatInfo) + sizeof(u64a) * (rsi.patchSize + 1)); - memcpy(table, rsi.table.data(), byte_length(rsi.table)); + copy_bytes(table, rsi.table); } } diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index bbc8644e..e17953aa 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2687,12 +2687,6 @@ void fillInReportInfo(RoseEngine *engine, u32 reportOffset, sizeof(internal_report)); } -static -void populateInvDkeyTable(char *ptr, const ReportManager &rm) { - vector table = rm.getDkeyToReportTable(); - memcpy(ptr, table.data(), byte_length(table)); -} - static bool hasSimpleReports(const vector &reports) { auto it = find_if(reports.begin(), reports.end(), isComplexReport); @@ -4154,7 +4148,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->ekeyCount = rm.numEkeys(); engine->dkeyCount = rm.numDkeys(); engine->invDkeyOffset = dkeyOffset; - populateInvDkeyTable(ptr + dkeyOffset, rm); + copy_bytes(ptr + dkeyOffset, rm.getDkeyToReportTable()); engine->somHorizon = ssm.somPrecision(); engine->somLocationCount = ssm.numSomSlots(); @@ -4314,33 +4308,22 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { buildLitBenefits(*this, engine.get(), base_lits_benefits_offset); // Copy in other tables - memcpy(ptr + bc.engine_blob_base, bc.engine_blob.data(), - byte_length(bc.engine_blob)); - - memcpy(ptr + engine->literalOffset, literalTable.data(), - byte_length(literalTable)); - memcpy(ptr + engine->roleOffset, bc.roleTable.data(), - byte_length(bc.roleTable)); - copy(leftInfoTable.begin(), leftInfoTable.end(), - (LeftNfaInfo *)(ptr + engine->leftOffset)); + copy_bytes(ptr + bc.engine_blob_base, bc.engine_blob); + copy_bytes(ptr + engine->literalOffset, literalTable); + copy_bytes(ptr + engine->roleOffset, bc.roleTable); + copy_bytes(ptr + engine->leftOffset, leftInfoTable); fillLookaroundTables(ptr + lookaroundTableOffset, ptr + lookaroundReachOffset, bc.lookaround); fillInSomRevNfas(engine.get(), ssm, rev_nfa_table_offset, rev_nfa_offsets); - memcpy(ptr + engine->predOffset, predTable.data(), byte_length(predTable)); - memcpy(ptr + engine->rootRoleOffset, rootRoleTable.data(), - byte_length(rootRoleTable)); - memcpy(ptr + engine->anchoredReportMapOffset, art.data(), byte_length(art)); - memcpy(ptr + engine->anchoredReportInverseMapOffset, arit.data(), - byte_length(arit)); - memcpy(ptr + engine->multidirectOffset, mdr_reports.data(), - byte_length(mdr_reports)); - - copy(activeLeftIter.begin(), activeLeftIter.end(), - (mmbit_sparse_iter *)(ptr + engine->activeLeftIterOffset)); - - memcpy(ptr + engine->sideOffset, sideTable.data(), byte_length(sideTable)); + copy_bytes(ptr + engine->predOffset, predTable); + copy_bytes(ptr + engine->rootRoleOffset, rootRoleTable); + copy_bytes(ptr + engine->anchoredReportMapOffset, art); + copy_bytes(ptr + engine->anchoredReportInverseMapOffset, arit); + copy_bytes(ptr + engine->multidirectOffset, mdr_reports); + copy_bytes(ptr + engine->activeLeftIterOffset, activeLeftIter); + copy_bytes(ptr + engine->sideOffset, sideTable); DEBUG_PRINTF("rose done %p\n", engine.get()); return engine; diff --git a/src/util/container.h b/src/util/container.h index b4a10c89..62e841c1 100644 --- a/src/util/container.h +++ b/src/util/container.h @@ -33,8 +33,13 @@ #ifndef UTIL_CONTAINER_H #define UTIL_CONTAINER_H +#include "ue2common.h" + #include +#include +#include #include +#include #include namespace ue2 { @@ -92,11 +97,35 @@ std::set assoc_keys(const C &container) { return keys; } +/** + * \brief Return the length in bytes of the given vector of (POD) objects. + */ template typename std::vector::size_type byte_length(const std::vector &vec) { + static_assert(std::is_pod::value, "should be pod"); return vec.size() * sizeof(T); } +/** + * \brief Copy the given vector of POD objects to the given location in memory. + * It is safe to give this function an empty vector. + */ +template +void *copy_bytes(void *dest, const std::vector &vec) { + static_assert(std::is_pod::value, "should be pod"); + assert(dest); + + // Since we're generally using this function to write into the bytecode, + // dest should be appropriately aligned for T. + assert(ISALIGNED_N(dest, alignof(T))); + + if (vec.empty()) { + return dest; // Protect memcpy against null pointers. + } + assert(vec.data() != nullptr); + return std::memcpy(dest, vec.data(), byte_length(vec)); +} + template bool is_subset_of(const OrderedContainer1 &small, const OrderedContainer2 &big) { static_assert(std::is_same