From 7bcb41914907e17bfd5af934a031f783bb76b969 Mon Sep 17 00:00:00 2001 From: bruvzg <7645683+bruvzg@users.noreply.github.com> Date: Thu, 1 Feb 2024 10:15:08 +0200 Subject: [PATCH] [Core] Improve `CowData` and `Memory` metadata alignment. --- core/os/memory.cpp | 24 +++++------ core/os/memory.h | 35 ++++++++++----- core/templates/cowdata.h | 92 +++++++++++++++++++++++++++------------- 3 files changed, 99 insertions(+), 52 deletions(-) diff --git a/core/os/memory.cpp b/core/os/memory.cpp index 5f6216a5f1c..32c316e58e8 100644 --- a/core/os/memory.cpp +++ b/core/os/memory.cpp @@ -72,23 +72,23 @@ void *Memory::alloc_static(size_t p_bytes, bool p_pad_align) { bool prepad = p_pad_align; #endif - void *mem = malloc(p_bytes + (prepad ? PAD_ALIGN : 0)); + void *mem = malloc(p_bytes + (prepad ? DATA_OFFSET : 0)); ERR_FAIL_NULL_V(mem, nullptr); alloc_count.increment(); if (prepad) { - uint64_t *s = (uint64_t *)mem; - *s = p_bytes; - uint8_t *s8 = (uint8_t *)mem; + uint64_t *s = (uint64_t *)(s8 + SIZE_OFFSET); + *s = p_bytes; + #ifdef DEBUG_ENABLED uint64_t new_mem_usage = mem_usage.add(p_bytes); max_usage.exchange_if_greater(new_mem_usage); #endif - return s8 + PAD_ALIGN; + return s8 + DATA_OFFSET; } else { return mem; } @@ -108,8 +108,8 @@ void *Memory::realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align) { #endif if (prepad) { - mem -= PAD_ALIGN; - uint64_t *s = (uint64_t *)mem; + mem -= DATA_OFFSET; + uint64_t *s = (uint64_t *)(mem + SIZE_OFFSET); #ifdef DEBUG_ENABLED if (p_bytes > *s) { @@ -126,14 +126,14 @@ void *Memory::realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align) { } else { *s = p_bytes; - mem = (uint8_t *)realloc(mem, p_bytes + PAD_ALIGN); + mem = (uint8_t *)realloc(mem, p_bytes + DATA_OFFSET); ERR_FAIL_NULL_V(mem, nullptr); - s = (uint64_t *)mem; + s = (uint64_t *)(mem + SIZE_OFFSET); *s = p_bytes; - return mem + PAD_ALIGN; + return mem + DATA_OFFSET; } } else { mem = (uint8_t *)realloc(mem, p_bytes); @@ -158,10 +158,10 @@ void Memory::free_static(void *p_ptr, bool p_pad_align) { alloc_count.decrement(); if (prepad) { - mem -= PAD_ALIGN; + mem -= DATA_OFFSET; #ifdef DEBUG_ENABLED - uint64_t *s = (uint64_t *)mem; + uint64_t *s = (uint64_t *)(mem + SIZE_OFFSET); mem_usage.sub(*s); #endif diff --git a/core/os/memory.h b/core/os/memory.h index a43a15e58f8..6f3f6fed399 100644 --- a/core/os/memory.h +++ b/core/os/memory.h @@ -38,10 +38,6 @@ #include #include -#ifndef PAD_ALIGN -#define PAD_ALIGN 16 //must always be greater than this at much -#endif - class Memory { #ifdef DEBUG_ENABLED static SafeNumeric mem_usage; @@ -51,6 +47,17 @@ class Memory { static SafeNumeric alloc_count; public: + // Alignment: ↓ max_align_t ↓ uint64_t ↓ max_align_t + // ┌─────────────────┬──┬────────────────┬──┬───────────... + // │ uint64_t │░░│ uint64_t │░░│ T[] + // │ alloc size │░░│ element count │░░│ data + // └─────────────────┴──┴────────────────┴──┴───────────... + // Offset: ↑ SIZE_OFFSET ↑ ELEMENT_OFFSET ↑ DATA_OFFSET + + static constexpr size_t SIZE_OFFSET = 0; + static constexpr size_t ELEMENT_OFFSET = ((SIZE_OFFSET + sizeof(uint64_t)) % alignof(uint64_t) == 0) ? (SIZE_OFFSET + sizeof(uint64_t)) : ((SIZE_OFFSET + sizeof(uint64_t)) + alignof(uint64_t) - ((SIZE_OFFSET + sizeof(uint64_t)) % alignof(uint64_t))); + static constexpr size_t DATA_OFFSET = ((ELEMENT_OFFSET + sizeof(uint64_t)) % alignof(max_align_t) == 0) ? (ELEMENT_OFFSET + sizeof(uint64_t)) : ((ELEMENT_OFFSET + sizeof(uint64_t)) + alignof(max_align_t) - ((ELEMENT_OFFSET + sizeof(uint64_t)) % alignof(max_align_t))); + static void *alloc_static(size_t p_bytes, bool p_pad_align = false); static void *realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align = false); static void free_static(void *p_ptr, bool p_pad_align = false); @@ -133,6 +140,10 @@ void memdelete_allocator(T *p_class) { #define memnew_arr(m_class, m_count) memnew_arr_template(m_count) +_FORCE_INLINE_ uint64_t *_get_element_count_ptr(uint8_t *p_ptr) { + return (uint64_t *)(p_ptr - Memory::DATA_OFFSET + Memory::ELEMENT_OFFSET); +} + template T *memnew_arr_template(size_t p_elements) { if (p_elements == 0) { @@ -142,10 +153,12 @@ T *memnew_arr_template(size_t p_elements) { same strategy used by std::vector, and the Vector class, so it should be safe.*/ size_t len = sizeof(T) * p_elements; - uint64_t *mem = (uint64_t *)Memory::alloc_static(len, true); + uint8_t *mem = (uint8_t *)Memory::alloc_static(len, true); T *failptr = nullptr; //get rid of a warning ERR_FAIL_NULL_V(mem, failptr); - *(mem - 1) = p_elements; + + uint64_t *_elem_count_ptr = _get_element_count_ptr(mem); + *(_elem_count_ptr) = p_elements; if constexpr (!std::is_trivially_constructible_v) { T *elems = (T *)mem; @@ -166,16 +179,18 @@ T *memnew_arr_template(size_t p_elements) { template size_t memarr_len(const T *p_class) { - uint64_t *ptr = (uint64_t *)p_class; - return *(ptr - 1); + uint8_t *ptr = (uint8_t *)p_class; + uint64_t *_elem_count_ptr = _get_element_count_ptr(ptr); + return *(_elem_count_ptr); } template void memdelete_arr(T *p_class) { - uint64_t *ptr = (uint64_t *)p_class; + uint8_t *ptr = (uint8_t *)p_class; if constexpr (!std::is_trivially_destructible_v) { - uint64_t elem_count = *(ptr - 1); + uint64_t *_elem_count_ptr = _get_element_count_ptr(ptr); + uint64_t elem_count = *(_elem_count_ptr); for (uint64_t i = 0; i < elem_count; i++) { p_class[i].~T(); diff --git a/core/templates/cowdata.h b/core/templates/cowdata.h index a0632b26455..466658951ec 100644 --- a/core/templates/cowdata.h +++ b/core/templates/cowdata.h @@ -46,7 +46,7 @@ class CharString; template class VMap; -SAFE_NUMERIC_TYPE_PUN_GUARANTEES(uint64_t) +static_assert(std::is_trivially_destructible_v>); // Silence a false positive warning (see GH-52119). #if defined(__GNUC__) && !defined(__clang__) @@ -89,18 +89,39 @@ private: return ++x; } - static constexpr USize ALLOC_PAD = sizeof(USize) * 2; // For size and atomic refcount. + // Alignment: ↓ max_align_t ↓ USize ↓ max_align_t + // ┌────────────────────┬──┬─────────────┬──┬───────────... + // │ SafeNumeric │░░│ USize │░░│ T[] + // │ ref. count │░░│ data size │░░│ data + // └────────────────────┴──┴─────────────┴──┴───────────... + // Offset: ↑ REF_COUNT_OFFSET ↑ SIZE_OFFSET ↑ DATA_OFFSET + + static constexpr size_t REF_COUNT_OFFSET = 0; + static constexpr size_t SIZE_OFFSET = ((REF_COUNT_OFFSET + sizeof(SafeNumeric)) % alignof(USize) == 0) ? (REF_COUNT_OFFSET + sizeof(SafeNumeric)) : ((REF_COUNT_OFFSET + sizeof(SafeNumeric)) + alignof(USize) - ((REF_COUNT_OFFSET + sizeof(SafeNumeric)) % alignof(USize))); + static constexpr size_t DATA_OFFSET = ((SIZE_OFFSET + sizeof(USize)) % alignof(max_align_t) == 0) ? (SIZE_OFFSET + sizeof(USize)) : ((SIZE_OFFSET + sizeof(USize)) + alignof(max_align_t) - ((SIZE_OFFSET + sizeof(USize)) % alignof(max_align_t))); mutable T *_ptr = nullptr; // internal helpers + static _FORCE_INLINE_ SafeNumeric *_get_refcount_ptr(uint8_t *p_ptr) { + return (SafeNumeric *)(p_ptr + REF_COUNT_OFFSET); + } + + static _FORCE_INLINE_ USize *_get_size_ptr(uint8_t *p_ptr) { + return (USize *)(p_ptr + SIZE_OFFSET); + } + + static _FORCE_INLINE_ T *_get_data_ptr(uint8_t *p_ptr) { + return (T *)(p_ptr + DATA_OFFSET); + } + _FORCE_INLINE_ SafeNumeric *_get_refcount() const { if (!_ptr) { return nullptr; } - return reinterpret_cast *>(_ptr) - 2; + return (SafeNumeric *)((uint8_t *)_ptr - DATA_OFFSET + REF_COUNT_OFFSET); } _FORCE_INLINE_ USize *_get_size() const { @@ -108,7 +129,7 @@ private: return nullptr; } - return reinterpret_cast(_ptr) - 1; + return (USize *)((uint8_t *)_ptr - DATA_OFFSET + SIZE_OFFSET); } _FORCE_INLINE_ USize _get_alloc_size(USize p_elements) const { @@ -244,7 +265,7 @@ void CowData::_unref(void *p_data) { } // free mem - Memory::free_static(((uint8_t *)p_data) - ALLOC_PAD, false); + Memory::free_static(((uint8_t *)p_data) - DATA_OFFSET, false); } template @@ -260,26 +281,27 @@ typename CowData::USize CowData::_copy_on_write() { /* in use by more than me */ USize current_size = *_get_size(); - USize *mem_new = (USize *)Memory::alloc_static(_get_alloc_size(current_size) + ALLOC_PAD, false); - mem_new += 2; + uint8_t *mem_new = (uint8_t *)Memory::alloc_static(_get_alloc_size(current_size) + DATA_OFFSET, false); + ERR_FAIL_NULL_V(mem_new, 0); - new (mem_new - 2) SafeNumeric(1); //refcount - *(mem_new - 1) = current_size; //size + SafeNumeric *_refc_ptr = _get_refcount_ptr(mem_new); + USize *_size_ptr = _get_size_ptr(mem_new); + T *_data_ptr = _get_data_ptr(mem_new); - T *_data = (T *)(mem_new); + new (_refc_ptr) SafeNumeric(1); //refcount + *(_size_ptr) = current_size; //size // initialize new elements if constexpr (std::is_trivially_copyable_v) { - memcpy(mem_new, _ptr, current_size * sizeof(T)); - + memcpy((uint8_t *)_data_ptr, _ptr, current_size * sizeof(T)); } else { for (USize i = 0; i < current_size; i++) { - memnew_placement(&_data[i], T(_ptr[i])); + memnew_placement(&_data_ptr[i], T(_ptr[i])); } } _unref(_ptr); - _ptr = _data; + _ptr = _data_ptr; rc = 1; } @@ -315,21 +337,28 @@ Error CowData::resize(Size p_size) { if (alloc_size != current_alloc_size) { if (current_size == 0) { // alloc from scratch - USize *ptr = (USize *)Memory::alloc_static(alloc_size + ALLOC_PAD, false); - ptr += 2; - ERR_FAIL_NULL_V(ptr, ERR_OUT_OF_MEMORY); - *(ptr - 1) = 0; //size, currently none - new (ptr - 2) SafeNumeric(1); //refcount + uint8_t *mem_new = (uint8_t *)Memory::alloc_static(alloc_size + DATA_OFFSET, false); + ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY); - _ptr = (T *)ptr; + SafeNumeric *_refc_ptr = _get_refcount_ptr(mem_new); + USize *_size_ptr = _get_size_ptr(mem_new); + T *_data_ptr = _get_data_ptr(mem_new); + + new (_refc_ptr) SafeNumeric(1); //refcount + *(_size_ptr) = 0; //size, currently none + + _ptr = _data_ptr; } else { - USize *_ptrnew = (USize *)Memory::realloc_static(((uint8_t *)_ptr) - ALLOC_PAD, alloc_size + ALLOC_PAD, false); - ERR_FAIL_NULL_V(_ptrnew, ERR_OUT_OF_MEMORY); - _ptrnew += 2; - new (_ptrnew - 2) SafeNumeric(rc); //refcount + uint8_t *mem_new = (uint8_t *)Memory::realloc_static(((uint8_t *)_ptr) - DATA_OFFSET, alloc_size + DATA_OFFSET, false); + ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY); - _ptr = (T *)(_ptrnew); + SafeNumeric *_refc_ptr = _get_refcount_ptr(mem_new); + T *_data_ptr = _get_data_ptr(mem_new); + + new (_refc_ptr) SafeNumeric(rc); //refcount + + _ptr = _data_ptr; } } @@ -355,12 +384,15 @@ Error CowData::resize(Size p_size) { } if (alloc_size != current_alloc_size) { - USize *_ptrnew = (USize *)Memory::realloc_static(((uint8_t *)_ptr) - ALLOC_PAD, alloc_size + ALLOC_PAD, false); - ERR_FAIL_NULL_V(_ptrnew, ERR_OUT_OF_MEMORY); - _ptrnew += 2; - new (_ptrnew - 2) SafeNumeric(rc); //refcount + uint8_t *mem_new = (uint8_t *)Memory::realloc_static(((uint8_t *)_ptr) - DATA_OFFSET, alloc_size + DATA_OFFSET, false); + ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY); - _ptr = (T *)(_ptrnew); + SafeNumeric *_refc_ptr = _get_refcount_ptr(mem_new); + T *_data_ptr = _get_data_ptr(mem_new); + + new (_refc_ptr) SafeNumeric(rc); //refcount + + _ptr = _data_ptr; } *_get_size() = p_size;