From 8f6e94dc30ad8f13ff303b824f2a7156645b1d5f Mon Sep 17 00:00:00 2001
From: Philip Rebohle
Date: Wed, 27 Jan 2021 20:41:46 +0100
Subject: [PATCH] vkd3d: Suballocate small allocations from larger chunks.

This is necessary to keep the amount of allocated memory manageable in
games that allocate a lot of small heaps or committed resources.

Signed-off-by: Philip Rebohle
---
Review notes (not part of the commit message):
 - vkd3d_memory_allocator_try_suballocate_memory() now passes the filtered
   type_mask to vkd3d_memory_allocator_add_chunk() instead of the raw
   memoryTypeBits of the request.
 - vkd3d_memory_chunk_create() now fails if the initial free range could
   not be recorded.
 - Short comments were added to the new helpers; hunk headers and the
   diffstat were adjusted for the added lines.

 libs/vkd3d/memory.c        | 401 ++++++++++++++++++++++++++++++++++++-
 libs/vkd3d/vkd3d_private.h |  17 ++
 2 files changed, 413 insertions(+), 5 deletions(-)

diff --git a/libs/vkd3d/memory.c b/libs/vkd3d/memory.c
index d852cb9b..810b3eec 100644
--- a/libs/vkd3d/memory.c
+++ b/libs/vkd3d/memory.c
@@ -55,6 +55,22 @@ static uint32_t vkd3d_select_memory_types(struct d3d12_device *device, const D3D
     return type_mask;
 }
 
+/* Returns a bit mask with bit i set for every memory type i of the device
+ * whose property flags contain all bits of type_flags. */
+static uint32_t vkd3d_find_memory_types_with_flags(struct d3d12_device *device, VkMemoryPropertyFlags type_flags)
+{
+    const VkPhysicalDeviceMemoryProperties *memory_info = &device->memory_properties;
+    uint32_t i, mask = 0;
+
+    for (i = 0; i < memory_info->memoryTypeCount; i++)
+    {
+        if ((memory_info->memoryTypes[i].propertyFlags & type_flags) == type_flags)
+            mask |= 1u << i;
+    }
+
+    return mask;
+}
+
 static HRESULT vkd3d_select_memory_flags(struct d3d12_device *device, const D3D12_HEAP_PROPERTIES *heap_properties, VkMemoryPropertyFlags *type_flags)
 {
     switch (heap_properties->Type)
@@ -380,6 +396,233 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
     return S_OK;
 }
 
+/* Inserts the free range [offset, offset + length) at position index of
+ * chunk->free_ranges, moving later entries up by one. If the array cannot
+ * be grown, an error is logged and the range is not recorded. */
+static void vkd3d_memory_chunk_insert_range(struct vkd3d_memory_chunk *chunk,
+        size_t index, VkDeviceSize offset, VkDeviceSize length)
+{
+    if (!vkd3d_array_reserve((void**)&chunk->free_ranges, &chunk->free_ranges_size,
+            chunk->free_ranges_count + 1, sizeof(*chunk->free_ranges)))
+    {
+        ERR("Failed to insert free range.\n");
+        return;
+    }
+
+    memmove(&chunk->free_ranges[index + 1], &chunk->free_ranges[index],
+            sizeof(*chunk->free_ranges) * (chunk->free_ranges_count - index));
+
+    chunk->free_ranges[index].offset = offset;
+    chunk->free_ranges[index].length = length;
+    chunk->free_ranges_count++;
+}
+
+/* Removes the entry at position index from chunk->free_ranges. */
+static void vkd3d_memory_chunk_remove_range(struct vkd3d_memory_chunk *chunk, size_t index)
+{
+    chunk->free_ranges_count--;
+
+    memmove(&chunk->free_ranges[index], &chunk->free_ranges[index + 1],
+            sizeof(*chunk->free_ranges) * (chunk->free_ranges_count - index));
+}
+
+/* Carves memory_requirements->size bytes out of one of the chunk's free
+ * ranges and stores the resulting slice of the chunk's allocation in
+ * *allocation. An exactly fitting range is preferred, otherwise the
+ * longest range that fits is used. Returns E_OUTOFMEMORY if no free
+ * range can hold the aligned request. */
+static HRESULT vkd3d_memory_chunk_allocate_range(struct vkd3d_memory_chunk *chunk, const VkMemoryRequirements *memory_requirements,
+        struct vkd3d_memory_allocation *allocation)
+{
+    struct vkd3d_memory_free_range *pick_range;
+    VkDeviceSize l_length, r_length;
+    size_t i, pick_index;
+
+    if (!chunk->free_ranges_count)
+        return E_OUTOFMEMORY;
+
+    pick_index = chunk->free_ranges_count;
+    pick_range = NULL;
+
+    for (i = 0; i < chunk->free_ranges_count; i++)
+    {
+        struct vkd3d_memory_free_range *range = &chunk->free_ranges[i];
+
+        /* Skip ranges that are too short once their start is aligned */
+        if (range->offset + range->length - align(range->offset, memory_requirements->alignment) < memory_requirements->size)
+            continue;
+
+        /* Exact fit leaving no gaps */
+        if (range->length == memory_requirements->size)
+        {
+            pick_index = i;
+            pick_range = range;
+            break;
+        }
+
+        /* Alignment is almost always going to be 64 KiB, so
+         * don't worry too much about misalignment gaps here */
+        if (!pick_range || range->length > pick_range->length)
+        {
+            pick_index = i;
+            pick_range = range;
+        }
+    }
+
+    if (!pick_range)
+        return E_OUTOFMEMORY;
+
+    /* Adjust offsets and addresses of the base allocation */
+    vkd3d_memory_allocation_slice(allocation, &chunk->allocation,
+            align(pick_range->offset, memory_requirements->alignment),
+            memory_requirements->size);
+
+    /* Remove allocated range from the free list */
+    l_length = allocation->offset - pick_range->offset;
+    r_length = pick_range->offset + pick_range->length
+            - allocation->offset - allocation->resource.size;
+
+    if (l_length)
+    {
+        pick_range->length = l_length;
+
+        if (r_length)
+        {
+            vkd3d_memory_chunk_insert_range(chunk, pick_index + 1,
+                    allocation->offset + allocation->resource.size, r_length);
+        }
+    }
+    else if (r_length)
+    {
+        pick_range->offset = allocation->offset + allocation->resource.size;
+        pick_range->length = r_length;
+    }
+    else
+    {
+        vkd3d_memory_chunk_remove_range(chunk, pick_index);
+    }
+
+    return S_OK;
+}
+
+/* Binary search over the chunk's free ranges, which is only valid if they
+ * are ordered by ascending offset. Returns the index of the first range
+ * whose offset is greater than offset, or free_ranges_count if none is. */
+static size_t vkd3d_memory_chunk_find_range(struct vkd3d_memory_chunk *chunk, VkDeviceSize offset)
+{
+    struct vkd3d_memory_free_range *range;
+    size_t index, hi, lo;
+
+    lo = 0;
+    hi = chunk->free_ranges_count;
+
+    while (lo < hi)
+    {
+        index = lo + (hi - lo) / 2;
+        range = &chunk->free_ranges[index];
+
+        if (range->offset > offset)
+            hi = index;
+        else
+            lo = index + 1;
+    }
+
+    return lo;
+}
+
+/* Returns the range covered by allocation to the chunk's free list,
+ * merging it with the free ranges directly before and after it when
+ * they are adjacent. */
+static void vkd3d_memory_chunk_free_range(struct vkd3d_memory_chunk *chunk, const struct vkd3d_memory_allocation *allocation)
+{
+    struct vkd3d_memory_free_range *range;
+    bool adjacent_l, adjacent_r;
+    size_t index;
+
+    index = vkd3d_memory_chunk_find_range(chunk, allocation->offset);
+
+    adjacent_l = false;
+    adjacent_r = false;
+
+    if (index > 0)
+    {
+        range = &chunk->free_ranges[index - 1];
+        adjacent_l = range->offset + range->length == allocation->offset;
+    }
+
+    if (index < chunk->free_ranges_count)
+    {
+        range = &chunk->free_ranges[index];
+        adjacent_r = range->offset == allocation->offset + allocation->resource.size;
+    }
+
+    if (adjacent_l)
+    {
+        range = &chunk->free_ranges[index - 1];
+        range->length += allocation->resource.size;
+
+        if (adjacent_r)
+        {
+            range->length += chunk->free_ranges[index].length;
+            vkd3d_memory_chunk_remove_range(chunk, index);
+        }
+    }
+    else if (adjacent_r)
+    {
+        range = &chunk->free_ranges[index];
+        range->offset = allocation->offset;
+        range->length += allocation->resource.size;
+    }
+    else
+    {
+        vkd3d_memory_chunk_insert_range(chunk, index,
+                allocation->offset, allocation->resource.size);
+    }
+}
+
+/* A chunk is unused when a single free range spans its entire size. */
+static bool vkd3d_memory_chunk_is_free(struct vkd3d_memory_chunk *chunk)
+{
+    return chunk->free_ranges_count == 1 && chunk->free_ranges[0].length == chunk->allocation.resource.size;
+}
+
+/* Allocates the backing memory for a new chunk according to info and
+ * marks all of it as free. On success the chunk is returned in *chunk. */
+static HRESULT vkd3d_memory_chunk_create(struct d3d12_device *device, struct vkd3d_memory_allocator *allocator,
+        const struct vkd3d_allocate_memory_info *info, struct vkd3d_memory_chunk **chunk)
+{
+    struct vkd3d_memory_chunk *object;
+    HRESULT hr;
+
+    if (!(object = vkd3d_malloc(sizeof(*object))))
+        return E_OUTOFMEMORY;
+
+    memset(object, 0, sizeof(*object));
+
+    if (FAILED(hr = vkd3d_memory_allocation_init(&object->allocation, device, allocator, info)))
+    {
+        vkd3d_free(object);
+        return hr;
+    }
+
+    object->allocation.chunk = object;
+    vkd3d_memory_chunk_insert_range(object, 0, 0, object->allocation.resource.size);
+
+    /* vkd3d_memory_chunk_insert_range() only logs on failure. A chunk
+     * without its initial free range can never satisfy an allocation,
+     * so fail the creation instead of keeping it. */
+    if (!object->free_ranges_count)
+    {
+        vkd3d_memory_allocation_free(&object->allocation, device, allocator);
+        vkd3d_free(object->free_ranges);
+        vkd3d_free(object);
+        return E_OUTOFMEMORY;
+    }
+
+    *chunk = object;
+    return S_OK;
+}
+
 static void vkd3d_memory_chunk_destroy(struct vkd3d_memory_chunk *chunk, struct d3d12_device *device, struct vkd3d_memory_allocator *allocator)
 {
     vkd3d_memory_allocation_free(&chunk->allocation, device, allocator);
@@ -387,6 +630,24 @@ static void vkd3d_memory_chunk_destroy(struct vkd3d_memory_chunk *chunk, struct
     vkd3d_free(chunk);
 }
 
+/* Removes chunk from the allocator's chunk list by moving the last list
+ * entry into its slot, then destroys the chunk. */
+static void vkd3d_memory_allocator_remove_chunk(struct vkd3d_memory_allocator *allocator, struct d3d12_device *device, struct vkd3d_memory_chunk *chunk)
+{
+    size_t i;
+
+    for (i = 0; i < allocator->chunks_count; i++)
+    {
+        if (allocator->chunks[i] == chunk)
+        {
+            allocator->chunks[i] = allocator->chunks[--allocator->chunks_count];
+            break;
+        }
+    }
+
+    vkd3d_memory_chunk_destroy(chunk, device, allocator);
+}
+
 HRESULT vkd3d_memory_allocator_init(struct vkd3d_memory_allocator *allocator, struct d3d12_device *device)
 {
     int rc;
@@ -412,18 +673,146 @@ void vkd3d_memory_allocator_cleanup(struct vkd3d_memory_allocator *allocator, st
     pthread_mutex_destroy(&allocator->mutex);
 }
 
+/* Creates a chunk of VKD3D_MEMORY_CHUNK_SIZE bytes for the given heap
+ * properties and flags, with type_mask as its allowed memory type bits,
+ * and appends it to the allocator's chunk list. */
+static HRESULT vkd3d_memory_allocator_add_chunk(struct vkd3d_memory_allocator *allocator, struct d3d12_device *device,
+        const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, uint32_t type_mask, struct vkd3d_memory_chunk **chunk)
+{
+    struct vkd3d_allocate_memory_info alloc_info;
+    struct vkd3d_memory_chunk *object;
+    HRESULT hr;
+
+    memset(&alloc_info, 0, sizeof(alloc_info));
+    alloc_info.memory_requirements.size = VKD3D_MEMORY_CHUNK_SIZE;
+    alloc_info.memory_requirements.alignment = 0;
+    alloc_info.memory_requirements.memoryTypeBits = type_mask;
+    alloc_info.heap_properties = *heap_properties;
+    alloc_info.heap_flags = heap_flags;
+
+    if (!(heap_flags & D3D12_HEAP_FLAG_DENY_BUFFERS))
+        alloc_info.flags |= VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER;
+
+    if (!vkd3d_array_reserve((void**)&allocator->chunks, &allocator->chunks_size,
+            allocator->chunks_count + 1, sizeof(*allocator->chunks)))
+    {
+        ERR("Failed to allocate space for new chunk.\n");
+        return E_OUTOFMEMORY;
+    }
+
+    if (FAILED(hr = vkd3d_memory_chunk_create(device, allocator, &alloc_info, &object)))
+        return hr;
+
+    allocator->chunks[allocator->chunks_count++] = *chunk = object;
+    return S_OK;
+}
+
+/* Tries to suballocate from an existing chunk that matches the heap type
+ * and flags and lives in one of the memory types in type_mask; if none
+ * has room, a new chunk is created and the allocation is taken from it. */
+static HRESULT vkd3d_memory_allocator_try_suballocate_memory(struct vkd3d_memory_allocator *allocator,
+        struct d3d12_device *device, const VkMemoryRequirements *memory_requirements, uint32_t type_mask,
+        const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
+        struct vkd3d_memory_allocation *allocation)
+{
+    struct vkd3d_memory_chunk *chunk;
+    HRESULT hr;
+    size_t i;
+
+    type_mask &= device->memory_info.global_mask;
+    type_mask &= memory_requirements->memoryTypeBits;
+
+    for (i = 0; i < allocator->chunks_count; i++)
+    {
+        chunk = allocator->chunks[i];
+
+        /* Match flags since otherwise the backing buffer
+         * may not support our required usage flags */
+        if (chunk->allocation.heap_type != heap_properties->Type ||
+                chunk->allocation.heap_flags != heap_flags)
+            continue;
+
+        /* Filter out unsupported memory types */
+        if (!(type_mask & (1u << chunk->allocation.vk_memory_type)))
+            continue;
+
+        if (SUCCEEDED(hr = vkd3d_memory_chunk_allocate_range(chunk, memory_requirements, allocation)))
+            return hr;
+    }
+
+    /* Try allocating a new chunk on one of the supported memory types
+     * before the caller falls back to potentially slower memory. Pass the
+     * filtered type_mask rather than the raw memoryTypeBits so the new
+     * chunk is requested only on memory types this attempt accepts. */
+    if (FAILED(hr = vkd3d_memory_allocator_add_chunk(allocator, device, heap_properties,
+            heap_flags, type_mask, &chunk)))
+        return hr;
+
+    return vkd3d_memory_chunk_allocate_range(chunk, memory_requirements, allocation);
+}
+
 void vkd3d_free_memory_2(struct d3d12_device *device, struct vkd3d_memory_allocator *allocator,
         const struct vkd3d_memory_allocation *allocation)
 {
-    /* TODO resolve suballocations */
-    vkd3d_memory_allocation_free(allocation, device, allocator);
+    /* Suballocations go back to their chunk; an emptied chunk is destroyed */
+    if (allocation->chunk)
+    {
+        pthread_mutex_lock(&allocator->mutex);
+        vkd3d_memory_chunk_free_range(allocation->chunk, allocation);
+
+        if (vkd3d_memory_chunk_is_free(allocation->chunk))
+            vkd3d_memory_allocator_remove_chunk(allocator, device, allocation->chunk);
+        pthread_mutex_unlock(&allocator->mutex);
+    }
+    else
+        vkd3d_memory_allocation_free(allocation, device, allocator);
+}
+
+/* Suballocates from the allocator's chunks while holding its mutex. Memory
+ * types matching all selected property flags are tried first; types that
+ * only lack VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT are tried if that fails
+ * and such types exist. */
+static HRESULT vkd3d_suballocate_memory(struct d3d12_device *device, struct vkd3d_memory_allocator *allocator,
+        const struct vkd3d_allocate_memory_info *info, struct vkd3d_memory_allocation *allocation)
+{
+    const VkMemoryPropertyFlags optional_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+    VkMemoryRequirements memory_requirements = info->memory_requirements;
+    uint32_t required_mask, optional_mask;
+    VkMemoryPropertyFlags type_flags;
+    HRESULT hr;
+
+    if (FAILED(hr = vkd3d_select_memory_flags(device, &info->heap_properties, &type_flags)))
+        return hr;
+
+    /* Prefer device-local memory if allowed for this allocation */
+    required_mask = vkd3d_find_memory_types_with_flags(device, type_flags & ~optional_flags);
+    optional_mask = vkd3d_find_memory_types_with_flags(device, type_flags);
+
+    pthread_mutex_lock(&allocator->mutex);
+
+    hr = vkd3d_memory_allocator_try_suballocate_memory(allocator, device,
+            &memory_requirements, optional_mask, &info->heap_properties,
+            info->heap_flags, allocation);
+
+    if (FAILED(hr) && (required_mask & ~optional_mask))
+    {
+        hr = vkd3d_memory_allocator_try_suballocate_memory(allocator, device,
+                &memory_requirements, required_mask & ~optional_mask,
+                &info->heap_properties, info->heap_flags, allocation);
+    }
+
+    pthread_mutex_unlock(&allocator->mutex);
+    return hr;
 }
 
 static HRESULT vkd3d_allocate_memory_2(struct d3d12_device *device, struct vkd3d_memory_allocator *allocator,
         const struct vkd3d_allocate_memory_info *info, struct vkd3d_memory_allocation *allocation)
 {
-    /* TODO suballocate */
-    return vkd3d_memory_allocation_init(allocation, device, allocator, info);
+    /* Suballocate only small requests that have neither pNext nor host_ptr */
+    if (!info->pNext && !info->host_ptr && info->memory_requirements.size < VKD3D_VA_BLOCK_SIZE)
+        return vkd3d_suballocate_memory(device, allocator, info, allocation);
+    else
+        return vkd3d_memory_allocation_init(allocation, device, allocator, info);
 }
 
 HRESULT vkd3d_allocate_heap_memory_2(struct d3d12_device *device, struct vkd3d_memory_allocator *allocator,
@@ -438,7 +827,9 @@ HRESULT vkd3d_allocate_heap_memory_2(struct d3d12_device *device, struct vkd3d_m
     alloc_info.heap_properties = info->heap_desc.Properties;
     alloc_info.heap_flags = info->heap_desc.Flags;
     alloc_info.host_ptr = info->host_ptr;
-    alloc_info.flags = VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER;
+
+    if (!(info->heap_desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS))
+        alloc_info.flags |= VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER;
 
     return vkd3d_allocate_memory_2(device, allocator, &alloc_info, allocation);
 }
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h
index e0f8adb2..45906dd9 100644
--- a/libs/vkd3d/vkd3d_private.h
+++ b/libs/vkd3d/vkd3d_private.h
@@ -484,6 +484,9 @@ enum vkd3d_allocation_flag
     VKD3D_ALLOCATION_FLAG_CPU_ACCESS = (1u << 3),
 };
 
+/* Size of the chunks that small allocations are suballocated from */
+#define VKD3D_MEMORY_CHUNK_SIZE (VKD3D_VA_BLOCK_SIZE * 16)
+
 struct vkd3d_memory_chunk;
 
 struct vkd3d_allocate_memory_info
@@ -539,6 +542,20 @@ struct vkd3d_memory_allocation
     struct vkd3d_memory_chunk *chunk;
 };
 
+/* Makes *dst a copy of *src narrowed to size bytes starting offset bytes
+ * in: offset, resource VA and a non-NULL CPU address are advanced. */
+static inline void vkd3d_memory_allocation_slice(struct vkd3d_memory_allocation *dst,
+        const struct vkd3d_memory_allocation *src, VkDeviceSize offset, VkDeviceSize size)
+{
+    *dst = *src;
+    dst->offset += offset;
+    dst->resource.size = size;
+    dst->resource.va += offset;
+
+    if (dst->cpu_address)
+        dst->cpu_address = void_ptr_offset(dst->cpu_address, offset);
+}
+
 struct vkd3d_memory_free_range
 {
     VkDeviceSize offset;