From e0451bb541bcfe6405a6663ae7964ccfc44bfb4f Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen
Date: Mon, 30 Aug 2021 13:38:38 +0200
Subject: [PATCH] vkd3d: Handle fallbacks properly in suballocator.

With BAR budgets, what will happen is that
- Small allocation is requested
- A new chunk is requested
- try_suballocate_memory will end up calling allocate_memory, which
  allocates a fallback memory type
- Subsequent small allocations will always end up allocating a new
  fallback memory block, never reusing existing blocks.
- System memory is rapidly exhausted once apps start hitting against
  budget.

The fix is to add flags which explicitly do not attempt to fallback
allocate. This makes it possible to handle fallbacks at the appropriate
level in try_suballocate_memory instead.

Signed-off-by: Hans-Kristian Arntzen
---
 libs/vkd3d/memory.c        | 26 +++++++++++++++++++++-----
 libs/vkd3d/vkd3d_private.h |  2 ++
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/libs/vkd3d/memory.c b/libs/vkd3d/memory.c
index a09919d5..0c4c1bbb 100644
--- a/libs/vkd3d/memory.c
+++ b/libs/vkd3d/memory.c
@@ -385,6 +385,12 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
     if (FAILED(hr = vkd3d_select_memory_flags(device, &info->heap_properties, &type_flags)))
         return hr;
 
+    /* Mask out optional memory properties as needed.
+     * This is relevant for chunk allocator fallbacks
+     * since the info->memory_requirements already encodes
+     * only HOST_VISIBLE types and we use NO_FALLBACK allocation mode. */
+    type_flags &= ~info->optional_memory_properties;
+
     if (allocation->flags & VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER)
     {
         /* If requested, create a buffer covering the entire allocation
@@ -441,6 +447,11 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
         hr = vkd3d_import_host_memory(device, host_ptr, memory_requirements.size,
                 type_flags, type_mask, &flags_info, &allocation->device_allocation);
     }
+    else if (info->flags & VKD3D_ALLOCATION_NO_FALLBACK)
+    {
+        hr = vkd3d_try_allocate_device_memory(device, memory_requirements.size, type_flags,
+                type_mask, &flags_info, &allocation->device_allocation);
+    }
     else
     {
         hr = vkd3d_allocate_device_memory(device, memory_requirements.size, type_flags,
@@ -1126,8 +1137,9 @@ static void vkd3d_memory_allocator_wait_allocation(struct vkd3d_memory_allocator
     vkd3d_memory_allocator_wait_clear_semaphore(allocator, device, wait_value, UINT64_MAX);
 }
 
-static HRESULT vkd3d_memory_allocator_add_chunk(struct vkd3d_memory_allocator *allocator, struct d3d12_device *device,
-        const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, uint32_t type_mask, struct vkd3d_memory_chunk **chunk)
+static HRESULT vkd3d_memory_allocator_try_add_chunk(struct vkd3d_memory_allocator *allocator, struct d3d12_device *device,
+        const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, uint32_t type_mask,
+        VkMemoryPropertyFlags optional_properties, struct vkd3d_memory_chunk **chunk)
 {
     struct vkd3d_allocate_memory_info alloc_info;
     struct vkd3d_memory_chunk *object;
@@ -1139,6 +1151,8 @@ static HRESULT vkd3d_memory_allocator_add_chunk(struct vkd3d_memory_allocator *a
     alloc_info.memory_requirements.memoryTypeBits = type_mask;
     alloc_info.heap_properties = *heap_properties;
     alloc_info.heap_flags = heap_flags;
+    alloc_info.flags = VKD3D_ALLOCATION_NO_FALLBACK;
+    alloc_info.optional_memory_properties = optional_properties;
 
     if (!(heap_flags & D3D12_HEAP_FLAG_DENY_BUFFERS))
         alloc_info.flags |= VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER;
@@ -1159,6 +1173,7 @@ static HRESULT vkd3d_memory_allocator_add_chunk(struct vkd3d_memory_allocator *a
 
 static HRESULT vkd3d_memory_allocator_try_suballocate_memory(struct vkd3d_memory_allocator *allocator,
         struct d3d12_device *device, const VkMemoryRequirements *memory_requirements, uint32_t type_mask,
+        VkMemoryPropertyFlags optional_properties,
         const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
         struct vkd3d_memory_allocation *allocation)
 {
@@ -1190,8 +1205,8 @@ static HRESULT vkd3d_memory_allocator_try_suballocate_memory(struct vkd3d_memory
 
     /* Try allocating a new chunk on one of the supported memory type
      * before the caller falls back to potentially slower memory */
-    if (FAILED(hr = vkd3d_memory_allocator_add_chunk(allocator, device, heap_properties,
-            heap_flags & heap_flag_mask, memory_requirements->memoryTypeBits, &chunk)))
+    if (FAILED(hr = vkd3d_memory_allocator_try_add_chunk(allocator, device, heap_properties,
+            heap_flags & heap_flag_mask, type_mask, optional_properties, &chunk)))
         return hr;
 
     return vkd3d_memory_chunk_allocate_range(chunk, memory_requirements, allocation);
@@ -1235,13 +1250,14 @@ static HRESULT vkd3d_suballocate_memory(struct d3d12_device *device, struct vkd3
     pthread_mutex_lock(&allocator->mutex);
 
     hr = vkd3d_memory_allocator_try_suballocate_memory(allocator, device,
-            &memory_requirements, optional_mask, &info->heap_properties,
+            &memory_requirements, optional_mask, 0, &info->heap_properties,
             info->heap_flags, allocation);
 
     if (FAILED(hr) && (required_mask & ~optional_mask))
     {
         hr = vkd3d_memory_allocator_try_suballocate_memory(allocator, device,
                 &memory_requirements, required_mask & ~optional_mask,
+                optional_flags,
                 &info->heap_properties, info->heap_flags, allocation);
     }
 
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h
index a7f5ddc1..9c882ad0 100644
--- a/libs/vkd3d/vkd3d_private.h
+++ b/libs/vkd3d/vkd3d_private.h
@@ -585,6 +585,7 @@ enum vkd3d_allocation_flag
     VKD3D_ALLOCATION_FLAG_GPU_ADDRESS       = (1u << 1),
     VKD3D_ALLOCATION_FLAG_CPU_ACCESS        = (1u << 2),
     VKD3D_ALLOCATION_FLAG_ALLOW_WRITE_WATCH = (1u << 3),
+    VKD3D_ALLOCATION_NO_FALLBACK            = (1u << 4),
 };
 
 #define VKD3D_MEMORY_CHUNK_SIZE (VKD3D_VA_BLOCK_SIZE * 16)
@@ -599,6 +600,7 @@ struct vkd3d_allocate_memory_info
     void *host_ptr;
     const void *pNext;
     uint32_t flags;
+    VkMemoryPropertyFlags optional_memory_properties;
 };
 
 struct vkd3d_allocate_heap_memory_info
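For readers unfamiliar with the chunk suballocator, the sketch below is a simplified, self-contained illustration of the pattern this patch moves to: chunk creation never falls back on its own (the spirit of VKD3D_ALLOCATION_NO_FALLBACK), and the caller of the suballocation path retries with the fallback memory class explicitly, so fallback chunks are tracked under their real class and get reused. It is not the vkd3d-proton implementation; every toy_* name and the memory_class enum are hypothetical stand-ins for the real Vulkan memory type masks and chunk allocator.

/* Minimal sketch, not vkd3d-proton code: toy_* names and memory_class stand in
 * for the real chunk allocator and Vulkan memory type masks. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

enum memory_class
{
    MEMORY_CLASS_PREFERRED, /* e.g. DEVICE_LOCAL | HOST_VISIBLE, small BAR budget */
    MEMORY_CLASS_FALLBACK,  /* e.g. plain HOST_VISIBLE system memory */
    MEMORY_CLASS_COUNT,
};

#define TOY_CHUNK_SIZE (16u * 1024u * 1024u)
#define TOY_MAX_CHUNKS 64u

struct toy_chunk
{
    enum memory_class memory_class;
    uint32_t offset; /* bump allocator, no freeing, for brevity */
};

struct toy_allocator
{
    struct toy_chunk chunks[TOY_MAX_CHUNKS];
    size_t chunk_count;
    uint64_t budget[MEMORY_CLASS_COUNT]; /* remaining budget per class */
};

struct toy_allocation
{
    struct toy_chunk *chunk;
    uint32_t offset;
};

/* Creates a chunk in exactly the requested class: when that class is out of
 * budget, fail instead of silently allocating from some other memory type. */
static bool toy_try_add_chunk(struct toy_allocator *a, enum memory_class cls,
        struct toy_chunk **chunk)
{
    if (a->chunk_count == TOY_MAX_CHUNKS || a->budget[cls] < TOY_CHUNK_SIZE)
        return false;
    a->budget[cls] -= TOY_CHUNK_SIZE;
    *chunk = &a->chunks[a->chunk_count++];
    (*chunk)->memory_class = cls;
    (*chunk)->offset = 0;
    return true;
}

static bool toy_try_suballocate(struct toy_allocator *a, uint32_t size,
        enum memory_class cls, struct toy_allocation *out)
{
    struct toy_chunk *chunk;
    size_t i;

    /* Reuse an existing chunk of the requested class first. */
    for (i = 0; i < a->chunk_count; i++)
    {
        chunk = &a->chunks[i];
        if (chunk->memory_class == cls && chunk->offset + size <= TOY_CHUNK_SIZE)
            goto found;
    }

    /* No room in existing chunks: create a new one, with no hidden fallback. */
    if (!toy_try_add_chunk(a, cls, &chunk))
        return false;

found:
    out->chunk = chunk;
    out->offset = chunk->offset;
    chunk->offset += size;
    return true;
}

/* The fallback decision lives at the top level, analogous to the caller of
 * try_suballocate_memory retrying with the remaining memory types. */
static bool toy_suballocate(struct toy_allocator *a, uint32_t size,
        struct toy_allocation *out)
{
    if (toy_try_suballocate(a, size, MEMORY_CLASS_PREFERRED, out))
        return true;
    return toy_try_suballocate(a, size, MEMORY_CLASS_FALLBACK, out);
}

int main(void)
{
    struct toy_allocator a = { .budget = { 2 * TOY_CHUNK_SIZE, UINT64_MAX } };
    struct toy_allocation alloc;
    unsigned int i, count = 0;

    /* 1024 small allocations: the first chunks come from the preferred class,
     * the rest pack densely into reused fallback chunks. */
    for (i = 0; i < 1024; i++)
        count += toy_suballocate(&a, 64 * 1024, &alloc);

    printf("%u allocations packed into %zu chunks\n", count, a.chunk_count);
    return 0;
}

With a budget of two preferred chunks, the 1024 small allocations end up in four chunks total. In the broken behaviour the commit message describes, the hidden fallback inside chunk creation meant each small allocation after the budget was hit got its own fresh fallback block that was never reused, which is what exhausted system memory.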