From e0451bb541bcfe6405a6663ae7964ccfc44bfb4f Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen
Date: Mon, 30 Aug 2021 13:38:38 +0200
Subject: [PATCH] vkd3d: Handle fallbacks properly in suballocator.

With BAR budgets, what will happen is that
- Small allocation is requested
- A new chunk is requested
- try_suballocate_memory will end up calling allocate_memory, which
  allocates a fallback memory type
- Subsequent small allocations will always end up allocating a new
  fallback memory block, never reusing existing blocks.
- System memory is rapidly exhausted once apps start hitting against
  budget.

The fix is to add flags which explicitly do not attempt to fallback
allocate. This makes it possible to handle fallbacks at the appropriate
level in try_suballocate_memory instead.

Signed-off-by: Hans-Kristian Arntzen
---
 libs/vkd3d/memory.c        | 26 +++++++++++++++++++++-----
 libs/vkd3d/vkd3d_private.h |  2 ++
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/libs/vkd3d/memory.c b/libs/vkd3d/memory.c
index a09919d5..0c4c1bbb 100644
--- a/libs/vkd3d/memory.c
+++ b/libs/vkd3d/memory.c
@@ -385,6 +385,12 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
     if (FAILED(hr = vkd3d_select_memory_flags(device, &info->heap_properties, &type_flags)))
         return hr;
 
+    /* Mask out optional memory properties as needed.
+     * This is relevant for chunk allocator fallbacks
+     * since the info->memory_requirements already encodes
+     * only HOST_VISIBLE types and we use NO_FALLBACK allocation mode. */
+    type_flags &= ~info->optional_memory_properties;
+
     if (allocation->flags & VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER)
     {
         /* If requested, create a buffer covering the entire allocation
@@ -441,6 +447,11 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
         hr = vkd3d_import_host_memory(device, host_ptr, memory_requirements.size,
                 type_flags, type_mask, &flags_info, &allocation->device_allocation);
     }
+    else if (info->flags & VKD3D_ALLOCATION_NO_FALLBACK)
+    {
+        hr = vkd3d_try_allocate_device_memory(device, memory_requirements.size, type_flags,
+                type_mask, &flags_info, &allocation->device_allocation);
+    }
     else
     {
         hr = vkd3d_allocate_device_memory(device, memory_requirements.size, type_flags,
@@ -1126,8 +1137,9 @@ static void vkd3d_memory_allocator_wait_allocation(struct vkd3d_memory_allocator
     vkd3d_memory_allocator_wait_clear_semaphore(allocator, device, wait_value, UINT64_MAX);
 }
 
-static HRESULT vkd3d_memory_allocator_add_chunk(struct vkd3d_memory_allocator *allocator, struct d3d12_device *device,
-        const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, uint32_t type_mask, struct vkd3d_memory_chunk **chunk)
+static HRESULT vkd3d_memory_allocator_try_add_chunk(struct vkd3d_memory_allocator *allocator, struct d3d12_device *device,
+        const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, uint32_t type_mask,
+        VkMemoryPropertyFlags optional_properties, struct vkd3d_memory_chunk **chunk)
 {
     struct vkd3d_allocate_memory_info alloc_info;
     struct vkd3d_memory_chunk *object;
@@ -1139,6 +1151,8 @@ static HRESULT vkd3d_memory_allocator_add_chunk(struct vkd3d_memory_allocator *a
     alloc_info.memory_requirements.memoryTypeBits = type_mask;
     alloc_info.heap_properties = *heap_properties;
     alloc_info.heap_flags = heap_flags;
+    alloc_info.flags = VKD3D_ALLOCATION_NO_FALLBACK;
+    alloc_info.optional_memory_properties = optional_properties;
 
     if (!(heap_flags & D3D12_HEAP_FLAG_DENY_BUFFERS))
         alloc_info.flags |= VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER;
@@ -1159,6 +1173,7 @@ static HRESULT vkd3d_memory_allocator_add_chunk(struct vkd3d_memory_allocator *a
 
 static HRESULT vkd3d_memory_allocator_try_suballocate_memory(struct vkd3d_memory_allocator *allocator,
         struct d3d12_device *device, const VkMemoryRequirements *memory_requirements, uint32_t type_mask,
+        VkMemoryPropertyFlags optional_properties,
         const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
         struct vkd3d_memory_allocation *allocation)
 {
@@ -1190,8 +1205,8 @@ static HRESULT vkd3d_memory_allocator_try_suballocate_memory(struct vkd3d_memory
 
     /* Try allocating a new chunk on one of the supported memory type
      * before the caller falls back to potentially slower memory */
-    if (FAILED(hr = vkd3d_memory_allocator_add_chunk(allocator, device, heap_properties,
-            heap_flags & heap_flag_mask, memory_requirements->memoryTypeBits, &chunk)))
+    if (FAILED(hr = vkd3d_memory_allocator_try_add_chunk(allocator, device, heap_properties,
+            heap_flags & heap_flag_mask, type_mask, optional_properties, &chunk)))
         return hr;
 
     return vkd3d_memory_chunk_allocate_range(chunk, memory_requirements, allocation);
@@ -1235,13 +1250,14 @@ static HRESULT vkd3d_suballocate_memory(struct d3d12_device *device, struct vkd3
     pthread_mutex_lock(&allocator->mutex);
 
     hr = vkd3d_memory_allocator_try_suballocate_memory(allocator, device,
-            &memory_requirements, optional_mask, &info->heap_properties,
+            &memory_requirements, optional_mask, 0, &info->heap_properties,
             info->heap_flags, allocation);
 
     if (FAILED(hr) && (required_mask & ~optional_mask))
     {
         hr = vkd3d_memory_allocator_try_suballocate_memory(allocator, device,
                 &memory_requirements, required_mask & ~optional_mask,
+                optional_flags,
                 &info->heap_properties, info->heap_flags, allocation);
     }
 
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h
index a7f5ddc1..9c882ad0 100644
--- a/libs/vkd3d/vkd3d_private.h
+++ b/libs/vkd3d/vkd3d_private.h
@@ -585,6 +585,7 @@ enum vkd3d_allocation_flag
     VKD3D_ALLOCATION_FLAG_GPU_ADDRESS       = (1u << 1),
     VKD3D_ALLOCATION_FLAG_CPU_ACCESS        = (1u << 2),
     VKD3D_ALLOCATION_FLAG_ALLOW_WRITE_WATCH = (1u << 3),
+    VKD3D_ALLOCATION_NO_FALLBACK            = (1u << 4),
 };
 
 #define VKD3D_MEMORY_CHUNK_SIZE (VKD3D_VA_BLOCK_SIZE * 16)
@@ -599,6 +600,7 @@ struct vkd3d_allocate_memory_info
     void *host_ptr;
     const void *pNext;
     uint32_t flags;
+    VkMemoryPropertyFlags optional_memory_properties;
 };
 
 struct vkd3d_allocate_heap_memory_info
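For readers unfamiliar with the chunk suballocator, the sketch below is a simplified, self-contained illustration of the pattern this patch moves to: chunk creation never falls back on its own (the spirit of VKD3D_ALLOCATION_NO_FALLBACK), and the caller of the suballocation path retries with the fallback memory class explicitly, so fallback chunks are tracked under their real class and get reused. It is not the vkd3d-proton implementation; every toy_* name and the memory_class enum are hypothetical stand-ins for the real Vulkan memory type masks and chunk allocator.

/* Minimal sketch, not vkd3d-proton code: toy_* names and memory_class stand in
 * for the real chunk allocator and Vulkan memory type masks. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

enum memory_class
{
    MEMORY_CLASS_PREFERRED, /* e.g. DEVICE_LOCAL | HOST_VISIBLE, small BAR budget */
    MEMORY_CLASS_FALLBACK,  /* e.g. plain HOST_VISIBLE system memory */
    MEMORY_CLASS_COUNT,
};

#define TOY_CHUNK_SIZE (16u * 1024u * 1024u)
#define TOY_MAX_CHUNKS 64u

struct toy_chunk
{
    enum memory_class memory_class;
    uint32_t offset; /* bump allocator, no freeing, for brevity */
};

struct toy_allocator
{
    struct toy_chunk chunks[TOY_MAX_CHUNKS];
    size_t chunk_count;
    uint64_t budget[MEMORY_CLASS_COUNT]; /* remaining budget per class */
};

struct toy_allocation
{
    struct toy_chunk *chunk;
    uint32_t offset;
};

/* Creates a chunk in exactly the requested class: when that class is out of
 * budget, fail instead of silently allocating from some other memory type. */
static bool toy_try_add_chunk(struct toy_allocator *a, enum memory_class cls,
        struct toy_chunk **chunk)
{
    if (a->chunk_count == TOY_MAX_CHUNKS || a->budget[cls] < TOY_CHUNK_SIZE)
        return false;
    a->budget[cls] -= TOY_CHUNK_SIZE;
    *chunk = &a->chunks[a->chunk_count++];
    (*chunk)->memory_class = cls;
    (*chunk)->offset = 0;
    return true;
}

static bool toy_try_suballocate(struct toy_allocator *a, uint32_t size,
        enum memory_class cls, struct toy_allocation *out)
{
    struct toy_chunk *chunk;
    size_t i;

    /* Reuse an existing chunk of the requested class first. */
    for (i = 0; i < a->chunk_count; i++)
    {
        chunk = &a->chunks[i];
        if (chunk->memory_class == cls && chunk->offset + size <= TOY_CHUNK_SIZE)
            goto found;
    }

    /* No room in existing chunks: create a new one, with no hidden fallback. */
    if (!toy_try_add_chunk(a, cls, &chunk))
        return false;

found:
    out->chunk = chunk;
    out->offset = chunk->offset;
    chunk->offset += size;
    return true;
}

/* The fallback decision lives at the top level, analogous to the caller of
 * try_suballocate_memory retrying with the remaining memory types. */
static bool toy_suballocate(struct toy_allocator *a, uint32_t size,
        struct toy_allocation *out)
{
    if (toy_try_suballocate(a, size, MEMORY_CLASS_PREFERRED, out))
        return true;
    return toy_try_suballocate(a, size, MEMORY_CLASS_FALLBACK, out);
}

int main(void)
{
    struct toy_allocator a = { .budget = { 2 * TOY_CHUNK_SIZE, UINT64_MAX } };
    struct toy_allocation alloc;
    unsigned int i, count = 0;

    /* 1024 small allocations: the first chunks come from the preferred class,
     * the rest pack densely into reused fallback chunks. */
    for (i = 0; i < 1024; i++)
        count += toy_suballocate(&a, 64 * 1024, &alloc);

    printf("%u allocations packed into %zu chunks\n", count, a.chunk_count);
    return 0;
}

With a budget of two preferred chunks, the 1024 small allocations end up in four chunks total. In the broken behaviour the commit message describes, the hidden fallback inside chunk creation meant each small allocation after the budget was hit got its own fresh fallback block that was never reused, which is what exhausted system memory.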