vkd3d: Handle fallbacks properly in suballocator.

With BAR budgets, what will happen is that
- Small allocation is requested
- A new chunk is requested
- try_suballocate_memory will end up calling allocate_memory, which
  allocates a fallback memory type
- Subsequent small allocations will always end up allocating a new
  fallback memory block, never reusing existing blocks.
- System memory is rapidly exhausted once apps start hitting against
  budget.

The fix is to add a flag which explicitly disables fallback
allocation. This makes it possible to handle fallbacks at the
appropriate level in try_suballocate_memory instead.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
This commit is contained in:
Hans-Kristian Arntzen 2021-08-30 13:38:38 +02:00
parent cb94cfd10c
commit e0451bb541
2 changed files with 23 additions and 5 deletions

View File

@ -385,6 +385,12 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
if (FAILED(hr = vkd3d_select_memory_flags(device, &info->heap_properties, &type_flags)))
return hr;
/* Mask out optional memory properties as needed.
* This is relevant for chunk allocator fallbacks
* since the info->memory_requirements already encodes
* only HOST_VISIBLE types and we use NO_FALLBACK allocation mode. */
type_flags &= ~info->optional_memory_properties;
if (allocation->flags & VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER)
{
/* If requested, create a buffer covering the entire allocation
@ -441,6 +447,11 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
hr = vkd3d_import_host_memory(device, host_ptr, memory_requirements.size,
type_flags, type_mask, &flags_info, &allocation->device_allocation);
}
else if (info->flags & VKD3D_ALLOCATION_NO_FALLBACK)
{
hr = vkd3d_try_allocate_device_memory(device, memory_requirements.size, type_flags,
type_mask, &flags_info, &allocation->device_allocation);
}
else
{
hr = vkd3d_allocate_device_memory(device, memory_requirements.size, type_flags,
@ -1126,8 +1137,9 @@ static void vkd3d_memory_allocator_wait_allocation(struct vkd3d_memory_allocator
vkd3d_memory_allocator_wait_clear_semaphore(allocator, device, wait_value, UINT64_MAX);
}
static HRESULT vkd3d_memory_allocator_add_chunk(struct vkd3d_memory_allocator *allocator, struct d3d12_device *device,
const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, uint32_t type_mask, struct vkd3d_memory_chunk **chunk)
static HRESULT vkd3d_memory_allocator_try_add_chunk(struct vkd3d_memory_allocator *allocator, struct d3d12_device *device,
const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, uint32_t type_mask,
VkMemoryPropertyFlags optional_properties, struct vkd3d_memory_chunk **chunk)
{
struct vkd3d_allocate_memory_info alloc_info;
struct vkd3d_memory_chunk *object;
@ -1139,6 +1151,8 @@ static HRESULT vkd3d_memory_allocator_add_chunk(struct vkd3d_memory_allocator *a
alloc_info.memory_requirements.memoryTypeBits = type_mask;
alloc_info.heap_properties = *heap_properties;
alloc_info.heap_flags = heap_flags;
alloc_info.flags = VKD3D_ALLOCATION_NO_FALLBACK;
alloc_info.optional_memory_properties = optional_properties;
if (!(heap_flags & D3D12_HEAP_FLAG_DENY_BUFFERS))
alloc_info.flags |= VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER;
@ -1159,6 +1173,7 @@ static HRESULT vkd3d_memory_allocator_add_chunk(struct vkd3d_memory_allocator *a
static HRESULT vkd3d_memory_allocator_try_suballocate_memory(struct vkd3d_memory_allocator *allocator,
struct d3d12_device *device, const VkMemoryRequirements *memory_requirements, uint32_t type_mask,
VkMemoryPropertyFlags optional_properties,
const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
struct vkd3d_memory_allocation *allocation)
{
@ -1190,8 +1205,8 @@ static HRESULT vkd3d_memory_allocator_try_suballocate_memory(struct vkd3d_memory
/* Try allocating a new chunk on one of the supported memory type
* before the caller falls back to potentially slower memory */
if (FAILED(hr = vkd3d_memory_allocator_add_chunk(allocator, device, heap_properties,
heap_flags & heap_flag_mask, memory_requirements->memoryTypeBits, &chunk)))
if (FAILED(hr = vkd3d_memory_allocator_try_add_chunk(allocator, device, heap_properties,
heap_flags & heap_flag_mask, type_mask, optional_properties, &chunk)))
return hr;
return vkd3d_memory_chunk_allocate_range(chunk, memory_requirements, allocation);
@ -1235,13 +1250,14 @@ static HRESULT vkd3d_suballocate_memory(struct d3d12_device *device, struct vkd3
pthread_mutex_lock(&allocator->mutex);
hr = vkd3d_memory_allocator_try_suballocate_memory(allocator, device,
&memory_requirements, optional_mask, &info->heap_properties,
&memory_requirements, optional_mask, 0, &info->heap_properties,
info->heap_flags, allocation);
if (FAILED(hr) && (required_mask & ~optional_mask))
{
hr = vkd3d_memory_allocator_try_suballocate_memory(allocator, device,
&memory_requirements, required_mask & ~optional_mask,
optional_flags,
&info->heap_properties, info->heap_flags, allocation);
}

View File

@ -585,6 +585,7 @@ enum vkd3d_allocation_flag
VKD3D_ALLOCATION_FLAG_GPU_ADDRESS = (1u << 1),
VKD3D_ALLOCATION_FLAG_CPU_ACCESS = (1u << 2),
VKD3D_ALLOCATION_FLAG_ALLOW_WRITE_WATCH = (1u << 3),
VKD3D_ALLOCATION_NO_FALLBACK = (1u << 4),
};
#define VKD3D_MEMORY_CHUNK_SIZE (VKD3D_VA_BLOCK_SIZE * 16)
@ -599,6 +600,7 @@ struct vkd3d_allocate_memory_info
void *host_ptr;
const void *pNext;
uint32_t flags;
VkMemoryPropertyFlags optional_memory_properties;
};
struct vkd3d_allocate_heap_memory_info