From 619a54810d3b020358c1102a7965886606883889 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 7 Jan 2022 13:47:17 +0100 Subject: [PATCH] vkd3d: Pass down required memory types to scratch allocators. Separate scratch pools by their intended usage. Allows e.g. preprocess buffers to be allocated differently from normal buffers, which is necessary on implementations that use special memory types to implement preprocess buffers. Potentially can also allow for separate pools for host visible scratch memory down the line. Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/command.c | 66 ++++++++++++++---------- libs/vkd3d/device.c | 100 ++++++++++++++++++++++++++----------- libs/vkd3d/vkd3d_private.h | 33 +++++++++--- 3 files changed, 138 insertions(+), 61 deletions(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index d13c84ad..7d997a70 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -1553,7 +1553,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo { struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface); ULONG refcount = InterlockedDecrement(&allocator->refcount); - unsigned int i; + unsigned int i, j; TRACE("%p decreasing refcount to %u.\n", allocator, refcount); @@ -1609,13 +1609,16 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo vkd3d_free(allocator->command_buffers); VK_CALL(vkDestroyCommandPool(device->vk_device, allocator->vk_command_pool, NULL)); - for (i = 0; i < allocator->scratch_buffer_count; i++) - d3d12_device_return_scratch_buffer(device, &allocator->scratch_buffers[i]); + for (i = 0; i < VKD3D_SCRATCH_POOL_KIND_COUNT; i++) + { + for (j = 0; j < allocator->scratch_pools[i].scratch_buffer_count; j++) + d3d12_device_return_scratch_buffer(device, i, &allocator->scratch_pools[i].scratch_buffers[j]); + vkd3d_free(allocator->scratch_pools[i].scratch_buffers); + } for (i = 0; i < allocator->query_pool_count; i++) d3d12_device_return_query_pool(device, &allocator->query_pools[i]); - vkd3d_free(allocator->scratch_buffers); vkd3d_free(allocator->query_pools); #ifdef VKD3D_ENABLE_BREADCRUMBS @@ -1684,7 +1687,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllo struct d3d12_device *device; LONG pending; VkResult vr; - size_t i; + size_t i, j; TRACE("iface %p.\n", iface); @@ -1735,8 +1738,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllo } /* Return scratch buffers to the device */ - for (i = 0; i < allocator->scratch_buffer_count; i++) - d3d12_device_return_scratch_buffer(device, &allocator->scratch_buffers[i]); + for (i = 0; i < VKD3D_SCRATCH_POOL_KIND_COUNT; i++) + { + for (j = 0; j < allocator->scratch_pools[i].scratch_buffer_count; j++) + d3d12_device_return_scratch_buffer(device, i, &allocator->scratch_pools[i].scratch_buffers[j]); + allocator->scratch_pools[i].scratch_buffer_count = 0; + } #ifdef VKD3D_ENABLE_BREADCRUMBS if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS) @@ -1748,8 +1755,6 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllo } #endif - allocator->scratch_buffer_count = 0; - /* Return query pools to the device */ for (i = 0; i < allocator->query_pool_count; i++) d3d12_device_return_query_pool(device, &allocator->query_pools[i]); @@ -1907,9 +1912,7 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo allocator->command_buffers_size = 0; allocator->command_buffer_count = 0; - allocator->scratch_buffers = NULL; - allocator->scratch_buffers_size = 0; - allocator->scratch_buffer_count = 0; + memset(allocator->scratch_pools, 0, sizeof(allocator->scratch_pools)); allocator->query_pools = NULL; allocator->query_pools_size = 0; @@ -1959,8 +1962,11 @@ struct vkd3d_scratch_allocation }; static bool d3d12_command_allocator_allocate_scratch_memory(struct d3d12_command_allocator *allocator, - VkDeviceSize size, VkDeviceSize alignment, struct vkd3d_scratch_allocation *allocation) + enum vkd3d_scratch_pool_kind kind, + VkDeviceSize size, VkDeviceSize alignment, uint32_t memory_types, + struct vkd3d_scratch_allocation *allocation) { + struct d3d12_command_allocator_scratch_pool *pool = &allocator->scratch_pools[kind]; VkDeviceSize aligned_offset, aligned_size; struct vkd3d_scratch_buffer *scratch; unsigned int i; @@ -1968,9 +1974,14 @@ static bool d3d12_command_allocator_allocate_scratch_memory(struct d3d12_command aligned_size = align(size, alignment); /* Probe last block first since the others are likely full */ - for (i = allocator->scratch_buffer_count; i; i--) + for (i = pool->scratch_buffer_count; i; i--) { - scratch = &allocator->scratch_buffers[i - 1]; + scratch = &pool->scratch_buffers[i - 1]; + + /* Extremely unlikely to fail since we have separate lists per pool kind, but to be 100% correct ... */ + if (!(memory_types & (1u << scratch->allocation.device_allocation.vk_memory_type))) + continue; + aligned_offset = align(scratch->offset, alignment); if (aligned_offset + aligned_size <= scratch->allocation.resource.size) @@ -1984,21 +1995,21 @@ static bool d3d12_command_allocator_allocate_scratch_memory(struct d3d12_command } } - if (!vkd3d_array_reserve((void**)&allocator->scratch_buffers, &allocator->scratch_buffers_size, - allocator->scratch_buffer_count + 1, sizeof(*allocator->scratch_buffers))) + if (!vkd3d_array_reserve((void**)&pool->scratch_buffers, &pool->scratch_buffers_size, + pool->scratch_buffer_count + 1, sizeof(*pool->scratch_buffers))) { ERR("Failed to allocate scratch buffer.\n"); return false; } - scratch = &allocator->scratch_buffers[allocator->scratch_buffer_count]; - if (FAILED(d3d12_device_get_scratch_buffer(allocator->device, aligned_size, scratch))) + scratch = &pool->scratch_buffers[pool->scratch_buffer_count]; + if (FAILED(d3d12_device_get_scratch_buffer(allocator->device, kind, aligned_size, memory_types, scratch))) { ERR("Failed to create scratch buffer.\n"); return false; } - allocator->scratch_buffer_count += 1; + pool->scratch_buffer_count += 1; scratch->offset = aligned_size; allocation->buffer = scratch->allocation.resource.vk_buffer; @@ -3387,7 +3398,8 @@ static bool d3d12_command_list_gather_pending_queries(struct d3d12_command_list /* Allocate scratch buffer and resolve virtual Vulkan queries into it */ if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, - resolve_buffer_size, max(ssbo_alignment, sizeof(uint64_t)), &resolve_buffer)) + VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, + resolve_buffer_size, max(ssbo_alignment, sizeof(uint64_t)), ~0u, &resolve_buffer)) goto cleanup; for (i = 0; i < resolve_count; i++) @@ -3404,7 +3416,8 @@ static bool d3d12_command_list_gather_pending_queries(struct d3d12_command_list entry_buffer_size = sizeof(struct query_entry) * list->pending_queries_count; if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, - entry_buffer_size, ssbo_alignment, &entry_buffer)) + VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, + entry_buffer_size, ssbo_alignment, ~0u, &entry_buffer)) goto cleanup; for (i = 0; i < dispatch_count; i++) @@ -5407,7 +5420,8 @@ static bool d3d12_command_list_emit_predicated_command(struct d3d12_command_list vkd3d_meta_get_predicate_pipeline(&list->device->meta_ops, command_type, &pipeline_info); if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, - pipeline_info.data_size, sizeof(uint32_t), scratch)) + VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, + pipeline_info.data_size, sizeof(uint32_t), ~0u, scratch)) return false; d3d12_command_list_end_current_render_pass(list, true); @@ -8541,7 +8555,8 @@ static void d3d12_command_list_clear_uav_with_copy(struct d3d12_command_list *li scratch_buffer_size = element_count * format->byte_count; if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, - scratch_buffer_size, 16, &scratch)) + VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, + scratch_buffer_size, 16, ~0u, &scratch)) { ERR("Failed to allocate scratch memory for UAV clear.\n"); return; @@ -9388,7 +9403,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(d3d12_command_li if (resource) { if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, - sizeof(uint32_t), sizeof(uint32_t), &scratch)) + VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, + sizeof(uint32_t), sizeof(uint32_t), ~0u, &scratch)) return; begin_info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 9f790140..9c61b00a 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -2539,23 +2539,52 @@ static void d3d12_remove_device_singleton(LUID luid) } } -static HRESULT d3d12_device_create_scratch_buffer(struct d3d12_device *device, VkDeviceSize size, struct vkd3d_scratch_buffer *scratch) +static HRESULT d3d12_device_create_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind, + VkDeviceSize size, uint32_t memory_types, struct vkd3d_scratch_buffer *scratch) { - struct vkd3d_allocate_heap_memory_info alloc_info; HRESULT hr; TRACE("device %p, size %llu, scratch %p.\n", device, size, scratch); - memset(&alloc_info, 0, sizeof(alloc_info)); - alloc_info.heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; - alloc_info.heap_desc.SizeInBytes = size; - alloc_info.heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - alloc_info.heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; - alloc_info.extra_allocation_flags = VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH; + if (kind == VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE) + { + struct vkd3d_allocate_heap_memory_info alloc_info; - if (FAILED(hr = vkd3d_allocate_heap_memory(device, &device->memory_allocator, - &alloc_info, &scratch->allocation))) - return hr; + /* We only care about memory types for INDIRECT_PREPROCESS. */ + assert(memory_types == ~0u); + + memset(&alloc_info, 0, sizeof(alloc_info)); + alloc_info.heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; + alloc_info.heap_desc.SizeInBytes = size; + alloc_info.heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + alloc_info.heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; + alloc_info.extra_allocation_flags = VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH; + + if (FAILED(hr = vkd3d_allocate_heap_memory(device, &device->memory_allocator, + &alloc_info, &scratch->allocation))) + return hr; + } + else if (kind == VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS) + { + struct vkd3d_allocate_memory_info alloc_info; + memset(&alloc_info, 0, sizeof(alloc_info)); + + alloc_info.heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT; + alloc_info.memory_requirements.size = size; + alloc_info.memory_requirements.memoryTypeBits = memory_types; + alloc_info.memory_requirements.alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + alloc_info.heap_flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; + alloc_info.optional_memory_properties = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + alloc_info.flags = VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER | VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH; + + if (FAILED(hr = vkd3d_allocate_memory(device, &device->memory_allocator, + &alloc_info, &scratch->allocation))) + return hr; + } + else + { + return E_INVALIDARG; + } scratch->offset = 0; return S_OK; @@ -2568,35 +2597,47 @@ static void d3d12_device_destroy_scratch_buffer(struct d3d12_device *device, con vkd3d_free_memory(device, &device->memory_allocator, &scratch->allocation); } -HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, VkDeviceSize min_size, struct vkd3d_scratch_buffer *scratch) +HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind, + VkDeviceSize min_size, uint32_t memory_types, struct vkd3d_scratch_buffer *scratch) { + struct d3d12_device_scratch_pool *pool = &device->scratch_pools[kind]; + struct vkd3d_scratch_buffer *candidate; + size_t i; + if (min_size > VKD3D_SCRATCH_BUFFER_SIZE) - return d3d12_device_create_scratch_buffer(device, min_size, scratch); + return d3d12_device_create_scratch_buffer(device, kind, min_size, memory_types, scratch); pthread_mutex_lock(&device->mutex); - if (device->scratch_buffer_count) + for (i = pool->scratch_buffer_count; i; i--) { - *scratch = device->scratch_buffers[--device->scratch_buffer_count]; - scratch->offset = 0; - pthread_mutex_unlock(&device->mutex); - return S_OK; - } - else - { - pthread_mutex_unlock(&device->mutex); - return d3d12_device_create_scratch_buffer(device, VKD3D_SCRATCH_BUFFER_SIZE, scratch); + candidate = &pool->scratch_buffers[i - 1]; + + /* Extremely unlikely to fail since we have separate lists per pool kind, but to be 100% correct ... */ + if (memory_types & (1u << candidate->allocation.device_allocation.vk_memory_type)) + { + *scratch = *candidate; + scratch->offset = 0; + pool->scratch_buffers[i - 1] = pool->scratch_buffers[--pool->scratch_buffer_count]; + pthread_mutex_unlock(&device->mutex); + return S_OK; + } } + + pthread_mutex_unlock(&device->mutex); + return d3d12_device_create_scratch_buffer(device, kind, VKD3D_SCRATCH_BUFFER_SIZE, memory_types, scratch); } -void d3d12_device_return_scratch_buffer(struct d3d12_device *device, const struct vkd3d_scratch_buffer *scratch) +void d3d12_device_return_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind, + const struct vkd3d_scratch_buffer *scratch) { + struct d3d12_device_scratch_pool *pool = &device->scratch_pools[kind]; pthread_mutex_lock(&device->mutex); if (scratch->allocation.resource.size == VKD3D_SCRATCH_BUFFER_SIZE && - device->scratch_buffer_count < VKD3D_SCRATCH_BUFFER_COUNT) + pool->scratch_buffer_count < VKD3D_SCRATCH_BUFFER_COUNT) { - device->scratch_buffers[device->scratch_buffer_count++] = *scratch; + pool->scratch_buffers[pool->scratch_buffer_count++] = *scratch; pthread_mutex_unlock(&device->mutex); } else @@ -2815,10 +2856,11 @@ static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(d3d12_device_iface *iface) static void d3d12_device_destroy(struct d3d12_device *device) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - size_t i; + size_t i, j; - for (i = 0; i < device->scratch_buffer_count; i++) - d3d12_device_destroy_scratch_buffer(device, &device->scratch_buffers[i]); + for (i = 0; i < VKD3D_SCRATCH_POOL_KIND_COUNT; i++) + for (j = 0; j < device->scratch_pools[i].scratch_buffer_count; j++) + d3d12_device_destroy_scratch_buffer(device, &device->scratch_pools[i].scratch_buffers[j]); for (i = 0; i < device->query_pool_count; i++) d3d12_device_destroy_query_pool(device, &device->query_pools[i]); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 149d24e8..e8747848 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1842,6 +1842,20 @@ struct vkd3d_query_pool uint32_t next_index; }; +struct d3d12_command_allocator_scratch_pool +{ + struct vkd3d_scratch_buffer *scratch_buffers; + size_t scratch_buffers_size; + size_t scratch_buffer_count; +}; + +enum vkd3d_scratch_pool_kind +{ + VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE = 0, + VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS, + VKD3D_SCRATCH_POOL_KIND_COUNT +}; + /* ID3D12CommandAllocator */ struct d3d12_command_allocator { @@ -1868,9 +1882,7 @@ struct d3d12_command_allocator size_t command_buffers_size; size_t command_buffer_count; - struct vkd3d_scratch_buffer *scratch_buffers; - size_t scratch_buffers_size; - size_t scratch_buffer_count; + struct d3d12_command_allocator_scratch_pool scratch_pools[VKD3D_SCRATCH_POOL_KIND_COUNT]; struct vkd3d_query_pool *query_pools; size_t query_pools_size; @@ -3185,6 +3197,12 @@ struct vkd3d_descriptor_qa_heap_buffer_data; /* ID3D12DeviceExt */ typedef ID3D12DeviceExt d3d12_device_vkd3d_ext_iface; +struct d3d12_device_scratch_pool +{ + struct vkd3d_scratch_buffer scratch_buffers[VKD3D_SCRATCH_BUFFER_COUNT]; + size_t scratch_buffer_count; +}; + struct d3d12_device { d3d12_device_iface ID3D12Device_iface; @@ -3219,8 +3237,7 @@ struct d3d12_device struct vkd3d_memory_allocator memory_allocator; - struct vkd3d_scratch_buffer scratch_buffers[VKD3D_SCRATCH_BUFFER_COUNT]; - size_t scratch_buffer_count; + struct d3d12_device_scratch_pool scratch_pools[VKD3D_SCRATCH_POOL_KIND_COUNT]; struct vkd3d_query_pool query_pools[VKD3D_VIRTUAL_QUERY_POOL_COUNT]; size_t query_pool_count; @@ -3293,8 +3310,10 @@ static inline struct d3d12_device *impl_from_ID3D12Device(d3d12_device_iface *if bool d3d12_device_validate_shader_meta(struct d3d12_device *device, const struct vkd3d_shader_meta *meta); -HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, VkDeviceSize min_size, struct vkd3d_scratch_buffer *scratch); -void d3d12_device_return_scratch_buffer(struct d3d12_device *device, const struct vkd3d_scratch_buffer *scratch); +HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind, + VkDeviceSize min_size, uint32_t memory_types, struct vkd3d_scratch_buffer *scratch); +void d3d12_device_return_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind, + const struct vkd3d_scratch_buffer *scratch); HRESULT d3d12_device_get_query_pool(struct d3d12_device *device, uint32_t type_index, struct vkd3d_query_pool *pool); void d3d12_device_return_query_pool(struct d3d12_device *device, const struct vkd3d_query_pool *pool);