diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index d13c84ad..7d997a70 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -1553,7 +1553,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo { struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface); ULONG refcount = InterlockedDecrement(&allocator->refcount); - unsigned int i; + unsigned int i, j; /* i walks the pool kinds, j walks the buffers of one kind */ TRACE("%p decreasing refcount to %u.\n", allocator, refcount); @@ -1609,13 +1609,16 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo vkd3d_free(allocator->command_buffers); VK_CALL(vkDestroyCommandPool(device->vk_device, allocator->vk_command_pool, NULL)); - for (i = 0; i < allocator->scratch_buffer_count; i++) - d3d12_device_return_scratch_buffer(device, &allocator->scratch_buffers[i]); + for (i = 0; i < VKD3D_SCRATCH_POOL_KIND_COUNT; i++) /* for each kind: return its buffers to the device, then free its array */ + { + for (j = 0; j < allocator->scratch_pools[i].scratch_buffer_count; j++) + d3d12_device_return_scratch_buffer(device, i, &allocator->scratch_pools[i].scratch_buffers[j]); + vkd3d_free(allocator->scratch_pools[i].scratch_buffers); + } for (i = 0; i < allocator->query_pool_count; i++) d3d12_device_return_query_pool(device, &allocator->query_pools[i]); - vkd3d_free(allocator->scratch_buffers); vkd3d_free(allocator->query_pools); #ifdef VKD3D_ENABLE_BREADCRUMBS @@ -1684,7 +1687,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllo struct d3d12_device *device; LONG pending; VkResult vr; - size_t i; + size_t i, j; /* i walks the pool kinds, j walks the buffers of one kind */ TRACE("iface %p.\n", iface); @@ -1735,8 +1738,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllo } /* Return scratch buffers to the device */ - for (i = 0; i < allocator->scratch_buffer_count; i++) - d3d12_device_return_scratch_buffer(device, &allocator->scratch_buffers[i]); + for (i = 0; i < VKD3D_SCRATCH_POOL_KIND_COUNT; i++) + { + for (j = 0; j <
allocator->scratch_pools[i].scratch_buffer_count; j++) + d3d12_device_return_scratch_buffer(device, i, &allocator->scratch_pools[i].scratch_buffers[j]); + allocator->scratch_pools[i].scratch_buffer_count = 0; /* only the count is cleared; the array is not freed in this hunk */ + } #ifdef VKD3D_ENABLE_BREADCRUMBS if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS) @@ -1748,8 +1755,6 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllo } #endif - allocator->scratch_buffer_count = 0; - /* Return query pools to the device */ for (i = 0; i < allocator->query_pool_count; i++) d3d12_device_return_query_pool(device, &allocator->query_pools[i]); @@ -1907,9 +1912,7 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo allocator->command_buffers_size = 0; allocator->command_buffer_count = 0; - allocator->scratch_buffers = NULL; - allocator->scratch_buffers_size = 0; - allocator->scratch_buffer_count = 0; + memset(allocator->scratch_pools, 0, sizeof(allocator->scratch_pools)); /* zero-fills pointer, capacity and count of every pool kind */ allocator->query_pools = NULL; allocator->query_pools_size = 0; @@ -1959,8 +1962,11 @@ struct vkd3d_scratch_allocation }; static bool d3d12_command_allocator_allocate_scratch_memory(struct d3d12_command_allocator *allocator, - VkDeviceSize size, VkDeviceSize alignment, struct vkd3d_scratch_allocation *allocation) + enum vkd3d_scratch_pool_kind kind, /* selects allocator->scratch_pools[kind] */ + VkDeviceSize size, VkDeviceSize alignment, uint32_t memory_types, /* bit N set: Vulkan memory type N is acceptable */ + struct vkd3d_scratch_allocation *allocation) { + struct d3d12_command_allocator_scratch_pool *pool = &allocator->scratch_pools[kind]; /* NOTE(review): kind indexes the array without a range check */ VkDeviceSize aligned_offset, aligned_size; struct vkd3d_scratch_buffer *scratch; unsigned int i; @@ -1968,9 +1974,14 @@ static bool d3d12_command_allocator_allocate_scratch_memory(struct d3d12_command aligned_size = align(size, alignment); /* Probe last block first since the others are likely full */ - for (i = allocator->scratch_buffer_count; i; i--) + for (i = pool->scratch_buffer_count; i; i--) { - scratch = &allocator->scratch_buffers[i - 1]; + scratch =
&pool->scratch_buffers[i - 1]; + + /* Skip buffers whose Vulkan memory type is not allowed by memory_types. A mismatch is very unlikely because each pool kind has its own list, but check anyway for correctness. */ + if (!(memory_types & (1u << scratch->allocation.device_allocation.vk_memory_type))) + continue; + aligned_offset = align(scratch->offset, alignment); if (aligned_offset + aligned_size <= scratch->allocation.resource.size) @@ -1984,21 +1995,21 @@ static bool d3d12_command_allocator_allocate_scratch_memory(struct d3d12_command } } - if (!vkd3d_array_reserve((void**)&allocator->scratch_buffers, &allocator->scratch_buffers_size, - allocator->scratch_buffer_count + 1, sizeof(*allocator->scratch_buffers))) + if (!vkd3d_array_reserve((void**)&pool->scratch_buffers, &pool->scratch_buffers_size, + pool->scratch_buffer_count + 1, sizeof(*pool->scratch_buffers))) { ERR("Failed to allocate scratch buffer.\n"); return false; } - scratch = &allocator->scratch_buffers[allocator->scratch_buffer_count]; - if (FAILED(d3d12_device_get_scratch_buffer(allocator->device, aligned_size, scratch))) + scratch = &pool->scratch_buffers[pool->scratch_buffer_count]; /* first unused slot; not counted yet */ + if (FAILED(d3d12_device_get_scratch_buffer(allocator->device, kind, aligned_size, memory_types, scratch))) { ERR("Failed to create scratch buffer.\n"); return false; } - allocator->scratch_buffer_count += 1; + pool->scratch_buffer_count += 1; /* bumped only after the device supplied a buffer, so a failure above leaves the count unchanged */ scratch->offset = aligned_size; allocation->buffer = scratch->allocation.resource.vk_buffer; @@ -3387,7 +3398,8 @@ static bool d3d12_command_list_gather_pending_queries(struct d3d12_command_list /* Allocate scratch buffer and resolve virtual Vulkan queries into it */ if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, - resolve_buffer_size, max(ssbo_alignment, sizeof(uint64_t)), &resolve_buffer)) + VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, + resolve_buffer_size, max(ssbo_alignment, sizeof(uint64_t)), ~0u, /* no memory type restriction */ &resolve_buffer)) goto cleanup; for (i = 0; i < resolve_count; i++) @@ -3404,7 +3416,8 @@ static bool
d3d12_command_list_gather_pending_queries(struct d3d12_command_list entry_buffer_size = sizeof(struct query_entry) * list->pending_queries_count; if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, - entry_buffer_size, ssbo_alignment, &entry_buffer)) + VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, + entry_buffer_size, ssbo_alignment, ~0u, /* no memory type restriction */ &entry_buffer)) goto cleanup; for (i = 0; i < dispatch_count; i++) @@ -5407,7 +5420,8 @@ static bool d3d12_command_list_emit_predicated_command(struct d3d12_command_list vkd3d_meta_get_predicate_pipeline(&list->device->meta_ops, command_type, &pipeline_info); if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, - pipeline_info.data_size, sizeof(uint32_t), scratch)) + VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, + pipeline_info.data_size, sizeof(uint32_t), ~0u, /* no memory type restriction */ scratch)) return false; d3d12_command_list_end_current_render_pass(list, true); @@ -8541,7 +8555,8 @@ static void d3d12_command_list_clear_uav_with_copy(struct d3d12_command_list *li scratch_buffer_size = element_count * format->byte_count; if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, - scratch_buffer_size, 16, &scratch)) + VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, + scratch_buffer_size, 16, ~0u, /* no memory type restriction */ &scratch)) { ERR("Failed to allocate scratch memory for UAV clear.\n"); return; @@ -9388,7 +9403,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(d3d12_command_li if (resource) { if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, - sizeof(uint32_t), sizeof(uint32_t), &scratch)) + VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, + sizeof(uint32_t), sizeof(uint32_t), ~0u, /* no memory type restriction */ &scratch)) return; begin_info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 9f790140..9c61b00a 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -2539,23 +2539,52 @@ static void d3d12_remove_device_singleton(LUID luid) } } -static HRESULT
d3d12_device_create_scratch_buffer(struct d3d12_device *device, VkDeviceSize size, struct vkd3d_scratch_buffer *scratch) +static HRESULT d3d12_device_create_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind, + VkDeviceSize size, uint32_t memory_types, struct vkd3d_scratch_buffer *scratch) /* Allocates a new scratch buffer of the given kind into *scratch; returns E_INVALIDARG for an unknown kind. */ { - struct vkd3d_allocate_heap_memory_info alloc_info; HRESULT hr; TRACE("device %p, size %llu, scratch %p.\n", device, size, scratch); - memset(&alloc_info, 0, sizeof(alloc_info)); - alloc_info.heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; - alloc_info.heap_desc.SizeInBytes = size; - alloc_info.heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - alloc_info.heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; - alloc_info.extra_allocation_flags = VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH; + if (kind == VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE) /* NOTE(review): the TRACE above does not log kind or memory_types */ + { + struct vkd3d_allocate_heap_memory_info alloc_info; - if (FAILED(hr = vkd3d_allocate_heap_memory(device, &device->memory_allocator, - &alloc_info, &scratch->allocation))) - return hr; + /* memory_types is only honored for INDIRECT_PREPROCESS. This branch never reads the mask, so callers must pass ~0u.
*/ + assert(memory_types == ~0u); + + memset(&alloc_info, 0, sizeof(alloc_info)); + alloc_info.heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; + alloc_info.heap_desc.SizeInBytes = size; + alloc_info.heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + alloc_info.heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; /* NOTE(review): CREATE_NOT_ZEROED is new relative to the removed code; assumes no scratch user relies on zero-initialised contents - confirm */ + alloc_info.extra_allocation_flags = VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH; + + if (FAILED(hr = vkd3d_allocate_heap_memory(device, &device->memory_allocator, + &alloc_info, &scratch->allocation))) + return hr; + } + else if (kind == VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS) /* here the caller's mask is forwarded as memoryTypeBits */ + { + struct vkd3d_allocate_memory_info alloc_info; + memset(&alloc_info, 0, sizeof(alloc_info)); + + alloc_info.heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT; + alloc_info.memory_requirements.size = size; + alloc_info.memory_requirements.memoryTypeBits = memory_types; + alloc_info.memory_requirements.alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + alloc_info.heap_flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; + alloc_info.optional_memory_properties = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + alloc_info.flags = VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER | VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH; + + if (FAILED(hr = vkd3d_allocate_memory(device, &device->memory_allocator, + &alloc_info, &scratch->allocation))) + return hr; + } + else + { + return E_INVALIDARG; /* kind is neither DEVICE_STORAGE nor INDIRECT_PREPROCESS */ + } scratch->offset = 0; return S_OK; @@ -2568,35 +2597,47 @@ static void d3d12_device_destroy_scratch_buffer(struct d3d12_device *device, con vkd3d_free_memory(device, &device->memory_allocator, &scratch->allocation); } -HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, VkDeviceSize min_size, struct vkd3d_scratch_buffer *scratch) +HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind, + VkDeviceSize min_size, uint32_t memory_types, struct vkd3d_scratch_buffer *scratch)
{ + struct d3d12_device_scratch_pool *pool = &device->scratch_pools[kind]; /* device-level cache for this kind */ + struct vkd3d_scratch_buffer *candidate; + size_t i; + if (min_size > VKD3D_SCRATCH_BUFFER_SIZE) - return d3d12_device_create_scratch_buffer(device, min_size, scratch); + return d3d12_device_create_scratch_buffer(device, kind, min_size, memory_types, scratch); /* oversized request: create a dedicated buffer without touching the cache */ pthread_mutex_lock(&device->mutex); - if (device->scratch_buffer_count) + for (i = pool->scratch_buffer_count; i; i--) /* scan from the last cached entry backwards, with device->mutex held */ { - *scratch = device->scratch_buffers[--device->scratch_buffer_count]; - scratch->offset = 0; - pthread_mutex_unlock(&device->mutex); - return S_OK; - } - else - { - pthread_mutex_unlock(&device->mutex); - return d3d12_device_create_scratch_buffer(device, VKD3D_SCRATCH_BUFFER_SIZE, scratch); + candidate = &pool->scratch_buffers[i - 1]; + + /* Only reuse a cached buffer whose Vulkan memory type is allowed by memory_types. A mismatch is very unlikely because each pool kind has its own list, but check anyway for correctness. */ + if (memory_types & (1u << candidate->allocation.device_allocation.vk_memory_type)) + { + *scratch = *candidate; /* copy out before the slot is overwritten below */ + scratch->offset = 0; + pool->scratch_buffers[i - 1] = pool->scratch_buffers[--pool->scratch_buffer_count]; /* move the last entry into the vacated slot */ + pthread_mutex_unlock(&device->mutex); + return S_OK; + } } + + pthread_mutex_unlock(&device->mutex); + return d3d12_device_create_scratch_buffer(device, kind, VKD3D_SCRATCH_BUFFER_SIZE, memory_types, scratch); /* no cached buffer matched; the mutex is already released here */ } -void d3d12_device_return_scratch_buffer(struct d3d12_device *device, const struct vkd3d_scratch_buffer *scratch) +void d3d12_device_return_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind, + const struct vkd3d_scratch_buffer *scratch) { + struct d3d12_device_scratch_pool *pool = &device->scratch_pools[kind]; /* cache that receives the buffer if it qualifies */ pthread_mutex_lock(&device->mutex); if (scratch->allocation.resource.size == VKD3D_SCRATCH_BUFFER_SIZE && - device->scratch_buffer_count < VKD3D_SCRATCH_BUFFER_COUNT) + pool->scratch_buffer_count < VKD3D_SCRATCH_BUFFER_COUNT) { - device->scratch_buffers[device->scratch_buffer_count++] = *scratch; +
pool->scratch_buffers[pool->scratch_buffer_count++] = *scratch; pthread_mutex_unlock(&device->mutex); } else @@ -2815,10 +2856,11 @@ static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(d3d12_device_iface *iface) static void d3d12_device_destroy(struct d3d12_device *device) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - size_t i; + size_t i, j; /* i walks the pool kinds, j walks the cached buffers of one kind */ - for (i = 0; i < device->scratch_buffer_count; i++) - d3d12_device_destroy_scratch_buffer(device, &device->scratch_buffers[i]); + for (i = 0; i < VKD3D_SCRATCH_POOL_KIND_COUNT; i++) /* destroy whatever is still cached, for every kind */ + for (j = 0; j < device->scratch_pools[i].scratch_buffer_count; j++) + d3d12_device_destroy_scratch_buffer(device, &device->scratch_pools[i].scratch_buffers[j]); for (i = 0; i < device->query_pool_count; i++) d3d12_device_destroy_query_pool(device, &device->query_pools[i]); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 149d24e8..e8747848 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1842,6 +1842,20 @@ struct vkd3d_query_pool uint32_t next_index; }; +struct d3d12_command_allocator_scratch_pool /* growable list of scratch buffers of one kind, held by a command allocator */ +{ + struct vkd3d_scratch_buffer *scratch_buffers; + size_t scratch_buffers_size; /* capacity bookkeeping handed to vkd3d_array_reserve */ + size_t scratch_buffer_count; /* entries currently in use */ +}; + +enum vkd3d_scratch_pool_kind +{ + VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE = 0, /* created through vkd3d_allocate_heap_memory; memory_types must be ~0u */ + VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS, /* created through vkd3d_allocate_memory using the caller's memory type mask */ + VKD3D_SCRATCH_POOL_KIND_COUNT /* number of kinds; sizes the scratch_pools arrays */ +}; + /* ID3D12CommandAllocator */ struct d3d12_command_allocator { @@ -1868,9 +1882,7 @@ struct d3d12_command_allocator size_t command_buffers_size; size_t command_buffer_count; - struct vkd3d_scratch_buffer *scratch_buffers; - size_t scratch_buffers_size; - size_t scratch_buffer_count; + struct d3d12_command_allocator_scratch_pool scratch_pools[VKD3D_SCRATCH_POOL_KIND_COUNT]; /* indexed by enum vkd3d_scratch_pool_kind */ struct vkd3d_query_pool *query_pools; size_t query_pools_size; @@ -3185,6 +3197,12 @@ struct vkd3d_descriptor_qa_heap_buffer_data; /* ID3D12DeviceExt */ typedef ID3D12DeviceExt d3d12_device_vkd3d_ext_iface; +struct
d3d12_device_scratch_pool /* fixed-capacity cache of scratch buffers of one kind, kept on the device */ +{ + struct vkd3d_scratch_buffer scratch_buffers[VKD3D_SCRATCH_BUFFER_COUNT]; + size_t scratch_buffer_count; /* entries in use; d3d12_device_return_scratch_buffer stops caching at VKD3D_SCRATCH_BUFFER_COUNT */ +}; + struct d3d12_device { d3d12_device_iface ID3D12Device_iface; @@ -3219,8 +3237,7 @@ struct d3d12_device struct vkd3d_memory_allocator memory_allocator; - struct vkd3d_scratch_buffer scratch_buffers[VKD3D_SCRATCH_BUFFER_COUNT]; - size_t scratch_buffer_count; + struct d3d12_device_scratch_pool scratch_pools[VKD3D_SCRATCH_POOL_KIND_COUNT]; /* indexed by enum vkd3d_scratch_pool_kind */ struct vkd3d_query_pool query_pools[VKD3D_VIRTUAL_QUERY_POOL_COUNT]; size_t query_pool_count; @@ -3293,8 +3310,10 @@ static inline struct d3d12_device *impl_from_ID3D12Device(d3d12_device_iface *if bool d3d12_device_validate_shader_meta(struct d3d12_device *device, const struct vkd3d_shader_meta *meta); -HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, VkDeviceSize min_size, struct vkd3d_scratch_buffer *scratch); -void d3d12_device_return_scratch_buffer(struct d3d12_device *device, const struct vkd3d_scratch_buffer *scratch); +HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind, + VkDeviceSize min_size, uint32_t memory_types, struct vkd3d_scratch_buffer *scratch); /* memory_types: bit N set means Vulkan memory type N is acceptable */ +void d3d12_device_return_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind, + const struct vkd3d_scratch_buffer *scratch); /* kind selects which device cache may take the buffer back */ HRESULT d3d12_device_get_query_pool(struct d3d12_device *device, uint32_t type_index, struct vkd3d_query_pool *pool); void d3d12_device_return_query_pool(struct d3d12_device *device, const struct vkd3d_query_pool *pool);