diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 762eb1ef..e4233e30 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -1562,7 +1562,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo { struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface); ULONG refcount = InterlockedDecrement(&allocator->refcount); - unsigned int i; + unsigned int i, j; TRACE("%p decreasing refcount to %u.\n", allocator, refcount); @@ -1591,13 +1591,16 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo vkd3d_free(allocator->command_buffers); VK_CALL(vkDestroyCommandPool(device->vk_device, allocator->vk_command_pool, NULL)); - for (i = 0; i < allocator->scratch_buffer_count; i++) - d3d12_device_return_scratch_buffer(device, &allocator->scratch_buffers[i]); + for (i = 0; i < VKD3D_SCRATCH_POOL_KIND_COUNT; i++) + { + for (j = 0; j < allocator->scratch_pools[i].scratch_buffer_count; j++) + d3d12_device_return_scratch_buffer(device, i, &allocator->scratch_pools[i].scratch_buffers[j]); + vkd3d_free(allocator->scratch_pools[i].scratch_buffers); + } for (i = 0; i < allocator->query_pool_count; i++) d3d12_device_return_query_pool(device, &allocator->query_pools[i]); - vkd3d_free(allocator->scratch_buffers); vkd3d_free(allocator->query_pools); vkd3d_free(allocator); @@ -1656,7 +1659,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllo struct d3d12_device *device; LONG pending; VkResult vr; - size_t i; + size_t i, j; TRACE("iface %p.\n", iface); @@ -1707,10 +1710,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllo } /* Return scratch buffers to the device */ - for (i = 0; i < allocator->scratch_buffer_count; i++) - d3d12_device_return_scratch_buffer(device, &allocator->scratch_buffers[i]); - - allocator->scratch_buffer_count = 0; + for (i = 0; i < VKD3D_SCRATCH_POOL_KIND_COUNT; i++) + { + for (j = 0; j < allocator->scratch_pools[i].scratch_buffer_count; j++) + d3d12_device_return_scratch_buffer(device, i, &allocator->scratch_pools[i].scratch_buffers[j]); + allocator->scratch_pools[i].scratch_buffer_count = 0; + } /* Return query pools to the device */ for (i = 0; i < allocator->query_pool_count; i++) @@ -1846,9 +1851,7 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo allocator->command_buffers_size = 0; allocator->command_buffer_count = 0; - allocator->scratch_buffers = NULL; - allocator->scratch_buffers_size = 0; - allocator->scratch_buffer_count = 0; + memset(allocator->scratch_pools, 0, sizeof(allocator->scratch_pools)); allocator->query_pools = NULL; allocator->query_pools_size = 0; @@ -1898,8 +1901,11 @@ struct vkd3d_scratch_allocation }; static bool d3d12_command_allocator_allocate_scratch_memory(struct d3d12_command_allocator *allocator, - VkDeviceSize size, VkDeviceSize alignment, struct vkd3d_scratch_allocation *allocation) + enum vkd3d_scratch_pool_kind kind, + VkDeviceSize size, VkDeviceSize alignment, uint32_t memory_types, + struct vkd3d_scratch_allocation *allocation) { + struct d3d12_command_allocator_scratch_pool *pool = &allocator->scratch_pools[kind]; VkDeviceSize aligned_offset, aligned_size; struct vkd3d_scratch_buffer *scratch; unsigned int i; @@ -1907,9 +1913,14 @@ static bool d3d12_command_allocator_allocate_scratch_memory(struct d3d12_command aligned_size = align(size, alignment); /* Probe last block first since the others are likely full */ - for (i = allocator->scratch_buffer_count; i; i--) + for (i = pool->scratch_buffer_count; i; i--) { - scratch = &allocator->scratch_buffers[i - 1]; + scratch = &pool->scratch_buffers[i - 1]; + + /* Extremely unlikely to fail since we have separate lists per pool kind, but to be 100% correct ... */ + if (!(memory_types & (1u << scratch->allocation.device_allocation.vk_memory_type))) + continue; + aligned_offset = align(scratch->offset, alignment); if (aligned_offset + aligned_size <= scratch->allocation.resource.size) @@ -1923,21 +1934,21 @@ static bool d3d12_command_allocator_allocate_scratch_memory(struct d3d12_command } } - if (!vkd3d_array_reserve((void**)&allocator->scratch_buffers, &allocator->scratch_buffers_size, - allocator->scratch_buffer_count + 1, sizeof(*allocator->scratch_buffers))) + if (!vkd3d_array_reserve((void**)&pool->scratch_buffers, &pool->scratch_buffers_size, + pool->scratch_buffer_count + 1, sizeof(*pool->scratch_buffers))) { ERR("Failed to allocate scratch buffer.\n"); return false; } - scratch = &allocator->scratch_buffers[allocator->scratch_buffer_count]; - if (FAILED(d3d12_device_get_scratch_buffer(allocator->device, aligned_size, scratch))) + scratch = &pool->scratch_buffers[pool->scratch_buffer_count]; + if (FAILED(d3d12_device_get_scratch_buffer(allocator->device, kind, aligned_size, memory_types, scratch))) { ERR("Failed to create scratch buffer.\n"); return false; } - allocator->scratch_buffer_count += 1; + pool->scratch_buffer_count += 1; scratch->offset = aligned_size; allocation->buffer = scratch->allocation.resource.vk_buffer; @@ -3322,7 +3333,8 @@ static bool d3d12_command_list_gather_pending_queries(struct d3d12_command_list /* Allocate scratch buffer and resolve virtual Vulkan queries into it */ if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, - resolve_buffer_size, max(ssbo_alignment, sizeof(uint64_t)), &resolve_buffer)) + VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, + resolve_buffer_size, max(ssbo_alignment, sizeof(uint64_t)), ~0u, &resolve_buffer)) goto cleanup; for (i = 0; i < resolve_count; i++) @@ -3339,7 +3351,8 @@ static bool d3d12_command_list_gather_pending_queries(struct d3d12_command_list entry_buffer_size = sizeof(struct query_entry) * list->pending_queries_count; if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, - entry_buffer_size, ssbo_alignment, &entry_buffer)) + VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, + entry_buffer_size, ssbo_alignment, ~0u, &entry_buffer)) goto cleanup; for (i = 0; i < dispatch_count; i++) @@ -5516,7 +5529,8 @@ static bool d3d12_command_list_emit_predicated_command(struct d3d12_command_list vkd3d_meta_get_predicate_pipeline(&list->device->meta_ops, command_type, &pipeline_info); if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, - pipeline_info.data_size, sizeof(uint32_t), scratch)) + VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, + pipeline_info.data_size, sizeof(uint32_t), ~0u, scratch)) return false; d3d12_command_list_end_current_render_pass(list, true); @@ -9027,7 +9041,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(d3d12_command_li if (resource) { if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, - sizeof(uint32_t), sizeof(uint32_t), &scratch)) + VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, + sizeof(uint32_t), sizeof(uint32_t), ~0u, &scratch)) return; begin_info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; @@ -9284,9 +9299,10 @@ static void d3d12_command_list_execute_indirect_state_template( if (count_buffer) { if (FAILED(hr = d3d12_command_allocator_allocate_scratch_memory(list->allocator, + VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, sizeof(uint32_t), list->device->device_info.device_generated_commands_properties_nv.minSequencesCountBufferOffsetAlignment, - &count_allocation))) + ~0u, &count_allocation))) { WARN("Failed to allocate count memory.\n"); return; @@ -12284,9 +12300,10 @@ static HRESULT d3d12_command_signature_allocate_stream_memory_for_list( struct vkd3d_scratch_allocation *allocation) { if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, + VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, max_command_count * signature->state_template.stride, list->device->device_info.device_generated_commands_properties_nv.minIndirectCommandsBufferOffsetAlignment, - allocation)) + ~0u, allocation)) return E_OUTOFMEMORY; return S_OK; @@ -12322,12 +12339,13 @@ static HRESULT d3d12_command_signature_allocate_preprocess_memory_for_list( VK_CALL(vkGetGeneratedCommandsMemoryRequirementsNV(list->device->vk_device, &info, &memory_info)); - if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, memory_info.memoryRequirements.size, + if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, + VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS, + memory_info.memoryRequirements.size, list->device->device_info.device_generated_commands_properties_nv.minIndirectCommandsBufferOffsetAlignment, - allocation)) + memory_info.memoryRequirements.memoryTypeBits, allocation)) return E_OUTOFMEMORY; - /* Going to assume the memory type is okay ... It's device local after all. */ *size = memory_info.memoryRequirements.size; return S_OK; } diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 6120668f..9553b873 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -2412,22 +2412,50 @@ static void d3d12_remove_device_singleton(LUID luid) } } -static HRESULT d3d12_device_create_scratch_buffer(struct d3d12_device *device, VkDeviceSize size, struct vkd3d_scratch_buffer *scratch) +static HRESULT d3d12_device_create_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind, + VkDeviceSize size, uint32_t memory_types, struct vkd3d_scratch_buffer *scratch) { - struct vkd3d_allocate_heap_memory_info alloc_info; HRESULT hr; TRACE("device %p, size %llu, scratch %p.\n", device, size, scratch); - memset(&alloc_info, 0, sizeof(alloc_info)); - alloc_info.heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; - alloc_info.heap_desc.SizeInBytes = size; - alloc_info.heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - alloc_info.heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + if (kind == VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE) + { + struct vkd3d_allocate_heap_memory_info alloc_info; - if (FAILED(hr = vkd3d_allocate_heap_memory(device, &device->memory_allocator, - &alloc_info, &scratch->allocation))) - return hr; + /* We only care about memory types for INDIRECT_PREPROCESS. */ + assert(memory_types == ~0u); + + memset(&alloc_info, 0, sizeof(alloc_info)); + alloc_info.heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; + alloc_info.heap_desc.SizeInBytes = size; + alloc_info.heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + alloc_info.heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; + + if (FAILED(hr = vkd3d_allocate_heap_memory(device, &device->memory_allocator, + &alloc_info, &scratch->allocation))) + return hr; + } + else if (kind == VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS) + { + struct vkd3d_allocate_memory_info alloc_info; + memset(&alloc_info, 0, sizeof(alloc_info)); + + alloc_info.heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT; + alloc_info.memory_requirements.size = size; + alloc_info.memory_requirements.memoryTypeBits = memory_types; + alloc_info.heap_flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; + alloc_info.optional_memory_properties = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + alloc_info.flags = VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER; + + if (FAILED(hr = vkd3d_allocate_memory(device, &device->memory_allocator, + &alloc_info, &scratch->allocation))) + return hr; + } + else + { + return E_INVALIDARG; + } scratch->offset = 0; return S_OK; @@ -2440,35 +2468,47 @@ static void d3d12_device_destroy_scratch_buffer(struct d3d12_device *device, con vkd3d_free_memory(device, &device->memory_allocator, &scratch->allocation); } -HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, VkDeviceSize min_size, struct vkd3d_scratch_buffer *scratch) +HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind, + VkDeviceSize min_size, uint32_t memory_types, struct vkd3d_scratch_buffer *scratch) { + struct d3d12_device_scratch_pool *pool = &device->scratch_pools[kind]; + struct vkd3d_scratch_buffer *candidate; + size_t i; + if (min_size > VKD3D_SCRATCH_BUFFER_SIZE) - return d3d12_device_create_scratch_buffer(device, min_size, scratch); + return d3d12_device_create_scratch_buffer(device, kind, min_size, memory_types, scratch); pthread_mutex_lock(&device->mutex); - if (device->scratch_buffer_count) + for (i = pool->scratch_buffer_count; i; i--) { - *scratch = device->scratch_buffers[--device->scratch_buffer_count]; - scratch->offset = 0; - pthread_mutex_unlock(&device->mutex); - return S_OK; - } - else - { - pthread_mutex_unlock(&device->mutex); - return d3d12_device_create_scratch_buffer(device, VKD3D_SCRATCH_BUFFER_SIZE, scratch); + candidate = &pool->scratch_buffers[i - 1]; + + /* Extremely unlikely to fail since we have separate lists per pool kind, but to be 100% correct ... */ + if (memory_types & (1u << candidate->allocation.device_allocation.vk_memory_type)) + { + *scratch = *candidate; + scratch->offset = 0; + pool->scratch_buffers[i - 1] = pool->scratch_buffers[--pool->scratch_buffer_count]; + pthread_mutex_unlock(&device->mutex); + return S_OK; + } } + + pthread_mutex_unlock(&device->mutex); + return d3d12_device_create_scratch_buffer(device, kind, VKD3D_SCRATCH_BUFFER_SIZE, memory_types, scratch); } -void d3d12_device_return_scratch_buffer(struct d3d12_device *device, const struct vkd3d_scratch_buffer *scratch) +void d3d12_device_return_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind, + const struct vkd3d_scratch_buffer *scratch) { + struct d3d12_device_scratch_pool *pool = &device->scratch_pools[kind]; pthread_mutex_lock(&device->mutex); if (scratch->allocation.resource.size == VKD3D_SCRATCH_BUFFER_SIZE && - device->scratch_buffer_count < VKD3D_SCRATCH_BUFFER_COUNT) + pool->scratch_buffer_count < VKD3D_SCRATCH_BUFFER_COUNT) { - device->scratch_buffers[device->scratch_buffer_count++] = *scratch; + pool->scratch_buffers[pool->scratch_buffer_count++] = *scratch; pthread_mutex_unlock(&device->mutex); } else @@ -2719,10 +2759,11 @@ static void d3d12_device_global_pipeline_cache_cleanup(struct d3d12_device *devi static void d3d12_device_destroy(struct d3d12_device *device) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - size_t i; + size_t i, j; - for (i = 0; i < device->scratch_buffer_count; i++) - d3d12_device_destroy_scratch_buffer(device, &device->scratch_buffers[i]); + for (i = 0; i < VKD3D_SCRATCH_POOL_KIND_COUNT; i++) + for (j = 0; j < device->scratch_pools[i].scratch_buffer_count; j++) + d3d12_device_destroy_scratch_buffer(device, &device->scratch_pools[i].scratch_buffers[j]); for (i = 0; i < device->query_pool_count; i++) d3d12_device_destroy_query_pool(device, &device->query_pools[i]); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 5411e4a8..71767560 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1704,6 +1704,20 @@ struct vkd3d_query_pool uint32_t next_index; }; +struct d3d12_command_allocator_scratch_pool +{ + struct vkd3d_scratch_buffer *scratch_buffers; + size_t scratch_buffers_size; + size_t scratch_buffer_count; +}; + +enum vkd3d_scratch_pool_kind +{ + VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE = 0, + VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS, + VKD3D_SCRATCH_POOL_KIND_COUNT +}; + /* ID3D12CommandAllocator */ struct d3d12_command_allocator { @@ -1737,9 +1751,7 @@ struct d3d12_command_allocator size_t command_buffers_size; size_t command_buffer_count; - struct vkd3d_scratch_buffer *scratch_buffers; - size_t scratch_buffers_size; - size_t scratch_buffer_count; + struct d3d12_command_allocator_scratch_pool scratch_pools[VKD3D_SCRATCH_POOL_KIND_COUNT]; struct vkd3d_query_pool *query_pools; size_t query_pools_size; @@ -2891,6 +2903,12 @@ struct vkd3d_descriptor_qa_heap_buffer_data; /* ID3D12DeviceExt */ typedef ID3D12DeviceExt d3d12_device_vkd3d_ext_iface; +struct d3d12_device_scratch_pool +{ + struct vkd3d_scratch_buffer scratch_buffers[VKD3D_SCRATCH_BUFFER_COUNT]; + size_t scratch_buffer_count; +}; + struct d3d12_device { d3d12_device_iface ID3D12Device_iface; @@ -2926,8 +2944,7 @@ struct d3d12_device struct vkd3d_memory_allocator memory_allocator; - struct vkd3d_scratch_buffer scratch_buffers[VKD3D_SCRATCH_BUFFER_COUNT]; - size_t scratch_buffer_count; + struct d3d12_device_scratch_pool scratch_pools[VKD3D_SCRATCH_POOL_KIND_COUNT]; struct vkd3d_query_pool query_pools[VKD3D_VIRTUAL_QUERY_POOL_COUNT]; size_t query_pool_count; @@ -2989,8 +3006,10 @@ static inline struct d3d12_device *impl_from_ID3D12Device(d3d12_device_iface *if bool d3d12_device_validate_shader_meta(struct d3d12_device *device, const struct vkd3d_shader_meta *meta); -HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, VkDeviceSize min_size, struct vkd3d_scratch_buffer *scratch); -void d3d12_device_return_scratch_buffer(struct d3d12_device *device, const struct vkd3d_scratch_buffer *scratch); +HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind, + VkDeviceSize min_size, uint32_t memory_types, struct vkd3d_scratch_buffer *scratch); +void d3d12_device_return_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind, + const struct vkd3d_scratch_buffer *scratch); HRESULT d3d12_device_get_query_pool(struct d3d12_device *device, uint32_t type_index, struct vkd3d_query_pool *pool); void d3d12_device_return_query_pool(struct d3d12_device *device, const struct vkd3d_query_pool *pool);