vkd3d: Create one fence worker per command queue.

Each command queue now owns its own fence worker instead of sharing
a single per-device worker. This solves issues with D3D12 fences
being signalled too late because the fence worker is waiting on
a different set of semaphores while the fence is being enqueued.

Greatly increases performance in Horizon Zero Dawn and Death
Stranding with multi-queue mode enabled.

Signed-off-by: Philip Rebohle <philip.rebohle@tu-dortmund.de>
This commit is contained in:
Philip Rebohle 2021-03-15 14:17:19 +01:00 committed by Hans-Kristian Arntzen
parent 34bca90a9c
commit 93a80d5eaa
3 changed files with 10 additions and 11 deletions

View File

@ -8848,6 +8848,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(ID3D12CommandQueue *i
vkd3d_private_store_destroy(&command_queue->private_store);
d3d12_command_queue_submit_stop(command_queue);
vkd3d_fence_worker_stop(&command_queue->fence_worker, device);
d3d12_device_unmap_vkd3d_queue(device, command_queue->vkd3d_queue);
pthread_join(command_queue->submission_thread, NULL);
pthread_mutex_destroy(&command_queue->queue_lock);
@ -9561,7 +9562,7 @@ static void d3d12_command_queue_signal(struct d3d12_command_queue *command_queue
return;
}
if (FAILED(hr = vkd3d_enqueue_timeline_semaphore(&device->fence_worker, fence, physical_value, vkd3d_queue)))
if (FAILED(hr = vkd3d_enqueue_timeline_semaphore(&command_queue->fence_worker, fence, physical_value, vkd3d_queue)))
{
/* In case of an unexpected failure, try to safely destroy Vulkan objects. */
vkd3d_queue_wait_idle(vkd3d_queue, vk_procs);
@ -10398,6 +10399,9 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue,
d3d12_device_add_ref(queue->device = device);
if (FAILED(hr = vkd3d_fence_worker_start(&queue->fence_worker, device)))
goto fail_fence_worker_start;
if ((rc = pthread_create(&queue->submission_thread, NULL, d3d12_command_queue_submission_worker_main, queue)) < 0)
{
d3d12_device_release(queue->device);
@ -10407,7 +10411,9 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue,
return S_OK;
fail_pthread_create:;
fail_pthread_create:
vkd3d_fence_worker_stop(&queue->fence_worker, device);
fail_fence_worker_start:;
#ifdef VKD3D_BUILD_STANDALONE_D3D12
fail_swapchain_factory:
vkd3d_private_store_destroy(&queue->private_store);

View File

@ -2388,7 +2388,6 @@ static void d3d12_device_destroy(struct d3d12_device *device)
vkd3d_bindless_state_cleanup(&device->bindless_state, device);
vkd3d_destroy_null_resources(&device->null_resources, device);
vkd3d_render_pass_cache_cleanup(&device->render_pass_cache, device);
vkd3d_fence_worker_stop(&device->fence_worker, device);
d3d12_device_destroy_vkd3d_queues(device);
vkd3d_memory_allocator_cleanup(&device->memory_allocator, device);
VK_CALL(vkDestroyDevice(device->vk_device, NULL));
@ -4946,11 +4945,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
if (FAILED(hr = vkd3d_memory_allocator_init(&device->memory_allocator, device)))
goto out_free_private_store;
if (FAILED(hr = vkd3d_fence_worker_start(&device->fence_worker, device)))
goto out_free_memory_allocator;
if (FAILED(hr = vkd3d_init_format_info(device)))
goto out_stop_fence_worker;
goto out_free_memory_allocator;
if (FAILED(hr = vkd3d_memory_info_init(&device->memory_info, device)))
goto out_cleanup_format_info;
@ -4993,8 +4989,6 @@ out_destroy_null_resources:
vkd3d_destroy_null_resources(&device->null_resources, device);
out_cleanup_format_info:
vkd3d_cleanup_format_info(device);
out_stop_fence_worker:
vkd3d_fence_worker_stop(&device->fence_worker, device);
out_free_memory_allocator:
vkd3d_memory_allocator_cleanup(&device->memory_allocator, device);
out_free_private_store:

View File

@ -1925,6 +1925,7 @@ struct d3d12_command_queue
uint64_t drain_count;
uint64_t queue_drain_count;
struct vkd3d_fence_worker fence_worker;
struct vkd3d_private_store private_store;
#ifdef VKD3D_BUILD_STANDALONE_D3D12
@ -2511,8 +2512,6 @@ struct d3d12_device
struct vkd3d_vk_device_procs vk_procs;
PFN_vkd3d_signal_event signal_event;
struct vkd3d_fence_worker fence_worker;
pthread_mutex_t mutex;
struct vkd3d_render_pass_cache render_pass_cache;