From 0c94e07ab2c4c1a86bbaabfaee3f6a0c672381b7 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 22 Feb 2021 10:52:06 +0100 Subject: [PATCH] vkd3d: Elide timeline semaphore waits which can be satisfied implicitly. If we're signalling and waiting on the same physical queue (always true for the current SINGLE_QUEUE define), we can rely on submission boundary synchronization, which doesn't require any extra submissions to resolve. This avoids awkward GPU driver bubbles with back-to-back signal -> wait pairs on a timeline semaphore. Observed 2% GPU uplift on RE2 on AMD. Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/command.c | 37 +++++++++++++++++++++++++++++-------- libs/vkd3d/vkd3d_private.h | 1 + 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 4bcd0f9c..586a93ec 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -645,11 +645,30 @@ static void d3d12_fence_unlock(struct d3d12_fence *fence) pthread_mutex_unlock(&fence->mutex); } -static bool d3d12_fence_can_elide_wait_semaphore_locked(struct d3d12_fence *fence, uint64_t value) +static bool d3d12_fence_can_elide_wait_semaphore_locked(struct d3d12_fence *fence, uint64_t value, + const struct vkd3d_queue *waiting_queue) { + unsigned int i; + /* Relevant if the semaphore has been signalled already on host. * We should not wait on the timeline semaphore directly, we can simply submit in-place. */ - return fence->virtual_value >= value; + if (fence->virtual_value >= value) + return true; + + /* We can elide a wait if we can use the submission order guarantee. + * If there is a pending signal on this queue which will satisfy the wait, + * submission barrier will implicitly complete the wait, + * and we don't have to eat the overhead of submitting an extra wait on top. + * This will essentially always trigger on single-queue. 
+ */ + for (i = 0; i < fence->pending_updates_count; i++) + { + if (fence->pending_updates[i].signalling_queue == waiting_queue && + fence->pending_updates[i].virtual_value >= value) + return true; + } + + return false; } static HRESULT d3d12_fence_signal_cpu_timeline_semaphore(struct d3d12_fence *fence, uint64_t value) @@ -674,7 +693,8 @@ static HRESULT d3d12_fence_signal_cpu_timeline_semaphore(struct d3d12_fence *fen return S_OK; } -static uint64_t d3d12_fence_add_pending_signal_locked(struct d3d12_fence *fence, uint64_t virtual_value) +static uint64_t d3d12_fence_add_pending_signal_locked(struct d3d12_fence *fence, uint64_t virtual_value, + const struct vkd3d_queue *signalling_queue) { struct d3d12_fence_value *update; vkd3d_array_reserve((void**)&fence->pending_updates, &fence->pending_updates_size, @@ -683,6 +703,7 @@ static uint64_t d3d12_fence_add_pending_signal_locked(struct d3d12_fence *fence, update = &fence->pending_updates[fence->pending_updates_count++]; update->virtual_value = virtual_value; update->physical_value = ++fence->counter; + update->signalling_queue = signalling_queue; return fence->counter; } @@ -9018,6 +9039,9 @@ static void d3d12_command_queue_wait(struct d3d12_command_queue *command_queue, VkQueue vk_queue; VkResult vr; + vk_procs = &command_queue->device->vk_procs; + queue = command_queue->vkd3d_queue; + d3d12_fence_lock(fence); /* This is the critical part required to support out-of-order signal. @@ -9029,7 +9053,7 @@ static void d3d12_command_queue_wait(struct d3d12_command_queue *command_queue, /* If a host signal unblocked us, or we know that the fence has reached a specific value, there is no need * to queue up a wait. 
*/ - if (d3d12_fence_can_elide_wait_semaphore_locked(fence, value)) + if (d3d12_fence_can_elide_wait_semaphore_locked(fence, value, queue)) { d3d12_fence_unlock(fence); return; @@ -9037,9 +9061,6 @@ static void d3d12_command_queue_wait(struct d3d12_command_queue *command_queue, TRACE("queue %p, fence %p, value %#"PRIx64".\n", command_queue, fence, value); - vk_procs = &command_queue->device->vk_procs; - queue = command_queue->vkd3d_queue; - wait_count = d3d12_fence_get_physical_wait_value_locked(fence, value); /* We can unlock the fence here. The queue semaphore will not be signalled to signal_value @@ -9104,7 +9125,7 @@ static void d3d12_command_queue_signal(struct d3d12_command_queue *command_queue TRACE("queue %p, fence %p, value %#"PRIx64".\n", command_queue, fence, value); - physical_value = d3d12_fence_add_pending_signal_locked(fence, value); + physical_value = d3d12_fence_add_pending_signal_locked(fence, value, vkd3d_queue); signal_value = physical_value; diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 685cf26b..29d4ed78 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -417,6 +417,7 @@ struct d3d12_fence_value { uint64_t virtual_value; uint64_t physical_value; + const struct vkd3d_queue *signalling_queue; }; struct d3d12_fence