Compare commits

...

2 Commits

Author SHA1 Message Date
Joshua Ashton e1f792e714 vkd3d: Low latency presentation and acquire semaphores
In cases where acquiring an image is blocking, we should do that after
presentation to avoid adding latency when the app calls present.

This avoids weird inverse frame cadences with Mesa WSI right now,
as acquiring an image is always a blocking call until it is complete.

In cases when we aren't blocking, this kicks off the acquisition so
it can be waited upon by the next present blit pass.

Use another set of semaphores to wait for the image acquisition on the
GPU.

In the non-blocking vkAcquireNextImageKHR case, this means that a
potential bubble of time between waiting on the fence and submitting
the blit + presentation is eliminated.

Runaway presentation in this setup is avoided by frame latency objects
and by the normal frame latency, which is always 3 according to the
documentation.

Be careful about handling SUBOPTIMAL. Semaphores will be signaled, but
we might want to tear down the swapchain. In these cases, we need to
wait for the semaphore to be signaled first, which can only be done by
submitting a wait, since QueueWaitIdle or DeviceWaitIdle don't cover
WSI.

Signed-off-by: Joshua Ashton <joshua@froggi.es>
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
Co-authored-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2021-06-02 14:40:16 +02:00
Joshua Ashton c33cfd048b vkd3d: Handle frame latency without WAITABLE_OBJECT
Documentation says that this should always be 3 without WAITABLE_OBJECT,
unlike in D3D11, where it will use the DXGI device's frame latency.

This stops runaway presentations in the non-blocking acquire image case
with the new semaphore setup.

Signed-off-by: Joshua Ashton <joshua@froggi.es>
2021-06-02 14:36:40 +02:00
1 changed file with 179 additions and 92 deletions

View File

@ -176,14 +176,15 @@ struct d3d12_swapchain
VkSwapchainKHR vk_swapchain;
VkSurfaceKHR vk_surface;
VkFence vk_fence;
VkCommandPool vk_cmd_pool;
VkImage vk_images[DXGI_MAX_SWAP_CHAIN_BUFFERS];
VkImage vk_swapchain_images[DXGI_MAX_SWAP_CHAIN_BUFFERS];
VkImageView vk_swapchain_image_views[DXGI_MAX_SWAP_CHAIN_BUFFERS];
VkFramebuffer vk_framebuffers[DXGI_MAX_SWAP_CHAIN_BUFFERS];
VkCommandBuffer vk_cmd_buffers[DXGI_MAX_SWAP_CHAIN_BUFFERS];
VkSemaphore vk_semaphores[DXGI_MAX_SWAP_CHAIN_BUFFERS];
bool vk_acquire_semaphores_signaled[DXGI_MAX_SWAP_CHAIN_BUFFERS];
VkSemaphore vk_acquire_semaphores[DXGI_MAX_SWAP_CHAIN_BUFFERS];
VkSemaphore vk_present_semaphores[DXGI_MAX_SWAP_CHAIN_BUFFERS];
ID3D12Resource *buffers[DXGI_MAX_SWAP_CHAIN_BUFFERS];
unsigned int buffer_count;
unsigned int vk_swapchain_width;
@ -216,6 +217,7 @@ struct d3d12_swapchain
uint64_t frame_number;
uint32_t frame_latency;
uint32_t frame_id;
};
static inline const struct vkd3d_vk_device_procs* d3d12_swapchain_procs(struct d3d12_swapchain* swapchain)
@ -1155,18 +1157,31 @@ static HRESULT d3d12_swapchain_prepare_command_buffers(struct d3d12_swapchain *s
return hresult_from_vk_result(vr);
}
swapchain->frame_id = 0;
memset(swapchain->vk_acquire_semaphores_signaled, 0, sizeof(swapchain->vk_acquire_semaphores_signaled));
for (i = 0; i < swapchain->buffer_count; ++i)
{
semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
semaphore_info.pNext = NULL;
semaphore_info.flags = 0;
assert(swapchain->vk_semaphores[i] == VK_NULL_HANDLE);
assert(swapchain->vk_acquire_semaphores[i] == VK_NULL_HANDLE);
if ((vr = vk_procs->vkCreateSemaphore(vk_device, &semaphore_info,
NULL, &swapchain->vk_semaphores[i])) < 0)
NULL, &swapchain->vk_acquire_semaphores[i])) < 0)
{
WARN("Failed to create semaphore, vr %d.\n", vr);
swapchain->vk_semaphores[i] = VK_NULL_HANDLE;
swapchain->vk_acquire_semaphores[i] = VK_NULL_HANDLE;
return hresult_from_vk_result(vr);
}
assert(swapchain->vk_present_semaphores[i] == VK_NULL_HANDLE);
if ((vr = vk_procs->vkCreateSemaphore(vk_device, &semaphore_info,
NULL, &swapchain->vk_present_semaphores[i])) < 0)
{
WARN("Failed to create semaphore, vr %d.\n", vr);
swapchain->vk_present_semaphores[i] = VK_NULL_HANDLE;
return hresult_from_vk_result(vr);
}
}
@ -1229,48 +1244,50 @@ static HRESULT d3d12_swapchain_create_buffers(struct d3d12_swapchain *swapchain,
return S_OK;
}
static VkResult d3d12_swapchain_wait_and_reset_swapchain_fence(struct d3d12_swapchain *swapchain)
static VkResult d3d12_swapchain_unsignal_acquire_semaphore(struct d3d12_swapchain *swapchain,
VkQueue vk_queue, uint32_t frame_id, bool blocking)
{
const struct vkd3d_vk_device_procs *vk_procs = d3d12_swapchain_procs(swapchain);
VkDevice vk_device = d3d12_swapchain_device(swapchain)->vk_device;
VkFence vk_fence = swapchain->vk_fence;
const VkPipelineStageFlags wait_stages = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
VkDevice vk_device = swapchain->command_queue->device->vk_device;
VkFenceCreateInfo fence_create_info;
VkFence vk_fence = VK_NULL_HANDLE;
VkSubmitInfo submit_info;
VkResult vr;
if ((vr = vk_procs->vkWaitForFences(vk_device, 1, &vk_fence, VK_TRUE, UINT64_MAX)) != VK_SUCCESS)
if (blocking)
{
ERR("Failed to wait for fence, vr %d.\n", vr);
return vr;
}
if ((vr = vk_procs->vkResetFences(vk_device, 1, &vk_fence)) < 0)
ERR("Failed to reset fence, vr %d.\n", vr);
return vr;
}
static VkResult d3d12_swapchain_acquire_next_vulkan_image(struct d3d12_swapchain *swapchain)
{
const struct vkd3d_vk_device_procs *vk_procs = d3d12_swapchain_procs(swapchain);
VkDevice vk_device = d3d12_swapchain_device(swapchain)->vk_device;
VkFence vk_fence = swapchain->vk_fence;
VkResult vr;
swapchain->vk_image_index = INVALID_VK_IMAGE_INDEX;
if ((vr = vk_procs->vkAcquireNextImageKHR(vk_device, swapchain->vk_swapchain, UINT64_MAX,
VK_NULL_HANDLE, vk_fence, &swapchain->vk_image_index)))
{
if (vr == VK_SUBOPTIMAL_KHR)
fence_create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
fence_create_info.pNext = NULL;
fence_create_info.flags = 0;
if ((vr = VK_CALL(vkCreateFence(vk_device, &fence_create_info, NULL, &vk_fence))))
{
/* Suboptimal is still considered success, so make sure to wait and reset fence here, but we always want
* to recreate swapchains in this case. */
d3d12_swapchain_wait_and_reset_swapchain_fence(swapchain);
return VK_ERROR_OUT_OF_DATE_KHR;
ERR("Failed to create fence, vr %d\n", vr);
return vr;
}
WARN("Failed to acquire next Vulkan image, vr %d.\n", vr);
return vr;
}
vr = d3d12_swapchain_wait_and_reset_swapchain_fence(swapchain);
memset(&submit_info, 0, sizeof(submit_info));
submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submit_info.waitSemaphoreCount = 1;
submit_info.pWaitDstStageMask = &wait_stages;
assert(swapchain->vk_acquire_semaphores_signaled[frame_id]);
submit_info.pWaitSemaphores = &swapchain->vk_acquire_semaphores[frame_id];
if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, vk_fence))))
{
ERR("Failed to submit unsignal operation, vr %d\n", vr);
goto end;
}
swapchain->vk_acquire_semaphores_signaled[frame_id] = false;
if (vk_fence)
if ((vr = VK_CALL(vkWaitForFences(swapchain->command_queue->device->vk_device, 1, &vk_fence, VK_TRUE, UINT64_MAX))))
ERR("Failed to wait for fences, vr %d\n", vr);
end:
VK_CALL(vkDestroyFence(vk_device, vk_fence, NULL));
return vr;
}
@ -1284,6 +1301,13 @@ static void d3d12_swapchain_destroy_buffers(struct d3d12_swapchain *swapchain, B
{
if ((vk_queue = vkd3d_acquire_vk_queue(d3d12_swapchain_queue_iface(swapchain))))
{
/* If we have outstanding vkAcquireNextImages, we need to wait for those semaphores
* before QueueWaitIdle, since vkAcquireNextImageKHR does not constitute a queue operation.
* We cannot safely destroy the semaphores without waiting for them first. */
for (i = 0; i < swapchain->buffer_count; i++)
if (swapchain->vk_acquire_semaphores_signaled[i])
d3d12_swapchain_unsignal_acquire_semaphore(swapchain, vk_queue, i, false);
vk_procs->vkQueueWaitIdle(vk_queue);
vkd3d_release_vk_queue(d3d12_swapchain_queue_iface(swapchain));
@ -1311,8 +1335,12 @@ static void d3d12_swapchain_destroy_buffers(struct d3d12_swapchain *swapchain, B
{
for (i = 0; i < swapchain->buffer_count; ++i)
{
vk_procs->vkDestroySemaphore(swapchain->command_queue->device->vk_device, swapchain->vk_semaphores[i], NULL);
swapchain->vk_semaphores[i] = VK_NULL_HANDLE;
vk_procs->vkDestroySemaphore(swapchain->command_queue->device->vk_device, swapchain->vk_acquire_semaphores[i], NULL);
swapchain->vk_acquire_semaphores[i] = VK_NULL_HANDLE;
swapchain->vk_acquire_semaphores_signaled[i] = false;
vk_procs->vkDestroySemaphore(swapchain->command_queue->device->vk_device, swapchain->vk_present_semaphores[i], NULL);
swapchain->vk_present_semaphores[i] = VK_NULL_HANDLE;
}
vk_procs->vkDestroyCommandPool(swapchain->command_queue->device->vk_device, swapchain->vk_cmd_pool, NULL);
swapchain->vk_cmd_pool = VK_NULL_HANDLE;
@ -1556,10 +1584,7 @@ static void d3d12_swapchain_destroy(struct d3d12_swapchain *swapchain)
vkd3d_private_store_destroy(&swapchain->private_store);
if (swapchain->command_queue->device->vk_device)
{
vk_procs->vkDestroyFence(swapchain->command_queue->device->vk_device, swapchain->vk_fence, NULL);
vk_procs->vkDestroySwapchainKHR(swapchain->command_queue->device->vk_device, swapchain->vk_swapchain, NULL);
}
vk_procs->vkDestroySurfaceKHR(d3d12_swapchain_device(swapchain)->vkd3d_instance->vk_instance, swapchain->vk_surface, NULL);
@ -1686,7 +1711,10 @@ static HRESULT d3d12_swapchain_set_sync_interval(struct d3d12_swapchain *swapcha
static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain, VkQueue vk_queue)
{
/* Blit meta pass uses COLOR_ATTACHMENT_OUTPUT_BIT external subpass dependency. */
const VkPipelineStageFlags acquire_wait_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
const struct vkd3d_vk_device_procs *vk_procs = d3d12_swapchain_procs(swapchain);
VkDevice vk_device = d3d12_swapchain_device(swapchain)->vk_device;
VkCommandBuffer vk_cmd_buffer;
VkPresentInfoKHR present_info;
VkSubmitInfo submit_info;
@ -1698,8 +1726,31 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain,
if (swapchain->vk_image_index == INVALID_VK_IMAGE_INDEX)
{
if ((vr = d3d12_swapchain_acquire_next_vulkan_image(swapchain)) < 0)
/* If we hit the SUBOPTIMAL path on the last AcquireNextImageKHR, we will have a pending
* acquire that we have not waited for yet. In this scenario, just drain the semaphore,
* wait for that to complete, then we can reuse the semaphore. */
if (swapchain->vk_acquire_semaphores_signaled[swapchain->frame_id])
if ((vr = d3d12_swapchain_unsignal_acquire_semaphore(swapchain, vk_queue, swapchain->frame_id, true)))
return vr;
vr = vk_procs->vkAcquireNextImageKHR(vk_device, swapchain->vk_swapchain, UINT64_MAX,
swapchain->vk_acquire_semaphores[swapchain->frame_id],
VK_NULL_HANDLE, &swapchain->vk_image_index);
if (vr >= 0)
swapchain->vk_acquire_semaphores_signaled[swapchain->frame_id] = true;
if (vr == VK_SUBOPTIMAL_KHR)
{
/* Suboptimal is still considered success, but we always want
* to recreate swapchains in this case. */
return VK_ERROR_OUT_OF_DATE_KHR;
}
else if (vr != VK_SUCCESS)
{
WARN("Failed to acquire next Vulkan image, vr %d.\n", vr);
return vr;
}
}
assert(swapchain->vk_image_index < swapchain->buffer_count);
@ -1727,13 +1778,14 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain,
submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submit_info.pNext = NULL;
submit_info.waitSemaphoreCount = 0;
submit_info.pWaitSemaphores = NULL;
submit_info.pWaitDstStageMask = NULL;
submit_info.waitSemaphoreCount = 1;
assert(swapchain->vk_acquire_semaphores_signaled[swapchain->frame_id]);
submit_info.pWaitSemaphores = &swapchain->vk_acquire_semaphores[swapchain->frame_id];
submit_info.pWaitDstStageMask = &acquire_wait_mask;
submit_info.commandBufferCount = 1;
submit_info.pCommandBuffers = &vk_cmd_buffer;
submit_info.signalSemaphoreCount = 1;
submit_info.pSignalSemaphores = &swapchain->vk_semaphores[swapchain->vk_image_index];
submit_info.pSignalSemaphores = &swapchain->vk_present_semaphores[swapchain->vk_image_index];
if ((vr = vk_procs->vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE)) < 0)
{
@ -1741,13 +1793,38 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain,
return vr;
}
swapchain->vk_acquire_semaphores_signaled[swapchain->frame_id] = false;
present_info.waitSemaphoreCount = 1;
present_info.pWaitSemaphores = &swapchain->vk_semaphores[swapchain->vk_image_index];
present_info.pWaitSemaphores = &swapchain->vk_present_semaphores[swapchain->vk_image_index];
if ((vr = vk_procs->vkQueuePresentKHR(vk_queue, &present_info)) >= 0)
{
swapchain->frame_id = (swapchain->frame_id + 1) % swapchain->buffer_count;
swapchain->vk_image_index = INVALID_VK_IMAGE_INDEX;
/* Could get SUBOPTIMAL here. Deal with it later. */
/* Could get SUBOPTIMAL here. Defer acquiring if we hit that path.
* On next present, we can recreate the swapchain. */
if (vr == VK_SUCCESS)
{
/* Try to acquire our next image to avoid blocking next frame. */
assert(!swapchain->vk_acquire_semaphores_signaled[swapchain->frame_id]);
vr = vk_procs->vkAcquireNextImageKHR(vk_device, swapchain->vk_swapchain, UINT64_MAX,
swapchain->vk_acquire_semaphores[swapchain->frame_id], VK_NULL_HANDLE,
&swapchain->vk_image_index);
if (vr >= 0)
{
swapchain->vk_acquire_semaphores_signaled[swapchain->frame_id] = true;
}
else
{
/* Didn't manage to get an image at all.
* The last present we did was a success so don't remake the
* swapchain now. Retry again at the next presentation. */
swapchain->vk_image_index = INVALID_VK_IMAGE_INDEX;
}
}
vr = VK_SUCCESS;
}
@ -1757,7 +1834,6 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain,
static HRESULT d3d12_swapchain_present(struct d3d12_swapchain *swapchain,
unsigned int sync_interval, unsigned int flags)
{
HANDLE frame_latency_event;
VkQueue vk_queue;
VkResult vr;
HRESULT hr;
@ -1819,24 +1895,42 @@ static HRESULT d3d12_swapchain_present(struct d3d12_swapchain *swapchain,
return hresult_from_vk_result(vr);
}
if ((frame_latency_event = swapchain->frame_latency_event))
++swapchain->frame_number;
if (FAILED(hr = ID3D12CommandQueue_Signal(d3d12_swapchain_queue_iface(swapchain),
swapchain->frame_latency_fence, swapchain->frame_number)))
{
++swapchain->frame_number;
if (FAILED(hr = ID3D12CommandQueue_Signal(d3d12_swapchain_queue_iface(swapchain),
swapchain->frame_latency_fence, swapchain->frame_number)))
{
ERR("Failed to signal frame latency fence, hr %#x.\n", hr);
return hr;
}
ERR("Failed to signal frame latency fence, hr %#x.\n", hr);
return hr;
}
if (swapchain->desc.Flags & DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT)
{
if (FAILED(hr = ID3D12Fence_SetEventOnCompletion(swapchain->frame_latency_fence,
swapchain->frame_number - swapchain->frame_latency, frame_latency_event)))
swapchain->frame_number - swapchain->frame_latency, swapchain->frame_latency_event)))
{
ERR("Failed to enqueue frame latency event, hr %#x.\n", hr);
return hr;
}
}
else
{
const uint32_t sync_latency = min(swapchain->frame_latency, swapchain->desc.BufferCount + 1);
const uint64_t frame_target = swapchain->frame_number - sync_latency;
if (ID3D12Fence_GetCompletedValue(swapchain->frame_latency_fence) < frame_target)
{
/* Wait on the latency. */
if (FAILED(hr = ID3D12Fence_SetEventOnCompletion(swapchain->frame_latency_fence,
frame_target, swapchain->frame_latency_event)))
{
ERR("Failed to enqueue frame latency event (internal), hr %#x.\n", hr);
return hr;
}
WaitForSingleObject(swapchain->frame_latency_event, INFINITE);
}
}
swapchain->current_buffer_index = (swapchain->current_buffer_index + 1) % swapchain->desc.BufferCount;
return hresult_from_vk_result(vr);
@ -2295,6 +2389,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_swapchain_SetMaximumFrameLatency(dxgi_swa
TRACE("iface %p, max_latency %u.\n", iface, max_latency);
EnterCriticalSection(&swapchain->mutex);
/* Max frame latency without WAITABLE_OBJECT is always 3,
* even if set on the device, according to docs. */
if (!(swapchain->desc.Flags & DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT))
{
WARN("DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT not set for swap chain %p.\n", iface);
@ -2339,6 +2435,9 @@ static HANDLE STDMETHODCALLTYPE d3d12_swapchain_GetFrameLatencyWaitableObject(dx
TRACE("iface %p.\n", iface);
if (!(swapchain->desc.Flags & DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT))
return NULL;
return swapchain->frame_latency_event;
}
@ -2529,15 +2628,12 @@ static HRESULT d3d12_swapchain_init(struct d3d12_swapchain *swapchain, IDXGIFact
const struct vkd3d_vk_device_procs *vk_procs = &queue->device->vk_procs;
VkWin32SurfaceCreateInfoKHR surface_desc;
VkPhysicalDevice vk_physical_device;
VkFenceCreateInfo fence_desc;
uint32_t queue_family_index;
VkSurfaceKHR vk_surface;
VkInstance vk_instance;
IDXGIAdapter *adapter;
IDXGIOutput *target;
VkBool32 supported;
VkDevice vk_device;
VkFence vk_fence;
VkResult vr;
HRESULT hr;
@ -2622,7 +2718,6 @@ static HRESULT d3d12_swapchain_init(struct d3d12_swapchain *swapchain, IDXGIFact
vk_instance = queue->device->vkd3d_instance->vk_instance;
vk_physical_device = queue->device->vk_physical_device;
vk_device = queue->device->vk_device;
vkd3d_private_store_init(&swapchain->private_store);
@ -2658,39 +2753,31 @@ static HRESULT d3d12_swapchain_init(struct d3d12_swapchain *swapchain, IDXGIFact
return hr;
}
fence_desc.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
fence_desc.pNext = NULL;
fence_desc.flags = 0;
if ((vr = vk_procs->vkCreateFence(vk_device, &fence_desc, NULL, &vk_fence)) < 0)
{
WARN("Failed to create Vulkan fence, vr %d.\n", vr);
d3d12_swapchain_destroy(swapchain);
return hresult_from_vk_result(vr);
}
swapchain->vk_fence = vk_fence;
swapchain->current_buffer_index = 0;
/* Frame latency without WAITABLE_OBJECT is always 3,
* even if set on the device, according to docs. */
#define DEFAULT_FRAME_LATENCY 3
swapchain->frame_number = DXGI_MAX_SWAP_CHAIN_BUFFERS;
swapchain->frame_latency = DEFAULT_FRAME_LATENCY;
if (swapchain_desc->Flags & DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT)
{
swapchain->frame_number = DXGI_MAX_SWAP_CHAIN_BUFFERS;
swapchain->frame_latency = 1;
if (FAILED(hr = ID3D12Device6_CreateFence(d3d12_swapchain_device_iface(swapchain), DXGI_MAX_SWAP_CHAIN_BUFFERS,
0, &IID_ID3D12Fence, (void **)&swapchain->frame_latency_fence)))
{
WARN("Failed to create frame latency fence, hr %#x.\n", hr);
d3d12_swapchain_destroy(swapchain);
return hr;
}
if (FAILED(hr = ID3D12Device6_CreateFence(d3d12_swapchain_device_iface(swapchain), DXGI_MAX_SWAP_CHAIN_BUFFERS,
0, &IID_ID3D12Fence, (void **)&swapchain->frame_latency_fence)))
{
WARN("Failed to create frame latency fence, hr %#x.\n", hr);
d3d12_swapchain_destroy(swapchain);
return hr;
}
if (!(swapchain->frame_latency_event = CreateEventW(NULL, FALSE, TRUE, NULL)))
{
hr = HRESULT_FROM_WIN32(GetLastError());
WARN("Failed to create frame latency event, hr %#x.\n", hr);
d3d12_swapchain_destroy(swapchain);
return hr;
}
if (!(swapchain->frame_latency_event = CreateEventW(NULL, FALSE, TRUE, NULL)))
{
hr = HRESULT_FROM_WIN32(GetLastError());
WARN("Failed to create frame latency event, hr %#x.\n", hr);
d3d12_swapchain_destroy(swapchain);
return hr;
}
if (FAILED(hr = d3d12_swapchain_set_fullscreen(swapchain, target, TRUE)))