2016-09-21 13:41:31 +01:00
|
|
|
/*
|
|
|
|
* Copyright 2016 Józef Kucia for CodeWeavers
|
2016-09-28 12:04:58 +01:00
|
|
|
* Copyright 2016 Henri Verbeet for CodeWeavers
|
2016-09-21 13:41:31 +01:00
|
|
|
*
|
2017-06-16 20:05:54 +01:00
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
2016-09-21 13:41:31 +01:00
|
|
|
*
|
2017-06-16 20:05:54 +01:00
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
2016-09-21 13:41:31 +01:00
|
|
|
*
|
2017-06-16 20:05:54 +01:00
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with this library; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
|
2016-09-21 13:41:31 +01:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include "vkd3d_private.h"
|
|
|
|
|
2020-04-16 09:47:06 +01:00
|
|
|
static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value);
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
static void d3d12_command_queue_add_submission(struct d3d12_command_queue *queue,
|
|
|
|
const struct d3d12_command_queue_submission *sub);
|
2019-04-17 16:26:34 +01:00
|
|
|
|
2018-01-15 12:49:04 +00:00
|
|
|
/* Allocate and initialise a vkd3d_queue wrapping Vulkan queue 0 of the given
 * queue family. On success *queue owns the allocation; free with
 * vkd3d_queue_destroy(). Returns E_OUTOFMEMORY or an errno-derived HRESULT
 * on failure. */
HRESULT vkd3d_queue_create(struct d3d12_device *device,
        uint32_t family_index, const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue)
{
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    struct vkd3d_queue *object;
    int rc;

    if (!(object = vkd3d_malloc(sizeof(*object))))
        return E_OUTOFMEMORY;

    /* The mutex serialises access to the VkQueue; see vkd3d_queue_acquire(). */
    if ((rc = pthread_mutex_init(&object->mutex, NULL)))
    {
        ERR("Failed to initialize mutex, error %d.\n", rc);
        vkd3d_free(object);
        return hresult_from_errno(rc);
    }

    /* Cache the family properties we need later (capability flags, timestamp
     * precision for query support). */
    object->vk_family_index = family_index;
    object->vk_queue_flags = properties->queueFlags;
    object->timestamp_bits = properties->timestampValidBits;

    /* Always queue index 0 within the family. */
    VK_CALL(vkGetDeviceQueue(device->vk_device, family_index, 0, &object->vk_queue));

    TRACE("Created queue %p for queue family index %u.\n", object, family_index);

    *queue = object;

    return S_OK;
}
|
|
|
|
|
2019-05-02 15:02:40 +01:00
|
|
|
/* Destroy a queue created by vkd3d_queue_create().
 * The lock/unlock pair drains any in-flight holder of the queue mutex before
 * it is destroyed; destroying a locked mutex is undefined behaviour. */
void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device)
{
    int rc;

    if ((rc = pthread_mutex_lock(&queue->mutex)))
        ERR("Failed to lock mutex, error %d.\n", rc);

    /* Only unlock if we actually acquired the lock above. */
    if (!rc)
        pthread_mutex_unlock(&queue->mutex);

    pthread_mutex_destroy(&queue->mutex);
    vkd3d_free(queue);
}
|
|
|
|
|
2019-03-28 16:07:26 +00:00
|
|
|
/* Lock the queue and return its VkQueue handle for direct submission.
 * The caller must pair this with vkd3d_queue_release(); the mutex is held
 * until then. Returns VK_NULL_HANDLE if the mutex cannot be locked. */
VkQueue vkd3d_queue_acquire(struct vkd3d_queue *queue)
{
    int rc;

    TRACE("queue %p.\n", queue);

    if ((rc = pthread_mutex_lock(&queue->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return VK_NULL_HANDLE;
    }

    assert(queue->vk_queue);
    return queue->vk_queue;
}
|
|
|
|
|
2019-03-28 16:07:26 +00:00
|
|
|
/* Release a queue previously locked by vkd3d_queue_acquire(). */
void vkd3d_queue_release(struct vkd3d_queue *queue)
{
    TRACE("queue %p.\n", queue);

    pthread_mutex_unlock(&queue->mutex);
}
|
|
|
|
|
2019-04-17 16:26:35 +01:00
|
|
|
static VkResult vkd3d_queue_wait_idle(struct vkd3d_queue *queue,
|
|
|
|
const struct vkd3d_vk_device_procs *vk_procs)
|
|
|
|
{
|
|
|
|
VkQueue vk_queue;
|
|
|
|
VkResult vr;
|
|
|
|
|
|
|
|
if ((vk_queue = vkd3d_queue_acquire(queue)))
|
|
|
|
{
|
|
|
|
vr = VK_CALL(vkQueueWaitIdle(vk_queue));
|
|
|
|
vkd3d_queue_release(queue);
|
|
|
|
|
|
|
|
if (vr < 0)
|
|
|
|
WARN("Failed to wait for queue, vr %d.\n", vr);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
ERR("Failed to acquire queue %p.\n", queue);
|
|
|
|
vr = VK_ERROR_OUT_OF_HOST_MEMORY;
|
|
|
|
}
|
|
|
|
|
|
|
|
return vr;
|
|
|
|
}
|
|
|
|
|
2020-03-30 18:14:34 +01:00
|
|
|
/* Hand a fence's timeline semaphore to the fence worker thread, which will
 * wait for it to reach "value" on the GPU and then signal the D3D12 fence.
 * Appends to worker->enqueued_fences under worker->mutex and wakes the
 * worker via its condition variable. */
static HRESULT vkd3d_enqueue_timeline_semaphore(struct vkd3d_fence_worker *worker,
        struct d3d12_fence *fence, uint64_t value, struct vkd3d_queue *queue)
{
    struct vkd3d_waiting_fence *waiting_fence;
    int rc;

    TRACE("worker %p, fence %p, value %#"PRIx64".\n", worker, fence, value);

    if ((rc = pthread_mutex_lock(&worker->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return hresult_from_errno(rc);
    }

    if (!vkd3d_array_reserve((void **)&worker->enqueued_fences, &worker->enqueued_fences_size,
            worker->enqueued_fence_count + 1, sizeof(*worker->enqueued_fences)))
    {
        ERR("Failed to add GPU timeline semaphore.\n");
        pthread_mutex_unlock(&worker->mutex);
        return E_OUTOFMEMORY;
    }

    worker->enqueued_fences[worker->enqueued_fence_count].vk_semaphore = fence->timeline_semaphore;
    waiting_fence = &worker->enqueued_fences[worker->enqueued_fence_count].waiting_fence;
    waiting_fence->fence = fence;
    waiting_fence->value = value;
    waiting_fence->queue = queue;
    ++worker->enqueued_fence_count;

    /* Balanced by the decrement in vkd3d_wait_for_gpu_timeline_semaphores();
     * vkd3d_fence_worker_remove_fence() blocks until this drops to zero. */
    InterlockedIncrement(&fence->pending_worker_operation_count);

    pthread_cond_signal(&worker->cond);
    pthread_mutex_unlock(&worker->mutex);

    return S_OK;
}
|
|
|
|
|
2019-04-17 16:26:34 +01:00
|
|
|
/* Block until the fence worker has completed all operations pending on this
 * fence, so the fence can be destroyed safely. Called on fence teardown. */
static void vkd3d_fence_worker_remove_fence(struct vkd3d_fence_worker *worker, struct d3d12_fence *fence)
{
    LONG count;
    int rc;

    /* Fast path: nothing pending, nothing to wait for. */
    if (!(count = atomic_load_acquire(&fence->pending_worker_operation_count)))
        return;

    WARN("Waiting for %u pending fence operations (fence %p).\n", count, fence);

    if ((rc = pthread_mutex_lock(&worker->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return;
    }

    while ((count = atomic_load_acquire(&fence->pending_worker_operation_count)))
    {
        TRACE("Still waiting for %u pending fence operations (fence %p).\n", count, fence);

        /* Ask the worker to broadcast fence_destruction_cond once it has
         * drained its current batch, then sleep until it does. */
        worker->pending_fence_destruction = true;
        pthread_cond_signal(&worker->cond);

        pthread_cond_wait(&worker->fence_destruction_cond, &worker->mutex);
    }

    TRACE("Removed fence %p.\n", fence);

    pthread_mutex_unlock(&worker->mutex);
}
|
|
|
|
|
2019-06-13 12:47:34 +01:00
|
|
|
/* Move newly enqueued fences into the worker's active wait arrays
 * (fences / vk_semaphores / semaphore_wait_values, kept index-parallel).
 * Caller must hold worker->mutex. On allocation failure the enqueued list
 * is left intact so the move can be retried on the next wakeup. */
static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_worker *worker)
{
    unsigned int i;
    size_t count;
    bool ret;

    if (!worker->enqueued_fence_count)
        return;

    count = worker->fence_count + worker->enqueued_fence_count;

    /* Reserve all three parallel arrays before copying anything, so a partial
     * failure cannot leave them out of sync. */
    ret = vkd3d_array_reserve((void **) &worker->fences, &worker->fences_size,
            count, sizeof(*worker->fences));

    ret &= vkd3d_array_reserve((void **) &worker->vk_semaphores, &worker->vk_semaphores_size,
            count, sizeof(*worker->vk_semaphores));
    ret &= vkd3d_array_reserve((void **) &worker->semaphore_wait_values, &worker->semaphore_wait_values_size,
            count, sizeof(*worker->semaphore_wait_values));

    if (!ret)
    {
        ERR("Failed to reserve memory.\n");
        return;
    }

    for (i = 0; i < worker->enqueued_fence_count; ++i)
    {
        struct vkd3d_enqueued_fence *current = &worker->enqueued_fences[i];

        worker->vk_semaphores[worker->fence_count] = current->vk_semaphore;
        worker->semaphore_wait_values[worker->fence_count] = current->waiting_fence.value;

        worker->fences[worker->fence_count] = current->waiting_fence;
        ++worker->fence_count;
    }

    assert(worker->fence_count == count);
    worker->enqueued_fence_count = 0;
}
|
|
|
|
|
2020-03-30 18:14:34 +01:00
|
|
|
static void vkd3d_wait_for_gpu_timeline_semaphores(struct vkd3d_fence_worker *worker)
|
|
|
|
{
|
|
|
|
struct d3d12_device *device = worker->device;
|
|
|
|
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
|
|
|
VkSemaphoreWaitInfoKHR wait_info;
|
|
|
|
VkSemaphore vk_semaphore;
|
|
|
|
uint64_t counter_value;
|
|
|
|
unsigned int i, j;
|
|
|
|
HRESULT hr;
|
|
|
|
int vr;
|
|
|
|
|
|
|
|
if (!worker->fence_count)
|
|
|
|
return;
|
|
|
|
|
|
|
|
wait_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR;
|
|
|
|
wait_info.pNext = NULL;
|
|
|
|
wait_info.flags = VK_SEMAPHORE_WAIT_ANY_BIT_KHR;
|
|
|
|
wait_info.pSemaphores = worker->vk_semaphores;
|
|
|
|
wait_info.semaphoreCount = worker->fence_count;
|
|
|
|
wait_info.pValues = worker->semaphore_wait_values;
|
|
|
|
|
|
|
|
vr = VK_CALL(vkWaitSemaphoresKHR(device->vk_device, &wait_info, ~(uint64_t)0));
|
|
|
|
if (vr == VK_TIMEOUT)
|
|
|
|
return;
|
|
|
|
if (vr != VK_SUCCESS)
|
|
|
|
{
|
|
|
|
ERR("Failed to wait for Vulkan timeline semaphores, vr %d.\n", vr);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0, j = 0; i < worker->fence_count; ++i)
|
|
|
|
{
|
|
|
|
struct vkd3d_waiting_fence *current = &worker->fences[i];
|
|
|
|
|
|
|
|
vk_semaphore = worker->vk_semaphores[i];
|
|
|
|
if (!(vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device, vk_semaphore, &counter_value))) &&
|
|
|
|
counter_value >= current->value)
|
|
|
|
{
|
|
|
|
TRACE("Signaling fence %p value %#"PRIx64".\n", current->fence, current->value);
|
2020-04-16 09:47:06 +01:00
|
|
|
if (FAILED(hr = d3d12_fence_signal(current->fence, counter_value)))
|
2020-03-30 18:14:34 +01:00
|
|
|
ERR("Failed to signal D3D12 fence, hr %#x.\n", hr);
|
|
|
|
|
|
|
|
InterlockedDecrement(¤t->fence->pending_worker_operation_count);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (vr != VK_NOT_READY && vr != VK_SUCCESS)
|
|
|
|
ERR("Failed to get Vulkan semaphore status, vr %d.\n", vr);
|
|
|
|
|
|
|
|
if (i != j)
|
|
|
|
{
|
|
|
|
worker->vk_semaphores[j] = worker->vk_semaphores[i];
|
|
|
|
worker->semaphore_wait_values[j] = worker->semaphore_wait_values[i];
|
|
|
|
worker->fences[j] = worker->fences[i];
|
|
|
|
}
|
|
|
|
++j;
|
|
|
|
}
|
|
|
|
worker->fence_count = j;
|
|
|
|
}
|
|
|
|
|
2016-10-08 13:31:57 +01:00
|
|
|
/* Fence worker thread entry point. Alternates between waiting on GPU
 * timeline semaphores and (under the mutex) servicing fence-destruction
 * requests, absorbing newly enqueued fences, and sleeping when idle.
 * Exits when should_exit is set and no fences remain enqueued. */
static void *vkd3d_fence_worker_main(void *arg)
{
    struct vkd3d_fence_worker *worker = arg;
    int rc;

    vkd3d_set_thread_name("vkd3d_fence");

    for (;;)
    {
        /* Runs without the mutex; operates only on the worker-private
         * active arrays. */
        vkd3d_wait_for_gpu_timeline_semaphores(worker);

        /* Take the lock only when there is nothing left to wait on, or new
         * work has been enqueued (acquire load pairs with the producer's
         * update under the mutex). */
        if (!worker->fence_count || atomic_load_acquire(&worker->enqueued_fence_count))
        {
            if ((rc = pthread_mutex_lock(&worker->mutex)))
            {
                ERR("Failed to lock mutex, error %d.\n", rc);
                break;
            }

            /* Wake any thread blocked in vkd3d_fence_worker_remove_fence(). */
            if (worker->pending_fence_destruction)
            {
                pthread_cond_broadcast(&worker->fence_destruction_cond);
                worker->pending_fence_destruction = false;
            }

            if (worker->enqueued_fence_count)
            {
                vkd3d_fence_worker_move_enqueued_fences_locked(worker);
            }
            else
            {
                if (worker->should_exit)
                {
                    pthread_mutex_unlock(&worker->mutex);
                    break;
                }

                /* Idle: sleep until a fence is enqueued or we are stopped. */
                if ((rc = pthread_cond_wait(&worker->cond, &worker->mutex)))
                {
                    ERR("Failed to wait on condition variable, error %d.\n", rc);
                    pthread_mutex_unlock(&worker->mutex);
                    break;
                }
            }

            pthread_mutex_unlock(&worker->mutex);
        }
    }

    return NULL;
}
|
|
|
|
|
2016-10-20 15:38:04 +01:00
|
|
|
/* Initialise the fence worker state and launch its thread.
 * All arrays start empty and grow on demand. On any failure, everything
 * initialised so far is torn down and an errno-derived HRESULT (or the
 * thread-creation HRESULT) is returned. */
HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker,
        struct d3d12_device *device)
{
    HRESULT hr;
    int rc;

    TRACE("worker %p.\n", worker);

    worker->should_exit = false;
    worker->pending_fence_destruction = false;
    worker->device = device;

    /* Producer-side staging list (filled by vkd3d_enqueue_timeline_semaphore). */
    worker->enqueued_fence_count = 0;
    worker->enqueued_fences = NULL;
    worker->enqueued_fences_size = 0;

    /* Worker-private active arrays, kept index-parallel. */
    worker->fence_count = 0;

    worker->fences = NULL;
    worker->fences_size = 0;

    worker->vk_semaphores = NULL;
    worker->vk_semaphores_size = 0;
    worker->semaphore_wait_values = NULL;
    worker->semaphore_wait_values_size = 0;

    if ((rc = pthread_mutex_init(&worker->mutex, NULL)))
    {
        ERR("Failed to initialize mutex, error %d.\n", rc);
        return hresult_from_errno(rc);
    }

    if ((rc = pthread_cond_init(&worker->cond, NULL)))
    {
        ERR("Failed to initialize condition variable, error %d.\n", rc);
        pthread_mutex_destroy(&worker->mutex);
        return hresult_from_errno(rc);
    }

    if ((rc = pthread_cond_init(&worker->fence_destruction_cond, NULL)))
    {
        ERR("Failed to initialize condition variable, error %d.\n", rc);
        pthread_mutex_destroy(&worker->mutex);
        pthread_cond_destroy(&worker->cond);
        return hresult_from_errno(rc);
    }

    if (FAILED(hr = vkd3d_create_thread(device->vkd3d_instance,
            vkd3d_fence_worker_main, worker, &worker->thread)))
    {
        pthread_mutex_destroy(&worker->mutex);
        pthread_cond_destroy(&worker->cond);
        pthread_cond_destroy(&worker->fence_destruction_cond);
    }

    return hr;
}
|
|
|
|
|
2018-01-11 16:03:46 +00:00
|
|
|
/* Signal the fence worker thread to exit, join it, and free all worker
 * resources. Returns an errno-derived HRESULT if the mutex cannot be
 * locked, or the join failure HRESULT. */
HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker,
        struct d3d12_device *device)
{
    HRESULT hr;
    int rc;

    TRACE("worker %p.\n", worker);

    if ((rc = pthread_mutex_lock(&worker->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return hresult_from_errno(rc);
    }

    /* The worker checks should_exit only when idle, so wake it up. */
    worker->should_exit = true;
    pthread_cond_signal(&worker->cond);

    pthread_mutex_unlock(&worker->mutex);

    if (FAILED(hr = vkd3d_join_thread(device->vkd3d_instance, &worker->thread)))
        return hr;

    /* Thread has exited; safe to destroy synchronisation objects and arrays. */
    pthread_mutex_destroy(&worker->mutex);
    pthread_cond_destroy(&worker->cond);
    pthread_cond_destroy(&worker->fence_destruction_cond);

    vkd3d_free(worker->enqueued_fences);
    vkd3d_free(worker->fences);
    vkd3d_free(worker->vk_semaphores);
    vkd3d_free(worker->semaphore_wait_values);

    return S_OK;
}
|
|
|
|
|
2019-04-01 10:19:49 +01:00
|
|
|
static const struct d3d12_root_parameter *root_signature_get_parameter(
|
|
|
|
const struct d3d12_root_signature *root_signature, unsigned int index)
|
|
|
|
{
|
|
|
|
assert(index < root_signature->parameter_count);
|
|
|
|
return &root_signature->parameters[index];
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct d3d12_root_descriptor_table *root_signature_get_descriptor_table(
|
|
|
|
const struct d3d12_root_signature *root_signature, unsigned int index)
|
|
|
|
{
|
|
|
|
const struct d3d12_root_parameter *p = root_signature_get_parameter(root_signature, index);
|
|
|
|
assert(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE);
|
|
|
|
return &p->u.descriptor_table;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct d3d12_root_constant *root_signature_get_32bit_constants(
|
|
|
|
const struct d3d12_root_signature *root_signature, unsigned int index)
|
|
|
|
{
|
|
|
|
const struct d3d12_root_parameter *p = root_signature_get_parameter(root_signature, index);
|
|
|
|
assert(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS);
|
|
|
|
return &p->u.constant;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct d3d12_root_parameter *root_signature_get_root_descriptor(
|
|
|
|
const struct d3d12_root_signature *root_signature, unsigned int index)
|
|
|
|
{
|
|
|
|
const struct d3d12_root_parameter *p = root_signature_get_parameter(root_signature, index);
|
|
|
|
assert(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV
|
|
|
|
|| p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_SRV
|
|
|
|
|| p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_UAV);
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
|
2016-09-28 13:16:18 +01:00
|
|
|
/* ID3D12Fence */
|
2020-04-15 07:33:31 +01:00
|
|
|
/* Recover the implementation struct from an ID3D12Fence COM interface
 * pointer. */
static struct d3d12_fence *impl_from_ID3D12Fence(d3d12_fence_iface *iface)
{
    return CONTAINING_RECORD(iface, struct d3d12_fence, ID3D12Fence_iface);
}
|
|
|
|
|
2019-04-17 16:26:36 +01:00
|
|
|
/* Destroy the fence's Vulkan timeline semaphore. Taking fence->mutex first
 * drains any concurrent user of the semaphore before it is destroyed. */
static void d3d12_fence_destroy_vk_objects(struct d3d12_fence *fence)
{
    const struct vkd3d_vk_device_procs *vk_procs;
    struct d3d12_device *device = fence->device;
    int rc;

    if ((rc = pthread_mutex_lock(&fence->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return;
    }

    vk_procs = &device->vk_procs;
    VK_CALL(vkDestroySemaphore(device->vk_device, fence->timeline_semaphore, NULL));
    pthread_mutex_unlock(&fence->mutex);
}
|
|
|
|
|
2020-03-31 11:05:09 +01:00
|
|
|
static void d3d12_fence_signal_external_events_locked(struct d3d12_fence *fence)
|
|
|
|
{
|
|
|
|
unsigned int i, j;
|
|
|
|
|
|
|
|
for (i = 0, j = 0; i < fence->event_count; ++i)
|
|
|
|
{
|
|
|
|
struct vkd3d_waiting_event *current = &fence->events[i];
|
|
|
|
|
|
|
|
if (current->value <= fence->value)
|
|
|
|
{
|
|
|
|
fence->device->signal_event(current->event);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (i != j)
|
|
|
|
fence->events[j] = *current;
|
|
|
|
++j;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fence->event_count = j;
|
|
|
|
}
|
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
/* Sleep on fence->cond until the fence's *pending* timeline value (the value
 * some queue has promised to signal, not the value actually reached by the
 * GPU) is at least pending_value. Used by submission threads to resolve
 * wait-before-signal without a GPU round trip. Caller holds fence->mutex. */
static void d3d12_fence_block_until_pending_value_reaches_locked(struct d3d12_fence *fence, UINT64 pending_value)
{
    while (pending_value > fence->pending_timeline_value)
    {
        TRACE("Blocking wait on fence %p until it reaches 0x%"PRIx64".\n", fence, pending_value);
        pthread_cond_wait(&fence->cond, &fence->mutex);
    }
}
|
2020-03-31 11:05:09 +01:00
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
/* Advance the fence's pending timeline value and wake submission threads
 * blocked in d3d12_fence_block_until_pending_value_reaches_locked().
 * Non-monotonic updates are ignored. Caller must hold fence->mutex. */
static void d3d12_fence_update_pending_value_locked(struct d3d12_fence *fence, UINT64 pending_value)
{
    /* Only a forward move constitutes a signal. */
    if (pending_value <= fence->pending_timeline_value)
        return;

    fence->pending_timeline_value = pending_value;
    pthread_cond_broadcast(&fence->cond);
}
|
|
|
|
|
|
|
|
static void d3d12_fence_lock(struct d3d12_fence *fence)
|
|
|
|
{
|
|
|
|
pthread_mutex_lock(&fence->mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void d3d12_fence_unlock(struct d3d12_fence *fence)
|
|
|
|
{
|
|
|
|
pthread_mutex_unlock(&fence->mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool d3d12_fence_can_elide_wait_semaphore_locked(struct d3d12_fence *fence, uint64_t value)
|
|
|
|
{
|
|
|
|
/* Relevant if the semaphore has been signalled already on host.
|
|
|
|
* We should not wait on the timeline semaphore directly, we can simply submit in-place. */
|
|
|
|
return fence->value >= value;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool d3d12_fence_can_signal_semaphore_locked(struct d3d12_fence *fence, uint64_t value)
|
|
|
|
{
|
|
|
|
struct d3d12_device *device = fence->device;
|
|
|
|
bool need_signal = false;
|
2020-03-31 11:05:09 +01:00
|
|
|
|
|
|
|
/* If we're attempting to async signal a fence with a value which is not monotonically increasing the payload value,
|
|
|
|
* warn about this case. Do not treat this as an error since it might work. */
|
|
|
|
if (value > fence->pending_timeline_value)
|
|
|
|
{
|
|
|
|
/* Sanity check against the delta limit. Use the current fence value. */
|
|
|
|
if (value - fence->value > device->device_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference)
|
|
|
|
{
|
|
|
|
FIXME("Timeline semaphore delta is %"PRIu64", but implementation only supports a delta of %"PRIu64".\n",
|
|
|
|
value - fence->value, device->device_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference);
|
|
|
|
}
|
|
|
|
|
|
|
|
need_signal = true;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
FIXME("Fence %p is being signalled non-monotonically. Old pending value %"PRIu64", new pending value %"PRIu64".\n",
|
|
|
|
fence, fence->pending_timeline_value, value);
|
|
|
|
|
|
|
|
/* Mostly to be safe against weird, unknown use cases.
|
|
|
|
* The pending signal might be blocked by another fence,
|
|
|
|
* we'll base this on the actual, currently visible count value. */
|
|
|
|
need_signal = value > fence->value;
|
|
|
|
}
|
|
|
|
|
|
|
|
return need_signal;
|
|
|
|
}
|
|
|
|
|
|
|
|
static HRESULT d3d12_fence_signal_cpu_timeline_semaphore(struct d3d12_fence *fence, uint64_t value)
|
2020-03-30 18:14:34 +01:00
|
|
|
{
|
|
|
|
struct d3d12_device *device = fence->device;
|
|
|
|
VkResult vr;
|
2020-03-31 11:05:09 +01:00
|
|
|
int rc;
|
2020-03-30 18:14:34 +01:00
|
|
|
|
2020-03-31 11:05:09 +01:00
|
|
|
if ((rc = pthread_mutex_lock(&fence->mutex)))
|
|
|
|
{
|
|
|
|
ERR("Failed to lock mutex, error %d.\n", rc);
|
|
|
|
return hresult_from_errno(rc);
|
|
|
|
}
|
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
/* We must only signal a value which is greater than the pending value.
|
|
|
|
* The pending timeline value is the highest value which is pending execution, and thus will eventually reach that value.
|
|
|
|
* It is unsafe to attempt to signal the fence to a lower value. */
|
|
|
|
if (value > fence->pending_timeline_value)
|
2020-03-31 11:05:09 +01:00
|
|
|
{
|
|
|
|
/* Sanity check against the delta limit. */
|
|
|
|
if (value - fence->value > device->device_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference)
|
|
|
|
{
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
FIXME("Timeline semaphore delta is 0x%"PRIx64", but implementation only supports a delta of 0x%"PRIx64".\n",
|
2020-03-31 11:05:09 +01:00
|
|
|
value - fence->value, device->device_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference);
|
|
|
|
}
|
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
/* Normally we would use vkSignalSemaphoreKHR here, but it has some CPU performance issues on
|
|
|
|
* both NV and AMD, and since we have threaded submission, we can simply unblock the submission thread(s)
|
|
|
|
* which wait for the host signal to come through.
|
|
|
|
* Any semaphore wait can be elided if wait value <= current value, so we do not need to have an up-to-date
|
|
|
|
* timeline semaphore object. */
|
|
|
|
d3d12_fence_update_pending_value_locked(fence, value);
|
|
|
|
fence->value = value;
|
2020-03-31 11:05:09 +01:00
|
|
|
}
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
else if (value != fence->value)
|
2020-03-31 11:05:09 +01:00
|
|
|
{
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
FIXME("Attempting to signal fence %p with 0x%"PRIx64", but value is currently 0x%"PRIx64", with a pending signaled to 0x%"PRIx64".\n",
|
|
|
|
fence, value, fence->value, fence->pending_timeline_value);
|
2020-03-31 11:05:09 +01:00
|
|
|
vr = VK_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
d3d12_fence_signal_external_events_locked(fence);
|
|
|
|
|
|
|
|
pthread_mutex_unlock(&fence->mutex);
|
2020-03-30 18:14:34 +01:00
|
|
|
return hresult_from_vk_result(vr);
|
|
|
|
}
|
|
|
|
|
2020-04-16 09:47:06 +01:00
|
|
|
static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value)
|
2019-04-17 16:26:34 +01:00
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
if ((rc = pthread_mutex_lock(&fence->mutex)))
|
|
|
|
{
|
|
|
|
ERR("Failed to lock mutex, error %d.\n", rc);
|
|
|
|
return hresult_from_errno(rc);
|
|
|
|
}
|
|
|
|
|
2020-03-30 18:14:34 +01:00
|
|
|
if (value < fence->value)
|
|
|
|
{
|
|
|
|
FIXME("Fence values must be monotonically increasing. Fence %p, was %"PRIx64", now %"PRIx64".\n",
|
|
|
|
fence, fence->value, value);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
fence->value = value;
|
2019-04-17 16:26:34 +01:00
|
|
|
|
2020-03-31 11:05:09 +01:00
|
|
|
d3d12_fence_signal_external_events_locked(fence);
|
2019-04-17 16:26:34 +01:00
|
|
|
|
|
|
|
pthread_mutex_unlock(&fence->mutex);
|
|
|
|
|
|
|
|
return S_OK;
|
|
|
|
}
|
|
|
|
|
2020-04-15 07:33:31 +01:00
|
|
|
/* IUnknown::QueryInterface for the fence. The fence implements ID3D12Fence1
 * and all of its ancestors; anything else gets E_NOINTERFACE. */
static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(d3d12_fence_iface *iface,
        REFIID riid, void **object)
{
    static const IID * const iids[] =
    {
        &IID_ID3D12Fence,
        &IID_ID3D12Fence1,
        &IID_ID3D12Pageable,
        &IID_ID3D12DeviceChild,
        &IID_ID3D12Object,
        &IID_IUnknown,
    };
    unsigned int i;

    TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object);

    for (i = 0; i < sizeof(iids) / sizeof(iids[0]); ++i)
    {
        if (IsEqualGUID(riid, iids[i]))
        {
            ID3D12Fence_AddRef(iface);
            *object = iface;
            return S_OK;
        }
    }

    WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid));

    *object = NULL;
    return E_NOINTERFACE;
}
|
|
|
|
|
2020-04-15 07:33:31 +01:00
|
|
|
/* IUnknown::AddRef — bumps the fence's reference count atomically. */
static ULONG STDMETHODCALLTYPE d3d12_fence_AddRef(d3d12_fence_iface *iface)
{
    struct d3d12_fence *fence = impl_from_ID3D12Fence(iface);
    ULONG refcount;

    refcount = InterlockedIncrement(&fence->refcount);
    TRACE("%p increasing refcount to %u.\n", fence, refcount);

    return refcount;
}
|
|
|
|
|
2020-04-15 07:33:31 +01:00
|
|
|
/* IUnknown::Release — drops a reference and destroys the fence when the
 * count reaches zero. Teardown order matters: the fence is removed from the
 * device's fence worker before its Vulkan objects are destroyed, and the
 * device reference is released last since "device" is read from the fence. */
static ULONG STDMETHODCALLTYPE d3d12_fence_Release(d3d12_fence_iface *iface)
{
    struct d3d12_fence *fence = impl_from_ID3D12Fence(iface);
    ULONG refcount = InterlockedDecrement(&fence->refcount);
    int rc;

    TRACE("%p decreasing refcount to %u.\n", fence, refcount);

    if (!refcount)
    {
        struct d3d12_device *device = fence->device;

        vkd3d_private_store_destroy(&fence->private_store);

        /* Stop the fence worker from touching this fence before freeing it. */
        vkd3d_fence_worker_remove_fence(&device->fence_worker, fence);

        d3d12_fence_destroy_vk_objects(fence);

        /* Pending SetEventOnCompletion() entries. */
        vkd3d_free(fence->events);
        if ((rc = pthread_mutex_destroy(&fence->mutex)))
            ERR("Failed to destroy mutex, error %d.\n", rc);
        if ((rc = pthread_cond_destroy(&fence->cond)))
            ERR("Failed to destroy cond, error %d.\n", rc);
        vkd3d_free(fence);

        /* Released last: the fence held a reference on its parent device. */
        d3d12_device_release(device);
    }

    return refcount;
}
|
|
|
|
|
2020-04-15 07:33:31 +01:00
|
|
|
/* ID3D12Object::GetPrivateData — forwards to the fence's private store. */
static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(d3d12_fence_iface *iface,
        REFGUID guid, UINT *data_size, void *data)
{
    struct d3d12_fence *fence;

    TRACE("iface %p, guid %s, data_size %p, data %p.\n",
            iface, debugstr_guid(guid), data_size, data);

    fence = impl_from_ID3D12Fence(iface);
    return vkd3d_get_private_data(&fence->private_store, guid, data_size, data);
}
|
|
|
|
|
2020-04-15 07:33:31 +01:00
|
|
|
/* ID3D12Object::SetPrivateData — forwards to the fence's private store. */
static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(d3d12_fence_iface *iface,
        REFGUID guid, UINT data_size, const void *data)
{
    struct d3d12_fence *fence;

    TRACE("iface %p, guid %s, data_size %u, data %p.\n",
            iface, debugstr_guid(guid), data_size, data);

    fence = impl_from_ID3D12Fence(iface);
    return vkd3d_set_private_data(&fence->private_store, guid, data_size, data);
}
|
|
|
|
|
2020-04-15 07:33:31 +01:00
|
|
|
/* ID3D12Object::SetPrivateDataInterface — stores a COM interface pointer in
 * the fence's private store. */
static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateDataInterface(d3d12_fence_iface *iface,
        REFGUID guid, const IUnknown *data)
{
    struct d3d12_fence *fence;

    TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data);

    fence = impl_from_ID3D12Fence(iface);
    return vkd3d_set_private_data_interface(&fence->private_store, guid, data);
}
|
|
|
|
|
2020-04-15 07:33:31 +01:00
|
|
|
/* ID3D12Object::SetName. Note the name is only validated and traced here;
 * nothing in this function stores it. */
static HRESULT STDMETHODCALLTYPE d3d12_fence_SetName(d3d12_fence_iface *iface, const WCHAR *name)
{
    struct d3d12_fence *fence = impl_from_ID3D12Fence(iface);

    TRACE("iface %p, name %s.\n", iface, debugstr_w(name, fence->device->wchar_size));

    if (!name)
        return E_INVALIDARG;

    return S_OK;
}
|
|
|
|
|
2020-04-15 07:33:31 +01:00
|
|
|
/* ID3D12DeviceChild::GetDevice — queries the parent device for "iid". */
static HRESULT STDMETHODCALLTYPE d3d12_fence_GetDevice(d3d12_fence_iface *iface, REFIID iid, void **device)
{
    struct d3d12_fence *fence;

    TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device);

    fence = impl_from_ID3D12Fence(iface);
    return d3d12_device_query_interface(fence->device, iid, device);
}
|
|
|
|
|
2020-04-15 07:33:31 +01:00
|
|
|
/* ID3D12Fence::GetCompletedValue — reads the current fence value under the
 * mutex. Returns 0 if the mutex cannot be locked (no error channel exists
 * in this method's signature). */
static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(d3d12_fence_iface *iface)
{
    struct d3d12_fence *fence = impl_from_ID3D12Fence(iface);
    uint64_t value;
    int rc;

    TRACE("iface %p.\n", iface);

    if ((rc = pthread_mutex_lock(&fence->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return 0;
    }

    value = fence->value;
    pthread_mutex_unlock(&fence->mutex);

    return value;
}
|
|
|
|
|
2020-04-15 07:33:31 +01:00
|
|
|
/* ID3D12Fence::SetEventOnCompletion — registers "event" to be signalled once
 * the fence reaches "value". If the fence has already reached it, the event
 * is signalled immediately. Duplicate (value, event) registrations are
 * coalesced. */
static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(d3d12_fence_iface *iface,
        UINT64 value, HANDLE event)
{
    struct d3d12_fence *fence = impl_from_ID3D12Fence(iface);
    unsigned int i;
    int rc;

    TRACE("iface %p, value %#"PRIx64", event %p.\n", iface, value, event);

    if ((rc = pthread_mutex_lock(&fence->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return hresult_from_errno(rc);
    }

    /* Already completed: fire the event right away, nothing to queue. */
    if (value <= fence->value)
    {
        fence->device->signal_event(event);
        pthread_mutex_unlock(&fence->mutex);
        return S_OK;
    }

    /* Ignore exact duplicates; the existing entry will fire the event. */
    for (i = 0; i < fence->event_count; ++i)
    {
        struct vkd3d_waiting_event *current = &fence->events[i];
        if (current->value == value && current->event == event)
        {
            WARN("Event completion for (%p, %#"PRIx64") is already in the list.\n",
                    event, value);
            pthread_mutex_unlock(&fence->mutex);
            return S_OK;
        }
    }

    /* Grow the waiting-event array if needed before appending. */
    if (!vkd3d_array_reserve((void **)&fence->events, &fence->events_size,
            fence->event_count + 1, sizeof(*fence->events)))
    {
        WARN("Failed to add event.\n");
        pthread_mutex_unlock(&fence->mutex);
        return E_OUTOFMEMORY;
    }

    fence->events[fence->event_count].value = value;
    fence->events[fence->event_count].event = event;
    ++fence->event_count;

    pthread_mutex_unlock(&fence->mutex);
    return S_OK;
}
|
|
|
|
|
2020-04-15 07:33:31 +01:00
|
|
|
/* ID3D12Fence::Signal — CPU-side signal, delegated to the timeline
 * semaphore implementation. */
static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(d3d12_fence_iface *iface, UINT64 value)
{
    struct d3d12_fence *fence;

    TRACE("iface %p, value %#"PRIx64".\n", iface, value);

    fence = impl_from_ID3D12Fence(iface);
    return d3d12_fence_signal_cpu_timeline_semaphore(fence, value);
}
|
|
|
|
|
2020-04-15 07:33:31 +01:00
|
|
|
/* ID3D12Fence1::GetCreationFlags — returns the D3D12_FENCE_FLAGS the fence
 * was created with. */
static D3D12_FENCE_FLAGS STDMETHODCALLTYPE d3d12_fence_GetCreationFlags(d3d12_fence_iface *iface)
{
    struct d3d12_fence *fence;

    TRACE("iface %p.\n", iface);

    fence = impl_from_ID3D12Fence(iface);
    return fence->d3d12_flags;
}
|
|
|
|
|
|
|
|
/* Vtable for the fence's ID3D12Fence1 interface. Slot order is ABI-mandated
 * and must match the ID3D12Fence1 declaration exactly. */
static const struct ID3D12Fence1Vtbl d3d12_fence_vtbl =
{
    /* IUnknown methods */
    d3d12_fence_QueryInterface,
    d3d12_fence_AddRef,
    d3d12_fence_Release,
    /* ID3D12Object methods */
    d3d12_fence_GetPrivateData,
    d3d12_fence_SetPrivateData,
    d3d12_fence_SetPrivateDataInterface,
    d3d12_fence_SetName,
    /* ID3D12DeviceChild methods */
    d3d12_fence_GetDevice,
    /* ID3D12Fence methods */
    d3d12_fence_GetCompletedValue,
    d3d12_fence_SetEventOnCompletion,
    d3d12_fence_Signal,
    /* ID3D12Fence1 methods */
    d3d12_fence_GetCreationFlags,
};
|
|
|
|
|
2020-04-15 07:33:31 +01:00
|
|
|
/* Converts a public ID3D12Fence1 pointer to the implementation struct.
 * "unsafe": asserts (debug builds only) that the interface really is ours. */
static struct d3d12_fence *unsafe_impl_from_ID3D12Fence1(ID3D12Fence1 *iface)
{
    if (iface)
    {
        assert(iface->lpVtbl == &d3d12_fence_vtbl);
        return impl_from_ID3D12Fence(iface);
    }

    return NULL;
}
|
|
|
|
|
2020-04-15 07:33:31 +01:00
|
|
|
/* ID3D12Fence variant of the cast above; the vtable layouts are
 * compatible, so the pointer can simply be reinterpreted. */
static struct d3d12_fence *unsafe_impl_from_ID3D12Fence(ID3D12Fence *iface)
{
    ID3D12Fence1 *iface1 = (ID3D12Fence1 *)iface;

    return unsafe_impl_from_ID3D12Fence1(iface1);
}
|
|
|
|
|
2020-03-30 18:14:34 +01:00
|
|
|
static HRESULT d3d12_fence_init_timeline(struct d3d12_fence *fence, struct d3d12_device *device,
|
|
|
|
UINT64 initial_value)
|
|
|
|
{
|
|
|
|
VkSemaphoreTypeCreateInfoKHR type_info = { VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR };
|
|
|
|
VkSemaphoreCreateInfo info = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };
|
|
|
|
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
|
|
|
VkResult vr;
|
|
|
|
|
|
|
|
type_info.initialValue = initial_value;
|
|
|
|
type_info.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR;
|
|
|
|
info.pNext = &type_info;
|
|
|
|
|
|
|
|
if ((vr = VK_CALL(vkCreateSemaphore(device->vk_device, &info, NULL, &fence->timeline_semaphore))))
|
|
|
|
{
|
|
|
|
WARN("Failed to create timeline semaphore, vr %d.\n", vr);
|
|
|
|
return hresult_from_vk_result(vr);
|
|
|
|
}
|
2020-03-31 11:05:09 +01:00
|
|
|
|
|
|
|
fence->pending_timeline_value = initial_value;
|
2020-03-30 18:14:34 +01:00
|
|
|
return S_OK;
|
|
|
|
}
|
|
|
|
|
2016-10-05 14:56:27 +01:00
|
|
|
static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *device,
|
2016-09-28 13:16:18 +01:00
|
|
|
UINT64 initial_value, D3D12_FENCE_FLAGS flags)
|
|
|
|
{
|
2019-01-10 10:16:48 +00:00
|
|
|
HRESULT hr;
|
2016-10-05 14:56:27 +01:00
|
|
|
int rc;
|
|
|
|
|
2016-09-28 13:16:18 +01:00
|
|
|
fence->ID3D12Fence_iface.lpVtbl = &d3d12_fence_vtbl;
|
|
|
|
fence->refcount = 1;
|
2020-04-15 07:33:31 +01:00
|
|
|
fence->d3d12_flags = flags;
|
2016-09-28 13:16:18 +01:00
|
|
|
|
2020-04-16 09:47:06 +01:00
|
|
|
if (FAILED(hr = d3d12_fence_init_timeline(fence, device, initial_value)))
|
|
|
|
return hr;
|
2020-03-30 18:14:34 +01:00
|
|
|
|
2016-09-28 13:16:18 +01:00
|
|
|
fence->value = initial_value;
|
|
|
|
|
2016-10-05 14:56:27 +01:00
|
|
|
if ((rc = pthread_mutex_init(&fence->mutex, NULL)))
|
|
|
|
{
|
|
|
|
ERR("Failed to initialize mutex, error %d.\n", rc);
|
2018-11-09 17:06:23 +00:00
|
|
|
return hresult_from_errno(rc);
|
2016-10-05 14:56:27 +01:00
|
|
|
}
|
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
if ((rc = pthread_cond_init(&fence->cond, NULL)))
|
|
|
|
{
|
|
|
|
ERR("Failed to initialize cond variable, error %d.\n", rc);
|
|
|
|
return hresult_from_errno(rc);
|
|
|
|
}
|
|
|
|
|
2016-09-28 13:16:18 +01:00
|
|
|
if (flags)
|
|
|
|
FIXME("Ignoring flags %#x.\n", flags);
|
|
|
|
|
2016-10-05 18:01:57 +01:00
|
|
|
fence->events = NULL;
|
|
|
|
fence->events_size = 0;
|
2016-10-05 14:56:27 +01:00
|
|
|
fence->event_count = 0;
|
2019-06-13 12:47:34 +01:00
|
|
|
fence->pending_worker_operation_count = 0;
|
|
|
|
|
2019-01-10 10:16:48 +00:00
|
|
|
if (FAILED(hr = vkd3d_private_store_init(&fence->private_store)))
|
|
|
|
{
|
|
|
|
pthread_mutex_destroy(&fence->mutex);
|
|
|
|
return hr;
|
|
|
|
}
|
2019-01-03 13:23:02 +00:00
|
|
|
|
2019-06-07 13:38:03 +01:00
|
|
|
d3d12_device_add_ref(fence->device = device);
|
2016-10-05 14:56:27 +01:00
|
|
|
|
|
|
|
return S_OK;
|
2016-09-28 13:16:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Allocates and initializes a d3d12_fence. On success the new fence is
 * returned through *fence with a single reference held by the caller.
 *
 * Fix: the original always stored "object" into *fence and never freed it,
 * so a failed d3d12_fence_init() both leaked the allocation and handed the
 * caller a pointer to an uninitialized object. On failure *fence is now set
 * to NULL and the allocation is released. */
HRESULT d3d12_fence_create(struct d3d12_device *device,
        uint64_t initial_value, D3D12_FENCE_FLAGS flags, struct d3d12_fence **fence)
{
    struct d3d12_fence *object;
    HRESULT hr;

    if (!(object = vkd3d_malloc(sizeof(*object))))
        return E_OUTOFMEMORY;

    if (FAILED(hr = d3d12_fence_init(object, device, initial_value, flags)))
    {
        ERR("Failed to create fence.\n");
        vkd3d_free(object);
        *fence = NULL;
        return hr;
    }

    TRACE("Created fence %p.\n", object);

    *fence = object;
    return hr;
}
|
|
|
|
|
2016-10-05 14:56:27 +01:00
|
|
|
/* Command buffers */
|
2019-06-17 14:43:25 +01:00
|
|
|
static void d3d12_command_list_mark_as_invalid(struct d3d12_command_list *list,
|
|
|
|
const char *message, ...)
|
|
|
|
{
|
|
|
|
va_list args;
|
|
|
|
|
|
|
|
va_start(args, message);
|
|
|
|
WARN("Command list %p is invalid: \"%s\".\n", list, vkd3d_dbg_vsprintf(message, args));
|
|
|
|
va_end(args);
|
|
|
|
|
|
|
|
list->is_valid = false;
|
|
|
|
}
|
|
|
|
|
2016-10-20 15:38:04 +01:00
|
|
|
static HRESULT d3d12_command_list_begin_command_buffer(struct d3d12_command_list *list)
|
2016-09-28 08:42:49 +01:00
|
|
|
{
|
|
|
|
struct d3d12_device *device = list->device;
|
|
|
|
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
|
|
|
VkCommandBufferBeginInfo begin_info;
|
|
|
|
VkResult vr;
|
|
|
|
|
|
|
|
begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
|
|
|
begin_info.pNext = NULL;
|
|
|
|
begin_info.flags = 0;
|
|
|
|
begin_info.pInheritanceInfo = NULL;
|
|
|
|
|
2016-10-11 12:43:04 +01:00
|
|
|
if ((vr = VK_CALL(vkBeginCommandBuffer(list->vk_command_buffer, &begin_info))) < 0)
|
2016-09-28 08:42:49 +01:00
|
|
|
{
|
|
|
|
WARN("Failed to begin command buffer, vr %d.\n", vr);
|
|
|
|
return hresult_from_vk_result(vr);
|
|
|
|
}
|
|
|
|
|
2016-10-19 15:39:48 +01:00
|
|
|
list->is_recording = true;
|
|
|
|
list->is_valid = true;
|
2016-09-28 11:00:54 +01:00
|
|
|
|
2016-09-28 08:42:49 +01:00
|
|
|
return S_OK;
|
|
|
|
}
|
|
|
|
|
2016-10-20 15:38:04 +01:00
|
|
|
/* Allocates a primary Vulkan command buffer from the allocator's pool for
 * "list" and begins recording. An allocator can back at most one command
 * list at a time (E_INVALIDARG otherwise). On success the allocator records
 * the list as its current one and the list inherits the allocator's queue
 * flags and outstanding-submissions counter. */
static HRESULT d3d12_command_allocator_allocate_command_buffer(struct d3d12_command_allocator *allocator,
        struct d3d12_command_list *list)
{
    struct d3d12_device *device = allocator->device;
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    VkCommandBufferAllocateInfo command_buffer_info;
    VkResult vr;
    HRESULT hr;

    TRACE("allocator %p, list %p.\n", allocator, list);

    if (allocator->current_command_list)
    {
        WARN("Command allocator is already in use.\n");
        return E_INVALIDARG;
    }

    command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
    command_buffer_info.pNext = NULL;
    command_buffer_info.commandPool = allocator->vk_command_pool;
    command_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
    command_buffer_info.commandBufferCount = 1;

    if ((vr = VK_CALL(vkAllocateCommandBuffers(device->vk_device, &command_buffer_info,
            &list->vk_command_buffer))) < 0)
    {
        WARN("Failed to allocate Vulkan command buffer, vr %d.\n", vr);
        return hresult_from_vk_result(vr);
    }

    list->vk_queue_flags = allocator->vk_queue_flags;

    /* If recording can't start, return the buffer to the pool so the
     * allocator isn't left holding a dangling command buffer. */
    if (FAILED(hr = d3d12_command_list_begin_command_buffer(list)))
    {
        VK_CALL(vkFreeCommandBuffers(device->vk_device, allocator->vk_command_pool,
                1, &list->vk_command_buffer));
        return hr;
    }

    allocator->current_command_list = list;
    list->outstanding_submissions_count = &allocator->outstanding_submissions_count;

    return S_OK;
}
|
|
|
|
|
2016-10-20 15:38:04 +01:00
|
|
|
static void d3d12_command_allocator_free_command_buffer(struct d3d12_command_allocator *allocator,
|
2016-09-27 11:13:37 +01:00
|
|
|
struct d3d12_command_list *list)
|
|
|
|
{
|
|
|
|
struct d3d12_device *device = allocator->device;
|
|
|
|
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
|
|
|
|
|
|
|
TRACE("allocator %p, list %p.\n", allocator, list);
|
|
|
|
|
|
|
|
if (allocator->current_command_list == list)
|
|
|
|
allocator->current_command_list = NULL;
|
2016-10-20 15:38:04 +01:00
|
|
|
|
|
|
|
if (!vkd3d_array_reserve((void **)&allocator->command_buffers, &allocator->command_buffers_size,
|
|
|
|
allocator->command_buffer_count + 1, sizeof(*allocator->command_buffers)))
|
|
|
|
{
|
|
|
|
WARN("Failed to add command buffer.\n");
|
|
|
|
VK_CALL(vkFreeCommandBuffers(device->vk_device, allocator->vk_command_pool,
|
|
|
|
1, &list->vk_command_buffer));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
allocator->command_buffers[allocator->command_buffer_count++] = list->vk_command_buffer;
|
2016-09-27 11:13:37 +01:00
|
|
|
}
|
|
|
|
|
2016-10-10 15:55:07 +01:00
|
|
|
/* Track a transient render pass so it is destroyed when the allocator is
 * reset or released.  Returns false on allocation failure. */
static bool d3d12_command_allocator_add_render_pass(struct d3d12_command_allocator *allocator, VkRenderPass pass)
{
    bool reserved;

    reserved = vkd3d_array_reserve((void **)&allocator->passes, &allocator->passes_size,
            allocator->pass_count + 1, sizeof(*allocator->passes));
    if (!reserved)
        return false;

    allocator->passes[allocator->pass_count] = pass;
    ++allocator->pass_count;

    return true;
}
|
|
|
|
|
|
|
|
static bool d3d12_command_allocator_add_framebuffer(struct d3d12_command_allocator *allocator,
|
|
|
|
VkFramebuffer framebuffer)
|
|
|
|
{
|
|
|
|
if (!vkd3d_array_reserve((void **)&allocator->framebuffers, &allocator->framebuffers_size,
|
|
|
|
allocator->framebuffer_count + 1, sizeof(*allocator->framebuffers)))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
allocator->framebuffers[allocator->framebuffer_count++] = framebuffer;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2016-11-03 19:20:38 +00:00
|
|
|
static bool d3d12_command_allocator_add_descriptor_pool(struct d3d12_command_allocator *allocator,
|
2020-03-18 11:02:52 +00:00
|
|
|
VkDescriptorPool pool, enum vkd3d_descriptor_pool_types pool_type)
|
2016-11-03 19:20:38 +00:00
|
|
|
{
|
2020-03-18 11:02:52 +00:00
|
|
|
struct d3d12_descriptor_pool_cache *cache = &allocator->descriptor_pool_caches[pool_type];
|
|
|
|
|
|
|
|
if (!vkd3d_array_reserve((void **)&cache->descriptor_pools, &cache->descriptor_pools_size,
|
|
|
|
cache->descriptor_pool_count + 1, sizeof(*cache->descriptor_pools)))
|
2016-11-03 19:20:38 +00:00
|
|
|
return false;
|
|
|
|
|
2020-03-18 11:02:52 +00:00
|
|
|
cache->descriptor_pools[cache->descriptor_pool_count++] = pool;
|
2016-11-03 19:20:38 +00:00
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-08-21 17:08:01 +01:00
|
|
|
static bool d3d12_command_allocator_add_view(struct d3d12_command_allocator *allocator,
|
|
|
|
struct vkd3d_view *view)
|
|
|
|
{
|
|
|
|
if (!vkd3d_array_reserve((void **)&allocator->views, &allocator->views_size,
|
|
|
|
allocator->view_count + 1, sizeof(*allocator->views)))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
vkd3d_view_incref(view);
|
|
|
|
allocator->views[allocator->view_count++] = view;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2017-08-09 17:44:16 +01:00
|
|
|
static bool d3d12_command_allocator_add_buffer_view(struct d3d12_command_allocator *allocator,
|
|
|
|
VkBufferView view)
|
|
|
|
{
|
|
|
|
if (!vkd3d_array_reserve((void **)&allocator->buffer_views, &allocator->buffer_views_size,
|
|
|
|
allocator->buffer_view_count + 1, sizeof(*allocator->buffer_views)))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
allocator->buffer_views[allocator->buffer_view_count++] = view;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-09-12 14:19:54 +01:00
|
|
|
/* Obtain a descriptor pool for @pool_type: reuse a previously reset pool from
 * the cache's free list when possible, otherwise create a new one.  The new
 * pool is registered with the cache so it is recycled/destroyed with the
 * allocator.  Returns VK_NULL_HANDLE on failure. */
static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool(
        struct d3d12_command_allocator *allocator, enum vkd3d_descriptor_pool_types pool_type)
{
    static const VkDescriptorPoolSize pool_sizes[] =
    {
        /* Must be first in the array. */
        /* Need at least 2048 so we can allocate an immutable sampler set. */
        {VK_DESCRIPTOR_TYPE_SAMPLER, 2048},
        {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1024},
        {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1024},
        {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1024},
        {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 1024},
        {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1024},
        /* must be last in the array */
        {VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT, 65536}
    };
    struct d3d12_descriptor_pool_cache *cache = &allocator->descriptor_pool_caches[pool_type];
    struct d3d12_device *device = allocator->device;
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    VkDescriptorPoolInlineUniformBlockCreateInfoEXT inline_uniform_desc;
    VkDescriptorPoolCreateInfo pool_desc;
    VkDevice vk_device = device->vk_device;
    VkDescriptorPool vk_pool;
    VkResult vr;

    if (cache->free_descriptor_pool_count > 0)
    {
        /* Pop a recycled pool from the tail of the free list. */
        vk_pool = cache->free_descriptor_pools[cache->free_descriptor_pool_count - 1];
        cache->free_descriptor_pools[cache->free_descriptor_pool_count - 1] = VK_NULL_HANDLE;
        --cache->free_descriptor_pool_count;
    }
    else
    {
        inline_uniform_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT;
        inline_uniform_desc.pNext = NULL;
        inline_uniform_desc.maxInlineUniformBlockBindings = 256;

        pool_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
        pool_desc.pNext = &inline_uniform_desc;
        /* For a correct implementation of RS 1.0 we need to update packed descriptor sets late rather than on draw.
         * If device does not support descriptor indexing, we must update on draw and pray applications don't rely on RS 1.0
         * guarantees. */
        pool_desc.flags = pool_type == VKD3D_DESCRIPTOR_POOL_TYPE_VOLATILE &&
                allocator->device->vk_info.supports_volatile_packed_descriptors ?
                VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT_EXT : 0;
        pool_desc.maxSets = 512;
        pool_desc.poolSizeCount = ARRAY_SIZE(pool_sizes);
        pool_desc.pPoolSizes = pool_sizes;

        if (pool_type == VKD3D_DESCRIPTOR_POOL_TYPE_IMMUTABLE_SAMPLER)
        {
            /* Only allocate for samplers.  Relies on the sampler entry being
             * first in pool_sizes[]. */
            pool_desc.poolSizeCount = 1;
        }
        else if (pool_type == VKD3D_DESCRIPTOR_POOL_TYPE_VOLATILE ||
                !device->vk_info.EXT_inline_uniform_block ||
                device->vk_info.device_limits.maxPushConstantsSize >= (D3D12_MAX_ROOT_COST * sizeof(uint32_t)))
        {
            /* We don't use volatile inline uniform block descriptors.  Drop
             * the trailing inline-uniform-block pool size and its pNext
             * create-info together. */
            pool_desc.pNext = NULL;
            pool_desc.poolSizeCount -= 1;
        }

        if ((vr = VK_CALL(vkCreateDescriptorPool(vk_device, &pool_desc, NULL, &vk_pool))) < 0)
        {
            ERR("Failed to create descriptor pool, vr %d.\n", vr);
            return VK_NULL_HANDLE;
        }
    }

    if (!(d3d12_command_allocator_add_descriptor_pool(allocator, vk_pool, pool_type)))
    {
        /* Could not track the pool; destroy it so it does not leak. */
        ERR("Failed to add descriptor pool.\n");
        VK_CALL(vkDestroyDescriptorPool(vk_device, vk_pool, NULL));
        return VK_NULL_HANDLE;
    }

    return vk_pool;
}
|
|
|
|
|
|
|
|
/* Allocate a descriptor set with @vk_set_layout from the per-type cached
 * pool.  If the current pool is exhausted or fragmented, a fresh pool is
 * allocated and the allocation is retried once.  Returns VK_NULL_HANDLE on
 * failure. */
static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set(
        struct d3d12_command_allocator *allocator, VkDescriptorSetLayout vk_set_layout,
        enum vkd3d_descriptor_pool_types pool_type)
{
    struct d3d12_descriptor_pool_cache *cache = &allocator->descriptor_pool_caches[pool_type];
    struct d3d12_device *device = allocator->device;
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    struct VkDescriptorSetAllocateInfo set_desc;
    VkDevice vk_device = device->vk_device;
    VkDescriptorSet vk_descriptor_set;
    VkResult vr;

    /* Lazily acquire a pool for this type on first use. */
    if (!cache->vk_descriptor_pool)
        cache->vk_descriptor_pool = d3d12_command_allocator_allocate_descriptor_pool(allocator, pool_type);
    if (!cache->vk_descriptor_pool)
        return VK_NULL_HANDLE;

    set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
    set_desc.pNext = NULL;
    set_desc.descriptorPool = cache->vk_descriptor_pool;
    set_desc.descriptorSetCount = 1;
    set_desc.pSetLayouts = &vk_set_layout;
    if ((vr = VK_CALL(vkAllocateDescriptorSets(vk_device, &set_desc, &vk_descriptor_set))) >= 0)
        return vk_descriptor_set;

    /* The current pool can no longer serve allocations.  Only pool
     * exhaustion/fragmentation is recoverable by switching to a new pool;
     * any other error is fatal for this allocation. */
    cache->vk_descriptor_pool = VK_NULL_HANDLE;
    if (vr == VK_ERROR_FRAGMENTED_POOL || vr == VK_ERROR_OUT_OF_POOL_MEMORY_KHR)
        cache->vk_descriptor_pool = d3d12_command_allocator_allocate_descriptor_pool(allocator, pool_type);
    if (!cache->vk_descriptor_pool)
    {
        ERR("Failed to allocate descriptor set, vr %d.\n", vr);
        return VK_NULL_HANDLE;
    }

    /* Retry once against the fresh pool. */
    set_desc.descriptorPool = cache->vk_descriptor_pool;
    if ((vr = VK_CALL(vkAllocateDescriptorSets(vk_device, &set_desc, &vk_descriptor_set))) < 0)
    {
        FIXME("Failed to allocate descriptor set from a new pool, vr %d.\n", vr);
        return VK_NULL_HANDLE;
    }

    return vk_descriptor_set;
}
|
|
|
|
|
2016-10-20 15:38:04 +01:00
|
|
|
static void d3d12_command_list_allocator_destroyed(struct d3d12_command_list *list)
|
2016-09-27 11:13:37 +01:00
|
|
|
{
|
|
|
|
TRACE("list %p.\n", list);
|
|
|
|
|
|
|
|
list->allocator = NULL;
|
|
|
|
list->vk_command_buffer = VK_NULL_HANDLE;
|
|
|
|
}
|
|
|
|
|
2020-03-18 11:02:52 +00:00
|
|
|
/* Tear down one descriptor pool cache.  With @keep_reusable_resources the
 * active pools are reset and moved onto the free list for reuse; otherwise
 * both free and active pools are destroyed. */
static void d3d12_command_allocator_free_descriptor_pool_cache(struct d3d12_command_allocator *allocator,
        struct d3d12_descriptor_pool_cache *cache, bool keep_reusable_resources)
{
    struct d3d12_device *device = allocator->device;
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    unsigned int i, j;
    cache->vk_descriptor_pool = VK_NULL_HANDLE;

    if (keep_reusable_resources)
    {
        if (vkd3d_array_reserve((void **)&cache->free_descriptor_pools,
                &cache->free_descriptor_pools_size,
                cache->free_descriptor_pool_count + cache->descriptor_pool_count,
                sizeof(*cache->free_descriptor_pools)))
        {
            /* Reset each active pool and append it to the free list. */
            for (i = 0, j = cache->free_descriptor_pool_count; i < cache->descriptor_pool_count; ++i, ++j)
            {
                VK_CALL(vkResetDescriptorPool(device->vk_device, cache->descriptor_pools[i], 0));
                cache->free_descriptor_pools[j] = cache->descriptor_pools[i];
            }
            cache->free_descriptor_pool_count += cache->descriptor_pool_count;
            cache->descriptor_pool_count = 0;
            /* On reserve failure, fall through: the active pools are
             * destroyed by the loop below instead of being recycled. */
        }
    }
    else
    {
        for (i = 0; i < cache->free_descriptor_pool_count; ++i)
        {
            VK_CALL(vkDestroyDescriptorPool(device->vk_device, cache->free_descriptor_pools[i], NULL));
        }
        cache->free_descriptor_pool_count = 0;
    }

    /* Destroy whatever active pools were not moved to the free list. */
    for (i = 0; i < cache->descriptor_pool_count; ++i)
    {
        VK_CALL(vkDestroyDescriptorPool(device->vk_device, cache->descriptor_pools[i], NULL));
    }
    cache->descriptor_pool_count = 0;
}
|
|
|
|
|
|
|
|
/* Release all transient resources tracked by the allocator: descriptor pool
 * caches, buffer views, view references, framebuffers and render passes.
 * With @keep_reusable_resources, descriptor pools are recycled instead of
 * destroyed (used by Reset()). */
static void d3d12_command_allocator_free_resources(struct d3d12_command_allocator *allocator,
        bool keep_reusable_resources)
{
    struct d3d12_device *device = allocator->device;
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    unsigned int i;

    for (i = 0; i < VKD3D_DESCRIPTOR_POOL_TYPE_COUNT; i++)
    {
        d3d12_command_allocator_free_descriptor_pool_cache(allocator,
                &allocator->descriptor_pool_caches[i],
                keep_reusable_resources);
    }

    for (i = 0; i < allocator->buffer_view_count; ++i)
    {
        VK_CALL(vkDestroyBufferView(device->vk_device, allocator->buffer_views[i], NULL));
    }
    allocator->buffer_view_count = 0;

    /* Views are refcounted; drop the references taken by add_view(). */
    for (i = 0; i < allocator->view_count; ++i)
    {
        vkd3d_view_decref(allocator->views[i], device);
    }
    allocator->view_count = 0;

    for (i = 0; i < allocator->framebuffer_count; ++i)
    {
        VK_CALL(vkDestroyFramebuffer(device->vk_device, allocator->framebuffers[i], NULL));
    }
    allocator->framebuffer_count = 0;

    /* Framebuffers reference render passes, so passes are destroyed last. */
    for (i = 0; i < allocator->pass_count; ++i)
    {
        VK_CALL(vkDestroyRenderPass(device->vk_device, allocator->passes[i], NULL));
    }
    allocator->pass_count = 0;
}
|
|
|
|
|
2016-09-21 14:33:17 +01:00
|
|
|
/* ID3D12CommandAllocator */
|
|
|
|
/* ID3D12CommandAllocator */
/* Recover the implementation object from its COM interface pointer. */
static inline struct d3d12_command_allocator *impl_from_ID3D12CommandAllocator(ID3D12CommandAllocator *iface)
{
    return CONTAINING_RECORD(iface, struct d3d12_command_allocator, ID3D12CommandAllocator_iface);
}
|
|
|
|
|
|
|
|
/* IUnknown::QueryInterface.  A command allocator exposes only its own
 * interface and the standard D3D12 base interfaces. */
static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_QueryInterface(ID3D12CommandAllocator *iface,
        REFIID riid, void **object)
{
    TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object);

    if (!IsEqualGUID(riid, &IID_ID3D12CommandAllocator)
            && !IsEqualGUID(riid, &IID_ID3D12Pageable)
            && !IsEqualGUID(riid, &IID_ID3D12DeviceChild)
            && !IsEqualGUID(riid, &IID_ID3D12Object)
            && !IsEqualGUID(riid, &IID_IUnknown))
    {
        WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid));

        *object = NULL;
        return E_NOINTERFACE;
    }

    ID3D12CommandAllocator_AddRef(iface);
    *object = iface;
    return S_OK;
}
|
|
|
|
|
|
|
|
/* IUnknown::AddRef. */
static ULONG STDMETHODCALLTYPE d3d12_command_allocator_AddRef(ID3D12CommandAllocator *iface)
{
    struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface);
    ULONG refs = InterlockedIncrement(&allocator->refcount);

    TRACE("%p increasing refcount to %u.\n", allocator, refs);

    return refs;
}
|
|
|
|
|
|
|
|
/* IUnknown::Release.  On the final release, detaches any still-open command
 * list, frees all tracked Vulkan resources and the command pool, and drops
 * the device reference last (the device must outlive the VK_CALLs above). */
static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllocator *iface)
{
    struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface);
    ULONG refcount = InterlockedDecrement(&allocator->refcount);
    unsigned int i;

    TRACE("%p decreasing refcount to %u.\n", allocator, refcount);

    if (!refcount)
    {
        struct d3d12_device *device = allocator->device;
        const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;

        vkd3d_private_store_destroy(&allocator->private_store);

        if (allocator->current_command_list)
            d3d12_command_list_allocator_destroyed(allocator->current_command_list);

        d3d12_command_allocator_free_resources(allocator, false);
        vkd3d_free(allocator->buffer_views);
        vkd3d_free(allocator->views);
        for (i = 0; i < VKD3D_DESCRIPTOR_POOL_TYPE_COUNT; i++)
        {
            vkd3d_free(allocator->descriptor_pool_caches[i].descriptor_pools);
            vkd3d_free(allocator->descriptor_pool_caches[i].free_descriptor_pools);
        }
        vkd3d_free(allocator->framebuffers);
        vkd3d_free(allocator->passes);

        /* All command buffers are implicitly freed when a pool is destroyed. */
        vkd3d_free(allocator->command_buffers);
        VK_CALL(vkDestroyCommandPool(device->vk_device, allocator->vk_command_pool, NULL));

        vkd3d_free(allocator);

        d3d12_device_release(device);
    }

    return refcount;
}
|
|
|
|
|
|
|
|
/* ID3D12Object::GetPrivateData — delegate to the private data store. */
static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_GetPrivateData(ID3D12CommandAllocator *iface,
        REFGUID guid, UINT *data_size, void *data)
{
    struct d3d12_command_allocator *command_allocator = impl_from_ID3D12CommandAllocator(iface);

    TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data);

    return vkd3d_get_private_data(&command_allocator->private_store, guid, data_size, data);
}
|
|
|
|
|
|
|
|
/* ID3D12Object::SetPrivateData — delegate to the private data store. */
static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_SetPrivateData(ID3D12CommandAllocator *iface,
        REFGUID guid, UINT data_size, const void *data)
{
    struct d3d12_command_allocator *command_allocator = impl_from_ID3D12CommandAllocator(iface);

    TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data);

    return vkd3d_set_private_data(&command_allocator->private_store, guid, data_size, data);
}
|
|
|
|
|
|
|
|
/* ID3D12Object::SetPrivateDataInterface — delegate to the private data store. */
static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_SetPrivateDataInterface(ID3D12CommandAllocator *iface,
        REFGUID guid, const IUnknown *data)
{
    struct d3d12_command_allocator *command_allocator = impl_from_ID3D12CommandAllocator(iface);

    TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data);

    return vkd3d_set_private_data_interface(&command_allocator->private_store, guid, data);
}
|
|
|
|
|
|
|
|
/* ID3D12Object::SetName — forward the debug name to the underlying Vulkan
 * command pool. */
static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_SetName(ID3D12CommandAllocator *iface, const WCHAR *name)
{
    struct d3d12_command_allocator *command_allocator = impl_from_ID3D12CommandAllocator(iface);

    TRACE("iface %p, name %s.\n", iface, debugstr_w(name, command_allocator->device->wchar_size));

    return vkd3d_set_vk_object_name(command_allocator->device, (uint64_t)command_allocator->vk_command_pool,
            VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_POOL_EXT, name);
}
|
|
|
|
|
2019-06-07 13:38:03 +01:00
|
|
|
/* ID3D12DeviceChild::GetDevice — query the owning device for @iid. */
static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_GetDevice(ID3D12CommandAllocator *iface, REFIID iid, void **device)
{
    struct d3d12_command_allocator *command_allocator = impl_from_ID3D12CommandAllocator(iface);

    TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device);

    return d3d12_device_query_interface(command_allocator->device, iid, device);
}
|
|
|
|
|
|
|
|
/* ID3D12CommandAllocator::Reset.  Recycles all transient resources and resets
 * the Vulkan command pool.  Fails with E_FAIL if a list recorded from this
 * allocator is still open, or if lists are still pending submission. */
static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllocator *iface)
{
    struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface);
    const struct vkd3d_vk_device_procs *vk_procs;
    struct d3d12_command_list *list;
    struct d3d12_device *device;
    LONG pending;
    VkResult vr;

    TRACE("iface %p.\n", iface);

    if ((list = allocator->current_command_list))
    {
        if (list->is_recording)
        {
            WARN("A command list using this allocator is in the recording state.\n");
            return E_FAIL;
        }

        TRACE("Resetting command list %p.\n", list);
    }

    if ((pending = atomic_load_acquire(&allocator->outstanding_submissions_count)) != 0)
    {
        /* HACK: There are currently command lists waiting to be submitted to the queue in the submission threads.
         * Buggy application, but work around this by not resetting the command pool this time.
         * To be perfectly safe, we can only reset after the fence timeline is signalled,
         * however, this is enough to workaround SotTR which resets the command list right
         * after calling ID3D12CommandQueue::ExecuteCommandLists().
         * Only happens once or twice on bootup and doesn't cause memory leaks over time
         * since the command pool is eventually reset.
         * Game does not seem to care if E_FAIL is returned, which is the correct thing to do here.
         *
         * TODO: Guard this with actual timeline semaphores from vkQueueSubmit(). */
        ERR("There are still %u pending command lists awaiting execution from command allocator iface %p!\n",
                (unsigned int)pending, iface);
        return E_FAIL;
    }

    device = allocator->device;
    vk_procs = &device->vk_procs;

    /* Keep descriptor pools for reuse; free the recycled command buffers
     * before resetting the pool they were allocated from. */
    d3d12_command_allocator_free_resources(allocator, true);
    if (allocator->command_buffer_count)
    {
        VK_CALL(vkFreeCommandBuffers(device->vk_device, allocator->vk_command_pool,
                allocator->command_buffer_count, allocator->command_buffers));
        allocator->command_buffer_count = 0;
    }

    /* The intent here is to recycle memory, so do not use RELEASE_RESOURCES_BIT here. */
    if ((vr = VK_CALL(vkResetCommandPool(device->vk_device, allocator->vk_command_pool, 0))))
    {
        WARN("Resetting command pool failed, vr %d.\n", vr);
        return hresult_from_vk_result(vr);
    }

    return S_OK;
}
|
|
|
|
|
|
|
|
/* Vtable for ID3D12CommandAllocator; entries must stay in COM declaration
 * order. */
static const struct ID3D12CommandAllocatorVtbl d3d12_command_allocator_vtbl =
{
    /* IUnknown methods */
    d3d12_command_allocator_QueryInterface,
    d3d12_command_allocator_AddRef,
    d3d12_command_allocator_Release,
    /* ID3D12Object methods */
    d3d12_command_allocator_GetPrivateData,
    d3d12_command_allocator_SetPrivateData,
    d3d12_command_allocator_SetPrivateDataInterface,
    d3d12_command_allocator_SetName,
    /* ID3D12DeviceChild methods */
    d3d12_command_allocator_GetDevice,
    /* ID3D12CommandAllocator methods */
    d3d12_command_allocator_Reset,
};
|
|
|
|
|
2020-04-14 12:42:43 +01:00
|
|
|
/* Unchecked downcast from an interface pointer; asserts that the vtable is
 * ours.  NULL maps to NULL. */
struct d3d12_command_allocator *unsafe_impl_from_ID3D12CommandAllocator(ID3D12CommandAllocator *iface)
{
    if (!iface)
        return NULL;
    assert(iface->lpVtbl == &d3d12_command_allocator_vtbl);
    return impl_from_ID3D12CommandAllocator(iface);
}
|
|
|
|
|
2019-03-28 16:07:26 +00:00
|
|
|
/* Map a D3D12 command list type to the device's corresponding vkd3d queue.
 * Returns NULL for unhandled types (e.g. bundles or video queues). */
struct vkd3d_queue *d3d12_device_get_vkd3d_queue(struct d3d12_device *device,
        D3D12_COMMAND_LIST_TYPE type)
{
    switch (type)
    {
        case D3D12_COMMAND_LIST_TYPE_DIRECT:
            return device->direct_queue;
        case D3D12_COMMAND_LIST_TYPE_COMPUTE:
            return device->compute_queue;
        case D3D12_COMMAND_LIST_TYPE_COPY:
            return device->copy_queue;
        default:
            FIXME("Unhandled command list type %#x.\n", type);
            return NULL;
    }
}
|
|
|
|
|
2016-09-27 11:13:37 +01:00
|
|
|
/* Initialize @allocator for command lists of @type on @device: creates the
 * backing Vulkan command pool on the matching queue family and zeroes all
 * transient-resource tracking arrays.  On failure all partial state is rolled
 * back. */
static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allocator,
        struct d3d12_device *device, D3D12_COMMAND_LIST_TYPE type)
{
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    VkCommandPoolCreateInfo command_pool_info;
    struct vkd3d_queue *queue;
    VkResult vr;
    HRESULT hr;

    if (FAILED(hr = vkd3d_private_store_init(&allocator->private_store)))
        return hr;

    /* Fall back to the direct queue for types without a dedicated queue. */
    if (!(queue = d3d12_device_get_vkd3d_queue(device, type)))
        queue = device->direct_queue;

    allocator->ID3D12CommandAllocator_iface.lpVtbl = &d3d12_command_allocator_vtbl;
    allocator->refcount = 1;
    allocator->outstanding_submissions_count = 0;
    allocator->type = type;
    allocator->vk_queue_flags = queue->vk_queue_flags;

    command_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
    command_pool_info.pNext = NULL;
    /* Do not use RESET_COMMAND_BUFFER_BIT. This allows the CommandPool to be a D3D12-style command pool.
     * Memory is owned by the pool and CommandBuffers become lightweight handles,
     * assuming a half-decent driver implementation. */
    command_pool_info.flags = 0;
    command_pool_info.queueFamilyIndex = queue->vk_family_index;

    if ((vr = VK_CALL(vkCreateCommandPool(device->vk_device, &command_pool_info, NULL,
            &allocator->vk_command_pool))) < 0)
    {
        WARN("Failed to create Vulkan command pool, vr %d.\n", vr);
        vkd3d_private_store_destroy(&allocator->private_store);
        return hresult_from_vk_result(vr);
    }

    memset(allocator->descriptor_pool_caches, 0, sizeof(allocator->descriptor_pool_caches));

    allocator->passes = NULL;
    allocator->passes_size = 0;
    allocator->pass_count = 0;

    allocator->framebuffers = NULL;
    allocator->framebuffers_size = 0;
    allocator->framebuffer_count = 0;

    allocator->views = NULL;
    allocator->views_size = 0;
    allocator->view_count = 0;

    allocator->buffer_views = NULL;
    allocator->buffer_views_size = 0;
    allocator->buffer_view_count = 0;

    allocator->command_buffers = NULL;
    allocator->command_buffers_size = 0;
    allocator->command_buffer_count = 0;

    allocator->current_command_list = NULL;

    /* The allocator holds a reference on the device until Release(). */
    d3d12_device_add_ref(allocator->device = device);

    return S_OK;
}
|
|
|
|
|
|
|
|
HRESULT d3d12_command_allocator_create(struct d3d12_device *device,
|
|
|
|
D3D12_COMMAND_LIST_TYPE type, struct d3d12_command_allocator **allocator)
|
|
|
|
{
|
|
|
|
struct d3d12_command_allocator *object;
|
2016-09-27 11:13:37 +01:00
|
|
|
HRESULT hr;
|
2016-09-21 14:33:17 +01:00
|
|
|
|
|
|
|
if (!(D3D12_COMMAND_LIST_TYPE_DIRECT <= type && type <= D3D12_COMMAND_LIST_TYPE_COPY))
|
|
|
|
{
|
|
|
|
WARN("Invalid type %#x.\n", type);
|
|
|
|
return E_INVALIDARG;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!(object = vkd3d_malloc(sizeof(*object))))
|
|
|
|
return E_OUTOFMEMORY;
|
|
|
|
|
2016-09-27 11:13:37 +01:00
|
|
|
if (FAILED(hr = d3d12_command_allocator_init(object, device, type)))
|
|
|
|
{
|
|
|
|
vkd3d_free(object);
|
|
|
|
return hr;
|
|
|
|
}
|
2016-09-21 14:33:17 +01:00
|
|
|
|
|
|
|
TRACE("Created command allocator %p.\n", object);
|
|
|
|
|
|
|
|
*allocator = object;
|
|
|
|
|
|
|
|
return S_OK;
|
|
|
|
}
|
|
|
|
|
2016-09-21 15:18:13 +01:00
|
|
|
/* ID3D12CommandList */
/* Recovers the d3d12_command_list implementation from its COM interface pointer. */
static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList(d3d12_command_list_iface *iface)
{
    return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList_iface);
}
|
|
|
|
|
2016-09-28 10:26:17 +01:00
|
|
|
/* Marks the cached framebuffer as stale so the next draw re-creates/re-binds it. */
static void d3d12_command_list_invalidate_current_framebuffer(struct d3d12_command_list *list)
{
    list->current_framebuffer = VK_NULL_HANDLE;
}
|
|
|
|
|
|
|
|
/* Marks the cached pipeline as stale so the next dispatch/draw re-binds it. */
static void d3d12_command_list_invalidate_current_pipeline(struct d3d12_command_list *list)
{
    list->current_pipeline = VK_NULL_HANDLE;
}
|
|
|
|
|
2020-04-26 16:29:39 +01:00
|
|
|
/* Selects whether attachment barriers are emitted for entering (BEGIN) or
 * leaving (END) a render pass instance. */
enum vkd3d_render_pass_transition_mode
{
    VKD3D_RENDER_PASS_TRANSITION_MODE_BEGIN,
    VKD3D_RENDER_PASS_TRANSITION_MODE_END,
};
|
|
|
|
|
|
|
|
/* Fills *vk_barrier with an image memory barrier transitioning an attachment
 * view between the resource's common layout and the layout the view uses as
 * a render pass attachment.  Returns the pipeline stage flags the barrier
 * should be issued against. */
static VkPipelineStageFlags vk_render_pass_barrier_from_view(const struct vkd3d_view *view, const struct d3d12_resource *resource,
        enum vkd3d_render_pass_transition_mode mode, VkImageMemoryBarrier *vk_barrier)
{
    VkPipelineStageFlags stages;
    VkAccessFlags access;

    /* Color vs. depth-stencil attachments use different stages/access bits. */
    if (view->format->vk_aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT)
    {
        stages = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
    }
    else
    {
        stages = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
        access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
    }

    vk_barrier->sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
    vk_barrier->pNext = NULL;

    if (mode == VKD3D_RENDER_PASS_TRANSITION_MODE_BEGIN)
    {
        /* Entering the pass: common layout -> attachment layout. */
        vk_barrier->srcAccessMask = 0;
        vk_barrier->dstAccessMask = access;
        vk_barrier->oldLayout = resource->common_layout;
        vk_barrier->newLayout = view->info.texture.vk_layout;
    }
    else /* if (mode == VKD3D_RENDER_PASS_TRANSITION_MODE_END) */
    {
        /* Leaving the pass: attachment layout -> common layout. */
        vk_barrier->srcAccessMask = access;
        vk_barrier->dstAccessMask = 0;
        vk_barrier->oldLayout = view->info.texture.vk_layout;
        vk_barrier->newLayout = resource->common_layout;
    }

    vk_barrier->srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    vk_barrier->dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    vk_barrier->image = resource->u.vk_image;
    vk_barrier->subresourceRange = vk_subresource_range_from_view(view);
    return stages;
}
|
|
|
|
|
|
|
|
/* Emits layout-transition barriers for all currently bound RTVs and the DSV,
 * either for entering or leaving a render pass, as a single pipeline barrier. */
static void d3d12_command_list_emit_render_pass_transition(struct d3d12_command_list *list,
        enum vkd3d_render_pass_transition_mode mode)
{
    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
    /* One barrier per possible RTV, plus one for the DSV. */
    VkImageMemoryBarrier vk_image_barriers[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT + 1];
    VkPipelineStageFlags stage_mask = 0;
    struct d3d12_dsv_desc *dsv;
    uint32_t i, j;

    for (i = 0, j = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; i++)
    {
        struct d3d12_rtv_desc *rtv = &list->rtvs[i];

        if (!rtv->view)
            continue;

        stage_mask |= vk_render_pass_barrier_from_view(rtv->view,
                rtv->resource, mode, &vk_image_barriers[j++]);
    }

    dsv = &list->dsv;

    if (dsv->view)
    {
        stage_mask |= vk_render_pass_barrier_from_view(dsv->view,
                dsv->resource, mode, &vk_image_barriers[j++]);
    }

    /* Nothing bound, nothing to transition. */
    if (!j)
        return;

    VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer,
            stage_mask, stage_mask, 0, 0, NULL, 0, NULL,
            j, vk_image_barriers));
}
|
|
|
|
|
2018-08-23 17:33:04 +01:00
|
|
|
/* Ends the active Vulkan render pass instance (if any), pausing transform
 * feedback first and emitting the end-of-pass attachment transitions. */
static void d3d12_command_list_end_current_render_pass(struct d3d12_command_list *list)
{
    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;

    /* Transform feedback must be ended before the render pass ends. */
    if (list->xfb_enabled)
    {
        VK_CALL(vkCmdEndTransformFeedbackEXT(list->vk_command_buffer, 0, ARRAY_SIZE(list->so_counter_buffers),
                list->so_counter_buffers, list->so_counter_buffer_offsets));
    }

    if (list->current_render_pass)
    {
        VK_CALL(vkCmdEndRenderPass(list->vk_command_buffer));
        /* Return attachments to the resource's common layout. */
        d3d12_command_list_emit_render_pass_transition(list, VKD3D_RENDER_PASS_TRANSITION_MODE_END);
    }

    list->current_render_pass = VK_NULL_HANDLE;

    if (list->xfb_enabled)
    {
        VkMemoryBarrier vk_barrier;

        /* We need a barrier between pause and resume. */
        vk_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
        vk_barrier.pNext = NULL;
        vk_barrier.srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
        vk_barrier.dstAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT;
        VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer,
                VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, 0,
                1, &vk_barrier, 0, NULL, 0, NULL));

        list->xfb_enabled = false;
    }
}
|
|
|
|
|
|
|
|
/* Invalidates the current render pass by ending it; a new pass will be begun
 * on demand by the next draw. */
static void d3d12_command_list_invalidate_current_render_pass(struct d3d12_command_list *list)
{
    d3d12_command_list_end_current_render_pass(list);
}
|
|
|
|
|
2019-11-11 16:03:39 +00:00
|
|
|
/* Recomputes the dirty-state flags for the given bind point from its bound
 * root signature, so that all used root parameters get re-applied on the next
 * draw/dispatch.  Optionally also marks all descriptor heap sets dirty. */
static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_list *list,
        VkPipelineBindPoint bind_point, bool invalidate_descriptor_heaps)
{
    struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];

    /* Nothing bound yet; nothing to invalidate. */
    if (!bindings->root_signature)
        return;

    /* Previously dirty states may no longer be dirty
     * if the new root signature does not use them */
    bindings->dirty_flags = 0;

    if (bindings->static_sampler_set)
        bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET;

    if (bindings->root_signature->vk_packed_descriptor_layout)
        bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_PACKED_DESCRIPTOR_SET;

    if (bindings->root_signature->descriptor_table_count)
        bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS;

    if (bindings->root_signature->flags & VKD3D_ROOT_SIGNATURE_USE_BINDLESS_UAV_COUNTERS)
        bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_UAV_COUNTER_BINDING;

    /* All root descriptors and root constants used by the signature need
     * re-binding. */
    bindings->root_descriptor_dirty_mask = bindings->root_signature->root_descriptor_mask;
    bindings->root_constant_dirty_mask = bindings->root_signature->root_constant_mask;

    if (invalidate_descriptor_heaps)
    {
        struct d3d12_device *device = bindings->root_signature->device;
        /* One dirty bit per bindless descriptor set. */
        bindings->descriptor_heap_dirty_mask = (1ull << device->bindless_state.set_count) - 1;
    }
}
|
|
|
|
|
2020-04-26 17:17:01 +01:00
|
|
|
/* Accumulates into *stages and *access the Vulkan pipeline stage and access
 * flags implied by a D3D12 resource state mask, restricted to the stages the
 * queue (vk_queue_flags) can actually execute.  state_mask is treated as a
 * bit mask; each set bit is decoded independently.  Unknown bits are
 * collected and reported once via FIXME. */
static void vk_access_and_stage_flags_from_d3d12_resource_state(const struct d3d12_device *device,
        const struct d3d12_resource *resource, uint32_t state_mask, VkQueueFlags vk_queue_flags,
        VkPipelineStageFlags *stages, VkAccessFlags *access)
{
    VkPipelineStageFlags queue_shader_stages = 0;
    uint32_t unhandled_state = 0;

    /* Build the set of shader stages available on this queue type. */
    if (vk_queue_flags & VK_QUEUE_GRAPHICS_BIT)
    {
        queue_shader_stages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
                VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
                VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
                VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
                VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
    }

    if (vk_queue_flags & VK_QUEUE_COMPUTE_BIT)
        queue_shader_stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;

    /* COMMON (0) has no bits to iterate; treat it as "anything". */
    if (state_mask == D3D12_RESOURCE_STATE_COMMON)
    {
        *stages |= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
        *access |= VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
        return;
    }

    while (state_mask)
    {
        /* Isolate the lowest set bit. */
        uint32_t state = state_mask & -state_mask;

        switch (state)
        {
            case D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER:
                *stages |= queue_shader_stages;
                *access |= VK_ACCESS_UNIFORM_READ_BIT;

                /* CBVs implemented as SSBOs are read as storage buffers. */
                if (device->bindless_state.flags & VKD3D_BINDLESS_CBV_AS_SSBO)
                    *access |= VK_ACCESS_SHADER_READ_BIT;

                if (vk_queue_flags & VK_QUEUE_GRAPHICS_BIT)
                {
                    *stages |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
                    *access |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
                }
                break;

            case D3D12_RESOURCE_STATE_INDEX_BUFFER:
                *stages |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
                *access |= VK_ACCESS_INDEX_READ_BIT;
                break;

            case D3D12_RESOURCE_STATE_RENDER_TARGET:
                /* NOTE(review): no access flags added here — presumably
                 * attachment access is covered by the render pass barriers;
                 * confirm against vk_render_pass_barrier_from_view(). */
                *stages |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
                break;

            case D3D12_RESOURCE_STATE_UNORDERED_ACCESS:
                *stages |= queue_shader_stages;
                *access |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
                break;

            case D3D12_RESOURCE_STATE_DEPTH_WRITE:
            case D3D12_RESOURCE_STATE_DEPTH_READ:
                /* NOTE(review): stages only, no access flags — same
                 * rationale as RENDER_TARGET above; confirm. */
                *stages |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
                break;

            case D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE:
                *stages |= queue_shader_stages & ~VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
                *access |= VK_ACCESS_SHADER_READ_BIT;
                break;

            case D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE:
                *stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
                *access |= VK_ACCESS_SHADER_READ_BIT;
                break;

            case D3D12_RESOURCE_STATE_STREAM_OUT:
                *stages |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT;
                *access |= VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT |
                        VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT;
                break;

            case D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT:
                *stages |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
                *access |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT;

                /* The same D3D12 state also covers predication buffers. */
                if (device->vk_info.EXT_conditional_rendering)
                {
                    *stages |= VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT;
                    *access |= VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT;
                }
                break;

            case D3D12_RESOURCE_STATE_COPY_DEST:
                *stages |= VK_PIPELINE_STAGE_TRANSFER_BIT;
                /* NOTE(review): access added only for buffers — texture
                 * copy access is presumably handled via layout transitions
                 * elsewhere; confirm. */
                if (d3d12_resource_is_buffer(resource))
                    *access |= VK_ACCESS_TRANSFER_WRITE_BIT;
                break;

            case D3D12_RESOURCE_STATE_COPY_SOURCE:
                *stages |= VK_PIPELINE_STAGE_TRANSFER_BIT;
                if (d3d12_resource_is_buffer(resource))
                    *access |= VK_ACCESS_TRANSFER_READ_BIT;
                break;

            case D3D12_RESOURCE_STATE_RESOLVE_DEST:
            case D3D12_RESOURCE_STATE_RESOLVE_SOURCE:
                *stages |= VK_PIPELINE_STAGE_TRANSFER_BIT;
                break;

            default:
                unhandled_state |= state;
        }

        state_mask &= ~state;
    }

    if (unhandled_state)
        FIXME("Unhandled resource state %#x.\n", unhandled_state);
}
|
|
|
|
|
2016-10-25 12:23:18 +01:00
|
|
|
/* Records the one-time barrier that moves a freshly created texture from its
 * creation layout (PREINITIALIZED for CPU-accessible resources, UNDEFINED
 * otherwise) to the resource's common layout, with destination stages/access
 * derived from the resource's declared initial D3D12 state. */
static void d3d12_command_list_transition_resource_to_initial_state(struct d3d12_command_list *list,
        struct d3d12_resource *resource)
{
    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
    VkPipelineStageFlags dst_stage_mask = 0;
    const struct vkd3d_format *format;
    VkImageMemoryBarrier barrier;

    /* Only textures need an image layout transition. */
    assert(d3d12_resource_is_texture(resource));

    if (!(format = vkd3d_format_from_d3d12_resource_desc(list->device, &resource->desc, 0)))
    {
        ERR("Resource %p has invalid format %#x.\n", resource, resource->desc.Format);
        return;
    }

    barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
    barrier.pNext = NULL;
    barrier.srcAccessMask = 0;
    barrier.dstAccessMask = 0;
    barrier.oldLayout = d3d12_resource_is_cpu_accessible(resource)
            ? VK_IMAGE_LAYOUT_PREINITIALIZED : VK_IMAGE_LAYOUT_UNDEFINED;
    barrier.newLayout = resource->common_layout;
    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.image = resource->u.vk_image;
    /* Transition every subresource at once. */
    barrier.subresourceRange.aspectMask = format->vk_aspect_mask;
    barrier.subresourceRange.baseMipLevel = 0;
    barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS;
    barrier.subresourceRange.baseArrayLayer = 0;
    barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS;

    /* Derive dst stages/access from the resource's initial D3D12 state. */
    vk_access_and_stage_flags_from_d3d12_resource_state(list->device, resource,
            resource->initial_state, list->vk_queue_flags, &dst_stage_mask, &barrier.dstAccessMask);

    TRACE("Initial state %#x transition for resource %p (old layout %#x, new layout %#x).\n",
            resource->initial_state, resource, barrier.oldLayout, barrier.newLayout);

    VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer,
            VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, dst_stage_mask,
            0, 0, NULL, 0, NULL, 1, &barrier));
}
|
|
|
|
|
|
|
|
static void d3d12_command_list_track_resource_usage(struct d3d12_command_list *list,
|
|
|
|
struct d3d12_resource *resource)
|
|
|
|
{
|
|
|
|
if (resource->flags & VKD3D_RESOURCE_INITIAL_STATE_TRANSITION)
|
|
|
|
{
|
2018-08-23 17:33:04 +01:00
|
|
|
d3d12_command_list_end_current_render_pass(list);
|
2018-08-15 12:58:00 +01:00
|
|
|
|
2016-10-25 12:23:18 +01:00
|
|
|
d3d12_command_list_transition_resource_to_initial_state(list, resource);
|
|
|
|
resource->flags &= ~VKD3D_RESOURCE_INITIAL_STATE_TRANSITION;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* IUnknown::QueryInterface for the command list.  Supports the full
 * ID3D12GraphicsCommandList(1-5) hierarchy plus its base interfaces;
 * all of them return the same interface pointer with an added reference. */
static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(d3d12_command_list_iface *iface,
        REFIID iid, void **object)
{
    TRACE("iface %p, iid %s, object %p.\n", iface, debugstr_guid(iid), object);

    if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList)
            || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList1)
            || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList2)
            || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList3)
            || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList4)
            || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList5)
            || IsEqualGUID(iid, &IID_ID3D12CommandList)
            || IsEqualGUID(iid, &IID_ID3D12DeviceChild)
            || IsEqualGUID(iid, &IID_ID3D12Object)
            || IsEqualGUID(iid, &IID_IUnknown))
    {
        ID3D12GraphicsCommandList_AddRef(iface);
        *object = iface;
        return S_OK;
    }

    WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(iid));

    *object = NULL;
    return E_NOINTERFACE;
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* IUnknown::AddRef — atomically increments and returns the reference count. */
static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(d3d12_command_list_iface *iface)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    ULONG refcount = InterlockedIncrement(&list->refcount);

    TRACE("%p increasing refcount to %u.\n", list, refcount);

    return refcount;
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* IUnknown::Release — decrements the reference count and destroys the list
 * when it reaches zero, returning its command buffer to the allocator and
 * dropping the device reference taken at creation. */
static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(d3d12_command_list_iface *iface)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    ULONG refcount = InterlockedDecrement(&list->refcount);
    unsigned int i;

    TRACE("%p decreasing refcount to %u.\n", list, refcount);

    if (!refcount)
    {
        struct d3d12_device *device = list->device;

        vkd3d_private_store_destroy(&list->private_store);

        /* When command pool is destroyed, all command buffers are implicitly freed. */
        if (list->allocator)
            d3d12_command_allocator_free_command_buffer(list->allocator, list);

        /* Free per-bind-point packed descriptor scratch arrays. */
        for (i = 0; i < ARRAY_SIZE(list->packed_descriptors); i++)
        {
            struct vkd3d_descriptor_updates *updates = &list->packed_descriptors[i];
            vkd3d_free(updates->descriptors);
            vkd3d_free(updates->descriptor_writes);
        }
        vkd3d_free(list);

        /* Release the device last; it may be destroyed by this call. */
        d3d12_device_release(device);
    }

    return refcount;
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12Object::GetPrivateData — delegates to the private data store. */
static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(d3d12_command_list_iface *iface,
        REFGUID guid, UINT *data_size, void *data)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);

    TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data);

    return vkd3d_get_private_data(&list->private_store, guid, data_size, data);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12Object::SetPrivateData — delegates to the private data store. */
static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(d3d12_command_list_iface *iface,
        REFGUID guid, UINT data_size, const void *data)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);

    TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data);

    return vkd3d_set_private_data(&list->private_store, guid, data_size, data);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12Object::SetPrivateDataInterface — delegates to the private data store. */
static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(d3d12_command_list_iface *iface,
        REFGUID guid, const IUnknown *data)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);

    TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data);

    return vkd3d_set_private_data_interface(&list->private_store, guid, data);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12Object::SetName — validates the name but does not store it;
 * only NULL is rejected. */
static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(d3d12_command_list_iface *iface, const WCHAR *name)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);

    TRACE("iface %p, name %s.\n", iface, debugstr_w(name, list->device->wchar_size));

    return name ? S_OK : E_INVALIDARG;
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12DeviceChild::GetDevice — queries the owning device for iid. */
static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(d3d12_command_list_iface *iface, REFIID iid, void **device)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);

    TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device);

    return d3d12_device_query_interface(list->device, iid, device);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12CommandList::GetType — returns the list type set at creation. */
static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(d3d12_command_list_iface *iface)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);

    TRACE("iface %p.\n", iface);

    return list->type;
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::Close — finalises recording: ends any open
 * render pass and predication, ends the Vulkan command buffer, returns the
 * list to the allocator, and reports any error recorded during recording. */
static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(d3d12_command_list_iface *iface)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    const struct vkd3d_vk_device_procs *vk_procs;
    VkResult vr;

    TRACE("iface %p.\n", iface);

    if (!list->is_recording)
    {
        WARN("Command list is not in the recording state.\n");
        return E_FAIL;
    }

    vk_procs = &list->device->vk_procs;

    /* Render passes and conditional rendering must not span vkEndCommandBuffer. */
    d3d12_command_list_end_current_render_pass(list);
    if (list->is_predicated)
        VK_CALL(vkCmdEndConditionalRenderingEXT(list->vk_command_buffer));

    if ((vr = VK_CALL(vkEndCommandBuffer(list->vk_command_buffer))) < 0)
    {
        WARN("Failed to end command buffer, vr %d.\n", vr);
        return hresult_from_vk_result(vr);
    }

    /* Detach from the allocator; the recorded buffer stays with it. */
    if (list->allocator)
    {
        d3d12_command_allocator_free_command_buffer(list->allocator, list);
        list->allocator = NULL;
    }

    list->is_recording = false;

    /* is_valid is cleared by recording functions on error. */
    if (!list->is_valid)
    {
        WARN("Error occurred during command list recording.\n");
        return E_INVALIDARG;
    }

    return S_OK;
}
|
|
|
|
|
2018-09-12 14:20:00 +01:00
|
|
|
/* Resets all per-recording state of the command list to its defaults
 * (unbound attachments, null pipelines, D3D12 default dynamic state) and
 * applies the optional initial pipeline state via the public interface. */
static void d3d12_command_list_reset_state(struct d3d12_command_list *list,
        ID3D12PipelineState *initial_pipeline_state)
{
    d3d12_command_list_iface *iface = &list->ID3D12GraphicsCommandList_iface;

    list->index_buffer_format = DXGI_FORMAT_UNKNOWN;

    /* Clear all bound render targets and the depth-stencil view. */
    memset(list->rtvs, 0, sizeof(list->rtvs));
    memset(&list->dsv, 0, sizeof(list->dsv));
    list->fb_width = 0;
    list->fb_height = 0;
    list->fb_layer_count = 0;

    list->xfb_enabled = false;

    list->is_predicated = false;

    list->current_framebuffer = VK_NULL_HANDLE;
    list->current_pipeline = VK_NULL_HANDLE;
    list->pso_render_pass = VK_NULL_HANDLE;
    list->current_render_pass = VK_NULL_HANDLE;
    list->uav_counter_address_buffer = VK_NULL_HANDLE;

    /* D3D12 default dynamic state. */
    memset(&list->dynamic_state, 0, sizeof(list->dynamic_state));
    list->dynamic_state.blend_constants[0] = D3D12_DEFAULT_BLEND_FACTOR_RED;
    list->dynamic_state.blend_constants[1] = D3D12_DEFAULT_BLEND_FACTOR_GREEN;
    list->dynamic_state.blend_constants[2] = D3D12_DEFAULT_BLEND_FACTOR_BLUE;
    list->dynamic_state.blend_constants[3] = D3D12_DEFAULT_BLEND_FACTOR_ALPHA;

    list->dynamic_state.min_depth_bounds = 0.0f;
    list->dynamic_state.max_depth_bounds = 1.0f;

    list->dynamic_state.primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST;

    memset(list->pipeline_bindings, 0, sizeof(list->pipeline_bindings));
    memset(list->descriptor_heaps, 0, sizeof(list->descriptor_heaps));

    list->state = NULL;

    list->descriptor_updates_count = 0;

    memset(list->so_counter_buffers, 0, sizeof(list->so_counter_buffers));
    memset(list->so_counter_buffer_offsets, 0, sizeof(list->so_counter_buffer_offsets));

    /* Bind the initial PSO (may be NULL) through the normal path. */
    ID3D12GraphicsCommandList_SetPipelineState(iface, initial_pipeline_state);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::Reset — re-opens a closed list for recording
 * against a new allocator, allocating a fresh command buffer and resetting
 * all recorded state.  Fails if the list is still recording. */
static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(d3d12_command_list_iface *iface,
        ID3D12CommandAllocator *allocator, ID3D12PipelineState *initial_pipeline_state)
{
    struct d3d12_command_allocator *allocator_impl = unsafe_impl_from_ID3D12CommandAllocator(allocator);
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    HRESULT hr;

    TRACE("iface %p, allocator %p, initial_pipeline_state %p.\n",
            iface, allocator, initial_pipeline_state);

    if (!allocator_impl)
    {
        WARN("Command allocator is NULL.\n");
        return E_INVALIDARG;
    }

    if (list->is_recording)
    {
        WARN("Command list is in the recording state.\n");
        return E_FAIL;
    }

    if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator_impl, list)))
    {
        list->allocator = allocator_impl;
        d3d12_command_list_reset_state(list, initial_pipeline_state);
    }

    return hr;
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::ClearState — not implemented yet. */
static HRESULT STDMETHODCALLTYPE d3d12_command_list_ClearState(d3d12_command_list_iface *iface,
        ID3D12PipelineState *pipeline_state)
{
    FIXME("iface %p, pipline_state %p stub!\n", iface, pipeline_state);

    return E_NOTIMPL;
}
|
|
|
|
|
2019-05-17 09:39:11 +01:00
|
|
|
static bool d3d12_command_list_has_depth_stencil_view(struct d3d12_command_list *list)
|
|
|
|
{
|
|
|
|
struct d3d12_graphics_pipeline_state *graphics;
|
|
|
|
|
|
|
|
assert(d3d12_pipeline_state_is_graphics(list->state));
|
|
|
|
graphics = &list->state->u.graphics;
|
|
|
|
|
2020-04-26 16:02:09 +01:00
|
|
|
return graphics->dsv_format || (d3d12_pipeline_state_has_unknown_dsv_format(list->state) && list->dsv.format);
|
2019-05-17 09:39:11 +01:00
|
|
|
}
|
|
|
|
|
2017-08-18 16:08:56 +01:00
|
|
|
/* Returns the framebuffer extent for the current draw: the bound attachment
 * dimensions when any attachment is used, otherwise the device's maximum
 * framebuffer size with a single layer.  layer_count may be NULL. */
static void d3d12_command_list_get_fb_extent(struct d3d12_command_list *list,
        uint32_t *width, uint32_t *height, uint32_t *layer_count)
{
    struct d3d12_graphics_pipeline_state *graphics = &list->state->u.graphics;
    struct d3d12_device *device = list->device;

    if (!graphics->rt_count && !d3d12_command_list_has_depth_stencil_view(list))
    {
        /* No attachments: fall back to the device limits. */
        *width = device->vk_info.device_limits.maxFramebufferWidth;
        *height = device->vk_info.device_limits.maxFramebufferHeight;
        if (layer_count)
            *layer_count = 1;
        return;
    }

    *width = list->fb_width;
    *height = list->fb_height;
    if (layer_count)
        *layer_count = list->fb_layer_count;
}
|
|
|
|
|
2020-04-22 10:34:27 +01:00
|
|
|
static bool d3d12_command_list_create_framebuffer(struct d3d12_command_list *list, VkRenderPass render_pass,
|
|
|
|
uint32_t view_count, const VkImageView *views, VkExtent3D extent, VkFramebuffer *vk_framebuffer)
|
2016-09-28 10:26:17 +01:00
|
|
|
{
|
2017-08-18 16:08:56 +01:00
|
|
|
struct d3d12_device *device = list->device;
|
2018-08-23 17:33:04 +01:00
|
|
|
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
2020-04-22 10:34:27 +01:00
|
|
|
struct VkFramebufferCreateInfo fb_desc;
|
|
|
|
VkResult vr;
|
|
|
|
|
|
|
|
fb_desc.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
|
|
|
|
fb_desc.pNext = NULL;
|
|
|
|
fb_desc.flags = 0;
|
|
|
|
fb_desc.renderPass = render_pass;
|
|
|
|
fb_desc.attachmentCount = view_count;
|
|
|
|
fb_desc.pAttachments = views;
|
|
|
|
fb_desc.width = extent.width;
|
|
|
|
fb_desc.height = extent.height;
|
|
|
|
fb_desc.layers = extent.depth;
|
|
|
|
|
|
|
|
if ((vr = VK_CALL(vkCreateFramebuffer(device->vk_device, &fb_desc, NULL, vk_framebuffer))) < 0)
|
|
|
|
{
|
|
|
|
ERR("Failed to create Vulkan framebuffer, vr %d.\n", vr);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!d3d12_command_allocator_add_framebuffer(list->allocator, *vk_framebuffer))
|
|
|
|
{
|
|
|
|
WARN("Failed to add framebuffer.\n");
|
|
|
|
VK_CALL(vkDestroyFramebuffer(device->vk_device, *vk_framebuffer, NULL));
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Lazily (re)creates the framebuffer for the list's current render targets.
 * Collects the Vulkan image views of all bound RTVs (plus the DSV, if any)
 * and builds a framebuffer against list->pso_render_pass. Returns false if
 * a required view is missing or framebuffer creation fails. */
static bool d3d12_command_list_update_current_framebuffer(struct d3d12_command_list *list)
{
    /* One slot per possible render target, plus one for the depth-stencil view. */
    VkImageView views[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT + 1];
    struct d3d12_graphics_pipeline_state *graphics;
    VkFramebuffer vk_framebuffer;
    unsigned int view_count;
    VkExtent3D extent;
    unsigned int i;

    /* A valid framebuffer is already bound; nothing to do. */
    if (list->current_framebuffer != VK_NULL_HANDLE)
        return true;

    graphics = &list->state->u.graphics;

    for (i = 0, view_count = 0; i < graphics->rt_count; ++i)
    {
        /* Attachments the pipeline declares as NULL are skipped entirely;
         * they do not occupy a slot in the framebuffer. */
        if (graphics->null_attachment_mask & (1u << i))
        {
            if (list->rtvs[i].view)
                WARN("Expected NULL RTV for attachment %u.\n", i);
            continue;
        }

        if (!list->rtvs[i].view)
        {
            FIXME("Invalid RTV for attachment %u.\n", i);
            return false;
        }

        views[view_count++] = list->rtvs[i].view->u.vk_image_view;
    }

    /* The DSV, when present, is appended after all colour attachments. */
    if (d3d12_command_list_has_depth_stencil_view(list))
    {
        if (!list->dsv.view)
        {
            FIXME("Invalid DSV.\n");
            return false;
        }

        views[view_count++] = list->dsv.view->u.vk_image_view;
    }

    d3d12_command_list_get_fb_extent(list, &extent.width, &extent.height, &extent.depth);

    if (!d3d12_command_list_create_framebuffer(list, list->pso_render_pass, view_count, views, extent, &vk_framebuffer))
    {
        ERR("Failed to create framebuffer.\n");
        return false;
    }

    list->current_framebuffer = vk_framebuffer;

    return true;
}
|
|
|
|
|
2019-11-11 16:03:38 +00:00
|
|
|
static bool d3d12_command_list_update_compute_pipeline(struct d3d12_command_list *list)
|
|
|
|
{
|
|
|
|
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
|
|
|
|
|
|
|
|
if (list->current_pipeline != VK_NULL_HANDLE)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
if (!d3d12_pipeline_state_is_compute(list->state))
|
|
|
|
{
|
|
|
|
WARN("Pipeline state %p is not a compute pipeline.\n", list->state);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, list->state->vk_bind_point, list->state->u.compute.vk_pipeline));
|
|
|
|
list->current_pipeline = list->state->u.compute.vk_pipeline;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Binds the graphics pipeline for the current pipeline state if none is
 * bound yet. Fetches (or compiles) a pipeline variant matching the current
 * dynamic state and DSV format; if the variant's render pass differs from
 * the one currently associated with the PSO, the framebuffer and render
 * pass are invalidated so they get rebuilt. Returns false on failure. */
static bool d3d12_command_list_update_graphics_pipeline(struct d3d12_command_list *list)
{
    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
    VkRenderPass vk_render_pass;
    VkPipeline vk_pipeline;
    VkFormat dsv_format;

    /* A pipeline is already bound; nothing to do. */
    if (list->current_pipeline != VK_NULL_HANDLE)
        return true;

    if (!d3d12_pipeline_state_is_graphics(list->state))
    {
        WARN("Pipeline state %p is not a graphics pipeline.\n", list->state);
        return false;
    }

    /* No bound DSV means the pipeline variant is keyed on VK_FORMAT_UNDEFINED. */
    dsv_format = list->dsv.format ? list->dsv.format->vk_format : VK_FORMAT_UNDEFINED;

    if (!(vk_pipeline = d3d12_pipeline_state_get_or_create_pipeline(list->state,
            &list->dynamic_state, dsv_format, &vk_render_pass)))
        return false;

    /* The render pass cache ensures that we use the same Vulkan render pass
     * object for compatible render passes. */
    if (list->pso_render_pass != vk_render_pass)
    {
        list->pso_render_pass = vk_render_pass;
        d3d12_command_list_invalidate_current_framebuffer(list);
        d3d12_command_list_invalidate_current_render_pass(list);
    }

    VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, list->state->vk_bind_point, vk_pipeline));
    list->current_pipeline = vk_pipeline;

    return true;
}
|
|
|
|
|
2020-03-05 10:23:42 +00:00
|
|
|
/* Translates a D3D12 descriptor into the Vulkan descriptor info expected by
 * the given shader resource binding. Returns false when the descriptor's
 * magic does not match the binding type (i.e. the application left the slot
 * unwritten or wrote a different descriptor kind), or when the descriptor's
 * view dimension does not match the binding's buffer/image flags — callers
 * use this to skip invalid descriptors.
 * NOTE(review): the 'device' parameter is unused in this body — presumably
 * kept for signature symmetry; confirm before removing. */
static bool vkd3d_descriptor_info_from_d3d12_desc(struct d3d12_device *device,
        const struct d3d12_desc *desc, const struct vkd3d_shader_resource_binding *binding,
        union vkd3d_descriptor_info *vk_descriptor)
{
    switch (binding->type)
    {
        case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV:
            if (desc->magic != VKD3D_DESCRIPTOR_MAGIC_CBV)
                return false;

            vk_descriptor->buffer = desc->u.vk_cbv_info;
            return true;

        case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV:
            if (desc->magic != VKD3D_DESCRIPTOR_MAGIC_SRV)
                return false;

            /* The binding's flags and the view's type must agree on whether
             * this is an image or a buffer resource. */
            if ((binding->flags & VKD3D_SHADER_BINDING_FLAG_IMAGE)
                    && (desc->u.view->type == VKD3D_VIEW_TYPE_IMAGE))
            {
                vk_descriptor->image.imageView = desc->u.view->u.vk_image_view;
                vk_descriptor->image.sampler = VK_NULL_HANDLE;
                vk_descriptor->image.imageLayout = desc->u.view->info.texture.vk_layout;
                return true;
            }
            else if ((binding->flags & VKD3D_SHADER_BINDING_FLAG_BUFFER)
                    && (desc->u.view->type == VKD3D_VIEW_TYPE_BUFFER))
            {
                vk_descriptor->buffer_view = desc->u.view->u.vk_buffer_view;
                return true;
            }
            break;

        case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV:
            if (desc->magic != VKD3D_DESCRIPTOR_MAGIC_UAV)
                return false;

            if ((binding->flags & VKD3D_SHADER_BINDING_FLAG_IMAGE)
                    && (desc->u.view->type == VKD3D_VIEW_TYPE_IMAGE))
            {
                vk_descriptor->image.imageView = desc->u.view->u.vk_image_view;
                vk_descriptor->image.sampler = VK_NULL_HANDLE;
                vk_descriptor->image.imageLayout = desc->u.view->info.texture.vk_layout;
                return true;
            }
            else if ((binding->flags & VKD3D_SHADER_BINDING_FLAG_BUFFER)
                    && (desc->u.view->type == VKD3D_VIEW_TYPE_BUFFER))
            {
                vk_descriptor->buffer_view = desc->u.view->u.vk_buffer_view;
                return true;
            }
            /* UAV counter bindings are backed by the view's counter buffer
             * view, which only exists if the UAV was created with one. */
            else if ((binding->flags & VKD3D_SHADER_BINDING_FLAG_COUNTER)
                    && desc->u.view->vk_counter_view)
            {
                vk_descriptor->buffer_view = desc->u.view->vk_counter_view;
                return true;
            }
            break;

        case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER:
            if (desc->magic != VKD3D_DESCRIPTOR_MAGIC_SAMPLER)
                return false;

            vk_descriptor->image.sampler = desc->u.view->u.vk_sampler;
            vk_descriptor->image.imageView = VK_NULL_HANDLE;
            vk_descriptor->image.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
            return true;

        default:
            ERR("Unhandled descriptor type %d.\n", binding->type);
    }

    return false;
}
|
|
|
|
|
2020-03-10 15:58:08 +00:00
|
|
|
static bool vkd3d_descriptor_updates_reserve_arrays(struct vkd3d_descriptor_updates *updates,
|
2020-03-05 10:23:42 +00:00
|
|
|
unsigned int descriptor_count)
|
|
|
|
{
|
|
|
|
/* This should grow over time to the point where no further allocations are necessary */
|
|
|
|
if (!vkd3d_array_reserve((void **)&updates->descriptors, &updates->descriptors_size,
|
|
|
|
descriptor_count, sizeof(*updates->descriptors)))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (!vkd3d_array_reserve((void **)&updates->descriptor_writes, &updates->descriptor_writes_size,
|
|
|
|
descriptor_count, sizeof(*updates->descriptor_writes)))
|
|
|
|
return false;
|
|
|
|
|
2017-09-05 10:53:55 +01:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-03-05 10:23:42 +00:00
|
|
|
static void vk_write_descriptor_set_for_descriptor_info(VkDescriptorSet vk_descriptor_set, uint32_t vk_binding,
|
|
|
|
VkDescriptorType vk_descriptor_type, union vkd3d_descriptor_info *vk_descriptor, VkWriteDescriptorSet *vk_write)
|
|
|
|
{
|
|
|
|
vk_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
|
|
|
vk_write->pNext = NULL;
|
|
|
|
vk_write->dstSet = vk_descriptor_set;
|
|
|
|
vk_write->dstBinding = vk_binding;
|
|
|
|
vk_write->dstArrayElement = 0;
|
|
|
|
vk_write->descriptorCount = 1;
|
|
|
|
vk_write->descriptorType = vk_descriptor_type;
|
|
|
|
vk_write->pImageInfo = &vk_descriptor->image;
|
|
|
|
vk_write->pBufferInfo = &vk_descriptor->buffer;
|
|
|
|
vk_write->pTexelBufferView = &vk_descriptor->buffer_view;
|
|
|
|
}
|
|
|
|
|
2020-03-10 15:58:08 +00:00
|
|
|
/* Writes the packed (non-bindless) descriptors of one root descriptor table
 * into the given Vulkan descriptor set. For every non-bindless binding in
 * the table, each register's D3D12 descriptor is translated; invalid or
 * unwritten descriptors are skipped rather than written. The translated
 * infos are stored in updates->descriptors (indexed from the table's
 * first_packed_descriptor slot) so the VkWriteDescriptorSet entries can
 * point at stable memory. */
static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list *list,
        VkDescriptorSet descriptor_set,
        struct vkd3d_descriptor_updates *updates,
        const struct d3d12_root_signature *root_signature,
        const struct d3d12_desc *base_descriptor,
        unsigned int root_parameter_index)
{
    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
    const struct d3d12_root_descriptor_table *table;
    union vkd3d_descriptor_info *vk_descriptor;
    unsigned int write_count = 0;
    unsigned int i, j;

    table = root_signature_get_descriptor_table(root_signature, root_parameter_index);
    /* Each table owns a contiguous slice of the packed descriptor scratch
     * array; the cursor advances once per register, written or not. */
    vk_descriptor = &updates->descriptors[table->first_packed_descriptor];

    for (i = 0; i < table->binding_count; i++)
    {
        const struct vkd3d_shader_resource_binding *binding = &table->first_binding[i];

        /* Bindless bindings are backed by descriptor heap sets, not by this
         * packed set. */
        if (binding->flags & VKD3D_SHADER_BINDING_FLAG_BINDLESS)
            continue;

        for (j = 0; j < binding->register_count; j++)
        {
            const struct d3d12_desc *desc = &base_descriptor[binding->descriptor_offset + j];

            /* Skip invalid descriptors */
            if (vkd3d_descriptor_info_from_d3d12_desc(list->device, desc, binding, vk_descriptor))
            {
                /* NOTE(review): dstArrayElement is always 0 in the helper;
                 * presumably register_count > 1 only occurs for bindless
                 * bindings filtered out above — confirm. */
                vk_write_descriptor_set_for_descriptor_info(descriptor_set, binding->binding.binding,
                        desc->vk_descriptor_type, vk_descriptor,
                        &updates->descriptor_writes[write_count++]);
            }

            vk_descriptor++;
        }
    }

    if (write_count)
    {
        VK_CALL(vkUpdateDescriptorSets(list->device->vk_device,
                write_count, updates->descriptor_writes, 0, NULL));
    }
}
|
|
|
|
|
|
|
|
static void d3d12_deferred_descriptor_set_update_resolve(struct d3d12_command_list *list,
|
|
|
|
const struct d3d12_deferred_descriptor_set_update *update)
|
|
|
|
{
|
|
|
|
d3d12_command_list_update_descriptor_table(list,
|
|
|
|
update->descriptor_set,
|
|
|
|
update->updates,
|
|
|
|
update->root_signature,
|
|
|
|
update->base_descriptor,
|
|
|
|
update->root_parameter_index);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void d3d12_command_list_defer_update_descriptor_table(struct d3d12_command_list *list,
|
|
|
|
VkDescriptorSet descriptor_set,
|
|
|
|
struct vkd3d_descriptor_updates *updates,
|
|
|
|
const struct d3d12_root_signature *root_signature,
|
|
|
|
const struct d3d12_desc *base_descriptor,
|
|
|
|
unsigned int root_parameter_index)
|
|
|
|
{
|
|
|
|
struct d3d12_deferred_descriptor_set_update *update;
|
|
|
|
|
|
|
|
if (!vkd3d_array_reserve((void **)&list->descriptor_updates, &list->descriptor_updates_size,
|
|
|
|
list->descriptor_updates_count + 1, sizeof(*list->descriptor_updates)))
|
|
|
|
{
|
|
|
|
ERR("Failed to allocate space for deferred descriptor set update!\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
update = &list->descriptor_updates[list->descriptor_updates_count++];
|
|
|
|
update->descriptor_set = descriptor_set;
|
|
|
|
update->root_signature = root_signature;
|
|
|
|
update->root_parameter_index = root_parameter_index;
|
|
|
|
update->base_descriptor = base_descriptor;
|
|
|
|
update->updates = updates;
|
|
|
|
}
|
|
|
|
|
2020-03-05 10:23:42 +00:00
|
|
|
/* Allocates a fresh volatile descriptor set for all packed (non-bindless)
 * descriptors and populates it from every active descriptor table, then
 * binds it. When the device supports volatile packed descriptors, the
 * actual descriptor writes are deferred to submit time for correct
 * RS 1.0 VOLATILE semantics; otherwise they happen immediately. */
static void d3d12_command_list_update_packed_descriptors(struct d3d12_command_list *list,
        VkPipelineBindPoint bind_point)
{
    bool deferred_update = list->device->vk_info.supports_volatile_packed_descriptors;
    struct vkd3d_descriptor_updates *updates = &list->packed_descriptors[bind_point];
    struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
    const struct d3d12_root_signature *root_signature = bindings->root_signature;
    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
    VkDescriptorSet descriptor_set = VK_NULL_HANDLE;
    const struct d3d12_desc *base_descriptor;
    unsigned int root_parameter_index;
    uint64_t descriptor_table_mask;

    /* Reserves the array for worst case. */
    if (!vkd3d_descriptor_updates_reserve_arrays(updates, root_signature->packed_descriptor_count))
    {
        ERR("Failed to resize descriptor update arrays.\n");
        return;
    }

    /* Update packed descriptor set for all active descriptor tables */
    assert(root_signature->vk_packed_descriptor_layout);
    descriptor_table_mask = root_signature->descriptor_table_mask & bindings->descriptor_table_active_mask;

    descriptor_set = d3d12_command_allocator_allocate_descriptor_set(
            list->allocator, root_signature->vk_packed_descriptor_layout, VKD3D_DESCRIPTOR_POOL_TYPE_VOLATILE);

    /* vkd3d_bitmask_iter64() consumes one set bit per iteration. */
    while (descriptor_table_mask)
    {
        root_parameter_index = vkd3d_bitmask_iter64(&descriptor_table_mask);
        base_descriptor = d3d12_desc_from_gpu_handle(bindings->descriptor_tables[root_parameter_index]);

        if (deferred_update)
        {
            /* If we have EXT_descriptor_indexing we implement RS 1.0 correctly by deferring the descriptor
             * set update until submit time. */
            d3d12_command_list_defer_update_descriptor_table(list,
                    descriptor_set, updates,
                    root_signature, base_descriptor,
                    root_parameter_index);
        }
        else
        {
            /* Fallback, we update the descriptor set here.
             * Will work in most cases, but it's not a correct implementation of RS 1.0.
             * TODO: Use this path if application uses RS 1.1 STATIC descriptors for all entries in a table. */
            d3d12_command_list_update_descriptor_table(list,
                    descriptor_set, updates,
                    root_signature, base_descriptor,
                    root_parameter_index);
        }
    }

    VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bind_point,
            root_signature->vk_pipeline_layout,
            root_signature->packed_descriptor_set,
            1, &descriptor_set, 0, NULL));

    bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_PACKED_DESCRIPTOR_SET;
}
|
2020-03-06 20:43:11 +00:00
|
|
|
|
2020-03-10 16:14:20 +00:00
|
|
|
static void d3d12_command_list_update_descriptor_table_offsets(struct d3d12_command_list *list,
|
|
|
|
VkPipelineBindPoint bind_point)
|
|
|
|
{
|
|
|
|
struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
|
|
|
|
const struct d3d12_root_signature *root_signature = bindings->root_signature;
|
|
|
|
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
|
|
|
|
const struct d3d12_root_descriptor_table *table;
|
|
|
|
const struct d3d12_desc *base_descriptor;
|
|
|
|
uint32_t table_offsets[D3D12_MAX_ROOT_COST];
|
|
|
|
unsigned int root_parameter_index;
|
|
|
|
uint64_t descriptor_table_mask;
|
|
|
|
|
|
|
|
assert(root_signature->descriptor_table_count);
|
|
|
|
descriptor_table_mask = root_signature->descriptor_table_mask & bindings->descriptor_table_active_mask;
|
|
|
|
|
|
|
|
while (descriptor_table_mask)
|
2020-03-06 20:43:11 +00:00
|
|
|
{
|
2020-03-10 16:14:20 +00:00
|
|
|
root_parameter_index = vkd3d_bitmask_iter64(&descriptor_table_mask);
|
|
|
|
base_descriptor = d3d12_desc_from_gpu_handle(bindings->descriptor_tables[root_parameter_index]);
|
|
|
|
|
|
|
|
table = root_signature_get_descriptor_table(root_signature, root_parameter_index);
|
|
|
|
|
|
|
|
table_offsets[table->table_index] = d3d12_desc_heap_offset(base_descriptor);
|
2020-03-06 20:43:11 +00:00
|
|
|
}
|
2020-03-10 16:05:56 +00:00
|
|
|
|
2020-03-10 16:14:20 +00:00
|
|
|
/* Set descriptor offsets */
|
|
|
|
VK_CALL(vkCmdPushConstants(list->vk_command_buffer,
|
|
|
|
root_signature->vk_pipeline_layout, VK_SHADER_STAGE_ALL,
|
|
|
|
root_signature->descriptor_table_offset,
|
|
|
|
root_signature->descriptor_table_count * sizeof(uint32_t),
|
|
|
|
table_offsets));
|
|
|
|
|
|
|
|
bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS;
|
2017-09-05 10:53:55 +01:00
|
|
|
}
|
|
|
|
|
2018-10-11 14:33:31 +01:00
|
|
|
/* Fills a VkWriteDescriptorSet for a root descriptor (root CBV/SRV/UAV).
 * Root CBVs map to uniform buffers; root SRVs/UAVs map to uniform/storage
 * texel buffer views. Returns false (leaving the write unusable) when the
 * root parameter type is invalid or the application has not bound a buffer
 * for this slot yet. */
static bool vk_write_descriptor_set_from_root_descriptor(VkWriteDescriptorSet *vk_descriptor_write,
        const struct d3d12_root_parameter *root_parameter, VkDescriptorSet vk_descriptor_set,
        const union vkd3d_descriptor_info *descriptors)
{
    const union vkd3d_descriptor_info *descriptor;

    /* Root descriptors are stored at fixed slots in the caller's packed
     * descriptor array. */
    descriptor = &descriptors[root_parameter->u.descriptor.packed_descriptor];

    switch (root_parameter->parameter_type)
    {
        case D3D12_ROOT_PARAMETER_TYPE_CBV:
            vk_descriptor_write->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;

            /* Nothing bound yet; skip the write. */
            if (!descriptor->buffer.buffer)
                return false;
            break;
        case D3D12_ROOT_PARAMETER_TYPE_SRV:
            vk_descriptor_write->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;

            if (!descriptor->buffer_view)
                return false;
            break;
        case D3D12_ROOT_PARAMETER_TYPE_UAV:
            vk_descriptor_write->descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;

            if (!descriptor->buffer_view)
                return false;
            break;
        default:
            ERR("Invalid root descriptor %#x.\n", root_parameter->parameter_type);
            return false;
    }

    vk_descriptor_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
    vk_descriptor_write->pNext = NULL;
    vk_descriptor_write->dstSet = vk_descriptor_set;
    vk_descriptor_write->dstBinding = root_parameter->u.descriptor.binding->binding.binding;
    vk_descriptor_write->dstArrayElement = 0;
    vk_descriptor_write->descriptorCount = 1;
    vk_descriptor_write->pImageInfo = NULL;
    /* Vulkan reads only the pointer matching descriptorType set above. */
    vk_descriptor_write->pBufferInfo = &descriptor->buffer;
    vk_descriptor_write->pTexelBufferView = &descriptor->buffer_view;
    return true;
}
|
|
|
|
|
2020-03-12 16:19:03 +00:00
|
|
|
static bool vk_write_descriptor_set_and_inline_uniform_block(VkWriteDescriptorSet *vk_descriptor_write,
|
|
|
|
VkWriteDescriptorSetInlineUniformBlockEXT *vk_inline_uniform_block_write,
|
|
|
|
VkDescriptorSet vk_descriptor_set, const struct d3d12_root_signature *root_signature,
|
|
|
|
const void* data)
|
|
|
|
{
|
|
|
|
vk_inline_uniform_block_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT;
|
|
|
|
vk_inline_uniform_block_write->pNext = NULL;
|
|
|
|
vk_inline_uniform_block_write->dataSize = root_signature->push_constant_range.size;
|
|
|
|
vk_inline_uniform_block_write->pData = data;
|
|
|
|
|
|
|
|
vk_descriptor_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
|
|
|
vk_descriptor_write->pNext = vk_inline_uniform_block_write;
|
|
|
|
vk_descriptor_write->dstSet = vk_descriptor_set;
|
|
|
|
vk_descriptor_write->dstBinding = root_signature->push_constant_ubo_binding.binding;
|
|
|
|
vk_descriptor_write->dstArrayElement = 0;
|
|
|
|
vk_descriptor_write->descriptorCount = root_signature->push_constant_range.size;
|
|
|
|
vk_descriptor_write->descriptorType = VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT;
|
|
|
|
vk_descriptor_write->pImageInfo = NULL;
|
|
|
|
vk_descriptor_write->pBufferInfo = NULL;
|
|
|
|
vk_descriptor_write->pTexelBufferView = NULL;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-03-06 20:38:26 +00:00
|
|
|
static void d3d12_command_list_update_descriptor_heaps(struct d3d12_command_list *list,
|
|
|
|
VkPipelineBindPoint bind_point)
|
|
|
|
{
|
|
|
|
struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
|
|
|
|
const struct d3d12_root_signature *root_signature = bindings->root_signature;
|
|
|
|
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
|
|
|
|
|
|
|
|
while (bindings->descriptor_heap_dirty_mask)
|
|
|
|
{
|
|
|
|
unsigned int heap_index = vkd3d_bitmask_iter64(&bindings->descriptor_heap_dirty_mask);
|
|
|
|
|
|
|
|
if (list->descriptor_heaps[heap_index])
|
|
|
|
{
|
|
|
|
VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bind_point,
|
|
|
|
root_signature->vk_pipeline_layout, heap_index, 1,
|
|
|
|
&list->descriptor_heaps[heap_index], 0, NULL));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-03 19:28:38 +00:00
|
|
|
static void d3d12_command_list_update_static_samplers(struct d3d12_command_list *list,
|
|
|
|
VkPipelineBindPoint bind_point)
|
|
|
|
{
|
|
|
|
struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
|
|
|
|
const struct d3d12_root_signature *root_signature = bindings->root_signature;
|
|
|
|
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
|
|
|
|
|
|
|
|
VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bind_point,
|
|
|
|
root_signature->vk_pipeline_layout,
|
|
|
|
root_signature->sampler_descriptor_set,
|
|
|
|
1, &bindings->static_sampler_set, 0, NULL));
|
|
|
|
|
2020-03-10 16:05:56 +00:00
|
|
|
bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET;
|
2020-03-03 19:28:38 +00:00
|
|
|
}
|
|
|
|
|
2020-03-03 17:56:10 +00:00
|
|
|
static void d3d12_command_list_update_root_constants(struct d3d12_command_list *list,
|
|
|
|
VkPipelineBindPoint bind_point)
|
|
|
|
{
|
|
|
|
struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
|
|
|
|
const struct d3d12_root_signature *root_signature = bindings->root_signature;
|
|
|
|
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
|
|
|
|
const struct d3d12_root_constant *root_constant;
|
|
|
|
unsigned int root_parameter_index;
|
|
|
|
|
|
|
|
while (bindings->root_constant_dirty_mask)
|
|
|
|
{
|
|
|
|
root_parameter_index = vkd3d_bitmask_iter64(&bindings->root_constant_dirty_mask);
|
|
|
|
root_constant = root_signature_get_32bit_constants(root_signature, root_parameter_index);
|
|
|
|
|
|
|
|
VK_CALL(vkCmdPushConstants(list->vk_command_buffer,
|
|
|
|
root_signature->vk_pipeline_layout, VK_SHADER_STAGE_ALL,
|
|
|
|
root_constant->constant_index * sizeof(uint32_t),
|
|
|
|
root_constant->constant_count * sizeof(uint32_t),
|
|
|
|
&bindings->root_constants[root_constant->constant_index]));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-12 16:19:03 +00:00
|
|
|
/* Gathers all root constants and descriptor table offsets into dst_data,
 * laid out exactly like the push constant block, so the caller can upload
 * the whole thing as an inline uniform block instead of push constants.
 * Clears the related dirty state since the data is now captured. */
static void d3d12_command_list_fetch_inline_uniform_block_data(struct d3d12_command_list *list,
        VkPipelineBindPoint bind_point, uint32_t *dst_data)
{
    struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
    const struct d3d12_root_signature *root_signature = bindings->root_signature;
    uint64_t descriptor_table_mask = bindings->descriptor_table_active_mask;
    uint64_t root_constant_mask = root_signature->root_constant_mask;
    const uint32_t *src_data = bindings->root_constants;
    const struct d3d12_root_descriptor_table *table;
    const struct d3d12_root_constant *root_constant;
    const struct d3d12_desc *base_descriptor;
    unsigned int root_parameter_index;
    uint32_t first_table_offset;

    /* Copy every declared root constant range at its block offset. */
    while (root_constant_mask)
    {
        root_parameter_index = vkd3d_bitmask_iter64(&root_constant_mask);
        root_constant = root_signature_get_32bit_constants(root_signature, root_parameter_index);

        memcpy(&dst_data[root_constant->constant_index],
                &src_data[root_constant->constant_index],
                root_constant->constant_count * sizeof(uint32_t));
    }

    /* Descriptor table offsets follow the root constants within the block. */
    first_table_offset = root_signature->descriptor_table_offset / sizeof(uint32_t);

    while (descriptor_table_mask)
    {
        root_parameter_index = vkd3d_bitmask_iter64(&descriptor_table_mask);
        base_descriptor = d3d12_desc_from_gpu_handle(bindings->descriptor_tables[root_parameter_index]);

        table = root_signature_get_descriptor_table(root_signature, root_parameter_index);

        dst_data[first_table_offset + table->table_index] = d3d12_desc_heap_offset(base_descriptor);
    }

    /* Reset dirty flags to avoid redundant updates in the future */
    bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS;
    bindings->root_constant_dirty_mask = 0;
}
|
|
|
|
|
2020-03-26 20:25:43 +00:00
|
|
|
static bool vk_write_descriptor_set_from_uav_counter_binding(struct d3d12_command_list *list,
|
|
|
|
VkPipelineBindPoint bind_point, VkDescriptorSet vk_descriptor_set,
|
|
|
|
VkWriteDescriptorSet *vk_descriptor_write, VkDescriptorBufferInfo *vk_buffer_info)
|
|
|
|
{
|
|
|
|
struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
|
|
|
|
const struct d3d12_root_signature *root_signature = bindings->root_signature;
|
|
|
|
|
|
|
|
bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_UAV_COUNTER_BINDING;
|
|
|
|
|
|
|
|
if (!list->uav_counter_address_buffer || !(root_signature->flags & VKD3D_ROOT_SIGNATURE_USE_BINDLESS_UAV_COUNTERS))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
vk_buffer_info->buffer = list->uav_counter_address_buffer;
|
|
|
|
vk_buffer_info->offset = 0;
|
|
|
|
vk_buffer_info->range = VK_WHOLE_SIZE;
|
|
|
|
|
|
|
|
vk_descriptor_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
|
|
|
vk_descriptor_write->pNext = NULL;
|
|
|
|
vk_descriptor_write->dstSet = vk_descriptor_set;
|
|
|
|
vk_descriptor_write->dstBinding = root_signature->uav_counter_binding.binding;
|
|
|
|
vk_descriptor_write->dstArrayElement = 0;
|
|
|
|
vk_descriptor_write->descriptorCount = 1;
|
|
|
|
vk_descriptor_write->descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
|
|
|
vk_descriptor_write->pImageInfo = NULL;
|
|
|
|
vk_descriptor_write->pBufferInfo = vk_buffer_info;
|
|
|
|
vk_descriptor_write->pTexelBufferView = NULL;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-03-03 16:03:51 +00:00
|
|
|
/* Flushes the dirty root descriptors, the bindless UAV counter binding and,
 * if used, the inline uniform block for the given bind point. Descriptor
 * writes are either pushed with VK_KHR_push_descriptor or written into a
 * freshly allocated static descriptor set which is then bound. */
static void d3d12_command_list_update_root_descriptors(struct d3d12_command_list *list,
        VkPipelineBindPoint bind_point)
{
    struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
    const struct d3d12_root_signature *root_signature = bindings->root_signature;
    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
    VkWriteDescriptorSetInlineUniformBlockEXT inline_uniform_block_write;
    /* Worst case: one write per root descriptor (each costs 2 DWORDs of the
     * root signature budget) plus the UAV counter and inline uniform block. */
    VkWriteDescriptorSet descriptor_writes[D3D12_MAX_ROOT_COST / 2 + 2];
    uint32_t inline_uniform_block_data[D3D12_MAX_ROOT_COST];
    const struct d3d12_root_parameter *root_parameter;
    VkDescriptorSet descriptor_set = VK_NULL_HANDLE;
    VkDescriptorBufferInfo uav_counter_descriptor;
    unsigned int descriptor_write_count = 0;
    unsigned int root_parameter_index;

    if (!(root_signature->flags & VKD3D_ROOT_SIGNATURE_USE_PUSH_DESCRIPTORS))
    {
        /* Ensure that we populate all descriptors if push descriptors cannot be used */
        bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_UAV_COUNTER_BINDING;
        bindings->root_descriptor_dirty_mask |= bindings->root_descriptor_active_mask & root_signature->root_descriptor_mask;

        /* A new set is needed since the previously bound one cannot be updated
         * once it may be in use by the GPU. */
        descriptor_set = d3d12_command_allocator_allocate_descriptor_set(
                list->allocator, root_signature->vk_root_descriptor_layout, VKD3D_DESCRIPTOR_POOL_TYPE_STATIC);
    }

    /* TODO bind null descriptors for inactive root descriptors */
    bindings->root_descriptor_dirty_mask &= bindings->root_descriptor_active_mask;

    /* Emit one descriptor write per dirty root descriptor; the iterator
     * consumes (clears) one set bit of the mask per call. */
    while (bindings->root_descriptor_dirty_mask)
    {
        root_parameter_index = vkd3d_bitmask_iter64(&bindings->root_descriptor_dirty_mask);
        root_parameter = root_signature_get_root_descriptor(root_signature, root_parameter_index);

        if (!vk_write_descriptor_set_from_root_descriptor(&descriptor_writes[descriptor_write_count],
                root_parameter, descriptor_set, bindings->root_descriptors))
            continue;

        descriptor_write_count += 1;
    }

    if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_UAV_COUNTER_BINDING)
    {
        /* uav_counter_descriptor must stay alive until the writes below are
         * consumed; it is referenced by the generated descriptor write. */
        if (vk_write_descriptor_set_from_uav_counter_binding(list, bind_point,
                descriptor_set, &descriptor_writes[descriptor_write_count], &uav_counter_descriptor))
            descriptor_write_count += 1;
    }

    if (root_signature->flags & VKD3D_ROOT_SIGNATURE_USE_INLINE_UNIFORM_BLOCK)
    {
        d3d12_command_list_fetch_inline_uniform_block_data(list, bind_point, inline_uniform_block_data);

        vk_write_descriptor_set_and_inline_uniform_block(&descriptor_writes[descriptor_write_count],
                &inline_uniform_block_write, descriptor_set, root_signature, inline_uniform_block_data);

        descriptor_write_count += 1;
    }

    if (!descriptor_write_count)
        return;

    if (root_signature->flags & VKD3D_ROOT_SIGNATURE_USE_PUSH_DESCRIPTORS)
    {
        VK_CALL(vkCmdPushDescriptorSetKHR(list->vk_command_buffer, bind_point,
                root_signature->vk_pipeline_layout, root_signature->root_descriptor_set,
                descriptor_write_count, descriptor_writes));
    }
    else
    {
        VK_CALL(vkUpdateDescriptorSets(list->device->vk_device,
                descriptor_write_count, descriptor_writes, 0, NULL));
        VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bind_point,
                root_signature->vk_pipeline_layout, root_signature->root_descriptor_set,
                1, &descriptor_set, 0, NULL));
    }
}
|
|
|
|
|
2017-08-11 12:58:04 +01:00
|
|
|
static void d3d12_command_list_update_descriptors(struct d3d12_command_list *list,
|
|
|
|
VkPipelineBindPoint bind_point)
|
|
|
|
{
|
|
|
|
struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
|
2017-09-08 14:04:30 +01:00
|
|
|
const struct d3d12_root_signature *rs = bindings->root_signature;
|
2017-08-11 12:58:04 +01:00
|
|
|
|
2020-03-03 16:03:51 +00:00
|
|
|
if (!rs)
|
2017-08-11 12:58:04 +01:00
|
|
|
return;
|
|
|
|
|
2020-03-06 20:38:26 +00:00
|
|
|
if (bindings->descriptor_heap_dirty_mask)
|
|
|
|
d3d12_command_list_update_descriptor_heaps(list, bind_point);
|
|
|
|
|
2020-03-10 16:05:56 +00:00
|
|
|
if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET)
|
2020-03-03 19:28:38 +00:00
|
|
|
d3d12_command_list_update_static_samplers(list, bind_point);
|
|
|
|
|
2020-03-10 16:05:56 +00:00
|
|
|
if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_PACKED_DESCRIPTOR_SET)
|
2020-03-05 10:23:42 +00:00
|
|
|
d3d12_command_list_update_packed_descriptors(list, bind_point);
|
2017-09-05 10:53:55 +01:00
|
|
|
|
2020-03-12 16:19:03 +00:00
|
|
|
if (rs->flags & VKD3D_ROOT_SIGNATURE_USE_INLINE_UNIFORM_BLOCK)
|
|
|
|
{
|
|
|
|
/* Root constants and descriptor table offsets are part of the root descriptor set */
|
|
|
|
if (bindings->root_descriptor_dirty_mask || bindings->root_constant_dirty_mask
|
2020-03-26 20:25:43 +00:00
|
|
|
|| (bindings->dirty_flags & (VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS | VKD3D_PIPELINE_DIRTY_UAV_COUNTER_BINDING)))
|
2020-03-12 16:19:03 +00:00
|
|
|
d3d12_command_list_update_root_descriptors(list, bind_point);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2020-03-26 20:25:43 +00:00
|
|
|
if (bindings->root_descriptor_dirty_mask || (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_UAV_COUNTER_BINDING))
|
2020-03-12 16:19:03 +00:00
|
|
|
d3d12_command_list_update_root_descriptors(list, bind_point);
|
2018-10-11 14:33:31 +01:00
|
|
|
|
2020-03-12 16:19:03 +00:00
|
|
|
if (bindings->root_constant_dirty_mask)
|
|
|
|
d3d12_command_list_update_root_constants(list, bind_point);
|
2020-03-10 16:14:20 +00:00
|
|
|
|
2020-03-12 16:19:03 +00:00
|
|
|
if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS)
|
|
|
|
d3d12_command_list_update_descriptor_table_offsets(list, bind_point);
|
|
|
|
}
|
2017-08-11 12:58:04 +01:00
|
|
|
}
|
|
|
|
|
2019-11-11 16:03:38 +00:00
|
|
|
static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *list)
|
|
|
|
{
|
|
|
|
d3d12_command_list_end_current_render_pass(list);
|
|
|
|
|
|
|
|
if (!d3d12_command_list_update_compute_pipeline(list))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
d3d12_command_list_update_descriptors(list, VK_PIPELINE_BIND_POINT_COMPUTE);
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-04-01 11:09:51 +01:00
|
|
|
static void d3d12_command_list_update_dynamic_state(struct d3d12_command_list *list)
|
|
|
|
{
|
|
|
|
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
|
|
|
|
struct vkd3d_dynamic_state *dyn_state = &list->dynamic_state;
|
|
|
|
|
2020-04-01 12:44:07 +01:00
|
|
|
/* Make sure we only update states that are dynamic in the pipeline */
|
|
|
|
dyn_state->dirty_flags &= list->state->u.graphics.dynamic_state_flags;
|
|
|
|
|
2020-04-01 11:09:51 +01:00
|
|
|
if (dyn_state->dirty_flags & VKD3D_DYNAMIC_STATE_VIEWPORT)
|
|
|
|
{
|
|
|
|
VK_CALL(vkCmdSetViewport(list->vk_command_buffer,
|
|
|
|
0, dyn_state->viewport_count, dyn_state->viewports));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (dyn_state->dirty_flags & VKD3D_DYNAMIC_STATE_SCISSOR)
|
|
|
|
{
|
|
|
|
VK_CALL(vkCmdSetScissor(list->vk_command_buffer,
|
|
|
|
0, dyn_state->viewport_count, dyn_state->scissors));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (dyn_state->dirty_flags & VKD3D_DYNAMIC_STATE_BLEND_CONSTANTS)
|
|
|
|
{
|
|
|
|
VK_CALL(vkCmdSetBlendConstants(list->vk_command_buffer,
|
|
|
|
dyn_state->blend_constants));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (dyn_state->dirty_flags & VKD3D_DYNAMIC_STATE_STENCIL_REFERENCE)
|
|
|
|
{
|
|
|
|
VK_CALL(vkCmdSetStencilReference(list->vk_command_buffer,
|
|
|
|
VK_STENCIL_FRONT_AND_BACK, dyn_state->stencil_reference));
|
|
|
|
}
|
|
|
|
|
2020-04-01 14:25:25 +01:00
|
|
|
if (dyn_state->dirty_flags & VKD3D_DYNAMIC_STATE_DEPTH_BOUNDS)
|
|
|
|
{
|
|
|
|
VK_CALL(vkCmdSetDepthBounds(list->vk_command_buffer,
|
|
|
|
dyn_state->min_depth_bounds, dyn_state->max_depth_bounds));
|
|
|
|
}
|
|
|
|
|
2020-04-01 11:09:51 +01:00
|
|
|
dyn_state->dirty_flags = 0;
|
|
|
|
}
|
|
|
|
|
2018-08-23 17:33:04 +01:00
|
|
|
/* Makes sure a render pass instance is active and all graphics state is
 * flushed before a draw call. Returns false if the graphics pipeline or
 * framebuffer could not be set up, in which case the draw must be skipped. */
static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list)
{
    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
    struct d3d12_graphics_pipeline_state *graphics;
    struct VkRenderPassBeginInfo begin_desc;
    VkRenderPass vk_render_pass;

    if (!d3d12_command_list_update_graphics_pipeline(list))
        return false;
    if (!d3d12_command_list_update_current_framebuffer(list))
        return false;

    if (list->dynamic_state.dirty_flags)
        d3d12_command_list_update_dynamic_state(list);

    d3d12_command_list_update_descriptors(list, VK_PIPELINE_BIND_POINT_GRAPHICS);

    /* Render pass already active; nothing more to do. */
    if (list->current_render_pass != VK_NULL_HANDLE)
        return true;

    /* The render pass comes from the bound PSO; update_graphics_pipeline()
     * above is expected to have set it. */
    vk_render_pass = list->pso_render_pass;
    assert(vk_render_pass);

    /* Transition attachments into their render-pass layouts before beginning. */
    d3d12_command_list_emit_render_pass_transition(list, VKD3D_RENDER_PASS_TRANSITION_MODE_BEGIN);

    begin_desc.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
    begin_desc.pNext = NULL;
    begin_desc.renderPass = vk_render_pass;
    begin_desc.framebuffer = list->current_framebuffer;
    begin_desc.renderArea.offset.x = 0;
    begin_desc.renderArea.offset.y = 0;
    d3d12_command_list_get_fb_extent(list,
            &begin_desc.renderArea.extent.width, &begin_desc.renderArea.extent.height, NULL);
    /* Clears are recorded separately (ClearRenderTargetView etc.), never
     * as part of the render pass begin. */
    begin_desc.clearValueCount = 0;
    begin_desc.pClearValues = NULL;
    VK_CALL(vkCmdBeginRenderPass(list->vk_command_buffer, &begin_desc, VK_SUBPASS_CONTENTS_INLINE));

    list->current_render_pass = vk_render_pass;

    graphics = &list->state->u.graphics;
    if (graphics->xfb_enabled)
    {
        /* Resume transform feedback with the saved counter buffers. */
        VK_CALL(vkCmdBeginTransformFeedbackEXT(list->vk_command_buffer, 0, ARRAY_SIZE(list->so_counter_buffers),
                list->so_counter_buffers, list->so_counter_buffer_offsets));

        list->xfb_enabled = true;
    }

    return true;
}
|
|
|
|
|
2018-12-03 10:31:31 +00:00
|
|
|
static void d3d12_command_list_check_index_buffer_strip_cut_value(struct d3d12_command_list *list)
|
|
|
|
{
|
|
|
|
struct d3d12_graphics_pipeline_state *graphics = &list->state->u.graphics;
|
2020-03-25 08:29:50 +00:00
|
|
|
if (TRACE_ON())
|
2018-12-03 10:31:31 +00:00
|
|
|
{
|
2020-03-25 08:29:50 +00:00
|
|
|
/* In Vulkan, the strip cut value is derived from the index buffer format. */
|
|
|
|
switch (graphics->index_buffer_strip_cut_value)
|
|
|
|
{
|
2018-12-03 10:31:31 +00:00
|
|
|
case D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF:
|
|
|
|
if (list->index_buffer_format != DXGI_FORMAT_R16_UINT)
|
|
|
|
{
|
2020-03-25 08:29:50 +00:00
|
|
|
TRACE("Strip cut value 0xffff is not supported with index buffer format %#x.\n",
|
|
|
|
list->index_buffer_format);
|
2018-12-03 10:31:31 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF:
|
|
|
|
if (list->index_buffer_format != DXGI_FORMAT_R32_UINT)
|
|
|
|
{
|
2020-03-25 08:29:50 +00:00
|
|
|
TRACE("Strip cut value 0xffffffff is not supported with index buffer format %#x.\n",
|
|
|
|
list->index_buffer_format);
|
2018-12-03 10:31:31 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
break;
|
2020-03-25 08:29:50 +00:00
|
|
|
}
|
2018-12-03 10:31:31 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::DrawInstanced implementation. */
static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(d3d12_command_list_iface *iface,
        UINT vertex_count_per_instance, UINT instance_count, UINT start_vertex_location,
        UINT start_instance_location)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;

    TRACE("iface %p, vertex_count_per_instance %u, instance_count %u, "
            "start_vertex_location %u, start_instance_location %u.\n",
            iface, vertex_count_per_instance, instance_count,
            start_vertex_location, start_instance_location);

    /* Draws are skipped entirely when graphics state cannot be set up. */
    if (!d3d12_command_list_begin_render_pass(list))
    {
        WARN("Failed to begin render pass, ignoring draw call.\n");
        return;
    }

    VK_CALL(vkCmdDraw(list->vk_command_buffer, vertex_count_per_instance,
            instance_count, start_vertex_location, start_instance_location));
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::DrawIndexedInstanced implementation. */
static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(d3d12_command_list_iface *iface,
        UINT index_count_per_instance, UINT instance_count, UINT start_vertex_location,
        INT base_vertex_location, UINT start_instance_location)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;

    TRACE("iface %p, index_count_per_instance %u, instance_count %u, start_vertex_location %u, "
            "base_vertex_location %d, start_instance_location %u.\n",
            iface, index_count_per_instance, instance_count, start_vertex_location,
            base_vertex_location, start_instance_location);

    /* Draws are skipped entirely when graphics state cannot be set up. */
    if (!d3d12_command_list_begin_render_pass(list))
    {
        WARN("Failed to begin render pass, ignoring draw call.\n");
        return;
    }

    /* Debug-only sanity check of the PSO's strip cut value. */
    d3d12_command_list_check_index_buffer_strip_cut_value(list);

    VK_CALL(vkCmdDrawIndexed(list->vk_command_buffer, index_count_per_instance,
            instance_count, start_vertex_location, base_vertex_location, start_instance_location));
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::Dispatch implementation. */
static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(d3d12_command_list_iface *iface,
        UINT x, UINT y, UINT z)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;

    TRACE("iface %p, x %u, y %u, z %u.\n", iface, x, y, z);

    /* Dispatches are skipped entirely when compute state cannot be set up. */
    if (!d3d12_command_list_update_compute_state(list))
    {
        WARN("Failed to update compute state, ignoring dispatch.\n");
        return;
    }

    VK_CALL(vkCmdDispatch(list->vk_command_buffer, x, y, z));
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::CopyBufferRegion implementation. Both
 * resources must be buffers; heap offsets of placed resources are folded
 * into the Vulkan copy offsets. */
static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(d3d12_command_list_iface *iface,
        ID3D12Resource *dst, UINT64 dst_offset, ID3D12Resource *src, UINT64 src_offset, UINT64 byte_count)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
    struct d3d12_resource *dst_resource, *src_resource;
    VkBufferCopy copy_region;

    TRACE("iface %p, dst_resource %p, dst_offset %#"PRIx64", src_resource %p, "
            "src_offset %#"PRIx64", byte_count %#"PRIx64".\n",
            iface, dst, dst_offset, src, src_offset, byte_count);

    dst_resource = unsafe_impl_from_ID3D12Resource(dst);
    assert(d3d12_resource_is_buffer(dst_resource));
    src_resource = unsafe_impl_from_ID3D12Resource(src);
    assert(d3d12_resource_is_buffer(src_resource));

    d3d12_command_list_track_resource_usage(list, dst_resource);
    d3d12_command_list_track_resource_usage(list, src_resource);

    /* Transfer commands are not allowed inside a render pass instance. */
    d3d12_command_list_end_current_render_pass(list);

    copy_region.srcOffset = src_offset + src_resource->heap_offset;
    copy_region.dstOffset = dst_offset + dst_resource->heap_offset;
    copy_region.size = byte_count;

    VK_CALL(vkCmdCopyBuffer(list->vk_command_buffer,
            src_resource->u.vk_buffer, dst_resource->u.vk_buffer, 1, &copy_region));
}
|
|
|
|
|
2017-10-20 17:27:17 +01:00
|
|
|
/* Translates a flat D3D12 sub-resource index into Vulkan image subresource
 * layers. D3D12 indices are laid out as array_layer * mip_count + mip_level. */
static void vk_image_subresource_layers_from_d3d12(VkImageSubresourceLayers *subresource,
        const struct vkd3d_format *format, unsigned int sub_resource_idx, unsigned int miplevel_count)
{
    subresource->aspectMask = format->vk_aspect_mask;
    subresource->baseArrayLayer = sub_resource_idx / miplevel_count;
    subresource->mipLevel = sub_resource_idx % miplevel_count;
    /* One sub-resource always maps to exactly one layer. */
    subresource->layerCount = 1;
}
|
|
|
|
|
|
|
|
static void vk_extent_3d_from_d3d12_miplevel(VkExtent3D *extent,
|
|
|
|
const D3D12_RESOURCE_DESC *resource_desc, unsigned int miplevel_idx)
|
|
|
|
{
|
|
|
|
extent->width = d3d12_resource_desc_get_width(resource_desc, miplevel_idx);
|
|
|
|
extent->height = d3d12_resource_desc_get_height(resource_desc, miplevel_idx);
|
|
|
|
extent->depth = d3d12_resource_desc_get_depth(resource_desc, miplevel_idx);
|
|
|
|
}
|
|
|
|
|
2019-03-18 09:02:58 +00:00
|
|
|
/* Builds a VkBufferImageCopy for a buffer -> image copy from a D3D12 placed
 * footprint. src_box (in buffer texel coordinates) is optional; when absent
 * the whole footprint is copied. dst_{x,y,z} give the image destination. */
static void vk_buffer_image_copy_from_d3d12(VkBufferImageCopy *copy,
        const D3D12_PLACED_SUBRESOURCE_FOOTPRINT *footprint, unsigned int sub_resource_idx,
        const D3D12_RESOURCE_DESC *image_desc, const struct vkd3d_format *format,
        const D3D12_BOX *src_box, unsigned int dst_x, unsigned int dst_y, unsigned int dst_z)
{
    copy->bufferOffset = footprint->Offset;
    if (src_box)
    {
        /* Height is in texels; divide by the compressed block height to get
         * the number of rows actually stored in the buffer. */
        VkDeviceSize row_count = footprint->Footprint.Height / format->block_height;
        /* Advance the buffer offset to the first texel selected by src_box. */
        copy->bufferOffset += vkd3d_format_get_data_offset(format, footprint->Footprint.RowPitch,
                row_count * footprint->Footprint.RowPitch, src_box->left, src_box->top, src_box->front);
    }
    /* Convert the byte row pitch into a texel row length (block-aware). */
    copy->bufferRowLength = footprint->Footprint.RowPitch /
            (format->byte_count * format->block_byte_count) * format->block_width;
    copy->bufferImageHeight = footprint->Footprint.Height;
    vk_image_subresource_layers_from_d3d12(&copy->imageSubresource,
            format, sub_resource_idx, image_desc->MipLevels);
    copy->imageOffset.x = dst_x;
    copy->imageOffset.y = dst_y;
    copy->imageOffset.z = dst_z;

    /* Start from the full mip extent, then shrink by the destination offset
     * so the copy never writes past the image edge. */
    vk_extent_3d_from_d3d12_miplevel(&copy->imageExtent, image_desc,
            copy->imageSubresource.mipLevel);
    copy->imageExtent.width -= copy->imageOffset.x;
    copy->imageExtent.height -= copy->imageOffset.y;
    copy->imageExtent.depth -= copy->imageOffset.z;

    if (src_box)
    {
        /* Clamp to the size of the source box. */
        copy->imageExtent.width = min(copy->imageExtent.width, src_box->right - src_box->left);
        copy->imageExtent.height = min(copy->imageExtent.height, src_box->bottom - src_box->top);
        copy->imageExtent.depth = min(copy->imageExtent.depth, src_box->back - src_box->front);
    }
    else
    {
        /* Clamp to the size of the buffer footprint. */
        copy->imageExtent.width = min(copy->imageExtent.width, footprint->Footprint.Width);
        copy->imageExtent.height = min(copy->imageExtent.height, footprint->Footprint.Height);
        copy->imageExtent.depth = min(copy->imageExtent.depth, footprint->Footprint.Depth);
    }
}
|
|
|
|
|
2019-03-18 09:03:00 +00:00
|
|
|
/* Builds a VkBufferImageCopy for an image -> buffer copy from a D3D12 placed
 * footprint. src_box (in image coordinates) is optional; when absent the
 * whole mip level is copied. dst_{x,y,z} select the destination texel within
 * the buffer footprint. */
static void vk_image_buffer_copy_from_d3d12(VkBufferImageCopy *copy,
        const D3D12_PLACED_SUBRESOURCE_FOOTPRINT *footprint, unsigned int sub_resource_idx,
        const D3D12_RESOURCE_DESC *image_desc, const struct vkd3d_format *format,
        const D3D12_BOX *src_box, unsigned int dst_x, unsigned int dst_y, unsigned int dst_z)
{
    /* Height is in texels; divide by the compressed block height to get the
     * number of rows stored in the buffer. */
    VkDeviceSize row_count = footprint->Footprint.Height / format->block_height;

    /* Offset of the destination texel within the buffer footprint. */
    copy->bufferOffset = footprint->Offset + vkd3d_format_get_data_offset(format,
            footprint->Footprint.RowPitch, row_count * footprint->Footprint.RowPitch, dst_x, dst_y, dst_z);
    /* Convert the byte row pitch into a texel row length (block-aware). */
    copy->bufferRowLength = footprint->Footprint.RowPitch /
            (format->byte_count * format->block_byte_count) * format->block_width;
    copy->bufferImageHeight = footprint->Footprint.Height;
    vk_image_subresource_layers_from_d3d12(&copy->imageSubresource,
            format, sub_resource_idx, image_desc->MipLevels);
    copy->imageOffset.x = src_box ? src_box->left : 0;
    copy->imageOffset.y = src_box ? src_box->top : 0;
    copy->imageOffset.z = src_box ? src_box->front : 0;
    if (src_box)
    {
        copy->imageExtent.width = src_box->right - src_box->left;
        copy->imageExtent.height = src_box->bottom - src_box->top;
        copy->imageExtent.depth = src_box->back - src_box->front;
    }
    else
    {
        /* No box: copy the entire mip level of the sub-resource. */
        unsigned int miplevel = copy->imageSubresource.mipLevel;
        vk_extent_3d_from_d3d12_miplevel(&copy->imageExtent, image_desc, miplevel);
    }
}
|
|
|
|
|
2017-08-30 14:40:43 +01:00
|
|
|
static void vk_image_copy_from_d3d12(VkImageCopy *image_copy,
|
|
|
|
unsigned int src_sub_resource_idx, unsigned int dst_sub_resource_idx,
|
|
|
|
const D3D12_RESOURCE_DESC *src_desc, const D3D12_RESOURCE_DESC *dst_desc,
|
|
|
|
const struct vkd3d_format *src_format, const struct vkd3d_format *dst_format,
|
|
|
|
const D3D12_BOX *src_box, unsigned int dst_x, unsigned int dst_y, unsigned int dst_z)
|
|
|
|
{
|
2017-10-20 17:27:17 +01:00
|
|
|
vk_image_subresource_layers_from_d3d12(&image_copy->srcSubresource,
|
|
|
|
src_format, src_sub_resource_idx, src_desc->MipLevels);
|
2017-08-30 14:40:43 +01:00
|
|
|
image_copy->srcOffset.x = src_box ? src_box->left : 0;
|
|
|
|
image_copy->srcOffset.y = src_box ? src_box->top : 0;
|
|
|
|
image_copy->srcOffset.z = src_box ? src_box->front : 0;
|
2017-10-20 17:27:17 +01:00
|
|
|
vk_image_subresource_layers_from_d3d12(&image_copy->dstSubresource,
|
|
|
|
dst_format, dst_sub_resource_idx, dst_desc->MipLevels);
|
2017-08-30 14:40:43 +01:00
|
|
|
image_copy->dstOffset.x = dst_x;
|
|
|
|
image_copy->dstOffset.y = dst_y;
|
|
|
|
image_copy->dstOffset.z = dst_z;
|
2017-08-31 08:42:50 +01:00
|
|
|
if (src_box)
|
|
|
|
{
|
|
|
|
image_copy->extent.width = src_box->right - src_box->left;
|
|
|
|
image_copy->extent.height = src_box->bottom - src_box->top;
|
|
|
|
image_copy->extent.depth = src_box->back - src_box->front;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2017-08-31 08:42:50 +01:00
|
|
|
unsigned int miplevel = image_copy->srcSubresource.mipLevel;
|
2017-10-20 17:27:17 +01:00
|
|
|
vk_extent_3d_from_d3d12_miplevel(&image_copy->extent, src_desc, miplevel);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-22 12:23:40 +01:00
|
|
|
static void d3d12_command_list_copy_image(struct d3d12_command_list *list,
|
|
|
|
struct d3d12_resource *dst_resource, const struct vkd3d_format *dst_format, VkImageLayout dst_layout,
|
|
|
|
struct d3d12_resource *src_resource, const struct vkd3d_format *src_format, VkImageLayout src_layout,
|
|
|
|
const VkImageCopy *region)
|
2017-10-20 17:27:17 +01:00
|
|
|
{
|
|
|
|
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
|
2020-04-22 12:23:40 +01:00
|
|
|
struct vkd3d_texture_view_desc dst_view_desc, src_view_desc;
|
|
|
|
struct vkd3d_copy_image_pipeline_key pipeline_key;
|
|
|
|
struct vkd3d_copy_image_info pipeline_info;
|
|
|
|
VkImageMemoryBarrier vk_image_barriers[2];
|
|
|
|
VkWriteDescriptorSet vk_descriptor_write;
|
|
|
|
struct vkd3d_copy_image_args push_args;
|
|
|
|
struct vkd3d_view *dst_view, *src_view;
|
|
|
|
VkDescriptorImageInfo vk_image_info;
|
|
|
|
VkDescriptorSet vk_descriptor_set;
|
|
|
|
VkRenderPassBeginInfo begin_info;
|
|
|
|
VkImageLayout attachment_layout;
|
|
|
|
VkFramebuffer vk_framebuffer;
|
|
|
|
bool dst_is_depth_stencil;
|
|
|
|
VkViewport viewport;
|
|
|
|
VkExtent3D extent;
|
|
|
|
VkRect2D scissor;
|
|
|
|
unsigned int i;
|
2017-10-20 17:27:17 +01:00
|
|
|
HRESULT hr;
|
|
|
|
|
2020-04-22 12:23:40 +01:00
|
|
|
if (dst_format->vk_aspect_mask == src_format->vk_aspect_mask)
|
|
|
|
{
|
|
|
|
VK_CALL(vkCmdCopyImage(list->vk_command_buffer,
|
|
|
|
src_resource->u.vk_image, src_layout,
|
|
|
|
dst_resource->u.vk_image, dst_layout,
|
|
|
|
1, region));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
dst_is_depth_stencil = !!(dst_format->vk_aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT));
|
|
|
|
|
2020-04-24 14:53:01 +01:00
|
|
|
attachment_layout = dst_is_depth_stencil
|
|
|
|
? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
|
|
|
|
: VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
|
|
|
|
|
2020-04-22 12:23:40 +01:00
|
|
|
if (!(dst_format = vkd3d_meta_get_copy_image_attachment_format(&list->device->meta_ops, dst_format, src_format)))
|
|
|
|
{
|
|
|
|
ERR("No attachment format found for source format %u.\n", src_format->vk_format);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
pipeline_key.format = dst_format;
|
|
|
|
pipeline_key.view_type = vkd3d_meta_get_copy_image_view_type(dst_resource->desc.Dimension);
|
|
|
|
pipeline_key.sample_count = vk_samples_from_dxgi_sample_desc(&dst_resource->desc.SampleDesc);
|
2017-10-20 17:27:17 +01:00
|
|
|
|
2020-04-22 12:23:40 +01:00
|
|
|
if (FAILED(hr = vkd3d_meta_get_copy_image_pipeline(&list->device->meta_ops, &pipeline_key, &pipeline_info)))
|
|
|
|
{
|
|
|
|
ERR("Failed to obtain pipeline, format %u, view_type %u, sample_count %u.\n",
|
|
|
|
pipeline_key.format->vk_format, pipeline_key.view_type, pipeline_key.sample_count);
|
|
|
|
return;
|
|
|
|
}
|
2017-10-20 17:27:17 +01:00
|
|
|
|
2020-04-22 12:23:40 +01:00
|
|
|
d3d12_command_list_invalidate_current_pipeline(list);
|
|
|
|
d3d12_command_list_invalidate_root_parameters(list, VK_PIPELINE_BIND_POINT_GRAPHICS, true);
|
|
|
|
|
|
|
|
memset(&dst_view_desc, 0, sizeof(dst_view_desc));
|
|
|
|
dst_view_desc.view_type = pipeline_key.view_type;
|
2020-04-24 14:53:01 +01:00
|
|
|
dst_view_desc.layout = d3d12_resource_pick_layout(dst_resource, attachment_layout);
|
2020-04-22 12:23:40 +01:00
|
|
|
dst_view_desc.format = dst_format;
|
|
|
|
dst_view_desc.miplevel_idx = region->dstSubresource.mipLevel;
|
|
|
|
dst_view_desc.miplevel_count = 1;
|
|
|
|
dst_view_desc.layer_idx = region->dstSubresource.baseArrayLayer;
|
|
|
|
dst_view_desc.layer_count = region->dstSubresource.layerCount;
|
|
|
|
dst_view_desc.allowed_swizzle = false;
|
|
|
|
|
|
|
|
memset(&src_view_desc, 0, sizeof(src_view_desc));
|
|
|
|
src_view_desc.view_type = pipeline_key.view_type;
|
2020-04-24 14:53:01 +01:00
|
|
|
src_view_desc.layout = d3d12_resource_pick_layout(src_resource, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
2020-04-22 12:23:40 +01:00
|
|
|
src_view_desc.format = src_format;
|
|
|
|
src_view_desc.miplevel_idx = region->srcSubresource.mipLevel;
|
|
|
|
src_view_desc.miplevel_count = 1;
|
|
|
|
src_view_desc.layer_idx = region->srcSubresource.baseArrayLayer;
|
|
|
|
src_view_desc.layer_count = region->srcSubresource.layerCount;
|
|
|
|
src_view_desc.allowed_swizzle = false;
|
|
|
|
|
|
|
|
dst_view = src_view = NULL;
|
|
|
|
|
|
|
|
if (!vkd3d_create_texture_view(list->device, dst_resource->u.vk_image, &dst_view_desc, &dst_view) ||
|
|
|
|
!vkd3d_create_texture_view(list->device, src_resource->u.vk_image, &src_view_desc, &src_view))
|
|
|
|
{
|
|
|
|
ERR("Failed to create image views.\n");
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2017-10-20 17:27:17 +01:00
|
|
|
|
2020-04-22 12:23:40 +01:00
|
|
|
if (!d3d12_command_allocator_add_view(list->allocator, dst_view) ||
|
|
|
|
!d3d12_command_allocator_add_view(list->allocator, src_view))
|
|
|
|
{
|
|
|
|
ERR("Failed to add views.\n");
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2017-10-20 17:27:17 +01:00
|
|
|
|
2020-04-22 12:23:40 +01:00
|
|
|
extent.width = d3d12_resource_desc_get_width(&dst_resource->desc, dst_view_desc.miplevel_idx);
|
|
|
|
extent.height = d3d12_resource_desc_get_height(&dst_resource->desc, dst_view_desc.miplevel_idx);
|
|
|
|
extent.depth = dst_view_desc.layer_count;
|
2017-10-20 17:27:17 +01:00
|
|
|
|
2020-04-22 12:23:40 +01:00
|
|
|
if (!d3d12_command_list_create_framebuffer(list, pipeline_info.vk_render_pass, 1, &dst_view->u.vk_image_view, extent, &vk_framebuffer))
|
|
|
|
{
|
|
|
|
ERR("Failed to create framebuffer.\n");
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
|
|
|
|
begin_info.pNext = NULL;
|
|
|
|
begin_info.renderPass = pipeline_info.vk_render_pass;
|
|
|
|
begin_info.framebuffer = vk_framebuffer;
|
|
|
|
begin_info.clearValueCount = 0;
|
|
|
|
begin_info.pClearValues = NULL;
|
|
|
|
begin_info.renderArea.offset.x = 0;
|
|
|
|
begin_info.renderArea.offset.y = 0;
|
|
|
|
begin_info.renderArea.extent.width = extent.width;
|
|
|
|
begin_info.renderArea.extent.height = extent.height;
|
|
|
|
|
|
|
|
viewport.x = (float)region->dstOffset.x;
|
|
|
|
viewport.y = (float)region->dstOffset.y;
|
|
|
|
viewport.width = (float)region->extent.width;
|
|
|
|
viewport.height = (float)region->extent.height;
|
|
|
|
viewport.minDepth = 0.0f;
|
|
|
|
viewport.maxDepth = 1.0f;
|
|
|
|
|
|
|
|
scissor.offset.x = region->dstOffset.x;
|
|
|
|
scissor.offset.y = region->dstOffset.y;
|
|
|
|
scissor.extent.width = region->extent.width;
|
|
|
|
scissor.extent.height = region->extent.height;
|
|
|
|
|
|
|
|
push_args.offset.x = region->srcOffset.x - region->dstOffset.x;
|
|
|
|
push_args.offset.y = region->srcOffset.y - region->dstOffset.y;
|
|
|
|
|
|
|
|
vk_descriptor_set = d3d12_command_allocator_allocate_descriptor_set(
|
|
|
|
list->allocator, pipeline_info.vk_set_layout,
|
|
|
|
VKD3D_DESCRIPTOR_POOL_TYPE_STATIC);
|
|
|
|
|
|
|
|
if (!vk_descriptor_set)
|
|
|
|
{
|
|
|
|
ERR("Failed to allocate descriptor set.\n");
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
vk_image_info.sampler = VK_NULL_HANDLE;
|
|
|
|
vk_image_info.imageView = src_view->u.vk_image_view;
|
2020-04-24 14:57:59 +01:00
|
|
|
vk_image_info.imageLayout = src_view_desc.layout;
|
2020-04-22 12:23:40 +01:00
|
|
|
|
|
|
|
vk_descriptor_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
|
|
|
vk_descriptor_write.pNext = NULL;
|
|
|
|
vk_descriptor_write.dstSet = vk_descriptor_set;
|
|
|
|
vk_descriptor_write.dstBinding = 0;
|
|
|
|
vk_descriptor_write.dstArrayElement = 0;
|
|
|
|
vk_descriptor_write.descriptorCount = 1;
|
|
|
|
vk_descriptor_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
|
|
|
|
vk_descriptor_write.pImageInfo = &vk_image_info;
|
|
|
|
vk_descriptor_write.pBufferInfo = NULL;
|
|
|
|
vk_descriptor_write.pTexelBufferView = NULL;
|
|
|
|
|
|
|
|
VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &vk_descriptor_write, 0, NULL));
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(vk_image_barriers); i++)
|
|
|
|
{
|
|
|
|
vk_image_barriers[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
|
|
|
|
vk_image_barriers[i].pNext = NULL;
|
|
|
|
vk_image_barriers[i].srcAccessMask = 0;
|
|
|
|
vk_image_barriers[i].dstAccessMask = 0;
|
|
|
|
vk_image_barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
|
|
|
vk_image_barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
|
|
|
}
|
|
|
|
|
|
|
|
vk_image_barriers[0].oldLayout = dst_layout;
|
2020-04-24 14:57:59 +01:00
|
|
|
vk_image_barriers[0].newLayout = dst_view_desc.layout;
|
2020-04-22 12:23:40 +01:00
|
|
|
vk_image_barriers[0].image = dst_resource->u.vk_image;
|
|
|
|
vk_image_barriers[0].subresourceRange = vk_subresource_range_from_layers(®ion->dstSubresource);
|
|
|
|
vk_image_barriers[0].dstAccessMask = dst_is_depth_stencil
|
|
|
|
? VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
|
|
|
|
: VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
|
|
|
|
|
|
|
|
if (region->extent.width == extent.width && region->extent.height == extent.height)
|
|
|
|
vk_image_barriers[0].oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
2017-10-20 17:27:17 +01:00
|
|
|
|
2020-04-22 12:23:40 +01:00
|
|
|
vk_image_barriers[1].oldLayout = src_layout;
|
2020-04-24 14:57:59 +01:00
|
|
|
vk_image_barriers[1].newLayout = src_view_desc.layout;
|
2020-04-22 12:23:40 +01:00
|
|
|
vk_image_barriers[1].dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
|
|
|
|
vk_image_barriers[1].image = src_resource->u.vk_image;
|
|
|
|
vk_image_barriers[1].subresourceRange = vk_subresource_range_from_layers(®ion->srcSubresource);
|
|
|
|
|
|
|
|
VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer,
|
|
|
|
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
|
|
|
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
|
|
|
|
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
|
|
|
|
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
|
|
|
|
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
|
|
|
0, 0, NULL, 0, NULL, ARRAY_SIZE(vk_image_barriers),
|
|
|
|
vk_image_barriers));
|
|
|
|
|
|
|
|
VK_CALL(vkCmdBeginRenderPass(list->vk_command_buffer, &begin_info, VK_SUBPASS_CONTENTS_INLINE));
|
|
|
|
VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_info.vk_pipeline));
|
|
|
|
VK_CALL(vkCmdSetViewport(list->vk_command_buffer, 0, 1, &viewport));
|
|
|
|
VK_CALL(vkCmdSetScissor(list->vk_command_buffer, 0, 1, &scissor));
|
|
|
|
VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
|
|
|
|
pipeline_info.vk_pipeline_layout, 0, 1, &vk_descriptor_set, 0, NULL));
|
|
|
|
VK_CALL(vkCmdPushConstants(list->vk_command_buffer, pipeline_info.vk_pipeline_layout,
|
|
|
|
VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(push_args), &push_args));
|
|
|
|
VK_CALL(vkCmdDraw(list->vk_command_buffer, 3, region->dstSubresource.layerCount, 0, 0));
|
|
|
|
VK_CALL(vkCmdEndRenderPass(list->vk_command_buffer));
|
|
|
|
|
2020-04-24 14:57:59 +01:00
|
|
|
vk_image_barriers[0].oldLayout = dst_view_desc.layout;
|
2020-04-22 12:23:40 +01:00
|
|
|
vk_image_barriers[0].newLayout = dst_layout;
|
|
|
|
vk_image_barriers[0].srcAccessMask = dst_is_depth_stencil
|
|
|
|
? VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
|
|
|
|
: VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
|
|
|
|
vk_image_barriers[0].dstAccessMask = 0;
|
|
|
|
|
|
|
|
vk_image_barriers[1].oldLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
|
|
|
|
vk_image_barriers[1].newLayout = src_layout;
|
|
|
|
vk_image_barriers[1].dstAccessMask = 0;
|
|
|
|
|
|
|
|
VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer,
|
|
|
|
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
|
|
|
|
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
|
|
|
|
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
|
|
|
|
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
|
|
|
|
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
|
|
|
0, 0, NULL, 0, NULL, ARRAY_SIZE(vk_image_barriers),
|
|
|
|
vk_image_barriers));
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
if (dst_view)
|
|
|
|
vkd3d_view_decref(dst_view, list->device);
|
|
|
|
if (src_view)
|
|
|
|
vkd3d_view_decref(src_view, list->device);
|
|
|
|
}
|
2017-08-30 14:40:43 +01:00
|
|
|
}
|
|
|
|
|
2019-03-18 09:03:02 +00:00
|
|
|
static bool validate_d3d12_box(const D3D12_BOX *box)
|
|
|
|
{
|
|
|
|
return box->right > box->left
|
|
|
|
&& box->bottom > box->top
|
|
|
|
&& box->back > box->front;
|
|
|
|
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::CopyTextureRegion(). Dispatches to one of three
 * Vulkan copy paths depending on the source/destination location types:
 * image -> buffer, buffer -> image, or image -> image. An empty src_box is
 * rejected up front; NULL src_box means "copy the whole subresource". */
static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(d3d12_command_list_iface *iface,
        const D3D12_TEXTURE_COPY_LOCATION *dst, UINT dst_x, UINT dst_y, UINT dst_z,
        const D3D12_TEXTURE_COPY_LOCATION *src, const D3D12_BOX *src_box)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    struct d3d12_resource *dst_resource, *src_resource;
    const struct vkd3d_format *src_format, *dst_format;
    const struct vkd3d_vk_device_procs *vk_procs;
    VkBufferImageCopy buffer_image_copy;
    VkImageCopy image_copy;

    TRACE("iface %p, dst %p, dst_x %u, dst_y %u, dst_z %u, src %p, src_box %p.\n",
            iface, dst, dst_x, dst_y, dst_z, src, src_box);

    /* D3D12 allows callers to pass degenerate boxes; treat them as a no-op. */
    if (src_box && !validate_d3d12_box(src_box))
    {
        WARN("Empty box %s.\n", debug_d3d12_box(src_box));
        return;
    }

    /* vk_procs is referenced implicitly by the VK_CALL() macro below. */
    vk_procs = &list->device->vk_procs;

    dst_resource = unsafe_impl_from_ID3D12Resource(dst->pResource);
    src_resource = unsafe_impl_from_ID3D12Resource(src->pResource);

    d3d12_command_list_track_resource_usage(list, dst_resource);
    d3d12_command_list_track_resource_usage(list, src_resource);

    /* Transfer commands may not be recorded inside a render pass. */
    d3d12_command_list_end_current_render_pass(list);

    if (src->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX
            && dst->Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT)
    {
        /* Texture subresource -> placed buffer footprint. */
        assert(d3d12_resource_is_buffer(dst_resource));
        assert(d3d12_resource_is_texture(src_resource));

        /* The footprint format is validated against the *source* texture desc. */
        if (!(dst_format = vkd3d_format_from_d3d12_resource_desc(list->device,
                &src_resource->desc, dst->u.PlacedFootprint.Footprint.Format)))
        {
            WARN("Invalid format %#x.\n", dst->u.PlacedFootprint.Footprint.Format);
            return;
        }

        if (dst_format->is_emulated)
        {
            FIXME("Format %#x is not supported yet.\n", dst_format->dxgi_format);
            return;
        }

        /* Combined depth-stencil copies would need two Vulkan copies (one per
         * aspect); only partially handled for now. */
        if ((dst_format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
                && (dst_format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT))
            FIXME("Depth-stencil format %#x not fully supported yet.\n", dst_format->dxgi_format);

        vk_image_buffer_copy_from_d3d12(&buffer_image_copy, &dst->u.PlacedFootprint,
                src->u.SubresourceIndex, &src_resource->desc, dst_format, src_box, dst_x, dst_y, dst_z);
        /* Placed buffer resources alias their heap; offset into the heap buffer. */
        buffer_image_copy.bufferOffset += dst_resource->heap_offset;
        VK_CALL(vkCmdCopyImageToBuffer(list->vk_command_buffer,
                src_resource->u.vk_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                dst_resource->u.vk_buffer, 1, &buffer_image_copy));
    }
    else if (src->Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT
            && dst->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX)
    {
        /* Placed buffer footprint -> texture subresource. */
        assert(d3d12_resource_is_texture(dst_resource));
        assert(d3d12_resource_is_buffer(src_resource));

        /* The footprint format is validated against the *destination* texture desc. */
        if (!(src_format = vkd3d_format_from_d3d12_resource_desc(list->device,
                &dst_resource->desc, src->u.PlacedFootprint.Footprint.Format)))
        {
            WARN("Invalid format %#x.\n", src->u.PlacedFootprint.Footprint.Format);
            return;
        }

        if (src_format->is_emulated)
        {
            FIXME("Format %#x is not supported yet.\n", src_format->dxgi_format);
            return;
        }

        if ((src_format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
                && (src_format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT))
            FIXME("Depth-stencil format %#x not fully supported yet.\n", src_format->dxgi_format);

        vk_buffer_image_copy_from_d3d12(&buffer_image_copy, &src->u.PlacedFootprint,
                dst->u.SubresourceIndex, &dst_resource->desc, src_format, src_box, dst_x, dst_y, dst_z);
        buffer_image_copy.bufferOffset += src_resource->heap_offset;
        VK_CALL(vkCmdCopyBufferToImage(list->vk_command_buffer,
                src_resource->u.vk_buffer, dst_resource->u.vk_image,
                VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &buffer_image_copy));
    }
    else if (src->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX
            && dst->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX)
    {
        /* Texture subresource -> texture subresource. Formats are resolved
         * from each resource's own desc (DXGI_FORMAT_UNKNOWN = use desc.Format). */
        assert(d3d12_resource_is_texture(dst_resource));
        assert(d3d12_resource_is_texture(src_resource));

        if (!(dst_format = vkd3d_format_from_d3d12_resource_desc(list->device,
                &dst_resource->desc, DXGI_FORMAT_UNKNOWN)))
        {
            WARN("Invalid format %#x.\n", dst_resource->desc.Format);
            return;
        }
        if (!(src_format = vkd3d_format_from_d3d12_resource_desc(list->device,
                &src_resource->desc, DXGI_FORMAT_UNKNOWN)))
        {
            WARN("Invalid format %#x.\n", src_resource->desc.Format);
            return;
        }

        if ((dst_format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
                && (dst_format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT))
            FIXME("Depth-stencil format %#x not fully supported yet.\n", dst_format->dxgi_format);
        if ((src_format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
                && (src_format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT))
            FIXME("Depth-stencil format %#x not fully supported yet.\n", src_format->dxgi_format);

        vk_image_copy_from_d3d12(&image_copy, src->u.SubresourceIndex, dst->u.SubresourceIndex,
                &src_resource->desc, &dst_resource->desc, src_format, dst_format,
                src_box, dst_x, dst_y, dst_z);

        /* d3d12_command_list_copy_image() handles format-compatibility details
         * (e.g. falling back to a draw-based copy) — see the helper above. */
        d3d12_command_list_copy_image(list,
                dst_resource, dst_format, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                src_resource, src_format, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                &image_copy);
    }
    else
    {
        /* buffer -> buffer via CopyTextureRegion() is not a valid D3D12 usage;
         * anything else ending up here is unimplemented. */
        FIXME("Copy type %#x -> %#x not implemented.\n", src->Type, dst->Type);
    }
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::CopyResource(). Copies an entire resource:
 * a single vkCmdCopyBuffer for buffers, or one image copy per mip level
 * (covering all array layers) for textures. The resources must match in
 * dimensions/format per the D3D12 CopyResource contract, asserted below. */
static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(d3d12_command_list_iface *iface,
        ID3D12Resource *dst, ID3D12Resource *src)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    struct d3d12_resource *dst_resource, *src_resource;
    const struct vkd3d_format *src_format, *dst_format;
    const struct vkd3d_vk_device_procs *vk_procs;
    VkBufferCopy vk_buffer_copy;
    VkImageCopy vk_image_copy;
    unsigned int layer_count;
    unsigned int i;

    TRACE("iface %p, dst_resource %p, src_resource %p.\n", iface, dst, src);

    /* vk_procs is referenced implicitly by the VK_CALL() macro below. */
    vk_procs = &list->device->vk_procs;

    dst_resource = unsafe_impl_from_ID3D12Resource(dst);
    src_resource = unsafe_impl_from_ID3D12Resource(src);

    d3d12_command_list_track_resource_usage(list, dst_resource);
    d3d12_command_list_track_resource_usage(list, src_resource);

    /* Transfer commands may not be recorded inside a render pass. */
    d3d12_command_list_end_current_render_pass(list);

    if (d3d12_resource_is_buffer(dst_resource))
    {
        /* Whole-buffer copy; both resources must be buffers of equal size. */
        assert(d3d12_resource_is_buffer(src_resource));
        assert(src_resource->desc.Width == dst_resource->desc.Width);

        /* Placed buffer resources alias their heap buffer at heap_offset. */
        vk_buffer_copy.srcOffset = src_resource->heap_offset;
        vk_buffer_copy.dstOffset = dst_resource->heap_offset;
        vk_buffer_copy.size = dst_resource->desc.Width;
        VK_CALL(vkCmdCopyBuffer(list->vk_command_buffer,
                src_resource->u.vk_buffer, dst_resource->u.vk_buffer, 1, &vk_buffer_copy));
    }
    else
    {
        if (!(dst_format = vkd3d_format_from_d3d12_resource_desc(list->device,
                &dst_resource->desc, DXGI_FORMAT_UNKNOWN)))
        {
            WARN("Invalid format %#x.\n", dst_resource->desc.Format);
            return;
        }
        if (!(src_format = vkd3d_format_from_d3d12_resource_desc(list->device,
                &src_resource->desc, DXGI_FORMAT_UNKNOWN)))
        {
            WARN("Invalid format %#x.\n", src_resource->desc.Format);
            return;
        }

        layer_count = d3d12_resource_desc_get_layer_count(&dst_resource->desc);

        assert(d3d12_resource_is_texture(dst_resource));
        assert(d3d12_resource_is_texture(src_resource));
        assert(dst_resource->desc.MipLevels == src_resource->desc.MipLevels);
        assert(layer_count == d3d12_resource_desc_get_layer_count(&src_resource->desc));

        /* One copy per mip level; each copy spans all array layers. */
        for (i = 0; i < dst_resource->desc.MipLevels; ++i)
        {
            /* NULL box / zero offsets: copy the full extent of mip i. */
            vk_image_copy_from_d3d12(&vk_image_copy, i, i,
                    &src_resource->desc, &dst_resource->desc, src_format, dst_format, NULL, 0, 0, 0);
            vk_image_copy.dstSubresource.layerCount = layer_count;
            vk_image_copy.srcSubresource.layerCount = layer_count;

            d3d12_command_list_copy_image(list,
                    dst_resource, dst_format, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                    src_resource, src_format, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                    &vk_image_copy);
        }
    }
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::CopyTiles(). Tiled-resource copies are not
 * implemented; this stub only logs the call once per unique message. */
static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(d3d12_command_list_iface *iface,
        ID3D12Resource *tiled_resource, const D3D12_TILED_RESOURCE_COORDINATE *tile_region_start_coordinate,
        const D3D12_TILE_REGION_SIZE *tile_region_size, ID3D12Resource *buffer, UINT64 buffer_offset,
        D3D12_TILE_COPY_FLAGS flags)
{
    FIXME("iface %p, tiled_resource %p, tile_region_start_coordinate %p, "
            "tile_region_size %p, buffer %p, buffer_offset %#"PRIx64", flags %#x stub!\n",
            iface, tiled_resource, tile_region_start_coordinate,
            tile_region_size, buffer, buffer_offset, flags);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::ResolveSubresource(). Resolves a multisampled
 * source subresource into a single-sampled destination subresource via
 * vkCmdResolveImage(). Typeless resources are only handled when both sides
 * and the requested format resolve to the same VkFormat; depth/stencil
 * resolves are unimplemented. */
static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(d3d12_command_list_iface *iface,
        ID3D12Resource *dst, UINT dst_sub_resource_idx,
        ID3D12Resource *src, UINT src_sub_resource_idx, DXGI_FORMAT format)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    const struct vkd3d_format *src_format, *dst_format, *vk_format;
    struct d3d12_resource *dst_resource, *src_resource;
    const struct vkd3d_vk_device_procs *vk_procs;
    const struct d3d12_device *device;
    VkImageResolve vk_image_resolve;

    TRACE("iface %p, dst_resource %p, dst_sub_resource_idx %u, src_resource %p, src_sub_resource_idx %u, "
            "format %#x.\n", iface, dst, dst_sub_resource_idx, src, src_sub_resource_idx, format);

    device = list->device;
    /* vk_procs is referenced implicitly by the VK_CALL() macro below. */
    vk_procs = &device->vk_procs;

    dst_resource = unsafe_impl_from_ID3D12Resource(dst);
    src_resource = unsafe_impl_from_ID3D12Resource(src);

    assert(d3d12_resource_is_texture(dst_resource));
    assert(d3d12_resource_is_texture(src_resource));

    d3d12_command_list_track_resource_usage(list, dst_resource);
    d3d12_command_list_track_resource_usage(list, src_resource);

    /* Resolve commands may not be recorded inside a render pass. */
    d3d12_command_list_end_current_render_pass(list);

    if (!(dst_format = vkd3d_format_from_d3d12_resource_desc(device, &dst_resource->desc, DXGI_FORMAT_UNKNOWN)))
    {
        WARN("Invalid format %#x.\n", dst_resource->desc.Format);
        return;
    }
    if (!(src_format = vkd3d_format_from_d3d12_resource_desc(device, &src_resource->desc, DXGI_FORMAT_UNKNOWN)))
    {
        WARN("Invalid format %#x.\n", src_resource->desc.Format);
        return;
    }

    if (dst_format->type == VKD3D_FORMAT_TYPE_TYPELESS || src_format->type == VKD3D_FORMAT_TYPE_TYPELESS)
    {
        /* Resolve the caller-specified format against the destination desc;
         * a typeless resolve is only supported when all three formats map to
         * the same Vulkan format. */
        if (!(vk_format = vkd3d_format_from_d3d12_resource_desc(device, &dst_resource->desc, format)))
        {
            WARN("Invalid format %#x.\n", format);
            return;
        }
        if (dst_format->vk_format != src_format->vk_format || dst_format->vk_format != vk_format->vk_format)
        {
            FIXME("Not implemented for typeless resources.\n");
            return;
        }
    }

    /* Resolve of depth/stencil images is not supported in Vulkan. */
    if ((dst_format->vk_aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))
            || (src_format->vk_aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)))
    {
        FIXME("Resolve of depth/stencil images is not implemented yet.\n");
        return;
    }

    /* Full-subresource resolve: zero offsets, extent taken from the
     * destination mip level. */
    vk_image_subresource_layers_from_d3d12(&vk_image_resolve.srcSubresource,
            src_format, src_sub_resource_idx, src_resource->desc.MipLevels);
    memset(&vk_image_resolve.srcOffset, 0, sizeof(vk_image_resolve.srcOffset));
    vk_image_subresource_layers_from_d3d12(&vk_image_resolve.dstSubresource,
            dst_format, dst_sub_resource_idx, dst_resource->desc.MipLevels);
    memset(&vk_image_resolve.dstOffset, 0, sizeof(vk_image_resolve.dstOffset));
    vk_extent_3d_from_d3d12_miplevel(&vk_image_resolve.extent,
            &dst_resource->desc, vk_image_resolve.dstSubresource.mipLevel);

    VK_CALL(vkCmdResolveImage(list->vk_command_buffer, src_resource->u.vk_image,
            VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_resource->u.vk_image,
            VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &vk_image_resolve));
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::IASetPrimitiveTopology(). Records the topology
 * in the dynamic state; the pipeline is invalidated on a change because the
 * topology is part of the Vulkan pipeline state. */
static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(d3d12_command_list_iface *iface,
        D3D12_PRIMITIVE_TOPOLOGY topology)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    struct vkd3d_dynamic_state *dyn_state = &list->dynamic_state;

    TRACE("iface %p, topology %#x.\n", iface, topology);

    if (topology == D3D_PRIMITIVE_TOPOLOGY_UNDEFINED)
    {
        WARN("Ignoring D3D_PRIMITIVE_TOPOLOGY_UNDEFINED.\n");
        return;
    }

    /* Setting the same topology again is a no-op. */
    if (topology != dyn_state->primitive_topology)
    {
        dyn_state->primitive_topology = topology;
        d3d12_command_list_invalidate_current_pipeline(list);
    }
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::RSSetViewports(). Converts D3D12 viewports to
 * VkViewports in the dynamic state and marks them for re-emission. If any
 * viewport has a zero extent, the entire call is ignored. */
static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(d3d12_command_list_iface *iface,
        UINT viewport_count, const D3D12_VIEWPORT *viewports)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    struct vkd3d_dynamic_state *dyn_state = &list->dynamic_state;
    unsigned int idx;

    TRACE("iface %p, viewport_count %u, viewports %p.\n", iface, viewport_count, viewports);

    /* Clamp to the number of viewports we can actually store. */
    if (viewport_count > ARRAY_SIZE(dyn_state->viewports))
    {
        FIXME_ONCE("Viewport count %u > D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE.\n", viewport_count);
        viewport_count = ARRAY_SIZE(dyn_state->viewports);
    }

    /* Validate first: if any viewport is degenerate, no state is modified. */
    for (idx = 0; idx < viewport_count; ++idx)
    {
        if (!viewports[idx].Width || !viewports[idx].Height)
        {
            FIXME_ONCE("Invalid viewport %u, ignoring RSSetViewports().\n", idx);
            return;
        }
    }

    for (idx = 0; idx < viewport_count; ++idx)
    {
        const D3D12_VIEWPORT *vp = &viewports[idx];
        VkViewport *vk_viewport = &dyn_state->viewports[idx];

        /* Flip vertically: negative height with the origin moved to the
         * bottom edge maps D3D12's top-left origin onto Vulkan's. */
        vk_viewport->x = vp->TopLeftX;
        vk_viewport->y = vp->TopLeftY + vp->Height;
        vk_viewport->width = vp->Width;
        vk_viewport->height = -vp->Height;
        vk_viewport->minDepth = vp->MinDepth;
        vk_viewport->maxDepth = vp->MaxDepth;
    }

    if (viewport_count != dyn_state->viewport_count)
    {
        /* A changed viewport count also requires re-emitting scissors and
         * invalidates the pipeline (the count presumably feeds pipeline
         * creation — see d3d12_command_list_invalidate_current_pipeline). */
        dyn_state->viewport_count = viewport_count;
        dyn_state->dirty_flags |= VKD3D_DYNAMIC_STATE_SCISSOR;
        d3d12_command_list_invalidate_current_pipeline(list);
    }

    dyn_state->dirty_flags |= VKD3D_DYNAMIC_STATE_VIEWPORT;
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::RSSetScissorRects(). Converts D3D12 RECTs to
 * VkRect2Ds in the dynamic state and marks the scissors for re-emission. */
static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(d3d12_command_list_iface *iface,
        UINT rect_count, const D3D12_RECT *rects)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    struct vkd3d_dynamic_state *dyn_state = &list->dynamic_state;
    unsigned int i;

    TRACE("iface %p, rect_count %u, rects %p.\n", iface, rect_count, rects);

    /* Clamp to the number of scissor rects we can store. Use FIXME_ONCE for
     * consistency with RSSetViewports(); plain FIXME would spam the log on
     * every call that exceeds the limit. */
    if (rect_count > ARRAY_SIZE(dyn_state->scissors))
    {
        FIXME_ONCE("Rect count %u > D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE.\n", rect_count);
        rect_count = ARRAY_SIZE(dyn_state->scissors);
    }

    for (i = 0; i < rect_count; ++i)
    {
        /* D3D12 rects are edge-based (left/top/right/bottom); Vulkan scissors
         * are offset + extent. */
        VkRect2D *vk_rect = &dyn_state->scissors[i];
        vk_rect->offset.x = rects[i].left;
        vk_rect->offset.y = rects[i].top;
        vk_rect->extent.width = rects[i].right - rects[i].left;
        vk_rect->extent.height = rects[i].bottom - rects[i].top;
    }

    dyn_state->dirty_flags |= VKD3D_DYNAMIC_STATE_SCISSOR;
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::OMSetBlendFactor(). Stores the four RGBA blend
 * constants in the dynamic state and flags them for re-emission. */
static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(d3d12_command_list_iface *iface,
        const FLOAT blend_factor[4])
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    struct vkd3d_dynamic_state *dyn_state = &list->dynamic_state;
    unsigned int component;

    TRACE("iface %p, blend_factor %p.\n", iface, blend_factor);

    for (component = 0; component < 4; ++component)
        dyn_state->blend_constants[component] = blend_factor[component];

    dyn_state->dirty_flags |= VKD3D_DYNAMIC_STATE_BLEND_CONSTANTS;
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::OMSetStencilRef(). Stores the stencil reference
 * value in the dynamic state and flags it for re-emission. */
static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(d3d12_command_list_iface *iface,
        UINT stencil_ref)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);

    TRACE("iface %p, stencil_ref %u.\n", iface, stencil_ref);

    list->dynamic_state.stencil_reference = stencil_ref;
    list->dynamic_state.dirty_flags |= VKD3D_DYNAMIC_STATE_STENCIL_REFERENCE;
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::SetPipelineState(). Binds a new pipeline state
 * object, invalidating the current Vulkan pipeline and re-flagging any
 * dynamic states that the previous pipeline did not treat as dynamic. */
static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(d3d12_command_list_iface *iface,
        ID3D12PipelineState *pipeline_state)
{
    struct d3d12_pipeline_state *state = unsafe_impl_from_ID3D12PipelineState(pipeline_state);
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);

    TRACE("iface %p, pipeline_state %p.\n", iface, pipeline_state);

    /* Re-binding the already-bound state is a no-op. */
    if (state == list->state)
        return;

    d3d12_command_list_invalidate_current_pipeline(list);

    if (d3d12_pipeline_state_is_graphics(state))
    {
        uint32_t prev_dynamic_flags = 0u;

        if (d3d12_pipeline_state_is_graphics(list->state))
            prev_dynamic_flags = list->state->u.graphics.dynamic_state_flags;

        /* Reapply all dynamic states that were not dynamic in previously bound pipeline */
        list->dynamic_state.dirty_flags |= state->u.graphics.dynamic_state_flags & ~prev_dynamic_flags;
    }

    list->state = state;
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::ResourceBarrier(). Collapses all transition and
 * UAV barriers in the call into a single global VkMemoryBarrier issued with
 * combined source/destination stage masks. Per-resource image layout
 * transitions are not performed here; aliasing barriers are unimplemented
 * and split (BEGIN_ONLY/END_ONLY) barriers are executed as full barriers at
 * the END_ONLY point. */
static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(d3d12_command_list_iface *iface,
        UINT barrier_count, const D3D12_RESOURCE_BARRIER *barriers)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
    bool have_aliasing_barriers = false, have_split_barriers = false;
    VkPipelineStageFlags dst_stage_mask, src_stage_mask;
    VkMemoryBarrier vk_memory_barrier;
    unsigned int i;

    TRACE("iface %p, barrier_count %u, barriers %p.\n", iface, barrier_count, barriers);

    /* Barriers may not be recorded inside a render pass. */
    d3d12_command_list_end_current_render_pass(list);

    vk_memory_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
    vk_memory_barrier.pNext = NULL;
    vk_memory_barrier.srcAccessMask = 0;
    vk_memory_barrier.dstAccessMask = 0;

    src_stage_mask = 0;
    dst_stage_mask = 0;

    for (i = 0; i < barrier_count; ++i)
    {
        const D3D12_RESOURCE_BARRIER *current = &barriers[i];
        struct d3d12_resource *resource;

        have_split_barriers = have_split_barriers
                || (current->Flags & D3D12_RESOURCE_BARRIER_FLAG_BEGIN_ONLY)
                || (current->Flags & D3D12_RESOURCE_BARRIER_FLAG_END_ONLY);

        /* The BEGIN half of a split barrier does nothing; the full barrier is
         * issued when the matching END_ONLY barrier arrives. */
        if (current->Flags & D3D12_RESOURCE_BARRIER_FLAG_BEGIN_ONLY)
            continue;

        switch (current->Type)
        {
            case D3D12_RESOURCE_BARRIER_TYPE_TRANSITION:
            {
                const D3D12_RESOURCE_TRANSITION_BARRIER *transition = &current->u.Transition;

                /* Invalid states or a NULL resource invalidate the entire
                 * command list per D3D12 validation rules. */
                if (!is_valid_resource_state(transition->StateBefore))
                {
                    d3d12_command_list_mark_as_invalid(list,
                            "Invalid StateBefore %#x (barrier %u).", transition->StateBefore, i);
                    continue;
                }
                if (!is_valid_resource_state(transition->StateAfter))
                {
                    d3d12_command_list_mark_as_invalid(list,
                            "Invalid StateAfter %#x (barrier %u).", transition->StateAfter, i);
                    continue;
                }

                if (!(resource = unsafe_impl_from_ID3D12Resource(transition->pResource)))
                {
                    d3d12_command_list_mark_as_invalid(list, "A resource pointer is NULL.");
                    continue;
                }

                /* Fold this barrier's stage/access requirements into the
                 * global masks. NOTE(review): assumes the helper OR-s into
                 * the output parameters rather than overwriting them —
                 * confirm against its definition. */
                vk_access_and_stage_flags_from_d3d12_resource_state(list->device, resource,
                        transition->StateBefore, list->vk_queue_flags, &src_stage_mask,
                        &vk_memory_barrier.srcAccessMask);
                vk_access_and_stage_flags_from_d3d12_resource_state(list->device, resource,
                        transition->StateAfter, list->vk_queue_flags, &dst_stage_mask,
                        &vk_memory_barrier.dstAccessMask);

                TRACE("Transition barrier (resource %p, subresource %#x, before %#x, after %#x).\n",
                        resource, transition->Subresource, transition->StateBefore, transition->StateAfter);
                break;
            }

            case D3D12_RESOURCE_BARRIER_TYPE_UAV:
            {
                /* A NULL pResource is a legal global UAV barrier in D3D12;
                 * resource may therefore be NULL here. NOTE(review): the
                 * helper below is then called with a NULL resource — verify
                 * it tolerates that. */
                const D3D12_RESOURCE_UAV_BARRIER *uav = &current->u.UAV;
                resource = unsafe_impl_from_ID3D12Resource(uav->pResource);

                /* UAV barriers are UNORDERED_ACCESS -> UNORDERED_ACCESS. */
                vk_access_and_stage_flags_from_d3d12_resource_state(list->device, resource,
                        D3D12_RESOURCE_STATE_UNORDERED_ACCESS, list->vk_queue_flags, &src_stage_mask,
                        &vk_memory_barrier.srcAccessMask);
                vk_access_and_stage_flags_from_d3d12_resource_state(list->device, resource,
                        D3D12_RESOURCE_STATE_UNORDERED_ACCESS, list->vk_queue_flags, &dst_stage_mask,
                        &vk_memory_barrier.dstAccessMask);

                TRACE("UAV barrier (resource %p).\n", resource);
                break;
            }

            case D3D12_RESOURCE_BARRIER_TYPE_ALIASING:
                have_aliasing_barriers = true;
                continue;
            default:
                WARN("Invalid barrier type %#x.\n", current->Type);
                continue;
        }

        if (resource)
            d3d12_command_list_track_resource_usage(list, resource);
    }

    /* Emit one combined barrier covering everything accumulated above. */
    if (src_stage_mask && dst_stage_mask)
    {
        VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer,
                src_stage_mask, dst_stage_mask, 0,
                1, &vk_memory_barrier, 0, NULL, 0, NULL));
    }

    if (have_aliasing_barriers)
        FIXME_ONCE("Aliasing barriers not implemented yet.\n");

    /* Vulkan doesn't support split barriers. */
    if (have_split_barriers)
        WARN("Issuing split barrier(s) on D3D12_RESOURCE_BARRIER_FLAG_END_ONLY.\n");
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::ExecuteBundle(). Bundle playback is not
 * implemented; this logs the call and returns without recording anything. */
static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(d3d12_command_list_iface *iface,
        ID3D12GraphicsCommandList *command_list)
{
    FIXME("iface %p, command_list %p stub!\n", iface, command_list);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::SetDescriptorHeaps(). Records which Vulkan
 * descriptor sets back the bound D3D12 descriptor heaps and marks every
 * pipeline bind point dirty so the sets are (re)bound before the next
 * draw/dispatch. */
static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(d3d12_command_list_iface *iface,
        UINT heap_count, ID3D12DescriptorHeap *const *heaps)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    struct vkd3d_bindless_state *bindless_state = &list->device->bindless_state;
    bool dirty_uav_counters = false;
    unsigned int i, j, set_index;
    /* Bit j set => bindless set j now refers to a new Vulkan descriptor set. */
    uint64_t dirty_mask = 0;

    TRACE("iface %p, heap_count %u, heaps %p.\n", iface, heap_count, heaps);

    for (i = 0; i < heap_count; i++)
    {
        struct d3d12_descriptor_heap *heap = unsafe_impl_from_ID3D12DescriptorHeap(heaps[i]);

        if (!heap)
            continue;

        /* A single D3D12 heap can back several bindless descriptor sets;
         * update every set whose heap type matches. */
        for (j = 0; j < bindless_state->set_count; j++)
        {
            if (bindless_state->set_info[j].heap_type != heap->desc.Type)
                continue;

            set_index = d3d12_descriptor_heap_set_index_from_binding(&bindless_state->set_info[j]);
            list->descriptor_heaps[j] = heap->vk_descriptor_sets[set_index];
            dirty_mask |= 1ull << j;
        }

        /* CBV/SRV/UAV heaps additionally carry the UAV counter buffer. */
        if (heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)
        {
            list->uav_counter_address_buffer = heap->uav_counters.vk_buffer;
            dirty_uav_counters = true;
        }
    }

    /* Propagate the dirty state to every bind point (graphics and compute). */
    for (i = 0; i < ARRAY_SIZE(list->pipeline_bindings); i++)
    {
        struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[i];
        bindings->descriptor_heap_dirty_mask = dirty_mask;

        /* Only root signatures that actually use bindless UAV counters need
         * their counter binding refreshed. */
        if (dirty_uav_counters && bindings->root_signature &&
                (bindings->root_signature->flags & VKD3D_ROOT_SIGNATURE_USE_BINDLESS_UAV_COUNTERS))
            bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_UAV_COUNTER_BINDING;
    }
}
|
|
|
|
|
2017-08-11 12:58:04 +01:00
|
|
|
/* Common implementation behind Set{Compute,Graphics}RootSignature. Replaces
 * the root signature for the given bind point, allocates a descriptor set
 * for any immutable/static samplers it declares, and invalidates all cached
 * root parameter state. No-op if the signature is unchanged. */
static void d3d12_command_list_set_root_signature(struct d3d12_command_list *list,
        VkPipelineBindPoint bind_point, const struct d3d12_root_signature *root_signature)
{
    struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];

    if (bindings->root_signature == root_signature)
        return;

    bindings->root_signature = root_signature;
    /* Any previously allocated static sampler set belongs to the old
     * signature's layout and must not be reused. */
    bindings->static_sampler_set = VK_NULL_HANDLE;

    if (root_signature && root_signature->vk_sampler_descriptor_layout)
    {
        /* FIXME allocate static sampler sets globally */
        bindings->static_sampler_set = d3d12_command_allocator_allocate_descriptor_set(
                list->allocator, root_signature->vk_sampler_descriptor_layout,
                VKD3D_DESCRIPTOR_POOL_TYPE_IMMUTABLE_SAMPLER);
    }

    d3d12_command_list_invalidate_root_parameters(list, bind_point, false);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::SetComputeRootSignature(): bind a root
 * signature to the compute bind point. */
static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(d3d12_command_list_iface *iface,
        ID3D12RootSignature *root_signature)
{
    struct d3d12_command_list *cmd_list;

    TRACE("iface %p, root_signature %p.\n", iface, root_signature);

    cmd_list = impl_from_ID3D12GraphicsCommandList(iface);
    d3d12_command_list_set_root_signature(cmd_list,
            VK_PIPELINE_BIND_POINT_COMPUTE, unsafe_impl_from_ID3D12RootSignature(root_signature));
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::SetGraphicsRootSignature(): bind a root
 * signature to the graphics bind point. */
static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(d3d12_command_list_iface *iface,
        ID3D12RootSignature *root_signature)
{
    struct d3d12_command_list *cmd_list;

    TRACE("iface %p, root_signature %p.\n", iface, root_signature);

    cmd_list = impl_from_ID3D12GraphicsCommandList(iface);
    d3d12_command_list_set_root_signature(cmd_list,
            VK_PIPELINE_BIND_POINT_GRAPHICS, unsafe_impl_from_ID3D12RootSignature(root_signature));
}
|
|
|
|
|
2017-09-05 10:53:55 +01:00
|
|
|
/* Common implementation behind Set{Compute,Graphics}RootDescriptorTable.
 * Records the table's base GPU descriptor handle and flags the relevant
 * per-bind-point state dirty so offsets (and, if the table uses packed
 * descriptors, the packed set) are re-emitted at draw/dispatch time. */
static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *list,
        VkPipelineBindPoint bind_point, unsigned int index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor)
{
    struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
    const struct d3d12_root_signature *root_signature = bindings->root_signature;
    const struct d3d12_root_descriptor_table *table;

    table = root_signature_get_descriptor_table(root_signature, index);

    /* Callers must pass a root parameter index that the bound root
     * signature declares as a descriptor table. */
    assert(table && index < ARRAY_SIZE(bindings->descriptor_tables));
    bindings->descriptor_tables[index] = base_descriptor;
    bindings->descriptor_table_active_mask |= (uint64_t)1 << index;

    if (root_signature->descriptor_table_count)
        bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS;

    if (table->flags & VKD3D_ROOT_DESCRIPTOR_TABLE_HAS_PACKED_DESCRIPTORS)
        bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_PACKED_DESCRIPTOR_SET;
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::SetComputeRootDescriptorTable(). */
static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(d3d12_command_list_iface *iface,
        UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor)
{
    struct d3d12_command_list *cmd_list;

    TRACE("iface %p, root_parameter_index %u, base_descriptor %#"PRIx64".\n",
            iface, root_parameter_index, base_descriptor.ptr);

    cmd_list = impl_from_ID3D12GraphicsCommandList(iface);
    d3d12_command_list_set_descriptor_table(cmd_list,
            VK_PIPELINE_BIND_POINT_COMPUTE, root_parameter_index, base_descriptor);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::SetGraphicsRootDescriptorTable(). */
static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(d3d12_command_list_iface *iface,
        UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor)
{
    struct d3d12_command_list *cmd_list;

    TRACE("iface %p, root_parameter_index %u, base_descriptor %#"PRIx64".\n",
            iface, root_parameter_index, base_descriptor.ptr);

    cmd_list = impl_from_ID3D12GraphicsCommandList(iface);
    d3d12_command_list_set_descriptor_table(cmd_list,
            VK_PIPELINE_BIND_POINT_GRAPHICS, root_parameter_index, base_descriptor);
}
|
|
|
|
|
2017-07-28 09:19:37 +01:00
|
|
|
/* Common implementation behind Set{Compute,Graphics}Root32BitConstant(s).
 * Copies `count` 32-bit values into the shadow root-constant array at the
 * constant range belonging to root parameter `index`, starting `offset`
 * dwords in, and marks that range dirty for flushing at draw/dispatch.
 * No bounds validation is performed here; callers pass app-provided values
 * through unchecked, matching D3D12 runtime behaviour. */
static void d3d12_command_list_set_root_constants(struct d3d12_command_list *list,
        VkPipelineBindPoint bind_point, unsigned int index, unsigned int offset,
        unsigned int count, const void *data)
{
    struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
    const struct d3d12_root_signature *root_signature = bindings->root_signature;
    const struct d3d12_root_constant *c;

    c = root_signature_get_32bit_constants(root_signature, index);
    memcpy(&bindings->root_constants[c->constant_index + offset], data, count * sizeof(uint32_t));

    bindings->root_constant_dirty_mask |= 1ull << index;
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::SetComputeRoot32BitConstant(). */
static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(d3d12_command_list_iface *iface,
        UINT root_parameter_index, UINT data, UINT dst_offset)
{
    struct d3d12_command_list *cmd_list;

    TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n",
            iface, root_parameter_index, data, dst_offset);

    cmd_list = impl_from_ID3D12GraphicsCommandList(iface);
    d3d12_command_list_set_root_constants(cmd_list,
            VK_PIPELINE_BIND_POINT_COMPUTE, root_parameter_index, dst_offset, 1, &data);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::SetGraphicsRoot32BitConstant(). */
static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(d3d12_command_list_iface *iface,
        UINT root_parameter_index, UINT data, UINT dst_offset)
{
    struct d3d12_command_list *cmd_list;

    TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n",
            iface, root_parameter_index, data, dst_offset);

    cmd_list = impl_from_ID3D12GraphicsCommandList(iface);
    d3d12_command_list_set_root_constants(cmd_list,
            VK_PIPELINE_BIND_POINT_GRAPHICS, root_parameter_index, dst_offset, 1, &data);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::SetComputeRoot32BitConstants(). */
static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(d3d12_command_list_iface *iface,
        UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset)
{
    struct d3d12_command_list *cmd_list;

    TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n",
            iface, root_parameter_index, constant_count, data, dst_offset);

    cmd_list = impl_from_ID3D12GraphicsCommandList(iface);
    d3d12_command_list_set_root_constants(cmd_list,
            VK_PIPELINE_BIND_POINT_COMPUTE, root_parameter_index, dst_offset, constant_count, data);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::SetGraphicsRoot32BitConstants(). */
static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(d3d12_command_list_iface *iface,
        UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset)
{
    struct d3d12_command_list *cmd_list;

    TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n",
            iface, root_parameter_index, constant_count, data, dst_offset);

    cmd_list = impl_from_ID3D12GraphicsCommandList(iface);
    d3d12_command_list_set_root_constants(cmd_list,
            VK_PIPELINE_BIND_POINT_GRAPHICS, root_parameter_index, dst_offset, constant_count, data);
}
|
|
|
|
|
2020-03-03 16:03:51 +00:00
|
|
|
/* Common implementation behind Set{Compute,Graphics}Root{CBV,SRV,UAV}.
 * Resolves the GPU virtual address to a Vulkan buffer and stores the
 * resulting descriptor info in the bind point's shadow root descriptors.
 * CBVs become plain buffer ranges; SRVs/UAVs become raw (texel) buffer
 * views whose lifetime is tied to the command allocator. */
static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *list,
        VkPipelineBindPoint bind_point, unsigned int index, D3D12_GPU_VIRTUAL_ADDRESS gpu_address)
{
    struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
    const struct d3d12_root_signature *root_signature = bindings->root_signature;
    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
    const struct vkd3d_vulkan_info *vk_info = &list->device->vk_info;
    const struct d3d12_root_parameter *root_parameter;
    union vkd3d_descriptor_info *descriptor;
    struct d3d12_resource *resource;
    VkBufferView vk_buffer_view;

    /* FIXME handle null descriptors */
    root_parameter = root_signature_get_root_descriptor(root_signature, index);
    descriptor = &bindings->root_descriptors[root_parameter->u.descriptor.packed_descriptor];

    if (root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV)
    {
        resource = vkd3d_gpu_va_allocator_dereference(&list->device->gpu_va_allocator, gpu_address);
        descriptor->buffer.buffer = resource->u.vk_buffer;
        descriptor->buffer.offset = gpu_address - resource->gpu_address;
        /* Clamp the range to what uniform buffers can address; the address
         * may point anywhere inside the resource. */
        descriptor->buffer.range = min(resource->desc.Width - descriptor->buffer.offset,
                vk_info->device_limits.maxUniformBufferRange);
    }
    else
    {
        if (!vkd3d_create_raw_buffer_view(list->device, gpu_address, &vk_buffer_view))
        {
            ERR("Failed to create buffer view.\n");
            return;
        }

        /* Register the view with the allocator so it is destroyed when the
         * allocator is reset; on failure destroy it immediately to avoid a
         * Vulkan object leak. */
        if (!(d3d12_command_allocator_add_buffer_view(list->allocator, vk_buffer_view)))
        {
            ERR("Failed to add buffer view.\n");
            VK_CALL(vkDestroyBufferView(list->device->vk_device, vk_buffer_view, NULL));
            return;
        }

        descriptor->buffer_view = vk_buffer_view;
    }

    bindings->root_descriptor_dirty_mask |= 1ull << index;
    bindings->root_descriptor_active_mask |= 1ull << index;
}
|
|
|
|
|
2017-08-02 14:30:15 +01:00
|
|
|
/* ID3D12GraphicsCommandList::SetComputeRootConstantBufferView(). */
static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferView(
        d3d12_command_list_iface *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address)
{
    struct d3d12_command_list *cmd_list;

    TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n",
            iface, root_parameter_index, address);

    cmd_list = impl_from_ID3D12GraphicsCommandList(iface);
    d3d12_command_list_set_root_descriptor(cmd_list,
            VK_PIPELINE_BIND_POINT_COMPUTE, root_parameter_index, address);
}
|
|
|
|
|
|
|
|
/* ID3D12GraphicsCommandList::SetGraphicsRootConstantBufferView(). */
static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootConstantBufferView(
        d3d12_command_list_iface *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address)
{
    struct d3d12_command_list *cmd_list;

    TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n",
            iface, root_parameter_index, address);

    cmd_list = impl_from_ID3D12GraphicsCommandList(iface);
    d3d12_command_list_set_root_descriptor(cmd_list,
            VK_PIPELINE_BIND_POINT_GRAPHICS, root_parameter_index, address);
}
|
|
|
|
|
2016-09-21 15:18:13 +01:00
|
|
|
/* ID3D12GraphicsCommandList::SetComputeRootShaderResourceView(). */
static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceView(
        d3d12_command_list_iface *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address)
{
    struct d3d12_command_list *cmd_list;

    TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n",
            iface, root_parameter_index, address);

    cmd_list = impl_from_ID3D12GraphicsCommandList(iface);
    d3d12_command_list_set_root_descriptor(cmd_list,
            VK_PIPELINE_BIND_POINT_COMPUTE, root_parameter_index, address);
}
|
|
|
|
|
|
|
|
/* ID3D12GraphicsCommandList::SetGraphicsRootShaderResourceView(). */
static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceView(
        d3d12_command_list_iface *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address)
{
    struct d3d12_command_list *cmd_list;

    TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n",
            iface, root_parameter_index, address);

    cmd_list = impl_from_ID3D12GraphicsCommandList(iface);
    d3d12_command_list_set_root_descriptor(cmd_list,
            VK_PIPELINE_BIND_POINT_GRAPHICS, root_parameter_index, address);
}
|
|
|
|
|
|
|
|
/* ID3D12GraphicsCommandList::SetComputeRootUnorderedAccessView(). */
static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessView(
        d3d12_command_list_iface *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address)
{
    struct d3d12_command_list *cmd_list;

    TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n",
            iface, root_parameter_index, address);

    cmd_list = impl_from_ID3D12GraphicsCommandList(iface);
    d3d12_command_list_set_root_descriptor(cmd_list,
            VK_PIPELINE_BIND_POINT_COMPUTE, root_parameter_index, address);
}
|
|
|
|
|
|
|
|
/* ID3D12GraphicsCommandList::SetGraphicsRootUnorderedAccessView(). */
static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessView(
        d3d12_command_list_iface *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address)
{
    struct d3d12_command_list *cmd_list;

    TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n",
            iface, root_parameter_index, address);

    cmd_list = impl_from_ID3D12GraphicsCommandList(iface);
    d3d12_command_list_set_root_descriptor(cmd_list,
            VK_PIPELINE_BIND_POINT_GRAPHICS, root_parameter_index, address);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::IASetIndexBuffer(). Translates the D3D12 index
 * buffer view into a vkCmdBindIndexBuffer() call. NULL views and formats
 * other than R16_UINT/R32_UINT are rejected with a warning. */
static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(d3d12_command_list_iface *iface,
        const D3D12_INDEX_BUFFER_VIEW *view)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    const struct vkd3d_vk_device_procs *vk_procs;
    struct d3d12_resource *resource;
    enum VkIndexType index_type;

    TRACE("iface %p, view %p.\n", iface, view);

    if (!view)
    {
        WARN("Ignoring NULL index buffer view.\n");
        return;
    }

    vk_procs = &list->device->vk_procs;

    switch (view->Format)
    {
        case DXGI_FORMAT_R16_UINT:
            index_type = VK_INDEX_TYPE_UINT16;
            break;
        case DXGI_FORMAT_R32_UINT:
            index_type = VK_INDEX_TYPE_UINT32;
            break;
        default:
            WARN("Invalid index format %#x.\n", view->Format);
            return;
    }

    /* Remembered so draw-time code can know the bound index format. */
    list->index_buffer_format = view->Format;

    /* BufferLocation is a GPU VA; map it back to the owning resource and
     * compute the byte offset within its Vulkan buffer. */
    resource = vkd3d_gpu_va_allocator_dereference(&list->device->gpu_va_allocator, view->BufferLocation);
    VK_CALL(vkCmdBindIndexBuffer(list->vk_command_buffer, resource->u.vk_buffer,
            view->BufferLocation - resource->gpu_address, index_type));
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::IASetVertexBuffers(). Binds the given vertex
 * buffer views as Vulkan vertex buffers. Views with a zero BufferLocation
 * are replaced with the device's null buffer. Strides are cached in the
 * dynamic state; if any stride changed, the current pipeline is
 * invalidated since vertex strides are baked into the pipeline. */
static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(d3d12_command_list_iface *iface,
        UINT start_slot, UINT view_count, const D3D12_VERTEX_BUFFER_VIEW *views)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    struct vkd3d_dynamic_state *dyn_state = &list->dynamic_state;
    VkDeviceSize offsets[ARRAY_SIZE(dyn_state->vertex_strides)];
    VkBuffer buffers[ARRAY_SIZE(dyn_state->vertex_strides)];
    const struct vkd3d_null_resources *null_resources;
    struct vkd3d_gpu_va_allocator *gpu_va_allocator;
    const struct vkd3d_vk_device_procs *vk_procs;
    struct d3d12_resource *resource;
    bool invalidate = false;
    unsigned int i, stride;

    TRACE("iface %p, start_slot %u, view_count %u, views %p.\n", iface, start_slot, view_count, views);

    vk_procs = &list->device->vk_procs;
    null_resources = &list->device->null_resources;
    gpu_va_allocator = &list->device->gpu_va_allocator;

    if (start_slot >= ARRAY_SIZE(dyn_state->vertex_strides) ||
            view_count > ARRAY_SIZE(dyn_state->vertex_strides) - start_slot)
    {
        WARN("Invalid start slot %u / view count %u.\n", start_slot, view_count);
        return;
    }

    for (i = 0; i < view_count; ++i)
    {
        if (views[i].BufferLocation)
        {
            /* Resolve the GPU VA to the owning resource's Vulkan buffer. */
            resource = vkd3d_gpu_va_allocator_dereference(gpu_va_allocator, views[i].BufferLocation);
            buffers[i] = resource->u.vk_buffer;
            offsets[i] = views[i].BufferLocation - resource->gpu_address;
            stride = views[i].StrideInBytes;
        }
        else
        {
            /* Vulkan has no "unbound" vertex buffer; use the null buffer. */
            buffers[i] = null_resources->vk_buffer;
            offsets[i] = 0;
            stride = 0;
        }

        invalidate |= dyn_state->vertex_strides[start_slot + i] != stride;
        dyn_state->vertex_strides[start_slot + i] = stride;
    }

    if (view_count)
        VK_CALL(vkCmdBindVertexBuffers(list->vk_command_buffer, start_slot, view_count, buffers, offsets));

    /* Strides are part of the pipeline state; a change forces a rebind. */
    if (invalidate)
        d3d12_command_list_invalidate_current_pipeline(list);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* ID3D12GraphicsCommandList::SOSetTargets(). Binds stream-output targets via
 * VK_EXT_transform_feedback. Consecutive valid views are batched into a
 * single vkCmdBindTransformFeedbackBuffersEXT() call; a NULL/empty view
 * flushes the current batch and clears that slot's counter buffer (true
 * unbinding is not supported and only warns). */
static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(d3d12_command_list_iface *iface,
        UINT start_slot, UINT view_count, const D3D12_STREAM_OUTPUT_BUFFER_VIEW *views)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    VkDeviceSize offsets[ARRAY_SIZE(list->so_counter_buffers)];
    VkDeviceSize sizes[ARRAY_SIZE(list->so_counter_buffers)];
    VkBuffer buffers[ARRAY_SIZE(list->so_counter_buffers)];
    struct vkd3d_gpu_va_allocator *gpu_va_allocator;
    const struct vkd3d_vk_device_procs *vk_procs;
    struct d3d12_resource *resource;
    unsigned int i, first, count;

    TRACE("iface %p, start_slot %u, view_count %u, views %p.\n", iface, start_slot, view_count, views);

    /* Transform feedback buffers cannot be rebound inside a render pass. */
    d3d12_command_list_end_current_render_pass(list);

    if (!list->device->vk_info.EXT_transform_feedback)
    {
        FIXME("Transform feedback is not supported by Vulkan implementation.\n");
        return;
    }

    if (start_slot >= ARRAY_SIZE(buffers) || view_count > ARRAY_SIZE(buffers) - start_slot)
    {
        WARN("Invalid start slot %u / view count %u.\n", start_slot, view_count);
        return;
    }

    vk_procs = &list->device->vk_procs;
    gpu_va_allocator = &list->device->gpu_va_allocator;

    /* `first`/`count` track the current contiguous run of valid views. */
    count = 0;
    first = start_slot;
    for (i = 0; i < view_count; ++i)
    {
        if (views[i].BufferLocation && views[i].SizeInBytes)
        {
            resource = vkd3d_gpu_va_allocator_dereference(gpu_va_allocator, views[i].BufferLocation);
            buffers[count] = resource->u.vk_buffer;
            offsets[count] = views[i].BufferLocation - resource->gpu_address;
            sizes[count] = views[i].SizeInBytes;

            /* Remember where the "filled size" counter lives so draws can
             * begin/end transform feedback with the right counter buffer. */
            resource = vkd3d_gpu_va_allocator_dereference(gpu_va_allocator, views[i].BufferFilledSizeLocation);
            list->so_counter_buffers[start_slot + i] = resource->u.vk_buffer;
            list->so_counter_buffer_offsets[start_slot + i] = views[i].BufferFilledSizeLocation - resource->gpu_address;
            ++count;
        }
        else
        {
            /* Gap in the bindings: flush the accumulated run first. */
            if (count)
                VK_CALL(vkCmdBindTransformFeedbackBuffersEXT(list->vk_command_buffer, first, count, buffers, offsets, sizes));
            count = 0;
            first = start_slot + i + 1;

            list->so_counter_buffers[start_slot + i] = VK_NULL_HANDLE;
            list->so_counter_buffer_offsets[start_slot + i] = 0;

            WARN("Trying to unbind transform feedback buffer %u. Ignoring.\n", start_slot + i);
        }
    }

    if (count)
        VK_CALL(vkCmdBindTransformFeedbackBuffersEXT(list->vk_command_buffer, first, count, buffers, offsets, sizes));
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* Binds render target and depth-stencil views to the command list.
 *
 * Copies the RTV/DSV descriptor contents into the command list's own state
 * (list->rtvs / list->dsv) and recomputes the framebuffer dimensions as the
 * minimum over all bound attachments. The actual Vulkan framebuffer and
 * render pass are created lazily, so both are invalidated here. */
static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(d3d12_command_list_iface *iface,
        UINT render_target_descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE *render_target_descriptors,
        BOOL single_descriptor_handle, const D3D12_CPU_DESCRIPTOR_HANDLE *depth_stencil_descriptor)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    const VkPhysicalDeviceLimits *limits = &list->device->vk_info.device_limits;
    VkFormat prev_dsv_format, next_dsv_format;
    const struct d3d12_rtv_desc *rtv_desc;
    const struct d3d12_dsv_desc *dsv_desc;
    unsigned int i;

    TRACE("iface %p, render_target_descriptor_count %u, render_target_descriptors %p, "
            "single_descriptor_handle %#x, depth_stencil_descriptor %p.\n",
            iface, render_target_descriptor_count, render_target_descriptors,
            single_descriptor_handle, depth_stencil_descriptor);

    d3d12_command_list_invalidate_current_framebuffer(list);
    d3d12_command_list_invalidate_current_render_pass(list);

    /* Clamp the RTV count to the fixed-size array in the command list. */
    if (render_target_descriptor_count > ARRAY_SIZE(list->rtvs))
    {
        WARN("Descriptor count %u > %zu, ignoring extra descriptors.\n",
                render_target_descriptor_count, ARRAY_SIZE(list->rtvs));
        render_target_descriptor_count = ARRAY_SIZE(list->rtvs);
    }

    /* Start from the device limits; each bound view shrinks these below. */
    list->fb_width = limits->maxFramebufferWidth;
    list->fb_height = limits->maxFramebufferHeight;
    list->fb_layer_count = limits->maxFramebufferLayers;

    /* Remember the previous DSV format so a format change can be detected
     * after the new bindings are recorded. */
    prev_dsv_format = list->dsv.format ? list->dsv.format->vk_format : VK_FORMAT_UNDEFINED;
    next_dsv_format = VK_FORMAT_UNDEFINED;

    memset(list->rtvs, 0, sizeof(list->rtvs));
    memset(&list->dsv, 0, sizeof(list->dsv));

    for (i = 0; i < render_target_descriptor_count; ++i)
    {
        if (single_descriptor_handle)
        {
            /* A single handle denotes a contiguous descriptor range;
             * index into it. */
            if ((rtv_desc = d3d12_rtv_desc_from_cpu_handle(*render_target_descriptors)))
                rtv_desc += i;
        }
        else
        {
            rtv_desc = d3d12_rtv_desc_from_cpu_handle(render_target_descriptors[i]);
        }

        if (!rtv_desc || !rtv_desc->resource)
        {
            WARN("RTV descriptor %u is not initialized.\n", i);
            continue;
        }

        d3d12_command_list_track_resource_usage(list, rtv_desc->resource);

        /* In D3D12 CPU descriptors are consumed when a command is recorded. */
        if (!d3d12_command_allocator_add_view(list->allocator, rtv_desc->view))
            WARN("Failed to add view.\n");

        list->rtvs[i] = *rtv_desc;
        list->fb_width = min(list->fb_width, rtv_desc->width);
        list->fb_height = min(list->fb_height, rtv_desc->height);
        list->fb_layer_count = min(list->fb_layer_count, rtv_desc->layer_count);
    }

    if (depth_stencil_descriptor)
    {
        if ((dsv_desc = d3d12_dsv_desc_from_cpu_handle(*depth_stencil_descriptor))
                && dsv_desc->resource)
        {
            d3d12_command_list_track_resource_usage(list, dsv_desc->resource);

            /* In D3D12 CPU descriptors are consumed when a command is recorded. */
            if (!d3d12_command_allocator_add_view(list->allocator, dsv_desc->view))
                WARN("Failed to add view.\n");

            list->dsv = *dsv_desc;
            list->fb_width = min(list->fb_width, dsv_desc->width);
            list->fb_height = min(list->fb_height, dsv_desc->height);
            list->fb_layer_count = min(list->fb_layer_count, dsv_desc->layer_count);
            next_dsv_format = dsv_desc->format->vk_format;
        }
        else
        {
            WARN("DSV descriptor is not initialized.\n");
        }
    }

    /* Pipelines compiled with an unknown DSV format depend on the actual
     * bound format, so a DSV format change forces recompilation. */
    if (prev_dsv_format != next_dsv_format && d3d12_pipeline_state_has_unknown_dsv_format(list->state))
        d3d12_command_list_invalidate_current_pipeline(list);
}
|
|
|
|
|
2018-06-26 13:41:51 +01:00
|
|
|
/* Clears a single colour or depth-stencil attachment by recording an empty
 * render pass whose attachment load op performs the clear.
 *
 * A one-attachment, one-subpass render pass and a matching framebuffer are
 * created on the fly; the pass is then begun and immediately ended once per
 * clear rectangle, relying on loadOp = CLEAR and the render area to scope
 * the clear. The render pass and view are handed to the command allocator
 * so they stay alive until the allocator is reset. */
static void d3d12_command_list_clear(struct d3d12_command_list *list,
        const struct VkAttachmentDescription *attachment_desc,
        const struct VkAttachmentReference *color_reference, const struct VkAttachmentReference *ds_reference,
        struct vkd3d_view *view, size_t width, size_t height, unsigned int layer_count,
        const union VkClearValue *clear_value, unsigned int rect_count, const D3D12_RECT *rects)
{
    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
    struct VkSubpassDescription sub_pass_desc;
    struct VkRenderPassCreateInfo pass_desc;
    struct VkRenderPassBeginInfo begin_desc;
    VkFramebuffer vk_framebuffer;
    VkRenderPass vk_render_pass;
    D3D12_RECT full_rect;
    VkExtent3D extent;
    unsigned int i;
    VkResult vr;

    d3d12_command_list_end_current_render_pass(list);

    /* No rects specified: clear the entire view. */
    if (!rect_count)
    {
        full_rect.top = 0;
        full_rect.left = 0;
        full_rect.bottom = height;
        full_rect.right = width;

        rect_count = 1;
        rects = &full_rect;
    }

    sub_pass_desc.flags = 0;
    sub_pass_desc.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
    sub_pass_desc.inputAttachmentCount = 0;
    sub_pass_desc.pInputAttachments = NULL;
    /* Exactly one of color_reference / ds_reference is non-NULL depending
     * on whether this is an RTV or DSV clear. */
    sub_pass_desc.colorAttachmentCount = !!color_reference;
    sub_pass_desc.pColorAttachments = color_reference;
    sub_pass_desc.pResolveAttachments = NULL;
    sub_pass_desc.pDepthStencilAttachment = ds_reference;
    sub_pass_desc.preserveAttachmentCount = 0;
    sub_pass_desc.pPreserveAttachments = NULL;

    pass_desc.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
    pass_desc.pNext = NULL;
    pass_desc.flags = 0;
    pass_desc.attachmentCount = 1;
    pass_desc.pAttachments = attachment_desc;
    pass_desc.subpassCount = 1;
    pass_desc.pSubpasses = &sub_pass_desc;
    pass_desc.dependencyCount = 0;
    pass_desc.pDependencies = NULL;
    if ((vr = VK_CALL(vkCreateRenderPass(list->device->vk_device, &pass_desc, NULL, &vk_render_pass))) < 0)
    {
        WARN("Failed to create Vulkan render pass, vr %d.\n", vr);
        return;
    }

    /* Transfer ownership of the render pass to the allocator; destroy it
     * ourselves if that fails so it does not leak. */
    if (!d3d12_command_allocator_add_render_pass(list->allocator, vk_render_pass))
    {
        WARN("Failed to add render pass.\n");
        VK_CALL(vkDestroyRenderPass(list->device->vk_device, vk_render_pass, NULL));
        return;
    }

    /* Keep the view alive for the lifetime of the recorded commands. */
    if (!d3d12_command_allocator_add_view(list->allocator, view))
    {
        WARN("Failed to add view.\n");
    }

    extent.width = width;
    extent.height = height;
    extent.depth = layer_count;

    if (!d3d12_command_list_create_framebuffer(list, vk_render_pass, 1, &view->u.vk_image_view, extent, &vk_framebuffer))
    {
        ERR("Failed to create framebuffer.\n");
        return;
    }

    begin_desc.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
    begin_desc.pNext = NULL;
    begin_desc.renderPass = vk_render_pass;
    begin_desc.framebuffer = vk_framebuffer;
    begin_desc.clearValueCount = 1;
    begin_desc.pClearValues = clear_value;

    /* One begin/end pair per rect; the load op clears the render area. */
    for (i = 0; i < rect_count; ++i)
    {
        begin_desc.renderArea.offset.x = rects[i].left;
        begin_desc.renderArea.offset.y = rects[i].top;
        begin_desc.renderArea.extent.width = rects[i].right - rects[i].left;
        begin_desc.renderArea.extent.height = rects[i].bottom - rects[i].top;
        VK_CALL(vkCmdBeginRenderPass(list->vk_command_buffer, &begin_desc, VK_SUBPASS_CONTENTS_INLINE));
        VK_CALL(vkCmdEndRenderPass(list->vk_command_buffer));
    }
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* Implements ID3D12GraphicsCommandList::ClearDepthStencilView.
 *
 * Builds a depth-stencil attachment description whose depth and/or stencil
 * load op is CLEAR depending on 'flags', then delegates the actual clear to
 * d3d12_command_list_clear(). Aspects not selected by 'flags' use
 * DONT_CARE load/store ops so their contents are not touched. */
static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(d3d12_command_list_iface *iface,
        D3D12_CPU_DESCRIPTOR_HANDLE dsv, D3D12_CLEAR_FLAGS flags, float depth, UINT8 stencil,
        UINT rect_count, const D3D12_RECT *rects)
{
    const union VkClearValue clear_value = {.depthStencil = {depth, stencil}};
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    const struct d3d12_dsv_desc *dsv_desc = d3d12_dsv_desc_from_cpu_handle(dsv);
    struct VkAttachmentDescription attachment_desc;
    struct VkAttachmentReference ds_reference;

    TRACE("iface %p, dsv %#lx, flags %#x, depth %.8e, stencil 0x%02x, rect_count %u, rects %p.\n",
            iface, dsv.ptr, flags, depth, stencil, rect_count, rects);

    d3d12_command_list_track_resource_usage(list, dsv_desc->resource);

    attachment_desc.flags = 0;
    attachment_desc.format = dsv_desc->format->vk_format;
    attachment_desc.samples = dsv_desc->sample_count;
    if (flags & D3D12_CLEAR_FLAG_DEPTH)
    {
        attachment_desc.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
        attachment_desc.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
    }
    else
    {
        attachment_desc.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
        attachment_desc.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
    }
    if (flags & D3D12_CLEAR_FLAG_STENCIL)
    {
        attachment_desc.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
        attachment_desc.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
    }
    else
    {
        attachment_desc.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
        attachment_desc.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
    }
    /* The image is expected to already be in (and stay in) the depth-stencil
     * attachment layout, so no layout transition is performed by the pass. */
    attachment_desc.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
    attachment_desc.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;

    ds_reference.attachment = 0;
    ds_reference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;

    d3d12_command_list_clear(list, &attachment_desc, NULL, &ds_reference,
            dsv_desc->view, dsv_desc->width, dsv_desc->height, dsv_desc->layer_count,
            &clear_value, rect_count, rects);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* Implements ID3D12GraphicsCommandList::ClearRenderTargetView.
 *
 * Converts the float clear colour to the representation matching the RTV's
 * format type (uint/sint/float), builds a colour attachment description with
 * loadOp = CLEAR, and delegates to d3d12_command_list_clear(). */
static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(d3d12_command_list_iface *iface,
        D3D12_CPU_DESCRIPTOR_HANDLE rtv, const FLOAT color[4], UINT rect_count, const D3D12_RECT *rects)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    const struct d3d12_rtv_desc *rtv_desc = d3d12_rtv_desc_from_cpu_handle(rtv);
    struct VkAttachmentDescription attachment_desc;
    struct VkAttachmentReference color_reference;
    VkClearValue clear_value;

    TRACE("iface %p, rtv %#lx, color %p, rect_count %u, rects %p.\n",
            iface, rtv.ptr, color, rect_count, rects);

    d3d12_command_list_track_resource_usage(list, rtv_desc->resource);

    attachment_desc.flags = 0;
    attachment_desc.format = rtv_desc->format->vk_format;
    attachment_desc.samples = rtv_desc->sample_count;
    attachment_desc.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
    attachment_desc.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
    attachment_desc.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
    attachment_desc.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
    /* No layout transition; the image is assumed to already be in the
     * colour attachment layout. */
    attachment_desc.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
    attachment_desc.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

    color_reference.attachment = 0;
    color_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

    if (rtv_desc->format->type == VKD3D_FORMAT_TYPE_UINT)
    {
        /* NOTE(review): only the lower bound is clamped here; values above
         * the unsigned range rely on the implicit float->uint conversion —
         * confirm this matches the intended D3D12 clamping behaviour. */
        clear_value.color.uint32[0] = max(0, color[0]);
        clear_value.color.uint32[1] = max(0, color[1]);
        clear_value.color.uint32[2] = max(0, color[2]);
        clear_value.color.uint32[3] = max(0, color[3]);
    }
    else if (rtv_desc->format->type == VKD3D_FORMAT_TYPE_SINT)
    {
        clear_value.color.int32[0] = color[0];
        clear_value.color.int32[1] = color[1];
        clear_value.color.int32[2] = color[2];
        clear_value.color.int32[3] = color[3];
    }
    else
    {
        clear_value.color.float32[0] = color[0];
        clear_value.color.float32[1] = color[1];
        clear_value.color.float32[2] = color[2];
        clear_value.color.float32[3] = color[3];
    }

    d3d12_command_list_clear(list, &attachment_desc, &color_reference, NULL,
            rtv_desc->view, rtv_desc->width, rtv_desc->height, rtv_desc->layer_count,
            &clear_value, rect_count, rects);
}
|
|
|
|
|
2019-11-25 14:05:37 +00:00
|
|
|
/* Clears a UAV (storage image or storage texel buffer) using a meta compute
 * pipeline from the device's meta_ops.
 *
 * Binds the view via a transient descriptor set, then dispatches the clear
 * shader once per rect (or once for the full resource when rect_count is 0),
 * passing the clear colour and rect via push constants. Invalidates the
 * currently bound pipeline and compute root parameters as a side effect. */
static void d3d12_command_list_clear_uav(struct d3d12_command_list *list,
        struct d3d12_resource *resource, struct vkd3d_view *view, const VkClearColorValue *clear_color,
        UINT rect_count, const D3D12_RECT *rects)
{
    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
    unsigned int i, miplevel_idx, layer_count;
    struct vkd3d_clear_uav_pipeline pipeline;
    struct vkd3d_clear_uav_args clear_args;
    VkDescriptorImageInfo image_info;
    D3D12_RECT full_rect, curr_rect;
    VkWriteDescriptorSet write_set;
    VkExtent3D workgroup_size;

    d3d12_command_list_track_resource_usage(list, resource);
    d3d12_command_list_end_current_render_pass(list);

    /* The meta pipeline clobbers the compute bind point. */
    d3d12_command_list_invalidate_current_pipeline(list);
    d3d12_command_list_invalidate_root_parameters(list, VK_PIPELINE_BIND_POINT_COMPUTE, true);

    if (!d3d12_command_allocator_add_view(list->allocator, view))
        WARN("Failed to add view.\n");

    clear_args.clear_color = *clear_color;

    write_set.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
    write_set.pNext = NULL;
    write_set.dstBinding = 0;
    write_set.dstArrayElement = 0;
    write_set.descriptorCount = 1;

    if (d3d12_resource_is_texture(resource))
    {
        image_info.sampler = VK_NULL_HANDLE;
        image_info.imageView = view->u.vk_image_view;
        image_info.imageLayout = view->info.texture.vk_layout;

        write_set.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
        write_set.pImageInfo = &image_info;
        write_set.pBufferInfo = NULL;
        write_set.pTexelBufferView = NULL;

        miplevel_idx = view->info.texture.miplevel_idx;
        /* For 3D views the shader's third dispatch dimension covers depth
         * slices of the selected mip; otherwise it covers array layers. */
        layer_count = view->info.texture.vk_view_type == VK_IMAGE_VIEW_TYPE_3D
                ? d3d12_resource_desc_get_depth(&resource->desc, miplevel_idx)
                : view->info.texture.layer_count;
        pipeline = vkd3d_meta_get_clear_image_uav_pipeline(
                &list->device->meta_ops, view->info.texture.vk_view_type,
                view->format->type == VKD3D_FORMAT_TYPE_UINT);
        workgroup_size = vkd3d_meta_get_clear_image_uav_workgroup_size(view->info.texture.vk_view_type);
    }
    else
    {
        write_set.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
        write_set.pImageInfo = NULL;
        write_set.pBufferInfo = NULL;
        write_set.pTexelBufferView = &view->u.vk_buffer_view;

        miplevel_idx = 0;
        layer_count = 1;
        pipeline = vkd3d_meta_get_clear_buffer_uav_pipeline(
                &list->device->meta_ops, view->format->type == VKD3D_FORMAT_TYPE_UINT);
        workgroup_size = vkd3d_meta_get_clear_buffer_uav_workgroup_size();
    }

    if (!(write_set.dstSet = d3d12_command_allocator_allocate_descriptor_set(
            list->allocator, pipeline.vk_set_layout, VKD3D_DESCRIPTOR_POOL_TYPE_STATIC)))
    {
        ERR("Failed to allocate descriptor set.\n");
        return;
    }

    VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &write_set, 0, NULL));

    /* NOTE(review): for buffer views this derives the clear extent from the
     * resource description rather than the view's element range — confirm
     * this is correct for views with a non-zero offset or a format whose
     * element size differs from one byte. */
    full_rect.left = 0;
    full_rect.right = d3d12_resource_desc_get_width(&resource->desc, miplevel_idx);
    full_rect.top = 0;
    full_rect.bottom = d3d12_resource_desc_get_height(&resource->desc, miplevel_idx);

    /* clear full resource if no rects are specified */
    curr_rect = full_rect;

    VK_CALL(vkCmdBindPipeline(list->vk_command_buffer,
            VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.vk_pipeline));

    VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer,
            VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.vk_pipeline_layout,
            0, 1, &write_set.dstSet, 0, NULL));

    /* The "|| !i" clause guarantees exactly one full-resource dispatch when
     * rect_count is zero. */
    for (i = 0; i < rect_count || !i; i++)
    {
        if (rect_count)
        {
            /* clamp to actual resource region and skip empty rects */
            curr_rect.left = max(rects[i].left, full_rect.left);
            curr_rect.top = max(rects[i].top, full_rect.top);
            curr_rect.right = min(rects[i].right, full_rect.right);
            curr_rect.bottom = min(rects[i].bottom, full_rect.bottom);

            if (curr_rect.left >= curr_rect.right || curr_rect.top >= curr_rect.bottom)
                continue;
        }

        clear_args.offset.x = curr_rect.left;
        clear_args.offset.y = curr_rect.top;
        clear_args.extent.width = curr_rect.right - curr_rect.left;
        clear_args.extent.height = curr_rect.bottom - curr_rect.top;

        VK_CALL(vkCmdPushConstants(list->vk_command_buffer,
                pipeline.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT,
                0, sizeof(clear_args), &clear_args));

        VK_CALL(vkCmdDispatch(list->vk_command_buffer,
                vkd3d_compute_workgroup_count(clear_args.extent.width, workgroup_size.width),
                vkd3d_compute_workgroup_count(clear_args.extent.height, workgroup_size.height),
                vkd3d_compute_workgroup_count(layer_count, workgroup_size.depth)));
    }
}
|
|
|
|
|
2020-04-16 16:38:29 +01:00
|
|
|
static const struct vkd3d_format *vkd3d_fixup_clear_uav_uint_color(struct d3d12_device *device,
|
|
|
|
DXGI_FORMAT dxgi_format, VkClearColorValue *color)
|
2019-11-25 14:05:40 +00:00
|
|
|
{
|
|
|
|
switch (dxgi_format)
|
|
|
|
{
|
|
|
|
case DXGI_FORMAT_R11G11B10_FLOAT:
|
2020-04-16 16:38:29 +01:00
|
|
|
color->uint32[0] = (color->uint32[0] & 0x7FF)
|
|
|
|
| ((color->uint32[1] & 0x7FF) << 11)
|
|
|
|
| ((color->uint32[2] & 0x3FF) << 22);
|
2019-11-25 14:05:40 +00:00
|
|
|
return vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false);
|
|
|
|
|
|
|
|
default:
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* Implements ID3D12GraphicsCommandList::ClearUnorderedAccessViewUint.
 *
 * The clear shader requires a UINT view. If the bound view's format is not
 * already UINT, a temporary UINT-typed view over the same subresource range
 * is created (falling back to a packed substitute format for formats like
 * R11G11B10_FLOAT), used for the clear, and released afterwards. */
static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(d3d12_command_list_iface *iface,
        D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource,
        const UINT values[4], UINT rect_count, const D3D12_RECT *rects)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    struct vkd3d_view *base_view, *uint_view;
    struct vkd3d_texture_view_desc view_desc;
    const struct vkd3d_format *uint_format;
    struct d3d12_resource *resource_impl;
    VkClearColorValue color;

    TRACE("iface %p, gpu_handle %#"PRIx64", cpu_handle %lx, resource %p, values %p, rect_count %u, rects %p.\n",
            iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects);

    memcpy(color.uint32, values, sizeof(color.uint32));

    resource_impl = unsafe_impl_from_ID3D12Resource(resource);

    base_view = d3d12_desc_from_cpu_handle(cpu_handle)->u.view;
    uint_view = NULL;

    if (base_view->format->type != VKD3D_FORMAT_TYPE_UINT)
    {
        uint_format = vkd3d_find_uint_format(list->device, base_view->format->dxgi_format);

        /* No direct UINT alias; try the packed-colour fallback, which may
         * also rewrite 'color' to match the substitute format. */
        if (!uint_format && !(uint_format = vkd3d_fixup_clear_uav_uint_color(
                list->device, base_view->format->dxgi_format, &color)))
        {
            ERR("Unhandled format %d.\n", base_view->format->dxgi_format);
            return;
        }

        if (d3d12_resource_is_texture(resource_impl))
        {
            /* Mirror the base view's subresource range, but with the UINT
             * format and a single mip level. */
            memset(&view_desc, 0, sizeof(view_desc));
            view_desc.view_type = base_view->info.texture.vk_view_type;
            view_desc.layout = base_view->info.texture.vk_layout;
            view_desc.format = uint_format;
            view_desc.miplevel_idx = base_view->info.texture.miplevel_idx;
            view_desc.miplevel_count = 1;
            view_desc.layer_idx = base_view->info.texture.layer_idx;
            view_desc.layer_count = base_view->info.texture.layer_count;
            view_desc.allowed_swizzle = false;

            if (!vkd3d_create_texture_view(list->device, resource_impl->u.vk_image, &view_desc, &uint_view))
            {
                ERR("Failed to create image view.\n");
                return;
            }
        }
        else
        {
            if (!vkd3d_create_buffer_view(list->device, resource_impl->u.vk_buffer, uint_format,
                    base_view->info.buffer.offset, base_view->info.buffer.size, &uint_view))
            {
                ERR("Failed to create buffer view.\n");
                return;
            }
        }
    }

    d3d12_command_list_clear_uav(list, resource_impl,
            uint_view ? uint_view : base_view, &color, rect_count, rects);

    /* Release the temporary view; clear_uav() added its own reference via
     * the command allocator. */
    if (uint_view)
        vkd3d_view_decref(uint_view, list->device);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* Implements ID3D12GraphicsCommandList::ClearUnorderedAccessViewFloat.
 * The four float components are forwarded verbatim as the clear colour;
 * the view bound at 'cpu_handle' is used as-is. */
static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(d3d12_command_list_iface *iface,
        D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource,
        const float values[4], UINT rect_count, const D3D12_RECT *rects)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    struct d3d12_resource *resource_impl;
    struct vkd3d_view *uav_view;
    VkClearColorValue clear_color;
    unsigned int component;

    TRACE("iface %p, gpu_handle %#"PRIx64", cpu_handle %lx, resource %p, values %p, rect_count %u, rects %p.\n",
            iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects);

    resource_impl = unsafe_impl_from_ID3D12Resource(resource);
    uav_view = d3d12_desc_from_cpu_handle(cpu_handle)->u.view;

    for (component = 0; component < 4; ++component)
        clear_color.float32[component] = values[component];

    d3d12_command_list_clear_uav(list, resource_impl, uav_view, &clear_color, rect_count, rects);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* Implements ID3D12GraphicsCommandList::DiscardResource.
 * Currently a no-op stub: discarding is a hint that resource contents need
 * not be preserved, so ignoring it is functionally safe. The FIXME is
 * logged only once to avoid log spam. */
static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(d3d12_command_list_iface *iface,
        ID3D12Resource *resource, const D3D12_DISCARD_REGION *region)
{
    FIXME_ONCE("iface %p, resource %p, region %p stub!\n", iface, resource, region);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* Implements ID3D12GraphicsCommandList::BeginQuery.
 *
 * Resets the target query slot (Vulkan requires a reset before reuse), then
 * begins the query. Occlusion queries request precise results to match the
 * D3D12 OCCLUSION query semantics; stream-output statistics queries use the
 * indexed transform-feedback query path. Any active render pass is ended
 * first, since the reset must happen outside a render pass. */
static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(d3d12_command_list_iface *iface,
        ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap);
    const struct vkd3d_vk_device_procs *vk_procs;
    VkQueryControlFlags flags = 0;

    TRACE("iface %p, heap %p, type %#x, index %u.\n", iface, heap, type, index);

    vk_procs = &list->device->vk_procs;

    d3d12_command_list_end_current_render_pass(list);

    VK_CALL(vkCmdResetQueryPool(list->vk_command_buffer, query_heap->vk_query_pool, index, 1));

    if (type == D3D12_QUERY_TYPE_OCCLUSION)
        flags = VK_QUERY_CONTROL_PRECISE_BIT;

    /* SO statistics queries map to indexed queries, one per vertex stream. */
    if (D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0 <= type && type <= D3D12_QUERY_TYPE_SO_STATISTICS_STREAM3)
    {
        unsigned int stream_index = type - D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0;
        VK_CALL(vkCmdBeginQueryIndexedEXT(list->vk_command_buffer,
                query_heap->vk_query_pool, index, flags, stream_index));
        return;
    }

    VK_CALL(vkCmdBeginQuery(list->vk_command_buffer, query_heap->vk_query_pool, index, flags));
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* Implements ID3D12GraphicsCommandList::EndQuery.
 *
 * Marks the result slot as available for later resolve bookkeeping, then
 * ends the query. TIMESTAMP queries have no begin in D3D12, so they are
 * handled entirely here: reset the slot and write the timestamp at the
 * bottom of the pipe. SO statistics queries end via the indexed path. */
static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(d3d12_command_list_iface *iface,
        ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap);
    const struct vkd3d_vk_device_procs *vk_procs;

    TRACE("iface %p, heap %p, type %#x, index %u.\n", iface, heap, type, index);

    vk_procs = &list->device->vk_procs;

    d3d12_command_list_end_current_render_pass(list);

    d3d12_query_heap_mark_result_as_available(query_heap, index);

    if (type == D3D12_QUERY_TYPE_TIMESTAMP)
    {
        /* Timestamps have no matching BeginQuery, so the slot is reset here
         * (outside a render pass) right before the timestamp write. */
        VK_CALL(vkCmdResetQueryPool(list->vk_command_buffer, query_heap->vk_query_pool, index, 1));
        VK_CALL(vkCmdWriteTimestamp(list->vk_command_buffer,
                VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, query_heap->vk_query_pool, index));
        return;
    }

    /* SO statistics queries map to indexed queries, one per vertex stream. */
    if (D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0 <= type && type <= D3D12_QUERY_TYPE_SO_STATISTICS_STREAM3)
    {
        unsigned int stream_index = type - D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0;
        VK_CALL(vkCmdEndQueryIndexedEXT(list->vk_command_buffer,
                query_heap->vk_query_pool, index, stream_index));
        return;
    }

    VK_CALL(vkCmdEndQuery(list->vk_command_buffer, query_heap->vk_query_pool, index));
}
|
|
|
|
|
2019-02-20 12:42:50 +00:00
|
|
|
/* Returns the size in bytes of one resolved result element for the given
 * query type; plain queries resolve to a single 64-bit value. */
static size_t get_query_stride(D3D12_QUERY_TYPE type)
{
    switch (type)
    {
        case D3D12_QUERY_TYPE_PIPELINE_STATISTICS:
            return sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS);

        case D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0:
        case D3D12_QUERY_TYPE_SO_STATISTICS_STREAM1:
        case D3D12_QUERY_TYPE_SO_STATISTICS_STREAM2:
        case D3D12_QUERY_TYPE_SO_STATISTICS_STREAM3:
            return sizeof(D3D12_QUERY_DATA_SO_STATISTICS);

        default:
            return sizeof(uint64_t);
    }
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* Copies resolved query results from a query heap into a destination buffer.
 *
 * Queries for which no result is available (i.e. the query was never issued
 * since the last reset) cannot be copied with vkCmdCopyQueryPoolResults, so
 * available queries are batched into contiguous runs and copied, while each
 * unavailable slot is zero-filled with vkCmdFillBuffer instead. */
static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(d3d12_command_list_iface *iface,
        ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT start_index, UINT query_count,
        ID3D12Resource *dst_buffer, UINT64 aligned_dst_buffer_offset)
{
    const struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap);
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    struct d3d12_resource *buffer = unsafe_impl_from_ID3D12Resource(dst_buffer);
    const struct vkd3d_vk_device_procs *vk_procs;
    unsigned int i, first, count;
    VkDeviceSize offset, stride;

    TRACE("iface %p, heap %p, type %#x, start_index %u, query_count %u, "
            "dst_buffer %p, aligned_dst_buffer_offset %#"PRIx64".\n",
            iface, heap, type, start_index, query_count,
            dst_buffer, aligned_dst_buffer_offset);

    vk_procs = &list->device->vk_procs;

    /* Vulkan is less strict than D3D12 here. Vulkan implementations are free
     * to return any non-zero result for binary occlusion with at least one
     * sample passing, while D3D12 guarantees that the result is 1 then.
     *
     * For example, the Nvidia binary blob drivers on Linux seem to always
     * count precisely, even when it was signalled that non-precise is enough.
     */
    if (type == D3D12_QUERY_TYPE_BINARY_OCCLUSION)
        FIXME_ONCE("D3D12 guarantees binary occlusion queries result in only 0 and 1.\n");

    if (!d3d12_resource_is_buffer(buffer))
    {
        WARN("Destination resource is not a buffer.\n");
        return;
    }

    /* Transfer commands must be recorded outside of a render pass. */
    d3d12_command_list_end_current_render_pass(list);

    stride = get_query_stride(type);

    /* [first, first + count) tracks the current run of available queries;
     * offset is the buffer offset the run will be copied to. */
    count = 0;
    first = start_index;
    offset = aligned_dst_buffer_offset;
    for (i = 0; i < query_count; ++i)
    {
        if (d3d12_query_heap_is_result_available(query_heap, start_index + i))
        {
            ++count;
        }
        else
        {
            /* Flush the run accumulated so far, then zero-fill this slot. */
            if (count)
            {
                VK_CALL(vkCmdCopyQueryPoolResults(list->vk_command_buffer,
                        query_heap->vk_query_pool, first, count, buffer->u.vk_buffer,
                        buffer->heap_offset + offset, stride, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
            }
            count = 0;
            first = start_index + i;
            offset = aligned_dst_buffer_offset + i * stride;

            /* We cannot copy query results if a query was not issued:
             *
             * "If the query does not become available in a finite amount of
             * time (e.g. due to not issuing a query since the last reset),
             * a VK_ERROR_DEVICE_LOST error may occur."
             */
            VK_CALL(vkCmdFillBuffer(list->vk_command_buffer,
                    buffer->u.vk_buffer, buffer->heap_offset + offset, stride, 0x00000000));

            /* Skip past the zero-filled slot for the next run. */
            ++first;
            offset += stride;
        }
    }

    /* Flush the trailing run of available queries, if any. */
    if (count)
    {
        VK_CALL(vkCmdCopyQueryPoolResults(list->vk_command_buffer,
                query_heap->vk_query_pool, first, count, buffer->u.vk_buffer,
                buffer->heap_offset + offset, stride, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
    }
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* Implements D3D12 predication via VK_EXT_conditional_rendering.
 *
 * A non-NULL buffer begins (or restarts) conditional rendering using the
 * 64-bit aligned value at aligned_buffer_offset; a NULL buffer ends it.
 * list->is_predicated tracks whether conditional rendering is active so
 * nested begins are properly terminated first. */
static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(d3d12_command_list_iface *iface,
        ID3D12Resource *buffer, UINT64 aligned_buffer_offset, D3D12_PREDICATION_OP operation)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    struct d3d12_resource *resource = unsafe_impl_from_ID3D12Resource(buffer);
    const struct vkd3d_vulkan_info *vk_info = &list->device->vk_info;
    const struct vkd3d_vk_device_procs *vk_procs;

    TRACE("iface %p, buffer %p, aligned_buffer_offset %#"PRIx64", operation %#x.\n",
            iface, buffer, aligned_buffer_offset, operation);

    /* Without the extension, predication silently degrades to "always draw". */
    if (!vk_info->EXT_conditional_rendering)
    {
        FIXME("Vulkan conditional rendering extension not present. Conditional rendering not supported.\n");
        return;
    }

    vk_procs = &list->device->vk_procs;

    /* FIXME: Add support for conditional rendering in render passes. */
    d3d12_command_list_end_current_render_pass(list);

    if (resource)
    {
        VkConditionalRenderingBeginInfoEXT cond_info;

        /* D3D12 requires the predicate value to be 8-byte aligned. */
        if (aligned_buffer_offset & (sizeof(uint64_t) - 1))
        {
            WARN("Unaligned predicate argument buffer offset %#"PRIx64".\n", aligned_buffer_offset);
            return;
        }

        if (!d3d12_resource_is_buffer(resource))
        {
            WARN("Predicate arguments must be stored in a buffer resource.\n");
            return;
        }

        /* VK_EXT_conditional_rendering only covers draws/dispatches and reads
         * a 32-bit predicate, while D3D12 predicates are 64-bit. */
        FIXME_ONCE("Predication doesn't support clear and copy commands, "
                "and predication values are treated as 32-bit values.\n");

        cond_info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
        cond_info.pNext = NULL;
        cond_info.buffer = resource->u.vk_buffer;
        cond_info.offset = aligned_buffer_offset;
        /* Vulkan draws when the value is non-zero by default; INVERTED flips
         * that, which maps to the two D3D12 predication operations. */
        switch (operation)
        {
            case D3D12_PREDICATION_OP_EQUAL_ZERO:
                cond_info.flags = 0;
                break;

            case D3D12_PREDICATION_OP_NOT_EQUAL_ZERO:
                cond_info.flags = VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
                break;

            default:
                FIXME("Unhandled predication operation %#x.\n", operation);
                return;
        }

        /* Vulkan does not allow nesting; end the previous range first. */
        if (list->is_predicated)
            VK_CALL(vkCmdEndConditionalRenderingEXT(list->vk_command_buffer));
        VK_CALL(vkCmdBeginConditionalRenderingEXT(list->vk_command_buffer, &cond_info));
        list->is_predicated = true;
    }
    else if (list->is_predicated)
    {
        VK_CALL(vkCmdEndConditionalRenderingEXT(list->vk_command_buffer));
        list->is_predicated = false;
    }
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* Stub: debug/PIX markers are not recorded; the call is logged and ignored. */
static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(d3d12_command_list_iface *iface,
        UINT metadata, const void *data, UINT size)
{
    FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* Stub: debug event regions are not recorded; the call is logged and ignored. */
static void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(d3d12_command_list_iface *iface,
        UINT metadata, const void *data, UINT size)
{
    FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* Stub: closes a BeginEvent debug region; currently logged and ignored. */
static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(d3d12_command_list_iface *iface)
{
    FIXME("iface %p stub!\n", iface);
}
|
|
|
|
|
2018-12-04 14:55:55 +00:00
|
|
|
/* ExecuteIndirect passes the D3D12 argument buffer directly to the Vulkan
 * indirect draw/dispatch commands, so the corresponding argument structures
 * must have identical sizes. */
STATIC_ASSERT(sizeof(VkDispatchIndirectCommand) == sizeof(D3D12_DISPATCH_ARGUMENTS));
STATIC_ASSERT(sizeof(VkDrawIndexedIndirectCommand) == sizeof(D3D12_DRAW_INDEXED_ARGUMENTS));
STATIC_ASSERT(sizeof(VkDrawIndirectCommand) == sizeof(D3D12_DRAW_ARGUMENTS));
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* Executes an indirect command signature by translating each argument
 * descriptor to the matching Vulkan indirect command.
 *
 * Draw/indexed-draw arguments map to vkCmdDrawIndirect(Count) and
 * vkCmdDrawIndexedIndirect(Count); count buffers require
 * VK_KHR_draw_indirect_count. Dispatch maps to vkCmdDispatchIndirect, which
 * supports neither a count buffer nor max_command_count > 1 here. */
static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(d3d12_command_list_iface *iface,
        ID3D12CommandSignature *command_signature, UINT max_command_count, ID3D12Resource *arg_buffer,
        UINT64 arg_buffer_offset, ID3D12Resource *count_buffer, UINT64 count_buffer_offset)
{
    struct d3d12_command_signature *sig_impl = unsafe_impl_from_ID3D12CommandSignature(command_signature);
    struct d3d12_resource *count_impl = unsafe_impl_from_ID3D12Resource(count_buffer);
    struct d3d12_resource *arg_impl = unsafe_impl_from_ID3D12Resource(arg_buffer);
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    const D3D12_COMMAND_SIGNATURE_DESC *signature_desc;
    const struct vkd3d_vk_device_procs *vk_procs;
    unsigned int i;

    TRACE("iface %p, command_signature %p, max_command_count %u, arg_buffer %p, "
            "arg_buffer_offset %#"PRIx64", count_buffer %p, count_buffer_offset %#"PRIx64".\n",
            iface, command_signature, max_command_count, arg_buffer, arg_buffer_offset,
            count_buffer, count_buffer_offset);

    vk_procs = &list->device->vk_procs;

    if (count_buffer && !list->device->vk_info.KHR_draw_indirect_count)
    {
        FIXME("Count buffers not supported by Vulkan implementation.\n");
        return;
    }

    signature_desc = &sig_impl->desc;
    for (i = 0; i < signature_desc->NumArgumentDescs; ++i)
    {
        const D3D12_INDIRECT_ARGUMENT_DESC *arg_desc = &signature_desc->pArgumentDescs[i];

        switch (arg_desc->Type)
        {
            case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW:
                /* Draws must be recorded inside a render pass. */
                if (!d3d12_command_list_begin_render_pass(list))
                {
                    WARN("Failed to begin render pass, ignoring draw.\n");
                    break;
                }

                if (count_buffer)
                {
                    VK_CALL(vkCmdDrawIndirectCountKHR(list->vk_command_buffer, arg_impl->u.vk_buffer,
                            arg_buffer_offset, count_impl->u.vk_buffer, count_buffer_offset,
                            max_command_count, signature_desc->ByteStride));
                }
                else
                {
                    VK_CALL(vkCmdDrawIndirect(list->vk_command_buffer, arg_impl->u.vk_buffer,
                            arg_buffer_offset, max_command_count, signature_desc->ByteStride));
                }
                break;

            case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED:
                if (!d3d12_command_list_begin_render_pass(list))
                {
                    WARN("Failed to begin render pass, ignoring draw.\n");
                    break;
                }

                /* Primitive restart behaviour depends on the index buffer
                 * strip cut value; validate it before the indexed draw. */
                d3d12_command_list_check_index_buffer_strip_cut_value(list);

                if (count_buffer)
                {
                    VK_CALL(vkCmdDrawIndexedIndirectCountKHR(list->vk_command_buffer, arg_impl->u.vk_buffer,
                            arg_buffer_offset, count_impl->u.vk_buffer, count_buffer_offset,
                            max_command_count, signature_desc->ByteStride));
                }
                else
                {
                    VK_CALL(vkCmdDrawIndexedIndirect(list->vk_command_buffer, arg_impl->u.vk_buffer,
                            arg_buffer_offset, max_command_count, signature_desc->ByteStride));
                }
                break;

            case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH:
                /* vkCmdDispatchIndirect consumes exactly one argument struct. */
                if (max_command_count != 1)
                    FIXME("Ignoring command count %u.\n", max_command_count);

                if (count_buffer)
                {
                    FIXME("Count buffers not supported for indirect dispatch.\n");
                    break;
                }

                if (!d3d12_command_list_update_compute_state(list))
                {
                    WARN("Failed to update compute state, ignoring dispatch.\n");
                    /* NOTE(review): this returns (abandoning remaining
                     * argument descs) where the draw cases break — confirm
                     * this asymmetry is intentional. */
                    return;
                }

                VK_CALL(vkCmdDispatchIndirect(list->vk_command_buffer,
                        arg_impl->u.vk_buffer, arg_buffer_offset));
                break;

            default:
                FIXME("Ignoring unhandled argument type %#x.\n", arg_desc->Type);
                break;
        }
    }
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* Stub: 32-bit atomic copy (ID3D12GraphicsCommandList1) is not implemented. */
static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(d3d12_command_list_iface *iface,
        ID3D12Resource *dst_buffer, UINT64 dst_offset,
        ID3D12Resource *src_buffer, UINT64 src_offset,
        UINT dependent_resource_count, ID3D12Resource * const *dependent_resources,
        const D3D12_SUBRESOURCE_RANGE_UINT64 *dependent_sub_resource_ranges)
{
    FIXME("iface %p, dst_resource %p, dst_offset %#"PRIx64", src_resource %p, "
            "src_offset %#"PRIx64", dependent_resource_count %u, "
            "dependent_resources %p, dependent_sub_resource_ranges %p stub!\n",
            iface, dst_buffer, dst_offset, src_buffer, src_offset,
            dependent_resource_count, dependent_resources, dependent_sub_resource_ranges);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* Stub: 64-bit atomic copy (ID3D12GraphicsCommandList1) is not implemented. */
static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(d3d12_command_list_iface *iface,
        ID3D12Resource *dst_buffer, UINT64 dst_offset,
        ID3D12Resource *src_buffer, UINT64 src_offset,
        UINT dependent_resource_count, ID3D12Resource * const *dependent_resources,
        const D3D12_SUBRESOURCE_RANGE_UINT64 *dependent_sub_resource_ranges)
{
    FIXME("iface %p, dst_resource %p, dst_offset %#"PRIx64", src_resource %p, "
            "src_offset %#"PRIx64", dependent_resource_count %u, "
            "dependent_resources %p, dependent_sub_resource_ranges %p stub!\n",
            iface, dst_buffer, dst_offset, src_buffer, src_offset,
            dependent_resource_count, dependent_resources, dependent_sub_resource_ranges);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* Records the depth-bounds test range in the list's dynamic state and marks
 * the depth-bounds state dirty so it is re-applied on the next draw. */
static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(d3d12_command_list_iface *iface,
        FLOAT min, FLOAT max)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);

    TRACE("iface %p, min %.8e, max %.8e.\n", iface, min, max);

    list->dynamic_state.min_depth_bounds = min;
    list->dynamic_state.max_depth_bounds = max;
    list->dynamic_state.dirty_flags |= VKD3D_DYNAMIC_STATE_DEPTH_BOUNDS;
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* Stub: programmable sample positions are not implemented. */
static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(d3d12_command_list_iface *iface,
        UINT sample_count, UINT pixel_count, D3D12_SAMPLE_POSITION *sample_positions)
{
    FIXME("iface %p, sample_count %u, pixel_count %u, sample_positions %p stub!\n",
            iface, sample_count, pixel_count, sample_positions);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* Stub: region/mode-based subresource resolves are not implemented. */
static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(d3d12_command_list_iface *iface,
        ID3D12Resource *dst_resource, UINT dst_sub_resource_idx, UINT dst_x, UINT dst_y,
        ID3D12Resource *src_resource, UINT src_sub_resource_idx,
        D3D12_RECT *src_rect, DXGI_FORMAT format, D3D12_RESOLVE_MODE mode)
{
    FIXME("iface %p, dst_resource %p, dst_sub_resource_idx %u, "
            "dst_x %u, dst_y %u, src_resource %p, src_sub_resource_idx %u, "
            "src_rect %p, format %#x, mode %#x stub!\n",
            iface, dst_resource, dst_sub_resource_idx, dst_x, dst_y,
            src_resource, src_sub_resource_idx, src_rect, format, mode);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* Stub: view instancing masks are not implemented. */
static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(d3d12_command_list_iface *iface, UINT mask)
{
    FIXME("iface %p, mask %#x stub!\n", iface, mask);
}
|
|
|
|
|
2020-03-30 15:44:12 +01:00
|
|
|
/* Partial stub: the immediate writes themselves are not recorded (hence the
 * FIXME), but the destination resources are still resolved from their GPU
 * virtual addresses and tracked for usage by this command list. */
static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(d3d12_command_list_iface *iface,
        UINT count, const D3D12_WRITEBUFFERIMMEDIATE_PARAMETER *parameters,
        const D3D12_WRITEBUFFERIMMEDIATE_MODE *modes)
{
    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
    struct d3d12_resource *resource;
    unsigned int i;

    FIXME("iface %p, count %u, parameters %p, modes %p stub!\n", iface, count, parameters, modes);

    for (i = 0; i < count; ++i)
    {
        /* NOTE(review): the dereference result is passed on unchecked —
         * presumably it cannot be NULL for a valid Dest address; confirm
         * d3d12_command_list_track_resource_usage tolerates NULL otherwise. */
        resource = vkd3d_gpu_va_allocator_dereference(&list->device->gpu_va_allocator, parameters[i].Dest);
        d3d12_command_list_track_resource_usage(list, resource);
    }
}
|
|
|
|
|
2020-04-14 11:46:59 +01:00
|
|
|
/* Stub: protected resource sessions are not implemented. */
static void STDMETHODCALLTYPE d3d12_command_list_SetProtectedResourceSession(d3d12_command_list_iface *iface,
        ID3D12ProtectedResourceSession *protected_session)
{
    FIXME("iface %p, protected_session %p stub!\n", iface, protected_session);
}
|
|
|
|
|
2020-04-15 07:59:46 +01:00
|
|
|
/* Stub: the explicit D3D12 render pass API (GCL4) is not implemented. */
static void STDMETHODCALLTYPE d3d12_command_list_BeginRenderPass(d3d12_command_list_iface *iface,
        UINT rt_count, const D3D12_RENDER_PASS_RENDER_TARGET_DESC *render_targets,
        const D3D12_RENDER_PASS_DEPTH_STENCIL_DESC *depth_stencil, D3D12_RENDER_PASS_FLAGS flags)
{
    FIXME("iface %p, rt_count %u, render_targets %p, depth_stencil %p, flags %#x stub!\n",
            iface, rt_count, render_targets, depth_stencil, flags);
}
|
|
|
|
|
|
|
|
/* Stub: closes a BeginRenderPass scope; not implemented. */
static void STDMETHODCALLTYPE d3d12_command_list_EndRenderPass(d3d12_command_list_iface *iface)
{
    FIXME("iface %p stub!\n", iface);
}
|
|
|
|
|
|
|
|
/* Stub: meta commands are not implemented. */
static void STDMETHODCALLTYPE d3d12_command_list_InitializeMetaCommand(d3d12_command_list_iface *iface,
        ID3D12MetaCommand *meta_command, const void *parameter_data, SIZE_T parameter_size)
{
    FIXME("iface %p, meta_command %p, parameter_data %p, parameter_size %lu stub!\n",
            iface, meta_command, parameter_data, parameter_size);
}
|
|
|
|
|
|
|
|
/* Stub: meta commands are not implemented. */
static void STDMETHODCALLTYPE d3d12_command_list_ExecuteMetaCommand(d3d12_command_list_iface *iface,
        ID3D12MetaCommand *meta_command, const void *parameter_data, SIZE_T parameter_size)
{
    FIXME("iface %p, meta_command %p, parameter_data %p, parameter_size %lu stub!\n",
            iface, meta_command, parameter_data, parameter_size);
}
|
|
|
|
|
|
|
|
/* Stub: DXR acceleration structure builds are not implemented. */
static void STDMETHODCALLTYPE d3d12_command_list_BuildRaytracingAccelerationStructure(d3d12_command_list_iface *iface,
        const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC *desc, UINT num_postbuild_info_descs,
        const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *postbuild_info_descs)
{
    FIXME("iface %p, desc %p, num_postbuild_info_descs %u, postbuild_info_descs %p stub!\n",
            iface, desc, num_postbuild_info_descs, postbuild_info_descs);
}
|
|
|
|
|
|
|
|
/* Stub: DXR postbuild info queries are not implemented. */
static void STDMETHODCALLTYPE d3d12_command_list_EmitRaytracingAccelerationStructurePostbuildInfo(d3d12_command_list_iface *iface,
        const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *desc, UINT num_acceleration_structures,
        const D3D12_GPU_VIRTUAL_ADDRESS *src_data)
{
    FIXME("iface %p, desc %p, num_acceleration_structures %u, src_data %p stub!\n",
            iface, desc, num_acceleration_structures, src_data);
}
|
|
|
|
|
|
|
|
/* Stub: DXR acceleration structure copies are not implemented. */
static void STDMETHODCALLTYPE d3d12_command_list_CopyRaytracingAccelerationStructure(d3d12_command_list_iface *iface,
        D3D12_GPU_VIRTUAL_ADDRESS dst_data, D3D12_GPU_VIRTUAL_ADDRESS src_data,
        D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE mode)
{
    FIXME("iface %p, dst_data %#"PRIx64", src_data %#"PRIx64", mode %u stub!\n",
            iface, dst_data, src_data, mode);
}
|
|
|
|
|
|
|
|
/* Stub: raytracing state objects are not implemented. */
static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState1(d3d12_command_list_iface *iface,
        ID3D12StateObject *state_object)
{
    FIXME("iface %p, state_object %p stub!\n", iface, state_object);
}
|
|
|
|
|
|
|
|
/* Stub: DXR ray dispatch is not implemented. */
static void STDMETHODCALLTYPE d3d12_command_list_DispatchRays(d3d12_command_list_iface *iface,
        const D3D12_DISPATCH_RAYS_DESC *desc)
{
    FIXME("iface %p, desc %p stub!\n", iface, desc);
}
|
|
|
|
|
2020-04-15 13:11:03 +01:00
|
|
|
/* Stub: variable-rate shading (GCL5) is not implemented. */
static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRate(d3d12_command_list_iface *iface,
        D3D12_SHADING_RATE base, const D3D12_SHADING_RATE_COMBINER *combiners)
{
    FIXME("iface %p, base %#x, combiners %p stub!\n", iface, base, combiners);
}
|
|
|
|
|
|
|
|
/* Stub: shading-rate images (GCL5) are not implemented. */
static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRateImage(d3d12_command_list_iface *iface,
        ID3D12Resource *image)
{
    FIXME("iface %p, image %p stub!\n", iface, image);
}
|
|
|
|
|
|
|
|
/* COM vtable for the command list, covering ID3D12GraphicsCommandList through
 * ID3D12GraphicsCommandList5. Entry order must exactly match the interface
 * declaration; do not reorder. */
static const struct ID3D12GraphicsCommandList5Vtbl d3d12_command_list_vtbl =
{
    /* IUnknown methods */
    d3d12_command_list_QueryInterface,
    d3d12_command_list_AddRef,
    d3d12_command_list_Release,
    /* ID3D12Object methods */
    d3d12_command_list_GetPrivateData,
    d3d12_command_list_SetPrivateData,
    d3d12_command_list_SetPrivateDataInterface,
    d3d12_command_list_SetName,
    /* ID3D12DeviceChild methods */
    d3d12_command_list_GetDevice,
    /* ID3D12CommandList methods */
    d3d12_command_list_GetType,
    /* ID3D12GraphicsCommandList methods */
    d3d12_command_list_Close,
    d3d12_command_list_Reset,
    d3d12_command_list_ClearState,
    d3d12_command_list_DrawInstanced,
    d3d12_command_list_DrawIndexedInstanced,
    d3d12_command_list_Dispatch,
    d3d12_command_list_CopyBufferRegion,
    d3d12_command_list_CopyTextureRegion,
    d3d12_command_list_CopyResource,
    d3d12_command_list_CopyTiles,
    d3d12_command_list_ResolveSubresource,
    d3d12_command_list_IASetPrimitiveTopology,
    d3d12_command_list_RSSetViewports,
    d3d12_command_list_RSSetScissorRects,
    d3d12_command_list_OMSetBlendFactor,
    d3d12_command_list_OMSetStencilRef,
    d3d12_command_list_SetPipelineState,
    d3d12_command_list_ResourceBarrier,
    d3d12_command_list_ExecuteBundle,
    d3d12_command_list_SetDescriptorHeaps,
    d3d12_command_list_SetComputeRootSignature,
    d3d12_command_list_SetGraphicsRootSignature,
    d3d12_command_list_SetComputeRootDescriptorTable,
    d3d12_command_list_SetGraphicsRootDescriptorTable,
    d3d12_command_list_SetComputeRoot32BitConstant,
    d3d12_command_list_SetGraphicsRoot32BitConstant,
    d3d12_command_list_SetComputeRoot32BitConstants,
    d3d12_command_list_SetGraphicsRoot32BitConstants,
    d3d12_command_list_SetComputeRootConstantBufferView,
    d3d12_command_list_SetGraphicsRootConstantBufferView,
    d3d12_command_list_SetComputeRootShaderResourceView,
    d3d12_command_list_SetGraphicsRootShaderResourceView,
    d3d12_command_list_SetComputeRootUnorderedAccessView,
    d3d12_command_list_SetGraphicsRootUnorderedAccessView,
    d3d12_command_list_IASetIndexBuffer,
    d3d12_command_list_IASetVertexBuffers,
    d3d12_command_list_SOSetTargets,
    d3d12_command_list_OMSetRenderTargets,
    d3d12_command_list_ClearDepthStencilView,
    d3d12_command_list_ClearRenderTargetView,
    d3d12_command_list_ClearUnorderedAccessViewUint,
    d3d12_command_list_ClearUnorderedAccessViewFloat,
    d3d12_command_list_DiscardResource,
    d3d12_command_list_BeginQuery,
    d3d12_command_list_EndQuery,
    d3d12_command_list_ResolveQueryData,
    d3d12_command_list_SetPredication,
    d3d12_command_list_SetMarker,
    d3d12_command_list_BeginEvent,
    d3d12_command_list_EndEvent,
    d3d12_command_list_ExecuteIndirect,
    /* ID3D12GraphicsCommandList1 methods */
    d3d12_command_list_AtomicCopyBufferUINT,
    d3d12_command_list_AtomicCopyBufferUINT64,
    d3d12_command_list_OMSetDepthBounds,
    d3d12_command_list_SetSamplePositions,
    d3d12_command_list_ResolveSubresourceRegion,
    d3d12_command_list_SetViewInstanceMask,
    /* ID3D12GraphicsCommandList2 methods */
    d3d12_command_list_WriteBufferImmediate,
    /* ID3D12GraphicsCommandList3 methods */
    d3d12_command_list_SetProtectedResourceSession,
    /* ID3D12GraphicsCommandList4 methods */
    d3d12_command_list_BeginRenderPass,
    d3d12_command_list_EndRenderPass,
    d3d12_command_list_InitializeMetaCommand,
    d3d12_command_list_ExecuteMetaCommand,
    d3d12_command_list_BuildRaytracingAccelerationStructure,
    d3d12_command_list_EmitRaytracingAccelerationStructurePostbuildInfo,
    d3d12_command_list_CopyRaytracingAccelerationStructure,
    d3d12_command_list_SetPipelineState1,
    d3d12_command_list_DispatchRays,
    /* ID3D12GraphicsCommandList5 methods */
    d3d12_command_list_RSSetShadingRate,
    d3d12_command_list_RSSetShadingRateImage,
};
|
|
|
|
|
2016-09-28 12:09:12 +01:00
|
|
|
/* Converts a public ID3D12CommandList pointer to the implementation struct.
 * "unsafe" because the cast is only checked by an assert on the vtable; a
 * NULL interface maps to a NULL implementation pointer. */
static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12CommandList *iface)
{
    if (iface)
    {
        assert(iface->lpVtbl == (struct ID3D12CommandListVtbl *)&d3d12_command_list_vtbl);
        return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList_iface);
    }
    return NULL;
}
|
|
|
|
|
2016-09-27 11:13:37 +01:00
|
|
|
/* Initialises a freshly allocated command list object.
 *
 * Takes a device reference on success. If an allocator is given, a command
 * buffer is allocated from it and the list is reset to its initial state; on
 * allocation failure the private store and device reference are rolled back.
 * Note the allocator may be NULL, in which case no command buffer is
 * allocated here and the hr from vkd3d_private_store_init (a success code at
 * this point) is returned. */
static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d3d12_device *device,
        D3D12_COMMAND_LIST_TYPE type, struct d3d12_command_allocator *allocator,
        ID3D12PipelineState *initial_pipeline_state)
{
    HRESULT hr;

    memset(list, 0, sizeof(*list));
    list->ID3D12GraphicsCommandList_iface.lpVtbl = &d3d12_command_list_vtbl;
    list->refcount = 1;

    list->type = type;

    if (FAILED(hr = vkd3d_private_store_init(&list->private_store)))
        return hr;

    d3d12_device_add_ref(list->device = device);

    if ((list->allocator = allocator))
    {
        if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator, list)))
        {
            d3d12_command_list_reset_state(list, initial_pipeline_state);
        }
        else
        {
            /* Undo the resources acquired above so the caller can free the
             * object without further cleanup. */
            vkd3d_private_store_destroy(&list->private_store);
            d3d12_device_release(device);
        }
    }

    return hr;
}
|
|
|
|
|
|
|
|
HRESULT d3d12_command_list_create(struct d3d12_device *device,
|
|
|
|
UINT node_mask, D3D12_COMMAND_LIST_TYPE type, ID3D12CommandAllocator *allocator_iface,
|
|
|
|
ID3D12PipelineState *initial_pipeline_state, struct d3d12_command_list **list)
|
|
|
|
{
|
2020-04-14 12:42:43 +01:00
|
|
|
struct d3d12_command_allocator *allocator = unsafe_impl_from_ID3D12CommandAllocator(allocator_iface);
|
2016-09-21 15:18:13 +01:00
|
|
|
struct d3d12_command_list *object;
|
2016-09-27 11:13:37 +01:00
|
|
|
HRESULT hr;
|
2016-09-21 15:18:13 +01:00
|
|
|
|
2018-07-20 13:30:17 +01:00
|
|
|
debug_ignored_node_mask(node_mask);
|
2016-09-21 15:18:13 +01:00
|
|
|
|
|
|
|
if (!(object = vkd3d_malloc(sizeof(*object))))
|
|
|
|
return E_OUTOFMEMORY;
|
|
|
|
|
2016-09-27 11:13:37 +01:00
|
|
|
if (FAILED(hr = d3d12_command_list_init(object, device, type, allocator, initial_pipeline_state)))
|
|
|
|
{
|
|
|
|
vkd3d_free(object);
|
|
|
|
return hr;
|
|
|
|
}
|
2016-09-21 15:18:13 +01:00
|
|
|
|
|
|
|
TRACE("Created command list %p.\n", object);
|
|
|
|
|
|
|
|
*list = object;
|
|
|
|
|
|
|
|
return S_OK;
|
|
|
|
}
|
|
|
|
|
2016-09-21 13:41:31 +01:00
|
|
|
/* ID3D12CommandQueue */
|
|
|
|
/* Converts a public ID3D12CommandQueue pointer to the implementation struct. */
static inline struct d3d12_command_queue *impl_from_ID3D12CommandQueue(ID3D12CommandQueue *iface)
{
    return CONTAINING_RECORD(iface, struct d3d12_command_queue, ID3D12CommandQueue_iface);
}
|
|
|
|
|
|
|
|
/* IUnknown::QueryInterface for the command queue. Supports the
 * ID3D12CommandQueue inheritance chain down to IUnknown; any other IID
 * yields E_NOINTERFACE with *object cleared. */
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12CommandQueue *iface,
        REFIID riid, void **object)
{
    TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object);

    /* Reject unsupported interfaces up front. */
    if (!IsEqualGUID(riid, &IID_ID3D12CommandQueue)
            && !IsEqualGUID(riid, &IID_ID3D12Pageable)
            && !IsEqualGUID(riid, &IID_ID3D12DeviceChild)
            && !IsEqualGUID(riid, &IID_ID3D12Object)
            && !IsEqualGUID(riid, &IID_IUnknown))
    {
        WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid));

        *object = NULL;
        return E_NOINTERFACE;
    }

    /* All supported interfaces share this object's single vtable. */
    ID3D12CommandQueue_AddRef(iface);
    *object = iface;
    return S_OK;
}
|
|
|
|
|
|
|
|
/* IUnknown::AddRef — atomically increments and returns the queue refcount. */
static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *iface)
{
    struct d3d12_command_queue *queue = impl_from_ID3D12CommandQueue(iface);
    ULONG new_refcount = InterlockedIncrement(&queue->refcount);

    TRACE("%p increasing refcount to %u.\n", queue, new_refcount);

    return new_refcount;
}
|
|
|
|
|
|
|
|
/* IUnknown::Release() for the command queue. On the final release this tears
 * down the per-queue submission thread before freeing the object. */
static ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(ID3D12CommandQueue *iface)
{
    struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
    ULONG refcount = InterlockedDecrement(&command_queue->refcount);

    TRACE("%p decreasing refcount to %u.\n", command_queue, refcount);

    if (!refcount)
    {
        /* Saved so the device reference can be dropped after
         * command_queue itself has been freed. */
        struct d3d12_device *device = command_queue->device;

        vkd3d_private_store_destroy(&command_queue->private_store);

        /* Signal the submission thread to exit and wait for it before
         * destroying the lock/condvar it blocks on. */
        d3d12_command_queue_submit_stop(command_queue);
        pthread_join(command_queue->submission_thread, NULL);
        pthread_mutex_destroy(&command_queue->queue_lock);
        pthread_cond_destroy(&command_queue->queue_cond);

        /* Pending submission array owned by the queue. */
        vkd3d_free(command_queue->submissions);
        vkd3d_free(command_queue);

        /* Drop the device reference; NOTE(review): presumably taken at queue
         * creation time — creation code is outside this chunk, confirm there. */
        d3d12_device_release(device);
    }

    return refcount;
}
|
|
|
|
|
|
|
|
/* ID3D12Object::GetPrivateData(), backed by the queue's private data store. */
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetPrivateData(ID3D12CommandQueue *iface,
        REFGUID guid, UINT *data_size, void *data)
{
    struct d3d12_command_queue *queue = impl_from_ID3D12CommandQueue(iface);

    TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data);

    return vkd3d_get_private_data(&queue->private_store, guid, data_size, data);
}
|
|
|
|
|
|
|
|
/* ID3D12Object::SetPrivateData(), backed by the queue's private data store. */
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_SetPrivateData(ID3D12CommandQueue *iface,
        REFGUID guid, UINT data_size, const void *data)
{
    struct d3d12_command_queue *queue = impl_from_ID3D12CommandQueue(iface);

    TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data);

    return vkd3d_set_private_data(&queue->private_store, guid, data_size, data);
}
|
|
|
|
|
|
|
|
/* ID3D12Object::SetPrivateDataInterface(), storing an IUnknown in the queue's
 * private data store. */
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_SetPrivateDataInterface(ID3D12CommandQueue *iface,
        REFGUID guid, const IUnknown *data)
{
    struct d3d12_command_queue *queue = impl_from_ID3D12CommandQueue(iface);

    TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data);

    return vkd3d_set_private_data_interface(&queue->private_store, guid, data);
}
|
|
|
|
|
|
|
|
/* ID3D12Object::SetName(). Forwards the debug name to the underlying VkQueue;
 * the Vulkan queue must be acquired (locked) while the name is applied. */
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_SetName(ID3D12CommandQueue *iface, const WCHAR *name)
{
    struct d3d12_command_queue *queue = impl_from_ID3D12CommandQueue(iface);
    VkQueue vk_queue;
    HRESULT hr;

    TRACE("iface %p, name %s.\n", iface, debugstr_w(name, queue->device->wchar_size));

    vk_queue = vkd3d_queue_acquire(queue->vkd3d_queue);
    if (!vk_queue)
    {
        ERR("Failed to acquire queue %p.\n", queue->vkd3d_queue);
        return E_FAIL;
    }

    hr = vkd3d_set_vk_object_name(queue->device, (uint64_t)(uintptr_t)vk_queue,
            VK_DEBUG_REPORT_OBJECT_TYPE_QUEUE_EXT, name);
    vkd3d_queue_release(queue->vkd3d_queue);

    return hr;
}
|
|
|
|
|
2019-06-07 13:38:03 +01:00
|
|
|
/* ID3D12DeviceChild::GetDevice(). Delegates to the owning device's
 * QueryInterface implementation. */
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetDevice(ID3D12CommandQueue *iface, REFIID iid, void **device)
{
    struct d3d12_command_queue *queue = impl_from_ID3D12CommandQueue(iface);

    TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device);

    return d3d12_device_query_interface(queue->device, iid, device);
}
|
|
|
|
|
|
|
|
/* ID3D12CommandQueue::UpdateTileMappings(). Tiled (sparse) resource mapping
 * is not implemented; this stub only logs the call. */
static void STDMETHODCALLTYPE d3d12_command_queue_UpdateTileMappings(ID3D12CommandQueue *iface,
        ID3D12Resource *resource, UINT region_count,
        const D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates,
        const D3D12_TILE_REGION_SIZE *region_sizes,
        UINT range_count,
        const D3D12_TILE_RANGE_FLAGS *range_flags,
        UINT *heap_range_offsets,
        UINT *range_tile_counts,
        D3D12_TILE_MAPPING_FLAGS flags)
{
    FIXME("iface %p, resource %p, region_count %u, region_start_coordinates %p, "
            "region_sizes %p, range_count %u, range_flags %p, heap_range_offsets %p, "
            "range_tile_counts %p, flags %#x stub!\n",
            iface, resource, region_count, region_start_coordinates, region_sizes, range_count,
            range_flags, heap_range_offsets, range_tile_counts, flags);
}
|
|
|
|
|
|
|
|
/* ID3D12CommandQueue::CopyTileMappings(). Tiled (sparse) resource mapping
 * is not implemented; this stub only logs the call. */
static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12CommandQueue *iface,
        ID3D12Resource *dst_resource,
        const D3D12_TILED_RESOURCE_COORDINATE *dst_region_start_coordinate,
        ID3D12Resource *src_resource,
        const D3D12_TILED_RESOURCE_COORDINATE *src_region_start_coordinate,
        const D3D12_TILE_REGION_SIZE *region_size,
        D3D12_TILE_MAPPING_FLAGS flags)
{
    FIXME("iface %p, dst_resource %p, dst_region_start_coordinate %p, "
            "src_resource %p, src_region_start_coordinate %p, region_size %p, flags %#x stub!\n",
            iface, dst_resource, dst_region_start_coordinate, src_resource,
            src_region_start_coordinate, region_size, flags);
}
|
|
|
|
|
|
|
|
/* ID3D12CommandQueue::ExecuteCommandLists(). Gathers the Vulkan command
 * buffers from the given command lists and enqueues an EXECUTE submission for
 * this queue's submission thread; the actual vkQueueSubmit() happens there. */
static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12CommandQueue *iface,
        UINT command_list_count, ID3D12CommandList * const *command_lists)
{
    struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
    struct d3d12_command_queue_submission sub;
    struct d3d12_command_list *cmd_list;
    VkCommandBuffer *buffers;
    LONG **outstanding;
    unsigned int i, j;

    TRACE("iface %p, command_list_count %u, command_lists %p.\n",
            iface, command_list_count, command_lists);

    if (!(buffers = vkd3d_calloc(command_list_count, sizeof(*buffers))))
    {
        ERR("Failed to allocate command buffer array.\n");
        return;
    }

    if (!(outstanding = vkd3d_calloc(command_list_count, sizeof(*outstanding))))
    {
        ERR("Failed to allocate outstanding submissions count.\n");
        /* Fix: don't leak the command buffer array on this error path. */
        vkd3d_free(buffers);
        return;
    }

    for (i = 0; i < command_list_count; ++i)
    {
        cmd_list = unsafe_impl_from_ID3D12CommandList(command_lists[i]);

        if (cmd_list->is_recording)
        {
            d3d12_device_mark_as_removed(command_queue->device, DXGI_ERROR_INVALID_CALL,
                    "Command list %p is in recording state.\n", command_lists[i]);
            /* Fix: roll back the in-flight counts already taken for earlier
             * lists, and free both temporary arrays instead of leaking
             * "outstanding". */
            for (j = 0; j < i; ++j)
                InterlockedDecrement(outstanding[j]);
            vkd3d_free(outstanding);
            vkd3d_free(buffers);
            return;
        }

        /* Mark the list as having a submission in flight until the
         * submission thread retires it. */
        outstanding[i] = cmd_list->outstanding_submissions_count;
        InterlockedIncrement(outstanding[i]);

        /* Resolve any deferred descriptor set updates before submission. */
        for (j = 0; j < cmd_list->descriptor_updates_count; j++)
            d3d12_deferred_descriptor_set_update_resolve(cmd_list, &cmd_list->descriptor_updates[j]);
        buffers[i] = cmd_list->vk_command_buffer;
    }

    /* Ownership of "buffers" and "outstanding" passes with the submission;
     * NOTE(review): they are presumably freed by the submission thread once
     * processed — verify against the EXECUTE handler. */
    sub.type = VKD3D_SUBMISSION_EXECUTE;
    sub.u.execute.cmd = buffers;
    sub.u.execute.count = command_list_count;
    sub.u.execute.outstanding_submissions_count = outstanding;
    d3d12_command_queue_add_submission(command_queue, &sub);
}
|
|
|
|
|
|
|
|
/* ID3D12CommandQueue::SetMarker() — PIX instrumentation marker; not implemented. */
static void STDMETHODCALLTYPE d3d12_command_queue_SetMarker(ID3D12CommandQueue *iface,
        UINT metadata, const void *data, UINT size)
{
    FIXME("iface %p, metadata %#x, data %p, size %u stub!\n",
            iface, metadata, data, size);
}
|
|
|
|
|
|
|
|
/* ID3D12CommandQueue::BeginEvent() — PIX instrumentation event; not implemented. */
static void STDMETHODCALLTYPE d3d12_command_queue_BeginEvent(ID3D12CommandQueue *iface,
        UINT metadata, const void *data, UINT size)
{
    /* Fix: correct "metatdata" typo in the log message, matching SetMarker. */
    FIXME("iface %p, metadata %#x, data %p, size %u stub!\n",
            iface, metadata, data, size);
}
|
|
|
|
|
|
|
|
/* ID3D12CommandQueue::EndEvent() — PIX instrumentation event; not implemented. */
static void STDMETHODCALLTYPE d3d12_command_queue_EndEvent(ID3D12CommandQueue *iface)
{
    FIXME("iface %p stub!\n", iface);
}
|
|
|
|
|
|
|
|
/* ID3D12CommandQueue::Signal(). Queues a SIGNAL operation for the submission
 * thread rather than signalling inline, so out-of-order signal/wait pairs
 * between queues cannot deadlock the caller. */
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *iface,
        ID3D12Fence *fence_iface, UINT64 value)
{
    struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
    struct d3d12_fence *fence = unsafe_impl_from_ID3D12Fence(fence_iface);
    struct d3d12_command_queue_submission sub;

    TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value);

    sub.type = VKD3D_SUBMISSION_SIGNAL;
    sub.u.signal.fence = fence;
    sub.u.signal.value = value;
    d3d12_command_queue_add_submission(command_queue, &sub);

    return S_OK;
}
|
2016-10-08 13:31:57 +01:00
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
/* ID3D12CommandQueue::Wait(). Queues a WAIT operation for the submission
 * thread; the thread holds back later submissions until the fence's pending
 * value covers the requested value, so this call never blocks the caller. */
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *iface,
        ID3D12Fence *fence_iface, UINT64 value)
{
    struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
    struct d3d12_fence *fence = unsafe_impl_from_ID3D12Fence(fence_iface);
    struct d3d12_command_queue_submission sub;

    TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value);

    sub.type = VKD3D_SUBMISSION_WAIT;
    sub.u.wait.fence = fence;
    sub.u.wait.value = value;
    d3d12_command_queue_add_submission(command_queue, &sub);

    return S_OK;
}
|
2020-03-30 18:14:34 +01:00
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetTimestampFrequency(ID3D12CommandQueue *iface,
        UINT64 *frequency)
{
    struct d3d12_command_queue *queue = impl_from_ID3D12CommandQueue(iface);
    struct d3d12_device *device = queue->device;

    TRACE("iface %p, frequency %p.\n", iface, frequency);

    /* A Vulkan queue family without valid timestamp bits cannot service
     * timestamp queries at all, so the D3D12 call has to fail. */
    if (!queue->vkd3d_queue->timestamp_bits)
    {
        WARN("Timestamp queries not supported.\n");
        return E_FAIL;
    }

    /* timestampPeriod is the length of one timestamp tick in nanoseconds;
     * D3D12 reports the frequency as ticks per second. */
    *frequency = 1000000000 / device->vk_info.device_limits.timestampPeriod;

    return S_OK;
}
|
2019-04-17 16:26:35 +01:00
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetClockCalibration(ID3D12CommandQueue *iface,
        UINT64 *gpu_timestamp, UINT64 *cpu_timestamp)
{
    /* Correlated GPU/CPU timestamp sampling is not implemented yet. */
    FIXME("iface %p, gpu_timestamp %p, cpu_timestamp %p stub!\n", iface, gpu_timestamp, cpu_timestamp);

    return E_NOTIMPL;
}
|
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
static D3D12_COMMAND_QUEUE_DESC * STDMETHODCALLTYPE d3d12_command_queue_GetDesc(ID3D12CommandQueue *iface,
        D3D12_COMMAND_QUEUE_DESC *desc)
{
    struct d3d12_command_queue *queue = impl_from_ID3D12CommandQueue(iface);

    TRACE("iface %p, desc %p.\n", iface, desc);

    /* The descriptor is written through the caller-provided storage and the
     * same pointer is echoed back as the return value, per the D3D12 ABI. */
    *desc = queue->desc;
    return desc;
}
|
|
|
|
|
|
|
|
/* Method table for the ID3D12CommandQueue implementation.  Entry order must
 * match the COM vtable layout exactly: IUnknown first, then ID3D12Object,
 * ID3D12DeviceChild, and finally the ID3D12CommandQueue methods. */
static const struct ID3D12CommandQueueVtbl d3d12_command_queue_vtbl =
{
    /* IUnknown methods */
    d3d12_command_queue_QueryInterface,
    d3d12_command_queue_AddRef,
    d3d12_command_queue_Release,
    /* ID3D12Object methods */
    d3d12_command_queue_GetPrivateData,
    d3d12_command_queue_SetPrivateData,
    d3d12_command_queue_SetPrivateDataInterface,
    d3d12_command_queue_SetName,
    /* ID3D12DeviceChild methods */
    d3d12_command_queue_GetDevice,
    /* ID3D12CommandQueue methods */
    d3d12_command_queue_UpdateTileMappings,
    d3d12_command_queue_CopyTileMappings,
    d3d12_command_queue_ExecuteCommandLists,
    d3d12_command_queue_SetMarker,
    d3d12_command_queue_BeginEvent,
    d3d12_command_queue_EndEvent,
    d3d12_command_queue_Signal,
    d3d12_command_queue_Wait,
    d3d12_command_queue_GetTimestampFrequency,
    d3d12_command_queue_GetClockCalibration,
    d3d12_command_queue_GetDesc,
};
|
|
|
|
|
|
|
|
/* Runs on the queue's submission thread: submits a GPU-side wait on
 * @fence's timeline semaphore for payload @value.  May block the
 * submission thread (not the application thread) until the fence's
 * pending value reaches @value, to avoid deadlocking when multiple
 * D3D12 queues alias one physical Vulkan queue. */
static void d3d12_command_queue_wait(struct d3d12_command_queue *command_queue,
        struct d3d12_fence *fence, UINT64 value)
{
    static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
    VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info;
    const struct vkd3d_vk_device_procs *vk_procs;
    struct vkd3d_queue *queue;
    VkSubmitInfo submit_info;
    VkQueue vk_queue;
    VkResult vr;

    d3d12_fence_lock(fence);

    /* This is the critical part required to support out-of-order signal.
     * Normally we would be able to submit waits and signals out of order,
     * but we don't have virtualized queues in Vulkan, so we need to handle the case
     * where multiple queues alias over the same physical queue, so effectively, we need to manage out-of-order submits
     * ourselves. */
    d3d12_fence_block_until_pending_value_reaches_locked(fence, value);

    /* If a host signal unblocked us, or we know that the fence has reached a specific value, there is no need
     * to queue up a wait. */
    if (d3d12_fence_can_elide_wait_semaphore_locked(fence, value))
    {
        d3d12_fence_unlock(fence);
        return;
    }
    d3d12_fence_unlock(fence);

    TRACE("queue %p, fence %p, value %#"PRIx64".\n", command_queue, fence, value);

    vk_procs = &command_queue->device->vk_procs;
    queue = command_queue->vkd3d_queue;

    /* The timeline semaphore must exist by now; the pending-value wait
     * above guarantees a signal has been queued for this fence. */
    assert(fence->timeline_semaphore);
    timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR;
    timeline_submit_info.pNext = NULL;
    timeline_submit_info.signalSemaphoreValueCount = 0;
    timeline_submit_info.pSignalSemaphoreValues = NULL;
    timeline_submit_info.waitSemaphoreValueCount = 1;
    timeline_submit_info.pWaitSemaphoreValues = &value;

    /* NOTE(review): on acquire failure the wait is silently dropped;
     * presumably acceptable because this is already a best-effort path. */
    if (!(vk_queue = vkd3d_queue_acquire(queue)))
    {
        ERR("Failed to acquire queue %p.\n", queue);
        return;
    }

    /* Command-buffer-less submit: only the timeline wait is recorded. */
    submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    submit_info.pNext = &timeline_submit_info;
    submit_info.waitSemaphoreCount = 1;
    submit_info.pWaitSemaphores = &fence->timeline_semaphore;
    submit_info.pWaitDstStageMask = &wait_stage_mask;
    submit_info.commandBufferCount = 0;
    submit_info.pCommandBuffers = NULL;
    submit_info.signalSemaphoreCount = 0;
    submit_info.pSignalSemaphores = NULL;

    vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE));

    vkd3d_queue_release(queue);

    if (vr < 0)
    {
        ERR("Failed to submit wait operation, vr %d.\n", vr);
    }

    /* We should probably trigger DEVICE_REMOVED if we hit any errors in the submission thread. */
}
|
2019-05-02 15:02:41 +01:00
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
/* Runs on the queue's submission thread: submits a timeline semaphore
 * signal for @fence with payload @value.  The fence lock is held across
 * vkQueueSubmit so that no other thread can queue a higher signal value
 * first, which would make the timeline payload non-monotonic. */
static void d3d12_command_queue_signal(struct d3d12_command_queue *command_queue,
        struct d3d12_fence *fence, UINT64 value)
{
    VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info;
    const struct vkd3d_vk_device_procs *vk_procs;
    struct vkd3d_queue *vkd3d_queue;
    struct d3d12_device *device;
    VkSubmitInfo submit_info;
    VkQueue vk_queue;
    VkResult vr;
    HRESULT hr;

    device = command_queue->device;
    vk_procs = &device->vk_procs;
    vkd3d_queue = command_queue->vkd3d_queue;

    d3d12_fence_lock(fence);

    /* Elide the submit entirely if signalling @value would not advance the
     * semaphore (e.g. an equal or higher signal is already queued). */
    if (!d3d12_fence_can_signal_semaphore_locked(fence, value))
    {
        d3d12_fence_unlock(fence);
        return;
    }

    TRACE("queue %p, fence %p, value %#"PRIx64".\n", command_queue, fence, value);

    /* Need to hold the fence lock while we're submitting, since another thread could come in and signal the semaphore
     * to a higher value before we call vkQueueSubmit, which creates a non-monotonically increasing value. */

    /* Initialize the timeline payload first, then chain it; the original
     * code assigned submit_info.pNext = NULL and later overwrote it, and
     * scattered the timeline_submit_info fields after the VkSubmitInfo
     * fields.  Single-assignment, grouped initialization matches
     * d3d12_command_queue_wait(). */
    timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR;
    timeline_submit_info.pNext = NULL;
    timeline_submit_info.waitSemaphoreValueCount = 0;
    timeline_submit_info.pWaitSemaphoreValues = NULL;
    timeline_submit_info.signalSemaphoreValueCount = 1;
    timeline_submit_info.pSignalSemaphoreValues = &value;

    /* Command-buffer-less submit: only the timeline signal is recorded. */
    submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    submit_info.pNext = &timeline_submit_info;
    submit_info.waitSemaphoreCount = 0;
    submit_info.pWaitSemaphores = NULL;
    submit_info.pWaitDstStageMask = NULL;
    submit_info.commandBufferCount = 0;
    submit_info.pCommandBuffers = NULL;
    submit_info.signalSemaphoreCount = 1;
    submit_info.pSignalSemaphores = &fence->timeline_semaphore;

    if (!(vk_queue = vkd3d_queue_acquire(vkd3d_queue)))
    {
        ERR("Failed to acquire queue %p.\n", vkd3d_queue);
        d3d12_fence_unlock(fence);
        return;
    }

    vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE));

    /* Only advance the pending value on success; queue waits block on it. */
    if (vr == VK_SUCCESS)
        d3d12_fence_update_pending_value_locked(fence, value);
    d3d12_fence_unlock(fence);

    vkd3d_queue_release(vkd3d_queue);

    if (vr < 0)
    {
        ERR("Failed to submit signal operation, vr %d.\n", vr);
        return;
    }

    if (FAILED(hr = vkd3d_enqueue_timeline_semaphore(&device->fence_worker, fence, value, vkd3d_queue)))
    {
        /* In case of an unexpected failure, try to safely destroy Vulkan objects. */
        vkd3d_queue_wait_idle(vkd3d_queue, vk_procs);
    }

    /* We should probably trigger DEVICE_REMOVED if we hit any errors in the submission thread. */
}
|
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queue,
|
|
|
|
VkCommandBuffer *cmd, UINT count)
|
2016-09-21 13:41:31 +01:00
|
|
|
{
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
const struct vkd3d_vk_device_procs *vk_procs;
|
|
|
|
struct VkSubmitInfo submit_desc;
|
|
|
|
VkQueue vk_queue;
|
|
|
|
VkResult vr;
|
2016-09-21 13:41:31 +01:00
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
TRACE("queue %p, command_list_count %u, command_lists %p.\n",
|
|
|
|
command_queue, count, cmd);
|
2017-08-28 12:03:37 +01:00
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
vk_procs = &command_queue->device->vk_procs;
|
|
|
|
|
|
|
|
submit_desc.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
|
|
|
|
submit_desc.pNext = NULL;
|
|
|
|
submit_desc.waitSemaphoreCount = 0;
|
|
|
|
submit_desc.pWaitSemaphores = NULL;
|
|
|
|
submit_desc.pWaitDstStageMask = NULL;
|
|
|
|
submit_desc.commandBufferCount = count;
|
|
|
|
submit_desc.pCommandBuffers = cmd;
|
|
|
|
submit_desc.signalSemaphoreCount = 0;
|
|
|
|
submit_desc.pSignalSemaphores = NULL;
|
|
|
|
|
|
|
|
if (!(vk_queue = vkd3d_queue_acquire(command_queue->vkd3d_queue)))
|
2018-01-15 12:49:04 +00:00
|
|
|
{
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
ERR("Failed to acquire queue %p.\n", command_queue->vkd3d_queue);
|
|
|
|
return;
|
2018-01-15 12:49:04 +00:00
|
|
|
}
|
2017-08-29 14:17:39 +01:00
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_desc, VK_NULL_HANDLE))) < 0)
|
|
|
|
ERR("Failed to submit queue(s), vr %d.\n", vr);
|
2017-08-25 14:07:05 +01:00
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
vkd3d_queue_release(command_queue->vkd3d_queue);
|
2016-09-21 13:41:31 +01:00
|
|
|
}
|
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
void d3d12_command_queue_submit_stop(struct d3d12_command_queue *queue)
|
2016-09-21 13:41:31 +01:00
|
|
|
{
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
struct d3d12_command_queue_submission sub;
|
|
|
|
sub.type = VKD3D_SUBMISSION_STOP;
|
|
|
|
d3d12_command_queue_add_submission(queue, &sub);
|
|
|
|
}
|
2016-09-21 13:41:31 +01:00
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
static void d3d12_command_queue_add_submission_locked(struct d3d12_command_queue *queue,
|
|
|
|
const struct d3d12_command_queue_submission *sub)
|
|
|
|
{
|
|
|
|
vkd3d_array_reserve((void**)&queue->submissions, &queue->submissions_size,
|
|
|
|
queue->submissions_count + 1, sizeof(*queue->submissions));
|
|
|
|
queue->submissions[queue->submissions_count++] = *sub;
|
|
|
|
pthread_cond_signal(&queue->queue_cond);
|
2016-09-21 13:41:31 +01:00
|
|
|
}
|
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
static void d3d12_command_queue_add_submission(struct d3d12_command_queue *queue,
|
|
|
|
const struct d3d12_command_queue_submission *sub)
|
2016-09-21 13:41:31 +01:00
|
|
|
{
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
pthread_mutex_lock(&queue->queue_lock);
|
|
|
|
d3d12_command_queue_add_submission_locked(queue, sub);
|
|
|
|
pthread_mutex_unlock(&queue->queue_lock);
|
|
|
|
}
|
2016-09-21 13:41:31 +01:00
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
static void d3d12_command_queue_acquire_serialized(struct d3d12_command_queue *queue)
|
|
|
|
{
|
|
|
|
/* In order to make sure all pending operations queued so far have been submitted,
|
|
|
|
* we build a drain task which will increment the queue_drain_count once the thread has finished all its work. */
|
|
|
|
struct d3d12_command_queue_submission sub;
|
|
|
|
uint64_t current_drain;
|
2016-09-21 13:41:31 +01:00
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
sub.type = VKD3D_SUBMISSION_DRAIN;
|
|
|
|
|
|
|
|
pthread_mutex_lock(&queue->queue_lock);
|
|
|
|
|
|
|
|
current_drain = ++queue->drain_count;
|
|
|
|
d3d12_command_queue_add_submission_locked(queue, &sub);
|
|
|
|
|
|
|
|
while (current_drain != queue->queue_drain_count)
|
|
|
|
pthread_cond_wait(&queue->queue_cond, &queue->queue_lock);
|
2016-09-21 13:41:31 +01:00
|
|
|
}
|
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
static void d3d12_command_queue_release_serialized(struct d3d12_command_queue *queue)
|
2016-09-21 13:41:31 +01:00
|
|
|
{
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
pthread_mutex_unlock(&queue->queue_lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void *d3d12_command_queue_submission_worker_main(void *userdata)
|
|
|
|
{
|
|
|
|
struct d3d12_command_queue_submission submission;
|
|
|
|
struct d3d12_command_queue *queue = userdata;
|
2020-04-17 14:10:07 +01:00
|
|
|
unsigned int i;
|
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
vkd3d_set_thread_name("vkd3d_queue");
|
|
|
|
|
|
|
|
for (;;)
|
|
|
|
{
|
|
|
|
pthread_mutex_lock(&queue->queue_lock);
|
|
|
|
while (queue->submissions_count == 0)
|
|
|
|
pthread_cond_wait(&queue->queue_cond, &queue->queue_lock);
|
|
|
|
|
|
|
|
queue->submissions_count--;
|
|
|
|
submission = queue->submissions[0];
|
|
|
|
memmove(queue->submissions, queue->submissions + 1, queue->submissions_count * sizeof(submission));
|
|
|
|
pthread_mutex_unlock(&queue->queue_lock);
|
|
|
|
|
|
|
|
switch (submission.type)
|
|
|
|
{
|
|
|
|
case VKD3D_SUBMISSION_STOP:
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
case VKD3D_SUBMISSION_WAIT:
|
|
|
|
d3d12_command_queue_wait(queue, submission.u.wait.fence, submission.u.wait.value);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case VKD3D_SUBMISSION_SIGNAL:
|
|
|
|
d3d12_command_queue_signal(queue, submission.u.signal.fence, submission.u.signal.value);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case VKD3D_SUBMISSION_EXECUTE:
|
|
|
|
d3d12_command_queue_execute(queue, submission.u.execute.cmd, submission.u.execute.count);
|
|
|
|
vkd3d_free(submission.u.execute.cmd);
|
2020-04-17 14:10:07 +01:00
|
|
|
/* TODO: The correct place to do this would be in a fence handler, but this is good enough for now. */
|
|
|
|
for (i = 0; i < submission.u.execute.count; i++)
|
|
|
|
InterlockedDecrement(submission.u.execute.outstanding_submissions_count[i]);
|
|
|
|
vkd3d_free(submission.u.execute.outstanding_submissions_count);
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
case VKD3D_SUBMISSION_DRAIN:
|
|
|
|
{
|
|
|
|
pthread_mutex_lock(&queue->queue_lock);
|
|
|
|
queue->queue_drain_count++;
|
|
|
|
pthread_cond_signal(&queue->queue_cond);
|
|
|
|
pthread_mutex_unlock(&queue->queue_lock);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
default:
|
|
|
|
ERR("Unrecognized submission type %u.\n", submission.type);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2016-09-21 13:41:31 +01:00
|
|
|
|
2016-09-28 08:42:49 +01:00
|
|
|
static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue,
|
2016-09-21 13:41:31 +01:00
|
|
|
struct d3d12_device *device, const D3D12_COMMAND_QUEUE_DESC *desc)
|
|
|
|
{
|
2019-01-10 10:16:48 +00:00
|
|
|
HRESULT hr;
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
int rc;
|
2019-01-10 10:16:48 +00:00
|
|
|
|
2016-09-21 13:41:31 +01:00
|
|
|
queue->ID3D12CommandQueue_iface.lpVtbl = &d3d12_command_queue_vtbl;
|
|
|
|
queue->refcount = 1;
|
|
|
|
|
|
|
|
queue->desc = *desc;
|
|
|
|
if (!queue->desc.NodeMask)
|
|
|
|
queue->desc.NodeMask = 0x1;
|
|
|
|
|
2018-01-15 12:49:04 +00:00
|
|
|
if (!(queue->vkd3d_queue = d3d12_device_get_vkd3d_queue(device, desc->Type)))
|
|
|
|
return E_NOTIMPL;
|
2016-09-28 08:42:49 +01:00
|
|
|
|
vkd3d: Implement threaded submission queue.
D3D12 supports out-of-order signal and wait. So does Vulkan timeline
semaphores. However, in Vulkan we don't have an infinite amount of
virtual queues. We must potentially map multiple D3D12 queues on top of
Vulkan, which might lead to a deadlock when app attempts to
wait-before-signal if the two queues are mapped to the same physical
Vulkan queue.
In order to solve this, we need to hold back submissions until we know
it is safe to do so. To make this work in practice as simply as possible, each
ID3D12CommandQueue has its own submission thread, which will block on an
ID3D12Fence's pending timeline value for a Wait command. The main reason to use a
submission thread is that resolving this directly in
ID3D12CommandQueue::Signal is extremely tricky and potentially
needs recursively locking queues and fences.
Note that we only block on the pending wait value, not the actual wait
value, so there is no real CPU <-> GPU synchronization here. In the
common case, no submission thread will block.
The added benefit is that submits are async now, so main thread CPU
overhead might slightly decrease.
To play nice with DXGI swapchain, the external entry point for acquiring
the Vulkan queue needs to drain the submission thread and lock it to ensure
submissions happen in order.
Fixes hangs in The Division 1, which makes use of this D3D12 feature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2020-04-16 10:15:22 +01:00
|
|
|
queue->submissions = NULL;
|
|
|
|
queue->submissions_count = 0;
|
|
|
|
queue->submissions_size = 0;
|
|
|
|
queue->drain_count = 0;
|
|
|
|
queue->queue_drain_count = 0;
|
|
|
|
|
|
|
|
if ((rc = pthread_mutex_init(&queue->queue_lock, NULL)) < 0)
|
|
|
|
return E_FAIL;
|
|
|
|
if ((rc = pthread_cond_init(&queue->queue_cond, NULL)) < 0)
|
|
|
|
return E_FAIL;
|
|
|
|
if ((rc = pthread_create(&queue->submission_thread, NULL, d3d12_command_queue_submission_worker_main, queue)) < 0)
|
|
|
|
return E_FAIL;
|
|
|
|
|
2019-04-17 16:26:41 +01:00
|
|
|
if (desc->Priority == D3D12_COMMAND_QUEUE_PRIORITY_GLOBAL_REALTIME)
|
|
|
|
{
|
|
|
|
FIXME("Global realtime priority is not implemented.\n");
|
|
|
|
return E_NOTIMPL;
|
|
|
|
}
|
|
|
|
|
2016-09-28 08:42:49 +01:00
|
|
|
if (desc->Priority)
|
|
|
|
FIXME("Ignoring priority %#x.\n", desc->Priority);
|
|
|
|
if (desc->Flags)
|
|
|
|
FIXME("Ignoring flags %#x.\n", desc->Flags);
|
|
|
|
|
2019-01-10 10:16:48 +00:00
|
|
|
if (FAILED(hr = vkd3d_private_store_init(&queue->private_store)))
|
|
|
|
return hr;
|
2019-01-03 13:23:01 +00:00
|
|
|
|
2019-06-07 13:38:03 +01:00
|
|
|
d3d12_device_add_ref(queue->device = device);
|
2016-09-28 08:42:49 +01:00
|
|
|
|
|
|
|
return S_OK;
|
2016-09-21 13:41:31 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
HRESULT d3d12_command_queue_create(struct d3d12_device *device,
|
|
|
|
const D3D12_COMMAND_QUEUE_DESC *desc, struct d3d12_command_queue **queue)
|
|
|
|
{
|
|
|
|
struct d3d12_command_queue *object;
|
2016-09-28 08:42:49 +01:00
|
|
|
HRESULT hr;
|
2016-09-21 13:41:31 +01:00
|
|
|
|
|
|
|
if (!(object = vkd3d_malloc(sizeof(*object))))
|
|
|
|
return E_OUTOFMEMORY;
|
|
|
|
|
2016-09-28 08:42:49 +01:00
|
|
|
if (FAILED(hr = d3d12_command_queue_init(object, device, desc)))
|
|
|
|
{
|
|
|
|
vkd3d_free(object);
|
|
|
|
return hr;
|
|
|
|
}
|
2016-09-21 13:41:31 +01:00
|
|
|
|
|
|
|
TRACE("Created command queue %p.\n", object);
|
|
|
|
|
|
|
|
*queue = object;
|
|
|
|
|
|
|
|
return S_OK;
|
|
|
|
}
|
2016-10-17 14:10:53 +01:00
|
|
|
|
2016-10-22 19:42:46 +01:00
|
|
|
/* Return the Vulkan queue family index backing this D3D12 command queue. */
uint32_t vkd3d_get_vk_queue_family_index(ID3D12CommandQueue *queue)
{
    const struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(queue);

    return command_queue->vkd3d_queue->vk_family_index;
}
|
2017-07-20 18:22:51 +01:00
|
|
|
|
2018-01-15 12:49:07 +00:00
|
|
|
/* Hand the underlying VkQueue to an external user (e.g. the swapchain).
 * The submission thread is drained and serialized first so that external
 * submissions happen in the order the caller observes. */
VkQueue vkd3d_acquire_vk_queue(ID3D12CommandQueue *queue)
{
    struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(queue);

    d3d12_command_queue_acquire_serialized(command_queue);
    return vkd3d_queue_acquire(command_queue->vkd3d_queue);
}
|
|
|
|
|
|
|
|
/* Counterpart of vkd3d_acquire_vk_queue(): release the Vulkan queue and
 * unblock the queue's submission thread, in reverse acquisition order. */
void vkd3d_release_vk_queue(ID3D12CommandQueue *queue)
{
    struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(queue);

    vkd3d_queue_release(command_queue->vkd3d_queue);
    d3d12_command_queue_release_serialized(command_queue);
}
|
|
|
|
|
2017-07-20 18:22:51 +01:00
|
|
|
/* ID3D12CommandSignature */
|
|
|
|
/* ID3D12CommandSignature */
/* Map a COM interface pointer back to its containing implementation struct. */
static inline struct d3d12_command_signature *impl_from_ID3D12CommandSignature(ID3D12CommandSignature *iface)
{
    struct d3d12_command_signature *signature;

    signature = CONTAINING_RECORD(iface, struct d3d12_command_signature, ID3D12CommandSignature_iface);
    return signature;
}
|
|
|
|
|
|
|
|
/* Standard COM QueryInterface: command signatures expose the usual D3D12
 * object hierarchy (Pageable -> DeviceChild -> Object -> IUnknown). */
static HRESULT STDMETHODCALLTYPE d3d12_command_signature_QueryInterface(ID3D12CommandSignature *iface,
        REFIID iid, void **out)
{
    bool supported;

    TRACE("iface %p, iid %s, out %p.\n", iface, debugstr_guid(iid), out);

    supported = IsEqualGUID(iid, &IID_ID3D12CommandSignature)
            || IsEqualGUID(iid, &IID_ID3D12Pageable)
            || IsEqualGUID(iid, &IID_ID3D12DeviceChild)
            || IsEqualGUID(iid, &IID_ID3D12Object)
            || IsEqualGUID(iid, &IID_IUnknown);

    if (!supported)
    {
        WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(iid));

        *out = NULL;
        return E_NOINTERFACE;
    }

    ID3D12CommandSignature_AddRef(iface);
    *out = iface;
    return S_OK;
}
|
|
|
|
|
|
|
|
/* Standard COM AddRef: atomically bump and return the reference count. */
static ULONG STDMETHODCALLTYPE d3d12_command_signature_AddRef(ID3D12CommandSignature *iface)
{
    struct d3d12_command_signature *signature = impl_from_ID3D12CommandSignature(iface);
    ULONG new_refcount;

    new_refcount = InterlockedIncrement(&signature->refcount);
    TRACE("%p increasing refcount to %u.\n", signature, new_refcount);

    return new_refcount;
}
|
|
|
|
|
|
|
|
/* Standard COM Release: atomically drop the reference count; the final
 * release destroys the object and drops its device reference last (the
 * device reference keeps the allocator/device alive during teardown). */
static ULONG STDMETHODCALLTYPE d3d12_command_signature_Release(ID3D12CommandSignature *iface)
{
    struct d3d12_command_signature *signature = impl_from_ID3D12CommandSignature(iface);
    ULONG new_refcount = InterlockedDecrement(&signature->refcount);
    struct d3d12_device *device;

    TRACE("%p decreasing refcount to %u.\n", signature, new_refcount);

    if (new_refcount)
        return new_refcount;

    device = signature->device;

    vkd3d_private_store_destroy(&signature->private_store);
    vkd3d_free((void *)signature->desc.pArgumentDescs);
    vkd3d_free(signature);

    d3d12_device_release(device);

    return 0;
}
|
|
|
|
|
|
|
|
/* ID3D12Object::GetPrivateData — forwarded to the per-object private store. */
static HRESULT STDMETHODCALLTYPE d3d12_command_signature_GetPrivateData(ID3D12CommandSignature *iface,
        REFGUID guid, UINT *data_size, void *data)
{
    struct d3d12_command_signature *signature;

    TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data);

    signature = impl_from_ID3D12CommandSignature(iface);
    return vkd3d_get_private_data(&signature->private_store, guid, data_size, data);
}
|
|
|
|
|
|
|
|
/* ID3D12Object::SetPrivateData — forwarded to the per-object private store. */
static HRESULT STDMETHODCALLTYPE d3d12_command_signature_SetPrivateData(ID3D12CommandSignature *iface,
        REFGUID guid, UINT data_size, const void *data)
{
    struct d3d12_command_signature *signature;

    TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data);

    signature = impl_from_ID3D12CommandSignature(iface);
    return vkd3d_set_private_data(&signature->private_store, guid, data_size, data);
}
|
|
|
|
|
|
|
|
/* ID3D12Object::SetPrivateDataInterface — forwarded to the private store. */
static HRESULT STDMETHODCALLTYPE d3d12_command_signature_SetPrivateDataInterface(ID3D12CommandSignature *iface,
        REFGUID guid, const IUnknown *data)
{
    struct d3d12_command_signature *signature;

    TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data);

    signature = impl_from_ID3D12CommandSignature(iface);
    return vkd3d_set_private_data_interface(&signature->private_store, guid, data);
}
|
|
|
|
|
|
|
|
/* ID3D12Object::SetName — the name is only validated, not stored; command
 * signatures have no Vulkan object to attach a debug name to. */
static HRESULT STDMETHODCALLTYPE d3d12_command_signature_SetName(ID3D12CommandSignature *iface, const WCHAR *name)
{
    struct d3d12_command_signature *signature = impl_from_ID3D12CommandSignature(iface);

    TRACE("iface %p, name %s.\n", iface, debugstr_w(name, signature->device->wchar_size));

    if (!name)
        return E_INVALIDARG;

    return S_OK;
}
|
|
|
|
|
2019-06-07 13:38:03 +01:00
|
|
|
/* ID3D12DeviceChild::GetDevice — QI the owning device for the requested iid. */
static HRESULT STDMETHODCALLTYPE d3d12_command_signature_GetDevice(ID3D12CommandSignature *iface, REFIID iid, void **device)
{
    struct d3d12_command_signature *signature;

    TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device);

    signature = impl_from_ID3D12CommandSignature(iface);
    return d3d12_device_query_interface(signature->device, iid, device);
}
|
|
|
|
|
|
|
|
static const struct ID3D12CommandSignatureVtbl d3d12_command_signature_vtbl =
|
|
|
|
{
|
|
|
|
/* IUnknown methods */
|
|
|
|
d3d12_command_signature_QueryInterface,
|
|
|
|
d3d12_command_signature_AddRef,
|
|
|
|
d3d12_command_signature_Release,
|
|
|
|
/* ID3D12Object methods */
|
|
|
|
d3d12_command_signature_GetPrivateData,
|
|
|
|
d3d12_command_signature_SetPrivateData,
|
|
|
|
d3d12_command_signature_SetPrivateDataInterface,
|
|
|
|
d3d12_command_signature_SetName,
|
|
|
|
/* ID3D12DeviceChild methods */
|
|
|
|
d3d12_command_signature_GetDevice,
|
|
|
|
};
|
|
|
|
|
2017-08-24 19:33:49 +01:00
|
|
|
/* Convert an interface pointer coming from the application into the
 * implementation struct.  "unsafe" because only an assert guards against a
 * foreign implementation; NULL passes through unchanged. */
struct d3d12_command_signature *unsafe_impl_from_ID3D12CommandSignature(ID3D12CommandSignature *iface)
{
    if (!iface)
        return NULL;

    assert(iface->lpVtbl == &d3d12_command_signature_vtbl);
    return impl_from_ID3D12CommandSignature(iface);
}
|
|
|
|
|
|
|
|
HRESULT d3d12_command_signature_create(struct d3d12_device *device, const D3D12_COMMAND_SIGNATURE_DESC *desc,
|
|
|
|
struct d3d12_command_signature **signature)
|
2017-07-20 18:22:51 +01:00
|
|
|
{
|
|
|
|
struct d3d12_command_signature *object;
|
2018-12-04 14:55:56 +00:00
|
|
|
unsigned int i;
|
2019-01-10 10:16:48 +00:00
|
|
|
HRESULT hr;
|
2018-12-04 14:55:56 +00:00
|
|
|
|
|
|
|
for (i = 0; i < desc->NumArgumentDescs; ++i)
|
|
|
|
{
|
|
|
|
const D3D12_INDIRECT_ARGUMENT_DESC *argument_desc = &desc->pArgumentDescs[i];
|
|
|
|
switch (argument_desc->Type)
|
|
|
|
{
|
|
|
|
case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW:
|
|
|
|
case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED:
|
|
|
|
case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH:
|
|
|
|
if (i != desc->NumArgumentDescs - 1)
|
|
|
|
{
|
|
|
|
WARN("Draw/dispatch must be the last element of a command signature.\n");
|
|
|
|
return E_INVALIDARG;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2017-07-20 18:22:51 +01:00
|
|
|
|
|
|
|
if (!(object = vkd3d_malloc(sizeof(*object))))
|
|
|
|
return E_OUTOFMEMORY;
|
|
|
|
|
|
|
|
object->ID3D12CommandSignature_iface.lpVtbl = &d3d12_command_signature_vtbl;
|
|
|
|
object->refcount = 1;
|
2017-08-24 19:33:49 +01:00
|
|
|
|
|
|
|
object->desc = *desc;
|
|
|
|
if (!(object->desc.pArgumentDescs = vkd3d_calloc(desc->NumArgumentDescs, sizeof(*desc->pArgumentDescs))))
|
|
|
|
{
|
|
|
|
vkd3d_free(object);
|
|
|
|
return E_OUTOFMEMORY;
|
|
|
|
}
|
|
|
|
memcpy((void *)object->desc.pArgumentDescs, desc->pArgumentDescs,
|
|
|
|
desc->NumArgumentDescs * sizeof(*desc->pArgumentDescs));
|
|
|
|
|
2019-01-10 10:16:48 +00:00
|
|
|
if (FAILED(hr = vkd3d_private_store_init(&object->private_store)))
|
|
|
|
{
|
|
|
|
vkd3d_free((void *)object->desc.pArgumentDescs);
|
|
|
|
vkd3d_free(object);
|
|
|
|
return hr;
|
|
|
|
}
|
2019-01-04 13:34:12 +00:00
|
|
|
|
2019-06-07 13:38:03 +01:00
|
|
|
d3d12_device_add_ref(object->device = device);
|
2017-07-20 18:22:51 +01:00
|
|
|
|
|
|
|
TRACE("Created command signature %p.\n", object);
|
|
|
|
|
|
|
|
*signature = object;
|
|
|
|
|
|
|
|
return S_OK;
|
|
|
|
}
|