/*
 * Copyright 2016 Józef Kucia for CodeWeavers
 * Copyright 2016 Henri Verbeet for CodeWeavers
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API

#include "vkd3d_private.h"
#include "vkd3d_swapchain_factory.h"
#include "vkd3d_descriptor_debug.h"
#ifdef VKD3D_ENABLE_RENDERDOC
#include "vkd3d_renderdoc.h"
#endif

static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value);
static void d3d12_command_queue_add_submission(struct d3d12_command_queue *queue,
        const struct d3d12_command_queue_submission *sub);
static void d3d12_fence_inc_ref(struct d3d12_fence *fence);
static void d3d12_fence_dec_ref(struct d3d12_fence *fence);

#define MAX_BATCHED_IMAGE_BARRIERS 16
struct d3d12_command_list_barrier_batch
{
    VkImageMemoryBarrier vk_image_barriers[MAX_BATCHED_IMAGE_BARRIERS];
    VkMemoryBarrier vk_memory_barrier;
    uint32_t image_barrier_count;
    VkPipelineStageFlags dst_stage_mask, src_stage_mask;
};

static void d3d12_command_list_barrier_batch_init(struct d3d12_command_list_barrier_batch *batch);
static void d3d12_command_list_barrier_batch_end(struct d3d12_command_list *list,
        struct d3d12_command_list_barrier_batch *batch);
static void d3d12_command_list_barrier_batch_add_layout_transition(
        struct d3d12_command_list *list, struct d3d12_command_list_barrier_batch *batch,
        const VkImageMemoryBarrier *image_barrier);
static void d3d12_command_list_barrier_batch_add_global_transition(
        struct d3d12_command_list *list, struct d3d12_command_list_barrier_batch *batch,
        VkAccessFlags srcAccessMask, VkAccessFlags dstAccessMask);

static uint32_t d3d12_command_list_promote_dsv_resource(struct d3d12_command_list *list,
        struct d3d12_resource *resource, uint32_t plane_optimal_mask);
static uint32_t d3d12_command_list_notify_decay_dsv_resource(struct d3d12_command_list *list,
        struct d3d12_resource *resource);
static uint32_t d3d12_command_list_notify_dsv_writes(struct d3d12_command_list *list,
        struct d3d12_resource *resource, const struct vkd3d_view *view, uint32_t plane_write_mask);
static void d3d12_command_list_notify_dsv_discard(struct d3d12_command_list *list,
        struct d3d12_resource *resource, uint32_t first_subresource, uint32_t subresource_count,
        uint32_t resource_subresource_count);
static VkImageLayout d3d12_command_list_get_depth_stencil_resource_layout(const struct d3d12_command_list *list,
        const struct d3d12_resource *resource, uint32_t *plane_optimal_mask);
static void d3d12_command_list_decay_optimal_dsv_resource(struct d3d12_command_list *list,
        const struct d3d12_resource *resource, uint32_t plane_optimal_mask,
        struct d3d12_command_list_barrier_batch *batch);
static void d3d12_command_list_end_transfer_batch(struct d3d12_command_list *list);
static inline void d3d12_command_list_ensure_transfer_batch(struct d3d12_command_list *list,
        enum vkd3d_batch_type type);
static HRESULT vkd3d_create_binary_semaphore(struct d3d12_device *device, VkSemaphore *vk_semaphore)
{
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    VkSemaphoreCreateInfo info;
    VkResult vr;

    info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
    info.pNext = NULL;
    info.flags = 0;
    vr = VK_CALL(vkCreateSemaphore(device->vk_device, &info, NULL, vk_semaphore));
    return hresult_from_vk_result(vr);
}

static HRESULT vkd3d_create_timeline_semaphore(struct d3d12_device *device, uint64_t initial_value,
        VkSemaphore *vk_semaphore);

HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, uint32_t queue_index,
        const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue)
{
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    VkCommandBufferAllocateInfo allocate_info;
    VkCommandPoolCreateInfo pool_create_info;
    VkCommandBufferBeginInfo begin_info;
    VkMemoryBarrier memory_barrier;
    struct vkd3d_queue *object;
    VkResult vr;
    HRESULT hr;
    int rc;

    if (!(object = vkd3d_malloc(sizeof(*object))))
        return E_OUTOFMEMORY;

    memset(object, 0, sizeof(*object));

    if ((rc = pthread_mutex_init(&object->mutex, NULL)))
    {
        ERR("Failed to initialize mutex, error %d.\n", rc);
        vkd3d_free(object);
        return hresult_from_errno(rc);
    }

    object->vk_family_index = family_index;
    object->vk_queue_flags = properties->queueFlags;
    object->timestamp_bits = properties->timestampValidBits;

    VK_CALL(vkGetDeviceQueue(device->vk_device, family_index, queue_index, &object->vk_queue));

    TRACE("Created queue %p for queue family index %u.\n", object, family_index);

    /* Create a reusable full barrier command buffer. This is used in submissions
     * to guarantee serialized behavior of Vulkan queues. */
    pool_create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
    pool_create_info.pNext = NULL;
    pool_create_info.flags = 0;
    pool_create_info.queueFamilyIndex = family_index;
    if ((vr = VK_CALL(vkCreateCommandPool(device->vk_device, &pool_create_info, NULL, &object->barrier_pool))))
    {
        hr = hresult_from_vk_result(vr);
        goto fail_destroy_mutex;
    }

    allocate_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
    allocate_info.pNext = NULL;
    allocate_info.commandPool = object->barrier_pool;
    allocate_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
    allocate_info.commandBufferCount = 1;
    if ((vr = VK_CALL(vkAllocateCommandBuffers(device->vk_device, &allocate_info, &object->barrier_command_buffer))))
    {
        hr = hresult_from_vk_result(vr);
        goto fail_free_command_pool;
    }

    begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
    begin_info.pNext = NULL;
    /* It's not very meaningful to rebuild this command buffer over and over. */
    begin_info.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
    begin_info.pInheritanceInfo = NULL;
    VK_CALL(vkBeginCommandBuffer(object->barrier_command_buffer, &begin_info));

    /* To avoid unnecessary tracking, just emit a host barrier on every submit. */
    memory_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
    memory_barrier.pNext = NULL;
    memory_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
    memory_barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_HOST_READ_BIT;
    VK_CALL(vkCmdPipelineBarrier(object->barrier_command_buffer,
            VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
            VK_PIPELINE_STAGE_ALL_COMMANDS_BIT | VK_PIPELINE_STAGE_HOST_BIT,
            0, 1, &memory_barrier, 0, NULL, 0, NULL));
    VK_CALL(vkEndCommandBuffer(object->barrier_command_buffer));

    if (FAILED(hr = vkd3d_create_binary_semaphore(device, &object->serializing_binary_semaphore)))
        goto fail_free_command_pool;
    if (FAILED(hr = vkd3d_create_timeline_semaphore(device, 0, &object->submission_timeline)))
        goto fail_free_binary_semaphore;

    *queue = object;
    return hr;

fail_free_binary_semaphore:
    VK_CALL(vkDestroySemaphore(device->vk_device, object->serializing_binary_semaphore, NULL));
fail_free_command_pool:
    VK_CALL(vkDestroyCommandPool(device->vk_device, object->barrier_pool, NULL));
fail_destroy_mutex:
    pthread_mutex_destroy(&object->mutex);
    return hr;
}

static void vkd3d_queue_flush_waiters(struct vkd3d_queue *vkd3d_queue,
        struct vkd3d_fence_worker *worker,
        const struct vkd3d_vk_device_procs *vk_procs);

void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device)
{
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    int rc;

    if ((rc = pthread_mutex_lock(&queue->mutex)))
        ERR("Failed to lock mutex, error %d.\n", rc);

    if (!rc)
        pthread_mutex_unlock(&queue->mutex);

    /* Also waits for queue idle when we don't pass in a worker. */
    vkd3d_queue_flush_waiters(queue, NULL, vk_procs);

    VK_CALL(vkDestroyCommandPool(device->vk_device, queue->barrier_pool, NULL));
    VK_CALL(vkDestroySemaphore(device->vk_device, queue->serializing_binary_semaphore, NULL));
    VK_CALL(vkDestroySemaphore(device->vk_device, queue->submission_timeline, NULL));

    pthread_mutex_destroy(&queue->mutex);
    vkd3d_free(queue->wait_semaphores);
    vkd3d_free(queue->wait_values);
    vkd3d_free(queue->wait_stages);
    vkd3d_free(queue->wait_fences);
    vkd3d_free(queue);
}

VkQueue vkd3d_queue_acquire(struct vkd3d_queue *queue)
{
    int rc;

    TRACE("queue %p.\n", queue);

    if ((rc = pthread_mutex_lock(&queue->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return VK_NULL_HANDLE;
    }

    assert(queue->vk_queue);
    return queue->vk_queue;
}

void vkd3d_queue_release(struct vkd3d_queue *queue)
{
    TRACE("queue %p.\n", queue);
    pthread_mutex_unlock(&queue->mutex);
}

void vkd3d_queue_add_wait(struct vkd3d_queue *queue, struct d3d12_fence *waiter, VkSemaphore semaphore, uint64_t value)
{
    uint32_t i;

    pthread_mutex_lock(&queue->mutex);

    for (i = 0; i < queue->wait_count; i++)
    {
        if (queue->wait_semaphores[i] == semaphore)
        {
            if (queue->wait_values[i] < value)
                queue->wait_values[i] = value;
            pthread_mutex_unlock(&queue->mutex);
            return;
        }
    }

    if (!vkd3d_array_reserve((void**)&queue->wait_semaphores, &queue->wait_semaphores_size,
                queue->wait_count + 1, sizeof(*queue->wait_semaphores)) ||
            !vkd3d_array_reserve((void**)&queue->wait_fences, &queue->wait_fences_size,
                queue->wait_count + 1, sizeof(*queue->wait_fences)) ||
            !vkd3d_array_reserve((void**)&queue->wait_values, &queue->wait_values_size,
                queue->wait_count + 1, sizeof(*queue->wait_values)) ||
            !vkd3d_array_reserve((void**)&queue->wait_stages, &queue->wait_stages_size,
                queue->wait_count + 1, sizeof(*queue->wait_stages)))
    {
        ERR("Failed to add semaphore wait to queue.\n");
        pthread_mutex_unlock(&queue->mutex);
        return;
    }

    queue->wait_semaphores[queue->wait_count] = semaphore;
    queue->wait_values[queue->wait_count] = value;
    queue->wait_stages[queue->wait_count] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
    queue->wait_fences[queue->wait_count] = waiter;
    queue->wait_count += 1;
    pthread_mutex_unlock(&queue->mutex);

    if (waiter)
        d3d12_fence_inc_ref(waiter);
}

static void vkd3d_queue_reset_wait_count_locked(struct vkd3d_queue *vkd3d_queue)
{
    size_t i;
    for (i = 0; i < vkd3d_queue->wait_count; i++)
        if (vkd3d_queue->wait_fences[i])
            d3d12_fence_dec_ref(vkd3d_queue->wait_fences[i]);
    vkd3d_queue->wait_count = 0;
}

static HRESULT vkd3d_enqueue_timeline_semaphore(struct vkd3d_fence_worker *worker, struct d3d12_fence *fence,
        VkSemaphore timeline, uint64_t value, bool signal, LONG **submission_counters, size_t num_submission_counts);

static void vkd3d_queue_push_waiters_to_worker_locked(struct vkd3d_queue *vkd3d_queue,
        struct vkd3d_fence_worker *worker, VkSemaphore timeline, uint64_t value)
{
    HRESULT hr;
    size_t i;

    for (i = 0; i < vkd3d_queue->wait_count; i++)
    {
        if (vkd3d_queue->wait_fences[i])
        {
            if (FAILED(hr = vkd3d_enqueue_timeline_semaphore(worker,
                    vkd3d_queue->wait_fences[i], timeline, value, false, NULL, 0)))
            {
                ERR("Failed to enqueue timeline semaphore.\n");
            }
        }
    }
}

static void vkd3d_queue_flush_waiters(struct vkd3d_queue *vkd3d_queue,
        struct vkd3d_fence_worker *worker,
        const struct vkd3d_vk_device_procs *vk_procs)
{
    VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info;
    VkSubmitInfo submit_desc;
    VkQueue vk_queue;
    VkResult vr;

    if (!(vk_queue = vkd3d_queue_acquire(vkd3d_queue)))
    {
        ERR("Failed to acquire queue %p.\n", vkd3d_queue);
        return;
    }

    memset(&timeline_submit_info, 0, sizeof(timeline_submit_info));
    memset(&submit_desc, 0, sizeof(submit_desc));

    if (vkd3d_queue->wait_count == 0)
    {
        if (!worker)
        {
            /* This only happens on teardown. */
            vr = VK_CALL(vkQueueWaitIdle(vk_queue));
            if (vr < 0)
                WARN("Failed to wait for queue, vr %d.\n", vr);
        }

        vkd3d_queue_release(vkd3d_queue);
        return;
    }

    submit_desc.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR;
    submit_desc.pNext = &timeline_submit_info;
    submit_desc.waitSemaphoreCount = vkd3d_queue->wait_count;
    submit_desc.pWaitSemaphores = vkd3d_queue->wait_semaphores;
    submit_desc.pWaitDstStageMask = vkd3d_queue->wait_stages;
    timeline_submit_info.waitSemaphoreValueCount = vkd3d_queue->wait_count;
    timeline_submit_info.pWaitSemaphoreValues = vkd3d_queue->wait_values;

    vkd3d_queue->submission_timeline_count++;
    submit_desc.signalSemaphoreCount = 1;
    timeline_submit_info.signalSemaphoreValueCount = 1;
    submit_desc.pSignalSemaphores = &vkd3d_queue->submission_timeline;
    timeline_submit_info.pSignalSemaphoreValues = &vkd3d_queue->submission_timeline_count;

    if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_desc, VK_NULL_HANDLE))) < 0)
        ERR("Failed to submit queue(s), vr %d.\n", vr);

    if (vr == VK_SUCCESS)
    {
        if (worker)
        {
            vkd3d_queue_push_waiters_to_worker_locked(vkd3d_queue, worker,
                    vkd3d_queue->submission_timeline, vkd3d_queue->submission_timeline_count);
        }
        else
        {
            /* This only happens on teardown. */
            vr = VK_CALL(vkQueueWaitIdle(vk_queue));
            if (vr < 0)
                WARN("Failed to wait for queue, vr %d.\n", vr);
        }
    }

    vkd3d_queue_reset_wait_count_locked(vkd3d_queue);
    vkd3d_queue_release(vkd3d_queue);
}

static HRESULT vkd3d_create_timeline_semaphore(struct d3d12_device *device, uint64_t initial_value,
        VkSemaphore *vk_semaphore)
{
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    VkSemaphoreTypeCreateInfoKHR type_info;
    VkSemaphoreCreateInfo info;
    VkResult vr;

    type_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR;
    type_info.pNext = NULL;
    type_info.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR;
    type_info.initialValue = initial_value;

    info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
    info.pNext = &type_info;
    info.flags = 0;

    if ((vr = VK_CALL(vkCreateSemaphore(device->vk_device, &info, NULL, vk_semaphore))) < 0)
        ERR("Failed to create timeline semaphore, vr %d.\n", vr);
    return hresult_from_vk_result(vr);
}

static HRESULT vkd3d_enqueue_timeline_semaphore(struct vkd3d_fence_worker *worker, struct d3d12_fence *fence,
        VkSemaphore timeline, uint64_t value, bool signal, LONG **submission_counters, size_t num_submission_counts)
{
    struct vkd3d_waiting_fence *waiting_fence;
    size_t i;
    int rc;

    TRACE("worker %p, fence %p, value %#"PRIx64".\n", worker, fence, value);

    if ((rc = pthread_mutex_lock(&worker->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        for (i = 0; i < num_submission_counts; i++)
            InterlockedDecrement(submission_counters[i]);
        vkd3d_free(submission_counters);
        return hresult_from_errno(rc);
    }

    if (!vkd3d_array_reserve((void **)&worker->enqueued_fences, &worker->enqueued_fences_size,
            worker->enqueued_fence_count + 1, sizeof(*worker->enqueued_fences)))
    {
        ERR("Failed to add GPU timeline semaphore.\n");
        pthread_mutex_unlock(&worker->mutex);
        for (i = 0; i < num_submission_counts; i++)
            InterlockedDecrement(submission_counters[i]);
        vkd3d_free(submission_counters);
        return E_OUTOFMEMORY;
    }

    if (fence)
        d3d12_fence_inc_ref(fence);

    waiting_fence = &worker->enqueued_fences[worker->enqueued_fence_count];
    waiting_fence->fence = fence;
    waiting_fence->submission_timeline = timeline;
    waiting_fence->value = value;
    waiting_fence->signal = signal;
    waiting_fence->submission_counters = submission_counters;
    waiting_fence->num_submission_counts = num_submission_counts;
    ++worker->enqueued_fence_count;

    pthread_cond_signal(&worker->cond);
    pthread_mutex_unlock(&worker->mutex);
    return S_OK;
}

static void vkd3d_waiting_fence_release_submissions(const struct vkd3d_waiting_fence *fence)
{
    size_t i;
    for (i = 0; i < fence->num_submission_counts; i++)
        InterlockedDecrement(fence->submission_counters[i]);
    vkd3d_free(fence->submission_counters);
}

static void vkd3d_wait_for_gpu_timeline_semaphore(struct vkd3d_fence_worker *worker,
        const struct vkd3d_waiting_fence *fence)
{
    struct d3d12_device *device = worker->device;
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    VkSemaphoreWaitInfoKHR wait_info;
    uint64_t timeout = UINT64_MAX;
    HRESULT hr;
    int vr;

    wait_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR;
    wait_info.pNext = NULL;
    wait_info.flags = 0;
    wait_info.semaphoreCount = 1;
    wait_info.pSemaphores = &fence->submission_timeline;
    wait_info.pValues = &fence->value;

    /* Some drivers hang indefinitely in face of device lost.
     * If a wait here takes more than 5 seconds, this is pretty much
     * a guaranteed timeout (TDR) scenario.
     * Usually, we'd observe DEVICE_LOST in subsequent submissions,
     * but if application submits something and expects to wait on that submission
     * immediately, this can happen. */
    if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS)
        timeout = 5000000000ull;

    if ((vr = VK_CALL(vkWaitSemaphoresKHR(device->vk_device, &wait_info, timeout))))
    {
        ERR("Failed to wait for Vulkan timeline semaphore, vr %d.\n", vr);
        VKD3D_DEVICE_REPORT_BREADCRUMB_IF(device, vr == VK_ERROR_DEVICE_LOST || vr == VK_TIMEOUT);
        vkd3d_waiting_fence_release_submissions(fence);
        return;
    }

    /* This is a good time to kick the debug threads into action. */
    vkd3d_shader_debug_ring_kick(&device->debug_ring, device, false);
    vkd3d_descriptor_debug_kick_qa_check(device->descriptor_qa_global_info);

    if (fence->fence && fence->signal)
    {
        TRACE("Signaling fence %p value %#"PRIx64".\n", fence->fence, fence->value);
        if (FAILED(hr = d3d12_fence_signal(fence->fence, fence->value)))
            ERR("Failed to signal D3D12 fence, hr %#x.\n", hr);
    }

    if (fence->fence)
        d3d12_fence_dec_ref(fence->fence);

    /* Submission release should only be paired with an execute command.
     * Such execute commands can be paired with a d3d12_fence_dec_ref(),
     * but no signalling operation. */
    assert(!fence->num_submission_counts || !fence->signal);
    vkd3d_waiting_fence_release_submissions(fence);
}

static void *vkd3d_fence_worker_main(void *arg)
{
    struct vkd3d_waiting_fence *cur_fences, *old_fences;
    struct vkd3d_fence_worker *worker = arg;
    size_t cur_fences_size, old_fences_size;
    uint32_t cur_fence_count;
    uint32_t i;
    bool do_exit;
    int rc;

    vkd3d_set_thread_name("vkd3d_fence");

    cur_fence_count = 0;
    cur_fences_size = 0;
    cur_fences = NULL;

    for (;;)
    {
        if ((rc = pthread_mutex_lock(&worker->mutex)))
        {
            ERR("Failed to lock mutex, error %d.\n", rc);
            break;
        }

        if (!worker->enqueued_fence_count && !worker->should_exit)
        {
            if ((rc = pthread_cond_wait(&worker->cond, &worker->mutex)))
            {
                ERR("Failed to wait on condition variable, error %d.\n", rc);
                pthread_mutex_unlock(&worker->mutex);
                break;
            }
        }

        old_fences_size = cur_fences_size;
        old_fences = cur_fences;

        cur_fence_count = worker->enqueued_fence_count;
        cur_fences_size = worker->enqueued_fences_size;
        cur_fences = worker->enqueued_fences;
        do_exit = worker->should_exit;

        worker->enqueued_fence_count = 0;
        worker->enqueued_fences_size = old_fences_size;
        worker->enqueued_fences = old_fences;
        pthread_mutex_unlock(&worker->mutex);

        for (i = 0; i < cur_fence_count; i++)
            vkd3d_wait_for_gpu_timeline_semaphore(worker, &cur_fences[i]);

        if (do_exit)
            break;
    }

    vkd3d_free(cur_fences);
    return NULL;
}

HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, struct d3d12_device *device)
{
    HRESULT hr;
    int rc;

    TRACE("worker %p.\n", worker);

    worker->should_exit = false;
    worker->device = device;

    worker->enqueued_fence_count = 0;
    worker->enqueued_fences = NULL;
    worker->enqueued_fences_size = 0;

    if ((rc = pthread_mutex_init(&worker->mutex, NULL)))
    {
        ERR("Failed to initialize mutex, error %d.\n", rc);
        return hresult_from_errno(rc);
    }

    if ((rc = pthread_cond_init(&worker->cond, NULL)))
    {
        ERR("Failed to initialize condition variable, error %d.\n", rc);
        pthread_mutex_destroy(&worker->mutex);
        return hresult_from_errno(rc);
    }

    if (FAILED(hr = vkd3d_create_thread(device->vkd3d_instance, vkd3d_fence_worker_main, worker, &worker->thread)))
    {
        pthread_mutex_destroy(&worker->mutex);
        pthread_cond_destroy(&worker->cond);
    }

    return hr;
}

HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, struct d3d12_device *device)
{
    HRESULT hr;
    int rc;

    TRACE("worker %p.\n", worker);
    if ((rc = pthread_mutex_lock(&worker->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return hresult_from_errno(rc);
    }

    worker->should_exit = true;
    pthread_cond_signal(&worker->cond);
    pthread_mutex_unlock(&worker->mutex);

    if (FAILED(hr = vkd3d_join_thread(device->vkd3d_instance, &worker->thread)))
        return hr;

    pthread_mutex_destroy(&worker->mutex);
    pthread_cond_destroy(&worker->cond);

    vkd3d_free(worker->enqueued_fences);
    return S_OK;
}

static const struct vkd3d_shader_root_parameter *root_signature_get_parameter(
        const struct d3d12_root_signature *root_signature, unsigned int index)
{
    assert(index < root_signature->parameter_count);
    return &root_signature->parameters[index];
}

static const struct vkd3d_shader_descriptor_table *root_signature_get_descriptor_table(
        const struct d3d12_root_signature *root_signature, unsigned int index)
{
    const struct vkd3d_shader_root_parameter *p = root_signature_get_parameter(root_signature, index);
    assert(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE);
    return &p->descriptor_table;
}

static const struct vkd3d_shader_root_constant *root_signature_get_32bit_constants(
        const struct d3d12_root_signature *root_signature, unsigned int index)
{
    const struct vkd3d_shader_root_parameter *p = root_signature_get_parameter(root_signature, index);
    assert(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS);
    return &p->constant;
}

static const struct vkd3d_shader_root_parameter *root_signature_get_root_descriptor(
        const struct d3d12_root_signature *root_signature, unsigned int index)
{
    const struct vkd3d_shader_root_parameter *p = root_signature_get_parameter(root_signature, index);
    assert(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV
            || p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_SRV
            || p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_UAV);
    return p;
}

/* ID3D12Fence */
static void d3d12_fence_destroy_vk_objects(struct d3d12_fence *fence)
{
    const struct vkd3d_vk_device_procs *vk_procs;
    struct d3d12_device *device = fence->device;
    int rc;

    if ((rc = pthread_mutex_lock(&fence->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return;
    }

    vk_procs = &device->vk_procs;
    VK_CALL(vkDestroySemaphore(device->vk_device, fence->timeline_semaphore, NULL));
    pthread_mutex_unlock(&fence->mutex);
}

static void d3d12_fence_inc_ref(struct d3d12_fence *fence)
{
    InterlockedIncrement(&fence->refcount_internal);
}

static void d3d12_fence_dec_ref(struct d3d12_fence *fence)
{
    ULONG refcount_internal = InterlockedDecrement(&fence->refcount_internal);

    if (!refcount_internal)
    {
        vkd3d_private_store_destroy(&fence->private_store);
        d3d12_fence_destroy_vk_objects(fence);
        vkd3d_free(fence->events);
        vkd3d_free(fence->pending_updates);
        pthread_mutex_destroy(&fence->mutex);
        pthread_cond_destroy(&fence->cond);
        pthread_cond_destroy(&fence->null_event_cond);
        vkd3d_free(fence);
    }
}

HRESULT d3d12_fence_signal_event(struct d3d12_fence *fence, HANDLE event, enum vkd3d_waiting_event_type type)
{
    switch (type)
    {
        case VKD3D_WAITING_EVENT_TYPE_EVENT:
            return fence->device->signal_event(event);

        case VKD3D_WAITING_EVENT_TYPE_SEMAPHORE:
#ifdef _WIN32
            /* Failing to release semaphore is expected if the counter exceeds the maximum limit.
             * If the application does not wait for the semaphore once per present, this
             * will eventually happen. */
            if (!ReleaseSemaphore(event, 1, NULL))
                WARN("Failed to release semaphore. Application likely forgot to wait for presentation event.\n");
            return S_OK;
#else
            ERR("Semaphores not supported on this platform.\n");
            return E_NOTIMPL;
#endif
    }

    ERR("Unhandled waiting event type %u.\n", type);
    return E_INVALIDARG;
}

static void d3d12_fence_signal_external_events_locked(struct d3d12_fence *fence)
{
    bool signal_null_event_cond = false;
    unsigned int i, j;
    HRESULT hr;

    for (i = 0, j = 0; i < fence->event_count; ++i)
    {
        struct vkd3d_waiting_event *current = &fence->events[i];

        if (current->value <= fence->virtual_value)
        {
            if (current->event)
            {
                if (FAILED(hr = d3d12_fence_signal_event(fence, current->event, current->type)))
                    ERR("Failed to signal event, hr #%x.\n", hr);
            }
            else
            {
                *current->latch = true;
                signal_null_event_cond = true;
            }
        }
        else
        {
            if (i != j)
                fence->events[j] = *current;
            ++j;
        }
    }

    fence->event_count = j;

    if (signal_null_event_cond)
        pthread_cond_broadcast(&fence->null_event_cond);
}

static void d3d12_fence_block_until_pending_value_reaches_locked(struct d3d12_fence *fence, UINT64 pending_value)
{
    while (pending_value > fence->max_pending_virtual_timeline_value)
    {
        TRACE("Blocking wait on fence %p until it reaches 0x%"PRIx64".\n", fence, pending_value);
        pthread_cond_wait(&fence->cond, &fence->mutex);
    }
}

static void d3d12_fence_update_pending_value_locked(struct d3d12_fence *fence)
{
    uint64_t new_max_pending_virtual_timeline_value = 0;
    size_t i;

    for (i = 0; i < fence->pending_updates_count; i++)
        new_max_pending_virtual_timeline_value = max(fence->pending_updates[i].virtual_value,
                new_max_pending_virtual_timeline_value);
    new_max_pending_virtual_timeline_value = max(fence->virtual_value, new_max_pending_virtual_timeline_value);

    /* If we're signalling the fence, wake up any submission threads which can now safely kick work. */
    fence->max_pending_virtual_timeline_value = new_max_pending_virtual_timeline_value;
    pthread_cond_broadcast(&fence->cond);
}

static void d3d12_fence_lock(struct d3d12_fence *fence)
{
    pthread_mutex_lock(&fence->mutex);
}

static void d3d12_fence_unlock(struct d3d12_fence *fence)
{
    pthread_mutex_unlock(&fence->mutex);
}

static bool d3d12_fence_can_elide_wait_semaphore_locked(struct d3d12_fence *fence, uint64_t value,
        const struct vkd3d_queue *waiting_queue)
{
    unsigned int i;

    /* Relevant if the semaphore has been signalled already on host.
     * We should not wait on the timeline semaphore directly, we can simply submit in-place. */
    if (fence->virtual_value >= value)
        return true;

    /* We can elide a wait if we can use the submission order guarantee.
     * If there is a pending signal on this queue which will satisfy the wait,
     * submission barrier will implicitly complete the wait,
     * and we don't have to eat the overhead of submitting an extra wait on top.
     * This will essentially always trigger on single-queue. */
    for (i = 0; i < fence->pending_updates_count; i++)
    {
        if (fence->pending_updates[i].signalling_queue == waiting_queue
                && fence->pending_updates[i].virtual_value >= value)
            return true;
    }

    return false;
}

static HRESULT d3d12_fence_signal_cpu_timeline_semaphore(struct d3d12_fence *fence, uint64_t value)
{
    int rc;

    if ((rc = pthread_mutex_lock(&fence->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return hresult_from_errno(rc);
    }

    fence->virtual_value = value;
    d3d12_fence_signal_external_events_locked(fence);
    d3d12_fence_update_pending_value_locked(fence);
    pthread_mutex_unlock(&fence->mutex);
    return S_OK;
}

static uint64_t d3d12_fence_add_pending_signal_locked(struct d3d12_fence *fence, uint64_t virtual_value,
        const struct vkd3d_queue *signalling_queue)
{
    struct d3d12_fence_value *update;
    vkd3d_array_reserve((void**)&fence->pending_updates, &fence->pending_updates_size,
            fence->pending_updates_count + 1, sizeof(*fence->pending_updates));

    update = &fence->pending_updates[fence->pending_updates_count++];
    update->virtual_value = virtual_value;
    update->physical_value = ++fence->counter;
    update->signalling_queue = signalling_queue;
    return fence->counter;
}

static uint64_t d3d12_fence_get_physical_wait_value_locked(struct d3d12_fence *fence, uint64_t virtual_value)
{
    uint64_t target_physical_value = UINT64_MAX;
    size_t i;

    /* This shouldn't happen, we will have elided the wait completely in can_elide_wait_semaphore_locked. */
    assert(virtual_value > fence->virtual_value);

    /* Find the smallest physical value which is at least the virtual value. */
    for (i = 0; i < fence->pending_updates_count; i++)
        if (virtual_value <= fence->pending_updates[i].virtual_value)
            target_physical_value = min(target_physical_value, fence->pending_updates[i].physical_value);

    if (target_physical_value == UINT64_MAX)
    {
        FIXME("Cannot find a pending physical wait value. Emitting a noop wait.\n");
        return 0;
    }
    else
        return target_physical_value;
}

static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t physical_value)
{
    bool did_signal;
    size_t i;
    int rc;

    if ((rc = pthread_mutex_lock(&fence->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return hresult_from_errno(rc);
    }

    /* With multiple fence workers, it is possible that signal calls are
     * out of order. The physical value itself is monotonic, but we need to
     * make sure that all signals happen in correct order if there are fence rewinds.
     * We don't expect the loop to run more than once,
     * but there might be extreme edge cases where we signal 2 or more. */
    while (fence->physical_value < physical_value)
    {
        fence->physical_value++;
        did_signal = false;

        for (i = 0; i < fence->pending_updates_count; i++)
        {
            if (fence->physical_value == fence->pending_updates[i].physical_value)
            {
                fence->virtual_value = fence->pending_updates[i].virtual_value;
                d3d12_fence_signal_external_events_locked(fence);
                fence->pending_updates[i] = fence->pending_updates[--fence->pending_updates_count];
                did_signal = true;
                break;
            }
        }

        if (!did_signal)
            FIXME("Did not signal a virtual value?\n");
    }

    /* In case we have a rewind signalled from GPU, we need to recompute the max pending timeline value. */
    d3d12_fence_update_pending_value_locked(fence);
    pthread_mutex_unlock(&fence->mutex);
    return S_OK;
}

static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(d3d12_fence_iface *iface,
        REFIID riid, void **object)
{
    TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object);

    if (IsEqualGUID(riid, &IID_ID3D12Fence)
            || IsEqualGUID(riid, &IID_ID3D12Fence1)
            || IsEqualGUID(riid, &IID_ID3D12Pageable)
            || IsEqualGUID(riid, &IID_ID3D12DeviceChild)
            || IsEqualGUID(riid, &IID_ID3D12Object)
            || IsEqualGUID(riid, &IID_IUnknown))
    {
        ID3D12Fence_AddRef(iface);
        *object = iface;
        return S_OK;
    }

    WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid));

    *object = NULL;
    return E_NOINTERFACE;
}

static ULONG STDMETHODCALLTYPE d3d12_fence_AddRef(d3d12_fence_iface *iface)
{
    struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface);
    ULONG refcount = InterlockedIncrement(&fence->refcount);

    TRACE("%p increasing refcount to %u.\n", fence, refcount);

    return refcount;
}

static ULONG STDMETHODCALLTYPE d3d12_fence_Release(d3d12_fence_iface *iface)
{
    struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface);
    ULONG refcount = InterlockedDecrement(&fence->refcount);

    TRACE("%p decreasing refcount to %u.\n", fence, refcount);

    if (!refcount)
    {
        struct d3d12_device *device = fence->device;

        /* When a fence's public ref-count hits zero, all waiters must be released.
         * NOTE: For shared fences later,
         * we cannot signal here since we cannot know if there are other fences.
         * According to our tests, the fence unblocks all waiters when the last reference
         * to the shared HANDLE is released. This is completely outside the scope of what we can
         * reasonably implement ourselves. For now, the plan is to wait with timeout
         * and mark "TDR" if that ever happens in real world usage. */
        if (!(fence->d3d12_flags & D3D12_FENCE_FLAG_SHARED))
            d3d12_fence_signal_cpu_timeline_semaphore(fence, UINT64_MAX);

        d3d12_fence_dec_ref(fence);
        d3d12_device_release(device);
    }

    return refcount;
}

static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(d3d12_fence_iface *iface,
        REFGUID guid, UINT *data_size, void *data)
{
    struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface);

    TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data);

    return vkd3d_get_private_data(&fence->private_store, guid, data_size, data);
}

static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(d3d12_fence_iface *iface,
        REFGUID guid, UINT data_size, const void *data)
{
    struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface);

    TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data);

    return vkd3d_set_private_data(&fence->private_store, guid, data_size, data, NULL, NULL);
}

static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateDataInterface(d3d12_fence_iface *iface,
        REFGUID guid, const IUnknown *data)
{
    struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface);

    TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data);

    return vkd3d_set_private_data_interface(&fence->private_store, guid, data, NULL, NULL);
}

static HRESULT STDMETHODCALLTYPE d3d12_fence_GetDevice(d3d12_fence_iface *iface, REFIID iid, void **device)
{
    struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface);

    TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device);

    return d3d12_device_query_interface(fence->device, iid, device);
}

static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(d3d12_fence_iface *iface)
{
    struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface);
    uint64_t completed_value;
    int rc;

    TRACE("iface %p.\n", iface);

    if ((rc = pthread_mutex_lock(&fence->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return 0;
    }
    completed_value = fence->virtual_value;
    pthread_mutex_unlock(&fence->mutex);
    return completed_value;
}

HRESULT d3d12_fence_set_event_on_completion(struct d3d12_fence *fence,
        UINT64 value, HANDLE event, enum vkd3d_waiting_event_type type)
{
    unsigned int i;
    HRESULT hr;
    bool latch;
    int rc;

    if ((rc = pthread_mutex_lock(&fence->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return hresult_from_errno(rc);
    }

    if (value <= fence->virtual_value)
    {
        if (event)
        {
            if (FAILED(hr = d3d12_fence_signal_event(fence, event, type)))
            {
                ERR("Failed to signal event, hr #%x.\n", hr);
                pthread_mutex_unlock(&fence->mutex);
                return hr;
            }
        }

        pthread_mutex_unlock(&fence->mutex);
        return S_OK;
    }

    for (i = 0; i < fence->event_count; ++i)
    {
        struct vkd3d_waiting_event *current = &fence->events[i];
        if (current->value == value && event && current->event == event)
        {
            WARN("Event completion for (%p, %#"PRIx64") is already in the list.\n", event, value);
            pthread_mutex_unlock(&fence->mutex);
            return S_OK;
        }
    }

    if (!vkd3d_array_reserve((void **)&fence->events, &fence->events_size,
            fence->event_count + 1, sizeof(*fence->events)))
    {
        WARN("Failed to add event.\n");
        pthread_mutex_unlock(&fence->mutex);
        return E_OUTOFMEMORY;
    }

    fence->events[fence->event_count].value = value;
    fence->events[fence->event_count].event = event;
    fence->events[fence->event_count].type = type;
    fence->events[fence->event_count].latch = &latch;
    ++fence->event_count;

    /* If event is NULL, we need to block until the fence value completes.
     * Implement this in a uniform way where we pretend we have a dummy event.
     * A NULL fence->events[].event means that we should set latch to true
     * and signal a condition variable instead of calling external signal_event callback. */
    if (!event)
    {
        latch = false;
        while (!latch)
            pthread_cond_wait(&fence->null_event_cond, &fence->mutex);
    }

    pthread_mutex_unlock(&fence->mutex);
    return S_OK;
}

static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(d3d12_fence_iface *iface,
        UINT64 value, HANDLE event)
{
    struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface);

    TRACE("iface %p, value %#"PRIx64", event %p.\n", iface, value, event);

    return d3d12_fence_set_event_on_completion(fence, value, event, VKD3D_WAITING_EVENT_TYPE_EVENT);
}

static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(d3d12_fence_iface *iface, UINT64 value)
{
    struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface);

    TRACE("iface %p, value %#"PRIx64".\n", iface, value);

    return d3d12_fence_signal_cpu_timeline_semaphore(fence, value);
}

static D3D12_FENCE_FLAGS STDMETHODCALLTYPE d3d12_fence_GetCreationFlags(d3d12_fence_iface *iface)
{
    struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface);

    TRACE("iface %p.\n", iface);

    return fence->d3d12_flags;
}

CONST_VTBL struct ID3D12Fence1Vtbl d3d12_fence_vtbl =
{
    /* IUnknown methods */
    d3d12_fence_QueryInterface,
    d3d12_fence_AddRef,
    d3d12_fence_Release,
    /* ID3D12Object methods */
    d3d12_fence_GetPrivateData,
    d3d12_fence_SetPrivateData,
    d3d12_fence_SetPrivateDataInterface,
    (void *)d3d12_object_SetName,
    /* ID3D12DeviceChild methods */
    d3d12_fence_GetDevice,
    /* ID3D12Fence methods */
    d3d12_fence_GetCompletedValue,
    d3d12_fence_SetEventOnCompletion,
    d3d12_fence_Signal,
    /* ID3D12Fence1 methods */
    d3d12_fence_GetCreationFlags,
};

static HRESULT d3d12_fence_init_timeline(struct d3d12_fence *fence, struct d3d12_device *device,
        UINT64 initial_value)
{
    fence->virtual_value = initial_value;
    fence->max_pending_virtual_timeline_value = initial_value;
    fence->physical_value = 0;
    fence->counter = 0;
    return vkd3d_create_timeline_semaphore(device, 0, &fence->timeline_semaphore);
}

static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *device,
        UINT64 initial_value, D3D12_FENCE_FLAGS flags)
{
    HRESULT hr;
    int rc;

    fence->ID3D12Fence_iface.lpVtbl = &d3d12_fence_vtbl;
    fence->refcount_internal = 1;
    fence->refcount = 1;
    fence->d3d12_flags = flags;

    if (FAILED(hr = d3d12_fence_init_timeline(fence, device, initial_value)))
        return hr;

    if ((rc = pthread_mutex_init(&fence->mutex, NULL)))
    {
        ERR("Failed to initialize mutex, error %d.\n", rc);
        return hresult_from_errno(rc);
    }

    if ((rc = pthread_cond_init(&fence->cond, NULL)))
    {
        ERR("Failed to initialize cond variable, error %d.\n", rc);
        pthread_mutex_destroy(&fence->mutex);
        return hresult_from_errno(rc);
    }

    if ((rc = pthread_cond_init(&fence->null_event_cond, NULL)))
    {
        ERR("Failed to initialize cond variable, error %d.\n", rc);
        pthread_mutex_destroy(&fence->mutex);
        pthread_cond_destroy(&fence->cond);
        return hresult_from_errno(rc);
    }

    if (flags)
        FIXME("Ignoring flags %#x.\n", flags);

    fence->events = NULL;
    fence->events_size = 0;
    fence->event_count = 0;

    fence->pending_updates = NULL;
    fence->pending_updates_count = 0;
    fence->pending_updates_size = 0;

    if (FAILED(hr = vkd3d_private_store_init(&fence->private_store)))
    {
        pthread_mutex_destroy(&fence->mutex);
        pthread_cond_destroy(&fence->cond);
        pthread_cond_destroy(&fence->null_event_cond);
        return hr;
    }

    d3d12_device_add_ref(fence->device = device);

    return S_OK;
}
HRESULT d3d12_fence_create(struct d3d12_device *device, uint64_t initial_value,
        D3D12_FENCE_FLAGS flags, struct d3d12_fence **fence)
{
    struct d3d12_fence *object;
    HRESULT hr;

    if (!(object = vkd3d_malloc(sizeof(*object))))
        return E_OUTOFMEMORY;

    if (SUCCEEDED(hr = d3d12_fence_init(object, device, initial_value, flags)))
        TRACE("Created fence %p.\n", object);
    else
        ERR("Failed to create fence.\n");

    *fence = object;
    return hr;
}

/* Command buffers */
static void d3d12_command_list_mark_as_invalid(struct d3d12_command_list *list, const char *message, ...)
{
    va_list args;

    va_start(args, message);
    WARN("Command list %p is invalid: \"%s\".\n", list, vkd3d_dbg_vsprintf(message, args));
    va_end(args);

    list->is_valid = false;
}

static HRESULT d3d12_command_list_begin_command_buffer(struct d3d12_command_list *list)
{
    struct d3d12_device *device = list->device;
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    VkCommandBufferBeginInfo begin_info;
    VkResult vr;

    begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
    begin_info.pNext = NULL;
    begin_info.flags = 0;
    begin_info.pInheritanceInfo = NULL;

    if ((vr = VK_CALL(vkBeginCommandBuffer(list->vk_command_buffer, &begin_info))) < 0)
    {
        WARN("Failed to begin command buffer, vr %d.\n", vr);
        return hresult_from_vk_result(vr);
    }

    list->is_recording = true;
    list->is_valid = true;

    return S_OK;
}

static HRESULT d3d12_command_allocator_allocate_command_buffer(struct d3d12_command_allocator *allocator,
        struct d3d12_command_list *list)
{
    struct d3d12_device *device = allocator->device;
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    VkCommandBufferAllocateInfo command_buffer_info;
    VkResult vr;
    HRESULT hr;

    TRACE("allocator %p, list %p.\n", allocator, list);

    if (allocator->current_command_list)
    {
        WARN("Command allocator is already in use.\n");
        return E_INVALIDARG;
    }

    command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
    command_buffer_info.pNext = NULL;
    command_buffer_info.commandPool = allocator->vk_command_pool;
    command_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
    command_buffer_info.commandBufferCount = 1;

    if ((vr = VK_CALL(vkAllocateCommandBuffers(device->vk_device, &command_buffer_info,
            &list->vk_command_buffer))) < 0)
    {
        WARN("Failed to allocate Vulkan command buffer, vr %d.\n", vr);
        return hresult_from_vk_result(vr);
    }

    list->vk_init_commands = VK_NULL_HANDLE;
    list->vk_queue_flags = allocator->vk_queue_flags;

    if (FAILED(hr = d3d12_command_list_begin_command_buffer(list)))
    {
        VK_CALL(vkFreeCommandBuffers(device->vk_device, allocator->vk_command_pool,
                1, &list->vk_command_buffer));
        return hr;
    }

#ifdef VKD3D_ENABLE_BREADCRUMBS
    if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS)
    {
        vkd3d_breadcrumb_tracer_allocate_command_list(&allocator->device->breadcrumb_tracer, list, allocator);
        vkd3d_breadcrumb_tracer_begin_command_list(list);
    }
#endif

    allocator->current_command_list = list;
    list->outstanding_submissions_count = &allocator->outstanding_submissions_count;
    return S_OK;
}

static HRESULT d3d12_command_allocator_allocate_init_command_buffer(struct d3d12_command_allocator *allocator,
        struct d3d12_command_list *list)
{
    struct d3d12_device *device = allocator->device;
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    VkCommandBufferAllocateInfo command_buffer_info;
    VkCommandBufferBeginInfo begin_info;
    VkResult vr;

    TRACE("allocator %p, list %p.\n", allocator, list);

    if (list->vk_init_commands)
        return S_OK;

    command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
    command_buffer_info.pNext = NULL;
    command_buffer_info.commandPool = allocator->vk_command_pool;
    command_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
    command_buffer_info.commandBufferCount = 1;

    if ((vr = VK_CALL(vkAllocateCommandBuffers(device->vk_device, &command_buffer_info,
            &list->vk_init_commands))) < 0)
    {
        WARN("Failed to allocate Vulkan command buffer, vr %d.\n", vr);
        return hresult_from_vk_result(vr);
    }

    begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
    begin_info.pNext = NULL;
    begin_info.flags = 0;
    begin_info.pInheritanceInfo = NULL;

    if ((vr = VK_CALL(vkBeginCommandBuffer(list->vk_init_commands, &begin_info))) < 0)
    {
        WARN("Failed to begin command buffer, vr %d.\n", vr);
        VK_CALL(vkFreeCommandBuffers(device->vk_device, allocator->vk_command_pool,
                1, &list->vk_init_commands));
        return hresult_from_vk_result(vr);
    }

    return S_OK;
}

static void d3d12_command_allocator_free_vk_command_buffer(struct d3d12_command_allocator *allocator,
        VkCommandBuffer vk_command_buffer)
{
    struct d3d12_device *device = allocator->device;
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;

    if (!vk_command_buffer)
        return;

    if (!vkd3d_array_reserve((void **)&allocator->command_buffers, &allocator->command_buffers_size,
            allocator->command_buffer_count + 1, sizeof(*allocator->command_buffers)))
    {
        WARN("Failed to add command buffer.\n");
        VK_CALL(vkFreeCommandBuffers(device->vk_device, allocator->vk_command_pool,
                1, &vk_command_buffer));
    }
    else
        allocator->command_buffers[allocator->command_buffer_count++] = vk_command_buffer;
}

static void d3d12_command_allocator_free_command_buffer(struct d3d12_command_allocator *allocator,
        struct d3d12_command_list *list)
{
    TRACE("allocator %p, list %p.\n", allocator, list);

    if (allocator->current_command_list == list)
        allocator->current_command_list = NULL;

    d3d12_command_allocator_free_vk_command_buffer(allocator, list->vk_command_buffer);
    d3d12_command_allocator_free_vk_command_buffer(allocator, list->vk_init_commands);
}

static bool d3d12_command_allocator_add_descriptor_pool(struct d3d12_command_allocator *allocator,
        VkDescriptorPool pool, enum vkd3d_descriptor_pool_types pool_type)
{
    struct d3d12_descriptor_pool_cache *cache = &allocator->descriptor_pool_caches[pool_type];

    if (!vkd3d_array_reserve((void **)&cache->descriptor_pools, &cache->descriptor_pools_size,
            cache->descriptor_pool_count + 1, sizeof(*cache->descriptor_pools)))
        return false;

    cache->descriptor_pools[cache->descriptor_pool_count++] = pool;

    return true;
}

static bool d3d12_command_allocator_add_view(struct d3d12_command_allocator *allocator,
        struct vkd3d_view *view)
{
    if (!vkd3d_array_reserve((void **)&allocator->views, &allocator->views_size,
            allocator->view_count + 1, sizeof(*allocator->views)))
        return false;

    vkd3d_view_incref(view);
    allocator->views[allocator->view_count++] = view;

    return true;
}

static bool d3d12_command_allocator_add_buffer_view(struct d3d12_command_allocator *allocator,
        VkBufferView view)
{
    if (!vkd3d_array_reserve((void **)&allocator->buffer_views, &allocator->buffer_views_size,
            allocator->buffer_view_count + 1, sizeof(*allocator->buffer_views)))
        return false;

    allocator->buffer_views[allocator->buffer_view_count++] = view;

    return true;
}

static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool(
        struct d3d12_command_allocator *allocator, enum vkd3d_descriptor_pool_types pool_type)
{
    static const VkDescriptorPoolSize pool_sizes[] =
    {
        {VK_DESCRIPTOR_TYPE_SAMPLER, 2048},
        {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1024},
        {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1024},
        {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1024},
        {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 1024},
        {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1024},
        /* must be last in the array */
        {VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT, 65536}
    };

    struct d3d12_descriptor_pool_cache *cache = &allocator->descriptor_pool_caches[pool_type];
    struct d3d12_device *device = allocator->device;
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    VkDescriptorPoolInlineUniformBlockCreateInfoEXT inline_uniform_desc;
    VkDescriptorPoolCreateInfo pool_desc;
    VkDevice vk_device = device->vk_device;
    VkDescriptorPool vk_pool;
    VkResult vr;

    if (cache->free_descriptor_pool_count > 0)
    {
        vk_pool = cache->free_descriptor_pools[cache->free_descriptor_pool_count - 1];
        cache->free_descriptor_pools[cache->free_descriptor_pool_count - 1] = VK_NULL_HANDLE;
        --cache->free_descriptor_pool_count;
    }
    else
    {
        inline_uniform_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT;
        inline_uniform_desc.pNext = NULL;
        inline_uniform_desc.maxInlineUniformBlockBindings = 256;

        pool_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
        pool_desc.pNext = &inline_uniform_desc;
        pool_desc.flags = 0;
        pool_desc.maxSets = 512;
        pool_desc.poolSizeCount = ARRAY_SIZE(pool_sizes);
        pool_desc.pPoolSizes = pool_sizes;

        if (!device->vk_info.EXT_inline_uniform_block
                || device->vk_info.device_limits.maxPushConstantsSize >= (D3D12_MAX_ROOT_COST * sizeof(uint32_t)))
        {
            pool_desc.pNext = NULL;
            pool_desc.poolSizeCount -= 1;
        }

        if ((vr = VK_CALL(vkCreateDescriptorPool(vk_device, &pool_desc, NULL, &vk_pool))) < 0)
        {
            ERR("Failed to create descriptor pool, vr %d.\n", vr);
            return VK_NULL_HANDLE;
        }
    }

    if (!(d3d12_command_allocator_add_descriptor_pool(allocator, vk_pool, pool_type)))
    {
        ERR("Failed to add descriptor pool.\n");
        VK_CALL(vkDestroyDescriptorPool(vk_device, vk_pool, NULL));
        return VK_NULL_HANDLE;
    }

    return vk_pool;
}

static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set(
        struct d3d12_command_allocator *allocator, VkDescriptorSetLayout vk_set_layout,
        enum vkd3d_descriptor_pool_types pool_type)
{
    struct d3d12_descriptor_pool_cache *cache = &allocator->descriptor_pool_caches[pool_type];
    struct d3d12_device *device = allocator->device;
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    struct VkDescriptorSetAllocateInfo set_desc;
    VkDevice vk_device = device->vk_device;
    VkDescriptorSet vk_descriptor_set;
    VkResult vr;

    if (!cache->vk_descriptor_pool)
        cache->vk_descriptor_pool = d3d12_command_allocator_allocate_descriptor_pool(allocator, pool_type);
    if (!cache->vk_descriptor_pool)
        return VK_NULL_HANDLE;

    set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
    set_desc.pNext = NULL;
    set_desc.descriptorPool = cache->vk_descriptor_pool;
    set_desc.descriptorSetCount = 1;
    set_desc.pSetLayouts = &vk_set_layout;
    if ((vr = VK_CALL(vkAllocateDescriptorSets(vk_device, &set_desc, &vk_descriptor_set))) >= 0)
        return vk_descriptor_set;

    cache->vk_descriptor_pool = VK_NULL_HANDLE;
    if (vr == VK_ERROR_FRAGMENTED_POOL || vr == VK_ERROR_OUT_OF_POOL_MEMORY_KHR)
        cache->vk_descriptor_pool = d3d12_command_allocator_allocate_descriptor_pool(allocator, pool_type);

    if (!cache->vk_descriptor_pool)
    {
        ERR("Failed to allocate descriptor set, vr %d.\n", vr);
        return VK_NULL_HANDLE;
    }

    set_desc.descriptorPool = cache->vk_descriptor_pool;
    if ((vr = VK_CALL(vkAllocateDescriptorSets(vk_device, &set_desc, &vk_descriptor_set))) < 0)
    {
        FIXME("Failed to allocate descriptor set from a new pool, vr %d.\n", vr);
        return VK_NULL_HANDLE;
    }

    return vk_descriptor_set;
}
static void d3d12_command_list_allocator_destroyed(struct d3d12_command_list *list)
{
    TRACE("list %p.\n", list);

    list->allocator = NULL;
    list->vk_command_buffer = VK_NULL_HANDLE;
    list->vk_init_commands = VK_NULL_HANDLE;
}

static void d3d12_command_allocator_free_descriptor_pool_cache(struct d3d12_command_allocator *allocator,
        struct d3d12_descriptor_pool_cache *cache, bool keep_reusable_resources)
{
    struct d3d12_device *device = allocator->device;
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    unsigned int i, j;

    cache->vk_descriptor_pool = VK_NULL_HANDLE;

    if (keep_reusable_resources)
    {
        if (vkd3d_array_reserve((void **)&cache->free_descriptor_pools,
                &cache->free_descriptor_pools_size,
                cache->free_descriptor_pool_count + cache->descriptor_pool_count,
                sizeof(*cache->free_descriptor_pools)))
        {
            for (i = 0, j = cache->free_descriptor_pool_count; i < cache->descriptor_pool_count; ++i, ++j)
            {
                VK_CALL(vkResetDescriptorPool(device->vk_device, cache->descriptor_pools[i], 0));
                cache->free_descriptor_pools[j] = cache->descriptor_pools[i];
            }
            cache->free_descriptor_pool_count += cache->descriptor_pool_count;
            cache->descriptor_pool_count = 0;
        }
    }
    else
    {
        for (i = 0; i < cache->free_descriptor_pool_count; ++i)
        {
            VK_CALL(vkDestroyDescriptorPool(device->vk_device, cache->free_descriptor_pools[i], NULL));
        }
        cache->free_descriptor_pool_count = 0;
    }

    for (i = 0; i < cache->descriptor_pool_count; ++i)
    {
        VK_CALL(vkDestroyDescriptorPool(device->vk_device, cache->descriptor_pools[i], NULL));
    }
    cache->descriptor_pool_count = 0;
}

static void d3d12_command_allocator_free_resources(struct d3d12_command_allocator *allocator,
        bool keep_reusable_resources)
{
    struct d3d12_device *device = allocator->device;
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    unsigned int i;

    for (i = 0; i < VKD3D_DESCRIPTOR_POOL_TYPE_COUNT; i++)
    {
        d3d12_command_allocator_free_descriptor_pool_cache(allocator,
                &allocator->descriptor_pool_caches[i],
                keep_reusable_resources);
    }

    for (i = 0; i < allocator->buffer_view_count; ++i)
    {
        VK_CALL(vkDestroyBufferView(device->vk_device, allocator->buffer_views[i], NULL));
    }
    allocator->buffer_view_count = 0;

    for (i = 0; i < allocator->view_count; ++i)
    {
        vkd3d_view_decref(allocator->views[i], device);
    }
    allocator->view_count = 0;
}

static void d3d12_command_allocator_set_name(struct d3d12_command_allocator *allocator, const char *name)
{
    vkd3d_set_vk_object_name(allocator->device, (uint64_t)allocator->vk_command_pool,
            VK_OBJECT_TYPE_COMMAND_POOL, name);
}

/* ID3D12CommandAllocator */
static inline struct d3d12_command_allocator *impl_from_ID3D12CommandAllocator(ID3D12CommandAllocator *iface)
{
    return CONTAINING_RECORD(iface, struct d3d12_command_allocator, ID3D12CommandAllocator_iface);
}

static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_QueryInterface(ID3D12CommandAllocator *iface,
        REFIID riid, void **object)
{
    TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object);

    if (IsEqualGUID(riid, &IID_ID3D12CommandAllocator)
            || IsEqualGUID(riid, &IID_ID3D12Pageable)
            || IsEqualGUID(riid, &IID_ID3D12DeviceChild)
            || IsEqualGUID(riid, &IID_ID3D12Object)
            || IsEqualGUID(riid, &IID_IUnknown))
    {
        ID3D12CommandAllocator_AddRef(iface);
        *object = iface;
        return S_OK;
    }

    WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid));

    *object = NULL;
    return E_NOINTERFACE;
}

static ULONG STDMETHODCALLTYPE d3d12_command_allocator_AddRef(ID3D12CommandAllocator *iface)
{
    struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface);
    ULONG refcount = InterlockedIncrement(&allocator->refcount);

    TRACE("%p increasing refcount to %u.\n", allocator, refcount);

    return refcount;
}

static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllocator *iface)
{
    struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface);
    ULONG refcount = InterlockedDecrement(&allocator->refcount);
    unsigned int i, j;
    LONG pending;

    TRACE("%p decreasing refcount to %u.\n", allocator, refcount);

    if (!refcount)
    {
        struct d3d12_device *device = allocator->device;
        const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;

        vkd3d_private_store_destroy(&allocator->private_store);

        if ((pending = vkd3d_atomic_uint32_load_explicit(&allocator->outstanding_submissions_count,
                vkd3d_memory_order_acquire)) != 0)
        {
            /* Nothing we can do about this other than report the error. Might find some game bugs! */
            ERR("Attempting to free command allocator, but there are still %u pending submissions!\n",
                    (unsigned int)allocator->outstanding_submissions_count);
        }

        if (allocator->current_command_list)
            d3d12_command_list_allocator_destroyed(allocator->current_command_list);

        d3d12_command_allocator_free_resources(allocator, false);
        vkd3d_free(allocator->buffer_views);
        vkd3d_free(allocator->views);
        for (i = 0; i < VKD3D_DESCRIPTOR_POOL_TYPE_COUNT; i++)
        {
            vkd3d_free(allocator->descriptor_pool_caches[i].descriptor_pools);
            vkd3d_free(allocator->descriptor_pool_caches[i].free_descriptor_pools);
        }

        if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_RECYCLE_COMMAND_POOLS)
        {
            /* Don't want to do this unless we have to, so hide it behind a config.
             * For well-behaving apps, we'll just bloat memory. */
            if (pthread_mutex_lock(&device->mutex) == 0)
            {
                if (device->cached_command_allocator_count < ARRAY_SIZE(device->cached_command_allocators))
                {
                    /* Recycle the pool. Some games spam free/allocate pools,
                     * even if it completely goes against the point of the API. */

                    /* Have to free command buffers here if we're going to recycle,
                     * otherwise DestroyCommandPool takes care of it. */
                    VK_CALL(vkFreeCommandBuffers(device->vk_device, allocator->vk_command_pool,
                            allocator->command_buffer_count, allocator->command_buffers));
                    VK_CALL(vkResetCommandPool(device->vk_device, allocator->vk_command_pool, 0));

                    device->cached_command_allocators[device->cached_command_allocator_count].vk_command_pool =
                            allocator->vk_command_pool;
                    device->cached_command_allocators[device->cached_command_allocator_count].vk_family_index =
                            allocator->vk_family_index;
                    device->cached_command_allocator_count++;
                    allocator->vk_command_pool = VK_NULL_HANDLE;
                }

                pthread_mutex_unlock(&device->mutex);
            }
        }

        /* Command buffers are implicitly freed when destroying the pool. */
        vkd3d_free(allocator->command_buffers);
        VK_CALL(vkDestroyCommandPool(device->vk_device, allocator->vk_command_pool, NULL));

        for (i = 0; i < VKD3D_SCRATCH_POOL_KIND_COUNT; i++)
        {
            for (j = 0; j < allocator->scratch_pools[i].scratch_buffer_count; j++)
                d3d12_device_return_scratch_buffer(device, i, &allocator->scratch_pools[i].scratch_buffers[j]);
            vkd3d_free(allocator->scratch_pools[i].scratch_buffers);
        }

        for (i = 0; i < allocator->query_pool_count; i++)
            d3d12_device_return_query_pool(device, &allocator->query_pools[i]);

        vkd3d_free(allocator->query_pools);

#ifdef VKD3D_ENABLE_BREADCRUMBS
        if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS)
        {
            vkd3d_breadcrumb_tracer_release_command_lists(&device->breadcrumb_tracer,
                    allocator->breadcrumb_context_indices, allocator->breadcrumb_context_index_count);
            vkd3d_free(allocator->breadcrumb_context_indices);
        }
#endif

        vkd3d_free(allocator);

        d3d12_device_release(device);
    }

    return refcount;
}

static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_GetPrivateData(ID3D12CommandAllocator *iface,
        REFGUID guid, UINT *data_size, void *data)
{
    struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface);

    TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data);

    return vkd3d_get_private_data(&allocator->private_store, guid, data_size, data);
}

static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_SetPrivateData(ID3D12CommandAllocator *iface,
        REFGUID guid, UINT data_size, const void *data)
{
    struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface);

    TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data);

    return vkd3d_set_private_data(&allocator->private_store, guid, data_size, data,
            (vkd3d_set_name_callback) d3d12_command_allocator_set_name, allocator);
}

static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_SetPrivateDataInterface(ID3D12CommandAllocator *iface,
        REFGUID guid, const IUnknown *data)
{
    struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface);

    TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data);

    return vkd3d_set_private_data_interface(&allocator->private_store, guid, data,
            (vkd3d_set_name_callback) d3d12_command_allocator_set_name, allocator);
}

static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_GetDevice(ID3D12CommandAllocator *iface,
        REFIID iid, void **device)
{
    struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface);

    TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device);

    return d3d12_device_query_interface(allocator->device, iid, device);
}

static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllocator *iface)
{
    struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface);
    const struct vkd3d_vk_device_procs *vk_procs;
    struct d3d12_command_list *list;
    struct d3d12_device *device;
    LONG pending;
    VkResult vr;
    size_t i, j;

    TRACE("iface %p.\n", iface);

    if ((list = allocator->current_command_list))
    {
        if (list->is_recording)
        {
            WARN("A command list using this allocator is in the recording state.\n");
            return E_FAIL;
        }

        TRACE("Resetting command list %p.\n", list);
    }

    if ((pending = vkd3d_atomic_uint32_load_explicit(&allocator->outstanding_submissions_count,
            vkd3d_memory_order_acquire)) != 0)
    {
        /* HACK: There are currently command lists waiting to be submitted to the queue in the submission threads.
* Buggy application, but work around this by not resetting the command pool this time. * To be perfectly safe, we can only reset after the fence timeline is signalled, * however, this is enough to workaround SotTR which resets the command list right * after calling ID3D12CommandQueue::ExecuteCommandLists(). * Only happens once or twice on bootup and doesn't cause memory leaks over time * since the command pool is eventually reset. */ /* Runtime appears to detect this case, but does not return E_FAIL for whatever reason anymore. */ ERR("There are still %u pending command lists awaiting execution from command allocator iface %p!\n", (unsigned int)pending, iface); return S_OK; } device = allocator->device; vk_procs = &device->vk_procs; d3d12_command_allocator_free_resources(allocator, true); if (allocator->command_buffer_count) { VK_CALL(vkFreeCommandBuffers(device->vk_device, allocator->vk_command_pool, allocator->command_buffer_count, allocator->command_buffers)); allocator->command_buffer_count = 0; } /* The intent here is to recycle memory, so do not use RELEASE_RESOURCES_BIT here. */ if ((vr = VK_CALL(vkResetCommandPool(device->vk_device, allocator->vk_command_pool, 0)))) { WARN("Resetting command pool failed, vr %d.\n", vr); return hresult_from_vk_result(vr); } /* Return scratch buffers to the device */ for (i = 0; i < VKD3D_SCRATCH_POOL_KIND_COUNT; i++) { for (j = 0; j < allocator->scratch_pools[i].scratch_buffer_count; j++) d3d12_device_return_scratch_buffer(device, i, &allocator->scratch_pools[i].scratch_buffers[j]); allocator->scratch_pools[i].scratch_buffer_count = 0; } #ifdef VKD3D_ENABLE_BREADCRUMBS if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS) { /* Release breadcrumb references. */ vkd3d_breadcrumb_tracer_release_command_lists(&device->breadcrumb_tracer, allocator->breadcrumb_context_indices, allocator->breadcrumb_context_index_count); allocator->breadcrumb_context_index_count = 0; } #endif /* Return query pools to the device */ for (i = 0; i < allocator->query_pool_count; i++) d3d12_device_return_query_pool(device, &allocator->query_pools[i]); allocator->query_pool_count = 0; memset(&allocator->active_query_pools, 0, sizeof(allocator->active_query_pools)); return S_OK; } static CONST_VTBL struct ID3D12CommandAllocatorVtbl d3d12_command_allocator_vtbl = { /* IUnknown methods */ d3d12_command_allocator_QueryInterface, d3d12_command_allocator_AddRef, d3d12_command_allocator_Release, /* ID3D12Object methods */ d3d12_command_allocator_GetPrivateData, d3d12_command_allocator_SetPrivateData, d3d12_command_allocator_SetPrivateDataInterface, (void *)d3d12_object_SetName, /* ID3D12DeviceChild methods */ d3d12_command_allocator_GetDevice, /* ID3D12CommandAllocator methods */ d3d12_command_allocator_Reset, }; struct vkd3d_queue_family_info *d3d12_device_get_vkd3d_queue_family(struct d3d12_device *device, D3D12_COMMAND_LIST_TYPE type) { switch (type) { case D3D12_COMMAND_LIST_TYPE_DIRECT: return device->queue_families[VKD3D_QUEUE_FAMILY_GRAPHICS]; case D3D12_COMMAND_LIST_TYPE_COMPUTE: return device->queue_families[VKD3D_QUEUE_FAMILY_COMPUTE]; case D3D12_COMMAND_LIST_TYPE_COPY: return device->queue_families[VKD3D_QUEUE_FAMILY_TRANSFER]; default: FIXME("Unhandled command list type %#x.\n", type); return device->queue_families[VKD3D_QUEUE_FAMILY_GRAPHICS]; } } struct vkd3d_queue *d3d12_device_allocate_vkd3d_queue(struct d3d12_device *device, struct vkd3d_queue_family_info *queue_family) { struct vkd3d_queue *queue; unsigned int i; pthread_mutex_lock(&device->mutex); /* Select the 
queue that has the lowest number of virtual queues mapped * to it, in order to avoid situations where we map multiple queues to * the same vkd3d queue while others are unused */ queue = queue_family->queues[0]; for (i = 1; i < queue_family->queue_count; i++) { if (queue_family->queues[i]->virtual_queue_count < queue->virtual_queue_count) queue = queue_family->queues[i]; } queue->virtual_queue_count++; pthread_mutex_unlock(&device->mutex); return queue; } void d3d12_device_unmap_vkd3d_queue(struct d3d12_device *device, struct vkd3d_queue *queue) { pthread_mutex_lock(&device->mutex); queue->virtual_queue_count--; pthread_mutex_unlock(&device->mutex); } static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allocator, struct d3d12_device *device, D3D12_COMMAND_LIST_TYPE type) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct vkd3d_queue_family_info *queue_family; VkCommandPoolCreateInfo command_pool_info; VkResult vr; HRESULT hr; size_t i; if (FAILED(hr = vkd3d_private_store_init(&allocator->private_store))) return hr; queue_family = d3d12_device_get_vkd3d_queue_family(device, type); allocator->ID3D12CommandAllocator_iface.lpVtbl = &d3d12_command_allocator_vtbl; allocator->refcount = 1; allocator->outstanding_submissions_count = 0; allocator->type = type; allocator->vk_queue_flags = queue_family->vk_queue_flags; command_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; command_pool_info.pNext = NULL; /* Do not use RESET_COMMAND_BUFFER_BIT. This allows the CommandPool to be a D3D12-style command pool. * Memory is owned by the pool and CommandBuffers become lightweight handles, * assuming a half-decent driver implementation. */ command_pool_info.flags = 0; command_pool_info.queueFamilyIndex = queue_family->vk_family_index; allocator->vk_command_pool = VK_NULL_HANDLE; allocator->vk_family_index = queue_family->vk_family_index; if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_RECYCLE_COMMAND_POOLS) { /* Try to recycle command allocators. Some games spam free/allocate pools. 
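 * The device-level cache is keyed on the Vulkan queue family index, so a recycled pool is only ever
 * reused for an allocator that targets the same queue family.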
*/ if (pthread_mutex_lock(&device->mutex) == 0) { for (i = 0; i < device->cached_command_allocator_count; i++) { if (device->cached_command_allocators[i].vk_family_index == queue_family->vk_family_index) { allocator->vk_command_pool = device->cached_command_allocators[i].vk_command_pool; device->cached_command_allocators[i] = device->cached_command_allocators[--device->cached_command_allocator_count]; break; } } pthread_mutex_unlock(&device->mutex); } } if (allocator->vk_command_pool == VK_NULL_HANDLE) { if ((vr = VK_CALL(vkCreateCommandPool(device->vk_device, &command_pool_info, NULL, &allocator->vk_command_pool))) < 0) { WARN("Failed to create Vulkan command pool, vr %d.\n", vr); vkd3d_private_store_destroy(&allocator->private_store); return hresult_from_vk_result(vr); } } memset(allocator->descriptor_pool_caches, 0, sizeof(allocator->descriptor_pool_caches)); #ifdef VKD3D_ENABLE_BREADCRUMBS allocator->breadcrumb_context_indices = NULL; allocator->breadcrumb_context_index_count = 0; allocator->breadcrumb_context_index_size = 0; #endif allocator->views = NULL; allocator->views_size = 0; allocator->view_count = 0; allocator->buffer_views = NULL; allocator->buffer_views_size = 0; allocator->buffer_view_count = 0; allocator->command_buffers = NULL; allocator->command_buffers_size = 0; allocator->command_buffer_count = 0; memset(allocator->scratch_pools, 0, sizeof(allocator->scratch_pools)); allocator->query_pools = NULL; allocator->query_pools_size = 0; allocator->query_pool_count = 0; memset(&allocator->active_query_pools, 0, sizeof(allocator->active_query_pools)); allocator->current_command_list = NULL; d3d12_device_add_ref(allocator->device = device); return S_OK; } HRESULT d3d12_command_allocator_create(struct d3d12_device *device, D3D12_COMMAND_LIST_TYPE type, struct d3d12_command_allocator **allocator) { struct d3d12_command_allocator *object; HRESULT hr; if (!(D3D12_COMMAND_LIST_TYPE_DIRECT <= type && type <= D3D12_COMMAND_LIST_TYPE_COPY)) { WARN("Invalid type %#x.\n", type); return E_INVALIDARG; } if (!(object = vkd3d_malloc(sizeof(*object)))) return E_OUTOFMEMORY; if (FAILED(hr = d3d12_command_allocator_init(object, device, type))) { vkd3d_free(object); return hr; } TRACE("Created command allocator %p.\n", object); *allocator = object; return S_OK; } struct vkd3d_scratch_allocation { VkBuffer buffer; VkDeviceSize offset; VkDeviceAddress va; }; static bool d3d12_command_allocator_allocate_scratch_memory(struct d3d12_command_allocator *allocator, enum vkd3d_scratch_pool_kind kind, VkDeviceSize size, VkDeviceSize alignment, uint32_t memory_types, struct vkd3d_scratch_allocation *allocation) { struct d3d12_command_allocator_scratch_pool *pool = &allocator->scratch_pools[kind]; VkDeviceSize aligned_offset, aligned_size; struct vkd3d_scratch_buffer *scratch; unsigned int i; aligned_size = align(size, alignment); /* Probe last block first since the others are likely full */ for (i = pool->scratch_buffer_count; i; i--) { scratch = &pool->scratch_buffers[i - 1]; /* Extremely unlikely to fail since we have separate lists per pool kind, but to be 100% correct ... 
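 * we still check that the cached buffer's Vulkan memory type is one of the types requested by the
 * caller before bump-allocating from it (align the running offset, then advance it by the aligned size).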
*/ if (!(memory_types & (1u << scratch->allocation.device_allocation.vk_memory_type))) continue; aligned_offset = align(scratch->offset, alignment); if (aligned_offset + aligned_size <= scratch->allocation.resource.size) { scratch->offset = aligned_offset + aligned_size; allocation->buffer = scratch->allocation.resource.vk_buffer; allocation->offset = scratch->allocation.offset + aligned_offset; allocation->va = scratch->allocation.resource.va + aligned_offset; return true; } } if (!vkd3d_array_reserve((void**)&pool->scratch_buffers, &pool->scratch_buffers_size, pool->scratch_buffer_count + 1, sizeof(*pool->scratch_buffers))) { ERR("Failed to allocate scratch buffer.\n"); return false; } scratch = &pool->scratch_buffers[pool->scratch_buffer_count]; if (FAILED(d3d12_device_get_scratch_buffer(allocator->device, kind, aligned_size, memory_types, scratch))) { ERR("Failed to create scratch buffer.\n"); return false; } pool->scratch_buffer_count += 1; scratch->offset = aligned_size; allocation->buffer = scratch->allocation.resource.vk_buffer; allocation->offset = scratch->allocation.offset; allocation->va = scratch->allocation.resource.va; return true; } static struct vkd3d_query_pool *d3d12_command_allocator_get_active_query_pool_from_type_index( struct d3d12_command_allocator *allocator, uint32_t type_index) { return &allocator->active_query_pools[type_index]; } static uint32_t d3d12_query_heap_type_to_type_index(D3D12_QUERY_HEAP_TYPE heap_type) { switch (heap_type) { case D3D12_QUERY_HEAP_TYPE_OCCLUSION: return VKD3D_QUERY_TYPE_INDEX_OCCLUSION; case D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS: return VKD3D_QUERY_TYPE_INDEX_PIPELINE_STATISTICS; case D3D12_QUERY_HEAP_TYPE_SO_STATISTICS: return VKD3D_QUERY_TYPE_INDEX_TRANSFORM_FEEDBACK; default: return UINT32_MAX; } } bool d3d12_command_allocator_allocate_query_from_type_index( struct d3d12_command_allocator *allocator, uint32_t type_index, VkQueryPool *query_pool, uint32_t *query_index) { struct vkd3d_query_pool *pool = d3d12_command_allocator_get_active_query_pool_from_type_index(allocator, type_index); assert(pool); if (pool->next_index >= pool->query_count) { if (FAILED(d3d12_device_get_query_pool(allocator->device, type_index, pool))) return false; if (vkd3d_array_reserve((void**)&allocator->query_pools, &allocator->query_pools_size, allocator->query_pool_count + 1, sizeof(*allocator->query_pools))) allocator->query_pools[allocator->query_pool_count++] = *pool; else ERR("Failed to add query pool.\n"); } *query_pool = pool->vk_query_pool; *query_index = pool->next_index++; return true; } static bool d3d12_command_allocator_allocate_query_from_heap_type(struct d3d12_command_allocator *allocator, D3D12_QUERY_HEAP_TYPE heap_type, VkQueryPool *query_pool, uint32_t *query_index) { uint32_t type_index = d3d12_query_heap_type_to_type_index(heap_type); return d3d12_command_allocator_allocate_query_from_type_index(allocator, type_index, query_pool, query_index); } static struct d3d12_command_allocator *d3d12_command_allocator_from_iface(ID3D12CommandAllocator *iface) { if (!iface || iface->lpVtbl != &d3d12_command_allocator_vtbl) return NULL; return impl_from_ID3D12CommandAllocator(iface); } /* ID3D12CommandList */ static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList(d3d12_command_list_iface *iface) { return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList_iface); } static void d3d12_command_list_invalidate_rendering_info(struct d3d12_command_list *list) { list->rendering_info.state_flags &= 
~VKD3D_RENDERING_CURRENT; } static void d3d12_command_list_invalidate_current_pipeline(struct d3d12_command_list *list, bool meta_shader) { list->current_pipeline = VK_NULL_HANDLE; /* If we're binding a meta shader, invalidate everything. * Next time we bind a user pipeline, we need to reapply all dynamic state. */ if (meta_shader) { list->dynamic_state.active_flags = 0; /* For meta shaders, just pretend we never bound anything since we don't do tracking for these pipeline binds. */ list->command_buffer_pipeline = VK_NULL_HANDLE; } } static D3D12_RECT d3d12_get_image_rect(struct d3d12_resource *resource, unsigned int mip_level) { D3D12_RECT rect; rect.left = 0; rect.top = 0; rect.right = d3d12_resource_desc_get_width(&resource->desc, mip_level); rect.bottom = d3d12_resource_desc_get_height(&resource->desc, mip_level); return rect; } static bool d3d12_image_copy_writes_full_subresource(struct d3d12_resource *resource, const VkExtent3D *extent, const VkImageSubresourceLayers *subresource) { unsigned int width, height, depth; width = d3d12_resource_desc_get_width(&resource->desc, subresource->mipLevel); height = d3d12_resource_desc_get_height(&resource->desc, subresource->mipLevel); depth = d3d12_resource_desc_get_depth(&resource->desc, subresource->mipLevel); return width == extent->width && height == extent->height && depth == extent->depth; } static bool vk_rect_from_d3d12(const D3D12_RECT *rect, VkRect2D *vk_rect, const D3D12_RECT *clamp_rect) { D3D12_RECT clamped; clamped.left = max(rect->left, clamp_rect->left); clamped.right = min(rect->right, clamp_rect->right); clamped.top = max(rect->top, clamp_rect->top); clamped.bottom = min(rect->bottom, clamp_rect->bottom); if (clamped.top >= clamped.bottom || clamped.left >= clamped.right) { WARN("Empty clear rect.\n"); return false; } vk_rect->offset.x = clamped.left; vk_rect->offset.y = clamped.top; vk_rect->extent.width = clamped.right - clamped.left; vk_rect->extent.height = clamped.bottom - clamped.top; return true; } static VkImageAspectFlags vk_writable_aspects_from_image_layout(VkImageLayout vk_layout) { switch (vk_layout) { case VK_IMAGE_LAYOUT_GENERAL: return VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: return VK_IMAGE_ASPECT_COLOR_BIT; case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL: return VK_IMAGE_ASPECT_STENCIL_BIT; case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL: return VK_IMAGE_ASPECT_DEPTH_BIT; case VK_IMAGE_LAYOUT_UNDEFINED: case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: return 0; default: ERR("Unhandled image layout %u.\n", vk_layout); return 0; } } static int d3d12_command_list_find_attachment_view(struct d3d12_command_list *list, const struct d3d12_resource *resource, const VkImageSubresourceLayers *subresource) { unsigned int i; if (list->dsv.resource == resource) { const struct vkd3d_view *dsv = list->dsv.view; if (dsv->info.texture.miplevel_idx == subresource->mipLevel && dsv->info.texture.layer_idx == subresource->baseArrayLayer && dsv->info.texture.layer_count == subresource->layerCount) return D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; } else { for (i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; i++) { const struct vkd3d_view *rtv = list->rtvs[i].view; if (list->rtvs[i].resource != resource) continue; if (rtv->info.texture.miplevel_idx == subresource->mipLevel && 
rtv->info.texture.layer_idx == subresource->baseArrayLayer && rtv->info.texture.layer_count == subresource->layerCount) return i; } } return -1; } static int d3d12_command_list_find_attachment(struct d3d12_command_list *list, const struct d3d12_resource *resource, const struct vkd3d_view *view) { VkImageSubresourceLayers subresource = vk_subresource_layers_from_view(view); return d3d12_command_list_find_attachment_view(list, resource, &subresource); } static void d3d12_command_list_clear_attachment_inline(struct d3d12_command_list *list, struct d3d12_resource *resource, struct vkd3d_view *view, unsigned int attachment_idx, VkImageAspectFlags clear_aspects, const VkClearValue *clear_value, UINT rect_count, const D3D12_RECT *rects) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; VkClearAttachment vk_clear_attachment; VkClearRect vk_clear_rect; D3D12_RECT full_rect; unsigned int i; full_rect = d3d12_get_image_rect(resource, view->info.texture.miplevel_idx); if (!rect_count) { rect_count = 1; rects = &full_rect; } /* We expect more than one clear rect to be very uncommon * in practice, so make no effort to batch calls for now. * colorAttachment is ignored for depth-stencil clears. */ vk_clear_attachment.aspectMask = clear_aspects; vk_clear_attachment.colorAttachment = attachment_idx; vk_clear_attachment.clearValue = *clear_value; vk_clear_rect.baseArrayLayer = 0; vk_clear_rect.layerCount = view->info.texture.layer_count; for (i = 0; i < rect_count; i++) { if (vk_rect_from_d3d12(&rects[i], &vk_clear_rect.rect, &full_rect)) { VK_CALL(vkCmdClearAttachments(list->vk_command_buffer, 1, &vk_clear_attachment, 1, &vk_clear_rect)); } } } static void d3d12_command_list_resolve_buffer_copy_writes(struct d3d12_command_list *list) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; VkMemoryBarrier vk_barrier; if (list->tracked_copy_buffer_count) { vk_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; vk_barrier.pNext = NULL; vk_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; vk_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &vk_barrier, 0, NULL, 0, NULL)); list->tracked_copy_buffer_count = 0; } } static void d3d12_command_list_reset_buffer_copy_tracking(struct d3d12_command_list *list) { list->tracked_copy_buffer_count = 0; } static void d3d12_command_list_mark_copy_buffer_write(struct d3d12_command_list *list, VkBuffer vk_buffer, VkDeviceSize offset, VkDeviceSize size, bool sparse) { struct d3d12_buffer_copy_tracked_buffer *tracked_buffer; VkDeviceSize range_end; unsigned int i; if (sparse) { vk_buffer = VK_NULL_HANDLE; offset = 0; size = VK_WHOLE_SIZE; } range_end = offset + size; for (i = 0; i < list->tracked_copy_buffer_count; i++) { tracked_buffer = &list->tracked_copy_buffers[i]; /* Any write to a sparse buffer will be considered to be aliasing with any other resource. */ if (tracked_buffer->vk_buffer == vk_buffer || tracked_buffer->vk_buffer == VK_NULL_HANDLE || sparse) { if (range_end > tracked_buffer->hazard_begin && offset < tracked_buffer->hazard_end) { /* Hazard. Inject barrier. 
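 * For example, two back-to-back buffer copies writing the overlapping ranges [0, 256) and [128, 512)
 * of the same VkBuffer land here: a TRANSFER -> TRANSFER barrier is emitted and tracking restarts
 * with only the new range.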
*/ d3d12_command_list_resolve_buffer_copy_writes(list); tracked_buffer = &list->tracked_copy_buffers[0]; tracked_buffer->vk_buffer = vk_buffer; tracked_buffer->hazard_begin = offset; tracked_buffer->hazard_end = range_end; list->tracked_copy_buffer_count = 1; } else { tracked_buffer->hazard_begin = min(offset, tracked_buffer->hazard_begin); tracked_buffer->hazard_end = max(range_end, tracked_buffer->hazard_end); } return; } } /* Keep the tracking data structures lean and mean. If we have decent overlap, this isn't a real problem. */ if (list->tracked_copy_buffer_count == ARRAY_SIZE(list->tracked_copy_buffers)) d3d12_command_list_resolve_buffer_copy_writes(list); tracked_buffer = &list->tracked_copy_buffers[list->tracked_copy_buffer_count++]; tracked_buffer->vk_buffer = vk_buffer; tracked_buffer->hazard_begin = offset; tracked_buffer->hazard_end = range_end; } static VkImageLayout dsv_plane_optimal_mask_to_layout(uint32_t plane_optimal_mask, VkImageAspectFlags image_aspects) { static const VkImageLayout layouts[] = { VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, }; if (plane_optimal_mask & VKD3D_DEPTH_STENCIL_PLANE_GENERAL) return VK_IMAGE_LAYOUT_GENERAL; if (image_aspects != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { /* If aspects is only DEPTH or only STENCIL, we should use the OPTIMAL or READ_ONLY layout. * We should not use the separate layouts, or we might end up with more barriers than we need. */ return plane_optimal_mask ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; } else return layouts[plane_optimal_mask]; } static void d3d12_command_list_decay_optimal_dsv_resource(struct d3d12_command_list *list, const struct d3d12_resource *resource, uint32_t plane_optimal_mask, struct d3d12_command_list_barrier_batch *batch) { bool current_layout_is_shader_visible; VkImageMemoryBarrier barrier; VkImageLayout layout; assert(!(plane_optimal_mask & ~(VKD3D_DEPTH_PLANE_OPTIMAL | VKD3D_STENCIL_PLANE_OPTIMAL))); layout = dsv_plane_optimal_mask_to_layout(plane_optimal_mask, resource->format->vk_aspect_mask); if (layout == resource->common_layout) return; current_layout_is_shader_visible = layout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; barrier.pNext = NULL; barrier.oldLayout = layout; barrier.newLayout = resource->common_layout; barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | (current_layout_is_shader_visible ? VK_ACCESS_SHADER_READ_BIT : 0); barrier.subresourceRange.aspectMask = resource->format->vk_aspect_mask; barrier.subresourceRange.baseMipLevel = 0; barrier.subresourceRange.baseArrayLayer = 0; barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; barrier.image = resource->res.vk_image; /* We want to wait for storeOp to complete here, and that is defined to happen in LATE_FRAGMENT_TESTS. */ batch->src_stage_mask |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; /* If one aspect was readable, we have to make it visible to shaders since the resource state might have been * DEPTH_READ | RESOURCE | NON_PIXEL_RESOURCE. 
* If we transitioned from OPTIMAL, * there cannot possibly be shader reads until we observe a ResourceBarrier() later. */ if (current_layout_is_shader_visible) batch->dst_stage_mask |= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; else batch->dst_stage_mask |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; d3d12_command_list_barrier_batch_add_layout_transition(list, batch, &barrier); } static uint32_t d3d12_command_list_notify_decay_dsv_resource(struct d3d12_command_list *list, struct d3d12_resource *resource) { uint32_t decay_aspects; size_t i, n; /* No point in adding these since they are always deduced to be optimal or general. */ if ((resource->desc.Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) || resource->common_layout == VK_IMAGE_LAYOUT_GENERAL) return 0; for (i = 0, n = list->dsv_resource_tracking_count; i < n; i++) { if (list->dsv_resource_tracking[i].resource == resource) { decay_aspects = list->dsv_resource_tracking[i].plane_optimal_mask; list->dsv_resource_tracking[i] = list->dsv_resource_tracking[--list->dsv_resource_tracking_count]; return decay_aspects; } } return 0; } static uint32_t d3d12_command_list_promote_dsv_resource(struct d3d12_command_list *list, struct d3d12_resource *resource, uint32_t plane_optimal_mask) { size_t i, n; assert(!(plane_optimal_mask & ~(VKD3D_DEPTH_PLANE_OPTIMAL | VKD3D_STENCIL_PLANE_OPTIMAL))); /* No point in adding these since they are always deduced to be optimal. */ if (resource->desc.Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) return VKD3D_DEPTH_PLANE_OPTIMAL | VKD3D_STENCIL_PLANE_OPTIMAL; else if (resource->common_layout == VK_IMAGE_LAYOUT_GENERAL) return VKD3D_DEPTH_STENCIL_PLANE_GENERAL; /* For single aspect images, mirror the optimal mask in the unused aspect. This avoids some * extra checks elsewhere (particularly graphics pipeline setup and compat render passes) * to handle single aspect DSVs. */ if (!(resource->format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)) plane_optimal_mask |= (plane_optimal_mask & VKD3D_DEPTH_PLANE_OPTIMAL) ? VKD3D_STENCIL_PLANE_OPTIMAL : 0; if (!(resource->format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)) plane_optimal_mask |= (plane_optimal_mask & VKD3D_STENCIL_PLANE_OPTIMAL) ? VKD3D_DEPTH_PLANE_OPTIMAL : 0; for (i = 0, n = list->dsv_resource_tracking_count; i < n; i++) { if (list->dsv_resource_tracking[i].resource == resource) { list->dsv_resource_tracking[i].plane_optimal_mask |= plane_optimal_mask; return list->dsv_resource_tracking[i].plane_optimal_mask; } } vkd3d_array_reserve((void **)&list->dsv_resource_tracking, &list->dsv_resource_tracking_size, list->dsv_resource_tracking_count + 1, sizeof(*list->dsv_resource_tracking)); list->dsv_resource_tracking[list->dsv_resource_tracking_count].resource = resource; list->dsv_resource_tracking[list->dsv_resource_tracking_count].plane_optimal_mask = plane_optimal_mask; list->dsv_resource_tracking_count++; return plane_optimal_mask; } static uint32_t d3d12_command_list_notify_dsv_writes(struct d3d12_command_list *list, struct d3d12_resource *resource, const struct vkd3d_view *view, uint32_t plane_write_mask) { if (plane_write_mask & VKD3D_DEPTH_STENCIL_PLANE_GENERAL) return VKD3D_DEPTH_STENCIL_PLANE_GENERAL; assert(!(plane_write_mask & ~(VKD3D_DEPTH_PLANE_OPTIMAL | VKD3D_STENCIL_PLANE_OPTIMAL))); /* If we cover the entire resource, we can promote it to our target layout. 
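 * For example, a DSV spanning all array layers of a single-mip depth buffer covers every subresource
 * of the planes being written, so a clear or render pass through it can promote those planes for the
 * remainder of the command list.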
*/ if (view->info.texture.layer_count == resource->desc.DepthOrArraySize && resource->desc.MipLevels == 1) { return d3d12_command_list_promote_dsv_resource(list, resource, plane_write_mask); } else { d3d12_command_list_get_depth_stencil_resource_layout(list, resource, &plane_write_mask); return plane_write_mask; } } static void d3d12_command_list_notify_dsv_discard(struct d3d12_command_list *list, struct d3d12_resource *resource, uint32_t first_subresource, uint32_t subresource_count, uint32_t resource_subresource_count) { if (subresource_count == resource_subresource_count) { d3d12_command_list_promote_dsv_resource(list, resource, VKD3D_DEPTH_PLANE_OPTIMAL | VKD3D_STENCIL_PLANE_OPTIMAL); } else if (resource->format->vk_aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { /* Can we at least discard a plane fully? */ if (first_subresource == 0 && subresource_count >= resource_subresource_count / 2) { if (resource->format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) d3d12_command_list_promote_dsv_resource(list, resource, VKD3D_DEPTH_PLANE_OPTIMAL); else if (resource->format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) d3d12_command_list_promote_dsv_resource(list, resource, VKD3D_STENCIL_PLANE_OPTIMAL); } else if (first_subresource <= resource_subresource_count / 2 && first_subresource + subresource_count == resource_subresource_count) { if (resource->format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) d3d12_command_list_promote_dsv_resource(list, resource, VKD3D_STENCIL_PLANE_OPTIMAL); } } } /* Returns a mask of DSV aspects which should be considered to be fully transitioned for all subresources. * For these aspects, force layer/mip count to ALL. Only relevant for decay transitions. * We only promote when all subresources are transitioned as one, but if a single subresource enters read-only, * we decay the entire resource. */ static uint32_t d3d12_command_list_notify_dsv_state(struct d3d12_command_list *list, struct d3d12_resource *resource, D3D12_RESOURCE_STATES state, UINT subresource) { /* Need to decide if we should promote or decay DSV optimal state. * We can promote if we know for sure that all subresources are optimal. * If we observe any barrier which leaves this state, we must decay. * * Note: DEPTH_READ in isolation does not allow shaders to read a resource, * so we should keep it in OPTIMAL layouts. There is a certain risk of applications * screwing this up, but a workaround for that is to consider DEPTH_READ to be DEPTH_READ | RESOURCE * if applications prove to be buggy. */ bool dsv_optimal = state == D3D12_RESOURCE_STATE_DEPTH_READ || state == D3D12_RESOURCE_STATE_DEPTH_WRITE; uint32_t dsv_decay_mask = 0; if (!dsv_optimal) { dsv_decay_mask = d3d12_command_list_notify_decay_dsv_resource(list, resource); } else if (subresource == D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES) { d3d12_command_list_promote_dsv_resource(list, resource, VKD3D_DEPTH_PLANE_OPTIMAL | VKD3D_STENCIL_PLANE_OPTIMAL); } else if (resource->desc.MipLevels == 1 && resource->desc.DepthOrArraySize == 1) { /* For single mip/layer images (common case for depth-stencil), * a specific subresource can be handled correctly. 
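 * (With MipLevels == 1 and DepthOrArraySize == 1, the D3D12 subresource index reduces to the plane
 * slice, i.e. D3D12CalcSubresource(0, 0, plane, 1, 1) == plane: subresource 0 is the depth plane and
 * subresource 1 the stencil plane of a packed depth-stencil format.)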
*/ if (subresource == 0) { if (resource->format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) d3d12_command_list_promote_dsv_resource(list, resource, VKD3D_DEPTH_PLANE_OPTIMAL); else if (resource->format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) d3d12_command_list_promote_dsv_resource(list, resource, VKD3D_STENCIL_PLANE_OPTIMAL); } else if (subresource == 1) { if (resource->format->vk_aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) d3d12_command_list_promote_dsv_resource(list, resource, VKD3D_STENCIL_PLANE_OPTIMAL); } } return dsv_decay_mask; } static void d3d12_command_list_decay_optimal_dsv_resources(struct d3d12_command_list *list) { struct d3d12_command_list_barrier_batch batch; size_t i, n; d3d12_command_list_barrier_batch_init(&batch); for (i = 0, n = list->dsv_resource_tracking_count; i < n; i++) { const struct d3d12_resource_tracking *track = &list->dsv_resource_tracking[i]; d3d12_command_list_decay_optimal_dsv_resource(list, track->resource, track->plane_optimal_mask, &batch); } d3d12_command_list_barrier_batch_end(list, &batch); list->dsv_resource_tracking_count = 0; } static VkImageLayout d3d12_command_list_get_depth_stencil_resource_layout(const struct d3d12_command_list *list, const struct d3d12_resource *resource, uint32_t *plane_optimal_mask) { size_t i, n; if (resource->desc.Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) { if (plane_optimal_mask) *plane_optimal_mask = VKD3D_DEPTH_PLANE_OPTIMAL | VKD3D_STENCIL_PLANE_OPTIMAL; return VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; } else if (resource->common_layout == VK_IMAGE_LAYOUT_GENERAL) { if (plane_optimal_mask) *plane_optimal_mask = VKD3D_DEPTH_STENCIL_PLANE_GENERAL; return VK_IMAGE_LAYOUT_GENERAL; } for (i = 0, n = list->dsv_resource_tracking_count; i < n; i++) { if (resource == list->dsv_resource_tracking[i].resource) { if (plane_optimal_mask) *plane_optimal_mask = list->dsv_resource_tracking[i].plane_optimal_mask; return dsv_plane_optimal_mask_to_layout(list->dsv_resource_tracking[i].plane_optimal_mask, resource->format->vk_aspect_mask); } } if (plane_optimal_mask) *plane_optimal_mask = 0; return resource->common_layout; } static VkImageLayout vk_separate_depth_layout(VkImageLayout combined_layout) { if (combined_layout == VK_IMAGE_LAYOUT_GENERAL) { return combined_layout; } else { return (combined_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL || combined_layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL) ? VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL; } } static VkImageLayout vk_separate_stencil_layout(VkImageLayout combined_layout) { if (combined_layout == VK_IMAGE_LAYOUT_GENERAL) { return combined_layout; } else { return (combined_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL || combined_layout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL) ? VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL; } } static bool d3d12_resource_may_alias_other_resources(struct d3d12_resource *resource) { /* Treat a NULL resource as "all" resources. */ if (!resource) return true; /* Cannot alias if the resource is allocated in a dedicated heap. 
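 * (i.e. the resource owns its own allocation; placed and reserved resources fall through below and
 * are conservatively treated as potentially aliasing anything.)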
*/ if (resource->flags & VKD3D_RESOURCE_ALLOCATION) return false; return true; } static void d3d12_command_list_clear_attachment_pass(struct d3d12_command_list *list, struct d3d12_resource *resource, struct vkd3d_view *view, VkImageAspectFlags clear_aspects, const VkClearValue *clear_value, UINT rect_count, const D3D12_RECT *rects, bool is_bound) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; VkRenderingAttachmentInfoKHR attachment_info, stencil_attachment_info; VkImageLayout initial_layouts[2], final_layouts[2]; uint32_t plane_write_mask, image_barrier_count, i; VkImageMemoryBarrier image_barriers[2]; VkRenderingInfoKHR rendering_info; bool requires_discard_barrier; VkPipelineStageFlags stages; bool separate_ds_layouts; VkAccessFlags access; bool clear_op; memset(initial_layouts, 0, sizeof(initial_layouts)); memset(final_layouts, 0, sizeof(final_layouts)); memset(&attachment_info, 0, sizeof(attachment_info)); attachment_info.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR; attachment_info.imageView = view->vk_image_view; attachment_info.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; attachment_info.storeOp = VK_ATTACHMENT_STORE_OP_STORE; attachment_info.clearValue = *clear_value; stencil_attachment_info = attachment_info; memset(&rendering_info, 0, sizeof(rendering_info)); rendering_info.sType = VK_STRUCTURE_TYPE_RENDERING_INFO_KHR; rendering_info.renderArea.offset.x = 0; rendering_info.renderArea.offset.y = 0; rendering_info.renderArea.extent.width = d3d12_resource_desc_get_width(&resource->desc, view->info.texture.miplevel_idx); rendering_info.renderArea.extent.height = d3d12_resource_desc_get_height(&resource->desc, view->info.texture.miplevel_idx); rendering_info.layerCount = view->info.texture.layer_count; if (view->format->vk_aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) { rendering_info.colorAttachmentCount = 1; rendering_info.pColorAttachments = &attachment_info; } if (view->format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) rendering_info.pDepthAttachment = &attachment_info; if (view->format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) rendering_info.pStencilAttachment = &stencil_attachment_info; /* If we need to discard a single aspect, use separate layouts, since we have to use UNDEFINED barrier when we can. */ separate_ds_layouts = view->format->vk_aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) && clear_aspects != view->format->vk_aspect_mask; if (clear_aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { initial_layouts[0] = is_bound ? list->dsv_layout : d3d12_command_list_get_depth_stencil_resource_layout(list, resource, NULL); if (separate_ds_layouts) { initial_layouts[1] = vk_separate_stencil_layout(initial_layouts[0]); initial_layouts[0] = vk_separate_depth_layout(initial_layouts[0]); } /* We have proven a write, try to promote the image layout to something OPTIMAL. 
*/ plane_write_mask = 0; if (clear_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) plane_write_mask |= VKD3D_DEPTH_PLANE_OPTIMAL; if (clear_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) plane_write_mask |= VKD3D_STENCIL_PLANE_OPTIMAL; final_layouts[0] = dsv_plane_optimal_mask_to_layout( d3d12_command_list_notify_dsv_writes(list, resource, view, plane_write_mask), resource->format->vk_aspect_mask); if (separate_ds_layouts) { /* Do not transition aspects that we are not supposed to clear */ final_layouts[1] = vk_separate_stencil_layout(final_layouts[0]); final_layouts[0] = vk_separate_depth_layout(final_layouts[0]); attachment_info.imageLayout = final_layouts[0]; stencil_attachment_info.imageLayout = final_layouts[1]; } else { attachment_info.imageLayout = final_layouts[0]; stencil_attachment_info.imageLayout = final_layouts[0]; } } else { attachment_info.imageLayout = d3d12_resource_pick_layout(resource, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); initial_layouts[0] = attachment_info.imageLayout; final_layouts[0] = attachment_info.imageLayout; } if ((clear_op = !rect_count)) { if (clear_aspects & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT)) attachment_info.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; if (clear_aspects & (VK_IMAGE_ASPECT_STENCIL_BIT)) stencil_attachment_info.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; /* Ignore 3D images as re-initializing those may cause us to * discard the entire image, not just the layers to clear. * Also, no need to perform extra transition barriers from UNDEFINED for committed resources. * The initial transition is handled by Clear*View(). * Discarding with UNDEFINED is required to handle placed resources, however. * Also, if we're going to perform layout transitions anyways (for DSV), * might as well discard. */ requires_discard_barrier = d3d12_resource_may_alias_other_resources(resource); if (separate_ds_layouts) { if (initial_layouts[0] != final_layouts[0] || initial_layouts[1] != final_layouts[1]) requires_discard_barrier = true; } else if (initial_layouts[0] != final_layouts[0]) requires_discard_barrier = true; if (resource->desc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D && requires_discard_barrier) { if (separate_ds_layouts) { if (clear_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) initial_layouts[0] = VK_IMAGE_LAYOUT_UNDEFINED; if (clear_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) initial_layouts[1] = VK_IMAGE_LAYOUT_UNDEFINED; } else initial_layouts[0] = VK_IMAGE_LAYOUT_UNDEFINED; } } if (clear_aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { stages = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; if (!clear_op || clear_aspects != view->format->vk_aspect_mask) access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; } else { stages = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; if (!clear_op) access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT; } image_barrier_count = 0; for (i = 0; i < (separate_ds_layouts ? 2 : 1); i++) { if (initial_layouts[i] != final_layouts[i]) { VkImageMemoryBarrier *barrier = &image_barriers[image_barrier_count++]; memset(barrier, 0, sizeof(*barrier)); barrier->sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; barrier->srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier->dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier->image = resource->res.vk_image; barrier->srcAccessMask = clear_op ? 
0 : access; barrier->dstAccessMask = access; barrier->oldLayout = initial_layouts[i]; barrier->newLayout = final_layouts[i]; barrier->subresourceRange.aspectMask = view->format->vk_aspect_mask; barrier->subresourceRange.baseMipLevel = view->info.texture.miplevel_idx; barrier->subresourceRange.levelCount = 1; barrier->subresourceRange.baseArrayLayer = view->info.texture.layer_idx; barrier->subresourceRange.layerCount = view->info.texture.layer_count; if (resource->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) { barrier->subresourceRange.baseArrayLayer = 0; barrier->subresourceRange.layerCount = 1; } if (separate_ds_layouts) barrier->subresourceRange.aspectMask = i ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; } } if (image_barrier_count) { VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, stages, stages, 0, 0, NULL, 0, NULL, image_barrier_count, image_barriers)); } VK_CALL(vkCmdBeginRenderingKHR(list->vk_command_buffer, &rendering_info)); if (!clear_op) { d3d12_command_list_clear_attachment_inline(list, resource, view, 0, clear_aspects, clear_value, rect_count, rects); } VK_CALL(vkCmdEndRenderingKHR(list->vk_command_buffer)); } static VkPipelineStageFlags vk_queue_shader_stages(VkQueueFlags vk_queue_flags) { VkPipelineStageFlags queue_shader_stages = 0; if (vk_queue_flags & VK_QUEUE_GRAPHICS_BIT) { queue_shader_stages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; } if (vk_queue_flags & VK_QUEUE_COMPUTE_BIT) queue_shader_stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; return queue_shader_stages; } static void d3d12_command_list_discard_attachment_barrier(struct d3d12_command_list *list, struct d3d12_resource *resource, const VkImageSubresourceRange *subresources, bool is_bound) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; VkImageMemoryBarrier barrier; VkPipelineStageFlags stages; VkAccessFlags access; VkImageLayout layout; /* Ignore read access bits since reads will be undefined anyway */ if (resource->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) { stages = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; layout = d3d12_resource_pick_layout(resource, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); } else if (resource->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) { stages = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; layout = is_bound && list->dsv_layout ? list->dsv_layout : d3d12_command_list_get_depth_stencil_resource_layout(list, resource, NULL); } else if (resource->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) { stages = vk_queue_shader_stages(list->vk_queue_flags); access = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT; layout = VK_IMAGE_LAYOUT_GENERAL; } else { ERR("Unsupported resource flags %#x.\n", resource->desc.Flags); return; } /* With separate depth stencil layouts, we can only discard the aspect we care about. 
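 * The caller encodes that choice in the aspectMask of the subresource range it passes in; the
 * transition from UNDEFINED below is what actually discards the previous contents.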
*/ barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; barrier.pNext = NULL; barrier.srcAccessMask = access; barrier.dstAccessMask = access; barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; barrier.newLayout = layout; barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.image = resource->res.vk_image; barrier.subresourceRange = *subresources; VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, stages, stages, 0, 0, NULL, 0, NULL, 1, &barrier)); } enum vkd3d_render_pass_transition_mode { VKD3D_RENDER_PASS_TRANSITION_MODE_BEGIN, VKD3D_RENDER_PASS_TRANSITION_MODE_END, }; static bool d3d12_resource_requires_shader_visibility_after_transition( const struct d3d12_resource *resource, VkImageLayout old_layout, VkImageLayout new_layout) { return !(resource->desc.Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) && old_layout != VK_IMAGE_LAYOUT_UNDEFINED && old_layout != new_layout && (new_layout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL || new_layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL || new_layout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL); } static VkPipelineStageFlags vk_render_pass_barrier_from_view(struct d3d12_command_list *list, const struct vkd3d_view *view, const struct d3d12_resource *resource, enum vkd3d_render_pass_transition_mode mode, VkImageLayout layout, VkImageMemoryBarrier *vk_barrier) { VkImageLayout outside_render_pass_layout; VkPipelineStageFlags stages; VkAccessFlags access; if (view->format->vk_aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) { stages = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT; outside_render_pass_layout = d3d12_resource_pick_layout(resource, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); } else { stages = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; outside_render_pass_layout = d3d12_command_list_get_depth_stencil_resource_layout(list, resource, NULL); } vk_barrier->sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; vk_barrier->pNext = NULL; if (mode == VKD3D_RENDER_PASS_TRANSITION_MODE_BEGIN) { vk_barrier->srcAccessMask = 0; vk_barrier->dstAccessMask = access; vk_barrier->oldLayout = outside_render_pass_layout; vk_barrier->newLayout = layout; /* If we're transitioning into depth state and we could potentially read * (we cannot know this here), * shader might want to read from it as well, so we have to make that visible here * if we're performing a layout transition, which nukes any existing visibility. */ if (d3d12_resource_requires_shader_visibility_after_transition(resource, vk_barrier->oldLayout, vk_barrier->newLayout)) { vk_barrier->dstAccessMask |= VK_ACCESS_SHADER_READ_BIT; stages = VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT; } } else /* if (mode == VKD3D_RENDER_PASS_TRANSITION_MODE_END) */ { vk_barrier->srcAccessMask = access; vk_barrier->oldLayout = layout; vk_barrier->newLayout = outside_render_pass_layout; /* Dst access mask is generally 0 here since we are transitioning into an image layout * which only serves as a stepping stone for other layout transitions. When we use the image, * we are supposed to transition into another layout, and thus it is meaningless to make memory visible here. * The exception is depth attachments, which can be used right away without an internal transition barrier. 
* A case here is if the resource state is DEPTH_READ | RESOURCE. When we enter the render pass, * we transition it into the appropriate DS state. When we leave, we would use DS_READ_ONLY_OPTIMAL, * which can be sampled from and used as a read-only depth attachment without any extra barrier. * Thus, we have to complete that barrier here. */ vk_barrier->dstAccessMask = 0; if (vk_barrier->oldLayout != vk_barrier->newLayout) { if (vk_barrier->newLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL) { vk_barrier->dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_SHADER_READ_BIT; /* We don't know if we have DEPTH_READ | NON_PIXEL_RESOURCE or DEPTH_READ | PIXEL_RESOURCE. */ stages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; } else if (vk_barrier->newLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) { vk_barrier->dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; } } } /* The common case for color attachments is that this is a no-op. * An exception here is color attachment with SIMULTANEOUS use, where we need to decay to COMMON state. * Implicit decay or promotion does *not* happen for normal render targets, so we can rely on resource states. * For read-only depth or read-write depth for non-resource DSVs, this is also a no-op. */ if (vk_barrier->oldLayout == vk_barrier->newLayout) return 0; vk_barrier->srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vk_barrier->dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vk_barrier->image = resource->res.vk_image; vk_barrier->subresourceRange = vk_subresource_range_from_view(view); return stages; } static void d3d12_command_list_emit_render_pass_transition(struct d3d12_command_list *list, enum vkd3d_render_pass_transition_mode mode) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; VkImageMemoryBarrier vk_image_barriers[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT + 2]; VkPipelineStageFlags stage_mask = 0; VkPipelineStageFlags new_stages; struct d3d12_rtv_desc *dsv; uint32_t i, j; for (i = 0, j = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; i++) { struct d3d12_rtv_desc *rtv = &list->rtvs[i]; if (!rtv->view) continue; if ((new_stages = vk_render_pass_barrier_from_view(list, rtv->view, rtv->resource, mode, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, &vk_image_barriers[j]))) { stage_mask |= new_stages; j++; } } dsv = &list->dsv; /* The dsv_layout is updated in d3d12_command_list_begin_render_pass(). */ if (dsv->view && list->dsv_layout) { if ((new_stages = vk_render_pass_barrier_from_view(list, dsv->view, dsv->resource, mode, list->dsv_layout, &vk_image_barriers[j]))) { stage_mask |= new_stages; j++; } /* We know for sure we will write something to these attachments now, so try to promote. */ if (mode == VKD3D_RENDER_PASS_TRANSITION_MODE_BEGIN) d3d12_command_list_notify_dsv_writes(list, dsv->resource, dsv->view, list->dsv_plane_optimal_mask); } /* Need to deduce DSV layouts again before we start a new render pass. */ if (mode == VKD3D_RENDER_PASS_TRANSITION_MODE_END) list->dsv_layout = VK_IMAGE_LAYOUT_UNDEFINED; /* Ignore VRS targets. They have to be in the appropriate resource state here. 
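 * (D3D12 requires the application itself to transition shading rate images into
 * D3D12_RESOURCE_STATE_SHADING_RATE_SOURCE, so no implicit begin/end barrier is emitted for them.)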
*/ if (!j) return; VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, stage_mask, stage_mask, 0, 0, NULL, 0, NULL, j, vk_image_barriers)); } static inline bool d3d12_query_type_is_indexed(D3D12_QUERY_TYPE type) { return type >= D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0 && type <= D3D12_QUERY_TYPE_SO_STATISTICS_STREAM3; } static VkQueryControlFlags d3d12_query_type_get_vk_flags(D3D12_QUERY_TYPE type) { return type == D3D12_QUERY_TYPE_OCCLUSION ? VK_QUERY_CONTROL_PRECISE_BIT : 0; } static bool d3d12_command_list_add_pending_query(struct d3d12_command_list *list, const struct vkd3d_active_query *query) { if (!vkd3d_array_reserve((void **)&list->pending_queries, &list->pending_queries_size, list->pending_queries_count + 1, sizeof(*list->pending_queries))) { ERR("Failed to add pending query.\n"); return false; } list->pending_queries[list->pending_queries_count++] = *query; return true; } static void d3d12_command_list_begin_active_query(struct d3d12_command_list *list, struct vkd3d_active_query *query) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; VkQueryControlFlags flags = d3d12_query_type_get_vk_flags(query->type); assert(query->state == VKD3D_ACTIVE_QUERY_RESET); if (d3d12_query_type_is_indexed(query->type)) { unsigned int stream = query->type - D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0; VK_CALL(vkCmdBeginQueryIndexedEXT(list->vk_command_buffer, query->vk_pool, query->vk_index, flags, stream)); } else VK_CALL(vkCmdBeginQuery(list->vk_command_buffer, query->vk_pool, query->vk_index, flags)); query->state = VKD3D_ACTIVE_QUERY_BEGUN; } static void d3d12_command_list_end_active_query(struct d3d12_command_list *list, struct vkd3d_active_query *query) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; assert(query->state == VKD3D_ACTIVE_QUERY_BEGUN); if (d3d12_query_type_is_indexed(query->type)) { unsigned int stream = query->type - D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0; VK_CALL(vkCmdEndQueryIndexedEXT(list->vk_command_buffer, query->vk_pool, query->vk_index, stream)); } else VK_CALL(vkCmdEndQuery(list->vk_command_buffer, query->vk_pool, query->vk_index)); query->state = VKD3D_ACTIVE_QUERY_ENDED; } static void d3d12_command_list_reset_active_query(struct d3d12_command_list *list, struct vkd3d_active_query *query) { if (!d3d12_command_list_add_pending_query(list, query)) return; if (!d3d12_command_allocator_allocate_query_from_heap_type(list->allocator, query->heap->desc.Type, &query->vk_pool, &query->vk_index)) return; if (!d3d12_command_list_reset_query(list, query->vk_pool, query->vk_index)) return; query->state = VKD3D_ACTIVE_QUERY_RESET; } static bool d3d12_command_list_enable_query(struct d3d12_command_list *list, struct d3d12_query_heap *heap, uint32_t index, D3D12_QUERY_TYPE type) { struct vkd3d_active_query *query; if (!vkd3d_array_reserve((void **)&list->active_queries, &list->active_queries_size, list->active_queries_count + 1, sizeof(*list->active_queries))) { ERR("Failed to add query.\n"); return false; } query = &list->active_queries[list->active_queries_count++]; query->heap = heap; query->index = index; query->type = type; query->state = VKD3D_ACTIVE_QUERY_RESET; query->resolve_index = 0; if (!d3d12_command_allocator_allocate_query_from_heap_type(list->allocator, heap->desc.Type, &query->vk_pool, &query->vk_index)) return false; return d3d12_command_list_reset_query(list, query->vk_pool, query->vk_index); } static bool d3d12_command_list_disable_query(struct d3d12_command_list *list, struct d3d12_query_heap *heap, uint32_t index) { 
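/* Stop tracking the virtual query for (heap, index): record it in the pending list so the gather
 * pass can later write its result to the heap buffer, make sure it has actually been begun and
 * ended on the Vulkan query pool, then drop it from the active array. */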
unsigned int i; for (i = 0; i < list->active_queries_count; i++) { struct vkd3d_active_query *query = &list->active_queries[i]; if (query->heap == heap && query->index == index) { if (!d3d12_command_list_add_pending_query(list, query)) return false; if (query->state == VKD3D_ACTIVE_QUERY_RESET) d3d12_command_list_begin_active_query(list, query); if (query->state == VKD3D_ACTIVE_QUERY_BEGUN) d3d12_command_list_end_active_query(list, query); *query = list->active_queries[--list->active_queries_count]; return true; } } WARN("Query (%p, %u) not active.\n", heap, index); return true; } static void d3d12_command_list_handle_active_queries(struct d3d12_command_list *list, bool end) { unsigned int i; for (i = 0; i < list->active_queries_count; i++) { struct vkd3d_active_query *query = &list->active_queries[i]; if (query->state == VKD3D_ACTIVE_QUERY_ENDED && !end) d3d12_command_list_reset_active_query(list, query); if (query->state == VKD3D_ACTIVE_QUERY_RESET) d3d12_command_list_begin_active_query(list, query); if (query->state == VKD3D_ACTIVE_QUERY_BEGUN && end) d3d12_command_list_end_active_query(list, query); } } int vkd3d_compare_pending_query(const void* query_a, const void* query_b) { const struct vkd3d_active_query *a = query_a; const struct vkd3d_active_query *b = query_b; // Sort by D3D12 heap since we need to do one compute dispatch per buffer if (a->heap < b->heap) return -1; if (a->heap > b->heap) return 1; // Sort by Vulkan query pool and index to batch query resolves if (a->vk_pool > b->vk_pool) return -1; if (a->vk_pool < b->vk_pool) return 1; return (int)(a->vk_index - b->vk_index); } static size_t get_query_heap_stride(D3D12_QUERY_HEAP_TYPE heap_type) { if (heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS) return sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS); if (heap_type == D3D12_QUERY_HEAP_TYPE_SO_STATISTICS) return sizeof(D3D12_QUERY_DATA_SO_STATISTICS); return sizeof(uint64_t); } static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_list *list, VkPipelineBindPoint bind_point, bool invalidate_descriptor_heaps); static bool d3d12_command_list_gather_pending_queries(struct d3d12_command_list *list) { /* TODO allocate arrays from command allocator in case * games hit this path multiple times per frame */ VkDeviceSize resolve_buffer_size, resolve_buffer_stride, ssbo_alignment, entry_buffer_size; const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; struct vkd3d_scratch_allocation resolve_buffer, entry_buffer; VkDescriptorBufferInfo dst_buffer, src_buffer, map_buffer; struct vkd3d_query_gather_info gather_pipeline; const struct vkd3d_active_query *src_queries; unsigned int i, j, k, workgroup_count; uint32_t resolve_index, entry_offset; struct vkd3d_query_gather_args args; VkWriteDescriptorSet vk_writes[3]; VkMemoryBarrier vk_barrier; VkDescriptorSet vk_set; bool result = false; struct dispatch_entry { struct d3d12_query_heap *heap; uint32_t virtual_query_count; uint32_t unique_query_count; uint32_t min_index; uint32_t max_index; VkDeviceSize resolve_buffer_offset; VkDeviceSize resolve_buffer_size; }; struct dispatch_entry *dispatches = NULL; size_t dispatch_size = 0; size_t dispatch_count = 0; struct resolve_entry { VkQueryPool query_pool; uint32_t first_query; uint32_t query_count; VkDeviceSize offset; VkDeviceSize stride; }; struct resolve_entry *resolves = NULL; size_t resolve_size = 0; size_t resolve_count = 0; struct query_entry { uint32_t dst_index; uint32_t src_index; uint32_t next; }; struct query_map { struct query_entry 
*entry; unsigned int dispatch_id; }; struct query_entry *dst_queries = NULL; struct query_entry *query_list = NULL; struct query_map *query_map = NULL; size_t query_map_size = 0; if (!list->pending_queries_count) return true; /* Sort pending query list so that we can batch commands */ qsort(list->pending_queries, list->pending_queries_count, sizeof(*list->pending_queries), &vkd3d_compare_pending_query); ssbo_alignment = d3d12_device_get_ssbo_alignment(list->device); resolve_buffer_size = 0; resolve_buffer_stride = 0; resolve_index = 0; for (i = 0; i < list->pending_queries_count; i++) { struct dispatch_entry *d = dispatches ? &dispatches[dispatch_count - 1] : NULL; struct resolve_entry *r = resolves ? &resolves[resolve_count - 1] : NULL; struct vkd3d_active_query *q = &list->pending_queries[i]; /* Prepare one compute dispatch per D3D12 query heap */ if (!d || d->heap != q->heap) { if (!vkd3d_array_reserve((void **)&dispatches, &dispatch_size, dispatch_count + 1, sizeof(*dispatches))) { ERR("Failed to allocate dispatch list.\n"); goto cleanup; } /* Force new resolve entry as well so that binding the scratch buffer * doesn't get overly complicated when we need to deal with potential * SSBO alignment issues on some hardware. */ resolve_buffer_stride = get_query_heap_stride(q->heap->desc.Type); resolve_buffer_size = align(resolve_buffer_size, ssbo_alignment); resolve_index = 0; d = &dispatches[dispatch_count++]; d->min_index = q->index; d->max_index = q->index; d->heap = q->heap; d->virtual_query_count = 1; d->unique_query_count = 0; d->resolve_buffer_offset = resolve_buffer_size; r = NULL; } else { d->virtual_query_count++; d->min_index = min(d->min_index, q->index); d->max_index = max(d->max_index, q->index); } /* Prepare one resolve entry per Vulkan query range */ if (!r || r->query_pool != q->vk_pool || r->first_query + r->query_count != q->vk_index) { if (!vkd3d_array_reserve((void **)&resolves, &resolve_size, resolve_count + 1, sizeof(*resolves))) { ERR("Failed to allocate resolve list.\n"); goto cleanup; } r = &resolves[resolve_count++]; r->query_pool = q->vk_pool; r->first_query = q->vk_index; r->query_count = 1; r->offset = resolve_buffer_size; r->stride = get_query_heap_stride(q->heap->desc.Type); } else r->query_count++; resolve_buffer_size += resolve_buffer_stride; d->resolve_buffer_size = resolve_buffer_size - d->resolve_buffer_offset; q->resolve_index = resolve_index++; } /* Allocate scratch buffer and resolve virtual Vulkan queries into it */ if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, resolve_buffer_size, max(ssbo_alignment, sizeof(uint64_t)), ~0u, &resolve_buffer)) goto cleanup; for (i = 0; i < resolve_count; i++) { const struct resolve_entry *r = &resolves[i]; VK_CALL(vkCmdCopyQueryPoolResults(list->vk_command_buffer, r->query_pool, r->first_query, r->query_count, resolve_buffer.buffer, resolve_buffer.offset + r->offset, r->stride, VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT)); } /* Allocate scratch buffer for query lists */ entry_buffer_size = sizeof(struct query_entry) * list->pending_queries_count; if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, entry_buffer_size, ssbo_alignment, ~0u, &entry_buffer)) goto cleanup; for (i = 0; i < dispatch_count; i++) { const struct dispatch_entry *d = &dispatches[i]; query_map_size = max(query_map_size, d->max_index - d->min_index + 1); } if (!(query_map = vkd3d_calloc(query_map_size, sizeof(*query_map))) || 
!(query_list = vkd3d_malloc(sizeof(*query_list) * list->pending_queries_count))) { ERR("Failed to allocate query map.\n"); goto cleanup; } /* Active list for the current dispatch */ src_queries = list->pending_queries; dst_queries = query_list; for (i = 0; i < dispatch_count; i++) { struct dispatch_entry *d = &dispatches[i]; unsigned int dispatch_id = i + 1; /* First pass that counts unique queries since the compute * shader expects list heads to be packed first in the array */ for (j = 0; j < d->virtual_query_count; j++) { const struct vkd3d_active_query *q = &src_queries[j]; struct query_map *e = &query_map[q->index - d->min_index]; if (e->dispatch_id != dispatch_id) { e->entry = &dst_queries[d->unique_query_count++]; e->entry->dst_index = q->index; e->entry->src_index = q->resolve_index; e->entry->next = ~0u; e->dispatch_id = dispatch_id; } } /* Second pass that actually generates the query list. */ for (j = 0, k = d->unique_query_count; j < d->virtual_query_count; j++) { const struct vkd3d_active_query *q = &src_queries[j]; struct query_map *e = &query_map[q->index - d->min_index]; /* Skip entries that we already added in the first pass */ if (e->entry->src_index == q->resolve_index) continue; e->entry->next = k; e->entry = &dst_queries[k++]; e->entry->dst_index = q->index; e->entry->src_index = q->resolve_index; e->entry->next = ~0u; } src_queries += d->virtual_query_count; dst_queries += d->virtual_query_count; } /* Upload query lists in chunks since vkCmdUpdateBuffer is limited to * 64kiB per invocation. Normally, one single iteration should suffice. */ for (i = 0; i < list->pending_queries_count; i += 2048) { unsigned int count = min(2048, list->pending_queries_count - i); VK_CALL(vkCmdUpdateBuffer(list->vk_command_buffer, entry_buffer.buffer, sizeof(struct query_entry) * i + entry_buffer.offset, sizeof(struct query_entry) * count, &query_list[i])); } vk_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; vk_barrier.pNext = NULL; vk_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; vk_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &vk_barrier, 0, NULL, 0, NULL)); /* Gather virtual query results and store * them in the query heap's buffer */ entry_offset = 0; for (i = 0; i < ARRAY_SIZE(vk_writes); i++) { vk_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; vk_writes[i].pNext = NULL; vk_writes[i].dstBinding = i; vk_writes[i].dstArrayElement = 0; vk_writes[i].descriptorCount = 1; vk_writes[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; vk_writes[i].pImageInfo = NULL; vk_writes[i].pTexelBufferView = NULL; } vk_writes[0].pBufferInfo = &dst_buffer; vk_writes[1].pBufferInfo = &src_buffer; vk_writes[2].pBufferInfo = &map_buffer; for (i = 0; i < dispatch_count; i++) { const struct dispatch_entry *d = &dispatches[i]; if (!(vkd3d_meta_get_query_gather_pipeline(&list->device->meta_ops, d->heap->desc.Type, &gather_pipeline))) goto cleanup; VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, gather_pipeline.vk_pipeline)); vk_set = d3d12_command_allocator_allocate_descriptor_set(list->allocator, gather_pipeline.vk_set_layout, VKD3D_DESCRIPTOR_POOL_TYPE_STATIC); dst_buffer.buffer = d->heap->vk_buffer; dst_buffer.offset = 0; dst_buffer.range = VK_WHOLE_SIZE; src_buffer.buffer = resolve_buffer.buffer; src_buffer.offset = resolve_buffer.offset + d->resolve_buffer_offset; src_buffer.range = d->resolve_buffer_size; 
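/* Binding 2 below points at the query_entry list uploaded with vkCmdUpdateBuffer above; each entry maps a resolved Vulkan query result (src_index) to its destination slot in the D3D12 query heap (dst_index). */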
map_buffer.buffer = entry_buffer.buffer; map_buffer.offset = entry_buffer.offset; map_buffer.range = entry_buffer_size; for (j = 0; j < ARRAY_SIZE(vk_writes); j++) vk_writes[j].dstSet = vk_set; VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, ARRAY_SIZE(vk_writes), vk_writes, 0, NULL)); VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, gather_pipeline.vk_pipeline_layout, 0, 1, &vk_set, 0, NULL)); args.query_count = d->unique_query_count; args.entry_offset = entry_offset; entry_offset += d->virtual_query_count; VK_CALL(vkCmdPushConstants(list->vk_command_buffer, gather_pipeline.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args)); workgroup_count = vkd3d_compute_workgroup_count(d->unique_query_count, VKD3D_QUERY_OP_WORKGROUP_SIZE); VK_CALL(vkCmdDispatch(list->vk_command_buffer, workgroup_count, 1, 1)); } vk_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; vk_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT; VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &vk_barrier, 0, NULL, 0, NULL)); list->pending_queries_count = 0; result = true; d3d12_command_list_invalidate_current_pipeline(list, true); d3d12_command_list_invalidate_root_parameters(list, VK_PIPELINE_BIND_POINT_COMPUTE, true); VKD3D_BREADCRUMB_COMMAND(GATHER_VIRTUAL_QUERY); cleanup: vkd3d_free(resolves); vkd3d_free(dispatches); vkd3d_free(query_list); vkd3d_free(query_map); return result; } static void d3d12_command_list_end_current_render_pass(struct d3d12_command_list *list, bool suspend) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; d3d12_command_list_handle_active_queries(list, true); if (list->xfb_enabled) { VK_CALL(vkCmdEndTransformFeedbackEXT(list->vk_command_buffer, 0, ARRAY_SIZE(list->so_counter_buffers), list->so_counter_buffers, list->so_counter_buffer_offsets)); } if (list->rendering_info.state_flags & VKD3D_RENDERING_ACTIVE) VK_CALL(vkCmdEndRenderingKHR(list->vk_command_buffer)); /* Don't emit barriers for temporary suspension of the render pass */ if (!suspend && (list->rendering_info.state_flags & (VKD3D_RENDERING_ACTIVE | VKD3D_RENDERING_SUSPENDED))) d3d12_command_list_emit_render_pass_transition(list, VKD3D_RENDER_PASS_TRANSITION_MODE_END); if (suspend && (list->rendering_info.state_flags & (VKD3D_RENDERING_ACTIVE))) list->rendering_info.state_flags |= VKD3D_RENDERING_SUSPENDED; else if (!suspend) list->rendering_info.state_flags &= ~VKD3D_RENDERING_SUSPENDED; list->rendering_info.state_flags &= ~VKD3D_RENDERING_ACTIVE; if (list->xfb_enabled) { VkMemoryBarrier vk_barrier; /* We need a barrier between pause and resume. 
*/ vk_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; vk_barrier.pNext = NULL; vk_barrier.srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; vk_barrier.dstAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT; VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, 0, 1, &vk_barrier, 0, NULL, 0, NULL)); list->xfb_enabled = false; } } static void d3d12_command_list_invalidate_push_constants(struct vkd3d_pipeline_bindings *bindings) { if (bindings->root_signature->descriptor_table_count) bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS; bindings->root_descriptor_dirty_mask = bindings->root_signature->root_descriptor_raw_va_mask | bindings->root_signature->root_descriptor_push_mask; bindings->root_constant_dirty_mask = bindings->root_signature->root_constant_mask; } static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_list *list, VkPipelineBindPoint bind_point, bool invalidate_descriptor_heaps) { struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; if (!bindings->root_signature) return; /* Previously dirty states may no longer be dirty * if the new root signature does not use them */ bindings->dirty_flags = 0; if (bindings->static_sampler_set) bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET; if (bindings->root_signature->hoist_info.num_desc) bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS; d3d12_command_list_invalidate_push_constants(bindings); if (invalidate_descriptor_heaps) { struct d3d12_device *device = bindings->root_signature->device; bindings->descriptor_heap_dirty_mask = (1ull << device->bindless_state.set_count) - 1; } } static void vk_access_and_stage_flags_from_d3d12_resource_state(const struct d3d12_command_list *list, const struct d3d12_resource *resource, uint32_t state_mask, VkQueueFlags vk_queue_flags, VkPipelineStageFlags *stages, VkAccessFlags *access) { struct d3d12_device *device = list->device; VkPipelineStageFlags queue_shader_stages; uint32_t unhandled_state = 0; queue_shader_stages = vk_queue_shader_stages(vk_queue_flags); if (state_mask == D3D12_RESOURCE_STATE_COMMON) { *stages |= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; *access |= VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; } while (state_mask) { uint32_t state = state_mask & -state_mask; switch (state) { case D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER: *stages |= queue_shader_stages; *access |= VK_ACCESS_UNIFORM_READ_BIT; if (device->bindless_state.flags & (VKD3D_BINDLESS_CBV_AS_SSBO | VKD3D_RAW_VA_ROOT_DESCRIPTOR_CBV)) *access |= VK_ACCESS_SHADER_READ_BIT; if (vk_queue_flags & VK_QUEUE_GRAPHICS_BIT) { *stages |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; *access |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; } break; case D3D12_RESOURCE_STATE_INDEX_BUFFER: *stages |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; *access |= VK_ACCESS_INDEX_READ_BIT; break; case D3D12_RESOURCE_STATE_RENDER_TARGET: *stages |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; /* If the corresponding image layout is COLOR_ATTACHMENT_OPTIMAL, we won't get automatic barriers, * so add access masks as appropriate. 
*/ if (d3d12_resource_pick_layout(resource, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { *access |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT; } break; case D3D12_RESOURCE_STATE_UNORDERED_ACCESS: *stages |= queue_shader_stages; *access |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; if ((vk_queue_flags & VK_QUEUE_COMPUTE_BIT) && d3d12_device_supports_ray_tracing_tier_1_0(device)) { /* UNORDERED_ACCESS state is also used for scratch buffers. * Acceleration structures cannot transition their state, * and must use UAV barriers. This is still relevant for scratch buffers however. */ *stages |= VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR; *access |= VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR; } break; case D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE: if ((vk_queue_flags & VK_QUEUE_COMPUTE_BIT) && d3d12_device_supports_ray_tracing_tier_1_0(device)) { *stages |= VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR; *access |= VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR; } break; case D3D12_RESOURCE_STATE_DEPTH_WRITE: /* If our DS layout is attachment optimal in any way, we might not perform implicit * memory barriers as part of a render pass. */ if (d3d12_command_list_get_depth_stencil_resource_layout(list, resource, NULL) != VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL) { *access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; } /* fallthrough */ case D3D12_RESOURCE_STATE_DEPTH_READ: *stages |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; *access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; break; case D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE: *stages |= queue_shader_stages & ~VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; *access |= VK_ACCESS_SHADER_READ_BIT; if ((vk_queue_flags & VK_QUEUE_COMPUTE_BIT) && d3d12_device_supports_ray_tracing_tier_1_0(device)) { /* Vertex / index / transform buffer inputs are NON_PIXEL_SHADER_RESOURCES in DXR. * They access SHADER_READ_BIT in Vulkan, so just need to add the stage. 
*/ *stages |= VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR; } break; case D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE: *stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; *access |= VK_ACCESS_SHADER_READ_BIT; break; case D3D12_RESOURCE_STATE_STREAM_OUT: *stages |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT; *access |= VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT; break; case D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT: *stages |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; *access |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT; /* D3D12_RESOURCE_STATE_PREDICATION */ if (device->device_info.buffer_device_address_features.bufferDeviceAddress) { *stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; *access |= VK_ACCESS_SHADER_READ_BIT; } else { *stages |= VK_PIPELINE_STAGE_TRANSFER_BIT; *access |= VK_ACCESS_TRANSFER_READ_BIT; } break; case D3D12_RESOURCE_STATE_COPY_DEST: *stages |= VK_PIPELINE_STAGE_TRANSFER_BIT; if (d3d12_resource_is_buffer(resource)) *access |= VK_ACCESS_TRANSFER_WRITE_BIT; break; case D3D12_RESOURCE_STATE_COPY_SOURCE: *stages |= VK_PIPELINE_STAGE_TRANSFER_BIT; if (d3d12_resource_is_buffer(resource)) *access |= VK_ACCESS_TRANSFER_READ_BIT; break; case D3D12_RESOURCE_STATE_RESOLVE_DEST: case D3D12_RESOURCE_STATE_RESOLVE_SOURCE: *stages |= VK_PIPELINE_STAGE_TRANSFER_BIT; break; case D3D12_RESOURCE_STATE_SHADING_RATE_SOURCE: *stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR; *access |= VK_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR; break; default: unhandled_state |= state; } state_mask &= ~state; } if (unhandled_state) FIXME("Unhandled resource state %#x.\n", unhandled_state); } static void d3d12_command_list_add_transition(struct d3d12_command_list *list, struct vkd3d_initial_transition *transition) { bool skip; size_t i; /* Search in reverse as we're more likely to use same resource again. */ for (i = list->init_transitions_count; i; i--) { if (list->init_transitions[i - 1].type != transition->type) continue; switch (transition->type) { case VKD3D_INITIAL_TRANSITION_TYPE_RESOURCE: skip = list->init_transitions[i - 1].resource.resource == transition->resource.resource; break; case VKD3D_INITIAL_TRANSITION_TYPE_QUERY_HEAP: skip = list->init_transitions[i - 1].query_heap == transition->query_heap; break; default: ERR("Unhandled transition type %u.\n", transition->type); continue; } if (skip) return; } if (!vkd3d_array_reserve((void**)&list->init_transitions, &list->init_transitions_size, list->init_transitions_count + 1, sizeof(*list->init_transitions))) { ERR("Failed to allocate memory.\n"); return; } switch (transition->type) { case VKD3D_INITIAL_TRANSITION_TYPE_RESOURCE: TRACE("Adding initial resource transition for resource %p (%s).\n", transition->resource.resource, transition->resource.perform_initial_transition ? 
"yes" : "no"); break; case VKD3D_INITIAL_TRANSITION_TYPE_QUERY_HEAP: TRACE("Adding initialization for query heap %p.\n", transition->query_heap); break; default: ERR("Unhandled transition type %u.\n", transition->type); } list->init_transitions[list->init_transitions_count++] = *transition; } static void d3d12_command_list_track_resource_usage(struct d3d12_command_list *list, struct d3d12_resource *resource, bool perform_initial_transition) { struct vkd3d_initial_transition transition; /* When a command queue has confirmed that it has received a command list for submission, this flag will eventually * be cleared. The command queue will only perform the transition once. * Until that point, we must keep submitting initial transitions like this. */ if (vkd3d_atomic_uint32_load_explicit(&resource->initial_layout_transition, vkd3d_memory_order_relaxed)) { transition.type = VKD3D_INITIAL_TRANSITION_TYPE_RESOURCE; transition.resource.resource = resource; transition.resource.perform_initial_transition = perform_initial_transition; d3d12_command_list_add_transition(list, &transition); } } static void d3d12_command_list_track_query_heap(struct d3d12_command_list *list, struct d3d12_query_heap *heap) { struct vkd3d_initial_transition transition; if (!vkd3d_atomic_uint32_load_explicit(&heap->initialized, vkd3d_memory_order_relaxed)) { transition.type = VKD3D_INITIAL_TRANSITION_TYPE_QUERY_HEAP; transition.query_heap = heap; d3d12_command_list_add_transition(list, &transition); } } extern ULONG STDMETHODCALLTYPE d3d12_command_list_vkd3d_ext_AddRef(ID3D12GraphicsCommandListExt *iface); HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(d3d12_command_list_iface *iface, REFIID iid, void **object) { TRACE("iface %p, iid %s, object %p.\n", iface, debugstr_guid(iid), object); if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList) || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList1) || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList2) || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList3) || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList4) || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList5) || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList6) || IsEqualGUID(iid, &IID_ID3D12CommandList) || IsEqualGUID(iid, &IID_ID3D12DeviceChild) || IsEqualGUID(iid, &IID_ID3D12Object) || IsEqualGUID(iid, &IID_IUnknown)) { ID3D12GraphicsCommandList_AddRef(iface); *object = iface; return S_OK; } if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandListExt)) { struct d3d12_command_list *command_list = impl_from_ID3D12GraphicsCommandList(iface); d3d12_command_list_vkd3d_ext_AddRef(&command_list->ID3D12GraphicsCommandListExt_iface); *object = &command_list->ID3D12GraphicsCommandListExt_iface; return S_OK; } WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(iid)); *object = NULL; return E_NOINTERFACE; } ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(d3d12_command_list_iface *iface) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); ULONG refcount = InterlockedIncrement(&list->refcount); TRACE("%p increasing refcount to %u.\n", list, refcount); return refcount; } ULONG STDMETHODCALLTYPE d3d12_command_list_Release(d3d12_command_list_iface *iface) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); ULONG refcount = InterlockedDecrement(&list->refcount); TRACE("%p decreasing refcount to %u.\n", list, refcount); if (!refcount) { struct d3d12_device *device = list->device; vkd3d_private_store_destroy(&list->private_store); /* When command pool is destroyed, all 
command buffers are implicitly freed. */ if (list->allocator) d3d12_command_allocator_free_command_buffer(list->allocator, list); vkd3d_free(list->init_transitions); vkd3d_free(list->query_ranges); vkd3d_free(list->active_queries); vkd3d_free(list->pending_queries); vkd3d_free(list->dsv_resource_tracking); vkd3d_free_aligned(list); d3d12_device_release(device); } return refcount; } static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(d3d12_command_list_iface *iface, REFGUID guid, UINT *data_size, void *data) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); return vkd3d_get_private_data(&list->private_store, guid, data_size, data); } static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(d3d12_command_list_iface *iface, REFGUID guid, UINT data_size, const void *data) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); return vkd3d_set_private_data(&list->private_store, guid, data_size, data, NULL, NULL); } static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(d3d12_command_list_iface *iface, REFGUID guid, const IUnknown *data) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); return vkd3d_set_private_data_interface(&list->private_store, guid, data, NULL, NULL); } static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(d3d12_command_list_iface *iface, REFIID iid, void **device) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); return d3d12_device_query_interface(list->device, iid, device); } static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(d3d12_command_list_iface *iface) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); TRACE("iface %p.\n", iface); return list->type; } static HRESULT d3d12_command_list_batch_reset_query_pools(struct d3d12_command_list *list) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; HRESULT hr; size_t i; for (i = 0; i < list->query_ranges_count; i++) { const struct vkd3d_query_range *range = &list->query_ranges[i]; if (!(range->flags & VKD3D_QUERY_RANGE_RESET)) continue; if (FAILED(hr = d3d12_command_allocator_allocate_init_command_buffer(list->allocator, list))) return hr; VK_CALL(vkCmdResetQueryPool(list->vk_init_commands, range->vk_pool, range->index, range->count)); } return S_OK; } static HRESULT d3d12_command_list_build_init_commands(struct d3d12_command_list *list) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; VkMemoryBarrier barrier; VkResult vr; HRESULT hr; if (FAILED(hr = d3d12_command_list_batch_reset_query_pools(list))) return hr; if (!list->vk_init_commands) return S_OK; if (list->execute_indirect.has_emitted_indirect_to_compute_barrier) { /* We've patched an indirect command stream here, so do the final barrier now. 
*/ barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; barrier.pNext = NULL; barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; barrier.dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT; VK_CALL(vkCmdPipelineBarrier(list->vk_init_commands, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, 0, 1, &barrier, 0, NULL, 0, NULL)); } if ((vr = VK_CALL(vkEndCommandBuffer(list->vk_init_commands))) < 0) { WARN("Failed to end command buffer, vr %d.\n", vr); return hresult_from_vk_result(vr); } return S_OK; } static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(d3d12_command_list_iface *iface) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; VkResult vr; HRESULT hr; TRACE("iface %p.\n", iface); if (!list->is_recording) { WARN("Command list is not in the recording state.\n"); return E_FAIL; } d3d12_command_list_end_current_render_pass(list, false); d3d12_command_list_end_transfer_batch(list); if (list->predicate_enabled) VK_CALL(vkCmdEndConditionalRenderingEXT(list->vk_command_buffer)); if (!d3d12_command_list_gather_pending_queries(list)) d3d12_command_list_mark_as_invalid(list, "Failed to gather virtual queries.\n"); /* If we have kept some DSV resources in optimal layout throughout the command buffer, * now is the time to decay them. */ d3d12_command_list_decay_optimal_dsv_resources(list); /* If we have some pending copy barriers, need to resolve those now, since we cannot track across command lists. */ d3d12_command_list_resolve_buffer_copy_writes(list); #ifdef VKD3D_ENABLE_BREADCRUMBS if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS) vkd3d_breadcrumb_tracer_end_command_list(list); #endif if (FAILED(hr = d3d12_command_list_build_init_commands(list))) return hr; if ((vr = VK_CALL(vkEndCommandBuffer(list->vk_command_buffer))) < 0) { WARN("Failed to end command buffer, vr %d.\n", vr); return hresult_from_vk_result(vr); } if (list->allocator) { d3d12_command_allocator_free_command_buffer(list->allocator, list); list->allocator = NULL; } list->is_recording = false; if (!list->is_valid) { WARN("Error occurred during command list recording.\n"); return E_INVALIDARG; } return S_OK; } static bool d3d12_command_list_find_query(struct d3d12_command_list *list, VkQueryPool vk_pool, uint32_t index, size_t *out_pos) { const struct vkd3d_query_range *range; size_t hi = list->query_ranges_count; size_t lo = 0; while (lo < hi) { size_t pos = lo + (hi - lo) / 2; range = &list->query_ranges[pos]; if (vk_pool < range->vk_pool) hi = pos; else if (vk_pool > range->vk_pool) lo = pos + 1; else if (index < range->index) hi = pos; else if (index >= range->index + range->count) lo = pos + 1; else { if (out_pos) *out_pos = pos; return true; } } if (out_pos) *out_pos = lo; return false; } static void d3d12_command_list_insert_query_range(struct d3d12_command_list *list, size_t *where, VkQueryPool vk_pool, uint32_t index, uint32_t count, uint32_t flags) { struct vkd3d_query_range *range; unsigned int move_count; bool merge_lo, merge_hi; size_t pos = *where; merge_lo = false; merge_hi = false; if (pos > 0) { range = &list->query_ranges[pos - 1]; merge_lo = range->vk_pool == vk_pool && range->flags == flags && range->index + range->count == index; } if (pos < list->query_ranges_count) { range = &list->query_ranges[pos]; merge_hi = range->vk_pool == vk_pool && range->flags == flags && range->index == index + count; } /* The idea is that 'where' will point to the range that contains * the 
original range it was pointing to before the insertion, which * may be moved around depending on which ranges get merged. */ if (merge_lo) { range = &list->query_ranges[pos - 1]; range[0].count += count; if (merge_hi) { range[0].count += range[1].count; move_count = (--list->query_ranges_count) - pos; memmove(&range[1], &range[2], sizeof(*range) * move_count); (*where)--; } } else if (merge_hi) { range = &list->query_ranges[pos]; range->index = index; range->count += count; } else { vkd3d_array_reserve((void**)&list->query_ranges, &list->query_ranges_size, list->query_ranges_count + 1, sizeof(*list->query_ranges)); range = &list->query_ranges[pos]; move_count = (list->query_ranges_count++) - pos; memmove(range + 1, range, sizeof(*range) * move_count); range->vk_pool = vk_pool; range->index = index; range->count = count; range->flags = flags; (*where)++; } } static void d3d12_command_list_read_query_range(struct d3d12_command_list *list, VkQueryPool vk_pool, uint32_t index, uint32_t count) { const struct vkd3d_query_range *range; size_t hi = index + count; size_t lo = index; size_t pos; /* pos contains either the location of an existing range * containing the first query of the new range, or the * location where we need to insert it */ d3d12_command_list_find_query(list, vk_pool, index, &pos); /* Avoid overriding already existing ranges by splitting * this range into pieces so that each query is contained * in at most one range. */ while (lo < hi) { if (pos < list->query_ranges_count) { range = &list->query_ranges[pos]; if (lo >= range->index) { lo = max(lo, range->index + range->count); pos += 1; } else { size_t range_end = min(hi, range->index); d3d12_command_list_insert_query_range(list, &pos, vk_pool, lo, range_end - lo, 0); lo = range_end; } } else { d3d12_command_list_insert_query_range(list, &pos, vk_pool, lo, hi - lo, 0); lo = hi; } } } bool d3d12_command_list_reset_query(struct d3d12_command_list *list, VkQueryPool vk_pool, uint32_t index) { size_t pos; if (d3d12_command_list_find_query(list, vk_pool, index, &pos)) return false; d3d12_command_list_insert_query_range(list, &pos, vk_pool, index, 1, VKD3D_QUERY_RANGE_RESET); return true; } static void d3d12_command_list_reset_api_state(struct d3d12_command_list *list, ID3D12PipelineState *initial_pipeline_state) { d3d12_command_list_iface *iface = &list->ID3D12GraphicsCommandList_iface; list->index_buffer.dxgi_format = DXGI_FORMAT_UNKNOWN; memset(list->rtvs, 0, sizeof(list->rtvs)); memset(&list->dsv, 0, sizeof(list->dsv)); list->dsv_layout = VK_IMAGE_LAYOUT_UNDEFINED; list->dsv_plane_optimal_mask = 0; list->fb_width = 0; list->fb_height = 0; list->fb_layer_count = 0; list->xfb_enabled = false; list->predicate_enabled = false; list->predicate_va = 0; list->index_buffer.buffer = VK_NULL_HANDLE; list->current_pipeline = VK_NULL_HANDLE; list->command_buffer_pipeline = VK_NULL_HANDLE; memset(&list->dynamic_state, 0, sizeof(list->dynamic_state)); list->dynamic_state.blend_constants[0] = D3D12_DEFAULT_BLEND_FACTOR_RED; list->dynamic_state.blend_constants[1] = D3D12_DEFAULT_BLEND_FACTOR_GREEN; list->dynamic_state.blend_constants[2] = D3D12_DEFAULT_BLEND_FACTOR_BLUE; list->dynamic_state.blend_constants[3] = D3D12_DEFAULT_BLEND_FACTOR_ALPHA; list->dynamic_state.min_depth_bounds = 0.0f; list->dynamic_state.max_depth_bounds = 1.0f; list->dynamic_state.primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; list->dynamic_state.vk_primitive_topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; list->dynamic_state.fragment_shading_rate.fragment_size = 
(VkExtent2D) { 1u, 1u }; list->dynamic_state.fragment_shading_rate.combiner_ops[0] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR; list->dynamic_state.fragment_shading_rate.combiner_ops[1] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR; memset(list->pipeline_bindings, 0, sizeof(list->pipeline_bindings)); memset(list->descriptor_heaps, 0, sizeof(list->descriptor_heaps)); list->state = NULL; list->rt_state = NULL; list->active_bind_point = VK_PIPELINE_BIND_POINT_MAX_ENUM; memset(list->so_counter_buffers, 0, sizeof(list->so_counter_buffers)); memset(list->so_counter_buffer_offsets, 0, sizeof(list->so_counter_buffer_offsets)); list->cbv_srv_uav_descriptors_types = NULL; list->cbv_srv_uav_descriptors_view = NULL; list->vrs_image = NULL; ID3D12GraphicsCommandList_SetPipelineState(iface, initial_pipeline_state); } static void d3d12_command_list_reset_internal_state(struct d3d12_command_list *list) { #ifdef VKD3D_ENABLE_RENDERDOC list->debug_capture = vkd3d_renderdoc_active() && vkd3d_renderdoc_should_capture_shader_hash(0); #else list->debug_capture = false; #endif list->has_replaced_shaders = false; list->init_transitions_count = 0; list->query_ranges_count = 0; list->active_queries_count = 0; list->pending_queries_count = 0; list->dsv_resource_tracking_count = 0; list->tracked_copy_buffer_count = 0; list->rendering_info.state_flags = 0; list->execute_indirect.has_emitted_indirect_to_compute_barrier = false; list->execute_indirect.has_observed_transition_to_indirect = false; } static void d3d12_command_list_reset_state(struct d3d12_command_list *list, ID3D12PipelineState *initial_pipeline_state) { d3d12_command_list_reset_api_state(list, initial_pipeline_state); d3d12_command_list_reset_internal_state(list); } static inline void d3d12_command_list_invalidate_all_state(struct d3d12_command_list *list) { d3d12_command_list_invalidate_current_pipeline(list, true); d3d12_command_list_invalidate_root_parameters(list, VK_PIPELINE_BIND_POINT_GRAPHICS, true); d3d12_command_list_invalidate_root_parameters(list, VK_PIPELINE_BIND_POINT_COMPUTE, true); list->index_buffer.is_dirty = true; } static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(d3d12_command_list_iface *iface, ID3D12CommandAllocator *allocator, ID3D12PipelineState *initial_pipeline_state) { struct d3d12_command_allocator *allocator_impl = d3d12_command_allocator_from_iface(allocator); struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); HRESULT hr; TRACE("iface %p, allocator %p, initial_pipeline_state %p.\n", iface, allocator, initial_pipeline_state); if (!allocator_impl || allocator_impl->type != list->type) { WARN("Invalid command allocator.\n"); return E_INVALIDARG; } if (list->is_recording) { WARN("Command list is in the recording state.\n"); return E_FAIL; } if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator_impl, list))) { list->allocator = allocator_impl; d3d12_command_list_reset_state(list, initial_pipeline_state); } return hr; } static void STDMETHODCALLTYPE d3d12_command_list_ClearState(d3d12_command_list_iface *iface, ID3D12PipelineState *pipeline_state) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); TRACE("iface %p, pipeline_state %p.\n", iface, pipeline_state); d3d12_command_list_end_current_render_pass(list, false); d3d12_command_list_reset_api_state(list, pipeline_state); } static bool d3d12_command_list_has_depth_stencil_view(struct d3d12_command_list *list) { const struct d3d12_graphics_pipeline_state *graphics;
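/* A depth-stencil attachment is only considered bound if a DSV has been set and the pipeline can actually consume it, i.e. it declares a DSV format, or depth/stencil testing is enabled with an unknown DSV format. */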
assert(d3d12_pipeline_state_is_graphics(list->state)); graphics = &list->state->graphics; return list->dsv.format && (graphics->dsv_format || d3d12_graphics_pipeline_state_has_unknown_dsv_format_with_test(graphics)); } static void d3d12_command_list_get_fb_extent(struct d3d12_command_list *list, uint32_t *width, uint32_t *height, uint32_t *layer_count) { struct d3d12_graphics_pipeline_state *graphics = &list->state->graphics; struct d3d12_device *device = list->device; if (graphics->rt_count || d3d12_command_list_has_depth_stencil_view(list)) { *width = list->fb_width; *height = list->fb_height; if (layer_count) *layer_count = list->fb_layer_count; } else { *width = device->vk_info.device_limits.maxFramebufferWidth; *height = device->vk_info.device_limits.maxFramebufferHeight; if (layer_count) *layer_count = 1; } } static bool d3d12_command_list_update_rendering_info(struct d3d12_command_list *list) { struct vkd3d_rendering_info *rendering_info = &list->rendering_info; struct d3d12_graphics_pipeline_state *graphics; unsigned int i; if (rendering_info->state_flags & VKD3D_RENDERING_CURRENT) return true; graphics = &list->state->graphics; rendering_info->rtv_mask = graphics->rtv_active_mask; rendering_info->info.colorAttachmentCount = graphics->rt_count; /* The pipeline has fallback PSO in case we're attempting to render to unbound RTV. */ for (i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; i++) { VkRenderingAttachmentInfoKHR *attachment = &rendering_info->rtv[i]; if ((graphics->rtv_active_mask & (1u << i)) && list->rtvs[i].view) { attachment->imageView = list->rtvs[i].view->vk_image_view; attachment->imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; } else { attachment->imageView = VK_NULL_HANDLE; attachment->imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; } } rendering_info->info.pDepthAttachment = NULL; rendering_info->info.pStencilAttachment = NULL; if (d3d12_command_list_has_depth_stencil_view(list)) { rendering_info->dsv.imageView = list->dsv.view->vk_image_view; rendering_info->dsv.imageLayout = list->dsv_layout; /* Spec says that to use pDepthAttachment or pStencilAttachment, with non-NULL image view, * the format must have the aspect mask set. 
*/ if (list->dsv.view->format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) rendering_info->info.pDepthAttachment = &rendering_info->dsv; if (list->dsv.view->format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) rendering_info->info.pStencilAttachment = &rendering_info->dsv; } else { rendering_info->dsv.imageView = VK_NULL_HANDLE; rendering_info->dsv.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; } if (list->vrs_image) { rendering_info->vrs.imageView = list->vrs_image->vrs_view; rendering_info->vrs.imageLayout = VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR; } else { rendering_info->vrs.imageView = VK_NULL_HANDLE; rendering_info->vrs.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; } d3d12_command_list_get_fb_extent(list, &rendering_info->info.renderArea.extent.width, &rendering_info->info.renderArea.extent.height, &rendering_info->info.layerCount); return true; } static bool d3d12_command_list_update_compute_pipeline(struct d3d12_command_list *list) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; if (list->current_pipeline != VK_NULL_HANDLE) return true; if (!d3d12_pipeline_state_is_compute(list->state)) { WARN("Pipeline state %p is not a compute pipeline.\n", list->state); return false; } if (list->command_buffer_pipeline != list->state->compute.vk_pipeline) { VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, list->state->vk_bind_point, list->state->compute.vk_pipeline)); list->command_buffer_pipeline = list->state->compute.vk_pipeline; } list->current_pipeline = list->state->compute.vk_pipeline; list->dynamic_state.active_flags = 0; return true; } static bool d3d12_command_list_update_raygen_pipeline(struct d3d12_command_list *list) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; bool stack_size_dirty = false; if (list->current_pipeline != VK_NULL_HANDLE) return true; if (!list->rt_state) { WARN("Pipeline state %p is not a raygen pipeline.\n", list->rt_state); return false; } if (list->command_buffer_pipeline != list->rt_state->pipeline) { VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, list->rt_state->pipeline)); list->command_buffer_pipeline = list->rt_state->pipeline; stack_size_dirty = true; } else { stack_size_dirty = list->dynamic_state.pipeline_stack_size != list->rt_state->pipeline_stack_size; } if (stack_size_dirty) { /* Pipeline stack size is part of the PSO, not any command buffer state for some reason ... 
*/ VK_CALL(vkCmdSetRayTracingPipelineStackSizeKHR(list->vk_command_buffer, list->rt_state->pipeline_stack_size)); list->dynamic_state.pipeline_stack_size = list->rt_state->pipeline_stack_size; } return true; } static void d3d12_command_list_check_vbo_alignment(struct d3d12_command_list *list) { const uint32_t *stride_masks; VkDeviceSize *offsets; uint32_t update_vbos; unsigned int index; stride_masks = list->state->graphics.vertex_buffer_stride_align_mask; update_vbos = list->state->graphics.vertex_buffer_mask; offsets = list->dynamic_state.vertex_offsets; while (update_vbos) { index = vkd3d_bitmask_iter32(&update_vbos); if (stride_masks[index] & offsets[index]) list->dynamic_state.dirty_vbos |= 1u << index; } } static bool d3d12_command_list_update_graphics_pipeline(struct d3d12_command_list *list) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; uint32_t dsv_plane_optimal_mask; uint32_t new_active_flags; VkImageLayout dsv_layout; VkPipeline vk_pipeline; if (list->current_pipeline != VK_NULL_HANDLE) return true; if (!d3d12_pipeline_state_is_graphics(list->state)) { WARN("Pipeline state %p is not a graphics pipeline.\n", list->state); return false; } /* Try to grab the pipeline we compiled ahead of time. If we cannot do so, fall back. */ if (!(vk_pipeline = d3d12_pipeline_state_get_pipeline(list->state, &list->dynamic_state, list->dsv.format, &new_active_flags))) { if (!(vk_pipeline = d3d12_pipeline_state_get_or_create_pipeline(list->state, &list->dynamic_state, list->dsv.format, &new_active_flags))) return false; } if (d3d12_command_list_has_depth_stencil_view(list)) { /* Select new dsv_layout. Any new PSO write we didn't observe yet must be updated here. */ dsv_plane_optimal_mask = list->dsv_plane_optimal_mask | list->state->graphics.dsv_plane_optimal_mask; dsv_layout = dsv_plane_optimal_mask_to_layout(dsv_plane_optimal_mask, list->dsv.format->vk_aspect_mask); } else { dsv_plane_optimal_mask = 0; dsv_layout = VK_IMAGE_LAYOUT_UNDEFINED; } /* If we need to bind or unbind certain render targets or if the DSV layout changed, interrupt rendering. * It's also possible that rtv_active_mask is constant, but rt_count increases (if last RT format is NULL). */ if ((list->state->graphics.rtv_active_mask != list->rendering_info.rtv_mask) || (list->state->graphics.rt_count != list->rendering_info.info.colorAttachmentCount) || (dsv_layout != list->rendering_info.dsv.imageLayout)) { d3d12_command_list_invalidate_rendering_info(list); d3d12_command_list_end_current_render_pass(list, false); } list->dsv_plane_optimal_mask = dsv_plane_optimal_mask; list->dsv_layout = dsv_layout; if (list->command_buffer_pipeline != vk_pipeline) { VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, list->state->vk_bind_point, vk_pipeline)); /* If we bind a new pipeline, make sure that we end up binding VBOs that are aligned. * It is fine to do it here, since we are binding a pipeline right before we perform * a draw call. If we trip any dirty check here, VBO offsets will be fixed up when emitting * dynamic state after this. */ d3d12_command_list_check_vbo_alignment(list); /* The application did set vertex buffers that we didn't bind because of the pipeline vbo mask. * The new pipeline could use those so we need to rebind vertex buffers. 
*/ if ((new_active_flags & (VKD3D_DYNAMIC_STATE_VERTEX_BUFFER | VKD3D_DYNAMIC_STATE_VERTEX_BUFFER_STRIDE)) && (list->dynamic_state.dirty_vbos || list->dynamic_state.dirty_vbo_strides)) list->dynamic_state.dirty_flags |= VKD3D_DYNAMIC_STATE_VERTEX_BUFFER | VKD3D_DYNAMIC_STATE_VERTEX_BUFFER_STRIDE; /* Reapply all dynamic states that were not dynamic in previously bound pipeline. * If we didn't use to have dynamic vertex strides, but we then bind a pipeline with dynamic strides, * we will need to rebind all VBOs. Mark dynamic stride as dirty in this case. */ if (new_active_flags & ~list->dynamic_state.active_flags & VKD3D_DYNAMIC_STATE_VERTEX_BUFFER_STRIDE) list->dynamic_state.dirty_vbo_strides = ~0u; list->dynamic_state.dirty_flags |= new_active_flags & ~list->dynamic_state.active_flags; list->command_buffer_pipeline = vk_pipeline; } list->dynamic_state.active_flags = new_active_flags; list->current_pipeline = vk_pipeline; return true; } static void d3d12_command_list_update_descriptor_table_offsets(struct d3d12_command_list *list, struct vkd3d_pipeline_bindings *bindings, VkPipelineLayout layout, VkShaderStageFlags push_stages) { const struct d3d12_root_signature *root_signature = bindings->root_signature; const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; const struct vkd3d_shader_descriptor_table *table; uint32_t table_offsets[D3D12_MAX_ROOT_COST]; unsigned int root_parameter_index; uint64_t descriptor_table_mask; assert(root_signature->descriptor_table_count); descriptor_table_mask = root_signature->descriptor_table_mask & bindings->descriptor_table_active_mask; while (descriptor_table_mask) { root_parameter_index = vkd3d_bitmask_iter64(&descriptor_table_mask); table = root_signature_get_descriptor_table(root_signature, root_parameter_index); table_offsets[table->table_index] = bindings->descriptor_tables[root_parameter_index]; } /* Set descriptor offsets */ if (push_stages) { VK_CALL(vkCmdPushConstants(list->vk_command_buffer, layout, push_stages, root_signature->descriptor_table_offset, root_signature->descriptor_table_count * sizeof(uint32_t), table_offsets)); } bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS; } static void vk_write_descriptor_set_from_root_descriptor(struct d3d12_command_list *list, VkWriteDescriptorSet *vk_descriptor_write, const struct vkd3d_shader_root_parameter *root_parameter, VkDescriptorSet vk_descriptor_set, const struct vkd3d_root_descriptor_info *descriptor) { vk_descriptor_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; vk_descriptor_write->pNext = NULL; vk_descriptor_write->dstSet = vk_descriptor_set; vk_descriptor_write->dstBinding = root_parameter->descriptor.binding->binding.binding; vk_descriptor_write->dstArrayElement = 0; vk_descriptor_write->descriptorType = descriptor->vk_descriptor_type; vk_descriptor_write->descriptorCount = 1; vk_descriptor_write->pImageInfo = NULL; vk_descriptor_write->pBufferInfo = &descriptor->info.buffer; vk_descriptor_write->pTexelBufferView = &descriptor->info.buffer_view; } static bool vk_write_descriptor_set_and_inline_uniform_block(VkWriteDescriptorSet *vk_descriptor_write, VkWriteDescriptorSetInlineUniformBlockEXT *vk_inline_uniform_block_write, VkDescriptorSet vk_descriptor_set, const struct d3d12_root_signature *root_signature, const void* data) { vk_inline_uniform_block_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT; vk_inline_uniform_block_write->pNext = NULL; vk_inline_uniform_block_write->dataSize = 
root_signature->push_constant_range.size; vk_inline_uniform_block_write->pData = data; vk_descriptor_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; vk_descriptor_write->pNext = vk_inline_uniform_block_write; vk_descriptor_write->dstSet = vk_descriptor_set; vk_descriptor_write->dstBinding = root_signature->push_constant_ubo_binding.binding; vk_descriptor_write->dstArrayElement = 0; vk_descriptor_write->descriptorCount = root_signature->push_constant_range.size; vk_descriptor_write->descriptorType = VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT; vk_descriptor_write->pImageInfo = NULL; vk_descriptor_write->pBufferInfo = NULL; vk_descriptor_write->pTexelBufferView = NULL; return true; } static void d3d12_command_list_update_descriptor_heaps(struct d3d12_command_list *list, struct vkd3d_pipeline_bindings *bindings, VkPipelineBindPoint vk_bind_point, VkPipelineLayout layout) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; while (bindings->descriptor_heap_dirty_mask) { unsigned int heap_index = vkd3d_bitmask_iter64(&bindings->descriptor_heap_dirty_mask); if (list->descriptor_heaps[heap_index]) { VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, vk_bind_point, layout, heap_index, 1, &list->descriptor_heaps[heap_index], 0, NULL)); } } } static void d3d12_command_list_update_static_samplers(struct d3d12_command_list *list, struct vkd3d_pipeline_bindings *bindings, VkPipelineBindPoint vk_bind_point, VkPipelineLayout layout) { const struct d3d12_root_signature *root_signature = bindings->root_signature; const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, vk_bind_point, layout, root_signature->sampler_descriptor_set, 1, &bindings->static_sampler_set, 0, NULL)); bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET; } static void d3d12_command_list_update_root_constants(struct d3d12_command_list *list, struct vkd3d_pipeline_bindings *bindings, VkPipelineLayout layout, VkShaderStageFlags push_stages) { const struct d3d12_root_signature *root_signature = bindings->root_signature; const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; const struct vkd3d_shader_root_constant *root_constant; unsigned int root_parameter_index; if (!push_stages) { bindings->root_constant_dirty_mask = 0; return; } while (bindings->root_constant_dirty_mask) { root_parameter_index = vkd3d_bitmask_iter64(&bindings->root_constant_dirty_mask); root_constant = root_signature_get_32bit_constants(root_signature, root_parameter_index); VK_CALL(vkCmdPushConstants(list->vk_command_buffer, layout, push_stages, root_constant->constant_index * sizeof(uint32_t), root_constant->constant_count * sizeof(uint32_t), &bindings->root_constants[root_constant->constant_index])); } } union root_parameter_data { uint32_t root_constants[D3D12_MAX_ROOT_COST]; VkDeviceAddress root_descriptor_vas[D3D12_MAX_ROOT_COST / 2]; }; static unsigned int d3d12_command_list_fetch_root_descriptor_vas(struct d3d12_command_list *list, struct vkd3d_pipeline_bindings *bindings, union root_parameter_data *dst_data) { const struct d3d12_root_signature *root_signature = bindings->root_signature; uint64_t root_descriptor_mask = root_signature->root_descriptor_raw_va_mask; unsigned int va_idx = 0; /* Ignore dirty mask. We'll always update all VAs either via push constants * in order to reduce API calls, or an inline uniform buffer in which case * we need to re-upload all data anyway. 
*/ while (root_descriptor_mask) { unsigned int root_parameter_index = vkd3d_bitmask_iter64(&root_descriptor_mask); dst_data->root_descriptor_vas[va_idx++] = bindings->root_descriptors[root_parameter_index].info.va; } return va_idx; } static void d3d12_command_list_fetch_inline_uniform_block_data(struct d3d12_command_list *list, struct vkd3d_pipeline_bindings *bindings, union root_parameter_data *dst_data) { const struct d3d12_root_signature *root_signature = bindings->root_signature; uint64_t root_constant_mask = root_signature->root_constant_mask; const struct vkd3d_shader_root_constant *root_constant; const uint32_t *src_data = bindings->root_constants; const struct vkd3d_shader_descriptor_table *table; unsigned int root_parameter_index; uint64_t descriptor_table_mask; uint32_t first_table_offset; /* Root descriptors are already filled in dst_data. */ while (root_constant_mask) { root_parameter_index = vkd3d_bitmask_iter64(&root_constant_mask); root_constant = root_signature_get_32bit_constants(root_signature, root_parameter_index); memcpy(&dst_data->root_constants[root_constant->constant_index], &src_data[root_constant->constant_index], root_constant->constant_count * sizeof(uint32_t)); } first_table_offset = root_signature->descriptor_table_offset / sizeof(uint32_t); descriptor_table_mask = root_signature->descriptor_table_mask & bindings->descriptor_table_active_mask; while (descriptor_table_mask) { root_parameter_index = vkd3d_bitmask_iter64(&descriptor_table_mask); table = root_signature_get_descriptor_table(root_signature, root_parameter_index); dst_data->root_constants[first_table_offset + table->table_index] = bindings->descriptor_tables[root_parameter_index]; } /* Reset dirty flags to avoid redundant updates in the future */ bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS; bindings->root_constant_dirty_mask = 0; } static void d3d12_command_list_update_root_descriptors(struct d3d12_command_list *list, struct vkd3d_pipeline_bindings *bindings, VkPipelineBindPoint vk_bind_point, VkPipelineLayout layout, VkShaderStageFlags push_stages) { const struct d3d12_root_signature *root_signature = bindings->root_signature; const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; VkWriteDescriptorSetInlineUniformBlockEXT inline_uniform_block_write; VkWriteDescriptorSet descriptor_writes[D3D12_MAX_ROOT_COST / 2 + 2]; const struct vkd3d_shader_root_parameter *root_parameter; VkDescriptorSet descriptor_set = VK_NULL_HANDLE; union root_parameter_data root_parameter_data; unsigned int descriptor_write_count = 0; unsigned int root_parameter_index; unsigned int va_count = 0; uint64_t dirty_push_mask; if (root_signature->flags & VKD3D_ROOT_SIGNATURE_USE_ROOT_DESCRIPTOR_SET) { /* Ensure that we populate all descriptors if push descriptors cannot be used */ bindings->root_descriptor_dirty_mask |= bindings->root_descriptor_active_mask & (root_signature->root_descriptor_raw_va_mask | root_signature->root_descriptor_push_mask); descriptor_set = d3d12_command_allocator_allocate_descriptor_set( list->allocator, root_signature->vk_root_descriptor_layout, VKD3D_DESCRIPTOR_POOL_TYPE_STATIC); } if (bindings->root_descriptor_dirty_mask) { /* If any raw VA descriptor is dirty, we need to update all of them. */ if (root_signature->root_descriptor_raw_va_mask & bindings->root_descriptor_dirty_mask) va_count = d3d12_command_list_fetch_root_descriptor_vas(list, bindings, &root_parameter_data); /* TODO bind null descriptors for inactive root descriptors. 
*/ dirty_push_mask = bindings->root_descriptor_dirty_mask & root_signature->root_descriptor_push_mask & bindings->root_descriptor_active_mask; while (dirty_push_mask) { root_parameter_index = vkd3d_bitmask_iter64(&dirty_push_mask); root_parameter = root_signature_get_root_descriptor(root_signature, root_parameter_index); vk_write_descriptor_set_from_root_descriptor(list, &descriptor_writes[descriptor_write_count], root_parameter, descriptor_set, &bindings->root_descriptors[root_parameter_index]); descriptor_write_count += 1; } bindings->root_descriptor_dirty_mask = 0; } if (root_signature->flags & VKD3D_ROOT_SIGNATURE_USE_INLINE_UNIFORM_BLOCK) { d3d12_command_list_fetch_inline_uniform_block_data(list, bindings, &root_parameter_data); vk_write_descriptor_set_and_inline_uniform_block(&descriptor_writes[descriptor_write_count], &inline_uniform_block_write, descriptor_set, root_signature, &root_parameter_data); descriptor_write_count += 1; } else if (va_count && bindings->layout.vk_push_stages) { VK_CALL(vkCmdPushConstants(list->vk_command_buffer, layout, push_stages, 0, va_count * sizeof(*root_parameter_data.root_descriptor_vas), root_parameter_data.root_descriptor_vas)); } if (!descriptor_write_count) return; if (root_signature->flags & VKD3D_ROOT_SIGNATURE_USE_ROOT_DESCRIPTOR_SET) { VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, descriptor_write_count, descriptor_writes, 0, NULL)); VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, vk_bind_point, layout, root_signature->root_descriptor_set, 1, &descriptor_set, 0, NULL)); } else { VK_CALL(vkCmdPushDescriptorSetKHR(list->vk_command_buffer, vk_bind_point, layout, root_signature->root_descriptor_set, descriptor_write_count, descriptor_writes)); } } static void d3d12_command_list_update_hoisted_descriptors(struct d3d12_command_list *list, struct vkd3d_pipeline_bindings *bindings) { const struct d3d12_root_signature *rs = bindings->root_signature; const struct vkd3d_descriptor_hoist_desc *hoist_desc; const struct vkd3d_descriptor_metadata_types *types; struct vkd3d_root_descriptor_info *root_parameter; const struct vkd3d_descriptor_metadata_view *view; union vkd3d_descriptor_info *info; unsigned int i; /* We don't track dirty table index, just update every hoisted descriptor. * Uniform buffers tend to be updated all the time anyways, so this should be fine. */ for (i = 0; i < rs->hoist_info.num_desc; i++) { hoist_desc = &rs->hoist_info.desc[i]; view = list->cbv_srv_uav_descriptors_view; types = list->cbv_srv_uav_descriptors_types; if (view) { view += bindings->descriptor_tables[hoist_desc->table_index] + hoist_desc->table_offset; types += bindings->descriptor_tables[hoist_desc->table_index] + hoist_desc->table_offset; } root_parameter = &bindings->root_descriptors[hoist_desc->parameter_index]; bindings->root_descriptor_dirty_mask |= 1ull << hoist_desc->parameter_index; bindings->root_descriptor_active_mask |= 1ull << hoist_desc->parameter_index; root_parameter->vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; info = &root_parameter->info; if (types && (types->flags & VKD3D_DESCRIPTOR_FLAG_OFFSET_RANGE)) { /* Buffer descriptors must be valid on recording time. 
*/ info->buffer = view->info.buffer; } else { info->buffer.buffer = VK_NULL_HANDLE; info->buffer.offset = 0; info->buffer.range = VK_WHOLE_SIZE; } } bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS; } static void d3d12_command_list_update_descriptors(struct d3d12_command_list *list, VkPipelineBindPoint bind_point) { struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; const struct d3d12_root_signature *rs = bindings->root_signature; VkPipelineBindPoint vk_bind_point; VkShaderStageFlags push_stages; VkPipelineLayout layout; if (!rs) return; if (list->active_bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR) { /* We might have to emit to RT bind point, * but we pretend we're in compute bind point. */ layout = bindings->rt_layout.vk_pipeline_layout; push_stages = bindings->rt_layout.vk_push_stages; } else { layout = bindings->layout.vk_pipeline_layout; push_stages = bindings->layout.vk_push_stages; } vk_bind_point = list->active_bind_point; if (bindings->descriptor_heap_dirty_mask) d3d12_command_list_update_descriptor_heaps(list, bindings, vk_bind_point, layout); if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET) d3d12_command_list_update_static_samplers(list, bindings, vk_bind_point, layout); /* If we can, hoist descriptors from the descriptor heap into fake root parameters. */ if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS) d3d12_command_list_update_hoisted_descriptors(list, bindings); if (rs->flags & VKD3D_ROOT_SIGNATURE_USE_INLINE_UNIFORM_BLOCK) { /* Root constants and descriptor table offsets are part of the root descriptor set */ if (bindings->root_descriptor_dirty_mask || bindings->root_constant_dirty_mask || (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS)) d3d12_command_list_update_root_descriptors(list, bindings, vk_bind_point, layout, push_stages); } else { if (bindings->root_descriptor_dirty_mask) d3d12_command_list_update_root_descriptors(list, bindings, vk_bind_point, layout, push_stages); if (bindings->root_constant_dirty_mask) d3d12_command_list_update_root_constants(list, bindings, layout, push_stages); if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS) d3d12_command_list_update_descriptor_table_offsets(list, bindings, layout, push_stages); } } static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *list) { d3d12_command_list_end_current_render_pass(list, false); if (!d3d12_command_list_update_compute_pipeline(list)) return false; d3d12_command_list_update_descriptors(list, VK_PIPELINE_BIND_POINT_COMPUTE); return true; } static bool d3d12_command_list_update_raygen_state(struct d3d12_command_list *list) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; d3d12_command_list_end_current_render_pass(list, false); if (!d3d12_command_list_update_raygen_pipeline(list)) return false; /* DXR uses compute bind point for descriptors, we will redirect internally to * raygen bind point in Vulkan. */ d3d12_command_list_update_descriptors(list, VK_PIPELINE_BIND_POINT_COMPUTE); /* If we have a static sampler set for local root signatures, bind it now. * Don't bother with dirty tracking of this for time being. * Should be very rare that this path is even hit. 
*/ if (list->rt_state->local_static_sampler.desc_set) { VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, list->rt_state->local_static_sampler.pipeline_layout, list->rt_state->local_static_sampler.set_index, 1, &list->rt_state->local_static_sampler.desc_set, 0, NULL)); } return true; } static void d3d12_command_list_update_dynamic_state(struct d3d12_command_list *list) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; struct vkd3d_dynamic_state *dyn_state = &list->dynamic_state; const uint32_t *stride_align_masks; struct vkd3d_bitmask_range range; uint32_t update_vbos; unsigned int i; /* Make sure we only update states that are dynamic in the pipeline */ dyn_state->dirty_flags &= list->dynamic_state.active_flags; if (dyn_state->viewport_count) { if (dyn_state->dirty_flags & VKD3D_DYNAMIC_STATE_VIEWPORT) { VK_CALL(vkCmdSetViewportWithCountEXT(list->vk_command_buffer, dyn_state->viewport_count, dyn_state->viewports)); } if (dyn_state->dirty_flags & VKD3D_DYNAMIC_STATE_SCISSOR) { VK_CALL(vkCmdSetScissorWithCountEXT(list->vk_command_buffer, dyn_state->viewport_count, dyn_state->scissors)); } } else { /* Zero viewports disables rasterization. Emit dummy viewport / scissor rects. * For non-dynamic fallbacks, we force viewportCount to be at least 1. */ static const VkViewport dummy_vp = { 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f }; static const VkRect2D dummy_rect = { { 0, 0 }, { 0, 0 } }; if (dyn_state->dirty_flags & VKD3D_DYNAMIC_STATE_VIEWPORT) VK_CALL(vkCmdSetViewportWithCountEXT(list->vk_command_buffer, 1, &dummy_vp)); if (dyn_state->dirty_flags & VKD3D_DYNAMIC_STATE_SCISSOR) VK_CALL(vkCmdSetScissorWithCountEXT(list->vk_command_buffer, 1, &dummy_rect)); } if (dyn_state->dirty_flags & VKD3D_DYNAMIC_STATE_BLEND_CONSTANTS) { VK_CALL(vkCmdSetBlendConstants(list->vk_command_buffer, dyn_state->blend_constants)); } if (dyn_state->dirty_flags & VKD3D_DYNAMIC_STATE_STENCIL_REFERENCE) { VK_CALL(vkCmdSetStencilReference(list->vk_command_buffer, VK_STENCIL_FRONT_AND_BACK, dyn_state->stencil_reference)); } if (dyn_state->dirty_flags & VKD3D_DYNAMIC_STATE_DEPTH_BOUNDS) { VK_CALL(vkCmdSetDepthBounds(list->vk_command_buffer, dyn_state->min_depth_bounds, dyn_state->max_depth_bounds)); } if (dyn_state->dirty_flags & VKD3D_DYNAMIC_STATE_TOPOLOGY) { VK_CALL(vkCmdSetPrimitiveTopologyEXT(list->vk_command_buffer, dyn_state->vk_primitive_topology)); } if (dyn_state->dirty_flags & VKD3D_DYNAMIC_STATE_PRIMITIVE_RESTART) { /* The primitive restart dynamic state is only present if the PSO * has a strip cut value, so we only need to check if the * current primitive topology is a strip type. 
*/ VK_CALL(vkCmdSetPrimitiveRestartEnableEXT(list->vk_command_buffer, vk_primitive_topology_supports_restart(dyn_state->vk_primitive_topology))); } if (dyn_state->dirty_flags & VKD3D_DYNAMIC_STATE_VERTEX_BUFFER_STRIDE) { update_vbos = (dyn_state->dirty_vbos | dyn_state->dirty_vbo_strides) & list->state->graphics.vertex_buffer_mask; dyn_state->dirty_vbos &= ~update_vbos; dyn_state->dirty_vbo_strides &= ~update_vbos; stride_align_masks = list->state->graphics.vertex_buffer_stride_align_mask; while (update_vbos) { range = vkd3d_bitmask_iter32_range(&update_vbos); for (i = 0; i < range.count; i++) { if (dyn_state->vertex_offsets[i + range.offset] & stride_align_masks[i + range.offset]) { FIXME("Binding VBO at offset %"PRIu64", but required alignment is %u.\n", dyn_state->vertex_offsets[i + range.offset], stride_align_masks[i + range.offset] + 1); /* This modifies global state, but if app hits this, it's already buggy. */ dyn_state->vertex_offsets[i + range.offset] &= ~(VkDeviceSize)stride_align_masks[i + range.offset]; } if (dyn_state->vertex_strides[i + range.offset] & stride_align_masks[i + range.offset]) { FIXME("Binding VBO with stride %"PRIu64", but required alignment is %u.\n", dyn_state->vertex_strides[i + range.offset], stride_align_masks[i + range.offset] + 1); /* This modifies global state, but if app hits this, it's already buggy. * Round up, so that we don't hit offset > size case with dynamic strides. */ dyn_state->vertex_strides[i + range.offset] = (dyn_state->vertex_strides[i + range.offset] + stride_align_masks[i + range.offset]) & ~(VkDeviceSize)stride_align_masks[i + range.offset]; } } VK_CALL(vkCmdBindVertexBuffers2EXT(list->vk_command_buffer, range.offset, range.count, dyn_state->vertex_buffers + range.offset, dyn_state->vertex_offsets + range.offset, dyn_state->vertex_sizes + range.offset, dyn_state->vertex_strides + range.offset)); } } else if (dyn_state->dirty_flags & VKD3D_DYNAMIC_STATE_VERTEX_BUFFER) { update_vbos = dyn_state->dirty_vbos & list->state->graphics.vertex_buffer_mask; dyn_state->dirty_vbos &= ~update_vbos; dyn_state->dirty_vbo_strides &= ~update_vbos; stride_align_masks = list->state->graphics.vertex_buffer_stride_align_mask; while (update_vbos) { range = vkd3d_bitmask_iter32_range(&update_vbos); for (i = 0; i < range.count; i++) { if (dyn_state->vertex_offsets[i + range.offset] & stride_align_masks[i + range.offset]) { FIXME("Binding VBO at offset %"PRIu64", but required alignment is %u.\n", dyn_state->vertex_offsets[i + range.offset], stride_align_masks[i + range.offset] + 1); dyn_state->vertex_offsets[i + range.offset] &= ~(VkDeviceSize)stride_align_masks[i + range.offset]; } if (dyn_state->vertex_strides[i + range.offset] & stride_align_masks[i + range.offset]) { FIXME("Binding VBO with stride %"PRIu64", but required alignment is %u.\n", dyn_state->vertex_strides[i + range.offset], stride_align_masks[i + range.offset] + 1); /* This modifies global state, but if app hits this, it's already buggy. * Round up, so that we don't hit offset > size case with dynamic strides. 
*/ dyn_state->vertex_strides[i + range.offset] = (dyn_state->vertex_strides[i + range.offset] + stride_align_masks[i + range.offset]) & ~(VkDeviceSize)stride_align_masks[i + range.offset]; } } VK_CALL(vkCmdBindVertexBuffers2EXT(list->vk_command_buffer, range.offset, range.count, dyn_state->vertex_buffers + range.offset, dyn_state->vertex_offsets + range.offset, dyn_state->vertex_sizes + range.offset, NULL)); } } if (dyn_state->dirty_flags & VKD3D_DYNAMIC_STATE_FRAGMENT_SHADING_RATE) { VK_CALL(vkCmdSetFragmentShadingRateKHR(list->vk_command_buffer, &dyn_state->fragment_shading_rate.fragment_size, dyn_state->fragment_shading_rate.combiner_ops)); } dyn_state->dirty_flags = 0; } static void d3d12_command_list_promote_dsv_layout(struct d3d12_command_list *list) { /* If we know at this point that the image is DSV optimal in some way, promote the layout * so that we can select the appropriate render pass right away and ignore any * read-state shenanigans. If we cannot promote yet, the pipeline will override dsv_layout as required * by write enable bits. */ if (list->dsv_layout == VK_IMAGE_LAYOUT_UNDEFINED && list->state && d3d12_command_list_has_depth_stencil_view(list) && list->dsv.resource) { list->dsv_layout = d3d12_command_list_get_depth_stencil_resource_layout(list, list->dsv.resource, &list->dsv_plane_optimal_mask); } } static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; struct d3d12_graphics_pipeline_state *graphics; d3d12_command_list_end_transfer_batch(list); d3d12_command_list_promote_dsv_layout(list); if (!d3d12_command_list_update_graphics_pipeline(list)) return false; if (!d3d12_command_list_update_rendering_info(list)) return false; if (list->dynamic_state.dirty_flags) d3d12_command_list_update_dynamic_state(list); d3d12_command_list_update_descriptors(list, VK_PIPELINE_BIND_POINT_GRAPHICS); if (list->rendering_info.state_flags & VKD3D_RENDERING_ACTIVE) { d3d12_command_list_handle_active_queries(list, false); return true; } if (!(list->rendering_info.state_flags & VKD3D_RENDERING_SUSPENDED)) d3d12_command_list_emit_render_pass_transition(list, VKD3D_RENDER_PASS_TRANSITION_MODE_BEGIN); VK_CALL(vkCmdBeginRenderingKHR(list->vk_command_buffer, &list->rendering_info.info)); list->rendering_info.state_flags |= VKD3D_RENDERING_ACTIVE; list->rendering_info.state_flags &= ~VKD3D_RENDERING_SUSPENDED; graphics = &list->state->graphics; if (graphics->xfb_enabled) { VK_CALL(vkCmdBeginTransformFeedbackEXT(list->vk_command_buffer, 0, ARRAY_SIZE(list->so_counter_buffers), list->so_counter_buffers, list->so_counter_buffer_offsets)); list->xfb_enabled = true; } d3d12_command_list_handle_active_queries(list, false); return true; } static void d3d12_command_list_check_index_buffer_strip_cut_value(struct d3d12_command_list *list) { struct d3d12_graphics_pipeline_state *graphics = &list->state->graphics; if (TRACE_ON()) { /* In Vulkan, the strip cut value is derived from the index buffer format. 
*/ switch (graphics->index_buffer_strip_cut_value) { case D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF: if (list->index_buffer.dxgi_format != DXGI_FORMAT_R16_UINT) { TRACE("Strip cut value 0xffff is not supported with index buffer format %#x.\n", list->index_buffer.dxgi_format); } break; case D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF: if (list->index_buffer.dxgi_format != DXGI_FORMAT_R32_UINT) { TRACE("Strip cut value 0xffffffff is not supported with index buffer format %#x.\n", list->index_buffer.dxgi_format); } break; default: break; } } } static bool d3d12_command_list_emit_predicated_command(struct d3d12_command_list *list, enum vkd3d_predicate_command_type command_type, VkDeviceAddress indirect_args, const union vkd3d_predicate_command_direct_args *direct_args, struct vkd3d_scratch_allocation *scratch) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; struct vkd3d_predicate_command_info pipeline_info; struct vkd3d_predicate_command_args args; VkMemoryBarrier vk_barrier; vkd3d_meta_get_predicate_pipeline(&list->device->meta_ops, command_type, &pipeline_info); if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, pipeline_info.data_size, sizeof(uint32_t), ~0u, scratch)) return false; d3d12_command_list_end_current_render_pass(list, true); d3d12_command_list_invalidate_current_pipeline(list, true); d3d12_command_list_invalidate_root_parameters(list, VK_PIPELINE_BIND_POINT_COMPUTE, true); args.predicate_va = list->predicate_va; args.dst_arg_va = scratch->va; args.src_arg_va = indirect_args; args.args = *direct_args; VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_info.vk_pipeline)); VK_CALL(vkCmdPushConstants(list->vk_command_buffer, pipeline_info.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args)); VK_CALL(vkCmdDispatch(list->vk_command_buffer, 1, 1, 1)); vk_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; vk_barrier.pNext = NULL; vk_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; vk_barrier.dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT; VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, 0, 1, &vk_barrier, 0, NULL, 0, NULL)); return true; } static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(d3d12_command_list_iface *iface, UINT vertex_count_per_instance, UINT instance_count, UINT start_vertex_location, UINT start_instance_location) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; struct vkd3d_scratch_allocation scratch; TRACE("iface %p, vertex_count_per_instance %u, instance_count %u, " "start_vertex_location %u, start_instance_location %u.\n", iface, vertex_count_per_instance, instance_count, start_vertex_location, start_instance_location); if (list->predicate_va) { union vkd3d_predicate_command_direct_args args; args.draw.vertexCount = vertex_count_per_instance; args.draw.instanceCount = instance_count; args.draw.firstVertex = start_vertex_location; args.draw.firstInstance = start_instance_location; if (!d3d12_command_list_emit_predicated_command(list, VKD3D_PREDICATE_COMMAND_DRAW, 0, &args, &scratch)) return; } if (!d3d12_command_list_begin_render_pass(list)) { WARN("Failed to begin render pass, ignoring draw call.\n"); return; } if (!list->predicate_va) VK_CALL(vkCmdDraw(list->vk_command_buffer, vertex_count_per_instance, instance_count, 
start_vertex_location, start_instance_location)); else VK_CALL(vkCmdDrawIndirect(list->vk_command_buffer, scratch.buffer, scratch.offset, 1, 0)); VKD3D_BREADCRUMB_AUX32(vertex_count_per_instance); VKD3D_BREADCRUMB_AUX32(instance_count); VKD3D_BREADCRUMB_AUX32(start_vertex_location); VKD3D_BREADCRUMB_AUX32(start_instance_location); VKD3D_BREADCRUMB_COMMAND(DRAW); } static bool d3d12_command_list_update_index_buffer(struct d3d12_command_list *list) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; if (!list->index_buffer.buffer) { FIXME_ONCE("Application attempts to perform an indexed draw call without index buffer bound.\n"); /* We are supposed to render all 0 indices here. However, there are several problems with emulating this approach. * There is no robustness support for index buffers, and if we render all 0 indices, * it is extremely unlikely that this would create a meaningful side effect. * For any line or triangle primitive, we would end up creating degenerates for every primitive. * The only reasonable scenarios where we will observe anything is stream-out with all duplicate values, or * geometry shaders where the application makes use of PrimitiveID to construct primitives. * Until proven to be required otherwise, we just ignore the draw call. */ return false; } if (list->index_buffer.is_dirty) { VK_CALL(vkCmdBindIndexBuffer(list->vk_command_buffer, list->index_buffer.buffer, list->index_buffer.offset, list->index_buffer.vk_type)); list->index_buffer.is_dirty = false; } return true; } static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(d3d12_command_list_iface *iface, UINT index_count_per_instance, UINT instance_count, UINT start_vertex_location, INT base_vertex_location, UINT start_instance_location) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; struct vkd3d_scratch_allocation scratch; TRACE("iface %p, index_count_per_instance %u, instance_count %u, start_vertex_location %u, " "base_vertex_location %d, start_instance_location %u.\n", iface, index_count_per_instance, instance_count, start_vertex_location, base_vertex_location, start_instance_location); if (!d3d12_command_list_update_index_buffer(list)) return; if (list->predicate_va) { union vkd3d_predicate_command_direct_args args; args.draw_indexed.indexCount = index_count_per_instance; args.draw_indexed.instanceCount = instance_count; args.draw_indexed.firstIndex = start_vertex_location; args.draw_indexed.vertexOffset = base_vertex_location; args.draw_indexed.firstInstance = start_instance_location; if (!d3d12_command_list_emit_predicated_command(list, VKD3D_PREDICATE_COMMAND_DRAW_INDEXED, 0, &args, &scratch)) return; } if (!d3d12_command_list_begin_render_pass(list)) { WARN("Failed to begin render pass, ignoring draw call.\n"); return; } d3d12_command_list_check_index_buffer_strip_cut_value(list); if (!list->predicate_va) VK_CALL(vkCmdDrawIndexed(list->vk_command_buffer, index_count_per_instance, instance_count, start_vertex_location, base_vertex_location, start_instance_location)); else VK_CALL(vkCmdDrawIndexedIndirect(list->vk_command_buffer, scratch.buffer, scratch.offset, 1, 0)); VKD3D_BREADCRUMB_AUX32(index_count_per_instance); VKD3D_BREADCRUMB_AUX32(instance_count); VKD3D_BREADCRUMB_AUX32(start_vertex_location); VKD3D_BREADCRUMB_AUX32(base_vertex_location); VKD3D_BREADCRUMB_AUX32(start_instance_location); VKD3D_BREADCRUMB_COMMAND(DRAW_INDEXED); } static void STDMETHODCALLTYPE 
d3d12_command_list_Dispatch(d3d12_command_list_iface *iface, UINT x, UINT y, UINT z) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; struct vkd3d_scratch_allocation scratch; TRACE("iface %p, x %u, y %u, z %u.\n", iface, x, y, z); if (list->predicate_va) { union vkd3d_predicate_command_direct_args args; args.dispatch.x = x; args.dispatch.y = y; args.dispatch.z = z; if (!d3d12_command_list_emit_predicated_command(list, VKD3D_PREDICATE_COMMAND_DISPATCH, 0, &args, &scratch)) return; } d3d12_command_list_end_transfer_batch(list); if (!d3d12_command_list_update_compute_state(list)) { WARN("Failed to update compute state, ignoring dispatch.\n"); return; } if (!list->predicate_va) VK_CALL(vkCmdDispatch(list->vk_command_buffer, x, y, z)); else VK_CALL(vkCmdDispatchIndirect(list->vk_command_buffer, scratch.buffer, scratch.offset)); VKD3D_BREADCRUMB_AUX32(x); VKD3D_BREADCRUMB_AUX32(y); VKD3D_BREADCRUMB_AUX32(z); VKD3D_BREADCRUMB_COMMAND(DISPATCH); } static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(d3d12_command_list_iface *iface, ID3D12Resource *dst, UINT64 dst_offset, ID3D12Resource *src, UINT64 src_offset, UINT64 byte_count) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct d3d12_resource *dst_resource, *src_resource; const struct vkd3d_vk_device_procs *vk_procs; VkCopyBufferInfo2KHR copy_info; VkBufferCopy2KHR buffer_copy; TRACE("iface %p, dst_resource %p, dst_offset %#"PRIx64", src_resource %p, " "src_offset %#"PRIx64", byte_count %#"PRIx64".\n", iface, dst, dst_offset, src, src_offset, byte_count); vk_procs = &list->device->vk_procs; dst_resource = impl_from_ID3D12Resource(dst); assert(d3d12_resource_is_buffer(dst_resource)); src_resource = impl_from_ID3D12Resource(src); assert(d3d12_resource_is_buffer(src_resource)); d3d12_command_list_track_resource_usage(list, dst_resource, true); d3d12_command_list_track_resource_usage(list, src_resource, true); d3d12_command_list_end_current_render_pass(list, true); d3d12_command_list_end_transfer_batch(list); buffer_copy.sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2_KHR; buffer_copy.pNext = NULL; buffer_copy.srcOffset = src_offset + src_resource->mem.offset; buffer_copy.dstOffset = dst_offset + dst_resource->mem.offset; buffer_copy.size = byte_count; copy_info.sType = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2_KHR; copy_info.pNext = NULL; copy_info.srcBuffer = src_resource->res.vk_buffer; copy_info.dstBuffer = dst_resource->res.vk_buffer; copy_info.regionCount = 1; copy_info.pRegions = &buffer_copy; d3d12_command_list_mark_copy_buffer_write(list, copy_info.dstBuffer, buffer_copy.dstOffset, buffer_copy.size, !!(dst_resource->flags & VKD3D_RESOURCE_RESERVED)); VK_CALL(vkCmdCopyBuffer2KHR(list->vk_command_buffer, &copy_info)); VKD3D_BREADCRUMB_COMMAND(COPY); } static void vk_image_subresource_layers_from_d3d12(VkImageSubresourceLayers *subresource, const struct vkd3d_format *format, unsigned int sub_resource_idx, unsigned int miplevel_count, unsigned int layer_count) { VkImageSubresource sub = vk_image_subresource_from_d3d12( format, sub_resource_idx, miplevel_count, layer_count, false); subresource->aspectMask = sub.aspectMask; subresource->mipLevel = sub.mipLevel; subresource->baseArrayLayer = sub.arrayLayer; subresource->layerCount = 1; } static void vk_extent_3d_from_d3d12_miplevel(VkExtent3D *extent, const D3D12_RESOURCE_DESC1 *resource_desc, unsigned int miplevel_idx) { extent->width =
d3d12_resource_desc_get_width(resource_desc, miplevel_idx); extent->height = d3d12_resource_desc_get_height(resource_desc, miplevel_idx); extent->depth = d3d12_resource_desc_get_depth(resource_desc, miplevel_idx); } static void vk_buffer_image_copy_from_d3d12(VkBufferImageCopy2KHR *copy, const D3D12_PLACED_SUBRESOURCE_FOOTPRINT *footprint, unsigned int sub_resource_idx, const D3D12_RESOURCE_DESC1 *image_desc, const struct vkd3d_format *src_format, const struct vkd3d_format *dst_format, const D3D12_BOX *src_box, unsigned int dst_x, unsigned int dst_y, unsigned int dst_z) { copy->bufferOffset = footprint->Offset; if (src_box) { VkDeviceSize row_count = footprint->Footprint.Height / src_format->block_height; copy->bufferOffset += vkd3d_format_get_data_offset(src_format, footprint->Footprint.RowPitch, row_count * footprint->Footprint.RowPitch, src_box->left, src_box->top, src_box->front); } copy->bufferRowLength = footprint->Footprint.RowPitch / (src_format->byte_count * src_format->block_byte_count) * src_format->block_width; copy->bufferImageHeight = footprint->Footprint.Height; vk_image_subresource_layers_from_d3d12(&copy->imageSubresource, dst_format, sub_resource_idx, image_desc->MipLevels, d3d12_resource_desc_get_layer_count(image_desc)); copy->imageOffset.x = dst_x; copy->imageOffset.y = dst_y; copy->imageOffset.z = dst_z; vk_extent_3d_from_d3d12_miplevel(&copy->imageExtent, image_desc, copy->imageSubresource.mipLevel); copy->imageExtent.width -= copy->imageOffset.x; copy->imageExtent.height -= copy->imageOffset.y; copy->imageExtent.depth -= copy->imageOffset.z; if (src_box) { copy->imageExtent.width = min(copy->imageExtent.width, src_box->right - src_box->left); copy->imageExtent.height = min(copy->imageExtent.height, src_box->bottom - src_box->top); copy->imageExtent.depth = min(copy->imageExtent.depth, src_box->back - src_box->front); } else { copy->imageExtent.width = min(copy->imageExtent.width, footprint->Footprint.Width); copy->imageExtent.height = min(copy->imageExtent.height, footprint->Footprint.Height); copy->imageExtent.depth = min(copy->imageExtent.depth, footprint->Footprint.Depth); } } static void vk_image_buffer_copy_from_d3d12(VkBufferImageCopy2KHR *copy, const D3D12_PLACED_SUBRESOURCE_FOOTPRINT *footprint, unsigned int sub_resource_idx, const D3D12_RESOURCE_DESC1 *image_desc, const struct vkd3d_format *src_format, const struct vkd3d_format *dst_format, const D3D12_BOX *src_box, unsigned int dst_x, unsigned int dst_y, unsigned int dst_z) { VkDeviceSize row_count = footprint->Footprint.Height / dst_format->block_height; copy->bufferOffset = footprint->Offset + vkd3d_format_get_data_offset(dst_format, footprint->Footprint.RowPitch, row_count * footprint->Footprint.RowPitch, dst_x, dst_y, dst_z); copy->bufferRowLength = footprint->Footprint.RowPitch / (dst_format->byte_count * dst_format->block_byte_count) * dst_format->block_width; copy->bufferImageHeight = footprint->Footprint.Height; vk_image_subresource_layers_from_d3d12(&copy->imageSubresource, src_format, sub_resource_idx, image_desc->MipLevels, d3d12_resource_desc_get_layer_count(image_desc)); copy->imageOffset.x = src_box ? src_box->left : 0; copy->imageOffset.y = src_box ? src_box->top : 0; copy->imageOffset.z = src_box ?
src_box->front : 0; if (src_box) { copy->imageExtent.width = src_box->right - src_box->left; copy->imageExtent.height = src_box->bottom - src_box->top; copy->imageExtent.depth = src_box->back - src_box->front; } else { unsigned int miplevel = copy->imageSubresource.mipLevel; vk_extent_3d_from_d3d12_miplevel(&copy->imageExtent, image_desc, miplevel); } } static void vk_image_copy_from_d3d12(VkImageCopy2KHR *image_copy, unsigned int src_sub_resource_idx, unsigned int dst_sub_resource_idx, const D3D12_RESOURCE_DESC1 *src_desc, const D3D12_RESOURCE_DESC1 *dst_desc, const struct vkd3d_format *src_format, const struct vkd3d_format *dst_format, const D3D12_BOX *src_box, unsigned int dst_x, unsigned int dst_y, unsigned int dst_z) { vk_image_subresource_layers_from_d3d12(&image_copy->srcSubresource, src_format, src_sub_resource_idx, src_desc->MipLevels, d3d12_resource_desc_get_layer_count(src_desc)); image_copy->sType = VK_STRUCTURE_TYPE_IMAGE_COPY_2_KHR; image_copy->pNext = NULL; image_copy->srcOffset.x = src_box ? src_box->left : 0; image_copy->srcOffset.y = src_box ? src_box->top : 0; image_copy->srcOffset.z = src_box ? src_box->front : 0; vk_image_subresource_layers_from_d3d12(&image_copy->dstSubresource, dst_format, dst_sub_resource_idx, dst_desc->MipLevels, d3d12_resource_desc_get_layer_count(dst_desc)); image_copy->dstOffset.x = dst_x; image_copy->dstOffset.y = dst_y; image_copy->dstOffset.z = dst_z; if (src_box) { image_copy->extent.width = src_box->right - src_box->left; image_copy->extent.height = src_box->bottom - src_box->top; image_copy->extent.depth = src_box->back - src_box->front; } else { VkExtent3D srcExtent, dstExtent; vk_extent_3d_from_d3d12_miplevel(&srcExtent, src_desc, image_copy->srcSubresource.mipLevel); vk_extent_3d_from_d3d12_miplevel(&dstExtent, dst_desc, image_copy->dstSubresource.mipLevel); image_copy->extent.width = min(dst_x + srcExtent.width, dstExtent.width) - dst_x; image_copy->extent.height = min(dst_y + srcExtent.height, dstExtent.height) - dst_y; image_copy->extent.depth = min(dst_z + srcExtent.depth, dstExtent.depth) - dst_z; } } static void d3d12_command_list_copy_image(struct d3d12_command_list *list, struct d3d12_resource *dst_resource, const struct vkd3d_format *dst_format, struct d3d12_resource *src_resource, const struct vkd3d_format *src_format, const VkImageCopy2KHR *region, bool writes_full_subresource, bool overlapping_subresource) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; struct vkd3d_texture_view_desc dst_view_desc, src_view_desc; struct vkd3d_copy_image_pipeline_key pipeline_key; VkRenderingAttachmentInfoKHR attachment_info; VkPipelineStageFlags src_stages, dst_stages; struct vkd3d_copy_image_info pipeline_info; VkImageMemoryBarrier vk_image_barriers[2]; VkWriteDescriptorSet vk_descriptor_write; struct vkd3d_copy_image_args push_args; struct vkd3d_view *dst_view, *src_view; VkAccessFlags src_access, dst_access; VkImageLayout src_layout, dst_layout; bool dst_is_depth_stencil, use_copy; VkDescriptorImageInfo vk_image_info; VkDescriptorSet vk_descriptor_set; VkRenderingInfoKHR rendering_info; VkCopyImageInfo2KHR copy_info; VkViewport viewport; unsigned int i; HRESULT hr; use_copy = dst_format->vk_aspect_mask == src_format->vk_aspect_mask; dst_is_depth_stencil = !!(dst_format->vk_aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)); if (use_copy) { if (overlapping_subresource) { src_layout = VK_IMAGE_LAYOUT_GENERAL; dst_layout = VK_IMAGE_LAYOUT_GENERAL; } else { src_layout =
d3d12_resource_pick_layout(src_resource, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); dst_layout = d3d12_resource_pick_layout(dst_resource, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); } src_stages = VK_PIPELINE_STAGE_TRANSFER_BIT; dst_stages = VK_PIPELINE_STAGE_TRANSFER_BIT; src_access = VK_ACCESS_TRANSFER_READ_BIT; dst_access = VK_ACCESS_TRANSFER_WRITE_BIT; } else { src_layout = d3d12_resource_pick_layout(src_resource, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); src_stages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; src_access = VK_ACCESS_SHADER_READ_BIT; if (dst_is_depth_stencil) { /* We will only promote one aspect out of common layout. */ if (region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) { dst_layout = dst_resource->common_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL; } else if (region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) { dst_layout = dst_resource->common_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL; } else dst_layout = d3d12_resource_pick_layout(dst_resource, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); dst_stages = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; dst_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; } else { dst_layout = d3d12_resource_pick_layout(dst_resource, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); dst_stages = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; dst_access = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; } } for (i = 0; i < ARRAY_SIZE(vk_image_barriers); i++) { vk_image_barriers[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; vk_image_barriers[i].pNext = NULL; vk_image_barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vk_image_barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; } vk_image_barriers[0].srcAccessMask = 0; vk_image_barriers[0].dstAccessMask = dst_access; /* Fully writing a subresource with a copy is a valid way to use the "advanced" aliasing model of D3D12. * In this model, a complete Copy command is sufficient to activate an aliased resource. * This is also an optimization, since we can avoid a potential decompress when entering TRANSFER_DST layout. */ vk_image_barriers[0].oldLayout = writes_full_subresource ? VK_IMAGE_LAYOUT_UNDEFINED : dst_resource->common_layout; vk_image_barriers[0].newLayout = dst_layout; vk_image_barriers[0].image = dst_resource->res.vk_image; vk_image_barriers[0].subresourceRange = vk_subresource_range_from_layers(&region->dstSubresource); if (overlapping_subresource) vk_image_barriers[0].dstAccessMask |= src_access; vk_image_barriers[1].srcAccessMask = 0; vk_image_barriers[1].dstAccessMask = src_access; vk_image_barriers[1].oldLayout = src_resource->common_layout; vk_image_barriers[1].newLayout = src_layout; vk_image_barriers[1].image = src_resource->res.vk_image; vk_image_barriers[1].subresourceRange = vk_subresource_range_from_layers(&region->srcSubresource); VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, src_stages | dst_stages, 0, 0, NULL, 0, NULL, overlapping_subresource ?
1 : ARRAY_SIZE(vk_image_barriers), vk_image_barriers)); if (use_copy) { copy_info.sType = VK_STRUCTURE_TYPE_COPY_IMAGE_INFO_2_KHR; copy_info.pNext = NULL; copy_info.srcImage = src_resource->res.vk_image; copy_info.srcImageLayout = src_layout; copy_info.dstImage = dst_resource->res.vk_image; copy_info.dstImageLayout = dst_layout; copy_info.regionCount = 1; copy_info.pRegions = region; VK_CALL(vkCmdCopyImage2KHR(list->vk_command_buffer, &copy_info)); } else { dst_view = src_view = NULL; if (!(dst_format = vkd3d_meta_get_copy_image_attachment_format(&list->device->meta_ops, dst_format, src_format, region->dstSubresource.aspectMask, region->srcSubresource.aspectMask))) { ERR("No attachment format found for source format %u.\n", src_format->vk_format); goto cleanup; } memset(&pipeline_key, 0, sizeof(pipeline_key)); pipeline_key.format = dst_format; pipeline_key.view_type = vkd3d_meta_get_copy_image_view_type(dst_resource->desc.Dimension); pipeline_key.sample_count = vk_samples_from_dxgi_sample_desc(&dst_resource->desc.SampleDesc); pipeline_key.dst_aspect_mask = region->dstSubresource.aspectMask; if (FAILED(hr = vkd3d_meta_get_copy_image_pipeline(&list->device->meta_ops, &pipeline_key, &pipeline_info))) { ERR("Failed to obtain pipeline, format %u, view_type %u, sample_count %u.\n", pipeline_key.format->vk_format, pipeline_key.view_type, pipeline_key.sample_count); goto cleanup; } d3d12_command_list_invalidate_current_pipeline(list, true); d3d12_command_list_invalidate_root_parameters(list, VK_PIPELINE_BIND_POINT_GRAPHICS, true); memset(&dst_view_desc, 0, sizeof(dst_view_desc)); dst_view_desc.image = dst_resource->res.vk_image; dst_view_desc.view_type = pipeline_key.view_type; dst_view_desc.format = dst_format; dst_view_desc.miplevel_idx = region->dstSubresource.mipLevel; dst_view_desc.miplevel_count = 1; dst_view_desc.layer_idx = region->dstSubresource.baseArrayLayer; dst_view_desc.layer_count = region->dstSubresource.layerCount; /* A render pass must cover all depth-stencil aspects. */ dst_view_desc.aspect_mask = dst_resource->format->vk_aspect_mask; dst_view_desc.image_usage = (pipeline_key.dst_aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) ?
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT : VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; dst_view_desc.allowed_swizzle = false; memset(&src_view_desc, 0, sizeof(src_view_desc)); src_view_desc.image = src_resource->res.vk_image; src_view_desc.view_type = pipeline_key.view_type; src_view_desc.format = src_format; src_view_desc.miplevel_idx = region->srcSubresource.mipLevel; src_view_desc.miplevel_count = 1; src_view_desc.layer_idx = region->srcSubresource.baseArrayLayer; src_view_desc.layer_count = region->srcSubresource.layerCount; src_view_desc.aspect_mask = region->srcSubresource.aspectMask; src_view_desc.image_usage = VK_IMAGE_USAGE_SAMPLED_BIT; src_view_desc.allowed_swizzle = false; if (!vkd3d_create_texture_view(list->device, &dst_view_desc, &dst_view) || !vkd3d_create_texture_view(list->device, &src_view_desc, &src_view)) { ERR("Failed to create image views.\n"); goto cleanup; } if (!d3d12_command_allocator_add_view(list->allocator, dst_view) || !d3d12_command_allocator_add_view(list->allocator, src_view)) { ERR("Failed to add views.\n"); goto cleanup; } memset(&attachment_info, 0, sizeof(attachment_info)); attachment_info.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR; attachment_info.imageView = dst_view->vk_image_view; attachment_info.imageLayout = dst_layout; attachment_info.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachment_info.storeOp = VK_ATTACHMENT_STORE_OP_STORE; memset(&rendering_info, 0, sizeof(rendering_info)); rendering_info.sType = VK_STRUCTURE_TYPE_RENDERING_INFO_KHR; rendering_info.renderArea.offset.x = region->dstOffset.x; rendering_info.renderArea.offset.y = region->dstOffset.y; rendering_info.renderArea.extent.width = region->extent.width; rendering_info.renderArea.extent.height = region->extent.height; rendering_info.layerCount = dst_view_desc.layer_count; if (dst_is_depth_stencil) { if (dst_format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) rendering_info.pDepthAttachment = &attachment_info; if (dst_format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) rendering_info.pStencilAttachment = &attachment_info; } else { rendering_info.colorAttachmentCount = 1; rendering_info.pColorAttachments = &attachment_info; } viewport.x = (float)region->dstOffset.x; viewport.y = (float)region->dstOffset.y; viewport.width = (float)region->extent.width; viewport.height = (float)region->extent.height; viewport.minDepth = 0.0f; viewport.maxDepth = 1.0f; push_args.offset.x = region->srcOffset.x - region->dstOffset.x; push_args.offset.y = region->srcOffset.y - region->dstOffset.y; vk_descriptor_set = d3d12_command_allocator_allocate_descriptor_set( list->allocator, pipeline_info.vk_set_layout, VKD3D_DESCRIPTOR_POOL_TYPE_STATIC); if (!vk_descriptor_set) { ERR("Failed to allocate descriptor set.\n"); goto cleanup; } vk_image_info.sampler = VK_NULL_HANDLE; vk_image_info.imageView = src_view->vk_image_view; vk_image_info.imageLayout = src_layout; vk_descriptor_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; vk_descriptor_write.pNext = NULL; vk_descriptor_write.dstSet = vk_descriptor_set; vk_descriptor_write.dstBinding = 0; vk_descriptor_write.dstArrayElement = 0; vk_descriptor_write.descriptorCount = 1; vk_descriptor_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; vk_descriptor_write.pImageInfo = &vk_image_info; vk_descriptor_write.pBufferInfo = NULL; vk_descriptor_write.pTexelBufferView = NULL; VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &vk_descriptor_write, 0, NULL)); VK_CALL(vkCmdBeginRenderingKHR(list->vk_command_buffer, &rendering_info)); 
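/* Fallback render-pass copy: the source image is sampled through the descriptor set written above,
 * the source/destination offset delta is passed via push constants, and one full-screen triangle
 * is drawn per destination layer to write the copied texels. */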
VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_info.vk_pipeline)); VK_CALL(vkCmdSetViewport(list->vk_command_buffer, 0, 1, &viewport)); VK_CALL(vkCmdSetScissor(list->vk_command_buffer, 0, 1, &rendering_info.renderArea)); VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_info.vk_pipeline_layout, 0, 1, &vk_descriptor_set, 0, NULL)); VK_CALL(vkCmdPushConstants(list->vk_command_buffer, pipeline_info.vk_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(push_args), &push_args)); VK_CALL(vkCmdDraw(list->vk_command_buffer, 3, region->dstSubresource.layerCount, 0, 0)); VK_CALL(vkCmdEndRenderingKHR(list->vk_command_buffer)); cleanup: if (dst_view) vkd3d_view_decref(dst_view, list->device); if (src_view) vkd3d_view_decref(src_view, list->device); } vk_image_barriers[0].srcAccessMask = dst_access; vk_image_barriers[0].dstAccessMask = 0; vk_image_barriers[0].oldLayout = dst_layout; vk_image_barriers[0].newLayout = dst_resource->common_layout; vk_image_barriers[1].srcAccessMask = 0; vk_image_barriers[1].dstAccessMask = 0; vk_image_barriers[1].oldLayout = src_layout; vk_image_barriers[1].newLayout = src_resource->common_layout; VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, src_stages | dst_stages, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 0, NULL, overlapping_subresource ? 1 : ARRAY_SIZE(vk_image_barriers), vk_image_barriers)); } static bool validate_d3d12_box(const D3D12_BOX *box) { return box->right > box->left && box->bottom > box->top && box->back > box->front; } static void d3d12_command_list_transition_image_layout_with_global_memory_barrier(struct d3d12_command_list *list, struct d3d12_command_list_barrier_batch *batch, VkImage vk_image, const VkImageSubresourceLayers *vk_subresource, VkPipelineStageFlags src_stages, VkAccessFlags src_access, VkImageLayout old_layout, VkPipelineStageFlags dst_stages, VkAccessFlags dst_access, VkImageLayout new_layout, VkAccessFlags global_src_access, VkAccessFlags global_dst_access) { VkImageMemoryBarrier vk_barrier; bool need_global_barrier; vk_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; vk_barrier.pNext = NULL; vk_barrier.srcAccessMask = src_access; vk_barrier.dstAccessMask = dst_access; vk_barrier.oldLayout = old_layout; vk_barrier.newLayout = new_layout; vk_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vk_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vk_barrier.image = vk_image; vk_barrier.subresourceRange = vk_subresource_range_from_layers(vk_subresource); need_global_barrier = global_src_access || global_dst_access; if (need_global_barrier) { d3d12_command_list_barrier_batch_add_global_transition(list, batch, global_src_access, global_dst_access); } d3d12_command_list_barrier_batch_add_layout_transition(list, batch, &vk_barrier); batch->dst_stage_mask |= dst_stages; batch->src_stage_mask |= src_stages; } static void d3d12_command_list_transition_image_layout(struct d3d12_command_list *list, struct d3d12_command_list_barrier_batch *batch, VkImage vk_image, const VkImageSubresourceLayers *vk_subresource, VkPipelineStageFlags src_stages, VkAccessFlags src_access, VkImageLayout old_layout, VkPipelineStageFlags dst_stages, VkAccessFlags dst_access, VkImageLayout new_layout) { d3d12_command_list_transition_image_layout_with_global_memory_barrier(list, batch, vk_image, vk_subresource, src_stages, src_access, old_layout, dst_stages, dst_access, new_layout, 0, 0); } static bool d3d12_command_list_init_copy_texture_region(struct 
d3d12_command_list *list, const D3D12_TEXTURE_COPY_LOCATION *dst, UINT dst_x, UINT dst_y, UINT dst_z, const D3D12_TEXTURE_COPY_LOCATION *src, const D3D12_BOX *src_box, struct vkd3d_image_copy_info *out) { struct d3d12_resource *dst_resource, *src_resource; memset(out, 0, sizeof(*out)); out->src = *src; out->dst = *dst; dst_resource = impl_from_ID3D12Resource(dst->pResource); src_resource = impl_from_ID3D12Resource(src->pResource); out->copy.buffer_image.sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2_KHR; out->copy.buffer_image.pNext = NULL; if (src->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX && dst->Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT) { assert(d3d12_resource_is_buffer(dst_resource)); assert(d3d12_resource_is_texture(src_resource)); if (!(out->src_format = vkd3d_format_from_d3d12_resource_desc(list->device, &src_resource->desc, DXGI_FORMAT_UNKNOWN))) { WARN("Invalid format %#x.\n", dst->PlacedFootprint.Footprint.Format); return false; } if (!(out->dst_format = vkd3d_get_format(list->device, dst->PlacedFootprint.Footprint.Format, true))) { WARN("Invalid format %#x.\n", dst->PlacedFootprint.Footprint.Format); return false; } vk_image_buffer_copy_from_d3d12(&out->copy.buffer_image, &dst->PlacedFootprint, src->SubresourceIndex, &src_resource->desc, out->src_format, out->dst_format, src_box, dst_x, dst_y, dst_z); out->copy.buffer_image.bufferOffset += dst_resource->mem.offset; out->src_layout = d3d12_resource_pick_layout(src_resource, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); out->batch_type = VKD3D_BATCH_TYPE_COPY_IMAGE_TO_BUFFER; } else if (src->Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT && dst->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX) { assert(d3d12_resource_is_texture(dst_resource)); assert(d3d12_resource_is_buffer(src_resource)); if (!(out->dst_format = vkd3d_format_from_d3d12_resource_desc(list->device, &dst_resource->desc, DXGI_FORMAT_UNKNOWN))) { WARN("Invalid format %#x.\n", dst_resource->desc.Format); return false; } if (!(out->src_format = vkd3d_get_format(list->device, src->PlacedFootprint.Footprint.Format, true))) { WARN("Invalid format %#x.\n", src->PlacedFootprint.Footprint.Format); return false; } vk_buffer_image_copy_from_d3d12(&out->copy.buffer_image, &src->PlacedFootprint, dst->SubresourceIndex, &dst_resource->desc, out->src_format, out->dst_format, src_box, dst_x, dst_y, dst_z); out->copy.buffer_image.bufferOffset += src_resource->mem.offset; out->dst_layout = d3d12_resource_pick_layout(dst_resource, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); out->writes_full_subresource = d3d12_image_copy_writes_full_subresource(dst_resource, &out->copy.buffer_image.imageExtent, &out->copy.buffer_image.imageSubresource); out->batch_type = VKD3D_BATCH_TYPE_COPY_BUFFER_TO_IMAGE; } else if (src->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX && dst->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX) { assert(d3d12_resource_is_texture(dst_resource)); assert(d3d12_resource_is_texture(src_resource)); out->dst_format = dst_resource->format; out->src_format = src_resource->format; vk_image_copy_from_d3d12(&out->copy.image, src->SubresourceIndex, dst->SubresourceIndex, &src_resource->desc, &dst_resource->desc, out->src_format, out->dst_format, src_box, dst_x, dst_y, dst_z); /* If aspect masks do not match, we have to use fallback copies with a render pass, and there * is no standard way to write to stencil without fallbacks. * Checking aspect masks here is equivalent to checking formats. 
vkCmdCopyImage can only be * used for compatible formats and depth stencil formats are only compatible with themselves. */ if (out->dst_format->vk_aspect_mask != out->src_format->vk_aspect_mask && (out->copy.image.dstSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) && !list->device->vk_info.EXT_shader_stencil_export) { FIXME("Destination depth-stencil format %#x is not supported for STENCIL dst copy with render pass fallback.\n", out->dst_format->dxgi_format); return false; } out->writes_full_subresource = d3d12_image_copy_writes_full_subresource(dst_resource, &out->copy.image.extent, &out->copy.image.dstSubresource); out->batch_type = VKD3D_BATCH_TYPE_COPY_IMAGE; } else { FIXME("Copy type %#x -> %#x not implemented.\n", src->Type, dst->Type); return false; } return true; } static void d3d12_command_list_before_copy_texture_region(struct d3d12_command_list *list, struct d3d12_command_list_barrier_batch *batch, struct vkd3d_image_copy_info *info) { VkAccessFlags global_transfer_access; struct d3d12_resource *dst_resource, *src_resource; dst_resource = impl_from_ID3D12Resource(info->dst.pResource); src_resource = impl_from_ID3D12Resource(info->src.pResource); d3d12_command_list_track_resource_usage(list, src_resource, true); if (info->batch_type == VKD3D_BATCH_TYPE_COPY_IMAGE_TO_BUFFER) { d3d12_command_list_track_resource_usage(list, dst_resource, true); /* We're going to do an image layout transition, so we can handle pending buffer barriers while we're at it. * After that barrier completes, we implicitly synchronize any outstanding copies, so we can drop the tracking. * This also avoids having to compute the destination damage region. */ global_transfer_access = list->tracked_copy_buffer_count ? VK_ACCESS_TRANSFER_WRITE_BIT : 0; d3d12_command_list_reset_buffer_copy_tracking(list); d3d12_command_list_transition_image_layout_with_global_memory_barrier(list, batch, src_resource->res.vk_image, &info->copy.buffer_image.imageSubresource, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, src_resource->common_layout, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, info->src_layout, global_transfer_access, global_transfer_access); } else if (info->batch_type == VKD3D_BATCH_TYPE_COPY_BUFFER_TO_IMAGE) { d3d12_command_list_track_resource_usage(list, dst_resource, !info->writes_full_subresource); d3d12_command_list_transition_image_layout(list, batch, dst_resource->res.vk_image, &info->copy.buffer_image.imageSubresource, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, info->writes_full_subresource ? 
VK_IMAGE_LAYOUT_UNDEFINED : dst_resource->common_layout, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, info->dst_layout); } else if (info->batch_type == VKD3D_BATCH_TYPE_COPY_IMAGE) { d3d12_command_list_track_resource_usage(list, dst_resource, !info->writes_full_subresource); } } static void d3d12_command_list_copy_texture_region(struct d3d12_command_list *list, struct d3d12_command_list_barrier_batch *batch, struct vkd3d_image_copy_info *info) { struct d3d12_resource *dst_resource, *src_resource; const struct vkd3d_vk_device_procs *vk_procs; VkAccessFlags global_transfer_access; vk_procs = &list->device->vk_procs; dst_resource = impl_from_ID3D12Resource(info->dst.pResource); src_resource = impl_from_ID3D12Resource(info->src.pResource); if (info->batch_type == VKD3D_BATCH_TYPE_COPY_IMAGE_TO_BUFFER) { VkCopyImageToBufferInfo2KHR copy_info; global_transfer_access = VK_ACCESS_TRANSFER_WRITE_BIT; copy_info.sType = VK_STRUCTURE_TYPE_COPY_IMAGE_TO_BUFFER_INFO_2_KHR; copy_info.pNext = NULL; copy_info.srcImage = src_resource->res.vk_image; copy_info.srcImageLayout = info->src_layout; copy_info.dstBuffer = dst_resource->res.vk_buffer; copy_info.regionCount = 1; copy_info.pRegions = &info->copy.buffer_image; VK_CALL(vkCmdCopyImageToBuffer2KHR(list->vk_command_buffer, &copy_info)); d3d12_command_list_transition_image_layout_with_global_memory_barrier(list, batch, src_resource->res.vk_image, &info->copy.buffer_image.imageSubresource, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, info->src_layout, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, src_resource->common_layout, global_transfer_access, global_transfer_access); } else if (info->batch_type == VKD3D_BATCH_TYPE_COPY_BUFFER_TO_IMAGE) { VkCopyBufferToImageInfo2KHR copy_info; copy_info.sType = VK_STRUCTURE_TYPE_COPY_BUFFER_TO_IMAGE_INFO_2_KHR; copy_info.pNext = NULL; copy_info.srcBuffer = src_resource->res.vk_buffer; copy_info.dstImage = dst_resource->res.vk_image; copy_info.dstImageLayout = info->dst_layout; copy_info.regionCount = 1; copy_info.pRegions = &info->copy.buffer_image; VK_CALL(vkCmdCopyBufferToImage2KHR(list->vk_command_buffer, &copy_info)); d3d12_command_list_transition_image_layout(list, batch, dst_resource->res.vk_image, &info->copy.buffer_image.imageSubresource, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, info->dst_layout, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, dst_resource->common_layout); } else if (info->batch_type == VKD3D_BATCH_TYPE_COPY_IMAGE) { d3d12_command_list_copy_image(list, dst_resource, info->dst_format, src_resource, info->src_format, &info->copy.image, info->writes_full_subresource, false); } } static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(d3d12_command_list_iface *iface, const D3D12_TEXTURE_COPY_LOCATION *dst, UINT dst_x, UINT dst_y, UINT dst_z, const D3D12_TEXTURE_COPY_LOCATION *src, const D3D12_BOX *src_box) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct vkd3d_image_copy_info copy_info; bool alias; size_t i; TRACE("iface %p, dst %p, dst_x %u, dst_y %u, dst_z %u, src %p, src_box %p.\n", iface, dst, dst_x, dst_y, dst_z, src, src_box); if (src_box && !validate_d3d12_box(src_box)) { WARN("Empty box %s.\n", debug_d3d12_box(src_box)); return; } if (!d3d12_command_list_init_copy_texture_region(list, dst, dst_x, dst_y, dst_z, src, src_box, &copy_info)) return; d3d12_command_list_ensure_transfer_batch(list, copy_info.batch_type); alias = false; for (i = 0; !alias && i < list->transfer_batch.batch_len; i++) { const struct vkd3d_image_copy_info *other_info =
&list->transfer_batch.batch[i]; const VkImageSubresourceLayers *subres, *other_subres; const struct d3d12_resource *res, *other_res; switch (copy_info.batch_type) { case VKD3D_BATCH_TYPE_COPY_BUFFER_TO_IMAGE: /* Test for destination aliasing as D3D12 requires serialization on overlapping copies (WAW hazard). */ subres = &copy_info.copy.buffer_image.imageSubresource; other_subres = &other_info->copy.buffer_image.imageSubresource; assert(subres->layerCount == 1 && other_subres->layerCount == 1); alias = copy_info.dst.pResource == other_info->dst.pResource && subres->aspectMask == other_subres->aspectMask && subres->baseArrayLayer == other_subres->baseArrayLayer && subres->mipLevel == other_subres->mipLevel; break; case VKD3D_BATCH_TYPE_COPY_IMAGE_TO_BUFFER: /* Test for destination aliasing as D3D12 requires serialization on overlapping copies (WAW hazard). */ /* TODO: Do more granular alias testing or merge this with d3d12_command_list_mark_copy_buffer_write. */ res = impl_from_ID3D12Resource(copy_info.dst.pResource); other_res = impl_from_ID3D12Resource(other_info->dst.pResource); /* If either resource is sparse, consider it to alias with anything. */ alias = copy_info.dst.pResource == other_info->dst.pResource || (res->flags & VKD3D_RESOURCE_RESERVED) || (other_res->flags & VKD3D_RESOURCE_RESERVED); break; case VKD3D_BATCH_TYPE_COPY_IMAGE: /* TODO: Check for alias once we start batching barriers for image-image copies too */ break; default: assert(false); return; } } if (alias) { d3d12_command_list_end_transfer_batch(list); /* end_transfer_batch resets the batch_type to NONE, so we need to restore it here. */ list->transfer_batch.batch_type = copy_info.batch_type; } list->transfer_batch.batch[list->transfer_batch.batch_len++] = copy_info; VKD3D_BREADCRUMB_COMMAND(COPY); } static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(d3d12_command_list_iface *iface, ID3D12Resource *dst, ID3D12Resource *src) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct d3d12_resource *dst_resource, *src_resource; const struct vkd3d_vk_device_procs *vk_procs; VkBufferCopy2KHR vk_buffer_copy; VkCopyBufferInfo2KHR copy_info; VkImageCopy2KHR vk_image_copy; unsigned int layer_count; unsigned int i; TRACE("iface %p, dst_resource %p, src_resource %p.\n", iface, dst, src); vk_procs = &list->device->vk_procs; dst_resource = impl_from_ID3D12Resource(dst); src_resource = impl_from_ID3D12Resource(src); d3d12_command_list_track_resource_usage(list, dst_resource, false); d3d12_command_list_track_resource_usage(list, src_resource, true); d3d12_command_list_end_current_render_pass(list, false); d3d12_command_list_end_transfer_batch(list); if (d3d12_resource_is_buffer(dst_resource)) { assert(d3d12_resource_is_buffer(src_resource)); assert(src_resource->desc.Width == dst_resource->desc.Width); vk_buffer_copy.sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2_KHR; vk_buffer_copy.pNext = NULL; vk_buffer_copy.srcOffset = src_resource->mem.offset; vk_buffer_copy.dstOffset = dst_resource->mem.offset; vk_buffer_copy.size = dst_resource->desc.Width; copy_info.sType = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2_KHR; copy_info.pNext = NULL; copy_info.srcBuffer = src_resource->res.vk_buffer; copy_info.dstBuffer = dst_resource->res.vk_buffer; copy_info.regionCount = 1; copy_info.pRegions = &vk_buffer_copy; d3d12_command_list_mark_copy_buffer_write(list, copy_info.dstBuffer, vk_buffer_copy.dstOffset, vk_buffer_copy.size, !!(dst_resource->flags & VKD3D_RESOURCE_RESERVED));
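/* Buffer CopyResource path: both resources are asserted above to have the same width, so a single
 * VkBufferCopy2KHR covering the full size is recorded, and the destination range is tracked so a
 * later transfer barrier can resolve write hazards against it. */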
VK_CALL(vkCmdCopyBuffer2KHR(list->vk_command_buffer, &copy_info)); } else { layer_count = d3d12_resource_desc_get_layer_count(&dst_resource->desc); assert(d3d12_resource_is_texture(dst_resource)); assert(d3d12_resource_is_texture(src_resource)); assert(dst_resource->desc.MipLevels == src_resource->desc.MipLevels); assert(layer_count == d3d12_resource_desc_get_layer_count(&src_resource->desc)); for (i = 0; i < dst_resource->desc.MipLevels; ++i) { vk_image_copy_from_d3d12(&vk_image_copy, i, i, &src_resource->desc, &dst_resource->desc, src_resource->format, dst_resource->format, NULL, 0, 0, 0); vk_image_copy.dstSubresource.layerCount = layer_count; vk_image_copy.srcSubresource.layerCount = layer_count; vk_image_copy.dstSubresource.aspectMask = dst_resource->format->vk_aspect_mask; vk_image_copy.srcSubresource.aspectMask = src_resource->format->vk_aspect_mask; /* CopyResource() always copies all subresources, so we can safely discard the dst_resource contents. */ d3d12_command_list_copy_image(list, dst_resource, dst_resource->format, src_resource, src_resource->format, &vk_image_copy, true, false); } } VKD3D_BREADCRUMB_COMMAND(COPY); } static void d3d12_command_list_end_transfer_batch(struct d3d12_command_list *list) { struct d3d12_command_list_barrier_batch barriers; size_t i; switch (list->transfer_batch.batch_type) { case VKD3D_BATCH_TYPE_NONE: return; case VKD3D_BATCH_TYPE_COPY_BUFFER_TO_IMAGE: case VKD3D_BATCH_TYPE_COPY_IMAGE_TO_BUFFER: case VKD3D_BATCH_TYPE_COPY_IMAGE: d3d12_command_list_end_current_render_pass(list, false); d3d12_command_list_barrier_batch_init(&barriers); for (i = 0; i < list->transfer_batch.batch_len; i++) d3d12_command_list_before_copy_texture_region(list, &barriers, &list->transfer_batch.batch[i]); d3d12_command_list_barrier_batch_end(list, &barriers); d3d12_command_list_barrier_batch_init(&barriers); for (i = 0; i < list->transfer_batch.batch_len; i++) d3d12_command_list_copy_texture_region(list, &barriers, &list->transfer_batch.batch[i]); d3d12_command_list_barrier_batch_end(list, &barriers); list->transfer_batch.batch_len = 0; break; default: break; } list->transfer_batch.batch_type = VKD3D_BATCH_TYPE_NONE; } static void d3d12_command_list_ensure_transfer_batch(struct d3d12_command_list *list, enum vkd3d_batch_type type) { if (list->transfer_batch.batch_type != type || list->transfer_batch.batch_len == ARRAY_SIZE(list->transfer_batch.batch)) { d3d12_command_list_end_transfer_batch(list); list->transfer_batch.batch_type = type; } } static unsigned int vkd3d_get_tile_index_from_region(const struct d3d12_sparse_info *sparse, const D3D12_TILED_RESOURCE_COORDINATE *coord, const D3D12_TILE_REGION_SIZE *size, unsigned int tile_index_in_region); static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(d3d12_command_list_iface *iface, ID3D12Resource *tiled_resource, const D3D12_TILED_RESOURCE_COORDINATE *region_coord, const D3D12_TILE_REGION_SIZE *region_size, ID3D12Resource *buffer, UINT64 buffer_offset, D3D12_TILE_COPY_FLAGS flags) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; struct d3d12_resource *tiled_res, *linear_res; VkBufferImageCopy2KHR buffer_image_copy; VkImageMemoryBarrier vk_image_barrier; VkMemoryBarrier vk_global_barrier; VkCopyBufferInfo2KHR copy_info; VkImageLayout vk_image_layout; VkBufferCopy2KHR buffer_copy; bool copy_to_buffer; unsigned int i; TRACE("iface %p, tiled_resource %p, region_coord %p, region_size %p, " "buffer %p, buffer_offset 
%#"PRIx64", flags %#x.\n", iface, tiled_resource, region_coord, region_size, buffer, buffer_offset, flags); d3d12_command_list_end_current_render_pass(list, true); d3d12_command_list_end_transfer_batch(list); tiled_res = impl_from_ID3D12Resource(tiled_resource); linear_res = impl_from_ID3D12Resource(buffer); d3d12_command_list_track_resource_usage(list, tiled_res, true); /* We can't rely on D3D12_TILE_COPY_FLAG_SWIZZLED_TILED_RESOURCE_TO_LINEAR_BUFFER being * set for the copy-to-buffer case, since D3D12_TILE_COPY_FLAG_NONE behaves the same. */ copy_to_buffer = !(flags & D3D12_TILE_COPY_FLAG_LINEAR_BUFFER_TO_SWIZZLED_TILED_RESOURCE); if (d3d12_resource_is_texture(tiled_res)) { /* This API cannot be used for non-tiled resources, so this is safe */ const D3D12_TILE_SHAPE *tile_shape = &tiled_res->sparse.tile_shape; if (tiled_res->desc.SampleDesc.Count > 1) { FIXME("MSAA images not supported.\n"); return; } vk_image_layout = d3d12_resource_pick_layout(tiled_res, copy_to_buffer ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); vk_image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; vk_image_barrier.pNext = NULL; vk_image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vk_image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vk_image_barrier.srcAccessMask = 0; vk_image_barrier.dstAccessMask = copy_to_buffer ? VK_ACCESS_TRANSFER_READ_BIT : VK_ACCESS_TRANSFER_WRITE_BIT; vk_image_barrier.oldLayout = tiled_res->common_layout; vk_image_barrier.newLayout = vk_image_layout; vk_image_barrier.image = tiled_res->res.vk_image; /* The entire resource must be in the appropriate copy state */ vk_image_barrier.subresourceRange.aspectMask = tiled_res->format->vk_aspect_mask; vk_image_barrier.subresourceRange.baseMipLevel = 0; vk_image_barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; vk_image_barrier.subresourceRange.baseArrayLayer = 0; vk_image_barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; vk_global_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; vk_global_barrier.pNext = NULL; vk_global_barrier.srcAccessMask = 0; vk_global_barrier.dstAccessMask = 0; if (copy_to_buffer) { /* Need to handle hazards before the image to buffer copy. */ if (list->tracked_copy_buffer_count) { vk_global_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; vk_global_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; } /* We're doing a transfer barrier anyways, so resolve buffer copy tracking in that barrier. 
*/ d3d12_command_list_reset_buffer_copy_tracking(list); } VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &vk_global_barrier, 0, NULL, 1, &vk_image_barrier)); buffer_image_copy.bufferRowLength = tile_shape->WidthInTexels; buffer_image_copy.bufferImageHeight = tile_shape->HeightInTexels; for (i = 0; i < region_size->NumTiles; i++) { unsigned int tile_index = vkd3d_get_tile_index_from_region(&tiled_res->sparse, region_coord, region_size, i); const struct d3d12_sparse_image_region *region = &tiled_res->sparse.tiles[tile_index].image; buffer_image_copy.sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2_KHR; buffer_image_copy.pNext = NULL; buffer_image_copy.bufferOffset = buffer_offset + VKD3D_TILE_SIZE * i + linear_res->mem.offset; buffer_image_copy.imageSubresource = vk_subresource_layers_from_subresource(&region->subresource); buffer_image_copy.imageOffset = region->offset; buffer_image_copy.imageExtent = region->extent; if (copy_to_buffer) { VkCopyImageToBufferInfo2KHR copy_info; copy_info.sType = VK_STRUCTURE_TYPE_COPY_IMAGE_TO_BUFFER_INFO_2_KHR; copy_info.pNext = NULL; copy_info.srcImage = tiled_res->res.vk_image; copy_info.srcImageLayout = vk_image_layout; copy_info.dstBuffer = linear_res->res.vk_buffer; copy_info.regionCount = 1; copy_info.pRegions = &buffer_image_copy; /* Resolve hazards after the image to buffer copy since we're doing an image barrier anyways. */ vk_global_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; vk_global_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; VK_CALL(vkCmdCopyImageToBuffer2KHR(list->vk_command_buffer, &copy_info)); } else { VkCopyBufferToImageInfo2KHR copy_info; copy_info.sType = VK_STRUCTURE_TYPE_COPY_BUFFER_TO_IMAGE_INFO_2_KHR; copy_info.pNext = NULL; copy_info.srcBuffer = linear_res->res.vk_buffer; copy_info.dstImage = tiled_res->res.vk_image; copy_info.dstImageLayout = vk_image_layout; copy_info.regionCount = 1; copy_info.pRegions = &buffer_image_copy; VK_CALL(vkCmdCopyBufferToImage2KHR(list->vk_command_buffer, &copy_info)); } } vk_image_barrier.srcAccessMask = copy_to_buffer ? 0 : VK_ACCESS_TRANSFER_WRITE_BIT; vk_image_barrier.dstAccessMask = 0; vk_image_barrier.oldLayout = vk_image_layout; vk_image_barrier.newLayout = tiled_res->common_layout; VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &vk_global_barrier, 0, NULL, 1, &vk_image_barrier)); } else { buffer_copy.sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2_KHR; buffer_copy.pNext = NULL; buffer_copy.size = region_size->NumTiles * VKD3D_TILE_SIZE; if (copy_to_buffer) { buffer_copy.srcOffset = VKD3D_TILE_SIZE * region_coord->X + tiled_res->mem.offset; buffer_copy.dstOffset = buffer_offset + linear_res->mem.offset; } else { buffer_copy.srcOffset = buffer_offset + linear_res->mem.offset; buffer_copy.dstOffset = VKD3D_TILE_SIZE * region_coord->X + tiled_res->mem.offset; } copy_info.sType = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2_KHR; copy_info.pNext = NULL; copy_info.srcBuffer = copy_to_buffer ? tiled_res->res.vk_buffer : linear_res->res.vk_buffer; copy_info.dstBuffer = copy_to_buffer ? linear_res->res.vk_buffer : tiled_res->res.vk_buffer; copy_info.regionCount = 1; copy_info.pRegions = &buffer_copy; d3d12_command_list_mark_copy_buffer_write(list, copy_info.dstBuffer, buffer_copy.dstOffset, buffer_copy.size, !!((copy_to_buffer ? 
linear_res : tiled_res)->flags & VKD3D_RESOURCE_RESERVED)); VK_CALL(vkCmdCopyBuffer2KHR(list->vk_command_buffer, &copy_info)); } } static void d3d12_command_list_resolve_subresource(struct d3d12_command_list *list, struct d3d12_resource *dst_resource, struct d3d12_resource *src_resource, const VkImageResolve2KHR *resolve, DXGI_FORMAT format, D3D12_RESOLVE_MODE mode) { const struct vkd3d_vk_device_procs *vk_procs; VkImageMemoryBarrier vk_image_barriers[2]; const struct vkd3d_format *vk_format; VkImageLayout dst_layout, src_layout; VkResolveImageInfo2KHR resolve_info; const struct d3d12_device *device; bool writes_full_subresource; unsigned int i; if (mode != D3D12_RESOLVE_MODE_AVERAGE) { FIXME("Resolve mode %u is not yet supported.\n", mode); return; } if (mode == D3D12_RESOLVE_MODE_AVERAGE && (dst_resource->format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)) { FIXME("AVERAGE resolve on DEPTH aspect is not supported yet.\n"); return; } device = list->device; vk_procs = &device->vk_procs; d3d12_command_list_end_current_render_pass(list, false); d3d12_command_list_end_transfer_batch(list); if (dst_resource->format->type == VKD3D_FORMAT_TYPE_TYPELESS || src_resource->format->type == VKD3D_FORMAT_TYPE_TYPELESS) { if (!(vk_format = vkd3d_format_from_d3d12_resource_desc(device, &dst_resource->desc, format))) { WARN("Invalid format %#x.\n", format); return; } if (dst_resource->format->vk_format != src_resource->format->vk_format || dst_resource->format->vk_format != vk_format->vk_format) { FIXME("Not implemented for typeless resources.\n"); return; } } /* Resolve of depth/stencil images is not supported in Vulkan. */ if ((dst_resource->format->vk_aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) || (src_resource->format->vk_aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) { FIXME("Resolve of depth/stencil images is not implemented yet.\n"); return; } dst_layout = d3d12_resource_pick_layout(dst_resource, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); src_layout = d3d12_resource_pick_layout(src_resource, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); for (i = 0; i < ARRAY_SIZE(vk_image_barriers); i++) { vk_image_barriers[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; vk_image_barriers[i].pNext = NULL; vk_image_barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vk_image_barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; } writes_full_subresource = d3d12_image_copy_writes_full_subresource(dst_resource, &resolve->extent, &resolve->dstSubresource); d3d12_command_list_track_resource_usage(list, dst_resource, !writes_full_subresource); d3d12_command_list_track_resource_usage(list, src_resource, true); vk_image_barriers[0].srcAccessMask = 0; vk_image_barriers[0].dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; vk_image_barriers[0].oldLayout = writes_full_subresource ? 
VK_IMAGE_LAYOUT_UNDEFINED : dst_resource->common_layout; vk_image_barriers[0].newLayout = dst_layout; vk_image_barriers[0].image = dst_resource->res.vk_image; vk_image_barriers[0].subresourceRange = vk_subresource_range_from_layers(&resolve->dstSubresource); vk_image_barriers[1].srcAccessMask = 0; vk_image_barriers[1].dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; vk_image_barriers[1].oldLayout = src_resource->common_layout; vk_image_barriers[1].newLayout = src_layout; vk_image_barriers[1].image = src_resource->res.vk_image; vk_image_barriers[1].subresourceRange = vk_subresource_range_from_layers(&resolve->srcSubresource); VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 0, NULL, ARRAY_SIZE(vk_image_barriers), vk_image_barriers)); resolve_info.sType = VK_STRUCTURE_TYPE_RESOLVE_IMAGE_INFO_2_KHR; resolve_info.pNext = NULL; resolve_info.srcImage = src_resource->res.vk_image; /* The resolve must use the layouts we actually transitioned to above; d3d12_resource_pick_layout() may have chosen GENERAL rather than the TRANSFER_*_OPTIMAL layouts. */ resolve_info.srcImageLayout = src_layout; resolve_info.dstImage = dst_resource->res.vk_image; resolve_info.dstImageLayout = dst_layout; resolve_info.regionCount = 1; resolve_info.pRegions = resolve; VK_CALL(vkCmdResolveImage2KHR(list->vk_command_buffer, &resolve_info)); vk_image_barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; vk_image_barriers[0].dstAccessMask = 0; vk_image_barriers[0].oldLayout = dst_layout; vk_image_barriers[0].newLayout = dst_resource->common_layout; vk_image_barriers[1].srcAccessMask = 0; vk_image_barriers[1].dstAccessMask = 0; vk_image_barriers[1].oldLayout = src_layout; vk_image_barriers[1].newLayout = src_resource->common_layout; VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 0, NULL, ARRAY_SIZE(vk_image_barriers), vk_image_barriers)); VKD3D_BREADCRUMB_COMMAND(RESOLVE); } static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(d3d12_command_list_iface *iface, ID3D12Resource *dst, UINT dst_sub_resource_idx, ID3D12Resource *src, UINT src_sub_resource_idx, DXGI_FORMAT format) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct d3d12_resource *dst_resource, *src_resource; VkImageResolve2KHR vk_image_resolve; TRACE("iface %p, dst_resource %p, dst_sub_resource_idx %u, src_resource %p, src_sub_resource_idx %u, " "format %#x.\n", iface, dst, dst_sub_resource_idx, src, src_sub_resource_idx, format); dst_resource = impl_from_ID3D12Resource(dst); src_resource = impl_from_ID3D12Resource(src); assert(d3d12_resource_is_texture(dst_resource)); assert(d3d12_resource_is_texture(src_resource)); vk_image_subresource_layers_from_d3d12(&vk_image_resolve.srcSubresource, src_resource->format, src_sub_resource_idx, src_resource->desc.MipLevels, d3d12_resource_desc_get_layer_count(&src_resource->desc)); memset(&vk_image_resolve.srcOffset, 0, sizeof(vk_image_resolve.srcOffset)); vk_image_subresource_layers_from_d3d12(&vk_image_resolve.dstSubresource, dst_resource->format, dst_sub_resource_idx, dst_resource->desc.MipLevels, d3d12_resource_desc_get_layer_count(&dst_resource->desc)); memset(&vk_image_resolve.dstOffset, 0, sizeof(vk_image_resolve.dstOffset)); vk_extent_3d_from_d3d12_miplevel(&vk_image_resolve.extent, &dst_resource->desc, vk_image_resolve.dstSubresource.mipLevel); vk_image_resolve.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR; vk_image_resolve.pNext = NULL; d3d12_command_list_resolve_subresource(list, dst_resource, src_resource, 
&vk_image_resolve, format, D3D12_RESOLVE_MODE_AVERAGE); } static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(d3d12_command_list_iface *iface, D3D12_PRIMITIVE_TOPOLOGY topology) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct vkd3d_dynamic_state *dyn_state = &list->dynamic_state; TRACE("iface %p, topology %#x.\n", iface, topology); if (topology == D3D_PRIMITIVE_TOPOLOGY_UNDEFINED) { WARN("Ignoring D3D_PRIMITIVE_TOPOLOGY_UNDEFINED.\n"); return; } if (dyn_state->primitive_topology == topology) return; dyn_state->primitive_topology = topology; dyn_state->vk_primitive_topology = vk_topology_from_d3d12_topology(topology); d3d12_command_list_invalidate_current_pipeline(list, false); dyn_state->dirty_flags |= VKD3D_DYNAMIC_STATE_TOPOLOGY | VKD3D_DYNAMIC_STATE_PRIMITIVE_RESTART; } static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(d3d12_command_list_iface *iface, UINT viewport_count, const D3D12_VIEWPORT *viewports) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct vkd3d_dynamic_state *dyn_state = &list->dynamic_state; unsigned int i; TRACE("iface %p, viewport_count %u, viewports %p.\n", iface, viewport_count, viewports); if (viewport_count > ARRAY_SIZE(dyn_state->viewports)) { FIXME_ONCE("Viewport count %u > D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE.\n", viewport_count); viewport_count = ARRAY_SIZE(dyn_state->viewports); } for (i = 0; i < viewport_count; ++i) { VkViewport *vk_viewport = &dyn_state->viewports[i]; vk_viewport->x = viewports[i].TopLeftX; vk_viewport->y = viewports[i].TopLeftY + viewports[i].Height; vk_viewport->width = viewports[i].Width; vk_viewport->height = -viewports[i].Height; vk_viewport->minDepth = viewports[i].MinDepth; vk_viewport->maxDepth = viewports[i].MaxDepth; if (vk_viewport->width <= 0.0f) { vk_viewport->width = 1.0f; vk_viewport->height = 0.0f; } } if (dyn_state->viewport_count != viewport_count) { dyn_state->viewport_count = viewport_count; dyn_state->dirty_flags |= VKD3D_DYNAMIC_STATE_SCISSOR; d3d12_command_list_invalidate_current_pipeline(list, false); } dyn_state->dirty_flags |= VKD3D_DYNAMIC_STATE_VIEWPORT; } static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(d3d12_command_list_iface *iface, UINT rect_count, const D3D12_RECT *rects) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct vkd3d_dynamic_state *dyn_state = &list->dynamic_state; unsigned int i; TRACE("iface %p, rect_count %u, rects %p.\n", iface, rect_count, rects); if (rect_count > ARRAY_SIZE(dyn_state->scissors)) { FIXME("Rect count %u > D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE.\n", rect_count); rect_count = ARRAY_SIZE(dyn_state->scissors); } for (i = 0; i < rect_count; ++i) { VkRect2D *vk_rect = &dyn_state->scissors[i]; vk_rect->offset.x = rects[i].left; vk_rect->offset.y = rects[i].top; vk_rect->extent.width = rects[i].right - rects[i].left; vk_rect->extent.height = rects[i].bottom - rects[i].top; } dyn_state->dirty_flags |= VKD3D_DYNAMIC_STATE_SCISSOR; } static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(d3d12_command_list_iface *iface, const FLOAT blend_factor[4]) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct vkd3d_dynamic_state *dyn_state = &list->dynamic_state; TRACE("iface %p, blend_factor %p.\n", iface, blend_factor); if (memcmp(dyn_state->blend_constants, blend_factor, sizeof(dyn_state->blend_constants)) != 0) { memcpy(dyn_state->blend_constants, 
blend_factor, sizeof(dyn_state->blend_constants)); dyn_state->dirty_flags |= VKD3D_DYNAMIC_STATE_BLEND_CONSTANTS; } } static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(d3d12_command_list_iface *iface, UINT stencil_ref) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct vkd3d_dynamic_state *dyn_state = &list->dynamic_state; TRACE("iface %p, stencil_ref %u.\n", iface, stencil_ref); if (dyn_state->stencil_reference != stencil_ref) { dyn_state->stencil_reference = stencil_ref; dyn_state->dirty_flags |= VKD3D_DYNAMIC_STATE_STENCIL_REFERENCE; } } static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(d3d12_command_list_iface *iface, ID3D12PipelineState *pipeline_state) { struct d3d12_pipeline_state *state = impl_from_ID3D12PipelineState(pipeline_state); struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct vkd3d_pipeline_bindings *bindings; unsigned int i; TRACE("iface %p, pipeline_state %p.\n", iface, pipeline_state); if ((TRACE_ON() || list->device->debug_ring.active) && state) { if (d3d12_pipeline_state_has_replaced_shaders(state)) { TRACE("Binding pipeline state %p which has replaced shader(s)!\n", pipeline_state); list->has_replaced_shaders = true; } if (state->vk_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) { TRACE("Binding compute module with hash: %016"PRIx64".\n", state->compute.code.meta.hash); } else if (state->vk_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { for (i = 0; i < state->graphics.stage_count; i++) { TRACE("Binding graphics module with hash: %016"PRIx64" (replaced: %s).\n", state->graphics.code[i].meta.hash, (state->graphics.code[i].meta.flags & VKD3D_SHADER_META_FLAG_REPLACED) ? "yes" : "no"); } } } #ifdef VKD3D_ENABLE_BREADCRUMBS if ((vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS) && state) { struct vkd3d_breadcrumb_command cmd; cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_SHADER_HASH; if (state->vk_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) { cmd.shader.hash = state->compute.code.meta.hash; cmd.shader.stage = VK_SHADER_STAGE_COMPUTE_BIT; vkd3d_breadcrumb_tracer_add_command(list, &cmd); } else if (state->vk_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { for (i = 0; i < state->graphics.stage_count; i++) { cmd.shader.hash = state->graphics.code[i].meta.hash; cmd.shader.stage = state->graphics.stages[i].stage; vkd3d_breadcrumb_tracer_add_command(list, &cmd); } } } #endif #ifdef VKD3D_ENABLE_RENDERDOC vkd3d_renderdoc_command_list_check_capture(list, state); #endif if (list->state == state) return; d3d12_command_list_invalidate_current_pipeline(list, false); /* SetPSO and SetPSO1 alias the same internal active pipeline state even if they are completely different types. */ list->state = state; list->rt_state = NULL; if (!state || list->active_bind_point != state->vk_bind_point) { if (list->active_bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR) { /* DXR uses compute bind points for descriptors. When binding an RTPSO, invalidate all compute state * to make sure we broadcast state correctly to COMPUTE or RT bind points in Vulkan. */ d3d12_command_list_invalidate_root_parameters(list, VK_PIPELINE_BIND_POINT_COMPUTE, true); } if (state) { bindings = &list->pipeline_bindings[state->vk_bind_point]; if (bindings->root_signature) { /* We might have clobbered push constants in the new bind point, * invalidate all state which can affect push constants. 
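* Vulkan push constant state is per command buffer rather than per bind point, so constants pushed through one bind point's layout may overwrite ranges the other bind point still expects to hold valid data; re-pushing everything on the next flush is the safe option.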
*/ d3d12_command_list_invalidate_push_constants(bindings); } list->active_bind_point = state->vk_bind_point; } else list->active_bind_point = VK_PIPELINE_BIND_POINT_MAX_ENUM; } } static VkImageLayout vk_image_layout_from_d3d12_resource_state( struct d3d12_command_list *list, const struct d3d12_resource *resource, D3D12_RESOURCE_STATES state) { /* Simultaneous access is always general, until we're forced to treat it differently in * a transfer, render pass, or similar. */ if (resource->flags & (VKD3D_RESOURCE_LINEAR_TILING | VKD3D_RESOURCE_SIMULTANEOUS_ACCESS)) return VK_IMAGE_LAYOUT_GENERAL; /* Anything generic read-related uses common layout since we get implicit promotion and decay. */ if (state & D3D12_RESOURCE_STATE_GENERIC_READ) return resource->common_layout; switch (state) { /* These are the only layouts which cannot decay or promote, * and are not ambiguous in some way (depth-stencil). */ case D3D12_RESOURCE_STATE_UNORDERED_ACCESS: return VK_IMAGE_LAYOUT_GENERAL; case D3D12_RESOURCE_STATE_RENDER_TARGET: return VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; case D3D12_RESOURCE_STATE_SHADING_RATE_SOURCE: /* This is not a promotable or decayable state, even if it's a "read-only" state. * VRS images also cannot be simultaneous access. */ return VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR; case D3D12_RESOURCE_STATE_DEPTH_WRITE: case D3D12_RESOURCE_STATE_DEPTH_READ: /* DEPTH_READ only is not a shader read state, and we treat WRITE and READ more or less the same. */ if (list) return d3d12_command_list_get_depth_stencil_resource_layout(list, resource, NULL); else return resource->common_layout; default: /* For TRANSFER or RESOLVE states, we transition in and out of common state since we have to * handle implicit sync anyways and TRANSFER can decay/promote. */ return resource->common_layout; } } static void vk_image_memory_barrier_for_transition( VkImageMemoryBarrier *image_barrier, const struct d3d12_resource *resource, UINT subresource_idx, VkImageLayout old_layout, VkImageLayout new_layout, VkAccessFlags src_access, VkAccessFlags dst_access, uint32_t dsv_decay_mask) { image_barrier->sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_barrier->pNext = NULL; image_barrier->oldLayout = old_layout; image_barrier->newLayout = new_layout; image_barrier->srcAccessMask = src_access; image_barrier->dstAccessMask = dst_access; image_barrier->srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_barrier->dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_barrier->image = resource->res.vk_image; if (subresource_idx != D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES) { VkImageSubresource subresource; subresource = d3d12_resource_get_vk_subresource(resource, subresource_idx, false); image_barrier->subresourceRange.aspectMask = subresource.aspectMask; image_barrier->subresourceRange.baseMipLevel = subresource.mipLevel; image_barrier->subresourceRange.baseArrayLayer = subresource.arrayLayer; image_barrier->subresourceRange.levelCount = 1; image_barrier->subresourceRange.layerCount = 1; /* In a decay, need to transition everything that we promoted back to the common state. * DSV decay is all or nothing, so just use a full transition. 
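* The dsv_decay_mask handling below therefore widens the subresource range to every mip and layer of the decaying depth or stencil plane, even when the app's barrier only named a single subresource.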
*/ if ((dsv_decay_mask & VKD3D_DEPTH_PLANE_OPTIMAL) && (resource->format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)) { image_barrier->subresourceRange.aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT; image_barrier->subresourceRange.baseMipLevel = 0; image_barrier->subresourceRange.baseArrayLayer = 0; image_barrier->subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; image_barrier->subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; } if ((dsv_decay_mask & VKD3D_STENCIL_PLANE_OPTIMAL) && (resource->format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)) { image_barrier->subresourceRange.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT; image_barrier->subresourceRange.baseMipLevel = 0; image_barrier->subresourceRange.baseArrayLayer = 0; image_barrier->subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; image_barrier->subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; } } else { image_barrier->subresourceRange.aspectMask = resource->format->vk_aspect_mask; image_barrier->subresourceRange.baseMipLevel = 0; image_barrier->subresourceRange.baseArrayLayer = 0; image_barrier->subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; image_barrier->subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; } } static void d3d12_command_list_barrier_batch_init(struct d3d12_command_list_barrier_batch *batch) { batch->image_barrier_count = 0; batch->vk_memory_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; batch->vk_memory_barrier.pNext = NULL; batch->vk_memory_barrier.srcAccessMask = 0; batch->vk_memory_barrier.dstAccessMask = 0; batch->dst_stage_mask = 0; batch->src_stage_mask = 0; } static void d3d12_command_list_barrier_batch_end(struct d3d12_command_list *list, struct d3d12_command_list_barrier_batch *batch) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; if (batch->src_stage_mask && batch->dst_stage_mask) { VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, batch->src_stage_mask, batch->dst_stage_mask, 0, 1, &batch->vk_memory_barrier, 0, NULL, batch->image_barrier_count, batch->vk_image_barriers)); batch->src_stage_mask = 0; batch->dst_stage_mask = 0; batch->vk_memory_barrier.srcAccessMask = 0; batch->vk_memory_barrier.dstAccessMask = 0; batch->image_barrier_count = 0; } } static bool vk_subresource_range_overlaps(uint32_t base_a, uint32_t count_a, uint32_t base_b, uint32_t count_b) { uint32_t end_a, end_b; end_a = count_a == UINT32_MAX ? UINT32_MAX : base_a + count_a; end_b = count_b == UINT32_MAX ? 
UINT32_MAX : base_b + count_b; if (base_a <= base_b) return end_a > base_b; else return end_b > base_a; } static bool vk_image_barrier_overlaps_subresource(const VkImageMemoryBarrier *a, const VkImageMemoryBarrier *b, bool *exact_match) { *exact_match = false; if (a->image != b->image) return false; if (!(a->subresourceRange.aspectMask & b->subresourceRange.aspectMask)) return false; *exact_match = a->subresourceRange.aspectMask == b->subresourceRange.aspectMask && a->subresourceRange.baseMipLevel == b->subresourceRange.baseMipLevel && a->subresourceRange.levelCount == b->subresourceRange.levelCount && a->subresourceRange.baseArrayLayer == b->subresourceRange.baseArrayLayer && a->subresourceRange.layerCount == b->subresourceRange.layerCount; return vk_subresource_range_overlaps( a->subresourceRange.baseMipLevel, a->subresourceRange.levelCount, b->subresourceRange.baseMipLevel, b->subresourceRange.levelCount) && vk_subresource_range_overlaps( a->subresourceRange.baseArrayLayer, a->subresourceRange.layerCount, b->subresourceRange.baseArrayLayer, b->subresourceRange.layerCount); } static void d3d12_command_list_barrier_batch_add_layout_transition( struct d3d12_command_list *list, struct d3d12_command_list_barrier_batch *batch, const VkImageMemoryBarrier *image_barrier) { bool layout_match, exact_match; uint32_t i; if (batch->image_barrier_count == ARRAY_SIZE(batch->vk_image_barriers)) d3d12_command_list_barrier_batch_end(list, batch); for (i = 0; i < batch->image_barrier_count; i++) { if (vk_image_barrier_overlaps_subresource(image_barrier, &batch->vk_image_barriers[i], &exact_match)) { /* The barrier batch is used at two places: ResourceBarrier and CopyTextureRegion, which results in * different kinds of overlaps. * In CopyTextureRegion() we can have two copies on the same src or dst resource batched into one, * resulting in an exact duplicate layout transition. * In ResourceBarrier() we won't have such exact duplicates, mainly because doing the same transition twice * is illegal. * The reason to test for overlap is that barriers in D3D12 behave as if each transition happens in * order. Vulkan memory barriers do not, so if there is a race condition, we need to split the barrier. * As such, we need to eliminate duplicates like the former, while cutting the batch when we encounter an * overlap like the latter. */ layout_match = image_barrier->oldLayout == batch->vk_image_barriers[i].oldLayout && image_barrier->newLayout == batch->vk_image_barriers[i].newLayout; if (exact_match && layout_match) { /* Exact duplicate, skip this barrier. */ return; } else { /* Overlap, break the batch and add barrier. 
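* Flushing the accumulated barriers first guarantees that the new transition executes strictly after the overlapping one, preserving the in-order semantics described above.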
*/ d3d12_command_list_barrier_batch_end(list, batch); break; } } } batch->vk_image_barriers[batch->image_barrier_count++] = *image_barrier; } static void d3d12_command_list_barrier_batch_add_global_transition( struct d3d12_command_list *list, struct d3d12_command_list_barrier_batch *batch, VkAccessFlags srcAccessMask, VkAccessFlags dstAccessMask) { batch->vk_memory_barrier.srcAccessMask |= srcAccessMask; batch->vk_memory_barrier.dstAccessMask |= dstAccessMask; } static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(d3d12_command_list_iface *iface, UINT barrier_count, const D3D12_RESOURCE_BARRIER *barriers) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct d3d12_command_list_barrier_batch batch; bool have_split_barriers = false; unsigned int i; TRACE("iface %p, barrier_count %u, barriers %p.\n", iface, barrier_count, barriers); d3d12_command_list_end_current_render_pass(list, false); d3d12_command_list_end_transfer_batch(list); d3d12_command_list_barrier_batch_init(&batch); for (i = 0; i < barrier_count; ++i) { const D3D12_RESOURCE_BARRIER *current = &barriers[i]; struct d3d12_resource *preserve_resource = NULL; have_split_barriers = have_split_barriers || (current->Flags & D3D12_RESOURCE_BARRIER_FLAG_BEGIN_ONLY) || (current->Flags & D3D12_RESOURCE_BARRIER_FLAG_END_ONLY); if (current->Flags & D3D12_RESOURCE_BARRIER_FLAG_BEGIN_ONLY) continue; switch (current->Type) { case D3D12_RESOURCE_BARRIER_TYPE_TRANSITION: { const D3D12_RESOURCE_TRANSITION_BARRIER *transition = &current->Transition; VkAccessFlags transition_src_access = 0, transition_dst_access = 0; VkPipelineStageFlags transition_src_stage_mask = 0; VkPipelineStageFlags transition_dst_stage_mask = 0; VkImageLayout old_layout = VK_IMAGE_LAYOUT_UNDEFINED; VkImageLayout new_layout = VK_IMAGE_LAYOUT_UNDEFINED; uint32_t dsv_decay_mask = 0; /* If we have not observed any transition to INDIRECT_ARGUMENT it means * that in this command buffer there couldn't legally have been writes to an indirect * command buffer. The docs mention an implementation strategy where we can do this optimization. * This is very handy when handling back-to-back ExecuteIndirects(). */ if (transition->StateAfter == D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT) list->execute_indirect.has_observed_transition_to_indirect = true; if (!is_valid_resource_state(transition->StateBefore)) { d3d12_command_list_mark_as_invalid(list, "Invalid StateBefore %#x (barrier %u).", transition->StateBefore, i); continue; } if (!is_valid_resource_state(transition->StateAfter)) { d3d12_command_list_mark_as_invalid(list, "Invalid StateAfter %#x (barrier %u).", transition->StateAfter, i); continue; } if (!(preserve_resource = impl_from_ID3D12Resource(transition->pResource))) { d3d12_command_list_mark_as_invalid(list, "A resource pointer is NULL."); continue; } /* If we're going to do transfer barriers and we have * pending copies in flight which need to be synchronized, * we should just resolve that while we're at it. 
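* Folding the TRANSFER_WRITE to TRANSFER_WRITE dependency into this batch saves us from emitting a separate pipeline barrier later just to fence the tracked copies.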
*/ if (list->tracked_copy_buffer_count && ( transition->StateBefore == D3D12_RESOURCE_STATE_COPY_DEST || transition->StateAfter == D3D12_RESOURCE_STATE_COPY_DEST)) { d3d12_command_list_reset_buffer_copy_tracking(list); batch.src_stage_mask |= VK_PIPELINE_STAGE_TRANSFER_BIT; batch.dst_stage_mask |= VK_PIPELINE_STAGE_TRANSFER_BIT; batch.vk_memory_barrier.srcAccessMask |= VK_ACCESS_TRANSFER_WRITE_BIT; batch.vk_memory_barrier.dstAccessMask |= VK_ACCESS_TRANSFER_WRITE_BIT; } vk_access_and_stage_flags_from_d3d12_resource_state(list, preserve_resource, transition->StateBefore, list->vk_queue_flags, &transition_src_stage_mask, &transition_src_access); if (d3d12_resource_is_texture(preserve_resource)) old_layout = vk_image_layout_from_d3d12_resource_state(list, preserve_resource, transition->StateBefore); if (preserve_resource->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) { /* If we enter DEPTH_WRITE or DEPTH_READ we can promote to optimal. */ /* Depth-stencil aspects are transitioned all or nothing. * If just one aspect transitions out of READ_ONLY, we have a decay situation. * We must transition the entire image from optimal state to read-only state. */ dsv_decay_mask = d3d12_command_list_notify_dsv_state(list, preserve_resource, transition->StateAfter, transition->Subresource); } vk_access_and_stage_flags_from_d3d12_resource_state(list, preserve_resource, transition->StateAfter, list->vk_queue_flags, &transition_dst_stage_mask, &transition_dst_access); if (d3d12_resource_is_texture(preserve_resource)) new_layout = vk_image_layout_from_d3d12_resource_state(list, preserve_resource, transition->StateAfter); if (old_layout != new_layout) { VkImageMemoryBarrier vk_transition; vk_image_memory_barrier_for_transition(&vk_transition, preserve_resource, transition->Subresource, old_layout, new_layout, transition_src_access, transition_dst_access, dsv_decay_mask); d3d12_command_list_barrier_batch_add_layout_transition(list, &batch, &vk_transition); } else { batch.vk_memory_barrier.srcAccessMask |= transition_src_access; batch.vk_memory_barrier.dstAccessMask |= transition_dst_access; } /* In case add_layout_transition triggers a batch flush, * make sure we add stage masks after that happens. */ batch.src_stage_mask |= transition_src_stage_mask; batch.dst_stage_mask |= transition_dst_stage_mask; TRACE("Transition barrier (resource %p, subresource %#x, before %#x, after %#x).\n", preserve_resource, transition->Subresource, transition->StateBefore, transition->StateAfter); break; } case D3D12_RESOURCE_BARRIER_TYPE_UAV: { const D3D12_RESOURCE_UAV_BARRIER *uav = &current->UAV; uint32_t state_mask; preserve_resource = impl_from_ID3D12Resource(uav->pResource); /* The only way to synchronize an RTAS is UAV barriers, * as their resource state must be frozen. * If we don't know the resource, we must assume a global UAV transition * which also includes RTAS. 
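* state_mask below therefore selects RAYTRACING_ACCELERATION_STRUCTURE and/or UNORDERED_ACCESS, so the derived stage and access masks cover both shader UAV access and acceleration structure build/read access.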
*/ state_mask = 0; if (!preserve_resource || d3d12_resource_is_acceleration_structure(preserve_resource)) state_mask |= D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE; if (!preserve_resource || !d3d12_resource_is_acceleration_structure(preserve_resource)) state_mask |= D3D12_RESOURCE_STATE_UNORDERED_ACCESS; assert(state_mask); vk_access_and_stage_flags_from_d3d12_resource_state(list, preserve_resource, state_mask, list->vk_queue_flags, &batch.src_stage_mask, &batch.vk_memory_barrier.srcAccessMask); vk_access_and_stage_flags_from_d3d12_resource_state(list, preserve_resource, state_mask, list->vk_queue_flags, &batch.dst_stage_mask, &batch.vk_memory_barrier.dstAccessMask); TRACE("UAV barrier (resource %p).\n", preserve_resource); break; } case D3D12_RESOURCE_BARRIER_TYPE_ALIASING: { const D3D12_RESOURCE_ALIASING_BARRIER *alias; struct d3d12_resource *before, *after; VkAccessFlags alias_src_access; VkAccessFlags alias_dst_access; alias = &current->Aliasing; TRACE("Aliasing barrier (before %p, after %p).\n", alias->pResourceBefore, alias->pResourceAfter); before = impl_from_ID3D12Resource(alias->pResourceBefore); after = impl_from_ID3D12Resource(alias->pResourceAfter); if (d3d12_resource_may_alias_other_resources(before) && d3d12_resource_may_alias_other_resources(after)) { /* Aliasing barriers in D3D12 are extremely weird and don't behave like you would expect. * For buffer aliasing, it is basically a global memory barrier, but for images it gets * quite weird. We cannot perform UNDEFINED transitions here, even if that is what makes sense. * UNDEFINED transitions are deferred to their required "activation" command, which is a full-subresource * command that writes every pixel. We detect those cases and perform a transition away from UNDEFINED. */ alias_src_access = VK_ACCESS_MEMORY_WRITE_BIT; if (after && d3d12_resource_is_texture(after)) { /* To correctly alias images, it is required to perform an initializing operation * at a later time. This includes fully clearing a render target, full subresource CopyResource, * etc. In all those cases we will handle UNDEFINED layouts. Making memory visible is redundant at this stage, * since memory only needs to be available to perform transition away from UNDEFINED. * Thus, at the very least, we need srcAccessMask to be correct in the aliasing barrier. */ alias_dst_access = 0; } else { alias_dst_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; } batch.src_stage_mask |= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; batch.dst_stage_mask |= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; batch.vk_memory_barrier.srcAccessMask |= alias_src_access; batch.vk_memory_barrier.dstAccessMask |= alias_dst_access; } break; } default: WARN("Invalid barrier type %#x.\n", current->Type); continue; } if (preserve_resource) d3d12_command_list_track_resource_usage(list, preserve_resource, true); } d3d12_command_list_barrier_batch_end(list, &batch); /* Vulkan doesn't support split barriers. 
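* BEGIN_ONLY barriers are skipped in the loop above and the full barrier is emitted when the matching END_ONLY barrier arrives, which is correct but forfeits the latency hiding that split barriers are intended to provide.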
*/ if (have_split_barriers) WARN("Issuing split barrier(s) on D3D12_RESOURCE_BARRIER_FLAG_END_ONLY.\n"); VKD3D_BREADCRUMB_COMMAND(BARRIER); } static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(d3d12_command_list_iface *iface, ID3D12GraphicsCommandList *command_list) { struct d3d12_bundle *bundle; TRACE("iface %p, command_list %p.\n", iface, command_list); if (!(bundle = d3d12_bundle_from_iface(command_list))) { WARN("Command list %p not a bundle.\n", command_list); return; } d3d12_bundle_execute(bundle, iface); } static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(d3d12_command_list_iface *iface, UINT heap_count, ID3D12DescriptorHeap *const *heaps) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct vkd3d_bindless_state *bindless_state = &list->device->bindless_state; uint64_t dirty_mask = 0; unsigned int i, j; TRACE("iface %p, heap_count %u, heaps %p.\n", iface, heap_count, heaps); for (i = 0; i < heap_count; i++) { struct d3d12_descriptor_heap *heap = impl_from_ID3D12DescriptorHeap(heaps[i]); unsigned int set_index = 0; if (!heap) continue; for (j = 0; j < bindless_state->set_count; j++) { if (bindless_state->set_info[j].heap_type != heap->desc.Type) continue; list->descriptor_heaps[j] = heap->sets[set_index++].vk_descriptor_set; dirty_mask |= 1ull << j; } /* In case we need to hoist buffer descriptors. */ if (heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) { struct d3d12_desc_split d; d = d3d12_desc_decode_va(heap->cpu_va.ptr); list->cbv_srv_uav_descriptors_types = d.types; list->cbv_srv_uav_descriptors_view = d.view; } } for (i = 0; i < ARRAY_SIZE(list->pipeline_bindings); i++) { struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[i]; bindings->descriptor_heap_dirty_mask = dirty_mask; bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS; } } static void d3d12_command_list_set_root_signature(struct d3d12_command_list *list, VkPipelineBindPoint bind_point, const struct d3d12_root_signature *root_signature) { struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; if (bindings->root_signature == root_signature) return; bindings->root_signature = root_signature; bindings->static_sampler_set = VK_NULL_HANDLE; switch (bind_point) { case VK_PIPELINE_BIND_POINT_GRAPHICS: bindings->layout = root_signature->graphics; break; case VK_PIPELINE_BIND_POINT_COMPUTE: bindings->layout = root_signature->compute; bindings->rt_layout = root_signature->raygen; break; default: break; } if (root_signature && root_signature->vk_sampler_set) bindings->static_sampler_set = root_signature->vk_sampler_set; d3d12_command_list_invalidate_root_parameters(list, bind_point, true); } static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(d3d12_command_list_iface *iface, ID3D12RootSignature *root_signature) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); TRACE("iface %p, root_signature %p.\n", iface, root_signature); d3d12_command_list_set_root_signature(list, VK_PIPELINE_BIND_POINT_COMPUTE, impl_from_ID3D12RootSignature(root_signature)); } static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(d3d12_command_list_iface *iface, ID3D12RootSignature *root_signature) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); TRACE("iface %p, root_signature %p.\n", iface, root_signature); d3d12_command_list_set_root_signature(list, VK_PIPELINE_BIND_POINT_GRAPHICS, impl_from_ID3D12RootSignature(root_signature)); 
} static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *list, VkPipelineBindPoint bind_point, unsigned int index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) { struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; const struct d3d12_root_signature *root_signature = bindings->root_signature; const struct vkd3d_shader_descriptor_table *table; table = root_signature_get_descriptor_table(root_signature, index); assert(table && index < ARRAY_SIZE(bindings->descriptor_tables)); bindings->descriptor_tables[index] = d3d12_desc_heap_offset_from_gpu_handle(base_descriptor); bindings->descriptor_table_active_mask |= (uint64_t)1 << index; if (root_signature->descriptor_table_count) bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS; if (root_signature->hoist_info.num_desc) bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS; } static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(d3d12_command_list_iface *iface, UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); TRACE("iface %p, root_parameter_index %u, base_descriptor %#"PRIx64".\n", iface, root_parameter_index, base_descriptor.ptr); d3d12_command_list_set_descriptor_table(list, VK_PIPELINE_BIND_POINT_COMPUTE, root_parameter_index, base_descriptor); } static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(d3d12_command_list_iface *iface, UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); TRACE("iface %p, root_parameter_index %u, base_descriptor %#"PRIx64".\n", iface, root_parameter_index, base_descriptor.ptr); d3d12_command_list_set_descriptor_table(list, VK_PIPELINE_BIND_POINT_GRAPHICS, root_parameter_index, base_descriptor); } static void d3d12_command_list_set_root_constants(struct d3d12_command_list *list, VkPipelineBindPoint bind_point, unsigned int index, unsigned int offset, unsigned int count, const void *data) { struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; const struct d3d12_root_signature *root_signature = bindings->root_signature; const struct vkd3d_shader_root_constant *c; VKD3D_UNUSED unsigned int i; c = root_signature_get_32bit_constants(root_signature, index); memcpy(&bindings->root_constants[c->constant_index + offset], data, count * sizeof(uint32_t)); bindings->root_constant_dirty_mask |= 1ull << index; #ifdef VKD3D_ENABLE_BREADCRUMBS for (i = 0; i < count; i++) { VKD3D_BREADCRUMB_AUX32(index); VKD3D_BREADCRUMB_AUX32(offset + i); VKD3D_BREADCRUMB_AUX32(((const uint32_t *)data)[i]); VKD3D_BREADCRUMB_COMMAND_STATE(ROOT_CONST); } #endif } static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(d3d12_command_list_iface *iface, UINT root_parameter_index, UINT data, UINT dst_offset) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", iface, root_parameter_index, data, dst_offset); d3d12_command_list_set_root_constants(list, VK_PIPELINE_BIND_POINT_COMPUTE, root_parameter_index, dst_offset, 1, &data); } static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(d3d12_command_list_iface *iface, UINT root_parameter_index, UINT data, UINT dst_offset) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); 
TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", iface, root_parameter_index, data, dst_offset); d3d12_command_list_set_root_constants(list, VK_PIPELINE_BIND_POINT_GRAPHICS, root_parameter_index, dst_offset, 1, &data); } static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(d3d12_command_list_iface *iface, UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", iface, root_parameter_index, constant_count, data, dst_offset); d3d12_command_list_set_root_constants(list, VK_PIPELINE_BIND_POINT_COMPUTE, root_parameter_index, dst_offset, constant_count, data); } static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(d3d12_command_list_iface *iface, UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", iface, root_parameter_index, constant_count, data, dst_offset); d3d12_command_list_set_root_constants(list, VK_PIPELINE_BIND_POINT_GRAPHICS, root_parameter_index, dst_offset, constant_count, data); } static void d3d12_command_list_set_push_descriptor_info(struct d3d12_command_list *list, VkPipelineBindPoint bind_point, unsigned int index, D3D12_GPU_VIRTUAL_ADDRESS gpu_address) { struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; const struct d3d12_root_signature *root_signature = bindings->root_signature; const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; const struct vkd3d_vulkan_info *vk_info = &list->device->vk_info; const struct vkd3d_shader_root_parameter *root_parameter; struct vkd3d_root_descriptor_info *descriptor; const struct vkd3d_unique_resource *resource; VkBufferView vk_buffer_view; VkDeviceSize max_range; bool ssbo; ssbo = d3d12_device_use_ssbo_root_descriptors(list->device); root_parameter = root_signature_get_root_descriptor(root_signature, index); descriptor = &bindings->root_descriptors[index]; if (ssbo || root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV) { descriptor->vk_descriptor_type = root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; if (gpu_address) { max_range = descriptor->vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ? vk_info->device_limits.maxUniformBufferRange : vk_info->device_limits.maxStorageBufferRange; resource = vkd3d_va_map_deref(&list->device->memory_allocator.va_map, gpu_address); descriptor->info.buffer.buffer = resource->vk_buffer; descriptor->info.buffer.offset = gpu_address - resource->va; descriptor->info.buffer.range = min(resource->size - descriptor->info.buffer.offset, max_range); } else { descriptor->info.buffer.buffer = VK_NULL_HANDLE; descriptor->info.buffer.offset = 0; descriptor->info.buffer.range = VK_WHOLE_SIZE; } } else { descriptor->vk_descriptor_type = root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_SRV ? 
VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; if (gpu_address) { if (!vkd3d_create_raw_buffer_view(list->device, gpu_address, &vk_buffer_view)) { ERR("Failed to create buffer view.\n"); return; } if (!(d3d12_command_allocator_add_buffer_view(list->allocator, vk_buffer_view))) { ERR("Failed to add buffer view.\n"); VK_CALL(vkDestroyBufferView(list->device->vk_device, vk_buffer_view, NULL)); return; } descriptor->info.buffer_view = vk_buffer_view; } else descriptor->info.buffer_view = VK_NULL_HANDLE; } } static void d3d12_command_list_set_root_descriptor_va(struct d3d12_command_list *list, struct vkd3d_root_descriptor_info *descriptor, D3D12_GPU_VIRTUAL_ADDRESS gpu_address) { descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_MAX_ENUM; descriptor->info.va = gpu_address; } static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *list, VkPipelineBindPoint bind_point, unsigned int index, D3D12_GPU_VIRTUAL_ADDRESS gpu_address) { struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; struct vkd3d_root_descriptor_info *descriptor = &bindings->root_descriptors[index]; if (bindings->root_signature->root_descriptor_raw_va_mask & (1ull << index)) d3d12_command_list_set_root_descriptor_va(list, descriptor, gpu_address); else d3d12_command_list_set_push_descriptor_info(list, bind_point, index, gpu_address); bindings->root_descriptor_dirty_mask |= 1ull << index; bindings->root_descriptor_active_mask |= 1ull << index; VKD3D_BREADCRUMB_AUX32(index); VKD3D_BREADCRUMB_AUX64(gpu_address); VKD3D_BREADCRUMB_COMMAND_STATE(ROOT_DESC); } static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferView( d3d12_command_list_iface *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); d3d12_command_list_set_root_descriptor(list, VK_PIPELINE_BIND_POINT_COMPUTE, root_parameter_index, address); } static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootConstantBufferView( d3d12_command_list_iface *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); d3d12_command_list_set_root_descriptor(list, VK_PIPELINE_BIND_POINT_GRAPHICS, root_parameter_index, address); } static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceView( d3d12_command_list_iface *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); d3d12_command_list_set_root_descriptor(list, VK_PIPELINE_BIND_POINT_COMPUTE, root_parameter_index, address); } static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceView( d3d12_command_list_iface *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); d3d12_command_list_set_root_descriptor(list, VK_PIPELINE_BIND_POINT_GRAPHICS, root_parameter_index, address); } static void STDMETHODCALLTYPE 
d3d12_command_list_SetComputeRootUnorderedAccessView( d3d12_command_list_iface *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); d3d12_command_list_set_root_descriptor(list, VK_PIPELINE_BIND_POINT_COMPUTE, root_parameter_index, address); } static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessView( d3d12_command_list_iface *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); d3d12_command_list_set_root_descriptor(list, VK_PIPELINE_BIND_POINT_GRAPHICS, root_parameter_index, address); } static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(d3d12_command_list_iface *iface, const D3D12_INDEX_BUFFER_VIEW *view) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); const struct vkd3d_unique_resource *resource; enum VkIndexType index_type; TRACE("iface %p, view %p.\n", iface, view); if (!view) { list->index_buffer.buffer = VK_NULL_HANDLE; VKD3D_BREADCRUMB_AUX32(0); VKD3D_BREADCRUMB_COMMAND_STATE(IBO); return; } switch (view->Format) { case DXGI_FORMAT_R16_UINT: index_type = VK_INDEX_TYPE_UINT16; break; case DXGI_FORMAT_R32_UINT: index_type = VK_INDEX_TYPE_UINT32; break; default: FIXME_ONCE("Invalid index format %#x. This will map to R16_UINT to match observed driver behavior.\n", view->Format); /* D3D12 debug layer disallows this case, but it doesn't trigger a DEVICE LOST event, so we shouldn't crash and burn. */ index_type = VK_INDEX_TYPE_UINT16; break; } list->index_buffer.dxgi_format = view->Format; list->index_buffer.vk_type = index_type; if (view->BufferLocation != 0) { resource = vkd3d_va_map_deref(&list->device->memory_allocator.va_map, view->BufferLocation); list->index_buffer.buffer = resource->vk_buffer; list->index_buffer.offset = view->BufferLocation - resource->va; list->index_buffer.is_dirty = true; } else list->index_buffer.buffer = VK_NULL_HANDLE; VKD3D_BREADCRUMB_AUX32(index_type == VK_INDEX_TYPE_UINT32 ? 32 : 16); VKD3D_BREADCRUMB_AUX64(view->BufferLocation); VKD3D_BREADCRUMB_AUX64(view->SizeInBytes); VKD3D_BREADCRUMB_COMMAND_STATE(IBO); } static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(d3d12_command_list_iface *iface, UINT start_slot, UINT view_count, const D3D12_VERTEX_BUFFER_VIEW *views) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct vkd3d_dynamic_state *dyn_state = &list->dynamic_state; const struct vkd3d_unique_resource *resource; uint32_t vbo_invalidate_mask; bool invalidate = false; unsigned int i; TRACE("iface %p, start_slot %u, view_count %u, views %p.\n", iface, start_slot, view_count, views); if (start_slot >= ARRAY_SIZE(dyn_state->vertex_strides) || view_count > ARRAY_SIZE(dyn_state->vertex_strides) - start_slot) { WARN("Invalid start slot %u / view count %u.\n", start_slot, view_count); return; } /* Native drivers appear to ignore this call. Buffer bindings are kept as-is. 
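* I.e. a NULL views pointer is not equivalent to binding view_count NULL buffers; we simply return without touching any dynamic state.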
*/ if (!views) return; for (i = 0; i < view_count; ++i) { bool invalid_va = false; VkBuffer buffer; VkDeviceSize offset; VkDeviceSize size; uint32_t stride; VKD3D_BREADCRUMB_AUX32(start_slot + i); VKD3D_BREADCRUMB_AUX64(views[i].BufferLocation); VKD3D_BREADCRUMB_AUX32(views[i].StrideInBytes); VKD3D_BREADCRUMB_AUX64(views[i].SizeInBytes); VKD3D_BREADCRUMB_COMMAND_STATE(VBO); if (views[i].BufferLocation) { if ((resource = vkd3d_va_map_deref(&list->device->memory_allocator.va_map, views[i].BufferLocation))) { buffer = resource->vk_buffer; offset = views[i].BufferLocation - resource->va; stride = views[i].StrideInBytes; size = views[i].SizeInBytes; } else { invalid_va = true; FIXME("Attempting to bind a VBO VA that does not exist, binding NULL VA ...\n"); } } else invalid_va = true; if (invalid_va) { buffer = VK_NULL_HANDLE; offset = 0; size = 0; stride = VKD3D_NULL_BUFFER_SIZE; } invalidate |= dyn_state->vertex_strides[start_slot + i] != stride; dyn_state->vertex_strides[start_slot + i] = stride; dyn_state->vertex_buffers[start_slot + i] = buffer; dyn_state->vertex_offsets[start_slot + i] = offset; dyn_state->vertex_sizes[start_slot + i] = size; } dyn_state->dirty_flags |= VKD3D_DYNAMIC_STATE_VERTEX_BUFFER | VKD3D_DYNAMIC_STATE_VERTEX_BUFFER_STRIDE; vbo_invalidate_mask = ((1u << view_count) - 1u) << start_slot; dyn_state->dirty_vbos |= vbo_invalidate_mask; dyn_state->dirty_vbo_strides |= vbo_invalidate_mask; if (invalidate) d3d12_command_list_invalidate_current_pipeline(list, false); } static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(d3d12_command_list_iface *iface, UINT start_slot, UINT view_count, const D3D12_STREAM_OUTPUT_BUFFER_VIEW *views) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; VkDeviceSize offsets[ARRAY_SIZE(list->so_counter_buffers)]; VkDeviceSize sizes[ARRAY_SIZE(list->so_counter_buffers)]; VkBuffer buffers[ARRAY_SIZE(list->so_counter_buffers)]; const struct vkd3d_unique_resource *resource; unsigned int i, first, count; TRACE("iface %p, start_slot %u, view_count %u, views %p.\n", iface, start_slot, view_count, views); d3d12_command_list_end_current_render_pass(list, true); if (!list->device->vk_info.EXT_transform_feedback) { FIXME("Transform feedback is not supported by Vulkan implementation.\n"); return; } if (start_slot >= ARRAY_SIZE(buffers) || view_count > ARRAY_SIZE(buffers) - start_slot) { WARN("Invalid start slot %u / view count %u.\n", start_slot, view_count); return; } count = 0; first = start_slot; for (i = 0; i < view_count; ++i) { if (views[i].BufferLocation && views[i].SizeInBytes) { resource = vkd3d_va_map_deref(&list->device->memory_allocator.va_map, views[i].BufferLocation); buffers[count] = resource->vk_buffer; offsets[count] = views[i].BufferLocation - resource->va; sizes[count] = views[i].SizeInBytes; resource = vkd3d_va_map_deref(&list->device->memory_allocator.va_map, views[i].BufferFilledSizeLocation); list->so_counter_buffers[start_slot + i] = resource->vk_buffer; list->so_counter_buffer_offsets[start_slot + i] = views[i].BufferFilledSizeLocation - resource->va; ++count; } else { if (count) VK_CALL(vkCmdBindTransformFeedbackBuffersEXT(list->vk_command_buffer, first, count, buffers, offsets, sizes)); count = 0; first = start_slot + i + 1; list->so_counter_buffers[start_slot + i] = VK_NULL_HANDLE; list->so_counter_buffer_offsets[start_slot + i] = 0; WARN("Trying to unbind transform feedback buffer %u. 
Ignoring.\n", start_slot + i); } } if (count) VK_CALL(vkCmdBindTransformFeedbackBuffersEXT(list->vk_command_buffer, first, count, buffers, offsets, sizes)); } static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(d3d12_command_list_iface *iface, UINT render_target_descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE *render_target_descriptors, BOOL single_descriptor_handle, const D3D12_CPU_DESCRIPTOR_HANDLE *depth_stencil_descriptor) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); const VkPhysicalDeviceLimits *limits = &list->device->vk_info.device_limits; const struct d3d12_graphics_pipeline_state *graphics; VkFormat prev_dsv_format, next_dsv_format; const struct d3d12_rtv_desc *rtv_desc; unsigned int i; TRACE("iface %p, render_target_descriptor_count %u, render_target_descriptors %p, " "single_descriptor_handle %#x, depth_stencil_descriptor %p.\n", iface, render_target_descriptor_count, render_target_descriptors, single_descriptor_handle, depth_stencil_descriptor); d3d12_command_list_invalidate_rendering_info(list); d3d12_command_list_end_current_render_pass(list, false); if (render_target_descriptor_count > ARRAY_SIZE(list->rtvs)) { WARN("Descriptor count %u > %zu, ignoring extra descriptors.\n", render_target_descriptor_count, ARRAY_SIZE(list->rtvs)); render_target_descriptor_count = ARRAY_SIZE(list->rtvs); } list->fb_width = limits->maxFramebufferWidth; list->fb_height = limits->maxFramebufferHeight; list->fb_layer_count = limits->maxFramebufferLayers; prev_dsv_format = list->dsv.format ? list->dsv.format->vk_format : VK_FORMAT_UNDEFINED; next_dsv_format = VK_FORMAT_UNDEFINED; memset(list->rtvs, 0, sizeof(list->rtvs)); memset(&list->dsv, 0, sizeof(list->dsv)); /* Need to deduce DSV layouts again. */ list->dsv_layout = VK_IMAGE_LAYOUT_UNDEFINED; list->dsv_plane_optimal_mask = 0; for (i = 0; i < render_target_descriptor_count; ++i) { if (single_descriptor_handle) { if ((rtv_desc = d3d12_rtv_desc_from_cpu_handle(*render_target_descriptors))) rtv_desc += i; } else { rtv_desc = d3d12_rtv_desc_from_cpu_handle(render_target_descriptors[i]); } if (!rtv_desc || !rtv_desc->resource) { WARN("RTV descriptor %u is not initialized.\n", i); continue; } d3d12_command_list_track_resource_usage(list, rtv_desc->resource, true); list->rtvs[i] = *rtv_desc; list->fb_width = min(list->fb_width, rtv_desc->width); list->fb_height = min(list->fb_height, rtv_desc->height); list->fb_layer_count = min(list->fb_layer_count, rtv_desc->layer_count); } if (depth_stencil_descriptor) { if ((rtv_desc = d3d12_rtv_desc_from_cpu_handle(*depth_stencil_descriptor)) && rtv_desc->resource) { d3d12_command_list_track_resource_usage(list, rtv_desc->resource, true); list->dsv = *rtv_desc; list->fb_width = min(list->fb_width, rtv_desc->width); list->fb_height = min(list->fb_height, rtv_desc->height); list->fb_layer_count = min(list->fb_layer_count, rtv_desc->layer_count); next_dsv_format = rtv_desc->format->vk_format; } else { WARN("DSV descriptor is not initialized.\n"); } } if (d3d12_pipeline_state_is_graphics(list->state)) { graphics = &list->state->graphics; if (prev_dsv_format != next_dsv_format && d3d12_graphics_pipeline_state_has_unknown_dsv_format_with_test(graphics)) { /* If we change the NULL-ness of the depth-stencil attachment, we are * at risk of having to use fallback pipelines. Invalidate the pipeline * since we'll have to refresh the VkRenderingInfo and VkPipeline. 
*/ d3d12_command_list_invalidate_current_pipeline(list, false); } } } static bool d3d12_rect_fully_covers_region(const D3D12_RECT *a, const D3D12_RECT *b) { return a->top <= b->top && a->bottom >= b->bottom && a->left <= b->left && a->right >= b->right; } static void d3d12_command_list_clear_attachment(struct d3d12_command_list *list, struct d3d12_resource *resource, struct vkd3d_view *view, VkImageAspectFlags clear_aspects, const VkClearValue *clear_value, UINT rect_count, const D3D12_RECT *rects) { bool full_clear, writable = true; D3D12_RECT full_rect; int attachment_idx; unsigned int i; /* If one of the clear rectangles covers the entire image, we * may be able to use a fast path and re-initialize the image */ full_rect = d3d12_get_image_rect(resource, view->info.texture.miplevel_idx); full_clear = !rect_count; for (i = 0; i < rect_count && !full_clear; i++) full_clear = d3d12_rect_fully_covers_region(&rects[i], &full_rect); if (full_clear) rect_count = 0; attachment_idx = d3d12_command_list_find_attachment(list, resource, view); if (attachment_idx == D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT && (list->rendering_info.state_flags & VKD3D_RENDERING_ACTIVE)) writable = (vk_writable_aspects_from_image_layout(list->dsv_layout) & clear_aspects) == clear_aspects; if (attachment_idx < 0 || !(list->rendering_info.state_flags & VKD3D_RENDERING_ACTIVE) || !writable) { /* View currently not bound as a render target, or bound but * the render pass isn't active and we're only going to clear * a sub-region of the image, or one of the aspects to clear * uses a read-only layout in the current render pass */ d3d12_command_list_end_current_render_pass(list, false); d3d12_command_list_clear_attachment_pass(list, resource, view, clear_aspects, clear_value, rect_count, rects, false); } else { /* View bound and render pass active, just emit the clear */ d3d12_command_list_clear_attachment_inline(list, resource, view, attachment_idx, clear_aspects, clear_value, rect_count, rects); } } static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(d3d12_command_list_iface *iface, D3D12_CPU_DESCRIPTOR_HANDLE dsv, D3D12_CLEAR_FLAGS flags, float depth, UINT8 stencil, UINT rect_count, const D3D12_RECT *rects) { const union VkClearValue clear_value = {.depthStencil = {depth, stencil}}; struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); const struct d3d12_rtv_desc *dsv_desc = d3d12_rtv_desc_from_cpu_handle(dsv); VkImageAspectFlags clear_aspects = 0; TRACE("iface %p, dsv %#lx, flags %#x, depth %.8e, stencil 0x%02x, rect_count %u, rects %p.\n", iface, dsv.ptr, flags, depth, stencil, rect_count, rects); d3d12_command_list_track_resource_usage(list, dsv_desc->resource, true); if (flags & D3D12_CLEAR_FLAG_DEPTH) clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; if (flags & D3D12_CLEAR_FLAG_STENCIL) clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; clear_aspects &= dsv_desc->format->vk_aspect_mask; if (!clear_aspects) { WARN("Not clearing any aspects.\n"); return; } d3d12_command_list_clear_attachment(list, dsv_desc->resource, dsv_desc->view, clear_aspects, &clear_value, rect_count, rects); } static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(d3d12_command_list_iface *iface, D3D12_CPU_DESCRIPTOR_HANDLE rtv, const FLOAT color[4], UINT rect_count, const D3D12_RECT *rects) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); const struct d3d12_rtv_desc *rtv_desc = d3d12_rtv_desc_from_cpu_handle(rtv); VkClearValue clear_value; TRACE("iface %p, rtv %#lx, 
color %p, rect_count %u, rects %p.\n", iface, rtv.ptr, color, rect_count, rects); d3d12_command_list_track_resource_usage(list, rtv_desc->resource, true); if (rtv_desc->format->type == VKD3D_FORMAT_TYPE_UINT) { clear_value.color.uint32[0] = max(0, color[0]); clear_value.color.uint32[1] = max(0, color[1]); clear_value.color.uint32[2] = max(0, color[2]); clear_value.color.uint32[3] = max(0, color[3]); } else if (rtv_desc->format->type == VKD3D_FORMAT_TYPE_SINT) { clear_value.color.int32[0] = color[0]; clear_value.color.int32[1] = color[1]; clear_value.color.int32[2] = color[2]; clear_value.color.int32[3] = color[3]; } else { clear_value.color.float32[0] = color[0]; clear_value.color.float32[1] = color[1]; clear_value.color.float32[2] = color[2]; clear_value.color.float32[3] = color[3]; } d3d12_command_list_clear_attachment(list, rtv_desc->resource, rtv_desc->view, VK_IMAGE_ASPECT_COLOR_BIT, &clear_value, rect_count, rects); } struct vkd3d_clear_uav_info { bool has_view; union { struct vkd3d_view *view; struct { VkDeviceSize offset; VkDeviceSize range; } buffer; } u; }; static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, const struct d3d12_desc_split *d, struct d3d12_resource *resource, const struct vkd3d_clear_uav_info *args, const VkClearColorValue *clear_color, UINT rect_count, const D3D12_RECT *rects) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; unsigned int i, miplevel_idx, layer_count; struct vkd3d_clear_uav_pipeline pipeline; struct vkd3d_clear_uav_args clear_args; VkDescriptorBufferInfo buffer_info; VkDescriptorImageInfo image_info; D3D12_RECT full_rect, curr_rect; VkWriteDescriptorSet write_set; VkExtent3D workgroup_size; uint32_t extra_offset; d3d12_command_list_track_resource_usage(list, resource, true); d3d12_command_list_end_current_render_pass(list, false); d3d12_command_list_invalidate_current_pipeline(list, true); d3d12_command_list_invalidate_root_parameters(list, VK_PIPELINE_BIND_POINT_COMPUTE, true); clear_args.clear_color = *clear_color; write_set.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; write_set.pNext = NULL; write_set.dstBinding = 0; write_set.dstArrayElement = 0; write_set.descriptorCount = 1; if (d3d12_resource_is_texture(resource)) { assert(args->has_view); image_info.sampler = VK_NULL_HANDLE; image_info.imageView = args->u.view->vk_image_view; image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; write_set.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; write_set.pImageInfo = &image_info; write_set.pBufferInfo = NULL; write_set.pTexelBufferView = NULL; miplevel_idx = args->u.view->info.texture.miplevel_idx; layer_count = args->u.view->info.texture.vk_view_type == VK_IMAGE_VIEW_TYPE_3D ? 
d3d12_resource_desc_get_depth(&resource->desc, miplevel_idx) : args->u.view->info.texture.layer_count; pipeline = vkd3d_meta_get_clear_image_uav_pipeline( &list->device->meta_ops, args->u.view->info.texture.vk_view_type, args->u.view->format->type == VKD3D_FORMAT_TYPE_UINT); workgroup_size = vkd3d_meta_get_clear_image_uav_workgroup_size(args->u.view->info.texture.vk_view_type); } else { write_set.pImageInfo = NULL; write_set.pBufferInfo = NULL; write_set.pTexelBufferView = NULL; if (args->has_view) { write_set.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; write_set.pTexelBufferView = &args->u.view->vk_buffer_view; } else { write_set.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; write_set.pBufferInfo = &buffer_info; /* resource heap offset is already in descriptor */ buffer_info.buffer = resource->res.vk_buffer; buffer_info.offset = args->u.buffer.offset; buffer_info.range = args->u.buffer.range; } miplevel_idx = 0; layer_count = 1; pipeline = vkd3d_meta_get_clear_buffer_uav_pipeline(&list->device->meta_ops, !args->has_view || args->u.view->format->type == VKD3D_FORMAT_TYPE_UINT, !args->has_view); workgroup_size = vkd3d_meta_get_clear_buffer_uav_workgroup_size(); } if (!(write_set.dstSet = d3d12_command_allocator_allocate_descriptor_set( list->allocator, pipeline.vk_set_layout, VKD3D_DESCRIPTOR_POOL_TYPE_STATIC))) { ERR("Failed to allocate descriptor set.\n"); return; } VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &write_set, 0, NULL)); full_rect.left = 0; full_rect.right = d3d12_resource_desc_get_width(&resource->desc, miplevel_idx); full_rect.top = 0; full_rect.bottom = d3d12_resource_desc_get_height(&resource->desc, miplevel_idx); extra_offset = 0; if (d3d12_resource_is_buffer(resource)) { const struct vkd3d_bound_buffer_range *ranges = d->heap->buffer_ranges.host_ptr; if (args->has_view) { if (list->device->bindless_state.flags & VKD3D_TYPED_OFFSET_BUFFER) { extra_offset = ranges[d->offset].element_offset; full_rect.right = ranges[d->offset].element_count; } else { VkDeviceSize byte_count = args->u.view->format->byte_count ? 
args->u.view->format->byte_count : sizeof(uint32_t); /* structured buffer */ full_rect.right = args->u.view->info.buffer.size / byte_count; } } else if (list->device->bindless_state.flags & VKD3D_SSBO_OFFSET_BUFFER) { extra_offset = ranges[d->offset].byte_offset / sizeof(uint32_t); full_rect.right = ranges[d->offset].byte_count / sizeof(uint32_t); } else full_rect.right = args->u.buffer.range / sizeof(uint32_t); } /* clear full resource if no rects are specified */ curr_rect = full_rect; VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.vk_pipeline)); VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.vk_pipeline_layout, 0, 1, &write_set.dstSet, 0, NULL)); for (i = 0; i < rect_count || !i; i++) { if (rect_count) { /* clamp to actual resource region and skip empty rects */ curr_rect.left = max(rects[i].left, full_rect.left); curr_rect.top = max(rects[i].top, full_rect.top); curr_rect.right = min(rects[i].right, full_rect.right); curr_rect.bottom = min(rects[i].bottom, full_rect.bottom); if (curr_rect.left >= curr_rect.right || curr_rect.top >= curr_rect.bottom) continue; } clear_args.offset.x = curr_rect.left + extra_offset; clear_args.offset.y = curr_rect.top; clear_args.extent.width = curr_rect.right - curr_rect.left; clear_args.extent.height = curr_rect.bottom - curr_rect.top; VK_CALL(vkCmdPushConstants(list->vk_command_buffer, pipeline.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(clear_args), &clear_args)); VK_CALL(vkCmdDispatch(list->vk_command_buffer, vkd3d_compute_workgroup_count(clear_args.extent.width, workgroup_size.width), vkd3d_compute_workgroup_count(clear_args.extent.height, workgroup_size.height), vkd3d_compute_workgroup_count(layer_count, workgroup_size.depth))); } } static void d3d12_command_list_clear_uav_with_copy(struct d3d12_command_list *list, const struct d3d12_desc_split *d, struct d3d12_resource *resource, const struct vkd3d_clear_uav_info *args, const VkClearColorValue *clear_value, const struct vkd3d_format *format, UINT rect_count, const D3D12_RECT *rects) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; unsigned int miplevel_idx, base_layer, layer_count, i, j; struct vkd3d_clear_uav_pipeline pipeline; struct vkd3d_scratch_allocation scratch; struct vkd3d_clear_uav_args clear_args; VkCopyBufferToImageInfo2KHR copy_info; VkBufferImageCopy2KHR copy_region; VkDeviceSize scratch_buffer_size; D3D12_RECT curr_rect, full_rect; VkWriteDescriptorSet write_set; VkBufferView vk_buffer_view; VkExtent3D workgroup_size; VkMemoryBarrier barrier; uint32_t element_count; d3d12_command_list_track_resource_usage(list, resource, true); d3d12_command_list_end_current_render_pass(list, false); d3d12_command_list_invalidate_current_pipeline(list, true); d3d12_command_list_invalidate_root_parameters(list, VK_PIPELINE_BIND_POINT_COMPUTE, true); assert(args->has_view); assert(d3d12_resource_is_texture(resource)); miplevel_idx = args->u.view->info.texture.miplevel_idx; full_rect.left = 0; full_rect.right = d3d12_resource_desc_get_width(&resource->desc, miplevel_idx); full_rect.top = 0; full_rect.bottom = d3d12_resource_desc_get_height(&resource->desc, miplevel_idx); if (rect_count) { element_count = 0; for (i = 0; i < rect_count; i++) { if (rects[i].right > rects[i].left && rects[i].bottom > rects[i].top) { unsigned int w = rects[i].right - rects[i].left; unsigned int h = rects[i].bottom - rects[i].top; element_count = max(element_count, w * h); } } } else { 
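/* No clear rects given: the copy-based clear covers the whole mip level, so size
 * the scratch buffer for one element per texel of the full subresource. */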
element_count = full_rect.right * full_rect.bottom; } element_count *= d3d12_resource_desc_get_depth(&resource->desc, miplevel_idx); scratch_buffer_size = element_count * format->byte_count; if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, scratch_buffer_size, 16, ~0u, &scratch)) { ERR("Failed to allocate scratch memory for UAV clear.\n"); return; } pipeline = vkd3d_meta_get_clear_buffer_uav_pipeline(&list->device->meta_ops, true, false); workgroup_size = vkd3d_meta_get_clear_buffer_uav_workgroup_size(); if (!vkd3d_create_vk_buffer_view(list->device, scratch.buffer, format, scratch.offset, scratch_buffer_size, &vk_buffer_view)) { ERR("Failed to create buffer view for UAV clear.\n"); return; } if (!(d3d12_command_allocator_add_buffer_view(list->allocator, vk_buffer_view))) { ERR("Failed to add buffer view.\n"); VK_CALL(vkDestroyBufferView(list->device->vk_device, vk_buffer_view, NULL)); return; } memset(&write_set, 0, sizeof(write_set)); write_set.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; write_set.descriptorCount = 1; write_set.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; write_set.pTexelBufferView = &vk_buffer_view; if (!(write_set.dstSet = d3d12_command_allocator_allocate_descriptor_set( list->allocator, pipeline.vk_set_layout, VKD3D_DESCRIPTOR_POOL_TYPE_STATIC))) { ERR("Failed to allocate descriptor set for UAV clear.\n"); return; } VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &write_set, 0, NULL)); VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.vk_pipeline)); VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.vk_pipeline_layout, 0, 1, &write_set.dstSet, 0, NULL)); clear_args.clear_color = *clear_value; clear_args.offset.x = 0; clear_args.offset.y = 0; clear_args.extent.width = element_count; clear_args.extent.height = 1; VK_CALL(vkCmdPushConstants(list->vk_command_buffer, pipeline.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(clear_args), &clear_args)); VK_CALL(vkCmdDispatch(list->vk_command_buffer, vkd3d_compute_workgroup_count(element_count, workgroup_size.width), 1, 1)); /* Insert barrier to make the buffer clear visible, but also to make the * image safely accessible by the transfer stage. This fallback is so rare * that we should not pessimize regular UAV barriers. 
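 * The buffer-to-image copy below reads the scratch buffer and writes the image on
 * the transfer stage, hence both TRANSFER_READ and TRANSFER_WRITE in dstAccessMask.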
*/ barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; barrier.pNext = NULL; barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, vk_queue_shader_stages(list->vk_queue_flags), VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &barrier, 0, NULL, 0, NULL)); copy_region.sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2_KHR; copy_region.pNext = NULL; copy_region.bufferOffset = scratch.offset; copy_region.bufferRowLength = 0; copy_region.bufferImageHeight = 0; copy_region.imageOffset.z = 0; copy_region.imageExtent.depth = d3d12_resource_desc_get_depth(&resource->desc, miplevel_idx); copy_region.imageSubresource = vk_subresource_layers_from_view(args->u.view); base_layer = copy_region.imageSubresource.baseArrayLayer; layer_count = copy_region.imageSubresource.layerCount; copy_info.sType = VK_STRUCTURE_TYPE_COPY_BUFFER_TO_IMAGE_INFO_2_KHR; copy_info.pNext = NULL; copy_info.srcBuffer = scratch.buffer; copy_info.dstImage = resource->res.vk_image; copy_info.dstImageLayout = VK_IMAGE_LAYOUT_GENERAL; copy_info.regionCount = 1; copy_info.pRegions = &copy_region; curr_rect = full_rect; for (i = 0; i < rect_count || !i; i++) { if (rect_count) { /* clamp to actual resource region and skip empty rects */ curr_rect.left = max(rects[i].left, full_rect.left); curr_rect.top = max(rects[i].top, full_rect.top); curr_rect.right = min(rects[i].right, full_rect.right); curr_rect.bottom = min(rects[i].bottom, full_rect.bottom); if (curr_rect.left >= curr_rect.right || curr_rect.top >= curr_rect.bottom) continue; } copy_region.imageOffset.x = curr_rect.left; copy_region.imageOffset.y = curr_rect.top; copy_region.imageExtent.width = curr_rect.right - curr_rect.left; copy_region.imageExtent.height = curr_rect.bottom - curr_rect.top; for (j = 0; j < layer_count; j++) { copy_region.imageSubresource.baseArrayLayer = base_layer + j; copy_region.imageSubresource.layerCount = 1; VK_CALL(vkCmdCopyBufferToImage2KHR(list->vk_command_buffer, &copy_info)); } } barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, vk_queue_shader_stages(list->vk_queue_flags), 0, 1, &barrier, 0, NULL, 0, NULL)); } static VkClearColorValue vkd3d_fixup_clear_uav_swizzle(struct d3d12_device *device, DXGI_FORMAT dxgi_format, VkClearColorValue color) { VkClearColorValue result; switch (dxgi_format) { case DXGI_FORMAT_A8_UNORM: result.uint32[0] = color.uint32[3]; return result; default: return color; } } static VkClearColorValue vkd3d_fixup_clear_uav_uint_color(struct d3d12_device *device, DXGI_FORMAT dxgi_format, VkClearColorValue color) { VkClearColorValue result = {0}; switch (dxgi_format) { case DXGI_FORMAT_R11G11B10_FLOAT: result.uint32[0] = (color.uint32[0] & 0x7FF) | ((color.uint32[1] & 0x7FF) << 11) | ((color.uint32[2] & 0x3FF) << 22); return result; case DXGI_FORMAT_B8G8R8A8_UNORM: case DXGI_FORMAT_B8G8R8X8_UNORM: result.uint32[0] = color.uint32[2]; result.uint32[1] = color.uint32[1]; result.uint32[2] = color.uint32[0]; result.uint32[3] = color.uint32[3]; return result; default: return color; } } static const struct vkd3d_format *vkd3d_clear_uav_find_uint_format(struct d3d12_device *device, DXGI_FORMAT dxgi_format) { DXGI_FORMAT uint_format = DXGI_FORMAT_UNKNOWN; if (dxgi_format < device->format_compatibility_list_count) uint_format =
device->format_compatibility_lists[dxgi_format].uint_format; return vkd3d_get_format(device, uint_format, false); } static bool vkd3d_clear_uav_check_uint_format_compatibility(struct d3d12_device *device, const struct vkd3d_format *resource_format, const struct vkd3d_format *uint_format) { const struct vkd3d_format_compatibility_list *compat; unsigned int i; if (resource_format->vk_format == uint_format->vk_format) return true; compat = &device->format_compatibility_lists[resource_format->dxgi_format]; for (i = 0; i < compat->format_count; i++) { if (compat->vk_formats[i] == uint_format->vk_format) return true; } return false; } static inline bool vkd3d_clear_uav_info_from_desc(struct vkd3d_clear_uav_info *args, const struct d3d12_desc_split *d) { if (d->types->flags & VKD3D_DESCRIPTOR_FLAG_VIEW) { args->has_view = true; args->u.view = d->view->info.view; return true; } else if (d->types->flags & VKD3D_DESCRIPTOR_FLAG_OFFSET_RANGE) { args->has_view = false; args->u.buffer.offset = d->view->info.buffer.offset; args->u.buffer.range = d->view->info.buffer.range; return true; } else { /* Hit if we try to clear a NULL descriptor, just noop it. */ return false; } } static void vkd3d_mask_uint_clear_color(uint32_t color[4], VkFormat vk_format) { unsigned int component_count, i; switch (vk_format) { case VK_FORMAT_R8_UINT: case VK_FORMAT_R16_UINT: case VK_FORMAT_R32_UINT: component_count = 1; break; case VK_FORMAT_R8G8_UINT: case VK_FORMAT_R16G16_UINT: case VK_FORMAT_R32G32_UINT: component_count = 2; break; case VK_FORMAT_R32G32B32_UINT: component_count = 3; break; default: component_count = 4; break; } for (i = component_count; i < 4; i++) color[i] = 0x0; /* Need to mask the clear value, since apparently driver can saturate the clear value instead. */ switch (vk_format) { case VK_FORMAT_R8_UINT: case VK_FORMAT_R8G8_UINT: case VK_FORMAT_R8G8B8A8_UINT: for (i = 0; i < 4; i++) color[i] &= 0xffu; break; case VK_FORMAT_R16_UINT: case VK_FORMAT_R16G16_UINT: case VK_FORMAT_R16G16B16A16_UINT: for (i = 0; i < 4; i++) color[i] &= 0xffffu; break; case VK_FORMAT_A2B10G10R10_UINT_PACK32: for (i = 0; i < 3; i++) color[i] &= 0x3ff; color[3] &= 0x3; break; default: break; } } static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(d3d12_command_list_iface *iface, D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, const UINT values[4], UINT rect_count, const D3D12_RECT *rects) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); const struct vkd3d_format *uint_format; struct vkd3d_view *inline_view = NULL; struct d3d12_resource *resource_impl; struct vkd3d_clear_uav_info args; struct d3d12_desc_split d; VkClearColorValue color; TRACE("iface %p, gpu_handle %#"PRIx64", cpu_handle %lx, resource %p, values %p, rect_count %u, rects %p.\n", iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects); memcpy(color.uint32, values, sizeof(color.uint32)); d = d3d12_desc_decode_va(cpu_handle.ptr); resource_impl = impl_from_ID3D12Resource(resource); if (!vkd3d_clear_uav_info_from_desc(&args, &d)) return; if (args.has_view) color = vkd3d_fixup_clear_uav_swizzle(list->device, d.view->info.view->format->dxgi_format, color); if (args.has_view && d.view->info.view->format->type != VKD3D_FORMAT_TYPE_UINT) { const struct vkd3d_view *base_view = d.view->info.view; uint_format = vkd3d_clear_uav_find_uint_format(list->device, base_view->format->dxgi_format); color = vkd3d_fixup_clear_uav_uint_color(list->device, 
base_view->format->dxgi_format, color); if (!uint_format) { ERR("Unhandled format %d.\n", base_view->format->dxgi_format); return; } vkd3d_mask_uint_clear_color(color.uint32, uint_format->vk_format); if (d3d12_resource_is_texture(resource_impl)) { if (vkd3d_clear_uav_check_uint_format_compatibility(list->device, resource_impl->format, uint_format)) { struct vkd3d_texture_view_desc view_desc; memset(&view_desc, 0, sizeof(view_desc)); view_desc.image = resource_impl->res.vk_image; view_desc.view_type = base_view->info.texture.vk_view_type; view_desc.format = uint_format; view_desc.miplevel_idx = base_view->info.texture.miplevel_idx; view_desc.miplevel_count = 1; view_desc.layer_idx = base_view->info.texture.layer_idx; view_desc.layer_count = base_view->info.texture.layer_count; view_desc.aspect_mask = view_desc.format->vk_aspect_mask; view_desc.image_usage = VK_IMAGE_USAGE_STORAGE_BIT; view_desc.allowed_swizzle = false; if (!vkd3d_create_texture_view(list->device, &view_desc, &args.u.view)) { ERR("Failed to create image view.\n"); return; } inline_view = args.u.view; } else { /* If the clear color is 0, we can safely use the existing view to perform the * clear since the bit pattern will not change. Otherwise, fill a scratch buffer * with the packed clear value and perform a buffer to image copy. */ if (color.uint32[0] || color.uint32[1] || color.uint32[2] || color.uint32[3]) { d3d12_command_list_clear_uav_with_copy(list, &d, resource_impl, &args, &color, uint_format, rect_count, rects); return; } } } else { struct vkd3d_buffer_view_desc view_desc; if (!uint_format) uint_format = vkd3d_get_format(list->device, DXGI_FORMAT_R32_UINT, false); view_desc.buffer = resource_impl->res.vk_buffer; view_desc.format = uint_format; view_desc.offset = base_view->info.buffer.offset; view_desc.size = base_view->info.buffer.size; if (!vkd3d_create_buffer_view(list->device, &view_desc, &args.u.view)) { ERR("Failed to create buffer view.\n"); return; } inline_view = args.u.view; } } else if (args.has_view) { vkd3d_mask_uint_clear_color(color.uint32, d.view->info.view->format->vk_format); } d3d12_command_list_clear_uav(list, &d, resource_impl, &args, &color, rect_count, rects); if (inline_view) { d3d12_command_allocator_add_view(list->allocator, inline_view); vkd3d_view_decref(inline_view, list->device); } } static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(d3d12_command_list_iface *iface, D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, const float values[4], UINT rect_count, const D3D12_RECT *rects) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct d3d12_resource *resource_impl; struct vkd3d_clear_uav_info args; struct d3d12_desc_split d; VkClearColorValue color; TRACE("iface %p, gpu_handle %#"PRIx64", cpu_handle %lx, resource %p, values %p, rect_count %u, rects %p.\n", iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects); d = d3d12_desc_decode_va(cpu_handle.ptr); memcpy(color.float32, values, sizeof(color.float32)); resource_impl = impl_from_ID3D12Resource(resource); if (!vkd3d_clear_uav_info_from_desc(&args, &d)) return; if (args.has_view) color = vkd3d_fixup_clear_uav_swizzle(list->device, d.view->info.view->format->dxgi_format, color); d3d12_command_list_clear_uav(list, &d, resource_impl, &args, &color, rect_count, rects); } static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(d3d12_command_list_iface *iface, ID3D12Resource *resource, const 
D3D12_DISCARD_REGION *region) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct d3d12_resource *texture = impl_from_ID3D12Resource(resource); unsigned int i, first_subresource, subresource_count; bool has_bound_subresource, has_unbound_subresource; VkImageSubresourceLayers vk_subresource_layers; VkImageSubresourceRange vk_subresource_range; unsigned int resource_subresource_count; VkImageSubresource vk_subresource; bool all_subresource_full_discard; bool full_discard, is_bound; D3D12_RECT full_rect; int attachment_idx; TRACE("iface %p, resource %p, region %p.\n", iface, resource, region); /* This method is only supported on DIRECT and COMPUTE queues, * but we only implement it for render targets, so ignore it * on compute. */ if (list->type != D3D12_COMMAND_LIST_TYPE_DIRECT && list->type != D3D12_COMMAND_LIST_TYPE_COMPUTE) { WARN("Not supported for queue type %d.\n", list->type); return; } /* Ignore buffers */ if (!d3d12_resource_is_texture(texture)) return; /* D3D12 requires that the texture is either in render target * state, in depth-stencil state, or in UAV state depending on usage flags. * In compute lists, we only allow UAV state. */ if (!(texture->desc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS))) { WARN("Not supported for resource %p.\n", resource); return; } /* Assume that pRegion == NULL means that we should discard * the entire resource. This does not seem to be documented. */ resource_subresource_count = d3d12_resource_get_sub_resource_count(texture); if (region) { first_subresource = region->FirstSubresource; subresource_count = region->NumSubresources; } else { first_subresource = 0; subresource_count = resource_subresource_count; } /* If we write to all subresources, we can promote the depth image to OPTIMAL since we know the resource * must be in OPTIMAL state. */ if (texture->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) { d3d12_command_list_notify_dsv_discard(list, texture, first_subresource, subresource_count, resource_subresource_count); } /* We can't meaningfully discard sub-regions of an image. If rects * are specified, all specified subresources must have the same * dimensions, so just base this off the first subresource */ if (!(full_discard = (!region || !region->NumRects))) { vk_subresource = d3d12_resource_get_vk_subresource(texture, first_subresource, false); full_rect = d3d12_get_image_rect(texture, vk_subresource.mipLevel); for (i = 0; i < region->NumRects && !full_discard; i++) full_discard = d3d12_rect_fully_covers_region(&region->pRects[i], &full_rect); } if (!full_discard) return; /* Resource tracking. If we do a full discard, there is no need to do initial layout transitions. * Partial discards on first resource use need to be handled, however, * so we must make sure to discard all subresources on first use. */ all_subresource_full_discard = first_subresource == 0 && subresource_count == resource_subresource_count; d3d12_command_list_track_resource_usage(list, texture, !all_subresource_full_discard); if (all_subresource_full_discard) { has_bound_subresource = false; has_unbound_subresource = false; /* If we're discarding all subresources, we can only safely discard with one barrier * if is_bound state is the same for all subresources.
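 * Otherwise, fall back to emitting one discard barrier per subresource below.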
*/ for (i = first_subresource; i < first_subresource + subresource_count; i++) { vk_subresource = d3d12_resource_get_vk_subresource(texture, i, false); vk_subresource_layers = vk_subresource_layers_from_subresource(&vk_subresource); attachment_idx = d3d12_command_list_find_attachment_view(list, texture, &vk_subresource_layers); is_bound = attachment_idx >= 0 && (list->rendering_info.state_flags & (VKD3D_RENDERING_ACTIVE | VKD3D_RENDERING_SUSPENDED)); if (is_bound) has_bound_subresource = true; else has_unbound_subresource = true; } all_subresource_full_discard = !has_bound_subresource || !has_unbound_subresource; } if (all_subresource_full_discard) { vk_subresource_range.baseMipLevel = 0; vk_subresource_range.baseArrayLayer = 0; vk_subresource_range.levelCount = VK_REMAINING_MIP_LEVELS; vk_subresource_range.layerCount = VK_REMAINING_ARRAY_LAYERS; vk_subresource_range.aspectMask = texture->format->vk_aspect_mask; is_bound = has_bound_subresource; d3d12_command_list_end_current_render_pass(list, is_bound); d3d12_command_list_discard_attachment_barrier(list, texture, &vk_subresource_range, is_bound); } else { for (i = first_subresource; i < first_subresource + subresource_count; i++) { vk_subresource = d3d12_resource_get_vk_subresource(texture, i, false); vk_subresource_layers = vk_subresource_layers_from_subresource(&vk_subresource); attachment_idx = d3d12_command_list_find_attachment_view(list, texture, &vk_subresource_layers); is_bound = attachment_idx >= 0 && (list->rendering_info.state_flags & (VKD3D_RENDERING_ACTIVE | VKD3D_RENDERING_SUSPENDED)); d3d12_command_list_end_current_render_pass(list, is_bound); vk_subresource_range = vk_subresource_range_from_layers(&vk_subresource_layers); d3d12_command_list_discard_attachment_barrier(list, texture, &vk_subresource_range, is_bound); } } } static inline bool d3d12_query_type_is_scoped(D3D12_QUERY_TYPE type) { return type != D3D12_QUERY_TYPE_TIMESTAMP; } static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(d3d12_command_list_iface *iface, ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct d3d12_query_heap *query_heap = impl_from_ID3D12QueryHeap(heap); const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; VkQueryControlFlags flags = d3d12_query_type_get_vk_flags(type); TRACE("iface %p, heap %p, type %#x, index %u.\n", iface, heap, type, index); if (!d3d12_query_type_is_scoped(type)) { WARN("Query type %u is not scoped.\n", type); return; } d3d12_command_list_track_query_heap(list, query_heap); if (d3d12_query_heap_type_is_inline(query_heap->desc.Type)) { if (!d3d12_command_list_enable_query(list, query_heap, index, type)) d3d12_command_list_mark_as_invalid(list, "Failed to enable virtual query.\n"); } else { d3d12_command_list_end_current_render_pass(list, true); if (!d3d12_command_list_reset_query(list, query_heap->vk_query_pool, index)) VK_CALL(vkCmdResetQueryPool(list->vk_command_buffer, query_heap->vk_query_pool, index, 1)); if (d3d12_query_type_is_indexed(type)) { unsigned int stream_index = type - D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0; VK_CALL(vkCmdBeginQueryIndexedEXT(list->vk_command_buffer, query_heap->vk_query_pool, index, flags, stream_index)); } else VK_CALL(vkCmdBeginQuery(list->vk_command_buffer, query_heap->vk_query_pool, index, flags)); } } static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(d3d12_command_list_iface *iface, ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) { struct 
d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct d3d12_query_heap *query_heap = impl_from_ID3D12QueryHeap(heap); const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; TRACE("iface %p, heap %p, type %#x, index %u.\n", iface, heap, type, index); d3d12_command_list_track_query_heap(list, query_heap); if (d3d12_query_heap_type_is_inline(query_heap->desc.Type)) { if (!d3d12_command_list_disable_query(list, query_heap, index)) d3d12_command_list_mark_as_invalid(list, "Failed to disable virtual query.\n"); } else if (d3d12_query_type_is_scoped(type)) { d3d12_command_list_end_current_render_pass(list, true); if (d3d12_query_type_is_indexed(type)) { unsigned int stream_index = type - D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0; VK_CALL(vkCmdEndQueryIndexedEXT(list->vk_command_buffer, query_heap->vk_query_pool, index, stream_index)); } else VK_CALL(vkCmdEndQuery(list->vk_command_buffer, query_heap->vk_query_pool, index)); } else if (type == D3D12_QUERY_TYPE_TIMESTAMP) { if (!d3d12_command_list_reset_query(list, query_heap->vk_query_pool, index)) { d3d12_command_list_end_current_render_pass(list, true); VK_CALL(vkCmdResetQueryPool(list->vk_command_buffer, query_heap->vk_query_pool, index, 1)); } VK_CALL(vkCmdWriteTimestamp(list->vk_command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, query_heap->vk_query_pool, index)); } else FIXME("Unhandled query type %u.\n", type); } static void d3d12_command_list_resolve_binary_occlusion_queries(struct d3d12_command_list *list, VkBuffer src_buffer, uint32_t src_index, VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize dst_size, uint32_t dst_index, uint32_t count) { const struct vkd3d_query_ops *query_ops = &list->device->meta_ops.query; const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; VkDescriptorBufferInfo dst_buffer_info, src_buffer_info; struct vkd3d_query_resolve_args args; VkWriteDescriptorSet vk_writes[2]; unsigned int workgroup_count; VkMemoryBarrier vk_barrier; VkDescriptorSet vk_set; unsigned int i; d3d12_command_list_invalidate_current_pipeline(list, true); d3d12_command_list_invalidate_root_parameters(list, VK_PIPELINE_BIND_POINT_COMPUTE, true); vk_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; vk_barrier.pNext = NULL; vk_barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; /* If there are any overlapping copy writes, handle them here since we're doing a transfer barrier anyways. */ vk_barrier.srcAccessMask = list->tracked_copy_buffer_count ? 
VK_ACCESS_TRANSFER_WRITE_BIT : 0; d3d12_command_list_reset_buffer_copy_tracking(list); /* dst_buffer is in COPY_DEST state */ VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &vk_barrier, 0, NULL, 0, NULL)); VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, query_ops->vk_resolve_binary_pipeline)); vk_set = d3d12_command_allocator_allocate_descriptor_set(list->allocator, query_ops->vk_resolve_set_layout, VKD3D_DESCRIPTOR_POOL_TYPE_STATIC); for (i = 0; i < ARRAY_SIZE(vk_writes); i++) { vk_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; vk_writes[i].pNext = NULL; vk_writes[i].dstSet = vk_set; vk_writes[i].dstBinding = i; vk_writes[i].dstArrayElement = 0; vk_writes[i].descriptorCount = 1; vk_writes[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; vk_writes[i].pImageInfo = NULL; vk_writes[i].pTexelBufferView = NULL; } vk_writes[0].pBufferInfo = &dst_buffer_info; vk_writes[1].pBufferInfo = &src_buffer_info; dst_buffer_info.buffer = dst_buffer; dst_buffer_info.offset = dst_offset; dst_buffer_info.range = dst_size; src_buffer_info.buffer = src_buffer; src_buffer_info.offset = 0; src_buffer_info.range = VK_WHOLE_SIZE; VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, ARRAY_SIZE(vk_writes), vk_writes, 0, NULL)); VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, query_ops->vk_resolve_pipeline_layout, 0, 1, &vk_set, 0, NULL)); args.dst_index = dst_index; args.src_index = src_index; args.query_count = count; VK_CALL(vkCmdPushConstants(list->vk_command_buffer, query_ops->vk_resolve_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args)); workgroup_count = vkd3d_compute_workgroup_count(count, VKD3D_QUERY_OP_WORKGROUP_SIZE); VK_CALL(vkCmdDispatch(list->vk_command_buffer, workgroup_count, 1, 1)); vk_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; vk_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &vk_barrier, 0, NULL, 0, NULL)); } static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(d3d12_command_list_iface *iface, ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT start_index, UINT query_count, ID3D12Resource *dst_buffer, UINT64 aligned_dst_buffer_offset) { struct d3d12_query_heap *query_heap = impl_from_ID3D12QueryHeap(heap); struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct d3d12_resource *buffer = impl_from_ID3D12Resource(dst_buffer); const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; size_t stride = d3d12_query_heap_type_get_data_size(query_heap->desc.Type); VkCopyBufferInfo2KHR copy_info; VkBufferCopy2KHR copy_region; TRACE("iface %p, heap %p, type %#x, start_index %u, query_count %u, " "dst_buffer %p, aligned_dst_buffer_offset %#"PRIx64".\n", iface, heap, type, start_index, query_count, dst_buffer, aligned_dst_buffer_offset); /* Some games call this with a query_count of 0. * Avoid ending the render pass and doing worthless tracking. 
*/ if (!query_count) return; if (!d3d12_resource_is_buffer(buffer)) { WARN("Destination resource is not a buffer.\n"); return; } d3d12_command_list_track_query_heap(list, query_heap); d3d12_command_list_end_current_render_pass(list, true); if (d3d12_query_heap_type_is_inline(query_heap->desc.Type)) { if (!d3d12_command_list_gather_pending_queries(list)) { d3d12_command_list_mark_as_invalid(list, "Failed to gather virtual queries.\n"); return; } if (type != D3D12_QUERY_TYPE_BINARY_OCCLUSION) { copy_region.sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2_KHR; copy_region.pNext = NULL; copy_region.srcOffset = stride * start_index; copy_region.dstOffset = buffer->mem.offset + aligned_dst_buffer_offset; copy_region.size = stride * query_count; copy_info.sType = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2_KHR; copy_info.pNext = NULL; copy_info.srcBuffer = query_heap->vk_buffer; copy_info.dstBuffer = buffer->res.vk_buffer; copy_info.regionCount = 1; copy_info.pRegions = &copy_region; d3d12_command_list_mark_copy_buffer_write(list, copy_info.dstBuffer, copy_region.dstOffset, copy_region.size, !!(buffer->flags & VKD3D_RESOURCE_RESERVED)); VK_CALL(vkCmdCopyBuffer2KHR(list->vk_command_buffer, &copy_info)); } else { uint32_t dst_index = aligned_dst_buffer_offset / sizeof(uint64_t); d3d12_command_list_resolve_binary_occlusion_queries(list, query_heap->vk_buffer, start_index, buffer->res.vk_buffer, buffer->mem.offset, buffer->desc.Width, dst_index, query_count); } } else { d3d12_command_list_read_query_range(list, query_heap->vk_query_pool, start_index, query_count); d3d12_command_list_mark_copy_buffer_write(list, buffer->res.vk_buffer, buffer->mem.offset + aligned_dst_buffer_offset, sizeof(uint64_t), !!(buffer->flags & VKD3D_RESOURCE_RESERVED)); VK_CALL(vkCmdCopyQueryPoolResults(list->vk_command_buffer, query_heap->vk_query_pool, start_index, query_count, buffer->res.vk_buffer, buffer->mem.offset + aligned_dst_buffer_offset, stride, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT)); } VKD3D_BREADCRUMB_COMMAND(RESOLVE_QUERY); } static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(d3d12_command_list_iface *iface, ID3D12Resource *buffer, UINT64 aligned_buffer_offset, D3D12_PREDICATION_OP operation) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct d3d12_resource *resource = impl_from_ID3D12Resource(buffer); const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; const struct vkd3d_predicate_ops *predicate_ops = &list->device->meta_ops.predicate; struct vkd3d_predicate_resolve_args resolve_args; VkConditionalRenderingBeginInfoEXT begin_info; VkPipelineStageFlags dst_stages, src_stages; struct vkd3d_scratch_allocation scratch; VkAccessFlags dst_access, src_access; VkCopyBufferInfo2KHR copy_info; VkBufferCopy2KHR copy_region; VkMemoryBarrier vk_barrier; TRACE("iface %p, buffer %p, aligned_buffer_offset %#"PRIx64", operation %#x.\n", iface, buffer, aligned_buffer_offset, operation); d3d12_command_list_end_current_render_pass(list, true); if (resource && (aligned_buffer_offset & 0x7)) return; if (!list->device->device_info.buffer_device_address_features.bufferDeviceAddress && !list->device->device_info.conditional_rendering_features.conditionalRendering) { FIXME_ONCE("Conditional rendering not supported by device.\n"); return; } if (list->predicate_enabled) VK_CALL(vkCmdEndConditionalRenderingEXT(list->vk_command_buffer)); if (resource) { if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, sizeof(uint32_t),
sizeof(uint32_t), ~0u, &scratch)) return; begin_info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; begin_info.pNext = NULL; begin_info.buffer = scratch.buffer; begin_info.offset = scratch.offset; begin_info.flags = 0; if (list->device->device_info.buffer_device_address_features.bufferDeviceAddress) { /* Resolve 64-bit predicate into a 32-bit location so that this works with * VK_EXT_conditional_rendering. We'll handle the predicate operation here * so setting VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT is not necessary. */ d3d12_command_list_invalidate_current_pipeline(list, true); d3d12_command_list_invalidate_root_parameters(list, VK_PIPELINE_BIND_POINT_COMPUTE, true); resolve_args.src_va = d3d12_resource_get_va(resource, aligned_buffer_offset); resolve_args.dst_va = scratch.va; resolve_args.invert = operation != D3D12_PREDICATION_OP_EQUAL_ZERO; VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, predicate_ops->vk_resolve_pipeline)); VK_CALL(vkCmdPushConstants(list->vk_command_buffer, predicate_ops->vk_resolve_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(resolve_args), &resolve_args)); VK_CALL(vkCmdDispatch(list->vk_command_buffer, 1, 1, 1)); src_stages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; src_access = VK_ACCESS_SHADER_WRITE_BIT; } else { FIXME_ONCE("64-bit predicates not supported.\n"); copy_region.sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2_KHR; copy_region.pNext = NULL; copy_region.srcOffset = resource->mem.offset + aligned_buffer_offset; copy_region.dstOffset = scratch.offset; copy_region.size = sizeof(uint32_t); copy_info.sType = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2_KHR; copy_info.pNext = NULL; copy_info.srcBuffer = resource->res.vk_buffer; copy_info.dstBuffer = scratch.buffer; copy_info.regionCount = 1; copy_info.pRegions = &copy_region; VK_CALL(vkCmdCopyBuffer2KHR(list->vk_command_buffer, &copy_info)); src_stages = VK_PIPELINE_STAGE_TRANSFER_BIT; src_access = VK_ACCESS_TRANSFER_WRITE_BIT; if (operation != D3D12_PREDICATION_OP_EQUAL_ZERO) begin_info.flags = VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT; } if (list->device->device_info.conditional_rendering_features.conditionalRendering) { dst_stages = VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT; dst_access = VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT; list->predicate_enabled = true; } else { dst_stages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; dst_access = VK_ACCESS_SHADER_READ_BIT; list->predicate_va = scratch.va; } vk_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; vk_barrier.pNext = NULL; vk_barrier.srcAccessMask = src_access; vk_barrier.dstAccessMask = dst_access; VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, src_stages, dst_stages, 0, 1, &vk_barrier, 0, NULL, 0, NULL)); if (list->predicate_enabled) VK_CALL(vkCmdBeginConditionalRenderingEXT(list->vk_command_buffer, &begin_info)); } else { list->predicate_enabled = false; list->predicate_va = 0; } } static char *decode_pix_string(UINT metadata, const void *data, size_t size) { char *label_str;
#define PIX_EVENT_UNICODE_VERSION 0
#define PIX_EVENT_ANSI_VERSION 1
#define PIX_EVENT_PIX3BLOB_VERSION 2
switch (metadata) { case PIX_EVENT_ANSI_VERSION: /* Be defensive in case the string is not nul-terminated.
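 * The ANSI event payload comes with an explicit size, so copy it into a freshly
 * allocated buffer and add the terminator ourselves.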
*/ label_str = vkd3d_malloc(size + 1); if (!label_str) return NULL; memcpy(label_str, data, size); label_str[size] = '\0'; break; case PIX_EVENT_UNICODE_VERSION: label_str = vkd3d_strdup_w_utf8(data, size / sizeof(WCHAR)); if (!label_str) return NULL; break; case PIX_EVENT_PIX3BLOB_VERSION: FIXME("PIX3BLOB event format not supported.\n"); return NULL; default: FIXME("Unrecognized metadata format %u for BeginEvent.\n", metadata); return NULL; } return label_str; } static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(d3d12_command_list_iface *iface, UINT metadata, const void *data, UINT size) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; VkDebugUtilsLabelEXT label; char *label_str; unsigned int i; if (!list->device->vk_info.EXT_debug_utils) return; label_str = decode_pix_string(metadata, data, size); if (!label_str) { FIXME("Failed to decode PIX debug event.\n"); return; } label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; label.pNext = NULL; label.pLabelName = label_str; for (i = 0; i < 4; i++) label.color[i] = 1.0f; VK_CALL(vkCmdInsertDebugUtilsLabelEXT(list->vk_command_buffer, &label)); vkd3d_free(label_str); } static void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(d3d12_command_list_iface *iface, UINT metadata, const void *data, UINT size) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; VkDebugUtilsLabelEXT label; char *label_str; unsigned int i; TRACE("iface %p, metadata %u, data %p, size %u.\n", iface, metadata, data, size); if (!list->device->vk_info.EXT_debug_utils) return; label_str = decode_pix_string(metadata, data, size); if (!label_str) { FIXME("Failed to decode PIX debug event.\n"); return; } label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; label.pNext = NULL; label.pLabelName = label_str; for (i = 0; i < 4; i++) label.color[i] = 1.0f; VK_CALL(vkCmdBeginDebugUtilsLabelEXT(list->vk_command_buffer, &label)); vkd3d_free(label_str); } static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(d3d12_command_list_iface *iface) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; TRACE("iface %p.\n", iface); if (!list->device->vk_info.EXT_debug_utils) return; VK_CALL(vkCmdEndDebugUtilsLabelEXT(list->vk_command_buffer)); } STATIC_ASSERT(sizeof(VkDispatchIndirectCommand) == sizeof(D3D12_DISPATCH_ARGUMENTS)); STATIC_ASSERT(sizeof(VkDrawIndexedIndirectCommand) == sizeof(D3D12_DRAW_INDEXED_ARGUMENTS)); STATIC_ASSERT(sizeof(VkDrawIndirectCommand) == sizeof(D3D12_DRAW_ARGUMENTS)); STATIC_ASSERT(offsetof(VkTraceRaysIndirectCommand2KHR, depth) == offsetof(D3D12_DISPATCH_RAYS_DESC, Depth)); static HRESULT d3d12_command_signature_allocate_stream_memory_for_list( struct d3d12_command_list *list, struct d3d12_command_signature *signature, uint32_t max_command_count, struct vkd3d_scratch_allocation *allocation); static HRESULT d3d12_command_signature_allocate_preprocess_memory_for_list( struct d3d12_command_list *list, struct d3d12_command_signature *signature, VkPipeline render_pipeline, uint32_t max_command_count, struct vkd3d_scratch_allocation *allocation, VkDeviceSize *size); static void d3d12_command_list_execute_indirect_state_template( struct d3d12_command_list *list, struct d3d12_command_signature *signature, uint32_t max_command_count, struct d3d12_resource 
*arg_buffer, UINT64 arg_buffer_offset, struct d3d12_resource *count_buffer, UINT64 count_buffer_offset) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; const VkPhysicalDeviceDeviceGeneratedCommandsPropertiesNV *props; struct vkd3d_scratch_allocation preprocess_allocation; struct vkd3d_scratch_allocation stream_allocation; struct vkd3d_scratch_allocation count_allocation; struct vkd3d_execute_indirect_args patch_args; VkGeneratedCommandsInfoNV generated; VkCommandBuffer vk_patch_cmd_buffer; VkIndirectCommandsStreamNV stream; VkDeviceSize preprocess_size; VkPipeline current_pipeline; VkMemoryBarrier barrier; bool require_ibo_update; bool require_patch; unsigned int i; HRESULT hr; /* To build device generated commands, we need to know the pipeline we're going to render with. */ if (!d3d12_command_list_update_graphics_pipeline(list)) return; current_pipeline = list->current_pipeline; memset(&patch_args, 0, sizeof(patch_args)); patch_args.debug_tag = 0; /* Modify to non-zero value as desired when debugging. */ if (FAILED(hr = d3d12_command_signature_allocate_preprocess_memory_for_list( list, signature, current_pipeline, max_command_count, &preprocess_allocation, &preprocess_size))) { WARN("Failed to allocate preprocess memory.\n"); return; } /* If everything regarding alignment works out, we can just reuse the app indirect buffer instead. */ require_ibo_update = false; require_patch = false; /* Bind IBO. If we always update the IBO indirectly, do not validate the index buffer here. * We can render fine even with a NULL IBO bound. */ for (i = 0; i < signature->desc.NumArgumentDescs; i++) { if (signature->desc.pArgumentDescs[i].Type == D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW) { require_ibo_update = true; break; } } /* - Stride can mismatch, i.e. we need internal alignment of arguments. * - Min required alignment on the indirect buffer itself might be too strict. * - Min required alignment on count buffer might be too strict. * - We require debugging. */ props = &list->device->device_info.device_generated_commands_properties_nv; if ((signature->state_template.stride != signature->desc.ByteStride && max_command_count > 1) || (arg_buffer_offset & (props->minIndirectCommandsBufferOffsetAlignment - 1)) || (count_buffer && (count_buffer_offset & (props->minSequencesCountBufferOffsetAlignment - 1))) || patch_args.debug_tag) { require_patch = true; } if (require_patch) { if (FAILED(hr = d3d12_command_signature_allocate_stream_memory_for_list( list, signature, max_command_count, &stream_allocation))) { WARN("Failed to allocate stream memory.\n"); return; } if (count_buffer) { if (FAILED(hr = d3d12_command_allocator_allocate_scratch_memory(list->allocator, VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, sizeof(uint32_t), props->minSequencesCountBufferOffsetAlignment, ~0u, &count_allocation))) { WARN("Failed to allocate count memory.\n"); return; } } patch_args.template_va = signature->state_template.buffer_va; patch_args.api_buffer_va = d3d12_resource_get_va(arg_buffer, arg_buffer_offset); patch_args.device_generated_commands_va = stream_allocation.va; patch_args.indirect_count_va = count_buffer ? d3d12_resource_get_va(count_buffer, count_buffer_offset) : 0; patch_args.dst_indirect_count_va = count_buffer ? 
count_allocation.va : 0; patch_args.api_buffer_word_stride = signature->desc.ByteStride / sizeof(uint32_t); patch_args.device_generated_commands_word_stride = signature->state_template.stride / sizeof(uint32_t); if (patch_args.debug_tag != 0) { /* Makes log easier to understand since a sorted log will appear in-order. */ static uint32_t vkd3d_implicit_instance_count; patch_args.implicit_instance = vkd3d_atomic_uint32_increment( &vkd3d_implicit_instance_count, vkd3d_memory_order_relaxed) - 1; } barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; barrier.pNext = NULL; barrier.srcAccessMask = 0; barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; if (!list->execute_indirect.has_observed_transition_to_indirect) { /* Fast path, throw the template resolve to the init command buffer. */ d3d12_command_allocator_allocate_init_command_buffer(list->allocator, list); vk_patch_cmd_buffer = list->vk_init_commands; if (!list->execute_indirect.has_emitted_indirect_to_compute_barrier) { VK_CALL(vkCmdPipelineBarrier(vk_patch_cmd_buffer, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &barrier, 0, NULL, 0, NULL)); list->execute_indirect.has_emitted_indirect_to_compute_barrier = true; } } else { vk_patch_cmd_buffer = list->vk_command_buffer; d3d12_command_list_end_current_render_pass(list, true); VK_CALL(vkCmdPipelineBarrier(vk_patch_cmd_buffer, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &barrier, 0, NULL, 0, NULL)); d3d12_command_list_invalidate_current_pipeline(list, true); } VK_CALL(vkCmdPushConstants(vk_patch_cmd_buffer, signature->state_template.pipeline.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(patch_args), &patch_args)); VK_CALL(vkCmdBindPipeline(vk_patch_cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, signature->state_template.pipeline.vk_pipeline)); /* One workgroup processes the patching for one draw. We could potentially use indirect dispatch * to restrict the patching work to just the indirect count, but meh, just more barriers. * We'll nop out the workgroup early based on direct count, and the number of threads should be trivial either way. */ VK_CALL(vkCmdDispatch(vk_patch_cmd_buffer, max_command_count, 1, 1)); if (vk_patch_cmd_buffer == list->vk_command_buffer) { barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; barrier.dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT; VK_CALL(vkCmdPipelineBarrier(vk_patch_cmd_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, 0, 1, &barrier, 0, NULL, 0, NULL)); /* The barrier is deferred if we moved the dispatch to init command buffer. 
*/ } } if (!d3d12_command_list_begin_render_pass(list)) { WARN("Failed to begin render pass, ignoring draw.\n"); return; } if (!require_ibo_update && signature->desc.pArgumentDescs[signature->desc.NumArgumentDescs - 1].Type == D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED && !d3d12_command_list_update_index_buffer(list)) { return; } generated.sType = VK_STRUCTURE_TYPE_GENERATED_COMMANDS_INFO_NV; generated.pNext = NULL; generated.pipeline = list->current_pipeline; generated.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; generated.indirectCommandsLayout = signature->state_template.layout; generated.streamCount = 1; generated.pStreams = &stream; generated.preprocessBuffer = preprocess_allocation.buffer; generated.preprocessOffset = preprocess_allocation.offset; generated.preprocessSize = preprocess_size; generated.sequencesCount = max_command_count; generated.sequencesIndexBuffer = VK_NULL_HANDLE; generated.sequencesIndexOffset = 0; if (count_buffer) { if (require_patch) { generated.sequencesCountBuffer = count_allocation.buffer; generated.sequencesCountOffset = count_allocation.offset; } else { generated.sequencesCountBuffer = count_buffer->res.vk_buffer; generated.sequencesCountOffset = count_buffer->mem.offset + count_buffer_offset; } } else { generated.sequencesCountBuffer = VK_NULL_HANDLE; generated.sequencesCountOffset = 0; } if (require_patch) { stream.buffer = stream_allocation.buffer; stream.offset = stream_allocation.offset; } else { stream.buffer = arg_buffer->res.vk_buffer; stream.offset = arg_buffer->mem.offset + arg_buffer_offset; } if (require_patch) WARN("Template requires patching :(\n"); VK_CALL(vkCmdExecuteGeneratedCommandsNV(list->vk_command_buffer, VK_FALSE, &generated)); /* Need to clear state to zero if it was part of a command signature. */ for (i = 0; i < signature->desc.NumArgumentDescs; i++) { const D3D12_INDIRECT_ARGUMENT_DESC *arg = &signature->desc.pArgumentDescs[i]; switch (arg->Type) { case D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW: /* Null IBO */ list->index_buffer.buffer = VK_NULL_HANDLE; break; case D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW: { /* Null VBO */ uint32_t slot = arg->VertexBuffer.Slot; list->dynamic_state.vertex_buffers[slot] = VK_NULL_HANDLE; list->dynamic_state.vertex_strides[slot] = 0; list->dynamic_state.vertex_offsets[slot] = 0; list->dynamic_state.vertex_sizes[slot] = 0; break; } case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW: case D3D12_INDIRECT_ARGUMENT_TYPE_SHADER_RESOURCE_VIEW: case D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW: { uint32_t index = arg->ConstantBufferView.RootParameterIndex; d3d12_command_list_set_root_descriptor(list, VK_PIPELINE_BIND_POINT_GRAPHICS, index, 0); break; } case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT: { uint32_t zeroes[D3D12_MAX_ROOT_COST]; memset(zeroes, 0, sizeof(uint32_t) * arg->Constant.Num32BitValuesToSet); d3d12_command_list_set_root_constants(list, VK_PIPELINE_BIND_POINT_GRAPHICS, arg->Constant.RootParameterIndex, arg->Constant.DestOffsetIn32BitValues, arg->Constant.Num32BitValuesToSet, zeroes); break; } default: break; } } /* Spec mentions that all state related to the bind point is undefined after this, so * invalidate all state. Unclear exactly which state is invalidated though ... * Treat it as a meta shader. We need to nuke all state after running execute generated commands. 
*/ d3d12_command_list_invalidate_all_state(list); } static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(d3d12_command_list_iface *iface, ID3D12CommandSignature *command_signature, UINT max_command_count, ID3D12Resource *arg_buffer, UINT64 arg_buffer_offset, ID3D12Resource *count_buffer, UINT64 count_buffer_offset) { struct d3d12_command_signature *sig_impl = impl_from_ID3D12CommandSignature(command_signature); struct d3d12_resource *count_impl = impl_from_ID3D12Resource(count_buffer); struct d3d12_resource *arg_impl = impl_from_ID3D12Resource(arg_buffer); struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; const D3D12_COMMAND_SIGNATURE_DESC *signature_desc = &sig_impl->desc; struct vkd3d_scratch_allocation scratch; unsigned int i; TRACE("iface %p, command_signature %p, max_command_count %u, arg_buffer %p, " "arg_buffer_offset %#"PRIx64", count_buffer %p, count_buffer_offset %#"PRIx64".\n", iface, command_signature, max_command_count, arg_buffer, arg_buffer_offset, count_buffer, count_buffer_offset); if (!max_command_count) return; if ((count_buffer || list->predicate_va) && !list->device->vk_info.KHR_draw_indirect_count) { FIXME("Count buffers not supported by Vulkan implementation.\n"); return; } if (sig_impl->requires_state_template) { /* Complex execute indirect path. */ if (list->predicate_va) FIXME("Predicated ExecuteIndirect with state template not supported yet. Ignoring predicate.\n"); d3d12_command_list_execute_indirect_state_template(list, sig_impl, max_command_count, arg_impl, arg_buffer_offset, count_impl, count_buffer_offset); VKD3D_BREADCRUMB_COMMAND(EXECUTE_INDIRECT_TEMPLATE); return; } /* Temporary workaround, since we cannot parse non-draw arguments yet. Point directly * to the first argument. Should avoid hard crashes for now. 
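     * (argument_buffer_offset is presumably the byte offset of the draw/dispatch argument within the command
     * signature stride, so the indirect buffer accesses below land on the actual draw payload.)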
*/ arg_buffer_offset += sig_impl->argument_buffer_offset; for (i = 0; i < signature_desc->NumArgumentDescs; ++i) { const D3D12_INDIRECT_ARGUMENT_DESC *arg_desc = &signature_desc->pArgumentDescs[i]; if (list->predicate_va) { union vkd3d_predicate_command_direct_args args; enum vkd3d_predicate_command_type type; VkDeviceSize indirect_va; switch (arg_desc->Type) { case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW: case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED: if (count_buffer) { type = VKD3D_PREDICATE_COMMAND_DRAW_INDIRECT_COUNT; indirect_va = d3d12_resource_get_va(count_impl, count_buffer_offset); } else { args.draw_count = max_command_count; type = VKD3D_PREDICATE_COMMAND_DRAW_INDIRECT; indirect_va = 0; } break; case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH: type = VKD3D_PREDICATE_COMMAND_DISPATCH_INDIRECT; indirect_va = d3d12_resource_get_va(arg_impl, arg_buffer_offset); break; default: FIXME("Ignoring unhandled argument type %#x.\n", arg_desc->Type); continue; } if (!d3d12_command_list_emit_predicated_command(list, type, indirect_va, &args, &scratch)) return; } else if (count_buffer) { scratch.buffer = count_impl->res.vk_buffer; scratch.offset = count_impl->mem.offset + count_buffer_offset; scratch.va = d3d12_resource_get_va(count_impl, count_buffer_offset); } else { scratch.buffer = arg_impl->res.vk_buffer; scratch.offset = arg_impl->mem.offset + arg_buffer_offset; scratch.va = d3d12_resource_get_va(arg_impl, arg_buffer_offset); } d3d12_command_list_end_transfer_batch(list); switch (arg_desc->Type) { case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW: if (!d3d12_command_list_begin_render_pass(list)) { WARN("Failed to begin render pass, ignoring draw.\n"); break; } if (count_buffer || list->predicate_va) { VK_CALL(vkCmdDrawIndirectCountKHR(list->vk_command_buffer, arg_impl->res.vk_buffer, arg_buffer_offset + arg_impl->mem.offset, scratch.buffer, scratch.offset, max_command_count, signature_desc->ByteStride)); } else { VK_CALL(vkCmdDrawIndirect(list->vk_command_buffer, arg_impl->res.vk_buffer, arg_buffer_offset + arg_impl->mem.offset, max_command_count, signature_desc->ByteStride)); } break; case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED: if (!d3d12_command_list_update_index_buffer(list)) break; if (!d3d12_command_list_begin_render_pass(list)) { WARN("Failed to begin render pass, ignoring draw.\n"); break; } d3d12_command_list_check_index_buffer_strip_cut_value(list); if (count_buffer || list->predicate_va) { VK_CALL(vkCmdDrawIndexedIndirectCountKHR(list->vk_command_buffer, arg_impl->res.vk_buffer, arg_buffer_offset + arg_impl->mem.offset, scratch.buffer, scratch.offset, max_command_count, signature_desc->ByteStride)); } else { VK_CALL(vkCmdDrawIndexedIndirect(list->vk_command_buffer, arg_impl->res.vk_buffer, arg_buffer_offset + arg_impl->mem.offset, max_command_count, signature_desc->ByteStride)); } break; case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH: if (max_command_count != 1) FIXME("Ignoring command count %u.\n", max_command_count); if (count_buffer) { FIXME_ONCE("Count buffers not supported for indirect dispatch.\n"); break; } if (!d3d12_command_list_update_compute_state(list)) { WARN("Failed to update compute state, ignoring dispatch.\n"); break; } VK_CALL(vkCmdDispatchIndirect(list->vk_command_buffer, scratch.buffer, scratch.offset)); break; case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_RAYS: if (max_command_count != 1) FIXME("Ignoring command count %u.\n", max_command_count); if (count_buffer) { FIXME_ONCE("Count buffers not supported for indirect ray dispatch.\n"); break; } if 
(!d3d12_command_list_update_raygen_state(list)) { WARN("Failed to update raygen state, ignoring ray dispatch.\n"); break; } if (!list->device->device_info.ray_tracing_maintenance1_features.rayTracingPipelineTraceRaysIndirect2) { WARN("TraceRaysIndirect2 is not supported, ignoring ray dispatch.\n"); break; } VK_CALL(vkCmdTraceRaysIndirect2KHR(list->vk_command_buffer, scratch.va)); break; default: FIXME("Ignoring unhandled argument type %#x.\n", arg_desc->Type); break; } } VKD3D_BREADCRUMB_COMMAND(EXECUTE_INDIRECT); } static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(d3d12_command_list_iface *iface, ID3D12Resource *dst_buffer, UINT64 dst_offset, ID3D12Resource *src_buffer, UINT64 src_offset, UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, const D3D12_SUBRESOURCE_RANGE_UINT64 *dependent_sub_resource_ranges) { FIXME("iface %p, dst_resource %p, dst_offset %#"PRIx64", src_resource %p, " "src_offset %#"PRIx64", dependent_resource_count %u, " "dependent_resources %p, dependent_sub_resource_ranges %p stub!\n", iface, dst_buffer, dst_offset, src_buffer, src_offset, dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); } static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(d3d12_command_list_iface *iface, ID3D12Resource *dst_buffer, UINT64 dst_offset, ID3D12Resource *src_buffer, UINT64 src_offset, UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, const D3D12_SUBRESOURCE_RANGE_UINT64 *dependent_sub_resource_ranges) { FIXME("iface %p, dst_resource %p, dst_offset %#"PRIx64", src_resource %p, " "src_offset %#"PRIx64", dependent_resource_count %u, " "dependent_resources %p, dependent_sub_resource_ranges %p stub!\n", iface, dst_buffer, dst_offset, src_buffer, src_offset, dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); } static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(d3d12_command_list_iface *iface, FLOAT min, FLOAT max) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct vkd3d_dynamic_state *dyn_state = &list->dynamic_state; TRACE("iface %p, min %.8e, max %.8e.\n", iface, min, max); if (dyn_state->min_depth_bounds != min || dyn_state->max_depth_bounds != max) { dyn_state->min_depth_bounds = min; dyn_state->max_depth_bounds = max; dyn_state->dirty_flags |= VKD3D_DYNAMIC_STATE_DEPTH_BOUNDS; } } static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(d3d12_command_list_iface *iface, UINT sample_count, UINT pixel_count, D3D12_SAMPLE_POSITION *sample_positions) { FIXME("iface %p, sample_count %u, pixel_count %u, sample_positions %p stub!\n", iface, sample_count, pixel_count, sample_positions); } static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(d3d12_command_list_iface *iface, ID3D12Resource *dst, UINT dst_sub_resource_idx, UINT dst_x, UINT dst_y, ID3D12Resource *src, UINT src_sub_resource_idx, D3D12_RECT *src_rect, DXGI_FORMAT format, D3D12_RESOLVE_MODE mode) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct d3d12_resource *dst_resource, *src_resource; VkImageSubresourceLayers src_subresource; VkImageSubresourceLayers dst_subresource; VkOffset3D src_offset; VkOffset3D dst_offset; VkExtent3D extent; TRACE("iface %p, dst_resource %p, dst_sub_resource_idx %u, " "dst_x %u, dst_y %u, src_resource %p, src_sub_resource_idx %u, " "src_rect %p, format %#x, mode %#x!\n", iface, dst, dst_sub_resource_idx, dst_x, dst_y, src, src_sub_resource_idx, 
src_rect, format, mode); dst_resource = impl_from_ID3D12Resource(dst); src_resource = impl_from_ID3D12Resource(src); assert(d3d12_resource_is_texture(dst_resource)); assert(d3d12_resource_is_texture(src_resource)); vk_image_subresource_layers_from_d3d12(&src_subresource, src_resource->format, src_sub_resource_idx, src_resource->desc.MipLevels, d3d12_resource_desc_get_layer_count(&src_resource->desc)); vk_image_subresource_layers_from_d3d12(&dst_subresource, dst_resource->format, dst_sub_resource_idx, dst_resource->desc.MipLevels, d3d12_resource_desc_get_layer_count(&dst_resource->desc)); if (src_rect) { src_offset.x = src_rect->left; src_offset.y = src_rect->top; src_offset.z = 0; extent.width = src_rect->right - src_rect->left; extent.height = src_rect->bottom - src_rect->top; extent.depth = 1; } else { memset(&src_offset, 0, sizeof(src_offset)); vk_extent_3d_from_d3d12_miplevel(&extent, &src_resource->desc, src_subresource.mipLevel); } dst_offset.x = (int32_t)dst_x; dst_offset.y = (int32_t)dst_y; dst_offset.z = 0; if (mode == D3D12_RESOLVE_MODE_AVERAGE || mode == D3D12_RESOLVE_MODE_MIN || mode == D3D12_RESOLVE_MODE_MAX) { VkImageResolve2KHR vk_image_resolve; vk_image_resolve.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR; vk_image_resolve.pNext = NULL; vk_image_resolve.srcSubresource = src_subresource; vk_image_resolve.dstSubresource = dst_subresource; vk_image_resolve.extent = extent; vk_image_resolve.srcOffset = src_offset; vk_image_resolve.dstOffset = dst_offset; d3d12_command_list_resolve_subresource(list, dst_resource, src_resource, &vk_image_resolve, format, mode); } else if (mode == D3D12_RESOLVE_MODE_DECOMPRESS) { /* This is a glorified copy path. The region can overlap fully, in which case we have an in-place decompress. * Do nothing here. We can copy within a subresource, in which case we enter GENERAL layout. * Otherwise, this can always map to vkCmdCopyImage2KHR, except for DEPTH -> COLOR copy. * In this case, just use the fallback paths as is. */ bool writes_full_subresource; bool overlapping_subresource; VkImageCopy2KHR image_copy; overlapping_subresource = dst_resource == src_resource && dst_sub_resource_idx == src_sub_resource_idx; /* In place DECOMPRESS. No-op. */ if (overlapping_subresource && memcmp(&src_offset, &dst_offset, sizeof(VkOffset3D)) == 0) return; /* Cannot discard if we're copying in-place. */ writes_full_subresource = !overlapping_subresource && d3d12_image_copy_writes_full_subresource(dst_resource, &extent, &dst_subresource); d3d12_command_list_track_resource_usage(list, src_resource, true); d3d12_command_list_track_resource_usage(list, dst_resource, !writes_full_subresource); image_copy.sType = VK_STRUCTURE_TYPE_IMAGE_COPY_2_KHR; image_copy.pNext = NULL; image_copy.srcSubresource = src_subresource; image_copy.dstSubresource = dst_subresource; image_copy.srcOffset = src_offset; image_copy.dstOffset = dst_offset; image_copy.extent = extent; d3d12_command_list_copy_image(list, dst_resource, dst_resource->format, src_resource, src_resource->format, &image_copy, writes_full_subresource, overlapping_subresource); } else { /* The "weird" resolve modes like sampler feedback encode/decode, etc. 
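         * (e.g. D3D12_RESOLVE_MODE_ENCODE_SAMPLER_FEEDBACK and D3D12_RESOLVE_MODE_DECODE_SAMPLER_FEEDBACK
         * would end up here.)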
*/ FIXME("Unsupported resolve mode: %u.\n", mode); } } static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(d3d12_command_list_iface *iface, UINT mask) { FIXME("iface %p, mask %#x stub!\n", iface, mask); } static bool vk_pipeline_stage_from_wbi_mode(D3D12_WRITEBUFFERIMMEDIATE_MODE mode, VkPipelineStageFlagBits *stage) { switch (mode) { case D3D12_WRITEBUFFERIMMEDIATE_MODE_DEFAULT: *stage = VK_PIPELINE_STAGE_TRANSFER_BIT; return true; case D3D12_WRITEBUFFERIMMEDIATE_MODE_MARKER_IN: *stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; return true; case D3D12_WRITEBUFFERIMMEDIATE_MODE_MARKER_OUT: *stage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; return true; default: return false; } } static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(d3d12_command_list_iface *iface, UINT count, const D3D12_WRITEBUFFERIMMEDIATE_PARAMETER *parameters, const D3D12_WRITEBUFFERIMMEDIATE_MODE *modes) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; const struct vkd3d_unique_resource *resource; D3D12_WRITEBUFFERIMMEDIATE_MODE mode; bool flush_after, flush_before; VkPipelineStageFlagBits stage; uint32_t dword_buffer[64]; VkDeviceSize curr_offset; unsigned int dword_count; VkBuffer curr_buffer; VkDeviceSize offset; unsigned int i; TRACE("iface %p, count %u, parameters %p, modes %p.\n", iface, count, parameters, modes); curr_buffer = VK_NULL_HANDLE; curr_offset = 0; dword_count = 0; for (i = 0; i < count; ++i) { if (!(resource = vkd3d_va_map_deref(&list->device->memory_allocator.va_map, parameters[i].Dest))) { d3d12_command_list_mark_as_invalid(list, "Invalid target address %p.\n", parameters[i].Dest); return; } offset = parameters[i].Dest - resource->va; mode = modes ? modes[i] : D3D12_WRITEBUFFERIMMEDIATE_MODE_DEFAULT; if (!vk_pipeline_stage_from_wbi_mode(mode, &stage)) { d3d12_command_list_mark_as_invalid(list, "Invalid WBI mode %u.\n", mode); return; } /* MODE_DEFAULT behaves like a normal transfer operation, and some games * use this to update large parts of a buffer, so try to batch consecutive * writes. Ignore marker semantics if AMD_buffer_marker is not supported * since we cannot implement them in a useful way otherwise. */ if (mode == D3D12_WRITEBUFFERIMMEDIATE_MODE_DEFAULT || !list->device->vk_info.AMD_buffer_marker) { d3d12_command_list_end_current_render_pass(list, true); flush_before = resource->vk_buffer != curr_buffer || offset != curr_offset + dword_count * sizeof(uint32_t); if (flush_before) { if (dword_count) { d3d12_command_list_mark_copy_buffer_write(list, curr_buffer, curr_offset, dword_count * sizeof(uint32_t), false); VK_CALL(vkCmdUpdateBuffer(list->vk_command_buffer, curr_buffer, curr_offset, dword_count * sizeof(uint32_t), dword_buffer)); } curr_buffer = resource->vk_buffer; curr_offset = offset; dword_count = 0; } dword_buffer[dword_count++] = parameters[i].Value; /* Record batched writes if the buffer is full or if we're at the * end of the list, or if the next write has marker semantics. 
*/ flush_after = dword_count == ARRAY_SIZE(dword_buffer) || i + 1 == count || (modes && modes[i + 1] != mode && list->device->vk_info.AMD_buffer_marker); if (flush_after) { d3d12_command_list_mark_copy_buffer_write(list, curr_buffer, curr_offset, dword_count * sizeof(uint32_t), false); VK_CALL(vkCmdUpdateBuffer(list->vk_command_buffer, curr_buffer, curr_offset, dword_count * sizeof(uint32_t), dword_buffer)); curr_buffer = VK_NULL_HANDLE; curr_offset = 0; dword_count = 0; } } else { VK_CALL(vkCmdWriteBufferMarkerAMD(list->vk_command_buffer, stage, resource->vk_buffer, offset, parameters[i].Value)); } } VKD3D_BREADCRUMB_COMMAND(WBI); } static void STDMETHODCALLTYPE d3d12_command_list_SetProtectedResourceSession(d3d12_command_list_iface *iface, ID3D12ProtectedResourceSession *protected_session) { FIXME("iface %p, protected_session %p stub!\n", iface, protected_session); } static void STDMETHODCALLTYPE d3d12_command_list_BeginRenderPass(d3d12_command_list_iface *iface, UINT rt_count, const D3D12_RENDER_PASS_RENDER_TARGET_DESC *render_targets, const D3D12_RENDER_PASS_DEPTH_STENCIL_DESC *depth_stencil, D3D12_RENDER_PASS_FLAGS flags) { FIXME("iface %p, rt_count %u, render_targets %p, depth_stencil %p, flags %#x stub!\n", iface, rt_count, render_targets, depth_stencil, flags); } static void STDMETHODCALLTYPE d3d12_command_list_EndRenderPass(d3d12_command_list_iface *iface) { FIXME("iface %p stub!\n", iface); } static void STDMETHODCALLTYPE d3d12_command_list_InitializeMetaCommand(d3d12_command_list_iface *iface, ID3D12MetaCommand *meta_command, const void *parameter_data, SIZE_T parameter_size) { FIXME("iface %p, meta_command %p, parameter_data %p, parameter_size %lu stub!\n", iface, meta_command, parameter_data, parameter_size); } static void STDMETHODCALLTYPE d3d12_command_list_ExecuteMetaCommand(d3d12_command_list_iface *iface, ID3D12MetaCommand *meta_command, const void *parameter_data, SIZE_T parameter_size) { FIXME("iface %p, meta_command %p, parameter_data %p, parameter_size %lu stub!\n", iface, meta_command, parameter_data, parameter_size); } static void STDMETHODCALLTYPE d3d12_command_list_BuildRaytracingAccelerationStructure(d3d12_command_list_iface *iface, const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC *desc, UINT num_postbuild_info_descs, const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *postbuild_info_descs) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; struct vkd3d_acceleration_structure_build_info build_info; TRACE("iface %p, desc %p, num_postbuild_info_descs %u, postbuild_info_descs %p\n", iface, desc, num_postbuild_info_descs, postbuild_info_descs); if (!d3d12_device_supports_ray_tracing_tier_1_0(list->device)) { WARN("Acceleration structure is not supported. Calling this is invalid.\n"); return; } if (!vkd3d_acceleration_structure_convert_inputs(list->device, &build_info, &desc->Inputs)) { ERR("Failed to convert inputs.\n"); return; } if (desc->DestAccelerationStructureData) { build_info.build_info.dstAccelerationStructure = vkd3d_va_map_place_acceleration_structure(&list->device->memory_allocator.va_map, list->device, desc->DestAccelerationStructureData); if (build_info.build_info.dstAccelerationStructure == VK_NULL_HANDLE) { ERR("Failed to place destAccelerationStructure. 
Dropping call.\n"); return; } } if (build_info.build_info.mode == VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR && desc->SourceAccelerationStructureData) { build_info.build_info.srcAccelerationStructure = vkd3d_va_map_place_acceleration_structure(&list->device->memory_allocator.va_map, list->device, desc->SourceAccelerationStructureData); if (build_info.build_info.srcAccelerationStructure == VK_NULL_HANDLE) { ERR("Failed to place srcAccelerationStructure. Dropping call.\n"); return; } } build_info.build_info.scratchData.deviceAddress = desc->ScratchAccelerationStructureData; d3d12_command_list_end_current_render_pass(list, true); VK_CALL(vkCmdBuildAccelerationStructuresKHR(list->vk_command_buffer, 1, &build_info.build_info, build_info.build_range_ptrs)); vkd3d_acceleration_structure_build_info_cleanup(&build_info); if (num_postbuild_info_descs) { vkd3d_acceleration_structure_emit_immediate_postbuild_info(list, num_postbuild_info_descs, postbuild_info_descs, build_info.build_info.dstAccelerationStructure); } VKD3D_BREADCRUMB_COMMAND(BUILD_RTAS); } static void STDMETHODCALLTYPE d3d12_command_list_EmitRaytracingAccelerationStructurePostbuildInfo(d3d12_command_list_iface *iface, const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *desc, UINT num_acceleration_structures, const D3D12_GPU_VIRTUAL_ADDRESS *src_data) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); TRACE("iface %p, desc %p, num_acceleration_structures %u, src_data %p\n", iface, desc, num_acceleration_structures, src_data); if (!d3d12_device_supports_ray_tracing_tier_1_0(list->device)) { WARN("Acceleration structure is not supported. Calling this is invalid.\n"); return; } d3d12_command_list_end_current_render_pass(list, true); vkd3d_acceleration_structure_emit_postbuild_info(list, desc, num_acceleration_structures, src_data); VKD3D_BREADCRUMB_COMMAND(EMIT_RTAS_POSTBUILD); } static void STDMETHODCALLTYPE d3d12_command_list_CopyRaytracingAccelerationStructure(d3d12_command_list_iface *iface, D3D12_GPU_VIRTUAL_ADDRESS dst_data, D3D12_GPU_VIRTUAL_ADDRESS src_data, D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE mode) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); TRACE("iface %p, dst_data %#"PRIx64", src_data %#"PRIx64", mode %u\n", iface, dst_data, src_data, mode); if (!d3d12_device_supports_ray_tracing_tier_1_0(list->device)) { WARN("Acceleration structure is not supported. Calling this is invalid.\n"); return; } d3d12_command_list_end_current_render_pass(list, true); vkd3d_acceleration_structure_copy(list, dst_data, src_data, mode); VKD3D_BREADCRUMB_COMMAND(COPY_RTAS); } static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState1(d3d12_command_list_iface *iface, ID3D12StateObject *state_object) { struct d3d12_state_object *state = impl_from_ID3D12StateObject(state_object); struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); TRACE("iface %p, state_object %p\n", iface, state_object); if (list->rt_state == state) return; d3d12_command_list_invalidate_current_pipeline(list, false); /* SetPSO and SetPSO1 alias the same internal active pipeline state even if they are completely different types. */ list->state = NULL; list->rt_state = state; /* DXR uses compute bind points for descriptors. When binding an RTPSO, invalidate all state * to make sure we broadcast state correctly to COMPUTE or RT bind points in Vulkan. 
*/ if (list->active_bind_point != VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR) { list->active_bind_point = VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR; d3d12_command_list_invalidate_root_parameters(list, VK_PIPELINE_BIND_POINT_COMPUTE, true); } #ifdef VKD3D_ENABLE_BREADCRUMBS if ((vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS) && state) { struct vkd3d_breadcrumb_command cmd; size_t i; for (i = 0; i < state->breadcrumb_shaders_count; i++) { cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_SHADER_HASH; cmd.shader.stage = state->breadcrumb_shaders[i].stage; cmd.shader.hash = state->breadcrumb_shaders[i].hash; vkd3d_breadcrumb_tracer_add_command(list, &cmd); cmd.type = VKD3D_BREADCRUMB_COMMAND_TAG; cmd.tag = state->breadcrumb_shaders[i].name; vkd3d_breadcrumb_tracer_add_command(list, &cmd); } } #endif } static VkStridedDeviceAddressRegionKHR convert_strided_range( const D3D12_GPU_VIRTUAL_ADDRESS_RANGE_AND_STRIDE *region) { VkStridedDeviceAddressRegionKHR table; table.deviceAddress = region->StartAddress; table.size = region->SizeInBytes; table.stride = region->StrideInBytes; return table; } static void STDMETHODCALLTYPE d3d12_command_list_DispatchRays(d3d12_command_list_iface *iface, const D3D12_DISPATCH_RAYS_DESC *desc) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; VkStridedDeviceAddressRegionKHR callable_table; VkStridedDeviceAddressRegionKHR raygen_table; VkStridedDeviceAddressRegionKHR miss_table; VkStridedDeviceAddressRegionKHR hit_table; TRACE("iface %p, desc %p\n", iface, desc); if (!d3d12_device_supports_ray_tracing_tier_1_0(list->device)) { WARN("Ray tracing is not supported. Calling this is invalid.\n"); return; } raygen_table.deviceAddress = desc->RayGenerationShaderRecord.StartAddress; raygen_table.size = desc->RayGenerationShaderRecord.SizeInBytes; raygen_table.stride = raygen_table.size; miss_table = convert_strided_range(&desc->MissShaderTable); hit_table = convert_strided_range(&desc->HitGroupTable); callable_table = convert_strided_range(&desc->CallableShaderTable); if (!d3d12_command_list_update_raygen_state(list)) { WARN("Failed to update raygen state, ignoring dispatch.\n"); return; } /* TODO: Is DispatchRays predicated? 
*/ VK_CALL(vkCmdTraceRaysKHR(list->vk_command_buffer, &raygen_table, &miss_table, &hit_table, &callable_table, desc->Width, desc->Height, desc->Depth)); VKD3D_BREADCRUMB_AUX32(desc->Width); VKD3D_BREADCRUMB_AUX32(desc->Height); VKD3D_BREADCRUMB_AUX32(desc->Depth); VKD3D_BREADCRUMB_AUX64(raygen_table.deviceAddress); VKD3D_BREADCRUMB_AUX64(raygen_table.size); VKD3D_BREADCRUMB_AUX32(raygen_table.stride); VKD3D_BREADCRUMB_AUX64(miss_table.deviceAddress); VKD3D_BREADCRUMB_AUX64(miss_table.size); VKD3D_BREADCRUMB_AUX32(miss_table.stride); VKD3D_BREADCRUMB_AUX64(hit_table.deviceAddress); VKD3D_BREADCRUMB_AUX64(hit_table.size); VKD3D_BREADCRUMB_AUX32(hit_table.stride); VKD3D_BREADCRUMB_AUX64(callable_table.deviceAddress); VKD3D_BREADCRUMB_AUX64(callable_table.size); VKD3D_BREADCRUMB_AUX32(callable_table.stride); VKD3D_BREADCRUMB_COMMAND(TRACE_RAYS); } static VkFragmentShadingRateCombinerOpKHR vk_shading_rate_combiner_from_d3d12(D3D12_SHADING_RATE_COMBINER combiner) { switch (combiner) { case D3D12_SHADING_RATE_COMBINER_PASSTHROUGH: return VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR; case D3D12_SHADING_RATE_COMBINER_OVERRIDE: return VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR; case D3D12_SHADING_RATE_COMBINER_MAX: return VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR; case D3D12_SHADING_RATE_COMBINER_MIN: return VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MIN_KHR; case D3D12_SHADING_RATE_COMBINER_SUM: /* Undocumented log space */ return VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MUL_KHR; default: ERR("Unhandled shading rate combiner %u.\n", combiner); /* Default to passthrough for unknown */ return VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR; } } static uint32_t vk_fragment_size_from_d3d12(D3D12_AXIS_SHADING_RATE axis_rate) { switch (axis_rate) { case D3D12_AXIS_SHADING_RATE_1X: return 1; case D3D12_AXIS_SHADING_RATE_2X: return 2; case D3D12_AXIS_SHADING_RATE_4X: return 4; default: ERR("Unhandled axis shading rate %u.\n", axis_rate); return 1; } } static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRate(d3d12_command_list_iface *iface, D3D12_SHADING_RATE base, const D3D12_SHADING_RATE_COMBINER *combiners) { VkFragmentShadingRateCombinerOpKHR combiner_ops[D3D12_RS_SET_SHADING_RATE_COMBINER_COUNT]; struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct vkd3d_dynamic_state *dyn_state = &list->dynamic_state; VkExtent2D fragment_size; uint32_t i; TRACE("iface %p, base %#x, combiners %p\n", iface, base, combiners); fragment_size.width = vk_fragment_size_from_d3d12(D3D12_GET_COARSE_SHADING_RATE_X_AXIS(base)); fragment_size.height = vk_fragment_size_from_d3d12(D3D12_GET_COARSE_SHADING_RATE_Y_AXIS(base)); for (i = 0; i < D3D12_RS_SET_SHADING_RATE_COMBINER_COUNT; i++) { combiner_ops[i] = combiners ? 
vk_shading_rate_combiner_from_d3d12(combiners[i]) : VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR; } if (memcmp(&fragment_size, &dyn_state->fragment_shading_rate.fragment_size, sizeof(fragment_size)) != 0 || memcmp(combiner_ops, dyn_state->fragment_shading_rate.combiner_ops, sizeof(combiner_ops)) != 0) { dyn_state->fragment_shading_rate.fragment_size = fragment_size; memcpy(dyn_state->fragment_shading_rate.combiner_ops, combiner_ops, sizeof(combiner_ops)); dyn_state->dirty_flags |= VKD3D_DYNAMIC_STATE_FRAGMENT_SHADING_RATE; } } static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRateImage(d3d12_command_list_iface *iface, ID3D12Resource *image) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); struct d3d12_resource *vrs_image = impl_from_ID3D12Resource(image); TRACE("iface %p, image %p.\n", iface, image); /* Handle invalid images being set here. */ if (vrs_image && !vrs_image->vrs_view) { WARN("RSSetShadingRateImage called with invalid resource for VRS.\n"); vrs_image = NULL; } if (vrs_image == list->vrs_image) return; d3d12_command_list_invalidate_rendering_info(list); /* Need to end the renderpass if we have one to make * way for the new VRS attachment */ if (list->rendering_info.state_flags & (VKD3D_RENDERING_ACTIVE | VKD3D_RENDERING_SUSPENDED)) d3d12_command_list_end_current_render_pass(list, false); if (vrs_image) d3d12_command_list_track_resource_usage(list, vrs_image, true); list->vrs_image = vrs_image; } static void STDMETHODCALLTYPE d3d12_command_list_DispatchMesh(d3d12_command_list_iface *iface, UINT x, UINT y, UINT z) { FIXME("iface %p, x %u, y %u, z %u stub!", iface, x, y, z); } static CONST_VTBL struct ID3D12GraphicsCommandList6Vtbl d3d12_command_list_vtbl = { /* IUnknown methods */ d3d12_command_list_QueryInterface, d3d12_command_list_AddRef, d3d12_command_list_Release, /* ID3D12Object methods */ d3d12_command_list_GetPrivateData, d3d12_command_list_SetPrivateData, d3d12_command_list_SetPrivateDataInterface, (void *)d3d12_object_SetName, /* ID3D12DeviceChild methods */ d3d12_command_list_GetDevice, /* ID3D12CommandList methods */ d3d12_command_list_GetType, /* ID3D12GraphicsCommandList methods */ d3d12_command_list_Close, d3d12_command_list_Reset, d3d12_command_list_ClearState, d3d12_command_list_DrawInstanced, d3d12_command_list_DrawIndexedInstanced, d3d12_command_list_Dispatch, d3d12_command_list_CopyBufferRegion, d3d12_command_list_CopyTextureRegion, d3d12_command_list_CopyResource, d3d12_command_list_CopyTiles, d3d12_command_list_ResolveSubresource, d3d12_command_list_IASetPrimitiveTopology, d3d12_command_list_RSSetViewports, d3d12_command_list_RSSetScissorRects, d3d12_command_list_OMSetBlendFactor, d3d12_command_list_OMSetStencilRef, d3d12_command_list_SetPipelineState, d3d12_command_list_ResourceBarrier, d3d12_command_list_ExecuteBundle, d3d12_command_list_SetDescriptorHeaps, d3d12_command_list_SetComputeRootSignature, d3d12_command_list_SetGraphicsRootSignature, d3d12_command_list_SetComputeRootDescriptorTable, d3d12_command_list_SetGraphicsRootDescriptorTable, d3d12_command_list_SetComputeRoot32BitConstant, d3d12_command_list_SetGraphicsRoot32BitConstant, d3d12_command_list_SetComputeRoot32BitConstants, d3d12_command_list_SetGraphicsRoot32BitConstants, d3d12_command_list_SetComputeRootConstantBufferView, d3d12_command_list_SetGraphicsRootConstantBufferView, d3d12_command_list_SetComputeRootShaderResourceView, d3d12_command_list_SetGraphicsRootShaderResourceView, d3d12_command_list_SetComputeRootUnorderedAccessView, 
d3d12_command_list_SetGraphicsRootUnorderedAccessView, d3d12_command_list_IASetIndexBuffer, d3d12_command_list_IASetVertexBuffers, d3d12_command_list_SOSetTargets, d3d12_command_list_OMSetRenderTargets, d3d12_command_list_ClearDepthStencilView, d3d12_command_list_ClearRenderTargetView, d3d12_command_list_ClearUnorderedAccessViewUint, d3d12_command_list_ClearUnorderedAccessViewFloat, d3d12_command_list_DiscardResource, d3d12_command_list_BeginQuery, d3d12_command_list_EndQuery, d3d12_command_list_ResolveQueryData, d3d12_command_list_SetPredication, d3d12_command_list_SetMarker, d3d12_command_list_BeginEvent, d3d12_command_list_EndEvent, d3d12_command_list_ExecuteIndirect, /* ID3D12GraphicsCommandList1 methods */ d3d12_command_list_AtomicCopyBufferUINT, d3d12_command_list_AtomicCopyBufferUINT64, d3d12_command_list_OMSetDepthBounds, d3d12_command_list_SetSamplePositions, d3d12_command_list_ResolveSubresourceRegion, d3d12_command_list_SetViewInstanceMask, /* ID3D12GraphicsCommandList2 methods */ d3d12_command_list_WriteBufferImmediate, /* ID3D12GraphicsCommandList3 methods */ d3d12_command_list_SetProtectedResourceSession, /* ID3D12GraphicsCommandList4 methods */ d3d12_command_list_BeginRenderPass, d3d12_command_list_EndRenderPass, d3d12_command_list_InitializeMetaCommand, d3d12_command_list_ExecuteMetaCommand, d3d12_command_list_BuildRaytracingAccelerationStructure, d3d12_command_list_EmitRaytracingAccelerationStructurePostbuildInfo, d3d12_command_list_CopyRaytracingAccelerationStructure, d3d12_command_list_SetPipelineState1, d3d12_command_list_DispatchRays, /* ID3D12GraphicsCommandList5 methods */ d3d12_command_list_RSSetShadingRate, d3d12_command_list_RSSetShadingRateImage, /* ID3D12GraphicsCommandList6 methods */ d3d12_command_list_DispatchMesh, }; #ifdef VKD3D_ENABLE_PROFILING #include "command_list_profiled.h" #endif static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12CommandList *iface) { if (!iface) return NULL; #ifdef VKD3D_ENABLE_PROFILING assert(iface->lpVtbl == (struct ID3D12CommandListVtbl *)&d3d12_command_list_vtbl || iface->lpVtbl == (struct ID3D12CommandListVtbl *)&d3d12_command_list_vtbl_profiled); #else assert(iface->lpVtbl == (struct ID3D12CommandListVtbl *)&d3d12_command_list_vtbl); #endif return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList_iface); } extern CONST_VTBL struct ID3D12GraphicsCommandListExtVtbl d3d12_command_list_vkd3d_ext_vtbl; static void d3d12_command_list_init_attachment_info(VkRenderingAttachmentInfoKHR *attachment_info) { attachment_info->sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR; attachment_info->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; attachment_info->storeOp = VK_ATTACHMENT_STORE_OP_STORE; } static void d3d12_command_list_init_rendering_info(struct d3d12_device *device, struct vkd3d_rendering_info *rendering_info) { unsigned int i; rendering_info->info.sType = VK_STRUCTURE_TYPE_RENDERING_INFO_KHR; rendering_info->info.colorAttachmentCount = D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; rendering_info->info.pColorAttachments = rendering_info->rtv; for (i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; i++) d3d12_command_list_init_attachment_info(&rendering_info->rtv[i]); d3d12_command_list_init_attachment_info(&rendering_info->dsv); if (device->device_info.fragment_shading_rate_features.attachmentFragmentShadingRate) { uint32_t tile_size = d3d12_determine_shading_rate_image_tile_size(device); if (tile_size) { rendering_info->vrs.sType = 
VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR; rendering_info->vrs.shadingRateAttachmentTexelSize.width = tile_size; rendering_info->vrs.shadingRateAttachmentTexelSize.height = tile_size; vk_prepend_struct(&rendering_info->info, &rendering_info->vrs); } } } static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d3d12_device *device, D3D12_COMMAND_LIST_TYPE type) { HRESULT hr; memset(list, 0, sizeof(*list)); #ifdef VKD3D_ENABLE_PROFILING if (vkd3d_uses_profiling()) list->ID3D12GraphicsCommandList_iface.lpVtbl = &d3d12_command_list_vtbl_profiled; else list->ID3D12GraphicsCommandList_iface.lpVtbl = &d3d12_command_list_vtbl; #else list->ID3D12GraphicsCommandList_iface.lpVtbl = &d3d12_command_list_vtbl; #endif list->refcount = 1; list->type = type; list->ID3D12GraphicsCommandListExt_iface.lpVtbl = &d3d12_command_list_vkd3d_ext_vtbl; d3d12_command_list_init_rendering_info(device, &list->rendering_info); if (FAILED(hr = vkd3d_private_store_init(&list->private_store))) return hr; d3d12_device_add_ref(list->device = device); return hr; } HRESULT d3d12_command_list_create(struct d3d12_device *device, UINT node_mask, D3D12_COMMAND_LIST_TYPE type, struct d3d12_command_list **list) { struct d3d12_command_list *object; HRESULT hr; debug_ignored_node_mask(node_mask); /* We store RTV descriptors by value, which we align to 64 bytes, so d3d12_command_list inherits this requirement. * Otherwise ubsan complains. */ if (!(object = vkd3d_malloc_aligned(sizeof(*object), D3D12_DESC_ALIGNMENT))) return E_OUTOFMEMORY; if (FAILED(hr = d3d12_command_list_init(object, device, type))) { vkd3d_free_aligned(object); return hr; } TRACE("Created command list %p.\n", object); *list = object; return S_OK; } static struct d3d12_command_list *d3d12_command_list_from_iface(ID3D12CommandList *iface) { bool is_valid = false; if (!iface) return NULL; #ifdef VKD3D_ENABLE_PROFILING is_valid |= iface->lpVtbl == (struct ID3D12CommandListVtbl *)&d3d12_command_list_vtbl_profiled; #endif is_valid |= iface->lpVtbl == (struct ID3D12CommandListVtbl *)&d3d12_command_list_vtbl; if (!is_valid) return NULL; return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList_iface); } /* ID3D12CommandQueue */ static inline struct d3d12_command_queue *impl_from_ID3D12CommandQueue(ID3D12CommandQueue *iface) { return CONTAINING_RECORD(iface, struct d3d12_command_queue, ID3D12CommandQueue_iface); } static HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12CommandQueue *iface, REFIID riid, void **object) { TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); if (IsEqualGUID(riid, &IID_ID3D12CommandQueue) || IsEqualGUID(riid, &IID_ID3D12Pageable) || IsEqualGUID(riid, &IID_ID3D12DeviceChild) || IsEqualGUID(riid, &IID_ID3D12Object) || IsEqualGUID(riid, &IID_IUnknown)) { ID3D12CommandQueue_AddRef(iface); *object = iface; return S_OK; } #ifdef VKD3D_BUILD_STANDALONE_D3D12 if (IsEqualGUID(riid, &IID_IWineDXGISwapChainFactory)) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); IWineDXGISwapChainFactory_AddRef(&command_queue->swapchain_factory.IWineDXGISwapChainFactory_iface); *object = &command_queue->swapchain_factory; return S_OK; } #endif WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid)); *object = NULL; return E_NOINTERFACE; } static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *iface) { struct d3d12_command_queue *command_queue = 
impl_from_ID3D12CommandQueue(iface); ULONG refcount = InterlockedIncrement(&command_queue->refcount); TRACE("%p increasing refcount to %u.\n", command_queue, refcount); return refcount; } static ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(ID3D12CommandQueue *iface) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); ULONG refcount = InterlockedDecrement(&command_queue->refcount); TRACE("%p decreasing refcount to %u.\n", command_queue, refcount); if (!refcount) { struct d3d12_device *device = command_queue->device; vkd3d_private_store_destroy(&command_queue->private_store); d3d12_command_queue_submit_stop(command_queue); vkd3d_fence_worker_stop(&command_queue->fence_worker, device); d3d12_device_unmap_vkd3d_queue(device, command_queue->vkd3d_queue); pthread_join(command_queue->submission_thread, NULL); pthread_mutex_destroy(&command_queue->queue_lock); pthread_cond_destroy(&command_queue->queue_cond); vkd3d_free(command_queue->submissions); vkd3d_free(command_queue); d3d12_device_release(device); } return refcount; } static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetPrivateData(ID3D12CommandQueue *iface, REFGUID guid, UINT *data_size, void *data) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); return vkd3d_get_private_data(&command_queue->private_store, guid, data_size, data); } static HRESULT STDMETHODCALLTYPE d3d12_command_queue_SetPrivateData(ID3D12CommandQueue *iface, REFGUID guid, UINT data_size, const void *data) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); return vkd3d_set_private_data(&command_queue->private_store, guid, data_size, data, NULL, NULL); } static HRESULT STDMETHODCALLTYPE d3d12_command_queue_SetPrivateDataInterface(ID3D12CommandQueue *iface, REFGUID guid, const IUnknown *data) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); return vkd3d_set_private_data_interface(&command_queue->private_store, guid, data, NULL, NULL); } static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetDevice(ID3D12CommandQueue *iface, REFIID iid, void **device) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); return d3d12_device_query_interface(command_queue->device, iid, device); } static unsigned int vkd3d_get_tile_index_from_coordinate(const struct d3d12_sparse_info *sparse, const D3D12_TILED_RESOURCE_COORDINATE *coord) { const D3D12_SUBRESOURCE_TILING *tiling = &sparse->tilings[coord->Subresource]; if (tiling->StartTileIndexInOverallResource == ~0u) return sparse->packed_mips.StartTileIndexInOverallResource + coord->X; return tiling->StartTileIndexInOverallResource + coord->X + tiling->WidthInTiles * (coord->Y + tiling->HeightInTiles * coord->Z); } static unsigned int vkd3d_get_tile_index_from_region(const struct d3d12_sparse_info *sparse, const D3D12_TILED_RESOURCE_COORDINATE *coord, const D3D12_TILE_REGION_SIZE *size, unsigned int tile_index_in_region) { if (!size->UseBox) { /* Tiles are already ordered by subresource and coordinates correctly, * so we can just add the tile index to the region's base index */ return vkd3d_get_tile_index_from_coordinate(sparse, 
coord) + tile_index_in_region; } else { D3D12_TILED_RESOURCE_COORDINATE box_coord = *coord; box_coord.X += (tile_index_in_region % size->Width); box_coord.Y += (tile_index_in_region / size->Width) % size->Height; box_coord.Z += (tile_index_in_region / (size->Width * size->Height)); return vkd3d_get_tile_index_from_coordinate(sparse, &box_coord); } } static void STDMETHODCALLTYPE d3d12_command_queue_UpdateTileMappings(ID3D12CommandQueue *iface, ID3D12Resource *resource, UINT region_count, const D3D12_TILED_RESOURCE_COORDINATE *region_coords, const D3D12_TILE_REGION_SIZE *region_sizes, ID3D12Heap *heap, UINT range_count, const D3D12_TILE_RANGE_FLAGS *range_flags, UINT *heap_range_offsets, UINT *range_tile_counts, D3D12_TILE_MAPPING_FLAGS flags) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); unsigned int region_tile = 0, region_idx = 0, range_tile = 0, range_idx = 0; struct d3d12_resource *res = impl_from_ID3D12Resource(resource); struct d3d12_heap *memory_heap = impl_from_ID3D12Heap(heap); struct vkd3d_sparse_memory_bind *bind, **bound_tiles; struct d3d12_sparse_info *sparse = &res->sparse; D3D12_TILED_RESOURCE_COORDINATE region_coord; struct d3d12_command_queue_submission sub; D3D12_TILE_REGION_SIZE region_size; D3D12_TILE_RANGE_FLAGS range_flag; UINT range_size, range_offset; size_t bind_infos_size = 0; TRACE("iface %p, resource %p, region_count %u, region_coords %p, " "region_sizes %p, heap %p, range_count %u, range_flags %p, heap_range_offsets %p, " "range_tile_counts %p, flags %#x.\n", iface, resource, region_count, region_coords, region_sizes, heap, range_count, range_flags, heap_range_offsets, range_tile_counts, flags); if (!region_count || !range_count) return; sub.type = VKD3D_SUBMISSION_BIND_SPARSE; sub.bind_sparse.mode = VKD3D_SPARSE_MEMORY_BIND_MODE_UPDATE; sub.bind_sparse.bind_count = 0; sub.bind_sparse.bind_infos = NULL; sub.bind_sparse.dst_resource = res; sub.bind_sparse.src_resource = NULL; if (region_coords) region_coord = region_coords[0]; else { region_coord.X = 0; region_coord.Y = 0; region_coord.Z = 0; region_coord.Subresource = 0; } if (region_sizes) region_size = region_sizes[0]; else { region_size.NumTiles = region_coords ? 
1 : sparse->tile_count; region_size.UseBox = false; region_size.Width = 0; region_size.Height = 0; region_size.Depth = 0; } range_flag = D3D12_TILE_RANGE_FLAG_NONE; range_size = ~0u; range_offset = 0; if (!(bound_tiles = vkd3d_calloc(sparse->tile_count, sizeof(*bound_tiles)))) { ERR("Failed to allocate tile mapping table.\n"); return; } while (region_idx < region_count && range_idx < range_count) { if (range_tile == 0) { if (range_flags) range_flag = range_flags[range_idx]; if (range_tile_counts) range_size = range_tile_counts[range_idx]; if (heap_range_offsets) range_offset = heap_range_offsets[range_idx]; } if (region_tile == 0) { if (region_coords) region_coord = region_coords[region_idx]; if (region_sizes) region_size = region_sizes[region_idx]; } if (range_flag != D3D12_TILE_RANGE_FLAG_SKIP) { unsigned int tile_index = vkd3d_get_tile_index_from_region(sparse, ®ion_coord, ®ion_size, region_tile); if (!(bind = bound_tiles[tile_index])) { if (!vkd3d_array_reserve((void **)&sub.bind_sparse.bind_infos, &bind_infos_size, sub.bind_sparse.bind_count + 1, sizeof(*sub.bind_sparse.bind_infos))) { ERR("Failed to allocate bind info array.\n"); goto fail; } bind = &sub.bind_sparse.bind_infos[sub.bind_sparse.bind_count++]; bound_tiles[tile_index] = bind; } bind->dst_tile = tile_index; bind->src_tile = 0; if (range_flag == D3D12_TILE_RANGE_FLAG_NULL) { bind->vk_memory = VK_NULL_HANDLE; bind->vk_offset = 0; } else { bind->vk_memory = memory_heap->allocation.device_allocation.vk_memory; bind->vk_offset = memory_heap->allocation.offset + VKD3D_TILE_SIZE * range_offset; if (range_flag != D3D12_TILE_RANGE_FLAG_REUSE_SINGLE_TILE) bind->vk_offset += VKD3D_TILE_SIZE * range_tile; } } if (++range_tile == range_size) { range_idx += 1; range_tile = 0; } if (++region_tile == region_size.NumTiles) { region_idx += 1; region_tile = 0; } } vkd3d_free(bound_tiles); d3d12_command_queue_add_submission(command_queue, &sub); return; fail: vkd3d_free(bound_tiles); vkd3d_free(sub.bind_sparse.bind_infos); } static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12CommandQueue *iface, ID3D12Resource *dst_resource, const D3D12_TILED_RESOURCE_COORDINATE *dst_region_start_coordinate, ID3D12Resource *src_resource, const D3D12_TILED_RESOURCE_COORDINATE *src_region_start_coordinate, const D3D12_TILE_REGION_SIZE *region_size, D3D12_TILE_MAPPING_FLAGS flags) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); struct d3d12_resource *dst_res = impl_from_ID3D12Resource(dst_resource); struct d3d12_resource *src_res = impl_from_ID3D12Resource(src_resource); struct d3d12_command_queue_submission sub; struct vkd3d_sparse_memory_bind *bind; unsigned int i; TRACE("iface %p, dst_resource %p, dst_region_start_coordinate %p, " "src_resource %p, src_region_start_coordinate %p, region_size %p, flags %#x.\n", iface, dst_resource, dst_region_start_coordinate, src_resource, src_region_start_coordinate, region_size, flags); sub.type = VKD3D_SUBMISSION_BIND_SPARSE; sub.bind_sparse.mode = VKD3D_SPARSE_MEMORY_BIND_MODE_COPY; sub.bind_sparse.bind_count = region_size->NumTiles; sub.bind_sparse.bind_infos = vkd3d_malloc(region_size->NumTiles * sizeof(*sub.bind_sparse.bind_infos)); sub.bind_sparse.dst_resource = dst_res; sub.bind_sparse.src_resource = src_res; if (!sub.bind_sparse.bind_infos) { ERR("Failed to allocate bind info array.\n"); return; } for (i = 0; i < region_size->NumTiles; i++) { bind = &sub.bind_sparse.bind_infos[i]; bind->dst_tile = vkd3d_get_tile_index_from_region(&dst_res->sparse, 
dst_region_start_coordinate, region_size, i); bind->src_tile = vkd3d_get_tile_index_from_region(&src_res->sparse, src_region_start_coordinate, region_size, i); bind->vk_memory = VK_NULL_HANDLE; bind->vk_offset = 0; } d3d12_command_queue_add_submission(command_queue, &sub); } static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12CommandQueue *iface, UINT command_list_count, ID3D12CommandList * const *command_lists) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); struct vkd3d_initial_transition *transitions; size_t num_transitions, num_command_buffers; struct d3d12_command_queue_submission sub; struct d3d12_command_list *cmd_list; VkCommandBuffer *buffers; LONG **outstanding; unsigned int i, j; HRESULT hr; TRACE("iface %p, command_list_count %u, command_lists %p.\n", iface, command_list_count, command_lists); if (!command_list_count) return; if (FAILED(hr = vkd3d_memory_allocator_flush_clears( &command_queue->device->memory_allocator, command_queue->device))) { d3d12_device_mark_as_removed(command_queue->device, hr, "Failed to execute pending memory clears.\n"); return; } num_command_buffers = command_list_count + 1; for (i = 0; i < command_list_count; ++i) { cmd_list = d3d12_command_list_from_iface(command_lists[i]); if (!cmd_list) { WARN("Unsupported command list type %p.\n", cmd_list); return; } if (cmd_list->vk_init_commands) num_command_buffers++; } if (!(buffers = vkd3d_calloc(num_command_buffers, sizeof(*buffers)))) { ERR("Failed to allocate command buffer array.\n"); return; } if (!(outstanding = vkd3d_calloc(command_list_count, sizeof(*outstanding)))) { ERR("Failed to allocate outstanding submissions count.\n"); vkd3d_free(buffers); return; } sub.execute.debug_capture = false; num_transitions = 0; for (i = 0, j = 0; i < command_list_count; ++i) { cmd_list = unsafe_impl_from_ID3D12CommandList(command_lists[i]); if (cmd_list->is_recording) { d3d12_device_mark_as_removed(command_queue->device, DXGI_ERROR_INVALID_CALL, "Command list %p is in recording state.\n", command_lists[i]); vkd3d_free(outstanding); vkd3d_free(buffers); return; } num_transitions += cmd_list->init_transitions_count; outstanding[i] = cmd_list->outstanding_submissions_count; InterlockedIncrement(outstanding[i]); if (cmd_list->vk_init_commands) buffers[j++] = cmd_list->vk_init_commands; buffers[j++] = cmd_list->vk_command_buffer; if (cmd_list->debug_capture) sub.execute.debug_capture = true; } /* Append a full GPU barrier between submissions. * This command buffer is SIMULTANEOUS_BIT. */ buffers[j++] = command_queue->vkd3d_queue->barrier_command_buffer; if (command_list_count == 1 && num_transitions != 0) { /* Pilfer directly. 
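         * With exactly one command list we can steal its init_transitions array for this submission instead
         * of allocating and copying like the multi-list path below does.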
*/ cmd_list = unsafe_impl_from_ID3D12CommandList(command_lists[0]); sub.execute.transitions = cmd_list->init_transitions; sub.execute.transition_count = cmd_list->init_transitions_count; cmd_list->init_transitions = NULL; cmd_list->init_transitions_count = 0; cmd_list->init_transitions_size = 0; } else if (num_transitions != 0) { sub.execute.transitions = vkd3d_malloc(num_transitions * sizeof(*sub.execute.transitions)); sub.execute.transition_count = num_transitions; transitions = sub.execute.transitions; for (i = 0; i < command_list_count; ++i) { cmd_list = unsafe_impl_from_ID3D12CommandList(command_lists[i]); memcpy(transitions, cmd_list->init_transitions, cmd_list->init_transitions_count * sizeof(*transitions)); transitions += cmd_list->init_transitions_count; } } else { sub.execute.transitions = NULL; sub.execute.transition_count = 0; } sub.type = VKD3D_SUBMISSION_EXECUTE; sub.execute.cmd = buffers; sub.execute.cmd_count = num_command_buffers; sub.execute.outstanding_submissions_counters = outstanding; sub.execute.outstanding_submissions_counter_count = command_list_count; d3d12_command_queue_add_submission(command_queue, &sub); } static void STDMETHODCALLTYPE d3d12_command_queue_SetMarker(ID3D12CommandQueue *iface, UINT metadata, const void *data, UINT size) { FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); } static void STDMETHODCALLTYPE d3d12_command_queue_BeginEvent(ID3D12CommandQueue *iface, UINT metadata, const void *data, UINT size) { FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); } static void STDMETHODCALLTYPE d3d12_command_queue_EndEvent(ID3D12CommandQueue *iface) { FIXME("iface %p stub!\n", iface); } static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *iface, ID3D12Fence *fence_iface, UINT64 value) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); struct d3d12_command_queue_submission sub; struct d3d12_fence *fence; TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value); fence = impl_from_ID3D12Fence(fence_iface); d3d12_fence_inc_ref(fence); sub.type = VKD3D_SUBMISSION_SIGNAL; sub.signal.fence = fence; sub.signal.value = value; d3d12_command_queue_add_submission(command_queue, &sub); return S_OK; } static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *iface, ID3D12Fence *fence_iface, UINT64 value) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); struct d3d12_command_queue_submission sub; struct d3d12_fence *fence; TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value); fence = impl_from_ID3D12Fence(fence_iface); d3d12_fence_inc_ref(fence); sub.type = VKD3D_SUBMISSION_WAIT; sub.wait.fence = fence; sub.wait.value = value; d3d12_command_queue_add_submission(command_queue, &sub); return S_OK; } static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetTimestampFrequency(ID3D12CommandQueue *iface, UINT64 *frequency) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); struct d3d12_device *device = command_queue->device; TRACE("iface %p, frequency %p.\n", iface, frequency); if (!command_queue->vkd3d_queue->timestamp_bits) { WARN("Timestamp queries not supported.\n"); return E_FAIL; } *frequency = 1000000000 / device->vk_info.device_limits.timestampPeriod; return S_OK; } static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetClockCalibration(ID3D12CommandQueue *iface, UINT64 *gpu_timestamp, UINT64 *cpu_timestamp) {
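    /* Sample a GPU timestamp and a CPU timestamp as close together as possible. With VK_EXT_calibrated_timestamps
     * both time domains can be queried in one call; if the QPC domain is not exposed, the Win32 path below brackets
     * the device query with QueryPerformanceCounter and uses the midpoint as the CPU timestamp. */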
#ifdef _WIN32 struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); struct d3d12_device *device = command_queue->device; const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkCalibratedTimestampInfoEXT timestamp_infos[2], *timestamp_info; uint64_t max_deviation, timestamps[2]; LARGE_INTEGER qpc_begin, qpc_end; uint32_t count = 0; VkResult vr; TRACE("iface %p, gpu_timestamp %p, cpu_timestamp %p.\n", iface, gpu_timestamp, cpu_timestamp); if (!command_queue->vkd3d_queue->timestamp_bits) { WARN("Timestamp queries not supported.\n"); return E_FAIL; } if (!(device->device_info.time_domains & VKD3D_TIME_DOMAIN_DEVICE)) { FIXME("Calibrated timestamps not supported by device.\n"); *gpu_timestamp = 0; *cpu_timestamp = 0; return S_OK; } timestamp_info = &timestamp_infos[count++]; timestamp_info->sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT; timestamp_info->pNext = NULL; timestamp_info->timeDomain = VK_TIME_DOMAIN_DEVICE_EXT; if (device->device_info.time_domains & VKD3D_TIME_DOMAIN_QPC) { timestamp_info = &timestamp_infos[count++]; timestamp_info->sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT; timestamp_info->pNext = NULL; timestamp_info->timeDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT; } else { FIXME_ONCE("QPC domain not supported by device, timestamp calibration may be inaccurate.\n"); QueryPerformanceCounter(&qpc_begin); } if ((vr = VK_CALL(vkGetCalibratedTimestampsEXT(device->vk_device, count, timestamp_infos, timestamps, &max_deviation))) < 0) { ERR("Querying calibrated timestamps failed, vr %d.\n", vr); return hresult_from_vk_result(vr); } if (!(device->device_info.time_domains & VKD3D_TIME_DOMAIN_QPC)) { QueryPerformanceCounter(&qpc_end); timestamps[1] = qpc_begin.QuadPart + (qpc_end.QuadPart - qpc_begin.QuadPart) / 2; } *gpu_timestamp = timestamps[0]; *cpu_timestamp = timestamps[1]; return S_OK; #else FIXME("Calibrated timestamps not supported.\n"); *gpu_timestamp = 0; *cpu_timestamp = 0; return S_OK; #endif } static D3D12_COMMAND_QUEUE_DESC * STDMETHODCALLTYPE d3d12_command_queue_GetDesc(ID3D12CommandQueue *iface, D3D12_COMMAND_QUEUE_DESC *desc) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); TRACE("iface %p, desc %p.\n", iface, desc); *desc = command_queue->desc; return desc; } static CONST_VTBL struct ID3D12CommandQueueVtbl d3d12_command_queue_vtbl = { /* IUnknown methods */ d3d12_command_queue_QueryInterface, d3d12_command_queue_AddRef, d3d12_command_queue_Release, /* ID3D12Object methods */ d3d12_command_queue_GetPrivateData, d3d12_command_queue_SetPrivateData, d3d12_command_queue_SetPrivateDataInterface, (void *)d3d12_object_SetName, /* ID3D12DeviceChild methods */ d3d12_command_queue_GetDevice, /* ID3D12CommandQueue methods */ d3d12_command_queue_UpdateTileMappings, d3d12_command_queue_CopyTileMappings, d3d12_command_queue_ExecuteCommandLists, d3d12_command_queue_SetMarker, d3d12_command_queue_BeginEvent, d3d12_command_queue_EndEvent, d3d12_command_queue_Signal, d3d12_command_queue_Wait, d3d12_command_queue_GetTimestampFrequency, d3d12_command_queue_GetClockCalibration, d3d12_command_queue_GetDesc, }; static void d3d12_command_queue_wait(struct d3d12_command_queue *command_queue, struct d3d12_fence *fence, UINT64 value) { struct vkd3d_queue *queue; uint64_t wait_count; queue = command_queue->vkd3d_queue; d3d12_fence_lock(fence); /* This is the critical part required to support out-of-order signal.
* Normally we would be able to submit waits and signals out of order, * but we don't have virtualized queues in Vulkan, so we need to handle the case * where multiple queues alias over the same physical queue, so effectively, we need to manage out-of-order submits * ourselves. */ d3d12_fence_block_until_pending_value_reaches_locked(fence, value); /* If a host signal unblocked us, or we know that the fence has reached a specific value, there is no need * to queue up a wait. */ if (d3d12_fence_can_elide_wait_semaphore_locked(fence, value, queue)) { d3d12_fence_unlock(fence); return; } TRACE("queue %p, fence %p, value %#"PRIx64".\n", command_queue, fence, value); wait_count = d3d12_fence_get_physical_wait_value_locked(fence, value); d3d12_fence_unlock(fence); /* Defer the wait to next submit. * This is also important, since we have to hold on to a private reference on the fence * until we have observed the wait to actually complete. */ assert(fence->timeline_semaphore); vkd3d_queue_add_wait(command_queue->vkd3d_queue, fence, fence->timeline_semaphore, wait_count); } static void d3d12_command_queue_signal(struct d3d12_command_queue *command_queue, struct d3d12_fence *fence, UINT64 value) { VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info; const struct vkd3d_vk_device_procs *vk_procs; struct vkd3d_queue *vkd3d_queue; struct d3d12_device *device; VkSubmitInfo submit_info; uint64_t physical_value; uint64_t signal_value; VkQueue vk_queue; VkResult vr; HRESULT hr; device = command_queue->device; vk_procs = &device->vk_procs; vkd3d_queue = command_queue->vkd3d_queue; d3d12_fence_lock(fence); TRACE("queue %p, fence %p, value %#"PRIx64".\n", command_queue, fence, value); physical_value = d3d12_fence_add_pending_signal_locked(fence, value, vkd3d_queue); signal_value = physical_value; /* Need to hold the fence lock while we're submitting, since another thread could come in and signal the semaphore * to a higher value before we call vkQueueSubmit, which creates a non-monotonically increasing value. */ timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR; timeline_submit_info.pNext = NULL; timeline_submit_info.waitSemaphoreValueCount = 0; timeline_submit_info.pWaitSemaphoreValues = NULL; timeline_submit_info.signalSemaphoreValueCount = 1; timeline_submit_info.pSignalSemaphoreValues = &signal_value; submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submit_info.pNext = &timeline_submit_info; submit_info.waitSemaphoreCount = 0; submit_info.pWaitSemaphores = NULL; submit_info.pWaitDstStageMask = NULL; submit_info.commandBufferCount = 0; submit_info.pCommandBuffers = NULL; submit_info.signalSemaphoreCount = 1; submit_info.pSignalSemaphores = &fence->timeline_semaphore; if (!(vk_queue = vkd3d_queue_acquire(vkd3d_queue))) { ERR("Failed to acquire queue %p.\n", vkd3d_queue); d3d12_fence_unlock(fence); return; } vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE)); if (vr == VK_SUCCESS) d3d12_fence_update_pending_value_locked(fence); d3d12_fence_unlock(fence); vkd3d_queue_release(vkd3d_queue); if (vr < 0) { ERR("Failed to submit signal operation, vr %d.\n", vr); return; } VKD3D_DEVICE_REPORT_BREADCRUMB_IF(command_queue->device, vr == VK_ERROR_DEVICE_LOST); if (FAILED(hr = vkd3d_enqueue_timeline_semaphore(&command_queue->fence_worker, fence, fence->timeline_semaphore, physical_value, true, NULL, 0))) { ERR("Failed to enqueue timeline semaphore, hr #%x.\n", hr); } /* We should probably trigger DEVICE_REMOVED if we hit any errors in the submission thread. 
*/ } #define VKD3D_COMMAND_QUEUE_NUM_TRANSITION_BUFFERS 16 struct d3d12_command_queue_transition_pool { VkCommandBuffer cmd[VKD3D_COMMAND_QUEUE_NUM_TRANSITION_BUFFERS]; VkCommandPool pool; VkSemaphore timeline; uint64_t timeline_value; VkImageMemoryBarrier *barriers; size_t barriers_size; size_t barriers_count; const struct d3d12_query_heap **query_heaps; size_t query_heaps_size; size_t query_heaps_count; }; static HRESULT d3d12_command_queue_transition_pool_init(struct d3d12_command_queue_transition_pool *pool, struct d3d12_command_queue *queue) { const struct vkd3d_vk_device_procs *vk_procs = &queue->device->vk_procs; VkCommandBufferAllocateInfo alloc_info; VkCommandPoolCreateInfo pool_info; VkResult vr; HRESULT hr; memset(pool, 0, sizeof(*pool)); pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; pool_info.pNext = NULL; pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; pool_info.queueFamilyIndex = queue->vkd3d_queue->vk_family_index; if ((vr = VK_CALL(vkCreateCommandPool(queue->device->vk_device, &pool_info, NULL, &pool->pool)))) return hresult_from_vk_result(vr); alloc_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; alloc_info.pNext = NULL; alloc_info.commandPool = pool->pool; alloc_info.commandBufferCount = VKD3D_COMMAND_QUEUE_NUM_TRANSITION_BUFFERS; alloc_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; if ((vr = VK_CALL(vkAllocateCommandBuffers(queue->device->vk_device, &alloc_info, pool->cmd)))) return hresult_from_vk_result(vr); if (FAILED(hr = vkd3d_create_timeline_semaphore(queue->device, 0, &pool->timeline))) return hr; return S_OK; } static void d3d12_command_queue_transition_pool_wait(struct d3d12_command_queue_transition_pool *pool, struct d3d12_device *device, uint64_t value) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkSemaphoreWaitInfoKHR wait_info; VkResult vr; wait_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR; wait_info.pNext = NULL; wait_info.flags = 0; wait_info.pSemaphores = &pool->timeline; wait_info.semaphoreCount = 1; wait_info.pValues = &value; vr = VK_CALL(vkWaitSemaphoresKHR(device->vk_device, &wait_info, ~(uint64_t)0)); VKD3D_DEVICE_REPORT_BREADCRUMB_IF(device, vr == VK_ERROR_DEVICE_LOST); } static void d3d12_command_queue_transition_pool_deinit(struct d3d12_command_queue_transition_pool *pool, struct d3d12_device *device) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; d3d12_command_queue_transition_pool_wait(pool, device, pool->timeline_value); VK_CALL(vkDestroyCommandPool(device->vk_device, pool->pool, NULL)); VK_CALL(vkDestroySemaphore(device->vk_device, pool->timeline, NULL)); vkd3d_free(pool->barriers); vkd3d_free((void*)pool->query_heaps); } static void d3d12_command_queue_transition_pool_add_barrier(struct d3d12_command_queue_transition_pool *pool, const struct d3d12_resource *resource) { VkImageMemoryBarrier *barrier; assert(d3d12_resource_is_texture(resource)); if (!vkd3d_array_reserve((void**)&pool->barriers, &pool->barriers_size, pool->barriers_count + 1, sizeof(*pool->barriers))) { ERR("Failed to allocate barriers.\n"); return; } barrier = &pool->barriers[pool->barriers_count++]; barrier->sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; barrier->pNext = NULL; barrier->srcAccessMask = 0; barrier->dstAccessMask = 0; barrier->oldLayout = d3d12_resource_is_cpu_accessible(resource) ? 
VK_IMAGE_LAYOUT_PREINITIALIZED : VK_IMAGE_LAYOUT_UNDEFINED; barrier->newLayout = vk_image_layout_from_d3d12_resource_state(NULL, resource, resource->initial_state); barrier->srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier->dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier->image = resource->res.vk_image; barrier->subresourceRange.aspectMask = resource->format->vk_aspect_mask; barrier->subresourceRange.baseMipLevel = 0; barrier->subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; barrier->subresourceRange.baseArrayLayer = 0; barrier->subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; /* srcAccess and dstAccess mask is 0 since we will use the timeline semaphore to synchronize anyways. */ TRACE("Initial layout transition for resource %p (old layout %#x, new layout %#x).\n", resource, barrier->oldLayout, barrier->newLayout); } static void d3d12_command_queue_transition_pool_add_query_heap(struct d3d12_command_queue_transition_pool *pool, const struct d3d12_query_heap *heap) { if (!vkd3d_array_reserve((void**)&pool->query_heaps, &pool->query_heaps_size, pool->query_heaps_count + 1, sizeof(*pool->query_heaps))) { ERR("Failed to allocate query heap list.\n"); return; } pool->query_heaps[pool->query_heaps_count++] = heap; TRACE("Initialization for query heap %p.\n", heap); } static void d3d12_command_queue_init_query_heap(struct d3d12_device *device, VkCommandBuffer vk_cmd_buffer, const struct d3d12_query_heap *heap) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; unsigned int i; VK_CALL(vkCmdResetQueryPool(vk_cmd_buffer, heap->vk_query_pool, 0, heap->desc.Count)); for (i = 0; i < heap->desc.Count; i++) { switch (heap->desc.Type) { case D3D12_QUERY_HEAP_TYPE_OCCLUSION: case D3D12_QUERY_HEAP_TYPE_SO_STATISTICS: case D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS: VK_CALL(vkCmdBeginQuery(vk_cmd_buffer, heap->vk_query_pool, i, 0)); VK_CALL(vkCmdEndQuery(vk_cmd_buffer, heap->vk_query_pool, i)); break; case D3D12_QUERY_HEAP_TYPE_TIMESTAMP: case D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP: VK_CALL(vkCmdWriteTimestamp(vk_cmd_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, heap->vk_query_pool, i)); break; default: ERR("Unhandled query pool type %u.\n", heap->desc.Type); return; } } } static void d3d12_command_queue_transition_pool_build(struct d3d12_command_queue_transition_pool *pool, struct d3d12_device *device, const struct vkd3d_initial_transition *transitions, size_t count, VkCommandBuffer *vk_cmd_buffer, uint64_t *timeline_value) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; const struct vkd3d_initial_transition *transition; VkCommandBufferBeginInfo begin_info; unsigned int command_index; uint32_t need_transition; size_t i; pool->barriers_count = 0; pool->query_heaps_count = 0; if (!count) { *vk_cmd_buffer = VK_NULL_HANDLE; return; } for (i = 0; i < count; i++) { transition = &transitions[i]; switch (transition->type) { case VKD3D_INITIAL_TRANSITION_TYPE_RESOURCE: /* Memory order can be relaxed since this only needs to return 1 once. * Ordering is guaranteed by synchronization between queues. * A Signal() -> Wait() pair on the queue will guarantee that this step is done in execution order. 
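 * (Sketch of the intent, derived from the exchange below: the first submission to touch the
 * resource atomically swaps initial_layout_transition from 1 to 0 and records the barrier;
 * every later exchange observes 0 and becomes a no-op, so the transition is recorded at
 * most once.)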
*/ need_transition = vkd3d_atomic_uint32_exchange_explicit(&transition->resource.resource->initial_layout_transition, 0, vkd3d_memory_order_relaxed); if (need_transition && transition->resource.perform_initial_transition) d3d12_command_queue_transition_pool_add_barrier(pool, transition->resource.resource); break; case VKD3D_INITIAL_TRANSITION_TYPE_QUERY_HEAP: if (!vkd3d_atomic_uint32_exchange_explicit(&transition->query_heap->initialized, 1, vkd3d_memory_order_relaxed)) d3d12_command_queue_transition_pool_add_query_heap(pool, transition->query_heap); break; default: ERR("Unhandled transition type %u.\n", transition->type); } } if (!pool->barriers_count && !pool->query_heaps_count) { *vk_cmd_buffer = VK_NULL_HANDLE; return; } pool->timeline_value++; command_index = pool->timeline_value % VKD3D_COMMAND_QUEUE_NUM_TRANSITION_BUFFERS; if (pool->timeline_value > VKD3D_COMMAND_QUEUE_NUM_TRANSITION_BUFFERS) d3d12_command_queue_transition_pool_wait(pool, device, pool->timeline_value - VKD3D_COMMAND_QUEUE_NUM_TRANSITION_BUFFERS); begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; begin_info.pNext = NULL; begin_info.pInheritanceInfo = NULL; begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; VK_CALL(vkResetCommandBuffer(pool->cmd[command_index], 0)); VK_CALL(vkBeginCommandBuffer(pool->cmd[command_index], &begin_info)); VK_CALL(vkCmdPipelineBarrier(pool->cmd[command_index], VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, NULL, 0, NULL, pool->barriers_count, pool->barriers)); for (i = 0; i < pool->query_heaps_count; i++) d3d12_command_queue_init_query_heap(device, pool->cmd[command_index], pool->query_heaps[i]); VK_CALL(vkEndCommandBuffer(pool->cmd[command_index])); *vk_cmd_buffer = pool->cmd[command_index]; *timeline_value = pool->timeline_value; } static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queue, VkCommandBuffer *cmd, UINT count, VkCommandBuffer transition_cmd, VkSemaphore transition_timeline, uint64_t transition_timeline_value, LONG **submission_counters, size_t num_submission_counters, bool debug_capture) { static const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; const struct vkd3d_vk_device_procs *vk_procs = &command_queue->device->vk_procs; struct vkd3d_queue *vkd3d_queue = command_queue->vkd3d_queue; VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info[2]; uint64_t submission_timeline_count; VkSubmitInfo submit_desc[2]; uint32_t num_submits; VkQueue vk_queue; unsigned int i; VkResult vr; HRESULT hr; TRACE("queue %p, command_list_count %u, command_lists %p.\n", command_queue, count, cmd); memset(timeline_submit_info, 0, sizeof(timeline_submit_info)); memset(submit_desc, 0, sizeof(submit_desc)); if (transition_cmd) { /* The transition cmd must happen in-order, since with the advanced aliasing model in D3D12, * it is enough to separate aliases with an ExecuteCommandLists. * A clear-like operation must still happen though in the application which would acquire the alias, * but we must still be somewhat careful about when we emit initial state transitions. * The clear requirement only exists for render targets. 
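 * (Resulting submission layout, as set up below: submit_desc[0] carries only transition_cmd
 * and signals transition_timeline with transition_timeline_value; submit_desc[1] waits on
 * that timeline value at ALL_COMMANDS before executing the application's command buffers,
 * so the initial transitions are ordered ahead of the lists that depend on them.)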
*/ num_submits = 2; submit_desc[0].signalSemaphoreCount = 1; submit_desc[0].pSignalSemaphores = &transition_timeline; submit_desc[0].commandBufferCount = 1; submit_desc[0].pCommandBuffers = &transition_cmd; timeline_submit_info[0].signalSemaphoreValueCount = 1; /* Could use the serializing binary semaphore here, * but we need to keep track of the timeline on CPU as well * to know when we can reset the barrier command buffer. */ timeline_submit_info[0].pSignalSemaphoreValues = &transition_timeline_value; submit_desc[1].waitSemaphoreCount = 1; timeline_submit_info[1].waitSemaphoreValueCount = 1; timeline_submit_info[1].pWaitSemaphoreValues = &transition_timeline_value; submit_desc[1].pWaitSemaphores = &transition_timeline; submit_desc[1].pWaitDstStageMask = &wait_stage_mask; } else { num_submits = 1; } if (!(vk_queue = vkd3d_queue_acquire(vkd3d_queue))) { ERR("Failed to acquire queue %p.\n", vkd3d_queue); for (i = 0; i < num_submission_counters; i++) InterlockedDecrement(submission_counters[i]); vkd3d_free(submission_counters); return; } submit_desc[num_submits - 1].commandBufferCount = count; submit_desc[num_submits - 1].pCommandBuffers = cmd; submission_timeline_count = ++vkd3d_queue->submission_timeline_count; submit_desc[num_submits - 1].signalSemaphoreCount = 1; timeline_submit_info[num_submits - 1].signalSemaphoreValueCount = 1; submit_desc[num_submits - 1].pSignalSemaphores = &vkd3d_queue->submission_timeline; timeline_submit_info[num_submits - 1].pSignalSemaphoreValues = &submission_timeline_count; for (i = 0; i < num_submits; i++) { submit_desc[i].sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; if (submit_desc[i].waitSemaphoreCount || submit_desc[i].signalSemaphoreCount) { timeline_submit_info[i].sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR; submit_desc[i].pNext = &timeline_submit_info[i]; } } #ifdef VKD3D_ENABLE_RENDERDOC /* For each submission we have marked to be captured, we will first need to filter it * based on VKD3D_AUTO_CAPTURE_COUNTS. * If a submission index is not marked to be captured after all, we drop any capture here. * Deciding this in the submission thread is more robust than the alternative, since the submission * threads are mostly serialized. */ if (debug_capture) debug_capture = vkd3d_renderdoc_command_queue_begin_capture(command_queue); #else (void)debug_capture; #endif if ((vr = VK_CALL(vkQueueSubmit(vk_queue, num_submits, submit_desc, VK_NULL_HANDLE))) < 0) ERR("Failed to submit queue(s), vr %d.\n", vr); VKD3D_DEVICE_REPORT_BREADCRUMB_IF(command_queue->device, vr == VK_ERROR_DEVICE_LOST); #ifdef VKD3D_ENABLE_RENDERDOC if (debug_capture) vkd3d_renderdoc_command_queue_end_capture(command_queue); #endif vkd3d_queue_release(vkd3d_queue); /* After a proper submit we have to queue up some work which is tied to this submission: * - After the submit completes, we know it's safe to release private reference on any queue waits. * D3D12 allows fences to be released at any time. * - Decrementing counters for submissions. This allows us to track when it's safe to reset a command pool. * If there are pending submissions waiting, we are expected to ignore the reset. * We will report a failure in this case. Some games run into this. 
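 * (Sketch of the mechanism implied by the comment above and the call below: the fence
 * worker is handed vkd3d_queue->submission_timeline and submission_timeline_count, waits
 * for the semaphore to reach that value, and only then decrements each LONG in
 * submission_counters, which is what eventually allows the corresponding command pools
 * to be reset safely.)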
*/ if (vr == VK_SUCCESS && num_submission_counters) { if (FAILED(hr = vkd3d_enqueue_timeline_semaphore(&command_queue->fence_worker, NULL, vkd3d_queue->submission_timeline, submission_timeline_count, false, submission_counters, num_submission_counters))) { ERR("Failed to enqueue timeline semaphore.\n"); } } } static unsigned int vkd3d_compact_sparse_bind_ranges(const struct d3d12_resource *src_resource, struct vkd3d_sparse_memory_bind_range *bind_ranges, struct vkd3d_sparse_memory_bind *bind_infos, unsigned int count, enum vkd3d_sparse_memory_bind_mode mode, bool can_compact) { struct vkd3d_sparse_memory_bind_range *range = NULL; VkDeviceMemory vk_memory; VkDeviceSize vk_offset; unsigned int i, j; for (i = 0, j = 0; i < count; i++) { struct vkd3d_sparse_memory_bind *bind = &bind_infos[i]; if (mode == VKD3D_SPARSE_MEMORY_BIND_MODE_UPDATE) { vk_memory = bind->vk_memory; vk_offset = bind->vk_offset; } else /* if (mode == VKD3D_SPARSE_MEMORY_BIND_MODE_COPY) */ { struct d3d12_sparse_tile *src_tile = &src_resource->sparse.tiles[bind->src_tile]; vk_memory = src_tile->vk_memory; vk_offset = src_tile->vk_offset; } if (can_compact && range && bind->dst_tile == range->tile_index + range->tile_count && vk_memory == range->vk_memory && (vk_offset == range->vk_offset + range->tile_count * VKD3D_TILE_SIZE || !vk_memory)) { range->tile_count++; } else { range = &bind_ranges[j++]; range->tile_index = bind->dst_tile; range->tile_count = 1; range->vk_memory = vk_memory; range->vk_offset = vk_offset; } } return j; } static void d3d12_command_queue_bind_sparse(struct d3d12_command_queue *command_queue, enum vkd3d_sparse_memory_bind_mode mode, struct d3d12_resource *dst_resource, struct d3d12_resource *src_resource, unsigned int count, struct vkd3d_sparse_memory_bind *bind_infos) { const VkPipelineStageFlags wait_stages = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; struct vkd3d_sparse_memory_bind_range *bind_ranges = NULL; unsigned int first_packed_tile, processed_tiles; VkSparseImageOpaqueMemoryBindInfo opaque_info; const struct vkd3d_vk_device_procs *vk_procs; VkSparseImageMemoryBind *image_binds = NULL; VkSparseBufferMemoryBindInfo buffer_info; VkSparseMemoryBind *memory_binds = NULL; VkSparseImageMemoryBindInfo image_info; VkBindSparseInfo bind_sparse_info; struct vkd3d_queue *queue_sparse; struct vkd3d_queue *queue; VkSubmitInfo submit_info; VkQueue vk_queue_sparse; unsigned int i, j, k; VkQueue vk_queue; bool can_compact; VkResult vr; TRACE("queue %p, dst_resource %p, src_resource %p, count %u, bind_infos %p.\n", command_queue, dst_resource, src_resource, count, bind_infos); vk_procs = &command_queue->device->vk_procs; bind_sparse_info.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO; bind_sparse_info.pNext = NULL; bind_sparse_info.bufferBindCount = 0; bind_sparse_info.pBufferBinds = NULL; bind_sparse_info.imageOpaqueBindCount = 0; bind_sparse_info.pImageOpaqueBinds = NULL; bind_sparse_info.imageBindCount = 0; bind_sparse_info.pImageBinds = NULL; if (!(bind_ranges = vkd3d_malloc(count * sizeof(*bind_ranges)))) { ERR("Failed to allocate bind range info.\n"); goto cleanup; } /* NV driver is buggy and test_update_tile_mappings fails (bug 3274618). 
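 * (Compaction example, based on vkd3d_compact_sparse_bind_ranges above: two binds mapping
 * consecutive destination tiles to consecutive VKD3D_TILE_SIZE chunks of the same
 * VkDeviceMemory merge into one range with tile_count == 2; with compaction disabled on
 * NVIDIA, one range is emitted per tile instead.)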
*/ can_compact = command_queue->device->device_info.properties2.properties.vendorID != VKD3D_VENDOR_ID_NVIDIA; count = vkd3d_compact_sparse_bind_ranges(src_resource, bind_ranges, bind_infos, count, mode, can_compact); first_packed_tile = dst_resource->sparse.tile_count; if (d3d12_resource_is_buffer(dst_resource)) { if (!(memory_binds = vkd3d_malloc(count * sizeof(*memory_binds)))) { ERR("Failed to allocate sparse memory bind info.\n"); goto cleanup; } buffer_info.buffer = dst_resource->res.vk_buffer; buffer_info.bindCount = count; buffer_info.pBinds = memory_binds; bind_sparse_info.bufferBindCount = 1; bind_sparse_info.pBufferBinds = &buffer_info; } else { unsigned int opaque_bind_count = 0; unsigned int image_bind_count = 0; if (dst_resource->sparse.packed_mips.NumPackedMips) first_packed_tile = dst_resource->sparse.packed_mips.StartTileIndexInOverallResource; for (i = 0; i < count; i++) { const struct vkd3d_sparse_memory_bind_range *bind = &bind_ranges[i]; if (bind->tile_index < first_packed_tile) image_bind_count += bind->tile_count; if (bind->tile_index + bind->tile_count > first_packed_tile) opaque_bind_count++; } if (opaque_bind_count) { if (!(memory_binds = vkd3d_malloc(opaque_bind_count * sizeof(*memory_binds)))) { ERR("Failed to allocate sparse memory bind info.\n"); goto cleanup; } opaque_info.image = dst_resource->res.vk_image; opaque_info.bindCount = opaque_bind_count; opaque_info.pBinds = memory_binds; bind_sparse_info.imageOpaqueBindCount = 1; bind_sparse_info.pImageOpaqueBinds = &opaque_info; } if (image_bind_count) { if (!(image_binds = vkd3d_malloc(image_bind_count * sizeof(*image_binds)))) { ERR("Failed to allocate sparse memory bind info.\n"); goto cleanup; } /* The image bind count is not exact but only an upper limit, * so do the actual counting while filling in bind infos */ image_info.image = dst_resource->res.vk_image; image_info.bindCount = 0; image_info.pBinds = image_binds; bind_sparse_info.imageBindCount = 1; bind_sparse_info.pImageBinds = &image_info; } } for (i = 0, k = 0; i < count; i++) { struct vkd3d_sparse_memory_bind_range *bind = &bind_ranges[i]; while (bind->tile_count) { struct d3d12_sparse_tile *tile = &dst_resource->sparse.tiles[bind->tile_index]; if (d3d12_resource_is_texture(dst_resource) && bind->tile_index < first_packed_tile) { const D3D12_SUBRESOURCE_TILING *tiling = &dst_resource->sparse.tilings[tile->image.subresource_index]; const uint32_t tile_count = tiling->WidthInTiles * tiling->HeightInTiles * tiling->DepthInTiles; if (bind->tile_index == tiling->StartTileIndexInOverallResource && bind->tile_count >= tile_count) { /* Bind entire subresource at once to reduce overhead */ const struct d3d12_sparse_tile *last_tile = &tile[tile_count - 1]; VkSparseImageMemoryBind *vk_bind = &image_binds[image_info.bindCount++]; vk_bind->subresource = tile->image.subresource; vk_bind->offset = tile->image.offset; vk_bind->extent.width = last_tile->image.offset.x + last_tile->image.extent.width; vk_bind->extent.height = last_tile->image.offset.y + last_tile->image.extent.height; vk_bind->extent.depth = last_tile->image.offset.z + last_tile->image.extent.depth; vk_bind->memory = bind->vk_memory; vk_bind->memoryOffset = bind->vk_offset; vk_bind->flags = 0; processed_tiles = tile_count; } else { VkSparseImageMemoryBind *vk_bind = &image_binds[image_info.bindCount++]; vk_bind->subresource = tile->image.subresource; vk_bind->offset = tile->image.offset; vk_bind->extent = tile->image.extent; vk_bind->memory = bind->vk_memory; vk_bind->memoryOffset = 
bind->vk_offset; vk_bind->flags = 0; processed_tiles = 1; } } else { const struct d3d12_sparse_tile *last_tile = &tile[bind->tile_count - 1]; VkSparseMemoryBind *vk_bind = &memory_binds[k++]; vk_bind->resourceOffset = tile->buffer.offset; vk_bind->size = last_tile->buffer.offset + last_tile->buffer.length - vk_bind->resourceOffset; vk_bind->memory = bind->vk_memory; vk_bind->memoryOffset = bind->vk_offset; vk_bind->flags = 0; processed_tiles = bind->tile_count; } for (j = 0; j < processed_tiles; j++) { tile[j].vk_memory = bind->vk_memory; tile[j].vk_offset = bind->vk_offset + j * VKD3D_TILE_SIZE; } bind->tile_index += processed_tiles; bind->tile_count -= processed_tiles; bind->vk_offset += processed_tiles * VKD3D_TILE_SIZE; } } /* Ensure that we use a queue that supports sparse binding */ queue = command_queue->vkd3d_queue; if (!(queue->vk_queue_flags & VK_QUEUE_SPARSE_BINDING_BIT)) queue_sparse = command_queue->device->queue_families[VKD3D_QUEUE_FAMILY_SPARSE_BINDING]->queues[0]; else queue_sparse = queue; if (!(vk_queue = vkd3d_queue_acquire(queue))) { ERR("Failed to acquire queue %p.\n", queue); goto cleanup; } submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submit_info.pNext = NULL; submit_info.signalSemaphoreCount = 1; submit_info.pSignalSemaphores = &queue->serializing_binary_semaphore; submit_info.commandBufferCount = 0; submit_info.pCommandBuffers = NULL; submit_info.waitSemaphoreCount = 0; submit_info.pWaitDstStageMask = NULL; submit_info.pWaitSemaphores = NULL; /* We need to serialize sparse bind operations. * Create a roundtrip with binary semaphores. */ if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE))) < 0) ERR("Failed to submit signal, vr %d.\n", vr); VKD3D_DEVICE_REPORT_BREADCRUMB_IF(command_queue->device, vr == VK_ERROR_DEVICE_LOST); if (queue != queue_sparse) { if (!(vk_queue_sparse = vkd3d_queue_acquire(queue_sparse))) { ERR("Failed to acquire queue %p.\n", queue_sparse); vkd3d_queue_release(queue); goto cleanup; } } else vk_queue_sparse = vk_queue; bind_sparse_info.pWaitSemaphores = &queue->serializing_binary_semaphore; bind_sparse_info.pSignalSemaphores = &queue->serializing_binary_semaphore; bind_sparse_info.waitSemaphoreCount = 1; bind_sparse_info.signalSemaphoreCount = 1; if ((vr = VK_CALL(vkQueueBindSparse(vk_queue_sparse, 1, &bind_sparse_info, VK_NULL_HANDLE))) < 0) ERR("Failed to perform sparse binding, vr %d.\n", vr); if (queue != queue_sparse) vkd3d_queue_release(queue_sparse); submit_info.pWaitSemaphores = &queue->serializing_binary_semaphore; submit_info.waitSemaphoreCount = 1; submit_info.pWaitDstStageMask = &wait_stages; submit_info.pSignalSemaphores = NULL; submit_info.signalSemaphoreCount = 0; if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE))) < 0) ERR("Failed to submit signal, vr %d.\n", vr); vkd3d_queue_release(queue); VKD3D_DEVICE_REPORT_BREADCRUMB_IF(command_queue->device, vr == VK_ERROR_DEVICE_LOST); cleanup: vkd3d_free(memory_binds); vkd3d_free(image_binds); vkd3d_free(bind_ranges); } void d3d12_command_queue_submit_stop(struct d3d12_command_queue *queue) { struct d3d12_command_queue_submission sub; sub.type = VKD3D_SUBMISSION_STOP; d3d12_command_queue_add_submission(queue, &sub); } static void d3d12_command_queue_add_submission_locked(struct d3d12_command_queue *queue, const struct d3d12_command_queue_submission *sub) { vkd3d_array_reserve((void**)&queue->submissions, &queue->submissions_size, queue->submissions_count + 1, sizeof(*queue->submissions)); 
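/* Append to the FIFO consumed by d3d12_command_queue_submission_worker_main; the condition
 * signal below wakes the worker, which pops entries from the front under queue_lock. */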
queue->submissions[queue->submissions_count++] = *sub; pthread_cond_signal(&queue->queue_cond); } static void d3d12_command_queue_add_submission(struct d3d12_command_queue *queue, const struct d3d12_command_queue_submission *sub) { pthread_mutex_lock(&queue->queue_lock); d3d12_command_queue_add_submission_locked(queue, sub); pthread_mutex_unlock(&queue->queue_lock); } static void d3d12_command_queue_acquire_serialized(struct d3d12_command_queue *queue) { /* In order to make sure all pending operations queued so far have been submitted, * we build a drain task which will increment the queue_drain_count once the thread has finished all its work. */ struct d3d12_command_queue_submission sub; uint64_t current_drain; sub.type = VKD3D_SUBMISSION_DRAIN; pthread_mutex_lock(&queue->queue_lock); current_drain = ++queue->drain_count; d3d12_command_queue_add_submission_locked(queue, &sub); while (current_drain != queue->queue_drain_count) pthread_cond_wait(&queue->queue_cond, &queue->queue_lock); } static void d3d12_command_queue_release_serialized(struct d3d12_command_queue *queue) { pthread_mutex_unlock(&queue->queue_lock); } static void *d3d12_command_queue_submission_worker_main(void *userdata) { struct d3d12_command_queue_submission submission; struct d3d12_command_queue_transition_pool pool; struct d3d12_command_queue *queue = userdata; uint64_t transition_timeline_value = 0; VkCommandBuffer transition_cmd; HRESULT hr; VKD3D_REGION_DECL(queue_wait); VKD3D_REGION_DECL(queue_signal); VKD3D_REGION_DECL(queue_execute); vkd3d_set_thread_name("vkd3d_queue"); if (FAILED(hr = d3d12_command_queue_transition_pool_init(&pool, queue))) ERR("Failed to initialize transition pool.\n"); for (;;) { pthread_mutex_lock(&queue->queue_lock); while (queue->submissions_count == 0) pthread_cond_wait(&queue->queue_cond, &queue->queue_lock); queue->submissions_count--; submission = queue->submissions[0]; memmove(queue->submissions, queue->submissions + 1, queue->submissions_count * sizeof(submission)); pthread_mutex_unlock(&queue->queue_lock); if (submission.type != VKD3D_SUBMISSION_WAIT) { vkd3d_queue_flush_waiters(queue->vkd3d_queue, &queue->fence_worker, &queue->device->vk_procs); } switch (submission.type) { case VKD3D_SUBMISSION_STOP: goto cleanup; case VKD3D_SUBMISSION_WAIT: VKD3D_REGION_BEGIN(queue_wait); d3d12_command_queue_wait(queue, submission.wait.fence, submission.wait.value); d3d12_fence_dec_ref(submission.wait.fence); /* Flush eagerly. For unknown reasons, we observe some issues when trying to fuse this flush * with normal SUBMISSION_EXECUTE. 
*/ vkd3d_queue_flush_waiters(queue->vkd3d_queue, &queue->fence_worker, &queue->device->vk_procs); VKD3D_REGION_END(queue_wait); break; case VKD3D_SUBMISSION_SIGNAL: VKD3D_REGION_BEGIN(queue_signal); d3d12_command_queue_signal(queue, submission.signal.fence, submission.signal.value); d3d12_fence_dec_ref(submission.signal.fence); VKD3D_REGION_END(queue_signal); break; case VKD3D_SUBMISSION_EXECUTE: VKD3D_REGION_BEGIN(queue_execute); d3d12_command_queue_transition_pool_build(&pool, queue->device, submission.execute.transitions, submission.execute.transition_count, &transition_cmd, &transition_timeline_value); d3d12_command_queue_execute(queue, submission.execute.cmd, submission.execute.cmd_count, transition_cmd, pool.timeline, transition_timeline_value, submission.execute.outstanding_submissions_counters, submission.execute.outstanding_submissions_counter_count, submission.execute.debug_capture); /* command_queue_execute takes ownership of the outstanding_submission_counters allocation. * The atomic counters are decremented when the submission is observed to be freed. * On error, the counters are freed early, so there is no risk of leak. */ vkd3d_free(submission.execute.cmd); vkd3d_free(submission.execute.transitions); VKD3D_REGION_END(queue_execute); break; case VKD3D_SUBMISSION_BIND_SPARSE: d3d12_command_queue_bind_sparse(queue, submission.bind_sparse.mode, submission.bind_sparse.dst_resource, submission.bind_sparse.src_resource, submission.bind_sparse.bind_count, submission.bind_sparse.bind_infos); vkd3d_free(submission.bind_sparse.bind_infos); break; case VKD3D_SUBMISSION_DRAIN: { pthread_mutex_lock(&queue->queue_lock); queue->queue_drain_count++; pthread_cond_signal(&queue->queue_cond); pthread_mutex_unlock(&queue->queue_lock); break; } default: ERR("Unrecognized submission type %u.\n", submission.type); break; } } cleanup: vkd3d_queue_flush_waiters(queue->vkd3d_queue, &queue->fence_worker, &queue->device->vk_procs); d3d12_command_queue_transition_pool_deinit(&pool, queue->device); return NULL; } static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, struct d3d12_device *device, const D3D12_COMMAND_QUEUE_DESC *desc) { HRESULT hr; int rc; queue->ID3D12CommandQueue_iface.lpVtbl = &d3d12_command_queue_vtbl; queue->refcount = 1; queue->desc = *desc; if (!queue->desc.NodeMask) queue->desc.NodeMask = 0x1; queue->vkd3d_queue = d3d12_device_allocate_vkd3d_queue(device, d3d12_device_get_vkd3d_queue_family(device, desc->Type)); queue->submissions = NULL; queue->submissions_count = 0; queue->submissions_size = 0; queue->drain_count = 0; queue->queue_drain_count = 0; if ((rc = pthread_mutex_init(&queue->queue_lock, NULL)) < 0) { hr = hresult_from_errno(rc); goto fail; } if ((rc = pthread_cond_init(&queue->queue_cond, NULL)) < 0) { hr = hresult_from_errno(rc); goto fail_pthread_cond; } if (desc->Priority == D3D12_COMMAND_QUEUE_PRIORITY_GLOBAL_REALTIME) FIXME("Global realtime priority is not implemented.\n"); if (desc->Priority) FIXME("Ignoring priority %#x.\n", desc->Priority); if (desc->Flags) FIXME("Ignoring flags %#x.\n", desc->Flags); if (FAILED(hr = vkd3d_private_store_init(&queue->private_store))) goto fail_private_store; #ifdef VKD3D_BUILD_STANDALONE_D3D12 if (FAILED(hr = d3d12_swapchain_factory_init(queue, &queue->swapchain_factory))) goto fail_swapchain_factory; #endif d3d12_device_add_ref(queue->device = device); if (FAILED(hr = vkd3d_fence_worker_start(&queue->fence_worker, device))) goto fail_fence_worker_start; if ((rc = pthread_create(&queue->submission_thread, NULL, 
d3d12_command_queue_submission_worker_main, queue)) < 0) { d3d12_device_release(queue->device); hr = hresult_from_errno(rc); goto fail_pthread_create; } return S_OK; fail_pthread_create: vkd3d_fence_worker_stop(&queue->fence_worker, device); fail_fence_worker_start:; #ifdef VKD3D_BUILD_STANDALONE_D3D12 fail_swapchain_factory: vkd3d_private_store_destroy(&queue->private_store); #endif fail_private_store: pthread_cond_destroy(&queue->queue_cond); fail_pthread_cond: pthread_mutex_destroy(&queue->queue_lock); fail: d3d12_device_unmap_vkd3d_queue(device, queue->vkd3d_queue); return hr; } HRESULT d3d12_command_queue_create(struct d3d12_device *device, const D3D12_COMMAND_QUEUE_DESC *desc, struct d3d12_command_queue **queue) { struct d3d12_command_queue *object; HRESULT hr; if (!(object = vkd3d_calloc(1, sizeof(*object)))) return E_OUTOFMEMORY; if (FAILED(hr = d3d12_command_queue_init(object, device, desc))) { vkd3d_free(object); return hr; } TRACE("Created command queue %p.\n", object); *queue = object; return S_OK; } VKD3D_EXPORT uint32_t vkd3d_get_vk_queue_family_index(ID3D12CommandQueue *queue) { struct d3d12_command_queue *d3d12_queue = impl_from_ID3D12CommandQueue(queue); return d3d12_queue->vkd3d_queue->vk_family_index; } VKD3D_EXPORT VkQueue vkd3d_acquire_vk_queue(ID3D12CommandQueue *queue) { struct d3d12_command_queue *d3d12_queue; VkQueue vk_queue; /* For external users of the Vulkan queue, we must ensure that the queue is drained so that submissions happen in * desired order. */ VKD3D_REGION_DECL(acquire_vk_queue); VKD3D_REGION_BEGIN(acquire_vk_queue); d3d12_queue = impl_from_ID3D12CommandQueue(queue); d3d12_command_queue_acquire_serialized(d3d12_queue); vk_queue = vkd3d_queue_acquire(d3d12_queue->vkd3d_queue); VKD3D_REGION_END(acquire_vk_queue); return vk_queue; } VKD3D_EXPORT void vkd3d_release_vk_queue(ID3D12CommandQueue *queue) { struct d3d12_command_queue *d3d12_queue = impl_from_ID3D12CommandQueue(queue); vkd3d_queue_release(d3d12_queue->vkd3d_queue); d3d12_command_queue_release_serialized(d3d12_queue); } VKD3D_EXPORT void vkd3d_enqueue_initial_transition(ID3D12CommandQueue *queue, ID3D12Resource *resource) { struct d3d12_command_queue_submission sub; struct d3d12_command_queue *d3d12_queue = impl_from_ID3D12CommandQueue(queue); struct d3d12_resource *d3d12_resource = impl_from_ID3D12Resource(resource); memset(&sub, 0, sizeof(sub)); sub.type = VKD3D_SUBMISSION_EXECUTE; sub.execute.transition_count = 1; sub.execute.transitions = vkd3d_malloc(sizeof(*sub.execute.transitions)); sub.execute.transitions[0].type = VKD3D_INITIAL_TRANSITION_TYPE_RESOURCE; sub.execute.transitions[0].resource.resource = d3d12_resource; sub.execute.transitions[0].resource.perform_initial_transition = true; d3d12_command_queue_add_submission(d3d12_queue, &sub); } /* ID3D12CommandSignature */ static HRESULT STDMETHODCALLTYPE d3d12_command_signature_QueryInterface(ID3D12CommandSignature *iface, REFIID iid, void **out) { TRACE("iface %p, iid %s, out %p.\n", iface, debugstr_guid(iid), out); if (IsEqualGUID(iid, &IID_ID3D12CommandSignature) || IsEqualGUID(iid, &IID_ID3D12Pageable) || IsEqualGUID(iid, &IID_ID3D12DeviceChild) || IsEqualGUID(iid, &IID_ID3D12Object) || IsEqualGUID(iid, &IID_IUnknown)) { ID3D12CommandSignature_AddRef(iface); *out = iface; return S_OK; } WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(iid)); *out = NULL; return E_NOINTERFACE; } static ULONG STDMETHODCALLTYPE d3d12_command_signature_AddRef(ID3D12CommandSignature *iface) { struct d3d12_command_signature *signature = 
impl_from_ID3D12CommandSignature(iface); ULONG refcount = InterlockedIncrement(&signature->refcount); TRACE("%p increasing refcount to %u.\n", signature, refcount); return refcount; } static void d3d12_command_signature_cleanup(struct d3d12_command_signature *signature) { const struct vkd3d_vk_device_procs *vk_procs = &signature->device->vk_procs; if (signature->device->device_info.device_generated_commands_features_nv.deviceGeneratedCommands) { VK_CALL(vkDestroyBuffer(signature->device->vk_device, signature->state_template.buffer, NULL)); vkd3d_free_device_memory(signature->device, &signature->state_template.memory); VK_CALL(vkDestroyIndirectCommandsLayoutNV(signature->device->vk_device, signature->state_template.layout, NULL)); } vkd3d_private_store_destroy(&signature->private_store); vkd3d_free((void *)signature->desc.pArgumentDescs); vkd3d_free(signature); } static ULONG STDMETHODCALLTYPE d3d12_command_signature_Release(ID3D12CommandSignature *iface) { struct d3d12_command_signature *signature = impl_from_ID3D12CommandSignature(iface); ULONG refcount = InterlockedDecrement(&signature->refcount); TRACE("%p decreasing refcount to %u.\n", signature, refcount); if (!refcount) { struct d3d12_device *device = signature->device; d3d12_command_signature_cleanup(signature); d3d12_device_release(device); } return refcount; } static HRESULT STDMETHODCALLTYPE d3d12_command_signature_GetPrivateData(ID3D12CommandSignature *iface, REFGUID guid, UINT *data_size, void *data) { struct d3d12_command_signature *signature = impl_from_ID3D12CommandSignature(iface); TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); return vkd3d_get_private_data(&signature->private_store, guid, data_size, data); } static HRESULT STDMETHODCALLTYPE d3d12_command_signature_SetPrivateData(ID3D12CommandSignature *iface, REFGUID guid, UINT data_size, const void *data) { struct d3d12_command_signature *signature = impl_from_ID3D12CommandSignature(iface); TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); return vkd3d_set_private_data(&signature->private_store, guid, data_size, data, NULL, NULL); } static HRESULT STDMETHODCALLTYPE d3d12_command_signature_SetPrivateDataInterface(ID3D12CommandSignature *iface, REFGUID guid, const IUnknown *data) { struct d3d12_command_signature *signature = impl_from_ID3D12CommandSignature(iface); TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); return vkd3d_set_private_data_interface(&signature->private_store, guid, data, NULL, NULL); } static HRESULT STDMETHODCALLTYPE d3d12_command_signature_GetDevice(ID3D12CommandSignature *iface, REFIID iid, void **device) { struct d3d12_command_signature *signature = impl_from_ID3D12CommandSignature(iface); TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); return d3d12_device_query_interface(signature->device, iid, device); } CONST_VTBL struct ID3D12CommandSignatureVtbl d3d12_command_signature_vtbl = { /* IUnknown methods */ d3d12_command_signature_QueryInterface, d3d12_command_signature_AddRef, d3d12_command_signature_Release, /* ID3D12Object methods */ d3d12_command_signature_GetPrivateData, d3d12_command_signature_SetPrivateData, d3d12_command_signature_SetPrivateDataInterface, (void *)d3d12_object_SetName, /* ID3D12DeviceChild methods */ d3d12_command_signature_GetDevice, }; struct vkd3d_patch_command { enum vkd3d_patch_command_token token; uint32_t src_offset; uint32_t dst_offset; }; static HRESULT 
d3d12_command_signature_init_patch_commands_buffer(struct d3d12_command_signature *signature, struct d3d12_device *device, const struct vkd3d_patch_command *commands, size_t command_count) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; D3D12_RESOURCE_DESC1 buffer_desc; D3D12_HEAP_PROPERTIES heap_info; HRESULT hr = S_OK; VkResult vr; void *ptr; memset(&heap_info, 0, sizeof(heap_info)); heap_info.Type = D3D12_HEAP_TYPE_UPLOAD; memset(&buffer_desc, 0, sizeof(buffer_desc)); buffer_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; buffer_desc.Width = command_count * sizeof(struct vkd3d_patch_command); buffer_desc.Height = 1; buffer_desc.DepthOrArraySize = 1; buffer_desc.SampleDesc.Count = 1; buffer_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; buffer_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; if (FAILED(hr = vkd3d_create_buffer(device, &heap_info, D3D12_HEAP_FLAG_NONE, &buffer_desc, &signature->state_template.buffer))) return hr; if (FAILED(hr = vkd3d_allocate_buffer_memory(device, signature->state_template.buffer, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, &signature->state_template.memory))) return hr; signature->state_template.buffer_va = vkd3d_get_buffer_device_address(device, signature->state_template.buffer); if ((vr = VK_CALL(vkMapMemory(device->vk_device, signature->state_template.memory.vk_memory, 0, VK_WHOLE_SIZE, 0, (void**)&ptr)))) return hresult_from_vk_result(vr); memcpy(ptr, commands, command_count * sizeof(struct vkd3d_patch_command)); VK_CALL(vkUnmapMemory(device->vk_device, signature->state_template.memory.vk_memory)); return hr; } static HRESULT d3d12_command_signature_init_indirect_commands_layout( struct d3d12_command_signature *signature, struct d3d12_device *device, const VkIndirectCommandsLayoutTokenNV *tokens, uint32_t token_count, uint32_t stream_stride) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkIndirectCommandsLayoutCreateInfoNV create_info; VkResult vr; create_info.sType = VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_CREATE_INFO_NV; create_info.pNext = NULL; create_info.flags = 0; create_info.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; create_info.streamCount = 1; create_info.pStreamStrides = &stream_stride; create_info.tokenCount = token_count; create_info.pTokens = tokens; signature->state_template.stride = stream_stride; if (token_count > device->device_info.device_generated_commands_properties_nv.maxIndirectCommandsTokenCount) { FIXME("Token count %u is too large (max %u).\n", token_count, device->device_info.device_generated_commands_properties_nv.maxIndirectCommandsTokenCount); return E_NOTIMPL; } vr = VK_CALL(vkCreateIndirectCommandsLayoutNV(device->vk_device, &create_info, NULL, &signature->state_template.layout)); return hresult_from_vk_result(vr); } static HRESULT d3d12_command_signature_allocate_stream_memory_for_list( struct d3d12_command_list *list, struct d3d12_command_signature *signature, uint32_t max_command_count, struct vkd3d_scratch_allocation *allocation) { if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, max_command_count * signature->state_template.stride, list->device->device_info.device_generated_commands_properties_nv.minIndirectCommandsBufferOffsetAlignment, ~0u, allocation)) return E_OUTOFMEMORY; return S_OK; } static HRESULT d3d12_command_signature_allocate_preprocess_memory_for_list( struct d3d12_command_list *list, struct d3d12_command_signature *signature, VkPipeline render_pipeline, uint32_t
max_command_count, struct vkd3d_scratch_allocation *allocation, VkDeviceSize *size) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; VkGeneratedCommandsMemoryRequirementsInfoNV info; VkMemoryRequirements2 memory_info; uint32_t alignment; memory_info.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2; memory_info.pNext = NULL; info.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; info.sType = VK_STRUCTURE_TYPE_GENERATED_COMMANDS_MEMORY_REQUIREMENTS_INFO_NV; info.pNext = NULL; info.maxSequencesCount = max_command_count; info.pipeline = render_pipeline; info.indirectCommandsLayout = signature->state_template.layout; if (max_command_count > list->device->device_info.device_generated_commands_properties_nv.maxIndirectSequenceCount) { FIXME("max_command_count %u exceeds device limit %u.\n", max_command_count, list->device->device_info.device_generated_commands_properties_nv.maxIndirectSequenceCount); return E_NOTIMPL; } VK_CALL(vkGetGeneratedCommandsMemoryRequirementsNV(list->device->vk_device, &info, &memory_info)); alignment = max(memory_info.memoryRequirements.alignment, list->device->device_info.device_generated_commands_properties_nv.minIndirectCommandsBufferOffsetAlignment); if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS, memory_info.memoryRequirements.size, alignment, memory_info.memoryRequirements.memoryTypeBits, allocation)) return E_OUTOFMEMORY; /* Going to assume the memory type is okay ... It's device local after all. */ *size = memory_info.memoryRequirements.size; return S_OK; } static HRESULT d3d12_command_signature_init_state_template(struct d3d12_command_signature *signature, const D3D12_COMMAND_SIGNATURE_DESC *desc, struct d3d12_root_signature *root_signature, struct d3d12_device *device) { const enum vkd3d_patch_command_token *generic_u32_copy_types; const struct vkd3d_shader_root_parameter *root_parameter; const struct vkd3d_shader_root_constant *root_constant; struct vkd3d_patch_command *patch_commands = NULL; VkIndirectCommandsLayoutTokenNV *tokens = NULL; uint32_t required_stride_alignment = 0; VkIndirectCommandsLayoutTokenNV token; uint32_t generic_u32_copy_count; size_t patch_commands_count = 0; uint32_t required_alignment = 0; size_t patch_commands_size = 0; uint32_t root_parameter_index; uint32_t src_word_offset = 0; uint32_t stream_stride = 0; uint32_t dst_word_offset; size_t token_count = 0; size_t token_size = 0; HRESULT hr = S_OK; uint32_t i, j; /* Mostly for debug. Lets debug ring report what it is writing easily. 
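 * (Each table below labels the consecutive u32 words copied from the application's argument
 * buffer for one argument type; e.g. an index buffer view yields the four patch commands
 * IBO_VA_LO, IBO_VA_HI, IBO_SIZE and INDEX_FORMAT, in that order.)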
*/ static const enum vkd3d_patch_command_token ibv_types[] = { VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_LO, VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_HI, VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_SIZE, VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_FORMAT, }; static const enum vkd3d_patch_command_token vbv_types[] = { VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_LO, VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_HI, VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_SIZE, VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_STRIDE, }; static const enum vkd3d_patch_command_token draw_types[] = { VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_COUNT, VKD3D_PATCH_COMMAND_TOKEN_COPY_INSTANCE_COUNT, VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_VERTEX, VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INSTANCE, }; static const enum vkd3d_patch_command_token draw_indexed_types[] = { VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_COUNT, VKD3D_PATCH_COMMAND_TOKEN_COPY_INSTANCE_COUNT, VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INDEX, VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_OFFSET, VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INSTANCE, }; static const enum vkd3d_patch_command_token va_types[] = { VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_LO, VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_HI, }; static const VkIndexType vk_index_types[] = { VK_INDEX_TYPE_UINT32, VK_INDEX_TYPE_UINT16 }; static const uint32_t d3d_index_types[] = { DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R16_UINT }; if (!device->device_info.device_generated_commands_features_nv.deviceGeneratedCommands) { WARN("Device generated commands not supported, indirect state commands will be ignored.\n"); return S_OK; } for (i = 0; i < desc->NumArgumentDescs; i++) { const D3D12_INDIRECT_ARGUMENT_DESC *argument_desc = &desc->pArgumentDescs[i]; memset(&token, 0, sizeof(token)); token.sType = VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_TOKEN_NV; generic_u32_copy_count = 0; dst_word_offset = 0; switch (argument_desc->Type) { case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT: root_parameter_index = argument_desc->Constant.RootParameterIndex; root_constant = root_signature_get_32bit_constants(root_signature, root_parameter_index); token.tokenType = VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV; token.pushconstantPipelineLayout = root_signature->graphics.vk_pipeline_layout; token.pushconstantShaderStageFlags = root_signature->graphics.vk_push_stages; token.pushconstantOffset = root_constant->constant_index + argument_desc->Constant.DestOffsetIn32BitValues; token.pushconstantSize = argument_desc->Constant.Num32BitValuesToSet; token.pushconstantOffset *= sizeof(uint32_t); token.pushconstantSize *= sizeof(uint32_t); required_alignment = sizeof(uint32_t); stream_stride = align(stream_stride, required_alignment); token.offset = stream_stride; stream_stride += token.pushconstantSize; dst_word_offset = token.offset / sizeof(uint32_t); generic_u32_copy_count = argument_desc->Constant.Num32BitValuesToSet; generic_u32_copy_types = NULL; break; case D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW: case D3D12_INDIRECT_ARGUMENT_TYPE_SHADER_RESOURCE_VIEW: case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW: root_parameter_index = argument_desc->ShaderResourceView.RootParameterIndex; root_parameter = root_signature_get_parameter(root_signature, root_parameter_index); if (!(root_signature->root_descriptor_raw_va_mask & (1ull << root_parameter_index))) { ERR("Root parameter %u is not a raw VA. 
Cannot implement command signature which updates root descriptor.\n", root_parameter_index); hr = E_NOTIMPL; goto end; } token.tokenType = VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV; token.pushconstantPipelineLayout = root_signature->graphics.vk_pipeline_layout; token.pushconstantShaderStageFlags = root_signature->graphics.vk_push_stages; token.pushconstantOffset = root_parameter->descriptor.raw_va_root_descriptor_index * sizeof(VkDeviceAddress); token.pushconstantSize = sizeof(VkDeviceAddress); required_alignment = sizeof(uint32_t); stream_stride = align(stream_stride, required_alignment); token.offset = stream_stride; stream_stride += token.pushconstantSize; dst_word_offset = token.offset / sizeof(uint32_t); /* Simply patch by copying U32s. Need to handle unaligned U32s since everything is tightly packed. */ generic_u32_copy_count = sizeof(VkDeviceAddress) / sizeof(uint32_t); generic_u32_copy_types = va_types; break; case D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW: token.tokenType = VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV; token.vertexBindingUnit = argument_desc->VertexBuffer.Slot; token.vertexDynamicStride = VK_TRUE; /* If device exposes 4 byte alignment of the indirect command buffer, we can * pack VA at sub-scalar alignment. */ required_alignment = min( device->device_info.device_generated_commands_properties_nv.minIndirectCommandsBufferOffsetAlignment, sizeof(VkDeviceAddress)); stream_stride = align(stream_stride, required_alignment); token.offset = stream_stride; stream_stride += sizeof(VkBindVertexBufferIndirectCommandNV); dst_word_offset = token.offset / sizeof(uint32_t); /* The VBV indirect layout is the same as DX, so just copy the U32s. */ generic_u32_copy_count = sizeof(D3D12_VERTEX_BUFFER_VIEW) / sizeof(uint32_t); generic_u32_copy_types = vbv_types; break; case D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW: token.tokenType = VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV; token.indexTypeCount = ARRAY_SIZE(vk_index_types); token.pIndexTypeValues = d3d_index_types; token.pIndexTypes = vk_index_types; /* If device exposes 4 byte alignment of the indirect command buffer, we can * pack VA at sub-scalar alignment. 
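 * (Example under that assumption: with minIndirectCommandsBufferOffsetAlignment == 4, the
 * 8-byte VkDeviceAddress may start on a 4-byte boundary, so the stream stays tightly packed
 * instead of being padded out to 8-byte alignment.)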
*/ required_alignment = min( device->device_info.device_generated_commands_properties_nv.minIndirectCommandsBufferOffsetAlignment, sizeof(VkDeviceAddress)); stream_stride = align(stream_stride, required_alignment); token.offset = stream_stride; stream_stride += sizeof(VkBindVertexBufferIndirectCommandNV); dst_word_offset = token.offset / sizeof(uint32_t); vkd3d_array_reserve((void**)&patch_commands, &patch_commands_size, patch_commands_count + sizeof(D3D12_INDEX_BUFFER_VIEW) / sizeof(uint32_t), sizeof(*patch_commands)); for (j = 0; j < 4; j++) { patch_commands[patch_commands_count].token = ibv_types[j]; patch_commands[patch_commands_count].src_offset = src_word_offset++; patch_commands[patch_commands_count].dst_offset = dst_word_offset++; patch_commands_count++; } break; case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW: token.tokenType = VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV; required_alignment = sizeof(uint32_t); stream_stride = align(stream_stride, required_alignment); token.offset = stream_stride; stream_stride += sizeof(VkDrawIndirectCommand); dst_word_offset = token.offset / sizeof(uint32_t); generic_u32_copy_count = sizeof(VkDrawIndirectCommand) / sizeof(uint32_t); generic_u32_copy_types = draw_types; break; case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED: token.tokenType = VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV; required_alignment = sizeof(uint32_t); stream_stride = align(stream_stride, required_alignment); token.offset = stream_stride; stream_stride += sizeof(VkDrawIndexedIndirectCommand); dst_word_offset = token.offset / sizeof(uint32_t); generic_u32_copy_count = sizeof(VkDrawIndexedIndirectCommand) / sizeof(uint32_t); generic_u32_copy_types = draw_indexed_types; break; default: FIXME("Unsupported token type %u.\n", argument_desc->Type); hr = E_NOTIMPL; goto end; } vkd3d_array_reserve((void**)&tokens, &token_size, token_count + 1, sizeof(*tokens)); tokens[token_count++] = token; if (generic_u32_copy_count) { vkd3d_array_reserve((void**)&patch_commands, &patch_commands_size, patch_commands_count + generic_u32_copy_count, sizeof(*patch_commands)); /* Simply patch by copying U32s. */ for (j = 0; j < generic_u32_copy_count; j++, patch_commands_count++) { patch_commands[patch_commands_count].token = generic_u32_copy_types ? generic_u32_copy_types[j] : VKD3D_PATCH_COMMAND_TOKEN_COPY_CONST_U32; patch_commands[patch_commands_count].src_offset = src_word_offset++; patch_commands[patch_commands_count].dst_offset = dst_word_offset++; } } /* Required alignment is scalar alignment rules, i.e. maximum individual alignment requirement. 
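 * (E.g. if any argument in the signature required 8-byte alignment while the rest are plain
 * u32 data, the final stream stride computed after this loop is rounded up to a multiple
 * of 8.)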
*/ required_stride_alignment = max(required_stride_alignment, required_alignment); } stream_stride = max(stream_stride, desc->ByteStride); stream_stride = align(stream_stride, required_stride_alignment); if (FAILED(hr = d3d12_command_signature_init_patch_commands_buffer(signature, device, patch_commands, patch_commands_count))) goto end; if (FAILED(hr = d3d12_command_signature_init_indirect_commands_layout(signature, device, tokens, token_count, stream_stride))) goto end; if (FAILED(hr = vkd3d_meta_get_execute_indirect_pipeline(&device->meta_ops, patch_commands_count, &signature->state_template.pipeline))) goto end; end: vkd3d_free(tokens); vkd3d_free(patch_commands); return hr; } HRESULT d3d12_command_signature_create(struct d3d12_device *device, struct d3d12_root_signature *root_signature, const D3D12_COMMAND_SIGNATURE_DESC *desc, struct d3d12_command_signature **signature) { struct d3d12_command_signature *object; bool requires_root_signature = false; bool requires_state_template = false; uint32_t argument_buffer_offset = 0; uint32_t signature_size = 0; bool has_action = false; unsigned int i; bool is_action; HRESULT hr; for (i = 0; i < desc->NumArgumentDescs; ++i) { const D3D12_INDIRECT_ARGUMENT_DESC *argument_desc = &desc->pArgumentDescs[i]; is_action = false; switch (argument_desc->Type) { case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW: argument_buffer_offset = signature_size; signature_size += sizeof(D3D12_DRAW_ARGUMENTS); is_action = true; break; case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED: argument_buffer_offset = signature_size; signature_size += sizeof(D3D12_DRAW_INDEXED_ARGUMENTS); is_action = true; break; case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH: argument_buffer_offset = signature_size; signature_size += sizeof(D3D12_DISPATCH_ARGUMENTS); is_action = true; break; case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_RAYS: argument_buffer_offset = signature_size; signature_size += sizeof(D3D12_DISPATCH_RAYS_DESC); is_action = true; break; case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_MESH: argument_buffer_offset = signature_size; signature_size += sizeof(D3D12_DISPATCH_MESH_ARGUMENTS); is_action = true; break; case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT: requires_root_signature = true; requires_state_template = true; signature_size += argument_desc->Constant.Num32BitValuesToSet * sizeof(uint32_t); break; case D3D12_INDIRECT_ARGUMENT_TYPE_SHADER_RESOURCE_VIEW: case D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW: case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW: requires_root_signature = true; requires_state_template = true; /* The command signature payload is *not* aligned. */ signature_size += sizeof(D3D12_GPU_VIRTUAL_ADDRESS); break; case D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW: /* The command signature payload is *not* aligned. */ signature_size += sizeof(D3D12_VERTEX_BUFFER_VIEW); requires_state_template = true; break; case D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW: /* The command signature payload is *not* aligned. 
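 * (For instance, a 32-bit root constant followed by this view places the 64-bit buffer
 * location at byte offset 4 of the application's argument data, which is why signature_size
 * is accumulated without any natural alignment.)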
*/ signature_size += sizeof(D3D12_INDEX_BUFFER_VIEW); requires_state_template = true; break; default: FIXME("Unsupported indirect argument type: %u.\n", argument_desc->Type); break; } if (is_action) { if (has_action) { ERR("Using multiple action commands per command signature is invalid.\n"); return E_INVALIDARG; } if (i != desc->NumArgumentDescs - 1) { WARN("Action command must be the last element of a command signature.\n"); return E_INVALIDARG; } has_action = true; } } if (!has_action) { ERR("Command signature must have exactly one action command.\n"); return E_INVALIDARG; } if (desc->ByteStride < signature_size) { ERR("Command signature stride %u must be at least %u bytes.\n", desc->ByteStride, signature_size); return E_INVALIDARG; } if (requires_root_signature && !root_signature) { ERR("Command signature requires root signature, but is not provided.\n"); return E_INVALIDARG; } else if (!requires_root_signature && root_signature) { ERR("Command signature does not require root signature, root signature must be NULL.\n"); return E_INVALIDARG; } if (!(object = vkd3d_calloc(1, sizeof(*object)))) return E_OUTOFMEMORY; object->ID3D12CommandSignature_iface.lpVtbl = &d3d12_command_signature_vtbl; object->refcount = 1; object->desc = *desc; if (!(object->desc.pArgumentDescs = vkd3d_calloc(desc->NumArgumentDescs, sizeof(*desc->pArgumentDescs)))) { vkd3d_free(object); return E_OUTOFMEMORY; } memcpy((void *)object->desc.pArgumentDescs, desc->pArgumentDescs, desc->NumArgumentDescs * sizeof(*desc->pArgumentDescs)); if (FAILED(hr = vkd3d_private_store_init(&object->private_store))) goto err; if ((object->requires_state_template = requires_state_template)) { if (!device->device_info.device_generated_commands_features_nv.deviceGeneratedCommands) { FIXME("VK_NV_device_generated_commands is not supported by implementation.\n"); hr = E_NOTIMPL; goto err; } if (FAILED(hr = d3d12_command_signature_init_state_template(object, desc, root_signature, device))) goto err; } else object->argument_buffer_offset = argument_buffer_offset; d3d12_device_add_ref(object->device = device); TRACE("Created command signature %p.\n", object); *signature = object; return S_OK; err: vkd3d_free((void *)object->desc.pArgumentDescs); vkd3d_free(object); return hr; }
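
/* Illustrative usage of the exported queue accessors above (a sketch of assumed external
 * code, not part of this file): an interop or swapchain layer that needs to submit Vulkan
 * work directly is expected to bracket it like this:
 *
 *     VkQueue vk_queue = vkd3d_acquire_vk_queue(command_queue);
 *     vr = vkQueuePresentKHR(vk_queue, &present_info);
 *     vkd3d_release_vk_queue(command_queue);
 *
 * vkd3d_acquire_vk_queue() drains the submission thread and holds the vkd3d_queue lock, so
 * the external submission is ordered after all previously queued D3D12 work; the matching
 * release must be made from the same thread, since the queue lock is held across the pair. */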