494 lines
16 KiB
C
494 lines
16 KiB
C
/*
|
|
* Copyright 2022 Google LLC
|
|
* SPDX-License-Identifier: MIT
|
|
*/
|
|
|
|
#include "vn_feedback.h"
|
|
|
|
#include "vn_device.h"
|
|
#include "vn_physical_device.h"
|
|
#include "vn_queue.h"
|
|
|
|
/* coherent buffer with bound and mapped memory */
|
|
struct vn_feedback_buffer {
|
|
VkBuffer buffer;
|
|
VkDeviceMemory memory;
|
|
void *data;
|
|
|
|
struct list_head head;
|
|
};
|
|
|
|
static uint32_t
|
|
vn_get_memory_type_index(const VkPhysicalDeviceMemoryProperties *mem_props,
|
|
uint32_t mem_type_bits,
|
|
VkMemoryPropertyFlags required_mem_flags)
|
|
{
|
|
u_foreach_bit(mem_type_index, mem_type_bits)
|
|
{
|
|
assert(mem_type_index < mem_props->memoryTypeCount);
|
|
if ((mem_props->memoryTypes[mem_type_index].propertyFlags &
|
|
required_mem_flags) == required_mem_flags)
|
|
return mem_type_index;
|
|
}
|
|
|
|
return UINT32_MAX;
|
|
}
|
|
|
|
static VkResult
|
|
vn_feedback_buffer_create(struct vn_device *dev,
|
|
uint32_t size,
|
|
const VkAllocationCallbacks *alloc,
|
|
struct vn_feedback_buffer **out_feedback_buf)
|
|
{
|
|
const bool exclusive = dev->queue_family_count == 1;
|
|
const VkPhysicalDeviceMemoryProperties *mem_props =
|
|
&dev->physical_device->memory_properties.memoryProperties;
|
|
VkDevice dev_handle = vn_device_to_handle(dev);
|
|
struct vn_feedback_buffer *feedback_buf;
|
|
VkResult result;
|
|
|
|
feedback_buf = vk_zalloc(alloc, sizeof(*feedback_buf), VN_DEFAULT_ALIGN,
|
|
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
|
if (!feedback_buf)
|
|
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
|
|
|
/* use concurrent to avoid explicit queue family ownership transfer for
|
|
* device created with queues from multiple queue families
|
|
*/
|
|
const VkBufferCreateInfo buf_create_info = {
|
|
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
|
.size = size,
|
|
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
|
.sharingMode =
|
|
exclusive ? VK_SHARING_MODE_EXCLUSIVE : VK_SHARING_MODE_CONCURRENT,
|
|
/* below favors the current venus protocol */
|
|
.queueFamilyIndexCount = exclusive ? 0 : dev->queue_family_count,
|
|
.pQueueFamilyIndices = exclusive ? NULL : dev->queue_families,
|
|
};
|
|
result = vn_CreateBuffer(dev_handle, &buf_create_info, alloc,
|
|
&feedback_buf->buffer);
|
|
if (result != VK_SUCCESS)
|
|
goto out_free_feedback_buf;
|
|
|
|
struct vn_buffer *buf = vn_buffer_from_handle(feedback_buf->buffer);
|
|
const VkMemoryRequirements *mem_req =
|
|
&buf->requirements.memory.memoryRequirements;
|
|
const uint32_t mem_type_index =
|
|
vn_get_memory_type_index(mem_props, mem_req->memoryTypeBits,
|
|
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
|
|
if (mem_type_index >= mem_props->memoryTypeCount) {
|
|
result = VK_ERROR_INITIALIZATION_FAILED;
|
|
goto out_destroy_buffer;
|
|
}
|
|
|
|
const VkMemoryAllocateInfo mem_alloc_info = {
|
|
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
|
.allocationSize = mem_req->size,
|
|
.memoryTypeIndex = mem_type_index,
|
|
};
|
|
result = vn_AllocateMemory(dev_handle, &mem_alloc_info, alloc,
|
|
&feedback_buf->memory);
|
|
if (result != VK_SUCCESS)
|
|
goto out_destroy_buffer;
|
|
|
|
const VkBindBufferMemoryInfo bind_info = {
|
|
.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
|
|
.buffer = feedback_buf->buffer,
|
|
.memory = feedback_buf->memory,
|
|
.memoryOffset = 0,
|
|
};
|
|
result = vn_BindBufferMemory2(dev_handle, 1, &bind_info);
|
|
if (result != VK_SUCCESS)
|
|
goto out_free_memory;
|
|
|
|
result = vn_MapMemory(dev_handle, feedback_buf->memory, 0, VK_WHOLE_SIZE,
|
|
0, &feedback_buf->data);
|
|
if (result != VK_SUCCESS)
|
|
goto out_free_memory;
|
|
|
|
*out_feedback_buf = feedback_buf;
|
|
|
|
return VK_SUCCESS;
|
|
|
|
out_free_memory:
|
|
vn_FreeMemory(dev_handle, feedback_buf->memory, alloc);
|
|
|
|
out_destroy_buffer:
|
|
vn_DestroyBuffer(dev_handle, feedback_buf->buffer, alloc);
|
|
|
|
out_free_feedback_buf:
|
|
vk_free(alloc, feedback_buf);
|
|
|
|
return result;
|
|
}
|
|
|
|
static void
|
|
vn_feedback_buffer_destroy(struct vn_device *dev,
|
|
struct vn_feedback_buffer *feedback_buf,
|
|
const VkAllocationCallbacks *alloc)
|
|
{
|
|
VkDevice dev_handle = vn_device_to_handle(dev);
|
|
|
|
vn_UnmapMemory(dev_handle, feedback_buf->memory);
|
|
vn_FreeMemory(dev_handle, feedback_buf->memory, alloc);
|
|
vn_DestroyBuffer(dev_handle, feedback_buf->buffer, alloc);
|
|
vk_free(alloc, feedback_buf);
|
|
}
|
|
|
|
static VkResult
|
|
vn_feedback_pool_grow_locked(struct vn_feedback_pool *pool)
|
|
{
|
|
VN_TRACE_FUNC();
|
|
struct vn_feedback_buffer *feedback_buf = NULL;
|
|
VkResult result;
|
|
|
|
result = vn_feedback_buffer_create(pool->device, pool->size, pool->alloc,
|
|
&feedback_buf);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
pool->used = 0;
|
|
|
|
list_add(&feedback_buf->head, &pool->feedback_buffers);
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
VkResult
|
|
vn_feedback_pool_init(struct vn_device *dev,
|
|
struct vn_feedback_pool *pool,
|
|
uint32_t size,
|
|
const VkAllocationCallbacks *alloc)
|
|
{
|
|
simple_mtx_init(&pool->mutex, mtx_plain);
|
|
|
|
pool->device = dev;
|
|
pool->alloc = alloc;
|
|
pool->size = size;
|
|
pool->used = size;
|
|
list_inithead(&pool->feedback_buffers);
|
|
list_inithead(&pool->free_slots);
|
|
|
|
/* no lock needed upon init */
|
|
return vn_feedback_pool_grow_locked(pool);
|
|
}
|
|
|
|
void
|
|
vn_feedback_pool_fini(struct vn_feedback_pool *pool)
|
|
{
|
|
list_for_each_entry_safe(struct vn_feedback_slot, slot, &pool->free_slots,
|
|
head)
|
|
vk_free(pool->alloc, slot);
|
|
|
|
list_for_each_entry_safe(struct vn_feedback_buffer, feedback_buf,
|
|
&pool->feedback_buffers, head)
|
|
vn_feedback_buffer_destroy(pool->device, feedback_buf, pool->alloc);
|
|
|
|
simple_mtx_destroy(&pool->mutex);
|
|
}
|
|
|
|
static struct vn_feedback_buffer *
|
|
vn_feedback_pool_alloc_locked(struct vn_feedback_pool *pool,
|
|
uint32_t size,
|
|
uint32_t *out_offset)
|
|
{
|
|
VN_TRACE_FUNC();
|
|
const uint32_t aligned_size = align(size, 4);
|
|
|
|
if (unlikely(aligned_size > pool->size - pool->used)) {
|
|
VkResult result = vn_feedback_pool_grow_locked(pool);
|
|
if (result != VK_SUCCESS)
|
|
return NULL;
|
|
|
|
assert(aligned_size <= pool->size - pool->used);
|
|
}
|
|
|
|
*out_offset = pool->used;
|
|
pool->used += aligned_size;
|
|
|
|
return list_first_entry(&pool->feedback_buffers, struct vn_feedback_buffer,
|
|
head);
|
|
}
|
|
|
|
struct vn_feedback_slot *
|
|
vn_feedback_pool_alloc(struct vn_feedback_pool *pool,
|
|
enum vn_feedback_type type)
|
|
{
|
|
/* TODO Make slot size variable for VkQueryPool feedback. Currently it's
|
|
* MAX2(sizeof(VkResult), sizeof(uint64_t)).
|
|
*/
|
|
static const uint32_t slot_size = 8;
|
|
struct vn_feedback_buffer *feedback_buf;
|
|
uint32_t offset;
|
|
struct vn_feedback_slot *slot;
|
|
|
|
simple_mtx_lock(&pool->mutex);
|
|
if (!list_is_empty(&pool->free_slots)) {
|
|
slot =
|
|
list_first_entry(&pool->free_slots, struct vn_feedback_slot, head);
|
|
list_del(&slot->head);
|
|
simple_mtx_unlock(&pool->mutex);
|
|
|
|
slot->type = type;
|
|
return slot;
|
|
}
|
|
|
|
slot = vk_alloc(pool->alloc, sizeof(*slot), VN_DEFAULT_ALIGN,
|
|
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
|
if (!slot) {
|
|
simple_mtx_unlock(&pool->mutex);
|
|
return NULL;
|
|
}
|
|
|
|
feedback_buf = vn_feedback_pool_alloc_locked(pool, slot_size, &offset);
|
|
simple_mtx_unlock(&pool->mutex);
|
|
|
|
if (!feedback_buf) {
|
|
vk_free(pool->alloc, slot);
|
|
return NULL;
|
|
}
|
|
|
|
slot->type = type;
|
|
slot->offset = offset;
|
|
slot->buffer = feedback_buf->buffer;
|
|
slot->data = feedback_buf->data + offset;
|
|
|
|
return slot;
|
|
}
|
|
|
|
void
|
|
vn_feedback_pool_free(struct vn_feedback_pool *pool,
|
|
struct vn_feedback_slot *slot)
|
|
{
|
|
simple_mtx_lock(&pool->mutex);
|
|
list_add(&slot->head, &pool->free_slots);
|
|
simple_mtx_unlock(&pool->mutex);
|
|
}
|
|
|
|
void
|
|
vn_feedback_event_cmd_record(VkCommandBuffer cmd_handle,
|
|
VkEvent ev_handle,
|
|
VkPipelineStageFlags stage_mask,
|
|
VkResult status)
|
|
{
|
|
/* For vkCmdSetEvent and vkCmdResetEvent feedback interception.
|
|
*
|
|
* The injection point is after the event call to avoid introducing
|
|
* unexpected src stage waiting for VK_PIPELINE_STAGE_HOST_BIT and
|
|
* VK_PIPELINE_STAGE_TRANSFER_BIT if they are not already being waited by
|
|
* vkCmdSetEvent or vkCmdResetEvent. On the other hand, the delay in the
|
|
* feedback signal is acceptable for the nature of VkEvent, and the event
|
|
* feedback cmds lifecycle is guarded by the intercepted command buffer.
|
|
*/
|
|
struct vn_event *ev = vn_event_from_handle(ev_handle);
|
|
struct vn_feedback_slot *slot = ev->feedback_slot;
|
|
|
|
if (!slot)
|
|
return;
|
|
|
|
STATIC_ASSERT(sizeof(*slot->status) == 4);
|
|
|
|
const VkBufferMemoryBarrier buf_barrier_before = {
|
|
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
|
.pNext = NULL,
|
|
.srcAccessMask =
|
|
VK_ACCESS_HOST_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
|
|
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
|
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
|
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
|
.buffer = slot->buffer,
|
|
.offset = slot->offset,
|
|
.size = 4,
|
|
};
|
|
vn_CmdPipelineBarrier(cmd_handle,
|
|
stage_mask | VK_PIPELINE_STAGE_HOST_BIT |
|
|
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
|
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
|
|
&buf_barrier_before, 0, NULL);
|
|
vn_CmdFillBuffer(cmd_handle, slot->buffer, slot->offset, 4, status);
|
|
|
|
const VkBufferMemoryBarrier buf_barrier_after = {
|
|
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
|
.pNext = NULL,
|
|
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
|
.dstAccessMask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT,
|
|
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
|
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
|
.buffer = slot->buffer,
|
|
.offset = slot->offset,
|
|
.size = 4,
|
|
};
|
|
vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
|
VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1,
|
|
&buf_barrier_after, 0, NULL);
|
|
}
|
|
|
|
static VkResult
|
|
vn_feedback_fence_cmd_record(VkCommandBuffer cmd_handle,
|
|
struct vn_feedback_slot *slot)
|
|
|
|
{
|
|
STATIC_ASSERT(sizeof(*slot->status) == 4);
|
|
|
|
static const VkCommandBufferBeginInfo begin_info = {
|
|
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
|
|
.pNext = NULL,
|
|
.flags = 0,
|
|
.pInheritanceInfo = NULL,
|
|
};
|
|
VkResult result = vn_BeginCommandBuffer(cmd_handle, &begin_info);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
static const VkMemoryBarrier mem_barrier_before = {
|
|
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
|
.pNext = NULL,
|
|
/* make pending writes available to stay close to fence signal op */
|
|
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
|
/* no need to make all memory visible for feedback update */
|
|
.dstAccessMask = 0,
|
|
};
|
|
const VkBufferMemoryBarrier buf_barrier_before = {
|
|
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
|
.pNext = NULL,
|
|
/* slot memory has been made available via mem_barrier_before */
|
|
.srcAccessMask = 0,
|
|
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
|
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
|
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
|
.buffer = slot->buffer,
|
|
.offset = slot->offset,
|
|
.size = 4,
|
|
};
|
|
vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
|
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1,
|
|
&mem_barrier_before, 1, &buf_barrier_before, 0,
|
|
NULL);
|
|
vn_CmdFillBuffer(cmd_handle, slot->buffer, slot->offset, 4, VK_SUCCESS);
|
|
|
|
const VkBufferMemoryBarrier buf_barrier_after = {
|
|
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
|
.pNext = NULL,
|
|
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
|
.dstAccessMask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT,
|
|
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
|
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
|
.buffer = slot->buffer,
|
|
.offset = slot->offset,
|
|
.size = 4,
|
|
};
|
|
vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
|
VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1,
|
|
&buf_barrier_after, 0, NULL);
|
|
|
|
return vn_EndCommandBuffer(cmd_handle);
|
|
}
|
|
|
|
VkResult
|
|
vn_feedback_fence_cmd_alloc(VkDevice dev_handle,
|
|
struct vn_feedback_cmd_pool *pool,
|
|
struct vn_feedback_slot *slot,
|
|
VkCommandBuffer *out_cmd_handle)
|
|
{
|
|
const VkCommandBufferAllocateInfo info = {
|
|
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
|
|
.pNext = NULL,
|
|
.commandPool = pool->pool,
|
|
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
|
|
.commandBufferCount = 1,
|
|
};
|
|
VkCommandBuffer cmd_handle;
|
|
VkResult result;
|
|
|
|
simple_mtx_lock(&pool->mutex);
|
|
result = vn_AllocateCommandBuffers(dev_handle, &info, &cmd_handle);
|
|
if (result != VK_SUCCESS)
|
|
goto out_unlock;
|
|
|
|
result = vn_feedback_fence_cmd_record(cmd_handle, slot);
|
|
if (result != VK_SUCCESS) {
|
|
vn_FreeCommandBuffers(dev_handle, pool->pool, 1, &cmd_handle);
|
|
goto out_unlock;
|
|
}
|
|
|
|
*out_cmd_handle = cmd_handle;
|
|
|
|
out_unlock:
|
|
simple_mtx_unlock(&pool->mutex);
|
|
|
|
return result;
|
|
}
|
|
|
|
void
|
|
vn_feedback_fence_cmd_free(VkDevice dev_handle,
|
|
struct vn_feedback_cmd_pool *pool,
|
|
VkCommandBuffer cmd_handle)
|
|
{
|
|
simple_mtx_lock(&pool->mutex);
|
|
vn_FreeCommandBuffers(dev_handle, pool->pool, 1, &cmd_handle);
|
|
simple_mtx_unlock(&pool->mutex);
|
|
}
|
|
|
|
VkResult
|
|
vn_feedback_cmd_pools_init(struct vn_device *dev)
|
|
{
|
|
const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
|
|
VkDevice dev_handle = vn_device_to_handle(dev);
|
|
struct vn_feedback_cmd_pool *pools;
|
|
VkCommandPoolCreateInfo info = {
|
|
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
|
|
.pNext = NULL,
|
|
.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
|
|
};
|
|
|
|
/* TODO will also condition on timeline semaphore feedback */
|
|
if (VN_PERF(NO_FENCE_FEEDBACK))
|
|
return VK_SUCCESS;
|
|
|
|
assert(dev->queue_family_count);
|
|
|
|
pools = vk_zalloc(alloc, sizeof(*pools) * dev->queue_family_count,
|
|
VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
|
if (!pools)
|
|
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
|
|
|
for (uint32_t i = 0; i < dev->queue_family_count; i++) {
|
|
VkResult result;
|
|
|
|
info.queueFamilyIndex = dev->queue_families[i];
|
|
result = vn_CreateCommandPool(dev_handle, &info, alloc, &pools[i].pool);
|
|
if (result != VK_SUCCESS) {
|
|
for (uint32_t j = 0; j < i; j++) {
|
|
vn_DestroyCommandPool(dev_handle, pools[j].pool, alloc);
|
|
simple_mtx_destroy(&pools[j].mutex);
|
|
}
|
|
|
|
vk_free(alloc, pools);
|
|
return result;
|
|
}
|
|
|
|
simple_mtx_init(&pools[i].mutex, mtx_plain);
|
|
}
|
|
|
|
dev->cmd_pools = pools;
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
void
|
|
vn_feedback_cmd_pools_fini(struct vn_device *dev)
|
|
{
|
|
const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
|
|
VkDevice dev_handle = vn_device_to_handle(dev);
|
|
|
|
if (!dev->cmd_pools)
|
|
return;
|
|
|
|
for (uint32_t i = 0; i < dev->queue_family_count; i++) {
|
|
vn_DestroyCommandPool(dev_handle, dev->cmd_pools[i].pool, alloc);
|
|
simple_mtx_destroy(&dev->cmd_pools[i].mutex);
|
|
}
|
|
|
|
vk_free(alloc, dev->cmd_pools);
|
|
}
|