/*
 * Copyright 2022 Google LLC
 * SPDX-License-Identifier: MIT
 */

#include "vn_feedback.h"

#include "vn_command_buffer.h"
#include "vn_device.h"
#include "vn_physical_device.h"
#include "vn_query_pool.h"
#include "vn_queue.h"

static uint32_t
vn_get_memory_type_index(const VkPhysicalDeviceMemoryProperties *mem_props,
                         uint32_t mem_type_bits,
                         VkMemoryPropertyFlags required_mem_flags)
{
   u_foreach_bit(mem_type_index, mem_type_bits) {
      assert(mem_type_index < mem_props->memoryTypeCount);
      if ((mem_props->memoryTypes[mem_type_index].propertyFlags &
           required_mem_flags) == required_mem_flags)
         return mem_type_index;
   }

   return UINT32_MAX;
}
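
/* Illustrative sketch, not driver code: with a hypothetical mem_type_bits
 * of 0x6 (types 1 and 2 supported), the helper above walks the set bits in
 * order and returns the first type whose propertyFlags contain every
 * required flag:
 *
 *    uint32_t idx = vn_get_memory_type_index(
 *       mem_props, 0x6, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
 *    if (idx == UINT32_MAX)
 *       return VK_ERROR_INITIALIZATION_FAILED;  // no coherent type found
 */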
VkResult
vn_feedback_buffer_create(struct vn_device *dev,
                          uint32_t size,
                          const VkAllocationCallbacks *alloc,
                          struct vn_feedback_buffer **out_fb_buf)
{
   const bool exclusive = dev->queue_family_count == 1;
   const VkPhysicalDeviceMemoryProperties *mem_props =
      &dev->physical_device->memory_properties;
   VkDevice dev_handle = vn_device_to_handle(dev);
   VkResult result;

   struct vn_feedback_buffer *fb_buf =
      vk_zalloc(alloc, sizeof(*fb_buf), VN_DEFAULT_ALIGN,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!fb_buf)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* Use concurrent sharing to avoid explicit queue family ownership
    * transfer for a device created with queues from multiple queue
    * families.
    */
   const VkBufferCreateInfo buf_create_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .size = size,
      /* Feedback for fences and timeline semaphores will write to this
       * buffer as a DST when signalling. Timeline semaphore feedback will
       * also read from this buffer as a SRC to retrieve the counter value
       * to signal.
       */
      .usage =
         VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
      .sharingMode =
         exclusive ? VK_SHARING_MODE_EXCLUSIVE : VK_SHARING_MODE_CONCURRENT,
      /* below favors the current venus protocol */
      .queueFamilyIndexCount = exclusive ? 0 : dev->queue_family_count,
      .pQueueFamilyIndices = exclusive ? NULL : dev->queue_families,
   };
   result = vn_CreateBuffer(dev_handle, &buf_create_info, alloc,
                            &fb_buf->buf_handle);
   if (result != VK_SUCCESS)
      goto out_free_feedback_buffer;

   struct vn_buffer *buf = vn_buffer_from_handle(fb_buf->buf_handle);
   const VkMemoryRequirements *mem_req =
      &buf->requirements.memory.memoryRequirements;
   const uint32_t mem_type_index =
      vn_get_memory_type_index(mem_props, mem_req->memoryTypeBits,
                               VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
   if (mem_type_index >= mem_props->memoryTypeCount) {
      result = VK_ERROR_INITIALIZATION_FAILED;
      goto out_destroy_buffer;
   }

   const VkMemoryAllocateInfo mem_alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .allocationSize = mem_req->size,
      .memoryTypeIndex = mem_type_index,
   };
   result = vn_AllocateMemory(dev_handle, &mem_alloc_info, alloc,
                              &fb_buf->mem_handle);
   if (result != VK_SUCCESS)
      goto out_destroy_buffer;

   const VkBindBufferMemoryInfo bind_info = {
      .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
      .buffer = fb_buf->buf_handle,
      .memory = fb_buf->mem_handle,
      .memoryOffset = 0,
   };
   result = vn_BindBufferMemory2(dev_handle, 1, &bind_info);
   if (result != VK_SUCCESS)
      goto out_free_memory;

   result = vn_MapMemory(dev_handle, fb_buf->mem_handle, 0, VK_WHOLE_SIZE, 0,
                         &fb_buf->data);
   if (result != VK_SUCCESS)
      goto out_free_memory;

   *out_fb_buf = fb_buf;

   return VK_SUCCESS;

out_free_memory:
   vn_FreeMemory(dev_handle, fb_buf->mem_handle, alloc);

out_destroy_buffer:
   vn_DestroyBuffer(dev_handle, fb_buf->buf_handle, alloc);

out_free_feedback_buffer:
   vk_free(alloc, fb_buf);

   return result;
}

void
vn_feedback_buffer_destroy(struct vn_device *dev,
                           struct vn_feedback_buffer *fb_buf,
                           const VkAllocationCallbacks *alloc)
{
   VkDevice dev_handle = vn_device_to_handle(dev);

   vn_UnmapMemory(dev_handle, fb_buf->mem_handle);
   vn_FreeMemory(dev_handle, fb_buf->mem_handle, alloc);
   vn_DestroyBuffer(dev_handle, fb_buf->buf_handle, alloc);
   vk_free(alloc, fb_buf);
}

static inline uint32_t
vn_get_feedback_buffer_alignment(struct vn_feedback_buffer *fb_buf)
{
   struct vn_buffer *buf = vn_buffer_from_handle(fb_buf->buf_handle);
   return buf->requirements.memory.memoryRequirements.alignment;
}

static VkResult
vn_feedback_pool_grow_locked(struct vn_feedback_pool *pool)
{
   VN_TRACE_FUNC();
   struct vn_feedback_buffer *fb_buf = NULL;
   VkResult result;

   result =
      vn_feedback_buffer_create(pool->dev, pool->size, pool->alloc, &fb_buf);
   if (result != VK_SUCCESS)
      return result;

   pool->used = 0;
   pool->alignment = vn_get_feedback_buffer_alignment(fb_buf);

   list_add(&fb_buf->head, &pool->fb_bufs);

   return VK_SUCCESS;
}

VkResult
vn_feedback_pool_init(struct vn_device *dev,
                      struct vn_feedback_pool *pool,
                      uint32_t size,
                      const VkAllocationCallbacks *alloc)
{
   simple_mtx_init(&pool->mutex, mtx_plain);

   pool->dev = dev;
   pool->alloc = alloc;
   pool->size = size;
   pool->used = size;
   pool->alignment = 1;
   list_inithead(&pool->fb_bufs);
   list_inithead(&pool->free_slots);

   return VK_SUCCESS;
}

void
vn_feedback_pool_fini(struct vn_feedback_pool *pool)
{
   list_for_each_entry_safe(struct vn_feedback_slot, slot, &pool->free_slots,
                            head)
      vk_free(pool->alloc, slot);

   list_for_each_entry_safe(struct vn_feedback_buffer, fb_buf,
                            &pool->fb_bufs, head)
      vn_feedback_buffer_destroy(pool->dev, fb_buf, pool->alloc);

   simple_mtx_destroy(&pool->mutex);
}
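
/* Illustrative sketch, not driver code: the pool suballocates fixed-size
 * slots out of growable feedback buffers, so a typical consumer of the
 * helpers below pairs one alloc with one free:
 *
 *    struct vn_feedback_slot *slot =
 *       vn_feedback_pool_alloc(&dev->feedback_pool, VN_FEEDBACK_TYPE_FENCE);
 *    if (!slot)
 *       return VK_ERROR_OUT_OF_HOST_MEMORY;
 *    ...
 *    vn_feedback_pool_free(&dev->feedback_pool, slot);
 *
 * Freed slots are recycled via pool->free_slots rather than returned to
 * the underlying buffer.
 */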
static struct vn_feedback_buffer *
vn_feedback_pool_alloc_locked(struct vn_feedback_pool *pool,
                              uint32_t size,
                              uint32_t *out_offset)
{
   /* The default values of pool->used and pool->alignment trigger the
    * initial pool grow; both are properly initialized after that.
    */
   if (unlikely(align(size, pool->alignment) > pool->size - pool->used)) {
      VkResult result = vn_feedback_pool_grow_locked(pool);
      if (result != VK_SUCCESS)
         return NULL;

      assert(align(size, pool->alignment) <= pool->size - pool->used);
   }

   *out_offset = pool->used;
   pool->used += align(size, pool->alignment);

   return list_first_entry(&pool->fb_bufs, struct vn_feedback_buffer, head);
}

struct vn_feedback_slot *
vn_feedback_pool_alloc(struct vn_feedback_pool *pool,
                       enum vn_feedback_type type)
{
   static const uint32_t slot_size = 8;
   struct vn_feedback_buffer *fb_buf;
   uint32_t offset;
   struct vn_feedback_slot *slot;

   simple_mtx_lock(&pool->mutex);
   if (!list_is_empty(&pool->free_slots)) {
      slot =
         list_first_entry(&pool->free_slots, struct vn_feedback_slot, head);
      list_del(&slot->head);
      simple_mtx_unlock(&pool->mutex);

      slot->type = type;
      return slot;
   }

   slot = vk_alloc(pool->alloc, sizeof(*slot), VN_DEFAULT_ALIGN,
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!slot) {
      simple_mtx_unlock(&pool->mutex);
      return NULL;
   }

   fb_buf = vn_feedback_pool_alloc_locked(pool, slot_size, &offset);
   simple_mtx_unlock(&pool->mutex);

   if (!fb_buf) {
      vk_free(pool->alloc, slot);
      return NULL;
   }

   slot->type = type;
   slot->offset = offset;
   slot->buf_handle = fb_buf->buf_handle;
   slot->data = fb_buf->data + offset;

   return slot;
}

void
vn_feedback_pool_free(struct vn_feedback_pool *pool,
                      struct vn_feedback_slot *slot)
{
   simple_mtx_lock(&pool->mutex);
   list_add(&slot->head, &pool->free_slots);
   simple_mtx_unlock(&pool->mutex);
}

static inline bool
mask_is_32bit(uint64_t x)
{
   return (x & 0xffffffff00000000ull) == 0;
}

static void
vn_build_buffer_memory_barrier(const VkDependencyInfo *dep_info,
                               VkBufferMemoryBarrier *barrier1,
                               VkPipelineStageFlags *src_stage_mask,
                               VkPipelineStageFlags *dst_stage_mask)
{
   assert(dep_info->pNext == NULL);
   assert(dep_info->memoryBarrierCount == 0);
   assert(dep_info->bufferMemoryBarrierCount == 1);
   assert(dep_info->imageMemoryBarrierCount == 0);

   const VkBufferMemoryBarrier2 *barrier2 =
      &dep_info->pBufferMemoryBarriers[0];
   assert(barrier2->pNext == NULL);
   assert(mask_is_32bit(barrier2->srcStageMask));
   assert(mask_is_32bit(barrier2->srcAccessMask));
   assert(mask_is_32bit(barrier2->dstStageMask));
   assert(mask_is_32bit(barrier2->dstAccessMask));

   *barrier1 = (VkBufferMemoryBarrier){
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask = barrier2->srcAccessMask,
      .dstAccessMask = barrier2->dstAccessMask,
      .srcQueueFamilyIndex = barrier2->srcQueueFamilyIndex,
      .dstQueueFamilyIndex = barrier2->dstQueueFamilyIndex,
      .buffer = barrier2->buffer,
      .offset = barrier2->offset,
      .size = barrier2->size,
   };
   *src_stage_mask = barrier2->srcStageMask;
   *dst_stage_mask = barrier2->dstStageMask;
}

static void
vn_cmd_buffer_memory_barrier(VkCommandBuffer cmd_handle,
                             const VkDependencyInfo *dep_info,
                             bool sync2)
{
   if (sync2) {
      vn_CmdPipelineBarrier2(cmd_handle, dep_info);
   } else {
      VkBufferMemoryBarrier barrier1;
      VkPipelineStageFlags src_stage_mask;
      VkPipelineStageFlags dst_stage_mask;

      vn_build_buffer_memory_barrier(dep_info, &barrier1, &src_stage_mask,
                                     &dst_stage_mask);
      vn_CmdPipelineBarrier(cmd_handle, src_stage_mask, dst_stage_mask,
                            dep_info->dependencyFlags, 0, NULL, 1, &barrier1,
                            0, NULL);
   }
}
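
/* Illustrative sketch, not driver code: when synchronization2 is
 * unavailable, vn_cmd_buffer_memory_barrier above downgrades a
 * single-buffer-barrier VkDependencyInfo to its sync1 equivalent, e.g.
 *
 *    vn_CmdPipelineBarrier2(cmd, &dep);
 *
 * becomes
 *
 *    vn_CmdPipelineBarrier(cmd, barrier2->srcStageMask,
 *                          barrier2->dstStageMask, dep.dependencyFlags,
 *                          0, NULL, 1, &barrier1, 0, NULL);
 *
 * which is only valid because the asserts above guarantee every stage and
 * access mask fits in the lower 32 bits.
 */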
void
vn_event_feedback_cmd_record(VkCommandBuffer cmd_handle,
                             VkEvent ev_handle,
                             VkPipelineStageFlags2 src_stage_mask,
                             VkResult status,
                             bool sync2)
{
   /* For vkCmdSetEvent and vkCmdResetEvent feedback interception.
    *
    * The injection point is after the event call to avoid introducing an
    * unexpected src stage wait on VK_PIPELINE_STAGE_HOST_BIT and
    * VK_PIPELINE_STAGE_TRANSFER_BIT if they are not already being waited
    * on by vkCmdSetEvent or vkCmdResetEvent. On the other hand, the delay
    * in the feedback signal is acceptable given the nature of VkEvent, and
    * the lifecycle of the event feedback cmds is guarded by the
    * intercepted command buffer.
    */
   struct vn_event *ev = vn_event_from_handle(ev_handle);
   struct vn_feedback_slot *slot = ev->feedback_slot;
   if (!slot)
      return;

   STATIC_ASSERT(sizeof(*slot->status) == 4);

   const VkDependencyInfo dep_before = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .dependencyFlags = 0,
      .bufferMemoryBarrierCount = 1,
      .pBufferMemoryBarriers =
         (VkBufferMemoryBarrier2[]){
            {
               .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
               .srcStageMask = src_stage_mask | VK_PIPELINE_STAGE_HOST_BIT |
                               VK_PIPELINE_STAGE_TRANSFER_BIT,
               .srcAccessMask =
                  VK_ACCESS_HOST_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
               .dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
               .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
               .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
               .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
               .buffer = slot->buf_handle,
               .offset = slot->offset,
               .size = 4,
            },
         },
   };
   vn_cmd_buffer_memory_barrier(cmd_handle, &dep_before, sync2);

   vn_CmdFillBuffer(cmd_handle, slot->buf_handle, slot->offset, 4, status);

   const VkDependencyInfo dep_after = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .dependencyFlags = 0,
      .bufferMemoryBarrierCount = 1,
      .pBufferMemoryBarriers =
         (VkBufferMemoryBarrier2[]){
            {
               .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
               .srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
               .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
               .dstStageMask = VK_PIPELINE_STAGE_HOST_BIT,
               .dstAccessMask =
                  VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT,
               .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
               .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
               .buffer = slot->buf_handle,
               .offset = slot->offset,
               .size = 4,
            },
         },
   };
   vn_cmd_buffer_memory_barrier(cmd_handle, &dep_after, sync2);
}

static inline void
vn_feedback_cmd_record_flush_barrier(VkCommandBuffer cmd_handle,
                                     VkBuffer buffer,
                                     VkDeviceSize offset,
                                     VkDeviceSize size)
{
   const VkBufferMemoryBarrier buf_flush_barrier = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .dstAccessMask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = buffer,
      .offset = offset,
      .size = size,
   };
   vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_TRANSFER_BIT,
                         VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1,
                         &buf_flush_barrier, 0, NULL);
}
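
/* Illustrative sketch, not driver code: the transfer-to-host flush barrier
 * above is what makes a host-side poll of a coherent feedback slot valid
 * once the submission completes (hypothetical reader, for illustration
 * only):
 *
 *    struct vn_feedback_slot *slot = ev->feedback_slot;
 *    if (*slot->status == VK_EVENT_SET)
 *       ...  // event state observed without a renderer roundtrip
 */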
static VkResult
vn_feedback_cmd_record(VkCommandBuffer cmd_handle,
                       struct vn_feedback_slot *dst_slot,
                       struct vn_feedback_slot *src_slot)
{
   STATIC_ASSERT(sizeof(*dst_slot->status) == 4);
   STATIC_ASSERT(sizeof(*dst_slot->counter) == 8);
   STATIC_ASSERT(sizeof(*src_slot->counter) == 8);

   /* Slot size is 8 bytes for timeline semaphore and 4 bytes for fence.
    * src_slot is non-null for timeline semaphore.
    */
   const VkDeviceSize buf_size = src_slot ? 8 : 4;

   static const VkCommandBufferBeginInfo begin_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
      .pNext = NULL,
      .flags = 0,
      .pInheritanceInfo = NULL,
   };
   VkResult result = vn_BeginCommandBuffer(cmd_handle, &begin_info);
   if (result != VK_SUCCESS)
      return result;

   static const VkMemoryBarrier mem_barrier_before = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
      .pNext = NULL,
      /* make pending writes available to stay close to signal op */
      .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
      /* no need to make all memory visible for feedback update */
      .dstAccessMask = 0,
   };
   const VkBufferMemoryBarrier buf_barrier_before = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      /* slot memory has been made available via mem_barrier_before */
      .srcAccessMask = 0,
      .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = dst_slot->buf_handle,
      .offset = dst_slot->offset,
      .size = buf_size,
   };
   /* host writes for src_slots should implicitly be made visible upon
    * QueueSubmit call
    */
   vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1,
                         &mem_barrier_before, 1, &buf_barrier_before, 0,
                         NULL);

   /* If passed a src_slot, timeline semaphore feedback records a cmd to
    * copy the counter value from the src slot to the dst slot. If src_slot
    * is NULL, then fence feedback records a cmd to fill the dst slot with
    * VK_SUCCESS.
    */
   if (src_slot) {
      assert(src_slot->type == VN_FEEDBACK_TYPE_SEMAPHORE);
      assert(dst_slot->type == VN_FEEDBACK_TYPE_SEMAPHORE);

      const VkBufferCopy buffer_copy = {
         .srcOffset = src_slot->offset,
         .dstOffset = dst_slot->offset,
         .size = buf_size,
      };
      vn_CmdCopyBuffer(cmd_handle, src_slot->buf_handle,
                       dst_slot->buf_handle, 1, &buffer_copy);
   } else {
      assert(dst_slot->type == VN_FEEDBACK_TYPE_FENCE);

      vn_CmdFillBuffer(cmd_handle, dst_slot->buf_handle, dst_slot->offset,
                       buf_size, VK_SUCCESS);
   }

   vn_feedback_cmd_record_flush_barrier(cmd_handle, dst_slot->buf_handle,
                                        dst_slot->offset, buf_size);

   return vn_EndCommandBuffer(cmd_handle);
}

struct vn_semaphore_feedback_cmd *
vn_semaphore_feedback_cmd_alloc(struct vn_device *dev,
                                struct vn_feedback_slot *dst_slot)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
   struct vn_semaphore_feedback_cmd *sfb_cmd;
   VkCommandBuffer *cmd_handles;

   VK_MULTIALLOC(ma);
   vk_multialloc_add(&ma, &sfb_cmd, __typeof__(*sfb_cmd), 1);
   vk_multialloc_add(&ma, &cmd_handles, __typeof__(*cmd_handles),
                     dev->queue_family_count);
   if (!vk_multialloc_zalloc(&ma, alloc, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
      return NULL;

   struct vn_feedback_slot *src_slot =
      vn_feedback_pool_alloc(&dev->feedback_pool, VN_FEEDBACK_TYPE_SEMAPHORE);
   if (!src_slot) {
      vk_free(alloc, sfb_cmd);
      return NULL;
   }

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      VkDevice dev_handle = vn_device_to_handle(dev);
      VkResult result =
         vn_feedback_cmd_alloc(dev_handle, &dev->fb_cmd_pools[i], dst_slot,
                               src_slot, &cmd_handles[i]);
      if (result != VK_SUCCESS) {
         for (uint32_t j = 0; j < i; j++) {
            vn_feedback_cmd_free(dev_handle, &dev->fb_cmd_pools[j],
                                 cmd_handles[j]);
         }

         vn_feedback_pool_free(&dev->feedback_pool, src_slot);
         vk_free(alloc, sfb_cmd);
         return NULL;
      }
   }

   sfb_cmd->cmd_handles = cmd_handles;
   sfb_cmd->src_slot = src_slot;

   return sfb_cmd;
}
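
/* Illustrative sketch, not driver code: the alloc above pre-records one
 * copy cmd per queue family, with cmd_handles[i] recorded against the
 * fb_cmd_pool created for dev->queue_families[i]. Submission code
 * (hypothetical here) can thus append whichever handle matches the
 * submitting queue's family:
 *
 *    VkCommandBuffer fb_cmd_handle = sfb_cmd->cmd_handles[family_index];
 */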
void
vn_semaphore_feedback_cmd_free(struct vn_device *dev,
                               struct vn_semaphore_feedback_cmd *sfb_cmd)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      vn_feedback_cmd_free(vn_device_to_handle(dev), &dev->fb_cmd_pools[i],
                           sfb_cmd->cmd_handles[i]);
   }

   vn_feedback_pool_free(&dev->feedback_pool, sfb_cmd->src_slot);
   vk_free(alloc, sfb_cmd);
}
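
/* Worked example, not driver code: vn_query_feedback_cmd_record_internal
 * below lays out one slot per query in the query feedback buffer. For a
 * hypothetical pool with result_array_size == 2, slot_size is
 * 2 * 8 + 8 = 24 bytes (two 64-bit results plus the 64-bit availability
 * value), so query N starts at byte offset 24 * N and resetting M queries
 * fills 24 * M bytes with zeros.
 */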
static void
vn_query_feedback_cmd_record_internal(VkCommandBuffer cmd_handle,
                                      VkQueryPool pool_handle,
                                      uint32_t query,
                                      uint32_t count,
                                      bool copy)
{
   struct vn_query_pool *pool = vn_query_pool_from_handle(pool_handle);
   assert(pool->fb_buf);

   /* Results are always 64 bit and include the availability bit (also 64
    * bit).
    */
   const VkDeviceSize slot_size = (pool->result_array_size * 8) + 8;
   const VkDeviceSize offset = slot_size * query;
   const VkDeviceSize buf_size = slot_size * count;

   /* The first synchronization scope of vkCmdCopyQueryPoolResults does not
    * include the query feedback buffer. Insert a barrier to ensure ordering
    * against the feedback buffer fill cmd injected in vkCmdResetQueryPool.
    *
    * The second synchronization scope of vkCmdResetQueryPool does not
    * include the query feedback buffer. Insert a barrier to ensure ordering
    * against prior cmds referencing the queries.
    *
    * For srcAccessMask, VK_ACCESS_TRANSFER_WRITE_BIT is sufficient since
    * the gpu cache invalidation for the feedback buffer fill in
    * vkResetQueryPool is done implicitly via queue submission.
    */
   const VkPipelineStageFlags src_stage_mask =
      copy ? VK_PIPELINE_STAGE_TRANSFER_BIT
           : VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
   const VkBufferMemoryBarrier buf_barrier_before = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = pool->fb_buf->buf_handle,
      .offset = offset,
      .size = buf_size,
   };
   vn_CmdPipelineBarrier(cmd_handle, src_stage_mask,
                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
                         &buf_barrier_before, 0, NULL);

   if (copy) {
      /* Per spec: "The first synchronization scope includes all commands
       * which reference the queries in queryPool indicated by query that
       * occur earlier in submission order. If flags does not include
       * VK_QUERY_RESULT_WAIT_BIT, vkCmdEndQueryIndexedEXT,
       * vkCmdWriteTimestamp2, vkCmdEndQuery, and vkCmdWriteTimestamp are
       * excluded from this scope."
       *
       * Set VK_QUERY_RESULT_WAIT_BIT to ensure ordering after
       * vkCmdEndQuery or vkCmdWriteTimestamp makes the query available.
       *
       * Set VK_QUERY_RESULT_64_BIT as we can convert it to 32 bit if the
       * app requested that.
       *
       * Per spec: "vkCmdCopyQueryPoolResults is considered to be a
       * transfer operation, and its writes to buffer memory must be
       * synchronized using VK_PIPELINE_STAGE_TRANSFER_BIT and
       * VK_ACCESS_TRANSFER_WRITE_BIT before using the results."
       *
       * So we can reuse the flush barrier after this copy cmd.
       */
      vn_CmdCopyQueryPoolResults(cmd_handle, pool_handle, query, count,
                                 pool->fb_buf->buf_handle, offset, slot_size,
                                 VK_QUERY_RESULT_WITH_AVAILABILITY_BIT |
                                    VK_QUERY_RESULT_64_BIT |
                                    VK_QUERY_RESULT_WAIT_BIT);
   } else {
      vn_CmdFillBuffer(cmd_handle, pool->fb_buf->buf_handle, offset,
                       buf_size, 0);
   }

   vn_feedback_cmd_record_flush_barrier(cmd_handle, pool->fb_buf->buf_handle,
                                        offset, buf_size);
}

static VkResult
vn_query_feedback_cmd_record(VkDevice dev_handle,
                             struct list_head *query_records,
                             struct vn_query_feedback_cmd *qfb_cmd)
{
   assert(!list_is_empty(query_records));

   static const VkCommandBufferBeginInfo begin_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
   };
   VkResult result = vn_BeginCommandBuffer(qfb_cmd->cmd_handle, &begin_info);
   if (result != VK_SUCCESS)
      return result;

   list_for_each_entry_safe(struct vn_cmd_query_record, record,
                            query_records, head) {
      vn_query_feedback_cmd_record_internal(
         qfb_cmd->cmd_handle, vn_query_pool_to_handle(record->query_pool),
         record->query, record->query_count, record->copy);
   }

   return vn_EndCommandBuffer(qfb_cmd->cmd_handle);
}

VkResult
vn_query_feedback_cmd_alloc(VkDevice dev_handle,
                            struct vn_feedback_cmd_pool *fb_cmd_pool,
                            struct list_head *query_records,
                            struct vn_query_feedback_cmd **out_qfb_cmd)
{
   struct vn_query_feedback_cmd *qfb_cmd;
   VkResult result;

   simple_mtx_lock(&fb_cmd_pool->mutex);
   if (list_is_empty(&fb_cmd_pool->free_qfb_cmds)) {
      struct vn_command_pool *cmd_pool =
         vn_command_pool_from_handle(fb_cmd_pool->pool_handle);

      qfb_cmd = vk_alloc(&cmd_pool->allocator, sizeof(*qfb_cmd),
                         VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!qfb_cmd) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto out_unlock;
      }

      const VkCommandBufferAllocateInfo info = {
         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
         .commandPool = fb_cmd_pool->pool_handle,
         .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
         .commandBufferCount = 1,
      };
      VkCommandBuffer qfb_cmd_handle;
      result = vn_AllocateCommandBuffers(dev_handle, &info, &qfb_cmd_handle);
      if (result != VK_SUCCESS) {
         vk_free(&cmd_pool->allocator, qfb_cmd);
         goto out_unlock;
      }

      qfb_cmd->fb_cmd_pool = fb_cmd_pool;
      qfb_cmd->cmd_handle = qfb_cmd_handle;
   } else {
      qfb_cmd = list_first_entry(&fb_cmd_pool->free_qfb_cmds,
                                 struct vn_query_feedback_cmd, head);
      list_del(&qfb_cmd->head);
      vn_ResetCommandBuffer(qfb_cmd->cmd_handle, 0);
   }

   result = vn_query_feedback_cmd_record(dev_handle, query_records, qfb_cmd);
   if (result != VK_SUCCESS) {
      list_add(&qfb_cmd->head, &fb_cmd_pool->free_qfb_cmds);
      goto out_unlock;
   }

   *out_qfb_cmd = qfb_cmd;

out_unlock:
   simple_mtx_unlock(&fb_cmd_pool->mutex);

   return result;
}

void
vn_query_feedback_cmd_free(struct vn_query_feedback_cmd *qfb_cmd)
{
   simple_mtx_lock(&qfb_cmd->fb_cmd_pool->mutex);
   list_add(&qfb_cmd->head, &qfb_cmd->fb_cmd_pool->free_qfb_cmds);
   simple_mtx_unlock(&qfb_cmd->fb_cmd_pool->mutex);
}
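
/* Illustrative sketch, not driver code: since the feedback copy always
 * requests VK_QUERY_RESULT_64_BIT, a host-side reader serving an app that
 * asked for 32-bit results can narrow each value while copying out of the
 * slot (hypothetical loop; the names are assumptions):
 *
 *    const uint64_t *src = slot_data;
 *    uint32_t *dst = app_data;
 *    for (uint32_t i = 0; i < result_count; i++)
 *       dst[i] = (uint32_t)src[i];
 */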
VkResult
vn_feedback_cmd_alloc(VkDevice dev_handle,
                      struct vn_feedback_cmd_pool *fb_cmd_pool,
                      struct vn_feedback_slot *dst_slot,
                      struct vn_feedback_slot *src_slot,
                      VkCommandBuffer *out_cmd_handle)
{
   VkCommandPool cmd_pool_handle = fb_cmd_pool->pool_handle;
   const VkCommandBufferAllocateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
      .pNext = NULL,
      .commandPool = cmd_pool_handle,
      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
      .commandBufferCount = 1,
   };
   VkCommandBuffer cmd_handle;
   VkResult result;

   simple_mtx_lock(&fb_cmd_pool->mutex);
   result = vn_AllocateCommandBuffers(dev_handle, &info, &cmd_handle);
   if (result != VK_SUCCESS)
      goto out_unlock;

   result = vn_feedback_cmd_record(cmd_handle, dst_slot, src_slot);
   if (result != VK_SUCCESS) {
      vn_FreeCommandBuffers(dev_handle, cmd_pool_handle, 1, &cmd_handle);
      goto out_unlock;
   }

   *out_cmd_handle = cmd_handle;

out_unlock:
   simple_mtx_unlock(&fb_cmd_pool->mutex);

   return result;
}

void
vn_feedback_cmd_free(VkDevice dev_handle,
                     struct vn_feedback_cmd_pool *fb_cmd_pool,
                     VkCommandBuffer cmd_handle)
{
   simple_mtx_lock(&fb_cmd_pool->mutex);
   vn_FreeCommandBuffers(dev_handle, fb_cmd_pool->pool_handle, 1,
                         &cmd_handle);
   simple_mtx_unlock(&fb_cmd_pool->mutex);
}

VkResult
vn_feedback_cmd_pools_init(struct vn_device *dev)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
   VkDevice dev_handle = vn_device_to_handle(dev);
   struct vn_feedback_cmd_pool *fb_cmd_pools;
   VkCommandPoolCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
      .pNext = NULL,
      .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
   };

   if (VN_PERF(NO_FENCE_FEEDBACK) && VN_PERF(NO_SEMAPHORE_FEEDBACK) &&
       VN_PERF(NO_QUERY_FEEDBACK))
      return VK_SUCCESS;

   assert(dev->queue_family_count);

   fb_cmd_pools =
      vk_zalloc(alloc, sizeof(*fb_cmd_pools) * dev->queue_family_count,
                VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!fb_cmd_pools)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      VkResult result;

      info.queueFamilyIndex = dev->queue_families[i];
      result = vn_CreateCommandPool(dev_handle, &info, alloc,
                                    &fb_cmd_pools[i].pool_handle);
      if (result != VK_SUCCESS) {
         for (uint32_t j = 0; j < i; j++) {
            vn_DestroyCommandPool(dev_handle, fb_cmd_pools[j].pool_handle,
                                  alloc);
            simple_mtx_destroy(&fb_cmd_pools[j].mutex);
         }
         vk_free(alloc, fb_cmd_pools);
         return result;
      }

      simple_mtx_init(&fb_cmd_pools[i].mutex, mtx_plain);
      list_inithead(&fb_cmd_pools[i].free_qfb_cmds);
   }

   dev->fb_cmd_pools = fb_cmd_pools;

   return VK_SUCCESS;
}

void
vn_feedback_cmd_pools_fini(struct vn_device *dev)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
   VkDevice dev_handle = vn_device_to_handle(dev);

   if (!dev->fb_cmd_pools)
      return;

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      list_for_each_entry_safe(struct vn_query_feedback_cmd, feedback_cmd,
                               &dev->fb_cmd_pools[i].free_qfb_cmds, head)
         vk_free(alloc, feedback_cmd);

      vn_DestroyCommandPool(dev_handle, dev->fb_cmd_pools[i].pool_handle,
                            alloc);
      simple_mtx_destroy(&dev->fb_cmd_pools[i].mutex);
   }

   vk_free(alloc, dev->fb_cmd_pools);
}
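
/* Illustrative sketch, not driver code: the expected pairing of the
 * device-level init/fini helpers above, with the actual call sites living
 * outside this file (the pool size shown is a hypothetical value):
 *
 *    vn_feedback_pool_init(dev, &dev->feedback_pool, 4096, alloc);
 *    vn_feedback_cmd_pools_init(dev);
 *    ...
 *    vn_feedback_cmd_pools_fini(dev);
 *    vn_feedback_pool_fini(&dev->feedback_pool);
 */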