/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * based in part on radv driver which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/**
 * This file implements VkQueue, VkFence, and VkSemaphore
 */

/* The system header names below were lost in extraction (angle brackets
 * stripped); they are restored here based on what the code requires
 * (assert(), uint32_t, close(), Vulkan types).
 */
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
#include <vulkan/vulkan.h>

#include "pvr_job_compute.h"
#include "pvr_job_context.h"
#include "pvr_job_render.h"
#include "pvr_job_transfer.h"
#include "pvr_limits.h"
#include "pvr_private.h"
#include "util/macros.h"
#include "util/u_atomic.h"
#include "vk_alloc.h"
#include "vk_fence.h"
#include "vk_log.h"
#include "vk_object.h"
#include "vk_queue.h"
#include "vk_semaphore.h"
#include "vk_sync.h"
#include "vk_sync_dummy.h"
#include "vk_util.h"

static VkResult pvr_queue_init(struct pvr_device *device,
                               struct pvr_queue *queue,
                               const VkDeviceQueueCreateInfo *pCreateInfo,
                               uint32_t index_in_family)
{
   struct pvr_transfer_ctx *transfer_ctx;
   struct pvr_compute_ctx *compute_ctx;
   struct pvr_render_ctx *gfx_ctx;
   VkResult result;

   result =
      vk_queue_init(&queue->vk, &device->vk, pCreateInfo, index_in_family);
   if (result != VK_SUCCESS)
      return result;

   result = pvr_transfer_ctx_create(device,
                                    PVR_WINSYS_CTX_PRIORITY_MEDIUM,
                                    &transfer_ctx);
   if (result != VK_SUCCESS)
      goto err_vk_queue_finish;

   result = pvr_compute_ctx_create(device,
                                   PVR_WINSYS_CTX_PRIORITY_MEDIUM,
                                   &compute_ctx);
   if (result != VK_SUCCESS)
      goto err_transfer_ctx_destroy;

   result = pvr_render_ctx_create(device,
                                  PVR_WINSYS_CTX_PRIORITY_MEDIUM,
                                  &gfx_ctx);
   if (result != VK_SUCCESS)
      goto err_compute_ctx_destroy;

   queue->device = device;
   queue->gfx_ctx = gfx_ctx;
   queue->compute_ctx = compute_ctx;
   queue->transfer_ctx = transfer_ctx;

   for (uint32_t i = 0; i < ARRAY_SIZE(queue->completion); i++)
      queue->completion[i] = NULL;

   return VK_SUCCESS;

err_compute_ctx_destroy:
   pvr_compute_ctx_destroy(compute_ctx);

err_transfer_ctx_destroy:
   pvr_transfer_ctx_destroy(transfer_ctx);

err_vk_queue_finish:
   vk_queue_finish(&queue->vk);

   return result;
}

VkResult pvr_queues_create(struct pvr_device *device,
                           const VkDeviceCreateInfo *pCreateInfo)
{
   VkResult result;

   /* Check requested queue families and queues */
   assert(pCreateInfo->queueCreateInfoCount == 1);
   assert(pCreateInfo->pQueueCreateInfos[0].queueFamilyIndex == 0);
   assert(pCreateInfo->pQueueCreateInfos[0].queueCount <= PVR_MAX_QUEUES);

   const VkDeviceQueueCreateInfo *queue_create =
      &pCreateInfo->pQueueCreateInfos[0];

   device->queues = vk_alloc(&device->vk.alloc,
                             queue_create->queueCount *
                                sizeof(*device->queues),
                             8,
                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device->queues)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   device->queue_count = 0;

   for (uint32_t i = 0; i < queue_create->queueCount; i++) {
      result = pvr_queue_init(device, &device->queues[i], queue_create, i);
      if (result != VK_SUCCESS)
         goto err_queues_finish;

      device->queue_count++;
   }

   return VK_SUCCESS;

err_queues_finish:
   pvr_queues_destroy(device);
   return result;
}

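/* Tear down a single queue: destroy any completion syncs left over from
 * earlier submissions, release the render, compute and transfer contexts,
 * then finish the common vk_queue state.
 */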
static void pvr_queue_finish(struct pvr_queue *queue)
{
   for (uint32_t i = 0; i < ARRAY_SIZE(queue->completion); i++) {
      if (queue->completion[i])
         vk_sync_destroy(&queue->device->vk, queue->completion[i]);
   }

   pvr_render_ctx_destroy(queue->gfx_ctx);
   pvr_compute_ctx_destroy(queue->compute_ctx);
   pvr_transfer_ctx_destroy(queue->transfer_ctx);

   vk_queue_finish(&queue->vk);
}

void pvr_queues_destroy(struct pvr_device *device)
{
   for (uint32_t q_idx = 0; q_idx < device->queue_count; q_idx++)
      pvr_queue_finish(&device->queues[q_idx]);

   vk_free(&device->vk.alloc, device->queues);
}

VkResult pvr_QueueWaitIdle(VkQueue _queue)
{
   PVR_FROM_HANDLE(pvr_queue, queue, _queue);

   for (int i = 0U; i < ARRAY_SIZE(queue->completion); i++) {
      VkResult result;

      if (!queue->completion[i])
         continue;

      result = vk_sync_wait(&queue->device->vk,
                            queue->completion[i],
                            0U,
                            VK_SYNC_WAIT_COMPLETE,
                            UINT64_MAX);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

static VkResult
pvr_process_graphics_cmd(struct pvr_device *device,
                         struct pvr_queue *queue,
                         struct pvr_cmd_buffer *cmd_buffer,
                         struct pvr_sub_cmd_gfx *sub_cmd,
                         struct vk_sync **waits,
                         uint32_t wait_count,
                         uint32_t *stage_flags,
                         struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
{
   const struct pvr_framebuffer *framebuffer = sub_cmd->framebuffer;
   struct vk_sync *sync_geom;
   struct vk_sync *sync_frag;
   uint32_t bo_count = 0;
   VkResult result;

   STACK_ARRAY(struct pvr_winsys_job_bo, bos, framebuffer->attachment_count);
   if (!bos)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &sync_geom);
   if (result != VK_SUCCESS)
      return result;

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &sync_frag);
   if (result != VK_SUCCESS) {
      vk_sync_destroy(&device->vk, sync_geom);
      return result;
   }

   /* FIXME: DoShadowLoadOrStore() */

   /* FIXME: If the framebuffer being rendered to has multiple layers then we
    * need to split submissions that run a fragment job into two.
    */
   if (sub_cmd->job.run_frag && framebuffer->layers > 1)
      pvr_finishme("Split job submission for framebuffers with > 1 layers");

   /* Get any imported buffers used in framebuffer attachments. */
   for (uint32_t i = 0U; i < framebuffer->attachment_count; i++) {
      if (!framebuffer->attachments[i]->image->vma->bo->is_imported)
         continue;

      bos[bo_count].bo = framebuffer->attachments[i]->image->vma->bo;
      bos[bo_count].flags = PVR_WINSYS_JOB_BO_FLAG_WRITE;
      bo_count++;
   }

   /* This passes ownership of the wait fences to pvr_render_job_submit(). */
   result = pvr_render_job_submit(queue->gfx_ctx,
                                  &sub_cmd->job,
                                  bos,
                                  bo_count,
                                  waits,
                                  wait_count,
                                  stage_flags,
                                  sync_geom,
                                  sync_frag);
   STACK_ARRAY_FINISH(bos);
   if (result != VK_SUCCESS) {
      vk_sync_destroy(&device->vk, sync_geom);
      vk_sync_destroy(&device->vk, sync_frag);
      return result;
   }

   /* Replace the completion fences. */
   if (completions[PVR_JOB_TYPE_GEOM])
      vk_sync_destroy(&device->vk, completions[PVR_JOB_TYPE_GEOM]);

   completions[PVR_JOB_TYPE_GEOM] = sync_geom;

   if (completions[PVR_JOB_TYPE_FRAG])
      vk_sync_destroy(&device->vk, completions[PVR_JOB_TYPE_FRAG]);

   completions[PVR_JOB_TYPE_FRAG] = sync_frag;

   /* FIXME: DoShadowLoadOrStore() */

   return result;
}

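/* Submit a compute sub-command. A new syncobj is created to track the job
 * and, on success, replaces any previous compute completion sync in
 * completions.
 */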
static VkResult
pvr_process_compute_cmd(struct pvr_device *device,
                        struct pvr_queue *queue,
                        struct pvr_sub_cmd_compute *sub_cmd,
                        struct vk_sync **waits,
                        uint32_t wait_count,
                        uint32_t *stage_flags,
                        struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
{
   struct vk_sync *sync;
   VkResult result;

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &sync);
   if (result != VK_SUCCESS)
      return result;

   /* This passes ownership of the wait fences to pvr_compute_job_submit(). */
   result = pvr_compute_job_submit(queue->compute_ctx,
                                   sub_cmd,
                                   waits,
                                   wait_count,
                                   stage_flags,
                                   sync);
   if (result != VK_SUCCESS) {
      vk_sync_destroy(&device->vk, sync);
      return result;
   }

   /* Replace the completion fences. */
   if (completions[PVR_JOB_TYPE_COMPUTE])
      vk_sync_destroy(&device->vk, completions[PVR_JOB_TYPE_COMPUTE]);

   completions[PVR_JOB_TYPE_COMPUTE] = sync;

   return result;
}

static VkResult
pvr_process_transfer_cmds(struct pvr_device *device,
                          struct pvr_queue *queue,
                          struct pvr_sub_cmd_transfer *sub_cmd,
                          struct vk_sync **waits,
                          uint32_t wait_count,
                          uint32_t *stage_flags,
                          struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
{
   struct vk_sync *sync;
   VkResult result;

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &sync);
   if (result != VK_SUCCESS)
      return result;

   /* This passes ownership of the wait fences to pvr_transfer_job_submit(). */
   result = pvr_transfer_job_submit(device,
                                    queue->transfer_ctx,
                                    sub_cmd,
                                    waits,
                                    wait_count,
                                    stage_flags,
                                    sync);
   if (result != VK_SUCCESS) {
      vk_sync_destroy(&device->vk, sync);
      return result;
   }

   /* Replace the completion fences. */
   if (completions[PVR_JOB_TYPE_TRANSFER])
      vk_sync_destroy(&device->vk, completions[PVR_JOB_TYPE_TRANSFER]);

   completions[PVR_JOB_TYPE_TRANSFER] = sync;

   return result;
}

static VkResult
pvr_set_semaphore_payloads(struct pvr_device *device,
                           struct vk_sync *completions[static PVR_JOB_TYPE_MAX],
                           const VkSemaphore *signals,
                           uint32_t signal_count)
{
   struct vk_sync *sync;
   VkResult result;
   int fd = -1;

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &sync);
   if (result != VK_SUCCESS)
      return result;

   result = device->ws->ops->null_job_submit(device->ws,
                                             completions,
                                             PVR_JOB_TYPE_MAX,
                                             sync);
   if (result != VK_SUCCESS)
      goto end_set_semaphore_payloads;

   /* If we have a single signal semaphore, we can simply move merged sync's
    * payload to the signal semaphore's payload.
    */
   if (signal_count == 1U) {
      VK_FROM_HANDLE(vk_semaphore, sem, signals[0]);
      struct vk_sync *sem_sync = vk_semaphore_get_active_sync(sem);

      result = vk_sync_move(&device->vk, sem_sync, sync);
      goto end_set_semaphore_payloads;
   }

   result = vk_sync_export_sync_file(&device->vk, sync, &fd);
   if (result != VK_SUCCESS)
      goto end_set_semaphore_payloads;

   for (uint32_t i = 0U; i < signal_count; i++) {
      VK_FROM_HANDLE(vk_semaphore, sem, signals[i]);
      struct vk_sync *sem_sync = vk_semaphore_get_active_sync(sem);

      result = vk_sync_import_sync_file(&device->vk, sem_sync, fd);
      if (result != VK_SUCCESS)
         goto end_set_semaphore_payloads;
   }

end_set_semaphore_payloads:
   if (fd != -1)
      close(fd);

   vk_sync_destroy(&device->vk, sync);

   return result;
}

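/* Signal the submit fence: submit a null job that waits on all per-job-type
 * completion syncs and signals a merged sync, then move that sync's payload
 * into the fence's active sync.
 */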
static VkResult
pvr_set_fence_payload(struct pvr_device *device,
                      struct vk_sync *completions[static PVR_JOB_TYPE_MAX],
                      VkFence _fence)
{
   VK_FROM_HANDLE(vk_fence, fence, _fence);
   struct vk_sync *fence_sync;
   struct vk_sync *sync;
   VkResult result;

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &sync);
   if (result != VK_SUCCESS)
      return result;

   result = device->ws->ops->null_job_submit(device->ws,
                                             completions,
                                             PVR_JOB_TYPE_MAX,
                                             sync);
   if (result != VK_SUCCESS) {
      vk_sync_destroy(&device->vk, sync);
      return result;
   }

   fence_sync = vk_fence_get_active_sync(fence);
   result = vk_sync_move(&device->vk, fence_sync, sync);
   vk_sync_destroy(&device->vk, sync);

   return result;
}

static VkResult
pvr_process_cmd_buffer(struct pvr_device *device,
                       struct pvr_queue *queue,
                       VkCommandBuffer commandBuffer,
                       struct vk_sync **waits,
                       uint32_t wait_count,
                       uint32_t *stage_flags,
                       struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   VkResult result;

   assert(cmd_buffer->status == PVR_CMD_BUFFER_STATUS_EXECUTABLE);

   list_for_each_entry_safe (struct pvr_sub_cmd,
                             sub_cmd,
                             &cmd_buffer->sub_cmds,
                             link) {
      switch (sub_cmd->type) {
      case PVR_SUB_CMD_TYPE_GRAPHICS:
         result = pvr_process_graphics_cmd(device,
                                           queue,
                                           cmd_buffer,
                                           &sub_cmd->gfx,
                                           waits,
                                           wait_count,
                                           stage_flags,
                                           completions);
         break;

      case PVR_SUB_CMD_TYPE_COMPUTE:
         result = pvr_process_compute_cmd(device,
                                          queue,
                                          &sub_cmd->compute,
                                          waits,
                                          wait_count,
                                          stage_flags,
                                          completions);
         break;

      case PVR_SUB_CMD_TYPE_TRANSFER:
         result = pvr_process_transfer_cmds(device,
                                            queue,
                                            &sub_cmd->transfer,
                                            waits,
                                            wait_count,
                                            stage_flags,
                                            completions);
         break;

      default:
         pvr_finishme("Unsupported sub-command type %d", sub_cmd->type);
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      if (result != VK_SUCCESS) {
         cmd_buffer->status = PVR_CMD_BUFFER_STATUS_INVALID;
         return result;
      }

      p_atomic_inc(&device->global_queue_job_count);
   }

   return VK_SUCCESS;
}

static VkResult
pvr_submit_null_job(struct pvr_device *device,
                    struct vk_sync **waits,
                    uint32_t wait_count,
                    uint32_t *stage_flags,
                    struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
{
   VkResult result;

   STATIC_ASSERT(PVR_JOB_TYPE_MAX >= PVR_NUM_SYNC_PIPELINE_STAGES);
   for (uint32_t i = 0U; i < PVR_JOB_TYPE_MAX; i++) {
      struct vk_sync *per_job_waits[wait_count];
      uint32_t per_job_waits_count = 0;

      /* Get the waits specific to the job type. */
      for (uint32_t j = 0U; j < wait_count; j++) {
         if (stage_flags[j] & (1U << i)) {
            per_job_waits[per_job_waits_count] = waits[j];
            per_job_waits_count++;
         }
      }

      if (per_job_waits_count == 0U)
         continue;

      result = vk_sync_create(&device->vk,
                              &device->pdevice->ws->syncobj_type,
                              0U,
                              0UL,
                              &completions[i]);
      if (result != VK_SUCCESS)
         goto err_destroy_completion_syncs;

      result = device->ws->ops->null_job_submit(device->ws,
                                                per_job_waits,
                                                per_job_waits_count,
                                                completions[i]);
      if (result != VK_SUCCESS)
         goto err_destroy_completion_syncs;
   }

   return VK_SUCCESS;

err_destroy_completion_syncs:
   for (uint32_t i = 0U; i < PVR_JOB_TYPE_MAX; i++) {
      if (completions[i]) {
         vk_sync_destroy(&device->vk, completions[i]);
         completions[i] = NULL;
      }
   }

   return result;
}

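/* For each job type present in src, replace the corresponding entry in dst,
 * destroying any sync being replaced. Entries for job types not present in
 * src are left untouched.
 */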
static void pvr_update_syncobjs(struct pvr_device *device,
                                struct vk_sync *src[static PVR_JOB_TYPE_MAX],
                                struct vk_sync *dst[static PVR_JOB_TYPE_MAX])
{
   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
      if (src[i]) {
         if (dst[i])
            vk_sync_destroy(&device->vk, dst[i]);

         dst[i] = src[i];
      }
   }
}

VkResult pvr_QueueSubmit(VkQueue _queue,
                         uint32_t submitCount,
                         const VkSubmitInfo *pSubmits,
                         VkFence fence)
{
   PVR_FROM_HANDLE(pvr_queue, queue, _queue);
   struct vk_sync *completion_syncobjs[PVR_JOB_TYPE_MAX] = {};
   struct pvr_device *device = queue->device;
   VkResult result;

   for (uint32_t i = 0U; i < submitCount; i++) {
      struct vk_sync *per_submit_completion_syncobjs[PVR_JOB_TYPE_MAX] = {};
      const VkSubmitInfo *desc = &pSubmits[i];
      struct vk_sync *waits[desc->waitSemaphoreCount];
      uint32_t stage_flags[desc->waitSemaphoreCount];
      uint32_t wait_count = 0;

      for (uint32_t j = 0U; j < desc->waitSemaphoreCount; j++) {
         VK_FROM_HANDLE(vk_semaphore, semaphore, desc->pWaitSemaphores[j]);
         struct vk_sync *sync = vk_semaphore_get_active_sync(semaphore);

         if (sync->type == &vk_sync_dummy_type)
            continue;

         /* We don't currently support timeline semaphores. */
         assert(!(sync->flags & VK_SYNC_IS_TIMELINE));

         stage_flags[wait_count] =
            pvr_stage_mask_dst(desc->pWaitDstStageMask[j]);
         waits[wait_count] = vk_semaphore_get_active_sync(semaphore);
         wait_count++;
      }

      if (desc->commandBufferCount > 0U) {
         for (uint32_t j = 0U; j < desc->commandBufferCount; j++) {
            result = pvr_process_cmd_buffer(device,
                                            queue,
                                            desc->pCommandBuffers[j],
                                            waits,
                                            wait_count,
                                            stage_flags,
                                            per_submit_completion_syncobjs);
            if (result != VK_SUCCESS)
               return result;
         }
      } else {
         result = pvr_submit_null_job(device,
                                      waits,
                                      wait_count,
                                      stage_flags,
                                      per_submit_completion_syncobjs);
         if (result != VK_SUCCESS)
            return result;
      }

      if (desc->signalSemaphoreCount) {
         result = pvr_set_semaphore_payloads(device,
                                             per_submit_completion_syncobjs,
                                             desc->pSignalSemaphores,
                                             desc->signalSemaphoreCount);
         if (result != VK_SUCCESS)
            return result;
      }

      pvr_update_syncobjs(device,
                          per_submit_completion_syncobjs,
                          completion_syncobjs);
   }

   if (fence) {
      result = pvr_set_fence_payload(device, completion_syncobjs, fence);
      if (result != VK_SUCCESS)
         return result;
   }

   pvr_update_syncobjs(device, completion_syncobjs, queue->completion);

   return VK_SUCCESS;
}