diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index b8da3489f07..4c06bd72d89 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -96,6 +96,11 @@ struct radv_winsys_sem_info {
    bool cs_emit_wait;
    struct radv_winsys_sem_counts wait;
    struct radv_winsys_sem_counts signal;
+
+   /* Expresses a scheduled dependency, meaning that the submission of the
+    * referenced fence must be scheduled before the current submission.
+    */
+   struct radv_amdgpu_fence *scheduled_dependency;
 };
 
 static uint32_t radv_amdgpu_ctx_queue_syncobj(struct radv_amdgpu_ctx *ctx, unsigned ip,
@@ -1411,7 +1416,26 @@ radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, uint32_t submit_cou
    if (submit_count == 1) {
       result = radv_amdgpu_winsys_cs_submit_internal(ctx, &submits[0], &sem_info, can_patch);
    } else {
-      unreachable("submitting to multiple queues at the same time is not supported yet.");
+      /* Multiple queue submissions without gang submit.
+       * This code path will submit each item separately and add the
+       * previous submission as a scheduled dependency to the next one.
+       */
+
+      assert(ws->info.has_scheduled_fence_dependency);
+      struct radv_amdgpu_fence *next_dependency = NULL;
+
+      for (unsigned i = 0; i < submit_count; ++i) {
+         sem_info.scheduled_dependency = next_dependency;
+         sem_info.cs_emit_wait = i == 0;
+         sem_info.cs_emit_signal = i == submit_count - 1;
+
+         result = radv_amdgpu_winsys_cs_submit_internal(ctx, &submits[i], &sem_info, can_patch);
+
+         if (result != VK_SUCCESS)
+            goto out;
+
+         next_dependency = &ctx->last_submission[submits[i].ip_type][submits[i].queue_index];
+      }
    }
 
 out:
@@ -1684,6 +1708,7 @@ radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_cs_request
    int size;
    struct drm_amdgpu_cs_chunk *chunks;
    struct drm_amdgpu_cs_chunk_data *chunk_data;
+   struct drm_amdgpu_cs_chunk_dep chunk_dep;
    bool use_bo_list_create = ctx->ws->info.drm_minor < 27;
    struct drm_amdgpu_bo_list_in bo_list_in;
    void *wait_syncobj = NULL, *signal_syncobj = NULL;
@@ -1697,7 +1722,8 @@ radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_cs_request
    if (!queue_syncobj)
      return VK_ERROR_OUT_OF_HOST_MEMORY;
 
-   size = request->number_of_ibs + 1 + (has_user_fence ? 1 : 0) + (!use_bo_list_create ? 1 : 0) + 3;
+   size = request->number_of_ibs + 1 + (has_user_fence ? 1 : 0) + (!use_bo_list_create ? 1 : 0) +
+          3 + !!sem_info->scheduled_dependency;
 
    chunks = malloc(sizeof(chunks[0]) * size);
    if (!chunks)
@@ -1741,6 +1767,14 @@ radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_cs_request
       amdgpu_cs_chunk_fence_info_to_data(&fence_info, &chunk_data[i]);
    }
 
+   if (sem_info->scheduled_dependency) {
+      amdgpu_cs_chunk_fence_to_dep(&sem_info->scheduled_dependency->fence, &chunk_dep);
+      i = num_chunks++;
+      chunks[i].chunk_id = AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES;
+      chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4;
+      chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_dep;
+   }
+
    if (sem_info->cs_emit_wait &&
       (sem_info->wait.timeline_syncobj_count || sem_info->wait.syncobj_count || *queue_syncobj_wait)) {
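
Below is a minimal, illustrative sketch (not part of the patch) of how a scheduled-dependency chunk is assembled for the amdgpu CS ioctl, mirroring the new block in radv_amdgpu_cs_submit(). The helper name example_append_scheduled_dep and its surrounding setup are hypothetical; only amdgpu_cs_chunk_fence_to_dep(), struct drm_amdgpu_cs_chunk, struct drm_amdgpu_cs_chunk_dep and AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES come from libdrm.

#include <stdint.h>
#include <amdgpu.h>      /* libdrm: amdgpu_cs_chunk_fence_to_dep(), struct amdgpu_cs_fence */
#include <amdgpu_drm.h>  /* struct drm_amdgpu_cs_chunk, AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES */

/* Hypothetical helper: append a scheduled-dependency chunk to an existing
 * chunk array and return the new chunk count. "Scheduled" means the kernel
 * only has to schedule the referenced submission before this one; it does
 * not have to wait for it to complete.
 */
static unsigned
example_append_scheduled_dep(struct drm_amdgpu_cs_chunk *chunks, unsigned num_chunks,
                             struct amdgpu_cs_fence *dependency,
                             struct drm_amdgpu_cs_chunk_dep *chunk_dep)
{
   /* Convert the fence (context/ip/ring/seq_no) into the uAPI dependency layout. */
   amdgpu_cs_chunk_fence_to_dep(dependency, chunk_dep);

   chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES;
   chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4;
   /* chunk_dep must remain valid until the CS ioctl has been issued. */
   chunks[num_chunks].chunk_data = (uint64_t)(uintptr_t)chunk_dep;
   return num_chunks + 1;
}

This is also why the patch reserves one extra slot in the chunk array (the "+ !!sem_info->scheduled_dependency" term in the size computation) and keeps chunk_dep on the submit function's stack, where it outlives the ioctl call.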