radv/amdgpu: Use scheduled dependency for submitting to multiple queues.
This code path will be used on kernels that don't have the "gang submit" feature in amdgpu yet. It does each submission separately, and uses scheduled dependencies to connect them. Signed-off-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-By: Tatsuyuki Ishi <ishitatsuyuki@gmail.com> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16361>
This commit is contained in:
parent
03b3344a7c
commit
81ad8c4979
|
@ -96,6 +96,11 @@ struct radv_winsys_sem_info {
|
|||
bool cs_emit_wait;
|
||||
struct radv_winsys_sem_counts wait;
|
||||
struct radv_winsys_sem_counts signal;
|
||||
|
||||
/* Expresses a scheduled dependency, meaning that the submission of the
|
||||
* referenced fence must be scheduled before the current submission.
|
||||
*/
|
||||
struct radv_amdgpu_fence *scheduled_dependency;
|
||||
};
|
||||
|
||||
static uint32_t radv_amdgpu_ctx_queue_syncobj(struct radv_amdgpu_ctx *ctx, unsigned ip,
|
||||
|
@ -1411,7 +1416,26 @@ radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, uint32_t submit_cou
|
|||
if (submit_count == 1) {
|
||||
result = radv_amdgpu_winsys_cs_submit_internal(ctx, &submits[0], &sem_info, can_patch);
|
||||
} else {
|
||||
unreachable("submitting to multiple queues at the same time is not supported yet.");
|
||||
/* Multiple queue submissions without gang submit.
|
||||
* This code path will submit each item separately and add the
|
||||
* previous submission as a scheduled dependency to the next one.
|
||||
*/
|
||||
|
||||
assert(ws->info.has_scheduled_fence_dependency);
|
||||
struct radv_amdgpu_fence *next_dependency = NULL;
|
||||
|
||||
for (unsigned i = 0; i < submit_count; ++i) {
|
||||
sem_info.scheduled_dependency = next_dependency;
|
||||
sem_info.cs_emit_wait = i == 0;
|
||||
sem_info.cs_emit_signal = i == submit_count - 1;
|
||||
|
||||
result = radv_amdgpu_winsys_cs_submit_internal(ctx, &submits[i], &sem_info, can_patch);
|
||||
|
||||
if (result != VK_SUCCESS)
|
||||
goto out;
|
||||
|
||||
next_dependency = &ctx->last_submission[submits[i].ip_type][submits[i].queue_index];
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
|
@ -1684,6 +1708,7 @@ radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_cs_request
|
|||
int size;
|
||||
struct drm_amdgpu_cs_chunk *chunks;
|
||||
struct drm_amdgpu_cs_chunk_data *chunk_data;
|
||||
struct drm_amdgpu_cs_chunk_dep chunk_dep;
|
||||
bool use_bo_list_create = ctx->ws->info.drm_minor < 27;
|
||||
struct drm_amdgpu_bo_list_in bo_list_in;
|
||||
void *wait_syncobj = NULL, *signal_syncobj = NULL;
|
||||
|
@ -1697,7 +1722,8 @@ radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_cs_request
|
|||
if (!queue_syncobj)
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
|
||||
size = request->number_of_ibs + 1 + (has_user_fence ? 1 : 0) + (!use_bo_list_create ? 1 : 0) + 3;
|
||||
size = request->number_of_ibs + 1 + (has_user_fence ? 1 : 0) + (!use_bo_list_create ? 1 : 0) +
|
||||
3 + !!sem_info->scheduled_dependency;
|
||||
|
||||
chunks = malloc(sizeof(chunks[0]) * size);
|
||||
if (!chunks)
|
||||
|
@ -1741,6 +1767,14 @@ radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_cs_request
|
|||
amdgpu_cs_chunk_fence_info_to_data(&fence_info, &chunk_data[i]);
|
||||
}
|
||||
|
||||
if (sem_info->scheduled_dependency) {
|
||||
amdgpu_cs_chunk_fence_to_dep(&sem_info->scheduled_dependency->fence, &chunk_dep);
|
||||
i = num_chunks++;
|
||||
chunks[i].chunk_id = AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES;
|
||||
chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4;
|
||||
chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_dep;
|
||||
}
|
||||
|
||||
if (sem_info->cs_emit_wait && (sem_info->wait.timeline_syncobj_count ||
|
||||
sem_info->wait.syncobj_count || *queue_syncobj_wait)) {
|
||||
|
||||
|
|
Loading…
Reference in New Issue