radv/amdgpu: Add a syncobj per queue.
Used for merging our own dependencies in without submitting.
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14097>
This commit is contained in:
parent
6d1a15f7fa
commit
c03d258046
|
@ -78,6 +78,9 @@ struct radv_amdgpu_cs {
|
||||||
unsigned num_old_cs_buffers;
|
unsigned num_old_cs_buffers;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static uint32_t radv_amdgpu_ctx_queue_syncobj(struct radv_amdgpu_ctx *ctx, unsigned ip,
|
||||||
|
unsigned ring);
|
||||||
|
|
||||||
static inline struct radv_amdgpu_cs *
|
static inline struct radv_amdgpu_cs *
|
||||||
radv_amdgpu_cs(struct radeon_cmdbuf *base)
|
radv_amdgpu_cs(struct radeon_cmdbuf *base)
|
||||||
{
|
{
|
||||||
|
@ -1303,11 +1306,29 @@ static void
|
||||||
radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
|
radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
|
||||||
{
|
{
|
||||||
struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
|
struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
|
||||||
|
|
||||||
|
for (unsigned ip = 0; ip <= AMDGPU_HW_IP_DMA; ++ip) {
|
||||||
|
for (unsigned ring = 0; ring < MAX_RINGS_PER_TYPE; ++ring) {
|
||||||
|
if (ctx->queue_syncobj[ip][ring])
|
||||||
|
amdgpu_cs_destroy_syncobj(ctx->ws->dev, ctx->queue_syncobj[ip][ring]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ctx->ws->base.buffer_destroy(&ctx->ws->base, ctx->fence_bo);
|
ctx->ws->base.buffer_destroy(&ctx->ws->base, ctx->fence_bo);
|
||||||
amdgpu_cs_ctx_free(ctx->ctx);
|
amdgpu_cs_ctx_free(ctx->ctx);
|
||||||
FREE(ctx);
|
FREE(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static uint32_t
|
||||||
|
radv_amdgpu_ctx_queue_syncobj(struct radv_amdgpu_ctx *ctx, unsigned ip, unsigned ring)
|
||||||
|
{
|
||||||
|
uint32_t *syncobj = &ctx->queue_syncobj[ip][ring];
|
||||||
|
if (!*syncobj) {
|
||||||
|
amdgpu_cs_create_syncobj2(ctx->ws->dev, DRM_SYNCOBJ_CREATE_SIGNALED, syncobj);
|
||||||
|
}
|
||||||
|
return *syncobj;
|
||||||
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx, enum ring_type ring_type, int ring_index)
|
radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx, enum ring_type ring_type, int ring_index)
|
||||||
{
|
{
|
||||||
|
@ -1328,12 +1349,13 @@ radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx, enum ring_type ring_t
|
||||||
|
|
||||||
static void *
|
static void *
|
||||||
radv_amdgpu_cs_alloc_syncobj_chunk(struct radv_winsys_sem_counts *counts,
|
radv_amdgpu_cs_alloc_syncobj_chunk(struct radv_winsys_sem_counts *counts,
|
||||||
const uint32_t *syncobj_override,
|
const uint32_t *syncobj_override, uint32_t queue_syncobj,
|
||||||
struct drm_amdgpu_cs_chunk *chunk, int chunk_id)
|
struct drm_amdgpu_cs_chunk *chunk, int chunk_id)
|
||||||
{
|
{
|
||||||
const uint32_t *src = syncobj_override ? syncobj_override : counts->syncobj;
|
const uint32_t *src = syncobj_override ? syncobj_override : counts->syncobj;
|
||||||
|
unsigned count = counts->syncobj_count + 1;
|
||||||
struct drm_amdgpu_cs_chunk_sem *syncobj =
|
struct drm_amdgpu_cs_chunk_sem *syncobj =
|
||||||
malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * counts->syncobj_count);
|
malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * count);
|
||||||
if (!syncobj)
|
if (!syncobj)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
@ -1342,8 +1364,10 @@ radv_amdgpu_cs_alloc_syncobj_chunk(struct radv_winsys_sem_counts *counts,
|
||||||
sem->handle = src[i];
|
sem->handle = src[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
syncobj[counts->syncobj_count].handle = queue_syncobj;
|
||||||
|
|
||||||
chunk->chunk_id = chunk_id;
|
chunk->chunk_id = chunk_id;
|
||||||
chunk->length_dw = sizeof(struct drm_amdgpu_cs_chunk_sem) / 4 * counts->syncobj_count;
|
chunk->length_dw = sizeof(struct drm_amdgpu_cs_chunk_sem) / 4 * count;
|
||||||
chunk->chunk_data = (uint64_t)(uintptr_t)syncobj;
|
chunk->chunk_data = (uint64_t)(uintptr_t)syncobj;
|
||||||
return syncobj;
|
return syncobj;
|
||||||
}
|
}
|
||||||
|
@ -1351,12 +1375,13 @@ radv_amdgpu_cs_alloc_syncobj_chunk(struct radv_winsys_sem_counts *counts,
|
||||||
static void *
|
static void *
|
||||||
radv_amdgpu_cs_alloc_timeline_syncobj_chunk(struct radv_winsys_sem_counts *counts,
|
radv_amdgpu_cs_alloc_timeline_syncobj_chunk(struct radv_winsys_sem_counts *counts,
|
||||||
const uint32_t *syncobj_override,
|
const uint32_t *syncobj_override,
|
||||||
|
uint32_t queue_syncobj,
|
||||||
struct drm_amdgpu_cs_chunk *chunk, int chunk_id)
|
struct drm_amdgpu_cs_chunk *chunk, int chunk_id)
|
||||||
{
|
{
|
||||||
const uint32_t *src = syncobj_override ? syncobj_override : counts->syncobj;
|
const uint32_t *src = syncobj_override ? syncobj_override : counts->syncobj;
|
||||||
|
uint32_t count = counts->syncobj_count + counts->timeline_syncobj_count + 1;
|
||||||
struct drm_amdgpu_cs_chunk_syncobj *syncobj =
|
struct drm_amdgpu_cs_chunk_syncobj *syncobj =
|
||||||
malloc(sizeof(struct drm_amdgpu_cs_chunk_syncobj) *
|
malloc(sizeof(struct drm_amdgpu_cs_chunk_syncobj) * count);
|
||||||
(counts->syncobj_count + counts->timeline_syncobj_count));
|
|
||||||
if (!syncobj)
|
if (!syncobj)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
@ -1374,9 +1399,12 @@ radv_amdgpu_cs_alloc_timeline_syncobj_chunk(struct radv_winsys_sem_counts *count
|
||||||
sem->point = counts->points[i];
|
sem->point = counts->points[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
syncobj[count - 1].handle = queue_syncobj;
|
||||||
|
syncobj[count - 1].flags = 0;
|
||||||
|
syncobj[count - 1].point = 0;
|
||||||
|
|
||||||
chunk->chunk_id = chunk_id;
|
chunk->chunk_id = chunk_id;
|
||||||
chunk->length_dw = sizeof(struct drm_amdgpu_cs_chunk_syncobj) / 4 *
|
chunk->length_dw = sizeof(struct drm_amdgpu_cs_chunk_syncobj) / 4 * count;
|
||||||
(counts->syncobj_count + counts->timeline_syncobj_count);
|
|
||||||
chunk->chunk_data = (uint64_t)(uintptr_t)syncobj;
|
chunk->chunk_data = (uint64_t)(uintptr_t)syncobj;
|
||||||
return syncobj;
|
return syncobj;
|
||||||
}
|
}
|
||||||
|
@ -1494,6 +1522,10 @@ radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_cs_request
|
||||||
int i;
|
int i;
|
||||||
uint32_t bo_list = 0;
|
uint32_t bo_list = 0;
|
||||||
VkResult result = VK_SUCCESS;
|
VkResult result = VK_SUCCESS;
|
||||||
|
uint32_t queue_syncobj = radv_amdgpu_ctx_queue_syncobj(ctx, request->ip_type, request->ring);
|
||||||
|
|
||||||
|
if (!queue_syncobj)
|
||||||
|
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||||
|
|
||||||
size = request->number_of_ibs + 2 /* user fence */ + (!use_bo_list_create ? 1 : 0) + 3;
|
size = request->number_of_ibs + 2 /* user fence */ + (!use_bo_list_create ? 1 : 0) + 3;
|
||||||
|
|
||||||
|
@ -1537,19 +1569,19 @@ radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_cs_request
|
||||||
fence_info.offset = (request->ip_type * MAX_RINGS_PER_TYPE + request->ring) * sizeof(uint64_t);
|
fence_info.offset = (request->ip_type * MAX_RINGS_PER_TYPE + request->ring) * sizeof(uint64_t);
|
||||||
amdgpu_cs_chunk_fence_info_to_data(&fence_info, &chunk_data[i]);
|
amdgpu_cs_chunk_fence_info_to_data(&fence_info, &chunk_data[i]);
|
||||||
|
|
||||||
if ((sem_info->wait.syncobj_count || sem_info->wait.timeline_syncobj_count) &&
|
if (sem_info->cs_emit_wait) {
|
||||||
sem_info->cs_emit_wait) {
|
|
||||||
r = radv_amdgpu_cs_prepare_syncobjs(ctx->ws, &sem_info->wait, &in_syncobjs);
|
r = radv_amdgpu_cs_prepare_syncobjs(ctx->ws, &sem_info->wait, &in_syncobjs);
|
||||||
if (r)
|
if (r)
|
||||||
goto error_out;
|
goto error_out;
|
||||||
|
|
||||||
if (ctx->ws->info.has_timeline_syncobj) {
|
if (ctx->ws->info.has_timeline_syncobj) {
|
||||||
wait_syncobj = radv_amdgpu_cs_alloc_timeline_syncobj_chunk(
|
wait_syncobj = radv_amdgpu_cs_alloc_timeline_syncobj_chunk(
|
||||||
&sem_info->wait, in_syncobjs, &chunks[num_chunks],
|
&sem_info->wait, in_syncobjs, queue_syncobj, &chunks[num_chunks],
|
||||||
AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT);
|
AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT);
|
||||||
} else {
|
} else {
|
||||||
wait_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(
|
wait_syncobj =
|
||||||
&sem_info->wait, in_syncobjs, &chunks[num_chunks], AMDGPU_CHUNK_ID_SYNCOBJ_IN);
|
radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->wait, in_syncobjs, queue_syncobj,
|
||||||
|
&chunks[num_chunks], AMDGPU_CHUNK_ID_SYNCOBJ_IN);
|
||||||
}
|
}
|
||||||
if (!wait_syncobj) {
|
if (!wait_syncobj) {
|
||||||
result = VK_ERROR_OUT_OF_HOST_MEMORY;
|
result = VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||||
|
@ -1560,14 +1592,15 @@ radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_cs_request
|
||||||
sem_info->cs_emit_wait = false;
|
sem_info->cs_emit_wait = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((sem_info->signal.syncobj_count || sem_info->signal.timeline_syncobj_count) &&
|
if (sem_info->cs_emit_signal) {
|
||||||
sem_info->cs_emit_signal) {
|
|
||||||
if (ctx->ws->info.has_timeline_syncobj) {
|
if (ctx->ws->info.has_timeline_syncobj) {
|
||||||
signal_syncobj = radv_amdgpu_cs_alloc_timeline_syncobj_chunk(
|
signal_syncobj = radv_amdgpu_cs_alloc_timeline_syncobj_chunk(
|
||||||
&sem_info->signal, NULL, &chunks[num_chunks], AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL);
|
&sem_info->signal, NULL, queue_syncobj, &chunks[num_chunks],
|
||||||
|
AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL);
|
||||||
} else {
|
} else {
|
||||||
signal_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(
|
signal_syncobj =
|
||||||
&sem_info->signal, NULL, &chunks[num_chunks], AMDGPU_CHUNK_ID_SYNCOBJ_OUT);
|
radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->signal, NULL, queue_syncobj,
|
||||||
|
&chunks[num_chunks], AMDGPU_CHUNK_ID_SYNCOBJ_OUT);
|
||||||
}
|
}
|
||||||
if (!signal_syncobj) {
|
if (!signal_syncobj) {
|
||||||
result = VK_ERROR_OUT_OF_HOST_MEMORY;
|
result = VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||||
|
|
|
@ -49,6 +49,8 @@ struct radv_amdgpu_ctx {
|
||||||
struct radv_amdgpu_fence last_submission[AMDGPU_HW_IP_DMA + 1][MAX_RINGS_PER_TYPE];
|
struct radv_amdgpu_fence last_submission[AMDGPU_HW_IP_DMA + 1][MAX_RINGS_PER_TYPE];
|
||||||
|
|
||||||
struct radeon_winsys_bo *fence_bo;
|
struct radeon_winsys_bo *fence_bo;
|
||||||
|
|
||||||
|
uint32_t queue_syncobj[AMDGPU_HW_IP_DMA + 1][MAX_RINGS_PER_TYPE];
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline struct radv_amdgpu_ctx *
|
static inline struct radv_amdgpu_ctx *
|
||||||
|
|
Loading…
Reference in New Issue