From 1fdc3b0fdec84893618e59187b58da92311b8d28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 14 May 2022 20:51:47 -0400 Subject: [PATCH] radeonsi: move CS preamble emission into the winsys The preamble will be skipped by the kernel if there is no context switch. Reviewed-by: Mihai Preda Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_gfx_cs.c | 9 +++++++-- src/gallium/drivers/radeonsi/si_pipe.h | 1 + src/gallium/drivers/radeonsi/si_state_shaders.cpp | 2 ++ src/gallium/include/winsys/radeon_winsys.h | 13 +++++++++++++ src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 8 ++++++++ src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 8 ++++++++ 6 files changed, 39 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 6bebfffccb9..75e9c768816 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -436,8 +436,13 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs) si_pm4_reset_emitted(ctx, first_cs); /* The CS initialization should be emitted before everything else. */ - if (ctx->cs_preamble_state) - si_pm4_emit(ctx, unlikely(is_secure) ? ctx->cs_preamble_state_tmz : ctx->cs_preamble_state); + if (ctx->cs_preamble_state) { + struct si_pm4_state *preamble = is_secure ? ctx->cs_preamble_state_tmz : + ctx->cs_preamble_state; + ctx->ws->cs_set_preamble(&ctx->gfx_cs, preamble->pm4, preamble->ndw, + preamble != ctx->last_preamble); + ctx->last_preamble = preamble; + } if (ctx->queued.named.ls) ctx->prefetch_L2_mask |= SI_PREFETCH_LS; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 8dcdb6a0d99..a3d8da2ea4c 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1050,6 +1050,7 @@ struct si_context { struct pipe_scissor_state window_rectangles[4]; /* Precomputed states. */ + struct si_pm4_state *last_preamble; struct si_pm4_state *cs_preamble_state; struct si_pm4_state *cs_preamble_state_tmz; uint16_t gs_ring_state_dw_offset; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 4e824f39d28..5b5e72129e8 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -3853,6 +3853,7 @@ bool si_update_gs_ring_buffers(struct si_context *sctx) } /* Flush the context to re-emit both cs_preamble states. */ + sctx->last_preamble = NULL; /* flag that the preamble has changed */ sctx->initial_gfx_cs_size = 0; /* force flush */ si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); @@ -4127,6 +4128,7 @@ void si_init_tess_factor_ring(struct si_context *sctx) /* Flush the context to re-emit the cs_preamble state. * This is done only once in a lifetime of a context. */ + sctx->last_preamble = NULL; /* flag that the preamble has changed */ sctx->initial_gfx_cs_size = 0; /* force flush */ si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); } diff --git a/src/gallium/include/winsys/radeon_winsys.h b/src/gallium/include/winsys/radeon_winsys.h index 925223e16ea..c3e218f0d4a 100644 --- a/src/gallium/include/winsys/radeon_winsys.h +++ b/src/gallium/include/winsys/radeon_winsys.h @@ -496,6 +496,19 @@ struct radeon_winsys { struct pipe_fence_handle **fence), void *flush_ctx, bool stop_exec_on_failure); + /** + * Set or change the CS preamble, which is a sequence of packets that is executed before + * the command buffer. If the winsys doesn't support preambles, the packets are inserted + * into the command buffer. + * + * \param cs Command stream + * \param preamble_ib Preamble IB for the context. + * \param preamble_num_dw Number of dwords in the preamble IB. + * \param preamble_changed Whether the preamble changed or is the same as the last one. + */ + void (*cs_set_preamble)(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib, + unsigned preamble_num_dw, bool preamble_changed); + /** * Set up and enable mid command buffer preemption for the command stream. * diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 4d3d269bb17..94e6c6f0c41 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -1003,6 +1003,13 @@ amdgpu_cs_create(struct radeon_cmdbuf *rcs, return true; } +static void amdgpu_cs_set_preamble(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib, + unsigned preamble_num_dw, bool preamble_changed) +{ + /* TODO: implement this properly */ + radeon_emit_array(cs, preamble_ib, preamble_num_dw); +} + static bool amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_ib, unsigned preamble_num_dw) @@ -1811,6 +1818,7 @@ void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws) ws->base.ctx_destroy = amdgpu_ctx_destroy; ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status; ws->base.cs_create = amdgpu_cs_create; + ws->base.cs_set_preamble = amdgpu_cs_set_preamble; ws->base.cs_setup_preemption = amdgpu_cs_setup_preemption; ws->base.cs_destroy = amdgpu_cs_destroy; ws->base.cs_add_buffer = amdgpu_cs_add_buffer; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 9199654ef00..2e3991aa076 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -215,6 +215,13 @@ radeon_drm_cs_create(struct radeon_cmdbuf *rcs, return true; } +static void radeon_drm_cs_set_preamble(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib, + unsigned preamble_num_dw, bool preamble_changed) +{ + /* The radeon kernel driver doesn't support preambles. */ + radeon_emit_array(cs, preamble_ib, preamble_num_dw); +} + int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo) { unsigned hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1); @@ -853,6 +860,7 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) ws->base.ctx_destroy = radeon_drm_ctx_destroy; ws->base.ctx_query_reset_status = radeon_drm_ctx_query_reset_status; ws->base.cs_create = radeon_drm_cs_create; + ws->base.cs_set_preamble = radeon_drm_cs_set_preamble; ws->base.cs_destroy = radeon_drm_cs_destroy; ws->base.cs_add_buffer = radeon_drm_cs_add_buffer; ws->base.cs_lookup_buffer = radeon_drm_cs_lookup_buffer;