radeonsi: set up IBs for preemption
- Execute cs_preamble_state as a separate IB with different flags.
- Set the PREEMPT flag for the main IB.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5798>
This commit is contained in:
parent
b8892bc818
commit
9e2113c6dc
|
@ -498,6 +498,16 @@ struct radeon_winsys {
|
|||
struct radeon_cmdbuf *(*cs_add_parallel_compute_ib)(struct radeon_cmdbuf *cs,
|
||||
bool uses_gds_ordered_append);
|
||||
|
||||
/**
|
||||
* Set up and enable mid command buffer preemption for the command stream.
|
||||
*
|
||||
* \param cs Command stream
|
||||
* \param preamble_ib Non-preemptible preamble IB for the context.
|
||||
* \param preamble_num_dw Number of dwords in the preamble IB.
|
||||
*/
|
||||
bool (*cs_setup_preemption)(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
|
||||
unsigned preamble_num_dw);
|
||||
|
||||
/**
|
||||
* Destroy a command stream.
|
||||
*
|
||||
|
|
|
@ -179,12 +179,15 @@ void si_init_cp_reg_shadowing(struct si_context *sctx)
|
|||
|
||||
/* The register values are shadowed, so we won't need to set them again. */
|
||||
si_pm4_free_state(sctx, sctx->cs_preamble_state, ~0);
|
||||
|
||||
/* Execute the shadowing preamble as cs_preamble, which will
|
||||
* load register values from memory.
|
||||
*/
|
||||
sctx->cs_preamble_state = shadowing_preamble;
|
||||
sctx->cs_preamble_state = NULL;
|
||||
|
||||
si_set_tracked_regs_to_clear_state(sctx);
|
||||
|
||||
/* Setup preemption. The shadowing preamble will be executed as a preamble IB,
|
||||
* which will load register values from memory on a context switch.
|
||||
*/
|
||||
sctx->ws->cs_setup_preemption(sctx->gfx_cs, shadowing_preamble->pm4,
|
||||
shadowing_preamble->ndw);
|
||||
si_pm4_free_state(sctx, shadowing_preamble, ~0);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1034,6 +1034,60 @@ amdgpu_cs_add_parallel_compute_ib(struct radeon_cmdbuf *ib,
|
|||
return &cs->compute_ib.base;
|
||||
}
|
||||
|
||||
static bool
|
||||
amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_ib,
|
||||
unsigned preamble_num_dw)
|
||||
{
|
||||
struct amdgpu_ib *ib = amdgpu_ib(rcs);
|
||||
struct amdgpu_cs *cs = amdgpu_cs_from_ib(ib);
|
||||
struct amdgpu_winsys *ws = cs->ctx->ws;
|
||||
struct amdgpu_cs_context *csc[2] = {&cs->csc1, &cs->csc2};
|
||||
unsigned size = align(preamble_num_dw * 4, ws->info.ib_alignment);
|
||||
struct pb_buffer *preamble_bo;
|
||||
uint32_t *map;
|
||||
|
||||
/* Create the preamble IB buffer. */
|
||||
preamble_bo = amdgpu_bo_create(ws, size, ws->info.ib_alignment,
|
||||
RADEON_DOMAIN_VRAM,
|
||||
RADEON_FLAG_NO_INTERPROCESS_SHARING |
|
||||
RADEON_FLAG_GTT_WC |
|
||||
RADEON_FLAG_READ_ONLY);
|
||||
if (!preamble_bo)
|
||||
return false;
|
||||
|
||||
map = (uint32_t*)amdgpu_bo_map(preamble_bo, NULL,
|
||||
PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
|
||||
if (!map) {
|
||||
pb_reference(&preamble_bo, NULL);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Upload the preamble IB. */
|
||||
memcpy(map, preamble_ib, preamble_num_dw * 4);
|
||||
|
||||
/* Pad the IB. */
|
||||
uint32_t ib_pad_dw_mask = ws->info.ib_pad_dw_mask[cs->ring_type];
|
||||
while (preamble_num_dw & ib_pad_dw_mask)
|
||||
map[preamble_num_dw++] = PKT3_NOP_PAD;
|
||||
amdgpu_bo_unmap(preamble_bo);
|
||||
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
csc[i]->ib[IB_PREAMBLE] = csc[i]->ib[IB_MAIN];
|
||||
csc[i]->ib[IB_PREAMBLE].flags |= AMDGPU_IB_FLAG_PREAMBLE;
|
||||
csc[i]->ib[IB_PREAMBLE].va_start = amdgpu_winsys_bo(preamble_bo)->va;
|
||||
csc[i]->ib[IB_PREAMBLE].ib_bytes = preamble_num_dw * 4;
|
||||
|
||||
csc[i]->ib[IB_MAIN].flags |= AMDGPU_IB_FLAG_PREEMPT;
|
||||
}
|
||||
|
||||
assert(!cs->preamble_ib_bo);
|
||||
cs->preamble_ib_bo = preamble_bo;
|
||||
|
||||
amdgpu_cs_add_buffer(rcs, cs->preamble_ib_bo, RADEON_USAGE_READ, 0,
|
||||
RADEON_PRIO_IB1);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool amdgpu_cs_validate(struct radeon_cmdbuf *rcs)
|
||||
{
|
||||
return true;
|
||||
|
@ -1446,7 +1500,7 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
|
|||
if (acs->stop_exec_on_failure && acs->ctx->num_rejected_cs) {
|
||||
r = -ECANCELED;
|
||||
} else {
|
||||
struct drm_amdgpu_cs_chunk chunks[6];
|
||||
struct drm_amdgpu_cs_chunk chunks[7];
|
||||
unsigned num_chunks = 0;
|
||||
|
||||
/* BO list */
|
||||
|
@ -1589,6 +1643,14 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
|
|||
num_chunks++;
|
||||
}
|
||||
|
||||
/* IB */
|
||||
if (cs->ib[IB_PREAMBLE].ib_bytes) {
|
||||
chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB;
|
||||
chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
|
||||
chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_PREAMBLE];
|
||||
num_chunks++;
|
||||
}
|
||||
|
||||
/* IB */
|
||||
cs->ib[IB_MAIN].ib_bytes *= 4; /* Convert from dwords to bytes. */
|
||||
chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB;
|
||||
|
@ -1596,10 +1658,13 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
|
|||
chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_MAIN];
|
||||
num_chunks++;
|
||||
|
||||
if (ws->secure && cs->secure)
|
||||
if (ws->secure && cs->secure) {
|
||||
cs->ib[IB_PREAMBLE].flags |= AMDGPU_IB_FLAGS_SECURE;
|
||||
cs->ib[IB_MAIN].flags |= AMDGPU_IB_FLAGS_SECURE;
|
||||
else
|
||||
} else {
|
||||
cs->ib[IB_PREAMBLE].flags &= ~AMDGPU_IB_FLAGS_SECURE;
|
||||
cs->ib[IB_MAIN].flags &= ~AMDGPU_IB_FLAGS_SECURE;
|
||||
}
|
||||
|
||||
assert(num_chunks <= ARRAY_SIZE(chunks));
|
||||
|
||||
|
@ -1784,6 +1849,11 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
|
|||
if (cs->compute_ib.ib_mapped)
|
||||
amdgpu_get_new_ib(ws, cs, IB_PARALLEL_COMPUTE);
|
||||
|
||||
if (cs->preamble_ib_bo) {
|
||||
amdgpu_cs_add_buffer(rcs, cs->preamble_ib_bo, RADEON_USAGE_READ, 0,
|
||||
RADEON_PRIO_IB1);
|
||||
}
|
||||
|
||||
cs->main.base.used_gart = 0;
|
||||
cs->main.base.used_vram = 0;
|
||||
|
||||
|
@ -1802,6 +1872,7 @@ static void amdgpu_cs_destroy(struct radeon_cmdbuf *rcs)
|
|||
amdgpu_cs_sync_flush(rcs);
|
||||
util_queue_fence_destroy(&cs->flush_completed);
|
||||
p_atomic_dec(&cs->ctx->ws->num_cs);
|
||||
pb_reference(&cs->preamble_ib_bo, NULL);
|
||||
pb_reference(&cs->main.big_ib_buffer, NULL);
|
||||
FREE(cs->main.base.prev);
|
||||
pb_reference(&cs->compute_ib.big_ib_buffer, NULL);
|
||||
|
@ -1829,6 +1900,7 @@ void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws)
|
|||
ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status;
|
||||
ws->base.cs_create = amdgpu_cs_create;
|
||||
ws->base.cs_add_parallel_compute_ib = amdgpu_cs_add_parallel_compute_ib;
|
||||
ws->base.cs_setup_preemption = amdgpu_cs_setup_preemption;
|
||||
ws->base.cs_destroy = amdgpu_cs_destroy;
|
||||
ws->base.cs_add_buffer = amdgpu_cs_add_buffer;
|
||||
ws->base.cs_validate = amdgpu_cs_validate;
|
||||
|
|
|
@ -56,6 +56,7 @@ struct amdgpu_cs_buffer {
|
|||
};
|
||||
|
||||
enum ib_type {
|
||||
IB_PREAMBLE,
|
||||
IB_MAIN,
|
||||
IB_PARALLEL_COMPUTE,
|
||||
IB_NUM,
|
||||
|
@ -151,6 +152,7 @@ struct amdgpu_cs {
|
|||
|
||||
struct util_queue_fence flush_completed;
|
||||
struct pipe_fence_handle *next_fence;
|
||||
struct pb_buffer *preamble_ib_bo;
|
||||
};
|
||||
|
||||
struct amdgpu_fence {
|
||||
|
|
Loading…
Reference in New Issue