radeonsi: set up IBs for preemption
- Execute cs_preamble_state as a separate IB with different flags.
- Set the PREEMPT flag for the main IB.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5798>
This commit is contained in:
parent
b8892bc818
commit
9e2113c6dc
|
@ -498,6 +498,16 @@ struct radeon_winsys {
|
||||||
struct radeon_cmdbuf *(*cs_add_parallel_compute_ib)(struct radeon_cmdbuf *cs,
|
struct radeon_cmdbuf *(*cs_add_parallel_compute_ib)(struct radeon_cmdbuf *cs,
|
||||||
bool uses_gds_ordered_append);
|
bool uses_gds_ordered_append);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set up and enable mid command buffer preemption for the command stream.
|
||||||
|
*
|
||||||
|
* \param cs Command stream
|
||||||
|
* \param preamble_ib Non-preemptible preamble IB for the context.
|
||||||
|
* \param preamble_num_dw Number of dwords in the preamble IB.
|
||||||
|
*/
|
||||||
|
bool (*cs_setup_preemption)(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
|
||||||
|
unsigned preamble_num_dw);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Destroy a command stream.
|
* Destroy a command stream.
|
||||||
*
|
*
|
||||||
|
|
|
@ -179,12 +179,15 @@ void si_init_cp_reg_shadowing(struct si_context *sctx)
|
||||||
|
|
||||||
/* The register values are shadowed, so we won't need to set them again. */
|
/* The register values are shadowed, so we won't need to set them again. */
|
||||||
si_pm4_free_state(sctx, sctx->cs_preamble_state, ~0);
|
si_pm4_free_state(sctx, sctx->cs_preamble_state, ~0);
|
||||||
|
sctx->cs_preamble_state = NULL;
|
||||||
/* Execute the shadowing preamble as cs_preamble, which will
|
|
||||||
* load register values from memory.
|
|
||||||
*/
|
|
||||||
sctx->cs_preamble_state = shadowing_preamble;
|
|
||||||
|
|
||||||
si_set_tracked_regs_to_clear_state(sctx);
|
si_set_tracked_regs_to_clear_state(sctx);
|
||||||
|
|
||||||
|
/* Setup preemption. The shadowing preamble will be executed as a preamble IB,
|
||||||
|
* which will load register values from memory on a context switch.
|
||||||
|
*/
|
||||||
|
sctx->ws->cs_setup_preemption(sctx->gfx_cs, shadowing_preamble->pm4,
|
||||||
|
shadowing_preamble->ndw);
|
||||||
|
si_pm4_free_state(sctx, shadowing_preamble, ~0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1034,6 +1034,60 @@ amdgpu_cs_add_parallel_compute_ib(struct radeon_cmdbuf *ib,
|
||||||
return &cs->compute_ib.base;
|
return &cs->compute_ib.base;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_ib,
|
||||||
|
unsigned preamble_num_dw)
|
||||||
|
{
|
||||||
|
struct amdgpu_ib *ib = amdgpu_ib(rcs);
|
||||||
|
struct amdgpu_cs *cs = amdgpu_cs_from_ib(ib);
|
||||||
|
struct amdgpu_winsys *ws = cs->ctx->ws;
|
||||||
|
struct amdgpu_cs_context *csc[2] = {&cs->csc1, &cs->csc2};
|
||||||
|
unsigned size = align(preamble_num_dw * 4, ws->info.ib_alignment);
|
||||||
|
struct pb_buffer *preamble_bo;
|
||||||
|
uint32_t *map;
|
||||||
|
|
||||||
|
/* Create the preamble IB buffer. */
|
||||||
|
preamble_bo = amdgpu_bo_create(ws, size, ws->info.ib_alignment,
|
||||||
|
RADEON_DOMAIN_VRAM,
|
||||||
|
RADEON_FLAG_NO_INTERPROCESS_SHARING |
|
||||||
|
RADEON_FLAG_GTT_WC |
|
||||||
|
RADEON_FLAG_READ_ONLY);
|
||||||
|
if (!preamble_bo)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
map = (uint32_t*)amdgpu_bo_map(preamble_bo, NULL,
|
||||||
|
PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
|
||||||
|
if (!map) {
|
||||||
|
pb_reference(&preamble_bo, NULL);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Upload the preamble IB. */
|
||||||
|
memcpy(map, preamble_ib, preamble_num_dw * 4);
|
||||||
|
|
||||||
|
/* Pad the IB. */
|
||||||
|
uint32_t ib_pad_dw_mask = ws->info.ib_pad_dw_mask[cs->ring_type];
|
||||||
|
while (preamble_num_dw & ib_pad_dw_mask)
|
||||||
|
map[preamble_num_dw++] = PKT3_NOP_PAD;
|
||||||
|
amdgpu_bo_unmap(preamble_bo);
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < 2; i++) {
|
||||||
|
csc[i]->ib[IB_PREAMBLE] = csc[i]->ib[IB_MAIN];
|
||||||
|
csc[i]->ib[IB_PREAMBLE].flags |= AMDGPU_IB_FLAG_PREAMBLE;
|
||||||
|
csc[i]->ib[IB_PREAMBLE].va_start = amdgpu_winsys_bo(preamble_bo)->va;
|
||||||
|
csc[i]->ib[IB_PREAMBLE].ib_bytes = preamble_num_dw * 4;
|
||||||
|
|
||||||
|
csc[i]->ib[IB_MAIN].flags |= AMDGPU_IB_FLAG_PREEMPT;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(!cs->preamble_ib_bo);
|
||||||
|
cs->preamble_ib_bo = preamble_bo;
|
||||||
|
|
||||||
|
amdgpu_cs_add_buffer(rcs, cs->preamble_ib_bo, RADEON_USAGE_READ, 0,
|
||||||
|
RADEON_PRIO_IB1);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static bool amdgpu_cs_validate(struct radeon_cmdbuf *rcs)
|
static bool amdgpu_cs_validate(struct radeon_cmdbuf *rcs)
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
|
@ -1446,7 +1500,7 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
|
||||||
if (acs->stop_exec_on_failure && acs->ctx->num_rejected_cs) {
|
if (acs->stop_exec_on_failure && acs->ctx->num_rejected_cs) {
|
||||||
r = -ECANCELED;
|
r = -ECANCELED;
|
||||||
} else {
|
} else {
|
||||||
struct drm_amdgpu_cs_chunk chunks[6];
|
struct drm_amdgpu_cs_chunk chunks[7];
|
||||||
unsigned num_chunks = 0;
|
unsigned num_chunks = 0;
|
||||||
|
|
||||||
/* BO list */
|
/* BO list */
|
||||||
|
@ -1589,6 +1643,14 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
|
||||||
num_chunks++;
|
num_chunks++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* IB */
|
||||||
|
if (cs->ib[IB_PREAMBLE].ib_bytes) {
|
||||||
|
chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB;
|
||||||
|
chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
|
||||||
|
chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_PREAMBLE];
|
||||||
|
num_chunks++;
|
||||||
|
}
|
||||||
|
|
||||||
/* IB */
|
/* IB */
|
||||||
cs->ib[IB_MAIN].ib_bytes *= 4; /* Convert from dwords to bytes. */
|
cs->ib[IB_MAIN].ib_bytes *= 4; /* Convert from dwords to bytes. */
|
||||||
chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB;
|
chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB;
|
||||||
|
@ -1596,10 +1658,13 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
|
||||||
chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_MAIN];
|
chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_MAIN];
|
||||||
num_chunks++;
|
num_chunks++;
|
||||||
|
|
||||||
if (ws->secure && cs->secure)
|
if (ws->secure && cs->secure) {
|
||||||
|
cs->ib[IB_PREAMBLE].flags |= AMDGPU_IB_FLAGS_SECURE;
|
||||||
cs->ib[IB_MAIN].flags |= AMDGPU_IB_FLAGS_SECURE;
|
cs->ib[IB_MAIN].flags |= AMDGPU_IB_FLAGS_SECURE;
|
||||||
else
|
} else {
|
||||||
|
cs->ib[IB_PREAMBLE].flags &= ~AMDGPU_IB_FLAGS_SECURE;
|
||||||
cs->ib[IB_MAIN].flags &= ~AMDGPU_IB_FLAGS_SECURE;
|
cs->ib[IB_MAIN].flags &= ~AMDGPU_IB_FLAGS_SECURE;
|
||||||
|
}
|
||||||
|
|
||||||
assert(num_chunks <= ARRAY_SIZE(chunks));
|
assert(num_chunks <= ARRAY_SIZE(chunks));
|
||||||
|
|
||||||
|
@ -1784,6 +1849,11 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
|
||||||
if (cs->compute_ib.ib_mapped)
|
if (cs->compute_ib.ib_mapped)
|
||||||
amdgpu_get_new_ib(ws, cs, IB_PARALLEL_COMPUTE);
|
amdgpu_get_new_ib(ws, cs, IB_PARALLEL_COMPUTE);
|
||||||
|
|
||||||
|
if (cs->preamble_ib_bo) {
|
||||||
|
amdgpu_cs_add_buffer(rcs, cs->preamble_ib_bo, RADEON_USAGE_READ, 0,
|
||||||
|
RADEON_PRIO_IB1);
|
||||||
|
}
|
||||||
|
|
||||||
cs->main.base.used_gart = 0;
|
cs->main.base.used_gart = 0;
|
||||||
cs->main.base.used_vram = 0;
|
cs->main.base.used_vram = 0;
|
||||||
|
|
||||||
|
@ -1802,6 +1872,7 @@ static void amdgpu_cs_destroy(struct radeon_cmdbuf *rcs)
|
||||||
amdgpu_cs_sync_flush(rcs);
|
amdgpu_cs_sync_flush(rcs);
|
||||||
util_queue_fence_destroy(&cs->flush_completed);
|
util_queue_fence_destroy(&cs->flush_completed);
|
||||||
p_atomic_dec(&cs->ctx->ws->num_cs);
|
p_atomic_dec(&cs->ctx->ws->num_cs);
|
||||||
|
pb_reference(&cs->preamble_ib_bo, NULL);
|
||||||
pb_reference(&cs->main.big_ib_buffer, NULL);
|
pb_reference(&cs->main.big_ib_buffer, NULL);
|
||||||
FREE(cs->main.base.prev);
|
FREE(cs->main.base.prev);
|
||||||
pb_reference(&cs->compute_ib.big_ib_buffer, NULL);
|
pb_reference(&cs->compute_ib.big_ib_buffer, NULL);
|
||||||
|
@ -1829,6 +1900,7 @@ void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws)
|
||||||
ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status;
|
ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status;
|
||||||
ws->base.cs_create = amdgpu_cs_create;
|
ws->base.cs_create = amdgpu_cs_create;
|
||||||
ws->base.cs_add_parallel_compute_ib = amdgpu_cs_add_parallel_compute_ib;
|
ws->base.cs_add_parallel_compute_ib = amdgpu_cs_add_parallel_compute_ib;
|
||||||
|
ws->base.cs_setup_preemption = amdgpu_cs_setup_preemption;
|
||||||
ws->base.cs_destroy = amdgpu_cs_destroy;
|
ws->base.cs_destroy = amdgpu_cs_destroy;
|
||||||
ws->base.cs_add_buffer = amdgpu_cs_add_buffer;
|
ws->base.cs_add_buffer = amdgpu_cs_add_buffer;
|
||||||
ws->base.cs_validate = amdgpu_cs_validate;
|
ws->base.cs_validate = amdgpu_cs_validate;
|
||||||
|
|
|
@ -56,6 +56,7 @@ struct amdgpu_cs_buffer {
|
||||||
};
|
};
|
||||||
|
|
||||||
enum ib_type {
|
enum ib_type {
|
||||||
|
IB_PREAMBLE,
|
||||||
IB_MAIN,
|
IB_MAIN,
|
||||||
IB_PARALLEL_COMPUTE,
|
IB_PARALLEL_COMPUTE,
|
||||||
IB_NUM,
|
IB_NUM,
|
||||||
|
@ -151,6 +152,7 @@ struct amdgpu_cs {
|
||||||
|
|
||||||
struct util_queue_fence flush_completed;
|
struct util_queue_fence flush_completed;
|
||||||
struct pipe_fence_handle *next_fence;
|
struct pipe_fence_handle *next_fence;
|
||||||
|
struct pb_buffer *preamble_ib_bo;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct amdgpu_fence {
|
struct amdgpu_fence {
|
||||||
|
|
Loading…
Reference in New Issue