radeonsi: set up IBs for preemption

- Execute cs_preamble_state as a separate IB with different flags.
- Set the PREEMPT flag for the main IB.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5798>
Marek Olšák 2020-06-18 01:06:12 -04:00
parent b8892bc818
commit 9e2113c6dc
4 changed files with 95 additions and 8 deletions
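After this change a gfx submission carries two IB chunks: a preamble IB that the kernel may skip when no context switch has happened, and the main IB, which the CP is now allowed to preempt mid-buffer. Below is a minimal standalone sketch of that layout, assuming the uapi flag values from the kernel's include/uapi/drm/amdgpu_drm.h; ib_desc is a simplified stand-in for the real struct drm_amdgpu_cs_chunk_ib, and the addresses and sizes are placeholders:

   #include <inttypes.h>
   #include <stdint.h>
   #include <stdio.h>

   /* uapi flag values from include/uapi/drm/amdgpu_drm.h */
   #define AMDGPU_IB_FLAG_PREAMBLE (1 << 1) /* may be skipped if no context switch */
   #define AMDGPU_IB_FLAG_PREEMPT  (1 << 2) /* mid-IB preemption is allowed */

   struct ib_desc { /* simplified stand-in for struct drm_amdgpu_cs_chunk_ib */
      uint64_t va_start;
      uint32_t ib_bytes;
      uint32_t flags;
   };

   int main(void)
   {
      /* Placeholder addresses and sizes; the real ones come from the winsys BOs. */
      struct ib_desc preamble = { 0x100000, 64 * 4,   AMDGPU_IB_FLAG_PREAMBLE };
      struct ib_desc main_ib  = { 0x200000, 4096 * 4, AMDGPU_IB_FLAG_PREEMPT };

      printf("preamble: va=0x%" PRIx64 " bytes=%" PRIu32 " flags=0x%" PRIx32 "\n",
             preamble.va_start, preamble.ib_bytes, preamble.flags);
      printf("main:     va=0x%" PRIx64 " bytes=%" PRIu32 " flags=0x%" PRIx32 "\n",
             main_ib.va_start, main_ib.ib_bytes, main_ib.flags);
      return 0;
   }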


@@ -498,6 +498,16 @@ struct radeon_winsys {
    struct radeon_cmdbuf *(*cs_add_parallel_compute_ib)(struct radeon_cmdbuf *cs,
                                                        bool uses_gds_ordered_append);
 
+   /**
+    * Set up and enable mid command buffer preemption for the command stream.
+    *
+    * \param cs               Command stream
+    * \param preamble_ib      Non-preemptible preamble IB for the context.
+    * \param preamble_num_dw  Number of dwords in the preamble IB.
+    */
+   bool (*cs_setup_preemption)(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
+                               unsigned preamble_num_dw);
+
    /**
    * Destroy a command stream.
    *
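A hedged usage sketch of the new hook from the driver side (the real caller is the si_init_cp_reg_shadowing hunk below). The NULL check is an assumption: winsyses without preemption support can simply leave the vtable entry unset. The winsys copies the preamble dwords into its own buffer, so the caller keeps ownership of preamble_ib:

   /* Hypothetical wrapper; assumes the Mesa tree's radeon_winsys.h is in scope. */
   static bool enable_mid_ib_preemption(struct radeon_winsys *ws,
                                        struct radeon_cmdbuf *cs,
                                        const uint32_t *preamble_ib,
                                        unsigned preamble_num_dw)
   {
      if (!ws->cs_setup_preemption)
         return false; /* winsys without mid-IB preemption support */

      return ws->cs_setup_preemption(cs, preamble_ib, preamble_num_dw);
   }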


@@ -179,12 +179,15 @@ void si_init_cp_reg_shadowing(struct si_context *sctx)
 
       /* The register values are shadowed, so we won't need to set them again. */
       si_pm4_free_state(sctx, sctx->cs_preamble_state, ~0);
-      /* Execute the shadowing preamble as cs_preamble, which will
-       * load register values from memory.
-       */
-      sctx->cs_preamble_state = shadowing_preamble;
+      sctx->cs_preamble_state = NULL;
 
       si_set_tracked_regs_to_clear_state(sctx);
 
+      /* Setup preemption. The shadowing preamble will be executed as a preamble IB,
+       * which will load register values from memory on a context switch.
+       */
+      sctx->ws->cs_setup_preemption(sctx->gfx_cs, shadowing_preamble->pm4,
+                                    shadowing_preamble->ndw);
+      si_pm4_free_state(sctx, shadowing_preamble, ~0);
    }
 }
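Worth noting: si_pm4_free_state() releases shadowing_preamble immediately after the call because cs_setup_preemption() uploads a private copy of the dwords (see the memcpy in the winsys hunk below); nothing references the pm4 state afterwards. A toy illustration of that copy-then-free contract, with hypothetical names:

   #include <stdbool.h>
   #include <stdint.h>
   #include <stdlib.h>
   #include <string.h>

   static uint32_t *winsys_copy; /* toy stand-in for the winsys-owned preamble BO */

   static bool toy_setup_preemption(const uint32_t *ib, unsigned num_dw)
   {
      winsys_copy = malloc(num_dw * 4);
      if (!winsys_copy)
         return false;
      memcpy(winsys_copy, ib, num_dw * 4); /* private copy, like the VRAM upload */
      return true;
   }

   int main(void)
   {
      uint32_t *preamble = calloc(16, 4); /* caller-owned preamble dwords */
      bool ok = preamble && toy_setup_preemption(preamble, 16);

      free(preamble); /* safe immediately: the "winsys" holds its own copy */
      free(winsys_copy);
      return ok ? 0 : 1;
   }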


@@ -1034,6 +1034,60 @@ amdgpu_cs_add_parallel_compute_ib(struct radeon_cmdbuf *ib,
    return &cs->compute_ib.base;
 }
 
+static bool
+amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_ib,
+                           unsigned preamble_num_dw)
+{
+   struct amdgpu_ib *ib = amdgpu_ib(rcs);
+   struct amdgpu_cs *cs = amdgpu_cs_from_ib(ib);
+   struct amdgpu_winsys *ws = cs->ctx->ws;
+   struct amdgpu_cs_context *csc[2] = {&cs->csc1, &cs->csc2};
+   unsigned size = align(preamble_num_dw * 4, ws->info.ib_alignment);
+   struct pb_buffer *preamble_bo;
+   uint32_t *map;
+
+   /* Create the preamble IB buffer. */
+   preamble_bo = amdgpu_bo_create(ws, size, ws->info.ib_alignment,
+                                  RADEON_DOMAIN_VRAM,
+                                  RADEON_FLAG_NO_INTERPROCESS_SHARING |
+                                  RADEON_FLAG_GTT_WC |
+                                  RADEON_FLAG_READ_ONLY);
+   if (!preamble_bo)
+      return false;
+
+   map = (uint32_t*)amdgpu_bo_map(preamble_bo, NULL,
+                                  PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
+   if (!map) {
+      pb_reference(&preamble_bo, NULL);
+      return false;
+   }
+
+   /* Upload the preamble IB. */
+   memcpy(map, preamble_ib, preamble_num_dw * 4);
+
+   /* Pad the IB. */
+   uint32_t ib_pad_dw_mask = ws->info.ib_pad_dw_mask[cs->ring_type];
+   while (preamble_num_dw & ib_pad_dw_mask)
+      map[preamble_num_dw++] = PKT3_NOP_PAD;
+   amdgpu_bo_unmap(preamble_bo);
+
+   for (unsigned i = 0; i < 2; i++) {
+      csc[i]->ib[IB_PREAMBLE] = csc[i]->ib[IB_MAIN];
+      csc[i]->ib[IB_PREAMBLE].flags |= AMDGPU_IB_FLAG_PREAMBLE;
+      csc[i]->ib[IB_PREAMBLE].va_start = amdgpu_winsys_bo(preamble_bo)->va;
+      csc[i]->ib[IB_PREAMBLE].ib_bytes = preamble_num_dw * 4;
+
+      csc[i]->ib[IB_MAIN].flags |= AMDGPU_IB_FLAG_PREEMPT;
+   }
+
+   assert(!cs->preamble_ib_bo);
+   cs->preamble_ib_bo = preamble_bo;
+
+   amdgpu_cs_add_buffer(rcs, cs->preamble_ib_bo, RADEON_USAGE_READ, 0,
+                        RADEON_PRIO_IB1);
+   return true;
+}
+
 static bool amdgpu_cs_validate(struct radeon_cmdbuf *rcs)
 {
    return true;
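The padding loop above relies on ib_pad_dw_mask being an alignment mask (a power of two minus one). A standalone sketch of the same arithmetic; PKT3_NOP_PAD is a type-3 NOP packet with maximal count (0xffff1000 in Mesa), treated here as an opaque filler dword:

   #include <assert.h>
   #include <stdint.h>

   #define PKT3_NOP_PAD 0xffff1000u /* matches Mesa's definition */

   int main(void)
   {
      uint32_t ib[16] = {0};
      unsigned num_dw = 5;               /* unpadded preamble size */
      const uint32_t ib_pad_dw_mask = 7; /* ring requires a multiple of 8 dwords */

      /* Same loop shape as above: append NOPs until the size is aligned. */
      while (num_dw & ib_pad_dw_mask)
         ib[num_dw++] = PKT3_NOP_PAD;

      assert(num_dw == 8);
      (void)ib;
      return 0;
   }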
@@ -1446,7 +1500,7 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
    if (acs->stop_exec_on_failure && acs->ctx->num_rejected_cs) {
       r = -ECANCELED;
    } else {
-      struct drm_amdgpu_cs_chunk chunks[6];
+      struct drm_amdgpu_cs_chunk chunks[7];
       unsigned num_chunks = 0;
 
       /* BO list */
@@ -1589,6 +1643,14 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
          num_chunks++;
       }
 
+      /* IB */
+      if (cs->ib[IB_PREAMBLE].ib_bytes) {
+         chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB;
+         chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
+         chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_PREAMBLE];
+         num_chunks++;
+      }
+
       /* IB */
       cs->ib[IB_MAIN].ib_bytes *= 4; /* Convert from dwords to bytes. */
       chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB;
@@ -1596,10 +1658,13 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
       chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_MAIN];
       num_chunks++;
 
-      if (ws->secure && cs->secure)
+      if (ws->secure && cs->secure) {
+         cs->ib[IB_PREAMBLE].flags |= AMDGPU_IB_FLAGS_SECURE;
          cs->ib[IB_MAIN].flags |= AMDGPU_IB_FLAGS_SECURE;
-      else
+      } else {
+         cs->ib[IB_PREAMBLE].flags &= ~AMDGPU_IB_FLAGS_SECURE;
          cs->ib[IB_MAIN].flags &= ~AMDGPU_IB_FLAGS_SECURE;
+      }
 
       assert(num_chunks <= ARRAY_SIZE(chunks));
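The submit path now sets or clears the secure bit on both IBs every time, presumably because the two cs_context structures ping-pong across flushes and a stale secure bit must not leak into a later non-secure submission. A small sketch of that toggling; the PREAMBLE and PREEMPT values are uapi, while the SECURE bit's value here is an assumption for illustration:

   #include <assert.h>

   #define AMDGPU_IB_FLAG_PREAMBLE (1 << 1) /* uapi */
   #define AMDGPU_IB_FLAG_PREEMPT  (1 << 2) /* uapi */
   #define AMDGPU_IB_FLAGS_SECURE  (1 << 5) /* assumed value for illustration */

   int main(void)
   {
      unsigned preamble_flags = AMDGPU_IB_FLAG_PREAMBLE;
      unsigned main_flags = AMDGPU_IB_FLAG_PREEMPT;

      /* Secure submission: OR the bit in without touching the preemption bits. */
      preamble_flags |= AMDGPU_IB_FLAGS_SECURE;
      main_flags |= AMDGPU_IB_FLAGS_SECURE;

      /* A later non-secure submission reuses the same structs: clear it again. */
      preamble_flags &= ~AMDGPU_IB_FLAGS_SECURE;
      main_flags &= ~AMDGPU_IB_FLAGS_SECURE;

      assert(preamble_flags == AMDGPU_IB_FLAG_PREAMBLE);
      assert(main_flags == AMDGPU_IB_FLAG_PREEMPT);
      return 0;
   }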
@@ -1784,6 +1849,11 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
    if (cs->compute_ib.ib_mapped)
       amdgpu_get_new_ib(ws, cs, IB_PARALLEL_COMPUTE);
 
+   if (cs->preamble_ib_bo) {
+      amdgpu_cs_add_buffer(rcs, cs->preamble_ib_bo, RADEON_USAGE_READ, 0,
+                           RADEON_PRIO_IB1);
+   }
+
    cs->main.base.used_gart = 0;
    cs->main.base.used_vram = 0;
@@ -1802,6 +1872,7 @@ static void amdgpu_cs_destroy(struct radeon_cmdbuf *rcs)
    amdgpu_cs_sync_flush(rcs);
    util_queue_fence_destroy(&cs->flush_completed);
    p_atomic_dec(&cs->ctx->ws->num_cs);
+   pb_reference(&cs->preamble_ib_bo, NULL);
    pb_reference(&cs->main.big_ib_buffer, NULL);
    FREE(cs->main.base.prev);
    pb_reference(&cs->compute_ib.big_ib_buffer, NULL);
@@ -1829,6 +1900,7 @@ void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws)
    ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status;
    ws->base.cs_create = amdgpu_cs_create;
    ws->base.cs_add_parallel_compute_ib = amdgpu_cs_add_parallel_compute_ib;
+   ws->base.cs_setup_preemption = amdgpu_cs_setup_preemption;
    ws->base.cs_destroy = amdgpu_cs_destroy;
    ws->base.cs_add_buffer = amdgpu_cs_add_buffer;
    ws->base.cs_validate = amdgpu_cs_validate;


@@ -56,6 +56,7 @@ struct amdgpu_cs_buffer {
 };
 
 enum ib_type {
+   IB_PREAMBLE,
    IB_MAIN,
    IB_PARALLEL_COMPUTE,
    IB_NUM,
@@ -151,6 +152,7 @@ struct amdgpu_cs {
    struct util_queue_fence flush_completed;
    struct pipe_fence_handle *next_fence;
+   struct pb_buffer *preamble_ib_bo;
 };
 
 struct amdgpu_fence {