radeonsi: add a separate dirty mask for prefetches
so that we don't rely on si_pm4_state_enabled_and_changed, allowing us to move prefetches after draw calls. v2: clear the dirty mask after unbinding shaders Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de> (v1) Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com> (v1)
This commit is contained in:
parent
a7b0014d1a
commit
e887c68bd2
|
@ -451,28 +451,28 @@ static void cik_prefetch_shader_async(struct si_context *sctx,
|
|||
void cik_emit_prefetch_L2(struct si_context *sctx)
|
||||
{
|
||||
/* Prefetch shaders and VBO descriptors to TC L2. */
|
||||
if (si_pm4_state_enabled_and_changed(sctx, ls))
|
||||
if (sctx->prefetch_L2_mask & SI_PREFETCH_LS)
|
||||
cik_prefetch_shader_async(sctx, sctx->queued.named.ls);
|
||||
if (si_pm4_state_enabled_and_changed(sctx, hs))
|
||||
if (sctx->prefetch_L2_mask & SI_PREFETCH_HS)
|
||||
cik_prefetch_shader_async(sctx, sctx->queued.named.hs);
|
||||
if (si_pm4_state_enabled_and_changed(sctx, es))
|
||||
if (sctx->prefetch_L2_mask & SI_PREFETCH_ES)
|
||||
cik_prefetch_shader_async(sctx, sctx->queued.named.es);
|
||||
if (si_pm4_state_enabled_and_changed(sctx, gs))
|
||||
if (sctx->prefetch_L2_mask & SI_PREFETCH_GS)
|
||||
cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
|
||||
if (si_pm4_state_enabled_and_changed(sctx, vs))
|
||||
if (sctx->prefetch_L2_mask & SI_PREFETCH_VS)
|
||||
cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
|
||||
|
||||
/* Vertex buffer descriptors are uploaded uncached, so prefetch
|
||||
* them right after the VS binary. */
|
||||
if (sctx->vertex_buffer_pointer_dirty) {
|
||||
if (sctx->prefetch_L2_mask & SI_PREFETCH_VBO_DESCRIPTORS) {
|
||||
cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b,
|
||||
sctx->vertex_buffers.buffer_offset,
|
||||
sctx->vertex_elements->desc_list_byte_size);
|
||||
}
|
||||
if (si_pm4_state_enabled_and_changed(sctx, ps))
|
||||
if (sctx->prefetch_L2_mask & SI_PREFETCH_PS)
|
||||
cik_prefetch_shader_async(sctx, sctx->queued.named.ps);
|
||||
|
||||
sctx->prefetch_L2 = false;
|
||||
sctx->prefetch_L2_mask = 0;
|
||||
}
|
||||
|
||||
void si_init_cp_dma_functions(struct si_context *sctx)
|
||||
|
|
|
@ -1176,10 +1176,9 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
|
|||
* uploaded to a fresh new buffer, so I don't think flushing the const
|
||||
* cache is needed. */
|
||||
si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
|
||||
if (sctx->b.chip_class >= CIK)
|
||||
sctx->prefetch_L2 = true;
|
||||
sctx->vertex_buffers_dirty = false;
|
||||
sctx->vertex_buffer_pointer_dirty = true;
|
||||
sctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -216,8 +216,20 @@ void si_begin_new_cs(struct si_context *ctx)
|
|||
if (ctx->ce_ib)
|
||||
si_ce_restore_all_descriptors_at_ib_start(ctx);
|
||||
|
||||
if (ctx->b.chip_class >= CIK)
|
||||
ctx->prefetch_L2 = true;
|
||||
if (ctx->queued.named.ls)
|
||||
ctx->prefetch_L2_mask |= SI_PREFETCH_LS;
|
||||
if (ctx->queued.named.hs)
|
||||
ctx->prefetch_L2_mask |= SI_PREFETCH_HS;
|
||||
if (ctx->queued.named.es)
|
||||
ctx->prefetch_L2_mask |= SI_PREFETCH_ES;
|
||||
if (ctx->queued.named.gs)
|
||||
ctx->prefetch_L2_mask |= SI_PREFETCH_GS;
|
||||
if (ctx->queued.named.vs)
|
||||
ctx->prefetch_L2_mask |= SI_PREFETCH_VS;
|
||||
if (ctx->queued.named.ps)
|
||||
ctx->prefetch_L2_mask |= SI_PREFETCH_PS;
|
||||
if (ctx->vertex_buffers.buffer)
|
||||
ctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS;
|
||||
|
||||
/* CLEAR_STATE disables all colorbuffers, so only enable bound ones. */
|
||||
ctx->framebuffer.dirty_cbufs =
|
||||
|
|
|
@ -68,6 +68,14 @@
|
|||
#define SI_CONTEXT_VGT_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 12)
|
||||
#define SI_CONTEXT_VGT_STREAMOUT_SYNC (R600_CONTEXT_PRIVATE_FLAG << 13)
|
||||
|
||||
#define SI_PREFETCH_VBO_DESCRIPTORS (1 << 0)
|
||||
#define SI_PREFETCH_LS (1 << 1)
|
||||
#define SI_PREFETCH_HS (1 << 2)
|
||||
#define SI_PREFETCH_ES (1 << 3)
|
||||
#define SI_PREFETCH_GS (1 << 4)
|
||||
#define SI_PREFETCH_VS (1 << 5)
|
||||
#define SI_PREFETCH_PS (1 << 6)
|
||||
|
||||
#define SI_MAX_BORDER_COLORS 4096
|
||||
#define SIX_BITS 0x3F
|
||||
|
||||
|
@ -279,11 +287,11 @@ struct si_context {
|
|||
struct u_suballocator *ce_suballocator;
|
||||
unsigned ce_ram_saved_offset;
|
||||
uint16_t total_ce_ram_allocated;
|
||||
uint16_t prefetch_L2_mask;
|
||||
bool ce_need_synchronization:1;
|
||||
|
||||
bool gfx_flush_in_progress:1;
|
||||
bool compute_is_busy:1;
|
||||
bool prefetch_L2:1;
|
||||
|
||||
/* Atoms (direct states). */
|
||||
union si_state_atoms atoms;
|
||||
|
|
|
@ -1346,7 +1346,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
|
|||
if (sctx->b.flags)
|
||||
si_emit_cache_flush(sctx);
|
||||
|
||||
if (sctx->prefetch_L2)
|
||||
if (sctx->b.chip_class >= CIK && sctx->prefetch_L2_mask)
|
||||
cik_emit_prefetch_L2(sctx);
|
||||
|
||||
/* Emit state atoms. */
|
||||
|
|
|
@ -3307,8 +3307,37 @@ bool si_update_shaders(struct si_context *sctx)
|
|||
return false;
|
||||
}
|
||||
|
||||
if (sctx->b.chip_class >= CIK)
|
||||
sctx->prefetch_L2 = true;
|
||||
if (sctx->b.chip_class >= CIK) {
|
||||
if (si_pm4_state_enabled_and_changed(sctx, ls))
|
||||
sctx->prefetch_L2_mask |= SI_PREFETCH_LS;
|
||||
else if (!sctx->queued.named.ls)
|
||||
sctx->prefetch_L2_mask &= ~SI_PREFETCH_LS;
|
||||
|
||||
if (si_pm4_state_enabled_and_changed(sctx, hs))
|
||||
sctx->prefetch_L2_mask |= SI_PREFETCH_HS;
|
||||
else if (!sctx->queued.named.hs)
|
||||
sctx->prefetch_L2_mask &= ~SI_PREFETCH_HS;
|
||||
|
||||
if (si_pm4_state_enabled_and_changed(sctx, es))
|
||||
sctx->prefetch_L2_mask |= SI_PREFETCH_ES;
|
||||
else if (!sctx->queued.named.es)
|
||||
sctx->prefetch_L2_mask &= ~SI_PREFETCH_ES;
|
||||
|
||||
if (si_pm4_state_enabled_and_changed(sctx, gs))
|
||||
sctx->prefetch_L2_mask |= SI_PREFETCH_GS;
|
||||
else if (!sctx->queued.named.gs)
|
||||
sctx->prefetch_L2_mask &= ~SI_PREFETCH_GS;
|
||||
|
||||
if (si_pm4_state_enabled_and_changed(sctx, vs))
|
||||
sctx->prefetch_L2_mask |= SI_PREFETCH_VS;
|
||||
else if (!sctx->queued.named.vs)
|
||||
sctx->prefetch_L2_mask &= ~SI_PREFETCH_VS;
|
||||
|
||||
if (si_pm4_state_enabled_and_changed(sctx, ps))
|
||||
sctx->prefetch_L2_mask |= SI_PREFETCH_PS;
|
||||
else if (!sctx->queued.named.ps)
|
||||
sctx->prefetch_L2_mask &= ~SI_PREFETCH_PS;
|
||||
}
|
||||
|
||||
sctx->do_update_shaders = false;
|
||||
return true;
|
||||
|
|
Loading…
Reference in New Issue