diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index 582e5995164..b39825627ed 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -406,7 +406,46 @@ void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf si_copy_buffer(sctx, buf, buf, offset, offset, size, SI_CPDMA_SKIP_ALL); } +static void cik_prefetch_shader_async(struct si_context *sctx, + struct si_pm4_state *state) +{ /* Prefetch the first buffer of the given PM4 state (offset 0, length bo->width0) through cik_prefetch_TC_L2_async. A NULL state is skipped. The assert expects the state to hold exactly one buffer. */ + if (state) { + struct pipe_resource *bo = &state->bo[0]->b.b; + assert(state->nbo == 1); + + cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0); + } +} + +static void cik_emit_prefetch_L2(struct si_context *sctx, struct r600_atom *atom) +{ /* Function passed to si_init_atom for the prefetch_L2 atom in si_init_cp_dma_functions; the atom argument is not used here. A shader stage is prefetched only when si_pm4_state_changed() is true for it, in the order ls, hs, es, gs, vs, then the vertex buffer descriptors, then ps. */ + /* Prefetch shaders and VBO descriptors to TC L2. */ + if (si_pm4_state_changed(sctx, ls)) + cik_prefetch_shader_async(sctx, sctx->queued.named.ls); + if (si_pm4_state_changed(sctx, hs)) + cik_prefetch_shader_async(sctx, sctx->queued.named.hs); + if (si_pm4_state_changed(sctx, es)) + cik_prefetch_shader_async(sctx, sctx->queued.named.es); + if (si_pm4_state_changed(sctx, gs)) + cik_prefetch_shader_async(sctx, sctx->queued.named.gs); + if (si_pm4_state_changed(sctx, vs)) + cik_prefetch_shader_async(sctx, sctx->queued.named.vs); + + /* Vertex buffer descriptors are uploaded uncached, so prefetch + * them right after the VS binary. 
*/ + if (sctx->vertex_buffer_pointer_dirty) { + cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b, + sctx->vertex_buffers.buffer_offset, + sctx->vertex_elements->count * 16); /* NOTE(review): 16 is presumably the byte size of one vertex buffer descriptor - confirm */ + } + if (si_pm4_state_changed(sctx, ps)) + cik_prefetch_shader_async(sctx, sctx->queued.named.ps); +} + void si_init_cp_dma_functions(struct si_context *sctx) { sctx->b.clear_buffer = si_clear_buffer; + + si_init_atom(sctx, &sctx->prefetch_L2, &sctx->atoms.s.prefetch_L2, + cik_emit_prefetch_L2); /* hands cik_emit_prefetch_L2 to si_init_atom together with the prefetch_L2 atom and its atoms.s slot; presumably this registers it as the emit callback - confirm against si_init_atom */ } diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 4a9fcd0dff2..4c1120a60d1 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -1038,6 +1038,8 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) * uploaded to a fresh new buffer, so I don't think flushing the const * cache is needed. */ si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom); + if (sctx->b.chip_class >= CIK) + si_mark_atom_dirty(sctx, &sctx->prefetch_L2); sctx->vertex_buffers_dirty = false; sctx->vertex_buffer_pointer_dirty = true; return true; diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index 57eaac9dadc..d862e26342d 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -207,6 +207,9 @@ void si_begin_new_cs(struct si_context *ctx) if (ctx->ce_preamble_ib) si_ce_reinitialize_all_descriptors(ctx); + if (ctx->b.chip_class >= CIK) + si_mark_atom_dirty(ctx, &ctx->prefetch_L2); + ctx->framebuffer.dirty_cbufs = (1 << 8) - 1; ctx->framebuffer.dirty_zsbuf = true; si_mark_atom_dirty(ctx, &ctx->framebuffer.atom); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 655847439b6..b6474e6c38c 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -232,6 +232,7 @@ struct si_context { union 
si_state emitted; /* Atom declarations. */ + struct r600_atom prefetch_L2; struct si_framebuffer framebuffer; struct si_sample_locs msaa_sample_locs; struct r600_atom db_render_state; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 03e5011779b..915a8eb8e53 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -132,6 +132,7 @@ union si_state { union si_state_atoms { struct { /* The order matters. */ + struct r600_atom *prefetch_L2; struct r600_atom *render_cond; struct r600_atom *streamout_begin; struct r600_atom *streamout_enable; /* must be after streamout_begin */ diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index db671c9efb3..0374841f2b8 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -940,17 +940,6 @@ void si_ce_post_draw_synchronization(struct si_context *sctx) } } -static void cik_prefetch_shader_async(struct si_context *sctx, - struct si_pm4_state *state) -{ - if (state) { - struct pipe_resource *bo = &state->bo[0]->b.b; - assert(state->nbo == 1); - - cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0); - } -} - void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct si_context *sctx = (struct si_context *)ctx; @@ -1129,34 +1118,10 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) if (!si_upload_vertex_buffer_descriptors(sctx)) return; - /* Flushed caches prior to prefetching shaders. */ + /* Flush caches before the first state atom, which does L2 prefetches. */ if (sctx->b.flags) si_emit_cache_flush(sctx); - /* Prefetch shaders and VBO descriptors to TC L2. 
*/ - if (sctx->b.chip_class >= CIK) { - if (si_pm4_state_changed(sctx, ls)) - cik_prefetch_shader_async(sctx, sctx->queued.named.ls); - if (si_pm4_state_changed(sctx, hs)) - cik_prefetch_shader_async(sctx, sctx->queued.named.hs); - if (si_pm4_state_changed(sctx, es)) - cik_prefetch_shader_async(sctx, sctx->queued.named.es); - if (si_pm4_state_changed(sctx, gs)) - cik_prefetch_shader_async(sctx, sctx->queued.named.gs); - if (si_pm4_state_changed(sctx, vs)) - cik_prefetch_shader_async(sctx, sctx->queued.named.vs); - - /* Vertex buffer descriptors are uploaded uncached, so prefetch - * them right after the VS binary. */ - if (sctx->vertex_buffer_pointer_dirty) { - cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b, - sctx->vertex_buffers.buffer_offset, - sctx->vertex_elements->count * 16); - } - if (si_pm4_state_changed(sctx, ps)) - cik_prefetch_shader_async(sctx, sctx->queued.named.ps); - } - /* Emit states. */ mask = sctx->dirty_atoms; while (mask) { diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index b3616dcfaff..02f8d6c06be 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -2525,6 +2525,9 @@ bool si_update_shaders(struct si_context *sctx) return false; } + if (sctx->b.chip_class >= CIK) + si_mark_atom_dirty(sctx, &sctx->prefetch_L2); + sctx->do_update_shaders = false; return true; }