radeonsi: atomize L2 prefetches
to move the big conditional statement out of draw_vbo

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
commit 802fcdc0d2
parent c99ba3eb47
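For context, the change works by turning the prefetch logic into an ordinary state atom: si_init_atom registers cik_emit_prefetch_L2 with its own bit in sctx->dirty_atoms, si_mark_atom_dirty sets that bit whenever shaders or vertex buffer descriptors change, and the "mask = sctx->dirty_atoms" loop in si_draw_vbo emits every dirty atom once the cache flush has been issued. The following is a minimal, self-contained sketch of that dirty-bit atom pattern; the demo_* names and fields are illustrative assumptions for this sketch, not Mesa's actual r600_atom definitions.

/* Simplified sketch of the "dirty atom" pattern this commit relies on.
 * Names and fields are assumptions for illustration, not Mesa's real API.
 * Each atom owns one bit in a dirty mask; marking it dirty is cheap, and
 * the draw path emits every dirty atom exactly once, in registration order.
 * Requires GCC/Clang for __builtin_ctzll. */
#include <stdint.h>
#include <stdio.h>

struct demo_context;

struct demo_atom {
        unsigned id;                            /* bit index in dirty_atoms */
        void (*emit)(struct demo_context *ctx, struct demo_atom *atom);
};

struct demo_context {
        uint64_t dirty_atoms;                   /* one bit per registered atom */
        struct demo_atom *atoms[64];            /* emission order = registration order */
        unsigned num_atoms;
};

static void demo_init_atom(struct demo_context *ctx, struct demo_atom *atom,
                           void (*emit)(struct demo_context *, struct demo_atom *))
{
        atom->id = ctx->num_atoms;
        atom->emit = emit;
        ctx->atoms[ctx->num_atoms++] = atom;    /* sketch: no bounds check */
}

static void demo_mark_atom_dirty(struct demo_context *ctx, struct demo_atom *atom)
{
        ctx->dirty_atoms |= 1ull << atom->id;
}

static void demo_emit_prefetch_L2(struct demo_context *ctx, struct demo_atom *atom)
{
        (void)ctx; (void)atom;
        /* In the real driver this is where cik_emit_prefetch_L2 issues the
         * CP DMA prefetches for changed shaders and VBO descriptors. */
        printf("prefetch_L2 emitted\n");
}

static void demo_draw(struct demo_context *ctx)
{
        /* Mirrors the "mask = sctx->dirty_atoms; while (mask)" loop in
         * si_draw_vbo: emit each dirty atom once, then clear the mask. */
        uint64_t mask = ctx->dirty_atoms;
        while (mask) {
                unsigned i = __builtin_ctzll(mask);
                mask &= mask - 1;
                ctx->atoms[i]->emit(ctx, ctx->atoms[i]);
        }
        ctx->dirty_atoms = 0;
}

int main(void)
{
        struct demo_context ctx = {0};
        struct demo_atom prefetch_L2;

        demo_init_atom(&ctx, &prefetch_L2, demo_emit_prefetch_L2);
        demo_mark_atom_dirty(&ctx, &prefetch_L2); /* e.g. after shaders change */
        demo_draw(&ctx);                          /* prefetch emitted with the other atoms */
        return 0;
}

Because prefetch_L2 is listed first in si_state_atoms ("The order matters"), it is the first atom emitted, so the prefetch still happens right after si_emit_cache_flush, as the updated comment in si_draw_vbo notes.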
@@ -406,7 +406,46 @@ void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf
 		si_copy_buffer(sctx, buf, buf, offset, offset, size, SI_CPDMA_SKIP_ALL);
 }
 
+static void cik_prefetch_shader_async(struct si_context *sctx,
+				      struct si_pm4_state *state)
+{
+	if (state) {
+		struct pipe_resource *bo = &state->bo[0]->b.b;
+		assert(state->nbo == 1);
+
+		cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0);
+	}
+}
+
+static void cik_emit_prefetch_L2(struct si_context *sctx, struct r600_atom *atom)
+{
+	/* Prefetch shaders and VBO descriptors to TC L2. */
+	if (si_pm4_state_changed(sctx, ls))
+		cik_prefetch_shader_async(sctx, sctx->queued.named.ls);
+	if (si_pm4_state_changed(sctx, hs))
+		cik_prefetch_shader_async(sctx, sctx->queued.named.hs);
+	if (si_pm4_state_changed(sctx, es))
+		cik_prefetch_shader_async(sctx, sctx->queued.named.es);
+	if (si_pm4_state_changed(sctx, gs))
+		cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
+	if (si_pm4_state_changed(sctx, vs))
+		cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
+
+	/* Vertex buffer descriptors are uploaded uncached, so prefetch
+	 * them right after the VS binary. */
+	if (sctx->vertex_buffer_pointer_dirty) {
+		cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b,
+					 sctx->vertex_buffers.buffer_offset,
+					 sctx->vertex_elements->count * 16);
+	}
+	if (si_pm4_state_changed(sctx, ps))
+		cik_prefetch_shader_async(sctx, sctx->queued.named.ps);
+}
+
 void si_init_cp_dma_functions(struct si_context *sctx)
 {
 	sctx->b.clear_buffer = si_clear_buffer;
+
+	si_init_atom(sctx, &sctx->prefetch_L2, &sctx->atoms.s.prefetch_L2,
+		     cik_emit_prefetch_L2);
 }
@@ -1038,6 +1038,8 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
 	 * uploaded to a fresh new buffer, so I don't think flushing the const
 	 * cache is needed. */
 	si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
+	if (sctx->b.chip_class >= CIK)
+		si_mark_atom_dirty(sctx, &sctx->prefetch_L2);
 	sctx->vertex_buffers_dirty = false;
 	sctx->vertex_buffer_pointer_dirty = true;
 	return true;
@@ -207,6 +207,9 @@ void si_begin_new_cs(struct si_context *ctx)
 	if (ctx->ce_preamble_ib)
 		si_ce_reinitialize_all_descriptors(ctx);
 
+	if (ctx->b.chip_class >= CIK)
+		si_mark_atom_dirty(ctx, &ctx->prefetch_L2);
+
 	ctx->framebuffer.dirty_cbufs = (1 << 8) - 1;
 	ctx->framebuffer.dirty_zsbuf = true;
 	si_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
@@ -232,6 +232,7 @@ struct si_context {
 	union si_state emitted;
 
 	/* Atom declarations. */
+	struct r600_atom prefetch_L2;
 	struct si_framebuffer framebuffer;
 	struct si_sample_locs msaa_sample_locs;
 	struct r600_atom db_render_state;
@@ -132,6 +132,7 @@ union si_state {
 union si_state_atoms {
 	struct {
 		/* The order matters. */
+		struct r600_atom *prefetch_L2;
 		struct r600_atom *render_cond;
 		struct r600_atom *streamout_begin;
 		struct r600_atom *streamout_enable; /* must be after streamout_begin */
@@ -940,17 +940,6 @@ void si_ce_post_draw_synchronization(struct si_context *sctx)
 	}
 }
 
-static void cik_prefetch_shader_async(struct si_context *sctx,
-				      struct si_pm4_state *state)
-{
-	if (state) {
-		struct pipe_resource *bo = &state->bo[0]->b.b;
-		assert(state->nbo == 1);
-
-		cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0);
-	}
-}
-
 void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
@@ -1129,34 +1118,10 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	if (!si_upload_vertex_buffer_descriptors(sctx))
 		return;
 
-	/* Flushed caches prior to prefetching shaders. */
+	/* Flush caches before the first state atom, which does L2 prefetches. */
 	if (sctx->b.flags)
 		si_emit_cache_flush(sctx);
 
-	/* Prefetch shaders and VBO descriptors to TC L2. */
-	if (sctx->b.chip_class >= CIK) {
-		if (si_pm4_state_changed(sctx, ls))
-			cik_prefetch_shader_async(sctx, sctx->queued.named.ls);
-		if (si_pm4_state_changed(sctx, hs))
-			cik_prefetch_shader_async(sctx, sctx->queued.named.hs);
-		if (si_pm4_state_changed(sctx, es))
-			cik_prefetch_shader_async(sctx, sctx->queued.named.es);
-		if (si_pm4_state_changed(sctx, gs))
-			cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
-		if (si_pm4_state_changed(sctx, vs))
-			cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
-
-		/* Vertex buffer descriptors are uploaded uncached, so prefetch
-		 * them right after the VS binary. */
-		if (sctx->vertex_buffer_pointer_dirty) {
-			cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b,
-						 sctx->vertex_buffers.buffer_offset,
-						 sctx->vertex_elements->count * 16);
-		}
-		if (si_pm4_state_changed(sctx, ps))
-			cik_prefetch_shader_async(sctx, sctx->queued.named.ps);
-	}
-
 	/* Emit states. */
 	mask = sctx->dirty_atoms;
 	while (mask) {
@@ -2525,6 +2525,9 @@ bool si_update_shaders(struct si_context *sctx)
 			return false;
 	}
 
+	if (sctx->b.chip_class >= CIK)
+		si_mark_atom_dirty(sctx, &sctx->prefetch_L2);
+
 	sctx->do_update_shaders = false;
 	return true;
 }