diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index d137a1fbec3..631676bcd79 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -405,6 +405,14 @@ si_decompress_depth(struct si_context *sctx, tex->stencil_dirty_level_mask = 0; } } + /* set_framebuffer_state takes care of coherency for single-sample. + * The DB->CB copy uses CB for the final writes. + */ + if (copy_planes && tex->resource.b.b.nr_samples > 1) { + sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | + SI_CONTEXT_INV_GLOBAL_L2 | + SI_CONTEXT_FLUSH_AND_INV_CB; + } } static void @@ -487,10 +495,19 @@ static void si_blit_decompress_color(struct pipe_context *ctx, surf_tmpl.u.tex.last_layer = layer; cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl); + /* Required before and after FMASK and DCC_DECOMPRESS. */ + if (custom_blend == sctx->custom_blend_fmask_decompress || + custom_blend == sctx->custom_blend_dcc_decompress) + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB; + si_blitter_begin(ctx, SI_DECOMPRESS); util_blitter_custom_color(sctx->blitter, cbsurf, custom_blend); si_blitter_end(ctx); + if (custom_blend == sctx->custom_blend_fmask_decompress || + custom_blend == sctx->custom_blend_dcc_decompress) + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB; + pipe_surface_reference(&cbsurf, NULL); } @@ -503,6 +520,10 @@ static void si_blit_decompress_color(struct pipe_context *ctx, sctx->decompression_enabled = false; sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness; + + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB | + SI_CONTEXT_INV_GLOBAL_L2 | + SI_CONTEXT_INV_VMEM_L1; } static void @@ -1157,6 +1178,9 @@ static void si_do_CB_resolve(struct si_context *sctx, unsigned dst_level, unsigned dst_z, enum pipe_format format) { + /* Required before and after CB_RESOLVE. */ + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB; + si_blitter_begin(&sctx->b.b, SI_COLOR_RESOLVE | (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND)); util_blitter_custom_resolve_color(sctx->blitter, dst, dst_level, dst_z, @@ -1164,6 +1188,11 @@ static void si_do_CB_resolve(struct si_context *sctx, ~0, sctx->custom_blend_resolve, format); si_blitter_end(&sctx->b.b); + + /* Flush caches for possible texturing. */ + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB | + SI_CONTEXT_INV_GLOBAL_L2 | + SI_CONTEXT_INV_VMEM_L1; } static bool do_hardware_msaa_resolve(struct pipe_context *ctx, diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 193816d2bf7..a0d790ac2a9 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2530,11 +2530,18 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, * - shader write -> FB read * * DB caches are flushed on demand (using si_decompress_textures). + * + * When MSAA is enabled, CB and TC caches are flushed on demand + * (after FMASK decompression). Shader write -> FB read transitions + * cannot happen for MSAA textures, because MSAA shader images are + * not supported. */ - sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | - SI_CONTEXT_INV_GLOBAL_L2 | - SI_CONTEXT_FLUSH_AND_INV_CB | - SI_CONTEXT_CS_PARTIAL_FLUSH; + if (sctx->framebuffer.nr_samples <= 1) { + sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | + SI_CONTEXT_INV_GLOBAL_L2 | + SI_CONTEXT_FLUSH_AND_INV_CB; + } + sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH; /* u_blitter doesn't invoke depth decompression when it does multiple * blits in a row, but the only case when it matters for DB is when @@ -2542,8 +2549,11 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, * individual generate_mipmap blits. * Note that lower mipmap levels aren't compressed. */ - if (sctx->generate_mipmap_for_depth) - sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB; + if (sctx->generate_mipmap_for_depth) { + sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | + SI_CONTEXT_INV_GLOBAL_L2 | + SI_CONTEXT_FLUSH_AND_INV_DB; + } /* Take the maximum of the old and new count. If the new count is lower, * dirtying is needed to disable the unbound colorbuffers. @@ -3961,9 +3971,12 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags) { struct si_context *sctx = (struct si_context *)ctx; - sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | - SI_CONTEXT_INV_GLOBAL_L2 | - SI_CONTEXT_FLUSH_AND_INV_CB; + /* Multisample surfaces are flushed in si_decompress_textures. */ + if (sctx->framebuffer.nr_samples <= 1) { + sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | + SI_CONTEXT_INV_GLOBAL_L2 | + SI_CONTEXT_FLUSH_AND_INV_CB; + } sctx->framebuffer.do_update_surf_dirtiness = true; } @@ -4001,12 +4014,16 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } - /* Depth and stencil are flushed in si_decompress_textures when needed. */ - if (flags & PIPE_BARRIER_FRAMEBUFFER) - sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB; + /* MSAA color, any depth and any stencil are flushed in + * si_decompress_textures when needed. + */ + if (flags & PIPE_BARRIER_FRAMEBUFFER && + sctx->framebuffer.nr_samples <= 1) { + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB | + SI_CONTEXT_WRITEBACK_GLOBAL_L2; + } - if (flags & (PIPE_BARRIER_FRAMEBUFFER | - PIPE_BARRIER_INDIRECT_BUFFER)) + if (flags & PIPE_BARRIER_INDIRECT_BUFFER) sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; }