radeonsi: update dirty_level_mask only when flushing or unbinding framebuffer

This fixes corruption with bindless textures in Dawn Of War 3.

The do_update_surf_dirtiness mechanism was complicated and dirty_level_mask
was only updated after the first draw call. The problem is bindless textures
are checked for decompression every draw call and we would only decompress
after the first draw call. The solution is to set dirtiness after the last
draw call to the framebuffer, so the (unconditional) decompression of
bindless textures happens at the right time.

Cc: 17.2 <mesa-stable@lists.freedesktop.org>
Tested-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
Marek Olšák 2017-07-04 16:11:16 +02:00
parent 221fdae38b
commit f4d095cc65
5 changed files with 59 additions and 43 deletions

View File

@ -121,9 +121,7 @@ si_blit_dbcb_copy(struct si_context *sctx,
assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled);
bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
sctx->decompression_enabled = true;
sctx->framebuffer.do_update_surf_dirtiness = false;
while (level_mask) {
unsigned level = u_bit_scan(&level_mask);
@ -169,7 +167,6 @@ si_blit_dbcb_copy(struct si_context *sctx,
}
sctx->decompression_enabled = false;
sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
sctx->dbcb_depth_copy_enabled = false;
sctx->dbcb_stencil_copy_enabled = false;
si_mark_atom_dirty(sctx, &sctx->db_render_state);
@ -225,9 +222,7 @@ si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
surf_tmpl.format = texture->resource.b.b.format;
bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
sctx->decompression_enabled = true;
sctx->framebuffer.do_update_surf_dirtiness = false;
while (level_mask) {
unsigned level = u_bit_scan(&level_mask);
@ -267,7 +262,6 @@ si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
texture->stencil_dirty_level_mask &= ~fully_decompressed_mask;
sctx->decompression_enabled = false;
sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
sctx->db_flush_depth_inplace = false;
sctx->db_flush_stencil_inplace = false;
si_mark_atom_dirty(sctx, &sctx->db_render_state);
@ -474,9 +468,7 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
custom_blend = sctx->custom_blend_eliminate_fastclear;
}
bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
sctx->decompression_enabled = true;
sctx->framebuffer.do_update_surf_dirtiness = false;
while (level_mask) {
unsigned level = u_bit_scan(&level_mask);
@ -519,7 +511,6 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
}
sctx->decompression_enabled = false;
sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
SI_CONTEXT_INV_GLOBAL_L2 |
@ -971,10 +962,32 @@ static void si_decompress_subresource(struct pipe_context *ctx,
if (!(rtex->surface.flags & RADEON_SURF_SBUFFER))
planes &= ~PIPE_MASK_S;
/* If we've rendered into the framebuffer and it's a blitting
* source, make sure the decompression pass is invoked
* by dirtying the framebuffer.
*/
if (sctx->framebuffer.state.zsbuf &&
sctx->framebuffer.state.zsbuf->u.tex.level == level &&
sctx->framebuffer.state.zsbuf->texture == tex)
si_update_fb_dirtiness_after_rendering(sctx);
si_decompress_depth(sctx, rtex, planes,
level, level,
first_layer, last_layer);
} else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_offset) {
/* If we've rendered into the framebuffer and it's a blitting
* source, make sure the decompression pass is invoked
* by dirtying the framebuffer.
*/
for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
if (sctx->framebuffer.state.cbufs[i] &&
sctx->framebuffer.state.cbufs[i]->u.tex.level == level &&
sctx->framebuffer.state.cbufs[i]->texture == tex) {
si_update_fb_dirtiness_after_rendering(sctx);
break;
}
}
si_blit_decompress_color(ctx, rtex, level, level,
first_layer, last_layer, false);
}

View File

@ -188,7 +188,6 @@ struct si_framebuffer {
ubyte dirty_cbufs;
bool dirty_zsbuf;
bool any_dst_linear;
bool do_update_surf_dirtiness;
};
struct si_clip_state {

View File

@ -2456,6 +2456,38 @@ static void si_init_depth_surface(struct si_context *sctx,
surf->depth_initialized = true;
}
void si_update_fb_dirtiness_after_rendering(struct si_context *sctx)
{
if (sctx->decompression_enabled)
return;
if (sctx->framebuffer.state.zsbuf) {
struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
struct r600_texture *rtex = (struct r600_texture *)surf->texture;
rtex->dirty_level_mask |= 1 << surf->u.tex.level;
if (rtex->surface.flags & RADEON_SURF_SBUFFER)
rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
}
if (sctx->framebuffer.compressed_cb_mask) {
struct pipe_surface *surf;
struct r600_texture *rtex;
unsigned mask = sctx->framebuffer.compressed_cb_mask;
do {
unsigned i = u_bit_scan(&mask);
surf = sctx->framebuffer.state.cbufs[i];
rtex = (struct r600_texture*)surf->texture;
if (rtex->fmask.size)
rtex->dirty_level_mask |= 1 << surf->u.tex.level;
if (rtex->dcc_gather_statistics)
rtex->separate_dcc_dirty = true;
} while (mask);
}
}
static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)
{
for (int i = 0; i < state->nr_cbufs; ++i) {
@ -2483,6 +2515,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
bool unbound = false;
int i;
si_update_fb_dirtiness_after_rendering(sctx);
for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
if (!sctx->framebuffer.state.cbufs[i])
continue;
@ -2680,7 +2714,6 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
* changes come from the decompression passes themselves.
*/
sctx->need_check_render_feedback = true;
sctx->framebuffer.do_update_surf_dirtiness = true;
}
}
@ -3988,6 +4021,8 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
{
struct si_context *sctx = (struct si_context *)ctx;
si_update_fb_dirtiness_after_rendering(sctx);
/* Multisample surfaces are flushed in si_decompress_textures. */
if (sctx->framebuffer.nr_samples <= 1 &&
sctx->framebuffer.state.nr_cbufs) {
@ -3995,7 +4030,6 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
SI_CONTEXT_INV_GLOBAL_L2 |
SI_CONTEXT_FLUSH_AND_INV_CB;
}
sctx->framebuffer.do_update_surf_dirtiness = true;
}
/* This only ensures coherency for shader image/buffer stores. */

View File

@ -384,6 +384,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
const struct pipe_sampler_view *state,
unsigned width0, unsigned height0,
unsigned force_level);
void si_update_fb_dirtiness_after_rendering(struct si_context *sctx);
/* si_state_shader.c */
bool si_update_shaders(struct si_context *sctx);

View File

@ -1207,7 +1207,6 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
sctx->framebuffer.dirty_cbufs |=
((1 << sctx->framebuffer.state.nr_cbufs) - 1);
sctx->framebuffer.dirty_zsbuf = true;
sctx->framebuffer.do_update_surf_dirtiness = true;
si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
si_update_all_texture_descriptors(sctx);
}
@ -1392,36 +1391,6 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
}
if (sctx->framebuffer.do_update_surf_dirtiness) {
/* Set the depth buffer as dirty. */
if (sctx->framebuffer.state.zsbuf) {
struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
struct r600_texture *rtex = (struct r600_texture *)surf->texture;
rtex->dirty_level_mask |= 1 << surf->u.tex.level;
if (rtex->surface.flags & RADEON_SURF_SBUFFER)
rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
}
if (sctx->framebuffer.compressed_cb_mask) {
struct pipe_surface *surf;
struct r600_texture *rtex;
unsigned mask = sctx->framebuffer.compressed_cb_mask;
do {
unsigned i = u_bit_scan(&mask);
surf = sctx->framebuffer.state.cbufs[i];
rtex = (struct r600_texture*)surf->texture;
if (rtex->fmask.size)
rtex->dirty_level_mask |= 1 << surf->u.tex.level;
if (rtex->dcc_gather_statistics)
rtex->separate_dcc_dirty = true;
} while (mask);
}
sctx->framebuffer.do_update_surf_dirtiness = false;
}
sctx->b.num_draw_calls++;
if (info->primitive_restart)
sctx->b.num_prim_restart_calls++;