From f7c58559f5731790f4e68f1b1cb38c10818efa96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 29 Mar 2021 02:24:39 -0400 Subject: [PATCH] radeonsi: refine fast clears for small buffers, always use them for large HTILE Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_clear.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c index d67453e7b0a..ee69e872c4f 100644 --- a/src/gallium/drivers/radeonsi/si_clear.c +++ b/src/gallium/drivers/radeonsi/si_clear.c @@ -538,6 +538,7 @@ static void si_fast_clear(struct si_context *sctx, unsigned *buffers, struct si_clear_info info[8 * 2 + 1]; /* MRTs * (CMASK + DCC) + ZS */ unsigned num_clears = 0; unsigned clear_types = 0; + bool fb_too_small = fb->width * fb->height * fb->layers <= 512 * 512; /* This function is broken in BE, so just disable this path for now */ #if UTIL_ARCH_BIG_ENDIAN @@ -591,8 +592,7 @@ static void si_fast_clear(struct si_context *sctx, unsigned *buffers, * * This helps on both dGPUs and APUs, even small APUs like Mullins. */ - bool too_small = tex->buffer.b.b.nr_samples <= 1 && - tex->buffer.b.b.width0 * tex->buffer.b.b.height0 <= 512 * 512; + bool too_small = tex->buffer.b.b.nr_samples <= 1 && fb_too_small; bool eliminate_needed = false; bool fmask_decompress_needed = false; @@ -843,12 +843,13 @@ static void si_fast_clear(struct si_context *sctx, unsigned *buffers, zstex->surface.meta_offset, zstex->surface.meta_size, clear_value); clear_types |= SI_CLEAR_TYPE_HTILE; } - } else if (num_clears) { + } else if (num_clears || !fb_too_small) { /* This is where the HTILE buffer clear is done. * - * If there is no clear scheduled, we should use the draw-based clear that is without - * waits. If there is some other clear scheduled, we will have to wait anyway, so add - * the HTILE buffer clear to the batch here. 
+ * If no other clear is scheduled and the framebuffer is small, use the draw-based + * clear, which doesn't wait. If some other clear is scheduled, we will have to wait + * anyway, so add the HTILE buffer clear to the batch here. + * If the framebuffer is large enough, use this codepath too. */ uint64_t htile_offset = zstex->surface.meta_offset; unsigned htile_size = 0;