radeonsi: try to hit direct hw MSAA resolve by changing micro mode in clear

We could also do MSAA resolve in a compute shader like Vulkan and remove
these workarounds.

v2: comment the magic numbers

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
Marek Olšák 2016-06-08 21:00:22 +02:00
parent 373060652c
commit 4eea710b0d
3 changed files with 107 additions and 1 deletions

View File

@ -252,6 +252,7 @@ struct r600_texture {
uint64_t dcc_offset; /* 0 = disabled */
unsigned cb_color_info; /* fast clear enable bit */
unsigned color_clear_value[2];
unsigned last_msaa_resolve_target_micro_mode;
/* Depth buffer compression and fast clear. */
struct r600_htile_info htile;

View File

@ -1012,6 +1012,8 @@ r600_texture_create_object(struct pipe_screen *screen,
* This must be done after r600_setup_surface.
* Applies to R600-Cayman. */
rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D;
/* Applies to GCN. */
rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode;
if (rtex->is_depth) {
if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
@ -1808,6 +1810,83 @@ void vi_dcc_clear_level(struct r600_common_context *rctx,
clear_value, R600_COHERENCY_CB_META);
}
/* Set the same micro tile mode as the destination of the last MSAA resolve.
 * This allows hitting the MSAA resolve fast path, which requires that both
 * src and dst micro tile modes match.
 *
 * \param rscreen  screen; its chip_class selects the tile mode table and its
 *                 dirty counters are incremented when the mode is changed
 * \param rtex     MSAA color texture whose tiling may be switched
 *
 * Nothing is changed when the texture is shared, is not multisampled, or
 * already uses the requested micro tile mode. An unsupported requested mode
 * (depth, thick) asserts in debug builds and is otherwise ignored.
 *
 * The callers invoke this right before a full fast clear, i.e. when the
 * current contents of the texture are about to be overwritten anyway.
 */
static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen,
					   struct r600_texture *rtex)
{
	if (rtex->resource.is_shared ||
	    rtex->surface.nsamples <= 1 ||
	    rtex->surface.micro_tile_mode == rtex->last_msaa_resolve_target_micro_mode)
		return;

	assert(rtex->surface.level[0].mode == RADEON_SURF_MODE_2D);
	assert(rtex->surface.last_level == 0);

	/* These magic numbers were copied from addrlib. It doesn't use any
	 * definitions for them either. They are all 2D_TILED_THIN1 modes with
	 * different bpp and micro tile mode.
	 */
	if (rscreen->chip_class >= CIK) {
		/* The index depends only on the micro tile mode here. */
		switch (rtex->last_msaa_resolve_target_micro_mode) {
		case 0: /* displayable */
			rtex->surface.tiling_index[0] = 10;
			break;
		case 1: /* thin */
			rtex->surface.tiling_index[0] = 14;
			break;
		case 3: /* rotated */
			rtex->surface.tiling_index[0] = 28;
			break;
		default: /* depth, thick */
			assert(!"unexpected micro mode");
			return;
		}
	} else { /* SI */
		/* On SI the index also depends on the element size.
		 * surface.bpe is in BYTES per element (1, 2, 4, 8, 16),
		 * not bits, so the case labels below are byte counts.
		 */
		switch (rtex->last_msaa_resolve_target_micro_mode) {
		case 0: /* displayable */
			switch (rtex->surface.bpe) {
			case 1: /* 8 bpp */
				rtex->surface.tiling_index[0] = 10;
				break;
			case 2: /* 16 bpp */
				rtex->surface.tiling_index[0] = 11;
				break;
			default: /* 4, 8 bytes = 32, 64 bpp */
				rtex->surface.tiling_index[0] = 12;
				break;
			}
			break;
		case 1: /* thin */
			switch (rtex->surface.bpe) {
			case 1: /* 8 bpp */
				rtex->surface.tiling_index[0] = 14;
				break;
			case 2: /* 16 bpp */
				rtex->surface.tiling_index[0] = 15;
				break;
			case 4: /* 32 bpp */
				rtex->surface.tiling_index[0] = 16;
				break;
			default: /* 8, 16 bytes = 64, 128 bpp */
				rtex->surface.tiling_index[0] = 17;
				break;
			}
			break;
		default: /* depth, thick */
			assert(!"unexpected micro mode");
			return;
		}
	}

	rtex->surface.micro_tile_mode = rtex->last_msaa_resolve_target_micro_mode;

	/* Bump the screen-wide counters; presumably contexts compare against
	 * them to re-emit framebuffer state and texture descriptors that
	 * still encode the old tiling index — confirm against the readers.
	 */
	p_atomic_inc(&rscreen->dirty_fb_counter);
	p_atomic_inc(&rscreen->dirty_tex_descriptor_counter);
}
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
struct pipe_framebuffer_state *fb,
struct r600_atom *fb_state,
@ -1881,6 +1960,10 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
if (rctx->screen->debug_flags & DBG_NO_DCC_CLEAR)
continue;
/* We can change the micro tile mode before a full clear. */
if (rctx->screen->chip_class >= SI)
si_set_optimal_micro_tile_mode(rctx->screen, tex);
vi_get_fast_clear_parameters(fb->cbufs[i]->format, color, &reset_value, &clear_words_needed);
vi_dcc_clear_level(rctx, tex, 0, reset_value);
@ -1897,6 +1980,10 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
continue;
}
/* We can change the micro tile mode before a full clear. */
if (rctx->screen->chip_class >= SI)
si_set_optimal_micro_tile_mode(rctx->screen, tex);
/* Do the fast clear. */
rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
tex->cmask.offset, tex->cmask.size, 0,

View File

@ -22,6 +22,7 @@
*/
#include "si_pipe.h"
#include "sid.h"
#include "util/u_format.h"
#include "util/u_surface.h"
@ -903,8 +904,18 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
info->src.box.height == dst_height &&
info->src.box.depth == 1 &&
dst->surface.level[info->dst.level].mode >= RADEON_SURF_MODE_1D &&
src->surface.micro_tile_mode == dst->surface.micro_tile_mode &&
(!dst->cmask.size || !dst->dirty_level_mask)) { /* dst cannot be fast-cleared */
/* Check the last constraint. */
if (src->surface.micro_tile_mode != dst->surface.micro_tile_mode) {
/* The next fast clear will switch to this mode to
* get direct hw resolve next time if the mode is
* different now.
*/
src->last_msaa_resolve_target_micro_mode =
dst->surface.micro_tile_mode;
goto resolve_to_temp;
}
/* Resolving into a surface with DCC is unsupported. Since
* it's being overwritten anyway, clear it to uncompressed.
* This is still the fastest codepath even with this clear.
@ -929,6 +940,7 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
return true;
}
resolve_to_temp:
/* Shader-based resolve is VERY SLOW. Instead, resolve into
* a temporary texture and blit.
*/
@ -943,6 +955,12 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
templ.flags = R600_RESOURCE_FLAG_FORCE_TILING |
R600_RESOURCE_FLAG_DISABLE_DCC;
/* The src and dst microtile modes must be the same. */
if (src->surface.micro_tile_mode == V_009910_ADDR_SURF_DISPLAY_MICRO_TILING)
templ.bind = PIPE_BIND_SCANOUT;
else
templ.bind = 0;
tmp = ctx->screen->resource_create(ctx->screen, &templ);
if (!tmp)
return false;