ac/surface: add code for gfx10 displayable DCC
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4697>
This commit is contained in:
parent
e2fbba7720
commit
5e31e4b697
|
@ -32,6 +32,7 @@
|
|||
#include "util/macros.h"
|
||||
#include "util/u_atomic.h"
|
||||
#include "util/u_math.h"
|
||||
#include "sid.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
|
@ -378,10 +379,6 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib,
|
|||
return 0;
|
||||
}
|
||||
|
||||
#define G_009910_MICRO_TILE_MODE(x) (((x) >> 0) & 0x03)
|
||||
#define V_009910_ADDR_SURF_THICK_MICRO_TILING 0x03
|
||||
#define G_009910_MICRO_TILE_MODE_NEW(x) (((x) >> 22) & 0x07)
|
||||
|
||||
static void gfx6_set_micro_tile_mode(struct radeon_surf *surf,
|
||||
const struct radeon_info *info)
|
||||
{
|
||||
|
@ -1046,12 +1043,37 @@ static bool is_dcc_supported_by_DCN(const struct radeon_info *info,
|
|||
!info->use_display_dcc_with_retile_blit)
|
||||
return false;
|
||||
|
||||
/* 16bpp and 64bpp are more complicated, so they are disallowed for now. */
|
||||
if (surf->bpe != 4)
|
||||
return false;
|
||||
|
||||
/* Handle unaligned DCC. */
|
||||
if (info->use_display_dcc_unaligned &&
|
||||
(rb_aligned || pipe_aligned))
|
||||
return false;
|
||||
|
||||
switch (info->chip_class) {
|
||||
case GFX9:
|
||||
/* There are more constraints, but we always set
|
||||
* INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B,
|
||||
* which always works.
|
||||
*/
|
||||
assert(surf->u.gfx9.dcc.independent_64B_blocks &&
|
||||
surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B);
|
||||
return true;
|
||||
case GFX10:
|
||||
/* DCN requires INDEPENDENT_128B_BLOCKS = 0.
|
||||
* For 4K, it also requires INDEPENDENT_64B_BLOCKS = 1.
|
||||
*/
|
||||
return !surf->u.gfx9.dcc.independent_128B_blocks &&
|
||||
((config->info.width <= 2560 &&
|
||||
config->info.height <= 2560) ||
|
||||
(surf->u.gfx9.dcc.independent_64B_blocks &&
|
||||
surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B));
|
||||
default:
|
||||
unreachable("unhandled chip");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
|
||||
|
@ -1552,6 +1574,18 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
|
|||
AddrSurfInfoIn.flags.metaPipeUnaligned = 0;
|
||||
AddrSurfInfoIn.flags.metaRbUnaligned = 0;
|
||||
|
||||
/* Optimal values for the L2 cache. */
|
||||
if (info->chip_class == GFX9) {
|
||||
surf->u.gfx9.dcc.independent_64B_blocks = 1;
|
||||
surf->u.gfx9.dcc.independent_128B_blocks = 0;
|
||||
surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
|
||||
} else if (info->chip_class >= GFX10) {
|
||||
surf->u.gfx9.dcc.independent_64B_blocks = 0;
|
||||
surf->u.gfx9.dcc.independent_128B_blocks = 1;
|
||||
surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
|
||||
}
|
||||
|
||||
if (AddrSurfInfoIn.flags.display) {
|
||||
/* The display hardware can only read DCC with RB_ALIGNED=0 and
|
||||
* PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED.
|
||||
*
|
||||
|
@ -1559,12 +1593,26 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
|
|||
* PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes
|
||||
* after rendering, so PIPE_ALIGNED=1 is recommended.
|
||||
*/
|
||||
if (info->use_display_dcc_unaligned &&
|
||||
AddrSurfInfoIn.flags.display) {
|
||||
if (info->use_display_dcc_unaligned) {
|
||||
AddrSurfInfoIn.flags.metaPipeUnaligned = 1;
|
||||
AddrSurfInfoIn.flags.metaRbUnaligned = 1;
|
||||
}
|
||||
|
||||
/* Adjust DCC settings to meet DCN requirements. */
|
||||
if (info->use_display_dcc_unaligned ||
|
||||
info->use_display_dcc_with_retile_blit) {
|
||||
/* Only Navi12/14 support independent 64B blocks in L2,
|
||||
* but without DCC image stores.
|
||||
*/
|
||||
if (info->family == CHIP_NAVI12 ||
|
||||
info->family == CHIP_NAVI14) {
|
||||
surf->u.gfx9.dcc.independent_64B_blocks = 1;
|
||||
surf->u.gfx9.dcc.independent_128B_blocks = 0;
|
||||
surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch (mode) {
|
||||
case RADEON_SURF_MODE_LINEAR_ALIGNED:
|
||||
assert(config->info.samples <= 1);
|
||||
|
|
|
@ -139,6 +139,9 @@ struct gfx9_surf_flags {
|
|||
/* Flags describing a GFX9+ metadata surface. The `dcc` member of this type
 * (surf->u.gfx9.dcc) additionally carries the DCC block settings below, which
 * are chosen in gfx9_compute_surface and later written into the DCC control
 * fields of the color-buffer and texture descriptors.
 */
struct gfx9_surf_meta_flags {
|
||||
unsigned rb_aligned:1; /* optimal for RBs */
|
||||
unsigned pipe_aligned:1; /* optimal for TC */
|
||||
unsigned independent_64B_blocks:1; /* value for the INDEPENDENT_64B_BLOCKS field */
|
||||
unsigned independent_128B_blocks:1; /* value for the INDEPENDENT_128B_BLOCKS field */
|
||||
unsigned max_compressed_block_size:2; /* one of V_028C78_MAX_BLOCK_SIZE_* */
|
||||
};
|
||||
|
||||
struct gfx9_surf_layout {
|
||||
|
|
|
@ -227,6 +227,8 @@ struct radeon_bo_metadata {
|
|||
unsigned dcc_offset_256B : 24;
|
||||
unsigned dcc_pitch_max : 14; /* (mip chain pitch - 1) for DCN */
|
||||
unsigned dcc_independent_64B : 1;
|
||||
unsigned dcc_independent_128B : 1;
|
||||
unsigned dcc_max_compressed_block_size : 2;
|
||||
|
||||
bool scanout;
|
||||
} gfx9;
|
||||
|
|
|
@ -2416,10 +2416,10 @@ static void si_initialize_color_surface(struct si_context *sctx, struct si_surfa
|
|||
min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
|
||||
|
||||
surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
|
||||
S_028C78_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) |
|
||||
S_028C78_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.dcc.max_compressed_block_size) |
|
||||
S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
|
||||
S_028C78_INDEPENDENT_64B_BLOCKS(0) |
|
||||
S_028C78_INDEPENDENT_128B_BLOCKS(1);
|
||||
S_028C78_INDEPENDENT_64B_BLOCKS(tex->surface.u.gfx9.dcc.independent_64B_blocks) |
|
||||
S_028C78_INDEPENDENT_128B_BLOCKS(tex->surface.u.gfx9.dcc.independent_128B_blocks);
|
||||
} else if (sctx->chip_class >= GFX8) {
|
||||
unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
|
||||
unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
|
||||
|
@ -3799,7 +3799,7 @@ static void gfx10_make_texture_descriptor(
|
|||
|
||||
if (tex->surface.dcc_offset) {
|
||||
state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
|
||||
S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) |
|
||||
S_00A018_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.dcc.max_compressed_block_size) |
|
||||
S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format));
|
||||
}
|
||||
|
||||
|
|
|
@ -339,12 +339,11 @@ static void si_get_display_metadata(struct si_screen *sscreen, struct radeon_sur
|
|||
*array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
|
||||
|
||||
surf->u.gfx9.surf.swizzle_mode = metadata->u.gfx9.swizzle_mode;
|
||||
*is_scanout = metadata->u.gfx9.scanout;
|
||||
|
||||
if (metadata->u.gfx9.dcc_offset_256B) {
|
||||
surf->u.gfx9.dcc.independent_64B_blocks = metadata->u.gfx9.dcc_independent_64B;
|
||||
surf->u.gfx9.dcc.independent_128B_blocks = metadata->u.gfx9.dcc_independent_128B;
|
||||
surf->u.gfx9.dcc.max_compressed_block_size = metadata->u.gfx9.dcc_max_compressed_block_size;
|
||||
surf->u.gfx9.display_dcc_pitch_max = metadata->u.gfx9.dcc_pitch_max;
|
||||
assert(metadata->u.gfx9.dcc_independent_64B == 1);
|
||||
}
|
||||
*is_scanout = metadata->u.gfx9.scanout;
|
||||
} else {
|
||||
surf->u.legacy.pipe_config = metadata->u.legacy.pipe_config;
|
||||
surf->u.legacy.bankw = metadata->u.legacy.bankw;
|
||||
|
@ -613,7 +612,9 @@ static void si_set_tex_bo_metadata(struct si_screen *sscreen, struct si_texture
|
|||
assert((dcc_offset >> 8) != 0 && (dcc_offset >> 8) < (1 << 24));
|
||||
md.u.gfx9.dcc_offset_256B = dcc_offset >> 8;
|
||||
md.u.gfx9.dcc_pitch_max = tex->surface.u.gfx9.display_dcc_pitch_max;
|
||||
md.u.gfx9.dcc_independent_64B = 1;
|
||||
md.u.gfx9.dcc_independent_64B = tex->surface.u.gfx9.dcc.independent_64B_blocks;
|
||||
md.u.gfx9.dcc_independent_128B = tex->surface.u.gfx9.dcc.independent_128B_blocks;
|
||||
md.u.gfx9.dcc_max_compressed_block_size = tex->surface.u.gfx9.dcc.max_compressed_block_size;
|
||||
}
|
||||
} else {
|
||||
md.u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D
|
||||
|
|
|
@ -1223,6 +1223,10 @@ static unsigned eg_tile_split_rev(unsigned eg_tile_split)
|
|||
}
|
||||
}
|
||||
|
||||
/* Shift/mask pairs for fields packed into tiling_flags. The field name without
 * the _SHIFT/_MASK suffix (e.g. DCC_INDEPENDENT_128B) is what is passed to
 * AMDGPU_TILING_GET / AMDGPU_TILING_SET in the buffer metadata get/set paths.
 * NOTE(review): presumably these mirror the kernel's amdgpu_drm.h bit layout
 * and are defined locally for older headers — confirm against the kernel uapi.
 */
#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT 44
|
||||
#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK 0x1
|
||||
#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45
|
||||
#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK 0x3
|
||||
#define AMDGPU_TILING_SCANOUT_SHIFT 63
|
||||
#define AMDGPU_TILING_SCANOUT_MASK 0x1
|
||||
|
||||
|
@ -1248,6 +1252,8 @@ static void amdgpu_buffer_get_metadata(struct pb_buffer *_buf,
|
|||
md->u.gfx9.dcc_offset_256B = AMDGPU_TILING_GET(tiling_flags, DCC_OFFSET_256B);
|
||||
md->u.gfx9.dcc_pitch_max = AMDGPU_TILING_GET(tiling_flags, DCC_PITCH_MAX);
|
||||
md->u.gfx9.dcc_independent_64B = AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_64B);
|
||||
md->u.gfx9.dcc_independent_128B = AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_128B);
|
||||
md->u.gfx9.dcc_max_compressed_block_size = AMDGPU_TILING_GET(tiling_flags, DCC_MAX_COMPRESSED_BLOCK_SIZE);
|
||||
md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
|
||||
} else {
|
||||
md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
|
||||
|
@ -1286,6 +1292,8 @@ static void amdgpu_buffer_set_metadata(struct pb_buffer *_buf,
|
|||
tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, md->u.gfx9.dcc_offset_256B);
|
||||
tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, md->u.gfx9.dcc_pitch_max);
|
||||
tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, md->u.gfx9.dcc_independent_64B);
|
||||
tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, md->u.gfx9.dcc_independent_128B);
|
||||
tiling_flags |= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, md->u.gfx9.dcc_max_compressed_block_size);
|
||||
tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
|
||||
} else {
|
||||
if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
|
||||
|
|
Loading…
Reference in New Issue