/* Patch summary: adds per-surface DCC block settings (independent_64B_blocks, independent_128B_blocks, max_compressed_block_size) to gfx9_surf_meta_flags, sets them in gfx9_compute_surface, checks them in is_dcc_supported_by_DCN, uses them for cb_dcc_control and the gfx10 texture descriptor, and carries them through radeon_bo_metadata and the amdgpu tiling flags. */
diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c index 8773fd51b47..c64f9903aad 100644 --- a/src/amd/common/ac_surface.c +++ b/src/amd/common/ac_surface.c @@ -32,6 +32,7 @@ #include "util/macros.h" #include "util/u_atomic.h" #include "util/u_math.h" +#include "sid.h" /* NOTE(review): presumably provides the V_028C78_* values used below and the G_009910_* macros whose local copies this patch removes - confirm */ #include #include @@ -378,10 +379,6 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib, return 0; } -#define G_009910_MICRO_TILE_MODE(x) (((x) >> 0) & 0x03) -#define V_009910_ADDR_SURF_THICK_MICRO_TILING 0x03 -#define G_009910_MICRO_TILE_MODE_NEW(x) (((x) >> 22) & 0x07) - static void gfx6_set_micro_tile_mode(struct radeon_surf *surf, const struct radeon_info *info) { @@ -1046,12 +1043,37 @@ static bool is_dcc_supported_by_DCN(const struct radeon_info *info, !info->use_display_dcc_with_retile_blit) return false; + /* 16bpp and 64bpp are more complicated, so they are disallowed for now. */ + if (surf->bpe != 4) + return false; + /* Handle unaligned DCC. */ if (info->use_display_dcc_unaligned && (rb_aligned || pipe_aligned)) return false; - return true; + switch (info->chip_class) { + case GFX9: + /* There are more constraints, but we always set + * INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B, + * which always works. + */ + assert(surf->u.gfx9.dcc.independent_64B_blocks && + surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B); + return true; + case GFX10: + /* DCN requires INDEPENDENT_128B_BLOCKS = 0. + * For 4K, it also requires INDEPENDENT_64B_BLOCKS = 1. 
+ */ + return !surf->u.gfx9.dcc.independent_128B_blocks && + ((config->info.width <= 2560 && + config->info.height <= 2560) || + (surf->u.gfx9.dcc.independent_64B_blocks && + surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B)); + default: /* NOTE(review): gfx9_compute_surface sets defaults for chip_class >= GFX10, but only GFX9 and GFX10 are handled here - confirm no other chip_class value can reach this switch */ + unreachable("unhandled chip"); + return false; + } } static int gfx9_compute_miptree(ADDR_HANDLE addrlib, @@ -1552,17 +1574,43 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib, AddrSurfInfoIn.flags.metaPipeUnaligned = 0; AddrSurfInfoIn.flags.metaRbUnaligned = 0; - /* The display hardware can only read DCC with RB_ALIGNED=0 and - * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED. - * - * The CB block requires RB_ALIGNED=1 except 1 RB chips. - * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes - * after rendering, so PIPE_ALIGNED=1 is recommended. - */ - if (info->use_display_dcc_unaligned && - AddrSurfInfoIn.flags.display) { - AddrSurfInfoIn.flags.metaPipeUnaligned = 1; - AddrSurfInfoIn.flags.metaRbUnaligned = 1; + /* Optimal values for the L2 cache. */ + if (info->chip_class == GFX9) { + surf->u.gfx9.dcc.independent_64B_blocks = 1; + surf->u.gfx9.dcc.independent_128B_blocks = 0; + surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B; + } else if (info->chip_class >= GFX10) { + surf->u.gfx9.dcc.independent_64B_blocks = 0; + surf->u.gfx9.dcc.independent_128B_blocks = 1; + surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B; + } + + if (AddrSurfInfoIn.flags.display) { + /* The display hardware can only read DCC with RB_ALIGNED=0 and + * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED. + * + * The CB block requires RB_ALIGNED=1 except on 1 RB chips. + * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes + * after rendering, so PIPE_ALIGNED=1 is recommended. 
+ */ + if (info->use_display_dcc_unaligned) { + AddrSurfInfoIn.flags.metaPipeUnaligned = 1; + AddrSurfInfoIn.flags.metaRbUnaligned = 1; + } + + /* Adjust DCC settings to meet DCN requirements. */ + if (info->use_display_dcc_unaligned || + info->use_display_dcc_with_retile_blit) { + /* Only Navi12/14 support independent 64B blocks in L2, + * but without DCC image stores. + */ + if (info->family == CHIP_NAVI12 || + info->family == CHIP_NAVI14) { + surf->u.gfx9.dcc.independent_64B_blocks = 1; + surf->u.gfx9.dcc.independent_128B_blocks = 0; + surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B; + } + } } switch (mode) { diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h index 77ddf2fc5e3..8bdafa295ef 100644 --- a/src/amd/common/ac_surface.h +++ b/src/amd/common/ac_surface.h @@ -139,6 +139,9 @@ struct gfx9_surf_flags { struct gfx9_surf_meta_flags { unsigned rb_aligned:1; /* optimal for RBs */ unsigned pipe_aligned:1; /* optimal for TC */ + unsigned independent_64B_blocks:1; /* passed to S_028C78_INDEPENDENT_64B_BLOCKS for cb_dcc_control */ + unsigned independent_128B_blocks:1; /* passed to S_028C78_INDEPENDENT_128B_BLOCKS for cb_dcc_control */ + unsigned max_compressed_block_size:2; /* a V_028C78_MAX_BLOCK_SIZE_* value */ }; struct gfx9_surf_layout { diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index a2293837763..e3bb9c3f369 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -227,6 +227,8 @@ struct radeon_bo_metadata { unsigned dcc_offset_256B : 24; unsigned dcc_pitch_max : 14; /* (mip chain pitch - 1) for DCN */ unsigned dcc_independent_64B : 1; + unsigned dcc_independent_128B : 1; /* copy of the surface's dcc.independent_128B_blocks */ + unsigned dcc_max_compressed_block_size : 2; /* copy of the surface's dcc.max_compressed_block_size */ bool scanout; } gfx9; diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c index 1d62f11c6be..baa65452e36 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2416,10 +2416,10 @@ static void si_initialize_color_surface(struct si_context *sctx, struct si_surfa 
min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B; surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) | - S_028C78_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) | + S_028C78_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.dcc.max_compressed_block_size) | S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) | - S_028C78_INDEPENDENT_64B_BLOCKS(0) | - S_028C78_INDEPENDENT_128B_BLOCKS(1); + S_028C78_INDEPENDENT_64B_BLOCKS(tex->surface.u.gfx9.dcc.independent_64B_blocks) | + S_028C78_INDEPENDENT_128B_BLOCKS(tex->surface.u.gfx9.dcc.independent_128B_blocks); } else if (sctx->chip_class >= GFX8) { unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B; unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B; @@ -3799,7 +3799,7 @@ static void gfx10_make_texture_descriptor( if (tex->surface.dcc_offset) { state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) | - S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) | + S_00A018_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.dcc.max_compressed_block_size) | S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format)); } diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index 43fc648edfa..0b29fc44bf3 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -339,12 +339,11 @@ static void si_get_display_metadata(struct si_screen *sscreen, struct radeon_sur *array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED; surf->u.gfx9.surf.swizzle_mode = metadata->u.gfx9.swizzle_mode; + surf->u.gfx9.dcc.independent_64B_blocks = metadata->u.gfx9.dcc_independent_64B; + surf->u.gfx9.dcc.independent_128B_blocks = metadata->u.gfx9.dcc_independent_128B; + surf->u.gfx9.dcc.max_compressed_block_size = metadata->u.gfx9.dcc_max_compressed_block_size; + surf->u.gfx9.display_dcc_pitch_max = metadata->u.gfx9.dcc_pitch_max; /* now copied unconditionally; the removed code did this only when dcc_offset_256B was non-zero */ *is_scanout = 
metadata->u.gfx9.scanout; - - if (metadata->u.gfx9.dcc_offset_256B) { - surf->u.gfx9.display_dcc_pitch_max = metadata->u.gfx9.dcc_pitch_max; - assert(metadata->u.gfx9.dcc_independent_64B == 1); - } } else { surf->u.legacy.pipe_config = metadata->u.legacy.pipe_config; surf->u.legacy.bankw = metadata->u.legacy.bankw; @@ -613,7 +612,9 @@ static void si_set_tex_bo_metadata(struct si_screen *sscreen, struct si_texture assert((dcc_offset >> 8) != 0 && (dcc_offset >> 8) < (1 << 24)); md.u.gfx9.dcc_offset_256B = dcc_offset >> 8; md.u.gfx9.dcc_pitch_max = tex->surface.u.gfx9.display_dcc_pitch_max; - md.u.gfx9.dcc_independent_64B = 1; + md.u.gfx9.dcc_independent_64B = tex->surface.u.gfx9.dcc.independent_64B_blocks; + md.u.gfx9.dcc_independent_128B = tex->surface.u.gfx9.dcc.independent_128B_blocks; + md.u.gfx9.dcc_max_compressed_block_size = tex->surface.u.gfx9.dcc.max_compressed_block_size; } } else { md.u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index bdb03ee033f..ec2fa3a56e4 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -1223,6 +1223,10 @@ static unsigned eg_tile_split_rev(unsigned eg_tile_split) } } +#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT 44 /* NOTE(review): SHIFT/MASK pairs consumed by AMDGPU_TILING_GET/SET on tiling_flags; verify the bit positions against the kernel's amdgpu_drm.h */ +#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK 0x1 +#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45 +#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK 0x3 #define AMDGPU_TILING_SCANOUT_SHIFT 63 #define AMDGPU_TILING_SCANOUT_MASK 0x1 @@ -1248,6 +1252,8 @@ static void amdgpu_buffer_get_metadata(struct pb_buffer *_buf, md->u.gfx9.dcc_offset_256B = AMDGPU_TILING_GET(tiling_flags, DCC_OFFSET_256B); md->u.gfx9.dcc_pitch_max = AMDGPU_TILING_GET(tiling_flags, DCC_PITCH_MAX); md->u.gfx9.dcc_independent_64B = AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_64B); + md->u.gfx9.dcc_independent_128B = 
AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_128B); + md->u.gfx9.dcc_max_compressed_block_size = AMDGPU_TILING_GET(tiling_flags, DCC_MAX_COMPRESSED_BLOCK_SIZE); md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT); } else { md->u.legacy.microtile = RADEON_LAYOUT_LINEAR; @@ -1286,6 +1292,8 @@ static void amdgpu_buffer_set_metadata(struct pb_buffer *_buf, tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, md->u.gfx9.dcc_offset_256B); tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, md->u.gfx9.dcc_pitch_max); tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, md->u.gfx9.dcc_independent_64B); + tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, md->u.gfx9.dcc_independent_128B); + tiling_flags |= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, md->u.gfx9.dcc_max_compressed_block_size); tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout); } else { if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)