radv: more register changes on GFX11

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16419>
This commit is contained in:
Samuel Pitoiset 2022-05-05 15:15:17 +02:00 committed by Marge Bot
parent 7f31917119
commit 9b4c346029
4 changed files with 84 additions and 30 deletions

View File

@ -1943,8 +1943,12 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_
radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base);
radeon_set_context_reg(cmd_buffer->cs, R_02801C_DB_DEPTH_SIZE_XY, ds->db_depth_size);
radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 7);
radeon_emit(cmd_buffer->cs, S_02803C_RESOURCE_LEVEL(1));
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
radeon_set_context_reg_seq(cmd_buffer->cs, R_028040_DB_Z_INFO, 6);
} else {
radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 7);
radeon_emit(cmd_buffer->cs, S_02803C_RESOURCE_LEVEL(1));
}
radeon_emit(cmd_buffer->cs, db_z_info);
radeon_emit(cmd_buffer->cs, db_stencil_info);
radeon_emit(cmd_buffer->cs, ds->db_z_read_base);
@ -2616,12 +2620,11 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level;
uint8_t watermark = gfx_level >= GFX10 ? 6 : 4;
radeon_set_context_reg(
cmd_buffer->cs, R_028424_CB_DCC_CONTROL,
S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(gfx_level <= GFX9) |
S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) |
S_028424_DISABLE_CONSTANT_ENCODE_AC01(disable_constant_encode_ac01) |
S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode));
radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_DCC_CONTROL,
S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(gfx_level <= GFX9) |
S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) |
S_028424_DISABLE_CONSTANT_ENCODE_AC01(disable_constant_encode_ac01) |
S_028424_DISABLE_CONSTANT_ENCODE_REG(gfx_level < GFX11 && disable_constant_encode));
}
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FRAMEBUFFER;

View File

@ -5930,9 +5930,12 @@ radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_inf
ds->db_z_info = S_028038_FORMAT(format) |
S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) |
S_028038_MAXMIP(iview->image->info.levels - 1) | S_028038_ZRANGE_PRECISION(1);
ds->db_stencil_info =
S_02803C_FORMAT(stencil_format) | S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode);
S_028038_MAXMIP(iview->image->info.levels - 1) |
S_028038_ZRANGE_PRECISION(1) |
S_028040_ITERATE_256(device->physical_device->rad_info.gfx_level >= GFX11);
ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode) |
S_028044_ITERATE_256(device->physical_device->rad_info.gfx_level >= GFX11);
if (device->physical_device->rad_info.gfx_level == GFX9) {
ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.epitch);

View File

@ -852,7 +852,9 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
/* RB+ doesn't work with dual source blending, logic op and
* RESOLVE.
*/
if (blend.mrt0_is_dual_src || (vkblend && vkblend->logicOpEnable))
if (blend.mrt0_is_dual_src || (vkblend && vkblend->logicOpEnable) ||
(pipeline->device->physical_device->rad_info.gfx_level >= GFX11 &&
blend.blend_enable_4bit))
cb_color_control |= S_028808_DISABLE_DUAL_QUAD(1);
}
@ -5621,11 +5623,14 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
struct radv_shader *gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
uint32_t gs_num_invocations = gs ? gs->info.gs.invocations : 1;
radeon_set_context_reg(
ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL,
S_028A44_ES_VERTS_PER_SUBGRP(ngg_state->hw_max_esverts) |
S_028A44_GS_PRIMS_PER_SUBGRP(ngg_state->max_gsprims) |
S_028A44_GS_INST_PRIMS_IN_SUBGRP(ngg_state->max_gsprims * gs_num_invocations));
if (pipeline->device->physical_device->rad_info.gfx_level < GFX11) {
radeon_set_context_reg(
ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL,
S_028A44_ES_VERTS_PER_SUBGRP(ngg_state->hw_max_esverts) |
S_028A44_GS_PRIMS_PER_SUBGRP(ngg_state->max_gsprims) |
S_028A44_GS_INST_PRIMS_IN_SUBGRP(ngg_state->max_gsprims * gs_num_invocations));
}
radeon_set_context_reg(ctx_cs, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP,
S_0287FC_MAX_VERTS_PER_SUBGROUP(ngg_state->max_out_verts));
radeon_set_context_reg(ctx_cs, R_028B4C_GE_NGG_SUBGRP_CNTL,
@ -6167,6 +6172,7 @@ radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *ctx_cs, struct rade
struct radv_pipeline *pipeline)
{
struct radv_shader *ps;
bool param_gen;
uint64_t va;
assert(pipeline->shaders[MESA_SHADER_FRAGMENT]);
@ -6186,11 +6192,16 @@ radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *ctx_cs, struct rade
radeon_emit(ctx_cs, ps->config.spi_ps_input_ena);
radeon_emit(ctx_cs, ps->config.spi_ps_input_addr);
/* Workaround when there are no PS inputs but LDS is used. */
param_gen = pipeline->device->physical_device->rad_info.gfx_level >= GFX11 &&
!ps->info.ps.num_interp && ps->config.lds_size;
radeon_set_context_reg(
ctx_cs, R_0286D8_SPI_PS_IN_CONTROL,
S_0286D8_NUM_INTERP(ps->info.ps.num_interp) |
S_0286D8_NUM_PRIM_INTERP(ps->info.ps.num_prim_interp) |
S_0286D8_PS_W32_EN(ps->info.wave_size == 32));
S_0286D8_PS_W32_EN(ps->info.wave_size == 32) |
S_0286D8_PARAM_GEN(param_gen));
radeon_set_context_reg(ctx_cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl);
@ -6357,7 +6368,11 @@ radv_pipeline_generate_vgt_gs_out(struct radeon_cmdbuf *ctx_cs,
const struct radv_pipeline *pipeline,
uint32_t vgt_gs_out_prim_type)
{
radeon_set_context_reg(ctx_cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, vgt_gs_out_prim_type);
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX11) {
radeon_set_uconfig_reg(ctx_cs, R_030998_VGT_GS_OUT_PRIM_TYPE, vgt_gs_out_prim_type);
} else {
radeon_set_context_reg(ctx_cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, vgt_gs_out_prim_type);
}
}
static void

View File

@ -151,6 +151,18 @@ si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
radeon_emit(cs, tma_va >> 8);
radeon_emit(cs, tma_va >> 40);
}
if (device->physical_device->rad_info.gfx_level >= GFX11) {
uint32_t spi_cu_en = device->physical_device->rad_info.spi_cu_en;
radeon_set_sh_reg_seq(cs, R_00B8AC_COMPUTE_STATIC_THREAD_MGMT_SE4, 4);
radeon_emit(cs, S_00B8AC_SA0_CU_EN(spi_cu_en) | S_00B8AC_SA1_CU_EN(spi_cu_en)); /* SE4 */
radeon_emit(cs, S_00B8AC_SA0_CU_EN(spi_cu_en) | S_00B8AC_SA1_CU_EN(spi_cu_en)); /* SE5 */
radeon_emit(cs, S_00B8AC_SA0_CU_EN(spi_cu_en) | S_00B8AC_SA1_CU_EN(spi_cu_en)); /* SE6 */
radeon_emit(cs, S_00B8AC_SA0_CU_EN(spi_cu_en) | S_00B8AC_SA1_CU_EN(spi_cu_en)); /* SE7 */
radeon_set_sh_reg(cs, R_00B8BC_COMPUTE_DISPATCH_INTERLEAVE, 64);
}
}
/* 12.4 fixed-point */
@ -212,14 +224,15 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
}
if (!has_clear_state) {
radeon_set_context_reg(cs, R_028A5C_VGT_GS_PER_VS, 0x2);
if (physical_device->rad_info.gfx_level < GFX11)
radeon_set_context_reg(cs, R_028A5C_VGT_GS_PER_VS, 0x2);
radeon_set_context_reg(cs, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
radeon_set_context_reg(cs, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
}
if (physical_device->rad_info.gfx_level <= GFX9)
radeon_set_context_reg(cs, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
if (!has_clear_state)
if (!has_clear_state && physical_device->rad_info.gfx_level < GFX11)
radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0);
if (physical_device->rad_info.gfx_level < GFX7)
radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE,
@ -371,7 +384,8 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
if (physical_device->rad_info.gfx_level >= GFX10) {
ac_set_reg_cu_en(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F),
S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F) |
S_00B01C_LDS_GROUP_SIZE(physical_device->rad_info.gfx_level >= GFX11),
C_00B01C_CU_EN, 0, &physical_device->rad_info,
(void*)gfx10_set_sh_reg_idx3);
} else {
@ -390,8 +404,12 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
* the size of the PC minus the largest possible allocation for
* a single primitive shader subgroup.
*/
radeon_set_context_reg(cs, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(512));
radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
uint32_t max_deallocs_in_wave = physical_device->rad_info.gfx_level >= GFX11 ? 16 : 512;
radeon_set_context_reg(cs, R_028C50_PA_SC_NGG_MODE_CNTL,
S_028C50_MAX_DEALLOCS_IN_WAVE(max_deallocs_in_wave));
if (physical_device->rad_info.gfx_level < GFX11)
radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
/* Vulkan doesn't support user edge flags and it also doesn't
* need to prevent drawing lines on internal edges of
@ -448,11 +466,15 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
radeon_emit(cs, 0); /* R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1 */
radeon_emit(cs, 0); /* R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2 */
radeon_emit(cs, 0); /* R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3 */
radeon_set_sh_reg_seq(cs, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 4);
radeon_emit(cs, 0); /* R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0 */
radeon_emit(cs, 0); /* R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1 */
radeon_emit(cs, 0); /* R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2 */
radeon_emit(cs, 0); /* R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3 */
if (physical_device->rad_info.gfx_level < GFX11) {
radeon_set_sh_reg_seq(cs, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 4);
radeon_emit(cs, 0); /* R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0 */
radeon_emit(cs, 0); /* R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1 */
radeon_emit(cs, 0); /* R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2 */
radeon_emit(cs, 0); /* R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3 */
}
radeon_set_sh_reg_seq(cs, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 4);
radeon_emit(cs, 0); /* R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0 */
radeon_emit(cs, 0); /* R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1 */
@ -466,7 +488,9 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
radeon_set_sh_reg(cs, R_00B0C0_SPI_SHADER_REQ_CTRL_PS,
S_00B0C0_SOFT_GROUPING_EN(1) | S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
radeon_set_sh_reg(cs, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
if (physical_device->rad_info.gfx_level < GFX11)
radeon_set_sh_reg(cs, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
if (physical_device->rad_info.gfx_level >= GFX10_3) {
radeon_set_context_reg(cs, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff);
@ -588,6 +612,15 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
*/
radeon_set_context_reg(cs, R_028BDC_PA_SC_LINE_CNTL, 0);
if (physical_device->rad_info.gfx_level >= GFX11) {
radeon_set_context_reg(cs, R_028C54_PA_SC_BINNER_CNTL_2, 0);
radeon_set_context_reg(cs, R_028620_PA_RATE_CNTL,
S_028620_VERTEX_RATE(2) | S_028620_PRIM_RATE(1));
radeon_set_uconfig_reg(cs, R_031110_SPI_GS_THROTTLE_CNTL1, 0x12355123);
radeon_set_uconfig_reg(cs, R_031114_SPI_GS_THROTTLE_CNTL2, 0x1544D);
}
si_emit_compute(device, cs);
}