diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 7dc342bccc9..fbe7f9fd6a8 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1943,8 +1943,12 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_ radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base); radeon_set_context_reg(cmd_buffer->cs, R_02801C_DB_DEPTH_SIZE_XY, ds->db_depth_size); - radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 7); - radeon_emit(cmd_buffer->cs, S_02803C_RESOURCE_LEVEL(1)); + if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { + radeon_set_context_reg_seq(cmd_buffer->cs, R_028040_DB_Z_INFO, 6); + } else { + radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 7); + radeon_emit(cmd_buffer->cs, S_02803C_RESOURCE_LEVEL(1)); + } radeon_emit(cmd_buffer->cs, db_z_info); radeon_emit(cmd_buffer->cs, db_stencil_info); radeon_emit(cmd_buffer->cs, ds->db_z_read_base); @@ -2616,12 +2620,11 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level; uint8_t watermark = gfx_level >= GFX10 ? 6 : 4; - radeon_set_context_reg( - cmd_buffer->cs, R_028424_CB_DCC_CONTROL, - S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(gfx_level <= GFX9) | - S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) | - S_028424_DISABLE_CONSTANT_ENCODE_AC01(disable_constant_encode_ac01) | - S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode)); + radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_DCC_CONTROL, + S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(gfx_level <= GFX9) | + S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) | + S_028424_DISABLE_CONSTANT_ENCODE_AC01(disable_constant_encode_ac01) | + S_028424_DISABLE_CONSTANT_ENCODE_REG(gfx_level < GFX11 && disable_constant_encode)); } cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FRAMEBUFFER; diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 8fc6f5b69d6..05ba884b1dc 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -5930,9 +5930,12 @@ radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_inf ds->db_z_info = S_028038_FORMAT(format) | S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) | S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) | - S_028038_MAXMIP(iview->image->info.levels - 1) | S_028038_ZRANGE_PRECISION(1); - ds->db_stencil_info = - S_02803C_FORMAT(stencil_format) | S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode); + S_028038_MAXMIP(iview->image->info.levels - 1) | + S_028038_ZRANGE_PRECISION(1) | + S_028040_ITERATE_256(device->physical_device->rad_info.gfx_level >= GFX11); + ds->db_stencil_info = S_02803C_FORMAT(stencil_format) | + S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode) | + S_028044_ITERATE_256(device->physical_device->rad_info.gfx_level >= GFX11); if (device->physical_device->rad_info.gfx_level == GFX9) { ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.epitch); diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 40c1dbd1b07..16bee2dcc6f 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -852,7 +852,9 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline, /* RB+ doesn't work with dual source blending, logic op and * RESOLVE. */ - if (blend.mrt0_is_dual_src || (vkblend && vkblend->logicOpEnable)) + if (blend.mrt0_is_dual_src || (vkblend && vkblend->logicOpEnable) || + (pipeline->device->physical_device->rad_info.gfx_level >= GFX11 && + blend.blend_enable_4bit)) cb_color_control |= S_028808_DISABLE_DUAL_QUAD(1); } @@ -5621,11 +5623,14 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf struct radv_shader *gs = pipeline->shaders[MESA_SHADER_GEOMETRY]; uint32_t gs_num_invocations = gs ? gs->info.gs.invocations : 1; - radeon_set_context_reg( - ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, - S_028A44_ES_VERTS_PER_SUBGRP(ngg_state->hw_max_esverts) | - S_028A44_GS_PRIMS_PER_SUBGRP(ngg_state->max_gsprims) | - S_028A44_GS_INST_PRIMS_IN_SUBGRP(ngg_state->max_gsprims * gs_num_invocations)); + if (pipeline->device->physical_device->rad_info.gfx_level < GFX11) { + radeon_set_context_reg( + ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, + S_028A44_ES_VERTS_PER_SUBGRP(ngg_state->hw_max_esverts) | + S_028A44_GS_PRIMS_PER_SUBGRP(ngg_state->max_gsprims) | + S_028A44_GS_INST_PRIMS_IN_SUBGRP(ngg_state->max_gsprims * gs_num_invocations)); + } + radeon_set_context_reg(ctx_cs, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP, S_0287FC_MAX_VERTS_PER_SUBGROUP(ngg_state->max_out_verts)); radeon_set_context_reg(ctx_cs, R_028B4C_GE_NGG_SUBGRP_CNTL, @@ -6167,6 +6172,7 @@ radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *ctx_cs, struct rade struct radv_pipeline *pipeline) { struct radv_shader *ps; + bool param_gen; uint64_t va; assert(pipeline->shaders[MESA_SHADER_FRAGMENT]); @@ -6186,11 +6192,16 @@ radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *ctx_cs, struct rade radeon_emit(ctx_cs, ps->config.spi_ps_input_ena); radeon_emit(ctx_cs, ps->config.spi_ps_input_addr); + /* Workaround when there are no PS inputs but LDS is used. */ + param_gen = pipeline->device->physical_device->rad_info.gfx_level >= GFX11 && + !ps->info.ps.num_interp && ps->config.lds_size; + radeon_set_context_reg( ctx_cs, R_0286D8_SPI_PS_IN_CONTROL, S_0286D8_NUM_INTERP(ps->info.ps.num_interp) | S_0286D8_NUM_PRIM_INTERP(ps->info.ps.num_prim_interp) | - S_0286D8_PS_W32_EN(ps->info.wave_size == 32)); + S_0286D8_PS_W32_EN(ps->info.wave_size == 32) | + S_0286D8_PARAM_GEN(param_gen)); radeon_set_context_reg(ctx_cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl); @@ -6357,7 +6368,11 @@ radv_pipeline_generate_vgt_gs_out(struct radeon_cmdbuf *ctx_cs, const struct radv_pipeline *pipeline, uint32_t vgt_gs_out_prim_type) { - radeon_set_context_reg(ctx_cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, vgt_gs_out_prim_type); + if (pipeline->device->physical_device->rad_info.gfx_level >= GFX11) { + radeon_set_uconfig_reg(ctx_cs, R_030998_VGT_GS_OUT_PRIM_TYPE, vgt_gs_out_prim_type); + } else { + radeon_set_context_reg(ctx_cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, vgt_gs_out_prim_type); + } } static void diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index 376981f0e7e..53e19920e55 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -151,6 +151,18 @@ si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs) radeon_emit(cs, tma_va >> 8); radeon_emit(cs, tma_va >> 40); } + + if (device->physical_device->rad_info.gfx_level >= GFX11) { + uint32_t spi_cu_en = device->physical_device->rad_info.spi_cu_en; + + radeon_set_sh_reg_seq(cs, R_00B8AC_COMPUTE_STATIC_THREAD_MGMT_SE4, 4); + radeon_emit(cs, S_00B8AC_SA0_CU_EN(spi_cu_en) | S_00B8AC_SA1_CU_EN(spi_cu_en)); /* SE4 */ + radeon_emit(cs, S_00B8AC_SA0_CU_EN(spi_cu_en) | S_00B8AC_SA1_CU_EN(spi_cu_en)); /* SE5 */ + radeon_emit(cs, S_00B8AC_SA0_CU_EN(spi_cu_en) | S_00B8AC_SA1_CU_EN(spi_cu_en)); /* SE6 */ + radeon_emit(cs, S_00B8AC_SA0_CU_EN(spi_cu_en) | S_00B8AC_SA1_CU_EN(spi_cu_en)); /* SE7 */ + + radeon_set_sh_reg(cs, R_00B8BC_COMPUTE_DISPATCH_INTERLEAVE, 64); + } } /* 12.4 fixed-point */ @@ -212,14 +224,15 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) } if (!has_clear_state) { - radeon_set_context_reg(cs, R_028A5C_VGT_GS_PER_VS, 0x2); + if (physical_device->rad_info.gfx_level < GFX11) + radeon_set_context_reg(cs, R_028A5C_VGT_GS_PER_VS, 0x2); radeon_set_context_reg(cs, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); radeon_set_context_reg(cs, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); } if (physical_device->rad_info.gfx_level <= GFX9) radeon_set_context_reg(cs, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1); - if (!has_clear_state) + if (!has_clear_state && physical_device->rad_info.gfx_level < GFX11) radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0); if (physical_device->rad_info.gfx_level < GFX7) radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE, @@ -371,7 +384,8 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) if (physical_device->rad_info.gfx_level >= GFX10) { ac_set_reg_cu_en(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, - S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F), + S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F) | + S_00B01C_LDS_GROUP_SIZE(physical_device->rad_info.gfx_level >= GFX11), C_00B01C_CU_EN, 0, &physical_device->rad_info, (void*)gfx10_set_sh_reg_idx3); } else { @@ -390,8 +404,12 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) * the size of the PC minus the largest possible allocation for * a single primitive shader subgroup. */ - radeon_set_context_reg(cs, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(512)); - radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); + uint32_t max_deallocs_in_wave = physical_device->rad_info.gfx_level >= GFX11 ? 16 : 512; + radeon_set_context_reg(cs, R_028C50_PA_SC_NGG_MODE_CNTL, + S_028C50_MAX_DEALLOCS_IN_WAVE(max_deallocs_in_wave)); + + if (physical_device->rad_info.gfx_level < GFX11) + radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); /* Vulkan doesn't support user edge flags and it also doesn't * need to prevent drawing lines on internal edges of @@ -448,11 +466,15 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) radeon_emit(cs, 0); /* R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1 */ radeon_emit(cs, 0); /* R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2 */ radeon_emit(cs, 0); /* R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3 */ - radeon_set_sh_reg_seq(cs, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 4); - radeon_emit(cs, 0); /* R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0 */ - radeon_emit(cs, 0); /* R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1 */ - radeon_emit(cs, 0); /* R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2 */ - radeon_emit(cs, 0); /* R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3 */ + + if (physical_device->rad_info.gfx_level < GFX11) { + radeon_set_sh_reg_seq(cs, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 4); + radeon_emit(cs, 0); /* R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0 */ + radeon_emit(cs, 0); /* R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1 */ + radeon_emit(cs, 0); /* R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2 */ + radeon_emit(cs, 0); /* R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3 */ + } + radeon_set_sh_reg_seq(cs, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 4); radeon_emit(cs, 0); /* R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0 */ radeon_emit(cs, 0); /* R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1 */ @@ -466,7 +488,9 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) radeon_set_sh_reg(cs, R_00B0C0_SPI_SHADER_REQ_CTRL_PS, S_00B0C0_SOFT_GROUPING_EN(1) | S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1)); - radeon_set_sh_reg(cs, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0); + + if (physical_device->rad_info.gfx_level < GFX11) + radeon_set_sh_reg(cs, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0); if (physical_device->rad_info.gfx_level >= GFX10_3) { radeon_set_context_reg(cs, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff); @@ -588,6 +612,15 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) */ radeon_set_context_reg(cs, R_028BDC_PA_SC_LINE_CNTL, 0); + if (physical_device->rad_info.gfx_level >= GFX11) { + radeon_set_context_reg(cs, R_028C54_PA_SC_BINNER_CNTL_2, 0); + radeon_set_context_reg(cs, R_028620_PA_RATE_CNTL, + S_028620_VERTEX_RATE(2) | S_028620_PRIM_RATE(1)); + + radeon_set_uconfig_reg(cs, R_031110_SPI_GS_THROTTLE_CNTL1, 0x12355123); + radeon_set_uconfig_reg(cs, R_031114_SPI_GS_THROTTLE_CNTL2, 0x1544D); + } + si_emit_compute(device, cs); }