diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index bd73fcc3f9f..f04ed8794f7 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -1986,7 +1986,11 @@ void si_init_all_descriptors(struct si_context *sctx) int i; unsigned ce_offset = 0; + STATIC_ASSERT(GFX9_SGPR_TCS_CONST_BUFFERS % 2 == 0); + for (i = 0; i < SI_NUM_SHADERS; i++) { + bool gfx9_tcs = sctx->b.chip_class == GFX9 && + i == PIPE_SHADER_TESS_CTRL; /* GFX9 has only 4KB of CE, while previous chips had 32KB. * Rarely used descriptors don't use CE RAM. */ @@ -1999,22 +2003,30 @@ void si_init_all_descriptors(struct si_context *sctx) si_init_buffer_resources(&sctx->const_buffers[i], si_const_buffer_descriptors(sctx, i), - SI_NUM_CONST_BUFFERS, SI_SGPR_CONST_BUFFERS, + SI_NUM_CONST_BUFFERS, + gfx9_tcs ? GFX9_SGPR_TCS_CONST_BUFFERS : + SI_SGPR_CONST_BUFFERS, RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER, &ce_offset); si_init_buffer_resources(&sctx->shader_buffers[i], si_shader_buffer_descriptors(sctx, i), - SI_NUM_SHADER_BUFFERS, SI_SGPR_SHADER_BUFFERS, + SI_NUM_SHADER_BUFFERS, + gfx9_tcs ? GFX9_SGPR_TCS_SHADER_BUFFERS : + SI_SGPR_SHADER_BUFFERS, RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER, shaderbufs_use_ce ? &ce_offset : NULL); si_init_descriptors(si_sampler_descriptors(sctx, i), - SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS, + gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS : + SI_SGPR_SAMPLERS, + 16, SI_NUM_SAMPLERS, null_texture_descriptor, samplers_use_ce ? &ce_offset : NULL); si_init_descriptors(si_image_descriptors(sctx, i), - SI_SGPR_IMAGES, 8, SI_NUM_IMAGES, + gfx9_tcs ? GFX9_SGPR_TCS_IMAGES : + SI_SGPR_IMAGES, + 8, SI_NUM_IMAGES, null_image_descriptor, images_use_ce ? &ce_offset : NULL); } diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index b7d3514def2..ed5327cbe9b 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2648,18 +2648,18 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base) tf_soffset = LLVMGetParam(ctx->main_fn, SI_PARAM_TESS_FACTOR_OFFSET); ret = LLVMBuildInsertValue(builder, ret, offchip_layout, - SI_SGPR_TCS_OFFCHIP_LAYOUT, ""); + GFX6_SGPR_TCS_OFFCHIP_LAYOUT, ""); ret = LLVMBuildInsertValue(builder, ret, offchip_soffset, - SI_TCS_NUM_USER_SGPR, ""); + GFX6_TCS_NUM_USER_SGPR, ""); ret = LLVMBuildInsertValue(builder, ret, tf_soffset, - SI_TCS_NUM_USER_SGPR + 1, ""); + GFX6_TCS_NUM_USER_SGPR + 1, ""); /* VGPRs */ rel_patch_id = bitcast(bld_base, TGSI_TYPE_FLOAT, rel_patch_id); invocation_id = bitcast(bld_base, TGSI_TYPE_FLOAT, invocation_id); tf_lds_offset = bitcast(bld_base, TGSI_TYPE_FLOAT, tf_lds_offset); - vgpr = SI_TCS_NUM_USER_SGPR + 2; + vgpr = GFX6_TCS_NUM_USER_SGPR + 2; ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, ""); ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, ""); ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, ""); @@ -5715,7 +5715,7 @@ static void create_function(struct si_shader_context *ctx) /* SI_PARAM_TCS_OC_LDS and PARAM_TESS_FACTOR_OFFSET are * placed after the user SGPRs. */ - for (i = 0; i < SI_TCS_NUM_USER_SGPR + 2; i++) + for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++) returns[num_returns++] = ctx->i32; /* SGPRs */ for (i = 0; i < 3; i++) diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 90d8a18e174..df61a418d0f 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -101,15 +101,31 @@ enum { SI_SGPR_VS_STATE_BITS, SI_VS_NUM_USER_SGPR, - /* both TCS and TES */ - SI_SGPR_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS, + /* TES */ + SI_SGPR_TES_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS, SI_TES_NUM_USER_SGPR, - /* TCS only */ - SI_SGPR_TCS_OUT_OFFSETS = SI_TES_NUM_USER_SGPR, - SI_SGPR_TCS_OUT_LAYOUT, - SI_SGPR_TCS_IN_LAYOUT, - SI_TCS_NUM_USER_SGPR, + /* GFX6-8: TCS only */ + GFX6_SGPR_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS, + GFX6_SGPR_TCS_OUT_OFFSETS, + GFX6_SGPR_TCS_OUT_LAYOUT, + GFX6_SGPR_TCS_IN_LAYOUT, + GFX6_TCS_NUM_USER_SGPR, + + /* GFX9: Merged LS-HS (VS-TCS) only. */ + GFX9_SGPR_TCS_OFFCHIP_LAYOUT = SI_VS_NUM_USER_SGPR, + GFX9_SGPR_TCS_OUT_OFFSETS, + GFX9_SGPR_TCS_OUT_LAYOUT, + GFX9_SGPR_unused_to_align_the_next_pointer, + GFX9_SGPR_TCS_CONST_BUFFERS, + GFX9_SGPR_TCS_CONST_BUFFERS_HI, + GFX9_SGPR_TCS_SAMPLERS, /* images & sampler states interleaved */ + GFX9_SGPR_TCS_SAMPLERS_HI, + GFX9_SGPR_TCS_IMAGES, + GFX9_SGPR_TCS_IMAGES_HI, + GFX9_SGPR_TCS_SHADER_BUFFERS, + GFX9_SGPR_TCS_SHADER_BUFFERS_HI, + GFX9_TCS_NUM_USER_SGPR, /* GS limits */ SI_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS, diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 4feadbe3d65..de97c0e8f06 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -235,6 +235,14 @@ static void si_emit_derived_tess_state(struct si_context *sctx, S_00B42C_LDS_SIZE(lds_size); radeon_set_sh_reg(cs, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, hs_rsrc2); + + /* Set userdata SGPRs for merged LS-HS. */ + radeon_set_sh_reg_seq(cs, + R_00B430_SPI_SHADER_USER_DATA_LS_0 + + GFX9_SGPR_TCS_OFFCHIP_LAYOUT * 4, 3); + radeon_emit(cs, offchip_layout); + radeon_emit(cs, tcs_out_offsets); + radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26)); } else { unsigned ls_rsrc2 = ls_current->config.rsrc2; @@ -251,7 +259,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, /* Set userdata SGPRs for TCS. */ radeon_set_sh_reg_seq(cs, - R_00B430_SPI_SHADER_USER_DATA_HS_0 + SI_SGPR_TCS_OFFCHIP_LAYOUT * 4, 4); + R_00B430_SPI_SHADER_USER_DATA_HS_0 + GFX6_SGPR_TCS_OFFCHIP_LAYOUT * 4, 4); radeon_emit(cs, offchip_layout); radeon_emit(cs, tcs_out_offsets); radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26)); @@ -259,7 +267,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, } /* Set userdata SGPRs for TES. */ - radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TCS_OFFCHIP_LAYOUT * 4, 1); + radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, 1); radeon_emit(cs, offchip_layout); ls_hs_config = S_028B58_NUM_PATCHES(*num_patches) | diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 9e56a45ab77..b04ad920a90 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -492,14 +492,15 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader) ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1; shader->config.rsrc2 = - S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) | + S_00B42C_USER_SGPR(GFX9_TCS_NUM_USER_SGPR) | + S_00B42C_USER_SGPR_MSB(GFX9_TCS_NUM_USER_SGPR >> 5) | S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0); } else { si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8); si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40); shader->config.rsrc2 = - S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) | + S_00B42C_USER_SGPR(GFX6_TCS_NUM_USER_SGPR) | S_00B42C_OC_LDS_EN(1) | S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0); }