radeonsi/gfx9: define and set LS-HS user SGPRs
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
parent
0588146cb0
commit
067dacd1b1
|
@ -1986,7 +1986,11 @@ void si_init_all_descriptors(struct si_context *sctx)
|
||||||
int i;
|
int i;
|
||||||
unsigned ce_offset = 0;
|
unsigned ce_offset = 0;
|
||||||
|
|
||||||
|
STATIC_ASSERT(GFX9_SGPR_TCS_CONST_BUFFERS % 2 == 0);
|
||||||
|
|
||||||
for (i = 0; i < SI_NUM_SHADERS; i++) {
|
for (i = 0; i < SI_NUM_SHADERS; i++) {
|
||||||
|
bool gfx9_tcs = sctx->b.chip_class == GFX9 &&
|
||||||
|
i == PIPE_SHADER_TESS_CTRL;
|
||||||
/* GFX9 has only 4KB of CE, while previous chips had 32KB.
|
/* GFX9 has only 4KB of CE, while previous chips had 32KB.
|
||||||
* Rarely used descriptors don't use CE RAM.
|
* Rarely used descriptors don't use CE RAM.
|
||||||
*/
|
*/
|
||||||
|
@ -1999,22 +2003,30 @@ void si_init_all_descriptors(struct si_context *sctx)
|
||||||
|
|
||||||
si_init_buffer_resources(&sctx->const_buffers[i],
|
si_init_buffer_resources(&sctx->const_buffers[i],
|
||||||
si_const_buffer_descriptors(sctx, i),
|
si_const_buffer_descriptors(sctx, i),
|
||||||
SI_NUM_CONST_BUFFERS, SI_SGPR_CONST_BUFFERS,
|
SI_NUM_CONST_BUFFERS,
|
||||||
|
gfx9_tcs ? GFX9_SGPR_TCS_CONST_BUFFERS :
|
||||||
|
SI_SGPR_CONST_BUFFERS,
|
||||||
RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER,
|
RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER,
|
||||||
&ce_offset);
|
&ce_offset);
|
||||||
si_init_buffer_resources(&sctx->shader_buffers[i],
|
si_init_buffer_resources(&sctx->shader_buffers[i],
|
||||||
si_shader_buffer_descriptors(sctx, i),
|
si_shader_buffer_descriptors(sctx, i),
|
||||||
SI_NUM_SHADER_BUFFERS, SI_SGPR_SHADER_BUFFERS,
|
SI_NUM_SHADER_BUFFERS,
|
||||||
|
gfx9_tcs ? GFX9_SGPR_TCS_SHADER_BUFFERS :
|
||||||
|
SI_SGPR_SHADER_BUFFERS,
|
||||||
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER,
|
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER,
|
||||||
shaderbufs_use_ce ? &ce_offset : NULL);
|
shaderbufs_use_ce ? &ce_offset : NULL);
|
||||||
|
|
||||||
si_init_descriptors(si_sampler_descriptors(sctx, i),
|
si_init_descriptors(si_sampler_descriptors(sctx, i),
|
||||||
SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS,
|
gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS :
|
||||||
|
SI_SGPR_SAMPLERS,
|
||||||
|
16, SI_NUM_SAMPLERS,
|
||||||
null_texture_descriptor,
|
null_texture_descriptor,
|
||||||
samplers_use_ce ? &ce_offset : NULL);
|
samplers_use_ce ? &ce_offset : NULL);
|
||||||
|
|
||||||
si_init_descriptors(si_image_descriptors(sctx, i),
|
si_init_descriptors(si_image_descriptors(sctx, i),
|
||||||
SI_SGPR_IMAGES, 8, SI_NUM_IMAGES,
|
gfx9_tcs ? GFX9_SGPR_TCS_IMAGES :
|
||||||
|
SI_SGPR_IMAGES,
|
||||||
|
8, SI_NUM_IMAGES,
|
||||||
null_image_descriptor,
|
null_image_descriptor,
|
||||||
images_use_ce ? &ce_offset : NULL);
|
images_use_ce ? &ce_offset : NULL);
|
||||||
}
|
}
|
||||||
|
|
|
@ -2648,18 +2648,18 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
|
||||||
tf_soffset = LLVMGetParam(ctx->main_fn,
|
tf_soffset = LLVMGetParam(ctx->main_fn,
|
||||||
SI_PARAM_TESS_FACTOR_OFFSET);
|
SI_PARAM_TESS_FACTOR_OFFSET);
|
||||||
ret = LLVMBuildInsertValue(builder, ret, offchip_layout,
|
ret = LLVMBuildInsertValue(builder, ret, offchip_layout,
|
||||||
SI_SGPR_TCS_OFFCHIP_LAYOUT, "");
|
GFX6_SGPR_TCS_OFFCHIP_LAYOUT, "");
|
||||||
ret = LLVMBuildInsertValue(builder, ret, offchip_soffset,
|
ret = LLVMBuildInsertValue(builder, ret, offchip_soffset,
|
||||||
SI_TCS_NUM_USER_SGPR, "");
|
GFX6_TCS_NUM_USER_SGPR, "");
|
||||||
ret = LLVMBuildInsertValue(builder, ret, tf_soffset,
|
ret = LLVMBuildInsertValue(builder, ret, tf_soffset,
|
||||||
SI_TCS_NUM_USER_SGPR + 1, "");
|
GFX6_TCS_NUM_USER_SGPR + 1, "");
|
||||||
|
|
||||||
/* VGPRs */
|
/* VGPRs */
|
||||||
rel_patch_id = bitcast(bld_base, TGSI_TYPE_FLOAT, rel_patch_id);
|
rel_patch_id = bitcast(bld_base, TGSI_TYPE_FLOAT, rel_patch_id);
|
||||||
invocation_id = bitcast(bld_base, TGSI_TYPE_FLOAT, invocation_id);
|
invocation_id = bitcast(bld_base, TGSI_TYPE_FLOAT, invocation_id);
|
||||||
tf_lds_offset = bitcast(bld_base, TGSI_TYPE_FLOAT, tf_lds_offset);
|
tf_lds_offset = bitcast(bld_base, TGSI_TYPE_FLOAT, tf_lds_offset);
|
||||||
|
|
||||||
vgpr = SI_TCS_NUM_USER_SGPR + 2;
|
vgpr = GFX6_TCS_NUM_USER_SGPR + 2;
|
||||||
ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
|
ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
|
||||||
ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
|
ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
|
||||||
ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
|
ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
|
||||||
|
@ -5715,7 +5715,7 @@ static void create_function(struct si_shader_context *ctx)
|
||||||
/* SI_PARAM_TCS_OC_LDS and PARAM_TESS_FACTOR_OFFSET are
|
/* SI_PARAM_TCS_OC_LDS and PARAM_TESS_FACTOR_OFFSET are
|
||||||
* placed after the user SGPRs.
|
* placed after the user SGPRs.
|
||||||
*/
|
*/
|
||||||
for (i = 0; i < SI_TCS_NUM_USER_SGPR + 2; i++)
|
for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++)
|
||||||
returns[num_returns++] = ctx->i32; /* SGPRs */
|
returns[num_returns++] = ctx->i32; /* SGPRs */
|
||||||
|
|
||||||
for (i = 0; i < 3; i++)
|
for (i = 0; i < 3; i++)
|
||||||
|
|
|
@ -101,15 +101,31 @@ enum {
|
||||||
SI_SGPR_VS_STATE_BITS,
|
SI_SGPR_VS_STATE_BITS,
|
||||||
SI_VS_NUM_USER_SGPR,
|
SI_VS_NUM_USER_SGPR,
|
||||||
|
|
||||||
/* both TCS and TES */
|
/* TES */
|
||||||
SI_SGPR_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS,
|
SI_SGPR_TES_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS,
|
||||||
SI_TES_NUM_USER_SGPR,
|
SI_TES_NUM_USER_SGPR,
|
||||||
|
|
||||||
/* TCS only */
|
/* GFX6-8: TCS only */
|
||||||
SI_SGPR_TCS_OUT_OFFSETS = SI_TES_NUM_USER_SGPR,
|
GFX6_SGPR_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS,
|
||||||
SI_SGPR_TCS_OUT_LAYOUT,
|
GFX6_SGPR_TCS_OUT_OFFSETS,
|
||||||
SI_SGPR_TCS_IN_LAYOUT,
|
GFX6_SGPR_TCS_OUT_LAYOUT,
|
||||||
SI_TCS_NUM_USER_SGPR,
|
GFX6_SGPR_TCS_IN_LAYOUT,
|
||||||
|
GFX6_TCS_NUM_USER_SGPR,
|
||||||
|
|
||||||
|
/* GFX9: Merged LS-HS (VS-TCS) only. */
|
||||||
|
GFX9_SGPR_TCS_OFFCHIP_LAYOUT = SI_VS_NUM_USER_SGPR,
|
||||||
|
GFX9_SGPR_TCS_OUT_OFFSETS,
|
||||||
|
GFX9_SGPR_TCS_OUT_LAYOUT,
|
||||||
|
GFX9_SGPR_unused_to_align_the_next_pointer,
|
||||||
|
GFX9_SGPR_TCS_CONST_BUFFERS,
|
||||||
|
GFX9_SGPR_TCS_CONST_BUFFERS_HI,
|
||||||
|
GFX9_SGPR_TCS_SAMPLERS, /* images & sampler states interleaved */
|
||||||
|
GFX9_SGPR_TCS_SAMPLERS_HI,
|
||||||
|
GFX9_SGPR_TCS_IMAGES,
|
||||||
|
GFX9_SGPR_TCS_IMAGES_HI,
|
||||||
|
GFX9_SGPR_TCS_SHADER_BUFFERS,
|
||||||
|
GFX9_SGPR_TCS_SHADER_BUFFERS_HI,
|
||||||
|
GFX9_TCS_NUM_USER_SGPR,
|
||||||
|
|
||||||
/* GS limits */
|
/* GS limits */
|
||||||
SI_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS,
|
SI_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS,
|
||||||
|
|
|
@ -235,6 +235,14 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
|
||||||
S_00B42C_LDS_SIZE(lds_size);
|
S_00B42C_LDS_SIZE(lds_size);
|
||||||
|
|
||||||
radeon_set_sh_reg(cs, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, hs_rsrc2);
|
radeon_set_sh_reg(cs, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, hs_rsrc2);
|
||||||
|
|
||||||
|
/* Set userdata SGPRs for merged LS-HS. */
|
||||||
|
radeon_set_sh_reg_seq(cs,
|
||||||
|
R_00B430_SPI_SHADER_USER_DATA_LS_0 +
|
||||||
|
GFX9_SGPR_TCS_OFFCHIP_LAYOUT * 4, 3);
|
||||||
|
radeon_emit(cs, offchip_layout);
|
||||||
|
radeon_emit(cs, tcs_out_offsets);
|
||||||
|
radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
|
||||||
} else {
|
} else {
|
||||||
unsigned ls_rsrc2 = ls_current->config.rsrc2;
|
unsigned ls_rsrc2 = ls_current->config.rsrc2;
|
||||||
|
|
||||||
|
@ -251,7 +259,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
|
||||||
|
|
||||||
/* Set userdata SGPRs for TCS. */
|
/* Set userdata SGPRs for TCS. */
|
||||||
radeon_set_sh_reg_seq(cs,
|
radeon_set_sh_reg_seq(cs,
|
||||||
R_00B430_SPI_SHADER_USER_DATA_HS_0 + SI_SGPR_TCS_OFFCHIP_LAYOUT * 4, 4);
|
R_00B430_SPI_SHADER_USER_DATA_HS_0 + GFX6_SGPR_TCS_OFFCHIP_LAYOUT * 4, 4);
|
||||||
radeon_emit(cs, offchip_layout);
|
radeon_emit(cs, offchip_layout);
|
||||||
radeon_emit(cs, tcs_out_offsets);
|
radeon_emit(cs, tcs_out_offsets);
|
||||||
radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
|
radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
|
||||||
|
@ -259,7 +267,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set userdata SGPRs for TES. */
|
/* Set userdata SGPRs for TES. */
|
||||||
radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TCS_OFFCHIP_LAYOUT * 4, 1);
|
radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, 1);
|
||||||
radeon_emit(cs, offchip_layout);
|
radeon_emit(cs, offchip_layout);
|
||||||
|
|
||||||
ls_hs_config = S_028B58_NUM_PATCHES(*num_patches) |
|
ls_hs_config = S_028B58_NUM_PATCHES(*num_patches) |
|
||||||
|
|
|
@ -492,14 +492,15 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
|
||||||
ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
|
ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
|
||||||
|
|
||||||
shader->config.rsrc2 =
|
shader->config.rsrc2 =
|
||||||
S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) |
|
S_00B42C_USER_SGPR(GFX9_TCS_NUM_USER_SGPR) |
|
||||||
|
S_00B42C_USER_SGPR_MSB(GFX9_TCS_NUM_USER_SGPR >> 5) |
|
||||||
S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
|
S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
|
||||||
} else {
|
} else {
|
||||||
si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
|
si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
|
||||||
si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
|
si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
|
||||||
|
|
||||||
shader->config.rsrc2 =
|
shader->config.rsrc2 =
|
||||||
S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) |
|
S_00B42C_USER_SGPR(GFX6_TCS_NUM_USER_SGPR) |
|
||||||
S_00B42C_OC_LDS_EN(1) |
|
S_00B42C_OC_LDS_EN(1) |
|
||||||
S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
|
S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue