radeonsi: move 2nd-shader descriptor pointers into s[0:1]

If 32-bit pointers are supported, both pointers can be moved into s[0:1]
and then ESGS has exactly the same user data SGPR declarations as VS.

If 32-bit pointers are not supported, only one pointer can be moved into
s[0:1]. In that case, the 2nd pointer is moved before TCS constants,
so that the location is the same in HS and GS.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
Marek Olšák 2018-02-02 21:35:20 +01:00
parent 1d1df76d2b
commit 190e064e63
3 changed files with 140 additions and 74 deletions

View File

@ -2053,6 +2053,24 @@ static void si_emit_consecutive_shader_pointers(struct si_context *sctx,
}
}
static void si_emit_disjoint_shader_pointers(struct si_context *sctx,
unsigned pointer_mask,
unsigned sh_base)
{
if (!sh_base)
return;
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned mask = sctx->shader_pointers_dirty & pointer_mask;
while (mask) {
struct si_descriptors *descs = &sctx->descriptors[u_bit_scan(&mask)];
si_emit_shader_pointer_head(cs, descs, sh_base, 1);
si_emit_shader_pointer_body(sctx->screen, cs, descs);
}
}
static void si_emit_global_shader_pointers(struct si_context *sctx,
struct si_descriptors *descs)
{
@ -2089,14 +2107,21 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx,
si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(VERTEX),
sh_base[PIPE_SHADER_VERTEX]);
si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL),
sh_base[PIPE_SHADER_TESS_CTRL]);
si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_EVAL),
sh_base[PIPE_SHADER_TESS_EVAL]);
si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
sh_base[PIPE_SHADER_GEOMETRY]);
si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(FRAGMENT),
sh_base[PIPE_SHADER_FRAGMENT]);
if (HAVE_32BIT_POINTERS || sctx->b.chip_class <= VI) {
si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL),
sh_base[PIPE_SHADER_TESS_CTRL]);
si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
sh_base[PIPE_SHADER_GEOMETRY]);
} else {
si_emit_disjoint_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL),
sh_base[PIPE_SHADER_TESS_CTRL]);
si_emit_disjoint_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
sh_base[PIPE_SHADER_GEOMETRY]);
}
sctx->shader_pointers_dirty &=
~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);
@ -2572,40 +2597,56 @@ void si_init_all_descriptors(struct si_context *sctx)
int i;
#if !HAVE_32BIT_POINTERS
STATIC_ASSERT(GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS % 2 == 0);
STATIC_ASSERT(GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS % 2 == 0);
STATIC_ASSERT(GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES % 2 == 0);
#endif
for (i = 0; i < SI_NUM_SHADERS; i++) {
bool gfx9_tcs = false;
bool gfx9_gs = false;
bool is_2nd = sctx->b.chip_class >= GFX9 &&
(i == PIPE_SHADER_TESS_CTRL ||
i == PIPE_SHADER_GEOMETRY);
unsigned num_sampler_slots = SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS;
unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS;
int rel_dw_offset;
struct si_descriptors *desc;
if (sctx->b.chip_class >= GFX9) {
gfx9_tcs = i == PIPE_SHADER_TESS_CTRL;
gfx9_gs = i == PIPE_SHADER_GEOMETRY;
if (is_2nd) {
if (i == PIPE_SHADER_TESS_CTRL) {
rel_dw_offset = (R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS -
R_00B430_SPI_SHADER_USER_DATA_LS_0) / 4;
} else { /* PIPE_SHADER_GEOMETRY */
rel_dw_offset = (R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS -
R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4;
}
} else {
rel_dw_offset = SI_SGPR_CONST_AND_SHADER_BUFFERS;
}
desc = si_const_and_shader_buffer_descriptors(sctx, i);
si_init_buffer_resources(&sctx->const_and_shader_buffers[i], desc,
num_buffer_slots,
gfx9_tcs ? GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS :
gfx9_gs ? GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS :
SI_SGPR_CONST_AND_SHADER_BUFFERS,
num_buffer_slots, rel_dw_offset,
RADEON_USAGE_READWRITE,
RADEON_USAGE_READ,
RADEON_PRIO_SHADER_RW_BUFFER,
RADEON_PRIO_CONST_BUFFER);
desc->slot_index_to_bind_directly = si_get_constbuf_slot(0);
if (is_2nd) {
#if HAVE_32BIT_POINTERS
if (i == PIPE_SHADER_TESS_CTRL) {
rel_dw_offset = (R_00B40C_SPI_SHADER_USER_DATA_ADDR_HI_HS -
R_00B430_SPI_SHADER_USER_DATA_LS_0) / 4;
} else { /* PIPE_SHADER_GEOMETRY */
rel_dw_offset = (R_00B20C_SPI_SHADER_USER_DATA_ADDR_HI_GS -
R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4;
}
#else
rel_dw_offset = GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES;
#endif
} else {
rel_dw_offset = SI_SGPR_SAMPLERS_AND_IMAGES;
}
desc = si_sampler_and_image_descriptors(sctx, i);
si_init_descriptors(desc,
gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES :
gfx9_gs ? GFX9_SGPR_GS_SAMPLERS_AND_IMAGES :
SI_SGPR_SAMPLERS_AND_IMAGES,
16, num_sampler_slots);
si_init_descriptors(desc, rel_dw_offset, 16, num_sampler_slots);
int j;
for (j = 0; j < SI_NUM_IMAGES; j++)

View File

@ -3351,6 +3351,9 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
{
LLVMValueRef ret = ctx->return_value;
ret = si_insert_input_ptr(ctx, ret, 0, 0);
if (HAVE_32BIT_POINTERS)
ret = si_insert_input_ptr(ctx, ret, 1, 1);
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, 2);
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, 4);
@ -3364,6 +3367,12 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits,
8 + SI_SGPR_VS_STATE_BITS);
#if !HAVE_32BIT_POINTERS
ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 1,
8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES);
#endif
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout,
8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_offsets,
@ -3375,13 +3384,6 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_addr_base64k,
8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K);
unsigned desc_param = ctx->param_tcs_factor_addr_base64k +
(HAVE_32BIT_POINTERS ? 1 : 2);
ret = si_insert_input_ptr(ctx, ret, desc_param,
8 + GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS);
ret = si_insert_input_ptr(ctx, ret, desc_param + 1,
8 + GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES);
unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
ac_to_float(&ctx->ac, ctx->abi.tcs_patch_id),
@ -3397,6 +3399,9 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
{
LLVMValueRef ret = ctx->return_value;
ret = si_insert_input_ptr(ctx, ret, 0, 0);
if (HAVE_32BIT_POINTERS)
ret = si_insert_input_ptr(ctx, ret, 1, 1);
ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2);
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5);
@ -3407,11 +3412,10 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
ctx->param_bindless_samplers_and_images,
8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
unsigned desc_param = ctx->param_vs_state_bits + 1;
ret = si_insert_input_ptr(ctx, ret, desc_param,
8 + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS);
ret = si_insert_input_ptr(ctx, ret, desc_param + 1,
8 + GFX9_SGPR_GS_SAMPLERS_AND_IMAGES);
#if !HAVE_32BIT_POINTERS
ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 1,
8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES);
#endif
unsigned vgpr = 8 + GFX9_GS_NUM_USER_SGPR;
for (unsigned i = 0; i < 5; i++) {
@ -4489,9 +4493,9 @@ static unsigned si_get_max_workgroup_size(const struct si_shader *shader)
return max_work_group_size;
}
static void declare_per_stage_desc_pointers(struct si_shader_context *ctx,
struct si_function_info *fninfo,
bool assign_params)
static void declare_const_and_shader_buffers(struct si_shader_context *ctx,
struct si_function_info *fninfo,
bool assign_params)
{
LLVMTypeRef const_shader_buf_type;
@ -4505,14 +4509,28 @@ static void declare_per_stage_desc_pointers(struct si_shader_context *ctx,
add_arg(fninfo, ARG_SGPR,
ac_array_in_const32_addr_space(const_shader_buf_type));
if (assign_params)
ctx->param_const_and_shader_buffers = const_and_shader_buffers;
}
static void declare_samplers_and_images(struct si_shader_context *ctx,
struct si_function_info *fninfo,
bool assign_params)
{
unsigned samplers_and_images =
add_arg(fninfo, ARG_SGPR,
ac_array_in_const32_addr_space(ctx->v8i32));
if (assign_params) {
ctx->param_const_and_shader_buffers = const_and_shader_buffers;
if (assign_params)
ctx->param_samplers_and_images = samplers_and_images;
}
}
static void declare_per_stage_desc_pointers(struct si_shader_context *ctx,
struct si_function_info *fninfo,
bool assign_params)
{
declare_const_and_shader_buffers(ctx, fninfo, assign_params);
declare_samplers_and_images(ctx, fninfo, assign_params);
}
static void declare_global_desc_pointers(struct si_shader_context *ctx,
@ -4677,8 +4695,14 @@ static void create_function(struct si_shader_context *ctx)
case SI_SHADER_MERGED_VERTEX_TESSCTRL:
/* Merged stages have 8 system SGPRs at the beginning. */
add_arg(&fninfo, ARG_SGPR, ctx->i32); /* SPI_SHADER_USER_DATA_ADDR_LO_HS */
add_arg(&fninfo, ARG_SGPR, ctx->i32); /* SPI_SHADER_USER_DATA_ADDR_HI_HS */
/* SPI_SHADER_USER_DATA_ADDR_LO/HI_HS */
if (HAVE_32BIT_POINTERS) {
declare_per_stage_desc_pointers(ctx, &fninfo,
ctx->type == PIPE_SHADER_TESS_CTRL);
} else {
declare_const_and_shader_buffers(ctx, &fninfo,
ctx->type == PIPE_SHADER_TESS_CTRL);
}
ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
@ -4691,16 +4715,15 @@ static void create_function(struct si_shader_context *ctx)
ctx->type == PIPE_SHADER_VERTEX);
declare_vs_specific_input_sgprs(ctx, &fninfo);
if (!HAVE_32BIT_POINTERS) {
declare_samplers_and_images(ctx, &fninfo,
ctx->type == PIPE_SHADER_TESS_CTRL);
}
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
if (!HAVE_32BIT_POINTERS)
add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
declare_per_stage_desc_pointers(ctx, &fninfo,
ctx->type == PIPE_SHADER_TESS_CTRL);
/* VGPRs (first TCS, then VS) */
add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.tcs_patch_id);
@ -4731,8 +4754,14 @@ static void create_function(struct si_shader_context *ctx)
case SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY:
/* Merged stages have 8 system SGPRs at the beginning. */
add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_USER_DATA_ADDR_LO_GS) */
add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_USER_DATA_ADDR_HI_GS) */
/* SPI_SHADER_USER_DATA_ADDR_LO/HI_GS */
if (HAVE_32BIT_POINTERS) {
declare_per_stage_desc_pointers(ctx, &fninfo,
ctx->type == PIPE_SHADER_GEOMETRY);
} else {
declare_const_and_shader_buffers(ctx, &fninfo,
ctx->type == PIPE_SHADER_GEOMETRY);
}
ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
@ -4758,8 +4787,10 @@ static void create_function(struct si_shader_context *ctx)
ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
}
declare_per_stage_desc_pointers(ctx, &fninfo,
ctx->type == PIPE_SHADER_GEOMETRY);
if (!HAVE_32BIT_POINTERS) {
declare_samplers_and_images(ctx, &fninfo,
ctx->type == PIPE_SHADER_GEOMETRY);
}
/* VGPRs (first GS, then VS/TES) */
ctx->param_gs_vtx01_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
@ -7266,7 +7297,8 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
si_init_function_info(&fninfo);
if (ctx->screen->info.chip_class >= GFX9) {
add_arg(&fninfo, ARG_SGPR, ctx->i64);
add_arg(&fninfo, ARG_SGPR, ctx->i32);
add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
add_arg(&fninfo, ARG_SGPR, ctx->i32); /* wave info */
ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
@ -7282,6 +7314,8 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
add_arg(&fninfo, ARG_SGPR, ctx->i32);
add_arg(&fninfo, ARG_SGPR, ctx->i32);
add_arg(&fninfo, ARG_SGPR, ctx->i32);
if (!HAVE_32BIT_POINTERS)
add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
add_arg(&fninfo, ARG_SGPR, ctx->i32);
add_arg(&fninfo, ARG_SGPR, ctx->i32);

View File

@ -195,38 +195,29 @@ enum {
GFX6_SGPR_TCS_FACTOR_ADDR_BASE64K,
GFX6_TCS_NUM_USER_SGPR,
/* GFX9: Merged shaders. */
#if HAVE_32BIT_POINTERS
/* 2ND_CONST_AND_SHADER_BUFFERS is set in USER_DATA_ADDR_LO (SGPR0). */
/* 2ND_SAMPLERS_AND_IMAGES is set in USER_DATA_ADDR_HI (SGPR1). */
GFX9_MERGED_NUM_USER_SGPR = SI_VS_NUM_USER_SGPR,
#else
/* 2ND_CONST_AND_SHADER_BUFFERS is set in USER_DATA_ADDR_LO/HI (SGPR[0:1]). */
GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES = SI_VS_NUM_USER_SGPR,
GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES_HI,
GFX9_MERGED_NUM_USER_SGPR,
#endif
/* GFX9: Merged LS-HS (VS-TCS) only. */
GFX9_SGPR_TCS_OFFCHIP_LAYOUT = SI_VS_NUM_USER_SGPR,
GFX9_SGPR_TCS_OFFCHIP_LAYOUT = GFX9_MERGED_NUM_USER_SGPR,
GFX9_SGPR_TCS_OUT_OFFSETS,
GFX9_SGPR_TCS_OUT_LAYOUT,
GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K,
GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K,
#if !HAVE_32BIT_POINTERS
GFX9_SGPR_unused_to_align_the_next_pointer,
#endif
GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS,
#if !HAVE_32BIT_POINTERS
GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS_HI,
#endif
GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES,
#if !HAVE_32BIT_POINTERS
GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES_HI,
#endif
GFX9_TCS_NUM_USER_SGPR,
/* GFX9: Merged ES-GS (VS-GS or TES-GS). */
GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS = SI_VS_NUM_USER_SGPR,
#if !HAVE_32BIT_POINTERS
GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS_HI,
#endif
GFX9_SGPR_GS_SAMPLERS_AND_IMAGES,
#if !HAVE_32BIT_POINTERS
GFX9_SGPR_GS_SAMPLERS_AND_IMAGES_HI,
#endif
GFX9_GS_NUM_USER_SGPR,
/* GS limits */
GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS,
GFX9_GS_NUM_USER_SGPR = GFX9_MERGED_NUM_USER_SGPR,
SI_GSCOPY_NUM_USER_SGPR = SI_SGPR_RW_BUFFERS + (HAVE_32BIT_POINTERS ? 1 : 2),
/* PS only */