From d00845faf4f4a2fd46c5efc40a2b8cee32243265 Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Wed, 1 Jun 2022 17:13:00 +0800 Subject: [PATCH] ac/nir: add no_inputs_in_lds param to hs output lower MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is used by radeonsi to save some LDS space when all LS outputs are passed in registers. Reviewed-by: Marek Olšák Acked-by: Pierre-Eric Pelloux-Prayer Signed-off-by: Qiang Yu Part-of: --- src/amd/common/ac_nir.h | 1 + src/amd/common/ac_nir_lower_tess_io_to_mem.c | 25 +++++++++++++++----- src/amd/vulkan/radv_shader.c | 3 ++- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/amd/common/ac_nir.h b/src/amd/common/ac_nir.h index 12021b66628..08f55470d76 100644 --- a/src/amd/common/ac_nir.h +++ b/src/amd/common/ac_nir.h @@ -90,6 +90,7 @@ ac_nir_lower_hs_outputs_to_mem(nir_shader *shader, unsigned num_reserved_tcs_outputs, unsigned num_reserved_tcs_patch_outputs, unsigned wave_size, + bool no_inputs_in_lds, bool pass_tessfactors_by_reg, bool emit_tess_factor_write); diff --git a/src/amd/common/ac_nir_lower_tess_io_to_mem.c b/src/amd/common/ac_nir_lower_tess_io_to_mem.c index d2ce7c11479..e14ddc3b06e 100644 --- a/src/amd/common/ac_nir_lower_tess_io_to_mem.c +++ b/src/amd/common/ac_nir_lower_tess_io_to_mem.c @@ -159,6 +159,11 @@ typedef struct { * can be passed by register. */ bool tcs_pass_tessfactors_by_reg; + + /* Whether all TCS inputs are accessed using gl_InvocationID and passed via VGPRs. + * In that case, no LDS is allocated for TCS inputs. 
+ */ + bool tcs_no_inputs_in_lds; } lower_tess_io_state; static bool @@ -325,18 +330,24 @@ hs_output_lds_offset(nir_builder *b, unsigned pervertex_output_patch_size = b->shader->info.tess.tcs_vertices_out * output_vertex_size; unsigned output_patch_stride = pervertex_output_patch_size + st->tcs_num_reserved_patch_outputs * 16u; - nir_ssa_def *tcs_in_vtxcnt = nir_load_patch_vertices_in(b); - nir_ssa_def *tcs_num_patches = nir_load_tcs_num_patches_amd(b); - nir_ssa_def *input_patch_size = nir_imul(b, tcs_in_vtxcnt, nir_load_lshs_vertex_stride_amd(b)); - nir_ssa_def *output_patch0_offset = nir_imul(b, input_patch_size, tcs_num_patches); - nir_ssa_def *off = intrin ? ac_nir_calc_io_offset(b, intrin, nir_imm_int(b, 16u), 4u, st->map_io) : nir_imm_int(b, 0); nir_ssa_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b); nir_ssa_def *patch_offset = nir_imul_imm(b, rel_patch_id, output_patch_stride); - nir_ssa_def *output_patch_offset = nir_iadd_nuw(b, patch_offset, output_patch0_offset); + + nir_ssa_def *output_patch_offset; + if (st->tcs_no_inputs_in_lds) + output_patch_offset = patch_offset; + else { + nir_ssa_def *tcs_in_vtxcnt = nir_load_patch_vertices_in(b); + nir_ssa_def *tcs_num_patches = nir_load_tcs_num_patches_amd(b); + nir_ssa_def *input_patch_size = + nir_imul(b, tcs_in_vtxcnt, nir_load_lshs_vertex_stride_amd(b)); + nir_ssa_def *output_patch0_offset = nir_imul(b, input_patch_size, tcs_num_patches); + output_patch_offset = nir_iadd_nuw(b, patch_offset, output_patch0_offset); + } if (per_vertex) { nir_ssa_def *vertex_index = nir_ssa_for_src(b, *nir_get_io_arrayed_index_src(intrin), 1); @@ -706,6 +717,7 @@ ac_nir_lower_hs_outputs_to_mem(nir_shader *shader, unsigned num_reserved_tcs_outputs, unsigned num_reserved_tcs_patch_outputs, unsigned wave_size, + bool no_inputs_in_lds, bool pass_tessfactors_by_reg, bool emit_tess_factor_write) { @@ -720,6 +732,7 @@ ac_nir_lower_hs_outputs_to_mem(nir_shader *shader, .tcs_num_reserved_patch_outputs = 
num_reserved_tcs_patch_outputs, .tcs_out_patch_fits_subgroup = wave_size % shader->info.tess.tcs_vertices_out == 0, .tcs_pass_tessfactors_by_reg = pass_tessfactors_by_reg, + .tcs_no_inputs_in_lds = no_inputs_in_lds, .map_io = map, }; diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index c2650bde38a..ca3f112385d 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -1124,7 +1124,8 @@ radv_lower_io_to_mem(struct radv_device *device, struct radv_pipeline_stage *sta device->physical_device->rad_info.gfx_level, info->tcs.tes_reads_tess_factors, info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read, info->tcs.num_linked_outputs, - info->tcs.num_linked_patch_outputs, info->wave_size, false, true); + info->tcs.num_linked_patch_outputs, info->wave_size, + false, false, true); return true; } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {