From 1f4662cc4ed0c5b87479eb71e53a1320ab1b414b Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Fri, 12 Jan 2024 11:49:30 +0100 Subject: [PATCH] radv: move alu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The stats are decent now that aco has an ILP scheduler Foz-DB Navi31: Totals from 73549 (92.59% of 79439) affected shaders: MaxWaves: 2226952 -> 2229352 (+0.11%); split: +0.21%, -0.10% Instrs: 44690384 -> 44905884 (+0.48%); split: -0.10%, +0.58% CodeSize: 232666088 -> 233474808 (+0.35%); split: -0.10%, +0.45% VGPRs: 2998036 -> 2986936 (-0.37%); split: -0.58%, +0.21% SpillSGPRs: 7176 -> 7170 (-0.08%); split: -0.53%, +0.45% SpillVGPRs: 1124 -> 1068 (-4.98%); split: -5.07%, +0.09% Scratch: 6981632 -> 6977792 (-0.06%) Latency: 297998345 -> 298541597 (+0.18%); split: -0.35%, +0.53% InvThroughput: 49162321 -> 49039572 (-0.25%); split: -0.46%, +0.21% VClause: 881737 -> 884147 (+0.27%); split: -0.35%, +0.62% SClause: 1371928 -> 1373973 (+0.15%); split: -0.78%, +0.92% Copies: 2920492 -> 2927281 (+0.23%); split: -0.84%, +1.08% Branches: 890209 -> 890121 (-0.01%); split: -0.03%, +0.02% PreSGPRs: 2376670 -> 2377251 (+0.02%); split: -0.25%, +0.28% PreVGPRs: 2229634 -> 2208966 (-0.93%); split: -1.04%, +0.11% VALU: 25124040 -> 25127521 (+0.01%); split: -0.07%, +0.08% SALU: 4343167 -> 4361062 (+0.41%); split: -0.23%, +0.65% VMEM: 1582363 -> 1582245 (-0.01%); split: -0.01%, +0.00% VOPD: 8709 -> 8708 (-0.01%); split: +2.35%, -2.37% Reviewed-by: Timur Kristóf Reviewed-by: Daniel Schürmann Reviewed-by: Alyssa Rosenzweig Part-of: --- src/amd/compiler/tests/test_d3d11_derivs.cpp | 9 +++++---- src/amd/vulkan/radv_pipeline.c | 9 +++++++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/amd/compiler/tests/test_d3d11_derivs.cpp b/src/amd/compiler/tests/test_d3d11_derivs.cpp index ca2ec72ebbcbb..d48bc55a5d370 100644 --- a/src/amd/compiler/tests/test_d3d11_derivs.cpp +++ b/src/amd/compiler/tests/test_d3d11_derivs.cpp @@ -460,10 
+460,10 @@ BEGIN_TEST(d3d11_derivs.cube_array) PipelineBuilder pbld(get_vk_device(GFX10_3)); pbld.add_vsfs(vs, fs); - //>> v1: %layer = v_rndne_f32 (kill)%_ //>> v1: %face = v_cubeid_f32 (kill)%_, (kill)%_, (kill)%_ //>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000 //>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000 + //>> v1: %layer = v_rndne_f32 (kill)%_ //>> v1: %face_layer = v_fmamk_f32 (kill)%layer, (kill)%face, 0x41000000 //>> lv3: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y, (kill)%face_layer //>> BB1 @@ -473,14 +473,15 @@ BEGIN_TEST(d3d11_derivs.cube_array) //>> p_end_linear_vgpr (latekill)(kill)%wqm pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR"); - //>> v_rndne_f32_e32 v#rl, v#_ ; $_ //>> v_cubeid_f32 v#rf, v#_, v#_, v#_ ; $_ $_ - //>> v_fmamk_f32 v#rlf_tmp, v#rl, 0x41000000, v#rf ; $_ $_ + //>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_ //>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_ //>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_ - //>> v_mov_b32_e32 v#rlf, v#rlf_tmp ; $_ + //>> v_fmamk_f32 v#rlf_tmp, v#rl, 0x41000000, v#rf ; $_ $_ //>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_ + //>> v_mov_b32_e32 v#rlf, v#rlf_tmp ; $_ + //>> BB1: //; success = rx+1 == ry and rx+2 == rlf //>> image_sample v[#_:#_], v[#rx:#rlf], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; $_ $_ diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 5e841089385cf..076a627ceb48c 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -722,9 +722,14 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat sink_opts |= nir_move_comparisons | nir_move_load_ubo | nir_move_load_ssbo | nir_move_alu; NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts); - nir_move_options move_opts = - nir_move_const_undef | nir_move_load_ubo | nir_move_load_input | nir_move_comparisons | nir_move_copies; + nir_move_options move_opts = nir_move_const_undef | nir_move_load_ubo | nir_move_load_input | + 
nir_move_comparisons | nir_move_copies | nir_move_alu; NIR_PASS(_, stage->nir, nir_opt_move, move_opts); + + /* Run nir_opt_move again to make sure that comparisons are as close as possible to the first use to prevent SCC + * spilling. + */ + NIR_PASS(_, stage->nir, nir_opt_move, nir_move_comparisons); } }