From 786d434397c2e3566404b5efe2492cd08e0e6c86 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 3 Dec 2021 13:42:25 +0000 Subject: [PATCH] aco: don't create unnecessary addition in indirect get_sampler_desc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I don't think this has any effect on GFX9+ because the addition is combined into the load. fossil-db (polaris10): Totals from 12595 (9.29% of 135627) affected shaders: SGPRs: 1054348 -> 1054860 (+0.05%); split: -0.02%, +0.07% VGPRs: 667240 -> 667320 (+0.01%); split: -0.01%, +0.02% CodeSize: 82761508 -> 82512816 (-0.30%); split: -0.30%, +0.00% MaxWaves: 62182 -> 62181 (-0.00%) Instrs: 16072934 -> 16010764 (-0.39%); split: -0.39%, +0.00% Latency: 582819635 -> 582287964 (-0.09%); split: -0.13%, +0.04% InvThroughput: 276460536 -> 276417613 (-0.02%); split: -0.06%, +0.05% VClause: 261656 -> 261654 (-0.00%); split: -0.01%, +0.01% SClause: 680952 -> 680854 (-0.01%); split: -0.05%, +0.04% Copies: 1727202 -> 1727742 (+0.03%); split: -0.12%, +0.15% Branches: 547050 -> 547033 (-0.00%); split: -0.01%, +0.00% Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index ad496f7e714..7b786c85667 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -5865,9 +5865,10 @@ get_sampler_desc(isel_context* ctx, nir_deref_instr* deref_instr, if (!index_set) { off = bld.copy(bld.def(s1), Operand::c32(offset)); } else { - off = Operand( - (Temp)bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc), Operand::c32(offset), - bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), Operand::c32(stride), index))); + off = bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), Operand::c32(stride), index); + if (offset) + off = bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc), Operand::c32(offset), + off); } Temp res = bld.smem(opcode, bld.def(type), list, off);