From df3e674fb3e627223782b1bd1f4748a3544735c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?=
Date: Thu, 5 Dec 2019 19:17:52 +0100
Subject: [PATCH] aco: improve readfirstlane after uniform ssbo loads on GFX7

pipeline-db changes for GFX7:

80310 shaders in 40472 tests
Totals:
SGPRS: 3655900 -> 3643916 (-0.33 %)
VGPRS: 2678324 -> 2686324 (0.30 %)
Spilled SGPRs: 1730 -> 1634 (-5.55 %)
Spilled VGPRs: 14 -> 21 (50.00 %)
Scratch size: 15540 -> 15536 (-0.03 %) dwords per thread
Code Size: 136106120 -> 135457616 (-0.48 %) bytes
LDS: 1259 -> 1259 (0.00 %) blocks
Max Waves: 601014 -> 600206 (-0.13 %)

Totals from affected shaders:
SGPRS: 307832 -> 295848 (-3.89 %)
VGPRS: 267864 -> 275864 (2.99 %)
Spilled SGPRs: 770 -> 674 (-12.47 %)
Spilled VGPRs: 14 -> 21 (50.00 %)
Scratch size: 16 -> 12 (-25.00 %) dwords per thread
Code Size: 22007488 -> 21358984 (-2.95 %) bytes
LDS: 65 -> 65 (0.00 %) blocks
Max Waves: 28668 -> 27860 (-2.82 %)

Reviewed-by: Rhys Perry
---
 src/amd/compiler/aco_instruction_selection.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 1cff595659c..b92d7c0eb5f 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -3447,10 +3447,11 @@ void load_buffer(isel_context *ctx, unsigned num_components, Temp dst,
          Temp vec = bld.tmp(RegType::vgpr, dst.size());
          instr->definitions[0] = Definition(vec);
          bld.insert(std::move(instr));
-         bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), vec);
+         expand_vector(ctx, vec, dst, num_components, (1 << num_components) - 1);
       } else {
          instr->definitions[0] = Definition(dst);
          bld.insert(std::move(instr));
+         emit_split_vector(ctx, dst, num_components);
       }
    } else {
       switch (num_bytes) {
@@ -3506,9 +3507,8 @@ void load_buffer(isel_context *ctx, unsigned num_components, Temp dst,
       } else {
          bld.insert(std::move(load));
       }
-
+      emit_split_vector(ctx, dst, num_components);
    }
-   emit_split_vector(ctx, dst, num_components);
 }
 
 void visit_load_ubo(isel_context *ctx, nir_intrinsic_instr *instr)