From 6ebc61d71b853443e7f9f509352656a961b0841a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?=
Date: Tue, 15 Mar 2022 12:15:44 +0100
Subject: [PATCH] aco/ra: create VCC-affinities during RA instead of using register hints.

Totals from 88367 (65.50% of 134913) affected shaders: (GFX10.3)
CodeSize: 322492184 -> 322252912 (-0.07%); split: -0.08%, +0.01%
Instrs: 60615809 -> 60541260 (-0.12%); split: -0.12%, +0.00%
Latency: 557067980 -> 557009210 (-0.01%); split: -0.01%, +0.00%
InvThroughput: 109676757 -> 109674804 (-0.00%); split: -0.00%, +0.00%
SClause: 1939703 -> 1939924 (+0.01%); split: -0.01%, +0.02%
Copies: 4557567 -> 4487530 (-1.54%); split: -1.54%, +0.00%
Branches: 1941123 -> 1937453 (-0.19%)

Reviewed-by: Rhys Perry
Part-of:
---
 src/amd/compiler/aco_register_allocation.cpp | 41 +++++++++++++++-----
 1 file changed, 31 insertions(+), 10 deletions(-)

diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 0204f934bb3..314955061ff 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -48,7 +48,13 @@ void add_subdword_definition(Program* program, aco_ptr& instr, Phys
 struct assignment {
    PhysReg reg;
    RegClass rc;
-   bool assigned = false;
+   union {
+      struct {
+         bool assigned : 1;
+         bool vcc : 1;
+      };
+      uint8_t _ = 0;
+   };
    uint32_t affinity = 0;
    assignment() = default;
    assignment(PhysReg reg_, RegClass rc_) : reg(reg_), rc(rc_), assigned(-1) {}
@@ -1624,6 +1630,10 @@ get_reg(ra_ctx& ctx, RegisterFile& reg_file, Temp temp,
             return affinity.reg;
       }
    }
+   if (ctx.assignments[temp.id()].vcc) {
+      if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, vcc))
+         return vcc;
+   }
 
    std::pair res;
 
@@ -2380,11 +2390,26 @@ get_affinities(ra_ctx& ctx, std::vector& live_out_per_block)
          } else if (instr->format == Format::MIMG && instr->operands.size() > 4) {
             for (unsigned i = 3; i < instr->operands.size(); i++)
                ctx.vectors[instr->operands[i].tempId()] = instr.get();
-         }
-
-         if (instr->opcode == aco_opcode::p_split_vector &&
-             instr->operands[0].isFirstKillBeforeDef())
+         } else if (instr->opcode == aco_opcode::p_split_vector &&
+                    instr->operands[0].isFirstKillBeforeDef()) {
             ctx.split_vectors[instr->operands[0].tempId()] = instr.get();
+         } else if (instr->isVOPC() && !instr->isVOP3()) {
+            ctx.assignments[instr->definitions[0].tempId()].vcc = true;
+         } else if (instr->isVOP2() && !instr->isVOP3()) {
+            if (instr->operands.size() == 3 && instr->operands[2].isTemp() &&
+                instr->operands[2].regClass().type() == RegType::sgpr)
+               ctx.assignments[instr->operands[2].tempId()].vcc = true;
+            if (instr->definitions.size() == 2)
+               ctx.assignments[instr->definitions[1].tempId()].vcc = true;
+         } else if (instr->opcode == aco_opcode::s_and_b32 ||
+                    instr->opcode == aco_opcode::s_and_b64) {
+            /* If SCC is used by a branch, we might be able to use
+             * s_cbranch_vccz/s_cbranch_vccnz if the operand is VCC.
+             */
+            if (!instr->definitions[1].isKill() && instr->operands[0].isTemp() &&
+                instr->operands[1].isFixed() && instr->operands[1].physReg() == exec)
+               ctx.assignments[instr->operands[0].tempId()].vcc = true;
+         }
 
          /* add operands to live variables */
          for (const Operand& op : instr->operands) {
@@ -2760,11 +2785,7 @@ register_allocation(Program* program, std::vector& live_out_per_block, ra
                continue;
 
             /* find free reg */
-            if (definition->hasHint() &&
-                get_reg_specified(ctx, register_file, definition->regClass(), instr,
-                                  definition->physReg())) {
-               definition->setFixed(definition->physReg());
-            } else if (instr->opcode == aco_opcode::p_split_vector) {
+            if (instr->opcode == aco_opcode::p_split_vector) {
                PhysReg reg = instr->operands[0].physReg();
                for (unsigned j = 0; j < i; j++)
                   reg.reg_b += instr->definitions[j].bytes();