From 87b4f3daa1f74c5561e57e92a314a59da0d62bb4 Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Thu, 19 May 2022 15:50:30 +0200
Subject: [PATCH] aco/ra: Move mac encoding optimization to its own function.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Georg Lehmann
Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_register_allocation.cpp | 114 ++++++++++---------
 1 file changed, 60 insertions(+), 54 deletions(-)

diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 922c5b849e5..32ddca431b0 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -2541,6 +2541,65 @@ get_affinities(ra_ctx& ctx, std::vector& live_out_per_block)
    }
 }
 
+void
+optimize_encoding_vop2(Program* program, ra_ctx& ctx, RegisterFile& register_file,
+                       aco_ptr<Instruction>& instr)
+{
+   /* try to optimize v_mad_f32 -> v_mac_f32 */
+   if ((instr->opcode != aco_opcode::v_mad_f32 &&
+        (instr->opcode != aco_opcode::v_fma_f32 || program->gfx_level < GFX10) &&
+        instr->opcode != aco_opcode::v_mad_f16 && instr->opcode != aco_opcode::v_mad_legacy_f16 &&
+        (instr->opcode != aco_opcode::v_fma_f16 || program->gfx_level < GFX10) &&
+        (instr->opcode != aco_opcode::v_pk_fma_f16 || program->gfx_level < GFX10) &&
+        (instr->opcode != aco_opcode::v_mad_legacy_f32 || !program->dev.has_mac_legacy32) &&
+        (instr->opcode != aco_opcode::v_fma_legacy_f32 || !program->dev.has_mac_legacy32) &&
+        (instr->opcode != aco_opcode::v_dot4_i32_i8 || program->family == CHIP_VEGA20)) ||
+       !instr->operands[2].isTemp() || !instr->operands[2].isKillBeforeDef() ||
+       instr->operands[2].getTemp().type() != RegType::vgpr ||
+       ((!instr->operands[0].isTemp() || instr->operands[0].getTemp().type() != RegType::vgpr) &&
+        (!instr->operands[1].isTemp() || instr->operands[1].getTemp().type() != RegType::vgpr)) ||
+       instr->usesModifiers() || instr->operands[0].physReg().byte() != 0 ||
+       instr->operands[1].physReg().byte() != 0 || instr->operands[2].physReg().byte() != 0)
+      return;
+
+   if (!instr->operands[1].isTemp() || instr->operands[1].getTemp().type() != RegType::vgpr)
+      std::swap(instr->operands[0], instr->operands[1]);
+
+   unsigned def_id = instr->definitions[0].tempId();
+   if (ctx.assignments[def_id].affinity) {
+      assignment& affinity = ctx.assignments[ctx.assignments[def_id].affinity];
+      if (affinity.assigned && affinity.reg != instr->operands[2].physReg() &&
+          !register_file.test(affinity.reg, instr->operands[2].bytes()))
+         return;
+   }
+
+   static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3_instruction),
+                 "Invalid direct instruction cast.");
+   static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3P_instruction),
+                 "Invalid direct instruction cast.");
+   instr->format = Format::VOP2;
+   switch (instr->opcode) {
+   case aco_opcode::v_mad_f32: instr->opcode = aco_opcode::v_mac_f32; break;
+   case aco_opcode::v_fma_f32: instr->opcode = aco_opcode::v_fmac_f32; break;
+   case aco_opcode::v_mad_f16:
+   case aco_opcode::v_mad_legacy_f16: instr->opcode = aco_opcode::v_mac_f16; break;
+   case aco_opcode::v_fma_f16: instr->opcode = aco_opcode::v_fmac_f16; break;
+   case aco_opcode::v_pk_fma_f16: instr->opcode = aco_opcode::v_pk_fmac_f16; break;
+   case aco_opcode::v_dot4_i32_i8: instr->opcode = aco_opcode::v_dot4c_i32_i8; break;
+   case aco_opcode::v_mad_legacy_f32: instr->opcode = aco_opcode::v_mac_legacy_f32; break;
+   case aco_opcode::v_fma_legacy_f32: instr->opcode = aco_opcode::v_fmac_legacy_f32; break;
+   default: break;
+   }
+}
+
+void
+optimize_encoding(Program* program, ra_ctx& ctx, RegisterFile& register_file,
+                  aco_ptr<Instruction>& instr)
+{
+   if (instr->isVALU())
+      optimize_encoding_vop2(program, ctx, register_file, instr);
+}
+
 } /* end namespace */
 
 void
@@ -2664,60 +2723,7 @@ register_allocation(Program* program, std::vector& live_out_per_block, ra
                register_file.clear(op);
          }
 
-         /* try to optimize v_mad_f32 -> v_mac_f32 */
-         if ((instr->opcode == aco_opcode::v_mad_f32 ||
-              (instr->opcode == aco_opcode::v_fma_f32 && program->gfx_level >= GFX10) ||
-              instr->opcode == aco_opcode::v_mad_f16 ||
-              instr->opcode == aco_opcode::v_mad_legacy_f16 ||
-              (instr->opcode == aco_opcode::v_fma_f16 && program->gfx_level >= GFX10) ||
-              (instr->opcode == aco_opcode::v_pk_fma_f16 && program->gfx_level >= GFX10) ||
-              (instr->opcode == aco_opcode::v_mad_legacy_f32 && program->dev.has_mac_legacy32) ||
-              (instr->opcode == aco_opcode::v_fma_legacy_f32 && program->dev.has_mac_legacy32) ||
-              (instr->opcode == aco_opcode::v_dot4_i32_i8 && program->family != CHIP_VEGA20)) &&
-             instr->operands[2].isTemp() && instr->operands[2].isKillBeforeDef() &&
-             instr->operands[2].getTemp().type() == RegType::vgpr &&
-             ((instr->operands[0].isTemp() &&
-               instr->operands[0].getTemp().type() == RegType::vgpr) ||
-              (instr->operands[1].isTemp() &&
-               instr->operands[1].getTemp().type() == RegType::vgpr)) &&
-             !instr->usesModifiers() && instr->operands[0].physReg().byte() == 0 &&
-             instr->operands[1].physReg().byte() == 0 && instr->operands[2].physReg().byte() == 0) {
-            if (!instr->operands[1].isTemp() ||
-                instr->operands[1].getTemp().type() != RegType::vgpr)
-               std::swap(instr->operands[0], instr->operands[1]);
-
-            unsigned def_id = instr->definitions[0].tempId();
-            bool use_vop2 = true;
-            if (ctx.assignments[def_id].affinity) {
-               assignment& affinity = ctx.assignments[ctx.assignments[def_id].affinity];
-               if (affinity.assigned && affinity.reg != instr->operands[2].physReg() &&
-                   !register_file.test(affinity.reg, instr->operands[2].bytes()))
-                  use_vop2 = false;
-            }
-            if (use_vop2) {
-               static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3_instruction),
-                             "Invalid direct instruction cast.");
-               static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3P_instruction),
-                             "Invalid direct instruction cast.");
-               instr->format = Format::VOP2;
-               switch (instr->opcode) {
-               case aco_opcode::v_mad_f32: instr->opcode = aco_opcode::v_mac_f32; break;
-               case aco_opcode::v_fma_f32: instr->opcode = aco_opcode::v_fmac_f32; break;
-               case aco_opcode::v_mad_f16:
-               case aco_opcode::v_mad_legacy_f16: instr->opcode = aco_opcode::v_mac_f16; break;
-               case aco_opcode::v_fma_f16: instr->opcode = aco_opcode::v_fmac_f16; break;
-               case aco_opcode::v_pk_fma_f16: instr->opcode = aco_opcode::v_pk_fmac_f16; break;
-               case aco_opcode::v_dot4_i32_i8: instr->opcode = aco_opcode::v_dot4c_i32_i8; break;
-               case aco_opcode::v_mad_legacy_f32:
-                  instr->opcode = aco_opcode::v_mac_legacy_f32;
-                  break;
-               case aco_opcode::v_fma_legacy_f32:
-                  instr->opcode = aco_opcode::v_fmac_legacy_f32;
-                  break;
-               default: break;
-               }
-            }
-         }
+         optimize_encoding(program, ctx, register_file, instr);
 
          /* Handle definitions which must have the same register as an operand.
           * We expect that the definition has the same size as the operand, otherwise the new