aco/ra: Move mac encoding optimization to its own function.

Signed-off-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15999>
This commit is contained in:
Georg Lehmann 2022-05-19 15:50:30 +02:00 committed by Marge Bot
parent c9490436b6
commit 87b4f3daa1
1 changed files with 60 additions and 54 deletions

View File

@ -2541,6 +2541,65 @@ get_affinities(ra_ctx& ctx, std::vector<IDSet>& live_out_per_block)
}
}
void
optimize_encoding_vop2(Program* program, ra_ctx& ctx, RegisterFile& register_file,
                       aco_ptr<Instruction>& instr)
{
   /* try to optimize v_mad_f32 -> v_mac_f32 */

   /* Map the opcode to its VOP2 counterpart and record whether this program
    * may use it. Every other opcode is left alone. This must run before any
    * access to operands[2] below.
    */
   aco_opcode vop2_opcode;
   bool supported = true;
   switch (instr->opcode) {
   case aco_opcode::v_mad_f32: vop2_opcode = aco_opcode::v_mac_f32; break;
   case aco_opcode::v_mad_f16:
   case aco_opcode::v_mad_legacy_f16: vop2_opcode = aco_opcode::v_mac_f16; break;
   case aco_opcode::v_fma_f32:
      vop2_opcode = aco_opcode::v_fmac_f32;
      supported = program->gfx_level >= GFX10;
      break;
   case aco_opcode::v_fma_f16:
      vop2_opcode = aco_opcode::v_fmac_f16;
      supported = program->gfx_level >= GFX10;
      break;
   case aco_opcode::v_pk_fma_f16:
      vop2_opcode = aco_opcode::v_pk_fmac_f16;
      supported = program->gfx_level >= GFX10;
      break;
   case aco_opcode::v_mad_legacy_f32:
      vop2_opcode = aco_opcode::v_mac_legacy_f32;
      supported = program->dev.has_mac_legacy32;
      break;
   case aco_opcode::v_fma_legacy_f32:
      vop2_opcode = aco_opcode::v_fmac_legacy_f32;
      supported = program->dev.has_mac_legacy32;
      break;
   case aco_opcode::v_dot4_i32_i8:
      vop2_opcode = aco_opcode::v_dot4c_i32_i8;
      supported = program->family != CHIP_VEGA20;
      break;
   default: return;
   }
   if (!supported)
      return;

   const auto is_vgpr_temp = [](const auto& op)
   { return op.isTemp() && op.getTemp().type() == RegType::vgpr; };

   /* The third source has to be a VGPR temporary that is killed before the
    * definition.
    */
   if (!is_vgpr_temp(instr->operands[2]) || !instr->operands[2].isKillBeforeDef())
      return;

   /* At least one of the first two sources has to be a VGPR temporary. */
   if (!is_vgpr_temp(instr->operands[0]) && !is_vgpr_temp(instr->operands[1]))
      return;

   /* No modifiers are allowed. */
   if (instr->usesModifiers())
      return;

   /* All three sources must start at byte 0 of their register. */
   for (unsigned i = 0; i < 3; i++) {
      if (instr->operands[i].physReg().byte() != 0)
         return;
   }

   /* Make sure the second source is a VGPR temporary. */
   if (!is_vgpr_temp(instr->operands[1]))
      std::swap(instr->operands[0], instr->operands[1]);

   /* Keep the current encoding when the definition has an affinity to an
    * already assigned register that differs from operand 2's register and
    * register_file.test() returns false for that range (presumably meaning
    * the preferred register is still available - confirm against
    * RegisterFile::test()).
    */
   const unsigned def_id = instr->definitions[0].tempId();
   const unsigned affinity_id = ctx.assignments[def_id].affinity;
   if (affinity_id) {
      assignment& preferred = ctx.assignments[affinity_id];
      const bool differs_from_op2 =
         preferred.assigned && preferred.reg != instr->operands[2].physReg();
      if (differs_from_op2 && !register_file.test(preferred.reg, instr->operands[2].bytes()))
         return;
   }

   /* The instruction object is re-tagged in place, so the VOP2 layout must
    * fit into whichever layout it was allocated with.
    */
   static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3_instruction),
                 "Invalid direct instruction cast.");
   static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3P_instruction),
                 "Invalid direct instruction cast.");
   instr->format = Format::VOP2;
   instr->opcode = vop2_opcode;
}
void
optimize_encoding(Program* program, ra_ctx& ctx, RegisterFile& register_file,
                  aco_ptr<Instruction>& instr)
{
   /* Only VALU instructions are considered; anything else is left untouched. */
   if (!instr->isVALU())
      return;

   optimize_encoding_vop2(program, ctx, register_file, instr);
}
} /* end namespace */
void
@ -2664,60 +2723,7 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
register_file.clear(op);
}
/* try to optimize v_mad_f32 -> v_mac_f32 */
if ((instr->opcode == aco_opcode::v_mad_f32 ||
(instr->opcode == aco_opcode::v_fma_f32 && program->gfx_level >= GFX10) ||
instr->opcode == aco_opcode::v_mad_f16 ||
instr->opcode == aco_opcode::v_mad_legacy_f16 ||
(instr->opcode == aco_opcode::v_fma_f16 && program->gfx_level >= GFX10) ||
(instr->opcode == aco_opcode::v_pk_fma_f16 && program->gfx_level >= GFX10) ||
(instr->opcode == aco_opcode::v_mad_legacy_f32 && program->dev.has_mac_legacy32) ||
(instr->opcode == aco_opcode::v_fma_legacy_f32 && program->dev.has_mac_legacy32) ||
(instr->opcode == aco_opcode::v_dot4_i32_i8 && program->family != CHIP_VEGA20)) &&
instr->operands[2].isTemp() && instr->operands[2].isKillBeforeDef() &&
instr->operands[2].getTemp().type() == RegType::vgpr &&
((instr->operands[0].isTemp() &&
instr->operands[0].getTemp().type() == RegType::vgpr) ||
(instr->operands[1].isTemp() &&
instr->operands[1].getTemp().type() == RegType::vgpr)) &&
!instr->usesModifiers() && instr->operands[0].physReg().byte() == 0 &&
instr->operands[1].physReg().byte() == 0 && instr->operands[2].physReg().byte() == 0) {
if (!instr->operands[1].isTemp() ||
instr->operands[1].getTemp().type() != RegType::vgpr)
std::swap(instr->operands[0], instr->operands[1]);
unsigned def_id = instr->definitions[0].tempId();
bool use_vop2 = true;
if (ctx.assignments[def_id].affinity) {
assignment& affinity = ctx.assignments[ctx.assignments[def_id].affinity];
if (affinity.assigned && affinity.reg != instr->operands[2].physReg() &&
!register_file.test(affinity.reg, instr->operands[2].bytes()))
use_vop2 = false;
}
if (use_vop2) {
static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3_instruction),
"Invalid direct instruction cast.");
static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3P_instruction),
"Invalid direct instruction cast.");
instr->format = Format::VOP2;
switch (instr->opcode) {
case aco_opcode::v_mad_f32: instr->opcode = aco_opcode::v_mac_f32; break;
case aco_opcode::v_fma_f32: instr->opcode = aco_opcode::v_fmac_f32; break;
case aco_opcode::v_mad_f16:
case aco_opcode::v_mad_legacy_f16: instr->opcode = aco_opcode::v_mac_f16; break;
case aco_opcode::v_fma_f16: instr->opcode = aco_opcode::v_fmac_f16; break;
case aco_opcode::v_pk_fma_f16: instr->opcode = aco_opcode::v_pk_fmac_f16; break;
case aco_opcode::v_dot4_i32_i8: instr->opcode = aco_opcode::v_dot4c_i32_i8; break;
case aco_opcode::v_mad_legacy_f32:
instr->opcode = aco_opcode::v_mac_legacy_f32;
break;
case aco_opcode::v_fma_legacy_f32:
instr->opcode = aco_opcode::v_fmac_legacy_f32;
break;
default: break;
}
}
}
optimize_encoding(program, ctx, register_file, instr);
/* Handle definitions which must have the same register as an operand.
* We expect that the definition has the same size as the operand, otherwise the new