aco/ra: Move mac encoding optimization to its own function.
Signed-off-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15999>
This commit is contained in:
parent
c9490436b6
commit
87b4f3daa1
|
@ -2541,6 +2541,65 @@ get_affinities(ra_ctx& ctx, std::vector<IDSet>& live_out_per_block)
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
optimize_encoding_vop2(Program* program, ra_ctx& ctx, RegisterFile& register_file,
|
||||
aco_ptr<Instruction>& instr)
|
||||
{
|
||||
/* try to optimize v_mad_f32 -> v_mac_f32 */
|
||||
if ((instr->opcode != aco_opcode::v_mad_f32 &&
|
||||
(instr->opcode != aco_opcode::v_fma_f32 || program->gfx_level < GFX10) &&
|
||||
instr->opcode != aco_opcode::v_mad_f16 && instr->opcode != aco_opcode::v_mad_legacy_f16 &&
|
||||
(instr->opcode != aco_opcode::v_fma_f16 || program->gfx_level < GFX10) &&
|
||||
(instr->opcode != aco_opcode::v_pk_fma_f16 || program->gfx_level < GFX10) &&
|
||||
(instr->opcode != aco_opcode::v_mad_legacy_f32 || !program->dev.has_mac_legacy32) &&
|
||||
(instr->opcode != aco_opcode::v_fma_legacy_f32 || !program->dev.has_mac_legacy32) &&
|
||||
(instr->opcode != aco_opcode::v_dot4_i32_i8 || program->family == CHIP_VEGA20)) ||
|
||||
!instr->operands[2].isTemp() || !instr->operands[2].isKillBeforeDef() ||
|
||||
instr->operands[2].getTemp().type() != RegType::vgpr ||
|
||||
((!instr->operands[0].isTemp() || instr->operands[0].getTemp().type() != RegType::vgpr) &&
|
||||
(!instr->operands[1].isTemp() || instr->operands[1].getTemp().type() != RegType::vgpr)) ||
|
||||
instr->usesModifiers() || instr->operands[0].physReg().byte() != 0 ||
|
||||
instr->operands[1].physReg().byte() != 0 || instr->operands[2].physReg().byte() != 0)
|
||||
return;
|
||||
|
||||
if (!instr->operands[1].isTemp() || instr->operands[1].getTemp().type() != RegType::vgpr)
|
||||
std::swap(instr->operands[0], instr->operands[1]);
|
||||
|
||||
unsigned def_id = instr->definitions[0].tempId();
|
||||
if (ctx.assignments[def_id].affinity) {
|
||||
assignment& affinity = ctx.assignments[ctx.assignments[def_id].affinity];
|
||||
if (affinity.assigned && affinity.reg != instr->operands[2].physReg() &&
|
||||
!register_file.test(affinity.reg, instr->operands[2].bytes()))
|
||||
return;
|
||||
}
|
||||
|
||||
static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3_instruction),
|
||||
"Invalid direct instruction cast.");
|
||||
static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3P_instruction),
|
||||
"Invalid direct instruction cast.");
|
||||
instr->format = Format::VOP2;
|
||||
switch (instr->opcode) {
|
||||
case aco_opcode::v_mad_f32: instr->opcode = aco_opcode::v_mac_f32; break;
|
||||
case aco_opcode::v_fma_f32: instr->opcode = aco_opcode::v_fmac_f32; break;
|
||||
case aco_opcode::v_mad_f16:
|
||||
case aco_opcode::v_mad_legacy_f16: instr->opcode = aco_opcode::v_mac_f16; break;
|
||||
case aco_opcode::v_fma_f16: instr->opcode = aco_opcode::v_fmac_f16; break;
|
||||
case aco_opcode::v_pk_fma_f16: instr->opcode = aco_opcode::v_pk_fmac_f16; break;
|
||||
case aco_opcode::v_dot4_i32_i8: instr->opcode = aco_opcode::v_dot4c_i32_i8; break;
|
||||
case aco_opcode::v_mad_legacy_f32: instr->opcode = aco_opcode::v_mac_legacy_f32; break;
|
||||
case aco_opcode::v_fma_legacy_f32: instr->opcode = aco_opcode::v_fmac_legacy_f32; break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
optimize_encoding(Program* program, ra_ctx& ctx, RegisterFile& register_file,
|
||||
aco_ptr<Instruction>& instr)
|
||||
{
|
||||
if (instr->isVALU())
|
||||
optimize_encoding_vop2(program, ctx, register_file, instr);
|
||||
}
|
||||
|
||||
} /* end namespace */
|
||||
|
||||
void
|
||||
|
@ -2664,60 +2723,7 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
|
|||
register_file.clear(op);
|
||||
}
|
||||
|
||||
/* try to optimize v_mad_f32 -> v_mac_f32 */
|
||||
if ((instr->opcode == aco_opcode::v_mad_f32 ||
|
||||
(instr->opcode == aco_opcode::v_fma_f32 && program->gfx_level >= GFX10) ||
|
||||
instr->opcode == aco_opcode::v_mad_f16 ||
|
||||
instr->opcode == aco_opcode::v_mad_legacy_f16 ||
|
||||
(instr->opcode == aco_opcode::v_fma_f16 && program->gfx_level >= GFX10) ||
|
||||
(instr->opcode == aco_opcode::v_pk_fma_f16 && program->gfx_level >= GFX10) ||
|
||||
(instr->opcode == aco_opcode::v_mad_legacy_f32 && program->dev.has_mac_legacy32) ||
|
||||
(instr->opcode == aco_opcode::v_fma_legacy_f32 && program->dev.has_mac_legacy32) ||
|
||||
(instr->opcode == aco_opcode::v_dot4_i32_i8 && program->family != CHIP_VEGA20)) &&
|
||||
instr->operands[2].isTemp() && instr->operands[2].isKillBeforeDef() &&
|
||||
instr->operands[2].getTemp().type() == RegType::vgpr &&
|
||||
((instr->operands[0].isTemp() &&
|
||||
instr->operands[0].getTemp().type() == RegType::vgpr) ||
|
||||
(instr->operands[1].isTemp() &&
|
||||
instr->operands[1].getTemp().type() == RegType::vgpr)) &&
|
||||
!instr->usesModifiers() && instr->operands[0].physReg().byte() == 0 &&
|
||||
instr->operands[1].physReg().byte() == 0 && instr->operands[2].physReg().byte() == 0) {
|
||||
if (!instr->operands[1].isTemp() ||
|
||||
instr->operands[1].getTemp().type() != RegType::vgpr)
|
||||
std::swap(instr->operands[0], instr->operands[1]);
|
||||
|
||||
unsigned def_id = instr->definitions[0].tempId();
|
||||
bool use_vop2 = true;
|
||||
if (ctx.assignments[def_id].affinity) {
|
||||
assignment& affinity = ctx.assignments[ctx.assignments[def_id].affinity];
|
||||
if (affinity.assigned && affinity.reg != instr->operands[2].physReg() &&
|
||||
!register_file.test(affinity.reg, instr->operands[2].bytes()))
|
||||
use_vop2 = false;
|
||||
}
|
||||
if (use_vop2) {
|
||||
static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3_instruction),
|
||||
"Invalid direct instruction cast.");
|
||||
static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3P_instruction),
|
||||
"Invalid direct instruction cast.");
|
||||
instr->format = Format::VOP2;
|
||||
switch (instr->opcode) {
|
||||
case aco_opcode::v_mad_f32: instr->opcode = aco_opcode::v_mac_f32; break;
|
||||
case aco_opcode::v_fma_f32: instr->opcode = aco_opcode::v_fmac_f32; break;
|
||||
case aco_opcode::v_mad_f16:
|
||||
case aco_opcode::v_mad_legacy_f16: instr->opcode = aco_opcode::v_mac_f16; break;
|
||||
case aco_opcode::v_fma_f16: instr->opcode = aco_opcode::v_fmac_f16; break;
|
||||
case aco_opcode::v_pk_fma_f16: instr->opcode = aco_opcode::v_pk_fmac_f16; break;
|
||||
case aco_opcode::v_dot4_i32_i8: instr->opcode = aco_opcode::v_dot4c_i32_i8; break;
|
||||
case aco_opcode::v_mad_legacy_f32:
|
||||
instr->opcode = aco_opcode::v_mac_legacy_f32;
|
||||
break;
|
||||
case aco_opcode::v_fma_legacy_f32:
|
||||
instr->opcode = aco_opcode::v_fmac_legacy_f32;
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
}
|
||||
optimize_encoding(program, ctx, register_file, instr);
|
||||
|
||||
/* Handle definitions which must have the same register as an operand.
|
||||
* We expect that the definition has the same size as the operand, otherwise the new
|
||||
|
|
Loading…
Reference in New Issue