aco: swap operands if necessary to create v_madak/v_fmaak

Also rewrite the check_literal logic to be more straightforward.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9805>
This commit is contained in:
Rhys Perry 2021-05-13 13:34:52 +01:00 committed by Marge Bot
parent 2665320c78
commit f4f5d577fc
1 changed file with 38 additions and 22 deletions

View File

@ -3731,33 +3731,46 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
if (instr->opcode == aco_opcode::v_fma_legacy_f16) if (instr->opcode == aco_opcode::v_fma_legacy_f16)
return; return;
bool sgpr_used = false;
uint32_t literal_idx = 0; uint32_t literal_idx = 0;
uint32_t literal_uses = UINT32_MAX; uint32_t literal_uses = UINT32_MAX;
for (unsigned i = 0; i < instr->operands.size(); i++) {
if (instr->operands[i].isConstant() && i > 0) { /* Try using v_madak/v_fmaak */
literal_uses = UINT32_MAX; if (instr->operands[2].isTemp() &&
break; ctx.info[instr->operands[2].tempId()].is_literal(get_operand_size(instr, 2))) {
bool has_sgpr = false;
bool has_vgpr = false;
for (unsigned i = 0; i < 2; i++) {
if (!instr->operands[i].isTemp())
continue;
has_sgpr |= instr->operands[i].getTemp().type() == RegType::sgpr;
has_vgpr |= instr->operands[i].getTemp().type() == RegType::vgpr;
} }
if (!instr->operands[i].isTemp()) /* Encoding limitations requires a VGPR operand. The constant bus limitations before
continue; * GFX10 disallows SGPRs.
unsigned bits = get_operand_size(instr, i); */
/* if one of the operands is sgpr, we cannot add a literal somewhere else on pre-GFX10 if ((!has_sgpr || ctx.program->chip_class >= GFX10) && has_vgpr) {
* or operands other than the 1st */ literal_idx = 2;
if (instr->operands[i].getTemp().type() == RegType::sgpr && literal_uses = ctx.uses[instr->operands[2].tempId()];
(i > 0 || ctx.program->chip_class < GFX10)) { }
if (!sgpr_used && ctx.info[instr->operands[i].tempId()].is_literal(bits)) { }
literal_uses = ctx.uses[instr->operands[i].tempId()];
/* Try using v_madmk/v_fmamk */
/* Encoding limitations requires a VGPR operand. */
if (instr->operands[2].isTemp() && instr->operands[2].getTemp().type() == RegType::vgpr) {
for (unsigned i = 0; i < 2; i++) {
if (!instr->operands[i].isTemp())
continue;
/* The constant bus limitations before GFX10 disallows SGPRs. */
if (ctx.program->chip_class < GFX10 && instr->operands[!i].isTemp() &&
instr->operands[!i].getTemp().type() == RegType::sgpr)
continue;
if (ctx.info[instr->operands[i].tempId()].is_literal(get_operand_size(instr, i)) &&
ctx.uses[instr->operands[i].tempId()] < literal_uses) {
literal_idx = i; literal_idx = i;
} else { literal_uses = ctx.uses[instr->operands[i].tempId()];
literal_uses = UINT32_MAX;
} }
sgpr_used = true;
/* don't break because we still need to check constants */
} else if (!sgpr_used && ctx.info[instr->operands[i].tempId()].is_literal(bits) &&
ctx.uses[instr->operands[i].tempId()] < literal_uses) {
literal_uses = ctx.uses[instr->operands[i].tempId()];
literal_idx = i;
} }
} }
@ -3953,6 +3966,9 @@ apply_literals(opt_ctx& ctx, aco_ptr<Instruction>& instr)
if (info->literal_idx == 2) { /* add literal -> madak */ if (info->literal_idx == 2) { /* add literal -> madak */
new_mad->operands[0] = instr->operands[0]; new_mad->operands[0] = instr->operands[0];
new_mad->operands[1] = instr->operands[1]; new_mad->operands[1] = instr->operands[1];
if (!new_mad->operands[1].isTemp() ||
new_mad->operands[1].getTemp().type() == RegType::sgpr)
std::swap(new_mad->operands[0], new_mad->operands[1]);
} else { /* mul literal -> madmk */ } else { /* mul literal -> madmk */
new_mad->operands[0] = instr->operands[1 - info->literal_idx]; new_mad->operands[0] = instr->operands[1 - info->literal_idx];
new_mad->operands[1] = instr->operands[2]; new_mad->operands[1] = instr->operands[2];