aco: implement 64-bit VGPR constant copies in handle_operands()

64-bit VGPR constant copies can occur as a result of 64-bit constant copy
propagation. Since that optimization is beneficial, and handling these
copies in the optimizer would be more awkward, I've implemented 64-bit
VGPR constant copies in handle_operands() instead.

This also sets copy_operation::size correctly for 64-bit constant copies.

Cc: 20.0 <mesa-stable@lists.freedesktop.org>
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4260>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4260>
This commit is contained in:
Rhys Perry 2020-03-20 16:07:08 +00:00 committed by Marge Bot
parent 21ba2bc595
commit 43918c9a7f
2 changed files with 39 additions and 4 deletions

View File

@ -472,6 +472,36 @@ public:
return isConstant() && constantValue() == cmp;
}
/* Returns the operand's constant as a 64-bit value.
 *
 * When is64BitConst_ is set, the value is decoded from reg_.reg:
 *   - reg_.reg <= 192 yields reg_.reg - 128 (0..64 for 128..192),
 *   - 193..208 yield 0xFFFFFFFFFFFFFFFF down to 0xFFFFFFFFFFFFFFF0,
 *     i.e. -1..-16 in two's complement,
 *   - 240..247 yield the IEEE-754 double bit patterns of
 *     0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0 and -4.0.
 *
 * In every other case data_.i is OR-ed with an upper half that is all
 * ones when signext is set and bit 31 of data_.i is set, zero otherwise.
 *
 * NOTE(review): presumably reg_.reg is never below 128 for a 64-bit
 * constant; confirm against the code that sets is64BitConst_. */
constexpr uint64_t constantValue64(bool signext=false) const noexcept
{
   if (is64BitConst_) {
      switch (reg_.reg) {
      /* Floating-point encodings, listed as raw double bit patterns. */
      case 240: return 0x3FE0000000000000;
      case 241: return 0xBFE0000000000000;
      case 242: return 0x3FF0000000000000;
      case 243: return 0xBFF0000000000000;
      case 244: return 0x4000000000000000;
      case 245: return 0xC000000000000000;
      case 246: return 0x4010000000000000;
      case 247: return 0xC010000000000000;
      default:
         /* Integer encodings: an ascending range first, then a
          * descending range counted down from all-ones. */
         if (reg_.reg <= 192)
            return reg_.reg - 128;
         if (reg_.reg <= 208)
            return 0xFFFFFFFFFFFFFFFF - (reg_.reg - 193);
         /* No known encoding: use the stored value below. */
         break;
      }
   }

   const uint64_t upper_half =
      (signext && (data_.i & 0x80000000u)) ? 0xffffffff00000000ull : 0ull;
   return upper_half | data_.i;
}
/* Indicates that the killed operand's live range intersects with the
* instruction's definitions. Unlike isKill() and isFirstKill(), this is
* not set by liveness analysis. */

View File

@ -764,6 +764,11 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
preserve_scc = true;
} else if (it->second.size == 2 && it->second.def.getTemp().type() == RegType::sgpr) {
bld.sop1(aco_opcode::s_mov_b64, it->second.def, Operand(it->second.op.physReg(), s2));
} else if (it->second.size == 2 && it->second.op.isConstant()) {
uint64_t val = it->second.op.constantValue64();
bld.vop1(aco_opcode::v_mov_b32, it->second.def, Operand((uint32_t)val));
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{it->second.def.physReg() + 1}, v1),
Operand((uint32_t)(val >> 32)));
} else {
bld.copy(it->second.def, it->second.op);
}
@ -905,7 +910,7 @@ void lower_to_hw_instr(Program* program)
if (op.isConstant()) {
const PhysReg reg = PhysReg{instr->definitions[0].physReg() + reg_idx};
const Definition def = Definition(reg, rc_def);
copy_operations[reg] = {op, def, 0, 1};
copy_operations[reg] = {op, def, 0, op.size()};
reg_idx++;
continue;
}
@ -932,7 +937,7 @@ void lower_to_hw_instr(Program* program)
for (unsigned j = 0; j < k; j++) {
Operand op = Operand(PhysReg{instr->operands[0].physReg() + (i*k+j)}, rc_op);
Definition def = Definition(PhysReg{instr->definitions[i].physReg() + j}, rc_def);
copy_operations[def.physReg()] = {op, def, 0, 1};
copy_operations[def.physReg()] = {op, def, 0, op.size()};
}
}
handle_operands(copy_operations, &ctx, program->chip_class, pi);
@ -947,7 +952,7 @@ void lower_to_hw_instr(Program* program)
Operand operand = instr->operands[i];
if (operand.isConstant() || operand.size() == 1) {
assert(instr->definitions[i].size() == operand.size());
copy_operations[instr->definitions[i].physReg()] = {operand, instr->definitions[i], 0, 1};
copy_operations[instr->definitions[i].physReg()] = {operand, instr->definitions[i], 0, operand.size()};
} else {
RegClass def_rc = RegClass(instr->definitions[i].regClass().type(), 1);
RegClass op_rc = RegClass(operand.getTemp().type(), 1);
@ -1019,7 +1024,7 @@ void lower_to_hw_instr(Program* program)
Operand operand = instr->operands[0];
if (operand.isConstant() || operand.size() == 1) {
assert(instr->definitions[0].size() == 1);
copy_operations[instr->definitions[0].physReg()] = {operand, instr->definitions[0], 0, 1};
copy_operations[instr->definitions[0].physReg()] = {operand, instr->definitions[0], 0, operand.size()};
} else {
for (unsigned i = 0; i < operand.size(); i++)
{