aco: implement 64-bit VGPR constant copies in handle_operands()
64-bit VGPR constant copies can happen because of 64-bit constant copy propagation. Since this optimization is beneficial and these copies would be more annoying to deal with in the optimizer, I've implemented 64-bit VGPR constant copies in handle_operands(). This also sets copy_operation::size correctly for 64-bit constant copies. Cc: 20.0 <mesa-stable@lists.freedesktop.org> Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4260> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4260>
This commit is contained in:
parent
21ba2bc595
commit
43918c9a7f
|
@ -472,6 +472,36 @@ public:
|
|||
return isConstant() && constantValue() == cmp;
|
||||
}
|
||||
|
||||
constexpr uint64_t constantValue64(bool signext=false) const noexcept
|
||||
{
|
||||
if (is64BitConst_) {
|
||||
if (reg_.reg <= 192)
|
||||
return reg_.reg - 128;
|
||||
else if (reg_.reg <= 208)
|
||||
return 0xFFFFFFFFFFFFFFFF - (reg_.reg - 193);
|
||||
|
||||
switch (reg_.reg) {
|
||||
case 240:
|
||||
return 0x3FE0000000000000;
|
||||
case 241:
|
||||
return 0xBFE0000000000000;
|
||||
case 242:
|
||||
return 0x3FF0000000000000;
|
||||
case 243:
|
||||
return 0xBFF0000000000000;
|
||||
case 244:
|
||||
return 0x4000000000000000;
|
||||
case 245:
|
||||
return 0xC000000000000000;
|
||||
case 246:
|
||||
return 0x4010000000000000;
|
||||
case 247:
|
||||
return 0xC010000000000000;
|
||||
}
|
||||
}
|
||||
return (signext && (data_.i & 0x80000000u) ? 0xffffffff00000000ull : 0ull) | data_.i;
|
||||
}
|
||||
|
||||
/* Indicates that the killed operand's live range intersects with the
|
||||
* instruction's definitions. Unlike isKill() and isFirstKill(), this is
|
||||
* not set by liveness analysis. */
|
||||
|
|
|
@ -764,6 +764,11 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
|
|||
preserve_scc = true;
|
||||
} else if (it->second.size == 2 && it->second.def.getTemp().type() == RegType::sgpr) {
|
||||
bld.sop1(aco_opcode::s_mov_b64, it->second.def, Operand(it->second.op.physReg(), s2));
|
||||
} else if (it->second.size == 2 && it->second.op.isConstant()) {
|
||||
uint64_t val = it->second.op.constantValue64();
|
||||
bld.vop1(aco_opcode::v_mov_b32, it->second.def, Operand((uint32_t)val));
|
||||
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{it->second.def.physReg() + 1}, v1),
|
||||
Operand((uint32_t)(val >> 32)));
|
||||
} else {
|
||||
bld.copy(it->second.def, it->second.op);
|
||||
}
|
||||
|
@ -905,7 +910,7 @@ void lower_to_hw_instr(Program* program)
|
|||
if (op.isConstant()) {
|
||||
const PhysReg reg = PhysReg{instr->definitions[0].physReg() + reg_idx};
|
||||
const Definition def = Definition(reg, rc_def);
|
||||
copy_operations[reg] = {op, def, 0, 1};
|
||||
copy_operations[reg] = {op, def, 0, op.size()};
|
||||
reg_idx++;
|
||||
continue;
|
||||
}
|
||||
|
@ -932,7 +937,7 @@ void lower_to_hw_instr(Program* program)
|
|||
for (unsigned j = 0; j < k; j++) {
|
||||
Operand op = Operand(PhysReg{instr->operands[0].physReg() + (i*k+j)}, rc_op);
|
||||
Definition def = Definition(PhysReg{instr->definitions[i].physReg() + j}, rc_def);
|
||||
copy_operations[def.physReg()] = {op, def, 0, 1};
|
||||
copy_operations[def.physReg()] = {op, def, 0, op.size()};
|
||||
}
|
||||
}
|
||||
handle_operands(copy_operations, &ctx, program->chip_class, pi);
|
||||
|
@ -947,7 +952,7 @@ void lower_to_hw_instr(Program* program)
|
|||
Operand operand = instr->operands[i];
|
||||
if (operand.isConstant() || operand.size() == 1) {
|
||||
assert(instr->definitions[i].size() == operand.size());
|
||||
copy_operations[instr->definitions[i].physReg()] = {operand, instr->definitions[i], 0, 1};
|
||||
copy_operations[instr->definitions[i].physReg()] = {operand, instr->definitions[i], 0, operand.size()};
|
||||
} else {
|
||||
RegClass def_rc = RegClass(instr->definitions[i].regClass().type(), 1);
|
||||
RegClass op_rc = RegClass(operand.getTemp().type(), 1);
|
||||
|
@ -1019,7 +1024,7 @@ void lower_to_hw_instr(Program* program)
|
|||
Operand operand = instr->operands[0];
|
||||
if (operand.isConstant() || operand.size() == 1) {
|
||||
assert(instr->definitions[0].size() == 1);
|
||||
copy_operations[instr->definitions[0].physReg()] = {operand, instr->definitions[0], 0, 1};
|
||||
copy_operations[instr->definitions[0].physReg()] = {operand, instr->definitions[0], 0, operand.size()};
|
||||
} else {
|
||||
for (unsigned i = 0; i < operand.size(); i++)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue