aco: Workarounds subdword lowering on GFX6/7
As there are no SDWA instructions, we need to take care not to overwrite the upper bits of other copy_operation's operands. Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5226>
This commit is contained in:
parent
942e3c40c3
commit
b083581010
|
@ -1165,6 +1165,9 @@ void do_swap(lower_context *ctx, Builder& bld, const copy_operation& copy, bool
|
|||
offset += def.bytes();
|
||||
}
|
||||
|
||||
if (ctx->program->chip_class <= GFX7)
|
||||
return;
|
||||
|
||||
/* fixup in case we swapped bytes we shouldn't have */
|
||||
copy_operation tmp_copy = copy;
|
||||
tmp_copy.op.setFixed(copy.def.physReg());
|
||||
|
@ -1250,6 +1253,46 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
|
|||
}
|
||||
// TODO: try to coalesce subdword copies
|
||||
|
||||
/* on GFX6/7, we need some small workarounds as there is no
|
||||
* SDWA instruction to do partial register writes */
|
||||
if (ctx->program->chip_class < GFX8 && it->second.bytes == 2) {
|
||||
if (it->first.byte() == 0 && it->second.op.physReg().byte() == 0 &&
|
||||
!it->second.is_used && pi->opcode == aco_opcode::p_split_vector) {
|
||||
/* Other operations might overwrite the high bits, so change all users
|
||||
* of the high bits to the new target where they are still available.
|
||||
* This mechanism depends on also emitting dead definitions. */
|
||||
PhysReg reg_hi = it->second.op.physReg().advance(2);
|
||||
std::map<PhysReg, copy_operation>::iterator other = copy_map.begin();
|
||||
for (other = copy_map.begin(); other != copy_map.end(); other++) {
|
||||
/* on GFX6/7, if the high bits are used as operand, they cannot be a target */
|
||||
if (other->second.op.physReg() == reg_hi) {
|
||||
other->second.op.setFixed(it->first.advance(2));
|
||||
break; /* break because an operand can only be used once */
|
||||
}
|
||||
}
|
||||
} else if (it->first.byte() == 2) {
|
||||
/* on GFX6/7, if we target an upper half where the lower half hasn't yet been handled,
|
||||
* move to the target operand's high bits. This is save to do as it cannot be an operand */
|
||||
PhysReg lo = PhysReg(it->first.reg());
|
||||
std::map<PhysReg, copy_operation>::iterator other = copy_map.find(lo);
|
||||
if (other != copy_map.end()) {
|
||||
PhysReg new_reg_hi = other->second.op.physReg().advance(2);
|
||||
assert(other->second.bytes == 2 && new_reg_hi.byte() == 2);
|
||||
it->second.def = Definition(new_reg_hi, v2b);
|
||||
it->second.is_used = 0;
|
||||
other->second.bytes = 4;
|
||||
other->second.def.setTemp(Temp(other->second.def.tempId(), v1));
|
||||
other->second.op.setTemp(Temp(other->second.op.tempId(), v1));
|
||||
/* if the new target's high bits are also a target, change uses */
|
||||
std::map<PhysReg, copy_operation>::iterator target = copy_map.find(new_reg_hi);
|
||||
if (target != copy_map.end()) {
|
||||
target->second.uses[0]++;
|
||||
target->second.uses[1]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* find portions where the target reg is not used as operand for any other copy */
|
||||
if (it->second.is_used) {
|
||||
if (it->second.op.isConstant()) {
|
||||
|
|
Loading…
Reference in New Issue