From b083581010d29b2604ad34e0dd7ff6dae4417dd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Sat, 16 May 2020 17:30:21 +0100 Subject: [PATCH] aco: Workarounds subdword lowering on GFX6/7 As there are no SDWA instructions, we need to take care not to overwrite the upper bits of other copy_operation's operands. Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_lower_to_hw_instr.cpp | 43 ++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index da78ab4b649..8bf57770ce5 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -1165,6 +1165,9 @@ void do_swap(lower_context *ctx, Builder& bld, const copy_operation& copy, bool offset += def.bytes(); } + if (ctx->program->chip_class <= GFX7) + return; + /* fixup in case we swapped bytes we shouldn't have */ copy_operation tmp_copy = copy; tmp_copy.op.setFixed(copy.def.physReg()); @@ -1250,6 +1253,46 @@ void handle_operands(std::map& copy_map, lower_context* } // TODO: try to coalesce subdword copies + /* on GFX6/7, we need some small workarounds as there is no + * SDWA instruction to do partial register writes */ + if (ctx->program->chip_class < GFX8 && it->second.bytes == 2) { + if (it->first.byte() == 0 && it->second.op.physReg().byte() == 0 && + !it->second.is_used && pi->opcode == aco_opcode::p_split_vector) { + /* Other operations might overwrite the high bits, so change all users + * of the high bits to the new target where they are still available. + * This mechanism depends on also emitting dead definitions. */ + PhysReg reg_hi = it->second.op.physReg().advance(2); + std::map::iterator other = copy_map.begin(); + for (other = copy_map.begin(); other != copy_map.end(); other++) { + /* on GFX6/7, if the high bits are used as operand, they cannot be a target */ + if (other->second.op.physReg() == reg_hi) { + other->second.op.setFixed(it->first.advance(2)); + break; /* break because an operand can only be used once */ + } + } + } else if (it->first.byte() == 2) { + /* on GFX6/7, if we target an upper half where the lower half hasn't yet been handled, + * move to the target operand's high bits. This is save to do as it cannot be an operand */ + PhysReg lo = PhysReg(it->first.reg()); + std::map::iterator other = copy_map.find(lo); + if (other != copy_map.end()) { + PhysReg new_reg_hi = other->second.op.physReg().advance(2); + assert(other->second.bytes == 2 && new_reg_hi.byte() == 2); + it->second.def = Definition(new_reg_hi, v2b); + it->second.is_used = 0; + other->second.bytes = 4; + other->second.def.setTemp(Temp(other->second.def.tempId(), v1)); + other->second.op.setTemp(Temp(other->second.op.tempId(), v1)); + /* if the new target's high bits are also a target, change uses */ + std::map::iterator target = copy_map.find(new_reg_hi); + if (target != copy_map.end()) { + target->second.uses[0]++; + target->second.uses[1]++; + } + } + } + } + /* find portions where the target reg is not used as operand for any other copy */ if (it->second.is_used) { if (it->second.op.isConstant()) {