From edc2b57ac14c6f9f3dadd3d7282e9d6ac1bc4304 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Mon, 13 Apr 2020 17:23:38 +0100 Subject: [PATCH] aco: allocate full register for subdword definitions if HW doesn't support it Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_register_allocation.cpp | 13 +++++++++++-- src/amd/compiler/aco_validate.cpp | 18 +++++++++++++++--- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 0857084a486..a6792679da1 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -1839,8 +1839,17 @@ void register_allocation(Program *program, std::vector& live_out_per_bl definition.setFixed(reg); } - if (!definition.isFixed()) - definition.setFixed(get_reg(ctx, register_file, definition.getTemp(), parallelcopy, instr)); + if (!definition.isFixed()) { + Temp tmp = definition.getTemp(); + /* subdword instructions before RDNA write full registers */ + if (tmp.regClass().is_subdword() && + !instr_can_access_subdword(instr) && + ctx.program->chip_class <= GFX9) { + assert(tmp.bytes() <= 4); + tmp = Temp(definition.tempId(), v1); + } + definition.setFixed(get_reg(ctx, register_file, tmp, parallelcopy, instr)); + } assert(definition.isFixed() && ((definition.getTemp().type() == RegType::vgpr && definition.physReg() >= 256) || (definition.getTemp().type() != RegType::vgpr && definition.physReg() < 256))); diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp index 3e0f4584cd3..2f51b8a2eb1 100644 --- a/src/amd/compiler/aco_validate.cpp +++ b/src/amd/compiler/aco_validate.cpp @@ -46,6 +46,11 @@ void perfwarn(bool cond, const char *msg, Instruction *instr) } #endif +bool instr_can_access_subdword(aco_ptr<Instruction>& instr) +{ + return instr->isSDWA() || instr->format == Format::PSEUDO; +} + void validate(Program* program, FILE * output) { if 
(!(debug_flags & DEBUG_VALIDATE)) @@ -162,7 +167,7 @@ void validate(Program* program, FILE * output) /* check subdword definitions */ for (unsigned i = 0; i < instr->definitions.size(); i++) { if (instr->definitions[i].regClass().is_subdword()) - check(instr->isSDWA() || instr->format == Format::PSEUDO, "Only SDWA and Pseudo instructions can write subdword registers", instr.get()); + check(instr_can_access_subdword(instr) || instr->definitions[i].bytes() <= 4, "Only SDWA and Pseudo instructions can write subdword registers larger than 4 bytes", instr.get()); } if (instr->isSALU() || instr->isVALU()) { @@ -456,7 +461,7 @@ bool validate_ra(Program *program, const struct radv_nir_compiler_options *optio err |= ra_fail(output, loc, assignments.at(op.tempId()).firstloc, "Operand %d has an out-of-bounds register assignment", i); if (op.physReg() == vcc && !program->needs_vcc) err |= ra_fail(output, loc, Location(), "Operand %d fixed to vcc but needs_vcc=false", i); - if (!(instr->isSDWA() || instr->format == Format::PSEUDO) && op.regClass().is_subdword() && op.physReg().byte()) + if (!instr_can_access_subdword(instr) && op.regClass().is_subdword() && op.physReg().byte()) err |= ra_fail(output, loc, assignments.at(op.tempId()).firstloc, "Operand %d must be aligned to a full register", i); if (!assignments[op.tempId()].firstloc.block) assignments[op.tempId()].firstloc = loc; @@ -477,6 +482,8 @@ bool validate_ra(Program *program, const struct radv_nir_compiler_options *optio err |= ra_fail(output, loc, assignments.at(def.tempId()).firstloc, "Definition %d has an out-of-bounds register assignment", i); if (def.physReg() == vcc && !program->needs_vcc) err |= ra_fail(output, loc, Location(), "Definition %d fixed to vcc but needs_vcc=false", i); + if (!instr_can_access_subdword(instr) && def.regClass().is_subdword() && def.physReg().byte()) + err |= ra_fail(output, loc, assignments.at(def.tempId()).firstloc, "Definition %d must be aligned to a full register", i); if 
(!assignments[def.tempId()].firstloc.block) assignments[def.tempId()].firstloc = loc; assignments[def.tempId()].defloc = loc; @@ -579,9 +586,14 @@ bool validate_ra(Program *program, const struct radv_nir_compiler_options *optio PhysReg reg = assignments.at(tmp.id()).reg; for (unsigned j = 0; j < tmp.bytes(); j++) { if (regs[reg.reg_b + j]) - err |= ra_fail(output, loc, assignments.at(regs[reg.reg_b + i]).defloc, "Assignment of element %d of %%%d already taken by %%%d from instruction", i, tmp.id(), regs[reg.reg_b + j]); + err |= ra_fail(output, loc, assignments.at(regs[reg.reg_b + j]).defloc, "Assignment of element %d of %%%d already taken by %%%d from instruction", i, tmp.id(), regs[reg.reg_b + j]); regs[reg.reg_b + j] = tmp.id(); } + if (def.regClass().is_subdword() && !instr_can_access_subdword(instr)) { + for (unsigned j = tmp.bytes(); j < 4; j++) + if (regs[reg.reg_b + j]) + err |= ra_fail(output, loc, assignments.at(regs[reg.reg_b + j]).defloc, "Assignment of element %d of %%%d overwrites the full register taken by %%%d from instruction", i, tmp.id(), regs[reg.reg_b + j]); + } } for (const Definition& def : instr->definitions) {