From 51f4b22feec3720c89458094a3245efc984115ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 6 May 2020 11:00:24 +0100 Subject: [PATCH] aco: don't allow unaligned subdword accesses on GFX6/7 There are no SDWA instructions which means that only full registers can be accessed. Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_register_allocation.cpp | 16 +++++++++------- src/amd/compiler/aco_validate.cpp | 20 +++++++++++--------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index f40914e8bf0..c4d5a482932 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -81,8 +81,10 @@ struct ra_ctx { } }; -bool instr_can_access_subdword(aco_ptr& instr) +bool instr_can_access_subdword(ra_ctx& ctx, aco_ptr& instr) { + if (ctx.program->chip_class < GFX8) + return false; return instr->isSDWA() || instr->format == Format::PSEUDO; } @@ -111,7 +113,7 @@ struct DefInfo { if (rc.is_subdword()) { /* stride in bytes */ - if(!instr_can_access_subdword(instr)) + if(!instr_can_access_subdword(ctx, instr)) stride = 4; else if (rc.bytes() % 4 == 0) stride = 4; @@ -878,7 +880,7 @@ bool get_reg_specified(ra_ctx& ctx, aco_ptr& instr, PhysReg reg) { - if (rc.is_subdword() && reg.byte() && !instr_can_access_subdword(instr)) + if (rc.is_subdword() && reg.byte() && !instr_can_access_subdword(ctx, instr)) return false; if (!rc.is_subdword() && reg.byte()) return false; @@ -1216,12 +1218,12 @@ void handle_pseudo(ra_ctx& ctx, } } -bool operand_can_use_reg(aco_ptr& instr, unsigned idx, PhysReg reg) +bool operand_can_use_reg(ra_ctx& ctx, aco_ptr& instr, unsigned idx, PhysReg reg) { if (instr->operands[idx].isFixed()) return instr->operands[idx].physReg() == reg; - if (!instr_can_access_subdword(instr) && reg.byte()) + if (reg.byte() && !instr_can_access_subdword(ctx, instr)) return false; switch (instr->format) { @@ -1737,7 +1739,7 @@ void register_allocation(Program *program, std::vector& live_out_per_bl assert(ctx.assignments[operand.tempId()].assigned); PhysReg reg = ctx.assignments[operand.tempId()].reg; - if (operand_can_use_reg(instr, i, reg)) + if (operand_can_use_reg(ctx, instr, i, reg)) operand.setFixed(reg); else get_reg_for_operand(ctx, register_file, parallelcopy, instr, operand); @@ -1898,7 +1900,7 @@ void register_allocation(Program *program, std::vector& live_out_per_bl Temp tmp = definition.getTemp(); /* subdword instructions before RDNA write full registers */ if (tmp.regClass().is_subdword() && - !instr_can_access_subdword(instr) && + !instr_can_access_subdword(ctx, instr) && ctx.program->chip_class <= GFX9) { assert(tmp.bytes() <= 4); tmp = Temp(definition.tempId(), v1); diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp index 95cc1df3ffd..e101d20068c 100644 --- a/src/amd/compiler/aco_validate.cpp +++ b/src/amd/compiler/aco_validate.cpp @@ -46,11 +46,6 @@ void perfwarn(bool cond, const char *msg, Instruction *instr) } #endif -bool instr_can_access_subdword(aco_ptr& instr) -{ - return instr->isSDWA() || instr->format == Format::PSEUDO; -} - void validate(Program* program, FILE * output) { if (!(debug_flags & DEBUG_VALIDATE)) @@ -178,7 +173,7 @@ void validate(Program* program, FILE * output) /* check subdword definitions */ for (unsigned i = 0; i < instr->definitions.size(); i++) { if (instr->definitions[i].regClass().is_subdword()) - check(instr_can_access_subdword(instr) || instr->definitions[i].bytes() <= 4, "Only SDWA and Pseudo instructions can write subdword registers larger than 4 bytes", instr.get()); + check(instr->format == Format::PSEUDO || instr->definitions[i].bytes() <= 4, "Only Pseudo instructions can write subdword registers larger than 4 bytes", instr.get()); } if (instr->isSALU() || instr->isVALU()) { @@ -434,6 +429,13 @@ bool ra_fail(FILE *output, Location loc, Location loc2, const char *fmt, ...) { return true; } +bool instr_can_access_subdword(Program* program, aco_ptr& instr) +{ + if (program->chip_class < GFX8) + return false; + return instr->isSDWA() || instr->format == Format::PSEUDO; +} + } /* end namespace */ bool validate_ra(Program *program, const struct radv_nir_compiler_options *options, FILE *output) { @@ -472,7 +474,7 @@ bool validate_ra(Program *program, const struct radv_nir_compiler_options *optio err |= ra_fail(output, loc, assignments.at(op.tempId()).firstloc, "Operand %d has an out-of-bounds register assignment", i); if (op.physReg() == vcc && !program->needs_vcc) err |= ra_fail(output, loc, Location(), "Operand %d fixed to vcc but needs_vcc=false", i); - if (!instr_can_access_subdword(instr) && op.regClass().is_subdword() && op.physReg().byte()) + if (!instr_can_access_subdword(program, instr) && op.regClass().is_subdword() && op.physReg().byte()) err |= ra_fail(output, loc, assignments.at(op.tempId()).firstloc, "Operand %d must be aligned to a full register", i); if (!assignments[op.tempId()].firstloc.block) assignments[op.tempId()].firstloc = loc; @@ -493,7 +495,7 @@ bool validate_ra(Program *program, const struct radv_nir_compiler_options *optio err |= ra_fail(output, loc, assignments.at(def.tempId()).firstloc, "Definition %d has an out-of-bounds register assignment", i); if (def.physReg() == vcc && !program->needs_vcc) err |= ra_fail(output, loc, Location(), "Definition %d fixed to vcc but needs_vcc=false", i); - if (!instr_can_access_subdword(instr) && def.regClass().is_subdword() && def.physReg().byte()) + if (!instr_can_access_subdword(program, instr) && def.regClass().is_subdword() && def.physReg().byte()) err |= ra_fail(output, loc, assignments.at(def.tempId()).firstloc, "Definition %d must be aligned to a full register", i); if (!assignments[def.tempId()].firstloc.block) assignments[def.tempId()].firstloc = loc; @@ -600,7 +602,7 @@ bool validate_ra(Program *program, const struct radv_nir_compiler_options *optio err |= ra_fail(output, loc, assignments.at(regs[reg.reg_b + j]).defloc, "Assignment of element %d of %%%d already taken by %%%d from instruction", i, tmp.id(), regs[reg.reg_b + j]); regs[reg.reg_b + j] = tmp.id(); } - if (def.regClass().is_subdword() && !instr_can_access_subdword(instr)) { + if (def.regClass().is_subdword() && !instr_can_access_subdword(program, instr)) { for (unsigned j = tmp.bytes(); j < 4; j++) if (regs[reg.reg_b + j]) err |= ra_fail(output, loc, assignments.at(regs[reg.reg_b + j]).defloc, "Assignment of element %d of %%%d overwrites the full register taken by %%%d from instruction", i, tmp.id(), regs[reg.reg_b + j]);