aco: fix NSA MIMG followed by MUBUF/MTBUF

No fossil-db changes on GFX10.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Fixes: c353895c92 ("aco: use non-sequential addressing")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9187>
This commit is contained in:
Rhys Perry 2021-02-22 11:12:15 +00:00 committed by Marge Bot
parent b804abd61d
commit 194f3e4c69
3 changed files with 33 additions and 9 deletions

View File

@ -48,6 +48,15 @@ static uint32_t get_sdwa_sel(unsigned sel, PhysReg reg)
return sel & sdwa_asuint; return sel & sdwa_asuint;
} }
/* Returns how many extra dwords a MIMG instruction needs for non-sequential
 * addressing (NSA).
 *
 * Operands from index 3 onwards are treated as the address operands. When
 * every one of them sits exactly 4 * i bytes past operand 3, the address is
 * sequential and 0 is returned. Otherwise the result is the number of
 * address operands after the first, divided by four and rounded up.
 */
unsigned get_mimg_nsa_dwords(const Instruction *instr) {
   const unsigned num_addr = instr->operands.size() - 3;

   unsigned idx = 1;
   while (idx < num_addr) {
      /* operand 3 is only read here, i.e. when at least two address operands exist */
      const bool out_of_sequence =
         instr->operands[3 + idx].physReg() != instr->operands[3].physReg().advance(idx * 4);
      if (out_of_sequence) {
         /* one non-consecutive register is enough to require NSA */
         return DIV_ROUND_UP(num_addr - 1, 4);
      }
      ++idx;
   }

   /* all address registers are consecutive: no extra dwords needed */
   return 0;
}
void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* instr) void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* instr)
{ {
/* lower remaining pseudo-instructions */ /* lower remaining pseudo-instructions */
@ -412,14 +421,8 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
break; break;
} }
case Format::MIMG: { case Format::MIMG: {
unsigned use_nsa = false; unsigned nsa_dwords = get_mimg_nsa_dwords(instr);
unsigned addr_dwords = instr->operands.size() - 3; assert(!nsa_dwords || ctx.chip_class >= GFX10);
for (unsigned i = 1; i < addr_dwords; i++) {
if (instr->operands[3 + i].physReg() != instr->operands[3].physReg().advance(i * 4))
use_nsa = true;
}
assert(!use_nsa || ctx.chip_class >= GFX10);
unsigned nsa_dwords = use_nsa ? DIV_ROUND_UP(addr_dwords - 1, 4) : 0;
MIMG_instruction& mimg = instr->mimg(); MIMG_instruction& mimg = instr->mimg();
uint32_t encoding = (0b111100 << 26); uint32_t encoding = (0b111100 << 26);
@ -463,7 +466,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
if (nsa_dwords) { if (nsa_dwords) {
out.resize(out.size() + nsa_dwords); out.resize(out.size() + nsa_dwords);
std::vector<uint32_t>::iterator nsa = std::prev(out.end(), nsa_dwords); std::vector<uint32_t>::iterator nsa = std::prev(out.end(), nsa_dwords);
for (unsigned i = 0; i < addr_dwords - 1; i++) for (unsigned i = 0; i < instr->operands.size() - 4u; i++)
nsa[i / 4] |= (0xFF & instr->operands[4 + i].physReg().reg()) << (i % 4 * 8); nsa[i / 4] |= (0xFF & instr->operands[4 + i].physReg().reg()) << (i % 4 * 8);
} }
break; break;

View File

@ -25,6 +25,7 @@
#include <algorithm> #include <algorithm>
#include "aco_ir.h" #include "aco_ir.h"
#include "aco_builder.h"
#include <stack> #include <stack>
#include <functional> #include <functional>
@ -149,6 +150,7 @@ struct NOP_ctx_gfx10 {
bool has_branch_after_VMEM = false; bool has_branch_after_VMEM = false;
bool has_DS = false; bool has_DS = false;
bool has_branch_after_DS = false; bool has_branch_after_DS = false;
bool has_NSA_MIMG = false;
std::bitset<128> sgprs_read_by_VMEM; std::bitset<128> sgprs_read_by_VMEM;
std::bitset<128> sgprs_read_by_SMEM; std::bitset<128> sgprs_read_by_SMEM;
@ -159,6 +161,7 @@ struct NOP_ctx_gfx10 {
has_branch_after_VMEM |= other.has_branch_after_VMEM; has_branch_after_VMEM |= other.has_branch_after_VMEM;
has_DS |= other.has_DS; has_DS |= other.has_DS;
has_branch_after_DS |= other.has_branch_after_DS; has_branch_after_DS |= other.has_branch_after_DS;
has_NSA_MIMG |= other.has_NSA_MIMG;
sgprs_read_by_VMEM |= other.sgprs_read_by_VMEM; sgprs_read_by_VMEM |= other.sgprs_read_by_VMEM;
sgprs_read_by_SMEM |= other.sgprs_read_by_SMEM; sgprs_read_by_SMEM |= other.sgprs_read_by_SMEM;
} }
@ -172,6 +175,7 @@ struct NOP_ctx_gfx10 {
has_branch_after_VMEM == other.has_branch_after_VMEM && has_branch_after_VMEM == other.has_branch_after_VMEM &&
has_DS == other.has_DS && has_DS == other.has_DS &&
has_branch_after_DS == other.has_branch_after_DS && has_branch_after_DS == other.has_branch_after_DS &&
has_NSA_MIMG == other.has_NSA_MIMG &&
sgprs_read_by_VMEM == other.sgprs_read_by_VMEM && sgprs_read_by_VMEM == other.sgprs_read_by_VMEM &&
sgprs_read_by_SMEM == other.sgprs_read_by_SMEM; sgprs_read_by_SMEM == other.sgprs_read_by_SMEM;
} }
@ -737,6 +741,21 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10
wait->imm = 0; wait->imm = 0;
new_instructions.emplace_back(std::move(wait)); new_instructions.emplace_back(std::move(wait));
} }
/* NSAToVMEMBug
* Handles NSA MIMG (4 or more dwords) immediately followed by MUBUF/MTBUF (with offset[2:1] != 0).
*/
if (instr->isMIMG() && get_mimg_nsa_dwords(instr.get()) > 1) {
ctx.has_NSA_MIMG = true;
} else if (ctx.has_NSA_MIMG) {
ctx.has_NSA_MIMG = false;
if (instr->isMUBUF() || instr->isMTBUF()) {
uint32_t offset = instr->isMUBUF() ? instr->mubuf().offset : instr->mtbuf().offset;
if (offset & 6)
Builder(program, &new_instructions).sopp(aco_opcode::s_nop, -1, 0);
}
}
} }
template <typename Ctx> template <typename Ctx>

View File

@ -1615,6 +1615,8 @@ bool needs_exec_mask(const Instruction* instr);
uint32_t get_reduction_identity(ReduceOp op, unsigned idx); uint32_t get_reduction_identity(ReduceOp op, unsigned idx);
unsigned get_mimg_nsa_dwords(const Instruction *instr);
enum block_kind { enum block_kind {
/* uniform indicates that leaving this block, /* uniform indicates that leaving this block,
* all actives lanes stay active */ * all actives lanes stay active */