aco: fix NSA MIMG followed by MUBUF/MTBUF

No fossil-db changes on GFX10.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Fixes: c353895c92 ("aco: use non-sequential addressing")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9187>
This commit is contained in:
Rhys Perry 2021-02-22 11:12:15 +00:00 committed by Marge Bot
parent b804abd61d
commit 194f3e4c69
3 changed files with 33 additions and 9 deletions

View File

@ -48,6 +48,15 @@ static uint32_t get_sdwa_sel(unsigned sel, PhysReg reg)
return sel & sdwa_asuint;
}
/* Computes how many extra NSA dwords a MIMG instruction needs.
 *
 * Every operand from index 3 onwards is counted as one address operand. The
 * addresses are treated as sequential when operand 3 + i sits exactly i * 4
 * past the register of operand 3 (presumably a byte offset, i.e. one dword
 * per address operand). In that case no NSA encoding is required and 0 is
 * returned. Otherwise all addresses except the first are packed four per
 * dword, and the number of dwords needed for that is returned.
 */
unsigned get_mimg_nsa_dwords(const Instruction *instr) {
   const unsigned num_addrs = instr->operands.size() - 3;

   /* Stop scanning as soon as one address breaks the sequential layout. */
   bool sequential = true;
   for (unsigned idx = 1; sequential && idx < num_addrs; ++idx) {
      if (instr->operands[3 + idx].physReg() != instr->operands[3].physReg().advance(idx * 4))
         sequential = false;
   }

   if (sequential)
      return 0;
   return DIV_ROUND_UP(num_addrs - 1, 4);
}
void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* instr)
{
/* lower remaining pseudo-instructions */
@ -412,14 +421,8 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
break;
}
case Format::MIMG: {
unsigned use_nsa = false;
unsigned addr_dwords = instr->operands.size() - 3;
for (unsigned i = 1; i < addr_dwords; i++) {
if (instr->operands[3 + i].physReg() != instr->operands[3].physReg().advance(i * 4))
use_nsa = true;
}
assert(!use_nsa || ctx.chip_class >= GFX10);
unsigned nsa_dwords = use_nsa ? DIV_ROUND_UP(addr_dwords - 1, 4) : 0;
unsigned nsa_dwords = get_mimg_nsa_dwords(instr);
assert(!nsa_dwords || ctx.chip_class >= GFX10);
MIMG_instruction& mimg = instr->mimg();
uint32_t encoding = (0b111100 << 26);
@ -463,7 +466,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
if (nsa_dwords) {
out.resize(out.size() + nsa_dwords);
std::vector<uint32_t>::iterator nsa = std::prev(out.end(), nsa_dwords);
for (unsigned i = 0; i < addr_dwords - 1; i++)
for (unsigned i = 0; i < instr->operands.size() - 4u; i++)
nsa[i / 4] |= (0xFF & instr->operands[4 + i].physReg().reg()) << (i % 4 * 8);
}
break;

View File

@ -25,6 +25,7 @@
#include <algorithm>
#include "aco_ir.h"
#include "aco_builder.h"
#include <stack>
#include <functional>
@ -149,6 +150,7 @@ struct NOP_ctx_gfx10 {
bool has_branch_after_VMEM = false;
bool has_DS = false;
bool has_branch_after_DS = false;
bool has_NSA_MIMG = false;
std::bitset<128> sgprs_read_by_VMEM;
std::bitset<128> sgprs_read_by_SMEM;
@ -159,6 +161,7 @@ struct NOP_ctx_gfx10 {
has_branch_after_VMEM |= other.has_branch_after_VMEM;
has_DS |= other.has_DS;
has_branch_after_DS |= other.has_branch_after_DS;
has_NSA_MIMG |= other.has_NSA_MIMG;
sgprs_read_by_VMEM |= other.sgprs_read_by_VMEM;
sgprs_read_by_SMEM |= other.sgprs_read_by_SMEM;
}
@ -172,6 +175,7 @@ struct NOP_ctx_gfx10 {
has_branch_after_VMEM == other.has_branch_after_VMEM &&
has_DS == other.has_DS &&
has_branch_after_DS == other.has_branch_after_DS &&
has_NSA_MIMG == other.has_NSA_MIMG &&
sgprs_read_by_VMEM == other.sgprs_read_by_VMEM &&
sgprs_read_by_SMEM == other.sgprs_read_by_SMEM;
}
@ -737,6 +741,21 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10
wait->imm = 0;
new_instructions.emplace_back(std::move(wait));
}
/* NSAToVMEMBug
* Handles NSA MIMG (4 or more dwords) immediately followed by MUBUF/MTBUF (with offset[2:1] != 0).
*/
if (instr->isMIMG() && get_mimg_nsa_dwords(instr.get()) > 1) {
ctx.has_NSA_MIMG = true;
} else if (ctx.has_NSA_MIMG) {
ctx.has_NSA_MIMG = false;
if (instr->isMUBUF() || instr->isMTBUF()) {
uint32_t offset = instr->isMUBUF() ? instr->mubuf().offset : instr->mtbuf().offset;
if (offset & 6)
Builder(program, &new_instructions).sopp(aco_opcode::s_nop, -1, 0);
}
}
}
template <typename Ctx>

View File

@ -1615,6 +1615,8 @@ bool needs_exec_mask(const Instruction* instr);
uint32_t get_reduction_identity(ReduceOp op, unsigned idx);
/* Returns the number of additional NSA (non-sequential address) dwords the
 * given MIMG instruction needs, or 0 when its address operands (operand 3 and
 * up) occupy consecutive registers. */
unsigned get_mimg_nsa_dwords(const Instruction *instr);
enum block_kind {
/* uniform indicates that leaving this block,
* all actives lanes stay active */