aco: fix NSA MIMG followed by MUBUF/MTBUF
No fossil-db changes on GFX10.
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Fixes: c353895c92 ("aco: use non-sequential addressing")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9187>
This commit is contained in:
parent
b804abd61d
commit
194f3e4c69
|
@ -48,6 +48,15 @@ static uint32_t get_sdwa_sel(unsigned sel, PhysReg reg)
|
|||
return sel & sdwa_asuint;
|
||||
}
|
||||
|
||||
unsigned get_mimg_nsa_dwords(const Instruction *instr) {
|
||||
unsigned addr_dwords = instr->operands.size() - 3;
|
||||
for (unsigned i = 1; i < addr_dwords; i++) {
|
||||
if (instr->operands[3 + i].physReg() != instr->operands[3].physReg().advance(i * 4))
|
||||
return DIV_ROUND_UP(addr_dwords - 1, 4);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* instr)
|
||||
{
|
||||
/* lower remaining pseudo-instructions */
|
||||
|
@ -412,14 +421,8 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
break;
|
||||
}
|
||||
case Format::MIMG: {
|
||||
unsigned use_nsa = false;
|
||||
unsigned addr_dwords = instr->operands.size() - 3;
|
||||
for (unsigned i = 1; i < addr_dwords; i++) {
|
||||
if (instr->operands[3 + i].physReg() != instr->operands[3].physReg().advance(i * 4))
|
||||
use_nsa = true;
|
||||
}
|
||||
assert(!use_nsa || ctx.chip_class >= GFX10);
|
||||
unsigned nsa_dwords = use_nsa ? DIV_ROUND_UP(addr_dwords - 1, 4) : 0;
|
||||
unsigned nsa_dwords = get_mimg_nsa_dwords(instr);
|
||||
assert(!nsa_dwords || ctx.chip_class >= GFX10);
|
||||
|
||||
MIMG_instruction& mimg = instr->mimg();
|
||||
uint32_t encoding = (0b111100 << 26);
|
||||
|
@ -463,7 +466,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
if (nsa_dwords) {
|
||||
out.resize(out.size() + nsa_dwords);
|
||||
std::vector<uint32_t>::iterator nsa = std::prev(out.end(), nsa_dwords);
|
||||
for (unsigned i = 0; i < addr_dwords - 1; i++)
|
||||
for (unsigned i = 0; i < instr->operands.size() - 4u; i++)
|
||||
nsa[i / 4] |= (0xFF & instr->operands[4 + i].physReg().reg()) << (i % 4 * 8);
|
||||
}
|
||||
break;
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include <algorithm>
|
||||
|
||||
#include "aco_ir.h"
|
||||
#include "aco_builder.h"
|
||||
#include <stack>
|
||||
#include <functional>
|
||||
|
||||
|
@ -149,6 +150,7 @@ struct NOP_ctx_gfx10 {
|
|||
bool has_branch_after_VMEM = false;
|
||||
bool has_DS = false;
|
||||
bool has_branch_after_DS = false;
|
||||
bool has_NSA_MIMG = false;
|
||||
std::bitset<128> sgprs_read_by_VMEM;
|
||||
std::bitset<128> sgprs_read_by_SMEM;
|
||||
|
||||
|
@ -159,6 +161,7 @@ struct NOP_ctx_gfx10 {
|
|||
has_branch_after_VMEM |= other.has_branch_after_VMEM;
|
||||
has_DS |= other.has_DS;
|
||||
has_branch_after_DS |= other.has_branch_after_DS;
|
||||
has_NSA_MIMG |= other.has_NSA_MIMG;
|
||||
sgprs_read_by_VMEM |= other.sgprs_read_by_VMEM;
|
||||
sgprs_read_by_SMEM |= other.sgprs_read_by_SMEM;
|
||||
}
|
||||
|
@ -172,6 +175,7 @@ struct NOP_ctx_gfx10 {
|
|||
has_branch_after_VMEM == other.has_branch_after_VMEM &&
|
||||
has_DS == other.has_DS &&
|
||||
has_branch_after_DS == other.has_branch_after_DS &&
|
||||
has_NSA_MIMG == other.has_NSA_MIMG &&
|
||||
sgprs_read_by_VMEM == other.sgprs_read_by_VMEM &&
|
||||
sgprs_read_by_SMEM == other.sgprs_read_by_SMEM;
|
||||
}
|
||||
|
@ -737,6 +741,21 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10
|
|||
wait->imm = 0;
|
||||
new_instructions.emplace_back(std::move(wait));
|
||||
}
|
||||
|
||||
/* NSAToVMEMBug
|
||||
* Handles NSA MIMG (4 or more dwords) immediately followed by MUBUF/MTBUF (with offset[2:1] != 0).
|
||||
*/
|
||||
if (instr->isMIMG() && get_mimg_nsa_dwords(instr.get()) > 1) {
|
||||
ctx.has_NSA_MIMG = true;
|
||||
} else if (ctx.has_NSA_MIMG) {
|
||||
ctx.has_NSA_MIMG = false;
|
||||
|
||||
if (instr->isMUBUF() || instr->isMTBUF()) {
|
||||
uint32_t offset = instr->isMUBUF() ? instr->mubuf().offset : instr->mtbuf().offset;
|
||||
if (offset & 6)
|
||||
Builder(program, &new_instructions).sopp(aco_opcode::s_nop, -1, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Ctx>
|
||||
|
|
|
@ -1615,6 +1615,8 @@ bool needs_exec_mask(const Instruction* instr);
|
|||
|
||||
uint32_t get_reduction_identity(ReduceOp op, unsigned idx);
|
||||
|
||||
unsigned get_mimg_nsa_dwords(const Instruction *instr);
|
||||
|
||||
enum block_kind {
|
||||
/* uniform indicates that leaving this block,
|
||||
* all actives lanes stay active */
|
||||
|
|
Loading…
Reference in New Issue