From 0447a2303fb06d6ad1f64e5f079a74bf2cf540da Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Wed, 2 Feb 2022 16:42:24 +0000
Subject: [PATCH] aco: don't encode src2 for v_writelane_b32_e64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Encoding src2 doesn't cause issues for print_asm() because we have a
workaround there, but it does for RGP and it seems the developers are not
interested in fixing it.

https://github.com/GPUOpen-Tools/radeon_gpu_profiler/issues/61

Signed-off-by: Rhys Perry
Tested-by: Tatsuyuki Ishi
Reviewed-by: Daniel Schürmann
Cc: mesa-stable
Part-of:
---
 src/amd/compiler/aco_assembler.cpp | 4 ++++
 src/amd/compiler/aco_print_asm.cpp | 6 ------
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp
index 4c404bab7e8..3095f0bda89 100644
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -625,6 +625,10 @@ emit_instruction(asm_context& ctx, std::vector& out, Instruction* inst
          encoding = 0;
          if (instr->opcode == aco_opcode::v_interp_mov_f32) {
             encoding = 0x3 & instr->operands[0].constantValue();
+         } else if (instr->opcode == aco_opcode::v_writelane_b32_e64) {
+            encoding |= instr->operands[0].physReg() << 0;
+            encoding |= instr->operands[1].physReg() << 9;
+            /* Encoding src2 works fine with hardware but breaks some disassemblers. */
          } else {
             for (unsigned i = 0; i < instr->operands.size(); i++)
                encoding |= instr->operands[i].physReg() << (i * 9);
diff --git a/src/amd/compiler/aco_print_asm.cpp b/src/amd/compiler/aco_print_asm.cpp
index a2e88b7a6f7..673f41578b8 100644
--- a/src/amd/compiler/aco_print_asm.cpp
+++ b/src/amd/compiler/aco_print_asm.cpp
@@ -271,12 +271,6 @@ std::pair
 disasm_instr(chip_class chip, LLVMDisasmContextRef disasm, uint32_t* binary, unsigned exec_size,
              size_t pos, char* outline, unsigned outline_size)
 {
-   /* mask out src2 on v_writelane_b32 */
-   if (((chip == GFX8 || chip == GFX9) && (binary[pos] & 0xffff8000) == 0xd28a0000) ||
-       (chip >= GFX10 && (binary[pos] & 0xffff8000) == 0xd7610000)) {
-      binary[pos + 1] = binary[pos + 1] & 0xF803FFFF;
-   }
-
    size_t l =
       LLVMDisasmInstruction(disasm, (uint8_t*)&binary[pos],
                             (exec_size - pos) * sizeof(uint32_t), pos * 4, outline, outline_size);