aco: create long jumps
When the branch offset can't be encoded, we have to use s_setpc_b64. Fixes hang in RPCS3 vertex ubershader. Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3231 Cc: 20.2 <mesa-stable> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6212>
This commit is contained in:
parent
156fd58cda
commit
fe2dc41258
|
@ -2,6 +2,7 @@
|
|||
#include <algorithm>
|
||||
|
||||
#include "aco_ir.h"
|
||||
#include "aco_builder.h"
|
||||
#include "common/sid.h"
|
||||
#include "ac_shader_util.h"
|
||||
#include "util/u_math.h"
|
||||
|
@ -157,8 +158,10 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
uint32_t encoding = (0b101111111 << 23);
|
||||
encoding |= opcode << 16;
|
||||
encoding |= (uint16_t) sopp->imm;
|
||||
if (sopp->block != -1)
|
||||
if (sopp->block != -1) {
|
||||
sopp->pass_flags = 0;
|
||||
ctx.branches.emplace_back(out.size(), sopp);
|
||||
}
|
||||
out.push_back(encoding);
|
||||
break;
|
||||
}
|
||||
|
@ -752,6 +755,36 @@ void fix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* program)
|
|||
}
|
||||
}
|
||||
|
||||
static void insert_code(asm_context& ctx, std::vector<uint32_t>& out, unsigned insert_before,
|
||||
unsigned insert_count, const uint32_t *insert_data)
|
||||
{
|
||||
out.insert(out.begin() + insert_before, insert_data, insert_data + insert_count);
|
||||
|
||||
/* Update the offset of each affected block */
|
||||
for (Block& block : ctx.program->blocks) {
|
||||
if (block.offset >= insert_before)
|
||||
block.offset += insert_count;
|
||||
}
|
||||
|
||||
/* Find first branch after the inserted code */
|
||||
auto branch_it = std::find_if(ctx.branches.begin(), ctx.branches.end(), [insert_before](const auto &branch) -> bool {
|
||||
return (unsigned)branch.first >= insert_before;
|
||||
});
|
||||
|
||||
/* Update the locations of branches */
|
||||
for (; branch_it != ctx.branches.end(); ++branch_it)
|
||||
branch_it->first += insert_count;
|
||||
|
||||
/* Find first constant address after the inserted code */
|
||||
auto caddr_it = std::find_if(ctx.constaddrs.begin(), ctx.constaddrs.end(), [insert_before](const int &caddr_pos) -> bool {
|
||||
return (unsigned)caddr_pos >= insert_before;
|
||||
});
|
||||
|
||||
/* Update the locations of constant addresses */
|
||||
for (; caddr_it != ctx.constaddrs.end(); ++caddr_it)
|
||||
(*caddr_it) += insert_count;
|
||||
}
|
||||
|
||||
static void fix_branches_gfx10(asm_context& ctx, std::vector<uint32_t>& out)
|
||||
{
|
||||
/* Branches with an offset of 0x3f are buggy on GFX10, we workaround by inserting NOPs if needed. */
|
||||
|
@ -767,42 +800,107 @@ static void fix_branches_gfx10(asm_context& ctx, std::vector<uint32_t>& out)
|
|||
if (gfx10_3f_bug) {
|
||||
/* Insert an s_nop after the branch */
|
||||
constexpr uint32_t s_nop_0 = 0xbf800000u;
|
||||
int s_nop_pos = buggy_branch_it->first + 1;
|
||||
auto out_pos = std::next(out.begin(), s_nop_pos);
|
||||
out.insert(out_pos, s_nop_0);
|
||||
|
||||
/* Update the offset of each affected block */
|
||||
for (Block& block : ctx.program->blocks) {
|
||||
if (block.offset > (unsigned)buggy_branch_it->first)
|
||||
block.offset++;
|
||||
}
|
||||
|
||||
/* Update the branches following the current one */
|
||||
for (auto branch_it = std::next(buggy_branch_it); branch_it != ctx.branches.end(); ++branch_it)
|
||||
branch_it->first++;
|
||||
|
||||
/* Find first constant address after the inserted instruction */
|
||||
auto caddr_it = std::find_if(ctx.constaddrs.begin(), ctx.constaddrs.end(), [s_nop_pos](const int &caddr_pos) -> bool {
|
||||
return caddr_pos >= s_nop_pos;
|
||||
});
|
||||
|
||||
/* Update the locations of constant addresses */
|
||||
for (; caddr_it != ctx.constaddrs.end(); ++caddr_it)
|
||||
(*caddr_it)++;
|
||||
|
||||
insert_code(ctx, out, buggy_branch_it->first + 1, 1, &s_nop_0);
|
||||
}
|
||||
} while (gfx10_3f_bug);
|
||||
}
|
||||
|
||||
void emit_long_jump(asm_context& ctx, SOPP_instruction *branch, bool backwards, std::vector<uint32_t>& out)
|
||||
{
|
||||
Builder bld(ctx.program);
|
||||
|
||||
Definition def_tmp_lo(branch->definitions[0].physReg(), s1);
|
||||
Operand op_tmp_lo(branch->definitions[0].physReg(), s1);
|
||||
Definition def_tmp_hi(branch->definitions[0].physReg().advance(4), s1);
|
||||
Operand op_tmp_hi(branch->definitions[0].physReg().advance(4), s1);
|
||||
|
||||
aco_ptr<Instruction> instr;
|
||||
|
||||
if (branch->opcode != aco_opcode::s_branch) {
|
||||
/* for conditional branches, skip the long jump if the condition is false */
|
||||
aco_opcode inv;
|
||||
switch (branch->opcode) {
|
||||
case aco_opcode::s_cbranch_scc0:
|
||||
inv = aco_opcode::s_cbranch_scc1;
|
||||
break;
|
||||
case aco_opcode::s_cbranch_scc1:
|
||||
inv = aco_opcode::s_cbranch_scc0;
|
||||
break;
|
||||
case aco_opcode::s_cbranch_vccz:
|
||||
inv = aco_opcode::s_cbranch_vccnz;
|
||||
break;
|
||||
case aco_opcode::s_cbranch_vccnz:
|
||||
inv = aco_opcode::s_cbranch_vccz;
|
||||
break;
|
||||
case aco_opcode::s_cbranch_execz:
|
||||
inv = aco_opcode::s_cbranch_execnz;
|
||||
break;
|
||||
case aco_opcode::s_cbranch_execnz:
|
||||
inv = aco_opcode::s_cbranch_execz;
|
||||
break;
|
||||
default:
|
||||
unreachable("Unhandled long jump.");
|
||||
}
|
||||
instr.reset(bld.sopp(inv, -1, 7));
|
||||
emit_instruction(ctx, out, instr.get());
|
||||
}
|
||||
|
||||
/* create the new PC and stash SCC in the LSB */
|
||||
instr.reset(bld.sop1(aco_opcode::s_getpc_b64, branch->definitions[0]).instr);
|
||||
emit_instruction(ctx, out, instr.get());
|
||||
|
||||
instr.reset(bld.sop2(aco_opcode::s_addc_u32, def_tmp_lo, op_tmp_lo, Operand(0u)).instr);
|
||||
instr->operands[1].setFixed(PhysReg{255}); /* this operand has to be a literal */
|
||||
emit_instruction(ctx, out, instr.get());
|
||||
branch->pass_flags = out.size();
|
||||
|
||||
instr.reset(bld.sop2(aco_opcode::s_addc_u32, def_tmp_hi, op_tmp_hi, Operand(backwards ? UINT32_MAX : 0u)).instr);
|
||||
emit_instruction(ctx, out, instr.get());
|
||||
|
||||
/* restore SCC and clear the LSB of the new PC */
|
||||
instr.reset(bld.sopc(aco_opcode::s_bitcmp1_b32, def_tmp_lo, op_tmp_lo, Operand(0u)).instr);
|
||||
emit_instruction(ctx, out, instr.get());
|
||||
instr.reset(bld.sop1(aco_opcode::s_bitset0_b32, def_tmp_lo, Operand(0u)).instr);
|
||||
emit_instruction(ctx, out, instr.get());
|
||||
|
||||
/* create the s_setpc_b64 to jump */
|
||||
instr.reset(bld.sop1(aco_opcode::s_setpc_b64, Operand(branch->definitions[0].physReg(), s2)).instr);
|
||||
emit_instruction(ctx, out, instr.get());
|
||||
}
|
||||
|
||||
void fix_branches(asm_context& ctx, std::vector<uint32_t>& out)
|
||||
{
|
||||
if (ctx.chip_class == GFX10)
|
||||
fix_branches_gfx10(ctx, out);
|
||||
bool repeat = false;
|
||||
do {
|
||||
repeat = false;
|
||||
|
||||
for (std::pair<int, SOPP_instruction*> &branch : ctx.branches) {
|
||||
int offset = (int)ctx.program->blocks[branch.second->block].offset - branch.first - 1;
|
||||
out[branch.first] |= (uint16_t) offset;
|
||||
}
|
||||
if (ctx.chip_class == GFX10)
|
||||
fix_branches_gfx10(ctx, out);
|
||||
|
||||
for (std::pair<int, SOPP_instruction*> &branch : ctx.branches) {
|
||||
int offset = (int)ctx.program->blocks[branch.second->block].offset - branch.first - 1;
|
||||
if ((offset < INT16_MIN || offset > INT16_MAX) && !branch.second->pass_flags) {
|
||||
std::vector<uint32_t> long_jump;
|
||||
bool backwards = ctx.program->blocks[branch.second->block].offset < (unsigned)branch.first;
|
||||
emit_long_jump(ctx, branch.second, backwards, long_jump);
|
||||
|
||||
out[branch.first] = long_jump[0];
|
||||
insert_code(ctx, out, branch.first + 1, long_jump.size() - 1, long_jump.data() + 1);
|
||||
|
||||
repeat = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (branch.second->pass_flags) {
|
||||
int after_getpc = branch.first + branch.second->pass_flags - 2;
|
||||
offset = (int)ctx.program->blocks[branch.second->block].offset - after_getpc;
|
||||
out[branch.first + branch.second->pass_flags - 1] = offset * 4;
|
||||
} else {
|
||||
out[branch.first] &= 0xffff0000u;
|
||||
out[branch.first] |= (uint16_t) offset;
|
||||
}
|
||||
}
|
||||
} while (repeat);
|
||||
}
|
||||
|
||||
void fix_constaddrs(asm_context& ctx, std::vector<uint32_t>& out)
|
||||
|
|
|
@ -534,7 +534,7 @@ public:
|
|||
<%
|
||||
import itertools
|
||||
formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.product(range(5), range(5))) + [(8, 1), (1, 8)]),
|
||||
("sop1", [Format.SOP1], 'SOP1_instruction', [(1, 1), (2, 1), (3, 2)]),
|
||||
("sop1", [Format.SOP1], 'SOP1_instruction', [(0, 1), (1, 0), (1, 1), (2, 1), (3, 2)]),
|
||||
("sop2", [Format.SOP2], 'SOP2_instruction', itertools.product([1, 2], [2, 3])),
|
||||
("sopk", [Format.SOPK], 'SOPK_instruction', itertools.product([0, 1, 2], [0, 1])),
|
||||
("sopp", [Format.SOPP], 'SOPP_instruction', itertools.product([0, 1], [0, 1])),
|
||||
|
|
Loading…
Reference in New Issue