mirror of https://gitlab.freedesktop.org/mesa/mesa
aco: add wait_imm::unpack and wait_imm::max
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28981>
This commit is contained in:
parent
c894c9ab1b
commit
75532d8687
|
@ -898,14 +898,14 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>&
|
|||
if (instr->isFlat() || instr->isDS())
|
||||
mark_read_regs_exec(state, instr, ctx.sgprs_read_by_DS);
|
||||
} else if (instr->isSALU() || instr->isSMEM()) {
|
||||
if (instr->opcode == aco_opcode::s_waitcnt) {
|
||||
wait_imm imm(state.program->gfx_level, instr->salu().imm);
|
||||
wait_imm imm;
|
||||
if (imm.unpack(state.program->gfx_level, instr.get())) {
|
||||
if (imm.vm == 0)
|
||||
ctx.sgprs_read_by_VMEM.reset();
|
||||
if (imm.lgkm == 0)
|
||||
ctx.sgprs_read_by_DS.reset();
|
||||
} else if (instr->opcode == aco_opcode::s_waitcnt_vscnt && instr->salu().imm == 0) {
|
||||
ctx.sgprs_read_by_VMEM_store.reset();
|
||||
if (imm.vs == 0)
|
||||
ctx.sgprs_read_by_VMEM_store.reset();
|
||||
} else if (vm_vsrc == 0) {
|
||||
ctx.sgprs_read_by_VMEM.reset();
|
||||
ctx.sgprs_read_by_DS.reset();
|
||||
|
@ -981,15 +981,10 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>&
|
|||
bld.sop1(aco_opcode::s_mov_b32, Definition(sgpr_null, s1), Operand::zero());
|
||||
}
|
||||
} else if (instr->isSALU()) {
|
||||
/* Reducing lgkmcnt count to 0 always mitigates the hazard. */
|
||||
if (instr->opcode == aco_opcode::s_waitcnt_lgkmcnt) {
|
||||
const SALU_instruction& sopk = instr->salu();
|
||||
if (sopk.imm == 0 && sopk.operands[0].physReg() == sgpr_null)
|
||||
ctx.sgprs_read_by_SMEM.reset();
|
||||
} else if (instr->opcode == aco_opcode::s_waitcnt) {
|
||||
wait_imm imm(state.program->gfx_level, instr->salu().imm);
|
||||
if (imm.lgkm == 0)
|
||||
ctx.sgprs_read_by_SMEM.reset();
|
||||
wait_imm imm;
|
||||
if (imm.unpack(state.program->gfx_level, instr.get()) && imm.lgkm == 0) {
|
||||
/* Reducing lgkmcnt count to 0 always mitigates the hazard. */
|
||||
ctx.sgprs_read_by_SMEM.reset();
|
||||
} else if (instr->format != Format::SOPP && instr->definitions.size()) {
|
||||
/* SALU can mitigate the hazard */
|
||||
ctx.sgprs_read_by_SMEM.reset();
|
||||
|
@ -1515,18 +1510,18 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>&
|
|||
for (Operand& op : instr->operands)
|
||||
fill_vgpr_bitset(ctx.vgpr_used_by_ds, op.physReg(), op.bytes());
|
||||
}
|
||||
wait_imm imm;
|
||||
if (instr->isVALU() || instr->isEXP() || vm_vsrc == 0) {
|
||||
ctx.vgpr_used_by_vmem_load.reset();
|
||||
ctx.vgpr_used_by_vmem_store.reset();
|
||||
ctx.vgpr_used_by_ds.reset();
|
||||
} else if (instr->opcode == aco_opcode::s_waitcnt) {
|
||||
wait_imm imm(GFX11, instr->salu().imm);
|
||||
} else if (imm.unpack(state.program->gfx_level, instr.get())) {
|
||||
if (imm.vm == 0)
|
||||
ctx.vgpr_used_by_vmem_load.reset();
|
||||
if (imm.lgkm == 0)
|
||||
ctx.vgpr_used_by_ds.reset();
|
||||
} else if (instr->opcode == aco_opcode::s_waitcnt_vscnt && instr->salu().imm == 0) {
|
||||
ctx.vgpr_used_by_vmem_store.reset();
|
||||
if (imm.vs == 0)
|
||||
ctx.vgpr_used_by_vmem_store.reset();
|
||||
}
|
||||
if (instr->isLDSDIR()) {
|
||||
if (ctx.vgpr_used_by_vmem_load[instr->definitions[0].physReg().reg() - 256] ||
|
||||
|
|
|
@ -228,16 +228,15 @@ struct wait_entry {
|
|||
};
|
||||
|
||||
struct target_info {
|
||||
uint16_t max_cnt[wait_type_num] = {};
|
||||
wait_imm max_cnt;
|
||||
uint32_t events[wait_type_num] = {};
|
||||
uint16_t unordered_events;
|
||||
|
||||
target_info(enum amd_gfx_level gfx_level)
|
||||
{
|
||||
max_cnt[wait_type_vm] = gfx_level >= GFX9 ? 62 : 14;
|
||||
max_cnt[wait_type_exp] = 6;
|
||||
max_cnt[wait_type_lgkm] = gfx_level >= GFX10 ? 62 : 14;
|
||||
max_cnt[wait_type_vs] = gfx_level >= GFX10 ? 62 : 0;
|
||||
max_cnt = wait_imm::max(gfx_level);
|
||||
for (unsigned i = 0; i < wait_type_num; i++)
|
||||
max_cnt[i] = max_cnt[i] ? max_cnt[i] - 1 : 0;
|
||||
|
||||
events[wait_type_exp] = event_exp_pos | event_exp_param | event_exp_mrt_null |
|
||||
event_gds_gpr_lock | event_vmem_gpr_lock | event_ldsdir;
|
||||
|
@ -402,19 +401,6 @@ check_instr(wait_ctx& ctx, wait_imm& wait, alu_delay_info& delay, Instruction* i
|
|||
}
|
||||
}
|
||||
|
||||
bool
|
||||
parse_wait_instr(wait_ctx& ctx, wait_imm& imm, Instruction* instr)
|
||||
{
|
||||
if (instr->opcode == aco_opcode::s_waitcnt_vscnt && instr->operands[0].physReg() == sgpr_null) {
|
||||
imm.vs = std::min<uint8_t>(imm.vs, instr->salu().imm);
|
||||
return true;
|
||||
} else if (instr->opcode == aco_opcode::s_waitcnt) {
|
||||
imm.combine(wait_imm(ctx.gfx_level, instr->salu().imm));
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
parse_delay_alu(wait_ctx& ctx, alu_delay_info& delay, Instruction* instr)
|
||||
{
|
||||
|
@ -962,7 +948,7 @@ handle_block(Program* program, Block& block, wait_ctx& ctx)
|
|||
for (size_t i = 0; i < block.instructions.size(); i++) {
|
||||
aco_ptr<Instruction>& instr = block.instructions[i];
|
||||
|
||||
bool is_wait = parse_wait_instr(ctx, queued_imm, instr.get());
|
||||
bool is_wait = queued_imm.unpack(ctx.gfx_level, instr.get());
|
||||
bool is_delay_alu = parse_delay_alu(ctx, queued_delay, instr.get());
|
||||
|
||||
memory_sync_info sync_info = get_sync_info(instr.get());
|
||||
|
|
|
@ -1200,32 +1200,6 @@ wait_imm::wait_imm(uint16_t vm_, uint16_t exp_, uint16_t lgkm_, uint16_t vs_)
|
|||
: exp(exp_), lgkm(lgkm_), vm(vm_), vs(vs_)
|
||||
{}
|
||||
|
||||
/* Decodes the packed s_waitcnt immediate for the given hardware generation.
 *
 * Field layout as read below:
 *   GFX11+     : vm = bits 15:10, lgkm = bits 9:4, exp = bits 2:0
 *   before that: vm = bits 3:0 (plus bits 15:14 as vm[5:4] on GFX9+),
 *                exp = bits 6:4,
 *                lgkm = bits 11:8 (plus bits 13:12 as lgkm[5:4] on GFX10+)
 *
 * A field whose bits are all set is stored as unset_counter. vs is not part
 * of the packed immediate and is always initialised to unset_counter.
 */
wait_imm::wait_imm(enum amd_gfx_level gfx_level, uint16_t packed) : vs(unset_counter)
{
   /* All-ones value of each field; its width depends on the generation. */
   const unsigned vm_all_ones = gfx_level >= GFX9 ? 0x3f : 0xf;
   const unsigned exp_all_ones = 0x7;
   const unsigned lgkm_all_ones = gfx_level >= GFX10 ? 0x3f : 0xf;

   if (gfx_level < GFX11) {
      vm = packed & 0xf;
      if (gfx_level >= GFX9)
         vm |= (packed >> 10) & 0x30;

      exp = (packed >> 4) & 0x7;

      lgkm = (packed >> 8) & 0xf;
      if (gfx_level >= GFX10)
         lgkm |= (packed >> 8) & 0x30;
   } else {
      vm = (packed >> 10) & 0x3f;
      lgkm = (packed >> 4) & 0x3f;
      exp = packed & 0x7;
   }

   if (vm == vm_all_ones)
      vm = wait_imm::unset_counter;
   if (exp == exp_all_ones)
      exp = wait_imm::unset_counter;
   if (lgkm == lgkm_all_ones)
      lgkm = wait_imm::unset_counter;
}
|
||||
|
||||
uint16_t
|
||||
wait_imm::pack(enum amd_gfx_level gfx_level) const
|
||||
{
|
||||
|
@ -1257,6 +1231,68 @@ wait_imm::pack(enum amd_gfx_level gfx_level) const
|
|||
return imm;
|
||||
}
|
||||
|
||||
/* Builds a wait_imm holding the per-generation upper value of each counter:
 *   vm   : 63 on GFX9+,  otherwise 15
 *   exp  : 7
 *   lgkm : 63 on GFX10+, otherwise 15
 *   vs   : 63 on GFX10+, otherwise 0
 */
wait_imm
wait_imm::max(enum amd_gfx_level gfx_level)
{
   const bool gfx9_plus = gfx_level >= GFX9;
   const bool gfx10_plus = gfx_level >= GFX10;

   wait_imm limits;
   limits.vm = gfx9_plus ? 63 : 15;
   limits.exp = 7;
   limits.lgkm = gfx10_plus ? 63 : 15;
   limits.vs = gfx10_plus ? 63 : 0;
   return limits;
}
|
||||
|
||||
bool
|
||||
wait_imm::unpack(enum amd_gfx_level gfx_level, const Instruction* instr)
|
||||
{
|
||||
if (!instr->isSALU() || (!instr->operands.empty() && instr->operands[0].physReg() != sgpr_null))
|
||||
return false;
|
||||
|
||||
aco_opcode op = instr->opcode;
|
||||
uint16_t packed = instr->salu().imm;
|
||||
|
||||
if (op == aco_opcode::s_waitcnt_expcnt) {
|
||||
exp = std::min<uint8_t>(exp, packed);
|
||||
} else if (op == aco_opcode::s_waitcnt_lgkmcnt) {
|
||||
lgkm = std::min<uint8_t>(lgkm, packed);
|
||||
} else if (op == aco_opcode::s_waitcnt_vmcnt) {
|
||||
vm = std::min<uint8_t>(vm, packed);
|
||||
} else if (op == aco_opcode::s_waitcnt_vscnt) {
|
||||
vs = std::min<uint8_t>(vs, packed);
|
||||
} else if (op == aco_opcode::s_waitcnt) {
|
||||
uint8_t vm2, lgkm2, exp2;
|
||||
if (gfx_level >= GFX11) {
|
||||
vm2 = (packed >> 10) & 0x3f;
|
||||
lgkm2 = (packed >> 4) & 0x3f;
|
||||
exp2 = packed & 0x7;
|
||||
} else {
|
||||
vm2 = packed & 0xf;
|
||||
if (gfx_level >= GFX9)
|
||||
vm2 |= (packed >> 10) & 0x30;
|
||||
|
||||
exp2 = (packed >> 4) & 0x7;
|
||||
|
||||
lgkm2 = (packed >> 8) & 0xf;
|
||||
if (gfx_level >= GFX10)
|
||||
lgkm2 |= (packed >> 8) & 0x30;
|
||||
}
|
||||
|
||||
if (vm2 == (gfx_level >= GFX9 ? 0x3f : 0xf))
|
||||
vm2 = wait_imm::unset_counter;
|
||||
if (exp2 == 0x7)
|
||||
exp2 = wait_imm::unset_counter;
|
||||
if (lgkm2 == (gfx_level >= GFX10 ? 0x3f : 0xf))
|
||||
lgkm2 = wait_imm::unset_counter;
|
||||
|
||||
vm = std::min(vm, vm2);
|
||||
exp = std::min(exp, exp2);
|
||||
lgkm = std::min(lgkm, lgkm2);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
wait_imm::combine(const wait_imm& other)
|
||||
{
|
||||
|
|
|
@ -182,6 +182,8 @@ enum wait_type {
|
|||
wait_type_num = 4,
|
||||
};
|
||||
|
||||
struct Instruction;
|
||||
|
||||
struct wait_imm {
|
||||
static const uint8_t unset_counter = 0xff;
|
||||
|
||||
|
@ -192,10 +194,13 @@ struct wait_imm {
|
|||
|
||||
wait_imm();
|
||||
wait_imm(uint16_t vm_, uint16_t exp_, uint16_t lgkm_, uint16_t vs_);
|
||||
wait_imm(enum amd_gfx_level chip, uint16_t packed);
|
||||
|
||||
uint16_t pack(enum amd_gfx_level chip) const;
|
||||
|
||||
static wait_imm max(enum amd_gfx_level gfx_level);
|
||||
|
||||
bool unpack(enum amd_gfx_level gfx_level, const Instruction* instr);
|
||||
|
||||
bool combine(const wait_imm& other);
|
||||
|
||||
bool empty() const;
|
||||
|
|
|
@ -275,13 +275,17 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
|
|||
uint16_t imm = instr->salu().imm;
|
||||
switch (instr->opcode) {
|
||||
case aco_opcode::s_waitcnt: {
|
||||
wait_imm unpacked(gfx_level, imm);
|
||||
if (unpacked.vm != wait_imm::unset_counter)
|
||||
fprintf(output, " vmcnt(%d)", unpacked.vm);
|
||||
if (unpacked.exp != wait_imm::unset_counter)
|
||||
fprintf(output, " expcnt(%d)", unpacked.exp);
|
||||
if (unpacked.lgkm != wait_imm::unset_counter)
|
||||
fprintf(output, " lgkmcnt(%d)", unpacked.lgkm);
|
||||
wait_imm unpacked;
|
||||
unpacked.unpack(gfx_level, instr);
|
||||
const char* names[wait_type_num];
|
||||
names[wait_type_exp] = "expcnt";
|
||||
names[wait_type_vm] = "vmcnt";
|
||||
names[wait_type_lgkm] = "lgkmcnt";
|
||||
names[wait_type_vs] = "vscnt";
|
||||
for (unsigned i = 0; i < wait_type_num; i++) {
|
||||
if (unpacked[i] != wait_imm::unset_counter)
|
||||
fprintf(output, " %s(%d)", names[i], unpacked[i]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case aco_opcode::s_waitcnt_depctr: {
|
||||
|
|
|
@ -318,28 +318,20 @@ get_wait_imm(Program* program, aco_ptr<Instruction>& instr)
|
|||
if (instr->opcode == aco_opcode::s_endpgm) {
|
||||
for (unsigned i = 0; i < wait_type_num; i++)
|
||||
imm[i] = 0;
|
||||
} else if (instr->opcode == aco_opcode::s_waitcnt) {
|
||||
return wait_imm(program->gfx_level, instr->salu().imm);
|
||||
} else if (instr->opcode == aco_opcode::s_waitcnt_vscnt) {
|
||||
imm.vs = instr->salu().imm;
|
||||
return imm;
|
||||
} else if (imm.unpack(program->gfx_level, instr.get())) {
|
||||
} else if (instr->isVINTERP_INREG()) {
|
||||
imm.exp = instr->vinterp_inreg().wait_exp;
|
||||
if (imm.exp == 0x7)
|
||||
imm.exp = wait_imm::unset_counter;
|
||||
return imm;
|
||||
} else {
|
||||
unsigned max_lgkm_cnt = program->gfx_level >= GFX10 ? 62 : 14;
|
||||
unsigned max_exp_cnt = 6;
|
||||
unsigned max_vm_cnt = program->gfx_level >= GFX9 ? 62 : 14;
|
||||
unsigned max_vs_cnt = 62;
|
||||
|
||||
/* If an instruction increases a counter, it waits for it to be below maximum first. */
|
||||
std::array<unsigned, wait_type_num> wait_info =
|
||||
get_wait_counter_info(program->gfx_level, instr);
|
||||
imm.lgkm = wait_info[wait_type_lgkm] ? max_lgkm_cnt : wait_imm::unset_counter;
|
||||
imm.exp = wait_info[wait_type_exp] ? max_exp_cnt : wait_imm::unset_counter;
|
||||
imm.vm = wait_info[wait_type_vm] ? max_vm_cnt : wait_imm::unset_counter;
|
||||
imm.vs = wait_info[wait_type_vs] ? max_vs_cnt : wait_imm::unset_counter;
|
||||
wait_imm max = wait_imm::max(program->gfx_level);
|
||||
for (unsigned i = 0; i < wait_type_num; i++) {
|
||||
if (wait_info[i])
|
||||
imm[i] = max[i] - 1;
|
||||
}
|
||||
}
|
||||
return imm;
|
||||
}
|
||||
|
|
|
@ -90,7 +90,7 @@ BEGIN_TEST(insert_waitcnt.clause)
|
|||
Operand::zero());
|
||||
//! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false);
|
||||
//! s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
//! s_waitcnt lgkmcnt(0) vmcnt(0)
|
||||
//! v1: %0:v[5] = buffer_load_dword %0:s[4-7], %0:v[4], 0
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, def_v5, Operand(PhysReg(4), s4), op_v4, Operand::zero(),
|
||||
0, false);
|
||||
|
|
Loading…
Reference in New Issue