aco/ra: remove gfx6/7 subdword paths

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28836>
This commit is contained in:
Georg Lehmann 2024-04-20 09:24:29 +02:00 committed by Marge Bot
parent d914ff3aa5
commit 6ecbda83f8
4 changed files with 9 additions and 60 deletions

View File

@ -498,14 +498,13 @@ unsigned
get_subdword_operand_stride(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr,
unsigned idx, RegClass rc)
{
assert(gfx_level >= GFX8);
if (instr->isPseudo()) {
/* v_readfirstlane_b32 cannot use SDWA */
if (instr->opcode == aco_opcode::p_as_uniform)
return 4;
else if (gfx_level >= GFX8)
return rc.bytes() % 2 == 0 ? 2 : 1;
else
return 4;
return rc.bytes() % 2 == 0 ? 2 : 1;
}
assert(rc.bytes() <= 2);
@ -608,13 +607,13 @@ get_subdword_definition_info(Program* program, const aco_ptr<Instruction>& instr
{
amd_gfx_level gfx_level = program->gfx_level;
assert(gfx_level >= GFX8);
if (instr->isPseudo()) {
if (instr->opcode == aco_opcode::p_interp_gfx11)
return std::make_pair(4u, 4u);
else if (gfx_level >= GFX8)
return std::make_pair(rc.bytes() % 2 == 0 ? 2 : 1, rc.bytes());
else
return std::make_pair(4, rc.size() * 4u);
return std::make_pair(rc.bytes() % 2 == 0 ? 2 : 1, rc.bytes());
}
if (instr->isVALU()) {
@ -2050,16 +2049,12 @@ handle_pseudo(ra_ctx& ctx, const RegisterFile& reg_file, Instruction* instr)
}
/* if all operands are constant, no need to care either */
bool reads_linear = false;
bool reads_subdword = false;
for (Operand& op : instr->operands) {
if (op.isTemp() && op.getTemp().regClass().is_linear())
reads_linear = true;
if (op.isTemp() && op.regClass().is_subdword())
reads_subdword = true;
}
bool needs_scratch_reg = (writes_linear && reads_linear && reg_file[scc]) ||
(ctx.program->gfx_level <= GFX7 && reads_subdword);
if (!needs_scratch_reg)
if (!writes_linear || !reads_linear || !reg_file[scc])
return;
instr->pseudo().needs_scratch_reg = true;
@ -2072,10 +2067,6 @@ handle_pseudo(ra_ctx& ctx, const RegisterFile& reg_file, Instruction* instr)
reg = ctx.max_used_sgpr + 1;
for (; reg < ctx.program->max_reg_demand.sgpr && reg_file[PhysReg{(unsigned)reg}]; reg++)
;
if (reg == ctx.program->max_reg_demand.sgpr) {
assert(reads_subdword && reg_file[m0] == 0);
reg = m0;
}
}
adjust_max_used_regs(ctx, s1, reg);

View File

@ -238,7 +238,7 @@ finish_lower_subdword_test()
}
void
finish_ra_test(ra_test_policy policy, bool lower)
finish_ra_test(ra_test_policy policy)
{
finish_program(program.get());
if (!aco::validate_ir(program.get())) {
@ -255,11 +255,6 @@ finish_ra_test(ra_test_policy policy, bool lower)
return;
}
if (lower) {
aco::ssa_elimination(program.get());
aco::lower_to_hw_instr(program.get());
}
aco_print_program(program.get(), output);
}

View File

@ -71,7 +71,7 @@ void finish_validator_test();
void finish_opt_test();
void finish_setup_reduce_temp_test();
void finish_lower_subdword_test();
void finish_ra_test(aco::ra_test_policy, bool lower = false);
void finish_ra_test(aco::ra_test_policy);
void finish_optimizer_postRA_test();
void finish_to_hw_instr_test();
void finish_schedule_vopd_test();

View File

@ -172,43 +172,6 @@ BEGIN_TEST(regalloc.precolor.different_regs)
finish_ra_test(ra_test_policy());
END_TEST
BEGIN_TEST(regalloc.scratch_sgpr.create_vector)
if (!setup_cs("v1 s1", GFX7))
return;
Temp tmp = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), inputs[0], Operand::zero());
//>> v3b: %0:v[0][0:24] = v_and_b32 0xffffff, %0:v[0][0:24]
//! s1: %0:s[1] = s_mov_b32 0x1000001
//! v1: %0:v[0] = v_mul_lo_u32 %0:s[1], %_:v[0][0:8]
bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), Operand(v3b), Operand(tmp));
//! p_unit_test %_:s[0]
//! s_endpgm
bld.pseudo(aco_opcode::p_unit_test, inputs[1]);
finish_ra_test(ra_test_policy(), true);
END_TEST
BEGIN_TEST(regalloc.scratch_sgpr.create_vector_sgpr_operand)
if (!setup_cs("v2 s1", GFX7))
return;
Temp tmp = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), inputs[0], Operand::c32(4u));
//>> v1: %0:v[0] = v_mov_b32 %_:s[0]
//! v3b: %0:v[1][0:24] = v_and_b32 0xffffff, %0:v[1][0:24]
//! s1: %0:s[1] = s_mov_b32 0x1000001
//! v1: %0:v[1] = v_mul_lo_u32 %0:s[1], %_:v[1][0:8]
bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), inputs[1], Operand(v3b), Operand(tmp));
//! p_unit_test %_:s[0]
//! s_endpgm
bld.pseudo(aco_opcode::p_unit_test, inputs[1]);
finish_ra_test(ra_test_policy(), true);
END_TEST
BEGIN_TEST(regalloc.branch_def_phis_at_merge_block)
//>> p_startpgm
if (!setup_cs("", GFX10))