aco/ra: create VCC-affinities during RA

instead of using register hints.

Totals from 88367 (65.50% of 134913) affected shaders: (GFX10.3)
CodeSize: 322492184 -> 322252912 (-0.07%); split: -0.08%, +0.01%
Instrs: 60615809 -> 60541260 (-0.12%); split: -0.12%, +0.00%
Latency: 557067980 -> 557009210 (-0.01%); split: -0.01%, +0.00%
InvThroughput: 109676757 -> 109674804 (-0.00%); split: -0.00%, +0.00%
SClause: 1939703 -> 1939924 (+0.01%); split: -0.01%, +0.02%
Copies: 4557567 -> 4487530 (-1.54%); split: -1.54%, +0.00%
Branches: 1941123 -> 1937453 (-0.19%)

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15408>
This commit is contained in:
Daniel Schürmann 2022-03-15 12:15:44 +01:00 committed by Marge Bot
parent 44fb9ba84a
commit 6ebc61d71b
1 changed files with 31 additions and 10 deletions

View File

@ -48,7 +48,13 @@ void add_subdword_definition(Program* program, aco_ptr<Instruction>& instr, Phys
struct assignment {
PhysReg reg;
RegClass rc;
bool assigned = false;
union {
struct {
bool assigned : 1;
bool vcc : 1;
};
uint8_t _ = 0;
};
uint32_t affinity = 0;
assignment() = default;
assignment(PhysReg reg_, RegClass rc_) : reg(reg_), rc(rc_), assigned(-1) {}
@ -1624,6 +1630,10 @@ get_reg(ra_ctx& ctx, RegisterFile& reg_file, Temp temp,
return affinity.reg;
}
}
if (ctx.assignments[temp.id()].vcc) {
if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, vcc))
return vcc;
}
std::pair<PhysReg, bool> res;
@ -2380,11 +2390,26 @@ get_affinities(ra_ctx& ctx, std::vector<IDSet>& live_out_per_block)
} else if (instr->format == Format::MIMG && instr->operands.size() > 4) {
for (unsigned i = 3; i < instr->operands.size(); i++)
ctx.vectors[instr->operands[i].tempId()] = instr.get();
}
if (instr->opcode == aco_opcode::p_split_vector &&
instr->operands[0].isFirstKillBeforeDef())
} else if (instr->opcode == aco_opcode::p_split_vector &&
instr->operands[0].isFirstKillBeforeDef()) {
ctx.split_vectors[instr->operands[0].tempId()] = instr.get();
} else if (instr->isVOPC() && !instr->isVOP3()) {
ctx.assignments[instr->definitions[0].tempId()].vcc = true;
} else if (instr->isVOP2() && !instr->isVOP3()) {
if (instr->operands.size() == 3 && instr->operands[2].isTemp() &&
instr->operands[2].regClass().type() == RegType::sgpr)
ctx.assignments[instr->operands[2].tempId()].vcc = true;
if (instr->definitions.size() == 2)
ctx.assignments[instr->definitions[1].tempId()].vcc = true;
} else if (instr->opcode == aco_opcode::s_and_b32 ||
instr->opcode == aco_opcode::s_and_b64) {
/* If SCC is used by a branch, we might be able to use
* s_cbranch_vccz/s_cbranch_vccnz if the operand is VCC.
*/
if (!instr->definitions[1].isKill() && instr->operands[0].isTemp() &&
instr->operands[1].isFixed() && instr->operands[1].physReg() == exec)
ctx.assignments[instr->operands[0].tempId()].vcc = true;
}
/* add operands to live variables */
for (const Operand& op : instr->operands) {
@ -2760,11 +2785,7 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
continue;
/* find free reg */
if (definition->hasHint() &&
get_reg_specified(ctx, register_file, definition->regClass(), instr,
definition->physReg())) {
definition->setFixed(definition->physReg());
} else if (instr->opcode == aco_opcode::p_split_vector) {
if (instr->opcode == aco_opcode::p_split_vector) {
PhysReg reg = instr->operands[0].physReg();
for (unsigned j = 0; j < i; j++)
reg.reg_b += instr->definitions[j].bytes();