aco/spill: refactor live-in registerDemand calculation

This also fixes some hypothetical issues for loops without phis
and for loops with higher register pressure at the end of the
loop preheader.

No fossil-db changes.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9196>
This commit is contained in:
Daniel Schürmann 2021-02-22 18:02:24 +01:00 committed by Marge Bot
parent 282eacc3e0
commit b56ea19111
1 changed files with 56 additions and 75 deletions

View File

@ -360,6 +360,46 @@ std::vector<std::map<Temp, uint32_t>> local_next_uses(spill_ctx& ctx, Block* blo
return local_next_uses;
}
/* Returns the register demand in effect right before instruction `idx` of
 * block `block_idx`.
 *
 * For idx > 0 this is the demand stored for the preceding instruction.
 * For idx == 0 there is no preceding entry, so the value is obtained from the
 * instruction-level get_demand_before() overload, passing the first
 * instruction's own demand and a null "instruction before". */
RegisterDemand get_demand_before(spill_ctx& ctx, unsigned block_idx, unsigned idx)
{
   if (idx != 0)
      return ctx.register_demand[block_idx][idx - 1];

   /* Work on a copy so the stored per-instruction demand is left untouched. */
   RegisterDemand first_demand = ctx.register_demand[block_idx][0];
   aco_ptr<Instruction>& first_instr = ctx.program->blocks[block_idx].instructions[0];
   aco_ptr<Instruction> no_predecessor(nullptr);
   return get_demand_before(first_demand, first_instr, no_predecessor);
}
/* Computes the register demand at the entry of block `block_idx`.
 *
 * The result is the sum of
 *  - the demand right before the first non-phi instruction, and
 *  - every killed phi definition that is not already in
 *    ctx.spills_entry[block_idx],
 * with the SGPR component raised to at least the SGPR demand recorded at the
 * end of each linear predecessor. */
RegisterDemand get_live_in_demand(spill_ctx& ctx, unsigned block_idx)
{
   Block& block = ctx.program->blocks[block_idx];
   RegisterDemand live_in = RegisterDemand();

   /* Walk the leading phis; afterwards first_non_phi is the index of the
    * first instruction that is not a phi. */
   unsigned first_non_phi = 0;
   for (aco_ptr<Instruction>& instr : block.instructions) {
      if (!is_phi(instr))
         break;
      first_non_phi++;

      /* Killed phi definitions increase pressure in the predecessor but not
       * the block they're in. As this value is meant to bound the pressure
       * both at the start of this block and at the ends of its predecessors,
       * killed unspilled phi definitions have to be counted here. */
      auto& def = instr->definitions[0];
      if (!def.isTemp() || !def.isKill())
         continue;
      if (ctx.spills_entry[block_idx].count(def.getTemp()))
         continue;
      live_in += def.getTemp();
   }

   live_in += get_demand_before(ctx, block_idx, first_non_phi);

   /* Consider register pressure from linear predecessors. This can affect
    * the result if the branch instructions define sgprs. */
   for (unsigned pred : block.linear_preds) {
      const auto& pred_end = ctx.register_demand[pred].back();
      live_in.sgpr = std::max<int16_t>(live_in.sgpr, pred_end.sgpr);
   }

   return live_in;
}
RegisterDemand init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx)
{
@ -378,12 +418,12 @@ RegisterDemand init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_id
ctx.loop_header.emplace(block);
/* check how many live-through variables should be spilled */
RegisterDemand new_demand;
RegisterDemand reg_pressure = get_live_in_demand(ctx, block_idx);
RegisterDemand loop_demand = reg_pressure;
unsigned i = block_idx;
while (ctx.program->blocks[i].loop_nest_depth >= block->loop_nest_depth) {
assert(ctx.program->blocks.size() > i);
new_demand.update(ctx.program->blocks[i].register_demand);
i++;
loop_demand.update(ctx.program->blocks[i++].register_demand);
}
unsigned loop_end = i;
@ -399,17 +439,18 @@ RegisterDemand init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_id
if (it->second.first >= loop_end || ctx.remat.count(spilled.first)) {
ctx.spills_entry[block_idx][spilled.first] = spilled.second;
spilled_registers += spilled.first;
loop_demand -= spilled.first;
}
}
/* select live-through variables and constants */
RegType type = RegType::vgpr;
while ((new_demand - spilled_registers).exceeds(ctx.target_pressure)) {
while (loop_demand.exceeds(ctx.target_pressure)) {
/* if VGPR demand is low enough, select SGPRs */
if (type == RegType::vgpr && new_demand.vgpr - spilled_registers.vgpr <= ctx.target_pressure.vgpr)
if (type == RegType::vgpr && loop_demand.vgpr <= ctx.target_pressure.vgpr)
type = RegType::sgpr;
/* if SGPR demand is low enough, break */
if (type == RegType::sgpr && new_demand.sgpr - spilled_registers.sgpr <= ctx.target_pressure.sgpr)
if (type == RegType::sgpr && loop_demand.sgpr <= ctx.target_pressure.sgpr)
break;
unsigned distance = 0;
@ -441,26 +482,15 @@ RegisterDemand init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_id
ctx.spills_entry[block_idx][to_spill] = spill_id;
spilled_registers += to_spill;
loop_demand -= to_spill;
}
/* shortcut */
if (!(new_demand - spilled_registers).exceeds(ctx.target_pressure))
if (!loop_demand.exceeds(ctx.target_pressure))
return spilled_registers;
/* if reg pressure is too high at beginning of loop, add variables with furthest use */
unsigned idx = 0;
while (block->instructions[idx]->opcode == aco_opcode::p_phi || block->instructions[idx]->opcode == aco_opcode::p_linear_phi)
idx++;
assert(idx != 0 && "loop without phis: TODO");
idx--;
RegisterDemand reg_pressure = ctx.register_demand[block_idx][idx] - spilled_registers;
/* Consider register pressure from linear predecessors. This can affect
* reg_pressure if the branch instructions define sgprs. */
for (unsigned pred : block->linear_preds) {
reg_pressure.sgpr = std::max<int16_t>(
reg_pressure.sgpr, ctx.register_demand[pred].back().sgpr - spilled_registers.sgpr);
}
reg_pressure -= spilled_registers;
while (reg_pressure.exceeds(ctx.target_pressure)) {
unsigned distance = 0;
@ -571,10 +601,9 @@ RegisterDemand init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_id
}
/* same for phis */
unsigned idx = 0;
while (block->instructions[idx]->opcode == aco_opcode::p_linear_phi ||
block->instructions[idx]->opcode == aco_opcode::p_phi) {
aco_ptr<Instruction>& phi = block->instructions[idx];
for (aco_ptr<Instruction>& phi : block->instructions) {
if (!is_phi(phi))
break;
std::vector<unsigned>& preds = phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds;
bool spill = true;
@ -597,45 +626,11 @@ RegisterDemand init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_id
partial_spills.erase(phi->definitions[0].getTemp());
spilled_registers += phi->definitions[0].getTemp();
}
idx++;
}
/* if reg pressure at first instruction is still too high, add partially spilled variables */
RegisterDemand reg_pressure;
if (idx == 0) {
for (const Definition& def : block->instructions[idx]->definitions) {
if (def.isTemp()) {
reg_pressure -= def.getTemp();
}
}
for (const Operand& op : block->instructions[idx]->operands) {
if (op.isTemp() && op.isFirstKill()) {
reg_pressure += op.getTemp();
}
}
} else {
for (unsigned i = 0; i < idx; i++) {
aco_ptr<Instruction>& instr = block->instructions[i];
assert(is_phi(instr));
/* Killed phi definitions increase pressure in the predecessor but not
* the block they're in. Since the loops below are both to control
* pressure of the start of this block and the ends of it's
* predecessors, we need to count killed unspilled phi definitions here. */
if (instr->definitions[0].isKill() &&
!ctx.spills_entry[block_idx].count(instr->definitions[0].getTemp()))
reg_pressure += instr->definitions[0].getTemp();
}
idx--;
}
reg_pressure += ctx.register_demand[block_idx][idx] - spilled_registers;
/* Consider register pressure from linear predecessors. This can affect
* reg_pressure if the branch instructions define sgprs. */
for (unsigned pred : block->linear_preds) {
reg_pressure.sgpr = std::max<int16_t>(
reg_pressure.sgpr, ctx.register_demand[pred].back().sgpr - spilled_registers.sgpr);
}
RegisterDemand reg_pressure = get_live_in_demand(ctx, block_idx);
reg_pressure -= spilled_registers;
while (reg_pressure.exceeds(ctx.target_pressure)) {
assert(!partial_spills.empty());
@ -664,19 +659,6 @@ RegisterDemand init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_id
return spilled_registers;
}
/* Returns the register demand in effect right before instruction `idx` of
 * block `block_idx`.
 *
 * For idx > 0 this is the demand stored for the preceding instruction.
 * For idx == 0 there is no preceding entry, so the value is obtained from the
 * instruction-level get_demand_before() overload, passing the first
 * instruction's own demand and a null "instruction before". */
RegisterDemand get_demand_before(spill_ctx& ctx, unsigned block_idx, unsigned idx)
{
   if (idx != 0)
      return ctx.register_demand[block_idx][idx - 1];

   /* Work on a copy so the stored per-instruction demand is left untouched. */
   RegisterDemand first_demand = ctx.register_demand[block_idx][0];
   aco_ptr<Instruction>& first_instr = ctx.program->blocks[block_idx].instructions[0];
   aco_ptr<Instruction> no_predecessor(nullptr);
   return get_demand_before(first_demand, first_instr, no_predecessor);
}
void add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
{
/* no coupling code necessary */
@ -769,8 +751,7 @@ void add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
/* iterate the phi nodes for which operands to spill at the predecessor */
for (aco_ptr<Instruction>& phi : block->instructions) {
if (phi->opcode != aco_opcode::p_phi &&
phi->opcode != aco_opcode::p_linear_phi)
if (!is_phi(phi))
break;
/* if the phi is not spilled, add to instructions */