aco: ignore parallelcopies to the same register on jump threading
The more conservative lowering to CSSA inserts unnecessary parallelcopies which might get coalesced and can be ignored on jump threading. v2: outline is_empty_block() check. Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3385> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3385>
This commit is contained in:
parent
427e5eeb02
commit
3bca0af25d
|
@ -111,6 +111,35 @@ void insert_parallelcopies(ssa_elimination_ctx& ctx)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool is_empty_block(Block* block, bool ignore_exec_writes)
|
||||||
|
{
|
||||||
|
/* check if this block is empty and the exec mask is not needed */
|
||||||
|
for (aco_ptr<Instruction>& instr : block->instructions) {
|
||||||
|
switch (instr->opcode) {
|
||||||
|
case aco_opcode::p_linear_phi:
|
||||||
|
case aco_opcode::p_phi:
|
||||||
|
case aco_opcode::p_logical_start:
|
||||||
|
case aco_opcode::p_logical_end:
|
||||||
|
case aco_opcode::p_branch:
|
||||||
|
break;
|
||||||
|
case aco_opcode::p_parallelcopy:
|
||||||
|
for (unsigned i = 0; i < instr->definitions.size(); i++) {
|
||||||
|
if (ignore_exec_writes && instr->definitions[i].physReg() == exec)
|
||||||
|
continue;
|
||||||
|
if (instr->definitions[i].physReg() != instr->operands[i].physReg())
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case aco_opcode::s_andn2_b64:
|
||||||
|
case aco_opcode::s_andn2_b32:
|
||||||
|
if (ignore_exec_writes && instr->definitions[0].physReg() == exec)
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
void try_remove_merge_block(ssa_elimination_ctx& ctx, Block* block)
|
void try_remove_merge_block(ssa_elimination_ctx& ctx, Block* block)
|
||||||
{
|
{
|
||||||
|
@ -120,22 +149,9 @@ void try_remove_merge_block(ssa_elimination_ctx& ctx, Block* block)
|
||||||
!(ctx.program->blocks[block->linear_succs[0]].kind & block_kind_merge))
|
!(ctx.program->blocks[block->linear_succs[0]].kind & block_kind_merge))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* check if this block is empty and the exec mask is not needed */
|
/* check if this block is empty */
|
||||||
for (aco_ptr<Instruction>& instr : block->instructions) {
|
if (!is_empty_block(block, true))
|
||||||
if (instr->opcode == aco_opcode::p_parallelcopy) {
|
|
||||||
if (instr->definitions[0].physReg() == exec)
|
|
||||||
continue;
|
|
||||||
else
|
|
||||||
return;
|
return;
|
||||||
}
|
|
||||||
|
|
||||||
if (instr->opcode != aco_opcode::p_linear_phi &&
|
|
||||||
instr->opcode != aco_opcode::p_phi &&
|
|
||||||
instr->opcode != aco_opcode::p_logical_start &&
|
|
||||||
instr->opcode != aco_opcode::p_logical_end &&
|
|
||||||
instr->opcode != aco_opcode::p_branch)
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* keep the branch instruction and remove the rest */
|
/* keep the branch instruction and remove the rest */
|
||||||
aco_ptr<Instruction> branch = std::move(block->instructions.back());
|
aco_ptr<Instruction> branch = std::move(block->instructions.back());
|
||||||
|
@ -146,18 +162,13 @@ void try_remove_merge_block(ssa_elimination_ctx& ctx, Block* block)
|
||||||
void try_remove_invert_block(ssa_elimination_ctx& ctx, Block* block)
|
void try_remove_invert_block(ssa_elimination_ctx& ctx, Block* block)
|
||||||
{
|
{
|
||||||
assert(block->linear_succs.size() == 2);
|
assert(block->linear_succs.size() == 2);
|
||||||
|
/* only remove this block if the successor got removed as well */
|
||||||
if (block->linear_succs[0] != block->linear_succs[1])
|
if (block->linear_succs[0] != block->linear_succs[1])
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* check if we can remove this block */
|
/* check if block is otherwise empty */
|
||||||
for (aco_ptr<Instruction>& instr : block->instructions) {
|
if (!is_empty_block(block, true))
|
||||||
if (instr->opcode != aco_opcode::p_linear_phi &&
|
|
||||||
instr->opcode != aco_opcode::p_phi &&
|
|
||||||
(instr->opcode != aco_opcode::s_andn2_b64 || ctx.program->wave_size != 64) &&
|
|
||||||
(instr->opcode != aco_opcode::s_andn2_b32 || ctx.program->wave_size != 32) &&
|
|
||||||
instr->opcode != aco_opcode::p_branch)
|
|
||||||
return;
|
return;
|
||||||
}
|
|
||||||
|
|
||||||
unsigned succ_idx = block->linear_succs[0];
|
unsigned succ_idx = block->linear_succs[0];
|
||||||
assert(block->linear_preds.size() == 2);
|
assert(block->linear_preds.size() == 2);
|
||||||
|
@ -179,12 +190,8 @@ void try_remove_invert_block(ssa_elimination_ctx& ctx, Block* block)
|
||||||
|
|
||||||
void try_remove_simple_block(ssa_elimination_ctx& ctx, Block* block)
|
void try_remove_simple_block(ssa_elimination_ctx& ctx, Block* block)
|
||||||
{
|
{
|
||||||
for (aco_ptr<Instruction>& instr : block->instructions) {
|
if (!is_empty_block(block, false))
|
||||||
if (instr->opcode != aco_opcode::p_logical_start &&
|
|
||||||
instr->opcode != aco_opcode::p_logical_end &&
|
|
||||||
instr->opcode != aco_opcode::p_branch)
|
|
||||||
return;
|
return;
|
||||||
}
|
|
||||||
|
|
||||||
Block& pred = ctx.program->blocks[block->linear_preds[0]];
|
Block& pred = ctx.program->blocks[block->linear_preds[0]];
|
||||||
Block& succ = ctx.program->blocks[block->linear_succs[0]];
|
Block& succ = ctx.program->blocks[block->linear_succs[0]];
|
||||||
|
|
Loading…
Reference in New Issue