aco: Eliminate useless exec writes in jump threading.
Eliminate exec writes which are unused by subsequent instructions. Fossil DB results on Sienna Cichlid: Totals from 80960 (54.03% of 149839) affected shaders: CodeSize: 162953748 -> 161749372 (-0.74%) Instrs: 31462273 -> 31161179 (-0.96%) Copies: 2171239 -> 1942293 (-10.54%) Branches: 807771 -> 807747 (-0.00%) Signed-off-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Tony Wasserka <tony.wasserka@gmx.de> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10691>
This commit is contained in:
parent
e230dcc30b
commit
bc13049747
|
@ -40,12 +40,14 @@ struct ssa_elimination_ctx {
|
||||||
std::vector<std::vector<phi_info_item>> logical_phi_info;
|
std::vector<std::vector<phi_info_item>> logical_phi_info;
|
||||||
std::vector<std::vector<phi_info_item>> linear_phi_info;
|
std::vector<std::vector<phi_info_item>> linear_phi_info;
|
||||||
std::vector<bool> empty_blocks;
|
std::vector<bool> empty_blocks;
|
||||||
|
std::vector<bool> blocks_incoming_exec_used;
|
||||||
Program* program;
|
Program* program;
|
||||||
|
|
||||||
ssa_elimination_ctx(Program* program_)
|
ssa_elimination_ctx(Program* program_)
|
||||||
: logical_phi_info(program_->blocks.size())
|
: logical_phi_info(program_->blocks.size())
|
||||||
, linear_phi_info(program_->blocks.size())
|
, linear_phi_info(program_->blocks.size())
|
||||||
, empty_blocks(program_->blocks.size(), true)
|
, empty_blocks(program_->blocks.size(), true)
|
||||||
|
, blocks_incoming_exec_used(program_->blocks.size(), true)
|
||||||
, program(program_) {}
|
, program(program_) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -274,10 +276,84 @@ void try_remove_simple_block(ssa_elimination_ctx& ctx, Block* block)
|
||||||
block->linear_succs.clear();
|
block->linear_succs.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool instr_writes_exec(Instruction* instr)
|
||||||
|
{
|
||||||
|
for (Definition& def : instr->definitions)
|
||||||
|
if (def.physReg() == exec || def.physReg() == exec_hi)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void eliminate_useless_exec_writes_in_block(ssa_elimination_ctx& ctx, Block& block)
|
||||||
|
{
|
||||||
|
/* Check if any successor needs the outgoing exec mask from the current block. */
|
||||||
|
|
||||||
|
bool exec_write_used;
|
||||||
|
|
||||||
|
if (!ctx.logical_phi_info[block.index].empty()) {
|
||||||
|
exec_write_used = true;
|
||||||
|
} else {
|
||||||
|
bool will_insert_exec_copy = false;
|
||||||
|
bool will_inserted_exec_copy_need_exec = false;
|
||||||
|
|
||||||
|
for (const auto& successor_phi_info : ctx.linear_phi_info[block.index]) {
|
||||||
|
if (successor_phi_info.def.physReg() == exec)
|
||||||
|
will_insert_exec_copy = true;
|
||||||
|
if (successor_phi_info.op.physReg() == exec)
|
||||||
|
will_inserted_exec_copy_need_exec = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (will_insert_exec_copy && !will_inserted_exec_copy_need_exec)
|
||||||
|
exec_write_used = false;
|
||||||
|
else
|
||||||
|
/* blocks_incoming_exec_used is initialized to true, so this is correct even for loops. */
|
||||||
|
exec_write_used = std::any_of(block.linear_succs.begin(), block.linear_succs.end(),
|
||||||
|
[&ctx](int succ_idx) { return ctx.blocks_incoming_exec_used[succ_idx]; });
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Go through all instructions and eliminate useless exec writes. */
|
||||||
|
|
||||||
|
for (int i = block.instructions.size() - 1; i >= 0; --i) {
|
||||||
|
aco_ptr<Instruction>& instr = block.instructions[i];
|
||||||
|
|
||||||
|
/* We already take information from phis into account before the loop, so let's just break on phis. */
|
||||||
|
if (instr->opcode == aco_opcode::p_linear_phi || instr->opcode == aco_opcode::p_phi)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* See if the current instruction needs or writes exec. */
|
||||||
|
bool needs_exec = needs_exec_mask(instr.get());
|
||||||
|
bool writes_exec = instr_writes_exec(instr.get());
|
||||||
|
|
||||||
|
/* See if we found an unused exec write. */
|
||||||
|
if (writes_exec && !exec_write_used) {
|
||||||
|
instr.reset();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* For a newly encountered exec write, clear the used flag. */
|
||||||
|
if (writes_exec)
|
||||||
|
exec_write_used = false;
|
||||||
|
|
||||||
|
/* If the current instruction needs exec, mark it as used. */
|
||||||
|
exec_write_used |= needs_exec;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Remember if the current block needs an incoming exec mask from its predecessors. */
|
||||||
|
|
||||||
|
ctx.blocks_incoming_exec_used[block.index] = exec_write_used;
|
||||||
|
|
||||||
|
/* Cleanup: remove deleted instructions from the vector. */
|
||||||
|
|
||||||
|
auto new_end = std::remove(block.instructions.begin(), block.instructions.end(), nullptr);
|
||||||
|
block.instructions.resize(new_end - block.instructions.begin());
|
||||||
|
}
|
||||||
|
|
||||||
void jump_threading(ssa_elimination_ctx& ctx)
|
void jump_threading(ssa_elimination_ctx& ctx)
|
||||||
{
|
{
|
||||||
for (int i = ctx.program->blocks.size() - 1; i >= 0; i--) {
|
for (int i = ctx.program->blocks.size() - 1; i >= 0; i--) {
|
||||||
Block* block = &ctx.program->blocks[i];
|
Block* block = &ctx.program->blocks[i];
|
||||||
|
eliminate_useless_exec_writes_in_block(ctx, *block);
|
||||||
|
|
||||||
if (!ctx.empty_blocks[i])
|
if (!ctx.empty_blocks[i])
|
||||||
continue;
|
continue;
|
||||||
|
|
Loading…
Reference in New Issue