aco: optimize discard_if when WQM is not needed afterwards

Totals from 11560 (8.57% of 134913) affected shaders: (GFX10.3)
CodeSize: 12092560 -> 11997652 (-0.78%)
Instrs: 2205325 -> 2181598 (-1.08%)
Latency: 15376048 -> 15356958 (-0.12%); split: -0.12%, +0.00%
InvThroughput: 3526105 -> 3525120 (-0.03%); split: -0.03%, +0.00%
Copies: 98543 -> 87601 (-11.10%)
Branches: 16919 -> 16873 (-0.27%)
PreSGPRs: 291584 -> 291532 (-0.02%)

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14805>
This commit is contained in:
Daniel Schürmann 2022-02-04 17:13:19 +01:00 committed by Marge Bot
parent 13c3137960
commit 5e9df85b1a
1 changed file with 24 additions and 14 deletions

View File

@@ -708,11 +708,29 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio
WQMState needs = ctx.handle_wqm ? ctx.info[block->index].instr_needs[idx] : Unspecified;
if (needs == WQM && state != WQM) {
transition_to_WQM(ctx, bld, block->index);
state = WQM;
} else if (needs == Exact && state != Exact) {
transition_to_Exact(ctx, bld, block->index);
state = Exact;
}
if (instr->opcode == aco_opcode::p_discard_if) {
if (ctx.info[block->index].block_needs & Preserve_WQM) {
assert(block->kind & block_kind_top_level);
transition_to_WQM(ctx, bld, block->index);
ctx.info[block->index].exec.back().second &= ~mask_type_global;
Operand current_exec = Operand(exec, bld.lm);
if (block->kind & block_kind_top_level) {
if (needs == Preserve_WQM) {
/* Preserve the WQM mask */
transition_to_WQM(ctx, bld, block->index);
ctx.info[block->index].exec.back().second &= ~mask_type_global;
} else if (ctx.info[block->index].exec.size() == 2) {
assert(state == WQM);
/* Transition to Exact without extra instruction */
ctx.info[block->index].exec.pop_back();
current_exec = get_exec_op(ctx.info[block->index].exec.back().first);
ctx.info[block->index].exec[0].first = Operand(bld.lm);
}
}
Temp cond, exit_cond;
@@ -727,7 +745,7 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio
cond = instr->operands[0].getTemp();
/* discard from current exec */
exit_cond = bld.sop2(Builder::s_andn2, Definition(exec, bld.lm), bld.def(s1, scc),
Operand(exec, bld.lm), cond)
current_exec, cond)
.def(1)
.getTemp();
}
@@ -745,15 +763,7 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio
instr->operands[0] = bld.scc(exit_cond);
assert(!ctx.handle_wqm || (ctx.info[block->index].exec[0].second & mask_type_wqm) == 0);
} else if (needs == WQM && state != WQM) {
transition_to_WQM(ctx, bld, block->index);
state = WQM;
} else if (needs == Exact && state != Exact) {
transition_to_Exact(ctx, bld, block->index);
state = Exact;
}
if (instr->opcode == aco_opcode::p_is_helper) {
} else if (instr->opcode == aco_opcode::p_is_helper) {
Definition dst = instr->definitions[0];
assert(dst.size() == bld.lm.size());
if (state == Exact) {