aco: optimize boolean phis with uniform selections
Even though the boolean can be divergent, the control flow can be (at least partially) uniform. For example, we don't have to create any s_andn2_b64/s_and_b64/s_or_b64 instructions with this code: a = ... loop { b = bool_phi a, c if (uniform) break c = ... } d = phi c fossil-db (Navi): Totals from 5506 (4.05% of 135946) affected shaders: SGPRs: 605720 -> 604024 (-0.28%) SpillSGPRs: 52025 -> 51733 (-0.56%) CodeSize: 65221188 -> 64957808 (-0.40%); split: -0.41%, +0.00% Instrs: 12637881 -> 12584610 (-0.42%); split: -0.42%, +0.00% Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3388>
This commit is contained in:
parent
f622e80494
commit
9a089baff1
|
@ -35,6 +35,9 @@
|
|||
namespace aco {
|
||||
|
||||
struct ssa_state {
|
||||
bool checked_preds_for_uniform;
|
||||
bool all_preds_uniform;
|
||||
|
||||
bool needs_init;
|
||||
uint64_t cur_undef_operands;
|
||||
|
||||
|
@ -152,6 +155,19 @@ void lower_divergent_bool_phi(Program *program, ssa_state *state, Block *block,
|
|||
{
|
||||
Builder bld(program);
|
||||
|
||||
if (!state->checked_preds_for_uniform) {
|
||||
state->all_preds_uniform = !(block->kind & block_kind_merge);
|
||||
for (unsigned pred : block->logical_preds)
|
||||
state->all_preds_uniform = state->all_preds_uniform && (program->blocks[pred].kind & block_kind_uniform);
|
||||
state->checked_preds_for_uniform = true;
|
||||
}
|
||||
|
||||
if (state->all_preds_uniform) {
|
||||
assert(block->logical_preds.size() == block->linear_preds.size());
|
||||
phi->opcode = aco_opcode::p_linear_phi;
|
||||
return;
|
||||
}
|
||||
|
||||
state->latest.resize(program->blocks.size());
|
||||
|
||||
uint64_t undef_operands = 0;
|
||||
|
@ -180,14 +196,23 @@ void lower_divergent_bool_phi(Program *program, ssa_state *state, Block *block,
|
|||
state->writes[block->logical_preds[i]] = program->allocateId();
|
||||
}
|
||||
|
||||
bool uniform_merge = block->kind & block_kind_loop_header;
|
||||
|
||||
for (unsigned i = 0; i < phi->operands.size(); i++) {
|
||||
Block *pred = &program->blocks[block->logical_preds[i]];
|
||||
|
||||
bool need_get_ssa = !uniform_merge;
|
||||
if (block->kind & block_kind_loop_header && !(pred->kind & block_kind_uniform))
|
||||
uniform_merge = false;
|
||||
|
||||
if (phi->operands[i].isUndefined())
|
||||
continue;
|
||||
|
||||
Operand cur = get_ssa(program, pred->index, state, true);
|
||||
Operand cur(bld.lm);
|
||||
if (need_get_ssa)
|
||||
cur = get_ssa(program, pred->index, state, true);
|
||||
assert(cur.regClass() == bld.lm);
|
||||
|
||||
Temp new_cur = {state->writes.at(pred->index), program->lane_mask};
|
||||
assert(new_cur.regClass() == bld.lm);
|
||||
|
||||
|
@ -241,6 +266,7 @@ void lower_phis(Program* program)
|
|||
ssa_state state;
|
||||
|
||||
for (Block& block : program->blocks) {
|
||||
state.checked_preds_for_uniform = false;
|
||||
state.needs_init = true;
|
||||
for (aco_ptr<Instruction>& phi : block.instructions) {
|
||||
if (phi->opcode == aco_opcode::p_phi) {
|
||||
|
|
Loading…
Reference in New Issue