nir/opt_if: split ALU from Phi more aggressively

If the only user is a trivial bcsel which in a second step
can be turned into a phi, this conversion is also worth it
even if the previous result is not undefined or constant.
Allows for some more loop unrolling or saves a few instructions.

Totals from 62 (0.04% of 139391) affected shaders (NAVI10):
SGPRs: 4976 -> 4992 (+0.32%)
VGPRs: 4408 -> 4472 (+1.45%); split: -0.45%, +1.91%
CodeSize: 453632 -> 464000 (+2.29%); split: -0.32%, +2.60%
MaxWaves: 527 -> 511 (-3.04%); split: +0.38%, -3.42%
Instrs: 84940 -> 86681 (+2.05%); split: -0.36%, +2.41%
Cycles: 11946844 -> 11783708 (-1.37%); split: -1.40%, +0.04%
VMEM: 9403 -> 10357 (+10.15%); split: +11.59%, -1.45%
SMEM: 3003 -> 3025 (+0.73%); split: +1.07%, -0.33%
VClause: 1756 -> 1997 (+13.72%); split: -0.11%, +13.84%
SClause: 2914 -> 2915 (+0.03%); split: -0.10%, +0.14%
Copies: 6426 -> 6768 (+5.32%); split: -4.14%, +9.46%
Branches: 2105 -> 2102 (-0.14%); split: -1.66%, +1.52%
PreSGPRs: 2921 -> 2909 (-0.41%); split: -0.55%, +0.14%
PreVGPRs: 4151 -> 4179 (+0.67%); split: -0.24%, +0.92%

cc: mesa-stable

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8123>
This commit is contained in:
Daniel Schürmann 2020-12-16 10:33:59 +01:00 committed by Marge Bot
parent 7192362531
commit 8513b12590
1 changed files with 100 additions and 76 deletions

View File

@ -295,6 +295,39 @@ alu_instr_is_type_conversion(const nir_alu_instr *alu)
nir_op_infos[alu->op].output_type != nir_op_infos[alu->op].input_types[0];
}
static bool
is_trivial_bcsel(const nir_instr *instr, bool allow_non_phi_src)
{
if (instr->type != nir_instr_type_alu)
return false;
nir_alu_instr *const bcsel = nir_instr_as_alu(instr);
if (bcsel->op != nir_op_bcsel &&
bcsel->op != nir_op_b32csel &&
bcsel->op != nir_op_fcsel)
return false;
for (unsigned i = 0; i < 3; i++) {
if (!nir_alu_src_is_trivial_ssa(bcsel, i) ||
bcsel->src[i].src.ssa->parent_instr->block != instr->block)
return false;
if (bcsel->src[i].src.ssa->parent_instr->type != nir_instr_type_phi) {
/* opt_split_alu_of_phi() is able to peel that src from the loop */
if (i == 0 || !allow_non_phi_src)
return false;
allow_non_phi_src = false;
}
}
nir_foreach_phi_src(src, nir_instr_as_phi(bcsel->src[0].src.ssa->parent_instr)) {
if (!nir_src_is_const(src->src))
return false;
}
return true;
}
/**
* Splits ALU instructions that have a source that is a phi node
*
@ -306,12 +339,13 @@ alu_instr_is_type_conversion(const nir_alu_instr *alu)
*
* - At least one source of the instruction is a phi node from the header block.
*
* - The phi node selects a constant or undef from the block before the loop.
*
* - Any non-phi sources of the ALU instruction come from a block that
* dominates the block before the loop. The most common failure mode for
* this check is sources that are generated in the loop header block.
*
* - The phi node selects a constant or undef from the block before the loop or
* the only ALU user is a trivial bcsel that gets removed by peeling the ALU
*
* The split process splits the original ALU instruction into two, one at the
* bottom of the loop and one at the block before the loop. The instruction
* before the loop computes the value on the first iteration, and the
@ -450,60 +484,72 @@ opt_split_alu_of_phi(nir_builder *b, nir_loop *loop)
}
}
if (has_phi_src_from_prev_block && all_non_phi_exist_in_prev_block &&
(is_prev_result_undef || is_prev_result_const)) {
nir_block *const continue_block = find_continue_block(loop);
if (!has_phi_src_from_prev_block || !all_non_phi_exist_in_prev_block)
continue;
b->cursor = nir_after_block(prev_block);
nir_ssa_def *prev_value = clone_alu_and_replace_src_defs(b, alu, prev_srcs);
if (!is_prev_result_undef && !is_prev_result_const) {
/* check if the only user is a trivial bcsel */
if (!list_is_empty(&alu->dest.dest.ssa.if_uses) ||
!list_is_singular(&alu->dest.dest.ssa.uses))
continue;
/* Make a copy of the original ALU instruction. Replace the sources
* of the new instruction that read a phi with an undef source from
* prev_block with the non-undef source of that phi.
*
* Insert the new instruction at the end of the continue block.
*/
b->cursor = nir_after_block_before_jump(continue_block);
nir_ssa_def *const alu_copy =
clone_alu_and_replace_src_defs(b, alu, continue_srcs);
/* Make a new phi node that selects a value from prev_block and the
* result of the new instruction from continue_block.
*/
nir_phi_instr *const phi = nir_phi_instr_create(b->shader);
nir_phi_src *phi_src;
phi_src = ralloc(phi, nir_phi_src);
phi_src->pred = prev_block;
phi_src->src = nir_src_for_ssa(prev_value);
exec_list_push_tail(&phi->srcs, &phi_src->node);
phi_src = ralloc(phi, nir_phi_src);
phi_src->pred = continue_block;
phi_src->src = nir_src_for_ssa(alu_copy);
exec_list_push_tail(&phi->srcs, &phi_src->node);
nir_ssa_dest_init(&phi->instr, &phi->dest,
alu_copy->num_components, alu_copy->bit_size, NULL);
b->cursor = nir_after_phis(header_block);
nir_builder_instr_insert(b, &phi->instr);
/* Modify all readers of the original ALU instruction to read the
* result of the phi.
*/
nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
nir_src_for_ssa(&phi->dest.ssa));
/* Since the original ALU instruction no longer has any readers, just
* remove it.
*/
nir_instr_remove_v(&alu->instr);
ralloc_free(alu);
progress = true;
nir_src *use = list_first_entry(&alu->dest.dest.ssa.uses, nir_src, use_link);
if (!is_trivial_bcsel(use->parent_instr, true))
continue;
}
/* Split ALU of Phi */
nir_block *const continue_block = find_continue_block(loop);
b->cursor = nir_after_block(prev_block);
nir_ssa_def *prev_value = clone_alu_and_replace_src_defs(b, alu, prev_srcs);
/* Make a copy of the original ALU instruction. Replace the sources
* of the new instruction that read a phi with an undef source from
* prev_block with the non-undef source of that phi.
*
* Insert the new instruction at the end of the continue block.
*/
b->cursor = nir_after_block_before_jump(continue_block);
nir_ssa_def *const alu_copy =
clone_alu_and_replace_src_defs(b, alu, continue_srcs);
/* Make a new phi node that selects a value from prev_block and the
* result of the new instruction from continue_block.
*/
nir_phi_instr *const phi = nir_phi_instr_create(b->shader);
nir_phi_src *phi_src;
phi_src = ralloc(phi, nir_phi_src);
phi_src->pred = prev_block;
phi_src->src = nir_src_for_ssa(prev_value);
exec_list_push_tail(&phi->srcs, &phi_src->node);
phi_src = ralloc(phi, nir_phi_src);
phi_src->pred = continue_block;
phi_src->src = nir_src_for_ssa(alu_copy);
exec_list_push_tail(&phi->srcs, &phi_src->node);
nir_ssa_dest_init(&phi->instr, &phi->dest,
alu_copy->num_components, alu_copy->bit_size, NULL);
b->cursor = nir_after_phis(header_block);
nir_builder_instr_insert(b, &phi->instr);
/* Modify all readers of the original ALU instruction to read the
* result of the phi.
*/
nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
nir_src_for_ssa(&phi->dest.ssa));
/* Since the original ALU instruction no longer has any readers, just
* remove it.
*/
nir_instr_remove_v(&alu->instr);
ralloc_free(alu);
progress = true;
}
return progress;
@ -609,32 +655,10 @@ opt_simplify_bcsel_of_phi(nir_builder *b, nir_loop *loop)
* https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/170#note_110305
*/
nir_foreach_instr_safe(instr, header_block) {
if (instr->type != nir_instr_type_alu)
if (!is_trivial_bcsel(instr, false))
continue;
nir_alu_instr *const bcsel = nir_instr_as_alu(instr);
if (bcsel->op != nir_op_bcsel &&
bcsel->op != nir_op_b32csel &&
bcsel->op != nir_op_fcsel)
continue;
bool match = true;
for (unsigned i = 0; i < 3; i++) {
/* FINISHME: The abs, negate and swizzled cases could be handled by
* adding move instructions at the bottom of the continue block and
* more phi nodes in the header_block.
*/
if (!nir_alu_src_is_trivial_ssa(bcsel, i) ||
bcsel->src[i].src.ssa->parent_instr->type != nir_instr_type_phi ||
bcsel->src[i].src.ssa->parent_instr->block != header_block) {
match = false;
break;
}
}
if (!match)
continue;
nir_phi_instr *const cond_phi =
nir_instr_as_phi(bcsel->src[0].src.ssa->parent_instr);