nir: don't sink instructions into loops

Repeatedly loading constants or evaluating ALU operations
in loops doesn't seem beneficial. Keeping these instructions outside
the loop might increase register pressure, but the tradeoff seems
worth it.

Totals from 13629 (9.77% of 139517) affected shaders (RAVEN):
SGPRs: 1179481 -> 1184697 (+0.44%); split: -0.03%, +0.47%
VGPRs: 978776 -> 978732 (-0.00%); split: -0.02%, +0.02%
SpillSGPRs: 51036 -> 50943 (-0.18%); split: -1.35%, +1.17%
CodeSize: 113775020 -> 113428812 (-0.30%); split: -0.34%, +0.04%
MaxWaves: 49877 -> 49881 (+0.01%); split: +0.02%, -0.01%
Instrs: 22295979 -> 22204936 (-0.41%); split: -0.42%, +0.02%
Cycles: 1637198832 -> 1626916048 (-0.63%); split: -0.64%, +0.01%
VMEM: 2403434 -> 2507645 (+4.34%); split: +4.76%, -0.42%
SMEM: 849676 -> 834576 (-1.78%); split: +0.60%, -2.38%
VClause: 412396 -> 398139 (-3.46%); split: -3.46%, +0.01%
SClause: 810480 -> 817349 (+0.85%); split: -0.19%, +1.04%
Copies: 2188260 -> 2166716 (-0.98%); split: -1.18%, +0.19%
Branches: 761204 -> 760475 (-0.10%); split: -0.15%, +0.05%
PreSGPRs: 972892 -> 981054 (+0.84%); split: -0.05%, +0.89%
PreVGPRs: 925390 -> 925420 (+0.00%); split: -0.02%, +0.02%

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7694>
This commit is contained in:
Daniel Schürmann 2020-11-19 12:21:17 +01:00
parent b02e15d1a3
commit 5f6c5e5b86
1 changed file with 8 additions and 20 deletions

View File

@ -131,7 +131,7 @@ adjust_block_for_loops(nir_block *use_block, nir_block *def_block,
* the uses
*/
static nir_block *
get_preferred_block(nir_ssa_def *def, bool sink_into_loops, bool sink_out_of_loops)
get_preferred_block(nir_ssa_def *def, bool sink_out_of_loops)
{
nir_block *lca = NULL;
@ -166,24 +166,13 @@ get_preferred_block(nir_ssa_def *def, bool sink_into_loops, bool sink_out_of_loo
lca = nir_dominance_lca(lca, use_block);
}
/* If we're moving a load_ubo or load_interpolated_input, we don't want to
* sink it down into loops, which may result in accessing memory or shared
* functions multiple times. Sink it just above the start of the loop
* where it's used. For load_consts, undefs, and comparisons, we expect
* the driver to be able to emit them as simple ALU ops, so sinking as far
* in as we can go is probably worth it for register pressure.
/* We don't sink any instructions into loops, to avoid executing them
* repeatedly. This might occasionally increase register pressure, but
* seems overall the better choice.
*/
if (!sink_into_loops) {
lca = adjust_block_for_loops(lca, def->parent_instr->block,
sink_out_of_loops);
assert(nir_block_dominates(def->parent_instr->block, lca));
} else {
/* sink_into_loops = true and sink_out_of_loops = false isn't
* implemented yet because it's not used.
*/
assert(sink_out_of_loops);
}
lca = adjust_block_for_loops(lca, def->parent_instr->block,
sink_out_of_loops);
assert(nir_block_dominates(def->parent_instr->block, lca));
return lca;
}
@ -227,7 +216,6 @@ nir_opt_sink(nir_shader *shader, nir_move_options options)
nir_ssa_def *def = nir_instr_ssa_def(instr);
bool sink_into_loops = instr->type != nir_instr_type_intrinsic;
/* Don't sink load_ubo out of loops because that can make its
* resource divergent and break code like that which is generated
* by nir_lower_non_uniform_access.
@ -236,7 +224,7 @@ nir_opt_sink(nir_shader *shader, nir_move_options options)
instr->type != nir_instr_type_intrinsic ||
nir_instr_as_intrinsic(instr)->intrinsic != nir_intrinsic_load_ubo;
nir_block *use_block =
get_preferred_block(def, sink_into_loops, sink_out_of_loops);
get_preferred_block(def, sink_out_of_loops);
if (!use_block || use_block == instr->block)
continue;