nir: don't sink instructions into loops
Repeatedly loading constants or evaluating ALU operations in loops doesn't seem beneficial. This might increase the register pressure, but the tradeoff seems worth it.

Totals from 13629 (9.77% of 139517) affected shaders (RAVEN):
SGPRs: 1179481 -> 1184697 (+0.44%); split: -0.03%, +0.47%
VGPRs: 978776 -> 978732 (-0.00%); split: -0.02%, +0.02%
SpillSGPRs: 51036 -> 50943 (-0.18%); split: -1.35%, +1.17%
CodeSize: 113775020 -> 113428812 (-0.30%); split: -0.34%, +0.04%
MaxWaves: 49877 -> 49881 (+0.01%); split: +0.02%, -0.01%
Instrs: 22295979 -> 22204936 (-0.41%); split: -0.42%, +0.02%
Cycles: 1637198832 -> 1626916048 (-0.63%); split: -0.64%, +0.01%
VMEM: 2403434 -> 2507645 (+4.34%); split: +4.76%, -0.42%
SMEM: 849676 -> 834576 (-1.78%); split: +0.60%, -2.38%
VClause: 412396 -> 398139 (-3.46%); split: -3.46%, +0.01%
SClause: 810480 -> 817349 (+0.85%); split: -0.19%, +1.04%
Copies: 2188260 -> 2166716 (-0.98%); split: -1.18%, +0.19%
Branches: 761204 -> 760475 (-0.10%); split: -0.15%, +0.05%
PreSGPRs: 972892 -> 981054 (+0.84%); split: -0.05%, +0.89%
PreVGPRs: 925390 -> 925420 (+0.00%); split: -0.02%, +0.02%

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7694>
This commit is contained in:
parent
b02e15d1a3
commit
5f6c5e5b86
|
@ -131,7 +131,7 @@ adjust_block_for_loops(nir_block *use_block, nir_block *def_block,
|
||||||
* the uses
|
* the uses
|
||||||
*/
|
*/
|
||||||
static nir_block *
|
static nir_block *
|
||||||
get_preferred_block(nir_ssa_def *def, bool sink_into_loops, bool sink_out_of_loops)
|
get_preferred_block(nir_ssa_def *def, bool sink_out_of_loops)
|
||||||
{
|
{
|
||||||
nir_block *lca = NULL;
|
nir_block *lca = NULL;
|
||||||
|
|
||||||
|
@ -166,24 +166,13 @@ get_preferred_block(nir_ssa_def *def, bool sink_into_loops, bool sink_out_of_loo
|
||||||
lca = nir_dominance_lca(lca, use_block);
|
lca = nir_dominance_lca(lca, use_block);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If we're moving a load_ubo or load_interpolated_input, we don't want to
|
/* We don't sink any instructions into loops to avoid repeated executions.
|
||||||
* sink it down into loops, which may result in accessing memory or shared
|
* This might occasionally increase register pressure, but seems overall
|
||||||
* functions multiple times. Sink it just above the start of the loop
|
* the better choice.
|
||||||
* where it's used. For load_consts, undefs, and comparisons, we expect
|
|
||||||
* the driver to be able to emit them as simple ALU ops, so sinking as far
|
|
||||||
* in as we can go is probably worth it for register pressure.
|
|
||||||
*/
|
*/
|
||||||
if (!sink_into_loops) {
|
lca = adjust_block_for_loops(lca, def->parent_instr->block,
|
||||||
lca = adjust_block_for_loops(lca, def->parent_instr->block,
|
sink_out_of_loops);
|
||||||
sink_out_of_loops);
|
assert(nir_block_dominates(def->parent_instr->block, lca));
|
||||||
assert(nir_block_dominates(def->parent_instr->block, lca));
|
|
||||||
} else {
|
|
||||||
/* sink_into_loops = true and sink_out_of_loops = false isn't
|
|
||||||
* implemented yet because it's not used.
|
|
||||||
*/
|
|
||||||
assert(sink_out_of_loops);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
return lca;
|
return lca;
|
||||||
}
|
}
|
||||||
|
@ -227,7 +216,6 @@ nir_opt_sink(nir_shader *shader, nir_move_options options)
|
||||||
|
|
||||||
nir_ssa_def *def = nir_instr_ssa_def(instr);
|
nir_ssa_def *def = nir_instr_ssa_def(instr);
|
||||||
|
|
||||||
bool sink_into_loops = instr->type != nir_instr_type_intrinsic;
|
|
||||||
/* Don't sink load_ubo out of loops because that can make its
|
/* Don't sink load_ubo out of loops because that can make its
|
||||||
* resource divergent and break code like that which is generated
|
* resource divergent and break code like that which is generated
|
||||||
* by nir_lower_non_uniform_access.
|
* by nir_lower_non_uniform_access.
|
||||||
|
@ -236,7 +224,7 @@ nir_opt_sink(nir_shader *shader, nir_move_options options)
|
||||||
instr->type != nir_instr_type_intrinsic ||
|
instr->type != nir_instr_type_intrinsic ||
|
||||||
nir_instr_as_intrinsic(instr)->intrinsic != nir_intrinsic_load_ubo;
|
nir_instr_as_intrinsic(instr)->intrinsic != nir_intrinsic_load_ubo;
|
||||||
nir_block *use_block =
|
nir_block *use_block =
|
||||||
get_preferred_block(def, sink_into_loops, sink_out_of_loops);
|
get_preferred_block(def, sink_out_of_loops);
|
||||||
|
|
||||||
if (!use_block || use_block == instr->block)
|
if (!use_block || use_block == instr->block)
|
||||||
continue;
|
continue;
|
||||||
|
|
Loading…
Reference in New Issue