nir: don't sink instructions into loops

Repeatedly loading constants or evaluating ALU operations in loops
doesn't seem beneficial. This might increase the register pressure,
but the tradeoff seems worth it.

Totals from 13629 (9.77% of 139517) affected shaders (RAVEN):
SGPRs: 1179481 -> 1184697 (+0.44%); split: -0.03%, +0.47%
VGPRs: 978776 -> 978732 (-0.00%); split: -0.02%, +0.02%
SpillSGPRs: 51036 -> 50943 (-0.18%); split: -1.35%, +1.17%
CodeSize: 113775020 -> 113428812 (-0.30%); split: -0.34%, +0.04%
MaxWaves: 49877 -> 49881 (+0.01%); split: +0.02%, -0.01%
Instrs: 22295979 -> 22204936 (-0.41%); split: -0.42%, +0.02%
Cycles: 1637198832 -> 1626916048 (-0.63%); split: -0.64%, +0.01%
VMEM: 2403434 -> 2507645 (+4.34%); split: +4.76%, -0.42%
SMEM: 849676 -> 834576 (-1.78%); split: +0.60%, -2.38%
VClause: 412396 -> 398139 (-3.46%); split: -3.46%, +0.01%
SClause: 810480 -> 817349 (+0.85%); split: -0.19%, +1.04%
Copies: 2188260 -> 2166716 (-0.98%); split: -1.18%, +0.19%
Branches: 761204 -> 760475 (-0.10%); split: -0.15%, +0.05%
PreSGPRs: 972892 -> 981054 (+0.84%); split: -0.05%, +0.89%
PreVGPRs: 925390 -> 925420 (+0.00%); split: -0.02%, +0.02%

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7694>
2020-11-19 12:21:17 +01:00 · 2020-11-19 12:21:17 +01:00 · 5f6c5e5b86
parent b02e15d1a3
commit 5f6c5e5b86
1 changed files with 8 additions and 20 deletions
--- a/src/compiler/nir/nir_opt_sink.c
+++ b/src/compiler/nir/nir_opt_sink.c
@ -131,7 +131,7 @@ adjust_block_for_loops(nir_block *use_block, nir_block *def_block,
 * the uses
 */
 static nir_block *
-get_preferred_block(nir_ssa_def *def, bool sink_into_loops, bool sink_out_of_loops)
+get_preferred_block(nir_ssa_def *def, bool sink_out_of_loops)
 {
   nir_block *lca = NULL;

@ -166,24 +166,13 @@ get_preferred_block(nir_ssa_def *def, bool sink_into_loops, bool sink_out_of_loo
      lca = nir_dominance_lca(lca, use_block);
   }

-   /* If we're moving a load_ubo or load_interpolated_input, we don't want to
-    * sink it down into loops, which may result in accessing memory or shared
-    * functions multiple times.  Sink it just above the start of the loop
-    * where it's used.  For load_consts, undefs, and comparisons, we expect
-    * the driver to be able to emit them as simple ALU ops, so sinking as far
-    * in as we can go is probably worth it for register pressure.
+   /* We don't sink any instructions into loops, to avoid executing them
+    * repeatedly.  This might occasionally increase register pressure, but
+    * it seems to be the better choice overall.
    */
-   if (!sink_into_loops) {
-      lca = adjust_block_for_loops(lca, def->parent_instr->block,
-                                   sink_out_of_loops);
-      assert(nir_block_dominates(def->parent_instr->block, lca));
-   } else {
-      /* sink_into_loops = true and sink_out_of_loops = false isn't
-       * implemented yet because it's not used.
-       */
-      assert(sink_out_of_loops);
-   }
-
+   lca = adjust_block_for_loops(lca, def->parent_instr->block,
+                                sink_out_of_loops);
+   assert(nir_block_dominates(def->parent_instr->block, lca));

   return lca;
 }
@ -227,7 +216,6 @@ nir_opt_sink(nir_shader *shader, nir_move_options options)

            nir_ssa_def *def = nir_instr_ssa_def(instr);

-            bool sink_into_loops = instr->type != nir_instr_type_intrinsic;
            /* Don't sink load_ubo out of loops because that can make its
             * resource divergent and break code like that which is generated
             * by nir_lower_non_uniform_access.
@ -236,7 +224,7 @@ nir_opt_sink(nir_shader *shader, nir_move_options options)
               instr->type != nir_instr_type_intrinsic ||
               nir_instr_as_intrinsic(instr)->intrinsic != nir_intrinsic_load_ubo;
            nir_block *use_block =
-                  get_preferred_block(def, sink_into_loops, sink_out_of_loops);
+                  get_preferred_block(def, sink_out_of_loops);

            if (!use_block || use_block == instr->block)
               continue;