From 5f6c5e5b86f366c6fe0a0911fdc1926f1e0d1d5f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= <daniel@schuermann.dev>
Date: Thu, 19 Nov 2020 12:21:17 +0100
Subject: [PATCH] nir: don't sink instructions into loops

Repeatedly loading constants or evaluating ALU operations inside
loops doesn't seem beneficial. Keeping these instructions outside of
loops might increase register pressure, but the tradeoff seems worth it.

Totals from 13629 (9.77% of 139517) affected shaders (RAVEN):
SGPRs: 1179481 -> 1184697 (+0.44%); split: -0.03%, +0.47%
VGPRs: 978776 -> 978732 (-0.00%); split: -0.02%, +0.02%
SpillSGPRs: 51036 -> 50943 (-0.18%); split: -1.35%, +1.17%
CodeSize: 113775020 -> 113428812 (-0.30%); split: -0.34%, +0.04%
MaxWaves: 49877 -> 49881 (+0.01%); split: +0.02%, -0.01%
Instrs: 22295979 -> 22204936 (-0.41%); split: -0.42%, +0.02%
Cycles: 1637198832 -> 1626916048 (-0.63%); split: -0.64%, +0.01%
VMEM: 2403434 -> 2507645 (+4.34%); split: +4.76%, -0.42%
SMEM: 849676 -> 834576 (-1.78%); split: +0.60%, -2.38%
VClause: 412396 -> 398139 (-3.46%); split: -3.46%, +0.01%
SClause: 810480 -> 817349 (+0.85%); split: -0.19%, +1.04%
Copies: 2188260 -> 2166716 (-0.98%); split: -1.18%, +0.19%
Branches: 761204 -> 760475 (-0.10%); split: -0.15%, +0.05%
PreSGPRs: 972892 -> 981054 (+0.84%); split: -0.05%, +0.89%
PreVGPRs: 925390 -> 925420 (+0.00%); split: -0.02%, +0.02%

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7694>
---
 src/compiler/nir/nir_opt_sink.c | 28 ++++++++--------------------
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/src/compiler/nir/nir_opt_sink.c b/src/compiler/nir/nir_opt_sink.c
index 03feacfedec..25ce7aaf1fb 100644
--- a/src/compiler/nir/nir_opt_sink.c
+++ b/src/compiler/nir/nir_opt_sink.c
@@ -131,7 +131,7 @@ adjust_block_for_loops(nir_block *use_block, nir_block *def_block,
  * the uses
  */
 static nir_block *
-get_preferred_block(nir_ssa_def *def, bool sink_into_loops, bool sink_out_of_loops)
+get_preferred_block(nir_ssa_def *def, bool sink_out_of_loops)
 {
    nir_block *lca = NULL;
 
@@ -166,24 +166,13 @@ get_preferred_block(nir_ssa_def *def, bool sink_into_loops, bool sink_out_of_loo
       lca = nir_dominance_lca(lca, use_block);
    }
 
-   /* If we're moving a load_ubo or load_interpolated_input, we don't want to
-    * sink it down into loops, which may result in accessing memory or shared
-    * functions multiple times.  Sink it just above the start of the loop
-    * where it's used.  For load_consts, undefs, and comparisons, we expect
-    * the driver to be able to emit them as simple ALU ops, so sinking as far
-    * in as we can go is probably worth it for register pressure.
+   /* We don't sink any instructions into loops to avoid repeated executions.
+    * This might occasionally increase register pressure, but seems overall
+    * the better choice.
     */
-   if (!sink_into_loops) {
-      lca = adjust_block_for_loops(lca, def->parent_instr->block,
-                                   sink_out_of_loops);
-      assert(nir_block_dominates(def->parent_instr->block, lca));
-   } else {
-      /* sink_into_loops = true and sink_out_of_loops = false isn't
-       * implemented yet because it's not used.
-       */
-      assert(sink_out_of_loops);
-   }
-
+   lca = adjust_block_for_loops(lca, def->parent_instr->block,
+                                sink_out_of_loops);
+   assert(nir_block_dominates(def->parent_instr->block, lca));
 
    return lca;
 }
@@ -227,7 +216,6 @@ nir_opt_sink(nir_shader *shader, nir_move_options options)
 
             nir_ssa_def *def = nir_instr_ssa_def(instr);
 
-            bool sink_into_loops = instr->type != nir_instr_type_intrinsic;
             /* Don't sink load_ubo out of loops because that can make its
              * resource divergent and break code like that which is generated
              * by nir_lower_non_uniform_access.
@@ -236,7 +224,7 @@ nir_opt_sink(nir_shader *shader, nir_move_options options)
                instr->type != nir_instr_type_intrinsic ||
                nir_instr_as_intrinsic(instr)->intrinsic != nir_intrinsic_load_ubo;
             nir_block *use_block =
-                  get_preferred_block(def, sink_into_loops, sink_out_of_loops);
+                  get_preferred_block(def, sink_out_of_loops);
 
             if (!use_block || use_block == instr->block)
                continue;