From 0d29184f6985b5e88c3a32526850acd7c8f3ab46 Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Date: Thu, 5 Mar 2020 10:11:39 -0500
Subject: [PATCH] pan/bi: Lower and optimize NIR

Pretty much a copypaste from Midgard except where architectural
decisions diverge around vectorization. On that note, we will need our
own ALU scalarization pass at some point (or rather we'll need to extend
nir_lower_alu_scalar) to allow partial lowering for 8/16-bit ops. I.e.
we'll approximately need to lower

   vec4 16 ssa_2 = fadd ssa_0, ssa_1

to

   vec2 16 ssa_2 = fadd ssa_0.xy, ssa_1.xy
   vec2 16 ssa_3 = fadd ssa_0.zw, ssa_1.zw
   vec4 16 ssa_4 = vec4 ssa_2.x, ssa_2.y, ssa_3.x, ssa_3.y

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4097>
---
 src/panfrost/bifrost/bifrost_compile.c | 99 ++++++++++++++++++++++++++
 src/panfrost/bifrost/cmdline.c         |  2 -
 src/panfrost/bifrost/compiler.h        |  1 +
 3 files changed, 100 insertions(+), 2 deletions(-)

diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index 754876f89ee..87a447a9ab3 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -35,14 +35,113 @@
 #include "compiler.h"
 #include "bi_quirks.h"
 
+static int
+glsl_type_size(const struct glsl_type *type, bool bindless)
+{
+        return glsl_count_attribute_slots(type, false); /* `bindless` is not consulted */
+}
+
+static void
+bi_optimize_nir(nir_shader *nir)
+{
+        bool progress; /* handed to every NIR_PASS below; drives the do/while loop */
+        unsigned lower_flrp = 16 | 32 | 64; /* argument for nir_lower_flrp; zeroed after one use */
+        /* One-off lowerings, then an optimisation loop, then leave SSA. */
+        NIR_PASS(progress, nir, nir_lower_regs_to_ssa);
+        NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_fast);
+
+        nir_lower_tex_options lower_tex_options = {
+                .lower_txs_lod = true,
+                .lower_txp = ~0,
+                .lower_tex_without_implicit_lod = true,
+                .lower_txd = true,
+        };
+
+        NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options);
+
+        do { /* repeat the whole pass list until no pass reports progress */
+                progress = false;
+
+                NIR_PASS(progress, nir, nir_lower_var_copies);
+                NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
+
+                NIR_PASS(progress, nir, nir_copy_prop);
+                NIR_PASS(progress, nir, nir_opt_remove_phis);
+                NIR_PASS(progress, nir, nir_opt_dce);
+                NIR_PASS(progress, nir, nir_opt_dead_cf);
+                NIR_PASS(progress, nir, nir_opt_cse);
+                NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
+                NIR_PASS(progress, nir, nir_opt_algebraic);
+                NIR_PASS(progress, nir, nir_opt_constant_folding);
+
+                if (lower_flrp != 0) { /* only taken on the first iteration */
+                        bool lower_flrp_progress = false;
+                        NIR_PASS(lower_flrp_progress,
+                                 nir,
+                                 nir_lower_flrp,
+                                 lower_flrp,
+                                 false /* always_precise */,
+                                 nir->options->lower_ffma);
+                        if (lower_flrp_progress) {
+                                NIR_PASS(progress, nir,
+                                         nir_opt_constant_folding);
+                                progress = true; /* the flrp lowering itself counts as progress */
+                        }
+
+                        /* Nothing should rematerialize any flrps, so we only
+                         * need to do this lowering once.
+                         */
+                        lower_flrp = 0;
+                }
+
+                NIR_PASS(progress, nir, nir_opt_undef);
+                NIR_PASS(progress, nir, nir_opt_loop_unroll,
+                         nir_var_shader_in |
+                         nir_var_shader_out |
+                         nir_var_function_temp);
+        } while (progress);
+
+        NIR_PASS(progress, nir, nir_opt_algebraic_late);
+
+        /* Take us out of SSA */
+        NIR_PASS(progress, nir, nir_lower_locals_to_regs);
+        NIR_PASS(progress, nir, nir_convert_from_ssa, true);
+}
+
 void
 bifrost_compile_shader_nir(nir_shader *nir, bifrost_program *program, unsigned product_id)
 {
         bi_context *ctx = rzalloc(NULL, bi_context);
         ctx->nir = nir;
+        ctx->stage = nir->info.stage;
         ctx->quirks = bifrost_get_quirks(product_id);
 
+        /* Lower gl_Position pre-optimisation, but after lowering vars to ssa
+         * (so we don't accidentally duplicate the epilogue since mesa/st has
+         * messed with our I/O quite a bit already) */
+
+        NIR_PASS_V(nir, nir_lower_vars_to_ssa);
+
+        if (ctx->stage == MESA_SHADER_VERTEX) {
+                NIR_PASS_V(nir, nir_lower_viewport_transform);
+                NIR_PASS_V(nir, nir_lower_point_size, 1.0, 1024.0);
+        }
+
+        NIR_PASS_V(nir, nir_split_var_copies);
+        NIR_PASS_V(nir, nir_lower_global_vars_to_local);
+        NIR_PASS_V(nir, nir_lower_var_copies);
+        NIR_PASS_V(nir, nir_lower_vars_to_ssa);
+        NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0); /* sizes come from glsl_type_size() */
+        NIR_PASS_V(nir, nir_lower_ssbo);
+
+        /* We have to lower ALU to scalar ourselves since viewport
+         * transformations produce vector ops */
+        NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
+
+        bi_optimize_nir(nir);
         nir_print_shader(nir, stdout);
 
+        bi_print_shader(ctx, stdout); /* NOTE(review): ctx->blocks is never set up in this function - confirm this is safe */
+
         ralloc_free(ctx);
 }
diff --git a/src/panfrost/bifrost/cmdline.c b/src/panfrost/bifrost/cmdline.c
index bf55ded7df8..7658c7cc343 100644
--- a/src/panfrost/bifrost/cmdline.c
+++ b/src/panfrost/bifrost/cmdline.c
@@ -58,8 +58,6 @@ compile_shader(char **argv)
                 NIR_PASS_V(nir[i], nir_split_var_copies);
                 NIR_PASS_V(nir[i], nir_lower_var_copies);
 
-                NIR_PASS_V(nir[i], nir_lower_alu_to_scalar, NULL, NULL);
-
                 /* before buffers and vars_to_ssa */
                 NIR_PASS_V(nir[i], gl_nir_lower_images, true);
 
diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h
index 74b9e2205c3..97147d0a723 100644
--- a/src/panfrost/bifrost/compiler.h
+++ b/src/panfrost/bifrost/compiler.h
@@ -317,6 +317,7 @@ typedef struct bi_block {
 
 typedef struct {
        nir_shader *nir;
+       gl_shader_stage stage; /* copied from nir->info.stage by bifrost_compile_shader_nir */
        struct list_head blocks; /* list of bi_block */
        uint32_t quirks;
 } bi_context;