pan/bi: Lower and optimize NIR

Pretty much a copy-paste from Midgard, except where architectural decisions diverge around vectorization. On that note, we will need our own ALU scalarization pass at some point (or rather, we'll need to extend nir_lower_alu_to_scalar) to allow partial lowering for 8/16-bit ops. I.e. we'll approximately need to lower

    vec4 16 ssa_2 = fadd ssa_0, ssa_1

to

    vec2 16 ssa_2 = fadd ssa_0.xy, ssa_1.xy
    vec2 16 ssa_3 = fadd ssa_0.zw, ssa_1.zw
    vec4 16 ssa_4 = vec4 ssa_2.x, ssa_2.y, ssa_3.x, ssa_3.y

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4097>
2020-03-05 10:11:39 -05:00 · 2020-03-05 10:11:39 -05:00 · 0d29184f69
parent c652ff8caa
commit 0d29184f69
3 changed files with 100 additions and 2 deletions
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -35,14 +35,113 @@
 #include "compiler.h"
 #include "bi_quirks.h"

+static int
+glsl_type_size(const struct glsl_type *type, bool bindless)
+{
+        return glsl_count_attribute_slots(type, false);
+}
+
+static void
+bi_optimize_nir(nir_shader *nir)
+{
+        bool progress;
+        unsigned lower_flrp = 16 | 32 | 64;
+
+        NIR_PASS(progress, nir, nir_lower_regs_to_ssa);
+        NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_fast);
+
+        nir_lower_tex_options lower_tex_options = {
+                .lower_txs_lod = true,
+                .lower_txp = ~0,
+                .lower_tex_without_implicit_lod = true,
+                .lower_txd = true,
+        };
+
+        NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options);
+
+        do {
+                progress = false;
+
+                NIR_PASS(progress, nir, nir_lower_var_copies);
+                NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
+
+                NIR_PASS(progress, nir, nir_copy_prop);
+                NIR_PASS(progress, nir, nir_opt_remove_phis);
+                NIR_PASS(progress, nir, nir_opt_dce);
+                NIR_PASS(progress, nir, nir_opt_dead_cf);
+                NIR_PASS(progress, nir, nir_opt_cse);
+                NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
+                NIR_PASS(progress, nir, nir_opt_algebraic);
+                NIR_PASS(progress, nir, nir_opt_constant_folding);
+
+                if (lower_flrp != 0) {
+                        bool lower_flrp_progress = false;
+                        NIR_PASS(lower_flrp_progress,
+                                 nir,
+                                 nir_lower_flrp,
+                                 lower_flrp,
+                                 false /* always_precise */,
+                                 nir->options->lower_ffma);
+                        if (lower_flrp_progress) {
+                                NIR_PASS(progress, nir,
+                                         nir_opt_constant_folding);
+                                progress = true;
+                        }
+
+                        /* Nothing should rematerialize any flrps, so we only
+                         * need to do this lowering once.
+                         */
+                        lower_flrp = 0;
+                }
+
+                NIR_PASS(progress, nir, nir_opt_undef);
+                NIR_PASS(progress, nir, nir_opt_loop_unroll,
+                         nir_var_shader_in |
+                         nir_var_shader_out |
+                         nir_var_function_temp);
+        } while (progress);
+
+        NIR_PASS(progress, nir, nir_opt_algebraic_late);
+
+        /* Take us out of SSA */
+        NIR_PASS(progress, nir, nir_lower_locals_to_regs);
+        NIR_PASS(progress, nir, nir_convert_from_ssa, true);
+}
+
 void
 bifrost_compile_shader_nir(nir_shader *nir, bifrost_program *program, unsigned product_id)
 {
        bi_context *ctx = rzalloc(NULL, bi_context);
        ctx->nir = nir;
+        ctx->stage = nir->info.stage;
        ctx->quirks = bifrost_get_quirks(product_id);

+        /* Lower gl_Position pre-optimisation, but after lowering vars to ssa
+         * (so we don't accidentally duplicate the epilogue since mesa/st has
+         * messed with our I/O quite a bit already) */
+
+        NIR_PASS_V(nir, nir_lower_vars_to_ssa);
+
+        if (ctx->stage == MESA_SHADER_VERTEX) {
+                NIR_PASS_V(nir, nir_lower_viewport_transform);
+                NIR_PASS_V(nir, nir_lower_point_size, 1.0, 1024.0);
+        }
+
+        NIR_PASS_V(nir, nir_split_var_copies);
+        NIR_PASS_V(nir, nir_lower_global_vars_to_local);
+        NIR_PASS_V(nir, nir_lower_var_copies);
+        NIR_PASS_V(nir, nir_lower_vars_to_ssa);
+        NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
+        NIR_PASS_V(nir, nir_lower_ssbo);
+
+        /* We have to lower ALU to scalar ourselves since viewport
+         * transformations produce vector ops */
+        NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
+
+        bi_optimize_nir(nir);
        nir_print_shader(nir, stdout);

+        bi_print_shader(ctx, stdout);
+
        ralloc_free(ctx);
 }
--- a/src/panfrost/bifrost/cmdline.c
+++ b/src/panfrost/bifrost/cmdline.c
@@ -58,8 +58,6 @@ compile_shader(char **argv)
                NIR_PASS_V(nir[i], nir_split_var_copies);
                NIR_PASS_V(nir[i], nir_lower_var_copies);

-                NIR_PASS_V(nir[i], nir_lower_alu_to_scalar, NULL, NULL);
-
                /* before buffers and vars_to_ssa */
                NIR_PASS_V(nir[i], gl_nir_lower_images, true);

--- a/src/panfrost/bifrost/compiler.h
+++ b/src/panfrost/bifrost/compiler.h
@@ -317,6 +317,7 @@ typedef struct bi_block {

 typedef struct {
       nir_shader *nir;
+       gl_shader_stage stage;
       struct list_head blocks; /* list of bi_block */
       uint32_t quirks;
 } bi_context;