diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index 754876f89ee..87a447a9ab3 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -35,14 +35,113 @@
 #include "compiler.h"
 #include "bi_quirks.h"
 
+static int
+glsl_type_size(const struct glsl_type *type, bool bindless) /* size callback handed to nir_lower_io below; 'bindless' is unused */
+{
+        return glsl_count_attribute_slots(type, false);
+}
+
+static void
+bi_optimize_nir(nir_shader *nir) /* one-time lowering, then an optimization loop, then conversion out of SSA */
+{
+        bool progress; /* reset to false at the top of every loop iteration */
+        unsigned lower_flrp = 16 | 32 | 64; /* mask passed to nir_lower_flrp; presumably flrp bit sizes -- TODO confirm */
+
+        NIR_PASS(progress, nir, nir_lower_regs_to_ssa);
+        NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_fast);
+
+        nir_lower_tex_options lower_tex_options = {
+                .lower_txs_lod = true,
+                .lower_txp = ~0,
+                .lower_tex_without_implicit_lod = true,
+                .lower_txd = true,
+        };
+
+        NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options);
+
+        do {
+                progress = false;
+
+                NIR_PASS(progress, nir, nir_lower_var_copies);
+                NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
+
+                NIR_PASS(progress, nir, nir_copy_prop);
+                NIR_PASS(progress, nir, nir_opt_remove_phis);
+                NIR_PASS(progress, nir, nir_opt_dce);
+                NIR_PASS(progress, nir, nir_opt_dead_cf);
+                NIR_PASS(progress, nir, nir_opt_cse);
+                NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
+                NIR_PASS(progress, nir, nir_opt_algebraic);
+                NIR_PASS(progress, nir, nir_opt_constant_folding);
+
+                if (lower_flrp != 0) {
+                        bool lower_flrp_progress = false;
+                        NIR_PASS(lower_flrp_progress,
+                                 nir,
+                                 nir_lower_flrp,
+                                 lower_flrp,
+                                 false /* always_precise */,
+                                 nir->options->lower_ffma);
+                        if (lower_flrp_progress) {
+                                NIR_PASS(progress, nir,
+                                         nir_opt_constant_folding);
+                                progress = true; /* force another loop iteration after flrp lowering */
+                        }
+
+                        /* Nothing should rematerialize any flrps, so we only
+                         * need to do this lowering once.
+                         */
+                        lower_flrp = 0;
+                }
+
+                NIR_PASS(progress, nir, nir_opt_undef);
+                NIR_PASS(progress, nir, nir_opt_loop_unroll,
+                         nir_var_shader_in |
+                         nir_var_shader_out |
+                         nir_var_function_temp);
+        } while (progress); /* stop once an iteration leaves progress false */
+
+        NIR_PASS(progress, nir, nir_opt_algebraic_late);
+
+        /* Take us out of SSA */
+        NIR_PASS(progress, nir, nir_lower_locals_to_regs);
+        NIR_PASS(progress, nir, nir_convert_from_ssa, true);
+}
+
 void
 bifrost_compile_shader_nir(nir_shader *nir, bifrost_program *program, unsigned product_id)
 {
         bi_context *ctx = rzalloc(NULL, bi_context);
         ctx->nir = nir;
+        ctx->stage = nir->info.stage; /* cached for the vertex-stage check below */
         ctx->quirks = bifrost_get_quirks(product_id);
 
+        /* Lower gl_Position pre-optimisation, but after lowering vars to ssa
+         * (so we don't accidentally duplicate the epilogue since mesa/st has
+         * messed with our I/O quite a bit already) */
+
+        NIR_PASS_V(nir, nir_lower_vars_to_ssa);
+
+        if (ctx->stage == MESA_SHADER_VERTEX) {
+                NIR_PASS_V(nir, nir_lower_viewport_transform);
+                NIR_PASS_V(nir, nir_lower_point_size, 1.0, 1024.0); /* presumably min/max point size -- TODO confirm */
+        }
+
+        NIR_PASS_V(nir, nir_split_var_copies);
+        NIR_PASS_V(nir, nir_lower_global_vars_to_local);
+        NIR_PASS_V(nir, nir_lower_var_copies);
+        NIR_PASS_V(nir, nir_lower_vars_to_ssa);
+        NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
+        NIR_PASS_V(nir, nir_lower_ssbo);
+
+        /* We have to lower ALU to scalar ourselves since viewport
+         * transformations produce vector ops */
+        NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
+
+        bi_optimize_nir(nir);
         nir_print_shader(nir, stdout);
 
+        bi_print_shader(ctx, stdout); /* NOTE(review): nothing visible here populates ctx->blocks before this call -- confirm it is safe */
+
         ralloc_free(ctx);
 }
diff --git a/src/panfrost/bifrost/cmdline.c b/src/panfrost/bifrost/cmdline.c
index bf55ded7df8..7658c7cc343 100644
--- a/src/panfrost/bifrost/cmdline.c
+++ b/src/panfrost/bifrost/cmdline.c
@@ -58,8 +58,6 @@ compile_shader(char **argv)
                 NIR_PASS_V(nir[i], nir_split_var_copies);
                 NIR_PASS_V(nir[i], nir_lower_var_copies);
 
-                NIR_PASS_V(nir[i], nir_lower_alu_to_scalar, NULL, NULL);
-
                 /* before buffers and vars_to_ssa */
                 NIR_PASS_V(nir[i], gl_nir_lower_images, true);
 
diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h
index 74b9e2205c3..97147d0a723 100644
--- a/src/panfrost/bifrost/compiler.h
+++ b/src/panfrost/bifrost/compiler.h
@@ -317,6 +317,7 @@ typedef struct bi_block {
 
 typedef struct {
         nir_shader *nir;
+        gl_shader_stage stage; /* set from nir->info.stage in bifrost_compile_shader_nir */
         struct list_head blocks; /* list of bi_block */
         uint32_t quirks;
 } bi_context;