pan/bi: Lower and optimize NIR

Pretty much a copypaste from Midgard except where architectural
decisions diverge around vectorization. On that note, we will need our
own ALU scalarization pass at some point (or rather we'll need to extend
nir_lower_alu_scalar) to allow partial lowering for 8/16-bit ops. I.e.
we'll approximately need to lower

   vec4 16 ssa_2 = fadd ssa_0, ssa_1

to

   vec2 16 ssa_2 = fadd ssa_0.xy, ssa_1.xy
   vec2 16 ssa_3 = fadd ssa_0.zw, ssa_1.zw
   vec4 16 ssa_4 = vec4 ssa_2.x, ssa_2.y, ssa_3.x, ssa_4

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4097>
This commit is contained in:
Alyssa Rosenzweig 2020-03-05 10:11:39 -05:00 committed by Marge Bot
parent c652ff8caa
commit 0d29184f69
3 changed files with 100 additions and 2 deletions

View File

@ -35,14 +35,113 @@
#include "compiler.h"
#include "bi_quirks.h"
static int
glsl_type_size(const struct glsl_type *type, bool bindless)
{
return glsl_count_attribute_slots(type, false);
}
static void
bi_optimize_nir(nir_shader *nir)
{
bool progress;
unsigned lower_flrp = 16 | 32 | 64;
NIR_PASS(progress, nir, nir_lower_regs_to_ssa);
NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_fast);
nir_lower_tex_options lower_tex_options = {
.lower_txs_lod = true,
.lower_txp = ~0,
.lower_tex_without_implicit_lod = true,
.lower_txd = true,
};
NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options);
do {
progress = false;
NIR_PASS(progress, nir, nir_lower_var_copies);
NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
NIR_PASS(progress, nir, nir_copy_prop);
NIR_PASS(progress, nir, nir_opt_remove_phis);
NIR_PASS(progress, nir, nir_opt_dce);
NIR_PASS(progress, nir, nir_opt_dead_cf);
NIR_PASS(progress, nir, nir_opt_cse);
NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
NIR_PASS(progress, nir, nir_opt_algebraic);
NIR_PASS(progress, nir, nir_opt_constant_folding);
if (lower_flrp != 0) {
bool lower_flrp_progress = false;
NIR_PASS(lower_flrp_progress,
nir,
nir_lower_flrp,
lower_flrp,
false /* always_precise */,
nir->options->lower_ffma);
if (lower_flrp_progress) {
NIR_PASS(progress, nir,
nir_opt_constant_folding);
progress = true;
}
/* Nothing should rematerialize any flrps, so we only
* need to do this lowering once.
*/
lower_flrp = 0;
}
NIR_PASS(progress, nir, nir_opt_undef);
NIR_PASS(progress, nir, nir_opt_loop_unroll,
nir_var_shader_in |
nir_var_shader_out |
nir_var_function_temp);
} while (progress);
NIR_PASS(progress, nir, nir_opt_algebraic_late);
/* Take us out of SSA */
NIR_PASS(progress, nir, nir_lower_locals_to_regs);
NIR_PASS(progress, nir, nir_convert_from_ssa, true);
}
void
bifrost_compile_shader_nir(nir_shader *nir, bifrost_program *program, unsigned product_id)
{
bi_context *ctx = rzalloc(NULL, bi_context);
ctx->nir = nir;
ctx->stage = nir->info.stage;
ctx->quirks = bifrost_get_quirks(product_id);
/* Lower gl_Position pre-optimisation, but after lowering vars to ssa
* (so we don't accidentally duplicate the epilogue since mesa/st has
* messed with our I/O quite a bit already) */
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
if (ctx->stage == MESA_SHADER_VERTEX) {
NIR_PASS_V(nir, nir_lower_viewport_transform);
NIR_PASS_V(nir, nir_lower_point_size, 1.0, 1024.0);
}
NIR_PASS_V(nir, nir_split_var_copies);
NIR_PASS_V(nir, nir_lower_global_vars_to_local);
NIR_PASS_V(nir, nir_lower_var_copies);
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
NIR_PASS_V(nir, nir_lower_ssbo);
/* We have to lower ALU to scalar ourselves since viewport
* transformations produce vector ops */
NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
bi_optimize_nir(nir);
nir_print_shader(nir, stdout);
bi_print_shader(ctx, stdout);
ralloc_free(ctx);
}

View File

@ -58,8 +58,6 @@ compile_shader(char **argv)
NIR_PASS_V(nir[i], nir_split_var_copies);
NIR_PASS_V(nir[i], nir_lower_var_copies);
NIR_PASS_V(nir[i], nir_lower_alu_to_scalar, NULL, NULL);
/* before buffers and vars_to_ssa */
NIR_PASS_V(nir[i], gl_nir_lower_images, true);

View File

@ -317,6 +317,7 @@ typedef struct bi_block {
typedef struct {
nir_shader *nir;
gl_shader_stage stage;
struct list_head blocks; /* list of bi_block */
uint32_t quirks;
} bi_context;