pan/mdg: Add support for multi sample iteration writeout

Some MSAA+format combinations require writeout to be split. Right now, it
only impacts blend shaders since we only support MSAA 4x, and the only
formats that could exceed the 128-bit/pixel limit in MSAA 4x are
not supported by the fixed-function blend unit. We thus rely on the
blend shader to split things properly. Things will change once we add
MSAA 8x/16x to the mix, since even the blendable formats will exceed
the 128-bit/pixel limit in that case.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7984>
This commit is contained in:
Boris Brezillon 2020-12-08 11:38:50 +01:00 committed by Marge Bot
parent 29f938a0ec
commit e27052281a
3 changed files with 61 additions and 27 deletions

View File

@ -233,6 +233,8 @@ enum midgard_rt_id {
MIDGARD_NUM_RTS,
};
#define MIDGARD_MAX_SAMPLE_ITER 16
typedef struct compiler_context {
nir_shader *nir;
gl_shader_stage stage;
@ -243,6 +245,9 @@ typedef struct compiler_context {
/* Render target number for a keyed blend shader. Depends on is_blend */
unsigned blend_rt;
/* Number of samples for a keyed blend shader. Depends on is_blend */
unsigned blend_sample_iterations;
/* Index to precolour to r0 for an input blend colour */
unsigned blend_input;
@ -313,7 +318,7 @@ typedef struct compiler_context {
uint32_t quirks;
/* Writeout instructions for each render target */
midgard_instruction *writeout_branch[MIDGARD_NUM_RTS];
midgard_instruction *writeout_branch[MIDGARD_NUM_RTS][MIDGARD_MAX_SAMPLE_ITER];
struct panfrost_sysvals sysvals;
} compiler_context;

View File

@ -1316,11 +1316,13 @@ compute_builtin_arg(nir_op op)
}
static void
emit_fragment_store(compiler_context *ctx, unsigned src, unsigned src_z, unsigned src_s, enum midgard_rt_id rt)
emit_fragment_store(compiler_context *ctx, unsigned src, unsigned src_z, unsigned src_s,
enum midgard_rt_id rt, unsigned sample_iter)
{
assert(rt < ARRAY_SIZE(ctx->writeout_branch));
assert(sample_iter < ARRAY_SIZE(ctx->writeout_branch[0]));
midgard_instruction *br = ctx->writeout_branch[rt];
midgard_instruction *br = ctx->writeout_branch[rt][sample_iter];
assert(!br);
@ -1336,7 +1338,12 @@ emit_fragment_store(compiler_context *ctx, unsigned src, unsigned src_z, unsigne
/* Add dependencies */
ins.src[0] = src;
ins.src_types[0] = nir_type_uint32;
ins.constants.u32[0] = depth_only ? 0xFF : (rt - MIDGARD_COLOR_RT0) * 0x100;
if (depth_only)
ins.constants.u32[0] = 0xFF;
else
ins.constants.u32[0] = ((rt - MIDGARD_COLOR_RT0) << 8) | sample_iter;
for (int i = 0; i < 4; ++i)
ins.swizzle[0][i] = i;
@ -1356,7 +1363,7 @@ emit_fragment_store(compiler_context *ctx, unsigned src, unsigned src_z, unsigne
/* Emit the branch */
br = emit_mir_instruction(ctx, ins);
schedule_barrier(ctx);
ctx->writeout_branch[rt] = br;
ctx->writeout_branch[rt][sample_iter] = br;
/* Push our current location = current block count - 1 = where we'll
* jump to. Maybe a bit too clever for my own good */
@ -1693,7 +1700,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
reg_s = nir_src_index(ctx, &instr->src[3]);
}
emit_fragment_store(ctx, reg, reg_z, reg_s, rt);
emit_fragment_store(ctx, reg, reg_z, reg_s, rt, 0);
} else if (ctx->stage == MESA_SHADER_VERTEX) {
assert(instr->intrinsic == nir_intrinsic_store_output);
@ -1757,7 +1764,8 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
case nir_intrinsic_store_raw_output_pan:
assert (ctx->stage == MESA_SHADER_FRAGMENT);
reg = nir_src_index(ctx, &instr->src[0]);
emit_fragment_store(ctx, reg, ~0, ~0, ctx->blend_rt);
for (unsigned s = 0; s < ctx->blend_sample_iterations; s++)
emit_fragment_store(ctx, reg, ~0, ~0, ctx->blend_rt, s);
break;
case nir_intrinsic_store_global:
@ -2451,10 +2459,10 @@ midgard_legalize_invert(compiler_context *ctx, midgard_block *block)
}
static unsigned
emit_fragment_epilogue(compiler_context *ctx, unsigned rt)
emit_fragment_epilogue(compiler_context *ctx, unsigned rt, unsigned sample_iter)
{
/* Loop to ourselves */
midgard_instruction *br = ctx->writeout_branch[rt];
midgard_instruction *br = ctx->writeout_branch[rt][sample_iter];
struct midgard_instruction ins = v_branch(false, false);
ins.writeout = br->writeout;
ins.branch.target_block = ctx->block_count - 1;
@ -2683,27 +2691,38 @@ static void
/* Turn each writeout branch into a loop: patch its target to a per-RT,
 * per-sample-iteration epilogue, and chain an unconditional branch back
 * to the writeout point whenever another writeout (next sample iteration
 * of the same RT, or the first iteration of a later RT) still follows.
 * Only the final writeout is allowed to terminate the shader.
 *
 * NOTE(review): reconstructed from a diff rendering that had lost its
 * +/- markers; this is the post-change (multi-sample-iteration) version.
 */
mir_add_writeout_loops(compiler_context *ctx)
{
        for (unsigned rt = 0; rt < ARRAY_SIZE(ctx->writeout_branch); ++rt) {
                for (unsigned s = 0; s < MIDGARD_MAX_SAMPLE_ITER; ++s) {
                        midgard_instruction *br = ctx->writeout_branch[rt][s];
                        if (!br) continue;

                        unsigned popped = br->branch.target_block;
                        pan_block_add_successor(&(mir_get_block(ctx, popped - 1)->base),
                                                &ctx->current_block->base);
                        br->branch.target_block = emit_fragment_epilogue(ctx, rt, s);
                        br->branch.target_type = TARGET_GOTO;

                        /* If we have more RTs, we'll need to restore back after our
                         * loop terminates */
                        midgard_instruction *next_br = NULL;

                        /* Next writeout is either the next sample iteration of this
                         * RT, or the first iteration of the next populated RT. */
                        if ((s + 1) < MIDGARD_MAX_SAMPLE_ITER)
                                next_br = ctx->writeout_branch[rt][s + 1];

                        if (!next_br && (rt + 1) < ARRAY_SIZE(ctx->writeout_branch))
                                next_br = ctx->writeout_branch[rt + 1][0];

                        if (next_br) {
                                midgard_instruction uncond = v_branch(false, false);
                                uncond.branch.target_block = popped;
                                uncond.branch.target_type = TARGET_GOTO;
                                emit_mir_instruction(ctx, uncond);
                                pan_block_add_successor(&ctx->current_block->base,
                                                        &(mir_get_block(ctx, popped)->base));
                                schedule_barrier(ctx);
                        } else {
                                /* We're last, so we can terminate here */
                                br->last_writeout = true;
                        }
                }
        }
}
@ -2725,6 +2744,15 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
ctx->stage = nir->info.stage;
ctx->is_blend = inputs->is_blend;
ctx->blend_rt = MIDGARD_COLOR_RT0 + inputs->blend.rt;
if (inputs->is_blend) {
unsigned nr_samples = MAX2(inputs->blend.nr_samples, 1);
const struct util_format_description *desc =
util_format_description(inputs->rt_formats[inputs->blend.rt]);
/* We have to split writeout in 128 bit chunks */
ctx->blend_sample_iterations =
DIV_ROUND_UP(desc->block.bits * nr_samples, 128);
}
memcpy(ctx->blend_constants, inputs->blend.constants, sizeof(ctx->blend_constants));
ctx->blend_input = ~0;
ctx->blend_src1 = ~0;

View File

@ -112,6 +112,7 @@ struct panfrost_compile_inputs {
bool is_blend;
struct {
unsigned rt;
unsigned nr_samples;
float constants[4];
uint64_t bifrost_blend_desc;
} blend;