panfrost: Fix uniform_count on Midgard

The compiler ABI specifies push uniforms at a 4-byte granularity (like
Bifrost), but Midgard requires a 16-byte granularity. As such, if the
number of pushed words is not a multiple of 4, there is a buffer overrun
at shader load time. Ordinarily the overrun lands in mapped memory and
the extra words are never used, so the garbage is ignored.
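
For illustration, here is a minimal standalone sketch of the arithmetic
(ALIGN_POT mirrors Mesa's macro of the same name; the word counts are
invented):

    #include <assert.h>
    #include <stdio.h>

    /* Round x up to the next multiple of pot (a power of two),
     * mirroring Mesa's ALIGN_POT macro. */
    #define ALIGN_POT(x, pot) (((x) + (pot) - 1) & ~((pot) - 1))

    int main(void)
    {
            /* 7 pushed 32-bit words = 28 bytes: not a multiple of 16 */
            unsigned push_count = 7;

            /* Midgard's uniform_count is in vec4 (16-byte) units, so
             * the hardware reads whole vec4s: 2 vec4s = 8 words, one
             * word past the end of a 7-word buffer. */
            unsigned vec4s = (push_count + 3) / 4;
            printf("hardware reads %u words, buffer holds %u\n",
                   vec4s * 4, push_count);

            /* The fix below rounds the word count up front, so the
             * buffer is sized to what the hardware will actually read. */
            push_count = ALIGN_POT(push_count, 4);
            assert((push_count & 3) == 0);
            printf("aligned buffer holds %u words\n", push_count);
            return 0;
    }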

However, there was a great deal of confusion around the `uniform_cutoff`
variable. In some cases (such as a full glmark2 run on a 64-bit
processor), the push uniforms would be at the end of a BO and the
cause a page fault.

Remove `uniform_cutoff` entirely and work with `push.count` directly to
avoid faulting, and round the count up to be defensive.
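
The register-allocation hunks below turn on one identity: Midgard has
16 vec4 work registers, and register-mapped uniforms past the first 8
registers eat into that budget. A toy sketch of the trade-off, with the
formula lifted from the diff and the helper name purely illustrative:

    #include <stdio.h>

    #define MAX2(a, b) ((a) > (b) ? (a) : (b))

    /* Work registers left after register-mapping push_words words of
     * uniforms, per the allocate_registers() hunk below. */
    static int work_count(unsigned push_words)
    {
            int rmu = push_words / 4; /* register-mapped uniform vec4s */
            return 16 - MAX2(rmu - 8, 0);
    }

    int main(void)
    {
            printf("%d\n", work_count(32)); /*  8 RMUs -> 16 work regs */
            printf("%d\n", work_count(48)); /* 12 RMUs -> 12 work regs */
            printf("%d\n", work_count(64)); /* 16 RMUs ->  8 work regs */
            return 0;
    }

This is also why mir_ra() prefers demoting uniforms, which frees work
registers, over spilling to TLS.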

Closes: #4289
Fixes: ed810eb0a0 ("panfrost: Don't truncate uniform_count")
Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Tested-by: Robin Murphy <robin.murphy@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9109>

@@ -47,8 +47,10 @@ static inline void
 pan_shader_prepare_midgard_rsd(const struct pan_shader_info *info,
                                struct MALI_RENDERER_STATE *rsd)
 {
+        assert((info->push.count & 3) == 0);
+
         rsd->properties.uniform_buffer_count = info->ubo_count;
-        rsd->properties.midgard.uniform_count = info->midgard.uniform_cutoff;
+        rsd->properties.midgard.uniform_count = info->push.count / 4;
         rsd->properties.midgard.shader_has_side_effects = info->writes_global;
         rsd->properties.midgard.fp_mode = MALI_FP_MODE_GL_INF_NAN_ALLOWED;

@@ -289,9 +289,6 @@ typedef struct compiler_context {
         /* Set of NIR indices that were already emitted as outmods */
         BITSET_WORD *already_emitted;
 
-        /* The number of uniforms allowable for the fast path */
-        int uniform_cutoff;
-
         /* Count of instructions emitted from NIR overall, across all blocks */
         int instruction_count;

@@ -3008,11 +3008,6 @@ midgard_compile_shader_nir(nir_shader *nir,
         ctx->blend_src1 = ~0;
         ctx->quirks = midgard_get_quirks(inputs->gpu_id);
 
-        /* Start off with a safe cutoff, allowing usage of all 16 work
-         * registers. Later, we'll promote uniform reads to uniform registers
-         * if we determine it is beneficial to do so */
-        info->midgard.uniform_cutoff = 8;
-
         /* Initialize at a global (not block) level hash tables */
         ctx->ssa_constants = _mesa_hash_table_u64_create(NULL);

@@ -391,11 +391,11 @@ mir_is_64(midgard_instruction *ins)
 static struct lcra_state *
 allocate_registers(compiler_context *ctx, bool *spilled)
 {
-        /* The number of vec4 work registers available depends on when the
-         * uniforms start and the shader stage. By ABI we limit blend shaders
-         * to 8 registers, should be lower XXX */
-        int work_count = ctx->inputs->is_blend ? 8 :
-                16 - MAX2((ctx->info->midgard.uniform_cutoff - 8), 0);
+        /* The number of vec4 work registers available depends on the number of
+         * register-mapped uniforms and the shader stage. By ABI we limit blend
+         * shaders to 8 registers, should be lower XXX */
+        unsigned rmu = ctx->info->push.count / 4;
+        int work_count = ctx->inputs->is_blend ? 8 : 16 - MAX2(rmu - 8, 0);
 
         /* No register allocation to do with no SSA */
@@ -959,15 +959,13 @@ mir_spill_register(
 static void
 mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
 {
-        unsigned old_work_count =
-                16 - MAX2((ctx->info->midgard.uniform_cutoff - 8), 0);
+        unsigned uniforms = ctx->info->push.count / 4;
+        unsigned old_work_count = 16 - MAX2(uniforms - 8, 0);
         unsigned work_count = 16 - MAX2((new_cutoff - 8), 0);
 
         unsigned min_demote = SSA_FIXED_REGISTER(old_work_count);
         unsigned max_demote = SSA_FIXED_REGISTER(work_count);
 
-        ctx->info->midgard.uniform_cutoff = new_cutoff;
-
         mir_foreach_block(ctx, _block) {
                 midgard_block *block = (midgard_block *) _block;
                 mir_foreach_instr_in_block(block, ins) {
@@ -1002,6 +1000,8 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
                         }
                 }
         }
+
+        ctx->info->push.count = MIN2(ctx->info->push.count, new_cutoff * 4);
 }
 
 /* Run register allocation in a loop, spilling until we succeed */
@@ -1022,13 +1022,12 @@ mir_ra(compiler_context *ctx)
         do {
                 if (spilled) {
                         signed spill_node = mir_choose_spill_node(ctx, l);
+                        unsigned uniforms = ctx->info->push.count / 4;
 
                         /* It's a lot cheaper to demote uniforms to get more
                          * work registers than to spill to TLS. */
-                        if (l->spill_class == REG_CLASS_WORK &&
-                            ctx->info->midgard.uniform_cutoff > 8) {
-                                mir_demote_uniforms(ctx, MAX2(ctx->info->midgard.uniform_cutoff - 4, 8));
+                        if (l->spill_class == REG_CLASS_WORK && uniforms > 8) {
+                                mir_demote_uniforms(ctx, MAX2(uniforms - 4, 8));
                         } else if (spill_node == -1) {
                                 fprintf(stderr, "ERROR: Failed to choose spill node\n");
                                 lcra_free(l);

@@ -263,7 +263,9 @@ midgard_promote_uniforms(compiler_context *ctx)
         unsigned work_count = mir_work_heuristic(ctx, &analysis);
         unsigned promoted_count = 24 - work_count;
 
+        /* Ensure we are 16 byte aligned to avoid underallocations */
         mir_pick_ubo(&ctx->info->push, &analysis, promoted_count);
+        ctx->info->push.count = ALIGN_POT(ctx->info->push.count, 4);
 
         /* First, figure out special indices a priori so we don't recompute a lot */
         BITSET_WORD *special = mir_special_indices(ctx);
@@ -287,9 +289,6 @@ midgard_promote_uniforms(compiler_context *ctx)
                 /* Should've taken into account when pushing */
                 assert(address < promoted_count);
 
-                ctx->info->midgard.uniform_cutoff =
-                        MAX2(ctx->info->midgard.uniform_cutoff, address + 1);
-
                 unsigned promoted = SSA_FIXED_REGISTER(uniform_reg);
 
                 /* We do need the move for safety for a non-SSA dest, or if

@@ -145,7 +145,6 @@ struct bifrost_shader_info {
 };
 
 struct midgard_shader_info {
-        unsigned uniform_cutoff;
         unsigned first_tag;
 };