panfrost: Move the shader compilation logic out of the gallium driver

While at it, rework the code to avoid copies between intermediate
structures: the pan_shader_info is passed to the compiler context so
the compiler can fill shader information directly.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8963>
Boris Brezillon 2021-02-13 08:24:03 +01:00 committed by Marge Bot
parent d18fc89066
commit d5b1a33460
23 changed files with 558 additions and 540 deletions
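For reference, a minimal sketch of the calling convention this commit introduces (`dev`, `s` and `inputs` stand in for the caller's existing device, NIR shader and compile inputs; illustration only, not part of the diff):

    struct util_dynarray binary;
    struct pan_shader_info info;

    util_dynarray_init(&binary, NULL);
    pan_shader_compile(dev, s, &inputs, &binary, &info);

    /* binary.data/binary.size hold the machine code; info carries the
     * metadata that previously had to be copied out of panfrost_program */

    util_dynarray_fini(&binary);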


@ -40,35 +40,30 @@
#include "tgsi/tgsi_dump.h"
static void
pan_prepare_midgard_props(struct panfrost_shader_state *state,
panfrost_program *program,
gl_shader_stage stage)
pan_prepare_midgard_props(struct panfrost_shader_state *state)
{
pan_prepare(&state->properties, RENDERER_PROPERTIES);
state->properties.uniform_buffer_count = state->ubo_count;
state->properties.midgard.uniform_count = program->uniform_cutoff;
state->properties.midgard.shader_has_side_effects = state->writes_global;
state->properties.uniform_buffer_count = state->info.ubo_count;
state->properties.midgard.uniform_count = state->info.midgard.uniform_cutoff;
state->properties.midgard.shader_has_side_effects = state->info.writes_global;
state->properties.midgard.fp_mode = MALI_FP_MODE_GL_INF_NAN_ALLOWED;
/* For fragment shaders, work register count, early-z, reads at draw-time */
if (stage != MESA_SHADER_FRAGMENT)
state->properties.midgard.work_register_count = state->work_reg_count;
if (state->info.stage != MESA_SHADER_FRAGMENT)
state->properties.midgard.work_register_count = state->info.work_reg_count;
}
static void
pan_prepare_bifrost_props(struct panfrost_shader_state *state,
panfrost_program *program,
gl_shader_stage stage,
shader_info *info)
pan_prepare_bifrost_props(struct panfrost_shader_state *state)
{
unsigned fau_count = DIV_ROUND_UP(program->push.count, 2);
unsigned fau_count = DIV_ROUND_UP(state->info.push.count, 2);
switch (stage) {
switch (state->info.stage) {
case MESA_SHADER_VERTEX:
pan_prepare(&state->properties, RENDERER_PROPERTIES);
state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
state->properties.uniform_buffer_count = state->ubo_count;
state->properties.uniform_buffer_count = state->info.ubo_count;
pan_prepare(&state->preload, PRELOAD);
state->preload.uniform_count = fau_count;
@ -78,39 +73,39 @@ pan_prepare_bifrost_props(struct panfrost_shader_state *state,
case MESA_SHADER_FRAGMENT:
pan_prepare(&state->properties, RENDERER_PROPERTIES);
/* Early-Z set at draw-time */
if (state->writes_depth || state->writes_stencil) {
if (state->info.fs.writes_depth || state->info.fs.writes_stencil) {
state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
state->properties.bifrost.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE;
} else if (state->can_discard) {
} else if (state->info.fs.can_discard) {
state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
state->properties.bifrost.pixel_kill_operation = MALI_PIXEL_KILL_WEAK_EARLY;
} else {
state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
state->properties.bifrost.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;
}
state->properties.uniform_buffer_count = state->ubo_count;
state->properties.bifrost.shader_modifies_coverage = state->can_discard;
state->properties.bifrost.shader_wait_dependency_6 = program->wait_6;
state->properties.bifrost.shader_wait_dependency_7 = program->wait_7;
state->properties.uniform_buffer_count = state->info.ubo_count;
state->properties.bifrost.shader_modifies_coverage = state->info.fs.can_discard;
state->properties.bifrost.shader_wait_dependency_6 = state->info.bifrost.wait_6;
state->properties.bifrost.shader_wait_dependency_7 = state->info.bifrost.wait_7;
pan_prepare(&state->preload, PRELOAD);
state->preload.uniform_count = fau_count;
state->preload.fragment.fragment_position = state->reads_frag_coord;
state->preload.fragment.fragment_position = state->info.fs.reads_frag_coord;
state->preload.fragment.coverage = true;
state->preload.fragment.primitive_flags = state->reads_face;
state->preload.fragment.primitive_flags = state->info.fs.reads_face;
/* Contains sample ID and sample mask. Sample position and
* helper invocation are expressed in terms of the above, so
* preload for those too */
state->preload.fragment.sample_mask_id =
BITSET_TEST(info->system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
BITSET_TEST(info->system_values_read, SYSTEM_VALUE_SAMPLE_POS) ||
BITSET_TEST(info->system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN) ||
BITSET_TEST(info->system_values_read, SYSTEM_VALUE_HELPER_INVOCATION);
state->info.fs.reads_sample_id |
state->info.fs.reads_sample_pos |
state->info.fs.reads_sample_mask_in |
state->info.fs.reads_helper_invocation;
break;
case MESA_SHADER_COMPUTE:
pan_prepare(&state->properties, RENDERER_PROPERTIES);
state->properties.uniform_buffer_count = state->ubo_count;
state->properties.uniform_buffer_count = state->info.ubo_count;
pan_prepare(&state->preload, PRELOAD);
state->preload.uniform_count = fau_count;
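For illustration, the `fau_count` above follows from push constants being counted in 32-bit words while fast-access uniforms (FAUs) are grouped in pairs of words, as the Bifrost push-UBO pass later in this diff notes (values hypothetical):

    unsigned push_words = 9;                          /* hypothetical shader */
    unsigned fau_count = DIV_ROUND_UP(push_words, 2); /* 5 FAU slots */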
@ -152,112 +147,12 @@ pan_upload_shader_descriptor(struct panfrost_context *ctx,
u_upload_unmap(ctx->state_uploader);
}
static unsigned
pan_format_from_nir_base(nir_alu_type base)
{
switch (base) {
case nir_type_int:
return MALI_FORMAT_SINT;
case nir_type_uint:
case nir_type_bool:
return MALI_FORMAT_UINT;
case nir_type_float:
return MALI_CHANNEL_FLOAT;
default:
unreachable("Invalid base");
}
}
static unsigned
pan_format_from_nir_size(nir_alu_type base, unsigned size)
{
if (base == nir_type_float) {
switch (size) {
case 16: return MALI_FORMAT_SINT;
case 32: return MALI_FORMAT_UNORM;
default:
unreachable("Invalid float size for format");
}
} else {
switch (size) {
case 1:
case 8: return MALI_CHANNEL_8;
case 16: return MALI_CHANNEL_16;
case 32: return MALI_CHANNEL_32;
default:
unreachable("Invalid int size for format");
}
}
}
static enum mali_format
pan_format_from_glsl(const struct glsl_type *type, unsigned precision, unsigned frac)
{
const struct glsl_type *column = glsl_without_array_or_matrix(type);
enum glsl_base_type glsl_base = glsl_get_base_type(column);
nir_alu_type t = nir_get_nir_type_for_glsl_base_type(glsl_base);
unsigned chan = glsl_get_components(column);
/* If we have a fractional location added, we need to increase the size
* so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4.
* We could do better but this is an edge case as it is, normally
* packed varyings will be aligned. */
chan += frac;
assert(chan >= 1 && chan <= 4);
unsigned base = nir_alu_type_get_base_type(t);
unsigned size = nir_alu_type_get_type_size(t);
/* Demote to fp16 where possible. int16 varyings are TODO as the hw
* will saturate instead of wrap which is not conformant, so we need to
* insert i2i16/u2u16 instructions before the st_vary_32i/32u to get
* the intended behaviour */
bool is_16 = (precision == GLSL_PRECISION_MEDIUM)
|| (precision == GLSL_PRECISION_LOW);
if (is_16 && base == nir_type_float)
size = 16;
else
size = 32;
return pan_format_from_nir_base(base) |
pan_format_from_nir_size(base, size) |
MALI_NR_CHANNELS(chan);
}
static enum mali_bifrost_register_file_format
bifrost_blend_type_from_nir(nir_alu_type nir_type)
{
switch(nir_type) {
case 0: /* Render target not in use */
return 0;
case nir_type_float16:
return MALI_BIFROST_REGISTER_FILE_FORMAT_F16;
case nir_type_float32:
return MALI_BIFROST_REGISTER_FILE_FORMAT_F32;
case nir_type_int32:
return MALI_BIFROST_REGISTER_FILE_FORMAT_I32;
case nir_type_uint32:
return MALI_BIFROST_REGISTER_FILE_FORMAT_U32;
case nir_type_int16:
return MALI_BIFROST_REGISTER_FILE_FORMAT_I16;
case nir_type_uint16:
return MALI_BIFROST_REGISTER_FILE_FORMAT_U16;
default:
unreachable("Unsupported blend shader type for NIR alu type");
return 0;
}
}
void
panfrost_shader_compile(struct panfrost_context *ctx,
enum pipe_shader_ir ir_type,
const void *ir,
gl_shader_stage stage,
struct panfrost_shader_state *state,
uint64_t *outputs_written)
struct panfrost_shader_state *state)
{
struct panfrost_device *dev = pan_device(ctx->base.screen);
@ -280,169 +175,62 @@ panfrost_shader_compile(struct panfrost_context *ctx,
memcpy(inputs.rt_formats, state->rt_formats, sizeof(inputs.rt_formats));
panfrost_program *program;
struct util_dynarray binary;
program = pan_shader_compile(dev, NULL, s, &inputs);
util_dynarray_init(&binary, NULL);
pan_shader_compile(dev, s, &inputs, &binary, &state->info);
/* Prepare the compiled binary for upload */
mali_ptr shader = 0;
unsigned attribute_count = 0, varying_count = 0;
int size = program->compiled.size;
int size = binary.size;
if (size) {
state->bo = panfrost_bo_create(dev, size, PAN_BO_EXECUTE);
memcpy(state->bo->ptr.cpu, program->compiled.data, size);
memcpy(state->bo->ptr.cpu, binary.data, size);
shader = state->bo->ptr.gpu;
}
/* Midgard needs the first tag on the bottom nibble */
if (!pan_is_bifrost(dev)) {
/* If size = 0, we tag as "end-of-shader" */
if (size)
shader |= program->first_tag;
else
shader = 0x1;
}
state->sysval_count = program->sysval_count;
memcpy(state->sysval, program->sysvals, sizeof(state->sysval[0]) * state->sysval_count);
memcpy(&state->push, &program->push, sizeof(program->push));
bool vertex_id = BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_VERTEX_ID);
bool instance_id = BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
state->writes_global = s->info.writes_memory;
switch (stage) {
case MESA_SHADER_VERTEX:
attribute_count = util_bitcount64(s->info.inputs_read) +
util_bitcount(s->info.images_used);
varying_count = util_bitcount64(s->info.outputs_written);
if (vertex_id)
attribute_count = MAX2(attribute_count, PAN_VERTEX_ID + 1);
if (instance_id)
attribute_count = MAX2(attribute_count, PAN_INSTANCE_ID + 1);
break;
case MESA_SHADER_FRAGMENT:
for (unsigned i = 0; i < ARRAY_SIZE(state->blend_ret_addrs); i++) {
if (!program->blend_ret_offsets[i])
continue;
state->blend_ret_addrs[i] = (state->bo->ptr.gpu & UINT32_MAX) +
program->blend_ret_offsets[i];
assert(!(state->blend_ret_addrs[i] & 0x7));
}
attribute_count = util_bitcount(s->info.images_used);
varying_count = util_bitcount64(s->info.inputs_read);
if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
state->writes_depth = true;
if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
state->writes_stencil = true;
uint64_t outputs_read = s->info.outputs_read;
if (outputs_read & BITFIELD64_BIT(FRAG_RESULT_COLOR))
outputs_read |= BITFIELD64_BIT(FRAG_RESULT_DATA0);
state->outputs_read = outputs_read >> FRAG_RESULT_DATA0;
/* EXT_shader_framebuffer_fetch requires per-sample */
state->sample_shading = s->info.fs.uses_sample_shading ||
outputs_read;
/* List of reasons we need to execute frag shaders when things
* are masked off */
state->fs_sidefx =
s->info.writes_memory ||
s->info.fs.uses_discard ||
s->info.fs.uses_demote;
state->can_discard = s->info.fs.uses_discard;
break;
case MESA_SHADER_COMPUTE:
attribute_count = util_bitcount(s->info.images_used);
state->shared_size = s->info.cs.shared_size;
break;
default:
unreachable("Unknown shader state");
}
state->stack_size = program->tls_size;
state->reads_frag_coord = (s->info.inputs_read & (1 << VARYING_SLOT_POS)) ||
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
state->reads_point_coord = s->info.inputs_read & (1 << VARYING_SLOT_PNTC);
state->reads_face = (s->info.inputs_read & (1 << VARYING_SLOT_FACE)) ||
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
state->writes_point_size = s->info.outputs_written & (1 << VARYING_SLOT_PSIZ);
if (outputs_written)
*outputs_written = s->info.outputs_written;
state->work_reg_count = program->work_register_count;
if (pan_is_bifrost(dev))
for (unsigned i = 0; i < ARRAY_SIZE(state->blend_types); i++)
state->blend_types[i] = bifrost_blend_type_from_nir(program->blend_types[i]);
/* Record the varying mapping for the command stream's bookkeeping */
nir_variable_mode varying_mode =
stage == MESA_SHADER_VERTEX ? nir_var_shader_out : nir_var_shader_in;
nir_foreach_variable_with_modes(var, s, varying_mode) {
unsigned loc = var->data.driver_location;
unsigned sz = glsl_count_attribute_slots(var->type, FALSE);
for (int c = 0; c < sz; ++c) {
state->varyings_loc[loc + c] = var->data.location + c;
state->varyings[loc + c] = pan_format_from_glsl(var->type,
var->data.precision, var->data.location_frac);
}
}
/* Needed for linkage */
state->attribute_count = attribute_count;
state->varying_count = varying_count;
/* Sysvals have dedicated UBO */
state->ubo_count = s->info.num_ubos + (state->sysval_count ? 1 : 0);
if (!pan_is_bifrost(dev))
shader |= state->info.midgard.first_tag;
/* Prepare the descriptors at compile-time */
state->shader.shader = shader;
state->shader.attribute_count = attribute_count;
state->shader.varying_count = varying_count;
state->shader.texture_count = s->info.num_textures;
state->shader.sampler_count = s->info.num_textures;
state->shader.attribute_count = state->info.attribute_count;
state->shader.varying_count = state->info.varyings.input_count +
state->info.varyings.output_count;
state->shader.texture_count = state->info.texture_count;
state->shader.sampler_count = state->info.texture_count;
if (pan_is_bifrost(dev))
pan_prepare_bifrost_props(state, program, stage, &s->info);
pan_prepare_bifrost_props(state);
else
pan_prepare_midgard_props(state, program, stage);
pan_prepare_midgard_props(state);
state->properties.shader_contains_barrier =
s->info.uses_memory_barrier |
s->info.uses_control_barrier;
state->info.contains_barrier;
/* Ordering guarantees are the same */
if (stage == MESA_SHADER_FRAGMENT) {
state->properties.shader_contains_barrier |=
s->info.fs.needs_quad_helper_invocations;
state->info.fs.helper_invocations;
state->properties.stencil_from_shader =
state->info.fs.writes_stencil;
state->properties.depth_source =
state->info.fs.writes_depth ?
MALI_DEPTH_SOURCE_SHADER :
MALI_DEPTH_SOURCE_FIXED_FUNCTION;
} else {
state->properties.depth_source =
MALI_DEPTH_SOURCE_FIXED_FUNCTION;
}
state->properties.stencil_from_shader = state->writes_stencil;
state->properties.depth_source = state->writes_depth ?
MALI_DEPTH_SOURCE_SHADER :
MALI_DEPTH_SOURCE_FIXED_FUNCTION;
if (stage != MESA_SHADER_FRAGMENT)
pan_upload_shader_descriptor(ctx, state);
ralloc_free(program);
util_dynarray_fini(&binary);
/* In both clone and tgsi_to_nir paths, the shader is ralloc'd against
* a NULL context */


@ -295,21 +295,23 @@ panfrost_compile_blend_shader(struct panfrost_blend_shader *shader,
if (constants)
memcpy(inputs.blend.constants, constants, sizeof(inputs.blend.constants));
panfrost_program *program;
if (pan_is_bifrost(dev)) {
inputs.blend.bifrost_blend_desc =
bifrost_get_blend_desc(dev, shader->key.format, shader->key.rt);
}
program = pan_shader_compile(dev, NULL, shader->nir, &inputs);
struct pan_shader_info info;
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
pan_shader_compile(dev, shader->nir, &inputs, &binary, &info);
/* Allow us to patch later */
shader->first_tag = program->first_tag;
shader->size = program->compiled.size;
shader->first_tag = pan_is_bifrost(dev) ? 0 : info.midgard.first_tag;
shader->size = binary.size;
shader->buffer = reralloc_size(shader, shader->buffer, shader->size);
memcpy(shader->buffer, program->compiled.data, shader->size);
shader->work_count = program->work_register_count;
memcpy(shader->buffer, binary.data, shader->size);
shader->work_count = info.work_reg_count;
ralloc_free(program);
util_dynarray_fini(&binary);
}
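Both call sites above rely on the Midgard first-tag convention (Bifrost has none, hence the 0): the tag of the first instruction bundle travels in the low nibble of the shader's GPU address, which the allocation is assumed to leave clear. A sketch:

    mali_ptr shader = state->bo->ptr.gpu;       /* low nibble assumed clear */
    if (!pan_is_bifrost(dev))
            shader |= state->info.midgard.first_tag;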


@ -241,7 +241,7 @@ panfrost_fs_required(
unsigned rt_count)
{
/* If we generally have side effects */
if (fs->fs_sidefx)
if (fs->info.fs.sidefx)
return true;
/* If colour is written we need to execute */
@ -252,7 +252,31 @@ panfrost_fs_required(
/* If depth is written and not implied we need to execute.
* TODO: Predicate on Z/S writes being enabled */
return (fs->writes_depth || fs->writes_stencil);
return (fs->info.fs.writes_depth || fs->info.fs.writes_stencil);
}
static enum mali_bifrost_register_file_format
bifrost_blend_type_from_nir(nir_alu_type nir_type)
{
switch(nir_type) {
case 0: /* Render target not in use */
return 0;
case nir_type_float16:
return MALI_BIFROST_REGISTER_FILE_FORMAT_F16;
case nir_type_float32:
return MALI_BIFROST_REGISTER_FILE_FORMAT_F32;
case nir_type_int32:
return MALI_BIFROST_REGISTER_FILE_FORMAT_I32;
case nir_type_uint32:
return MALI_BIFROST_REGISTER_FILE_FORMAT_U32;
case nir_type_int16:
return MALI_BIFROST_REGISTER_FILE_FORMAT_I16;
case nir_type_uint16:
return MALI_BIFROST_REGISTER_FILE_FORMAT_U16;
default:
unreachable("Unsupported blend shader type for NIR alu type");
return 0;
}
}
static void
@ -292,8 +316,12 @@ panfrost_emit_bifrost_blend(struct panfrost_batch *batch,
assert((blend[i].shader.gpu & (0xffffffffull << 32)) ==
(fs->bo->ptr.gpu & (0xffffffffull << 32)));
cfg.bifrost.internal.shader.pc = (u32)blend[i].shader.gpu;
assert(!(fs->blend_ret_addrs[i] & 0x7));
cfg.bifrost.internal.shader.return_value = fs->blend_ret_addrs[i];
unsigned ret_offset = fs->info.bifrost.blend[i].return_offset;
if (ret_offset) {
assert(!(ret_offset & 0x7));
cfg.bifrost.internal.shader.return_value =
fs->bo->ptr.gpu + ret_offset;
}
cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_SHADER;
} else {
enum pipe_format format = batch->key.cbufs[i]->format;
@ -324,7 +352,7 @@ panfrost_emit_bifrost_blend(struct panfrost_batch *batch,
cfg.bifrost.internal.fixed_function.conversion.memory_format =
panfrost_format_to_bifrost_blend(dev, format_desc, true);
cfg.bifrost.internal.fixed_function.conversion.register_format =
fs->blend_types[i];
bifrost_blend_type_from_nir(fs->info.bifrost.blend[i].type);
cfg.bifrost.internal.fixed_function.rt = i;
}
}
@ -412,7 +440,9 @@ panfrost_prepare_bifrost_fs_state(struct panfrost_context *ctx,
state->properties = fs->properties;
state->properties.bifrost.allow_forward_pixel_to_kill =
!fs->can_discard && !fs->writes_depth && no_blend;
!fs->info.fs.can_discard &&
!fs->info.fs.writes_depth &&
no_blend;
state->shader = fs->shader;
state->preload = fs->preload;
}
@ -436,8 +466,8 @@ panfrost_prepare_midgard_fs_state(struct panfrost_context *ctx,
state->properties.midgard.force_early_z = true;
} else {
/* Reasons to disable early-Z from a shader perspective */
bool late_z = fs->can_discard || fs->writes_global ||
fs->writes_depth || fs->writes_stencil;
bool late_z = fs->info.fs.can_discard || fs->info.writes_global ||
fs->info.fs.writes_depth || fs->info.fs.writes_stencil;
/* If either depth or stencil is enabled, discard matters */
bool zs_enabled =
@ -452,9 +482,9 @@ panfrost_prepare_midgard_fs_state(struct panfrost_context *ctx,
/* TODO: Reduce this limit? */
state->properties = fs->properties;
if (has_blend_shader)
state->properties.midgard.work_register_count = MAX2(fs->work_reg_count, 8);
state->properties.midgard.work_register_count = MAX2(fs->info.work_reg_count, 8);
else
state->properties.midgard.work_register_count = fs->work_reg_count;
state->properties.midgard.work_register_count = fs->info.work_reg_count;
state->properties.midgard.force_early_z = !(late_z || alpha_to_coverage);
@ -463,8 +493,10 @@ panfrost_prepare_midgard_fs_state(struct panfrost_context *ctx,
* lying to the hardware about the discard and setting the
* reads tilebuffer? flag to compensate */
state->properties.midgard.shader_reads_tilebuffer =
fs->outputs_read || (!zs_enabled && fs->can_discard);
state->properties.midgard.shader_contains_discard = zs_enabled && fs->can_discard;
fs->info.fs.outputs_read ||
(!zs_enabled && fs->info.fs.can_discard);
state->properties.midgard.shader_contains_discard =
zs_enabled && fs->info.fs.can_discard;
state->shader = fs->shader;
}
@ -528,7 +560,7 @@ panfrost_prepare_fs_state(struct panfrost_context *ctx,
state->multisample_misc.sample_mask = (msaa ? ctx->sample_mask : ~0) & 0xFFFF;
state->multisample_misc.evaluate_per_sample =
msaa && (ctx->min_samples > 1 || fs->sample_shading);
msaa && (ctx->min_samples > 1 || fs->info.fs.sample_shading);
state->multisample_misc.depth_function = zsa->base.depth_enabled ?
panfrost_translate_compare_func(zsa->base.depth_func) :
@ -930,8 +962,8 @@ panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
{
struct sysval_uniform *uniforms = (void *)buf;
for (unsigned i = 0; i < ss->sysval_count; ++i) {
int sysval = ss->sysval[i];
for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) {
int sysval = ss->info.sysvals.sysvals[i];
switch (PAN_SYSVAL_TYPE(sysval)) {
case PAN_SYSVAL_VIEWPORT_SCALE:
@ -1023,7 +1055,7 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
struct panfrost_shader_state *ss = &all->variants[all->active_variant];
/* Allocate room for the sysval and the uniforms */
size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
size_t sys_size = sizeof(float) * 4 * ss->info.sysvals.sysval_count;
struct panfrost_ptr transfer =
panfrost_pool_alloc_aligned(&batch->pool, sys_size, 16);
@ -1032,7 +1064,7 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
/* Next up, attach UBOs. UBO count includes gaps but no sysval UBO */
struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, stage);
unsigned ubo_count = shader->ubo_count - (sys_size ? 1 : 0);
unsigned ubo_count = shader->info.ubo_count - (sys_size ? 1 : 0);
unsigned sysval_ubo = sys_size ? ubo_count : ~0;
size_t sz = MALI_UNIFORM_BUFFER_LENGTH * (ubo_count + 1);
@ -1076,13 +1108,14 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
/* Copy push constants required by the shader */
struct panfrost_ptr push_transfer =
panfrost_pool_alloc_aligned(&batch->pool, ss->push.count * 4, 16);
panfrost_pool_alloc_aligned(&batch->pool,
ss->info.push.count * 4, 16);
uint32_t *push_cpu = (uint32_t *) push_transfer.cpu;
*push_constants = push_transfer.gpu;
for (unsigned i = 0; i < ss->push.count; ++i) {
struct panfrost_ubo_word src = ss->push.words[i];
for (unsigned i = 0; i < ss->info.push.count; ++i) {
struct panfrost_ubo_word src = ss->info.push.words[i];
/* Map the UBO, this should be cheap. However this is reading
* from write-combine memory which is _very_ slow. It might pay
@ -1108,7 +1141,7 @@ panfrost_emit_shared_memory(struct panfrost_batch *batch,
struct panfrost_device *dev = pan_device(ctx->base.screen);
struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
struct panfrost_shader_state *ss = &all->variants[all->active_variant];
unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size,
unsigned single_size = util_next_power_of_two(MAX2(ss->info.wls_size,
128));
unsigned instances =
@ -1130,12 +1163,12 @@ panfrost_emit_shared_memory(struct panfrost_batch *batch,
ls.wls_instances = instances;
ls.wls_size_scale = util_logbase2(single_size) + 1;
if (ss->stack_size) {
if (ss->info.tls_size) {
unsigned shift =
panfrost_get_stack_shift(ss->stack_size);
panfrost_get_stack_shift(ss->info.tls_size);
struct panfrost_bo *bo =
panfrost_batch_get_scratchpad(batch,
ss->stack_size,
ss->info.tls_size,
dev->thread_tls_alloc,
dev->core_count);
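A worked example of the WLS sizing above (illustrative numbers): the shared size is rounded up to a power of two, floored at 128 bytes, and encoded as a log2-based scale:

    unsigned wls_size = 300;                                            /* hypothetical */
    unsigned single_size = util_next_power_of_two(MAX2(wls_size, 128)); /* 512 */
    unsigned wls_size_scale = util_logbase2(single_size) + 1;           /* 10 */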
@ -1366,7 +1399,7 @@ panfrost_emit_image_attribs(struct panfrost_batch *batch,
struct panfrost_context *ctx = batch->ctx;
struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, type);
if (!shader->attribute_count) {
if (!shader->info.attribute_count) {
*buffers = 0;
return 0;
}
@ -1375,11 +1408,11 @@ panfrost_emit_image_attribs(struct panfrost_batch *batch,
unsigned attrib_buf_size = MALI_ATTRIBUTE_BUFFER_LENGTH +
MALI_ATTRIBUTE_BUFFER_CONTINUATION_3D_LENGTH;
unsigned bytes_per_image_desc = MALI_ATTRIBUTE_LENGTH + attrib_buf_size;
unsigned attribs_offset = attrib_buf_size * shader->attribute_count;
unsigned attribs_offset = attrib_buf_size * shader->info.attribute_count;
struct panfrost_ptr ptr =
panfrost_pool_alloc_aligned(&batch->pool,
bytes_per_image_desc * shader->attribute_count,
bytes_per_image_desc * shader->info.attribute_count,
util_next_power_of_two(bytes_per_image_desc));
emit_image_attribs(batch, type, ptr.cpu + attribs_offset, ptr.cpu, 0);
@ -1404,7 +1437,7 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
* Also, we allocate more memory than what's needed here if either instancing
* is enabled or images are present; this can be improved. */
unsigned bufs_per_attrib = (ctx->instance_count > 1 || nr_images > 0) ? 2 : 1;
unsigned nr_bufs = (vs->attribute_count * bufs_per_attrib) +
unsigned nr_bufs = (vs->info.attribute_count * bufs_per_attrib) +
(pan_is_bifrost(dev) ? 1 : 0);
if (!nr_bufs) {
@ -1417,7 +1450,7 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
MALI_ATTRIBUTE_BUFFER_LENGTH * 2);
struct panfrost_ptr T = panfrost_pool_alloc_aligned(&batch->pool,
MALI_ATTRIBUTE_LENGTH * vs->attribute_count,
MALI_ATTRIBUTE_LENGTH * vs->info.attribute_count,
MALI_ATTRIBUTE_LENGTH);
struct mali_attribute_buffer_packed *bufs =
@ -1525,7 +1558,7 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
/* Add special gl_VertexID/gl_InstanceID buffers */
if (unlikely(vs->attribute_count >= PAN_VERTEX_ID)) {
if (unlikely(vs->info.attribute_count >= PAN_VERTEX_ID)) {
panfrost_vertex_id(ctx->padded_count, &bufs[k], ctx->instance_count > 1);
pan_pack(out + PAN_VERTEX_ID, ATTRIBUTE, cfg) {
@ -1742,22 +1775,22 @@ pan_varying_present(const struct panfrost_device *dev,
/* Enable special buffers by the shader info */
if (vs->writes_point_size)
if (vs->info.vs.writes_point_size)
present |= (1 << PAN_VARY_PSIZ);
if (fs->reads_point_coord)
if (fs->info.fs.reads_point_coord)
present |= (1 << PAN_VARY_PNTCOORD);
if (fs->reads_face)
if (fs->info.fs.reads_face)
present |= (1 << PAN_VARY_FACE);
if (fs->reads_frag_coord && !pan_is_bifrost(dev))
if (fs->info.fs.reads_frag_coord && !pan_is_bifrost(dev))
present |= (1 << PAN_VARY_FRAGCOORD);
/* Also, if we have a point sprite, we need a point coord buffer */
for (unsigned i = 0; i < fs->varying_count; i++) {
gl_varying_slot loc = fs->varyings_loc[i];
for (unsigned i = 0; i < fs->info.varyings.input_count; i++) {
gl_varying_slot loc = fs->info.varyings.input[i].location;
if (util_varying_is_point_coord(loc, point_coord_mask))
present |= (1 << PAN_VARY_PNTCOORD);
@ -1886,10 +1919,18 @@ pan_emit_general_varying(const struct panfrost_device *dev,
bool should_alloc)
{
/* Check if we're linked */
unsigned other_varying_count =
other->info.stage == MESA_SHADER_FRAGMENT ?
other->info.varyings.input_count :
other->info.varyings.output_count;
const struct pan_shader_varying *other_varyings =
other->info.stage == MESA_SHADER_FRAGMENT ?
other->info.varyings.input :
other->info.varyings.output;
signed other_idx = -1;
for (unsigned j = 0; j < other->varying_count; ++j) {
if (other->varyings_loc[j] == loc) {
for (unsigned j = 0; j < other_varying_count; ++j) {
if (other_varyings[j].location == loc) {
other_idx = j;
break;
}
@ -1904,7 +1945,8 @@ pan_emit_general_varying(const struct panfrost_device *dev,
if (should_alloc) {
/* We're linked, so allocate a space via a watermark allocation */
enum mali_format alt = other->varyings[other_idx];
enum mali_format alt =
dev->formats[other_varyings[other_idx].format].hw >> 12;
/* Do interpolation at minimum precision */
unsigned size_main = pan_varying_size(format);
@ -1953,8 +1995,14 @@ panfrost_emit_varying(const struct panfrost_device *dev,
bool should_alloc,
bool is_fragment)
{
gl_varying_slot loc = stage->varyings_loc[idx];
enum mali_format format = stage->varyings[idx];
gl_varying_slot loc =
stage->info.stage == MESA_SHADER_FRAGMENT ?
stage->info.varyings.input[idx].location :
stage->info.varyings.output[idx].location;
enum mali_format format =
stage->info.stage == MESA_SHADER_FRAGMENT ?
dev->formats[stage->info.varyings.input[idx].format].hw >> 12 :
dev->formats[stage->info.varyings.output[idx].format].hw >> 12;
/* Override format to match linkage */
if (!should_alloc && gen_formats[idx])
@ -2018,8 +2066,8 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
vs_size = MALI_ATTRIBUTE_LENGTH * vs->varying_count;
fs_size = MALI_ATTRIBUTE_LENGTH * fs->varying_count;
vs_size = MALI_ATTRIBUTE_LENGTH * vs->info.varyings.output_count;
fs_size = MALI_ATTRIBUTE_LENGTH * fs->info.varyings.input_count;
struct panfrost_ptr trans = panfrost_pool_alloc_aligned(
&batch->pool, vs_size + fs_size, MALI_ATTRIBUTE_LENGTH);
@ -2044,8 +2092,8 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
memset(gen_formats, 0, sizeof(gen_formats));
unsigned gen_stride = 0;
assert(vs->varying_count < ARRAY_SIZE(gen_offsets));
assert(fs->varying_count < ARRAY_SIZE(gen_offsets));
assert(vs->info.varyings.output_count < ARRAY_SIZE(gen_offsets));
assert(fs->info.varyings.input_count < ARRAY_SIZE(gen_offsets));
unsigned streamout_offsets[32];
@ -2056,16 +2104,16 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
}
struct mali_attribute_packed *ovs = (struct mali_attribute_packed *)trans.cpu;
struct mali_attribute_packed *ofs = ovs + vs->varying_count;
struct mali_attribute_packed *ofs = ovs + vs->info.varyings.output_count;
for (unsigned i = 0; i < vs->varying_count; i++) {
for (unsigned i = 0; i < vs->info.varyings.output_count; i++) {
panfrost_emit_varying(dev, ovs + i, vs, fs, vs, present, 0,
ctx->streamout.num_targets, streamout_offsets,
gen_offsets, gen_formats, &gen_stride, i,
true, false);
}
for (unsigned i = 0; i < fs->varying_count; i++) {
for (unsigned i = 0; i < fs->info.varyings.input_count; i++) {
panfrost_emit_varying(dev, ofs + i, fs, vs, vs, present, point_coord_mask,
ctx->streamout.num_targets, streamout_offsets,
gen_offsets, gen_formats, &gen_stride, i,
@ -2114,8 +2162,8 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
pan_emit_special_input(varyings, present, PAN_VARY_FRAGCOORD, MALI_ATTRIBUTE_SPECIAL_FRAG_COORD);
*buffers = T.gpu;
*vs_attribs = vs->varying_count ? trans.gpu : 0;
*fs_attribs = fs->varying_count ? trans.gpu + vs_size : 0;
*vs_attribs = vs->info.varyings.output_count ? trans.gpu : 0;
*fs_attribs = fs->info.varyings.input_count ? trans.gpu + vs_size : 0;
}
void


@ -71,7 +71,7 @@ panfrost_create_compute_state(
}
panfrost_shader_compile(ctx, so->cbase.ir_type, so->cbase.prog,
MESA_SHADER_COMPUTE, v, NULL);
MESA_SHADER_COMPUTE, v);
return so;
}


@ -150,7 +150,7 @@ panfrost_writes_point_size(struct panfrost_context *ctx)
assert(ctx->shader[PIPE_SHADER_VERTEX]);
struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
return vs->writes_point_size && ctx->active_prim == PIPE_PRIM_POINTS;
return vs->info.vs.writes_point_size && ctx->active_prim == PIPE_PRIM_POINTS;
}
/* The entire frame is in memory -- send it off to the kernel! */
@ -739,12 +739,11 @@ panfrost_create_shader_state(
struct panfrost_context *ctx = pan_context(pctx);
struct panfrost_shader_state state = { 0 };
uint64_t outputs_written;
panfrost_shader_compile(ctx, PIPE_SHADER_IR_NIR,
so->base.ir.nir,
tgsi_processor_to_shader_stage(stage),
&state, &outputs_written);
&state);
}
return so;
@ -821,11 +820,12 @@ panfrost_variant_matches(
{
struct panfrost_device *dev = pan_device(ctx->base.screen);
if (variant->outputs_read) {
if (variant->info.stage == MESA_SHADER_FRAGMENT &&
variant->info.fs.outputs_read) {
struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
unsigned i;
BITSET_FOREACH_SET(i, &variant->outputs_read, 8) {
BITSET_FOREACH_SET(i, &variant->info.fs.outputs_read, 8) {
enum pipe_format fmt = PIPE_FORMAT_R8G8B8A8_UNORM;
if ((fb->nr_cbufs > i) && fb->cbufs[i])
@ -963,15 +963,12 @@ panfrost_bind_shader_state(
/* We finally have a variant, so compile it */
if (!shader_state->compiled) {
uint64_t outputs_written = 0;
panfrost_shader_compile(ctx, variants->base.type,
variants->base.type == PIPE_SHADER_IR_NIR ?
variants->base.ir.nir :
variants->base.tokens,
tgsi_processor_to_shader_stage(type),
shader_state,
&outputs_written);
shader_state);
shader_state->compiled = true;
@ -980,7 +977,8 @@ panfrost_bind_shader_state(
shader_state->stream_output = variants->base.stream_output;
shader_state->so_mask =
update_so_info(&shader_state->stream_output, outputs_written);
update_so_info(&shader_state->stream_output,
shader_state->info.outputs_written);
}
}
@ -1251,7 +1249,8 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx,
* keyed to the framebuffer format (due to EXT_framebuffer_fetch) */
struct panfrost_shader_variants *fs = ctx->shader[PIPE_SHADER_FRAGMENT];
if (fs && fs->variant_count && fs->variants[fs->active_variant].outputs_read)
if (fs && fs->variant_count &&
fs->variants[fs->active_variant].info.fs.outputs_read)
ctx->base.bind_fs_state(&ctx->base, fs);
}


@ -214,46 +214,15 @@ struct panfrost_shader_state {
struct MALI_RENDERER_PROPERTIES properties;
struct MALI_PRELOAD preload;
/* Non-descript information */
unsigned work_reg_count;
bool sample_shading;
bool can_discard;
bool writes_point_size;
bool writes_depth;
bool writes_stencil;
bool reads_point_coord;
bool reads_face;
bool reads_frag_coord;
bool writes_global;
unsigned stack_size;
unsigned shared_size;
struct pan_shader_info info;
/* Does the fragment shader have side effects? In particular, if output
* is masked out, is it legal to skip shader execution? */
bool fs_sidefx;
/* For Bifrost - output type for each RT */
enum mali_bifrost_register_file_format blend_types[MALI_BIFROST_BLEND_MAX_RT];
unsigned attribute_count, varying_count, ubo_count;
enum mali_format varyings[PIPE_MAX_ATTRIBS];
gl_varying_slot varyings_loc[PIPE_MAX_ATTRIBS];
struct pipe_stream_output_info stream_output;
uint64_t so_mask;
unsigned sysval_count;
unsigned sysval[MAX_SYSVAL_COUNT];
struct panfrost_ubo_push push;
/* GPU-executable memory */
struct panfrost_bo *bo;
BITSET_WORD outputs_read;
enum pipe_format rt_formats[8];
/* Blend return addresses */
uint32_t blend_ret_addrs[8];
};
/* A collection of varyings (the CSO) */
@ -374,8 +343,7 @@ panfrost_shader_compile(struct panfrost_context *ctx,
enum pipe_shader_ir ir_type,
const void *ir,
gl_shader_stage stage,
struct panfrost_shader_state *state,
uint64_t *outputs_written);
struct panfrost_shader_state *state);
void
panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so,


@ -1238,7 +1238,7 @@ panfrost_batch_adjust_stack_size(struct panfrost_batch *batch)
if (!ss)
continue;
batch->stack_size = MAX2(batch->stack_size, ss->stack_size);
batch->stack_size = MAX2(batch->stack_size, ss->info.tls_size);
}
}


@ -119,10 +119,10 @@ void
bi_opt_push_ubo(bi_context *ctx)
{
/* This pass only runs once */
assert(ctx->push->count == 0);
assert(ctx->info->push.count == 0);
struct bi_ubo_analysis analysis = bi_analyze_ranges(ctx);
bi_pick_ubo(ctx->push, &analysis);
bi_pick_ubo(&ctx->info->push, &analysis);
bi_foreach_instr_global_safe(ctx, ins) {
if (!bi_is_direct_aligned_ubo(ins)) continue;
@ -141,8 +141,9 @@ bi_opt_push_ubo(bi_context *ctx)
for (unsigned w = 0; w < channels; ++w) {
/* FAU is grouped in pairs (2 x 4-byte) */
unsigned base = pan_lookup_pushed_ubo(ctx->push, ubo,
(offset + 4 * w));
unsigned base =
pan_lookup_pushed_ubo(&ctx->info->push, ubo,
(offset + 4 * w));
unsigned fau_idx = (base >> 1);
unsigned fau_hi = (base & 1);


@ -713,11 +713,11 @@ bi_collect_blend_ret_addr(bi_context *ctx, struct util_dynarray *emission,
unsigned loc = tuple->regs.fau_idx - BIR_FAU_BLEND_0;
assert(loc < ARRAY_SIZE(ctx->blend_ret_offsets));
assert(!ctx->blend_ret_offsets[loc]);
ctx->blend_ret_offsets[loc] =
assert(loc < ARRAY_SIZE(ctx->info->bifrost.blend));
assert(!ctx->info->bifrost.blend[loc].return_offset);
ctx->info->bifrost.blend[loc].return_offset =
util_dynarray_num_elements(emission, uint8_t);
assert(!(ctx->blend_ret_offsets[loc] & 0x7));
assert(!(ctx->info->bifrost.blend[loc].return_offset & 0x7));
}
unsigned


@ -388,7 +388,7 @@ bi_register_allocate(bi_context *ctx)
unsigned iter_count = 1000; /* max iterations */
/* Number of bytes of memory we've spilled into */
unsigned spill_count = ctx->tls_size;
unsigned spill_count = ctx->info->tls_size;
do {
if (l) {
@ -410,7 +410,7 @@ bi_register_allocate(bi_context *ctx)
assert(success);
ctx->tls_size = spill_count;
ctx->info->tls_size = spill_count;
bi_install_registers(ctx, l);
lcra_free(l);


@ -297,7 +297,8 @@ bi_load_sysval_to(bi_builder *b, bi_index dest, int sysval,
unsigned nr_components, unsigned offset)
{
unsigned uniform =
pan_lookup_sysval(b->shader->sysval_to_id, &b->shader->sysvals,
pan_lookup_sysval(b->shader->sysval_to_id,
&b->shader->info->sysvals,
sysval);
unsigned idx = (uniform * 16) + offset;
@ -368,8 +369,7 @@ bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, unsigned rt)
}
assert(rt < 8);
assert(b->shader->blend_types);
b->shader->blend_types[rt] = T;
b->shader->info->bifrost.blend[rt].type = T;
}
/* Blend shaders do not need to run ATEST since they are dependent on a
@ -2511,23 +2511,23 @@ bi_lower_branch(bi_block *block)
}
}
panfrost_program *
bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
const struct panfrost_compile_inputs *inputs)
void
bifrost_compile_shader_nir(nir_shader *nir,
const struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
struct pan_shader_info *info)
{
panfrost_program *program = rzalloc(mem_ctx, panfrost_program);
bifrost_debug = debug_get_option_bifrost_debug();
bi_context *ctx = rzalloc(NULL, bi_context);
ctx->sysval_to_id = panfrost_init_sysvals(&ctx->sysvals, ctx);
ctx->sysval_to_id = panfrost_init_sysvals(&info->sysvals, ctx);
ctx->inputs = inputs;
ctx->nir = nir;
ctx->info = info;
ctx->stage = nir->info.stage;
ctx->quirks = bifrost_get_quirks(inputs->gpu_id);
ctx->arch = inputs->gpu_id >> 12;
ctx->push = &program->push;
list_inithead(&ctx->blocks);
/* Lower gl_Position pre-optimisation, but after lowering vars to ssa
@ -2565,8 +2565,7 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
nir_print_shader(nir, stdout);
}
ctx->blend_types = program->blend_types;
ctx->tls_size = nir->scratch_size;
info->tls_size = nir->scratch_size;
nir_foreach_function(func, nir) {
if (!func->impl)
@ -2614,8 +2613,7 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal)
bi_print_shader(ctx, stdout);
util_dynarray_init(&program->compiled, NULL);
unsigned final_clause = bi_pack(ctx, &program->compiled);
unsigned final_clause = bi_pack(ctx, binary);
/* If we need to wait for ATEST or BLEND in the first clause, pass the
* corresponding bits through to the renderer state descriptor */
@ -2623,17 +2621,12 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
bi_clause *first_clause = bi_next_clause(ctx, first_block, NULL);
unsigned first_deps = first_clause ? first_clause->dependencies : 0;
program->wait_6 = (first_deps & (1 << 6));
program->wait_7 = (first_deps & (1 << 7));
memcpy(program->blend_ret_offsets, ctx->blend_ret_offsets, sizeof(program->blend_ret_offsets));
program->sysval_count = ctx->sysvals.sysval_count;
memcpy(program->sysvals, ctx->sysvals.sysvals, sizeof(ctx->sysvals.sysvals[0]) * ctx->sysvals.sysval_count);
info->bifrost.wait_6 = (first_deps & (1 << 6));
info->bifrost.wait_7 = (first_deps & (1 << 7));
if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) {
disassemble_bifrost(stdout, program->compiled.data,
program->compiled.size,
bifrost_debug & BIFROST_DBG_VERBOSE);
disassemble_bifrost(stdout, binary->data, binary->size,
bifrost_debug & BIFROST_DBG_VERBOSE);
}
/* Pad the shader with enough zero bytes to trick the prefetcher,
@ -2641,19 +2634,15 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
* so the size remains 0) */
unsigned prefetch_size = BIFROST_SHADER_PREFETCH - final_clause;
if (program->compiled.size) {
memset(util_dynarray_grow(&program->compiled, uint8_t, prefetch_size),
if (binary->size) {
memset(util_dynarray_grow(binary, uint8_t, prefetch_size),
0, prefetch_size);
}
program->tls_size = ctx->tls_size;
if ((bifrost_debug & BIFROST_DBG_SHADERDB || inputs->shaderdb) &&
!skip_internal) {
bi_print_stats(ctx, program->compiled.size, stderr);
bi_print_stats(ctx, binary->size, stderr);
}
ralloc_free(ctx);
return program;
}


@ -28,9 +28,11 @@
#include "util/u_dynarray.h"
#include "panfrost/util/pan_ir.h"
panfrost_program *
bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
const struct panfrost_compile_inputs *inputs);
void
bifrost_compile_shader_nir(nir_shader *nir,
const struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
struct pan_shader_info *info);
static const nir_shader_compiler_options bifrost_nir_options = {
.lower_scmp = true,


@ -32,7 +32,7 @@
#include "util/u_dynarray.h"
#include "bifrost_compile.h"
static panfrost_program *
static void
compile_shader(char **argv, bool vertex_only)
{
struct gl_shader_program *prog;
@ -53,7 +53,10 @@ compile_shader(char **argv, bool vertex_only)
prog = standalone_compile_shader(&options, 2, argv, &local_ctx);
prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program->info.stage = MESA_SHADER_FRAGMENT;
panfrost_program *compiled;
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
for (unsigned i = 0; i < 2; ++i) {
nir[i] = glsl_to_nir(&local_ctx, prog, shader_types[i], &bifrost_nir_options);
NIR_PASS_V(nir[i], nir_lower_global_vars_to_local);
@ -70,14 +73,16 @@ compile_shader(char **argv, bool vertex_only)
struct panfrost_compile_inputs inputs = {
.gpu_id = 0x7212, /* Mali G52 */
};
struct pan_shader_info info;
compiled = bifrost_compile_shader_nir(NULL, nir[i], &inputs);
util_dynarray_clear(&binary);
bifrost_compile_shader_nir(nir[i], &inputs, &binary, &info);
if (vertex_only)
return compiled;
break;
}
return compiled;
util_dynarray_fini(&binary);
}
#define BI_FOURCC(ch0, ch1, ch2, ch3) ( \


@ -496,17 +496,12 @@ typedef struct bi_block {
typedef struct {
const struct panfrost_compile_inputs *inputs;
nir_shader *nir;
struct pan_shader_info *info;
gl_shader_stage stage;
struct list_head blocks; /* list of bi_block */
struct panfrost_sysvals sysvals;
struct hash_table_u64 *sysval_to_id;
struct panfrost_ubo_push *push;
uint32_t quirks;
unsigned arch;
unsigned tls_size;
/* Blend return offsets */
uint32_t blend_ret_offsets[8];
/* During NIR->BIR */
bi_block *current_block;
@ -514,7 +509,6 @@ typedef struct {
bi_block *break_block;
bi_block *continue_block;
bool emitted_atest;
nir_alu_type *blend_types;
/* For creating temporaries */
unsigned ssa_alloc;


@ -43,11 +43,13 @@
* This is primarily designed as a fallback for preloads but could be extended
* for other clears/blits if needed in the future. */
static panfrost_program *
static void
panfrost_build_blit_shader(struct panfrost_device *dev,
gl_frag_result loc,
nir_alu_type T,
bool ms)
bool ms,
struct util_dynarray *binary,
struct pan_shader_info *info)
{
bool is_colour = loc >= FRAG_RESULT_DATA0;
@ -110,11 +112,9 @@ panfrost_build_blit_shader(struct panfrost_device *dev,
.is_blit = true,
};
panfrost_program *program =
pan_shader_compile(dev, NULL, shader, &inputs);
pan_shader_compile(dev, shader, &inputs, binary, info);
ralloc_free(shader);
return program;
}
/* Compile and upload all possible blit shaders ahead-of-time to reduce draw
@ -162,6 +162,9 @@ panfrost_init_blit_shaders(struct panfrost_device *dev)
/* Don't bother generating multisampling variants if we don't actually
* support multisampling */
bool has_ms = !(dev->quirks & MIDGARD_SFBD);
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
for (unsigned ms = 0; ms <= has_ms; ++ms) {
for (unsigned i = 0; i < ARRAY_SIZE(shader_descs); ++i) {
@ -172,27 +175,38 @@ panfrost_init_blit_shaders(struct panfrost_device *dev)
continue;
struct pan_blit_shader *shader = &dev->blit_shaders.loads[loc][T][ms];
panfrost_program *program =
panfrost_build_blit_shader(dev, loc,
nir_types[T], ms);
struct pan_shader_info info;
assert(offset + program->compiled.size < total_size);
util_dynarray_clear(&binary);
panfrost_build_blit_shader(dev, loc,
nir_types[T], ms,
&binary, &info);
assert(offset + binary.size < total_size);
memcpy(dev->blit_shaders.bo->ptr.cpu + offset,
program->compiled.data, program->compiled.size);
binary.data, binary.size);
shader->shader = (dev->blit_shaders.bo->ptr.gpu + offset) |
program->first_tag;
shader->shader = (dev->blit_shaders.bo->ptr.gpu + offset);
if (pan_is_bifrost(dev)) {
int rt = loc - FRAG_RESULT_DATA0;
if (rt >= 0 && rt < 8 &&
info.bifrost.blend[rt].return_offset) {
shader->blend_ret_addr =
shader->shader +
info.bifrost.blend[rt].return_offset;
}
} else {
shader->shader |= info.midgard.first_tag;
}
int rt = loc - FRAG_RESULT_DATA0;
if (rt >= 0 && rt < 8 && program->blend_ret_offsets[rt])
shader->blend_ret_addr = program->blend_ret_offsets[rt] + shader->shader;
offset += ALIGN_POT(program->compiled.size,
offset += ALIGN_POT(binary.size,
pan_is_bifrost(dev) ? 128 : 64);
ralloc_free(program);
}
}
}
util_dynarray_fini(&binary);
}
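Illustrative arithmetic for the packing above: consecutive blit shader binaries are laid out with ISA-dependent alignment (Bifrost needs 128 bytes for its prefetcher, Midgard 64; sizes hypothetical):

    unsigned sz = 150;                            /* hypothetical binary size */
    unsigned bifrost_stride = ALIGN_POT(sz, 128); /* 256 */
    unsigned midgard_stride = ALIGN_POT(sz, 64);  /* 192 */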
static void


@ -37,13 +37,196 @@ pan_shader_get_compiler_options(const struct panfrost_device *dev)
return &midgard_nir_options;
}
panfrost_program *
pan_shader_compile(const struct panfrost_device *dev,
void *mem_ctx, nir_shader *nir,
const struct panfrost_compile_inputs *inputs)
static enum pipe_format
varying_format(nir_alu_type t, unsigned ncomps)
{
if (pan_is_bifrost(dev))
return bifrost_compile_shader_nir(mem_ctx, nir, inputs);
#define VARYING_FORMAT(ntype, nsz, ptype, psz) \
{ \
.type = nir_type_ ## ntype ## nsz, \
.formats = { \
PIPE_FORMAT_R ## psz ## _ ## ptype, \
PIPE_FORMAT_R ## psz ## G ## psz ## _ ## ptype, \
PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## _ ## ptype, \
PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## A ## psz ## _ ## ptype, \
} \
}
return midgard_compile_shader_nir(mem_ctx, nir, inputs);
static const struct {
nir_alu_type type;
enum pipe_format formats[4];
} conv[] = {
VARYING_FORMAT(float, 32, FLOAT, 32),
VARYING_FORMAT(int, 32, SINT, 32),
VARYING_FORMAT(uint, 32, UINT, 32),
VARYING_FORMAT(float, 16, FLOAT, 16),
VARYING_FORMAT(int, 16, SINT, 16),
VARYING_FORMAT(uint, 16, UINT, 16),
VARYING_FORMAT(int, 8, SINT, 8),
VARYING_FORMAT(uint, 8, UINT, 8),
VARYING_FORMAT(bool, 32, UINT, 32),
VARYING_FORMAT(bool, 16, UINT, 16),
VARYING_FORMAT(bool, 8, UINT, 8),
VARYING_FORMAT(bool, 1, UINT, 8),
};
#undef VARYING_FORMAT
assert(ncomps > 0 && ncomps <= ARRAY_SIZE(conv[0].formats));
for (unsigned i = 0; i < ARRAY_SIZE(conv); i++) {
if (conv[i].type == t)
return conv[i].formats[ncomps - 1];
}
return PIPE_FORMAT_NONE;
}
static void
collect_varyings(nir_shader *s, nir_variable_mode varying_mode,
struct pan_shader_varying *varyings,
unsigned *varying_count)
{
*varying_count = 0;
nir_foreach_variable_with_modes(var, s, varying_mode) {
unsigned loc = var->data.driver_location;
unsigned sz = glsl_count_attribute_slots(var->type, FALSE);
const struct glsl_type *column =
glsl_without_array_or_matrix(var->type);
unsigned chan = glsl_get_components(column);
enum glsl_base_type base_type = glsl_get_base_type(column);
/* If we have a fractional location added, we need to increase the size
* so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4.
* We could do better but this is an edge case as it is, normally
* packed varyings will be aligned.
*/
chan += var->data.location_frac;
assert(chan >= 1 && chan <= 4);
nir_alu_type type = nir_get_nir_type_for_glsl_base_type(base_type);
type = nir_alu_type_get_base_type(type);
/* Demote to fp16 where possible. int16 varyings are TODO as the hw
* will saturate instead of wrap which is not conformant, so we need to
* insert i2i16/u2u16 instructions before the st_vary_32i/32u to get
* the intended behaviour.
*/
if (type == nir_type_float &&
(var->data.precision == GLSL_PRECISION_MEDIUM ||
var->data.precision == GLSL_PRECISION_LOW)) {
type |= 16;
} else {
type |= 32;
}
enum pipe_format format = varying_format(type, chan);
assert(format != PIPE_FORMAT_NONE);
for (int c = 0; c < sz; ++c) {
varyings[loc + c].location = var->data.location + c;
varyings[loc + c].format = format;
}
*varying_count = MAX2(*varying_count, loc + sz);
}
}
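As a usage sketch of the helpers above: a mediump float vec3 varying packed at `location_frac == 1` (YZW) occupies four channels and demotes to 16-bit, so:

    enum pipe_format fmt = varying_format(nir_type_float16, 4);
    /* fmt == PIPE_FORMAT_R16G16B16A16_FLOAT */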
void
pan_shader_compile(const struct panfrost_device *dev,
nir_shader *s,
const struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
struct pan_shader_info *info)
{
memset(info, 0, sizeof(*info));
if (pan_is_bifrost(dev))
bifrost_compile_shader_nir(s, inputs, binary, info);
else
midgard_compile_shader_nir(s, inputs, binary, info);
info->stage = s->info.stage;
info->contains_barrier = s->info.uses_memory_barrier ||
s->info.uses_control_barrier;
switch (info->stage) {
case MESA_SHADER_VERTEX:
info->attribute_count = util_bitcount64(s->info.inputs_read);
bool vertex_id = BITSET_TEST(s->info.system_values_read,
SYSTEM_VALUE_VERTEX_ID);
if (vertex_id)
info->attribute_count = MAX2(info->attribute_count, PAN_VERTEX_ID + 1);
bool instance_id = BITSET_TEST(s->info.system_values_read,
SYSTEM_VALUE_INSTANCE_ID);
if (instance_id)
info->attribute_count = MAX2(info->attribute_count, PAN_INSTANCE_ID + 1);
info->vs.writes_point_size =
s->info.outputs_written & (1 << VARYING_SLOT_PSIZ);
collect_varyings(s, nir_var_shader_out, info->varyings.output,
&info->varyings.output_count);
break;
case MESA_SHADER_FRAGMENT:
if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
info->fs.writes_depth = true;
if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
info->fs.writes_stencil = true;
uint64_t outputs_read = s->info.outputs_read;
if (outputs_read & BITFIELD64_BIT(FRAG_RESULT_COLOR))
outputs_read |= BITFIELD64_BIT(FRAG_RESULT_DATA0);
info->fs.outputs_read = outputs_read >> FRAG_RESULT_DATA0;
/* EXT_shader_framebuffer_fetch requires per-sample */
info->fs.sample_shading = s->info.fs.uses_sample_shading ||
outputs_read;
info->fs.can_discard = s->info.fs.uses_discard;
info->fs.helper_invocations = s->info.fs.needs_quad_helper_invocations;
/* List of reasons we need to execute frag shaders when things
* are masked off */
info->fs.sidefx = s->info.writes_memory ||
s->info.fs.uses_discard ||
s->info.fs.uses_demote;
info->fs.reads_frag_coord =
(s->info.inputs_read & (1 << VARYING_SLOT_POS)) ||
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
info->fs.reads_point_coord =
s->info.inputs_read & (1 << VARYING_SLOT_PNTC);
info->fs.reads_face =
(s->info.inputs_read & (1 << VARYING_SLOT_FACE)) ||
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
info->fs.reads_sample_id =
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
info->fs.reads_sample_pos =
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS);
info->fs.reads_sample_mask_in =
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
info->fs.reads_helper_invocation =
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION);
collect_varyings(s, nir_var_shader_in, info->varyings.input,
&info->varyings.input_count);
break;
case MESA_SHADER_COMPUTE:
info->wls_size = s->info.cs.shared_size;
break;
default:
unreachable("Unknown shader state");
}
info->outputs_written = s->info.outputs_written;
/* Sysvals have dedicated UBO */
info->ubo_count = s->info.num_ubos + (info->sysvals.sysval_count ? 1 : 0);
info->attribute_count += util_bitcount(s->info.images_used);
info->writes_global = s->info.writes_memory;
info->texture_count = s->info.num_textures;
}
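Illustrative arithmetic for the UBO accounting above, which panfrost_emit_const_buf() in this diff undoes to recover the user-visible count (values hypothetical):

    unsigned num_ubos = 3, sysval_count = 4;                 /* hypothetical */
    unsigned ubo_count = num_ubos + (sysval_count ? 1 : 0);  /* info.ubo_count == 4 */
    unsigned user_ubos = ubo_count - (sysval_count ? 1 : 0); /* back to 3 */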


@ -33,9 +33,11 @@ struct panfrost_device;
const nir_shader_compiler_options *
pan_shader_get_compiler_options(const struct panfrost_device *dev);
panfrost_program *
void
pan_shader_compile(const struct panfrost_device *dev,
void *mem_ctx, nir_shader *nir,
const struct panfrost_compile_inputs *inputs);
nir_shader *nir,
const struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
struct pan_shader_info *info);
#endif


@ -238,6 +238,7 @@ enum midgard_rt_id {
typedef struct compiler_context {
const struct panfrost_compile_inputs *inputs;
nir_shader *nir;
struct pan_shader_info *info;
gl_shader_stage stage;
/* Number of samples for a keyed blend shader. Depends on is_blend */
@ -249,9 +250,6 @@ typedef struct compiler_context {
/* Index to precolour to r2 for a dual-source blend colour */
unsigned blend_src1;
/* Number of bytes used for Thread Local Storage */
unsigned tls_size;
/* Count of spills and fills for shaderdb */
unsigned spills;
unsigned fills;
@ -291,10 +289,6 @@ typedef struct compiler_context {
/* Set of NIR indices that were already emitted as outmods */
BITSET_WORD *already_emitted;
/* Just the count of the max register used. Higher count => higher
* register pressure */
int work_registers;
/* The number of uniforms allowable for the fast path */
int uniform_cutoff;
@ -312,9 +306,7 @@ typedef struct compiler_context {
/* Writeout instructions for each render target */
midgard_instruction *writeout_branch[MIDGARD_NUM_RTS][MIDGARD_MAX_SAMPLE_ITER];
struct panfrost_sysvals sysvals;
struct hash_table_u64 *sysval_to_id;
struct panfrost_ubo_push *push;
} compiler_context;
/* Per-block live_in/live_out */


@ -1448,7 +1448,7 @@ emit_sysval_read(compiler_context *ctx, nir_instr *instr,
int sysval = panfrost_sysval_for_instr(instr, &nir_dest);
unsigned dest = nir_dest_index(&nir_dest);
unsigned uniform =
pan_lookup_sysval(ctx->sysval_to_id, &ctx->sysvals, sysval);
pan_lookup_sysval(ctx->sysval_to_id, &ctx->info->sysvals, sysval);
/* Emit the read itself -- this is never indirect */
midgard_instruction *ins =
@ -2978,24 +2978,22 @@ mir_add_writeout_loops(compiler_context *ctx)
}
}
panfrost_program *
midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
const struct panfrost_compile_inputs *inputs)
void
midgard_compile_shader_nir(nir_shader *nir,
const struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
struct pan_shader_info *info)
{
panfrost_program *program = rzalloc(mem_ctx, panfrost_program);
struct util_dynarray *compiled = &program->compiled;
midgard_debug = debug_get_option_midgard_debug();
/* TODO: Bound against what? */
compiler_context *ctx = rzalloc(NULL, compiler_context);
ctx->sysval_to_id = panfrost_init_sysvals(&ctx->sysvals, ctx);
ctx->sysval_to_id = panfrost_init_sysvals(&info->sysvals, ctx);
ctx->inputs = inputs;
ctx->nir = nir;
ctx->info = info;
ctx->stage = nir->info.stage;
ctx->push = &program->push;
if (inputs->is_blend) {
unsigned nr_samples = MAX2(inputs->blend.nr_samples, 1);
@ -3013,7 +3011,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
/* Start off with a safe cutoff, allowing usage of all 16 work
* registers. Later, we'll promote uniform reads to uniform registers
* if we determine it is beneficial to do so */
ctx->uniform_cutoff = 8;
info->midgard.uniform_cutoff = 8;
/* Initialize at a global (not block) level hash tables */
@ -3059,7 +3057,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
nir_print_shader(nir, stdout);
}
ctx->tls_size = nir->scratch_size;
info->tls_size = nir->scratch_size;
nir_foreach_function(func, nir) {
if (!func->impl)
@ -3086,8 +3084,6 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
break; /* TODO: Multi-function shaders */
}
util_dynarray_init(compiled, program);
/* Per-block lowering before opts */
mir_foreach_block(ctx, _block) {
@ -3164,7 +3160,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
if (!bundle->last_writeout && (current_bundle + 1 < bundle_count))
lookahead = source_order_bundles[current_bundle + 1]->tag;
emit_binary_bundle(ctx, block, bundle, compiled, lookahead);
emit_binary_bundle(ctx, block, bundle, binary, lookahead);
++current_bundle;
}
@ -3175,20 +3171,11 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
free(source_order_bundles);
/* Report the very first tag executed */
program->first_tag = midgard_get_first_tag_from_block(ctx, 0);
/* Deal with off-by-one related to the fencepost problem */
program->work_register_count = ctx->work_registers + 1;
program->uniform_cutoff = ctx->uniform_cutoff;
program->tls_size = ctx->tls_size;
program->sysval_count = ctx->sysvals.sysval_count;
memcpy(program->sysvals, ctx->sysvals.sysvals, sizeof(ctx->sysvals.sysvals[0]) * ctx->sysvals.sysval_count);
info->midgard.first_tag = midgard_get_first_tag_from_block(ctx, 0);
if ((midgard_debug & MIDGARD_DBG_SHADERS) && !nir->info.internal) {
disassemble_midgard(stdout, program->compiled.data,
program->compiled.size, inputs->gpu_id);
disassemble_midgard(stdout, binary->data,
binary->size, inputs->gpu_id);
}
if ((midgard_debug & MIDGARD_DBG_SHADERDB || inputs->shaderdb) &&
@ -3209,7 +3196,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
/* Calculate thread count. There are certain cutoffs by
* register count for thread count */
unsigned nr_registers = program->work_register_count;
unsigned nr_registers = info->work_reg_count;
unsigned nr_threads =
(nr_registers <= 4) ? 4 :
@ -3232,6 +3219,4 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
}
ralloc_free(ctx);
return program;
}
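
Note: the shader-db statistics above derive the per-core thread count from the work-register count, but only the first cutoff survives the hunk boundary. The full ladder in this sketch is therefore an assumption about the elided lines, not something the diff shows:

    /* Assumed thread-count ladder; only the first cutoff
     * (<= 4 registers -> 4 threads) is visible in the hunk above.
     * Fewer work registers per thread leave room for more resident
     * threads per core. */
    static unsigned
    midgard_thread_count(unsigned nr_registers)
    {
       return (nr_registers <= 4) ? 4 :
              (nr_registers <= 8) ? 2 :
              1;
    }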

View File

@@ -29,9 +29,11 @@
#include "util/u_dynarray.h"
#include "panfrost/util/pan_ir.h"
panfrost_program *
midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
const struct panfrost_compile_inputs *inputs);
void
midgard_compile_shader_nir(nir_shader *nir,
const struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
struct pan_shader_info *info);
/* NIR options are shared between the standalone compiler and the online
* compiler. Defining it here is the simplest, though maybe not the Right
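
Note: with the panfrost_program typedef gone, callers of this prototype own both output objects. A minimal sketch of the new calling sequence, assuming the includes shown above plus this header; consume_binary is a hypothetical upload hook, not part of this commit:

    #include <string.h>
    #include "util/u_dynarray.h"
    #include "panfrost/util/pan_ir.h"
    /* plus the midgard_compile_shader_nir prototype shown above */

    static void
    compile_with_new_api(nir_shader *nir,
                         const struct panfrost_compile_inputs *inputs,
                         struct pan_shader_info *info)
    {
       struct util_dynarray binary;
       util_dynarray_init(&binary, NULL);
       memset(info, 0, sizeof(*info));

       /* The compiler appends machine code to `binary` and fills `info`
        * in place; nothing is ralloc'ed against a caller context. */
       midgard_compile_shader_nir(nir, inputs, &binary, info);

       consume_binary(binary.data, binary.size); /* hypothetical hook */
       util_dynarray_fini(&binary);
    }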

View File

@@ -99,7 +99,7 @@ index_to_reg(compiler_context *ctx, struct lcra_state *l, unsigned reg, unsigned
/* Report that we actually use this register, and return it */
if (r.reg < 16)
ctx->work_registers = MAX2(ctx->work_registers, r.reg);
ctx->info->work_reg_count = MAX2(ctx->info->work_reg_count, r.reg + 1);
return r;
}
@@ -395,7 +395,7 @@ allocate_registers(compiler_context *ctx, bool *spilled)
* uniforms start and the shader stage. By ABI we limit blend shaders
* to 8 registers, should be lower XXX */
int work_count = ctx->inputs->is_blend ? 8 :
16 - MAX2((ctx->uniform_cutoff - 8), 0);
16 - MAX2((ctx->info->midgard.uniform_cutoff - 8), 0);
/* No register allocation to do with no SSA */
@@ -646,7 +646,7 @@ allocate_registers(compiler_context *ctx, bool *spilled)
if (ctx->blend_src1 != ~0) {
assert(ctx->blend_src1 < ctx->temp_count);
l->solutions[ctx->blend_src1] = (16 * 2);
ctx->work_registers = MAX2(ctx->work_registers, 2);
ctx->info->work_reg_count = MAX2(ctx->info->work_reg_count, 3);
}
mir_compute_interference(ctx, l);
@@ -959,13 +959,14 @@ mir_spill_register(
static void
mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
{
unsigned old_work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0);
unsigned old_work_count =
16 - MAX2((ctx->info->midgard.uniform_cutoff - 8), 0);
unsigned work_count = 16 - MAX2((new_cutoff - 8), 0);
unsigned min_demote = SSA_FIXED_REGISTER(old_work_count);
unsigned max_demote = SSA_FIXED_REGISTER(work_count);
ctx->uniform_cutoff = new_cutoff;
ctx->info->midgard.uniform_cutoff = new_cutoff;
mir_foreach_block(ctx, _block) {
midgard_block *block = (midgard_block *) _block;
@@ -978,7 +979,7 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
unsigned temp = make_compiler_temp(ctx);
unsigned idx = (23 - SSA_REG_FROM_FIXED(ins->src[i])) * 4;
assert(idx < ctx->push->count);
assert(idx < ctx->info->push.count);
midgard_instruction ld = {
.type = TAG_LOAD_STORE_4,
@@ -989,10 +990,10 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
.swizzle = SWIZZLE_IDENTITY_4,
.op = midgard_op_ld_ubo_int4,
.load_store = {
.arg_1 = ctx->push->words[idx].ubo,
.arg_1 = ctx->info->push.words[idx].ubo,
.arg_2 = 0x1E,
},
.constants.u32[0] = ctx->push->words[idx].offset
.constants.u32[0] = ctx->info->push.words[idx].offset
};
mir_insert_instruction_before_scheduled(ctx, block, before, ld);
@@ -1013,7 +1014,7 @@ mir_ra(compiler_context *ctx)
int iter_count = 1000; /* max iterations */
/* Number of 128-bit slots in memory we've spilled into */
unsigned spill_count = DIV_ROUND_UP(ctx->tls_size, 16);
unsigned spill_count = DIV_ROUND_UP(ctx->info->tls_size, 16);
mir_create_pipeline_registers(ctx);
@@ -1025,9 +1026,9 @@ mir_ra(compiler_context *ctx)
/* It's a lot cheaper to demote uniforms to get more
* work registers than to spill to TLS. */
if (l->spill_class == REG_CLASS_WORK &&
ctx->uniform_cutoff > 8) {
ctx->info->midgard.uniform_cutoff > 8) {
mir_demote_uniforms(ctx, MAX2(ctx->uniform_cutoff - 4, 8));
mir_demote_uniforms(ctx, MAX2(ctx->info->midgard.uniform_cutoff - 4, 8));
} else if (spill_node == -1) {
fprintf(stderr, "ERROR: Failed to choose spill node\n");
lcra_free(l);
@@ -1056,7 +1057,7 @@ mir_ra(compiler_context *ctx)
/* Report spilling information. spill_count is in 128-bit slots (vec4 x
* fp32), but tls_size is in bytes, so multiply by 16 */
ctx->tls_size = spill_count * 16;
ctx->info->tls_size = spill_count * 16;
install_registers(ctx, l);
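
Note: the register-allocation arithmetic in these hunks is unchanged, merely rehomed onto pan_shader_info. A worked sketch of the two conversions, with the constants taken from the hunks above:

    #include <stdbool.h>
    #include "util/macros.h" /* MAX2, DIV_ROUND_UP */

    /* Every uniform register promoted past r8 costs one work register:
     * cutoff 8 -> 16 work regs, 12 -> 12, 16 -> 8. Blend shaders are
     * pinned to 8 by ABI. */
    static unsigned
    work_register_budget(bool is_blend, unsigned uniform_cutoff)
    {
       return is_blend ? 8 : 16 - MAX2((int)uniform_cutoff - 8, 0);
    }

    /* TLS spill slots are 128-bit (vec4 x fp32), so the byte size and
     * the slot count convert by a factor of 16 in both directions. */
    static unsigned
    spill_slot_count(unsigned tls_size)
    {
       return DIV_ROUND_UP(tls_size, 16);
    }

When allocation fails on the work class, mir_demote_uniforms drops the cutoff by four at a time (never below eight), trading uniform registers back for work registers before falling back to TLS spilling.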

View File

@@ -263,7 +263,7 @@ midgard_promote_uniforms(compiler_context *ctx)
unsigned work_count = mir_work_heuristic(ctx, &analysis);
unsigned promoted_count = 24 - work_count;
mir_pick_ubo(ctx->push, &analysis, promoted_count);
mir_pick_ubo(&ctx->info->push, &analysis, promoted_count);
/* First, figure out special indices a priori so we don't recompute a lot */
BITSET_WORD *special = mir_special_indices(ctx);
@@ -279,7 +279,7 @@ midgard_promote_uniforms(compiler_context *ctx)
if (!BITSET_TEST(analysis.blocks[ubo].pushed, qword)) continue;
/* Find where we pushed to, TODO: unaligned pushes to pack */
unsigned base = pan_lookup_pushed_ubo(ctx->push, ubo, qword * 16);
unsigned base = pan_lookup_pushed_ubo(&ctx->info->push, ubo, qword * 16);
assert((base & 0x3) == 0);
unsigned address = base / 4;
@@ -288,7 +288,8 @@ midgard_promote_uniforms(compiler_context *ctx)
/* Should've taken into account when pushing */
assert(address < promoted_count);
ctx->uniform_cutoff = MAX2(ctx->uniform_cutoff, address + 1);
ctx->info->midgard.uniform_cutoff =
MAX2(ctx->info->midgard.uniform_cutoff, address + 1);
unsigned promoted = SSA_FIXED_REGISTER(uniform_reg);
/* We do need the move for safety for a non-SSA dest, or if
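
Note: the base / 4 and address + 1 steps above encode the layout contract between the push table and the uniform file. A sketch, inferring the word units of pan_lookup_pushed_ubo's result and the downward register numbering from the (23 - reg) * 4 indexing in mir_demote_uniforms; the helper itself is illustrative:

    #include <assert.h>

    /* Pushed uniforms are tracked as 32-bit words; four words fill one
     * uniform register, handed out downward from r23. */
    static unsigned
    uniform_reg_for_push_word(unsigned word_index)
    {
       assert((word_index & 0x3) == 0);   /* register-aligned pushes only */
       unsigned address = word_index / 4; /* register slot from the top */
       /* uniform_cutoff must be at least address + 1 to cover this slot */
       return 23 - address;
    }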

View File

@@ -115,40 +115,6 @@ pan_lookup_sysval(struct hash_table_u64 *sysval_to_id,
int
panfrost_sysval_for_instr(nir_instr *instr, nir_dest *dest);
typedef struct {
int work_register_count;
int uniform_cutoff;
/* For Bifrost - output type for each RT */
nir_alu_type blend_types[8];
/* For Bifrost - return address for blend instructions */
uint32_t blend_ret_offsets[8];
/* Prepended before uniforms, mapping to SYSVAL_ names for the
* sysval */
unsigned sysval_count;
unsigned sysvals[MAX_SYSVAL_COUNT];
/* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access
* Uniforms (Bifrost) */
struct panfrost_ubo_push push;
int first_tag;
struct util_dynarray compiled;
/* The number of bytes to allocate per-thread for Thread Local Storage
* (register spilling), or zero if no spilling is used */
unsigned tls_size;
/* For Bifrost, should the program wait on dependency slots 6/7 before
* starting? For ATEST/BLEND in the first clause, which can occur with
* extremely simple shaders */
bool wait_6, wait_7;
} panfrost_program;
struct panfrost_compile_inputs {
unsigned gpu_id;
bool is_blend, is_blit;
@@ -163,6 +129,82 @@ struct panfrost_compile_inputs {
enum pipe_format rt_formats[8];
};
struct pan_shader_varying {
gl_varying_slot location;
enum pipe_format format;
};
struct bifrost_shader_blend_info {
nir_alu_type type;
uint32_t return_offset;
};
struct bifrost_shader_info {
struct bifrost_shader_blend_info blend[8];
bool wait_6, wait_7;
};
struct midgard_shader_info {
unsigned uniform_cutoff;
unsigned first_tag;
};
struct pan_shader_info {
gl_shader_stage stage;
unsigned work_reg_count;
unsigned tls_size;
unsigned wls_size;
union {
struct {
bool reads_frag_coord;
bool reads_point_coord;
bool reads_face;
bool helper_invocations;
bool can_discard;
bool writes_depth;
bool writes_stencil;
bool sidefx;
bool reads_sample_id;
bool reads_sample_pos;
bool reads_sample_mask_in;
bool reads_helper_invocation;
bool sample_shading;
BITSET_WORD outputs_read;
} fs;
struct {
bool writes_point_size;
} vs;
};
bool contains_barrier;
bool writes_global;
uint64_t outputs_written;
unsigned texture_count;
unsigned ubo_count;
unsigned attribute_count;
struct {
unsigned input_count;
struct pan_shader_varying input[MAX_VARYING];
unsigned output_count;
struct pan_shader_varying output[MAX_VARYING];
} varyings;
struct panfrost_sysvals sysvals;
/* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access
* Uniforms (Bifrost) */
struct panfrost_ubo_push push;
union {
struct bifrost_shader_info bifrost;
struct midgard_shader_info midgard;
};
};
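
Note: the stage- and ISA-specific state lives in anonymous unions, so reading it is only valid when the discriminating context matches. A small sketch of the expected access pattern (helper name illustrative, includes assumed from the pan_ir.h context above):

    /* fs.* is only meaningful for fragment shaders; likewise midgard./
     * bifrost. are only meaningful for binaries produced by the
     * matching compiler. */
    static bool
    shader_can_discard(const struct pan_shader_info *info)
    {
       return info->stage == MESA_SHADER_FRAGMENT && info->fs.can_discard;
    }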
typedef struct pan_block {
/* Link to next block. Must be first for mir_get_block */
struct list_head link;