diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c
index 66549add540..4d90850f520 100644
--- a/src/gallium/drivers/panfrost/pan_assemble.c
+++ b/src/gallium/drivers/panfrost/pan_assemble.c
@@ -40,35 +40,30 @@
 #include "tgsi/tgsi_dump.h"
 
 static void
-pan_prepare_midgard_props(struct panfrost_shader_state *state,
-                          panfrost_program *program,
-                          gl_shader_stage stage)
+pan_prepare_midgard_props(struct panfrost_shader_state *state)
 {
         pan_prepare(&state->properties, RENDERER_PROPERTIES);
-        state->properties.uniform_buffer_count = state->ubo_count;
-        state->properties.midgard.uniform_count = program->uniform_cutoff;
-        state->properties.midgard.shader_has_side_effects = state->writes_global;
+        state->properties.uniform_buffer_count = state->info.ubo_count;
+        state->properties.midgard.uniform_count = state->info.midgard.uniform_cutoff;
+        state->properties.midgard.shader_has_side_effects = state->info.writes_global;
         state->properties.midgard.fp_mode = MALI_FP_MODE_GL_INF_NAN_ALLOWED;
 
         /* For fragment shaders, work register count, early-z, reads at draw-time */
 
-        if (stage != MESA_SHADER_FRAGMENT)
-                state->properties.midgard.work_register_count = state->work_reg_count;
+        if (state->info.stage != MESA_SHADER_FRAGMENT)
+                state->properties.midgard.work_register_count = state->info.work_reg_count;
 }
 
 static void
-pan_prepare_bifrost_props(struct panfrost_shader_state *state,
-                          panfrost_program *program,
-                          gl_shader_stage stage,
-                          shader_info *info)
+pan_prepare_bifrost_props(struct panfrost_shader_state *state)
 {
-        unsigned fau_count = DIV_ROUND_UP(program->push.count, 2);
+        unsigned fau_count = DIV_ROUND_UP(state->info.push.count, 2);
 
-        switch (stage) {
+        switch (state->info.stage) {
         case MESA_SHADER_VERTEX:
                 pan_prepare(&state->properties, RENDERER_PROPERTIES);
                 state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
-                state->properties.uniform_buffer_count = state->ubo_count;
+                state->properties.uniform_buffer_count = state->info.ubo_count;
 
                 pan_prepare(&state->preload, PRELOAD);
                 state->preload.uniform_count = fau_count;
@@ -78,39 +73,39 @@ pan_prepare_bifrost_props(struct panfrost_shader_state *state,
         case MESA_SHADER_FRAGMENT:
                 pan_prepare(&state->properties, RENDERER_PROPERTIES);
                 /* Early-Z set at draw-time */
-                if (state->writes_depth || state->writes_stencil) {
+                if (state->info.fs.writes_depth || state->info.fs.writes_stencil) {
                         state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
                         state->properties.bifrost.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE;
-                } else if (state->can_discard) {
+                } else if (state->info.fs.can_discard) {
                         state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
                         state->properties.bifrost.pixel_kill_operation = MALI_PIXEL_KILL_WEAK_EARLY;
                 } else {
                         state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
                         state->properties.bifrost.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;
                 }
-                state->properties.uniform_buffer_count = state->ubo_count;
-                state->properties.bifrost.shader_modifies_coverage = state->can_discard;
-                state->properties.bifrost.shader_wait_dependency_6 = program->wait_6;
-                state->properties.bifrost.shader_wait_dependency_7 = program->wait_7;
+                state->properties.uniform_buffer_count = state->info.ubo_count;
+                state->properties.bifrost.shader_modifies_coverage = state->info.fs.can_discard;
+                state->properties.bifrost.shader_wait_dependency_6 = state->info.bifrost.wait_6;
+                state->properties.bifrost.shader_wait_dependency_7 = state->info.bifrost.wait_7;
 
                 pan_prepare(&state->preload, PRELOAD);
                 state->preload.uniform_count = fau_count;
-                state->preload.fragment.fragment_position = state->reads_frag_coord;
+                state->preload.fragment.fragment_position = state->info.fs.reads_frag_coord;
                 state->preload.fragment.coverage = true;
-                state->preload.fragment.primitive_flags = state->reads_face;
+                state->preload.fragment.primitive_flags = state->info.fs.reads_face;
 
                 /* Contains sample ID and sample mask. Sample position and
                  * helper invocation are expressed in terms of the above, so
                  * preload for those too */
                 state->preload.fragment.sample_mask_id =
-                        BITSET_TEST(info->system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
-                        BITSET_TEST(info->system_values_read, SYSTEM_VALUE_SAMPLE_POS) ||
-                        BITSET_TEST(info->system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN) ||
-                        BITSET_TEST(info->system_values_read, SYSTEM_VALUE_HELPER_INVOCATION);
+                        state->info.fs.reads_sample_id |
+                        state->info.fs.reads_sample_pos |
+                        state->info.fs.reads_sample_mask_in |
+                        state->info.fs.reads_helper_invocation;
                 break;
         case MESA_SHADER_COMPUTE:
                 pan_prepare(&state->properties, RENDERER_PROPERTIES);
-                state->properties.uniform_buffer_count = state->ubo_count;
+                state->properties.uniform_buffer_count = state->info.ubo_count;
 
                 pan_prepare(&state->preload, PRELOAD);
                 state->preload.uniform_count = fau_count;
@@ -152,112 +147,12 @@ pan_upload_shader_descriptor(struct panfrost_context *ctx,
         u_upload_unmap(ctx->state_uploader);
 }
 
-static unsigned
-pan_format_from_nir_base(nir_alu_type base)
-{
-        switch (base) {
-        case nir_type_int:
-                return MALI_FORMAT_SINT;
-        case nir_type_uint:
-        case nir_type_bool:
-                return MALI_FORMAT_UINT;
-        case nir_type_float:
-                return MALI_CHANNEL_FLOAT;
-        default:
-                unreachable("Invalid base");
-        }
-}
-
-static unsigned
-pan_format_from_nir_size(nir_alu_type base, unsigned size)
-{
-        if (base == nir_type_float) {
-                switch (size) {
-                case 16: return MALI_FORMAT_SINT;
-                case 32: return MALI_FORMAT_UNORM;
-                default:
-                        unreachable("Invalid float size for format");
-                }
-        } else {
-                switch (size) {
-                case 1:
-                case 8:  return MALI_CHANNEL_8;
-                case 16: return MALI_CHANNEL_16;
-                case 32: return MALI_CHANNEL_32;
-                default:
-                         unreachable("Invalid int size for format");
-                }
-        }
-}
-
-static enum mali_format
-pan_format_from_glsl(const struct glsl_type *type, unsigned precision, unsigned frac)
-{
-        const struct glsl_type *column = glsl_without_array_or_matrix(type);
-        enum glsl_base_type glsl_base = glsl_get_base_type(column);
-        nir_alu_type t = nir_get_nir_type_for_glsl_base_type(glsl_base);
-        unsigned chan = glsl_get_components(column);
-
-        /* If we have a fractional location added, we need to increase the size
-         * so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4.
-         * We could do better but this is an edge case as it is, normally
-         * packed varyings will be aligned. */
-        chan += frac;
-
-        assert(chan >= 1 && chan <= 4);
-
-        unsigned base = nir_alu_type_get_base_type(t);
-        unsigned size = nir_alu_type_get_type_size(t);
-
-        /* Demote to fp16 where possible. int16 varyings are TODO as the hw
-         * will saturate instead of wrap which is not conformant, so we need to
-         * insert i2i16/u2u16 instructions before the st_vary_32i/32u to get
-         * the intended behaviour */
-
-        bool is_16 = (precision == GLSL_PRECISION_MEDIUM)
-                || (precision == GLSL_PRECISION_LOW);
-
-        if (is_16 && base == nir_type_float)
-                size = 16;
-        else
-                size = 32;
-
-        return pan_format_from_nir_base(base) |
-                pan_format_from_nir_size(base, size) |
-                MALI_NR_CHANNELS(chan);
-}
-
-static enum mali_bifrost_register_file_format
-bifrost_blend_type_from_nir(nir_alu_type nir_type)
-{
-        switch(nir_type) {
-        case 0: /* Render target not in use */
-                return 0;
-        case nir_type_float16:
-                return MALI_BIFROST_REGISTER_FILE_FORMAT_F16;
-        case nir_type_float32:
-                return MALI_BIFROST_REGISTER_FILE_FORMAT_F32;
-        case nir_type_int32:
-                return MALI_BIFROST_REGISTER_FILE_FORMAT_I32;
-        case nir_type_uint32:
-                return MALI_BIFROST_REGISTER_FILE_FORMAT_U32;
-        case nir_type_int16:
-                return MALI_BIFROST_REGISTER_FILE_FORMAT_I16;
-        case nir_type_uint16:
-                return MALI_BIFROST_REGISTER_FILE_FORMAT_U16;
-        default:
-                unreachable("Unsupported blend shader type for NIR alu type");
-                return 0;
-        }
-}
-
 void
 panfrost_shader_compile(struct panfrost_context *ctx,
                         enum pipe_shader_ir ir_type,
                         const void *ir,
                         gl_shader_stage stage,
-                        struct panfrost_shader_state *state,
-                        uint64_t *outputs_written)
+                        struct panfrost_shader_state *state)
 {
         struct panfrost_device *dev = pan_device(ctx->base.screen);
 
@@ -280,169 +175,62 @@ panfrost_shader_compile(struct panfrost_context *ctx,
 
         memcpy(inputs.rt_formats, state->rt_formats, sizeof(inputs.rt_formats));
 
-        panfrost_program *program;
+        struct util_dynarray binary;
 
-        program = pan_shader_compile(dev, NULL, s, &inputs);
+        util_dynarray_init(&binary, NULL);
+        pan_shader_compile(dev, s, &inputs, &binary, &state->info);
 
         /* Prepare the compiled binary for upload */
         mali_ptr shader = 0;
-        unsigned attribute_count = 0, varying_count = 0;
-        int size = program->compiled.size;
+        int size = binary.size;
 
         if (size) {
                 state->bo = panfrost_bo_create(dev, size, PAN_BO_EXECUTE);
-                memcpy(state->bo->ptr.cpu, program->compiled.data, size);
+                memcpy(state->bo->ptr.cpu, binary.data, size);
                 shader = state->bo->ptr.gpu;
         }
 
         /* Midgard needs the first tag on the bottom nibble */
 
-        if (!pan_is_bifrost(dev)) {
-                /* If size = 0, we tag as "end-of-shader" */
-
-                if (size)
-                        shader |= program->first_tag;
-                else
-                        shader = 0x1;
-        }
-
-        state->sysval_count = program->sysval_count;
-        memcpy(state->sysval, program->sysvals, sizeof(state->sysval[0]) * state->sysval_count);
-        memcpy(&state->push, &program->push, sizeof(program->push));
-
-        bool vertex_id = BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_VERTEX_ID);
-        bool instance_id = BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
-
-        state->writes_global = s->info.writes_memory;
-
-        switch (stage) {
-        case MESA_SHADER_VERTEX:
-                attribute_count = util_bitcount64(s->info.inputs_read) +
-                                  util_bitcount(s->info.images_used);
-                varying_count = util_bitcount64(s->info.outputs_written);
-
-                if (vertex_id)
-                        attribute_count = MAX2(attribute_count, PAN_VERTEX_ID + 1);
-
-                if (instance_id)
-                        attribute_count = MAX2(attribute_count, PAN_INSTANCE_ID + 1);
-
-                break;
-        case MESA_SHADER_FRAGMENT:
-                for (unsigned i = 0; i < ARRAY_SIZE(state->blend_ret_addrs); i++) {
-                        if (!program->blend_ret_offsets[i])
-                                continue;
-
-                        state->blend_ret_addrs[i] = (state->bo->ptr.gpu & UINT32_MAX) +
-                                                    program->blend_ret_offsets[i];
-                        assert(!(state->blend_ret_addrs[i] & 0x7));
-                }
-                attribute_count = util_bitcount(s->info.images_used);
-                varying_count = util_bitcount64(s->info.inputs_read);
-                if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
-                        state->writes_depth = true;
-                if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
-                        state->writes_stencil = true;
-
-                uint64_t outputs_read = s->info.outputs_read;
-                if (outputs_read & BITFIELD64_BIT(FRAG_RESULT_COLOR))
-                        outputs_read |= BITFIELD64_BIT(FRAG_RESULT_DATA0);
-
-                state->outputs_read = outputs_read >> FRAG_RESULT_DATA0;
-
-                /* EXT_shader_framebuffer_fetch requires per-sample */
-                state->sample_shading = s->info.fs.uses_sample_shading ||
-                        outputs_read;
-
-                /* List of reasons we need to execute frag shaders when things
-                 * are masked off */
-
-                state->fs_sidefx =
-                        s->info.writes_memory ||
-                        s->info.fs.uses_discard ||
-                        s->info.fs.uses_demote;
-
-                state->can_discard = s->info.fs.uses_discard;
-                break;
-        case MESA_SHADER_COMPUTE:
-                attribute_count = util_bitcount(s->info.images_used);
-                state->shared_size = s->info.cs.shared_size;
-                break;
-        default:
-                unreachable("Unknown shader state");
-        }
-
-        state->stack_size = program->tls_size;
-        state->reads_frag_coord = (s->info.inputs_read & (1 << VARYING_SLOT_POS)) ||
-                                  BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
-        state->reads_point_coord = s->info.inputs_read & (1 << VARYING_SLOT_PNTC);
-        state->reads_face = (s->info.inputs_read & (1 << VARYING_SLOT_FACE)) ||
-                            BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
-        state->writes_point_size = s->info.outputs_written & (1 << VARYING_SLOT_PSIZ);
-
-        if (outputs_written)
-                *outputs_written = s->info.outputs_written;
-
-        state->work_reg_count = program->work_register_count;
-
-        if (pan_is_bifrost(dev))
-                for (unsigned i = 0; i < ARRAY_SIZE(state->blend_types); i++)
-                        state->blend_types[i] = bifrost_blend_type_from_nir(program->blend_types[i]);
-
-        /* Record the varying mapping for the command stream's bookkeeping */
-
-        nir_variable_mode varying_mode =
-                        stage == MESA_SHADER_VERTEX ? nir_var_shader_out : nir_var_shader_in;
-
-        nir_foreach_variable_with_modes(var, s, varying_mode) {
-                unsigned loc = var->data.driver_location;
-                unsigned sz = glsl_count_attribute_slots(var->type, FALSE);
-
-                for (int c = 0; c < sz; ++c) {
-                        state->varyings_loc[loc + c] = var->data.location + c;
-                        state->varyings[loc + c] = pan_format_from_glsl(var->type,
-                                        var->data.precision, var->data.location_frac);
-                }
-        }
-
-        /* Needed for linkage */
-        state->attribute_count = attribute_count;
-        state->varying_count = varying_count;
-
-        /* Sysvals have dedicated UBO */
-        state->ubo_count = s->info.num_ubos + (state->sysval_count ? 1 : 0);
+        if (!pan_is_bifrost(dev))
+                shader |= state->info.midgard.first_tag;
 
         /* Prepare the descriptors at compile-time */
         state->shader.shader = shader;
-        state->shader.attribute_count = attribute_count;
-        state->shader.varying_count = varying_count;
-        state->shader.texture_count = s->info.num_textures;
-        state->shader.sampler_count = s->info.num_textures;
+        state->shader.attribute_count = state->info.attribute_count;
+        state->shader.varying_count = state->info.varyings.input_count +
+                                      state->info.varyings.output_count;
+        state->shader.texture_count = state->info.texture_count;
+        state->shader.sampler_count = state->info.texture_count;
 
         if (pan_is_bifrost(dev))
-                pan_prepare_bifrost_props(state, program, stage, &s->info);
+                pan_prepare_bifrost_props(state);
         else
-                pan_prepare_midgard_props(state, program, stage);
+                pan_prepare_midgard_props(state);
 
         state->properties.shader_contains_barrier =
-                s->info.uses_memory_barrier |
-                s->info.uses_control_barrier;
+                state->info.contains_barrier;
 
         /* Ordering gaurantees are the same */
         if (stage == MESA_SHADER_FRAGMENT) {
                 state->properties.shader_contains_barrier |=
-                       s->info.fs.needs_quad_helper_invocations;
+                        state->info.fs.helper_invocations;
+                state->properties.stencil_from_shader =
+                        state->info.fs.writes_stencil;
+                state->properties.depth_source =
+                        state->info.fs.writes_depth ?
+                        MALI_DEPTH_SOURCE_SHADER :
+                        MALI_DEPTH_SOURCE_FIXED_FUNCTION;
+        } else {
+                state->properties.depth_source =
+                        MALI_DEPTH_SOURCE_FIXED_FUNCTION;
         }
 
-        state->properties.stencil_from_shader = state->writes_stencil;
-        state->properties.depth_source = state->writes_depth ?
-                                         MALI_DEPTH_SOURCE_SHADER :
-                                         MALI_DEPTH_SOURCE_FIXED_FUNCTION;
 
         if (stage != MESA_SHADER_FRAGMENT)
                 pan_upload_shader_descriptor(ctx, state);
 
-        ralloc_free(program);
+        util_dynarray_fini(&binary);
 
         /* In both clone and tgsi_to_nir paths, the shader is ralloc'd against
          * a NULL context */
diff --git a/src/gallium/drivers/panfrost/pan_blend_shaders.c b/src/gallium/drivers/panfrost/pan_blend_shaders.c
index 16c8b0937cd..21a4a2720da 100644
--- a/src/gallium/drivers/panfrost/pan_blend_shaders.c
+++ b/src/gallium/drivers/panfrost/pan_blend_shaders.c
@@ -295,21 +295,23 @@ panfrost_compile_blend_shader(struct panfrost_blend_shader *shader,
         if (constants)
                 memcpy(inputs.blend.constants, constants, sizeof(inputs.blend.constants));
 
-        panfrost_program *program;
-
         if (pan_is_bifrost(dev)) {
                 inputs.blend.bifrost_blend_desc =
                         bifrost_get_blend_desc(dev, shader->key.format, shader->key.rt);
         }
 
-        program = pan_shader_compile(dev, NULL, shader->nir, &inputs);
+        struct pan_shader_info info;
+        struct util_dynarray binary;
+
+        util_dynarray_init(&binary, NULL);
+        pan_shader_compile(dev, shader->nir, &inputs, &binary, &info);
 
         /* Allow us to patch later */
-        shader->first_tag = program->first_tag;
-        shader->size = program->compiled.size;
+        shader->first_tag = pan_is_bifrost(dev) ? 0 : info.midgard.first_tag;
+        shader->size = binary.size;
         shader->buffer = reralloc_size(shader, shader->buffer, shader->size);
-        memcpy(shader->buffer, program->compiled.data, shader->size);
-        shader->work_count = program->work_register_count;
+        memcpy(shader->buffer, binary.data, shader->size);
+        shader->work_count = info.work_reg_count;
 
-        ralloc_free(program);
+        util_dynarray_fini(&binary);
 }
diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c
index 62975cef5a7..bda57198155 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.c
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.c
@@ -241,7 +241,7 @@ panfrost_fs_required(
                 unsigned rt_count)
 {
         /* If we generally have side effects */
-        if (fs->fs_sidefx)
+        if (fs->info.fs.sidefx)
                 return true;
 
         /* If colour is written we need to execute */
@@ -252,7 +252,31 @@ panfrost_fs_required(
 
         /* If depth is written and not implied we need to execute.
          * TODO: Predicate on Z/S writes being enabled */
-        return (fs->writes_depth || fs->writes_stencil);
+        return (fs->info.fs.writes_depth || fs->info.fs.writes_stencil);
+}
+
+static enum mali_bifrost_register_file_format
+bifrost_blend_type_from_nir(nir_alu_type nir_type)
+{
+        switch(nir_type) {
+        case 0: /* Render target not in use */
+                return 0;
+        case nir_type_float16:
+                return MALI_BIFROST_REGISTER_FILE_FORMAT_F16;
+        case nir_type_float32:
+                return MALI_BIFROST_REGISTER_FILE_FORMAT_F32;
+        case nir_type_int32:
+                return MALI_BIFROST_REGISTER_FILE_FORMAT_I32;
+        case nir_type_uint32:
+                return MALI_BIFROST_REGISTER_FILE_FORMAT_U32;
+        case nir_type_int16:
+                return MALI_BIFROST_REGISTER_FILE_FORMAT_I16;
+        case nir_type_uint16:
+                return MALI_BIFROST_REGISTER_FILE_FORMAT_U16;
+        default:
+                unreachable("Unsupported blend shader type for NIR alu type");
+                return 0;
+        }
 }
 
 static void
@@ -292,8 +316,12 @@ panfrost_emit_bifrost_blend(struct panfrost_batch *batch,
                                 assert((blend[i].shader.gpu & (0xffffffffull << 32)) ==
                                        (fs->bo->ptr.gpu & (0xffffffffull << 32)));
                                 cfg.bifrost.internal.shader.pc = (u32)blend[i].shader.gpu;
-                                assert(!(fs->blend_ret_addrs[i] & 0x7));
-                                cfg.bifrost.internal.shader.return_value = fs->blend_ret_addrs[i];
+                                unsigned ret_offset = fs->info.bifrost.blend[i].return_offset;
+                                if (ret_offset) {
+                                        assert(!(ret_offset & 0x7));
+                                        cfg.bifrost.internal.shader.return_value =
+                                                fs->bo->ptr.gpu + ret_offset;
+                                }
                                 cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_SHADER;
                         } else {
                                 enum pipe_format format = batch->key.cbufs[i]->format;
@@ -324,7 +352,7 @@ panfrost_emit_bifrost_blend(struct panfrost_batch *batch,
                                 cfg.bifrost.internal.fixed_function.conversion.memory_format =
                                         panfrost_format_to_bifrost_blend(dev, format_desc, true);
                                 cfg.bifrost.internal.fixed_function.conversion.register_format =
-                                        fs->blend_types[i];
+                                        bifrost_blend_type_from_nir(fs->info.bifrost.blend[i].type);
                                 cfg.bifrost.internal.fixed_function.rt = i;
                         }
                 }
@@ -412,7 +440,9 @@ panfrost_prepare_bifrost_fs_state(struct panfrost_context *ctx,
 
                 state->properties = fs->properties;
                 state->properties.bifrost.allow_forward_pixel_to_kill =
-                        !fs->can_discard && !fs->writes_depth && no_blend;
+                        !fs->info.fs.can_discard &&
+                        !fs->info.fs.writes_depth &&
+                        no_blend;
                 state->shader = fs->shader;
                 state->preload = fs->preload;
         }
@@ -436,8 +466,8 @@ panfrost_prepare_midgard_fs_state(struct panfrost_context *ctx,
                 state->properties.midgard.force_early_z = true;
         } else {
                 /* Reasons to disable early-Z from a shader perspective */
-                bool late_z = fs->can_discard || fs->writes_global ||
-                              fs->writes_depth || fs->writes_stencil;
+                bool late_z = fs->info.fs.can_discard || fs->info.writes_global ||
+                              fs->info.fs.writes_depth || fs->info.fs.writes_stencil;
 
                 /* If either depth or stencil is enabled, discard matters */
                 bool zs_enabled =
@@ -452,9 +482,9 @@ panfrost_prepare_midgard_fs_state(struct panfrost_context *ctx,
                 /* TODO: Reduce this limit? */
                 state->properties = fs->properties;
                 if (has_blend_shader)
-                        state->properties.midgard.work_register_count = MAX2(fs->work_reg_count, 8);
+                        state->properties.midgard.work_register_count = MAX2(fs->info.work_reg_count, 8);
                 else
-                        state->properties.midgard.work_register_count = fs->work_reg_count;
+                        state->properties.midgard.work_register_count = fs->info.work_reg_count;
 
                 state->properties.midgard.force_early_z = !(late_z || alpha_to_coverage);
 
@@ -463,8 +493,10 @@ panfrost_prepare_midgard_fs_state(struct panfrost_context *ctx,
                  * lying to the hardware about the discard and setting the
                  * reads tilebuffer? flag to compensate */
                 state->properties.midgard.shader_reads_tilebuffer =
-                        fs->outputs_read || (!zs_enabled && fs->can_discard);
-                state->properties.midgard.shader_contains_discard = zs_enabled && fs->can_discard;
+                        fs->info.fs.outputs_read ||
+                        (!zs_enabled && fs->info.fs.can_discard);
+                state->properties.midgard.shader_contains_discard =
+                        zs_enabled && fs->info.fs.can_discard;
                 state->shader = fs->shader;
         }
 
@@ -528,7 +560,7 @@ panfrost_prepare_fs_state(struct panfrost_context *ctx,
         state->multisample_misc.sample_mask = (msaa ? ctx->sample_mask : ~0) & 0xFFFF;
 
         state->multisample_misc.evaluate_per_sample =
-                msaa && (ctx->min_samples > 1 || fs->sample_shading);
+                msaa && (ctx->min_samples > 1 || fs->info.fs.sample_shading);
 
         state->multisample_misc.depth_function = zsa->base.depth_enabled ?
                 panfrost_translate_compare_func(zsa->base.depth_func) :
@@ -930,8 +962,8 @@ panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
 {
         struct sysval_uniform *uniforms = (void *)buf;
 
-        for (unsigned i = 0; i < ss->sysval_count; ++i) {
-                int sysval = ss->sysval[i];
+        for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) {
+                int sysval = ss->info.sysvals.sysvals[i];
 
                 switch (PAN_SYSVAL_TYPE(sysval)) {
                 case PAN_SYSVAL_VIEWPORT_SCALE:
@@ -1023,7 +1055,7 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
         struct panfrost_shader_state *ss = &all->variants[all->active_variant];
 
         /* Allocate room for the sysval and the uniforms */
-        size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
+        size_t sys_size = sizeof(float) * 4 * ss->info.sysvals.sysval_count;
         struct panfrost_ptr transfer =
                 panfrost_pool_alloc_aligned(&batch->pool, sys_size, 16);
 
@@ -1032,7 +1064,7 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
 
         /* Next up, attach UBOs. UBO count includes gaps but no sysval UBO */
         struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, stage);
-        unsigned ubo_count = shader->ubo_count - (sys_size ? 1 : 0);
+        unsigned ubo_count = shader->info.ubo_count - (sys_size ? 1 : 0);
         unsigned sysval_ubo = sys_size ? ubo_count : ~0;
 
         size_t sz = MALI_UNIFORM_BUFFER_LENGTH * (ubo_count + 1);
@@ -1076,13 +1108,14 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
 
         /* Copy push constants required by the shader */
         struct panfrost_ptr push_transfer =
-                panfrost_pool_alloc_aligned(&batch->pool, ss->push.count * 4, 16);
+                panfrost_pool_alloc_aligned(&batch->pool,
+                                            ss->info.push.count * 4, 16);
 
         uint32_t *push_cpu = (uint32_t *) push_transfer.cpu;
         *push_constants = push_transfer.gpu;
 
-        for (unsigned i = 0; i < ss->push.count; ++i) {
-                struct panfrost_ubo_word src = ss->push.words[i];
+        for (unsigned i = 0; i < ss->info.push.count; ++i) {
+                struct panfrost_ubo_word src = ss->info.push.words[i];
 
                 /* Map the UBO, this should be cheap. However this is reading
                  * from write-combine memory which is _very_ slow. It might pay
@@ -1108,7 +1141,7 @@ panfrost_emit_shared_memory(struct panfrost_batch *batch,
         struct panfrost_device *dev = pan_device(ctx->base.screen);
         struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
         struct panfrost_shader_state *ss = &all->variants[all->active_variant];
-        unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size,
+        unsigned single_size = util_next_power_of_two(MAX2(ss->info.wls_size,
                                                            128));
 
         unsigned instances =
@@ -1130,12 +1163,12 @@ panfrost_emit_shared_memory(struct panfrost_batch *batch,
                 ls.wls_instances = instances;
                 ls.wls_size_scale = util_logbase2(single_size) + 1;
 
-                if (ss->stack_size) {
+                if (ss->info.tls_size) {
                         unsigned shift =
-                                panfrost_get_stack_shift(ss->stack_size);
+                                panfrost_get_stack_shift(ss->info.tls_size);
                         struct panfrost_bo *bo =
                                 panfrost_batch_get_scratchpad(batch,
-                                                              ss->stack_size,
+                                                              ss->info.tls_size,
                                                               dev->thread_tls_alloc,
                                                               dev->core_count);
 
@@ -1366,7 +1399,7 @@ panfrost_emit_image_attribs(struct panfrost_batch *batch,
         struct panfrost_context *ctx = batch->ctx;
         struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, type);
 
-        if (!shader->attribute_count) {
+        if (!shader->info.attribute_count) {
                 *buffers = 0;
                 return 0;
         }
@@ -1375,11 +1408,11 @@ panfrost_emit_image_attribs(struct panfrost_batch *batch,
         unsigned attrib_buf_size = MALI_ATTRIBUTE_BUFFER_LENGTH +
                                    MALI_ATTRIBUTE_BUFFER_CONTINUATION_3D_LENGTH;
         unsigned bytes_per_image_desc = MALI_ATTRIBUTE_LENGTH + attrib_buf_size;
-        unsigned attribs_offset = attrib_buf_size * shader->attribute_count;
+        unsigned attribs_offset = attrib_buf_size * shader->info.attribute_count;
 
         struct panfrost_ptr ptr =
                 panfrost_pool_alloc_aligned(&batch->pool,
-                                            bytes_per_image_desc * shader->attribute_count,
+                                            bytes_per_image_desc * shader->info.attribute_count,
                                             util_next_power_of_two(bytes_per_image_desc));
 
         emit_image_attribs(batch, type, ptr.cpu + attribs_offset, ptr.cpu, 0);
@@ -1404,7 +1437,7 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
          * Also, we allocate more memory than what's needed here if either instancing
          * is enabled or images are present, this can be improved. */
         unsigned bufs_per_attrib = (ctx->instance_count > 1 || nr_images > 0) ? 2 : 1;
-        unsigned nr_bufs = (vs->attribute_count * bufs_per_attrib) +
+        unsigned nr_bufs = (vs->info.attribute_count * bufs_per_attrib) +
                            (pan_is_bifrost(dev) ? 1 : 0);
 
         if (!nr_bufs) {
@@ -1417,7 +1450,7 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
                         MALI_ATTRIBUTE_BUFFER_LENGTH * 2);
 
         struct panfrost_ptr T = panfrost_pool_alloc_aligned(&batch->pool,
-                        MALI_ATTRIBUTE_LENGTH * vs->attribute_count,
+                        MALI_ATTRIBUTE_LENGTH * vs->info.attribute_count,
                         MALI_ATTRIBUTE_LENGTH);
 
         struct mali_attribute_buffer_packed *bufs =
@@ -1525,7 +1558,7 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
 
         /* Add special gl_VertexID/gl_InstanceID buffers */
 
-        if (unlikely(vs->attribute_count >= PAN_VERTEX_ID)) {
+        if (unlikely(vs->info.attribute_count >= PAN_VERTEX_ID)) {
                 panfrost_vertex_id(ctx->padded_count, &bufs[k], ctx->instance_count > 1);
 
                 pan_pack(out + PAN_VERTEX_ID, ATTRIBUTE, cfg) {
@@ -1742,22 +1775,22 @@ pan_varying_present(const struct panfrost_device *dev,
 
         /* Enable special buffers by the shader info */
 
-        if (vs->writes_point_size)
+        if (vs->info.vs.writes_point_size)
                 present |= (1 << PAN_VARY_PSIZ);
 
-        if (fs->reads_point_coord)
+        if (fs->info.fs.reads_point_coord)
                 present |= (1 << PAN_VARY_PNTCOORD);
 
-        if (fs->reads_face)
+        if (fs->info.fs.reads_face)
                 present |= (1 << PAN_VARY_FACE);
 
-        if (fs->reads_frag_coord && !pan_is_bifrost(dev))
+        if (fs->info.fs.reads_frag_coord && !pan_is_bifrost(dev))
                 present |= (1 << PAN_VARY_FRAGCOORD);
 
         /* Also, if we have a point sprite, we need a point coord buffer */
 
-        for (unsigned i = 0; i < fs->varying_count; i++)  {
-                gl_varying_slot loc = fs->varyings_loc[i];
+        for (unsigned i = 0; i < fs->info.varyings.input_count; i++)  {
+                gl_varying_slot loc = fs->info.varyings.input[i].location;
 
                 if (util_varying_is_point_coord(loc, point_coord_mask))
                         present |= (1 << PAN_VARY_PNTCOORD);
@@ -1886,10 +1919,18 @@ pan_emit_general_varying(const struct panfrost_device *dev,
                          bool should_alloc)
 {
         /* Check if we're linked */
+        unsigned other_varying_count =
+                other->info.stage == MESA_SHADER_FRAGMENT ?
+                other->info.varyings.input_count :
+                other->info.varyings.output_count;
+        const struct pan_shader_varying *other_varyings =
+                other->info.stage == MESA_SHADER_FRAGMENT ?
+                other->info.varyings.input :
+                other->info.varyings.output;
         signed other_idx = -1;
 
-        for (unsigned j = 0; j < other->varying_count; ++j) {
-                if (other->varyings_loc[j] == loc) {
+        for (unsigned j = 0; j < other_varying_count; ++j) {
+                if (other_varyings[j].location == loc) {
                         other_idx = j;
                         break;
                 }
@@ -1904,7 +1945,8 @@ pan_emit_general_varying(const struct panfrost_device *dev,
 
         if (should_alloc) {
                 /* We're linked, so allocate a space via a watermark allocation */
-                enum mali_format alt = other->varyings[other_idx];
+                enum mali_format alt =
+                        dev->formats[other_varyings[other_idx].format].hw >> 12;
 
                 /* Do interpolation at minimum precision */
                 unsigned size_main = pan_varying_size(format);
@@ -1953,8 +1995,14 @@ panfrost_emit_varying(const struct panfrost_device *dev,
                       bool should_alloc,
                       bool is_fragment)
 {
-        gl_varying_slot loc = stage->varyings_loc[idx];
-        enum mali_format format = stage->varyings[idx];
+        gl_varying_slot loc =
+                stage->info.stage == MESA_SHADER_FRAGMENT ?
+                stage->info.varyings.input[idx].location :
+                stage->info.varyings.output[idx].location;
+        enum mali_format format =
+                stage->info.stage == MESA_SHADER_FRAGMENT ?
+                dev->formats[stage->info.varyings.input[idx].format].hw >> 12 :
+                dev->formats[stage->info.varyings.output[idx].format].hw >> 12;
 
         /* Override format to match linkage */
         if (!should_alloc && gen_formats[idx])
@@ -2018,8 +2066,8 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
 
         vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
         fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
-        vs_size = MALI_ATTRIBUTE_LENGTH * vs->varying_count;
-        fs_size = MALI_ATTRIBUTE_LENGTH * fs->varying_count;
+        vs_size = MALI_ATTRIBUTE_LENGTH * vs->info.varyings.output_count;
+        fs_size = MALI_ATTRIBUTE_LENGTH * fs->info.varyings.input_count;
 
         struct panfrost_ptr trans = panfrost_pool_alloc_aligned(
                         &batch->pool, vs_size + fs_size, MALI_ATTRIBUTE_LENGTH);
@@ -2044,8 +2092,8 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
         memset(gen_formats, 0, sizeof(gen_formats));
 
         unsigned gen_stride = 0;
-        assert(vs->varying_count < ARRAY_SIZE(gen_offsets));
-        assert(fs->varying_count < ARRAY_SIZE(gen_offsets));
+        assert(vs->info.varyings.output_count < ARRAY_SIZE(gen_offsets));
+        assert(fs->info.varyings.input_count < ARRAY_SIZE(gen_offsets));
 
         unsigned streamout_offsets[32];
 
@@ -2056,16 +2104,16 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
         }
 
         struct mali_attribute_packed *ovs = (struct mali_attribute_packed *)trans.cpu;
-        struct mali_attribute_packed *ofs = ovs + vs->varying_count;
+        struct mali_attribute_packed *ofs = ovs + vs->info.varyings.output_count;
 
-        for (unsigned i = 0; i < vs->varying_count; i++) {
+        for (unsigned i = 0; i < vs->info.varyings.output_count; i++) {
                 panfrost_emit_varying(dev, ovs + i, vs, fs, vs, present, 0,
                                       ctx->streamout.num_targets, streamout_offsets,
                                       gen_offsets, gen_formats, &gen_stride, i,
                                       true, false);
         }
 
-        for (unsigned i = 0; i < fs->varying_count; i++) {
+        for (unsigned i = 0; i < fs->info.varyings.input_count; i++) {
                 panfrost_emit_varying(dev, ofs + i, fs, vs, vs, present, point_coord_mask,
                                       ctx->streamout.num_targets, streamout_offsets,
                                       gen_offsets, gen_formats, &gen_stride, i,
@@ -2114,8 +2162,8 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
         pan_emit_special_input(varyings, present, PAN_VARY_FRAGCOORD, MALI_ATTRIBUTE_SPECIAL_FRAG_COORD);
 
         *buffers = T.gpu;
-        *vs_attribs = vs->varying_count ? trans.gpu : 0;
-        *fs_attribs = fs->varying_count ? trans.gpu + vs_size : 0;
+        *vs_attribs = vs->info.varyings.output_count ? trans.gpu : 0;
+        *fs_attribs = fs->info.varyings.input_count ? trans.gpu + vs_size : 0;
 }
 
 void
diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c
index af9bafb81e5..049e4ff21d9 100644
--- a/src/gallium/drivers/panfrost/pan_compute.c
+++ b/src/gallium/drivers/panfrost/pan_compute.c
@@ -71,7 +71,7 @@ panfrost_create_compute_state(
         }
 
         panfrost_shader_compile(ctx, so->cbase.ir_type, so->cbase.prog,
-                                MESA_SHADER_COMPUTE, v, NULL);
+                                MESA_SHADER_COMPUTE, v);
 
         return so;
 }
diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c
index f7ce64896aa..d324dd79377 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -150,7 +150,7 @@ panfrost_writes_point_size(struct panfrost_context *ctx)
         assert(ctx->shader[PIPE_SHADER_VERTEX]);
         struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
 
-        return vs->writes_point_size && ctx->active_prim == PIPE_PRIM_POINTS;
+        return vs->info.vs.writes_point_size && ctx->active_prim == PIPE_PRIM_POINTS;
 }
 
 /* The entire frame is in memory -- send it off to the kernel! */
@@ -739,12 +739,11 @@ panfrost_create_shader_state(
                 struct panfrost_context *ctx = pan_context(pctx);
 
                 struct panfrost_shader_state state = { 0 };
-                uint64_t outputs_written;
 
                 panfrost_shader_compile(ctx, PIPE_SHADER_IR_NIR,
                                         so->base.ir.nir,
                                         tgsi_processor_to_shader_stage(stage),
-                                        &state, &outputs_written);
+                                        &state);
         }
 
         return so;
@@ -821,11 +820,12 @@ panfrost_variant_matches(
 {
         struct panfrost_device *dev = pan_device(ctx->base.screen);
 
-        if (variant->outputs_read) {
+        if (variant->info.stage == MESA_SHADER_FRAGMENT &&
+            variant->info.fs.outputs_read) {
                 struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
 
                 unsigned i;
-                BITSET_FOREACH_SET(i, &variant->outputs_read, 8) {
+                BITSET_FOREACH_SET(i, &variant->info.fs.outputs_read, 8) {
                         enum pipe_format fmt = PIPE_FORMAT_R8G8B8A8_UNORM;
 
                         if ((fb->nr_cbufs > i) && fb->cbufs[i])
@@ -963,15 +963,12 @@ panfrost_bind_shader_state(
         /* We finally have a variant, so compile it */
 
         if (!shader_state->compiled) {
-                uint64_t outputs_written = 0;
-
                 panfrost_shader_compile(ctx, variants->base.type,
                                         variants->base.type == PIPE_SHADER_IR_NIR ?
                                         variants->base.ir.nir :
                                         variants->base.tokens,
                                         tgsi_processor_to_shader_stage(type),
-                                        shader_state,
-                                        &outputs_written);
+                                        shader_state);
 
                 shader_state->compiled = true;
 
@@ -980,7 +977,8 @@ panfrost_bind_shader_state(
 
                 shader_state->stream_output = variants->base.stream_output;
                 shader_state->so_mask =
-                        update_so_info(&shader_state->stream_output, outputs_written);
+                        update_so_info(&shader_state->stream_output,
+                                       shader_state->info.outputs_written);
         }
 }
 
@@ -1251,7 +1249,8 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx,
          * keyed to the framebuffer format (due to EXT_framebuffer_fetch) */
         struct panfrost_shader_variants *fs = ctx->shader[PIPE_SHADER_FRAGMENT];
 
-        if (fs && fs->variant_count && fs->variants[fs->active_variant].outputs_read)
+        if (fs && fs->variant_count &&
+            fs->variants[fs->active_variant].info.fs.outputs_read)
                 ctx->base.bind_fs_state(&ctx->base, fs);
 }
 
diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h
index 5dea026d4b6..153060b13f3 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -214,46 +214,15 @@ struct panfrost_shader_state {
         struct MALI_RENDERER_PROPERTIES properties;
         struct MALI_PRELOAD preload;
 
-        /* Non-descript information */
-        unsigned work_reg_count;
-        bool sample_shading;
-        bool can_discard;
-        bool writes_point_size;
-        bool writes_depth;
-        bool writes_stencil;
-        bool reads_point_coord;
-        bool reads_face;
-        bool reads_frag_coord;
-        bool writes_global;
-        unsigned stack_size;
-        unsigned shared_size;
+        struct pan_shader_info info;
 
-        /* Does the fragment shader have side effects? In particular, if output
-         * is masked out, is it legal to skip shader execution? */
-        bool fs_sidefx;
-
-        /* For Bifrost - output type for each RT */
-        enum mali_bifrost_register_file_format blend_types[MALI_BIFROST_BLEND_MAX_RT];
-
-        unsigned attribute_count, varying_count, ubo_count;
-        enum mali_format varyings[PIPE_MAX_ATTRIBS];
-        gl_varying_slot varyings_loc[PIPE_MAX_ATTRIBS];
         struct pipe_stream_output_info stream_output;
         uint64_t so_mask;
 
-        unsigned sysval_count;
-        unsigned sysval[MAX_SYSVAL_COUNT];
-
-        struct panfrost_ubo_push push;
-
         /* GPU-executable memory */
         struct panfrost_bo *bo;
 
-        BITSET_WORD outputs_read;
         enum pipe_format rt_formats[8];
-
-        /* Blend return addresses */
-        uint32_t blend_ret_addrs[8];
 };
 
 /* A collection of varyings (the CSO) */
@@ -374,8 +343,7 @@ panfrost_shader_compile(struct panfrost_context *ctx,
                         enum pipe_shader_ir ir_type,
                         const void *ir,
                         gl_shader_stage stage,
-                        struct panfrost_shader_state *state,
-                        uint64_t *outputs_written);
+                        struct panfrost_shader_state *state);
 
 void
 panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so,
diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
index 3eac60d7296..25f664a6e4d 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -1238,7 +1238,7 @@ panfrost_batch_adjust_stack_size(struct panfrost_batch *batch)
                 if (!ss)
                         continue;
 
-                batch->stack_size = MAX2(batch->stack_size, ss->stack_size);
+                batch->stack_size = MAX2(batch->stack_size, ss->info.tls_size);
         }
 }
 
diff --git a/src/panfrost/bifrost/bi_opt_push_ubo.c b/src/panfrost/bifrost/bi_opt_push_ubo.c
index bdd6ddf59a2..9442315b213 100644
--- a/src/panfrost/bifrost/bi_opt_push_ubo.c
+++ b/src/panfrost/bifrost/bi_opt_push_ubo.c
@@ -119,10 +119,10 @@ void
 bi_opt_push_ubo(bi_context *ctx)
 {
         /* This pass only runs once */
-        assert(ctx->push->count == 0);
+        assert(ctx->info->push.count == 0);
 
         struct bi_ubo_analysis analysis = bi_analyze_ranges(ctx);
-        bi_pick_ubo(ctx->push, &analysis);
+        bi_pick_ubo(&ctx->info->push, &analysis);
 
         bi_foreach_instr_global_safe(ctx, ins) {
                 if (!bi_is_direct_aligned_ubo(ins)) continue;
@@ -141,8 +141,9 @@ bi_opt_push_ubo(bi_context *ctx)
 
                 for (unsigned w = 0; w < channels; ++w) {
                         /* FAU is grouped in pairs (2 x 4-byte) */
-                        unsigned base = pan_lookup_pushed_ubo(ctx->push, ubo,
-                                        (offset + 4 * w));
+                        unsigned base =
+                                pan_lookup_pushed_ubo(&ctx->info->push, ubo,
+                                                      (offset + 4 * w));
 
                         unsigned fau_idx = (base >> 1);
                         unsigned fau_hi = (base & 1);
diff --git a/src/panfrost/bifrost/bi_pack.c b/src/panfrost/bifrost/bi_pack.c
index aaed48094b7..b43e5a8aae5 100644
--- a/src/panfrost/bifrost/bi_pack.c
+++ b/src/panfrost/bifrost/bi_pack.c
@@ -713,11 +713,11 @@ bi_collect_blend_ret_addr(bi_context *ctx, struct util_dynarray *emission,
 
 
         unsigned loc = tuple->regs.fau_idx - BIR_FAU_BLEND_0;
-        assert(loc < ARRAY_SIZE(ctx->blend_ret_offsets));
-        assert(!ctx->blend_ret_offsets[loc]);
-        ctx->blend_ret_offsets[loc] =
+        assert(loc < ARRAY_SIZE(ctx->info->bifrost.blend));
+        assert(!ctx->info->bifrost.blend[loc].return_offset);
+        ctx->info->bifrost.blend[loc].return_offset =
                 util_dynarray_num_elements(emission, uint8_t);
-        assert(!(ctx->blend_ret_offsets[loc] & 0x7));
+        assert(!(ctx->info->bifrost.blend[loc].return_offset & 0x7));
 }
 
 unsigned
diff --git a/src/panfrost/bifrost/bi_ra.c b/src/panfrost/bifrost/bi_ra.c
index f927b3eeb2c..fb3f38f99f6 100644
--- a/src/panfrost/bifrost/bi_ra.c
+++ b/src/panfrost/bifrost/bi_ra.c
@@ -388,7 +388,7 @@ bi_register_allocate(bi_context *ctx)
         unsigned iter_count = 1000; /* max iterations */
 
         /* Number of bytes of memory we've spilled into */
-        unsigned spill_count = ctx->tls_size;
+        unsigned spill_count = ctx->info->tls_size;
 
         do {
                 if (l) {
@@ -410,7 +410,7 @@ bi_register_allocate(bi_context *ctx)
 
         assert(success);
 
-        ctx->tls_size = spill_count;
+        ctx->info->tls_size = spill_count;
         bi_install_registers(ctx, l);
 
         lcra_free(l);
diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index 6cbc593c27d..68a4a928f3e 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -297,7 +297,8 @@ bi_load_sysval_to(bi_builder *b, bi_index dest, int sysval,
                 unsigned nr_components, unsigned offset)
 {
         unsigned uniform =
-                pan_lookup_sysval(b->shader->sysval_to_id, &b->shader->sysvals,
+                pan_lookup_sysval(b->shader->sysval_to_id,
+                                  &b->shader->info->sysvals,
                                   sysval);
         unsigned idx = (uniform * 16) + offset;
 
@@ -368,8 +369,7 @@ bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, unsigned rt)
         }
 
         assert(rt < 8);
-        assert(b->shader->blend_types);
-        b->shader->blend_types[rt] = T;
+        b->shader->info->bifrost.blend[rt].type = T;
 }
 
 /* Blend shaders do not need to run ATEST since they are dependent on a
@@ -2511,23 +2511,23 @@ bi_lower_branch(bi_block *block)
         }
 }
 
-panfrost_program *
-bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
-                           const struct panfrost_compile_inputs *inputs)
+void
+bifrost_compile_shader_nir(nir_shader *nir,
+                           const struct panfrost_compile_inputs *inputs,
+                           struct util_dynarray *binary,
+                           struct pan_shader_info *info)
 {
-        panfrost_program *program = rzalloc(mem_ctx, panfrost_program);
-
         bifrost_debug = debug_get_option_bifrost_debug();
 
         bi_context *ctx = rzalloc(NULL, bi_context);
-        ctx->sysval_to_id = panfrost_init_sysvals(&ctx->sysvals, ctx);
+        ctx->sysval_to_id = panfrost_init_sysvals(&info->sysvals, ctx);
 
         ctx->inputs = inputs;
         ctx->nir = nir;
+        ctx->info = info;
         ctx->stage = nir->info.stage;
         ctx->quirks = bifrost_get_quirks(inputs->gpu_id);
         ctx->arch = inputs->gpu_id >> 12;
-        ctx->push = &program->push;
         list_inithead(&ctx->blocks);
 
         /* Lower gl_Position pre-optimisation, but after lowering vars to ssa
@@ -2565,8 +2565,7 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
                 nir_print_shader(nir, stdout);
         }
 
-        ctx->blend_types = program->blend_types;
-        ctx->tls_size = nir->scratch_size;
+        info->tls_size = nir->scratch_size;
 
         nir_foreach_function(func, nir) {
                 if (!func->impl)
@@ -2614,8 +2613,7 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
         if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal)
                 bi_print_shader(ctx, stdout);
 
-        util_dynarray_init(&program->compiled, NULL);
-        unsigned final_clause = bi_pack(ctx, &program->compiled);
+        unsigned final_clause = bi_pack(ctx, binary);
 
         /* If we need to wait for ATEST or BLEND in the first clause, pass the
          * corresponding bits through to the renderer state descriptor */
@@ -2623,17 +2621,12 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
         bi_clause *first_clause = bi_next_clause(ctx, first_block, NULL);
 
         unsigned first_deps = first_clause ? first_clause->dependencies : 0;
-        program->wait_6 = (first_deps & (1 << 6));
-        program->wait_7 = (first_deps & (1 << 7));
-
-        memcpy(program->blend_ret_offsets, ctx->blend_ret_offsets, sizeof(program->blend_ret_offsets));
-        program->sysval_count = ctx->sysvals.sysval_count;
-        memcpy(program->sysvals, ctx->sysvals.sysvals, sizeof(ctx->sysvals.sysvals[0]) * ctx->sysvals.sysval_count);
+        info->bifrost.wait_6 = (first_deps & (1 << 6));
+        info->bifrost.wait_7 = (first_deps & (1 << 7));
 
         if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) {
-                disassemble_bifrost(stdout, program->compiled.data,
-                                program->compiled.size,
-                                bifrost_debug & BIFROST_DBG_VERBOSE);
+                disassemble_bifrost(stdout, binary->data, binary->size,
+                                    bifrost_debug & BIFROST_DBG_VERBOSE);
         }
 
         /* Pad the shader with enough zero bytes to trick the prefetcher,
@@ -2641,19 +2634,15 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
          * so the size remains 0) */
         unsigned prefetch_size = BIFROST_SHADER_PREFETCH - final_clause;
 
-        if (program->compiled.size) {
-                memset(util_dynarray_grow(&program->compiled, uint8_t, prefetch_size),
+        if (binary->size) {
+                memset(util_dynarray_grow(binary, uint8_t, prefetch_size),
                        0, prefetch_size);
         }
 
-        program->tls_size = ctx->tls_size;
-
         if ((bifrost_debug & BIFROST_DBG_SHADERDB || inputs->shaderdb) &&
             !skip_internal) {
-                bi_print_stats(ctx, program->compiled.size, stderr);
+                bi_print_stats(ctx, binary->size, stderr);
         }
 
         ralloc_free(ctx);
-
-        return program;
 }
diff --git a/src/panfrost/bifrost/bifrost_compile.h b/src/panfrost/bifrost/bifrost_compile.h
index b64721d3f41..4fe468c1e12 100644
--- a/src/panfrost/bifrost/bifrost_compile.h
+++ b/src/panfrost/bifrost/bifrost_compile.h
@@ -28,9 +28,11 @@
 #include "util/u_dynarray.h"
 #include "panfrost/util/pan_ir.h"
 
-panfrost_program *
-bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
-                           const struct panfrost_compile_inputs *inputs);
+void
+bifrost_compile_shader_nir(nir_shader *nir,
+                           const struct panfrost_compile_inputs *inputs,
+                           struct util_dynarray *binary,
+                           struct pan_shader_info *info);
 
 static const nir_shader_compiler_options bifrost_nir_options = {
         .lower_scmp = true,
diff --git a/src/panfrost/bifrost/cmdline.c b/src/panfrost/bifrost/cmdline.c
index d9515e17465..0792e9c2980 100644
--- a/src/panfrost/bifrost/cmdline.c
+++ b/src/panfrost/bifrost/cmdline.c
@@ -32,7 +32,7 @@
 #include "util/u_dynarray.h"
 #include "bifrost_compile.h"
 
-static panfrost_program *
+static void
 compile_shader(char **argv, bool vertex_only)
 {
         struct gl_shader_program *prog;
@@ -53,7 +53,10 @@ compile_shader(char **argv, bool vertex_only)
         prog = standalone_compile_shader(&options, 2, argv, &local_ctx);
         prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program->info.stage = MESA_SHADER_FRAGMENT;
 
-        panfrost_program *compiled;
+        struct util_dynarray binary;
+
+        util_dynarray_init(&binary, NULL);
+
         for (unsigned i = 0; i < 2; ++i) {
                 nir[i] = glsl_to_nir(&local_ctx, prog, shader_types[i], &bifrost_nir_options);
                 NIR_PASS_V(nir[i], nir_lower_global_vars_to_local);
@@ -70,14 +73,17 @@ compile_shader(char **argv, bool vertex_only)
                 struct panfrost_compile_inputs inputs = {
                         .gpu_id = 0x7212, /* Mali G52 */
                 };
+                /* Zeroed: the compiler asserts push.count and blend offsets start at 0 */
+                struct pan_shader_info info = { 0 };
 
-                compiled = bifrost_compile_shader_nir(NULL, nir[i], &inputs);
+                util_dynarray_clear(&binary);
+                bifrost_compile_shader_nir(nir[i], &inputs, &binary, &info);
 
                 if (vertex_only)
-                        return compiled;
+                        break;
         }
 
-        return compiled;
+        util_dynarray_fini(&binary);
 }
 
 #define BI_FOURCC(ch0, ch1, ch2, ch3) ( \
diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h
index 37de7ccdb27..0c35920cd7e 100644
--- a/src/panfrost/bifrost/compiler.h
+++ b/src/panfrost/bifrost/compiler.h
@@ -496,17 +496,12 @@ typedef struct bi_block {
 typedef struct {
        const struct panfrost_compile_inputs *inputs;
        nir_shader *nir;
+       struct pan_shader_info *info;
        gl_shader_stage stage;
        struct list_head blocks; /* list of bi_block */
-       struct panfrost_sysvals sysvals;
        struct hash_table_u64 *sysval_to_id;
-       struct panfrost_ubo_push *push;
        uint32_t quirks;
        unsigned arch;
-       unsigned tls_size;
-
-       /* Blend return offsets */
-       uint32_t blend_ret_offsets[8];
 
        /* During NIR->BIR */
        bi_block *current_block;
@@ -514,7 +509,6 @@ typedef struct {
        bi_block *break_block;
        bi_block *continue_block;
        bool emitted_atest;
-       nir_alu_type *blend_types;
 
        /* For creating temporaries */
        unsigned ssa_alloc;
diff --git a/src/panfrost/lib/pan_blit.c b/src/panfrost/lib/pan_blit.c
index ac2ea8f1455..99fed039e77 100644
--- a/src/panfrost/lib/pan_blit.c
+++ b/src/panfrost/lib/pan_blit.c
@@ -43,11 +43,13 @@
  * This is primarily designed as a fallback for preloads but could be extended
  * for other clears/blits if needed in the future. */
 
-static panfrost_program *
+static void
 panfrost_build_blit_shader(struct panfrost_device *dev,
                            gl_frag_result loc,
                            nir_alu_type T,
-                           bool ms)
+                           bool ms,
+                           struct util_dynarray *binary,
+                           struct pan_shader_info *info)
 {
         bool is_colour = loc >= FRAG_RESULT_DATA0;
 
@@ -110,11 +112,9 @@ panfrost_build_blit_shader(struct panfrost_device *dev,
                 .is_blit = true,
         };
 
-        panfrost_program *program =
-                pan_shader_compile(dev, NULL, shader, &inputs);
+        pan_shader_compile(dev, shader, &inputs, binary, info);
 
         ralloc_free(shader);
-        return program;
 }
 
 /* Compile and upload all possible blit shaders ahead-of-time to reduce draw
@@ -162,6 +162,9 @@ panfrost_init_blit_shaders(struct panfrost_device *dev)
         /* Don't bother generating multisampling variants if we don't actually
          * support multisampling */
         bool has_ms = !(dev->quirks & MIDGARD_SFBD);
+        struct util_dynarray binary;
+
+        util_dynarray_init(&binary, NULL);
 
         for (unsigned ms = 0; ms <= has_ms; ++ms) {
                 for (unsigned i = 0; i < ARRAY_SIZE(shader_descs); ++i) {
@@ -172,27 +175,38 @@ panfrost_init_blit_shaders(struct panfrost_device *dev)
                                         continue;
 
                                 struct pan_blit_shader *shader = &dev->blit_shaders.loads[loc][T][ms];
-                                panfrost_program *program =
-                                        panfrost_build_blit_shader(dev, loc,
-                                                                   nir_types[T], ms);
+                                struct pan_shader_info info;
 
-                                assert(offset + program->compiled.size < total_size);
+                                util_dynarray_clear(&binary);
+                                panfrost_build_blit_shader(dev, loc,
+                                                           nir_types[T], ms,
+                                                           &binary, &info);
+
+                                assert(offset + binary.size < total_size);
                                 memcpy(dev->blit_shaders.bo->ptr.cpu + offset,
-                                       program->compiled.data, program->compiled.size);
+                                       binary.data, binary.size);
 
-                                shader->shader = (dev->blit_shaders.bo->ptr.gpu + offset) |
-                                                 program->first_tag;
+                                shader->shader = (dev->blit_shaders.bo->ptr.gpu + offset);
+                                if (pan_is_bifrost(dev)) {
+                                        int rt = loc - FRAG_RESULT_DATA0;
+                                        if (rt >= 0 && rt < 8 &&
+                                            info.bifrost.blend[rt].return_offset) {
+                                                shader->blend_ret_addr =
+                                                        shader->shader +
+                                                        info.bifrost.blend[rt].return_offset;
+                                        }
+                                } else {
+                                        shader->shader |= info.midgard.first_tag;
+                                }
 
-                                int rt = loc - FRAG_RESULT_DATA0;
-                                if (rt >= 0 && rt < 8 && program->blend_ret_offsets[rt])
-                                        shader->blend_ret_addr = program->blend_ret_offsets[rt] + shader->shader;
 
-                                offset += ALIGN_POT(program->compiled.size,
+                                offset += ALIGN_POT(binary.size,
                                                     pan_is_bifrost(dev) ? 128 : 64);
-                                ralloc_free(program);
                         }
                 }
         }
+
+        util_dynarray_fini(&binary);
 }
 
 static void
diff --git a/src/panfrost/lib/pan_shader.c b/src/panfrost/lib/pan_shader.c
index e3b13189db2..20e9d43817c 100644
--- a/src/panfrost/lib/pan_shader.c
+++ b/src/panfrost/lib/pan_shader.c
@@ -37,13 +37,210 @@ pan_shader_get_compiler_options(const struct panfrost_device *dev)
         return &midgard_nir_options;
 }
 
-panfrost_program *
-pan_shader_compile(const struct panfrost_device *dev,
-                   void *mem_ctx, nir_shader *nir,
-                   const struct panfrost_compile_inputs *inputs)
+/* Translate a sized NIR ALU type and a component count (1 to 4) into a pipe
+ * format using the conversion table below. Returns PIPE_FORMAT_NONE when the
+ * type is not listed in the table. */
+static enum pipe_format
+varying_format(nir_alu_type t, unsigned ncomps)
 {
-        if (pan_is_bifrost(dev))
-                return bifrost_compile_shader_nir(mem_ctx, nir, inputs);
+#define VARYING_FORMAT(ntype, nsz, ptype, psz) \
+        { \
+                .type = nir_type_ ## ntype ## nsz, \
+                .formats = { \
+                        PIPE_FORMAT_R ## psz ## _ ## ptype, \
+                        PIPE_FORMAT_R ## psz ## G ## psz ## _ ## ptype, \
+                        PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## _ ## ptype, \
+                        PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz  ## A ## psz ## _ ## ptype, \
+                } \
+        }
 
-        return midgard_compile_shader_nir(mem_ctx, nir, inputs);
+        static const struct {
+                nir_alu_type type;
+                enum pipe_format formats[4];
+        } conv[] = {
+                VARYING_FORMAT(float, 32, FLOAT, 32),
+                VARYING_FORMAT(int, 32, SINT, 32),
+                VARYING_FORMAT(uint, 32, UINT, 32),
+                VARYING_FORMAT(float, 16, FLOAT, 16),
+                VARYING_FORMAT(int, 16, SINT, 16),
+                VARYING_FORMAT(uint, 16, UINT, 16),
+                VARYING_FORMAT(int, 8, SINT, 8),
+                VARYING_FORMAT(uint, 8, UINT, 8),
+                VARYING_FORMAT(bool, 32, UINT, 32),
+                VARYING_FORMAT(bool, 16, UINT, 16),
+                VARYING_FORMAT(bool, 8, UINT, 8),
+                VARYING_FORMAT(bool, 1, UINT, 8),
+        };
+#undef VARYING_FORMAT
+
+        assert(ncomps > 0 && ncomps <= ARRAY_SIZE(conv[0].formats));
+
+        for (unsigned i = 0; i < ARRAY_SIZE(conv); i++) {
+                if (conv[i].type == t)
+                        return conv[i].formats[ncomps - 1];
+        }
+
+        return PIPE_FORMAT_NONE;
+}
+
+/* Fill the varying table for every variable of the given mode: each attribute
+ * slot the variable occupies, starting at its driver_location, receives the
+ * matching varying slot and the pipe format chosen for the variable.
+ * *varying_count is set to one past the highest slot written (0 if none). */
+static void
+collect_varyings(nir_shader *s, nir_variable_mode varying_mode,
+                 struct pan_shader_varying *varyings,
+                 unsigned *varying_count)
+{
+        *varying_count = 0;
+
+        nir_foreach_variable_with_modes(var, s, varying_mode) {
+                unsigned loc = var->data.driver_location;
+                unsigned sz = glsl_count_attribute_slots(var->type, false);
+                const struct glsl_type *column =
+                        glsl_without_array_or_matrix(var->type);
+                unsigned chan = glsl_get_components(column);
+                enum glsl_base_type base_type = glsl_get_base_type(column);
+
+                /* If we have a fractional location added, we need to increase the size
+                 * so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4.
+                 * We could do better but this is an edge case as it is, normally
+                 * packed varyings will be aligned.
+                 */
+                chan += var->data.location_frac;
+                assert(chan >= 1 && chan <= 4);
+
+                nir_alu_type type = nir_get_nir_type_for_glsl_base_type(base_type);
+
+                type = nir_alu_type_get_base_type(type);
+
+                /* Demote to fp16 where possible. int16 varyings are TODO as the hw
+                 * will saturate instead of wrap which is not conformant, so we need to
+                 * insert i2i16/u2u16 instructions before the st_vary_32i/32u to get
+                 * the intended behaviour.
+                 */
+                if (type == nir_type_float &&
+                    (var->data.precision == GLSL_PRECISION_MEDIUM ||
+                     var->data.precision == GLSL_PRECISION_LOW)) {
+                        type |= 16;
+                } else {
+                        type |= 32;
+                }
+
+                enum pipe_format format = varying_format(type, chan);
+                assert(format != PIPE_FORMAT_NONE);
+
+                /* Both callers pass an array of MAX_VARYING entries */
+                assert(loc + sz <= MAX_VARYING);
+
+                for (unsigned c = 0; c < sz; ++c) {
+                        varyings[loc + c].location = var->data.location + c;
+                        varyings[loc + c].format = format;
+                }
+
+                *varying_count = MAX2(*varying_count, loc + sz);
+        }
+}
+
+/* Compile a NIR shader with the backend matching the device (Bifrost or
+ * Midgard), emitting the machine code into *binary, then fill the
+ * backend-independent fields of *info from the NIR shader info. *info is
+ * zeroed first, so the caller does not need to initialise it. */
+void
+pan_shader_compile(const struct panfrost_device *dev,
+                   nir_shader *s,
+                   const struct panfrost_compile_inputs *inputs,
+                   struct util_dynarray *binary,
+                   struct pan_shader_info *info)
+{
+        memset(info, 0, sizeof(*info));
+
+        if (pan_is_bifrost(dev))
+                bifrost_compile_shader_nir(s, inputs, binary, info);
+        else
+                midgard_compile_shader_nir(s, inputs, binary, info);
+
+        info->stage = s->info.stage;
+        info->contains_barrier = s->info.uses_memory_barrier ||
+                                 s->info.uses_control_barrier;
+
+        switch (info->stage) {
+        case MESA_SHADER_VERTEX:
+                info->attribute_count = util_bitcount64(s->info.inputs_read);
+
+                bool vertex_id = BITSET_TEST(s->info.system_values_read,
+                                             SYSTEM_VALUE_VERTEX_ID);
+                if (vertex_id)
+                        info->attribute_count = MAX2(info->attribute_count, PAN_VERTEX_ID + 1);
+
+                bool instance_id = BITSET_TEST(s->info.system_values_read,
+                                               SYSTEM_VALUE_INSTANCE_ID);
+                if (instance_id)
+                        info->attribute_count = MAX2(info->attribute_count, PAN_INSTANCE_ID + 1);
+
+                info->vs.writes_point_size =
+                        s->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PSIZ);
+                collect_varyings(s, nir_var_shader_out, info->varyings.output,
+                                 &info->varyings.output_count);
+                break;
+        case MESA_SHADER_FRAGMENT:
+                if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+                        info->fs.writes_depth = true;
+                if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
+                        info->fs.writes_stencil = true;
+
+                uint64_t outputs_read = s->info.outputs_read;
+                if (outputs_read & BITFIELD64_BIT(FRAG_RESULT_COLOR))
+                        outputs_read |= BITFIELD64_BIT(FRAG_RESULT_DATA0);
+
+                info->fs.outputs_read = outputs_read >> FRAG_RESULT_DATA0;
+
+                /* EXT_shader_framebuffer_fetch requires per-sample */
+                info->fs.sample_shading = s->info.fs.uses_sample_shading ||
+                                          outputs_read;
+
+                info->fs.can_discard = s->info.fs.uses_discard;
+                info->fs.helper_invocations = s->info.fs.needs_quad_helper_invocations;
+
+                /* List of reasons we need to execute frag shaders when things
+                 * are masked off */
+
+                info->fs.sidefx = s->info.writes_memory ||
+                                  s->info.fs.uses_discard ||
+                                  s->info.fs.uses_demote;
+                info->fs.reads_frag_coord =
+                        (s->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_POS)) ||
+                        BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
+                info->fs.reads_point_coord =
+                        s->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC);
+                info->fs.reads_face =
+                        (s->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_FACE)) ||
+                        BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
+                info->fs.reads_sample_id =
+                        BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
+                info->fs.reads_sample_pos =
+                        BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS);
+                info->fs.reads_sample_mask_in =
+                        BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
+                info->fs.reads_helper_invocation =
+                        BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION);
+                collect_varyings(s, nir_var_shader_in, info->varyings.input,
+                                 &info->varyings.input_count);
+                break;
+        case MESA_SHADER_COMPUTE:
+                info->wls_size = s->info.cs.shared_size;
+                break;
+        default:
+                unreachable("Unknown shader state");
+        }
+
+        info->outputs_written = s->info.outputs_written;
+
+        /* Sysvals have dedicated UBO */
+        info->ubo_count = s->info.num_ubos + (info->sysvals.sysval_count ? 1 : 0);
+
+        info->attribute_count += util_bitcount(s->info.images_used);
+        info->writes_global = s->info.writes_memory;
+
+        info->texture_count = s->info.num_textures;
 }
diff --git a/src/panfrost/lib/pan_shader.h b/src/panfrost/lib/pan_shader.h
index 7b3501da444..b67d5a2e596 100644
--- a/src/panfrost/lib/pan_shader.h
+++ b/src/panfrost/lib/pan_shader.h
@@ -33,9 +33,11 @@ struct panfrost_device;
 const nir_shader_compiler_options *
 pan_shader_get_compiler_options(const struct panfrost_device *dev);
 
-panfrost_program *
+void
 pan_shader_compile(const struct panfrost_device *dev,
-                   void *mem_ctx, nir_shader *nir,
-                   const struct panfrost_compile_inputs *inputs);
+                   nir_shader *nir,
+                   const struct panfrost_compile_inputs *inputs,
+                   struct util_dynarray *binary,
+                   struct pan_shader_info *info);
 
 #endif
diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h
index fdb8160d203..f804ee0f53c 100644
--- a/src/panfrost/midgard/compiler.h
+++ b/src/panfrost/midgard/compiler.h
@@ -238,6 +238,7 @@ enum midgard_rt_id {
 typedef struct compiler_context {
         const struct panfrost_compile_inputs *inputs;
         nir_shader *nir;
+        struct pan_shader_info *info;
         gl_shader_stage stage;
 
         /* Number of samples for a keyed blend shader. Depends on is_blend */
@@ -249,9 +250,6 @@ typedef struct compiler_context {
         /* Index to precolour to r2 for a dual-source blend colour */
         unsigned blend_src1;
 
-        /* Number of bytes used for Thread Local Storage */
-        unsigned tls_size;
-
         /* Count of spills and fills for shaderdb */
         unsigned spills;
         unsigned fills;
@@ -291,10 +289,6 @@ typedef struct compiler_context {
         /* Set of NIR indices that were already emitted as outmods */
         BITSET_WORD *already_emitted;
 
-        /* Just the count of the max register used. Higher count => higher
-         * register pressure */
-        int work_registers;
-
         /* The number of uniforms allowable for the fast path */
         int uniform_cutoff;
 
@@ -312,9 +306,7 @@ typedef struct compiler_context {
         /* Writeout instructions for each render target */
         midgard_instruction *writeout_branch[MIDGARD_NUM_RTS][MIDGARD_MAX_SAMPLE_ITER];
 
-        struct panfrost_sysvals sysvals;
         struct hash_table_u64 *sysval_to_id;
-        struct panfrost_ubo_push *push;
 } compiler_context;
 
 /* Per-block live_in/live_out */
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 16617e0b8fc..7f6c18a26fc 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -1448,7 +1448,7 @@ emit_sysval_read(compiler_context *ctx, nir_instr *instr,
         int sysval = panfrost_sysval_for_instr(instr, &nir_dest);
         unsigned dest = nir_dest_index(&nir_dest);
         unsigned uniform =
-                pan_lookup_sysval(ctx->sysval_to_id, &ctx->sysvals, sysval);
+                pan_lookup_sysval(ctx->sysval_to_id, &ctx->info->sysvals, sysval);
 
         /* Emit the read itself -- this is never indirect */
         midgard_instruction *ins =
@@ -2978,24 +2978,22 @@ mir_add_writeout_loops(compiler_context *ctx)
         }
 }
 
-panfrost_program *
-midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
-                           const struct panfrost_compile_inputs *inputs)
+void
+midgard_compile_shader_nir(nir_shader *nir,
+                           const struct panfrost_compile_inputs *inputs,
+                           struct util_dynarray *binary,
+                           struct pan_shader_info *info)
 {
-        panfrost_program *program = rzalloc(mem_ctx, panfrost_program);
-
-        struct util_dynarray *compiled = &program->compiled;
-
         midgard_debug = debug_get_option_midgard_debug();
 
         /* TODO: Bound against what? */
         compiler_context *ctx = rzalloc(NULL, compiler_context);
-        ctx->sysval_to_id = panfrost_init_sysvals(&ctx->sysvals, ctx);
+        ctx->sysval_to_id = panfrost_init_sysvals(&info->sysvals, ctx);
 
         ctx->inputs = inputs;
         ctx->nir = nir;
+        ctx->info = info;
         ctx->stage = nir->info.stage;
-        ctx->push = &program->push;
 
         if (inputs->is_blend) {
                 unsigned nr_samples = MAX2(inputs->blend.nr_samples, 1);
@@ -3013,7 +3011,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
         /* Start off with a safe cutoff, allowing usage of all 16 work
          * registers. Later, we'll promote uniform reads to uniform registers
          * if we determine it is beneficial to do so */
-        ctx->uniform_cutoff = 8;
+        info->midgard.uniform_cutoff = 8;
 
         /* Initialize at a global (not block) level hash tables */
 
@@ -3059,7 +3057,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
                 nir_print_shader(nir, stdout);
         }
 
-        ctx->tls_size = nir->scratch_size;
+        info->tls_size = nir->scratch_size;
 
         nir_foreach_function(func, nir) {
                 if (!func->impl)
@@ -3086,8 +3084,6 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
                 break; /* TODO: Multi-function shaders */
         }
 
-        util_dynarray_init(compiled, program);
-
         /* Per-block lowering before opts */
 
         mir_foreach_block(ctx, _block) {
@@ -3164,7 +3160,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
                         if (!bundle->last_writeout && (current_bundle + 1 < bundle_count))
                                 lookahead = source_order_bundles[current_bundle + 1]->tag;
 
-                        emit_binary_bundle(ctx, block, bundle, compiled, lookahead);
+                        emit_binary_bundle(ctx, block, bundle, binary, lookahead);
                         ++current_bundle;
                 }
 
@@ -3175,20 +3171,11 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
         free(source_order_bundles);
 
         /* Report the very first tag executed */
-        program->first_tag = midgard_get_first_tag_from_block(ctx, 0);
-
-        /* Deal with off-by-one related to the fencepost problem */
-        program->work_register_count = ctx->work_registers + 1;
-        program->uniform_cutoff = ctx->uniform_cutoff;
-
-        program->tls_size = ctx->tls_size;
-
-        program->sysval_count = ctx->sysvals.sysval_count;
-        memcpy(program->sysvals, ctx->sysvals.sysvals, sizeof(ctx->sysvals.sysvals[0]) * ctx->sysvals.sysval_count);
+        info->midgard.first_tag = midgard_get_first_tag_from_block(ctx, 0);
 
         if ((midgard_debug & MIDGARD_DBG_SHADERS) && !nir->info.internal) {
-                disassemble_midgard(stdout, program->compiled.data,
-                                    program->compiled.size, inputs->gpu_id);
+                disassemble_midgard(stdout, binary->data,
+                                    binary->size, inputs->gpu_id);
         }
 
         if ((midgard_debug & MIDGARD_DBG_SHADERDB || inputs->shaderdb) &&
@@ -3209,7 +3196,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
                 /* Calculate thread count. There are certain cutoffs by
                  * register count for thread count */
 
-                unsigned nr_registers = program->work_register_count;
+                unsigned nr_registers = info->work_reg_count;
 
                 unsigned nr_threads =
                         (nr_registers <= 4) ? 4 :
@@ -3232,6 +3219,4 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
         }
 
         ralloc_free(ctx);
-
-        return program;
 }
diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h
index 00d43a64e90..f049fbabb6b 100644
--- a/src/panfrost/midgard/midgard_compile.h
+++ b/src/panfrost/midgard/midgard_compile.h
@@ -29,9 +29,11 @@
 #include "util/u_dynarray.h"
 #include "panfrost/util/pan_ir.h"
 
-panfrost_program *
-midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
-                           const struct panfrost_compile_inputs *inputs);
+void
+midgard_compile_shader_nir(nir_shader *nir,
+                           const struct panfrost_compile_inputs *inputs,
+                           struct util_dynarray *binary,
+                           struct pan_shader_info *info);
 
 /* NIR options are shared between the standalone compiler and the online
  * compiler. Defining it here is the simplest, though maybe not the Right
diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c
index 37cecb1c339..44b3c7dc1c6 100644
--- a/src/panfrost/midgard/midgard_ra.c
+++ b/src/panfrost/midgard/midgard_ra.c
@@ -99,7 +99,7 @@ index_to_reg(compiler_context *ctx, struct lcra_state *l, unsigned reg, unsigned
         /* Report that we actually use this register, and return it */
 
         if (r.reg < 16)
-                ctx->work_registers = MAX2(ctx->work_registers, r.reg);
+                ctx->info->work_reg_count = MAX2(ctx->info->work_reg_count, r.reg + 1);
 
         return r;
 }
@@ -395,7 +395,7 @@ allocate_registers(compiler_context *ctx, bool *spilled)
          * uniforms start and the shader stage. By ABI we limit blend shaders
          * to 8 registers, should be lower XXX */
         int work_count = ctx->inputs->is_blend ? 8 :
-                16 - MAX2((ctx->uniform_cutoff - 8), 0);
+                16 - MAX2((ctx->info->midgard.uniform_cutoff - 8), 0);
 
        /* No register allocation to do with no SSA */
 
@@ -646,7 +646,7 @@ allocate_registers(compiler_context *ctx, bool *spilled)
         if (ctx->blend_src1 != ~0) {
                 assert(ctx->blend_src1 < ctx->temp_count);
                 l->solutions[ctx->blend_src1] = (16 * 2);
-                ctx->work_registers = MAX2(ctx->work_registers, 2);
+                ctx->info->work_reg_count = MAX2(ctx->info->work_reg_count, 3);
         }
 
         mir_compute_interference(ctx, l);
@@ -959,13 +959,14 @@ mir_spill_register(
 static void
 mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
 {
-        unsigned old_work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0);
+        unsigned old_work_count =
+                16 - MAX2((ctx->info->midgard.uniform_cutoff - 8), 0);
         unsigned work_count = 16 - MAX2((new_cutoff - 8), 0);
 
         unsigned min_demote = SSA_FIXED_REGISTER(old_work_count);
         unsigned max_demote = SSA_FIXED_REGISTER(work_count);
 
-        ctx->uniform_cutoff = new_cutoff;
+        ctx->info->midgard.uniform_cutoff = new_cutoff;
 
         mir_foreach_block(ctx, _block) {
                 midgard_block *block = (midgard_block *) _block;
@@ -978,7 +979,7 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
 
                                 unsigned temp = make_compiler_temp(ctx);
                                 unsigned idx = (23 - SSA_REG_FROM_FIXED(ins->src[i])) * 4;
-                                assert(idx < ctx->push->count);
+                                assert(idx < ctx->info->push.count);
 
                                 midgard_instruction ld = {
                                         .type = TAG_LOAD_STORE_4,
@@ -989,10 +990,10 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
                                         .swizzle = SWIZZLE_IDENTITY_4,
                                         .op = midgard_op_ld_ubo_int4,
                                         .load_store = {
-                                                .arg_1 = ctx->push->words[idx].ubo,
+                                                .arg_1 = ctx->info->push.words[idx].ubo,
                                                 .arg_2 = 0x1E,
                                         },
-                                        .constants.u32[0] = ctx->push->words[idx].offset
+                                        .constants.u32[0] = ctx->info->push.words[idx].offset
                                 };
 
                                 mir_insert_instruction_before_scheduled(ctx, block, before, ld);
@@ -1013,7 +1014,7 @@ mir_ra(compiler_context *ctx)
         int iter_count = 1000; /* max iterations */
 
         /* Number of 128-bit slots in memory we've spilled into */
-        unsigned spill_count = DIV_ROUND_UP(ctx->tls_size, 16);
+        unsigned spill_count = DIV_ROUND_UP(ctx->info->tls_size, 16);
 
 
         mir_create_pipeline_registers(ctx);
@@ -1025,9 +1026,9 @@ mir_ra(compiler_context *ctx)
                         /* It's a lot cheaper to demote uniforms to get more
                          * work registers than to spill to TLS. */
                         if (l->spill_class == REG_CLASS_WORK &&
-                            ctx->uniform_cutoff > 8) {
+                            ctx->info->midgard.uniform_cutoff > 8) {
 
-                                mir_demote_uniforms(ctx, MAX2(ctx->uniform_cutoff - 4, 8));
+                                mir_demote_uniforms(ctx, MAX2(ctx->info->midgard.uniform_cutoff - 4, 8));
                         } else if (spill_node == -1) {
                                 fprintf(stderr, "ERROR: Failed to choose spill node\n");
                                 lcra_free(l);
@@ -1056,7 +1057,7 @@ mir_ra(compiler_context *ctx)
         /* Report spilling information. spill_count is in 128-bit slots (vec4 x
          * fp32), but tls_size is in bytes, so multiply by 16 */
 
-        ctx->tls_size = spill_count * 16;
+        ctx->info->tls_size = spill_count * 16;
 
         install_registers(ctx, l);
 
diff --git a/src/panfrost/midgard/mir_promote_uniforms.c b/src/panfrost/midgard/mir_promote_uniforms.c
index b5e063e0600..744d88e540e 100644
--- a/src/panfrost/midgard/mir_promote_uniforms.c
+++ b/src/panfrost/midgard/mir_promote_uniforms.c
@@ -263,7 +263,7 @@ midgard_promote_uniforms(compiler_context *ctx)
         unsigned work_count = mir_work_heuristic(ctx, &analysis);
         unsigned promoted_count = 24 - work_count;
 
-        mir_pick_ubo(ctx->push, &analysis, promoted_count);
+        mir_pick_ubo(&ctx->info->push, &analysis, promoted_count);
 
         /* First, figure out special indices a priori so we don't recompute a lot */
         BITSET_WORD *special = mir_special_indices(ctx);
@@ -279,7 +279,7 @@ midgard_promote_uniforms(compiler_context *ctx)
                 if (!BITSET_TEST(analysis.blocks[ubo].pushed, qword)) continue;
 
                 /* Find where we pushed to, TODO: unaligned pushes to pack */
-                unsigned base = pan_lookup_pushed_ubo(ctx->push, ubo, qword * 16);
+                unsigned base = pan_lookup_pushed_ubo(&ctx->info->push, ubo, qword * 16);
                 assert((base & 0x3) == 0);
 
                 unsigned address = base / 4;
@@ -288,7 +288,8 @@ midgard_promote_uniforms(compiler_context *ctx)
                 /* Should've taken into account when pushing */
                 assert(address < promoted_count);
 
-                ctx->uniform_cutoff = MAX2(ctx->uniform_cutoff, address + 1);
+                ctx->info->midgard.uniform_cutoff =
+                        MAX2(ctx->info->midgard.uniform_cutoff, address + 1);
                 unsigned promoted = SSA_FIXED_REGISTER(uniform_reg);
 
                 /* We do need the move for safety for a non-SSA dest, or if
diff --git a/src/panfrost/util/pan_ir.h b/src/panfrost/util/pan_ir.h
index a48b3e92c97..1c094793f6d 100644
--- a/src/panfrost/util/pan_ir.h
+++ b/src/panfrost/util/pan_ir.h
@@ -115,40 +115,6 @@ pan_lookup_sysval(struct hash_table_u64 *sysval_to_id,
 int
 panfrost_sysval_for_instr(nir_instr *instr, nir_dest *dest);
 
-typedef struct {
-        int work_register_count;
-        int uniform_cutoff;
-
-        /* For Bifrost - output type for each RT */
-        nir_alu_type blend_types[8];
-
-        /* For Bifrost - return address for blend instructions */
-        uint32_t blend_ret_offsets[8];
-
-        /* Prepended before uniforms, mapping to SYSVAL_ names for the
-         * sysval */
-
-        unsigned sysval_count;
-        unsigned sysvals[MAX_SYSVAL_COUNT];
-
-        /* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access
-         * Uniforms (Bifrost) */
-        struct panfrost_ubo_push push;
-
-        int first_tag;
-
-        struct util_dynarray compiled;
-
-        /* The number of bytes to allocate per-thread for Thread Local Storage
-         * (register spilling), or zero if no spilling is used */
-        unsigned tls_size;
-
-        /* For Bifrost, should the program wait on dependency slots 6/7 before
-         * starting? For ATEST/BLEND in the first clause, which can occur with
-         * extremely simple shaders */
-        bool wait_6, wait_7;
-} panfrost_program;
-
 struct panfrost_compile_inputs {
         unsigned gpu_id;
         bool is_blend, is_blit;
@@ -163,6 +129,82 @@ struct panfrost_compile_inputs {
         enum pipe_format rt_formats[8];
 };
 
+struct pan_shader_varying { /* one entry per varying slot, see collect_varyings() */
+        gl_varying_slot location; /* the variable's location plus the slot offset within it */
+        enum pipe_format format; /* format chosen from the variable's type and width */
+};
+
+struct bifrost_shader_blend_info { /* per render target blend data (Bifrost) */
+        nir_alu_type type; /* output type for the render target */
+        uint32_t return_offset; /* return address for blend instructions, relative to the shader start */
+};
+
+struct bifrost_shader_info { /* Bifrost-only compiler outputs */
+        struct bifrost_shader_blend_info blend[8]; /* indexed by render target */
+        bool wait_6, wait_7; /* wait on dependency slots 6/7 before starting the program */
+};
+
+struct midgard_shader_info { /* Midgard-only compiler outputs */
+        unsigned uniform_cutoff; /* number of uniforms allowable for the fast path */
+        unsigned first_tag; /* very first tag executed; ORed into the shader address */
+};
+
+struct pan_shader_info { /* compiler outputs describing one compiled shader */
+        gl_shader_stage stage; /* copied from the NIR shader's info.stage */
+        unsigned work_reg_count; /* (Midgard RA) highest work register index used, plus one */
+        unsigned tls_size; /* bytes of Thread Local Storage (scratch and register spills) */
+        unsigned wls_size; /* compute only: taken from the NIR cs.shared_size */
+
+        union { /* per-stage data: fs for fragment shaders, vs for vertex shaders */
+                struct {
+                        bool reads_frag_coord;
+                        bool reads_point_coord;
+                        bool reads_face;
+                        bool helper_invocations;
+                        bool can_discard;
+                        bool writes_depth;
+                        bool writes_stencil;
+                        bool sidefx; /* writes memory, or uses discard/demote */
+                        bool reads_sample_id;
+                        bool reads_sample_pos;
+                        bool reads_sample_mask_in;
+                        bool reads_helper_invocation;
+                        bool sample_shading; /* also set when any output is read back */
+                        BITSET_WORD outputs_read; /* bit 0 corresponds to FRAG_RESULT_DATA0 */
+                } fs;
+
+                struct {
+                        bool writes_point_size;
+                } vs;
+        };
+
+        bool contains_barrier; /* uses a memory or control barrier */
+        bool writes_global; /* mirrors the NIR writes_memory flag */
+        uint64_t outputs_written;
+
+        unsigned texture_count;
+        unsigned ubo_count; /* NIR UBOs, plus one when any sysval is used */
+        unsigned attribute_count; /* includes vertex/instance ID slots and images */
+
+        struct { /* tables indexed by the variable's driver_location */
+                unsigned input_count;
+                struct pan_shader_varying input[MAX_VARYING];
+                unsigned output_count;
+                struct pan_shader_varying output[MAX_VARYING];
+        } varyings;
+
+        struct panfrost_sysvals sysvals;
+
+        /* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access
+         * Uniforms (Bifrost) */
+        struct panfrost_ubo_push push;
+
+        union { /* backend-specific data, selected by the device architecture */
+                struct bifrost_shader_info bifrost;
+                struct midgard_shader_info midgard;
+        };
+};
+
 typedef struct pan_block {
         /* Link to next block. Must be first for mir_get_block */
         struct list_head link;