diff --git a/src/panfrost/lib/pan_shader.h b/src/panfrost/lib/pan_shader.h index 43f6733eafc..7d36d47c5cc 100644 --- a/src/panfrost/lib/pan_shader.h +++ b/src/panfrost/lib/pan_shader.h @@ -47,8 +47,10 @@ static inline void pan_shader_prepare_midgard_rsd(const struct pan_shader_info *info, struct MALI_RENDERER_STATE *rsd) { + assert((info->push.count & 3) == 0); + rsd->properties.uniform_buffer_count = info->ubo_count; - rsd->properties.midgard.uniform_count = info->midgard.uniform_cutoff; + rsd->properties.midgard.uniform_count = info->push.count / 4; rsd->properties.midgard.shader_has_side_effects = info->writes_global; rsd->properties.midgard.fp_mode = MALI_FP_MODE_GL_INF_NAN_ALLOWED; diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h index f804ee0f53c..23ca0e88a26 100644 --- a/src/panfrost/midgard/compiler.h +++ b/src/panfrost/midgard/compiler.h @@ -289,9 +289,6 @@ typedef struct compiler_context { /* Set of NIR indices that were already emitted as outmods */ BITSET_WORD *already_emitted; - /* The number of uniforms allowable for the fast path */ - int uniform_cutoff; - /* Count of instructions emitted from NIR overall, across all blocks */ int instruction_count; diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 7f6c18a26fc..38efe98bd25 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -3008,11 +3008,6 @@ midgard_compile_shader_nir(nir_shader *nir, ctx->blend_src1 = ~0; ctx->quirks = midgard_get_quirks(inputs->gpu_id); - /* Start off with a safe cutoff, allowing usage of all 16 work - * registers. 
Later, we'll promote uniform reads to uniform registers - * if we determine it is beneficial to do so */ - info->midgard.uniform_cutoff = 8; - /* Initialize at a global (not block) level hash tables */ ctx->ssa_constants = _mesa_hash_table_u64_create(NULL); diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index 44b3c7dc1c6..d3757b2e605 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -391,11 +391,11 @@ mir_is_64(midgard_instruction *ins) static struct lcra_state * allocate_registers(compiler_context *ctx, bool *spilled) { - /* The number of vec4 work registers available depends on when the - * uniforms start and the shader stage. By ABI we limit blend shaders - * to 8 registers, should be lower XXX */ - int work_count = ctx->inputs->is_blend ? 8 : - 16 - MAX2((ctx->info->midgard.uniform_cutoff - 8), 0); + /* The number of vec4 work registers available depends on the number of + * register-mapped uniforms and the shader stage. By ABI we limit blend + * shaders to 8 registers, should be lower XXX */ + unsigned rmu = ctx->info->push.count / 4; + int work_count = ctx->inputs->is_blend ? 
8 : 16 - MAX2((int) rmu - 8, 0); /* No register allocation to do with no SSA */ @@ -959,15 +959,13 @@ mir_spill_register( static void mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff) { - unsigned old_work_count = - 16 - MAX2((ctx->info->midgard.uniform_cutoff - 8), 0); + unsigned uniforms = ctx->info->push.count / 4; + unsigned old_work_count = 16 - MAX2((int) uniforms - 8, 0); unsigned work_count = 16 - MAX2((new_cutoff - 8), 0); unsigned min_demote = SSA_FIXED_REGISTER(old_work_count); unsigned max_demote = SSA_FIXED_REGISTER(work_count); - ctx->info->midgard.uniform_cutoff = new_cutoff; - mir_foreach_block(ctx, _block) { midgard_block *block = (midgard_block *) _block; mir_foreach_instr_in_block(block, ins) { @@ -1002,6 +1000,8 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff) } } } + + ctx->info->push.count = MIN2(ctx->info->push.count, new_cutoff * 4); } /* Run register allocation in a loop, spilling until we succeed */ @@ -1022,13 +1022,12 @@ mir_ra(compiler_context *ctx) do { if (spilled) { signed spill_node = mir_choose_spill_node(ctx, l); + unsigned uniforms = ctx->info->push.count / 4; /* It's a lot cheaper to demote uniforms to get more * work registers than to spill to TLS. 
*/ - if (l->spill_class == REG_CLASS_WORK && - ctx->info->midgard.uniform_cutoff > 8) { - - mir_demote_uniforms(ctx, MAX2(ctx->info->midgard.uniform_cutoff - 4, 8)); + if (l->spill_class == REG_CLASS_WORK && uniforms > 8) { + mir_demote_uniforms(ctx, MAX2(uniforms - 4, 8)); } else if (spill_node == -1) { fprintf(stderr, "ERROR: Failed to choose spill node\n"); lcra_free(l); diff --git a/src/panfrost/midgard/mir_promote_uniforms.c b/src/panfrost/midgard/mir_promote_uniforms.c index 744d88e540e..19525efbc59 100644 --- a/src/panfrost/midgard/mir_promote_uniforms.c +++ b/src/panfrost/midgard/mir_promote_uniforms.c @@ -263,7 +263,9 @@ midgard_promote_uniforms(compiler_context *ctx) unsigned work_count = mir_work_heuristic(ctx, &analysis); unsigned promoted_count = 24 - work_count; + /* Ensure we are 16 byte aligned to avoid underallocations */ mir_pick_ubo(&ctx->info->push, &analysis, promoted_count); + ctx->info->push.count = ALIGN_POT(ctx->info->push.count, 4); /* First, figure out special indices a priori so we don't recompute a lot */ BITSET_WORD *special = mir_special_indices(ctx); @@ -287,9 +289,6 @@ midgard_promote_uniforms(compiler_context *ctx) /* Should've taken into account when pushing */ assert(address < promoted_count); - - ctx->info->midgard.uniform_cutoff = - MAX2(ctx->info->midgard.uniform_cutoff, address + 1); unsigned promoted = SSA_FIXED_REGISTER(uniform_reg); /* We do need the move for safety for a non-SSA dest, or if diff --git a/src/panfrost/util/pan_ir.h b/src/panfrost/util/pan_ir.h index 1c094793f6d..b8925a3fdd3 100644 --- a/src/panfrost/util/pan_ir.h +++ b/src/panfrost/util/pan_ir.h @@ -145,7 +145,6 @@ struct bifrost_shader_info { }; struct midgard_shader_info { - unsigned uniform_cutoff; unsigned first_tag; };