diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 1751ab8a6c9..fd2b8eab9cc 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -80,7 +80,6 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info, uint32_t repeat, uint32_t valid_flags) { struct ir3_shader_variant *v = info->data; - bool mergedregs = v->shader->compiler->gpu_id >= 600; reg_t val = { .dummy32 = 0 }; if (reg->flags & ~valid_flags) { @@ -114,7 +113,7 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info, /* ignore writes to dummy register r63.x */ } else if (max < regid(48, 0)) { if (reg->flags & IR3_REG_HALF) { - if (mergedregs) { + if (v->mergedregs) { /* starting w/ a6xx, half regs conflict with full regs: */ info->max_reg = MAX2(info->max_reg, max >> 3); } else { diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index 9b5dae7b39e..1f9a94d0f98 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -88,7 +88,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) struct ir3_legalize_state *state = &bd->state; bool last_input_needs_ss = false; bool has_tex_prefetch = false; - bool mergedregs = ctx->compiler->gpu_id >= 600; + bool mergedregs = ctx->so->mergedregs; /* our input state is the OR of all predecessor blocks' state: */ set_foreach(block->predecessors, entry) { @@ -711,7 +711,7 @@ bool ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary) { struct ir3_legalize_ctx *ctx = rzalloc(ir, struct ir3_legalize_ctx); - bool mergedregs = ctx->compiler->gpu_id >= 600; + bool mergedregs = so->mergedregs; bool progress; ctx->so = so; diff --git a/src/freedreno/ir3/ir3_postsched.c b/src/freedreno/ir3/ir3_postsched.c index ad16526cd50..ad2c9a6c529 100644 --- a/src/freedreno/ir3/ir3_postsched.c +++ b/src/freedreno/ir3/ir3_postsched.c @@ -462,7 +462,7 @@ calculate_forward_deps(struct ir3_postsched_ctx *ctx) struct ir3_postsched_deps_state 
state = { .ctx = ctx, .direction = F, - .merged = ctx->ir->compiler->gpu_id >= 600, + .merged = ctx->v->mergedregs, }; foreach_instr (instr, &ctx->unscheduled_list) { @@ -476,7 +476,7 @@ calculate_reverse_deps(struct ir3_postsched_ctx *ctx) struct ir3_postsched_deps_state state = { .ctx = ctx, .direction = R, - .merged = ctx->ir->compiler->gpu_id >= 600, + .merged = ctx->v->mergedregs, }; foreach_instr_rev (instr, &ctx->unscheduled_list) { diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c index 7812a5b7026..c92436f8b21 100644 --- a/src/freedreno/ir3/ir3_ra.c +++ b/src/freedreno/ir3/ir3_ra.c @@ -572,7 +572,7 @@ ra_init(struct ir3_ra_ctx *ctx) ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count); /* TODO add selector callback for split (pre-a6xx) register file: */ - if (ctx->ir->compiler->gpu_id >= 600) { + if (ctx->v->mergedregs) { ra_set_select_reg_callback(ctx->g, ra_select_reg_merged, ctx); if (ctx->scalar_pass) { @@ -1488,7 +1488,7 @@ ir3_ra_pass(struct ir3_shader_variant *v, struct ir3_instruction **precolor, struct ir3_ra_ctx ctx = { .v = v, .ir = v->ir, - .set = (v->ir->compiler->gpu_id >= 600) ? + .set = v->mergedregs ? v->ir->compiler->mergedregs_set : v->ir->compiler->set, .scalar_pass = scalar_pass, }; diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index a2ca295845e..aa699463012 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -64,7 +64,7 @@ delete_variant(struct ir3_shader_variant *v) * the reg off. 
*/ static void -fixup_regfootprint(struct ir3_shader_variant *v, uint32_t gpu_id) +fixup_regfootprint(struct ir3_shader_variant *v) { unsigned i; @@ -86,7 +86,7 @@ fixup_regfootprint(struct ir3_shader_variant *v, uint32_t gpu_id) unsigned n = util_last_bit(v->inputs[i].compmask) - 1; int32_t regid = v->inputs[i].regid + n; if (v->inputs[i].half) { - if (gpu_id < 500) { + if (!v->mergedregs) { v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2); } else { v->info.max_reg = MAX2(v->info.max_reg, regid >> 3); @@ -103,7 +103,7 @@ fixup_regfootprint(struct ir3_shader_variant *v, uint32_t gpu_id) continue; int32_t regid = v->outputs[i].regid + 3; if (v->outputs[i].half) { - if (gpu_id < 500) { + if (!v->mergedregs) { v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2); } else { v->info.max_reg = MAX2(v->info.max_reg, regid >> 3); @@ -117,7 +117,7 @@ fixup_regfootprint(struct ir3_shader_variant *v, uint32_t gpu_id) unsigned n = util_last_bit(v->sampler_prefetch[i].wrmask) - 1; int32_t regid = v->sampler_prefetch[i].dst + n; if (v->sampler_prefetch[i].half_precision) { - if (gpu_id < 500) { + if (!v->mergedregs) { v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2); } else { v->info.max_reg = MAX2(v->info.max_reg, regid >> 3); @@ -152,7 +152,7 @@ void * ir3_shader_assemble(struct ir3_shader_variant *v) */ v->constlen = MAX2(v->constlen, v->info.max_const + 1); - fixup_regfootprint(v, gpu_id); + fixup_regfootprint(v); return bin; } @@ -197,6 +197,26 @@ create_variant(struct ir3_shader *shader, struct ir3_shader_key *key, v->key = *key; v->type = shader->type; + if (shader->compiler->gpu_id >= 600) { + switch (v->type) { + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + v->mergedregs = false; + break; + case MESA_SHADER_VERTEX: + case MESA_SHADER_GEOMETRY: + /* For VS/GS, normally do mergedregs, but if there is tess + * we need to not use MERGEDREGS + */ + v->mergedregs = !key->tessellation; + break; + default: + v->mergedregs = 
true; + } + } else { + v->mergedregs = false; + } + ret = ir3_compile_shader_nir(shader->compiler, v); if (ret) { debug_error("compile failed!"); diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 472e4ab265b..7e3e34afa5b 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -549,6 +549,9 @@ struct ir3_shader_variant { bool per_samp; + /* Are we using split or merged register file? */ + bool mergedregs; + /* for astc srgb workaround, the number/base of additional * alpha tex states we need, and index of original tex states */