freedreno/ir3: make mergedregs a property of the variant

Do this rather than assuming that a6xx+ means mergedregs. We can actually (mostly?) do splitregs on a6xx as well. And GS/DS/HS currently require it, which might be papering over a bug, or might have something to do with how chaining shaders works. At any rate, we should at least be consistent, and not have the compiler thinking we are doing mergedregs when we are actually doing splitregs.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5458>
2020-06-12 20:48:36 -07:00 · 2020-06-12 20:48:36 -07:00 · 1cc4cf141a
parent c052087038
commit 1cc4cf141a
6 changed files with 35 additions and 13 deletions
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@ -80,7 +80,6 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
 		uint32_t repeat, uint32_t valid_flags)
 {
 	struct ir3_shader_variant *v = info->data;
-	bool mergedregs = v->shader->compiler->gpu_id >= 600;
 	reg_t val = { .dummy32 = 0 };

 	if (reg->flags & ~valid_flags) {
@ -114,7 +113,7 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
 			/* ignore writes to dummy register r63.x */
 		} else if (max < regid(48, 0)) {
 			if (reg->flags & IR3_REG_HALF) {
-				if (mergedregs) {
+				if (v->mergedregs) {
 					/* starting w/ a6xx, half regs conflict with full regs: */
 					info->max_reg = MAX2(info->max_reg, max >> 3);
 				} else {
--- a/src/freedreno/ir3/ir3_legalize.c
+++ b/src/freedreno/ir3/ir3_legalize.c
@ -88,7 +88,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
 	struct ir3_legalize_state *state = &bd->state;
 	bool last_input_needs_ss = false;
 	bool has_tex_prefetch = false;
-	bool mergedregs = ctx->compiler->gpu_id >= 600;
+	bool mergedregs = ctx->so->mergedregs;

 	/* our input state is the OR of all predecessor blocks' state: */
 	set_foreach(block->predecessors, entry) {
@ -711,7 +711,7 @@ bool
 ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
 {
 	struct ir3_legalize_ctx *ctx = rzalloc(ir, struct ir3_legalize_ctx);
-	bool mergedregs = ctx->compiler->gpu_id >= 600;
+	bool mergedregs = so->mergedregs;
 	bool progress;

 	ctx->so = so;
--- a/src/freedreno/ir3/ir3_postsched.c
+++ b/src/freedreno/ir3/ir3_postsched.c
@ -462,7 +462,7 @@ calculate_forward_deps(struct ir3_postsched_ctx *ctx)
 	struct ir3_postsched_deps_state state = {
 			.ctx = ctx,
 			.direction = F,
-			.merged = ctx->ir->compiler->gpu_id >= 600,
+			.merged = ctx->v->mergedregs,
 	};

 	foreach_instr (instr, &ctx->unscheduled_list) {
@ -476,7 +476,7 @@ calculate_reverse_deps(struct ir3_postsched_ctx *ctx)
 	struct ir3_postsched_deps_state state = {
 			.ctx = ctx,
 			.direction = R,
-			.merged = ctx->ir->compiler->gpu_id >= 600,
+			.merged = ctx->v->mergedregs,
 	};

 	foreach_instr_rev (instr, &ctx->unscheduled_list) {
--- a/src/freedreno/ir3/ir3_ra.c
+++ b/src/freedreno/ir3/ir3_ra.c
@ -572,7 +572,7 @@ ra_init(struct ir3_ra_ctx *ctx)
 	ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);

 	/* TODO add selector callback for split (pre-a6xx) register file: */
-	if (ctx->ir->compiler->gpu_id >= 600) {
+	if (ctx->v->mergedregs) {
 		ra_set_select_reg_callback(ctx->g, ra_select_reg_merged, ctx);

 		if (ctx->scalar_pass) {
@ -1488,7 +1488,7 @@ ir3_ra_pass(struct ir3_shader_variant *v, struct ir3_instruction **precolor,
 	struct ir3_ra_ctx ctx = {
 			.v = v,
 			.ir = v->ir,
-			.set = (v->ir->compiler->gpu_id >= 600) ?
+			.set = v->mergedregs ?
 				v->ir->compiler->mergedregs_set : v->ir->compiler->set,
 			.scalar_pass = scalar_pass,
 	};
--- a/src/freedreno/ir3/ir3_shader.c
+++ b/src/freedreno/ir3/ir3_shader.c
@ -64,7 +64,7 @@ delete_variant(struct ir3_shader_variant *v)
 * the reg off.
 */
 static void
-fixup_regfootprint(struct ir3_shader_variant *v, uint32_t gpu_id)
+fixup_regfootprint(struct ir3_shader_variant *v)
 {
 	unsigned i;

@ -86,7 +86,7 @@ fixup_regfootprint(struct ir3_shader_variant *v, uint32_t gpu_id)
 			unsigned n = util_last_bit(v->inputs[i].compmask) - 1;
 			int32_t regid = v->inputs[i].regid + n;
 			if (v->inputs[i].half) {
-				if (gpu_id < 500) {
+				if (!v->mergedregs) {
 					v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
 				} else {
 					v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
@ -103,7 +103,7 @@ fixup_regfootprint(struct ir3_shader_variant *v, uint32_t gpu_id)
 			continue;
 		int32_t regid = v->outputs[i].regid + 3;
 		if (v->outputs[i].half) {
-			if (gpu_id < 500) {
+			if (!v->mergedregs) {
 				v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
 			} else {
 				v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
@ -117,7 +117,7 @@ fixup_regfootprint(struct ir3_shader_variant *v, uint32_t gpu_id)
 		unsigned n = util_last_bit(v->sampler_prefetch[i].wrmask) - 1;
 		int32_t regid = v->sampler_prefetch[i].dst + n;
 		if (v->sampler_prefetch[i].half_precision) {
-			if (gpu_id < 500) {
+			if (!v->mergedregs) {
 				v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
 			} else {
 				v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
@ -152,7 +152,7 @@ void * ir3_shader_assemble(struct ir3_shader_variant *v)
 	 */
 	v->constlen = MAX2(v->constlen, v->info.max_const + 1);

-	fixup_regfootprint(v, gpu_id);
+	fixup_regfootprint(v);

 	return bin;
 }
@ -197,6 +197,26 @@ create_variant(struct ir3_shader *shader, struct ir3_shader_key *key,
 	v->key = *key;
 	v->type = shader->type;

+	if (shader->compiler->gpu_id >= 600) {
+		switch (v->type) {
+		case MESA_SHADER_TESS_CTRL:
+		case MESA_SHADER_TESS_EVAL:
+			v->mergedregs = false;
+			break;
+		case MESA_SHADER_VERTEX:
+		case MESA_SHADER_GEOMETRY:
+			/* For VS/GS, normally use mergedregs, but when tessellation
+			 * is enabled we must not use mergedregs.
+			 */
+			v->mergedregs = !key->tessellation;
+			break;
+		default:
+			v->mergedregs = true;
+		}
+	} else {
+		v->mergedregs = false;
+	}
+
 	ret = ir3_compile_shader_nir(shader->compiler, v);
 	if (ret) {
 		debug_error("compile failed!");
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@ -549,6 +549,9 @@ struct ir3_shader_variant {

 	bool per_samp;

+	/* Are we using split or merged register file? */
+	bool mergedregs;
+
 	/* for astc srgb workaround, the number/base of additional
 	 * alpha tex states we need, and index of original tex states
 	 */