From f11ced475e341170ae29dda15d0c2b602304e89c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 20 Feb 2017 22:25:17 +0100 Subject: [PATCH] radeonsi/gfx9: add VS prolog support for merged LS-HS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HS input VGPRs must be reserved. Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_shader.c | 29 ++++++++++++++++-------- src/gallium/drivers/radeonsi/si_shader.h | 2 ++ 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 574244b484c..ecb8ee92839 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -7078,6 +7078,9 @@ static void si_get_vs_prolog_key(const struct tgsi_shader_info *info, key->vs_prolog.num_input_sgprs = num_input_sgprs; key->vs_prolog.last_input = MAX2(1, info->num_inputs) - 1; + if (shader_out->selector->type == PIPE_SHADER_TESS_CTRL) + key->vs_prolog.num_merged_next_stage_vgprs = 2; + /* Set the instanceID flag. */ for (unsigned i = 0; i < info->num_inputs; i++) if (key->vs_prolog.states.instance_divisors[i]) @@ -7861,15 +7864,19 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx, LLVMTypeRef *params, *returns; LLVMValueRef ret, func; int last_sgpr, num_params, num_returns, i; + unsigned first_vs_vgpr = key->vs_prolog.num_input_sgprs + + key->vs_prolog.num_merged_next_stage_vgprs; + unsigned num_input_vgprs = key->vs_prolog.num_merged_next_stage_vgprs + 4; + unsigned num_all_input_regs = key->vs_prolog.num_input_sgprs + + num_input_vgprs; + unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 8 : 0; - ctx->param_vertex_id = key->vs_prolog.num_input_sgprs; - ctx->param_instance_id = key->vs_prolog.num_input_sgprs + 3; + ctx->param_vertex_id = first_vs_vgpr; + ctx->param_instance_id = first_vs_vgpr + 3; /* 4 preloaded VGPRs + vertex load indices as prolog outputs */ - params = alloca((key->vs_prolog.num_input_sgprs + 4) * - sizeof(LLVMTypeRef)); - returns = alloca((key->vs_prolog.num_input_sgprs + 4 + - key->vs_prolog.last_input + 1) * + params = alloca(num_all_input_regs * sizeof(LLVMTypeRef)); + returns = alloca((num_all_input_regs + key->vs_prolog.last_input + 1) * sizeof(LLVMTypeRef)); num_params = 0; num_returns = 0; @@ -7882,8 +7889,8 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx, } last_sgpr = num_params - 1; - /* 4 preloaded VGPRs (outputs must be floats) */ - for (i = 0; i < 4; i++) { + /* Preloaded VGPRs (outputs must be floats) */ + for (i = 0; i < num_input_vgprs; i++) { params[num_params++] = ctx->i32; returns[num_returns++] = ctx->f32; } @@ -7905,7 +7912,7 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx, LLVMValueRef p = LLVMGetParam(func, i); ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, ""); } - for (i = num_params - 4; i < num_params; i++) { + for (; i < num_params; i++) { LLVMValueRef p = LLVMGetParam(func, i); p = LLVMBuildBitCast(gallivm->builder, p, ctx->f32, ""); ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, ""); @@ -7919,13 +7926,15 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx, if (divisor) { /* InstanceID / Divisor + StartInstance */ index = get_instance_index_for_fetch(ctx, + user_sgpr_base + SI_SGPR_START_INSTANCE, divisor); } else { /* VertexID + BaseVertex */ index = LLVMBuildAdd(gallivm->builder, LLVMGetParam(func, ctx->param_vertex_id), - LLVMGetParam(func, SI_SGPR_BASE_VERTEX), ""); + LLVMGetParam(func, user_sgpr_base + + SI_SGPR_BASE_VERTEX), ""); } index = LLVMBuildBitCast(gallivm->builder, index, ctx->f32, ""); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 34ba1412071..2d660ac6cc1 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -352,6 +352,8 @@ union si_shader_part_key { struct { struct si_vs_prolog_bits states; unsigned num_input_sgprs:6; + /* For merged stages such as LS-HS, HS input VGPRs are first. */ + unsigned num_merged_next_stage_vgprs:3; unsigned last_input:4; } vs_prolog; struct {