From 1fe73d55e3f92b1dfbd619d46fd892fd169ae796 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 11 Feb 2016 21:09:38 +0100 Subject: [PATCH] radeonsi: move some struct si_shader members to new struct si_shader_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will be part of shader binaries. Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_shader.c | 100 +++++++++--------- src/gallium/drivers/radeonsi/si_shader.h | 21 ++-- .../drivers/radeonsi/si_state_shaders.c | 18 ++-- 3 files changed, 71 insertions(+), 68 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index c20abfc7cb4..57458ae1381 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -453,7 +453,7 @@ static void declare_input_vs( input_index); } else if (divisor) { /* Build index from instance ID, start instance and divisor */ - ctx->shader->uses_instanceid = true; + ctx->shader->info.uses_instanceid = true; buffer_index = get_instance_index_for_fetch(&ctx->radeon_bld, SI_PARAM_START_INSTANCE, divisor); @@ -1893,8 +1893,8 @@ handle_semantic: case TGSI_SEMANTIC_COLOR: case TGSI_SEMANTIC_BCOLOR: target = V_008DFC_SQ_EXP_PARAM + param_count; - assert(i < ARRAY_SIZE(shader->vs_output_param_offset)); - shader->vs_output_param_offset[i] = param_count; + assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset)); + shader->info.vs_output_param_offset[i] = param_count; param_count++; break; case TGSI_SEMANTIC_CLIPDIST: @@ -1908,8 +1908,8 @@ handle_semantic: case TGSI_SEMANTIC_TEXCOORD: case TGSI_SEMANTIC_GENERIC: target = V_008DFC_SQ_EXP_PARAM + param_count; - assert(i < ARRAY_SIZE(shader->vs_output_param_offset)); - shader->vs_output_param_offset[i] = param_count; + assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset)); + shader->info.vs_output_param_offset[i] = param_count; param_count++; break; default: @@ -1937,7 +1937,7 @@ handle_semantic: } } - shader->nr_param_exports = param_count; + shader->info.nr_param_exports = param_count; /* We need to add the position output manually if it's missing. */ if (!pos_args[0][0]) { @@ -1999,7 +1999,7 @@ handle_semantic: for (i = 0; i < 4; i++) if (pos_args[i][0]) - shader->nr_pos_exports++; + shader->info.nr_pos_exports++; pos_idx = 0; for (i = 0; i < 4; i++) { @@ -2009,7 +2009,7 @@ handle_semantic: /* Specify the target we are exporting */ pos_args[i][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + pos_idx++); - if (pos_idx == shader->nr_pos_exports) + if (pos_idx == shader->info.nr_pos_exports) /* Specify that this is the last export */ pos_args[i][2] = uint->one; @@ -4061,18 +4061,18 @@ static void create_function(struct si_shader_context *ctx) S_0286D0_POS_FIXED_PT_ENA(1)); } - shader->num_input_sgprs = 0; - shader->num_input_vgprs = 0; + shader->info.num_input_sgprs = 0; + shader->info.num_input_vgprs = 0; for (i = 0; i <= last_sgpr; ++i) - shader->num_input_sgprs += llvm_get_type_size(params[i]) / 4; + shader->info.num_input_sgprs += llvm_get_type_size(params[i]) / 4; /* Unused fragment shader inputs are eliminated by the compiler, * so we don't know yet how many there will be. */ if (ctx->type != TGSI_PROCESSOR_FRAGMENT) for (; i < num_params; ++i) - shader->num_input_vgprs += llvm_get_type_size(params[i]) / 4; + shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 4; if (bld_base->info && (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 || @@ -4873,7 +4873,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, si_init_shader_ctx(&ctx, sscreen, shader, tm); ctx.is_monolithic = is_monolithic; - shader->uses_instanceid = sel->info.uses_instanceid; + shader->info.uses_instanceid = sel->info.uses_instanceid; bld_base = &ctx.radeon_bld.soa.bld_base; ctx.radeon_bld.load_system_value = declare_system_value; @@ -4967,43 +4967,43 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, /* Calculate the number of fragment input VGPRs. */ if (ctx.type == TGSI_PROCESSOR_FRAGMENT) { - shader->num_input_vgprs = 0; - shader->face_vgpr_index = -1; + shader->info.num_input_vgprs = 0; + shader->info.face_vgpr_index = -1; if (G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_addr)) - shader->num_input_vgprs += 2; + shader->info.num_input_vgprs += 2; if (G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_addr)) - shader->num_input_vgprs += 2; + shader->info.num_input_vgprs += 2; if (G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_addr)) - shader->num_input_vgprs += 2; + shader->info.num_input_vgprs += 2; if (G_0286CC_PERSP_PULL_MODEL_ENA(shader->config.spi_ps_input_addr)) - shader->num_input_vgprs += 3; + shader->info.num_input_vgprs += 3; if (G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_addr)) - shader->num_input_vgprs += 2; + shader->info.num_input_vgprs += 2; if (G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr)) - shader->num_input_vgprs += 2; + shader->info.num_input_vgprs += 2; if (G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_addr)) - shader->num_input_vgprs += 2; + shader->info.num_input_vgprs += 2; if (G_0286CC_LINE_STIPPLE_TEX_ENA(shader->config.spi_ps_input_addr)) - shader->num_input_vgprs += 1; + shader->info.num_input_vgprs += 1; if (G_0286CC_POS_X_FLOAT_ENA(shader->config.spi_ps_input_addr)) - shader->num_input_vgprs += 1; + shader->info.num_input_vgprs += 1; if (G_0286CC_POS_Y_FLOAT_ENA(shader->config.spi_ps_input_addr)) - shader->num_input_vgprs += 1; + shader->info.num_input_vgprs += 1; if (G_0286CC_POS_Z_FLOAT_ENA(shader->config.spi_ps_input_addr)) - shader->num_input_vgprs += 1; + shader->info.num_input_vgprs += 1; if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_addr)) - shader->num_input_vgprs += 1; + shader->info.num_input_vgprs += 1; if (G_0286CC_FRONT_FACE_ENA(shader->config.spi_ps_input_addr)) { - shader->face_vgpr_index = shader->num_input_vgprs; - shader->num_input_vgprs += 1; + shader->info.face_vgpr_index = shader->info.num_input_vgprs; + shader->info.num_input_vgprs += 1; } if (G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr)) - shader->num_input_vgprs += 1; + shader->info.num_input_vgprs += 1; if (G_0286CC_SAMPLE_COVERAGE_ENA(shader->config.spi_ps_input_addr)) - shader->num_input_vgprs += 1; + shader->info.num_input_vgprs += 1; if (G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr)) - shader->num_input_vgprs += 1; + shader->info.num_input_vgprs += 1; } if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { @@ -5279,11 +5279,11 @@ static bool si_get_vs_epilog(struct si_screen *sscreen, /* Set up the PrimitiveID output. */ if (shader->key.vs.epilog.export_prim_id) { unsigned index = shader->selector->info.num_outputs; - unsigned offset = shader->nr_param_exports++; + unsigned offset = shader->info.nr_param_exports++; epilog_key.vs_epilog.prim_id_param_offset = offset; - assert(index < ARRAY_SIZE(shader->vs_output_param_offset)); - shader->vs_output_param_offset[index] = offset; + assert(index < ARRAY_SIZE(shader->info.vs_output_param_offset)); + shader->info.vs_output_param_offset[index] = offset; } shader->epilog = si_get_shader_part(sscreen, &sscreen->vs_epilogs, @@ -5307,7 +5307,7 @@ static bool si_shader_select_vs_parts(struct si_screen *sscreen, /* Get the prolog. */ memset(&prolog_key, 0, sizeof(prolog_key)); prolog_key.vs_prolog.states = shader->key.vs.prolog; - prolog_key.vs_prolog.num_input_sgprs = shader->num_input_sgprs; + prolog_key.vs_prolog.num_input_sgprs = shader->info.num_input_sgprs; prolog_key.vs_prolog.last_input = MAX2(1, info->num_inputs) - 1; /* The prolog is a no-op if there are no inputs. */ @@ -5329,7 +5329,7 @@ static bool si_shader_select_vs_parts(struct si_screen *sscreen, /* Set the instanceID flag. */ for (i = 0; i < info->num_inputs; i++) if (prolog_key.vs_prolog.states.instance_divisors[i]) - shader->uses_instanceid = true; + shader->info.uses_instanceid = true; return true; } @@ -5735,8 +5735,8 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen, memset(&prolog_key, 0, sizeof(prolog_key)); prolog_key.ps_prolog.states = shader->key.ps.prolog; prolog_key.ps_prolog.colors_read = info->colors_read; - prolog_key.ps_prolog.num_input_sgprs = shader->num_input_sgprs; - prolog_key.ps_prolog.num_input_vgprs = shader->num_input_vgprs; + prolog_key.ps_prolog.num_input_sgprs = shader->info.num_input_sgprs; + prolog_key.ps_prolog.num_input_vgprs = shader->info.num_input_vgprs; if (info->colors_read) { unsigned *color = shader->selector->color_attr_index; @@ -5744,7 +5744,7 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen, if (shader->key.ps.prolog.color_two_side) { /* BCOLORs are stored after the last input. */ prolog_key.ps_prolog.num_interp_inputs = info->num_inputs; - prolog_key.ps_prolog.face_vgpr_index = shader->face_vgpr_index; + prolog_key.ps_prolog.face_vgpr_index = shader->info.face_vgpr_index; shader->config.spi_ps_input_ena |= S_0286CC_FRONT_FACE_ENA(1); } @@ -5920,15 +5920,15 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, shader->is_binary_shared = true; shader->binary = mainp->binary; shader->config = mainp->config; - shader->num_input_sgprs = mainp->num_input_sgprs; - shader->num_input_vgprs = mainp->num_input_vgprs; - shader->face_vgpr_index = mainp->face_vgpr_index; - memcpy(shader->vs_output_param_offset, - mainp->vs_output_param_offset, - sizeof(mainp->vs_output_param_offset)); - shader->uses_instanceid = mainp->uses_instanceid; - shader->nr_pos_exports = mainp->nr_pos_exports; - shader->nr_param_exports = mainp->nr_param_exports; + shader->info.num_input_sgprs = mainp->info.num_input_sgprs; + shader->info.num_input_vgprs = mainp->info.num_input_vgprs; + shader->info.face_vgpr_index = mainp->info.face_vgpr_index; + memcpy(shader->info.vs_output_param_offset, + mainp->info.vs_output_param_offset, + sizeof(mainp->info.vs_output_param_offset)); + shader->info.uses_instanceid = mainp->info.uses_instanceid; + shader->info.nr_pos_exports = mainp->info.nr_pos_exports; + shader->info.nr_param_exports = mainp->info.nr_param_exports; /* Select prologs and/or epilogs. */ switch (shader->selector->type) { @@ -5952,7 +5952,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, * are allocated inputs. */ shader->config.num_vgprs = MAX2(shader->config.num_vgprs, - shader->num_input_vgprs); + shader->info.num_input_vgprs); break; } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 88602dcd811..b299b7b2c0a 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -341,6 +341,17 @@ struct si_shader_config { unsigned rsrc2; }; +/* GCN-specific shader info. */ +struct si_shader_info { + ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS]; + ubyte num_input_sgprs; + ubyte num_input_vgprs; + char face_vgpr_index; + bool uses_instanceid; + ubyte nr_pos_exports; + ubyte nr_param_exports; +}; + struct si_shader { struct si_shader_selector *selector; struct si_shader *next_variant; @@ -356,15 +367,7 @@ struct si_shader { struct radeon_shader_binary binary; bool is_binary_shared; struct si_shader_config config; - - ubyte num_input_sgprs; - ubyte num_input_vgprs; - char face_vgpr_index; - - ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS]; - bool uses_instanceid; - ubyte nr_pos_exports; - ubyte nr_param_exports; + struct si_shader_info info; }; struct si_shader_part { diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index dc813437f14..c62cbb72c91 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -108,7 +108,7 @@ static void si_shader_ls(struct si_shader *shader) /* We need at least 2 components for LS. * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */ - vgpr_comp_cnt = shader->uses_instanceid ? 3 : 1; + vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1; num_user_sgprs = SI_LS_NUM_USER_SGPR; num_sgprs = shader->config.num_sgprs; @@ -181,7 +181,7 @@ static void si_shader_es(struct si_shader *shader) si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); if (shader->selector->type == PIPE_SHADER_VERTEX) { - vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0; + vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 0; num_user_sgprs = SI_ES_NUM_USER_SGPR; } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) { vgpr_comp_cnt = 3; /* all components are needed for TES */ @@ -347,7 +347,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs) vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */ num_user_sgprs = SI_GSCOPY_NUM_USER_SGPR; } else if (shader->selector->type == PIPE_SHADER_VERTEX) { - vgpr_comp_cnt = shader->uses_instanceid ? 3 : (enable_prim_id ? 2 : 0); + vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : (enable_prim_id ? 2 : 0); num_user_sgprs = SI_VS_NUM_USER_SGPR; } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) { vgpr_comp_cnt = 3; /* all components are needed for TES */ @@ -363,19 +363,19 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs) assert(num_sgprs <= 104); /* VS is required to export at least one param. */ - nparams = MAX2(shader->nr_param_exports, 1); + nparams = MAX2(shader->info.nr_param_exports, 1); si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG, S_0286C4_VS_EXPORT_COUNT(nparams - 1)); si_pm4_set_reg(pm4, R_02870C_SPI_SHADER_POS_FORMAT, S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) | - S_02870C_POS1_EXPORT_FORMAT(shader->nr_pos_exports > 1 ? + S_02870C_POS1_EXPORT_FORMAT(shader->info.nr_pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE) | - S_02870C_POS2_EXPORT_FORMAT(shader->nr_pos_exports > 2 ? + S_02870C_POS2_EXPORT_FORMAT(shader->info.nr_pos_exports > 2 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE) | - S_02870C_POS3_EXPORT_FORMAT(shader->nr_pos_exports > 3 ? + S_02870C_POS3_EXPORT_FORMAT(shader->info.nr_pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE)); @@ -1178,14 +1178,14 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx, for (j = 0; j < vsinfo->num_outputs; j++) { if (name == vsinfo->output_semantic_name[j] && index == vsinfo->output_semantic_index[j]) { - ps_input_cntl |= S_028644_OFFSET(vs->vs_output_param_offset[j]); + ps_input_cntl |= S_028644_OFFSET(vs->info.vs_output_param_offset[j]); break; } } if (name == TGSI_SEMANTIC_PRIMID) /* PrimID is written after the last output. */ - ps_input_cntl |= S_028644_OFFSET(vs->vs_output_param_offset[vsinfo->num_outputs]); + ps_input_cntl |= S_028644_OFFSET(vs->info.vs_output_param_offset[vsinfo->num_outputs]); else if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(ps_input_cntl)) { /* No corresponding output found, load defaults into input. * Don't set any other bits.