diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 06bc31ff004..2864cb0e4fd 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -11257,37 +11257,8 @@ emit_streamout(isel_context* ctx, unsigned stream)
 Pseudo_instruction*
 add_startpgm(struct isel_context* ctx)
 {
-   unsigned arg_count = ctx->args->ac.arg_count;
-   if (ctx->stage == fragment_fs) {
-      /* LLVM optimizes away unused FS inputs and computes spi_ps_input_addr
-       * itself and then communicates the results back via the ELF binary.
-       * Mirror what LLVM does by re-mapping the VGPR arguments here.
-       *
-       * TODO: If we made the FS input scanning code into a separate pass that
-       * could run before argument setup, then this wouldn't be necessary
-       * anymore.
-       */
-      struct ac_shader_args* args = &ctx->args->ac;
-      arg_count = 0;
-      for (unsigned i = 0, vgpr_arg = 0, vgpr_reg = 0; i < args->arg_count; i++) {
-         if (args->args[i].file != AC_ARG_VGPR) {
-            arg_count++;
-            continue;
-         }
-
-         if (!(ctx->program->config->spi_ps_input_addr & (1 << vgpr_arg))) {
-            args->args[i].skip = true;
-         } else {
-            args->args[i].offset = vgpr_reg;
-            vgpr_reg += args->args[i].size;
-            arg_count++;
-         }
-         vgpr_arg++;
-      }
-   }
-
    aco_ptr startpgm{
-      create_instruction(aco_opcode::p_startpgm, Format::PSEUDO, 0, arg_count)};
+      create_instruction(aco_opcode::p_startpgm, Format::PSEUDO, 0, ctx->args->ac.arg_count)};
    for (unsigned i = 0, arg = 0; i < ctx->args->ac.arg_count; i++) {
       if (ctx->args->ac.args[i].skip)
          continue;
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 645bd240c54..6c37a72c44b 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -1676,6 +1676,7 @@ radv_shader_variant_compile(struct radv_device *device, struct vk_shader_module
    options.key = *key;
 
    options.explicit_scratch_args = !radv_use_llvm_for_stage(device, stage);
+   options.remap_spi_ps_input = !radv_use_llvm_for_stage(device, stage);
    options.robust_buffer_access = device->robust_buffer_access;
    options.wgp_mode = radv_should_use_wgp_mode(device, stage, info);
 
@@ -1693,6 +1694,7 @@ radv_create_gs_copy_shader(struct radv_device *device, struct nir_shader *shader
    gl_shader_stage stage = MESA_SHADER_VERTEX;
 
    options.explicit_scratch_args = !radv_use_llvm_for_stage(device, stage);
+   options.remap_spi_ps_input = !radv_use_llvm_for_stage(device, stage);
    options.key.has_multiview_view_index = multiview;
    options.key.optimisations_disabled = disable_optimizations;
 
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index c1303f77669..1918460e6b4 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -112,6 +112,7 @@ struct radv_nir_compiler_options {
    bool has_image_load_dcc_bug;
    bool enable_mrt_output_nan_fixup;
    bool wgp_mode;
+   bool remap_spi_ps_input;
    enum radeon_family family;
    enum chip_class chip_class;
    const struct radeon_info *info;
diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c
index 8eec8e90219..28659b751b5 100644
--- a/src/amd/vulkan/radv_shader_args.c
+++ b/src/amd/vulkan/radv_shader_args.c
@@ -363,6 +363,54 @@ declare_tes_input_vgprs(struct radv_shader_args *args)
    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
 }
 
+/* Declare the fragment shader input VGPR arguments.
+ *
+ * When options->remap_spi_ps_input is set, the n-th VGPR argument is kept only
+ * if bit n of shader_info->ps.spi_ps_input is set: cleared inputs are marked
+ * as skipped and the kept ones are assigned consecutive VGPR offsets.
+ */
+static void
+declare_ps_input_vgprs(struct radv_shader_args *args)
+{
+   unsigned spi_ps_input = args->shader_info->ps.spi_ps_input;
+
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.pull_model);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */
+
+   if (args->options->remap_spi_ps_input) {
+      /* LLVM optimizes away unused FS inputs and computes spi_ps_input_addr itself and then
+       * communicates the results back via the ELF binary. Mirror what LLVM does by re-mapping the
+       * VGPR arguments here.
+       */
+      for (unsigned i = 0, vgpr_arg = 0, vgpr_reg = 0; i < args->ac.arg_count; i++) {
+         if (args->ac.args[i].file != AC_ARG_VGPR)
+            continue;
+
+         if (!(spi_ps_input & (1u << vgpr_arg))) {
+            args->ac.args[i].skip = true;
+         } else {
+            args->ac.args[i].offset = vgpr_reg;
+            vgpr_reg += args->ac.args[i].size;
+         }
+         vgpr_arg++;
+      }
+   }
+}
+
 static void
 declare_ngg_sgprs(struct radv_shader_args *args, bool has_api_gs)
 {
@@ -654,22 +702,8 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
       if (args->options->explicit_scratch_args) {
          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
       }
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.pull_model);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */
+
+      declare_ps_input_vgprs(args);
       break;
    default:
       unreachable("Shader stage not implemented");