From 5ee5c73d2d6bcbfa412d6bc43d2b0d645a9500aa Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 14 Jul 2022 18:54:20 +0200 Subject: [PATCH] radv: implement PS epilogs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Samuel Pitoiset Reviewed-by: Timur Kristóf Part-of: --- docs/envvars.rst | 2 ++ src/amd/vulkan/radv_debug.h | 1 + src/amd/vulkan/radv_device.c | 1 + src/amd/vulkan/radv_shader.c | 49 +++++++++++++++++++++++++++++++ src/amd/vulkan/radv_shader.h | 3 ++ src/amd/vulkan/radv_shader_args.c | 6 ++++ 6 files changed, 62 insertions(+) diff --git a/docs/envvars.rst b/docs/envvars.rst index c2c38ac0b55..56550ec658d 100644 --- a/docs/envvars.rst +++ b/docs/envvars.rst @@ -658,6 +658,8 @@ RADV driver environment variables force all allocated buffers to be referenced in submissions ``checkir`` validate the LLVM IR before LLVM compiles the shader + ``epilogs`` + dump fragment shader epilogs ``forcecompress`` Enables DCC,FMASK,CMASK,HTILE in situations where the driver supports it but normally does not deem it beneficial. diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h index 3c8a4ed70a1..f727a8331fa 100644 --- a/src/amd/vulkan/radv_debug.h +++ b/src/amd/vulkan/radv_debug.h @@ -66,6 +66,7 @@ enum { RADV_DEBUG_DUMP_PROLOGS = 1ull << 35, RADV_DEBUG_NO_DMA_BLIT = 1ull << 36, RADV_DEBUG_SPLIT_FMA = 1ull << 37, + RADV_DEBUG_DUMP_EPILOGS = 1ull << 38, }; enum { diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index aaeb04b429c..b5ebad24e9d 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -967,6 +967,7 @@ static const struct debug_control radv_debug_options[] = { {"nonggc", RADV_DEBUG_NO_NGGC}, {"prologs", RADV_DEBUG_DUMP_PROLOGS}, {"nodma", RADV_DEBUG_NO_DMA_BLIT}, + {"epilogs", RADV_DEBUG_DUMP_EPILOGS}, {NULL, 0}}; const char * diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index e8e89ff925e..7ca5b292dde 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -2444,6 +2444,55 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke return prolog; } +struct radv_shader_part * +radv_create_ps_epilog(struct radv_device *device, const struct radv_ps_epilog_key *key) +{ + struct radv_shader_args args = {0}; + struct radv_nir_compiler_options options = {0}; + options.family = device->physical_device->rad_info.family; + options.gfx_level = device->physical_device->rad_info.gfx_level; + options.address32_hi = device->physical_device->rad_info.address32_hi; + options.dump_shader = device->instance->debug_flags & RADV_DEBUG_DUMP_EPILOGS; + options.record_ir = device->instance->debug_flags & RADV_DEBUG_HANG; + options.dump_preoptir = device->instance->debug_flags & RADV_DEBUG_DUMP_EPILOGS; + options.dump_shader = device->instance->debug_flags & RADV_DEBUG_DUMP_EPILOGS; + + struct radv_shader_info info = {0}; + info.wave_size = key->wave32 ? 32 : 64; + info.workgroup_size = 64; + + radv_declare_ps_epilog_args(device->physical_device->rad_info.gfx_level, key, &args); + +#ifdef LLVM_AVAILABLE + if (options.dump_shader || options.record_ir) + ac_init_llvm_once(); +#endif + + struct radv_shader_part_binary *binary = NULL; + struct aco_shader_info ac_info; + struct aco_ps_epilog_key ac_key; + struct aco_compiler_options ac_opts; + radv_aco_convert_shader_info(&ac_info, &info); + radv_aco_convert_opts(&ac_opts, &options); + radv_aco_convert_ps_epilog_key(&ac_key, key); + aco_compile_ps_epilog(&ac_opts, &ac_info, &ac_key, &args, &radv_aco_build_shader_part, + (void **)&binary); + struct radv_shader_part *epilog = upload_shader_part(device, binary, info.wave_size); + if (epilog) { + epilog->disasm_string = + binary->disasm_size ? strdup((const char *)(binary->data + binary->code_size)) : NULL; + } + + free(binary); + + if (epilog && options.dump_shader) { + fprintf(stderr, "Fragment epilog"); + fprintf(stderr, "\ndisasm:\n%s\n", epilog->disasm_string); + } + + return epilog; +} + void radv_shader_destroy(struct radv_device *device, struct radv_shader *shader) { diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 4a03789fc8c..ad49a51bf02 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -596,6 +596,9 @@ void radv_trap_handler_shader_destroy(struct radv_device *device, struct radv_shader_part *radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_key *key); +struct radv_shader_part *radv_create_ps_epilog(struct radv_device *device, + const struct radv_ps_epilog_key *key); + void radv_shader_destroy(struct radv_device *device, struct radv_shader *shader); void radv_shader_part_destroy(struct radv_device *device, struct radv_shader_part *shader_part); diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index 40ac439672b..ae00a244efd 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -450,6 +450,12 @@ declare_ps_input_vgprs(const struct radv_shader_info *info, struct radv_shader_a vgpr_arg++; } } + + if (info->ps.has_epilog) { + /* FIXME: Ensure the main shader doesn't have less VGPRs than the epilog */ + for (unsigned i = 0; i < MAX_RTS; i++) + ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_INT, NULL); + } } static void