From 651c6b16ff09745cb033566bc7910375868027df Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Fri, 18 Jun 2021 09:08:40 -0400 Subject: [PATCH] radv: move pipe_misaligned and l2_coherent image checks to flags set on init this should save 4-5% cpu in some cases Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_cmd_buffer.c | 77 +------------------------------- src/amd/vulkan/radv_image.c | 74 ++++++++++++++++++++++++++++++ src/amd/vulkan/radv_private.h | 1 + 3 files changed, 77 insertions(+), 75 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index b95347a2166..94d48f6bf5f 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -3225,85 +3225,12 @@ radv_stage_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags src_st } } -/* Determine if the image is affected by the pipe misaligned metadata issue - * which requires to invalidate L2. - */ -static bool -radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image) -{ - struct radeon_info *rad_info = &device->physical_device->rad_info; - unsigned log2_samples = util_logbase2(image->info.samples); - - assert(rad_info->chip_class >= GFX10); - - for (unsigned i = 0; i < image->plane_count; ++i) { - VkFormat fmt = vk_format_get_plane_format(image->vk_format, i); - unsigned log2_bpp = util_logbase2(vk_format_get_blocksize(fmt)); - unsigned log2_bpp_and_samples; - - if (rad_info->chip_class >= GFX10_3) { - log2_bpp_and_samples = log2_bpp + log2_samples; - } else { - if (vk_format_has_depth(image->vk_format) && image->info.array_size >= 8) { - log2_bpp = 2; - } - - log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples); - } - - unsigned num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config); - int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8); - - if (vk_format_has_depth(image->vk_format)) { - if (radv_image_is_tc_compat_htile(image) && overlap) { - return true; - } - } else { - unsigned max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config); - int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags); - int samples_overlap = MIN2(log2_samples, overlap); - - /* TODO: It shouldn't be necessary if the image has DCC but - * not readable by shader. - */ - if ((radv_image_has_dcc(image) || radv_image_is_tc_compat_cmask(image)) && - (samples_overlap > log2_samples_frag_diff)) { - return true; - } - } - } - - return false; -} - -static bool -radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image) -{ - if (device->physical_device->rad_info.chip_class >= GFX10) { - return !device->physical_device->rad_info.tcc_rb_non_coherent && - (image && !radv_image_is_pipe_misaligned(device, image)); - } else if (device->physical_device->rad_info.chip_class == GFX9 && image) { - if (image->info.samples == 1 && - (image->usage & - (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) && - !vk_format_has_stencil(image->vk_format)) { - /* Single-sample color and single-sample depth - * (not stencil) are coherent with shaders on - * GFX9. - */ - return true; - } - } - - return false; -} - enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags src_flags, const struct radv_image *image) { bool has_CB_meta = true, has_DB_meta = true; - bool image_is_coherent = radv_image_is_l2_coherent(cmd_buffer->device, image); + bool image_is_coherent = image ? image->l2_coherent : false; enum radv_cmd_flush_bits flush_bits = 0; if (image) { @@ -3379,7 +3306,7 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags dst_flag bool has_CB_meta = true, has_DB_meta = true; enum radv_cmd_flush_bits flush_bits = 0; bool flush_CB = true, flush_DB = true; - bool image_is_coherent = radv_image_is_l2_coherent(cmd_buffer->device, image); + bool image_is_coherent = image ? image->l2_coherent : false; if (image) { if (!(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) { diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index f18f96ca491..9a11e013542 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -1527,6 +1527,79 @@ radv_select_modifier(const struct radv_device *dev, VkFormat format, unreachable("App specified an invalid modifier"); } +/* Determine if the image is affected by the pipe misaligned metadata issue + * which requires to invalidate L2. + */ +static bool +radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image) +{ + struct radeon_info *rad_info = &device->physical_device->rad_info; + unsigned log2_samples = util_logbase2(image->info.samples); + + assert(rad_info->chip_class >= GFX10); + + for (unsigned i = 0; i < image->plane_count; ++i) { + VkFormat fmt = vk_format_get_plane_format(image->vk_format, i); + unsigned log2_bpp = util_logbase2(vk_format_get_blocksize(fmt)); + unsigned log2_bpp_and_samples; + + if (rad_info->chip_class >= GFX10_3) { + log2_bpp_and_samples = log2_bpp + log2_samples; + } else { + if (vk_format_has_depth(image->vk_format) && image->info.array_size >= 8) { + log2_bpp = 2; + } + + log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples); + } + + unsigned num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config); + int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8); + + if (vk_format_has_depth(image->vk_format)) { + if (radv_image_is_tc_compat_htile(image) && overlap) { + return true; + } + } else { + unsigned max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config); + int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags); + int samples_overlap = MIN2(log2_samples, overlap); + + /* TODO: It shouldn't be necessary if the image has DCC but + * not readable by shader. + */ + if ((radv_image_has_dcc(image) || radv_image_is_tc_compat_cmask(image)) && + (samples_overlap > log2_samples_frag_diff)) { + return true; + } + } + } + + return false; +} + +static bool +radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image) +{ + if (device->physical_device->rad_info.chip_class >= GFX10) { + return !device->physical_device->rad_info.tcc_rb_non_coherent && + !radv_image_is_pipe_misaligned(device, image); + } else if (device->physical_device->rad_info.chip_class == GFX9) { + if (image->info.samples == 1 && + (image->usage & + (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) && + !vk_format_has_stencil(image->vk_format)) { + /* Single-sample color and single-sample depth + * (not stencil) are coherent with shaders on + * GFX9. + */ + return true; + } + } + + return false; +} + VkResult radv_image_create(VkDevice _device, const struct radv_image_create_info *create_info, const VkAllocationCallbacks *alloc, VkImage *pImage) @@ -1634,6 +1707,7 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); } } + image->l2_coherent = radv_image_is_l2_coherent(device, image); if (device->instance->debug_flags & RADV_DEBUG_IMG) { radv_image_print_info(device, image); diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 6bcb9993854..610c8bb7653 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1856,6 +1856,7 @@ struct radv_image { unsigned queue_family_mask; bool exclusive; bool shareable; + bool l2_coherent; /* Set when bound */ struct radeon_winsys_bo *bo;