From 17b1ffb41a7c55864353eb210d3d98b25fc7a766 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 9 Mar 2022 15:17:31 +0100 Subject: [PATCH] vkd3d: Add path to use GENERAL depth-stencil images. On some implementations, it doesn't matter for performance what we use, and we can avoid a lot of ugly barriers this way. Opt in to this path on GPUs we know handle it well; otherwise, keep using the tracking paths. With VK_KHR_dynamic_rendering, this is now feasible to do since we no longer have to deal with shenanigans related to VkRenderPass layouts and complicated compatibility rules. To make this work with the existing framework, we just need to consider that GENERAL can be a common layout alongside DEPTH_STENCIL_OPTIMAL; both are common layouts that do not need to be tracked at all. Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/command.c | 45 +++++++++++++++++++++++++++++++------- libs/vkd3d/resource.c | 44 ++++++++++++++++++++++++++++++++++--- libs/vkd3d/vkd3d_private.h | 2 +- 3 files changed, 79 insertions(+), 12 deletions(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index f27bf314..774ea97d 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -2265,6 +2265,9 @@ static VkImageLayout dsv_plane_optimal_mask_to_layout(uint32_t plane_optimal_mas VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, }; + if (plane_optimal_mask & VKD3D_DEPTH_STENCIL_PLANE_GENERAL) + return VK_IMAGE_LAYOUT_GENERAL; + if (image_aspects != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { /* If aspects is only DEPTH or only STENCIL, we should use the OPTIMAL or READ_ONLY layout. @@ -2327,8 +2330,9 @@ static uint32_t d3d12_command_list_notify_decay_dsv_resource(struct d3d12_comman uint32_t decay_aspects; size_t i, n; - /* No point in adding these since they are always deduced to be optimal. 
*/ - if (resource->desc.Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) + /* No point in adding these since they are always deduced to be optimal or general. */ + if ((resource->desc.Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) || + resource->common_layout == VK_IMAGE_LAYOUT_GENERAL) return 0; for (i = 0, n = list->dsv_resource_tracking_count; i < n; i++) @@ -2353,6 +2357,8 @@ static uint32_t d3d12_command_list_promote_dsv_resource(struct d3d12_command_lis /* No point in adding these since they are always deduced to be optimal. */ if (resource->desc.Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) return VKD3D_DEPTH_PLANE_OPTIMAL | VKD3D_STENCIL_PLANE_OPTIMAL; + else if (resource->common_layout == VK_IMAGE_LAYOUT_GENERAL) + return VKD3D_DEPTH_STENCIL_PLANE_GENERAL; /* For single aspect images, mirror the optimal mask in the unused aspect. This avoids some * extra checks elsewhere (particularly graphics pipeline setup and compat render passes) @@ -2382,6 +2388,9 @@ static uint32_t d3d12_command_list_promote_dsv_resource(struct d3d12_command_lis static uint32_t d3d12_command_list_notify_dsv_writes(struct d3d12_command_list *list, struct d3d12_resource *resource, const struct vkd3d_view *view, uint32_t plane_write_mask) { + if (plane_write_mask & VKD3D_DEPTH_STENCIL_PLANE_GENERAL) + return VKD3D_DEPTH_STENCIL_PLANE_GENERAL; + assert(!(plane_write_mask & ~(VKD3D_DEPTH_PLANE_OPTIMAL | VKD3D_STENCIL_PLANE_OPTIMAL))); /* If we cover the entire resource, we can promote it to our target layout. 
*/ @@ -2502,6 +2511,12 @@ static VkImageLayout d3d12_command_list_get_depth_stencil_resource_layout(const *plane_optimal_mask = VKD3D_DEPTH_PLANE_OPTIMAL | VKD3D_STENCIL_PLANE_OPTIMAL; return VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; } + else if (resource->common_layout == VK_IMAGE_LAYOUT_GENERAL) + { + if (plane_optimal_mask) + *plane_optimal_mask = VKD3D_DEPTH_STENCIL_PLANE_GENERAL; + return VK_IMAGE_LAYOUT_GENERAL; + } for (i = 0, n = list->dsv_resource_tracking_count; i < n; i++) { @@ -2521,16 +2536,30 @@ static VkImageLayout d3d12_command_list_get_depth_stencil_resource_layout(const static VkImageLayout vk_separate_depth_layout(VkImageLayout combined_layout) { - return (combined_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL || - combined_layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL) ? - VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL; + if (combined_layout == VK_IMAGE_LAYOUT_GENERAL) + { + return combined_layout; + } + else + { + return (combined_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL || + combined_layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL) ? + VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL; + } } static VkImageLayout vk_separate_stencil_layout(VkImageLayout combined_layout) { - return (combined_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL || - combined_layout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL) ? - VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL; + if (combined_layout == VK_IMAGE_LAYOUT_GENERAL) + { + return combined_layout; + } + else + { + return (combined_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL || + combined_layout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL) ? 
+ VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL; + } } static void d3d12_command_list_clear_attachment_pass(struct d3d12_command_list *list, struct d3d12_resource *resource, diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 72fbb39c..249e2702 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -299,7 +299,31 @@ static bool vkd3d_is_linear_tiling_supported(const struct d3d12_device *device, return supported; } -static VkImageLayout vk_common_image_layout_from_d3d12_desc(const D3D12_RESOURCE_DESC1 *desc) +static bool d3d12_device_prefers_general_depth_stencil(const struct d3d12_device *device) +{ + if (device->vk_info.KHR_driver_properties) + { + if (device->device_info.driver_properties.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY) + { + /* NVIDIA doesn't really care about layouts for the most part. */ + return true; + } + else if (device->device_info.driver_properties.driverID == VK_DRIVER_ID_MESA_RADV) + { + /* RADV can use TC-compat HTILE without too much issues on Polaris and later. + * Use GENERAL for these GPUs. + * Pre-Polaris we run into issues where even read-only depth requires decompress + * so using GENERAL shouldn't really make things worse, it's going to run pretty bad + * either way. */ + return true; + } + } + + return false; +} + +static VkImageLayout vk_common_image_layout_from_d3d12_desc(const struct d3d12_device *device, + const D3D12_RESOURCE_DESC1 *desc) { /* We need aggressive decay and promotion into anything. 
*/ if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS) @@ -307,6 +331,20 @@ static VkImageLayout vk_common_image_layout_from_d3d12_desc(const D3D12_RESOURCE if (desc->Layout == D3D12_TEXTURE_LAYOUT_ROW_MAJOR) return VK_IMAGE_LAYOUT_GENERAL; + /* This is counter-intuitive, but using GENERAL layout for depth-stencils works around + * having to perform DSV plane tracking all the time, since we don't necessarily know at recording time + * if a DSV image is OPTIMAL or READ_ONLY. + * This saves us many redundant barriers while rendering, especially since games tend + * to split their rendering across many command lists in parallel. + * On several implementations, GENERAL is a perfectly fine layout to use, + * on others it is a disaster since compression is disabled :') */ + if (((desc->Flags & (D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE)) == + D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) && + d3d12_device_prefers_general_depth_stencil(device)) + { + return VK_IMAGE_LAYOUT_GENERAL; + } + /* DENY_SHADER_RESOURCE only allowed with ALLOW_DEPTH_STENCIL */ if (desc->Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) return VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; @@ -708,7 +746,7 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, resource->common_layout = VK_IMAGE_LAYOUT_GENERAL; } else - resource->common_layout = vk_common_image_layout_from_d3d12_desc(desc); + resource->common_layout = vk_common_image_layout_from_d3d12_desc(device, desc); if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS) resource->flags |= VKD3D_RESOURCE_SIMULTANEOUS_ACCESS; @@ -2952,7 +2990,7 @@ VKD3D_EXPORT HRESULT vkd3d_create_image_resource(ID3D12Device *device, object->flags = create_info->flags; object->flags |= VKD3D_RESOURCE_EXTERNAL; object->initial_layout_transition = 1; - object->common_layout = vk_common_image_layout_from_d3d12_desc(&object->desc); + object->common_layout = 
vk_common_image_layout_from_d3d12_desc(d3d12_device, &object->desc); memset(&object->sparse, 0, sizeof(object->sparse)); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index bc9a2c1a..412c3611 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1445,11 +1445,11 @@ struct vkd3d_shader_debug_ring_spec_info VkSpecializationInfo spec_info; }; -/* One render pass for each plane optimal mask. */ enum vkd3d_plane_optimal_flag { VKD3D_DEPTH_PLANE_OPTIMAL = (1 << 0), VKD3D_STENCIL_PLANE_OPTIMAL = (1 << 1), + VKD3D_DEPTH_STENCIL_PLANE_GENERAL = (1 << 2), }; struct d3d12_graphics_pipeline_state