diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index f27bf314..774ea97d 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -2265,6 +2265,9 @@ static VkImageLayout dsv_plane_optimal_mask_to_layout(uint32_t plane_optimal_mas VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, }; + if (plane_optimal_mask & VKD3D_DEPTH_STENCIL_PLANE_GENERAL) + return VK_IMAGE_LAYOUT_GENERAL; + if (image_aspects != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { /* If aspects is only DEPTH or only STENCIL, we should use the OPTIMAL or READ_ONLY layout. @@ -2327,8 +2330,9 @@ static uint32_t d3d12_command_list_notify_decay_dsv_resource(struct d3d12_comman uint32_t decay_aspects; size_t i, n; - /* No point in adding these since they are always deduced to be optimal. */ - if (resource->desc.Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) + /* No point in adding these since they are always deduced to be optimal or general. */ + if ((resource->desc.Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) || + resource->common_layout == VK_IMAGE_LAYOUT_GENERAL) return 0; for (i = 0, n = list->dsv_resource_tracking_count; i < n; i++) @@ -2353,6 +2357,8 @@ static uint32_t d3d12_command_list_promote_dsv_resource(struct d3d12_command_lis /* No point in adding these since they are always deduced to be optimal. */ if (resource->desc.Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) return VKD3D_DEPTH_PLANE_OPTIMAL | VKD3D_STENCIL_PLANE_OPTIMAL; + else if (resource->common_layout == VK_IMAGE_LAYOUT_GENERAL) + return VKD3D_DEPTH_STENCIL_PLANE_GENERAL; /* For single aspect images, mirror the optimal mask in the unused aspect. 
This avoids some * extra checks elsewhere (particularly graphics pipeline setup and compat render passes) @@ -2382,6 +2388,9 @@ static uint32_t d3d12_command_list_promote_dsv_resource(struct d3d12_command_lis static uint32_t d3d12_command_list_notify_dsv_writes(struct d3d12_command_list *list, struct d3d12_resource *resource, const struct vkd3d_view *view, uint32_t plane_write_mask) { + if (plane_write_mask & VKD3D_DEPTH_STENCIL_PLANE_GENERAL) + return VKD3D_DEPTH_STENCIL_PLANE_GENERAL; + assert(!(plane_write_mask & ~(VKD3D_DEPTH_PLANE_OPTIMAL | VKD3D_STENCIL_PLANE_OPTIMAL))); /* If we cover the entire resource, we can promote it to our target layout. */ @@ -2502,6 +2511,12 @@ static VkImageLayout d3d12_command_list_get_depth_stencil_resource_layout(const *plane_optimal_mask = VKD3D_DEPTH_PLANE_OPTIMAL | VKD3D_STENCIL_PLANE_OPTIMAL; return VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; } + else if (resource->common_layout == VK_IMAGE_LAYOUT_GENERAL) + { + if (plane_optimal_mask) + *plane_optimal_mask = VKD3D_DEPTH_STENCIL_PLANE_GENERAL; + return VK_IMAGE_LAYOUT_GENERAL; + } for (i = 0, n = list->dsv_resource_tracking_count; i < n; i++) { @@ -2521,16 +2536,30 @@ static VkImageLayout d3d12_command_list_get_depth_stencil_resource_layout(const static VkImageLayout vk_separate_depth_layout(VkImageLayout combined_layout) { - return (combined_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL || - combined_layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL) ? - VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL; + if (combined_layout == VK_IMAGE_LAYOUT_GENERAL) + { + return combined_layout; + } + else + { + return (combined_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL || + combined_layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL) ? 
+ VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL; + } } static VkImageLayout vk_separate_stencil_layout(VkImageLayout combined_layout) { - return (combined_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL || - combined_layout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL) ? - VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL; + if (combined_layout == VK_IMAGE_LAYOUT_GENERAL) + { + return combined_layout; + } + else + { + return (combined_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL || + combined_layout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL) ? + VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL; + } } static void d3d12_command_list_clear_attachment_pass(struct d3d12_command_list *list, struct d3d12_resource *resource, diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 72fbb39c..249e2702 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -299,7 +299,31 @@ static bool vkd3d_is_linear_tiling_supported(const struct d3d12_device *device, return supported; } -static VkImageLayout vk_common_image_layout_from_d3d12_desc(const D3D12_RESOURCE_DESC1 *desc) +static bool d3d12_device_prefers_general_depth_stencil(const struct d3d12_device *device) +{ + if (device->vk_info.KHR_driver_properties) + { + if (device->device_info.driver_properties.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY) + { + /* NVIDIA doesn't really care about layouts for the most part. */ + return true; + } + else if (device->device_info.driver_properties.driverID == VK_DRIVER_ID_MESA_RADV) + { + /* RADV can use TC-compat HTILE without too many issues on Polaris and later. + * Use GENERAL for these GPUs. + * Pre-Polaris we run into issues where even read-only depth requires decompress + * so using GENERAL shouldn't really make things worse, it's going to run pretty badly + * either way. 
*/ + return true; + } + } + + return false; +} + +static VkImageLayout vk_common_image_layout_from_d3d12_desc(const struct d3d12_device *device, + const D3D12_RESOURCE_DESC1 *desc) { /* We need aggressive decay and promotion into anything. */ if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS) @@ -307,6 +331,20 @@ static VkImageLayout vk_common_image_layout_from_d3d12_desc(const D3D12_RESOURCE if (desc->Layout == D3D12_TEXTURE_LAYOUT_ROW_MAJOR) return VK_IMAGE_LAYOUT_GENERAL; + /* This is counter-intuitive, but using GENERAL layout for depth-stencils works around + * having to perform DSV plane tracking all the time, since we don't necessarily know at recording time + * if a DSV image is OPTIMAL or READ_ONLY. + * This saves us many redundant barriers while rendering, especially since games tend + * to split their rendering across many command lists in parallel. + * On several implementations, GENERAL is a perfectly fine layout to use, + * on others it is a disaster since compression is disabled :') */ + if (((desc->Flags & (D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE)) == + D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) && + d3d12_device_prefers_general_depth_stencil(device)) + { + return VK_IMAGE_LAYOUT_GENERAL; + } + /* DENY_SHADER_RESOURCE only allowed with ALLOW_DEPTH_STENCIL */ if (desc->Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) return VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; @@ -708,7 +746,7 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, resource->common_layout = VK_IMAGE_LAYOUT_GENERAL; } else - resource->common_layout = vk_common_image_layout_from_d3d12_desc(desc); + resource->common_layout = vk_common_image_layout_from_d3d12_desc(device, desc); if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS) resource->flags |= VKD3D_RESOURCE_SIMULTANEOUS_ACCESS; @@ -2952,7 +2990,7 @@ VKD3D_EXPORT HRESULT vkd3d_create_image_resource(ID3D12Device *device, object->flags = 
create_info->flags; object->flags |= VKD3D_RESOURCE_EXTERNAL; object->initial_layout_transition = 1; - object->common_layout = vk_common_image_layout_from_d3d12_desc(&object->desc); + object->common_layout = vk_common_image_layout_from_d3d12_desc(d3d12_device, &object->desc); memset(&object->sparse, 0, sizeof(object->sparse)); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index bc9a2c1a..412c3611 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1445,11 +1445,11 @@ struct vkd3d_shader_debug_ring_spec_info VkSpecializationInfo spec_info; }; -/* One render pass for each plane optimal mask. */ enum vkd3d_plane_optimal_flag { VKD3D_DEPTH_PLANE_OPTIMAL = (1 << 0), VKD3D_STENCIL_PLANE_OPTIMAL = (1 << 1), + VKD3D_DEPTH_STENCIL_PLANE_GENERAL = (1 << 2), }; struct d3d12_graphics_pipeline_state