vkd3d: Add path to use GENERAL depth-stencil images.

On some implementations, it doesn't matter for performance what we use,
and we can avoid a lot of ugly barriers this way.

Opt in to this path on GPUs that we know handle it well;
otherwise, keep using the tracking paths.

With VK_KHR_dynamic_rendering, this is now feasible to do since we no longer
have to deal with shenanigans related to VkRenderPass layouts and
complicated compatibility rules.

To make this work with the existing framework, we just need to consider
that GENERAL can be a common layout alongside DEPTH_STENCIL_OPTIMAL;
both are common layouts that do not need to be tracked at all.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
This commit is contained in:
Hans-Kristian Arntzen 2022-03-09 15:17:31 +01:00
parent f9da3bf564
commit 17b1ffb41a
3 changed files with 79 additions and 12 deletions

View File

@ -2265,6 +2265,9 @@ static VkImageLayout dsv_plane_optimal_mask_to_layout(uint32_t plane_optimal_mas
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
};
if (plane_optimal_mask & VKD3D_DEPTH_STENCIL_PLANE_GENERAL)
return VK_IMAGE_LAYOUT_GENERAL;
if (image_aspects != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))
{
/* If aspects is only DEPTH or only STENCIL, we should use the OPTIMAL or READ_ONLY layout.
@ -2327,8 +2330,9 @@ static uint32_t d3d12_command_list_notify_decay_dsv_resource(struct d3d12_comman
uint32_t decay_aspects;
size_t i, n;
/* No point in adding these since they are always deduced to be optimal. */
if (resource->desc.Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE)
/* No point in adding these since they are always deduced to be optimal or general. */
if ((resource->desc.Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) ||
resource->common_layout == VK_IMAGE_LAYOUT_GENERAL)
return 0;
for (i = 0, n = list->dsv_resource_tracking_count; i < n; i++)
@ -2353,6 +2357,8 @@ static uint32_t d3d12_command_list_promote_dsv_resource(struct d3d12_command_lis
/* No point in adding these since they are always deduced to be optimal. */
if (resource->desc.Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE)
return VKD3D_DEPTH_PLANE_OPTIMAL | VKD3D_STENCIL_PLANE_OPTIMAL;
else if (resource->common_layout == VK_IMAGE_LAYOUT_GENERAL)
return VKD3D_DEPTH_STENCIL_PLANE_GENERAL;
/* For single aspect images, mirror the optimal mask in the unused aspect. This avoids some
* extra checks elsewhere (particularly graphics pipeline setup and compat render passes)
@ -2382,6 +2388,9 @@ static uint32_t d3d12_command_list_promote_dsv_resource(struct d3d12_command_lis
static uint32_t d3d12_command_list_notify_dsv_writes(struct d3d12_command_list *list,
struct d3d12_resource *resource, const struct vkd3d_view *view, uint32_t plane_write_mask)
{
if (plane_write_mask & VKD3D_DEPTH_STENCIL_PLANE_GENERAL)
return VKD3D_DEPTH_STENCIL_PLANE_GENERAL;
assert(!(plane_write_mask & ~(VKD3D_DEPTH_PLANE_OPTIMAL | VKD3D_STENCIL_PLANE_OPTIMAL)));
/* If we cover the entire resource, we can promote it to our target layout. */
@ -2502,6 +2511,12 @@ static VkImageLayout d3d12_command_list_get_depth_stencil_resource_layout(const
*plane_optimal_mask = VKD3D_DEPTH_PLANE_OPTIMAL | VKD3D_STENCIL_PLANE_OPTIMAL;
return VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
}
else if (resource->common_layout == VK_IMAGE_LAYOUT_GENERAL)
{
if (plane_optimal_mask)
*plane_optimal_mask = VKD3D_DEPTH_STENCIL_PLANE_GENERAL;
return VK_IMAGE_LAYOUT_GENERAL;
}
for (i = 0, n = list->dsv_resource_tracking_count; i < n; i++)
{
@ -2521,16 +2536,30 @@ static VkImageLayout d3d12_command_list_get_depth_stencil_resource_layout(const
static VkImageLayout vk_separate_depth_layout(VkImageLayout combined_layout)
{
return (combined_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
combined_layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL) ?
VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL;
if (combined_layout == VK_IMAGE_LAYOUT_GENERAL)
{
return combined_layout;
}
else
{
return (combined_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
combined_layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL) ?
VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL;
}
}
static VkImageLayout vk_separate_stencil_layout(VkImageLayout combined_layout)
{
return (combined_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
combined_layout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL) ?
VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL;
if (combined_layout == VK_IMAGE_LAYOUT_GENERAL)
{
return combined_layout;
}
else
{
return (combined_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
combined_layout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL) ?
VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL;
}
}
static void d3d12_command_list_clear_attachment_pass(struct d3d12_command_list *list, struct d3d12_resource *resource,

View File

@ -299,7 +299,31 @@ static bool vkd3d_is_linear_tiling_supported(const struct d3d12_device *device,
return supported;
}
static VkImageLayout vk_common_image_layout_from_d3d12_desc(const D3D12_RESOURCE_DESC1 *desc)
/* Reports whether depth-stencil images on this device should use GENERAL layout.
 *
 * Returns true only when VK_KHR_driver_properties is available and the reported
 * driverID is one of the allow-listed drivers below; returns false otherwise. */
static bool d3d12_device_prefers_general_depth_stencil(const struct d3d12_device *device)
{
    /* Without driver properties there is no driverID to inspect. */
    if (!device->vk_info.KHR_driver_properties)
        return false;

    switch (device->device_info.driver_properties.driverID)
    {
        case VK_DRIVER_ID_NVIDIA_PROPRIETARY:
            /* NVIDIA doesn't really care about layouts for the most part. */
            return true;

        case VK_DRIVER_ID_MESA_RADV:
            /* RADV can use TC-compat HTILE without too many issues on Polaris and later,
             * so GENERAL is used for these GPUs.
             * Pre-Polaris, even read-only depth requires a decompress, so GENERAL
             * shouldn't really make things worse; it's going to run pretty badly
             * either way. */
            return true;

        default:
            return false;
    }
}
static VkImageLayout vk_common_image_layout_from_d3d12_desc(const struct d3d12_device *device,
const D3D12_RESOURCE_DESC1 *desc)
{
/* We need aggressive decay and promotion into anything. */
if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS)
@ -307,6 +331,20 @@ static VkImageLayout vk_common_image_layout_from_d3d12_desc(const D3D12_RESOURCE
if (desc->Layout == D3D12_TEXTURE_LAYOUT_ROW_MAJOR)
return VK_IMAGE_LAYOUT_GENERAL;
/* This is counter-intuitive, but using GENERAL layout for depth-stencils works around
* having to perform DSV plane tracking all the time, since we don't necessarily know at recording time
* if a DSV image is OPTIMAL or READ_ONLY.
* This saves us many redundant barriers while rendering, especially since games tend
* to split their rendering across many command lists in parallel.
* On several implementations, GENERAL is a perfectly fine layout to use,
* on others it is a disaster since compression is disabled :') */
if (((desc->Flags & (D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE)) ==
D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) &&
d3d12_device_prefers_general_depth_stencil(device))
{
return VK_IMAGE_LAYOUT_GENERAL;
}
/* DENY_SHADER_RESOURCE only allowed with ALLOW_DEPTH_STENCIL */
if (desc->Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE)
return VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
@ -708,7 +746,7 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device,
resource->common_layout = VK_IMAGE_LAYOUT_GENERAL;
}
else
resource->common_layout = vk_common_image_layout_from_d3d12_desc(desc);
resource->common_layout = vk_common_image_layout_from_d3d12_desc(device, desc);
if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS)
resource->flags |= VKD3D_RESOURCE_SIMULTANEOUS_ACCESS;
@ -2952,7 +2990,7 @@ VKD3D_EXPORT HRESULT vkd3d_create_image_resource(ID3D12Device *device,
object->flags = create_info->flags;
object->flags |= VKD3D_RESOURCE_EXTERNAL;
object->initial_layout_transition = 1;
object->common_layout = vk_common_image_layout_from_d3d12_desc(&object->desc);
object->common_layout = vk_common_image_layout_from_d3d12_desc(d3d12_device, &object->desc);
memset(&object->sparse, 0, sizeof(object->sparse));

View File

@ -1445,11 +1445,11 @@ struct vkd3d_shader_debug_ring_spec_info
VkSpecializationInfo spec_info;
};
/* One render pass for each plane optimal mask. */
/* Bit flags describing the layout state of a depth-stencil image's planes. */
enum vkd3d_plane_optimal_flag
{
    /* Per-plane flags; both set together pair with DEPTH_STENCIL_ATTACHMENT_OPTIMAL. */
    VKD3D_DEPTH_PLANE_OPTIMAL = 0x1,
    VKD3D_STENCIL_PLANE_OPTIMAL = 0x2,
    /* Reported for resources whose common layout is VK_IMAGE_LAYOUT_GENERAL. */
    VKD3D_DEPTH_STENCIL_PLANE_GENERAL = 0x4,
};
struct d3d12_graphics_pipeline_state