From e7a6af497150cb30e3c60738d607f90e7447576c Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Wed, 20 Apr 2022 17:00:07 +0200 Subject: [PATCH] vkd3d: Use texel buffer views for UAV clears with buffer to image copy. Allows this to more easily work with more formats. Signed-off-by: Philip Rebohle --- libs/vkd3d/command.c | 113 ++++++++++++++++++++++++++++++++----- libs/vkd3d/resource.c | 2 +- libs/vkd3d/vkd3d_private.h | 3 + 3 files changed, 103 insertions(+), 15 deletions(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 64e4e4ac..c654b15a 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -8307,21 +8307,30 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, static void d3d12_command_list_clear_uav_with_copy(struct d3d12_command_list *list, const struct d3d12_desc_split *d, struct d3d12_resource *resource, - const struct vkd3d_clear_uav_info *args, uint32_t clear_value, - UINT rect_count, const D3D12_RECT *rects) + const struct vkd3d_clear_uav_info *args, const VkClearColorValue *clear_value, + const struct vkd3d_format *format, UINT rect_count, const D3D12_RECT *rects) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; unsigned int miplevel_idx, layer_count, i, j; + struct vkd3d_clear_uav_pipeline pipeline; struct vkd3d_scratch_allocation scratch; + struct vkd3d_clear_uav_args clear_args; VkCopyBufferToImageInfo2KHR copy_info; VkBufferImageCopy2KHR copy_region; VkDeviceSize scratch_buffer_size; D3D12_RECT curr_rect, full_rect; + VkWriteDescriptorSet write_set; + VkBufferView vk_buffer_view; + VkExtent3D workgroup_size; VkMemoryBarrier barrier; + uint32_t element_count; d3d12_command_list_track_resource_usage(list, resource, true); d3d12_command_list_end_current_render_pass(list, false); + d3d12_command_list_invalidate_current_pipeline(list, true); + d3d12_command_list_invalidate_root_parameters(list, VK_PIPELINE_BIND_POINT_COMPUTE, true); + assert(args->has_view); assert(d3d12_resource_is_texture(resource)); @@ -8334,7 +8343,7 @@ static void d3d12_command_list_clear_uav_with_copy(struct d3d12_command_list *li if (rect_count) { - scratch_buffer_size = 0; + element_count = 0; for (i = 0; i < rect_count; i++) { @@ -8342,37 +8351,86 @@ static void d3d12_command_list_clear_uav_with_copy(struct d3d12_command_list *li { unsigned int w = rects[i].right - rects[i].left; unsigned int h = rects[i].bottom - rects[i].top; - scratch_buffer_size = max(scratch_buffer_size, w * h * sizeof(uint32_t)); + element_count = max(element_count, w * h); } } } else { - scratch_buffer_size = full_rect.right * full_rect.bottom * sizeof(uint32_t); + element_count = full_rect.right * full_rect.bottom; } - scratch_buffer_size *= d3d12_resource_desc_get_depth(&resource->desc, miplevel_idx); + element_count *= d3d12_resource_desc_get_depth(&resource->desc, miplevel_idx); + scratch_buffer_size = element_count * format->byte_count; if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, - scratch_buffer_size, sizeof(uint32_t), &scratch)) + scratch_buffer_size, 16, &scratch)) { - ERR("Failed to allocate scratch memory for UAV clear"); + ERR("Failed to allocate scratch memory for UAV clear.\n"); return; } - VK_CALL(vkCmdFillBuffer(list->vk_command_buffer, scratch.buffer, - scratch.offset, scratch_buffer_size, clear_value)); + pipeline = vkd3d_meta_get_clear_buffer_uav_pipeline(&list->device->meta_ops, true, false); + workgroup_size = vkd3d_meta_get_clear_buffer_uav_workgroup_size(); + + if (!vkd3d_create_vk_buffer_view(list->device, scratch.buffer, format, scratch.offset, scratch_buffer_size, &vk_buffer_view)) + { + ERR("Failed to create buffer view for UAV clear.\n"); + return; + } + + if (!(d3d12_command_allocator_add_buffer_view(list->allocator, vk_buffer_view))) + { + ERR("Failed to add buffer view.\n"); + VK_CALL(vkDestroyBufferView(list->device->vk_device, vk_buffer_view, NULL)); + return; + } + + memset(&write_set, 0, sizeof(write_set)); + write_set.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_set.descriptorCount = 1; + write_set.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + write_set.pTexelBufferView = &vk_buffer_view; + + if (!(write_set.dstSet = d3d12_command_allocator_allocate_descriptor_set( + list->allocator, pipeline.vk_set_layout, VKD3D_DESCRIPTOR_POOL_TYPE_STATIC))) + { + ERR("Failed to allocate descriptor set for UAV clear.\n"); + return; + } + + VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &write_set, 0, NULL)); + + VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, + VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.vk_pipeline)); + + VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, + VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.vk_pipeline_layout, + 0, 1, &write_set.dstSet, 0, NULL)); + + clear_args.clear_color = *clear_value; + clear_args.offset.x = 0; + clear_args.offset.y = 0; + clear_args.extent.width = element_count; + clear_args.extent.height = 1; + + VK_CALL(vkCmdPushConstants(list->vk_command_buffer, + pipeline.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(clear_args), &clear_args)); + + VK_CALL(vkCmdDispatch(list->vk_command_buffer, + vkd3d_compute_workgroup_count(element_count, workgroup_size.width), 1, 1)); /* Insert barrier to make the buffer clear visible, but also to make the * image safely accessible by the transfer stage. This fallback is so rare * that we should not pessimize regular UAV barriers. */ barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; barrier.pNext = NULL; - barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, - VK_PIPELINE_STAGE_TRANSFER_BIT | vk_queue_shader_stages(list->vk_queue_flags), + vk_queue_shader_stages(list->vk_queue_flags), VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &barrier, 0, NULL, 0, NULL)); @@ -8515,7 +8573,33 @@ static inline bool vkd3d_clear_uav_info_from_desc(struct vkd3d_clear_uav_info *a static void vkd3d_mask_uint_clear_color(uint32_t color[4], VkFormat vk_format) { - unsigned int i; + unsigned int component_count, i; + + switch (vk_format) + { + case VK_FORMAT_R8_UINT: + case VK_FORMAT_R16_UINT: + case VK_FORMAT_R32_UINT: + component_count = 1; + break; + + case VK_FORMAT_R8G8_UINT: + case VK_FORMAT_R16G16_UINT: + case VK_FORMAT_R32G32_UINT: + component_count = 2; + break; + + case VK_FORMAT_R32G32B32_UINT: + component_count = 3; + break; + + default: + component_count = 4; + break; + } + + for (i = component_count; i < 4; i++) + color[i] = 0x0; /* Need to mask the clear value, since apparently driver can saturate the clear value instead. */ switch (vk_format) @@ -8618,7 +8702,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(d3 * with the packed clear value and perform a buffer to image copy. */ if (color.uint32[0]) { - d3d12_command_list_clear_uav_with_copy(list, &d, resource_impl, &args, color.uint32[0], rect_count, rects); + d3d12_command_list_clear_uav_with_copy(list, &d, resource_impl, + &args, &color, uint_format, rect_count, rects); return; } } diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index e8145755..6003ed80 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -3351,7 +3351,7 @@ bool vkd3d_create_raw_r32ui_vk_buffer_view(struct d3d12_device *device, return vr == VK_SUCCESS; } -static bool vkd3d_create_vk_buffer_view(struct d3d12_device *device, +bool vkd3d_create_vk_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, const struct vkd3d_format *format, VkDeviceSize offset, VkDeviceSize range, VkBufferView *vk_view) { diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 903ff8ec..f2e3614f 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1073,6 +1073,9 @@ void d3d12_desc_create_uav(vkd3d_cpu_descriptor_va_t descriptor, struct d3d12_de void d3d12_desc_create_sampler(vkd3d_cpu_descriptor_va_t sampler, struct d3d12_device *device, const D3D12_SAMPLER_DESC *desc); +bool vkd3d_create_vk_buffer_view(struct d3d12_device *device, + VkBuffer vk_buffer, const struct vkd3d_format *format, + VkDeviceSize offset, VkDeviceSize range, VkBufferView *vk_view); bool vkd3d_create_raw_buffer_view(struct d3d12_device *device, D3D12_GPU_VIRTUAL_ADDRESS gpu_address, VkBufferView *vk_buffer_view); HRESULT d3d12_create_static_sampler(struct d3d12_device *device,