vkd3d: Implement postbuild info queries.

Can only support a subset in Vulkan without extra heroics. The DXR API
lets you query things that you technically should know a priori in the
application. We might need to allocate some side-channel buffers on
demand, but let's defer that until it's actually needed ... :\

DXR is also very awkward in that the query is resolved in
UNORDERED_ACCESS state instead of COPY_DEST state, so we'll have to
redundantly ping-pong through some barriers.
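
For reference, the app-facing call being implemented here looks roughly
like the following. This is only a sketch, not part of this commit;
list4, tlas_buffer and result_buffer are hypothetical app-side objects.
DXR requires DestBuffer to live in D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
which is what forces the barrier dance described above:

    D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC desc;
    D3D12_GPU_VIRTUAL_ADDRESS va;

    /* Hypothetical app-side objects; error handling omitted. */
    va = ID3D12Resource_GetGPUVirtualAddress(tlas_buffer);
    desc.DestBuffer = ID3D12Resource_GetGPUVirtualAddress(result_buffer);
    desc.InfoType = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_COMPACTED_SIZE;
    ID3D12GraphicsCommandList4_EmitRaytracingAccelerationStructurePostbuildInfo(
            list4, &desc, 1, &va);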

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>

Author: Hans-Kristian Arntzen
Date:   2021-02-24 16:23:34 +01:00
parent 4365f9962f
commit be9c376fde
4 changed files with 199 additions and 7 deletions

@@ -177,3 +177,167 @@ bool vkd3d_acceleration_structure_convert_inputs(
build_info->pGeometries = info->geometries;
return true;
}

static void vkd3d_acceleration_structure_end_barrier(struct d3d12_command_list *list)
{
/* We resolve the query in TRANSFER, but DXR expects UNORDERED_ACCESS. */
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
VkMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
barrier.pNext = NULL;
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = 0;
VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer,
VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0,
1, &barrier, 0, NULL, 0, NULL));
}

static void vkd3d_acceleration_structure_write_postbuild_info(
struct d3d12_command_list *list,
const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *desc,
VkDeviceSize desc_offset,
VkAccelerationStructureKHR vk_acceleration_structure)
{
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
const struct vkd3d_unique_resource *resource;
VkQueryPool vk_query_pool;
VkQueryType vk_query_type;
uint32_t vk_query_index;
VkDeviceSize stride;
uint32_t type_index;
VkBuffer vk_buffer;
uint32_t offset;
resource = vkd3d_va_map_deref(&list->device->memory_allocator.va_map, desc->DestBuffer);
if (!resource)
{
ERR("Invalid resource.\n");
return;
}
vk_buffer = resource->vk_buffer;
offset = desc->DestBuffer - resource->va;
offset += desc_offset;
if (desc->InfoType == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_COMPACTED_SIZE)
{
vk_query_type = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR;
type_index = VKD3D_QUERY_TYPE_INDEX_RT_COMPACTED_SIZE;
stride = sizeof(uint64_t);
}
else if (desc->InfoType == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION)
{
vk_query_type = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR;
type_index = VKD3D_QUERY_TYPE_INDEX_RT_SERIALIZE_SIZE;
stride = sizeof(uint64_t);
FIXME("NumBottomLevelPointers will always return 0.\n");
}
else
{
FIXME("Unsupported InfoType %u.\n", desc->InfoType);
/* TODO: CURRENT_SIZE is something we cannot query in Vulkan, so
* we'll need to keep around a buffer to handle this.
* For now, just clear to 0. */
VK_CALL(vkCmdFillBuffer(list->vk_command_buffer, vk_buffer, offset,
sizeof(uint64_t), 0));
return;
}
if (!d3d12_command_allocator_allocate_query_from_type_index(list->allocator,
type_index, &vk_query_pool, &vk_query_index))
{
ERR("Failed to allocate query.\n");
return;
}
d3d12_command_list_reset_query(list, vk_query_pool, vk_query_index);
VK_CALL(vkCmdWriteAccelerationStructuresPropertiesKHR(list->vk_command_buffer,
1, &vk_acceleration_structure, vk_query_type, vk_query_pool, vk_query_index));
VK_CALL(vkCmdCopyQueryPoolResults(list->vk_command_buffer,
vk_query_pool, vk_query_index, 1,
vk_buffer, offset, stride,
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
if (desc->InfoType == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION)
{
/* TODO: We'll need some way to store these values for later use and copy them here instead. */
VK_CALL(vkCmdFillBuffer(list->vk_command_buffer, vk_buffer, offset + sizeof(uint64_t),
sizeof(uint64_t), 0));
}
}

void vkd3d_acceleration_structure_emit_postbuild_info(
struct d3d12_command_list *list,
const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *desc,
uint32_t count,
const D3D12_GPU_VIRTUAL_ADDRESS *addresses)
{
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
VkAccelerationStructureKHR vk_acceleration_structure;
VkMemoryBarrier barrier;
VkDeviceSize stride;
uint32_t i;
barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
barrier.pNext = NULL;
barrier.srcAccessMask = 0;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
/* We resolve the query in TRANSFER, but DXR expects UNORDERED_ACCESS. */
VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
1, &barrier, 0, NULL, 0, NULL));
stride = desc->InfoType == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION ?
2 * sizeof(uint64_t) : sizeof(uint64_t);
for (i = 0; i < count; i++)
{
vk_acceleration_structure = vkd3d_va_map_place_acceleration_structure(
&list->device->memory_allocator.va_map, list->device, addresses[i]);
if (vk_acceleration_structure)
vkd3d_acceleration_structure_write_postbuild_info(list, desc, i * stride, vk_acceleration_structure);
else
ERR("Failed to query acceleration structure for VA 0x%"PRIx64".\n", addresses[i]);
}
vkd3d_acceleration_structure_end_barrier(list);
}

void vkd3d_acceleration_structure_emit_immediate_postbuild_info(
struct d3d12_command_list *list, uint32_t count,
const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *desc,
VkAccelerationStructureKHR vk_acceleration_structure)
{
/* In D3D12 we are supposed to be able to emit without an explicit barrier,
* but we need to emit them for Vulkan. */
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
VkMemoryBarrier barrier;
uint32_t i;
barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
barrier.pNext = NULL;
barrier.srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
/* The query accesses STRUCTURE_READ_BIT in BUILD_BIT stage. */
barrier.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_TRANSFER_WRITE_BIT;
/* Writing to the result buffer is supposed to happen in UNORDERED_ACCESS on DXR for
* some bizarre reason, so we have to satisfy a transfer barrier.
* Have to basically do a full stall to make this work ... */
VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
1, &barrier, 0, NULL, 0, NULL));
/* Could optimize a bit by batching more aggressively, but no idea if it's going to help in practice. */
for (i = 0; i < count; i++)
vkd3d_acceleration_structure_write_postbuild_info(list, &desc[i], 0, vk_acceleration_structure);
vkd3d_acceleration_structure_end_barrier(list);
}
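
For reference, the 64-bit slots written above map to the D3D12 result
structs for each InfoType, mirroring the declarations in the D3D12
headers. The SERIALIZATION variant has a second field, which is why the
code above zero-fills offset + sizeof(uint64_t) and why
NumBottomLevelPointers is always 0 for now:

    typedef struct D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_COMPACTED_SIZE_DESC
    {
        UINT64 CompactedSizeInBytes; /* Written by the query resolve. */
    } D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_COMPACTED_SIZE_DESC;

    typedef struct D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION_DESC
    {
        UINT64 SerializedSizeInBytes; /* Written by the query resolve. */
        UINT64 NumBottomLevelAccelerationStructurePointers; /* Zero-filled for now. */
    } D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION_DESC;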

@@ -1926,7 +1926,7 @@ static uint32_t d3d12_query_heap_type_to_type_index(D3D12_QUERY_HEAP_TYPE heap_type)
}
}
-static bool d3d12_command_allocator_allocate_query_from_type_index(
+bool d3d12_command_allocator_allocate_query_from_type_index(
struct d3d12_command_allocator *allocator,
uint32_t type_index, VkQueryPool *query_pool, uint32_t *query_index)
{
@@ -2575,9 +2575,6 @@ static void d3d12_command_list_emit_render_pass_transition(struct d3d12_command_list *list,
j, vk_image_barriers));
}
-static bool d3d12_command_list_reset_query(struct d3d12_command_list *list,
-VkQueryPool vk_pool, uint32_t index);
static inline bool d3d12_query_type_is_indexed(D3D12_QUERY_TYPE type)
{
return type >= D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0 &&
@@ -3809,7 +3806,7 @@ static void d3d12_command_list_read_query_range(struct d3d12_command_list *list,
}
}
-static bool d3d12_command_list_reset_query(struct d3d12_command_list *list,
+bool d3d12_command_list_reset_query(struct d3d12_command_list *list,
VkQueryPool vk_pool, uint32_t index)
{
size_t pos;
@@ -8144,7 +8141,6 @@ static void STDMETHODCALLTYPE d3d12_command_list_BuildRaytracingAccelerationStructure(d3d12_command_list_iface *iface,
{
struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
struct vkd3d_acceleration_structure_build_info build_info;
TRACE("iface %p, desc %p, num_postbuild_info_descs %u, postbuild_info_descs %p\n",
@@ -8193,14 +8189,32 @@ static void STDMETHODCALLTYPE d3d12_command_list_BuildRaytracingAccelerationStructure(d3d12_command_list_iface *iface,
&build_info.build_info, build_info.build_range_ptrs));
vkd3d_acceleration_structure_build_info_cleanup(&build_info);
if (num_postbuild_info_descs)
{
vkd3d_acceleration_structure_emit_immediate_postbuild_info(list,
num_postbuild_info_descs, postbuild_info_descs,
build_info.build_info.dstAccelerationStructure);
}
}

static void STDMETHODCALLTYPE d3d12_command_list_EmitRaytracingAccelerationStructurePostbuildInfo(d3d12_command_list_iface *iface,
const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *desc, UINT num_acceleration_structures,
const D3D12_GPU_VIRTUAL_ADDRESS *src_data)
{
FIXME("iface %p, desc %p, num_acceleration_structures %u, src_data %p stub!\n",
struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
TRACE("iface %p, desc %p, num_acceleration_structures %u, src_data %p\n",
iface, desc, num_acceleration_structures, src_data);
if (!d3d12_device_supports_ray_tracing_tier_1_0(list->device))
{
ERR("Acceleration structure is not supported. Calling this is invalid.\n");
return;
}
d3d12_command_list_end_current_render_pass(list, true);
vkd3d_acceleration_structure_emit_postbuild_info(list,
desc, num_acceleration_structures, src_data);
}
static void STDMETHODCALLTYPE d3d12_command_list_CopyRaytracingAccelerationStructure(d3d12_command_list_iface *iface,
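
For reference, the immediate path above corresponds to postbuild descs
passed straight to the build call; a minimal app-side sketch (list4,
build_desc and result_buffer are hypothetical, build setup omitted):

    D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC postbuild;

    /* Postbuild descs passed to the build call are what reach
     * vkd3d_acceleration_structure_emit_immediate_postbuild_info. */
    postbuild.DestBuffer = ID3D12Resource_GetGPUVirtualAddress(result_buffer);
    postbuild.InfoType = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_COMPACTED_SIZE;
    ID3D12GraphicsCommandList4_BuildRaytracingAccelerationStructure(
            list4, &build_desc, 1, &postbuild);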

@@ -1394,6 +1394,9 @@ struct d3d12_command_allocator
HRESULT d3d12_command_allocator_create(struct d3d12_device *device,
D3D12_COMMAND_LIST_TYPE type, struct d3d12_command_allocator **allocator);
bool d3d12_command_allocator_allocate_query_from_type_index(
struct d3d12_command_allocator *allocator,
uint32_t type_index, VkQueryPool *query_pool, uint32_t *query_index);
struct d3d12_command_allocator *unsafe_impl_from_ID3D12CommandAllocator(ID3D12CommandAllocator *iface);
enum vkd3d_pipeline_dirty_flag
@@ -1620,6 +1623,8 @@ struct d3d12_command_list
HRESULT d3d12_command_list_create(struct d3d12_device *device,
UINT node_mask, D3D12_COMMAND_LIST_TYPE type, ID3D12CommandAllocator *allocator_iface,
ID3D12PipelineState *initial_pipeline_state, struct d3d12_command_list **list);
bool d3d12_command_list_reset_query(struct d3d12_command_list *list,
VkQueryPool vk_pool, uint32_t index);
struct vkd3d_queue
{
@@ -2717,6 +2722,14 @@ void vkd3d_acceleration_structure_build_info_cleanup(
bool vkd3d_acceleration_structure_convert_inputs(
struct vkd3d_acceleration_structure_build_info *info,
const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS *desc);
void vkd3d_acceleration_structure_emit_postbuild_info(
struct d3d12_command_list *list,
const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *desc,
uint32_t count, const D3D12_GPU_VIRTUAL_ADDRESS *addresses);
void vkd3d_acceleration_structure_emit_immediate_postbuild_info(
struct d3d12_command_list *list, uint32_t count,
const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *desc,
VkAccelerationStructureKHR vk_acceleration_structure);
#define VKD3D_VENDOR_ID_NVIDIA 0x10DE
#define VKD3D_VENDOR_ID_AMD 0x1002

@@ -211,6 +211,7 @@ VK_DEVICE_EXT_PFN(vkCreateAccelerationStructureKHR)
VK_DEVICE_EXT_PFN(vkDestroyAccelerationStructureKHR)
VK_DEVICE_EXT_PFN(vkGetAccelerationStructureDeviceAddressKHR)
VK_DEVICE_EXT_PFN(vkCmdBuildAccelerationStructuresKHR)
VK_DEVICE_EXT_PFN(vkCmdWriteAccelerationStructuresPropertiesKHR)
/* VK_KHR_fragment_shading_rate */
VK_INSTANCE_PFN(vkGetPhysicalDeviceFragmentShadingRatesKHR)