vkd3d: Attempt to reuse application indirect command buffer.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
This commit is contained in:
Hans-Kristian Arntzen 2022-06-10 14:35:21 +02:00
parent 538c3c1f19
commit d23f5f4343
1 changed file with 130 additions and 77 deletions

View File

@ -9466,6 +9466,7 @@ static void d3d12_command_list_execute_indirect_state_template(
struct d3d12_resource *count_buffer, UINT64 count_buffer_offset)
{
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
const VkPhysicalDeviceDeviceGeneratedCommandsPropertiesNV *props;
struct vkd3d_scratch_allocation preprocess_allocation;
struct vkd3d_scratch_allocation stream_allocation;
struct vkd3d_scratch_allocation count_allocation;
@ -9476,6 +9477,8 @@ static void d3d12_command_list_execute_indirect_state_template(
VkDeviceSize preprocess_size;
VkPipeline current_pipeline;
VkMemoryBarrier barrier;
bool require_ibo_update;
bool require_patch;
unsigned int i;
HRESULT hr;
@ -9491,6 +9494,8 @@ static void d3d12_command_list_execute_indirect_state_template(
current_pipeline = list->current_pipeline;
memset(&patch_args, 0, sizeof(patch_args));
patch_args.debug_tag = 0; /* Modify to non-zero value as desired when debugging. */
if (FAILED(hr = d3d12_command_signature_allocate_preprocess_memory_for_list(
list, signature, current_pipeline,
max_command_count, &preprocess_allocation, &preprocess_size)))
@ -9499,6 +9504,39 @@ static void d3d12_command_list_execute_indirect_state_template(
return;
}
/* If everything regarding alignment works out, we can just reuse the app indirect buffer instead. */
require_ibo_update = false;
require_patch = false;
/* Bind IBO. If we always update the IBO indirectly, do not validate the index buffer here.
* We can render fine even with a NULL IBO bound. */
for (i = 0; i < signature->desc.NumArgumentDescs; i++)
{
if (signature->desc.pArgumentDescs[i].Type == D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW)
{
require_ibo_update = true;
break;
}
}
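/* Patching is required (we cannot reuse the application's buffers directly) if any of the following holds:
* - Stride mismatches, i.e. we need internal alignment of arguments (only relevant for more than one command).
* - The offset into the indirect buffer itself does not satisfy the minimum required alignment.
* - The offset into the count buffer does not satisfy the minimum required alignment.
* - We require debugging (non-zero debug_tag).
* - Temporary: IBO type rewrite is required. TODO: Use index type LUT feature. */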
props = &list->device->device_info.device_generated_commands_properties_nv;
if ((signature->state_template.stride != signature->desc.ByteStride && max_command_count > 1) ||
(arg_buffer_offset & (props->minIndirectCommandsBufferOffsetAlignment - 1)) ||
(count_buffer && (count_buffer_offset & (props->minSequencesCountBufferOffsetAlignment - 1))) ||
patch_args.debug_tag ||
require_ibo_update)
{
require_patch = true;
}
if (require_patch)
{
if (FAILED(hr = d3d12_command_signature_allocate_stream_memory_for_list(
list, signature, max_command_count, &stream_allocation)))
{
@ -9511,7 +9549,7 @@ static void d3d12_command_list_execute_indirect_state_template(
if (FAILED(hr = d3d12_command_allocator_allocate_scratch_memory(list->allocator,
VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE,
sizeof(uint32_t),
list->device->device_info.device_generated_commands_properties_nv.minSequencesCountBufferOffsetAlignment,
props->minSequencesCountBufferOffsetAlignment,
~0u, &count_allocation)))
{
WARN("Failed to allocate count memory.\n");
@ -9526,7 +9564,6 @@ static void d3d12_command_list_execute_indirect_state_template(
patch_args.dst_indirect_count_va = count_buffer ? count_allocation.va : 0;
patch_args.api_buffer_word_stride = signature->desc.ByteStride / sizeof(uint32_t);
patch_args.device_generated_commands_word_stride = signature->state_template.stride / sizeof(uint32_t);
patch_args.debug_tag = 0; /* Modify to non-zero value as desired when debugging. */
if (patch_args.debug_tag != 0)
{
@ -9581,6 +9618,7 @@ static void d3d12_command_list_execute_indirect_state_template(
0, 1, &barrier, 0, NULL, 0, NULL));
/* The barrier is deferred if we moved the dispatch to init command buffer. */
}
}
if (!d3d12_command_list_begin_render_pass(list))
{
@ -9588,13 +9626,7 @@ static void d3d12_command_list_execute_indirect_state_template(
return;
}
/* Bind IBO. If we always update the IBO indirectly, do not validate the index buffer here.
* We can render fine even with a NULL IBO bound. */
for (i = 0; i < signature->desc.NumArgumentDescs; i++)
if (signature->desc.pArgumentDescs[i].Type == D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW)
break;
if (i == signature->desc.NumArgumentDescs &&
if (!require_ibo_update &&
signature->desc.pArgumentDescs[signature->desc.NumArgumentDescs - 1].Type ==
D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED &&
!d3d12_command_list_update_index_buffer(list))
@ -9617,18 +9649,39 @@ static void d3d12_command_list_execute_indirect_state_template(
generated.sequencesIndexOffset = 0;
if (count_buffer)
{
if (require_patch)
{
generated.sequencesCountBuffer = count_allocation.buffer;
generated.sequencesCountOffset = count_allocation.offset;
}
else
{
generated.sequencesCountBuffer = count_buffer->res.vk_buffer;
generated.sequencesCountOffset = count_buffer->mem.offset + count_buffer_offset;
}
}
else
{
generated.sequencesCountBuffer = VK_NULL_HANDLE;
generated.sequencesCountOffset = 0;
}
if (require_patch)
{
stream.buffer = stream_allocation.buffer;
stream.offset = stream_allocation.offset;
}
else
{
stream.buffer = arg_buffer->res.vk_buffer;
stream.offset = arg_buffer->mem.offset + arg_buffer_offset;
}
if (require_patch)
INFO("Template requires patching :(\n");
else
INFO("Template skips patching :)\n");
VK_CALL(vkCmdExecuteGeneratedCommandsNV(list->vk_command_buffer, VK_FALSE, &generated));