diff --git a/libs/vkd3d/meson.build b/libs/vkd3d/meson.build index 60c9d6e0..71283708 100644 --- a/libs/vkd3d/meson.build +++ b/libs/vkd3d/meson.build @@ -27,6 +27,7 @@ vkd3d_shaders =[ 'shaders/vs_swapchain_fullscreen.vert', 'shaders/fs_swapchain_fullscreen.frag', + 'shaders/cs_execute_indirect_patch.comp', ] vkd3d_src = [ diff --git a/libs/vkd3d/meta.c b/libs/vkd3d/meta.c index d28b6fa6..d7fd6116 100644 --- a/libs/vkd3d/meta.c +++ b/libs/vkd3d/meta.c @@ -1361,6 +1361,106 @@ void vkd3d_meta_get_predicate_pipeline(struct vkd3d_meta_ops *meta_ops, info->data_size = predicate_ops->data_sizes[command_type]; } +HRESULT vkd3d_execute_indirect_ops_init(struct vkd3d_execute_indirect_ops *meta_indirect_ops, + struct d3d12_device *device) +{ + VkPushConstantRange push_constant_range; + VkResult vr; + int rc; + + if ((rc = pthread_mutex_init(&meta_indirect_ops->mutex, NULL))) + return hresult_from_errno(rc); + + push_constant_range.offset = 0; + push_constant_range.size = sizeof(struct vkd3d_execute_indirect_args); + push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + + if ((vr = vkd3d_meta_create_pipeline_layout(device, 0, NULL, 1, + &push_constant_range, &meta_indirect_ops->vk_pipeline_layout)) < 0) + { + pthread_mutex_destroy(&meta_indirect_ops->mutex); + return hresult_from_vk_result(vr); + } + + meta_indirect_ops->pipelines_count = 0; + meta_indirect_ops->pipelines_size = 0; + meta_indirect_ops->pipelines = NULL; + return S_OK; +} + +HRESULT vkd3d_meta_get_execute_indirect_pipeline(struct vkd3d_meta_ops *meta_ops, + uint32_t patch_command_count, struct vkd3d_execute_indirect_info *info) +{ + struct vkd3d_execute_indirect_ops *meta_indirect_ops = &meta_ops->execute_indirect; + VkSpecializationMapEntry map_entry; + VkSpecializationInfo spec; + HRESULT hr = S_OK; + VkResult vr; + size_t i; + int rc; + + if ((rc = pthread_mutex_lock(&meta_indirect_ops->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + for (i = 0; i < meta_indirect_ops->pipelines_count; i++) + { + if (meta_indirect_ops->pipelines[i].workgroup_size_x == patch_command_count) + { + info->vk_pipeline_layout = meta_indirect_ops->vk_pipeline_layout; + info->vk_pipeline = meta_indirect_ops->pipelines[i].vk_pipeline; + goto out; + } + } + + map_entry.constantID = 0; + map_entry.offset = 0; + map_entry.size = sizeof(patch_command_count); + + spec.pMapEntries = &map_entry; + spec.pData = &patch_command_count; + spec.mapEntryCount = 1; + spec.dataSize = sizeof(patch_command_count); + + vkd3d_array_reserve((void**)&meta_indirect_ops->pipelines, &meta_indirect_ops->pipelines_size, + meta_indirect_ops->pipelines_count + 1, sizeof(*meta_indirect_ops->pipelines)); + + meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].workgroup_size_x = patch_command_count; + + vr = vkd3d_meta_create_compute_pipeline(meta_ops->device, + sizeof(cs_execute_indirect_patch), cs_execute_indirect_patch, + meta_indirect_ops->vk_pipeline_layout, &spec, + &meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].vk_pipeline); + + if (vr) + { + hr = hresult_from_vk_result(vr); + goto out; + } + + info->vk_pipeline_layout = meta_indirect_ops->vk_pipeline_layout; + info->vk_pipeline = meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].vk_pipeline; + meta_indirect_ops->pipelines_count++; + +out: + pthread_mutex_unlock(&meta_indirect_ops->mutex); + return hr; +} + +void vkd3d_execute_indirect_ops_cleanup(struct vkd3d_execute_indirect_ops *meta_indirect_ops, + struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + size_t i; + + for (i = 0; i < meta_indirect_ops->pipelines_count; i++) + VK_CALL(vkDestroyPipeline(device->vk_device, meta_indirect_ops->pipelines[i].vk_pipeline, NULL)); + VK_CALL(vkDestroyPipelineLayout(device->vk_device, meta_indirect_ops->vk_pipeline_layout, NULL)); + pthread_mutex_destroy(&meta_indirect_ops->mutex); +} + HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device) { HRESULT hr; @@ -1386,8 +1486,13 @@ HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device if (FAILED(hr = vkd3d_predicate_ops_init(&meta_ops->predicate, device))) goto fail_predicate_ops; + if (FAILED(hr = vkd3d_execute_indirect_ops_init(&meta_ops->execute_indirect, device))) + goto fail_execute_indirect_ops; + return S_OK; +fail_execute_indirect_ops: + vkd3d_predicate_ops_cleanup(&meta_ops->predicate, device); fail_predicate_ops: vkd3d_query_ops_cleanup(&meta_ops->query, device); fail_query_ops: @@ -1404,6 +1509,7 @@ fail_common: HRESULT vkd3d_meta_ops_cleanup(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device) { + vkd3d_execute_indirect_ops_cleanup(&meta_ops->execute_indirect, device); vkd3d_predicate_ops_cleanup(&meta_ops->predicate, device); vkd3d_query_ops_cleanup(&meta_ops->query, device); vkd3d_swapchain_ops_cleanup(&meta_ops->swapchain, device); diff --git a/libs/vkd3d/shaders/cs_execute_indirect_patch.comp b/libs/vkd3d/shaders/cs_execute_indirect_patch.comp new file mode 100644 index 00000000..43e96204 --- /dev/null +++ b/libs/vkd3d/shaders/cs_execute_indirect_patch.comp @@ -0,0 +1,76 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require + +layout(local_size_x_id = 0) in; + +struct Command +{ + uint type; + uint src_offset; + uint dst_offset; +}; + +const int COMMAND_TYPE_COPY_U32 = 0; +const int COMMAND_TYPE_COPY_INDEX_TYPE = 1; +const int DXGI_FORMAT_R32_UINT = 0x2a; +const int VK_INDEX_TYPE_UINT16 = 0; +const int VK_INDEX_TYPE_UINT32 = 1; + +layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer Commands +{ + Command commands[]; +}; + +layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer SrcBuffer { + uint values[]; +}; + +layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer DstBuffer { + uint values[]; +}; + +layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer IndirectCount { + uint count; +}; + +layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer IndirectCountWrite { + uint count; +}; + +layout(push_constant) uniform Registers +{ + Commands commands_va; + SrcBuffer src_buffer_va; + DstBuffer dst_buffer_va; + uvec2 indirect_count_va; + IndirectCountWrite dst_indirect_count_va; + uint src_stride; + uint dst_stride; +}; + +void main() +{ + Command cmd = commands_va.commands[gl_LocalInvocationIndex]; + + uint draw_id = gl_WorkGroupID.x; + uint max_draws = ~0u; + if (any(notEqual(indirect_count_va, uvec2(0)))) + { + max_draws = IndirectCount(indirect_count_va).count; + if (gl_WorkGroupID.x == 0u) + dst_indirect_count_va.count = max_draws; + } + + if (draw_id < max_draws) + { + uint src_offset = src_stride * draw_id + cmd.src_offset; + uint dst_offset = dst_stride * draw_id + cmd.dst_offset; + + uint src_value = src_buffer_va.values[src_offset]; + if (cmd.type == COMMAND_TYPE_COPY_INDEX_TYPE) + src_value = src_value == DXGI_FORMAT_R32_UINT ? VK_INDEX_TYPE_UINT32 : VK_INDEX_TYPE_UINT16; + + dst_buffer_va.values[dst_offset] = src_value; + } +} diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index d37ff949..7d287fe5 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -2226,6 +2226,12 @@ HRESULT d3d12_command_queue_create(struct d3d12_device *device, const D3D12_COMMAND_QUEUE_DESC *desc, struct d3d12_command_queue **queue); void d3d12_command_queue_submit_stop(struct d3d12_command_queue *queue); +struct vkd3d_execute_indirect_info +{ + VkPipelineLayout vk_pipeline_layout; + VkPipeline vk_pipeline; +}; + /* ID3D12CommandSignature */ struct d3d12_command_signature { @@ -2656,6 +2662,37 @@ HRESULT vkd3d_predicate_ops_init(struct vkd3d_predicate_ops *meta_predicate_ops, void vkd3d_predicate_ops_cleanup(struct vkd3d_predicate_ops *meta_predicate_ops, struct d3d12_device *device); +struct vkd3d_execute_indirect_args +{ + VkDeviceAddress template_va; + VkDeviceAddress api_buffer_va; + VkDeviceAddress device_generated_commands_va; + VkDeviceAddress indirect_count_va; + VkDeviceAddress dst_indirect_count_va; + uint32_t api_buffer_word_stride; + uint32_t device_generated_commands_word_stride; +}; + +struct vkd3d_execute_indirect_pipeline +{ + VkPipeline vk_pipeline; + uint32_t workgroup_size_x; +}; + +struct vkd3d_execute_indirect_ops +{ + VkPipelineLayout vk_pipeline_layout; + struct vkd3d_execute_indirect_pipeline *pipelines; + size_t pipelines_count; + size_t pipelines_size; + pthread_mutex_t mutex; +}; + +HRESULT vkd3d_execute_indirect_ops_init(struct vkd3d_execute_indirect_ops *meta_indirect_ops, + struct d3d12_device *device); +void vkd3d_execute_indirect_ops_cleanup(struct vkd3d_execute_indirect_ops *meta_indirect_ops, + struct d3d12_device *device); + struct vkd3d_meta_ops_common { VkShaderModule vk_module_fullscreen_vs; @@ -2671,6 +2708,7 @@ struct vkd3d_meta_ops struct vkd3d_swapchain_ops swapchain; struct vkd3d_query_ops query; struct vkd3d_predicate_ops predicate; + struct vkd3d_execute_indirect_ops execute_indirect; }; HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device); @@ -2703,6 +2741,9 @@ bool vkd3d_meta_get_query_gather_pipeline(struct vkd3d_meta_ops *meta_ops, void vkd3d_meta_get_predicate_pipeline(struct vkd3d_meta_ops *meta_ops, enum vkd3d_predicate_command_type command_type, struct vkd3d_predicate_command_info *info); +HRESULT vkd3d_meta_get_execute_indirect_pipeline(struct vkd3d_meta_ops *meta_ops, + uint32_t patch_command_count, struct vkd3d_execute_indirect_info *info); + enum vkd3d_time_domain_flag { VKD3D_TIME_DOMAIN_DEVICE = 0x00000001u, diff --git a/libs/vkd3d/vkd3d_shaders.h b/libs/vkd3d/vkd3d_shaders.h index 9f96323c..9e5698d6 100644 --- a/libs/vkd3d/vkd3d_shaders.h +++ b/libs/vkd3d/vkd3d_shaders.h @@ -45,6 +45,7 @@ enum vkd3d_meta_copy_mode #include #include #include +#include #include #include #include