diff --git a/libs/vkd3d/meson.build b/libs/vkd3d/meson.build
index eea562b4..45272def 100644
--- a/libs/vkd3d/meson.build
+++ b/libs/vkd3d/meson.build
@@ -12,7 +12,9 @@ vkd3d_shaders =[
   'shaders/cs_clear_uav_image_2d_uint.comp',
   'shaders/cs_clear_uav_image_3d_float.comp',
   'shaders/cs_clear_uav_image_3d_uint.comp',
+  'shaders/cs_predicate_command.comp',
   'shaders/cs_resolve_binary_queries.comp',
+  'shaders/cs_resolve_predicate.comp',
   'shaders/fs_copy_image_float.frag',
diff --git a/libs/vkd3d/meta.c b/libs/vkd3d/meta.c
index edc69d02..989ef2cc 100644
--- a/libs/vkd3d/meta.c
+++ b/libs/vkd3d/meta.c
@@ -1103,6 +1103,117 @@ void vkd3d_query_ops_cleanup(struct vkd3d_query_ops *meta_query_ops,
     VK_CALL(vkDestroyPipeline(device->vk_device, meta_query_ops->vk_resolve_binary_pipeline, NULL));
 }
 
+/* Creates the pipeline layouts and compute pipelines used for predication:
+ * one cs_predicate_command pipeline per vkd3d_predicate_command_type,
+ * specialized by argument count and argument source, plus a single
+ * cs_resolve_predicate pipeline. If a later step fails, everything created
+ * so far is destroyed and the VkResult is returned as an HRESULT. */
+HRESULT vkd3d_predicate_ops_init(struct vkd3d_predicate_ops *meta_predicate_ops,
+        struct d3d12_device *device)
+{
+    VkPushConstantRange push_constant_range;
+    VkSpecializationInfo spec_info;
+    VkResult vr;
+    size_t i;
+
+    /* Indexed by enum vkd3d_predicate_command_type. */
+    static const struct spec_data
+    {
+        uint32_t arg_count;
+        VkBool32 arg_indirect;
+    }
+    spec_data[] =
+    {
+        { 4, VK_FALSE }, /* VKD3D_PREDICATE_COMMAND_DRAW */
+        { 5, VK_FALSE }, /* VKD3D_PREDICATE_COMMAND_DRAW_INDEXED */
+        { 1, VK_FALSE }, /* VKD3D_PREDICATE_COMMAND_DRAW_INDIRECT */
+        { 1, VK_TRUE },  /* VKD3D_PREDICATE_COMMAND_DRAW_INDIRECT_COUNT */
+        { 3, VK_FALSE }, /* VKD3D_PREDICATE_COMMAND_DISPATCH */
+        { 3, VK_TRUE },  /* VKD3D_PREDICATE_COMMAND_DISPATCH_INDIRECT */
+    };
+
+    static const VkSpecializationMapEntry spec_map[] =
+    {
+        { 0, offsetof(struct spec_data, arg_count), sizeof(uint32_t) },
+        { 1, offsetof(struct spec_data, arg_indirect), sizeof(VkBool32) },
+    };
+
+    memset(meta_predicate_ops, 0, sizeof(*meta_predicate_ops));
+    push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
+    push_constant_range.offset = 0;
+    push_constant_range.size = sizeof(struct vkd3d_predicate_command_args);
+
+    /* Nothing has been created yet if this fails, so return directly. */
+    if ((vr = vkd3d_meta_create_pipeline_layout(device, 0, NULL, 1,
+            &push_constant_range, &meta_predicate_ops->vk_command_pipeline_layout)) < 0)
+        return hresult_from_vk_result(vr);
+
+    push_constant_range.size = sizeof(struct vkd3d_predicate_resolve_args);
+    /* The command pipeline layout exists from here on, so failures must go
+     * through the common cleanup path rather than returning directly. */
+    if ((vr = vkd3d_meta_create_pipeline_layout(device, 0, NULL, 1,
+            &push_constant_range, &meta_predicate_ops->vk_resolve_pipeline_layout)) < 0)
+        goto fail;
+
+    spec_info.mapEntryCount = ARRAY_SIZE(spec_map);
+    spec_info.pMapEntries = spec_map;
+    spec_info.dataSize = sizeof(struct spec_data);
+
+    for (i = 0; i < ARRAY_SIZE(spec_data); i++)
+    {
+        spec_info.pData = &spec_data[i];
+
+        if ((vr = vkd3d_meta_create_compute_pipeline(device, sizeof(cs_predicate_command), cs_predicate_command,
+                meta_predicate_ops->vk_command_pipeline_layout, &spec_info, &meta_predicate_ops->vk_command_pipelines[i])) < 0)
+            goto fail;
+
+        /* Size in bytes of the argument words the shader writes for this command. */
+        meta_predicate_ops->data_sizes[i] = spec_data[i].arg_count * sizeof(uint32_t);
+    }
+
+    /* NOTE(review): cs_resolve_predicate declares no specialization
+     * constants, so spec_info appears to be unused here - confirm whether
+     * NULL could be passed instead. */
+    if ((vr = vkd3d_meta_create_compute_pipeline(device, sizeof(cs_resolve_predicate), cs_resolve_predicate,
+            meta_predicate_ops->vk_resolve_pipeline_layout, &spec_info, &meta_predicate_ops->vk_resolve_pipeline)) < 0)
+        goto fail;
+
+    return S_OK;
+
+fail:
+    vkd3d_predicate_ops_cleanup(meta_predicate_ops, device);
+    return hresult_from_vk_result(vr);
+}
+
+/* Destroys all pipelines and pipeline layouts held by meta_predicate_ops.
+ * Also called from the init failure path with a partially filled struct. */
+void vkd3d_predicate_ops_cleanup(struct vkd3d_predicate_ops *meta_predicate_ops,
+        struct d3d12_device *device)
+{
+    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+    size_t i;
+
+    for (i = 0; i < VKD3D_PREDICATE_COMMAND_COUNT; i++)
+        VK_CALL(vkDestroyPipeline(device->vk_device, meta_predicate_ops->vk_command_pipelines[i], NULL));
+    VK_CALL(vkDestroyPipeline(device->vk_device, meta_predicate_ops->vk_resolve_pipeline, NULL));
+
+    VK_CALL(vkDestroyPipelineLayout(device->vk_device, meta_predicate_ops->vk_command_pipeline_layout, NULL));
+    VK_CALL(vkDestroyPipelineLayout(device->vk_device, meta_predicate_ops->vk_resolve_pipeline_layout, NULL));
+}
+
+/* Fills info with the pipeline layout, pipeline and argument data size for
+ * the given command type. command_type is used directly as an array index,
+ * without range checking. */
+void vkd3d_meta_get_predicate_pipeline(struct vkd3d_meta_ops *meta_ops,
+        enum vkd3d_predicate_command_type command_type, struct vkd3d_predicate_command_info *info)
+{
+    const struct vkd3d_predicate_ops *predicate_ops = &meta_ops->predicate;
+
+    info->vk_pipeline_layout = predicate_ops->vk_command_pipeline_layout;
+    info->vk_pipeline = predicate_ops->vk_command_pipelines[command_type];
+    info->data_size = predicate_ops->data_sizes[command_type];
+}
+
 HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
 {
     HRESULT hr;
@@ -1125,8 +1236,13 @@ HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device
     if (FAILED(hr = vkd3d_query_ops_init(&meta_ops->query, device)))
         goto fail_query_ops;
 
+    if (FAILED(hr = vkd3d_predicate_ops_init(&meta_ops->predicate, device)))
+        goto fail_predicate_ops;
+
     return S_OK;
 
+fail_predicate_ops:
+    vkd3d_query_ops_cleanup(&meta_ops->query, device);
 fail_query_ops:
     vkd3d_swapchain_ops_cleanup(&meta_ops->swapchain, device);
 fail_swapchain_ops:
@@ -1141,6 +1257,7 @@ fail_common:
 
 HRESULT vkd3d_meta_ops_cleanup(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
 {
+    vkd3d_predicate_ops_cleanup(&meta_ops->predicate, device);
     vkd3d_query_ops_cleanup(&meta_ops->query, device);
     vkd3d_swapchain_ops_cleanup(&meta_ops->swapchain, device);
     vkd3d_copy_image_ops_cleanup(&meta_ops->copy_image, device);
diff --git a/libs/vkd3d/shaders/cs_predicate_command.comp b/libs/vkd3d/shaders/cs_predicate_command.comp
new file mode 100644
index 00000000..7498f376
--- /dev/null
+++ b/libs/vkd3d/shaders/cs_predicate_command.comp
@@ -0,0 +1,43 @@
+#version 450
+
+#extension GL_EXT_buffer_reference : require
+
+layout(local_size_x = 1) in;
+
+layout(constant_id = 0) const uint c_arg_count = 0;
+layout(constant_id = 1) const bool c_arg_indirect = false;
+
+layout(std430, buffer_reference, buffer_reference_align = 4)
+readonly buffer predicate_t {
+  uint data;
+};
+
+layout(std430, buffer_reference, buffer_reference_align = 4)
+readonly buffer src_args_t {
+  uint data[];
+};
+
+layout(std430, buffer_reference, buffer_reference_align = 4)
+writeonly buffer dst_args_t {
+  uint data[];
+};
+
+layout(push_constant)
+uniform u_info_t {
+  predicate_t predicate;
+  src_args_t src_args;
+  dst_args_t dst_args;
+  uint cmd_args[5];
+};
+
+// Writes c_arg_count argument words to dst_args: the original arguments
+// (read from src_args if c_arg_indirect, otherwise from cmd_args) when the
+// predicate word is non-zero, and zeros otherwise.
+void main() {
+  bool do_exec = predicate.data != 0;
+
+  for (uint i = 0; i < c_arg_count; i++) {
+    uint arg = c_arg_indirect ? src_args.data[i] : cmd_args[i];
+    dst_args.data[i] = do_exec ? arg : 0u;
+  }
+}
diff --git a/libs/vkd3d/shaders/cs_resolve_predicate.comp b/libs/vkd3d/shaders/cs_resolve_predicate.comp
new file mode 100644
index 00000000..f13cf66c
--- /dev/null
+++ b/libs/vkd3d/shaders/cs_resolve_predicate.comp
@@ -0,0 +1,29 @@
+#version 450
+
+#extension GL_EXT_buffer_reference : require
+
+layout(local_size_x = 1) in;
+
+layout(std430, buffer_reference, buffer_reference_align = 8)
+readonly buffer src_predicate_t {
+  uvec2 data;
+};
+
+layout(std430, buffer_reference, buffer_reference_align = 4)
+writeonly buffer dst_predicate_t {
+  uint data;
+};
+
+layout(push_constant)
+uniform u_info_t {
+  src_predicate_t src;
+  dst_predicate_t dst;
+  bool invert;
+};
+
+// Reads the two 32-bit words at src and writes 0 or 1 to dst: with invert
+// unset the result is 0 when both words are zero and 1 otherwise; with
+// invert set the result is flipped.
+void main() {
+  dst.data = (all(equal(src.data, 0u.xx)) != invert) ? 0u : 1u;
+}
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h
index 51a98830..9c72b39e 100644
--- a/libs/vkd3d/vkd3d_private.h
+++ b/libs/vkd3d/vkd3d_private.h
@@ -1818,6 +1818,66 @@ HRESULT vkd3d_query_ops_init(struct vkd3d_query_ops *meta_query_ops,
 void vkd3d_query_ops_cleanup(struct vkd3d_query_ops *meta_query_ops,
         struct d3d12_device *device);
 
+union vkd3d_predicate_command_direct_args
+{
+    VkDispatchIndirectCommand dispatch;
+    VkDrawIndirectCommand draw;
+    VkDrawIndexedIndirectCommand draw_indexed;
+    uint32_t draw_count;
+};
+
+/* Push constant data for cs_predicate_command; its size is used for the push
+ * constant range of the command pipeline layout. */
+struct vkd3d_predicate_command_args
+{
+    VkDeviceAddress predicate_va;
+    VkDeviceAddress src_arg_va;
+    VkDeviceAddress dst_arg_va;
+    union vkd3d_predicate_command_direct_args args;
+};
+
+/* Values are used as indices into vk_command_pipelines and data_sizes of
+ * struct vkd3d_predicate_ops. */
+enum vkd3d_predicate_command_type
+{
+    VKD3D_PREDICATE_COMMAND_DRAW,
+    VKD3D_PREDICATE_COMMAND_DRAW_INDEXED,
+    VKD3D_PREDICATE_COMMAND_DRAW_INDIRECT,
+    VKD3D_PREDICATE_COMMAND_DRAW_INDIRECT_COUNT,
+    VKD3D_PREDICATE_COMMAND_DISPATCH,
+    VKD3D_PREDICATE_COMMAND_DISPATCH_INDIRECT,
+    VKD3D_PREDICATE_COMMAND_COUNT
+};
+
+struct vkd3d_predicate_command_info
+{
+    VkPipelineLayout vk_pipeline_layout;
+    VkPipeline vk_pipeline;
+    uint32_t data_size;
+};
+
+/* Push constant data for cs_resolve_predicate. */
+struct vkd3d_predicate_resolve_args
+{
+    VkDeviceAddress src_va;
+    VkDeviceAddress dst_va;
+    VkBool32 invert;
+};
+
+struct vkd3d_predicate_ops
+{
+    VkPipelineLayout vk_command_pipeline_layout;
+    VkPipelineLayout vk_resolve_pipeline_layout;
+    VkPipeline vk_command_pipelines[VKD3D_PREDICATE_COMMAND_COUNT];
+    VkPipeline vk_resolve_pipeline;
+    uint32_t data_sizes[VKD3D_PREDICATE_COMMAND_COUNT];
+};
+
+HRESULT vkd3d_predicate_ops_init(struct vkd3d_predicate_ops *meta_predicate_ops,
+        struct d3d12_device *device);
+void vkd3d_predicate_ops_cleanup(struct vkd3d_predicate_ops *meta_predicate_ops,
+        struct d3d12_device *device);
+
 struct vkd3d_meta_ops_common
 {
     VkShaderModule vk_module_fullscreen_vs;
@@ -1832,6 +1892,7 @@ struct vkd3d_meta_ops
     struct vkd3d_copy_image_ops copy_image;
     struct vkd3d_swapchain_ops swapchain;
     struct vkd3d_query_ops query;
+    struct vkd3d_predicate_ops predicate;
 };
 
 HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device);
@@ -1857,6 +1918,9 @@ const struct vkd3d_format *vkd3d_meta_get_copy_image_attachment_format(struct vk
 HRESULT vkd3d_meta_get_swapchain_pipeline(struct vkd3d_meta_ops *meta_ops,
         const struct vkd3d_swapchain_pipeline_key *key, struct vkd3d_swapchain_info *info);
 
+void vkd3d_meta_get_predicate_pipeline(struct vkd3d_meta_ops *meta_ops,
+        enum vkd3d_predicate_command_type command_type, struct vkd3d_predicate_command_info *info);
+
 enum vkd3d_time_domain_flag
 {
     VKD3D_TIME_DOMAIN_DEVICE = 0x00000001u,
diff --git a/libs/vkd3d/vkd3d_shaders.h b/libs/vkd3d/vkd3d_shaders.h
index bef0e24c..422f4cf6 100644
--- a/libs/vkd3d/vkd3d_shaders.h
+++ b/libs/vkd3d/vkd3d_shaders.h
@@ -41,7 +41,9 @@ enum vkd3d_meta_copy_mode
 #include
 #include
 #include
+#include
 #include
+#include
 #include
 #include
 #include