meta: Add ExecuteIndirect patch meta shader.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
This commit is contained in:
Hans-Kristian Arntzen 2021-11-24 17:37:07 +01:00
parent eac42dca4b
commit 17b771dc0c
5 changed files with 225 additions and 0 deletions

View File

@ -27,6 +27,7 @@ vkd3d_shaders =[
'shaders/vs_swapchain_fullscreen.vert',
'shaders/fs_swapchain_fullscreen.frag',
'shaders/cs_execute_indirect_patch.comp',
]
vkd3d_src = [

View File

@ -1361,6 +1361,106 @@ void vkd3d_meta_get_predicate_pipeline(struct vkd3d_meta_ops *meta_ops,
info->data_size = predicate_ops->data_sizes[command_type];
}
HRESULT vkd3d_execute_indirect_ops_init(struct vkd3d_execute_indirect_ops *meta_indirect_ops,
struct d3d12_device *device)
{
VkPushConstantRange push_constant_range;
VkResult vr;
int rc;
if ((rc = pthread_mutex_init(&meta_indirect_ops->mutex, NULL)))
return hresult_from_errno(rc);
push_constant_range.offset = 0;
push_constant_range.size = sizeof(struct vkd3d_execute_indirect_args);
push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
if ((vr = vkd3d_meta_create_pipeline_layout(device, 0, NULL, 1,
&push_constant_range, &meta_indirect_ops->vk_pipeline_layout)) < 0)
{
pthread_mutex_destroy(&meta_indirect_ops->mutex);
return hresult_from_vk_result(vr);
}
meta_indirect_ops->pipelines_count = 0;
meta_indirect_ops->pipelines_size = 0;
meta_indirect_ops->pipelines = NULL;
return S_OK;
}
HRESULT vkd3d_meta_get_execute_indirect_pipeline(struct vkd3d_meta_ops *meta_ops,
uint32_t patch_command_count, struct vkd3d_execute_indirect_info *info)
{
struct vkd3d_execute_indirect_ops *meta_indirect_ops = &meta_ops->execute_indirect;
VkSpecializationMapEntry map_entry;
VkSpecializationInfo spec;
HRESULT hr = S_OK;
VkResult vr;
size_t i;
int rc;
if ((rc = pthread_mutex_lock(&meta_indirect_ops->mutex)))
{
ERR("Failed to lock mutex, error %d.\n", rc);
return hresult_from_errno(rc);
}
for (i = 0; i < meta_indirect_ops->pipelines_count; i++)
{
if (meta_indirect_ops->pipelines[i].workgroup_size_x == patch_command_count)
{
info->vk_pipeline_layout = meta_indirect_ops->vk_pipeline_layout;
info->vk_pipeline = meta_indirect_ops->pipelines[i].vk_pipeline;
goto out;
}
}
map_entry.constantID = 0;
map_entry.offset = 0;
map_entry.size = sizeof(patch_command_count);
spec.pMapEntries = &map_entry;
spec.pData = &patch_command_count;
spec.mapEntryCount = 1;
spec.dataSize = sizeof(patch_command_count);
vkd3d_array_reserve((void**)&meta_indirect_ops->pipelines, &meta_indirect_ops->pipelines_size,
meta_indirect_ops->pipelines_count + 1, sizeof(*meta_indirect_ops->pipelines));
meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].workgroup_size_x = patch_command_count;
vr = vkd3d_meta_create_compute_pipeline(meta_ops->device,
sizeof(cs_execute_indirect_patch), cs_execute_indirect_patch,
meta_indirect_ops->vk_pipeline_layout, &spec,
&meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].vk_pipeline);
if (vr)
{
hr = hresult_from_vk_result(vr);
goto out;
}
info->vk_pipeline_layout = meta_indirect_ops->vk_pipeline_layout;
info->vk_pipeline = meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].vk_pipeline;
meta_indirect_ops->pipelines_count++;
out:
pthread_mutex_unlock(&meta_indirect_ops->mutex);
return hr;
}
void vkd3d_execute_indirect_ops_cleanup(struct vkd3d_execute_indirect_ops *meta_indirect_ops,
struct d3d12_device *device)
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
size_t i;
for (i = 0; i < meta_indirect_ops->pipelines_count; i++)
VK_CALL(vkDestroyPipeline(device->vk_device, meta_indirect_ops->pipelines[i].vk_pipeline, NULL));
VK_CALL(vkDestroyPipelineLayout(device->vk_device, meta_indirect_ops->vk_pipeline_layout, NULL));
pthread_mutex_destroy(&meta_indirect_ops->mutex);
}
HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
{
HRESULT hr;
@ -1386,8 +1486,13 @@ HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device
if (FAILED(hr = vkd3d_predicate_ops_init(&meta_ops->predicate, device)))
goto fail_predicate_ops;
if (FAILED(hr = vkd3d_execute_indirect_ops_init(&meta_ops->execute_indirect, device)))
goto fail_execute_indirect_ops;
return S_OK;
fail_execute_indirect_ops:
vkd3d_predicate_ops_cleanup(&meta_ops->predicate, device);
fail_predicate_ops:
vkd3d_query_ops_cleanup(&meta_ops->query, device);
fail_query_ops:
@ -1404,6 +1509,7 @@ fail_common:
HRESULT vkd3d_meta_ops_cleanup(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
{
vkd3d_execute_indirect_ops_cleanup(&meta_ops->execute_indirect, device);
vkd3d_predicate_ops_cleanup(&meta_ops->predicate, device);
vkd3d_query_ops_cleanup(&meta_ops->query, device);
vkd3d_swapchain_ops_cleanup(&meta_ops->swapchain, device);

View File

@ -0,0 +1,76 @@
#version 450
#extension GL_EXT_buffer_reference : require
#extension GL_EXT_buffer_reference_uvec2 : require
layout(local_size_x_id = 0) in;
struct Command
{
uint type;
uint src_offset;
uint dst_offset;
};
const int COMMAND_TYPE_COPY_U32 = 0;
const int COMMAND_TYPE_COPY_INDEX_TYPE = 1;
const int DXGI_FORMAT_R32_UINT = 0x2a;
const int VK_INDEX_TYPE_UINT16 = 0;
const int VK_INDEX_TYPE_UINT32 = 1;
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer Commands
{
Command commands[];
};
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer SrcBuffer {
uint values[];
};
layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer DstBuffer {
uint values[];
};
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer IndirectCount {
uint count;
};
layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer IndirectCountWrite {
uint count;
};
layout(push_constant) uniform Registers
{
Commands commands_va;
SrcBuffer src_buffer_va;
DstBuffer dst_buffer_va;
uvec2 indirect_count_va;
IndirectCountWrite dst_indirect_count_va;
uint src_stride;
uint dst_stride;
};
void main()
{
Command cmd = commands_va.commands[gl_LocalInvocationIndex];
uint draw_id = gl_WorkGroupID.x;
uint max_draws = ~0u;
if (any(notEqual(indirect_count_va, uvec2(0))))
{
max_draws = IndirectCount(indirect_count_va).count;
if (gl_WorkGroupID.x == 0u)
dst_indirect_count_va.count = max_draws;
}
if (draw_id < max_draws)
{
uint src_offset = src_stride * draw_id + cmd.src_offset;
uint dst_offset = dst_stride * draw_id + cmd.dst_offset;
uint src_value = src_buffer_va.values[src_offset];
if (cmd.type == COMMAND_TYPE_COPY_INDEX_TYPE)
src_value = src_value == DXGI_FORMAT_R32_UINT ? VK_INDEX_TYPE_UINT32 : VK_INDEX_TYPE_UINT16;
dst_buffer_va.values[dst_offset] = src_value;
}
}

View File

@ -2226,6 +2226,12 @@ HRESULT d3d12_command_queue_create(struct d3d12_device *device,
const D3D12_COMMAND_QUEUE_DESC *desc, struct d3d12_command_queue **queue);
void d3d12_command_queue_submit_stop(struct d3d12_command_queue *queue);
struct vkd3d_execute_indirect_info
{
VkPipelineLayout vk_pipeline_layout;
VkPipeline vk_pipeline;
};
/* ID3D12CommandSignature */
struct d3d12_command_signature
{
@ -2656,6 +2662,37 @@ HRESULT vkd3d_predicate_ops_init(struct vkd3d_predicate_ops *meta_predicate_ops,
void vkd3d_predicate_ops_cleanup(struct vkd3d_predicate_ops *meta_predicate_ops,
struct d3d12_device *device);
struct vkd3d_execute_indirect_args
{
VkDeviceAddress template_va;
VkDeviceAddress api_buffer_va;
VkDeviceAddress device_generated_commands_va;
VkDeviceAddress indirect_count_va;
VkDeviceAddress dst_indirect_count_va;
uint32_t api_buffer_word_stride;
uint32_t device_generated_commands_word_stride;
};
struct vkd3d_execute_indirect_pipeline
{
VkPipeline vk_pipeline;
uint32_t workgroup_size_x;
};
struct vkd3d_execute_indirect_ops
{
VkPipelineLayout vk_pipeline_layout;
struct vkd3d_execute_indirect_pipeline *pipelines;
size_t pipelines_count;
size_t pipelines_size;
pthread_mutex_t mutex;
};
HRESULT vkd3d_execute_indirect_ops_init(struct vkd3d_execute_indirect_ops *meta_indirect_ops,
struct d3d12_device *device);
void vkd3d_execute_indirect_ops_cleanup(struct vkd3d_execute_indirect_ops *meta_indirect_ops,
struct d3d12_device *device);
struct vkd3d_meta_ops_common
{
VkShaderModule vk_module_fullscreen_vs;
@ -2671,6 +2708,7 @@ struct vkd3d_meta_ops
struct vkd3d_swapchain_ops swapchain;
struct vkd3d_query_ops query;
struct vkd3d_predicate_ops predicate;
struct vkd3d_execute_indirect_ops execute_indirect;
};
HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device);
@ -2703,6 +2741,9 @@ bool vkd3d_meta_get_query_gather_pipeline(struct vkd3d_meta_ops *meta_ops,
void vkd3d_meta_get_predicate_pipeline(struct vkd3d_meta_ops *meta_ops,
enum vkd3d_predicate_command_type command_type, struct vkd3d_predicate_command_info *info);
HRESULT vkd3d_meta_get_execute_indirect_pipeline(struct vkd3d_meta_ops *meta_ops,
uint32_t patch_command_count, struct vkd3d_execute_indirect_info *info);
enum vkd3d_time_domain_flag
{
VKD3D_TIME_DOMAIN_DEVICE = 0x00000001u,

View File

@ -45,6 +45,7 @@ enum vkd3d_meta_copy_mode
#include <cs_resolve_binary_queries.h>
#include <cs_resolve_predicate.h>
#include <cs_resolve_query.h>
#include <cs_execute_indirect_patch.h>
#include <vs_fullscreen_layer.h>
#include <vs_fullscreen.h>
#include <gs_fullscreen.h>