meta: Add ExecuteIndirect patch meta shader.
Currently we are translating the index type. This will be changed in a follow up commit where we move over to index LUT. Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
This commit is contained in:
parent
619a54810d
commit
f975f09bb1
|
@ -27,6 +27,7 @@ vkd3d_shaders =[
|
|||
|
||||
'shaders/vs_swapchain_fullscreen.vert',
|
||||
'shaders/fs_swapchain_fullscreen.frag',
|
||||
'shaders/cs_execute_indirect_patch.comp',
|
||||
]
|
||||
|
||||
vkd3d_src = [
|
||||
|
|
|
@ -1217,6 +1217,106 @@ void vkd3d_meta_get_predicate_pipeline(struct vkd3d_meta_ops *meta_ops,
|
|||
info->data_size = predicate_ops->data_sizes[command_type];
|
||||
}
|
||||
|
||||
HRESULT vkd3d_execute_indirect_ops_init(struct vkd3d_execute_indirect_ops *meta_indirect_ops,
|
||||
struct d3d12_device *device)
|
||||
{
|
||||
VkPushConstantRange push_constant_range;
|
||||
VkResult vr;
|
||||
int rc;
|
||||
|
||||
if ((rc = pthread_mutex_init(&meta_indirect_ops->mutex, NULL)))
|
||||
return hresult_from_errno(rc);
|
||||
|
||||
push_constant_range.offset = 0;
|
||||
push_constant_range.size = sizeof(struct vkd3d_execute_indirect_args);
|
||||
push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
|
||||
if ((vr = vkd3d_meta_create_pipeline_layout(device, 0, NULL, 1,
|
||||
&push_constant_range, &meta_indirect_ops->vk_pipeline_layout)) < 0)
|
||||
{
|
||||
pthread_mutex_destroy(&meta_indirect_ops->mutex);
|
||||
return hresult_from_vk_result(vr);
|
||||
}
|
||||
|
||||
meta_indirect_ops->pipelines_count = 0;
|
||||
meta_indirect_ops->pipelines_size = 0;
|
||||
meta_indirect_ops->pipelines = NULL;
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
HRESULT vkd3d_meta_get_execute_indirect_pipeline(struct vkd3d_meta_ops *meta_ops,
|
||||
uint32_t patch_command_count, struct vkd3d_execute_indirect_info *info)
|
||||
{
|
||||
struct vkd3d_execute_indirect_ops *meta_indirect_ops = &meta_ops->execute_indirect;
|
||||
VkSpecializationMapEntry map_entry;
|
||||
VkSpecializationInfo spec;
|
||||
HRESULT hr = S_OK;
|
||||
VkResult vr;
|
||||
size_t i;
|
||||
int rc;
|
||||
|
||||
if ((rc = pthread_mutex_lock(&meta_indirect_ops->mutex)))
|
||||
{
|
||||
ERR("Failed to lock mutex, error %d.\n", rc);
|
||||
return hresult_from_errno(rc);
|
||||
}
|
||||
|
||||
for (i = 0; i < meta_indirect_ops->pipelines_count; i++)
|
||||
{
|
||||
if (meta_indirect_ops->pipelines[i].workgroup_size_x == patch_command_count)
|
||||
{
|
||||
info->vk_pipeline_layout = meta_indirect_ops->vk_pipeline_layout;
|
||||
info->vk_pipeline = meta_indirect_ops->pipelines[i].vk_pipeline;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
map_entry.constantID = 0;
|
||||
map_entry.offset = 0;
|
||||
map_entry.size = sizeof(patch_command_count);
|
||||
|
||||
spec.pMapEntries = &map_entry;
|
||||
spec.pData = &patch_command_count;
|
||||
spec.mapEntryCount = 1;
|
||||
spec.dataSize = sizeof(patch_command_count);
|
||||
|
||||
vkd3d_array_reserve((void**)&meta_indirect_ops->pipelines, &meta_indirect_ops->pipelines_size,
|
||||
meta_indirect_ops->pipelines_count + 1, sizeof(*meta_indirect_ops->pipelines));
|
||||
|
||||
meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].workgroup_size_x = patch_command_count;
|
||||
|
||||
vr = vkd3d_meta_create_compute_pipeline(meta_ops->device,
|
||||
sizeof(cs_execute_indirect_patch), cs_execute_indirect_patch,
|
||||
meta_indirect_ops->vk_pipeline_layout, &spec,
|
||||
&meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].vk_pipeline);
|
||||
|
||||
if (vr)
|
||||
{
|
||||
hr = hresult_from_vk_result(vr);
|
||||
goto out;
|
||||
}
|
||||
|
||||
info->vk_pipeline_layout = meta_indirect_ops->vk_pipeline_layout;
|
||||
info->vk_pipeline = meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].vk_pipeline;
|
||||
meta_indirect_ops->pipelines_count++;
|
||||
|
||||
out:
|
||||
pthread_mutex_unlock(&meta_indirect_ops->mutex);
|
||||
return hr;
|
||||
}
|
||||
|
||||
void vkd3d_execute_indirect_ops_cleanup(struct vkd3d_execute_indirect_ops *meta_indirect_ops,
|
||||
struct d3d12_device *device)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < meta_indirect_ops->pipelines_count; i++)
|
||||
VK_CALL(vkDestroyPipeline(device->vk_device, meta_indirect_ops->pipelines[i].vk_pipeline, NULL));
|
||||
VK_CALL(vkDestroyPipelineLayout(device->vk_device, meta_indirect_ops->vk_pipeline_layout, NULL));
|
||||
pthread_mutex_destroy(&meta_indirect_ops->mutex);
|
||||
}
|
||||
|
||||
HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
|
||||
{
|
||||
HRESULT hr;
|
||||
|
@ -1242,8 +1342,13 @@ HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device
|
|||
if (FAILED(hr = vkd3d_predicate_ops_init(&meta_ops->predicate, device)))
|
||||
goto fail_predicate_ops;
|
||||
|
||||
if (FAILED(hr = vkd3d_execute_indirect_ops_init(&meta_ops->execute_indirect, device)))
|
||||
goto fail_execute_indirect_ops;
|
||||
|
||||
return S_OK;
|
||||
|
||||
fail_execute_indirect_ops:
|
||||
vkd3d_predicate_ops_cleanup(&meta_ops->predicate, device);
|
||||
fail_predicate_ops:
|
||||
vkd3d_query_ops_cleanup(&meta_ops->query, device);
|
||||
fail_query_ops:
|
||||
|
@ -1260,6 +1365,7 @@ fail_common:
|
|||
|
||||
HRESULT vkd3d_meta_ops_cleanup(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
|
||||
{
|
||||
vkd3d_execute_indirect_ops_cleanup(&meta_ops->execute_indirect, device);
|
||||
vkd3d_predicate_ops_cleanup(&meta_ops->predicate, device);
|
||||
vkd3d_query_ops_cleanup(&meta_ops->query, device);
|
||||
vkd3d_swapchain_ops_cleanup(&meta_ops->swapchain, device);
|
||||
|
|
|
@ -0,0 +1,76 @@
|
|||
#version 450
|
||||
#extension GL_EXT_buffer_reference : require
|
||||
#extension GL_EXT_buffer_reference_uvec2 : require
|
||||
|
||||
layout(local_size_x_id = 0) in;
|
||||
|
||||
struct Command
|
||||
{
|
||||
uint type;
|
||||
uint src_offset;
|
||||
uint dst_offset;
|
||||
};
|
||||
|
||||
const int COMMAND_TYPE_COPY_U32 = 0;
|
||||
const int COMMAND_TYPE_COPY_INDEX_TYPE = 1;
|
||||
const int DXGI_FORMAT_R32_UINT = 0x2a;
|
||||
const int VK_INDEX_TYPE_UINT16 = 0;
|
||||
const int VK_INDEX_TYPE_UINT32 = 1;
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer Commands
|
||||
{
|
||||
Command commands[];
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer SrcBuffer {
|
||||
uint values[];
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer DstBuffer {
|
||||
uint values[];
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer IndirectCount {
|
||||
uint count;
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer IndirectCountWrite {
|
||||
uint count;
|
||||
};
|
||||
|
||||
layout(push_constant) uniform Registers
|
||||
{
|
||||
Commands commands_va;
|
||||
SrcBuffer src_buffer_va;
|
||||
DstBuffer dst_buffer_va;
|
||||
uvec2 indirect_count_va;
|
||||
IndirectCountWrite dst_indirect_count_va;
|
||||
uint src_stride;
|
||||
uint dst_stride;
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
Command cmd = commands_va.commands[gl_LocalInvocationIndex];
|
||||
|
||||
uint draw_id = gl_WorkGroupID.x;
|
||||
uint max_draws = ~0u;
|
||||
if (any(notEqual(indirect_count_va, uvec2(0))))
|
||||
{
|
||||
max_draws = IndirectCount(indirect_count_va).count;
|
||||
if (gl_WorkGroupID.x == 0u)
|
||||
dst_indirect_count_va.count = max_draws;
|
||||
}
|
||||
|
||||
if (draw_id < max_draws)
|
||||
{
|
||||
uint src_offset = src_stride * draw_id + cmd.src_offset;
|
||||
uint dst_offset = dst_stride * draw_id + cmd.dst_offset;
|
||||
|
||||
uint src_value = src_buffer_va.values[src_offset];
|
||||
if (cmd.type == COMMAND_TYPE_COPY_INDEX_TYPE)
|
||||
src_value = src_value == DXGI_FORMAT_R32_UINT ? VK_INDEX_TYPE_UINT32 : VK_INDEX_TYPE_UINT16;
|
||||
|
||||
dst_buffer_va.values[dst_offset] = src_value;
|
||||
}
|
||||
}
|
|
@ -2419,6 +2419,12 @@ HRESULT d3d12_command_queue_create(struct d3d12_device *device,
|
|||
const D3D12_COMMAND_QUEUE_DESC *desc, struct d3d12_command_queue **queue);
|
||||
void d3d12_command_queue_submit_stop(struct d3d12_command_queue *queue);
|
||||
|
||||
struct vkd3d_execute_indirect_info
|
||||
{
|
||||
VkPipelineLayout vk_pipeline_layout;
|
||||
VkPipeline vk_pipeline;
|
||||
};
|
||||
|
||||
/* ID3D12CommandSignature */
|
||||
struct d3d12_command_signature
|
||||
{
|
||||
|
@ -3007,6 +3013,37 @@ HRESULT vkd3d_predicate_ops_init(struct vkd3d_predicate_ops *meta_predicate_ops,
|
|||
void vkd3d_predicate_ops_cleanup(struct vkd3d_predicate_ops *meta_predicate_ops,
|
||||
struct d3d12_device *device);
|
||||
|
||||
struct vkd3d_execute_indirect_args
|
||||
{
|
||||
VkDeviceAddress template_va;
|
||||
VkDeviceAddress api_buffer_va;
|
||||
VkDeviceAddress device_generated_commands_va;
|
||||
VkDeviceAddress indirect_count_va;
|
||||
VkDeviceAddress dst_indirect_count_va;
|
||||
uint32_t api_buffer_word_stride;
|
||||
uint32_t device_generated_commands_word_stride;
|
||||
};
|
||||
|
||||
struct vkd3d_execute_indirect_pipeline
|
||||
{
|
||||
VkPipeline vk_pipeline;
|
||||
uint32_t workgroup_size_x;
|
||||
};
|
||||
|
||||
struct vkd3d_execute_indirect_ops
|
||||
{
|
||||
VkPipelineLayout vk_pipeline_layout;
|
||||
struct vkd3d_execute_indirect_pipeline *pipelines;
|
||||
size_t pipelines_count;
|
||||
size_t pipelines_size;
|
||||
pthread_mutex_t mutex;
|
||||
};
|
||||
|
||||
HRESULT vkd3d_execute_indirect_ops_init(struct vkd3d_execute_indirect_ops *meta_indirect_ops,
|
||||
struct d3d12_device *device);
|
||||
void vkd3d_execute_indirect_ops_cleanup(struct vkd3d_execute_indirect_ops *meta_indirect_ops,
|
||||
struct d3d12_device *device);
|
||||
|
||||
struct vkd3d_meta_ops_common
|
||||
{
|
||||
VkShaderModule vk_module_fullscreen_vs;
|
||||
|
@ -3022,6 +3059,7 @@ struct vkd3d_meta_ops
|
|||
struct vkd3d_swapchain_ops swapchain;
|
||||
struct vkd3d_query_ops query;
|
||||
struct vkd3d_predicate_ops predicate;
|
||||
struct vkd3d_execute_indirect_ops execute_indirect;
|
||||
};
|
||||
|
||||
HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device);
|
||||
|
@ -3054,6 +3092,9 @@ bool vkd3d_meta_get_query_gather_pipeline(struct vkd3d_meta_ops *meta_ops,
|
|||
void vkd3d_meta_get_predicate_pipeline(struct vkd3d_meta_ops *meta_ops,
|
||||
enum vkd3d_predicate_command_type command_type, struct vkd3d_predicate_command_info *info);
|
||||
|
||||
HRESULT vkd3d_meta_get_execute_indirect_pipeline(struct vkd3d_meta_ops *meta_ops,
|
||||
uint32_t patch_command_count, struct vkd3d_execute_indirect_info *info);
|
||||
|
||||
enum vkd3d_time_domain_flag
|
||||
{
|
||||
VKD3D_TIME_DOMAIN_DEVICE = 0x00000001u,
|
||||
|
|
|
@ -45,6 +45,7 @@ enum vkd3d_meta_copy_mode
|
|||
#include <cs_resolve_binary_queries.h>
|
||||
#include <cs_resolve_predicate.h>
|
||||
#include <cs_resolve_query.h>
|
||||
#include <cs_execute_indirect_patch.h>
|
||||
#include <vs_fullscreen_layer.h>
|
||||
#include <vs_fullscreen.h>
|
||||
#include <gs_fullscreen.h>
|
||||
|
|
Loading…
Reference in New Issue