vkd3d: Add meta shader to generate predicated draw/dispatch commands.
The idea is to use indirect draws and dispatches to implement predication.
For predicated indirect draws, we'll use indirect count.

Signed-off-by: Philip Rebohle <philip.rebohle@tu-dortmund.de>
This commit is contained in:
parent
e27a153a22
commit
82d9ba1ebf
|
@ -12,7 +12,9 @@ vkd3d_shaders =[
|
|||
'shaders/cs_clear_uav_image_2d_uint.comp',
|
||||
'shaders/cs_clear_uav_image_3d_float.comp',
|
||||
'shaders/cs_clear_uav_image_3d_uint.comp',
|
||||
'shaders/cs_predicate_command.comp',
|
||||
'shaders/cs_resolve_binary_queries.comp',
|
||||
'shaders/cs_resolve_predicate.comp',
|
||||
|
||||
'shaders/fs_copy_image_float.frag',
|
||||
|
||||
|
|
|
@ -1103,6 +1103,99 @@ void vkd3d_query_ops_cleanup(struct vkd3d_query_ops *meta_query_ops,
|
|||
VK_CALL(vkDestroyPipeline(device->vk_device, meta_query_ops->vk_resolve_binary_pipeline, NULL));
|
||||
}
|
||||
|
||||
HRESULT vkd3d_predicate_ops_init(struct vkd3d_predicate_ops *meta_predicate_ops,
|
||||
struct d3d12_device *device)
|
||||
{
|
||||
VkPushConstantRange push_constant_range;
|
||||
VkSpecializationInfo spec_info;
|
||||
VkResult vr;
|
||||
size_t i;
|
||||
|
||||
static const struct spec_data
|
||||
{
|
||||
uint32_t arg_count;
|
||||
VkBool32 arg_indirect;
|
||||
}
|
||||
spec_data[] =
|
||||
{
|
||||
{ 4, VK_FALSE }, /* VKD3D_PREDICATE_OP_DRAW */
|
||||
{ 5, VK_FALSE }, /* VKD3D_PREDICATE_OP_DRAW_INDEXED */
|
||||
{ 1, VK_FALSE }, /* VKD3D_PREDICATE_OP_DRAW_INDIRECT */
|
||||
{ 1, VK_TRUE }, /* VKD3D_PREDICATE_OP_DRAW_INDIRECT_COUNT */
|
||||
{ 3, VK_FALSE }, /* VKD3D_PREDICATE_OP_DISPATCH */
|
||||
{ 3, VK_TRUE }, /* VKD3D_PREDICATE_OP_DISPATCH_INDIRECT */
|
||||
};
|
||||
|
||||
static const VkSpecializationMapEntry spec_map[] =
|
||||
{
|
||||
{ 0, offsetof(struct spec_data, arg_count), sizeof(uint32_t) },
|
||||
{ 1, offsetof(struct spec_data, arg_indirect), sizeof(VkBool32) },
|
||||
};
|
||||
|
||||
memset(meta_predicate_ops, 0, sizeof(*meta_predicate_ops));
|
||||
push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
push_constant_range.offset = 0;
|
||||
push_constant_range.size = sizeof(struct vkd3d_predicate_command_args);
|
||||
|
||||
if ((vr = vkd3d_meta_create_pipeline_layout(device, 0, NULL, 1,
|
||||
&push_constant_range, &meta_predicate_ops->vk_command_pipeline_layout)) < 0)
|
||||
return hresult_from_vk_result(vr);
|
||||
|
||||
push_constant_range.size = sizeof(struct vkd3d_predicate_resolve_args);
|
||||
if ((vr = vkd3d_meta_create_pipeline_layout(device, 0, NULL, 1,
|
||||
&push_constant_range, &meta_predicate_ops->vk_resolve_pipeline_layout)) < 0)
|
||||
return hresult_from_vk_result(vr);
|
||||
|
||||
spec_info.mapEntryCount = ARRAY_SIZE(spec_map);
|
||||
spec_info.pMapEntries = spec_map;
|
||||
spec_info.dataSize = sizeof(struct spec_data);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(spec_data); i++)
|
||||
{
|
||||
spec_info.pData = &spec_data[i];
|
||||
|
||||
if ((vr = vkd3d_meta_create_compute_pipeline(device, sizeof(cs_predicate_command), cs_predicate_command,
|
||||
meta_predicate_ops->vk_command_pipeline_layout, &spec_info, &meta_predicate_ops->vk_command_pipelines[i])) < 0)
|
||||
goto fail;
|
||||
|
||||
meta_predicate_ops->data_sizes[i] = spec_data[i].arg_count * sizeof(uint32_t);
|
||||
}
|
||||
|
||||
if ((vr = vkd3d_meta_create_compute_pipeline(device, sizeof(cs_resolve_predicate), cs_resolve_predicate,
|
||||
meta_predicate_ops->vk_resolve_pipeline_layout, &spec_info, &meta_predicate_ops->vk_resolve_pipeline)) < 0)
|
||||
goto fail;
|
||||
|
||||
return S_OK;
|
||||
|
||||
fail:
|
||||
vkd3d_predicate_ops_cleanup(meta_predicate_ops, device);
|
||||
return hresult_from_vk_result(vr);
|
||||
}
|
||||
|
||||
void vkd3d_predicate_ops_cleanup(struct vkd3d_predicate_ops *meta_predicate_ops,
|
||||
struct d3d12_device *device)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < VKD3D_PREDICATE_COMMAND_COUNT; i++)
|
||||
VK_CALL(vkDestroyPipeline(device->vk_device, meta_predicate_ops->vk_command_pipelines[i], NULL));
|
||||
VK_CALL(vkDestroyPipeline(device->vk_device, meta_predicate_ops->vk_resolve_pipeline, NULL));
|
||||
|
||||
VK_CALL(vkDestroyPipelineLayout(device->vk_device, meta_predicate_ops->vk_command_pipeline_layout, NULL));
|
||||
VK_CALL(vkDestroyPipelineLayout(device->vk_device, meta_predicate_ops->vk_resolve_pipeline_layout, NULL));
|
||||
}
|
||||
|
||||
void vkd3d_meta_get_predicate_pipeline(struct vkd3d_meta_ops *meta_ops,
|
||||
enum vkd3d_predicate_command_type command_type, struct vkd3d_predicate_command_info *info)
|
||||
{
|
||||
const struct vkd3d_predicate_ops *predicate_ops = &meta_ops->predicate;
|
||||
|
||||
info->vk_pipeline_layout = predicate_ops->vk_command_pipeline_layout;
|
||||
info->vk_pipeline = predicate_ops->vk_command_pipelines[command_type];
|
||||
info->data_size = predicate_ops->data_sizes[command_type];
|
||||
}
|
||||
|
||||
HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
|
||||
{
|
||||
HRESULT hr;
|
||||
|
@ -1125,8 +1218,13 @@ HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device
|
|||
if (FAILED(hr = vkd3d_query_ops_init(&meta_ops->query, device)))
|
||||
goto fail_query_ops;
|
||||
|
||||
if (FAILED(hr = vkd3d_predicate_ops_init(&meta_ops->predicate, device)))
|
||||
goto fail_predicate_ops;
|
||||
|
||||
return S_OK;
|
||||
|
||||
fail_predicate_ops:
|
||||
vkd3d_query_ops_cleanup(&meta_ops->query, device);
|
||||
fail_query_ops:
|
||||
vkd3d_swapchain_ops_cleanup(&meta_ops->swapchain, device);
|
||||
fail_swapchain_ops:
|
||||
|
@ -1141,6 +1239,7 @@ fail_common:
|
|||
|
||||
HRESULT vkd3d_meta_ops_cleanup(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
|
||||
{
|
||||
vkd3d_predicate_ops_cleanup(&meta_ops->predicate, device);
|
||||
vkd3d_query_ops_cleanup(&meta_ops->query, device);
|
||||
vkd3d_swapchain_ops_cleanup(&meta_ops->swapchain, device);
|
||||
vkd3d_copy_image_ops_cleanup(&meta_ops->copy_image, device);
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
#version 450

#extension GL_EXT_buffer_reference : require

// Single invocation: copies a handful of 32-bit command arguments into a
// destination buffer, replacing all of them with zero when the predicate
// value is zero.
layout(local_size_x = 1) in;

// Number of 32-bit arguments to write.
layout(constant_id = 0) const uint c_arg_count = 0;
// true: read the arguments from src_args; false: read them from cmd_args.
layout(constant_id = 1) const bool c_arg_indirect = false;

// Predicate value; zero disables the command.
layout(std430, buffer_reference, buffer_reference_align = 4)
readonly buffer predicate_t {
  uint data;
};

// Source arguments, only read when c_arg_indirect is set.
layout(std430, buffer_reference, buffer_reference_align = 4)
readonly buffer src_args_t {
  uint data[];
};

// Destination for the (possibly zeroed) arguments.
layout(std430, buffer_reference, buffer_reference_align = 4)
writeonly buffer dst_args_t {
  uint data[];
};

layout(push_constant)
uniform u_info_t {
  predicate_t predicate;
  src_args_t src_args;
  dst_args_t dst_args;
  uint cmd_args[5];
};

void main() {
  bool enabled = predicate.data != 0;

  for (uint idx = 0; idx < c_arg_count; idx++) {
    uint value;

    if (c_arg_indirect)
      value = src_args.data[idx];
    else
      value = cmd_args[idx];

    if (!enabled)
      value = 0u;

    dst_args.data[idx] = value;
  }
}
|
|
@ -0,0 +1,26 @@
|
|||
#version 450

#extension GL_EXT_buffer_reference : require

// Single invocation: reduces a 64-bit source predicate (read as two 32-bit
// words) to a 32-bit 0/1 value.
layout(local_size_x = 1) in;

// 64-bit source value, 8-byte aligned.
layout(std430, buffer_reference, buffer_reference_align = 8)
readonly buffer src_predicate_t {
  uvec2 data;
};

// Resolved 32-bit result.
layout(std430, buffer_reference, buffer_reference_align = 4)
writeonly buffer dst_predicate_t {
  uint data;
};

layout(push_constant)
uniform u_info_t {
  src_predicate_t src;
  dst_predicate_t dst;
  bool invert;
};

void main() {
  uvec2 value = src.data;
  bool is_zero = all(equal(value, uvec2(0u)));

  // Without invert: zero input -> 0, non-zero input -> 1.
  // With invert the two results are swapped.
  uint result;

  if (is_zero != invert)
    result = 0u;
  else
    result = 1u;

  dst.data = result;
}
|
|
@ -1818,6 +1818,61 @@ HRESULT vkd3d_query_ops_init(struct vkd3d_query_ops *meta_query_ops,
|
|||
void vkd3d_query_ops_cleanup(struct vkd3d_query_ops *meta_query_ops,
|
||||
struct d3d12_device *device);
|
||||
|
||||
union vkd3d_predicate_command_direct_args
|
||||
{
|
||||
VkDispatchIndirectCommand dispatch;
|
||||
VkDrawIndirectCommand draw;
|
||||
VkDrawIndexedIndirectCommand draw_indexed;
|
||||
uint32_t draw_count;
|
||||
};
|
||||
|
||||
struct vkd3d_predicate_command_args
|
||||
{
|
||||
VkDeviceAddress predicate_va;
|
||||
VkDeviceAddress src_arg_va;
|
||||
VkDeviceAddress dst_arg_va;
|
||||
union vkd3d_predicate_command_direct_args args;
|
||||
};
|
||||
|
||||
/* Command kinds that have a dedicated predicate pipeline. The values index
 * vk_command_pipelines[] and data_sizes[] in struct vkd3d_predicate_ops and
 * must stay in the same order as the specialization table in
 * vkd3d_predicate_ops_init(). */
enum vkd3d_predicate_command_type
{
    VKD3D_PREDICATE_COMMAND_DRAW,                /* 4 words, from push constants */
    VKD3D_PREDICATE_COMMAND_DRAW_INDEXED,        /* 5 words, from push constants */
    VKD3D_PREDICATE_COMMAND_DRAW_INDIRECT,       /* 1 word, from push constants */
    VKD3D_PREDICATE_COMMAND_DRAW_INDIRECT_COUNT, /* 1 word, from the source buffer */
    VKD3D_PREDICATE_COMMAND_DISPATCH,            /* 3 words, from push constants */
    VKD3D_PREDICATE_COMMAND_DISPATCH_INDIRECT,   /* 3 words, from the source buffer */
    VKD3D_PREDICATE_COMMAND_COUNT                /* number of command types, not a command */
};
|
||||
|
||||
struct vkd3d_predicate_command_info
|
||||
{
|
||||
VkPipelineLayout vk_pipeline_layout;
|
||||
VkPipeline vk_pipeline;
|
||||
uint32_t data_size;
|
||||
};
|
||||
|
||||
struct vkd3d_predicate_resolve_args
|
||||
{
|
||||
VkDeviceAddress src_va;
|
||||
VkDeviceAddress dst_va;
|
||||
VkBool32 invert;
|
||||
};
|
||||
|
||||
struct vkd3d_predicate_ops
|
||||
{
|
||||
VkPipelineLayout vk_command_pipeline_layout;
|
||||
VkPipelineLayout vk_resolve_pipeline_layout;
|
||||
VkPipeline vk_command_pipelines[VKD3D_PREDICATE_COMMAND_COUNT];
|
||||
VkPipeline vk_resolve_pipeline;
|
||||
uint32_t data_sizes[VKD3D_PREDICATE_COMMAND_COUNT];
|
||||
};
|
||||
|
||||
/* Creates the predicate pipeline layouts and pipelines; returns S_OK or the
 * HRESULT translated from the failing VkResult. */
HRESULT vkd3d_predicate_ops_init(struct vkd3d_predicate_ops *meta_predicate_ops,
        struct d3d12_device *device);
/* Destroys all pipelines and pipeline layouts held by meta_predicate_ops. */
void vkd3d_predicate_ops_cleanup(struct vkd3d_predicate_ops *meta_predicate_ops,
        struct d3d12_device *device);
|
||||
|
||||
struct vkd3d_meta_ops_common
|
||||
{
|
||||
VkShaderModule vk_module_fullscreen_vs;
|
||||
|
@ -1832,6 +1887,7 @@ struct vkd3d_meta_ops
|
|||
struct vkd3d_copy_image_ops copy_image;
|
||||
struct vkd3d_swapchain_ops swapchain;
|
||||
struct vkd3d_query_ops query;
|
||||
struct vkd3d_predicate_ops predicate;
|
||||
};
|
||||
|
||||
HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device);
|
||||
|
@ -1857,6 +1913,9 @@ const struct vkd3d_format *vkd3d_meta_get_copy_image_attachment_format(struct vk
|
|||
HRESULT vkd3d_meta_get_swapchain_pipeline(struct vkd3d_meta_ops *meta_ops,
|
||||
const struct vkd3d_swapchain_pipeline_key *key, struct vkd3d_swapchain_info *info);
|
||||
|
||||
void vkd3d_meta_get_predicate_pipeline(struct vkd3d_meta_ops *meta_ops,
|
||||
enum vkd3d_predicate_command_type command_type, struct vkd3d_predicate_command_info *info);
|
||||
|
||||
enum vkd3d_time_domain_flag
|
||||
{
|
||||
VKD3D_TIME_DOMAIN_DEVICE = 0x00000001u,
|
||||
|
|
|
@ -41,7 +41,9 @@ enum vkd3d_meta_copy_mode
|
|||
#include <cs_clear_uav_image_2d_uint.h>
|
||||
#include <cs_clear_uav_image_3d_float.h>
|
||||
#include <cs_clear_uav_image_3d_uint.h>
|
||||
#include <cs_predicate_command.h>
|
||||
#include <cs_resolve_binary_queries.h>
|
||||
#include <cs_resolve_predicate.h>
|
||||
#include <vs_fullscreen_layer.h>
|
||||
#include <vs_fullscreen.h>
|
||||
#include <gs_fullscreen.h>
|
||||
|
|
Loading…
Reference in New Issue