vkd3d: Add meta shader to generate predicated draw/dispatch commands.

The idea is to use indirect draws and dispatches to implement
predication. For predicated indirect draws, we'll use indirect
count.

Signed-off-by: Philip Rebohle <philip.rebohle@tu-dortmund.de>
This commit is contained in:
Philip Rebohle 2020-11-23 17:42:36 +01:00 committed by Hans-Kristian Arntzen
parent e27a153a22
commit 82d9ba1ebf
6 changed files with 228 additions and 0 deletions

View File

@ -12,7 +12,9 @@ vkd3d_shaders =[
'shaders/cs_clear_uav_image_2d_uint.comp',
'shaders/cs_clear_uav_image_3d_float.comp',
'shaders/cs_clear_uav_image_3d_uint.comp',
'shaders/cs_predicate_command.comp',
'shaders/cs_resolve_binary_queries.comp',
'shaders/cs_resolve_predicate.comp',
'shaders/fs_copy_image_float.frag',

View File

@ -1103,6 +1103,99 @@ void vkd3d_query_ops_cleanup(struct vkd3d_query_ops *meta_query_ops,
VK_CALL(vkDestroyPipeline(device->vk_device, meta_query_ops->vk_resolve_binary_pipeline, NULL));
}
HRESULT vkd3d_predicate_ops_init(struct vkd3d_predicate_ops *meta_predicate_ops,
struct d3d12_device *device)
{
VkPushConstantRange push_constant_range;
VkSpecializationInfo spec_info;
VkResult vr;
size_t i;
static const struct spec_data
{
uint32_t arg_count;
VkBool32 arg_indirect;
}
spec_data[] =
{
{ 4, VK_FALSE }, /* VKD3D_PREDICATE_OP_DRAW */
{ 5, VK_FALSE }, /* VKD3D_PREDICATE_OP_DRAW_INDEXED */
{ 1, VK_FALSE }, /* VKD3D_PREDICATE_OP_DRAW_INDIRECT */
{ 1, VK_TRUE }, /* VKD3D_PREDICATE_OP_DRAW_INDIRECT_COUNT */
{ 3, VK_FALSE }, /* VKD3D_PREDICATE_OP_DISPATCH */
{ 3, VK_TRUE }, /* VKD3D_PREDICATE_OP_DISPATCH_INDIRECT */
};
static const VkSpecializationMapEntry spec_map[] =
{
{ 0, offsetof(struct spec_data, arg_count), sizeof(uint32_t) },
{ 1, offsetof(struct spec_data, arg_indirect), sizeof(VkBool32) },
};
memset(meta_predicate_ops, 0, sizeof(*meta_predicate_ops));
push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
push_constant_range.offset = 0;
push_constant_range.size = sizeof(struct vkd3d_predicate_command_args);
if ((vr = vkd3d_meta_create_pipeline_layout(device, 0, NULL, 1,
&push_constant_range, &meta_predicate_ops->vk_command_pipeline_layout)) < 0)
return hresult_from_vk_result(vr);
push_constant_range.size = sizeof(struct vkd3d_predicate_resolve_args);
if ((vr = vkd3d_meta_create_pipeline_layout(device, 0, NULL, 1,
&push_constant_range, &meta_predicate_ops->vk_resolve_pipeline_layout)) < 0)
return hresult_from_vk_result(vr);
spec_info.mapEntryCount = ARRAY_SIZE(spec_map);
spec_info.pMapEntries = spec_map;
spec_info.dataSize = sizeof(struct spec_data);
for (i = 0; i < ARRAY_SIZE(spec_data); i++)
{
spec_info.pData = &spec_data[i];
if ((vr = vkd3d_meta_create_compute_pipeline(device, sizeof(cs_predicate_command), cs_predicate_command,
meta_predicate_ops->vk_command_pipeline_layout, &spec_info, &meta_predicate_ops->vk_command_pipelines[i])) < 0)
goto fail;
meta_predicate_ops->data_sizes[i] = spec_data[i].arg_count * sizeof(uint32_t);
}
if ((vr = vkd3d_meta_create_compute_pipeline(device, sizeof(cs_resolve_predicate), cs_resolve_predicate,
meta_predicate_ops->vk_resolve_pipeline_layout, &spec_info, &meta_predicate_ops->vk_resolve_pipeline)) < 0)
goto fail;
return S_OK;
fail:
vkd3d_predicate_ops_cleanup(meta_predicate_ops, device);
return hresult_from_vk_result(vr);
}
/*
 * Destroys all pipelines and pipeline layouts stored in meta_predicate_ops.
 * Pipelines are destroyed first, then the two layouts.
 */
void vkd3d_predicate_ops_cleanup(struct vkd3d_predicate_ops *meta_predicate_ops,
        struct d3d12_device *device)
{
    /* VK_CALL is used with this local in scope, as in the original code. */
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    size_t command_index = 0;

    /* One command pipeline per predicated command type. */
    while (command_index < VKD3D_PREDICATE_COMMAND_COUNT)
    {
        VK_CALL(vkDestroyPipeline(device->vk_device,
                meta_predicate_ops->vk_command_pipelines[command_index], NULL));
        command_index++;
    }

    VK_CALL(vkDestroyPipeline(device->vk_device,
            meta_predicate_ops->vk_resolve_pipeline, NULL));

    VK_CALL(vkDestroyPipelineLayout(device->vk_device,
            meta_predicate_ops->vk_command_pipeline_layout, NULL));
    VK_CALL(vkDestroyPipelineLayout(device->vk_device,
            meta_predicate_ops->vk_resolve_pipeline_layout, NULL));
}
/*
 * Fills *info with the pipeline layout, pipeline and argument data size that
 * belong to command_type. command_type is used directly as an array index
 * and is not range-checked here.
 */
void vkd3d_meta_get_predicate_pipeline(struct vkd3d_meta_ops *meta_ops,
        enum vkd3d_predicate_command_type command_type, struct vkd3d_predicate_command_info *info)
{
    /* The layout is shared by all command types; pipeline and size are per type. */
    info->data_size = meta_ops->predicate.data_sizes[command_type];
    info->vk_pipeline = meta_ops->predicate.vk_command_pipelines[command_type];
    info->vk_pipeline_layout = meta_ops->predicate.vk_command_pipeline_layout;
}
HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
{
HRESULT hr;
@ -1125,8 +1218,13 @@ HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device
if (FAILED(hr = vkd3d_query_ops_init(&meta_ops->query, device)))
goto fail_query_ops;
if (FAILED(hr = vkd3d_predicate_ops_init(&meta_ops->predicate, device)))
goto fail_predicate_ops;
return S_OK;
fail_predicate_ops:
vkd3d_query_ops_cleanup(&meta_ops->query, device);
fail_query_ops:
vkd3d_swapchain_ops_cleanup(&meta_ops->swapchain, device);
fail_swapchain_ops:
@ -1141,6 +1239,7 @@ fail_common:
HRESULT vkd3d_meta_ops_cleanup(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
{
vkd3d_predicate_ops_cleanup(&meta_ops->predicate, device);
vkd3d_query_ops_cleanup(&meta_ops->query, device);
vkd3d_swapchain_ops_cleanup(&meta_ops->swapchain, device);
vkd3d_copy_image_ops_cleanup(&meta_ops->copy_image, device);

View File

@ -0,0 +1,40 @@
#version 450
#extension GL_EXT_buffer_reference : require
// Each workgroup consists of a single invocation.
layout(local_size_x = 1) in;
// Number of 32-bit argument words main() copies to dst_args.
layout(constant_id = 0) const uint c_arg_count = 0;
// When true, main() reads the arguments from src_args; otherwise it reads
// them from the cmd_args push constants.
layout(constant_id = 1) const bool c_arg_indirect = false;
// Reference to the 32-bit predicate value; main() treats non-zero as
// "execute the command".
layout(std430, buffer_reference, buffer_reference_align = 4)
readonly buffer predicate_t {
uint data;
};
// Reference to the source argument words, read only when c_arg_indirect is true.
layout(std430, buffer_reference, buffer_reference_align = 4)
readonly buffer src_args_t {
uint data[];
};
// Reference to the destination argument words written by main().
layout(std430, buffer_reference, buffer_reference_align = 4)
writeonly buffer dst_args_t {
uint data[];
};
// Push constants: three buffer references followed by up to five argument
// words supplied directly with the command.
// NOTE(review): the host sizes this range from struct
// vkd3d_predicate_command_args, so member order here presumably has to stay
// in sync with that struct - confirm before reordering.
layout(push_constant)
uniform u_info_t {
predicate_t predicate;
src_args_t src_args;
dst_args_t dst_args;
uint cmd_args[5];
};
// Copies c_arg_count argument words to dst_args. Each word comes from
// src_args (indirect) or cmd_args (direct); when the predicate value is
// zero, zero is written instead of the fetched word.
void main() {
  bool enabled = predicate.data != 0u;

  for (uint idx = 0; idx < c_arg_count; idx++) {
    uint value;

    if (c_arg_indirect)
      value = src_args.data[idx];
    else
      value = cmd_args[idx];

    // A failed predicate zeroes out every argument word.
    if (!enabled)
      value = 0u;

    dst_args.data[idx] = value;
  }
}

View File

@ -0,0 +1,26 @@
#version 450
#extension GL_EXT_buffer_reference : require
layout(local_size_x = 1) in;
// Reference to the source predicate, read as two 32-bit words.
layout(std430, buffer_reference, buffer_reference_align = 8)
readonly buffer src_predicate_t {
uvec2 data;
};
// Reference to the resolved 32-bit predicate; main() writes 0u or 1u.
layout(std430, buffer_reference, buffer_reference_align = 4)
writeonly buffer dst_predicate_t {
uint data;
};
// Push constants: source and destination references plus a flag that flips
// the result of the zero test in main().
// NOTE(review): the host sizes this range from struct
// vkd3d_predicate_resolve_args, so the member order presumably mirrors that
// struct - confirm before reordering.
layout(push_constant)
uniform u_info_t {
src_predicate_t src;
dst_predicate_t dst;
bool invert;
};
// Collapses the two-word source predicate into a single word:
// writes 1u when "both words are zero" equals invert, otherwise 0u.
void main() {
  // Vector == yields a single bool that is true only if all components match.
  bool is_zero = src.data == uvec2(0u);
  dst.data = uint(is_zero == invert);
}

View File

@ -1818,6 +1818,61 @@ HRESULT vkd3d_query_ops_init(struct vkd3d_query_ops *meta_query_ops,
void vkd3d_query_ops_cleanup(struct vkd3d_query_ops *meta_query_ops,
struct d3d12_device *device);
/* Arguments supplied directly with a predicated command; one member per
 * command shape. Embedded as the last member of
 * struct vkd3d_predicate_command_args. */
union vkd3d_predicate_command_direct_args
{
VkDispatchIndirectCommand dispatch;
VkDrawIndirectCommand draw;
VkDrawIndexedIndirectCommand draw_indexed;
/* NOTE(review): presumably the draw count for a predicated indirect draw - confirm against callers. */
uint32_t draw_count;
};
/* Push constant data for the predicate command pipelines; its size is used
 * as the push constant range of the command pipeline layout.
 * NOTE(review): member order presumably has to match the push constant block
 * in cs_predicate_command.comp - confirm before reordering. */
struct vkd3d_predicate_command_args
{
VkDeviceAddress predicate_va;
VkDeviceAddress src_arg_va;
VkDeviceAddress dst_arg_va;
union vkd3d_predicate_command_direct_args args;
};
/* Kinds of predicated commands. The values index the vk_command_pipelines
 * and data_sizes arrays of struct vkd3d_predicate_ops, so their order must
 * match the specialization table in vkd3d_predicate_ops_init(). */
enum vkd3d_predicate_command_type
{
VKD3D_PREDICATE_COMMAND_DRAW,
VKD3D_PREDICATE_COMMAND_DRAW_INDEXED,
VKD3D_PREDICATE_COMMAND_DRAW_INDIRECT,
VKD3D_PREDICATE_COMMAND_DRAW_INDIRECT_COUNT,
VKD3D_PREDICATE_COMMAND_DISPATCH,
VKD3D_PREDICATE_COMMAND_DISPATCH_INDIRECT,
/* Number of command types; used as an array size, not a valid command. */
VKD3D_PREDICATE_COMMAND_COUNT
};
/* Result of vkd3d_meta_get_predicate_pipeline() for one command type. */
struct vkd3d_predicate_command_info
{
VkPipelineLayout vk_pipeline_layout;
VkPipeline vk_pipeline;
/* Size in bytes of the argument words for this command type
 * (argument count * sizeof(uint32_t)). */
uint32_t data_size;
};
/* Push constant data for the predicate resolve pipeline; its size is used as
 * the push constant range of the resolve pipeline layout.
 * NOTE(review): member order presumably has to match the push constant block
 * in cs_resolve_predicate.comp - confirm before reordering. */
struct vkd3d_predicate_resolve_args
{
VkDeviceAddress src_va;
VkDeviceAddress dst_va;
VkBool32 invert;
};
/* Vulkan objects for the predication meta shaders. Created by
 * vkd3d_predicate_ops_init() and destroyed by vkd3d_predicate_ops_cleanup(). */
struct vkd3d_predicate_ops
{
VkPipelineLayout vk_command_pipeline_layout;
VkPipelineLayout vk_resolve_pipeline_layout;
/* Indexed by enum vkd3d_predicate_command_type. */
VkPipeline vk_command_pipelines[VKD3D_PREDICATE_COMMAND_COUNT];
VkPipeline vk_resolve_pipeline;
/* Argument data size in bytes, indexed by enum vkd3d_predicate_command_type. */
uint32_t data_sizes[VKD3D_PREDICATE_COMMAND_COUNT];
};
/* Zero-fills meta_predicate_ops and creates its pipeline layouts and
 * pipelines. Returns S_OK or a failure HRESULT. */
HRESULT vkd3d_predicate_ops_init(struct vkd3d_predicate_ops *meta_predicate_ops,
struct d3d12_device *device);
/* Destroys the pipelines and pipeline layouts held by meta_predicate_ops. */
void vkd3d_predicate_ops_cleanup(struct vkd3d_predicate_ops *meta_predicate_ops,
struct d3d12_device *device);
struct vkd3d_meta_ops_common
{
VkShaderModule vk_module_fullscreen_vs;
@ -1832,6 +1887,7 @@ struct vkd3d_meta_ops
struct vkd3d_copy_image_ops copy_image;
struct vkd3d_swapchain_ops swapchain;
struct vkd3d_query_ops query;
struct vkd3d_predicate_ops predicate;
};
HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device);
@ -1857,6 +1913,9 @@ const struct vkd3d_format *vkd3d_meta_get_copy_image_attachment_format(struct vk
HRESULT vkd3d_meta_get_swapchain_pipeline(struct vkd3d_meta_ops *meta_ops,
const struct vkd3d_swapchain_pipeline_key *key, struct vkd3d_swapchain_info *info);
/* Fills *info with the pipeline layout, pipeline and argument data size for
 * command_type. command_type is used as an array index without a range check. */
void vkd3d_meta_get_predicate_pipeline(struct vkd3d_meta_ops *meta_ops,
enum vkd3d_predicate_command_type command_type, struct vkd3d_predicate_command_info *info);
enum vkd3d_time_domain_flag
{
VKD3D_TIME_DOMAIN_DEVICE = 0x00000001u,

View File

@ -41,7 +41,9 @@ enum vkd3d_meta_copy_mode
#include <cs_clear_uav_image_2d_uint.h>
#include <cs_clear_uav_image_3d_float.h>
#include <cs_clear_uav_image_3d_uint.h>
#include <cs_predicate_command.h>
#include <cs_resolve_binary_queries.h>
#include <cs_resolve_predicate.h>
#include <vs_fullscreen_layer.h>
#include <vs_fullscreen.h>
#include <gs_fullscreen.h>