vkd3d: Add meta shader to generate predicated draw/dispatch commands.
The idea is to use indirect draws and dispatches to implement predication. For predicated indirect draws, we'll use indirect count.

Signed-off-by: Philip Rebohle <philip.rebohle@tu-dortmund.de>
This commit is contained in:
parent
e27a153a22
commit
82d9ba1ebf
|
@ -12,7 +12,9 @@ vkd3d_shaders =[
|
||||||
'shaders/cs_clear_uav_image_2d_uint.comp',
|
'shaders/cs_clear_uav_image_2d_uint.comp',
|
||||||
'shaders/cs_clear_uav_image_3d_float.comp',
|
'shaders/cs_clear_uav_image_3d_float.comp',
|
||||||
'shaders/cs_clear_uav_image_3d_uint.comp',
|
'shaders/cs_clear_uav_image_3d_uint.comp',
|
||||||
|
'shaders/cs_predicate_command.comp',
|
||||||
'shaders/cs_resolve_binary_queries.comp',
|
'shaders/cs_resolve_binary_queries.comp',
|
||||||
|
'shaders/cs_resolve_predicate.comp',
|
||||||
|
|
||||||
'shaders/fs_copy_image_float.frag',
|
'shaders/fs_copy_image_float.frag',
|
||||||
|
|
||||||
|
|
|
@ -1103,6 +1103,99 @@ void vkd3d_query_ops_cleanup(struct vkd3d_query_ops *meta_query_ops,
|
||||||
VK_CALL(vkDestroyPipeline(device->vk_device, meta_query_ops->vk_resolve_binary_pipeline, NULL));
|
VK_CALL(vkDestroyPipeline(device->vk_device, meta_query_ops->vk_resolve_binary_pipeline, NULL));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HRESULT vkd3d_predicate_ops_init(struct vkd3d_predicate_ops *meta_predicate_ops,
|
||||||
|
struct d3d12_device *device)
|
||||||
|
{
|
||||||
|
VkPushConstantRange push_constant_range;
|
||||||
|
VkSpecializationInfo spec_info;
|
||||||
|
VkResult vr;
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
static const struct spec_data
|
||||||
|
{
|
||||||
|
uint32_t arg_count;
|
||||||
|
VkBool32 arg_indirect;
|
||||||
|
}
|
||||||
|
spec_data[] =
|
||||||
|
{
|
||||||
|
{ 4, VK_FALSE }, /* VKD3D_PREDICATE_OP_DRAW */
|
||||||
|
{ 5, VK_FALSE }, /* VKD3D_PREDICATE_OP_DRAW_INDEXED */
|
||||||
|
{ 1, VK_FALSE }, /* VKD3D_PREDICATE_OP_DRAW_INDIRECT */
|
||||||
|
{ 1, VK_TRUE }, /* VKD3D_PREDICATE_OP_DRAW_INDIRECT_COUNT */
|
||||||
|
{ 3, VK_FALSE }, /* VKD3D_PREDICATE_OP_DISPATCH */
|
||||||
|
{ 3, VK_TRUE }, /* VKD3D_PREDICATE_OP_DISPATCH_INDIRECT */
|
||||||
|
};
|
||||||
|
|
||||||
|
static const VkSpecializationMapEntry spec_map[] =
|
||||||
|
{
|
||||||
|
{ 0, offsetof(struct spec_data, arg_count), sizeof(uint32_t) },
|
||||||
|
{ 1, offsetof(struct spec_data, arg_indirect), sizeof(VkBool32) },
|
||||||
|
};
|
||||||
|
|
||||||
|
memset(meta_predicate_ops, 0, sizeof(*meta_predicate_ops));
|
||||||
|
push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||||
|
push_constant_range.offset = 0;
|
||||||
|
push_constant_range.size = sizeof(struct vkd3d_predicate_command_args);
|
||||||
|
|
||||||
|
if ((vr = vkd3d_meta_create_pipeline_layout(device, 0, NULL, 1,
|
||||||
|
&push_constant_range, &meta_predicate_ops->vk_command_pipeline_layout)) < 0)
|
||||||
|
return hresult_from_vk_result(vr);
|
||||||
|
|
||||||
|
push_constant_range.size = sizeof(struct vkd3d_predicate_resolve_args);
|
||||||
|
if ((vr = vkd3d_meta_create_pipeline_layout(device, 0, NULL, 1,
|
||||||
|
&push_constant_range, &meta_predicate_ops->vk_resolve_pipeline_layout)) < 0)
|
||||||
|
return hresult_from_vk_result(vr);
|
||||||
|
|
||||||
|
spec_info.mapEntryCount = ARRAY_SIZE(spec_map);
|
||||||
|
spec_info.pMapEntries = spec_map;
|
||||||
|
spec_info.dataSize = sizeof(struct spec_data);
|
||||||
|
|
||||||
|
for (i = 0; i < ARRAY_SIZE(spec_data); i++)
|
||||||
|
{
|
||||||
|
spec_info.pData = &spec_data[i];
|
||||||
|
|
||||||
|
if ((vr = vkd3d_meta_create_compute_pipeline(device, sizeof(cs_predicate_command), cs_predicate_command,
|
||||||
|
meta_predicate_ops->vk_command_pipeline_layout, &spec_info, &meta_predicate_ops->vk_command_pipelines[i])) < 0)
|
||||||
|
goto fail;
|
||||||
|
|
||||||
|
meta_predicate_ops->data_sizes[i] = spec_data[i].arg_count * sizeof(uint32_t);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((vr = vkd3d_meta_create_compute_pipeline(device, sizeof(cs_resolve_predicate), cs_resolve_predicate,
|
||||||
|
meta_predicate_ops->vk_resolve_pipeline_layout, &spec_info, &meta_predicate_ops->vk_resolve_pipeline)) < 0)
|
||||||
|
goto fail;
|
||||||
|
|
||||||
|
return S_OK;
|
||||||
|
|
||||||
|
fail:
|
||||||
|
vkd3d_predicate_ops_cleanup(meta_predicate_ops, device);
|
||||||
|
return hresult_from_vk_result(vr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void vkd3d_predicate_ops_cleanup(struct vkd3d_predicate_ops *meta_predicate_ops,
|
||||||
|
struct d3d12_device *device)
|
||||||
|
{
|
||||||
|
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
for (i = 0; i < VKD3D_PREDICATE_COMMAND_COUNT; i++)
|
||||||
|
VK_CALL(vkDestroyPipeline(device->vk_device, meta_predicate_ops->vk_command_pipelines[i], NULL));
|
||||||
|
VK_CALL(vkDestroyPipeline(device->vk_device, meta_predicate_ops->vk_resolve_pipeline, NULL));
|
||||||
|
|
||||||
|
VK_CALL(vkDestroyPipelineLayout(device->vk_device, meta_predicate_ops->vk_command_pipeline_layout, NULL));
|
||||||
|
VK_CALL(vkDestroyPipelineLayout(device->vk_device, meta_predicate_ops->vk_resolve_pipeline_layout, NULL));
|
||||||
|
}
|
||||||
|
|
||||||
|
void vkd3d_meta_get_predicate_pipeline(struct vkd3d_meta_ops *meta_ops,
|
||||||
|
enum vkd3d_predicate_command_type command_type, struct vkd3d_predicate_command_info *info)
|
||||||
|
{
|
||||||
|
const struct vkd3d_predicate_ops *predicate_ops = &meta_ops->predicate;
|
||||||
|
|
||||||
|
info->vk_pipeline_layout = predicate_ops->vk_command_pipeline_layout;
|
||||||
|
info->vk_pipeline = predicate_ops->vk_command_pipelines[command_type];
|
||||||
|
info->data_size = predicate_ops->data_sizes[command_type];
|
||||||
|
}
|
||||||
|
|
||||||
HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
|
HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
|
||||||
{
|
{
|
||||||
HRESULT hr;
|
HRESULT hr;
|
||||||
|
@ -1125,8 +1218,13 @@ HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device
|
||||||
if (FAILED(hr = vkd3d_query_ops_init(&meta_ops->query, device)))
|
if (FAILED(hr = vkd3d_query_ops_init(&meta_ops->query, device)))
|
||||||
goto fail_query_ops;
|
goto fail_query_ops;
|
||||||
|
|
||||||
|
if (FAILED(hr = vkd3d_predicate_ops_init(&meta_ops->predicate, device)))
|
||||||
|
goto fail_predicate_ops;
|
||||||
|
|
||||||
return S_OK;
|
return S_OK;
|
||||||
|
|
||||||
|
fail_predicate_ops:
|
||||||
|
vkd3d_query_ops_cleanup(&meta_ops->query, device);
|
||||||
fail_query_ops:
|
fail_query_ops:
|
||||||
vkd3d_swapchain_ops_cleanup(&meta_ops->swapchain, device);
|
vkd3d_swapchain_ops_cleanup(&meta_ops->swapchain, device);
|
||||||
fail_swapchain_ops:
|
fail_swapchain_ops:
|
||||||
|
@ -1141,6 +1239,7 @@ fail_common:
|
||||||
|
|
||||||
HRESULT vkd3d_meta_ops_cleanup(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
|
HRESULT vkd3d_meta_ops_cleanup(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
|
||||||
{
|
{
|
||||||
|
vkd3d_predicate_ops_cleanup(&meta_ops->predicate, device);
|
||||||
vkd3d_query_ops_cleanup(&meta_ops->query, device);
|
vkd3d_query_ops_cleanup(&meta_ops->query, device);
|
||||||
vkd3d_swapchain_ops_cleanup(&meta_ops->swapchain, device);
|
vkd3d_swapchain_ops_cleanup(&meta_ops->swapchain, device);
|
||||||
vkd3d_copy_image_ops_cleanup(&meta_ops->copy_image, device);
|
vkd3d_copy_image_ops_cleanup(&meta_ops->copy_image, device);
|
||||||
|
|
|
@ -0,0 +1,40 @@
|
||||||
|
#version 450

#extension GL_EXT_buffer_reference : require

// A single invocation per dispatch handles the whole argument block.
layout(local_size_x = 1) in;

// Number of 32-bit argument words to emit.
layout(constant_id = 0) const uint c_arg_count = 0;
// true: read the arguments from src_args; false: read them from cmd_args.
layout(constant_id = 1) const bool c_arg_indirect = false;

// 32-bit predicate value; non-zero means the command executes.
layout(std430, buffer_reference, buffer_reference_align = 4)
readonly buffer predicate_t {
  uint data;
};

// Source argument words, used when c_arg_indirect is set.
layout(std430, buffer_reference, buffer_reference_align = 4)
readonly buffer src_args_t {
  uint data[];
};

// Destination argument words written by this shader.
layout(std430, buffer_reference, buffer_reference_align = 4)
writeonly buffer dst_args_t {
  uint data[];
};

layout(push_constant)
uniform u_info_t {
  predicate_t predicate;
  src_args_t src_args;
  dst_args_t dst_args;
  uint cmd_args[5];
};

// Copies c_arg_count argument words to dst_args, replacing every word with
// zero when the predicate value is zero.
void main() {
  bool do_exec = predicate.data != 0u;

  for (uint i = 0; i < c_arg_count; i++) {
    uint arg;

    if (c_arg_indirect)
      arg = src_args.data[i];
    else
      arg = cmd_args[i];

    if (!do_exec)
      arg = 0u;

    dst_args.data[i] = arg;
  }
}
|
|
@ -0,0 +1,26 @@
|
||||||
|
#version 450

#extension GL_EXT_buffer_reference : require

// A single invocation resolves a single predicate value.
layout(local_size_x = 1) in;

// Source predicate data, read as two 32-bit words at 8-byte alignment.
layout(std430, buffer_reference, buffer_reference_align = 8)
readonly buffer src_predicate_t {
  uvec2 data;
};

// Resolved 32-bit predicate: written as either 0 or 1.
layout(std430, buffer_reference, buffer_reference_align = 4)
writeonly buffer dst_predicate_t {
  uint data;
};

layout(push_constant)
uniform u_info_t {
  src_predicate_t src;
  dst_predicate_t dst;
  bool invert;
};

// With invert == false the result is 0 when both source words are zero and 1
// otherwise; with invert == true the result is flipped.
void main() {
  uvec2 value = src.data;
  bool is_zero = (value.x | value.y) == 0u;

  dst.data = uint(is_zero == invert);
}
|
|
@ -1818,6 +1818,61 @@ HRESULT vkd3d_query_ops_init(struct vkd3d_query_ops *meta_query_ops,
|
||||||
void vkd3d_query_ops_cleanup(struct vkd3d_query_ops *meta_query_ops,
|
void vkd3d_query_ops_cleanup(struct vkd3d_query_ops *meta_query_ops,
|
||||||
struct d3d12_device *device);
|
struct d3d12_device *device);
|
||||||
|
|
||||||
|
union vkd3d_predicate_command_direct_args
|
||||||
|
{
|
||||||
|
VkDispatchIndirectCommand dispatch;
|
||||||
|
VkDrawIndirectCommand draw;
|
||||||
|
VkDrawIndexedIndirectCommand draw_indexed;
|
||||||
|
uint32_t draw_count;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct vkd3d_predicate_command_args
|
||||||
|
{
|
||||||
|
VkDeviceAddress predicate_va;
|
||||||
|
VkDeviceAddress src_arg_va;
|
||||||
|
VkDeviceAddress dst_arg_va;
|
||||||
|
union vkd3d_predicate_command_direct_args args;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Command kinds that have a dedicated predicate command pipeline. The values
 * are used directly as indices into vk_command_pipelines[] and data_sizes[],
 * and follow the order of the specialization table in
 * vkd3d_predicate_ops_init().
 */
enum vkd3d_predicate_command_type
{
    VKD3D_PREDICATE_COMMAND_DRAW                = 0,
    VKD3D_PREDICATE_COMMAND_DRAW_INDEXED        = 1,
    VKD3D_PREDICATE_COMMAND_DRAW_INDIRECT       = 2,
    VKD3D_PREDICATE_COMMAND_DRAW_INDIRECT_COUNT = 3,
    VKD3D_PREDICATE_COMMAND_DISPATCH            = 4,
    VKD3D_PREDICATE_COMMAND_DISPATCH_INDIRECT   = 5,
    /* Number of command types; not a valid command itself. */
    VKD3D_PREDICATE_COMMAND_COUNT
};
|
||||||
|
|
||||||
|
struct vkd3d_predicate_command_info
|
||||||
|
{
|
||||||
|
VkPipelineLayout vk_pipeline_layout;
|
||||||
|
VkPipeline vk_pipeline;
|
||||||
|
uint32_t data_size;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct vkd3d_predicate_resolve_args
|
||||||
|
{
|
||||||
|
VkDeviceAddress src_va;
|
||||||
|
VkDeviceAddress dst_va;
|
||||||
|
VkBool32 invert;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct vkd3d_predicate_ops
|
||||||
|
{
|
||||||
|
VkPipelineLayout vk_command_pipeline_layout;
|
||||||
|
VkPipelineLayout vk_resolve_pipeline_layout;
|
||||||
|
VkPipeline vk_command_pipelines[VKD3D_PREDICATE_COMMAND_COUNT];
|
||||||
|
VkPipeline vk_resolve_pipeline;
|
||||||
|
uint32_t data_sizes[VKD3D_PREDICATE_COMMAND_COUNT];
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Creates the predicate pipeline layouts and pipelines; returns S_OK or a
 * failure HRESULT translated from the Vulkan result. */
HRESULT vkd3d_predicate_ops_init(struct vkd3d_predicate_ops *meta_predicate_ops,
        struct d3d12_device *device);

/* Destroys all pipelines and pipeline layouts held by meta_predicate_ops. */
void vkd3d_predicate_ops_cleanup(struct vkd3d_predicate_ops *meta_predicate_ops,
        struct d3d12_device *device);
|
||||||
|
|
||||||
struct vkd3d_meta_ops_common
|
struct vkd3d_meta_ops_common
|
||||||
{
|
{
|
||||||
VkShaderModule vk_module_fullscreen_vs;
|
VkShaderModule vk_module_fullscreen_vs;
|
||||||
|
@ -1832,6 +1887,7 @@ struct vkd3d_meta_ops
|
||||||
struct vkd3d_copy_image_ops copy_image;
|
struct vkd3d_copy_image_ops copy_image;
|
||||||
struct vkd3d_swapchain_ops swapchain;
|
struct vkd3d_swapchain_ops swapchain;
|
||||||
struct vkd3d_query_ops query;
|
struct vkd3d_query_ops query;
|
||||||
|
struct vkd3d_predicate_ops predicate;
|
||||||
};
|
};
|
||||||
|
|
||||||
HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device);
|
HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device);
|
||||||
|
@ -1857,6 +1913,9 @@ const struct vkd3d_format *vkd3d_meta_get_copy_image_attachment_format(struct vk
|
||||||
HRESULT vkd3d_meta_get_swapchain_pipeline(struct vkd3d_meta_ops *meta_ops,
|
HRESULT vkd3d_meta_get_swapchain_pipeline(struct vkd3d_meta_ops *meta_ops,
|
||||||
const struct vkd3d_swapchain_pipeline_key *key, struct vkd3d_swapchain_info *info);
|
const struct vkd3d_swapchain_pipeline_key *key, struct vkd3d_swapchain_info *info);
|
||||||
|
|
||||||
|
/* Looks up the pipeline layout, pipeline and argument data size for
 * command_type and stores them in *info. */
void vkd3d_meta_get_predicate_pipeline(struct vkd3d_meta_ops *meta_ops,
        enum vkd3d_predicate_command_type command_type, struct vkd3d_predicate_command_info *info);
|
||||||
|
|
||||||
enum vkd3d_time_domain_flag
|
enum vkd3d_time_domain_flag
|
||||||
{
|
{
|
||||||
VKD3D_TIME_DOMAIN_DEVICE = 0x00000001u,
|
VKD3D_TIME_DOMAIN_DEVICE = 0x00000001u,
|
||||||
|
|
|
@ -41,7 +41,9 @@ enum vkd3d_meta_copy_mode
|
||||||
#include <cs_clear_uav_image_2d_uint.h>
|
#include <cs_clear_uav_image_2d_uint.h>
|
||||||
#include <cs_clear_uav_image_3d_float.h>
|
#include <cs_clear_uav_image_3d_float.h>
|
||||||
#include <cs_clear_uav_image_3d_uint.h>
|
#include <cs_clear_uav_image_3d_uint.h>
|
||||||
|
#include <cs_predicate_command.h>
|
||||||
#include <cs_resolve_binary_queries.h>
|
#include <cs_resolve_binary_queries.h>
|
||||||
|
#include <cs_resolve_predicate.h>
|
||||||
#include <vs_fullscreen_layer.h>
|
#include <vs_fullscreen_layer.h>
|
||||||
#include <vs_fullscreen.h>
|
#include <vs_fullscreen.h>
|
||||||
#include <gs_fullscreen.h>
|
#include <gs_fullscreen.h>
|
||||||
|
|
Loading…
Reference in New Issue