vkd3d: Redirect push constants to their bind point stages.

Gives a massive boost on NVIDIA for some reason.
RADV defers push constant updates, so ALL_STAGES doesn't carry
that much of a perf hit there.

~20% uplift in RE2, ~5% uplift in CP77 from some quick and dirty testing.
Seems to be heavily content dependent either way.

Also a bug fix, since we would clobber graphics push constants from
compute and vice versa if both graphics and compute used the same root
signature.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
This commit is contained in:
Hans-Kristian Arntzen 2021-02-25 19:24:10 +01:00
parent 3839f5e17c
commit 89fbe334df
4 changed files with 127 additions and 36 deletions

View File

@ -3101,6 +3101,17 @@ static void d3d12_command_list_invalidate_current_render_pass(struct d3d12_comma
d3d12_command_list_end_current_render_pass(list, false);
}
static void d3d12_command_list_invalidate_push_constants(struct vkd3d_pipeline_bindings *bindings)
{
if (bindings->root_signature->descriptor_table_count)
bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS;
bindings->root_descriptor_dirty_mask =
bindings->root_signature->root_descriptor_raw_va_mask |
bindings->root_signature->root_descriptor_push_mask;
bindings->root_constant_dirty_mask = bindings->root_signature->root_constant_mask;
}
static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_list *list,
VkPipelineBindPoint bind_point, bool invalidate_descriptor_heaps)
{
@ -3116,13 +3127,7 @@ static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_l
if (bindings->static_sampler_set)
bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET;
if (bindings->root_signature->descriptor_table_count)
bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS;
bindings->root_descriptor_dirty_mask =
bindings->root_signature->root_descriptor_raw_va_mask |
bindings->root_signature->root_descriptor_push_mask;
bindings->root_constant_dirty_mask = bindings->root_signature->root_constant_mask;
d3d12_command_list_invalidate_push_constants(bindings);
if (invalidate_descriptor_heaps)
{
@ -3860,6 +3865,7 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list,
memset(list->pipeline_bindings, 0, sizeof(list->pipeline_bindings));
memset(list->descriptor_heaps, 0, sizeof(list->descriptor_heaps));
list->active_bind_point = VK_PIPELINE_BIND_POINT_MAX_ENUM;
list->state = NULL;
@ -4176,12 +4182,15 @@ static void d3d12_command_list_update_descriptor_table_offsets(struct d3d12_comm
}
/* Set descriptor offsets */
VK_CALL(vkCmdPushConstants(list->vk_command_buffer,
root_signature->vk_pipeline_layout,
root_signature->push_constant_range.stageFlags,
root_signature->descriptor_table_offset,
root_signature->descriptor_table_count * sizeof(uint32_t),
table_offsets));
if (bindings->layout.vk_push_stages)
{
VK_CALL(vkCmdPushConstants(list->vk_command_buffer,
bindings->layout.vk_pipeline_layout,
bindings->layout.vk_push_stages,
root_signature->descriptor_table_offset,
root_signature->descriptor_table_count * sizeof(uint32_t),
table_offsets));
}
bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS;
}
@ -4236,7 +4245,6 @@ static void d3d12_command_list_update_descriptor_heaps(struct d3d12_command_list
VkPipelineBindPoint bind_point)
{
struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
const struct d3d12_root_signature *root_signature = bindings->root_signature;
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
while (bindings->descriptor_heap_dirty_mask)
@ -4246,7 +4254,7 @@ static void d3d12_command_list_update_descriptor_heaps(struct d3d12_command_list
if (list->descriptor_heaps[heap_index])
{
VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bind_point,
root_signature->vk_pipeline_layout, heap_index, 1,
bindings->layout.vk_pipeline_layout, heap_index, 1,
&list->descriptor_heaps[heap_index], 0, NULL));
}
}
@ -4260,7 +4268,7 @@ static void d3d12_command_list_update_static_samplers(struct d3d12_command_list
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bind_point,
root_signature->vk_pipeline_layout,
bindings->layout.vk_pipeline_layout,
root_signature->sampler_descriptor_set,
1, &bindings->static_sampler_set, 0, NULL));
@ -4276,14 +4284,20 @@ static void d3d12_command_list_update_root_constants(struct d3d12_command_list *
const struct vkd3d_shader_root_constant *root_constant;
unsigned int root_parameter_index;
if (!bindings->layout.vk_push_stages)
{
bindings->root_constant_dirty_mask = 0;
return;
}
while (bindings->root_constant_dirty_mask)
{
root_parameter_index = vkd3d_bitmask_iter64(&bindings->root_constant_dirty_mask);
root_constant = root_signature_get_32bit_constants(root_signature, root_parameter_index);
VK_CALL(vkCmdPushConstants(list->vk_command_buffer,
root_signature->vk_pipeline_layout,
root_signature->push_constant_range.stageFlags,
bindings->layout.vk_pipeline_layout,
bindings->layout.vk_push_stages,
root_constant->constant_index * sizeof(uint32_t),
root_constant->constant_count * sizeof(uint32_t),
&bindings->root_constants[root_constant->constant_index]));
@ -4421,11 +4435,11 @@ static void d3d12_command_list_update_root_descriptors(struct d3d12_command_list
descriptor_write_count += 1;
}
else if (va_count)
else if (va_count && bindings->layout.vk_push_stages)
{
VK_CALL(vkCmdPushConstants(list->vk_command_buffer,
root_signature->vk_pipeline_layout,
root_signature->push_constant_range.stageFlags,
bindings->layout.vk_pipeline_layout,
bindings->layout.vk_push_stages,
0, va_count * sizeof(*root_parameter_data.root_descriptor_vas),
root_parameter_data.root_descriptor_vas));
}
@ -4438,13 +4452,13 @@ static void d3d12_command_list_update_root_descriptors(struct d3d12_command_list
VK_CALL(vkUpdateDescriptorSets(list->device->vk_device,
descriptor_write_count, descriptor_writes, 0, NULL));
VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bind_point,
root_signature->vk_pipeline_layout, root_signature->root_descriptor_set,
bindings->layout.vk_pipeline_layout, root_signature->root_descriptor_set,
1, &descriptor_set, 0, NULL));
}
else
{
VK_CALL(vkCmdPushDescriptorSetKHR(list->vk_command_buffer, bind_point,
root_signature->vk_pipeline_layout, root_signature->root_descriptor_set,
bindings->layout.vk_pipeline_layout, root_signature->root_descriptor_set,
descriptor_write_count, descriptor_writes));
}
}
@ -4473,6 +4487,13 @@ static void d3d12_command_list_update_descriptors(struct d3d12_command_list *lis
}
else
{
if (list->active_bind_point != bind_point)
{
/* We might have clobbered push constants,
* invalidate all state which can affect push constants. */
d3d12_command_list_invalidate_push_constants(bindings);
}
if (bindings->root_descriptor_dirty_mask)
d3d12_command_list_update_root_descriptors(list, bind_point);
@ -4482,6 +4503,8 @@ static void d3d12_command_list_update_descriptors(struct d3d12_command_list *lis
if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS)
d3d12_command_list_update_descriptor_table_offsets(list, bind_point);
}
list->active_bind_point = bind_point;
}
static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *list)
@ -6268,6 +6291,22 @@ static void d3d12_command_list_set_root_signature(struct d3d12_command_list *lis
bindings->root_signature = root_signature;
bindings->static_sampler_set = VK_NULL_HANDLE;
switch (bind_point)
{
case VK_PIPELINE_BIND_POINT_GRAPHICS:
bindings->layout = root_signature->graphics;
break;
case VK_PIPELINE_BIND_POINT_COMPUTE:
bindings->layout = root_signature->compute;
break;
default:
/* TODO, RT will be relevant here later somehow.
* It will get awkward since RayGen happens in compute on DXR. */
break;
}
if (root_signature && root_signature->vk_sampler_set)
bindings->static_sampler_set = root_signature->vk_sampler_set;

View File

@ -677,7 +677,7 @@ static HRESULT d3d12_state_object_compile_pipeline(struct d3d12_state_object *ob
/* FIXME: What if we have no global root signature? */
if (!global_signature)
return E_INVALIDARG;
pipeline_create_info.layout = global_signature->vk_pipeline_layout;
pipeline_create_info.layout = global_signature->raygen.vk_pipeline_layout;
pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE;
pipeline_create_info.basePipelineIndex = -1;
pipeline_create_info.pGroups = data->groups;

View File

@ -67,7 +67,9 @@ static void d3d12_root_signature_cleanup(struct d3d12_root_signature *root_signa
vkd3d_sampler_state_free_descriptor_set(&device->sampler_state, device,
root_signature->vk_sampler_set, root_signature->vk_sampler_pool);
VK_CALL(vkDestroyPipelineLayout(device->vk_device, root_signature->vk_pipeline_layout, NULL));
VK_CALL(vkDestroyPipelineLayout(device->vk_device, root_signature->graphics.vk_pipeline_layout, NULL));
VK_CALL(vkDestroyPipelineLayout(device->vk_device, root_signature->compute.vk_pipeline_layout, NULL));
VK_CALL(vkDestroyPipelineLayout(device->vk_device, root_signature->raygen.vk_pipeline_layout, NULL));
VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, root_signature->vk_sampler_descriptor_layout, NULL));
VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, root_signature->vk_root_descriptor_layout, NULL));
@ -315,6 +317,24 @@ static HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device,
return S_OK;
}
/* Creates the pipeline layout used by a single bind point.
 *
 * The root signature's push constant range is restricted to the shader
 * stages in `stages`. The resulting stage mask is stored in
 * bind_point_layout->vk_push_stages and the created layout in
 * bind_point_layout->vk_pipeline_layout. When no stage remains, or the range
 * is empty, the layout is created without a push constant range and
 * vk_push_stages is 0.
 *
 * Returns the HRESULT of vkd3d_create_pipeline_layout(). */
static HRESULT vkd3d_create_pipeline_layout_for_stage_mask(struct d3d12_device *device,
        unsigned int set_layout_count, const VkDescriptorSetLayout *set_layouts,
        const VkPushConstantRange *push_constants,
        VkShaderStageFlags stages,
        struct d3d12_bind_point_layout *bind_point_layout)
{
    VkPushConstantRange range;

    /* Can just mask directly since STAGE_ALL and ALL_GRAPHICS are OR masks. */
    range.stageFlags = push_constants->stageFlags & stages;
    range.offset = push_constants->offset;
    range.size = push_constants->size;

    /* Vulkan requires VkPushConstantRange::size to be greater than 0, so an
     * empty range must not be declared in the layout. Clearing the stage mask
     * also records that there is nothing to push for this bind point. */
    if (!range.size)
        range.stageFlags = 0;

    bind_point_layout->vk_push_stages = range.stageFlags;
    return vkd3d_create_pipeline_layout(device, set_layout_count, set_layouts,
            range.stageFlags ? 1 : 0, &range,
            &bind_point_layout->vk_pipeline_layout);
}
struct d3d12_root_signature_info
{
uint32_t binding_count;
@ -475,6 +495,7 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat
{
unsigned int i, j;
/* Stages set later. */
push_constant_range->stageFlags = 0;
push_constant_range->offset = 0;
push_constant_range->size = 0;
@ -972,8 +993,8 @@ static HRESULT d3d12_root_signature_init_global(struct d3d12_root_signature *roo
const struct vkd3d_bindless_state *bindless_state = &device->bindless_state;
VkDescriptorSetLayout set_layouts[VKD3D_MAX_DESCRIPTOR_SETS];
struct vkd3d_descriptor_set_context context;
unsigned int i, push_constant_range_count;
struct d3d12_root_signature_info info;
unsigned int i;
HRESULT hr;
memset(&context, 0, sizeof(context));
@ -1070,17 +1091,40 @@ static HRESULT d3d12_root_signature_init_global(struct d3d12_root_signature *roo
if (FAILED(hr = d3d12_root_signature_init_root_descriptor_tables(root_signature, desc, &info, &context)))
return hr;
push_constant_range_count = 0;
if (root_signature->flags & VKD3D_ROOT_SIGNATURE_USE_INLINE_UNIFORM_BLOCK)
root_signature->push_constant_range.stageFlags = 0;
if (root_signature->push_constant_range.size &&
!(root_signature->flags & VKD3D_ROOT_SIGNATURE_USE_INLINE_UNIFORM_BLOCK))
push_constant_range_count = 1;
/* If we need to use restricted stages in vkCmdPushConstants,
* we are unfortunately required to do it like this
* since stageFlags in vkCmdPushConstants must cover at least all stages in the layout.
*
* We can pick the appropriate layout to use in PSO creation.
* In set_root_signature we can bind the appropriate layout as well.
*
* For graphics we can generally rely on visibility mask, but not so for compute and raygen,
* since they use ALL visibility. */
if (FAILED(hr = vkd3d_create_pipeline_layout(device, context.vk_set, set_layouts,
push_constant_range_count, &root_signature->push_constant_range,
&root_signature->vk_pipeline_layout)))
if (FAILED(hr = vkd3d_create_pipeline_layout_for_stage_mask(
device, context.vk_set, set_layouts,
&root_signature->push_constant_range,
VK_SHADER_STAGE_ALL_GRAPHICS, &root_signature->graphics)))
return hr;
if (FAILED(hr = vkd3d_create_pipeline_layout_for_stage_mask(
device, context.vk_set, set_layouts,
&root_signature->push_constant_range,
VK_SHADER_STAGE_COMPUTE_BIT, &root_signature->compute)))
return hr;
if (d3d12_device_supports_ray_tracing_tier_1_0(device))
{
if (FAILED(hr = vkd3d_create_pipeline_layout_for_stage_mask(
device, context.vk_set, set_layouts,
&root_signature->push_constant_range,
VK_SHADER_STAGE_RAYGEN_BIT_KHR, &root_signature->raygen)))
return hr;
}
return S_OK;
}
@ -1939,7 +1983,7 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st
}
hr = vkd3d_create_compute_pipeline(device, &desc->cs, &shader_interface,
root_signature->vk_pipeline_layout, state->vk_pso_cache, &state->compute.vk_pipeline,
root_signature->compute.vk_pipeline_layout, state->vk_pso_cache, &state->compute.vk_pipeline,
&state->compute.meta);
if (FAILED(hr))
@ -3049,7 +3093,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
(desc->primitive_topology_type != D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH || graphics->patch_vertex_count != 0) &&
desc->primitive_topology_type != D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED;
graphics->pipeline_layout = root_signature->vk_pipeline_layout;
graphics->pipeline_layout = root_signature->graphics.vk_pipeline_layout;
graphics->pipeline = VK_NULL_HANDLE;
state->device = device;

View File

@ -1126,12 +1126,18 @@ enum vkd3d_root_signature_flag
};
/* ID3D12RootSignature */
/* Pipeline layout state a root signature keeps per bind point
 * (the root signature holds one each for graphics, compute and raygen). */
struct d3d12_bind_point_layout
{
/* Layout used for pipeline creation and passed to vkCmdBindDescriptorSets /
 * vkCmdPushConstants while this bind point's root signature is bound. */
VkPipelineLayout vk_pipeline_layout;
/* Stage mask passed to vkCmdPushConstants for this layout. It is the root
 * signature's push constant stages masked down to this bind point; when it
 * is 0 the layout has no push constant range and push constant updates are
 * skipped. */
VkShaderStageFlags vk_push_stages;
};
struct d3d12_root_signature
{
ID3D12RootSignature ID3D12RootSignature_iface;
LONG refcount;
VkPipelineLayout vk_pipeline_layout;
struct d3d12_bind_point_layout graphics, compute, raygen;
VkDescriptorSetLayout vk_sampler_descriptor_layout;
VkDescriptorSetLayout vk_root_descriptor_layout;
@ -1527,6 +1533,7 @@ struct vkd3d_root_descriptor_info
struct vkd3d_pipeline_bindings
{
const struct d3d12_root_signature *root_signature;
struct d3d12_bind_point_layout layout;
VkDescriptorSet static_sampler_set;
uint32_t dirty_flags; /* vkd3d_pipeline_dirty_flags */
@ -1689,6 +1696,7 @@ struct d3d12_command_list
VkRenderPass current_render_pass;
struct vkd3d_dynamic_state dynamic_state;
struct vkd3d_pipeline_bindings pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_COUNT];
VkPipelineBindPoint active_bind_point;
VkDescriptorSet descriptor_heaps[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS];