vkd3d: Redirect push constants to their bind point stages.
Gives a massive boost on NVIDIA for some reason. RADV defers push constant updates, so ALL_STAGES doesn't have that much of a perf hit there. ~20% uplift in RE2 and ~5% uplift in CP77 from some quick and dirty testing; the gain seems to be heavily content-dependent either way. This is also a bug fix: previously we would clobber graphics push constants from compute (and vice versa) if both graphics and compute used the same root signature. Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
This commit is contained in:
parent
3839f5e17c
commit
89fbe334df
|
@ -3101,6 +3101,17 @@ static void d3d12_command_list_invalidate_current_render_pass(struct d3d12_comma
|
|||
d3d12_command_list_end_current_render_pass(list, false);
|
||||
}
|
||||
|
||||
static void d3d12_command_list_invalidate_push_constants(struct vkd3d_pipeline_bindings *bindings)
|
||||
{
|
||||
if (bindings->root_signature->descriptor_table_count)
|
||||
bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS;
|
||||
|
||||
bindings->root_descriptor_dirty_mask =
|
||||
bindings->root_signature->root_descriptor_raw_va_mask |
|
||||
bindings->root_signature->root_descriptor_push_mask;
|
||||
bindings->root_constant_dirty_mask = bindings->root_signature->root_constant_mask;
|
||||
}
|
||||
|
||||
static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_list *list,
|
||||
VkPipelineBindPoint bind_point, bool invalidate_descriptor_heaps)
|
||||
{
|
||||
|
@ -3116,13 +3127,7 @@ static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_l
|
|||
if (bindings->static_sampler_set)
|
||||
bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET;
|
||||
|
||||
if (bindings->root_signature->descriptor_table_count)
|
||||
bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS;
|
||||
|
||||
bindings->root_descriptor_dirty_mask =
|
||||
bindings->root_signature->root_descriptor_raw_va_mask |
|
||||
bindings->root_signature->root_descriptor_push_mask;
|
||||
bindings->root_constant_dirty_mask = bindings->root_signature->root_constant_mask;
|
||||
d3d12_command_list_invalidate_push_constants(bindings);
|
||||
|
||||
if (invalidate_descriptor_heaps)
|
||||
{
|
||||
|
@ -3860,6 +3865,7 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list,
|
|||
|
||||
memset(list->pipeline_bindings, 0, sizeof(list->pipeline_bindings));
|
||||
memset(list->descriptor_heaps, 0, sizeof(list->descriptor_heaps));
|
||||
list->active_bind_point = VK_PIPELINE_BIND_POINT_MAX_ENUM;
|
||||
|
||||
list->state = NULL;
|
||||
|
||||
|
@ -4176,12 +4182,15 @@ static void d3d12_command_list_update_descriptor_table_offsets(struct d3d12_comm
|
|||
}
|
||||
|
||||
/* Set descriptor offsets */
|
||||
VK_CALL(vkCmdPushConstants(list->vk_command_buffer,
|
||||
root_signature->vk_pipeline_layout,
|
||||
root_signature->push_constant_range.stageFlags,
|
||||
root_signature->descriptor_table_offset,
|
||||
root_signature->descriptor_table_count * sizeof(uint32_t),
|
||||
table_offsets));
|
||||
if (bindings->layout.vk_push_stages)
|
||||
{
|
||||
VK_CALL(vkCmdPushConstants(list->vk_command_buffer,
|
||||
bindings->layout.vk_pipeline_layout,
|
||||
bindings->layout.vk_push_stages,
|
||||
root_signature->descriptor_table_offset,
|
||||
root_signature->descriptor_table_count * sizeof(uint32_t),
|
||||
table_offsets));
|
||||
}
|
||||
|
||||
bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS;
|
||||
}
|
||||
|
@ -4236,7 +4245,6 @@ static void d3d12_command_list_update_descriptor_heaps(struct d3d12_command_list
|
|||
VkPipelineBindPoint bind_point)
|
||||
{
|
||||
struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
|
||||
const struct d3d12_root_signature *root_signature = bindings->root_signature;
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
|
||||
|
||||
while (bindings->descriptor_heap_dirty_mask)
|
||||
|
@ -4246,7 +4254,7 @@ static void d3d12_command_list_update_descriptor_heaps(struct d3d12_command_list
|
|||
if (list->descriptor_heaps[heap_index])
|
||||
{
|
||||
VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bind_point,
|
||||
root_signature->vk_pipeline_layout, heap_index, 1,
|
||||
bindings->layout.vk_pipeline_layout, heap_index, 1,
|
||||
&list->descriptor_heaps[heap_index], 0, NULL));
|
||||
}
|
||||
}
|
||||
|
@ -4260,7 +4268,7 @@ static void d3d12_command_list_update_static_samplers(struct d3d12_command_list
|
|||
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
|
||||
|
||||
VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bind_point,
|
||||
root_signature->vk_pipeline_layout,
|
||||
bindings->layout.vk_pipeline_layout,
|
||||
root_signature->sampler_descriptor_set,
|
||||
1, &bindings->static_sampler_set, 0, NULL));
|
||||
|
||||
|
@ -4276,14 +4284,20 @@ static void d3d12_command_list_update_root_constants(struct d3d12_command_list *
|
|||
const struct vkd3d_shader_root_constant *root_constant;
|
||||
unsigned int root_parameter_index;
|
||||
|
||||
if (!bindings->layout.vk_push_stages)
|
||||
{
|
||||
bindings->root_constant_dirty_mask = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
while (bindings->root_constant_dirty_mask)
|
||||
{
|
||||
root_parameter_index = vkd3d_bitmask_iter64(&bindings->root_constant_dirty_mask);
|
||||
root_constant = root_signature_get_32bit_constants(root_signature, root_parameter_index);
|
||||
|
||||
VK_CALL(vkCmdPushConstants(list->vk_command_buffer,
|
||||
root_signature->vk_pipeline_layout,
|
||||
root_signature->push_constant_range.stageFlags,
|
||||
bindings->layout.vk_pipeline_layout,
|
||||
bindings->layout.vk_push_stages,
|
||||
root_constant->constant_index * sizeof(uint32_t),
|
||||
root_constant->constant_count * sizeof(uint32_t),
|
||||
&bindings->root_constants[root_constant->constant_index]));
|
||||
|
@ -4421,11 +4435,11 @@ static void d3d12_command_list_update_root_descriptors(struct d3d12_command_list
|
|||
|
||||
descriptor_write_count += 1;
|
||||
}
|
||||
else if (va_count)
|
||||
else if (va_count && bindings->layout.vk_push_stages)
|
||||
{
|
||||
VK_CALL(vkCmdPushConstants(list->vk_command_buffer,
|
||||
root_signature->vk_pipeline_layout,
|
||||
root_signature->push_constant_range.stageFlags,
|
||||
bindings->layout.vk_pipeline_layout,
|
||||
bindings->layout.vk_push_stages,
|
||||
0, va_count * sizeof(*root_parameter_data.root_descriptor_vas),
|
||||
root_parameter_data.root_descriptor_vas));
|
||||
}
|
||||
|
@ -4438,13 +4452,13 @@ static void d3d12_command_list_update_root_descriptors(struct d3d12_command_list
|
|||
VK_CALL(vkUpdateDescriptorSets(list->device->vk_device,
|
||||
descriptor_write_count, descriptor_writes, 0, NULL));
|
||||
VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bind_point,
|
||||
root_signature->vk_pipeline_layout, root_signature->root_descriptor_set,
|
||||
bindings->layout.vk_pipeline_layout, root_signature->root_descriptor_set,
|
||||
1, &descriptor_set, 0, NULL));
|
||||
}
|
||||
else
|
||||
{
|
||||
VK_CALL(vkCmdPushDescriptorSetKHR(list->vk_command_buffer, bind_point,
|
||||
root_signature->vk_pipeline_layout, root_signature->root_descriptor_set,
|
||||
bindings->layout.vk_pipeline_layout, root_signature->root_descriptor_set,
|
||||
descriptor_write_count, descriptor_writes));
|
||||
}
|
||||
}
|
||||
|
@ -4473,6 +4487,13 @@ static void d3d12_command_list_update_descriptors(struct d3d12_command_list *lis
|
|||
}
|
||||
else
|
||||
{
|
||||
if (list->active_bind_point != bind_point)
|
||||
{
|
||||
/* We might have clobbered push constants,
|
||||
* invalidate all state which can affect push constants. */
|
||||
d3d12_command_list_invalidate_push_constants(bindings);
|
||||
}
|
||||
|
||||
if (bindings->root_descriptor_dirty_mask)
|
||||
d3d12_command_list_update_root_descriptors(list, bind_point);
|
||||
|
||||
|
@ -4482,6 +4503,8 @@ static void d3d12_command_list_update_descriptors(struct d3d12_command_list *lis
|
|||
if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS)
|
||||
d3d12_command_list_update_descriptor_table_offsets(list, bind_point);
|
||||
}
|
||||
|
||||
list->active_bind_point = bind_point;
|
||||
}
|
||||
|
||||
static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *list)
|
||||
|
@ -6268,6 +6291,22 @@ static void d3d12_command_list_set_root_signature(struct d3d12_command_list *lis
|
|||
bindings->root_signature = root_signature;
|
||||
bindings->static_sampler_set = VK_NULL_HANDLE;
|
||||
|
||||
switch (bind_point)
|
||||
{
|
||||
case VK_PIPELINE_BIND_POINT_GRAPHICS:
|
||||
bindings->layout = root_signature->graphics;
|
||||
break;
|
||||
|
||||
case VK_PIPELINE_BIND_POINT_COMPUTE:
|
||||
bindings->layout = root_signature->compute;
|
||||
break;
|
||||
|
||||
default:
|
||||
/* TODO, RT will be relevant here later somehow.
|
||||
* It will get awkward since RayGen happens in compute on DXR. */
|
||||
break;
|
||||
}
|
||||
|
||||
if (root_signature && root_signature->vk_sampler_set)
|
||||
bindings->static_sampler_set = root_signature->vk_sampler_set;
|
||||
|
||||
|
|
|
@ -677,7 +677,7 @@ static HRESULT d3d12_state_object_compile_pipeline(struct d3d12_state_object *ob
|
|||
/* FIXME: What if we have no global root signature? */
|
||||
if (!global_signature)
|
||||
return E_INVALIDARG;
|
||||
pipeline_create_info.layout = global_signature->vk_pipeline_layout;
|
||||
pipeline_create_info.layout = global_signature->raygen.vk_pipeline_layout;
|
||||
pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE;
|
||||
pipeline_create_info.basePipelineIndex = -1;
|
||||
pipeline_create_info.pGroups = data->groups;
|
||||
|
|
|
@ -67,7 +67,9 @@ static void d3d12_root_signature_cleanup(struct d3d12_root_signature *root_signa
|
|||
vkd3d_sampler_state_free_descriptor_set(&device->sampler_state, device,
|
||||
root_signature->vk_sampler_set, root_signature->vk_sampler_pool);
|
||||
|
||||
VK_CALL(vkDestroyPipelineLayout(device->vk_device, root_signature->vk_pipeline_layout, NULL));
|
||||
VK_CALL(vkDestroyPipelineLayout(device->vk_device, root_signature->graphics.vk_pipeline_layout, NULL));
|
||||
VK_CALL(vkDestroyPipelineLayout(device->vk_device, root_signature->compute.vk_pipeline_layout, NULL));
|
||||
VK_CALL(vkDestroyPipelineLayout(device->vk_device, root_signature->raygen.vk_pipeline_layout, NULL));
|
||||
VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, root_signature->vk_sampler_descriptor_layout, NULL));
|
||||
VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, root_signature->vk_root_descriptor_layout, NULL));
|
||||
|
||||
|
@ -315,6 +317,24 @@ static HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device,
|
|||
return S_OK;
|
||||
}
|
||||
|
||||
static HRESULT vkd3d_create_pipeline_layout_for_stage_mask(struct d3d12_device *device,
|
||||
unsigned int set_layout_count, const VkDescriptorSetLayout *set_layouts,
|
||||
const VkPushConstantRange *push_constants,
|
||||
VkShaderStageFlags stages,
|
||||
struct d3d12_bind_point_layout *bind_point_layout)
|
||||
{
|
||||
VkPushConstantRange range;
|
||||
/* Can just mask directly since STAGE_ALL and ALL_GRAPHICS are OR masks. */
|
||||
range.stageFlags = push_constants->stageFlags & stages;
|
||||
range.offset = push_constants->offset;
|
||||
range.size = push_constants->size;
|
||||
|
||||
bind_point_layout->vk_push_stages = range.stageFlags;
|
||||
return vkd3d_create_pipeline_layout(device, set_layout_count, set_layouts,
|
||||
range.stageFlags ? 1 : 0, &range,
|
||||
&bind_point_layout->vk_pipeline_layout);
|
||||
}
|
||||
|
||||
struct d3d12_root_signature_info
|
||||
{
|
||||
uint32_t binding_count;
|
||||
|
@ -475,6 +495,7 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat
|
|||
{
|
||||
unsigned int i, j;
|
||||
|
||||
/* Stages set later. */
|
||||
push_constant_range->stageFlags = 0;
|
||||
push_constant_range->offset = 0;
|
||||
push_constant_range->size = 0;
|
||||
|
@ -972,8 +993,8 @@ static HRESULT d3d12_root_signature_init_global(struct d3d12_root_signature *roo
|
|||
const struct vkd3d_bindless_state *bindless_state = &device->bindless_state;
|
||||
VkDescriptorSetLayout set_layouts[VKD3D_MAX_DESCRIPTOR_SETS];
|
||||
struct vkd3d_descriptor_set_context context;
|
||||
unsigned int i, push_constant_range_count;
|
||||
struct d3d12_root_signature_info info;
|
||||
unsigned int i;
|
||||
HRESULT hr;
|
||||
|
||||
memset(&context, 0, sizeof(context));
|
||||
|
@ -1070,17 +1091,40 @@ static HRESULT d3d12_root_signature_init_global(struct d3d12_root_signature *roo
|
|||
if (FAILED(hr = d3d12_root_signature_init_root_descriptor_tables(root_signature, desc, &info, &context)))
|
||||
return hr;
|
||||
|
||||
push_constant_range_count = 0;
|
||||
if (root_signature->flags & VKD3D_ROOT_SIGNATURE_USE_INLINE_UNIFORM_BLOCK)
|
||||
root_signature->push_constant_range.stageFlags = 0;
|
||||
|
||||
if (root_signature->push_constant_range.size &&
|
||||
!(root_signature->flags & VKD3D_ROOT_SIGNATURE_USE_INLINE_UNIFORM_BLOCK))
|
||||
push_constant_range_count = 1;
|
||||
/* If we need to use restricted stages in vkCmdPushConstants,
|
||||
* we are unfortunately required to do it like this
|
||||
* since stageFlags in vkCmdPushConstants must cover at least all stages in the layout.
|
||||
*
|
||||
* We can pick the appropriate layout to use in PSO creation.
|
||||
* In set_root_signature we can bind the appropriate layout as well.
|
||||
*
|
||||
* For graphics we can generally rely on visibility mask, but not so for compute and raygen,
|
||||
* since they use ALL visibility. */
|
||||
|
||||
if (FAILED(hr = vkd3d_create_pipeline_layout(device, context.vk_set, set_layouts,
|
||||
push_constant_range_count, &root_signature->push_constant_range,
|
||||
&root_signature->vk_pipeline_layout)))
|
||||
if (FAILED(hr = vkd3d_create_pipeline_layout_for_stage_mask(
|
||||
device, context.vk_set, set_layouts,
|
||||
&root_signature->push_constant_range,
|
||||
VK_SHADER_STAGE_ALL_GRAPHICS, &root_signature->graphics)))
|
||||
return hr;
|
||||
|
||||
if (FAILED(hr = vkd3d_create_pipeline_layout_for_stage_mask(
|
||||
device, context.vk_set, set_layouts,
|
||||
&root_signature->push_constant_range,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT, &root_signature->compute)))
|
||||
return hr;
|
||||
|
||||
if (d3d12_device_supports_ray_tracing_tier_1_0(device))
|
||||
{
|
||||
if (FAILED(hr = vkd3d_create_pipeline_layout_for_stage_mask(
|
||||
device, context.vk_set, set_layouts,
|
||||
&root_signature->push_constant_range,
|
||||
VK_SHADER_STAGE_RAYGEN_BIT_KHR, &root_signature->raygen)))
|
||||
return hr;
|
||||
}
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
|
@ -1939,7 +1983,7 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st
|
|||
}
|
||||
|
||||
hr = vkd3d_create_compute_pipeline(device, &desc->cs, &shader_interface,
|
||||
root_signature->vk_pipeline_layout, state->vk_pso_cache, &state->compute.vk_pipeline,
|
||||
root_signature->compute.vk_pipeline_layout, state->vk_pso_cache, &state->compute.vk_pipeline,
|
||||
&state->compute.meta);
|
||||
|
||||
if (FAILED(hr))
|
||||
|
@ -3049,7 +3093,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
|
|||
(desc->primitive_topology_type != D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH || graphics->patch_vertex_count != 0) &&
|
||||
desc->primitive_topology_type != D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED;
|
||||
|
||||
graphics->pipeline_layout = root_signature->vk_pipeline_layout;
|
||||
graphics->pipeline_layout = root_signature->graphics.vk_pipeline_layout;
|
||||
graphics->pipeline = VK_NULL_HANDLE;
|
||||
state->device = device;
|
||||
|
||||
|
|
|
@ -1126,12 +1126,18 @@ enum vkd3d_root_signature_flag
|
|||
};
|
||||
|
||||
/* ID3D12RootSignature */
|
||||
struct d3d12_bind_point_layout
|
||||
{
|
||||
VkPipelineLayout vk_pipeline_layout;
|
||||
VkShaderStageFlags vk_push_stages;
|
||||
};
|
||||
|
||||
struct d3d12_root_signature
|
||||
{
|
||||
ID3D12RootSignature ID3D12RootSignature_iface;
|
||||
LONG refcount;
|
||||
|
||||
VkPipelineLayout vk_pipeline_layout;
|
||||
struct d3d12_bind_point_layout graphics, compute, raygen;
|
||||
VkDescriptorSetLayout vk_sampler_descriptor_layout;
|
||||
VkDescriptorSetLayout vk_root_descriptor_layout;
|
||||
|
||||
|
@ -1527,6 +1533,7 @@ struct vkd3d_root_descriptor_info
|
|||
struct vkd3d_pipeline_bindings
|
||||
{
|
||||
const struct d3d12_root_signature *root_signature;
|
||||
struct d3d12_bind_point_layout layout;
|
||||
|
||||
VkDescriptorSet static_sampler_set;
|
||||
uint32_t dirty_flags; /* vkd3d_pipeline_dirty_flags */
|
||||
|
@ -1689,6 +1696,7 @@ struct d3d12_command_list
|
|||
VkRenderPass current_render_pass;
|
||||
struct vkd3d_dynamic_state dynamic_state;
|
||||
struct vkd3d_pipeline_bindings pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_COUNT];
|
||||
VkPipelineBindPoint active_bind_point;
|
||||
|
||||
VkDescriptorSet descriptor_heaps[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS];
|
||||
|
||||
|
|
Loading…
Reference in New Issue