vkd3d: Add support for hoisting CBV descriptors to push descriptors.
Bindless CBV is *pretty* bad on NVIDIA, so add a code path which can promote descriptor table CBVs into push descriptors.

We can safely do this with Root Signature 1.1 STATIC or the somewhat obscure STATIC_KEEPING_BUFFER_BOUNDS_CHECKS. With VOLATILE, which basically all titles are using, we can still force this behavior through a config flag, but this is an incorrect speed hack. It works in most titles, however, since bindless CBV is exceptionally rare. We only hoist descriptors when the root signature range has 1 descriptor anyway, so we should avoid any reasonable bindless scenario.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
This commit is contained in:
parent
d758a6e296
commit
13d132f1c4
|
@ -3126,6 +3126,8 @@ static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_l
|
|||
|
||||
if (bindings->static_sampler_set)
|
||||
bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET;
|
||||
if (bindings->root_signature->hoist_info.num_desc)
|
||||
bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS;
|
||||
|
||||
d3d12_command_list_invalidate_push_constants(bindings);
|
||||
|
||||
|
@ -3878,6 +3880,8 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list,
|
|||
list->active_queries_count = 0;
|
||||
list->pending_queries_count = 0;
|
||||
|
||||
list->cbv_srv_uav_descriptors = NULL;
|
||||
|
||||
ID3D12GraphicsCommandList_SetPipelineState(iface, initial_pipeline_state);
|
||||
}
|
||||
|
||||
|
@ -4494,6 +4498,55 @@ static void d3d12_command_list_update_root_descriptors(struct d3d12_command_list
|
|||
}
|
||||
}
|
||||
|
||||
static void d3d12_command_list_update_hoisted_descriptors(struct d3d12_command_list *list,
|
||||
struct vkd3d_pipeline_bindings *bindings)
|
||||
{
|
||||
const struct d3d12_root_signature *rs = bindings->root_signature;
|
||||
const struct vkd3d_descriptor_hoist_desc *hoist_desc;
|
||||
struct vkd3d_root_descriptor_info *root_parameter;
|
||||
union vkd3d_descriptor_info *info;
|
||||
const struct d3d12_desc *desc;
|
||||
unsigned int i;
|
||||
|
||||
/* We don't track dirty table index, just update every hoisted descriptor.
|
||||
* Uniform buffers tend to be updated all the time anyways, so this should be fine. */
|
||||
for (i = 0; i < rs->hoist_info.num_desc; i++)
|
||||
{
|
||||
hoist_desc = &rs->hoist_info.desc[i];
|
||||
|
||||
desc = list->cbv_srv_uav_descriptors;
|
||||
if (desc)
|
||||
desc += bindings->descriptor_tables[hoist_desc->table_index] + hoist_desc->table_offset;
|
||||
|
||||
root_parameter = &bindings->root_descriptors[hoist_desc->parameter_index];
|
||||
|
||||
bindings->root_descriptor_dirty_mask |= 1ull << hoist_desc->parameter_index;
|
||||
bindings->root_descriptor_active_mask |= 1ull << hoist_desc->parameter_index;
|
||||
root_parameter->vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
||||
info = &root_parameter->info;
|
||||
|
||||
if (desc && (desc->metadata.flags & VKD3D_DESCRIPTOR_FLAG_OFFSET_RANGE))
|
||||
{
|
||||
/* Buffer descriptors must be valid on recording time. */
|
||||
info->buffer = desc->info.buffer;
|
||||
}
|
||||
else if (list->device->device_info.robustness2_features.nullDescriptor)
|
||||
{
|
||||
info->buffer.buffer = VK_NULL_HANDLE;
|
||||
info->buffer.offset = 0;
|
||||
info->buffer.range = VK_WHOLE_SIZE;
|
||||
}
|
||||
else
|
||||
{
|
||||
info->buffer.buffer = list->device->null_resources.vk_buffer;
|
||||
info->buffer.offset = 0;
|
||||
info->buffer.range = VKD3D_NULL_BUFFER_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS;
|
||||
}
|
||||
|
||||
static void d3d12_command_list_update_descriptors(struct d3d12_command_list *list,
|
||||
VkPipelineBindPoint bind_point)
|
||||
{
|
||||
|
@ -4527,6 +4580,10 @@ static void d3d12_command_list_update_descriptors(struct d3d12_command_list *lis
|
|||
if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET)
|
||||
d3d12_command_list_update_static_samplers(list, bindings, vk_bind_point, layout);
|
||||
|
||||
/* If we can, hoist descriptors from the descriptor heap into fake root parameters. */
|
||||
if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS)
|
||||
d3d12_command_list_update_hoisted_descriptors(list, bindings);
|
||||
|
||||
if (rs->flags & VKD3D_ROOT_SIGNATURE_USE_INLINE_UNIFORM_BLOCK)
|
||||
{
|
||||
/* Root constants and descriptor table offsets are part of the root descriptor set */
|
||||
|
@ -6353,12 +6410,17 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(d3d12_comman
|
|||
list->descriptor_heaps[j] = heap->vk_descriptor_sets[set_index++];
|
||||
dirty_mask |= 1ull << j;
|
||||
}
|
||||
|
||||
/* In case we need to hoist buffer descriptors. */
|
||||
if (heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)
|
||||
list->cbv_srv_uav_descriptors = (const struct d3d12_desc *) heap->descriptors;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(list->pipeline_bindings); i++)
|
||||
{
|
||||
struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[i];
|
||||
bindings->descriptor_heap_dirty_mask = dirty_mask;
|
||||
bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6431,6 +6493,8 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l
|
|||
|
||||
if (root_signature->descriptor_table_count)
|
||||
bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS;
|
||||
if (root_signature->hoist_info.num_desc)
|
||||
bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS;
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(d3d12_command_list_iface *iface,
|
||||
|
|
|
@ -476,6 +476,7 @@ static const struct vkd3d_debug_option vkd3d_config_options[] =
|
|||
{"vk_debug", VKD3D_CONFIG_FLAG_VULKAN_DEBUG},
|
||||
{"skip_application_workarounds", VKD3D_CONFIG_FLAG_SKIP_APPLICATION_WORKAROUNDS},
|
||||
{"debug_utils", VKD3D_CONFIG_FLAG_DEBUG_UTILS},
|
||||
{"force_static_cbv", VKD3D_CONFIG_FLAG_FORCE_STATIC_CBV},
|
||||
};
|
||||
|
||||
static void vkd3d_config_flags_init_once(void)
|
||||
|
|
|
@ -2901,8 +2901,6 @@ static VkDeviceSize vkd3d_get_required_texel_buffer_alignment(const struct d3d12
|
|||
bool vkd3d_create_raw_r32ui_vk_buffer_view(struct d3d12_device *device,
|
||||
VkBuffer vk_buffer, VkDeviceSize offset, VkDeviceSize range, VkBufferView *vk_view)
|
||||
{
|
||||
/* Called when we know the Vulkan format implicitly and we don't need to search through
|
||||
* format descriptions. */
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
struct VkBufferViewCreateInfo view_desc;
|
||||
VkResult vr;
|
||||
|
|
|
@ -339,9 +339,11 @@ struct d3d12_root_signature_info
|
|||
{
|
||||
uint32_t binding_count;
|
||||
uint32_t descriptor_count;
|
||||
uint32_t parameter_count;
|
||||
|
||||
uint32_t push_descriptor_count;
|
||||
uint32_t root_constant_count;
|
||||
uint32_t hoist_descriptor_count;
|
||||
bool has_raw_va_aux_buffer;
|
||||
bool has_ssbo_offset_buffer;
|
||||
bool has_typed_offset_buffer;
|
||||
|
@ -349,8 +351,30 @@ struct d3d12_root_signature_info
|
|||
uint32_t cost;
|
||||
};
|
||||
|
||||
static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_signature_info *info,
|
||||
static bool d3d12_descriptor_range_can_hoist_cbv_descriptor(
|
||||
struct d3d12_device *device, const D3D12_DESCRIPTOR_RANGE1 *range)
|
||||
{
|
||||
/* Cannot/should not hoist arrays.
|
||||
* We only care about CBVs. SRVs and UAVs are too fiddly
|
||||
* since they don't necessary map to buffers at all. */
|
||||
if (!(device->bindless_state.flags & VKD3D_HOIST_STATIC_TABLE_CBV) ||
|
||||
range->RangeType != D3D12_DESCRIPTOR_RANGE_TYPE_CBV ||
|
||||
range->NumDescriptors != 1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/* If descriptors are not marked volatile, we are guaranteed that the descriptors are
|
||||
* set before updating the root table parameter in the command list.
|
||||
* We can latch the descriptor at draw time.
|
||||
* As a speed hack, we can pretend that all CBVs have this flag set.
|
||||
* Basically no applications set this flag, even though they really could. */
|
||||
return !(range->Flags & D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE) ||
|
||||
(vkd3d_config_flags & VKD3D_CONFIG_FLAG_FORCE_STATIC_CBV);
|
||||
}
|
||||
|
||||
static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_signature_info *info,
|
||||
struct d3d12_device *device, const D3D12_ROOT_SIGNATURE_DESC1 *desc, const D3D12_DESCRIPTOR_RANGE1 *range)
|
||||
{
|
||||
switch (range->RangeType)
|
||||
{
|
||||
|
@ -370,6 +394,13 @@ static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_sig
|
|||
info->has_typed_offset_buffer = true;
|
||||
break;
|
||||
case D3D12_DESCRIPTOR_RANGE_TYPE_CBV:
|
||||
if (!(desc->Flags & D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE) &&
|
||||
d3d12_descriptor_range_can_hoist_cbv_descriptor(device, range))
|
||||
{
|
||||
info->hoist_descriptor_count += 1;
|
||||
}
|
||||
info->binding_count += 1;
|
||||
break;
|
||||
case D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER:
|
||||
info->binding_count += 1;
|
||||
break;
|
||||
|
@ -401,7 +432,7 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i
|
|||
case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE:
|
||||
for (j = 0; j < p->DescriptorTable.NumDescriptorRanges; ++j)
|
||||
if (FAILED(hr = d3d12_root_signature_info_count_descriptors(info,
|
||||
device, &p->DescriptorTable.pDescriptorRanges[j])))
|
||||
device, desc, &p->DescriptorTable.pDescriptorRanges[j])))
|
||||
return hr;
|
||||
|
||||
/* Local root signature directly affects memory layout. */
|
||||
|
@ -445,7 +476,13 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i
|
|||
}
|
||||
}
|
||||
|
||||
info->hoist_descriptor_count = min(info->hoist_descriptor_count, VKD3D_MAX_HOISTED_DESCRIPTORS);
|
||||
info->hoist_descriptor_count = min(info->hoist_descriptor_count, D3D12_MAX_ROOT_COST - desc->NumParameters);
|
||||
|
||||
info->push_descriptor_count += info->hoist_descriptor_count;
|
||||
info->binding_count += info->hoist_descriptor_count;
|
||||
info->binding_count += desc->NumStaticSamplers;
|
||||
info->parameter_count = desc->NumParameters + info->hoist_descriptor_count;
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
|
@ -763,15 +800,18 @@ static HRESULT d3d12_root_signature_init_shader_record_descriptors(
|
|||
}
|
||||
|
||||
static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_signature *root_signature,
|
||||
const D3D12_ROOT_SIGNATURE_DESC1 *desc, const struct d3d12_root_signature_info *info,
|
||||
const D3D12_ROOT_SIGNATURE_DESC1 *desc, struct d3d12_root_signature_info *info,
|
||||
const VkPushConstantRange *push_constant_range, struct vkd3d_descriptor_set_context *context,
|
||||
VkDescriptorSetLayout *vk_set_layout)
|
||||
{
|
||||
VkDescriptorSetLayoutBinding *vk_binding, *vk_binding_info = NULL;
|
||||
struct vkd3d_descriptor_hoist_desc *hoist_desc;
|
||||
struct vkd3d_shader_resource_binding *binding;
|
||||
VkDescriptorSetLayoutCreateFlags vk_flags;
|
||||
struct vkd3d_shader_root_parameter *param;
|
||||
unsigned int i, j;
|
||||
unsigned int hoisted_parameter_index;
|
||||
const D3D12_DESCRIPTOR_RANGE1 *range;
|
||||
unsigned int i, j, k;
|
||||
HRESULT hr = S_OK;
|
||||
|
||||
if (info->push_descriptor_count || (root_signature->flags & VKD3D_ROOT_SIGNATURE_USE_INLINE_UNIFORM_BLOCK))
|
||||
|
@ -785,11 +825,66 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign
|
|||
return S_OK;
|
||||
}
|
||||
|
||||
hoisted_parameter_index = desc->NumParameters;
|
||||
|
||||
for (i = 0, j = 0; i < desc->NumParameters; ++i)
|
||||
{
|
||||
const D3D12_ROOT_PARAMETER1 *p = &desc->pParameters[i];
|
||||
bool raw_va;
|
||||
|
||||
if (!(desc->Flags & D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE) &&
|
||||
p->ParameterType == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE)
|
||||
{
|
||||
unsigned int range_descriptor_offset = 0;
|
||||
for (k = 0; k < p->DescriptorTable.NumDescriptorRanges && info->hoist_descriptor_count; k++)
|
||||
{
|
||||
range = &p->DescriptorTable.pDescriptorRanges[k];
|
||||
if (range->OffsetInDescriptorsFromTableStart != D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND)
|
||||
range_descriptor_offset = range->OffsetInDescriptorsFromTableStart;
|
||||
|
||||
if (d3d12_descriptor_range_can_hoist_cbv_descriptor(root_signature->device, range))
|
||||
{
|
||||
vk_binding = &vk_binding_info[j++];
|
||||
vk_binding->binding = context->vk_binding;
|
||||
|
||||
vk_binding->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
||||
vk_binding->descriptorCount = 1;
|
||||
vk_binding->stageFlags = stage_flags_from_visibility(p->ShaderVisibility);
|
||||
vk_binding->pImmutableSamplers = NULL;
|
||||
|
||||
root_signature->root_descriptor_push_mask |= 1ull << hoisted_parameter_index;
|
||||
hoist_desc = &root_signature->hoist_info.desc[root_signature->hoist_info.num_desc];
|
||||
hoist_desc->table_index = i;
|
||||
hoist_desc->parameter_index = hoisted_parameter_index;
|
||||
hoist_desc->table_offset = range_descriptor_offset;
|
||||
root_signature->hoist_info.num_desc++;
|
||||
|
||||
binding = &root_signature->bindings[context->binding_index];
|
||||
binding->type = vkd3d_descriptor_type_from_d3d12_range_type(range->RangeType);
|
||||
binding->register_space = range->RegisterSpace;
|
||||
binding->register_index = range->BaseShaderRegister;
|
||||
binding->register_count = 1;
|
||||
binding->descriptor_table = 0; /* ignored */
|
||||
binding->descriptor_offset = 0; /* ignored */
|
||||
binding->shader_visibility = vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility);
|
||||
binding->flags = VKD3D_SHADER_BINDING_FLAG_BUFFER;
|
||||
binding->binding.binding = context->vk_binding;
|
||||
binding->binding.set = context->vk_set;
|
||||
|
||||
param = &root_signature->parameters[hoisted_parameter_index];
|
||||
param->parameter_type = D3D12_ROOT_PARAMETER_TYPE_CBV;
|
||||
param->descriptor.binding = binding;
|
||||
|
||||
context->binding_index += 1;
|
||||
context->vk_binding += 1;
|
||||
hoisted_parameter_index += 1;
|
||||
info->hoist_descriptor_count -= 1;
|
||||
}
|
||||
|
||||
range_descriptor_offset += range->NumDescriptors;
|
||||
}
|
||||
}
|
||||
|
||||
if (p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_CBV
|
||||
&& p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_SRV
|
||||
&& p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_UAV)
|
||||
|
@ -1016,7 +1111,7 @@ static HRESULT d3d12_root_signature_init_global(struct d3d12_root_signature *roo
|
|||
root_signature->static_sampler_count = desc->NumStaticSamplers;
|
||||
|
||||
hr = E_OUTOFMEMORY;
|
||||
root_signature->parameter_count = desc->NumParameters;
|
||||
root_signature->parameter_count = info.parameter_count;
|
||||
if (!(root_signature->parameters = vkd3d_calloc(root_signature->parameter_count,
|
||||
sizeof(*root_signature->parameters))))
|
||||
return hr;
|
||||
|
@ -3946,6 +4041,15 @@ static uint32_t vkd3d_bindless_state_get_bindless_flags(struct d3d12_device *dev
|
|||
flags |= VKD3D_RAW_VA_ROOT_DESCRIPTOR_CBV;
|
||||
}
|
||||
|
||||
if (device_info->properties2.properties.vendorID == VKD3D_VENDOR_ID_NVIDIA &&
|
||||
!(flags & VKD3D_RAW_VA_ROOT_DESCRIPTOR_CBV))
|
||||
{
|
||||
/* On NVIDIA, it's preferable to hoist CBVs to push descriptors if we can.
|
||||
* Hoisting is only safe with push descriptors since we need to consider
|
||||
* robustness as well for STATIC_KEEPING_BUFFER_BOUNDS_CHECKS. */
|
||||
flags |= VKD3D_HOIST_STATIC_TABLE_CBV;
|
||||
}
|
||||
|
||||
if (vkd3d_bindless_supports_mutable_type(device, flags))
|
||||
{
|
||||
INFO("Device supports VK_VALVE_mutable_descriptor_type.\n");
|
||||
|
|
|
@ -168,6 +168,7 @@ enum vkd3d_config_flags
|
|||
VKD3D_CONFIG_FLAG_VULKAN_DEBUG = 0x00000001,
|
||||
VKD3D_CONFIG_FLAG_SKIP_APPLICATION_WORKAROUNDS = 0x00000002,
|
||||
VKD3D_CONFIG_FLAG_DEBUG_UTILS = 0x00000004,
|
||||
VKD3D_CONFIG_FLAG_FORCE_STATIC_CBV = 0x00000008,
|
||||
};
|
||||
|
||||
struct vkd3d_instance
|
||||
|
@ -1142,6 +1143,20 @@ struct d3d12_bind_point_layout
|
|||
VkShaderStageFlags vk_push_stages;
|
||||
};
|
||||
|
||||
/* Capacity of vkd3d_descriptor_hoist_info::desc, i.e. the most table
 * descriptors a single root signature can have hoisted. */
#define VKD3D_MAX_HOISTED_DESCRIPTORS 16

/* Describes one descriptor-table entry that has been hoisted into an
 * extra root parameter. */
struct vkd3d_descriptor_hoist_desc
{
    /* Index of the root parameter holding the source descriptor table. */
    uint32_t table_index;
    /* Offset, in descriptors, of the hoisted entry from the start of that table. */
    uint32_t table_offset;
    /* Index of the root parameter slot that receives the hoisted descriptor. */
    uint32_t parameter_index;
};

/* Per-root-signature list of hoisted descriptors. */
struct vkd3d_descriptor_hoist_info
{
    struct vkd3d_descriptor_hoist_desc desc[VKD3D_MAX_HOISTED_DESCRIPTORS];
    /* Number of valid entries at the front of desc[]. */
    unsigned int num_desc;
};
|
||||
|
||||
struct d3d12_root_signature
|
||||
{
|
||||
ID3D12RootSignature ID3D12RootSignature_iface;
|
||||
|
@ -1186,6 +1201,8 @@ struct d3d12_root_signature
|
|||
unsigned int static_sampler_count;
|
||||
VkSampler *static_samplers;
|
||||
|
||||
struct vkd3d_descriptor_hoist_info hoist_info;
|
||||
|
||||
struct d3d12_device *device;
|
||||
|
||||
struct vkd3d_private_store private_store;
|
||||
|
@ -1526,6 +1543,7 @@ enum vkd3d_pipeline_dirty_flag
|
|||
{
|
||||
VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET = 0x00000001u,
|
||||
VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS = 0x00000002u,
|
||||
VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS = 0x00000004u,
|
||||
};
|
||||
|
||||
union vkd3d_descriptor_info
|
||||
|
@ -1744,6 +1762,8 @@ struct d3d12_command_list
|
|||
|
||||
LONG *outstanding_submissions_count;
|
||||
|
||||
const struct d3d12_desc *cbv_srv_uav_descriptors;
|
||||
|
||||
struct vkd3d_private_store private_store;
|
||||
};
|
||||
|
||||
|
@ -2003,6 +2023,7 @@ enum vkd3d_bindless_flags
|
|||
VKD3D_RAW_VA_ROOT_DESCRIPTOR_CBV = (1u << 9),
|
||||
VKD3D_RAW_VA_ROOT_DESCRIPTOR_SRV_UAV = (1u << 10),
|
||||
VKD3D_BINDLESS_MUTABLE_TYPE = (1u << 11),
|
||||
VKD3D_HOIST_STATIC_TABLE_CBV = (1u << 12),
|
||||
};
|
||||
|
||||
#define VKD3D_BINDLESS_SET_MAX_EXTRA_BINDINGS 8
|
||||
|
|
Loading…
Reference in New Issue