vkd3d: Add support for hoisting CBV descriptors to push descriptors.

Bindless CBV is *pretty* bad on NVIDIA, so add a code path which can
promote descriptor table CBVs into push descriptors.

We can safely do this with Root Signature 1.1 STATIC or
the somewhat obscure STATIC_KEEPING_BUFFER_BOUNDS_CHECKS.

With DESCRIPTORS_VOLATILE, which basically all titles use,
we can still force this behavior through the force_static_cbv config flag,
but that is an incorrect speed hack. It works in most
titles, however, since bindless CBV is exceptionally rare.

We only hoist descriptors when the root signature range has 1 descriptor
anyway, so we should avoid any reasonable bindless scenario.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
This commit is contained in:
Hans-Kristian Arntzen 2021-03-05 11:51:14 +01:00
parent d758a6e296
commit 13d132f1c4
5 changed files with 195 additions and 7 deletions

View File

@ -3126,6 +3126,8 @@ static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_l
if (bindings->static_sampler_set)
bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET;
if (bindings->root_signature->hoist_info.num_desc)
bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS;
d3d12_command_list_invalidate_push_constants(bindings);
@ -3878,6 +3880,8 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list,
list->active_queries_count = 0;
list->pending_queries_count = 0;
list->cbv_srv_uav_descriptors = NULL;
ID3D12GraphicsCommandList_SetPipelineState(iface, initial_pipeline_state);
}
@ -4494,6 +4498,55 @@ static void d3d12_command_list_update_root_descriptors(struct d3d12_command_list
}
}
/* Rewrites every root descriptor slot that backs a hoisted descriptor-table CBV
 * and then clears VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS on the bindings.
 *
 * list:     command list providing cbv_srv_uav_descriptors and the device.
 * bindings: pipeline bindings whose root descriptors and masks are updated.
 *
 * No per-table dirty tracking is done: all hoisted descriptors are refreshed on
 * each call. Uniform buffers tend to change constantly anyway, so the extra
 * work is expected to be negligible. */
static void d3d12_command_list_update_hoisted_descriptors(struct d3d12_command_list *list,
        struct vkd3d_pipeline_bindings *bindings)
{
    const struct d3d12_root_signature *root_signature = bindings->root_signature;
    unsigned int hoist_index;

    for (hoist_index = 0; hoist_index < root_signature->hoist_info.num_desc; hoist_index++)
    {
        const struct vkd3d_descriptor_hoist_desc *hoist = &root_signature->hoist_info.desc[hoist_index];
        struct vkd3d_root_descriptor_info *slot = &bindings->root_descriptors[hoist->parameter_index];
        const struct d3d12_desc *source = list->cbv_srv_uav_descriptors;

        /* Only resolve the heap entry when a descriptor array is available;
         * otherwise source stays NULL and a null binding is written below. */
        if (source)
            source += bindings->descriptor_tables[hoist->table_index] + hoist->table_offset;

        /* The hoisted slot is always treated as a live, dirty uniform buffer. */
        slot->vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
        bindings->root_descriptor_active_mask |= 1ull << hoist->parameter_index;
        bindings->root_descriptor_dirty_mask |= 1ull << hoist->parameter_index;

        if (source && (source->metadata.flags & VKD3D_DESCRIPTOR_FLAG_OFFSET_RANGE))
        {
            /* Buffer descriptors have to be valid at recording time, so the
             * buffer info can be copied straight out of the heap entry. */
            slot->info.buffer = source->info.buffer;
            continue;
        }

        /* No usable descriptor: bind a null buffer, natively if the device
         * supports nullDescriptor, else through the device's dummy buffer. */
        slot->info.buffer.offset = 0;
        if (list->device->device_info.robustness2_features.nullDescriptor)
        {
            slot->info.buffer.buffer = VK_NULL_HANDLE;
            slot->info.buffer.range = VK_WHOLE_SIZE;
        }
        else
        {
            slot->info.buffer.buffer = list->device->null_resources.vk_buffer;
            slot->info.buffer.range = VKD3D_NULL_BUFFER_SIZE;
        }
    }

    bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS;
}
static void d3d12_command_list_update_descriptors(struct d3d12_command_list *list,
VkPipelineBindPoint bind_point)
{
@ -4527,6 +4580,10 @@ static void d3d12_command_list_update_descriptors(struct d3d12_command_list *lis
if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET)
d3d12_command_list_update_static_samplers(list, bindings, vk_bind_point, layout);
/* If we can, hoist descriptors from the descriptor heap into fake root parameters. */
if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS)
d3d12_command_list_update_hoisted_descriptors(list, bindings);
if (rs->flags & VKD3D_ROOT_SIGNATURE_USE_INLINE_UNIFORM_BLOCK)
{
/* Root constants and descriptor table offsets are part of the root descriptor set */
@ -6353,12 +6410,17 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(d3d12_comman
list->descriptor_heaps[j] = heap->vk_descriptor_sets[set_index++];
dirty_mask |= 1ull << j;
}
/* In case we need to hoist buffer descriptors. */
if (heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)
list->cbv_srv_uav_descriptors = (const struct d3d12_desc *) heap->descriptors;
}
for (i = 0; i < ARRAY_SIZE(list->pipeline_bindings); i++)
{
struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[i];
bindings->descriptor_heap_dirty_mask = dirty_mask;
bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS;
}
}
@ -6431,6 +6493,8 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l
if (root_signature->descriptor_table_count)
bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS;
if (root_signature->hoist_info.num_desc)
bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS;
}
static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(d3d12_command_list_iface *iface,

View File

@ -476,6 +476,7 @@ static const struct vkd3d_debug_option vkd3d_config_options[] =
{"vk_debug", VKD3D_CONFIG_FLAG_VULKAN_DEBUG},
{"skip_application_workarounds", VKD3D_CONFIG_FLAG_SKIP_APPLICATION_WORKAROUNDS},
{"debug_utils", VKD3D_CONFIG_FLAG_DEBUG_UTILS},
{"force_static_cbv", VKD3D_CONFIG_FLAG_FORCE_STATIC_CBV},
};
static void vkd3d_config_flags_init_once(void)

View File

@ -2901,8 +2901,6 @@ static VkDeviceSize vkd3d_get_required_texel_buffer_alignment(const struct d3d12
bool vkd3d_create_raw_r32ui_vk_buffer_view(struct d3d12_device *device,
VkBuffer vk_buffer, VkDeviceSize offset, VkDeviceSize range, VkBufferView *vk_view)
{
/* Called when we know the Vulkan format implicitly and we don't need to search through
* format descriptions. */
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
struct VkBufferViewCreateInfo view_desc;
VkResult vr;

View File

@ -339,9 +339,11 @@ struct d3d12_root_signature_info
{
uint32_t binding_count;
uint32_t descriptor_count;
uint32_t parameter_count;
uint32_t push_descriptor_count;
uint32_t root_constant_count;
uint32_t hoist_descriptor_count;
bool has_raw_va_aux_buffer;
bool has_ssbo_offset_buffer;
bool has_typed_offset_buffer;
@ -349,8 +351,30 @@ struct d3d12_root_signature_info
uint32_t cost;
};
static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_signature_info *info,
/* Decides whether a descriptor-table range may be promoted ("hoisted") to a
 * push descriptor.
 *
 * device: device whose bindless_state.flags must contain VKD3D_HOIST_STATIC_TABLE_CBV.
 * range:  descriptor range being considered.
 *
 * Returns true only for a single-descriptor CBV range that is either not
 * marked DESCRIPTORS_VOLATILE, or for which VKD3D_CONFIG_FLAG_FORCE_STATIC_CBV
 * overrides the volatile flag. */
static bool d3d12_descriptor_range_can_hoist_cbv_descriptor(
        struct d3d12_device *device, const D3D12_DESCRIPTOR_RANGE1 *range)
{
    /* Hoisting has to be enabled for this device in the first place. */
    if (!(device->bindless_state.flags & VKD3D_HOIST_STATIC_TABLE_CBV))
        return false;

    /* Only CBVs are of interest. SRVs and UAVs are too fiddly because they
     * do not necessarily map to buffers at all. */
    if (range->RangeType != D3D12_DESCRIPTOR_RANGE_TYPE_CBV)
        return false;

    /* Arrays cannot / should not be hoisted. */
    if (range->NumDescriptors != 1)
        return false;

    /* Without the volatile flag the descriptor is guaranteed to be written
     * before the root table parameter is set on the command list, so it can
     * safely be latched at draw time. */
    if (!(range->Flags & D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE))
        return true;

    /* Speed hack: pretend every CBV is non-volatile when the config flag is
     * set. Hardly any application clears the volatile flag, even though most
     * of them could. */
    return (vkd3d_config_flags & VKD3D_CONFIG_FLAG_FORCE_STATIC_CBV) != 0;
}
static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_signature_info *info,
struct d3d12_device *device, const D3D12_ROOT_SIGNATURE_DESC1 *desc, const D3D12_DESCRIPTOR_RANGE1 *range)
{
switch (range->RangeType)
{
@ -370,6 +394,13 @@ static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_sig
info->has_typed_offset_buffer = true;
break;
case D3D12_DESCRIPTOR_RANGE_TYPE_CBV:
if (!(desc->Flags & D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE) &&
d3d12_descriptor_range_can_hoist_cbv_descriptor(device, range))
{
info->hoist_descriptor_count += 1;
}
info->binding_count += 1;
break;
case D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER:
info->binding_count += 1;
break;
@ -401,7 +432,7 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i
case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE:
for (j = 0; j < p->DescriptorTable.NumDescriptorRanges; ++j)
if (FAILED(hr = d3d12_root_signature_info_count_descriptors(info,
device, &p->DescriptorTable.pDescriptorRanges[j])))
device, desc, &p->DescriptorTable.pDescriptorRanges[j])))
return hr;
/* Local root signature directly affects memory layout. */
@ -445,7 +476,13 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i
}
}
info->hoist_descriptor_count = min(info->hoist_descriptor_count, VKD3D_MAX_HOISTED_DESCRIPTORS);
info->hoist_descriptor_count = min(info->hoist_descriptor_count, D3D12_MAX_ROOT_COST - desc->NumParameters);
info->push_descriptor_count += info->hoist_descriptor_count;
info->binding_count += info->hoist_descriptor_count;
info->binding_count += desc->NumStaticSamplers;
info->parameter_count = desc->NumParameters + info->hoist_descriptor_count;
return S_OK;
}
@ -763,15 +800,18 @@ static HRESULT d3d12_root_signature_init_shader_record_descriptors(
}
static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_signature *root_signature,
const D3D12_ROOT_SIGNATURE_DESC1 *desc, const struct d3d12_root_signature_info *info,
const D3D12_ROOT_SIGNATURE_DESC1 *desc, struct d3d12_root_signature_info *info,
const VkPushConstantRange *push_constant_range, struct vkd3d_descriptor_set_context *context,
VkDescriptorSetLayout *vk_set_layout)
{
VkDescriptorSetLayoutBinding *vk_binding, *vk_binding_info = NULL;
struct vkd3d_descriptor_hoist_desc *hoist_desc;
struct vkd3d_shader_resource_binding *binding;
VkDescriptorSetLayoutCreateFlags vk_flags;
struct vkd3d_shader_root_parameter *param;
unsigned int i, j;
unsigned int hoisted_parameter_index;
const D3D12_DESCRIPTOR_RANGE1 *range;
unsigned int i, j, k;
HRESULT hr = S_OK;
if (info->push_descriptor_count || (root_signature->flags & VKD3D_ROOT_SIGNATURE_USE_INLINE_UNIFORM_BLOCK))
@ -785,11 +825,66 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign
return S_OK;
}
hoisted_parameter_index = desc->NumParameters;
for (i = 0, j = 0; i < desc->NumParameters; ++i)
{
const D3D12_ROOT_PARAMETER1 *p = &desc->pParameters[i];
bool raw_va;
if (!(desc->Flags & D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE) &&
p->ParameterType == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE)
{
unsigned int range_descriptor_offset = 0;
for (k = 0; k < p->DescriptorTable.NumDescriptorRanges && info->hoist_descriptor_count; k++)
{
range = &p->DescriptorTable.pDescriptorRanges[k];
if (range->OffsetInDescriptorsFromTableStart != D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND)
range_descriptor_offset = range->OffsetInDescriptorsFromTableStart;
if (d3d12_descriptor_range_can_hoist_cbv_descriptor(root_signature->device, range))
{
vk_binding = &vk_binding_info[j++];
vk_binding->binding = context->vk_binding;
vk_binding->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
vk_binding->descriptorCount = 1;
vk_binding->stageFlags = stage_flags_from_visibility(p->ShaderVisibility);
vk_binding->pImmutableSamplers = NULL;
root_signature->root_descriptor_push_mask |= 1ull << hoisted_parameter_index;
hoist_desc = &root_signature->hoist_info.desc[root_signature->hoist_info.num_desc];
hoist_desc->table_index = i;
hoist_desc->parameter_index = hoisted_parameter_index;
hoist_desc->table_offset = range_descriptor_offset;
root_signature->hoist_info.num_desc++;
binding = &root_signature->bindings[context->binding_index];
binding->type = vkd3d_descriptor_type_from_d3d12_range_type(range->RangeType);
binding->register_space = range->RegisterSpace;
binding->register_index = range->BaseShaderRegister;
binding->register_count = 1;
binding->descriptor_table = 0; /* ignored */
binding->descriptor_offset = 0; /* ignored */
binding->shader_visibility = vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility);
binding->flags = VKD3D_SHADER_BINDING_FLAG_BUFFER;
binding->binding.binding = context->vk_binding;
binding->binding.set = context->vk_set;
param = &root_signature->parameters[hoisted_parameter_index];
param->parameter_type = D3D12_ROOT_PARAMETER_TYPE_CBV;
param->descriptor.binding = binding;
context->binding_index += 1;
context->vk_binding += 1;
hoisted_parameter_index += 1;
info->hoist_descriptor_count -= 1;
}
range_descriptor_offset += range->NumDescriptors;
}
}
if (p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_CBV
&& p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_SRV
&& p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_UAV)
@ -1016,7 +1111,7 @@ static HRESULT d3d12_root_signature_init_global(struct d3d12_root_signature *roo
root_signature->static_sampler_count = desc->NumStaticSamplers;
hr = E_OUTOFMEMORY;
root_signature->parameter_count = desc->NumParameters;
root_signature->parameter_count = info.parameter_count;
if (!(root_signature->parameters = vkd3d_calloc(root_signature->parameter_count,
sizeof(*root_signature->parameters))))
return hr;
@ -3946,6 +4041,15 @@ static uint32_t vkd3d_bindless_state_get_bindless_flags(struct d3d12_device *dev
flags |= VKD3D_RAW_VA_ROOT_DESCRIPTOR_CBV;
}
if (device_info->properties2.properties.vendorID == VKD3D_VENDOR_ID_NVIDIA &&
!(flags & VKD3D_RAW_VA_ROOT_DESCRIPTOR_CBV))
{
/* On NVIDIA, it's preferable to hoist CBVs to push descriptors if we can.
* Hoisting is only safe with push descriptors since we need to consider
* robustness as well for STATIC_KEEPING_BUFFER_BOUNDS_CHECKS. */
flags |= VKD3D_HOIST_STATIC_TABLE_CBV;
}
if (vkd3d_bindless_supports_mutable_type(device, flags))
{
INFO("Device supports VK_VALVE_mutable_descriptor_type.\n");

View File

@ -168,6 +168,7 @@ enum vkd3d_config_flags
VKD3D_CONFIG_FLAG_VULKAN_DEBUG = 0x00000001,
VKD3D_CONFIG_FLAG_SKIP_APPLICATION_WORKAROUNDS = 0x00000002,
VKD3D_CONFIG_FLAG_DEBUG_UTILS = 0x00000004,
VKD3D_CONFIG_FLAG_FORCE_STATIC_CBV = 0x00000008,
};
struct vkd3d_instance
@ -1142,6 +1143,20 @@ struct d3d12_bind_point_layout
VkShaderStageFlags vk_push_stages;
};
#define VKD3D_MAX_HOISTED_DESCRIPTORS 16
/* Describes one descriptor-table CBV that has been hoisted into a root descriptor. */
struct vkd3d_descriptor_hoist_desc
{
/* Index of the root parameter (descriptor table) the CBV originally lives in;
 * used to look up the table's base offset in the pipeline bindings. */
uint32_t table_index;
/* Offset of the CBV, in descriptors, from the start of that table. */
uint32_t table_offset;
/* Index of the extra root parameter that receives the hoisted CBV.
 * These indices are assigned starting at the application's NumParameters. */
uint32_t parameter_index;
};
/* Per-root-signature list of hoisted descriptors. */
struct vkd3d_descriptor_hoist_info
{
/* Fixed-capacity storage; only the first num_desc entries are valid. */
struct vkd3d_descriptor_hoist_desc desc[VKD3D_MAX_HOISTED_DESCRIPTORS];
/* Number of entries in desc[] that have been filled in. */
unsigned int num_desc;
};
struct d3d12_root_signature
{
ID3D12RootSignature ID3D12RootSignature_iface;
@ -1186,6 +1201,8 @@ struct d3d12_root_signature
unsigned int static_sampler_count;
VkSampler *static_samplers;
struct vkd3d_descriptor_hoist_info hoist_info;
struct d3d12_device *device;
struct vkd3d_private_store private_store;
@ -1526,6 +1543,7 @@ enum vkd3d_pipeline_dirty_flag
{
VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET = 0x00000001u,
VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS = 0x00000002u,
VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS = 0x00000004u,
};
union vkd3d_descriptor_info
@ -1744,6 +1762,8 @@ struct d3d12_command_list
LONG *outstanding_submissions_count;
const struct d3d12_desc *cbv_srv_uav_descriptors;
struct vkd3d_private_store private_store;
};
@ -2003,6 +2023,7 @@ enum vkd3d_bindless_flags
VKD3D_RAW_VA_ROOT_DESCRIPTOR_CBV = (1u << 9),
VKD3D_RAW_VA_ROOT_DESCRIPTOR_SRV_UAV = (1u << 10),
VKD3D_BINDLESS_MUTABLE_TYPE = (1u << 11),
VKD3D_HOIST_STATIC_TABLE_CBV = (1u << 12),
};
#define VKD3D_BINDLESS_SET_MAX_EXTRA_BINDINGS 8