diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 71455451..3230fd2d 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -3126,6 +3126,8 @@ static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_l if (bindings->static_sampler_set) bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET; + if (bindings->root_signature->hoist_info.num_desc) + bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS; d3d12_command_list_invalidate_push_constants(bindings); @@ -3878,6 +3880,8 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list, list->active_queries_count = 0; list->pending_queries_count = 0; + list->cbv_srv_uav_descriptors = NULL; + ID3D12GraphicsCommandList_SetPipelineState(iface, initial_pipeline_state); } @@ -4494,6 +4498,55 @@ static void d3d12_command_list_update_root_descriptors(struct d3d12_command_list } } +static void d3d12_command_list_update_hoisted_descriptors(struct d3d12_command_list *list, + struct vkd3d_pipeline_bindings *bindings) +{ + const struct d3d12_root_signature *rs = bindings->root_signature; + const struct vkd3d_descriptor_hoist_desc *hoist_desc; + struct vkd3d_root_descriptor_info *root_parameter; + union vkd3d_descriptor_info *info; + const struct d3d12_desc *desc; + unsigned int i; + + /* We don't track dirty table index, just update every hoisted descriptor. + * Uniform buffers tend to be updated all the time anyways, so this should be fine. 
*/ + for (i = 0; i < rs->hoist_info.num_desc; i++) + { + hoist_desc = &rs->hoist_info.desc[i]; + + desc = list->cbv_srv_uav_descriptors; + if (desc) + desc += bindings->descriptor_tables[hoist_desc->table_index] + hoist_desc->table_offset; + + root_parameter = &bindings->root_descriptors[hoist_desc->parameter_index]; + + bindings->root_descriptor_dirty_mask |= 1ull << hoist_desc->parameter_index; + bindings->root_descriptor_active_mask |= 1ull << hoist_desc->parameter_index; + root_parameter->vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + info = &root_parameter->info; + + if (desc && (desc->metadata.flags & VKD3D_DESCRIPTOR_FLAG_OFFSET_RANGE)) + { + /* Buffer descriptors must be valid at recording time. */ + info->buffer = desc->info.buffer; + } + else if (list->device->device_info.robustness2_features.nullDescriptor) + { + info->buffer.buffer = VK_NULL_HANDLE; + info->buffer.offset = 0; + info->buffer.range = VK_WHOLE_SIZE; + } + else + { + info->buffer.buffer = list->device->null_resources.vk_buffer; + info->buffer.offset = 0; + info->buffer.range = VKD3D_NULL_BUFFER_SIZE; + } + } + + bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS; +} + static void d3d12_command_list_update_descriptors(struct d3d12_command_list *list, VkPipelineBindPoint bind_point) { @@ -4527,6 +4580,10 @@ static void d3d12_command_list_update_descriptors(struct d3d12_command_list *lis if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET) d3d12_command_list_update_static_samplers(list, bindings, vk_bind_point, layout); + /* If we can, hoist descriptors from the descriptor heap into fake root parameters. 
*/ + if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS) + d3d12_command_list_update_hoisted_descriptors(list, bindings); + if (rs->flags & VKD3D_ROOT_SIGNATURE_USE_INLINE_UNIFORM_BLOCK) { /* Root constants and descriptor table offsets are part of the root descriptor set */ @@ -6353,12 +6410,17 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(d3d12_comman list->descriptor_heaps[j] = heap->vk_descriptor_sets[set_index++]; dirty_mask |= 1ull << j; } + + /* In case we need to hoist buffer descriptors. */ + if (heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) + list->cbv_srv_uav_descriptors = (const struct d3d12_desc *) heap->descriptors; } for (i = 0; i < ARRAY_SIZE(list->pipeline_bindings); i++) { struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[i]; bindings->descriptor_heap_dirty_mask = dirty_mask; + bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS; } } @@ -6431,6 +6493,8 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l if (root_signature->descriptor_table_count) bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS; + if (root_signature->hoist_info.num_desc) + bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS; } static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(d3d12_command_list_iface *iface, diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index c99199d8..d015c568 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -476,6 +476,7 @@ static const struct vkd3d_debug_option vkd3d_config_options[] = {"vk_debug", VKD3D_CONFIG_FLAG_VULKAN_DEBUG}, {"skip_application_workarounds", VKD3D_CONFIG_FLAG_SKIP_APPLICATION_WORKAROUNDS}, {"debug_utils", VKD3D_CONFIG_FLAG_DEBUG_UTILS}, + {"force_static_cbv", VKD3D_CONFIG_FLAG_FORCE_STATIC_CBV}, }; static void vkd3d_config_flags_init_once(void) diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 9f6ac978..37b08bd7 100644 --- 
a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -2901,8 +2901,6 @@ static VkDeviceSize vkd3d_get_required_texel_buffer_alignment(const struct d3d12 bool vkd3d_create_raw_r32ui_vk_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, VkDeviceSize offset, VkDeviceSize range, VkBufferView *vk_view) { - /* Called when we know the Vulkan format implicitly and we don't need to search through - * format descriptions. */ const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct VkBufferViewCreateInfo view_desc; VkResult vr; diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index d3166b34..36306c05 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -339,9 +339,11 @@ struct d3d12_root_signature_info { uint32_t binding_count; uint32_t descriptor_count; + uint32_t parameter_count; uint32_t push_descriptor_count; uint32_t root_constant_count; + uint32_t hoist_descriptor_count; bool has_raw_va_aux_buffer; bool has_ssbo_offset_buffer; bool has_typed_offset_buffer; @@ -349,8 +351,30 @@ struct d3d12_root_signature_info uint32_t cost; }; -static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_signature_info *info, +static bool d3d12_descriptor_range_can_hoist_cbv_descriptor( struct d3d12_device *device, const D3D12_DESCRIPTOR_RANGE1 *range) +{ + /* Cannot/should not hoist arrays. + * We only care about CBVs. SRVs and UAVs are too fiddly + * since they don't necessarily map to buffers at all. */ + if (!(device->bindless_state.flags & VKD3D_HOIST_STATIC_TABLE_CBV) || + range->RangeType != D3D12_DESCRIPTOR_RANGE_TYPE_CBV || + range->NumDescriptors != 1) + { + return false; + } + + /* If descriptors are not marked volatile, we are guaranteed that the descriptors are + * set before updating the root table parameter in the command list. + * We can latch the descriptor at draw time. + * As a speed hack, we can pretend that all CBVs have this flag set. + * Basically no applications set this flag, even though they really could. 
*/ + return !(range->Flags & D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE) || + (vkd3d_config_flags & VKD3D_CONFIG_FLAG_FORCE_STATIC_CBV); +} + +static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_signature_info *info, + struct d3d12_device *device, const D3D12_ROOT_SIGNATURE_DESC1 *desc, const D3D12_DESCRIPTOR_RANGE1 *range) { switch (range->RangeType) { @@ -370,6 +394,13 @@ static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_sig info->has_typed_offset_buffer = true; break; case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: + if (!(desc->Flags & D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE) && + d3d12_descriptor_range_can_hoist_cbv_descriptor(device, range)) + { + info->hoist_descriptor_count += 1; + } + info->binding_count += 1; + break; case D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER: info->binding_count += 1; break; @@ -401,7 +432,7 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: for (j = 0; j < p->DescriptorTable.NumDescriptorRanges; ++j) if (FAILED(hr = d3d12_root_signature_info_count_descriptors(info, - device, &p->DescriptorTable.pDescriptorRanges[j]))) + device, desc, &p->DescriptorTable.pDescriptorRanges[j]))) return hr; /* Local root signature directly affects memory layout. 
*/ @@ -445,7 +476,13 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i } } + info->hoist_descriptor_count = min(info->hoist_descriptor_count, VKD3D_MAX_HOISTED_DESCRIPTORS); + info->hoist_descriptor_count = min(info->hoist_descriptor_count, D3D12_MAX_ROOT_COST - desc->NumParameters); + + info->push_descriptor_count += info->hoist_descriptor_count; + info->binding_count += info->hoist_descriptor_count; info->binding_count += desc->NumStaticSamplers; + info->parameter_count = desc->NumParameters + info->hoist_descriptor_count; return S_OK; } @@ -763,15 +800,18 @@ static HRESULT d3d12_root_signature_init_shader_record_descriptors( } static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_signature *root_signature, - const D3D12_ROOT_SIGNATURE_DESC1 *desc, const struct d3d12_root_signature_info *info, + const D3D12_ROOT_SIGNATURE_DESC1 *desc, struct d3d12_root_signature_info *info, const VkPushConstantRange *push_constant_range, struct vkd3d_descriptor_set_context *context, VkDescriptorSetLayout *vk_set_layout) { VkDescriptorSetLayoutBinding *vk_binding, *vk_binding_info = NULL; + struct vkd3d_descriptor_hoist_desc *hoist_desc; struct vkd3d_shader_resource_binding *binding; VkDescriptorSetLayoutCreateFlags vk_flags; struct vkd3d_shader_root_parameter *param; - unsigned int i, j; + unsigned int hoisted_parameter_index; + const D3D12_DESCRIPTOR_RANGE1 *range; + unsigned int i, j, k; HRESULT hr = S_OK; if (info->push_descriptor_count || (root_signature->flags & VKD3D_ROOT_SIGNATURE_USE_INLINE_UNIFORM_BLOCK)) @@ -785,11 +825,66 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign return S_OK; } + hoisted_parameter_index = desc->NumParameters; + for (i = 0, j = 0; i < desc->NumParameters; ++i) { const D3D12_ROOT_PARAMETER1 *p = &desc->pParameters[i]; bool raw_va; + if (!(desc->Flags & D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE) && + p->ParameterType == 
D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) + { + unsigned int range_descriptor_offset = 0; + for (k = 0; k < p->DescriptorTable.NumDescriptorRanges && info->hoist_descriptor_count; k++) + { + range = &p->DescriptorTable.pDescriptorRanges[k]; + if (range->OffsetInDescriptorsFromTableStart != D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) + range_descriptor_offset = range->OffsetInDescriptorsFromTableStart; + + if (d3d12_descriptor_range_can_hoist_cbv_descriptor(root_signature->device, range)) + { + vk_binding = &vk_binding_info[j++]; + vk_binding->binding = context->vk_binding; + + vk_binding->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + vk_binding->descriptorCount = 1; + vk_binding->stageFlags = stage_flags_from_visibility(p->ShaderVisibility); + vk_binding->pImmutableSamplers = NULL; + + root_signature->root_descriptor_push_mask |= 1ull << hoisted_parameter_index; + hoist_desc = &root_signature->hoist_info.desc[root_signature->hoist_info.num_desc]; + hoist_desc->table_index = i; + hoist_desc->parameter_index = hoisted_parameter_index; + hoist_desc->table_offset = range_descriptor_offset; + root_signature->hoist_info.num_desc++; + + binding = &root_signature->bindings[context->binding_index]; + binding->type = vkd3d_descriptor_type_from_d3d12_range_type(range->RangeType); + binding->register_space = range->RegisterSpace; + binding->register_index = range->BaseShaderRegister; + binding->register_count = 1; + binding->descriptor_table = 0; /* ignored */ + binding->descriptor_offset = 0; /* ignored */ + binding->shader_visibility = vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility); + binding->flags = VKD3D_SHADER_BINDING_FLAG_BUFFER; + binding->binding.binding = context->vk_binding; + binding->binding.set = context->vk_set; + + param = &root_signature->parameters[hoisted_parameter_index]; + param->parameter_type = D3D12_ROOT_PARAMETER_TYPE_CBV; + param->descriptor.binding = binding; + + context->binding_index += 1; + context->vk_binding += 1; + 
hoisted_parameter_index += 1; + info->hoist_descriptor_count -= 1; + } + + range_descriptor_offset += range->NumDescriptors; + } + } + if (p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_CBV && p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_SRV && p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_UAV) @@ -1016,7 +1111,7 @@ static HRESULT d3d12_root_signature_init_global(struct d3d12_root_signature *roo root_signature->static_sampler_count = desc->NumStaticSamplers; hr = E_OUTOFMEMORY; - root_signature->parameter_count = desc->NumParameters; + root_signature->parameter_count = info.parameter_count; if (!(root_signature->parameters = vkd3d_calloc(root_signature->parameter_count, sizeof(*root_signature->parameters)))) return hr; @@ -3946,6 +4041,15 @@ static uint32_t vkd3d_bindless_state_get_bindless_flags(struct d3d12_device *dev flags |= VKD3D_RAW_VA_ROOT_DESCRIPTOR_CBV; } + if (device_info->properties2.properties.vendorID == VKD3D_VENDOR_ID_NVIDIA && + !(flags & VKD3D_RAW_VA_ROOT_DESCRIPTOR_CBV)) + { + /* On NVIDIA, it's preferable to hoist CBVs to push descriptors if we can. + * Hoisting is only safe with push descriptors since we need to consider + * robustness as well for STATIC_KEEPING_BUFFER_BOUNDS_CHECKS. 
*/ + flags |= VKD3D_HOIST_STATIC_TABLE_CBV; + } + if (vkd3d_bindless_supports_mutable_type(device, flags)) { INFO("Device supports VK_VALVE_mutable_descriptor_type.\n"); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index a23fc992..678b6ac6 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -168,6 +168,7 @@ enum vkd3d_config_flags VKD3D_CONFIG_FLAG_VULKAN_DEBUG = 0x00000001, VKD3D_CONFIG_FLAG_SKIP_APPLICATION_WORKAROUNDS = 0x00000002, VKD3D_CONFIG_FLAG_DEBUG_UTILS = 0x00000004, + VKD3D_CONFIG_FLAG_FORCE_STATIC_CBV = 0x00000008, }; struct vkd3d_instance @@ -1142,6 +1143,20 @@ struct d3d12_bind_point_layout VkShaderStageFlags vk_push_stages; }; +#define VKD3D_MAX_HOISTED_DESCRIPTORS 16 +struct vkd3d_descriptor_hoist_desc +{ + uint32_t table_index; + uint32_t table_offset; + uint32_t parameter_index; +}; + +struct vkd3d_descriptor_hoist_info +{ + struct vkd3d_descriptor_hoist_desc desc[VKD3D_MAX_HOISTED_DESCRIPTORS]; + unsigned int num_desc; +}; + struct d3d12_root_signature { ID3D12RootSignature ID3D12RootSignature_iface; @@ -1186,6 +1201,8 @@ struct d3d12_root_signature unsigned int static_sampler_count; VkSampler *static_samplers; + struct vkd3d_descriptor_hoist_info hoist_info; + struct d3d12_device *device; struct vkd3d_private_store private_store; @@ -1526,6 +1543,7 @@ enum vkd3d_pipeline_dirty_flag { VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET = 0x00000001u, VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS = 0x00000002u, + VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS = 0x00000004u, }; union vkd3d_descriptor_info @@ -1744,6 +1762,8 @@ struct d3d12_command_list LONG *outstanding_submissions_count; + const struct d3d12_desc *cbv_srv_uav_descriptors; + struct vkd3d_private_store private_store; }; @@ -2003,6 +2023,7 @@ enum vkd3d_bindless_flags VKD3D_RAW_VA_ROOT_DESCRIPTOR_CBV = (1u << 9), VKD3D_RAW_VA_ROOT_DESCRIPTOR_SRV_UAV = (1u << 10), VKD3D_BINDLESS_MUTABLE_TYPE = (1u << 11), + VKD3D_HOIST_STATIC_TABLE_CBV = (1u << 12), }; 
#define VKD3D_BINDLESS_SET_MAX_EXTRA_BINDINGS 8