From e438c42da089a9aa4ab80e15d22ae27431888513 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 18 Mar 2022 15:50:00 +0100 Subject: [PATCH 01/19] vkd3d: Unify how we hold on to root signatures in PSO state. Make use of private references to hold on to the root signature object. This is important in situations where we end up compiling pipelines late. With private references like this, there is no longer a need to distinguish a "private_root_signature", so just rename. Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/cache.c | 5 +++-- libs/vkd3d/state.c | 38 +++++++++++++++++--------------------- libs/vkd3d/vkd3d_private.h | 3 ++- 3 files changed, 22 insertions(+), 24 deletions(-) diff --git a/libs/vkd3d/cache.c b/libs/vkd3d/cache.c index 7df66ff3..eef6ebd8 100644 --- a/libs/vkd3d/cache.c +++ b/libs/vkd3d/cache.c @@ -1573,10 +1573,11 @@ static HRESULT d3d12_pipeline_library_load_pipeline(struct d3d12_pipeline_librar if (root_signature) pipeline_cache_compat.root_signature_compat_hash = root_signature->compatibility_hash; } - else if (!cached_state->private_root_signature) + else if (cached_state->root_signature_compat_hash_is_dxbc_derived) { /* If we have no explicit root signature and the existing PSO didn't either, - * just inherit the compat hash from PSO to avoid comparing them. */ + * just inherit the compat hash from PSO to avoid comparing them. + * The hash depends entirely on the DXBC blob either way. 
*/ pipeline_cache_compat.root_signature_compat_hash = cached_state->pipeline_cache_compat.root_signature_compat_hash; } diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 631adf79..70e7142c 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -1904,8 +1904,8 @@ void d3d12_pipeline_state_dec_ref(struct d3d12_pipeline_state *state) VK_CALL(vkDestroyPipelineCache(device->vk_device, state->vk_pso_cache, NULL)); - if (state->private_root_signature) - d3d12_root_signature_dec_ref(state->private_root_signature); + if (state->root_signature) + d3d12_root_signature_dec_ref(state->root_signature); vkd3d_free(state); } @@ -2283,11 +2283,7 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st HRESULT hr; state->vk_bind_point = VK_PIPELINE_BIND_POINT_COMPUTE; - - if (desc->root_signature) - root_signature = impl_from_ID3D12RootSignature(desc->root_signature); - else - root_signature = state->private_root_signature; + root_signature = state->root_signature; shader_interface.flags = d3d12_root_signature_get_shader_interface_flags(root_signature); shader_interface.min_ssbo_alignment = d3d12_device_get_ssbo_alignment(device); @@ -3046,10 +3042,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s } } - if (desc->root_signature) - root_signature = impl_from_ID3D12RootSignature(desc->root_signature); - else - root_signature = state->private_root_signature; + root_signature = state->root_signature; sample_count = vk_samples_from_dxgi_sample_desc(&desc->sample_desc); if (desc->sample_desc.Count != 1 && desc->sample_desc.Quality) @@ -3666,7 +3659,6 @@ HRESULT d3d12_pipeline_state_create(struct d3d12_device *device, VkPipelineBindP const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; const struct d3d12_cached_pipeline_state *desc_cached_pso; struct d3d12_cached_pipeline_state cached_pso; - struct d3d12_root_signature *root_signature; struct d3d12_pipeline_state *object; HRESULT hr; @@ -3678,20 
+3670,24 @@ HRESULT d3d12_pipeline_state_create(struct d3d12_device *device, VkPipelineBindP if (!desc->root_signature) { if (FAILED(hr = d3d12_pipeline_create_private_root_signature(device, - bind_point, desc, &object->private_root_signature))) + bind_point, desc, &object->root_signature))) { ERR("No root signature for pipeline.\n"); vkd3d_free(object); return hr; } - root_signature = object->private_root_signature; + object->root_signature_compat_hash_is_dxbc_derived = true; } else - root_signature = impl_from_ID3D12RootSignature(desc->root_signature); + { + object->root_signature = impl_from_ID3D12RootSignature(desc->root_signature); + /* Hold a private reference on this root signature in case we have to create fallback PSOs. */ + d3d12_root_signature_inc_ref(object->root_signature); + } vkd3d_pipeline_cache_compat_from_state_desc(&object->pipeline_cache_compat, desc); - if (root_signature) - object->pipeline_cache_compat.root_signature_compat_hash = root_signature->compatibility_hash; + if (object->root_signature) + object->pipeline_cache_compat.root_signature_compat_hash = object->root_signature->compatibility_hash; desc_cached_pso = &desc->cached_pso; @@ -3700,8 +3696,8 @@ HRESULT d3d12_pipeline_state_create(struct d3d12_device *device, VkPipelineBindP if (FAILED(hr = d3d12_cached_pipeline_state_validate(device, &desc->cached_pso, &object->pipeline_cache_compat))) { - if (object->private_root_signature) - d3d12_root_signature_dec_ref(object->private_root_signature); + if (object->root_signature) + d3d12_root_signature_dec_ref(object->root_signature); vkd3d_free(object); return hr; } @@ -3753,8 +3749,8 @@ HRESULT d3d12_pipeline_state_create(struct d3d12_device *device, VkPipelineBindP if (FAILED(hr)) { - if (object->private_root_signature) - d3d12_root_signature_dec_ref(object->private_root_signature); + if (object->root_signature) + d3d12_root_signature_dec_ref(object->root_signature); d3d12_pipeline_state_free_spirv_code(object); 
d3d12_pipeline_state_destroy_shader_modules(object, device); VK_CALL(vkDestroyPipelineCache(device->vk_device, object->vk_pso_cache, NULL)); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index d1618179..00551671 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1556,8 +1556,9 @@ struct d3d12_pipeline_state spinlock_t lock; struct vkd3d_pipeline_cache_compatibility pipeline_cache_compat; - struct d3d12_root_signature *private_root_signature; + struct d3d12_root_signature *root_signature; struct d3d12_device *device; + bool root_signature_compat_hash_is_dxbc_derived; struct vkd3d_private_store private_store; }; From 1495ead2c4db652bfb9a0623ff33b0851c08c455 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 18 Mar 2022 16:01:28 +0100 Subject: [PATCH 02/19] vkd3d: Refactor out shader interface struct plumbing. Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/state.c | 71 +++++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 39 deletions(-) diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 70e7142c..9b5e7bcd 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -2273,34 +2273,42 @@ static HRESULT vkd3d_create_compute_pipeline(struct d3d12_device *device, return S_OK; } +static void d3d12_pipeline_state_init_shader_interface(struct d3d12_pipeline_state *state, + struct d3d12_device *device, + VkShaderStageFlagBits stage, + struct vkd3d_shader_interface_info *shader_interface) +{ + const struct d3d12_root_signature *root_signature = state->root_signature; + shader_interface->flags = d3d12_root_signature_get_shader_interface_flags(root_signature); + shader_interface->min_ssbo_alignment = d3d12_device_get_ssbo_alignment(device); + shader_interface->descriptor_tables.offset = root_signature->descriptor_table_offset; + shader_interface->descriptor_tables.count = root_signature->descriptor_table_count; + shader_interface->bindings = root_signature->bindings; + 
shader_interface->binding_count = root_signature->binding_count; + shader_interface->push_constant_buffers = root_signature->root_constants; + shader_interface->push_constant_buffer_count = root_signature->root_constant_count; + shader_interface->push_constant_ubo_binding = &root_signature->push_constant_ubo_binding; + shader_interface->offset_buffer_binding = &root_signature->offset_buffer_binding; + shader_interface->stage = stage; + shader_interface->xfb_info = NULL; +#ifdef VKD3D_ENABLE_DESCRIPTOR_QA + shader_interface->descriptor_qa_global_binding = &root_signature->descriptor_qa_global_info; + shader_interface->descriptor_qa_heap_binding = &root_signature->descriptor_qa_heap_binding; +#endif +} + static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *state, struct d3d12_device *device, const struct d3d12_pipeline_state_desc *desc, const struct d3d12_cached_pipeline_state *cached_pso) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct vkd3d_shader_interface_info shader_interface; - const struct d3d12_root_signature *root_signature; HRESULT hr; state->vk_bind_point = VK_PIPELINE_BIND_POINT_COMPUTE; - root_signature = state->root_signature; - shader_interface.flags = d3d12_root_signature_get_shader_interface_flags(root_signature); - shader_interface.min_ssbo_alignment = d3d12_device_get_ssbo_alignment(device); - shader_interface.descriptor_tables.offset = root_signature->descriptor_table_offset; - shader_interface.descriptor_tables.count = root_signature->descriptor_table_count; - shader_interface.bindings = root_signature->bindings; - shader_interface.binding_count = root_signature->binding_count; - shader_interface.push_constant_buffers = root_signature->root_constants; - shader_interface.push_constant_buffer_count = root_signature->root_constant_count; - shader_interface.push_constant_ubo_binding = &root_signature->push_constant_ubo_binding; - shader_interface.offset_buffer_binding = 
&root_signature->offset_buffer_binding; - shader_interface.stage = VK_SHADER_STAGE_COMPUTE_BIT; - shader_interface.xfb_info = NULL; -#ifdef VKD3D_ENABLE_DESCRIPTOR_QA - shader_interface.descriptor_qa_global_binding = &root_signature->descriptor_qa_global_info; - shader_interface.descriptor_qa_heap_binding = &root_signature->descriptor_qa_heap_binding; -#endif + d3d12_pipeline_state_init_shader_interface(state, device, + VK_SHADER_STAGE_COMPUTE_BIT, &shader_interface); if (!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_GLOBAL_PIPELINE_CACHE)) { @@ -2316,7 +2324,7 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st hr = vkd3d_create_compute_pipeline(device, &desc->cs, &shader_interface, - root_signature->compute.vk_pipeline_layout, + state->root_signature->compute.vk_pipeline_layout, state->vk_pso_cache, &state->compute.vk_pipeline, &state->compute.code); @@ -2996,7 +3004,6 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s struct vkd3d_shader_parameter ps_shader_parameters[1]; struct vkd3d_shader_transform_feedback_info xfb_info; struct vkd3d_shader_interface_info shader_interface; - const struct d3d12_root_signature *root_signature; bool have_attachment, can_compile_pipeline_early; struct vkd3d_shader_signature output_signature; struct vkd3d_shader_signature input_signature; @@ -3042,8 +3049,6 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s } } - root_signature = state->root_signature; - sample_count = vk_samples_from_dxgi_sample_desc(&desc->sample_desc); if (desc->sample_desc.Count != 1 && desc->sample_desc.Quality) WARN("Ignoring sample quality %u.\n", desc->sample_desc.Quality); @@ -3215,7 +3220,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s graphics->xfb_enabled = false; if (so_desc->NumEntries) { - if (!(root_signature->d3d12_flags & D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT)) + if (!(state->root_signature->d3d12_flags & 
D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT)) { WARN("Stream output is used without D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT.\n"); hr = E_INVALIDARG; @@ -3244,20 +3249,8 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s xfb_stage = VK_SHADER_STAGE_VERTEX_BIT; } - shader_interface.flags = d3d12_root_signature_get_shader_interface_flags(root_signature); - shader_interface.min_ssbo_alignment = d3d12_device_get_ssbo_alignment(device); - shader_interface.descriptor_tables.offset = root_signature->descriptor_table_offset; - shader_interface.descriptor_tables.count = root_signature->descriptor_table_count; - shader_interface.bindings = root_signature->bindings; - shader_interface.binding_count = root_signature->binding_count; - shader_interface.push_constant_buffers = root_signature->root_constants; - shader_interface.push_constant_buffer_count = root_signature->root_constant_count; - shader_interface.push_constant_ubo_binding = &root_signature->push_constant_ubo_binding; - shader_interface.offset_buffer_binding = &root_signature->offset_buffer_binding; -#ifdef VKD3D_ENABLE_DESCRIPTOR_QA - shader_interface.descriptor_qa_global_binding = &root_signature->descriptor_qa_global_info; - shader_interface.descriptor_qa_heap_binding = &root_signature->descriptor_qa_heap_binding; -#endif + /* Stage / XFB info are overridden later. 
*/ + d3d12_pipeline_state_init_shader_interface(state, device, VK_SHADER_STAGE_ALL, &shader_interface); graphics->patch_vertex_count = 0; @@ -3381,7 +3374,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s } if (graphics->attribute_count - && !(root_signature->d3d12_flags & D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT)) + && !(state->root_signature->d3d12_flags & D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT)) { WARN("Input layout is used without D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT.\n"); hr = E_INVALIDARG; @@ -3575,7 +3568,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s (desc->primitive_topology_type != D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH || graphics->patch_vertex_count != 0) && desc->primitive_topology_type != D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED; - graphics->pipeline_layout = root_signature->graphics.vk_pipeline_layout; + graphics->pipeline_layout = state->root_signature->graphics.vk_pipeline_layout; graphics->pipeline = VK_NULL_HANDLE; state->device = device; From b387def67c642a390e9706167e85bb8ade60e118 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 18 Mar 2022 16:26:36 +0100 Subject: [PATCH 03/19] vkd3d: Refactor how we set compiler options. 
Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/state.c | 73 +++++++++++++++++++++----------------- libs/vkd3d/vkd3d_private.h | 10 ++++++ 2 files changed, 50 insertions(+), 33 deletions(-) diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 9b5e7bcd..1738a588 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -2200,7 +2200,29 @@ static void vkd3d_report_pipeline_creation_feedback_results(const VkPipelineCrea } } -static HRESULT vkd3d_create_compute_pipeline(struct d3d12_device *device, +static void d3d12_pipeline_state_init_compile_arguments(struct d3d12_pipeline_state *state, + struct d3d12_device *device, VkShaderStageFlagBits stage, + struct vkd3d_shader_compile_arguments *compile_arguments) +{ + memset(compile_arguments, 0, sizeof(*compile_arguments)); + compile_arguments->target = VKD3D_SHADER_TARGET_SPIRV_VULKAN_1_0; + compile_arguments->target_extension_count = device->vk_info.shader_extension_count; + compile_arguments->target_extensions = device->vk_info.shader_extensions; + compile_arguments->quirks = &vkd3d_shader_quirk_info; + + if (stage == VK_SHADER_STAGE_FRAGMENT_BIT) + { + /* Options which are exclusive to PS. Especially output swizzles must only be used in PS. 
*/ + compile_arguments->parameter_count = ARRAY_SIZE(state->graphics.cached_desc.ps_shader_parameters); + compile_arguments->parameters = state->graphics.cached_desc.ps_shader_parameters; + compile_arguments->dual_source_blending = state->graphics.cached_desc.is_dual_source_blending; + compile_arguments->output_swizzles = state->graphics.cached_desc.ps_output_swizzle; + compile_arguments->output_swizzle_count = state->graphics.rt_count; + } +} + +static HRESULT vkd3d_create_compute_pipeline(struct d3d12_pipeline_state *state, + struct d3d12_device *device, const D3D12_SHADER_BYTECODE *code, const struct vkd3d_shader_interface_info *shader_interface, VkPipelineLayout vk_pipeline_layout, VkPipelineCache vk_cache, VkPipeline *vk_pipeline, @@ -2217,11 +2239,7 @@ static HRESULT vkd3d_create_compute_pipeline(struct d3d12_device *device, VkResult vr; HRESULT hr; - memset(&compile_args, 0, sizeof(compile_args)); - compile_args.target_extensions = device->vk_info.shader_extensions; - compile_args.target_extension_count = device->vk_info.shader_extension_count; - compile_args.target = VKD3D_SHADER_TARGET_SPIRV_VULKAN_1_0; - compile_args.quirks = &vkd3d_shader_quirk_info; + d3d12_pipeline_state_init_compile_arguments(state, device, VK_SHADER_STAGE_COMPUTE_BIT, &compile_args); pipeline_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; pipeline_info.pNext = NULL; @@ -2322,7 +2340,7 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st vkd3d_load_spirv_from_cached_state(device, cached_pso, VK_SHADER_STAGE_COMPUTE_BIT, &state->compute.code); - hr = vkd3d_create_compute_pipeline(device, + hr = vkd3d_create_compute_pipeline(state, device, &desc->cs, &shader_interface, state->root_signature->compute.vk_pipeline_layout, state->vk_pso_cache, @@ -2993,15 +3011,12 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s const struct d3d12_cached_pipeline_state *cached_pso) { const VkPhysicalDeviceFeatures *features = 
&device->device_info.features2.features; - unsigned int ps_output_swizzle[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT]; - struct vkd3d_shader_compile_arguments compile_args, ps_compile_args; struct d3d12_graphics_pipeline_state *graphics = &state->graphics; const D3D12_STREAM_OUTPUT_DESC *so_desc = &desc->stream_output; VkVertexInputBindingDivisorDescriptionEXT *binding_divisor; const struct vkd3d_vulkan_info *vk_info = &device->vk_info; uint32_t instance_divisors[D3D12_VS_INPUT_REGISTER_COUNT]; uint32_t aligned_offsets[D3D12_VS_INPUT_REGISTER_COUNT]; - struct vkd3d_shader_parameter ps_shader_parameters[1]; struct vkd3d_shader_transform_feedback_info xfb_info; struct vkd3d_shader_interface_info shader_interface; bool have_attachment, can_compile_pipeline_early; @@ -3060,6 +3075,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s rt_count, ARRAY_SIZE(graphics->blend_attachments)); rt_count = ARRAY_SIZE(graphics->blend_attachments); } + graphics->rt_count = rt_count; if (!desc->ps.pShaderBytecode || !desc->ps.BytecodeLength) { @@ -3078,12 +3094,12 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s if (desc->rtv_formats.RTFormats[i] == DXGI_FORMAT_UNKNOWN) { graphics->null_attachment_mask |= 1u << i; - ps_output_swizzle[i] = VKD3D_NO_SWIZZLE; + graphics->cached_desc.ps_output_swizzle[i] = VKD3D_NO_SWIZZLE; graphics->rtv_formats[i] = VK_FORMAT_UNDEFINED; } else if ((format = vkd3d_get_format(device, desc->rtv_formats.RTFormats[i], false))) { - ps_output_swizzle[i] = vkd3d_get_rt_format_swizzle(format); + graphics->cached_desc.ps_output_swizzle[i] = vkd3d_get_rt_format_swizzle(format); graphics->rtv_formats[i] = format->vk_format; graphics->rtv_active_mask |= 1u << i; } @@ -3116,7 +3132,6 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s for (i = rt_count; i < ARRAY_SIZE(graphics->rtv_formats); ++i) graphics->rtv_formats[i] = VK_FORMAT_UNDEFINED; - graphics->rt_count = 
rt_count; blend_desc_from_d3d12(&graphics->blend_desc, &desc->blend_state, graphics->rt_count, graphics->blend_attachments); @@ -3187,26 +3202,13 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s } } - ps_shader_parameters[0].name = VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT; - ps_shader_parameters[0].type = VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT; - ps_shader_parameters[0].data_type = VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32; - ps_shader_parameters[0].immediate_constant.u32 = sample_count; + graphics->cached_desc.ps_shader_parameters[0].name = VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT; + graphics->cached_desc.ps_shader_parameters[0].type = VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT; + graphics->cached_desc.ps_shader_parameters[0].data_type = VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32; + graphics->cached_desc.ps_shader_parameters[0].immediate_constant.u32 = sample_count; + graphics->cached_desc.is_dual_source_blending = is_dual_source_blending(&desc->blend_state.RenderTarget[0]); - memset(&compile_args, 0, sizeof(compile_args)); - compile_args.target = VKD3D_SHADER_TARGET_SPIRV_VULKAN_1_0; - compile_args.target_extension_count = vk_info->shader_extension_count; - compile_args.target_extensions = vk_info->shader_extensions; - compile_args.quirks = &vkd3d_shader_quirk_info; - - /* Options which are exclusive to PS. Especially output swizzles must only be used in PS. */ - ps_compile_args = compile_args; - ps_compile_args.parameter_count = ARRAY_SIZE(ps_shader_parameters); - ps_compile_args.parameters = ps_shader_parameters; - ps_compile_args.dual_source_blending = is_dual_source_blending(&desc->blend_state.RenderTarget[0]); - ps_compile_args.output_swizzles = ps_output_swizzle; - ps_compile_args.output_swizzle_count = rt_count; - - if (ps_compile_args.dual_source_blending) + if (graphics->cached_desc.is_dual_source_blending) { /* If we're using dual source blending, we can only safely write to MRT 0. 
* Be defensive about programs which do not do this for us. */ @@ -3338,15 +3340,20 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s for (i = 0; i < ARRAY_SIZE(shader_stages); i++) { const D3D12_SHADER_BYTECODE *b = (const void *)((uintptr_t)desc + shader_stages[i].offset); + struct vkd3d_shader_compile_arguments compile_args; + if (!b->pShaderBytecode) continue; shader_interface.xfb_info = shader_stages[i].stage == xfb_stage ? &xfb_info : NULL; shader_interface.stage = shader_stages[i].stage; + + d3d12_pipeline_state_init_compile_arguments(state, device, shader_interface.stage, &compile_args); + if (FAILED(hr = vkd3d_create_shader_stage(device, &graphics->stages[stage_count], shader_stages[i].stage, NULL, b, &shader_interface, - shader_stages[i].stage == VK_SHADER_STAGE_FRAGMENT_BIT ? &ps_compile_args : &compile_args, + &compile_args, &graphics->code[stage_count]))) goto fail; diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 00551671..6725fb88 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1471,6 +1471,14 @@ enum vkd3d_plane_optimal_flag VKD3D_DEPTH_STENCIL_PLANE_GENERAL = (1 << 2), }; +struct d3d12_graphics_pipeline_state_cached_desc +{ + /* Information needed to compile to SPIR-V. 
*/ + unsigned int ps_output_swizzle[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT]; + struct vkd3d_shader_parameter ps_shader_parameters[1]; + bool is_dual_source_blending; +}; + struct d3d12_graphics_pipeline_state { struct vkd3d_shader_debug_ring_spec_info spec_info[VKD3D_MAX_SHADER_STAGES]; @@ -1478,6 +1486,8 @@ struct d3d12_graphics_pipeline_state struct vkd3d_shader_code code[VKD3D_MAX_SHADER_STAGES]; size_t stage_count; + struct d3d12_graphics_pipeline_state_cached_desc cached_desc; + VkVertexInputAttributeDescription attributes[D3D12_VS_INPUT_REGISTER_COUNT]; VkVertexInputRate input_rates[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; VkVertexInputBindingDivisorDescriptionEXT instance_divisors[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; From f816eeb60e58f73f0082e71a2f13bc862f0aa44b Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 18 Mar 2022 16:33:20 +0100 Subject: [PATCH 04/19] vkd3d: Ensure shader interface is set up per vkd3d_create_shader_stage. Prepares for a situation where we can move this code into vkd3d_create_shader_stage itself. Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/state.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 1738a588..adb99e6f 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -3018,7 +3018,6 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s uint32_t instance_divisors[D3D12_VS_INPUT_REGISTER_COUNT]; uint32_t aligned_offsets[D3D12_VS_INPUT_REGISTER_COUNT]; struct vkd3d_shader_transform_feedback_info xfb_info; - struct vkd3d_shader_interface_info shader_interface; bool have_attachment, can_compile_pipeline_early; struct vkd3d_shader_signature output_signature; struct vkd3d_shader_signature input_signature; @@ -3251,9 +3250,6 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s xfb_stage = VK_SHADER_STAGE_VERTEX_BIT; } - /* Stage / XFB info are overridden later. 
*/ - d3d12_pipeline_state_init_shader_interface(state, device, VK_SHADER_STAGE_ALL, &shader_interface); - graphics->patch_vertex_count = 0; for (i = 0; i < ARRAY_SIZE(shader_stages); ++i) @@ -3340,14 +3336,15 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s for (i = 0; i < ARRAY_SIZE(shader_stages); i++) { const D3D12_SHADER_BYTECODE *b = (const void *)((uintptr_t)desc + shader_stages[i].offset); + struct vkd3d_shader_interface_info shader_interface; struct vkd3d_shader_compile_arguments compile_args; if (!b->pShaderBytecode) continue; + /* TODO: Move this to vkd3d_create_shader_stage itself. */ + d3d12_pipeline_state_init_shader_interface(state, device, shader_stages[i].stage, &shader_interface); shader_interface.xfb_info = shader_stages[i].stage == xfb_stage ? &xfb_info : NULL; - shader_interface.stage = shader_stages[i].stage; - d3d12_pipeline_state_init_compile_arguments(state, device, shader_interface.stage, &compile_args); if (FAILED(hr = vkd3d_create_shader_stage(device, From dc45142b9324cad1d635ea34adff96265bdc414f Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 18 Mar 2022 16:54:42 +0100 Subject: [PATCH 05/19] vkd3d: Refactor out how XFB info is stored. For deferred compilation, we need to dupe the structs. XFB is kinda rare, so it's okay to eat allocations here. 
Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/state.c | 82 +++++++++++++++++++++++++++++++++----- libs/vkd3d/vkd3d_private.h | 2 + 2 files changed, 74 insertions(+), 10 deletions(-) diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index adb99e6f..5012077c 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -1886,6 +1886,63 @@ static void d3d12_pipeline_state_set_name(struct d3d12_pipeline_state *state, co } } +static void vkd3d_shader_transform_feedback_info_free(struct vkd3d_shader_transform_feedback_info *xfb_info) +{ + unsigned int i; + + if (!xfb_info) + return; + + for (i = 0; i < xfb_info->element_count; i++) + vkd3d_free((void*)xfb_info->elements[i].semantic_name); + vkd3d_free((void*)xfb_info->elements); + vkd3d_free((void*)xfb_info->buffer_strides); + vkd3d_free(xfb_info); +} + +static struct vkd3d_shader_transform_feedback_info *vkd3d_shader_transform_feedback_info_dup( + const D3D12_STREAM_OUTPUT_DESC *so_desc) +{ + struct vkd3d_shader_transform_feedback_element *new_entries = NULL; + struct vkd3d_shader_transform_feedback_info *xfb_info; + unsigned int *new_buffer_strides = NULL; + unsigned int num_duped = 0; + unsigned int i; + + xfb_info = vkd3d_calloc(1, sizeof(*xfb_info)); + if (!xfb_info) + return NULL; + + new_buffer_strides = malloc(so_desc->NumStrides * sizeof(*new_buffer_strides)); + if (!new_buffer_strides) + goto fail; + memcpy(new_buffer_strides, so_desc->pBufferStrides, so_desc->NumStrides * sizeof(*new_buffer_strides)); + xfb_info->buffer_strides = new_buffer_strides; + + new_entries = malloc(so_desc->NumEntries * sizeof(*new_entries)); + if (!new_entries) + goto fail; + memcpy(new_entries, so_desc->pSODeclaration, so_desc->NumEntries * sizeof(*new_entries)); + xfb_info->elements = new_entries; + + for (i = 0; i < so_desc->NumEntries; i++, num_duped++) + if (!(new_entries[i].semantic_name = vkd3d_strdup(new_entries[i].semantic_name))) + goto fail; + + xfb_info->buffer_stride_count = so_desc->NumStrides; + 
xfb_info->element_count = so_desc->NumEntries; + + return xfb_info; + +fail: + for (i = 0; i < num_duped; i++) + vkd3d_free((void*)new_entries[i].semantic_name); + vkd3d_free(new_buffer_strides); + vkd3d_free(new_entries); + vkd3d_free(xfb_info); + return NULL; +} + void d3d12_pipeline_state_dec_ref(struct d3d12_pipeline_state *state) { struct d3d12_device *device = state->device; @@ -1907,6 +1964,8 @@ void d3d12_pipeline_state_dec_ref(struct d3d12_pipeline_state *state) if (state->root_signature) d3d12_root_signature_dec_ref(state->root_signature); + if (state->vk_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) + vkd3d_shader_transform_feedback_info_free(state->graphics.cached_desc.xfb_info); vkd3d_free(state); } } @@ -3017,11 +3076,9 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s const struct vkd3d_vulkan_info *vk_info = &device->vk_info; uint32_t instance_divisors[D3D12_VS_INPUT_REGISTER_COUNT]; uint32_t aligned_offsets[D3D12_VS_INPUT_REGISTER_COUNT]; - struct vkd3d_shader_transform_feedback_info xfb_info; bool have_attachment, can_compile_pipeline_early; struct vkd3d_shader_signature output_signature; struct vkd3d_shader_signature input_signature; - VkShaderStageFlagBits xfb_stage = 0; VkSampleCountFlagBits sample_count; const struct vkd3d_format *format; unsigned int i, j, stage_count; @@ -3236,18 +3293,20 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s } graphics->xfb_enabled = true; + graphics->cached_desc.xfb_info = vkd3d_shader_transform_feedback_info_dup(so_desc); - xfb_info.elements = (const struct vkd3d_shader_transform_feedback_element *)so_desc->pSODeclaration; - xfb_info.element_count = so_desc->NumEntries; - xfb_info.buffer_strides = so_desc->pBufferStrides; - xfb_info.buffer_stride_count = so_desc->NumStrides; + if (!graphics->cached_desc.xfb_info) + { + hr = E_OUTOFMEMORY; + goto fail; + } if (desc->gs.pShaderBytecode) - xfb_stage = VK_SHADER_STAGE_GEOMETRY_BIT; + 
graphics->cached_desc.xfb_stage = VK_SHADER_STAGE_GEOMETRY_BIT; else if (desc->ds.pShaderBytecode) - xfb_stage = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; + graphics->cached_desc.xfb_stage = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; else - xfb_stage = VK_SHADER_STAGE_VERTEX_BIT; + graphics->cached_desc.xfb_stage = VK_SHADER_STAGE_VERTEX_BIT; } graphics->patch_vertex_count = 0; @@ -3344,7 +3403,8 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s /* TODO: Move this to vkd3d_create_shader_stage itself. */ d3d12_pipeline_state_init_shader_interface(state, device, shader_stages[i].stage, &shader_interface); - shader_interface.xfb_info = shader_stages[i].stage == xfb_stage ? &xfb_info : NULL; + shader_interface.xfb_info = shader_stages[i].stage == graphics->cached_desc.xfb_stage ? + graphics->cached_desc.xfb_info : NULL; d3d12_pipeline_state_init_compile_arguments(state, device, shader_interface.stage, &compile_args); if (FAILED(hr = vkd3d_create_shader_stage(device, @@ -3750,6 +3810,8 @@ HRESULT d3d12_pipeline_state_create(struct d3d12_device *device, VkPipelineBindP d3d12_root_signature_dec_ref(object->root_signature); d3d12_pipeline_state_free_spirv_code(object); d3d12_pipeline_state_destroy_shader_modules(object, device); + if (object->vk_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) + vkd3d_shader_transform_feedback_info_free(object->graphics.cached_desc.xfb_info); VK_CALL(vkDestroyPipelineCache(device->vk_device, object->vk_pso_cache, NULL)); vkd3d_free(object); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 6725fb88..1b556450 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1477,6 +1477,8 @@ struct d3d12_graphics_pipeline_state_cached_desc unsigned int ps_output_swizzle[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT]; struct vkd3d_shader_parameter ps_shader_parameters[1]; bool is_dual_source_blending; + VkShaderStageFlagBits xfb_stage; + struct 
vkd3d_shader_transform_feedback_info *xfb_info; }; struct d3d12_graphics_pipeline_state From 73fa8b9588debab2460cfefe42e93f84a28c07a2 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 18 Mar 2022 17:05:43 +0100 Subject: [PATCH 06/19] vkd3d: Sink shader interface struct build to where we need it. Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/state.c | 129 +++++++++++++++++++++------------------------ 1 file changed, 61 insertions(+), 68 deletions(-) diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 5012077c..03a74ff0 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -2121,14 +2121,61 @@ static HRESULT vkd3d_load_spirv_from_cached_state(struct d3d12_device *device, return hr; } -static HRESULT vkd3d_create_shader_stage(struct d3d12_device *device, +static void d3d12_pipeline_state_init_shader_interface(struct d3d12_pipeline_state *state, + struct d3d12_device *device, + VkShaderStageFlagBits stage, + struct vkd3d_shader_interface_info *shader_interface) +{ + const struct d3d12_root_signature *root_signature = state->root_signature; + shader_interface->flags = d3d12_root_signature_get_shader_interface_flags(root_signature); + shader_interface->min_ssbo_alignment = d3d12_device_get_ssbo_alignment(device); + shader_interface->descriptor_tables.offset = root_signature->descriptor_table_offset; + shader_interface->descriptor_tables.count = root_signature->descriptor_table_count; + shader_interface->bindings = root_signature->bindings; + shader_interface->binding_count = root_signature->binding_count; + shader_interface->push_constant_buffers = root_signature->root_constants; + shader_interface->push_constant_buffer_count = root_signature->root_constant_count; + shader_interface->push_constant_ubo_binding = &root_signature->push_constant_ubo_binding; + shader_interface->offset_buffer_binding = &root_signature->offset_buffer_binding; + shader_interface->stage = stage; + shader_interface->xfb_info = + (stage != 
VK_SHADER_STAGE_COMPUTE_BIT && stage == state->graphics.cached_desc.xfb_stage) ? + state->graphics.cached_desc.xfb_info : NULL; +#ifdef VKD3D_ENABLE_DESCRIPTOR_QA + shader_interface->descriptor_qa_global_binding = &root_signature->descriptor_qa_global_info; + shader_interface->descriptor_qa_heap_binding = &root_signature->descriptor_qa_heap_binding; +#endif +} + +static void d3d12_pipeline_state_init_compile_arguments(struct d3d12_pipeline_state *state, + struct d3d12_device *device, VkShaderStageFlagBits stage, + struct vkd3d_shader_compile_arguments *compile_arguments) +{ + memset(compile_arguments, 0, sizeof(*compile_arguments)); + compile_arguments->target = VKD3D_SHADER_TARGET_SPIRV_VULKAN_1_0; + compile_arguments->target_extension_count = device->vk_info.shader_extension_count; + compile_arguments->target_extensions = device->vk_info.shader_extensions; + compile_arguments->quirks = &vkd3d_shader_quirk_info; + + if (stage == VK_SHADER_STAGE_FRAGMENT_BIT) + { + /* Options which are exclusive to PS. Especially output swizzles must only be used in PS. 
*/ + compile_arguments->parameter_count = ARRAY_SIZE(state->graphics.cached_desc.ps_shader_parameters); + compile_arguments->parameters = state->graphics.cached_desc.ps_shader_parameters; + compile_arguments->dual_source_blending = state->graphics.cached_desc.is_dual_source_blending; + compile_arguments->output_swizzles = state->graphics.cached_desc.ps_output_swizzle; + compile_arguments->output_swizzle_count = state->graphics.rt_count; + } +} + +static HRESULT vkd3d_create_shader_stage(struct d3d12_pipeline_state *state, struct d3d12_device *device, VkPipelineShaderStageCreateInfo *stage_desc, VkShaderStageFlagBits stage, VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *required_subgroup_size_info, - const D3D12_SHADER_BYTECODE *code, - const struct vkd3d_shader_interface_info *shader_interface, - const struct vkd3d_shader_compile_arguments *compile_args, struct vkd3d_shader_code *spirv_code) + const D3D12_SHADER_BYTECODE *code, struct vkd3d_shader_code *spirv_code) { struct vkd3d_shader_code dxbc = {code->pShaderBytecode, code->BytecodeLength}; + struct vkd3d_shader_interface_info shader_interface; + struct vkd3d_shader_compile_arguments compile_args; vkd3d_shader_hash_t recovered_hash = 0; vkd3d_shader_hash_t compiled_hash = 0; int ret; @@ -2150,7 +2197,11 @@ static HRESULT vkd3d_create_shader_stage(struct d3d12_device *device, if (!spirv_code->code) { TRACE("Calling vkd3d_shader_compile_dxbc.\n"); - if ((ret = vkd3d_shader_compile_dxbc(&dxbc, spirv_code, 0, shader_interface, compile_args)) < 0) + + d3d12_pipeline_state_init_shader_interface(state, device, stage, &shader_interface); + d3d12_pipeline_state_init_compile_arguments(state, device, stage, &compile_args); + + if ((ret = vkd3d_shader_compile_dxbc(&dxbc, spirv_code, 0, &shader_interface, &compile_args)) < 0) { WARN("Failed to compile shader, vkd3d result %d.\n", ret); return hresult_from_vkd3d_result(ret); @@ -2259,31 +2310,9 @@ static void vkd3d_report_pipeline_creation_feedback_results(const 
VkPipelineCrea } } -static void d3d12_pipeline_state_init_compile_arguments(struct d3d12_pipeline_state *state, - struct d3d12_device *device, VkShaderStageFlagBits stage, - struct vkd3d_shader_compile_arguments *compile_arguments) -{ - memset(compile_arguments, 0, sizeof(*compile_arguments)); - compile_arguments->target = VKD3D_SHADER_TARGET_SPIRV_VULKAN_1_0; - compile_arguments->target_extension_count = device->vk_info.shader_extension_count; - compile_arguments->target_extensions = device->vk_info.shader_extensions; - compile_arguments->quirks = &vkd3d_shader_quirk_info; - - if (stage == VK_SHADER_STAGE_FRAGMENT_BIT) - { - /* Options which are exclusive to PS. Especially output swizzles must only be used in PS. */ - compile_arguments->parameter_count = ARRAY_SIZE(state->graphics.cached_desc.ps_shader_parameters); - compile_arguments->parameters = state->graphics.cached_desc.ps_shader_parameters; - compile_arguments->dual_source_blending = state->graphics.cached_desc.is_dual_source_blending; - compile_arguments->output_swizzles = state->graphics.cached_desc.ps_output_swizzle; - compile_arguments->output_swizzle_count = state->graphics.rt_count; - } -} - static HRESULT vkd3d_create_compute_pipeline(struct d3d12_pipeline_state *state, struct d3d12_device *device, const D3D12_SHADER_BYTECODE *code, - const struct vkd3d_shader_interface_info *shader_interface, VkPipelineLayout vk_pipeline_layout, VkPipelineCache vk_cache, VkPipeline *vk_pipeline, struct vkd3d_shader_code *spirv_code) { @@ -2291,22 +2320,19 @@ static HRESULT vkd3d_create_compute_pipeline(struct d3d12_pipeline_state *state, const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkPipelineCreationFeedbackCreateInfoEXT feedback_info; struct vkd3d_shader_debug_ring_spec_info spec_info; - struct vkd3d_shader_compile_arguments compile_args; VkPipelineCreationFeedbackEXT feedbacks[1]; VkComputePipelineCreateInfo pipeline_info; VkPipelineCreationFeedbackEXT feedback; VkResult vr; HRESULT hr; - 
d3d12_pipeline_state_init_compile_arguments(state, device, VK_SHADER_STAGE_COMPUTE_BIT, &compile_args); - pipeline_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; pipeline_info.pNext = NULL; pipeline_info.flags = 0; - if (FAILED(hr = vkd3d_create_shader_stage(device, + if (FAILED(hr = vkd3d_create_shader_stage(state, device, &pipeline_info.stage, VK_SHADER_STAGE_COMPUTE_BIT, &required_subgroup_size_info, - code, shader_interface, &compile_args, spirv_code))) + code, spirv_code))) return hr; pipeline_info.layout = vk_pipeline_layout; pipeline_info.basePipelineHandle = VK_NULL_HANDLE; @@ -2350,30 +2376,6 @@ static HRESULT vkd3d_create_compute_pipeline(struct d3d12_pipeline_state *state, return S_OK; } -static void d3d12_pipeline_state_init_shader_interface(struct d3d12_pipeline_state *state, - struct d3d12_device *device, - VkShaderStageFlagBits stage, - struct vkd3d_shader_interface_info *shader_interface) -{ - const struct d3d12_root_signature *root_signature = state->root_signature; - shader_interface->flags = d3d12_root_signature_get_shader_interface_flags(root_signature); - shader_interface->min_ssbo_alignment = d3d12_device_get_ssbo_alignment(device); - shader_interface->descriptor_tables.offset = root_signature->descriptor_table_offset; - shader_interface->descriptor_tables.count = root_signature->descriptor_table_count; - shader_interface->bindings = root_signature->bindings; - shader_interface->binding_count = root_signature->binding_count; - shader_interface->push_constant_buffers = root_signature->root_constants; - shader_interface->push_constant_buffer_count = root_signature->root_constant_count; - shader_interface->push_constant_ubo_binding = &root_signature->push_constant_ubo_binding; - shader_interface->offset_buffer_binding = &root_signature->offset_buffer_binding; - shader_interface->stage = stage; - shader_interface->xfb_info = NULL; -#ifdef VKD3D_ENABLE_DESCRIPTOR_QA - shader_interface->descriptor_qa_global_binding = 
&root_signature->descriptor_qa_global_info; - shader_interface->descriptor_qa_heap_binding = &root_signature->descriptor_qa_heap_binding; -#endif -} - static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *state, struct d3d12_device *device, const struct d3d12_pipeline_state_desc *desc, const struct d3d12_cached_pipeline_state *cached_pso) @@ -2400,7 +2402,7 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st VK_SHADER_STAGE_COMPUTE_BIT, &state->compute.code); hr = vkd3d_create_compute_pipeline(state, device, - &desc->cs, &shader_interface, + &desc->cs, state->root_signature->compute.vk_pipeline_layout, state->vk_pso_cache, &state->compute.vk_pipeline, @@ -3395,22 +3397,13 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s for (i = 0; i < ARRAY_SIZE(shader_stages); i++) { const D3D12_SHADER_BYTECODE *b = (const void *)((uintptr_t)desc + shader_stages[i].offset); - struct vkd3d_shader_interface_info shader_interface; - struct vkd3d_shader_compile_arguments compile_args; if (!b->pShaderBytecode) continue; - /* TODO: Move this to vkd3d_create_shader_stage itself. */ - d3d12_pipeline_state_init_shader_interface(state, device, shader_stages[i].stage, &shader_interface); - shader_interface.xfb_info = shader_stages[i].stage == graphics->cached_desc.xfb_stage ? 
- graphics->cached_desc.xfb_info : NULL; - d3d12_pipeline_state_init_compile_arguments(state, device, shader_interface.stage, &compile_args); - - if (FAILED(hr = vkd3d_create_shader_stage(device, + if (FAILED(hr = vkd3d_create_shader_stage(state, device, &graphics->stages[stage_count], - shader_stages[i].stage, NULL, b, &shader_interface, - &compile_args, + shader_stages[i].stage, NULL, b, &graphics->code[stage_count]))) goto fail; From a098cce48a3494c2f08df15ed2aa04cdf8da5801 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 18 Mar 2022 17:20:10 +0100 Subject: [PATCH 07/19] vkd3d: Streamline vkd3d_create_compute_pipeline. Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/state.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 03a74ff0..36724316 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -2312,9 +2312,7 @@ static void vkd3d_report_pipeline_creation_feedback_results(const VkPipelineCrea static HRESULT vkd3d_create_compute_pipeline(struct d3d12_pipeline_state *state, struct d3d12_device *device, - const D3D12_SHADER_BYTECODE *code, - VkPipelineLayout vk_pipeline_layout, VkPipelineCache vk_cache, VkPipeline *vk_pipeline, - struct vkd3d_shader_code *spirv_code) + const D3D12_SHADER_BYTECODE *code) { VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT required_subgroup_size_info; const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; @@ -2323,9 +2321,14 @@ static HRESULT vkd3d_create_compute_pipeline(struct d3d12_pipeline_state *state, VkPipelineCreationFeedbackEXT feedbacks[1]; VkComputePipelineCreateInfo pipeline_info; VkPipelineCreationFeedbackEXT feedback; + struct vkd3d_shader_code *spirv_code; + VkPipelineCache vk_cache; VkResult vr; HRESULT hr; + vk_cache = state->vk_pso_cache; + spirv_code = &state->compute.code; + pipeline_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; pipeline_info.pNext = NULL; pipeline_info.flags 
= 0; @@ -2334,7 +2337,7 @@ static HRESULT vkd3d_create_compute_pipeline(struct d3d12_pipeline_state *state, VK_SHADER_STAGE_COMPUTE_BIT, &required_subgroup_size_info, code, spirv_code))) return hr; - pipeline_info.layout = vk_pipeline_layout; + pipeline_info.layout = state->root_signature->compute.vk_pipeline_layout; pipeline_info.basePipelineHandle = VK_NULL_HANDLE; pipeline_info.basePipelineIndex = -1; @@ -2360,7 +2363,7 @@ static HRESULT vkd3d_create_compute_pipeline(struct d3d12_pipeline_state *state, feedback_info.pipelineStageCreationFeedbackCount = 0; vr = VK_CALL(vkCreateComputePipelines(device->vk_device, - vk_cache, 1, &pipeline_info, NULL, vk_pipeline)); + vk_cache, 1, &pipeline_info, NULL, &state->compute.vk_pipeline)); TRACE("Called vkCreateComputePipelines.\n"); VK_CALL(vkDestroyShaderModule(device->vk_device, pipeline_info.stage.module, NULL)); @@ -2401,12 +2404,7 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st vkd3d_load_spirv_from_cached_state(device, cached_pso, VK_SHADER_STAGE_COMPUTE_BIT, &state->compute.code); - hr = vkd3d_create_compute_pipeline(state, device, - &desc->cs, - state->root_signature->compute.vk_pipeline_layout, - state->vk_pso_cache, - &state->compute.vk_pipeline, - &state->compute.code); + hr = vkd3d_create_compute_pipeline(state, device, &desc->cs); if (FAILED(hr)) { From ef7924ce86c1c4fb7eaaa5ae69031324206cb9b8 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 21 Mar 2022 11:29:22 +0100 Subject: [PATCH 08/19] vkd3d: Hoist out pipeline cache creation. Not super useful to create a local pipeline cache if we're not going to compile early, but it's super rare, and cleans up the code either way. 
Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/state.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 36724316..565c15a8 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -3629,15 +3629,6 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s if (can_compile_pipeline_early) { - if (!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_GLOBAL_PIPELINE_CACHE)) - { - if ((hr = vkd3d_create_pipeline_cache_from_d3d12_desc(device, cached_pso, &state->vk_pso_cache)) < 0) - { - ERR("Failed to create pipeline cache, hr %d.\n", hr); - goto fail; - } - } - if (!(graphics->pipeline = d3d12_pipeline_state_create_pipeline_variant(state, NULL, graphics->dsv_format, state->vk_pso_cache, &graphics->dynamic_state_flags))) goto fail; @@ -3780,19 +3771,28 @@ HRESULT d3d12_pipeline_state_create(struct d3d12_device *device, VkPipelineBindP object->refcount = 1; object->internal_refcount = 1; - switch (bind_point) + hr = S_OK; + + if (!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_GLOBAL_PIPELINE_CACHE)) + if ((hr = vkd3d_create_pipeline_cache_from_d3d12_desc(device, &desc->cached_pso, &object->vk_pso_cache)) < 0) + ERR("Failed to create pipeline cache, hr %d.\n", hr); + + if (SUCCEEDED(hr)) { - case VK_PIPELINE_BIND_POINT_COMPUTE: - hr = d3d12_pipeline_state_init_compute(object, device, desc, desc_cached_pso); - break; + switch (bind_point) + { + case VK_PIPELINE_BIND_POINT_COMPUTE: + hr = d3d12_pipeline_state_init_compute(object, device, desc, desc_cached_pso); + break; - case VK_PIPELINE_BIND_POINT_GRAPHICS: - hr = d3d12_pipeline_state_init_graphics(object, device, desc, desc_cached_pso); - break; + case VK_PIPELINE_BIND_POINT_GRAPHICS: + hr = d3d12_pipeline_state_init_graphics(object, device, desc, desc_cached_pso); + break; - default: - ERR("Invalid pipeline type %u.", bind_point); - hr = E_INVALIDARG; + default: + ERR("Invalid pipeline type %u.", 
bind_point); + hr = E_INVALIDARG; + } } if (FAILED(hr)) From f16875d195889680bf7cd625c45686ddf2da8b62 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 21 Mar 2022 11:31:51 +0100 Subject: [PATCH 09/19] vkd3d: Add FIXME for dubious use of dsv_plane_optimal_mask. Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/state.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 565c15a8..cd47ca84 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -4125,6 +4125,8 @@ VkPipeline d3d12_pipeline_state_create_pipeline_variant(struct d3d12_pipeline_st if (d3d12_graphics_pipeline_state_has_unknown_dsv_format_with_test(graphics) && dsv_format) TRACE("Compiling %p with fallback DSV format %#x.\n", state, dsv_format->vk_format); + /* FIXME: This gets modified on late recompilation, could there be thread safety issues here? + * For GENERAL depth-stencil, this mask should not matter at all, but there might be edge cases for tracked DSV. */ graphics->dsv_plane_optimal_mask = d3d12_graphics_pipeline_state_get_plane_optimal_mask(graphics, dsv_format); if (key) From 4384b708d79b9d457a2bbc6f765c37687328e304 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 21 Mar 2022 11:52:01 +0100 Subject: [PATCH 10/19] vkd3d: Prepare for system where we can retain DXBC blobs in pipeline. Simplifies the code somewhat. Only iterate over the shader_stages LUT once. Adds concept of duped DXBC blobs as well. 
Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/state.c | 75 ++++++++++++++++++++------------------ libs/vkd3d/vkd3d_private.h | 4 ++ 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index cd47ca84..9168b351 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -1900,6 +1900,18 @@ static void vkd3d_shader_transform_feedback_info_free(struct vkd3d_shader_transf vkd3d_free(xfb_info); } +static void d3d12_pipeline_state_free_cached_desc(struct d3d12_graphics_pipeline_state_cached_desc *cached_desc) +{ + unsigned int i; + vkd3d_shader_transform_feedback_info_free(cached_desc->xfb_info); + + while (cached_desc->bytecode_duped_mask) + { + i = vkd3d_bitmask_iter32(&cached_desc->bytecode_duped_mask); + vkd3d_free((void*)cached_desc->bytecode[i].pShaderBytecode); + } +} + static struct vkd3d_shader_transform_feedback_info *vkd3d_shader_transform_feedback_info_dup( const D3D12_STREAM_OUTPUT_DESC *so_desc) { @@ -1965,7 +1977,7 @@ void d3d12_pipeline_state_dec_ref(struct d3d12_pipeline_state *state) d3d12_root_signature_dec_ref(state->root_signature); if (state->vk_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) - vkd3d_shader_transform_feedback_info_free(state->graphics.cached_desc.xfb_info); + d3d12_pipeline_state_free_cached_desc(&state->graphics.cached_desc); vkd3d_free(state); } } @@ -3081,9 +3093,9 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s struct vkd3d_shader_signature input_signature; VkSampleCountFlagBits sample_count; const struct vkd3d_format *format; - unsigned int i, j, stage_count; unsigned int instance_divisor; VkVertexInputRate input_rate; + unsigned int i, j; size_t rt_count; uint32_t mask; HRESULT hr; @@ -3094,7 +3106,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s enum VkShaderStageFlagBits stage; ptrdiff_t offset; } - shader_stages[] = + shader_stages_lut[] = { {VK_SHADER_STAGE_VERTEX_BIT, offsetof(struct 
d3d12_pipeline_state_desc, vs)}, {VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, offsetof(struct d3d12_pipeline_state_desc, hs)}, @@ -3311,15 +3323,16 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s graphics->patch_vertex_count = 0; - for (i = 0; i < ARRAY_SIZE(shader_stages); ++i) + /* Parse interface data from DXBC blobs. */ + for (i = 0; i < ARRAY_SIZE(shader_stages_lut); ++i) { - const D3D12_SHADER_BYTECODE *b = (const void *)((uintptr_t)desc + shader_stages[i].offset); + const D3D12_SHADER_BYTECODE *b = (const void *)((uintptr_t)desc + shader_stages_lut[i].offset); const struct vkd3d_shader_code dxbc = {b->pShaderBytecode, b->BytecodeLength}; if (!b->pShaderBytecode) continue; - switch (shader_stages[i].stage) + switch (shader_stages_lut[i].stage) { case VK_SHADER_STAGE_VERTEX_BIT: if ((ret = vkd3d_shader_parse_input_signature(&dxbc, &input_signature)) < 0) @@ -3358,6 +3371,11 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s goto fail; } + /* Not owned yet. If we return from pipeline creation without having concrete SPIR-V, + * we'll have to dupe the bytecode and potentially compile to SPIR-V late. */ + graphics->cached_desc.bytecode[graphics->stage_count] = *b; + graphics->cached_desc.bytecode_stages[graphics->stage_count] = shader_stages_lut[i].stage; + ++graphics->stage_count; } @@ -3365,59 +3383,44 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s * We cannot partially fall back since we cannot handle any situation where we need inter-stage code-gen fixups. * In this situation, just generate full SPIR-V from scratch. * This really shouldn't happen unless we have corrupt cache entries. 
*/ - stage_count = 0; - for (i = 0; i < ARRAY_SIZE(shader_stages); i++) + for (i = 0; i < graphics->stage_count; i++) { - const D3D12_SHADER_BYTECODE *b = (const void *)((uintptr_t)desc + shader_stages[i].offset); - if (!b->pShaderBytecode) - continue; - if (FAILED(vkd3d_load_spirv_from_cached_state(device, cached_pso, - shader_stages[i].stage, &graphics->code[stage_count]))) + graphics->cached_desc.bytecode_stages[i], &graphics->code[i]))) { - for (j = 0; j < stage_count; j++) + for (j = 0; j < i; j++) { if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG) - INFO("Discarding cached SPIR-V for stage #%x.\n", shader_stages[i].stage); + INFO("Discarding cached SPIR-V for stage #%x.\n", graphics->cached_desc.bytecode_stages[i]); vkd3d_shader_free_shader_code(&graphics->code[j]); memset(&graphics->code[j], 0, sizeof(graphics->code[j])); } break; } - - ++stage_count; } /* Now create the actual shader modules. If we managed to load SPIR-V from cache, use that directly. * Make sure we don't reset graphics->stage_count since that is a potential memory leak if * we fail to create shader module for whatever reason. 
*/ - stage_count = 0; - for (i = 0; i < ARRAY_SIZE(shader_stages); i++) + for (i = 0; i < graphics->stage_count; i++) { - const D3D12_SHADER_BYTECODE *b = (const void *)((uintptr_t)desc + shader_stages[i].offset); - - if (!b->pShaderBytecode) - continue; - if (FAILED(hr = vkd3d_create_shader_stage(state, device, - &graphics->stages[stage_count], - shader_stages[i].stage, NULL, b, - &graphics->code[stage_count]))) + &graphics->stages[i], + graphics->cached_desc.bytecode_stages[i], NULL, + &graphics->cached_desc.bytecode[i], &graphics->code[i]))) goto fail; - if (shader_stages[i].stage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) - graphics->patch_vertex_count = graphics->code[stage_count].meta.patch_vertex_count; + if (graphics->cached_desc.bytecode_stages[i] == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) + graphics->patch_vertex_count = graphics->code[i].meta.patch_vertex_count; - if ((graphics->code[stage_count].meta.flags & VKD3D_SHADER_META_FLAG_REPLACED) && + if ((graphics->code[i].meta.flags & VKD3D_SHADER_META_FLAG_REPLACED) && device->debug_ring.active) { vkd3d_shader_debug_ring_init_spec_constant(device, - &graphics->spec_info[stage_count], - graphics->code[stage_count].meta.hash); - graphics->stages[stage_count].pSpecializationInfo = &graphics->spec_info[stage_count].spec_info; + &graphics->spec_info[i], + graphics->code[i].meta.hash); + graphics->stages[i].pSpecializationInfo = &graphics->spec_info[i].spec_info; } - - ++stage_count; } graphics->attribute_count = desc->input_layout.NumElements; @@ -3802,7 +3805,7 @@ HRESULT d3d12_pipeline_state_create(struct d3d12_device *device, VkPipelineBindP d3d12_pipeline_state_free_spirv_code(object); d3d12_pipeline_state_destroy_shader_modules(object, device); if (object->vk_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) - vkd3d_shader_transform_feedback_info_free(object->graphics.cached_desc.xfb_info); + d3d12_pipeline_state_free_cached_desc(&object->graphics.cached_desc); 
VK_CALL(vkDestroyPipelineCache(device->vk_device, object->vk_pso_cache, NULL)); vkd3d_free(object); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 1b556450..2dd9866e 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1479,6 +1479,10 @@ struct d3d12_graphics_pipeline_state_cached_desc bool is_dual_source_blending; VkShaderStageFlagBits xfb_stage; struct vkd3d_shader_transform_feedback_info *xfb_info; + + D3D12_SHADER_BYTECODE bytecode[VKD3D_MAX_SHADER_STAGES]; + VkShaderStageFlagBits bytecode_stages[VKD3D_MAX_SHADER_STAGES]; + uint32_t bytecode_duped_mask; }; struct d3d12_graphics_pipeline_state From 7f758e5904552eddfbbd764741ef1b245cb94cdb Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 21 Mar 2022 12:06:47 +0100 Subject: [PATCH 11/19] vkd3d: Refactor stages of obtaining SPIR-V modules. - Try to load SPIR-V from cache - Fallback compile to SPIR-V if necessary - Parse PSO metadata obtained from either compilation or cache lookup Also moves SPIR-V compilation to end of PSO init. Prepares for refactor where we completely decouple PSO creation info setup and SPIR-V compilation. Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/state.c | 128 +++++++++++++++++++++++++++++---------------- 1 file changed, 83 insertions(+), 45 deletions(-) diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 9168b351..ff5b2d40 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -3077,6 +3077,80 @@ static HRESULT d3d12_pipeline_state_validate_blend_state(struct d3d12_pipeline_s return S_OK; } +static void d3d12_pipeline_state_graphics_load_spirv_from_cached_state( + struct d3d12_pipeline_state *state, struct d3d12_device *device, + const struct d3d12_pipeline_state_desc *desc, + const struct d3d12_cached_pipeline_state *cached_pso) +{ + struct d3d12_graphics_pipeline_state *graphics = &state->graphics; + unsigned int i, j; + + /* We only accept SPIR-V from cache if we can successfully load all shaders. 
+ * We cannot partially fall back since we cannot handle any situation where we need inter-stage code-gen fixups. + * In this situation, just generate full SPIR-V from scratch. + * This really shouldn't happen unless we have corrupt cache entries. */ + for (i = 0; i < graphics->stage_count; i++) + { + if (FAILED(vkd3d_load_spirv_from_cached_state(device, cached_pso, + graphics->cached_desc.bytecode_stages[i], &graphics->code[i]))) + { + for (j = 0; j < i; j++) + { + if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG) + INFO("Discarding cached SPIR-V for stage #%x.\n", graphics->cached_desc.bytecode_stages[i]); + vkd3d_shader_free_shader_code(&graphics->code[j]); + memset(&graphics->code[j], 0, sizeof(graphics->code[j])); + } + break; + } + } +} + +static HRESULT d3d12_pipeline_state_graphics_create_shader_stages( + struct d3d12_pipeline_state *state, struct d3d12_device *device, + const struct d3d12_pipeline_state_desc *desc) +{ + struct d3d12_graphics_pipeline_state *graphics = &state->graphics; + unsigned int i; + HRESULT hr; + + /* Now create the actual shader modules. If we managed to load SPIR-V from cache, use that directly. + * Make sure we don't reset graphics->stage_count since that is a potential memory leak if + * we fail to create shader module for whatever reason. 
*/ + for (i = 0; i < graphics->stage_count; i++) + { + if (FAILED(hr = vkd3d_create_shader_stage(state, device, + &graphics->stages[i], + graphics->cached_desc.bytecode_stages[i], NULL, + &graphics->cached_desc.bytecode[i], &graphics->code[i]))) + return hr; + } + + return S_OK; +} + +static void d3d12_pipeline_state_graphics_handle_meta(struct d3d12_pipeline_state *state, + struct d3d12_device *device) +{ + struct d3d12_graphics_pipeline_state *graphics = &state->graphics; + unsigned int i; + + for (i = 0; i < graphics->stage_count; i++) + { + if (graphics->cached_desc.bytecode_stages[i] == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) + graphics->patch_vertex_count = graphics->code[i].meta.patch_vertex_count; + + if ((graphics->code[i].meta.flags & VKD3D_SHADER_META_FLAG_REPLACED) && + device->debug_ring.active) + { + vkd3d_shader_debug_ring_init_spec_constant(device, + &graphics->spec_info[i], + graphics->code[i].meta.hash); + graphics->stages[i].pSpecializationInfo = &graphics->spec_info[i].spec_info; + } + } +} + static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *state, struct d3d12_device *device, const struct d3d12_pipeline_state_desc *desc, const struct d3d12_cached_pipeline_state *cached_pso) @@ -3379,50 +3453,6 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s ++graphics->stage_count; } - /* We only accept SPIR-V from cache if we can successfully load all shaders. - * We cannot partially fall back since we cannot handle any situation where we need inter-stage code-gen fixups. - * In this situation, just generate full SPIR-V from scratch. - * This really shouldn't happen unless we have corrupt cache entries. 
*/ - for (i = 0; i < graphics->stage_count; i++) - { - if (FAILED(vkd3d_load_spirv_from_cached_state(device, cached_pso, - graphics->cached_desc.bytecode_stages[i], &graphics->code[i]))) - { - for (j = 0; j < i; j++) - { - if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG) - INFO("Discarding cached SPIR-V for stage #%x.\n", graphics->cached_desc.bytecode_stages[i]); - vkd3d_shader_free_shader_code(&graphics->code[j]); - memset(&graphics->code[j], 0, sizeof(graphics->code[j])); - } - break; - } - } - - /* Now create the actual shader modules. If we managed to load SPIR-V from cache, use that directly. - * Make sure we don't reset graphics->stage_count since that is a potential memory leak if - * we fail to create shader module for whatever reason. */ - for (i = 0; i < graphics->stage_count; i++) - { - if (FAILED(hr = vkd3d_create_shader_stage(state, device, - &graphics->stages[i], - graphics->cached_desc.bytecode_stages[i], NULL, - &graphics->cached_desc.bytecode[i], &graphics->code[i]))) - goto fail; - - if (graphics->cached_desc.bytecode_stages[i] == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) - graphics->patch_vertex_count = graphics->code[i].meta.patch_vertex_count; - - if ((graphics->code[i].meta.flags & VKD3D_SHADER_META_FLAG_REPLACED) && - device->debug_ring.active) - { - vkd3d_shader_debug_ring_init_spec_constant(device, - &graphics->spec_info[i], - graphics->code[i].meta.hash); - graphics->stages[i].pSpecializationInfo = &graphics->spec_info[i].spec_info; - } - } - graphics->attribute_count = desc->input_layout.NumElements; if (graphics->attribute_count > ARRAY_SIZE(graphics->attributes)) { @@ -3620,6 +3650,14 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s goto fail; } + d3d12_pipeline_state_graphics_load_spirv_from_cached_state(state, device, desc, cached_pso); + if (FAILED(hr = d3d12_pipeline_state_graphics_create_shader_stages(state, device, desc))) + goto fail; + + /* At this point, we will have valid 
meta structures set up. + * Deduce further PSO information from these structs. */ + d3d12_pipeline_state_graphics_handle_meta(state, device); + /* If we don't know vertex count for tessellation shaders, we need to defer compilation, but this should * be exceedingly rare. */ can_compile_pipeline_early = @@ -3777,7 +3815,7 @@ HRESULT d3d12_pipeline_state_create(struct d3d12_device *device, VkPipelineBindP hr = S_OK; if (!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_GLOBAL_PIPELINE_CACHE)) - if ((hr = vkd3d_create_pipeline_cache_from_d3d12_desc(device, &desc->cached_pso, &object->vk_pso_cache)) < 0) + if ((hr = vkd3d_create_pipeline_cache_from_d3d12_desc(device, desc_cached_pso, &object->vk_pso_cache)) < 0) ERR("Failed to create pipeline cache, hr %d.\n", hr); if (SUCCEEDED(hr)) From 131ff90ca3b04706b81944cf0c541b34246968bd Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 21 Mar 2022 12:26:53 +0100 Subject: [PATCH 12/19] vkd3d: Separate out the different stages of graphics PSO creation. 
Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/state.c | 68 +++++++++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 19 deletions(-) diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index ff5b2d40..50fa4886 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -3151,9 +3151,8 @@ static void d3d12_pipeline_state_graphics_handle_meta(struct d3d12_pipeline_stat } } -static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *state, - struct d3d12_device *device, const struct d3d12_pipeline_state_desc *desc, - const struct d3d12_cached_pipeline_state *cached_pso) +static HRESULT d3d12_pipeline_state_init_graphics_create_info(struct d3d12_pipeline_state *state, + struct d3d12_device *device, const struct d3d12_pipeline_state_desc *desc) { const VkPhysicalDeviceFeatures *features = &device->device_info.features2.features; struct d3d12_graphics_pipeline_state *graphics = &state->graphics; @@ -3162,13 +3161,13 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s const struct vkd3d_vulkan_info *vk_info = &device->vk_info; uint32_t instance_divisors[D3D12_VS_INPUT_REGISTER_COUNT]; uint32_t aligned_offsets[D3D12_VS_INPUT_REGISTER_COUNT]; - bool have_attachment, can_compile_pipeline_early; struct vkd3d_shader_signature output_signature; struct vkd3d_shader_signature input_signature; VkSampleCountFlagBits sample_count; const struct vkd3d_format *format; unsigned int instance_divisor; VkVertexInputRate input_rate; + bool have_attachment; unsigned int i, j; size_t rt_count; uint32_t mask; @@ -3190,6 +3189,8 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s }; state->vk_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS; + /* Defer taking ref-count until completion. 
*/ + state->device = device; graphics->stage_count = 0; graphics->primitive_topology_type = desc->primitive_topology_type; @@ -3650,13 +3651,37 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s goto fail; } + return S_OK; + +fail: + vkd3d_shader_free_shader_signature(&input_signature); + vkd3d_shader_free_shader_signature(&output_signature); + + return hr; +} + +static HRESULT d3d12_pipeline_state_init_graphics_spirv(struct d3d12_pipeline_state *state, + const struct d3d12_pipeline_state_desc *desc, + const struct d3d12_cached_pipeline_state *cached_pso) +{ + struct d3d12_device *device = state->device; + HRESULT hr; + d3d12_pipeline_state_graphics_load_spirv_from_cached_state(state, device, desc, cached_pso); if (FAILED(hr = d3d12_pipeline_state_graphics_create_shader_stages(state, device, desc))) - goto fail; + return hr; /* At this point, we will have valid meta structures set up. * Deduce further PSO information from these structs. */ d3d12_pipeline_state_graphics_handle_meta(state, device); + return S_OK; +} + +static HRESULT d3d12_pipeline_state_init_static_pipeline(struct d3d12_pipeline_state *state, + const struct d3d12_pipeline_state_desc *desc) +{ + struct d3d12_graphics_pipeline_state *graphics = &state->graphics; + bool can_compile_pipeline_early; /* If we don't know vertex count for tessellation shaders, we need to defer compilation, but this should * be exceedingly rare. 
*/ @@ -3666,33 +3691,31 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s graphics->pipeline_layout = state->root_signature->graphics.vk_pipeline_layout; graphics->pipeline = VK_NULL_HANDLE; - state->device = device; if (can_compile_pipeline_early) { if (!(graphics->pipeline = d3d12_pipeline_state_create_pipeline_variant(state, NULL, graphics->dsv_format, state->vk_pso_cache, &graphics->dynamic_state_flags))) - goto fail; + return E_OUTOFMEMORY; } else { graphics->dsv_plane_optimal_mask = d3d12_graphics_pipeline_state_get_plane_optimal_mask(graphics, NULL); } - list_init(&graphics->compiled_fallback_pipelines); - - if (FAILED(hr = vkd3d_private_store_init(&state->private_store))) - goto fail; - - d3d12_device_add_ref(state->device); - return S_OK; +} -fail: - vkd3d_shader_free_shader_signature(&input_signature); - vkd3d_shader_free_shader_signature(&output_signature); +static HRESULT d3d12_pipeline_state_finish_graphics(struct d3d12_pipeline_state *state) +{ + struct d3d12_graphics_pipeline_state *graphics = &state->graphics; + HRESULT hr; - return hr; + list_init(&graphics->compiled_fallback_pipelines); + if (FAILED(hr = vkd3d_private_store_init(&state->private_store))) + return hr; + d3d12_device_add_ref(state->device); + return S_OK; } bool d3d12_pipeline_state_has_replaced_shaders(struct d3d12_pipeline_state *state) @@ -3827,7 +3850,14 @@ HRESULT d3d12_pipeline_state_create(struct d3d12_device *device, VkPipelineBindP break; case VK_PIPELINE_BIND_POINT_GRAPHICS: - hr = d3d12_pipeline_state_init_graphics(object, device, desc, desc_cached_pso); + /* Creating a graphics PSO is more involved ... 
*/ + hr = d3d12_pipeline_state_init_graphics_create_info(object, device, desc); + if (SUCCEEDED(hr)) + hr = d3d12_pipeline_state_init_graphics_spirv(object, desc, desc_cached_pso); + if (SUCCEEDED(hr)) + hr = d3d12_pipeline_state_init_static_pipeline(object, desc); + if (SUCCEEDED(hr)) + hr = d3d12_pipeline_state_finish_graphics(object); break; default: From 0123e5fe5c256bda7b9df258701c59d54c9fe536 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 21 Mar 2022 12:36:49 +0100 Subject: [PATCH 13/19] vkd3d: Stub out DXBC code duplication for later. When we have the ability to load PSO from identifiers only, we need to retain DXBC blobs for later. Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/state.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 50fa4886..b57360b0 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -3709,8 +3709,29 @@ static HRESULT d3d12_pipeline_state_init_static_pipeline(struct d3d12_pipeline_s static HRESULT d3d12_pipeline_state_finish_graphics(struct d3d12_pipeline_state *state) { struct d3d12_graphics_pipeline_state *graphics = &state->graphics; + unsigned int i; + void *new_code; HRESULT hr; + /* If we got here successfully without SPIR-V code, + * it means we'll need to defer compilation from DXBC -> SPIR-V. + * Dupe the DXBC code. + * TODO: This codepath is not relevant yet. 
*/ + for (i = 0; i < graphics->stage_count; i++) + { + if (graphics->code[i].size || graphics->stages[i].module != VK_NULL_HANDLE || + !graphics->cached_desc.bytecode[i].BytecodeLength) + continue; + + new_code = vkd3d_malloc(graphics->cached_desc.bytecode[i].BytecodeLength); + if (!new_code) + return E_OUTOFMEMORY; + memcpy(new_code, graphics->cached_desc.bytecode[i].pShaderBytecode, + graphics->cached_desc.bytecode[i].BytecodeLength); + graphics->cached_desc.bytecode[i].pShaderBytecode = new_code; + graphics->cached_desc.bytecode_duped_mask |= 1u << i; + } + list_init(&graphics->compiled_fallback_pipelines); if (FAILED(hr = vkd3d_private_store_init(&state->private_store))) return hr; From f510e92f6e1df483e90e172258da4a55d94c42a5 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 21 Mar 2022 13:00:37 +0100 Subject: [PATCH 14/19] vkd3d: Separate compilation to SPIR-V and creation of VkShaderModule. Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/state.c | 125 +++++++++++++++++++++++++-------------------- 1 file changed, 71 insertions(+), 54 deletions(-) diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index b57360b0..0083bb1c 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -1792,7 +1792,7 @@ static ULONG STDMETHODCALLTYPE d3d12_pipeline_state_AddRef(ID3D12PipelineState * } static HRESULT d3d12_pipeline_state_create_shader_module(struct d3d12_device *device, - VkPipelineShaderStageCreateInfo *stage_desc, const struct vkd3d_shader_code *code) + VkShaderModule *vk_module, const struct vkd3d_shader_code *code) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkShaderModuleCreateInfo shader_desc; @@ -1800,7 +1800,7 @@ static HRESULT d3d12_pipeline_state_create_shader_module(struct d3d12_device *de VkResult vr; /* If we kept the module around, no need to create it again. 
*/ - if (stage_desc->module != VK_NULL_HANDLE) + if (*vk_module != VK_NULL_HANDLE) return S_OK; shader_desc.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; @@ -1809,7 +1809,7 @@ static HRESULT d3d12_pipeline_state_create_shader_module(struct d3d12_device *de shader_desc.codeSize = code->size; shader_desc.pCode = code->code; - vr = VK_CALL(vkCreateShaderModule(device->vk_device, &shader_desc, NULL, &stage_desc->module)); + vr = VK_CALL(vkCreateShaderModule(device->vk_device, &shader_desc, NULL, vk_module)); if (vr < 0) { WARN("Failed to create Vulkan shader module, vr %d.\n", vr); @@ -1818,7 +1818,7 @@ static HRESULT d3d12_pipeline_state_create_shader_module(struct d3d12_device *de /* Helpful for tooling like RenderDoc. */ sprintf(hash_str, "%016"PRIx64, code->meta.hash); - vkd3d_set_vk_object_name(device, (uint64_t)stage_desc->module, VK_OBJECT_TYPE_SHADER_MODULE, hash_str); + vkd3d_set_vk_object_name(device, (uint64_t)*vk_module, VK_OBJECT_TYPE_SHADER_MODULE, hash_str); return S_OK; } @@ -2180,56 +2180,18 @@ static void d3d12_pipeline_state_init_compile_arguments(struct d3d12_pipeline_st } } -static HRESULT vkd3d_create_shader_stage(struct d3d12_pipeline_state *state, struct d3d12_device *device, +static HRESULT vkd3d_setup_shader_stage(struct d3d12_pipeline_state *state, struct d3d12_device *device, VkPipelineShaderStageCreateInfo *stage_desc, VkShaderStageFlagBits stage, VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *required_subgroup_size_info, - const D3D12_SHADER_BYTECODE *code, struct vkd3d_shader_code *spirv_code) + const struct vkd3d_shader_code *spirv_code) { - struct vkd3d_shader_code dxbc = {code->pShaderBytecode, code->BytecodeLength}; - struct vkd3d_shader_interface_info shader_interface; - struct vkd3d_shader_compile_arguments compile_args; - vkd3d_shader_hash_t recovered_hash = 0; - vkd3d_shader_hash_t compiled_hash = 0; - int ret; - stage_desc->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; stage_desc->pNext = NULL; 
stage_desc->flags = 0; stage_desc->stage = stage; stage_desc->pName = "main"; stage_desc->pSpecializationInfo = NULL; - - if (spirv_code->code && (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_SANITIZE_SPIRV)) - { - recovered_hash = vkd3d_shader_hash(spirv_code); - vkd3d_shader_free_shader_code(spirv_code); - memset(spirv_code, 0, sizeof(*spirv_code)); - } - - if (!spirv_code->code) - { - TRACE("Calling vkd3d_shader_compile_dxbc.\n"); - - d3d12_pipeline_state_init_shader_interface(state, device, stage, &shader_interface); - d3d12_pipeline_state_init_compile_arguments(state, device, stage, &compile_args); - - if ((ret = vkd3d_shader_compile_dxbc(&dxbc, spirv_code, 0, &shader_interface, &compile_args)) < 0) - { - WARN("Failed to compile shader, vkd3d result %d.\n", ret); - return hresult_from_vkd3d_result(ret); - } - TRACE("Called vkd3d_shader_compile_dxbc.\n"); - } - - /* Debug compare SPIR-V we got from cache, and SPIR-V we got from compilation. */ - if (recovered_hash) - { - compiled_hash = vkd3d_shader_hash(spirv_code); - if (compiled_hash == recovered_hash) - INFO("SPIR-V match for cache reference OK!\n"); - else - INFO("SPIR-V mismatch for cache reference!\n"); - } + stage_desc->module = VK_NULL_HANDLE; if (!d3d12_device_validate_shader_meta(device, &spirv_code->meta)) return E_INVALIDARG; @@ -2277,8 +2239,52 @@ static HRESULT vkd3d_create_shader_stage(struct d3d12_pipeline_state *state, str } } - stage_desc->module = VK_NULL_HANDLE; - return d3d12_pipeline_state_create_shader_module(device, stage_desc, spirv_code); + return d3d12_pipeline_state_create_shader_module(device, &stage_desc->module, spirv_code); +} + +static HRESULT vkd3d_compile_shader_stage(struct d3d12_pipeline_state *state, struct d3d12_device *device, + VkShaderStageFlagBits stage, const D3D12_SHADER_BYTECODE *code, struct vkd3d_shader_code *spirv_code) +{ + struct vkd3d_shader_code dxbc = {code->pShaderBytecode, code->BytecodeLength}; + struct vkd3d_shader_interface_info 
shader_interface; + struct vkd3d_shader_compile_arguments compile_args; + vkd3d_shader_hash_t recovered_hash = 0; + vkd3d_shader_hash_t compiled_hash = 0; + int ret; + + if (spirv_code->code && (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_SANITIZE_SPIRV)) + { + recovered_hash = vkd3d_shader_hash(spirv_code); + vkd3d_shader_free_shader_code(spirv_code); + memset(spirv_code, 0, sizeof(*spirv_code)); + } + + if (!spirv_code->code) + { + TRACE("Calling vkd3d_shader_compile_dxbc.\n"); + + d3d12_pipeline_state_init_shader_interface(state, device, stage, &shader_interface); + d3d12_pipeline_state_init_compile_arguments(state, device, stage, &compile_args); + + if ((ret = vkd3d_shader_compile_dxbc(&dxbc, spirv_code, 0, &shader_interface, &compile_args)) < 0) + { + WARN("Failed to compile shader, vkd3d result %d.\n", ret); + return hresult_from_vkd3d_result(ret); + } + TRACE("Called vkd3d_shader_compile_dxbc.\n"); + } + + /* Debug compare SPIR-V we got from cache, and SPIR-V we got from compilation. 
*/ + if (recovered_hash) + { + compiled_hash = vkd3d_shader_hash(spirv_code); + if (compiled_hash == recovered_hash) + INFO("SPIR-V match for cache reference OK!\n"); + else + INFO("SPIR-V mismatch for cache reference!\n"); + } + + return S_OK; } static void vkd3d_report_pipeline_creation_feedback_results(const VkPipelineCreationFeedbackCreateInfoEXT *feedback) @@ -2344,11 +2350,16 @@ static HRESULT vkd3d_create_compute_pipeline(struct d3d12_pipeline_state *state, pipeline_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; pipeline_info.pNext = NULL; pipeline_info.flags = 0; - if (FAILED(hr = vkd3d_create_shader_stage(state, device, - &pipeline_info.stage, - VK_SHADER_STAGE_COMPUTE_BIT, &required_subgroup_size_info, - code, spirv_code))) + + if (FAILED(hr = vkd3d_compile_shader_stage(state, device, + VK_SHADER_STAGE_COMPUTE_BIT, code, spirv_code))) return hr; + + if (FAILED(hr = vkd3d_setup_shader_stage(state, device, + &pipeline_info.stage, VK_SHADER_STAGE_COMPUTE_BIT, &required_subgroup_size_info, + spirv_code))) + return hr; + pipeline_info.layout = state->root_signature->compute.vk_pipeline_layout; pipeline_info.basePipelineHandle = VK_NULL_HANDLE; pipeline_info.basePipelineIndex = -1; @@ -3119,10 +3130,15 @@ static HRESULT d3d12_pipeline_state_graphics_create_shader_stages( * we fail to create shader module for whatever reason. 
*/ for (i = 0; i < graphics->stage_count; i++) { - if (FAILED(hr = vkd3d_create_shader_stage(state, device, + if (FAILED(hr = vkd3d_compile_shader_stage(state, device, + graphics->cached_desc.bytecode_stages[i], + &graphics->cached_desc.bytecode[i], &graphics->code[i]))) + return hr; + + if (FAILED(hr = vkd3d_setup_shader_stage(state, device, &graphics->stages[i], graphics->cached_desc.bytecode_stages[i], NULL, - &graphics->cached_desc.bytecode[i], &graphics->code[i]))) + &graphics->code[i]))) return hr; } @@ -4231,7 +4247,8 @@ VkPipeline d3d12_pipeline_state_create_pipeline_variant(struct d3d12_pipeline_st { if (stages[i].module == VK_NULL_HANDLE && graphics->code[i].code) { - if (FAILED(hr = d3d12_pipeline_state_create_shader_module(device, &stages[i], &graphics->code[i]))) + if (FAILED(hr = d3d12_pipeline_state_create_shader_module(device, + &stages[i].module, &graphics->code[i]))) { /* This is kind of fatal and should only happen for out-of-memory. */ ERR("Unexpected failure (hr %x) in creating fallback SPIR-V module.\n", hr); From b0a706cb4ea416f6b6fc523e3c697160244351ee Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 21 Mar 2022 13:21:47 +0100 Subject: [PATCH 15/19] cache: Explicitly do not serialize SPIR-V code for cached PSOs. With upcoming refactor, we might have to compile code on the fly. To avoid any race conditions on fallback compile storing code[i] <-> StorePipeline reading code[i], explicitly mark that code[] should be ignored. 
Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/cache.c | 27 ++++++++++++++++----------- libs/vkd3d/state.c | 4 ++++ libs/vkd3d/vkd3d_private.h | 1 + 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/libs/vkd3d/cache.c b/libs/vkd3d/cache.c index eef6ebd8..6bb4e046 100644 --- a/libs/vkd3d/cache.c +++ b/libs/vkd3d/cache.c @@ -903,18 +903,21 @@ static VkResult vkd3d_serialize_pipeline_state_inline(const struct d3d12_pipelin chunk = finish_and_iterate_blob_chunk(chunk); } - if (d3d12_pipeline_state_is_graphics(state)) + if (!state->pso_is_loaded_from_cached_blob) { - for (i = 0; i < state->graphics.stage_count; i++) + if (d3d12_pipeline_state_is_graphics(state)) { - vkd3d_shader_code_serialize_inline(&state->graphics.code[i], state->graphics.stages[i].stage, - varint_size[i], &chunk); + for (i = 0; i < state->graphics.stage_count; i++) + { + vkd3d_shader_code_serialize_inline(&state->graphics.code[i], state->graphics.stages[i].stage, + varint_size[i], &chunk); + } + } + else if (d3d12_pipeline_state_is_compute(state)) + { + vkd3d_shader_code_serialize_inline(&state->compute.code, VK_SHADER_STAGE_COMPUTE_BIT, + varint_size[0], &chunk); } - } - else if (d3d12_pipeline_state_is_compute(state)) - { - vkd3d_shader_code_serialize_inline(&state->compute.code, VK_SHADER_STAGE_COMPUTE_BIT, - varint_size[0], &chunk); } return VK_SUCCESS; @@ -994,7 +997,8 @@ static VkResult vkd3d_serialize_pipeline_state_referenced(struct d3d12_pipeline_ chunk = finish_and_iterate_blob_chunk(chunk); } - if (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SAVE_FULL_SPIRV) + if ((pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SAVE_FULL_SPIRV) && + !state->pso_is_loaded_from_cached_blob) { if (d3d12_pipeline_state_is_graphics(state)) { @@ -1054,7 +1058,8 @@ VkResult vkd3d_serialize_pipeline_state(struct d3d12_pipeline_library *pipeline_ vk_blob_size += VKD3D_PIPELINE_BLOB_CHUNK_SIZE_RAW(vk_blob_size_pipeline_cache); } - if (!pipeline_library || (pipeline_library->flags 
& VKD3D_PIPELINE_LIBRARY_FLAG_SAVE_FULL_SPIRV)) + if ((!pipeline_library || (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SAVE_FULL_SPIRV)) && + !state->pso_is_loaded_from_cached_blob) { if (d3d12_pipeline_state_is_graphics(state)) { diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 0083bb1c..8a46d5f8 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -3938,6 +3938,10 @@ HRESULT d3d12_pipeline_state_create(struct d3d12_device *device, VkPipelineBindP { VK_CALL(vkDestroyPipelineCache(device->vk_device, object->vk_pso_cache, NULL)); object->vk_pso_cache = VK_NULL_HANDLE; + + /* Set this explicitly so we avoid attempting to touch code[i] when serializing the PSO blob. + * We are at risk of compiling code on the fly in some upcoming situations. */ + object->pso_is_loaded_from_cached_blob = true; } else if (device->disk_cache.library) { diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 2dd9866e..d3fbbcf3 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1575,6 +1575,7 @@ struct d3d12_pipeline_state struct d3d12_root_signature *root_signature; struct d3d12_device *device; bool root_signature_compat_hash_is_dxbc_derived; + bool pso_is_loaded_from_cached_blob; struct vkd3d_private_store private_store; }; From b42caa0bffaf5a879018616b5f3c0780d90e1f65 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 21 Mar 2022 13:54:09 +0100 Subject: [PATCH 16/19] vkd3d: Use rwlock instead of spinlock in PSO fallback cache. If we defer SPIR-V compilation we risk holding the lock for quite a long time. 
Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/state.c | 17 +++++++++++++---- libs/vkd3d/vkd3d_private.h | 2 +- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 8a46d5f8..72815dcf 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -1978,6 +1978,7 @@ void d3d12_pipeline_state_dec_ref(struct d3d12_pipeline_state *state) if (state->vk_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) d3d12_pipeline_state_free_cached_desc(&state->graphics.cached_desc); + rwlock_destroy(&state->lock); vkd3d_free(state); } } @@ -3807,6 +3808,12 @@ HRESULT d3d12_pipeline_state_create(struct d3d12_device *device, VkPipelineBindP memset(object, 0, sizeof(*object)); + if (rwlock_init(&object->lock)) + { + vkd3d_free(object); + return E_FAIL; + } + if (!desc->root_signature) { if (FAILED(hr = d3d12_pipeline_create_private_root_signature(device, @@ -3838,6 +3845,7 @@ HRESULT d3d12_pipeline_state_create(struct d3d12_device *device, VkPipelineBindP { if (object->root_signature) d3d12_root_signature_dec_ref(object->root_signature); + rwlock_destroy(&object->lock); vkd3d_free(object); return hr; } @@ -3912,6 +3920,7 @@ HRESULT d3d12_pipeline_state_create(struct d3d12_device *device, VkPipelineBindP if (object->vk_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) d3d12_pipeline_state_free_cached_desc(&object->graphics.cached_desc); VK_CALL(vkDestroyPipelineCache(device->vk_device, object->vk_pso_cache, NULL)); + rwlock_destroy(&object->lock); vkd3d_free(object); return hr; @@ -4060,7 +4069,7 @@ static VkPipeline d3d12_pipeline_state_find_compiled_pipeline(struct d3d12_pipel struct vkd3d_compiled_pipeline *current; VkPipeline vk_pipeline = VK_NULL_HANDLE; - rw_spinlock_acquire_read(&state->lock); + rwlock_lock_read(&state->lock); LIST_FOR_EACH_ENTRY(current, &graphics->compiled_fallback_pipelines, struct vkd3d_compiled_pipeline, entry) { if (!memcmp(&current->key, key, sizeof(*key))) @@ -4070,7 +4079,7 @@ static VkPipeline
d3d12_pipeline_state_find_compiled_pipeline(struct d3d12_pipel break; } } - rw_spinlock_release_read(&state->lock); + rwlock_unlock_read(&state->lock); return vk_pipeline; } @@ -4088,7 +4097,7 @@ static bool d3d12_pipeline_state_put_pipeline_to_cache(struct d3d12_pipeline_sta compiled_pipeline->vk_pipeline = vk_pipeline; compiled_pipeline->dynamic_state_flags = dynamic_state_flags; - rw_spinlock_acquire_write(&state->lock); + rwlock_lock_write(&state->lock); LIST_FOR_EACH_ENTRY(current, &graphics->compiled_fallback_pipelines, struct vkd3d_compiled_pipeline, entry) { @@ -4103,7 +4112,7 @@ static bool d3d12_pipeline_state_put_pipeline_to_cache(struct d3d12_pipeline_sta if (compiled_pipeline) list_add_tail(&graphics->compiled_fallback_pipelines, &compiled_pipeline->entry); - rw_spinlock_release_write(&state->lock); + rwlock_unlock_write(&state->lock); return compiled_pipeline; } diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index d3fbbcf3..e54d0063 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1569,7 +1569,7 @@ struct d3d12_pipeline_state }; VkPipelineBindPoint vk_bind_point; VkPipelineCache vk_pso_cache; - spinlock_t lock; + rwlock_t lock; struct vkd3d_pipeline_cache_compatibility pipeline_cache_compat; struct d3d12_root_signature *root_signature; From d9dc4b862a55097f0fa14b5eca4846e82ba6e954 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 21 Mar 2022 14:26:25 +0100 Subject: [PATCH 17/19] vkd3d: Add helper for late compilation of DXBC -> SPIR-V. 
Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/state.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 72815dcf..7185906e 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -2288,6 +2288,67 @@ static HRESULT vkd3d_compile_shader_stage(struct d3d12_pipeline_state *state, st return S_OK; } +static HRESULT vkd3d_late_compile_shader_stages(struct d3d12_pipeline_state *state) +{ + /* We are at risk of having to compile pipelines late if we return from CreatePipelineState without + * either code[i] or module being non-null. */ + struct d3d12_graphics_pipeline_state *graphics = &state->graphics; + bool need_compile = false; + unsigned int i; + HRESULT hr; + + rwlock_lock_read(&state->lock); + for (i = 0; i < graphics->stage_count; i++) + { + if (!graphics->code[i].size && graphics->stages[i].module == VK_NULL_HANDLE && + graphics->cached_desc.bytecode[i].BytecodeLength) + { + need_compile = true; + break; + } + } + rwlock_unlock_read(&state->lock); + + if (!need_compile) + return S_OK; + + /* Taking a writer lock here is kinda horrible, + * but we really shouldn't hit this path except in extreme circumstances. */ + hr = S_OK; + rwlock_lock_write(&state->lock); + for (i = 0; i < graphics->stage_count; i++) + { + if (graphics->stages[i].module == VK_NULL_HANDLE && !graphics->code[i].size && + graphics->cached_desc.bytecode[i].BytecodeLength) + { + if (FAILED(hr = vkd3d_compile_shader_stage(state, state->device, graphics->cached_desc.bytecode_stages[i], + &graphics->cached_desc.bytecode[i], &graphics->code[i]))) + break; + } + + if (FAILED(hr = d3d12_pipeline_state_create_shader_module(state->device, &graphics->stages[i].module, + &graphics->code[i]))) + break; + + /* We'll keep the module around here, no need to keep code/size pairs around for this. 
+ * If we're in a situation where late compile is relevant, we're using PSO cached blobs, + * so we never expect to serialize out SPIR-V either way. */ + vkd3d_shader_free_shader_code(&graphics->code[i]); + graphics->code[i].code = NULL; + graphics->code[i].size = 0; + + /* Don't need the DXBC blob anymore either. */ + if (graphics->cached_desc.bytecode_duped_mask & (1u << i)) + { + vkd3d_free((void*)graphics->cached_desc.bytecode[i].pShaderBytecode); + memset(&graphics->cached_desc.bytecode[i], 0, sizeof(graphics->cached_desc.bytecode[i])); + graphics->cached_desc.bytecode_duped_mask &= ~(1u << i); + } + } + rwlock_unlock_write(&state->lock); + return hr; +} + static void vkd3d_report_pipeline_creation_feedback_results(const VkPipelineCreationFeedbackCreateInfoEXT *feedback) { uint32_t i; From 4d708bd7fe22586b3936dcc15d3f49e063c35fd0 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 21 Mar 2022 14:26:47 +0100 Subject: [PATCH 18/19] vkd3d: Enable prototype extension VK_EXT_shader_module_identifier. Signed-off-by: Hans-Kristian Arntzen --- include/private/vulkan_private_extensions.h | 2 +- libs/vkd3d/device.c | 19 +++++++++++++++++++ libs/vkd3d/vkd3d_private.h | 5 +++++ libs/vkd3d/vulkan_procs.h | 3 +++ subprojects/Vulkan-Headers | 2 +- 5 files changed, 29 insertions(+), 2 deletions(-) diff --git a/include/private/vulkan_private_extensions.h b/include/private/vulkan_private_extensions.h index 03746346..86158187 100644 --- a/include/private/vulkan_private_extensions.h +++ b/include/private/vulkan_private_extensions.h @@ -1,6 +1,6 @@ #ifndef __VULKAN_PRIVATE_EXTENSIONS_H__ #define __VULKAN_PRIVATE_EXTENSIONS_H__ -/* Nothing here at the moment. Add hacks here! */ +/* Add hacks here! 
*/ #endif diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 19090951..3011f9e3 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -115,6 +115,8 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = VK_EXTENSION(EXT_SHADER_IMAGE_ATOMIC_INT64, EXT_shader_image_atomic_int64), VK_EXTENSION(EXT_SCALAR_BLOCK_LAYOUT, EXT_scalar_block_layout), VK_EXTENSION(EXT_PIPELINE_CREATION_FEEDBACK, EXT_pipeline_creation_feedback), + VK_EXTENSION(EXT_PIPELINE_CREATION_CACHE_CONTROL, EXT_pipeline_creation_cache_control), + VK_EXTENSION(EXT_SHADER_MODULE_IDENTIFIER, EXT_shader_module_identifier), /* AMD extensions */ VK_EXTENSION(AMD_BUFFER_MARKER, AMD_buffer_marker), VK_EXTENSION(AMD_DEVICE_COHERENT_MEMORY, AMD_device_coherent_memory), @@ -1490,6 +1492,23 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i vk_prepend_struct(&info->features2, &info->device_coherent_memory_features_amd); } + if (vulkan_info->EXT_pipeline_creation_cache_control) + { + info->pipeline_creation_cache_control_features.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES; + vk_prepend_struct(&info->features2, &info->pipeline_creation_cache_control_features); + } + + if (vulkan_info->EXT_shader_module_identifier) + { + info->shader_module_identifier_features.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_FEATURES_EXT; + info->shader_module_identifier_properties.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_PROPERTIES_EXT; + vk_prepend_struct(&info->features2, &info->shader_module_identifier_features); + vk_prepend_struct(&info->properties2, &info->shader_module_identifier_properties); + } + /* Core in Vulkan 1.1. 
*/ info->shader_draw_parameters_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES; vk_prepend_struct(&info->features2, &info->shader_draw_parameters_features); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index e54d0063..93fcc6c7 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -160,6 +160,8 @@ struct vkd3d_vulkan_info bool EXT_shader_image_atomic_int64; bool EXT_scalar_block_layout; bool EXT_pipeline_creation_feedback; + bool EXT_pipeline_creation_cache_control; + bool EXT_shader_module_identifier; /* AMD device extensions */ bool AMD_buffer_marker; bool AMD_device_coherent_memory; @@ -3167,6 +3169,7 @@ struct vkd3d_physical_device_info VkPhysicalDeviceDriverPropertiesKHR driver_properties; VkPhysicalDeviceMaintenance4PropertiesKHR maintenance4_properties; VkPhysicalDeviceDeviceGeneratedCommandsPropertiesNV device_generated_commands_properties_nv; + VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT shader_module_identifier_properties; VkPhysicalDeviceProperties2KHR properties2; @@ -3212,6 +3215,8 @@ struct vkd3d_physical_device_info VkPhysicalDeviceMaintenance4FeaturesKHR maintenance4_features; VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR ray_tracing_maintenance1_features; VkPhysicalDeviceDeviceGeneratedCommandsFeaturesNV device_generated_commands_features_nv; + VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT pipeline_creation_cache_control_features; + VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT shader_module_identifier_features; VkPhysicalDeviceFeatures2 features2; diff --git a/libs/vkd3d/vulkan_procs.h b/libs/vkd3d/vulkan_procs.h index e7a1b2e4..71b9dc51 100644 --- a/libs/vkd3d/vulkan_procs.h +++ b/libs/vkd3d/vulkan_procs.h @@ -311,6 +311,9 @@ VK_DEVICE_EXT_PFN(vkDestroyIndirectCommandsLayoutNV) VK_DEVICE_EXT_PFN(vkGetGeneratedCommandsMemoryRequirementsNV) VK_DEVICE_EXT_PFN(vkCmdExecuteGeneratedCommandsNV) +/* VK_EXT_shader_module_identifier */ 
+VK_DEVICE_EXT_PFN(vkGetShaderModuleIdentifierEXT) + #undef VK_INSTANCE_PFN #undef VK_INSTANCE_EXT_PFN #undef VK_DEVICE_PFN diff --git a/subprojects/Vulkan-Headers b/subprojects/Vulkan-Headers index 245d25ce..2c823b7f 160000 --- a/subprojects/Vulkan-Headers +++ b/subprojects/Vulkan-Headers @@ -1 +1 @@ -Subproject commit 245d25ce8c3337919dc7916d0e62e31a0d8748ab +Subproject commit 2c823b7f27590ec0a489f7fbe14b154e13fa5cfb From fc69f469d516ddab8118571adfb0208dd868409c Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 21 Mar 2022 16:24:32 +0100 Subject: [PATCH 19/19] vkd3d: Prototype implementation of shader module identifier. Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/cache.c | 152 +++++++++++++++++++++++++++++++++-- libs/vkd3d/state.c | 160 ++++++++++++++++++++++++++++++++----- libs/vkd3d/vkd3d_private.h | 8 +- libs/vkd3d/vulkan_procs.h | 1 + 4 files changed, 296 insertions(+), 25 deletions(-) diff --git a/libs/vkd3d/cache.c b/libs/vkd3d/cache.c index 6bb4e046..39fc8fb8 100644 --- a/libs/vkd3d/cache.c +++ b/libs/vkd3d/cache.c @@ -198,6 +198,8 @@ enum vkd3d_pipeline_blob_chunk_type /* VkShaderStage is stored in upper 16 bits. */ VKD3D_PIPELINE_BLOB_CHUNK_TYPE_SHADER_META = 4, VKD3D_PIPELINE_BLOB_CHUNK_TYPE_PSO_COMPAT = 5, + /* VkShaderStage is stored in upper 16 bits. 
*/ + VKD3D_PIPELINE_BLOB_CHUNK_TYPE_SHADER_IDENTIFIER = 6, VKD3D_PIPELINE_BLOB_CHUNK_TYPE_MASK = 0xffff, VKD3D_PIPELINE_BLOB_CHUNK_INDEX_SHIFT = 16, }; @@ -380,6 +382,11 @@ HRESULT d3d12_cached_pipeline_state_validate(struct d3d12_device *device, if (memcmp(blob->cache_uuid, device_properties->pipelineCacheUUID, VK_UUID_SIZE) != 0) return D3D12_ERROR_DRIVER_VERSION_MISMATCH; + if (pipeline_library_flags & VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER) + if (memcmp(blob->cache_uuid, device->device_info.shader_module_identifier_properties.shaderModuleIdentifierAlgorithmUUID, + VK_UUID_SIZE) != 0) + return D3D12_ERROR_DRIVER_VERSION_MISMATCH; + /* In stream archives, we perform checksums ahead of time before accepting a stream blob into internal cache. * No need to do redundant work. */ if (!(pipeline_library_flags & VKD3D_PIPELINE_LIBRARY_FLAG_STREAM_ARCHIVE)) @@ -473,6 +480,11 @@ bool d3d12_cached_pipeline_state_is_dummy(const struct d3d12_cached_pipeline_sta VKD3D_PIPELINE_BLOB_CHUNK_TYPE_MASK)) return false; + if (find_blob_chunk_masked(chunk, payload_size, + VKD3D_PIPELINE_BLOB_CHUNK_TYPE_SHADER_IDENTIFIER, + VKD3D_PIPELINE_BLOB_CHUNK_TYPE_MASK)) + return false; + return true; } @@ -599,7 +611,8 @@ HRESULT vkd3d_create_pipeline_cache_from_d3d12_desc(struct d3d12_device *device, HRESULT vkd3d_get_cached_spirv_code_from_d3d12_desc( const struct d3d12_cached_pipeline_state *state, VkShaderStageFlagBits stage, - struct vkd3d_shader_code *spirv_code) + struct vkd3d_shader_code *spirv_code, + VkPipelineShaderStageModuleIdentifierCreateInfoEXT *identifier) { const struct vkd3d_pipeline_blob *blob = state->blob.pCachedBlob; const struct vkd3d_pipeline_blob_chunk_shader_meta *meta; @@ -623,6 +636,22 @@ HRESULT vkd3d_get_cached_spirv_code_from_d3d12_desc( meta = CONST_CAST_CHUNK_DATA(chunk, shader_meta); memcpy(&spirv_code->meta, &meta->meta, sizeof(meta->meta)); + if (state->library && (state->library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER)) + { + /* Only 
return identifier if we can use it. */ + chunk = find_blob_chunk(CONST_CAST_CHUNK_BASE(blob), payload_size, + VKD3D_PIPELINE_BLOB_CHUNK_TYPE_SHADER_IDENTIFIER | (stage << VKD3D_PIPELINE_BLOB_CHUNK_INDEX_SHIFT)); + + if (chunk && chunk->size <= VK_MAX_SHADER_MODULE_IDENTIFIER_SIZE_EXT) + { + identifier->identifierSize = chunk->size; + identifier->pIdentifier = chunk->data; + spirv_code->size = 0; + spirv_code->code = NULL; + return S_OK; + } + } + /* Aim to pull SPIR-V either from inlined chunk, or a link. */ chunk = find_blob_chunk(CONST_CAST_CHUNK_BASE(blob), payload_size, VKD3D_PIPELINE_BLOB_CHUNK_TYPE_VARINT_SPIRV | (stage << VKD3D_PIPELINE_BLOB_CHUNK_INDEX_SHIFT)); @@ -797,6 +826,33 @@ static void vkd3d_shader_code_serialize_inline(const struct vkd3d_shader_code *c *inout_chunk = chunk; } +static void vkd3d_shader_code_serialize_identifier(struct d3d12_pipeline_library *pipeline_library, + const struct vkd3d_shader_code *code, + const VkShaderModuleIdentifierEXT *identifier, VkShaderStageFlagBits stage, + struct vkd3d_pipeline_blob_chunk **inout_chunk) +{ + struct vkd3d_pipeline_blob_chunk *chunk = *inout_chunk; + struct vkd3d_pipeline_blob_chunk_shader_meta *meta; + + if (!identifier->identifierSize) + return; + + /* Store identifier. */ + chunk->type = VKD3D_PIPELINE_BLOB_CHUNK_TYPE_SHADER_IDENTIFIER | (stage << VKD3D_PIPELINE_BLOB_CHUNK_INDEX_SHIFT); + chunk->size = identifier->identifierSize; + memcpy(chunk->data, identifier->identifier, chunk->size); + chunk = finish_and_iterate_blob_chunk(chunk); + + /* Store meta information for SPIR-V. 
*/ + chunk->type = VKD3D_PIPELINE_BLOB_CHUNK_TYPE_SHADER_META | (stage << VKD3D_PIPELINE_BLOB_CHUNK_INDEX_SHIFT); + chunk->size = sizeof(*meta); + meta = CAST_CHUNK_DATA(chunk, shader_meta); + meta->meta = code->meta; + chunk = finish_and_iterate_blob_chunk(chunk); + + *inout_chunk = chunk; +} + static void vkd3d_shader_code_serialize_referenced(struct d3d12_pipeline_library *pipeline_library, const struct vkd3d_shader_code *code, VkShaderStageFlagBits stage, size_t varint_size, @@ -1017,6 +1073,27 @@ static VkResult vkd3d_serialize_pipeline_state_referenced(struct d3d12_pipeline_ } } + if (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER) + { + if (d3d12_pipeline_state_is_graphics(state)) + { + for (i = 0; i < state->graphics.stage_count; i++) + { + vkd3d_shader_code_serialize_identifier(pipeline_library, + &state->graphics.code[i], + &state->graphics.identifiers[i], state->graphics.stages[i].stage, + &chunk); + } + } + else if (d3d12_pipeline_state_is_compute(state)) + { + vkd3d_shader_code_serialize_identifier(pipeline_library, + &state->compute.code, + &state->compute.identifier, VK_SHADER_STAGE_COMPUTE_BIT, + &chunk); + } + } + return VK_SUCCESS; } @@ -1076,6 +1153,29 @@ VkResult vkd3d_serialize_pipeline_state(struct d3d12_pipeline_library *pipeline_ } } + if (pipeline_library && (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER)) + { + if (d3d12_pipeline_state_is_graphics(state)) + { + for (i = 0; i < state->graphics.stage_count; i++) + { + if (state->graphics.identifiers[i].identifierSize) + { + vk_blob_size += VKD3D_PIPELINE_BLOB_CHUNK_SIZE_RAW(state->graphics.identifiers[i].identifierSize); + vk_blob_size += VKD3D_PIPELINE_BLOB_CHUNK_SIZE(shader_meta); + } + } + } + else if (d3d12_pipeline_state_is_compute(state)) + { + if (state->compute.identifier.identifierSize) + { + vk_blob_size += VKD3D_PIPELINE_BLOB_CHUNK_SIZE_RAW(state->compute.identifier.identifierSize); + vk_blob_size += 
VKD3D_PIPELINE_BLOB_CHUNK_SIZE(shader_meta); + } + } + } + total_size += vk_blob_size; if (blob && *size < total_size) @@ -1089,7 +1189,13 @@ VkResult vkd3d_serialize_pipeline_state(struct d3d12_pipeline_library *pipeline_ blob->vkd3d_shader_interface_key = state->device->shader_interface_key; blob->vkd3d_build = vkd3d_build; - if (!pipeline_library || (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_USE_PIPELINE_CACHE_UUID)) + if (pipeline_library && (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER)) + { + memcpy(blob->cache_uuid, + pipeline_library->device->device_info.shader_module_identifier_properties.shaderModuleIdentifierAlgorithmUUID, + VK_UUID_SIZE); + } + else if (!pipeline_library || (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_USE_PIPELINE_CACHE_UUID)) memcpy(blob->cache_uuid, device_properties->pipelineCacheUUID, VK_UUID_SIZE); else memset(blob->cache_uuid, 0, VK_UUID_SIZE); @@ -1776,7 +1882,14 @@ static void d3d12_pipeline_library_serialize_stream_archive_header(struct d3d12_ header->reserved = 0; header->vkd3d_build = vkd3d_build; header->vkd3d_shader_interface_key = pipeline_library->device->shader_interface_key; - if (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_USE_PIPELINE_CACHE_UUID) + + if (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER) + { + memcpy(header->cache_uuid, + pipeline_library->device->device_info.shader_module_identifier_properties.shaderModuleIdentifierAlgorithmUUID, + VK_UUID_SIZE); + } + else if (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_USE_PIPELINE_CACHE_UUID) memcpy(header->cache_uuid, device_properties->pipelineCacheUUID, VK_UUID_SIZE); else memset(header->cache_uuid, 0, VK_UUID_SIZE); @@ -1814,7 +1927,13 @@ static HRESULT d3d12_pipeline_library_serialize(struct d3d12_pipeline_library *p header->vkd3d_build = vkd3d_build; header->vkd3d_shader_interface_key = pipeline_library->device->shader_interface_key; - if (pipeline_library->flags & 
VKD3D_PIPELINE_LIBRARY_FLAG_USE_PIPELINE_CACHE_UUID) + if (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER) + { + memcpy(header->cache_uuid, + pipeline_library->device->device_info.shader_module_identifier_properties.shaderModuleIdentifierAlgorithmUUID, + VK_UUID_SIZE); + } + else if (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_USE_PIPELINE_CACHE_UUID) memcpy(header->cache_uuid, device_properties->pipelineCacheUUID, VK_UUID_SIZE); else memset(header->cache_uuid, 0, VK_UUID_SIZE); @@ -2016,6 +2135,11 @@ static HRESULT d3d12_pipeline_library_validate_stream_format_header(struct d3d12 if (memcmp(header->cache_uuid, device_properties->pipelineCacheUUID, VK_UUID_SIZE) != 0) return D3D12_ERROR_DRIVER_VERSION_MISMATCH; + if (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER) + if (memcmp(header->cache_uuid, device->device_info.shader_module_identifier_properties.shaderModuleIdentifierAlgorithmUUID, + VK_UUID_SIZE) != 0) + return D3D12_ERROR_DRIVER_VERSION_MISMATCH; + return S_OK; } @@ -2198,6 +2322,17 @@ static HRESULT d3d12_pipeline_library_read_blob_toc_format(struct d3d12_pipeline } } + if (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER) + { + if (memcmp(header->cache_uuid, device->device_info.shader_module_identifier_properties.shaderModuleIdentifierAlgorithmUUID, + VK_UUID_SIZE) != 0) + { + if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG) + INFO("Rejecting pipeline library due to shaderModuleIdentifierAlgorithmUUID mismatch.\n"); + return D3D12_ERROR_DRIVER_VERSION_MISMATCH; + } + } + total_toc_entries = header->pipeline_count + header->spirv_count + header->driver_cache_count; header_entry_size = offsetof(struct vkd3d_serialized_pipeline_library_toc, entries) + @@ -2277,6 +2412,11 @@ static HRESULT d3d12_pipeline_library_init(struct d3d12_pipeline_library *pipeli pipeline_library->internal_refcount = 1; pipeline_library->flags = flags; + /* Mutually exclusive features. 
*/ + if ((flags & VKD3D_PIPELINE_LIBRARY_FLAG_USE_PIPELINE_CACHE_UUID) && + (flags & VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER)) + return E_INVALIDARG; + if (!blob_length && blob) return E_INVALIDARG; @@ -3010,7 +3150,9 @@ HRESULT vkd3d_pipeline_library_init_disk_cache(struct vkd3d_pipeline_library_dis if (!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_SHADER_CACHE_SYNC)) flags |= VKD3D_PIPELINE_LIBRARY_FLAG_STREAM_ARCHIVE_PARSE_ASYNC; - if (!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_NO_SERIALIZE_SPIRV)) + if (device->device_info.shader_module_identifier_features.shaderModuleIdentifier) + flags |= VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER; + else if (!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_NO_SERIALIZE_SPIRV)) flags |= VKD3D_PIPELINE_LIBRARY_FLAG_SAVE_FULL_SPIRV; /* For internal caches, we're mostly just concerned with caching SPIR-V. diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 7185906e..dc04212d 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -2100,10 +2100,16 @@ CONST_VTBL struct ID3D12PipelineStateVtbl d3d12_pipeline_state_vtbl = static HRESULT vkd3d_load_spirv_from_cached_state(struct d3d12_device *device, const struct d3d12_cached_pipeline_state *cached_state, - VkShaderStageFlagBits stage, struct vkd3d_shader_code *spirv_code) + VkShaderStageFlagBits stage, struct vkd3d_shader_code *spirv_code, + VkPipelineShaderStageModuleIdentifierCreateInfoEXT *identifier) { HRESULT hr; + identifier->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT; + identifier->pNext = NULL; + identifier->identifierSize = 0; + identifier->pIdentifier = NULL; + if (!cached_state->blob.CachedBlobSizeInBytes) { if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG) @@ -2114,7 +2120,7 @@ static HRESULT vkd3d_load_spirv_from_cached_state(struct d3d12_device *device, if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_IGNORE_SPIRV) return E_FAIL; - hr = 
vkd3d_get_cached_spirv_code_from_d3d12_desc(cached_state, stage, spirv_code); + hr = vkd3d_get_cached_spirv_code_from_d3d12_desc(cached_state, stage, spirv_code, identifier); if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG) { @@ -2184,6 +2190,7 @@ static void d3d12_pipeline_state_init_compile_arguments(struct d3d12_pipeline_st static HRESULT vkd3d_setup_shader_stage(struct d3d12_pipeline_state *state, struct d3d12_device *device, VkPipelineShaderStageCreateInfo *stage_desc, VkShaderStageFlagBits stage, VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *required_subgroup_size_info, + const VkPipelineShaderStageModuleIdentifierCreateInfoEXT *identifier_create_info, const struct vkd3d_shader_code *spirv_code) { stage_desc->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; @@ -2197,6 +2204,9 @@ static HRESULT vkd3d_setup_shader_stage(struct d3d12_pipeline_state *state, stru if (!d3d12_device_validate_shader_meta(device, &spirv_code->meta)) return E_INVALIDARG; + if (identifier_create_info && identifier_create_info->identifierSize) + stage_desc->pNext = identifier_create_info; + if (((spirv_code->meta.flags & VKD3D_SHADER_META_FLAG_USES_SUBGROUP_SIZE) && device->device_info.subgroup_size_control_features.subgroupSizeControl) || spirv_code->meta.cs_required_wave_size) @@ -2206,6 +2216,9 @@ static HRESULT vkd3d_setup_shader_stage(struct d3d12_pipeline_state *state, stru if (required_subgroup_size_info) { + required_subgroup_size_info->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT; + required_subgroup_size_info->pNext = (void*)stage_desc->pNext; + if (spirv_code->meta.cs_required_wave_size) { /* [WaveSize(N)] attribute in SM 6.6. 
*/ @@ -2225,8 +2238,6 @@ static HRESULT vkd3d_setup_shader_stage(struct d3d12_pipeline_state *state, stru stage_desc->flags &= ~VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT; } - required_subgroup_size_info->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT; - required_subgroup_size_info->pNext = NULL; required_subgroup_size_info->requiredSubgroupSize = subgroup_size_alignment; } @@ -2240,7 +2251,10 @@ static HRESULT vkd3d_setup_shader_stage(struct d3d12_pipeline_state *state, stru } } - return d3d12_pipeline_state_create_shader_module(device, &stage_desc->module, spirv_code); + if (!identifier_create_info || identifier_create_info->identifierSize == 0) /* No usable identifier (NULL is passed on the compile-required fallback path): a real VkShaderModule must be created. */ + return d3d12_pipeline_state_create_shader_module(device, &stage_desc->module, spirv_code); + else + return S_OK; } static HRESULT vkd3d_compile_shader_stage(struct d3d12_pipeline_state *state, struct d3d12_device *device, @@ -2318,6 +2332,8 @@ static HRESULT vkd3d_late_compile_shader_stages(struct d3d12_pipeline_state *sta rwlock_lock_write(&state->lock); for (i = 0; i < graphics->stage_count; i++) { + graphics->identifier_create_infos[i].identifierSize = 0; + if (graphics->stages[i].module == VK_NULL_HANDLE && !graphics->code[i].size && graphics->cached_desc.bytecode[i].BytecodeLength) { @@ -2413,15 +2429,29 @@ static HRESULT vkd3d_create_compute_pipeline(struct d3d12_pipeline_state *state, pipeline_info.pNext = NULL; pipeline_info.flags = 0; - if (FAILED(hr = vkd3d_compile_shader_stage(state, device, - VK_SHADER_STAGE_COMPUTE_BIT, code, spirv_code))) - return hr; + if (state->compute.identifier_create_info.identifierSize == 0) + { + if (FAILED(hr = vkd3d_compile_shader_stage(state, device, + VK_SHADER_STAGE_COMPUTE_BIT, code, spirv_code))) + return hr; + } if (FAILED(hr = vkd3d_setup_shader_stage(state, device, - &pipeline_info.stage, VK_SHADER_STAGE_COMPUTE_BIT, &required_subgroup_size_info, + &pipeline_info.stage, VK_SHADER_STAGE_COMPUTE_BIT, + 
&required_subgroup_size_info, + &state->compute.identifier_create_info, spirv_code))) return hr; + if (pipeline_info.stage.module != VK_NULL_HANDLE && + device->device_info.shader_module_identifier_features.shaderModuleIdentifier) + { + state->compute.identifier.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_IDENTIFIER_EXT; + state->compute.identifier.pNext = NULL; + VK_CALL(vkGetShaderModuleIdentifierEXT(device->vk_device, pipeline_info.stage.module, + &state->compute.identifier)); + } + pipeline_info.layout = state->root_signature->compute.vk_pipeline_layout; pipeline_info.basePipelineHandle = VK_NULL_HANDLE; pipeline_info.basePipelineIndex = -1; @@ -2447,9 +2477,44 @@ static HRESULT vkd3d_create_compute_pipeline(struct d3d12_pipeline_state *state, else feedback_info.pipelineStageCreationFeedbackCount = 0; + if (pipeline_info.stage.module == VK_NULL_HANDLE) + pipeline_info.flags |= VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT; + vr = VK_CALL(vkCreateComputePipelines(device->vk_device, vk_cache, 1, &pipeline_info, NULL, &state->compute.vk_pipeline)); + if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG) + { + if (pipeline_info.flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) + { + if (vr == VK_SUCCESS) + INFO("[IDENTIFIER] Successfully created compute pipeline from identifier.\n"); + else if (vr == VK_PIPELINE_COMPILE_REQUIRED_EXT) + INFO("[IDENTIFIER] Failed to create compute pipeline from identifier, falling back ...\n"); + } + else + INFO("[IDENTIFIER] None compute.\n"); + } + + /* Fallback. 
*/ + if (vr == VK_PIPELINE_COMPILE_REQUIRED_EXT) + { + pipeline_info.flags &= ~VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT; + + if (FAILED(hr = vkd3d_compile_shader_stage(state, device, + VK_SHADER_STAGE_COMPUTE_BIT, code, spirv_code))) + return hr; + + if (FAILED(hr = vkd3d_setup_shader_stage(state, device, + &pipeline_info.stage, VK_SHADER_STAGE_COMPUTE_BIT, + &required_subgroup_size_info, NULL, + spirv_code))) + return hr; + + vr = VK_CALL(vkCreateComputePipelines(device->vk_device, + vk_cache, 1, &pipeline_info, NULL, &state->compute.vk_pipeline)); + } + TRACE("Called vkCreateComputePipelines.\n"); VK_CALL(vkDestroyShaderModule(device->vk_device, pipeline_info.stage.module, NULL)); if (vr < 0) @@ -2487,7 +2552,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st } vkd3d_load_spirv_from_cached_state(device, cached_pso, - VK_SHADER_STAGE_COMPUTE_BIT, &state->compute.code); + VK_SHADER_STAGE_COMPUTE_BIT, &state->compute.code, + &state->compute.identifier_create_info); hr = vkd3d_create_compute_pipeline(state, device, &desc->cs); @@ -3165,7 +3231,8 @@ static void d3d12_pipeline_state_graphics_load_spirv_from_cached_state( for (i = 0; i < graphics->stage_count; i++) { if (FAILED(vkd3d_load_spirv_from_cached_state(device, cached_pso, - graphics->cached_desc.bytecode_stages[i], &graphics->code[i]))) + graphics->cached_desc.bytecode_stages[i], &graphics->code[i], + &graphics->identifier_create_infos[i]))) { for (j = 0; j < i; j++) { @@ -3192,14 +3259,18 @@ static HRESULT d3d12_pipeline_state_graphics_create_shader_stages( * we fail to create shader module for whatever reason. 
*/ for (i = 0; i < graphics->stage_count; i++) { - if (FAILED(hr = vkd3d_compile_shader_stage(state, device, - graphics->cached_desc.bytecode_stages[i], - &graphics->cached_desc.bytecode[i], &graphics->code[i]))) - return hr; + if (graphics->identifier_create_infos[i].identifierSize == 0) + { + if (FAILED(hr = vkd3d_compile_shader_stage(state, device, + graphics->cached_desc.bytecode_stages[i], + &graphics->cached_desc.bytecode[i], &graphics->code[i]))) + return hr; + } if (FAILED(hr = vkd3d_setup_shader_stage(state, device, &graphics->stages[i], - graphics->cached_desc.bytecode_stages[i], NULL, + graphics->cached_desc.bytecode_stages[i], + NULL, &graphics->identifier_create_infos[i], &graphics->code[i]))) return hr; } @@ -3211,6 +3282,7 @@ static void d3d12_pipeline_state_graphics_handle_meta(struct d3d12_pipeline_stat struct d3d12_device *device) { struct d3d12_graphics_pipeline_state *graphics = &state->graphics; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; unsigned int i; for (i = 0; i < graphics->stage_count; i++) @@ -3226,6 +3298,15 @@ static void d3d12_pipeline_state_graphics_handle_meta(struct d3d12_pipeline_stat graphics->code[i].meta.hash); graphics->stages[i].pSpecializationInfo = &graphics->spec_info[i].spec_info; } + + if (graphics->stages[i].module != VK_NULL_HANDLE && + device->device_info.shader_module_identifier_features.shaderModuleIdentifier) + { + state->graphics.identifiers[i].sType = VK_STRUCTURE_TYPE_SHADER_MODULE_IDENTIFIER_EXT; + state->graphics.identifiers[i].pNext = NULL; + VK_CALL(vkGetShaderModuleIdentifierEXT(device->vk_device, graphics->stages[i].module, + &state->graphics.identifiers[i])); + } } } @@ -4334,6 +4415,11 @@ VkPipeline d3d12_pipeline_state_create_pipeline_variant(struct d3d12_pipeline_st } } + /* If we're using identifiers, set the appropriate flag. 
*/ + for (i = 0; i < graphics->stage_count; i++) + if (pipeline_desc.pStages[i].module == VK_NULL_HANDLE) + pipeline_desc.flags |= VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT; + TRACE("Calling vkCreateGraphicsPipelines.\n"); if ((vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG) && @@ -4349,13 +4435,49 @@ VkPipeline d3d12_pipeline_state_create_pipeline_variant(struct d3d12_pipeline_st else feedback_info.pipelineStageCreationFeedbackCount = 0; - if ((vr = VK_CALL(vkCreateGraphicsPipelines(device->vk_device, - vk_cache, 1, &pipeline_desc, NULL, &vk_pipeline))) < 0) + vr = VK_CALL(vkCreateGraphicsPipelines(device->vk_device, vk_cache, 1, &pipeline_desc, NULL, &vk_pipeline)); + + if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG) + { + if (pipeline_desc.flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) + { + if (vr == VK_SUCCESS) + INFO("[IDENTIFIER] Successfully created graphics pipeline from identifier.\n"); + else if (vr == VK_PIPELINE_COMPILE_REQUIRED_EXT) + INFO("[IDENTIFIER] Failed to create graphics pipeline from identifier, falling back ...\n"); + } + else + INFO("[IDENTIFIER] No graphics identifier\n"); + } + + if (vr == VK_PIPELINE_COMPILE_REQUIRED_EXT) + { + if (FAILED(hr = vkd3d_late_compile_shader_stages(state))) + { + ERR("Late compilation of SPIR-V failed.\n"); + return VK_NULL_HANDLE; + } + + pipeline_desc.flags &= ~VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT; + + /* Clean up any temporary SPIR-V modules we created. */ + if (pipeline_desc.pStages == stages) + for (i = 0; i < graphics->stage_count; i++) + if (stages[i].module != graphics->stages[i].module) + VK_CALL(vkDestroyShaderModule(device->vk_device, stages[i].module, NULL)); + + /* Internal modules are known to be non-null now. 
*/ + pipeline_desc.pStages = state->graphics.stages; + vr = VK_CALL(vkCreateGraphicsPipelines(device->vk_device, vk_cache, 1, &pipeline_desc, NULL, &vk_pipeline)); + } + + TRACE("Completed vkCreateGraphicsPipelines.\n"); + + if (vr < 0) { WARN("Failed to create Vulkan graphics pipeline, vr %d.\n", vr); return VK_NULL_HANDLE; } - TRACE("Completed vkCreateGraphicsPipelines.\n"); /* Clean up any temporary SPIR-V modules we created. */ if (pipeline_desc.pStages == stages) diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 93fcc6c7..f2286f55 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1492,6 +1492,8 @@ struct d3d12_graphics_pipeline_state struct vkd3d_shader_debug_ring_spec_info spec_info[VKD3D_MAX_SHADER_STAGES]; VkPipelineShaderStageCreateInfo stages[VKD3D_MAX_SHADER_STAGES]; struct vkd3d_shader_code code[VKD3D_MAX_SHADER_STAGES]; + VkShaderModuleIdentifierEXT identifiers[VKD3D_MAX_SHADER_STAGES]; + VkPipelineShaderStageModuleIdentifierCreateInfoEXT identifier_create_infos[VKD3D_MAX_SHADER_STAGES]; size_t stage_count; struct d3d12_graphics_pipeline_state_cached_desc cached_desc; @@ -1546,6 +1548,8 @@ struct d3d12_compute_pipeline_state { VkPipeline vk_pipeline; struct vkd3d_shader_code code; + VkShaderModuleIdentifierEXT identifier; + VkPipelineShaderStageModuleIdentifierCreateInfoEXT identifier_create_info; }; /* To be able to load a pipeline from cache, this information must match exactly, @@ -1768,6 +1772,7 @@ enum vkd3d_pipeline_library_flags VKD3D_PIPELINE_LIBRARY_FLAG_STREAM_ARCHIVE = 1 << 4, /* We expect to parse archive from thread, so consider thread safety and cancellation points. 
*/ VKD3D_PIPELINE_LIBRARY_FLAG_STREAM_ARCHIVE_PARSE_ASYNC = 1 << 5, + VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER = 1 << 6, }; HRESULT d3d12_pipeline_library_create(struct d3d12_device *device, const void *blob, @@ -1781,7 +1786,8 @@ HRESULT vkd3d_create_pipeline_cache_from_d3d12_desc(struct d3d12_device *device, HRESULT vkd3d_get_cached_spirv_code_from_d3d12_desc( const struct d3d12_cached_pipeline_state *state, VkShaderStageFlagBits stage, - struct vkd3d_shader_code *spirv_code); + struct vkd3d_shader_code *spirv_code, + VkPipelineShaderStageModuleIdentifierCreateInfoEXT *identifier); VkResult vkd3d_serialize_pipeline_state(struct d3d12_pipeline_library *pipeline_library, const struct d3d12_pipeline_state *state, size_t *size, void *data); HRESULT d3d12_cached_pipeline_state_validate(struct d3d12_device *device, diff --git a/libs/vkd3d/vulkan_procs.h b/libs/vkd3d/vulkan_procs.h index 71b9dc51..302b1ab7 100644 --- a/libs/vkd3d/vulkan_procs.h +++ b/libs/vkd3d/vulkan_procs.h @@ -313,6 +313,7 @@ VK_DEVICE_EXT_PFN(vkCmdExecuteGeneratedCommandsNV) /* VK_EXT_shader_module_identifier */ VK_DEVICE_EXT_PFN(vkGetShaderModuleIdentifierEXT) +VK_DEVICE_EXT_PFN(vkGetShaderModuleCreateInfoIdentifierEXT) #undef VK_INSTANCE_PFN #undef VK_INSTANCE_EXT_PFN