From 4384b708d79b9d457a2bbc6f765c37687328e304 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 21 Mar 2022 11:52:01 +0100 Subject: [PATCH] vkd3d: Prepare for system where we can retain DXBC blobs in pipeline. Simplifies the code somewhat. Only iterate over the shader_stages LUT once. Adds concept of duped DXBC blobs as well. Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/state.c | 75 ++++++++++++++++++++------------------ libs/vkd3d/vkd3d_private.h | 4 ++ 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index cd47ca84..9168b351 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -1900,6 +1900,18 @@ static void vkd3d_shader_transform_feedback_info_free(struct vkd3d_shader_transf vkd3d_free(xfb_info); } +static void d3d12_pipeline_state_free_cached_desc(struct d3d12_graphics_pipeline_state_cached_desc *cached_desc) +{ + unsigned int i; + vkd3d_shader_transform_feedback_info_free(cached_desc->xfb_info); + + while (cached_desc->bytecode_duped_mask) + { + i = vkd3d_bitmask_iter32(&cached_desc->bytecode_duped_mask); + vkd3d_free((void*)cached_desc->bytecode[i].pShaderBytecode); + } +} + static struct vkd3d_shader_transform_feedback_info *vkd3d_shader_transform_feedback_info_dup( const D3D12_STREAM_OUTPUT_DESC *so_desc) { @@ -1965,7 +1977,7 @@ void d3d12_pipeline_state_dec_ref(struct d3d12_pipeline_state *state) d3d12_root_signature_dec_ref(state->root_signature); if (state->vk_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) - vkd3d_shader_transform_feedback_info_free(state->graphics.cached_desc.xfb_info); + d3d12_pipeline_state_free_cached_desc(&state->graphics.cached_desc); vkd3d_free(state); } } @@ -3081,9 +3093,9 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s struct vkd3d_shader_signature input_signature; VkSampleCountFlagBits sample_count; const struct vkd3d_format *format; - unsigned int i, j, stage_count; unsigned int instance_divisor; VkVertexInputRate input_rate; + unsigned int i, j; size_t rt_count; uint32_t mask; HRESULT hr; @@ -3094,7 +3106,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s enum VkShaderStageFlagBits stage; ptrdiff_t offset; } - shader_stages[] = + shader_stages_lut[] = { {VK_SHADER_STAGE_VERTEX_BIT, offsetof(struct d3d12_pipeline_state_desc, vs)}, {VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, offsetof(struct d3d12_pipeline_state_desc, hs)}, @@ -3311,15 +3323,16 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s graphics->patch_vertex_count = 0; - for (i = 0; i < ARRAY_SIZE(shader_stages); ++i) + /* Parse interface data from DXBC blobs. */ + for (i = 0; i < ARRAY_SIZE(shader_stages_lut); ++i) { - const D3D12_SHADER_BYTECODE *b = (const void *)((uintptr_t)desc + shader_stages[i].offset); + const D3D12_SHADER_BYTECODE *b = (const void *)((uintptr_t)desc + shader_stages_lut[i].offset); const struct vkd3d_shader_code dxbc = {b->pShaderBytecode, b->BytecodeLength}; if (!b->pShaderBytecode) continue; - switch (shader_stages[i].stage) + switch (shader_stages_lut[i].stage) { case VK_SHADER_STAGE_VERTEX_BIT: if ((ret = vkd3d_shader_parse_input_signature(&dxbc, &input_signature)) < 0) @@ -3358,6 +3371,11 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s goto fail; } + /* Not owned yet. If we return from pipeline creation without having concrete SPIR-V, + * we'll have to dupe the bytecode and potentially compile to SPIR-V late. */ + graphics->cached_desc.bytecode[graphics->stage_count] = *b; + graphics->cached_desc.bytecode_stages[graphics->stage_count] = shader_stages_lut[i].stage; + ++graphics->stage_count; } @@ -3365,59 +3383,44 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s * We cannot partially fall back since we cannot handle any situation where we need inter-stage code-gen fixups. * In this situation, just generate full SPIR-V from scratch. * This really shouldn't happen unless we have corrupt cache entries. */ - stage_count = 0; - for (i = 0; i < ARRAY_SIZE(shader_stages); i++) + for (i = 0; i < graphics->stage_count; i++) { - const D3D12_SHADER_BYTECODE *b = (const void *)((uintptr_t)desc + shader_stages[i].offset); - if (!b->pShaderBytecode) - continue; - if (FAILED(vkd3d_load_spirv_from_cached_state(device, cached_pso, - shader_stages[i].stage, &graphics->code[stage_count]))) + graphics->cached_desc.bytecode_stages[i], &graphics->code[i]))) { - for (j = 0; j < stage_count; j++) + for (j = 0; j < i; j++) { if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG) - INFO("Discarding cached SPIR-V for stage #%x.\n", shader_stages[i].stage); + INFO("Discarding cached SPIR-V for stage #%x.\n", graphics->cached_desc.bytecode_stages[i]); vkd3d_shader_free_shader_code(&graphics->code[j]); memset(&graphics->code[j], 0, sizeof(graphics->code[j])); } break; } - - ++stage_count; } /* Now create the actual shader modules. If we managed to load SPIR-V from cache, use that directly. * Make sure we don't reset graphics->stage_count since that is a potential memory leak if * we fail to create shader module for whatever reason. */ - stage_count = 0; - for (i = 0; i < ARRAY_SIZE(shader_stages); i++) + for (i = 0; i < graphics->stage_count; i++) { - const D3D12_SHADER_BYTECODE *b = (const void *)((uintptr_t)desc + shader_stages[i].offset); - - if (!b->pShaderBytecode) - continue; - if (FAILED(hr = vkd3d_create_shader_stage(state, device, - &graphics->stages[stage_count], - shader_stages[i].stage, NULL, b, - &graphics->code[stage_count]))) + &graphics->stages[i], + graphics->cached_desc.bytecode_stages[i], NULL, + &graphics->cached_desc.bytecode[i], &graphics->code[i]))) goto fail; - if (shader_stages[i].stage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) - graphics->patch_vertex_count = graphics->code[stage_count].meta.patch_vertex_count; + if (graphics->cached_desc.bytecode_stages[i] == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) + graphics->patch_vertex_count = graphics->code[i].meta.patch_vertex_count; - if ((graphics->code[stage_count].meta.flags & VKD3D_SHADER_META_FLAG_REPLACED) && + if ((graphics->code[i].meta.flags & VKD3D_SHADER_META_FLAG_REPLACED) && device->debug_ring.active) { vkd3d_shader_debug_ring_init_spec_constant(device, - &graphics->spec_info[stage_count], - graphics->code[stage_count].meta.hash); - graphics->stages[stage_count].pSpecializationInfo = &graphics->spec_info[stage_count].spec_info; + &graphics->spec_info[i], + graphics->code[i].meta.hash); + graphics->stages[i].pSpecializationInfo = &graphics->spec_info[i].spec_info; } - - ++stage_count; } graphics->attribute_count = desc->input_layout.NumElements; @@ -3802,7 +3805,7 @@ HRESULT d3d12_pipeline_state_create(struct d3d12_device *device, VkPipelineBindP d3d12_pipeline_state_free_spirv_code(object); d3d12_pipeline_state_destroy_shader_modules(object, device); if (object->vk_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) - vkd3d_shader_transform_feedback_info_free(object->graphics.cached_desc.xfb_info); + d3d12_pipeline_state_free_cached_desc(&object->graphics.cached_desc); VK_CALL(vkDestroyPipelineCache(device->vk_device, object->vk_pso_cache, NULL)); vkd3d_free(object); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 1b556450..2dd9866e 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1479,6 +1479,10 @@ struct d3d12_graphics_pipeline_state_cached_desc bool is_dual_source_blending; VkShaderStageFlagBits xfb_stage; struct vkd3d_shader_transform_feedback_info *xfb_info; + + D3D12_SHADER_BYTECODE bytecode[VKD3D_MAX_SHADER_STAGES]; + VkShaderStageFlagBits bytecode_stages[VKD3D_MAX_SHADER_STAGES]; + uint32_t bytecode_duped_mask; }; struct d3d12_graphics_pipeline_state