diff --git a/include/private/vulkan_private_extensions.h b/include/private/vulkan_private_extensions.h index 03746346..86158187 100644 --- a/include/private/vulkan_private_extensions.h +++ b/include/private/vulkan_private_extensions.h @@ -1,6 +1,6 @@ #ifndef __VULKAN_PRIVATE_EXTENSIONS_H__ #define __VULKAN_PRIVATE_EXTENSIONS_H__ -/* Nothing here at the moment. Add hacks here! */ +/* Add hacks here! */ #endif diff --git a/libs/vkd3d/cache.c b/libs/vkd3d/cache.c index 7df66ff3..39fc8fb8 100644 --- a/libs/vkd3d/cache.c +++ b/libs/vkd3d/cache.c @@ -198,6 +198,8 @@ enum vkd3d_pipeline_blob_chunk_type /* VkShaderStage is stored in upper 16 bits. */ VKD3D_PIPELINE_BLOB_CHUNK_TYPE_SHADER_META = 4, VKD3D_PIPELINE_BLOB_CHUNK_TYPE_PSO_COMPAT = 5, + /* VkShaderStage is stored in upper 16 bits. */ + VKD3D_PIPELINE_BLOB_CHUNK_TYPE_SHADER_IDENTIFIER = 6, VKD3D_PIPELINE_BLOB_CHUNK_TYPE_MASK = 0xffff, VKD3D_PIPELINE_BLOB_CHUNK_INDEX_SHIFT = 16, }; @@ -380,6 +382,11 @@ HRESULT d3d12_cached_pipeline_state_validate(struct d3d12_device *device, if (memcmp(blob->cache_uuid, device_properties->pipelineCacheUUID, VK_UUID_SIZE) != 0) return D3D12_ERROR_DRIVER_VERSION_MISMATCH; + if (pipeline_library_flags & VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER) + if (memcmp(blob->cache_uuid, device->device_info.shader_module_identifier_properties.shaderModuleIdentifierAlgorithmUUID, + VK_UUID_SIZE) != 0) + return D3D12_ERROR_DRIVER_VERSION_MISMATCH; + /* In stream archives, we perform checksums ahead of time before accepting a stream blob into internal cache. * No need to do redundant work. */ if (!(pipeline_library_flags & VKD3D_PIPELINE_LIBRARY_FLAG_STREAM_ARCHIVE)) @@ -473,6 +480,11 @@ bool d3d12_cached_pipeline_state_is_dummy(const struct d3d12_cached_pipeline_sta VKD3D_PIPELINE_BLOB_CHUNK_TYPE_MASK)) return false; + if (find_blob_chunk_masked(chunk, payload_size, + VKD3D_PIPELINE_BLOB_CHUNK_TYPE_SHADER_IDENTIFIER, + VKD3D_PIPELINE_BLOB_CHUNK_TYPE_MASK)) + return false; + return true; } @@ -599,7 +611,8 @@ HRESULT vkd3d_create_pipeline_cache_from_d3d12_desc(struct d3d12_device *device, HRESULT vkd3d_get_cached_spirv_code_from_d3d12_desc( const struct d3d12_cached_pipeline_state *state, VkShaderStageFlagBits stage, - struct vkd3d_shader_code *spirv_code) + struct vkd3d_shader_code *spirv_code, + VkPipelineShaderStageModuleIdentifierCreateInfoEXT *identifier) { const struct vkd3d_pipeline_blob *blob = state->blob.pCachedBlob; const struct vkd3d_pipeline_blob_chunk_shader_meta *meta; @@ -623,6 +636,22 @@ HRESULT vkd3d_get_cached_spirv_code_from_d3d12_desc( meta = CONST_CAST_CHUNK_DATA(chunk, shader_meta); memcpy(&spirv_code->meta, &meta->meta, sizeof(meta->meta)); + if (state->library && (state->library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER)) + { + /* Only return identifier if we can use it. */ + chunk = find_blob_chunk(CONST_CAST_CHUNK_BASE(blob), payload_size, + VKD3D_PIPELINE_BLOB_CHUNK_TYPE_SHADER_IDENTIFIER | (stage << VKD3D_PIPELINE_BLOB_CHUNK_INDEX_SHIFT)); + + if (chunk && chunk->size <= VK_MAX_SHADER_MODULE_IDENTIFIER_SIZE_EXT) + { + identifier->identifierSize = chunk->size; + identifier->pIdentifier = chunk->data; + spirv_code->size = 0; + spirv_code->code = NULL; + return S_OK; + } + } + /* Aim to pull SPIR-V either from inlined chunk, or a link. 
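* The chunk type encodes the VkShaderStage in its upper 16 bits, so the lookups below are per-stage.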
*/ chunk = find_blob_chunk(CONST_CAST_CHUNK_BASE(blob), payload_size, VKD3D_PIPELINE_BLOB_CHUNK_TYPE_VARINT_SPIRV | (stage << VKD3D_PIPELINE_BLOB_CHUNK_INDEX_SHIFT)); @@ -797,6 +826,33 @@ static void vkd3d_shader_code_serialize_inline(const struct vkd3d_shader_code *c *inout_chunk = chunk; } +static void vkd3d_shader_code_serialize_identifier(struct d3d12_pipeline_library *pipeline_library, + const struct vkd3d_shader_code *code, + const VkShaderModuleIdentifierEXT *identifier, VkShaderStageFlagBits stage, + struct vkd3d_pipeline_blob_chunk **inout_chunk) +{ + struct vkd3d_pipeline_blob_chunk *chunk = *inout_chunk; + struct vkd3d_pipeline_blob_chunk_shader_meta *meta; + + if (!identifier->identifierSize) + return; + + /* Store identifier. */ + chunk->type = VKD3D_PIPELINE_BLOB_CHUNK_TYPE_SHADER_IDENTIFIER | (stage << VKD3D_PIPELINE_BLOB_CHUNK_INDEX_SHIFT); + chunk->size = identifier->identifierSize; + memcpy(chunk->data, identifier->identifier, chunk->size); + chunk = finish_and_iterate_blob_chunk(chunk); + + /* Store meta information for SPIR-V. */ + chunk->type = VKD3D_PIPELINE_BLOB_CHUNK_TYPE_SHADER_META | (stage << VKD3D_PIPELINE_BLOB_CHUNK_INDEX_SHIFT); + chunk->size = sizeof(*meta); + meta = CAST_CHUNK_DATA(chunk, shader_meta); + meta->meta = code->meta; + chunk = finish_and_iterate_blob_chunk(chunk); + + *inout_chunk = chunk; +} + static void vkd3d_shader_code_serialize_referenced(struct d3d12_pipeline_library *pipeline_library, const struct vkd3d_shader_code *code, VkShaderStageFlagBits stage, size_t varint_size, @@ -903,18 +959,21 @@ static VkResult vkd3d_serialize_pipeline_state_inline(const struct d3d12_pipelin chunk = finish_and_iterate_blob_chunk(chunk); } - if (d3d12_pipeline_state_is_graphics(state)) + if (!state->pso_is_loaded_from_cached_blob) { - for (i = 0; i < state->graphics.stage_count; i++) + if (d3d12_pipeline_state_is_graphics(state)) { - vkd3d_shader_code_serialize_inline(&state->graphics.code[i], state->graphics.stages[i].stage, - varint_size[i], &chunk); + for (i = 0; i < state->graphics.stage_count; i++) + { + vkd3d_shader_code_serialize_inline(&state->graphics.code[i], state->graphics.stages[i].stage, + varint_size[i], &chunk); + } + } + else if (d3d12_pipeline_state_is_compute(state)) + { + vkd3d_shader_code_serialize_inline(&state->compute.code, VK_SHADER_STAGE_COMPUTE_BIT, + varint_size[0], &chunk); } - } - else if (d3d12_pipeline_state_is_compute(state)) - { - vkd3d_shader_code_serialize_inline(&state->compute.code, VK_SHADER_STAGE_COMPUTE_BIT, - varint_size[0], &chunk); } return VK_SUCCESS; @@ -994,7 +1053,8 @@ static VkResult vkd3d_serialize_pipeline_state_referenced(struct d3d12_pipeline_ chunk = finish_and_iterate_blob_chunk(chunk); } - if (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SAVE_FULL_SPIRV) + if ((pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SAVE_FULL_SPIRV) && + !state->pso_is_loaded_from_cached_blob) { if (d3d12_pipeline_state_is_graphics(state)) { @@ -1013,6 +1073,27 @@ static VkResult vkd3d_serialize_pipeline_state_referenced(struct d3d12_pipeline_ } } + if (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER) + { + if (d3d12_pipeline_state_is_graphics(state)) + { + for (i = 0; i < state->graphics.stage_count; i++) + { + vkd3d_shader_code_serialize_identifier(pipeline_library, + &state->graphics.code[i], + &state->graphics.identifiers[i], state->graphics.stages[i].stage, + &chunk); + } + } + else if (d3d12_pipeline_state_is_compute(state)) + { + 
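+            /* Compute PSOs have a single stage; serialize its identifier and meta the same way. */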
vkd3d_shader_code_serialize_identifier(pipeline_library, + &state->compute.code, + &state->compute.identifier, VK_SHADER_STAGE_COMPUTE_BIT, + &chunk); + } + } + return VK_SUCCESS; } @@ -1054,7 +1135,8 @@ VkResult vkd3d_serialize_pipeline_state(struct d3d12_pipeline_library *pipeline_ vk_blob_size += VKD3D_PIPELINE_BLOB_CHUNK_SIZE_RAW(vk_blob_size_pipeline_cache); } - if (!pipeline_library || (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SAVE_FULL_SPIRV)) + if ((!pipeline_library || (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SAVE_FULL_SPIRV)) && + !state->pso_is_loaded_from_cached_blob) { if (d3d12_pipeline_state_is_graphics(state)) { @@ -1071,6 +1153,29 @@ VkResult vkd3d_serialize_pipeline_state(struct d3d12_pipeline_library *pipeline_ } } + if (pipeline_library && (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER)) + { + if (d3d12_pipeline_state_is_graphics(state)) + { + for (i = 0; i < state->graphics.stage_count; i++) + { + if (state->graphics.identifiers[i].identifierSize) + { + vk_blob_size += VKD3D_PIPELINE_BLOB_CHUNK_SIZE_RAW(state->graphics.identifiers[i].identifierSize); + vk_blob_size += VKD3D_PIPELINE_BLOB_CHUNK_SIZE(shader_meta); + } + } + } + else if (d3d12_pipeline_state_is_compute(state)) + { + if (state->compute.identifier.identifierSize) + { + vk_blob_size += VKD3D_PIPELINE_BLOB_CHUNK_SIZE_RAW(state->compute.identifier.identifierSize); + vk_blob_size += VKD3D_PIPELINE_BLOB_CHUNK_SIZE(shader_meta); + } + } + } + total_size += vk_blob_size; if (blob && *size < total_size) @@ -1084,7 +1189,13 @@ VkResult vkd3d_serialize_pipeline_state(struct d3d12_pipeline_library *pipeline_ blob->vkd3d_shader_interface_key = state->device->shader_interface_key; blob->vkd3d_build = vkd3d_build; - if (!pipeline_library || (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_USE_PIPELINE_CACHE_UUID)) + if (pipeline_library && (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER)) + { + memcpy(blob->cache_uuid, + pipeline_library->device->device_info.shader_module_identifier_properties.shaderModuleIdentifierAlgorithmUUID, + VK_UUID_SIZE); + } + else if (!pipeline_library || (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_USE_PIPELINE_CACHE_UUID)) memcpy(blob->cache_uuid, device_properties->pipelineCacheUUID, VK_UUID_SIZE); else memset(blob->cache_uuid, 0, VK_UUID_SIZE); @@ -1573,10 +1684,11 @@ static HRESULT d3d12_pipeline_library_load_pipeline(struct d3d12_pipeline_librar if (root_signature) pipeline_cache_compat.root_signature_compat_hash = root_signature->compatibility_hash; } - else if (!cached_state->private_root_signature) + else if (cached_state->root_signature_compat_hash_is_dxbc_derived) { /* If we have no explicit root signature and the existing PSO didn't either, - * just inherit the compat hash from PSO to avoid comparing them. */ + * just inherit the compat hash from PSO to avoid comparing them. + * The hash depends entirely on the DXBC blob either way. 
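+      * An explicit root signature object, when present, supplies its own compatibility hash instead.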
*/ pipeline_cache_compat.root_signature_compat_hash = cached_state->pipeline_cache_compat.root_signature_compat_hash; } @@ -1770,7 +1882,14 @@ static void d3d12_pipeline_library_serialize_stream_archive_header(struct d3d12_ header->reserved = 0; header->vkd3d_build = vkd3d_build; header->vkd3d_shader_interface_key = pipeline_library->device->shader_interface_key; - if (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_USE_PIPELINE_CACHE_UUID) + + if (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER) + { + memcpy(header->cache_uuid, + pipeline_library->device->device_info.shader_module_identifier_properties.shaderModuleIdentifierAlgorithmUUID, + VK_UUID_SIZE); + } + else if (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_USE_PIPELINE_CACHE_UUID) memcpy(header->cache_uuid, device_properties->pipelineCacheUUID, VK_UUID_SIZE); else memset(header->cache_uuid, 0, VK_UUID_SIZE); @@ -1808,7 +1927,13 @@ static HRESULT d3d12_pipeline_library_serialize(struct d3d12_pipeline_library *p header->vkd3d_build = vkd3d_build; header->vkd3d_shader_interface_key = pipeline_library->device->shader_interface_key; - if (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_USE_PIPELINE_CACHE_UUID) + if (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER) + { + memcpy(header->cache_uuid, + pipeline_library->device->device_info.shader_module_identifier_properties.shaderModuleIdentifierAlgorithmUUID, + VK_UUID_SIZE); + } + else if (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_USE_PIPELINE_CACHE_UUID) memcpy(header->cache_uuid, device_properties->pipelineCacheUUID, VK_UUID_SIZE); else memset(header->cache_uuid, 0, VK_UUID_SIZE); @@ -2010,6 +2135,11 @@ static HRESULT d3d12_pipeline_library_validate_stream_format_header(struct d3d12 if (memcmp(header->cache_uuid, device_properties->pipelineCacheUUID, VK_UUID_SIZE) != 0) return D3D12_ERROR_DRIVER_VERSION_MISMATCH; + if (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER) + if (memcmp(header->cache_uuid, device->device_info.shader_module_identifier_properties.shaderModuleIdentifierAlgorithmUUID, + VK_UUID_SIZE) != 0) + return D3D12_ERROR_DRIVER_VERSION_MISMATCH; + return S_OK; } @@ -2192,6 +2322,17 @@ static HRESULT d3d12_pipeline_library_read_blob_toc_format(struct d3d12_pipeline } } + if (pipeline_library->flags & VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER) + { + if (memcmp(header->cache_uuid, device->device_info.shader_module_identifier_properties.shaderModuleIdentifierAlgorithmUUID, + VK_UUID_SIZE) != 0) + { + if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG) + INFO("Rejecting pipeline library due to shaderModuleIdentifierAlgorithmUUID mismatch.\n"); + return D3D12_ERROR_DRIVER_VERSION_MISMATCH; + } + } + total_toc_entries = header->pipeline_count + header->spirv_count + header->driver_cache_count; header_entry_size = offsetof(struct vkd3d_serialized_pipeline_library_toc, entries) + @@ -2271,6 +2412,11 @@ static HRESULT d3d12_pipeline_library_init(struct d3d12_pipeline_library *pipeli pipeline_library->internal_refcount = 1; pipeline_library->flags = flags; + /* Mutually exclusive features. 
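+     * The blob's cache_uuid can hold either the driver's pipelineCacheUUID or the shaderModuleIdentifierAlgorithmUUID, never both.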
*/ + if ((flags & VKD3D_PIPELINE_LIBRARY_FLAG_USE_PIPELINE_CACHE_UUID) && + (flags & VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER)) + return E_INVALIDARG; + if (!blob_length && blob) return E_INVALIDARG; @@ -3004,7 +3150,9 @@ HRESULT vkd3d_pipeline_library_init_disk_cache(struct vkd3d_pipeline_library_dis if (!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_SHADER_CACHE_SYNC)) flags |= VKD3D_PIPELINE_LIBRARY_FLAG_STREAM_ARCHIVE_PARSE_ASYNC; - if (!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_NO_SERIALIZE_SPIRV)) + if (device->device_info.shader_module_identifier_features.shaderModuleIdentifier) + flags |= VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER; + else if (!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_NO_SERIALIZE_SPIRV)) flags |= VKD3D_PIPELINE_LIBRARY_FLAG_SAVE_FULL_SPIRV; /* For internal caches, we're mostly just concerned with caching SPIR-V. diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index b6f29dac..718921f0 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -119,6 +119,8 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = VK_EXTENSION(EXT_SHADER_IMAGE_ATOMIC_INT64, EXT_shader_image_atomic_int64), VK_EXTENSION(EXT_SCALAR_BLOCK_LAYOUT, EXT_scalar_block_layout), VK_EXTENSION(EXT_PIPELINE_CREATION_FEEDBACK, EXT_pipeline_creation_feedback), + VK_EXTENSION(EXT_PIPELINE_CREATION_CACHE_CONTROL, EXT_pipeline_creation_cache_control), + VK_EXTENSION(EXT_SHADER_MODULE_IDENTIFIER, EXT_shader_module_identifier), /* AMD extensions */ VK_EXTENSION(AMD_BUFFER_MARKER, AMD_buffer_marker), VK_EXTENSION(AMD_DEVICE_COHERENT_MEMORY, AMD_device_coherent_memory), @@ -1520,6 +1522,23 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i vk_prepend_struct(&info->features2, &info->device_coherent_memory_features_amd); } + if (vulkan_info->EXT_pipeline_creation_cache_control) + { + info->pipeline_creation_cache_control_features.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES; + vk_prepend_struct(&info->features2, &info->pipeline_creation_cache_control_features); + } + + if (vulkan_info->EXT_shader_module_identifier) + { + info->shader_module_identifier_features.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_FEATURES_EXT; + info->shader_module_identifier_properties.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_PROPERTIES_EXT; + vk_prepend_struct(&info->features2, &info->shader_module_identifier_features); + vk_prepend_struct(&info->properties2, &info->shader_module_identifier_properties); + } + /* Core in Vulkan 1.1. 
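* No extension check is needed before chaining this feature struct.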
*/ info->shader_draw_parameters_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES; vk_prepend_struct(&info->features2, &info->shader_draw_parameters_features); diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 93fbaf4a..c50f53ea 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -1846,7 +1846,7 @@ static ULONG STDMETHODCALLTYPE d3d12_pipeline_state_AddRef(ID3D12PipelineState * } static HRESULT d3d12_pipeline_state_create_shader_module(struct d3d12_device *device, - VkPipelineShaderStageCreateInfo *stage_desc, const struct vkd3d_shader_code *code) + VkShaderModule *vk_module, const struct vkd3d_shader_code *code) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkShaderModuleCreateInfo shader_desc; @@ -1854,7 +1854,7 @@ static HRESULT d3d12_pipeline_state_create_shader_module(struct d3d12_device *de VkResult vr; /* If we kept the module around, no need to create it again. */ - if (stage_desc->module != VK_NULL_HANDLE) + if (*vk_module != VK_NULL_HANDLE) return S_OK; shader_desc.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; @@ -1863,7 +1863,7 @@ static HRESULT d3d12_pipeline_state_create_shader_module(struct d3d12_device *de shader_desc.codeSize = code->size; shader_desc.pCode = code->code; - vr = VK_CALL(vkCreateShaderModule(device->vk_device, &shader_desc, NULL, &stage_desc->module)); + vr = VK_CALL(vkCreateShaderModule(device->vk_device, &shader_desc, NULL, vk_module)); if (vr < 0) { WARN("Failed to create Vulkan shader module, vr %d.\n", vr); @@ -1872,7 +1872,7 @@ static HRESULT d3d12_pipeline_state_create_shader_module(struct d3d12_device *de /* Helpful for tooling like RenderDoc. */ sprintf(hash_str, "%016"PRIx64, code->meta.hash); - vkd3d_set_vk_object_name(device, (uint64_t)stage_desc->module, VK_OBJECT_TYPE_SHADER_MODULE, hash_str); + vkd3d_set_vk_object_name(device, (uint64_t)*vk_module, VK_OBJECT_TYPE_SHADER_MODULE, hash_str); return S_OK; } @@ -1940,6 +1940,75 @@ static void d3d12_pipeline_state_set_name(struct d3d12_pipeline_state *state, co } } +static void vkd3d_shader_transform_feedback_info_free(struct vkd3d_shader_transform_feedback_info *xfb_info) +{ + unsigned int i; + + if (!xfb_info) + return; + + for (i = 0; i < xfb_info->element_count; i++) + vkd3d_free((void*)xfb_info->elements[i].semantic_name); + vkd3d_free((void*)xfb_info->elements); + vkd3d_free((void*)xfb_info->buffer_strides); + vkd3d_free(xfb_info); +} + +static void d3d12_pipeline_state_free_cached_desc(struct d3d12_graphics_pipeline_state_cached_desc *cached_desc) +{ + unsigned int i; + vkd3d_shader_transform_feedback_info_free(cached_desc->xfb_info); + + while (cached_desc->bytecode_duped_mask) + { + i = vkd3d_bitmask_iter32(&cached_desc->bytecode_duped_mask); + vkd3d_free((void*)cached_desc->bytecode[i].pShaderBytecode); + } +} + +static struct vkd3d_shader_transform_feedback_info *vkd3d_shader_transform_feedback_info_dup( + const D3D12_STREAM_OUTPUT_DESC *so_desc) +{ + struct vkd3d_shader_transform_feedback_element *new_entries = NULL; + struct vkd3d_shader_transform_feedback_info *xfb_info; + unsigned int *new_buffer_strides = NULL; + unsigned int num_duped = 0; + unsigned int i; + + xfb_info = vkd3d_calloc(1, sizeof(*xfb_info)); + if (!xfb_info) + return NULL; + + new_buffer_strides = malloc(so_desc->NumStrides * sizeof(*new_buffer_strides)); + if (!new_buffer_strides) + goto fail; + memcpy(new_buffer_strides, so_desc->pBufferStrides, so_desc->NumStrides * sizeof(*new_buffer_strides)); + xfb_info->buffer_strides = 
new_buffer_strides; + + new_entries = malloc(so_desc->NumEntries * sizeof(*new_entries)); + if (!new_entries) + goto fail; + memcpy(new_entries, so_desc->pSODeclaration, so_desc->NumEntries * sizeof(*new_entries)); + xfb_info->elements = new_entries; + + for (i = 0; i < so_desc->NumEntries; i++, num_duped++) + if (!(new_entries[i].semantic_name = vkd3d_strdup(new_entries[i].semantic_name))) + goto fail; + + xfb_info->buffer_stride_count = so_desc->NumStrides; + xfb_info->element_count = so_desc->NumEntries; + + return xfb_info; + +fail: + for (i = 0; i < num_duped; i++) + vkd3d_free((void*)new_entries[i].semantic_name); + vkd3d_free(new_buffer_strides); + vkd3d_free(new_entries); + vkd3d_free(xfb_info); + return NULL; +} + void d3d12_pipeline_state_dec_ref(struct d3d12_pipeline_state *state) { struct d3d12_device *device = state->device; @@ -1958,9 +2027,12 @@ void d3d12_pipeline_state_dec_ref(struct d3d12_pipeline_state *state) VK_CALL(vkDestroyPipelineCache(device->vk_device, state->vk_pso_cache, NULL)); - if (state->private_root_signature) - d3d12_root_signature_dec_ref(state->private_root_signature); + if (state->root_signature) + d3d12_root_signature_dec_ref(state->root_signature); + if (state->vk_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) + d3d12_pipeline_state_free_cached_desc(&state->graphics.cached_desc); + rwlock_destroy(&state->lock); vkd3d_free(state); } } @@ -2082,10 +2154,16 @@ CONST_VTBL struct ID3D12PipelineStateVtbl d3d12_pipeline_state_vtbl = static HRESULT vkd3d_load_spirv_from_cached_state(struct d3d12_device *device, const struct d3d12_cached_pipeline_state *cached_state, - VkShaderStageFlagBits stage, struct vkd3d_shader_code *spirv_code) + VkShaderStageFlagBits stage, struct vkd3d_shader_code *spirv_code, + VkPipelineShaderStageModuleIdentifierCreateInfoEXT *identifier) { HRESULT hr; + identifier->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT; + identifier->pNext = NULL; + identifier->identifierSize = 0; + identifier->pIdentifier = NULL; + if (!cached_state->blob.CachedBlobSizeInBytes) { if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG) @@ -2096,7 +2174,7 @@ static HRESULT vkd3d_load_spirv_from_cached_state(struct d3d12_device *device, if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_IGNORE_SPIRV) return E_FAIL; - hr = vkd3d_get_cached_spirv_code_from_d3d12_desc(cached_state, stage, spirv_code); + hr = vkd3d_get_cached_spirv_code_from_d3d12_desc(cached_state, stage, spirv_code, identifier); if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG) { @@ -2116,56 +2194,73 @@ static HRESULT vkd3d_load_spirv_from_cached_state(struct d3d12_device *device, return hr; } -static HRESULT vkd3d_create_shader_stage(struct d3d12_device *device, +static void d3d12_pipeline_state_init_shader_interface(struct d3d12_pipeline_state *state, + struct d3d12_device *device, + VkShaderStageFlagBits stage, + struct vkd3d_shader_interface_info *shader_interface) +{ + const struct d3d12_root_signature *root_signature = state->root_signature; + shader_interface->flags = d3d12_root_signature_get_shader_interface_flags(root_signature); + shader_interface->min_ssbo_alignment = d3d12_device_get_ssbo_alignment(device); + shader_interface->descriptor_tables.offset = root_signature->descriptor_table_offset; + shader_interface->descriptor_tables.count = root_signature->descriptor_table_count; + shader_interface->bindings = root_signature->bindings; + shader_interface->binding_count = root_signature->binding_count; + 
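/* These are borrowed pointers into the root signature, which the PSO holds a private reference on. */ +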
shader_interface->push_constant_buffers = root_signature->root_constants; + shader_interface->push_constant_buffer_count = root_signature->root_constant_count; + shader_interface->push_constant_ubo_binding = &root_signature->push_constant_ubo_binding; + shader_interface->offset_buffer_binding = &root_signature->offset_buffer_binding; + shader_interface->stage = stage; + shader_interface->xfb_info = + (stage != VK_SHADER_STAGE_COMPUTE_BIT && stage == state->graphics.cached_desc.xfb_stage) ? + state->graphics.cached_desc.xfb_info : NULL; +#ifdef VKD3D_ENABLE_DESCRIPTOR_QA + shader_interface->descriptor_qa_global_binding = &root_signature->descriptor_qa_global_info; + shader_interface->descriptor_qa_heap_binding = &root_signature->descriptor_qa_heap_binding; +#endif +} + +static void d3d12_pipeline_state_init_compile_arguments(struct d3d12_pipeline_state *state, + struct d3d12_device *device, VkShaderStageFlagBits stage, + struct vkd3d_shader_compile_arguments *compile_arguments) +{ + memset(compile_arguments, 0, sizeof(*compile_arguments)); + compile_arguments->target = VKD3D_SHADER_TARGET_SPIRV_VULKAN_1_0; + compile_arguments->target_extension_count = device->vk_info.shader_extension_count; + compile_arguments->target_extensions = device->vk_info.shader_extensions; + compile_arguments->quirks = &vkd3d_shader_quirk_info; + + if (stage == VK_SHADER_STAGE_FRAGMENT_BIT) + { + /* Options which are exclusive to PS. Especially output swizzles must only be used in PS. */ + compile_arguments->parameter_count = ARRAY_SIZE(state->graphics.cached_desc.ps_shader_parameters); + compile_arguments->parameters = state->graphics.cached_desc.ps_shader_parameters; + compile_arguments->dual_source_blending = state->graphics.cached_desc.is_dual_source_blending; + compile_arguments->output_swizzles = state->graphics.cached_desc.ps_output_swizzle; + compile_arguments->output_swizzle_count = state->graphics.rt_count; + } +} + +static HRESULT vkd3d_setup_shader_stage(struct d3d12_pipeline_state *state, struct d3d12_device *device, VkPipelineShaderStageCreateInfo *stage_desc, VkShaderStageFlagBits stage, VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *required_subgroup_size_info, - const D3D12_SHADER_BYTECODE *code, - const struct vkd3d_shader_interface_info *shader_interface, - const struct vkd3d_shader_compile_arguments *compile_args, struct vkd3d_shader_code *spirv_code) + const VkPipelineShaderStageModuleIdentifierCreateInfoEXT *identifier_create_info, + const struct vkd3d_shader_code *spirv_code) { - struct vkd3d_shader_code dxbc = {code->pShaderBytecode, code->BytecodeLength}; - vkd3d_shader_hash_t recovered_hash = 0; - vkd3d_shader_hash_t compiled_hash = 0; - int ret; - stage_desc->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; stage_desc->pNext = NULL; stage_desc->flags = 0; stage_desc->stage = stage; stage_desc->pName = "main"; stage_desc->pSpecializationInfo = NULL; - - if (spirv_code->code && (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_SANITIZE_SPIRV)) - { - recovered_hash = vkd3d_shader_hash(spirv_code); - vkd3d_shader_free_shader_code(spirv_code); - memset(spirv_code, 0, sizeof(*spirv_code)); - } - - if (!spirv_code->code) - { - TRACE("Calling vkd3d_shader_compile_dxbc.\n"); - if ((ret = vkd3d_shader_compile_dxbc(&dxbc, spirv_code, 0, shader_interface, compile_args)) < 0) - { - WARN("Failed to compile shader, vkd3d result %d.\n", ret); - return hresult_from_vkd3d_result(ret); - } - TRACE("Called vkd3d_shader_compile_dxbc.\n"); - } - - /* Debug compare SPIR-V we got from 
cache, and SPIR-V we got from compilation. */ - if (recovered_hash) - { - compiled_hash = vkd3d_shader_hash(spirv_code); - if (compiled_hash == recovered_hash) - INFO("SPIR-V match for cache reference OK!\n"); - else - INFO("SPIR-V mismatch for cache reference!\n"); - } + stage_desc->module = VK_NULL_HANDLE; if (!d3d12_device_validate_shader_meta(device, &spirv_code->meta)) return E_INVALIDARG; + if (identifier_create_info && identifier_create_info->identifierSize) + stage_desc->pNext = identifier_create_info; + if (((spirv_code->meta.flags & VKD3D_SHADER_META_FLAG_USES_SUBGROUP_SIZE) && device->device_info.subgroup_size_control_features.subgroupSizeControl) || spirv_code->meta.cs_required_wave_size) @@ -2175,6 +2270,9 @@ static HRESULT vkd3d_create_shader_stage(struct d3d12_device *device, if (required_subgroup_size_info) { + required_subgroup_size_info->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT; + required_subgroup_size_info->pNext = (void*)stage_desc->pNext; + if (spirv_code->meta.cs_required_wave_size) { /* [WaveSize(N)] attribute in SM 6.6. */ @@ -2194,8 +2292,6 @@ static HRESULT vkd3d_create_shader_stage(struct d3d12_device *device, stage_desc->flags &= ~VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT; } - required_subgroup_size_info->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT; - required_subgroup_size_info->pNext = NULL; required_subgroup_size_info->requiredSubgroupSize = subgroup_size_alignment; } @@ -2209,8 +2305,118 @@ static HRESULT vkd3d_create_shader_stage(struct d3d12_device *device, } } - stage_desc->module = VK_NULL_HANDLE; - return d3d12_pipeline_state_create_shader_module(device, stage_desc, spirv_code); + if (identifier_create_info && identifier_create_info->identifierSize == 0) + return d3d12_pipeline_state_create_shader_module(device, &stage_desc->module, spirv_code); + else + return S_OK; +} + +static HRESULT vkd3d_compile_shader_stage(struct d3d12_pipeline_state *state, struct d3d12_device *device, + VkShaderStageFlagBits stage, const D3D12_SHADER_BYTECODE *code, struct vkd3d_shader_code *spirv_code) +{ + struct vkd3d_shader_code dxbc = {code->pShaderBytecode, code->BytecodeLength}; + struct vkd3d_shader_interface_info shader_interface; + struct vkd3d_shader_compile_arguments compile_args; + vkd3d_shader_hash_t recovered_hash = 0; + vkd3d_shader_hash_t compiled_hash = 0; + int ret; + + if (spirv_code->code && (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_SANITIZE_SPIRV)) + { + recovered_hash = vkd3d_shader_hash(spirv_code); + vkd3d_shader_free_shader_code(spirv_code); + memset(spirv_code, 0, sizeof(*spirv_code)); + } + + if (!spirv_code->code) + { + TRACE("Calling vkd3d_shader_compile_dxbc.\n"); + + d3d12_pipeline_state_init_shader_interface(state, device, stage, &shader_interface); + d3d12_pipeline_state_init_compile_arguments(state, device, stage, &compile_args); + + if ((ret = vkd3d_shader_compile_dxbc(&dxbc, spirv_code, 0, &shader_interface, &compile_args)) < 0) + { + WARN("Failed to compile shader, vkd3d result %d.\n", ret); + return hresult_from_vkd3d_result(ret); + } + TRACE("Called vkd3d_shader_compile_dxbc.\n"); + } + + /* Debug compare SPIR-V we got from cache, and SPIR-V we got from compilation. 
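+      * recovered_hash is only non-zero when SANITIZE_SPIRV discarded a cached blob above.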
*/ + if (recovered_hash) + { + compiled_hash = vkd3d_shader_hash(spirv_code); + if (compiled_hash == recovered_hash) + INFO("SPIR-V match for cache reference OK!\n"); + else + INFO("SPIR-V mismatch for cache reference!\n"); + } + + return S_OK; +} + +static HRESULT vkd3d_late_compile_shader_stages(struct d3d12_pipeline_state *state) +{ + /* We are at risk of having to compile pipelines late if we return from CreatePipelineState without + * either code[i] or module being non-null. */ + struct d3d12_graphics_pipeline_state *graphics = &state->graphics; + bool need_compile = false; + unsigned int i; + HRESULT hr; + + rwlock_lock_read(&state->lock); + for (i = 0; i < graphics->stage_count; i++) + { + if (!graphics->code[i].size && graphics->stages[i].module == VK_NULL_HANDLE && + graphics->cached_desc.bytecode[i].BytecodeLength) + { + need_compile = true; + break; + } + } + rwlock_unlock_read(&state->lock); + + if (!need_compile) + return S_OK; + + /* Taking a writer lock here is kinda horrible, + * but we really shouldn't hit this path except in extreme circumstances. */ + hr = S_OK; + rwlock_lock_write(&state->lock); + for (i = 0; i < graphics->stage_count; i++) + { + graphics->identifier_create_infos[i].identifierSize = 0; + + if (graphics->stages[i].module == VK_NULL_HANDLE && !graphics->code[i].size && + graphics->cached_desc.bytecode[i].BytecodeLength) + { + if (FAILED(hr = vkd3d_compile_shader_stage(state, state->device, graphics->cached_desc.bytecode_stages[i], + &graphics->cached_desc.bytecode[i], &graphics->code[i]))) + break; + } + + if (FAILED(hr = d3d12_pipeline_state_create_shader_module(state->device, &graphics->stages[i].module, + &graphics->code[i]))) + break; + + /* We'll keep the module around here, no need to keep code/size pairs around for this. + * If we're in a situation where late compile is relevant, we're using PSO cached blobs, + * so we never expect to serialize out SPIR-V either way. */ + vkd3d_shader_free_shader_code(&graphics->code[i]); + graphics->code[i].code = NULL; + graphics->code[i].size = 0; + + /* Don't need the DXBC blob anymore either. 
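+          * Only free bytecode we duped ourselves; bytecode_duped_mask tracks that ownership.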
*/ + if (graphics->cached_desc.bytecode_duped_mask & (1u << i)) + { + vkd3d_free((void*)graphics->cached_desc.bytecode[i].pShaderBytecode); + memset(&graphics->cached_desc.bytecode[i], 0, sizeof(graphics->cached_desc.bytecode[i])); + graphics->cached_desc.bytecode_duped_mask &= ~(1u << i); + } + } + rwlock_unlock_write(&state->lock); + return hr; } static void vkd3d_report_pipeline_creation_feedback_results(const VkPipelineCreationFeedbackCreateInfoEXT *feedback) @@ -2254,38 +2460,53 @@ static void vkd3d_report_pipeline_creation_feedback_results(const VkPipelineCrea } } -static HRESULT vkd3d_create_compute_pipeline(struct d3d12_device *device, - const D3D12_SHADER_BYTECODE *code, - const struct vkd3d_shader_interface_info *shader_interface, - VkPipelineLayout vk_pipeline_layout, VkPipelineCache vk_cache, VkPipeline *vk_pipeline, - struct vkd3d_shader_code *spirv_code) +static HRESULT vkd3d_create_compute_pipeline(struct d3d12_pipeline_state *state, + struct d3d12_device *device, + const D3D12_SHADER_BYTECODE *code) { VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT required_subgroup_size_info; const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkPipelineCreationFeedbackCreateInfoEXT feedback_info; struct vkd3d_shader_debug_ring_spec_info spec_info; - struct vkd3d_shader_compile_arguments compile_args; VkPipelineCreationFeedbackEXT feedbacks[1]; VkComputePipelineCreateInfo pipeline_info; VkPipelineCreationFeedbackEXT feedback; + struct vkd3d_shader_code *spirv_code; + VkPipelineCache vk_cache; VkResult vr; HRESULT hr; - memset(&compile_args, 0, sizeof(compile_args)); - compile_args.target_extensions = device->vk_info.shader_extensions; - compile_args.target_extension_count = device->vk_info.shader_extension_count; - compile_args.target = VKD3D_SHADER_TARGET_SPIRV_VULKAN_1_0; - compile_args.quirks = &vkd3d_shader_quirk_info; + vk_cache = state->vk_pso_cache; + spirv_code = &state->compute.code; pipeline_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; pipeline_info.pNext = NULL; pipeline_info.flags = 0; - if (FAILED(hr = vkd3d_create_shader_stage(device, - &pipeline_info.stage, - VK_SHADER_STAGE_COMPUTE_BIT, &required_subgroup_size_info, - code, shader_interface, &compile_args, spirv_code))) + + if (state->compute.identifier_create_info.identifierSize == 0) + { + if (FAILED(hr = vkd3d_compile_shader_stage(state, device, + VK_SHADER_STAGE_COMPUTE_BIT, code, spirv_code))) + return hr; + } + + if (FAILED(hr = vkd3d_setup_shader_stage(state, device, + &pipeline_info.stage, VK_SHADER_STAGE_COMPUTE_BIT, + &required_subgroup_size_info, + &state->compute.identifier_create_info, + spirv_code))) return hr; - pipeline_info.layout = vk_pipeline_layout; + + if (pipeline_info.stage.module != VK_NULL_HANDLE && + device->device_info.shader_module_identifier_features.shaderModuleIdentifier) + { + state->compute.identifier.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_IDENTIFIER_EXT; + state->compute.identifier.pNext = NULL; + VK_CALL(vkGetShaderModuleIdentifierEXT(device->vk_device, pipeline_info.stage.module, + &state->compute.identifier)); + } + + pipeline_info.layout = state->root_signature->compute.vk_pipeline_layout; pipeline_info.basePipelineHandle = VK_NULL_HANDLE; pipeline_info.basePipelineIndex = -1; @@ -2310,8 +2531,43 @@ static HRESULT vkd3d_create_compute_pipeline(struct d3d12_device *device, else feedback_info.pipelineStageCreationFeedbackCount = 0; + if (pipeline_info.stage.module == VK_NULL_HANDLE) + pipeline_info.flags |= 
VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT; + vr = VK_CALL(vkCreateComputePipelines(device->vk_device, - vk_cache, 1, &pipeline_info, NULL, vk_pipeline)); + vk_cache, 1, &pipeline_info, NULL, &state->compute.vk_pipeline)); + + if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG) + { + if (pipeline_info.flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) + { + if (vr == VK_SUCCESS) + INFO("[IDENTIFIER] Successfully created compute pipeline from identifier.\n"); + else if (vr == VK_PIPELINE_COMPILE_REQUIRED_EXT) + INFO("[IDENTIFIER] Failed to create compute pipeline from identifier, falling back ...\n"); + } + else + INFO("[IDENTIFIER] None compute.\n"); + } + + /* Fallback. */ + if (vr == VK_PIPELINE_COMPILE_REQUIRED_EXT) + { + pipeline_info.flags &= ~VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT; + + if (FAILED(hr = vkd3d_compile_shader_stage(state, device, + VK_SHADER_STAGE_COMPUTE_BIT, code, spirv_code))) + return hr; + + if (FAILED(hr = vkd3d_setup_shader_stage(state, device, + &pipeline_info.stage, VK_SHADER_STAGE_COMPUTE_BIT, + &required_subgroup_size_info, NULL, + spirv_code))) + return hr; + + vr = VK_CALL(vkCreateComputePipelines(device->vk_device, + vk_cache, 1, &pipeline_info, NULL, &state->compute.vk_pipeline)); + } TRACE("Called vkCreateComputePipelines.\n"); VK_CALL(vkDestroyShaderModule(device->vk_device, pipeline_info.stage.module, NULL)); @@ -2333,32 +2589,12 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct vkd3d_shader_interface_info shader_interface; - const struct d3d12_root_signature *root_signature; HRESULT hr; state->vk_bind_point = VK_PIPELINE_BIND_POINT_COMPUTE; - if (desc->root_signature) - root_signature = impl_from_ID3D12RootSignature(desc->root_signature); - else - root_signature = state->private_root_signature; - - shader_interface.flags = d3d12_root_signature_get_shader_interface_flags(root_signature); - shader_interface.min_ssbo_alignment = d3d12_device_get_ssbo_alignment(device); - shader_interface.descriptor_tables.offset = root_signature->descriptor_table_offset; - shader_interface.descriptor_tables.count = root_signature->descriptor_table_count; - shader_interface.bindings = root_signature->bindings; - shader_interface.binding_count = root_signature->binding_count; - shader_interface.push_constant_buffers = root_signature->root_constants; - shader_interface.push_constant_buffer_count = root_signature->root_constant_count; - shader_interface.push_constant_ubo_binding = &root_signature->push_constant_ubo_binding; - shader_interface.offset_buffer_binding = &root_signature->offset_buffer_binding; - shader_interface.stage = VK_SHADER_STAGE_COMPUTE_BIT; - shader_interface.xfb_info = NULL; -#ifdef VKD3D_ENABLE_DESCRIPTOR_QA - shader_interface.descriptor_qa_global_binding = &root_signature->descriptor_qa_global_info; - shader_interface.descriptor_qa_heap_binding = &root_signature->descriptor_qa_heap_binding; -#endif + d3d12_pipeline_state_init_shader_interface(state, device, + VK_SHADER_STAGE_COMPUTE_BIT, &shader_interface); if (!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_GLOBAL_PIPELINE_CACHE)) { @@ -2370,14 +2606,10 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st } vkd3d_load_spirv_from_cached_state(device, cached_pso, - VK_SHADER_STAGE_COMPUTE_BIT, &state->compute.code); + VK_SHADER_STAGE_COMPUTE_BIT, &state->compute.code, + 
&state->compute.identifier_create_info); - hr = vkd3d_create_compute_pipeline(device, - &desc->cs, &shader_interface, - root_signature->compute.vk_pipeline_layout, - state->vk_pso_cache, - &state->compute.vk_pipeline, - &state->compute.code); + hr = vkd3d_create_compute_pipeline(state, device, &desc->cs); if (FAILED(hr)) { @@ -3038,32 +3270,118 @@ static HRESULT d3d12_pipeline_state_validate_blend_state(struct d3d12_pipeline_s return S_OK; } -static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *state, - struct d3d12_device *device, const struct d3d12_pipeline_state_desc *desc, +static void d3d12_pipeline_state_graphics_load_spirv_from_cached_state( + struct d3d12_pipeline_state *state, struct d3d12_device *device, + const struct d3d12_pipeline_state_desc *desc, const struct d3d12_cached_pipeline_state *cached_pso) +{ + struct d3d12_graphics_pipeline_state *graphics = &state->graphics; + unsigned int i, j; + + /* We only accept SPIR-V from cache if we can successfully load all shaders. + * We cannot partially fall back since we cannot handle any situation where we need inter-stage code-gen fixups. + * In this situation, just generate full SPIR-V from scratch. + * This really shouldn't happen unless we have corrupt cache entries. */ + for (i = 0; i < graphics->stage_count; i++) + { + if (FAILED(vkd3d_load_spirv_from_cached_state(device, cached_pso, + graphics->cached_desc.bytecode_stages[i], &graphics->code[i], + &graphics->identifier_create_infos[i]))) + { + for (j = 0; j < i; j++) + { + if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG) + INFO("Discarding cached SPIR-V for stage #%x.\n", graphics->cached_desc.bytecode_stages[i]); + vkd3d_shader_free_shader_code(&graphics->code[j]); + memset(&graphics->code[j], 0, sizeof(graphics->code[j])); + } + break; + } + } +} + +static HRESULT d3d12_pipeline_state_graphics_create_shader_stages( + struct d3d12_pipeline_state *state, struct d3d12_device *device, + const struct d3d12_pipeline_state_desc *desc) +{ + struct d3d12_graphics_pipeline_state *graphics = &state->graphics; + unsigned int i; + HRESULT hr; + + /* Now create the actual shader modules. If we managed to load SPIR-V from cache, use that directly. + * Make sure we don't reset graphics->stage_count since that is a potential memory leak if + * we fail to create shader module for whatever reason. 
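+      * Stages satisfied by a module identifier keep module == VK_NULL_HANDLE; the identifier is chained into pNext instead.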
*/ + for (i = 0; i < graphics->stage_count; i++) + { + if (graphics->identifier_create_infos[i].identifierSize == 0) + { + if (FAILED(hr = vkd3d_compile_shader_stage(state, device, + graphics->cached_desc.bytecode_stages[i], + &graphics->cached_desc.bytecode[i], &graphics->code[i]))) + return hr; + } + + if (FAILED(hr = vkd3d_setup_shader_stage(state, device, + &graphics->stages[i], + graphics->cached_desc.bytecode_stages[i], + NULL, &graphics->identifier_create_infos[i], + &graphics->code[i]))) + return hr; + } + + return S_OK; +} + +static void d3d12_pipeline_state_graphics_handle_meta(struct d3d12_pipeline_state *state, + struct d3d12_device *device) +{ + struct d3d12_graphics_pipeline_state *graphics = &state->graphics; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + unsigned int i; + + for (i = 0; i < graphics->stage_count; i++) + { + if (graphics->cached_desc.bytecode_stages[i] == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) + graphics->patch_vertex_count = graphics->code[i].meta.patch_vertex_count; + + if ((graphics->code[i].meta.flags & VKD3D_SHADER_META_FLAG_REPLACED) && + device->debug_ring.active) + { + vkd3d_shader_debug_ring_init_spec_constant(device, + &graphics->spec_info[i], + graphics->code[i].meta.hash); + graphics->stages[i].pSpecializationInfo = &graphics->spec_info[i].spec_info; + } + + if (graphics->stages[i].module != VK_NULL_HANDLE && + device->device_info.shader_module_identifier_features.shaderModuleIdentifier) + { + state->graphics.identifiers[i].sType = VK_STRUCTURE_TYPE_SHADER_MODULE_IDENTIFIER_EXT; + state->graphics.identifiers[i].pNext = NULL; + VK_CALL(vkGetShaderModuleIdentifierEXT(device->vk_device, graphics->stages[i].module, + &state->graphics.identifiers[i])); + } + } +} + +static HRESULT d3d12_pipeline_state_init_graphics_create_info(struct d3d12_pipeline_state *state, + struct d3d12_device *device, const struct d3d12_pipeline_state_desc *desc) { const VkPhysicalDeviceFeatures *features = &device->device_info.features2.features; - unsigned int ps_output_swizzle[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT]; - struct vkd3d_shader_compile_arguments compile_args, ps_compile_args; struct d3d12_graphics_pipeline_state *graphics = &state->graphics; const D3D12_STREAM_OUTPUT_DESC *so_desc = &desc->stream_output; VkVertexInputBindingDivisorDescriptionEXT *binding_divisor; const struct vkd3d_vulkan_info *vk_info = &device->vk_info; uint32_t instance_divisors[D3D12_VS_INPUT_REGISTER_COUNT]; uint32_t aligned_offsets[D3D12_VS_INPUT_REGISTER_COUNT]; - struct vkd3d_shader_parameter ps_shader_parameters[1]; - struct vkd3d_shader_transform_feedback_info xfb_info; - struct vkd3d_shader_interface_info shader_interface; - const struct d3d12_root_signature *root_signature; - bool have_attachment, can_compile_pipeline_early; struct vkd3d_shader_signature output_signature; struct vkd3d_shader_signature input_signature; - VkShaderStageFlagBits xfb_stage = 0; VkSampleCountFlagBits sample_count; const struct vkd3d_format *format; - unsigned int i, j, stage_count; unsigned int instance_divisor; VkVertexInputRate input_rate; + bool have_attachment; + unsigned int i, j; size_t rt_count; uint32_t mask; HRESULT hr; @@ -3074,7 +3392,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s enum VkShaderStageFlagBits stage; ptrdiff_t offset; } - shader_stages[] = + shader_stages_lut[] = { {VK_SHADER_STAGE_VERTEX_BIT, offsetof(struct d3d12_pipeline_state_desc, vs)}, {VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, offsetof(struct 
d3d12_pipeline_state_desc, hs)}, @@ -3084,6 +3402,8 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s }; state->vk_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS; + /* Defer taking ref-count until completion. */ + state->device = device; graphics->stage_count = 0; graphics->primitive_topology_type = desc->primitive_topology_type; @@ -3100,11 +3420,6 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s } } - if (desc->root_signature) - root_signature = impl_from_ID3D12RootSignature(desc->root_signature); - else - root_signature = state->private_root_signature; - sample_count = vk_samples_from_dxgi_sample_desc(&desc->sample_desc); if (desc->sample_desc.Count != 1 && desc->sample_desc.Quality) WARN("Ignoring sample quality %u.\n", desc->sample_desc.Quality); @@ -3116,6 +3431,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s rt_count, ARRAY_SIZE(graphics->blend_attachments)); rt_count = ARRAY_SIZE(graphics->blend_attachments); } + graphics->rt_count = rt_count; if (!desc->ps.pShaderBytecode || !desc->ps.BytecodeLength) { @@ -3134,12 +3450,12 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s if (desc->rtv_formats.RTFormats[i] == DXGI_FORMAT_UNKNOWN) { graphics->null_attachment_mask |= 1u << i; - ps_output_swizzle[i] = VKD3D_NO_SWIZZLE; + graphics->cached_desc.ps_output_swizzle[i] = VKD3D_NO_SWIZZLE; graphics->rtv_formats[i] = VK_FORMAT_UNDEFINED; } else if ((format = vkd3d_get_format(device, desc->rtv_formats.RTFormats[i], false))) { - ps_output_swizzle[i] = vkd3d_get_rt_format_swizzle(format); + graphics->cached_desc.ps_output_swizzle[i] = vkd3d_get_rt_format_swizzle(format); graphics->rtv_formats[i] = format->vk_format; graphics->rtv_active_mask |= 1u << i; } @@ -3172,7 +3488,6 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s for (i = rt_count; i < ARRAY_SIZE(graphics->rtv_formats); ++i) graphics->rtv_formats[i] = VK_FORMAT_UNDEFINED; - graphics->rt_count = rt_count; blend_desc_from_d3d12(&graphics->blend_desc, &desc->blend_state, graphics->rt_count, graphics->blend_attachments); @@ -3243,26 +3558,13 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s } } - ps_shader_parameters[0].name = VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT; - ps_shader_parameters[0].type = VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT; - ps_shader_parameters[0].data_type = VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32; - ps_shader_parameters[0].immediate_constant.u32 = sample_count; + graphics->cached_desc.ps_shader_parameters[0].name = VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT; + graphics->cached_desc.ps_shader_parameters[0].type = VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT; + graphics->cached_desc.ps_shader_parameters[0].data_type = VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32; + graphics->cached_desc.ps_shader_parameters[0].immediate_constant.u32 = sample_count; + graphics->cached_desc.is_dual_source_blending = is_dual_source_blending(&desc->blend_state.RenderTarget[0]); - memset(&compile_args, 0, sizeof(compile_args)); - compile_args.target = VKD3D_SHADER_TARGET_SPIRV_VULKAN_1_0; - compile_args.target_extension_count = vk_info->shader_extension_count; - compile_args.target_extensions = vk_info->shader_extensions; - compile_args.quirks = &vkd3d_shader_quirk_info; - - /* Options which are exclusive to PS. Especially output swizzles must only be used in PS. 
*/ - ps_compile_args = compile_args; - ps_compile_args.parameter_count = ARRAY_SIZE(ps_shader_parameters); - ps_compile_args.parameters = ps_shader_parameters; - ps_compile_args.dual_source_blending = is_dual_source_blending(&desc->blend_state.RenderTarget[0]); - ps_compile_args.output_swizzles = ps_output_swizzle; - ps_compile_args.output_swizzle_count = rt_count; - - if (ps_compile_args.dual_source_blending) + if (graphics->cached_desc.is_dual_source_blending) { /* If we're using dual source blending, we can only safely write to MRT 0. * Be defensive about programs which do not do this for us. */ @@ -3276,7 +3578,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s graphics->xfb_enabled = false; if (so_desc->NumEntries) { - if (!(root_signature->d3d12_flags & D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT)) + if (!(state->root_signature->d3d12_flags & D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT)) { WARN("Stream output is used without D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT.\n"); hr = E_INVALIDARG; @@ -3291,46 +3593,34 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s } graphics->xfb_enabled = true; + graphics->cached_desc.xfb_info = vkd3d_shader_transform_feedback_info_dup(so_desc); - xfb_info.elements = (const struct vkd3d_shader_transform_feedback_element *)so_desc->pSODeclaration; - xfb_info.element_count = so_desc->NumEntries; - xfb_info.buffer_strides = so_desc->pBufferStrides; - xfb_info.buffer_stride_count = so_desc->NumStrides; + if (!graphics->cached_desc.xfb_info) + { + hr = E_OUTOFMEMORY; + goto fail; + } if (desc->gs.pShaderBytecode) - xfb_stage = VK_SHADER_STAGE_GEOMETRY_BIT; + graphics->cached_desc.xfb_stage = VK_SHADER_STAGE_GEOMETRY_BIT; else if (desc->ds.pShaderBytecode) - xfb_stage = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; + graphics->cached_desc.xfb_stage = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; else - xfb_stage = VK_SHADER_STAGE_VERTEX_BIT; + graphics->cached_desc.xfb_stage = VK_SHADER_STAGE_VERTEX_BIT; } - shader_interface.flags = d3d12_root_signature_get_shader_interface_flags(root_signature); - shader_interface.min_ssbo_alignment = d3d12_device_get_ssbo_alignment(device); - shader_interface.descriptor_tables.offset = root_signature->descriptor_table_offset; - shader_interface.descriptor_tables.count = root_signature->descriptor_table_count; - shader_interface.bindings = root_signature->bindings; - shader_interface.binding_count = root_signature->binding_count; - shader_interface.push_constant_buffers = root_signature->root_constants; - shader_interface.push_constant_buffer_count = root_signature->root_constant_count; - shader_interface.push_constant_ubo_binding = &root_signature->push_constant_ubo_binding; - shader_interface.offset_buffer_binding = &root_signature->offset_buffer_binding; -#ifdef VKD3D_ENABLE_DESCRIPTOR_QA - shader_interface.descriptor_qa_global_binding = &root_signature->descriptor_qa_global_info; - shader_interface.descriptor_qa_heap_binding = &root_signature->descriptor_qa_heap_binding; -#endif - graphics->patch_vertex_count = 0; - for (i = 0; i < ARRAY_SIZE(shader_stages); ++i) + /* Parse interface data from DXBC blobs. 
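+      * Actual DXBC -> SPIR-V compilation is deferred to d3d12_pipeline_state_graphics_create_shader_stages().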
*/ + for (i = 0; i < ARRAY_SIZE(shader_stages_lut); ++i) { - const D3D12_SHADER_BYTECODE *b = (const void *)((uintptr_t)desc + shader_stages[i].offset); + const D3D12_SHADER_BYTECODE *b = (const void *)((uintptr_t)desc + shader_stages_lut[i].offset); const struct vkd3d_shader_code dxbc = {b->pShaderBytecode, b->BytecodeLength}; if (!b->pShaderBytecode) continue; - switch (shader_stages[i].stage) + switch (shader_stages_lut[i].stage) { case VK_SHADER_STAGE_VERTEX_BIT: if ((ret = vkd3d_shader_parse_input_signature(&dxbc, &input_signature)) < 0) @@ -3369,70 +3659,14 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s goto fail; } + /* Not owned yet. If we return from pipeline creation without having concrete SPIR-V, + * we'll have to dupe the bytecode and potentially compile to SPIR-V late. */ + graphics->cached_desc.bytecode[graphics->stage_count] = *b; + graphics->cached_desc.bytecode_stages[graphics->stage_count] = shader_stages_lut[i].stage; + ++graphics->stage_count; } - /* We only accept SPIR-V from cache if we can successfully load all shaders. - * We cannot partially fall back since we cannot handle any situation where we need inter-stage code-gen fixups. - * In this situation, just generate full SPIR-V from scratch. - * This really shouldn't happen unless we have corrupt cache entries. */ - stage_count = 0; - for (i = 0; i < ARRAY_SIZE(shader_stages); i++) - { - const D3D12_SHADER_BYTECODE *b = (const void *)((uintptr_t)desc + shader_stages[i].offset); - if (!b->pShaderBytecode) - continue; - - if (FAILED(vkd3d_load_spirv_from_cached_state(device, cached_pso, - shader_stages[i].stage, &graphics->code[stage_count]))) - { - for (j = 0; j < stage_count; j++) - { - if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG) - INFO("Discarding cached SPIR-V for stage #%x.\n", shader_stages[i].stage); - vkd3d_shader_free_shader_code(&graphics->code[j]); - memset(&graphics->code[j], 0, sizeof(graphics->code[j])); - } - break; - } - - ++stage_count; - } - - /* Now create the actual shader modules. If we managed to load SPIR-V from cache, use that directly. - * Make sure we don't reset graphics->stage_count since that is a potential memory leak if - * we fail to create shader module for whatever reason. */ - stage_count = 0; - for (i = 0; i < ARRAY_SIZE(shader_stages); i++) - { - const D3D12_SHADER_BYTECODE *b = (const void *)((uintptr_t)desc + shader_stages[i].offset); - if (!b->pShaderBytecode) - continue; - - shader_interface.xfb_info = shader_stages[i].stage == xfb_stage ? &xfb_info : NULL; - shader_interface.stage = shader_stages[i].stage; - if (FAILED(hr = vkd3d_create_shader_stage(device, - &graphics->stages[stage_count], - shader_stages[i].stage, NULL, b, &shader_interface, - shader_stages[i].stage == VK_SHADER_STAGE_FRAGMENT_BIT ? 
&ps_compile_args : &compile_args, - &graphics->code[stage_count]))) - goto fail; - - if (shader_stages[i].stage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) - graphics->patch_vertex_count = graphics->code[stage_count].meta.patch_vertex_count; - - if ((graphics->code[stage_count].meta.flags & VKD3D_SHADER_META_FLAG_REPLACED) && - device->debug_ring.active) - { - vkd3d_shader_debug_ring_init_spec_constant(device, - &graphics->spec_info[stage_count], - graphics->code[stage_count].meta.hash); - graphics->stages[stage_count].pSpecializationInfo = &graphics->spec_info[stage_count].spec_info; - } - - ++stage_count; - } - graphics->attribute_count = desc->input_layout.NumElements; if (graphics->attribute_count > ARRAY_SIZE(graphics->attributes)) { @@ -3442,7 +3676,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s } if (graphics->attribute_count - && !(root_signature->d3d12_flags & D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT)) + && !(state->root_signature->d3d12_flags & D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT)) { WARN("Input layout is used without D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT.\n"); hr = E_INVALIDARG; @@ -3630,43 +3864,6 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s goto fail; } - /* If we don't know vertex count for tessellation shaders, we need to defer compilation, but this should - * be exceedingly rare. */ - can_compile_pipeline_early = - (desc->primitive_topology_type != D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH || graphics->patch_vertex_count != 0) && - desc->primitive_topology_type != D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED; - - graphics->pipeline_layout = root_signature->graphics.vk_pipeline_layout; - graphics->pipeline = VK_NULL_HANDLE; - state->device = device; - - if (can_compile_pipeline_early) - { - if (!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_GLOBAL_PIPELINE_CACHE)) - { - if ((hr = vkd3d_create_pipeline_cache_from_d3d12_desc(device, cached_pso, &state->vk_pso_cache)) < 0) - { - ERR("Failed to create pipeline cache, hr %d.\n", hr); - goto fail; - } - } - - if (!(graphics->pipeline = d3d12_pipeline_state_create_pipeline_variant(state, NULL, graphics->dsv_format, - state->vk_pso_cache, &graphics->dynamic_state_flags))) - goto fail; - } - else - { - graphics->dsv_plane_optimal_mask = d3d12_graphics_pipeline_state_get_plane_optimal_mask(graphics, NULL); - } - - list_init(&graphics->compiled_fallback_pipelines); - - if (FAILED(hr = vkd3d_private_store_init(&state->private_store))) - goto fail; - - d3d12_device_add_ref(state->device); - return S_OK; fail: @@ -3676,6 +3873,85 @@ fail: return hr; } +static HRESULT d3d12_pipeline_state_init_graphics_spirv(struct d3d12_pipeline_state *state, + const struct d3d12_pipeline_state_desc *desc, + const struct d3d12_cached_pipeline_state *cached_pso) +{ + struct d3d12_device *device = state->device; + HRESULT hr; + + d3d12_pipeline_state_graphics_load_spirv_from_cached_state(state, device, desc, cached_pso); + if (FAILED(hr = d3d12_pipeline_state_graphics_create_shader_stages(state, device, desc))) + return hr; + + /* At this point, we will have valid meta structures set up. + * Deduce further PSO information from these structs. 
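+      * This picks up patch vertex counts, debug ring spec constants, and queries shader module identifiers.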
*/ + d3d12_pipeline_state_graphics_handle_meta(state, device); + return S_OK; +} + +static HRESULT d3d12_pipeline_state_init_static_pipeline(struct d3d12_pipeline_state *state, + const struct d3d12_pipeline_state_desc *desc) +{ + struct d3d12_graphics_pipeline_state *graphics = &state->graphics; + bool can_compile_pipeline_early; + + /* If we don't know vertex count for tessellation shaders, we need to defer compilation, but this should + * be exceedingly rare. */ + can_compile_pipeline_early = + (desc->primitive_topology_type != D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH || graphics->patch_vertex_count != 0) && + desc->primitive_topology_type != D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED; + + graphics->pipeline_layout = state->root_signature->graphics.vk_pipeline_layout; + graphics->pipeline = VK_NULL_HANDLE; + + if (can_compile_pipeline_early) + { + if (!(graphics->pipeline = d3d12_pipeline_state_create_pipeline_variant(state, NULL, graphics->dsv_format, + state->vk_pso_cache, &graphics->dynamic_state_flags))) + return E_OUTOFMEMORY; + } + else + { + graphics->dsv_plane_optimal_mask = d3d12_graphics_pipeline_state_get_plane_optimal_mask(graphics, NULL); + } + + return S_OK; +} + +static HRESULT d3d12_pipeline_state_finish_graphics(struct d3d12_pipeline_state *state) +{ + struct d3d12_graphics_pipeline_state *graphics = &state->graphics; + unsigned int i; + void *new_code; + HRESULT hr; + + /* If we got here successfully without SPIR-V code, + * it means we'll need to defer compilation from DXBC -> SPIR-V. + * Dupe the DXBC code. + * TODO: This codepath is not relevant yet. */ + for (i = 0; i < graphics->stage_count; i++) + { + if (graphics->code[i].size || graphics->stages[i].module != VK_NULL_HANDLE || + !graphics->cached_desc.bytecode[i].BytecodeLength) + continue; + + new_code = vkd3d_malloc(graphics->cached_desc.bytecode[i].BytecodeLength); + if (!new_code) + return E_OUTOFMEMORY; + memcpy(new_code, graphics->cached_desc.bytecode[i].pShaderBytecode, + graphics->cached_desc.bytecode[i].BytecodeLength); + graphics->cached_desc.bytecode[i].pShaderBytecode = new_code; + graphics->cached_desc.bytecode_duped_mask |= 1u << i; + } + + list_init(&graphics->compiled_fallback_pipelines); + if (FAILED(hr = vkd3d_private_store_init(&state->private_store))) + return hr; + d3d12_device_add_ref(state->device); + return S_OK; +} + bool d3d12_pipeline_state_has_replaced_shaders(struct d3d12_pipeline_state *state) { unsigned int i; @@ -3720,7 +3996,6 @@ HRESULT d3d12_pipeline_state_create(struct d3d12_device *device, VkPipelineBindP const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; const struct d3d12_cached_pipeline_state *desc_cached_pso; struct d3d12_cached_pipeline_state cached_pso; - struct d3d12_root_signature *root_signature; struct d3d12_pipeline_state *object; HRESULT hr; @@ -3729,23 +4004,33 @@ HRESULT d3d12_pipeline_state_create(struct d3d12_device *device, VkPipelineBindP memset(object, 0, sizeof(*object)); + if (rwlock_init(&object->lock)) + { + vkd3d_free(object); + return E_FAIL; + } + if (!desc->root_signature) { if (FAILED(hr = d3d12_pipeline_create_private_root_signature(device, - bind_point, desc, &object->private_root_signature))) + bind_point, desc, &object->root_signature))) { ERR("No root signature for pipeline.\n"); vkd3d_free(object); return hr; } - root_signature = object->private_root_signature; + object->root_signature_compat_hash_is_dxbc_derived = true; } else - root_signature = impl_from_ID3D12RootSignature(desc->root_signature); + { + object->root_signature =
impl_from_ID3D12RootSignature(desc->root_signature); + /* Hold a private reference on this root signature in case we have to create fallback PSOs. */ + d3d12_root_signature_inc_ref(object->root_signature); + } vkd3d_pipeline_cache_compat_from_state_desc(&object->pipeline_cache_compat, desc); - if (root_signature) - object->pipeline_cache_compat.root_signature_compat_hash = root_signature->compatibility_hash; + if (object->root_signature) + object->pipeline_cache_compat.root_signature_compat_hash = object->root_signature->compatibility_hash; desc_cached_pso = &desc->cached_pso; @@ -3754,8 +4039,9 @@ HRESULT d3d12_pipeline_state_create(struct d3d12_device *device, VkPipelineBindP if (FAILED(hr = d3d12_cached_pipeline_state_validate(device, &desc->cached_pso, &object->pipeline_cache_compat))) { - if (object->private_root_signature) - d3d12_root_signature_dec_ref(object->private_root_signature); + if (object->root_signature) + d3d12_root_signature_dec_ref(object->root_signature); + rwlock_destroy(&object->lock); vkd3d_free(object); return hr; } @@ -3790,28 +4076,47 @@ HRESULT d3d12_pipeline_state_create(struct d3d12_device *device, VkPipelineBindP object->refcount = 1; object->internal_refcount = 1; - switch (bind_point) + hr = S_OK; + + if (!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_GLOBAL_PIPELINE_CACHE)) + if ((hr = vkd3d_create_pipeline_cache_from_d3d12_desc(device, desc_cached_pso, &object->vk_pso_cache)) < 0) + ERR("Failed to create pipeline cache, hr %d.\n", hr); + + if (SUCCEEDED(hr)) { - case VK_PIPELINE_BIND_POINT_COMPUTE: - hr = d3d12_pipeline_state_init_compute(object, device, desc, desc_cached_pso); - break; + switch (bind_point) + { + case VK_PIPELINE_BIND_POINT_COMPUTE: + hr = d3d12_pipeline_state_init_compute(object, device, desc, desc_cached_pso); + break; - case VK_PIPELINE_BIND_POINT_GRAPHICS: - hr = d3d12_pipeline_state_init_graphics(object, device, desc, desc_cached_pso); - break; + case VK_PIPELINE_BIND_POINT_GRAPHICS: + /* Creating a graphics PSO is more involved ... */ + hr = d3d12_pipeline_state_init_graphics_create_info(object, device, desc); + if (SUCCEEDED(hr)) + hr = d3d12_pipeline_state_init_graphics_spirv(object, desc, desc_cached_pso); + if (SUCCEEDED(hr)) + hr = d3d12_pipeline_state_init_static_pipeline(object, desc); + if (SUCCEEDED(hr)) + hr = d3d12_pipeline_state_finish_graphics(object); + break; - default: - ERR("Invalid pipeline type %u.", bind_point); - hr = E_INVALIDARG; + default: + ERR("Invalid pipeline type %u.\n", bind_point); + hr = E_INVALIDARG; + } } if (FAILED(hr)) { - if (object->private_root_signature) - d3d12_root_signature_dec_ref(object->private_root_signature); + if (object->root_signature) + d3d12_root_signature_dec_ref(object->root_signature); d3d12_pipeline_state_free_spirv_code(object); d3d12_pipeline_state_destroy_shader_modules(object, device); + if (object->vk_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) + d3d12_pipeline_state_free_cached_desc(&object->graphics.cached_desc); VK_CALL(vkDestroyPipelineCache(device->vk_device, object->vk_pso_cache, NULL)); + rwlock_destroy(&object->lock); vkd3d_free(object); return hr; @@ -3838,6 +4143,10 @@ HRESULT d3d12_pipeline_state_create(struct d3d12_device *device, VkPipelineBindP { VK_CALL(vkDestroyPipelineCache(device->vk_device, object->vk_pso_cache, NULL)); object->vk_pso_cache = VK_NULL_HANDLE; + + /* Set this explicitly so we avoid attempting to touch code[i] when serializing the PSO blob. + * We are at risk of compiling code on the fly in some upcoming situations.
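* When that happens there is no SPIR-V in code[i] to write out; the cached blob and the shader module identifiers are the only sources of truth.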
*/ + object->pso_is_loaded_from_cached_blob = true; } else if (device->disk_cache.library) { @@ -3956,7 +4265,7 @@ static VkPipeline d3d12_pipeline_state_find_compiled_pipeline(struct d3d12_pipel struct vkd3d_compiled_pipeline *current; VkPipeline vk_pipeline = VK_NULL_HANDLE; - rw_spinlock_acquire_read(&state->lock); + rwlock_lock_read(&state->lock); LIST_FOR_EACH_ENTRY(current, &graphics->compiled_fallback_pipelines, struct vkd3d_compiled_pipeline, entry) { if (!memcmp(&current->key, key, sizeof(*key))) { @@ -3966,7 +4275,7 @@ static VkPipeline d3d12_pipeline_state_find_compiled_pipeline(struct d3d12_pipel break; } } - rw_spinlock_release_read(&state->lock); + rwlock_unlock_read(&state->lock); return vk_pipeline; } @@ -3984,7 +4293,7 @@ static bool d3d12_pipeline_state_put_pipeline_to_cache(struct d3d12_pipeline_sta compiled_pipeline->vk_pipeline = vk_pipeline; compiled_pipeline->dynamic_state_flags = dynamic_state_flags; - rw_spinlock_acquire_write(&state->lock); + rwlock_lock_write(&state->lock); LIST_FOR_EACH_ENTRY(current, &graphics->compiled_fallback_pipelines, struct vkd3d_compiled_pipeline, entry) { @@ -3999,7 +4308,7 @@ static bool d3d12_pipeline_state_put_pipeline_to_cache(struct d3d12_pipeline_sta if (compiled_pipeline) list_add_tail(&graphics->compiled_fallback_pipelines, &compiled_pipeline->entry); - rw_spinlock_release_write(&state->lock); + rwlock_unlock_write(&state->lock); return compiled_pipeline; } @@ -4133,6 +4442,8 @@ VkPipeline d3d12_pipeline_state_create_pipeline_variant(struct d3d12_pipeline_st if (d3d12_graphics_pipeline_state_has_unknown_dsv_format_with_test(graphics) && dsv_format) TRACE("Compiling %p with fallback DSV format %#x.\n", state, dsv_format->vk_format); + /* FIXME: This gets modified on late recompilation, could there be thread safety issues here? + * For GENERAL depth-stencil, this mask should not matter at all, but there might be edge cases for tracked DSV. */ graphics->dsv_plane_optimal_mask = d3d12_graphics_pipeline_state_get_plane_optimal_mask(graphics, dsv_format); if (key) @@ -4145,7 +4456,8 @@ VkPipeline d3d12_pipeline_state_create_pipeline_variant(struct d3d12_pipeline_st { if (stages[i].module == VK_NULL_HANDLE && graphics->code[i].code) { - if (FAILED(hr = d3d12_pipeline_state_create_shader_module(device, &stages[i], &graphics->code[i]))) + if (FAILED(hr = d3d12_pipeline_state_create_shader_module(device, + &stages[i].module, &graphics->code[i]))) { /* This is kind of fatal and should only happen for out-of-memory. */ ERR("Unexpected failure (hr %x) in creating fallback SPIR-V module.\n", hr); @@ -4157,6 +4469,11 @@ VkPipeline d3d12_pipeline_state_create_pipeline_variant(struct d3d12_pipeline_st } } + /* If we're using identifiers, set the appropriate flag.
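A VK_NULL_HANDLE module at this point means the stage only carries a module identifier, so ask the driver to fail with VK_PIPELINE_COMPILE_REQUIRED_EXT rather than compile from scratch; the fallback below then supplies real SPIR-V.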
*/ + for (i = 0; i < graphics->stage_count; i++) + if (pipeline_desc.pStages[i].module == VK_NULL_HANDLE) + pipeline_desc.flags |= VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT; + TRACE("Calling vkCreateGraphicsPipelines.\n"); if ((vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG) && @@ -4172,13 +4489,49 @@ VkPipeline d3d12_pipeline_state_create_pipeline_variant(struct d3d12_pipeline_st else feedback_info.pipelineStageCreationFeedbackCount = 0; - if ((vr = VK_CALL(vkCreateGraphicsPipelines(device->vk_device, - vk_cache, 1, &pipeline_desc, NULL, &vk_pipeline))) < 0) + vr = VK_CALL(vkCreateGraphicsPipelines(device->vk_device, vk_cache, 1, &pipeline_desc, NULL, &vk_pipeline)); + + if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG) + { + if (pipeline_desc.flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) + { + if (vr == VK_SUCCESS) + INFO("[IDENTIFIER] Successfully created graphics pipeline from identifier.\n"); + else if (vr == VK_PIPELINE_COMPILE_REQUIRED_EXT) + INFO("[IDENTIFIER] Failed to create graphics pipeline from identifier, falling back ...\n"); + } + else + INFO("[IDENTIFIER] No graphics identifier.\n"); + } + + if (vr == VK_PIPELINE_COMPILE_REQUIRED_EXT) + { + if (FAILED(hr = vkd3d_late_compile_shader_stages(state))) + { + ERR("Late compilation of SPIR-V failed.\n"); + return VK_NULL_HANDLE; + } + + pipeline_desc.flags &= ~VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT; + + /* Clean up any temporary SPIR-V modules we created. */ + if (pipeline_desc.pStages == stages) + for (i = 0; i < graphics->stage_count; i++) + if (stages[i].module != graphics->stages[i].module) + VK_CALL(vkDestroyShaderModule(device->vk_device, stages[i].module, NULL)); + + /* Internal modules are known to be non-null now. */ + pipeline_desc.pStages = state->graphics.stages; + vr = VK_CALL(vkCreateGraphicsPipelines(device->vk_device, vk_cache, 1, &pipeline_desc, NULL, &vk_pipeline)); + } + + TRACE("Completed vkCreateGraphicsPipelines.\n"); + + if (vr < 0) { WARN("Failed to create Vulkan graphics pipeline, vr %d.\n", vr); return VK_NULL_HANDLE; } - TRACE("Completed vkCreateGraphicsPipelines.\n"); /* Clean up any temporary SPIR-V modules we created. */ if (pipeline_desc.pStages == stages) diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index ac4311a6..711e5f17 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -164,6 +164,8 @@ struct vkd3d_vulkan_info bool EXT_shader_image_atomic_int64; bool EXT_scalar_block_layout; bool EXT_pipeline_creation_feedback; + bool EXT_pipeline_creation_cache_control; + bool EXT_shader_module_identifier; /* AMD device extensions */ bool AMD_buffer_marker; bool AMD_device_coherent_memory; @@ -1533,13 +1535,31 @@ enum vkd3d_plane_optimal_flag VKD3D_DEPTH_STENCIL_PLANE_GENERAL = (1 << 2), }; +struct d3d12_graphics_pipeline_state_cached_desc +{ + /* Information needed to compile to SPIR-V.
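Retained so that DXBC can still be lowered to SPIR-V late if a pipeline cannot be created from a module identifier alone.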
*/ + unsigned int ps_output_swizzle[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT]; + struct vkd3d_shader_parameter ps_shader_parameters[1]; + bool is_dual_source_blending; + VkShaderStageFlagBits xfb_stage; + struct vkd3d_shader_transform_feedback_info *xfb_info; + + D3D12_SHADER_BYTECODE bytecode[VKD3D_MAX_SHADER_STAGES]; + VkShaderStageFlagBits bytecode_stages[VKD3D_MAX_SHADER_STAGES]; + uint32_t bytecode_duped_mask; +}; + struct d3d12_graphics_pipeline_state { struct vkd3d_shader_debug_ring_spec_info spec_info[VKD3D_MAX_SHADER_STAGES]; VkPipelineShaderStageCreateInfo stages[VKD3D_MAX_SHADER_STAGES]; struct vkd3d_shader_code code[VKD3D_MAX_SHADER_STAGES]; + VkShaderModuleIdentifierEXT identifiers[VKD3D_MAX_SHADER_STAGES]; + VkPipelineShaderStageModuleIdentifierCreateInfoEXT identifier_create_infos[VKD3D_MAX_SHADER_STAGES]; size_t stage_count; + struct d3d12_graphics_pipeline_state_cached_desc cached_desc; + VkVertexInputAttributeDescription attributes[D3D12_VS_INPUT_REGISTER_COUNT]; VkVertexInputRate input_rates[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; VkVertexInputBindingDivisorDescriptionEXT instance_divisors[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; @@ -1590,6 +1610,8 @@ struct d3d12_compute_pipeline_state { VkPipeline vk_pipeline; struct vkd3d_shader_code code; + VkShaderModuleIdentifierEXT identifier; + VkPipelineShaderStageModuleIdentifierCreateInfoEXT identifier_create_info; }; /* To be able to load a pipeline from cache, this information must match exactly, @@ -1615,11 +1637,13 @@ struct d3d12_pipeline_state { }; VkPipelineBindPoint vk_bind_point; VkPipelineCache vk_pso_cache; - spinlock_t lock; + rwlock_t lock; struct vkd3d_pipeline_cache_compatibility pipeline_cache_compat; - struct d3d12_root_signature *private_root_signature; + struct d3d12_root_signature *root_signature; struct d3d12_device *device; + bool root_signature_compat_hash_is_dxbc_derived; + bool pso_is_loaded_from_cached_blob; struct vkd3d_private_store private_store; }; @@ -1810,6 +1834,7 @@ enum vkd3d_pipeline_library_flags VKD3D_PIPELINE_LIBRARY_FLAG_STREAM_ARCHIVE = 1 << 4, /* We expect to parse archive from thread, so consider thread safety and cancellation points.
*/ VKD3D_PIPELINE_LIBRARY_FLAG_STREAM_ARCHIVE_PARSE_ASYNC = 1 << 5, + VKD3D_PIPELINE_LIBRARY_FLAG_SHADER_IDENTIFIER = 1 << 6, }; HRESULT d3d12_pipeline_library_create(struct d3d12_device *device, const void *blob, @@ -1823,7 +1848,8 @@ HRESULT vkd3d_create_pipeline_cache_from_d3d12_desc(struct d3d12_device *device, HRESULT vkd3d_get_cached_spirv_code_from_d3d12_desc( const struct d3d12_cached_pipeline_state *state, VkShaderStageFlagBits stage, - struct vkd3d_shader_code *spirv_code); + struct vkd3d_shader_code *spirv_code, + VkPipelineShaderStageModuleIdentifierCreateInfoEXT *identifier); VkResult vkd3d_serialize_pipeline_state(struct d3d12_pipeline_library *pipeline_library, const struct d3d12_pipeline_state *state, size_t *size, void *data); HRESULT d3d12_cached_pipeline_state_validate(struct d3d12_device *device, @@ -3246,6 +3272,7 @@ struct vkd3d_physical_device_info VkPhysicalDeviceDriverPropertiesKHR driver_properties; VkPhysicalDeviceMaintenance4PropertiesKHR maintenance4_properties; VkPhysicalDeviceDeviceGeneratedCommandsPropertiesNV device_generated_commands_properties_nv; + VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT shader_module_identifier_properties; VkPhysicalDeviceProperties2KHR properties2; @@ -3291,6 +3318,8 @@ struct vkd3d_physical_device_info VkPhysicalDeviceMaintenance4FeaturesKHR maintenance4_features; VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR ray_tracing_maintenance1_features; VkPhysicalDeviceDeviceGeneratedCommandsFeaturesNV device_generated_commands_features_nv; + VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT pipeline_creation_cache_control_features; + VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT shader_module_identifier_features; VkPhysicalDeviceFeatures2 features2; diff --git a/libs/vkd3d/vulkan_procs.h b/libs/vkd3d/vulkan_procs.h index 7fceb4da..95b63065 100644 --- a/libs/vkd3d/vulkan_procs.h +++ b/libs/vkd3d/vulkan_procs.h @@ -324,6 +324,10 @@ VK_DEVICE_EXT_PFN(vkDestroyIndirectCommandsLayoutNV) VK_DEVICE_EXT_PFN(vkGetGeneratedCommandsMemoryRequirementsNV) VK_DEVICE_EXT_PFN(vkCmdExecuteGeneratedCommandsNV) +/* VK_EXT_shader_module_identifier */ +VK_DEVICE_EXT_PFN(vkGetShaderModuleIdentifierEXT) +VK_DEVICE_EXT_PFN(vkGetShaderModuleCreateInfoIdentifierEXT) + #undef VK_INSTANCE_PFN #undef VK_INSTANCE_EXT_PFN #undef VK_DEVICE_PFN diff --git a/subprojects/Vulkan-Headers b/subprojects/Vulkan-Headers index 245d25ce..2c823b7f 160000 --- a/subprojects/Vulkan-Headers +++ b/subprojects/Vulkan-Headers @@ -1 +1 @@ -Subproject commit 245d25ce8c3337919dc7916d0e62e31a0d8748ab +Subproject commit 2c823b7f27590ec0a489f7fbe14b154e13fa5cfb
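For context, the extension flow the new cache chunks build on is roughly: query a stable identifier once a module has actually been compiled, then on later runs chain that identifier into the stage in place of SPIR-V and let a driver-cache miss fail fast. The sketch below illustrates the VK_EXT_shader_module_identifier / VK_EXT_pipeline_creation_cache_control interplay under those assumptions; it is not code from this patch, capture_identifier and create_from_identifier are invented names, and real code loads the EXT entry points through vkGetDeviceProcAddr as vulkan_procs.h does.

#include <string.h>
#include <vulkan/vulkan.h>

/* After a real compile, query a stable identifier for the module. */
static void capture_identifier(VkDevice vk_device, VkShaderModule vk_module,
        VkShaderModuleIdentifierEXT *identifier)
{
    memset(identifier, 0, sizeof(*identifier));
    identifier->sType = VK_STRUCTURE_TYPE_SHADER_MODULE_IDENTIFIER_EXT;
    vkGetShaderModuleIdentifierEXT(vk_device, vk_module, identifier);
}

/* On a later run, attempt to create the pipeline with no SPIR-V at all. */
static VkResult create_from_identifier(VkDevice vk_device, VkPipelineCache vk_cache,
        VkGraphicsPipelineCreateInfo *create_info, VkPipelineShaderStageCreateInfo *stage,
        const VkShaderModuleIdentifierEXT *identifier, VkPipeline *vk_pipeline)
{
    VkPipelineShaderStageModuleIdentifierCreateInfoEXT identifier_info;

    identifier_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT;
    identifier_info.pNext = NULL;
    identifier_info.identifierSize = identifier->identifierSize;
    identifier_info.pIdentifier = identifier->identifier;

    /* The identifier chained into the stage replaces the module. */
    stage->pNext = &identifier_info;
    stage->module = VK_NULL_HANDLE;

    /* Without this flag the driver could fall back to a full compile, which defeats
     * the point. With it, a driver-cache miss fails fast with
     * VK_PIPELINE_COMPILE_REQUIRED_EXT so the caller can retry with real SPIR-V,
     * mirroring the fallback in d3d12_pipeline_state_create_pipeline_variant. */
    create_info->flags |= VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT;
    return vkCreateGraphicsPipelines(vk_device, vk_cache, 1, create_info, NULL, vk_pipeline);
}

On a VK_PIPELINE_COMPILE_REQUIRED_EXT result the caller recompiles the real SPIR-V and retries, which is exactly what the late-compile path added above does.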