From 3c92b3a1bcaac35950f6b7cafe87610dc7328c79 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Tue, 3 May 2022 15:01:58 +0200 Subject: [PATCH] vkd3d: Implement AddToStateObject(). This is barely implementable, and relies on implementations doing roughly what we want. To make this work in practice, we need to allow two pipelines per state object: one that is created with LIBRARY and one that can be bound. When incrementally adding to the PSO, we use the LIBRARY one. It seems to be allowed to create a new library from an old library. It is more convenient for us if we're allowed to do this, so do this until we're forced to do otherwise. DXR 1.1 requires that shader identifiers remain invariant for child pipelines if the parent pipeline also has them. Vulkan has no such guarantee, but we can speculate that it works and validate that identifiers remain invariant. This seems to work fine on NVIDIA at least ... It probably makes sense that it works for implementations where pipeline libraries are compiled at that time. The basic implementation of AddToStateObject() is to consider the parent pipeline as a COLLECTION pipeline. This composes well and avoids a lot of extra implementation cruft. Also adds validation to ensure that COLLECTION global state matches other COLLECTION objects and the parent. We will also inherit global state like root signatures, pipeline config, shader configs, etc. when using AddToStateObject(). The tests pass on NVIDIA at least. 
Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/device.c | 22 +++- libs/vkd3d/raytracing_pipeline.c | 217 +++++++++++++++++++++++++++---- libs/vkd3d/vkd3d_private.h | 19 +++ 3 files changed, 230 insertions(+), 28 deletions(-) diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 19090951..6e7192ff 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -4935,7 +4935,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateStateObject(d3d12_device_ifa TRACE("iface %p, desc %p, iid %s, state_object %p!\n", iface, desc, debugstr_guid(iid), state_object); - if (FAILED(hr = d3d12_state_object_create(device, desc, &state))) + if (FAILED(hr = d3d12_state_object_create(device, desc, NULL, &state))) return hr; return return_interface(&state->ID3D12StateObject_iface, &IID_ID3D12StateObject, iid, state_object); @@ -5003,13 +5003,23 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetBackgroundProcessingMode(d3d12_ return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_AddToStateObject(d3d12_device_iface *iface, const D3D12_STATE_OBJECT_DESC *addition, - ID3D12StateObject *state_object, REFIID riid, void **new_state_object) +static HRESULT STDMETHODCALLTYPE d3d12_device_AddToStateObject(d3d12_device_iface *iface, + const D3D12_STATE_OBJECT_DESC *addition, + ID3D12StateObject *parent_state, REFIID riid, void **new_state_object) { - FIXME("iface %p, addition %p, state_object %p, riid %s, new_state_object %p stub!\n", - iface, addition, state_object, debugstr_guid(riid), new_state_object); + struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_state_object *parent; + struct d3d12_state_object *state; + HRESULT hr; - return E_NOTIMPL; + TRACE("iface %p, addition %p, state_object %p, riid %s, new_state_object %p stub!\n", + iface, addition, parent_state, debugstr_guid(riid), new_state_object); + + parent = impl_from_ID3D12StateObject(parent_state); + if (FAILED(hr = d3d12_state_object_add(device, addition, parent, &state))) + 
return hr; + + return return_interface(&state->ID3D12StateObject_iface, &IID_ID3D12StateObject, riid, new_state_object); } static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession1(d3d12_device_iface *iface, diff --git a/libs/vkd3d/raytracing_pipeline.c b/libs/vkd3d/raytracing_pipeline.c index 79c7c116..cc505c4f 100644 --- a/libs/vkd3d/raytracing_pipeline.c +++ b/libs/vkd3d/raytracing_pipeline.c @@ -126,6 +126,7 @@ static void d3d12_state_object_cleanup(struct d3d12_state_object *object) d3d12_root_signature_dec_ref(object->global_root_signature); VK_CALL(vkDestroyPipeline(object->device->vk_device, object->pipeline, NULL)); + VK_CALL(vkDestroyPipeline(object->device->vk_device, object->pipeline_library, NULL)); VK_CALL(vkDestroyPipelineLayout(object->device->vk_device, object->local_static_sampler.pipeline_layout, NULL)); @@ -240,6 +241,7 @@ static void * STDMETHODCALLTYPE d3d12_state_object_properties_GetShaderIdentifie LPCWSTR export_name) { struct d3d12_state_object *object = impl_from_ID3D12StateObjectProperties(iface); + struct d3d12_state_object_identifier *export; const WCHAR *subtype = NULL; uint32_t index; @@ -250,7 +252,14 @@ static void * STDMETHODCALLTYPE d3d12_state_object_properties_GetShaderIdentifie /* Cannot query shader identifier for non-group names. 
*/ if (!subtype && index != UINT32_MAX) { - return object->exports[index].identifier; + export = &object->exports[index]; + /* Need to return the parent SBT pointer if it exists */ + while (export->inherited_collection_index >= 0) + { + object = object->collections[export->inherited_collection_index]; + export = &object->exports[export->inherited_collection_export_index]; + } + return export->identifier; } else { @@ -426,10 +435,78 @@ static void d3d12_state_object_pipeline_data_cleanup(struct d3d12_state_object_p vkd3d_free(data->vk_libraries); } +static HRESULT d3d12_state_object_add_collection( + struct d3d12_state_object *collection, + struct d3d12_state_object_pipeline_data *data, + const D3D12_EXPORT_DESC *exports, unsigned int num_exports) +{ + if (!vkd3d_array_reserve((void **)&data->collections, &data->collections_size, + data->collections_count + 1, sizeof(*data->collections))) + return E_OUTOFMEMORY; + + /* If a PSO only declares collections, but no pipelines, just inherit various state. + * Also, validates that we have a match across different PSOs. 
*/ + if (data->global_root_signature) + { + if (!collection->global_root_signature || + data->global_root_signature->compatibility_hash != collection->global_root_signature->compatibility_hash) + { + FIXME("Mismatch in global root signature state for PSO and collection.\n"); + return E_INVALIDARG; + } + } + else + data->global_root_signature = collection->global_root_signature; + + if (data->has_pipeline_config) + { + if (memcmp(&data->pipeline_config, &collection->pipeline_config, sizeof(data->pipeline_config)) != 0) + { + FIXME("Mismatch in pipeline config state for collection and PSO.\n"); + return E_INVALIDARG; + } + } + else + { + data->pipeline_config = collection->pipeline_config; + data->has_pipeline_config = true; + } + + if (data->shader_config) + { + if (memcmp(data->shader_config, &collection->shader_config, sizeof(*data->shader_config)) != 0) + { + FIXME("Mismatch in shader config state for collection and PSO.\n"); + return E_INVALIDARG; + } + } + else + data->shader_config = &collection->shader_config; + + data->collections[data->collections_count].object = collection; + data->collections[data->collections_count].num_exports = num_exports; + data->collections[data->collections_count].exports = exports; + + vkd3d_array_reserve((void **)&data->vk_libraries, &data->vk_libraries_size, + data->vk_libraries_count + 1, sizeof(*data->vk_libraries)); + data->vk_libraries[data->vk_libraries_count] = + data->collections[data->collections_count].object->pipeline_library; + + data->collections_count += 1; + data->vk_libraries_count += 1; + return S_OK; +} + static HRESULT d3d12_state_object_parse_subobjects(struct d3d12_state_object *object, - const D3D12_STATE_OBJECT_DESC *desc, struct d3d12_state_object_pipeline_data *data) + const D3D12_STATE_OBJECT_DESC *desc, + struct d3d12_state_object *parent, + struct d3d12_state_object_pipeline_data *data) { unsigned int i, j; + HRESULT hr; + + if (parent && FAILED(hr = d3d12_state_object_add_collection(parent, data, 
NULL, 0))) + return hr; for (i = 0; i < desc->NumSubobjects; i++) { @@ -438,9 +515,12 @@ static HRESULT d3d12_state_object_parse_subobjects(struct d3d12_state_object *ob { case D3D12_STATE_SUBOBJECT_TYPE_STATE_OBJECT_CONFIG: { + const uint32_t supported_flags = + D3D12_STATE_OBJECT_FLAG_ALLOW_EXTERNAL_DEPENDENCIES_ON_LOCAL_DEFINITIONS | + D3D12_STATE_OBJECT_FLAG_ALLOW_STATE_OBJECT_ADDITIONS; const D3D12_STATE_OBJECT_CONFIG *object_config = obj->pDesc; object->flags = object_config->Flags; - if (object->flags & ~D3D12_STATE_OBJECT_FLAG_ALLOW_EXTERNAL_DEPENDENCIES_ON_LOCAL_DEFINITIONS) + if (object->flags & ~supported_flags) { FIXME("Object config flag #%x is not supported.\n", object->flags); return E_INVALIDARG; @@ -601,19 +681,13 @@ static HRESULT d3d12_state_object_parse_subobjects(struct d3d12_state_object *ob case D3D12_STATE_SUBOBJECT_TYPE_EXISTING_COLLECTION: { const D3D12_EXISTING_COLLECTION_DESC *collection = obj->pDesc; - vkd3d_array_reserve((void **)&data->collections, &data->collections_size, - data->collections_count + 1, sizeof(*data->collections)); - - data->collections[data->collections_count].object = impl_from_ID3D12StateObject(collection->pExistingCollection); - data->collections[data->collections_count].num_exports = collection->NumExports; - data->collections[data->collections_count].exports = collection->pExports; - - vkd3d_array_reserve((void **)&data->vk_libraries, &data->vk_libraries_size, - data->vk_libraries_count + 1, sizeof(*data->vk_libraries)); - data->vk_libraries[data->vk_libraries_count] = data->collections[data->collections_count].object->pipeline; - - data->collections_count += 1; - data->vk_libraries_count += 1; + struct d3d12_state_object *library_state; + library_state = impl_from_ID3D12StateObject(collection->pExistingCollection); + if (FAILED(hr = d3d12_state_object_add_collection(library_state, data, + collection->pExports, collection->NumExports))) + { + return hr; + } break; } @@ -676,7 +750,8 @@ static uint32_t 
d3d12_state_object_pipeline_data_find_entry( offset += data->stages_count; - /* Try to look in collections. */ + /* Try to look in collections. We'll only find something in the ALLOW_EXTERNAL_DEPENDENCIES_ON_LOCAL + * situation. Otherwise entry_points will be NULL. */ for (i = 0; i < data->collections_count; i++) { index = d3d12_state_object_pipeline_data_find_entry_inner(data->collections[i].object->entry_points, @@ -863,6 +938,8 @@ static HRESULT d3d12_state_object_get_group_handles(struct d3d12_state_object *o const struct d3d12_state_object_pipeline_data *data) { const struct vkd3d_vk_device_procs *vk_procs = &object->device->vk_procs; + uint32_t collection_export; + int collection_index; uint32_t group_index; VkResult vr; size_t i; @@ -878,6 +955,27 @@ static HRESULT d3d12_state_object_get_group_handles(struct d3d12_state_object *o if (vr) return hresult_from_vk_result(vr); + collection_export = data->exports[i].inherited_collection_export_index; + collection_index = data->exports[i].inherited_collection_index; + + if (collection_index >= 0) + { + const uint8_t *parent_identifier; + const uint8_t *child_identifier; + + parent_identifier = data->collections[collection_index].object->exports[collection_export].identifier; + child_identifier = data->exports[i].identifier; + + /* Validate that we get an exact match for SBT handle. + * It appears to work just fine on NV. */ + if (memcmp(parent_identifier, child_identifier, D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES) != 0) + { + FIXME("SBT identifiers do not match for parent and child pipelines. " + "Vulkan does not guarantee this, but DXR 1.1 requires this. 
Cannot use pipeline.\n"); + return E_NOTIMPL; + } + } + data->exports[i].stack_size_general = UINT32_MAX; data->exports[i].stack_size_any = UINT32_MAX; data->exports[i].stack_size_closest = UINT32_MAX; @@ -1099,6 +1197,8 @@ static HRESULT d3d12_state_object_compile_pipeline(struct d3d12_state_object *ob export->closest_stage_index = VK_SHADER_UNUSED_KHR; export->anyhit_stage_index = VK_SHADER_UNUSED_KHR; export->intersection_stage_index = VK_SHADER_UNUSED_KHR; + export->inherited_collection_index = -1; + export->inherited_collection_export_index = 0; export->general_stage = entry->stage; entry->mangled_entry_point = NULL; entry->plain_entry_point = NULL; @@ -1198,6 +1298,8 @@ static HRESULT d3d12_state_object_compile_pipeline(struct d3d12_state_object *ob VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR, export); + export->inherited_collection_index = -1; + export->inherited_collection_export_index = 0; data->exports_count += 1; data->groups_count += 1; } @@ -1280,6 +1382,16 @@ static HRESULT d3d12_state_object_compile_pipeline(struct d3d12_state_object *ob if (export->intersection_stage_index != VK_SHADER_UNUSED_KHR) export->intersection_stage_index += pstage_offset; + /* If we inherited from a real pipeline, we must observe the rules of AddToStateObject(). + * SBT pointer must be invariant as well as its contents. + * Vulkan does not guarantee this, but we can validate and accept the pipeline if + * implementation happens to satisfy this rule. 
*/ + if (collection->object->type == D3D12_STATE_OBJECT_TYPE_RAYTRACING_PIPELINE) + export->inherited_collection_index = (int)i; + else + export->inherited_collection_index = -1; + export->inherited_collection_export_index = input_export->group_index; + data->exports_count += 1; } @@ -1319,7 +1431,12 @@ static HRESULT d3d12_state_object_compile_pipeline(struct d3d12_state_object *ob pipeline_create_info.sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR; pipeline_create_info.pNext = NULL; - pipeline_create_info.flags = object->type == D3D12_STATE_OBJECT_TYPE_COLLECTION ? + + /* If we allow state object additions, we must first lower this pipeline to a library, and + * then link it to itself so we can use it a library in subsequent PSO creations, but we + * must also be able to trace rays from the library. */ + pipeline_create_info.flags = (object->type == D3D12_STATE_OBJECT_TYPE_COLLECTION || + (object->flags & D3D12_STATE_OBJECT_FLAG_ALLOW_STATE_OBJECT_ADDITIONS)) ? VK_PIPELINE_CREATE_LIBRARY_BIT_KHR : 0; /* FIXME: What if we have no global root signature? */ @@ -1379,7 +1496,27 @@ static HRESULT d3d12_state_object_compile_pipeline(struct d3d12_state_object *ob dynamic_state.pDynamicStates = dynamic_states; vr = VK_CALL(vkCreateRayTracingPipelinesKHR(object->device->vk_device, VK_NULL_HANDLE, - VK_NULL_HANDLE, 1, &pipeline_create_info, NULL, &object->pipeline)); + VK_NULL_HANDLE, 1, &pipeline_create_info, NULL, + (pipeline_create_info.flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR) ? + &object->pipeline_library : &object->pipeline)); + + if (vr == VK_SUCCESS && (object->flags & D3D12_STATE_OBJECT_FLAG_ALLOW_STATE_OBJECT_ADDITIONS) && + object->type == D3D12_STATE_OBJECT_TYPE_RAYTRACING_PIPELINE) + { + /* TODO: Is it actually valid to inherit other pipeline libraries while creating a pipeline library? 
*/ + pipeline_create_info.flags &= ~VK_PIPELINE_CREATE_LIBRARY_BIT_KHR; + pipeline_create_info.pStages = NULL; + pipeline_create_info.pGroups = NULL; + pipeline_create_info.stageCount = 0; + pipeline_create_info.groupCount = 0; + library_info.libraryCount = 1; + library_info.pLibraries = &object->pipeline_library; + + /* Self-link the pipeline library. */ + vr = VK_CALL(vkCreateRayTracingPipelinesKHR(object->device->vk_device, VK_NULL_HANDLE, + VK_NULL_HANDLE, 1, &pipeline_create_info, NULL, &object->pipeline)); + } + if (vr) return hresult_from_vk_result(vr); @@ -1405,6 +1542,9 @@ static HRESULT d3d12_state_object_compile_pipeline(struct d3d12_state_object *ob d3d12_root_signature_inc_ref(object->global_root_signature = global_signature); + object->shader_config = *data->shader_config; + object->pipeline_config = data->pipeline_config; + /* Spec says we need to hold a reference to the collection object, but it doesn't show up in API, * so we must assume private reference. */ if (data->collections_count) @@ -1435,7 +1575,8 @@ static HRESULT d3d12_state_object_compile_pipeline(struct d3d12_state_object *ob static HRESULT d3d12_state_object_init(struct d3d12_state_object *object, struct d3d12_device *device, - const D3D12_STATE_OBJECT_DESC *desc) + const D3D12_STATE_OBJECT_DESC *desc, + struct d3d12_state_object *parent) { struct d3d12_state_object_pipeline_data data; HRESULT hr = S_OK; @@ -1447,7 +1588,7 @@ static HRESULT d3d12_state_object_init(struct d3d12_state_object *object, object->type = desc->Type; memset(&data, 0, sizeof(data)); - if (FAILED(hr = d3d12_state_object_parse_subobjects(object, desc, &data))) + if (FAILED(hr = d3d12_state_object_parse_subobjects(object, desc, parent, &data))) goto fail; if (FAILED(hr = d3d12_state_object_compile_pipeline(object, &data))) @@ -1467,6 +1608,7 @@ fail: } HRESULT d3d12_state_object_create(struct d3d12_device *device, const D3D12_STATE_OBJECT_DESC *desc, + struct d3d12_state_object *parent, struct d3d12_state_object 
**state_object) { struct d3d12_state_object *object; @@ -1475,7 +1617,7 @@ HRESULT d3d12_state_object_create(struct d3d12_device *device, const D3D12_STATE if (!(object = vkd3d_calloc(1, sizeof(*object)))) return E_OUTOFMEMORY; - hr = d3d12_state_object_init(object, device, desc); + hr = d3d12_state_object_init(object, device, desc, parent); if (FAILED(hr)) { vkd3d_free(object); @@ -1485,3 +1627,34 @@ HRESULT d3d12_state_object_create(struct d3d12_device *device, const D3D12_STATE *state_object = object; return S_OK; } + +HRESULT d3d12_state_object_add(struct d3d12_device *device, const D3D12_STATE_OBJECT_DESC *desc, + struct d3d12_state_object *parent, struct d3d12_state_object **state_object) +{ + unsigned int i; + HRESULT hr; + + if (!parent) + return E_INVALIDARG; + if (!(parent->flags & D3D12_STATE_OBJECT_FLAG_ALLOW_STATE_OBJECT_ADDITIONS)) + return E_INVALIDARG; + if (desc->Type != D3D12_STATE_OBJECT_TYPE_RAYTRACING_PIPELINE) + return E_INVALIDARG; + + /* Addition must also allow this scenario. */ + for (i = 0; i < desc->NumSubobjects; i++) + { + if (desc->pSubobjects[i].Type == D3D12_STATE_SUBOBJECT_TYPE_STATE_OBJECT_CONFIG) + { + const D3D12_STATE_OBJECT_CONFIG *config = desc->pSubobjects[i].pDesc; + if (config->Flags & D3D12_STATE_OBJECT_FLAG_ALLOW_STATE_OBJECT_ADDITIONS) + break; + } + } + + if (i == desc->NumSubobjects) + return E_INVALIDARG; + + hr = d3d12_state_object_create(device, desc, parent, state_object); + return hr; +} diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 295fd8b1..28ac19dd 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -3501,6 +3501,14 @@ struct d3d12_state_object_identifier /* The index into vkGetShaderStackSize and friends for pGroups[]. */ uint32_t group_index; + + /* For AddToStateObject(). We need to return the identifier pointer + * for the parent, not the child. This makes it easy to validate that + * we observe the same SBT handles as specified by DXR 1.1. 
*/ + + /* If -1, ignore, otherwise, redirect. */ + int inherited_collection_index; + uint32_t inherited_collection_export_index; }; struct d3d12_state_object_stack_info @@ -3534,7 +3542,14 @@ struct d3d12_state_object /* Normally stages_count == entry_points_count, but entry_points is the entry points we * export externally, and stages_count matches pStages[] size for purposes of index fixups. */ + /* Can be bound. */ VkPipeline pipeline; + /* Can be used as a library. */ + VkPipeline pipeline_library; + + /* Can be inherited by AddToStateObject(). */ + D3D12_RAYTRACING_PIPELINE_CONFIG1 pipeline_config; + D3D12_RAYTRACING_SHADER_CONFIG shader_config; struct { @@ -3557,6 +3572,10 @@ struct d3d12_state_object }; HRESULT d3d12_state_object_create(struct d3d12_device *device, const D3D12_STATE_OBJECT_DESC *desc, + struct d3d12_state_object *parent, + struct d3d12_state_object **object); +HRESULT d3d12_state_object_add(struct d3d12_device *device, const D3D12_STATE_OBJECT_DESC *desc, + struct d3d12_state_object *parent, struct d3d12_state_object **object); static inline struct d3d12_state_object *impl_from_ID3D12StateObject(ID3D12StateObject *iface)