diff --git a/libs/vkd3d/cache.c b/libs/vkd3d/cache.c
index 26810319..e002c69c 100644
--- a/libs/vkd3d/cache.c
+++ b/libs/vkd3d/cache.c
@@ -20,7 +20,7 @@
 
 #include "vkd3d_private.h"
 
-static VkResult vkd3d_create_pipeline_cache(struct d3d12_device *device,
+VkResult vkd3d_create_pipeline_cache(struct d3d12_device *device,
         size_t size, const void *data, VkPipelineCache *cache)
 {
     const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c
index 93d9ee93..6f8608a6 100644
--- a/libs/vkd3d/device.c
+++ b/libs/vkd3d/device.c
@@ -2462,6 +2462,33 @@ static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(d3d12_device_iface *iface)
     return refcount;
 }
 
+static HRESULT d3d12_device_global_pipeline_cache_init(struct d3d12_device *device)
+{
+    /* Creates device->global_pipeline_cache if the workaround applies; otherwise returns S_OK without creating it.
+     * On certain drivers, VkPipelineCache has a fixed (large) memory overhead, so one VkPipelineCache per PSO will
+     * explode system memory usage, leading to OOM. To counteract this, we use one global pipeline cache instead, but
+     * we then lose the ability to serialize and deserialize PSO state; we can just serialize garbage and ignore
+     * deserialization. This is fine for correctness, and cached PSOs should be present in disk cache either way. */
+    bool use_global;
+    uint32_t major;
+    VkResult vr;
+
+    major = VK_VERSION_MAJOR(device->device_info.properties2.properties.driverVersion);
+    use_global = device->device_info.properties2.properties.vendorID == VKD3D_VENDOR_ID_NVIDIA && major >= 470;
+    if (!use_global) /* Only NVIDIA with VK_VERSION_MAJOR(driverVersion) >= 470 continues. */
+        return S_OK;
+
+    WARN("Workaround applied. Creating global pipeline cache.\n");
+    vr = vkd3d_create_pipeline_cache(device, 0, NULL, &device->global_pipeline_cache);
+    return hresult_from_vk_result(vr);
+}
+/* Destroys the global cache. NOTE(review): also runs when none was created; confirm the handle is VK_NULL_HANDLE then. */
+static void d3d12_device_global_pipeline_cache_cleanup(struct d3d12_device *device)
+{
+    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+    VK_CALL(vkDestroyPipelineCache(device->vk_device, device->global_pipeline_cache, NULL));
+}
+
 static void d3d12_device_destroy(struct d3d12_device *device)
 {
     const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
@@ -2479,6 +2506,7 @@ static void d3d12_device_destroy(struct d3d12_device *device)
 
     vkd3d_cleanup_format_info(device);
     vkd3d_shader_debug_ring_cleanup(&device->debug_ring, device);
+    d3d12_device_global_pipeline_cache_cleanup(device);
     vkd3d_sampler_state_cleanup(&device->sampler_state, device);
     vkd3d_view_map_destroy(&device->sampler_map, device);
     vkd3d_meta_ops_cleanup(&device->meta_ops, device);
@@ -5194,11 +5222,14 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
     if (FAILED(hr = vkd3d_shader_debug_ring_init(&device->debug_ring, device)))
         goto out_cleanup_meta_ops;
 
+    if (FAILED(hr = d3d12_device_global_pipeline_cache_init(device)))
+        goto out_cleanup_debug_ring;
+
     if (vkd3d_descriptor_debug_active_qa_checks())
     {
         if (FAILED(hr = vkd3d_descriptor_debug_alloc_global_info(&device->descriptor_qa_global_info,
                 VKD3D_DESCRIPTOR_DEBUG_DEFAULT_NUM_COOKIES, device)))
-            goto out_cleanup_debug_ring;
+            goto out_cleanup_global_pipeline_cache;
     }
 
     vkd3d_render_pass_cache_init(&device->render_pass_cache);
@@ -5209,6 +5240,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
     d3d12_device_caps_init(device);
     return S_OK;
 
+out_cleanup_global_pipeline_cache:
+    d3d12_device_global_pipeline_cache_cleanup(device);
 out_cleanup_debug_ring:
     vkd3d_shader_debug_ring_cleanup(&device->debug_ring, device);
 out_cleanup_meta_ops:
diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c
index 57202df6..cb4edf96 100644
--- a/libs/vkd3d/state.c
+++ b/libs/vkd3d/state.c
@@ -2227,14 +2227,19 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st
     shader_interface.descriptor_qa_heap_binding = &root_signature->descriptor_qa_heap_binding;
 #endif
 
-    if ((hr = vkd3d_create_pipeline_cache_from_d3d12_desc(device, &desc->cached_pso, &state->vk_pso_cache)) < 0)
+    if (!device->global_pipeline_cache) /* Per-PSO cache only without a global cache. */
     {
-        ERR("Failed to create pipeline cache, hr %d.\n", hr);
-        return hr;
+        if ((hr = vkd3d_create_pipeline_cache_from_d3d12_desc(device, &desc->cached_pso, &state->vk_pso_cache)) < 0)
+        {
+            ERR("Failed to create pipeline cache, hr %d.\n", hr);
+            return hr;
+        }
     }
 
     hr = vkd3d_create_compute_pipeline(device, &desc->cs, &shader_interface,
-            root_signature->compute.vk_pipeline_layout, state->vk_pso_cache, &state->compute.vk_pipeline,
+            root_signature->compute.vk_pipeline_layout,
+            state->vk_pso_cache ? state->vk_pso_cache : device->global_pipeline_cache,
+            &state->compute.vk_pipeline,
             &state->compute.meta);
 
     if (FAILED(hr))
@@ -3481,12 +3486,15 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
 
     if (supports_extended_dynamic_state)
     {
-        /* If we have EXT_extended_dynamic_state, we can compile a pipeline right here.
-         * There are still some edge cases where we need to fall back to special pipelines, but that should be very rare. */
-        if ((hr = vkd3d_create_pipeline_cache_from_d3d12_desc(device, &desc->cached_pso, &state->vk_pso_cache)) < 0)
+        if (!device->global_pipeline_cache) /* Per-PSO cache only without a global cache. */
         {
-            ERR("Failed to create pipeline cache, hr %d.\n", hr);
-            goto fail;
+            /* If we have EXT_extended_dynamic_state, we can compile a pipeline right here.
+             * There are still some edge cases where we need to fall back to special pipelines, but that should be very rare. */
+            if ((hr = vkd3d_create_pipeline_cache_from_d3d12_desc(device, &desc->cached_pso, &state->vk_pso_cache)) < 0)
+            {
+                ERR("Failed to create pipeline cache, hr %d.\n", hr);
+                goto fail;
+            }
         }
 
         for (i = 0; i < VKD3D_GRAPHICS_PIPELINE_STATIC_VARIANT_COUNT; i++)
@@ -3495,7 +3503,8 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
                 continue;
 
             if (!(graphics->pipeline[i] = d3d12_pipeline_state_create_pipeline_variant(state, NULL, graphics->dsv_format,
-                    state->vk_pso_cache, &graphics->render_pass[i], &graphics->dynamic_state_flags, i)))
+                    state->vk_pso_cache ? state->vk_pso_cache : device->global_pipeline_cache,
+                    &graphics->render_pass[i], &graphics->dynamic_state_flags, i)))
                 goto fail;
         }
     }
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h
index 483d5bf2..93ec476f 100644
--- a/libs/vkd3d/vkd3d_private.h
+++ b/libs/vkd3d/vkd3d_private.h
@@ -1505,6 +1505,8 @@ struct d3d12_pipeline_library
 HRESULT d3d12_pipeline_library_create(struct d3d12_device *device, const void *blob,
         size_t blob_length, struct d3d12_pipeline_library **pipeline_library);
 
+VkResult vkd3d_create_pipeline_cache(struct d3d12_device *device,
+        size_t size, const void *data, VkPipelineCache *cache);
 HRESULT vkd3d_create_pipeline_cache_from_d3d12_desc(struct d3d12_device *device,
         const D3D12_CACHED_PIPELINE_STATE *state, VkPipelineCache *cache);
 VkResult vkd3d_serialize_pipeline_state(const struct d3d12_pipeline_state *state, size_t *size, void *data);
@@ -2668,6 +2670,7 @@ struct d3d12_device
 #ifdef VKD3D_ENABLE_DESCRIPTOR_QA
     struct vkd3d_descriptor_qa_global_info *descriptor_qa_global_info;
 #endif
+    VkPipelineCache global_pipeline_cache; /* Device-wide cache; only created when the driver workaround applies. */
 };
 
 HRESULT d3d12_device_create(struct vkd3d_instance *instance,