vkd3d: Work around huge memory overhead for individual VkPipelineCaches.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
This commit is contained in:
Hans-Kristian Arntzen 2021-08-25 11:44:39 +02:00
parent b8f0cd6eb6
commit 403d1f9743
4 changed files with 57 additions and 12 deletions

View File

@ -20,7 +20,7 @@
#include "vkd3d_private.h"
static VkResult vkd3d_create_pipeline_cache(struct d3d12_device *device,
VkResult vkd3d_create_pipeline_cache(struct d3d12_device *device,
size_t size, const void *data, VkPipelineCache *cache)
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;

View File

@ -2462,6 +2462,33 @@ static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(d3d12_device_iface *iface)
return refcount;
}
/* Sets up device->global_pipeline_cache when the driver workaround applies.
 *
 * Some drivers attach a fixed, large memory cost to every VkPipelineCache, so
 * giving each PSO its own cache can inflate system memory usage until we OOM.
 * Sharing one device-wide cache avoids that. The trade-off is that per-PSO
 * state can no longer be meaningfully serialized or deserialized; serializing
 * garbage and ignoring deserialization is still correct, and cached PSOs
 * should be present in the disk cache either way.
 *
 * Returns S_OK without touching the cache handle when the workaround is not
 * needed; otherwise returns the converted result of the cache creation. */
static HRESULT d3d12_device_global_pipeline_cache_init(struct d3d12_device *device)
{
    uint32_t driver_major = VK_VERSION_MAJOR(device->device_info.properties2.properties.driverVersion);

    /* Only NVIDIA drivers with a major version of 470 or newer get the workaround. */
    if (!(device->device_info.properties2.properties.vendorID == VKD3D_VENDOR_ID_NVIDIA && driver_major >= 470))
        return S_OK;

    WARN("Workaround applied. Creating global pipeline cache.\n");

    /* Start from an empty cache: no initial data blob is supplied. */
    return hresult_from_vk_result(
            vkd3d_create_pipeline_cache(device, 0, NULL, &device->global_pipeline_cache));
}
/* Destroys the pipeline cache held in device->global_pipeline_cache.
 *
 * NOTE(review): d3d12_device_global_pipeline_cache_init() returns early without
 * assigning the handle when the workaround is not applied, so this relies on the
 * handle being VK_NULL_HANDLE in that case (destroying a null handle is allowed
 * by Vulkan) - confirm the device struct is zero-initialized. */
static void d3d12_device_global_pipeline_cache_cleanup(struct d3d12_device *device)
{
/* NOTE(review): VK_CALL presumably dispatches through a local named vk_procs - keep the name. */
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
/* NULL: no allocation callbacks are passed. */
VK_CALL(vkDestroyPipelineCache(device->vk_device, device->global_pipeline_cache, NULL));
}
static void d3d12_device_destroy(struct d3d12_device *device)
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
@ -2479,6 +2506,7 @@ static void d3d12_device_destroy(struct d3d12_device *device)
vkd3d_cleanup_format_info(device);
vkd3d_shader_debug_ring_cleanup(&device->debug_ring, device);
d3d12_device_global_pipeline_cache_cleanup(device);
vkd3d_sampler_state_cleanup(&device->sampler_state, device);
vkd3d_view_map_destroy(&device->sampler_map, device);
vkd3d_meta_ops_cleanup(&device->meta_ops, device);
@ -5194,11 +5222,14 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
if (FAILED(hr = vkd3d_shader_debug_ring_init(&device->debug_ring, device)))
goto out_cleanup_meta_ops;
if (FAILED(hr = d3d12_device_global_pipeline_cache_init(device)))
goto out_cleanup_debug_ring;
if (vkd3d_descriptor_debug_active_qa_checks())
{
if (FAILED(hr = vkd3d_descriptor_debug_alloc_global_info(&device->descriptor_qa_global_info,
VKD3D_DESCRIPTOR_DEBUG_DEFAULT_NUM_COOKIES, device)))
goto out_cleanup_debug_ring;
goto out_cleanup_global_pipeline_cache;
}
vkd3d_render_pass_cache_init(&device->render_pass_cache);
@ -5209,6 +5240,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
d3d12_device_caps_init(device);
return S_OK;
out_cleanup_global_pipeline_cache:
d3d12_device_global_pipeline_cache_cleanup(device);
out_cleanup_debug_ring:
vkd3d_shader_debug_ring_cleanup(&device->debug_ring, device);
out_cleanup_meta_ops:

View File

@ -2227,14 +2227,19 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st
shader_interface.descriptor_qa_heap_binding = &root_signature->descriptor_qa_heap_binding;
#endif
if ((hr = vkd3d_create_pipeline_cache_from_d3d12_desc(device, &desc->cached_pso, &state->vk_pso_cache)) < 0)
if (!device->global_pipeline_cache)
{
ERR("Failed to create pipeline cache, hr %d.\n", hr);
return hr;
if ((hr = vkd3d_create_pipeline_cache_from_d3d12_desc(device, &desc->cached_pso, &state->vk_pso_cache)) < 0)
{
ERR("Failed to create pipeline cache, hr %d.\n", hr);
return hr;
}
}
hr = vkd3d_create_compute_pipeline(device, &desc->cs, &shader_interface,
root_signature->compute.vk_pipeline_layout, state->vk_pso_cache, &state->compute.vk_pipeline,
root_signature->compute.vk_pipeline_layout,
state->vk_pso_cache ? state->vk_pso_cache : device->global_pipeline_cache,
&state->compute.vk_pipeline,
&state->compute.meta);
if (FAILED(hr))
@ -3481,12 +3486,15 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
if (supports_extended_dynamic_state)
{
/* If we have EXT_extended_dynamic_state, we can compile a pipeline right here.
* There are still some edge cases where we need to fall back to special pipelines, but that should be very rare. */
if ((hr = vkd3d_create_pipeline_cache_from_d3d12_desc(device, &desc->cached_pso, &state->vk_pso_cache)) < 0)
if (!device->global_pipeline_cache)
{
ERR("Failed to create pipeline cache, hr %d.\n", hr);
goto fail;
/* If we have EXT_extended_dynamic_state, we can compile a pipeline right here.
* There are still some edge cases where we need to fall back to special pipelines, but that should be very rare. */
if ((hr = vkd3d_create_pipeline_cache_from_d3d12_desc(device, &desc->cached_pso, &state->vk_pso_cache)) < 0)
{
ERR("Failed to create pipeline cache, hr %d.\n", hr);
goto fail;
}
}
for (i = 0; i < VKD3D_GRAPHICS_PIPELINE_STATIC_VARIANT_COUNT; i++)
@ -3495,7 +3503,8 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
continue;
if (!(graphics->pipeline[i] = d3d12_pipeline_state_create_pipeline_variant(state, NULL, graphics->dsv_format,
state->vk_pso_cache, &graphics->render_pass[i], &graphics->dynamic_state_flags, i)))
state->vk_pso_cache ? state->vk_pso_cache : device->global_pipeline_cache,
&graphics->render_pass[i], &graphics->dynamic_state_flags, i)))
goto fail;
}
}

View File

@ -1505,6 +1505,8 @@ struct d3d12_pipeline_library
HRESULT d3d12_pipeline_library_create(struct d3d12_device *device, const void *blob,
size_t blob_length, struct d3d12_pipeline_library **pipeline_library);
VkResult vkd3d_create_pipeline_cache(struct d3d12_device *device,
size_t size, const void *data, VkPipelineCache *cache);
HRESULT vkd3d_create_pipeline_cache_from_d3d12_desc(struct d3d12_device *device,
const D3D12_CACHED_PIPELINE_STATE *state, VkPipelineCache *cache);
VkResult vkd3d_serialize_pipeline_state(const struct d3d12_pipeline_state *state, size_t *size, void *data);
@ -2668,6 +2670,7 @@ struct d3d12_device
#ifdef VKD3D_ENABLE_DESCRIPTOR_QA
struct vkd3d_descriptor_qa_global_info *descriptor_qa_global_info;
#endif
VkPipelineCache global_pipeline_cache;
};
HRESULT d3d12_device_create(struct vkd3d_instance *instance,