cache: Handle ref-count rules for multiple LoadPipeline/StorePipeline.

In pipeline libraries, the library holds on to private references of the
libraries so that they can be rapidly loaded on-demand.

This behavior is verifed by API tests.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
This commit is contained in:
Hans-Kristian Arntzen 2022-03-08 16:13:48 +01:00
parent cc08339624
commit c6149b47cd
3 changed files with 121 additions and 17 deletions

View File

@ -34,6 +34,10 @@ struct vkd3d_cached_pipeline_data
const void *blob;
size_t blob_length;
size_t is_new; /* Avoid padding issues. */
/* Need to internally hold a PSO and hand out the same one on subsequent LoadLibrary.
* This is a good performance boost for applications which load PSOs from library directly
* multiple times throughout the lifetime of an application. */
struct d3d12_pipeline_state *state;
};
struct vkd3d_cached_pipeline_entry
@ -642,6 +646,7 @@ static void vkd3d_shader_code_serialize_referenced(struct d3d12_pipeline_library
entry.key.name_length = 0;
entry.key.name = NULL;
entry.data.is_new = 1;
entry.data.state = NULL;
wrapped_varint_size = sizeof(struct vkd3d_pipeline_blob_chunk_spirv) + varint_size;
entry.data.blob_length = sizeof(*internal) + wrapped_varint_size;
@ -750,6 +755,7 @@ static VkResult vkd3d_serialize_pipeline_state_referenced(struct d3d12_pipeline_
entry.key.name_length = 0;
entry.key.name = NULL;
entry.data.is_new = 1;
entry.data.state = NULL;
if (state->vk_pso_cache)
{
@ -1041,10 +1047,16 @@ static void d3d12_pipeline_library_cleanup_map(struct hash_map *map)
{
struct vkd3d_cached_pipeline_entry *e = (struct vkd3d_cached_pipeline_entry*)hash_map_get_entry(map, i);
if ((e->entry.flags & HASH_MAP_ENTRY_OCCUPIED) && e->data.is_new)
if (e->entry.flags & HASH_MAP_ENTRY_OCCUPIED)
{
vkd3d_free((void*)e->key.name);
vkd3d_free((void*)e->data.blob);
if (e->data.is_new)
{
vkd3d_free((void*)e->key.name);
vkd3d_free((void*)e->data.blob);
}
if (e->data.state)
d3d12_pipeline_state_dec_ref(e->data.state);
}
}
@ -1218,6 +1230,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_pipeline_library_StorePipeline(d3d12_pipe
entry.data.blob = new_blob;
entry.data.is_new = 1;
entry.data.state = pipeline_state;
if (!d3d12_pipeline_library_insert_hash_map_blob(pipeline_library, &pipeline_library->pso_map, &entry))
{
@ -1227,6 +1240,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_pipeline_library_StorePipeline(d3d12_pipe
return E_OUTOFMEMORY;
}
/* If we get a subsequent LoadLibrary, we have to hand it back out again.
* API tests inform us that we need internal ref-count here. */
d3d12_pipeline_state_inc_ref(pipeline_state);
rwlock_unlock_write(&pipeline_library->mutex);
return S_OK;
}
@ -1234,8 +1251,13 @@ static HRESULT STDMETHODCALLTYPE d3d12_pipeline_library_StorePipeline(d3d12_pipe
static HRESULT d3d12_pipeline_library_load_pipeline(struct d3d12_pipeline_library *pipeline_library, LPCWSTR name,
VkPipelineBindPoint bind_point, struct d3d12_pipeline_state_desc *desc, struct d3d12_pipeline_state **state)
{
struct vkd3d_pipeline_cache_compatibility pipeline_cache_compat;
const struct vkd3d_cached_pipeline_entry *e;
struct d3d12_pipeline_state *existing_state;
struct d3d12_root_signature *root_signature;
struct d3d12_pipeline_state *cached_state;
struct vkd3d_cached_pipeline_key key;
HRESULT hr;
int rc;
if ((rc = rwlock_lock_read(&pipeline_library->mutex)))
@ -1254,12 +1276,82 @@ static HRESULT d3d12_pipeline_library_load_pipeline(struct d3d12_pipeline_librar
return E_INVALIDARG;
}
desc->cached_pso.blob.CachedBlobSizeInBytes = e->data.blob_length;
desc->cached_pso.blob.pCachedBlob = e->data.blob;
desc->cached_pso.library = pipeline_library;
rwlock_unlock_read(&pipeline_library->mutex);
/* Docs say that applications have to consider thread safety here:
* https://docs.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12device1-createpipelinelibrary#thread-safety.
* However, it seems questionable to rely on that, so just do cmpxchg replacements. */
cached_state = vkd3d_atomic_ptr_load_explicit(&e->data.state, vkd3d_memory_order_acquire);
return d3d12_pipeline_state_create(pipeline_library->device, bind_point, desc, state);
if (cached_state)
{
rwlock_unlock_read(&pipeline_library->mutex);
/* If we have handed out the PSO once, just need to do a quick validation. */
memset(&pipeline_cache_compat, 0, sizeof(pipeline_cache_compat));
vkd3d_pipeline_cache_compat_from_state_desc(&pipeline_cache_compat, desc);
if (desc->root_signature)
{
root_signature = impl_from_ID3D12RootSignature(desc->root_signature);
if (root_signature)
pipeline_cache_compat.root_signature_compat_hash = root_signature->compatibility_hash;
}
else if (!cached_state->private_root_signature)
{
/* If we have no explicit root signature and the existing PSO didn't either,
* just inherit the compat hash from PSO to avoid comparing them. */
pipeline_cache_compat.root_signature_compat_hash = cached_state->pipeline_cache_compat.root_signature_compat_hash;
}
if (memcmp(&pipeline_cache_compat, &cached_state->pipeline_cache_compat, sizeof(pipeline_cache_compat)) != 0)
{
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG)
INFO("Attempt to load existing PSO from library, but failed argument validation.\n");
return E_INVALIDARG;
}
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG)
INFO("Handing out existing pipeline state object.\n");
*state = cached_state;
d3d12_pipeline_state_inc_public_ref(cached_state);
return S_OK;
}
else
{
desc->cached_pso.blob.CachedBlobSizeInBytes = e->data.blob_length;
desc->cached_pso.blob.pCachedBlob = e->data.blob;
desc->cached_pso.library = pipeline_library;
rwlock_unlock_read(&pipeline_library->mutex);
/* Don't hold locks while creating pipeline, it takes *some* time to validate and decompress stuff,
* and in heavily multi-threaded scenarios we want to go as wide as we can. */
if (FAILED(hr = d3d12_pipeline_state_create(pipeline_library->device, bind_point, desc, &cached_state)))
return hr;
/* These really should not fail ... */
rwlock_lock_read(&pipeline_library->mutex);
e = (const struct vkd3d_cached_pipeline_entry*)hash_map_find(&pipeline_library->pso_map, &key);
existing_state = vkd3d_atomic_ptr_compare_exchange(&e->data.state, NULL, cached_state,
vkd3d_memory_order_acq_rel, vkd3d_memory_order_acquire);
rwlock_unlock_read(&pipeline_library->mutex);
if (!existing_state)
{
/* Successfully replaced. */
d3d12_pipeline_state_inc_ref(cached_state);
*state = cached_state;
}
else
{
/* Other thread ended up winning while we were creating the PSO.
* This shouldn't be legal D3D12 API usage according to docs, but be safe ... */
WARN("Race condition detected.\n");
d3d12_pipeline_state_dec_ref(cached_state);
d3d12_pipeline_state_inc_public_ref(existing_state);
*state = existing_state;
}
return S_OK;
}
}
static HRESULT STDMETHODCALLTYPE d3d12_pipeline_library_LoadGraphicsPipeline(d3d12_pipeline_library_iface *iface,
@ -1557,6 +1649,7 @@ static HRESULT d3d12_pipeline_library_unserialize_hash_map(
entry.data.blob_length = toc_entry->blob_length;
entry.data.blob = serialized_data_base + toc_entry->blob_offset;
entry.data.is_new = 0;
entry.data.state = NULL;
if (!d3d12_pipeline_library_insert_hash_map_blob(pipeline_library, map, &entry))
return E_OUTOFMEMORY;

View File

@ -1745,16 +1745,23 @@ void d3d12_pipeline_state_inc_ref(struct d3d12_pipeline_state *state)
InterlockedIncrement(&state->internal_refcount);
}
ULONG d3d12_pipeline_state_inc_public_ref(struct d3d12_pipeline_state *state)
{
ULONG refcount = InterlockedIncrement(&state->refcount);
if (refcount == 1)
{
d3d12_pipeline_state_inc_ref(state);
/* Bring device reference back to life. */
d3d12_device_add_ref(state->device);
}
TRACE("%p increasing refcount to %u.\n", state, refcount);
return refcount;
}
static ULONG STDMETHODCALLTYPE d3d12_pipeline_state_AddRef(ID3D12PipelineState *iface)
{
struct d3d12_pipeline_state *state = impl_from_ID3D12PipelineState(iface);
ULONG refcount = InterlockedIncrement(&state->refcount);
if (refcount == 1)
d3d12_pipeline_state_inc_ref(state);
TRACE("%p increasing refcount to %u.\n", state, refcount);
return refcount;
return d3d12_pipeline_state_inc_public_ref(state);
}
static HRESULT d3d12_pipeline_state_create_shader_module(struct d3d12_device *device,
@ -1874,20 +1881,23 @@ void d3d12_pipeline_state_dec_ref(struct d3d12_pipeline_state *state)
ID3D12RootSignature_Release(state->private_root_signature);
vkd3d_free(state);
d3d12_device_release(device);
}
}
static ULONG STDMETHODCALLTYPE d3d12_pipeline_state_Release(ID3D12PipelineState *iface)
{
struct d3d12_pipeline_state *state = impl_from_ID3D12PipelineState(iface);
struct d3d12_device *device = state->device;
ULONG refcount = InterlockedDecrement(&state->refcount);
TRACE("%p decreasing refcount to %u.\n", state, refcount);
if (!refcount)
{
d3d12_pipeline_state_dec_ref(state);
/* When public ref-count hits zero, we have to release the device too. */
d3d12_device_release(device);
}
return refcount;
}

View File

@ -1559,6 +1559,7 @@ static inline bool d3d12_graphics_pipeline_state_has_unknown_dsv_format(
}
/* Private ref counts, for pipeline library. */
ULONG d3d12_pipeline_state_inc_public_ref(struct d3d12_pipeline_state *state);
void d3d12_pipeline_state_inc_ref(struct d3d12_pipeline_state *state);
void d3d12_pipeline_state_dec_ref(struct d3d12_pipeline_state *state);