vkd3d: Pass down required memory types to scratch allocators.

Separate scratch pools by their intended usage. This allows, for example,
preprocess buffers to be allocated differently from normal buffers. It could
also allow separate pools for host-visible scratch memory etc. down the line.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
This commit is contained in:
Hans-Kristian Arntzen 2022-01-07 13:47:17 +01:00
parent 206108bbf4
commit 4821a244ad
3 changed files with 142 additions and 64 deletions

View File

@ -1562,7 +1562,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo
{
struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface);
ULONG refcount = InterlockedDecrement(&allocator->refcount);
unsigned int i;
unsigned int i, j;
TRACE("%p decreasing refcount to %u.\n", allocator, refcount);
@ -1591,13 +1591,16 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo
vkd3d_free(allocator->command_buffers);
VK_CALL(vkDestroyCommandPool(device->vk_device, allocator->vk_command_pool, NULL));
for (i = 0; i < allocator->scratch_buffer_count; i++)
d3d12_device_return_scratch_buffer(device, &allocator->scratch_buffers[i]);
for (i = 0; i < VKD3D_SCRATCH_POOL_KIND_COUNT; i++)
{
for (j = 0; j < allocator->scratch_pools[i].scratch_buffer_count; j++)
d3d12_device_return_scratch_buffer(device, i, &allocator->scratch_pools[i].scratch_buffers[j]);
vkd3d_free(allocator->scratch_pools[i].scratch_buffers);
}
for (i = 0; i < allocator->query_pool_count; i++)
d3d12_device_return_query_pool(device, &allocator->query_pools[i]);
vkd3d_free(allocator->scratch_buffers);
vkd3d_free(allocator->query_pools);
vkd3d_free(allocator);
@ -1656,7 +1659,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllo
struct d3d12_device *device;
LONG pending;
VkResult vr;
size_t i;
size_t i, j;
TRACE("iface %p.\n", iface);
@ -1707,10 +1710,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllo
}
/* Return scratch buffers to the device */
for (i = 0; i < allocator->scratch_buffer_count; i++)
d3d12_device_return_scratch_buffer(device, &allocator->scratch_buffers[i]);
allocator->scratch_buffer_count = 0;
for (i = 0; i < VKD3D_SCRATCH_POOL_KIND_COUNT; i++)
{
for (j = 0; j < allocator->scratch_pools[i].scratch_buffer_count; j++)
d3d12_device_return_scratch_buffer(device, i, &allocator->scratch_pools[i].scratch_buffers[j]);
allocator->scratch_pools[i].scratch_buffer_count = 0;
}
/* Return query pools to the device */
for (i = 0; i < allocator->query_pool_count; i++)
@ -1846,9 +1851,7 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo
allocator->command_buffers_size = 0;
allocator->command_buffer_count = 0;
allocator->scratch_buffers = NULL;
allocator->scratch_buffers_size = 0;
allocator->scratch_buffer_count = 0;
memset(allocator->scratch_pools, 0, sizeof(allocator->scratch_pools));
allocator->query_pools = NULL;
allocator->query_pools_size = 0;
@ -1898,8 +1901,11 @@ struct vkd3d_scratch_allocation
};
static bool d3d12_command_allocator_allocate_scratch_memory(struct d3d12_command_allocator *allocator,
VkDeviceSize size, VkDeviceSize alignment, struct vkd3d_scratch_allocation *allocation)
enum vkd3d_scratch_pool_kind kind,
VkDeviceSize size, VkDeviceSize alignment, uint32_t memory_types,
struct vkd3d_scratch_allocation *allocation)
{
struct d3d12_command_allocator_scratch_pool *pool = &allocator->scratch_pools[kind];
VkDeviceSize aligned_offset, aligned_size;
struct vkd3d_scratch_buffer *scratch;
unsigned int i;
@ -1907,9 +1913,14 @@ static bool d3d12_command_allocator_allocate_scratch_memory(struct d3d12_command
aligned_size = align(size, alignment);
/* Probe last block first since the others are likely full */
for (i = allocator->scratch_buffer_count; i; i--)
for (i = pool->scratch_buffer_count; i; i--)
{
scratch = &allocator->scratch_buffers[i - 1];
scratch = &pool->scratch_buffers[i - 1];
/* Extremely unlikely to fail since we have separate lists per pool kind, but to be 100% correct ... */
if (!(memory_types & (1u << scratch->allocation.device_allocation.vk_memory_type)))
continue;
aligned_offset = align(scratch->offset, alignment);
if (aligned_offset + aligned_size <= scratch->allocation.resource.size)
@ -1923,21 +1934,21 @@ static bool d3d12_command_allocator_allocate_scratch_memory(struct d3d12_command
}
}
if (!vkd3d_array_reserve((void**)&allocator->scratch_buffers, &allocator->scratch_buffers_size,
allocator->scratch_buffer_count + 1, sizeof(*allocator->scratch_buffers)))
if (!vkd3d_array_reserve((void**)&pool->scratch_buffers, &pool->scratch_buffers_size,
pool->scratch_buffer_count + 1, sizeof(*pool->scratch_buffers)))
{
ERR("Failed to allocate scratch buffer.\n");
return false;
}
scratch = &allocator->scratch_buffers[allocator->scratch_buffer_count];
if (FAILED(d3d12_device_get_scratch_buffer(allocator->device, aligned_size, scratch)))
scratch = &pool->scratch_buffers[pool->scratch_buffer_count];
if (FAILED(d3d12_device_get_scratch_buffer(allocator->device, kind, aligned_size, memory_types, scratch)))
{
ERR("Failed to create scratch buffer.\n");
return false;
}
allocator->scratch_buffer_count += 1;
pool->scratch_buffer_count += 1;
scratch->offset = aligned_size;
allocation->buffer = scratch->allocation.resource.vk_buffer;
@ -3322,7 +3333,8 @@ static bool d3d12_command_list_gather_pending_queries(struct d3d12_command_list
/* Allocate scratch buffer and resolve virtual Vulkan queries into it */
if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator,
resolve_buffer_size, max(ssbo_alignment, sizeof(uint64_t)), &resolve_buffer))
VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE,
resolve_buffer_size, max(ssbo_alignment, sizeof(uint64_t)), ~0u, &resolve_buffer))
goto cleanup;
for (i = 0; i < resolve_count; i++)
@ -3339,7 +3351,8 @@ static bool d3d12_command_list_gather_pending_queries(struct d3d12_command_list
entry_buffer_size = sizeof(struct query_entry) * list->pending_queries_count;
if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator,
entry_buffer_size, ssbo_alignment, &entry_buffer))
VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE,
entry_buffer_size, ssbo_alignment, ~0u, &entry_buffer))
goto cleanup;
for (i = 0; i < dispatch_count; i++)
@ -5516,7 +5529,8 @@ static bool d3d12_command_list_emit_predicated_command(struct d3d12_command_list
vkd3d_meta_get_predicate_pipeline(&list->device->meta_ops, command_type, &pipeline_info);
if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator,
pipeline_info.data_size, sizeof(uint32_t), scratch))
VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE,
pipeline_info.data_size, sizeof(uint32_t), ~0u, scratch))
return false;
d3d12_command_list_end_current_render_pass(list, true);
@ -9027,7 +9041,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(d3d12_command_li
if (resource)
{
if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator,
sizeof(uint32_t), sizeof(uint32_t), &scratch))
VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE,
sizeof(uint32_t), sizeof(uint32_t), ~0u, &scratch))
return;
begin_info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
@ -9284,9 +9299,10 @@ static void d3d12_command_list_execute_indirect_state_template(
if (count_buffer)
{
if (FAILED(hr = d3d12_command_allocator_allocate_scratch_memory(list->allocator,
VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE,
sizeof(uint32_t),
list->device->device_info.device_generated_commands_properties_nv.minSequencesCountBufferOffsetAlignment,
&count_allocation)))
~0u, &count_allocation)))
{
WARN("Failed to allocate count memory.\n");
return;
@ -12284,9 +12300,10 @@ static HRESULT d3d12_command_signature_allocate_stream_memory_for_list(
struct vkd3d_scratch_allocation *allocation)
{
if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator,
VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE,
max_command_count * signature->state_template.stride,
list->device->device_info.device_generated_commands_properties_nv.minIndirectCommandsBufferOffsetAlignment,
allocation))
~0u, allocation))
return E_OUTOFMEMORY;
return S_OK;
@ -12322,12 +12339,13 @@ static HRESULT d3d12_command_signature_allocate_preprocess_memory_for_list(
VK_CALL(vkGetGeneratedCommandsMemoryRequirementsNV(list->device->vk_device, &info, &memory_info));
if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, memory_info.memoryRequirements.size,
if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator,
VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS,
memory_info.memoryRequirements.size,
list->device->device_info.device_generated_commands_properties_nv.minIndirectCommandsBufferOffsetAlignment,
allocation))
memory_info.memoryRequirements.memoryTypeBits, allocation))
return E_OUTOFMEMORY;
/* Going to assume the memory type is okay ... It's device local after all. */
*size = memory_info.memoryRequirements.size;
return S_OK;
}

View File

@ -2412,22 +2412,50 @@ static void d3d12_remove_device_singleton(LUID luid)
}
}
static HRESULT d3d12_device_create_scratch_buffer(struct d3d12_device *device, VkDeviceSize size, struct vkd3d_scratch_buffer *scratch)
static HRESULT d3d12_device_create_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind,
VkDeviceSize size, uint32_t memory_types, struct vkd3d_scratch_buffer *scratch)
{
struct vkd3d_allocate_heap_memory_info alloc_info;
HRESULT hr;
TRACE("device %p, size %llu, scratch %p.\n", device, size, scratch);
memset(&alloc_info, 0, sizeof(alloc_info));
alloc_info.heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
alloc_info.heap_desc.SizeInBytes = size;
alloc_info.heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
alloc_info.heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
if (kind == VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE)
{
struct vkd3d_allocate_heap_memory_info alloc_info;
if (FAILED(hr = vkd3d_allocate_heap_memory(device, &device->memory_allocator,
&alloc_info, &scratch->allocation)))
return hr;
/* We only care about memory types for INDIRECT_PREPROCESS. */
assert(memory_types == ~0u);
memset(&alloc_info, 0, sizeof(alloc_info));
alloc_info.heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
alloc_info.heap_desc.SizeInBytes = size;
alloc_info.heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
alloc_info.heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
if (FAILED(hr = vkd3d_allocate_heap_memory(device, &device->memory_allocator,
&alloc_info, &scratch->allocation)))
return hr;
}
else if (kind == VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS)
{
struct vkd3d_allocate_memory_info alloc_info;
memset(&alloc_info, 0, sizeof(alloc_info));
alloc_info.heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT;
alloc_info.memory_requirements.size = size;
alloc_info.memory_requirements.memoryTypeBits = memory_types;
alloc_info.heap_flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
alloc_info.optional_memory_properties = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
alloc_info.flags = VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER;
if (FAILED(hr = vkd3d_allocate_memory(device, &device->memory_allocator,
&alloc_info, &scratch->allocation)))
return hr;
}
else
{
return E_INVALIDARG;
}
scratch->offset = 0;
return S_OK;
@ -2440,35 +2468,47 @@ static void d3d12_device_destroy_scratch_buffer(struct d3d12_device *device, con
vkd3d_free_memory(device, &device->memory_allocator, &scratch->allocation);
}
HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, VkDeviceSize min_size, struct vkd3d_scratch_buffer *scratch)
HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind,
VkDeviceSize min_size, uint32_t memory_types, struct vkd3d_scratch_buffer *scratch)
{
struct d3d12_device_scratch_pool *pool = &device->scratch_pools[kind];
struct vkd3d_scratch_buffer *candidate;
size_t i;
if (min_size > VKD3D_SCRATCH_BUFFER_SIZE)
return d3d12_device_create_scratch_buffer(device, min_size, scratch);
return d3d12_device_create_scratch_buffer(device, kind, min_size, memory_types, scratch);
pthread_mutex_lock(&device->mutex);
if (device->scratch_buffer_count)
for (i = pool->scratch_buffer_count; i; i--)
{
*scratch = device->scratch_buffers[--device->scratch_buffer_count];
scratch->offset = 0;
pthread_mutex_unlock(&device->mutex);
return S_OK;
}
else
{
pthread_mutex_unlock(&device->mutex);
return d3d12_device_create_scratch_buffer(device, VKD3D_SCRATCH_BUFFER_SIZE, scratch);
candidate = &pool->scratch_buffers[i - 1];
/* Extremely unlikely to fail since we have separate lists per pool kind, but to be 100% correct ... */
if (memory_types & (1u << candidate->allocation.device_allocation.vk_memory_type))
{
*scratch = *candidate;
scratch->offset = 0;
pool->scratch_buffers[i - 1] = pool->scratch_buffers[--pool->scratch_buffer_count];
pthread_mutex_unlock(&device->mutex);
return S_OK;
}
}
pthread_mutex_unlock(&device->mutex);
return d3d12_device_create_scratch_buffer(device, kind, VKD3D_SCRATCH_BUFFER_SIZE, memory_types, scratch);
}
void d3d12_device_return_scratch_buffer(struct d3d12_device *device, const struct vkd3d_scratch_buffer *scratch)
void d3d12_device_return_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind,
const struct vkd3d_scratch_buffer *scratch)
{
struct d3d12_device_scratch_pool *pool = &device->scratch_pools[kind];
pthread_mutex_lock(&device->mutex);
if (scratch->allocation.resource.size == VKD3D_SCRATCH_BUFFER_SIZE &&
device->scratch_buffer_count < VKD3D_SCRATCH_BUFFER_COUNT)
pool->scratch_buffer_count < VKD3D_SCRATCH_BUFFER_COUNT)
{
device->scratch_buffers[device->scratch_buffer_count++] = *scratch;
pool->scratch_buffers[pool->scratch_buffer_count++] = *scratch;
pthread_mutex_unlock(&device->mutex);
}
else
@ -2719,10 +2759,11 @@ static void d3d12_device_global_pipeline_cache_cleanup(struct d3d12_device *devi
static void d3d12_device_destroy(struct d3d12_device *device)
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
size_t i;
size_t i, j;
for (i = 0; i < device->scratch_buffer_count; i++)
d3d12_device_destroy_scratch_buffer(device, &device->scratch_buffers[i]);
for (i = 0; i < VKD3D_SCRATCH_POOL_KIND_COUNT; i++)
for (j = 0; j < device->scratch_pools[i].scratch_buffer_count; j++)
d3d12_device_destroy_scratch_buffer(device, &device->scratch_pools[i].scratch_buffers[j]);
for (i = 0; i < device->query_pool_count; i++)
d3d12_device_destroy_query_pool(device, &device->query_pools[i]);

View File

@ -1704,6 +1704,20 @@ struct vkd3d_query_pool
uint32_t next_index;
};
/* Per-kind list of scratch buffers currently held by a command allocator.
 * A command allocator owns one of these for each vkd3d_scratch_pool_kind. */
struct d3d12_command_allocator_scratch_pool
{
/* Heap array grown on demand through vkd3d_array_reserve() and released
 * with vkd3d_free() when the allocator is destroyed. */
struct vkd3d_scratch_buffer *scratch_buffers;
/* Capacity bookkeeping handed to vkd3d_array_reserve() for scratch_buffers. */
size_t scratch_buffers_size;
/* Number of valid entries in scratch_buffers. Set back to 0 on allocator
 * Reset after every buffer has been returned to the device. */
size_t scratch_buffer_count;
};
/* Identifies the intended usage of a scratch buffer. The values are used
 * directly as indices into the scratch_pools[] arrays of the command
 * allocator and the device, so they must stay dense and start at 0. */
enum vkd3d_scratch_pool_kind
{
/* General purpose scratch memory, allocated as a D3D12_HEAP_TYPE_DEFAULT
 * buffer-only heap. Callers pass ~0u as the memory type mask. */
VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE = 0,
/* Scratch memory for indirect preprocess buffers; allocated against the
 * memory type mask supplied by the caller. */
VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS,
/* Number of pool kinds; used to size the scratch_pools[] arrays. */
VKD3D_SCRATCH_POOL_KIND_COUNT
};
/* ID3D12CommandAllocator */
struct d3d12_command_allocator
{
@ -1737,9 +1751,7 @@ struct d3d12_command_allocator
size_t command_buffers_size;
size_t command_buffer_count;
struct vkd3d_scratch_buffer *scratch_buffers;
size_t scratch_buffers_size;
size_t scratch_buffer_count;
struct d3d12_command_allocator_scratch_pool scratch_pools[VKD3D_SCRATCH_POOL_KIND_COUNT];
struct vkd3d_query_pool *query_pools;
size_t query_pools_size;
@ -2891,6 +2903,12 @@ struct vkd3d_descriptor_qa_heap_buffer_data;
/* ID3D12DeviceExt */
typedef ID3D12DeviceExt d3d12_device_vkd3d_ext_iface;
/* Device-level cache of idle scratch buffers for a single pool kind.
 * d3d12_device_get_scratch_buffer() and d3d12_device_return_scratch_buffer()
 * access it while holding device->mutex. */
struct d3d12_device_scratch_pool
{
/* Fixed-capacity storage for cached buffers. Only buffers whose size is
 * exactly VKD3D_SCRATCH_BUFFER_SIZE are placed here on return. */
struct vkd3d_scratch_buffer scratch_buffers[VKD3D_SCRATCH_BUFFER_COUNT];
/* Number of valid entries in scratch_buffers; at most
 * VKD3D_SCRATCH_BUFFER_COUNT. */
size_t scratch_buffer_count;
};
struct d3d12_device
{
d3d12_device_iface ID3D12Device_iface;
@ -2926,8 +2944,7 @@ struct d3d12_device
struct vkd3d_memory_allocator memory_allocator;
struct vkd3d_scratch_buffer scratch_buffers[VKD3D_SCRATCH_BUFFER_COUNT];
size_t scratch_buffer_count;
struct d3d12_device_scratch_pool scratch_pools[VKD3D_SCRATCH_POOL_KIND_COUNT];
struct vkd3d_query_pool query_pools[VKD3D_VIRTUAL_QUERY_POOL_COUNT];
size_t query_pool_count;
@ -2989,8 +3006,10 @@ static inline struct d3d12_device *impl_from_ID3D12Device(d3d12_device_iface *if
bool d3d12_device_validate_shader_meta(struct d3d12_device *device, const struct vkd3d_shader_meta *meta);
HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, VkDeviceSize min_size, struct vkd3d_scratch_buffer *scratch);
void d3d12_device_return_scratch_buffer(struct d3d12_device *device, const struct vkd3d_scratch_buffer *scratch);
HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind,
VkDeviceSize min_size, uint32_t memory_types, struct vkd3d_scratch_buffer *scratch);
void d3d12_device_return_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind,
const struct vkd3d_scratch_buffer *scratch);
HRESULT d3d12_device_get_query_pool(struct d3d12_device *device, uint32_t type_index, struct vkd3d_query_pool *pool);
void d3d12_device_return_query_pool(struct d3d12_device *device, const struct vkd3d_query_pool *pool);