vkd3d: Pass down required memory types to scratch allocators.

Separate scratch pools by their intended usage. This allows e.g.
preprocess buffers to be allocated differently from normal buffers,
which is necessary on implementations that use special memory types to
implement preprocess buffers.

This could also allow for separate pools for host-visible scratch
memory down the line.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
Author: Hans-Kristian Arntzen
Date:   2022-01-07 13:47:17 +01:00
Commit: 619a54810d (parent cecb8d6ebc)

3 files changed, 138 insertions(+), 61 deletions(-)
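For context, the "special memory types" case is presumably the VK_NV_device_generated_commands preprocess buffer, whose reported memoryTypeBits may not include the types vkd3d would otherwise pick for scratch memory. The sketch below is illustrative only and not part of this commit: the wrapper function and its call site are hypothetical, but the allocate-scratch signature and pool kind are the ones introduced in this patch, and the mask would plausibly come from vkGetGeneratedCommandsMemoryRequirementsNV.

/* Hypothetical sketch: feeding a DGC preprocess buffer's memoryTypeBits into
 * the new kind-aware scratch allocator. Only the call to
 * d3d12_command_allocator_allocate_scratch_memory() matches this patch; the
 * surrounding plumbing is assumed for illustration. */
static bool allocate_preprocess_scratch(struct d3d12_command_list *list,
        VkPipeline vk_pipeline, VkIndirectCommandsLayoutNV vk_layout,
        uint32_t max_sequences, struct vkd3d_scratch_allocation *allocation)
{
    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
    VkGeneratedCommandsMemoryRequirementsInfoNV info;
    VkMemoryRequirements2 requirements;

    memset(&info, 0, sizeof(info));
    info.sType = VK_STRUCTURE_TYPE_GENERATED_COMMANDS_MEMORY_REQUIREMENTS_INFO_NV;
    info.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
    info.pipeline = vk_pipeline;
    info.indirectCommandsLayout = vk_layout;
    info.maxSequencesCount = max_sequences;

    memset(&requirements, 0, sizeof(requirements));
    requirements.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2;
    VK_CALL(vkGetGeneratedCommandsMemoryRequirementsNV(list->device->vk_device,
            &info, &requirements));

    /* The memoryTypeBits reported here may exclude ordinary memory types on
     * some implementations, which is why the mask is passed all the way down. */
    return d3d12_command_allocator_allocate_scratch_memory(list->allocator,
            VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS,
            requirements.memoryRequirements.size,
            requirements.memoryRequirements.alignment,
            requirements.memoryRequirements.memoryTypeBits,
            allocation);
}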


@@ -1553,7 +1553,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo
{
struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface);
ULONG refcount = InterlockedDecrement(&allocator->refcount);
unsigned int i;
unsigned int i, j;
TRACE("%p decreasing refcount to %u.\n", allocator, refcount);
@@ -1609,13 +1609,16 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo
vkd3d_free(allocator->command_buffers);
VK_CALL(vkDestroyCommandPool(device->vk_device, allocator->vk_command_pool, NULL));
for (i = 0; i < allocator->scratch_buffer_count; i++)
d3d12_device_return_scratch_buffer(device, &allocator->scratch_buffers[i]);
for (i = 0; i < VKD3D_SCRATCH_POOL_KIND_COUNT; i++)
{
for (j = 0; j < allocator->scratch_pools[i].scratch_buffer_count; j++)
d3d12_device_return_scratch_buffer(device, i, &allocator->scratch_pools[i].scratch_buffers[j]);
vkd3d_free(allocator->scratch_pools[i].scratch_buffers);
}
for (i = 0; i < allocator->query_pool_count; i++)
d3d12_device_return_query_pool(device, &allocator->query_pools[i]);
vkd3d_free(allocator->scratch_buffers);
vkd3d_free(allocator->query_pools);
#ifdef VKD3D_ENABLE_BREADCRUMBS
@@ -1684,7 +1687,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllo
struct d3d12_device *device;
LONG pending;
VkResult vr;
size_t i;
size_t i, j;
TRACE("iface %p.\n", iface);
@@ -1735,8 +1738,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllo
}
/* Return scratch buffers to the device */
for (i = 0; i < allocator->scratch_buffer_count; i++)
d3d12_device_return_scratch_buffer(device, &allocator->scratch_buffers[i]);
for (i = 0; i < VKD3D_SCRATCH_POOL_KIND_COUNT; i++)
{
for (j = 0; j < allocator->scratch_pools[i].scratch_buffer_count; j++)
d3d12_device_return_scratch_buffer(device, i, &allocator->scratch_pools[i].scratch_buffers[j]);
allocator->scratch_pools[i].scratch_buffer_count = 0;
}
#ifdef VKD3D_ENABLE_BREADCRUMBS
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS)
@@ -1748,8 +1755,6 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllo
}
#endif
allocator->scratch_buffer_count = 0;
/* Return query pools to the device */
for (i = 0; i < allocator->query_pool_count; i++)
d3d12_device_return_query_pool(device, &allocator->query_pools[i]);
@@ -1907,9 +1912,7 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo
allocator->command_buffers_size = 0;
allocator->command_buffer_count = 0;
allocator->scratch_buffers = NULL;
allocator->scratch_buffers_size = 0;
allocator->scratch_buffer_count = 0;
memset(allocator->scratch_pools, 0, sizeof(allocator->scratch_pools));
allocator->query_pools = NULL;
allocator->query_pools_size = 0;
@@ -1959,8 +1962,11 @@ struct vkd3d_scratch_allocation
};
static bool d3d12_command_allocator_allocate_scratch_memory(struct d3d12_command_allocator *allocator,
VkDeviceSize size, VkDeviceSize alignment, struct vkd3d_scratch_allocation *allocation)
enum vkd3d_scratch_pool_kind kind,
VkDeviceSize size, VkDeviceSize alignment, uint32_t memory_types,
struct vkd3d_scratch_allocation *allocation)
{
struct d3d12_command_allocator_scratch_pool *pool = &allocator->scratch_pools[kind];
VkDeviceSize aligned_offset, aligned_size;
struct vkd3d_scratch_buffer *scratch;
unsigned int i;
@@ -1968,9 +1974,14 @@ static bool d3d12_command_allocator_allocate_scratch_memory(struct d3d12_command
aligned_size = align(size, alignment);
/* Probe last block first since the others are likely full */
for (i = allocator->scratch_buffer_count; i; i--)
for (i = pool->scratch_buffer_count; i; i--)
{
scratch = &allocator->scratch_buffers[i - 1];
scratch = &pool->scratch_buffers[i - 1];
/* Extremely unlikely to fail since we have separate lists per pool kind, but to be 100% correct ... */
if (!(memory_types & (1u << scratch->allocation.device_allocation.vk_memory_type)))
continue;
aligned_offset = align(scratch->offset, alignment);
if (aligned_offset + aligned_size <= scratch->allocation.resource.size)
@@ -1984,21 +1995,21 @@ static bool d3d12_command_allocator_allocate_scratch_memory(struct d3d12_command
}
}
if (!vkd3d_array_reserve((void**)&allocator->scratch_buffers, &allocator->scratch_buffers_size,
allocator->scratch_buffer_count + 1, sizeof(*allocator->scratch_buffers)))
if (!vkd3d_array_reserve((void**)&pool->scratch_buffers, &pool->scratch_buffers_size,
pool->scratch_buffer_count + 1, sizeof(*pool->scratch_buffers)))
{
ERR("Failed to allocate scratch buffer.\n");
return false;
}
scratch = &allocator->scratch_buffers[allocator->scratch_buffer_count];
if (FAILED(d3d12_device_get_scratch_buffer(allocator->device, aligned_size, scratch)))
scratch = &pool->scratch_buffers[pool->scratch_buffer_count];
if (FAILED(d3d12_device_get_scratch_buffer(allocator->device, kind, aligned_size, memory_types, scratch)))
{
ERR("Failed to create scratch buffer.\n");
return false;
}
allocator->scratch_buffer_count += 1;
pool->scratch_buffer_count += 1;
scratch->offset = aligned_size;
allocation->buffer = scratch->allocation.resource.vk_buffer;
@@ -3387,7 +3398,8 @@ static bool d3d12_command_list_gather_pending_queries(struct d3d12_command_list
/* Allocate scratch buffer and resolve virtual Vulkan queries into it */
if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator,
resolve_buffer_size, max(ssbo_alignment, sizeof(uint64_t)), &resolve_buffer))
VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE,
resolve_buffer_size, max(ssbo_alignment, sizeof(uint64_t)), ~0u, &resolve_buffer))
goto cleanup;
for (i = 0; i < resolve_count; i++)
@@ -3404,7 +3416,8 @@ static bool d3d12_command_list_gather_pending_queries(struct d3d12_command_list
entry_buffer_size = sizeof(struct query_entry) * list->pending_queries_count;
if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator,
entry_buffer_size, ssbo_alignment, &entry_buffer))
VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE,
entry_buffer_size, ssbo_alignment, ~0u, &entry_buffer))
goto cleanup;
for (i = 0; i < dispatch_count; i++)
@@ -5407,7 +5420,8 @@ static bool d3d12_command_list_emit_predicated_command(struct d3d12_command_list
vkd3d_meta_get_predicate_pipeline(&list->device->meta_ops, command_type, &pipeline_info);
if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator,
pipeline_info.data_size, sizeof(uint32_t), scratch))
VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE,
pipeline_info.data_size, sizeof(uint32_t), ~0u, scratch))
return false;
d3d12_command_list_end_current_render_pass(list, true);
@@ -8541,7 +8555,8 @@ static void d3d12_command_list_clear_uav_with_copy(struct d3d12_command_list *li
scratch_buffer_size = element_count * format->byte_count;
if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator,
scratch_buffer_size, 16, &scratch))
VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE,
scratch_buffer_size, 16, ~0u, &scratch))
{
ERR("Failed to allocate scratch memory for UAV clear.\n");
return;
@@ -9388,7 +9403,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(d3d12_command_li
if (resource)
{
if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator,
sizeof(uint32_t), sizeof(uint32_t), &scratch))
VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE,
sizeof(uint32_t), sizeof(uint32_t), ~0u, &scratch))
return;
begin_info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;


@@ -2539,23 +2539,52 @@ static void d3d12_remove_device_singleton(LUID luid)
}
}
static HRESULT d3d12_device_create_scratch_buffer(struct d3d12_device *device, VkDeviceSize size, struct vkd3d_scratch_buffer *scratch)
static HRESULT d3d12_device_create_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind,
VkDeviceSize size, uint32_t memory_types, struct vkd3d_scratch_buffer *scratch)
{
struct vkd3d_allocate_heap_memory_info alloc_info;
HRESULT hr;
TRACE("device %p, size %llu, scratch %p.\n", device, size, scratch);
memset(&alloc_info, 0, sizeof(alloc_info));
alloc_info.heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
alloc_info.heap_desc.SizeInBytes = size;
alloc_info.heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
alloc_info.heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
alloc_info.extra_allocation_flags = VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH;
if (kind == VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE)
{
struct vkd3d_allocate_heap_memory_info alloc_info;
if (FAILED(hr = vkd3d_allocate_heap_memory(device, &device->memory_allocator,
&alloc_info, &scratch->allocation)))
return hr;
/* We only care about memory types for INDIRECT_PREPROCESS. */
assert(memory_types == ~0u);
memset(&alloc_info, 0, sizeof(alloc_info));
alloc_info.heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
alloc_info.heap_desc.SizeInBytes = size;
alloc_info.heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
alloc_info.heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
alloc_info.extra_allocation_flags = VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH;
if (FAILED(hr = vkd3d_allocate_heap_memory(device, &device->memory_allocator,
&alloc_info, &scratch->allocation)))
return hr;
}
else if (kind == VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS)
{
struct vkd3d_allocate_memory_info alloc_info;
memset(&alloc_info, 0, sizeof(alloc_info));
alloc_info.heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT;
alloc_info.memory_requirements.size = size;
alloc_info.memory_requirements.memoryTypeBits = memory_types;
alloc_info.memory_requirements.alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
alloc_info.heap_flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
alloc_info.optional_memory_properties = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
alloc_info.flags = VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER | VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH;
if (FAILED(hr = vkd3d_allocate_memory(device, &device->memory_allocator,
&alloc_info, &scratch->allocation)))
return hr;
}
else
{
return E_INVALIDARG;
}
scratch->offset = 0;
return S_OK;
@@ -2568,35 +2597,47 @@ static void d3d12_device_destroy_scratch_buffer(struct d3d12_device *device, con
vkd3d_free_memory(device, &device->memory_allocator, &scratch->allocation);
}
HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, VkDeviceSize min_size, struct vkd3d_scratch_buffer *scratch)
HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind,
VkDeviceSize min_size, uint32_t memory_types, struct vkd3d_scratch_buffer *scratch)
{
struct d3d12_device_scratch_pool *pool = &device->scratch_pools[kind];
struct vkd3d_scratch_buffer *candidate;
size_t i;
if (min_size > VKD3D_SCRATCH_BUFFER_SIZE)
return d3d12_device_create_scratch_buffer(device, min_size, scratch);
return d3d12_device_create_scratch_buffer(device, kind, min_size, memory_types, scratch);
pthread_mutex_lock(&device->mutex);
if (device->scratch_buffer_count)
for (i = pool->scratch_buffer_count; i; i--)
{
*scratch = device->scratch_buffers[--device->scratch_buffer_count];
scratch->offset = 0;
pthread_mutex_unlock(&device->mutex);
return S_OK;
}
else
{
pthread_mutex_unlock(&device->mutex);
return d3d12_device_create_scratch_buffer(device, VKD3D_SCRATCH_BUFFER_SIZE, scratch);
candidate = &pool->scratch_buffers[i - 1];
/* Extremely unlikely to fail since we have separate lists per pool kind, but to be 100% correct ... */
if (memory_types & (1u << candidate->allocation.device_allocation.vk_memory_type))
{
*scratch = *candidate;
scratch->offset = 0;
pool->scratch_buffers[i - 1] = pool->scratch_buffers[--pool->scratch_buffer_count];
pthread_mutex_unlock(&device->mutex);
return S_OK;
}
}
pthread_mutex_unlock(&device->mutex);
return d3d12_device_create_scratch_buffer(device, kind, VKD3D_SCRATCH_BUFFER_SIZE, memory_types, scratch);
}
void d3d12_device_return_scratch_buffer(struct d3d12_device *device, const struct vkd3d_scratch_buffer *scratch)
void d3d12_device_return_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind,
const struct vkd3d_scratch_buffer *scratch)
{
struct d3d12_device_scratch_pool *pool = &device->scratch_pools[kind];
pthread_mutex_lock(&device->mutex);
if (scratch->allocation.resource.size == VKD3D_SCRATCH_BUFFER_SIZE &&
device->scratch_buffer_count < VKD3D_SCRATCH_BUFFER_COUNT)
pool->scratch_buffer_count < VKD3D_SCRATCH_BUFFER_COUNT)
{
device->scratch_buffers[device->scratch_buffer_count++] = *scratch;
pool->scratch_buffers[pool->scratch_buffer_count++] = *scratch;
pthread_mutex_unlock(&device->mutex);
}
else
@@ -2815,10 +2856,11 @@ static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(d3d12_device_iface *iface)
static void d3d12_device_destroy(struct d3d12_device *device)
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
size_t i;
size_t i, j;
for (i = 0; i < device->scratch_buffer_count; i++)
d3d12_device_destroy_scratch_buffer(device, &device->scratch_buffers[i]);
for (i = 0; i < VKD3D_SCRATCH_POOL_KIND_COUNT; i++)
for (j = 0; j < device->scratch_pools[i].scratch_buffer_count; j++)
d3d12_device_destroy_scratch_buffer(device, &device->scratch_pools[i].scratch_buffers[j]);
for (i = 0; i < device->query_pool_count; i++)
d3d12_device_destroy_query_pool(device, &device->query_pools[i]);


@@ -1842,6 +1842,20 @@ struct vkd3d_query_pool
uint32_t next_index;
};
struct d3d12_command_allocator_scratch_pool
{
struct vkd3d_scratch_buffer *scratch_buffers;
size_t scratch_buffers_size;
size_t scratch_buffer_count;
};
enum vkd3d_scratch_pool_kind
{
VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE = 0,
VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS,
VKD3D_SCRATCH_POOL_KIND_COUNT
};
/* ID3D12CommandAllocator */
struct d3d12_command_allocator
{
@@ -1868,9 +1882,7 @@ struct d3d12_command_allocator
size_t command_buffers_size;
size_t command_buffer_count;
struct vkd3d_scratch_buffer *scratch_buffers;
size_t scratch_buffers_size;
size_t scratch_buffer_count;
struct d3d12_command_allocator_scratch_pool scratch_pools[VKD3D_SCRATCH_POOL_KIND_COUNT];
struct vkd3d_query_pool *query_pools;
size_t query_pools_size;
@@ -3185,6 +3197,12 @@ struct vkd3d_descriptor_qa_heap_buffer_data;
/* ID3D12DeviceExt */
typedef ID3D12DeviceExt d3d12_device_vkd3d_ext_iface;
struct d3d12_device_scratch_pool
{
struct vkd3d_scratch_buffer scratch_buffers[VKD3D_SCRATCH_BUFFER_COUNT];
size_t scratch_buffer_count;
};
struct d3d12_device
{
d3d12_device_iface ID3D12Device_iface;
@@ -3219,8 +3237,7 @@ struct d3d12_device
struct vkd3d_memory_allocator memory_allocator;
struct vkd3d_scratch_buffer scratch_buffers[VKD3D_SCRATCH_BUFFER_COUNT];
size_t scratch_buffer_count;
struct d3d12_device_scratch_pool scratch_pools[VKD3D_SCRATCH_POOL_KIND_COUNT];
struct vkd3d_query_pool query_pools[VKD3D_VIRTUAL_QUERY_POOL_COUNT];
size_t query_pool_count;
@@ -3293,8 +3310,10 @@ static inline struct d3d12_device *impl_from_ID3D12Device(d3d12_device_iface *if
bool d3d12_device_validate_shader_meta(struct d3d12_device *device, const struct vkd3d_shader_meta *meta);
HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, VkDeviceSize min_size, struct vkd3d_scratch_buffer *scratch);
void d3d12_device_return_scratch_buffer(struct d3d12_device *device, const struct vkd3d_scratch_buffer *scratch);
HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind,
VkDeviceSize min_size, uint32_t memory_types, struct vkd3d_scratch_buffer *scratch);
void d3d12_device_return_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind,
const struct vkd3d_scratch_buffer *scratch);
HRESULT d3d12_device_get_query_pool(struct d3d12_device *device, uint32_t type_index, struct vkd3d_query_pool *pool);
void d3d12_device_return_query_pool(struct d3d12_device *device, const struct vkd3d_query_pool *pool);
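A minimal usage sketch of the reworked device-level helpers, assuming a hypothetical caller outside the command allocator. It only restates the acquire/suballocate/return contract visible in the diff above, including the ~0u convention enforced by the assert in d3d12_device_create_scratch_buffer.

/* Sketch only: the caller shown here is hypothetical; the helper signatures
 * and constants are the ones from this patch. */
static HRESULT use_device_scratch(struct d3d12_device *device)
{
    struct vkd3d_scratch_buffer scratch;
    HRESULT hr;

    /* ~0u means "any memory type is acceptable"; only the
     * INDIRECT_PREPROCESS kind is expected to pass a narrower mask.
     * A min_size at or below VKD3D_SCRATCH_BUFFER_SIZE is served from
     * the shared per-kind pool. */
    if (FAILED(hr = d3d12_device_get_scratch_buffer(device,
            VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE,
            64 * 1024, ~0u, &scratch)))
        return hr;

    /* ... record work against scratch.allocation.resource.vk_buffer ... */

    /* Returning the buffer recycles it into the matching per-kind device pool
     * (up to VKD3D_SCRATCH_BUFFER_COUNT entries); oversized buffers are freed. */
    d3d12_device_return_scratch_buffer(device,
            VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, &scratch);
    return S_OK;
}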