vkd3d: Suballocate small allocations from larger chunks.

This is necessary to keep the amount of allocated memory manageable
in games that allocate a lot of small heaps or committed resources.

Signed-off-by: Philip Rebohle <philip.rebohle@tu-dortmund.de>
This commit is contained in:
Philip Rebohle 2021-01-27 20:41:46 +01:00 committed by Hans-Kristian Arntzen
parent eaab2388b1
commit 8f6e94dc30
2 changed files with 361 additions and 5 deletions

View File

@ -55,6 +55,20 @@ static uint32_t vkd3d_select_memory_types(struct d3d12_device *device, const D3D
return type_mask;
}
/* Returns a bit mask with bit i set for every memory type i reported in
 * device->memory_properties whose property flags include all of type_flags. */
static uint32_t vkd3d_find_memory_types_with_flags(struct d3d12_device *device, VkMemoryPropertyFlags type_flags)
{
    const VkPhysicalDeviceMemoryProperties *props = &device->memory_properties;
    uint32_t result = 0;
    uint32_t type_index;

    for (type_index = 0; type_index < props->memoryTypeCount; type_index++)
    {
        /* Any requested flag the type lacks disqualifies it. */
        if (type_flags & ~props->memoryTypes[type_index].propertyFlags)
            continue;

        result |= 1u << type_index;
    }

    return result;
}
static HRESULT vkd3d_select_memory_flags(struct d3d12_device *device, const D3D12_HEAP_PROPERTIES *heap_properties, VkMemoryPropertyFlags *type_flags)
{
switch (heap_properties->Type)
@ -380,6 +394,202 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
return S_OK;
}
/* Inserts a free range {offset, length} at position index of the chunk's
 * free list, shifting later entries up by one. If the array cannot be
 * grown, an error is logged and the list is left unchanged. */
static void vkd3d_memory_chunk_insert_range(struct vkd3d_memory_chunk *chunk,
        size_t index, VkDeviceSize offset, VkDeviceSize length)
{
    size_t tail_bytes;

    if (!vkd3d_array_reserve((void**)&chunk->free_ranges, &chunk->free_ranges_size,
            chunk->free_ranges_count + 1, sizeof(*chunk->free_ranges)))
    {
        ERR("Failed to insert free range.\n");
        return;
    }

    /* Open a gap at index by moving everything from index onwards. */
    tail_bytes = (chunk->free_ranges_count - index) * sizeof(*chunk->free_ranges);
    memmove(&chunk->free_ranges[index + 1], &chunk->free_ranges[index], tail_bytes);

    chunk->free_ranges[index].length = length;
    chunk->free_ranges[index].offset = offset;
    chunk->free_ranges_count += 1;
}
/* Removes the free range at position index, moving later entries down
 * by one so the list stays contiguous. */
static void vkd3d_memory_chunk_remove_range(struct vkd3d_memory_chunk *chunk, size_t index)
{
    size_t remaining = --chunk->free_ranges_count;

    memmove(&chunk->free_ranges[index], &chunk->free_ranges[index + 1],
            (remaining - index) * sizeof(*chunk->free_ranges));
}
/* Carves an allocation of memory_requirements->size bytes, aligned to
 * memory_requirements->alignment, out of the chunk's free list.
 *
 * A fitting range whose length equals the requested size is taken as soon
 * as it is seen; otherwise the longest fitting range is used. On success
 * *allocation is filled with a slice of the chunk's base allocation and the
 * free list is updated to exclude the allocated bytes.
 *
 * Returns S_OK on success, E_OUTOFMEMORY if no free range can hold the
 * aligned request. */
static HRESULT vkd3d_memory_chunk_allocate_range(struct vkd3d_memory_chunk *chunk, const VkMemoryRequirements *memory_requirements,
        struct vkd3d_memory_allocation *allocation)
{
    struct vkd3d_memory_free_range *pick_range;
    VkDeviceSize l_length, r_length;
    size_t i, pick_index;

    if (!chunk->free_ranges_count)
        return E_OUTOFMEMORY;

    pick_index = chunk->free_ranges_count;
    pick_range = NULL;

    for (i = 0; i < chunk->free_ranges_count; i++)
    {
        struct vkd3d_memory_free_range *range = &chunk->free_ranges[i];

        /* Compare end offsets instead of subtracting the aligned start from
         * the range end: the subtraction is unsigned and wraps around when
         * the alignment padding alone exceeds the range, which would make
         * a far too small range look like a fit. */
        if (range->offset + range->length < align(range->offset, memory_requirements->alignment) + memory_requirements->size)
            continue;

        /* Exact fit leaving no gaps */
        if (range->length == memory_requirements->size)
        {
            pick_index = i;
            pick_range = range;
            break;
        }

        /* Alignment is almost always going to be 64 KiB, so
         * don't worry too much about misalignment gaps here */
        if (!pick_range || range->length > pick_range->length)
        {
            pick_index = i;
            pick_range = range;
        }
    }

    if (!pick_range)
        return E_OUTOFMEMORY;

    /* Adjust offsets and addresses of the base allocation */
    vkd3d_memory_allocation_slice(allocation, &chunk->allocation,
            align(pick_range->offset, memory_requirements->alignment),
            memory_requirements->size);

    /* Remove allocated range from the free list. l_length is the alignment
     * gap left in front of the allocation, r_length what remains behind it. */
    l_length = allocation->offset - pick_range->offset;
    r_length = pick_range->offset + pick_range->length
            - allocation->offset - allocation->resource.size;

    if (l_length)
    {
        /* Shrink the picked range to the leading gap. This must happen
         * before the insertion below, which may reallocate the array and
         * leave pick_range dangling. */
        pick_range->length = l_length;

        if (r_length)
        {
            vkd3d_memory_chunk_insert_range(chunk, pick_index + 1,
                    allocation->offset + allocation->resource.size, r_length);
        }
    }
    else if (r_length)
    {
        pick_range->offset = allocation->offset + allocation->resource.size;
        pick_range->length = r_length;
    }
    else
    {
        vkd3d_memory_chunk_remove_range(chunk, pick_index);
    }

    return S_OK;
}
/* Binary search over the chunk's free list. Returns the index of the first
 * free range whose offset is greater than the given offset, or
 * free_ranges_count if there is none. */
static size_t vkd3d_memory_chunk_find_range(struct vkd3d_memory_chunk *chunk, VkDeviceSize offset)
{
    size_t first = 0;
    size_t span = chunk->free_ranges_count;

    while (span)
    {
        size_t half = span / 2;
        size_t probe = first + half;

        if (chunk->free_ranges[probe].offset > offset)
        {
            /* Answer is at or before probe: keep the lower half. */
            span = half;
        }
        else
        {
            /* Answer is after probe: drop the lower half and probe itself. */
            first = probe + 1;
            span -= half + 1;
        }
    }

    return first;
}
/* Returns the bytes covered by allocation to the chunk's free list,
 * merging them with the directly neighbouring free ranges where those
 * touch the freed region. */
static void vkd3d_memory_chunk_free_range(struct vkd3d_memory_chunk *chunk, const struct vkd3d_memory_allocation *allocation)
{
    struct vkd3d_memory_free_range *prev = NULL;
    struct vkd3d_memory_free_range *next = NULL;
    size_t pos;

    /* pos is where the freed region would be inserted in the free list. */
    pos = vkd3d_memory_chunk_find_range(chunk, allocation->offset);

    if (pos > 0)
    {
        struct vkd3d_memory_free_range *candidate = &chunk->free_ranges[pos - 1];

        if (candidate->offset + candidate->length == allocation->offset)
            prev = candidate;
    }

    if (pos < chunk->free_ranges_count)
    {
        struct vkd3d_memory_free_range *candidate = &chunk->free_ranges[pos];

        if (candidate->offset == allocation->offset + allocation->resource.size)
            next = candidate;
    }

    if (prev && next)
    {
        /* Freed region bridges two free ranges: fold everything into the
         * left one and drop the right one. */
        prev->length += allocation->resource.size;
        prev->length += next->length;
        vkd3d_memory_chunk_remove_range(chunk, pos);
    }
    else if (prev)
    {
        prev->length += allocation->resource.size;
    }
    else if (next)
    {
        next->offset = allocation->offset;
        next->length += allocation->resource.size;
    }
    else
    {
        vkd3d_memory_chunk_insert_range(chunk, pos,
                allocation->offset, allocation->resource.size);
    }
}
/* A chunk is considered free when its free list consists of exactly one
 * range whose length equals the size of the chunk's backing allocation. */
static bool vkd3d_memory_chunk_is_free(struct vkd3d_memory_chunk *chunk)
{
    if (chunk->free_ranges_count != 1)
        return false;

    return chunk->free_ranges[0].length == chunk->allocation.resource.size;
}
/* Allocates a new chunk object, initializes its backing allocation from
 * info, and seeds its free list with one range covering the whole
 * allocation. On success the chunk is stored in *chunk and S_OK is
 * returned; on failure nothing is written to *chunk. */
static HRESULT vkd3d_memory_chunk_create(struct d3d12_device *device, struct vkd3d_memory_allocator *allocator,
        const struct vkd3d_allocate_memory_info *info, struct vkd3d_memory_chunk **chunk)
{
    struct vkd3d_memory_chunk *new_chunk;
    HRESULT hr;

    new_chunk = vkd3d_malloc(sizeof(*new_chunk));
    if (!new_chunk)
        return E_OUTOFMEMORY;

    memset(new_chunk, 0, sizeof(*new_chunk));

    hr = vkd3d_memory_allocation_init(&new_chunk->allocation, device, allocator, info);
    if (FAILED(hr))
    {
        vkd3d_free(new_chunk);
        return hr;
    }

    /* The base allocation points back at the chunk that owns it. */
    new_chunk->allocation.chunk = new_chunk;

    /* NOTE(review): the insertion helper returns void, so a failure to
     * grow the free list is not detected here and the chunk would be
     * returned with an empty free list. */
    vkd3d_memory_chunk_insert_range(new_chunk, 0, 0, new_chunk->allocation.resource.size);

    *chunk = new_chunk;
    return S_OK;
}
static void vkd3d_memory_chunk_destroy(struct vkd3d_memory_chunk *chunk, struct d3d12_device *device, struct vkd3d_memory_allocator *allocator)
{
vkd3d_memory_allocation_free(&chunk->allocation, device, allocator);
@ -387,6 +597,22 @@ static void vkd3d_memory_chunk_destroy(struct vkd3d_memory_chunk *chunk, struct
vkd3d_free(chunk);
}
/* Unregisters chunk from the allocator's chunk array (if present) by
 * overwriting its slot with the last entry, then destroys the chunk.
 * The order of the remaining chunks is not preserved. */
static void vkd3d_memory_allocator_remove_chunk(struct vkd3d_memory_allocator *allocator, struct d3d12_device *device, struct vkd3d_memory_chunk *chunk)
{
    size_t slot = 0;

    while (slot < allocator->chunks_count && allocator->chunks[slot] != chunk)
        slot++;

    if (slot < allocator->chunks_count)
    {
        allocator->chunks_count--;
        allocator->chunks[slot] = allocator->chunks[allocator->chunks_count];
    }

    vkd3d_memory_chunk_destroy(chunk, device, allocator);
}
HRESULT vkd3d_memory_allocator_init(struct vkd3d_memory_allocator *allocator, struct d3d12_device *device)
{
int rc;
@ -412,18 +638,132 @@ void vkd3d_memory_allocator_cleanup(struct vkd3d_memory_allocator *allocator, st
pthread_mutex_destroy(&allocator->mutex);
}
/* Creates a new chunk of VKD3D_MEMORY_CHUNK_SIZE bytes restricted to the
 * memory types in type_mask, using the given heap properties and flags,
 * and appends it to the allocator's chunk array. The new chunk is also
 * returned through *chunk. Returns E_OUTOFMEMORY if the chunk array cannot
 * be grown, or the failure code from chunk creation. */
static HRESULT vkd3d_memory_allocator_add_chunk(struct vkd3d_memory_allocator *allocator, struct d3d12_device *device,
        const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, uint32_t type_mask, struct vkd3d_memory_chunk **chunk)
{
    struct vkd3d_allocate_memory_info alloc_info;
    struct vkd3d_memory_chunk *created;
    HRESULT hr;

    memset(&alloc_info, 0, sizeof(alloc_info));
    alloc_info.heap_properties = *heap_properties;
    alloc_info.heap_flags = heap_flags;
    alloc_info.memory_requirements.memoryTypeBits = type_mask;
    alloc_info.memory_requirements.alignment = 0;
    alloc_info.memory_requirements.size = VKD3D_MEMORY_CHUNK_SIZE;

    /* Only request the global buffer flag when the heap flags do not deny buffers. */
    if (!(heap_flags & D3D12_HEAP_FLAG_DENY_BUFFERS))
        alloc_info.flags |= VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER;

    /* Make room in the chunk array first so that appending below cannot fail. */
    if (!vkd3d_array_reserve((void**)&allocator->chunks, &allocator->chunks_size,
            allocator->chunks_count + 1, sizeof(*allocator->chunks)))
    {
        ERR("Failed to allocate space for new chunk.\n");
        return E_OUTOFMEMORY;
    }

    hr = vkd3d_memory_chunk_create(device, allocator, &alloc_info, &created);
    if (FAILED(hr))
        return hr;

    *chunk = created;
    allocator->chunks[allocator->chunks_count] = created;
    allocator->chunks_count++;
    return S_OK;
}
/* Attempts to satisfy memory_requirements from a chunk whose memory type
 * is contained in type_mask (further restricted by the device's global
 * mask and by memory_requirements->memoryTypeBits).
 *
 * Existing chunks with matching heap type and heap flags are tried first;
 * if none can hold the request, a new chunk is created on one of the
 * permitted memory types and the allocation is taken from it.
 *
 * vkd3d_suballocate_memory calls this with allocator->mutex held.
 * Returns S_OK on success, otherwise the failure code of chunk creation or
 * of the final range allocation. */
static HRESULT vkd3d_memory_allocator_try_suballocate_memory(struct vkd3d_memory_allocator *allocator,
        struct d3d12_device *device, const VkMemoryRequirements *memory_requirements, uint32_t type_mask,
        const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
        struct vkd3d_memory_allocation *allocation)
{
    struct vkd3d_memory_chunk *chunk;
    HRESULT hr;
    size_t i;

    type_mask &= device->memory_info.global_mask;
    type_mask &= memory_requirements->memoryTypeBits;

    for (i = 0; i < allocator->chunks_count; i++)
    {
        chunk = allocator->chunks[i];

        /* Match flags since otherwise the backing buffer
         * may not support our required usage flags */
        if (chunk->allocation.heap_type != heap_properties->Type ||
                chunk->allocation.heap_flags != heap_flags)
            continue;

        /* Filter out unsupported memory types */
        if (!(type_mask & (1u << chunk->allocation.vk_memory_type)))
            continue;

        if (SUCCEEDED(hr = vkd3d_memory_chunk_allocate_range(chunk, memory_requirements, allocation)))
            return hr;
    }

    /* Try allocating a new chunk on one of the supported memory type
     * before the caller falls back to potentially slower memory.
     * The filtered type_mask is passed on (not the raw memoryTypeBits) so
     * the new chunk cannot end up on a memory type this call was asked to
     * exclude. */
    if (FAILED(hr = vkd3d_memory_allocator_add_chunk(allocator, device, heap_properties,
            heap_flags, type_mask, &chunk)))
        return hr;

    return vkd3d_memory_chunk_allocate_range(chunk, memory_requirements, allocation);
}
/* Releases an allocation obtained from the allocator.
 *
 * An allocation without a chunk owns its memory and is freed directly.
 * A suballocation only hands its byte range back to the owning chunk's
 * free list; the backing memory belongs to the chunk and is released only
 * when the chunk has become entirely free, in which case the chunk is
 * removed from the allocator and destroyed. allocator->mutex is taken
 * while the chunk's free list is updated. */
void vkd3d_free_memory_2(struct d3d12_device *device, struct vkd3d_memory_allocator *allocator,
        const struct vkd3d_memory_allocation *allocation)
{
    if (!allocation->chunk)
    {
        vkd3d_memory_allocation_free(allocation, device, allocator);
        return;
    }

    /* Do not free the allocation itself here: it is a slice of the chunk's
     * base allocation, so freeing it would release memory that other
     * suballocations may still be using. */
    pthread_mutex_lock(&allocator->mutex);
    vkd3d_memory_chunk_free_range(allocation->chunk, allocation);

    if (vkd3d_memory_chunk_is_free(allocation->chunk))
        vkd3d_memory_allocator_remove_chunk(allocator, device, allocation->chunk);
    pthread_mutex_unlock(&allocator->mutex);
}
/* Suballocates memory for info from the allocator's chunks.
 *
 * Memory types carrying all flags selected for the heap properties are
 * tried first. If that fails and there are memory types that satisfy the
 * selected flags apart from VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, those
 * remaining types are tried as a fallback. Both attempts run with
 * allocator->mutex held. Returns the result of the last attempt made, or
 * the failure code from selecting the memory flags. */
static HRESULT vkd3d_suballocate_memory(struct d3d12_device *device, struct vkd3d_memory_allocator *allocator,
        const struct vkd3d_allocate_memory_info *info, struct vkd3d_memory_allocation *allocation)
{
    const VkMemoryPropertyFlags optional_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
    VkMemoryRequirements memory_requirements = info->memory_requirements;
    uint32_t preferred_mask, fallback_mask;
    VkMemoryPropertyFlags type_flags;
    HRESULT hr;

    hr = vkd3d_select_memory_flags(device, &info->heap_properties, &type_flags);
    if (FAILED(hr))
        return hr;

    /* Prefer device-local memory if allowed for this allocation */
    preferred_mask = vkd3d_find_memory_types_with_flags(device, type_flags);
    fallback_mask = vkd3d_find_memory_types_with_flags(device, type_flags & ~optional_flags);
    /* Keep only the types that were not already covered by the first attempt. */
    fallback_mask &= ~preferred_mask;

    pthread_mutex_lock(&allocator->mutex);

    hr = vkd3d_memory_allocator_try_suballocate_memory(allocator, device,
            &memory_requirements, preferred_mask, &info->heap_properties,
            info->heap_flags, allocation);

    if (FAILED(hr) && fallback_mask)
    {
        hr = vkd3d_memory_allocator_try_suballocate_memory(allocator, device,
                &memory_requirements, fallback_mask,
                &info->heap_properties, info->heap_flags, allocation);
    }

    pthread_mutex_unlock(&allocator->mutex);
    return hr;
}
/* Allocates memory described by info.
 *
 * Requests with no pNext chain, no host pointer and a size below
 * VKD3D_VA_BLOCK_SIZE are suballocated from a shared chunk; everything
 * else gets its own dedicated allocation. */
static HRESULT vkd3d_allocate_memory_2(struct d3d12_device *device, struct vkd3d_memory_allocator *allocator,
        const struct vkd3d_allocate_memory_info *info, struct vkd3d_memory_allocation *allocation)
{
    if (!info->pNext && !info->host_ptr && info->memory_requirements.size < VKD3D_VA_BLOCK_SIZE)
        return vkd3d_suballocate_memory(device, allocator, info, allocation);
    else
        return vkd3d_memory_allocation_init(allocation, device, allocator, info);
}
HRESULT vkd3d_allocate_heap_memory_2(struct d3d12_device *device, struct vkd3d_memory_allocator *allocator,
@ -438,7 +778,9 @@ HRESULT vkd3d_allocate_heap_memory_2(struct d3d12_device *device, struct vkd3d_m
alloc_info.heap_properties = info->heap_desc.Properties;
alloc_info.heap_flags = info->heap_desc.Flags;
alloc_info.host_ptr = info->host_ptr;
alloc_info.flags = VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER;
if (!(info->heap_desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS))
alloc_info.flags |= VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER;
return vkd3d_allocate_memory_2(device, allocator, &alloc_info, allocation);
}

View File

@ -484,6 +484,8 @@ enum vkd3d_allocation_flag
VKD3D_ALLOCATION_FLAG_CPU_ACCESS = (1u << 3),
};
/* Size in bytes requested for each chunk that small allocations are
 * suballocated from: 16 times VKD3D_VA_BLOCK_SIZE. */
#define VKD3D_MEMORY_CHUNK_SIZE (VKD3D_VA_BLOCK_SIZE * 16)
struct vkd3d_memory_chunk;
struct vkd3d_allocate_memory_info
@ -539,6 +541,18 @@ struct vkd3d_memory_allocation
struct vkd3d_memory_chunk *chunk;
};
/* Initializes dst as a sub-range of src: every field is copied from src,
 * then the offset, the resource VA and (if src has one) the CPU address
 * are advanced by offset, and the resource size is set to size. */
static inline void vkd3d_memory_allocation_slice(struct vkd3d_memory_allocation *dst,
        const struct vkd3d_memory_allocation *src, VkDeviceSize offset, VkDeviceSize size)
{
    *dst = *src;

    dst->resource.size = size;
    dst->resource.va = src->resource.va + offset;
    dst->offset = src->offset + offset;

    /* An allocation without a CPU mapping keeps a null CPU address. */
    if (src->cpu_address)
        dst->cpu_address = void_ptr_offset(src->cpu_address, offset);
}
struct vkd3d_memory_free_range
{
VkDeviceSize offset;