vkd3d: Add VA->resource map and new VA allocator.

This is designed to work with actual device addresses if supported by
the Vulkan implementation.

Signed-off-by: Philip Rebohle <philip.rebohle@tu-dortmund.de>
Philip Rebohle 2020-11-13 15:18:03 +01:00 committed by Hans-Kristian Arntzen
parent f536daaacb
commit 7c017c1dba
3 changed files with 454 additions and 0 deletions
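
For context, a rough sketch of how the new entry points fit together. This is illustrative only: struct vkd3d_unique_resource has more members than the va and size fields used below, and resources backed by real device addresses would not go through the fake-VA allocator.

struct vkd3d_va_map va_map;
struct vkd3d_unique_resource resource;
const struct vkd3d_unique_resource *found;

vkd3d_va_map_init(&va_map);

/* Register a resource so a D3D12 GPU VA can later be resolved back to it. */
resource.size = 65536;
resource.va = vkd3d_va_map_alloc_fake_va(&va_map, resource.size);
vkd3d_va_map_insert(&va_map, &resource);

/* Any address inside the registered range dereferences to the resource. */
found = vkd3d_va_map_deref(&va_map, resource.va + 128);

vkd3d_va_map_remove(&va_map, &resource);
vkd3d_va_map_free_fake_va(&va_map, resource.va, resource.size);
vkd3d_va_map_cleanup(&va_map);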

libs/vkd3d/meson.build

@@ -38,6 +38,7 @@ vkd3d_src = [
  'state.c',
  'utils.c',
  'debug_ring.c',
  'va_map.c',
  'vkd3d_main.c',
  'raytracing_pipeline.c',
]

libs/vkd3d/va_map.c (new file, 384 lines)

@@ -0,0 +1,384 @@
/*
 * Copyright 2021 Philip Rebohle for Valve Software
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
#include "vkd3d_private.h"
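
/* Tree nodes are allocated lazily: vkd3d_va_map_get_block publishes new nodes
 * with a compare-exchange, and vkd3d_va_map_find_block walks them lock-free
 * using acquire loads. Only the sorted array of small entries below is guarded
 * by the va_map mutex. */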

static inline VkDeviceAddress vkd3d_va_map_get_next_address(VkDeviceAddress va)
{
    return va >> (VKD3D_VA_BLOCK_SIZE_BITS + VKD3D_VA_BLOCK_BITS);
}

static inline VkDeviceAddress vkd3d_va_map_get_block_address(VkDeviceAddress va)
{
    return (va >> VKD3D_VA_BLOCK_SIZE_BITS) & VKD3D_VA_BLOCK_MASK;
}

static const struct vkd3d_va_block *vkd3d_va_map_find_block(const struct vkd3d_va_map *va_map, VkDeviceAddress va)
{
    VkDeviceAddress next_address = vkd3d_va_map_get_next_address(va);
    const struct vkd3d_va_tree *tree = &va_map->va_tree;

    while (next_address && tree)
    {
        tree = vkd3d_atomic_ptr_load_explicit(&tree->next[next_address & VKD3D_VA_NEXT_MASK], vkd3d_memory_order_acquire);
        next_address >>= VKD3D_VA_NEXT_BITS;
    }

    if (!tree)
        return NULL;

    return &tree->blocks[vkd3d_va_map_get_block_address(va)];
}

static struct vkd3d_va_block *vkd3d_va_map_get_block(struct vkd3d_va_map *va_map, VkDeviceAddress va)
{
    VkDeviceAddress next_address = vkd3d_va_map_get_next_address(va);
    struct vkd3d_va_tree *tree, **tree_ptr;

    tree = &va_map->va_tree;

    while (next_address)
    {
        tree_ptr = &tree->next[next_address & VKD3D_VA_NEXT_MASK];
        tree = vkd3d_atomic_ptr_load_explicit(tree_ptr, vkd3d_memory_order_acquire);

        if (!tree)
        {
            void *orig;

            tree = vkd3d_calloc(1, sizeof(*tree));
            orig = vkd3d_atomic_ptr_compare_exchange(tree_ptr, NULL, tree, vkd3d_memory_order_release, vkd3d_memory_order_acquire);

            if (orig)
            {
                vkd3d_free(tree);
                tree = orig;
            }
        }

        next_address >>= VKD3D_VA_NEXT_BITS;
    }

    return &tree->blocks[vkd3d_va_map_get_block_address(va)];
}

static void vkd3d_va_map_cleanup_tree(struct vkd3d_va_tree *tree)
{
    unsigned int i;

    for (i = 0; i < ARRAY_SIZE(tree->next); i++)
    {
        if (tree->next[i])
        {
            vkd3d_va_map_cleanup_tree(tree->next[i]);
            vkd3d_free(tree->next[i]);
        }
    }
}
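
/* Resources smaller than a block live in a sorted array and are found with a
 * binary search over their [va, va + size) ranges. On a miss, *index receives
 * the position at which a new entry would have to be inserted to keep the
 * array sorted. */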
static const struct vkd3d_unique_resource *vkd3d_va_map_find_small_entry(struct vkd3d_va_map *va_map,
        VkDeviceAddress va, size_t *index)
{
    const struct vkd3d_unique_resource *resource = NULL;
    size_t hi = va_map->small_entries_count;
    size_t lo = 0;

    while (lo < hi)
    {
        const struct vkd3d_unique_resource *r;
        size_t i = lo + (hi - lo) / 2;

        r = va_map->small_entries[i];

        if (va < r->va)
            hi = i;
        else if (va >= r->va + r->size)
            lo = i + 1;
        else
        {
            lo = hi = i;
            resource = r;
        }
    }

    if (index)
        *index = lo;

    return resource;
}
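
/* Resources of at least one block in size are registered in every block their
 * VA range overlaps. A block holds at most two entries: 'r' for a resource that
 * starts inside the block (it owns addresses >= r.va), and 'l' for a resource
 * that covers the block from its start (it owns addresses < l.va, i.e. below
 * the resource's end). */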
void vkd3d_va_map_insert(struct vkd3d_va_map *va_map, const struct vkd3d_unique_resource *resource)
{
    VkDeviceAddress block_va, min_va, max_va;
    struct vkd3d_va_block *block;
    size_t index;

    if (resource->size >= VKD3D_VA_BLOCK_SIZE)
    {
        min_va = resource->va;
        max_va = resource->va + resource->size;
        block_va = min_va & ~VKD3D_VA_LO_MASK;

        while (block_va < max_va)
        {
            block = vkd3d_va_map_get_block(va_map, block_va);

            if (block_va < min_va)
            {
                vkd3d_atomic_uint64_store_explicit(&block->r.va, min_va, vkd3d_memory_order_relaxed);
                vkd3d_atomic_ptr_store_explicit(&block->r.resource, resource, vkd3d_memory_order_relaxed);
            }
            else
            {
                vkd3d_atomic_uint64_store_explicit(&block->l.va, max_va, vkd3d_memory_order_relaxed);
                vkd3d_atomic_ptr_store_explicit(&block->l.resource, resource, vkd3d_memory_order_relaxed);
            }

            block_va += VKD3D_VA_BLOCK_SIZE;
        }
    }
    else
    {
        pthread_mutex_lock(&va_map->mutex);

        if (!vkd3d_va_map_find_small_entry(va_map, resource->va, &index))
        {
            vkd3d_array_reserve((void**)&va_map->small_entries, &va_map->small_entries_size,
                    va_map->small_entries_count + 1, sizeof(*va_map->small_entries));

            memmove(&va_map->small_entries[index + 1], &va_map->small_entries[index],
                    sizeof(*va_map->small_entries) * (va_map->small_entries_count - index));

            va_map->small_entries[index] = resource;
            va_map->small_entries_count += 1;
        }

        pthread_mutex_unlock(&va_map->mutex);
    }
}

void vkd3d_va_map_remove(struct vkd3d_va_map *va_map, const struct vkd3d_unique_resource *resource)
{
    VkDeviceAddress block_va, min_va, max_va;
    struct vkd3d_va_block *block;
    size_t index;

    if (resource->size >= VKD3D_VA_BLOCK_SIZE)
    {
        min_va = resource->va;
        max_va = resource->va + resource->size;
        block_va = min_va & ~VKD3D_VA_LO_MASK;

        while (block_va < max_va)
        {
            block = vkd3d_va_map_get_block(va_map, block_va);

            if (vkd3d_atomic_ptr_load_explicit(&block->l.resource, vkd3d_memory_order_relaxed) == resource)
            {
                vkd3d_atomic_uint64_store_explicit(&block->l.va, 0, vkd3d_memory_order_relaxed);
                vkd3d_atomic_ptr_store_explicit(&block->l.resource, NULL, vkd3d_memory_order_relaxed);
            }
            else if (vkd3d_atomic_ptr_load_explicit(&block->r.resource, vkd3d_memory_order_relaxed) == resource)
            {
                vkd3d_atomic_uint64_store_explicit(&block->r.va, 0, vkd3d_memory_order_relaxed);
                vkd3d_atomic_ptr_store_explicit(&block->r.resource, NULL, vkd3d_memory_order_relaxed);
            }

            block_va += VKD3D_VA_BLOCK_SIZE;
        }
    }
    else
    {
        pthread_mutex_lock(&va_map->mutex);

        if (vkd3d_va_map_find_small_entry(va_map, resource->va, &index) == resource)
        {
            va_map->small_entries_count -= 1;

            memmove(&va_map->small_entries[index], &va_map->small_entries[index + 1],
                    sizeof(*va_map->small_entries) * (va_map->small_entries_count - index));
        }

        pthread_mutex_unlock(&va_map->mutex);
    }
}

const struct vkd3d_unique_resource *vkd3d_va_map_deref(struct vkd3d_va_map *va_map, VkDeviceAddress va)
{
    const struct vkd3d_va_block *block = vkd3d_va_map_find_block(va_map, va);
    const struct vkd3d_unique_resource *resource = NULL;

    if (block)
    {
        if (va < vkd3d_atomic_uint64_load_explicit(&block->l.va, vkd3d_memory_order_relaxed))
            resource = vkd3d_atomic_ptr_load_explicit(&block->l.resource, vkd3d_memory_order_relaxed);
        else if (va >= vkd3d_atomic_uint64_load_explicit(&block->r.va, vkd3d_memory_order_relaxed))
            resource = vkd3d_atomic_ptr_load_explicit(&block->r.resource, vkd3d_memory_order_relaxed);
    }

    if (!resource)
    {
        pthread_mutex_lock(&va_map->mutex);
        resource = vkd3d_va_map_find_small_entry(va_map, va, NULL);
        pthread_mutex_unlock(&va_map->mutex);
    }

    return resource;
}
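
/* Fake VAs are handed out when a resource cannot be given a real
 * VkDeviceAddress, e.g. if the Vulkan implementation does not support the
 * required device address functionality. Allocations are aligned to 64 KiB
 * and recycled through a size-ordered free list. */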
#define VKD3D_FAKE_VA_ALIGNMENT (65536)

VkDeviceAddress vkd3d_va_map_alloc_fake_va(struct vkd3d_va_map *va_map, VkDeviceSize size)
{
    struct vkd3d_va_allocator *allocator = &va_map->va_allocator;
    struct vkd3d_va_range range;
    VkDeviceAddress va;
    size_t i;
    int rc;

    if ((rc = pthread_mutex_lock(&allocator->mutex)))
    {
        ERR("Failed to lock mutex, rc %d.\n", rc);
        return 0;
    }

    memset(&range, 0, sizeof(range));
    size = align(size, VKD3D_FAKE_VA_ALIGNMENT);

    /* The free list is ordered in such a way that the largest range
     * is always first, so we don't have to iterate over the list. */
    if (allocator->free_range_count)
        range = allocator->free_ranges[0];

    if (range.size >= size)
    {
        va = range.base;

        range.base += size;
        range.size -= size;

        for (i = 0; i < allocator->free_range_count - 1; i++)
        {
            if (allocator->free_ranges[i + 1].size > range.size)
                allocator->free_ranges[i] = allocator->free_ranges[i + 1];
            else
                break;
        }

        if (range.size)
            allocator->free_ranges[i] = range;
        else
            allocator->free_range_count--;
    }
    else
    {
        va = allocator->next_va;
        allocator->next_va += size;
    }

    pthread_mutex_unlock(&allocator->mutex);
    return va;
}

void vkd3d_va_map_free_fake_va(struct vkd3d_va_map *va_map, VkDeviceAddress va, VkDeviceSize size)
{
    struct vkd3d_va_allocator *allocator = &va_map->va_allocator;
    size_t range_idx, range_shift, i;
    struct vkd3d_va_range new_range;
    int rc;

    if ((rc = pthread_mutex_lock(&allocator->mutex)))
    {
        ERR("Failed to lock mutex, rc %d.\n", rc);
        return;
    }

    new_range.base = va;
    new_range.size = align(size, VKD3D_FAKE_VA_ALIGNMENT);

    range_idx = allocator->free_range_count;
    range_shift = 0;

    /* Find and effectively delete any free range adjacent to new_range. */
    for (i = 0; i < allocator->free_range_count; i++)
    {
        const struct vkd3d_va_range *cur_range = &allocator->free_ranges[i];

        if (range_shift)
            allocator->free_ranges[i - range_shift] = *cur_range;

        if (cur_range->base == new_range.base + new_range.size || cur_range->base + cur_range->size == new_range.base)
        {
            if (range_idx == allocator->free_range_count)
                range_idx = i;
            else
                range_shift++;

            new_range.base = min(new_range.base, cur_range->base);
            new_range.size += cur_range->size;
        }
    }

    if (range_idx == allocator->free_range_count)
    {
        /* range_idx will be valid and point to the last element afterwards. */
        if (!(vkd3d_array_reserve((void **)&allocator->free_ranges, &allocator->free_ranges_size,
                allocator->free_range_count + 1, sizeof(*allocator->free_ranges))))
        {
            ERR("Failed to add free range.\n");
            pthread_mutex_unlock(&allocator->mutex);
            return;
        }

        allocator->free_range_count += 1;
    }
    else
        allocator->free_range_count -= range_shift;

    /* Move ranges smaller than our new free range back to keep the list ordered. */
    while (range_idx && allocator->free_ranges[range_idx - 1].size < new_range.size)
    {
        allocator->free_ranges[range_idx] = allocator->free_ranges[range_idx - 1];
        range_idx--;
    }

    allocator->free_ranges[range_idx] = new_range;

    pthread_mutex_unlock(&allocator->mutex);
}

void vkd3d_va_map_init(struct vkd3d_va_map *va_map)
{
    memset(va_map, 0, sizeof(*va_map));
    pthread_mutex_init(&va_map->mutex, NULL);
    pthread_mutex_init(&va_map->va_allocator.mutex, NULL);

    /* Make sure we never return 0 as a valid VA. */
    va_map->va_allocator.next_va = VKD3D_VA_BLOCK_SIZE;
}

void vkd3d_va_map_cleanup(struct vkd3d_va_map *va_map)
{
    vkd3d_va_map_cleanup_tree(&va_map->va_tree);
    pthread_mutex_destroy(&va_map->va_allocator.mutex);
    pthread_mutex_destroy(&va_map->mutex);

    vkd3d_free(va_map->va_allocator.free_ranges);
    vkd3d_free(va_map->small_entries);
}

libs/vkd3d/vkd3d_private.h

@@ -240,6 +240,75 @@ HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker,
HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker,
        struct d3d12_device *device);
#define VKD3D_VA_BLOCK_SIZE_BITS (20)
#define VKD3D_VA_BLOCK_SIZE (1ull << VKD3D_VA_BLOCK_SIZE_BITS)
#define VKD3D_VA_LO_MASK (VKD3D_VA_BLOCK_SIZE - 1)
#define VKD3D_VA_BLOCK_BITS (20)
#define VKD3D_VA_BLOCK_COUNT (1ull << VKD3D_VA_BLOCK_BITS)
#define VKD3D_VA_BLOCK_MASK (VKD3D_VA_BLOCK_COUNT - 1)
#define VKD3D_VA_NEXT_BITS (12)
#define VKD3D_VA_NEXT_COUNT (1ull << VKD3D_VA_NEXT_BITS)
#define VKD3D_VA_NEXT_MASK (VKD3D_VA_NEXT_COUNT - 1)
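
/* The low VKD3D_VA_BLOCK_SIZE_BITS (20) bits of an address are the offset into
 * a 1 MiB block, and the next VKD3D_VA_BLOCK_BITS (20) bits index blocks[], so
 * a single vkd3d_va_tree covers a 1 TiB window. Any remaining high bits are
 * consumed VKD3D_VA_NEXT_BITS (12) at a time to select a child in next[]. */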

struct vkd3d_unique_resource;

struct vkd3d_va_entry
{
    DECLSPEC_ALIGN(8) VkDeviceAddress va;
    const struct vkd3d_unique_resource *resource;
};

struct vkd3d_va_block
{
    struct vkd3d_va_entry l;
    struct vkd3d_va_entry r;
};

struct vkd3d_va_tree
{
    struct vkd3d_va_block blocks[1u << VKD3D_VA_BLOCK_BITS];
    struct vkd3d_va_tree *next[VKD3D_VA_NEXT_COUNT];
};

struct vkd3d_va_range
{
    VkDeviceAddress base;
    VkDeviceSize size;
};

struct vkd3d_va_allocator
{
    pthread_mutex_t mutex;
    struct vkd3d_va_range *free_ranges;
    size_t free_ranges_size;
    size_t free_range_count;
    VkDeviceAddress next_va;
};

struct vkd3d_va_map
{
    struct vkd3d_va_tree va_tree;
    struct vkd3d_va_allocator va_allocator;
    pthread_mutex_t mutex;
    const struct vkd3d_unique_resource **small_entries;
    size_t small_entries_size;
    size_t small_entries_count;
};

void vkd3d_va_map_insert(struct vkd3d_va_map *va_map, const struct vkd3d_unique_resource *resource);
void vkd3d_va_map_remove(struct vkd3d_va_map *va_map, const struct vkd3d_unique_resource *resource);
const struct vkd3d_unique_resource *vkd3d_va_map_deref(struct vkd3d_va_map *va_map, VkDeviceAddress va);
VkDeviceAddress vkd3d_va_map_alloc_fake_va(struct vkd3d_va_map *va_map, VkDeviceSize size);
void vkd3d_va_map_free_fake_va(struct vkd3d_va_map *va_map, VkDeviceAddress va, VkDeviceSize size);
void vkd3d_va_map_init(struct vkd3d_va_map *va_map);
void vkd3d_va_map_cleanup(struct vkd3d_va_map *va_map);

struct vkd3d_gpu_va_allocation
{
    D3D12_GPU_VIRTUAL_ADDRESS base;