vkd3d: Use virtual VAs for descriptor heap GPU VAs.

Allows local root signatures to work correctly, and is also a good
optimization since we no longer need to dereference memory (potentially
cold cache lines) to figure out the heap offset during command buffer
recording.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
Hans-Kristian Arntzen 2021-02-22 11:57:53 +01:00
parent 1af3f9c65f
commit 15e36a0430
4 changed files with 57 additions and 15 deletions
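
Concretely, resolving a root descriptor table's heap offset goes from a
pointer chase to plain integer arithmetic. A minimal, self-contained sketch
of the idea (toy_desc and its 64-byte size are illustrative stand-ins for the
internal d3d12_desc, not excerpts from the patch):

    #include <assert.h>
    #include <stdint.h>

    /* Stand-in for the driver's internal descriptor struct; only its size matters. */
    struct toy_desc { uint32_t heap_offset; unsigned char payload[60]; };

    int main(void)
    {
        struct toy_desc heap[256];
        uint32_t index = 7;

        /* Old scheme: the GPU handle pointed at host memory, so recovering the
         * heap offset meant loading a (potentially cold) descriptor. */
        heap[index].heap_offset = index;
        uint64_t old_handle = (uint64_t)(uintptr_t)&heap[index];
        uint32_t old_offset = ((struct toy_desc *)(uintptr_t)old_handle)->heap_offset;

        /* New scheme: the handle is a virtual VA of the form
         * (unique << 32) | (index * sizeof(desc)), so no load is needed. */
        uint64_t new_handle = ((uint64_t)1 << 32) + index * sizeof(struct toy_desc);
        uint32_t new_offset = (uint32_t)new_handle / (uint32_t)sizeof(struct toy_desc);

        assert(old_offset == index && new_offset == index);
        return 0;
    }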


@@ -4148,7 +4148,6 @@ static void d3d12_command_list_update_descriptor_table_offsets(struct d3d12_comm
     const struct d3d12_root_signature *root_signature = bindings->root_signature;
     const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
     const struct vkd3d_shader_descriptor_table *table;
-    const struct d3d12_desc *base_descriptor;
     uint32_t table_offsets[D3D12_MAX_ROOT_COST];
     unsigned int root_parameter_index;
     uint64_t descriptor_table_mask;
@@ -4159,11 +4158,8 @@ static void d3d12_command_list_update_descriptor_table_offsets(struct d3d12_comm
     while (descriptor_table_mask)
     {
         root_parameter_index = vkd3d_bitmask_iter64(&descriptor_table_mask);
-        base_descriptor = d3d12_desc_from_gpu_handle(bindings->descriptor_tables[root_parameter_index]);
         table = root_signature_get_descriptor_table(root_signature, root_parameter_index);
-        table_offsets[table->table_index] = d3d12_desc_heap_offset(base_descriptor);
+        table_offsets[table->table_index] = bindings->descriptor_tables[root_parameter_index];
     }
     /* Set descriptor offsets */
@@ -4314,7 +4310,6 @@ static void d3d12_command_list_fetch_inline_uniform_block_data(struct d3d12_comm
     const struct vkd3d_shader_root_constant *root_constant;
     const uint32_t *src_data = bindings->root_constants;
     const struct vkd3d_shader_descriptor_table *table;
-    const struct d3d12_desc *base_descriptor;
     unsigned int root_parameter_index;
     uint64_t descriptor_table_mask;
     uint32_t first_table_offset;
@@ -4337,11 +4332,9 @@ static void d3d12_command_list_fetch_inline_uniform_block_data(struct d3d12_comm
     while (descriptor_table_mask)
     {
         root_parameter_index = vkd3d_bitmask_iter64(&descriptor_table_mask);
-        base_descriptor = d3d12_desc_from_gpu_handle(bindings->descriptor_tables[root_parameter_index]);
         table = root_signature_get_descriptor_table(root_signature, root_parameter_index);
-        dst_data->root_constants[first_table_offset + table->table_index] = d3d12_desc_heap_offset(base_descriptor);
+        dst_data->root_constants[first_table_offset + table->table_index] =
+                bindings->descriptor_tables[root_parameter_index];
     }
     /* Reset dirty flags to avoid redundant updates in the future */
@@ -6283,7 +6276,7 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l
     table = root_signature_get_descriptor_table(root_signature, index);
     assert(table && index < ARRAY_SIZE(bindings->descriptor_tables));
-    bindings->descriptor_tables[index] = base_descriptor;
+    bindings->descriptor_tables[index] = d3d12_desc_heap_offset_from_gpu_handle(base_descriptor);
     bindings->descriptor_table_active_mask |= (uint64_t)1 << index;
     if (root_signature->descriptor_table_count)


@@ -2138,6 +2138,33 @@ void d3d12_device_return_scratch_buffer(struct d3d12_device *device, const struc
     }
 }
+uint64_t d3d12_device_get_descriptor_heap_gpu_va(struct d3d12_device *device)
+{
+    uint64_t va;
+    /* The virtual GPU descriptor VAs are of form (unique << 32) | (desc index * sizeof(d3d12_desc)),
+     * which simplifies local root signature tables.
+     * Also simplifies SetRootDescriptorTable since we can deduce offset without memory lookups. */
+    pthread_mutex_lock(&device->mutex);
+    if (device->descriptor_heap_gpu_va_count)
+        va = device->descriptor_heap_gpu_vas[--device->descriptor_heap_gpu_va_count];
+    else
+        va = ++device->descriptor_heap_gpu_next;
+    pthread_mutex_unlock(&device->mutex);
+    va <<= 32;
+    return va;
+}
+void d3d12_device_return_descriptor_heap_gpu_va(struct d3d12_device *device, uint64_t va)
+{
+    pthread_mutex_lock(&device->mutex);
+    vkd3d_array_reserve((void **)&device->descriptor_heap_gpu_vas, &device->descriptor_heap_gpu_va_size,
+            device->descriptor_heap_gpu_va_count + 1, sizeof(*device->descriptor_heap_gpu_vas));
+    device->descriptor_heap_gpu_vas[device->descriptor_heap_gpu_va_count++] = (uint32_t)(va >> 32);
+    pthread_mutex_unlock(&device->mutex);
+}
 static HRESULT d3d12_device_create_query_pool(struct d3d12_device *device, D3D12_QUERY_HEAP_TYPE heap_type, struct vkd3d_query_pool *pool)
 {
     const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
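
Worth noting about the two helpers above: the 32-bit "unique" tokens are
recycled through a free list under the device mutex, so repeatedly creating
and destroying shader-visible heaps does not exhaust descriptor_heap_gpu_next.
On a fresh device the first two shader-visible heaps get base VAs 1 << 32 and
2 << 32, leaving the entire lower 32 bits of a handle for the byte offset into
the heap.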
@@ -2300,6 +2327,8 @@ static void d3d12_device_destroy(struct d3d12_device *device)
     for (i = 0; i < device->query_pool_count; i++)
         d3d12_device_destroy_query_pool(device, &device->query_pools[i]);
+    vkd3d_free(device->descriptor_heap_gpu_vas);
     vkd3d_private_store_destroy(&device->private_store);
     vkd3d_cleanup_format_info(device);


@@ -4709,7 +4709,7 @@ static D3D12_GPU_DESCRIPTOR_HANDLE * STDMETHODCALLTYPE d3d12_descriptor_heap_Get
     TRACE("iface %p, descriptor %p.\n", iface, descriptor);
-    descriptor->ptr = (uint64_t)(intptr_t)heap->descriptors;
+    descriptor->ptr = heap->gpu_va;
     return descriptor;
 }
@@ -5078,6 +5078,9 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript
     descriptor_heap->device = device;
     descriptor_heap->desc = *desc;
+    if (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)
+        descriptor_heap->gpu_va = d3d12_device_get_descriptor_heap_gpu_va(device);
     if (FAILED(hr = d3d12_descriptor_heap_create_descriptor_pool(descriptor_heap,
             &descriptor_heap->vk_descriptor_pool)))
         goto fail;
@@ -5165,7 +5168,7 @@ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device,
         return E_INVALIDARG;
     }
-    max_descriptor_count = (~(size_t)0 - sizeof(*object)) / descriptor_size;
+    max_descriptor_count = (UINT32_MAX - sizeof(*object)) / descriptor_size;
     if (desc->NumDescriptors > max_descriptor_count)
     {
         WARN("Invalid descriptor count %u (max %zu).\n", desc->NumDescriptors, max_descriptor_count);
@@ -5198,11 +5201,14 @@ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device,
 void d3d12_descriptor_heap_cleanup(struct d3d12_descriptor_heap *descriptor_heap)
 {
     const struct vkd3d_vk_device_procs *vk_procs = &descriptor_heap->device->vk_procs;
-    const struct d3d12_device *device = descriptor_heap->device;
+    struct d3d12_device *device = descriptor_heap->device;
     if (!descriptor_heap->vk_memory)
         vkd3d_free(descriptor_heap->host_memory);
+    if (descriptor_heap->gpu_va != 0)
+        d3d12_device_return_descriptor_heap_gpu_va(device, descriptor_heap->gpu_va);
     VK_CALL(vkDestroyBuffer(device->vk_device, descriptor_heap->vk_buffer, NULL));
     VK_CALL(vkFreeMemory(device->vk_device, descriptor_heap->vk_memory, NULL));


@@ -929,6 +929,7 @@ struct d3d12_descriptor_heap
     ID3D12DescriptorHeap ID3D12DescriptorHeap_iface;
     LONG refcount;
+    uint64_t gpu_va;
     D3D12_DESCRIPTOR_HEAP_DESC desc;
     VkDescriptorPool vk_descriptor_pool;
@@ -958,6 +959,11 @@ static inline uint32_t d3d12_desc_heap_offset(const struct d3d12_desc *dst)
     return dst->heap_offset;
 }
+static inline uint32_t d3d12_desc_heap_offset_from_gpu_handle(D3D12_GPU_DESCRIPTOR_HANDLE handle)
+{
+    return (uint32_t)handle.ptr / sizeof(struct d3d12_desc);
+}
 /* ID3D12QueryHeap */
 struct d3d12_query_heap
 {
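
The division in d3d12_desc_heap_offset_from_gpu_handle is valid because a
heap's base VA has zero in its low 32 bits, so after truncation the handle
carries nothing but index * sizeof(struct d3d12_desc); with an illustrative
64-byte descriptor, a handle of base + 6400 truncates to 6400 and yields heap
offset 100. This also lines up with the UINT32_MAX bound introduced in
d3d12_descriptor_heap_create above: the byte offset of the last descriptor has
to fit in those low 32 bits.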
@@ -1396,7 +1402,7 @@ struct vkd3d_pipeline_bindings
     VkDescriptorSet static_sampler_set;
     uint32_t dirty_flags; /* vkd3d_pipeline_dirty_flags */
-    D3D12_GPU_DESCRIPTOR_HANDLE descriptor_tables[D3D12_MAX_ROOT_COST];
+    uint32_t descriptor_tables[D3D12_MAX_ROOT_COST];
     uint64_t descriptor_table_active_mask;
     uint64_t descriptor_heap_dirty_mask;
@@ -2331,6 +2337,11 @@ struct d3d12_device
     struct vkd3d_query_pool query_pools[VKD3D_VIRTUAL_QUERY_POOL_COUNT];
     size_t query_pool_count;
+    uint32_t *descriptor_heap_gpu_vas;
+    size_t descriptor_heap_gpu_va_count;
+    size_t descriptor_heap_gpu_va_size;
+    uint32_t descriptor_heap_gpu_next;
     HRESULT removed_reason;
     const struct vkd3d_format *depth_stencil_formats;
@@ -2360,6 +2371,9 @@ void d3d12_device_return_scratch_buffer(struct d3d12_device *device, const struc
 HRESULT d3d12_device_get_query_pool(struct d3d12_device *device, D3D12_QUERY_HEAP_TYPE heap_type, struct vkd3d_query_pool *pool);
 void d3d12_device_return_query_pool(struct d3d12_device *device, const struct vkd3d_query_pool *pool);
+uint64_t d3d12_device_get_descriptor_heap_gpu_va(struct d3d12_device *device);
+void d3d12_device_return_descriptor_heap_gpu_va(struct d3d12_device *device, uint64_t va);
 static inline HRESULT d3d12_device_query_interface(struct d3d12_device *device, REFIID iid, void **object)
 {
     return ID3D12Device6_QueryInterface(&device->ID3D12Device_iface, iid, object);