Compare commits

...

4 Commits

Author SHA1 Message Date
Hans-Kristian Arntzen e330a7d228 vkd3d: Support RS 1.0 VOLATILE descriptors.
Use EXT_descriptor_indexing's UPDATE_AFTER_BIND feature to support the
semantics required by RS 1.0 VOLATILE descriptors. We implement this by
deferring all descriptor set updates until Submit time.

This is fine, as command buffers cannot be executed simultaneously on
D3D12, so at Submit time, we know that the command buffer is not being
executed on the GPU, and updating descriptors for multiple submissions
is correct.

If EXT_descriptor_indexing is not available, the fallback is the older
method, which matches the RS 1.1 STATIC descriptor model.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2019-10-15 10:42:37 +02:00
Hans-Kristian Arntzen dd9b681dab vkd3d: Add simple pthread wrapper for MSVC.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2019-10-08 10:20:27 +02:00
Hans-Kristian Arntzen b23b777bf4 vkd3d: Allocate one large buffer for a heap and offset into it.
Greatly reduces the number of VA allocations we have to make, makes the
returned VAs more sensible, and better matches the VAs we see on native drivers.

D3D12 usage flags for buffers seem generic enough that there
is no obvious benefit to placing smaller VkBuffers on top of
VkDeviceMemory.

Ideally, physical_buffer_address would be used here, but this works as a
good fallback if that path is added later.

With this patch and the previous VA optimization, I'm observing a 2.0-2.5%
FPS uplift on SOTTR when CPU bound.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2019-10-08 10:20:27 +02:00
Hans-Kristian Arntzen b0b2a50ecc vkd3d: Optimize GPU VA allocator.
The GPU VA allocator was allocating memory in a way where dereferencing a
GPU VA required a lock + bsearch to find the right VA range.

Rather than going this route, we make the common case O(1) and
lockless by creating a slab allocator which lets us look up a pointer
directly from a GPU VA with (VA - Base) / PageSize.

The number of allocations in the fast path must be limited since we
cannot trivially grow the allocator while remaining lock-free for
dereferences.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2019-10-08 10:20:27 +02:00
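A minimal sketch of the O(1) lookup described in the commit message above, using the constants and fields introduced in the diffs below; the real vkd3d_gpu_va_allocator_dereference_slab() additionally range-checks the offset within the slab entry:

/* Sketch only, not part of the patch. */
static inline void *sketch_dereference_slab(const struct vkd3d_gpu_va_allocator *allocator,
        D3D12_GPU_VIRTUAL_ADDRESS va)
{
    /* (VA - Base) / PageSize, with the division performed as a shift. */
    uint64_t index = (va - VKD3D_BASE_VA_SLAB) >> VKD3D_SLAB_ALLOCATION_SIZE_LOG2;

    if (index >= VKD3D_MAX_VA_SLAB_ALLOCATIONS)
        return NULL;

    /* The slab_mem_allocations base pointer never changes, so no lock is taken. */
    return allocator->slab_mem_allocations[index].ptr;
}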
7 changed files with 862 additions and 154 deletions

View File

@ -0,0 +1,165 @@
/*
* Copyright 2019 Hans-Kristian Arntzen for Valve
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef __VKD3D_THREADS_H
#define __VKD3D_THREADS_H
#include "config.h"
#if defined(HAVE_PTHREAD_H)
#include <pthread.h>
#elif defined(_WIN32) /* HAVE_PTHREAD_H */
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
/* pthread_t is passed by value in some functions,
* which implies we need pthread_t to be a pointer type here. */
struct pthread
{
HANDLE thread;
DWORD id;
void * (*routine)(void *);
void *arg;
};
typedef struct pthread *pthread_t;
/* pthread_mutex_t is not copyable, so embed CS inline. */
typedef struct pthread_mutex
{
CRITICAL_SECTION lock;
} pthread_mutex_t;
/* pthread_cond_t is not copyable, so embed CV inline. */
typedef struct pthread_cond
{
CONDITION_VARIABLE cond;
} pthread_cond_t;
static DWORD WINAPI win32_thread_wrapper_routine(void *arg)
{
pthread_t thread = arg;
thread->routine(thread->arg);
return 0;
}
static inline int pthread_create(pthread_t *out_thread, void *attr, void * (*thread_fun)(void *), void *arg)
{
pthread_t thread = vkd3d_calloc(1, sizeof(*thread));
if (!thread)
return -1;
(void)attr;
thread->routine = thread_fun;
thread->arg = arg;
thread->thread = CreateThread(NULL, 0, win32_thread_wrapper_routine, thread, 0, &thread->id);
if (!thread->thread)
{
vkd3d_free(thread);
return -1;
}
*out_thread = thread;
return 0;
}
static inline int pthread_join(pthread_t thread, void **ret)
{
(void)ret;
int success = WaitForSingleObject(thread->thread, INFINITE) == WAIT_OBJECT_0;
if (success)
{
CloseHandle(thread->thread);
vkd3d_free(thread);
}
return success ? 0 : -1;
}
static inline int pthread_mutex_init(pthread_mutex_t *lock, void *attr)
{
(void)attr;
InitializeCriticalSection(&lock->lock);
return 0;
}
static inline int pthread_mutex_lock(pthread_mutex_t *lock)
{
EnterCriticalSection(&lock->lock);
return 0;
}
static inline int pthread_mutex_unlock(pthread_mutex_t *lock)
{
LeaveCriticalSection(&lock->lock);
return 0;
}
static inline int pthread_mutex_destroy(pthread_mutex_t *lock)
{
DeleteCriticalSection(&lock->lock);
return 0;
}
static inline int pthread_cond_init(pthread_cond_t *cond, void *attr)
{
(void)attr;
InitializeConditionVariable(&cond->cond);
return 0;
}
static inline void pthread_cond_destroy(pthread_cond_t *cond)
{
(void)cond;
}
static inline int pthread_cond_signal(pthread_cond_t *cond)
{
WakeConditionVariable(&cond->cond);
return 0;
}
static inline int pthread_cond_broadcast(pthread_cond_t *cond)
{
WakeAllConditionVariable(&cond->cond);
return 0;
}
static inline int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *lock)
{
bool ret = SleepConditionVariableCS(&cond->cond, &lock->lock, INFINITE);
return ret ? 0 : -1;
}
#else /* HAVE_PTHREAD_H */
#error "Threads are not supported. Cannot build."
#endif /* HAVE_PTHREAD_H */
static inline void vkd3d_set_thread_name(const char *name)
{
#if defined(_MSC_VER)
(void)name;
#elif defined(HAVE_PTHREAD_SETNAME_NP_2)
pthread_setname_np(pthread_self(), name);
#elif defined(HAVE_PTHREAD_SETNAME_NP_1)
pthread_setname_np(name);
#else
(void)name;
#endif
}
#endif /* __VKD3D_THREADS_H */
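With the wrapper above, code written against plain pthreads builds unchanged under MSVC. A small usage sketch follows (hypothetical worker and header name; the file is assumed to be installed as vkd3d_threads.h):

#include "vkd3d_threads.h"

static pthread_mutex_t example_lock; /* hypothetical */

static void *example_worker(void *arg) /* hypothetical worker routine */
{
    pthread_mutex_lock(&example_lock);
    /* ... touch shared state guarded by example_lock ... */
    pthread_mutex_unlock(&example_lock);
    return arg;
}

static void example_spawn_and_join(void *arg)
{
    pthread_t thread;

    pthread_mutex_init(&example_lock, NULL);
    if (!pthread_create(&thread, NULL, example_worker, arg))
        pthread_join(thread, NULL);
    pthread_mutex_destroy(&example_lock);
}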

View File

@ -1341,7 +1341,7 @@ static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool(
{
pool_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
pool_desc.pNext = NULL;
pool_desc.flags = 0;
pool_desc.flags = device->vk_info.EXT_descriptor_indexing ? VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT_EXT : 0;
pool_desc.maxSets = 512;
pool_desc.poolSizeCount = ARRAY_SIZE(pool_sizes);
pool_desc.pPoolSizes = pool_sizes;
@ -1865,6 +1865,10 @@ static void d3d12_command_list_invalidate_bindings(struct d3d12_command_list *li
if (!state)
return;
/* Each pipeline state has its own set layout for UAV counters
* based on their implicit usage in the shader.
* Binding a different pipeline state means having to re-emit
* UAV counters in a new descriptor set (and layout). */
if (state->uav_counter_mask)
{
struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[state->vk_bind_point];
@ -2181,6 +2185,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandL
if (list->allocator)
d3d12_command_allocator_free_command_buffer(list->allocator, list);
vkd3d_free(list->descriptor_updates);
vkd3d_free(list);
d3d12_device_release(device);
@ -2319,6 +2324,9 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list,
list->state = NULL;
/* Recycle deferred descriptor update memory if possible. */
list->descriptor_updates_count = 0;
memset(list->so_counter_buffers, 0, sizeof(list->so_counter_buffers));
memset(list->so_counter_buffer_offsets, 0, sizeof(list->so_counter_buffer_offsets));
@ -2519,15 +2527,54 @@ static void d3d12_command_list_prepare_descriptors(struct d3d12_command_list *li
* time in between. Thus, the contents must not be altered (overwritten
* by an update command, or freed) between when the command is recorded
* and when the command completes executing on the queue."
*
* Even if we have descriptor indexing and UPDATE_AFTER_BIND,
* we need at the very least a new descriptor set.
*/
bindings->descriptor_set = d3d12_command_allocator_allocate_descriptor_set(list->allocator,
root_signature->vk_set_layout);
bindings->in_use = false;
bindings->descriptor_table_dirty_mask |= bindings->descriptor_table_active_mask;
bindings->push_descriptor_dirty_mask |= bindings->push_descriptor_active_mask;
}
static void d3d12_command_list_prepare_uav_counter_descriptors(struct d3d12_command_list *list,
VkPipelineBindPoint bind_point)
{
struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
if (bindings->uav_counter_descriptor_set && !bindings->uav_counter_in_use)
return;
/* We cannot modify bound descriptor sets. We need a new descriptor set if
* we are about to update resource bindings.
*
* The Vulkan spec says:
*
* "The descriptor set contents bound by a call to
* vkCmdBindDescriptorSets may be consumed during host execution of the
* command, or during shader execution of the resulting draws, or any
* time in between. Thus, the contents must not be altered (overwritten
* by an update command, or freed) between when the command is recorded
* and when the command completes executing on the queue."
*
* Even if we have descriptor indexing and UPDATE_AFTER_BIND,
* we need at the very least a new descriptor set.
*/
if (list->state->uav_counter_mask)
{
bindings->uav_counter_descriptor_set = d3d12_command_allocator_allocate_descriptor_set(list->allocator,
list->state->vk_set_layout);
}
else
bindings->uav_counter_descriptor_set = VK_NULL_HANDLE;
bindings->uav_counter_in_use = false;
}
static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_descriptor_write,
VkDescriptorImageInfo *vk_image_info, const struct d3d12_desc *descriptor,
uint32_t descriptor_range_magic, VkDescriptorSet vk_descriptor_set,
@ -2596,23 +2643,125 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des
return true;
}
static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list *list,
VkPipelineBindPoint bind_point, unsigned int index, struct d3d12_desc *base_descriptor)
static void d3d12_command_list_defer_update_descriptor_table(struct d3d12_command_list *list,
VkPipelineBindPoint bind_point, uint64_t table_mask, bool uav)
{
const struct d3d12_desc *base_descriptor;
unsigned i;
struct d3d12_deferred_descriptor_set_update *update;
struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
struct VkWriteDescriptorSet descriptor_writes[24], *current_descriptor_write;
const struct d3d12_root_signature *root_signature = bindings->root_signature;
for (i = 0; i < ARRAY_SIZE(bindings->descriptor_tables); ++i)
{
if (table_mask & ((uint64_t)1 << i))
{
if ((base_descriptor = d3d12_desc_from_gpu_handle(bindings->descriptor_tables[i])))
{
vkd3d_array_reserve((void **)&list->descriptor_updates, &list->descriptor_updates_size,
list->descriptor_updates_count + 1, sizeof(*list->descriptor_updates));
update = &list->descriptor_updates[list->descriptor_updates_count];
update->base_descriptor = base_descriptor;
update->index = i;
update->root_signature = bindings->root_signature;
update->descriptor_set = uav ? bindings->uav_counter_descriptor_set : bindings->descriptor_set;
update->uav = uav;
list->descriptor_updates_count++;
}
else
WARN("Descriptor table %u is not set.\n", i);
}
}
}
static void d3d12_command_list_resolve_descriptor_table_uav(struct d3d12_command_list *list,
const struct d3d12_deferred_descriptor_set_update *update)
{
const struct d3d12_root_signature *root_signature = update->root_signature;
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
struct VkDescriptorImageInfo image_infos[24], *current_image_info;
const struct d3d12_root_descriptor_table *descriptor_table;
const struct d3d12_root_descriptor_table_range *range;
VkDevice vk_device = list->device->vk_device;
unsigned int i, j, descriptor_count;
struct d3d12_desc *descriptor;
descriptor_table = root_signature_get_descriptor_table(root_signature, index);
unsigned int i, j;
unsigned int uav_counter_count;
const struct d3d12_desc *descriptor;
VkWriteDescriptorSet vk_descriptor_writes[VKD3D_SHADER_MAX_UNORDERED_ACCESS_VIEWS];
VkBufferView vk_uav_counter_views[VKD3D_SHADER_MAX_UNORDERED_ACCESS_VIEWS];
VkDescriptorSet vk_descriptor_set;
const struct d3d12_desc *base_descriptor = update->base_descriptor;
descriptor_table = root_signature_get_descriptor_table(root_signature, update->index);
descriptor = base_descriptor;
vk_descriptor_set = update->descriptor_set;
if (!vk_descriptor_set)
return;
/* FIXME: There should be a smarter way than scanning through all the descriptor table ranges for this. */
for (i = 0; i < descriptor_table->range_count; ++i)
{
range = &descriptor_table->ranges[i];
if (range->offset != D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND)
{
descriptor = base_descriptor + range->offset;
}
for (j = 0; j < range->descriptor_count; ++j, ++descriptor)
{
unsigned int register_idx = range->base_register_idx + j;
/* Fish out UAV counters. */
if (range->descriptor_magic == VKD3D_DESCRIPTOR_MAGIC_UAV
&& register_idx < ARRAY_SIZE(vk_uav_counter_views))
{
VkBufferView vk_counter_view = descriptor->magic == VKD3D_DESCRIPTOR_MAGIC_UAV
? descriptor->u.view->vk_counter_view : VK_NULL_HANDLE;
vk_uav_counter_views[register_idx] = vk_counter_view;
}
}
}
uav_counter_count = vkd3d_popcount(list->state->uav_counter_mask);
assert(uav_counter_count <= ARRAY_SIZE(vk_descriptor_writes));
for (i = 0; i < uav_counter_count; ++i)
{
const struct vkd3d_shader_uav_counter_binding *uav_counter = &list->state->uav_counters[i];
assert(vk_uav_counter_views[uav_counter->register_index]);
vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
vk_descriptor_writes[i].pNext = NULL;
vk_descriptor_writes[i].dstSet = vk_descriptor_set;
vk_descriptor_writes[i].dstBinding = uav_counter->binding.binding;
vk_descriptor_writes[i].dstArrayElement = 0;
vk_descriptor_writes[i].descriptorCount = 1;
vk_descriptor_writes[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
vk_descriptor_writes[i].pImageInfo = NULL;
vk_descriptor_writes[i].pBufferInfo = NULL;
vk_descriptor_writes[i].pTexelBufferView = &vk_uav_counter_views[uav_counter->register_index];
}
VK_CALL(vkUpdateDescriptorSets(vk_device, uav_counter_count, vk_descriptor_writes, 0, NULL));
}
static void d3d12_command_list_resolve_descriptor_table_normal(struct d3d12_command_list *list,
const struct d3d12_deferred_descriptor_set_update *update)
{
const struct d3d12_root_descriptor_table *descriptor_table;
struct VkWriteDescriptorSet descriptor_writes[24], *current_descriptor_write;
struct VkDescriptorImageInfo image_infos[24], *current_image_info;
const struct d3d12_root_signature *root_signature = update->root_signature;
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
const struct d3d12_root_descriptor_table_range *range;
VkDevice vk_device = list->device->vk_device;
unsigned int i, j, descriptor_count;
const struct d3d12_desc *descriptor;
const struct d3d12_desc *base_descriptor = update->base_descriptor;
descriptor_table = root_signature_get_descriptor_table(root_signature, update->index);
descriptor = update->base_descriptor;
descriptor_count = 0;
current_descriptor_write = descriptor_writes;
current_image_info = image_infos;
@ -2627,22 +2776,9 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list
for (j = 0; j < range->descriptor_count; ++j, ++descriptor)
{
unsigned int register_idx = range->base_register_idx + j;
/* Track UAV counters. */
if (range->descriptor_magic == VKD3D_DESCRIPTOR_MAGIC_UAV
&& register_idx < ARRAY_SIZE(bindings->vk_uav_counter_views))
{
VkBufferView vk_counter_view = descriptor->magic == VKD3D_DESCRIPTOR_MAGIC_UAV
? descriptor->u.view->vk_counter_view : VK_NULL_HANDLE;
if (bindings->vk_uav_counter_views[register_idx] != vk_counter_view)
bindings->uav_counter_dirty_mask |= 1u << register_idx;
bindings->vk_uav_counter_views[register_idx] = vk_counter_view;
}
if (!vk_write_descriptor_set_from_d3d12_desc(current_descriptor_write,
current_image_info, descriptor, range->descriptor_magic,
bindings->descriptor_set, range->binding, j))
current_image_info, descriptor, range->descriptor_magic,
update->descriptor_set, range->binding, j))
continue;
++descriptor_count;
@ -2662,6 +2798,52 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list
VK_CALL(vkUpdateDescriptorSets(vk_device, descriptor_count, descriptor_writes, 0, NULL));
}
static void d3d12_command_list_resolve_descriptor_table(struct d3d12_command_list *list,
const struct d3d12_deferred_descriptor_set_update *update)
{
if (update->uav)
d3d12_command_list_resolve_descriptor_table_uav(list, update);
else
d3d12_command_list_resolve_descriptor_table_normal(list, update);
}
static void d3d12_command_list_resolve_descriptor_tables(struct d3d12_command_list *list)
{
unsigned i;
for (i = 0; i < list->descriptor_updates_count; i++)
d3d12_command_list_resolve_descriptor_table(list, &list->descriptor_updates[i]);
}
static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list *list,
VkPipelineBindPoint bind_point, unsigned int index, struct d3d12_desc *base_descriptor)
{
struct d3d12_deferred_descriptor_set_update update;
struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
const struct d3d12_root_signature *root_signature = bindings->root_signature;
update.descriptor_set = bindings->descriptor_set;
update.index = index;
update.root_signature = root_signature;
update.base_descriptor = base_descriptor;
update.uav = false;
d3d12_command_list_resolve_descriptor_table_normal(list, &update);
}
static void d3d12_command_list_update_uav_counter_descriptor_table(struct d3d12_command_list *list,
VkPipelineBindPoint bind_point, unsigned int index, struct d3d12_desc *base_descriptor)
{
struct d3d12_deferred_descriptor_set_update update;
struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
const struct d3d12_root_signature *root_signature = bindings->root_signature;
update.descriptor_set = bindings->uav_counter_descriptor_set;
update.index = index;
update.root_signature = root_signature;
update.base_descriptor = base_descriptor;
update.uav = true;
d3d12_command_list_resolve_descriptor_table_uav(list, &update);
}
static bool vk_write_descriptor_set_from_root_descriptor(VkWriteDescriptorSet *vk_descriptor_write,
const struct d3d12_root_parameter *root_parameter, VkDescriptorSet vk_descriptor_set,
VkBufferView *vk_buffer_view, const VkDescriptorBufferInfo *vk_buffer_info)
@ -2766,55 +2948,6 @@ done:
vkd3d_free(buffer_infos);
}
static void d3d12_command_list_update_uav_counter_descriptors(struct d3d12_command_list *list,
VkPipelineBindPoint bind_point)
{
VkWriteDescriptorSet vk_descriptor_writes[VKD3D_SHADER_MAX_UNORDERED_ACCESS_VIEWS];
struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
const struct d3d12_pipeline_state *state = list->state;
VkDevice vk_device = list->device->vk_device;
VkDescriptorSet vk_descriptor_set;
unsigned int uav_counter_count;
unsigned int i;
if (!state || !(state->uav_counter_mask & bindings->uav_counter_dirty_mask))
return;
uav_counter_count = vkd3d_popcount(state->uav_counter_mask);
assert(uav_counter_count <= ARRAY_SIZE(vk_descriptor_writes));
vk_descriptor_set = d3d12_command_allocator_allocate_descriptor_set(list->allocator, state->vk_set_layout);
if (!vk_descriptor_set)
return;
for (i = 0; i < uav_counter_count; ++i)
{
const struct vkd3d_shader_uav_counter_binding *uav_counter = &state->uav_counters[i];
const VkBufferView *vk_uav_counter_views = bindings->vk_uav_counter_views;
assert(vk_uav_counter_views[uav_counter->register_index]);
vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
vk_descriptor_writes[i].pNext = NULL;
vk_descriptor_writes[i].dstSet = vk_descriptor_set;
vk_descriptor_writes[i].dstBinding = uav_counter->binding.binding;
vk_descriptor_writes[i].dstArrayElement = 0;
vk_descriptor_writes[i].descriptorCount = 1;
vk_descriptor_writes[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
vk_descriptor_writes[i].pImageInfo = NULL;
vk_descriptor_writes[i].pBufferInfo = NULL;
vk_descriptor_writes[i].pTexelBufferView = &vk_uav_counter_views[uav_counter->register_index];
}
VK_CALL(vkUpdateDescriptorSets(vk_device, uav_counter_count, vk_descriptor_writes, 0, NULL));
VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bind_point,
state->vk_pipeline_layout, state->set_index, 1, &vk_descriptor_set, 0, NULL));
bindings->uav_counter_dirty_mask = 0;
}
static void d3d12_command_list_update_descriptors(struct d3d12_command_list *list,
VkPipelineBindPoint bind_point)
{
@ -2827,31 +2960,86 @@ static void d3d12_command_list_update_descriptors(struct d3d12_command_list *lis
if (!rs || !rs->vk_set_layout)
return;
if ((bindings->descriptor_table_active_mask | bindings->push_descriptor_dirty_mask |
(list->state->uav_counter_mask & bindings->uav_counter_dirty_mask)) == 0)
{
/* Nothing is dirty, so just return early. */
return;
}
if (bindings->descriptor_table_dirty_mask || bindings->push_descriptor_dirty_mask)
d3d12_command_list_prepare_descriptors(list, bind_point);
if (list->state->uav_counter_mask & bindings->uav_counter_dirty_mask)
d3d12_command_list_prepare_uav_counter_descriptors(list, bind_point);
for (i = 0; i < ARRAY_SIZE(bindings->descriptor_tables); ++i)
if (list->device->vk_info.EXT_descriptor_indexing)
{
if (bindings->descriptor_table_dirty_mask & ((uint64_t)1 << i))
d3d12_command_list_defer_update_descriptor_table(list, bind_point, bindings->descriptor_table_dirty_mask,
false);
}
else
{
/* FIXME: FOR_EACH_BIT */
for (i = 0; i < ARRAY_SIZE(bindings->descriptor_tables); ++i)
{
if ((base_descriptor = d3d12_desc_from_gpu_handle(bindings->descriptor_tables[i])))
d3d12_command_list_update_descriptor_table(list, bind_point, i, base_descriptor);
else
WARN("Descriptor table %u is not set.\n", i);
if (bindings->descriptor_table_dirty_mask & ((uint64_t) 1 << i))
{
if ((base_descriptor = d3d12_desc_from_gpu_handle(bindings->descriptor_tables[i])))
d3d12_command_list_update_descriptor_table(list, bind_point, i, base_descriptor);
else
WARN("Descriptor table %u is not set.\n", i);
}
}
}
bindings->descriptor_table_dirty_mask = 0;
/* Need to go through all descriptor tables here in the root signature,
* not just descriptor_table_dirty_mask. Binding a different shader may not invalidate descriptor tables,
* but it may invalidate the UAV counter set. */
if (bindings->uav_counter_dirty_mask)
{
if (list->device->vk_info.EXT_descriptor_indexing)
{
d3d12_command_list_defer_update_descriptor_table(list, bind_point, bindings->descriptor_table_active_mask,
true);
}
else
{
/* FIXME: FOR_EACH_BIT */
for (i = 0; i < ARRAY_SIZE(bindings->descriptor_tables); i++)
{
if (bindings->descriptor_table_active_mask & ((uint64_t) 1 << i))
{
if ((base_descriptor = d3d12_desc_from_gpu_handle(bindings->descriptor_tables[i])))
d3d12_command_list_update_uav_counter_descriptor_table(list, bind_point, i, base_descriptor);
else
WARN("Descriptor table %u is not set.\n", i);
}
}
}
}
bindings->uav_counter_dirty_mask = 0;
d3d12_command_list_update_push_descriptors(list, bind_point);
if (bindings->descriptor_set)
/* Don't rebind the same descriptor set as long as we're in the same pipeline layout. */
if (bindings->descriptor_set && bindings->descriptor_set != bindings->descriptor_set_bound)
{
VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bind_point,
rs->vk_pipeline_layout, rs->main_set, 1, &bindings->descriptor_set, 0, NULL));
bindings->in_use = true;
bindings->descriptor_set_bound = bindings->descriptor_set;
}
d3d12_command_list_update_uav_counter_descriptors(list, bind_point);
/* Don't rebind the same descriptor set as long as we're in the same pipeline layout. */
if (bindings->uav_counter_descriptor_set &&
bindings->uav_counter_descriptor_set != bindings->uav_counter_descriptor_set_bound)
{
VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bind_point,
list->state->vk_pipeline_layout, list->state->set_index, 1, &bindings->uav_counter_descriptor_set, 0, NULL));
bindings->uav_counter_in_use = true;
bindings->uav_counter_descriptor_set_bound = bindings->uav_counter_descriptor_set;
}
}
static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list)
@ -3031,8 +3219,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12Graphics
d3d12_command_list_end_current_render_pass(list);
buffer_copy.srcOffset = src_offset;
buffer_copy.dstOffset = dst_offset;
buffer_copy.srcOffset = src_offset + src_resource->heap_offset;
buffer_copy.dstOffset = dst_offset + dst_resource->heap_offset;
buffer_copy.size = byte_count;
VK_CALL(vkCmdCopyBuffer(list->vk_command_buffer,
@ -3450,8 +3638,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm
assert(d3d12_resource_is_buffer(src_resource));
assert(src_resource->desc.Width == dst_resource->desc.Width);
vk_buffer_copy.srcOffset = 0;
vk_buffer_copy.dstOffset = 0;
vk_buffer_copy.srcOffset = src_resource->heap_offset;
vk_buffer_copy.dstOffset = dst_resource->heap_offset;
vk_buffer_copy.size = dst_resource->desc.Width;
VK_CALL(vkCmdCopyBuffer(list->vk_command_buffer,
src_resource->u.vk_buffer, dst_resource->u.vk_buffer, 1, &vk_buffer_copy));
@ -3932,6 +4120,9 @@ static void d3d12_command_list_set_root_signature(struct d3d12_command_list *lis
bindings->root_signature = root_signature;
bindings->descriptor_set = VK_NULL_HANDLE;
bindings->uav_counter_descriptor_set = VK_NULL_HANDLE;
bindings->descriptor_set_bound = VK_NULL_HANDLE;
bindings->uav_counter_descriptor_set_bound = VK_NULL_HANDLE;
bindings->descriptor_table_dirty_mask = 0;
bindings->descriptor_table_active_mask = 0;
bindings->push_descriptor_dirty_mask = 0;
@ -4076,6 +4267,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list,
resource = vkd3d_gpu_va_allocator_dereference(&list->device->gpu_va_allocator, gpu_address);
buffer_info.buffer = resource->u.vk_buffer;
buffer_info.offset = gpu_address - resource->gpu_address;
buffer_info.range = resource->desc.Width - buffer_info.offset;
buffer_info.range = min(buffer_info.range, vk_info->device_limits.maxUniformBufferRange);
@ -5309,6 +5501,9 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d
d3d12_device_add_ref(list->device = device);
list->allocator = allocator;
list->descriptor_updates = NULL;
list->descriptor_updates_count = 0;
list->descriptor_updates_size = 0;
if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator, list)))
{
@ -5546,6 +5741,13 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm
return;
}
/* Descriptors in a 1.0 root signature are VOLATILE by default, so
* the descriptor heap only needs to be valid right before we submit to the GPU.
* If we have EXT_descriptor_indexing enabled with UpdateAfterBind, we update
* descriptor sets here rather than while we're recording the command buffer.
* For each submission of the command buffer, we can modify the descriptor heap as we please. */
d3d12_command_list_resolve_descriptor_tables(cmd_list);
buffers[i] = cmd_list->vk_command_buffer;
}

View File

@ -1334,6 +1334,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
device->feature_options.CrossAdapterRowMajorTextureSupported = FALSE;
/* SPV_EXT_shader_viewport_index_layer */
device->feature_options.VPAndRTArrayIndexFromAnyShaderFeedingRasterizerSupportedWithoutGSEmulation = FALSE;
/* FIXME: Does this actually work on NV which has 64k bufferImage alignment quirks with VkDeviceMemory? */
device->feature_options.ResourceHeapTier = D3D12_RESOURCE_HEAP_TIER_2;
if ((vr = VK_CALL(vkEnumerateDeviceExtensionProperties(physical_device, NULL, &count, NULL))) < 0)
@ -1436,11 +1438,25 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
}
if (vulkan_info->EXT_descriptor_indexing && descriptor_indexing
&& (descriptor_indexing->descriptorBindingUniformBufferUpdateAfterBind
|| descriptor_indexing->descriptorBindingStorageBufferUpdateAfterBind
&& descriptor_indexing->descriptorBindingUniformBufferUpdateAfterBind
&& descriptor_indexing->descriptorBindingSampledImageUpdateAfterBind
&& descriptor_indexing->descriptorBindingStorageImageUpdateAfterBind
&& descriptor_indexing->descriptorBindingUniformTexelBufferUpdateAfterBind)
{
TRACE("Enabling VK_EXT_descriptor_indexing for volatile descriptor updates.\n");
}
else
{
WARN("VK_EXT_descriptor indexing not supported in sufficient capacity. Volatile descriptor updates will not work.\n");
vulkan_info->EXT_descriptor_indexing = false;
}
if (vulkan_info->EXT_descriptor_indexing && descriptor_indexing
&& (descriptor_indexing->descriptorBindingUniformBufferUpdateAfterBind
|| descriptor_indexing->descriptorBindingUniformTexelBufferUpdateAfterBind
|| descriptor_indexing->descriptorBindingStorageBufferUpdateAfterBind
|| descriptor_indexing->descriptorBindingStorageTexelBufferUpdateAfterBind)
&& !physical_device_info->descriptor_indexing_properties.robustBufferAccessUpdateAfterBind)
&& !physical_device_info->descriptor_indexing_properties.robustBufferAccessUpdateAfterBind)
{
WARN("Disabling robust buffer access for the update after bind feature.\n");
features->robustBufferAccess = VK_FALSE;
@ -1822,12 +1838,49 @@ static void d3d12_device_destroy_pipeline_cache(struct d3d12_device *device)
pthread_mutex_destroy(&device->mutex);
}
D3D12_GPU_VIRTUAL_ADDRESS vkd3d_gpu_va_allocator_allocate(struct vkd3d_gpu_va_allocator *allocator,
size_t size, void *ptr)
#define VKD3D_MAX_VA_SLAB_ALLOCATIONS (64 * 1024)
#define VKD3D_BASE_VA_SLAB (0x1000000000ull)
#define VKD3D_BASE_VA_FALLBACK (0x8000000000000000ull)
#define VKD3D_SLAB_ALLOCATION_SIZE (0x100000000ull)
#define VKD3D_SLAB_ALLOCATION_SIZE_LOG2 32
static D3D12_GPU_VIRTUAL_ADDRESS vkd3d_gpu_va_allocator_allocate_fallback(struct vkd3d_gpu_va_allocator *allocator,
size_t size, size_t alignment, void *ptr)
{
D3D12_GPU_VIRTUAL_ADDRESS ceiling = ~(D3D12_GPU_VIRTUAL_ADDRESS)0;
struct vkd3d_gpu_va_allocation *allocation;
if (!vkd3d_array_reserve((void **)&allocator->fallback_mem_allocations, &allocator->fallback_mem_allocations_size,
allocator->fallback_mem_allocation_count + 1, sizeof(*allocator->fallback_mem_allocations)))
{
return 0;
}
allocator->fallback_mem_floor = (allocator->fallback_mem_floor + alignment - 1) & ~((D3D12_GPU_VIRTUAL_ADDRESS)alignment - 1);
if (size > ceiling || ceiling - size < allocator->fallback_mem_floor)
{
return 0;
}
allocation = &allocator->fallback_mem_allocations[allocator->fallback_mem_allocation_count++];
allocation->base = allocator->fallback_mem_floor;
allocation->size = size;
allocation->ptr = ptr;
/* This pointer is bumped and never lowered on a free.
* However, this will only fail once we have exhausted 63 bits of address space. */
allocator->fallback_mem_floor += size;
return allocation->base;
}
static D3D12_GPU_VIRTUAL_ADDRESS vkd3d_gpu_va_allocator_allocate_slab(struct vkd3d_gpu_va_allocator *allocator,
size_t size, size_t alignment, void *ptr)
{
int rc;
unsigned vacant_index;
D3D12_GPU_VIRTUAL_ADDRESS virtual_address = 0;
if ((rc = pthread_mutex_lock(&allocator->mutex)))
{
@ -1835,29 +1888,56 @@ D3D12_GPU_VIRTUAL_ADDRESS vkd3d_gpu_va_allocator_allocate(struct vkd3d_gpu_va_al
return 0;
}
if (!vkd3d_array_reserve((void **)&allocator->allocations, &allocator->allocations_size,
allocator->allocation_count + 1, sizeof(*allocator->allocations)))
TRACE("Allocating %zu bytes (%zu align) of VA from slab allocator.\n", size, alignment);
if (allocator->mem_vacant_count > 0)
{
pthread_mutex_unlock(&allocator->mutex);
return 0;
vacant_index = allocator->mem_vacant[--allocator->mem_vacant_count];
/* It is critical that the multiplication happens in 64-bit to not overflow. */
virtual_address = VKD3D_BASE_VA_SLAB + vacant_index * VKD3D_SLAB_ALLOCATION_SIZE;
TRACE("Allocating VA: 0x%llx: vacant index %u from slab.\n",
(unsigned long long)virtual_address, vacant_index);
assert(!allocator->slab_mem_allocations[vacant_index].ptr);
allocator->slab_mem_allocations[vacant_index].ptr = ptr;
allocator->slab_mem_allocations[vacant_index].size = size;
}
if (size > ceiling || ceiling - size < allocator->floor)
if (virtual_address == 0)
{
pthread_mutex_unlock(&allocator->mutex);
return 0;
TRACE("Slab allocator is empty, allocating %zu bytes (%zu align) of VA from fallback allocator.\n",
size, alignment);
/* Fall back to slow allocator. */
virtual_address = vkd3d_gpu_va_allocator_allocate_fallback(allocator, size, alignment, ptr);
}
allocation = &allocator->allocations[allocator->allocation_count++];
allocation->base = allocator->floor;
allocation->size = size;
allocation->ptr = ptr;
allocator->floor += size;
pthread_mutex_unlock(&allocator->mutex);
return virtual_address;
}
return allocation->base;
D3D12_GPU_VIRTUAL_ADDRESS vkd3d_gpu_va_allocator_allocate(struct vkd3d_gpu_va_allocator *allocator,
size_t size, size_t alignment, void *ptr)
{
D3D12_GPU_VIRTUAL_ADDRESS virtual_address;
int rc;
size_t aligned_size;
aligned_size = size > alignment ? size : alignment;
if (aligned_size > VKD3D_SLAB_ALLOCATION_SIZE)
{
/* For massive VA allocations, go straight to high-mem with a slower allocator. */
if ((rc = pthread_mutex_lock(&allocator->mutex)))
{
ERR("Failed to lock mutex, error %d.\n", rc);
return 0;
}
virtual_address = vkd3d_gpu_va_allocator_allocate_fallback(allocator, size, alignment, ptr);
pthread_mutex_unlock(&allocator->mutex);
}
else
virtual_address = vkd3d_gpu_va_allocator_allocate_slab(allocator, size, alignment, ptr);
return virtual_address;
}
static int vkd3d_gpu_va_allocation_compare(const void *k, const void *e)
@ -1872,24 +1952,93 @@ static int vkd3d_gpu_va_allocation_compare(const void *k, const void *e)
return 0;
}
static void *vkd3d_gpu_va_allocator_dereference_slab(struct vkd3d_gpu_va_allocator *allocator, D3D12_GPU_VIRTUAL_ADDRESS address)
{
D3D12_GPU_VIRTUAL_ADDRESS base_offset;
uint64_t base_index;
const struct vkd3d_gpu_va_slab_entry *slab;
base_offset = address - VKD3D_BASE_VA_SLAB;
base_index = base_offset >> VKD3D_SLAB_ALLOCATION_SIZE_LOG2;
if (base_index >= VKD3D_MAX_VA_SLAB_ALLOCATIONS)
{
ERR("Accessed slab size class out of range.\n");
return NULL;
}
slab = &allocator->slab_mem_allocations[base_index];
base_offset -= base_index * VKD3D_SLAB_ALLOCATION_SIZE;
if (base_offset >= slab->size)
{
ERR("Accessed slab out of range.\n");
return NULL;
}
return slab->ptr;
}
static void vkd3d_gpu_va_allocator_free_slab(struct vkd3d_gpu_va_allocator *allocator, D3D12_GPU_VIRTUAL_ADDRESS address)
{
D3D12_GPU_VIRTUAL_ADDRESS base_offset;
unsigned base_index;
struct vkd3d_gpu_va_slab_entry *slab;
base_offset = address - VKD3D_BASE_VA_SLAB;
base_index = base_offset >> VKD3D_SLAB_ALLOCATION_SIZE_LOG2;
if (base_index >= VKD3D_MAX_VA_SLAB_ALLOCATIONS)
{
ERR("Accessed slab size class out of range.\n");
return;
}
slab = &allocator->slab_mem_allocations[base_index];
if (slab->ptr == NULL)
{
ERR("Attempting to free NULL VA.\n");
return;
}
if (allocator->mem_vacant_count >= VKD3D_MAX_VA_SLAB_ALLOCATIONS)
{
ERR("Invalid free, slab size class is fully freed.\n");
return;
}
TRACE("Freeing VA: 0x%llx: index %u from slab.\n",
(unsigned long long)address, base_index);
slab->ptr = NULL;
allocator->mem_vacant[allocator->mem_vacant_count++] = base_index;
}
void *vkd3d_gpu_va_allocator_dereference(struct vkd3d_gpu_va_allocator *allocator,
D3D12_GPU_VIRTUAL_ADDRESS address)
{
struct vkd3d_gpu_va_allocation *allocation;
int rc;
if ((rc = pthread_mutex_lock(&allocator->mutex)))
/* If we land in the non-fallback region, dereferencing a VA is lockless. The base pointer is immutable,
* and the only way we can have a data race is if some other thread is poking into the slab_mem_allocation[class][base_index] block.
* This can only happen if someone is trying to free the entry while we're dereferencing it, which would be a serious app bug. */
if (address < VKD3D_BASE_VA_FALLBACK)
{
ERR("Failed to lock mutex, error %d.\n", rc);
return NULL;
return vkd3d_gpu_va_allocator_dereference_slab(allocator, address);
}
else
{
/* Slow fallback. */
if ((rc = pthread_mutex_lock(&allocator->mutex)))
{
ERR("Failed to lock mutex, error %d.\n", rc);
return NULL;
}
allocation = bsearch(&address, allocator->allocations, allocator->allocation_count,
sizeof(*allocation), vkd3d_gpu_va_allocation_compare);
allocation = bsearch(&address, allocator->fallback_mem_allocations, allocator->fallback_mem_allocation_count,
sizeof(*allocation), vkd3d_gpu_va_allocation_compare);
pthread_mutex_unlock(&allocator->mutex);
return allocation ? allocation->ptr : NULL;
pthread_mutex_unlock(&allocator->mutex);
return allocation ? allocation->ptr : NULL;
}
}
void vkd3d_gpu_va_allocator_free(struct vkd3d_gpu_va_allocator *allocator, D3D12_GPU_VIRTUAL_ADDRESS address)
@ -1904,16 +2053,23 @@ void vkd3d_gpu_va_allocator_free(struct vkd3d_gpu_va_allocator *allocator, D3D12
return;
}
allocation = bsearch(&address, allocator->allocations, allocator->allocation_count,
sizeof(*allocation), vkd3d_gpu_va_allocation_compare);
if (allocation && allocation->base == address)
if (address < VKD3D_BASE_VA_FALLBACK)
{
index = allocation - allocator->allocations;
--allocator->allocation_count;
if (index != allocator->allocation_count)
vkd3d_gpu_va_allocator_free_slab(allocator, address);
}
else
{
allocation = bsearch(&address, allocator->fallback_mem_allocations, allocator->fallback_mem_allocation_count,
sizeof(*allocation), vkd3d_gpu_va_allocation_compare);
if (allocation && allocation->base == address)
{
memmove(&allocator->allocations[index], &allocator->allocations[index + 1],
(allocator->allocation_count - index) * sizeof(*allocation));
index = allocation - allocator->fallback_mem_allocations;
--allocator->fallback_mem_allocation_count;
if (index != allocator->fallback_mem_allocation_count)
{
memmove(&allocator->fallback_mem_allocations[index], &allocator->fallback_mem_allocations[index + 1],
(allocator->fallback_mem_allocation_count - index) * sizeof(*allocation));
}
}
}
@ -1923,29 +2079,59 @@ void vkd3d_gpu_va_allocator_free(struct vkd3d_gpu_va_allocator *allocator, D3D12
static bool vkd3d_gpu_va_allocator_init(struct vkd3d_gpu_va_allocator *allocator)
{
int rc;
int i;
memset(allocator, 0, sizeof(*allocator));
allocator->floor = 0x1000;
allocator->fallback_mem_floor = VKD3D_BASE_VA_FALLBACK;
/* To remain lock-free, we cannot grow these lists after the fact. If we commit to a maximum number of allocations
* here, we can dereference without taking a lock, as the base pointer never changes.
* We would be able to grow more seamlessly using an array of pointers,
* but that would make dereferencing slightly less efficient. */
allocator->slab_mem_allocations = vkd3d_calloc(VKD3D_MAX_VA_SLAB_ALLOCATIONS, sizeof(*allocator->slab_mem_allocations));
if (!allocator->slab_mem_allocations)
goto error;
/* Otherwise we need 32-bit indices. */
assert(VKD3D_MAX_VA_SLAB_ALLOCATIONS <= 64 * 1024);
allocator->mem_vacant = vkd3d_malloc(VKD3D_MAX_VA_SLAB_ALLOCATIONS * sizeof(uint16_t));
if (!allocator->mem_vacant)
goto error;
/* Build a stack of which slab indices are available for allocation.
* Place lowest indices last (first to be popped off stack). */
for (i = 0; i < VKD3D_MAX_VA_SLAB_ALLOCATIONS; i++)
allocator->mem_vacant[i] = (VKD3D_MAX_VA_SLAB_ALLOCATIONS - 1) - i;
allocator->mem_vacant_count = VKD3D_MAX_VA_SLAB_ALLOCATIONS;
if ((rc = pthread_mutex_init(&allocator->mutex, NULL)))
{
ERR("Failed to initialize mutex, error %d.\n", rc);
return false;
goto error;
}
return true;
error:
vkd3d_free(allocator->slab_mem_allocations);
vkd3d_free(allocator->mem_vacant);
return false;
}
static void vkd3d_gpu_va_allocator_cleanup(struct vkd3d_gpu_va_allocator *allocator)
{
int rc;
vkd3d_free(allocator->slab_mem_allocations);
vkd3d_free(allocator->mem_vacant);
if ((rc = pthread_mutex_lock(&allocator->mutex)))
{
ERR("Failed to lock mutex, error %d.\n", rc);
return;
}
vkd3d_free(allocator->allocations);
vkd3d_free(allocator->fallback_mem_allocations);
pthread_mutex_unlock(&allocator->mutex);
pthread_mutex_destroy(&allocator->mutex);
}
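For reference, a hypothetical caller of the reworked allocator, mirroring how resource.c below invokes the new three-argument allocate (sketch only, not part of the patch):

/* Hypothetical helper illustrating the updated API with the alignment parameter. */
static void *sketch_allocate_and_lookup(struct d3d12_device *device,
        struct d3d12_resource *resource, UINT64 width)
{
    D3D12_GPU_VIRTUAL_ADDRESS va = vkd3d_gpu_va_allocator_allocate(&device->gpu_va_allocator,
            width, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, resource);

    /* Lockless slab lookup for small allocations; mutex + bsearch only for the high fallback range. */
    return vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, va);
}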

View File

@ -292,6 +292,8 @@ static ULONG STDMETHODCALLTYPE d3d12_heap_AddRef(ID3D12Heap *iface)
return refcount;
}
static ULONG d3d12_resource_decref(struct d3d12_resource *resource);
static void d3d12_heap_destroy(struct d3d12_heap *heap)
{
struct d3d12_device *device = heap->device;
@ -299,6 +301,9 @@ static void d3d12_heap_destroy(struct d3d12_heap *heap)
TRACE("Destroying heap %p.\n", heap);
if (heap->buffer_resource)
d3d12_resource_decref(heap->buffer_resource);
vkd3d_private_store_destroy(&heap->private_store);
VK_CALL(vkFreeMemory(device->vk_device, heap->vk_memory, NULL));
@ -539,6 +544,12 @@ static HRESULT validate_heap_desc(const D3D12_HEAP_DESC *desc, const struct d3d1
return S_OK;
}
static HRESULT d3d12_resource_create(struct d3d12_device *device,
const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state,
const D3D12_CLEAR_VALUE *optimized_clear_value, bool placed,
struct d3d12_resource **resource);
static HRESULT d3d12_heap_init(struct d3d12_heap *heap,
struct d3d12_device *device, const D3D12_HEAP_DESC *desc, const struct d3d12_resource *resource)
{
@ -546,6 +557,10 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap,
VkDeviceSize vk_memory_size;
HRESULT hr;
int rc;
bool buffers_allowed;
D3D12_RESOURCE_DESC resource_desc;
D3D12_RESOURCE_STATES initial_resource_state;
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
heap->ID3D12Heap_iface.lpVtbl = &d3d12_heap_vtbl;
heap->refcount = 1;
@ -556,6 +571,7 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap,
heap->map_ptr = NULL;
heap->map_count = 0;
heap->buffer_resource = NULL;
if (!heap->desc.Properties.CreationNodeMask)
heap->desc.Properties.CreationNodeMask = 1;
@ -583,6 +599,53 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap,
return hr;
}
buffers_allowed = !(heap->desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS);
if (buffers_allowed && !resource)
{
/* Create a single omnipotent buffer which fills the entire heap.
* Whenever we place buffer resources on this heap, we'll just offset into this VkBuffer.
* This allows us to keep the VA space somewhat sane, and keeps the number of (limited) VA allocations down.
* One possible downside is that the buffer might be slightly slower to access,
* but D3D12 has very lenient usage flags for buffers. */
memset(&resource_desc, 0, sizeof(resource_desc));
resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
resource_desc.Width = desc->SizeInBytes;
resource_desc.Height = 1;
resource_desc.DepthOrArraySize = 1;
resource_desc.MipLevels = 1;
resource_desc.SampleDesc.Count = 1;
resource_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
switch (desc->Properties.Type)
{
case D3D12_HEAP_TYPE_UPLOAD:
initial_resource_state = D3D12_RESOURCE_STATE_GENERIC_READ;
break;
case D3D12_HEAP_TYPE_READBACK:
initial_resource_state = D3D12_RESOURCE_STATE_COPY_DEST;
break;
default:
/* Upload and readback heaps do not allow UAV access, so only enable this flag for other heaps. */
resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
initial_resource_state = D3D12_RESOURCE_STATE_COMMON;
break;
}
if (FAILED(hr = d3d12_resource_create(device, &desc->Properties, desc->Flags,
&resource_desc, initial_resource_state,
NULL, false, &heap->buffer_resource)))
{
heap->buffer_resource = NULL;
return hr;
}
/* This internal resource should not own a reference on the device.
* d3d12_resource_create takes a reference on the device. */
d3d12_device_release(device);
}
if (resource)
{
if (d3d12_resource_is_buffer(resource))
@ -600,12 +663,19 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap,
heap->desc.SizeInBytes = vk_memory_size;
}
else if (heap->buffer_resource)
{
hr = vkd3d_allocate_buffer_memory(device, heap->buffer_resource->u.vk_buffer,
&heap->desc.Properties, heap->desc.Flags,
&heap->vk_memory, &heap->vk_memory_type, &vk_memory_size);
}
else
{
/* Allocate generic memory which should hopefully match up with whatever resources
* we want to place here. */
memory_requirements.size = heap->desc.SizeInBytes;
memory_requirements.alignment = heap->desc.Alignment;
memory_requirements.memoryTypeBits = ~(uint32_t)0;
hr = vkd3d_allocate_device_memory(device, &heap->desc.Properties,
heap->desc.Flags, &memory_requirements, NULL,
&heap->vk_memory, &heap->vk_memory_type);
@ -614,6 +684,11 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap,
{
vkd3d_private_store_destroy(&heap->private_store);
pthread_mutex_destroy(&heap->mutex);
if (heap->buffer_resource)
{
d3d12_resource_decref(heap->buffer_resource);
heap->buffer_resource = NULL;
}
return hr;
}
@ -1003,13 +1078,16 @@ static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12
if (resource->flags & VKD3D_RESOURCE_EXTERNAL)
return;
if (resource->gpu_address)
vkd3d_gpu_va_allocator_free(&device->gpu_va_allocator, resource->gpu_address);
if (!(resource->flags & VKD3D_RESOURCE_PLACED_BUFFER))
{
if (resource->gpu_address)
vkd3d_gpu_va_allocator_free(&device->gpu_va_allocator, resource->gpu_address);
if (d3d12_resource_is_buffer(resource))
VK_CALL(vkDestroyBuffer(device->vk_device, resource->u.vk_buffer, NULL));
else
VK_CALL(vkDestroyImage(device->vk_device, resource->u.vk_image, NULL));
if (d3d12_resource_is_buffer(resource))
VK_CALL(vkDestroyBuffer(device->vk_device, resource->u.vk_buffer, NULL));
else
VK_CALL(vkDestroyImage(device->vk_device, resource->u.vk_image, NULL));
}
if (resource->flags & VKD3D_RESOURCE_DEDICATED_HEAP)
d3d12_heap_destroy(resource->heap);
@ -1669,7 +1747,7 @@ static bool d3d12_resource_validate_heap_properties(const struct d3d12_resource
static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12_device *device,
const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state,
const D3D12_CLEAR_VALUE *optimized_clear_value)
const D3D12_CLEAR_VALUE *optimized_clear_value, bool placed)
{
HRESULT hr;
@ -1699,6 +1777,8 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12
resource->gpu_address = 0;
resource->flags = 0;
if (placed && desc->Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
resource->flags |= VKD3D_RESOURCE_PLACED_BUFFER;
if (FAILED(hr = d3d12_resource_validate_desc(&resource->desc)))
return hr;
@ -1706,11 +1786,18 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12
switch (desc->Dimension)
{
case D3D12_RESOURCE_DIMENSION_BUFFER:
/* We'll inherit a VkBuffer reference from the heap with an implied offset. */
if (placed)
{
resource->u.vk_buffer = VK_NULL_HANDLE;
break;
}
if (FAILED(hr = vkd3d_create_buffer(device, heap_properties, heap_flags,
&resource->desc, &resource->u.vk_buffer)))
return hr;
if (!(resource->gpu_address = vkd3d_gpu_va_allocator_allocate(&device->gpu_va_allocator,
desc->Width, resource)))
desc->Width, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, resource)))
{
ERR("Failed to allocate GPU VA.\n");
d3d12_resource_destroy(resource, device);
@ -1755,7 +1842,7 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12
static HRESULT d3d12_resource_create(struct d3d12_device *device,
const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state,
const D3D12_CLEAR_VALUE *optimized_clear_value, struct d3d12_resource **resource)
const D3D12_CLEAR_VALUE *optimized_clear_value, bool placed, struct d3d12_resource **resource)
{
struct d3d12_resource *object;
HRESULT hr;
@ -1764,7 +1851,7 @@ static HRESULT d3d12_resource_create(struct d3d12_device *device,
return E_OUTOFMEMORY;
if (FAILED(hr = d3d12_resource_init(object, device, heap_properties, heap_flags,
desc, initial_state, optimized_clear_value)))
desc, initial_state, optimized_clear_value, placed)))
{
vkd3d_free(object);
return hr;
@ -1806,7 +1893,7 @@ HRESULT d3d12_committed_resource_create(struct d3d12_device *device,
}
if (FAILED(hr = d3d12_resource_create(device, heap_properties, heap_flags,
desc, initial_state, optimized_clear_value, &object)))
desc, initial_state, optimized_clear_value, false, &object)))
return hr;
if (FAILED(hr = vkd3d_allocate_resource_memory(device, object, heap_properties, heap_flags)))
@ -1830,6 +1917,16 @@ static HRESULT vkd3d_bind_heap_memory(struct d3d12_device *device,
VkMemoryRequirements requirements;
VkResult vr;
if (resource->flags & VKD3D_RESOURCE_PLACED_BUFFER)
{
/* Just inherit the buffer from the heap. */
resource->u.vk_buffer = heap->buffer_resource->u.vk_buffer;
resource->heap = heap;
resource->heap_offset = heap_offset;
resource->gpu_address = heap->buffer_resource->gpu_address + heap_offset;
return S_OK;
}
if (d3d12_resource_is_buffer(resource))
VK_CALL(vkGetBufferMemoryRequirements(vk_device, resource->u.vk_buffer, &requirements));
else
@ -1879,7 +1976,7 @@ HRESULT d3d12_placed_resource_create(struct d3d12_device *device, struct d3d12_h
HRESULT hr;
if (FAILED(hr = d3d12_resource_create(device, &heap->desc.Properties, heap->desc.Flags,
desc, initial_state, optimized_clear_value, &object)))
desc, initial_state, optimized_clear_value, true, &object)))
return hr;
if (FAILED(hr = vkd3d_bind_heap_memory(device, object, heap, heap_offset)))
@ -1903,7 +2000,7 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device,
HRESULT hr;
if (FAILED(hr = d3d12_resource_create(device, NULL, 0,
desc, initial_state, optimized_clear_value, &object)))
desc, initial_state, optimized_clear_value, false, &object)))
return hr;
TRACE("Created reserved resource %p.\n", object);
@ -2205,7 +2302,7 @@ static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device,
assert(d3d12_resource_is_buffer(resource));
return vkd3d_create_buffer_view(device, resource->u.vk_buffer,
format, offset * element_size, size * element_size, view);
format, resource->heap_offset + offset * element_size, size * element_size, view);
}
static void vkd3d_set_view_swizzle_for_format(VkComponentMapping *components,
@ -2807,7 +2904,7 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_
format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false);
if (!vkd3d_create_vk_buffer_view(device, counter_resource->u.vk_buffer, format,
desc->u.Buffer.CounterOffsetInBytes, sizeof(uint32_t), &view->vk_counter_view))
desc->u.Buffer.CounterOffsetInBytes + resource->heap_offset, sizeof(uint32_t), &view->vk_counter_view))
{
WARN("Failed to create counter buffer view.\n");
view->vk_counter_view = VK_NULL_HANDLE;
@ -2913,12 +3010,18 @@ bool vkd3d_create_raw_buffer_view(struct d3d12_device *device,
{
const struct vkd3d_format *format;
struct d3d12_resource *resource;
uint64_t range;
uint64_t offset;
format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false);
resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, gpu_address);
assert(d3d12_resource_is_buffer(resource));
offset = gpu_address - resource->gpu_address;
range = min(resource->desc.Width - offset, device->vk_info.device_limits.maxStorageBufferRange);
return vkd3d_create_vk_buffer_view(device, resource->u.vk_buffer, format,
gpu_address - resource->gpu_address, VK_WHOLE_SIZE, vk_buffer_view);
offset, range, vk_buffer_view);
}
/* samplers */

View File

@ -737,21 +737,43 @@ static HRESULT vkd3d_create_descriptor_set_layout(struct d3d12_device *device,
VkDescriptorSetLayoutCreateFlags flags, unsigned int binding_count,
const VkDescriptorSetLayoutBinding *bindings, VkDescriptorSetLayout *set_layout)
{
unsigned int i;
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
VkDescriptorSetLayoutCreateInfo set_desc;
VkResult vr;
VkDescriptorSetLayoutBindingFlagsCreateInfoEXT flags_info;
VkDescriptorBindingFlagsEXT *binding_flags = NULL;
set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
set_desc.pNext = NULL;
set_desc.flags = flags;
set_desc.bindingCount = binding_count;
set_desc.pBindings = bindings;
if (!(flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR) && device->vk_info.EXT_descriptor_indexing)
{
set_desc.flags |= VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT_EXT;
flags_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT;
flags_info.pNext = NULL;
flags_info.bindingCount = binding_count;
binding_flags = vkd3d_malloc(sizeof(*binding_flags) * binding_count);
for (i = 0; i < binding_count; i++)
{
binding_flags[i] = VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT_EXT |
VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT_EXT;
}
flags_info.pBindingFlags = binding_flags;
set_desc.pNext = &flags_info;
}
if ((vr = VK_CALL(vkCreateDescriptorSetLayout(device->vk_device, &set_desc, NULL, set_layout))) < 0)
{
WARN("Failed to create Vulkan descriptor set layout, vr %d.\n", vr);
vkd3d_free(binding_flags);
return hresult_from_vk_result(vr);
}
vkd3d_free(binding_flags);
return S_OK;
}

View File

@ -202,24 +202,35 @@ HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker,
HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker,
struct d3d12_device *device) DECLSPEC_HIDDEN;
struct vkd3d_gpu_va_allocation
{
D3D12_GPU_VIRTUAL_ADDRESS base;
SIZE_T size;
void *ptr;
};
struct vkd3d_gpu_va_slab_entry
{
void *ptr;
SIZE_T size;
};
struct vkd3d_gpu_va_allocator
{
pthread_mutex_t mutex;
D3D12_GPU_VIRTUAL_ADDRESS floor;
struct vkd3d_gpu_va_slab_entry *slab_mem_allocations;
uint16_t *mem_vacant;
size_t mem_vacant_count;
struct vkd3d_gpu_va_allocation
{
D3D12_GPU_VIRTUAL_ADDRESS base;
SIZE_T size;
void *ptr;
} *allocations;
size_t allocations_size;
size_t allocation_count;
struct vkd3d_gpu_va_allocation *fallback_mem_allocations;
size_t fallback_mem_allocations_size;
size_t fallback_mem_allocation_count;
D3D12_GPU_VIRTUAL_ADDRESS fallback_mem_floor;
};
D3D12_GPU_VIRTUAL_ADDRESS vkd3d_gpu_va_allocator_allocate(struct vkd3d_gpu_va_allocator *allocator,
size_t size, void *ptr) DECLSPEC_HIDDEN;
size_t size, size_t alignment, void *ptr) DECLSPEC_HIDDEN;
void *vkd3d_gpu_va_allocator_dereference(struct vkd3d_gpu_va_allocator *allocator,
D3D12_GPU_VIRTUAL_ADDRESS address) DECLSPEC_HIDDEN;
void vkd3d_gpu_va_allocator_free(struct vkd3d_gpu_va_allocator *allocator,
@ -369,6 +380,7 @@ struct d3d12_heap
unsigned int map_count;
uint32_t vk_memory_type;
struct d3d12_resource *buffer_resource;
struct d3d12_device *device;
struct vkd3d_private_store private_store;
@ -383,6 +395,7 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface) DECLSPEC_HIDDE
#define VKD3D_RESOURCE_EXTERNAL 0x00000004
#define VKD3D_RESOURCE_DEDICATED_HEAP 0x00000008
#define VKD3D_RESOURCE_LINEAR_TILING 0x00000010
#define VKD3D_RESOURCE_PLACED_BUFFER 0x00000020
/* ID3D12Resource */
struct d3d12_resource
@ -875,13 +888,16 @@ struct vkd3d_pipeline_bindings
const struct d3d12_root_signature *root_signature;
VkDescriptorSet descriptor_set;
VkDescriptorSet uav_counter_descriptor_set;
VkDescriptorSet descriptor_set_bound;
VkDescriptorSet uav_counter_descriptor_set_bound;
bool in_use;
bool uav_counter_in_use;
D3D12_GPU_DESCRIPTOR_HANDLE descriptor_tables[D3D12_MAX_ROOT_COST];
uint64_t descriptor_table_dirty_mask;
uint64_t descriptor_table_active_mask;
VkBufferView vk_uav_counter_views[VKD3D_SHADER_MAX_UNORDERED_ACCESS_VIEWS];
uint8_t uav_counter_dirty_mask;
/* Needed when VK_KHR_push_descriptor is not available. */
@ -890,6 +906,16 @@ struct vkd3d_pipeline_bindings
uint32_t push_descriptor_active_mask;
};
struct d3d12_deferred_descriptor_set_update
{
const struct d3d12_desc *base_descriptor;
unsigned int index;
const struct d3d12_root_signature *root_signature;
VkDescriptorSet descriptor_set;
bool uav;
};
/* ID3D12CommandList */
struct d3d12_command_list
{
@ -933,6 +959,10 @@ struct d3d12_command_list
VkBuffer so_counter_buffers[D3D12_SO_BUFFER_SLOT_COUNT];
VkDeviceSize so_counter_buffer_offsets[D3D12_SO_BUFFER_SLOT_COUNT];
struct d3d12_deferred_descriptor_set_update *descriptor_updates;
size_t descriptor_updates_size;
size_t descriptor_updates_count;
struct vkd3d_private_store private_store;
};

View File

@ -15708,7 +15708,7 @@ static void test_update_descriptor_heap_after_closing_command_list(void)
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
get_texture_readback_with_command_list(context.render_target, 0, &rb, queue, command_list);
value = get_readback_uint(&rb, 0, 0, 0);
todo ok(value == 0xff00ff00, "Got unexpected value %#x.\n", value);
ok(value == 0xff00ff00, "Got unexpected value %#x.\n", value);
release_resource_readback(&rb);
ID3D12DescriptorHeap_Release(cpu_heap);