Compare commits
5 Commits
master
...
descriptor
Author | SHA1 | Date |
---|---|---|
Hans-Kristian Arntzen | f74705b11a | |
Hans-Kristian Arntzen | ad15a7eb01 | |
Hans-Kristian Arntzen | 02d9b6c61c | |
Hans-Kristian Arntzen | b6aac42aa6 | |
Hans-Kristian Arntzen | be3b44b01c |
|
@ -311,4 +311,19 @@ static inline uint64_t vkd3d_get_current_time_ns(void)
|
|||
#endif
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
#pragma intrinsic(__rdtsc)
#endif

/* Returns a raw timestamp used to bracket profiling regions.
 *
 * MSVC builds use __rdtsc() and GCC-compatible x86 / x86-64 builds use
 * __builtin_ia32_rdtsc(), so on those targets the value is a tick count,
 * not nanoseconds. Every other target falls back to vkd3d_get_current_time_ns(). */
static inline uint64_t vkd3d_get_current_time_ticks(void)
{
    uint64_t ticks;

#if defined(_MSC_VER)
    ticks = __rdtsc();
#elif defined(__i386__) || defined(__x86_64__)
    ticks = __builtin_ia32_rdtsc();
#else
    ticks = vkd3d_get_current_time_ns();
#endif

    return ticks;
}
|
||||
|
||||
#endif /* __VKD3D_COMMON_H */
|
||||
|
|
|
@ -25,13 +25,6 @@
|
|||
|
||||
#ifdef VKD3D_ENABLE_PROFILING
|
||||
|
||||
#ifdef _WIN32
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <time.h>
|
||||
#endif
|
||||
|
||||
void vkd3d_init_profiling(void);
|
||||
bool vkd3d_uses_profiling(void);
|
||||
unsigned int vkd3d_profiling_register_region(const char *name, spinlock_t *lock, uint32_t *latch);
|
||||
|
@ -48,12 +41,12 @@ void vkd3d_profiling_notify_work(unsigned int index, uint64_t start_ticks, uint6
|
|||
do { \
|
||||
if (!(_vkd3d_region_index_##name = vkd3d_atomic_uint32_load_explicit(&_vkd3d_region_latch_##name, vkd3d_memory_order_acquire))) \
|
||||
_vkd3d_region_index_##name = vkd3d_profiling_register_region(#name, &_vkd3d_region_lock_##name, &_vkd3d_region_latch_##name); \
|
||||
_vkd3d_region_begin_tick_##name = vkd3d_get_current_time_ns(); \
|
||||
_vkd3d_region_begin_tick_##name = vkd3d_get_current_time_ticks(); \
|
||||
} while(0)
|
||||
|
||||
#define VKD3D_REGION_END_ITERATIONS(name, iter) \
|
||||
do { \
|
||||
_vkd3d_region_end_tick_##name = vkd3d_get_current_time_ns(); \
|
||||
_vkd3d_region_end_tick_##name = vkd3d_get_current_time_ticks(); \
|
||||
vkd3d_profiling_notify_work(_vkd3d_region_index_##name, _vkd3d_region_begin_tick_##name, _vkd3d_region_end_tick_##name, iter); \
|
||||
} while(0)
|
||||
|
||||
|
|
|
@ -12413,6 +12413,8 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata)
|
|||
memmove(queue->submissions, queue->submissions + 1, queue->submissions_count * sizeof(submission));
|
||||
pthread_mutex_unlock(&queue->queue_lock);
|
||||
|
||||
vkd3d_descriptor_update_ring_flush(&queue->device->descriptor_update_ring, queue->device);
|
||||
|
||||
switch (submission.type)
|
||||
{
|
||||
case VKD3D_SUBMISSION_STOP:
|
||||
|
|
|
@ -2875,6 +2875,8 @@ static void d3d12_device_destroy(struct d3d12_device *device)
|
|||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
size_t i, j;
|
||||
|
||||
vkd3d_descriptor_update_ring_cleanup(&device->descriptor_update_ring, device);
|
||||
|
||||
for (i = 0; i < VKD3D_SCRATCH_POOL_KIND_COUNT; i++)
|
||||
for (j = 0; j < device->scratch_pools[i].scratch_buffer_count; j++)
|
||||
d3d12_device_destroy_scratch_buffer(device, &device->scratch_pools[i].scratch_buffers[j]);
|
||||
|
@ -4008,10 +4010,20 @@ static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(d3d12_device
|
|||
const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor)
|
||||
{
|
||||
struct d3d12_device *device = impl_from_ID3D12Device(iface);
|
||||
vkd3d_cpu_descriptor_va_t src = 0, dst = 0;
|
||||
|
||||
TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr);
|
||||
|
||||
if (descriptor.ptr & VKD3D_RESOURCE_DESC_DEFER_COPY_MASK)
|
||||
{
|
||||
dst = descriptor.ptr;
|
||||
descriptor.ptr = src = vkd3d_descriptor_update_ring_allocate_scratch(&device->descriptor_update_ring);
|
||||
}
|
||||
|
||||
d3d12_desc_create_cbv(descriptor.ptr, device, desc);
|
||||
|
||||
if (dst)
|
||||
vkd3d_descriptor_update_ring_push_copy(&device->descriptor_update_ring, device, src, dst);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(d3d12_device_iface *iface,
|
||||
|
@ -4019,11 +4031,21 @@ static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(d3d12_device
|
|||
D3D12_CPU_DESCRIPTOR_HANDLE descriptor)
|
||||
{
|
||||
struct d3d12_device *device = impl_from_ID3D12Device(iface);
|
||||
vkd3d_cpu_descriptor_va_t src = 0, dst = 0;
|
||||
|
||||
TRACE("iface %p, resource %p, desc %p, descriptor %#lx.\n",
|
||||
iface, resource, desc, descriptor.ptr);
|
||||
|
||||
if (descriptor.ptr & VKD3D_RESOURCE_DESC_DEFER_COPY_MASK)
|
||||
{
|
||||
dst = descriptor.ptr;
|
||||
descriptor.ptr = src = vkd3d_descriptor_update_ring_allocate_scratch(&device->descriptor_update_ring);
|
||||
}
|
||||
|
||||
d3d12_desc_create_srv(descriptor.ptr, device, impl_from_ID3D12Resource(resource), desc);
|
||||
|
||||
if (dst)
|
||||
vkd3d_descriptor_update_ring_push_copy(&device->descriptor_update_ring, device, src, dst);
|
||||
}
|
||||
|
||||
VKD3D_THREAD_LOCAL struct D3D12_UAV_INFO *d3d12_uav_info = NULL;
|
||||
|
@ -4035,12 +4057,19 @@ static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(d3d12_devic
|
|||
VkImageViewAddressPropertiesNVX out_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_ADDRESS_PROPERTIES_NVX };
|
||||
VkImageViewHandleInfoNVX imageViewHandleInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX };
|
||||
const struct vkd3d_vk_device_procs *vk_procs;
|
||||
vkd3d_cpu_descriptor_va_t src = 0, dst = 0;
|
||||
VkResult vr;
|
||||
struct d3d12_resource *d3d12_resource_ = impl_from_ID3D12Resource(resource);
|
||||
struct d3d12_device *device = impl_from_ID3D12Device(iface);
|
||||
TRACE("iface %p, resource %p, counter_resource %p, desc %p, descriptor %#lx.\n",
|
||||
iface, resource, counter_resource, desc, descriptor.ptr);
|
||||
|
||||
if (descriptor.ptr & VKD3D_RESOURCE_DESC_DEFER_COPY_MASK)
|
||||
{
|
||||
dst = descriptor.ptr;
|
||||
descriptor.ptr = src = vkd3d_descriptor_update_ring_allocate_scratch(&device->descriptor_update_ring);
|
||||
}
|
||||
|
||||
d3d12_desc_create_uav(descriptor.ptr,
|
||||
device, d3d12_resource_,
|
||||
impl_from_ID3D12Resource(counter_resource), desc);
|
||||
|
@ -4067,6 +4096,9 @@ static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(d3d12_devic
|
|||
/* Set this to null so that subsequent calls to this API won't update the previous pointer. */
|
||||
d3d12_uav_info = NULL;
|
||||
}
|
||||
|
||||
if (dst)
|
||||
vkd3d_descriptor_update_ring_push_copy(&device->descriptor_update_ring, device, src, dst);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(d3d12_device_iface *iface,
|
||||
|
@ -4216,7 +4248,10 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(d3d12_device_if
|
|||
const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset,
|
||||
D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type)
|
||||
{
|
||||
vkd3d_cpu_descriptor_va_t src, dst;
|
||||
struct d3d12_device *device;
|
||||
UINT i;
|
||||
|
||||
TRACE("iface %p, descriptor_count %u, dst_descriptor_range_offset %#lx, "
|
||||
"src_descriptor_range_offset %#lx, descriptor_heap_type %#x.\n",
|
||||
iface, descriptor_count, dst_descriptor_range_offset.ptr, src_descriptor_range_offset.ptr,
|
||||
|
@ -4224,7 +4259,21 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(d3d12_device_if
|
|||
|
||||
device = unsafe_impl_from_ID3D12Device(iface);
|
||||
|
||||
if (descriptor_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ||
|
||||
if (descriptor_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV &&
|
||||
(dst_descriptor_range_offset.ptr & VKD3D_RESOURCE_DESC_DEFER_COPY_MASK))
|
||||
{
|
||||
src = src_descriptor_range_offset.ptr;
|
||||
dst = dst_descriptor_range_offset.ptr;
|
||||
for (i = 0; i < descriptor_count; i++)
|
||||
{
|
||||
vkd3d_descriptor_update_ring_push_copy(&device->descriptor_update_ring,
|
||||
device, src, dst);
|
||||
|
||||
src += VKD3D_RESOURCE_DESC_INCREMENT;
|
||||
dst += VKD3D_RESOURCE_DESC_INCREMENT;
|
||||
}
|
||||
}
|
||||
else if (descriptor_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ||
|
||||
descriptor_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)
|
||||
{
|
||||
/* Fast and hot path. */
|
||||
|
@ -6229,10 +6278,13 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
|
|||
if (FAILED(hr = vkd3d_shader_debug_ring_init(&device->debug_ring, device)))
|
||||
goto out_cleanup_meta_ops;
|
||||
|
||||
if (FAILED(hr = vkd3d_descriptor_update_ring_init(&device->descriptor_update_ring, device)))
|
||||
goto out_cleanup_debug_ring;
|
||||
|
||||
#ifdef VKD3D_ENABLE_BREADCRUMBS
|
||||
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS)
|
||||
if (FAILED(hr = vkd3d_breadcrumb_tracer_init(&device->breadcrumb_tracer, device)))
|
||||
goto out_cleanup_debug_ring;
|
||||
goto out_cleanup_update_ring;
|
||||
#endif
|
||||
|
||||
if (vkd3d_descriptor_debug_active_qa_checks())
|
||||
|
@ -6267,8 +6319,10 @@ out_cleanup_breadcrumb_tracer:
|
|||
#ifdef VKD3D_ENABLE_BREADCRUMBS
|
||||
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS)
|
||||
vkd3d_breadcrumb_tracer_cleanup(&device->breadcrumb_tracer, device);
|
||||
out_cleanup_debug_ring:
|
||||
out_cleanup_update_ring:
|
||||
#endif
|
||||
vkd3d_descriptor_update_ring_cleanup(&device->descriptor_update_ring, device);
|
||||
out_cleanup_debug_ring:
|
||||
vkd3d_shader_debug_ring_cleanup(&device->debug_ring, device);
|
||||
out_cleanup_meta_ops:
|
||||
vkd3d_meta_ops_cleanup(&device->meta_ops, device);
|
||||
|
|
|
@ -190,6 +190,13 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaSurfaceObject(ID3
|
|||
return E_INVALIDARG;
|
||||
|
||||
device = d3d12_device_from_ID3D12DeviceExt(iface);
|
||||
|
||||
if (uav_handle.ptr & VKD3D_RESOURCE_DESC_DEFER_COPY_MASK)
|
||||
{
|
||||
INFO("Flushing copy queue in place due to weird GetCudaSurfaceObject call!\n");
|
||||
vkd3d_descriptor_update_ring_flush(&device->descriptor_update_ring, device);
|
||||
}
|
||||
|
||||
uav_desc = d3d12_desc_decode_va(uav_handle.ptr);
|
||||
|
||||
imageViewHandleInfo.imageView = uav_desc.view->info.view->vk_image_view;
|
||||
|
|
|
@ -5295,6 +5295,22 @@ static ULONG STDMETHODCALLTYPE d3d12_descriptor_heap_AddRef(ID3D12DescriptorHeap
|
|||
return refcount;
|
||||
}
|
||||
|
||||
void d3d12_descriptor_heap_inc_ref(struct d3d12_descriptor_heap *heap)
|
||||
{
|
||||
InterlockedIncrement(&heap->internal_refcount);
|
||||
}
|
||||
|
||||
void d3d12_descriptor_heap_dec_ref(struct d3d12_descriptor_heap *heap)
|
||||
{
|
||||
ULONG refcount = InterlockedDecrement(&heap->internal_refcount);
|
||||
|
||||
if (!refcount)
|
||||
{
|
||||
d3d12_descriptor_heap_cleanup(heap);
|
||||
vkd3d_free_aligned(heap);
|
||||
}
|
||||
}
|
||||
|
||||
static ULONG STDMETHODCALLTYPE d3d12_descriptor_heap_Release(ID3D12DescriptorHeap *iface)
|
||||
{
|
||||
struct d3d12_descriptor_heap *heap = impl_from_ID3D12DescriptorHeap(iface);
|
||||
|
@ -5305,11 +5321,12 @@ static ULONG STDMETHODCALLTYPE d3d12_descriptor_heap_Release(ID3D12DescriptorHea
|
|||
if (!refcount)
|
||||
{
|
||||
struct d3d12_device *device = heap->device;
|
||||
|
||||
d3d12_descriptor_heap_cleanup(heap);
|
||||
/* There might be pending descriptor copies from our heap, so keep it alive
|
||||
* until we have observed that all possible copies have completed using the current
|
||||
* write_count timestamp. */
|
||||
vkd3d_descriptor_update_ring_mark_heap_destruction(&device->descriptor_update_ring, heap);
|
||||
vkd3d_private_store_destroy(&heap->private_store);
|
||||
vkd3d_free_aligned(heap);
|
||||
|
||||
d3d12_descriptor_heap_dec_ref(heap);
|
||||
d3d12_device_release(device);
|
||||
}
|
||||
|
||||
|
@ -5853,6 +5870,7 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript
|
|||
memset(descriptor_heap, 0, sizeof(*descriptor_heap));
|
||||
descriptor_heap->ID3D12DescriptorHeap_iface.lpVtbl = &d3d12_descriptor_heap_vtbl;
|
||||
descriptor_heap->refcount = 1;
|
||||
descriptor_heap->internal_refcount = 1;
|
||||
descriptor_heap->device = device;
|
||||
descriptor_heap->desc = *desc;
|
||||
|
||||
|
@ -6005,6 +6023,18 @@ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device,
|
|||
{
|
||||
/* See comments above on how this is supposed to work */
|
||||
object->cpu_va.ptr = (SIZE_T)object + num_descriptor_bits;
|
||||
|
||||
/* FIXME: This is gross. We need to repurpose some of the lower order bits to make this robust. */
|
||||
if (object->cpu_va.ptr & VKD3D_RESOURCE_DESC_DEFER_COPY_MASK)
|
||||
{
|
||||
FIXME("High bit in CPU va is set ...\n");
|
||||
vkd3d_free_aligned(object);
|
||||
return E_OUTOFMEMORY;
|
||||
}
|
||||
|
||||
if (desc->Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV &&
|
||||
(desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE))
|
||||
object->cpu_va.ptr |= VKD3D_RESOURCE_DESC_DEFER_COPY_MASK;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -6044,6 +6074,172 @@ void d3d12_descriptor_heap_cleanup(struct d3d12_descriptor_heap *descriptor_heap
|
|||
vkd3d_descriptor_debug_unregister_heap(descriptor_heap->cookie);
|
||||
}
|
||||
|
||||
HRESULT vkd3d_descriptor_update_ring_init(struct vkd3d_descriptor_update_ring *ring,
|
||||
struct d3d12_device *device)
|
||||
{
|
||||
D3D12_DESCRIPTOR_HEAP_DESC desc;
|
||||
HRESULT hr;
|
||||
|
||||
desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
|
||||
desc.NumDescriptors = VKD3D_DESCRIPTOR_UPDATE_RING_SIZE;
|
||||
desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
|
||||
desc.NodeMask = 0;
|
||||
|
||||
if (FAILED(hr = d3d12_descriptor_heap_create(device, &desc, &ring->staging)))
|
||||
return hr;
|
||||
|
||||
/* Hold private reference. */
|
||||
d3d12_descriptor_heap_inc_ref(ring->staging);
|
||||
d3d12_descriptor_heap_Release(&ring->staging->ID3D12DescriptorHeap_iface);
|
||||
|
||||
ring->staging_base = ring->staging->cpu_va.ptr;
|
||||
ring->cbv_srv_uav_copies = vkd3d_calloc(VKD3D_DESCRIPTOR_UPDATE_RING_SIZE, sizeof(*ring->cbv_srv_uav_copies));
|
||||
|
||||
pthread_mutex_init(&ring->submission_lock, NULL);
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
void vkd3d_descriptor_update_ring_cleanup(struct vkd3d_descriptor_update_ring *ring,
|
||||
struct d3d12_device *device)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
d3d12_descriptor_heap_dec_ref(ring->staging);
|
||||
pthread_mutex_destroy(&ring->submission_lock);
|
||||
vkd3d_free(ring->cbv_srv_uav_copies);
|
||||
|
||||
for (i = 0; i < ring->deferred_release_count; i++)
|
||||
d3d12_descriptor_heap_dec_ref(ring->deferred_releases[i].heap);
|
||||
vkd3d_free(ring->deferred_releases);
|
||||
}
|
||||
|
||||
vkd3d_cpu_descriptor_va_t vkd3d_descriptor_update_ring_allocate_scratch(struct vkd3d_descriptor_update_ring *ring)
|
||||
{
|
||||
/* FIXME: This can in theory overflow before we've read all entries. If we start to fill the buffer,
|
||||
* we can do an emergency stall, but it should basically never happen unless application
|
||||
* is specifically trying to break us. We can read the read_count. */
|
||||
uint32_t offset = vkd3d_atomic_uint32_increment(&ring->staging_write_count, vkd3d_memory_order_relaxed) - 1;
|
||||
offset &= VKD3D_DESCRIPTOR_UPDATE_RING_MASK;
|
||||
return ring->staging_base + offset * VKD3D_RESOURCE_DESC_INCREMENT;
|
||||
}
|
||||
|
||||
/* Queues one deferred descriptor copy from src_ to dst_.
 *
 * The copy is not performed here; it is recorded in the ring and executed by
 * vkd3d_descriptor_update_ring_flush(). If more than half of the ring is
 * outstanding, a flush is performed in place first.
 *
 * ring:   the device's update ring.
 * device: forwarded to the emergency flush.
 * src_:   CPU VA of the source descriptor.
 * dst_:   CPU VA of the destination descriptor; must be non-zero, since a NULL
 *         dst marks an unpublished slot for the consumer. */
void vkd3d_descriptor_update_ring_push_copy(struct vkd3d_descriptor_update_ring *ring,
        struct d3d12_device *device,
        vkd3d_cpu_descriptor_va_t src_, vkd3d_cpu_descriptor_va_t dst_)
{
    /* FIXME: Have to handle the case where we're overflowing the buffers. */
    struct vkd3d_descriptor_update_ring_copy *copy;
    void *src = (void*)src_;
    void *dst = (void*)dst_;
    uint32_t write_count;
    uint32_t read_count;
    uint32_t offset;

    /* Detect internal overflow. When we've filled half the buffer it's getting spicy,
     * so perform an emergency flush.
     * This should only happen in code that tries hard to break us. */
    write_count = vkd3d_atomic_uint32_load_explicit(&ring->write_count, vkd3d_memory_order_relaxed);
    read_count = vkd3d_atomic_uint32_load_explicit(&ring->read_count, vkd3d_memory_order_relaxed);
    if ((write_count - read_count) > VKD3D_DESCRIPTOR_UPDATE_RING_SIZE / 2)
    {
        INFO("Emergency descriptor update ring flush in render threads!\n");
        vkd3d_descriptor_update_ring_flush(ring, device);
    }

    /* Claim a slot. The "- 1" recovers this caller's own index from the incremented counter. */
    write_count = vkd3d_atomic_uint32_increment(&ring->write_count, vkd3d_memory_order_relaxed) - 1;
    offset = write_count & VKD3D_DESCRIPTOR_UPDATE_RING_MASK;
    copy = &ring->cbv_srv_uav_copies[offset];

    /* There is a race here where submission threads may observe that the write count has increased,
     * but the application has not completely written the descriptor copy yet.
     * Submission will stop processing in this case. On the next iteration, it resumes from the
     * first descriptor that was not ready.
     * All descriptor updates which happened before the ExecuteCommandLists
     * are guaranteed to be completely written, so the only edge case occurs if ExecuteCommandLists is called
     * concurrently with descriptor updates, but it's not guaranteed that ExecuteCommandLists would observe
     * those descriptors in the first place. The only real memory order we need to consider is
     * release semantics on the dst write.
     * Submission thread will read dst first with acquire order, then src. */
    vkd3d_atomic_ptr_store_explicit(&copy->src, src, vkd3d_memory_order_relaxed);
    vkd3d_atomic_ptr_store_explicit(&copy->dst, dst, vkd3d_memory_order_release);
}
|
||||
|
||||
void vkd3d_descriptor_update_ring_mark_heap_destruction(struct vkd3d_descriptor_update_ring *ring,
|
||||
struct d3d12_descriptor_heap *heap)
|
||||
{
|
||||
uint32_t write_count;
|
||||
|
||||
if (heap->desc.Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)
|
||||
return;
|
||||
|
||||
write_count = vkd3d_atomic_uint32_load_explicit(&ring->write_count, vkd3d_memory_order_relaxed);
|
||||
d3d12_descriptor_heap_inc_ref(heap);
|
||||
|
||||
pthread_mutex_lock(&ring->submission_lock);
|
||||
vkd3d_array_reserve((void**)&ring->deferred_releases, &ring->deferred_release_size,
|
||||
ring->deferred_release_count + 1, sizeof(*ring->deferred_releases));
|
||||
ring->deferred_releases[ring->deferred_release_count].heap = heap;
|
||||
ring->deferred_releases[ring->deferred_release_count].write_count = write_count;
|
||||
ring->deferred_release_count++;
|
||||
pthread_mutex_unlock(&ring->submission_lock);
|
||||
}
|
||||
|
||||
void vkd3d_descriptor_update_ring_flush(struct vkd3d_descriptor_update_ring *ring,
|
||||
struct d3d12_device *device)
|
||||
{
|
||||
/* This is called from submission threads and perform the main "meat" of the descriptor management. */
|
||||
|
||||
uint32_t write_count = vkd3d_atomic_uint32_load_explicit(&ring->write_count, vkd3d_memory_order_acquire);
|
||||
struct vkd3d_descriptor_update_ring_copy *copy;
|
||||
vkd3d_cpu_descriptor_va_t src, dst;
|
||||
uint32_t read_count;
|
||||
size_t i, j;
|
||||
|
||||
pthread_mutex_lock(&ring->submission_lock);
|
||||
|
||||
read_count = ring->read_count;
|
||||
|
||||
/* Wrap arithmetic, so != check is appropriate. */
|
||||
while (read_count != write_count)
|
||||
{
|
||||
copy = &ring->cbv_srv_uav_copies[read_count & VKD3D_DESCRIPTOR_UPDATE_RING_MASK];
|
||||
dst = (vkd3d_cpu_descriptor_va_t)vkd3d_atomic_ptr_load_explicit(©->dst, vkd3d_memory_order_acquire);
|
||||
|
||||
if (dst)
|
||||
{
|
||||
src = (vkd3d_cpu_descriptor_va_t) vkd3d_atomic_ptr_load_explicit(©->src, vkd3d_memory_order_relaxed);
|
||||
d3d12_desc_copy_single(dst, src, device);
|
||||
|
||||
/* Consume the descriptor. */
|
||||
vkd3d_atomic_ptr_store_explicit(©->dst, NULL, vkd3d_memory_order_release);
|
||||
vkd3d_atomic_ptr_store_explicit(©->src, NULL, vkd3d_memory_order_release);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Race condition, but it's okay. We'll stop here and pick it up later.
|
||||
* The write to dst will come in finite time. */
|
||||
break;
|
||||
}
|
||||
|
||||
read_count++;
|
||||
}
|
||||
|
||||
vkd3d_atomic_uint32_store_explicit(&ring->read_count, read_count, vkd3d_memory_order_release);
|
||||
|
||||
/* If we're done copying descriptors from a heap, we can now release it safely. */
|
||||
for (i = 0, j = 0; i < ring->deferred_release_count; i++)
|
||||
{
|
||||
/* With wrap arithmetic, test that read_count is positive. In the case where read counter hasn't caught up,
|
||||
* the wrapped value will be close-ish to UINT_MAX instead. */
|
||||
if ((read_count - ring->deferred_releases[i].write_count) <= VKD3D_DESCRIPTOR_UPDATE_RING_SIZE)
|
||||
d3d12_descriptor_heap_dec_ref(ring->deferred_releases[i].heap);
|
||||
else
|
||||
ring->deferred_releases[j++] = ring->deferred_releases[i];
|
||||
}
|
||||
|
||||
ring->deferred_release_count = j;
|
||||
|
||||
pthread_mutex_unlock(&ring->submission_lock);
|
||||
}
|
||||
|
||||
static void d3d12_query_heap_set_name(struct d3d12_query_heap *heap, const char *name)
|
||||
{
|
||||
if (heap->vk_query_pool)
|
||||
|
|
|
@ -1037,6 +1037,9 @@ struct vkd3d_descriptor_binding
|
|||
#define VKD3D_RESOURCE_DESC_INCREMENT_LOG2 5
|
||||
#define VKD3D_RESOURCE_DESC_INCREMENT (1u << VKD3D_RESOURCE_DESC_INCREMENT_LOG2)
|
||||
|
||||
/* FIXME: Gross hack for now. */
|
||||
#define VKD3D_RESOURCE_DESC_DEFER_COPY_MASK ((uintptr_t)INTPTR_MIN)
|
||||
|
||||
/* Arrange data so that it can pack as tightly as possible.
|
||||
* When we copy descriptors, we must copy both structures.
|
||||
* In copy_desc_range we scan through the entire metadata_binding, so
|
||||
|
@ -1167,6 +1170,7 @@ struct d3d12_descriptor_heap
|
|||
{
|
||||
ID3D12DescriptorHeap ID3D12DescriptorHeap_iface;
|
||||
LONG refcount;
|
||||
LONG internal_refcount;
|
||||
|
||||
uint64_t gpu_va;
|
||||
D3D12_DESCRIPTOR_HEAP_DESC desc;
|
||||
|
@ -1200,6 +1204,8 @@ struct d3d12_descriptor_heap
|
|||
|
||||
HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device,
|
||||
const D3D12_DESCRIPTOR_HEAP_DESC *desc, struct d3d12_descriptor_heap **descriptor_heap);
|
||||
void d3d12_descriptor_heap_dec_ref(struct d3d12_descriptor_heap *heap);
|
||||
void d3d12_descriptor_heap_inc_ref(struct d3d12_descriptor_heap *heap);
|
||||
void d3d12_descriptor_heap_cleanup(struct d3d12_descriptor_heap *descriptor_heap);
|
||||
|
||||
static inline struct d3d12_descriptor_heap *impl_from_ID3D12DescriptorHeap(ID3D12DescriptorHeap *iface)
|
||||
|
@ -1235,6 +1241,8 @@ static inline struct d3d12_desc_split d3d12_desc_decode_va(vkd3d_cpu_descriptor_
|
|||
* Above that, we have the d3d12_descriptor_heap, which is allocated with enough alignment
|
||||
* to contain these twiddle bits. */
|
||||
|
||||
va &= ~VKD3D_RESOURCE_DESC_DEFER_COPY_MASK;
|
||||
|
||||
num_bits_descriptors = va & (VKD3D_RESOURCE_DESC_INCREMENT - 1);
|
||||
heap_offset = (va >> VKD3D_RESOURCE_DESC_INCREMENT_LOG2) & (((size_t)1 << num_bits_descriptors) - 1);
|
||||
split.offset = (uint32_t)heap_offset;
|
||||
|
@ -1256,6 +1264,54 @@ static inline uint32_t d3d12_desc_heap_offset_from_gpu_handle(D3D12_GPU_DESCRIPT
|
|||
return (uint32_t)handle.ptr / VKD3D_RESOURCE_DESC_INCREMENT;
|
||||
}
|
||||
|
||||
/* One queued descriptor copy in the update ring.
 * Both fields are accessed with atomic pointer loads/stores. The producer
 * writes src first and then publishes dst with release order; the consumer
 * treats a NULL dst as "not published yet" and resets both to NULL once the
 * copy has been performed. */
struct vkd3d_descriptor_update_ring_copy
{
    void *src; /* CPU VA of the source descriptor. */
    void *dst; /* CPU VA of the destination descriptor; NULL while the slot is empty. */
};
|
||||
|
||||
/* A descriptor heap whose final release is postponed until the ring's
 * read_count has caught up with the write_count recorded here. */
struct vkd3d_descriptor_update_deferred_release
{
    struct d3d12_descriptor_heap *heap; /* Heap kept alive through an extra internal reference. */
    uint32_t write_count;               /* Ring write_count sampled when destruction was requested. */
};
|
||||
|
||||
#define VKD3D_DESCRIPTOR_UPDATE_RING_SIZE (256 * 1024)
|
||||
#define VKD3D_DESCRIPTOR_UPDATE_RING_MASK (VKD3D_DESCRIPTOR_UPDATE_RING_SIZE - 1)
|
||||
|
||||
struct vkd3d_descriptor_update_ring
|
||||
{
|
||||
struct vkd3d_descriptor_update_ring_copy *cbv_srv_uav_copies;
|
||||
uint32_t write_count;
|
||||
uint32_t read_count;
|
||||
|
||||
vkd3d_cpu_descriptor_va_t staging_base;
|
||||
struct d3d12_descriptor_heap *staging;
|
||||
uint32_t staging_write_count;
|
||||
|
||||
struct vkd3d_descriptor_update_deferred_release *deferred_releases;
|
||||
size_t deferred_release_count;
|
||||
size_t deferred_release_size;
|
||||
|
||||
pthread_mutex_t submission_lock;
|
||||
};
|
||||
|
||||
/* When writing shader visible descriptors, defer the write and perform the work
|
||||
* last minute in submission queues. */
|
||||
HRESULT vkd3d_descriptor_update_ring_init(struct vkd3d_descriptor_update_ring *ring,
|
||||
struct d3d12_device *device);
|
||||
void vkd3d_descriptor_update_ring_cleanup(struct vkd3d_descriptor_update_ring *ring,
|
||||
struct d3d12_device *device);
|
||||
void vkd3d_descriptor_update_ring_flush(struct vkd3d_descriptor_update_ring *ring,
|
||||
struct d3d12_device *device);
|
||||
|
||||
vkd3d_cpu_descriptor_va_t vkd3d_descriptor_update_ring_allocate_scratch(struct vkd3d_descriptor_update_ring *ring);
|
||||
void vkd3d_descriptor_update_ring_push_copy(struct vkd3d_descriptor_update_ring *ring,
|
||||
struct d3d12_device *device,
|
||||
vkd3d_cpu_descriptor_va_t src, vkd3d_cpu_descriptor_va_t dst);
|
||||
void vkd3d_descriptor_update_ring_mark_heap_destruction(struct vkd3d_descriptor_update_ring *ring,
|
||||
struct d3d12_descriptor_heap *heap);
|
||||
|
||||
/* ID3D12QueryHeap */
|
||||
struct d3d12_query_heap
|
||||
{
|
||||
|
@ -3325,6 +3381,7 @@ struct d3d12_device
|
|||
struct vkd3d_sampler_state sampler_state;
|
||||
struct vkd3d_shader_debug_ring debug_ring;
|
||||
struct vkd3d_pipeline_library_disk_cache disk_cache;
|
||||
struct vkd3d_descriptor_update_ring descriptor_update_ring;
|
||||
#ifdef VKD3D_ENABLE_BREADCRUMBS
|
||||
struct vkd3d_breadcrumb_tracer breadcrumb_tracer;
|
||||
#endif
|
||||
|
|
|
@ -75,17 +75,35 @@ def main():
|
|||
parser.add_argument('--per-iteration', action = 'store_true', help = 'Represent ticks in terms of ticks / iteration. Cannot be used with --divider.')
|
||||
parser.add_argument('--name', nargs = '+', type = str, help = 'Only display data for certain counters.')
|
||||
parser.add_argument('--sort', type = str, default = 'none', help = 'Sorts input data according to "iterations" or "ticks".')
|
||||
parser.add_argument('--delta', type = str, help = 'Subtract iterations and timing from other profile blob.')
|
||||
parser.add_argument('profile', help = 'The profile binary blob.')
|
||||
|
||||
args = parser.parse_args()
|
||||
if not args.profile:
|
||||
raise AssertionError('Need profile folder.')
|
||||
|
||||
delta_map = {}
|
||||
if args.delta is not None:
|
||||
with open(args.delta, 'rb') as f:
|
||||
for block in iter(lambda: f.read(64), b''):
|
||||
if is_valid_block(block):
|
||||
b = parse_block(block)
|
||||
delta_map[b.name] = b
|
||||
|
||||
blocks = []
|
||||
with open(args.profile, 'rb') as f:
|
||||
for block in iter(lambda: f.read(64), b''):
|
||||
if is_valid_block(block):
|
||||
blocks.append(parse_block(block))
|
||||
b = parse_block(block)
|
||||
if b.name in delta_map:
|
||||
d = delta_map[b.name]
|
||||
b = ProfileCase(ticks = b.ticks - d.ticks,
|
||||
iterations = b.iterations - d.iterations,
|
||||
name = b.name)
|
||||
if b.iterations < 0 or b.ticks < 0:
|
||||
raise AssertionError('After subtracting, iterations or ticks became negative.')
|
||||
if b.iterations > 0:
|
||||
blocks.append(b)
|
||||
|
||||
if args.divider is not None:
|
||||
if args.per_iteration:
|
||||
|
@ -114,11 +132,11 @@ def main():
|
|||
print(' Iterations:', block.iterations)
|
||||
|
||||
if args.divider is not None:
|
||||
print(' Time spent per iteration of {}: {:.3f}'.format(args.divider, block.ticks / 1000.0), "us")
|
||||
print(' Time spent per iteration of {}: {:.3f}'.format(args.divider, block.ticks / 1000.0), "Kcycles")
|
||||
elif args.per_iteration:
|
||||
print(' Time spent per iteration: {:.3f}'.format(block.ticks / 1000.0), "us")
|
||||
print(' Time spent per iteration: {:.3f}'.format(block.ticks / 1000.0), "Kcycles")
|
||||
else:
|
||||
print(' Total time spent: {:.3f}'.format(block.ticks / 1000.0), "us")
|
||||
print(' Total time spent: {:.3f}'.format(block.ticks / 1000.0), "Kcycles")
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
Loading…
Reference in New Issue