vkd3d: Add punchthrough path for descriptor copies.

Proves out the viability of this style of implementation. Ideally we'd
have a more officially sanctioned way of doing similar things later :)

Unfortunately, the overhead removal is too great to ignore on the target
platform. Makes use of a private (reserved) extension for now ...

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
This commit is contained in:
Hans-Kristian Arntzen 2022-02-24 10:55:16 +01:00
parent 277f485321
commit 9a63df07b8
6 changed files with 241 additions and 46 deletions

View File

@ -7476,7 +7476,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(d3d12_comman
if (bindless_state->set_info[j].heap_type != heap->desc.Type)
continue;
list->descriptor_heaps[j] = heap->vk_descriptor_sets[set_index++];
list->descriptor_heaps[j] = heap->sets[set_index++].vk_descriptor_set;
dirty_mask |= 1ull << j;
}

View File

@ -116,6 +116,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] =
VK_EXTENSION(NV_COMPUTE_SHADER_DERIVATIVES, NV_compute_shader_derivatives),
/* VALVE extensions */
VK_EXTENSION(VALVE_MUTABLE_DESCRIPTOR_TYPE, VALVE_mutable_descriptor_type),
VK_EXTENSION(VALVE_DESCRIPTOR_SET_HOST_MAPPING, VALVE_descriptor_set_host_mapping),
};
static unsigned int get_spec_version(const VkExtensionProperties *extensions,
@ -1367,6 +1368,13 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
vk_prepend_struct(&info->features2, &info->scalar_block_layout_features);
}
if (vulkan_info->VALVE_descriptor_set_host_mapping)
{
info->descriptor_set_host_mapping_features.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_SET_HOST_MAPPING_FEATURES_VALVE;
vk_prepend_struct(&info->features2, &info->descriptor_set_host_mapping_features);
}
/* Core in Vulkan 1.1. */
info->shader_draw_parameters_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES;
vk_prepend_struct(&info->features2, &info->shader_draw_parameters_features);

View File

@ -3051,11 +3051,10 @@ void d3d12_desc_copy_single(vkd3d_cpu_descriptor_va_t dst_va, vkd3d_cpu_descript
{
VkCopyDescriptorSet vk_copies[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS];
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
const struct vkd3d_bindless_set_info *set_info;
struct vkd3d_descriptor_binding binding;
uint32_t set_mask, set_info_index;
struct d3d12_desc_split src, dst;
const VkDescriptorSet *src_sets;
const VkDescriptorSet *dst_sets;
VkCopyDescriptorSet *vk_copy;
uint32_t copy_count = 0;
uint32_t flags;
@ -3064,8 +3063,6 @@ void d3d12_desc_copy_single(vkd3d_cpu_descriptor_va_t dst_va, vkd3d_cpu_descript
dst = d3d12_desc_decode_va(dst_va);
flags = src.types->flags;
src_sets = src.heap->vk_descriptor_sets;
dst_sets = dst.heap->vk_descriptor_sets;
set_mask = src.types->set_info_mask;
if (flags & VKD3D_DESCRIPTOR_FLAG_SINGLE_DESCRIPTOR)
@ -3075,16 +3072,26 @@ void d3d12_desc_copy_single(vkd3d_cpu_descriptor_va_t dst_va, vkd3d_cpu_descript
* it seems. */
binding = src.types->single_binding;
vk_copy = &vk_copies[copy_count++];
vk_copy->sType = VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET;
vk_copy->pNext = NULL;
vk_copy->srcSet = src_sets[binding.set];
vk_copy->srcBinding = binding.binding;
vk_copy->srcArrayElement = src.offset;
vk_copy->dstSet = dst_sets[binding.set];
vk_copy->dstBinding = binding.binding;
vk_copy->dstArrayElement = dst.offset;
vk_copy->descriptorCount = 1;
if (src.heap->sets[binding.set].copy_template_single)
{
src.heap->sets[binding.set].copy_template_single(
dst.heap->sets[binding.set].mapped_set,
src.heap->sets[binding.set].mapped_set,
dst.offset, src.offset);
}
else
{
vk_copy = &vk_copies[copy_count++];
vk_copy->sType = VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET;
vk_copy->pNext = NULL;
vk_copy->srcSet = src.heap->sets[binding.set].vk_descriptor_set;
vk_copy->srcBinding = binding.binding;
vk_copy->srcArrayElement = src.offset;
vk_copy->dstSet = dst.heap->sets[binding.set].vk_descriptor_set;
vk_copy->dstBinding = binding.binding;
vk_copy->dstArrayElement = dst.offset;
vk_copy->descriptorCount = 1;
}
}
else
{
@ -3092,18 +3099,30 @@ void d3d12_desc_copy_single(vkd3d_cpu_descriptor_va_t dst_va, vkd3d_cpu_descript
while (set_mask)
{
set_info_index = vkd3d_bitmask_iter32(&set_mask);
binding = vkd3d_bindless_state_binding_from_info_index(&device->bindless_state, set_info_index);
set_info = &device->bindless_state.set_info[set_info_index];
vk_copy = &vk_copies[copy_count++];
vk_copy->sType = VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET;
vk_copy->pNext = NULL;
vk_copy->srcSet = src_sets[binding.set];
vk_copy->srcBinding = binding.binding;
vk_copy->srcArrayElement = src.offset;
vk_copy->dstSet = dst_sets[binding.set];
vk_copy->dstBinding = binding.binding;
vk_copy->dstArrayElement = dst.offset;
vk_copy->descriptorCount = 1;
if (set_info->host_copy_template_single)
{
set_info->host_copy_template_single(
dst.heap->sets[set_info->set_index].mapped_set,
src.heap->sets[set_info->set_index].mapped_set,
dst.offset, src.offset);
}
else
{
binding = vkd3d_bindless_state_binding_from_info_index(&device->bindless_state, set_info_index);
vk_copy = &vk_copies[copy_count++];
vk_copy->sType = VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET;
vk_copy->pNext = NULL;
vk_copy->srcSet = src.heap->sets[binding.set].vk_descriptor_set;
vk_copy->srcBinding = binding.binding;
vk_copy->srcArrayElement = src.offset;
vk_copy->dstSet = dst.heap->sets[binding.set].vk_descriptor_set;
vk_copy->dstBinding = binding.binding;
vk_copy->dstArrayElement = dst.offset;
vk_copy->descriptorCount = 1;
}
}
}
@ -3123,10 +3142,10 @@ void d3d12_desc_copy_single(vkd3d_cpu_descriptor_va_t dst_va, vkd3d_cpu_descript
vk_copy = &vk_copies[copy_count++];
vk_copy->sType = VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET;
vk_copy->pNext = NULL;
vk_copy->srcSet = src_sets[binding.set];
vk_copy->srcSet = src.heap->sets[binding.set].vk_descriptor_set;
vk_copy->srcBinding = binding.binding;
vk_copy->srcArrayElement = src.offset;
vk_copy->dstSet = dst_sets[binding.set];
vk_copy->dstSet = dst.heap->sets[binding.set].vk_descriptor_set;
vk_copy->dstBinding = binding.binding;
vk_copy->dstArrayElement = dst.offset;
vk_copy->descriptorCount = 1;
@ -3152,6 +3171,7 @@ void d3d12_desc_copy_range(vkd3d_cpu_descriptor_va_t dst_va, vkd3d_cpu_descripto
{
VkCopyDescriptorSet vk_copies[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS];
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
const struct vkd3d_bindless_set_info *set_info;
struct vkd3d_descriptor_binding binding;
struct d3d12_desc_split src, dst;
VkCopyDescriptorSet *vk_copy;
@ -3172,18 +3192,30 @@ void d3d12_desc_copy_range(vkd3d_cpu_descriptor_va_t dst_va, vkd3d_cpu_descripto
while (set_info_mask)
{
set_info_index = vkd3d_bitmask_iter32(&set_info_mask);
binding = vkd3d_bindless_state_binding_from_info_index(&device->bindless_state, set_info_index);
set_info = &device->bindless_state.set_info[set_info_index];
vk_copy = &vk_copies[copy_count++];
vk_copy->sType = VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET;
vk_copy->pNext = NULL;
vk_copy->srcSet = src.heap->vk_descriptor_sets[binding.set];
vk_copy->srcBinding = binding.binding;
vk_copy->srcArrayElement = src.offset;
vk_copy->dstSet = dst.heap->vk_descriptor_sets[binding.set];
vk_copy->dstBinding = binding.binding;
vk_copy->dstArrayElement = dst.offset;
vk_copy->descriptorCount = count;
if (set_info->host_copy_template)
{
set_info->host_copy_template(
dst.heap->sets[set_info->set_index].mapped_set,
src.heap->sets[set_info->set_index].mapped_set,
dst.offset, src.offset, count);
}
else
{
binding = vkd3d_bindless_state_binding_from_info_index(&device->bindless_state, set_info_index);
vk_copy = &vk_copies[copy_count++];
vk_copy->sType = VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET;
vk_copy->pNext = NULL;
vk_copy->srcSet = src.heap->sets[binding.set].vk_descriptor_set;
vk_copy->srcBinding = binding.binding;
vk_copy->srcArrayElement = src.offset;
vk_copy->dstSet = dst.heap->sets[binding.set].vk_descriptor_set;
vk_copy->dstBinding = binding.binding;
vk_copy->dstArrayElement = dst.offset;
vk_copy->descriptorCount = count;
}
}
if (heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)
@ -3201,10 +3233,10 @@ void d3d12_desc_copy_range(vkd3d_cpu_descriptor_va_t dst_va, vkd3d_cpu_descripto
vk_copy = &vk_copies[copy_count++];
vk_copy->sType = VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET;
vk_copy->pNext = NULL;
vk_copy->srcSet = src.heap->vk_descriptor_sets[binding.set];
vk_copy->srcSet = src.heap->sets[binding.set].vk_descriptor_set;
vk_copy->srcBinding = binding.binding;
vk_copy->srcArrayElement = src.offset;
vk_copy->dstSet = dst.heap->vk_descriptor_sets[binding.set];
vk_copy->dstSet = dst.heap->sets[binding.set].vk_descriptor_set;
vk_copy->dstBinding = binding.binding;
vk_copy->dstArrayElement = dst.offset;
vk_copy->descriptorCount = count;
@ -3692,7 +3724,7 @@ static inline void vkd3d_init_write_descriptor_set(VkWriteDescriptorSet *vk_writ
{
vk_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
vk_write->pNext = NULL;
vk_write->dstSet = split->heap->vk_descriptor_sets[binding.set];
vk_write->dstSet = split->heap->sets[binding.set].vk_descriptor_set;
vk_write->dstBinding = binding.binding;
vk_write->dstArrayElement = split->offset;
vk_write->descriptorCount = 1;
@ -5420,6 +5452,32 @@ static void d3d12_descriptor_heap_zero_initialize(struct d3d12_descriptor_heap *
vkd3d_free(buffer_infos);
}
/* Queries the host-visible mapping of one descriptor set in the heap and caches
 * what the CPU copy fast path needs in descriptor_heap->sets[set_index].
 *
 * descriptor_heap: heap whose sets[set_index].vk_descriptor_set is already allocated.
 * binding:         bindless set info providing the copy templates and the byte offset
 *                  of the binding inside the mapped set.
 * set_index:       index into descriptor_heap->sets[].
 *
 * On return, mapped_set / copy_template / copy_template_single are either all set
 * or all NULL. They stay NULL when the binding has no copy templates or when the
 * driver hands back a NULL mapping. */
static void d3d12_descriptor_heap_get_host_mapping(struct d3d12_descriptor_heap *descriptor_heap,
        const struct vkd3d_bindless_set_info *binding, uint32_t set_index)
{
    const struct vkd3d_vk_device_procs *vk_procs = &descriptor_heap->device->vk_procs;
    /* Receive the mapping in a real void * so the driver never writes through
     * a pointer-to-pointer of an incompatible type. */
    void *host_ptr = NULL;

    descriptor_heap->sets[set_index].mapped_set = NULL;
    descriptor_heap->sets[set_index].copy_template = NULL;
    descriptor_heap->sets[set_index].copy_template_single = NULL;

    /* Both templates are required; otherwise mapping the set is pointless. */
    if (!binding->host_copy_template || !binding->host_copy_template_single)
        return;

    VK_CALL(vkGetDescriptorSetHostMappingVALVE(descriptor_heap->device->vk_device,
            descriptor_heap->sets[set_index].vk_descriptor_set, &host_ptr));

    if (!host_ptr)
        return;

    /* Store the pointer pre-offset to the start of the binding's descriptor array. */
    descriptor_heap->sets[set_index].mapped_set = (uint8_t *)host_ptr + binding->host_mapping_offset;
    /* Keep a local copy close so we can fetch stuff from same cache line easily. */
    descriptor_heap->sets[set_index].copy_template = binding->host_copy_template;
    descriptor_heap->sets[set_index].copy_template_single = binding->host_copy_template_single;
}
static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descriptor_heap *descriptor_heap,
const struct vkd3d_bindless_set_info *binding, VkDescriptorSet *vk_descriptor_set)
{
@ -5606,7 +5664,7 @@ static void d3d12_descriptor_heap_update_extra_bindings(struct d3d12_descriptor_
vk_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
vk_write->pNext = NULL;
vk_write->dstSet = descriptor_heap->vk_descriptor_sets[set_index];
vk_write->dstSet = descriptor_heap->sets[set_index].vk_descriptor_set;
vk_write->dstBinding = binding_index++;
vk_write->dstArrayElement = 0;
vk_write->descriptorCount = 1;
@ -5665,7 +5723,7 @@ static void d3d12_descriptor_heap_add_null_descriptor_template(
write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
write->pNext = NULL;
write->descriptorCount = 1;
write->dstSet = descriptor_heap->vk_descriptor_sets[set_info->set_index];
write->dstSet = descriptor_heap->sets[set_info->set_index].vk_descriptor_set;
write->dstBinding = set_info->binding_index;
/* Replaced when instantiating template. */
@ -5724,9 +5782,11 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript
if (set_info->heap_type == desc->Type)
{
if (FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap,
set_info, &descriptor_heap->vk_descriptor_sets[set_info->set_index])))
set_info, &descriptor_heap->sets[set_info->set_index].vk_descriptor_set)))
goto fail;
d3d12_descriptor_heap_get_host_mapping(descriptor_heap, set_info, set_info->set_index);
if (descriptor_heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)
d3d12_descriptor_heap_add_null_descriptor_template(descriptor_heap, set_info, i);
}

View File

@ -4664,6 +4664,71 @@ static uint32_t vkd3d_bindless_build_mutable_type_list(VkDescriptorType *list, u
return count;
}
/* Generates a pair of descriptor copy routines for a fixed descriptor size:
 *   vkd3d_descriptor_copy_desc_<bytes>        - copies `count` descriptors,
 *   vkd3d_descriptor_copy_desc_<bytes>_single - copies exactly one descriptor.
 * Indices are expressed in descriptors, not bytes.
 * The size is baked in as a literal so the compiler can see constant copy sizes,
 * especially in the single descriptor case, and emit a linear stream of SIMD copies.
 * Alignment hints could potentially yield aligned moves as well, but that does not
 * seem to matter for perf, so the extra complexity is left out. */
#define VKD3D_DECL_DESCRIPTOR_COPY_SIZE(bytes) \
static inline void vkd3d_descriptor_copy_desc_##bytes(void * restrict dst_, const void * restrict src_, \
        size_t dst_index, size_t src_index, size_t count) \
{ \
    uint8_t *dst_bytes = (uint8_t *)dst_ + dst_index * (bytes); \
    const uint8_t *src_bytes = (const uint8_t *)src_ + src_index * (bytes); \
    memcpy(dst_bytes, src_bytes, count * (bytes)); \
} \
static inline void vkd3d_descriptor_copy_desc_##bytes##_single(void * restrict dst_, const void * restrict src_, \
        size_t dst_index, size_t src_index) \
{ \
    vkd3d_descriptor_copy_desc_##bytes(dst_, src_, dst_index, src_index, 1); \
}

VKD3D_DECL_DESCRIPTOR_COPY_SIZE(8)
VKD3D_DECL_DESCRIPTOR_COPY_SIZE(16)
VKD3D_DECL_DESCRIPTOR_COPY_SIZE(32)
VKD3D_DECL_DESCRIPTOR_COPY_SIZE(48)
VKD3D_DECL_DESCRIPTOR_COPY_SIZE(64)
/* Selects the multi-descriptor copy routine specialized for descriptor_size
 * (in bytes). Returns NULL when no specialization exists for that size. */
static pfn_vkd3d_host_mapping_copy_template vkd3d_bindless_find_copy_template(uint32_t descriptor_size)
{
    if (descriptor_size == 8)
        return vkd3d_descriptor_copy_desc_8;
    if (descriptor_size == 16)
        return vkd3d_descriptor_copy_desc_16;
    if (descriptor_size == 32)
        return vkd3d_descriptor_copy_desc_32;
    if (descriptor_size == 48)
        return vkd3d_descriptor_copy_desc_48;
    if (descriptor_size == 64)
        return vkd3d_descriptor_copy_desc_64;

    /* Unsupported size. */
    return NULL;
}
/* Selects the single-descriptor copy routine specialized for descriptor_size
 * (in bytes). Returns NULL when no specialization exists for that size. */
static pfn_vkd3d_host_mapping_copy_template_single vkd3d_bindless_find_copy_template_single(uint32_t descriptor_size)
{
    if (descriptor_size == 8)
        return vkd3d_descriptor_copy_desc_8_single;
    if (descriptor_size == 16)
        return vkd3d_descriptor_copy_desc_16_single;
    if (descriptor_size == 32)
        return vkd3d_descriptor_copy_desc_32_single;
    if (descriptor_size == 48)
        return vkd3d_descriptor_copy_desc_48_single;
    if (descriptor_size == 64)
        return vkd3d_descriptor_copy_desc_64_single;

    /* Unsupported size. */
    return NULL;
}
static HRESULT vkd3d_bindless_state_add_binding(struct vkd3d_bindless_state *bindless_state,
struct d3d12_device *device, uint32_t flags, VkDescriptorType vk_descriptor_type)
{
@ -4674,6 +4739,8 @@ static HRESULT vkd3d_bindless_state_add_binding(struct vkd3d_bindless_state *bin
VkDescriptorType mutable_descriptor_types[VKD3D_MAX_MUTABLE_DESCRIPTOR_TYPES];
VkDescriptorSetLayoutBindingFlagsCreateInfoEXT vk_binding_flags_info;
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
VkDescriptorSetLayoutHostMappingInfoVALVE mapping_info;
VkDescriptorSetBindingReferenceVALVE binding_reference;
VkMutableDescriptorTypeCreateInfoVALVE mutable_info;
VkDescriptorSetLayoutCreateInfo vk_set_layout_info;
VkDescriptorSetLayoutBinding *vk_binding;
@ -4746,6 +4813,40 @@ static HRESULT vkd3d_bindless_state_add_binding(struct vkd3d_bindless_state *bin
&vk_set_layout_info, NULL, &set_info->vk_set_layout))) < 0)
ERR("Failed to create descriptor set layout, vr %d.\n", vr);
/* If we're able, we should implement descriptor copies with functions we roll ourselves. */
if (device->device_info.descriptor_set_host_mapping_features.descriptorSetHostMapping)
{
INFO("Device supports VK_VALVE_descriptor_set_host_mapping!\n");
binding_reference.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_BINDING_REFERENCE_VALVE;
binding_reference.pNext = NULL;
binding_reference.descriptorSetLayout = set_info->vk_set_layout;
binding_reference.binding = set_info->binding_index;
mapping_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_HOST_MAPPING_INFO_VALVE;
mapping_info.pNext = NULL;
VK_CALL(vkGetDescriptorSetLayoutHostMappingInfoVALVE(device->vk_device,
&binding_reference, &mapping_info));
set_info->host_mapping_offset = mapping_info.descriptorOffset;
set_info->host_mapping_descriptor_size = mapping_info.descriptorSize;
set_info->host_copy_template = vkd3d_bindless_find_copy_template(mapping_info.descriptorSize);
set_info->host_copy_template_single = vkd3d_bindless_find_copy_template_single(mapping_info.descriptorSize);
if (!set_info->host_copy_template || !set_info->host_copy_template_single)
{
FIXME("Couldn't find suitable host copy template.\n");
set_info->host_copy_template = NULL;
set_info->host_copy_template_single = NULL;
}
}
else
{
set_info->host_mapping_offset = 0;
set_info->host_mapping_descriptor_size = 0;
set_info->host_copy_template = NULL;
set_info->host_copy_template_single = NULL;
}
vk_binding->descriptorCount = d3d12_max_host_descriptor_count_from_heap_type(device, set_info->heap_type);
if (device->bindless_state.flags & VKD3D_BINDLESS_MUTABLE_TYPE)

View File

@ -164,6 +164,7 @@ struct vkd3d_vulkan_info
bool NV_compute_shader_derivatives;
/* VALVE extensions */
bool VALVE_mutable_descriptor_type;
bool VALVE_descriptor_set_host_mapping;
bool rasterization_stream;
bool transform_feedback_queries;
@ -1167,6 +1168,19 @@ struct d3d12_null_descriptor_template
bool has_mutable_descriptors;
};
/* Host-side descriptor copy callback: copies `count` descriptors from index
 * src_index in src to index dst_index in dst. Indices are in descriptors;
 * the per-descriptor byte size is baked into the selected implementation. */
typedef void (*pfn_vkd3d_host_mapping_copy_template)(void * restrict dst, const void * restrict src,
size_t dst_index, size_t src_index, size_t count);
/* Same as above, but always copies exactly one descriptor. */
typedef void (*pfn_vkd3d_host_mapping_copy_template_single)(void * restrict dst, const void * restrict src,
size_t dst_index, size_t src_index);
/* Per-descriptor-set state of a descriptor heap. */
struct d3d12_descriptor_heap_set
{
/* Vulkan descriptor set backing this slot of the heap. */
VkDescriptorSet vk_descriptor_set;
/* Host pointer obtained via VK_VALVE_descriptor_set_host_mapping, already offset
 * to the binding's descriptors. NULL when the host-copy path is not in use. */
void *mapped_set;
/* Copies of the bindless set info's copy templates, kept next to mapped_set.
 * Both are NULL whenever mapped_set is NULL. */
pfn_vkd3d_host_mapping_copy_template copy_template;
pfn_vkd3d_host_mapping_copy_template_single copy_template_single;
};
struct d3d12_descriptor_heap
{
ID3D12DescriptorHeap ID3D12DescriptorHeap_iface;
@ -1177,7 +1191,8 @@ struct d3d12_descriptor_heap
D3D12_CPU_DESCRIPTOR_HANDLE cpu_va;
VkDescriptorPool vk_descriptor_pool;
VkDescriptorSet vk_descriptor_sets[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS];
struct d3d12_descriptor_heap_set sets[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS];
struct vkd3d_device_memory_allocation device_allocation;
VkBuffer vk_buffer;
@ -2407,6 +2422,12 @@ struct vkd3d_bindless_set_info
uint32_t set_index;
uint32_t binding_index;
/* For VK_VALVE_descriptor_set_host_mapping */
size_t host_mapping_offset;
size_t host_mapping_descriptor_size;
pfn_vkd3d_host_mapping_copy_template host_copy_template;
pfn_vkd3d_host_mapping_copy_template_single host_copy_template_single;
VkDescriptorSetLayout vk_set_layout;
VkDescriptorSetLayout vk_host_set_layout;
};
@ -2833,6 +2854,7 @@ struct vkd3d_physical_device_info
VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT shader_image_atomic_int64_features;
VkPhysicalDeviceScalarBlockLayoutFeaturesEXT scalar_block_layout_features;
VkPhysicalDeviceImageViewMinLodFeaturesEXT image_view_min_lod_features;
VkPhysicalDeviceDescriptorSetHostMappingFeaturesVALVE descriptor_set_host_mapping_features;
VkPhysicalDeviceFeatures2 features2;

View File

@ -293,6 +293,10 @@ VK_DEVICE_EXT_PFN(vkCmdCuLaunchKernelNVX)
VK_DEVICE_EXT_PFN(vkGetImageViewHandleNVX)
VK_DEVICE_EXT_PFN(vkGetImageViewAddressNVX)
/* VK_VALVE_descriptor_set_host_mapping */
VK_DEVICE_EXT_PFN(vkGetDescriptorSetLayoutHostMappingInfoVALVE)
VK_DEVICE_EXT_PFN(vkGetDescriptorSetHostMappingVALVE)
#undef VK_INSTANCE_PFN
#undef VK_INSTANCE_EXT_PFN
#undef VK_DEVICE_PFN