diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index ff06c872..530abe38 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -4749,21 +4749,18 @@ static D3D12_RAYTRACING_TIER d3d12_device_determine_ray_tracing_tier(struct d3d1 static D3D12_RESOURCE_HEAP_TIER d3d12_device_determine_heap_tier(struct d3d12_device *device) { const VkPhysicalDeviceLimits *limits = &device->device_info.properties2.properties.limits; - const VkPhysicalDeviceMemoryProperties *mem_properties = &device->memory_properties; const struct vkd3d_memory_info *mem_info = &device->memory_info; - uint32_t i, host_visible_types = 0; + const struct vkd3d_memory_info_domain *non_cpu_domain; + const struct vkd3d_memory_info_domain *cpu_domain; - for (i = 0; i < mem_properties->memoryTypeCount; i++) - { - if (mem_properties->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) - host_visible_types |= 1u << i; - } + non_cpu_domain = &mem_info->non_cpu_accessible_domain; + cpu_domain = &mem_info->cpu_accessible_domain; // Heap Tier 2 requires us to be able to create a heap that supports all resource // categories at the same time, except RT/DS textures on UPLOAD/READBACK heaps. 
if (limits->bufferImageGranularity > D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT || - !(mem_info->buffer_type_mask & mem_info->sampled_type_mask & mem_info->rt_ds_type_mask) || - !(mem_info->buffer_type_mask & mem_info->sampled_type_mask & host_visible_types)) + !(non_cpu_domain->buffer_type_mask & non_cpu_domain->sampled_type_mask & non_cpu_domain->rt_ds_type_mask) || + !(cpu_domain->buffer_type_mask & cpu_domain->sampled_type_mask)) return D3D12_RESOURCE_HEAP_TIER_1; return D3D12_RESOURCE_HEAP_TIER_2; diff --git a/libs/vkd3d/memory.c b/libs/vkd3d/memory.c index a01e0b25..aceb18fd 100644 --- a/libs/vkd3d/memory.c +++ b/libs/vkd3d/memory.c @@ -24,34 +24,25 @@ static void vkd3d_memory_allocator_wait_allocation(struct vkd3d_memory_allocator *allocator, struct d3d12_device *device, const struct vkd3d_memory_allocation *allocation); -static inline bool is_cpu_accessible_heap(const D3D12_HEAP_PROPERTIES *properties) -{ - if (properties->Type == D3D12_HEAP_TYPE_DEFAULT) - return false; - if (properties->Type == D3D12_HEAP_TYPE_CUSTOM) - { - return properties->CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE - || properties->CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_BACK; - } - return true; -} - static uint32_t vkd3d_select_memory_types(struct d3d12_device *device, const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags) { const VkPhysicalDeviceMemoryProperties *memory_info = &device->memory_properties; uint32_t type_mask = (1 << memory_info->memoryTypeCount) - 1; + const struct vkd3d_memory_info_domain *domain_info; + + domain_info = d3d12_device_get_memory_info_domain(device, heap_properties); if (!(heap_flags & D3D12_HEAP_FLAG_DENY_BUFFERS)) - type_mask &= device->memory_info.buffer_type_mask; + type_mask &= domain_info->buffer_type_mask; if (!(heap_flags & D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES)) - type_mask &= device->memory_info.sampled_type_mask; + type_mask &= domain_info->sampled_type_mask; /* Render targets are not allowed on 
UPLOAD and READBACK heaps */ if (!(heap_flags & D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES) && heap_properties->Type != D3D12_HEAP_TYPE_UPLOAD && heap_properties->Type != D3D12_HEAP_TYPE_READBACK) - type_mask &= device->memory_info.rt_ds_type_mask; + type_mask &= domain_info->rt_ds_type_mask; if (!type_mask) ERR("No memory type found for heap flags %#x.\n", heap_flags); diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index adb10785..d432ae22 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -36,18 +36,6 @@ LONG64 vkd3d_allocate_cookie() return InterlockedIncrement64(&global_cookie_counter); } -static inline bool is_cpu_accessible_heap(const D3D12_HEAP_PROPERTIES *properties) -{ - if (properties->Type == D3D12_HEAP_TYPE_DEFAULT) - return false; - if (properties->Type == D3D12_HEAP_TYPE_CUSTOM) - { - return properties->CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE - || properties->CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_BACK; - } - return true; -} - static VkImageType vk_image_type_from_d3d12_resource_dimension(D3D12_RESOURCE_DIMENSION dimension) { switch (dimension) @@ -2523,7 +2511,8 @@ HRESULT d3d12_resource_create_committed(struct d3d12_device *device, const D3D12 if (!(use_dedicated_allocation = dedicated_requirements.prefersDedicatedAllocation)) { const uint32_t type_mask = memory_requirements.memoryRequirements.memoryTypeBits & device->memory_info.global_mask; - use_dedicated_allocation = (type_mask & device->memory_info.buffer_type_mask) != type_mask; + const struct vkd3d_memory_info_domain *domain = d3d12_device_get_memory_info_domain(device, heap_properties); + use_dedicated_allocation = (type_mask & domain->buffer_type_mask) != type_mask; } memset(&allocate_info, 0, sizeof(allocate_info)); @@ -5980,10 +5969,17 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info, const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkMemoryRequirements memory_requirements; VkBufferCreateInfo buffer_info; + 
uint32_t sampled_type_mask_cpu; VkImageCreateInfo image_info; + uint32_t rt_ds_type_mask_cpu; + uint32_t sampled_type_mask; + uint32_t host_visible_mask; + uint32_t buffer_type_mask; + uint32_t rt_ds_type_mask; VkBuffer buffer; VkImage image; VkResult vr; + uint32_t i; info->global_mask = vkd3d_memory_info_find_global_mask(device); @@ -6017,7 +6013,7 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info, VK_CALL(vkGetBufferMemoryRequirements(device->vk_device, buffer, &memory_requirements)); VK_CALL(vkDestroyBuffer(device->vk_device, buffer, NULL)); - info->buffer_type_mask = memory_requirements.memoryTypeBits; + buffer_type_mask = memory_requirements.memoryTypeBits; memset(&image_info, 0, sizeof(image_info)); image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; @@ -6045,7 +6041,30 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info, VK_CALL(vkGetImageMemoryRequirements(device->vk_device, image, &memory_requirements)); VK_CALL(vkDestroyImage(device->vk_device, image, NULL)); - info->sampled_type_mask = memory_requirements.memoryTypeBits; + sampled_type_mask = memory_requirements.memoryTypeBits; + + /* CPU accessible images are always LINEAR. + * If we ever get a way to write to OPTIMAL-ly tiled images, we can drop this and just + * do sampled_type_mask_cpu & host_visible_mask. */ + image_info.tiling = VK_IMAGE_TILING_LINEAR; + image_info.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; + image_info.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT; + /* Deliberately omit STORAGE_BIT here, since it's not supported at all on NV with HOST_VISIBLE. + * Probably not 100% correct, but we can fix this if we get host visible OPTIMAL at some point. 
*/ + sampled_type_mask_cpu = 0; + if (vkd3d_is_linear_tiling_supported(device, &image_info)) + { + if ((vr = VK_CALL(vkCreateImage(device->vk_device, &image_info, NULL, &image))) == VK_SUCCESS) + { + VK_CALL(vkGetImageMemoryRequirements(device->vk_device, image, &memory_requirements)); + VK_CALL(vkDestroyImage(device->vk_device, image, NULL)); + sampled_type_mask_cpu = memory_requirements.memoryTypeBits; + } + } + image_info.tiling = VK_IMAGE_TILING_OPTIMAL; + image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; image_info.format = VK_FORMAT_R8G8B8A8_UNORM; image_info.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | @@ -6062,7 +6081,22 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info, VK_CALL(vkGetImageMemoryRequirements(device->vk_device, image, &memory_requirements)); VK_CALL(vkDestroyImage(device->vk_device, image, NULL)); - info->rt_ds_type_mask = memory_requirements.memoryTypeBits; + rt_ds_type_mask = memory_requirements.memoryTypeBits; + + image_info.tiling = VK_IMAGE_TILING_LINEAR; + image_info.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; + rt_ds_type_mask_cpu = 0; + if (vkd3d_is_linear_tiling_supported(device, &image_info)) + { + if ((vr = VK_CALL(vkCreateImage(device->vk_device, &image_info, NULL, &image))) == VK_SUCCESS) + { + VK_CALL(vkGetImageMemoryRequirements(device->vk_device, image, &memory_requirements)); + VK_CALL(vkDestroyImage(device->vk_device, image, NULL)); + rt_ds_type_mask_cpu = memory_requirements.memoryTypeBits; + } + } + image_info.tiling = VK_IMAGE_TILING_OPTIMAL; + image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; image_info.format = VK_FORMAT_D32_SFLOAT_S8_UINT; image_info.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | @@ -6078,14 +6112,37 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info, VK_CALL(vkGetImageMemoryRequirements(device->vk_device, image, &memory_requirements)); VK_CALL(vkDestroyImage(device->vk_device, image, NULL)); - info->rt_ds_type_mask &= memory_requirements.memoryTypeBits; + 
rt_ds_type_mask &= memory_requirements.memoryTypeBits; - info->buffer_type_mask &= info->global_mask; - info->sampled_type_mask &= info->global_mask; - info->rt_ds_type_mask &= info->global_mask; + /* Unsure if we can have host visible depth-stencil. + * On AMD, we can get linear RT, but not linear DS, so for now, just don't check for that. + * We will fail in resource creation instead. */ - TRACE("Device supports buffers on memory types 0x%#x.\n", info->buffer_type_mask); - TRACE("Device supports textures on memory types 0x%#x.\n", info->sampled_type_mask); - TRACE("Device supports render targets on memory types 0x%#x.\n", info->rt_ds_type_mask); + buffer_type_mask &= info->global_mask; + sampled_type_mask &= info->global_mask; + rt_ds_type_mask &= info->global_mask; + sampled_type_mask_cpu &= info->global_mask; + rt_ds_type_mask_cpu &= info->global_mask; + + info->non_cpu_accessible_domain.buffer_type_mask = buffer_type_mask; + info->non_cpu_accessible_domain.sampled_type_mask = sampled_type_mask; + info->non_cpu_accessible_domain.rt_ds_type_mask = rt_ds_type_mask; + + host_visible_mask = 0; + for (i = 0; i < device->memory_properties.memoryTypeCount; i++) + if (device->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + host_visible_mask |= 1u << i; + + info->cpu_accessible_domain.buffer_type_mask = buffer_type_mask & host_visible_mask; + info->cpu_accessible_domain.sampled_type_mask = sampled_type_mask_cpu & host_visible_mask; + info->cpu_accessible_domain.rt_ds_type_mask = rt_ds_type_mask_cpu & host_visible_mask; + + TRACE("Device supports buffers on memory types %#x.\n", buffer_type_mask); + TRACE("Device supports textures on memory types %#x.\n", sampled_type_mask); + TRACE("Device supports render targets on memory types %#x.\n", rt_ds_type_mask); + TRACE("Device supports CPU visible textures on memory types %#x.\n", + info->cpu_accessible_domain.sampled_type_mask); + TRACE("Device supports CPU visible render 
targets on memory types %#x.\n", + info->cpu_accessible_domain.rt_ds_type_mask); return S_OK; } diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index def3a66b..3400b119 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -2209,14 +2209,28 @@ struct vkd3d_format_compatibility_list VkFormat vk_formats[VKD3D_MAX_COMPATIBLE_FORMAT_COUNT]; }; -struct vkd3d_memory_info +struct vkd3d_memory_info_domain { - uint32_t global_mask; uint32_t buffer_type_mask; uint32_t sampled_type_mask; uint32_t rt_ds_type_mask; }; +struct vkd3d_memory_info +{ + uint32_t global_mask; + /* Includes normal system memory, but also resizable BAR memory. + * Only types which have HOST_VISIBLE_BIT can be in this domain. + * For images, we only include memory types which are LINEAR tiled. */ + struct vkd3d_memory_info_domain cpu_accessible_domain; + /* Also includes fallback memory types when DEVICE_LOCAL is exhausted. + * It can include HOST_VISIBLE_BIT as well, but when choosing this domain, + * that's not something we care about. + * Used when we want to allocate DEFAULT heaps or non-visible CUSTOM heaps. + * For images, we only include memory types which are OPTIMAL tiled. 
*/ + struct vkd3d_memory_info_domain non_cpu_accessible_domain; +}; + HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info, struct d3d12_device *device); @@ -2696,6 +2710,30 @@ void d3d12_device_return_query_pool(struct d3d12_device *device, const struct vk uint64_t d3d12_device_get_descriptor_heap_gpu_va(struct d3d12_device *device); void d3d12_device_return_descriptor_heap_gpu_va(struct d3d12_device *device, uint64_t va); +static inline bool is_cpu_accessible_heap(const D3D12_HEAP_PROPERTIES *properties) +{ + if (properties->Type == D3D12_HEAP_TYPE_DEFAULT) + return false; + if (properties->Type == D3D12_HEAP_TYPE_CUSTOM) + { + return properties->CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE + || properties->CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_BACK; + } + return true; +} + +static inline const struct vkd3d_memory_info_domain *d3d12_device_get_memory_info_domain( + struct d3d12_device *device, + const D3D12_HEAP_PROPERTIES *heap_properties) +{ + /* Host visible and non-host visible memory types do not necessarily + * overlap. Need to select memory types appropriately. */ + if (is_cpu_accessible_heap(heap_properties)) + return &device->memory_info.cpu_accessible_domain; + else + return &device->memory_info.non_cpu_accessible_domain; +} + static inline HRESULT d3d12_device_query_interface(struct d3d12_device *device, REFIID iid, void **object) { return ID3D12Device6_QueryInterface(&device->ID3D12Device_iface, iid, object);