diff --git a/libs/vkd3d/memory.c b/libs/vkd3d/memory.c index 70754ed9..c997e26f 100644 --- a/libs/vkd3d/memory.c +++ b/libs/vkd3d/memory.c @@ -177,7 +177,6 @@ static HRESULT vkd3d_try_allocate_device_memory(struct d3d12_device *device, void *pNext, struct vkd3d_device_memory_allocation *allocation) { const VkPhysicalDeviceMemoryProperties *memory_props = &device->memory_properties; - const VkMemoryPropertyFlags optional_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct vkd3d_memory_info *memory_info = &device->memory_info; VkMemoryAllocateInfo allocate_info; @@ -186,87 +185,87 @@ static HRESULT vkd3d_try_allocate_device_memory(struct d3d12_device *device, bool budget_sensitive; VkResult vr; - /* buffer_mask / sampled_mask etc will generally take care of this, - * but for certain fallback scenarios where we select other memory - * types, we need to mask here as well. */ - type_mask &= device->memory_info.global_mask; - allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; allocate_info.pNext = pNext; allocate_info.allocationSize = size; + allocate_info.memoryTypeIndex = UINT32_MAX; while (type_mask) { uint32_t type_index = vkd3d_bitmask_iter32(&type_mask); - - if ((memory_props->memoryTypes[type_index].propertyFlags & type_flags) != type_flags) - continue; - - allocate_info.memoryTypeIndex = type_index; - - budget_sensitive = !!(device->memory_info.budget_sensitive_mask & (1u << type_index)); - if (budget_sensitive) + if ((memory_props->memoryTypes[type_index].propertyFlags & type_flags) == type_flags) { - type_budget = &memory_info->type_budget[type_index]; - type_current = &memory_info->type_current[type_index]; - pthread_mutex_lock(&memory_info->budget_lock); - if (*type_current + size > *type_budget) - { - if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET) - { - INFO("Attempting to allocate from memory type %u, but exceeding fixed budget: %"PRIu64" + %"PRIu64" > %"PRIu64".\n", - type_index, *type_current, size, *type_budget); - } - pthread_mutex_unlock(&memory_info->budget_lock); - - /* If we're out of DEVICE budget, don't try other types. */ - if (type_flags & optional_flags) - return E_OUTOFMEMORY; - else - continue; - } + allocate_info.memoryTypeIndex = type_index; + break; } + } - vr = VK_CALL(vkAllocateMemory(device->vk_device, &allocate_info, NULL, &allocation->vk_memory)); + if (allocate_info.memoryTypeIndex == UINT32_MAX) + { + FIXME("Found no suitable memory type for requested type_flags #%x.\n", type_flags); + return E_OUTOFMEMORY; + } - if (budget_sensitive) + /* Once we have found a suitable memory type, only attempt to allocate that memory type. + * This avoids some problems by design: + * - If we want to allocate DEVICE_LOCAL memory, we don't try to fallback to PCI-e BAR memory by mistake. + * - If we want to allocate system memory, we don't try to fallback to PCI-e BAR memory by mistake. + * - There is no reasonable scenario where we can expect one memory type to fail, and another memory type + * with more memory property bits set to pass. This makes use of the rule where memory types which are a super-set + * of another must have a larger type index. + * - We will only attempt to allocate PCI-e BAR memory if DEVICE_LOCAL | HOST_VISIBLE is set, otherwise we + * will find a candidate memory type which is either DEVICE_LOCAL or HOST_VISIBLE before we find a PCI-e BAR type. + * - For iGPU where everything is DEVICE_LOCAL | HOST_VISIBLE, we will just find that memory type first anyways, + * but there we don't have anything to worry about w.r.t. PCI-e BAR. + */ + + /* Budgets only really apply to PCI-e BAR or other "special" types which always have a fallback. */ + budget_sensitive = !!(device->memory_info.budget_sensitive_mask & (1u << allocate_info.memoryTypeIndex)); + if (budget_sensitive) + { + type_budget = &memory_info->type_budget[allocate_info.memoryTypeIndex]; + type_current = &memory_info->type_current[allocate_info.memoryTypeIndex]; + pthread_mutex_lock(&memory_info->budget_lock); + if (*type_current + size > *type_budget) { - if (vr == VK_SUCCESS) + if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET) { - *type_current += size; - if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET) - { - INFO("Allocated memory of type %u, new total allocated size %"PRIu64" MiB.\n", - type_index, *type_current / (1024 * 1024)); - } + INFO("Attempting to allocate from memory type %u, but exceeding fixed budget: %"PRIu64" + %"PRIu64" > %"PRIu64".\n", + allocate_info.memoryTypeIndex, *type_current, size, *type_budget); } pthread_mutex_unlock(&memory_info->budget_lock); - } - else if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET) - { - INFO("%s memory of type #%u, size %"PRIu64" KiB.\n", - (vr == VK_SUCCESS ? "Allocated" : "Failed to allocate"), - type_index, allocate_info.allocationSize / 1024); - } - - if (vr == VK_SUCCESS) - { - allocation->vk_memory_type = type_index; - allocation->size = size; - return S_OK; - } - else if (type_flags & optional_flags) - { - /* If we fail to allocate DEVICE_LOCAL memory, immediately fail the call. - * This way we avoid any attempt to fall back to PCI-e BAR memory types - * which are also DEVICE_LOCAL. - * After failure, the calling code removes the DEVICE_LOCAL_BIT flag and tries again, - * where we will fall back to system memory instead. */ return E_OUTOFMEMORY; } } - return E_OUTOFMEMORY; + vr = VK_CALL(vkAllocateMemory(device->vk_device, &allocate_info, NULL, &allocation->vk_memory)); + + if (budget_sensitive) + { + if (vr == VK_SUCCESS) + { + *type_current += size; + if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET) + { + INFO("Allocated memory of type %u, new total allocated size %"PRIu64" MiB.\n", + allocate_info.memoryTypeIndex, *type_current / (1024 * 1024)); + } + } + pthread_mutex_unlock(&memory_info->budget_lock); + } + else if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET) + { + INFO("%s memory of type #%u, size %"PRIu64" KiB.\n", + (vr == VK_SUCCESS ? "Allocated" : "Failed to allocate"), + allocate_info.memoryTypeIndex, allocate_info.allocationSize / 1024); + } + + if (vr != VK_SUCCESS) + return E_OUTOFMEMORY; + + allocation->vk_memory_type = allocate_info.memoryTypeIndex; + allocation->size = size; + return S_OK; } static bool vkd3d_memory_info_type_mask_covers_multiple_memory_heaps( @@ -529,9 +528,7 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo * we must not look at heap_flags, since we might end up noping out * the memory types we want to allocate with. */ type_mask = memory_requirements.memoryTypeBits; - if (info->flags & VKD3D_ALLOCATION_FLAG_DEDICATED) - type_mask &= device->memory_info.global_mask; - else + if (!(info->flags & VKD3D_ALLOCATION_FLAG_DEDICATED)) type_mask &= vkd3d_select_memory_types(device, &info->heap_properties, info->heap_flags); /* Allocate actual backing storage */ @@ -601,8 +598,7 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo 0, VK_WHOLE_SIZE, 0, &allocation->cpu_address)))) { ERR("Failed to map memory, vr %d.\n", vr); - vkd3d_memory_allocation_free(allocation, device, allocator); - return hresult_from_vk_result(vr); + goto out_free_allocation; } } @@ -618,18 +614,14 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo if ((vr = VK_CALL(vkBindBufferMemory2KHR(device->vk_device, 1, &bind_info))) < 0) { ERR("Failed to bind buffer memory, vr %d.\n", vr); - vkd3d_memory_allocation_free(allocation, device, allocator); - return hresult_from_vk_result(vr); + goto out_free_allocation; } /* Assign GPU address as necessary. */ if (allocation->flags & VKD3D_ALLOCATION_FLAG_GPU_ADDRESS) { if (FAILED(hr = vkd3d_allocation_assign_gpu_address(allocation, device, allocator))) - { - vkd3d_memory_allocation_free(allocation, device, allocator); - return hresult_from_vk_result(vr); - } + goto out_free_allocation; } } @@ -640,6 +632,11 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo TRACE("Created allocation %p on memory type %u (%"PRIu64" bytes).\n", allocation, allocation->device_allocation.vk_memory_type, allocation->resource.size); return S_OK; + +out_free_allocation: + vkd3d_memory_allocation_free(allocation, device, allocator); + memset(allocation, 0, sizeof(*allocation)); + return hresult_from_vk_result(vr); } static void vkd3d_memory_chunk_insert_range(struct vkd3d_memory_chunk *chunk, @@ -1316,7 +1313,6 @@ static HRESULT vkd3d_memory_allocator_try_suballocate_memory(struct vkd3d_memory HRESULT hr; size_t i; - type_mask &= device->memory_info.global_mask; type_mask &= memory_requirements->memoryTypeBits; for (i = 0; i < allocator->chunks_count; i++) diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 06e3c292..b333b91b 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -2756,7 +2756,7 @@ HRESULT d3d12_resource_create_committed(struct d3d12_device *device, const D3D12 if (!(use_dedicated_allocation = dedicated_requirements.prefersDedicatedAllocation)) { - const uint32_t type_mask = memory_requirements.memoryRequirements.memoryTypeBits & device->memory_info.global_mask; + const uint32_t type_mask = memory_requirements.memoryRequirements.memoryTypeBits; const struct vkd3d_memory_info_domain *domain = d3d12_device_get_memory_info_domain(device, heap_properties); use_dedicated_allocation = (type_mask & domain->buffer_type_mask) != type_mask; } @@ -6403,49 +6403,6 @@ static void vkd3d_memory_info_get_topology(struct vkd3d_memory_topology *topolog } } -static uint32_t vkd3d_memory_info_find_global_mask(const struct vkd3d_memory_topology *topology, struct d3d12_device *device) -{ - /* Never allow memory types from any PCI-pinned heap. - * If we allow it, it might end up being used as a fallback memory type, which will cause severe instabilities. - * These types should only be used in a controlled fashion. */ - VkMemoryPropertyFlags flags; - uint32_t heap_index; - uint32_t i, mask; - - if (!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_NO_UPLOAD_HVV)) - return UINT32_MAX; - - /* If we only have one device local heap, or no host-only heaps, there is nothing to do. */ - if (topology->device_local_heap_count <= 1 || topology->host_only_heap_count == 0) - return UINT32_MAX; - - /* Verify that there exists a DEVICE_LOCAL type that is not HOST_VISIBLE on this device - * which maps to the largest device local heap. That way, it is safe to mask out all memory types which are - * DEVICE_LOCAL | HOST_VISIBLE. - * Similarly, there must exist a host-only type. */ - if (!topology->exists_device_only_type || !topology->exists_host_only_type) - return UINT32_MAX; - - /* Mask out any memory types which are deemed problematic. */ - for (i = 0, mask = 0; i < device->memory_properties.memoryTypeCount; i++) - { - const VkMemoryPropertyFlags pinned_mask = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - flags = device->memory_properties.memoryTypes[i].propertyFlags; - heap_index = device->memory_properties.memoryTypes[i].heapIndex; - - if (heap_index != topology->largest_device_local_heap_index && - heap_index != topology->largest_host_only_heap_index && - (flags & pinned_mask) == pinned_mask) - { - mask |= 1u << i; - WARN("Blocking memory type %u for use (PCI-pinned memory).\n", i); - } - } - - return ~mask; -} - static void vkd3d_memory_info_init_budgets(struct vkd3d_memory_info *info, const struct vkd3d_memory_topology *topology, struct d3d12_device *device) @@ -6525,7 +6482,6 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info, uint32_t i; vkd3d_memory_info_get_topology(&topology, device); - info->global_mask = vkd3d_memory_info_find_global_mask(&topology, device); vkd3d_memory_info_init_budgets(info, &topology, device); if (pthread_mutex_init(&info->budget_lock, NULL) != 0) @@ -6642,12 +6598,6 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info, * On AMD, we can get linear RT, but not linear DS, so for now, just don't check for that. * We will fail in resource creation instead. */ - buffer_type_mask &= info->global_mask; - sampled_type_mask &= info->global_mask; - rt_ds_type_mask &= info->global_mask; - sampled_type_mask_cpu &= info->global_mask; - rt_ds_type_mask_cpu &= info->global_mask; - info->non_cpu_accessible_domain.buffer_type_mask = buffer_type_mask; info->non_cpu_accessible_domain.sampled_type_mask = sampled_type_mask; info->non_cpu_accessible_domain.rt_ds_type_mask = rt_ds_type_mask; diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index ac4311a6..62d0818e 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -2877,7 +2877,6 @@ struct vkd3d_memory_info_domain struct vkd3d_memory_info { - uint32_t global_mask; /* Includes normal system memory, but also resizable BAR memory. * Only types which have HOST_VISIBLE_BIT can be in this domain. * For images, we only include memory types which are LINEAR tiled. */