Hans-Kristian Arntzen 2022-07-28 21:04:48 +02:00 committed by GitHub
commit 6b2c33fd8c
3 changed files with 72 additions and 127 deletions

View File

@@ -177,7 +177,6 @@ static HRESULT vkd3d_try_allocate_device_memory(struct d3d12_device *device,
         void *pNext, struct vkd3d_device_memory_allocation *allocation)
 {
     const VkPhysicalDeviceMemoryProperties *memory_props = &device->memory_properties;
-    const VkMemoryPropertyFlags optional_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
     const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
     struct vkd3d_memory_info *memory_info = &device->memory_info;
     VkMemoryAllocateInfo allocate_info;
@@ -186,87 +185,87 @@ static HRESULT vkd3d_try_allocate_device_memory(struct d3d12_device *device,
     bool budget_sensitive;
     VkResult vr;
 
-    /* buffer_mask / sampled_mask etc. will generally take care of this,
-     * but for certain fallback scenarios where we select other memory
-     * types, we need to mask here as well. */
-    type_mask &= device->memory_info.global_mask;
-
     allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
     allocate_info.pNext = pNext;
     allocate_info.allocationSize = size;
+    allocate_info.memoryTypeIndex = UINT32_MAX;
 
     while (type_mask)
     {
         uint32_t type_index = vkd3d_bitmask_iter32(&type_mask);
-        if ((memory_props->memoryTypes[type_index].propertyFlags & type_flags) != type_flags)
-            continue;
-
-        allocate_info.memoryTypeIndex = type_index;
-
-        budget_sensitive = !!(device->memory_info.budget_sensitive_mask & (1u << type_index));
-        if (budget_sensitive)
-        {
-            type_budget = &memory_info->type_budget[type_index];
-            type_current = &memory_info->type_current[type_index];
-            pthread_mutex_lock(&memory_info->budget_lock);
-            if (*type_current + size > *type_budget)
-            {
-                if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
-                {
-                    INFO("Attempting to allocate from memory type %u, but exceeding fixed budget: %"PRIu64" + %"PRIu64" > %"PRIu64".\n",
-                            type_index, *type_current, size, *type_budget);
-                }
-                pthread_mutex_unlock(&memory_info->budget_lock);
-                /* If we're out of DEVICE budget, don't try other types. */
-                if (type_flags & optional_flags)
-                    return E_OUTOFMEMORY;
-                else
-                    continue;
-            }
-        }
-
-        vr = VK_CALL(vkAllocateMemory(device->vk_device, &allocate_info, NULL, &allocation->vk_memory));
-
-        if (budget_sensitive)
-        {
-            if (vr == VK_SUCCESS)
-            {
-                *type_current += size;
-                if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
-                {
-                    INFO("Allocated memory of type %u, new total allocated size %"PRIu64" MiB.\n",
-                            type_index, *type_current / (1024 * 1024));
-                }
-            }
-            pthread_mutex_unlock(&memory_info->budget_lock);
-        }
-        else if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
-        {
-            INFO("%s memory of type #%u, size %"PRIu64" KiB.\n",
-                    (vr == VK_SUCCESS ? "Allocated" : "Failed to allocate"),
-                    type_index, allocate_info.allocationSize / 1024);
-        }
-
-        if (vr == VK_SUCCESS)
-        {
-            allocation->vk_memory_type = type_index;
-            allocation->size = size;
-            return S_OK;
-        }
-        else if (type_flags & optional_flags)
-        {
-            /* If we fail to allocate DEVICE_LOCAL memory, immediately fail the call.
-             * This way we avoid any attempt to fall back to PCI-e BAR memory types
-             * which are also DEVICE_LOCAL.
-             * After failure, the calling code removes the DEVICE_LOCAL_BIT flag and tries again,
-             * where we will fall back to system memory instead. */
-            return E_OUTOFMEMORY;
-        }
-    }
-
-    return E_OUTOFMEMORY;
+        if ((memory_props->memoryTypes[type_index].propertyFlags & type_flags) == type_flags)
+        {
+            allocate_info.memoryTypeIndex = type_index;
+            break;
+        }
+    }
+
+    if (allocate_info.memoryTypeIndex == UINT32_MAX)
+    {
+        FIXME("Found no suitable memory type for requested type_flags #%x.\n", type_flags);
+        return E_OUTOFMEMORY;
+    }
+
+    /* Once we have found a suitable memory type, only attempt to allocate that memory type.
+     * This avoids some problems by design:
+     * - If we want to allocate DEVICE_LOCAL memory, we don't try to fall back to PCI-e BAR memory by mistake.
+     * - If we want to allocate system memory, we don't try to fall back to PCI-e BAR memory by mistake.
+     * - There is no reasonable scenario where we can expect one memory type to fail, and another memory type
+     *   with more memory property bits set to pass. This makes use of the rule that memory types whose
+     *   property flags are a super-set of another's must have a larger type index.
+     * - We will only attempt to allocate PCI-e BAR memory if DEVICE_LOCAL | HOST_VISIBLE is set; otherwise we
+     *   will find a candidate memory type which is either DEVICE_LOCAL or HOST_VISIBLE before we find a PCI-e BAR type.
+     * - For iGPU where everything is DEVICE_LOCAL | HOST_VISIBLE, we will just find that memory type first anyway,
+     *   but there we don't have anything to worry about w.r.t. PCI-e BAR. */
+
+    /* Budgets only really apply to PCI-e BAR or other "special" types which always have a fallback. */
+    budget_sensitive = !!(device->memory_info.budget_sensitive_mask & (1u << allocate_info.memoryTypeIndex));
+    if (budget_sensitive)
+    {
+        type_budget = &memory_info->type_budget[allocate_info.memoryTypeIndex];
+        type_current = &memory_info->type_current[allocate_info.memoryTypeIndex];
+        pthread_mutex_lock(&memory_info->budget_lock);
+        if (*type_current + size > *type_budget)
+        {
+            if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
+            {
+                INFO("Attempting to allocate from memory type %u, but exceeding fixed budget: %"PRIu64" + %"PRIu64" > %"PRIu64".\n",
+                        allocate_info.memoryTypeIndex, *type_current, size, *type_budget);
+            }
+            pthread_mutex_unlock(&memory_info->budget_lock);
+            return E_OUTOFMEMORY;
+        }
+    }
+
+    vr = VK_CALL(vkAllocateMemory(device->vk_device, &allocate_info, NULL, &allocation->vk_memory));
+
+    if (budget_sensitive)
+    {
+        if (vr == VK_SUCCESS)
+        {
+            *type_current += size;
+            if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
+            {
+                INFO("Allocated memory of type %u, new total allocated size %"PRIu64" MiB.\n",
+                        allocate_info.memoryTypeIndex, *type_current / (1024 * 1024));
+            }
+        }
+        pthread_mutex_unlock(&memory_info->budget_lock);
+    }
+    else if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
+    {
+        INFO("%s memory of type #%u, size %"PRIu64" KiB.\n",
+                (vr == VK_SUCCESS ? "Allocated" : "Failed to allocate"),
+                allocate_info.memoryTypeIndex, allocate_info.allocationSize / 1024);
+    }
+
+    if (vr != VK_SUCCESS)
+        return E_OUTOFMEMORY;
+
+    allocation->vk_memory_type = allocate_info.memoryTypeIndex;
+    allocation->size = size;
+    return S_OK;
 }
 
 static bool vkd3d_memory_info_type_mask_covers_multiple_memory_heaps(
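An aside on why the new first-match loop is sound: the Vulkan specification orders memory types so that if the property flags of type X form a strict subset of those of type Y, X is given the lower type index. Scanning candidates in ascending index order therefore finds the least specialized type that satisfies type_flags before any more specialized (e.g. PCI-e BAR) type. The sketch below is a hypothetical standalone version of that selection; find_first_matching_type is not a vkd3d-proton function, just an illustration of the rule the commit comment relies on.

#include <stdint.h>
#include <vulkan/vulkan.h>

/* Returns the lowest-index memory type allowed by type_mask whose property
 * flags contain all bits in type_flags, or UINT32_MAX if none matches.
 * Because subset types sort first, a request for DEVICE_LOCAL alone can never
 * land on a DEVICE_LOCAL | HOST_VISIBLE (PCI-e BAR) type by accident while a
 * plain DEVICE_LOCAL type exists. */
static uint32_t find_first_matching_type(const VkPhysicalDeviceMemoryProperties *props,
        uint32_t type_mask, VkMemoryPropertyFlags type_flags)
{
    uint32_t i;

    for (i = 0; i < props->memoryTypeCount; i++)
    {
        if ((type_mask & (1u << i)) &&
                (props->memoryTypes[i].propertyFlags & type_flags) == type_flags)
            return i;
    }
    return UINT32_MAX;
}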
@@ -529,9 +528,7 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
      * we must not look at heap_flags, since we might end up noping out
      * the memory types we want to allocate with. */
     type_mask = memory_requirements.memoryTypeBits;
-    if (info->flags & VKD3D_ALLOCATION_FLAG_DEDICATED)
-        type_mask &= device->memory_info.global_mask;
-    else
+    if (!(info->flags & VKD3D_ALLOCATION_FLAG_DEDICATED))
         type_mask &= vkd3d_select_memory_types(device, &info->heap_properties, info->heap_flags);
 
     /* Allocate actual backing storage */
@@ -601,8 +598,7 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
                 0, VK_WHOLE_SIZE, 0, &allocation->cpu_address))))
         {
             ERR("Failed to map memory, vr %d.\n", vr);
-            vkd3d_memory_allocation_free(allocation, device, allocator);
-            return hresult_from_vk_result(vr);
+            goto out_free_allocation;
         }
     }
@@ -618,18 +614,14 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
         if ((vr = VK_CALL(vkBindBufferMemory2KHR(device->vk_device, 1, &bind_info))) < 0)
         {
             ERR("Failed to bind buffer memory, vr %d.\n", vr);
-            vkd3d_memory_allocation_free(allocation, device, allocator);
-            return hresult_from_vk_result(vr);
+            goto out_free_allocation;
         }
 
         /* Assign GPU address as necessary. */
         if (allocation->flags & VKD3D_ALLOCATION_FLAG_GPU_ADDRESS)
        {
             if (FAILED(hr = vkd3d_allocation_assign_gpu_address(allocation, device, allocator)))
-            {
-                vkd3d_memory_allocation_free(allocation, device, allocator);
-                return hresult_from_vk_result(vr);
-            }
+                goto out_free_allocation;
         }
     }
@@ -640,6 +632,11 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
     TRACE("Created allocation %p on memory type %u (%"PRIu64" bytes).\n",
             allocation, allocation->device_allocation.vk_memory_type, allocation->resource.size);
     return S_OK;
+
+out_free_allocation:
+    vkd3d_memory_allocation_free(allocation, device, allocator);
+    memset(allocation, 0, sizeof(*allocation));
+    return hresult_from_vk_result(vr);
 }
 
 static void vkd3d_memory_chunk_insert_range(struct vkd3d_memory_chunk *chunk,
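The three error paths above now converge on one out_free_allocation label, the classic single-exit cleanup idiom in C. Below is a minimal self-contained sketch of the pattern, with all names invented for illustration; it is not vkd3d-proton code.

#include <string.h>

struct toy_allocation
{
    void *memory;
    void *mapped;
};

/* Stubs standing in for the real acquire/map/release steps. */
static int toy_acquire(struct toy_allocation *a) { a->memory = a; return 0; }
static int toy_map(struct toy_allocation *a) { a->mapped = a; return 0; }
static void toy_release(struct toy_allocation *a) { (void)a; }

static int toy_allocation_init(struct toy_allocation *a)
{
    int vr;

    if ((vr = toy_acquire(a)) < 0)
        return vr;                /* nothing to clean up yet */
    if ((vr = toy_map(a)) < 0)
        goto out_free;            /* every later failure funnels here */

    return 0;

out_free:
    toy_release(a);               /* cleanup is written exactly once */
    memset(a, 0, sizeof(*a));     /* leave the object in a known-empty state */
    return vr;
}

The memset mirrors the commit: after freeing, the allocation struct is zeroed so callers cannot observe stale handles.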
@@ -1316,7 +1313,6 @@ static HRESULT vkd3d_memory_allocator_try_suballocate_memory(struct vkd3d_memory
     HRESULT hr;
     size_t i;
 
-    type_mask &= device->memory_info.global_mask;
     type_mask &= memory_requirements->memoryTypeBits;
 
     for (i = 0; i < allocator->chunks_count; i++)

View File

@@ -2756,7 +2756,7 @@ HRESULT d3d12_resource_create_committed(struct d3d12_device *device, const D3D12
 
     if (!(use_dedicated_allocation = dedicated_requirements.prefersDedicatedAllocation))
     {
-        const uint32_t type_mask = memory_requirements.memoryRequirements.memoryTypeBits & device->memory_info.global_mask;
+        const uint32_t type_mask = memory_requirements.memoryRequirements.memoryTypeBits;
         const struct vkd3d_memory_info_domain *domain = d3d12_device_get_memory_info_domain(device, heap_properties);
         use_dedicated_allocation = (type_mask & domain->buffer_type_mask) != type_mask;
     }
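The dedicated-allocation check is a plain subset test on bitmasks: fall back to a dedicated allocation whenever the resource's permitted memory types are not all covered by the domain's shared buffer types. A hand-worked illustration follows; the helper name and values are invented, not from the source.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool needs_dedicated_allocation(uint32_t memory_type_bits, uint32_t buffer_type_mask)
{
    /* True when memory_type_bits is NOT a subset of buffer_type_mask. */
    return (memory_type_bits & buffer_type_mask) != memory_type_bits;
}

int main(void)
{
    /* Resource may live in types 1 or 2; the shared buffer path only uses type 1. */
    assert(needs_dedicated_allocation(0x6u /* 0b0110 */, 0x2u /* 0b0010 */));
    /* A resource restricted to type 1 fits the shared path. */
    assert(!needs_dedicated_allocation(0x2u, 0x2u));
    return 0;
}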
@@ -6403,49 +6403,6 @@ static void vkd3d_memory_info_get_topology(struct vkd3d_memory_topology *topolog
     }
 }
 
-static uint32_t vkd3d_memory_info_find_global_mask(const struct vkd3d_memory_topology *topology, struct d3d12_device *device)
-{
-    /* Never allow memory types from any PCI-pinned heap.
-     * If we allow it, it might end up being used as a fallback memory type, which will cause severe instabilities.
-     * These types should only be used in a controlled fashion. */
-    VkMemoryPropertyFlags flags;
-    uint32_t heap_index;
-    uint32_t i, mask;
-
-    if (!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_NO_UPLOAD_HVV))
-        return UINT32_MAX;
-
-    /* If we only have one device local heap, or no host-only heaps, there is nothing to do. */
-    if (topology->device_local_heap_count <= 1 || topology->host_only_heap_count == 0)
-        return UINT32_MAX;
-
-    /* Verify that there exists a DEVICE_LOCAL type that is not HOST_VISIBLE on this device
-     * which maps to the largest device local heap. That way, it is safe to mask out all memory types which are
-     * DEVICE_LOCAL | HOST_VISIBLE.
-     * Similarly, there must exist a host-only type. */
-    if (!topology->exists_device_only_type || !topology->exists_host_only_type)
-        return UINT32_MAX;
-
-    /* Mask out any memory types which are deemed problematic. */
-    for (i = 0, mask = 0; i < device->memory_properties.memoryTypeCount; i++)
-    {
-        const VkMemoryPropertyFlags pinned_mask = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-                VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
-        flags = device->memory_properties.memoryTypes[i].propertyFlags;
-        heap_index = device->memory_properties.memoryTypes[i].heapIndex;
-
-        if (heap_index != topology->largest_device_local_heap_index &&
-                heap_index != topology->largest_host_only_heap_index &&
-                (flags & pinned_mask) == pinned_mask)
-        {
-            mask |= 1u << i;
-            WARN("Blocking memory type %u for use (PCI-pinned memory).\n", i);
-        }
-    }
-
-    return ~mask;
-}
-
 static void vkd3d_memory_info_init_budgets(struct vkd3d_memory_info *info,
         const struct vkd3d_memory_topology *topology,
         struct d3d12_device *device)
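The deleted helper built its allow-mask by accumulating blocked type indices and inverting at the end, so callers could simply AND it against any candidate set. A small worked example of that construction, using a hypothetical device with five memory types:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t blocked = 1u << 2;        /* the loop flags type 2 as PCI-pinned */
    uint32_t global_mask = ~blocked;   /* the value the helper returned */
    uint32_t candidates = 0x1Fu;       /* memoryTypeBits: types 0..4 */

    candidates &= global_mask;         /* how every call site applied it */
    assert(candidates == 0x1Bu);       /* types 0, 1, 3, 4 survive */
    return 0;
}

With the first-match selection loop in place, this global filtering is presumably no longer needed, which is why the commit removes the function along with all of its call sites below.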
@@ -6525,7 +6482,6 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info,
     uint32_t i;
 
     vkd3d_memory_info_get_topology(&topology, device);
-    info->global_mask = vkd3d_memory_info_find_global_mask(&topology, device);
     vkd3d_memory_info_init_budgets(info, &topology, device);
 
     if (pthread_mutex_init(&info->budget_lock, NULL) != 0)
@@ -6642,12 +6598,6 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info,
      * On AMD, we can get linear RT, but not linear DS, so for now, just don't check for that.
      * We will fail in resource creation instead. */
 
-    buffer_type_mask &= info->global_mask;
-    sampled_type_mask &= info->global_mask;
-    rt_ds_type_mask &= info->global_mask;
-    sampled_type_mask_cpu &= info->global_mask;
-    rt_ds_type_mask_cpu &= info->global_mask;
-
     info->non_cpu_accessible_domain.buffer_type_mask = buffer_type_mask;
     info->non_cpu_accessible_domain.sampled_type_mask = sampled_type_mask;
     info->non_cpu_accessible_domain.rt_ds_type_mask = rt_ds_type_mask;

View File

@@ -2877,7 +2877,6 @@ struct vkd3d_memory_info_domain
 
 struct vkd3d_memory_info
 {
-    uint32_t global_mask;
     /* Includes normal system memory, but also resizable BAR memory.
     * Only types which have HOST_VISIBLE_BIT can be in this domain.
      * For images, we only include memory types which are LINEAR tiled. */