vkd3d: Refactor away the global_mask.

The global mask was added with the intention of blocking PCI-e BAR memory
types from being used; however, this ends up being somewhat clumsy when
we want to allow PCI-e BAR for specific allocations.

Refactor the memory allocation scheme to never end up in a situation
where we accidentally allocate PCI-e BAR memory. We only attempt one
memory type. This makes it so that pure DEVICE allocations cannot fall back
to BAR, and pure HOST allocations (if they somehow fail) cannot fall back to BAR.

The first eligible memory type is always selected.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
This commit is contained in:
Hans-Kristian Arntzen 2022-05-04 12:44:56 +02:00
parent c539bd3f03
commit 3dfcc10312
3 changed files with 64 additions and 119 deletions

View File

@ -177,7 +177,6 @@ static HRESULT vkd3d_try_allocate_device_memory(struct d3d12_device *device,
void *pNext, struct vkd3d_device_memory_allocation *allocation)
{
const VkPhysicalDeviceMemoryProperties *memory_props = &device->memory_properties;
const VkMemoryPropertyFlags optional_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
struct vkd3d_memory_info *memory_info = &device->memory_info;
VkMemoryAllocateInfo allocate_info;
@ -186,87 +185,87 @@ static HRESULT vkd3d_try_allocate_device_memory(struct d3d12_device *device,
bool budget_sensitive;
VkResult vr;
/* buffer_mask / sampled_mask etc will generally take care of this,
* but for certain fallback scenarios where we select other memory
* types, we need to mask here as well. */
type_mask &= device->memory_info.global_mask;
allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
allocate_info.pNext = pNext;
allocate_info.allocationSize = size;
allocate_info.memoryTypeIndex = UINT32_MAX;
while (type_mask)
{
uint32_t type_index = vkd3d_bitmask_iter32(&type_mask);
if ((memory_props->memoryTypes[type_index].propertyFlags & type_flags) != type_flags)
continue;
allocate_info.memoryTypeIndex = type_index;
budget_sensitive = !!(device->memory_info.budget_sensitive_mask & (1u << type_index));
if (budget_sensitive)
if ((memory_props->memoryTypes[type_index].propertyFlags & type_flags) == type_flags)
{
type_budget = &memory_info->type_budget[type_index];
type_current = &memory_info->type_current[type_index];
pthread_mutex_lock(&memory_info->budget_lock);
if (*type_current + size > *type_budget)
{
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
{
INFO("Attempting to allocate from memory type %u, but exceeding fixed budget: %"PRIu64" + %"PRIu64" > %"PRIu64".\n",
type_index, *type_current, size, *type_budget);
}
pthread_mutex_unlock(&memory_info->budget_lock);
/* If we're out of DEVICE budget, don't try other types. */
if (type_flags & optional_flags)
return E_OUTOFMEMORY;
else
continue;
}
allocate_info.memoryTypeIndex = type_index;
break;
}
}
vr = VK_CALL(vkAllocateMemory(device->vk_device, &allocate_info, NULL, &allocation->vk_memory));
if (allocate_info.memoryTypeIndex == UINT32_MAX)
{
FIXME("Found no suitable memory type for requested type_flags #%x.\n", type_flags);
return E_OUTOFMEMORY;
}
if (budget_sensitive)
/* Once we have found a suitable memory type, only attempt to allocate that memory type.
* This avoids some problems by design:
* - If we want to allocate DEVICE_LOCAL memory, we don't try to fallback to PCI-e BAR memory by mistake.
* - If we want to allocate system memory, we don't try to fallback to PCI-e BAR memory by mistake.
* - There is no reasonable scenario where we can expect one memory type to fail, and another memory type
* with more memory property bits set to pass. This makes use of the rule where memory types which are a super-set
* of another must have a larger type index.
* - We will only attempt to allocate PCI-e BAR memory if DEVICE_LOCAL | HOST_VISIBLE is set, otherwise we
* will find a candidate memory type which is either DEVICE_LOCAL or HOST_VISIBLE before we find a PCI-e BAR type.
* - For iGPU where everything is DEVICE_LOCAL | HOST_VISIBLE, we will just find that memory type first anyways,
* but there we don't have anything to worry about w.r.t. PCI-e BAR.
*/
/* Budgets only really apply to PCI-e BAR or other "special" types which always have a fallback. */
budget_sensitive = !!(device->memory_info.budget_sensitive_mask & (1u << allocate_info.memoryTypeIndex));
if (budget_sensitive)
{
type_budget = &memory_info->type_budget[allocate_info.memoryTypeIndex];
type_current = &memory_info->type_current[allocate_info.memoryTypeIndex];
pthread_mutex_lock(&memory_info->budget_lock);
if (*type_current + size > *type_budget)
{
if (vr == VK_SUCCESS)
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
{
*type_current += size;
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
{
INFO("Allocated memory of type %u, new total allocated size %"PRIu64" MiB.\n",
type_index, *type_current / (1024 * 1024));
}
INFO("Attempting to allocate from memory type %u, but exceeding fixed budget: %"PRIu64" + %"PRIu64" > %"PRIu64".\n",
allocate_info.memoryTypeIndex, *type_current, size, *type_budget);
}
pthread_mutex_unlock(&memory_info->budget_lock);
}
else if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
{
INFO("%s memory of type #%u, size %"PRIu64" KiB.\n",
(vr == VK_SUCCESS ? "Allocated" : "Failed to allocate"),
type_index, allocate_info.allocationSize / 1024);
}
if (vr == VK_SUCCESS)
{
allocation->vk_memory_type = type_index;
allocation->size = size;
return S_OK;
}
else if (type_flags & optional_flags)
{
/* If we fail to allocate DEVICE_LOCAL memory, immediately fail the call.
* This way we avoid any attempt to fall back to PCI-e BAR memory types
* which are also DEVICE_LOCAL.
* After failure, the calling code removes the DEVICE_LOCAL_BIT flag and tries again,
* where we will fall back to system memory instead. */
return E_OUTOFMEMORY;
}
}
return E_OUTOFMEMORY;
vr = VK_CALL(vkAllocateMemory(device->vk_device, &allocate_info, NULL, &allocation->vk_memory));
if (budget_sensitive)
{
if (vr == VK_SUCCESS)
{
*type_current += size;
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
{
INFO("Allocated memory of type %u, new total allocated size %"PRIu64" MiB.\n",
allocate_info.memoryTypeIndex, *type_current / (1024 * 1024));
}
}
pthread_mutex_unlock(&memory_info->budget_lock);
}
else if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
{
INFO("%s memory of type #%u, size %"PRIu64" KiB.\n",
(vr == VK_SUCCESS ? "Allocated" : "Failed to allocate"),
allocate_info.memoryTypeIndex, allocate_info.allocationSize / 1024);
}
if (vr != VK_SUCCESS)
return E_OUTOFMEMORY;
allocation->vk_memory_type = allocate_info.memoryTypeIndex;
allocation->size = size;
return S_OK;
}
static bool vkd3d_memory_info_type_mask_covers_multiple_memory_heaps(
@ -520,9 +519,7 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
* we must not look at heap_flags, since we might end up noping out
* the memory types we want to allocate with. */
type_mask = memory_requirements.memoryTypeBits;
if (info->flags & VKD3D_ALLOCATION_FLAG_DEDICATED)
type_mask &= device->memory_info.global_mask;
else
if (!(info->flags & VKD3D_ALLOCATION_FLAG_DEDICATED))
type_mask &= vkd3d_select_memory_types(device, &info->heap_properties, info->heap_flags);
/* Allocate actual backing storage */
@ -1306,7 +1303,6 @@ static HRESULT vkd3d_memory_allocator_try_suballocate_memory(struct vkd3d_memory
HRESULT hr;
size_t i;
type_mask &= device->memory_info.global_mask;
type_mask &= memory_requirements->memoryTypeBits;
for (i = 0; i < allocator->chunks_count; i++)

View File

@ -2741,7 +2741,7 @@ HRESULT d3d12_resource_create_committed(struct d3d12_device *device, const D3D12
if (!(use_dedicated_allocation = dedicated_requirements.prefersDedicatedAllocation))
{
const uint32_t type_mask = memory_requirements.memoryRequirements.memoryTypeBits & device->memory_info.global_mask;
const uint32_t type_mask = memory_requirements.memoryRequirements.memoryTypeBits;
const struct vkd3d_memory_info_domain *domain = d3d12_device_get_memory_info_domain(device, heap_properties);
use_dedicated_allocation = (type_mask & domain->buffer_type_mask) != type_mask;
}
@ -6335,49 +6335,6 @@ static void vkd3d_memory_info_get_topology(struct vkd3d_memory_topology *topolog
}
}
static uint32_t vkd3d_memory_info_find_global_mask(const struct vkd3d_memory_topology *topology, struct d3d12_device *device)
{
/* Never allow memory types from any PCI-pinned heap.
* If we allow it, it might end up being used as a fallback memory type, which will cause severe instabilities.
* These types should only be used in a controlled fashion. */
VkMemoryPropertyFlags flags;
uint32_t heap_index;
uint32_t i, mask;
if (!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_NO_UPLOAD_HVV))
return UINT32_MAX;
/* If we only have one device local heap, or no host-only heaps, there is nothing to do. */
if (topology->device_local_heap_count <= 1 || topology->host_only_heap_count == 0)
return UINT32_MAX;
/* Verify that there exists a DEVICE_LOCAL type that is not HOST_VISIBLE on this device
* which maps to the largest device local heap. That way, it is safe to mask out all memory types which are
* DEVICE_LOCAL | HOST_VISIBLE.
* Similarly, there must exist a host-only type. */
if (!topology->exists_device_only_type || !topology->exists_host_only_type)
return UINT32_MAX;
/* Mask out any memory types which are deemed problematic. */
for (i = 0, mask = 0; i < device->memory_properties.memoryTypeCount; i++)
{
const VkMemoryPropertyFlags pinned_mask = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
flags = device->memory_properties.memoryTypes[i].propertyFlags;
heap_index = device->memory_properties.memoryTypes[i].heapIndex;
if (heap_index != topology->largest_device_local_heap_index &&
heap_index != topology->largest_host_only_heap_index &&
(flags & pinned_mask) == pinned_mask)
{
mask |= 1u << i;
WARN("Blocking memory type %u for use (PCI-pinned memory).\n", i);
}
}
return ~mask;
}
static void vkd3d_memory_info_init_budgets(struct vkd3d_memory_info *info,
const struct vkd3d_memory_topology *topology,
struct d3d12_device *device)
@ -6457,7 +6414,6 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info,
uint32_t i;
vkd3d_memory_info_get_topology(&topology, device);
info->global_mask = vkd3d_memory_info_find_global_mask(&topology, device);
vkd3d_memory_info_init_budgets(info, &topology, device);
if (pthread_mutex_init(&info->budget_lock, NULL) != 0)
@ -6574,12 +6530,6 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info,
* On AMD, we can get linear RT, but not linear DS, so for now, just don't check for that.
* We will fail in resource creation instead. */
buffer_type_mask &= info->global_mask;
sampled_type_mask &= info->global_mask;
rt_ds_type_mask &= info->global_mask;
sampled_type_mask_cpu &= info->global_mask;
rt_ds_type_mask_cpu &= info->global_mask;
info->non_cpu_accessible_domain.buffer_type_mask = buffer_type_mask;
info->non_cpu_accessible_domain.sampled_type_mask = sampled_type_mask;
info->non_cpu_accessible_domain.rt_ds_type_mask = rt_ds_type_mask;

View File

@ -2703,7 +2703,6 @@ struct vkd3d_memory_info_domain
struct vkd3d_memory_info
{
uint32_t global_mask;
/* Includes normal system memory, but also resizable BAR memory.
* Only types which have HOST_VISIBLE_BIT can be in this domain.
* For images, we only include memory types which are LINEAR tiled. */