Compare commits
2 Commits
master
...
global-mas
Author | SHA1 | Date |
---|---|---|
Hans-Kristian Arntzen | 3dfcc10312 | |
Hans-Kristian Arntzen | c539bd3f03 |
|
@@ -177,7 +177,6 @@ static HRESULT vkd3d_try_allocate_device_memory(struct d3d12_device *device,
|
|||
void *pNext, struct vkd3d_device_memory_allocation *allocation)
|
||||
{
|
||||
const VkPhysicalDeviceMemoryProperties *memory_props = &device->memory_properties;
|
||||
const VkMemoryPropertyFlags optional_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
struct vkd3d_memory_info *memory_info = &device->memory_info;
|
||||
VkMemoryAllocateInfo allocate_info;
|
||||
|
@@ -186,87 +185,87 @@ static HRESULT vkd3d_try_allocate_device_memory(struct d3d12_device *device,
|
|||
bool budget_sensitive;
|
||||
VkResult vr;
|
||||
|
||||
/* buffer_mask / sampled_mask etc will generally take care of this,
|
||||
* but for certain fallback scenarios where we select other memory
|
||||
* types, we need to mask here as well. */
|
||||
type_mask &= device->memory_info.global_mask;
|
||||
|
||||
allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
|
||||
allocate_info.pNext = pNext;
|
||||
allocate_info.allocationSize = size;
|
||||
allocate_info.memoryTypeIndex = UINT32_MAX;
|
||||
|
||||
while (type_mask)
|
||||
{
|
||||
uint32_t type_index = vkd3d_bitmask_iter32(&type_mask);
|
||||
|
||||
if ((memory_props->memoryTypes[type_index].propertyFlags & type_flags) != type_flags)
|
||||
continue;
|
||||
|
||||
allocate_info.memoryTypeIndex = type_index;
|
||||
|
||||
budget_sensitive = !!(device->memory_info.budget_sensitive_mask & (1u << type_index));
|
||||
if (budget_sensitive)
|
||||
if ((memory_props->memoryTypes[type_index].propertyFlags & type_flags) == type_flags)
|
||||
{
|
||||
type_budget = &memory_info->type_budget[type_index];
|
||||
type_current = &memory_info->type_current[type_index];
|
||||
pthread_mutex_lock(&memory_info->budget_lock);
|
||||
if (*type_current + size > *type_budget)
|
||||
{
|
||||
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
|
||||
{
|
||||
INFO("Attempting to allocate from memory type %u, but exceeding fixed budget: %"PRIu64" + %"PRIu64" > %"PRIu64".\n",
|
||||
type_index, *type_current, size, *type_budget);
|
||||
}
|
||||
pthread_mutex_unlock(&memory_info->budget_lock);
|
||||
|
||||
/* If we're out of DEVICE budget, don't try other types. */
|
||||
if (type_flags & optional_flags)
|
||||
return E_OUTOFMEMORY;
|
||||
else
|
||||
continue;
|
||||
}
|
||||
allocate_info.memoryTypeIndex = type_index;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
vr = VK_CALL(vkAllocateMemory(device->vk_device, &allocate_info, NULL, &allocation->vk_memory));
|
||||
if (allocate_info.memoryTypeIndex == UINT32_MAX)
|
||||
{
|
||||
FIXME("Found no suitable memory type for requested type_flags #%x.\n", type_flags);
|
||||
return E_OUTOFMEMORY;
|
||||
}
|
||||
|
||||
if (budget_sensitive)
|
||||
/* Once we have found a suitable memory type, only attempt to allocate that memory type.
|
||||
* This avoids some problems by design:
|
||||
* - If we want to allocate DEVICE_LOCAL memory, we don't try to fallback to PCI-e BAR memory by mistake.
|
||||
* - If we want to allocate system memory, we don't try to fallback to PCI-e BAR memory by mistake.
|
||||
* - There is no reasonable scenario where we can expect one memory type to fail, and another memory type
|
||||
* with more memory property bits set to pass. This makes use of the rule where memory types which are a super-set
|
||||
* of another must have a larger type index.
|
||||
* - We will only attempt to allocate PCI-e BAR memory if DEVICE_LOCAL | HOST_VISIBLE is set, otherwise we
|
||||
* will find a candidate memory type which is either DEVICE_LOCAL or HOST_VISIBLE before we find a PCI-e BAR type.
|
||||
* - For iGPU where everything is DEVICE_LOCAL | HOST_VISIBLE, we will just find that memory type first anyways,
|
||||
* but there we don't have anything to worry about w.r.t. PCI-e BAR.
|
||||
*/
|
||||
|
||||
/* Budgets only really apply to PCI-e BAR or other "special" types which always have a fallback. */
|
||||
budget_sensitive = !!(device->memory_info.budget_sensitive_mask & (1u << allocate_info.memoryTypeIndex));
|
||||
if (budget_sensitive)
|
||||
{
|
||||
type_budget = &memory_info->type_budget[allocate_info.memoryTypeIndex];
|
||||
type_current = &memory_info->type_current[allocate_info.memoryTypeIndex];
|
||||
pthread_mutex_lock(&memory_info->budget_lock);
|
||||
if (*type_current + size > *type_budget)
|
||||
{
|
||||
if (vr == VK_SUCCESS)
|
||||
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
|
||||
{
|
||||
*type_current += size;
|
||||
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
|
||||
{
|
||||
INFO("Allocated memory of type %u, new total allocated size %"PRIu64" MiB.\n",
|
||||
type_index, *type_current / (1024 * 1024));
|
||||
}
|
||||
INFO("Attempting to allocate from memory type %u, but exceeding fixed budget: %"PRIu64" + %"PRIu64" > %"PRIu64".\n",
|
||||
allocate_info.memoryTypeIndex, *type_current, size, *type_budget);
|
||||
}
|
||||
pthread_mutex_unlock(&memory_info->budget_lock);
|
||||
}
|
||||
else if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
|
||||
{
|
||||
INFO("%s memory of type #%u, size %"PRIu64" KiB.\n",
|
||||
(vr == VK_SUCCESS ? "Allocated" : "Failed to allocate"),
|
||||
type_index, allocate_info.allocationSize / 1024);
|
||||
}
|
||||
|
||||
if (vr == VK_SUCCESS)
|
||||
{
|
||||
allocation->vk_memory_type = type_index;
|
||||
allocation->size = size;
|
||||
return S_OK;
|
||||
}
|
||||
else if (type_flags & optional_flags)
|
||||
{
|
||||
/* If we fail to allocate DEVICE_LOCAL memory, immediately fail the call.
|
||||
* This way we avoid any attempt to fall back to PCI-e BAR memory types
|
||||
* which are also DEVICE_LOCAL.
|
||||
* After failure, the calling code removes the DEVICE_LOCAL_BIT flag and tries again,
|
||||
* where we will fall back to system memory instead. */
|
||||
return E_OUTOFMEMORY;
|
||||
}
|
||||
}
|
||||
|
||||
return E_OUTOFMEMORY;
|
||||
vr = VK_CALL(vkAllocateMemory(device->vk_device, &allocate_info, NULL, &allocation->vk_memory));
|
||||
|
||||
if (budget_sensitive)
|
||||
{
|
||||
if (vr == VK_SUCCESS)
|
||||
{
|
||||
*type_current += size;
|
||||
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
|
||||
{
|
||||
INFO("Allocated memory of type %u, new total allocated size %"PRIu64" MiB.\n",
|
||||
allocate_info.memoryTypeIndex, *type_current / (1024 * 1024));
|
||||
}
|
||||
}
|
||||
pthread_mutex_unlock(&memory_info->budget_lock);
|
||||
}
|
||||
else if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
|
||||
{
|
||||
INFO("%s memory of type #%u, size %"PRIu64" KiB.\n",
|
||||
(vr == VK_SUCCESS ? "Allocated" : "Failed to allocate"),
|
||||
allocate_info.memoryTypeIndex, allocate_info.allocationSize / 1024);
|
||||
}
|
||||
|
||||
if (vr != VK_SUCCESS)
|
||||
return E_OUTOFMEMORY;
|
||||
|
||||
allocation->vk_memory_type = allocate_info.memoryTypeIndex;
|
||||
allocation->size = size;
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
static bool vkd3d_memory_info_type_mask_covers_multiple_memory_heaps(
|
||||
|
@@ -520,9 +519,7 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
|
|||
* we must not look at heap_flags, since we might end up noping out
|
||||
* the memory types we want to allocate with. */
|
||||
type_mask = memory_requirements.memoryTypeBits;
|
||||
if (info->flags & VKD3D_ALLOCATION_FLAG_DEDICATED)
|
||||
type_mask &= device->memory_info.global_mask;
|
||||
else
|
||||
if (!(info->flags & VKD3D_ALLOCATION_FLAG_DEDICATED))
|
||||
type_mask &= vkd3d_select_memory_types(device, &info->heap_properties, info->heap_flags);
|
||||
|
||||
/* Allocate actual backing storage */
|
||||
|
@@ -592,8 +589,7 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
|
|||
0, VK_WHOLE_SIZE, 0, &allocation->cpu_address))))
|
||||
{
|
||||
ERR("Failed to map memory, vr %d.\n", vr);
|
||||
vkd3d_memory_allocation_free(allocation, device, allocator);
|
||||
return hresult_from_vk_result(vr);
|
||||
goto out_free_allocation;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -609,18 +605,14 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
|
|||
if ((vr = VK_CALL(vkBindBufferMemory2KHR(device->vk_device, 1, &bind_info))) < 0)
|
||||
{
|
||||
ERR("Failed to bind buffer memory, vr %d.\n", vr);
|
||||
vkd3d_memory_allocation_free(allocation, device, allocator);
|
||||
return hresult_from_vk_result(vr);
|
||||
goto out_free_allocation;
|
||||
}
|
||||
|
||||
/* Assign GPU address as necessary. */
|
||||
if (allocation->flags & VKD3D_ALLOCATION_FLAG_GPU_ADDRESS)
|
||||
{
|
||||
if (FAILED(hr = vkd3d_allocation_assign_gpu_address(allocation, device, allocator)))
|
||||
{
|
||||
vkd3d_memory_allocation_free(allocation, device, allocator);
|
||||
return hresult_from_vk_result(vr);
|
||||
}
|
||||
goto out_free_allocation;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -631,6 +623,11 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
|
|||
TRACE("Created allocation %p on memory type %u (%"PRIu64" bytes).\n",
|
||||
allocation, allocation->device_allocation.vk_memory_type, allocation->resource.size);
|
||||
return S_OK;
|
||||
|
||||
out_free_allocation:
|
||||
vkd3d_memory_allocation_free(allocation, device, allocator);
|
||||
memset(allocation, 0, sizeof(*allocation));
|
||||
return hresult_from_vk_result(vr);
|
||||
}
|
||||
|
||||
static void vkd3d_memory_chunk_insert_range(struct vkd3d_memory_chunk *chunk,
|
||||
|
@@ -1306,7 +1303,6 @@ static HRESULT vkd3d_memory_allocator_try_suballocate_memory(struct vkd3d_memory
|
|||
HRESULT hr;
|
||||
size_t i;
|
||||
|
||||
type_mask &= device->memory_info.global_mask;
|
||||
type_mask &= memory_requirements->memoryTypeBits;
|
||||
|
||||
for (i = 0; i < allocator->chunks_count; i++)
|
||||
|
|
|
@@ -2741,7 +2741,7 @@ HRESULT d3d12_resource_create_committed(struct d3d12_device *device, const D3D12
|
|||
|
||||
if (!(use_dedicated_allocation = dedicated_requirements.prefersDedicatedAllocation))
|
||||
{
|
||||
const uint32_t type_mask = memory_requirements.memoryRequirements.memoryTypeBits & device->memory_info.global_mask;
|
||||
const uint32_t type_mask = memory_requirements.memoryRequirements.memoryTypeBits;
|
||||
const struct vkd3d_memory_info_domain *domain = d3d12_device_get_memory_info_domain(device, heap_properties);
|
||||
use_dedicated_allocation = (type_mask & domain->buffer_type_mask) != type_mask;
|
||||
}
|
||||
|
@@ -6335,49 +6335,6 @@ static void vkd3d_memory_info_get_topology(struct vkd3d_memory_topology *topolog
|
|||
}
|
||||
}
|
||||
|
||||
static uint32_t vkd3d_memory_info_find_global_mask(const struct vkd3d_memory_topology *topology, struct d3d12_device *device)
|
||||
{
|
||||
/* Never allow memory types from any PCI-pinned heap.
|
||||
* If we allow it, it might end up being used as a fallback memory type, which will cause severe instabilities.
|
||||
* These types should only be used in a controlled fashion. */
|
||||
VkMemoryPropertyFlags flags;
|
||||
uint32_t heap_index;
|
||||
uint32_t i, mask;
|
||||
|
||||
if (!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_NO_UPLOAD_HVV))
|
||||
return UINT32_MAX;
|
||||
|
||||
/* If we only have one device local heap, or no host-only heaps, there is nothing to do. */
|
||||
if (topology->device_local_heap_count <= 1 || topology->host_only_heap_count == 0)
|
||||
return UINT32_MAX;
|
||||
|
||||
/* Verify that there exists a DEVICE_LOCAL type that is not HOST_VISIBLE on this device
|
||||
* which maps to the largest device local heap. That way, it is safe to mask out all memory types which are
|
||||
* DEVICE_LOCAL | HOST_VISIBLE.
|
||||
* Similarly, there must exist a host-only type. */
|
||||
if (!topology->exists_device_only_type || !topology->exists_host_only_type)
|
||||
return UINT32_MAX;
|
||||
|
||||
/* Mask out any memory types which are deemed problematic. */
|
||||
for (i = 0, mask = 0; i < device->memory_properties.memoryTypeCount; i++)
|
||||
{
|
||||
const VkMemoryPropertyFlags pinned_mask = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
|
||||
flags = device->memory_properties.memoryTypes[i].propertyFlags;
|
||||
heap_index = device->memory_properties.memoryTypes[i].heapIndex;
|
||||
|
||||
if (heap_index != topology->largest_device_local_heap_index &&
|
||||
heap_index != topology->largest_host_only_heap_index &&
|
||||
(flags & pinned_mask) == pinned_mask)
|
||||
{
|
||||
mask |= 1u << i;
|
||||
WARN("Blocking memory type %u for use (PCI-pinned memory).\n", i);
|
||||
}
|
||||
}
|
||||
|
||||
return ~mask;
|
||||
}
|
||||
|
||||
static void vkd3d_memory_info_init_budgets(struct vkd3d_memory_info *info,
|
||||
const struct vkd3d_memory_topology *topology,
|
||||
struct d3d12_device *device)
|
||||
|
@@ -6457,7 +6414,6 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info,
|
|||
uint32_t i;
|
||||
|
||||
vkd3d_memory_info_get_topology(&topology, device);
|
||||
info->global_mask = vkd3d_memory_info_find_global_mask(&topology, device);
|
||||
vkd3d_memory_info_init_budgets(info, &topology, device);
|
||||
|
||||
if (pthread_mutex_init(&info->budget_lock, NULL) != 0)
|
||||
|
@@ -6574,12 +6530,6 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info,
|
|||
* On AMD, we can get linear RT, but not linear DS, so for now, just don't check for that.
|
||||
* We will fail in resource creation instead. */
|
||||
|
||||
buffer_type_mask &= info->global_mask;
|
||||
sampled_type_mask &= info->global_mask;
|
||||
rt_ds_type_mask &= info->global_mask;
|
||||
sampled_type_mask_cpu &= info->global_mask;
|
||||
rt_ds_type_mask_cpu &= info->global_mask;
|
||||
|
||||
info->non_cpu_accessible_domain.buffer_type_mask = buffer_type_mask;
|
||||
info->non_cpu_accessible_domain.sampled_type_mask = sampled_type_mask;
|
||||
info->non_cpu_accessible_domain.rt_ds_type_mask = rt_ds_type_mask;
|
||||
|
|
|
@@ -2703,7 +2703,6 @@ struct vkd3d_memory_info_domain
|
|||
|
||||
struct vkd3d_memory_info
|
||||
{
|
||||
uint32_t global_mask;
|
||||
/* Includes normal system memory, but also resizable BAR memory.
|
||||
* Only types which have HOST_VISIBLE_BIT can be in this domain.
|
||||
* For images, we only include memory types which are LINEAR tiled. */
|
||||
|
|
Loading…
Reference in New Issue