tu: Move vsc_data and vsc_data2 allocation into the device

In addition to preparing us for dynamically resizing them, which has to
be controlled by the device, this greatly reduces the memory usage when
allocating large numbers of command buffers, making
dEQP-VK.api.object_management.max_concurrent.command_buffer_primary go
from crash -> pass.

Reviewed-by: Kristian H. Kristensen <hoegsberg@gmail.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3621>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3621>
This commit is contained in:
Connor Abbott 2020-01-28 17:30:44 +01:00
parent 84bd4da468
commit 8455648cca
3 changed files with 49 additions and 36 deletions

View File

@ -1670,27 +1670,14 @@ tu_create_cmd_buffer(struct tu_device *device,
if (result != VK_SUCCESS)
goto fail_scratch_bo;
#define VSC_DATA_SIZE(pitch) ((pitch) * 32 + 0x100) /* extra size to store VSC_SIZE */
#define VSC_DATA2_SIZE(pitch) ((pitch) * 32)
/* TODO: resize on overflow or compute a max size from # of vertices in renderpass?? */
cmd_buffer->vsc_data_pitch = 0x440 * 4;
cmd_buffer->vsc_data2_pitch = 0x1040 * 4;
result = tu_bo_init_new(device, &cmd_buffer->vsc_data, VSC_DATA_SIZE(cmd_buffer->vsc_data_pitch));
if (result != VK_SUCCESS)
goto fail_vsc_data;
result = tu_bo_init_new(device, &cmd_buffer->vsc_data2, VSC_DATA2_SIZE(cmd_buffer->vsc_data2_pitch));
if (result != VK_SUCCESS)
goto fail_vsc_data2;
/* TODO: resize on overflow */
cmd_buffer->vsc_data_pitch = device->vsc_data_pitch;
cmd_buffer->vsc_data2_pitch = device->vsc_data2_pitch;
cmd_buffer->vsc_data = device->vsc_data;
cmd_buffer->vsc_data2 = device->vsc_data2;
return VK_SUCCESS;
fail_vsc_data2:
tu_bo_finish(cmd_buffer->device, &cmd_buffer->vsc_data);
fail_vsc_data:
tu_bo_finish(cmd_buffer->device, &cmd_buffer->scratch_bo);
fail_scratch_bo:
list_del(&cmd_buffer->pool_link);
return result;
@ -1700,8 +1687,6 @@ static void
tu_cmd_buffer_destroy(struct tu_cmd_buffer *cmd_buffer)
{
tu_bo_finish(cmd_buffer->device, &cmd_buffer->scratch_bo);
tu_bo_finish(cmd_buffer->device, &cmd_buffer->vsc_data);
tu_bo_finish(cmd_buffer->device, &cmd_buffer->vsc_data2);
list_del(&cmd_buffer->pool_link);

View File

@ -1074,7 +1074,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (!device->queues[qfi]) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
goto fail_queues;
}
memset(device->queues[qfi], 0,
@ -1086,13 +1086,27 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
result = tu_queue_init(device, &device->queues[qfi][q], qfi, q,
queue_create->flags);
if (result != VK_SUCCESS)
goto fail;
goto fail_queues;
}
}
device->compiler = ir3_compiler_create(NULL, physical_device->gpu_id);
if (!device->compiler)
goto fail;
goto fail_queues;
#define VSC_DATA_SIZE(pitch) ((pitch) * 32 + 0x100) /* extra size to store VSC_SIZE */
#define VSC_DATA2_SIZE(pitch) ((pitch) * 32)
device->vsc_data_pitch = 0x440 * 4;
device->vsc_data2_pitch = 0x1040 * 4;
result = tu_bo_init_new(device, &device->vsc_data, VSC_DATA_SIZE(device->vsc_data_pitch));
if (result != VK_SUCCESS)
goto fail_vsc_data;
result = tu_bo_init_new(device, &device->vsc_data2, VSC_DATA2_SIZE(device->vsc_data2_pitch));
if (result != VK_SUCCESS)
goto fail_vsc_data2;
VkPipelineCacheCreateInfo ci;
ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
@ -1104,14 +1118,23 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
result =
tu_CreatePipelineCache(tu_device_to_handle(device), &ci, NULL, &pc);
if (result != VK_SUCCESS)
goto fail;
goto fail_pipeline_cache;
device->mem_cache = tu_pipeline_cache_from_handle(pc);
*pDevice = tu_device_to_handle(device);
return VK_SUCCESS;
fail:
fail_pipeline_cache:
tu_bo_finish(device, &device->vsc_data2);
fail_vsc_data2:
tu_bo_finish(device, &device->vsc_data);
fail_vsc_data:
ralloc_free(device->compiler);
fail_queues:
for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
for (unsigned q = 0; q < device->queue_count[i]; q++)
tu_queue_finish(&device->queues[i][q]);
@ -1119,9 +1142,6 @@ fail:
vk_free(&device->alloc, device->queues[i]);
}
if (device->compiler)
ralloc_free(device->compiler);
vk_free(&device->alloc, device);
return result;
}
@ -1134,6 +1154,9 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
if (!device)
return;
tu_bo_finish(device, &device->vsc_data);
tu_bo_finish(device, &device->vsc_data2);
for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
for (unsigned q = 0; q < device->queue_count[i]; q++)
tu_queue_finish(&device->queues[i][q]);

View File

@ -453,6 +453,14 @@ struct tu_queue
struct tu_fence submit_fence;
};
struct tu_bo
{
uint32_t gem_handle;
uint64_t size;
uint64_t iova;
void *map;
};
struct tu_device
{
VK_LOADER_DATA _loader_data;
@ -473,20 +481,17 @@ struct tu_device
/* Backup in-memory cache to be used if the app doesn't provide one */
struct tu_pipeline_cache *mem_cache;
struct tu_bo vsc_data;
struct tu_bo vsc_data2;
uint32_t vsc_data_pitch;
uint32_t vsc_data2_pitch;
struct list_head shader_slabs;
mtx_t shader_slab_mutex;
struct tu_device_extension_table enabled_extensions;
};
struct tu_bo
{
uint32_t gem_handle;
uint64_t size;
uint64_t iova;
void *map;
};
VkResult
tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size);
VkResult