turnip: Add a refcount mechanism to BOs

Until now we have lived without a refcount mechanism in the driver
because in Vulkan the user is responsible for managing the lifespan
of memory allocations for all Vulkan objects. Imported BOs, however,
are tricky because the kernel doesn't refcount them, so user-space
needs to make sure that:

1. It does not double-free a BO that is imported into the same
   device used to create it (self-importing).
2. It frees imported BOs that were not allocated through the same
   device.

Our initial implementation always freed BOs when requested, so we
handled 2) correctly but not 1): on DRM we would double-free
self-imported BOs because the kernel doesn't return a unique
gem_handle on each import.

Besides this, the submit ioctl checks for duplicates in the BO list
and returns an error if there is one.

This fixes the problem for good by adding refcounts to BOs, so that
self-imported BOs have a refcnt > 1 and are only freed once all
references have been released.

KGSL, on the other hand, does not have the same problem, at least
not with the ION buffers that are used for exportable BOs on
pre-5.10 Android kernels.
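
For illustration, a minimal, hypothetical sketch of the scheme (not
part of the change itself; the real implementation below uses a
util_sparse_array keyed by gem_handle, atomic refcounts and the
dma_bo_lock rwlock):

/* Simplified sketch -- not the actual driver code. */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

struct bo {
   uint32_t gem_handle; /* 0 marks the table slot as free */
   int32_t refcnt;
};

/* One slot per GEM handle, so a self-import lands on the same slot. */
static struct bo *lookup_bo(struct bo *table, uint32_t gem_handle)
{
   return &table[gem_handle];
}

/* Import: if the handle is already tracked, just take another reference. */
static struct bo *import_bo(struct bo *table, uint32_t gem_handle)
{
   struct bo *bo = lookup_bo(table, gem_handle);
   if (bo->refcnt != 0) {
      bo->refcnt++; /* self-import of a BO we already track */
      return bo;
   }
   bo->gem_handle = gem_handle;
   bo->refcnt = 1; /* first reference */
   return bo;
}

/* Finish: only the last reference actually releases the GEM handle. */
static bool finish_bo(struct bo *bo)
{
   if (--bo->refcnt != 0)
      return false; /* another reference is still alive */
   memset(bo, 0, sizeof(*bo)); /* mark the slot free for reuse */
   return true; /* caller closes the GEM handle now */
}

With this, importing the same dmabuf into the same device a second
time just bumps the refcount of the existing entry, and the GEM
handle is only closed when the last reference goes away.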

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5936
Fixes CTS tests: dEQP-VK.drm_format_modifiers.export_import.*

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15031>
Danylo Piliaiev 2022-02-02 19:29:34 +02:00 committed by Marge Bot
parent 2763a8af5a
commit a814a4f9db
11 changed files with 206 additions and 121 deletions


@ -362,8 +362,7 @@ tu_autotune_init(struct tu_autotune *at, struct tu_device *dev)
renderpass_key_equals);
u_rwlock_init(&at->ht_lock);
at->results_bo = malloc(sizeof(struct tu_bo));
result = tu_bo_init_new(dev, at->results_bo,
result = tu_bo_init_new(dev, &at->results_bo,
sizeof(struct tu_autotune_results),
TU_BO_ALLOC_NO_FLAGS);
if (result != VK_SUCCESS) {
@ -389,7 +388,6 @@ fail_map_bo:
tu_bo_finish(dev, at->results_bo);
fail_bo:
free(at->results_bo);
u_rwlock_destroy(&at->ht_lock);
_mesa_hash_table_destroy(at->ht, NULL);
@ -428,7 +426,6 @@ tu_autotune_fini(struct tu_autotune *at, struct tu_device *dev)
_mesa_hash_table_destroy(at->ht, NULL);
u_rwlock_destroy(&at->ht_lock);
tu_bo_finish(dev, at->results_bo);
free(at->results_bo);
}
bool


@ -559,13 +559,13 @@ compile_shader(struct tu_device *dev, struct nir_shader *nir,
struct ir3_shader_variant *so =
ir3_shader_get_variant(sh, &key, false, false, &created);
struct tu6_global *global = dev->global_bo.map;
struct tu6_global *global = dev->global_bo->map;
assert(*offset + so->info.sizedwords <= ARRAY_SIZE(global->shaders));
dev->global_shaders[idx] = so;
memcpy(&global->shaders[*offset], so->bin,
sizeof(uint32_t) * so->info.sizedwords);
dev->global_shader_va[idx] = dev->global_bo.iova +
dev->global_shader_va[idx] = dev->global_bo->iova +
gb_offset(shaders[*offset]);
*offset += align(so->info.sizedwords, 32);
}


@ -77,7 +77,7 @@ tu6_lazy_emit_tessfactor_addr(struct tu_cmd_buffer *cmd)
assert(cmd->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
tu_cs_emit_regs(&cmd->cs, A6XX_PC_TESSFACTOR_ADDR(.qword = cmd->device->tess_bo.iova));
tu_cs_emit_regs(&cmd->cs, A6XX_PC_TESSFACTOR_ADDR(.qword = cmd->device->tess_bo->iova));
cmd->state.tessfactor_addr_set = true;
}
@ -896,10 +896,10 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu_disable_draw_states(cmd, cs);
tu_cs_emit_regs(cs,
A6XX_SP_TP_BORDER_COLOR_BASE_ADDR(.bo = &dev->global_bo,
A6XX_SP_TP_BORDER_COLOR_BASE_ADDR(.bo = dev->global_bo,
.bo_offset = gb_offset(bcolor_builtin)));
tu_cs_emit_regs(cs,
A6XX_SP_PS_TP_BORDER_COLOR_BASE_ADDR(.bo = &dev->global_bo,
A6XX_SP_PS_TP_BORDER_COLOR_BASE_ADDR(.bo = dev->global_bo,
.bo_offset = gb_offset(bcolor_builtin)));
/* VSC buffers:
@ -911,7 +911,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
*/
mtx_lock(&dev->mutex);
struct tu6_global *global = dev->global_bo.map;
struct tu6_global *global = dev->global_bo->map;
uint32_t vsc_draw_overflow = global->vsc_draw_overflow;
uint32_t vsc_prim_overflow = global->vsc_prim_overflow;
@ -4831,7 +4831,7 @@ tu_barrier(struct tu_cmd_buffer *cmd,
tu_cs_emit_pkt7(cs, CP_WAIT_REG_MEM, 6);
tu_cs_emit(cs, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) |
CP_WAIT_REG_MEM_0_POLL_MEMORY);
tu_cs_emit_qw(cs, event->bo.iova); /* POLL_ADDR_LO/HI */
tu_cs_emit_qw(cs, event->bo->iova); /* POLL_ADDR_LO/HI */
tu_cs_emit(cs, CP_WAIT_REG_MEM_3_REF(1));
tu_cs_emit(cs, CP_WAIT_REG_MEM_4_MASK(~0u));
tu_cs_emit(cs, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(20));
@ -4883,13 +4883,13 @@ write_event(struct tu_cmd_buffer *cmd, struct tu_event *event,
if (!(stageMask & ~top_of_pipe_flags)) {
tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 3);
tu_cs_emit_qw(cs, event->bo.iova); /* ADDR_LO/HI */
tu_cs_emit_qw(cs, event->bo->iova); /* ADDR_LO/HI */
tu_cs_emit(cs, value);
} else {
/* Use a RB_DONE_TS event to wait for everything to complete. */
tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 4);
tu_cs_emit(cs, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS));
tu_cs_emit_qw(cs, event->bo.iova);
tu_cs_emit_qw(cs, event->bo->iova);
tu_cs_emit(cs, value);
}
}


@ -64,7 +64,6 @@ tu_cs_finish(struct tu_cs *cs)
{
for (uint32_t i = 0; i < cs->bo_count; ++i) {
tu_bo_finish(cs->device, cs->bos[i]);
free(cs->bos[i]);
}
free(cs->entries);
@ -107,12 +106,10 @@ tu_cs_add_bo(struct tu_cs *cs, uint32_t size)
cs->bos = new_bos;
}
struct tu_bo *new_bo = malloc(sizeof(struct tu_bo));
if (!new_bo)
return VK_ERROR_OUT_OF_HOST_MEMORY;
struct tu_bo *new_bo;
VkResult result =
tu_bo_init_new(cs->device, new_bo, size * sizeof(uint32_t),
tu_bo_init_new(cs->device, &new_bo, size * sizeof(uint32_t),
TU_BO_ALLOC_GPU_READ_ONLY | TU_BO_ALLOC_ALLOW_DUMP);
if (result != VK_SUCCESS) {
free(new_bo);
@ -122,7 +119,6 @@ tu_cs_add_bo(struct tu_cs *cs, uint32_t size)
result = tu_bo_map(cs->device, new_bo);
if (result != VK_SUCCESS) {
tu_bo_finish(cs->device, new_bo);
free(new_bo);
return result;
}
@ -408,7 +404,6 @@ tu_cs_reset(struct tu_cs *cs)
for (uint32_t i = 0; i + 1 < cs->bo_count; ++i) {
tu_bo_finish(cs->device, cs->bos[i]);
free(cs->bos[i]);
}
if (cs->bo_count) {


@ -50,7 +50,7 @@
static inline uint8_t *
pool_base(struct tu_descriptor_pool *pool)
{
return pool->host_bo ?: pool->bo.map;
return pool->host_bo ?: pool->bo->map;
}
static uint32_t
@ -504,7 +504,7 @@ tu_descriptor_set_create(struct tu_device *device,
* resets via the pool. */
if (pool->current_offset + layout_size <= pool->size) {
set->mapped_ptr = (uint32_t*)(pool_base(pool) + pool->current_offset);
set->va = pool->host_bo ? 0 : pool->bo.iova + pool->current_offset;
set->va = pool->host_bo ? 0 : pool->bo->iova + pool->current_offset;
if (!pool->host_memory_base) {
pool->entries[pool->entry_count].offset = pool->current_offset;
@ -529,7 +529,7 @@ tu_descriptor_set_create(struct tu_device *device,
}
set->mapped_ptr = (uint32_t*)(pool_base(pool) + offset);
set->va = pool->host_bo ? 0 : pool->bo.iova + offset;
set->va = pool->host_bo ? 0 : pool->bo->iova + offset;
memmove(&pool->entries[index + 1], &pool->entries[index],
sizeof(pool->entries[0]) * (pool->entry_count - index));
@ -666,7 +666,7 @@ tu_CreateDescriptorPool(VkDevice _device,
if (ret)
goto fail_alloc;
ret = tu_bo_map(device, &pool->bo);
ret = tu_bo_map(device, pool->bo);
if (ret)
goto fail_map;
} else {
@ -687,7 +687,7 @@ tu_CreateDescriptorPool(VkDevice _device,
return VK_SUCCESS;
fail_map:
tu_bo_finish(device, &pool->bo);
tu_bo_finish(device, pool->bo);
fail_alloc:
vk_object_free(&device->vk, pAllocator, pool);
return ret;
@ -719,7 +719,7 @@ tu_DestroyDescriptorPool(VkDevice _device,
if (pool->host_bo)
vk_free2(&device->vk.alloc, pAllocator, pool->host_bo);
else
tu_bo_finish(device, &pool->bo);
tu_bo_finish(device, pool->bo);
}
vk_object_free(&device->vk, pAllocator, pool);


@ -1402,8 +1402,8 @@ tu_trace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size)
struct tu_device *device =
container_of(utctx, struct tu_device, trace_context);
struct tu_bo *bo = ralloc(NULL, struct tu_bo);
tu_bo_init_new(device, bo, size, false);
struct tu_bo *bo;
tu_bo_init_new(device, &bo, size, false);
return bo;
}
@ -1416,7 +1416,6 @@ tu_trace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
struct tu_bo *bo = timestamps;
tu_bo_finish(device, bo);
ralloc_free(bo);
}
static void
@ -1674,6 +1673,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
device->fd = physical_device->local_fd;
mtx_init(&device->bo_mutex, mtx_plain);
u_rwlock_init(&device->dma_bo_lock);
pthread_mutex_init(&device->submit_mutex, NULL);
#ifndef TU_USE_KGSL
@ -1716,6 +1716,9 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
goto fail_queues;
}
/* Initialize sparse array for refcounting imported BOs */
util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512);
/* initial sizes, these will increase if there is overflow */
device->vsc_draw_strm_pitch = 0x1000 + VSC_PAD;
device->vsc_prim_strm_pitch = 0x4000 + VSC_PAD;
@ -1731,13 +1734,13 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
goto fail_global_bo;
}
result = tu_bo_map(device, &device->global_bo);
result = tu_bo_map(device, device->global_bo);
if (result != VK_SUCCESS) {
vk_startup_errorf(device->instance, result, "BO map");
goto fail_global_bo_map;
}
struct tu6_global *global = device->global_bo.map;
struct tu6_global *global = device->global_bo->map;
tu_init_clear_blit_shaders(device);
global->predicate = 0;
tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK],
@ -1868,11 +1871,11 @@ fail_perfcntrs_pass_alloc:
fail_pipeline_cache:
tu_destroy_clear_blit_shaders(device);
fail_global_bo_map:
tu_bo_finish(device, &device->global_bo);
vk_free(&device->vk.alloc, device->bo_idx);
tu_bo_finish(device, device->global_bo);
vk_free(&device->vk.alloc, device->bo_list);
fail_global_bo:
ir3_compiler_destroy(device->compiler);
util_sparse_array_finish(&device->bo_map);
fail_queues:
for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
@ -1882,6 +1885,7 @@ fail_queues:
vk_free(&device->vk.alloc, device->queues[i]);
}
u_rwlock_destroy(&device->dma_bo_lock);
vk_device_finish(&device->vk);
vk_free(&device->vk.alloc, device);
return result;
@ -1906,7 +1910,7 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) {
if (device->scratch_bos[i].initialized)
tu_bo_finish(device, &device->scratch_bos[i].bo);
tu_bo_finish(device, device->scratch_bos[i].bo);
}
tu_destroy_clear_blit_shaders(device);
@ -1924,9 +1928,11 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
tu_autotune_fini(&device->autotune, device);
util_sparse_array_finish(&device->bo_map);
u_rwlock_destroy(&device->dma_bo_lock);
pthread_cond_destroy(&device->timeline_cond);
vk_free(&device->vk.alloc, device->bo_list);
vk_free(&device->vk.alloc, device->bo_idx);
vk_device_finish(&device->vk);
vk_free(&device->vk.alloc, device);
}
@ -1941,7 +1947,7 @@ tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo)
for (unsigned i = index; i < ARRAY_SIZE(dev->scratch_bos); i++) {
if (p_atomic_read(&dev->scratch_bos[i].initialized)) {
/* Fast path: just return the already-allocated BO. */
*bo = &dev->scratch_bos[i].bo;
*bo = dev->scratch_bos[i].bo;
return VK_SUCCESS;
}
}
@ -1957,7 +1963,7 @@ tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo)
*/
if (dev->scratch_bos[index].initialized) {
mtx_unlock(&dev->scratch_bos[index].construct_mtx);
*bo = &dev->scratch_bos[index].bo;
*bo = dev->scratch_bos[index].bo;
return VK_SUCCESS;
}
@ -1973,7 +1979,7 @@ tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo)
mtx_unlock(&dev->scratch_bos[index].construct_mtx);
*bo = &dev->scratch_bos[index].bo;
*bo = dev->scratch_bos[index].bo;
return VK_SUCCESS;
}
@ -2123,10 +2129,10 @@ tu_AllocateMemory(VkDevice _device,
if (result == VK_SUCCESS) {
mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo.size);
mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
if (mem_heap_used > mem_heap->size) {
p_atomic_add(&mem_heap->used, -mem->bo.size);
tu_bo_finish(device, &mem->bo);
p_atomic_add(&mem_heap->used, -mem->bo->size);
tu_bo_finish(device, mem->bo);
result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
"Out of heap memory");
}
@ -2153,8 +2159,8 @@ tu_FreeMemory(VkDevice _device,
if (mem == NULL)
return;
p_atomic_add(&device->physical_device->heap.used, -mem->bo.size);
tu_bo_finish(device, &mem->bo);
p_atomic_add(&device->physical_device->heap.used, -mem->bo->size);
tu_bo_finish(device, mem->bo);
vk_object_free(&device->vk, pAllocator, mem);
}
@ -2175,13 +2181,13 @@ tu_MapMemory(VkDevice _device,
return VK_SUCCESS;
}
if (!mem->bo.map) {
result = tu_bo_map(device, &mem->bo);
if (!mem->bo->map) {
result = tu_bo_map(device, mem->bo);
if (result != VK_SUCCESS)
return result;
}
*ppData = mem->bo.map + offset;
*ppData = mem->bo->map + offset;
return VK_SUCCESS;
}
@ -2292,8 +2298,8 @@ tu_BindBufferMemory2(VkDevice device,
TU_FROM_HANDLE(tu_buffer, buffer, pBindInfos[i].buffer);
if (mem) {
buffer->bo = &mem->bo;
buffer->iova = mem->bo.iova + pBindInfos[i].memoryOffset;
buffer->bo = mem->bo;
buffer->iova = mem->bo->iova + pBindInfos[i].memoryOffset;
} else {
buffer->bo = NULL;
}
@ -2311,8 +2317,8 @@ tu_BindImageMemory2(VkDevice device,
TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory);
if (mem) {
image->bo = &mem->bo;
image->iova = mem->bo.iova + pBindInfos[i].memoryOffset;
image->bo = mem->bo;
image->iova = mem->bo->iova + pBindInfos[i].memoryOffset;
} else {
image->bo = NULL;
image->iova = 0;
@ -2350,7 +2356,7 @@ tu_CreateEvent(VkDevice _device,
if (result != VK_SUCCESS)
goto fail_alloc;
result = tu_bo_map(device, &event->bo);
result = tu_bo_map(device, event->bo);
if (result != VK_SUCCESS)
goto fail_map;
@ -2359,7 +2365,7 @@ tu_CreateEvent(VkDevice _device,
return VK_SUCCESS;
fail_map:
tu_bo_finish(device, &event->bo);
tu_bo_finish(device, event->bo);
fail_alloc:
vk_object_free(&device->vk, pAllocator, event);
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
@ -2376,7 +2382,7 @@ tu_DestroyEvent(VkDevice _device,
if (!event)
return;
tu_bo_finish(device, &event->bo);
tu_bo_finish(device, event->bo);
vk_object_free(&device->vk, pAllocator, event);
}
@ -2385,7 +2391,7 @@ tu_GetEventStatus(VkDevice _device, VkEvent _event)
{
TU_FROM_HANDLE(tu_event, event, _event);
if (*(uint64_t*) event->bo.map == 1)
if (*(uint64_t*) event->bo->map == 1)
return VK_EVENT_SET;
return VK_EVENT_RESET;
}
@ -2394,7 +2400,7 @@ VKAPI_ATTR VkResult VKAPI_CALL
tu_SetEvent(VkDevice _device, VkEvent _event)
{
TU_FROM_HANDLE(tu_event, event, _event);
*(uint64_t*) event->bo.map = 1;
*(uint64_t*) event->bo->map = 1;
return VK_SUCCESS;
}
@ -2403,7 +2409,7 @@ VKAPI_ATTR VkResult VKAPI_CALL
tu_ResetEvent(VkDevice _device, VkEvent _event)
{
TU_FROM_HANDLE(tu_event, event, _event);
*(uint64_t*) event->bo.map = 0;
*(uint64_t*) event->bo->map = 0;
return VK_SUCCESS;
}
@ -2524,7 +2530,7 @@ tu_init_sampler(struct tu_device *device,
border_color = BITSET_FFS(device->custom_border_color);
BITSET_CLEAR(device->custom_border_color, border_color);
mtx_unlock(&device->mutex);
tu6_pack_border_color(device->global_bo.map + gb_offset(bcolor[border_color]),
tu6_pack_border_color(device->global_bo->map + gb_offset(bcolor[border_color]),
&custom_border_color->customBorderColor,
pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT);
border_color += TU_BORDER_COLOR_BUILTIN;
@ -2690,7 +2696,7 @@ tu_GetMemoryFdKHR(VkDevice _device,
pGetFdInfo->handleType ==
VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
int prime_fd = tu_bo_export_dmabuf(device, &memory->bo);
int prime_fd = tu_bo_export_dmabuf(device, memory->bo);
if (prime_fd < 0)
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);


@ -196,12 +196,6 @@ tu_bo_init(struct tu_device *dev,
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
}
*bo = (struct tu_bo) {
.gem_handle = gem_handle,
.size = size,
.iova = iova,
};
mtx_lock(&dev->bo_mutex);
uint32_t idx = dev->bo_count++;
@ -218,39 +212,32 @@ tu_bo_init(struct tu_device *dev,
dev->bo_list_size = new_len;
}
/* grow the "bo idx" list (maps gem handles to index in the bo list) */
if (bo->gem_handle >= dev->bo_idx_size) {
uint32_t new_len = bo->gem_handle + 256;
uint32_t *new_ptr =
vk_realloc(&dev->vk.alloc, dev->bo_idx, new_len * sizeof(*dev->bo_idx),
8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (!new_ptr)
goto fail_bo_idx;
dev->bo_idx = new_ptr;
dev->bo_idx_size = new_len;
}
dev->bo_idx[bo->gem_handle] = idx;
dev->bo_list[idx] = (struct drm_msm_gem_submit_bo) {
.flags = MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE |
COND(dump, MSM_SUBMIT_BO_DUMP),
.handle = gem_handle,
.presumed = iova,
};
*bo = (struct tu_bo) {
.gem_handle = gem_handle,
.size = size,
.iova = iova,
.refcnt = 1,
.bo_list_idx = idx,
};
mtx_unlock(&dev->bo_mutex);
return VK_SUCCESS;
fail_bo_idx:
vk_free(&dev->vk.alloc, dev->bo_list);
fail_bo_list:
tu_gem_close(dev, gem_handle);
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
VkResult
tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size,
tu_bo_init_new(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
enum tu_bo_alloc_flags flags)
{
/* TODO: Choose better flags. As of 2018-11-12, freedreno/drm/msm_bo.c
@ -269,12 +256,23 @@ tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size,
if (ret)
return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
return tu_bo_init(dev, bo, req.handle, size, flags & TU_BO_ALLOC_ALLOW_DUMP);
struct tu_bo* bo = tu_device_lookup_bo(dev, req.handle);
assert(bo && bo->gem_handle == 0);
VkResult result =
tu_bo_init(dev, bo, req.handle, size, flags & TU_BO_ALLOC_ALLOW_DUMP);
if (result != VK_SUCCESS)
memset(bo, 0, sizeof(*bo));
else
*out_bo = bo;
return result;
}
VkResult
tu_bo_init_dmabuf(struct tu_device *dev,
struct tu_bo *bo,
struct tu_bo **out_bo,
uint64_t size,
int prime_fd)
{
@ -284,13 +282,42 @@ tu_bo_init_dmabuf(struct tu_device *dev,
if (real_size < 0 || (uint64_t) real_size < size)
return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
/* Importing the same dmabuf several times would yield the same
* gem_handle. Thus there could be a race when destroying
* BO and importing the same dmabuf from different threads.
* We must not permit the creation of dmabuf BO and its release
* to happen in parallel.
*/
u_rwlock_wrlock(&dev->dma_bo_lock);
uint32_t gem_handle;
int ret = drmPrimeFDToHandle(dev->fd, prime_fd,
&gem_handle);
if (ret)
if (ret) {
u_rwlock_wrunlock(&dev->dma_bo_lock);
return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
}
return tu_bo_init(dev, bo, gem_handle, size, false);
struct tu_bo* bo = tu_device_lookup_bo(dev, gem_handle);
if (bo->refcnt != 0) {
p_atomic_inc(&bo->refcnt);
u_rwlock_wrunlock(&dev->dma_bo_lock);
*out_bo = bo;
return VK_SUCCESS;
}
VkResult result = tu_bo_init(dev, bo, gem_handle, size, false);
if (result != VK_SUCCESS)
memset(bo, 0, sizeof(*bo));
else
*out_bo = bo;
u_rwlock_wrunlock(&dev->dma_bo_lock);
return result;
}
int
@ -328,17 +355,35 @@ tu_bo_finish(struct tu_device *dev, struct tu_bo *bo)
{
assert(bo->gem_handle);
u_rwlock_rdlock(&dev->dma_bo_lock);
if (!p_atomic_dec_zero(&bo->refcnt)) {
u_rwlock_rdunlock(&dev->dma_bo_lock);
return;
}
if (bo->map)
munmap(bo->map, bo->size);
mtx_lock(&dev->bo_mutex);
uint32_t idx = dev->bo_idx[bo->gem_handle];
dev->bo_count--;
dev->bo_list[idx] = dev->bo_list[dev->bo_count];
dev->bo_idx[dev->bo_list[idx].handle] = idx;
dev->bo_list[bo->bo_list_idx] = dev->bo_list[dev->bo_count];
struct tu_bo* exchanging_bo = tu_device_lookup_bo(dev, dev->bo_list[bo->bo_list_idx].handle);
exchanging_bo->bo_list_idx = bo->bo_list_idx;
mtx_unlock(&dev->bo_mutex);
tu_gem_close(dev, bo->gem_handle);
/* Our BO structs are stored in a sparse array in the physical device,
* so we don't want to free the BO pointer, instead we want to reset it
* to 0, to signal that array entry as being free.
*/
uint32_t gem_handle = bo->gem_handle;
memset(bo, 0, sizeof(*bo));
tu_gem_close(dev, gem_handle);
u_rwlock_rdunlock(&dev->dma_bo_lock);
}
extern const struct vk_sync_type tu_timeline_sync_type;
@ -833,8 +878,7 @@ tu_fill_msm_gem_submit(struct tu_device *dev,
struct tu_cs_entry *cs_entry)
{
cmd->type = MSM_SUBMIT_CMD_BUF;
cmd->submit_idx =
dev->bo_idx[cs_entry->bo->gem_handle];
cmd->submit_idx = cs_entry->bo->bo_list_idx;
cmd->submit_offset = cs_entry->offset;
cmd->size = cs_entry->size;
cmd->pad = 0;


@ -83,7 +83,7 @@ tu_drm_submitqueue_close(const struct tu_device *dev, uint32_t queue_id)
}
VkResult
tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size,
tu_bo_init_new(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
enum tu_bo_alloc_flags flags)
{
struct kgsl_gpumem_alloc_id req = {
@ -102,18 +102,23 @@ tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size,
"GPUMEM_ALLOC_ID failed (%s)", strerror(errno));
}
struct tu_bo* bo = tu_device_lookup_bo(dev, req.id);
assert(bo && bo->gem_handle == 0);
*bo = (struct tu_bo) {
.gem_handle = req.id,
.size = req.mmapsize,
.iova = req.gpuaddr,
};
*out_bo = bo;
return VK_SUCCESS;
}
VkResult
tu_bo_init_dmabuf(struct tu_device *dev,
struct tu_bo *bo,
struct tu_bo **out_bo,
uint64_t size,
int fd)
{
@ -144,12 +149,17 @@ tu_bo_init_dmabuf(struct tu_device *dev,
return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
"Failed to get dma-buf info (%s)\n", strerror(errno));
struct tu_bo* bo = tu_device_lookup_bo(dev, req.id);
assert(bo && bo->gem_handle == 0);
*bo = (struct tu_bo) {
.gem_handle = req.id,
.size = info_req.size,
.iova = info_req.gpuaddr,
};
*out_bo = bo;
return VK_SUCCESS;
}
@ -190,6 +200,9 @@ tu_bo_finish(struct tu_device *dev, struct tu_bo *bo)
.id = bo->gem_handle
};
/* Tell sparse array that entry is free */
memset(bo, 0, sizeof(*bo));
safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req);
}


@ -1610,11 +1610,11 @@ tu6_emit_geom_tess_consts(struct tu_cs *cs,
/* Create the shared tess factor BO the first time tess is used on the device. */
mtx_lock(&dev->mutex);
if (!dev->tess_bo.size)
if (!dev->tess_bo)
tu_bo_init_new(dev, &dev->tess_bo, TU_TESS_BO_SIZE, TU_BO_ALLOC_NO_FLAGS);
mtx_unlock(&dev->mutex);
uint64_t tess_factor_iova = dev->tess_bo.iova;
uint64_t tess_factor_iova = dev->tess_bo->iova;
uint64_t tess_param_iova = tess_factor_iova + TU_TESS_FACTOR_SIZE;
uint32_t hs_params[8] = {
@ -2215,7 +2215,7 @@ tu_setup_pvtmem(struct tu_device *dev,
if (result != VK_SUCCESS)
return result;
config->iova = pipeline->pvtmem_bo.iova;
config->iova = pipeline->pvtmem_bo->iova;
return result;
}
@ -3156,8 +3156,8 @@ tu_pipeline_finish(struct tu_pipeline *pipeline,
{
tu_cs_finish(&pipeline->cs);
if (pipeline->pvtmem_bo.size)
tu_bo_finish(dev, &pipeline->pvtmem_bo);
if (pipeline->pvtmem_bo)
tu_bo_finish(dev, pipeline->pvtmem_bo);
ralloc_free(pipeline->executables_mem_ctx);
}


@ -51,6 +51,7 @@
#include "util/list.h"
#include "util/log.h"
#include "util/macros.h"
#include "util/sparse_array.h"
#include "util/u_atomic.h"
#include "util/u_dynarray.h"
#include "util/xmlconfig.h"
@ -356,6 +357,11 @@ struct tu_bo
uint64_t size;
uint64_t iova;
void *map;
#ifndef TU_USE_KGSL
int32_t refcnt;
uint32_t bo_list_idx;
#endif
};
enum global_shader {
@ -400,7 +406,7 @@ struct tu6_global
struct bcolor_entry bcolor_builtin[TU_BORDER_COLOR_BUILTIN], bcolor[];
};
#define gb_offset(member) offsetof(struct tu6_global, member)
#define global_iova(cmd, member) ((cmd)->device->global_bo.iova + gb_offset(member))
#define global_iova(cmd, member) ((cmd)->device->global_bo->iova + gb_offset(member))
/* extra space in vsc draw/prim streams */
#define VSC_PAD 0x40
@ -427,19 +433,19 @@ struct tu_device
* should be impossible to go beyond 48 bits.
*/
struct {
struct tu_bo bo;
struct tu_bo *bo;
mtx_t construct_mtx;
bool initialized;
} scratch_bos[48 - MIN_SCRATCH_BO_SIZE_LOG2];
struct tu_bo global_bo;
struct tu_bo *global_bo;
/* the blob seems to always use 8K factor and 128K param sizes, copy them */
#define TU_TESS_FACTOR_SIZE (8 * 1024)
#define TU_TESS_PARAM_SIZE (128 * 1024)
#define TU_TESS_BO_SIZE (TU_TESS_FACTOR_SIZE + TU_TESS_PARAM_SIZE)
/* Lazily allocated, protected by the device mutex. */
struct tu_bo tess_bo;
struct tu_bo *tess_bo;
struct ir3_shader_variant *global_shaders[GLOBAL_SH_COUNT];
uint64_t global_shader_va[GLOBAL_SH_COUNT];
@ -452,9 +458,27 @@ struct tu_device
/* bo list for submits: */
struct drm_msm_gem_submit_bo *bo_list;
/* map bo handles to bo list index: */
uint32_t *bo_idx;
uint32_t bo_count, bo_list_size, bo_idx_size;
uint32_t bo_count, bo_list_size;
mtx_t bo_mutex;
/* protects imported BOs creation/freeing */
struct u_rwlock dma_bo_lock;
/* This array holds all our 'struct tu_bo' allocations. We use this
* so we can add a refcount to our BOs and check if a particular BO
* was already allocated in this device using its GEM handle. This is
* necessary to properly manage BO imports, because the kernel doesn't
* refcount the underlying BO memory.
*
* Specifically, when self-importing (i.e. importing a BO into the same
* device that created it), the kernel will give us the same BO handle
* for both BOs and we must only free it once when both references are
* freed. Otherwise, if we are not self-importing, we get two different BO
* handles, and we want to free each one individually.
*
* The BOs in this map all have a refcnt with the reference counter and
* only self-imported BOs will ever have a refcnt > 1.
*/
struct util_sparse_array bo_map;
/* Command streams to set pass index to a scratch reg */
struct tu_cs *perfcntrs_pass_cs;
@ -506,11 +530,11 @@ enum tu_bo_alloc_flags
};
VkResult
tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size,
tu_bo_init_new(struct tu_device *dev, struct tu_bo **bo, uint64_t size,
enum tu_bo_alloc_flags flags);
VkResult
tu_bo_init_dmabuf(struct tu_device *dev,
struct tu_bo *bo,
struct tu_bo **bo,
uint64_t size,
int fd);
int
@ -520,6 +544,12 @@ tu_bo_finish(struct tu_device *dev, struct tu_bo *bo);
VkResult
tu_bo_map(struct tu_device *dev, struct tu_bo *bo);
static inline struct tu_bo *
tu_device_lookup_bo(struct tu_device *device, uint32_t handle)
{
return (struct tu_bo *) util_sparse_array_get(&device->bo_map, handle);
}
/* Get a scratch bo for use inside a command buffer. This will always return
* the same bo given the same size or similar sizes, so only one scratch bo
* can be used at the same time. It's meant for short-lived things where we
@ -650,7 +680,7 @@ struct tu_device_memory
{
struct vk_object_base base;
struct tu_bo bo;
struct tu_bo *bo;
};
struct tu_descriptor_range
@ -687,7 +717,7 @@ struct tu_descriptor_pool
{
struct vk_object_base base;
struct tu_bo bo;
struct tu_bo *bo;
uint64_t current_offset;
uint64_t size;
@ -1190,7 +1220,7 @@ tu_get_descriptors_state(struct tu_cmd_buffer *cmd_buffer,
struct tu_event
{
struct vk_object_base base;
struct tu_bo bo;
struct tu_bo *bo;
};
struct tu_push_constant_range
@ -1257,7 +1287,7 @@ struct tu_pipeline
struct tu_cs cs;
/* Separate BO for private memory since it should GPU writable */
struct tu_bo pvtmem_bo;
struct tu_bo *pvtmem_bo;
struct tu_pipeline_layout *layout;
@ -1729,7 +1759,7 @@ struct tu_query_pool
uint32_t stride;
uint64_t size;
uint32_t pipeline_statistics;
struct tu_bo bo;
struct tu_bo *bo;
/* For performance query */
const struct fd_perfcntr_group *perf_group;


@ -111,13 +111,13 @@ struct PACKED perf_query_slot {
/* Returns the IOVA of a given uint64_t field in a given slot of a query
* pool. */
#define query_iova(type, pool, query, field) \
pool->bo.iova + pool->stride * (query) + offsetof(type, field)
pool->bo->iova + pool->stride * (query) + offsetof(type, field)
#define occlusion_query_iova(pool, query, field) \
query_iova(struct occlusion_query_slot, pool, query, field)
#define pipeline_stat_query_iova(pool, query, field) \
pool->bo.iova + pool->stride * (query) + \
pool->bo->iova + pool->stride * (query) + \
offsetof(struct pipeline_stat_query_slot, field)
#define primitive_query_iova(pool, query, field, i) \
@ -125,7 +125,7 @@ struct PACKED perf_query_slot {
offsetof(struct primitive_slot_value, values[i])
#define perf_query_iova(pool, query, field, i) \
pool->bo.iova + pool->stride * (query) + \
pool->bo->iova + pool->stride * (query) + \
sizeof(struct query_slot) + \
sizeof(struct perfcntr_query_slot) * (i) + \
offsetof(struct perfcntr_query_slot, field)
@ -134,11 +134,11 @@ struct PACKED perf_query_slot {
query_iova(struct query_slot, pool, query, available)
#define query_result_iova(pool, query, type, i) \
pool->bo.iova + pool->stride * (query) + \
pool->bo->iova + pool->stride * (query) + \
sizeof(struct query_slot) + sizeof(type) * (i)
#define query_result_addr(pool, query, type, i) \
pool->bo.map + pool->stride * (query) + \
pool->bo->map + pool->stride * (query) + \
sizeof(struct query_slot) + sizeof(type) * (i)
#define query_is_available(slot) slot->available
@ -185,7 +185,7 @@ fd_perfcntr_type_to_vk_storage[] = {
*/
static void* slot_address(struct tu_query_pool *pool, uint32_t query)
{
return (char*)pool->bo.map + query * pool->stride;
return (char*)pool->bo->map + query * pool->stride;
}
static void
@ -323,15 +323,15 @@ tu_CreateQueryPool(VkDevice _device,
return result;
}
result = tu_bo_map(device, &pool->bo);
result = tu_bo_map(device, pool->bo);
if (result != VK_SUCCESS) {
tu_bo_finish(device, &pool->bo);
tu_bo_finish(device, pool->bo);
vk_object_free(&device->vk, pAllocator, pool);
return result;
}
/* Initialize all query statuses to unavailable */
memset(pool->bo.map, 0, pool->bo.size);
memset(pool->bo->map, 0, pool->bo->size);
pool->type = pCreateInfo->queryType;
pool->stride = slot_size;
@ -353,7 +353,7 @@ tu_DestroyQueryPool(VkDevice _device,
if (!pool)
return;
tu_bo_finish(device, &pool->bo);
tu_bo_finish(device, pool->bo);
vk_object_free(&device->vk, pAllocator, pool);
}