/*
 * Copyright © 2020 Google, Inc.
 * SPDX-License-Identifier: MIT
 */

#include "tu_knl.h"

#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include "msm_kgsl.h"
#include "vk_util.h"

#include "util/u_debug.h"
#include "util/u_vector.h"
#include "util/libsync.h"
#include "util/timespec.h"

#include "tu_cmd_buffer.h"
#include "tu_cs.h"
#include "tu_device.h"
#include "tu_dynamic_rendering.h"

static int
safe_ioctl(int fd, unsigned long request, void *arg)
{
   int ret;

   do {
      ret = ioctl(fd, request, arg);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));

   return ret;
}

static int
kgsl_submitqueue_new(const struct tu_device *dev,
                     int priority,
                     uint32_t *queue_id)
{
   struct kgsl_drawctxt_create req = {
      .flags = KGSL_CONTEXT_SAVE_GMEM |
               KGSL_CONTEXT_NO_GMEM_ALLOC |
               KGSL_CONTEXT_PREAMBLE,
   };

   int ret = safe_ioctl(dev->physical_device->local_fd,
                        IOCTL_KGSL_DRAWCTXT_CREATE, &req);
   if (ret)
      return ret;

   *queue_id = req.drawctxt_id;

   return 0;
}

static void
kgsl_submitqueue_close(const struct tu_device *dev, uint32_t queue_id)
{
   struct kgsl_drawctxt_destroy req = {
      .drawctxt_id = queue_id,
   };

   safe_ioctl(dev->physical_device->local_fd,
              IOCTL_KGSL_DRAWCTXT_DESTROY, &req);
}

static VkResult
kgsl_bo_init(struct tu_device *dev,
             struct tu_bo **out_bo,
             uint64_t size,
             uint64_t client_iova,
             VkMemoryPropertyFlags mem_property,
             enum tu_bo_alloc_flags flags,
             const char *name)
{
   assert(client_iova == 0);

   struct kgsl_gpumem_alloc_id req = {
      .size = size,
   };

   if (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
      if (mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) {
         req.flags |= KGSL_MEMFLAGS_IOCOHERENT;
      }

      req.flags |= KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT;
   } else {
      req.flags |= KGSL_CACHEMODE_WRITECOMBINE << KGSL_CACHEMODE_SHIFT;
   }

   if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
      req.flags |= KGSL_MEMFLAGS_GPUREADONLY;

   int ret;

   ret = safe_ioctl(dev->physical_device->local_fd,
                    IOCTL_KGSL_GPUMEM_ALLOC_ID, &req);
   if (ret) {
      return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                       "GPUMEM_ALLOC_ID failed (%s)", strerror(errno));
   }

   struct tu_bo *bo = tu_device_lookup_bo(dev, req.id);
   assert(bo && bo->gem_handle == 0);

   *bo = (struct tu_bo) {
      .gem_handle = req.id,
      .size = req.mmapsize,
      .iova = req.gpuaddr,
      .name = tu_debug_bos_add(dev, req.mmapsize, name),
      .refcnt = 1,
   };

   *out_bo = bo;

   return VK_SUCCESS;
}

static VkResult
kgsl_bo_init_dmabuf(struct tu_device *dev,
                    struct tu_bo **out_bo,
                    uint64_t size,
                    int fd)
{
   struct kgsl_gpuobj_import_dma_buf import_dmabuf = {
      .fd = fd,
   };
   struct kgsl_gpuobj_import req = {
      .priv = (uintptr_t)&import_dmabuf,
      .priv_len = sizeof(import_dmabuf),
      .flags = 0,
      .type = KGSL_USER_MEM_TYPE_DMABUF,
   };
   int ret;

   ret = safe_ioctl(dev->physical_device->local_fd,
                    IOCTL_KGSL_GPUOBJ_IMPORT, &req);
   if (ret)
      return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                       "Failed to import dma-buf (%s)\n", strerror(errno));

   struct kgsl_gpuobj_info info_req = {
      .id = req.id,
   };

   ret = safe_ioctl(dev->physical_device->local_fd,
                    IOCTL_KGSL_GPUOBJ_INFO, &info_req);
   if (ret)
      return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                       "Failed to get dma-buf info (%s)\n", strerror(errno));

   struct tu_bo *bo = tu_device_lookup_bo(dev, req.id);
   assert(bo && bo->gem_handle == 0);

   *bo = (struct tu_bo) {
      .gem_handle = req.id,
      .size = info_req.size,
      .iova = info_req.gpuaddr,
      .name = tu_debug_bos_add(dev, info_req.size, "dmabuf"),
      .refcnt = 1,
   };

   *out_bo = bo;

   return VK_SUCCESS;
}

static int
kgsl_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo)
{
   tu_stub();

   return -1;
}
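/* Map a KGSL allocation into the CPU address space. Allocations are mapped
 * through the device fd; the mmap offset below is derived from the
 * allocation id (gem_handle << 12, i.e. the id shifted by the page shift).
 */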
static VkResult
kgsl_bo_map(struct tu_device *dev, struct tu_bo *bo)
{
   if (bo->map)
      return VK_SUCCESS;

   uint64_t offset = bo->gem_handle << 12;
   void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                    dev->physical_device->local_fd, offset);
   if (map == MAP_FAILED)
      return vk_error(dev, VK_ERROR_MEMORY_MAP_FAILED);

   bo->map = map;

   return VK_SUCCESS;
}

static void
kgsl_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo)
{
}

static void
kgsl_bo_finish(struct tu_device *dev, struct tu_bo *bo)
{
   assert(bo->gem_handle);

   if (!p_atomic_dec_zero(&bo->refcnt))
      return;

   if (bo->map)
      munmap(bo->map, bo->size);

   struct kgsl_gpumem_free_id req = {
      .id = bo->gem_handle
   };

   /* Tell sparse array that entry is free */
   memset(bo, 0, sizeof(*bo));

   safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req);
}

static VkResult
kgsl_sync_cache(VkDevice _device,
                uint32_t op,
                uint32_t count,
                const VkMappedMemoryRange *ranges)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   struct kgsl_gpuobj_sync_obj *sync_list =
      (struct kgsl_gpuobj_sync_obj *) vk_zalloc(
         &device->vk.alloc, sizeof(*sync_list) * count, 8,
         VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);

   struct kgsl_gpuobj_sync gpuobj_sync = {
      .objs = (uintptr_t) sync_list,
      .obj_len = sizeof(*sync_list),
      .count = count,
   };

   for (uint32_t i = 0; i < count; i++) {
      TU_FROM_HANDLE(tu_device_memory, mem, ranges[i].memory);

      sync_list[i].op = op;
      sync_list[i].id = mem->bo->gem_handle;
      sync_list[i].offset = ranges[i].offset;
      sync_list[i].length = ranges[i].size == VK_WHOLE_SIZE
                               ? (mem->bo->size - ranges[i].offset)
                               : ranges[i].size;
   }

   /* There are two other KGSL ioctls for flushing/invalidation:
    * - IOCTL_KGSL_GPUMEM_SYNC_CACHE - processes one memory range at a time;
    * - IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK - processes several buffers but
    *   has no way to specify ranges.
    *
    * IOCTL_KGSL_GPUOBJ_SYNC, on the other hand, maps exactly to the VK
    * function.
    */
   safe_ioctl(device->fd, IOCTL_KGSL_GPUOBJ_SYNC, &gpuobj_sync);

   vk_free(&device->vk.alloc, sync_list);

   return VK_SUCCESS;
}

VkResult
tu_FlushMappedMemoryRanges(VkDevice device,
                           uint32_t count,
                           const VkMappedMemoryRange *ranges)
{
   return kgsl_sync_cache(device, KGSL_GPUMEM_CACHE_TO_GPU, count, ranges);
}

VkResult
tu_InvalidateMappedMemoryRanges(VkDevice device,
                                uint32_t count,
                                const VkMappedMemoryRange *ranges)
{
   return kgsl_sync_cache(device, KGSL_GPUMEM_CACHE_FROM_GPU, count, ranges);
}

static VkResult
get_kgsl_prop(int fd, unsigned int type, void *value, size_t size)
{
   struct kgsl_device_getproperty getprop = {
      .type = type,
      .value = value,
      .sizebytes = size,
   };

   return safe_ioctl(fd, IOCTL_KGSL_DEVICE_GETPROPERTY, &getprop)
             ? VK_ERROR_UNKNOWN
             : VK_SUCCESS;
}

static bool
kgsl_is_memory_type_supported(int fd, uint32_t flags)
{
   struct kgsl_gpumem_alloc_id req_alloc = {
      .flags = flags,
      .size = 0x1000,
   };

   int ret = safe_ioctl(fd, IOCTL_KGSL_GPUMEM_ALLOC_ID, &req_alloc);
   if (ret) {
      return false;
   }

   struct kgsl_gpumem_free_id req_free = { .id = req_alloc.id };
   safe_ioctl(fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req_free);

   return true;
}

enum kgsl_syncobj_state {
   KGSL_SYNCOBJ_STATE_UNSIGNALED,
   KGSL_SYNCOBJ_STATE_SIGNALED,
   KGSL_SYNCOBJ_STATE_TS,
   KGSL_SYNCOBJ_STATE_FD,
};

struct kgsl_syncobj {
   struct vk_object_base base;
   enum kgsl_syncobj_state state;

   struct tu_queue *queue;
   uint32_t timestamp;

   int fd;
};
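/* A kgsl_syncobj starts out SIGNALED or UNSIGNALED; a submit resolves it to
 * a kernel timestamp (TS) on a specific queue, and importing a sync file
 * turns it into an FD-backed payload.
 */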
static void
kgsl_syncobj_init(struct kgsl_syncobj *s, bool signaled)
{
   s->state = signaled ? KGSL_SYNCOBJ_STATE_SIGNALED
                       : KGSL_SYNCOBJ_STATE_UNSIGNALED;

   s->timestamp = UINT32_MAX;
   s->fd = -1;
}

static void
kgsl_syncobj_reset(struct kgsl_syncobj *s)
{
   if (s->state == KGSL_SYNCOBJ_STATE_FD && s->fd >= 0) {
      ASSERTED int ret = close(s->fd);
      assert(ret == 0);
      s->fd = -1;
   } else if (s->state == KGSL_SYNCOBJ_STATE_TS) {
      s->timestamp = UINT32_MAX;
   }

   s->state = KGSL_SYNCOBJ_STATE_UNSIGNALED;
}

static void
kgsl_syncobj_destroy(struct kgsl_syncobj *s)
{
   kgsl_syncobj_reset(s);
}

static int
timestamp_to_fd(struct tu_queue *queue, uint32_t timestamp)
{
   int fd;
   struct kgsl_timestamp_event event = {
      .type = KGSL_TIMESTAMP_EVENT_FENCE,
      .timestamp = timestamp,
      .context_id = queue->msm_queue_id,
      .priv = &fd,
      .len = sizeof(fd),
   };

   int ret = safe_ioctl(queue->device->fd, IOCTL_KGSL_TIMESTAMP_EVENT, &event);
   if (ret)
      return -1;

   return fd;
}

static int
kgsl_syncobj_ts_to_fd(const struct kgsl_syncobj *syncobj)
{
   assert(syncobj->state == KGSL_SYNCOBJ_STATE_TS);
   return timestamp_to_fd(syncobj->queue, syncobj->timestamp);
}

/* return true if timestamp a is greater (more recent) than b
 * this relies on timestamps never having a difference > (1<<31)
 */
static inline bool
timestamp_cmp(uint32_t a, uint32_t b)
{
   return (int32_t) (a - b) >= 0;
}

static uint32_t
max_ts(uint32_t a, uint32_t b)
{
   return timestamp_cmp(a, b) ? a : b;
}

static uint32_t
min_ts(uint32_t a, uint32_t b)
{
   return timestamp_cmp(a, b) ? b : a;
}

static int
get_relative_ms(uint64_t abs_timeout_ns)
{
   if (abs_timeout_ns >= INT64_MAX)
      /* We can assume that a wait with a value this high is a forever wait
       * and return -1 here as it's the infinite timeout for ppoll() while
       * being the highest unsigned integer value for the wait KGSL IOCTL
       */
      return -1;

   uint64_t cur_time_ms = os_time_get_nano() / 1000000;
   uint64_t abs_timeout_ms = abs_timeout_ns / 1000000;

   if (abs_timeout_ms <= cur_time_ms)
      return 0;

   return abs_timeout_ms - cur_time_ms;
}

/* safe_ioctl is not enough as restarted waits would not adjust the timeout
 * which could lead to waiting substantially longer than requested
 */
static int
wait_timestamp_safe(int fd,
                    unsigned int context_id,
                    unsigned int timestamp,
                    uint64_t abs_timeout_ns)
{
   struct kgsl_device_waittimestamp_ctxtid wait = {
      .context_id = context_id,
      .timestamp = timestamp,
      .timeout = get_relative_ms(abs_timeout_ns),
   };

   while (true) {
      int ret = ioctl(fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &wait);

      if (ret == -1 && (errno == EINTR || errno == EAGAIN)) {
         int timeout_ms = get_relative_ms(abs_timeout_ns);

         /* update timeout to consider time that has passed since the start */
         if (timeout_ms == 0) {
            errno = ETIME;
            return -1;
         }

         wait.timeout = timeout_ms;
      } else if (ret == -1 && errno == ETIMEDOUT) {
         /* The kernel returns ETIMEDOUT if the timeout is reached, but
          * we want to return ETIME instead.
          */
         errno = ETIME;
         return -1;
      } else {
         return ret;
      }
   }
}
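/* Wait for a single syncobj to signal. An UNSIGNALED syncobj has not been
 * attached to a submit yet, so we block on the device's timeline condvar
 * until a submit resolves it (avoiding wait-before-signal in the kernel);
 * a TS payload is then waited on with the KGSL timestamp wait and an FD
 * payload with a sync-file wait.
 */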
static VkResult
kgsl_syncobj_wait(struct tu_device *device,
                  struct kgsl_syncobj *s,
                  uint64_t abs_timeout_ns)
{
   if (s->state == KGSL_SYNCOBJ_STATE_UNSIGNALED) {
      /* If this syncobj is unsignaled we need to wait for it to resolve to a
       * valid syncobj prior to letting the rest of the wait continue, this
       * avoids needing kernel support for wait-before-signal semantics.
       */

      if (abs_timeout_ns == 0)
         return VK_TIMEOUT; // If this is a simple poll then we can return early

      pthread_mutex_lock(&device->submit_mutex);

      struct timespec abstime;
      timespec_from_nsec(&abstime, abs_timeout_ns);

      while (s->state == KGSL_SYNCOBJ_STATE_UNSIGNALED) {
         int ret;
         if (abs_timeout_ns == UINT64_MAX) {
            ret = pthread_cond_wait(&device->timeline_cond,
                                    &device->submit_mutex);
         } else {
            ret = pthread_cond_timedwait(&device->timeline_cond,
                                         &device->submit_mutex, &abstime);
         }
         if (ret != 0) {
            assert(ret == ETIMEDOUT);
            pthread_mutex_unlock(&device->submit_mutex);
            return VK_TIMEOUT;
         }
      }

      pthread_mutex_unlock(&device->submit_mutex);
   }

   switch (s->state) {
   case KGSL_SYNCOBJ_STATE_SIGNALED:
      return VK_SUCCESS;

   case KGSL_SYNCOBJ_STATE_UNSIGNALED:
      return VK_TIMEOUT;

   case KGSL_SYNCOBJ_STATE_TS: {
      int ret = wait_timestamp_safe(device->fd, s->queue->msm_queue_id,
                                    s->timestamp, abs_timeout_ns);
      if (ret) {
         assert(errno == ETIME);
         return VK_TIMEOUT;
      } else {
         return VK_SUCCESS;
      }
   }

   case KGSL_SYNCOBJ_STATE_FD: {
      int ret = sync_wait(s->fd, get_relative_ms(abs_timeout_ns));
      if (ret) {
         assert(errno == ETIME);
         return VK_TIMEOUT;
      } else {
         return VK_SUCCESS;
      }
   }

   default:
      unreachable("invalid syncobj state");
   }
}

#define kgsl_syncobj_foreach_state(syncobjs, filter) \
   for (uint32_t i = 0; sync = syncobjs[i], i < count; i++) \
      if (sync->state == filter)
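/* Wait until any of the syncobjs signals. Already-signaled syncobjs
 * short-circuit; timestamps that all belong to one queue reduce to waiting
 * on the lowest timestamp, while timestamps from different queues and FD
 * payloads are converted to sync-file fds and poll()ed together.
 */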
static VkResult
kgsl_syncobj_wait_any(struct tu_device *device,
                      struct kgsl_syncobj **syncobjs,
                      uint32_t count,
                      uint64_t abs_timeout_ns)
{
   if (count == 0)
      return VK_TIMEOUT;
   else if (count == 1)
      return kgsl_syncobj_wait(device, syncobjs[0], abs_timeout_ns);

   uint32_t num_fds = 0;
   struct tu_queue *queue = NULL;
   struct kgsl_syncobj *sync = NULL;

   /* Simple case, we already have a signaled one */
   kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_SIGNALED)
      return VK_SUCCESS;

   kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_FD)
      num_fds++;

   /* If we have TS from different queues we cannot compare them and would
    * have to convert them into FDs
    */
   bool convert_ts_to_fd = false;
   kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
      if (queue != NULL && sync->queue != queue) {
         convert_ts_to_fd = true;
         break;
      }
      queue = sync->queue;
   }

   /* If we have no FD nor TS syncobjs then we can return immediately */
   if (num_fds == 0 && queue == NULL)
      return VK_TIMEOUT;

   VkResult result = VK_TIMEOUT;

   struct u_vector poll_fds = { 0 };
   uint32_t lowest_timestamp = 0;

   if (convert_ts_to_fd || num_fds > 0)
      u_vector_init(&poll_fds, 4, sizeof(struct pollfd));

   if (convert_ts_to_fd) {
      kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
         struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
         poll_fd->fd = timestamp_to_fd(sync->queue, sync->timestamp);
         poll_fd->events = POLLIN;
      }
   } else {
      /* TSs could be merged by finding the one with the lowest timestamp */
      bool first_ts = true;
      kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
         if (first_ts || timestamp_cmp(sync->timestamp, lowest_timestamp)) {
            first_ts = false;
            lowest_timestamp = sync->timestamp;
         }
      }

      if (num_fds) {
         struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
         poll_fd->fd = timestamp_to_fd(queue, lowest_timestamp);
         poll_fd->events = POLLIN;
      }
   }

   if (num_fds) {
      kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_FD) {
         struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
         poll_fd->fd = sync->fd;
         poll_fd->events = POLLIN;
      }
   }

   if (u_vector_length(&poll_fds) == 0) {
      int ret = wait_timestamp_safe(device->fd, queue->msm_queue_id,
                                    lowest_timestamp,
                                    MIN2(abs_timeout_ns, INT64_MAX));
      if (ret) {
         assert(errno == ETIME);
         result = VK_TIMEOUT;
      } else {
         result = VK_SUCCESS;
      }
   } else {
      int ret, i;
      struct pollfd *fds = (struct pollfd *) poll_fds.data;
      uint32_t fds_count = u_vector_length(&poll_fds);
      do {
         ret = poll(fds, fds_count, get_relative_ms(abs_timeout_ns));
         if (ret > 0) {
            for (i = 0; i < fds_count; i++) {
               if (fds[i].revents & (POLLERR | POLLNVAL)) {
                  errno = EINVAL;
                  ret = -1;
                  break;
               }
            }
            break;
         } else if (ret == 0) {
            errno = ETIME;
            break;
         }
      } while (ret == -1 && (errno == EINTR || errno == EAGAIN));

      for (uint32_t i = 0; i < fds_count - num_fds; i++)
         close(fds[i].fd);

      if (ret != 0) {
         assert(errno == ETIME);
         result = VK_TIMEOUT;
      } else {
         result = VK_SUCCESS;
      }
   }

   u_vector_finish(&poll_fds);

   return result;
}

static VkResult
kgsl_syncobj_export(struct kgsl_syncobj *s, int *pFd)
{
   if (!pFd)
      return VK_SUCCESS;

   switch (s->state) {
   case KGSL_SYNCOBJ_STATE_SIGNALED:
   case KGSL_SYNCOBJ_STATE_UNSIGNALED:
      /* Getting a sync FD from an unsignaled syncobj is UB in Vulkan */
      *pFd = -1;
      return VK_SUCCESS;

   case KGSL_SYNCOBJ_STATE_FD:
      if (s->fd < 0)
         *pFd = -1;
      else
         *pFd = dup(s->fd);
      return VK_SUCCESS;

   case KGSL_SYNCOBJ_STATE_TS:
      *pFd = kgsl_syncobj_ts_to_fd(s);
      return VK_SUCCESS;

   default:
      unreachable("Invalid syncobj state");
   }
}

static VkResult
kgsl_syncobj_import(struct kgsl_syncobj *s, int fd)
{
   kgsl_syncobj_reset(s);
   if (fd >= 0) {
      s->state = KGSL_SYNCOBJ_STATE_FD;
      s->fd = fd;
   } else {
      s->state = KGSL_SYNCOBJ_STATE_SIGNALED;
   }

   return VK_SUCCESS;
}

static int
sync_merge_close(const char *name, int fd1, int fd2, bool close_fd2)
{
   int fd = sync_merge(name, fd1, fd2);
   if (fd < 0)
      return -1;

   close(fd1);
   if (close_fd2)
      close(fd2);

   return fd;
}

/* Merges multiple kgsl_syncobjs into a single one which is only signalled
 * after all submitted syncobjs are signalled
 */
static struct kgsl_syncobj
kgsl_syncobj_merge(const struct kgsl_syncobj **syncobjs, uint32_t count)
{
   struct kgsl_syncobj ret;
   kgsl_syncobj_init(&ret, true);

   if (count == 0)
      return ret;

   for (uint32_t i = 0; i < count; ++i) {
      const struct kgsl_syncobj *sync = syncobjs[i];

      switch (sync->state) {
      case KGSL_SYNCOBJ_STATE_SIGNALED:
         break;

      case KGSL_SYNCOBJ_STATE_UNSIGNALED:
         kgsl_syncobj_reset(&ret);
         return ret;

      case KGSL_SYNCOBJ_STATE_TS:
         if (ret.state == KGSL_SYNCOBJ_STATE_TS) {
            if (ret.queue == sync->queue) {
               ret.timestamp = max_ts(ret.timestamp, sync->timestamp);
            } else {
               ret.state = KGSL_SYNCOBJ_STATE_FD;
               int sync_fd = kgsl_syncobj_ts_to_fd(sync);
               ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
               assert(ret.fd >= 0);
            }
         } else if (ret.state == KGSL_SYNCOBJ_STATE_FD) {
            int sync_fd = kgsl_syncobj_ts_to_fd(sync);
            ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
            assert(ret.fd >= 0);
         } else {
            ret = *sync;
         }
         break;

      case KGSL_SYNCOBJ_STATE_FD:
         if (ret.state == KGSL_SYNCOBJ_STATE_FD) {
            ret.fd = sync_merge_close("tu_sync", ret.fd, sync->fd, false);
            assert(ret.fd >= 0);
         } else if (ret.state == KGSL_SYNCOBJ_STATE_TS) {
            ret.state = KGSL_SYNCOBJ_STATE_FD;
            int sync_fd = kgsl_syncobj_ts_to_fd(sync);
            ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
            assert(ret.fd >= 0);
         } else {
            ret = *sync;
            ret.fd = dup(ret.fd);
            assert(ret.fd >= 0);
         }
         break;

      default:
         unreachable("invalid syncobj state");
      }
   }

   return ret;
}
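/* vk_sync glue: thin wrappers that let the common Vulkan runtime drive
 * kgsl_syncobj as its binary sync primitive (timelines are layered on top
 * via vk_sync_timeline).
 */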
struct vk_kgsl_syncobj {
   struct vk_sync vk;
   struct kgsl_syncobj syncobj;
};

static VkResult
vk_kgsl_sync_init(struct vk_device *device,
                  struct vk_sync *sync,
                  uint64_t initial_value)
{
   struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
   kgsl_syncobj_init(&s->syncobj, initial_value != 0);
   return VK_SUCCESS;
}

static void
vk_kgsl_sync_finish(struct vk_device *device, struct vk_sync *sync)
{
   struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
   kgsl_syncobj_destroy(&s->syncobj);
}

static VkResult
vk_kgsl_sync_reset(struct vk_device *device, struct vk_sync *sync)
{
   struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
   kgsl_syncobj_reset(&s->syncobj);
   return VK_SUCCESS;
}

static VkResult
vk_kgsl_sync_move(struct vk_device *device,
                  struct vk_sync *dst,
                  struct vk_sync *src)
{
   struct vk_kgsl_syncobj *d = container_of(dst, struct vk_kgsl_syncobj, vk);
   struct vk_kgsl_syncobj *s = container_of(src, struct vk_kgsl_syncobj, vk);
   kgsl_syncobj_reset(&d->syncobj);
   d->syncobj = s->syncobj;
   kgsl_syncobj_init(&s->syncobj, false);
   return VK_SUCCESS;
}

static VkResult
vk_kgsl_sync_wait(struct vk_device *_device,
                  struct vk_sync *sync,
                  uint64_t wait_value,
                  enum vk_sync_wait_flags wait_flags,
                  uint64_t abs_timeout_ns)
{
   struct tu_device *device = container_of(_device, struct tu_device, vk);
   struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);

   if (wait_flags & VK_SYNC_WAIT_PENDING)
      return VK_SUCCESS;

   return kgsl_syncobj_wait(device, &s->syncobj, abs_timeout_ns);
}

static VkResult
vk_kgsl_sync_wait_many(struct vk_device *_device,
                       uint32_t wait_count,
                       const struct vk_sync_wait *waits,
                       enum vk_sync_wait_flags wait_flags,
                       uint64_t abs_timeout_ns)
{
   struct tu_device *device = container_of(_device, struct tu_device, vk);

   if (wait_flags & VK_SYNC_WAIT_PENDING)
      return VK_SUCCESS;

   if (wait_flags & VK_SYNC_WAIT_ANY) {
      struct kgsl_syncobj *syncobjs[wait_count];
      for (uint32_t i = 0; i < wait_count; i++) {
         syncobjs[i] =
            &container_of(waits[i].sync, struct vk_kgsl_syncobj, vk)->syncobj;
      }

      return kgsl_syncobj_wait_any(device, syncobjs, wait_count,
                                   abs_timeout_ns);
   } else {
      for (uint32_t i = 0; i < wait_count; i++) {
         struct vk_kgsl_syncobj *s =
            container_of(waits[i].sync, struct vk_kgsl_syncobj, vk);

         VkResult result =
            kgsl_syncobj_wait(device, &s->syncobj, abs_timeout_ns);
         if (result != VK_SUCCESS)
            return result;
      }
      return VK_SUCCESS;
   }
}

static VkResult
vk_kgsl_sync_import_sync_file(struct vk_device *device,
                              struct vk_sync *sync,
                              int fd)
{
   struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
   if (fd >= 0) {
      fd = dup(fd);
      if (fd < 0) {
         mesa_loge("vk_kgsl_sync_import_sync_file: dup failed: %s",
                   strerror(errno));
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   }
   return kgsl_syncobj_import(&s->syncobj, fd);
}

static VkResult
vk_kgsl_sync_export_sync_file(struct vk_device *device,
                              struct vk_sync *sync,
                              int *pFd)
{
   struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
   return kgsl_syncobj_export(&s->syncobj, pFd);
}

const struct vk_sync_type vk_kgsl_sync_type = {
   .size = sizeof(struct vk_kgsl_syncobj),
   .features = (enum vk_sync_features)
      (VK_SYNC_FEATURE_BINARY |
       VK_SYNC_FEATURE_GPU_WAIT |
       VK_SYNC_FEATURE_GPU_MULTI_WAIT |
       VK_SYNC_FEATURE_CPU_WAIT |
       VK_SYNC_FEATURE_CPU_RESET |
       VK_SYNC_FEATURE_WAIT_ANY |
       VK_SYNC_FEATURE_WAIT_PENDING),
   .init = vk_kgsl_sync_init,
   .finish = vk_kgsl_sync_finish,
   .reset = vk_kgsl_sync_reset,
   .move = vk_kgsl_sync_move,
   .wait = vk_kgsl_sync_wait,
   .wait_many = vk_kgsl_sync_wait_many,
   .import_sync_file = vk_kgsl_sync_import_sync_file,
   .export_sync_file = vk_kgsl_sync_export_sync_file,
};
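/* Queue submission. A submit with no command buffers only merges its wait
 * semaphores (plus the queue's last submitted timestamp) into the signal
 * semaphores. Otherwise the command streams are flattened into a KGSL IB
 * list, the merged waits become at most one syncpoint, the batch is handed
 * to the kernel via IOCTL_KGSL_GPU_COMMAND, and the signal semaphores
 * resolve to the timestamp the kernel returns.
 */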
static VkResult
kgsl_queue_submit(struct tu_queue *queue, struct vk_queue_submit *vk_submit)
{
   MESA_TRACE_FUNC();

   if (vk_submit->command_buffer_count == 0) {
      pthread_mutex_lock(&queue->device->submit_mutex);

      const struct kgsl_syncobj *wait_semaphores[vk_submit->wait_count + 1];
      for (uint32_t i = 0; i < vk_submit->wait_count; i++) {
         wait_semaphores[i] =
            &container_of(vk_submit->waits[i].sync, struct vk_kgsl_syncobj, vk)
                ->syncobj;
      }

      struct kgsl_syncobj last_submit_sync;
      if (queue->last_submit_timestamp >= 0)
         last_submit_sync = (struct kgsl_syncobj) {
            .state = KGSL_SYNCOBJ_STATE_TS,
            .queue = queue,
            .timestamp = queue->last_submit_timestamp,
         };
      else
         last_submit_sync = (struct kgsl_syncobj) {
            .state = KGSL_SYNCOBJ_STATE_SIGNALED,
         };

      wait_semaphores[vk_submit->wait_count] = &last_submit_sync;

      struct kgsl_syncobj wait_sync =
         kgsl_syncobj_merge(wait_semaphores, vk_submit->wait_count + 1);
      assert(wait_sync.state !=
             KGSL_SYNCOBJ_STATE_UNSIGNALED); // Would wait forever

      for (uint32_t i = 0; i < vk_submit->signal_count; i++) {
         struct kgsl_syncobj *signal_sync =
            &container_of(vk_submit->signals[i].sync, struct vk_kgsl_syncobj,
                          vk)
                ->syncobj;

         kgsl_syncobj_reset(signal_sync);
         *signal_sync = wait_sync;
      }

      pthread_mutex_unlock(&queue->device->submit_mutex);
      pthread_cond_broadcast(&queue->device->timeline_cond);

      return VK_SUCCESS;
   }

   uint32_t perf_pass_index =
      queue->device->perfcntrs_pass_cs ? vk_submit->perf_pass_index : ~0;

   if (TU_DEBUG(LOG_SKIP_GMEM_OPS))
      tu_dbg_log_gmem_load_store_skips(queue->device);

   VkResult result = VK_SUCCESS;

   pthread_mutex_lock(&queue->device->submit_mutex);

   struct tu_cmd_buffer **cmd_buffers =
      (struct tu_cmd_buffer **) vk_submit->command_buffers;
   static_assert(offsetof(struct tu_cmd_buffer, vk) == 0,
                 "vk must be first member of tu_cmd_buffer");
   uint32_t cmdbuf_count = vk_submit->command_buffer_count;

   result =
      tu_insert_dynamic_cmdbufs(queue->device, &cmd_buffers, &cmdbuf_count);
   if (result != VK_SUCCESS) {
      pthread_mutex_unlock(&queue->device->submit_mutex);
      return result;
   }

   uint32_t entry_count = 0;
   for (uint32_t i = 0; i < cmdbuf_count; ++i) {
      struct tu_cmd_buffer *cmd_buffer = cmd_buffers[i];

      if (perf_pass_index != ~0)
         entry_count++;

      entry_count += cmd_buffer->cs.entry_count;
   }

   if (tu_autotune_submit_requires_fence(cmd_buffers, cmdbuf_count))
      entry_count++;

   struct kgsl_command_object *cmds = (struct kgsl_command_object *)
      vk_alloc(&queue->device->vk.alloc, sizeof(*cmds) * entry_count,
               alignof(*cmds), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (cmds == NULL) {
      pthread_mutex_unlock(&queue->device->submit_mutex);
      return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   uint32_t entry_idx = 0;
   for (uint32_t i = 0; i < cmdbuf_count; i++) {
      struct tu_cmd_buffer *cmd_buffer = cmd_buffers[i];
      struct tu_cs *cs = &cmd_buffer->cs;

      if (perf_pass_index != ~0) {
         struct tu_cs_entry *perf_cs_entry =
            &cmd_buffer->device->perfcntrs_pass_cs_entries[perf_pass_index];

         cmds[entry_idx++] = (struct kgsl_command_object) {
            .gpuaddr = perf_cs_entry->bo->iova + perf_cs_entry->offset,
            .size = perf_cs_entry->size,
            .flags = KGSL_CMDLIST_IB,
            .id = perf_cs_entry->bo->gem_handle,
         };
      }

      for (uint32_t j = 0; j < cs->entry_count; j++) {
         cmds[entry_idx++] = (struct kgsl_command_object) {
            .gpuaddr = cs->entries[j].bo->iova + cs->entries[j].offset,
            .size = cs->entries[j].size,
            .flags = KGSL_CMDLIST_IB,
            .id = cs->entries[j].bo->gem_handle,
         };
      }
   }
   if (tu_autotune_submit_requires_fence(cmd_buffers, cmdbuf_count)) {
      struct tu_cs *autotune_cs = tu_autotune_on_submit(
         queue->device, &queue->device->autotune, cmd_buffers, cmdbuf_count);
      cmds[entry_idx++] = (struct kgsl_command_object) {
         .gpuaddr =
            autotune_cs->entries[0].bo->iova + autotune_cs->entries[0].offset,
         .size = autotune_cs->entries[0].size,
         .flags = KGSL_CMDLIST_IB,
         .id = autotune_cs->entries[0].bo->gem_handle,
      };
   }

   const struct kgsl_syncobj *wait_semaphores[vk_submit->wait_count];
   for (uint32_t i = 0; i < vk_submit->wait_count; i++) {
      wait_semaphores[i] =
         &container_of(vk_submit->waits[i].sync, struct vk_kgsl_syncobj, vk)
             ->syncobj;
   }

   struct kgsl_syncobj wait_sync =
      kgsl_syncobj_merge(wait_semaphores, vk_submit->wait_count);
   assert(wait_sync.state !=
          KGSL_SYNCOBJ_STATE_UNSIGNALED); // Would wait forever

   struct kgsl_cmd_syncpoint_timestamp ts;
   struct kgsl_cmd_syncpoint_fence fn;
   struct kgsl_command_syncpoint sync = { 0 };
   bool has_sync = false;
   switch (wait_sync.state) {
   case KGSL_SYNCOBJ_STATE_SIGNALED:
      break;

   case KGSL_SYNCOBJ_STATE_TS:
      ts.context_id = wait_sync.queue->msm_queue_id;
      ts.timestamp = wait_sync.timestamp;

      has_sync = true;
      sync.type = KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP;
      sync.priv = (uintptr_t) &ts;
      sync.size = sizeof(ts);
      break;

   case KGSL_SYNCOBJ_STATE_FD:
      fn.fd = wait_sync.fd;

      has_sync = true;
      sync.type = KGSL_CMD_SYNCPOINT_TYPE_FENCE;
      sync.priv = (uintptr_t) &fn;
      sync.size = sizeof(fn);
      break;

   default:
      unreachable("invalid syncobj state");
   }

   struct kgsl_gpu_command req = {
      .flags = KGSL_CMDBATCH_SUBMIT_IB_LIST,
      .cmdlist = (uintptr_t) cmds,
      .cmdsize = sizeof(struct kgsl_command_object),
      .numcmds = entry_idx,
      .synclist = (uintptr_t) &sync,
      .syncsize = sizeof(sync),
      .numsyncs = has_sync != 0 ? 1 : 0,
      .context_id = queue->msm_queue_id,
   };

   int ret = safe_ioctl(queue->device->physical_device->local_fd,
                        IOCTL_KGSL_GPU_COMMAND, &req);

   kgsl_syncobj_destroy(&wait_sync);

   if (ret) {
      result = vk_device_set_lost(&queue->device->vk, "submit failed: %s\n",
                                  strerror(errno));
      pthread_mutex_unlock(&queue->device->submit_mutex);
      return result;
   }

   queue->last_submit_timestamp = req.timestamp;

   for (uint32_t i = 0; i < vk_submit->signal_count; i++) {
      struct kgsl_syncobj *signal_sync =
         &container_of(vk_submit->signals[i].sync, struct vk_kgsl_syncobj, vk)
             ->syncobj;

      kgsl_syncobj_reset(signal_sync);
      signal_sync->state = KGSL_SYNCOBJ_STATE_TS;
      signal_sync->queue = queue;
      signal_sync->timestamp = req.timestamp;
   }

   pthread_mutex_unlock(&queue->device->submit_mutex);

   pthread_cond_broadcast(&queue->device->timeline_cond);

   return result;
}

static VkResult
kgsl_device_wait_u_trace(struct tu_device *dev,
                         struct tu_u_trace_syncobj *syncobj)
{
   tu_finishme("tu_device_wait_u_trace");
   return VK_SUCCESS;
}

static int
kgsl_device_get_gpu_timestamp(struct tu_device *dev, uint64_t *ts)
{
   tu_finishme("tu_device_get_gpu_timestamp");
   return 0;
}

static int
kgsl_device_get_suspend_count(struct tu_device *dev, uint64_t *suspend_count)
{
   /* kgsl doesn't have a way to get it */
   *suspend_count = 0;
   return 0;
}
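/* Device-loss check: query the per-context reset status for every queue and
 * mark the device lost on any reset where this context was not innocent.
 */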
static VkResult
kgsl_device_check_status(struct tu_device *device)
{
   for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
      for (unsigned q = 0; q < device->queue_count[i]; q++) {
         /* KGSL's KGSL_PROP_GPU_RESET_STAT takes the u32 msm_queue_id and
          * returns a KGSL_CTX_STAT_* for the worst reset that happened since
          * the last time it was queried on that queue.
          */
         uint32_t value = device->queues[i][q].msm_queue_id;
         VkResult status = get_kgsl_prop(device->fd, KGSL_PROP_GPU_RESET_STAT,
                                         &value, sizeof(value));
         if (status != VK_SUCCESS)
            return vk_device_set_lost(&device->vk,
                                      "Failed to get GPU reset status");

         if (value != KGSL_CTX_STAT_NO_ERROR &&
             value != KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT) {
            return vk_device_set_lost(&device->vk, "GPU faulted or hung");
         }
      }
   }

   return VK_SUCCESS;
}

static const struct tu_knl kgsl_knl_funcs = {
   .name = "kgsl",

   .device_get_gpu_timestamp = kgsl_device_get_gpu_timestamp,
   .device_get_suspend_count = kgsl_device_get_suspend_count,
   .device_check_status = kgsl_device_check_status,
   .submitqueue_new = kgsl_submitqueue_new,
   .submitqueue_close = kgsl_submitqueue_close,
   .bo_init = kgsl_bo_init,
   .bo_init_dmabuf = kgsl_bo_init_dmabuf,
   .bo_export_dmabuf = kgsl_bo_export_dmabuf,
   .bo_map = kgsl_bo_map,
   .bo_allow_dump = kgsl_bo_allow_dump,
   .bo_finish = kgsl_bo_finish,
   .device_wait_u_trace = kgsl_device_wait_u_trace,
   .queue_submit = kgsl_queue_submit,
};

VkResult
tu_knl_kgsl_load(struct tu_instance *instance, int fd)
{
   if (instance->vk.enabled_extensions.KHR_display) {
      return vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                       "I can't KHR_display");
   }

   struct tu_physical_device *device = (struct tu_physical_device *)
      vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!device) {
      close(fd);
      return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   VkResult result = VK_ERROR_INITIALIZATION_FAILED;

   struct kgsl_devinfo info;
   if (get_kgsl_prop(fd, KGSL_PROP_DEVICE_INFO, &info, sizeof(info)))
      goto fail;

   uint64_t gmem_iova;
   if (get_kgsl_prop(fd, KGSL_PROP_UCHE_GMEM_VADDR, &gmem_iova,
                     sizeof(gmem_iova)))
      goto fail;

   /* kgsl version check? */

   device->instance = instance;
   device->master_fd = -1;
   device->local_fd = fd;

   device->dev_id.gpu_id =
      ((info.chip_id >> 24) & 0xff) * 100 +
      ((info.chip_id >> 16) & 0xff) * 10 +
      ((info.chip_id >> 8) & 0xff);
   device->dev_id.chip_id = info.chip_id;
   device->gmem_size = debug_get_num_option("TU_GMEM", info.gmem_sizebytes);
   device->gmem_base = gmem_iova;

   device->submitqueue_priority_count = 1;

   device->timeline_type = vk_sync_timeline_get_type(&vk_kgsl_sync_type);

   device->sync_types[0] = &vk_kgsl_sync_type;
   device->sync_types[1] = &device->timeline_type.sync;
   device->sync_types[2] = NULL;

   device->heap.size = tu_get_system_heap_size();
   device->heap.used = 0u;
   device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;

   /* Even if kernel is new enough, the GPU itself may not support it. */
   device->has_cached_coherent_memory = kgsl_is_memory_type_supported(
      fd, KGSL_MEMFLAGS_IOCOHERENT |
             (KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT));
   device->has_cached_non_coherent_memory = true;

   instance->knl = &kgsl_knl_funcs;

   result = tu_physical_device_init(device, instance);
   if (result != VK_SUCCESS)
      goto fail;

   list_addtail(&device->vk.link, &instance->vk.physical_devices.list);

   return VK_SUCCESS;

fail:
   vk_free(&instance->vk.alloc, device);
   close(fd);
   return result;
}