diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c
index 21cead1df7b..4aefa58ea1e 100644
--- a/src/intel/vulkan/anv_batch_chain.c
+++ b/src/intel/vulkan/anv_batch_chain.c
@@ -1091,6 +1091,8 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
 struct anv_execbuf {
    struct drm_i915_gem_execbuffer2           execbuf;
 
+   struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
+
    struct drm_i915_gem_exec_object2 *        objects;
    uint32_t                                  bo_count;
    struct anv_bo **                          bos;
@@ -1119,6 +1121,24 @@ anv_execbuf_finish(struct anv_execbuf *exec)
    vk_free(exec->alloc, exec->bos);
 }
 
+static void
+anv_execbuf_add_ext(struct anv_execbuf *exec,
+                    uint32_t ext_name,
+                    struct i915_user_extension *ext)
+{
+   __u64 *iter = &exec->execbuf.cliprects_ptr;
+
+   exec->execbuf.flags |= I915_EXEC_USE_EXTENSIONS;
+
+   while (*iter != 0) {
+      iter = (__u64 *) &((struct i915_user_extension *)(uintptr_t)*iter)->next_extension;
+   }
+
+   ext->name = ext_name;
+
+   *iter = (uintptr_t) ext;
+}
+
 static VkResult
 anv_execbuf_add_bo_bitset(struct anv_device *device,
                           struct anv_execbuf *exec,
@@ -1754,18 +1774,30 @@ anv_queue_execbuf_locked(struct anv_queue *queue,
 
    if (submit->fence_count > 0) {
       assert(device->physical->has_syncobj);
-      execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY;
-      execbuf.execbuf.num_cliprects = submit->fence_count;
-      execbuf.execbuf.cliprects_ptr = (uintptr_t)submit->fences;
+      if (device->has_thread_submit) {
+         execbuf.timeline_fences.fence_count = submit->fence_count;
+         execbuf.timeline_fences.handles_ptr = (uintptr_t)submit->fences;
+         execbuf.timeline_fences.values_ptr = (uintptr_t)submit->fence_values;
+         anv_execbuf_add_ext(&execbuf,
+                             DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
+                             &execbuf.timeline_fences.base);
+      } else {
+         execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY;
+         execbuf.execbuf.num_cliprects = submit->fence_count;
+         execbuf.execbuf.cliprects_ptr = (uintptr_t)submit->fences;
+      }
    }
 
    if (submit->in_fence != -1) {
+      assert(!device->has_thread_submit);
       execbuf.execbuf.flags |= I915_EXEC_FENCE_IN;
       execbuf.execbuf.rsvd2 |= (uint32_t)submit->in_fence;
    }
 
-   if (submit->need_out_fence)
+   if (submit->need_out_fence) {
+      assert(!device->has_thread_submit);
       execbuf.execbuf.flags |= I915_EXEC_FENCE_OUT;
+   }
 
    if (has_perf_query) {
       struct anv_query_pool *query_pool = submit->cmd_buffer->perf_query_pool;
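Editor's note: anv_execbuf_add_ext() above relies on the fact that, when I915_EXEC_USE_EXTENSIONS is set, i915 reinterprets the otherwise-unused cliprects_ptr field as the head of a singly linked list of struct i915_user_extension, each tagged with a name the kernel dispatches on. A minimal standalone sketch of the resulting layout for the timeline-fences extension used by this patch — the struct and flag names come from i915_drm.h, while the function and its parameters are hypothetical illustration, not driver code:

#include <stdint.h>
#include <drm/i915_drm.h>

static void
chain_timeline_fences(struct drm_i915_gem_execbuffer2 *execbuf,
                      struct drm_i915_gem_execbuffer_ext_timeline_fences *ext,
                      uint64_t fence_count,
                      const struct drm_i915_gem_exec_fence *fences,
                      const uint64_t *values)
{
   /* Tag the extension and terminate the chain (assumes *ext is zeroed). */
   ext->base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES;
   ext->base.next_extension = 0;
   ext->fence_count = fence_count;
   ext->handles_ptr = (uintptr_t)fences;  /* array of handle+flags pairs */
   ext->values_ptr = (uintptr_t)values;   /* one timeline point per fence */

   /* cliprects_ptr doubles as the extension-chain head in this mode. */
   execbuf->flags |= I915_EXEC_USE_EXTENSIONS;
   execbuf->cliprects_ptr = (uintptr_t)&ext->base;
}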
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 55d079e133f..be4d1909d2e 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -460,6 +460,9 @@ anv_physical_device_try_create(struct anv_instance *instance,
    if (env_var_as_boolean("ANV_QUEUE_THREAD_DISABLE", false))
       device->has_exec_timeline = false;
 
+   device->has_thread_submit =
+      device->has_syncobj_wait_available && device->has_exec_timeline;
+
    device->always_use_bindless =
       env_var_as_boolean("ANV_ALWAYS_BINDLESS", false);
 
@@ -2821,6 +2824,8 @@ VkResult anv_CreateDevice(
       goto fail_fd;
    }
 
+   device->has_thread_submit = physical_device->has_thread_submit;
+
    result = anv_queue_init(device, &device->queue);
    if (result != VK_SUCCESS)
       goto fail_context_id;
@@ -3111,12 +3116,12 @@ void anv_DestroyDevice(
    if (!device)
       return;
 
+   anv_queue_finish(&device->queue);
+
    anv_device_finish_blorp(device);
 
    anv_pipeline_cache_finish(&device->default_pipeline_cache);
 
-   anv_queue_finish(&device->queue);
-
 #ifdef HAVE_VALGRIND
    /* We only need to free these to prevent valgrind errors.  The backing
     * BO will go away in a couple of lines so we don't actually leak.
@@ -3228,6 +3233,22 @@ void anv_GetDeviceQueue2(
       *pQueue = NULL;
 }
 
+void
+_anv_device_report_lost(struct anv_device *device)
+{
+   assert(p_atomic_read(&device->_lost) > 0);
+
+   device->lost_reported = true;
+
+   struct anv_queue *queue = &device->queue;
+
+   __vk_errorf(device->physical->instance, device,
+               VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT,
+               VK_ERROR_DEVICE_LOST,
+               queue->error_file, queue->error_line,
+               "%s", queue->error_msg);
+}
+
 VkResult
 _anv_device_set_lost(struct anv_device *device,
                      const char *file, int line,
@@ -3236,7 +3257,11 @@ _anv_device_set_lost(struct anv_device *device,
    VkResult err;
    va_list ap;
 
+   if (p_atomic_read(&device->_lost) > 0)
+      return VK_ERROR_DEVICE_LOST;
+
    p_atomic_inc(&device->_lost);
+   device->lost_reported = true;
 
    va_start(ap, msg);
    err = __vk_errorv(device->physical->instance, device,
@@ -3252,24 +3277,29 @@ _anv_device_set_lost(struct anv_device *device,
 
 VkResult
 _anv_queue_set_lost(struct anv_queue *queue,
-                   const char *file, int line,
-                   const char *msg, ...)
+                    const char *file, int line,
+                    const char *msg, ...)
 {
-   VkResult err;
    va_list ap;
 
-   p_atomic_inc(&queue->device->_lost);
+   if (queue->lost)
+      return VK_ERROR_DEVICE_LOST;
+
+   queue->lost = true;
+
+   queue->error_file = file;
+   queue->error_line = line;
 
    va_start(ap, msg);
-   err = __vk_errorv(queue->device->physical->instance, queue->device,
-                     VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT,
-                     VK_ERROR_DEVICE_LOST, file, line, msg, ap);
+   vsnprintf(queue->error_msg, sizeof(queue->error_msg),
+             msg, ap);
    va_end(ap);
 
+   p_atomic_inc(&queue->device->_lost);
+
    if (env_var_as_boolean("ANV_ABORT_ON_DEVICE_LOSS", false))
       abort();
 
-   return err;
+   return VK_ERROR_DEVICE_LOST;
 }
 
 VkResult
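Editor's note: the point of the reworked error plumbing above is that the submission thread has no caller to hand VK_ERROR_DEVICE_LOST to, so _anv_queue_set_lost() only records the message (with file/line) in anv_queue and bumps device->_lost; the first API entry point that checks for device loss afterwards reports it exactly once through _anv_device_report_lost() (see anv_device_is_lost() in anv_private.h below). A minimal sketch of the consuming side, with a hypothetical entry point:

static VkResult
example_entry_point(struct anv_device *device)
{
   /* Reports the error recorded by the queue thread the first time any
    * caller observes device->_lost != 0, then keeps returning true. */
   if (anv_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   /* ... actual work ... */
   return VK_SUCCESS;
}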
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 54b1c730791..46f64cfd8b2 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1082,6 +1082,7 @@ struct anv_physical_device {
    bool                                        has_syncobj_wait_available;
    bool                                        has_context_priority;
    bool                                        has_context_isolation;
+   bool                                        has_thread_submit;
    bool                                        has_mem_available;
    bool                                        has_mmap_offset;
    uint64_t                                    gtt_size;
@@ -1183,6 +1184,7 @@ struct anv_queue_submit {
    uint32_t                                  fence_count;
    uint32_t                                  fence_array_length;
    struct drm_i915_gem_exec_fence *          fences;
+   uint64_t *                                fence_values;
 
    uint32_t                                  temporary_semaphore_count;
    uint32_t                                  temporary_semaphore_array_length;
@@ -1194,7 +1196,10 @@ struct anv_queue_submit {
    uint32_t                                  sync_fd_semaphore_array_length;
 
    /* Allocated only with non-shareable timelines. */
-   struct anv_timeline **                    wait_timelines;
+   union {
+      struct anv_timeline **                 wait_timelines;
+      uint32_t *                             wait_timeline_syncobjs;
+   };
    uint32_t                                  wait_timeline_count;
    uint32_t                                  wait_timeline_array_length;
    uint64_t *                                wait_timeline_values;
@@ -1229,14 +1234,34 @@ struct anv_queue_submit {
 struct anv_queue {
    struct vk_object_base                   base;
 
-   struct anv_device *                     device;
+   struct anv_device *                       device;
 
-   /*
-    * A list of struct anv_queue_submit to be submitted to i915.
-    */
-   struct list_head                        queued_submits;
+   VkDeviceQueueCreateFlags                  flags;
 
-   VkDeviceQueueCreateFlags                flags;
+   /* Set once from the device API calls. */
+   bool                                      lost_signaled;
+
+   /* Only set once atomically by the queue */
+   int                                       lost;
+   int                                       error_line;
+   const char *                              error_file;
+   char                                      error_msg[80];
+
+   /*
+    * This mutex protects the variables below.
+    */
+   pthread_mutex_t                           mutex;
+
+   pthread_t                                 thread;
+   pthread_cond_t                            cond;
+
+   /*
+    * A list of struct anv_queue_submit to be submitted to i915.
+    */
+   struct list_head                          queued_submits;
+
+   /* Set to true to stop the submission thread */
+   bool                                      quit;
 };
 
 struct anv_pipeline_cache {
@@ -1330,6 +1355,7 @@ struct anv_device {
    int                                         fd;
    bool                                        can_chain_batches;
    bool                                        robust_buffer_access;
+   bool                                        has_thread_submit;
    struct anv_device_extension_table           enabled_extensions;
    struct anv_device_dispatch_table            dispatch;
@@ -1382,6 +1408,7 @@ struct anv_device {
    pthread_mutex_t                             mutex;
    pthread_cond_t                              queue_submit;
    int                                         _lost;
+   int                                         lost_reported;
 
    struct gen_batch_decode_ctx                 decoder_ctx;
    /*
@@ -1439,7 +1466,7 @@ anv_mocs_for_bo(const struct anv_device *device, const struct anv_bo *bo)
 void anv_device_init_blorp(struct anv_device *device);
 void anv_device_finish_blorp(struct anv_device *device);
 
-void _anv_device_set_all_queue_lost(struct anv_device *device);
+void _anv_device_report_lost(struct anv_device *device);
 VkResult _anv_device_set_lost(struct anv_device *device,
                               const char *file, int line,
                               const char *msg, ...)
@@ -1451,12 +1478,17 @@ VkResult _anv_queue_set_lost(struct anv_queue *queue,
 #define anv_device_set_lost(dev, ...) \
    _anv_device_set_lost(dev, __FILE__, __LINE__, __VA_ARGS__)
 #define anv_queue_set_lost(queue, ...) \
-   _anv_queue_set_lost(queue, __FILE__, __LINE__, __VA_ARGS__)
+   (queue)->device->has_thread_submit ? \
+   _anv_queue_set_lost(queue, __FILE__, __LINE__, __VA_ARGS__) : \
+   _anv_device_set_lost(queue->device, __FILE__, __LINE__, __VA_ARGS__)
 
 static inline bool
 anv_device_is_lost(struct anv_device *device)
 {
-   return unlikely(p_atomic_read(&device->_lost));
+   int lost = p_atomic_read(&device->_lost);
+   if (unlikely(lost && !device->lost_reported))
+      _anv_device_report_lost(device);
+   return lost;
 }
 
 VkResult anv_device_query_status(struct anv_device *device);
@@ -3176,6 +3208,7 @@ enum anv_semaphore_type {
    ANV_SEMAPHORE_TYPE_SYNC_FILE,
    ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
    ANV_SEMAPHORE_TYPE_TIMELINE,
+   ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE,
 };
 
 struct anv_timeline_point {
diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c
index 25646d07f1a..fdf10f2c012 100644
--- a/src/intel/vulkan/anv_queue.c
+++ b/src/intel/vulkan/anv_queue.c
@@ -95,11 +95,16 @@ anv_queue_submit_free(struct anv_device *device,
    for (uint32_t i = 0; i < submit->sync_fd_semaphore_count; i++)
       anv_semaphore_unref(device, submit->sync_fd_semaphores[i]);
    /* Execbuf does not consume the in_fence.  It's our job to close it. */
-   if (submit->in_fence != -1)
+   if (submit->in_fence != -1) {
+      assert(!device->has_thread_submit);
       close(submit->in_fence);
-   if (submit->out_fence != -1)
+   }
+   if (submit->out_fence != -1) {
+      assert(!device->has_thread_submit);
       close(submit->out_fence);
+   }
 
    vk_free(alloc, submit->fences);
+   vk_free(alloc, submit->fence_values);
    vk_free(alloc, submit->temporary_semaphores);
    vk_free(alloc, submit->wait_timelines);
    vk_free(alloc, submit->wait_timeline_values);
@@ -349,6 +354,98 @@ anv_device_submit_deferred_locked(struct anv_device *device)
    return anv_queue_submit_deferred_locked(&device->queue, &advance);
 }
 
+static void
+anv_queue_submit_signal_fences(struct anv_device *device,
+                               struct anv_queue_submit *submit)
+{
+   for (uint32_t i = 0; i < submit->fence_count; i++) {
+      if (submit->fences[i].flags & I915_EXEC_FENCE_SIGNAL) {
+         anv_gem_syncobj_timeline_signal(device, &submit->fences[i].handle,
+                                         &submit->fence_values[i], 1);
+      }
+   }
+}
+
+static void *
+anv_queue_task(void *_queue)
+{
+   struct anv_queue *queue = _queue;
+
+   pthread_mutex_lock(&queue->mutex);
+
+   while (!queue->quit) {
+      while (!list_is_empty(&queue->queued_submits)) {
+         struct anv_queue_submit *submit =
+            list_first_entry(&queue->queued_submits, struct anv_queue_submit, link);
+         list_del(&submit->link);
+
+         pthread_mutex_unlock(&queue->mutex);
+
+         VkResult result = VK_ERROR_DEVICE_LOST;
+
+         /* Wait for timeline points to materialize before submitting. We need
+          * to do this because we're using threads to do the submit to i915.
+          * We could end up in a situation where the application submits to 2
+          * queues with the first submit creating the dma-fence for the
+          * second. But because the scheduling of the submission threads might
+          * wake up the second queue thread first, this would make that
+          * execbuf fail because the dma-fence it depends on hasn't
+          * materialized yet.
+          */
+         if (!queue->lost && submit->wait_timeline_count > 0) {
+            int ret = queue->device->no_hw ? 0 :
+               anv_gem_syncobj_timeline_wait(
+                  queue->device, submit->wait_timeline_syncobjs,
+                  submit->wait_timeline_values, submit->wait_timeline_count,
+                  anv_get_absolute_timeout(UINT64_MAX) /* wait forever */,
+                  true /* wait for all */, true /* wait for materialize */);
+            if (ret) {
+               result = anv_queue_set_lost(queue, "timeline timeout: %s",
+                                           strerror(errno));
+            }
+         }
+
+         /* Now submit */
+         if (!queue->lost) {
+            pthread_mutex_lock(&queue->device->mutex);
+            result = anv_queue_execbuf_locked(queue, submit);
+            pthread_mutex_unlock(&queue->device->mutex);
+         }
+
+         for (uint32_t i = 0; i < submit->sync_fd_semaphore_count; i++) {
+            struct anv_semaphore *semaphore = submit->sync_fd_semaphores[i];
+            /* Out fences can't have temporary state because that would imply
+             * that we imported a sync file and are trying to signal it.
+             */
+            assert(semaphore->temporary.type == ANV_SEMAPHORE_TYPE_NONE);
+            struct anv_semaphore_impl *impl = &semaphore->permanent;
+
+            assert(impl->type == ANV_SEMAPHORE_TYPE_SYNC_FILE);
+            impl->fd = dup(submit->out_fence);
+         }
+
+         if (result != VK_SUCCESS) {
+            /* vkQueueSubmit or some other entry point will report the
+             * DEVICE_LOST error at some point, but until we have emptied our
+             * list of execbufs we need to wake up all the potential waiters
+             * until one of them spots the error.
+             */
+            anv_queue_submit_signal_fences(queue->device, submit);
+         }
+
+         anv_queue_submit_free(queue->device, submit);
+
+         pthread_mutex_lock(&queue->mutex);
+      }
+
+      if (!queue->quit)
+         pthread_cond_wait(&queue->cond, &queue->mutex);
+   }
+
+   pthread_mutex_unlock(&queue->mutex);
+
+   return NULL;
+}
+
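Editor's note: the "wait for materialize" argument to anv_gem_syncobj_timeline_wait() above presumably maps to the kernel's DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE, which returns once a dma-fence is attached to each (syncobj, point) pair without waiting for it to signal — exactly what the two-queue scenario in the comment needs. A standalone sketch of the same wait using libdrm directly (drm_fd being an open DRM device fd is an assumption of the sketch):

#include <stdint.h>
#include <xf86drm.h>

static int
wait_points_materialized(int drm_fd, uint32_t *handles, uint64_t *points,
                         uint32_t count, int64_t abs_timeout_ns)
{
   /* Returns 0 once every (handle, point) pair has a dma-fence attached;
    * does not wait for the fences to signal. */
   return drmSyncobjTimelineWait(drm_fd, handles, points, count,
                                 abs_timeout_ns,
                                 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
                                 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE,
                                 NULL /* first_signaled */);
}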
 static VkResult
 _anv_queue_submit(struct anv_queue *queue, struct anv_queue_submit **_submit,
                   bool flush_queue)
@@ -360,42 +457,92 @@ _anv_queue_submit(struct anv_queue *queue, struct anv_queue_submit **_submit,
     * anv_queue.
     */
    *_submit = NULL;
+   if (queue->device->has_thread_submit) {
+      pthread_mutex_lock(&queue->mutex);
+      pthread_cond_broadcast(&queue->cond);
+      list_addtail(&submit->link, &queue->queued_submits);
+      pthread_mutex_unlock(&queue->mutex);
+      return VK_SUCCESS;
+   } else {
+      pthread_mutex_lock(&queue->device->mutex);
+      list_addtail(&submit->link, &queue->queued_submits);
+      VkResult result = anv_device_submit_deferred_locked(queue->device);
+      if (flush_queue) {
+         while (result == VK_SUCCESS && !list_is_empty(&queue->queued_submits)) {
+            int ret = pthread_cond_wait(&queue->device->queue_submit,
+                                        &queue->device->mutex);
+            if (ret != 0) {
+               result = anv_device_set_lost(queue->device, "wait timeout");
+               break;
+            }
 
-   pthread_mutex_lock(&queue->device->mutex);
-   list_addtail(&submit->link, &queue->queued_submits);
-   VkResult result = anv_device_submit_deferred_locked(queue->device);
-   if (flush_queue) {
-      while (result == VK_SUCCESS && !list_is_empty(&queue->queued_submits)) {
-         int ret = pthread_cond_wait(&queue->device->queue_submit,
-                                     &queue->device->mutex);
-         if (ret != 0) {
-            result = anv_device_set_lost(queue->device, "wait timeout");
-            break;
+            result = anv_device_submit_deferred_locked(queue->device);
          }
-
-         result = anv_device_submit_deferred_locked(queue->device);
       }
+      pthread_mutex_unlock(&queue->device->mutex);
+      return result;
    }
-   pthread_mutex_unlock(&queue->device->mutex);
-
-   return result;
 }
 
 VkResult
 anv_queue_init(struct anv_device *device, struct anv_queue *queue)
 {
-   vk_object_base_init(&device->vk, &queue->base, VK_OBJECT_TYPE_QUEUE);
+   VkResult result;
+
    queue->device = device;
    queue->flags = 0;
+   queue->lost = false;
+   queue->quit = false;
 
    list_inithead(&queue->queued_submits);
 
+   /* We only need these additional thread/mutex fields when using a thread
+    * for submission.
+    */
+   if (device->has_thread_submit) {
+      if (pthread_mutex_init(&queue->mutex, NULL) != 0)
+         return vk_error(VK_ERROR_INITIALIZATION_FAILED);
+
+      if (pthread_cond_init(&queue->cond, NULL) != 0) {
+         result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+         goto fail_mutex;
+      }
+      if (pthread_create(&queue->thread, NULL, anv_queue_task, queue)) {
+         result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+         goto fail_cond;
+      }
+   }
+
+   vk_object_base_init(&device->vk, &queue->base, VK_OBJECT_TYPE_QUEUE);
+
    return VK_SUCCESS;
+
+ fail_cond:
+   pthread_cond_destroy(&queue->cond);
+ fail_mutex:
+   pthread_mutex_destroy(&queue->mutex);
+
+   return result;
 }
 
 void
 anv_queue_finish(struct anv_queue *queue)
 {
    vk_object_base_finish(&queue->base);
+
+   if (!queue->device->has_thread_submit)
+      return;
+
+   pthread_mutex_lock(&queue->mutex);
+   pthread_cond_broadcast(&queue->cond);
+   queue->quit = true;
+   pthread_mutex_unlock(&queue->mutex);
+
+   void *ret;
+   pthread_join(queue->thread, &ret);
+
+   pthread_cond_destroy(&queue->cond);
+   pthread_mutex_destroy(&queue->mutex);
 }
 
 static VkResult
@@ -427,10 +574,42 @@ anv_queue_submit_add_fence_bo(struct anv_queue_submit *submit,
 static VkResult
 anv_queue_submit_add_syncobj(struct anv_queue_submit* submit,
                              struct anv_device *device,
-                             uint32_t handle, uint32_t flags)
+                             uint32_t handle, uint32_t flags,
+                             uint64_t value)
 {
    assert(flags != 0);
 
+   if (device->has_thread_submit && (flags & I915_EXEC_FENCE_WAIT)) {
+      if (submit->wait_timeline_count >= submit->wait_timeline_array_length) {
+         uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64);
+
+         uint32_t *new_wait_timeline_syncobjs =
+            vk_realloc(submit->alloc,
+                       submit->wait_timeline_syncobjs,
+                       new_len * sizeof(*submit->wait_timeline_syncobjs),
+                       8, submit->alloc_scope);
+         if (new_wait_timeline_syncobjs == NULL)
+            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+         submit->wait_timeline_syncobjs = new_wait_timeline_syncobjs;
+
+         uint64_t *new_wait_timeline_values =
+            vk_realloc(submit->alloc,
+                       submit->wait_timeline_values, new_len * sizeof(*submit->wait_timeline_values),
+                       8, submit->alloc_scope);
+         if (new_wait_timeline_values == NULL)
+            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+         submit->wait_timeline_values = new_wait_timeline_values;
+         submit->wait_timeline_array_length = new_len;
+      }
+
+      submit->wait_timeline_syncobjs[submit->wait_timeline_count] = handle;
+      submit->wait_timeline_values[submit->wait_timeline_count] = value;
+
+      submit->wait_timeline_count++;
+   }
+
    if (submit->fence_count >= submit->fence_array_length) {
       uint32_t new_len = MAX2(submit->fence_array_length * 2, 64);
       struct drm_i915_gem_exec_fence *new_fences =
@@ -441,13 +620,24 @@ anv_queue_submit_add_syncobj(struct anv_queue_submit* submit,
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
       submit->fences = new_fences;
+
+      uint64_t *new_fence_values =
+         vk_realloc(submit->alloc,
+                    submit->fence_values, new_len * sizeof(*submit->fence_values),
+                    8, submit->alloc_scope);
+      if (new_fence_values == NULL)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+      submit->fence_values = new_fence_values;
       submit->fence_array_length = new_len;
    }
 
-   submit->fences[submit->fence_count++] = (struct drm_i915_gem_exec_fence) {
+   submit->fences[submit->fence_count] = (struct drm_i915_gem_exec_fence) {
       .handle = handle,
      .flags = flags,
    };
+   submit->fence_values[submit->fence_count] = value;
+   submit->fence_count++;
 
    return VK_SUCCESS;
 }
@@ -595,7 +785,7 @@ anv_queue_submit_simple_batch(struct anv_queue *queue,
       }
 
       result = anv_queue_submit_add_syncobj(submit, device,
                                             syncobj,
-                                            I915_EXEC_FENCE_SIGNAL);
+                                            I915_EXEC_FENCE_SIGNAL, 0);
    } else {
       result = anv_device_alloc_bo(device, 4096,
                                    ANV_BO_ALLOC_EXTERNAL |
@@ -742,7 +932,6 @@ anv_queue_submit(struct anv_queue *queue,
    submit->cmd_buffer = cmd_buffer;
 
    VkResult result = VK_SUCCESS;
-
    for (uint32_t i = 0; i < num_in_semaphores; i++) {
       ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]);
       struct anv_semaphore_impl *impl;
@@ -796,7 +985,8 @@ anv_queue_submit(struct anv_queue *queue,
       case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ: {
          result = anv_queue_submit_add_syncobj(submit, device,
                                                impl->syncobj,
-                                               I915_EXEC_FENCE_WAIT);
+                                               I915_EXEC_FENCE_WAIT,
+                                               0);
         if (result != VK_SUCCESS)
            goto error;
         break;
@@ -810,6 +1000,15 @@ anv_queue_submit(struct anv_queue *queue,
            goto error;
         break;
 
+      case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
+         result = anv_queue_submit_add_syncobj(submit, device,
+                                               impl->syncobj,
+                                               I915_EXEC_FENCE_WAIT,
+                                               in_values ? in_values[i] : 0);
+         if (result != VK_SUCCESS)
+            goto error;
+         break;
+
       default:
          break;
       }
@@ -850,7 +1049,8 @@ anv_queue_submit(struct anv_queue *queue,
       case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ: {
          result = anv_queue_submit_add_syncobj(submit, device,
                                                impl->syncobj,
-                                               I915_EXEC_FENCE_SIGNAL);
+                                               I915_EXEC_FENCE_SIGNAL,
+                                               0);
         if (result != VK_SUCCESS)
            goto error;
         break;
@@ -864,6 +1064,14 @@ anv_queue_submit(struct anv_queue *queue,
            goto error;
         break;
 
+      case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
+         result = anv_queue_submit_add_syncobj(submit, device, impl->syncobj,
+                                               I915_EXEC_FENCE_SIGNAL,
+                                               out_values ? out_values[i] : 0);
+         if (result != VK_SUCCESS)
+            goto error;
+         break;
+
       default:
          break;
       }
@@ -893,6 +1101,7 @@ anv_queue_submit(struct anv_queue *queue,
 
       switch (impl->type) {
       case ANV_FENCE_TYPE_BO:
+         assert(!device->has_thread_submit);
         result = anv_queue_submit_add_fence_bo(submit, impl->bo.bo, true /* signal */);
         if (result != VK_SUCCESS)
            goto error;
@@ -904,8 +1113,11 @@ anv_queue_submit(struct anv_queue *queue,
          * also reset the fence's syncobj so that they don't contain a
          * signaled dma-fence.
          */
+         anv_gem_syncobj_reset(device, impl->syncobj);
+
         result = anv_queue_submit_add_syncobj(submit, device, impl->syncobj,
-                                              I915_EXEC_FENCE_SIGNAL);
+                                              I915_EXEC_FENCE_SIGNAL,
+                                              0);
         if (result != VK_SUCCESS)
            goto error;
         break;
@@ -921,6 +1133,7 @@ anv_queue_submit(struct anv_queue *queue,
       goto error;
 
    if (fence && fence->permanent.type == ANV_FENCE_TYPE_BO) {
+      assert(!device->has_thread_submit);
       /* If we have permanent BO fence, the only type of temporary possible
        * would be BO_WSI (because BO fences are not shareable). The Vulkan spec
        * also requires that the fence passed to vkQueueSubmit() be :
@@ -1291,16 +1504,34 @@ VkResult anv_GetFenceStatus(
    }
 
    case ANV_FENCE_TYPE_SYNCOBJ: {
-      int ret = anv_gem_syncobj_wait(device, &impl->syncobj, 1, 0, true);
-      if (ret == -1) {
-         if (errno == ETIME) {
-            return VK_NOT_READY;
+      if (device->has_thread_submit) {
+         uint64_t binary_value = 0;
+         int ret = anv_gem_syncobj_timeline_wait(device, &impl->syncobj,
+                                                 &binary_value, 1, 0,
+                                                 true /* wait_all */,
+                                                 false /* wait_materialize */);
+         if (ret == -1) {
+            if (errno == ETIME) {
+               return VK_NOT_READY;
+            } else {
+               /* We don't know the real error. */
+               return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
+            }
          } else {
-            /* We don't know the real error. */
-            return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
+            return VK_SUCCESS;
          }
       } else {
-         return VK_SUCCESS;
+         int ret = anv_gem_syncobj_wait(device, &impl->syncobj, 1, 0, false);
+         if (ret == -1) {
+            if (errno == ETIME) {
+               return VK_NOT_READY;
+            } else {
+               /* We don't know the real error. */
+               return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
+            }
+         } else {
+            return VK_SUCCESS;
+         }
       }
    }
 
@@ -1334,11 +1565,11 @@ anv_wait_for_syncobj_fences(struct anv_device *device,
       syncobjs[i] = impl->syncobj;
    }
 
+   int ret = 0;
    /* The gem_syncobj_wait ioctl may return early due to an inherent
-    * limitation in the way it computes timeouts. Loop until we've actually
+    * limitation in the way it computes timeouts.  Loop until we've actually
     * passed the timeout.
     */
-   int ret;
    do {
       ret = anv_gem_syncobj_wait(device, syncobjs, fenceCount,
                                  abs_timeout_ns, waitAll);
@@ -1496,6 +1727,8 @@ anv_wait_for_fences(struct anv_device *device,
 
       switch (impl->type) {
       case ANV_FENCE_TYPE_BO:
+         assert(!device->physical->has_syncobj_wait);
+         /* fall-through */
       case ANV_FENCE_TYPE_WSI_BO:
          result = anv_wait_for_bo_fences(device, 1, &pFences[i],
                                          true, abs_timeout);
@@ -1695,6 +1928,31 @@ VkResult anv_ImportFenceFdKHR(
    return VK_SUCCESS;
 }
 
+/* The sideband payload of the DRM syncobj was incremented when the
+ * application called vkQueueSubmit(). Here we wait for a fence with the same
+ * value to materialize so that we can export it (typically as a sync FD).
+ */
+static VkResult
+wait_syncobj_materialize(struct anv_device *device,
+                         uint32_t syncobj,
+                         int *fd)
+{
+   if (!device->has_thread_submit)
+      return VK_SUCCESS;
+
+   uint64_t binary_value = 0;
+   /* We might need to wait until the fence materializes before we can
+    * export to a sync FD when we use a thread for submission.
+    */
+   if (anv_gem_syncobj_timeline_wait(device, &syncobj, &binary_value, 1,
+                                     anv_get_absolute_timeout(5ull * NSEC_PER_SEC),
+                                     true /* wait_all */,
+                                     true /* wait_materialize */))
+      return anv_device_set_lost(device, "anv_gem_syncobj_timeline_wait failed: %m");
+
+   return VK_SUCCESS;
+}
+
 VkResult anv_GetFenceFdKHR(
    VkDevice _device,
    const VkFenceGetFdInfoKHR* pGetFdInfo,
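Editor's note: with libdrm, the materialize-then-export sequence that wait_syncobj_materialize() enables looks roughly like the sketch below — a sync file cannot be empty, so the dma-fence must exist before DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD can export it (export_sync_fd is a hypothetical helper, not the driver's code):

#include <stdint.h>
#include <xf86drm.h>

static int
export_sync_fd(int drm_fd, uint32_t syncobj, int64_t abs_timeout_ns)
{
   uint64_t point = 0;   /* binary syncobjs are waited on at point 0 */
   int sync_fd = -1;

   /* Wait for a dma-fence to be attached, not for it to signal. */
   if (drmSyncobjTimelineWait(drm_fd, &syncobj, &point, 1, abs_timeout_ns,
                              DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
                              DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE, NULL))
      return -1;

   if (drmSyncobjExportSyncFile(drm_fd, syncobj, &sync_fd))
      return -1;

   return sync_fd;
}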
@@ -1721,6 +1979,10 @@ VkResult anv_GetFenceFdKHR(
    }
 
    case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
+      VkResult result = wait_syncobj_materialize(device, impl->syncobj, pFd);
+      if (result != VK_SUCCESS)
+         return result;
+
       int fd = anv_gem_syncobj_export_sync_file(device, impl->syncobj);
       if (fd < 0)
          return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
@@ -1794,8 +2056,24 @@ timeline_semaphore_create(struct anv_device *device,
                           struct anv_semaphore_impl *impl,
                           uint64_t initial_value)
 {
-   impl->type = ANV_SEMAPHORE_TYPE_TIMELINE;
-   anv_timeline_init(device, &impl->timeline, initial_value);
+   if (device->has_thread_submit) {
+      impl->type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE;
+      impl->syncobj = anv_gem_syncobj_create(device, 0);
+      if (!impl->syncobj)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      if (initial_value) {
+         if (anv_gem_syncobj_timeline_signal(device,
+                                             &impl->syncobj,
+                                             &initial_value, 1)) {
+            anv_gem_syncobj_destroy(device, impl->syncobj);
+            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+         }
+      }
+   } else {
+      impl->type = ANV_SEMAPHORE_TYPE_TIMELINE;
+      anv_timeline_init(device, &impl->timeline, initial_value);
+   }
+
    return VK_SUCCESS;
 }
 
@@ -1824,7 +2102,7 @@ VkResult anv_CreateSemaphore(
    const VkExportSemaphoreCreateInfo *export =
       vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO);
 
-   VkExternalSemaphoreHandleTypeFlags handleTypes =
+   VkExternalSemaphoreHandleTypeFlags handleTypes =
       export ? export->handleTypes : 0;
    VkResult result;
 
@@ -1839,8 +2117,10 @@ VkResult anv_CreateSemaphore(
       }
    } else if (handleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
       assert(handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
-      assert(sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR);
-      result = binary_semaphore_create(device, &semaphore->permanent, true);
+      if (sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR)
+         result = binary_semaphore_create(device, &semaphore->permanent, true);
+      else
+         result = timeline_semaphore_create(device, &semaphore->permanent, timeline_value);
       if (result != VK_SUCCESS) {
          vk_free2(&device->vk.alloc, pAllocator, semaphore);
          return result;
@@ -1897,6 +2177,7 @@ anv_semaphore_impl_cleanup(struct anv_device *device,
       break;
 
    case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
+   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
       anv_gem_syncobj_destroy(device, impl->syncobj);
       break;
 
@@ -1964,8 +2245,10 @@ void anv_GetPhysicalDeviceExternalSemaphoreProperties(
 
    switch (pExternalSemaphoreInfo->handleType) {
    case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
-      /* Timeline semaphores are not exportable. */
-      if (sem_type == VK_SEMAPHORE_TYPE_TIMELINE_KHR)
+      /* Timeline semaphores are not exportable, unless we have threaded
+       * submission.
+       */
+      if (sem_type == VK_SEMAPHORE_TYPE_TIMELINE_KHR && !device->has_thread_submit)
          break;
       pExternalSemaphoreProperties->exportFromImportedHandleTypes =
          VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
@@ -2014,7 +2297,15 @@ VkResult anv_ImportSemaphoreFdKHR(
    switch (pImportSemaphoreFdInfo->handleType) {
    case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
       if (device->physical->has_syncobj) {
-         new_impl.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
+         /* When the import is not temporary, reuse the semaphore's existing
+          * type. The Linux/DRM implementation lets binary & timeline
+          * semaphores be used interchangeably and we have no way to
+          * differentiate them.
+          */
+         if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT)
+            new_impl.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
+         else
+            new_impl.type = semaphore->permanent.type;
 
          new_impl.syncobj = anv_gem_syncobj_fd_to_handle(device, fd);
          if (!new_impl.syncobj)
@@ -2168,9 +2459,13 @@ VkResult anv_GetSemaphoreFdKHR(
    }
 
    case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
-      if (pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)
+      if (pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
+         VkResult result = wait_syncobj_materialize(device, impl->syncobj, pFd);
+         if (result != VK_SUCCESS)
+            return result;
+
          fd = anv_gem_syncobj_export_sync_file(device, impl->syncobj);
-      else {
+      } else {
          assert(pGetFdInfo->handleType ==
                 VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
          fd = anv_gem_syncobj_handle_to_fd(device, impl->syncobj);
       }
@@ -2179,6 +2474,14 @@ VkResult anv_GetSemaphoreFdKHR(
       *pFd = fd;
       break;
 
+   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
+      assert(pGetFdInfo->handleType ==
+             VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
+      fd = anv_gem_syncobj_handle_to_fd(device, impl->syncobj);
+      if (fd < 0)
+         return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
+      *pFd = fd;
+      break;
+
    default:
       return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    }
@@ -2217,6 +2520,15 @@ VkResult anv_GetSemaphoreCounterValue(
       return VK_SUCCESS;
    }
 
+   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE: {
+      int ret = anv_gem_syncobj_timeline_query(device, &impl->syncobj,
+                                               pValue, 1);
+      if (ret != 0)
+         return anv_device_set_lost(device, "unable to query timeline syncobj");
+
+      return VK_SUCCESS;
+   }
+
    default:
       unreachable("Invalid semaphore type");
    }
 }
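Editor's note: anv_gem_syncobj_timeline_query() above presumably wraps DRM_IOCTL_SYNCOBJ_QUERY, which reads back the last-signaled point of each timeline syncobj; the libdrm equivalent is a one-liner (sketch, drm_fd assumed to be an open DRM device fd):

#include <stdint.h>
#include <xf86drm.h>

static int
query_timeline_value(int drm_fd, uint32_t syncobj, uint64_t *value)
{
   /* Writes the timeline's current (last signaled) value into *value. */
   return drmSyncobjQuery(drm_fd, &syncobj, value, 1);
}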
@@ -2236,8 +2548,8 @@ anv_timeline_wait_locked(struct anv_device *device,
          .tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
       };
 
-      int ret = pthread_cond_timedwait(&device->queue_submit,
-                                       &device->mutex, &abstime);
+      UNUSED int ret = pthread_cond_timedwait(&device->queue_submit,
+                                              &device->mutex, &abstime);
       assert(ret != EINVAL);
       if (anv_gettime_ns() >= abs_timeout_ns &&
           timeline->highest_pending < serial)
@@ -2336,25 +2648,23 @@ VkResult anv_WaitSemaphores(
     uint64_t timeout)
 {
    ANV_FROM_HANDLE(anv_device, device, _device);
+   uint32_t *handles;
+   struct anv_timeline **timelines;
+   uint64_t *values;
 
-   if (device->no_hw)
-      return VK_SUCCESS;
+   ANV_MULTIALLOC(ma);
 
-   struct anv_timeline **timelines =
-      vk_alloc(&device->vk.alloc,
-               pWaitInfo->semaphoreCount * sizeof(*timelines),
-               8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
-   if (!timelines)
-      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-
-   uint64_t *values = vk_alloc(&device->vk.alloc,
-                               pWaitInfo->semaphoreCount * sizeof(*values),
-                               8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
-   if (!values) {
-      vk_free(&device->vk.alloc, timelines);
-      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   anv_multialloc_add(&ma, &values, pWaitInfo->semaphoreCount);
+   if (device->has_thread_submit) {
+      anv_multialloc_add(&ma, &handles, pWaitInfo->semaphoreCount);
+   } else {
+      anv_multialloc_add(&ma, &timelines, pWaitInfo->semaphoreCount);
    }
 
+   if (!anv_multialloc_alloc(&ma, &device->vk.alloc,
+                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND))
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
    uint32_t handle_count = 0;
    for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; i++) {
       ANV_FROM_HANDLE(anv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
@@ -2362,24 +2672,40 @@ VkResult anv_WaitSemaphores(
          semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
          &semaphore->temporary : &semaphore->permanent;
 
-      assert(impl->type == ANV_SEMAPHORE_TYPE_TIMELINE);
-
       if (pWaitInfo->pValues[i] == 0)
          continue;
 
-      timelines[handle_count] = &impl->timeline;
+      if (device->has_thread_submit) {
+         assert(impl->type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE);
+         handles[handle_count] = impl->syncobj;
+      } else {
+         assert(impl->type == ANV_SEMAPHORE_TYPE_TIMELINE);
+         timelines[handle_count] = &impl->timeline;
+      }
       values[handle_count] = pWaitInfo->pValues[i];
       handle_count++;
    }
 
    VkResult result = VK_SUCCESS;
    if (handle_count > 0) {
-      result = anv_timelines_wait(device, timelines, values, handle_count,
-                                  !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR),
-                                  anv_get_absolute_timeout(timeout));
+      if (device->has_thread_submit) {
+         int ret =
+            anv_gem_syncobj_timeline_wait(device,
+                                          handles, values, handle_count,
+                                          anv_get_absolute_timeout(timeout),
+                                          !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR),
+                                          false);
+         if (ret != 0)
+            result = errno == ETIME ? VK_TIMEOUT :
+               anv_device_set_lost(device, "unable to wait on timeline syncobj");
+      } else {
+         result =
+            anv_timelines_wait(device, timelines, values, handle_count,
+                               !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR),
+                               anv_get_absolute_timeout(timeout));
+      }
    }
 
-   vk_free(&device->vk.alloc, timelines);
    vk_free(&device->vk.alloc, values);
 
    return result;
@@ -2414,6 +2740,20 @@ VkResult anv_SignalSemaphore(
       return result;
    }
 
+   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE: {
+      /* Timeline semaphores are created with a value of 0, so signaling on 0
+       * is a waste of time.
+       */
+      if (pSignalInfo->value == 0)
+         return VK_SUCCESS;
+
+      int ret = anv_gem_syncobj_timeline_signal(device, &impl->syncobj,
+                                                &pSignalInfo->value, 1);
+
+      return ret == 0 ? VK_SUCCESS :
+         anv_device_set_lost(device, "unable to signal timeline syncobj");
+   }
+
    default:
       unreachable("Invalid semaphore type");
    }
diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c
index 75bf4feadd3..cbe5bb02914 100644
--- a/src/intel/vulkan/anv_wsi.c
+++ b/src/intel/vulkan/anv_wsi.c
@@ -299,10 +299,62 @@ VkResult anv_QueuePresentKHR(
       }
    }
 
-   return wsi_common_queue_present(&queue->device->physical->wsi_device,
-                                   anv_device_to_handle(queue->device),
-                                   _queue, 0,
-                                   pPresentInfo);
+   if (device->has_thread_submit &&
+       pPresentInfo->waitSemaphoreCount > 0) {
+      /* Make sure all of the dependency semaphores have materialized when
+       * using a threaded submission.
+       */
+      uint32_t *syncobjs = vk_alloc(&device->vk.alloc,
+                                    sizeof(*syncobjs) * pPresentInfo->waitSemaphoreCount, 8,
+                                    VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+
+      if (!syncobjs)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+      uint32_t wait_count = 0;
+      for (uint32_t i = 0; i < pPresentInfo->waitSemaphoreCount; i++) {
+         ANV_FROM_HANDLE(anv_semaphore, semaphore, pPresentInfo->pWaitSemaphores[i]);
+         struct anv_semaphore_impl *impl =
+            semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
+            &semaphore->temporary : &semaphore->permanent;
+
+         if (impl->type == ANV_SEMAPHORE_TYPE_DUMMY)
+            continue;
+         assert(impl->type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ);
+         syncobjs[wait_count++] = impl->syncobj;
+      }
+
+      int ret = 0;
+      if (wait_count > 0) {
+         ret =
+            anv_gem_syncobj_wait(device, syncobjs, wait_count,
+                                 anv_get_absolute_timeout(INT64_MAX),
+                                 true /* wait_all */);
+      }
+
+      vk_free(&device->vk.alloc, syncobjs);
+
+      if (ret)
+         return vk_error(VK_ERROR_DEVICE_LOST);
+   }
+
+   VkResult result = wsi_common_queue_present(&device->physical->wsi_device,
+                                              anv_device_to_handle(queue->device),
+                                              _queue, 0,
+                                              pPresentInfo);
+
+   for (uint32_t i = 0; i < pPresentInfo->waitSemaphoreCount; i++) {
+      ANV_FROM_HANDLE(anv_semaphore, semaphore, pPresentInfo->pWaitSemaphores[i]);
+      /* From the Vulkan 1.0.53 spec:
+       *
+       *    "If the import is temporary, the implementation must restore the
+       *    semaphore to its prior permanent state after submitting the next
+       *    semaphore wait operation."
+       */
+      anv_semaphore_reset_temporary(queue->device, semaphore);
+   }
+
+   return result;
 }
 
 VkResult anv_GetDeviceGroupPresentCapabilitiesKHR(
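Editor's note: taken together, these changes mean that with threaded submission a VK_SEMAPHORE_TYPE_TIMELINE semaphore is backed by a DRM timeline syncobj and becomes exportable as an opaque FD, which anv previously rejected. A sketch of what this enables from the application side (plain Vulkan; `device` is an existing VkDevice and VK_KHR_external_semaphore_fd is assumed to be enabled):

VkSemaphoreTypeCreateInfo type_info = {
   .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
   .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
   .initialValue = 0,
};
VkExportSemaphoreCreateInfo export_info = {
   .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
   .pNext = &type_info,
   .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
};
VkSemaphoreCreateInfo create_info = {
   .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
   .pNext = &export_info,
};
VkSemaphore sem;
vkCreateSemaphore(device, &create_info, NULL, &sem);

/* On anv this lands in anv_GetSemaphoreFdKHR()'s new
 * ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE case. */
VkSemaphoreGetFdInfoKHR get_fd_info = {
   .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
   .semaphore = sem,
   .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
};
int fd = -1;
vkGetSemaphoreFdKHR(device, &get_fd_info, &fd);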