vulkan: Add common implementations of vkQueueSubmit and vkQueueWaitIdle
This adds a new vk_queue_submit object which contains a list of command buffers as well as wait and signal operations along with a driver hook which takes a vk_queue and a vk_queue_submit and does the actual submit. The common code then handles spawning a submit thread if needed, waiting for timeline points to materialize, dealing with timeline semaphore emulation via vk_timeline, etc. All the driver sees are vk_queue.submit calls with fully materialized vk_sync objects which it can wait on unconditionally. This implementation takes a page from RADV's book and only ever spawns the submit thread if it sees a timeline wait on a time point that has not yet materialized. If this never happens, it calls vk_queue.submit directly from vkQueueSubmit() and the thread is never spawned. One other nicety of the new framework is that there is no longer a distinction, from the driver's PoV, between fences and semaphores. The fence, if any, is included as just one more signal operation on the final vk_queue_submit in the batch. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13427>
This commit is contained in:
parent
673b5e97ec
commit
9bffd81f1c
|
@ -193,6 +193,10 @@ Core Mesa environment variables
|
|||
causes the Vulkan driver to call abort() immediately after detecting a
|
||||
lost device. This is extremely useful when testing as it prevents the
|
||||
test suite from continuing on with a lost device.
|
||||
:envvar:`MESA_VK_ENABLE_SUBMIT_THREAD`
|
||||
for Vulkan drivers which support real timeline semaphores, this forces
|
||||
them to use a submit thread from the beginning, regardless of whether or
|
||||
not they ever see a wait-before-signal condition.
|
||||
:envvar:`MESA_LOADER_DRIVER_OVERRIDE`
|
||||
chooses a different driver binary such as ``etnaviv`` or ``zink``.
|
||||
|
||||
|
|
|
@ -28,11 +28,53 @@
|
|||
#include "vk_log.h"
|
||||
#include "vk_physical_device.h"
|
||||
#include "vk_queue.h"
|
||||
#include "vk_sync.h"
|
||||
#include "vk_sync_timeline.h"
|
||||
#include "vk_util.h"
|
||||
#include "util/debug.h"
|
||||
#include "util/hash_table.h"
|
||||
#include "util/ralloc.h"
|
||||
|
||||
static enum vk_device_timeline_mode
|
||||
get_timeline_mode(struct vk_physical_device *physical_device)
|
||||
{
|
||||
if (physical_device->supported_sync_types == NULL)
|
||||
return VK_DEVICE_TIMELINE_MODE_NONE;
|
||||
|
||||
const struct vk_sync_type *timeline_type = NULL;
|
||||
for (const struct vk_sync_type *const *t =
|
||||
physical_device->supported_sync_types; *t; t++) {
|
||||
if ((*t)->features & VK_SYNC_FEATURE_TIMELINE) {
|
||||
/* We can only have one timeline mode */
|
||||
assert(timeline_type == NULL);
|
||||
timeline_type = *t;
|
||||
}
|
||||
}
|
||||
|
||||
if (timeline_type == NULL)
|
||||
return VK_DEVICE_TIMELINE_MODE_NONE;
|
||||
|
||||
if (vk_sync_type_is_vk_sync_timeline(timeline_type))
|
||||
return VK_DEVICE_TIMELINE_MODE_EMULATED;
|
||||
|
||||
if (timeline_type->features & VK_SYNC_FEATURE_WAIT_BEFORE_SIGNAL)
|
||||
return VK_DEVICE_TIMELINE_MODE_NATIVE;
|
||||
|
||||
/* For assisted mode, we require a few additional things of all sync types
|
||||
* which may be used as semaphores.
|
||||
*/
|
||||
for (const struct vk_sync_type *const *t =
|
||||
physical_device->supported_sync_types; *t; t++) {
|
||||
if ((*t)->features & VK_SYNC_FEATURE_GPU_WAIT) {
|
||||
assert((*t)->features & VK_SYNC_FEATURE_WAIT_PENDING);
|
||||
if ((*t)->features & VK_SYNC_FEATURE_BINARY)
|
||||
assert((*t)->features & VK_SYNC_FEATURE_CPU_RESET);
|
||||
}
|
||||
}
|
||||
|
||||
return VK_DEVICE_TIMELINE_MODE_ASSISTED;
|
||||
}
|
||||
|
||||
VkResult
|
||||
vk_device_init(struct vk_device *device,
|
||||
struct vk_physical_device *physical_device,
|
||||
|
@ -95,6 +137,8 @@ vk_device_init(struct vk_device *device,
|
|||
|
||||
device->drm_fd = -1;
|
||||
|
||||
device->timeline_mode = get_timeline_mode(physical_device);
|
||||
|
||||
#ifdef ANDROID
|
||||
mtx_init(&device->swapchain_private_mtx, mtx_plain);
|
||||
device->swapchain_private = NULL;
|
||||
|
@ -120,6 +164,30 @@ vk_device_finish(UNUSED struct vk_device *device)
|
|||
vk_object_base_finish(&device->base);
|
||||
}
|
||||
|
||||
VkResult
|
||||
vk_device_flush(struct vk_device *device)
|
||||
{
|
||||
if (device->timeline_mode != VK_DEVICE_TIMELINE_MODE_EMULATED)
|
||||
return VK_SUCCESS;
|
||||
|
||||
bool progress;
|
||||
do {
|
||||
progress = false;
|
||||
|
||||
vk_foreach_queue(queue, device) {
|
||||
uint32_t queue_submit_count;
|
||||
VkResult result = vk_queue_flush(queue, &queue_submit_count);
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
return result;
|
||||
|
||||
if (queue_submit_count)
|
||||
progress = true;
|
||||
}
|
||||
} while (progress);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
void
|
||||
_vk_device_report_lost(struct vk_device *device)
|
||||
{
|
||||
|
|
|
@ -68,6 +68,61 @@ struct vk_device {
|
|||
/* Set by vk_device_set_drm_fd() */
|
||||
int drm_fd;
|
||||
|
||||
/** An enum describing how timeline semaphores work */
|
||||
enum vk_device_timeline_mode {
|
||||
/** Timeline semaphores are not supported */
|
||||
VK_DEVICE_TIMELINE_MODE_NONE,
|
||||
|
||||
/** Timeline semaphores are emulated with vk_timeline
|
||||
*
|
||||
* In this mode, timeline semaphores are emulated using vk_timeline
|
||||
* which is a collection of binary semaphores, one per time point.
|
||||
* These timeline semaphores cannot be shared because the data structure
|
||||
* exists entirely in userspace. These timelines are virtually
|
||||
* invisible to the driver; all it sees are the binary vk_syncs, one per
|
||||
* time point.
|
||||
*
|
||||
* To handle wait-before-signal, we place all vk_queue_submits in the
|
||||
* queue's submit list in vkQueueSubmit() and call vk_device_flush() at
|
||||
* key points such as the end of vkQueueSubmit() and vkSemaphoreSignal().
|
||||
* This ensures that, as soon as a given submit's dependencies are fully
|
||||
* resolvable, it gets submitted to the driver.
|
||||
*/
|
||||
VK_DEVICE_TIMELINE_MODE_EMULATED,
|
||||
|
||||
/** Timeline semaphores are a kernel-assisted emulation
|
||||
*
|
||||
* In this mode, timeline semaphores are still technically an emulation
|
||||
* in the sense that they don't support wait-before-signal natively.
|
||||
* Instead, all GPU-waitable objects support a CPU wait-for-pending
|
||||
* operation which lets the userspace driver wait until a given event
|
||||
* on the (possibly shared) vk_sync is pending. The event is "pending"
|
||||
* if a job has been submitted to the kernel (possibly from a different
|
||||
* process) which will signal it. In vkQueueSubit, we use this wait
|
||||
* mode to detect waits which are not yet pending and, the first time we
|
||||
* do, spawn a thread to manage the queue. That thread waits for each
|
||||
* submit's waits to all be pending before submitting to the driver
|
||||
* queue.
|
||||
*
|
||||
* We have to be a bit more careful about a few things in this mode.
|
||||
* In particular, we can never assume that any given wait operation is
|
||||
* pending. For instance, when we go to export a sync file from a
|
||||
* binary semaphore, we need to first wait for it to be pending. The
|
||||
* spec guarantees that the vast majority of these waits return almost
|
||||
* immediately, but we do need to insert them for correctness.
|
||||
*/
|
||||
VK_DEVICE_TIMELINE_MODE_ASSISTED,
|
||||
|
||||
/** Timeline semaphores are 100% native
|
||||
*
|
||||
* In this mode, wait-before-signal is natively supported by the
|
||||
* underlying timeline implementation. We can submit-and-forget and
|
||||
* assume that dependencies will get resolved for us by the kernel.
|
||||
* Currently, this isn't supported by any Linux primitives.
|
||||
*/
|
||||
VK_DEVICE_TIMELINE_MODE_NATIVE,
|
||||
} timeline_mode;
|
||||
|
||||
#ifdef ANDROID
|
||||
mtx_t swapchain_private_mtx;
|
||||
struct hash_table *swapchain_private;
|
||||
|
@ -93,6 +148,8 @@ vk_device_set_drm_fd(struct vk_device *device, int drm_fd)
|
|||
void
|
||||
vk_device_finish(struct vk_device *device);
|
||||
|
||||
VkResult vk_device_flush(struct vk_device *device);
|
||||
|
||||
VkResult PRINTFLIKE(4, 5)
|
||||
_vk_device_set_lost(struct vk_device *device,
|
||||
const char *file, int line,
|
||||
|
|
|
@ -434,6 +434,18 @@ vk_common_GetFenceFdKHR(VkDevice _device,
|
|||
break;
|
||||
|
||||
case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
|
||||
/* There's no direct spec quote for this but the same rules as for
|
||||
* semaphore export apply. We can't export a sync file from a fence
|
||||
* if the fence event hasn't been submitted to the kernel yet.
|
||||
*/
|
||||
if (device->timeline_mode == VK_DEVICE_TIMELINE_MODE_ASSISTED) {
|
||||
result = vk_sync_wait(device, sync, 0,
|
||||
VK_SYNC_WAIT_PENDING,
|
||||
UINT64_MAX);
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
return result;
|
||||
}
|
||||
|
||||
result = vk_sync_export_sync_file(device, sync, pFd);
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
return result;
|
||||
|
|
|
@ -24,14 +24,28 @@
|
|||
#include "vk_queue.h"
|
||||
|
||||
#include "util/debug.h"
|
||||
#include <inttypes.h>
|
||||
|
||||
#include "vk_alloc.h"
|
||||
#include "vk_command_buffer.h"
|
||||
#include "vk_common_entrypoints.h"
|
||||
#include "vk_device.h"
|
||||
#include "vk_fence.h"
|
||||
#include "vk_log.h"
|
||||
#include "vk_physical_device.h"
|
||||
#include "vk_semaphore.h"
|
||||
#include "vk_sync.h"
|
||||
#include "vk_sync_timeline.h"
|
||||
#include "vk_util.h"
|
||||
|
||||
VkResult
|
||||
vk_queue_init(struct vk_queue *queue, struct vk_device *device,
|
||||
const VkDeviceQueueCreateInfo *pCreateInfo,
|
||||
uint32_t index_in_family)
|
||||
{
|
||||
VkResult result = VK_SUCCESS;
|
||||
int ret;
|
||||
|
||||
memset(queue, 0, sizeof(*queue));
|
||||
vk_object_base_init(device, &queue->base, VK_OBJECT_TYPE_QUEUE);
|
||||
|
||||
|
@ -43,18 +57,43 @@ vk_queue_init(struct vk_queue *queue, struct vk_device *device,
|
|||
assert(index_in_family < pCreateInfo->queueCount);
|
||||
queue->index_in_family = index_in_family;
|
||||
|
||||
list_inithead(&queue->submit.submits);
|
||||
|
||||
ret = mtx_init(&queue->submit.mutex, mtx_plain);
|
||||
if (ret == thrd_error) {
|
||||
result = vk_errorf(queue, VK_ERROR_UNKNOWN, "mtx_init failed");
|
||||
goto fail_mutex;
|
||||
}
|
||||
|
||||
ret = cnd_init(&queue->submit.push);
|
||||
if (ret == thrd_error) {
|
||||
result = vk_errorf(queue, VK_ERROR_UNKNOWN, "cnd_init failed");
|
||||
goto fail_push;
|
||||
}
|
||||
|
||||
ret = cnd_init(&queue->submit.pop);
|
||||
if (ret == thrd_error) {
|
||||
result = vk_errorf(queue, VK_ERROR_UNKNOWN, "cnd_init failed");
|
||||
goto fail_pop;
|
||||
}
|
||||
|
||||
util_dynarray_init(&queue->labels, NULL);
|
||||
queue->region_begin = true;
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
fail_pop:
|
||||
cnd_destroy(&queue->submit.push);
|
||||
fail_push:
|
||||
mtx_destroy(&queue->submit.mutex);
|
||||
fail_mutex:
|
||||
return result;
|
||||
}
|
||||
|
||||
void
|
||||
vk_queue_finish(struct vk_queue *queue)
|
||||
static bool
|
||||
vk_queue_has_submit_thread(struct vk_queue *queue)
|
||||
{
|
||||
util_dynarray_fini(&queue->labels);
|
||||
list_del(&queue->link);
|
||||
vk_object_base_finish(&queue->base);
|
||||
return queue->submit.has_thread;
|
||||
}
|
||||
|
||||
VkResult
|
||||
|
@ -83,3 +122,869 @@ _vk_queue_set_lost(struct vk_queue *queue,
|
|||
|
||||
return VK_ERROR_DEVICE_LOST;
|
||||
}
|
||||
|
||||
static struct vk_queue_submit *
|
||||
vk_queue_submit_alloc(struct vk_queue *queue,
|
||||
uint32_t wait_count,
|
||||
uint32_t command_buffer_count,
|
||||
uint32_t signal_count)
|
||||
{
|
||||
VK_MULTIALLOC(ma);
|
||||
VK_MULTIALLOC_DECL(&ma, struct vk_queue_submit, submit, 1);
|
||||
VK_MULTIALLOC_DECL(&ma, struct vk_sync_wait, waits, wait_count);
|
||||
VK_MULTIALLOC_DECL(&ma, struct vk_command_buffer *, command_buffers,
|
||||
command_buffer_count);
|
||||
VK_MULTIALLOC_DECL(&ma, struct vk_sync_signal, signals, signal_count);
|
||||
VK_MULTIALLOC_DECL(&ma, struct vk_sync *, wait_temps, wait_count);
|
||||
|
||||
struct vk_sync_timeline_point **wait_points = NULL, **signal_points = NULL;
|
||||
if (queue->base.device->timeline_mode == VK_DEVICE_TIMELINE_MODE_EMULATED) {
|
||||
vk_multialloc_add(&ma, &wait_points,
|
||||
struct vk_sync_timeline_point *, wait_count);
|
||||
vk_multialloc_add(&ma, &signal_points,
|
||||
struct vk_sync_timeline_point *, signal_count);
|
||||
}
|
||||
|
||||
if (!vk_multialloc_zalloc(&ma, &queue->base.device->alloc,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
|
||||
return NULL;
|
||||
|
||||
submit->wait_count = wait_count;
|
||||
submit->command_buffer_count = command_buffer_count;
|
||||
submit->signal_count = signal_count;
|
||||
|
||||
submit->waits = waits;
|
||||
submit->command_buffers = command_buffers;
|
||||
submit->signals = signals;
|
||||
submit->_wait_temps = wait_temps;
|
||||
submit->_wait_points = wait_points;
|
||||
submit->_signal_points = signal_points;
|
||||
|
||||
return submit;
|
||||
}
|
||||
|
||||
static void
|
||||
vk_queue_submit_cleanup(struct vk_queue *queue,
|
||||
struct vk_queue_submit *submit)
|
||||
{
|
||||
for (uint32_t i = 0; i < submit->wait_count; i++) {
|
||||
if (submit->_wait_temps[i] != NULL)
|
||||
vk_sync_destroy(queue->base.device, submit->_wait_temps[i]);
|
||||
}
|
||||
|
||||
if (submit->_wait_points != NULL) {
|
||||
for (uint32_t i = 0; i < submit->wait_count; i++) {
|
||||
if (unlikely(submit->_wait_points[i] != NULL)) {
|
||||
vk_sync_timeline_point_release(queue->base.device,
|
||||
submit->_wait_points[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (submit->_signal_points != NULL) {
|
||||
for (uint32_t i = 0; i < submit->signal_count; i++) {
|
||||
if (unlikely(submit->_signal_points[i] != NULL)) {
|
||||
vk_sync_timeline_point_free(queue->base.device,
|
||||
submit->_signal_points[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
vk_queue_submit_free(struct vk_queue *queue,
|
||||
struct vk_queue_submit *submit)
|
||||
{
|
||||
vk_free(&queue->base.device->alloc, submit);
|
||||
}
|
||||
|
||||
static void
|
||||
vk_queue_submit_destroy(struct vk_queue *queue,
|
||||
struct vk_queue_submit *submit)
|
||||
{
|
||||
vk_queue_submit_cleanup(queue, submit);
|
||||
vk_queue_submit_free(queue, submit);
|
||||
}
|
||||
|
||||
static void
|
||||
vk_queue_push_submit(struct vk_queue *queue,
|
||||
struct vk_queue_submit *submit)
|
||||
{
|
||||
mtx_lock(&queue->submit.mutex);
|
||||
list_addtail(&submit->link, &queue->submit.submits);
|
||||
cnd_signal(&queue->submit.push);
|
||||
mtx_unlock(&queue->submit.mutex);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
vk_queue_drain(struct vk_queue *queue)
|
||||
{
|
||||
VkResult result = VK_SUCCESS;
|
||||
|
||||
mtx_lock(&queue->submit.mutex);
|
||||
while (!list_is_empty(&queue->submit.submits)) {
|
||||
if (vk_device_is_lost(queue->base.device)) {
|
||||
result = VK_ERROR_DEVICE_LOST;
|
||||
break;
|
||||
}
|
||||
|
||||
int ret = cnd_wait(&queue->submit.pop, &queue->submit.mutex);
|
||||
if (ret == thrd_error) {
|
||||
result = vk_queue_set_lost(queue, "cnd_wait failed");
|
||||
break;
|
||||
}
|
||||
}
|
||||
mtx_unlock(&queue->submit.mutex);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
vk_queue_submit_final(struct vk_queue *queue,
|
||||
struct vk_queue_submit *submit)
|
||||
{
|
||||
VkResult result;
|
||||
|
||||
/* Now that we know all our time points exist, fetch the time point syncs
|
||||
* from any vk_sync_timelines. While we're here, also compact down the
|
||||
* list of waits to get rid of any trivial timeline waits.
|
||||
*/
|
||||
uint32_t wait_count = 0;
|
||||
for (uint32_t i = 0; i < submit->wait_count; i++) {
|
||||
/* A timeline wait on 0 is always a no-op */
|
||||
if ((submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE) &&
|
||||
submit->waits[i].wait_value == 0)
|
||||
continue;
|
||||
|
||||
/* For emulated timelines, we have a binary vk_sync associated with
|
||||
* each time point and pass the binary vk_sync to the driver.
|
||||
*/
|
||||
struct vk_sync_timeline *timeline =
|
||||
vk_sync_as_timeline(submit->waits[i].sync);
|
||||
if (timeline) {
|
||||
assert(queue->base.device->timeline_mode ==
|
||||
VK_DEVICE_TIMELINE_MODE_EMULATED);
|
||||
result = vk_sync_timeline_get_point(queue->base.device, timeline,
|
||||
submit->waits[i].wait_value,
|
||||
&submit->_wait_points[i]);
|
||||
if (unlikely(result != VK_SUCCESS)) {
|
||||
result = vk_queue_set_lost(queue,
|
||||
"Time point >= %"PRIu64" not found",
|
||||
submit->waits[i].wait_value);
|
||||
}
|
||||
|
||||
/* This can happen if the point is long past */
|
||||
if (submit->_wait_points[i] == NULL)
|
||||
continue;
|
||||
|
||||
submit->waits[i].sync = &submit->_wait_points[i]->sync;
|
||||
submit->waits[i].wait_value = 0;
|
||||
}
|
||||
|
||||
assert((submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE) ||
|
||||
submit->waits[i].wait_value == 0);
|
||||
|
||||
assert(wait_count <= i);
|
||||
if (wait_count < i) {
|
||||
submit->waits[wait_count] = submit->waits[i];
|
||||
submit->_wait_temps[wait_count] = submit->_wait_temps[i];
|
||||
if (submit->_wait_points)
|
||||
submit->_wait_points[wait_count] = submit->_wait_points[i];
|
||||
}
|
||||
wait_count++;
|
||||
}
|
||||
|
||||
assert(wait_count <= submit->wait_count);
|
||||
submit->wait_count = wait_count;
|
||||
|
||||
for (uint32_t i = 0; i < submit->signal_count; i++) {
|
||||
assert((submit->signals[i].sync->flags & VK_SYNC_IS_TIMELINE) ||
|
||||
submit->signals[i].signal_value == 0);
|
||||
}
|
||||
|
||||
result = queue->driver_submit(queue, submit);
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
return result;
|
||||
|
||||
if (submit->_signal_points) {
|
||||
for (uint32_t i = 0; i < submit->signal_count; i++) {
|
||||
if (submit->_signal_points[i] == NULL)
|
||||
continue;
|
||||
|
||||
vk_sync_timeline_point_install(queue->base.device,
|
||||
submit->_signal_points[i]);
|
||||
submit->_signal_points[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult
|
||||
vk_queue_flush(struct vk_queue *queue, uint32_t *submit_count_out)
|
||||
{
|
||||
VkResult result = VK_SUCCESS;
|
||||
|
||||
assert(queue->base.device->timeline_mode ==
|
||||
VK_DEVICE_TIMELINE_MODE_EMULATED);
|
||||
|
||||
mtx_lock(&queue->submit.mutex);
|
||||
|
||||
uint32_t submit_count = 0;
|
||||
while (!list_is_empty(&queue->submit.submits)) {
|
||||
struct vk_queue_submit *submit =
|
||||
list_first_entry(&queue->submit.submits,
|
||||
struct vk_queue_submit, link);
|
||||
|
||||
for (uint32_t i = 0; i < submit->wait_count; i++) {
|
||||
/* In emulated timeline mode, only emulated timelines are allowed */
|
||||
if (!vk_sync_type_is_vk_sync_timeline(submit->waits[i].sync->type)) {
|
||||
assert(!(submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE));
|
||||
continue;
|
||||
}
|
||||
|
||||
result = vk_sync_wait(queue->base.device,
|
||||
submit->waits[i].sync,
|
||||
submit->waits[i].wait_value,
|
||||
VK_SYNC_WAIT_PENDING, 0);
|
||||
if (result == VK_TIMEOUT) {
|
||||
/* This one's not ready yet */
|
||||
result = VK_SUCCESS;
|
||||
goto done;
|
||||
} else if (result != VK_SUCCESS) {
|
||||
result = vk_queue_set_lost(queue, "Wait for time points failed");
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
result = vk_queue_submit_final(queue, submit);
|
||||
if (unlikely(result != VK_SUCCESS)) {
|
||||
result = vk_queue_set_lost(queue, "queue::driver_submit failed");
|
||||
goto done;
|
||||
}
|
||||
|
||||
submit_count++;
|
||||
|
||||
list_del(&submit->link);
|
||||
|
||||
vk_queue_submit_destroy(queue, submit);
|
||||
}
|
||||
|
||||
done:
|
||||
if (submit_count)
|
||||
cnd_broadcast(&queue->submit.pop);
|
||||
|
||||
mtx_unlock(&queue->submit.mutex);
|
||||
|
||||
if (submit_count_out)
|
||||
*submit_count_out = submit_count;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static int
|
||||
vk_queue_submit_thread_func(void *_data)
|
||||
{
|
||||
struct vk_queue *queue = _data;
|
||||
VkResult result;
|
||||
|
||||
assert(queue->base.device->timeline_mode ==
|
||||
VK_DEVICE_TIMELINE_MODE_ASSISTED);
|
||||
|
||||
mtx_lock(&queue->submit.mutex);
|
||||
|
||||
while (queue->submit.thread_run) {
|
||||
if (list_is_empty(&queue->submit.submits)) {
|
||||
int ret = cnd_wait(&queue->submit.push, &queue->submit.mutex);
|
||||
if (ret == thrd_error) {
|
||||
mtx_unlock(&queue->submit.mutex);
|
||||
vk_queue_set_lost(queue, "cnd_wait failed");
|
||||
return 1;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
struct vk_queue_submit *submit =
|
||||
list_first_entry(&queue->submit.submits,
|
||||
struct vk_queue_submit, link);
|
||||
|
||||
/* Drop the lock while we wait */
|
||||
mtx_unlock(&queue->submit.mutex);
|
||||
|
||||
result = vk_sync_wait_many(queue->base.device,
|
||||
submit->wait_count, submit->waits,
|
||||
VK_SYNC_WAIT_PENDING, UINT64_MAX);
|
||||
if (unlikely(result != VK_SUCCESS)) {
|
||||
vk_queue_set_lost(queue, "Wait for time points failed");
|
||||
return 1;
|
||||
}
|
||||
|
||||
result = vk_queue_submit_final(queue, submit);
|
||||
if (unlikely(result != VK_SUCCESS)) {
|
||||
vk_queue_set_lost(queue, "queue::driver_submit failed");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Do all our cleanup of individual fences etc. outside the lock.
|
||||
* We can't actually remove it from the list yet. We have to do
|
||||
* that under the lock.
|
||||
*/
|
||||
vk_queue_submit_cleanup(queue, submit);
|
||||
|
||||
mtx_lock(&queue->submit.mutex);
|
||||
|
||||
/* Only remove the submit from from the list and free it after
|
||||
* queue->submit() has completed. This ensures that, when
|
||||
* vk_queue_drain() completes, there are no more pending jobs.
|
||||
*/
|
||||
list_del(&submit->link);
|
||||
vk_queue_submit_free(queue, submit);
|
||||
|
||||
cnd_broadcast(&queue->submit.pop);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
vk_queue_enable_submit_thread(struct vk_queue *queue)
|
||||
{
|
||||
int ret;
|
||||
|
||||
queue->submit.thread_run = true;
|
||||
|
||||
ret = thrd_create(&queue->submit.thread,
|
||||
vk_queue_submit_thread_func,
|
||||
queue);
|
||||
if (ret == thrd_error)
|
||||
return vk_errorf(queue, VK_ERROR_UNKNOWN, "thrd_create failed");
|
||||
|
||||
queue->submit.has_thread = true;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static void
|
||||
vk_queue_disable_submit_thread(struct vk_queue *queue)
|
||||
{
|
||||
vk_queue_drain(queue);
|
||||
|
||||
/* Kick the thread to disable it */
|
||||
mtx_lock(&queue->submit.mutex);
|
||||
queue->submit.thread_run = false;
|
||||
cnd_signal(&queue->submit.push);
|
||||
mtx_unlock(&queue->submit.mutex);
|
||||
|
||||
thrd_join(queue->submit.thread, NULL);
|
||||
|
||||
queue->submit.has_thread = false;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
vk_queue_submit(struct vk_queue *queue,
|
||||
const VkSubmitInfo2KHR *info,
|
||||
struct vk_fence *fence)
|
||||
{
|
||||
VkResult result;
|
||||
|
||||
struct vk_queue_submit *submit =
|
||||
vk_queue_submit_alloc(queue, info->waitSemaphoreInfoCount,
|
||||
info->commandBufferInfoCount,
|
||||
info->signalSemaphoreInfoCount + (fence != NULL));
|
||||
if (unlikely(submit == NULL))
|
||||
return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
/* From the Vulkan 1.2.194 spec:
|
||||
*
|
||||
* "If the VkSubmitInfo::pNext chain does not include this structure,
|
||||
* the batch defaults to use counter pass index 0."
|
||||
*/
|
||||
const VkPerformanceQuerySubmitInfoKHR *perf_info =
|
||||
vk_find_struct_const(info->pNext, PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
|
||||
submit->perf_pass_index = perf_info ? perf_info->counterPassIndex : 0;
|
||||
|
||||
bool has_binary_permanent_semaphore_wait = false;
|
||||
for (uint32_t i = 0; i < info->waitSemaphoreInfoCount; i++) {
|
||||
VK_FROM_HANDLE(vk_semaphore, semaphore,
|
||||
info->pWaitSemaphoreInfos[i].semaphore);
|
||||
|
||||
/* From the Vulkan 1.2.194 spec:
|
||||
*
|
||||
* "Applications can import a semaphore payload into an existing
|
||||
* semaphore using an external semaphore handle. The effects of the
|
||||
* import operation will be either temporary or permanent, as
|
||||
* specified by the application. If the import is temporary, the
|
||||
* implementation must restore the semaphore to its prior permanent
|
||||
* state after submitting the next semaphore wait operation."
|
||||
*
|
||||
* and
|
||||
*
|
||||
* VUID-VkImportSemaphoreFdInfoKHR-flags-03323
|
||||
*
|
||||
* "If flags contains VK_SEMAPHORE_IMPORT_TEMPORARY_BIT, the
|
||||
* VkSemaphoreTypeCreateInfo::semaphoreType field of the semaphore
|
||||
* from which handle or name was exported must not be
|
||||
* VK_SEMAPHORE_TYPE_TIMELINE"
|
||||
*/
|
||||
struct vk_sync *sync;
|
||||
if (semaphore->temporary) {
|
||||
assert(semaphore->type == VK_SEMAPHORE_TYPE_BINARY);
|
||||
sync = submit->_wait_temps[i] = semaphore->temporary;
|
||||
semaphore->temporary = NULL;
|
||||
} else {
|
||||
if (semaphore->type == VK_SEMAPHORE_TYPE_BINARY) {
|
||||
if (queue->base.device->timeline_mode ==
|
||||
VK_DEVICE_TIMELINE_MODE_ASSISTED)
|
||||
assert(semaphore->permanent.type->move);
|
||||
has_binary_permanent_semaphore_wait = true;
|
||||
}
|
||||
|
||||
sync = &semaphore->permanent;
|
||||
}
|
||||
|
||||
uint32_t wait_value = semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE ?
|
||||
info->pWaitSemaphoreInfos[i].value : 0;
|
||||
|
||||
submit->waits[i] = (struct vk_sync_wait) {
|
||||
.sync = sync,
|
||||
.stage_mask = info->pWaitSemaphoreInfos[i].stageMask,
|
||||
.wait_value = wait_value,
|
||||
};
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < info->commandBufferInfoCount; i++) {
|
||||
VK_FROM_HANDLE(vk_command_buffer, cmd_buffer,
|
||||
info->pCommandBufferInfos[i].commandBuffer);
|
||||
assert(info->pCommandBufferInfos[i].deviceMask == 0 ||
|
||||
info->pCommandBufferInfos[i].deviceMask == 1);
|
||||
submit->command_buffers[i] = cmd_buffer;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < info->signalSemaphoreInfoCount; i++) {
|
||||
VK_FROM_HANDLE(vk_semaphore, semaphore,
|
||||
info->pSignalSemaphoreInfos[i].semaphore);
|
||||
|
||||
struct vk_sync *sync = vk_semaphore_get_active_sync(semaphore);
|
||||
uint32_t signal_value = info->pSignalSemaphoreInfos[i].value;
|
||||
if (semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE) {
|
||||
if (signal_value == 0) {
|
||||
result = vk_queue_set_lost(queue,
|
||||
"Tried to signal a timeline with value 0");
|
||||
goto fail;
|
||||
}
|
||||
} else {
|
||||
signal_value = 0;
|
||||
}
|
||||
|
||||
/* For emulated timelines, we need to associate a binary vk_sync with
|
||||
* each time point and pass the binary vk_sync to the driver. We could
|
||||
* do this in vk_queue_submit_final but it might require doing memory
|
||||
* allocation and we don't want to to add extra failure paths there.
|
||||
* Instead, allocate and replace the driver-visible vk_sync now and
|
||||
* we'll insert it into the timeline in vk_queue_submit_final. The
|
||||
* insert step is guaranteed to not fail.
|
||||
*/
|
||||
struct vk_sync_timeline *timeline = vk_sync_as_timeline(sync);
|
||||
if (timeline) {
|
||||
assert(queue->base.device->timeline_mode ==
|
||||
VK_DEVICE_TIMELINE_MODE_EMULATED);
|
||||
result = vk_sync_timeline_alloc_point(queue->base.device, timeline,
|
||||
signal_value,
|
||||
&submit->_signal_points[i]);
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
goto fail;
|
||||
|
||||
sync = &submit->_signal_points[i]->sync;
|
||||
signal_value = 0;
|
||||
}
|
||||
|
||||
submit->signals[i] = (struct vk_sync_signal) {
|
||||
.sync = sync,
|
||||
.stage_mask = info->pSignalSemaphoreInfos[i].stageMask,
|
||||
.signal_value = signal_value,
|
||||
};
|
||||
}
|
||||
|
||||
if (fence != NULL) {
|
||||
uint32_t fence_idx = info->signalSemaphoreInfoCount;
|
||||
assert(submit->signal_count == fence_idx + 1);
|
||||
assert(submit->signals[fence_idx].sync == NULL);
|
||||
submit->signals[fence_idx] = (struct vk_sync_signal) {
|
||||
.sync = vk_fence_get_active_sync(fence),
|
||||
.stage_mask = ~(VkPipelineStageFlags2KHR)0,
|
||||
};
|
||||
}
|
||||
|
||||
switch (queue->base.device->timeline_mode) {
|
||||
case VK_DEVICE_TIMELINE_MODE_ASSISTED:
|
||||
if (!vk_queue_has_submit_thread(queue)) {
|
||||
static int force_submit_thread = -1;
|
||||
if (unlikely(force_submit_thread < 0)) {
|
||||
force_submit_thread =
|
||||
env_var_as_boolean("MESA_VK_ENABLE_SUBMIT_THREAD", false);
|
||||
}
|
||||
|
||||
if (unlikely(force_submit_thread)) {
|
||||
result = vk_queue_enable_submit_thread(queue);
|
||||
} else {
|
||||
/* Otherwise, only enable the submit thread if we need it in order
|
||||
* to resolve timeline semaphore wait-before-signal issues.
|
||||
*/
|
||||
result = vk_sync_wait_many(queue->base.device,
|
||||
submit->wait_count, submit->waits,
|
||||
VK_SYNC_WAIT_PENDING, 0);
|
||||
if (result == VK_TIMEOUT)
|
||||
result = vk_queue_enable_submit_thread(queue);
|
||||
}
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (vk_queue_has_submit_thread(queue)) {
|
||||
if (has_binary_permanent_semaphore_wait) {
|
||||
for (uint32_t i = 0; i < info->waitSemaphoreInfoCount; i++) {
|
||||
VK_FROM_HANDLE(vk_semaphore, semaphore,
|
||||
info->pWaitSemaphoreInfos[i].semaphore);
|
||||
|
||||
if (semaphore->type != VK_SEMAPHORE_TYPE_BINARY)
|
||||
continue;
|
||||
|
||||
/* From the Vulkan 1.2.194 spec:
|
||||
*
|
||||
* "When a batch is submitted to a queue via a queue
|
||||
* submission, and it includes semaphores to be waited on,
|
||||
* it defines a memory dependency between prior semaphore
|
||||
* signal operations and the batch, and defines semaphore
|
||||
* wait operations.
|
||||
*
|
||||
* Such semaphore wait operations set the semaphores
|
||||
* created with a VkSemaphoreType of
|
||||
* VK_SEMAPHORE_TYPE_BINARY to the unsignaled state."
|
||||
*
|
||||
* For threaded submit, we depend on tracking the unsignaled
|
||||
* state of binary semaphores to determine when we can safely
|
||||
* submit. The VK_SYNC_WAIT_PENDING check above as well as the
|
||||
* one in the sumbit thread depend on all binary semaphores
|
||||
* being reset when they're not in active use from the point
|
||||
* of view of the client's CPU timeline. This means we need to
|
||||
* reset them inside vkQueueSubmit and cannot wait until the
|
||||
* actual submit which happens later in the thread.
|
||||
*
|
||||
* We've already stolen temporary semaphore payloads above as
|
||||
* part of basic semaphore processing. We steal permanent
|
||||
* semaphore payloads here by way of vk_sync_move. For shared
|
||||
* semaphores, this can be a bit expensive (sync file import
|
||||
* and export) but, for non-shared semaphores, it can be made
|
||||
* fairly cheap. Also, we only do this semaphore swapping in
|
||||
* the case where you have real timelines AND the client is
|
||||
* using timeline semaphores with wait-before-signal (that's
|
||||
* the only way to get a submit thread) AND mixing those with
|
||||
* waits on binary semaphores AND said binary semaphore is
|
||||
* using its permanent payload. In other words, this code
|
||||
* should basically only ever get executed in CTS tests.
|
||||
*/
|
||||
if (submit->_wait_temps[i] != NULL)
|
||||
continue;
|
||||
|
||||
assert(submit->waits[i].sync == &semaphore->permanent);
|
||||
|
||||
/* From the Vulkan 1.2.194 spec:
|
||||
*
|
||||
* VUID-vkQueueSubmit-pWaitSemaphores-03238
|
||||
*
|
||||
* "All elements of the pWaitSemaphores member of all
|
||||
* elements of pSubmits created with a VkSemaphoreType of
|
||||
* VK_SEMAPHORE_TYPE_BINARY must reference a semaphore
|
||||
* signal operation that has been submitted for execution
|
||||
* and any semaphore signal operations on which it depends
|
||||
* (if any) must have also been submitted for execution."
|
||||
*
|
||||
* Therefore, we can safely do a blocking wait here and it
|
||||
* won't actually block for long. This ensures that the
|
||||
* vk_sync_move below will succeed.
|
||||
*/
|
||||
result = vk_sync_wait(queue->base.device,
|
||||
submit->waits[i].sync, 0,
|
||||
VK_SYNC_WAIT_PENDING, UINT64_MAX);
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
goto fail;
|
||||
|
||||
result = vk_sync_create(queue->base.device,
|
||||
semaphore->permanent.type,
|
||||
0 /* flags */,
|
||||
0 /* initial value */,
|
||||
&submit->_wait_temps[i]);
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
goto fail;
|
||||
|
||||
result = vk_sync_move(queue->base.device,
|
||||
submit->_wait_temps[i],
|
||||
&semaphore->permanent);
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
goto fail;
|
||||
|
||||
submit->waits[i].sync = submit->_wait_temps[i];
|
||||
}
|
||||
}
|
||||
|
||||
vk_queue_push_submit(queue, submit);
|
||||
return VK_SUCCESS;
|
||||
} else {
|
||||
result = vk_queue_submit_final(queue, submit);
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
goto fail;
|
||||
|
||||
/* If we don't have a submit thread, we can more directly ensure
|
||||
* that binary semaphore payloads get reset. If we also signal the
|
||||
* vk_sync, then we can consider it to have been both reset and
|
||||
* signaled. A reset in this case would be wrong because it would
|
||||
* throw away our signal operation. If we don't signal the vk_sync,
|
||||
* then we need to reset it.
|
||||
*/
|
||||
if (has_binary_permanent_semaphore_wait) {
|
||||
for (uint32_t i = 0; i < submit->wait_count; i++) {
|
||||
if ((submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE) ||
|
||||
submit->_wait_temps[i] != NULL)
|
||||
continue;
|
||||
|
||||
bool was_signaled = false;
|
||||
for (uint32_t j = 0; j < submit->signal_count; j++) {
|
||||
if (submit->signals[j].sync == submit->waits[i].sync) {
|
||||
was_signaled = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!was_signaled) {
|
||||
result = vk_sync_reset(queue->base.device,
|
||||
submit->waits[i].sync);
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vk_queue_submit_destroy(queue, submit);
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
unreachable("Should have returned");
|
||||
|
||||
case VK_DEVICE_TIMELINE_MODE_EMULATED:
|
||||
vk_queue_push_submit(queue, submit);
|
||||
return vk_device_flush(queue->base.device);
|
||||
|
||||
case VK_DEVICE_TIMELINE_MODE_NONE:
|
||||
case VK_DEVICE_TIMELINE_MODE_NATIVE:
|
||||
result = vk_queue_submit_final(queue, submit);
|
||||
vk_queue_submit_destroy(queue, submit);
|
||||
return result;
|
||||
}
|
||||
unreachable("Invalid timeline mode");
|
||||
|
||||
fail:
|
||||
vk_queue_submit_destroy(queue, submit);
|
||||
return result;
|
||||
}
|
||||
|
||||
VkResult
|
||||
vk_queue_wait_before_present(struct vk_queue *queue,
|
||||
const VkPresentInfoKHR *pPresentInfo)
|
||||
{
|
||||
if (vk_device_is_lost(queue->base.device))
|
||||
return VK_ERROR_DEVICE_LOST;
|
||||
|
||||
/* From the Vulkan 1.2.194 spec:
|
||||
*
|
||||
* VUID-vkQueuePresentKHR-pWaitSemaphores-03268
|
||||
*
|
||||
* "All elements of the pWaitSemaphores member of pPresentInfo must
|
||||
* reference a semaphore signal operation that has been submitted for
|
||||
* execution and any semaphore signal operations on which it depends (if
|
||||
* any) must have also been submitted for execution."
|
||||
*
|
||||
* As with vkQueueSubmit above, we need to ensure that any binary
|
||||
* semaphores we use in this present actually exist. If we don't have
|
||||
* timeline semaphores, this is a non-issue. If they're emulated, then
|
||||
* this is ensured for us by the vk_device_flush() at the end of every
|
||||
* vkQueueSubmit() and every vkSignalSemaphore(). For real timeline
|
||||
* semaphores, however, we need to do a wait. Thanks to the above bit of
|
||||
* spec text, that wait should never block for long.
|
||||
*/
|
||||
if (queue->base.device->timeline_mode != VK_DEVICE_TIMELINE_MODE_ASSISTED)
|
||||
return VK_SUCCESS;
|
||||
|
||||
const uint32_t wait_count = pPresentInfo->waitSemaphoreCount;
|
||||
STACK_ARRAY(struct vk_sync_wait, waits, wait_count);
|
||||
|
||||
for (uint32_t i = 0; i < wait_count; i++) {
|
||||
VK_FROM_HANDLE(vk_semaphore, semaphore,
|
||||
pPresentInfo->pWaitSemaphores[i]);
|
||||
|
||||
/* From the Vulkan 1.2.194 spec:
|
||||
*
|
||||
* VUID-vkQueuePresentKHR-pWaitSemaphores-03267
|
||||
*
|
||||
* "All elements of the pWaitSemaphores member of pPresentInfo must
|
||||
* be created with a VkSemaphoreType of VK_SEMAPHORE_TYPE_BINARY."
|
||||
*/
|
||||
assert(semaphore->type == VK_SEMAPHORE_TYPE_BINARY);
|
||||
|
||||
waits[i] = (struct vk_sync_wait) {
|
||||
.sync = vk_semaphore_get_active_sync(semaphore),
|
||||
.stage_mask = ~(VkPipelineStageFlags2KHR)0,
|
||||
};
|
||||
}
|
||||
|
||||
VkResult result = vk_sync_wait_many(queue->base.device, wait_count, waits,
|
||||
VK_SYNC_WAIT_PENDING, UINT64_MAX);
|
||||
|
||||
STACK_ARRAY_FINISH(waits);
|
||||
|
||||
/* Check again, just in case */
|
||||
if (vk_device_is_lost(queue->base.device))
|
||||
return VK_ERROR_DEVICE_LOST;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
vk_queue_signal_sync(struct vk_queue *queue,
|
||||
struct vk_sync *sync,
|
||||
uint32_t signal_value)
|
||||
{
|
||||
struct vk_queue_submit *submit = vk_queue_submit_alloc(queue, 0, 0, 1);
|
||||
if (unlikely(submit == NULL))
|
||||
return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
submit->signals[0] = (struct vk_sync_signal) {
|
||||
.sync = sync,
|
||||
.stage_mask = ~(VkPipelineStageFlags2KHR)0,
|
||||
.signal_value = signal_value,
|
||||
};
|
||||
|
||||
VkResult result;
|
||||
switch (queue->base.device->timeline_mode) {
|
||||
case VK_DEVICE_TIMELINE_MODE_ASSISTED:
|
||||
if (vk_queue_has_submit_thread(queue)) {
|
||||
vk_queue_push_submit(queue, submit);
|
||||
return VK_SUCCESS;
|
||||
} else {
|
||||
result = vk_queue_submit_final(queue, submit);
|
||||
vk_queue_submit_destroy(queue, submit);
|
||||
return result;
|
||||
}
|
||||
|
||||
case VK_DEVICE_TIMELINE_MODE_EMULATED:
|
||||
vk_queue_push_submit(queue, submit);
|
||||
return vk_device_flush(queue->base.device);
|
||||
|
||||
case VK_DEVICE_TIMELINE_MODE_NONE:
|
||||
case VK_DEVICE_TIMELINE_MODE_NATIVE:
|
||||
result = vk_queue_submit_final(queue, submit);
|
||||
vk_queue_submit_destroy(queue, submit);
|
||||
return result;
|
||||
}
|
||||
unreachable("Invalid timeline mode");
|
||||
}
|
||||
|
||||
void
|
||||
vk_queue_finish(struct vk_queue *queue)
|
||||
{
|
||||
if (vk_queue_has_submit_thread(queue))
|
||||
vk_queue_disable_submit_thread(queue);
|
||||
|
||||
while (!list_is_empty(&queue->submit.submits)) {
|
||||
assert(vk_device_is_lost_no_report(queue->base.device));
|
||||
|
||||
struct vk_queue_submit *submit =
|
||||
list_first_entry(&queue->submit.submits,
|
||||
struct vk_queue_submit, link);
|
||||
|
||||
list_del(&submit->link);
|
||||
vk_queue_submit_destroy(queue, submit);
|
||||
}
|
||||
|
||||
cnd_destroy(&queue->submit.pop);
|
||||
cnd_destroy(&queue->submit.push);
|
||||
mtx_destroy(&queue->submit.mutex);
|
||||
|
||||
util_dynarray_fini(&queue->labels);
|
||||
list_del(&queue->link);
|
||||
vk_object_base_finish(&queue->base);
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
vk_common_QueueSubmit2KHR(VkQueue _queue,
|
||||
uint32_t submitCount,
|
||||
const VkSubmitInfo2KHR *pSubmits,
|
||||
VkFence _fence)
|
||||
{
|
||||
VK_FROM_HANDLE(vk_queue, queue, _queue);
|
||||
VK_FROM_HANDLE(vk_fence, fence, _fence);
|
||||
|
||||
if (vk_device_is_lost(queue->base.device))
|
||||
return VK_ERROR_DEVICE_LOST;
|
||||
|
||||
if (submitCount == 0) {
|
||||
if (fence == NULL) {
|
||||
return VK_SUCCESS;
|
||||
} else {
|
||||
return vk_queue_signal_sync(queue, vk_fence_get_active_sync(fence), 0);
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < submitCount; i++) {
|
||||
VkResult result = vk_queue_submit(queue, &pSubmits[i],
|
||||
i == submitCount - 1 ? fence : NULL);
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
return result;
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static const struct vk_sync_type *
|
||||
get_cpu_wait_type(struct vk_physical_device *pdevice)
|
||||
{
|
||||
for (const struct vk_sync_type *const *t =
|
||||
pdevice->supported_sync_types; *t; t++) {
|
||||
if (((*t)->features & VK_SYNC_FEATURE_BINARY) &&
|
||||
((*t)->features & VK_SYNC_FEATURE_CPU_WAIT))
|
||||
return *t;
|
||||
}
|
||||
|
||||
unreachable("You must have a non-timeline CPU wait sync type");
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
vk_common_QueueWaitIdle(VkQueue _queue)
|
||||
{
|
||||
VK_FROM_HANDLE(vk_queue, queue, _queue);
|
||||
VkResult result;
|
||||
|
||||
if (vk_device_is_lost(queue->base.device))
|
||||
return VK_ERROR_DEVICE_LOST;
|
||||
|
||||
const struct vk_sync_type *sync_type =
|
||||
get_cpu_wait_type(queue->base.device->physical);
|
||||
|
||||
struct vk_sync *sync;
|
||||
result = vk_sync_create(queue->base.device, sync_type, 0, 0, &sync);
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
return result;
|
||||
|
||||
result = vk_queue_signal_sync(queue, sync, 0);
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
return result;
|
||||
|
||||
result = vk_sync_wait(queue->base.device, sync, 0,
|
||||
VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
|
||||
|
||||
vk_sync_destroy(queue->base.device, sync);
|
||||
|
||||
VkResult device_status = vk_device_check_status(queue->base.device);
|
||||
if (device_status != VK_SUCCESS)
|
||||
return device_status;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -26,6 +26,8 @@
|
|||
|
||||
#include "vk_object.h"
|
||||
|
||||
#include "c11/threads.h"
|
||||
|
||||
#include "util/list.h"
|
||||
#include "util/u_dynarray.h"
|
||||
|
||||
|
@ -33,6 +35,13 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct vk_command_buffer;
|
||||
struct vk_queue_submit;
|
||||
struct vk_sync;
|
||||
struct vk_sync_wait;
|
||||
struct vk_sync_signal;
|
||||
struct vk_sync_timeline_point;
|
||||
|
||||
struct vk_queue {
|
||||
struct vk_object_base base;
|
||||
|
||||
|
@ -48,6 +57,32 @@ struct vk_queue {
|
|||
/* Which queue this is within the queue family */
|
||||
uint32_t index_in_family;
|
||||
|
||||
/** Driver queue submit hook
|
||||
*
|
||||
* When using the common implementation of vkQueueSubmit(), this function
|
||||
* is called to do the final submit to the kernel driver after all
|
||||
* semaphore dependencies have been resolved. Depending on the timeline
|
||||
* mode and application usage, this function may be called directly from
|
||||
* the client thread on which vkQueueSubmit was called or from a runtime-
|
||||
* managed submit thread. We do, however, guarantee that as long as the
|
||||
* client follows the Vulkan threading rules, this function will never be
|
||||
* called by the runtime concurrently on the same queue.
|
||||
*/
|
||||
VkResult (*driver_submit)(struct vk_queue *queue,
|
||||
struct vk_queue_submit *submit);
|
||||
|
||||
struct {
|
||||
mtx_t mutex;
|
||||
cnd_t push;
|
||||
cnd_t pop;
|
||||
|
||||
struct list_head submits;
|
||||
|
||||
bool thread_run;
|
||||
bool has_thread;
|
||||
thrd_t thread;
|
||||
} submit;
|
||||
|
||||
struct {
|
||||
/* Only set once atomically by the queue */
|
||||
int lost;
|
||||
|
@ -107,6 +142,17 @@ vk_queue_init(struct vk_queue *queue, struct vk_device *device,
|
|||
void
|
||||
vk_queue_finish(struct vk_queue *queue);
|
||||
|
||||
/* Returns true if the queue's pending-submit list is empty. */
static inline bool
vk_queue_is_empty(struct vk_queue *queue)
{
   return list_is_empty(&queue->submit.submits);
}
|
||||
|
||||
VkResult vk_queue_flush(struct vk_queue *queue, uint32_t *submit_count_out);
|
||||
|
||||
VkResult vk_queue_wait_before_present(struct vk_queue *queue,
|
||||
const VkPresentInfoKHR *pPresentInfo);
|
||||
|
||||
VkResult PRINTFLIKE(4, 5)
|
||||
_vk_queue_set_lost(struct vk_queue *queue,
|
||||
const char *file, int line,
|
||||
|
@ -127,6 +173,25 @@ vk_queue_is_lost(struct vk_queue *queue)
|
|||
#define vk_foreach_queue_safe(queue, device) \
|
||||
list_for_each_entry_safe(struct vk_queue, queue, &(device)->queues, link)
|
||||
|
||||
/* A single batch handed to the driver's queue submit hook with all
 * semaphore dependencies already resolved to vk_sync objects.
 */
struct vk_queue_submit {
   /* Link in vk_queue::submit::submits */
   struct list_head link;

   /* Lengths of the waits, command_buffers, and signals arrays */
   uint32_t wait_count;
   uint32_t command_buffer_count;
   uint32_t signal_count;

   /* Sync objects to wait on as part of this submit */
   struct vk_sync_wait *waits;

   /* Command buffers in this batch */
   struct vk_command_buffer **command_buffers;

   /* Sync objects to signal as part of this submit */
   struct vk_sync_signal *signals;

   /* Presumably a performance-query pass index passed through to the
    * driver — TODO confirm against the submit paths that set it.
    */
   uint32_t perf_pass_index;

   /* Used internally; should be ignored by drivers */
   struct vk_sync **_wait_temps;
   struct vk_sync_timeline_point **_wait_points;
   struct vk_sync_timeline_point **_signal_points;
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -132,6 +132,9 @@ vk_common_CreateSemaphore(VkDevice _device,
|
|||
const VkSemaphoreType semaphore_type =
|
||||
get_semaphore_type(pCreateInfo->pNext, &initial_value);
|
||||
|
||||
if (semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE)
|
||||
assert(device->timeline_mode != VK_DEVICE_TIMELINE_MODE_NONE);
|
||||
|
||||
const VkExportSemaphoreCreateInfo *export =
|
||||
vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO);
|
||||
VkExternalSemaphoreHandleTypeFlags handle_types =
|
||||
|
@ -147,6 +150,15 @@ vk_common_CreateSemaphore(VkDevice _device,
|
|||
"for VkSemaphore creation.");
|
||||
}
|
||||
|
||||
/* If the timeline mode is ASSISTED, then any permanent binary semaphore
|
||||
* types need to be able to support move. We don't require this for
|
||||
* temporary unless that temporary is also used as a semaphore signal
|
||||
* operation which is much trickier to assert early.
|
||||
*/
|
||||
if (semaphore_type == VK_SEMAPHORE_TYPE_BINARY &&
|
||||
device->timeline_mode == VK_DEVICE_TIMELINE_MODE_ASSISTED)
|
||||
assert(sync_type->move);
|
||||
|
||||
/* Allocate a vk_semaphore + vk_sync implementation. Because the permanent
|
||||
* field of vk_semaphore is the base field of the vk_sync implementation,
|
||||
* we can make the 2 structures overlap.
|
||||
|
@ -359,6 +371,12 @@ vk_common_SignalSemaphore(VkDevice _device,
|
|||
if (unlikely(result != VK_SUCCESS))
|
||||
return result;
|
||||
|
||||
if (device->timeline_mode == VK_DEVICE_TIMELINE_MODE_EMULATED) {
|
||||
result = vk_device_flush(device);
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
return result;
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -489,6 +507,28 @@ vk_common_GetSemaphoreFdKHR(VkDevice _device,
|
|||
"Cannot export a timeline semaphore as SYNC_FD");
|
||||
}
|
||||
|
||||
/* From the Vulkan 1.2.194 spec:
|
||||
* VUID-VkSemaphoreGetFdInfoKHR-handleType-03254
|
||||
*
|
||||
* "If handleType refers to a handle type with copy payload
|
||||
* transference semantics, semaphore must have an associated
|
||||
* semaphore signal operation that has been submitted for execution
|
||||
* and any semaphore signal operations on which it depends (if any)
|
||||
* must have also been submitted for execution."
|
||||
*
|
||||
* If we have real timelines, it's possible that the time point doesn't
|
||||
* exist yet and is waiting for one of our submit threads to trigger.
|
||||
* However, thanks to the above bit of spec text, that wait should never
|
||||
* block for long.
|
||||
*/
|
||||
if (device->timeline_mode == VK_DEVICE_TIMELINE_MODE_ASSISTED) {
|
||||
result = vk_sync_wait(device, sync, 0,
|
||||
VK_SYNC_WAIT_PENDING,
|
||||
UINT64_MAX);
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
return result;
|
||||
}
|
||||
|
||||
result = vk_sync_export_sync_file(device, sync, pFd);
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
return result;
|
||||
|
|
|
@ -297,6 +297,13 @@ struct vk_sync_wait {
|
|||
uint64_t wait_value;
|
||||
};
|
||||
|
||||
/* See VkSemaphoreSubmitInfoKHR */
struct vk_sync_signal {
   /* Sync object to be signaled */
   struct vk_sync *sync;

   /* Pipeline stages gating the signal; see
    * VkSemaphoreSubmitInfoKHR::stageMask.
    */
   VkPipelineStageFlags2KHR stage_mask;

   /* Value to signal; see VkSemaphoreSubmitInfoKHR::value */
   uint64_t signal_value;
};
|
||||
|
||||
VkResult MUST_CHECK vk_sync_init(struct vk_device *device,
|
||||
struct vk_sync *sync,
|
||||
const struct vk_sync_type *type,
|
||||
|
|
Loading…
Reference in New Issue