vulkan/queue: Handle WSI memory signal information

We handle it by asking the driver to create a vk_sync that wraps a
VkDeviceMemory object; that vk_sync is then passed as one of the signal ops.

Fixes: 9bffd81f1c ("vulkan: Add common implementations of vkQueueSubmit and vkQueueWaitIdle")
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14237>
This commit is contained in:
Jason Ekstrand 2021-12-16 10:29:03 -06:00 committed by Marge Bot
parent cdf101455d
commit 2188829d29
3 changed files with 77 additions and 5 deletions

View File

@ -34,6 +34,8 @@
extern "C" {
#endif
struct vk_sync;
struct vk_device {
struct vk_object_base base;
VkAllocationCallbacks alloc;
@ -65,6 +67,29 @@ struct vk_device {
*/
VkResult (*check_status)(struct vk_device *device);
/** Creates a vk_sync that wraps a memory object
*
* This is always a one-shot object so it need not track any additional
* state. Since it's intended for synchronizing between processes using
* implicit synchronization mechanisms, no such tracking would be valid
* anyway.
*
* If `signal_memory` is set, the resulting vk_sync will be used to signal
* the memory object from a queue via vk_queue_submit::signals. The common
* code guarantees that, by the time vkQueueSubmit() returns, the signal
* operation has been submitted to the kernel via the driver's
* vk_queue::driver_submit hook. This means that any vkQueueSubmit() call
* which needs implicit synchronization may block.
*
* If `signal_memory` is not set, it can be assumed that the memory object
* already has a signal operation pending from some other process and we
* need only wait on it.
*/
VkResult (*create_sync_for_memory)(struct vk_device *device,
VkDeviceMemory memory,
bool signal_memory,
struct vk_sync **sync_out);
/* Set by vk_device_set_drm_fd() */
int drm_fd;

View File

@ -40,6 +40,8 @@
#include "vk_sync_timeline.h"
#include "vk_util.h"
#include "vulkan/wsi/wsi_common.h"
VkResult
vk_queue_init(struct vk_queue *queue, struct vk_device *device,
const VkDeviceQueueCreateInfo *pCreateInfo,
@ -174,6 +176,9 @@ vk_queue_submit_cleanup(struct vk_queue *queue,
vk_sync_destroy(queue->base.device, submit->_wait_temps[i]);
}
if (submit->_mem_signal_temp != NULL)
vk_sync_destroy(queue->base.device, submit->_mem_signal_temp);
if (submit->_wait_points != NULL) {
for (uint32_t i = 0; i < submit->wait_count; i++) {
if (unlikely(submit->_wait_points[i] != NULL)) {
@ -508,10 +513,17 @@ vk_queue_submit(struct vk_queue *queue,
{
VkResult result;
const struct wsi_memory_signal_submit_info *mem_signal =
vk_find_struct_const(info->pNext, WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA);
bool signal_mem_sync = mem_signal != NULL &&
mem_signal->memory != VK_NULL_HANDLE &&
queue->base.device->create_sync_for_memory != NULL;
struct vk_queue_submit *submit =
vk_queue_submit_alloc(queue, info->waitSemaphoreInfoCount,
info->commandBufferInfoCount,
info->signalSemaphoreInfoCount + (fence != NULL));
info->signalSemaphoreInfoCount +
signal_mem_sync + (fence != NULL));
if (unlikely(submit == NULL))
return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
@ -626,16 +638,34 @@ vk_queue_submit(struct vk_queue *queue,
};
}
uint32_t signal_count = info->signalSemaphoreInfoCount;
if (signal_mem_sync) {
struct vk_sync *mem_sync;
result = queue->base.device->create_sync_for_memory(queue->base.device,
mem_signal->memory,
true, &mem_sync);
if (unlikely(result != VK_SUCCESS))
goto fail;
submit->_mem_signal_temp = mem_sync;
assert(submit->signals[signal_count].sync == NULL);
submit->signals[signal_count++] = (struct vk_sync_signal) {
.sync = mem_sync,
.stage_mask = ~(VkPipelineStageFlags2KHR)0,
};
}
if (fence != NULL) {
uint32_t fence_idx = info->signalSemaphoreInfoCount;
assert(submit->signal_count == fence_idx + 1);
assert(submit->signals[fence_idx].sync == NULL);
submit->signals[fence_idx] = (struct vk_sync_signal) {
assert(submit->signals[signal_count].sync == NULL);
submit->signals[signal_count++] = (struct vk_sync_signal) {
.sync = vk_fence_get_active_sync(fence),
.stage_mask = ~(VkPipelineStageFlags2KHR)0,
};
}
assert(signal_count == submit->signal_count);
switch (queue->base.device->timeline_mode) {
case VK_DEVICE_TIMELINE_MODE_ASSISTED:
if (!vk_queue_has_submit_thread(queue)) {
@ -749,6 +779,22 @@ vk_queue_submit(struct vk_queue *queue,
}
vk_queue_push_submit(queue, submit);
if (signal_mem_sync) {
/* If we're signaling a memory object, we have to ensure that
* vkQueueSubmit does not return until the kernel submission has
* happened. Otherwise, we may get a race between this process
* and whatever is going to wait on the object where the other
* process may wait before we've submitted our work. Drain the
* queue now to avoid this. It's the responsibility of the caller
* to ensure that any vkQueueSubmit which signals a memory object
* has fully resolved dependencies.
*/
result = vk_queue_drain(queue);
if (unlikely(result != VK_SUCCESS))
return result;
}
return VK_SUCCESS;
} else {
result = vk_queue_submit_final(queue, submit);

View File

@ -188,6 +188,7 @@ struct vk_queue_submit {
/* Used internally; should be ignored by drivers */
struct vk_sync **_wait_temps;
struct vk_sync *_mem_signal_temp;
struct vk_sync_timeline_point **_wait_points;
struct vk_sync_timeline_point **_signal_points;
};