turnip: Port to the common timeline semaphore implementation

Define struct tu_timeline_sync, a binary sync built on top of a drm syncobj,
for the emulated timeline support in the common vk_sync_timeline code.

Also implement init/finish/reset/wait_many methods for the struct.

v1: Do not set MSM_SUBMIT_SYNCOBJ_RESET for wait syncobjs, since resetting
them is already handled by the common implementation.

Signed-off-by: Hyunjun Ko <zzoon@igalia.com>
Reviewed-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14105>

Author: Hyunjun Ko, 2022-01-12 02:12:19 +00:00 (committed by Marge Bot)
Commit: 0a82a26a18, parent: 479a1c405e
3 changed files with 286 additions and 20 deletions
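
For context, a minimal, illustrative sketch (not part of this change) of what the
now-advertised timelineSemaphore feature lets an application do: create a timeline
semaphore and block on a specific point from the host. The `device` handle and the
wait value 2 are assumptions for the example.

/* Illustrative only: host-side use of a timeline semaphore via the
 * Vulkan 1.2 core entry points promoted from VK_KHR_timeline_semaphore. */
VkSemaphoreTypeCreateInfo type_info = {
   .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
   .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
   .initialValue = 0,
};
VkSemaphoreCreateInfo create_info = {
   .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
   .pNext = &type_info,
};
VkSemaphore sem;
vkCreateSemaphore(device, &create_info, NULL, &sem);

uint64_t value = 2;
VkSemaphoreWaitInfo wait_info = {
   .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
   .semaphoreCount = 1,
   .pSemaphores = &sem,
   .pValues = &value,
};
vkWaitSemaphores(device, &wait_info, UINT64_MAX);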


@@ -159,7 +159,7 @@ get_device_extensions(const struct tu_physical_device *device,
.KHR_buffer_device_address = true,
.KHR_shader_integer_dot_product = true,
#ifndef TU_USE_KGSL
-      .KHR_timeline_semaphore = false,
+      .KHR_timeline_semaphore = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
/* This extension is supported by common code across drivers, but it is
@@ -562,7 +562,7 @@ tu_get_physical_device_features_1_2(struct tu_physical_device *pdevice,
features->shaderSubgroupExtendedTypes = true;
features->separateDepthStencilLayouts = true;
features->hostQueryReset = true;
-   features->timelineSemaphore = false;
+   features->timelineSemaphore = true;
features->bufferDeviceAddress = true;
features->bufferDeviceAddressCaptureReplay = false;
features->bufferDeviceAddressMultiDevice = false;
@@ -757,7 +757,7 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: {
VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *features =
(VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *) ext;
-         features->timelineSemaphore = false;
+         features->timelineSemaphore = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: {


@@ -331,6 +331,221 @@ tu_bo_finish(struct tu_device *dev, struct tu_bo *bo)
tu_gem_close(dev, bo->gem_handle);
}
extern const struct vk_sync_type tu_timeline_sync_type;
static inline bool
vk_sync_is_tu_timeline_sync(const struct vk_sync *sync)
{
return sync->type == &tu_timeline_sync_type;
}
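
/* Downcast a vk_sync that is known to be a tu_timeline_sync. */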
static struct tu_timeline_sync *
to_tu_timeline_sync(struct vk_sync *sync)
{
assert(sync->type == &tu_timeline_sync_type);
return container_of(sync, struct tu_timeline_sync, base);
}
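
/* Return the drm syncobj handle backing a vk_sync, whether it is an emulated
 * timeline point or a plain drm syncobj.
 */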
static uint32_t
tu_syncobj_from_vk_sync(struct vk_sync *sync)
{
uint32_t syncobj = -1;
if (vk_sync_is_tu_timeline_sync(sync)) {
syncobj = to_tu_timeline_sync(sync)->syncobj;
} else if (vk_sync_type_is_drm_syncobj(sync->type)) {
syncobj = vk_sync_as_drm_syncobj(sync)->syncobj;
}
assert(syncobj != -1);
return syncobj;
}
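
/* Create the (unsignaled) drm syncobj backing one timeline point; a non-zero
 * initial value marks the point as already signaled on the CPU side.
 */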
static VkResult
tu_timeline_sync_init(struct vk_device *vk_device,
struct vk_sync *vk_sync,
uint64_t initial_value)
{
struct tu_device *device = container_of(vk_device, struct tu_device, vk);
struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);
uint32_t flags = 0;
assert(device->fd >= 0);
int err = drmSyncobjCreate(device->fd, flags, &sync->syncobj);
if (err < 0) {
return vk_error(device, VK_ERROR_DEVICE_LOST);
}
sync->state = initial_value ? TU_TIMELINE_SYNC_STATE_SIGNALED :
TU_TIMELINE_SYNC_STATE_RESET;
return VK_SUCCESS;
}
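
/* Destroy the drm syncobj backing the point. */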
static void
tu_timeline_sync_finish(struct vk_device *vk_device,
struct vk_sync *vk_sync)
{
struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);
assert(dev->fd >= 0);
ASSERTED int err = drmSyncobjDestroy(dev->fd, sync->syncobj);
assert(err == 0);
}
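
/* Reset the backing syncobj and return the point to the RESET state. */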
static VkResult
tu_timeline_sync_reset(struct vk_device *vk_device,
struct vk_sync *vk_sync)
{
struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);
int err = drmSyncobjReset(dev->fd, &sync->syncobj, 1);
if (err) {
return vk_errorf(dev, VK_ERROR_UNKNOWN,
"DRM_IOCTL_SYNCOBJ_RESET failed: %m");
} else {
sync->state = TU_TIMELINE_SYNC_STATE_RESET;
}
return VK_SUCCESS;
}
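
/* Thin wrapper around drmSyncobjWait(); WAIT_FOR_SUBMIT also covers syncobjs
 * that have no fence attached yet, and the timeout is an absolute timestamp
 * in nanoseconds.
 */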
static VkResult
drm_syncobj_wait(struct tu_device *device,
uint32_t *handles, uint32_t count_handles,
int64_t timeout_nsec, bool wait_all)
{
uint32_t syncobj_wait_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
if (wait_all) syncobj_wait_flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;
int err = drmSyncobjWait(device->fd, handles,
count_handles, timeout_nsec,
syncobj_wait_flags,
NULL /* first_signaled */);
if (err && errno == ETIME) {
return VK_TIMEOUT;
} else if (err) {
return vk_errorf(device, VK_ERROR_UNKNOWN,
"DRM_IOCTL_SYNCOBJ_WAIT failed: %m");
}
return VK_SUCCESS;
}
/* Based on anv_bo_sync_wait */
static VkResult
tu_timeline_sync_wait(struct vk_device *vk_device,
uint32_t wait_count,
const struct vk_sync_wait *waits,
enum vk_sync_wait_flags wait_flags,
uint64_t abs_timeout_ns)
{
struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
bool wait_all = !(wait_flags & VK_SYNC_WAIT_ANY);
uint32_t handles[wait_count];
uint32_t submit_count;
VkResult ret = VK_SUCCESS;
uint32_t pending = wait_count;
struct tu_timeline_sync *submitted_syncs[wait_count];
while (pending) {
pending = 0;
submit_count = 0;
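      /* Classify every wait: RESET means the point has not been submitted
       * yet, SUBMITTED points are collected for a real syncobj wait, and an
       * already SIGNALED point satisfies a WAIT_ANY request immediately.
       */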
for (unsigned i = 0; i < wait_count; ++i) {
struct tu_timeline_sync *sync = to_tu_timeline_sync(waits[i].sync);
if (sync->state == TU_TIMELINE_SYNC_STATE_RESET) {
assert(!(wait_flags & VK_SYNC_WAIT_PENDING));
pending++;
} else if (sync->state == TU_TIMELINE_SYNC_STATE_SIGNALED) {
if (wait_flags & VK_SYNC_WAIT_ANY)
return VK_SUCCESS;
} else if (sync->state == TU_TIMELINE_SYNC_STATE_SUBMITTED) {
if (!(wait_flags & VK_SYNC_WAIT_PENDING)) {
handles[submit_count] = sync->syncobj;
submitted_syncs[submit_count++] = sync;
}
}
}
if (submit_count > 0) {
do {
ret = drm_syncobj_wait(dev, handles, submit_count, abs_timeout_ns, wait_all);
} while (ret == VK_TIMEOUT && os_time_get_nano() < abs_timeout_ns);
if (ret == VK_SUCCESS) {
for (unsigned i = 0; i < submit_count; ++i) {
struct tu_timeline_sync *sync = submitted_syncs[i];
sync->state = TU_TIMELINE_SYNC_STATE_SIGNALED;
}
} else {
/* return error covering timeout */
return ret;
}
} else if (pending > 0) {
/* If we've hit this then someone decided to vkWaitForFences before
* they've actually submitted any of them to a queue. This is a
* fairly pessimal case, so it's ok to lock here and use a standard
* pthreads condition variable.
*/
pthread_mutex_lock(&dev->submit_mutex);
/* It's possible that some of the fences have changed state since the
* last time we checked. Now that we have the lock, check for
* pending fences again and don't wait if it's changed.
*/
uint32_t now_pending = 0;
for (uint32_t i = 0; i < wait_count; i++) {
struct tu_timeline_sync *sync = to_tu_timeline_sync(waits[i].sync);
if (sync->state == TU_TIMELINE_SYNC_STATE_RESET)
now_pending++;
}
assert(now_pending <= pending);
if (now_pending == pending) {
struct timespec abstime = {
.tv_sec = abs_timeout_ns / NSEC_PER_SEC,
.tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
};
         ASSERTED int cond_err =
            pthread_cond_timedwait(&dev->timeline_cond,
                                   &dev->submit_mutex, &abstime);
         assert(cond_err != EINVAL);
if (os_time_get_nano() >= abs_timeout_ns) {
pthread_mutex_unlock(&dev->submit_mutex);
return VK_TIMEOUT;
}
}
pthread_mutex_unlock(&dev->submit_mutex);
}
}
return ret;
}
const struct vk_sync_type tu_timeline_sync_type = {
.size = sizeof(struct tu_timeline_sync),
.features = VK_SYNC_FEATURE_BINARY |
VK_SYNC_FEATURE_GPU_WAIT |
VK_SYNC_FEATURE_GPU_MULTI_WAIT |
VK_SYNC_FEATURE_CPU_WAIT |
VK_SYNC_FEATURE_CPU_RESET |
VK_SYNC_FEATURE_WAIT_ANY |
VK_SYNC_FEATURE_WAIT_PENDING,
.init = tu_timeline_sync_init,
.finish = tu_timeline_sync_finish,
.reset = tu_timeline_sync_reset,
.wait_many = tu_timeline_sync_wait,
};
static VkResult
tu_drm_device_init(struct tu_physical_device *device,
struct tu_instance *instance,
@@ -427,9 +642,11 @@ tu_drm_device_init(struct tu_physical_device *device,
}
device->syncobj_type = vk_drm_syncobj_get_type(fd);
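   /* Expose the kernel's binary syncobj type plus the emulated timeline type
    * layered on top of tu_timeline_sync.
    */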
+   device->timeline_type = vk_sync_timeline_get_type(&tu_timeline_sync_type);
    device->sync_types[0] = &device->syncobj_type;
-   device->sync_types[1] = NULL;
+   device->sync_types[1] = &device->timeline_type.sync;
+   device->sync_types[2] = NULL;
device->heap.size = tu_get_system_heap_size();
device->heap.used = 0u;
@@ -745,6 +962,37 @@ tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
}
}
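
   /* Update the CPU-side state of any emulated timeline points now that the
    * submit has been queued.
    */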
for (uint32_t i = 0; i < submit->vk_submit->wait_count; i++) {
if (!vk_sync_is_tu_timeline_sync(submit->vk_submit->waits[i].sync))
continue;
struct tu_timeline_sync *sync =
container_of(submit->vk_submit->waits[i].sync, struct tu_timeline_sync, base);
assert(sync->state != TU_TIMELINE_SYNC_STATE_RESET);
      /* Mark the wait timeline sync as SIGNALED: the syncobj has done its job
       * for this submit, so the point can be garbage-collected later.
       */
sync->state = TU_TIMELINE_SYNC_STATE_SIGNALED;
}
for (uint32_t i = 0; i < submit->vk_submit->signal_count; i++) {
if (!vk_sync_is_tu_timeline_sync(submit->vk_submit->signals[i].sync))
continue;
struct tu_timeline_sync *sync =
container_of(submit->vk_submit->signals[i].sync, struct tu_timeline_sync, base);
assert(sync->state == TU_TIMELINE_SYNC_STATE_RESET);
      /* Mark the signal timeline sync as SUBMITTED so that a CPU wait on this
       * point can block until it completes if necessary.
       */
sync->state = TU_TIMELINE_SYNC_STATE_SUBMITTED;
}
pthread_cond_broadcast(&queue->device->timeline_cond);
return VK_SUCCESS;
}
@@ -756,6 +1004,7 @@ get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
tv->tv_sec = t.tv_sec + ns / 1000000000;
tv->tv_nsec = t.tv_nsec + ns % 1000000000;
}
VkResult
tu_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj)
{
@@ -804,27 +1053,19 @@ tu_queue_submit(struct vk_queue *vk_queue, struct vk_queue_submit *submit)
for (uint32_t i = 0; i < submit->wait_count; i++) {
struct vk_sync *sync = submit->waits[i].sync;
-      if (vk_sync_type_is_drm_syncobj(sync->type)) {
-         struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(sync);
-         in_syncobjs[nr_in_syncobjs++] = (struct drm_msm_gem_submit_syncobj) {
-            .handle = syncobj->syncobj,
-            .flags = 0,
-         };
-      }
+      in_syncobjs[nr_in_syncobjs++] = (struct drm_msm_gem_submit_syncobj) {
+         .handle = tu_syncobj_from_vk_sync(sync),
+         .flags = 0,
+      };
}
for (uint32_t i = 0; i < submit->signal_count; i++) {
struct vk_sync *sync = submit->signals[i].sync;
-      if (vk_sync_type_is_drm_syncobj(sync->type)) {
-         struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(sync);
-         out_syncobjs[nr_out_syncobjs++] = (struct drm_msm_gem_submit_syncobj) {
-            .handle = syncobj->syncobj,
-            .flags = 0,
-         };
-      }
+      out_syncobjs[nr_out_syncobjs++] = (struct drm_msm_gem_submit_syncobj) {
+         .handle = tu_syncobj_from_vk_sync(sync),
+         .flags = 0,
+      };
}
ret = tu_queue_submit_locked(queue, submit_req);


@@ -102,6 +102,7 @@ typedef uint32_t xcb_window_t;
#include "vk_fence.h"
#include "vk_semaphore.h"
#include "vk_drm_syncobj.h"
#include "vk_sync_timeline.h"
#define MAX_VBS 32
#define MAX_VERTEX_ATTRIBS 32
@@ -232,6 +233,7 @@ struct tu_physical_device
struct tu_memory_heap heap;
struct vk_sync_type syncobj_type;
struct vk_sync_timeline_type timeline_type;
const struct vk_sync_type *sync_types[3];
};
@@ -312,6 +314,29 @@ struct tu_syncobj;
#endif
struct tu_u_trace_syncobj;
/* tu_timeline_sync is a binary vk_sync built on a drm syncobj, used as the
 * point type for vk_sync_timeline. The handling logic is mostly copied from
 * anv_bo_sync, which works in a similar way.
 */
enum tu_timeline_sync_state {
/** Indicates that this is a new (or newly reset) fence. */
TU_TIMELINE_SYNC_STATE_RESET,
/** Indicates that this fence has been submitted to the GPU but is still
* (as far as we know) in use by the GPU.
*/
TU_TIMELINE_SYNC_STATE_SUBMITTED,
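/** Indicates that this fence has signaled. */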
TU_TIMELINE_SYNC_STATE_SIGNALED,
};
struct tu_timeline_sync {
struct vk_sync base;
enum tu_timeline_sync_state state;
uint32_t syncobj;
};
struct tu_queue
{
struct vk_queue vk;