freedreno/drm: Add pipe tracking for deferred submits

Now that we have some bo state tracking for userspace fences, we can
build on this to add a way for the pipe implementation to defer a submit
flush in order to merge submits into a single ioctl.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10444>
This commit is contained in:
Rob Clark 2021-04-17 11:37:06 -07:00 committed by Marge Bot
parent aafcd8aacb
commit 62a6773d80
8 changed files with 125 additions and 1 deletion

View File

@ -93,6 +93,8 @@ ForEachMacros:
- foreach_line_in_section
- perf_time
- perf_time_ctx
- foreach_submit
- foreach_submit_safe
IncludeBlocks: Preserve
IncludeCategories:

View File

@ -358,6 +358,15 @@ bo_del(struct fd_bo *bo)
bo->funcs->destroy(bo);
}
static void
bo_flush(struct fd_bo *bo)
{
   /* For each pipe that holds an unsignaled fence on this bo, ask the
    * pipe to flush its deferred submits up to (at least) that fence.
    */
   for (int idx = 0; idx < bo->nr_fences; idx++) {
      struct fd_bo_fence *fence = &bo->fences[idx];

      fd_pipe_flush(fence->pipe, fence->fence);
   }
}
int
fd_bo_get_name(struct fd_bo *bo, uint32_t *name)
{
@ -377,6 +386,7 @@ fd_bo_get_name(struct fd_bo *bo, uint32_t *name)
simple_mtx_unlock(&table_lock);
bo->bo_reuse = NO_CACHE;
bo->shared = true;
bo_flush(bo);
}
*name = bo->name;
@ -389,6 +399,7 @@ fd_bo_handle(struct fd_bo *bo)
{
bo->bo_reuse = NO_CACHE;
bo->shared = true;
bo_flush(bo);
return bo->handle;
}
@ -405,6 +416,7 @@ fd_bo_dmabuf(struct fd_bo *bo)
bo->bo_reuse = NO_CACHE;
bo->shared = true;
bo_flush(bo);
return prime_fd;
}
@ -450,11 +462,19 @@ fd_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op)
case FD_BO_STATE_IDLE:
return 0;
case FD_BO_STATE_BUSY:
if (op & FD_BO_PREP_FLUSH)
bo_flush(bo);
return -EBUSY;
case FD_BO_STATE_UNKNOWN:
break;
}
}
/* In case the bo is referenced by a deferred submit, flush up to the
* required fence now:
*/
bo_flush(bo);
return bo->funcs->cpu_prep(bo, pipe, op);
}

View File

@ -85,6 +85,9 @@ out:
fd_bo_cache_init(&dev->bo_cache, false);
fd_bo_cache_init(&dev->ring_cache, true);
list_inithead(&dev->deferred_submits);
simple_mtx_init(&dev->submit_lock, mtx_plain);
return dev;
}
@ -110,6 +113,15 @@ fd_device_ref(struct fd_device *dev)
return dev;
}
void
fd_device_purge(struct fd_device *dev)
{
   /* Immediately evict everything from both BO caches (a time of 0
    * drops all entries, matching the teardown path).  table_lock
    * guards the caches.
    */
   struct fd_bo_cache *const caches[] = {&dev->bo_cache, &dev->ring_cache};

   simple_mtx_lock(&table_lock);
   for (unsigned i = 0; i < 2; i++)
      fd_bo_cache_cleanup(caches[i], 0);
   simple_mtx_unlock(&table_lock);
}
static void
fd_device_del_impl(struct fd_device *dev)
{
@ -117,6 +129,8 @@ fd_device_del_impl(struct fd_device *dev)
simple_mtx_assert_locked(&table_lock);
assert(list_is_empty(&dev->deferred_submits));
fd_bo_cache_cleanup(&dev->bo_cache, 0);
fd_bo_cache_cleanup(&dev->ring_cache, 0);
_mesa_hash_table_destroy(dev->handle_table, NULL);

View File

@ -113,6 +113,7 @@ struct fd_fence {
struct fd_device *fd_device_new(int fd);
struct fd_device *fd_device_new_dup(int fd);
struct fd_device *fd_device_ref(struct fd_device *dev);
void fd_device_purge(struct fd_device *dev);
void fd_device_del(struct fd_device *dev);
int fd_device_fd(struct fd_device *dev);
@ -140,6 +141,7 @@ struct fd_pipe *fd_pipe_new2(struct fd_device *dev, enum fd_pipe_id id,
struct fd_pipe *fd_pipe_ref(struct fd_pipe *pipe);
struct fd_pipe *fd_pipe_ref_locked(struct fd_pipe *pipe);
void fd_pipe_del(struct fd_pipe *pipe);
void fd_pipe_purge(struct fd_pipe *pipe);
int fd_pipe_get_param(struct fd_pipe *pipe, enum fd_param_id param,
uint64_t *value);
int fd_pipe_wait(struct fd_pipe *pipe, const struct fd_fence *fence);

View File

@ -117,6 +117,37 @@ fd_pipe_del_locked(struct fd_pipe *pipe)
pipe->funcs->destroy(pipe);
}
/**
* Discard any unflushed deferred submits. This is called at context-
* destroy to make sure we don't leak unflushed submits.
*/
void
fd_pipe_purge(struct fd_pipe *pipe)
{
struct fd_device *dev = pipe->dev;
struct list_head deferred_submits;
list_inithead(&deferred_submits);
simple_mtx_lock(&dev->submit_lock);
foreach_submit_safe (deferred_submit, &dev->deferred_submits) {
if (deferred_submit->pipe != pipe)
continue;
list_del(&deferred_submit->node);
list_addtail(&deferred_submit->node, &deferred_submits);
dev->deferred_cmds -= fd_ringbuffer_cmd_count(deferred_submit->primary);
}
simple_mtx_unlock(&dev->submit_lock);
foreach_submit_safe (deferred_submit, &deferred_submits) {
list_del(&deferred_submit->node);
fd_submit_del(deferred_submit);
}
}
int
fd_pipe_get_param(struct fd_pipe *pipe, enum fd_param_id param, uint64_t *value)
{
@ -136,6 +167,8 @@ fd_pipe_wait_timeout(struct fd_pipe *pipe, const struct fd_fence *fence,
if (!fd_fence_after(fence->ufence, pipe->control->fence))
return 0;
fd_pipe_flush(pipe, fence->ufence);
return pipe->funcs->wait(pipe, fence, timeout);
}

View File

@ -128,8 +128,27 @@ struct fd_device {
/* just for valgrind: */
int bo_size;
/**
* List of deferred submits, protected by submit_lock. The deferred
* submits are tracked globally per-device, even if they execute in
* different order on the kernel side (ie. due to different priority
* submitqueues, etc) to preserve the order that they are passed off
* to the kernel. Once the kernel has them, it is the fences' job
* to preserve correct order of execution.
*/
struct list_head deferred_submits;
unsigned deferred_cmds;
simple_mtx_t submit_lock;
};
#define foreach_submit(name, list) \
list_for_each_entry(struct fd_submit, name, list, node)
#define foreach_submit_safe(name, list) \
list_for_each_entry_safe(struct fd_submit, name, list, node)
#define last_submit(list) \
list_last_entry(list, struct fd_submit, node)
void fd_bo_cache_init(struct fd_bo_cache *cache, int coarse);
void fd_bo_cache_cleanup(struct fd_bo_cache *cache, time_t time);
struct fd_bo *fd_bo_cache_alloc(struct fd_bo_cache *cache, uint32_t *size,
@ -145,6 +164,13 @@ struct fd_pipe_funcs {
struct fd_ringbuffer *(*ringbuffer_new_object)(struct fd_pipe *pipe,
uint32_t size);
struct fd_submit *(*submit_new)(struct fd_pipe *pipe);
/**
* Flush any deferred submits (if deferred submits are supported by
* the pipe implementation)
*/
void (*flush)(struct fd_pipe *pipe, uint32_t fence);
int (*get_param)(struct fd_pipe *pipe, enum fd_param_id param,
uint64_t *value);
int (*wait)(struct fd_pipe *pipe, const struct fd_fence *fence,
@ -177,6 +203,7 @@ struct fd_pipe {
* play)
*/
uint32_t last_fence;
struct fd_bo *control_mem;
volatile struct fd_pipe_control *control;
@ -185,6 +212,14 @@ struct fd_pipe {
uint32_t fd_pipe_emit_fence(struct fd_pipe *pipe, struct fd_ringbuffer *ring);
static inline void
fd_pipe_flush(struct fd_pipe *pipe, uint32_t fence)
{
   /* The flush hook is optional: pipe implementations that do not
    * support deferred submits have nothing buffered, so this is a
    * no-op for them.
    */
   if (pipe->funcs->flush)
      pipe->funcs->flush(pipe, fence);
}
struct fd_submit_funcs {
struct fd_ringbuffer *(*new_ringbuffer)(struct fd_submit *submit,
uint32_t size,
@ -201,8 +236,23 @@ struct fd_submit {
struct fd_ringbuffer *primary;
uint32_t fence;
struct list_head node; /* node in fd_pipe::deferred_submits */
};
static inline unsigned
fd_dev_count_deferred_cmds(struct fd_device *dev)
{
unsigned nr = 0;
simple_mtx_assert_locked(&dev->submit_lock);
list_for_each_entry (struct fd_submit, submit, &dev->deferred_submits, node) {
nr += fd_ringbuffer_cmd_count(submit->primary);
}
return nr;
}
struct fd_bo_funcs {
int (*offset)(struct fd_bo *bo, uint64_t *offset);
int (*cpu_prep)(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op);

View File

@ -377,6 +377,7 @@ fd_context_destroy(struct pipe_context *pctx)
}
fd_device_del(ctx->dev);
fd_pipe_purge(ctx->pipe);
fd_pipe_del(ctx->pipe);
simple_mtx_destroy(&ctx->gmem_lock);

View File

@ -147,8 +147,10 @@ fd_screen_destroy(struct pipe_screen *pscreen)
if (screen->pipe)
fd_pipe_del(screen->pipe);
if (screen->dev)
if (screen->dev) {
fd_device_purge(screen->dev);
fd_device_del(screen->dev);
}
if (screen->ro)
screen->ro->destroy(screen->ro);