freedreno/drm: Add pipe tracking for deferred submits

Now that we have some bo state tracking for userspace fences, we can
build on this to add a way for the pipe implementation to defer a submit
flush in order to merge submits into a single ioctl.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10444>
This commit is contained in:
Rob Clark 2021-04-17 11:37:06 -07:00 committed by Marge Bot
parent aafcd8aacb
commit 62a6773d80
8 changed files with 125 additions and 1 deletion

View File

@ -93,6 +93,8 @@ ForEachMacros:
- foreach_line_in_section
- perf_time
- perf_time_ctx
- foreach_submit
- foreach_submit_safe
IncludeBlocks: Preserve
IncludeCategories:

View File

@ -358,6 +358,15 @@ bo_del(struct fd_bo *bo)
bo->funcs->destroy(bo);
}
static void
bo_flush(struct fd_bo *bo)
{
   /* For each pipe that holds an unsignaled fence on this bo, ask the
    * pipe to flush its deferred submits up to (at least) that fence.
    */
   for (int idx = 0; idx < bo->nr_fences; idx++) {
      struct fd_bo_fence *fence = &bo->fences[idx];

      fd_pipe_flush(fence->pipe, fence->fence);
   }
}
int
fd_bo_get_name(struct fd_bo *bo, uint32_t *name)
{
@ -377,6 +386,7 @@ fd_bo_get_name(struct fd_bo *bo, uint32_t *name)
simple_mtx_unlock(&table_lock);
bo->bo_reuse = NO_CACHE;
bo->shared = true;
bo_flush(bo);
}
*name = bo->name;
@ -389,6 +399,7 @@ fd_bo_handle(struct fd_bo *bo)
{
bo->bo_reuse = NO_CACHE;
bo->shared = true;
bo_flush(bo);
return bo->handle;
}
@ -405,6 +416,7 @@ fd_bo_dmabuf(struct fd_bo *bo)
bo->bo_reuse = NO_CACHE;
bo->shared = true;
bo_flush(bo);
return prime_fd;
}
@ -450,11 +462,19 @@ fd_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op)
case FD_BO_STATE_IDLE:
return 0;
case FD_BO_STATE_BUSY:
if (op & FD_BO_PREP_FLUSH)
bo_flush(bo);
return -EBUSY;
case FD_BO_STATE_UNKNOWN:
break;
}
}
/* In case the bo is referenced by a deferred submit, flush up to the
* required fence now:
*/
bo_flush(bo);
return bo->funcs->cpu_prep(bo, pipe, op);
}

View File

@ -85,6 +85,9 @@ out:
fd_bo_cache_init(&dev->bo_cache, false);
fd_bo_cache_init(&dev->ring_cache, true);
list_inithead(&dev->deferred_submits);
simple_mtx_init(&dev->submit_lock, mtx_plain);
return dev;
}
@ -110,6 +113,15 @@ fd_device_ref(struct fd_device *dev)
return dev;
}
void
fd_device_purge(struct fd_device *dev)
{
   /* Immediately evict everything from both BO caches (a time of 0
    * drops all entries, matching the teardown path).  table_lock
    * guards the caches.
    */
   struct fd_bo_cache *const caches[] = {&dev->bo_cache, &dev->ring_cache};

   simple_mtx_lock(&table_lock);
   for (unsigned i = 0; i < 2; i++)
      fd_bo_cache_cleanup(caches[i], 0);
   simple_mtx_unlock(&table_lock);
}
static void
fd_device_del_impl(struct fd_device *dev)
{
@ -117,6 +129,8 @@ fd_device_del_impl(struct fd_device *dev)
simple_mtx_assert_locked(&table_lock);
assert(list_is_empty(&dev->deferred_submits));
fd_bo_cache_cleanup(&dev->bo_cache, 0);
fd_bo_cache_cleanup(&dev->ring_cache, 0);
_mesa_hash_table_destroy(dev->handle_table, NULL);

View File

@ -113,6 +113,7 @@ struct fd_fence {
struct fd_device *fd_device_new(int fd);
struct fd_device *fd_device_new_dup(int fd);
struct fd_device *fd_device_ref(struct fd_device *dev);
void fd_device_purge(struct fd_device *dev);
void fd_device_del(struct fd_device *dev);
int fd_device_fd(struct fd_device *dev);
@ -140,6 +141,7 @@ struct fd_pipe *fd_pipe_new2(struct fd_device *dev, enum fd_pipe_id id,
struct fd_pipe *fd_pipe_ref(struct fd_pipe *pipe);
struct fd_pipe *fd_pipe_ref_locked(struct fd_pipe *pipe);
void fd_pipe_del(struct fd_pipe *pipe);
void fd_pipe_purge(struct fd_pipe *pipe);
int fd_pipe_get_param(struct fd_pipe *pipe, enum fd_param_id param,
uint64_t *value);
int fd_pipe_wait(struct fd_pipe *pipe, const struct fd_fence *fence);

View File

@ -117,6 +117,37 @@ fd_pipe_del_locked(struct fd_pipe *pipe)
pipe->funcs->destroy(pipe);
}
/**
* Discard any unflushed deferred submits. This is called at context-
* destroy to make sure we don't leak unflushed submits.
*/
void
fd_pipe_purge(struct fd_pipe *pipe)
{
struct fd_device *dev = pipe->dev;
struct list_head deferred_submits;
list_inithead(&deferred_submits);
simple_mtx_lock(&dev->submit_lock);
foreach_submit_safe (deferred_submit, &dev->deferred_submits) {
if (deferred_submit->pipe != pipe)
continue;
list_del(&deferred_submit->node);
list_addtail(&deferred_submit->node, &deferred_submits);
dev->deferred_cmds -= fd_ringbuffer_cmd_count(deferred_submit->primary);
}
simple_mtx_unlock(&dev->submit_lock);
foreach_submit_safe (deferred_submit, &deferred_submits) {
list_del(&deferred_submit->node);
fd_submit_del(deferred_submit);
}
}
int
fd_pipe_get_param(struct fd_pipe *pipe, enum fd_param_id param, uint64_t *value)
{
@ -136,6 +167,8 @@ fd_pipe_wait_timeout(struct fd_pipe *pipe, const struct fd_fence *fence,
if (!fd_fence_after(fence->ufence, pipe->control->fence))
return 0;
fd_pipe_flush(pipe, fence->ufence);
return pipe->funcs->wait(pipe, fence, timeout);
}

View File

@ -128,8 +128,27 @@ struct fd_device {
/* just for valgrind: */
int bo_size;
/**
* List of deferred submits, protected by submit_lock. The deferred
* submits are tracked globally per-device, even if they execute in
* different order on the kernel side (ie. due to different priority
* submitqueues, etc) to preserve the order that they are passed off
* to the kernel. Once the kernel has them, it is the fences' job
* to preserve correct order of execution.
*/
struct list_head deferred_submits;
unsigned deferred_cmds;
simple_mtx_t submit_lock;
};
#define foreach_submit(name, list) \
list_for_each_entry(struct fd_submit, name, list, node)
#define foreach_submit_safe(name, list) \
list_for_each_entry_safe(struct fd_submit, name, list, node)
#define last_submit(list) \
list_last_entry(list, struct fd_submit, node)
void fd_bo_cache_init(struct fd_bo_cache *cache, int coarse);
void fd_bo_cache_cleanup(struct fd_bo_cache *cache, time_t time);
struct fd_bo *fd_bo_cache_alloc(struct fd_bo_cache *cache, uint32_t *size,
@ -145,6 +164,13 @@ struct fd_pipe_funcs {
struct fd_ringbuffer *(*ringbuffer_new_object)(struct fd_pipe *pipe,
uint32_t size);
struct fd_submit *(*submit_new)(struct fd_pipe *pipe);
/**
* Flush any deferred submits (if deferred submits are supported by
* the pipe implementation)
*/
void (*flush)(struct fd_pipe *pipe, uint32_t fence);
int (*get_param)(struct fd_pipe *pipe, enum fd_param_id param,
uint64_t *value);
int (*wait)(struct fd_pipe *pipe, const struct fd_fence *fence,
@ -177,6 +203,7 @@ struct fd_pipe {
* play)
*/
uint32_t last_fence;
struct fd_bo *control_mem;
volatile struct fd_pipe_control *control;
@ -185,6 +212,14 @@ struct fd_pipe {
uint32_t fd_pipe_emit_fence(struct fd_pipe *pipe, struct fd_ringbuffer *ring);
static inline void
fd_pipe_flush(struct fd_pipe *pipe, uint32_t fence)
{
   /* The flush hook is optional: pipe implementations that do not
    * support deferred submits have nothing buffered, so this is a
    * no-op for them.
    */
   if (pipe->funcs->flush)
      pipe->funcs->flush(pipe, fence);
}
struct fd_submit_funcs {
struct fd_ringbuffer *(*new_ringbuffer)(struct fd_submit *submit,
uint32_t size,
@ -201,8 +236,23 @@ struct fd_submit {
struct fd_ringbuffer *primary;
uint32_t fence;
struct list_head node; /* node in fd_pipe::deferred_submits */
};
static inline unsigned
fd_dev_count_deferred_cmds(struct fd_device *dev)
{
unsigned nr = 0;
simple_mtx_assert_locked(&dev->submit_lock);
list_for_each_entry (struct fd_submit, submit, &dev->deferred_submits, node) {
nr += fd_ringbuffer_cmd_count(submit->primary);
}
return nr;
}
struct fd_bo_funcs {
int (*offset)(struct fd_bo *bo, uint64_t *offset);
int (*cpu_prep)(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op);

View File

@ -377,6 +377,7 @@ fd_context_destroy(struct pipe_context *pctx)
}
fd_device_del(ctx->dev);
fd_pipe_purge(ctx->pipe);
fd_pipe_del(ctx->pipe);
simple_mtx_destroy(&ctx->gmem_lock);

View File

@ -147,8 +147,10 @@ fd_screen_destroy(struct pipe_screen *pscreen)
if (screen->pipe)
fd_pipe_del(screen->pipe);
if (screen->dev)
if (screen->dev) {
fd_device_purge(screen->dev);
fd_device_del(screen->dev);
}
if (screen->ro)
screen->ro->destroy(screen->ro);