freedreno/drm: Extract out "softpin" submit/ringbuffer base class
We are going to want basically the identical thing, other than
flush_submit_list, for the virtio backend. Now that we've moved various
other dependencies into the base classes, extract out an abstract base
class for submit/ringbuffer.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14900>
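For context, the extracted base class leaves exactly one hook for a backend to
supply. A minimal sketch of what a future backend would add, using the
fd_submit_sp_new() API introduced below (the virtio_* names here are
hypothetical placeholders, not part of this commit):

static int
virtio_flush_submit_list(struct list_head *submit_list)
{
   /* Backend-specific kernel submission would go here; the bo table,
    * submit merging, and deferred-flush logic all live in the shared
    * "softpin" base class.
    */
   return 0;
}

static struct fd_submit *
virtio_submit_new(struct fd_pipe *pipe)
{
   /* The only per-backend hook is flush_submit_list: */
   return fd_submit_sp_new(pipe, virtio_flush_submit_list);
}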
parent 72a427244f
commit ea339137b0
src/freedreno/drm/freedreno_ringbuffer_sp.c (new file)
@@ -0,0 +1,651 @@
/*
 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include <assert.h>
#include <inttypes.h>
#include <pthread.h>

#include "util/hash_table.h"
#include "util/os_file.h"
#include "util/slab.h"

#include "freedreno_ringbuffer_sp.h"

/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
 * by avoiding the additional tracking necessary to build cmds/relocs tables
 * (but still builds a bos table)
 */

#define INIT_SIZE 0x1000

#define SUBALLOC_SIZE (32 * 1024)

/* In the pipe->flush() path, we don't have a util_queue_fence we can wait on,
 * instead use a condition-variable.  Note that pipe->flush() is not expected
 * to be a common/hot path.
 */
static pthread_cond_t flush_cnd = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t flush_mtx = PTHREAD_MUTEX_INITIALIZER;

static void finalize_current_cmd(struct fd_ringbuffer *ring);
static struct fd_ringbuffer *
fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size,
                      enum fd_ringbuffer_flags flags);

/* add (if needed) bo to submit and return index: */
uint32_t
fd_submit_append_bo(struct fd_submit_sp *submit, struct fd_bo *bo)
{
   uint32_t idx;

   /* NOTE: it is legal to use the same bo on different threads for
    * different submits.  But it is not legal to use the same submit
    * from different threads.
    */
   idx = READ_ONCE(bo->idx);

   if (unlikely((idx >= submit->nr_bos) || (submit->bos[idx] != bo))) {
      uint32_t hash = _mesa_hash_pointer(bo);
      struct hash_entry *entry;

      entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
      if (entry) {
         /* found */
         idx = (uint32_t)(uintptr_t)entry->data;
      } else {
         idx = APPEND(submit, bos, fd_bo_ref(bo));

         _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
                                            (void *)(uintptr_t)idx);
      }
      bo->idx = idx;
   }

   return idx;
}

static void
fd_submit_suballoc_ring_bo(struct fd_submit *submit,
                           struct fd_ringbuffer_sp *fd_ring, uint32_t size)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   unsigned suballoc_offset = 0;
   struct fd_bo *suballoc_bo = NULL;

   if (fd_submit->suballoc_ring) {
      struct fd_ringbuffer_sp *suballoc_ring =
         to_fd_ringbuffer_sp(fd_submit->suballoc_ring);

      suballoc_bo = suballoc_ring->ring_bo;
      suballoc_offset =
         fd_ringbuffer_size(fd_submit->suballoc_ring) + suballoc_ring->offset;

      suballoc_offset = align(suballoc_offset, 0x10);

      if ((size + suballoc_offset) > suballoc_bo->size) {
         suballoc_bo = NULL;
      }
   }

   if (!suballoc_bo) {
      // TODO possibly larger size for streaming bo?
      fd_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, SUBALLOC_SIZE);
      fd_ring->offset = 0;
   } else {
      fd_ring->ring_bo = fd_bo_ref(suballoc_bo);
      fd_ring->offset = suballoc_offset;
   }

   struct fd_ringbuffer *old_suballoc_ring = fd_submit->suballoc_ring;

   fd_submit->suballoc_ring = fd_ringbuffer_ref(&fd_ring->base);

   if (old_suballoc_ring)
      fd_ringbuffer_del(old_suballoc_ring);
}

static struct fd_ringbuffer *
fd_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
                            enum fd_ringbuffer_flags flags)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   struct fd_ringbuffer_sp *fd_ring;

   fd_ring = slab_alloc(&fd_submit->ring_pool);

   fd_ring->u.submit = submit;

   /* NOTE: needs to be before _suballoc_ring_bo() since it could
    * increment the refcnt of the current ring
    */
   fd_ring->base.refcnt = 1;

   if (flags & FD_RINGBUFFER_STREAMING) {
      fd_submit_suballoc_ring_bo(submit, fd_ring, size);
   } else {
      if (flags & FD_RINGBUFFER_GROWABLE)
         size = INIT_SIZE;

      fd_ring->offset = 0;
      fd_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size);
   }

   if (!fd_ringbuffer_sp_init(fd_ring, size, flags))
      return NULL;

   return &fd_ring->base;
}

/**
 * Prepare submit for flush, always done synchronously.
 *
 * 1) Finalize primary ringbuffer, at this point no more cmdstream may
 *    be written into it, since from the PoV of the upper level driver
 *    the submit is flushed, even if deferred
 * 2) Add cmdstream bos to bos table
 * 3) Update bo fences
 */
static bool
fd_submit_sp_flush_prep(struct fd_submit *submit, int in_fence_fd,
                        struct fd_submit_fence *out_fence)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   bool has_shared = false;

   finalize_current_cmd(submit->primary);

   struct fd_ringbuffer_sp *primary =
      to_fd_ringbuffer_sp(submit->primary);

   for (unsigned i = 0; i < primary->u.nr_cmds; i++)
      fd_submit_append_bo(fd_submit, primary->u.cmds[i].ring_bo);

   simple_mtx_lock(&table_lock);
   for (unsigned i = 0; i < fd_submit->nr_bos; i++) {
      fd_bo_add_fence(fd_submit->bos[i], submit->pipe, submit->fence);
      has_shared |= fd_submit->bos[i]->shared;
   }
   simple_mtx_unlock(&table_lock);

   fd_submit->out_fence = out_fence;
   fd_submit->in_fence_fd = (in_fence_fd == -1) ?
         -1 : os_dupfd_cloexec(in_fence_fd);

   return has_shared;
}

static void
fd_submit_sp_flush_execute(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   struct fd_pipe *pipe = submit->pipe;

   fd_submit->flush_submit_list(&fd_submit->submit_list);

   pthread_mutex_lock(&flush_mtx);
   assert(fd_fence_before(pipe->last_submit_fence, fd_submit->base.fence));
   pipe->last_submit_fence = fd_submit->base.fence;
   pthread_cond_broadcast(&flush_cnd);
   pthread_mutex_unlock(&flush_mtx);

   DEBUG_MSG("finish: %u", submit->fence);
}

static void
fd_submit_sp_flush_cleanup(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   fd_submit_del(submit);
}

static int
enqueue_submit_list(struct list_head *submit_list)
{
   struct fd_submit *submit = last_submit(submit_list);
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   list_replace(submit_list, &fd_submit->submit_list);
   list_inithead(submit_list);

   struct util_queue_fence *fence;
   if (fd_submit->out_fence) {
      fence = &fd_submit->out_fence->ready;
   } else {
      util_queue_fence_init(&fd_submit->fence);
      fence = &fd_submit->fence;
   }

   DEBUG_MSG("enqueue: %u", submit->fence);

   util_queue_add_job(&submit->pipe->dev->submit_queue,
                      submit, fence,
                      fd_submit_sp_flush_execute,
                      fd_submit_sp_flush_cleanup,
                      0);

   return 0;
}

static bool
should_defer(struct fd_submit *submit)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   /* if too many bo's, it may not be worth the CPU cost of submit merging: */
   if (fd_submit->nr_bos > 30)
      return false;

   /* On the kernel side, with 32K ringbuffer, we have an upper limit of 2k
    * cmds before we exceed the size of the ringbuffer, which results in
    * deadlock writing into the RB (ie. kernel doesn't finish writing into
    * the RB so it doesn't kick the GPU to start consuming from the RB)
    */
   if (submit->pipe->dev->deferred_cmds > 128)
      return false;

   return true;
}

static int
fd_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
                   struct fd_submit_fence *out_fence)
{
   struct fd_device *dev = submit->pipe->dev;
   struct fd_pipe *pipe = submit->pipe;

   /* Acquire lock before flush_prep() because it is possible to race between
    * this and pipe->flush():
    */
   simple_mtx_lock(&dev->submit_lock);

   /* If there are deferred submits from another fd_pipe, flush them now,
    * since we can't merge submits from different submitqueue's (ie. they
    * could have different priority, etc)
    */
   if (!list_is_empty(&dev->deferred_submits) &&
       (last_submit(&dev->deferred_submits)->pipe != submit->pipe)) {
      struct list_head submit_list;

      list_replace(&dev->deferred_submits, &submit_list);
      list_inithead(&dev->deferred_submits);
      dev->deferred_cmds = 0;

      enqueue_submit_list(&submit_list);
   }

   list_addtail(&fd_submit_ref(submit)->node, &dev->deferred_submits);

   bool has_shared = fd_submit_sp_flush_prep(submit, in_fence_fd, out_fence);

   assert(fd_fence_before(pipe->last_enqueue_fence, submit->fence));
   pipe->last_enqueue_fence = submit->fence;

   /* If we don't need an out-fence, we can defer the submit.
    *
    * TODO we could defer submits with in-fence as well.. if we took our own
    * reference to the fd, and merged all the in-fence-fd's when we flush the
    * deferred submits
    */
   if ((in_fence_fd == -1) && !out_fence && !has_shared && should_defer(submit)) {
      DEBUG_MSG("defer: %u", submit->fence);
      dev->deferred_cmds += fd_ringbuffer_cmd_count(submit->primary);
      assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));
      simple_mtx_unlock(&dev->submit_lock);

      return 0;
   }

   struct list_head submit_list;

   list_replace(&dev->deferred_submits, &submit_list);
   list_inithead(&dev->deferred_submits);
   dev->deferred_cmds = 0;

   simple_mtx_unlock(&dev->submit_lock);

   return enqueue_submit_list(&submit_list);
}

void
fd_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence)
{
   struct fd_device *dev = pipe->dev;
   struct list_head submit_list;

   DEBUG_MSG("flush: %u", fence);

   list_inithead(&submit_list);

   simple_mtx_lock(&dev->submit_lock);

   assert(!fd_fence_after(fence, pipe->last_enqueue_fence));

   foreach_submit_safe (deferred_submit, &dev->deferred_submits) {
      /* We should never have submits from multiple pipes in the deferred
       * list.  If we did, we couldn't compare their fence to our fence,
       * since each fd_pipe is an independent timeline.
       */
      if (deferred_submit->pipe != pipe)
         break;

      if (fd_fence_after(deferred_submit->fence, fence))
         break;

      list_del(&deferred_submit->node);
      list_addtail(&deferred_submit->node, &submit_list);
      dev->deferred_cmds -= fd_ringbuffer_cmd_count(deferred_submit->primary);
   }

   assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));

   simple_mtx_unlock(&dev->submit_lock);

   if (list_is_empty(&submit_list))
      goto flush_sync;

   enqueue_submit_list(&submit_list);

flush_sync:
   /* Once we are sure that we've enqueued at least up to the requested
    * submit, we need to be sure that submitq has caught up and flushed
    * them to the kernel
    */
   pthread_mutex_lock(&flush_mtx);
   while (fd_fence_before(pipe->last_submit_fence, fence)) {
      pthread_cond_wait(&flush_cnd, &flush_mtx);
   }
   pthread_mutex_unlock(&flush_mtx);
}

static void
fd_submit_sp_destroy(struct fd_submit *submit)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   if (fd_submit->suballoc_ring)
      fd_ringbuffer_del(fd_submit->suballoc_ring);

   _mesa_hash_table_destroy(fd_submit->bo_table, NULL);

   // TODO it would be nice to have a way to debug_assert() if all
   // rb's haven't been free'd back to the slab, because that is
   // an indication that we are leaking bo's
   slab_destroy_child(&fd_submit->ring_pool);

   for (unsigned i = 0; i < fd_submit->nr_bos; i++)
      fd_bo_del(fd_submit->bos[i]);

   free(fd_submit->bos);
   free(fd_submit);
}

static const struct fd_submit_funcs submit_funcs = {
   .new_ringbuffer = fd_submit_sp_new_ringbuffer,
   .flush = fd_submit_sp_flush,
   .destroy = fd_submit_sp_destroy,
};

struct fd_submit *
fd_submit_sp_new(struct fd_pipe *pipe, flush_submit_list_fn flush_submit_list)
{
   struct fd_submit_sp *fd_submit = calloc(1, sizeof(*fd_submit));
   struct fd_submit *submit;

   fd_submit->bo_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                 _mesa_key_pointer_equal);

   slab_create_child(&fd_submit->ring_pool, &pipe->ring_pool);

   fd_submit->flush_submit_list = flush_submit_list;

   submit = &fd_submit->base;
   submit->funcs = &submit_funcs;

   return submit;
}

void
fd_pipe_sp_ringpool_init(struct fd_pipe *pipe)
{
   // TODO tune size:
   slab_create_parent(&pipe->ring_pool, sizeof(struct fd_ringbuffer_sp), 16);
}

void
fd_pipe_sp_ringpool_fini(struct fd_pipe *pipe)
{
   if (pipe->ring_pool.num_elements)
      slab_destroy_parent(&pipe->ring_pool);
}

static void
finalize_current_cmd(struct fd_ringbuffer *ring)
{
   debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   APPEND(&fd_ring->u, cmds,
          (struct fd_cmd_sp){
             .ring_bo = fd_bo_ref(fd_ring->ring_bo),
             .size = offset_bytes(ring->cur, ring->start),
          });
}

static void
fd_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_pipe *pipe = fd_ring->u.submit->pipe;

   debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE);

   finalize_current_cmd(ring);

   fd_bo_del(fd_ring->ring_bo);
   fd_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);

   ring->start = fd_bo_map(fd_ring->ring_bo);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;
   ring->size = size;
}

static inline bool
fd_ringbuffer_references_bo(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   for (int i = 0; i < fd_ring->u.nr_reloc_bos; i++) {
      if (fd_ring->u.reloc_bos[i] == bo)
         return true;
   }
   return false;
}

#define PTRSZ 64
#include "freedreno_ringbuffer_sp_reloc.h"
#undef PTRSZ
#define PTRSZ 32
#include "freedreno_ringbuffer_sp_reloc.h"
#undef PTRSZ

static uint32_t
fd_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
{
   if (ring->flags & FD_RINGBUFFER_GROWABLE)
      return to_fd_ringbuffer_sp(ring)->u.nr_cmds + 1;
   return 1;
}

static bool
fd_ringbuffer_sp_check_size(struct fd_ringbuffer *ring)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_submit *submit = fd_ring->u.submit;

   if (to_fd_submit_sp(submit)->nr_bos > MAX_ARRAY_SIZE/2) {
      return false;
   }

   return true;
}

static void
fd_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   fd_bo_del(fd_ring->ring_bo);

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      for (unsigned i = 0; i < fd_ring->u.nr_reloc_bos; i++) {
         fd_bo_del(fd_ring->u.reloc_bos[i]);
      }
      free(fd_ring->u.reloc_bos);

      free(fd_ring);
   } else {
      struct fd_submit *submit = fd_ring->u.submit;

      for (unsigned i = 0; i < fd_ring->u.nr_cmds; i++) {
         fd_bo_del(fd_ring->u.cmds[i].ring_bo);
      }
      free(fd_ring->u.cmds);

      slab_free(&to_fd_submit_sp(submit)->ring_pool, fd_ring);
   }
}

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_32 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_nonobj_32,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .check_size = fd_ringbuffer_sp_check_size,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_32 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_obj_32,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_64 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_nonobj_64,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .check_size = fd_ringbuffer_sp_check_size,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_64 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_obj_64,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .destroy = fd_ringbuffer_sp_destroy,
};

static inline struct fd_ringbuffer *
fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size,
                      enum fd_ringbuffer_flags flags)
{
   struct fd_ringbuffer *ring = &fd_ring->base;

   debug_assert(fd_ring->ring_bo);

   uint8_t *base = fd_bo_map(fd_ring->ring_bo);
   ring->start = (void *)(base + fd_ring->offset);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;

   ring->size = size;
   ring->flags = flags;

   if (flags & _FD_RINGBUFFER_OBJECT) {
      if (fd_dev_64b(&fd_ring->u.pipe->dev_id)) {
         ring->funcs = &ring_funcs_obj_64;
      } else {
         ring->funcs = &ring_funcs_obj_32;
      }
   } else {
      if (fd_dev_64b(&fd_ring->u.submit->pipe->dev_id)) {
         ring->funcs = &ring_funcs_nonobj_64;
      } else {
         ring->funcs = &ring_funcs_nonobj_32;
      }
   }

   // TODO initializing these could probably be conditional on flags
   // since unneeded for FD_RINGBUFFER_STAGING case..
   fd_ring->u.cmds = NULL;
   fd_ring->u.nr_cmds = fd_ring->u.max_cmds = 0;

   fd_ring->u.reloc_bos = NULL;
   fd_ring->u.nr_reloc_bos = fd_ring->u.max_reloc_bos = 0;

   return ring;
}

struct fd_ringbuffer *
fd_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
{
   struct fd_device *dev = pipe->dev;
   struct fd_ringbuffer_sp *fd_ring = malloc(sizeof(*fd_ring));

   /* Lock access to the fd_pipe->suballoc_* since ringbuffer object allocation
    * can happen both on the frontend (most CSOs) and the driver thread (a6xx
    * cached tex state, for example)
    */
   simple_mtx_lock(&dev->suballoc_lock);

   /* Maximum known alignment requirement is a6xx's TEX_CONST at 16 dwords */
   fd_ring->offset = align(dev->suballoc_offset, 64);
   if (!dev->suballoc_bo ||
       fd_ring->offset + size > fd_bo_size(dev->suballoc_bo)) {
      if (dev->suballoc_bo)
         fd_bo_del(dev->suballoc_bo);
      dev->suballoc_bo =
         fd_bo_new_ring(dev, MAX2(SUBALLOC_SIZE, align(size, 4096)));
      fd_ring->offset = 0;
   }

   fd_ring->u.pipe = pipe;
   fd_ring->ring_bo = fd_bo_ref(dev->suballoc_bo);
   fd_ring->base.refcnt = 1;

   dev->suballoc_offset = fd_ring->offset + size;

   simple_mtx_unlock(&dev->suballoc_lock);

   return fd_ringbuffer_sp_init(fd_ring, size, _FD_RINGBUFFER_OBJECT);
}
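The msm changes later in this commit show the intended pipe-level wiring: the
generic entry points above replace the msm-specific ones in fd_pipe_funcs. A
sketch of how another backend would wire the same hooks (the virtio_* names
are hypothetical placeholders; the remaining callbacks stay backend-specific):

static const struct fd_pipe_funcs virtio_pipe_funcs = {
   /* generic "softpin" implementations from the base class: */
   .ringbuffer_new_object = fd_ringbuffer_sp_new_object,
   .flush = fd_pipe_sp_flush,
   /* backend-specific submit constructor, wrapping fd_submit_sp_new(): */
   .submit_new = virtio_submit_new,
   /* get_param/set_param/wait/destroy remain backend-specific */
};

The backend would also call fd_pipe_sp_ringpool_init() at pipe creation and
fd_pipe_sp_ringpool_fini() at destroy time, as msm_pipe.c does below.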
src/freedreno/drm/freedreno_ringbuffer_sp.h (new file)
@@ -0,0 +1,125 @@
/*
 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#ifndef FREEDRENO_RINGBUFFER_SP_H_
#define FREEDRENO_RINGBUFFER_SP_H_

#include <assert.h>
#include <inttypes.h>
#include <pthread.h>

#include "util/hash_table.h"
#include "util/os_file.h"
#include "util/slab.h"

#include "freedreno_priv.h"
#include "freedreno_ringbuffer.h"

/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
 * by avoiding the additional tracking necessary to build cmds/relocs tables
 * (but still builds a bos table)
 */

typedef int (*flush_submit_list_fn)(struct list_head *submit_list);

struct fd_submit_sp {
   struct fd_submit base;

   DECLARE_ARRAY(struct fd_bo *, bos);

   /* maps fd_bo to idx in bos table: */
   struct hash_table *bo_table;

   struct slab_child_pool ring_pool;

   /* Allow for sub-allocation of stateobj ring buffers (ie. sharing
    * the same underlying bo)..
    *
    * We also rely on previous stateobj having been fully constructed
    * so we can reclaim extra space at its end.
    */
   struct fd_ringbuffer *suballoc_ring;

   /* Flush args, potentially attached to the last submit in the list
    * of submits to merge:
    */
   int in_fence_fd;
   struct fd_submit_fence *out_fence;

   /* State for enqueued submits:
    */
   struct list_head submit_list;   /* includes this submit as last element */

   /* Used in case out_fence==NULL: */
   struct util_queue_fence fence;

   flush_submit_list_fn flush_submit_list;
};
FD_DEFINE_CAST(fd_submit, fd_submit_sp);

/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers
 * and sizes.  Ie. a finalized buffer can have no more commands appended to
 * it.
 */
struct fd_cmd_sp {
   struct fd_bo *ring_bo;
   unsigned size;
};

struct fd_ringbuffer_sp {
   struct fd_ringbuffer base;

   /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */
   unsigned offset;

   union {
      /* for _FD_RINGBUFFER_OBJECT case, the array of BOs referenced from
       * this one
       */
      struct {
         struct fd_pipe *pipe;
         DECLARE_ARRAY(struct fd_bo *, reloc_bos);
      };
      /* for other cases: */
      struct {
         struct fd_submit *submit;
         DECLARE_ARRAY(struct fd_cmd_sp, cmds);
      };
   } u;

   struct fd_bo *ring_bo;
};
FD_DEFINE_CAST(fd_ringbuffer, fd_ringbuffer_sp);

void fd_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence);
uint32_t fd_submit_append_bo(struct fd_submit_sp *submit, struct fd_bo *bo);
struct fd_submit *fd_submit_sp_new(struct fd_pipe *pipe,
                                   flush_submit_list_fn flush_submit_list);
void fd_pipe_sp_ringpool_init(struct fd_pipe *pipe);
void fd_pipe_sp_ringpool_fini(struct fd_pipe *pipe);
struct fd_ringbuffer *fd_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size);

#endif /* FREEDRENO_RINGBUFFER_SP_H_ */
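For readers outside Mesa: the bos/cmds tables above use the DECLARE_ARRAY and
APPEND helpers from freedreno_priv.h. A simplified sketch of the semantics
this code relies on (an assumption for illustration, not the verbatim Mesa
macros): DECLARE_ARRAY(T, name) expands to nr_name/max_name counters plus a
T *name pointer, and APPEND grows the array, stores the value, and evaluates
to the index at which it was stored, which is how fd_submit_append_bo() gets
the bo table index.

/* Simplified sketch of the growable-array idiom (not the exact macros): */
#define DECLARE_ARRAY(type, name)                                      \
   unsigned short nr_##name, max_##name;                               \
   type *name

#define APPEND(x, name, ...)                                           \
   ({                                                                  \
      if ((x)->nr_##name == (x)->max_##name) {                         \
         (x)->max_##name = MAX2((x)->max_##name * 2, 16);              \
         (x)->name = realloc((x)->name,                                \
                             (x)->max_##name * sizeof((x)->name[0]));  \
      }                                                                \
      (x)->name[(x)->nr_##name] = __VA_ARGS__;                         \
      /* statement expression evaluates to the new element's index: */ \
      (x)->nr_##name++;                                                \
   })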
src/freedreno/drm/freedreno_ringbuffer_sp_reloc.h (renamed from msm/msm_ringbuffer_sp.h)
@@ -40,28 +40,28 @@ static void X(emit_reloc_common)(struct fd_ringbuffer *ring,
#endif
}

static void X(msm_ringbuffer_sp_emit_reloc_nonobj)(struct fd_ringbuffer *ring,
static void X(fd_ringbuffer_sp_emit_reloc_nonobj)(struct fd_ringbuffer *ring,
                                                   const struct fd_reloc *reloc)
{
   X(emit_reloc_common)(ring, reloc);

   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit);
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(fd_ring->u.submit);

   msm_submit_append_bo(msm_submit, reloc->bo);
   fd_submit_append_bo(fd_submit, reloc->bo);
}

static void X(msm_ringbuffer_sp_emit_reloc_obj)(struct fd_ringbuffer *ring,
static void X(fd_ringbuffer_sp_emit_reloc_obj)(struct fd_ringbuffer *ring,
                                                const struct fd_reloc *reloc)
{
   X(emit_reloc_common)(ring, reloc);

   assert(ring->flags & _FD_RINGBUFFER_OBJECT);

   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   /* Avoid emitting duplicate BO references into the list.  Ringbuffer
    * objects are long-lived, so this saves ongoing work at draw time in
@@ -69,60 +69,60 @@ static void X(msm_ringbuffer_sp_emit_reloc_obj)(struct fd_ringbuffer *ring,
    * relocs per ringbuffer object is fairly small, so the O(n^2) doesn't
    * hurt much.
    */
   if (!msm_ringbuffer_references_bo(ring, reloc->bo)) {
      APPEND(&msm_ring->u, reloc_bos, fd_bo_ref(reloc->bo));
   if (!fd_ringbuffer_references_bo(ring, reloc->bo)) {
      APPEND(&fd_ring->u, reloc_bos, fd_bo_ref(reloc->bo));
   }
}

static uint32_t X(msm_ringbuffer_sp_emit_reloc_ring)(
static uint32_t X(fd_ringbuffer_sp_emit_reloc_ring)(
   struct fd_ringbuffer *ring, struct fd_ringbuffer *target, uint32_t cmd_idx)
{
   struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target);
   struct fd_ringbuffer_sp *fd_target = to_fd_ringbuffer_sp(target);
   struct fd_bo *bo;
   uint32_t size;

   if ((target->flags & FD_RINGBUFFER_GROWABLE) &&
       (cmd_idx < msm_target->u.nr_cmds)) {
      bo = msm_target->u.cmds[cmd_idx].ring_bo;
      size = msm_target->u.cmds[cmd_idx].size;
       (cmd_idx < fd_target->u.nr_cmds)) {
      bo = fd_target->u.cmds[cmd_idx].ring_bo;
      size = fd_target->u.cmds[cmd_idx].size;
   } else {
      bo = msm_target->ring_bo;
      bo = fd_target->ring_bo;
      size = offset_bytes(target->cur, target->start);
   }

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      X(msm_ringbuffer_sp_emit_reloc_obj)(ring, &(struct fd_reloc){
      X(fd_ringbuffer_sp_emit_reloc_obj)(ring, &(struct fd_reloc){
         .bo = bo,
         .iova = bo->iova + msm_target->offset,
         .offset = msm_target->offset,
         .iova = bo->iova + fd_target->offset,
         .offset = fd_target->offset,
      });
   } else {
      X(msm_ringbuffer_sp_emit_reloc_nonobj)(ring, &(struct fd_reloc){
      X(fd_ringbuffer_sp_emit_reloc_nonobj)(ring, &(struct fd_reloc){
         .bo = bo,
         .iova = bo->iova + msm_target->offset,
         .offset = msm_target->offset,
         .iova = bo->iova + fd_target->offset,
         .offset = fd_target->offset,
      });
   }

   if (!(target->flags & _FD_RINGBUFFER_OBJECT))
      return size;

   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
         struct fd_bo *target_bo = msm_target->u.reloc_bos[i];
         if (!msm_ringbuffer_references_bo(ring, target_bo))
            APPEND(&msm_ring->u, reloc_bos, fd_bo_ref(target_bo));
      for (unsigned i = 0; i < fd_target->u.nr_reloc_bos; i++) {
         struct fd_bo *target_bo = fd_target->u.reloc_bos[i];
         if (!fd_ringbuffer_references_bo(ring, target_bo))
            APPEND(&fd_ring->u, reloc_bos, fd_bo_ref(target_bo));
      }
   } else {
      // TODO it would be nice to know whether we have already
      // seen this target before.  But hopefully we hit the
      // append_bo() fast path enough for this to not matter:
      struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit);
      struct fd_submit_sp *fd_submit = to_fd_submit_sp(fd_ring->u.submit);

      for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
         msm_submit_append_bo(msm_submit, msm_target->u.reloc_bos[i]);
      for (unsigned i = 0; i < fd_target->u.nr_reloc_bos; i++) {
         fd_submit_append_bo(fd_submit, fd_target->u.reloc_bos[i]);
      }
   }
src/freedreno/drm/meson.build
@@ -27,6 +27,9 @@ libfreedreno_drm_files = files(
  'freedreno_priv.h',
  'freedreno_ringbuffer.c',
  'freedreno_ringbuffer.h',
  'freedreno_ringbuffer_sp.c',
  'freedreno_ringbuffer_sp.h',
  'freedreno_ringbuffer_sp_reloc.h',
)
libfreedreno_drm_flags = []
libfreedreno_drm_includes = [
@@ -46,7 +49,6 @@ libfreedreno_drm_msm_files = files(
  'msm/msm_priv.h',
  'msm/msm_ringbuffer.c',
  'msm/msm_ringbuffer_sp.c',
  'msm/msm_ringbuffer_sp.h',
)
libfreedreno_drm_files += libfreedreno_drm_msm_files
src/freedreno/drm/msm/msm_pipe.c
@@ -26,6 +26,7 @@

#include "util/slab.h"

#include "freedreno_ringbuffer_sp.h"
#include "msm_priv.h"

static int
@@ -199,14 +200,14 @@ msm_pipe_destroy(struct fd_pipe *pipe)
   struct msm_pipe *msm_pipe = to_msm_pipe(pipe);

   close_submitqueue(pipe, msm_pipe->queue_id);
   msm_pipe_sp_ringpool_fini(pipe);
   fd_pipe_sp_ringpool_fini(pipe);
   free(msm_pipe);
}

static const struct fd_pipe_funcs sp_funcs = {
   .ringbuffer_new_object = msm_ringbuffer_sp_new_object,
   .ringbuffer_new_object = fd_ringbuffer_sp_new_object,
   .submit_new = msm_submit_sp_new,
   .flush = msm_pipe_sp_flush,
   .flush = fd_pipe_sp_flush,
   .get_param = msm_pipe_get_param,
   .set_param = msm_pipe_set_param,
   .wait = msm_pipe_wait,
@@ -281,7 +282,7 @@ msm_pipe_new(struct fd_device *dev, enum fd_pipe_id id, uint32_t prio)
   if (open_submitqueue(pipe, prio))
      goto fail;

   msm_pipe_sp_ringpool_init(pipe);
   fd_pipe_sp_ringpool_init(pipe);

   return pipe;
fail:
src/freedreno/drm/msm/msm_priv.h
@@ -68,15 +68,9 @@ struct fd_pipe *msm_pipe_new(struct fd_device *dev, enum fd_pipe_id id,

struct fd_ringbuffer *msm_ringbuffer_new_object(struct fd_pipe *pipe,
                                                uint32_t size);
struct fd_ringbuffer *msm_ringbuffer_sp_new_object(struct fd_pipe *pipe,
                                                   uint32_t size);

struct fd_submit *msm_submit_new(struct fd_pipe *pipe);
struct fd_submit *msm_submit_sp_new(struct fd_pipe *pipe);
void msm_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence);

void msm_pipe_sp_ringpool_init(struct fd_pipe *pipe);
void msm_pipe_sp_ringpool_fini(struct fd_pipe *pipe);

struct msm_bo {
   struct fd_bo base;
@ -28,249 +28,16 @@
|
|||
#include <inttypes.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include "util/hash_table.h"
|
||||
#include "util/os_file.h"
|
||||
#include "util/slab.h"
|
||||
|
||||
#include "drm/freedreno_ringbuffer.h"
|
||||
#include "drm/freedreno_ringbuffer_sp.h"
|
||||
#include "msm_priv.h"
|
||||
|
||||
/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
|
||||
* by avoiding the additional tracking necessary to build cmds/relocs tables
|
||||
* (but still builds a bos table)
|
||||
*/
|
||||
|
||||
#define INIT_SIZE 0x1000
|
||||
|
||||
#define SUBALLOC_SIZE (32 * 1024)
|
||||
|
||||
/* In the pipe->flush() path, we don't have a util_queue_fence we can wait on,
|
||||
* instead use a condition-variable. Note that pipe->flush() is not expected
|
||||
* to be a common/hot path.
|
||||
*/
|
||||
static pthread_cond_t flush_cnd = PTHREAD_COND_INITIALIZER;
|
||||
static pthread_mutex_t flush_mtx = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
|
||||
struct msm_submit_sp {
|
||||
struct fd_submit base;
|
||||
|
||||
DECLARE_ARRAY(struct fd_bo *, bos);
|
||||
|
||||
/* maps fd_bo to idx in bos table: */
|
||||
struct hash_table *bo_table;
|
||||
|
||||
struct slab_child_pool ring_pool;
|
||||
|
||||
/* Allow for sub-allocation of stateobj ring buffers (ie. sharing
|
||||
* the same underlying bo)..
|
||||
*
|
||||
* We also rely on previous stateobj having been fully constructed
|
||||
* so we can reclaim extra space at it's end.
|
||||
*/
|
||||
struct fd_ringbuffer *suballoc_ring;
|
||||
|
||||
/* Flush args, potentially attached to the last submit in the list
|
||||
* of submits to merge:
|
||||
*/
|
||||
int in_fence_fd;
|
||||
struct fd_submit_fence *out_fence;
|
||||
|
||||
/* State for enqueued submits:
|
||||
*/
|
||||
struct list_head submit_list; /* includes this submit as last element */
|
||||
|
||||
/* Used in case out_fence==NULL: */
|
||||
struct util_queue_fence fence;
|
||||
};
|
||||
FD_DEFINE_CAST(fd_submit, msm_submit_sp);
|
||||
|
||||
/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers
|
||||
* and sizes. Ie. a finalized buffer can have no more commands appended to
|
||||
* it.
|
||||
*/
|
||||
struct msm_cmd_sp {
|
||||
struct fd_bo *ring_bo;
|
||||
unsigned size;
|
||||
};
|
||||
|
||||
struct msm_ringbuffer_sp {
|
||||
struct fd_ringbuffer base;
|
||||
|
||||
/* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */
|
||||
unsigned offset;
|
||||
|
||||
union {
|
||||
/* for _FD_RINGBUFFER_OBJECT case, the array of BOs referenced from
|
||||
* this one
|
||||
*/
|
||||
struct {
|
||||
struct fd_pipe *pipe;
|
||||
DECLARE_ARRAY(struct fd_bo *, reloc_bos);
|
||||
};
|
||||
/* for other cases: */
|
||||
struct {
|
||||
struct fd_submit *submit;
|
||||
DECLARE_ARRAY(struct msm_cmd_sp, cmds);
|
||||
};
|
||||
} u;
|
||||
|
||||
struct fd_bo *ring_bo;
|
||||
};
|
||||
FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer_sp);
|
||||
|
||||
static void finalize_current_cmd(struct fd_ringbuffer *ring);
|
||||
static struct fd_ringbuffer *
|
||||
msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
|
||||
enum fd_ringbuffer_flags flags);
|
||||
|
||||
/* add (if needed) bo to submit and return index: */
|
||||
static uint32_t
|
||||
msm_submit_append_bo(struct msm_submit_sp *submit, struct fd_bo *bo)
|
||||
{
|
||||
uint32_t idx;
|
||||
|
||||
/* NOTE: it is legal to use the same bo on different threads for
|
||||
* different submits. But it is not legal to use the same submit
|
||||
* from different threads.
|
||||
*/
|
||||
idx = READ_ONCE(bo->idx);
|
||||
|
||||
if (unlikely((idx >= submit->nr_bos) || (submit->bos[idx] != bo))) {
|
||||
uint32_t hash = _mesa_hash_pointer(bo);
|
||||
struct hash_entry *entry;
|
||||
|
||||
entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
|
||||
if (entry) {
|
||||
/* found */
|
||||
idx = (uint32_t)(uintptr_t)entry->data;
|
||||
} else {
|
||||
idx = APPEND(submit, bos, fd_bo_ref(bo));
|
||||
|
||||
_mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
|
||||
(void *)(uintptr_t)idx);
|
||||
}
|
||||
bo->idx = idx;
|
||||
}
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
static void
|
||||
msm_submit_suballoc_ring_bo(struct fd_submit *submit,
|
||||
struct msm_ringbuffer_sp *msm_ring, uint32_t size)
|
||||
{
|
||||
struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
|
||||
unsigned suballoc_offset = 0;
|
||||
struct fd_bo *suballoc_bo = NULL;
|
||||
|
||||
if (msm_submit->suballoc_ring) {
|
||||
struct msm_ringbuffer_sp *suballoc_ring =
|
||||
to_msm_ringbuffer_sp(msm_submit->suballoc_ring);
|
||||
|
||||
suballoc_bo = suballoc_ring->ring_bo;
|
||||
suballoc_offset =
|
||||
fd_ringbuffer_size(msm_submit->suballoc_ring) + suballoc_ring->offset;
|
||||
|
||||
suballoc_offset = align(suballoc_offset, 0x10);
|
||||
|
||||
if ((size + suballoc_offset) > suballoc_bo->size) {
|
||||
suballoc_bo = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (!suballoc_bo) {
|
||||
// TODO possibly larger size for streaming bo?
|
||||
msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, SUBALLOC_SIZE);
|
||||
msm_ring->offset = 0;
|
||||
} else {
|
||||
msm_ring->ring_bo = fd_bo_ref(suballoc_bo);
|
||||
msm_ring->offset = suballoc_offset;
|
||||
}
|
||||
|
||||
struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring;
|
||||
|
||||
msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base);
|
||||
|
||||
if (old_suballoc_ring)
|
||||
fd_ringbuffer_del(old_suballoc_ring);
|
||||
}
|
||||
|
||||
static struct fd_ringbuffer *
|
||||
msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
|
||||
enum fd_ringbuffer_flags flags)
|
||||
{
|
||||
struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
|
||||
struct msm_ringbuffer_sp *msm_ring;
|
||||
|
||||
msm_ring = slab_alloc(&msm_submit->ring_pool);
|
||||
|
||||
msm_ring->u.submit = submit;
|
||||
|
||||
/* NOTE: needs to be before _suballoc_ring_bo() since it could
|
||||
* increment the refcnt of the current ring
|
||||
*/
|
||||
msm_ring->base.refcnt = 1;
|
||||
|
||||
if (flags & FD_RINGBUFFER_STREAMING) {
|
||||
msm_submit_suballoc_ring_bo(submit, msm_ring, size);
|
||||
} else {
|
||||
if (flags & FD_RINGBUFFER_GROWABLE)
|
||||
size = INIT_SIZE;
|
||||
|
||||
msm_ring->offset = 0;
|
||||
msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size);
|
||||
}
|
||||
|
||||
if (!msm_ringbuffer_sp_init(msm_ring, size, flags))
|
||||
return NULL;
|
||||
|
||||
return &msm_ring->base;
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare submit for flush, always done synchronously.
|
||||
*
|
||||
* 1) Finalize primary ringbuffer, at this point no more cmdstream may
|
||||
* be written into it, since from the PoV of the upper level driver
|
||||
* the submit is flushed, even if deferred
|
||||
* 2) Add cmdstream bos to bos table
|
||||
* 3) Update bo fences
|
||||
*/
|
||||
static bool
|
||||
msm_submit_sp_flush_prep(struct fd_submit *submit, int in_fence_fd,
|
||||
struct fd_submit_fence *out_fence)
|
||||
{
|
||||
struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
|
||||
bool has_shared = false;
|
||||
|
||||
finalize_current_cmd(submit->primary);
|
||||
|
||||
struct msm_ringbuffer_sp *primary =
|
||||
to_msm_ringbuffer_sp(submit->primary);
|
||||
|
||||
for (unsigned i = 0; i < primary->u.nr_cmds; i++)
|
||||
msm_submit_append_bo(msm_submit, primary->u.cmds[i].ring_bo);
|
||||
|
||||
simple_mtx_lock(&table_lock);
|
||||
for (unsigned i = 0; i < msm_submit->nr_bos; i++) {
|
||||
fd_bo_add_fence(msm_submit->bos[i], submit->pipe, submit->fence);
|
||||
has_shared |= msm_submit->bos[i]->shared;
|
||||
}
|
||||
simple_mtx_unlock(&table_lock);
|
||||
|
||||
msm_submit->out_fence = out_fence;
|
||||
msm_submit->in_fence_fd = (in_fence_fd == -1) ?
|
||||
-1 : os_dupfd_cloexec(in_fence_fd);
|
||||
|
||||
return has_shared;
|
||||
}
|
||||
|
||||
static int
|
||||
flush_submit_list(struct list_head *submit_list)
|
||||
{
|
||||
struct msm_submit_sp *msm_submit = to_msm_submit_sp(last_submit(submit_list));
|
||||
struct msm_pipe *msm_pipe = to_msm_pipe(msm_submit->base.pipe);
|
||||
struct fd_submit_sp *fd_submit = to_fd_submit_sp(last_submit(submit_list));
|
||||
struct msm_pipe *msm_pipe = to_msm_pipe(fd_submit->base.pipe);
|
||||
struct drm_msm_gem_submit req = {
|
||||
.flags = msm_pipe->pipe,
|
||||
.queueid = msm_pipe->queue_id,
|
||||
|
@ -284,7 +51,7 @@ flush_submit_list(struct list_head *submit_list)
|
|||
*/
|
||||
foreach_submit (submit, submit_list) {
|
||||
assert(submit->pipe == &msm_pipe->base);
|
||||
nr_cmds += to_msm_ringbuffer_sp(submit->primary)->u.nr_cmds;
|
||||
nr_cmds += to_fd_ringbuffer_sp(submit->primary)->u.nr_cmds;
|
||||
}
|
||||
|
||||
struct drm_msm_gem_submit_cmd cmds[nr_cmds];
|
||||
|
@ -295,13 +62,13 @@ flush_submit_list(struct list_head *submit_list)
|
|||
* list, merge their bo tables into the last submit.
|
||||
*/
|
||||
foreach_submit_safe (submit, submit_list) {
|
||||
struct msm_ringbuffer_sp *deferred_primary =
|
||||
to_msm_ringbuffer_sp(submit->primary);
|
||||
struct fd_ringbuffer_sp *deferred_primary =
|
||||
to_fd_ringbuffer_sp(submit->primary);
|
||||
|
||||
for (unsigned i = 0; i < deferred_primary->u.nr_cmds; i++) {
|
||||
cmds[cmd_idx].type = MSM_SUBMIT_CMD_BUF;
|
||||
cmds[cmd_idx].submit_idx =
|
||||
msm_submit_append_bo(msm_submit, deferred_primary->u.cmds[i].ring_bo);
|
||||
fd_submit_append_bo(fd_submit, deferred_primary->u.cmds[i].ring_bo);
|
||||
cmds[cmd_idx].submit_offset = deferred_primary->offset;
|
||||
cmds[cmd_idx].size = deferred_primary->u.cmds[i].size;
|
||||
cmds[cmd_idx].pad = 0;
|
||||
|
@ -318,13 +85,13 @@ flush_submit_list(struct list_head *submit_list)
|
|||
break;
|
||||
}
|
||||
|
||||
struct msm_submit_sp *msm_deferred_submit = to_msm_submit_sp(submit);
|
||||
for (unsigned i = 0; i < msm_deferred_submit->nr_bos; i++) {
|
||||
struct fd_submit_sp *fd_deferred_submit = to_fd_submit_sp(submit);
|
||||
for (unsigned i = 0; i < fd_deferred_submit->nr_bos; i++) {
|
||||
/* Note: if bo is used in both the current submit and the deferred
|
||||
* submit being merged, we expect to hit the fast-path as we add it
|
||||
* to the current submit:
|
||||
*/
|
||||
msm_submit_append_bo(msm_submit, msm_deferred_submit->bos[i]);
|
||||
fd_submit_append_bo(fd_submit, fd_deferred_submit->bos[i]);
|
||||
}
|
||||
|
||||
/* Now that the cmds/bos have been transfered over to the current submit,
|
||||
|
@ -334,9 +101,9 @@ flush_submit_list(struct list_head *submit_list)
|
|||
fd_submit_del(submit);
|
||||
}
|
||||
|
||||
if (msm_submit->in_fence_fd != -1) {
|
||||
if (fd_submit->in_fence_fd != -1) {
|
||||
req.flags |= MSM_SUBMIT_FENCE_FD_IN;
|
||||
req.fence_fd = msm_submit->in_fence_fd;
|
||||
req.fence_fd = fd_submit->in_fence_fd;
|
||||
msm_pipe->no_implicit_sync = true;
|
||||
}
|
||||
|
||||
|
@ -344,7 +111,7 @@ flush_submit_list(struct list_head *submit_list)
|
|||
req.flags |= MSM_SUBMIT_NO_IMPLICIT;
|
||||
}
|
||||
|
||||
if (msm_submit->out_fence && msm_submit->out_fence->use_fence_fd) {
|
||||
if (fd_submit->out_fence && fd_submit->out_fence->use_fence_fd) {
|
||||
req.flags |= MSM_SUBMIT_FENCE_FD_OUT;
|
||||
}
|
||||
|
||||
|
@ -354,24 +121,24 @@ flush_submit_list(struct list_head *submit_list)
|
|||
* bound to limit on-stack allocation to 4k:
|
||||
*/
|
||||
const unsigned bo_limit = sizeof(struct drm_msm_gem_submit_bo) / 4096;
|
||||
bool bos_on_stack = msm_submit->nr_bos < bo_limit;
|
||||
bool bos_on_stack = fd_submit->nr_bos < bo_limit;
|
||||
struct drm_msm_gem_submit_bo
|
||||
_submit_bos[bos_on_stack ? msm_submit->nr_bos : 0];
|
||||
_submit_bos[bos_on_stack ? fd_submit->nr_bos : 0];
|
||||
struct drm_msm_gem_submit_bo *submit_bos;
|
||||
if (bos_on_stack) {
|
||||
submit_bos = _submit_bos;
|
||||
} else {
|
||||
submit_bos = malloc(msm_submit->nr_bos * sizeof(submit_bos[0]));
|
||||
submit_bos = malloc(fd_submit->nr_bos * sizeof(submit_bos[0]));
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < msm_submit->nr_bos; i++) {
|
||||
submit_bos[i].flags = msm_submit->bos[i]->reloc_flags;
|
||||
submit_bos[i].handle = msm_submit->bos[i]->handle;
|
||||
for (unsigned i = 0; i < fd_submit->nr_bos; i++) {
|
||||
submit_bos[i].flags = fd_submit->bos[i]->reloc_flags;
|
||||
submit_bos[i].handle = fd_submit->bos[i]->handle;
|
||||
submit_bos[i].presumed = 0;
|
||||
}
|
||||
|
||||
req.bos = VOID2U64(submit_bos);
|
||||
req.nr_bos = msm_submit->nr_bos;
|
||||
req.nr_bos = fd_submit->nr_bos;
|
||||
req.cmds = VOID2U64(cmds);
|
||||
req.nr_cmds = nr_cmds;
|
||||
|
||||
|
@ -382,472 +149,28 @@ flush_submit_list(struct list_head *submit_list)
|
|||
if (ret) {
|
||||
ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno));
|
||||
msm_dump_submit(&req);
|
||||
} else if (!ret && msm_submit->out_fence) {
|
||||
msm_submit->out_fence->fence.kfence = req.fence;
|
||||
msm_submit->out_fence->fence.ufence = msm_submit->base.fence;
|
||||
msm_submit->out_fence->fence_fd = req.fence_fd;
|
||||
} else if (!ret && fd_submit->out_fence) {
|
||||
fd_submit->out_fence->fence.kfence = req.fence;
|
||||
fd_submit->out_fence->fence.ufence = fd_submit->base.fence;
|
||||
fd_submit->out_fence->fence_fd = req.fence_fd;
|
||||
}
|
||||
|
||||
if (!bos_on_stack)
|
||||
free(submit_bos);
|
||||
|
||||
pthread_mutex_lock(&flush_mtx);
|
||||
assert(fd_fence_before(msm_pipe->base.last_submit_fence, msm_submit->base.fence));
|
||||
msm_pipe->base.last_submit_fence = msm_submit->base.fence;
|
||||
pthread_cond_broadcast(&flush_cnd);
|
||||
pthread_mutex_unlock(&flush_mtx);
|
||||
|
||||
if (msm_submit->in_fence_fd != -1)
|
||||
close(msm_submit->in_fence_fd);
|
||||
if (fd_submit->in_fence_fd != -1)
|
||||
close(fd_submit->in_fence_fd);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
msm_submit_sp_flush_execute(void *job, void *gdata, int thread_index)
|
||||
{
|
||||
struct fd_submit *submit = job;
|
||||
struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
|
||||
|
||||
flush_submit_list(&msm_submit->submit_list);
|
||||
|
||||
DEBUG_MSG("finish: %u", submit->fence);
|
||||
}
|
||||
|
||||
static void
|
||||
msm_submit_sp_flush_cleanup(void *job, void *gdata, int thread_index)
|
||||
{
|
||||
struct fd_submit *submit = job;
|
||||
fd_submit_del(submit);
|
||||
}
|
||||
|
||||
static int
|
||||
enqueue_submit_list(struct list_head *submit_list)
|
||||
{
|
||||
struct fd_submit *submit = last_submit(submit_list);
|
||||
struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
|
||||
|
||||
list_replace(submit_list, &msm_submit->submit_list);
|
||||
list_inithead(submit_list);
|
||||
|
||||
struct util_queue_fence *fence;
|
||||
if (msm_submit->out_fence) {
|
||||
fence = &msm_submit->out_fence->ready;
|
||||
} else {
|
||||
util_queue_fence_init(&msm_submit->fence);
|
||||
fence = &msm_submit->fence;
|
||||
}
|
||||
|
||||
DEBUG_MSG("enqueue: %u", submit->fence);
|
||||
|
||||
util_queue_add_job(&submit->pipe->dev->submit_queue,
|
||||
submit, fence,
|
||||
msm_submit_sp_flush_execute,
|
||||
msm_submit_sp_flush_cleanup,
|
||||
0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool
|
||||
should_defer(struct fd_submit *submit)
|
||||
{
|
||||
struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
|
||||
|
||||
/* if too many bo's, it may not be worth the CPU cost of submit merging: */
|
||||
if (msm_submit->nr_bos > 30)
|
||||
return false;
|
||||
|
||||
/* On the kernel side, with 32K ringbuffer, we have an upper limit of 2k
|
||||
* cmds before we exceed the size of the ringbuffer, which results in
|
||||
* deadlock writing into the RB (ie. kernel doesn't finish writing into
|
||||
* the RB so it doesn't kick the GPU to start consuming from the RB)
|
||||
*/
|
||||
if (submit->pipe->dev->deferred_cmds > 128)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int
|
||||
msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
|
||||
struct fd_submit_fence *out_fence)
|
||||
{
|
||||
struct fd_device *dev = submit->pipe->dev;
|
||||
struct fd_pipe *pipe = submit->pipe;
|
||||
|
||||
/* Acquire lock before flush_prep() because it is possible to race between
|
||||
* this and pipe->flush():
|
||||
*/
|
||||
simple_mtx_lock(&dev->submit_lock);
|
||||
|
||||
/* If there are deferred submits from another fd_pipe, flush them now,
|
||||
* since we can't merge submits from different submitqueue's (ie. they
|
||||
* could have different priority, etc)
|
||||
*/
|
||||
if (!list_is_empty(&dev->deferred_submits) &&
|
||||
(last_submit(&dev->deferred_submits)->pipe != submit->pipe)) {
|
||||
struct list_head submit_list;
|
||||
|
||||
list_replace(&dev->deferred_submits, &submit_list);
|
||||
list_inithead(&dev->deferred_submits);
|
||||
dev->deferred_cmds = 0;
|
||||
|
||||
enqueue_submit_list(&submit_list);
|
||||
}
|
||||
|
||||
list_addtail(&fd_submit_ref(submit)->node, &dev->deferred_submits);
|
||||
|
||||
bool has_shared = msm_submit_sp_flush_prep(submit, in_fence_fd, out_fence);
|
||||
|
||||
assert(fd_fence_before(pipe->last_enqueue_fence, submit->fence));
|
||||
pipe->last_enqueue_fence = submit->fence;
|
||||
|
||||
/* If we don't need an out-fence, we can defer the submit.
|
||||
*
|
||||
* TODO we could defer submits with in-fence as well.. if we took our own
|
||||
* reference to the fd, and merged all the in-fence-fd's when we flush the
|
||||
* deferred submits
|
||||
*/
|
||||
if ((in_fence_fd == -1) && !out_fence && !has_shared && should_defer(submit)) {
|
||||
DEBUG_MSG("defer: %u", submit->fence);
|
||||
dev->deferred_cmds += fd_ringbuffer_cmd_count(submit->primary);
|
||||
assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));
|
||||
simple_mtx_unlock(&dev->submit_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct list_head submit_list;
|
||||
|
||||
list_replace(&dev->deferred_submits, &submit_list);
|
||||
list_inithead(&dev->deferred_submits);
|
||||
dev->deferred_cmds = 0;
|
||||
|
||||
simple_mtx_unlock(&dev->submit_lock);
|
||||
|
||||
return enqueue_submit_list(&submit_list);
|
||||
}
|
||||
|
||||
void
|
||||
msm_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence)
|
||||
{
|
||||
struct fd_device *dev = pipe->dev;
|
||||
struct list_head submit_list;
|
||||
|
||||
DEBUG_MSG("flush: %u", fence);
|
||||
|
||||
list_inithead(&submit_list);
|
||||
|
||||
simple_mtx_lock(&dev->submit_lock);
|
||||
|
||||
assert(!fd_fence_after(fence, pipe->last_enqueue_fence));
|
||||
|
||||
foreach_submit_safe (deferred_submit, &dev->deferred_submits) {
|
||||
/* We should never have submits from multiple pipes in the deferred
|
||||
* list. If we did, we couldn't compare their fence to our fence,
|
||||
* since each fd_pipe is an independent timeline.
|
||||
*/
|
||||
if (deferred_submit->pipe != pipe)
|
||||
break;
|
||||
|
||||
if (fd_fence_after(deferred_submit->fence, fence))
|
||||
break;
|
||||
|
||||
list_del(&deferred_submit->node);
|
||||
list_addtail(&deferred_submit->node, &submit_list);
|
||||
dev->deferred_cmds -= fd_ringbuffer_cmd_count(deferred_submit->primary);
|
||||
}
|
||||
|
||||
assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));
|
||||
|
||||
simple_mtx_unlock(&dev->submit_lock);
|
||||
|
||||
if (list_is_empty(&submit_list))
|
||||
goto flush_sync;
|
||||
|
||||
enqueue_submit_list(&submit_list);
|
||||
|
||||
flush_sync:
|
||||
/* Once we are sure that we've enqueued at least up to the requested
|
||||
* submit, we need to be sure that submitq has caught up and flushed
|
||||
* them to the kernel
|
||||
*/
|
||||
pthread_mutex_lock(&flush_mtx);
|
||||
while (fd_fence_before(pipe->last_submit_fence, fence)) {
|
||||
pthread_cond_wait(&flush_cnd, &flush_mtx);
|
||||
}
|
||||
pthread_mutex_unlock(&flush_mtx);
|
||||
}
|
||||
|
||||
static void
|
||||
msm_submit_sp_destroy(struct fd_submit *submit)
|
||||
{
|
||||
struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
|
||||
|
||||
if (msm_submit->suballoc_ring)
|
||||
fd_ringbuffer_del(msm_submit->suballoc_ring);
|
||||
|
||||
_mesa_hash_table_destroy(msm_submit->bo_table, NULL);
|
||||
|
||||
// TODO it would be nice to have a way to debug_assert() if all
|
||||
// rb's haven't been free'd back to the slab, because that is
|
||||
// an indication that we are leaking bo's
|
||||
slab_destroy_child(&msm_submit->ring_pool);
|
||||
|
||||
for (unsigned i = 0; i < msm_submit->nr_bos; i++)
|
||||
fd_bo_del(msm_submit->bos[i]);
|
||||
|
||||
free(msm_submit->bos);
|
||||
free(msm_submit);
|
||||
}
|
||||
|
||||
static const struct fd_submit_funcs submit_funcs = {
|
||||
.new_ringbuffer = msm_submit_sp_new_ringbuffer,
|
||||
.flush = msm_submit_sp_flush,
|
||||
.destroy = msm_submit_sp_destroy,
|
||||
};
|
||||
|
||||
struct fd_submit *
|
||||
msm_submit_sp_new(struct fd_pipe *pipe)
|
||||
{
|
||||
struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit));
|
||||
struct fd_submit *submit;
|
||||
|
||||
msm_submit->bo_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
|
||||
_mesa_key_pointer_equal);
|
||||
|
||||
slab_create_child(&msm_submit->ring_pool, &pipe->ring_pool);
|
||||
|
||||
submit = &msm_submit->base;
|
||||
submit->funcs = &submit_funcs;
|
||||
|
||||
return submit;
|
||||
}
|
||||
|
||||
void
|
||||
msm_pipe_sp_ringpool_init(struct fd_pipe *pipe)
|
||||
{
|
||||
// TODO tune size:
|
||||
slab_create_parent(&pipe->ring_pool, sizeof(struct msm_ringbuffer_sp), 16);
|
||||
}
|
||||
|
||||
void
|
||||
msm_pipe_sp_ringpool_fini(struct fd_pipe *pipe)
|
||||
{
|
||||
if (pipe->ring_pool.num_elements)
|
||||
slab_destroy_parent(&pipe->ring_pool);
|
||||
}

static void
finalize_current_cmd(struct fd_ringbuffer *ring)
{
   debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   APPEND(&msm_ring->u, cmds,
          (struct msm_cmd_sp){
             .ring_bo = fd_bo_ref(msm_ring->ring_bo),
             .size = offset_bytes(ring->cur, ring->start),
          });
}

static void
msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   struct fd_pipe *pipe = msm_ring->u.submit->pipe;

   debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE);

   finalize_current_cmd(ring);

   fd_bo_del(msm_ring->ring_bo);
   msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);

   ring->start = fd_bo_map(msm_ring->ring_bo);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;
   ring->size = size;
}
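
/* Note that growing doesn't memcpy the old contents into a larger buffer:
 * the current bo is appended to u.cmds (finalize_current_cmd() takes its
 * own reference before the local one is dropped above) and a fresh bo is
 * mapped in its place, so a growable ring is really a chain of cmd bo's.
 */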

static inline bool
msm_ringbuffer_references_bo(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);

   for (int i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
      if (msm_ring->u.reloc_bos[i] == bo)
         return true;
   }
   return false;
}

#define PTRSZ 64
#include "msm_ringbuffer_sp.h"
#undef PTRSZ
#define PTRSZ 32
#include "msm_ringbuffer_sp.h"
#undef PTRSZ
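
/* Poor-man's templates: including msm_ringbuffer_sp.h twice, once per
 * PTRSZ, stamps out 32b and 64b variants of the emit_reloc helpers.  The
 * header presumably token-pastes the suffix on, roughly:
 *
 *    #define CONCAT2(a, b) a##b
 *    #define CONCAT(a, b) CONCAT2(a, b)
 *    #define EMIT_NAME(n) CONCAT(n##_, PTRSZ)
 *
 * producing e.g. msm_ringbuffer_sp_emit_reloc_ring_32/_64, which the
 * ring_funcs tables below select between at runtime via fd_dev_64b().
 */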

static uint32_t
msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
{
   if (ring->flags & FD_RINGBUFFER_GROWABLE)
      return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1;
   return 1;
}
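
/* The +1 above accounts for the still-open tail cmd, which is only
 * appended to u.cmds by finalize_current_cmd() when the ring grows or is
 * flushed.
 */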

static bool
msm_ringbuffer_sp_check_size(struct fd_ringbuffer *ring)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   struct fd_submit *submit = msm_ring->u.submit;

   if (to_msm_submit_sp(submit)->nr_bos > MAX_ARRAY_SIZE / 2) {
      return false;
   }

   return true;
}
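
/* Returning false tells the caller to flush before the bos table gets
 * unmanageably large; capping at half of MAX_ARRAY_SIZE presumably leaves
 * headroom for bos appended while the flush is being assembled.
 */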

static void
msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);

   fd_bo_del(msm_ring->ring_bo);

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
         fd_bo_del(msm_ring->u.reloc_bos[i]);
      }
      free(msm_ring->u.reloc_bos);

      free(msm_ring);
   } else {
      struct fd_submit *submit = msm_ring->u.submit;

      for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) {
         fd_bo_del(msm_ring->u.cmds[i].ring_bo);
      }
      free(msm_ring->u.cmds);

      slab_free(&to_msm_submit_sp(submit)->ring_pool, msm_ring);
   }
}

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_32 = {
   .grow = msm_ringbuffer_sp_grow,
   .emit_reloc = msm_ringbuffer_sp_emit_reloc_nonobj_32,
   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = msm_ringbuffer_sp_cmd_count,
   .check_size = msm_ringbuffer_sp_check_size,
   .destroy = msm_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_32 = {
   .grow = msm_ringbuffer_sp_grow,
   .emit_reloc = msm_ringbuffer_sp_emit_reloc_obj_32,
   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = msm_ringbuffer_sp_cmd_count,
   .destroy = msm_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_64 = {
   .grow = msm_ringbuffer_sp_grow,
   .emit_reloc = msm_ringbuffer_sp_emit_reloc_nonobj_64,
   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = msm_ringbuffer_sp_cmd_count,
   .check_size = msm_ringbuffer_sp_check_size,
   .destroy = msm_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_64 = {
   .grow = msm_ringbuffer_sp_grow,
   .emit_reloc = msm_ringbuffer_sp_emit_reloc_obj_64,
   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = msm_ringbuffer_sp_cmd_count,
   .destroy = msm_ringbuffer_sp_destroy,
};
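
/* The _obj tables above intentionally omit .check_size: it asserts
 * !_FD_RINGBUFFER_OBJECT, since stateobj rings have a fixed size and no
 * associated submit to measure against.
 */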

static inline struct fd_ringbuffer *
msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
                       enum fd_ringbuffer_flags flags)
{
   struct fd_ringbuffer *ring = &msm_ring->base;

   /* We don't do any translation from internal FD_RELOC flags to MSM flags. */
   STATIC_ASSERT(FD_RELOC_READ == MSM_SUBMIT_BO_READ);
   STATIC_ASSERT(FD_RELOC_WRITE == MSM_SUBMIT_BO_WRITE);
   STATIC_ASSERT(FD_RELOC_DUMP == MSM_SUBMIT_BO_DUMP);

   debug_assert(msm_ring->ring_bo);

   uint8_t *base = fd_bo_map(msm_ring->ring_bo);
   ring->start = (void *)(base + msm_ring->offset);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;

   ring->size = size;
   ring->flags = flags;

   if (flags & _FD_RINGBUFFER_OBJECT) {
      if (fd_dev_64b(&msm_ring->u.pipe->dev_id)) {
         ring->funcs = &ring_funcs_obj_64;
      } else {
         ring->funcs = &ring_funcs_obj_32;
      }
   } else {
      if (fd_dev_64b(&msm_ring->u.submit->pipe->dev_id)) {
         ring->funcs = &ring_funcs_nonobj_64;
      } else {
         ring->funcs = &ring_funcs_nonobj_32;
      }
   }

   // TODO initializing these could probably be conditional on flags
   // since unneeded for FD_RINGBUFFER_STAGING case..
   msm_ring->u.cmds = NULL;
   msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0;

   msm_ring->u.reloc_bos = NULL;
   msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0;

   return ring;
}

struct fd_ringbuffer *
msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
{
   struct fd_device *dev = pipe->dev;
   struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring));

   /* Lock access to the dev->suballoc_* since ringbuffer object allocation
    * can happen both on the frontend (most CSOs) and the driver thread (a6xx
    * cached tex state, for example)
    */
   simple_mtx_lock(&dev->suballoc_lock);

   /* Maximum known alignment requirement is a6xx's TEX_CONST at 16 dwords */
   msm_ring->offset = align(dev->suballoc_offset, 64);
   if (!dev->suballoc_bo ||
       msm_ring->offset + size > fd_bo_size(dev->suballoc_bo)) {
      if (dev->suballoc_bo)
         fd_bo_del(dev->suballoc_bo);
      dev->suballoc_bo =
         fd_bo_new_ring(dev, MAX2(SUBALLOC_SIZE, align(size, 4096)));
      msm_ring->offset = 0;
   }

   msm_ring->u.pipe = pipe;
   msm_ring->ring_bo = fd_bo_ref(dev->suballoc_bo);
   msm_ring->base.refcnt = 1;

   dev->suballoc_offset = msm_ring->offset + size;

   simple_mtx_unlock(&dev->suballoc_lock);

   return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT);
}
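
/* Stateobj rings are suballocated from a shared, device-wide bo in
 * SUBALLOC_SIZE chunks rather than each getting their own bo, since
 * stateobjs (CSOs and the like) tend to be small and numerous.  A
 * replaced suballoc_bo stays alive until the last ring referencing it
 * drops its fd_bo_ref().
 */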