diff --git a/src/freedreno/drm/msm_ringbuffer_sp.c b/src/freedreno/drm/msm_ringbuffer_sp.c
index 6a330981caa..7c3d2ff1a43 100644
--- a/src/freedreno/drm/msm_ringbuffer_sp.c
+++ b/src/freedreno/drm/msm_ringbuffer_sp.c
@@ -389,123 +389,12 @@ msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
    ring->size = size;
 }
 
-static void
-emit_reloc_tail(struct fd_ringbuffer *ring, const struct fd_reloc *reloc,
-                struct fd_pipe *pipe)
-{
-   uint64_t iova = reloc->bo->iova + reloc->offset;
-   int shift = reloc->shift;
-
-   if (shift < 0)
-      iova >>= -shift;
-   else
-      iova <<= shift;
-
-   uint32_t dword = iova;
-
-   (*ring->cur++) = dword | reloc->or;
-
-   if (pipe->gpu_id >= 500) {
-      dword = iova >> 32;
-      (*ring->cur++) = dword | reloc->orhi;
-   }
-}
-
-static void
-msm_ringbuffer_sp_emit_reloc_nonobj(struct fd_ringbuffer *ring,
-                                    const struct fd_reloc *reloc)
-{
-   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
-
-   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
-
-   struct msm_submit_sp *msm_submit =
-         to_msm_submit_sp(msm_ring->u.submit);
-
-   msm_submit_append_bo(msm_submit, reloc->bo);
-
-   emit_reloc_tail(ring, reloc, msm_ring->u.submit->pipe);
-}
-
-static void
-msm_ringbuffer_sp_emit_reloc_obj(struct fd_ringbuffer *ring,
-                                 const struct fd_reloc *reloc)
-{
-   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
-
-   assert(ring->flags & _FD_RINGBUFFER_OBJECT);
-
-   /* Avoid emitting duplicate BO references into the list. Ringbuffer
-    * objects are long-lived, so this saves ongoing work at draw time in
-    * exchange for a bit at context setup/first draw. And the number of
-    * relocs per ringbuffer object is fairly small, so the O(n^2) doesn't
-    * hurt much.
-    */
-   bool found = false;
-   for (int i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
-      if (msm_ring->u.reloc_bos[i] == reloc->bo) {
-         found = true;
-         break;
-      }
-   }
-   if (!found) {
-      APPEND(&msm_ring->u, reloc_bos, fd_bo_ref(reloc->bo));
-   }
-
-   emit_reloc_tail(ring, reloc, msm_ring->u.pipe);
-}
-
-static uint32_t
-msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer *ring,
-                                  struct fd_ringbuffer *target, uint32_t cmd_idx)
-{
-   struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target);
-   struct fd_bo *bo;
-   uint32_t size;
-
-   if ((target->flags & FD_RINGBUFFER_GROWABLE) &&
-       (cmd_idx < msm_target->u.nr_cmds)) {
-      bo = msm_target->u.cmds[cmd_idx].ring_bo;
-      size = msm_target->u.cmds[cmd_idx].size;
-   } else {
-      bo = msm_target->ring_bo;
-      size = offset_bytes(target->cur, target->start);
-   }
-
-   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
-      msm_ringbuffer_sp_emit_reloc_obj(ring, &(struct fd_reloc){
-         .bo = bo,
-         .offset = msm_target->offset,
-      });
-   } else {
-      msm_ringbuffer_sp_emit_reloc_nonobj(ring, &(struct fd_reloc){
-         .bo = bo,
-         .offset = msm_target->offset,
-      });
-   }
-
-   if (!(target->flags & _FD_RINGBUFFER_OBJECT))
-      return size;
-
-   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
-
-   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
-      for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
-         APPEND(&msm_ring->u, reloc_bos, fd_bo_ref(msm_target->u.reloc_bos[i]));
-      }
-   } else {
-      // TODO it would be nice to know whether we have already
-      // seen this target before. But hopefully we hit the
-      // append_bo() fast path enough for this to not matter:
-      struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit);
-
-      for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
-         msm_submit_append_bo(msm_submit, msm_target->u.reloc_bos[i]);
-      }
-   }
-
-   return size;
-}
+#define PTRSZ 64
+#include "msm_ringbuffer_sp.h"
+#undef PTRSZ
+#define PTRSZ 32
+#include "msm_ringbuffer_sp.h"
+#undef PTRSZ
 
 static uint32_t
 msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
@@ -541,18 +430,34 @@ msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
    }
 }
 
-static const struct fd_ringbuffer_funcs ring_funcs_nonobj = {
+static const struct fd_ringbuffer_funcs ring_funcs_nonobj_32 = {
    .grow = msm_ringbuffer_sp_grow,
-   .emit_reloc = msm_ringbuffer_sp_emit_reloc_nonobj,
-   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring,
+   .emit_reloc = msm_ringbuffer_sp_emit_reloc_nonobj_32,
+   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_32,
    .cmd_count = msm_ringbuffer_sp_cmd_count,
    .destroy = msm_ringbuffer_sp_destroy,
 };
 
-static const struct fd_ringbuffer_funcs ring_funcs_obj = {
+static const struct fd_ringbuffer_funcs ring_funcs_obj_32 = {
    .grow = msm_ringbuffer_sp_grow,
-   .emit_reloc = msm_ringbuffer_sp_emit_reloc_obj,
-   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring,
+   .emit_reloc = msm_ringbuffer_sp_emit_reloc_obj_32,
+   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_32,
+   .cmd_count = msm_ringbuffer_sp_cmd_count,
+   .destroy = msm_ringbuffer_sp_destroy,
+};
+
+static const struct fd_ringbuffer_funcs ring_funcs_nonobj_64 = {
+   .grow = msm_ringbuffer_sp_grow,
+   .emit_reloc = msm_ringbuffer_sp_emit_reloc_nonobj_64,
+   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_64,
+   .cmd_count = msm_ringbuffer_sp_cmd_count,
+   .destroy = msm_ringbuffer_sp_destroy,
+};
+
+static const struct fd_ringbuffer_funcs ring_funcs_obj_64 = {
+   .grow = msm_ringbuffer_sp_grow,
+   .emit_reloc = msm_ringbuffer_sp_emit_reloc_obj_64,
+   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_64,
    .cmd_count = msm_ringbuffer_sp_cmd_count,
    .destroy = msm_ringbuffer_sp_destroy,
 };
@@ -579,9 +484,17 @@ msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
    ring->flags = flags;
 
    if (flags & _FD_RINGBUFFER_OBJECT) {
-      ring->funcs = &ring_funcs_obj;
+      if (msm_ring->u.pipe->gpu_id >= 500) {
+         ring->funcs = &ring_funcs_obj_64;
+      } else {
+         ring->funcs = &ring_funcs_obj_32;
+      }
    } else {
-      ring->funcs = &ring_funcs_nonobj;
+      if (msm_ring->u.submit->pipe->gpu_id >= 500) {
+         ring->funcs = &ring_funcs_nonobj_64;
+      } else {
+         ring->funcs = &ring_funcs_nonobj_32;
+      }
    }
 
    // TODO initializing these could probably be conditional on flags
diff --git a/src/freedreno/drm/msm_ringbuffer_sp.h b/src/freedreno/drm/msm_ringbuffer_sp.h
new file mode 100644
index 00000000000..300c4cb55d8
--- /dev/null
+++ b/src/freedreno/drm/msm_ringbuffer_sp.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright © 2021 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifdef X
+#  undef X
+#endif
+
+#if PTRSZ == 32
+#  define X(n) n ## _32
+#else
+#  define X(n) n ## _64
+#endif
+
+
+static void
+X(emit_reloc_tail)(struct fd_ringbuffer *ring, const struct fd_reloc *reloc)
+{
+   uint64_t iova = reloc->bo->iova + reloc->offset;
+   int shift = reloc->shift;
+
+   if (shift < 0)
+      iova >>= -shift;
+   else
+      iova <<= shift;
+
+   uint32_t dword = iova;
+
+   (*ring->cur++) = dword | reloc->or;
+
+#if PTRSZ == 64
+   dword = iova >> 32;
+   (*ring->cur++) = dword | reloc->orhi;
+#endif
+}
+
+static void
+X(msm_ringbuffer_sp_emit_reloc_nonobj)(struct fd_ringbuffer *ring,
+                                       const struct fd_reloc *reloc)
+{
+   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
+
+   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
+
+   struct msm_submit_sp *msm_submit =
+         to_msm_submit_sp(msm_ring->u.submit);
+
+   msm_submit_append_bo(msm_submit, reloc->bo);
+
+   X(emit_reloc_tail)(ring, reloc);
+}
+
+static void
+X(msm_ringbuffer_sp_emit_reloc_obj)(struct fd_ringbuffer *ring,
+                                    const struct fd_reloc *reloc)
+{
+   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
+
+   assert(ring->flags & _FD_RINGBUFFER_OBJECT);
+
+   /* Avoid emitting duplicate BO references into the list. Ringbuffer
+    * objects are long-lived, so this saves ongoing work at draw time in
+    * exchange for a bit at context setup/first draw. And the number of
+    * relocs per ringbuffer object is fairly small, so the O(n^2) doesn't
+    * hurt much.
+    */
+   bool found = false;
+   for (int i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
+      if (msm_ring->u.reloc_bos[i] == reloc->bo) {
+         found = true;
+         break;
+      }
+   }
+   if (!found) {
+      APPEND(&msm_ring->u, reloc_bos, fd_bo_ref(reloc->bo));
+   }
+
+   X(emit_reloc_tail)(ring, reloc);
+}
+
+static uint32_t
+X(msm_ringbuffer_sp_emit_reloc_ring)(struct fd_ringbuffer *ring,
+                                     struct fd_ringbuffer *target, uint32_t cmd_idx)
+{
+   struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target);
+   struct fd_bo *bo;
+   uint32_t size;
+
+   if ((target->flags & FD_RINGBUFFER_GROWABLE) &&
+       (cmd_idx < msm_target->u.nr_cmds)) {
+      bo = msm_target->u.cmds[cmd_idx].ring_bo;
+      size = msm_target->u.cmds[cmd_idx].size;
+   } else {
+      bo = msm_target->ring_bo;
+      size = offset_bytes(target->cur, target->start);
+   }
+
+   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
+      X(msm_ringbuffer_sp_emit_reloc_obj)(ring, &(struct fd_reloc){
+         .bo = bo,
+         .offset = msm_target->offset,
+      });
+   } else {
+      X(msm_ringbuffer_sp_emit_reloc_nonobj)(ring, &(struct fd_reloc){
+         .bo = bo,
+         .offset = msm_target->offset,
+      });
+   }
+
+   if (!(target->flags & _FD_RINGBUFFER_OBJECT))
+      return size;
+
+   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
+
+   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
+      for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
+         APPEND(&msm_ring->u, reloc_bos, fd_bo_ref(msm_target->u.reloc_bos[i]));
+      }
+   } else {
+      // TODO it would be nice to know whether we have already
+      // seen this target before. But hopefully we hit the
+      // append_bo() fast path enough for this to not matter:
+      struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit);
+
+      for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
+         msm_submit_append_bo(msm_submit, msm_target->u.reloc_bos[i]);
+      }
+   }
+
+   return size;
+}
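
Note (template-header idiom): the PTRSZ/X() construct in the new msm_ringbuffer_sp.h is the classic C trick of compiling one header several times to stamp out specialized copies of the same code. msm_ringbuffer_sp.c includes the header once with PTRSZ=64 and once with PTRSZ=32, and X() token-pastes a _32 or _64 suffix onto every function it defines, so both variants coexist in one translation unit. A minimal stand-alone sketch of the idiom; pair.h, addr_dwords, and main.c are hypothetical names for illustration, not part of this patch:

/* pair.h - toy template header; deliberately no include guard,
 * since it is meant to be included once per PTRSZ value. */
#ifdef X
#  undef X
#endif

#if PTRSZ == 32
#  define X(n) n ## _32
#else
#  define X(n) n ## _64
#endif

/* Expands to addr_dwords_32 or addr_dwords_64: */
static unsigned
X(addr_dwords)(void)
{
#if PTRSZ == 64
   return 2;   /* lo dword + hi dword */
#else
   return 1;   /* lo dword only */
#endif
}

/* main.c */
#include <stdio.h>

#define PTRSZ 64
#include "pair.h"
#undef PTRSZ
#define PTRSZ 32
#include "pair.h"
#undef PTRSZ

int main(void)
{
   /* Both specializations now exist in this translation unit: */
   printf("%u %u\n", addr_dwords_32(), addr_dwords_64());   /* prints "1 2" */
   return 0;
}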
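
Note (address emission): the body of X(emit_reloc_tail) is what the removed emit_reloc_tail() did; the only changes are that the pipe->gpu_id >= 500 runtime branch becomes a compile-time #if PTRSZ == 64 and the pipe argument is dropped. The arithmetic: apply the reloc's signed shift to the 64-bit iova, write the low dword OR'd with reloc->or, and on 64-bit GPUs also write the high dword OR'd with reloc->orhi. A self-contained sketch of the 64-bit case; emit_addr64 is a made-up name used to keep the example struct-free:

#include <assert.h>
#include <stdint.h>

static void
emit_addr64(uint32_t **cur, uint64_t iova, int shift,
            uint32_t or_lo, uint32_t or_hi)
{
   /* Negative shift means right-shift, matching the fd_reloc convention: */
   if (shift < 0)
      iova >>= -shift;
   else
      iova <<= shift;

   *(*cur)++ = (uint32_t)iova | or_lo;          /* low dword */
   *(*cur)++ = (uint32_t)(iova >> 32) | or_hi;  /* high dword (gpu_id >= 500) */
}

int main(void)
{
   uint32_t buf[2], *cur = buf;
   emit_addr64(&cur, 0x123456789abcull, 0, 0, 0);
   assert(buf[0] == 0x56789abc);   /* low 32 bits of the iova */
   assert(buf[1] == 0x00001234);   /* high 32 bits of the iova */
   return 0;
}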
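
Note (why specialize at ring creation): emit_reloc runs on the hot path, once per command-stream reference, so the old code paid the pipe->gpu_id >= 500 check on every reloc. After this patch the question is answered once, in msm_ringbuffer_sp_init(), by installing the _32 or _64 fd_ringbuffer_funcs table; the hot path then just calls through the function pointer. A toy model of that one-time vtable selection; all names below are illustrative, not the driver's:

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the generated _32/_64 reloc emitters: */
static void emit_reloc_32(void) { puts("emit one dword"); }
static void emit_reloc_64(void) { puts("emit two dwords"); }

struct ring_vtable {
   void (*emit_reloc)(void);
};

static const struct ring_vtable vtable_32 = { emit_reloc_32 };
static const struct ring_vtable vtable_64 = { emit_reloc_64 };

/* Chosen once at ring creation; adreno a5xx and later (gpu_id >= 500)
 * use 64-bit GPU addresses, so they take the _64 table. */
static const struct ring_vtable *
pick_vtable(uint32_t gpu_id)
{
   return (gpu_id >= 500) ? &vtable_64 : &vtable_32;
}

int main(void)
{
   pick_vtable(630)->emit_reloc();   /* a6xx: "emit two dwords" */
   pick_vtable(420)->emit_reloc();   /* a4xx: "emit one dword" */
   return 0;
}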