freedreno/drm: Split 64b vs 32b paths

No need to 'if (gpu_id >= 500)' on every reloc

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9581>
Rob Clark 2021-03-12 09:40:01 -08:00
parent 9168d9cbfb
commit 684586b96e
2 changed files with 188 additions and 125 deletions
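The trick this commit introduces is the classic preprocessor "template" for C: the function bodies move into a header with no include guard, an X() macro pastes a size suffix onto every name, and the .c file includes the header twice with different PTRSZ values. A minimal standalone sketch of the pattern, with hypothetical names (emit.h, write_addr) that are not part of this commit:

   /* emit.h -- no include guard, included once per PTRSZ value */
   #ifdef X
   #  undef X
   #endif
   #if PTRSZ == 32
   #  define X(n) n ## _32
   #else
   #  define X(n) n ## _64
   #endif

   /* compiles as write_addr_32 or write_addr_64 depending on PTRSZ */
   static void
   X(write_addr)(uint32_t **cur, uint64_t iova)
   {
      *(*cur)++ = (uint32_t)iova;           /* low dword: both variants */
   #if PTRSZ == 64
      *(*cur)++ = (uint32_t)(iova >> 32);   /* high dword: 64b only */
   #endif
   }

   /* emit.c -- instantiate both variants once, at compile time */
   #include <stdint.h>
   #define PTRSZ 64
   #include "emit.h"
   #undef PTRSZ
   #define PTRSZ 32
   #include "emit.h"
   #undef PTRSZ

Each generated pair is then wired into a per-variant function table, so the 32b-vs-64b decision is made once per ringbuffer instead of on every emitted address.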

src/freedreno/drm/msm_ringbuffer_sp.c

@@ -389,123 +389,12 @@ msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
    ring->size = size;
 }
 
-static void
-emit_reloc_tail(struct fd_ringbuffer *ring, const struct fd_reloc *reloc,
-                struct fd_pipe *pipe)
-{
-   uint64_t iova = reloc->bo->iova + reloc->offset;
-   int shift = reloc->shift;
-
-   if (shift < 0)
-      iova >>= -shift;
-   else
-      iova <<= shift;
-
-   uint32_t dword = iova;
-
-   (*ring->cur++) = dword | reloc->or;
-
-   if (pipe->gpu_id >= 500) {
-      dword = iova >> 32;
-      (*ring->cur++) = dword | reloc->orhi;
-   }
-}
-
-static void
-msm_ringbuffer_sp_emit_reloc_nonobj(struct fd_ringbuffer *ring,
-                                    const struct fd_reloc *reloc)
-{
-   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
-
-   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
-
-   struct msm_submit_sp *msm_submit =
-      to_msm_submit_sp(msm_ring->u.submit);
-
-   msm_submit_append_bo(msm_submit, reloc->bo);
-
-   emit_reloc_tail(ring, reloc, msm_ring->u.submit->pipe);
-}
-
-static void
-msm_ringbuffer_sp_emit_reloc_obj(struct fd_ringbuffer *ring,
-                                 const struct fd_reloc *reloc)
-{
-   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
-
-   assert(ring->flags & _FD_RINGBUFFER_OBJECT);
-
-   /* Avoid emitting duplicate BO references into the list. Ringbuffer
-    * objects are long-lived, so this saves ongoing work at draw time in
-    * exchange for a bit at context setup/first draw. And the number of
-    * relocs per ringbuffer object is fairly small, so the O(n^2) doesn't
-    * hurt much.
-    */
-   bool found = false;
-   for (int i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
-      if (msm_ring->u.reloc_bos[i] == reloc->bo) {
-         found = true;
-         break;
-      }
-   }
-   if (!found) {
-      APPEND(&msm_ring->u, reloc_bos, fd_bo_ref(reloc->bo));
-   }
-
-   emit_reloc_tail(ring, reloc, msm_ring->u.pipe);
-}
-
-static uint32_t
-msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer *ring,
-                                  struct fd_ringbuffer *target, uint32_t cmd_idx)
-{
-   struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target);
-   struct fd_bo *bo;
-   uint32_t size;
-
-   if ((target->flags & FD_RINGBUFFER_GROWABLE) &&
-       (cmd_idx < msm_target->u.nr_cmds)) {
-      bo = msm_target->u.cmds[cmd_idx].ring_bo;
-      size = msm_target->u.cmds[cmd_idx].size;
-   } else {
-      bo = msm_target->ring_bo;
-      size = offset_bytes(target->cur, target->start);
-   }
-
-   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
-      msm_ringbuffer_sp_emit_reloc_obj(ring, &(struct fd_reloc){
-         .bo = bo,
-         .offset = msm_target->offset,
-      });
-   } else {
-      msm_ringbuffer_sp_emit_reloc_nonobj(ring, &(struct fd_reloc){
-         .bo = bo,
-         .offset = msm_target->offset,
-      });
-   }
-
-   if (!(target->flags & _FD_RINGBUFFER_OBJECT))
-      return size;
-
-   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
-
-   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
-      for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
-         APPEND(&msm_ring->u, reloc_bos, fd_bo_ref(msm_target->u.reloc_bos[i]));
-      }
-   } else {
-      // TODO it would be nice to know whether we have already
-      // seen this target before. But hopefully we hit the
-      // append_bo() fast path enough for this to not matter:
-      struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit);
-
-      for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
-         msm_submit_append_bo(msm_submit, msm_target->u.reloc_bos[i]);
-      }
-   }
-
-   return size;
-}
-
+#define PTRSZ 64
+#include "msm_ringbuffer_sp.h"
+#undef PTRSZ
+#define PTRSZ 32
+#include "msm_ringbuffer_sp.h"
+#undef PTRSZ
 
 static uint32_t
 msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
@@ -541,18 +430,34 @@ msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
    }
 }
 
-static const struct fd_ringbuffer_funcs ring_funcs_nonobj = {
+static const struct fd_ringbuffer_funcs ring_funcs_nonobj_32 = {
    .grow = msm_ringbuffer_sp_grow,
-   .emit_reloc = msm_ringbuffer_sp_emit_reloc_nonobj,
-   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring,
+   .emit_reloc = msm_ringbuffer_sp_emit_reloc_nonobj_32,
+   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_32,
    .cmd_count = msm_ringbuffer_sp_cmd_count,
    .destroy = msm_ringbuffer_sp_destroy,
 };
 
-static const struct fd_ringbuffer_funcs ring_funcs_obj = {
+static const struct fd_ringbuffer_funcs ring_funcs_obj_32 = {
    .grow = msm_ringbuffer_sp_grow,
-   .emit_reloc = msm_ringbuffer_sp_emit_reloc_obj,
-   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring,
+   .emit_reloc = msm_ringbuffer_sp_emit_reloc_obj_32,
+   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_32,
    .cmd_count = msm_ringbuffer_sp_cmd_count,
    .destroy = msm_ringbuffer_sp_destroy,
 };
+
+static const struct fd_ringbuffer_funcs ring_funcs_nonobj_64 = {
+   .grow = msm_ringbuffer_sp_grow,
+   .emit_reloc = msm_ringbuffer_sp_emit_reloc_nonobj_64,
+   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_64,
+   .cmd_count = msm_ringbuffer_sp_cmd_count,
+   .destroy = msm_ringbuffer_sp_destroy,
+};
+
+static const struct fd_ringbuffer_funcs ring_funcs_obj_64 = {
+   .grow = msm_ringbuffer_sp_grow,
+   .emit_reloc = msm_ringbuffer_sp_emit_reloc_obj_64,
+   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_64,
+   .cmd_count = msm_ringbuffer_sp_cmd_count,
+   .destroy = msm_ringbuffer_sp_destroy,
+};
@@ -579,9 +484,17 @@ msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
    ring->flags = flags;
 
    if (flags & _FD_RINGBUFFER_OBJECT) {
-      ring->funcs = &ring_funcs_obj;
+      if (msm_ring->u.pipe->gpu_id >= 500) {
+         ring->funcs = &ring_funcs_obj_64;
+      } else {
+         ring->funcs = &ring_funcs_obj_32;
+      }
    } else {
-      ring->funcs = &ring_funcs_nonobj;
+      if (msm_ring->u.submit->pipe->gpu_id >= 500) {
+         ring->funcs = &ring_funcs_nonobj_64;
+      } else {
+         ring->funcs = &ring_funcs_nonobj_32;
+      }
    }
 
    // TODO initializing these could probably be conditional on flags
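
With four static tables, the 32b/64b choice is bound once, when the ring is created; every reloc after that is a plain indirect call with no per-reloc branch. The call-site shape is roughly the following (hypothetical inline wrapper, based only on the emit_reloc field visible above):

   static inline void
   emit_reloc(struct fd_ringbuffer *ring, const struct fd_reloc *reloc)
   {
      /* resolves to the _32 or _64 variant chosen at init time */
      ring->funcs->emit_reloc(ring, reloc);
   }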

src/freedreno/drm/msm_ringbuffer_sp.h (new file)

@@ -0,0 +1,150 @@
+/*
+ * Copyright © 2021 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifdef X
+#  undef X
+#endif
+
+#if PTRSZ == 32
+#  define X(n) n ## _32
+#else
+#  define X(n) n ## _64
+#endif
+
+static void
+X(emit_reloc_tail)(struct fd_ringbuffer *ring, const struct fd_reloc *reloc)
+{
+   uint64_t iova = reloc->bo->iova + reloc->offset;
+   int shift = reloc->shift;
+
+   if (shift < 0)
+      iova >>= -shift;
+   else
+      iova <<= shift;
+
+   uint32_t dword = iova;
+
+   (*ring->cur++) = dword | reloc->or;
+
+#if PTRSZ == 64
+   dword = iova >> 32;
+
+   (*ring->cur++) = dword | reloc->orhi;
+#endif
+}
+
+static void
+X(msm_ringbuffer_sp_emit_reloc_nonobj)(struct fd_ringbuffer *ring,
+                                       const struct fd_reloc *reloc)
+{
+   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
+
+   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
+
+   struct msm_submit_sp *msm_submit =
+      to_msm_submit_sp(msm_ring->u.submit);
+
+   msm_submit_append_bo(msm_submit, reloc->bo);
+
+   X(emit_reloc_tail)(ring, reloc);
+}
+
+static void
+X(msm_ringbuffer_sp_emit_reloc_obj)(struct fd_ringbuffer *ring,
+                                    const struct fd_reloc *reloc)
+{
+   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
+
+   assert(ring->flags & _FD_RINGBUFFER_OBJECT);
+
+   /* Avoid emitting duplicate BO references into the list. Ringbuffer
+    * objects are long-lived, so this saves ongoing work at draw time in
+    * exchange for a bit at context setup/first draw. And the number of
+    * relocs per ringbuffer object is fairly small, so the O(n^2) doesn't
+    * hurt much.
+    */
+   bool found = false;
+   for (int i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
+      if (msm_ring->u.reloc_bos[i] == reloc->bo) {
+         found = true;
+         break;
+      }
+   }
+   if (!found) {
+      APPEND(&msm_ring->u, reloc_bos, fd_bo_ref(reloc->bo));
+   }
+
+   X(emit_reloc_tail)(ring, reloc);
+}
+
+static uint32_t
+X(msm_ringbuffer_sp_emit_reloc_ring)(struct fd_ringbuffer *ring,
+                                     struct fd_ringbuffer *target, uint32_t cmd_idx)
+{
+   struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target);
+   struct fd_bo *bo;
+   uint32_t size;
+
+   if ((target->flags & FD_RINGBUFFER_GROWABLE) &&
+       (cmd_idx < msm_target->u.nr_cmds)) {
+      bo = msm_target->u.cmds[cmd_idx].ring_bo;
+      size = msm_target->u.cmds[cmd_idx].size;
+   } else {
+      bo = msm_target->ring_bo;
+      size = offset_bytes(target->cur, target->start);
+   }
+
+   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
+      X(msm_ringbuffer_sp_emit_reloc_obj)(ring, &(struct fd_reloc){
+         .bo = bo,
+         .offset = msm_target->offset,
+      });
+   } else {
+      X(msm_ringbuffer_sp_emit_reloc_nonobj)(ring, &(struct fd_reloc){
+         .bo = bo,
+         .offset = msm_target->offset,
+      });
+   }
+
+   if (!(target->flags & _FD_RINGBUFFER_OBJECT))
+      return size;
+
+   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
+
+   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
+      for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
+         APPEND(&msm_ring->u, reloc_bos, fd_bo_ref(msm_target->u.reloc_bos[i]));
+      }
+   } else {
+      // TODO it would be nice to know whether we have already
+      // seen this target before. But hopefully we hit the
+      // append_bo() fast path enough for this to not matter:
+      struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit);
+
+      for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
+         msm_submit_append_bo(msm_submit, msm_target->u.reloc_bos[i]);
+      }
+   }
+
+   return size;
+}
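
For reference, the ## token pasting in the header means each X(name) use expands to a distinct symbol per inclusion (illustrative expansion; the mechanism is standard C paste-after-expansion):

   X(emit_reloc_tail)  ->  emit_reloc_tail_32   /* when PTRSZ == 32 */
   X(emit_reloc_tail)  ->  emit_reloc_tail_64   /* when PTRSZ == 64 */

The net effect of the commit: the pipe->gpu_id >= 500 check that previously ran inside emit_reloc_tail() on every reloc now runs exactly once per ringbuffer, in msm_ringbuffer_sp_init(), when the funcs table is selected; the 64b variant always emits both dwords and the 32b variant emits one, with no runtime branch in between.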