freedreno/cffdec: Use rb trees for tracking buffers
Gets rid of the arbitrary size limitation, and should make decoding faster with many buffers. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8838>
This commit is contained in:
parent
b43f40166c
commit
ccd7986f59
|
@ -30,8 +30,10 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
#include "buffers.h"
|
#include "buffers.h"
|
||||||
|
#include "util/rb_tree.h"
|
||||||
|
|
||||||
struct buffer {
|
struct buffer {
|
||||||
|
struct rb_node node;
|
||||||
void *hostptr;
|
void *hostptr;
|
||||||
unsigned int len;
|
unsigned int len;
|
||||||
uint64_t gpuaddr;
|
uint64_t gpuaddr;
|
||||||
|
@ -46,13 +48,31 @@ struct buffer {
|
||||||
unsigned noffsets;
|
unsigned noffsets;
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct buffer buffers[512];
|
static struct rb_tree buffers;
|
||||||
static int nbuffers;
|
|
||||||
|
|
||||||
static int
|
static int buffer_insert_cmp(const struct rb_node *n1, const struct rb_node *n2)
|
||||||
buffer_contains_gpuaddr(struct buffer *buf, uint64_t gpuaddr, uint32_t len)
|
|
||||||
{
|
{
|
||||||
return (buf->gpuaddr <= gpuaddr) && (gpuaddr < (buf->gpuaddr + buf->len));
|
const struct buffer *buf1 = (const struct buffer *) n1;
|
||||||
|
const struct buffer *buf2 = (const struct buffer *) n2;
|
||||||
|
return buf1->gpuaddr - buf2->gpuaddr;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int buffer_search_cmp(const struct rb_node *node, const void *addrptr)
|
||||||
|
{
|
||||||
|
const struct buffer *buf = (const struct buffer *) node;
|
||||||
|
uint64_t gpuaddr = *(uint64_t *)addrptr;
|
||||||
|
if (buf->gpuaddr + buf->len <= gpuaddr)
|
||||||
|
return -1;
|
||||||
|
else if (buf->gpuaddr > gpuaddr)
|
||||||
|
return 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct buffer *get_buffer(uint64_t gpuaddr)
|
||||||
|
{
|
||||||
|
if (gpuaddr == 0)
|
||||||
|
return NULL;
|
||||||
|
return (struct buffer *) rb_tree_search(&buffers, &gpuaddr, buffer_search_cmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
@ -65,47 +85,41 @@ buffer_contains_hostptr(struct buffer *buf, void *hostptr)
|
||||||
uint64_t
|
uint64_t
|
||||||
gpuaddr(void *hostptr)
|
gpuaddr(void *hostptr)
|
||||||
{
|
{
|
||||||
int i;
|
rb_tree_foreach(struct buffer, buf, &buffers, node) {
|
||||||
for (i = 0; i < nbuffers; i++)
|
if (buffer_contains_hostptr(buf, hostptr))
|
||||||
if (buffer_contains_hostptr(&buffers[i], hostptr))
|
return buf->gpuaddr + (hostptr - buf->hostptr);
|
||||||
return buffers[i].gpuaddr + (hostptr - buffers[i].hostptr);
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t
|
uint64_t
|
||||||
gpubaseaddr(uint64_t gpuaddr)
|
gpubaseaddr(uint64_t gpuaddr)
|
||||||
{
|
{
|
||||||
int i;
|
struct buffer *buf = get_buffer(gpuaddr);
|
||||||
if (!gpuaddr)
|
if (buf)
|
||||||
|
return buf->gpuaddr;
|
||||||
|
else
|
||||||
return 0;
|
return 0;
|
||||||
for (i = 0; i < nbuffers; i++)
|
|
||||||
if (buffer_contains_gpuaddr(&buffers[i], gpuaddr, 0))
|
|
||||||
return buffers[i].gpuaddr;
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void *
|
void *
|
||||||
hostptr(uint64_t gpuaddr)
|
hostptr(uint64_t gpuaddr)
|
||||||
{
|
{
|
||||||
int i;
|
struct buffer *buf = get_buffer(gpuaddr);
|
||||||
if (!gpuaddr)
|
if (buf)
|
||||||
|
return buf->hostptr + (gpuaddr - buf->gpuaddr);
|
||||||
|
else
|
||||||
return 0;
|
return 0;
|
||||||
for (i = 0; i < nbuffers; i++)
|
|
||||||
if (buffer_contains_gpuaddr(&buffers[i], gpuaddr, 0))
|
|
||||||
return buffers[i].hostptr + (gpuaddr - buffers[i].gpuaddr);
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned
|
unsigned
|
||||||
hostlen(uint64_t gpuaddr)
|
hostlen(uint64_t gpuaddr)
|
||||||
{
|
{
|
||||||
int i;
|
struct buffer *buf = get_buffer(gpuaddr);
|
||||||
if (!gpuaddr)
|
if (buf)
|
||||||
|
return buf->len + buf->gpuaddr - gpuaddr;
|
||||||
|
else
|
||||||
return 0;
|
return 0;
|
||||||
for (i = 0; i < nbuffers; i++)
|
|
||||||
if (buffer_contains_gpuaddr(&buffers[i], gpuaddr, 0))
|
|
||||||
return buffers[i].len + buffers[i].gpuaddr - gpuaddr;
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
|
@ -114,49 +128,44 @@ has_dumped(uint64_t gpuaddr, unsigned enable_mask)
|
||||||
if (!gpuaddr)
|
if (!gpuaddr)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
for (int i = 0; i < nbuffers; i++) {
|
struct buffer *b = get_buffer(gpuaddr);
|
||||||
if (buffer_contains_gpuaddr(&buffers[i], gpuaddr, 0)) {
|
if (!b)
|
||||||
struct buffer *b = &buffers[i];
|
return false;
|
||||||
assert(gpuaddr >= b->gpuaddr);
|
|
||||||
unsigned offset = gpuaddr - b->gpuaddr;
|
|
||||||
|
|
||||||
unsigned n = 0;
|
assert(gpuaddr >= b->gpuaddr);
|
||||||
while (n < b->noffsets) {
|
unsigned offset = gpuaddr - b->gpuaddr;
|
||||||
if (offset == b->offsets[n].offset)
|
|
||||||
break;
|
|
||||||
n++;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* if needed, allocate a new offset entry: */
|
unsigned n = 0;
|
||||||
if (n == b->noffsets) {
|
while (n < b->noffsets) {
|
||||||
b->noffsets++;
|
if (offset == b->offsets[n].offset)
|
||||||
assert(b->noffsets < ARRAY_SIZE(b->offsets));
|
break;
|
||||||
b->offsets[n].dumped_mask = 0;
|
n++;
|
||||||
b->offsets[n].offset = offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((b->offsets[n].dumped_mask & enable_mask) == enable_mask)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
b->offsets[n].dumped_mask |= enable_mask;
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* if needed, allocate a new offset entry: */
|
||||||
|
if (n == b->noffsets) {
|
||||||
|
b->noffsets++;
|
||||||
|
assert(b->noffsets < ARRAY_SIZE(b->offsets));
|
||||||
|
b->offsets[n].dumped_mask = 0;
|
||||||
|
b->offsets[n].offset = offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((b->offsets[n].dumped_mask & enable_mask) == enable_mask)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
b->offsets[n].dumped_mask |= enable_mask;
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
reset_buffers(void)
|
reset_buffers(void)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < nbuffers; i++) {
|
rb_tree_foreach_safe(struct buffer, buf, &buffers, node) {
|
||||||
free(buffers[i].hostptr);
|
rb_tree_remove(&buffers, &buf->node);
|
||||||
buffers[i].hostptr = NULL;
|
free(buf->hostptr);
|
||||||
buffers[i].len = 0;
|
free(buf);
|
||||||
buffers[i].noffsets = 0;
|
|
||||||
}
|
}
|
||||||
nbuffers = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -166,26 +175,16 @@ reset_buffers(void)
|
||||||
void
|
void
|
||||||
add_buffer(uint64_t gpuaddr, unsigned int len, void *hostptr)
|
add_buffer(uint64_t gpuaddr, unsigned int len, void *hostptr)
|
||||||
{
|
{
|
||||||
int i;
|
struct buffer *buf = get_buffer(gpuaddr);
|
||||||
|
|
||||||
for (i = 0; i < nbuffers; i++) {
|
if (!buf) {
|
||||||
if (buffers[i].gpuaddr == gpuaddr)
|
buf = calloc(sizeof(struct buffer), 1);
|
||||||
break;
|
buf->gpuaddr = gpuaddr;
|
||||||
|
rb_tree_insert(&buffers, &buf->node, buffer_insert_cmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i == nbuffers) {
|
assert(buf->gpuaddr == gpuaddr);
|
||||||
/* some traces, like test-perf, with some blob versions,
|
|
||||||
* seem to generate an unreasonable # of gpu buffers (a
|
|
||||||
* leak?), so just ignore them.
|
|
||||||
*/
|
|
||||||
if (nbuffers >= ARRAY_SIZE(buffers)) {
|
|
||||||
free(hostptr);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
nbuffers++;
|
|
||||||
}
|
|
||||||
|
|
||||||
buffers[i].hostptr = hostptr;
|
buf->hostptr = hostptr;
|
||||||
buffers[i].len = len;
|
buf->len = len;
|
||||||
buffers[i].gpuaddr = gpuaddr;
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,6 +45,7 @@ libfreedreno_cffdec = static_library(
|
||||||
libfreedreno_rnn,
|
libfreedreno_rnn,
|
||||||
libfreedreno_ir2, # for disasm_a2xx
|
libfreedreno_ir2, # for disasm_a2xx
|
||||||
libfreedreno_ir3, # for disasm_a3xx
|
libfreedreno_ir3, # for disasm_a3xx
|
||||||
|
_libmesa_util,
|
||||||
],
|
],
|
||||||
build_by_default: false,
|
build_by_default: false,
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue