winsys/amdgpu,radeonsi: add HUD counters for how much memory is wasted by slabs

Slabs always allocate the next power of two size from their pools. This
wastes memory if the size is not a power of two.

bo->base.size is overwritten because the default is the allocated power of
two size, but we need the real size to compute the wasted size in
amdgpu_bo_slab_destroy. entry_size is added to the hole in pb_slab_entry
to hold the real entry size.

Like other memory stats, no atomics are used.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8683>
This commit is contained in:
Marek Olšák 2021-01-23 17:21:44 -05:00 committed by Marge Bot
parent 7e47fe9a94
commit 965c6445ad
9 changed files with 47 additions and 1 deletions

View File

@ -62,6 +62,7 @@ struct pb_slab_entry
struct list_head head;
struct pb_slab *slab; /* the slab that contains this buffer */
unsigned group_index; /* index into pb_slabs::groups */
unsigned entry_size;
};
/* Descriptor of a slab from which many entries are carved out.

View File

@ -119,6 +119,8 @@ enum radeon_value_id
RADEON_REQUESTED_GTT_MEMORY,
RADEON_MAPPED_VRAM,
RADEON_MAPPED_GTT,
RADEON_SLAB_WASTED_VRAM,
RADEON_SLAB_WASTED_GTT,
RADEON_BUFFER_WAIT_TIME_NS,
RADEON_NUM_MAPPED_BUFFERS,
RADEON_TIMESTAMP,

View File

@ -77,6 +77,10 @@ static enum radeon_value_id winsys_id_from_type(unsigned type)
return RADEON_MAPPED_VRAM;
case SI_QUERY_MAPPED_GTT:
return RADEON_MAPPED_GTT;
case SI_QUERY_SLAB_WASTED_VRAM:
return RADEON_SLAB_WASTED_VRAM;
case SI_QUERY_SLAB_WASTED_GTT:
return RADEON_SLAB_WASTED_GTT;
case SI_QUERY_BUFFER_WAIT_TIME:
return RADEON_BUFFER_WAIT_TIME_NS;
case SI_QUERY_NUM_MAPPED_BUFFERS:
@ -173,6 +177,8 @@ static bool si_query_sw_begin(struct si_context *sctx, struct si_query *squery)
case SI_QUERY_REQUESTED_GTT:
case SI_QUERY_MAPPED_VRAM:
case SI_QUERY_MAPPED_GTT:
case SI_QUERY_SLAB_WASTED_VRAM:
case SI_QUERY_SLAB_WASTED_GTT:
case SI_QUERY_VRAM_USAGE:
case SI_QUERY_VRAM_VIS_USAGE:
case SI_QUERY_GTT_USAGE:
@ -339,6 +345,8 @@ static bool si_query_sw_end(struct si_context *sctx, struct si_query *squery)
case SI_QUERY_REQUESTED_GTT:
case SI_QUERY_MAPPED_VRAM:
case SI_QUERY_MAPPED_GTT:
case SI_QUERY_SLAB_WASTED_VRAM:
case SI_QUERY_SLAB_WASTED_GTT:
case SI_QUERY_VRAM_USAGE:
case SI_QUERY_VRAM_VIS_USAGE:
case SI_QUERY_GTT_USAGE:
@ -1691,6 +1699,8 @@ static struct pipe_driver_query_info si_driver_query_list[] = {
X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE),
X("mapped-VRAM", MAPPED_VRAM, BYTES, AVERAGE),
X("mapped-GTT", MAPPED_GTT, BYTES, AVERAGE),
X("slab-wasted-VRAM", SLAB_WASTED_VRAM, BYTES, AVERAGE),
X("slab-wasted-GTT", SLAB_WASTED_GTT, BYTES, AVERAGE),
X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE),
X("num-mapped-buffers", NUM_MAPPED_BUFFERS, UINT64, AVERAGE),
X("num-GFX-IBs", NUM_GFX_IBS, UINT64, AVERAGE),
@ -1804,11 +1814,13 @@ static int si_get_driver_query_info(struct pipe_screen *screen, unsigned index,
case SI_QUERY_REQUESTED_VRAM:
case SI_QUERY_VRAM_USAGE:
case SI_QUERY_MAPPED_VRAM:
case SI_QUERY_SLAB_WASTED_VRAM:
info->max_value.u64 = sscreen->info.vram_size;
break;
case SI_QUERY_REQUESTED_GTT:
case SI_QUERY_GTT_USAGE:
case SI_QUERY_MAPPED_GTT:
case SI_QUERY_SLAB_WASTED_GTT:
info->max_value.u64 = sscreen->info.gart_size;
break;
case SI_QUERY_GPU_TEMPERATURE:

View File

@ -64,6 +64,8 @@ enum
SI_QUERY_REQUESTED_GTT,
SI_QUERY_MAPPED_VRAM,
SI_QUERY_MAPPED_GTT,
SI_QUERY_SLAB_WASTED_VRAM,
SI_QUERY_SLAB_WASTED_GTT,
SI_QUERY_BUFFER_WAIT_TIME,
SI_QUERY_NUM_MAPPED_BUFFERS,
SI_QUERY_NUM_GFX_IBS,

View File

@ -647,6 +647,15 @@ static struct pb_slabs *get_slabs(struct amdgpu_winsys *ws, uint64_t size,
return NULL;
}
/* Return how many bytes of a slab entry are not used by the buffer that
 * occupies it, i.e. the slab entry size minus the buffer's real size
 * (bo->base.size).
 */
static unsigned get_slab_wasted_size(struct amdgpu_winsys_bo *bo)
{
   const unsigned slab_entry_size = bo->u.slab.entry.entry_size;

   /* The buffer must fit inside the slab entry that backs it. */
   assert(bo->base.size <= slab_entry_size);

   /* Sanity check: the buffer is expected to use more than half of its
    * entry, unless its size is below its alignment or below
    * 1 << min_order of the first slab allocator (bo_slabs[0]).
    */
   assert(bo->base.size < bo->base.alignment ||
          bo->base.size < 1 << bo->ws->bo_slabs[0].min_order ||
          bo->base.size > slab_entry_size / 2);

   return slab_entry_size - bo->base.size;
}
static void amdgpu_bo_slab_destroy(struct pb_buffer *_buf)
{
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
@ -661,6 +670,11 @@ static void amdgpu_bo_slab_destroy(struct pb_buffer *_buf)
pb_slab_free(get_slabs(bo->ws,
bo->base.size,
0), &bo->u.slab.entry);
if (bo->base.placement & RADEON_DOMAIN_VRAM)
bo->ws->slab_wasted_vram -= get_slab_wasted_size(bo);
else
bo->ws->slab_wasted_gtt -= get_slab_wasted_size(bo);
}
static const struct pb_vtbl amdgpu_winsys_bo_slab_vtbl = {
@ -737,6 +751,7 @@ static struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap,
bo->unique_id = base_id + i;
bo->u.slab.entry.slab = &slab->base;
bo->u.slab.entry.group_index = group_index;
bo->u.slab.entry.entry_size = entry_size;
if (slab->buffer->bo) {
/* The slab is not suballocated. */
@ -1331,8 +1346,13 @@ amdgpu_bo_create(struct amdgpu_winsys *ws,
return NULL;
bo = container_of(entry, struct amdgpu_winsys_bo, u.slab.entry);
pipe_reference_init(&bo->base.reference, 1);
bo->base.size = size;
if (domain & RADEON_DOMAIN_VRAM)
ws->slab_wasted_vram += get_slab_wasted_size(bo);
else
ws->slab_wasted_gtt += get_slab_wasted_size(bo);
return &bo->base;
}

View File

@ -223,6 +223,10 @@ static uint64_t amdgpu_query_value(struct radeon_winsys *rws,
return ws->mapped_vram;
case RADEON_MAPPED_GTT:
return ws->mapped_gtt;
case RADEON_SLAB_WASTED_VRAM:
return ws->slab_wasted_vram;
case RADEON_SLAB_WASTED_GTT:
return ws->slab_wasted_gtt;
case RADEON_BUFFER_WAIT_TIME_NS:
return ws->buffer_wait_time;
case RADEON_NUM_MAPPED_BUFFERS:

View File

@ -66,6 +66,8 @@ struct amdgpu_winsys {
uint64_t allocated_gtt;
uint64_t mapped_vram;
uint64_t mapped_gtt;
uint64_t slab_wasted_vram;
uint64_t slab_wasted_gtt;
uint64_t buffer_wait_time; /* time spent in buffer_wait in ns */
uint64_t num_gfx_IBs;
uint64_t num_sdma_IBs;

View File

@ -812,6 +812,7 @@ struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
bo->hash = base_hash + i;
bo->u.slab.entry.slab = &slab->base;
bo->u.slab.entry.group_index = group_index;
bo->u.slab.entry.entry_size = entry_size;
bo->u.slab.real = slab->buffer;
list_addtail(&bo->u.slab.entry.head, &slab->base.free);

View File

@ -730,6 +730,8 @@ static uint64_t radeon_query_value(struct radeon_winsys *rws,
case RADEON_VRAM_VIS_USAGE:
case RADEON_GFX_BO_LIST_COUNTER:
case RADEON_GFX_IB_SIZE_COUNTER:
case RADEON_SLAB_WASTED_VRAM:
case RADEON_SLAB_WASTED_GTT:
return 0; /* unimplemented */
case RADEON_VRAM_USAGE:
radeon_get_drm_value(ws->fd, RADEON_INFO_VRAM_USAGE,