panfrost: Allocate polygon lists on-demand

Rather than allocating a huge (64MB) polygon list on context creation
and sharing it across framebuffers, we instead allocate appropriately
sized polygon lists on-demand, as per-batch BOs (which consistently hit
the BO cache, so the allocations are cheap). We have only known how to
calculate the polygon list size for about a month, so this change has
only recently become possible.

The good news is that we can render to truly massive framebuffers
without crashing and, more importantly, we eliminate the 64MB upfront
overhead: if a list that size isn't actually needed, it's never
allocated.
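
In short, each batch lazily allocates its own list the first time one
is needed. Below is a minimal, standalone sketch of the pattern; the
struct and allocator names are illustrative stand-ins, not the driver's
real API (the real implementation is panfrost_job_get_polygon_list()
in the diff below):

    #include <assert.h>
    #include <stdint.h>
    #include <stdlib.h>

    /* Illustrative stand-ins for the driver's BO and batch types */
    struct bo {
            uint64_t gpu;  /* GPU virtual address */
            size_t size;   /* size of the allocation in bytes */
    };

    struct batch {
            struct bo *polygon_list;  /* NULL until a draw needs one */
    };

    /* Stand-in for a cached BO allocator; in the driver, freed BOs land
     * in a cache, so one allocate/free per batch is cheap in the steady
     * state */
    static struct bo *
    bo_create(size_t size)
    {
            struct bo *bo = malloc(sizeof(*bo));
            bo->size = size;
            bo->gpu = (uint64_t)(uintptr_t)malloc(size);  /* fake GPU address */
            return bo;
    }

    /* Return the batch's polygon list, allocating on first use and
     * sized for the actual framebuffer rather than a 64MB worst case */
    static uint64_t
    get_polygon_list(struct batch *batch, size_t size)
    {
            if (batch->polygon_list) {
                    /* A list is already bound; it must fit this draw */
                    assert(batch->polygon_list->size >= size);
            } else {
                    batch->polygon_list = bo_create(size);
            }

            return batch->polygon_list->gpu;
    }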

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Authored by Alyssa Rosenzweig on 2019-08-02 19:18:48 +02:00; committed by Boris Brezillon
parent ed501c00cb
commit cd98d94516
6 changed files with 36 additions and 10 deletions


@@ -62,6 +62,7 @@ panfrost_emit_midg_tiler(
                 unsigned vertex_count)
 {
         struct midgard_tiler_descriptor t = {};
+        struct panfrost_job *batch = panfrost_get_job_for_fbo(ctx);

         t.hierarchy_mask =
                 panfrost_choose_hierarchy_mask(width, height, vertex_count);
@@ -77,10 +78,7 @@ panfrost_emit_midg_tiler(
         /* Sanity check */

         if (t.hierarchy_mask) {
-                assert(ctx->tiler_polygon_list.bo->size >= (header_size + body_size));
-
-                /* Specify allocated tiler structures */
-                t.polygon_list = ctx->tiler_polygon_list.bo->gpu;
+                t.polygon_list = panfrost_job_get_polygon_list(batch, header_size + body_size);

                 /* Allow the entire tiler heap */
                 t.heap_start = ctx->tiler_heap.bo->gpu;
@@ -2532,7 +2530,6 @@ panfrost_destroy(struct pipe_context *pipe)
         panfrost_drm_free_slab(screen, &panfrost->scratchpad);
         panfrost_drm_free_slab(screen, &panfrost->shaders);
         panfrost_drm_free_slab(screen, &panfrost->tiler_heap);
-        panfrost_drm_free_slab(screen, &panfrost->tiler_polygon_list);
         panfrost_drm_free_slab(screen, &panfrost->tiler_dummy);

         ralloc_free(pipe);
@@ -2678,7 +2675,6 @@ panfrost_setup_hardware(struct panfrost_context *ctx)
         panfrost_drm_allocate_slab(screen, &ctx->scratchpad, 64*4, false, 0, 0, 0);
         panfrost_drm_allocate_slab(screen, &ctx->shaders, 4096, true, PAN_ALLOCATE_EXECUTE, 0, 0);
         panfrost_drm_allocate_slab(screen, &ctx->tiler_heap, 4096, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128);
-        panfrost_drm_allocate_slab(screen, &ctx->tiler_polygon_list, 128*128, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128);
         panfrost_drm_allocate_slab(screen, &ctx->tiler_dummy, 1, false, PAN_ALLOCATE_INVISIBLE, 0, 0);
 }



@@ -111,7 +111,6 @@ struct panfrost_context {
         struct panfrost_memory shaders;
         struct panfrost_memory scratchpad;
         struct panfrost_memory tiler_heap;
-        struct panfrost_memory tiler_polygon_list;
         struct panfrost_memory tiler_dummy;
         struct panfrost_memory depth_stencil_buffer;


@@ -288,7 +288,7 @@ panfrost_drm_submit_vs_fs_job(struct panfrost_context *ctx, bool has_draws, bool
         panfrost_job_add_bo(job, ctx->shaders.bo);
         panfrost_job_add_bo(job, ctx->scratchpad.bo);
         panfrost_job_add_bo(job, ctx->tiler_heap.bo);
-        panfrost_job_add_bo(job, ctx->tiler_polygon_list.bo);
+        panfrost_job_add_bo(job, job->polygon_list);

         if (job->first_job.gpu) {
                 ret = panfrost_drm_submit_job(ctx, job->first_job.gpu, 0);


@@ -72,6 +72,9 @@ panfrost_free_job(struct panfrost_context *ctx, struct panfrost_job *job)
                 BITSET_SET(screen->free_transient, *index);
         }

+        /* Unreference the polygon list */
+        panfrost_bo_unreference(ctx->base.screen, job->polygon_list);
+
         _mesa_hash_table_remove_key(ctx->jobs, &job->key);

         if (ctx->job == job)
@@ -160,6 +163,27 @@ panfrost_job_add_bo(struct panfrost_job *job, struct panfrost_bo *bo)
         _mesa_set_add(job->bos, bo);
 }

+/* Returns the polygon list's GPU address if available, or otherwise allocates
+ * the polygon list. It's perfectly fast to use allocate/free BO directly,
+ * since we'll hit the BO cache and this is one-per-batch anyway. */
+
+mali_ptr
+panfrost_job_get_polygon_list(struct panfrost_job *batch, unsigned size)
+{
+        if (batch->polygon_list) {
+                assert(batch->polygon_list->size >= size);
+        } else {
+                struct panfrost_screen *screen = pan_screen(batch->ctx->base.screen);
+
+                /* Create the BO as invisible, as there's no reason to map */
+
+                batch->polygon_list = panfrost_drm_create_bo(screen,
+                                size, PAN_ALLOCATE_INVISIBLE);
+        }
+
+        return batch->polygon_list->gpu;
+}
+
 void
 panfrost_flush_jobs_writing_resource(struct panfrost_context *panfrost,
                 struct pipe_resource *prsc)


@@ -112,6 +112,9 @@ struct panfrost_job {
         /* Within the topmost transient BO, how much has been used? */
         unsigned transient_offset;

+        /* Polygon list bound to the batch, or NULL if none bound yet */
+        struct panfrost_bo *polygon_list;
+
 };

 /* Functions for managing the above */
@@ -150,6 +153,9 @@ void
 panfrost_job_set_requirements(struct panfrost_context *ctx,
                 struct panfrost_job *job);

+mali_ptr
+panfrost_job_get_polygon_list(struct panfrost_job *batch, unsigned size);
+
 void
 panfrost_job_clear(struct panfrost_context *ctx,
                 struct panfrost_job *job,


@@ -303,10 +303,11 @@ panfrost_scoreboard_set_value(struct panfrost_job *batch)
         if (!batch->last_tiler.gpu)
                 return;

-        /* Okay, we do. Let's generate it */
+        /* Okay, we do. Let's generate it. We'll need the job's polygon list
+         * regardless of size. */

         struct panfrost_context *ctx = batch->ctx;
-        mali_ptr polygon_list = ctx->tiler_polygon_list.bo->gpu;
+        mali_ptr polygon_list = panfrost_job_get_polygon_list(batch, 0);

         struct panfrost_transfer job =
                 panfrost_set_value_job(ctx, polygon_list);