panfrost: Eliminate reserve_* functions
We always want to reserve _something_, so reserve what we need at batch creation time and stop trying to re-reserve in a zillion places after. This has a negligible (<128 bytes per batch) increase in memory usage for compute-only workloads, but given the amount of simplification, that's a fair tradeoff. Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11074>
This commit is contained in:
parent
ae93796cc0
commit
478ae974a1
|
@ -298,7 +298,7 @@ panfrost_blit(struct pipe_context *pipe,
|
|||
mali_ptr tiler = pan_is_bifrost(dev) ?
|
||||
panfrost_batch_get_bifrost_tiler(batch, ~0) : 0;
|
||||
pan_blit(&bctx, &batch->pool, &batch->scoreboard,
|
||||
panfrost_batch_reserve_tls(batch, false), tiler);
|
||||
batch->tls.gpu, tiler);
|
||||
|
||||
/* We don't want this batch to interfere with subsequent draw
|
||||
* calls, but we want to keep it in the list of pending batches
|
||||
|
|
|
@ -103,11 +103,6 @@ panfrost_launch_grid(struct pipe_context *pipe,
|
|||
struct panfrost_device *dev = pan_device(pipe->screen);
|
||||
struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
|
||||
|
||||
/* Reserve a thread storage descriptor now (will be emitted at submit
|
||||
* time).
|
||||
*/
|
||||
panfrost_batch_reserve_tls(batch, true);
|
||||
|
||||
ctx->compute_grid = info;
|
||||
|
||||
struct panfrost_ptr t =
|
||||
|
|
|
@ -227,8 +227,7 @@ static void
|
|||
panfrost_draw_emit_vertex(struct panfrost_batch *batch,
|
||||
const struct pipe_draw_info *info,
|
||||
void *invocation_template,
|
||||
mali_ptr shared_mem, mali_ptr vs_vary,
|
||||
mali_ptr varyings,
|
||||
mali_ptr vs_vary, mali_ptr varyings,
|
||||
mali_ptr attribs, mali_ptr attrib_bufs,
|
||||
void *job)
|
||||
{
|
||||
|
@ -252,7 +251,7 @@ panfrost_draw_emit_vertex(struct panfrost_batch *batch,
|
|||
cfg.attribute_buffers = attrib_bufs;
|
||||
cfg.varyings = vs_vary;
|
||||
cfg.varying_buffers = vs_vary ? varyings : 0;
|
||||
cfg.thread_storage = shared_mem;
|
||||
cfg.thread_storage = batch->tls.gpu;
|
||||
pan_emit_draw_descs(batch, &cfg, PIPE_SHADER_VERTEX);
|
||||
}
|
||||
|
||||
|
@ -357,8 +356,7 @@ panfrost_draw_emit_tiler(struct panfrost_batch *batch,
|
|||
const struct pipe_draw_info *info,
|
||||
const struct pipe_draw_start_count_bias *draw,
|
||||
void *invocation_template,
|
||||
mali_ptr shared_mem, mali_ptr indices,
|
||||
mali_ptr fs_vary, mali_ptr varyings,
|
||||
mali_ptr indices, mali_ptr fs_vary, mali_ptr varyings,
|
||||
mali_ptr pos, mali_ptr psiz, void *job)
|
||||
{
|
||||
struct panfrost_context *ctx = batch->ctx;
|
||||
|
@ -438,7 +436,7 @@ panfrost_draw_emit_tiler(struct panfrost_batch *batch,
|
|||
cfg.viewport = batch->viewport;
|
||||
cfg.varyings = fs_vary;
|
||||
cfg.varying_buffers = fs_vary ? varyings : 0;
|
||||
cfg.thread_storage = shared_mem;
|
||||
cfg.thread_storage = batch->tls.gpu;
|
||||
|
||||
/* For all primitives but lines DRAW.flat_shading_vertex must
|
||||
* be set to 0 and the provoking vertex is selected with the
|
||||
|
@ -516,8 +514,6 @@ panfrost_direct_draw(struct panfrost_batch *batch,
|
|||
|
||||
unsigned vertex_count = ctx->vertex_count;
|
||||
|
||||
mali_ptr shared_mem = panfrost_batch_reserve_tls(batch, false);
|
||||
|
||||
unsigned min_index = 0, max_index = 0;
|
||||
mali_ptr indices = 0;
|
||||
|
||||
|
@ -571,9 +567,9 @@ panfrost_direct_draw(struct panfrost_batch *batch,
|
|||
attribs = panfrost_emit_vertex_data(batch, &attrib_bufs);
|
||||
|
||||
/* Fire off the draw itself */
|
||||
panfrost_draw_emit_vertex(batch, info, &invocation, shared_mem,
|
||||
panfrost_draw_emit_vertex(batch, info, &invocation,
|
||||
vs_vary, varyings, attribs, attrib_bufs, vertex.cpu);
|
||||
panfrost_draw_emit_tiler(batch, info, draw, &invocation, shared_mem, indices,
|
||||
panfrost_draw_emit_tiler(batch, info, draw, &invocation, indices,
|
||||
fs_vary, varyings, pos, psiz, tiler.cpu);
|
||||
panfrost_emit_vertex_tiler_jobs(batch, &vertex, &tiler);
|
||||
|
||||
|
@ -603,8 +599,6 @@ panfrost_indirect_draw(struct panfrost_batch *batch,
|
|||
ctx->drawid = drawid_offset;
|
||||
ctx->indirect_draw = true;
|
||||
|
||||
mali_ptr shared_mem = panfrost_batch_reserve_tls(batch, false);
|
||||
|
||||
struct panfrost_ptr tiler =
|
||||
panfrost_pool_alloc_aligned(&batch->pool,
|
||||
pan_is_bifrost(dev) ?
|
||||
|
@ -660,10 +654,9 @@ panfrost_indirect_draw(struct panfrost_batch *batch,
|
|||
static struct mali_invocation_packed invocation;
|
||||
|
||||
/* Fire off the draw itself */
|
||||
panfrost_draw_emit_vertex(batch, info, &invocation, shared_mem,
|
||||
vs_vary, varyings, attribs, attrib_bufs,
|
||||
vertex.cpu);
|
||||
panfrost_draw_emit_tiler(batch, info, draw, &invocation, shared_mem,
|
||||
panfrost_draw_emit_vertex(batch, info, &invocation, vs_vary, varyings,
|
||||
attribs, attrib_bufs, vertex.cpu);
|
||||
panfrost_draw_emit_tiler(batch, info, draw, &invocation,
|
||||
index_buf ? index_buf->ptr.gpu : 0,
|
||||
fs_vary, varyings, pos, psiz, tiler.cpu);
|
||||
|
||||
|
|
|
@ -94,6 +94,25 @@ panfrost_batch_init(struct panfrost_context *ctx,
|
|||
PAN_BO_INVISIBLE, 65536, "Varyings", false, true);
|
||||
|
||||
panfrost_batch_add_fbo_bos(batch);
|
||||
|
||||
/* Reserve the framebuffer and local storage descriptors */
|
||||
batch->framebuffer =
|
||||
(dev->quirks & MIDGARD_SFBD) ?
|
||||
panfrost_pool_alloc_desc(&batch->pool, SINGLE_TARGET_FRAMEBUFFER) :
|
||||
panfrost_pool_alloc_desc_aggregate(&batch->pool,
|
||||
PAN_DESC(MULTI_TARGET_FRAMEBUFFER),
|
||||
PAN_DESC(ZS_CRC_EXTENSION),
|
||||
PAN_DESC_ARRAY(MAX2(key->nr_cbufs, 1), RENDER_TARGET));
|
||||
|
||||
/* Add the MFBD tag now, other tags will be added at submit-time */
|
||||
if (!(dev->quirks & MIDGARD_SFBD))
|
||||
batch->framebuffer.gpu |= MALI_FBD_TAG_IS_MFBD;
|
||||
|
||||
/* On Midgard, the TLS is embedded in the FB descriptor */
|
||||
if (pan_is_bifrost(dev))
|
||||
batch->tls = panfrost_pool_alloc_desc(&batch->pool, LOCAL_STORAGE);
|
||||
else
|
||||
batch->tls = batch->framebuffer;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -729,66 +748,6 @@ panfrost_batch_to_fb_info(const struct panfrost_batch *batch,
|
|||
}
|
||||
}
|
||||
|
||||
static mali_ptr
|
||||
panfrost_batch_reserve_framebuffer(struct panfrost_batch *batch)
|
||||
{
|
||||
struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
|
||||
|
||||
if (batch->framebuffer.gpu)
|
||||
return batch->framebuffer.gpu;
|
||||
|
||||
/* If we haven't, reserve space for a framebuffer descriptor */
|
||||
|
||||
struct pan_image_view rts[8];
|
||||
struct pan_image_view zs;
|
||||
struct pan_image_view s;
|
||||
struct pan_fb_info fb;
|
||||
|
||||
panfrost_batch_to_fb_info(batch, &fb, rts, &zs, &s, true);
|
||||
|
||||
unsigned zs_crc_count = pan_fbd_has_zs_crc_ext(dev, &fb) ? 1 : 0;
|
||||
unsigned rt_count = MAX2(fb.rt_count, 1);
|
||||
batch->framebuffer =
|
||||
(dev->quirks & MIDGARD_SFBD) ?
|
||||
panfrost_pool_alloc_desc(&batch->pool, SINGLE_TARGET_FRAMEBUFFER) :
|
||||
panfrost_pool_alloc_desc_aggregate(&batch->pool,
|
||||
PAN_DESC(MULTI_TARGET_FRAMEBUFFER),
|
||||
PAN_DESC_ARRAY(zs_crc_count, ZS_CRC_EXTENSION),
|
||||
PAN_DESC_ARRAY(rt_count, RENDER_TARGET));
|
||||
|
||||
/* Add the MFBD tag now, other tags will be added when emitting the
|
||||
* FB desc.
|
||||
*/
|
||||
if (!(dev->quirks & MIDGARD_SFBD))
|
||||
batch->framebuffer.gpu |= MALI_FBD_TAG_IS_MFBD;
|
||||
|
||||
return batch->framebuffer.gpu;
|
||||
}
|
||||
|
||||
mali_ptr
|
||||
panfrost_batch_reserve_tls(struct panfrost_batch *batch, bool compute)
|
||||
{
|
||||
struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
|
||||
|
||||
/* If we haven't, reserve space for the thread storage descriptor */
|
||||
|
||||
if (batch->tls.gpu)
|
||||
return batch->tls.gpu;
|
||||
|
||||
if (pan_is_bifrost(dev) || compute) {
|
||||
batch->tls = panfrost_pool_alloc_desc(&batch->pool, LOCAL_STORAGE);
|
||||
} else {
|
||||
/* On Midgard, the FB descriptor contains a thread storage
|
||||
* descriptor, and tiler jobs need more than thread storage
|
||||
* info. Let's point to the FB desc in that case.
|
||||
*/
|
||||
panfrost_batch_reserve_framebuffer(batch);
|
||||
batch->tls = batch->framebuffer;
|
||||
}
|
||||
|
||||
return batch->tls.gpu;
|
||||
}
|
||||
|
||||
static void
|
||||
panfrost_batch_draw_wallpaper(struct panfrost_batch *batch,
|
||||
struct pan_fb_info *fb)
|
||||
|
@ -962,15 +921,11 @@ panfrost_batch_submit(struct panfrost_batch *batch,
|
|||
if (!batch->scoreboard.first_job && !batch->clear)
|
||||
goto out;
|
||||
|
||||
if (batch->scoreboard.first_tiler || batch->clear)
|
||||
panfrost_batch_reserve_framebuffer(batch);
|
||||
|
||||
struct pan_fb_info fb;
|
||||
struct pan_image_view rts[8], zs, s;
|
||||
|
||||
panfrost_batch_to_fb_info(batch, &fb, rts, &zs, &s, false);
|
||||
|
||||
panfrost_batch_reserve_tls(batch, false);
|
||||
panfrost_batch_draw_wallpaper(batch, &fb);
|
||||
|
||||
|
||||
|
@ -981,13 +936,12 @@ panfrost_batch_submit(struct panfrost_batch *batch,
|
|||
}
|
||||
|
||||
/* Now that all draws are in, we can finally prepare the
|
||||
* FBD for the batch */
|
||||
* FBD for the batch (if there is one). */
|
||||
|
||||
panfrost_emit_tls(batch);
|
||||
|
||||
panfrost_emit_tile_map(batch, &fb);
|
||||
|
||||
if (batch->framebuffer.gpu)
|
||||
if (batch->scoreboard.first_tiler || batch->clear)
|
||||
panfrost_emit_fbd(batch, &fb);
|
||||
|
||||
ret = panfrost_batch_submit_jobs(batch, &fb, in_sync, out_sync);
|
||||
|
|
|
@ -189,7 +189,4 @@ panfrost_batch_intersection_scissor(struct panfrost_batch *batch,
|
|||
mali_ptr
|
||||
panfrost_batch_get_bifrost_tiler(struct panfrost_batch *batch, unsigned vertex_count);
|
||||
|
||||
mali_ptr
|
||||
panfrost_batch_reserve_tls(struct panfrost_batch *batch, bool compute);
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue