From bef10747006b784310692ad99b0bde6b50eb9450 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 17 May 2021 14:42:13 -0400 Subject: [PATCH] panfrost: Express dependencies as resources, not BOs This is a more ergonomic API, and will allow further optimizations. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/gallium/drivers/panfrost/pan_blend_cso.c | 7 +- src/gallium/drivers/panfrost/pan_blit.c | 21 +---- src/gallium/drivers/panfrost/pan_cmdstream.c | 96 ++++---------------- src/gallium/drivers/panfrost/pan_compute.c | 4 +- src/gallium/drivers/panfrost/pan_context.c | 19 ++-- src/gallium/drivers/panfrost/pan_job.c | 94 ++++++++++++------- src/gallium/drivers/panfrost/pan_job.h | 15 ++- 7 files changed, 105 insertions(+), 151 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c b/src/gallium/drivers/panfrost/pan_blend_cso.c index 904c93c42f6..7d63b807fb3 100644 --- a/src/gallium/drivers/panfrost/pan_blend_cso.c +++ b/src/gallium/drivers/panfrost/pan_blend_cso.c @@ -187,11 +187,8 @@ panfrost_get_blend(struct panfrost_batch *batch, unsigned rti, struct panfrost_b /* Upload the shader, sharing a BO */ if (!(*bo)) { - *bo = panfrost_batch_create_bo(batch, 4096, - PAN_BO_EXECUTE, - PAN_BO_ACCESS_PRIVATE | - PAN_BO_ACCESS_READ | - PAN_BO_ACCESS_FRAGMENT, "Blend shader"); + *bo = panfrost_batch_create_bo(batch, 4096, PAN_BO_EXECUTE, + PIPE_SHADER_FRAGMENT, "Blend shader"); } struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT); diff --git a/src/gallium/drivers/panfrost/pan_blit.c b/src/gallium/drivers/panfrost/pan_blit.c index 89a9983b3a2..ff9d6548128 100644 --- a/src/gallium/drivers/panfrost/pan_blit.c +++ b/src/gallium/drivers/panfrost/pan_blit.c @@ -93,10 +93,10 @@ panfrost_blit_add_ctx_bos(struct panfrost_batch *batch, { util_dynarray_foreach(&blit_pool->bos, struct panfrost_bo *, bo) { panfrost_batch_add_bo(batch, *bo, - PAN_BO_ACCESS_SHARED | - PAN_BO_ACCESS_READ | - PAN_BO_ACCESS_VERTEX_TILER | - 
PAN_BO_ACCESS_FRAGMENT); + PIPE_SHADER_VERTEX); + + panfrost_batch_add_bo(batch, *bo, + PIPE_SHADER_FRAGMENT); } } @@ -270,18 +270,7 @@ panfrost_blit(struct pipe_context *pipe, pipe_surface_reference(&dst_surf, NULL); - panfrost_batch_add_bo(batch, pinfo.src.planes[0].image->data.bo, - PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ | - PAN_BO_ACCESS_FRAGMENT); - - if (pinfo.src.planes[1].image) { - panfrost_batch_add_bo(batch, - pinfo.src.planes[1].image->data.bo, - PAN_BO_ACCESS_SHARED | - PAN_BO_ACCESS_READ | - PAN_BO_ACCESS_FRAGMENT); - } - + panfrost_batch_read_rsrc(batch, psrc, PIPE_SHADER_FRAGMENT); panfrost_batch_add_fbo_bos(batch); panfrost_blit_add_ctx_bos(batch, &blit_pool); batch->draws = draw_flags; diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index c2978acc3de..eb64fbecd57 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -57,23 +57,6 @@ pan_pipe_asserts() PIPE_ASSERT(PIPE_FUNC_ALWAYS == MALI_FUNC_ALWAYS); } -/* If a BO is accessed for a particular shader stage, will it be in the primary - * batch (vertex/tiler) or the secondary batch (fragment)? Anything but - * fragment will be primary, e.g. compute jobs will be considered - * "vertex/tiler" by analogy */ - -static inline uint32_t -panfrost_bo_access_for_stage(enum pipe_shader_type stage) -{ - assert(stage == PIPE_SHADER_FRAGMENT || - stage == PIPE_SHADER_VERTEX || - stage == PIPE_SHADER_COMPUTE); - - return stage == PIPE_SHADER_FRAGMENT ? - PAN_BO_ACCESS_FRAGMENT : - PAN_BO_ACCESS_VERTEX_TILER; -} - /* Gets a GPU address for the associated index buffer. Only gauranteed to be * good for the duration of the draw (transient), could last longer. Also get * the bounds on the index buffer for the range accessed by the draw. 
We do @@ -100,10 +83,7 @@ panfrost_get_index_buffer_bounded(struct panfrost_batch *batch, if (!info->has_user_indices) { /* Only resources can be directly mapped */ - panfrost_batch_add_bo(batch, rsrc->image.data.bo, - PAN_BO_ACCESS_SHARED | - PAN_BO_ACCESS_READ | - PAN_BO_ACCESS_VERTEX_TILER); + panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX); out = rsrc->image.data.bo->ptr.gpu + offset; /* Check the cache */ @@ -639,15 +619,8 @@ panfrost_emit_compute_shader_meta(struct panfrost_batch *batch, enum pipe_shader { struct panfrost_shader_state *ss = panfrost_get_shader_state(batch->ctx, stage); - panfrost_batch_add_bo(batch, ss->bin.bo, - PAN_BO_ACCESS_SHARED | - PAN_BO_ACCESS_READ | - PAN_BO_ACCESS_VERTEX_TILER); - - panfrost_batch_add_bo(batch, ss->state.bo, - PAN_BO_ACCESS_SHARED | - PAN_BO_ACCESS_READ | - PAN_BO_ACCESS_VERTEX_TILER); + panfrost_batch_add_bo(batch, ss->bin.bo, PIPE_SHADER_VERTEX); + panfrost_batch_add_bo(batch, ss->state.bo, PIPE_SHADER_VERTEX); return ss->state.gpu; } @@ -658,11 +631,7 @@ panfrost_emit_frag_shader_meta(struct panfrost_batch *batch) struct panfrost_context *ctx = batch->ctx; struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT); - /* Add the shader BO to the batch. 
*/ - panfrost_batch_add_bo(batch, ss->bin.bo, - PAN_BO_ACCESS_SHARED | - PAN_BO_ACCESS_READ | - PAN_BO_ACCESS_FRAGMENT); + panfrost_batch_add_bo(batch, ss->bin.bo, PIPE_SHADER_FRAGMENT); struct panfrost_device *dev = pan_device(ctx->base.screen); unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1); @@ -772,10 +741,7 @@ panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch, struct panfrost_resource *rsrc = pan_resource(cb->buffer); if (rsrc) { - panfrost_batch_add_bo(batch, rsrc->image.data.bo, - PAN_BO_ACCESS_SHARED | - PAN_BO_ACCESS_READ | - panfrost_bo_access_for_stage(st)); + panfrost_batch_read_rsrc(batch, rsrc, st); /* Alignment gauranteed by * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */ @@ -907,9 +873,7 @@ panfrost_upload_ssbo_sysval(struct panfrost_batch *batch, struct panfrost_resource *rsrc = pan_resource(sb.buffer); struct panfrost_bo *bo = rsrc->image.data.bo; - panfrost_batch_add_bo(batch, bo, - PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW | - panfrost_bo_access_for_stage(st)); + panfrost_batch_write_rsrc(batch, rsrc, st); util_range_add(&rsrc->base, &rsrc->valid_buffer_range, sb.buffer_offset, sb.buffer_size); @@ -1366,15 +1330,8 @@ panfrost_get_tex_desc(struct panfrost_batch *batch, struct pipe_sampler_view *pview = &view->base; struct panfrost_resource *rsrc = pan_resource(pview->texture); - /* Add the BO to the job so it's retained until the job is done. 
*/ - - panfrost_batch_add_bo(batch, rsrc->image.data.bo, - PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ | - panfrost_bo_access_for_stage(st)); - - panfrost_batch_add_bo(batch, view->state.bo, - PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ | - panfrost_bo_access_for_stage(st)); + panfrost_batch_read_rsrc(batch, rsrc, st); + panfrost_batch_add_bo(batch, view->state.bo, st); return view->state.gpu; } @@ -1417,15 +1374,8 @@ panfrost_emit_texture_descriptors(struct panfrost_batch *batch, panfrost_update_sampler_view(view, &ctx->base); out[i] = view->bifrost_descriptor; - /* Add the BOs to the job so they are retained until the job is done. */ - - panfrost_batch_add_bo(batch, rsrc->image.data.bo, - PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ | - panfrost_bo_access_for_stage(stage)); - - panfrost_batch_add_bo(batch, view->state.bo, - PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ | - panfrost_bo_access_for_stage(stage)); + panfrost_batch_read_rsrc(batch, rsrc, stage); + panfrost_batch_add_bo(batch, view->state.bo, stage); } return T.gpu; @@ -1541,12 +1491,9 @@ emit_image_bufs(struct panfrost_batch *batch, enum pipe_shader_type shader, is_3d ? 0 : image->u.tex.first_layer, is_3d ? image->u.tex.first_layer : 0); - /* Add a dependency of the batch on the shader image buffer */ - uint32_t flags = PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_VERTEX_TILER; - if (image->shader_access & PIPE_IMAGE_ACCESS_READ) - flags |= PAN_BO_ACCESS_READ; if (image->shader_access & PIPE_IMAGE_ACCESS_WRITE) { - flags |= PAN_BO_ACCESS_WRITE; + panfrost_batch_write_rsrc(batch, rsrc, shader); + unsigned level = is_buffer ? 
0 : image->u.tex.level; BITSET_SET(rsrc->valid.data, level); @@ -1554,8 +1501,9 @@ emit_image_bufs(struct panfrost_batch *batch, enum pipe_shader_type shader, util_range_add(&rsrc->base, &rsrc->valid_buffer_range, 0, rsrc->base.width0); } + } else { + panfrost_batch_read_rsrc(batch, rsrc, shader); } - panfrost_batch_add_bo(batch, rsrc->image.data.bo, flags); pan_pack(bufs + (i * 2), ATTRIBUTE_BUFFER, cfg) { cfg.type = pan_modifier_to_attr_type(rsrc->image.layout.modifier); @@ -1697,11 +1645,7 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch, if (!rsrc) continue; - /* Add a dependency of the batch on the vertex buffer */ - panfrost_batch_add_bo(batch, rsrc->image.data.bo, - PAN_BO_ACCESS_SHARED | - PAN_BO_ACCESS_READ | - PAN_BO_ACCESS_VERTEX_TILER); + panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX); /* Mask off lower bits, see offset fixup below */ mali_ptr raw_addr = rsrc->image.data.bo->ptr.gpu + buf->buffer_offset; @@ -1906,14 +1850,8 @@ panfrost_emit_streamout(struct panfrost_batch *batch, struct panfrost_resource *rsrc = pan_resource(target->buffer); struct panfrost_bo *bo = rsrc->image.data.bo; - /* Varyings are WRITE from the perspective of the VERTEX but READ from - * the perspective of the TILER and FRAGMENT. 
- */ - panfrost_batch_add_bo(batch, bo, - PAN_BO_ACCESS_SHARED | - PAN_BO_ACCESS_RW | - PAN_BO_ACCESS_VERTEX_TILER | - PAN_BO_ACCESS_FRAGMENT); + panfrost_batch_write_rsrc(batch, rsrc, PIPE_SHADER_VERTEX); + panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_FRAGMENT); unsigned offset = panfrost_xfb_offset(stride, target); diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c index 56e4efc273e..8a0d68f59e4 100644 --- a/src/gallium/drivers/panfrost/pan_compute.c +++ b/src/gallium/drivers/panfrost/pan_compute.c @@ -237,9 +237,7 @@ panfrost_set_global_binding(struct pipe_context *pctx, for (unsigned i = first; i < first + count; ++i) { struct panfrost_resource *rsrc = pan_resource(resources[i]); - - panfrost_batch_add_bo(batch, rsrc->image.data.bo, - PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW); + panfrost_batch_write_rsrc(batch, rsrc, PIPE_SHADER_COMPUTE); util_range_add(&rsrc->base, &rsrc->valid_buffer_range, 0, rsrc->base.width0); diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 5dea4198785..3a82e3a59b5 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -448,9 +448,7 @@ panfrost_draw_emit_tiler(struct panfrost_batch *batch, cfg.occlusion_query = MALI_OCCLUSION_MODE_PREDICATE; cfg.occlusion = ctx->occlusion_query->bo->ptr.gpu; panfrost_batch_add_bo(ctx->batch, ctx->occlusion_query->bo, - PAN_BO_ACCESS_SHARED | - PAN_BO_ACCESS_RW | - PAN_BO_ACCESS_FRAGMENT); + PIPE_SHADER_FRAGMENT); } } @@ -610,11 +608,9 @@ panfrost_indirect_draw(struct panfrost_batch *batch, if (info->index_size) { assert(!info->has_user_indices); - index_buf = pan_resource(info->index.resource)->image.data.bo; - panfrost_batch_add_bo(batch, - index_buf, - PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ | - PAN_BO_ACCESS_VERTEX_TILER); + struct panfrost_resource *rsrc = pan_resource(info->index.resource); + index_buf = rsrc->image.data.bo; + 
panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX); } mali_ptr varyings = 0, vs_vary = 0, fs_vary = 0, pos = 0, psiz = 0; @@ -662,8 +658,7 @@ panfrost_indirect_draw(struct panfrost_batch *batch, if (varyings) { panfrost_batch_add_bo(batch, dev->indirect_draw_shaders.varying_heap, - PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW | - PAN_BO_ACCESS_VERTEX_TILER); + PIPE_SHADER_VERTEX); } assert(indirect->buffer); @@ -675,9 +670,7 @@ panfrost_indirect_draw(struct panfrost_batch *batch, vs->info.attribute_count - util_bitcount(ctx->image_mask[PIPE_SHADER_VERTEX]); - panfrost_batch_add_bo(batch, draw_buf->image.data.bo, - PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ | - PAN_BO_ACCESS_VERTEX_TILER); + panfrost_batch_read_rsrc(batch, draw_buf, PIPE_SHADER_VERTEX); struct pan_indirect_draw_info draw_info = { .last_indirect_draw = batch->indirect_draw_job_id, diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c index b26c922b599..7175505911b 100644 --- a/src/gallium/drivers/panfrost/pan_job.c +++ b/src/gallium/drivers/panfrost/pan_job.c @@ -388,9 +388,9 @@ panfrost_batch_update_bo_access(struct panfrost_batch *batch, access->last_is_write = writes; } -void -panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo, - uint32_t flags) +static void +panfrost_batch_add_bo_old(struct panfrost_batch *batch, + struct panfrost_bo *bo, uint32_t flags) { if (!bo) return; @@ -403,10 +403,6 @@ panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo, batch->first_bo = MIN2(batch->first_bo, bo->gem_handle); batch->last_bo = MAX2(batch->last_bo, bo->gem_handle); panfrost_bo_reference(bo); - } else { - /* All batches have to agree on the shared flag. 
*/ - assert((old_flags & PAN_BO_ACCESS_SHARED) == - (flags & PAN_BO_ACCESS_SHARED)); } if (old_flags == flags) @@ -431,18 +427,57 @@ panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo, panfrost_batch_update_bo_access(batch, bo, flags & PAN_BO_ACCESS_WRITE); } -static void -panfrost_batch_add_resource_bos(struct panfrost_batch *batch, - struct panfrost_resource *rsrc, - uint32_t flags) +static uint32_t +panfrost_access_for_stage(enum pipe_shader_type stage) { - panfrost_batch_add_bo(batch, rsrc->image.data.bo, flags); + return (stage == PIPE_SHADER_FRAGMENT) ? + PAN_BO_ACCESS_FRAGMENT : PAN_BO_ACCESS_VERTEX_TILER; +} + +void +panfrost_batch_add_bo(struct panfrost_batch *batch, + struct panfrost_bo *bo, enum pipe_shader_type stage) +{ + panfrost_batch_add_bo_old(batch, bo, PAN_BO_ACCESS_READ | + panfrost_access_for_stage(stage)); +} + +void +panfrost_batch_read_rsrc(struct panfrost_batch *batch, + struct panfrost_resource *rsrc, + enum pipe_shader_type stage) +{ + uint32_t access = + PAN_BO_ACCESS_SHARED | + PAN_BO_ACCESS_READ | + panfrost_access_for_stage(stage); + + panfrost_batch_add_bo_old(batch, rsrc->image.data.bo, access); if (rsrc->image.crc.bo) - panfrost_batch_add_bo(batch, rsrc->image.crc.bo, flags); + panfrost_batch_add_bo_old(batch, rsrc->image.crc.bo, access); if (rsrc->separate_stencil) - panfrost_batch_add_bo(batch, rsrc->separate_stencil->image.data.bo, flags); + panfrost_batch_add_bo_old(batch, rsrc->separate_stencil->image.data.bo, access); +} + +void +panfrost_batch_write_rsrc(struct panfrost_batch *batch, + struct panfrost_resource *rsrc, + enum pipe_shader_type stage) +{ + uint32_t access = + PAN_BO_ACCESS_SHARED | + PAN_BO_ACCESS_WRITE | + panfrost_access_for_stage(stage); + + panfrost_batch_add_bo_old(batch, rsrc->image.data.bo, access); + + if (rsrc->image.crc.bo) + panfrost_batch_add_bo_old(batch, rsrc->image.crc.bo, access); + + if (rsrc->separate_stencil) + panfrost_batch_add_bo_old(batch, 
rsrc->separate_stencil->image.data.bo, access); } /* Adds the BO backing surface to a batch if the surface is non-null */ @@ -450,14 +485,10 @@ panfrost_batch_add_resource_bos(struct panfrost_batch *batch, static void panfrost_batch_add_surface(struct panfrost_batch *batch, struct pipe_surface *surf) { - uint32_t flags = PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_WRITE | - PAN_BO_ACCESS_VERTEX_TILER | - PAN_BO_ACCESS_FRAGMENT; if (surf) { struct panfrost_resource *rsrc = pan_resource(surf->texture); - panfrost_batch_add_resource_bos(batch, rsrc, flags); + panfrost_batch_write_rsrc(batch, rsrc, PIPE_SHADER_FRAGMENT); } - } void @@ -471,14 +502,14 @@ panfrost_batch_add_fbo_bos(struct panfrost_batch *batch) struct panfrost_bo * panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size, - uint32_t create_flags, uint32_t access_flags, + uint32_t create_flags, enum pipe_shader_type stage, const char *label) { struct panfrost_bo *bo; bo = panfrost_bo_create(pan_device(batch->ctx->base.screen), size, create_flags, label); - panfrost_batch_add_bo(batch, bo, access_flags); + panfrost_batch_add_bo(batch, bo, stage); /* panfrost_batch_add_bo() has retained a reference and * panfrost_bo_create() initialize the refcnt to 1, so let's @@ -516,11 +547,10 @@ panfrost_batch_get_polygon_list(struct panfrost_batch *batch) batch->tiler_ctx.midgard.polygon_list = panfrost_batch_create_bo(batch, size, init_polygon_list ? 
0 : PAN_BO_INVISIBLE, - PAN_BO_ACCESS_PRIVATE | - PAN_BO_ACCESS_RW | - PAN_BO_ACCESS_VERTEX_TILER | - PAN_BO_ACCESS_FRAGMENT, "Polygon list"); - + PIPE_SHADER_VERTEX, + "Polygon list"); + panfrost_batch_add_bo(batch, batch->tiler_ctx.midgard.polygon_list, + PIPE_SHADER_FRAGMENT); if (init_polygon_list) { assert(batch->tiler_ctx.midgard.polygon_list->ptr.cpu); @@ -551,11 +581,11 @@ panfrost_batch_get_scratchpad(struct panfrost_batch *batch, } else { batch->scratchpad = panfrost_batch_create_bo(batch, size, PAN_BO_INVISIBLE, - PAN_BO_ACCESS_PRIVATE | - PAN_BO_ACCESS_RW | - PAN_BO_ACCESS_VERTEX_TILER | - PAN_BO_ACCESS_FRAGMENT, + PIPE_SHADER_VERTEX, "Thread local storage"); + + panfrost_batch_add_bo(batch, batch->scratchpad, + PIPE_SHADER_FRAGMENT); } return batch->scratchpad; @@ -571,9 +601,7 @@ panfrost_batch_get_shared_memory(struct panfrost_batch *batch, } else { batch->shared_memory = panfrost_batch_create_bo(batch, size, PAN_BO_INVISIBLE, - PAN_BO_ACCESS_PRIVATE | - PAN_BO_ACCESS_RW | - PAN_BO_ACCESS_VERTEX_TILER, + PIPE_SHADER_VERTEX, "Workgroup shared memory"); } diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h index 397b6e75ea0..8871b1fa9b9 100644 --- a/src/gallium/drivers/panfrost/pan_job.h +++ b/src/gallium/drivers/panfrost/pan_job.h @@ -144,8 +144,19 @@ struct panfrost_batch * panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx); void -panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo, - uint32_t flags); +panfrost_batch_add_bo(struct panfrost_batch *batch, + struct panfrost_bo *bo, + enum pipe_shader_type stage); + +void +panfrost_batch_read_rsrc(struct panfrost_batch *batch, + struct panfrost_resource *rsrc, + enum pipe_shader_type stage); + +void +panfrost_batch_write_rsrc(struct panfrost_batch *batch, + struct panfrost_resource *rsrc, + enum pipe_shader_type stage); void panfrost_batch_add_fbo_bos(struct panfrost_batch *batch);