diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 5a872ae1b43..ddeb4f5f164 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -57,21 +57,25 @@ panfrost_vt_emit_shared_memory(struct panfrost_batch *batch) { struct panfrost_device *dev = pan_device(batch->ctx->base.screen); - struct mali_shared_memory shared = { - .shared_workgroup_count = ~0, - }; + struct panfrost_transfer t = + panfrost_pool_alloc_aligned(&batch->pool, + MALI_LOCAL_STORAGE_LENGTH, + 64); - if (batch->stack_size) { - struct panfrost_bo *stack = - panfrost_batch_get_scratchpad(batch, batch->stack_size, - dev->thread_tls_alloc, - dev->core_count); + pan_pack(t.cpu, LOCAL_STORAGE, ls) { + ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM; + if (batch->stack_size) { + struct panfrost_bo *stack = + panfrost_batch_get_scratchpad(batch, batch->stack_size, + dev->thread_tls_alloc, + dev->core_count); - shared.stack_shift = panfrost_get_stack_shift(batch->stack_size); - shared.scratchpad = stack->gpu; + ls.tls_size = panfrost_get_stack_shift(batch->stack_size); + ls.tls_base_pointer = stack->gpu; + } } - return panfrost_pool_upload_aligned(&batch->pool, &shared, sizeof(shared), 64); + return t.gpu; } void @@ -950,15 +954,18 @@ panfrost_emit_shared_memory(struct panfrost_batch *batch, struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch, shared_size, 1); + struct panfrost_transfer t = + panfrost_pool_alloc_aligned(&batch->pool, + MALI_LOCAL_STORAGE_LENGTH, + 64); - struct mali_shared_memory shared = { - .shared_memory = bo->gpu, - .shared_workgroup_count = log2_instances, - .shared_shift = util_logbase2(single_size) + 1 + pan_pack(t.cpu, LOCAL_STORAGE, ls) { + ls.wls_base_pointer = bo->gpu; + ls.wls_instances = log2_instances; + ls.wls_size_scale = util_logbase2(single_size) + 1; }; - return panfrost_pool_upload_aligned(&batch->pool, &shared, - sizeof(shared), 64); + return t.gpu; } static mali_ptr diff --git a/src/gallium/drivers/panfrost/pan_mfbd.c b/src/gallium/drivers/panfrost/pan_mfbd.c index deccd3a4239..fad3f029cd0 100644 --- a/src/gallium/drivers/panfrost/pan_mfbd.c +++ b/src/gallium/drivers/panfrost/pan_mfbd.c @@ -498,18 +498,24 @@ panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count) mfbd.msaa.sample_locations = panfrost_emit_sample_locations(batch); mfbd.tiler_meta = panfrost_batch_get_tiler_meta(batch, vertex_count); } else { - if (batch->stack_size) { - unsigned shift = panfrost_get_stack_shift(batch->stack_size); - struct panfrost_bo *bo = panfrost_batch_get_scratchpad(batch, - batch->stack_size, - dev->thread_tls_alloc, - dev->core_count); - mfbd.shared_memory.stack_shift = shift; - mfbd.shared_memory.scratchpad = bo->gpu; + struct mali_local_storage_packed lsp; + + pan_pack(&lsp, LOCAL_STORAGE, ls) { + if (batch->stack_size) { + unsigned shift = + panfrost_get_stack_shift(batch->stack_size); + struct panfrost_bo *bo = + panfrost_batch_get_scratchpad(batch, + batch->stack_size, + dev->thread_tls_alloc, + dev->core_count); + ls.tls_size = shift; + ls.tls_base_pointer = bo->gpu; + } + + ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM; } - - mfbd.shared_memory.shared_workgroup_count = ~0; - + mfbd.shared_memory = lsp; mfbd.tiler = panfrost_emit_midg_tiler(batch, vertex_count); } diff --git a/src/gallium/drivers/panfrost/pan_sfbd.c b/src/gallium/drivers/panfrost/pan_sfbd.c index d9173baace1..ce6874036a2 100644 --- a/src/gallium/drivers/panfrost/pan_sfbd.c +++ b/src/gallium/drivers/panfrost/pan_sfbd.c @@ -207,11 +207,6 @@ panfrost_emit_sfbd(struct panfrost_batch *batch, unsigned vertex_count) struct mali_single_framebuffer framebuffer = { .width = MALI_POSITIVE(width), .height = MALI_POSITIVE(height), - .shared_memory = { - .stack_shift = shift, - .shared_workgroup_count = ~0, - .scratchpad = panfrost_batch_get_scratchpad(batch, shift, dev->thread_tls_alloc, dev->core_count)->gpu, - }, .format = { .unk3 = 0x3, }, @@ -219,6 +214,18 @@ panfrost_emit_sfbd(struct panfrost_batch *batch, unsigned vertex_count) .tiler = panfrost_emit_midg_tiler(batch, vertex_count), }; + struct mali_local_storage_packed lsp; + pan_pack(&lsp, LOCAL_STORAGE, ls) { + ls.tls_size = shift; + ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM; + ls.tls_base_pointer = + panfrost_batch_get_scratchpad(batch, + shift, + dev->thread_tls_alloc, + dev->core_count)->gpu; + } + framebuffer.shared_memory = lsp; + return framebuffer; } diff --git a/src/panfrost/bifrost/test/bi_submit.c b/src/panfrost/bifrost/test/bi_submit.c index c953e87e343..408171f3cd0 100644 --- a/src/panfrost/bifrost/test/bi_submit.c +++ b/src/panfrost/bifrost/test/bi_submit.c @@ -169,11 +169,10 @@ bit_vertex(struct panfrost_device *dev, panfrost_program prog, memcpy(attr->cpu + 1024, iattr, sz_attr); struct panfrost_bo *shmem = bit_bo_create(dev, 4096); - struct mali_shared_memory shmemp = { - .shared_workgroup_count = 0x1f, - }; - memcpy(shmem->cpu, &shmemp, sizeof(shmemp)); + pan_pack(shmem->cpu, LOCAL_STORAGE, cfg) { + cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM; + } pan_pack(shader_desc->cpu, STATE, cfg) { cfg.shader.shader = shader->gpu; diff --git a/src/panfrost/include/panfrost-job.h b/src/panfrost/include/panfrost-job.h index 634f62b47ad..5be625335ab 100644 --- a/src/panfrost/include/panfrost-job.h +++ b/src/panfrost/include/panfrost-job.h @@ -630,37 +630,6 @@ struct mali_sfbd_format { unsigned unk3 : 4; }; -/* Shared structure at the start of framebuffer descriptors, or used bare for - * compute jobs, configuring stack and shared memory */ - -struct mali_shared_memory { - u32 stack_shift : 4; - u32 unk0 : 28; - - /* Configuration for shared memory for compute shaders. - * shared_workgroup_count is logarithmic and may be computed for a - * compute shader using shared memory as: - * - * shared_workgroup_count = MAX2(ceil(log2(count_x)) + ... + ceil(log2(count_z), 10) - * - * For compute shaders that don't use shared memory, or non-compute - * shaders, this is set to ~0 - */ - - u32 shared_workgroup_count : 5; - u32 shared_unk1 : 3; - u32 shared_shift : 4; - u32 shared_zero : 20; - - mali_ptr scratchpad; - - /* For compute shaders, the RAM backing of workgroup-shared memory. For - * fragment shaders on Bifrost, apparently multisampling locations */ - - mali_ptr shared_memory; - mali_ptr unknown1; -} __attribute__((packed)); - /* Configures multisampling on Bifrost fragment jobs */ struct bifrost_multisampling { @@ -671,7 +640,7 @@ struct bifrost_multisampling { } __attribute__((packed)); struct mali_single_framebuffer { - struct mali_shared_memory shared_memory; + struct mali_local_storage_packed shared_memory; struct mali_sfbd_format format; u32 clear_flags; @@ -876,7 +845,7 @@ struct mali_framebuffer_extra { struct mali_framebuffer { union { - struct mali_shared_memory shared_memory; + struct mali_local_storage_packed shared_memory; struct bifrost_multisampling msaa; }; diff --git a/src/panfrost/lib/decode.c b/src/panfrost/lib/decode.c index 12f9f8eb9b2..463a779f902 100644 --- a/src/panfrost/lib/decode.c +++ b/src/panfrost/lib/decode.c @@ -446,41 +446,6 @@ pandecode_sfbd_format(struct mali_sfbd_format format) pandecode_log("},\n"); } -static void -pandecode_shared_memory(const struct mali_shared_memory *desc, bool is_compute) -{ - pandecode_prop("stack_shift = 0x%x", desc->stack_shift); - - if (desc->unk0) - pandecode_prop("unk0 = 0x%x", desc->unk0); - - if (desc->shared_workgroup_count != 0x1F) { - pandecode_prop("shared_workgroup_count = %d", desc->shared_workgroup_count); - if (!is_compute) - pandecode_msg("XXX: wrong workgroup count for noncompute\n"); - } - - if (desc->shared_unk1 || desc->shared_shift) { - pandecode_prop("shared_unk1 = %X", desc->shared_unk1); - pandecode_prop("shared_shift = %X", desc->shared_shift); - - if (!is_compute) - pandecode_msg("XXX: shared memory configured in noncompute shader"); - } - - if (desc->shared_zero) { - pandecode_msg("XXX: shared memory zero tripped\n"); - pandecode_prop("shared_zero = 0x%" PRIx32, desc->shared_zero); - } - - if (desc->shared_memory && !is_compute) - pandecode_msg("XXX: shared memory used in noncompute shader\n"); - - MEMORY_PROP(desc, scratchpad); - MEMORY_PROP(desc, shared_memory); - MEMORY_PROP(desc, unknown1); -} - static struct pandecode_fbd pandecode_sfbd(uint64_t gpu_va, int job_no, bool is_fragment, unsigned gpu_id) { @@ -494,13 +459,7 @@ pandecode_sfbd(uint64_t gpu_va, int job_no, bool is_fragment, unsigned gpu_id) pandecode_log("struct mali_single_framebuffer framebuffer_%"PRIx64"_%d = {\n", gpu_va, job_no); pandecode_indent++; - - pandecode_log(".shared_memory = {\n"); - pandecode_indent++; - pandecode_shared_memory(&s->shared_memory, false); - pandecode_indent--; - pandecode_log("},\n"); - + DUMP_CL(LOCAL_STORAGE, &s->shared_memory, "Local Storage:\n"); pandecode_sfbd_format(s->format); info.width = s->width + 1; @@ -599,13 +558,8 @@ static void pandecode_compute_fbd(uint64_t gpu_va, int job_no) { struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va); - const struct mali_shared_memory *PANDECODE_PTR_VAR(s, mem, (mali_ptr) gpu_va); - - pandecode_log("struct mali_shared_memory shared_%"PRIx64"_%d = {\n", gpu_va, job_no); - pandecode_indent++; - pandecode_shared_memory(s, true); - pandecode_indent--; - pandecode_log("},\n"); + const struct mali_local_storage_packed *PANDECODE_PTR_VAR(s, mem, (mali_ptr) gpu_va); + DUMP_CL(LOCAL_STORAGE, s, "Local Storage:\n"); } /* Extracts the number of components associated with a Mali format */ @@ -872,11 +826,8 @@ pandecode_mfbd_bfr(uint64_t gpu_va, int job_no, bool is_fragment, bool is_comput pandecode_indent--; pandecode_log("},\n"); } else { - pandecode_log(".shared_memory = {\n"); - pandecode_indent++; - pandecode_shared_memory(&fb->shared_memory, is_compute); - pandecode_indent--; - pandecode_log("},\n"); + struct mali_local_storage_packed ls = fb->shared_memory; + DUMP_CL(LOCAL_STORAGE, &ls, "Local Storage:\n"); } info.width = fb->width1 + 1; diff --git a/src/panfrost/lib/midgard.xml b/src/panfrost/lib/midgard.xml index 6fb896d5b4d..e27f880a837 100644 --- a/src/panfrost/lib/midgard.xml +++ b/src/panfrost/lib/midgard.xml @@ -561,4 +561,15 @@ + + + + + + + + + + +