panfrost: XML-ify the local storage descriptor

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6797>
This commit is contained in:
Boris Brezillon 2020-09-03 09:18:09 +02:00 committed by Alyssa Rosenzweig
parent 706974ce17
commit 3a06fc3d34
7 changed files with 74 additions and 124 deletions

View File

@ -57,21 +57,25 @@ panfrost_vt_emit_shared_memory(struct panfrost_batch *batch)
{
struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
struct mali_shared_memory shared = {
.shared_workgroup_count = ~0,
};
struct panfrost_transfer t =
panfrost_pool_alloc_aligned(&batch->pool,
MALI_LOCAL_STORAGE_LENGTH,
64);
if (batch->stack_size) {
struct panfrost_bo *stack =
panfrost_batch_get_scratchpad(batch, batch->stack_size,
dev->thread_tls_alloc,
dev->core_count);
pan_pack(t.cpu, LOCAL_STORAGE, ls) {
ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
if (batch->stack_size) {
struct panfrost_bo *stack =
panfrost_batch_get_scratchpad(batch, batch->stack_size,
dev->thread_tls_alloc,
dev->core_count);
shared.stack_shift = panfrost_get_stack_shift(batch->stack_size);
shared.scratchpad = stack->gpu;
ls.tls_size = panfrost_get_stack_shift(batch->stack_size);
ls.tls_base_pointer = stack->gpu;
}
}
return panfrost_pool_upload_aligned(&batch->pool, &shared, sizeof(shared), 64);
return t.gpu;
}
void
@ -950,15 +954,18 @@ panfrost_emit_shared_memory(struct panfrost_batch *batch,
struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch,
shared_size,
1);
struct panfrost_transfer t =
panfrost_pool_alloc_aligned(&batch->pool,
MALI_LOCAL_STORAGE_LENGTH,
64);
struct mali_shared_memory shared = {
.shared_memory = bo->gpu,
.shared_workgroup_count = log2_instances,
.shared_shift = util_logbase2(single_size) + 1
pan_pack(t.cpu, LOCAL_STORAGE, ls) {
ls.wls_base_pointer = bo->gpu;
ls.wls_instances = log2_instances;
ls.wls_size_scale = util_logbase2(single_size) + 1;
};
return panfrost_pool_upload_aligned(&batch->pool, &shared,
sizeof(shared), 64);
return t.gpu;
}
static mali_ptr

View File

@ -498,18 +498,24 @@ panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
mfbd.msaa.sample_locations = panfrost_emit_sample_locations(batch);
mfbd.tiler_meta = panfrost_batch_get_tiler_meta(batch, vertex_count);
} else {
if (batch->stack_size) {
unsigned shift = panfrost_get_stack_shift(batch->stack_size);
struct panfrost_bo *bo = panfrost_batch_get_scratchpad(batch,
batch->stack_size,
dev->thread_tls_alloc,
dev->core_count);
mfbd.shared_memory.stack_shift = shift;
mfbd.shared_memory.scratchpad = bo->gpu;
struct mali_local_storage_packed lsp;
pan_pack(&lsp, LOCAL_STORAGE, ls) {
if (batch->stack_size) {
unsigned shift =
panfrost_get_stack_shift(batch->stack_size);
struct panfrost_bo *bo =
panfrost_batch_get_scratchpad(batch,
batch->stack_size,
dev->thread_tls_alloc,
dev->core_count);
ls.tls_size = shift;
ls.tls_base_pointer = bo->gpu;
}
ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
}
mfbd.shared_memory.shared_workgroup_count = ~0;
mfbd.shared_memory = lsp;
mfbd.tiler = panfrost_emit_midg_tiler(batch, vertex_count);
}

View File

@ -207,11 +207,6 @@ panfrost_emit_sfbd(struct panfrost_batch *batch, unsigned vertex_count)
struct mali_single_framebuffer framebuffer = {
.width = MALI_POSITIVE(width),
.height = MALI_POSITIVE(height),
.shared_memory = {
.stack_shift = shift,
.shared_workgroup_count = ~0,
.scratchpad = panfrost_batch_get_scratchpad(batch, shift, dev->thread_tls_alloc, dev->core_count)->gpu,
},
.format = {
.unk3 = 0x3,
},
@ -219,6 +214,18 @@ panfrost_emit_sfbd(struct panfrost_batch *batch, unsigned vertex_count)
.tiler = panfrost_emit_midg_tiler(batch, vertex_count),
};
struct mali_local_storage_packed lsp;
pan_pack(&lsp, LOCAL_STORAGE, ls) {
ls.tls_size = shift;
ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
ls.tls_base_pointer =
panfrost_batch_get_scratchpad(batch,
shift,
dev->thread_tls_alloc,
dev->core_count)->gpu;
}
framebuffer.shared_memory = lsp;
return framebuffer;
}

View File

@ -169,11 +169,10 @@ bit_vertex(struct panfrost_device *dev, panfrost_program prog,
memcpy(attr->cpu + 1024, iattr, sz_attr);
struct panfrost_bo *shmem = bit_bo_create(dev, 4096);
struct mali_shared_memory shmemp = {
.shared_workgroup_count = 0x1f,
};
memcpy(shmem->cpu, &shmemp, sizeof(shmemp));
pan_pack(shmem->cpu, LOCAL_STORAGE, cfg) {
cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
}
pan_pack(shader_desc->cpu, STATE, cfg) {
cfg.shader.shader = shader->gpu;

View File

@ -630,37 +630,6 @@ struct mali_sfbd_format {
unsigned unk3 : 4;
};
/* Shared structure at the start of framebuffer descriptors, or used bare for
 * compute jobs, configuring stack and shared memory.
 *
 * The layout is packed: two 32-bit words of bitfields followed by three
 * GPU pointers. */
struct mali_shared_memory {
/* Stack size selector; callers fill it with the result of
 * panfrost_get_stack_shift() for the batch's stack size. */
u32 stack_shift : 4;
/* Unknown bits; the decoder only prints them when they are non-zero. */
u32 unk0 : 28;
/* Configuration for shared memory for compute shaders.
 * shared_workgroup_count is logarithmic and may be computed for a
 * compute shader using shared memory as:
 *
 * shared_workgroup_count = MAX2(ceil(log2(count_x)) + ... + ceil(log2(count_z)), 10)
 *
 * For compute shaders that don't use shared memory, or non-compute
 * shaders, this is set to ~0, which the 5-bit field stores as 0x1F
 * (the value the decoder treats as "no workgroup count").
 */
u32 shared_workgroup_count : 5;
/* Unknown bits next to the workgroup count. */
u32 shared_unk1 : 3;
/* The compute path sets this to util_logbase2(single_size) + 1. */
u32 shared_shift : 4;
/* Expected to be zero; the decoder flags any non-zero value. */
u32 shared_zero : 20;
/* GPU address of the scratchpad BO backing the stack. */
mali_ptr scratchpad;
/* For compute shaders, the RAM backing of workgroup-shared memory. For
 * fragment shaders on Bifrost, apparently multisampling locations */
mali_ptr shared_memory;
/* Purpose unknown. */
mali_ptr unknown1;
} __attribute__((packed));
/* Configures multisampling on Bifrost fragment jobs */
struct bifrost_multisampling {
@ -671,7 +640,7 @@ struct bifrost_multisampling {
} __attribute__((packed));
struct mali_single_framebuffer {
struct mali_shared_memory shared_memory;
struct mali_local_storage_packed shared_memory;
struct mali_sfbd_format format;
u32 clear_flags;
@ -876,7 +845,7 @@ struct mali_framebuffer_extra {
struct mali_framebuffer {
union {
struct mali_shared_memory shared_memory;
struct mali_local_storage_packed shared_memory;
struct bifrost_multisampling msaa;
};

View File

@ -446,41 +446,6 @@ pandecode_sfbd_format(struct mali_sfbd_format format)
pandecode_log("},\n");
}
/* Dumps a mali_shared_memory descriptor, one property per field, and emits
 * "XXX" diagnostics for values that look wrong.
 *
 * desc:       descriptor to print (read-only).
 * is_compute: true when the descriptor belongs to a compute job. The
 *             workgroup-shared fields are only expected to be populated in
 *             that case, so any use of them otherwise is reported.
 */
static void
pandecode_shared_memory(const struct mali_shared_memory *desc, bool is_compute)
{
        pandecode_prop("stack_shift = 0x%x", desc->stack_shift);

        /* Unknown bits are only worth printing when something set them */
        if (desc->unk0)
                pandecode_prop("unk0 = 0x%x", desc->unk0);

        /* 0x1F is the "no workgroup memory" pattern of the 5-bit field */
        if (desc->shared_workgroup_count != 0x1F) {
                pandecode_prop("shared_workgroup_count = %d", desc->shared_workgroup_count);

                if (!is_compute)
                        pandecode_msg("XXX: wrong workgroup count for noncompute\n");
        }

        if (desc->shared_unk1 || desc->shared_shift) {
                pandecode_prop("shared_unk1 = %X", desc->shared_unk1);
                pandecode_prop("shared_shift = %X", desc->shared_shift);

                /* Terminate the message with a newline like every other
                 * pandecode_msg() call here; without it the warning would
                 * run into whatever is logged next. */
                if (!is_compute)
                        pandecode_msg("XXX: shared memory configured in noncompute shader\n");
        }

        if (desc->shared_zero) {
                pandecode_msg("XXX: shared memory zero tripped\n");
                pandecode_prop("shared_zero = 0x%" PRIx32, desc->shared_zero);
        }

        if (desc->shared_memory && !is_compute)
                pandecode_msg("XXX: shared memory used in noncompute shader\n");

        MEMORY_PROP(desc, scratchpad);
        MEMORY_PROP(desc, shared_memory);
        MEMORY_PROP(desc, unknown1);
}
static struct pandecode_fbd
pandecode_sfbd(uint64_t gpu_va, int job_no, bool is_fragment, unsigned gpu_id)
{
@ -494,13 +459,7 @@ pandecode_sfbd(uint64_t gpu_va, int job_no, bool is_fragment, unsigned gpu_id)
pandecode_log("struct mali_single_framebuffer framebuffer_%"PRIx64"_%d = {\n", gpu_va, job_no);
pandecode_indent++;
pandecode_log(".shared_memory = {\n");
pandecode_indent++;
pandecode_shared_memory(&s->shared_memory, false);
pandecode_indent--;
pandecode_log("},\n");
DUMP_CL(LOCAL_STORAGE, &s->shared_memory, "Local Storage:\n");
pandecode_sfbd_format(s->format);
info.width = s->width + 1;
@ -599,13 +558,8 @@ static void
pandecode_compute_fbd(uint64_t gpu_va, int job_no)
{
struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
const struct mali_shared_memory *PANDECODE_PTR_VAR(s, mem, (mali_ptr) gpu_va);
pandecode_log("struct mali_shared_memory shared_%"PRIx64"_%d = {\n", gpu_va, job_no);
pandecode_indent++;
pandecode_shared_memory(s, true);
pandecode_indent--;
pandecode_log("},\n");
const struct mali_local_storage_packed *PANDECODE_PTR_VAR(s, mem, (mali_ptr) gpu_va);
DUMP_CL(LOCAL_STORAGE, s, "Local Storage:\n");
}
/* Extracts the number of components associated with a Mali format */
@ -872,11 +826,8 @@ pandecode_mfbd_bfr(uint64_t gpu_va, int job_no, bool is_fragment, bool is_comput
pandecode_indent--;
pandecode_log("},\n");
} else {
pandecode_log(".shared_memory = {\n");
pandecode_indent++;
pandecode_shared_memory(&fb->shared_memory, is_compute);
pandecode_indent--;
pandecode_log("},\n");
struct mali_local_storage_packed ls = fb->shared_memory;
DUMP_CL(LOCAL_STORAGE, &ls, "Local Storage:\n");
}
info.width = fb->width1 + 1;

View File

@ -561,4 +561,15 @@
<field name="Scissor Maximum Y" size="16" start="7:16" type="uint"/>
</struct>
<!-- Local Storage descriptor: configures thread-local storage (TLS, the
     per-thread stack/scratchpad) and workgroup-local storage (WLS, memory
     shared within a compute workgroup).

     Field positions use word:bit notation. The two 64-bit address fields
     start two words apart, so a word is 32 bits. Size is presumably in the
     same 32-bit words (8 words = 32 bytes); TODO confirm against the pack
     generator. -->
<struct name="Local Storage" size="8">
<!-- Word 0: TLS configuration -->
<field name="TLS Size" size="5" start="0:0" type="uint"/>
<field name="TLS Initial Stack Pointer Offset" size="27" start="0:5" type="uint"/>
<!-- Word 1: WLS configuration. "WLS Instances" is log2-encoded; presumably
     the "No Workgroup Mem" value 0x80000000 therefore packs as 31, the
     all-ones pattern of this 5-bit field. Verify against the generator. -->
<field name="WLS Instances" size="5" start="1:0" type="uint" modifier="log2" prefix="MALI_LOCAL_STORAGE">
<value name="No Workgroup Mem" value="0x80000000"/>
</field>
<field name="WLS Size Base" size="2" start="1:5" type="uint"/>
<field name="WLS Size Scale" size="5" start="1:8" type="uint"/>
<!-- Words 2-3 and 4-5: GPU addresses of the TLS and WLS backing memory -->
<field name="TLS Base Pointer" size="64" start="2:0" type="address"/>
<field name="WLS Base Pointer" size="64" start="4:0" type="address"/>
</struct>
</panxml>