panfrost: XML-ify the local storage descriptor
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6797>
This commit is contained in:
parent
706974ce17
commit
3a06fc3d34
|
@ -57,21 +57,25 @@ panfrost_vt_emit_shared_memory(struct panfrost_batch *batch)
|
|||
{
|
||||
struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
|
||||
|
||||
struct mali_shared_memory shared = {
|
||||
.shared_workgroup_count = ~0,
|
||||
};
|
||||
struct panfrost_transfer t =
|
||||
panfrost_pool_alloc_aligned(&batch->pool,
|
||||
MALI_LOCAL_STORAGE_LENGTH,
|
||||
64);
|
||||
|
||||
if (batch->stack_size) {
|
||||
struct panfrost_bo *stack =
|
||||
panfrost_batch_get_scratchpad(batch, batch->stack_size,
|
||||
dev->thread_tls_alloc,
|
||||
dev->core_count);
|
||||
pan_pack(t.cpu, LOCAL_STORAGE, ls) {
|
||||
ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
|
||||
if (batch->stack_size) {
|
||||
struct panfrost_bo *stack =
|
||||
panfrost_batch_get_scratchpad(batch, batch->stack_size,
|
||||
dev->thread_tls_alloc,
|
||||
dev->core_count);
|
||||
|
||||
shared.stack_shift = panfrost_get_stack_shift(batch->stack_size);
|
||||
shared.scratchpad = stack->gpu;
|
||||
ls.tls_size = panfrost_get_stack_shift(batch->stack_size);
|
||||
ls.tls_base_pointer = stack->gpu;
|
||||
}
|
||||
}
|
||||
|
||||
return panfrost_pool_upload_aligned(&batch->pool, &shared, sizeof(shared), 64);
|
||||
return t.gpu;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -950,15 +954,18 @@ panfrost_emit_shared_memory(struct panfrost_batch *batch,
|
|||
struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch,
|
||||
shared_size,
|
||||
1);
|
||||
struct panfrost_transfer t =
|
||||
panfrost_pool_alloc_aligned(&batch->pool,
|
||||
MALI_LOCAL_STORAGE_LENGTH,
|
||||
64);
|
||||
|
||||
struct mali_shared_memory shared = {
|
||||
.shared_memory = bo->gpu,
|
||||
.shared_workgroup_count = log2_instances,
|
||||
.shared_shift = util_logbase2(single_size) + 1
|
||||
pan_pack(t.cpu, LOCAL_STORAGE, ls) {
|
||||
ls.wls_base_pointer = bo->gpu;
|
||||
ls.wls_instances = log2_instances;
|
||||
ls.wls_size_scale = util_logbase2(single_size) + 1;
|
||||
};
|
||||
|
||||
return panfrost_pool_upload_aligned(&batch->pool, &shared,
|
||||
sizeof(shared), 64);
|
||||
return t.gpu;
|
||||
}
|
||||
|
||||
static mali_ptr
|
||||
|
|
|
@ -498,18 +498,24 @@ panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
|
|||
mfbd.msaa.sample_locations = panfrost_emit_sample_locations(batch);
|
||||
mfbd.tiler_meta = panfrost_batch_get_tiler_meta(batch, vertex_count);
|
||||
} else {
|
||||
if (batch->stack_size) {
|
||||
unsigned shift = panfrost_get_stack_shift(batch->stack_size);
|
||||
struct panfrost_bo *bo = panfrost_batch_get_scratchpad(batch,
|
||||
batch->stack_size,
|
||||
dev->thread_tls_alloc,
|
||||
dev->core_count);
|
||||
mfbd.shared_memory.stack_shift = shift;
|
||||
mfbd.shared_memory.scratchpad = bo->gpu;
|
||||
struct mali_local_storage_packed lsp;
|
||||
|
||||
pan_pack(&lsp, LOCAL_STORAGE, ls) {
|
||||
if (batch->stack_size) {
|
||||
unsigned shift =
|
||||
panfrost_get_stack_shift(batch->stack_size);
|
||||
struct panfrost_bo *bo =
|
||||
panfrost_batch_get_scratchpad(batch,
|
||||
batch->stack_size,
|
||||
dev->thread_tls_alloc,
|
||||
dev->core_count);
|
||||
ls.tls_size = shift;
|
||||
ls.tls_base_pointer = bo->gpu;
|
||||
}
|
||||
|
||||
ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
|
||||
}
|
||||
|
||||
mfbd.shared_memory.shared_workgroup_count = ~0;
|
||||
|
||||
mfbd.shared_memory = lsp;
|
||||
mfbd.tiler = panfrost_emit_midg_tiler(batch, vertex_count);
|
||||
}
|
||||
|
||||
|
|
|
@ -207,11 +207,6 @@ panfrost_emit_sfbd(struct panfrost_batch *batch, unsigned vertex_count)
|
|||
struct mali_single_framebuffer framebuffer = {
|
||||
.width = MALI_POSITIVE(width),
|
||||
.height = MALI_POSITIVE(height),
|
||||
.shared_memory = {
|
||||
.stack_shift = shift,
|
||||
.shared_workgroup_count = ~0,
|
||||
.scratchpad = panfrost_batch_get_scratchpad(batch, shift, dev->thread_tls_alloc, dev->core_count)->gpu,
|
||||
},
|
||||
.format = {
|
||||
.unk3 = 0x3,
|
||||
},
|
||||
|
@ -219,6 +214,18 @@ panfrost_emit_sfbd(struct panfrost_batch *batch, unsigned vertex_count)
|
|||
.tiler = panfrost_emit_midg_tiler(batch, vertex_count),
|
||||
};
|
||||
|
||||
struct mali_local_storage_packed lsp;
|
||||
pan_pack(&lsp, LOCAL_STORAGE, ls) {
|
||||
ls.tls_size = shift;
|
||||
ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
|
||||
ls.tls_base_pointer =
|
||||
panfrost_batch_get_scratchpad(batch,
|
||||
shift,
|
||||
dev->thread_tls_alloc,
|
||||
dev->core_count)->gpu;
|
||||
}
|
||||
framebuffer.shared_memory = lsp;
|
||||
|
||||
return framebuffer;
|
||||
}
|
||||
|
||||
|
|
|
@ -169,11 +169,10 @@ bit_vertex(struct panfrost_device *dev, panfrost_program prog,
|
|||
memcpy(attr->cpu + 1024, iattr, sz_attr);
|
||||
|
||||
struct panfrost_bo *shmem = bit_bo_create(dev, 4096);
|
||||
struct mali_shared_memory shmemp = {
|
||||
.shared_workgroup_count = 0x1f,
|
||||
};
|
||||
|
||||
memcpy(shmem->cpu, &shmemp, sizeof(shmemp));
|
||||
pan_pack(shmem->cpu, LOCAL_STORAGE, cfg) {
|
||||
cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
|
||||
}
|
||||
|
||||
pan_pack(shader_desc->cpu, STATE, cfg) {
|
||||
cfg.shader.shader = shader->gpu;
|
||||
|
|
|
@ -630,37 +630,6 @@ struct mali_sfbd_format {
|
|||
unsigned unk3 : 4;
|
||||
};
|
||||
|
||||
/* Shared structure at the start of framebuffer descriptors, or used bare for
|
||||
* compute jobs, configuring stack and shared memory */
|
||||
|
||||
struct mali_shared_memory {
|
||||
u32 stack_shift : 4;
|
||||
u32 unk0 : 28;
|
||||
|
||||
/* Configuration for shared memory for compute shaders.
|
||||
* shared_workgroup_count is logarithmic and may be computed for a
|
||||
* compute shader using shared memory as:
|
||||
*
|
||||
* shared_workgroup_count = MAX2(ceil(log2(count_x)) + ... + ceil(log2(count_z)), 10)
|
||||
*
|
||||
* For compute shaders that don't use shared memory, or non-compute
|
||||
* shaders, this is set to ~0
|
||||
*/
|
||||
|
||||
u32 shared_workgroup_count : 5;
|
||||
u32 shared_unk1 : 3;
|
||||
u32 shared_shift : 4;
|
||||
u32 shared_zero : 20;
|
||||
|
||||
mali_ptr scratchpad;
|
||||
|
||||
/* For compute shaders, the RAM backing of workgroup-shared memory. For
|
||||
* fragment shaders on Bifrost, apparently multisampling locations */
|
||||
|
||||
mali_ptr shared_memory;
|
||||
mali_ptr unknown1;
|
||||
} __attribute__((packed));
|
||||
|
||||
/* Configures multisampling on Bifrost fragment jobs */
|
||||
|
||||
struct bifrost_multisampling {
|
||||
|
@ -671,7 +640,7 @@ struct bifrost_multisampling {
|
|||
} __attribute__((packed));
|
||||
|
||||
struct mali_single_framebuffer {
|
||||
struct mali_shared_memory shared_memory;
|
||||
struct mali_local_storage_packed shared_memory;
|
||||
struct mali_sfbd_format format;
|
||||
|
||||
u32 clear_flags;
|
||||
|
@ -876,7 +845,7 @@ struct mali_framebuffer_extra {
|
|||
|
||||
struct mali_framebuffer {
|
||||
union {
|
||||
struct mali_shared_memory shared_memory;
|
||||
struct mali_local_storage_packed shared_memory;
|
||||
struct bifrost_multisampling msaa;
|
||||
};
|
||||
|
||||
|
|
|
@ -446,41 +446,6 @@ pandecode_sfbd_format(struct mali_sfbd_format format)
|
|||
pandecode_log("},\n");
|
||||
}
|
||||
|
||||
static void
|
||||
pandecode_shared_memory(const struct mali_shared_memory *desc, bool is_compute)
|
||||
{
|
||||
pandecode_prop("stack_shift = 0x%x", desc->stack_shift);
|
||||
|
||||
if (desc->unk0)
|
||||
pandecode_prop("unk0 = 0x%x", desc->unk0);
|
||||
|
||||
if (desc->shared_workgroup_count != 0x1F) {
|
||||
pandecode_prop("shared_workgroup_count = %d", desc->shared_workgroup_count);
|
||||
if (!is_compute)
|
||||
pandecode_msg("XXX: wrong workgroup count for noncompute\n");
|
||||
}
|
||||
|
||||
if (desc->shared_unk1 || desc->shared_shift) {
|
||||
pandecode_prop("shared_unk1 = %X", desc->shared_unk1);
|
||||
pandecode_prop("shared_shift = %X", desc->shared_shift);
|
||||
|
||||
if (!is_compute)
|
||||
pandecode_msg("XXX: shared memory configured in noncompute shader");
|
||||
}
|
||||
|
||||
if (desc->shared_zero) {
|
||||
pandecode_msg("XXX: shared memory zero tripped\n");
|
||||
pandecode_prop("shared_zero = 0x%" PRIx32, desc->shared_zero);
|
||||
}
|
||||
|
||||
if (desc->shared_memory && !is_compute)
|
||||
pandecode_msg("XXX: shared memory used in noncompute shader\n");
|
||||
|
||||
MEMORY_PROP(desc, scratchpad);
|
||||
MEMORY_PROP(desc, shared_memory);
|
||||
MEMORY_PROP(desc, unknown1);
|
||||
}
|
||||
|
||||
static struct pandecode_fbd
|
||||
pandecode_sfbd(uint64_t gpu_va, int job_no, bool is_fragment, unsigned gpu_id)
|
||||
{
|
||||
|
@ -494,13 +459,7 @@ pandecode_sfbd(uint64_t gpu_va, int job_no, bool is_fragment, unsigned gpu_id)
|
|||
|
||||
pandecode_log("struct mali_single_framebuffer framebuffer_%"PRIx64"_%d = {\n", gpu_va, job_no);
|
||||
pandecode_indent++;
|
||||
|
||||
pandecode_log(".shared_memory = {\n");
|
||||
pandecode_indent++;
|
||||
pandecode_shared_memory(&s->shared_memory, false);
|
||||
pandecode_indent--;
|
||||
pandecode_log("},\n");
|
||||
|
||||
DUMP_CL(LOCAL_STORAGE, &s->shared_memory, "Local Storage:\n");
|
||||
pandecode_sfbd_format(s->format);
|
||||
|
||||
info.width = s->width + 1;
|
||||
|
@ -599,13 +558,8 @@ static void
|
|||
pandecode_compute_fbd(uint64_t gpu_va, int job_no)
|
||||
{
|
||||
struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
|
||||
const struct mali_shared_memory *PANDECODE_PTR_VAR(s, mem, (mali_ptr) gpu_va);
|
||||
|
||||
pandecode_log("struct mali_shared_memory shared_%"PRIx64"_%d = {\n", gpu_va, job_no);
|
||||
pandecode_indent++;
|
||||
pandecode_shared_memory(s, true);
|
||||
pandecode_indent--;
|
||||
pandecode_log("},\n");
|
||||
const struct mali_local_storage_packed *PANDECODE_PTR_VAR(s, mem, (mali_ptr) gpu_va);
|
||||
DUMP_CL(LOCAL_STORAGE, s, "Local Storage:\n");
|
||||
}
|
||||
|
||||
/* Extracts the number of components associated with a Mali format */
|
||||
|
@ -872,11 +826,8 @@ pandecode_mfbd_bfr(uint64_t gpu_va, int job_no, bool is_fragment, bool is_comput
|
|||
pandecode_indent--;
|
||||
pandecode_log("},\n");
|
||||
} else {
|
||||
pandecode_log(".shared_memory = {\n");
|
||||
pandecode_indent++;
|
||||
pandecode_shared_memory(&fb->shared_memory, is_compute);
|
||||
pandecode_indent--;
|
||||
pandecode_log("},\n");
|
||||
struct mali_local_storage_packed ls = fb->shared_memory;
|
||||
DUMP_CL(LOCAL_STORAGE, &ls, "Local Storage:\n");
|
||||
}
|
||||
|
||||
info.width = fb->width1 + 1;
|
||||
|
|
|
@ -561,4 +561,15 @@
|
|||
<field name="Scissor Maximum Y" size="16" start="7:16" type="uint"/>
|
||||
</struct>
|
||||
|
||||
<struct name="Local Storage" size="8">
|
||||
<field name="TLS Size" size="5" start="0:0" type="uint"/>
|
||||
<field name="TLS Initial Stack Pointer Offset" size="27" start="0:5" type="uint"/>
|
||||
<field name="WLS Instances" size="5" start="1:0" type="uint" modifier="log2" prefix="MALI_LOCAL_STORAGE">
|
||||
<value name="No Workgroup Mem" value="0x80000000"/>
|
||||
</field>
|
||||
<field name="WLS Size Base" size="2" start="1:5" type="uint"/>
|
||||
<field name="WLS Size Scale" size="5" start="1:8" type="uint"/>
|
||||
<field name="TLS Base Pointer" size="64" start="2:0" type="address"/>
|
||||
<field name="WLS Base Pointer" size="64" start="4:0" type="address"/>
|
||||
</struct>
|
||||
</panxml>
|
||||
|
|
Loading…
Reference in New Issue