panvk: Move the dynamic SSBO descriptors to their own UBO

We are about to put our sysvals in the push uniforms array, but before
we can do that, we need to store our dynamic storage buffers in a
dedicated UBO. We put this dynamic descriptor UBO after the dynamic
UBOs.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28399>
This commit is contained in:
Boris Brezillon 2024-01-30 18:29:21 +01:00 committed by Marge Bot
parent cfe2254149
commit a603c66659
11 changed files with 134 additions and 88 deletions

View File

@ -101,6 +101,7 @@ struct panvk_descriptor_state {
mali_ptr ubos;
mali_ptr textures;
mali_ptr samplers;
mali_ptr dyn_desc_ubo;
mali_ptr push_uniforms;
mali_ptr vs_attribs;
mali_ptr vs_attrib_bufs;

View File

@ -70,8 +70,6 @@ struct panvk_pipeline {
/* shader stage bit is set if the stage accesses storage images */
uint32_t img_access_mask;
unsigned num_ubos;
struct {
unsigned ubo_idx;
} sysvals[MESA_SHADER_STAGES];

View File

@ -46,6 +46,7 @@ struct panvk_pipeline_layout {
unsigned dyn_ubo_offset;
unsigned dyn_ssbo_offset;
unsigned img_offset;
unsigned dyn_desc_ubo_offset;
} sets[MAX_SETS];
};
@ -59,4 +60,12 @@ unsigned panvk_per_arch(pipeline_layout_ubo_index)(
const struct panvk_pipeline_layout *layout, unsigned set, unsigned binding,
unsigned array_index);
/* Index, in the UBO table, of the UBO holding the dynamic SSBO descriptors.
 * It is computed as the number of built-in UBOs plus the layout's regular
 * and dynamic UBO counts, i.e. the slot right after the dynamic UBOs.
 */
unsigned
panvk_per_arch(pipeline_layout_dyn_desc_ubo_index)(
const struct panvk_pipeline_layout *layout);
/* Total number of UBO table entries needed for this layout: built-in UBOs,
 * regular UBOs, dynamic UBOs, plus one extra entry when the layout declares
 * at least one dynamic SSBO.
 */
unsigned
panvk_per_arch(pipeline_layout_total_ubo_count)(
const struct panvk_pipeline_layout *layout);
#endif

View File

@ -51,10 +51,6 @@ struct panvk_sysvals {
union panvk_sysval_vec4 local_group_size;
};
};
/* The back-end compiler doesn't know about any sysvals after this point */
struct panvk_ssbo_addr dyn_ssbos[MAX_DYNAMIC_STORAGE_BUFFERS];
};
struct panvk_shader {

View File

@ -413,20 +413,51 @@ panvk_cmd_unprepare_push_sets(struct panvk_cmd_buffer *cmdbuf,
}
}
static void
panvk_cmd_prepare_dyn_ssbos(struct panvk_cmd_buffer *cmdbuf,
struct panvk_cmd_bind_point_state *bind_point_state)
{
struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
if (!pipeline->layout->num_dyn_ssbos || desc_state->dyn_desc_ubo)
return;
struct panfrost_ptr ssbo_descs = pan_pool_alloc_aligned(
&cmdbuf->desc_pool.base,
pipeline->layout->num_dyn_ssbos * sizeof(struct panvk_ssbo_addr), 16);
struct panvk_ssbo_addr *ssbos = ssbo_descs.cpu;
for (uint32_t i = 0; i < pipeline->layout->num_dyn_ssbos; i++) {
const struct panvk_buffer_desc *bdesc = &desc_state->dyn.ssbos[i];
ssbos[i] = (struct panvk_ssbo_addr){
.base_addr = panvk_buffer_gpu_ptr(bdesc->buffer, bdesc->offset),
.size = panvk_buffer_range(bdesc->buffer, bdesc->offset, bdesc->size),
};
}
desc_state->dyn_desc_ubo = ssbo_descs.gpu;
}
static void
panvk_cmd_prepare_ubos(struct panvk_cmd_buffer *cmdbuf,
struct panvk_cmd_bind_point_state *bind_point_state)
{
struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
unsigned ubo_count =
panvk_per_arch(pipeline_layout_total_ubo_count)(pipeline->layout);
if (!pipeline->num_ubos || desc_state->ubos)
if (!ubo_count || desc_state->ubos)
return;
panvk_cmd_prepare_sysvals(cmdbuf, bind_point_state);
panvk_cmd_prepare_dyn_ssbos(cmdbuf, bind_point_state);
struct panfrost_ptr ubos = pan_pool_alloc_desc_array(
&cmdbuf->desc_pool.base, pipeline->num_ubos, UNIFORM_BUFFER);
&cmdbuf->desc_pool.base, ubo_count, UNIFORM_BUFFER);
struct mali_uniform_buffer_packed *ubo_descs = ubos.cpu;
pan_pack(&ubo_descs[PANVK_SYSVAL_UBO_INDEX], UNIFORM_BUFFER, cfg) {
@ -474,6 +505,17 @@ panvk_cmd_prepare_ubos(struct panvk_cmd_buffer *cmdbuf,
}
}
if (pipeline->layout->num_dyn_ssbos) {
unsigned dyn_desc_ubo =
panvk_per_arch(pipeline_layout_dyn_desc_ubo_index)(pipeline->layout);
pan_pack(&ubo_descs[dyn_desc_ubo], UNIFORM_BUFFER, cfg) {
cfg.pointer = desc_state->dyn_desc_ubo;
cfg.entries =
pipeline->layout->num_dyn_ssbos * sizeof(struct panvk_ssbo_addr);
}
}
desc_state->ubos = ubos.gpu;
}
@ -2015,26 +2057,6 @@ panvk_per_arch(CmdBindIndexBuffer)(VkCommandBuffer commandBuffer,
}
}
/*
 * Refresh the dynamic SSBO entries stored in the sysvals for one descriptor
 * set.
 *
 * For each dynamic SSBO of the set's layout, the entry at
 * dyn_ssbo_offset + i in desc_state->sysvals.dyn_ssbos is rebuilt from the
 * matching desc_state->dyn.ssbos buffer descriptor. The cached sysvals
 * pointer is then cleared.
 */
static void
panvk_set_dyn_ssbo_pointers(struct panvk_descriptor_state *desc_state,
                            unsigned dyn_ssbo_offset,
                            struct panvk_descriptor_set *set)
{
   struct panvk_sysvals *sysvals = &desc_state->sysvals;
   const unsigned count = set->layout->num_dyn_ssbos;

   for (unsigned i = 0; i < count; i++) {
      const unsigned slot = dyn_ssbo_offset + i;
      const struct panvk_buffer_desc *buf = &desc_state->dyn.ssbos[slot];

      /* The compound literal zero-fills any field not listed here. */
      sysvals->dyn_ssbos[slot] = (struct panvk_ssbo_addr){
         .base_addr = panvk_buffer_gpu_ptr(buf->buffer, buf->offset),
         .size = panvk_buffer_range(buf->buffer, buf->offset, buf->size),
      };
   }

   /* Drop the cached sysvals address now that the sysvals content changed. */
   desc_state->sysvals_ptr = 0;
}
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdBindDescriptorSets)(
VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
@ -2081,32 +2103,22 @@ panvk_per_arch(CmdBindDescriptorSets)(
}
}
}
if (set->layout->num_dyn_ssbos) {
panvk_set_dyn_ssbo_pointers(descriptors_state,
playout->sets[idx].dyn_ssbo_offset, set);
}
if (set->layout->num_dyn_ssbos)
descriptors_state->dirty |= PANVK_DYNAMIC_SSBO;
if (set->layout->num_ubos || set->layout->num_dyn_ubos ||
set->layout->num_dyn_ssbos || set->layout->desc_ubo_size)
descriptors_state->ubos = 0;
if (set->layout->num_textures)
descriptors_state->textures = 0;
if (set->layout->num_samplers)
descriptors_state->samplers = 0;
if (set->layout->num_imgs) {
descriptors_state->vs_attrib_bufs =
descriptors_state->non_vs_attrib_bufs = 0;
descriptors_state->vs_attribs = descriptors_state->non_vs_attribs = 0;
}
}
/* Unconditionally reset all previously emitted descriptor tables.
* TODO: we could be smarter by checking which parts of the pipeline layout
* are compatible with the previously bound descriptor sets.
*/
descriptors_state->sysvals_ptr = 0;
descriptors_state->ubos = 0;
descriptors_state->textures = 0;
descriptors_state->samplers = 0;
descriptors_state->dyn_desc_ubo = 0;
descriptors_state->vs_attrib_bufs = 0;
descriptors_state->non_vs_attrib_bufs = 0;
descriptors_state->vs_attribs = 0;
descriptors_state->non_vs_attribs = 0;
assert(dynoffset_idx == dynamicOffsetCount);
}

View File

@ -334,6 +334,11 @@ panvk_desc_ubo_data(struct panvk_descriptor_set *set, uint32_t binding,
const struct panvk_descriptor_set_binding_layout *binding_layout =
&set->layout->bindings[binding];
/* Dynamic SSBO info is stored in a separate UBO allocated from the
* cmd_buffer descriptor pool.
*/
assert(binding_layout->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC);
return (char *)set->desc_ubo.addr.host + binding_layout->desc_ubo_offset +
elem * binding_layout->desc_ubo_stride;
}
@ -754,8 +759,17 @@ panvk_per_arch(UpdateDescriptorSets)(
assert(dst_binding_layout->type == src_binding_layout->type);
if (dst_binding_layout->desc_ubo_stride > 0 &&
src_binding_layout->desc_ubo_stride > 0) {
/* Dynamic SSBO info is stored in a separate UBO allocated from the
* cmd_buffer descriptor pool.
*/
bool src_has_data_in_desc_ubo =
src_binding_layout->desc_ubo_stride > 0 &&
src_binding_layout->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
bool dst_has_data_in_desc_ubo =
dst_binding_layout->desc_ubo_stride > 0 &&
dst_binding_layout->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
if (src_has_data_in_desc_ubo && dst_has_data_in_desc_ubo) {
for (uint32_t j = 0; j < copy->descriptorCount; j++) {
memcpy(panvk_desc_ubo_data(dst_set, copy->dstBinding,
copy->dstArrayElement + j),

View File

@ -138,7 +138,7 @@ panvk_per_arch(CreateDescriptorSetLayout)(
unsigned sampler_idx = 0, tex_idx = 0, ubo_idx = 0;
unsigned dyn_ubo_idx = 0, dyn_ssbo_idx = 0, img_idx = 0;
uint32_t desc_ubo_size = 0;
uint32_t desc_ubo_size = 0, dyn_desc_ubo_size = 0;
for (unsigned i = 0; i < pCreateInfo->bindingCount; i++) {
const VkDescriptorSetLayoutBinding *binding = &bindings[i];
@ -193,6 +193,7 @@ panvk_per_arch(CreateDescriptorSetLayout)(
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
binding_layout->dyn_ssbo_idx = dyn_ssbo_idx;
dyn_ssbo_idx += binding_layout->array_size;
binding_layout->desc_ubo_stride = sizeof(struct panvk_ssbo_addr);
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
binding_layout->desc_ubo_stride = sizeof(struct panvk_ssbo_addr);
@ -211,10 +212,17 @@ panvk_per_arch(CreateDescriptorSetLayout)(
unreachable("Invalid descriptor type");
}
desc_ubo_size = ALIGN_POT(desc_ubo_size, PANVK_DESCRIPTOR_ALIGN);
binding_layout->desc_ubo_offset = desc_ubo_size;
desc_ubo_size +=
binding_layout->desc_ubo_stride * binding_layout->array_size;
if (binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
binding_layout->desc_ubo_offset = dyn_desc_ubo_size;
dyn_desc_ubo_size +=
binding_layout->desc_ubo_stride * binding_layout->array_size;
} else {
desc_ubo_size = ALIGN_POT(desc_ubo_size, PANVK_DESCRIPTOR_ALIGN);
binding_layout->desc_ubo_offset = desc_ubo_size;
desc_ubo_size +=
binding_layout->desc_ubo_stride * binding_layout->array_size;
}
}
set_layout->desc_ubo_size = desc_ubo_size;

View File

@ -119,37 +119,28 @@ build_res_index(nir_builder *b, uint32_t set, uint32_t binding,
return nir_vec2(b, nir_imm_int(b, packed), array_index);
}
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
assert(addr_format == nir_address_format_64bit_bounded_global ||
addr_format == nir_address_format_64bit_global_32bit_offset);
const unsigned set_ubo_idx =
panvk_per_arch(pipeline_layout_ubo_start)(ctx->layout, set, false) +
set_layout->desc_ubo_index;
const uint32_t packed =
(bind_layout->desc_ubo_stride << 16) | set_ubo_idx;
return nir_vec4(b, nir_imm_int(b, packed),
nir_imm_int(b, bind_layout->desc_ubo_offset),
nir_imm_int(b, array_size - 1), array_index);
}
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
assert(addr_format == nir_address_format_64bit_bounded_global ||
addr_format == nir_address_format_64bit_global_32bit_offset);
const unsigned dyn_ssbo_idx =
ctx->layout->sets[set].dyn_ssbo_offset + bind_layout->dyn_ssbo_idx;
const bool is_dynamic =
bind_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
const unsigned desc_ubo_idx =
is_dynamic
? panvk_per_arch(pipeline_layout_dyn_desc_ubo_index)(ctx->layout)
: panvk_per_arch(pipeline_layout_ubo_start)(ctx->layout, set,
false) +
set_layout->desc_ubo_index;
const unsigned desc_ubo_offset =
bind_layout->desc_ubo_offset +
(is_dynamic ? ctx->layout->sets[set].dyn_desc_ubo_offset : 0);
const unsigned ubo_idx = PANVK_SYSVAL_UBO_INDEX;
const unsigned desc_stride = sizeof(struct panvk_ssbo_addr);
const uint32_t ubo_offset =
offsetof(struct panvk_sysvals, dyn_ssbos) + dyn_ssbo_idx * desc_stride;
const uint32_t packed =
(bind_layout->desc_ubo_stride << 16) | desc_ubo_idx;
const uint32_t packed = (desc_stride << 16) | ubo_idx;
return nir_vec4(b, nir_imm_int(b, packed), nir_imm_int(b, ubo_offset),
return nir_vec4(b, nir_imm_int(b, packed),
nir_imm_int(b, desc_ubo_offset),
nir_imm_int(b, array_size - 1), array_index);
}

View File

@ -496,9 +496,6 @@ panvk_pipeline_builder_init_shaders(struct panvk_pipeline_builder *builder,
&pipeline->blend.bd_template[rt]);
}
}
pipeline->num_ubos = PANVK_NUM_BUILTIN_UBOS + builder->layout->num_ubos +
builder->layout->num_dyn_ubos;
}
static void

View File

@ -9,6 +9,7 @@
#include "vk_log.h"
#include "panvk_device.h"
#include "panvk_descriptor_set.h"
#include "panvk_entrypoints.h"
#include "panvk_macros.h"
#include "panvk_pipeline_layout.h"
@ -40,6 +41,7 @@ panvk_per_arch(CreatePipelineLayout)(
unsigned sampler_idx = 0, tex_idx = 0, ubo_idx = 0;
unsigned dyn_ubo_idx = 0, dyn_ssbo_idx = 0, img_idx = 0;
unsigned dyn_desc_ubo_offset = 0;
for (unsigned set = 0; set < pCreateInfo->setLayoutCount; set++) {
const struct panvk_descriptor_set_layout *set_layout =
vk_to_panvk_descriptor_set_layout(layout->vk.set_layouts[set]);
@ -50,12 +52,15 @@ panvk_per_arch(CreatePipelineLayout)(
layout->sets[set].dyn_ubo_offset = dyn_ubo_idx;
layout->sets[set].dyn_ssbo_offset = dyn_ssbo_idx;
layout->sets[set].img_offset = img_idx;
layout->sets[set].dyn_desc_ubo_offset = dyn_desc_ubo_offset;
sampler_idx += set_layout->num_samplers;
tex_idx += set_layout->num_textures;
ubo_idx += set_layout->num_ubos;
dyn_ubo_idx += set_layout->num_dyn_ubos;
dyn_ssbo_idx += set_layout->num_dyn_ssbos;
img_idx += set_layout->num_imgs;
dyn_desc_ubo_offset +=
set_layout->num_dyn_ssbos * sizeof(struct panvk_ssbo_addr);
for (unsigned b = 0; b < set_layout->binding_count; b++) {
const struct panvk_descriptor_set_binding_layout *binding_layout =
@ -140,3 +145,18 @@ panvk_per_arch(pipeline_layout_ubo_index)(
return panvk_per_arch(pipeline_layout_ubo_start)(layout, set, is_dynamic) +
ubo_idx + array_index;
}
/*
 * Return the UBO table index of the UBO that holds the dynamic SSBO
 * descriptors: it sits right after the built-in, regular and dynamic UBOs.
 */
unsigned
panvk_per_arch(pipeline_layout_dyn_desc_ubo_index)(
   const struct panvk_pipeline_layout *layout)
{
   /* Entries preceding the dynamic descriptor UBO in the table. */
   const unsigned preceding_ubos =
      PANVK_NUM_BUILTIN_UBOS + layout->num_ubos + layout->num_dyn_ubos;

   return preceding_ubos;
}
/*
 * Return the number of UBO table entries required by a pipeline layout:
 * built-in UBOs, regular UBOs and dynamic UBOs, plus one extra entry when
 * the layout declares at least one dynamic SSBO.
 */
unsigned
panvk_per_arch(pipeline_layout_total_ubo_count)(
   const struct panvk_pipeline_layout *layout)
{
   unsigned count =
      PANVK_NUM_BUILTIN_UBOS + layout->num_ubos + layout->num_dyn_ubos;

   /* All dynamic SSBO descriptors share a single UBO. */
   if (layout->num_dyn_ssbos)
      count += 1;

   return count;
}

View File

@ -377,7 +377,7 @@ panvk_per_arch(shader_create)(struct panvk_device *dev, gl_shader_stage stage,
/* Patch the descriptor count */
shader->info.ubo_count =
PANVK_NUM_BUILTIN_UBOS + layout->num_ubos + layout->num_dyn_ubos;
panvk_per_arch(pipeline_layout_total_ubo_count)(layout);
shader->info.sampler_count = layout->num_samplers;
shader->info.texture_count = layout->num_textures;
if (shader->has_img_access)