panvk: Lower sysvals to push uniforms

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28399>
This commit is contained in:
Boris Brezillon 2024-01-30 19:37:44 +01:00 committed by Marge Bot
parent 9c553bda9c
commit 6d4b376a9b
7 changed files with 35 additions and 79 deletions

View File

@ -97,7 +97,6 @@ struct panvk_descriptor_state {
struct mali_uniform_buffer_packed ubos[MAX_DYNAMIC_UNIFORM_BUFFERS]; struct mali_uniform_buffer_packed ubos[MAX_DYNAMIC_UNIFORM_BUFFERS];
struct panvk_ssbo_addr ssbos[MAX_DYNAMIC_STORAGE_BUFFERS]; struct panvk_ssbo_addr ssbos[MAX_DYNAMIC_STORAGE_BUFFERS];
} dyn; } dyn;
mali_ptr sysvals_ptr;
mali_ptr ubos; mali_ptr ubos;
mali_ptr textures; mali_ptr textures;
mali_ptr samplers; mali_ptr samplers;

View File

@ -70,10 +70,6 @@ struct panvk_pipeline {
/* shader stage bit is set of the stage accesses storage images */ /* shader stage bit is set of the stage accesses storage images */
uint32_t img_access_mask; uint32_t img_access_mask;
struct {
unsigned ubo_idx;
} sysvals[MESA_SHADER_STAGES];
unsigned tls_size; unsigned tls_size;
unsigned wls_size; unsigned wls_size;

View File

@ -20,9 +20,6 @@
#include "panvk_macros.h" #include "panvk_macros.h"
#include "panvk_pipeline_layout.h" #include "panvk_pipeline_layout.h"
#define PANVK_SYSVAL_UBO_INDEX 0
#define PANVK_NUM_BUILTIN_UBOS 1
struct nir_shader; struct nir_shader;
struct pan_blend_state; struct pan_blend_state;
struct panvk_device; struct panvk_device;
@ -56,7 +53,6 @@ struct panvk_sysvals {
struct panvk_shader { struct panvk_shader {
struct pan_shader_info info; struct pan_shader_info info;
struct util_dynarray binary; struct util_dynarray binary;
unsigned sysval_ubo;
struct pan_compute_dim local_size; struct pan_compute_dim local_size;
bool has_img_access; bool has_img_access;
}; };
@ -68,7 +64,7 @@ bool panvk_per_arch(blend_needs_lowering)(const struct panvk_device *dev,
struct panvk_shader *panvk_per_arch(shader_create)( struct panvk_shader *panvk_per_arch(shader_create)(
struct panvk_device *dev, gl_shader_stage stage, struct panvk_device *dev, gl_shader_stage stage,
const VkPipelineShaderStageCreateInfo *stage_info, const VkPipelineShaderStageCreateInfo *stage_info,
const struct panvk_pipeline_layout *layout, unsigned sysval_ubo, const struct panvk_pipeline_layout *layout,
struct pan_blend_state *blend_state, bool static_blend_constants, struct pan_blend_state *blend_state, bool static_blend_constants,
const VkAllocationCallbacks *alloc); const VkAllocationCallbacks *alloc);

View File

@ -307,13 +307,13 @@ panvk_cmd_prepare_draw_sysvals(
sysvals->first_vertex = draw->offset_start; sysvals->first_vertex = draw->offset_start;
sysvals->base_vertex = base_vertex; sysvals->base_vertex = base_vertex;
sysvals->base_instance = draw->first_instance; sysvals->base_instance = draw->first_instance;
bind_point_state->desc_state.sysvals_ptr = 0; bind_point_state->desc_state.push_uniforms = 0;
} }
if (cmdbuf->state.dirty & PANVK_DYNAMIC_BLEND_CONSTANTS) { if (cmdbuf->state.dirty & PANVK_DYNAMIC_BLEND_CONSTANTS) {
memcpy(&sysvals->blend_constants, cmdbuf->state.blend.constants, memcpy(&sysvals->blend_constants, cmdbuf->state.blend.constants,
sizeof(cmdbuf->state.blend.constants)); sizeof(cmdbuf->state.blend.constants));
bind_point_state->desc_state.sysvals_ptr = 0; bind_point_state->desc_state.push_uniforms = 0;
} }
if (cmdbuf->state.dirty & PANVK_DYNAMIC_VIEWPORT) { if (cmdbuf->state.dirty & PANVK_DYNAMIC_VIEWPORT) {
@ -321,42 +321,31 @@ panvk_cmd_prepare_draw_sysvals(
&sysvals->viewport_scale); &sysvals->viewport_scale);
panvk_sysval_upload_viewport_offset(&cmdbuf->state.viewport, panvk_sysval_upload_viewport_offset(&cmdbuf->state.viewport,
&sysvals->viewport_offset); &sysvals->viewport_offset);
bind_point_state->desc_state.sysvals_ptr = 0; bind_point_state->desc_state.push_uniforms = 0;
} }
} }
static void
panvk_cmd_prepare_sysvals(struct panvk_cmd_buffer *cmdbuf,
struct panvk_cmd_bind_point_state *bind_point_state)
{
struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
if (desc_state->sysvals_ptr)
return;
struct panfrost_ptr sysvals = pan_pool_alloc_aligned(
&cmdbuf->desc_pool.base, sizeof(desc_state->sysvals), 16);
memcpy(sysvals.cpu, &desc_state->sysvals, sizeof(desc_state->sysvals));
desc_state->sysvals_ptr = sysvals.gpu;
}
static void static void
panvk_cmd_prepare_push_uniforms( panvk_cmd_prepare_push_uniforms(
struct panvk_cmd_buffer *cmdbuf, struct panvk_cmd_buffer *cmdbuf,
struct panvk_cmd_bind_point_state *bind_point_state) struct panvk_cmd_bind_point_state *bind_point_state)
{ {
struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state; struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
if (!pipeline->layout->push_constants.size || desc_state->push_uniforms) if (desc_state->push_uniforms)
return; return;
struct panfrost_ptr push_uniforms = pan_pool_alloc_aligned( struct panfrost_ptr push_uniforms = pan_pool_alloc_aligned(
&cmdbuf->desc_pool.base, &cmdbuf->desc_pool.base, 512, 16);
ALIGN_POT(pipeline->layout->push_constants.size, 16), 16);
/* The first half is used for push constants. */
memcpy(push_uniforms.cpu, cmdbuf->push_constants, memcpy(push_uniforms.cpu, cmdbuf->push_constants,
pipeline->layout->push_constants.size); sizeof(cmdbuf->push_constants));
/* The second half is used for sysvals. */
memcpy((uint8_t *)push_uniforms.cpu + 256, &desc_state->sysvals,
sizeof(desc_state->sysvals));
desc_state->push_uniforms = push_uniforms.gpu; desc_state->push_uniforms = push_uniforms.gpu;
} }
@ -443,18 +432,12 @@ panvk_cmd_prepare_ubos(struct panvk_cmd_buffer *cmdbuf,
if (!ubo_count || desc_state->ubos) if (!ubo_count || desc_state->ubos)
return; return;
panvk_cmd_prepare_sysvals(cmdbuf, bind_point_state);
panvk_cmd_prepare_dyn_ssbos(cmdbuf, bind_point_state); panvk_cmd_prepare_dyn_ssbos(cmdbuf, bind_point_state);
struct panfrost_ptr ubos = pan_pool_alloc_desc_array( struct panfrost_ptr ubos = pan_pool_alloc_desc_array(
&cmdbuf->desc_pool.base, ubo_count, UNIFORM_BUFFER); &cmdbuf->desc_pool.base, ubo_count, UNIFORM_BUFFER);
struct mali_uniform_buffer_packed *ubo_descs = ubos.cpu; struct mali_uniform_buffer_packed *ubo_descs = ubos.cpu;
pan_pack(&ubo_descs[PANVK_SYSVAL_UBO_INDEX], UNIFORM_BUFFER, cfg) {
cfg.pointer = desc_state->sysvals_ptr;
cfg.entries = DIV_ROUND_UP(sizeof(desc_state->sysvals), 16);
}
for (unsigned s = 0; s < pipeline->layout->vk.set_count; s++) { for (unsigned s = 0; s < pipeline->layout->vk.set_count; s++) {
const struct panvk_descriptor_set_layout *set_layout = const struct panvk_descriptor_set_layout *set_layout =
vk_to_panvk_descriptor_set_layout(pipeline->layout->vk.set_layouts[s]); vk_to_panvk_descriptor_set_layout(pipeline->layout->vk.set_layouts[s]);
@ -1759,7 +1742,7 @@ panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x,
sysvals->local_group_size.u32[0] = pipeline->cs.local_size.x; sysvals->local_group_size.u32[0] = pipeline->cs.local_size.x;
sysvals->local_group_size.u32[1] = pipeline->cs.local_size.y; sysvals->local_group_size.u32[1] = pipeline->cs.local_size.y;
sysvals->local_group_size.u32[2] = pipeline->cs.local_size.z; sysvals->local_group_size.u32[2] = pipeline->cs.local_size.z;
desc_state->sysvals_ptr = 0; desc_state->push_uniforms = 0;
panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false); panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
dispatch.tsd = batch->tls.gpu; dispatch.tsd = batch->tls.gpu;
@ -2132,7 +2115,6 @@ panvk_per_arch(CmdBindDescriptorSets)(
* TODO: we could be smarter by checking which part of the pipeline layout * TODO: we could be smarter by checking which part of the pipeline layout
* are compatible with the previouly bound descriptor sets. * are compatible with the previouly bound descriptor sets.
*/ */
descriptors_state->sysvals_ptr = 0;
descriptors_state->ubos = 0; descriptors_state->ubos = 0;
descriptors_state->textures = 0; descriptors_state->textures = 0;
descriptors_state->samplers = 0; descriptors_state->samplers = 0;

View File

@ -142,7 +142,7 @@ panvk_pipeline_builder_compile_shaders(struct panvk_pipeline_builder *builder,
shader = panvk_per_arch(shader_create)( shader = panvk_per_arch(shader_create)(
builder->device, stage, stage_info, builder->layout, builder->device, stage, stage_info, builder->layout,
PANVK_SYSVAL_UBO_INDEX, &pipeline->blend.state, &pipeline->blend.state,
panvk_pipeline_static_state(pipeline, panvk_pipeline_static_state(pipeline,
VK_DYNAMIC_STATE_BLEND_CONSTANTS), VK_DYNAMIC_STATE_BLEND_CONSTANTS),
builder->alloc); builder->alloc);
@ -223,16 +223,6 @@ panvk_pipeline_builder_alloc_static_state_bo(
} }
} }
static void
panvk_pipeline_builder_init_sysvals(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline,
gl_shader_stage stage)
{
const struct panvk_shader *shader = builder->shaders[stage];
pipeline->sysvals[stage].ubo_idx = shader->sysval_ubo;
}
static void static void
panvk_pipeline_builder_emit_non_fs_rsd( panvk_pipeline_builder_emit_non_fs_rsd(
const struct pan_shader_info *shader_info, mali_ptr shader_ptr, void *rsd) const struct pan_shader_info *shader_info, mali_ptr shader_ptr, void *rsd)
@ -467,8 +457,6 @@ panvk_pipeline_builder_init_shaders(struct panvk_pipeline_builder *builder,
pipeline->rsds[i] = gpu_rsd; pipeline->rsds[i] = gpu_rsd;
} }
panvk_pipeline_builder_init_sysvals(builder, pipeline, i);
if (i == MESA_SHADER_COMPUTE) if (i == MESA_SHADER_COMPUTE)
pipeline->cs.local_size = shader->local_size; pipeline->cs.local_size = shader->local_size;
} }

View File

@ -117,14 +117,10 @@ unsigned
panvk_per_arch(pipeline_layout_ubo_start)( panvk_per_arch(pipeline_layout_ubo_start)(
const struct panvk_pipeline_layout *layout, unsigned set, bool is_dynamic) const struct panvk_pipeline_layout *layout, unsigned set, bool is_dynamic)
{ {
unsigned offset = PANVK_NUM_BUILTIN_UBOS;
if (is_dynamic) if (is_dynamic)
offset += layout->num_ubos + layout->sets[set].dyn_ubo_offset; return layout->num_ubos + layout->sets[set].dyn_ubo_offset;
else
offset += layout->sets[set].ubo_offset;
return offset; return layout->sets[set].ubo_offset;
} }
unsigned unsigned
@ -150,14 +146,14 @@ unsigned
panvk_per_arch(pipeline_layout_dyn_desc_ubo_index)( panvk_per_arch(pipeline_layout_dyn_desc_ubo_index)(
const struct panvk_pipeline_layout *layout) const struct panvk_pipeline_layout *layout)
{ {
return PANVK_NUM_BUILTIN_UBOS + layout->num_ubos + layout->num_dyn_ubos; return layout->num_ubos + layout->num_dyn_ubos;
} }
unsigned unsigned
panvk_per_arch(pipeline_layout_total_ubo_count)( panvk_per_arch(pipeline_layout_total_ubo_count)(
const struct panvk_pipeline_layout *layout) const struct panvk_pipeline_layout *layout)
{ {
return PANVK_NUM_BUILTIN_UBOS + layout->num_ubos + layout->num_dyn_ubos + return layout->num_ubos + layout->num_dyn_ubos +
(layout->num_dyn_ssbos ? 1 : 0); (layout->num_dyn_ssbos ? 1 : 0);
} }
@ -165,5 +161,5 @@ unsigned
panvk_per_arch(pipeline_layout_dyn_ubos_offset)( panvk_per_arch(pipeline_layout_dyn_ubos_offset)(
const struct panvk_pipeline_layout *layout) const struct panvk_pipeline_layout *layout)
{ {
return PANVK_NUM_BUILTIN_UBOS + layout->num_ubos; return layout->num_ubos;
} }

View File

@ -51,13 +51,14 @@
#include "vk_util.h" #include "vk_util.h"
static nir_def * static nir_def *
load_sysval_from_ubo(nir_builder *b, nir_intrinsic_instr *intr, unsigned offset) load_sysval_from_push_const(nir_builder *b, nir_intrinsic_instr *intr,
unsigned offset)
{ {
return nir_load_ubo(b, intr->def.num_components, intr->def.bit_size, return nir_load_push_constant(
nir_imm_int(b, PANVK_SYSVAL_UBO_INDEX), b, intr->def.num_components, intr->def.bit_size, nir_imm_int(b, 0),
nir_imm_int(b, offset), /* Push constants are placed first, and then come the sysvals. */
.align_mul = intr->def.bit_size / 8, .align_offset = 0, .base = offset + 256,
.range_base = offset, .range = intr->def.bit_size / 8); .range = intr->def.num_components * intr->def.bit_size / 8);
} }
struct sysval_options { struct sysval_options {
@ -81,25 +82,25 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
#define SYSVAL(name) offsetof(struct panvk_sysvals, name) #define SYSVAL(name) offsetof(struct panvk_sysvals, name)
switch (intr->intrinsic) { switch (intr->intrinsic) {
case nir_intrinsic_load_num_workgroups: case nir_intrinsic_load_num_workgroups:
val = load_sysval_from_ubo(b, intr, SYSVAL(num_work_groups)); val = load_sysval_from_push_const(b, intr, SYSVAL(num_work_groups));
break; break;
case nir_intrinsic_load_workgroup_size: case nir_intrinsic_load_workgroup_size:
val = load_sysval_from_ubo(b, intr, SYSVAL(local_group_size)); val = load_sysval_from_push_const(b, intr, SYSVAL(local_group_size));
break; break;
case nir_intrinsic_load_viewport_scale: case nir_intrinsic_load_viewport_scale:
val = load_sysval_from_ubo(b, intr, SYSVAL(viewport_scale)); val = load_sysval_from_push_const(b, intr, SYSVAL(viewport_scale));
break; break;
case nir_intrinsic_load_viewport_offset: case nir_intrinsic_load_viewport_offset:
val = load_sysval_from_ubo(b, intr, SYSVAL(viewport_offset)); val = load_sysval_from_push_const(b, intr, SYSVAL(viewport_offset));
break; break;
case nir_intrinsic_load_first_vertex: case nir_intrinsic_load_first_vertex:
val = load_sysval_from_ubo(b, intr, SYSVAL(first_vertex)); val = load_sysval_from_push_const(b, intr, SYSVAL(first_vertex));
break; break;
case nir_intrinsic_load_base_vertex: case nir_intrinsic_load_base_vertex:
val = load_sysval_from_ubo(b, intr, SYSVAL(base_vertex)); val = load_sysval_from_push_const(b, intr, SYSVAL(base_vertex));
break; break;
case nir_intrinsic_load_base_instance: case nir_intrinsic_load_base_instance:
val = load_sysval_from_ubo(b, intr, SYSVAL(base_instance)); val = load_sysval_from_push_const(b, intr, SYSVAL(base_instance));
break; break;
case nir_intrinsic_load_blend_const_color_rgba: case nir_intrinsic_load_blend_const_color_rgba:
if (opts->static_blend_constants) { if (opts->static_blend_constants) {
@ -112,7 +113,7 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
val = nir_build_imm(b, 4, 32, constants); val = nir_build_imm(b, 4, 32, constants);
} else { } else {
val = load_sysval_from_ubo(b, intr, SYSVAL(blend_constants)); val = load_sysval_from_push_const(b, intr, SYSVAL(blend_constants));
} }
break; break;
@ -206,7 +207,6 @@ struct panvk_shader *
panvk_per_arch(shader_create)(struct panvk_device *dev, gl_shader_stage stage, panvk_per_arch(shader_create)(struct panvk_device *dev, gl_shader_stage stage,
const VkPipelineShaderStageCreateInfo *stage_info, const VkPipelineShaderStageCreateInfo *stage_info,
const struct panvk_pipeline_layout *layout, const struct panvk_pipeline_layout *layout,
unsigned sysval_ubo,
struct pan_blend_state *blend_state, struct pan_blend_state *blend_state,
bool static_blend_constants, bool static_blend_constants,
const VkAllocationCallbacks *alloc) const VkAllocationCallbacks *alloc)
@ -383,7 +383,6 @@ panvk_per_arch(shader_create)(struct panvk_device *dev, gl_shader_stage stage,
if (shader->has_img_access) if (shader->has_img_access)
shader->info.attribute_count += layout->num_imgs; shader->info.attribute_count += layout->num_imgs;
shader->sysval_ubo = sysval_ubo;
shader->local_size.x = nir->info.workgroup_size[0]; shader->local_size.x = nir->info.workgroup_size[0];
shader->local_size.y = nir->info.workgroup_size[1]; shader->local_size.y = nir->info.workgroup_size[1];
shader->local_size.z = nir->info.workgroup_size[2]; shader->local_size.z = nir->info.workgroup_size[2];