panvk: Lower sysvals to push uniforms

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28399>
This commit is contained in:
Boris Brezillon 2024-01-30 19:37:44 +01:00 committed by Marge Bot
parent 9c553bda9c
commit 6d4b376a9b
7 changed files with 35 additions and 79 deletions

View File

@ -97,7 +97,6 @@ struct panvk_descriptor_state {
struct mali_uniform_buffer_packed ubos[MAX_DYNAMIC_UNIFORM_BUFFERS];
struct panvk_ssbo_addr ssbos[MAX_DYNAMIC_STORAGE_BUFFERS];
} dyn;
mali_ptr sysvals_ptr;
mali_ptr ubos;
mali_ptr textures;
mali_ptr samplers;

View File

@ -70,10 +70,6 @@ struct panvk_pipeline {
/* shader stage bit is set of the stage accesses storage images */
uint32_t img_access_mask;
struct {
unsigned ubo_idx;
} sysvals[MESA_SHADER_STAGES];
unsigned tls_size;
unsigned wls_size;

View File

@ -20,9 +20,6 @@
#include "panvk_macros.h"
#include "panvk_pipeline_layout.h"
#define PANVK_SYSVAL_UBO_INDEX 0
#define PANVK_NUM_BUILTIN_UBOS 1
struct nir_shader;
struct pan_blend_state;
struct panvk_device;
@ -56,7 +53,6 @@ struct panvk_sysvals {
struct panvk_shader {
struct pan_shader_info info;
struct util_dynarray binary;
unsigned sysval_ubo;
struct pan_compute_dim local_size;
bool has_img_access;
};
@ -68,7 +64,7 @@ bool panvk_per_arch(blend_needs_lowering)(const struct panvk_device *dev,
struct panvk_shader *panvk_per_arch(shader_create)(
struct panvk_device *dev, gl_shader_stage stage,
const VkPipelineShaderStageCreateInfo *stage_info,
const struct panvk_pipeline_layout *layout, unsigned sysval_ubo,
const struct panvk_pipeline_layout *layout,
struct pan_blend_state *blend_state, bool static_blend_constants,
const VkAllocationCallbacks *alloc);

View File

@ -307,13 +307,13 @@ panvk_cmd_prepare_draw_sysvals(
sysvals->first_vertex = draw->offset_start;
sysvals->base_vertex = base_vertex;
sysvals->base_instance = draw->first_instance;
bind_point_state->desc_state.sysvals_ptr = 0;
bind_point_state->desc_state.push_uniforms = 0;
}
if (cmdbuf->state.dirty & PANVK_DYNAMIC_BLEND_CONSTANTS) {
memcpy(&sysvals->blend_constants, cmdbuf->state.blend.constants,
sizeof(cmdbuf->state.blend.constants));
bind_point_state->desc_state.sysvals_ptr = 0;
bind_point_state->desc_state.push_uniforms = 0;
}
if (cmdbuf->state.dirty & PANVK_DYNAMIC_VIEWPORT) {
@ -321,42 +321,31 @@ panvk_cmd_prepare_draw_sysvals(
&sysvals->viewport_scale);
panvk_sysval_upload_viewport_offset(&cmdbuf->state.viewport,
&sysvals->viewport_offset);
bind_point_state->desc_state.sysvals_ptr = 0;
bind_point_state->desc_state.push_uniforms = 0;
}
}
static void
panvk_cmd_prepare_sysvals(struct panvk_cmd_buffer *cmdbuf,
struct panvk_cmd_bind_point_state *bind_point_state)
{
struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
if (desc_state->sysvals_ptr)
return;
struct panfrost_ptr sysvals = pan_pool_alloc_aligned(
&cmdbuf->desc_pool.base, sizeof(desc_state->sysvals), 16);
memcpy(sysvals.cpu, &desc_state->sysvals, sizeof(desc_state->sysvals));
desc_state->sysvals_ptr = sysvals.gpu;
}
static void
panvk_cmd_prepare_push_uniforms(
struct panvk_cmd_buffer *cmdbuf,
struct panvk_cmd_bind_point_state *bind_point_state)
{
struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
if (!pipeline->layout->push_constants.size || desc_state->push_uniforms)
if (desc_state->push_uniforms)
return;
struct panfrost_ptr push_uniforms = pan_pool_alloc_aligned(
&cmdbuf->desc_pool.base,
ALIGN_POT(pipeline->layout->push_constants.size, 16), 16);
&cmdbuf->desc_pool.base, 512, 16);
/* The first half is used for push constants. */
memcpy(push_uniforms.cpu, cmdbuf->push_constants,
pipeline->layout->push_constants.size);
sizeof(cmdbuf->push_constants));
/* The second half is used for sysvals. */
memcpy((uint8_t *)push_uniforms.cpu + 256, &desc_state->sysvals,
sizeof(desc_state->sysvals));
desc_state->push_uniforms = push_uniforms.gpu;
}
@ -443,18 +432,12 @@ panvk_cmd_prepare_ubos(struct panvk_cmd_buffer *cmdbuf,
if (!ubo_count || desc_state->ubos)
return;
panvk_cmd_prepare_sysvals(cmdbuf, bind_point_state);
panvk_cmd_prepare_dyn_ssbos(cmdbuf, bind_point_state);
struct panfrost_ptr ubos = pan_pool_alloc_desc_array(
&cmdbuf->desc_pool.base, ubo_count, UNIFORM_BUFFER);
struct mali_uniform_buffer_packed *ubo_descs = ubos.cpu;
pan_pack(&ubo_descs[PANVK_SYSVAL_UBO_INDEX], UNIFORM_BUFFER, cfg) {
cfg.pointer = desc_state->sysvals_ptr;
cfg.entries = DIV_ROUND_UP(sizeof(desc_state->sysvals), 16);
}
for (unsigned s = 0; s < pipeline->layout->vk.set_count; s++) {
const struct panvk_descriptor_set_layout *set_layout =
vk_to_panvk_descriptor_set_layout(pipeline->layout->vk.set_layouts[s]);
@ -1759,7 +1742,7 @@ panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x,
sysvals->local_group_size.u32[0] = pipeline->cs.local_size.x;
sysvals->local_group_size.u32[1] = pipeline->cs.local_size.y;
sysvals->local_group_size.u32[2] = pipeline->cs.local_size.z;
desc_state->sysvals_ptr = 0;
desc_state->push_uniforms = 0;
panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
dispatch.tsd = batch->tls.gpu;
@ -2132,7 +2115,6 @@ panvk_per_arch(CmdBindDescriptorSets)(
* TODO: we could be smarter by checking which part of the pipeline layout
* are compatible with the previouly bound descriptor sets.
*/
descriptors_state->sysvals_ptr = 0;
descriptors_state->ubos = 0;
descriptors_state->textures = 0;
descriptors_state->samplers = 0;

View File

@ -142,7 +142,7 @@ panvk_pipeline_builder_compile_shaders(struct panvk_pipeline_builder *builder,
shader = panvk_per_arch(shader_create)(
builder->device, stage, stage_info, builder->layout,
PANVK_SYSVAL_UBO_INDEX, &pipeline->blend.state,
&pipeline->blend.state,
panvk_pipeline_static_state(pipeline,
VK_DYNAMIC_STATE_BLEND_CONSTANTS),
builder->alloc);
@ -223,16 +223,6 @@ panvk_pipeline_builder_alloc_static_state_bo(
}
}
static void
panvk_pipeline_builder_init_sysvals(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline,
gl_shader_stage stage)
{
const struct panvk_shader *shader = builder->shaders[stage];
pipeline->sysvals[stage].ubo_idx = shader->sysval_ubo;
}
static void
panvk_pipeline_builder_emit_non_fs_rsd(
const struct pan_shader_info *shader_info, mali_ptr shader_ptr, void *rsd)
@ -467,8 +457,6 @@ panvk_pipeline_builder_init_shaders(struct panvk_pipeline_builder *builder,
pipeline->rsds[i] = gpu_rsd;
}
panvk_pipeline_builder_init_sysvals(builder, pipeline, i);
if (i == MESA_SHADER_COMPUTE)
pipeline->cs.local_size = shader->local_size;
}

View File

@ -117,14 +117,10 @@ unsigned
panvk_per_arch(pipeline_layout_ubo_start)(
const struct panvk_pipeline_layout *layout, unsigned set, bool is_dynamic)
{
unsigned offset = PANVK_NUM_BUILTIN_UBOS;
if (is_dynamic)
offset += layout->num_ubos + layout->sets[set].dyn_ubo_offset;
else
offset += layout->sets[set].ubo_offset;
return layout->num_ubos + layout->sets[set].dyn_ubo_offset;
return offset;
return layout->sets[set].ubo_offset;
}
unsigned
@ -150,14 +146,14 @@ unsigned
panvk_per_arch(pipeline_layout_dyn_desc_ubo_index)(
const struct panvk_pipeline_layout *layout)
{
return PANVK_NUM_BUILTIN_UBOS + layout->num_ubos + layout->num_dyn_ubos;
return layout->num_ubos + layout->num_dyn_ubos;
}
unsigned
panvk_per_arch(pipeline_layout_total_ubo_count)(
const struct panvk_pipeline_layout *layout)
{
return PANVK_NUM_BUILTIN_UBOS + layout->num_ubos + layout->num_dyn_ubos +
return layout->num_ubos + layout->num_dyn_ubos +
(layout->num_dyn_ssbos ? 1 : 0);
}
@ -165,5 +161,5 @@ unsigned
panvk_per_arch(pipeline_layout_dyn_ubos_offset)(
const struct panvk_pipeline_layout *layout)
{
return PANVK_NUM_BUILTIN_UBOS + layout->num_ubos;
return layout->num_ubos;
}

View File

@ -51,13 +51,14 @@
#include "vk_util.h"
static nir_def *
load_sysval_from_ubo(nir_builder *b, nir_intrinsic_instr *intr, unsigned offset)
load_sysval_from_push_const(nir_builder *b, nir_intrinsic_instr *intr,
unsigned offset)
{
return nir_load_ubo(b, intr->def.num_components, intr->def.bit_size,
nir_imm_int(b, PANVK_SYSVAL_UBO_INDEX),
nir_imm_int(b, offset),
.align_mul = intr->def.bit_size / 8, .align_offset = 0,
.range_base = offset, .range = intr->def.bit_size / 8);
return nir_load_push_constant(
b, intr->def.num_components, intr->def.bit_size, nir_imm_int(b, 0),
/* Push constants are placed first, and then come the sysvals. */
.base = offset + 256,
.range = intr->def.num_components * intr->def.bit_size / 8);
}
struct sysval_options {
@ -81,25 +82,25 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
#define SYSVAL(name) offsetof(struct panvk_sysvals, name)
switch (intr->intrinsic) {
case nir_intrinsic_load_num_workgroups:
val = load_sysval_from_ubo(b, intr, SYSVAL(num_work_groups));
val = load_sysval_from_push_const(b, intr, SYSVAL(num_work_groups));
break;
case nir_intrinsic_load_workgroup_size:
val = load_sysval_from_ubo(b, intr, SYSVAL(local_group_size));
val = load_sysval_from_push_const(b, intr, SYSVAL(local_group_size));
break;
case nir_intrinsic_load_viewport_scale:
val = load_sysval_from_ubo(b, intr, SYSVAL(viewport_scale));
val = load_sysval_from_push_const(b, intr, SYSVAL(viewport_scale));
break;
case nir_intrinsic_load_viewport_offset:
val = load_sysval_from_ubo(b, intr, SYSVAL(viewport_offset));
val = load_sysval_from_push_const(b, intr, SYSVAL(viewport_offset));
break;
case nir_intrinsic_load_first_vertex:
val = load_sysval_from_ubo(b, intr, SYSVAL(first_vertex));
val = load_sysval_from_push_const(b, intr, SYSVAL(first_vertex));
break;
case nir_intrinsic_load_base_vertex:
val = load_sysval_from_ubo(b, intr, SYSVAL(base_vertex));
val = load_sysval_from_push_const(b, intr, SYSVAL(base_vertex));
break;
case nir_intrinsic_load_base_instance:
val = load_sysval_from_ubo(b, intr, SYSVAL(base_instance));
val = load_sysval_from_push_const(b, intr, SYSVAL(base_instance));
break;
case nir_intrinsic_load_blend_const_color_rgba:
if (opts->static_blend_constants) {
@ -112,7 +113,7 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
val = nir_build_imm(b, 4, 32, constants);
} else {
val = load_sysval_from_ubo(b, intr, SYSVAL(blend_constants));
val = load_sysval_from_push_const(b, intr, SYSVAL(blend_constants));
}
break;
@ -206,7 +207,6 @@ struct panvk_shader *
panvk_per_arch(shader_create)(struct panvk_device *dev, gl_shader_stage stage,
const VkPipelineShaderStageCreateInfo *stage_info,
const struct panvk_pipeline_layout *layout,
unsigned sysval_ubo,
struct pan_blend_state *blend_state,
bool static_blend_constants,
const VkAllocationCallbacks *alloc)
@ -383,7 +383,6 @@ panvk_per_arch(shader_create)(struct panvk_device *dev, gl_shader_stage stage,
if (shader->has_img_access)
shader->info.attribute_count += layout->num_imgs;
shader->sysval_ubo = sysval_ubo;
shader->local_size.x = nir->info.workgroup_size[0];
shader->local_size.y = nir->info.workgroup_size[1];
shader->local_size.z = nir->info.workgroup_size[2];