panfrost: Move the shader compilation logic out of the gallium driver

While at it, rework the code to avoid copies between intermediate
structures: the pan_shader_info is passed to the compiler context so
the compiler can fill shader information directly.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8963>
Boris Brezillon 2021-02-13 08:24:03 +01:00 committed by Marge Bot
parent d18fc89066
commit d5b1a33460
23 changed files with 558 additions and 540 deletions
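For reference, a minimal sketch of the calling convention this commit introduces (`dev`, `s` and `inputs` stand in for the caller's existing device, NIR shader and compile inputs; illustration only, not part of the diff):

    struct util_dynarray binary;
    struct pan_shader_info info;

    util_dynarray_init(&binary, NULL);
    pan_shader_compile(dev, s, &inputs, &binary, &info);

    /* binary.data/binary.size hold the machine code; info carries the
     * metadata that previously had to be copied out of panfrost_program */

    util_dynarray_fini(&binary);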


@ -40,35 +40,30 @@
#include "tgsi/tgsi_dump.h"
static void
pan_prepare_midgard_props(struct panfrost_shader_state *state,
panfrost_program *program,
gl_shader_stage stage)
pan_prepare_midgard_props(struct panfrost_shader_state *state)
{
pan_prepare(&state->properties, RENDERER_PROPERTIES);
state->properties.uniform_buffer_count = state->ubo_count;
state->properties.midgard.uniform_count = program->uniform_cutoff;
state->properties.midgard.shader_has_side_effects = state->writes_global;
state->properties.uniform_buffer_count = state->info.ubo_count;
state->properties.midgard.uniform_count = state->info.midgard.uniform_cutoff;
state->properties.midgard.shader_has_side_effects = state->info.writes_global;
state->properties.midgard.fp_mode = MALI_FP_MODE_GL_INF_NAN_ALLOWED;
/* For fragment shaders, work register count, early-z, reads at draw-time */
if (stage != MESA_SHADER_FRAGMENT)
state->properties.midgard.work_register_count = state->work_reg_count;
if (state->info.stage != MESA_SHADER_FRAGMENT)
state->properties.midgard.work_register_count = state->info.work_reg_count;
}
static void
pan_prepare_bifrost_props(struct panfrost_shader_state *state,
panfrost_program *program,
gl_shader_stage stage,
shader_info *info)
pan_prepare_bifrost_props(struct panfrost_shader_state *state)
{
unsigned fau_count = DIV_ROUND_UP(program->push.count, 2);
unsigned fau_count = DIV_ROUND_UP(state->info.push.count, 2);
switch (stage) {
switch (state->info.stage) {
case MESA_SHADER_VERTEX:
pan_prepare(&state->properties, RENDERER_PROPERTIES);
state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
state->properties.uniform_buffer_count = state->ubo_count;
state->properties.uniform_buffer_count = state->info.ubo_count;
pan_prepare(&state->preload, PRELOAD);
state->preload.uniform_count = fau_count;
@ -78,39 +73,39 @@ pan_prepare_bifrost_props(struct panfrost_shader_state *state,
case MESA_SHADER_FRAGMENT:
pan_prepare(&state->properties, RENDERER_PROPERTIES);
/* Early-Z set at draw-time */
if (state->writes_depth || state->writes_stencil) {
if (state->info.fs.writes_depth || state->info.fs.writes_stencil) {
state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
state->properties.bifrost.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE;
} else if (state->can_discard) {
} else if (state->info.fs.can_discard) {
state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
state->properties.bifrost.pixel_kill_operation = MALI_PIXEL_KILL_WEAK_EARLY;
} else {
state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
state->properties.bifrost.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;
}
state->properties.uniform_buffer_count = state->ubo_count;
state->properties.bifrost.shader_modifies_coverage = state->can_discard;
state->properties.bifrost.shader_wait_dependency_6 = program->wait_6;
state->properties.bifrost.shader_wait_dependency_7 = program->wait_7;
state->properties.uniform_buffer_count = state->info.ubo_count;
state->properties.bifrost.shader_modifies_coverage = state->info.fs.can_discard;
state->properties.bifrost.shader_wait_dependency_6 = state->info.bifrost.wait_6;
state->properties.bifrost.shader_wait_dependency_7 = state->info.bifrost.wait_7;
pan_prepare(&state->preload, PRELOAD);
state->preload.uniform_count = fau_count;
state->preload.fragment.fragment_position = state->reads_frag_coord;
state->preload.fragment.fragment_position = state->info.fs.reads_frag_coord;
state->preload.fragment.coverage = true;
state->preload.fragment.primitive_flags = state->reads_face;
state->preload.fragment.primitive_flags = state->info.fs.reads_face;
/* Contains sample ID and sample mask. Sample position and
* helper invocation are expressed in terms of the above, so
* preload for those too */
state->preload.fragment.sample_mask_id =
BITSET_TEST(info->system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
BITSET_TEST(info->system_values_read, SYSTEM_VALUE_SAMPLE_POS) ||
BITSET_TEST(info->system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN) ||
BITSET_TEST(info->system_values_read, SYSTEM_VALUE_HELPER_INVOCATION);
state->info.fs.reads_sample_id |
state->info.fs.reads_sample_pos |
state->info.fs.reads_sample_mask_in |
state->info.fs.reads_helper_invocation;
break;
case MESA_SHADER_COMPUTE:
pan_prepare(&state->properties, RENDERER_PROPERTIES);
state->properties.uniform_buffer_count = state->ubo_count;
state->properties.uniform_buffer_count = state->info.ubo_count;
pan_prepare(&state->preload, PRELOAD);
state->preload.uniform_count = fau_count;
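For illustration, the `fau_count` above follows from push constants being counted in 32-bit words while fast-access uniforms (FAUs) are grouped in pairs of words, as the Bifrost push-UBO pass later in this diff notes (values hypothetical):

    unsigned push_words = 9;                          /* hypothetical shader */
    unsigned fau_count = DIV_ROUND_UP(push_words, 2); /* 5 FAU slots */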
@ -152,112 +147,12 @@ pan_upload_shader_descriptor(struct panfrost_context *ctx,
u_upload_unmap(ctx->state_uploader);
}
static unsigned
pan_format_from_nir_base(nir_alu_type base)
{
switch (base) {
case nir_type_int:
return MALI_FORMAT_SINT;
case nir_type_uint:
case nir_type_bool:
return MALI_FORMAT_UINT;
case nir_type_float:
return MALI_CHANNEL_FLOAT;
default:
unreachable("Invalid base");
}
}
static unsigned
pan_format_from_nir_size(nir_alu_type base, unsigned size)
{
if (base == nir_type_float) {
switch (size) {
case 16: return MALI_FORMAT_SINT;
case 32: return MALI_FORMAT_UNORM;
default:
unreachable("Invalid float size for format");
}
} else {
switch (size) {
case 1:
case 8: return MALI_CHANNEL_8;
case 16: return MALI_CHANNEL_16;
case 32: return MALI_CHANNEL_32;
default:
unreachable("Invalid int size for format");
}
}
}
static enum mali_format
pan_format_from_glsl(const struct glsl_type *type, unsigned precision, unsigned frac)
{
const struct glsl_type *column = glsl_without_array_or_matrix(type);
enum glsl_base_type glsl_base = glsl_get_base_type(column);
nir_alu_type t = nir_get_nir_type_for_glsl_base_type(glsl_base);
unsigned chan = glsl_get_components(column);
/* If we have a fractional location added, we need to increase the size
* so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4.
* We could do better but this is an edge case as it is, normally
* packed varyings will be aligned. */
chan += frac;
assert(chan >= 1 && chan <= 4);
unsigned base = nir_alu_type_get_base_type(t);
unsigned size = nir_alu_type_get_type_size(t);
/* Demote to fp16 where possible. int16 varyings are TODO as the hw
* will saturate instead of wrap which is not conformant, so we need to
* insert i2i16/u2u16 instructions before the st_vary_32i/32u to get
* the intended behaviour */
bool is_16 = (precision == GLSL_PRECISION_MEDIUM)
|| (precision == GLSL_PRECISION_LOW);
if (is_16 && base == nir_type_float)
size = 16;
else
size = 32;
return pan_format_from_nir_base(base) |
pan_format_from_nir_size(base, size) |
MALI_NR_CHANNELS(chan);
}
static enum mali_bifrost_register_file_format
bifrost_blend_type_from_nir(nir_alu_type nir_type)
{
switch(nir_type) {
case 0: /* Render target not in use */
return 0;
case nir_type_float16:
return MALI_BIFROST_REGISTER_FILE_FORMAT_F16;
case nir_type_float32:
return MALI_BIFROST_REGISTER_FILE_FORMAT_F32;
case nir_type_int32:
return MALI_BIFROST_REGISTER_FILE_FORMAT_I32;
case nir_type_uint32:
return MALI_BIFROST_REGISTER_FILE_FORMAT_U32;
case nir_type_int16:
return MALI_BIFROST_REGISTER_FILE_FORMAT_I16;
case nir_type_uint16:
return MALI_BIFROST_REGISTER_FILE_FORMAT_U16;
default:
unreachable("Unsupported blend shader type for NIR alu type");
return 0;
}
}
void
panfrost_shader_compile(struct panfrost_context *ctx,
enum pipe_shader_ir ir_type,
const void *ir,
gl_shader_stage stage,
struct panfrost_shader_state *state,
uint64_t *outputs_written)
struct panfrost_shader_state *state)
{
struct panfrost_device *dev = pan_device(ctx->base.screen);
@ -280,169 +175,62 @@ panfrost_shader_compile(struct panfrost_context *ctx,
memcpy(inputs.rt_formats, state->rt_formats, sizeof(inputs.rt_formats));
panfrost_program *program;
struct util_dynarray binary;
program = pan_shader_compile(dev, NULL, s, &inputs);
util_dynarray_init(&binary, NULL);
pan_shader_compile(dev, s, &inputs, &binary, &state->info);
/* Prepare the compiled binary for upload */
mali_ptr shader = 0;
unsigned attribute_count = 0, varying_count = 0;
int size = program->compiled.size;
int size = binary.size;
if (size) {
state->bo = panfrost_bo_create(dev, size, PAN_BO_EXECUTE);
memcpy(state->bo->ptr.cpu, program->compiled.data, size);
memcpy(state->bo->ptr.cpu, binary.data, size);
shader = state->bo->ptr.gpu;
}
/* Midgard needs the first tag on the bottom nibble */
if (!pan_is_bifrost(dev)) {
/* If size = 0, we tag as "end-of-shader" */
if (size)
shader |= program->first_tag;
else
shader = 0x1;
}
state->sysval_count = program->sysval_count;
memcpy(state->sysval, program->sysvals, sizeof(state->sysval[0]) * state->sysval_count);
memcpy(&state->push, &program->push, sizeof(program->push));
bool vertex_id = BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_VERTEX_ID);
bool instance_id = BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
state->writes_global = s->info.writes_memory;
switch (stage) {
case MESA_SHADER_VERTEX:
attribute_count = util_bitcount64(s->info.inputs_read) +
util_bitcount(s->info.images_used);
varying_count = util_bitcount64(s->info.outputs_written);
if (vertex_id)
attribute_count = MAX2(attribute_count, PAN_VERTEX_ID + 1);
if (instance_id)
attribute_count = MAX2(attribute_count, PAN_INSTANCE_ID + 1);
break;
case MESA_SHADER_FRAGMENT:
for (unsigned i = 0; i < ARRAY_SIZE(state->blend_ret_addrs); i++) {
if (!program->blend_ret_offsets[i])
continue;
state->blend_ret_addrs[i] = (state->bo->ptr.gpu & UINT32_MAX) +
program->blend_ret_offsets[i];
assert(!(state->blend_ret_addrs[i] & 0x7));
}
attribute_count = util_bitcount(s->info.images_used);
varying_count = util_bitcount64(s->info.inputs_read);
if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
state->writes_depth = true;
if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
state->writes_stencil = true;
uint64_t outputs_read = s->info.outputs_read;
if (outputs_read & BITFIELD64_BIT(FRAG_RESULT_COLOR))
outputs_read |= BITFIELD64_BIT(FRAG_RESULT_DATA0);
state->outputs_read = outputs_read >> FRAG_RESULT_DATA0;
/* EXT_shader_framebuffer_fetch requires per-sample */
state->sample_shading = s->info.fs.uses_sample_shading ||
outputs_read;
/* List of reasons we need to execute frag shaders when things
* are masked off */
state->fs_sidefx =
s->info.writes_memory ||
s->info.fs.uses_discard ||
s->info.fs.uses_demote;
state->can_discard = s->info.fs.uses_discard;
break;
case MESA_SHADER_COMPUTE:
attribute_count = util_bitcount(s->info.images_used);
state->shared_size = s->info.cs.shared_size;
break;
default:
unreachable("Unknown shader state");
}
state->stack_size = program->tls_size;
state->reads_frag_coord = (s->info.inputs_read & (1 << VARYING_SLOT_POS)) ||
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
state->reads_point_coord = s->info.inputs_read & (1 << VARYING_SLOT_PNTC);
state->reads_face = (s->info.inputs_read & (1 << VARYING_SLOT_FACE)) ||
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
state->writes_point_size = s->info.outputs_written & (1 << VARYING_SLOT_PSIZ);
if (outputs_written)
*outputs_written = s->info.outputs_written;
state->work_reg_count = program->work_register_count;
if (pan_is_bifrost(dev))
for (unsigned i = 0; i < ARRAY_SIZE(state->blend_types); i++)
state->blend_types[i] = bifrost_blend_type_from_nir(program->blend_types[i]);
/* Record the varying mapping for the command stream's bookkeeping */
nir_variable_mode varying_mode =
stage == MESA_SHADER_VERTEX ? nir_var_shader_out : nir_var_shader_in;
nir_foreach_variable_with_modes(var, s, varying_mode) {
unsigned loc = var->data.driver_location;
unsigned sz = glsl_count_attribute_slots(var->type, FALSE);
for (int c = 0; c < sz; ++c) {
state->varyings_loc[loc + c] = var->data.location + c;
state->varyings[loc + c] = pan_format_from_glsl(var->type,
var->data.precision, var->data.location_frac);
}
}
/* Needed for linkage */
state->attribute_count = attribute_count;
state->varying_count = varying_count;
/* Sysvals have dedicated UBO */
state->ubo_count = s->info.num_ubos + (state->sysval_count ? 1 : 0);
if (!pan_is_bifrost(dev))
shader |= state->info.midgard.first_tag;
/* Prepare the descriptors at compile-time */
state->shader.shader = shader;
state->shader.attribute_count = attribute_count;
state->shader.varying_count = varying_count;
state->shader.texture_count = s->info.num_textures;
state->shader.sampler_count = s->info.num_textures;
state->shader.attribute_count = state->info.attribute_count;
state->shader.varying_count = state->info.varyings.input_count +
state->info.varyings.output_count;
state->shader.texture_count = state->info.texture_count;
state->shader.sampler_count = state->info.texture_count;
if (pan_is_bifrost(dev))
pan_prepare_bifrost_props(state, program, stage, &s->info);
pan_prepare_bifrost_props(state);
else
pan_prepare_midgard_props(state, program, stage);
pan_prepare_midgard_props(state);
state->properties.shader_contains_barrier =
s->info.uses_memory_barrier |
s->info.uses_control_barrier;
state->info.contains_barrier;
/* Ordering guarantees are the same */
if (stage == MESA_SHADER_FRAGMENT) {
state->properties.shader_contains_barrier |=
s->info.fs.needs_quad_helper_invocations;
state->info.fs.helper_invocations;
state->properties.stencil_from_shader =
state->info.fs.writes_stencil;
state->properties.depth_source =
state->info.fs.writes_depth ?
MALI_DEPTH_SOURCE_SHADER :
MALI_DEPTH_SOURCE_FIXED_FUNCTION;
} else {
state->properties.depth_source =
MALI_DEPTH_SOURCE_FIXED_FUNCTION;
}
state->properties.stencil_from_shader = state->writes_stencil;
state->properties.depth_source = state->writes_depth ?
MALI_DEPTH_SOURCE_SHADER :
MALI_DEPTH_SOURCE_FIXED_FUNCTION;
if (stage != MESA_SHADER_FRAGMENT)
pan_upload_shader_descriptor(ctx, state);
ralloc_free(program);
util_dynarray_fini(&binary);
/* In both clone and tgsi_to_nir paths, the shader is ralloc'd against
* a NULL context */


@ -295,21 +295,23 @@ panfrost_compile_blend_shader(struct panfrost_blend_shader *shader,
if (constants)
memcpy(inputs.blend.constants, constants, sizeof(inputs.blend.constants));
panfrost_program *program;
if (pan_is_bifrost(dev)) {
inputs.blend.bifrost_blend_desc =
bifrost_get_blend_desc(dev, shader->key.format, shader->key.rt);
}
program = pan_shader_compile(dev, NULL, shader->nir, &inputs);
struct pan_shader_info info;
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
pan_shader_compile(dev, shader->nir, &inputs, &binary, &info);
/* Allow us to patch later */
shader->first_tag = program->first_tag;
shader->size = program->compiled.size;
shader->first_tag = pan_is_bifrost(dev) ? 0 : info.midgard.first_tag;
shader->size = binary.size;
shader->buffer = reralloc_size(shader, shader->buffer, shader->size);
memcpy(shader->buffer, program->compiled.data, shader->size);
shader->work_count = program->work_register_count;
memcpy(shader->buffer, binary.data, shader->size);
shader->work_count = info.work_reg_count;
ralloc_free(program);
util_dynarray_fini(&binary);
}
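Both call sites above rely on the Midgard first-tag convention (Bifrost has none, hence the 0): the tag of the first instruction bundle travels in the low nibble of the shader's GPU address, which the allocation is assumed to leave clear. A sketch:

    mali_ptr shader = state->bo->ptr.gpu;       /* low nibble assumed clear */
    if (!pan_is_bifrost(dev))
            shader |= state->info.midgard.first_tag;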


@ -241,7 +241,7 @@ panfrost_fs_required(
unsigned rt_count)
{
/* If we generally have side effects */
if (fs->fs_sidefx)
if (fs->info.fs.sidefx)
return true;
/* If colour is written we need to execute */
@ -252,7 +252,31 @@ panfrost_fs_required(
/* If depth is written and not implied we need to execute.
* TODO: Predicate on Z/S writes being enabled */
return (fs->writes_depth || fs->writes_stencil);
return (fs->info.fs.writes_depth || fs->info.fs.writes_stencil);
}
static enum mali_bifrost_register_file_format
bifrost_blend_type_from_nir(nir_alu_type nir_type)
{
switch(nir_type) {
case 0: /* Render target not in use */
return 0;
case nir_type_float16:
return MALI_BIFROST_REGISTER_FILE_FORMAT_F16;
case nir_type_float32:
return MALI_BIFROST_REGISTER_FILE_FORMAT_F32;
case nir_type_int32:
return MALI_BIFROST_REGISTER_FILE_FORMAT_I32;
case nir_type_uint32:
return MALI_BIFROST_REGISTER_FILE_FORMAT_U32;
case nir_type_int16:
return MALI_BIFROST_REGISTER_FILE_FORMAT_I16;
case nir_type_uint16:
return MALI_BIFROST_REGISTER_FILE_FORMAT_U16;
default:
unreachable("Unsupported blend shader type for NIR alu type");
return 0;
}
}
static void
@ -292,8 +316,12 @@ panfrost_emit_bifrost_blend(struct panfrost_batch *batch,
assert((blend[i].shader.gpu & (0xffffffffull << 32)) ==
(fs->bo->ptr.gpu & (0xffffffffull << 32)));
cfg.bifrost.internal.shader.pc = (u32)blend[i].shader.gpu;
assert(!(fs->blend_ret_addrs[i] & 0x7));
cfg.bifrost.internal.shader.return_value = fs->blend_ret_addrs[i];
unsigned ret_offset = fs->info.bifrost.blend[i].return_offset;
if (ret_offset) {
assert(!(ret_offset & 0x7));
cfg.bifrost.internal.shader.return_value =
fs->bo->ptr.gpu + ret_offset;
}
cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_SHADER;
} else {
enum pipe_format format = batch->key.cbufs[i]->format;
@ -324,7 +352,7 @@ panfrost_emit_bifrost_blend(struct panfrost_batch *batch,
cfg.bifrost.internal.fixed_function.conversion.memory_format =
panfrost_format_to_bifrost_blend(dev, format_desc, true);
cfg.bifrost.internal.fixed_function.conversion.register_format =
fs->blend_types[i];
bifrost_blend_type_from_nir(fs->info.bifrost.blend[i].type);
cfg.bifrost.internal.fixed_function.rt = i;
}
}
@ -412,7 +440,9 @@ panfrost_prepare_bifrost_fs_state(struct panfrost_context *ctx,
state->properties = fs->properties;
state->properties.bifrost.allow_forward_pixel_to_kill =
!fs->can_discard && !fs->writes_depth && no_blend;
!fs->info.fs.can_discard &&
!fs->info.fs.writes_depth &&
no_blend;
state->shader = fs->shader;
state->preload = fs->preload;
}
@ -436,8 +466,8 @@ panfrost_prepare_midgard_fs_state(struct panfrost_context *ctx,
state->properties.midgard.force_early_z = true;
} else {
/* Reasons to disable early-Z from a shader perspective */
bool late_z = fs->can_discard || fs->writes_global ||
fs->writes_depth || fs->writes_stencil;
bool late_z = fs->info.fs.can_discard || fs->info.writes_global ||
fs->info.fs.writes_depth || fs->info.fs.writes_stencil;
/* If either depth or stencil is enabled, discard matters */
bool zs_enabled =
@ -452,9 +482,9 @@ panfrost_prepare_midgard_fs_state(struct panfrost_context *ctx,
/* TODO: Reduce this limit? */
state->properties = fs->properties;
if (has_blend_shader)
state->properties.midgard.work_register_count = MAX2(fs->work_reg_count, 8);
state->properties.midgard.work_register_count = MAX2(fs->info.work_reg_count, 8);
else
state->properties.midgard.work_register_count = fs->work_reg_count;
state->properties.midgard.work_register_count = fs->info.work_reg_count;
state->properties.midgard.force_early_z = !(late_z || alpha_to_coverage);
@ -463,8 +493,10 @@ panfrost_prepare_midgard_fs_state(struct panfrost_context *ctx,
* lying to the hardware about the discard and setting the
* reads tilebuffer? flag to compensate */
state->properties.midgard.shader_reads_tilebuffer =
fs->outputs_read || (!zs_enabled && fs->can_discard);
state->properties.midgard.shader_contains_discard = zs_enabled && fs->can_discard;
fs->info.fs.outputs_read ||
(!zs_enabled && fs->info.fs.can_discard);
state->properties.midgard.shader_contains_discard =
zs_enabled && fs->info.fs.can_discard;
state->shader = fs->shader;
}
@ -528,7 +560,7 @@ panfrost_prepare_fs_state(struct panfrost_context *ctx,
state->multisample_misc.sample_mask = (msaa ? ctx->sample_mask : ~0) & 0xFFFF;
state->multisample_misc.evaluate_per_sample =
msaa && (ctx->min_samples > 1 || fs->sample_shading);
msaa && (ctx->min_samples > 1 || fs->info.fs.sample_shading);
state->multisample_misc.depth_function = zsa->base.depth_enabled ?
panfrost_translate_compare_func(zsa->base.depth_func) :
@ -930,8 +962,8 @@ panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
{
struct sysval_uniform *uniforms = (void *)buf;
for (unsigned i = 0; i < ss->sysval_count; ++i) {
int sysval = ss->sysval[i];
for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) {
int sysval = ss->info.sysvals.sysvals[i];
switch (PAN_SYSVAL_TYPE(sysval)) {
case PAN_SYSVAL_VIEWPORT_SCALE:
@ -1023,7 +1055,7 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
struct panfrost_shader_state *ss = &all->variants[all->active_variant];
/* Allocate room for the sysval and the uniforms */
size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
size_t sys_size = sizeof(float) * 4 * ss->info.sysvals.sysval_count;
struct panfrost_ptr transfer =
panfrost_pool_alloc_aligned(&batch->pool, sys_size, 16);
@ -1032,7 +1064,7 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
/* Next up, attach UBOs. UBO count includes gaps but no sysval UBO */
struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, stage);
unsigned ubo_count = shader->ubo_count - (sys_size ? 1 : 0);
unsigned ubo_count = shader->info.ubo_count - (sys_size ? 1 : 0);
unsigned sysval_ubo = sys_size ? ubo_count : ~0;
size_t sz = MALI_UNIFORM_BUFFER_LENGTH * (ubo_count + 1);
@ -1076,13 +1108,14 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
/* Copy push constants required by the shader */
struct panfrost_ptr push_transfer =
panfrost_pool_alloc_aligned(&batch->pool, ss->push.count * 4, 16);
panfrost_pool_alloc_aligned(&batch->pool,
ss->info.push.count * 4, 16);
uint32_t *push_cpu = (uint32_t *) push_transfer.cpu;
*push_constants = push_transfer.gpu;
for (unsigned i = 0; i < ss->push.count; ++i) {
struct panfrost_ubo_word src = ss->push.words[i];
for (unsigned i = 0; i < ss->info.push.count; ++i) {
struct panfrost_ubo_word src = ss->info.push.words[i];
/* Map the UBO, this should be cheap. However this is reading
* from write-combine memory which is _very_ slow. It might pay
@ -1108,7 +1141,7 @@ panfrost_emit_shared_memory(struct panfrost_batch *batch,
struct panfrost_device *dev = pan_device(ctx->base.screen);
struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
struct panfrost_shader_state *ss = &all->variants[all->active_variant];
unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size,
unsigned single_size = util_next_power_of_two(MAX2(ss->info.wls_size,
128));
unsigned instances =
@ -1130,12 +1163,12 @@ panfrost_emit_shared_memory(struct panfrost_batch *batch,
ls.wls_instances = instances;
ls.wls_size_scale = util_logbase2(single_size) + 1;
if (ss->stack_size) {
if (ss->info.tls_size) {
unsigned shift =
panfrost_get_stack_shift(ss->stack_size);
panfrost_get_stack_shift(ss->info.tls_size);
struct panfrost_bo *bo =
panfrost_batch_get_scratchpad(batch,
ss->stack_size,
ss->info.tls_size,
dev->thread_tls_alloc,
dev->core_count);
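A worked example of the WLS sizing above (illustrative numbers): the shared size is rounded up to a power of two, floored at 128 bytes, and encoded as a log2-based scale:

    unsigned wls_size = 300;                                            /* hypothetical */
    unsigned single_size = util_next_power_of_two(MAX2(wls_size, 128)); /* 512 */
    unsigned wls_size_scale = util_logbase2(single_size) + 1;           /* 10 */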
@ -1366,7 +1399,7 @@ panfrost_emit_image_attribs(struct panfrost_batch *batch,
struct panfrost_context *ctx = batch->ctx;
struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, type);
if (!shader->attribute_count) {
if (!shader->info.attribute_count) {
*buffers = 0;
return 0;
}
@ -1375,11 +1408,11 @@ panfrost_emit_image_attribs(struct panfrost_batch *batch,
unsigned attrib_buf_size = MALI_ATTRIBUTE_BUFFER_LENGTH +
MALI_ATTRIBUTE_BUFFER_CONTINUATION_3D_LENGTH;
unsigned bytes_per_image_desc = MALI_ATTRIBUTE_LENGTH + attrib_buf_size;
unsigned attribs_offset = attrib_buf_size * shader->attribute_count;
unsigned attribs_offset = attrib_buf_size * shader->info.attribute_count;
struct panfrost_ptr ptr =
panfrost_pool_alloc_aligned(&batch->pool,
bytes_per_image_desc * shader->attribute_count,
bytes_per_image_desc * shader->info.attribute_count,
util_next_power_of_two(bytes_per_image_desc));
emit_image_attribs(batch, type, ptr.cpu + attribs_offset, ptr.cpu, 0);
@ -1404,7 +1437,7 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
* Also, we allocate more memory than what's needed here if either instancing
* is enabled or images are present; this can be improved. */
unsigned bufs_per_attrib = (ctx->instance_count > 1 || nr_images > 0) ? 2 : 1;
unsigned nr_bufs = (vs->attribute_count * bufs_per_attrib) +
unsigned nr_bufs = (vs->info.attribute_count * bufs_per_attrib) +
(pan_is_bifrost(dev) ? 1 : 0);
if (!nr_bufs) {
@ -1417,7 +1450,7 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
MALI_ATTRIBUTE_BUFFER_LENGTH * 2);
struct panfrost_ptr T = panfrost_pool_alloc_aligned(&batch->pool,
MALI_ATTRIBUTE_LENGTH * vs->attribute_count,
MALI_ATTRIBUTE_LENGTH * vs->info.attribute_count,
MALI_ATTRIBUTE_LENGTH);
struct mali_attribute_buffer_packed *bufs =
@ -1525,7 +1558,7 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
/* Add special gl_VertexID/gl_InstanceID buffers */
if (unlikely(vs->attribute_count >= PAN_VERTEX_ID)) {
if (unlikely(vs->info.attribute_count >= PAN_VERTEX_ID)) {
panfrost_vertex_id(ctx->padded_count, &bufs[k], ctx->instance_count > 1);
pan_pack(out + PAN_VERTEX_ID, ATTRIBUTE, cfg) {
@ -1742,22 +1775,22 @@ pan_varying_present(const struct panfrost_device *dev,
/* Enable special buffers by the shader info */
if (vs->writes_point_size)
if (vs->info.vs.writes_point_size)
present |= (1 << PAN_VARY_PSIZ);
if (fs->reads_point_coord)
if (fs->info.fs.reads_point_coord)
present |= (1 << PAN_VARY_PNTCOORD);
if (fs->reads_face)
if (fs->info.fs.reads_face)
present |= (1 << PAN_VARY_FACE);
if (fs->reads_frag_coord && !pan_is_bifrost(dev))
if (fs->info.fs.reads_frag_coord && !pan_is_bifrost(dev))
present |= (1 << PAN_VARY_FRAGCOORD);
/* Also, if we have a point sprite, we need a point coord buffer */
for (unsigned i = 0; i < fs->varying_count; i++) {
gl_varying_slot loc = fs->varyings_loc[i];
for (unsigned i = 0; i < fs->info.varyings.input_count; i++) {
gl_varying_slot loc = fs->info.varyings.input[i].location;
if (util_varying_is_point_coord(loc, point_coord_mask))
present |= (1 << PAN_VARY_PNTCOORD);
@ -1886,10 +1919,18 @@ pan_emit_general_varying(const struct panfrost_device *dev,
bool should_alloc)
{
/* Check if we're linked */
unsigned other_varying_count =
other->info.stage == MESA_SHADER_FRAGMENT ?
other->info.varyings.input_count :
other->info.varyings.output_count;
const struct pan_shader_varying *other_varyings =
other->info.stage == MESA_SHADER_FRAGMENT ?
other->info.varyings.input :
other->info.varyings.output;
signed other_idx = -1;
for (unsigned j = 0; j < other->varying_count; ++j) {
if (other->varyings_loc[j] == loc) {
for (unsigned j = 0; j < other_varying_count; ++j) {
if (other_varyings[j].location == loc) {
other_idx = j;
break;
}
@ -1904,7 +1945,8 @@ pan_emit_general_varying(const struct panfrost_device *dev,
if (should_alloc) {
/* We're linked, so allocate a space via a watermark allocation */
enum mali_format alt = other->varyings[other_idx];
enum mali_format alt =
dev->formats[other_varyings[other_idx].format].hw >> 12;
/* Do interpolation at minimum precision */
unsigned size_main = pan_varying_size(format);
@ -1953,8 +1995,14 @@ panfrost_emit_varying(const struct panfrost_device *dev,
bool should_alloc,
bool is_fragment)
{
gl_varying_slot loc = stage->varyings_loc[idx];
enum mali_format format = stage->varyings[idx];
gl_varying_slot loc =
stage->info.stage == MESA_SHADER_FRAGMENT ?
stage->info.varyings.input[idx].location :
stage->info.varyings.output[idx].location;
enum mali_format format =
stage->info.stage == MESA_SHADER_FRAGMENT ?
dev->formats[stage->info.varyings.input[idx].format].hw >> 12 :
dev->formats[stage->info.varyings.output[idx].format].hw >> 12;
/* Override format to match linkage */
if (!should_alloc && gen_formats[idx])
@ -2018,8 +2066,8 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
vs_size = MALI_ATTRIBUTE_LENGTH * vs->varying_count;
fs_size = MALI_ATTRIBUTE_LENGTH * fs->varying_count;
vs_size = MALI_ATTRIBUTE_LENGTH * vs->info.varyings.output_count;
fs_size = MALI_ATTRIBUTE_LENGTH * fs->info.varyings.input_count;
struct panfrost_ptr trans = panfrost_pool_alloc_aligned(
&batch->pool, vs_size + fs_size, MALI_ATTRIBUTE_LENGTH);
@ -2044,8 +2092,8 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
memset(gen_formats, 0, sizeof(gen_formats));
unsigned gen_stride = 0;
assert(vs->varying_count < ARRAY_SIZE(gen_offsets));
assert(fs->varying_count < ARRAY_SIZE(gen_offsets));
assert(vs->info.varyings.output_count < ARRAY_SIZE(gen_offsets));
assert(fs->info.varyings.input_count < ARRAY_SIZE(gen_offsets));
unsigned streamout_offsets[32];
@ -2056,16 +2104,16 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
}
struct mali_attribute_packed *ovs = (struct mali_attribute_packed *)trans.cpu;
struct mali_attribute_packed *ofs = ovs + vs->varying_count;
struct mali_attribute_packed *ofs = ovs + vs->info.varyings.output_count;
for (unsigned i = 0; i < vs->varying_count; i++) {
for (unsigned i = 0; i < vs->info.varyings.output_count; i++) {
panfrost_emit_varying(dev, ovs + i, vs, fs, vs, present, 0,
ctx->streamout.num_targets, streamout_offsets,
gen_offsets, gen_formats, &gen_stride, i,
true, false);
}
for (unsigned i = 0; i < fs->varying_count; i++) {
for (unsigned i = 0; i < fs->info.varyings.input_count; i++) {
panfrost_emit_varying(dev, ofs + i, fs, vs, vs, present, point_coord_mask,
ctx->streamout.num_targets, streamout_offsets,
gen_offsets, gen_formats, &gen_stride, i,
@ -2114,8 +2162,8 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
pan_emit_special_input(varyings, present, PAN_VARY_FRAGCOORD, MALI_ATTRIBUTE_SPECIAL_FRAG_COORD);
*buffers = T.gpu;
*vs_attribs = vs->varying_count ? trans.gpu : 0;
*fs_attribs = fs->varying_count ? trans.gpu + vs_size : 0;
*vs_attribs = vs->info.varyings.output_count ? trans.gpu : 0;
*fs_attribs = fs->info.varyings.input_count ? trans.gpu + vs_size : 0;
}
void


@ -71,7 +71,7 @@ panfrost_create_compute_state(
}
panfrost_shader_compile(ctx, so->cbase.ir_type, so->cbase.prog,
MESA_SHADER_COMPUTE, v, NULL);
MESA_SHADER_COMPUTE, v);
return so;
}


@ -150,7 +150,7 @@ panfrost_writes_point_size(struct panfrost_context *ctx)
assert(ctx->shader[PIPE_SHADER_VERTEX]);
struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
return vs->writes_point_size && ctx->active_prim == PIPE_PRIM_POINTS;
return vs->info.vs.writes_point_size && ctx->active_prim == PIPE_PRIM_POINTS;
}
/* The entire frame is in memory -- send it off to the kernel! */
@ -739,12 +739,11 @@ panfrost_create_shader_state(
struct panfrost_context *ctx = pan_context(pctx);
struct panfrost_shader_state state = { 0 };
uint64_t outputs_written;
panfrost_shader_compile(ctx, PIPE_SHADER_IR_NIR,
so->base.ir.nir,
tgsi_processor_to_shader_stage(stage),
&state, &outputs_written);
&state);
}
return so;
@ -821,11 +820,12 @@ panfrost_variant_matches(
{
struct panfrost_device *dev = pan_device(ctx->base.screen);
if (variant->outputs_read) {
if (variant->info.stage == MESA_SHADER_FRAGMENT &&
variant->info.fs.outputs_read) {
struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
unsigned i;
BITSET_FOREACH_SET(i, &variant->outputs_read, 8) {
BITSET_FOREACH_SET(i, &variant->info.fs.outputs_read, 8) {
enum pipe_format fmt = PIPE_FORMAT_R8G8B8A8_UNORM;
if ((fb->nr_cbufs > i) && fb->cbufs[i])
@ -963,15 +963,12 @@ panfrost_bind_shader_state(
/* We finally have a variant, so compile it */
if (!shader_state->compiled) {
uint64_t outputs_written = 0;
panfrost_shader_compile(ctx, variants->base.type,
variants->base.type == PIPE_SHADER_IR_NIR ?
variants->base.ir.nir :
variants->base.tokens,
tgsi_processor_to_shader_stage(type),
shader_state,
&outputs_written);
shader_state);
shader_state->compiled = true;
@ -980,7 +977,8 @@ panfrost_bind_shader_state(
shader_state->stream_output = variants->base.stream_output;
shader_state->so_mask =
update_so_info(&shader_state->stream_output, outputs_written);
update_so_info(&shader_state->stream_output,
shader_state->info.outputs_written);
}
}
@ -1251,7 +1249,8 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx,
* keyed to the framebuffer format (due to EXT_framebuffer_fetch) */
struct panfrost_shader_variants *fs = ctx->shader[PIPE_SHADER_FRAGMENT];
if (fs && fs->variant_count && fs->variants[fs->active_variant].outputs_read)
if (fs && fs->variant_count &&
fs->variants[fs->active_variant].info.fs.outputs_read)
ctx->base.bind_fs_state(&ctx->base, fs);
}


@ -214,46 +214,15 @@ struct panfrost_shader_state {
struct MALI_RENDERER_PROPERTIES properties;
struct MALI_PRELOAD preload;
/* Non-descript information */
unsigned work_reg_count;
bool sample_shading;
bool can_discard;
bool writes_point_size;
bool writes_depth;
bool writes_stencil;
bool reads_point_coord;
bool reads_face;
bool reads_frag_coord;
bool writes_global;
unsigned stack_size;
unsigned shared_size;
struct pan_shader_info info;
/* Does the fragment shader have side effects? In particular, if output
* is masked out, is it legal to skip shader execution? */
bool fs_sidefx;
/* For Bifrost - output type for each RT */
enum mali_bifrost_register_file_format blend_types[MALI_BIFROST_BLEND_MAX_RT];
unsigned attribute_count, varying_count, ubo_count;
enum mali_format varyings[PIPE_MAX_ATTRIBS];
gl_varying_slot varyings_loc[PIPE_MAX_ATTRIBS];
struct pipe_stream_output_info stream_output;
uint64_t so_mask;
unsigned sysval_count;
unsigned sysval[MAX_SYSVAL_COUNT];
struct panfrost_ubo_push push;
/* GPU-executable memory */
struct panfrost_bo *bo;
BITSET_WORD outputs_read;
enum pipe_format rt_formats[8];
/* Blend return addresses */
uint32_t blend_ret_addrs[8];
};
/* A collection of varyings (the CSO) */
@ -374,8 +343,7 @@ panfrost_shader_compile(struct panfrost_context *ctx,
enum pipe_shader_ir ir_type,
const void *ir,
gl_shader_stage stage,
struct panfrost_shader_state *state,
uint64_t *outputs_written);
struct panfrost_shader_state *state);
void
panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so,


@ -1238,7 +1238,7 @@ panfrost_batch_adjust_stack_size(struct panfrost_batch *batch)
if (!ss)
continue;
batch->stack_size = MAX2(batch->stack_size, ss->stack_size);
batch->stack_size = MAX2(batch->stack_size, ss->info.tls_size);
}
}


@ -119,10 +119,10 @@ void
bi_opt_push_ubo(bi_context *ctx)
{
/* This pass only runs once */
assert(ctx->push->count == 0);
assert(ctx->info->push.count == 0);
struct bi_ubo_analysis analysis = bi_analyze_ranges(ctx);
bi_pick_ubo(ctx->push, &analysis);
bi_pick_ubo(&ctx->info->push, &analysis);
bi_foreach_instr_global_safe(ctx, ins) {
if (!bi_is_direct_aligned_ubo(ins)) continue;
@ -141,8 +141,9 @@ bi_opt_push_ubo(bi_context *ctx)
for (unsigned w = 0; w < channels; ++w) {
/* FAU is grouped in pairs (2 x 4-byte) */
unsigned base = pan_lookup_pushed_ubo(ctx->push, ubo,
(offset + 4 * w));
unsigned base =
pan_lookup_pushed_ubo(&ctx->info->push, ubo,
(offset + 4 * w));
unsigned fau_idx = (base >> 1);
unsigned fau_hi = (base & 1);


@ -713,11 +713,11 @@ bi_collect_blend_ret_addr(bi_context *ctx, struct util_dynarray *emission,
unsigned loc = tuple->regs.fau_idx - BIR_FAU_BLEND_0;
assert(loc < ARRAY_SIZE(ctx->blend_ret_offsets));
assert(!ctx->blend_ret_offsets[loc]);
ctx->blend_ret_offsets[loc] =
assert(loc < ARRAY_SIZE(ctx->info->bifrost.blend));
assert(!ctx->info->bifrost.blend[loc].return_offset);
ctx->info->bifrost.blend[loc].return_offset =
util_dynarray_num_elements(emission, uint8_t);
assert(!(ctx->blend_ret_offsets[loc] & 0x7));
assert(!(ctx->info->bifrost.blend[loc].return_offset & 0x7));
}
unsigned


@ -388,7 +388,7 @@ bi_register_allocate(bi_context *ctx)
unsigned iter_count = 1000; /* max iterations */
/* Number of bytes of memory we've spilled into */
unsigned spill_count = ctx->tls_size;
unsigned spill_count = ctx->info->tls_size;
do {
if (l) {
@ -410,7 +410,7 @@ bi_register_allocate(bi_context *ctx)
assert(success);
ctx->tls_size = spill_count;
ctx->info->tls_size = spill_count;
bi_install_registers(ctx, l);
lcra_free(l);


@ -297,7 +297,8 @@ bi_load_sysval_to(bi_builder *b, bi_index dest, int sysval,
unsigned nr_components, unsigned offset)
{
unsigned uniform =
pan_lookup_sysval(b->shader->sysval_to_id, &b->shader->sysvals,
pan_lookup_sysval(b->shader->sysval_to_id,
&b->shader->info->sysvals,
sysval);
unsigned idx = (uniform * 16) + offset;
@ -368,8 +369,7 @@ bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, unsigned rt)
}
assert(rt < 8);
assert(b->shader->blend_types);
b->shader->blend_types[rt] = T;
b->shader->info->bifrost.blend[rt].type = T;
}
/* Blend shaders do not need to run ATEST since they are dependent on a
@ -2511,23 +2511,23 @@ bi_lower_branch(bi_block *block)
}
}
panfrost_program *
bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
const struct panfrost_compile_inputs *inputs)
void
bifrost_compile_shader_nir(nir_shader *nir,
const struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
struct pan_shader_info *info)
{
panfrost_program *program = rzalloc(mem_ctx, panfrost_program);
bifrost_debug = debug_get_option_bifrost_debug();
bi_context *ctx = rzalloc(NULL, bi_context);
ctx->sysval_to_id = panfrost_init_sysvals(&ctx->sysvals, ctx);
ctx->sysval_to_id = panfrost_init_sysvals(&info->sysvals, ctx);
ctx->inputs = inputs;
ctx->nir = nir;
ctx->info = info;
ctx->stage = nir->info.stage;
ctx->quirks = bifrost_get_quirks(inputs->gpu_id);
ctx->arch = inputs->gpu_id >> 12;
ctx->push = &program->push;
list_inithead(&ctx->blocks);
/* Lower gl_Position pre-optimisation, but after lowering vars to ssa
@ -2565,8 +2565,7 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
nir_print_shader(nir, stdout);
}
ctx->blend_types = program->blend_types;
ctx->tls_size = nir->scratch_size;
info->tls_size = nir->scratch_size;
nir_foreach_function(func, nir) {
if (!func->impl)
@ -2614,8 +2613,7 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal)
bi_print_shader(ctx, stdout);
util_dynarray_init(&program->compiled, NULL);
unsigned final_clause = bi_pack(ctx, &program->compiled);
unsigned final_clause = bi_pack(ctx, binary);
/* If we need to wait for ATEST or BLEND in the first clause, pass the
* corresponding bits through to the renderer state descriptor */
@ -2623,17 +2621,12 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
bi_clause *first_clause = bi_next_clause(ctx, first_block, NULL);
unsigned first_deps = first_clause ? first_clause->dependencies : 0;
program->wait_6 = (first_deps & (1 << 6));
program->wait_7 = (first_deps & (1 << 7));
memcpy(program->blend_ret_offsets, ctx->blend_ret_offsets, sizeof(program->blend_ret_offsets));
program->sysval_count = ctx->sysvals.sysval_count;
memcpy(program->sysvals, ctx->sysvals.sysvals, sizeof(ctx->sysvals.sysvals[0]) * ctx->sysvals.sysval_count);
info->bifrost.wait_6 = (first_deps & (1 << 6));
info->bifrost.wait_7 = (first_deps & (1 << 7));
if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) {
disassemble_bifrost(stdout, program->compiled.data,
program->compiled.size,
bifrost_debug & BIFROST_DBG_VERBOSE);
disassemble_bifrost(stdout, binary->data, binary->size,
bifrost_debug & BIFROST_DBG_VERBOSE);
}
/* Pad the shader with enough zero bytes to trick the prefetcher,
@ -2641,19 +2634,15 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
* so the size remains 0) */
unsigned prefetch_size = BIFROST_SHADER_PREFETCH - final_clause;
if (program->compiled.size) {
memset(util_dynarray_grow(&program->compiled, uint8_t, prefetch_size),
if (binary->size) {
memset(util_dynarray_grow(binary, uint8_t, prefetch_size),
0, prefetch_size);
}
program->tls_size = ctx->tls_size;
if ((bifrost_debug & BIFROST_DBG_SHADERDB || inputs->shaderdb) &&
!skip_internal) {
bi_print_stats(ctx, program->compiled.size, stderr);
bi_print_stats(ctx, binary->size, stderr);
}
ralloc_free(ctx);
return program;
}


@ -28,9 +28,11 @@
#include "util/u_dynarray.h"
#include "panfrost/util/pan_ir.h"
panfrost_program *
bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
const struct panfrost_compile_inputs *inputs);
void
bifrost_compile_shader_nir(nir_shader *nir,
const struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
struct pan_shader_info *info);
static const nir_shader_compiler_options bifrost_nir_options = {
.lower_scmp = true,


@ -32,7 +32,7 @@
#include "util/u_dynarray.h"
#include "bifrost_compile.h"
static panfrost_program *
static void
compile_shader(char **argv, bool vertex_only)
{
struct gl_shader_program *prog;
@ -53,7 +53,10 @@ compile_shader(char **argv, bool vertex_only)
prog = standalone_compile_shader(&options, 2, argv, &local_ctx);
prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program->info.stage = MESA_SHADER_FRAGMENT;
panfrost_program *compiled;
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
for (unsigned i = 0; i < 2; ++i) {
nir[i] = glsl_to_nir(&local_ctx, prog, shader_types[i], &bifrost_nir_options);
NIR_PASS_V(nir[i], nir_lower_global_vars_to_local);
@ -70,14 +73,16 @@ compile_shader(char **argv, bool vertex_only)
struct panfrost_compile_inputs inputs = {
.gpu_id = 0x7212, /* Mali G52 */
};
struct pan_shader_info info;
compiled = bifrost_compile_shader_nir(NULL, nir[i], &inputs);
util_dynarray_clear(&binary);
bifrost_compile_shader_nir(nir[i], &inputs, &binary, &info);
if (vertex_only)
return compiled;
break;
}
return compiled;
util_dynarray_fini(&binary);
}
#define BI_FOURCC(ch0, ch1, ch2, ch3) ( \


@ -496,17 +496,12 @@ typedef struct bi_block {
typedef struct {
const struct panfrost_compile_inputs *inputs;
nir_shader *nir;
struct pan_shader_info *info;
gl_shader_stage stage;
struct list_head blocks; /* list of bi_block */
struct panfrost_sysvals sysvals;
struct hash_table_u64 *sysval_to_id;
struct panfrost_ubo_push *push;
uint32_t quirks;
unsigned arch;
unsigned tls_size;
/* Blend return offsets */
uint32_t blend_ret_offsets[8];
/* During NIR->BIR */
bi_block *current_block;
@ -514,7 +509,6 @@ typedef struct {
bi_block *break_block;
bi_block *continue_block;
bool emitted_atest;
nir_alu_type *blend_types;
/* For creating temporaries */
unsigned ssa_alloc;


@ -43,11 +43,13 @@
* This is primarily designed as a fallback for preloads but could be extended
* for other clears/blits if needed in the future. */
static panfrost_program *
static void
panfrost_build_blit_shader(struct panfrost_device *dev,
gl_frag_result loc,
nir_alu_type T,
bool ms)
bool ms,
struct util_dynarray *binary,
struct pan_shader_info *info)
{
bool is_colour = loc >= FRAG_RESULT_DATA0;
@ -110,11 +112,9 @@ panfrost_build_blit_shader(struct panfrost_device *dev,
.is_blit = true,
};
panfrost_program *program =
pan_shader_compile(dev, NULL, shader, &inputs);
pan_shader_compile(dev, shader, &inputs, binary, info);
ralloc_free(shader);
return program;
}
/* Compile and upload all possible blit shaders ahead-of-time to reduce draw
@ -162,6 +162,9 @@ panfrost_init_blit_shaders(struct panfrost_device *dev)
/* Don't bother generating multisampling variants if we don't actually
* support multisampling */
bool has_ms = !(dev->quirks & MIDGARD_SFBD);
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
for (unsigned ms = 0; ms <= has_ms; ++ms) {
for (unsigned i = 0; i < ARRAY_SIZE(shader_descs); ++i) {
@ -172,27 +175,38 @@ panfrost_init_blit_shaders(struct panfrost_device *dev)
continue;
struct pan_blit_shader *shader = &dev->blit_shaders.loads[loc][T][ms];
panfrost_program *program =
panfrost_build_blit_shader(dev, loc,
nir_types[T], ms);
struct pan_shader_info info;
assert(offset + program->compiled.size < total_size);
util_dynarray_clear(&binary);
panfrost_build_blit_shader(dev, loc,
nir_types[T], ms,
&binary, &info);
assert(offset + binary.size < total_size);
memcpy(dev->blit_shaders.bo->ptr.cpu + offset,
program->compiled.data, program->compiled.size);
binary.data, binary.size);
shader->shader = (dev->blit_shaders.bo->ptr.gpu + offset) |
program->first_tag;
shader->shader = (dev->blit_shaders.bo->ptr.gpu + offset);
if (pan_is_bifrost(dev)) {
int rt = loc - FRAG_RESULT_DATA0;
if (rt >= 0 && rt < 8 &&
info.bifrost.blend[rt].return_offset) {
shader->blend_ret_addr =
shader->shader +
info.bifrost.blend[rt].return_offset;
}
} else {
shader->shader |= info.midgard.first_tag;
}
int rt = loc - FRAG_RESULT_DATA0;
if (rt >= 0 && rt < 8 && program->blend_ret_offsets[rt])
shader->blend_ret_addr = program->blend_ret_offsets[rt] + shader->shader;
offset += ALIGN_POT(program->compiled.size,
offset += ALIGN_POT(binary.size,
pan_is_bifrost(dev) ? 128 : 64);
ralloc_free(program);
}
}
}
util_dynarray_fini(&binary);
}
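Illustrative arithmetic for the packing above: consecutive blit shader binaries are laid out with ISA-dependent alignment (Bifrost needs 128 bytes for its prefetcher, Midgard 64; sizes hypothetical):

    unsigned sz = 150;                            /* hypothetical binary size */
    unsigned bifrost_stride = ALIGN_POT(sz, 128); /* 256 */
    unsigned midgard_stride = ALIGN_POT(sz, 64);  /* 192 */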
static void


@ -37,13 +37,196 @@ pan_shader_get_compiler_options(const struct panfrost_device *dev)
return &midgard_nir_options;
}
panfrost_program *
pan_shader_compile(const struct panfrost_device *dev,
void *mem_ctx, nir_shader *nir,
const struct panfrost_compile_inputs *inputs)
static enum pipe_format
varying_format(nir_alu_type t, unsigned ncomps)
{
if (pan_is_bifrost(dev))
return bifrost_compile_shader_nir(mem_ctx, nir, inputs);
#define VARYING_FORMAT(ntype, nsz, ptype, psz) \
{ \
.type = nir_type_ ## ntype ## nsz, \
.formats = { \
PIPE_FORMAT_R ## psz ## _ ## ptype, \
PIPE_FORMAT_R ## psz ## G ## psz ## _ ## ptype, \
PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## _ ## ptype, \
PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## A ## psz ## _ ## ptype, \
} \
}
return midgard_compile_shader_nir(mem_ctx, nir, inputs);
static const struct {
nir_alu_type type;
enum pipe_format formats[4];
} conv[] = {
VARYING_FORMAT(float, 32, FLOAT, 32),
VARYING_FORMAT(int, 32, SINT, 32),
VARYING_FORMAT(uint, 32, UINT, 32),
VARYING_FORMAT(float, 16, FLOAT, 16),
VARYING_FORMAT(int, 16, SINT, 16),
VARYING_FORMAT(uint, 16, UINT, 16),
VARYING_FORMAT(int, 8, SINT, 8),
VARYING_FORMAT(uint, 8, UINT, 8),
VARYING_FORMAT(bool, 32, UINT, 32),
VARYING_FORMAT(bool, 16, UINT, 16),
VARYING_FORMAT(bool, 8, UINT, 8),
VARYING_FORMAT(bool, 1, UINT, 8),
};
#undef VARYING_FORMAT
assert(ncomps > 0 && ncomps <= ARRAY_SIZE(conv[0].formats));
for (unsigned i = 0; i < ARRAY_SIZE(conv); i++) {
if (conv[i].type == t)
return conv[i].formats[ncomps - 1];
}
return PIPE_FORMAT_NONE;
}
static void
collect_varyings(nir_shader *s, nir_variable_mode varying_mode,
struct pan_shader_varying *varyings,
unsigned *varying_count)
{
*varying_count = 0;
nir_foreach_variable_with_modes(var, s, varying_mode) {
unsigned loc = var->data.driver_location;
unsigned sz = glsl_count_attribute_slots(var->type, FALSE);
const struct glsl_type *column =
glsl_without_array_or_matrix(var->type);
unsigned chan = glsl_get_components(column);
enum glsl_base_type base_type = glsl_get_base_type(column);
/* If we have a fractional location added, we need to increase the size
* so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4.
* We could do better but this is an edge case as it is, normally
* packed varyings will be aligned.
*/
chan += var->data.location_frac;
assert(chan >= 1 && chan <= 4);
nir_alu_type type = nir_get_nir_type_for_glsl_base_type(base_type);
type = nir_alu_type_get_base_type(type);
/* Demote to fp16 where possible. int16 varyings are TODO as the hw
* will saturate instead of wrap which is not conformant, so we need to
* insert i2i16/u2u16 instructions before the st_vary_32i/32u to get
* the intended behaviour.
*/
if (type == nir_type_float &&
(var->data.precision == GLSL_PRECISION_MEDIUM ||
var->data.precision == GLSL_PRECISION_LOW)) {
type |= 16;
} else {
type |= 32;
}
enum pipe_format format = varying_format(type, chan);
assert(format != PIPE_FORMAT_NONE);
for (int c = 0; c < sz; ++c) {
varyings[loc + c].location = var->data.location + c;
varyings[loc + c].format = format;
}
*varying_count = MAX2(*varying_count, loc + sz);
}
}
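As a usage sketch of the helpers above: a mediump float vec3 varying packed at `location_frac == 1` (YZW) occupies four channels and demotes to 16-bit, so:

    enum pipe_format fmt = varying_format(nir_type_float16, 4);
    /* fmt == PIPE_FORMAT_R16G16B16A16_FLOAT */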
void
pan_shader_compile(const struct panfrost_device *dev,
nir_shader *s,
const struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
struct pan_shader_info *info)
{
memset(info, 0, sizeof(*info));
if (pan_is_bifrost(dev))
bifrost_compile_shader_nir(s, inputs, binary, info);
else
midgard_compile_shader_nir(s, inputs, binary, info);
info->stage = s->info.stage;
info->contains_barrier = s->info.uses_memory_barrier ||
s->info.uses_control_barrier;
switch (info->stage) {
case MESA_SHADER_VERTEX:
info->attribute_count = util_bitcount64(s->info.inputs_read);
bool vertex_id = BITSET_TEST(s->info.system_values_read,
SYSTEM_VALUE_VERTEX_ID);
if (vertex_id)
info->attribute_count = MAX2(info->attribute_count, PAN_VERTEX_ID + 1);
bool instance_id = BITSET_TEST(s->info.system_values_read,
SYSTEM_VALUE_INSTANCE_ID);
if (instance_id)
info->attribute_count = MAX2(info->attribute_count, PAN_INSTANCE_ID + 1);
info->vs.writes_point_size =
s->info.outputs_written & (1 << VARYING_SLOT_PSIZ);
collect_varyings(s, nir_var_shader_out, info->varyings.output,
&info->varyings.output_count);
break;
case MESA_SHADER_FRAGMENT:
if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
info->fs.writes_depth = true;
if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
info->fs.writes_stencil = true;
uint64_t outputs_read = s->info.outputs_read;
if (outputs_read & BITFIELD64_BIT(FRAG_RESULT_COLOR))
outputs_read |= BITFIELD64_BIT(FRAG_RESULT_DATA0);
info->fs.outputs_read = outputs_read >> FRAG_RESULT_DATA0;
/* EXT_shader_framebuffer_fetch requires per-sample */
info->fs.sample_shading = s->info.fs.uses_sample_shading ||
outputs_read;
info->fs.can_discard = s->info.fs.uses_discard;
info->fs.helper_invocations = s->info.fs.needs_quad_helper_invocations;
/* List of reasons we need to execute frag shaders when things
* are masked off */
info->fs.sidefx = s->info.writes_memory ||
s->info.fs.uses_discard ||
s->info.fs.uses_demote;
info->fs.reads_frag_coord =
(s->info.inputs_read & (1 << VARYING_SLOT_POS)) ||
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
info->fs.reads_point_coord =
s->info.inputs_read & (1 << VARYING_SLOT_PNTC);
info->fs.reads_face =
(s->info.inputs_read & (1 << VARYING_SLOT_FACE)) ||
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
info->fs.reads_sample_id =
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
info->fs.reads_sample_pos =
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS);
info->fs.reads_sample_mask_in =
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
info->fs.reads_helper_invocation =
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION);
collect_varyings(s, nir_var_shader_in, info->varyings.input,
&info->varyings.input_count);
break;
case MESA_SHADER_COMPUTE:
info->wls_size = s->info.cs.shared_size;
break;
default:
unreachable("Unknown shader state");
}
info->outputs_written = s->info.outputs_written;
/* Sysvals have dedicated UBO */
info->ubo_count = s->info.num_ubos + (info->sysvals.sysval_count ? 1 : 0);
info->attribute_count += util_bitcount(s->info.images_used);
info->writes_global = s->info.writes_memory;
info->texture_count = s->info.num_textures;
}
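Illustrative arithmetic for the UBO accounting above, which panfrost_emit_const_buf() in this diff undoes to recover the user-visible count (values hypothetical):

    unsigned num_ubos = 3, sysval_count = 4;                 /* hypothetical */
    unsigned ubo_count = num_ubos + (sysval_count ? 1 : 0);  /* info.ubo_count == 4 */
    unsigned user_ubos = ubo_count - (sysval_count ? 1 : 0); /* back to 3 */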


@ -33,9 +33,11 @@ struct panfrost_device;
const nir_shader_compiler_options *
pan_shader_get_compiler_options(const struct panfrost_device *dev);
panfrost_program *
void
pan_shader_compile(const struct panfrost_device *dev,
void *mem_ctx, nir_shader *nir,
const struct panfrost_compile_inputs *inputs);
nir_shader *nir,
const struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
struct pan_shader_info *info);
#endif


@ -238,6 +238,7 @@ enum midgard_rt_id {
typedef struct compiler_context {
const struct panfrost_compile_inputs *inputs;
nir_shader *nir;
struct pan_shader_info *info;
gl_shader_stage stage;
/* Number of samples for a keyed blend shader. Depends on is_blend */
@ -249,9 +250,6 @@ typedef struct compiler_context {
/* Index to precolour to r2 for a dual-source blend colour */
unsigned blend_src1;
/* Number of bytes used for Thread Local Storage */
unsigned tls_size;
/* Count of spills and fills for shaderdb */
unsigned spills;
unsigned fills;
@ -291,10 +289,6 @@ typedef struct compiler_context {
/* Set of NIR indices that were already emitted as outmods */
BITSET_WORD *already_emitted;
/* Just the count of the max register used. Higher count => higher
* register pressure */
int work_registers;
/* The number of uniforms allowable for the fast path */
int uniform_cutoff;
@ -312,9 +306,7 @@ typedef struct compiler_context {
/* Writeout instructions for each render target */
midgard_instruction *writeout_branch[MIDGARD_NUM_RTS][MIDGARD_MAX_SAMPLE_ITER];
struct panfrost_sysvals sysvals;
struct hash_table_u64 *sysval_to_id;
struct panfrost_ubo_push *push;
} compiler_context;
/* Per-block live_in/live_out */


@ -1448,7 +1448,7 @@ emit_sysval_read(compiler_context *ctx, nir_instr *instr,
int sysval = panfrost_sysval_for_instr(instr, &nir_dest);
unsigned dest = nir_dest_index(&nir_dest);
unsigned uniform =
pan_lookup_sysval(ctx->sysval_to_id, &ctx->sysvals, sysval);
pan_lookup_sysval(ctx->sysval_to_id, &ctx->info->sysvals, sysval);
/* Emit the read itself -- this is never indirect */
midgard_instruction *ins =
@ -2978,24 +2978,22 @@ mir_add_writeout_loops(compiler_context *ctx)
}
}
panfrost_program *
midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
const struct panfrost_compile_inputs *inputs)
void
midgard_compile_shader_nir(nir_shader *nir,
const struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
struct pan_shader_info *info)
{
panfrost_program *program = rzalloc(mem_ctx, panfrost_program);
struct util_dynarray *compiled = &program->compiled;
midgard_debug = debug_get_option_midgard_debug();
/* TODO: Bound against what? */
compiler_context *ctx = rzalloc(NULL, compiler_context);
ctx->sysval_to_id = panfrost_init_sysvals(&ctx->sysvals, ctx);
ctx->sysval_to_id = panfrost_init_sysvals(&info->sysvals, ctx);
ctx->inputs = inputs;
ctx->nir = nir;
ctx->info = info;
ctx->stage = nir->info.stage;
ctx->push = &program->push;
if (inputs->is_blend) {
unsigned nr_samples = MAX2(inputs->blend.nr_samples, 1);
@ -3013,7 +3011,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
/* Start off with a safe cutoff, allowing usage of all 16 work
* registers. Later, we'll promote uniform reads to uniform registers
* if we determine it is beneficial to do so */
ctx->uniform_cutoff = 8;
info->midgard.uniform_cutoff = 8;
/* Initialize at a global (not block) level hash tables */
@ -3059,7 +3057,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
nir_print_shader(nir, stdout);
}
ctx->tls_size = nir->scratch_size;
info->tls_size = nir->scratch_size;
nir_foreach_function(func, nir) {
if (!func->impl)
@ -3086,8 +3084,6 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
break; /* TODO: Multi-function shaders */
}
util_dynarray_init(compiled, program);
/* Per-block lowering before opts */
mir_foreach_block(ctx, _block) {
@ -3164,7 +3160,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
if (!bundle->last_writeout && (current_bundle + 1 < bundle_count))
lookahead = source_order_bundles[current_bundle + 1]->tag;
emit_binary_bundle(ctx, block, bundle, compiled, lookahead);
emit_binary_bundle(ctx, block, bundle, binary, lookahead);
++current_bundle;
}
@ -3175,20 +3171,11 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
free(source_order_bundles);
/* Report the very first tag executed */
program->first_tag = midgard_get_first_tag_from_block(ctx, 0);
/* Deal with off-by-one related to the fencepost problem */
program->work_register_count = ctx->work_registers + 1;
program->uniform_cutoff = ctx->uniform_cutoff;
program->tls_size = ctx->tls_size;
program->sysval_count = ctx->sysvals.sysval_count;
memcpy(program->sysvals, ctx->sysvals.sysvals, sizeof(ctx->sysvals.sysvals[0]) * ctx->sysvals.sysval_count);
info->midgard.first_tag = midgard_get_first_tag_from_block(ctx, 0);
if ((midgard_debug & MIDGARD_DBG_SHADERS) && !nir->info.internal) {
disassemble_midgard(stdout, program->compiled.data,
program->compiled.size, inputs->gpu_id);
disassemble_midgard(stdout, binary->data,
binary->size, inputs->gpu_id);
}
if ((midgard_debug & MIDGARD_DBG_SHADERDB || inputs->shaderdb) &&
@ -3209,7 +3196,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
/* Calculate thread count. There are certain cutoffs by
* register count for thread count */
unsigned nr_registers = program->work_register_count;
unsigned nr_registers = info->work_reg_count;
unsigned nr_threads =
(nr_registers <= 4) ? 4 :
@ -3232,6 +3219,4 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
}
ralloc_free(ctx);
return program;
}
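
Note: the shader-db statistics above derive the per-core thread count from the work-register count, but only the first cutoff survives the hunk boundary. The full ladder in this sketch is therefore an assumption about the elided lines, not something the diff shows:

    /* Assumed thread-count ladder; only the first cutoff
     * (<= 4 registers -> 4 threads) is visible in the hunk above.
     * Fewer work registers per thread leave room for more resident
     * threads per core. */
    static unsigned
    midgard_thread_count(unsigned nr_registers)
    {
       return (nr_registers <= 4) ? 4 :
              (nr_registers <= 8) ? 2 :
              1;
    }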

View File

@@ -29,9 +29,11 @@
#include "util/u_dynarray.h"
#include "panfrost/util/pan_ir.h"
panfrost_program *
midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
const struct panfrost_compile_inputs *inputs);
void
midgard_compile_shader_nir(nir_shader *nir,
const struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
struct pan_shader_info *info);
/* NIR options are shared between the standalone compiler and the online
* compiler. Defining it here is the simplest, though maybe not the Right
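
Note: with the panfrost_program typedef gone, callers of this prototype own both output objects. A minimal sketch of the new calling sequence, assuming the includes shown above plus this header; consume_binary is a hypothetical upload hook, not part of this commit:

    #include <string.h>
    #include "util/u_dynarray.h"
    #include "panfrost/util/pan_ir.h"
    /* plus the midgard_compile_shader_nir prototype shown above */

    static void
    compile_with_new_api(nir_shader *nir,
                         const struct panfrost_compile_inputs *inputs,
                         struct pan_shader_info *info)
    {
       struct util_dynarray binary;
       util_dynarray_init(&binary, NULL);
       memset(info, 0, sizeof(*info));

       /* The compiler appends machine code to `binary` and fills `info`
        * in place; nothing is ralloc'ed against a caller context. */
       midgard_compile_shader_nir(nir, inputs, &binary, info);

       consume_binary(binary.data, binary.size); /* hypothetical hook */
       util_dynarray_fini(&binary);
    }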

View File

@@ -99,7 +99,7 @@ index_to_reg(compiler_context *ctx, struct lcra_state *l, unsigned reg, unsigned
/* Report that we actually use this register, and return it */
if (r.reg < 16)
ctx->work_registers = MAX2(ctx->work_registers, r.reg);
ctx->info->work_reg_count = MAX2(ctx->info->work_reg_count, r.reg + 1);
return r;
}
@@ -395,7 +395,7 @@ allocate_registers(compiler_context *ctx, bool *spilled)
* uniforms start and the shader stage. By ABI we limit blend shaders
* to 8 registers, should be lower XXX */
int work_count = ctx->inputs->is_blend ? 8 :
16 - MAX2((ctx->uniform_cutoff - 8), 0);
16 - MAX2((ctx->info->midgard.uniform_cutoff - 8), 0);
/* No register allocation to do with no SSA */
@@ -646,7 +646,7 @@ allocate_registers(compiler_context *ctx, bool *spilled)
if (ctx->blend_src1 != ~0) {
assert(ctx->blend_src1 < ctx->temp_count);
l->solutions[ctx->blend_src1] = (16 * 2);
ctx->work_registers = MAX2(ctx->work_registers, 2);
ctx->info->work_reg_count = MAX2(ctx->info->work_reg_count, 3);
}
mir_compute_interference(ctx, l);
@@ -959,13 +959,14 @@ mir_spill_register(
static void
mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
{
unsigned old_work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0);
unsigned old_work_count =
16 - MAX2((ctx->info->midgard.uniform_cutoff - 8), 0);
unsigned work_count = 16 - MAX2((new_cutoff - 8), 0);
unsigned min_demote = SSA_FIXED_REGISTER(old_work_count);
unsigned max_demote = SSA_FIXED_REGISTER(work_count);
ctx->uniform_cutoff = new_cutoff;
ctx->info->midgard.uniform_cutoff = new_cutoff;
mir_foreach_block(ctx, _block) {
midgard_block *block = (midgard_block *) _block;
@@ -978,7 +979,7 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
unsigned temp = make_compiler_temp(ctx);
unsigned idx = (23 - SSA_REG_FROM_FIXED(ins->src[i])) * 4;
assert(idx < ctx->push->count);
assert(idx < ctx->info->push.count);
midgard_instruction ld = {
.type = TAG_LOAD_STORE_4,
@@ -989,10 +990,10 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
.swizzle = SWIZZLE_IDENTITY_4,
.op = midgard_op_ld_ubo_int4,
.load_store = {
.arg_1 = ctx->push->words[idx].ubo,
.arg_1 = ctx->info->push.words[idx].ubo,
.arg_2 = 0x1E,
},
.constants.u32[0] = ctx->push->words[idx].offset
.constants.u32[0] = ctx->info->push.words[idx].offset
};
mir_insert_instruction_before_scheduled(ctx, block, before, ld);
@@ -1013,7 +1014,7 @@ mir_ra(compiler_context *ctx)
int iter_count = 1000; /* max iterations */
/* Number of 128-bit slots in memory we've spilled into */
unsigned spill_count = DIV_ROUND_UP(ctx->tls_size, 16);
unsigned spill_count = DIV_ROUND_UP(ctx->info->tls_size, 16);
mir_create_pipeline_registers(ctx);
@@ -1025,9 +1026,9 @@ mir_ra(compiler_context *ctx)
/* It's a lot cheaper to demote uniforms to get more
* work registers than to spill to TLS. */
if (l->spill_class == REG_CLASS_WORK &&
ctx->uniform_cutoff > 8) {
ctx->info->midgard.uniform_cutoff > 8) {
mir_demote_uniforms(ctx, MAX2(ctx->uniform_cutoff - 4, 8));
mir_demote_uniforms(ctx, MAX2(ctx->info->midgard.uniform_cutoff - 4, 8));
} else if (spill_node == -1) {
fprintf(stderr, "ERROR: Failed to choose spill node\n");
lcra_free(l);
@@ -1056,7 +1057,7 @@ mir_ra(compiler_context *ctx)
/* Report spilling information. spill_count is in 128-bit slots (vec4 x
* fp32), but tls_size is in bytes, so multiply by 16 */
ctx->tls_size = spill_count * 16;
ctx->info->tls_size = spill_count * 16;
install_registers(ctx, l);
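
Note: the register-allocation arithmetic in these hunks is unchanged, merely rehomed onto pan_shader_info. A worked sketch of the two conversions, with the constants taken from the hunks above:

    #include <stdbool.h>
    #include "util/macros.h" /* MAX2, DIV_ROUND_UP */

    /* Every uniform register promoted past r8 costs one work register:
     * cutoff 8 -> 16 work regs, 12 -> 12, 16 -> 8. Blend shaders are
     * pinned to 8 by ABI. */
    static unsigned
    work_register_budget(bool is_blend, unsigned uniform_cutoff)
    {
       return is_blend ? 8 : 16 - MAX2((int)uniform_cutoff - 8, 0);
    }

    /* TLS spill slots are 128-bit (vec4 x fp32), so the byte size and
     * the slot count convert by a factor of 16 in both directions. */
    static unsigned
    spill_slot_count(unsigned tls_size)
    {
       return DIV_ROUND_UP(tls_size, 16);
    }

When allocation fails on the work class, mir_demote_uniforms drops the cutoff by four at a time (never below eight), trading uniform registers back for work registers before falling back to TLS spilling.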

View File

@@ -263,7 +263,7 @@ midgard_promote_uniforms(compiler_context *ctx)
unsigned work_count = mir_work_heuristic(ctx, &analysis);
unsigned promoted_count = 24 - work_count;
mir_pick_ubo(ctx->push, &analysis, promoted_count);
mir_pick_ubo(&ctx->info->push, &analysis, promoted_count);
/* First, figure out special indices a priori so we don't recompute a lot */
BITSET_WORD *special = mir_special_indices(ctx);
@@ -279,7 +279,7 @@ midgard_promote_uniforms(compiler_context *ctx)
if (!BITSET_TEST(analysis.blocks[ubo].pushed, qword)) continue;
/* Find where we pushed to, TODO: unaligned pushes to pack */
unsigned base = pan_lookup_pushed_ubo(ctx->push, ubo, qword * 16);
unsigned base = pan_lookup_pushed_ubo(&ctx->info->push, ubo, qword * 16);
assert((base & 0x3) == 0);
unsigned address = base / 4;
@@ -288,7 +288,8 @@ midgard_promote_uniforms(compiler_context *ctx)
/* Should've taken into account when pushing */
assert(address < promoted_count);
ctx->uniform_cutoff = MAX2(ctx->uniform_cutoff, address + 1);
ctx->info->midgard.uniform_cutoff =
MAX2(ctx->info->midgard.uniform_cutoff, address + 1);
unsigned promoted = SSA_FIXED_REGISTER(uniform_reg);
/* We do need the move for safety for a non-SSA dest, or if
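
Note: the base / 4 and address + 1 steps above encode the layout contract between the push table and the uniform file. A sketch, inferring the word units of pan_lookup_pushed_ubo's result and the downward register numbering from the (23 - reg) * 4 indexing in mir_demote_uniforms; the helper itself is illustrative:

    #include <assert.h>

    /* Pushed uniforms are tracked as 32-bit words; four words fill one
     * uniform register, handed out downward from r23. */
    static unsigned
    uniform_reg_for_push_word(unsigned word_index)
    {
       assert((word_index & 0x3) == 0);   /* register-aligned pushes only */
       unsigned address = word_index / 4; /* register slot from the top */
       /* uniform_cutoff must be at least address + 1 to cover this slot */
       return 23 - address;
    }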

View File

@@ -115,40 +115,6 @@ pan_lookup_sysval(struct hash_table_u64 *sysval_to_id,
int
panfrost_sysval_for_instr(nir_instr *instr, nir_dest *dest);
typedef struct {
int work_register_count;
int uniform_cutoff;
/* For Bifrost - output type for each RT */
nir_alu_type blend_types[8];
/* For Bifrost - return address for blend instructions */
uint32_t blend_ret_offsets[8];
/* Prepended before uniforms, mapping to SYSVAL_ names for the
* sysval */
unsigned sysval_count;
unsigned sysvals[MAX_SYSVAL_COUNT];
/* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access
* Uniforms (Bifrost) */
struct panfrost_ubo_push push;
int first_tag;
struct util_dynarray compiled;
/* The number of bytes to allocate per-thread for Thread Local Storage
* (register spilling), or zero if no spilling is used */
unsigned tls_size;
/* For Bifrost, should the program wait on dependency slots 6/7 before
* starting? For ATEST/BLEND in the first clause, which can occur with
* extremely simple shaders */
bool wait_6, wait_7;
} panfrost_program;
struct panfrost_compile_inputs {
unsigned gpu_id;
bool is_blend, is_blit;
@@ -163,6 +129,82 @@ struct panfrost_compile_inputs {
enum pipe_format rt_formats[8];
};
struct pan_shader_varying {
gl_varying_slot location;
enum pipe_format format;
};
struct bifrost_shader_blend_info {
nir_alu_type type;
uint32_t return_offset;
};
struct bifrost_shader_info {
struct bifrost_shader_blend_info blend[8];
bool wait_6, wait_7;
};
struct midgard_shader_info {
unsigned uniform_cutoff;
unsigned first_tag;
};
struct pan_shader_info {
gl_shader_stage stage;
unsigned work_reg_count;
unsigned tls_size;
unsigned wls_size;
union {
struct {
bool reads_frag_coord;
bool reads_point_coord;
bool reads_face;
bool helper_invocations;
bool can_discard;
bool writes_depth;
bool writes_stencil;
bool sidefx;
bool reads_sample_id;
bool reads_sample_pos;
bool reads_sample_mask_in;
bool reads_helper_invocation;
bool sample_shading;
BITSET_WORD outputs_read;
} fs;
struct {
bool writes_point_size;
} vs;
};
bool contains_barrier;
bool writes_global;
uint64_t outputs_written;
unsigned texture_count;
unsigned ubo_count;
unsigned attribute_count;
struct {
unsigned input_count;
struct pan_shader_varying input[MAX_VARYING];
unsigned output_count;
struct pan_shader_varying output[MAX_VARYING];
} varyings;
struct panfrost_sysvals sysvals;
/* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access
* Uniforms (Bifrost) */
struct panfrost_ubo_push push;
union {
struct bifrost_shader_info bifrost;
struct midgard_shader_info midgard;
};
};
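
Note: the stage- and ISA-specific state lives in anonymous unions, so reading it is only valid when the discriminating context matches. A small sketch of the expected access pattern (helper name illustrative, includes assumed from the pan_ir.h context above):

    /* fs.* is only meaningful for fragment shaders; likewise midgard./
     * bifrost. are only meaningful for binaries produced by the
     * matching compiler. */
    static bool
    shader_can_discard(const struct pan_shader_info *info)
    {
       return info->stage == MESA_SHADER_FRAGMENT && info->fs.can_discard;
    }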
typedef struct pan_block {
/* Link to next block. Must be first for mir_get_block */
struct list_head link;