panfrost: Move the shader compilation logic out of the gallium driver
While at it, rework the code to avoid copies between intermediate structures: the pan_shader_info is passed to the compiler context so the compiler can fill shader information directly.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8963>
This commit is contained in:
parent
d18fc89066
commit
d5b1a33460
|
@ -40,35 +40,30 @@
|
|||
#include "tgsi/tgsi_dump.h"
|
||||
|
||||
static void
|
||||
pan_prepare_midgard_props(struct panfrost_shader_state *state,
|
||||
panfrost_program *program,
|
||||
gl_shader_stage stage)
|
||||
pan_prepare_midgard_props(struct panfrost_shader_state *state)
|
||||
{
|
||||
pan_prepare(&state->properties, RENDERER_PROPERTIES);
|
||||
state->properties.uniform_buffer_count = state->ubo_count;
|
||||
state->properties.midgard.uniform_count = program->uniform_cutoff;
|
||||
state->properties.midgard.shader_has_side_effects = state->writes_global;
|
||||
state->properties.uniform_buffer_count = state->info.ubo_count;
|
||||
state->properties.midgard.uniform_count = state->info.midgard.uniform_cutoff;
|
||||
state->properties.midgard.shader_has_side_effects = state->info.writes_global;
|
||||
state->properties.midgard.fp_mode = MALI_FP_MODE_GL_INF_NAN_ALLOWED;
|
||||
|
||||
/* For fragment shaders, work register count, early-z, reads at draw-time */
|
||||
|
||||
if (stage != MESA_SHADER_FRAGMENT)
|
||||
state->properties.midgard.work_register_count = state->work_reg_count;
|
||||
if (state->info.stage != MESA_SHADER_FRAGMENT)
|
||||
state->properties.midgard.work_register_count = state->info.work_reg_count;
|
||||
}
|
||||
|
||||
static void
|
||||
pan_prepare_bifrost_props(struct panfrost_shader_state *state,
|
||||
panfrost_program *program,
|
||||
gl_shader_stage stage,
|
||||
shader_info *info)
|
||||
pan_prepare_bifrost_props(struct panfrost_shader_state *state)
|
||||
{
|
||||
unsigned fau_count = DIV_ROUND_UP(program->push.count, 2);
|
||||
unsigned fau_count = DIV_ROUND_UP(state->info.push.count, 2);
|
||||
|
||||
switch (stage) {
|
||||
switch (state->info.stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
pan_prepare(&state->properties, RENDERER_PROPERTIES);
|
||||
state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
|
||||
state->properties.uniform_buffer_count = state->ubo_count;
|
||||
state->properties.uniform_buffer_count = state->info.ubo_count;
|
||||
|
||||
pan_prepare(&state->preload, PRELOAD);
|
||||
state->preload.uniform_count = fau_count;
|
||||
|
@ -78,39 +73,39 @@ pan_prepare_bifrost_props(struct panfrost_shader_state *state,
|
|||
case MESA_SHADER_FRAGMENT:
|
||||
pan_prepare(&state->properties, RENDERER_PROPERTIES);
|
||||
/* Early-Z set at draw-time */
|
||||
if (state->writes_depth || state->writes_stencil) {
|
||||
if (state->info.fs.writes_depth || state->info.fs.writes_stencil) {
|
||||
state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
|
||||
state->properties.bifrost.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE;
|
||||
} else if (state->can_discard) {
|
||||
} else if (state->info.fs.can_discard) {
|
||||
state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
|
||||
state->properties.bifrost.pixel_kill_operation = MALI_PIXEL_KILL_WEAK_EARLY;
|
||||
} else {
|
||||
state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
|
||||
state->properties.bifrost.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;
|
||||
}
|
||||
state->properties.uniform_buffer_count = state->ubo_count;
|
||||
state->properties.bifrost.shader_modifies_coverage = state->can_discard;
|
||||
state->properties.bifrost.shader_wait_dependency_6 = program->wait_6;
|
||||
state->properties.bifrost.shader_wait_dependency_7 = program->wait_7;
|
||||
state->properties.uniform_buffer_count = state->info.ubo_count;
|
||||
state->properties.bifrost.shader_modifies_coverage = state->info.fs.can_discard;
|
||||
state->properties.bifrost.shader_wait_dependency_6 = state->info.bifrost.wait_6;
|
||||
state->properties.bifrost.shader_wait_dependency_7 = state->info.bifrost.wait_7;
|
||||
|
||||
pan_prepare(&state->preload, PRELOAD);
|
||||
state->preload.uniform_count = fau_count;
|
||||
state->preload.fragment.fragment_position = state->reads_frag_coord;
|
||||
state->preload.fragment.fragment_position = state->info.fs.reads_frag_coord;
|
||||
state->preload.fragment.coverage = true;
|
||||
state->preload.fragment.primitive_flags = state->reads_face;
|
||||
state->preload.fragment.primitive_flags = state->info.fs.reads_face;
|
||||
|
||||
/* Contains sample ID and sample mask. Sample position and
|
||||
* helper invocation are expressed in terms of the above, so
|
||||
* preload for those too */
|
||||
state->preload.fragment.sample_mask_id =
|
||||
BITSET_TEST(info->system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
|
||||
BITSET_TEST(info->system_values_read, SYSTEM_VALUE_SAMPLE_POS) ||
|
||||
BITSET_TEST(info->system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN) ||
|
||||
BITSET_TEST(info->system_values_read, SYSTEM_VALUE_HELPER_INVOCATION);
|
||||
state->info.fs.reads_sample_id |
|
||||
state->info.fs.reads_sample_pos |
|
||||
state->info.fs.reads_sample_mask_in |
|
||||
state->info.fs.reads_helper_invocation;
|
||||
break;
|
||||
case MESA_SHADER_COMPUTE:
|
||||
pan_prepare(&state->properties, RENDERER_PROPERTIES);
|
||||
state->properties.uniform_buffer_count = state->ubo_count;
|
||||
state->properties.uniform_buffer_count = state->info.ubo_count;
|
||||
|
||||
pan_prepare(&state->preload, PRELOAD);
|
||||
state->preload.uniform_count = fau_count;
|
||||
|
@ -152,112 +147,12 @@ pan_upload_shader_descriptor(struct panfrost_context *ctx,
|
|||
u_upload_unmap(ctx->state_uploader);
|
||||
}
|
||||
|
||||
static unsigned
|
||||
pan_format_from_nir_base(nir_alu_type base)
|
||||
{
|
||||
switch (base) {
|
||||
case nir_type_int:
|
||||
return MALI_FORMAT_SINT;
|
||||
case nir_type_uint:
|
||||
case nir_type_bool:
|
||||
return MALI_FORMAT_UINT;
|
||||
case nir_type_float:
|
||||
return MALI_CHANNEL_FLOAT;
|
||||
default:
|
||||
unreachable("Invalid base");
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned
|
||||
pan_format_from_nir_size(nir_alu_type base, unsigned size)
|
||||
{
|
||||
if (base == nir_type_float) {
|
||||
switch (size) {
|
||||
case 16: return MALI_FORMAT_SINT;
|
||||
case 32: return MALI_FORMAT_UNORM;
|
||||
default:
|
||||
unreachable("Invalid float size for format");
|
||||
}
|
||||
} else {
|
||||
switch (size) {
|
||||
case 1:
|
||||
case 8: return MALI_CHANNEL_8;
|
||||
case 16: return MALI_CHANNEL_16;
|
||||
case 32: return MALI_CHANNEL_32;
|
||||
default:
|
||||
unreachable("Invalid int size for format");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static enum mali_format
|
||||
pan_format_from_glsl(const struct glsl_type *type, unsigned precision, unsigned frac)
|
||||
{
|
||||
const struct glsl_type *column = glsl_without_array_or_matrix(type);
|
||||
enum glsl_base_type glsl_base = glsl_get_base_type(column);
|
||||
nir_alu_type t = nir_get_nir_type_for_glsl_base_type(glsl_base);
|
||||
unsigned chan = glsl_get_components(column);
|
||||
|
||||
/* If we have a fractional location added, we need to increase the size
|
||||
* so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4.
|
||||
* We could do better but this is an edge case as it is, normally
|
||||
* packed varyings will be aligned. */
|
||||
chan += frac;
|
||||
|
||||
assert(chan >= 1 && chan <= 4);
|
||||
|
||||
unsigned base = nir_alu_type_get_base_type(t);
|
||||
unsigned size = nir_alu_type_get_type_size(t);
|
||||
|
||||
/* Demote to fp16 where possible. int16 varyings are TODO as the hw
|
||||
* will saturate instead of wrap which is not conformant, so we need to
|
||||
* insert i2i16/u2u16 instructions before the st_vary_32i/32u to get
|
||||
* the intended behaviour */
|
||||
|
||||
bool is_16 = (precision == GLSL_PRECISION_MEDIUM)
|
||||
|| (precision == GLSL_PRECISION_LOW);
|
||||
|
||||
if (is_16 && base == nir_type_float)
|
||||
size = 16;
|
||||
else
|
||||
size = 32;
|
||||
|
||||
return pan_format_from_nir_base(base) |
|
||||
pan_format_from_nir_size(base, size) |
|
||||
MALI_NR_CHANNELS(chan);
|
||||
}
|
||||
|
||||
static enum mali_bifrost_register_file_format
|
||||
bifrost_blend_type_from_nir(nir_alu_type nir_type)
|
||||
{
|
||||
switch(nir_type) {
|
||||
case 0: /* Render target not in use */
|
||||
return 0;
|
||||
case nir_type_float16:
|
||||
return MALI_BIFROST_REGISTER_FILE_FORMAT_F16;
|
||||
case nir_type_float32:
|
||||
return MALI_BIFROST_REGISTER_FILE_FORMAT_F32;
|
||||
case nir_type_int32:
|
||||
return MALI_BIFROST_REGISTER_FILE_FORMAT_I32;
|
||||
case nir_type_uint32:
|
||||
return MALI_BIFROST_REGISTER_FILE_FORMAT_U32;
|
||||
case nir_type_int16:
|
||||
return MALI_BIFROST_REGISTER_FILE_FORMAT_I16;
|
||||
case nir_type_uint16:
|
||||
return MALI_BIFROST_REGISTER_FILE_FORMAT_U16;
|
||||
default:
|
||||
unreachable("Unsupported blend shader type for NIR alu type");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
panfrost_shader_compile(struct panfrost_context *ctx,
|
||||
enum pipe_shader_ir ir_type,
|
||||
const void *ir,
|
||||
gl_shader_stage stage,
|
||||
struct panfrost_shader_state *state,
|
||||
uint64_t *outputs_written)
|
||||
struct panfrost_shader_state *state)
|
||||
{
|
||||
struct panfrost_device *dev = pan_device(ctx->base.screen);
|
||||
|
||||
|
@ -280,169 +175,62 @@ panfrost_shader_compile(struct panfrost_context *ctx,
|
|||
|
||||
memcpy(inputs.rt_formats, state->rt_formats, sizeof(inputs.rt_formats));
|
||||
|
||||
panfrost_program *program;
|
||||
struct util_dynarray binary;
|
||||
|
||||
program = pan_shader_compile(dev, NULL, s, &inputs);
|
||||
util_dynarray_init(&binary, NULL);
|
||||
pan_shader_compile(dev, s, &inputs, &binary, &state->info);
|
||||
|
||||
/* Prepare the compiled binary for upload */
|
||||
mali_ptr shader = 0;
|
||||
unsigned attribute_count = 0, varying_count = 0;
|
||||
int size = program->compiled.size;
|
||||
int size = binary.size;
|
||||
|
||||
if (size) {
|
||||
state->bo = panfrost_bo_create(dev, size, PAN_BO_EXECUTE);
|
||||
memcpy(state->bo->ptr.cpu, program->compiled.data, size);
|
||||
memcpy(state->bo->ptr.cpu, binary.data, size);
|
||||
shader = state->bo->ptr.gpu;
|
||||
}
|
||||
|
||||
/* Midgard needs the first tag on the bottom nibble */
|
||||
|
||||
if (!pan_is_bifrost(dev)) {
|
||||
/* If size = 0, we tag as "end-of-shader" */
|
||||
|
||||
if (size)
|
||||
shader |= program->first_tag;
|
||||
else
|
||||
shader = 0x1;
|
||||
}
|
||||
|
||||
state->sysval_count = program->sysval_count;
|
||||
memcpy(state->sysval, program->sysvals, sizeof(state->sysval[0]) * state->sysval_count);
|
||||
memcpy(&state->push, &program->push, sizeof(program->push));
|
||||
|
||||
bool vertex_id = BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_VERTEX_ID);
|
||||
bool instance_id = BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
|
||||
|
||||
state->writes_global = s->info.writes_memory;
|
||||
|
||||
switch (stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
attribute_count = util_bitcount64(s->info.inputs_read) +
|
||||
util_bitcount(s->info.images_used);
|
||||
varying_count = util_bitcount64(s->info.outputs_written);
|
||||
|
||||
if (vertex_id)
|
||||
attribute_count = MAX2(attribute_count, PAN_VERTEX_ID + 1);
|
||||
|
||||
if (instance_id)
|
||||
attribute_count = MAX2(attribute_count, PAN_INSTANCE_ID + 1);
|
||||
|
||||
break;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(state->blend_ret_addrs); i++) {
|
||||
if (!program->blend_ret_offsets[i])
|
||||
continue;
|
||||
|
||||
state->blend_ret_addrs[i] = (state->bo->ptr.gpu & UINT32_MAX) +
|
||||
program->blend_ret_offsets[i];
|
||||
assert(!(state->blend_ret_addrs[i] & 0x7));
|
||||
}
|
||||
attribute_count = util_bitcount(s->info.images_used);
|
||||
varying_count = util_bitcount64(s->info.inputs_read);
|
||||
if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
|
||||
state->writes_depth = true;
|
||||
if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
|
||||
state->writes_stencil = true;
|
||||
|
||||
uint64_t outputs_read = s->info.outputs_read;
|
||||
if (outputs_read & BITFIELD64_BIT(FRAG_RESULT_COLOR))
|
||||
outputs_read |= BITFIELD64_BIT(FRAG_RESULT_DATA0);
|
||||
|
||||
state->outputs_read = outputs_read >> FRAG_RESULT_DATA0;
|
||||
|
||||
/* EXT_shader_framebuffer_fetch requires per-sample */
|
||||
state->sample_shading = s->info.fs.uses_sample_shading ||
|
||||
outputs_read;
|
||||
|
||||
/* List of reasons we need to execute frag shaders when things
|
||||
* are masked off */
|
||||
|
||||
state->fs_sidefx =
|
||||
s->info.writes_memory ||
|
||||
s->info.fs.uses_discard ||
|
||||
s->info.fs.uses_demote;
|
||||
|
||||
state->can_discard = s->info.fs.uses_discard;
|
||||
break;
|
||||
case MESA_SHADER_COMPUTE:
|
||||
attribute_count = util_bitcount(s->info.images_used);
|
||||
state->shared_size = s->info.cs.shared_size;
|
||||
break;
|
||||
default:
|
||||
unreachable("Unknown shader state");
|
||||
}
|
||||
|
||||
state->stack_size = program->tls_size;
|
||||
state->reads_frag_coord = (s->info.inputs_read & (1 << VARYING_SLOT_POS)) ||
|
||||
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
|
||||
state->reads_point_coord = s->info.inputs_read & (1 << VARYING_SLOT_PNTC);
|
||||
state->reads_face = (s->info.inputs_read & (1 << VARYING_SLOT_FACE)) ||
|
||||
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
|
||||
state->writes_point_size = s->info.outputs_written & (1 << VARYING_SLOT_PSIZ);
|
||||
|
||||
if (outputs_written)
|
||||
*outputs_written = s->info.outputs_written;
|
||||
|
||||
state->work_reg_count = program->work_register_count;
|
||||
|
||||
if (pan_is_bifrost(dev))
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(state->blend_types); i++)
|
||||
state->blend_types[i] = bifrost_blend_type_from_nir(program->blend_types[i]);
|
||||
|
||||
/* Record the varying mapping for the command stream's bookkeeping */
|
||||
|
||||
nir_variable_mode varying_mode =
|
||||
stage == MESA_SHADER_VERTEX ? nir_var_shader_out : nir_var_shader_in;
|
||||
|
||||
nir_foreach_variable_with_modes(var, s, varying_mode) {
|
||||
unsigned loc = var->data.driver_location;
|
||||
unsigned sz = glsl_count_attribute_slots(var->type, FALSE);
|
||||
|
||||
for (int c = 0; c < sz; ++c) {
|
||||
state->varyings_loc[loc + c] = var->data.location + c;
|
||||
state->varyings[loc + c] = pan_format_from_glsl(var->type,
|
||||
var->data.precision, var->data.location_frac);
|
||||
}
|
||||
}
|
||||
|
||||
/* Needed for linkage */
|
||||
state->attribute_count = attribute_count;
|
||||
state->varying_count = varying_count;
|
||||
|
||||
/* Sysvals have dedicated UBO */
|
||||
state->ubo_count = s->info.num_ubos + (state->sysval_count ? 1 : 0);
|
||||
if (!pan_is_bifrost(dev))
|
||||
shader |= state->info.midgard.first_tag;
|
||||
|
||||
/* Prepare the descriptors at compile-time */
|
||||
state->shader.shader = shader;
|
||||
state->shader.attribute_count = attribute_count;
|
||||
state->shader.varying_count = varying_count;
|
||||
state->shader.texture_count = s->info.num_textures;
|
||||
state->shader.sampler_count = s->info.num_textures;
|
||||
state->shader.attribute_count = state->info.attribute_count;
|
||||
state->shader.varying_count = state->info.varyings.input_count +
|
||||
state->info.varyings.output_count;
|
||||
state->shader.texture_count = state->info.texture_count;
|
||||
state->shader.sampler_count = state->info.texture_count;
|
||||
|
||||
if (pan_is_bifrost(dev))
|
||||
pan_prepare_bifrost_props(state, program, stage, &s->info);
|
||||
pan_prepare_bifrost_props(state);
|
||||
else
|
||||
pan_prepare_midgard_props(state, program, stage);
|
||||
pan_prepare_midgard_props(state);
|
||||
|
||||
state->properties.shader_contains_barrier =
|
||||
s->info.uses_memory_barrier |
|
||||
s->info.uses_control_barrier;
|
||||
state->info.contains_barrier;
|
||||
|
||||
/* Ordering guarantees are the same */
|
||||
if (stage == MESA_SHADER_FRAGMENT) {
|
||||
state->properties.shader_contains_barrier |=
|
||||
s->info.fs.needs_quad_helper_invocations;
|
||||
state->info.fs.helper_invocations;
|
||||
state->properties.stencil_from_shader =
|
||||
state->info.fs.writes_stencil;
|
||||
state->properties.depth_source =
|
||||
state->info.fs.writes_depth ?
|
||||
MALI_DEPTH_SOURCE_SHADER :
|
||||
MALI_DEPTH_SOURCE_FIXED_FUNCTION;
|
||||
} else {
|
||||
state->properties.depth_source =
|
||||
MALI_DEPTH_SOURCE_FIXED_FUNCTION;
|
||||
}
|
||||
|
||||
state->properties.stencil_from_shader = state->writes_stencil;
|
||||
state->properties.depth_source = state->writes_depth ?
|
||||
MALI_DEPTH_SOURCE_SHADER :
|
||||
MALI_DEPTH_SOURCE_FIXED_FUNCTION;
|
||||
|
||||
if (stage != MESA_SHADER_FRAGMENT)
|
||||
pan_upload_shader_descriptor(ctx, state);
|
||||
|
||||
ralloc_free(program);
|
||||
util_dynarray_fini(&binary);
|
||||
|
||||
/* In both clone and tgsi_to_nir paths, the shader is ralloc'd against
|
||||
* a NULL context */
|
||||
|
|
|
@ -295,21 +295,23 @@ panfrost_compile_blend_shader(struct panfrost_blend_shader *shader,
|
|||
if (constants)
|
||||
memcpy(inputs.blend.constants, constants, sizeof(inputs.blend.constants));
|
||||
|
||||
panfrost_program *program;
|
||||
|
||||
if (pan_is_bifrost(dev)) {
|
||||
inputs.blend.bifrost_blend_desc =
|
||||
bifrost_get_blend_desc(dev, shader->key.format, shader->key.rt);
|
||||
}
|
||||
|
||||
program = pan_shader_compile(dev, NULL, shader->nir, &inputs);
|
||||
struct pan_shader_info info;
|
||||
struct util_dynarray binary;
|
||||
|
||||
util_dynarray_init(&binary, NULL);
|
||||
pan_shader_compile(dev, shader->nir, &inputs, &binary, &info);
|
||||
|
||||
/* Allow us to patch later */
|
||||
shader->first_tag = program->first_tag;
|
||||
shader->size = program->compiled.size;
|
||||
shader->first_tag = pan_is_bifrost(dev) ? 0 : info.midgard.first_tag;
|
||||
shader->size = binary.size;
|
||||
shader->buffer = reralloc_size(shader, shader->buffer, shader->size);
|
||||
memcpy(shader->buffer, program->compiled.data, shader->size);
|
||||
shader->work_count = program->work_register_count;
|
||||
memcpy(shader->buffer, binary.data, shader->size);
|
||||
shader->work_count = info.work_reg_count;
|
||||
|
||||
ralloc_free(program);
|
||||
util_dynarray_fini(&binary);
|
||||
}
|
||||
|
|
|
@ -241,7 +241,7 @@ panfrost_fs_required(
|
|||
unsigned rt_count)
|
||||
{
|
||||
/* If we generally have side effects */
|
||||
if (fs->fs_sidefx)
|
||||
if (fs->info.fs.sidefx)
|
||||
return true;
|
||||
|
||||
/* If colour is written we need to execute */
|
||||
|
@ -252,7 +252,31 @@ panfrost_fs_required(
|
|||
|
||||
/* If depth is written and not implied we need to execute.
|
||||
* TODO: Predicate on Z/S writes being enabled */
|
||||
return (fs->writes_depth || fs->writes_stencil);
|
||||
return (fs->info.fs.writes_depth || fs->info.fs.writes_stencil);
|
||||
}
|
||||
|
||||
static enum mali_bifrost_register_file_format
|
||||
bifrost_blend_type_from_nir(nir_alu_type nir_type)
|
||||
{
|
||||
switch(nir_type) {
|
||||
case 0: /* Render target not in use */
|
||||
return 0;
|
||||
case nir_type_float16:
|
||||
return MALI_BIFROST_REGISTER_FILE_FORMAT_F16;
|
||||
case nir_type_float32:
|
||||
return MALI_BIFROST_REGISTER_FILE_FORMAT_F32;
|
||||
case nir_type_int32:
|
||||
return MALI_BIFROST_REGISTER_FILE_FORMAT_I32;
|
||||
case nir_type_uint32:
|
||||
return MALI_BIFROST_REGISTER_FILE_FORMAT_U32;
|
||||
case nir_type_int16:
|
||||
return MALI_BIFROST_REGISTER_FILE_FORMAT_I16;
|
||||
case nir_type_uint16:
|
||||
return MALI_BIFROST_REGISTER_FILE_FORMAT_U16;
|
||||
default:
|
||||
unreachable("Unsupported blend shader type for NIR alu type");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -292,8 +316,12 @@ panfrost_emit_bifrost_blend(struct panfrost_batch *batch,
|
|||
assert((blend[i].shader.gpu & (0xffffffffull << 32)) ==
|
||||
(fs->bo->ptr.gpu & (0xffffffffull << 32)));
|
||||
cfg.bifrost.internal.shader.pc = (u32)blend[i].shader.gpu;
|
||||
assert(!(fs->blend_ret_addrs[i] & 0x7));
|
||||
cfg.bifrost.internal.shader.return_value = fs->blend_ret_addrs[i];
|
||||
unsigned ret_offset = fs->info.bifrost.blend[i].return_offset;
|
||||
if (ret_offset) {
|
||||
assert(!(ret_offset & 0x7));
|
||||
cfg.bifrost.internal.shader.return_value =
|
||||
fs->bo->ptr.gpu + ret_offset;
|
||||
}
|
||||
cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_SHADER;
|
||||
} else {
|
||||
enum pipe_format format = batch->key.cbufs[i]->format;
|
||||
|
@ -324,7 +352,7 @@ panfrost_emit_bifrost_blend(struct panfrost_batch *batch,
|
|||
cfg.bifrost.internal.fixed_function.conversion.memory_format =
|
||||
panfrost_format_to_bifrost_blend(dev, format_desc, true);
|
||||
cfg.bifrost.internal.fixed_function.conversion.register_format =
|
||||
fs->blend_types[i];
|
||||
bifrost_blend_type_from_nir(fs->info.bifrost.blend[i].type);
|
||||
cfg.bifrost.internal.fixed_function.rt = i;
|
||||
}
|
||||
}
|
||||
|
@ -412,7 +440,9 @@ panfrost_prepare_bifrost_fs_state(struct panfrost_context *ctx,
|
|||
|
||||
state->properties = fs->properties;
|
||||
state->properties.bifrost.allow_forward_pixel_to_kill =
|
||||
!fs->can_discard && !fs->writes_depth && no_blend;
|
||||
!fs->info.fs.can_discard &&
|
||||
!fs->info.fs.writes_depth &&
|
||||
no_blend;
|
||||
state->shader = fs->shader;
|
||||
state->preload = fs->preload;
|
||||
}
|
||||
|
@ -436,8 +466,8 @@ panfrost_prepare_midgard_fs_state(struct panfrost_context *ctx,
|
|||
state->properties.midgard.force_early_z = true;
|
||||
} else {
|
||||
/* Reasons to disable early-Z from a shader perspective */
|
||||
bool late_z = fs->can_discard || fs->writes_global ||
|
||||
fs->writes_depth || fs->writes_stencil;
|
||||
bool late_z = fs->info.fs.can_discard || fs->info.writes_global ||
|
||||
fs->info.fs.writes_depth || fs->info.fs.writes_stencil;
|
||||
|
||||
/* If either depth or stencil is enabled, discard matters */
|
||||
bool zs_enabled =
|
||||
|
@ -452,9 +482,9 @@ panfrost_prepare_midgard_fs_state(struct panfrost_context *ctx,
|
|||
/* TODO: Reduce this limit? */
|
||||
state->properties = fs->properties;
|
||||
if (has_blend_shader)
|
||||
state->properties.midgard.work_register_count = MAX2(fs->work_reg_count, 8);
|
||||
state->properties.midgard.work_register_count = MAX2(fs->info.work_reg_count, 8);
|
||||
else
|
||||
state->properties.midgard.work_register_count = fs->work_reg_count;
|
||||
state->properties.midgard.work_register_count = fs->info.work_reg_count;
|
||||
|
||||
state->properties.midgard.force_early_z = !(late_z || alpha_to_coverage);
|
||||
|
||||
|
@ -463,8 +493,10 @@ panfrost_prepare_midgard_fs_state(struct panfrost_context *ctx,
|
|||
* lying to the hardware about the discard and setting the
|
||||
* reads tilebuffer? flag to compensate */
|
||||
state->properties.midgard.shader_reads_tilebuffer =
|
||||
fs->outputs_read || (!zs_enabled && fs->can_discard);
|
||||
state->properties.midgard.shader_contains_discard = zs_enabled && fs->can_discard;
|
||||
fs->info.fs.outputs_read ||
|
||||
(!zs_enabled && fs->info.fs.can_discard);
|
||||
state->properties.midgard.shader_contains_discard =
|
||||
zs_enabled && fs->info.fs.can_discard;
|
||||
state->shader = fs->shader;
|
||||
}
|
||||
|
||||
|
@ -528,7 +560,7 @@ panfrost_prepare_fs_state(struct panfrost_context *ctx,
|
|||
state->multisample_misc.sample_mask = (msaa ? ctx->sample_mask : ~0) & 0xFFFF;
|
||||
|
||||
state->multisample_misc.evaluate_per_sample =
|
||||
msaa && (ctx->min_samples > 1 || fs->sample_shading);
|
||||
msaa && (ctx->min_samples > 1 || fs->info.fs.sample_shading);
|
||||
|
||||
state->multisample_misc.depth_function = zsa->base.depth_enabled ?
|
||||
panfrost_translate_compare_func(zsa->base.depth_func) :
|
||||
|
@ -930,8 +962,8 @@ panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
|
|||
{
|
||||
struct sysval_uniform *uniforms = (void *)buf;
|
||||
|
||||
for (unsigned i = 0; i < ss->sysval_count; ++i) {
|
||||
int sysval = ss->sysval[i];
|
||||
for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) {
|
||||
int sysval = ss->info.sysvals.sysvals[i];
|
||||
|
||||
switch (PAN_SYSVAL_TYPE(sysval)) {
|
||||
case PAN_SYSVAL_VIEWPORT_SCALE:
|
||||
|
@ -1023,7 +1055,7 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
|
|||
struct panfrost_shader_state *ss = &all->variants[all->active_variant];
|
||||
|
||||
/* Allocate room for the sysval and the uniforms */
|
||||
size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
|
||||
size_t sys_size = sizeof(float) * 4 * ss->info.sysvals.sysval_count;
|
||||
struct panfrost_ptr transfer =
|
||||
panfrost_pool_alloc_aligned(&batch->pool, sys_size, 16);
|
||||
|
||||
|
@ -1032,7 +1064,7 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
|
|||
|
||||
/* Next up, attach UBOs. UBO count includes gaps but no sysval UBO */
|
||||
struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, stage);
|
||||
unsigned ubo_count = shader->ubo_count - (sys_size ? 1 : 0);
|
||||
unsigned ubo_count = shader->info.ubo_count - (sys_size ? 1 : 0);
|
||||
unsigned sysval_ubo = sys_size ? ubo_count : ~0;
|
||||
|
||||
size_t sz = MALI_UNIFORM_BUFFER_LENGTH * (ubo_count + 1);
|
||||
|
@ -1076,13 +1108,14 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
|
|||
|
||||
/* Copy push constants required by the shader */
|
||||
struct panfrost_ptr push_transfer =
|
||||
panfrost_pool_alloc_aligned(&batch->pool, ss->push.count * 4, 16);
|
||||
panfrost_pool_alloc_aligned(&batch->pool,
|
||||
ss->info.push.count * 4, 16);
|
||||
|
||||
uint32_t *push_cpu = (uint32_t *) push_transfer.cpu;
|
||||
*push_constants = push_transfer.gpu;
|
||||
|
||||
for (unsigned i = 0; i < ss->push.count; ++i) {
|
||||
struct panfrost_ubo_word src = ss->push.words[i];
|
||||
for (unsigned i = 0; i < ss->info.push.count; ++i) {
|
||||
struct panfrost_ubo_word src = ss->info.push.words[i];
|
||||
|
||||
/* Map the UBO, this should be cheap. However this is reading
|
||||
* from write-combine memory which is _very_ slow. It might pay
|
||||
|
@ -1108,7 +1141,7 @@ panfrost_emit_shared_memory(struct panfrost_batch *batch,
|
|||
struct panfrost_device *dev = pan_device(ctx->base.screen);
|
||||
struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
|
||||
struct panfrost_shader_state *ss = &all->variants[all->active_variant];
|
||||
unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size,
|
||||
unsigned single_size = util_next_power_of_two(MAX2(ss->info.wls_size,
|
||||
128));
|
||||
|
||||
unsigned instances =
|
||||
|
@ -1130,12 +1163,12 @@ panfrost_emit_shared_memory(struct panfrost_batch *batch,
|
|||
ls.wls_instances = instances;
|
||||
ls.wls_size_scale = util_logbase2(single_size) + 1;
|
||||
|
||||
if (ss->stack_size) {
|
||||
if (ss->info.tls_size) {
|
||||
unsigned shift =
|
||||
panfrost_get_stack_shift(ss->stack_size);
|
||||
panfrost_get_stack_shift(ss->info.tls_size);
|
||||
struct panfrost_bo *bo =
|
||||
panfrost_batch_get_scratchpad(batch,
|
||||
ss->stack_size,
|
||||
ss->info.tls_size,
|
||||
dev->thread_tls_alloc,
|
||||
dev->core_count);
|
||||
|
||||
|
@ -1366,7 +1399,7 @@ panfrost_emit_image_attribs(struct panfrost_batch *batch,
|
|||
struct panfrost_context *ctx = batch->ctx;
|
||||
struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, type);
|
||||
|
||||
if (!shader->attribute_count) {
|
||||
if (!shader->info.attribute_count) {
|
||||
*buffers = 0;
|
||||
return 0;
|
||||
}
|
||||
|
@ -1375,11 +1408,11 @@ panfrost_emit_image_attribs(struct panfrost_batch *batch,
|
|||
unsigned attrib_buf_size = MALI_ATTRIBUTE_BUFFER_LENGTH +
|
||||
MALI_ATTRIBUTE_BUFFER_CONTINUATION_3D_LENGTH;
|
||||
unsigned bytes_per_image_desc = MALI_ATTRIBUTE_LENGTH + attrib_buf_size;
|
||||
unsigned attribs_offset = attrib_buf_size * shader->attribute_count;
|
||||
unsigned attribs_offset = attrib_buf_size * shader->info.attribute_count;
|
||||
|
||||
struct panfrost_ptr ptr =
|
||||
panfrost_pool_alloc_aligned(&batch->pool,
|
||||
bytes_per_image_desc * shader->attribute_count,
|
||||
bytes_per_image_desc * shader->info.attribute_count,
|
||||
util_next_power_of_two(bytes_per_image_desc));
|
||||
|
||||
emit_image_attribs(batch, type, ptr.cpu + attribs_offset, ptr.cpu, 0);
|
||||
|
@ -1404,7 +1437,7 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
|
|||
* Also, we allocate more memory than what's needed here if either instancing
|
||||
* is enabled or images are present, this can be improved. */
|
||||
unsigned bufs_per_attrib = (ctx->instance_count > 1 || nr_images > 0) ? 2 : 1;
|
||||
unsigned nr_bufs = (vs->attribute_count * bufs_per_attrib) +
|
||||
unsigned nr_bufs = (vs->info.attribute_count * bufs_per_attrib) +
|
||||
(pan_is_bifrost(dev) ? 1 : 0);
|
||||
|
||||
if (!nr_bufs) {
|
||||
|
@ -1417,7 +1450,7 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
|
|||
MALI_ATTRIBUTE_BUFFER_LENGTH * 2);
|
||||
|
||||
struct panfrost_ptr T = panfrost_pool_alloc_aligned(&batch->pool,
|
||||
MALI_ATTRIBUTE_LENGTH * vs->attribute_count,
|
||||
MALI_ATTRIBUTE_LENGTH * vs->info.attribute_count,
|
||||
MALI_ATTRIBUTE_LENGTH);
|
||||
|
||||
struct mali_attribute_buffer_packed *bufs =
|
||||
|
@ -1525,7 +1558,7 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
|
|||
|
||||
/* Add special gl_VertexID/gl_InstanceID buffers */
|
||||
|
||||
if (unlikely(vs->attribute_count >= PAN_VERTEX_ID)) {
|
||||
if (unlikely(vs->info.attribute_count >= PAN_VERTEX_ID)) {
|
||||
panfrost_vertex_id(ctx->padded_count, &bufs[k], ctx->instance_count > 1);
|
||||
|
||||
pan_pack(out + PAN_VERTEX_ID, ATTRIBUTE, cfg) {
|
||||
|
@ -1742,22 +1775,22 @@ pan_varying_present(const struct panfrost_device *dev,
|
|||
|
||||
/* Enable special buffers by the shader info */
|
||||
|
||||
if (vs->writes_point_size)
|
||||
if (vs->info.vs.writes_point_size)
|
||||
present |= (1 << PAN_VARY_PSIZ);
|
||||
|
||||
if (fs->reads_point_coord)
|
||||
if (fs->info.fs.reads_point_coord)
|
||||
present |= (1 << PAN_VARY_PNTCOORD);
|
||||
|
||||
if (fs->reads_face)
|
||||
if (fs->info.fs.reads_face)
|
||||
present |= (1 << PAN_VARY_FACE);
|
||||
|
||||
if (fs->reads_frag_coord && !pan_is_bifrost(dev))
|
||||
if (fs->info.fs.reads_frag_coord && !pan_is_bifrost(dev))
|
||||
present |= (1 << PAN_VARY_FRAGCOORD);
|
||||
|
||||
/* Also, if we have a point sprite, we need a point coord buffer */
|
||||
|
||||
for (unsigned i = 0; i < fs->varying_count; i++) {
|
||||
gl_varying_slot loc = fs->varyings_loc[i];
|
||||
for (unsigned i = 0; i < fs->info.varyings.input_count; i++) {
|
||||
gl_varying_slot loc = fs->info.varyings.input[i].location;
|
||||
|
||||
if (util_varying_is_point_coord(loc, point_coord_mask))
|
||||
present |= (1 << PAN_VARY_PNTCOORD);
|
||||
|
@ -1886,10 +1919,18 @@ pan_emit_general_varying(const struct panfrost_device *dev,
|
|||
bool should_alloc)
|
||||
{
|
||||
/* Check if we're linked */
|
||||
unsigned other_varying_count =
|
||||
other->info.stage == MESA_SHADER_FRAGMENT ?
|
||||
other->info.varyings.input_count :
|
||||
other->info.varyings.output_count;
|
||||
const struct pan_shader_varying *other_varyings =
|
||||
other->info.stage == MESA_SHADER_FRAGMENT ?
|
||||
other->info.varyings.input :
|
||||
other->info.varyings.output;
|
||||
signed other_idx = -1;
|
||||
|
||||
for (unsigned j = 0; j < other->varying_count; ++j) {
|
||||
if (other->varyings_loc[j] == loc) {
|
||||
for (unsigned j = 0; j < other_varying_count; ++j) {
|
||||
if (other_varyings[j].location == loc) {
|
||||
other_idx = j;
|
||||
break;
|
||||
}
|
||||
|
@ -1904,7 +1945,8 @@ pan_emit_general_varying(const struct panfrost_device *dev,
|
|||
|
||||
if (should_alloc) {
|
||||
/* We're linked, so allocate a space via a watermark allocation */
|
||||
enum mali_format alt = other->varyings[other_idx];
|
||||
enum mali_format alt =
|
||||
dev->formats[other_varyings[other_idx].format].hw >> 12;
|
||||
|
||||
/* Do interpolation at minimum precision */
|
||||
unsigned size_main = pan_varying_size(format);
|
||||
|
@ -1953,8 +1995,14 @@ panfrost_emit_varying(const struct panfrost_device *dev,
|
|||
bool should_alloc,
|
||||
bool is_fragment)
|
||||
{
|
||||
gl_varying_slot loc = stage->varyings_loc[idx];
|
||||
enum mali_format format = stage->varyings[idx];
|
||||
gl_varying_slot loc =
|
||||
stage->info.stage == MESA_SHADER_FRAGMENT ?
|
||||
stage->info.varyings.input[idx].location :
|
||||
stage->info.varyings.output[idx].location;
|
||||
enum mali_format format =
|
||||
stage->info.stage == MESA_SHADER_FRAGMENT ?
|
||||
dev->formats[stage->info.varyings.input[idx].format].hw >> 12 :
|
||||
dev->formats[stage->info.varyings.output[idx].format].hw >> 12;
|
||||
|
||||
/* Override format to match linkage */
|
||||
if (!should_alloc && gen_formats[idx])
|
||||
|
@ -2018,8 +2066,8 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
|
|||
|
||||
vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
|
||||
fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
|
||||
vs_size = MALI_ATTRIBUTE_LENGTH * vs->varying_count;
|
||||
fs_size = MALI_ATTRIBUTE_LENGTH * fs->varying_count;
|
||||
vs_size = MALI_ATTRIBUTE_LENGTH * vs->info.varyings.output_count;
|
||||
fs_size = MALI_ATTRIBUTE_LENGTH * fs->info.varyings.input_count;
|
||||
|
||||
struct panfrost_ptr trans = panfrost_pool_alloc_aligned(
|
||||
&batch->pool, vs_size + fs_size, MALI_ATTRIBUTE_LENGTH);
|
||||
|
@ -2044,8 +2092,8 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
|
|||
memset(gen_formats, 0, sizeof(gen_formats));
|
||||
|
||||
unsigned gen_stride = 0;
|
||||
assert(vs->varying_count < ARRAY_SIZE(gen_offsets));
|
||||
assert(fs->varying_count < ARRAY_SIZE(gen_offsets));
|
||||
assert(vs->info.varyings.output_count < ARRAY_SIZE(gen_offsets));
|
||||
assert(fs->info.varyings.input_count < ARRAY_SIZE(gen_offsets));
|
||||
|
||||
unsigned streamout_offsets[32];
|
||||
|
||||
|
@ -2056,16 +2104,16 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
|
|||
}
|
||||
|
||||
struct mali_attribute_packed *ovs = (struct mali_attribute_packed *)trans.cpu;
|
||||
struct mali_attribute_packed *ofs = ovs + vs->varying_count;
|
||||
struct mali_attribute_packed *ofs = ovs + vs->info.varyings.output_count;
|
||||
|
||||
for (unsigned i = 0; i < vs->varying_count; i++) {
|
||||
for (unsigned i = 0; i < vs->info.varyings.output_count; i++) {
|
||||
panfrost_emit_varying(dev, ovs + i, vs, fs, vs, present, 0,
|
||||
ctx->streamout.num_targets, streamout_offsets,
|
||||
gen_offsets, gen_formats, &gen_stride, i,
|
||||
true, false);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < fs->varying_count; i++) {
|
||||
for (unsigned i = 0; i < fs->info.varyings.input_count; i++) {
|
||||
panfrost_emit_varying(dev, ofs + i, fs, vs, vs, present, point_coord_mask,
|
||||
ctx->streamout.num_targets, streamout_offsets,
|
||||
gen_offsets, gen_formats, &gen_stride, i,
|
||||
|
@ -2114,8 +2162,8 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
|
|||
pan_emit_special_input(varyings, present, PAN_VARY_FRAGCOORD, MALI_ATTRIBUTE_SPECIAL_FRAG_COORD);
|
||||
|
||||
*buffers = T.gpu;
|
||||
*vs_attribs = vs->varying_count ? trans.gpu : 0;
|
||||
*fs_attribs = fs->varying_count ? trans.gpu + vs_size : 0;
|
||||
*vs_attribs = vs->info.varyings.output_count ? trans.gpu : 0;
|
||||
*fs_attribs = fs->info.varyings.input_count ? trans.gpu + vs_size : 0;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -71,7 +71,7 @@ panfrost_create_compute_state(
|
|||
}
|
||||
|
||||
panfrost_shader_compile(ctx, so->cbase.ir_type, so->cbase.prog,
|
||||
MESA_SHADER_COMPUTE, v, NULL);
|
||||
MESA_SHADER_COMPUTE, v);
|
||||
|
||||
return so;
|
||||
}
|
||||
|
|
|
@ -150,7 +150,7 @@ panfrost_writes_point_size(struct panfrost_context *ctx)
|
|||
assert(ctx->shader[PIPE_SHADER_VERTEX]);
|
||||
struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
|
||||
|
||||
return vs->writes_point_size && ctx->active_prim == PIPE_PRIM_POINTS;
|
||||
return vs->info.vs.writes_point_size && ctx->active_prim == PIPE_PRIM_POINTS;
|
||||
}
|
||||
|
||||
/* The entire frame is in memory -- send it off to the kernel! */
|
||||
|
@ -739,12 +739,11 @@ panfrost_create_shader_state(
|
|||
struct panfrost_context *ctx = pan_context(pctx);
|
||||
|
||||
struct panfrost_shader_state state = { 0 };
|
||||
uint64_t outputs_written;
|
||||
|
||||
panfrost_shader_compile(ctx, PIPE_SHADER_IR_NIR,
|
||||
so->base.ir.nir,
|
||||
tgsi_processor_to_shader_stage(stage),
|
||||
&state, &outputs_written);
|
||||
&state);
|
||||
}
|
||||
|
||||
return so;
|
||||
|
@ -821,11 +820,12 @@ panfrost_variant_matches(
|
|||
{
|
||||
struct panfrost_device *dev = pan_device(ctx->base.screen);
|
||||
|
||||
if (variant->outputs_read) {
|
||||
if (variant->info.stage == MESA_SHADER_FRAGMENT &&
|
||||
variant->info.fs.outputs_read) {
|
||||
struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
|
||||
|
||||
unsigned i;
|
||||
BITSET_FOREACH_SET(i, &variant->outputs_read, 8) {
|
||||
BITSET_FOREACH_SET(i, &variant->info.fs.outputs_read, 8) {
|
||||
enum pipe_format fmt = PIPE_FORMAT_R8G8B8A8_UNORM;
|
||||
|
||||
if ((fb->nr_cbufs > i) && fb->cbufs[i])
|
||||
|
@ -963,15 +963,12 @@ panfrost_bind_shader_state(
|
|||
/* We finally have a variant, so compile it */
|
||||
|
||||
if (!shader_state->compiled) {
|
||||
uint64_t outputs_written = 0;
|
||||
|
||||
panfrost_shader_compile(ctx, variants->base.type,
|
||||
variants->base.type == PIPE_SHADER_IR_NIR ?
|
||||
variants->base.ir.nir :
|
||||
variants->base.tokens,
|
||||
tgsi_processor_to_shader_stage(type),
|
||||
shader_state,
|
||||
&outputs_written);
|
||||
shader_state);
|
||||
|
||||
shader_state->compiled = true;
|
||||
|
||||
|
@ -980,7 +977,8 @@ panfrost_bind_shader_state(
|
|||
|
||||
shader_state->stream_output = variants->base.stream_output;
|
||||
shader_state->so_mask =
|
||||
update_so_info(&shader_state->stream_output, outputs_written);
|
||||
update_so_info(&shader_state->stream_output,
|
||||
shader_state->info.outputs_written);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1251,7 +1249,8 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx,
|
|||
* keyed to the framebuffer format (due to EXT_framebuffer_fetch) */
|
||||
struct panfrost_shader_variants *fs = ctx->shader[PIPE_SHADER_FRAGMENT];
|
||||
|
||||
if (fs && fs->variant_count && fs->variants[fs->active_variant].outputs_read)
|
||||
if (fs && fs->variant_count &&
|
||||
fs->variants[fs->active_variant].info.fs.outputs_read)
|
||||
ctx->base.bind_fs_state(&ctx->base, fs);
|
||||
}
|
||||
|
||||
|
|
|
@ -214,46 +214,15 @@ struct panfrost_shader_state {
|
|||
struct MALI_RENDERER_PROPERTIES properties;
|
||||
struct MALI_PRELOAD preload;
|
||||
|
||||
/* Non-descript information */
|
||||
unsigned work_reg_count;
|
||||
bool sample_shading;
|
||||
bool can_discard;
|
||||
bool writes_point_size;
|
||||
bool writes_depth;
|
||||
bool writes_stencil;
|
||||
bool reads_point_coord;
|
||||
bool reads_face;
|
||||
bool reads_frag_coord;
|
||||
bool writes_global;
|
||||
unsigned stack_size;
|
||||
unsigned shared_size;
|
||||
struct pan_shader_info info;
|
||||
|
||||
/* Does the fragment shader have side effects? In particular, if output
|
||||
* is masked out, is it legal to skip shader execution? */
|
||||
bool fs_sidefx;
|
||||
|
||||
/* For Bifrost - output type for each RT */
|
||||
enum mali_bifrost_register_file_format blend_types[MALI_BIFROST_BLEND_MAX_RT];
|
||||
|
||||
unsigned attribute_count, varying_count, ubo_count;
|
||||
enum mali_format varyings[PIPE_MAX_ATTRIBS];
|
||||
gl_varying_slot varyings_loc[PIPE_MAX_ATTRIBS];
|
||||
struct pipe_stream_output_info stream_output;
|
||||
uint64_t so_mask;
|
||||
|
||||
unsigned sysval_count;
|
||||
unsigned sysval[MAX_SYSVAL_COUNT];
|
||||
|
||||
struct panfrost_ubo_push push;
|
||||
|
||||
/* GPU-executable memory */
|
||||
struct panfrost_bo *bo;
|
||||
|
||||
BITSET_WORD outputs_read;
|
||||
enum pipe_format rt_formats[8];
|
||||
|
||||
/* Blend return addresses */
|
||||
uint32_t blend_ret_addrs[8];
|
||||
};
|
||||
|
||||
/* A collection of varyings (the CSO) */
|
||||
|
@ -374,8 +343,7 @@ panfrost_shader_compile(struct panfrost_context *ctx,
|
|||
enum pipe_shader_ir ir_type,
|
||||
const void *ir,
|
||||
gl_shader_stage stage,
|
||||
struct panfrost_shader_state *state,
|
||||
uint64_t *outputs_written);
|
||||
struct panfrost_shader_state *state);
|
||||
|
||||
void
|
||||
panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so,
|
||||
|
|
|
@ -1238,7 +1238,7 @@ panfrost_batch_adjust_stack_size(struct panfrost_batch *batch)
|
|||
if (!ss)
|
||||
continue;
|
||||
|
||||
batch->stack_size = MAX2(batch->stack_size, ss->stack_size);
|
||||
batch->stack_size = MAX2(batch->stack_size, ss->info.tls_size);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -119,10 +119,10 @@ void
|
|||
bi_opt_push_ubo(bi_context *ctx)
|
||||
{
|
||||
/* This pass only runs once */
|
||||
assert(ctx->push->count == 0);
|
||||
assert(ctx->info->push.count == 0);
|
||||
|
||||
struct bi_ubo_analysis analysis = bi_analyze_ranges(ctx);
|
||||
bi_pick_ubo(ctx->push, &analysis);
|
||||
bi_pick_ubo(&ctx->info->push, &analysis);
|
||||
|
||||
bi_foreach_instr_global_safe(ctx, ins) {
|
||||
if (!bi_is_direct_aligned_ubo(ins)) continue;
|
||||
|
@ -141,8 +141,9 @@ bi_opt_push_ubo(bi_context *ctx)
|
|||
|
||||
for (unsigned w = 0; w < channels; ++w) {
|
||||
/* FAU is grouped in pairs (2 x 4-byte) */
|
||||
unsigned base = pan_lookup_pushed_ubo(ctx->push, ubo,
|
||||
(offset + 4 * w));
|
||||
unsigned base =
|
||||
pan_lookup_pushed_ubo(&ctx->info->push, ubo,
|
||||
(offset + 4 * w));
|
||||
|
||||
unsigned fau_idx = (base >> 1);
|
||||
unsigned fau_hi = (base & 1);
|
||||
|
|
|
@ -713,11 +713,11 @@ bi_collect_blend_ret_addr(bi_context *ctx, struct util_dynarray *emission,
|
|||
|
||||
|
||||
unsigned loc = tuple->regs.fau_idx - BIR_FAU_BLEND_0;
|
||||
assert(loc < ARRAY_SIZE(ctx->blend_ret_offsets));
|
||||
assert(!ctx->blend_ret_offsets[loc]);
|
||||
ctx->blend_ret_offsets[loc] =
|
||||
assert(loc < ARRAY_SIZE(ctx->info->bifrost.blend));
|
||||
assert(!ctx->info->bifrost.blend[loc].return_offset);
|
||||
ctx->info->bifrost.blend[loc].return_offset =
|
||||
util_dynarray_num_elements(emission, uint8_t);
|
||||
assert(!(ctx->blend_ret_offsets[loc] & 0x7));
|
||||
assert(!(ctx->info->bifrost.blend[loc].return_offset & 0x7));
|
||||
}
|
||||
|
||||
unsigned
|
||||
|
|
|
@ -388,7 +388,7 @@ bi_register_allocate(bi_context *ctx)
|
|||
unsigned iter_count = 1000; /* max iterations */
|
||||
|
||||
/* Number of bytes of memory we've spilled into */
|
||||
unsigned spill_count = ctx->tls_size;
|
||||
unsigned spill_count = ctx->info->tls_size;
|
||||
|
||||
do {
|
||||
if (l) {
|
||||
|
@ -410,7 +410,7 @@ bi_register_allocate(bi_context *ctx)
|
|||
|
||||
assert(success);
|
||||
|
||||
ctx->tls_size = spill_count;
|
||||
ctx->info->tls_size = spill_count;
|
||||
bi_install_registers(ctx, l);
|
||||
|
||||
lcra_free(l);
|
||||
|
|
|
@ -297,7 +297,8 @@ bi_load_sysval_to(bi_builder *b, bi_index dest, int sysval,
|
|||
unsigned nr_components, unsigned offset)
|
||||
{
|
||||
unsigned uniform =
|
||||
pan_lookup_sysval(b->shader->sysval_to_id, &b->shader->sysvals,
|
||||
pan_lookup_sysval(b->shader->sysval_to_id,
|
||||
&b->shader->info->sysvals,
|
||||
sysval);
|
||||
unsigned idx = (uniform * 16) + offset;
|
||||
|
||||
|
@ -368,8 +369,7 @@ bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, unsigned rt)
|
|||
}
|
||||
|
||||
assert(rt < 8);
|
||||
assert(b->shader->blend_types);
|
||||
b->shader->blend_types[rt] = T;
|
||||
b->shader->info->bifrost.blend[rt].type = T;
|
||||
}
|
||||
|
||||
/* Blend shaders do not need to run ATEST since they are dependent on a
|
||||
|
@ -2511,23 +2511,23 @@ bi_lower_branch(bi_block *block)
|
|||
}
|
||||
}
|
||||
|
||||
panfrost_program *
|
||||
bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
|
||||
const struct panfrost_compile_inputs *inputs)
|
||||
void
|
||||
bifrost_compile_shader_nir(nir_shader *nir,
|
||||
const struct panfrost_compile_inputs *inputs,
|
||||
struct util_dynarray *binary,
|
||||
struct pan_shader_info *info)
|
||||
{
|
||||
panfrost_program *program = rzalloc(mem_ctx, panfrost_program);
|
||||
|
||||
bifrost_debug = debug_get_option_bifrost_debug();
|
||||
|
||||
bi_context *ctx = rzalloc(NULL, bi_context);
|
||||
ctx->sysval_to_id = panfrost_init_sysvals(&ctx->sysvals, ctx);
|
||||
ctx->sysval_to_id = panfrost_init_sysvals(&info->sysvals, ctx);
|
||||
|
||||
ctx->inputs = inputs;
|
||||
ctx->nir = nir;
|
||||
ctx->info = info;
|
||||
ctx->stage = nir->info.stage;
|
||||
ctx->quirks = bifrost_get_quirks(inputs->gpu_id);
|
||||
ctx->arch = inputs->gpu_id >> 12;
|
||||
ctx->push = &program->push;
|
||||
list_inithead(&ctx->blocks);
|
||||
|
||||
/* Lower gl_Position pre-optimisation, but after lowering vars to ssa
|
||||
|
@ -2565,8 +2565,7 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
|
|||
nir_print_shader(nir, stdout);
|
||||
}
|
||||
|
||||
ctx->blend_types = program->blend_types;
|
||||
ctx->tls_size = nir->scratch_size;
|
||||
info->tls_size = nir->scratch_size;
|
||||
|
||||
nir_foreach_function(func, nir) {
|
||||
if (!func->impl)
|
||||
|
@ -2614,8 +2613,7 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
|
|||
if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal)
|
||||
bi_print_shader(ctx, stdout);
|
||||
|
||||
util_dynarray_init(&program->compiled, NULL);
|
||||
unsigned final_clause = bi_pack(ctx, &program->compiled);
|
||||
unsigned final_clause = bi_pack(ctx, binary);
|
||||
|
||||
/* If we need to wait for ATEST or BLEND in the first clause, pass the
|
||||
* corresponding bits through to the renderer state descriptor */
|
||||
|
@ -2623,17 +2621,12 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
|
|||
bi_clause *first_clause = bi_next_clause(ctx, first_block, NULL);
|
||||
|
||||
unsigned first_deps = first_clause ? first_clause->dependencies : 0;
|
||||
program->wait_6 = (first_deps & (1 << 6));
|
||||
program->wait_7 = (first_deps & (1 << 7));
|
||||
|
||||
memcpy(program->blend_ret_offsets, ctx->blend_ret_offsets, sizeof(program->blend_ret_offsets));
|
||||
program->sysval_count = ctx->sysvals.sysval_count;
|
||||
memcpy(program->sysvals, ctx->sysvals.sysvals, sizeof(ctx->sysvals.sysvals[0]) * ctx->sysvals.sysval_count);
|
||||
info->bifrost.wait_6 = (first_deps & (1 << 6));
|
||||
info->bifrost.wait_7 = (first_deps & (1 << 7));
|
||||
|
||||
if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) {
|
||||
disassemble_bifrost(stdout, program->compiled.data,
|
||||
program->compiled.size,
|
||||
bifrost_debug & BIFROST_DBG_VERBOSE);
|
||||
disassemble_bifrost(stdout, binary->data, binary->size,
|
||||
bifrost_debug & BIFROST_DBG_VERBOSE);
|
||||
}
|
||||
|
||||
/* Pad the shader with enough zero bytes to trick the prefetcher,
|
||||
|
@ -2641,19 +2634,15 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
|
|||
* so the size remains 0) */
|
||||
unsigned prefetch_size = BIFROST_SHADER_PREFETCH - final_clause;
|
||||
|
||||
if (program->compiled.size) {
|
||||
memset(util_dynarray_grow(&program->compiled, uint8_t, prefetch_size),
|
||||
if (binary->size) {
|
||||
memset(util_dynarray_grow(binary, uint8_t, prefetch_size),
|
||||
0, prefetch_size);
|
||||
}
|
||||
|
||||
program->tls_size = ctx->tls_size;
|
||||
|
||||
if ((bifrost_debug & BIFROST_DBG_SHADERDB || inputs->shaderdb) &&
|
||||
!skip_internal) {
|
||||
bi_print_stats(ctx, program->compiled.size, stderr);
|
||||
bi_print_stats(ctx, binary->size, stderr);
|
||||
}
|
||||
|
||||
ralloc_free(ctx);
|
||||
|
||||
return program;
|
||||
}
|
||||
|
|
|
@ -28,9 +28,11 @@
|
|||
#include "util/u_dynarray.h"
|
||||
#include "panfrost/util/pan_ir.h"
|
||||
|
||||
panfrost_program *
|
||||
bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
|
||||
const struct panfrost_compile_inputs *inputs);
|
||||
void
|
||||
bifrost_compile_shader_nir(nir_shader *nir,
|
||||
const struct panfrost_compile_inputs *inputs,
|
||||
struct util_dynarray *binary,
|
||||
struct pan_shader_info *info);
|
||||
|
||||
static const nir_shader_compiler_options bifrost_nir_options = {
|
||||
.lower_scmp = true,
|
||||
|
|
|
@ -32,7 +32,7 @@
|
|||
#include "util/u_dynarray.h"
|
||||
#include "bifrost_compile.h"
|
||||
|
||||
static panfrost_program *
|
||||
static void
|
||||
compile_shader(char **argv, bool vertex_only)
|
||||
{
|
||||
struct gl_shader_program *prog;
|
||||
|
@ -53,7 +53,10 @@ compile_shader(char **argv, bool vertex_only)
|
|||
prog = standalone_compile_shader(&options, 2, argv, &local_ctx);
|
||||
prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program->info.stage = MESA_SHADER_FRAGMENT;
|
||||
|
||||
panfrost_program *compiled;
|
||||
struct util_dynarray binary;
|
||||
|
||||
util_dynarray_init(&binary, NULL);
|
||||
|
||||
for (unsigned i = 0; i < 2; ++i) {
|
||||
nir[i] = glsl_to_nir(&local_ctx, prog, shader_types[i], &bifrost_nir_options);
|
||||
NIR_PASS_V(nir[i], nir_lower_global_vars_to_local);
|
||||
|
@ -70,14 +73,16 @@ compile_shader(char **argv, bool vertex_only)
|
|||
struct panfrost_compile_inputs inputs = {
|
||||
.gpu_id = 0x7212, /* Mali G52 */
|
||||
};
|
||||
struct pan_shader_info info;
|
||||
|
||||
compiled = bifrost_compile_shader_nir(NULL, nir[i], &inputs);
|
||||
util_dynarray_clear(&binary);
|
||||
bifrost_compile_shader_nir(nir[i], &inputs, &binary, &info);
|
||||
|
||||
if (vertex_only)
|
||||
return compiled;
|
||||
break;
|
||||
}
|
||||
|
||||
return compiled;
|
||||
util_dynarray_fini(&binary);
|
||||
}
|
||||
|
||||
#define BI_FOURCC(ch0, ch1, ch2, ch3) ( \
|
||||
|
|
|
@ -496,17 +496,12 @@ typedef struct bi_block {
|
|||
typedef struct {
|
||||
const struct panfrost_compile_inputs *inputs;
|
||||
nir_shader *nir;
|
||||
struct pan_shader_info *info;
|
||||
gl_shader_stage stage;
|
||||
struct list_head blocks; /* list of bi_block */
|
||||
struct panfrost_sysvals sysvals;
|
||||
struct hash_table_u64 *sysval_to_id;
|
||||
struct panfrost_ubo_push *push;
|
||||
uint32_t quirks;
|
||||
unsigned arch;
|
||||
unsigned tls_size;
|
||||
|
||||
/* Blend return offsets */
|
||||
uint32_t blend_ret_offsets[8];
|
||||
|
||||
/* During NIR->BIR */
|
||||
bi_block *current_block;
|
||||
|
@ -514,7 +509,6 @@ typedef struct {
|
|||
bi_block *break_block;
|
||||
bi_block *continue_block;
|
||||
bool emitted_atest;
|
||||
nir_alu_type *blend_types;
|
||||
|
||||
/* For creating temporaries */
|
||||
unsigned ssa_alloc;
|
||||
|
|
|
@ -43,11 +43,13 @@
|
|||
* This is primarily designed as a fallback for preloads but could be extended
|
||||
* for other clears/blits if needed in the future. */
|
||||
|
||||
static panfrost_program *
|
||||
static void
|
||||
panfrost_build_blit_shader(struct panfrost_device *dev,
|
||||
gl_frag_result loc,
|
||||
nir_alu_type T,
|
||||
bool ms)
|
||||
bool ms,
|
||||
struct util_dynarray *binary,
|
||||
struct pan_shader_info *info)
|
||||
{
|
||||
bool is_colour = loc >= FRAG_RESULT_DATA0;
|
||||
|
||||
|
@ -110,11 +112,9 @@ panfrost_build_blit_shader(struct panfrost_device *dev,
|
|||
.is_blit = true,
|
||||
};
|
||||
|
||||
panfrost_program *program =
|
||||
pan_shader_compile(dev, NULL, shader, &inputs);
|
||||
pan_shader_compile(dev, shader, &inputs, binary, info);
|
||||
|
||||
ralloc_free(shader);
|
||||
return program;
|
||||
}
|
||||
|
||||
/* Compile and upload all possible blit shaders ahead-of-time to reduce draw
|
||||
|
@ -162,6 +162,9 @@ panfrost_init_blit_shaders(struct panfrost_device *dev)
|
|||
/* Don't bother generating multisampling variants if we don't actually
|
||||
* support multisampling */
|
||||
bool has_ms = !(dev->quirks & MIDGARD_SFBD);
|
||||
struct util_dynarray binary;
|
||||
|
||||
util_dynarray_init(&binary, NULL);
|
||||
|
||||
for (unsigned ms = 0; ms <= has_ms; ++ms) {
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(shader_descs); ++i) {
|
||||
|
@ -172,27 +175,38 @@ panfrost_init_blit_shaders(struct panfrost_device *dev)
|
|||
continue;
|
||||
|
||||
struct pan_blit_shader *shader = &dev->blit_shaders.loads[loc][T][ms];
|
||||
panfrost_program *program =
|
||||
panfrost_build_blit_shader(dev, loc,
|
||||
nir_types[T], ms);
|
||||
struct pan_shader_info info;
|
||||
|
||||
assert(offset + program->compiled.size < total_size);
|
||||
util_dynarray_clear(&binary);
|
||||
panfrost_build_blit_shader(dev, loc,
|
||||
nir_types[T], ms,
|
||||
&binary, &info);
|
||||
|
||||
assert(offset + binary.size < total_size);
|
||||
memcpy(dev->blit_shaders.bo->ptr.cpu + offset,
|
||||
program->compiled.data, program->compiled.size);
|
||||
binary.data, binary.size);
|
||||
|
||||
shader->shader = (dev->blit_shaders.bo->ptr.gpu + offset) |
|
||||
program->first_tag;
|
||||
shader->shader = (dev->blit_shaders.bo->ptr.gpu + offset);
|
||||
if (pan_is_bifrost(dev)) {
|
||||
int rt = loc - FRAG_RESULT_DATA0;
|
||||
if (rt >= 0 && rt < 8 &&
|
||||
info.bifrost.blend[rt].return_offset) {
|
||||
shader->blend_ret_addr =
|
||||
shader->shader +
|
||||
info.bifrost.blend[rt].return_offset;
|
||||
}
|
||||
} else {
|
||||
shader->shader |= info.midgard.first_tag;
|
||||
}
|
||||
|
||||
int rt = loc - FRAG_RESULT_DATA0;
|
||||
if (rt >= 0 && rt < 8 && program->blend_ret_offsets[rt])
|
||||
shader->blend_ret_addr = program->blend_ret_offsets[rt] + shader->shader;
|
||||
|
||||
offset += ALIGN_POT(program->compiled.size,
|
||||
offset += ALIGN_POT(binary.size,
|
||||
pan_is_bifrost(dev) ? 128 : 64);
|
||||
ralloc_free(program);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
util_dynarray_fini(&binary);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
@ -37,13 +37,196 @@ pan_shader_get_compiler_options(const struct panfrost_device *dev)
|
|||
return &midgard_nir_options;
|
||||
}
|
||||
|
||||
panfrost_program *
|
||||
pan_shader_compile(const struct panfrost_device *dev,
|
||||
void *mem_ctx, nir_shader *nir,
|
||||
const struct panfrost_compile_inputs *inputs)
|
||||
/*
 * Map a sized NIR ALU type plus a component count (1..4) to the pipe_format
 * used to describe a varying of that type. Returns PIPE_FORMAT_NONE when the
 * type has no entry in the conversion table below.
 */
static enum pipe_format
|
||||
varying_format(nir_alu_type t, unsigned ncomps)
|
||||
{
|
||||
/* NOTE(review): the next two statements reference dev/mem_ctx/nir/inputs,
 * none of which are parameters here -- they look like removed-side diff
 * lines from the old pan_shader_compile() body, not part of this function. */
if (pan_is_bifrost(dev))
|
||||
return bifrost_compile_shader_nir(mem_ctx, nir, inputs);
|
||||
#define VARYING_FORMAT(ntype, nsz, ptype, psz) \
|
||||
{ \
|
||||
.type = nir_type_ ## ntype ## nsz, \
|
||||
.formats = { \
|
||||
PIPE_FORMAT_R ## psz ## _ ## ptype, \
|
||||
PIPE_FORMAT_R ## psz ## G ## psz ## _ ## ptype, \
|
||||
PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## _ ## ptype, \
|
||||
PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## A ## psz ## _ ## ptype, \
|
||||
} \
|
||||
}
|
||||
|
||||
/* NOTE(review): same as above -- presumably a removed-side diff line. */
return midgard_compile_shader_nir(mem_ctx, nir, inputs);
|
||||
/* One row per supported NIR type; formats[] is indexed by ncomps - 1 */
static const struct {
|
||||
nir_alu_type type;
|
||||
enum pipe_format formats[4];
|
||||
} conv[] = {
|
||||
VARYING_FORMAT(float, 32, FLOAT, 32),
|
||||
VARYING_FORMAT(int, 32, SINT, 32),
|
||||
VARYING_FORMAT(uint, 32, UINT, 32),
|
||||
VARYING_FORMAT(float, 16, FLOAT, 16),
|
||||
VARYING_FORMAT(int, 16, SINT, 16),
|
||||
VARYING_FORMAT(uint, 16, UINT, 16),
|
||||
VARYING_FORMAT(int, 8, SINT, 8),
|
||||
VARYING_FORMAT(uint, 8, UINT, 8),
|
||||
VARYING_FORMAT(bool, 32, UINT, 32),
|
||||
VARYING_FORMAT(bool, 16, UINT, 16),
|
||||
VARYING_FORMAT(bool, 8, UINT, 8),
|
||||
VARYING_FORMAT(bool, 1, UINT, 8),
|
||||
};
|
||||
#undef VARYING_FORMAT
|
||||
|
||||
assert(ncomps > 0 && ncomps <= ARRAY_SIZE(conv[0].formats));
|
||||
|
||||
/* Linear search for the row whose NIR type matches exactly */
for (unsigned i = 0; i < ARRAY_SIZE(conv); i++) {
|
||||
if (conv[i].type == t)
|
||||
return conv[i].formats[ncomps - 1];
|
||||
}
|
||||
|
||||
return PIPE_FORMAT_NONE;
|
||||
}
|
||||
|
||||
static void
|
||||
collect_varyings(nir_shader *s, nir_variable_mode varying_mode,
|
||||
struct pan_shader_varying *varyings,
|
||||
unsigned *varying_count)
|
||||
{
|
||||
*varying_count = 0;
|
||||
|
||||
nir_foreach_variable_with_modes(var, s, varying_mode) {
|
||||
unsigned loc = var->data.driver_location;
|
||||
unsigned sz = glsl_count_attribute_slots(var->type, FALSE);
|
||||
const struct glsl_type *column =
|
||||
glsl_without_array_or_matrix(var->type);
|
||||
unsigned chan = glsl_get_components(column);
|
||||
enum glsl_base_type base_type = glsl_get_base_type(column);
|
||||
|
||||
/* If we have a fractional location added, we need to increase the size
|
||||
* so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4.
|
||||
* We could do better but this is an edge case as it is, normally
|
||||
* packed varyings will be aligned.
|
||||
*/
|
||||
chan += var->data.location_frac;
|
||||
assert(chan >= 1 && chan <= 4);
|
||||
|
||||
nir_alu_type type = nir_get_nir_type_for_glsl_base_type(base_type);
|
||||
|
||||
type = nir_alu_type_get_base_type(type);
|
||||
|
||||
/* Demote to fp16 where possible. int16 varyings are TODO as the hw
|
||||
* will saturate instead of wrap which is not conformant, so we need to
|
||||
* insert i2i16/u2u16 instructions before the st_vary_32i/32u to get
|
||||
* the intended behaviour.
|
||||
*/
|
||||
if (type == nir_type_float &&
|
||||
(var->data.precision == GLSL_PRECISION_MEDIUM ||
|
||||
var->data.precision == GLSL_PRECISION_LOW)) {
|
||||
type |= 16;
|
||||
} else {
|
||||
type |= 32;
|
||||
}
|
||||
|
||||
enum pipe_format format = varying_format(type, chan);
|
||||
assert(format != PIPE_FORMAT_NONE);
|
||||
|
||||
for (int c = 0; c < sz; ++c) {
|
||||
varyings[loc + c].location = var->data.location + c;
|
||||
varyings[loc + c].format = format;
|
||||
}
|
||||
|
||||
*varying_count = MAX2(*varying_count, loc + sz);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
pan_shader_compile(const struct panfrost_device *dev,
|
||||
nir_shader *s,
|
||||
const struct panfrost_compile_inputs *inputs,
|
||||
struct util_dynarray *binary,
|
||||
struct pan_shader_info *info)
|
||||
{
|
||||
memset(info, 0, sizeof(*info));
|
||||
|
||||
if (pan_is_bifrost(dev))
|
||||
bifrost_compile_shader_nir(s, inputs, binary, info);
|
||||
else
|
||||
midgard_compile_shader_nir(s, inputs, binary, info);
|
||||
|
||||
info->stage = s->info.stage;
|
||||
info->contains_barrier = s->info.uses_memory_barrier ||
|
||||
s->info.uses_control_barrier;
|
||||
|
||||
switch (info->stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
info->attribute_count = util_bitcount64(s->info.inputs_read);
|
||||
|
||||
bool vertex_id = BITSET_TEST(s->info.system_values_read,
|
||||
SYSTEM_VALUE_VERTEX_ID);
|
||||
if (vertex_id)
|
||||
info->attribute_count = MAX2(info->attribute_count, PAN_VERTEX_ID + 1);
|
||||
|
||||
bool instance_id = BITSET_TEST(s->info.system_values_read,
|
||||
SYSTEM_VALUE_INSTANCE_ID);
|
||||
if (instance_id)
|
||||
info->attribute_count = MAX2(info->attribute_count, PAN_INSTANCE_ID + 1);
|
||||
|
||||
info->vs.writes_point_size =
|
||||
s->info.outputs_written & (1 << VARYING_SLOT_PSIZ);
|
||||
collect_varyings(s, nir_var_shader_out, info->varyings.output,
|
||||
&info->varyings.output_count);
|
||||
break;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
|
||||
info->fs.writes_depth = true;
|
||||
if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
|
||||
info->fs.writes_stencil = true;
|
||||
|
||||
uint64_t outputs_read = s->info.outputs_read;
|
||||
if (outputs_read & BITFIELD64_BIT(FRAG_RESULT_COLOR))
|
||||
outputs_read |= BITFIELD64_BIT(FRAG_RESULT_DATA0);
|
||||
|
||||
info->fs.outputs_read = outputs_read >> FRAG_RESULT_DATA0;
|
||||
|
||||
/* EXT_shader_framebuffer_fetch requires per-sample */
|
||||
info->fs.sample_shading = s->info.fs.uses_sample_shading ||
|
||||
outputs_read;
|
||||
|
||||
info->fs.can_discard = s->info.fs.uses_discard;
|
||||
info->fs.helper_invocations = s->info.fs.needs_quad_helper_invocations;
|
||||
|
||||
/* List of reasons we need to execute frag shaders when things
|
||||
* are masked off */
|
||||
|
||||
info->fs.sidefx = s->info.writes_memory ||
|
||||
s->info.fs.uses_discard ||
|
||||
s->info.fs.uses_demote;
|
||||
info->fs.reads_frag_coord =
|
||||
(s->info.inputs_read & (1 << VARYING_SLOT_POS)) ||
|
||||
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
|
||||
info->fs.reads_point_coord =
|
||||
s->info.inputs_read & (1 << VARYING_SLOT_PNTC);
|
||||
info->fs.reads_face =
|
||||
(s->info.inputs_read & (1 << VARYING_SLOT_FACE)) ||
|
||||
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
|
||||
info->fs.reads_sample_id =
|
||||
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
|
||||
info->fs.reads_sample_pos =
|
||||
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS);
|
||||
info->fs.reads_sample_mask_in =
|
||||
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
|
||||
info->fs.reads_helper_invocation =
|
||||
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION);
|
||||
collect_varyings(s, nir_var_shader_in, info->varyings.input,
|
||||
&info->varyings.input_count);
|
||||
break;
|
||||
case MESA_SHADER_COMPUTE:
|
||||
info->wls_size = s->info.cs.shared_size;
|
||||
break;
|
||||
default:
|
||||
unreachable("Unknown shader state");
|
||||
}
|
||||
|
||||
info->outputs_written = s->info.outputs_written;
|
||||
|
||||
/* Sysvals have dedicated UBO */
|
||||
info->ubo_count = s->info.num_ubos + (info->sysvals.sysval_count ? 1 : 0);
|
||||
|
||||
info->attribute_count += util_bitcount(s->info.images_used);
|
||||
info->writes_global = s->info.writes_memory;
|
||||
|
||||
info->texture_count = s->info.num_textures;
|
||||
}
|
||||
|
|
|
@ -33,9 +33,11 @@ struct panfrost_device;
|
|||
const nir_shader_compiler_options *
|
||||
pan_shader_get_compiler_options(const struct panfrost_device *dev);
|
||||
|
||||
panfrost_program *
|
||||
void
|
||||
pan_shader_compile(const struct panfrost_device *dev,
|
||||
void *mem_ctx, nir_shader *nir,
|
||||
const struct panfrost_compile_inputs *inputs);
|
||||
nir_shader *nir,
|
||||
const struct panfrost_compile_inputs *inputs,
|
||||
struct util_dynarray *binary,
|
||||
struct pan_shader_info *info);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -238,6 +238,7 @@ enum midgard_rt_id {
|
|||
typedef struct compiler_context {
|
||||
const struct panfrost_compile_inputs *inputs;
|
||||
nir_shader *nir;
|
||||
struct pan_shader_info *info;
|
||||
gl_shader_stage stage;
|
||||
|
||||
/* Number of samples for a keyed blend shader. Depends on is_blend */
|
||||
|
@ -249,9 +250,6 @@ typedef struct compiler_context {
|
|||
/* Index to precolour to r2 for a dual-source blend colour */
|
||||
unsigned blend_src1;
|
||||
|
||||
/* Number of bytes used for Thread Local Storage */
|
||||
unsigned tls_size;
|
||||
|
||||
/* Count of spills and fills for shaderdb */
|
||||
unsigned spills;
|
||||
unsigned fills;
|
||||
|
@ -291,10 +289,6 @@ typedef struct compiler_context {
|
|||
/* Set of NIR indices that were already emitted as outmods */
|
||||
BITSET_WORD *already_emitted;
|
||||
|
||||
/* Just the count of the max register used. Higher count => higher
|
||||
* register pressure */
|
||||
int work_registers;
|
||||
|
||||
/* The number of uniforms allowable for the fast path */
|
||||
int uniform_cutoff;
|
||||
|
||||
|
@ -312,9 +306,7 @@ typedef struct compiler_context {
|
|||
/* Writeout instructions for each render target */
|
||||
midgard_instruction *writeout_branch[MIDGARD_NUM_RTS][MIDGARD_MAX_SAMPLE_ITER];
|
||||
|
||||
struct panfrost_sysvals sysvals;
|
||||
struct hash_table_u64 *sysval_to_id;
|
||||
struct panfrost_ubo_push *push;
|
||||
} compiler_context;
|
||||
|
||||
/* Per-block live_in/live_out */
|
||||
|
|
|
@ -1448,7 +1448,7 @@ emit_sysval_read(compiler_context *ctx, nir_instr *instr,
|
|||
int sysval = panfrost_sysval_for_instr(instr, &nir_dest);
|
||||
unsigned dest = nir_dest_index(&nir_dest);
|
||||
unsigned uniform =
|
||||
pan_lookup_sysval(ctx->sysval_to_id, &ctx->sysvals, sysval);
|
||||
pan_lookup_sysval(ctx->sysval_to_id, &ctx->info->sysvals, sysval);
|
||||
|
||||
/* Emit the read itself -- this is never indirect */
|
||||
midgard_instruction *ins =
|
||||
|
@ -2978,24 +2978,22 @@ mir_add_writeout_loops(compiler_context *ctx)
|
|||
}
|
||||
}
|
||||
|
||||
panfrost_program *
|
||||
midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
|
||||
const struct panfrost_compile_inputs *inputs)
|
||||
void
|
||||
midgard_compile_shader_nir(nir_shader *nir,
|
||||
const struct panfrost_compile_inputs *inputs,
|
||||
struct util_dynarray *binary,
|
||||
struct pan_shader_info *info)
|
||||
{
|
||||
panfrost_program *program = rzalloc(mem_ctx, panfrost_program);
|
||||
|
||||
struct util_dynarray *compiled = &program->compiled;
|
||||
|
||||
midgard_debug = debug_get_option_midgard_debug();
|
||||
|
||||
/* TODO: Bound against what? */
|
||||
compiler_context *ctx = rzalloc(NULL, compiler_context);
|
||||
ctx->sysval_to_id = panfrost_init_sysvals(&ctx->sysvals, ctx);
|
||||
ctx->sysval_to_id = panfrost_init_sysvals(&info->sysvals, ctx);
|
||||
|
||||
ctx->inputs = inputs;
|
||||
ctx->nir = nir;
|
||||
ctx->info = info;
|
||||
ctx->stage = nir->info.stage;
|
||||
ctx->push = &program->push;
|
||||
|
||||
if (inputs->is_blend) {
|
||||
unsigned nr_samples = MAX2(inputs->blend.nr_samples, 1);
|
||||
|
@ -3013,7 +3011,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
|
|||
/* Start off with a safe cutoff, allowing usage of all 16 work
|
||||
* registers. Later, we'll promote uniform reads to uniform registers
|
||||
* if we determine it is beneficial to do so */
|
||||
ctx->uniform_cutoff = 8;
|
||||
info->midgard.uniform_cutoff = 8;
|
||||
|
||||
/* Initialize at a global (not block) level hash tables */
|
||||
|
||||
|
@ -3059,7 +3057,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
|
|||
nir_print_shader(nir, stdout);
|
||||
}
|
||||
|
||||
ctx->tls_size = nir->scratch_size;
|
||||
info->tls_size = nir->scratch_size;
|
||||
|
||||
nir_foreach_function(func, nir) {
|
||||
if (!func->impl)
|
||||
|
@ -3086,8 +3084,6 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
|
|||
break; /* TODO: Multi-function shaders */
|
||||
}
|
||||
|
||||
util_dynarray_init(compiled, program);
|
||||
|
||||
/* Per-block lowering before opts */
|
||||
|
||||
mir_foreach_block(ctx, _block) {
|
||||
|
@ -3164,7 +3160,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
|
|||
if (!bundle->last_writeout && (current_bundle + 1 < bundle_count))
|
||||
lookahead = source_order_bundles[current_bundle + 1]->tag;
|
||||
|
||||
emit_binary_bundle(ctx, block, bundle, compiled, lookahead);
|
||||
emit_binary_bundle(ctx, block, bundle, binary, lookahead);
|
||||
++current_bundle;
|
||||
}
|
||||
|
||||
|
@ -3175,20 +3171,11 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
|
|||
free(source_order_bundles);
|
||||
|
||||
/* Report the very first tag executed */
|
||||
program->first_tag = midgard_get_first_tag_from_block(ctx, 0);
|
||||
|
||||
/* Deal with off-by-one related to the fencepost problem */
|
||||
program->work_register_count = ctx->work_registers + 1;
|
||||
program->uniform_cutoff = ctx->uniform_cutoff;
|
||||
|
||||
program->tls_size = ctx->tls_size;
|
||||
|
||||
program->sysval_count = ctx->sysvals.sysval_count;
|
||||
memcpy(program->sysvals, ctx->sysvals.sysvals, sizeof(ctx->sysvals.sysvals[0]) * ctx->sysvals.sysval_count);
|
||||
info->midgard.first_tag = midgard_get_first_tag_from_block(ctx, 0);
|
||||
|
||||
if ((midgard_debug & MIDGARD_DBG_SHADERS) && !nir->info.internal) {
|
||||
disassemble_midgard(stdout, program->compiled.data,
|
||||
program->compiled.size, inputs->gpu_id);
|
||||
disassemble_midgard(stdout, binary->data,
|
||||
binary->size, inputs->gpu_id);
|
||||
}
|
||||
|
||||
if ((midgard_debug & MIDGARD_DBG_SHADERDB || inputs->shaderdb) &&
|
||||
|
@ -3209,7 +3196,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
|
|||
/* Calculate thread count. There are certain cutoffs by
|
||||
* register count for thread count */
|
||||
|
||||
unsigned nr_registers = program->work_register_count;
|
||||
unsigned nr_registers = info->work_reg_count;
|
||||
|
||||
unsigned nr_threads =
|
||||
(nr_registers <= 4) ? 4 :
|
||||
|
@ -3232,6 +3219,4 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
|
|||
}
|
||||
|
||||
ralloc_free(ctx);
|
||||
|
||||
return program;
|
||||
}
|
||||
|
|
|
@ -29,9 +29,11 @@
|
|||
#include "util/u_dynarray.h"
|
||||
#include "panfrost/util/pan_ir.h"
|
||||
|
||||
panfrost_program *
|
||||
midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
|
||||
const struct panfrost_compile_inputs *inputs);
|
||||
/* Compile a NIR shader with the Midgard compiler.
 *
 * nir:    shader to compile
 * inputs: compile-time inputs (gpu_id, is_blend, shaderdb, ...)
 * binary: dynarray the emitted bundles are appended to
 * info:   shader information filled in by the compiler (sysvals,
 *         tls_size, work_reg_count, push, midgard.uniform_cutoff,
 *         midgard.first_tag)
 *
 * Nothing is returned; all results are reported through binary and info. */
void
|
||||
midgard_compile_shader_nir(nir_shader *nir,
|
||||
const struct panfrost_compile_inputs *inputs,
|
||||
struct util_dynarray *binary,
|
||||
struct pan_shader_info *info);
|
||||
|
||||
/* NIR options are shared between the standalone compiler and the online
|
||||
* compiler. Defining it here is the simplest, though maybe not the Right
|
||||
|
|
|
@ -99,7 +99,7 @@ index_to_reg(compiler_context *ctx, struct lcra_state *l, unsigned reg, unsigned
|
|||
/* Report that we actually use this register, and return it */
|
||||
|
||||
if (r.reg < 16)
|
||||
ctx->work_registers = MAX2(ctx->work_registers, r.reg);
|
||||
ctx->info->work_reg_count = MAX2(ctx->info->work_reg_count, r.reg + 1);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -395,7 +395,7 @@ allocate_registers(compiler_context *ctx, bool *spilled)
|
|||
* uniforms start and the shader stage. By ABI we limit blend shaders
|
||||
* to 8 registers, should be lower XXX */
|
||||
int work_count = ctx->inputs->is_blend ? 8 :
|
||||
16 - MAX2((ctx->uniform_cutoff - 8), 0);
|
||||
16 - MAX2((ctx->info->midgard.uniform_cutoff - 8), 0);
|
||||
|
||||
/* No register allocation to do with no SSA */
|
||||
|
||||
|
@ -646,7 +646,7 @@ allocate_registers(compiler_context *ctx, bool *spilled)
|
|||
if (ctx->blend_src1 != ~0) {
|
||||
assert(ctx->blend_src1 < ctx->temp_count);
|
||||
l->solutions[ctx->blend_src1] = (16 * 2);
|
||||
ctx->work_registers = MAX2(ctx->work_registers, 2);
|
||||
ctx->info->work_reg_count = MAX2(ctx->info->work_reg_count, 3);
|
||||
}
|
||||
|
||||
mir_compute_interference(ctx, l);
|
||||
|
@ -959,13 +959,14 @@ mir_spill_register(
|
|||
static void
|
||||
mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
|
||||
{
|
||||
unsigned old_work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0);
|
||||
unsigned old_work_count =
|
||||
16 - MAX2((ctx->info->midgard.uniform_cutoff - 8), 0);
|
||||
unsigned work_count = 16 - MAX2((new_cutoff - 8), 0);
|
||||
|
||||
unsigned min_demote = SSA_FIXED_REGISTER(old_work_count);
|
||||
unsigned max_demote = SSA_FIXED_REGISTER(work_count);
|
||||
|
||||
ctx->uniform_cutoff = new_cutoff;
|
||||
ctx->info->midgard.uniform_cutoff = new_cutoff;
|
||||
|
||||
mir_foreach_block(ctx, _block) {
|
||||
midgard_block *block = (midgard_block *) _block;
|
||||
|
@ -978,7 +979,7 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
|
|||
|
||||
unsigned temp = make_compiler_temp(ctx);
|
||||
unsigned idx = (23 - SSA_REG_FROM_FIXED(ins->src[i])) * 4;
|
||||
assert(idx < ctx->push->count);
|
||||
assert(idx < ctx->info->push.count);
|
||||
|
||||
midgard_instruction ld = {
|
||||
.type = TAG_LOAD_STORE_4,
|
||||
|
@ -989,10 +990,10 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
|
|||
.swizzle = SWIZZLE_IDENTITY_4,
|
||||
.op = midgard_op_ld_ubo_int4,
|
||||
.load_store = {
|
||||
.arg_1 = ctx->push->words[idx].ubo,
|
||||
.arg_1 = ctx->info->push.words[idx].ubo,
|
||||
.arg_2 = 0x1E,
|
||||
},
|
||||
.constants.u32[0] = ctx->push->words[idx].offset
|
||||
.constants.u32[0] = ctx->info->push.words[idx].offset
|
||||
};
|
||||
|
||||
mir_insert_instruction_before_scheduled(ctx, block, before, ld);
|
||||
|
@ -1013,7 +1014,7 @@ mir_ra(compiler_context *ctx)
|
|||
int iter_count = 1000; /* max iterations */
|
||||
|
||||
/* Number of 128-bit slots in memory we've spilled into */
|
||||
unsigned spill_count = DIV_ROUND_UP(ctx->tls_size, 16);
|
||||
unsigned spill_count = DIV_ROUND_UP(ctx->info->tls_size, 16);
|
||||
|
||||
|
||||
mir_create_pipeline_registers(ctx);
|
||||
|
@ -1025,9 +1026,9 @@ mir_ra(compiler_context *ctx)
|
|||
/* It's a lot cheaper to demote uniforms to get more
|
||||
* work registers than to spill to TLS. */
|
||||
if (l->spill_class == REG_CLASS_WORK &&
|
||||
ctx->uniform_cutoff > 8) {
|
||||
ctx->info->midgard.uniform_cutoff > 8) {
|
||||
|
||||
mir_demote_uniforms(ctx, MAX2(ctx->uniform_cutoff - 4, 8));
|
||||
mir_demote_uniforms(ctx, MAX2(ctx->info->midgard.uniform_cutoff - 4, 8));
|
||||
} else if (spill_node == -1) {
|
||||
fprintf(stderr, "ERROR: Failed to choose spill node\n");
|
||||
lcra_free(l);
|
||||
|
@ -1056,7 +1057,7 @@ mir_ra(compiler_context *ctx)
|
|||
/* Report spilling information. spill_count is in 128-bit slots (vec4 x
|
||||
* fp32), but tls_size is in bytes, so multiply by 16 */
|
||||
|
||||
ctx->tls_size = spill_count * 16;
|
||||
ctx->info->tls_size = spill_count * 16;
|
||||
|
||||
install_registers(ctx, l);
|
||||
|
||||
|
|
|
@ -263,7 +263,7 @@ midgard_promote_uniforms(compiler_context *ctx)
|
|||
unsigned work_count = mir_work_heuristic(ctx, &analysis);
|
||||
unsigned promoted_count = 24 - work_count;
|
||||
|
||||
mir_pick_ubo(ctx->push, &analysis, promoted_count);
|
||||
mir_pick_ubo(&ctx->info->push, &analysis, promoted_count);
|
||||
|
||||
/* First, figure out special indices a priori so we don't recompute a lot */
|
||||
BITSET_WORD *special = mir_special_indices(ctx);
|
||||
|
@ -279,7 +279,7 @@ midgard_promote_uniforms(compiler_context *ctx)
|
|||
if (!BITSET_TEST(analysis.blocks[ubo].pushed, qword)) continue;
|
||||
|
||||
/* Find where we pushed to, TODO: unaligned pushes to pack */
|
||||
unsigned base = pan_lookup_pushed_ubo(ctx->push, ubo, qword * 16);
|
||||
unsigned base = pan_lookup_pushed_ubo(&ctx->info->push, ubo, qword * 16);
|
||||
assert((base & 0x3) == 0);
|
||||
|
||||
unsigned address = base / 4;
|
||||
|
@ -288,7 +288,8 @@ midgard_promote_uniforms(compiler_context *ctx)
|
|||
/* Should've taken into account when pushing */
|
||||
assert(address < promoted_count);
|
||||
|
||||
ctx->uniform_cutoff = MAX2(ctx->uniform_cutoff, address + 1);
|
||||
ctx->info->midgard.uniform_cutoff =
|
||||
MAX2(ctx->info->midgard.uniform_cutoff, address + 1);
|
||||
unsigned promoted = SSA_FIXED_REGISTER(uniform_reg);
|
||||
|
||||
/* We do need the move for safety for a non-SSA dest, or if
|
||||
|
|
|
@ -115,40 +115,6 @@ pan_lookup_sysval(struct hash_table_u64 *sysval_to_id,
|
|||
int
|
||||
panfrost_sysval_for_instr(nir_instr *instr, nir_dest *dest);
|
||||
|
||||
/* Result of a shader compile: the binary (compiled) together with the
 * metadata the compiler reports about it. */
typedef struct {
|
||||
/* Number of work registers used (highest register index used, plus one) */
int work_register_count;
|
||||
/* The number of uniforms allowable for the fast path */
int uniform_cutoff;
|
||||
|
||||
/* For Bifrost - output type for each RT */
|
||||
nir_alu_type blend_types[8];
|
||||
|
||||
/* For Bifrost - return address for blend instructions */
|
||||
uint32_t blend_ret_offsets[8];
|
||||
|
||||
/* Prepended before uniforms, mapping to SYSVAL_ names for the
|
||||
* sysval */
|
||||
|
||||
unsigned sysval_count;
|
||||
unsigned sysvals[MAX_SYSVAL_COUNT];
|
||||
|
||||
/* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access
|
||||
* Uniforms (Bifrost) */
|
||||
struct panfrost_ubo_push push;
|
||||
|
||||
/* The very first tag executed (Midgard) */
int first_tag;
|
||||
|
||||
/* The compiled shader binary */
struct util_dynarray compiled;
|
||||
|
||||
/* The number of bytes to allocate per-thread for Thread Local Storage
|
||||
* (register spilling), or zero if no spilling is used */
|
||||
unsigned tls_size;
|
||||
|
||||
/* For Bifrost, should the program wait on dependency slots 6/7 before
|
||||
* starting? For ATEST/BLEND in the first clause, which can occur with
|
||||
* extremely simple shaders */
|
||||
bool wait_6, wait_7;
|
||||
} panfrost_program;
|
||||
|
||||
struct panfrost_compile_inputs {
|
||||
unsigned gpu_id;
|
||||
bool is_blend, is_blit;
|
||||
|
@ -163,6 +129,82 @@ struct panfrost_compile_inputs {
|
|||
enum pipe_format rt_formats[8];
|
||||
};
|
||||
|
||||
/* Description of a single shader varying: the varying slot it lives in and
 * the pipe format attached to it. Used for the input[] and output[] arrays
 * of pan_shader_info.varyings. */
struct pan_shader_varying {
|
||||
gl_varying_slot location;
|
||||
enum pipe_format format;
|
||||
};
|
||||
|
||||
/* Bifrost blend information; bifrost_shader_info holds an array of eight of
 * these.
 * NOTE(review): presumably one entry per render target -- confirm. */
struct bifrost_shader_blend_info {
|
||||
/* NOTE(review): presumably the output type for this RT -- confirm */
nir_alu_type type;
|
||||
/* NOTE(review): presumably the return address for blend instructions
 * -- confirm */
uint32_t return_offset;
|
||||
};
|
||||
|
||||
/* Bifrost-specific shader information, stored in the bifrost member of the
 * architecture union at the end of pan_shader_info. */
struct bifrost_shader_info {
|
||||
/* Blend information, eight entries */
struct bifrost_shader_blend_info blend[8];
|
||||
/* Should the program wait on dependency slots 6/7 before starting? For
 * ATEST/BLEND in the first clause, which can occur with extremely simple
 * shaders */
bool wait_6, wait_7;
|
||||
};
|
||||
|
||||
/* Midgard-specific shader information, written by the Midgard compiler
 * through pan_shader_info.midgard. */
struct midgard_shader_info {
|
||||
/* The number of uniforms allowable for the fast path. The compiler
 * starts at 8 and may raise it when promoting uniforms or lower it again
 * (never below 8) when demoting them to free work registers. */
unsigned uniform_cutoff;
|
||||
/* The very first tag executed */
unsigned first_tag;
|
||||
};
|
||||
|
||||
/* Information about a compiled shader. A pointer to this structure is handed
 * to the compiler, which fills the fields directly while compiling (for
 * instance tls_size, work_reg_count, sysvals, push and the
 * architecture-specific union at the end). */
struct pan_shader_info {
|
||||
gl_shader_stage stage;
|
||||
/* Number of work registers used: highest work register index, plus one */
unsigned work_reg_count;
|
||||
/* Number of bytes used for Thread Local Storage (NIR scratch plus
 * register spilling) */
unsigned tls_size;
|
||||
/* NOTE(review): presumably the workgroup local storage size; not written
 * by the code visible here -- confirm */
unsigned wls_size;
|
||||
|
||||
/* Stage-specific flags.
 * NOTE(review): fs/vs presumably stand for fragment/vertex shader and
 * only the member matching stage is meaningful -- confirm */
union {
|
||||
struct {
|
||||
bool reads_frag_coord;
|
||||
bool reads_point_coord;
|
||||
bool reads_face;
|
||||
bool helper_invocations;
|
||||
bool can_discard;
|
||||
bool writes_depth;
|
||||
bool writes_stencil;
|
||||
bool sidefx;
|
||||
bool reads_sample_id;
|
||||
bool reads_sample_pos;
|
||||
bool reads_sample_mask_in;
|
||||
bool reads_helper_invocation;
|
||||
bool sample_shading;
|
||||
BITSET_WORD outputs_read;
|
||||
} fs;
|
||||
|
||||
struct {
|
||||
bool writes_point_size;
|
||||
} vs;
|
||||
};
|
||||
|
||||
bool contains_barrier;
|
||||
bool writes_global;
|
||||
uint64_t outputs_written;
|
||||
|
||||
unsigned texture_count;
|
||||
unsigned ubo_count;
|
||||
unsigned attribute_count;
|
||||
|
||||
/* Varyings read (input) and written (output) by the shader; the counts
 * give the number of valid entries in the matching array.
 * NOTE(review): read/written direction inferred from the names --
 * confirm */
struct {
|
||||
unsigned input_count;
|
||||
struct pan_shader_varying input[MAX_VARYING];
|
||||
unsigned output_count;
|
||||
struct pan_shader_varying output[MAX_VARYING];
|
||||
} varyings;
|
||||
|
||||
/* System values used by the shader, collected by the compiler */
struct panfrost_sysvals sysvals;
|
||||
|
||||
/* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access
|
||||
* Uniforms (Bifrost) */
|
||||
struct panfrost_ubo_push push;
|
||||
|
||||
/* Architecture-specific information. The Midgard compiler writes the
 * midgard member; the bifrost member holds the Bifrost counterpart. */
union {
|
||||
struct bifrost_shader_info bifrost;
|
||||
struct midgard_shader_info midgard;
|
||||
};
|
||||
};
|
||||
|
||||
typedef struct pan_block {
|
||||
/* Link to next block. Must be first for mir_get_block */
|
||||
struct list_head link;
|
||||
|
|
Loading…
Reference in New Issue