panfrost: Prepare shader_meta descriptors at emission time

This way we avoid potential state leaks and keep the shader_meta
initialization in one place. The time spent preparing the shader
descriptors should be negligible compared to the time spent pushing
those descriptors to the transient buffer (remember we are writing to
non-cacheable memory here).

Note that we might get back to some sort of shader_meta descriptor
caching at some point if that proves necessary, but we now have the
panfrost_frag_meta_xxx_update() helpers, where xxx maps directly to a
CSO bind, which should ease descriptor template updates.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4083>
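
For context, a minimal standalone sketch of the emission-time pattern this commit adopts (plain C, with simplified stand-in structs and helper names rather than the driver's real mali_shader_meta/CSO types): the descriptor is rebuilt on the stack from the currently bound CSOs on every draw, one helper per CSO bind, then copied once into the non-cacheable transient buffer.

/* Minimal standalone sketch, not the actual Mesa code: struct layouts,
 * field names and bit values below are simplified stand-ins for
 * mali_shader_meta and the bound CSOs. */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

struct shader_meta {             /* stand-in for struct mali_shader_meta */
        uint64_t shader;         /* shader GPU address | first tag */
        uint32_t flags;
        uint32_t depth_func;
};

struct draw_state {              /* stand-in for the currently bound CSOs */
        uint64_t shader_gpu;
        unsigned first_tag;
        bool     msaa;           /* from the rasterizer CSO */
        unsigned depth_func;     /* from the depth/stencil/alpha CSO */
};

/* One helper per CSO bind, mirroring the panfrost_frag_meta_xxx_update()
 * naming: each helper only touches the descriptor bits its CSO owns. */
static void
frag_meta_rasterizer_update(const struct draw_state *st,
                            struct shader_meta *meta)
{
        if (st->msaa)
                meta->flags |= 1u << 0;  /* placeholder "has MSAA" bit */
}

static void
frag_meta_zsa_update(const struct draw_state *st, struct shader_meta *meta)
{
        meta->depth_func = st->depth_func;
}

/* Emit path: the descriptor is zero-initialized and rebuilt for every draw,
 * so nothing leaks from a previous draw, then pushed once to transient
 * (write-combined, non-cacheable) memory. */
static void
emit_shader_meta(const struct draw_state *st, void *transient_cpu)
{
        struct shader_meta meta;

        memset(&meta, 0, sizeof(meta));
        meta.shader = st->shader_gpu | st->first_tag;
        frag_meta_rasterizer_update(st, &meta);
        frag_meta_zsa_update(st, &meta);

        memcpy(transient_cpu, &meta, sizeof(meta));
}
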
Author: Boris Brezillon
Date:   2020-03-05 16:20:18 +01:00
parent 55e014336f
commit b02f97c875
7 changed files with 388 additions and 390 deletions


@@ -37,14 +37,12 @@
#include "tgsi/tgsi_dump.h"
void
panfrost_shader_compile(
struct panfrost_context *ctx,
struct mali_shader_meta *meta,
enum pipe_shader_ir ir_type,
const void *ir,
gl_shader_stage stage,
struct panfrost_shader_state *state,
uint64_t *outputs_written)
panfrost_shader_compile(struct panfrost_context *ctx,
enum pipe_shader_ir ir_type,
const void *ir,
gl_shader_stage stage,
struct panfrost_shader_state *state,
uint64_t *outputs_written)
{
struct panfrost_screen *screen = pan_screen(ctx->base.screen);
uint8_t *dst;
@@ -80,11 +78,9 @@ panfrost_shader_compile(
if (size) {
state->bo = panfrost_bo_create(screen, size, PAN_BO_EXECUTE);
memcpy(state->bo->cpu, dst, size);
meta->shader = state->bo->gpu | program.first_tag;
state->first_tag = program.first_tag;
} else {
/* No shader. Use dummy tag to avoid INSTR_INVALID_ENC */
meta->shader = 0x0 | 1;
state->first_tag = 1;
}
@@ -95,9 +91,6 @@ panfrost_shader_compile(
state->sysval_count = program.sysval_count;
memcpy(state->sysval, program.sysvals, sizeof(state->sysval[0]) * state->sysval_count);
meta->midgard1.uniform_count = MIN2(program.uniform_count, program.uniform_cutoff);
meta->midgard1.work_count = program.work_register_count;
bool vertex_id = s->info.system_values_read & (1 << SYSTEM_VALUE_VERTEX_ID);
bool instance_id = s->info.system_values_read & (1 << SYSTEM_VALUE_INSTANCE_ID);
@@ -145,10 +138,6 @@ panfrost_shader_compile(
state->uniform_cutoff = program.uniform_cutoff;
state->work_reg_count = program.work_register_count;
meta->attribute_count = state->attribute_count;
meta->varying_count = state->varying_count;
meta->midgard1.flags_hi = 8; /* XXX */
unsigned default_vec1_swizzle = panfrost_get_default_swizzle(1);
unsigned default_vec2_swizzle = panfrost_get_default_swizzle(2);
unsigned default_vec4_swizzle = panfrost_get_default_swizzle(4);


@@ -144,17 +144,12 @@ panfrost_bind_blend_state(struct pipe_context *pipe,
void *cso)
{
struct panfrost_context *ctx = pan_context(pipe);
struct panfrost_screen *screen = pan_screen(ctx->base.screen);
struct pipe_blend_state *blend = (struct pipe_blend_state *) cso;
struct panfrost_blend_state *pblend = (struct panfrost_blend_state *) cso;
ctx->blend = pblend;
if (!blend)
return;
if (screen->quirks & MIDGARD_SFBD) {
SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_DITHER, !blend->dither);
}
}
static void


@@ -99,6 +99,333 @@ panfrost_vt_update_occlusion_query(struct panfrost_context *ctx,
tp->postfix.occlusion_counter = 0;
}
static void
panfrost_shader_meta_init(struct panfrost_context *ctx,
enum pipe_shader_type st,
struct mali_shader_meta *meta)
{
struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
memset(meta, 0, sizeof(*meta));
meta->shader = (ss->bo ? ss->bo->gpu : 0) | ss->first_tag;
meta->midgard1.uniform_count = MIN2(ss->uniform_count,
ss->uniform_cutoff);
meta->midgard1.work_count = ss->work_reg_count;
meta->attribute_count = ss->attribute_count;
meta->varying_count = ss->varying_count;
meta->midgard1.flags_hi = 0x8; /* XXX */
meta->midgard1.flags_lo = 0x220;
meta->texture_count = ctx->sampler_view_count[st];
meta->sampler_count = ctx->sampler_count[st];
meta->midgard1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
}
unsigned
panfrost_translate_compare_func(enum pipe_compare_func in)
{
switch (in) {
case PIPE_FUNC_NEVER:
return MALI_FUNC_NEVER;
case PIPE_FUNC_LESS:
return MALI_FUNC_LESS;
case PIPE_FUNC_EQUAL:
return MALI_FUNC_EQUAL;
case PIPE_FUNC_LEQUAL:
return MALI_FUNC_LEQUAL;
case PIPE_FUNC_GREATER:
return MALI_FUNC_GREATER;
case PIPE_FUNC_NOTEQUAL:
return MALI_FUNC_NOTEQUAL;
case PIPE_FUNC_GEQUAL:
return MALI_FUNC_GEQUAL;
case PIPE_FUNC_ALWAYS:
return MALI_FUNC_ALWAYS;
default:
unreachable("Invalid func");
}
}
static unsigned
panfrost_translate_stencil_op(enum pipe_stencil_op in)
{
switch (in) {
case PIPE_STENCIL_OP_KEEP:
return MALI_STENCIL_KEEP;
case PIPE_STENCIL_OP_ZERO:
return MALI_STENCIL_ZERO;
case PIPE_STENCIL_OP_REPLACE:
return MALI_STENCIL_REPLACE;
case PIPE_STENCIL_OP_INCR:
return MALI_STENCIL_INCR;
case PIPE_STENCIL_OP_DECR:
return MALI_STENCIL_DECR;
case PIPE_STENCIL_OP_INCR_WRAP:
return MALI_STENCIL_INCR_WRAP;
case PIPE_STENCIL_OP_DECR_WRAP:
return MALI_STENCIL_DECR_WRAP;
case PIPE_STENCIL_OP_INVERT:
return MALI_STENCIL_INVERT;
default:
unreachable("Invalid stencil op");
}
}
static void
panfrost_make_stencil_state(const struct pipe_stencil_state *in,
struct mali_stencil_test *out)
{
out->ref = 0; /* Gallium gets it from elsewhere */
out->mask = in->valuemask;
out->func = panfrost_translate_compare_func(in->func);
out->sfail = panfrost_translate_stencil_op(in->fail_op);
out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
out->dppass = panfrost_translate_stencil_op(in->zpass_op);
}
static void
panfrost_frag_meta_rasterizer_update(struct panfrost_context *ctx,
struct mali_shader_meta *fragmeta)
{
if (!ctx->rasterizer) {
SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, true);
SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, false);
fragmeta->depth_units = 0.0f;
fragmeta->depth_factor = 0.0f;
SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A, false);
SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B, false);
return;
}
bool msaa = ctx->rasterizer->base.multisample;
/* TODO: Sample size */
SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, msaa);
SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, !msaa);
fragmeta->depth_units = ctx->rasterizer->base.offset_units * 2.0f;
fragmeta->depth_factor = ctx->rasterizer->base.offset_scale;
/* XXX: Which bit is which? Does this maybe allow offseting not-tri? */
SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A,
ctx->rasterizer->base.offset_tri);
SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B,
ctx->rasterizer->base.offset_tri);
}
static void
panfrost_frag_meta_zsa_update(struct panfrost_context *ctx,
struct mali_shader_meta *fragmeta)
{
const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
int zfunc = PIPE_FUNC_ALWAYS;
if (!zsa) {
struct pipe_stencil_state default_stencil = {
.enabled = 0,
.func = PIPE_FUNC_ALWAYS,
.fail_op = MALI_STENCIL_KEEP,
.zfail_op = MALI_STENCIL_KEEP,
.zpass_op = MALI_STENCIL_KEEP,
.writemask = 0xFF,
.valuemask = 0xFF
};
panfrost_make_stencil_state(&default_stencil,
&fragmeta->stencil_front);
fragmeta->stencil_mask_front = default_stencil.writemask;
fragmeta->stencil_back = fragmeta->stencil_front;
fragmeta->stencil_mask_back = default_stencil.writemask;
SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST, false);
SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK, false);
} else {
SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST,
zsa->stencil[0].enabled);
panfrost_make_stencil_state(&zsa->stencil[0],
&fragmeta->stencil_front);
fragmeta->stencil_mask_front = zsa->stencil[0].writemask;
fragmeta->stencil_front.ref = ctx->stencil_ref.ref_value[0];
/* If back-stencil is not enabled, use the front values */
if (zsa->stencil[1].enabled) {
panfrost_make_stencil_state(&zsa->stencil[1],
&fragmeta->stencil_back);
fragmeta->stencil_mask_back = zsa->stencil[1].writemask;
fragmeta->stencil_back.ref = ctx->stencil_ref.ref_value[1];
} else {
fragmeta->stencil_back = fragmeta->stencil_front;
fragmeta->stencil_mask_back = fragmeta->stencil_mask_front;
fragmeta->stencil_back.ref = fragmeta->stencil_front.ref;
}
if (zsa->depth.enabled)
zfunc = zsa->depth.func;
/* Depth state (TODO: Refactor) */
SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK,
zsa->depth.writemask);
}
fragmeta->unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
fragmeta->unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(zfunc));
}
static void
panfrost_frag_meta_blend_update(struct panfrost_context *ctx,
struct mali_shader_meta *fragmeta,
struct midgard_blend_rt *rts)
{
const struct panfrost_screen *screen = pan_screen(ctx->base.screen);
SET_BIT(fragmeta->unknown2_4, MALI_NO_DITHER,
(screen->quirks & MIDGARD_SFBD) && ctx->blend &&
!ctx->blend->base.dither);
/* Get blending setup */
unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
unsigned shader_offset = 0;
struct panfrost_bo *shader_bo = NULL;
for (unsigned c = 0; c < rt_count; ++c)
blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo,
&shader_offset);
/* If there is a blend shader, work registers are shared. XXX: opt */
for (unsigned c = 0; c < rt_count; ++c) {
if (blend[c].is_shader)
fragmeta->midgard1.work_count = 16;
}
/* Even on MFBD, the shader descriptor gets blend shaders. It's *also*
* copied to the blend_meta appended (by convention), but this is the
* field actually read by the hardware. (Or maybe both are read...?).
* Specify the last RTi with a blend shader. */
fragmeta->blend.shader = 0;
for (signed rt = (rt_count - 1); rt >= 0; --rt) {
if (!blend[rt].is_shader)
continue;
fragmeta->blend.shader = blend[rt].shader.gpu |
blend[rt].shader.first_tag;
break;
}
if (screen->quirks & MIDGARD_SFBD) {
/* When only a single render target platform is used, the blend
* information is inside the shader meta itself. We additionally
* need to signal CAN_DISCARD for nontrivial blend modes (so
* we're able to read back the destination buffer) */
SET_BIT(fragmeta->unknown2_3, MALI_HAS_BLEND_SHADER,
blend[0].is_shader);
if (!blend[0].is_shader) {
fragmeta->blend.equation = *blend[0].equation.equation;
fragmeta->blend.constant = blend[0].equation.constant;
}
SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD,
!blend[0].no_blending);
return;
}
/* Additional blend descriptor tacked on for jobs using MFBD */
for (unsigned i = 0; i < rt_count; ++i) {
rts[i].flags = 0x200;
bool is_srgb = (ctx->pipe_framebuffer.nr_cbufs > i) &&
(ctx->pipe_framebuffer.cbufs[i]) &&
util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
SET_BIT(rts[i].flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
SET_BIT(rts[i].flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
SET_BIT(rts[i].flags, MALI_BLEND_SRGB, is_srgb);
SET_BIT(rts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
if (blend[i].is_shader) {
rts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
} else {
rts[i].blend.equation = *blend[i].equation.equation;
rts[i].blend.constant = blend[i].equation.constant;
}
}
}
static void
panfrost_frag_shader_meta_init(struct panfrost_context *ctx,
struct mali_shader_meta *fragmeta,
struct midgard_blend_rt *rts)
{
const struct panfrost_screen *screen = pan_screen(ctx->base.screen);
struct panfrost_shader_state *fs;
fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
fragmeta->alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000);
fragmeta->unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010;
fragmeta->unknown2_4 = 0x4e0;
/* unknown2_4 has 0x10 bit set on T6XX and T720. We don't know why this
* is required (independent of 32-bit/64-bit descriptors), or why it's
* not used on later GPU revisions. Otherwise, all shader jobs fault on
* these earlier chips (perhaps this is a chicken bit of some kind).
* More investigation is needed. */
SET_BIT(fragmeta->unknown2_4, 0x10, screen->quirks & MIDGARD_SFBD);
/* Depending on whether it's legal to in the given shader, we try to
* enable early-z testing (or forward-pixel kill?) */
SET_BIT(fragmeta->midgard1.flags_lo, MALI_EARLY_Z,
!fs->can_discard && !fs->writes_depth);
/* Add the writes Z/S flags if needed. */
SET_BIT(fragmeta->midgard1.flags_lo, MALI_WRITES_Z, fs->writes_depth);
SET_BIT(fragmeta->midgard1.flags_hi, MALI_WRITES_S, fs->writes_stencil);
/* Any time texturing is used, derivatives are implicitly calculated,
* so we need to enable helper invocations */
SET_BIT(fragmeta->midgard1.flags_lo, MALI_HELPER_INVOCATIONS,
fs->helper_invocations);
/* CAN_DISCARD should be set if the fragment shader possibly contains a
* 'discard' instruction. It is likely this is related to optimizations
* related to forward-pixel kill, as per "Mali Performance 3: Is
* EGL_BUFFER_PRESERVED a good thing?" by Peter Harris */
SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD, fs->can_discard);
SET_BIT(fragmeta->midgard1.flags_lo, 0x400, fs->can_discard);
panfrost_frag_meta_rasterizer_update(ctx, fragmeta);
panfrost_frag_meta_zsa_update(ctx, fragmeta);
panfrost_frag_meta_blend_update(ctx, fragmeta, rts);
}
void
panfrost_emit_shader_meta(struct panfrost_batch *batch,
enum pipe_shader_type st,
@@ -112,14 +439,44 @@ panfrost_emit_shader_meta(struct panfrost_batch *batch,
return;
}
struct mali_shader_meta meta;
panfrost_shader_meta_init(ctx, st, &meta);
/* Add the shader BO to the batch. */
panfrost_batch_add_bo(batch, ss->bo,
PAN_BO_ACCESS_PRIVATE |
PAN_BO_ACCESS_READ |
panfrost_bo_access_for_stage(st));
vtp->postfix.shader = panfrost_upload_transient(batch, ss->tripipe,
sizeof(*ss->tripipe));
mali_ptr shader_ptr;
if (st == PIPE_SHADER_FRAGMENT) {
struct panfrost_screen *screen = pan_screen(ctx->base.screen);
unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
size_t desc_size = sizeof(meta);
struct midgard_blend_rt rts[4];
struct panfrost_transfer xfer;
assert(rt_count <= ARRAY_SIZE(rts));
panfrost_frag_shader_meta_init(ctx, &meta, rts);
if (!(screen->quirks & MIDGARD_SFBD))
desc_size += sizeof(*rts) * rt_count;
xfer = panfrost_allocate_transient(batch, desc_size);
memcpy(xfer.cpu, &meta, sizeof(meta));
memcpy(xfer.cpu + sizeof(meta), rts, sizeof(*rts) * rt_count);
shader_ptr = xfer.gpu;
} else {
shader_ptr = panfrost_upload_transient(batch, &meta,
sizeof(meta));
}
vtp->postfix.shader = shader_ptr;
}
static void


@@ -32,6 +32,9 @@
#include "pan_job.h"
unsigned
panfrost_translate_compare_func(enum pipe_compare_func in);
void
panfrost_vt_attach_framebuffer(struct panfrost_context *ctx,
struct midgard_payload_vertex_tiler *vt);


@@ -54,9 +54,6 @@ panfrost_create_compute_state(
so->variant_count = 1;
so->active_variant = 0;
/* calloc, instead of malloc - to zero unused fields */
v->tripipe = CALLOC_STRUCT(mali_shader_meta);
if (cso->ir_type == PIPE_SHADER_IR_NIR_SERIALIZED) {
struct blob_reader reader;
const struct pipe_binary_program_header *hdr = cso->prog;
@@ -66,9 +63,8 @@ panfrost_create_compute_state(
so->cbase.ir_type = PIPE_SHADER_IR_NIR;
}
panfrost_shader_compile(ctx, v->tripipe,
so->cbase.ir_type, so->cbase.prog,
MESA_SHADER_COMPUTE, v, NULL);
panfrost_shader_compile(ctx, so->cbase.ir_type, so->cbase.prog,
MESA_SHADER_COMPUTE, v, NULL);
return so;
}
@@ -121,7 +117,6 @@ panfrost_launch_grid(struct pipe_context *pipe,
if (info->input)
pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, &ubuf);
panfrost_patch_shader_state(ctx, PIPE_SHADER_COMPUTE);
panfrost_emit_shader_meta(batch, PIPE_SHADER_COMPUTE, payload);
panfrost_emit_const_buf(batch, PIPE_SHADER_COMPUTE, payload);
panfrost_emit_shared_memory(batch, info, payload);


@@ -207,126 +207,6 @@ translate_tex_wrap(enum pipe_tex_wrap w)
}
}
static unsigned
panfrost_translate_compare_func(enum pipe_compare_func in)
{
switch (in) {
case PIPE_FUNC_NEVER:
return MALI_FUNC_NEVER;
case PIPE_FUNC_LESS:
return MALI_FUNC_LESS;
case PIPE_FUNC_EQUAL:
return MALI_FUNC_EQUAL;
case PIPE_FUNC_LEQUAL:
return MALI_FUNC_LEQUAL;
case PIPE_FUNC_GREATER:
return MALI_FUNC_GREATER;
case PIPE_FUNC_NOTEQUAL:
return MALI_FUNC_NOTEQUAL;
case PIPE_FUNC_GEQUAL:
return MALI_FUNC_GEQUAL;
case PIPE_FUNC_ALWAYS:
return MALI_FUNC_ALWAYS;
default:
unreachable("Invalid func");
}
}
static unsigned
panfrost_translate_stencil_op(enum pipe_stencil_op in)
{
switch (in) {
case PIPE_STENCIL_OP_KEEP:
return MALI_STENCIL_KEEP;
case PIPE_STENCIL_OP_ZERO:
return MALI_STENCIL_ZERO;
case PIPE_STENCIL_OP_REPLACE:
return MALI_STENCIL_REPLACE;
case PIPE_STENCIL_OP_INCR:
return MALI_STENCIL_INCR;
case PIPE_STENCIL_OP_DECR:
return MALI_STENCIL_DECR;
case PIPE_STENCIL_OP_INCR_WRAP:
return MALI_STENCIL_INCR_WRAP;
case PIPE_STENCIL_OP_DECR_WRAP:
return MALI_STENCIL_DECR_WRAP;
case PIPE_STENCIL_OP_INVERT:
return MALI_STENCIL_INVERT;
default:
unreachable("Invalid stencil op");
}
}
static void
panfrost_make_stencil_state(const struct pipe_stencil_state *in, struct mali_stencil_test *out)
{
out->ref = 0; /* Gallium gets it from elsewhere */
out->mask = in->valuemask;
out->func = panfrost_translate_compare_func(in->func);
out->sfail = panfrost_translate_stencil_op(in->fail_op);
out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
out->dppass = panfrost_translate_stencil_op(in->zpass_op);
}
static void
panfrost_default_shader_backend(struct panfrost_context *ctx)
{
struct panfrost_screen *screen = pan_screen(ctx->base.screen);
struct mali_shader_meta shader = {
.alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000),
.unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010,
.unknown2_4 = MALI_NO_MSAA | 0x4e0,
};
/* unknown2_4 has 0x10 bit set on T6XX and T720. We don't know why this is
* required (independent of 32-bit/64-bit descriptors), or why it's not
* used on later GPU revisions. Otherwise, all shader jobs fault on
* these earlier chips (perhaps this is a chicken bit of some kind).
* More investigation is needed. */
if (screen->quirks & MIDGARD_SFBD)
shader.unknown2_4 |= 0x10;
struct pipe_stencil_state default_stencil = {
.enabled = 0,
.func = PIPE_FUNC_ALWAYS,
.fail_op = MALI_STENCIL_KEEP,
.zfail_op = MALI_STENCIL_KEEP,
.zpass_op = MALI_STENCIL_KEEP,
.writemask = 0xFF,
.valuemask = 0xFF
};
panfrost_make_stencil_state(&default_stencil, &shader.stencil_front);
shader.stencil_mask_front = default_stencil.writemask;
panfrost_make_stencil_state(&default_stencil, &shader.stencil_back);
shader.stencil_mask_back = default_stencil.writemask;
if (default_stencil.enabled)
shader.unknown2_4 |= MALI_STENCIL_TEST;
memcpy(&ctx->fragment_shader_core, &shader, sizeof(shader));
}
bool
panfrost_writes_point_size(struct panfrost_context *ctx)
{
@@ -503,33 +383,12 @@ panfrost_ubo_count(struct panfrost_context *ctx, enum pipe_shader_type stage)
return 32 - __builtin_clz(mask);
}
/* Fixes up a shader state with current state */
void
panfrost_patch_shader_state(struct panfrost_context *ctx,
enum pipe_shader_type stage)
{
struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, stage);
if (!ss)
return;
ss->tripipe->texture_count = ctx->sampler_view_count[stage];
ss->tripipe->sampler_count = ctx->sampler_count[stage];
ss->tripipe->midgard1.flags_lo = 0x220;
unsigned ubo_count = panfrost_ubo_count(ctx, stage);
ss->tripipe->midgard1.uniform_buffer_count = ubo_count;
}
/* Go through dirty flags and actualise them in the cmdstream. */
static void
panfrost_emit_for_draw(struct panfrost_context *ctx)
{
struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
struct panfrost_screen *screen = pan_screen(ctx->base.screen);
panfrost_batch_add_fbo_bos(batch);
@@ -542,166 +401,15 @@ panfrost_emit_for_draw(struct panfrost_context *ctx)
unsigned total_count = ctx->padded_count * ctx->instance_count;
panfrost_emit_varying_descriptor(ctx, total_count);
if (ctx->rasterizer) {
bool msaa = ctx->rasterizer->base.multisample;
/* TODO: Sample size */
SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, msaa);
SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !msaa);
}
panfrost_batch_set_requirements(batch);
panfrost_vt_update_rasterizer(ctx, &ctx->payloads[PIPE_SHADER_FRAGMENT]);
panfrost_vt_update_occlusion_query(ctx, &ctx->payloads[PIPE_SHADER_FRAGMENT]);
panfrost_patch_shader_state(ctx, PIPE_SHADER_VERTEX);
panfrost_emit_shader_meta(batch, PIPE_SHADER_VERTEX,
&ctx->payloads[PIPE_SHADER_VERTEX]);
if (ctx->shader[PIPE_SHADER_FRAGMENT]) {
struct panfrost_shader_state *variant = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
panfrost_patch_shader_state(ctx, PIPE_SHADER_FRAGMENT);
#define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name
COPY(shader);
COPY(attribute_count);
COPY(varying_count);
COPY(texture_count);
COPY(sampler_count);
COPY(midgard1.uniform_count);
COPY(midgard1.uniform_buffer_count);
COPY(midgard1.work_count);
COPY(midgard1.flags_lo);
COPY(midgard1.flags_hi);
#undef COPY
/* Get blending setup */
unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
unsigned shader_offset = 0;
struct panfrost_bo *shader_bo = NULL;
for (unsigned c = 0; c < rt_count; ++c) {
blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo, &shader_offset);
}
/* If there is a blend shader, work registers are shared. XXX: opt */
for (unsigned c = 0; c < rt_count; ++c) {
if (blend[c].is_shader)
ctx->fragment_shader_core.midgard1.work_count = 16;
}
/* Depending on whether it's legal to in the given shader, we
* try to enable early-z testing (or forward-pixel kill?) */
SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, MALI_EARLY_Z,
!variant->can_discard && !variant->writes_depth);
/* Add the writes Z/S flags if needed. */
SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo,
MALI_WRITES_Z, variant->writes_depth);
SET_BIT(ctx->fragment_shader_core.midgard1.flags_hi,
MALI_WRITES_S, variant->writes_stencil);
/* Any time texturing is used, derivatives are implicitly
* calculated, so we need to enable helper invocations */
SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, MALI_HELPER_INVOCATIONS, variant->helper_invocations);
/* Assign the stencil refs late */
unsigned front_ref = ctx->stencil_ref.ref_value[0];
unsigned back_ref = ctx->stencil_ref.ref_value[1];
bool back_enab = ctx->depth_stencil->stencil[1].enabled;
ctx->fragment_shader_core.stencil_front.ref = front_ref;
ctx->fragment_shader_core.stencil_back.ref = back_enab ? back_ref : front_ref;
/* CAN_DISCARD should be set if the fragment shader possibly
* contains a 'discard' instruction. It is likely this is
* related to optimizations related to forward-pixel kill, as
* per "Mali Performance 3: Is EGL_BUFFER_PRESERVED a good
* thing?" by Peter Harris
*/
SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_CAN_DISCARD, variant->can_discard);
SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, 0x400, variant->can_discard);
/* Even on MFBD, the shader descriptor gets blend shaders. It's
* *also* copied to the blend_meta appended (by convention),
* but this is the field actually read by the hardware. (Or
* maybe both are read...?). Specify the last RTi with a blend
* shader. */
ctx->fragment_shader_core.blend.shader = 0;
for (signed rt = (rt_count - 1); rt >= 0; --rt) {
if (blend[rt].is_shader) {
ctx->fragment_shader_core.blend.shader =
blend[rt].shader.gpu | blend[rt].shader.first_tag;
break;
}
}
if (screen->quirks & MIDGARD_SFBD) {
/* When only a single render target platform is used, the blend
* information is inside the shader meta itself. We
* additionally need to signal CAN_DISCARD for nontrivial blend
* modes (so we're able to read back the destination buffer) */
SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_BLEND_SHADER, blend[0].is_shader);
if (!blend[0].is_shader) {
ctx->fragment_shader_core.blend.equation =
*blend[0].equation.equation;
ctx->fragment_shader_core.blend.constant =
blend[0].equation.constant;
}
SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_CAN_DISCARD, !blend[0].no_blending);
}
size_t size = sizeof(struct mali_shader_meta) + (sizeof(struct midgard_blend_rt) * rt_count);
struct panfrost_transfer transfer = panfrost_allocate_transient(batch, size);
memcpy(transfer.cpu, &ctx->fragment_shader_core, sizeof(struct mali_shader_meta));
ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.shader = transfer.gpu;
if (!(screen->quirks & MIDGARD_SFBD)) {
/* Additional blend descriptor tacked on for jobs using MFBD */
struct midgard_blend_rt rts[4];
for (unsigned i = 0; i < rt_count; ++i) {
rts[i].flags = 0x200;
bool is_srgb =
(ctx->pipe_framebuffer.nr_cbufs > i) &&
(ctx->pipe_framebuffer.cbufs[i]) &&
util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
SET_BIT(rts[i].flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
SET_BIT(rts[i].flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
SET_BIT(rts[i].flags, MALI_BLEND_SRGB, is_srgb);
SET_BIT(rts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
if (blend[i].is_shader) {
rts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
} else {
rts[i].blend.equation = *blend[i].equation.equation;
rts[i].blend.constant = blend[i].equation.constant;
}
}
memcpy(transfer.cpu + sizeof(struct mali_shader_meta), rts, sizeof(rts[0]) * rt_count);
}
}
panfrost_emit_shader_meta(batch, PIPE_SHADER_FRAGMENT,
&ctx->payloads[PIPE_SHADER_FRAGMENT]);
/* We stage to transient, so always dirty.. */
if (ctx->vertex)
@@ -1110,17 +818,9 @@ panfrost_bind_rasterizer_state(
if (!hwcso)
return;
ctx->fragment_shader_core.depth_units = ctx->rasterizer->base.offset_units * 2.0f;
ctx->fragment_shader_core.depth_factor = ctx->rasterizer->base.offset_scale;
/* Gauranteed with the core GL call, so don't expose ARB_polygon_offset */
assert(ctx->rasterizer->base.offset_clamp == 0.0);
/* XXX: Which bit is which? Does this maybe allow offseting not-tri? */
SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_DEPTH_RANGE_A, ctx->rasterizer->base.offset_tri);
SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_DEPTH_RANGE_B, ctx->rasterizer->base.offset_tri);
/* Point sprites are emulated */
struct panfrost_shader_state *variant = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
@@ -1184,15 +884,13 @@ panfrost_create_shader_state(
if (unlikely((pan_debug & PAN_DBG_PRECOMPILE) && cso->type == PIPE_SHADER_IR_NIR)) {
struct panfrost_context *ctx = pan_context(pctx);
struct mali_shader_meta meta;
struct panfrost_shader_state state;
uint64_t outputs_written;
panfrost_shader_compile(ctx, &meta,
PIPE_SHADER_IR_NIR,
so->base.ir.nir,
tgsi_processor_to_shader_stage(stage), &state,
&outputs_written);
panfrost_shader_compile(ctx, PIPE_SHADER_IR_NIR,
so->base.ir.nir,
tgsi_processor_to_shader_stage(stage),
&state, &outputs_written);
}
return so;
@@ -1440,9 +1138,6 @@ panfrost_bind_shader_state(
PIPE_SPRITE_COORD_UPPER_LEFT;
}
}
variants->variants[variant].tripipe = calloc(1, sizeof(struct mali_shader_meta));
}
/* Select this variant */
@@ -1456,12 +1151,12 @@ panfrost_bind_shader_state(
if (!shader_state->compiled) {
uint64_t outputs_written = 0;
panfrost_shader_compile(ctx, shader_state->tripipe,
variants->base.type,
variants->base.type == PIPE_SHADER_IR_NIR ?
variants->base.ir.nir :
variants->base.tokens,
tgsi_processor_to_shader_stage(type), shader_state,
panfrost_shader_compile(ctx, variants->base.type,
variants->base.type == PIPE_SHADER_IR_NIR ?
variants->base.ir.nir :
variants->base.tokens,
tgsi_processor_to_shader_stage(type),
shader_state,
&outputs_written);
shader_state->compiled = true;
@@ -1753,28 +1448,6 @@ panfrost_bind_depth_stencil_state(struct pipe_context *pipe,
ctx->base.bind_fs_state(&ctx->base, ctx->shader[PIPE_SHADER_FRAGMENT]);
}
/* Stencil state */
SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_STENCIL_TEST, depth_stencil->stencil[0].enabled);
panfrost_make_stencil_state(&depth_stencil->stencil[0], &ctx->fragment_shader_core.stencil_front);
ctx->fragment_shader_core.stencil_mask_front = depth_stencil->stencil[0].writemask;
/* If back-stencil is not enabled, use the front values */
bool back_enab = ctx->depth_stencil->stencil[1].enabled;
unsigned back_index = back_enab ? 1 : 0;
panfrost_make_stencil_state(&depth_stencil->stencil[back_index], &ctx->fragment_shader_core.stencil_back);
ctx->fragment_shader_core.stencil_mask_back = depth_stencil->stencil[back_index].writemask;
/* Depth state (TODO: Refactor) */
SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_DEPTH_WRITEMASK,
depth_stencil->depth.writemask);
int func = depth_stencil->depth.enabled ? depth_stencil->depth.func : PIPE_FUNC_ALWAYS;
ctx->fragment_shader_core.unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
ctx->fragment_shader_core.unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(func));
/* Bounds test not implemented */
assert(!depth_stencil->depth.bounds_test);
}
@@ -2138,7 +1811,6 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
panfrost_batch_init(ctx);
panfrost_emit_vertex_payload(ctx);
panfrost_invalidate_frame(ctx);
panfrost_default_shader_backend(ctx);
return gallium;
}


@@ -119,12 +119,6 @@ struct panfrost_context {
/* Each draw has corresponding vertex and tiler payloads */
struct midgard_payload_vertex_tiler payloads[PIPE_SHADER_TYPES];
/* The fragment shader binary itself is pointed here (for the tripipe) but
* also everything else in the shader core, including blending, the
* stencil/depth tests, etc. Refer to the presentations. */
struct mali_shader_meta fragment_shader_core;
unsigned vertex_count;
unsigned instance_count;
enum pipe_prim_type active_prim;
@@ -188,7 +182,6 @@ struct panfrost_rasterizer {
struct panfrost_shader_state {
/* Compiled, mapped descriptor, ready for the hardware */
bool compiled;
struct mali_shader_meta *tripipe;
/* Non-descript information */
int uniform_count;
@@ -296,10 +289,6 @@ panfrost_invalidate_frame(struct panfrost_context *ctx);
bool
panfrost_writes_point_size(struct panfrost_context *ctx);
void
panfrost_patch_shader_state(struct panfrost_context *ctx,
enum pipe_shader_type stage);
struct panfrost_transfer
panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler);
@@ -325,14 +314,12 @@ mali_ptr
panfrost_fragment_job(struct panfrost_batch *batch, bool has_draws);
void
panfrost_shader_compile(
struct panfrost_context *ctx,
struct mali_shader_meta *meta,
enum pipe_shader_ir ir_type,
const void *ir,
gl_shader_stage stage,
struct panfrost_shader_state *state,
uint64_t *outputs_written);
panfrost_shader_compile(struct panfrost_context *ctx,
enum pipe_shader_ir ir_type,
const void *ir,
gl_shader_stage stage,
struct panfrost_shader_state *state,
uint64_t *outputs_written);
unsigned
panfrost_ubo_count(struct panfrost_context *ctx, enum pipe_shader_type stage);