pan/mdg: Add support for multi sample iteration writeout
Some MSAA+format combinations require the writeout to be split. Right now, this only impacts blend shaders, since we only support MSAA 4x and the only formats that could exceed the 128-bit/pixel limit in MSAA 4x are not supported by the fixed-function blend unit. We thus rely on the blend shader to split things properly. Things will change once we add MSAA 8x/16x to the mix, since even the blendable formats will exceed the 128-bit/pixel limit in that case. Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7984>
This commit is contained in:
parent
29f938a0ec
commit
e27052281a
|
@ -233,6 +233,8 @@ enum midgard_rt_id {
|
|||
MIDGARD_NUM_RTS,
|
||||
};
|
||||
|
||||
#define MIDGARD_MAX_SAMPLE_ITER 16
|
||||
|
||||
typedef struct compiler_context {
|
||||
nir_shader *nir;
|
||||
gl_shader_stage stage;
|
||||
|
@ -243,6 +245,9 @@ typedef struct compiler_context {
|
|||
/* Render target number for a keyed blend shader. Depends on is_blend */
|
||||
unsigned blend_rt;
|
||||
|
||||
/* Number of writeout iterations (128-bit chunks) for a keyed blend shader. Depends on is_blend */
|
||||
unsigned blend_sample_iterations;
|
||||
|
||||
/* Index to precolour to r0 for an input blend colour */
|
||||
unsigned blend_input;
|
||||
|
||||
|
@ -313,7 +318,7 @@ typedef struct compiler_context {
|
|||
uint32_t quirks;
|
||||
|
||||
/* Writeout instructions for each render target */
|
||||
midgard_instruction *writeout_branch[MIDGARD_NUM_RTS];
|
||||
midgard_instruction *writeout_branch[MIDGARD_NUM_RTS][MIDGARD_MAX_SAMPLE_ITER];
|
||||
|
||||
struct panfrost_sysvals sysvals;
|
||||
} compiler_context;
|
||||
|
|
|
@ -1316,11 +1316,13 @@ compute_builtin_arg(nir_op op)
|
|||
}
|
||||
|
||||
static void
|
||||
emit_fragment_store(compiler_context *ctx, unsigned src, unsigned src_z, unsigned src_s, enum midgard_rt_id rt)
|
||||
emit_fragment_store(compiler_context *ctx, unsigned src, unsigned src_z, unsigned src_s,
|
||||
enum midgard_rt_id rt, unsigned sample_iter)
|
||||
{
|
||||
assert(rt < ARRAY_SIZE(ctx->writeout_branch));
|
||||
assert(sample_iter < ARRAY_SIZE(ctx->writeout_branch[0]));
|
||||
|
||||
midgard_instruction *br = ctx->writeout_branch[rt];
|
||||
midgard_instruction *br = ctx->writeout_branch[rt][sample_iter];
|
||||
|
||||
assert(!br);
|
||||
|
||||
|
@ -1336,7 +1338,12 @@ emit_fragment_store(compiler_context *ctx, unsigned src, unsigned src_z, unsigne
|
|||
/* Add dependencies */
|
||||
ins.src[0] = src;
|
||||
ins.src_types[0] = nir_type_uint32;
|
||||
ins.constants.u32[0] = depth_only ? 0xFF : (rt - MIDGARD_COLOR_RT0) * 0x100;
|
||||
|
||||
if (depth_only)
|
||||
ins.constants.u32[0] = 0xFF;
|
||||
else
|
||||
ins.constants.u32[0] = ((rt - MIDGARD_COLOR_RT0) << 8) | sample_iter;
|
||||
|
||||
for (int i = 0; i < 4; ++i)
|
||||
ins.swizzle[0][i] = i;
|
||||
|
||||
|
@ -1356,7 +1363,7 @@ emit_fragment_store(compiler_context *ctx, unsigned src, unsigned src_z, unsigne
|
|||
/* Emit the branch */
|
||||
br = emit_mir_instruction(ctx, ins);
|
||||
schedule_barrier(ctx);
|
||||
ctx->writeout_branch[rt] = br;
|
||||
ctx->writeout_branch[rt][sample_iter] = br;
|
||||
|
||||
/* Push our current location = current block count - 1 = where we'll
|
||||
* jump to. Maybe a bit too clever for my own good */
|
||||
|
@ -1693,7 +1700,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
|
|||
reg_s = nir_src_index(ctx, &instr->src[3]);
|
||||
}
|
||||
|
||||
emit_fragment_store(ctx, reg, reg_z, reg_s, rt);
|
||||
emit_fragment_store(ctx, reg, reg_z, reg_s, rt, 0);
|
||||
} else if (ctx->stage == MESA_SHADER_VERTEX) {
|
||||
assert(instr->intrinsic == nir_intrinsic_store_output);
|
||||
|
||||
|
@ -1757,7 +1764,8 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
|
|||
case nir_intrinsic_store_raw_output_pan:
|
||||
assert (ctx->stage == MESA_SHADER_FRAGMENT);
|
||||
reg = nir_src_index(ctx, &instr->src[0]);
|
||||
emit_fragment_store(ctx, reg, ~0, ~0, ctx->blend_rt);
|
||||
for (unsigned s = 0; s < ctx->blend_sample_iterations; s++)
|
||||
emit_fragment_store(ctx, reg, ~0, ~0, ctx->blend_rt, s);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_store_global:
|
||||
|
@ -2451,10 +2459,10 @@ midgard_legalize_invert(compiler_context *ctx, midgard_block *block)
|
|||
}
|
||||
|
||||
static unsigned
|
||||
emit_fragment_epilogue(compiler_context *ctx, unsigned rt)
|
||||
emit_fragment_epilogue(compiler_context *ctx, unsigned rt, unsigned sample_iter)
|
||||
{
|
||||
/* Loop to ourselves */
|
||||
midgard_instruction *br = ctx->writeout_branch[rt];
|
||||
midgard_instruction *br = ctx->writeout_branch[rt][sample_iter];
|
||||
struct midgard_instruction ins = v_branch(false, false);
|
||||
ins.writeout = br->writeout;
|
||||
ins.branch.target_block = ctx->block_count - 1;
|
||||
|
@ -2683,27 +2691,38 @@ static void
|
|||
mir_add_writeout_loops(compiler_context *ctx)
|
||||
{
|
||||
for (unsigned rt = 0; rt < ARRAY_SIZE(ctx->writeout_branch); ++rt) {
|
||||
midgard_instruction *br = ctx->writeout_branch[rt];
|
||||
if (!br) continue;
|
||||
for (unsigned s = 0; s < MIDGARD_MAX_SAMPLE_ITER; ++s) {
|
||||
midgard_instruction *br = ctx->writeout_branch[rt][s];
|
||||
if (!br) continue;
|
||||
|
||||
unsigned popped = br->branch.target_block;
|
||||
pan_block_add_successor(&(mir_get_block(ctx, popped - 1)->base), &ctx->current_block->base);
|
||||
br->branch.target_block = emit_fragment_epilogue(ctx, rt);
|
||||
br->branch.target_type = TARGET_GOTO;
|
||||
unsigned popped = br->branch.target_block;
|
||||
pan_block_add_successor(&(mir_get_block(ctx, popped - 1)->base),
|
||||
&ctx->current_block->base);
|
||||
br->branch.target_block = emit_fragment_epilogue(ctx, rt, s);
|
||||
br->branch.target_type = TARGET_GOTO;
|
||||
|
||||
/* If we have more RTs, we'll need to restore back after our
|
||||
* loop terminates */
|
||||
/* If more sample iterations or RTs follow, we'll need to restore
|
||||
* back after our loop terminates */
|
||||
midgard_instruction *next_br = NULL;
|
||||
|
||||
if ((rt + 1) < ARRAY_SIZE(ctx->writeout_branch) && ctx->writeout_branch[rt + 1]) {
|
||||
midgard_instruction uncond = v_branch(false, false);
|
||||
uncond.branch.target_block = popped;
|
||||
uncond.branch.target_type = TARGET_GOTO;
|
||||
emit_mir_instruction(ctx, uncond);
|
||||
pan_block_add_successor(&ctx->current_block->base, &(mir_get_block(ctx, popped)->base));
|
||||
schedule_barrier(ctx);
|
||||
} else {
|
||||
/* We're last, so we can terminate here */
|
||||
br->last_writeout = true;
|
||||
if ((s + 1) < MIDGARD_MAX_SAMPLE_ITER)
|
||||
next_br = ctx->writeout_branch[rt][s + 1];
|
||||
|
||||
if (!next_br && (rt + 1) < ARRAY_SIZE(ctx->writeout_branch))
|
||||
next_br = ctx->writeout_branch[rt + 1][0];
|
||||
|
||||
if (next_br) {
|
||||
midgard_instruction uncond = v_branch(false, false);
|
||||
uncond.branch.target_block = popped;
|
||||
uncond.branch.target_type = TARGET_GOTO;
|
||||
emit_mir_instruction(ctx, uncond);
|
||||
pan_block_add_successor(&ctx->current_block->base,
|
||||
&(mir_get_block(ctx, popped)->base));
|
||||
schedule_barrier(ctx);
|
||||
} else {
|
||||
/* We're last, so we can terminate here */
|
||||
br->last_writeout = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2725,6 +2744,15 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
|
|||
ctx->stage = nir->info.stage;
|
||||
ctx->is_blend = inputs->is_blend;
|
||||
ctx->blend_rt = MIDGARD_COLOR_RT0 + inputs->blend.rt;
|
||||
if (inputs->is_blend) {
|
||||
unsigned nr_samples = MAX2(inputs->blend.nr_samples, 1);
|
||||
const struct util_format_description *desc =
|
||||
util_format_description(inputs->rt_formats[inputs->blend.rt]);
|
||||
|
||||
/* We have to split writeout in 128 bit chunks */
|
||||
ctx->blend_sample_iterations =
|
||||
DIV_ROUND_UP(desc->block.bits * nr_samples, 128);
|
||||
}
|
||||
memcpy(ctx->blend_constants, inputs->blend.constants, sizeof(ctx->blend_constants));
|
||||
ctx->blend_input = ~0;
|
||||
ctx->blend_src1 = ~0;
|
||||
|
|
|
@ -112,6 +112,7 @@ struct panfrost_compile_inputs {
|
|||
bool is_blend;
|
||||
struct {
|
||||
unsigned rt;
|
||||
unsigned nr_samples;
|
||||
float constants[4];
|
||||
uint64_t bifrost_blend_desc;
|
||||
} blend;
|
||||
|
|
Loading…
Reference in New Issue