pan/bi: Use fused dual source blending

Instead of emitting a pile of moves to the fixed registers r4-r7 at
codegen time and hoping everything works out, add a second staging
source to the BLEND instruction in the intermediate representation
containing the dual-source colour, and teach register allocation to
precolour that source to r4-r7. This better models the operation of
blending render target #0 with two sources.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13714>
This commit is contained in:
Alyssa Rosenzweig 2021-11-08 09:52:25 -05:00 committed by Marge Bot
parent af3863c658
commit 795638767d
6 changed files with 27 additions and 26 deletions

View File

@ -2520,8 +2520,11 @@
<src start="0"/>
<src start="3" mask="0xf7"/>
<src start="6" mask="0xf7"/>
<!-- pseudo source for a dual source blend input -->
<src start="9" pseudo="true"/>
<!-- not actually encoded, but used for IR -->
<immediate name="sr_count" size="4" pseudo="true"/>
<immediate name="sr_count_2" size="4" pseudo="true"/>
</ins>
<ins name="+BRANCH.f16" mask="0xf8000" exact="0x68000" last="true" dests="0">

View File

@ -318,6 +318,11 @@ bi_allocate_registers(bi_context *ctx, bool *success, bool full_regs)
unsigned node = bi_get_node(ins->src[0]);
assert(node < node_count);
l->solutions[node] = 0;
/* Dual source blend input in r4-r7 */
node = bi_get_node(ins->src[4]);
if (node < node_count)
l->solutions[node] = 4;
}
if (dest < node_count)

View File

@ -504,11 +504,14 @@ bi_emit_load_blend_input(bi_builder *b, nir_intrinsic_instr *instr)
}
static void
bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, unsigned rt)
bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T,
bi_index rgba2, nir_alu_type T2, unsigned rt)
{
/* Reads 2 or 4 staging registers to cover the input */
unsigned size = nir_alu_type_get_type_size(T);
unsigned size_2 = nir_alu_type_get_type_size(T2);
unsigned sr_count = (size <= 16) ? 2 : 4;
unsigned sr_count_2 = (size_2 <= 16) ? 2 : 4;
const struct panfrost_compile_inputs *inputs = b->shader->inputs;
uint64_t blend_desc = inputs->blend.bifrost_blend_desc;
@ -523,7 +526,8 @@ bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, unsigned rt)
bi_blend_to(b, bi_register(0), rgba,
bi_register(60),
bi_imm_u32(blend_desc & 0xffffffff),
bi_imm_u32(blend_desc >> 32), sr_count);
bi_imm_u32(blend_desc >> 32),
bi_null(), sr_count, 0);
} else {
/* Blend descriptor comes from the FAU RAM. By convention, the
* return address is stored in r48 and will be used by the
@ -531,11 +535,15 @@ bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, unsigned rt)
bi_blend_to(b, bi_register(48), rgba,
bi_register(60),
bi_fau(BIR_FAU_BLEND_0 + rt, false),
bi_fau(BIR_FAU_BLEND_0 + rt, true), sr_count);
bi_fau(BIR_FAU_BLEND_0 + rt, true),
rgba2, sr_count, sr_count_2);
}
assert(rt < 8);
b->shader->info.bifrost->blend[rt].type = T;
if (T2)
b->shader->info.bifrost->blend_src1_type = T2;
}
/* Blend shaders do not need to run ATEST since they are dependent on a
@ -586,7 +594,6 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr)
}
bi_index src0 = bi_src_index(&instr->src[0]);
bi_index src1 = combined ? bi_src_index(&instr->src[4]) : bi_null();
/* By ISA convention, the coverage mask is stored in R60. The store
* itself will be handled by a subsequent ATEST instruction */
@ -598,19 +605,6 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr)
return;
}
/* Dual-source blending is implemented by putting the color in
* registers r4-r7. */
if (writeout & PAN_WRITEOUT_2) {
unsigned count = nir_src_num_components(instr->src[4]);
for (unsigned i = 0; i < count; ++i)
bi_mov_i32_to(b, bi_register(4 + i), bi_word(src1, i));
b->shader->info.bifrost->blend_src1_type =
nir_intrinsic_dest_type(instr);
}
/* Emit ATEST if we have to, note ATEST requires a floating-point alpha
* value, but render target #0 might not be floating point. However the
* alpha value is only used for alpha-to-coverage, a stage which is
@ -648,7 +642,10 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr)
if (emit_blend) {
unsigned rt = combined ? 0 : (loc - FRAG_RESULT_DATA0);
bool dual = (writeout & PAN_WRITEOUT_2);
bi_index color = bi_src_index(&instr->src[0]);
bi_index color2 = dual ? bi_src_index(&instr->src[4]) : bi_null();
nir_alu_type T2 = dual ? nir_intrinsic_dest_type(instr) : 0;
/* Explicit copy since BLEND inputs are precoloured to R0-R3,
* TODO: maybe schedule around this or implement in RA as a
@ -667,7 +664,8 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr)
nir_alu_type_get_type_size(nir_intrinsic_src_type(instr)));
}
bi_emit_blend_op(b, color, nir_intrinsic_src_type(instr), rt);
bi_emit_blend_op(b, color, nir_intrinsic_src_type(instr),
color2, T2, rt);
}
if (b->shader->inputs->is_blend) {

View File

@ -91,6 +91,8 @@ bi_count_read_registers(const bi_instr *ins, unsigned s)
return 1;
else if (s == 0 && bi_opcode_props[ins->op].sr_read)
return bi_count_staging_registers(ins);
else if (s == 4 && ins->op == BI_OPCODE_BLEND)
return ins->sr_count_2; /* Dual source blending */
else
return 1;
}

View File

@ -81,7 +81,7 @@ TEST_F(SchedulerPredicates, LOAD)
TEST_F(SchedulerPredicates, BLEND)
{
bi_instr *blend = bi_blend_to(b, TMP(), TMP(), TMP(), TMP(), TMP(), 4);
bi_instr *blend = bi_blend_to(b, TMP(), TMP(), TMP(), TMP(), TMP(), TMP(), 4, 4);
ASSERT_FALSE(bi_can_fma(blend));
ASSERT_TRUE(bi_can_add(blend));
ASSERT_TRUE(bi_must_message(blend));

View File

@ -29,13 +29,6 @@ shaders@point-vertex-id gl_vertexid gl_instanceid divisor,Fail
shaders@point-vertex-id gl_vertexid gl_instanceid,Fail
spec@arb_base_instance@arb_base_instance-drawarrays,Fail
spec@arb_blend_func_extended@arb_blend_func_extended-dual-src-blending-issue-1917_gles3,Crash
spec@arb_blend_func_extended@arb_blend_func_extended-fbo-extended-blend-explicit,Fail
spec@arb_blend_func_extended@arb_blend_func_extended-fbo-extended-blend-explicit_gles3,Fail
spec@arb_blend_func_extended@arb_blend_func_extended-fbo-extended-blend,Fail
spec@arb_blend_func_extended@arb_blend_func_extended-fbo-extended-blend_gles3,Fail
spec@arb_blend_func_extended@arb_blend_func_extended-fbo-extended-blend-pattern,Fail
spec@arb_blend_func_extended@arb_blend_func_extended-fbo-extended-blend-pattern_gles2,Crash
spec@arb_blend_func_extended@arb_blend_func_extended-fbo-extended-blend-pattern_gles3,Fail
spec@arb_color_buffer_float@gl_rgba16f-render,Fail
spec@arb_color_buffer_float@gl_rgba16f-render-fog,Fail
spec@arb_color_buffer_float@gl_rgba16f-render-sanity,Fail