pan/bi: Use fused dual source blending
Instead of emitting a pile of moves to fixed registers at codegen time and hoping everything works out, add a second staging source to the BLEND instruction in the intermediate representation containing the dual source colour, and modify register allocation appropriately. This better models the operation of blending render target #0 with two sources.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13714>
This commit is contained in:
parent
af3863c658
commit
795638767d
|
@ -2520,8 +2520,11 @@
|
||||||
<src start="0"/>
|
<src start="0"/>
|
||||||
<src start="3" mask="0xf7"/>
|
<src start="3" mask="0xf7"/>
|
||||||
<src start="6" mask="0xf7"/>
|
<src start="6" mask="0xf7"/>
|
||||||
|
<!-- pseudo source for a dual source blend input -->
|
||||||
|
<src start="9" pseudo="true"/>
|
||||||
<!-- not actually encoded, but used for IR -->
|
<!-- not actually encoded, but used for IR -->
|
||||||
<immediate name="sr_count" size="4" pseudo="true"/>
|
<immediate name="sr_count" size="4" pseudo="true"/>
|
||||||
|
<immediate name="sr_count_2" size="4" pseudo="true"/>
|
||||||
</ins>
|
</ins>
|
||||||
|
|
||||||
<ins name="+BRANCH.f16" mask="0xf8000" exact="0x68000" last="true" dests="0">
|
<ins name="+BRANCH.f16" mask="0xf8000" exact="0x68000" last="true" dests="0">
|
||||||
|
|
|
@ -318,6 +318,11 @@ bi_allocate_registers(bi_context *ctx, bool *success, bool full_regs)
|
||||||
unsigned node = bi_get_node(ins->src[0]);
|
unsigned node = bi_get_node(ins->src[0]);
|
||||||
assert(node < node_count);
|
assert(node < node_count);
|
||||||
l->solutions[node] = 0;
|
l->solutions[node] = 0;
|
||||||
|
|
||||||
|
/* Dual source blend input in r4-r7 */
|
||||||
|
node = bi_get_node(ins->src[4]);
|
||||||
|
if (node < node_count)
|
||||||
|
l->solutions[node] = 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dest < node_count)
|
if (dest < node_count)
|
||||||
|
|
|
@ -504,11 +504,14 @@ bi_emit_load_blend_input(bi_builder *b, nir_intrinsic_instr *instr)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, unsigned rt)
|
bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T,
|
||||||
|
bi_index rgba2, nir_alu_type T2, unsigned rt)
|
||||||
{
|
{
|
||||||
/* Reads 2 or 4 staging registers to cover the input */
|
/* Reads 2 or 4 staging registers to cover the input */
|
||||||
unsigned size = nir_alu_type_get_type_size(T);
|
unsigned size = nir_alu_type_get_type_size(T);
|
||||||
|
unsigned size_2 = nir_alu_type_get_type_size(T2);
|
||||||
unsigned sr_count = (size <= 16) ? 2 : 4;
|
unsigned sr_count = (size <= 16) ? 2 : 4;
|
||||||
|
unsigned sr_count_2 = (size_2 <= 16) ? 2 : 4;
|
||||||
const struct panfrost_compile_inputs *inputs = b->shader->inputs;
|
const struct panfrost_compile_inputs *inputs = b->shader->inputs;
|
||||||
uint64_t blend_desc = inputs->blend.bifrost_blend_desc;
|
uint64_t blend_desc = inputs->blend.bifrost_blend_desc;
|
||||||
|
|
||||||
|
@ -523,7 +526,8 @@ bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, unsigned rt)
|
||||||
bi_blend_to(b, bi_register(0), rgba,
|
bi_blend_to(b, bi_register(0), rgba,
|
||||||
bi_register(60),
|
bi_register(60),
|
||||||
bi_imm_u32(blend_desc & 0xffffffff),
|
bi_imm_u32(blend_desc & 0xffffffff),
|
||||||
bi_imm_u32(blend_desc >> 32), sr_count);
|
bi_imm_u32(blend_desc >> 32),
|
||||||
|
bi_null(), sr_count, 0);
|
||||||
} else {
|
} else {
|
||||||
/* Blend descriptor comes from the FAU RAM. By convention, the
|
/* Blend descriptor comes from the FAU RAM. By convention, the
|
||||||
* return address is stored in r48 and will be used by the
|
* return address is stored in r48 and will be used by the
|
||||||
|
@ -531,11 +535,15 @@ bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, unsigned rt)
|
||||||
bi_blend_to(b, bi_register(48), rgba,
|
bi_blend_to(b, bi_register(48), rgba,
|
||||||
bi_register(60),
|
bi_register(60),
|
||||||
bi_fau(BIR_FAU_BLEND_0 + rt, false),
|
bi_fau(BIR_FAU_BLEND_0 + rt, false),
|
||||||
bi_fau(BIR_FAU_BLEND_0 + rt, true), sr_count);
|
bi_fau(BIR_FAU_BLEND_0 + rt, true),
|
||||||
|
rgba2, sr_count, sr_count_2);
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(rt < 8);
|
assert(rt < 8);
|
||||||
b->shader->info.bifrost->blend[rt].type = T;
|
b->shader->info.bifrost->blend[rt].type = T;
|
||||||
|
|
||||||
|
if (T2)
|
||||||
|
b->shader->info.bifrost->blend_src1_type = T2;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Blend shaders do not need to run ATEST since they are dependent on a
|
/* Blend shaders do not need to run ATEST since they are dependent on a
|
||||||
|
@ -586,7 +594,6 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr)
|
||||||
}
|
}
|
||||||
|
|
||||||
bi_index src0 = bi_src_index(&instr->src[0]);
|
bi_index src0 = bi_src_index(&instr->src[0]);
|
||||||
bi_index src1 = combined ? bi_src_index(&instr->src[4]) : bi_null();
|
|
||||||
|
|
||||||
/* By ISA convention, the coverage mask is stored in R60. The store
|
/* By ISA convention, the coverage mask is stored in R60. The store
|
||||||
* itself will be handled by a subsequent ATEST instruction */
|
* itself will be handled by a subsequent ATEST instruction */
|
||||||
|
@ -598,19 +605,6 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Dual-source blending is implemented by putting the color in
|
|
||||||
* registers r4-r7. */
|
|
||||||
if (writeout & PAN_WRITEOUT_2) {
|
|
||||||
unsigned count = nir_src_num_components(instr->src[4]);
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < count; ++i)
|
|
||||||
bi_mov_i32_to(b, bi_register(4 + i), bi_word(src1, i));
|
|
||||||
|
|
||||||
b->shader->info.bifrost->blend_src1_type =
|
|
||||||
nir_intrinsic_dest_type(instr);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Emit ATEST if we have to, note ATEST requires a floating-point alpha
|
/* Emit ATEST if we have to, note ATEST requires a floating-point alpha
|
||||||
* value, but render target #0 might not be floating point. However the
|
* value, but render target #0 might not be floating point. However the
|
||||||
* alpha value is only used for alpha-to-coverage, a stage which is
|
* alpha value is only used for alpha-to-coverage, a stage which is
|
||||||
|
@ -648,7 +642,10 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr)
|
||||||
|
|
||||||
if (emit_blend) {
|
if (emit_blend) {
|
||||||
unsigned rt = combined ? 0 : (loc - FRAG_RESULT_DATA0);
|
unsigned rt = combined ? 0 : (loc - FRAG_RESULT_DATA0);
|
||||||
|
bool dual = (writeout & PAN_WRITEOUT_2);
|
||||||
bi_index color = bi_src_index(&instr->src[0]);
|
bi_index color = bi_src_index(&instr->src[0]);
|
||||||
|
bi_index color2 = dual ? bi_src_index(&instr->src[4]) : bi_null();
|
||||||
|
nir_alu_type T2 = dual ? nir_intrinsic_dest_type(instr) : 0;
|
||||||
|
|
||||||
/* Explicit copy since BLEND inputs are precoloured to R0-R3,
|
/* Explicit copy since BLEND inputs are precoloured to R0-R3,
|
||||||
* TODO: maybe schedule around this or implement in RA as a
|
* TODO: maybe schedule around this or implement in RA as a
|
||||||
|
@ -667,7 +664,8 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr)
|
||||||
nir_alu_type_get_type_size(nir_intrinsic_src_type(instr)));
|
nir_alu_type_get_type_size(nir_intrinsic_src_type(instr)));
|
||||||
}
|
}
|
||||||
|
|
||||||
bi_emit_blend_op(b, color, nir_intrinsic_src_type(instr), rt);
|
bi_emit_blend_op(b, color, nir_intrinsic_src_type(instr),
|
||||||
|
color2, T2, rt);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (b->shader->inputs->is_blend) {
|
if (b->shader->inputs->is_blend) {
|
||||||
|
|
|
@ -91,6 +91,8 @@ bi_count_read_registers(const bi_instr *ins, unsigned s)
|
||||||
return 1;
|
return 1;
|
||||||
else if (s == 0 && bi_opcode_props[ins->op].sr_read)
|
else if (s == 0 && bi_opcode_props[ins->op].sr_read)
|
||||||
return bi_count_staging_registers(ins);
|
return bi_count_staging_registers(ins);
|
||||||
|
else if (s == 4 && ins->op == BI_OPCODE_BLEND)
|
||||||
|
return ins->sr_count_2; /* Dual source blending */
|
||||||
else
|
else
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -81,7 +81,7 @@ TEST_F(SchedulerPredicates, LOAD)
|
||||||
|
|
||||||
TEST_F(SchedulerPredicates, BLEND)
|
TEST_F(SchedulerPredicates, BLEND)
|
||||||
{
|
{
|
||||||
bi_instr *blend = bi_blend_to(b, TMP(), TMP(), TMP(), TMP(), TMP(), 4);
|
bi_instr *blend = bi_blend_to(b, TMP(), TMP(), TMP(), TMP(), TMP(), TMP(), 4, 4);
|
||||||
ASSERT_FALSE(bi_can_fma(blend));
|
ASSERT_FALSE(bi_can_fma(blend));
|
||||||
ASSERT_TRUE(bi_can_add(blend));
|
ASSERT_TRUE(bi_can_add(blend));
|
||||||
ASSERT_TRUE(bi_must_message(blend));
|
ASSERT_TRUE(bi_must_message(blend));
|
||||||
|
|
|
@ -29,13 +29,6 @@ shaders@point-vertex-id gl_vertexid gl_instanceid divisor,Fail
|
||||||
shaders@point-vertex-id gl_vertexid gl_instanceid,Fail
|
shaders@point-vertex-id gl_vertexid gl_instanceid,Fail
|
||||||
spec@arb_base_instance@arb_base_instance-drawarrays,Fail
|
spec@arb_base_instance@arb_base_instance-drawarrays,Fail
|
||||||
spec@arb_blend_func_extended@arb_blend_func_extended-dual-src-blending-issue-1917_gles3,Crash
|
spec@arb_blend_func_extended@arb_blend_func_extended-dual-src-blending-issue-1917_gles3,Crash
|
||||||
spec@arb_blend_func_extended@arb_blend_func_extended-fbo-extended-blend-explicit,Fail
|
|
||||||
spec@arb_blend_func_extended@arb_blend_func_extended-fbo-extended-blend-explicit_gles3,Fail
|
|
||||||
spec@arb_blend_func_extended@arb_blend_func_extended-fbo-extended-blend,Fail
|
|
||||||
spec@arb_blend_func_extended@arb_blend_func_extended-fbo-extended-blend_gles3,Fail
|
|
||||||
spec@arb_blend_func_extended@arb_blend_func_extended-fbo-extended-blend-pattern,Fail
|
|
||||||
spec@arb_blend_func_extended@arb_blend_func_extended-fbo-extended-blend-pattern_gles2,Crash
|
|
||||||
spec@arb_blend_func_extended@arb_blend_func_extended-fbo-extended-blend-pattern_gles3,Fail
|
|
||||||
spec@arb_color_buffer_float@gl_rgba16f-render,Fail
|
spec@arb_color_buffer_float@gl_rgba16f-render,Fail
|
||||||
spec@arb_color_buffer_float@gl_rgba16f-render-fog,Fail
|
spec@arb_color_buffer_float@gl_rgba16f-render-fog,Fail
|
||||||
spec@arb_color_buffer_float@gl_rgba16f-render-sanity,Fail
|
spec@arb_color_buffer_float@gl_rgba16f-render-sanity,Fail
|
||||||
|
|
Loading…
Reference in New Issue