ac,radeonsi/gfx11: swizzle MRT0/1 for dual source blending
If dual source blending is enabled, use export targets 21 and 22. The odd/even lanes also have to be swapped between export targets 21 and 22. Signed-off-by: Yogesh Mohan Marimuthu <yogesh.mohanmarimuthu@amd.com> Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16328>
This commit is contained in:
parent
12a606c1bd
commit
6531ec8922
|
@ -4085,6 +4085,56 @@ void ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
|
||||||
ac_build_wg_scan_bottom(ctx, ws);
|
ac_build_wg_scan_bottom(ctx, ws);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Swizzle one channel of the two dual-source-blend outputs.
 *
 * Both values are bitcast to i32 and then processed in three steps:
 *   1. swap neighbouring odd/even lanes of *arg0 with a DPP8 move,
 *   2. in lanes whose thread id is even, exchange the (swapped) *arg0 value
 *      with the *arg1 value,
 *   3. swap neighbouring odd/even lanes of the new *arg0 value once more.
 *
 * \param ctx   LLVM build context; instructions are appended via ctx->builder.
 * \param arg0  in/out: channel value of the first output, replaced by the
 *              swizzled i32 value.
 * \param arg1  in/out: channel value of the second output, replaced by the
 *              swizzled i32 value.
 */
static void _ac_build_dual_src_blend_swizzle(struct ac_llvm_context *ctx,
                                             LLVMValueRef *arg0, LLVMValueRef *arg1)
{
   LLVMValueRef val0 = LLVMBuildBitCast(ctx->builder, *arg0, ctx->i32, "");
   LLVMValueRef val1 = LLVMBuildBitCast(ctx->builder, *arg1, ctx->i32, "");

   /* Step 1: exchange each odd/even lane pair of the first value.
    * The selector 0xde54c1 holds 3 bits per lane and decodes to
    * {1, 0, 3, 2, 5, 4, 7, 6}.
    */
   LLVMValueRef dpp8_args[2] = {
      val0,
      LLVMConstInt(ctx->i32, 0xde54c1, 0),
   };
   LLVMValueRef swapped0 = ac_build_intrinsic(ctx, "llvm.amdgcn.mov.dpp8.i32",
                                              ctx->i32, dpp8_args, 2,
                                              AC_FUNC_ATTR_CONVERGENT);

   /* Step 2: lanes with an even thread id trade values between the two
    * outputs; odd lanes keep what they have.
    */
   LLVMValueRef lane_id = ac_get_thread_id(ctx);
   LLVMValueRef lane_lsb = LLVMBuildAnd(ctx->builder, lane_id, ctx->i32_1, "");
   LLVMValueRef lane_is_even = LLVMBuildICmp(ctx->builder, LLVMIntEQ, lane_lsb,
                                             ctx->i32_0, "");
   LLVMValueRef mixed0 = LLVMBuildSelect(ctx->builder, lane_is_even, val1, swapped0, "");
   LLVMValueRef mixed1 = LLVMBuildSelect(ctx->builder, lane_is_even, swapped0, val1, "");

   /* Step 3: exchange the odd/even lane pairs of the first value again. */
   dpp8_args[0] = mixed0;
   dpp8_args[1] = LLVMConstInt(ctx->i32, 0xde54c1, 0);
   LLVMValueRef final0 = ac_build_intrinsic(ctx, "llvm.amdgcn.mov.dpp8.i32",
                                            ctx->i32, dpp8_args, 2,
                                            AC_FUNC_ATTR_CONVERGENT);

   *arg0 = final0;
   *arg1 = mixed1;
}
|
||||||
|
|
||||||
|
void ac_build_dual_src_blend_swizzle(struct ac_llvm_context *ctx,
|
||||||
|
struct ac_export_args *mrt0,
|
||||||
|
struct ac_export_args *mrt1)
|
||||||
|
{
|
||||||
|
assert(ctx->chip_class >= GFX11);
|
||||||
|
assert(mrt0->enabled_channels == mrt1->enabled_channels);
|
||||||
|
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
if (mrt0->enabled_channels & (1 << i) && mrt1->enabled_channels & (1 << i))
|
||||||
|
_ac_build_dual_src_blend_swizzle(ctx, &mrt0->out[i], &mrt1->out[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
LLVMValueRef ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned lane0,
|
LLVMValueRef ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned lane0,
|
||||||
unsigned lane1, unsigned lane2, unsigned lane3)
|
unsigned lane1, unsigned lane2, unsigned lane3)
|
||||||
{
|
{
|
||||||
|
|
|
@ -599,6 +599,10 @@ void ac_build_triangle_strip_indices_to_triangle(struct ac_llvm_context *ctx, LL
|
||||||
LLVMValueRef index[3]);
|
LLVMValueRef index[3]);
|
||||||
LLVMValueRef ac_build_is_inf_or_nan(struct ac_llvm_context *ctx, LLVMValueRef a);
|
LLVMValueRef ac_build_is_inf_or_nan(struct ac_llvm_context *ctx, LLVMValueRef a);
|
||||||
|
|
||||||
|
/* Apply the dual source blend swizzle (GFX11+) to every channel that is
 * enabled in both mrt0 and mrt1; the out[] values of both export argument
 * sets are rewritten in place.
 */
void ac_build_dual_src_blend_swizzle(struct ac_llvm_context *ctx,
|
||||||
|
struct ac_export_args *mrt0,
|
||||||
|
struct ac_export_args *mrt1);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1284,6 +1284,7 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f)
|
||||||
fprintf(f, " epilog.alpha_to_one = %u\n", key->ps.part.epilog.alpha_to_one);
|
fprintf(f, " epilog.alpha_to_one = %u\n", key->ps.part.epilog.alpha_to_one);
|
||||||
fprintf(f, " epilog.alpha_to_coverage_via_mrtz = %u\n", key->ps.part.epilog.alpha_to_coverage_via_mrtz);
|
fprintf(f, " epilog.alpha_to_coverage_via_mrtz = %u\n", key->ps.part.epilog.alpha_to_coverage_via_mrtz);
|
||||||
fprintf(f, " epilog.clamp_color = %u\n", key->ps.part.epilog.clamp_color);
|
fprintf(f, " epilog.clamp_color = %u\n", key->ps.part.epilog.clamp_color);
|
||||||
|
fprintf(f, " epilog.dual_src_blend_swizzle = %u\n", key->ps.part.epilog.dual_src_blend_swizzle);
|
||||||
fprintf(f, " mono.poly_line_smoothing = %u\n", key->ps.mono.poly_line_smoothing);
|
fprintf(f, " mono.poly_line_smoothing = %u\n", key->ps.mono.poly_line_smoothing);
|
||||||
fprintf(f, " mono.interpolate_at_sample_force_center = %u\n",
|
fprintf(f, " mono.interpolate_at_sample_force_center = %u\n",
|
||||||
key->ps.mono.interpolate_at_sample_force_center);
|
key->ps.mono.interpolate_at_sample_force_center);
|
||||||
|
|
|
@ -578,6 +578,7 @@ struct si_ps_epilog_bits {
|
||||||
unsigned alpha_to_one : 1;
|
unsigned alpha_to_one : 1;
|
||||||
unsigned alpha_to_coverage_via_mrtz : 1; /* gfx11+ */
|
unsigned alpha_to_coverage_via_mrtz : 1; /* gfx11+ */
|
||||||
unsigned clamp_color : 1;
|
unsigned clamp_color : 1;
|
||||||
|
unsigned dual_src_blend_swizzle : 1; /* gfx11+ */
|
||||||
};
|
};
|
||||||
|
|
||||||
union si_shader_part_key {
|
union si_shader_part_key {
|
||||||
|
|
|
@ -300,6 +300,12 @@ static bool si_llvm_init_ps_export_args(struct si_shader_context *ctx, LLVMValue
|
||||||
/* Specify the target we are exporting */
|
/* Specify the target we are exporting */
|
||||||
args->target = V_008DFC_SQ_EXP_MRT + compacted_mrt_index;
|
args->target = V_008DFC_SQ_EXP_MRT + compacted_mrt_index;
|
||||||
|
|
||||||
|
if (key->ps.part.epilog.dual_src_blend_swizzle &&
|
||||||
|
(compacted_mrt_index == 0 || compacted_mrt_index == 1)) {
|
||||||
|
assert(ctx->ac.chip_class >= GFX11);
|
||||||
|
args->target += 21;
|
||||||
|
}
|
||||||
|
|
||||||
args->compr = false;
|
args->compr = false;
|
||||||
args->out[0] = f32undef;
|
args->out[0] = f32undef;
|
||||||
args->out[1] = f32undef;
|
args->out[1] = f32undef;
|
||||||
|
@ -937,6 +943,13 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part
|
||||||
exp.args[exp.num - 1].valid_mask = 1; /* whether the EXEC mask is valid */
|
exp.args[exp.num - 1].valid_mask = 1; /* whether the EXEC mask is valid */
|
||||||
exp.args[exp.num - 1].done = 1; /* DONE bit */
|
exp.args[exp.num - 1].done = 1; /* DONE bit */
|
||||||
|
|
||||||
|
if (key->ps_epilog.states.dual_src_blend_swizzle) {
|
||||||
|
assert(ctx->ac.chip_class >= GFX11);
|
||||||
|
assert((key->ps_epilog.colors_written & 0x3) == 0x3);
|
||||||
|
ac_build_dual_src_blend_swizzle(&ctx->ac, &exp.args[first_color_export],
|
||||||
|
&exp.args[first_color_export + 1]);
|
||||||
|
}
|
||||||
|
|
||||||
for (unsigned i = 0; i < exp.num; i++)
|
for (unsigned i = 0; i < exp.num; i++)
|
||||||
ac_build_export(&ctx->ac, &exp.args[i]);
|
ac_build_export(&ctx->ac, &exp.args[i]);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -2217,6 +2217,10 @@ void si_ps_key_update_framebuffer_blend(struct si_context *sctx)
|
||||||
sctx->framebuffer.spi_shader_col_format);
|
sctx->framebuffer.spi_shader_col_format);
|
||||||
key->ps.part.epilog.spi_shader_col_format &= blend->cb_target_enabled_4bit;
|
key->ps.part.epilog.spi_shader_col_format &= blend->cb_target_enabled_4bit;
|
||||||
|
|
||||||
|
key->ps.part.epilog.dual_src_blend_swizzle = sctx->chip_class >= GFX11 &&
|
||||||
|
blend->dual_src_blend &&
|
||||||
|
(sel->info.colors_written_4bit & 0xff) == 0xff;
|
||||||
|
|
||||||
/* The output for dual source blending should have
|
/* The output for dual source blending should have
|
||||||
* the same format as the first output.
|
* the same format as the first output.
|
||||||
*/
|
*/
|
||||||
|
|
Loading…
Reference in New Issue