ac,radeonsi/gfx11: swizzle MRT0/1 for dual source blending

On GFX11, when dual source blending is enabled, export MRT0 and MRT1 to
export targets 21 and 22 instead of the regular MRT targets. The odd and even
lanes must also be swapped between export targets 21 and 22.

Signed-off-by: Yogesh Mohan Marimuthu <yogesh.mohanmarimuthu@amd.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16328>
This commit is contained in:
Yogesh mohan marimuthu 2021-12-23 02:03:48 +05:30 committed by Marge Bot
parent 12a606c1bd
commit 6531ec8922
6 changed files with 73 additions and 0 deletions

View File

@ -4085,6 +4085,56 @@ void ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
ac_build_wg_scan_bottom(ctx, ws);
}
/**
 * Emit the lane exchange needed for one channel of a dual source blend
 * export pair.
 *
 * Both values are bitcast to i32 and then run through three steps:
 *   1. arg0 is shuffled with a dpp8 move that swaps the odd and even lanes,
 *   2. on even lanes (thread id bit 0 clear) the two values are exchanged,
 *   3. arg0 is shuffled with the same dpp8 move again.
 *
 * Assuming the dpp8 move swaps the two lanes of every even/odd pair, the net
 * result is that arg0 keeps its even lanes and arg1 keeps its odd lanes,
 * while the odd lanes of arg0 and the even lanes of arg1 are exchanged
 * between neighbouring lanes.
 *
 * \param ctx   LLVM build context; instructions are appended via ctx->builder.
 * \param arg0  In/out: channel value of the first export. Replaced by the
 *              swizzled value, which is left as an i32.
 * \param arg1  In/out: channel value of the second export. Replaced by the
 *              swizzled value, which is left as an i32.
 */
static void _ac_build_dual_src_blend_swizzle(struct ac_llvm_context *ctx,
                                             LLVMValueRef *arg0, LLVMValueRef *arg1)
{
   /* dpp8 lane selector: eight 3-bit source-lane indices that decode to
    * 1,0,3,2,5,4,7,6, i.e. every lane reads from its odd/even partner.
    */
   LLVMValueRef pair_swap_sel = LLVMConstInt(ctx->i32, 0xde54c1, 0);

   LLVMValueRef val0 = LLVMBuildBitCast(ctx->builder, *arg0, ctx->i32, "");
   LLVMValueRef val1 = LLVMBuildBitCast(ctx->builder, *arg1, ctx->i32, "");

   /* Step 1: swap the odd and even lanes of the first value. */
   LLVMValueRef dpp_args[2] = {val0, pair_swap_sel};
   val0 = ac_build_intrinsic(ctx, "llvm.amdgcn.mov.dpp8.i32", ctx->i32, dpp_args, 2,
                             AC_FUNC_ATTR_CONVERGENT);

   /* Step 2: on even lanes, exchange the two values. */
   LLVMValueRef lane_id = ac_get_thread_id(ctx);
   LLVMValueRef lane_lsb = LLVMBuildAnd(ctx->builder, lane_id, ctx->i32_1, "");
   LLVMValueRef on_even_lane =
      LLVMBuildICmp(ctx->builder, LLVMIntEQ, lane_lsb, ctx->i32_0, "");
   LLVMValueRef mixed0 = LLVMBuildSelect(ctx->builder, on_even_lane, val1, val0, "");
   LLVMValueRef mixed1 = LLVMBuildSelect(ctx->builder, on_even_lane, val0, val1, "");

   /* Step 3: swap the odd and even lanes of the first value once more. */
   dpp_args[0] = mixed0;
   mixed0 = ac_build_intrinsic(ctx, "llvm.amdgcn.mov.dpp8.i32", ctx->i32, dpp_args, 2,
                               AC_FUNC_ATTR_CONVERGENT);

   *arg0 = mixed0;
   *arg1 = mixed1;
}
/**
 * Apply the dual source blend lane swizzle to a pair of color exports.
 *
 * Each of the four channels that is enabled in both exports has its
 * out[] value swizzled in place; channels disabled in either export are
 * left untouched.
 *
 * Asserts that the chip is GFX11 or newer and that both exports enable the
 * same set of channels.
 *
 * \param ctx   LLVM build context.
 * \param mrt0  First export of the pair; out[] is updated in place.
 * \param mrt1  Second export of the pair; out[] is updated in place.
 */
void ac_build_dual_src_blend_swizzle(struct ac_llvm_context *ctx,
                                     struct ac_export_args *mrt0,
                                     struct ac_export_args *mrt1)
{
   assert(ctx->chip_class >= GFX11);
   assert(mrt0->enabled_channels == mrt1->enabled_channels);

   /* With asserts compiled out the masks may differ, so only channels
    * present in both exports are processed.
    */
   const unsigned shared_channels = mrt0->enabled_channels & mrt1->enabled_channels;

   for (int chan = 0; chan < 4; chan++) {
      if (!(shared_channels & (1 << chan)))
         continue;

      _ac_build_dual_src_blend_swizzle(ctx, &mrt0->out[chan], &mrt1->out[chan]);
   }
}
LLVMValueRef ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned lane0,
unsigned lane1, unsigned lane2, unsigned lane3)
{

View File

@ -599,6 +599,10 @@ void ac_build_triangle_strip_indices_to_triangle(struct ac_llvm_context *ctx, LL
LLVMValueRef index[3]);
LLVMValueRef ac_build_is_inf_or_nan(struct ac_llvm_context *ctx, LLVMValueRef a);
/**
 * Swizzle the channel values of two exports for dual source blending.
 *
 * For every one of the four channels that is enabled in both mrt0 and mrt1,
 * the corresponding out[] values of the two exports are replaced in place by
 * their lane-swizzled versions. The implementation asserts that the chip is
 * GFX11 or newer and that both exports have identical enabled_channels.
 */
void ac_build_dual_src_blend_swizzle(struct ac_llvm_context *ctx,
struct ac_export_args *mrt0,
struct ac_export_args *mrt1);
#ifdef __cplusplus
}
#endif

View File

@ -1284,6 +1284,7 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f)
fprintf(f, " epilog.alpha_to_one = %u\n", key->ps.part.epilog.alpha_to_one);
fprintf(f, " epilog.alpha_to_coverage_via_mrtz = %u\n", key->ps.part.epilog.alpha_to_coverage_via_mrtz);
fprintf(f, " epilog.clamp_color = %u\n", key->ps.part.epilog.clamp_color);
fprintf(f, " epilog.dual_src_blend_swizzle = %u\n", key->ps.part.epilog.dual_src_blend_swizzle);
fprintf(f, " mono.poly_line_smoothing = %u\n", key->ps.mono.poly_line_smoothing);
fprintf(f, " mono.interpolate_at_sample_force_center = %u\n",
key->ps.mono.interpolate_at_sample_force_center);

View File

@ -578,6 +578,7 @@ struct si_ps_epilog_bits {
unsigned alpha_to_one : 1;
unsigned alpha_to_coverage_via_mrtz : 1; /* gfx11+ */
unsigned clamp_color : 1;
unsigned dual_src_blend_swizzle : 1; /* gfx11+ */
};
union si_shader_part_key {

View File

@ -300,6 +300,12 @@ static bool si_llvm_init_ps_export_args(struct si_shader_context *ctx, LLVMValue
/* Specify the target we are exporting */
args->target = V_008DFC_SQ_EXP_MRT + compacted_mrt_index;
if (key->ps.part.epilog.dual_src_blend_swizzle &&
(compacted_mrt_index == 0 || compacted_mrt_index == 1)) {
assert(ctx->ac.chip_class >= GFX11);
args->target += 21;
}
args->compr = false;
args->out[0] = f32undef;
args->out[1] = f32undef;
@ -937,6 +943,13 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part
exp.args[exp.num - 1].valid_mask = 1; /* whether the EXEC mask is valid */
exp.args[exp.num - 1].done = 1; /* DONE bit */
if (key->ps_epilog.states.dual_src_blend_swizzle) {
assert(ctx->ac.chip_class >= GFX11);
assert((key->ps_epilog.colors_written & 0x3) == 0x3);
ac_build_dual_src_blend_swizzle(&ctx->ac, &exp.args[first_color_export],
&exp.args[first_color_export + 1]);
}
for (unsigned i = 0; i < exp.num; i++)
ac_build_export(&ctx->ac, &exp.args[i]);
} else {

View File

@ -2217,6 +2217,10 @@ void si_ps_key_update_framebuffer_blend(struct si_context *sctx)
sctx->framebuffer.spi_shader_col_format);
key->ps.part.epilog.spi_shader_col_format &= blend->cb_target_enabled_4bit;
key->ps.part.epilog.dual_src_blend_swizzle = sctx->chip_class >= GFX11 &&
blend->dual_src_blend &&
(sel->info.colors_written_4bit & 0xff) == 0xff;
/* The output for dual source blending should have
* the same format as the first output.
*/