radeonsi: move smoothing to the main shader part to remove 1 live VGPR
The samplemask VGPR that we had to pass to the epilog increased VGPR usage by 1 for all shaders. Do the smoothing in the main function instead, selected through the mono key structure. This causes on-demand compilation and a stall, but we save the VGPR.

57794 shaders in 35145 tests
Totals:
SGPRS: 2715856 -> 2716272 (0.02 %)
VGPRS: 1776168 -> 1718432 (-3.25 %)
Spilled SGPRs: 3704 -> 3630 (-2.00 %)
Spilled VGPRs: 1727 -> 1733 (0.35 %)
Private memory VGPRs: 256 -> 256 (0.00 %)
Scratch size: 2008 -> 2016 (0.40 %) dwords per thread
Code Size: 61429584 -> 61393288 (-0.06 %) bytes
Max Waves: 838645 -> 840484 (0.22 %)

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14266>
This commit is contained in:
parent
12b942bd16
commit
198ad7e4dc
|
@ -701,8 +701,6 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
|
|||
shader->selector->info.writes_z + shader->selector->info.writes_stencil +
|
||||
shader->selector->info.writes_samplemask + 1 /* SampleMaskIn */;
|
||||
|
||||
num_returns = MAX2(num_returns, num_return_sgprs + PS_EPILOG_SAMPLEMASK_MIN_LOC + 1);
|
||||
|
||||
for (i = 0; i < num_return_sgprs; i++)
|
||||
ac_add_return(&ctx->args, AC_ARG_SGPR);
|
||||
for (; i < num_returns; i++)
|
||||
|
@ -1249,9 +1247,8 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f)
|
|||
fprintf(f, " epilog.last_cbuf = %u\n", key->ps.part.epilog.last_cbuf);
|
||||
fprintf(f, " epilog.alpha_func = %u\n", key->ps.part.epilog.alpha_func);
|
||||
fprintf(f, " epilog.alpha_to_one = %u\n", key->ps.part.epilog.alpha_to_one);
|
||||
fprintf(f, " epilog.poly_line_smoothing = %u\n",
|
||||
key->ps.part.epilog.poly_line_smoothing);
|
||||
fprintf(f, " epilog.clamp_color = %u\n", key->ps.part.epilog.clamp_color);
|
||||
fprintf(f, " mono.poly_line_smoothing = %u\n", key->ps.mono.poly_line_smoothing);
|
||||
fprintf(f, " mono.interpolate_at_sample_force_center = %u\n",
|
||||
key->ps.mono.interpolate_at_sample_force_center);
|
||||
fprintf(f, " mono.fbfetch_msaa = %u\n", key->ps.mono.fbfetch_msaa);
|
||||
|
@ -1986,12 +1983,6 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen, struct ac_llvm_
|
|||
assert(G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr));
|
||||
}
|
||||
|
||||
/* The sample mask input is always enabled, because the API shader always
|
||||
* passes it through to the epilog. Disable it here if it's unused.
|
||||
*/
|
||||
if (!shader->key.ps.part.epilog.poly_line_smoothing && !shader->selector->info.reads_samplemask)
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_SAMPLE_COVERAGE_ENA;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -578,7 +578,6 @@ struct si_ps_epilog_bits {
|
|||
unsigned last_cbuf : 3;
|
||||
unsigned alpha_func : 3;
|
||||
unsigned alpha_to_one : 1;
|
||||
unsigned poly_line_smoothing : 1;
|
||||
unsigned clamp_color : 1;
|
||||
};
|
||||
|
||||
|
@ -708,6 +707,7 @@ struct si_shader_key_ps {
|
|||
|
||||
/* Flags for monolithic compilation only. */
|
||||
struct {
|
||||
unsigned poly_line_smoothing : 1;
|
||||
unsigned interpolate_at_sample_force_center : 1;
|
||||
unsigned fbfetch_msaa : 1;
|
||||
unsigned fbfetch_is_1D : 1;
|
||||
|
|
|
@ -30,11 +30,6 @@
|
|||
|
||||
struct pipe_debug_callback;
|
||||
|
||||
/* Ideally pass the sample mask input to the PS epilog as v14, which
|
||||
* is its usual location, so that the shader doesn't have to add v_mov.
|
||||
*/
|
||||
#define PS_EPILOG_SAMPLEMASK_MIN_LOC 14
|
||||
|
||||
struct si_shader_output_values {
|
||||
LLVMValueRef values[4];
|
||||
ubyte vertex_stream[4];
|
||||
|
|
|
@ -221,23 +221,17 @@ static void si_alpha_test(struct si_shader_context *ctx, LLVMValueRef alpha)
|
|||
}
|
||||
}
|
||||
|
||||
static LLVMValueRef si_scale_alpha_by_sample_mask(struct si_shader_context *ctx, LLVMValueRef alpha,
|
||||
unsigned samplemask_param)
|
||||
static LLVMValueRef si_get_coverage_from_sample_mask(struct si_shader_context *ctx)
|
||||
{
|
||||
LLVMValueRef coverage;
|
||||
|
||||
/* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */
|
||||
coverage = LLVMGetParam(ctx->main_fn, samplemask_param);
|
||||
coverage = LLVMGetParam(ctx->main_fn, SI_PARAM_SAMPLE_COVERAGE);
|
||||
coverage = ac_build_bit_count(&ctx->ac, ac_to_integer(&ctx->ac, coverage));
|
||||
coverage = LLVMBuildUIToFP(ctx->ac.builder, coverage, ctx->ac.f32, "");
|
||||
|
||||
coverage = LLVMBuildFMul(ctx->ac.builder, coverage,
|
||||
LLVMConstReal(ctx->ac.f32, 1.0 / SI_NUM_SMOOTH_AA_SAMPLES), "");
|
||||
|
||||
if (LLVMTypeOf(alpha) == ctx->ac.f16)
|
||||
coverage = LLVMBuildFPTrunc(ctx->ac.builder, coverage, ctx->ac.f16, "");
|
||||
|
||||
return LLVMBuildFMul(ctx->ac.builder, alpha, coverage, "");
|
||||
return LLVMBuildFMul(ctx->ac.builder, coverage,
|
||||
LLVMConstReal(ctx->ac.f32, 1.0 / SI_NUM_SMOOTH_AA_SAMPLES), "");
|
||||
}
|
||||
|
||||
struct si_ps_exports {
|
||||
|
@ -407,8 +401,8 @@ static bool si_llvm_init_ps_export_args(struct si_shader_context *ctx, LLVMValue
|
|||
}
|
||||
|
||||
static void si_export_mrt_color(struct si_shader_context *ctx, LLVMValueRef *color, unsigned index,
|
||||
unsigned first_color_export, unsigned samplemask_param,
|
||||
unsigned color_type, struct si_ps_exports *exp)
|
||||
unsigned first_color_export, unsigned color_type,
|
||||
struct si_ps_exports *exp)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
@ -425,10 +419,6 @@ static void si_export_mrt_color(struct si_shader_context *ctx, LLVMValueRef *col
|
|||
if (index == 0 && ctx->shader->key.ps.part.epilog.alpha_func != PIPE_FUNC_ALWAYS)
|
||||
si_alpha_test(ctx, color[3]);
|
||||
|
||||
/* Line & polygon smoothing */
|
||||
if (ctx->shader->key.ps.part.epilog.poly_line_smoothing)
|
||||
color[3] = si_scale_alpha_by_sample_mask(ctx, color[3], samplemask_param);
|
||||
|
||||
/* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
|
||||
if (ctx->shader->key.ps.part.epilog.last_cbuf > 0) {
|
||||
assert(exp->num == first_color_export);
|
||||
|
@ -470,7 +460,7 @@ static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi)
|
|||
struct si_shader *shader = ctx->shader;
|
||||
struct si_shader_info *info = &shader->selector->info;
|
||||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
unsigned i, j, first_vgpr, vgpr;
|
||||
unsigned i, j, vgpr;
|
||||
LLVMValueRef *addrs = abi->outputs;
|
||||
|
||||
LLVMValueRef color[8][4] = {};
|
||||
|
@ -507,6 +497,10 @@ static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi)
|
|||
}
|
||||
}
|
||||
|
||||
LLVMValueRef smoothing_coverage = NULL;
|
||||
if (ctx->shader->key.ps.mono.poly_line_smoothing)
|
||||
smoothing_coverage = si_get_coverage_from_sample_mask(ctx);
|
||||
|
||||
/* Fill the return structure. */
|
||||
ret = ctx->return_value;
|
||||
|
||||
|
@ -516,12 +510,17 @@ static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi)
|
|||
SI_SGPR_ALPHA_REF, "");
|
||||
|
||||
/* Set VGPRs */
|
||||
first_vgpr = vgpr = SI_SGPR_ALPHA_REF + 1;
|
||||
vgpr = SI_SGPR_ALPHA_REF + 1;
|
||||
for (i = 0; i < ARRAY_SIZE(color); i++) {
|
||||
if (!color[i][0])
|
||||
continue;
|
||||
|
||||
if (LLVMTypeOf(color[i][0]) == ctx->ac.f16) {
|
||||
if (smoothing_coverage) {
|
||||
color[i][3] = LLVMBuildFMul(builder, color[i][3],
|
||||
LLVMBuildFPTrunc(builder, smoothing_coverage, ctx->ac.f16, ""), "");
|
||||
}
|
||||
|
||||
for (j = 0; j < 2; j++) {
|
||||
LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, &color[i][j * 2], 2);
|
||||
tmp = LLVMBuildBitCast(builder, tmp, ctx->ac.f32, "");
|
||||
|
@ -529,6 +528,9 @@ static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi)
|
|||
}
|
||||
vgpr += 2;
|
||||
} else {
|
||||
if (smoothing_coverage)
|
||||
color[i][3] = LLVMBuildFMul(builder, color[i][3], smoothing_coverage, "");
|
||||
|
||||
for (j = 0; j < 4; j++)
|
||||
ret = LLVMBuildInsertValue(builder, ret, color[i][j], vgpr++, "");
|
||||
}
|
||||
|
@ -540,12 +542,6 @@ static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi)
|
|||
if (samplemask)
|
||||
ret = LLVMBuildInsertValue(builder, ret, samplemask, vgpr++, "");
|
||||
|
||||
/* Add the input sample mask for smoothing at the end. */
|
||||
if (vgpr < first_vgpr + PS_EPILOG_SAMPLEMASK_MIN_LOC)
|
||||
vgpr = first_vgpr + PS_EPILOG_SAMPLEMASK_MIN_LOC;
|
||||
ret = LLVMBuildInsertValue(builder, ret, LLVMGetParam(ctx->main_fn, SI_PARAM_SAMPLE_COVERAGE),
|
||||
vgpr++, "");
|
||||
|
||||
ctx->return_value = ret;
|
||||
}
|
||||
|
||||
|
@ -860,9 +856,6 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part
|
|||
ctx->args.num_sgprs_used + util_bitcount(key->ps_epilog.colors_written) * 4 +
|
||||
key->ps_epilog.writes_z + key->ps_epilog.writes_stencil + key->ps_epilog.writes_samplemask;
|
||||
|
||||
required_num_params =
|
||||
MAX2(required_num_params, ctx->args.num_sgprs_used + PS_EPILOG_SAMPLEMASK_MIN_LOC + 1);
|
||||
|
||||
while (ctx->args.arg_count < required_num_params)
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL);
|
||||
|
||||
|
@ -911,8 +904,7 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part
|
|||
color[i] = LLVMGetParam(ctx->main_fn, vgpr++);
|
||||
}
|
||||
|
||||
si_export_mrt_color(ctx, color, output_index, first_color_export,
|
||||
ctx->args.arg_count - 1, color_type, &exp);
|
||||
si_export_mrt_color(ctx, color, output_index, first_color_export, color_type, &exp);
|
||||
}
|
||||
|
||||
if (exp.num) {
|
||||
|
|
|
@ -275,8 +275,8 @@ static bool si_update_shaders(struct si_context *sctx)
|
|||
si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state);
|
||||
|
||||
if (sctx->smoothing_enabled !=
|
||||
sctx->shader.ps.current->key.ps.part.epilog.poly_line_smoothing) {
|
||||
sctx->smoothing_enabled = sctx->shader.ps.current->key.ps.part.epilog.poly_line_smoothing;
|
||||
sctx->shader.ps.current->key.ps.mono.poly_line_smoothing) {
|
||||
sctx->smoothing_enabled = sctx->shader.ps.current->key.ps.mono.poly_line_smoothing;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
|
||||
|
||||
/* NGG cull state uses smoothing_enabled. */
|
||||
|
|
|
@ -2196,7 +2196,7 @@ static void si_ps_key_update_primtype_shader_rasterizer_framebuffer(struct si_co
|
|||
bool is_line = util_prim_is_lines(sctx->current_rast_prim);
|
||||
|
||||
key->ps.part.prolog.poly_stipple = rs->poly_stipple_enable && is_poly;
|
||||
key->ps.part.epilog.poly_line_smoothing =
|
||||
key->ps.mono.poly_line_smoothing =
|
||||
((is_poly && rs->poly_smooth) || (is_line && rs->line_smooth)) &&
|
||||
sctx->framebuffer.nr_samples <= 1;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue