radeonsi: move PS shader key code into a separate function

There is reordering and new comments.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12343>
This commit is contained in:
Marek Olšák 2021-08-10 02:32:19 -04:00 committed by Marge Bot
parent a912c80439
commit 46bda71a54
1 changed files with 152 additions and 134 deletions

View File

@ -1874,6 +1874,156 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx, struct si_shad
key->opt.kill_pointsize = 1;
}
static void si_update_ps_shader_key(struct si_context *sctx)
{
struct si_shader_selector *sel = sctx->shader.ps.cso;
struct si_shader_key *key = &sctx->shader.ps.key;
memset(&key->part, 0, sizeof(key->part));
memset(&key->mono, 0, sizeof(key->mono));
memset(&key->opt, 0, sizeof(key->opt));
/** Framebuffer dependencies. */
if (sel->info.color0_writes_all_cbufs &&
sel->info.colors_written == 0x1)
key->part.ps.epilog.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
/* ps_uses_fbfetch is true only if the color buffer is bound. */
if (sctx->ps_uses_fbfetch && !sctx->blitter_running) {
struct pipe_surface *cb0 = sctx->framebuffer.state.cbufs[0];
struct pipe_resource *tex = cb0->texture;
/* 1D textures are allocated and used as 2D on GFX9. */
key->mono.u.ps.fbfetch_msaa = sctx->framebuffer.nr_samples > 1;
key->mono.u.ps.fbfetch_is_1D =
sctx->chip_class != GFX9 &&
(tex->target == PIPE_TEXTURE_1D || tex->target == PIPE_TEXTURE_1D_ARRAY);
key->mono.u.ps.fbfetch_layered =
tex->target == PIPE_TEXTURE_1D_ARRAY || tex->target == PIPE_TEXTURE_2D_ARRAY ||
tex->target == PIPE_TEXTURE_CUBE || tex->target == PIPE_TEXTURE_CUBE_ARRAY ||
tex->target == PIPE_TEXTURE_3D;
}
/** Framebuffer and blend dependencies. */
/* Select the shader color format based on whether
* blending or alpha are needed.
*/
struct si_state_blend *blend = sctx->queued.named.blend;
key->part.ps.epilog.spi_shader_col_format =
(blend->blend_enable_4bit & blend->need_src_alpha_4bit &
sctx->framebuffer.spi_shader_col_format_blend_alpha) |
(blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
sctx->framebuffer.spi_shader_col_format_blend) |
(~blend->blend_enable_4bit & blend->need_src_alpha_4bit &
sctx->framebuffer.spi_shader_col_format_alpha) |
(~blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
sctx->framebuffer.spi_shader_col_format);
key->part.ps.epilog.spi_shader_col_format &= blend->cb_target_enabled_4bit;
/* The output for dual source blending should have
* the same format as the first output.
*/
if (blend->dual_src_blend) {
key->part.ps.epilog.spi_shader_col_format |=
(key->part.ps.epilog.spi_shader_col_format & 0xf) << 4;
}
/* If alpha-to-coverage is enabled, we have to export alpha
* even if there is no color buffer.
*/
if (!(key->part.ps.epilog.spi_shader_col_format & 0xf) && blend->alpha_to_coverage)
key->part.ps.epilog.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR;
/* On GFX6 and GFX7 except Hawaii, the CB doesn't clamp outputs
* to the range supported by the type if a channel has less
* than 16 bits and the export format is 16_ABGR.
*/
if (sctx->chip_class <= GFX7 && sctx->family != CHIP_HAWAII) {
key->part.ps.epilog.color_is_int8 = sctx->framebuffer.color_is_int8;
key->part.ps.epilog.color_is_int10 = sctx->framebuffer.color_is_int10;
}
/* Disable unwritten outputs (if WRITE_ALL_CBUFS isn't enabled). */
if (!key->part.ps.epilog.last_cbuf) {
key->part.ps.epilog.spi_shader_col_format &= sel->colors_written_4bit;
key->part.ps.epilog.color_is_int8 &= sel->info.colors_written;
key->part.ps.epilog.color_is_int10 &= sel->info.colors_written;
}
/* Eliminate shader code computing output values that are unused.
* This enables dead code elimination between shader parts.
* Check if any output is eliminated.
*/
if (sel->colors_written_4bit &
~(sctx->framebuffer.colorbuf_enabled_4bit & blend->cb_target_enabled_4bit))
key->opt.prefer_mono = 1;
/** Primitive type and shader dependencies. */
bool is_poly = !util_prim_is_points_or_lines(sctx->current_rast_prim);
bool is_line = util_prim_is_lines(sctx->current_rast_prim);
/** Blend and rasterizer dependencies. */
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
key->part.ps.epilog.alpha_to_one = blend->alpha_to_one && rs->multisample_enable;
/** Rasterizer dependencies. */
key->part.ps.prolog.color_two_side = rs->two_side && sel->info.colors_read;
key->part.ps.prolog.flatshade_colors = rs->flatshade && sel->info.uses_interp_color;
key->part.ps.epilog.clamp_color = rs->clamp_fragment_color;
/** Primitive type, shader, and rasterizer dependencies. */
key->part.ps.prolog.poly_stipple = rs->poly_stipple_enable && is_poly;
/** Primitive type, shader, rasterizer, and framebuffer dependencies. */
key->part.ps.epilog.poly_line_smoothing =
((is_poly && rs->poly_smooth) || (is_line && rs->line_smooth)) &&
sctx->framebuffer.nr_samples <= 1;
/** Sample shading dependencies. */
if (sctx->ps_iter_samples > 1 && sel->info.reads_samplemask)
key->part.ps.prolog.samplemask_log_ps_iter = util_logbase2(sctx->ps_iter_samples);
/** Framebuffer, rasterizer, and sample shading dependencies. */
bool uses_persp_center = sel->info.uses_persp_center ||
(!rs->flatshade && sel->info.uses_persp_center_color);
bool uses_persp_centroid = sel->info.uses_persp_centroid ||
(!rs->flatshade && sel->info.uses_persp_centroid_color);
bool uses_persp_sample = sel->info.uses_persp_sample ||
(!rs->flatshade && sel->info.uses_persp_sample_color);
if (rs->force_persample_interp && rs->multisample_enable &&
sctx->framebuffer.nr_samples > 1 && sctx->ps_iter_samples > 1) {
key->part.ps.prolog.force_persp_sample_interp =
uses_persp_center || uses_persp_centroid;
key->part.ps.prolog.force_linear_sample_interp =
sel->info.uses_linear_center || sel->info.uses_linear_centroid;
} else if (rs->multisample_enable && sctx->framebuffer.nr_samples > 1) {
key->part.ps.prolog.bc_optimize_for_persp =
uses_persp_center && uses_persp_centroid;
key->part.ps.prolog.bc_optimize_for_linear =
sel->info.uses_linear_center && sel->info.uses_linear_centroid;
} else {
/* Make sure SPI doesn't compute more than 1 pair
* of (i,j), which is the optimization here. */
key->part.ps.prolog.force_persp_center_interp = uses_persp_center +
uses_persp_centroid +
uses_persp_sample > 1;
key->part.ps.prolog.force_linear_center_interp = sel->info.uses_linear_center +
sel->info.uses_linear_centroid +
sel->info.uses_linear_sample > 1;
if (sel->info.uses_interp_at_sample)
key->mono.u.ps.interpolate_at_sample_force_center = 1;
}
/** DSA dependencies. */
key->part.ps.epilog.alpha_func = sctx->queued.named.dsa->alpha_func;
}
/* Compute the key for the hw shader variant */
static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_shader_selector *sel,
struct si_shader_key *key)
@ -1966,141 +2116,9 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_sh
}
key->part.gs.prolog.tri_strip_adj_fix = sctx->gs_tri_strip_adj_fix;
break;
case MESA_SHADER_FRAGMENT: {
memset(&key->part, 0, sizeof(key->part));
memset(&key->mono, 0, sizeof(key->mono));
memset(&key->opt, 0, sizeof(key->opt));
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
struct si_state_blend *blend = sctx->queued.named.blend;
if (sel->info.color0_writes_all_cbufs &&
sel->info.colors_written == 0x1)
key->part.ps.epilog.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
/* Select the shader color format based on whether
* blending or alpha are needed.
*/
key->part.ps.epilog.spi_shader_col_format =
(blend->blend_enable_4bit & blend->need_src_alpha_4bit &
sctx->framebuffer.spi_shader_col_format_blend_alpha) |
(blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
sctx->framebuffer.spi_shader_col_format_blend) |
(~blend->blend_enable_4bit & blend->need_src_alpha_4bit &
sctx->framebuffer.spi_shader_col_format_alpha) |
(~blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
sctx->framebuffer.spi_shader_col_format);
key->part.ps.epilog.spi_shader_col_format &= blend->cb_target_enabled_4bit;
/* The output for dual source blending should have
* the same format as the first output.
*/
if (blend->dual_src_blend) {
key->part.ps.epilog.spi_shader_col_format |=
(key->part.ps.epilog.spi_shader_col_format & 0xf) << 4;
}
/* If alpha-to-coverage is enabled, we have to export alpha
* even if there is no color buffer.
*/
if (!(key->part.ps.epilog.spi_shader_col_format & 0xf) && blend->alpha_to_coverage)
key->part.ps.epilog.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR;
/* On GFX6 and GFX7 except Hawaii, the CB doesn't clamp outputs
* to the range supported by the type if a channel has less
* than 16 bits and the export format is 16_ABGR.
*/
if (sctx->chip_class <= GFX7 && sctx->family != CHIP_HAWAII) {
key->part.ps.epilog.color_is_int8 = sctx->framebuffer.color_is_int8;
key->part.ps.epilog.color_is_int10 = sctx->framebuffer.color_is_int10;
}
/* Disable unwritten outputs (if WRITE_ALL_CBUFS isn't enabled). */
if (!key->part.ps.epilog.last_cbuf) {
key->part.ps.epilog.spi_shader_col_format &= sel->colors_written_4bit;
key->part.ps.epilog.color_is_int8 &= sel->info.colors_written;
key->part.ps.epilog.color_is_int10 &= sel->info.colors_written;
}
/* Eliminate shader code computing output values that are unused.
* This enables dead code elimination between shader parts.
* Check if any output is eliminated.
*/
if (sel->colors_written_4bit &
~(sctx->framebuffer.colorbuf_enabled_4bit & blend->cb_target_enabled_4bit))
key->opt.prefer_mono = 1;
bool is_poly = !util_prim_is_points_or_lines(sctx->current_rast_prim);
bool is_line = util_prim_is_lines(sctx->current_rast_prim);
key->part.ps.prolog.color_two_side = rs->two_side && sel->info.colors_read;
key->part.ps.prolog.flatshade_colors = rs->flatshade && sel->info.uses_interp_color;
key->part.ps.epilog.alpha_to_one = blend->alpha_to_one && rs->multisample_enable;
key->part.ps.prolog.poly_stipple = rs->poly_stipple_enable && is_poly;
key->part.ps.epilog.poly_line_smoothing =
((is_poly && rs->poly_smooth) || (is_line && rs->line_smooth)) &&
sctx->framebuffer.nr_samples <= 1;
key->part.ps.epilog.clamp_color = rs->clamp_fragment_color;
if (sctx->ps_iter_samples > 1 && sel->info.reads_samplemask) {
key->part.ps.prolog.samplemask_log_ps_iter = util_logbase2(sctx->ps_iter_samples);
}
bool uses_persp_center = sel->info.uses_persp_center ||
(!rs->flatshade && sel->info.uses_persp_center_color);
bool uses_persp_centroid = sel->info.uses_persp_centroid ||
(!rs->flatshade && sel->info.uses_persp_centroid_color);
bool uses_persp_sample = sel->info.uses_persp_sample ||
(!rs->flatshade && sel->info.uses_persp_sample_color);
if (rs->force_persample_interp && rs->multisample_enable &&
sctx->framebuffer.nr_samples > 1 && sctx->ps_iter_samples > 1) {
key->part.ps.prolog.force_persp_sample_interp =
uses_persp_center || uses_persp_centroid;
key->part.ps.prolog.force_linear_sample_interp =
sel->info.uses_linear_center || sel->info.uses_linear_centroid;
} else if (rs->multisample_enable && sctx->framebuffer.nr_samples > 1) {
key->part.ps.prolog.bc_optimize_for_persp =
uses_persp_center && uses_persp_centroid;
key->part.ps.prolog.bc_optimize_for_linear =
sel->info.uses_linear_center && sel->info.uses_linear_centroid;
} else {
/* Make sure SPI doesn't compute more than 1 pair
* of (i,j), which is the optimization here. */
key->part.ps.prolog.force_persp_center_interp = uses_persp_center +
uses_persp_centroid +
uses_persp_sample > 1;
key->part.ps.prolog.force_linear_center_interp = sel->info.uses_linear_center +
sel->info.uses_linear_centroid +
sel->info.uses_linear_sample > 1;
if (sel->info.uses_interp_at_sample)
key->mono.u.ps.interpolate_at_sample_force_center = 1;
}
key->part.ps.epilog.alpha_func = sctx->queued.named.dsa->alpha_func;
/* ps_uses_fbfetch is true only if the color buffer is bound. */
if (sctx->ps_uses_fbfetch && !sctx->blitter_running) {
struct pipe_surface *cb0 = sctx->framebuffer.state.cbufs[0];
struct pipe_resource *tex = cb0->texture;
/* 1D textures are allocated and used as 2D on GFX9. */
key->mono.u.ps.fbfetch_msaa = sctx->framebuffer.nr_samples > 1;
key->mono.u.ps.fbfetch_is_1D =
sctx->chip_class != GFX9 &&
(tex->target == PIPE_TEXTURE_1D || tex->target == PIPE_TEXTURE_1D_ARRAY);
key->mono.u.ps.fbfetch_layered =
tex->target == PIPE_TEXTURE_1D_ARRAY || tex->target == PIPE_TEXTURE_2D_ARRAY ||
tex->target == PIPE_TEXTURE_CUBE || tex->target == PIPE_TEXTURE_CUBE_ARRAY ||
tex->target == PIPE_TEXTURE_3D;
}
case MESA_SHADER_FRAGMENT:
si_update_ps_shader_key(sctx);
break;
}
default:
assert(0);
}