radeonsi: implement 16-bit VS->PS varyings

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9051>
This commit is contained in:
Marek Olšák 2021-02-10 11:08:25 -05:00
parent 1dff495057
commit 7db43960f6
4 changed files with 41 additions and 9 deletions

View File

@ -338,8 +338,9 @@ struct si_shader_info {
ubyte input_semantic[PIPE_MAX_SHADER_INPUTS];
ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS];
ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS];
ubyte input_fp16_lo_hi_valid[PIPE_MAX_SHADER_INPUTS];
ubyte output_semantic[PIPE_MAX_SHADER_OUTPUTS];
char output_semantic_to_slot[VARYING_SLOT_TESS_MAX];
char output_semantic_to_slot[VARYING_SLOT_VAR15_16BIT + 1];
ubyte output_usagemask[PIPE_MAX_SHADER_OUTPUTS];
ubyte output_readmask[PIPE_MAX_SHADER_OUTPUTS];
ubyte output_streams[PIPE_MAX_SHADER_OUTPUTS];

View File

@ -502,7 +502,9 @@ static bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *
for (unsigned i = 0; i < info->num_outputs; i++) {
LLVMTypeRef type = ctx->ac.f32;
if (nir_alu_type_get_type_size(ctx->shader->selector->info.output_type[i]) == 16)
/* Only FS uses unpacked f16. Other stages pack 16-bit outputs into low and high bits of f32. */
if (nir->info.stage == MESA_SHADER_FRAGMENT &&
nir_alu_type_get_type_size(ctx->shader->selector->info.output_type[i]) == 16)
type = ctx->ac.f16;
for (unsigned j = 0; j < 4; j++)

View File

@ -115,6 +115,12 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
if (mask) {
info->input_usage_mask[loc] |= mask;
if (bit_size == 16) {
if (nir_intrinsic_io_semantics(intr).high_16bits)
info->input_fp16_lo_hi_valid[loc] |= 0x2;
else
info->input_fp16_lo_hi_valid[loc] |= 0x1;
}
info->num_inputs = MAX2(info->num_inputs, loc + 1);
}
}
@ -796,10 +802,15 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
NIR_PASS_V(nir, nir_lower_compute_system_values, &options);
}
if (nir->info.stage == MESA_SHADER_FRAGMENT &&
sscreen->info.has_packed_math_16bit &&
sscreen->b.get_shader_param(&sscreen->b, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_FP16))
NIR_PASS_V(nir, nir_lower_mediump_io, nir_var_shader_out, 0, false);
if (sscreen->b.get_shader_param(&sscreen->b, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_FP16)) {
NIR_PASS_V(nir, nir_lower_mediump_io,
/* TODO: LLVM fails to compile this test if VS inputs are 16-bit:
* dEQP-GLES31.functional.shaders.builtin_functions.integer.bitfieldinsert.uvec3_lowp_geometry
*/
(nir->info.stage != MESA_SHADER_VERTEX ? nir_var_shader_in : 0) | nir_var_shader_out,
BITFIELD64_BIT(VARYING_SLOT_PNTC) | BITFIELD64_RANGE(VARYING_SLOT_VAR0, 32),
true);
}
si_nir_opts(sscreen, nir, true);

View File

@ -3336,7 +3336,8 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
}
static unsigned si_get_ps_input_cntl(struct si_context *sctx, struct si_shader *vs,
unsigned semantic, enum glsl_interp_mode interpolate)
unsigned semantic, enum glsl_interp_mode interpolate,
ubyte fp16_lo_hi_mask)
{
struct si_shader_info *vsinfo = &vs->selector->info;
unsigned offset, ps_input_cntl = 0;
@ -3350,6 +3351,10 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx, struct si_shader *
(semantic >= VARYING_SLOT_TEX0 && semantic <= VARYING_SLOT_TEX7 &&
sctx->sprite_coord_enable & (1 << (semantic - VARYING_SLOT_TEX0)))) {
ps_input_cntl |= S_028644_PT_SPRITE_TEX(1);
if (fp16_lo_hi_mask & 0x1) {
ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) |
S_028644_ATTR0_VALID(1);
}
}
int vs_slot = vsinfo->output_semantic_to_slot[semantic];
@ -3372,6 +3377,16 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx, struct si_shader *
ps_input_cntl = S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(offset);
}
if (fp16_lo_hi_mask && !G_028644_PT_SPRITE_TEX(ps_input_cntl)) {
assert(offset <= AC_EXP_PARAM_OFFSET_31 || offset == AC_EXP_PARAM_DEFAULT_VAL_0000);
ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) |
S_028644_USE_DEFAULT_ATTR1(offset == AC_EXP_PARAM_DEFAULT_VAL_0000) |
S_028644_DEFAULT_VAL_ATTR1(0) |
S_028644_ATTR0_VALID(1) | /* this must be set if FP16_INTERP_MODE is set */
S_028644_ATTR1_VALID(!!(fp16_lo_hi_mask & 0x2));
}
} else {
/* VS output not found. */
if (semantic == VARYING_SLOT_PRIMITIVE_ID) {
@ -3414,8 +3429,10 @@ static void si_emit_spi_map(struct si_context *sctx)
for (i = 0; i < psinfo->num_inputs; i++) {
unsigned semantic = psinfo->input_semantic[i];
unsigned interpolate = psinfo->input_interpolate[i];
ubyte fp16_lo_hi_mask = psinfo->input_fp16_lo_hi_valid[i];
spi_ps_input_cntl[num_written++] = si_get_ps_input_cntl(sctx, vs, semantic, interpolate);
spi_ps_input_cntl[num_written++] = si_get_ps_input_cntl(sctx, vs, semantic, interpolate,
fp16_lo_hi_mask);
}
if (ps->key.part.ps.prolog.color_two_side) {
@ -3425,7 +3442,8 @@ static void si_emit_spi_map(struct si_context *sctx)
unsigned semantic = VARYING_SLOT_BFC0 + i;
spi_ps_input_cntl[num_written++] = si_get_ps_input_cntl(sctx, vs, semantic,
psinfo->color_interpolate[i]);
psinfo->color_interpolate[i],
false);
}
}
assert(num_interp == num_written);