diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index d2d5718f201..5346dbb5cf9 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -338,8 +338,9 @@ struct si_shader_info { ubyte input_semantic[PIPE_MAX_SHADER_INPUTS]; ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS]; ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS]; + ubyte input_fp16_lo_hi_valid[PIPE_MAX_SHADER_INPUTS]; ubyte output_semantic[PIPE_MAX_SHADER_OUTPUTS]; - char output_semantic_to_slot[VARYING_SLOT_TESS_MAX]; + char output_semantic_to_slot[VARYING_SLOT_VAR15_16BIT + 1]; ubyte output_usagemask[PIPE_MAX_SHADER_OUTPUTS]; ubyte output_readmask[PIPE_MAX_SHADER_OUTPUTS]; ubyte output_streams[PIPE_MAX_SHADER_OUTPUTS]; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index f1ff4044e2d..bbd02749d08 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -502,7 +502,9 @@ static bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader * for (unsigned i = 0; i < info->num_outputs; i++) { LLVMTypeRef type = ctx->ac.f32; - if (nir_alu_type_get_type_size(ctx->shader->selector->info.output_type[i]) == 16) + /* Only FS uses unpacked f16. Other stages pack 16-bit outputs into low and high bits of f32. */ + if (nir->info.stage == MESA_SHADER_FRAGMENT && + nir_alu_type_get_type_size(ctx->shader->selector->info.output_type[i]) == 16) type = ctx->ac.f16; for (unsigned j = 0; j < 4; j++) diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index d48c8483c4e..68f6e4295c0 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -115,6 +115,12 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr if (mask) { info->input_usage_mask[loc] |= mask; + if (bit_size == 16) { + if (nir_intrinsic_io_semantics(intr).high_16bits) + info->input_fp16_lo_hi_valid[loc] |= 0x2; + else + info->input_fp16_lo_hi_valid[loc] |= 0x1; + } info->num_inputs = MAX2(info->num_inputs, loc + 1); } } @@ -796,10 +802,15 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir) NIR_PASS_V(nir, nir_lower_compute_system_values, &options); } - if (nir->info.stage == MESA_SHADER_FRAGMENT && - sscreen->info.has_packed_math_16bit && - sscreen->b.get_shader_param(&sscreen->b, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_FP16)) - NIR_PASS_V(nir, nir_lower_mediump_io, nir_var_shader_out, 0, false); + if (sscreen->b.get_shader_param(&sscreen->b, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_FP16)) { + NIR_PASS_V(nir, nir_lower_mediump_io, + /* TODO: LLVM fails to compile this test if VS inputs are 16-bit: + * dEQP-GLES31.functional.shaders.builtin_functions.integer.bitfieldinsert.uvec3_lowp_geometry + */ + (nir->info.stage != MESA_SHADER_VERTEX ? nir_var_shader_in : 0) | nir_var_shader_out, + BITFIELD64_BIT(VARYING_SLOT_PNTC) | BITFIELD64_RANGE(VARYING_SLOT_VAR0, 32), + true); + } si_nir_opts(sscreen, nir, true); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index c149f1c122e..90ad60851a0 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -3336,7 +3336,8 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state) } static unsigned si_get_ps_input_cntl(struct si_context *sctx, struct si_shader *vs, - unsigned semantic, enum glsl_interp_mode interpolate) + unsigned semantic, enum glsl_interp_mode interpolate, + ubyte fp16_lo_hi_mask) { struct si_shader_info *vsinfo = &vs->selector->info; unsigned offset, ps_input_cntl = 0; @@ -3350,6 +3351,10 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx, struct si_shader * (semantic >= VARYING_SLOT_TEX0 && semantic <= VARYING_SLOT_TEX7 && sctx->sprite_coord_enable & (1 << (semantic - VARYING_SLOT_TEX0)))) { ps_input_cntl |= S_028644_PT_SPRITE_TEX(1); + if (fp16_lo_hi_mask & 0x1) { + ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) | + S_028644_ATTR0_VALID(1); + } } int vs_slot = vsinfo->output_semantic_to_slot[semantic]; @@ -3372,6 +3377,16 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx, struct si_shader * ps_input_cntl = S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(offset); } + + if (fp16_lo_hi_mask && !G_028644_PT_SPRITE_TEX(ps_input_cntl)) { + assert(offset <= AC_EXP_PARAM_OFFSET_31 || offset == AC_EXP_PARAM_DEFAULT_VAL_0000); + + ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) | + S_028644_USE_DEFAULT_ATTR1(offset == AC_EXP_PARAM_DEFAULT_VAL_0000) | + S_028644_DEFAULT_VAL_ATTR1(0) | + S_028644_ATTR0_VALID(1) | /* this must be set if FP16_INTERP_MODE is set */ + S_028644_ATTR1_VALID(!!(fp16_lo_hi_mask & 0x2)); + } } else { /* VS output not found. */ if (semantic == VARYING_SLOT_PRIMITIVE_ID) { @@ -3414,8 +3429,10 @@ static void si_emit_spi_map(struct si_context *sctx) for (i = 0; i < psinfo->num_inputs; i++) { unsigned semantic = psinfo->input_semantic[i]; unsigned interpolate = psinfo->input_interpolate[i]; + ubyte fp16_lo_hi_mask = psinfo->input_fp16_lo_hi_valid[i]; - spi_ps_input_cntl[num_written++] = si_get_ps_input_cntl(sctx, vs, semantic, interpolate); + spi_ps_input_cntl[num_written++] = si_get_ps_input_cntl(sctx, vs, semantic, interpolate, + fp16_lo_hi_mask); } if (ps->key.part.ps.prolog.color_two_side) { @@ -3425,7 +3442,8 @@ static void si_emit_spi_map(struct si_context *sctx) unsigned semantic = VARYING_SLOT_BFC0 + i; spi_ps_input_cntl[num_written++] = si_get_ps_input_cntl(sctx, vs, semantic, - psinfo->color_interpolate[i]); + psinfo->color_interpolate[i], + false); } } assert(num_interp == num_written);