radeonsi: implement 16-bit VS->PS varyings
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9051>
This commit is contained in:
parent
1dff495057
commit
7db43960f6
|
@ -338,8 +338,9 @@ struct si_shader_info {
|
||||||
ubyte input_semantic[PIPE_MAX_SHADER_INPUTS];
|
ubyte input_semantic[PIPE_MAX_SHADER_INPUTS];
|
||||||
ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS];
|
ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS];
|
||||||
ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS];
|
ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS];
|
||||||
|
ubyte input_fp16_lo_hi_valid[PIPE_MAX_SHADER_INPUTS];
|
||||||
ubyte output_semantic[PIPE_MAX_SHADER_OUTPUTS];
|
ubyte output_semantic[PIPE_MAX_SHADER_OUTPUTS];
|
||||||
char output_semantic_to_slot[VARYING_SLOT_TESS_MAX];
|
char output_semantic_to_slot[VARYING_SLOT_VAR15_16BIT + 1];
|
||||||
ubyte output_usagemask[PIPE_MAX_SHADER_OUTPUTS];
|
ubyte output_usagemask[PIPE_MAX_SHADER_OUTPUTS];
|
||||||
ubyte output_readmask[PIPE_MAX_SHADER_OUTPUTS];
|
ubyte output_readmask[PIPE_MAX_SHADER_OUTPUTS];
|
||||||
ubyte output_streams[PIPE_MAX_SHADER_OUTPUTS];
|
ubyte output_streams[PIPE_MAX_SHADER_OUTPUTS];
|
||||||
|
|
|
@ -502,7 +502,9 @@ static bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *
|
||||||
for (unsigned i = 0; i < info->num_outputs; i++) {
|
for (unsigned i = 0; i < info->num_outputs; i++) {
|
||||||
LLVMTypeRef type = ctx->ac.f32;
|
LLVMTypeRef type = ctx->ac.f32;
|
||||||
|
|
||||||
if (nir_alu_type_get_type_size(ctx->shader->selector->info.output_type[i]) == 16)
|
/* Only FS uses unpacked f16. Other stages pack 16-bit outputs into low and high bits of f32. */
|
||||||
|
if (nir->info.stage == MESA_SHADER_FRAGMENT &&
|
||||||
|
nir_alu_type_get_type_size(ctx->shader->selector->info.output_type[i]) == 16)
|
||||||
type = ctx->ac.f16;
|
type = ctx->ac.f16;
|
||||||
|
|
||||||
for (unsigned j = 0; j < 4; j++)
|
for (unsigned j = 0; j < 4; j++)
|
||||||
|
|
|
@ -115,6 +115,12 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
|
||||||
|
|
||||||
if (mask) {
|
if (mask) {
|
||||||
info->input_usage_mask[loc] |= mask;
|
info->input_usage_mask[loc] |= mask;
|
||||||
|
if (bit_size == 16) {
|
||||||
|
if (nir_intrinsic_io_semantics(intr).high_16bits)
|
||||||
|
info->input_fp16_lo_hi_valid[loc] |= 0x2;
|
||||||
|
else
|
||||||
|
info->input_fp16_lo_hi_valid[loc] |= 0x1;
|
||||||
|
}
|
||||||
info->num_inputs = MAX2(info->num_inputs, loc + 1);
|
info->num_inputs = MAX2(info->num_inputs, loc + 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -796,10 +802,15 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
|
||||||
NIR_PASS_V(nir, nir_lower_compute_system_values, &options);
|
NIR_PASS_V(nir, nir_lower_compute_system_values, &options);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nir->info.stage == MESA_SHADER_FRAGMENT &&
|
if (sscreen->b.get_shader_param(&sscreen->b, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_FP16)) {
|
||||||
sscreen->info.has_packed_math_16bit &&
|
NIR_PASS_V(nir, nir_lower_mediump_io,
|
||||||
sscreen->b.get_shader_param(&sscreen->b, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_FP16))
|
/* TODO: LLVM fails to compile this test if VS inputs are 16-bit:
|
||||||
NIR_PASS_V(nir, nir_lower_mediump_io, nir_var_shader_out, 0, false);
|
* dEQP-GLES31.functional.shaders.builtin_functions.integer.bitfieldinsert.uvec3_lowp_geometry
|
||||||
|
*/
|
||||||
|
(nir->info.stage != MESA_SHADER_VERTEX ? nir_var_shader_in : 0) | nir_var_shader_out,
|
||||||
|
BITFIELD64_BIT(VARYING_SLOT_PNTC) | BITFIELD64_RANGE(VARYING_SLOT_VAR0, 32),
|
||||||
|
true);
|
||||||
|
}
|
||||||
|
|
||||||
si_nir_opts(sscreen, nir, true);
|
si_nir_opts(sscreen, nir, true);
|
||||||
|
|
||||||
|
|
|
@ -3336,7 +3336,8 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned si_get_ps_input_cntl(struct si_context *sctx, struct si_shader *vs,
|
static unsigned si_get_ps_input_cntl(struct si_context *sctx, struct si_shader *vs,
|
||||||
unsigned semantic, enum glsl_interp_mode interpolate)
|
unsigned semantic, enum glsl_interp_mode interpolate,
|
||||||
|
ubyte fp16_lo_hi_mask)
|
||||||
{
|
{
|
||||||
struct si_shader_info *vsinfo = &vs->selector->info;
|
struct si_shader_info *vsinfo = &vs->selector->info;
|
||||||
unsigned offset, ps_input_cntl = 0;
|
unsigned offset, ps_input_cntl = 0;
|
||||||
|
@ -3350,6 +3351,10 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx, struct si_shader *
|
||||||
(semantic >= VARYING_SLOT_TEX0 && semantic <= VARYING_SLOT_TEX7 &&
|
(semantic >= VARYING_SLOT_TEX0 && semantic <= VARYING_SLOT_TEX7 &&
|
||||||
sctx->sprite_coord_enable & (1 << (semantic - VARYING_SLOT_TEX0)))) {
|
sctx->sprite_coord_enable & (1 << (semantic - VARYING_SLOT_TEX0)))) {
|
||||||
ps_input_cntl |= S_028644_PT_SPRITE_TEX(1);
|
ps_input_cntl |= S_028644_PT_SPRITE_TEX(1);
|
||||||
|
if (fp16_lo_hi_mask & 0x1) {
|
||||||
|
ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) |
|
||||||
|
S_028644_ATTR0_VALID(1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int vs_slot = vsinfo->output_semantic_to_slot[semantic];
|
int vs_slot = vsinfo->output_semantic_to_slot[semantic];
|
||||||
|
@ -3372,6 +3377,16 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx, struct si_shader *
|
||||||
|
|
||||||
ps_input_cntl = S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(offset);
|
ps_input_cntl = S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (fp16_lo_hi_mask && !G_028644_PT_SPRITE_TEX(ps_input_cntl)) {
|
||||||
|
assert(offset <= AC_EXP_PARAM_OFFSET_31 || offset == AC_EXP_PARAM_DEFAULT_VAL_0000);
|
||||||
|
|
||||||
|
ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) |
|
||||||
|
S_028644_USE_DEFAULT_ATTR1(offset == AC_EXP_PARAM_DEFAULT_VAL_0000) |
|
||||||
|
S_028644_DEFAULT_VAL_ATTR1(0) |
|
||||||
|
S_028644_ATTR0_VALID(1) | /* this must be set if FP16_INTERP_MODE is set */
|
||||||
|
S_028644_ATTR1_VALID(!!(fp16_lo_hi_mask & 0x2));
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
/* VS output not found. */
|
/* VS output not found. */
|
||||||
if (semantic == VARYING_SLOT_PRIMITIVE_ID) {
|
if (semantic == VARYING_SLOT_PRIMITIVE_ID) {
|
||||||
|
@ -3414,8 +3429,10 @@ static void si_emit_spi_map(struct si_context *sctx)
|
||||||
for (i = 0; i < psinfo->num_inputs; i++) {
|
for (i = 0; i < psinfo->num_inputs; i++) {
|
||||||
unsigned semantic = psinfo->input_semantic[i];
|
unsigned semantic = psinfo->input_semantic[i];
|
||||||
unsigned interpolate = psinfo->input_interpolate[i];
|
unsigned interpolate = psinfo->input_interpolate[i];
|
||||||
|
ubyte fp16_lo_hi_mask = psinfo->input_fp16_lo_hi_valid[i];
|
||||||
|
|
||||||
spi_ps_input_cntl[num_written++] = si_get_ps_input_cntl(sctx, vs, semantic, interpolate);
|
spi_ps_input_cntl[num_written++] = si_get_ps_input_cntl(sctx, vs, semantic, interpolate,
|
||||||
|
fp16_lo_hi_mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ps->key.part.ps.prolog.color_two_side) {
|
if (ps->key.part.ps.prolog.color_two_side) {
|
||||||
|
@ -3425,7 +3442,8 @@ static void si_emit_spi_map(struct si_context *sctx)
|
||||||
|
|
||||||
unsigned semantic = VARYING_SLOT_BFC0 + i;
|
unsigned semantic = VARYING_SLOT_BFC0 + i;
|
||||||
spi_ps_input_cntl[num_written++] = si_get_ps_input_cntl(sctx, vs, semantic,
|
spi_ps_input_cntl[num_written++] = si_get_ps_input_cntl(sctx, vs, semantic,
|
||||||
psinfo->color_interpolate[i]);
|
psinfo->color_interpolate[i],
|
||||||
|
false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert(num_interp == num_written);
|
assert(num_interp == num_written);
|
||||||
|
|
Loading…
Reference in New Issue